Index: /trunk/Top/W1.v
===================================================================
--- /trunk/Top/W1.v	(revision 6)
+++ /trunk/Top/W1.v	(revision 6)
@@ -0,0 +1,675 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:  (C) Athree, 2009
+// Engineer: Dmitry Rozhdestvenskiy 
+// Email dmitry.rozhdestvenskiy@srisc.com dmitryr@a3.spb.ru divx4log@narod.ru
+// 
+// Design Name:    SPARC SoC single-core top level for Altera StratixIV devkit
+// Module Name:    W1 
+// Project Name:   SPARC SoC single-core
+//
+// LICENSE:
+// This is a Free Hardware Design; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// version 2 as published by the Free Software Foundation.
+// The above named program is distributed in the hope that it will
+// be useful, but WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU General Public License for more details.
+//
+//////////////////////////////////////////////////////////////////////////////////
+
+module W1( // Top level: connects the CPU, Wishbone interconnect, DRAM, flash and UART instances
+
+   input         sysclk,        // clock input: feeds the PLL (CLKIN1_IN) and dram_wb (clk200)
+   input         sysrst,        // inverted to form the PLL reset; also passed unchanged to dram_wb
+
+   // ddr3 memory interface (every ddr3_* pin is passed straight through to dram_wb)
+   inout  [63:0] ddr3_dq,
+   inout  [ 7:0] ddr3_dqs,
+   inout  [ 7:0] ddr3_dqs_n,
+   inout         ddr3_ck,
+   inout         ddr3_ck_n,
+   output        ddr3_reset,
+   output [12:0] ddr3_a,
+   output [ 2:0] ddr3_ba,
+   output        ddr3_ras_n,
+   output        ddr3_cas_n,
+   output        ddr3_we_n,
+   output        ddr3_cs_n,
+   output        ddr3_odt,
+   output        ddr3_ce,
+   output [ 7:0] ddr3_dm,
+
+   output        phy_init_done, // LED; comes from dram_wb and keeps wb_rst_i asserted while low
+   input         rup,           // passed through to dram_wb
+   input         rdn,           // passed through to dram_wb
+	
+   // Console interface (serial pins of the uart16550 instance)
+   input  srx,
+   output stx,
+   input  [1:0] flash_rev,       // passed through to the WBFLASH instance
+   
+   /* MII interface replaced by SGMII
+   
+   input        mtx_clk_pad_i, 
+   output [3:0] mtxd_pad_o, 
+   output       mtxen_pad_o, 
+   output       mtxerr_pad_o, 
+   input        mrx_clk_pad_i, 
+   input  [3:0] mrxd_pad_i, 
+   input        mrxdv_pad_i, 
+   input        mrxerr_pad_i, 
+   input        mcoll_pad_i, 
+   input        mcrs_pad_i, */
+   
+	//SGMII (ports below are disabled together with the commented-out eth_sgmii instance)
+	//output       mdc, 
+   //inout        md, 
+   
+   //output eth_rst,
+   //output eth_tx,
+   //input  eth_rx,
+   
+   //output led_10,
+   //output led_100,
+   //output led_1000,
+   //output led_link,
+   //output led_disp_err,
+   //output led_char_err,
+   //output led_an,
+	
+   output     [24:0] flash_addr,
+   input      [15:0] flash_data, // declared input only: this module never drives the flash data bus
+   output            flash_oen,
+   output            flash_wen,
+   output            flash_cen,
+   output            flash_clk,  // held at constant 1 by an assign in this module
+   output            flash_adv,  // held at constant 0 by an assign in this module
+   output            flash_rst   // driven with the inverse of wb_rst_i
+);
+
+wire wb_rst_i; // Wishbone-side reset: high while the PLL is unlocked or phy_init_done is low
+wire [35:0] CONTROL0; // CONTROL0..CONTROL2 are not referenced anywhere else in this module
+wire [35:0] CONTROL1;
+wire [35:0] CONTROL2;
+wire [1:0] VIO_SIG; // only referenced by the commented-out debug mux near the end of the module
+
+reg [31:0] cycle_count; // only referenced by the commented-out debug counter near the end of the module
+
+assign flash_clk=1; // flash_clk pin held at constant 1
+assign flash_adv=0; // flash_adv pin held at constant 0
+assign flash_rst=!wb_rst_i; // flash_rst pin is the inverse of the Wishbone reset
+
+wire [63:0] m0_dat_i; // m0_*: interconnect master port 0, wired to the s1_top CPU; _i/_o are named from the interconnect's side
+wire [63:0] m0_dat_o;
+wire [63:0] m0_adr_i;
+wire [ 7:0] m0_sel_i;
+wire        m0_we_i;
+wire        m0_cyc_i; 
+wire        m0_stb_i;
+wire        m0_ack_o;
+
+wire [63:0] m1_dat_i; // m1_*: interconnect master port 1 (Ethernet); its only driver, eth_sgmii, is commented out
+wire [63:0] m1_dat_o;
+wire [63:0] m1_adr_i;
+wire [ 7:0] m1_sel_i;
+wire        m1_we_i;
+wire        m1_cyc_i; 
+wire        m1_stb_i;
+wire        m1_ack_o;
+
+wire [63:0] s0_dat_i; // s0_*: interconnect slave port 0, wired to dram_wb
+wire [63:0] s0_dat_o;
+wire [63:0] s0_adr_o;
+wire [ 7:0] s0_sel_o;
+wire        s0_we_o;
+wire        s0_cyc_o; 
+wire        s0_stb_o;
+wire        s0_ack_i;
+
+wire [63:0] s1_dat_i; // s1_*: interconnect slave port 1, wired to the wb_* port of WBFLASH
+wire [63:0] s1_dat_o;
+wire [63:0] s1_adr_o;
+wire [ 7:0] s1_sel_o;
+wire        s1_we_o;
+wire        s1_cyc_o; 
+wire        s1_stb_o;
+wire        s1_ack_i;
+
+wire [63:0] s2_dat_i; // s2_*: interconnect slave port 2 (Ethernet); the eth_sgmii instance that used it is commented out
+wire [63:0] s2_dat_o;
+wire [63:0] s2_adr_o;
+wire [ 7:0] s2_sel_o;
+wire        s2_we_o;
+wire        s2_cyc_o; 
+wire        s2_stb_o;
+wire        s2_ack_i;
+
+wire [63:0] s3_dat_i; // s3_*: interconnect slave port 3, wired to uart_top; only s3_dat_i[31:0] is driven
+wire [63:0] s3_dat_o;
+wire [63:0] s3_adr_o;
+wire [ 7:0] s3_sel_o;
+wire        s3_we_o;
+wire        s3_cyc_o; 
+wire        s3_stb_o;
+wire        s3_ack_i;
+
+wire [63:0] s4_dat_i; // s4_*: interconnect slave port 4, wired to the wb1_* port of WBFLASH
+wire [63:0] s4_dat_o;
+wire [63:0] s4_adr_o;
+wire [ 7:0] s4_sel_o;
+wire        s4_we_o;
+wire        s4_cyc_o; 
+wire        s4_stb_o;
+wire        s4_ack_i;
+
+wb_conbus_top wishbone ( // Wishbone interconnect: master ports m0..m7, slave ports s0..s7
+    .clk_i(wb_clk_i),
+    .rst_i(wb_rst_i),
+    // Port naming is from the interconnect's side: *_i are its inputs, *_o its outputs
+    //CPU (s1_top); error/retry outputs are left open
+    .m0_dat_i(m0_dat_i),
+    .m0_dat_o(m0_dat_o),
+    .m0_adr_i(m0_adr_i),
+    .m0_sel_i(m0_sel_i),
+    .m0_we_i(m0_we_i),
+    .m0_cyc_i(m0_cyc_i),
+    .m0_stb_i(m0_stb_i),
+    .m0_ack_o(m0_ack_o),
+    .m0_err_o(),
+    .m0_rty_o(),
+    .m0_cab_i(0),
+
+    //Ethernet master: eth_sgmii is commented out, so nothing drives the m1_* request nets; inputs are tied off like m2..m7 (reconnect the m1_* nets when re-enabling it)
+    .m1_dat_i(0),
+    .m1_dat_o(m1_dat_o),
+    .m1_adr_i(0),
+    .m1_sel_i(0),
+    .m1_we_i(0),
+    .m1_cyc_i(0), // never requests the bus
+    .m1_stb_i(0),
+    .m1_ack_o(m1_ack_o),
+    .m1_err_o(m1_err_o),
+    .m1_rty_o(m1_rty_o),
+    .m1_cab_i(0),
+
+    .m2_dat_i(0), // m2..m7: unused master ports, inputs tied to 0 and outputs left open
+    .m2_dat_o(),
+    .m2_adr_i(0),
+    .m2_sel_i(0),
+    .m2_we_i(0),
+    .m2_cyc_i(0),
+    .m2_stb_i(0),
+    .m2_ack_o(),
+    .m2_err_o(),
+    .m2_rty_o(),
+    .m2_cab_i(0),
+
+    .m3_dat_i(0),
+    .m3_dat_o(),
+    .m3_adr_i(0),
+    .m3_sel_i(0),
+    .m3_we_i(0),
+    .m3_cyc_i(0),
+    .m3_stb_i(0),
+    .m3_ack_o(),
+    .m3_err_o(),
+    .m3_rty_o(),
+    .m3_cab_i(0),
+
+    .m4_dat_i(0),
+    .m4_dat_o(),
+    .m4_adr_i(0),
+    .m4_sel_i(0),
+    .m4_we_i(0),
+    .m4_cyc_i(0),
+    .m4_stb_i(0),
+    .m4_ack_o(),
+    .m4_err_o(),
+    .m4_rty_o(),
+    .m4_cab_i(0),
+
+    .m5_dat_i(0),
+    .m5_dat_o(),
+    .m5_adr_i(0),
+    .m5_sel_i(0),
+    .m5_we_i(0),
+    .m5_cyc_i(0),
+    .m5_stb_i(0),
+    .m5_ack_o(),
+    .m5_err_o(),
+    .m5_rty_o(),
+    .m5_cab_i(0),
+
+    .m6_dat_i(0),
+    .m6_dat_o(),
+    .m6_adr_i(0),
+    .m6_sel_i(0),
+    .m6_we_i(0),
+    .m6_cyc_i(0),
+    .m6_stb_i(0),
+    .m6_ack_o(),
+    .m6_err_o(),
+    .m6_rty_o(),
+    .m6_cab_i(0),
+
+    .m7_dat_i(0),
+    .m7_dat_o(),
+    .m7_adr_i(0),
+    .m7_sel_i(0),
+    .m7_we_i(0),
+    .m7_cyc_i(0),
+    .m7_stb_i(0),
+    .m7_ack_o(),
+    .m7_err_o(),
+    .m7_rty_o(),
+    .m7_cab_i(0),
+
+    //DRAM (dram_wb); error/retry inputs are tied to 0 and the cab output is left open
+    .s0_dat_i(s0_dat_i),
+    .s0_dat_o(s0_dat_o),
+    .s0_adr_o(s0_adr_o),
+    .s0_sel_o(s0_sel_o),
+    .s0_we_o(s0_we_o),
+    .s0_cyc_o(s0_cyc_o),
+    .s0_stb_o(s0_stb_o),
+    .s0_ack_i(s0_ack_i),
+    .s0_err_i(0),
+    .s0_rty_i(0),
+    .s0_cab_o(),
+
+    //Flash (WBFLASH wb_* port)
+    .s1_dat_i(s1_dat_i),
+    .s1_dat_o(s1_dat_o),
+    .s1_adr_o(s1_adr_o),
+    .s1_sel_o(s1_sel_o),
+    .s1_we_o(s1_we_o),
+    .s1_cyc_o(s1_cyc_o),
+    .s1_stb_o(s1_stb_o),
+    .s1_ack_i(s1_ack_i),
+    .s1_err_i(s1_err_i),
+    .s1_rty_i(s1_rty_i),
+    .s1_cab_o(s1_cab_o),
+
+    //Ethernet slave: eth_sgmii is commented out, so the response inputs have no driver and are tied to 0 (reconnect s2_dat_i/s2_ack_i/s2_err_i when re-enabling it)
+    .s2_dat_i(0),
+    .s2_dat_o(s2_dat_o),
+    .s2_adr_o(s2_adr_o),
+    .s2_sel_o(s2_sel_o),
+    .s2_we_o(s2_we_o),
+    .s2_cyc_o(s2_cyc_o),
+    .s2_stb_o(s2_stb_o),
+    .s2_ack_i(0), // no slave present: a cycle routed here is never acknowledged
+    .s2_err_i(0),
+    .s2_rty_i(0),
+    .s2_cab_o(s2_cab_o),
+
+    //UART: uart_top drives only s3_dat_i[31:0], so the 32-bit read data is replicated onto both halves of the 64-bit bus
+    .s3_dat_i({s3_dat_i[31:0],s3_dat_i[31:0]}),
+    .s3_dat_o(s3_dat_o),
+    .s3_adr_o(s3_adr_o),
+    .s3_sel_o(s3_sel_o),
+    .s3_we_o(s3_we_o),
+    .s3_cyc_o(s3_cyc_o),
+    .s3_stb_o(s3_stb_o),
+    .s3_ack_i(s3_ack_i),
+    .s3_err_i(0), // the uart_top instance is not connected to any err/rty port; was an undriven implicit net
+    .s3_rty_i(0),
+    .s3_cab_o(s3_cab_o),
+
+    //Second flash interface for fff8xxxxxx ram disk addressing (WBFLASH wb1_* port)
+    .s4_dat_i(s4_dat_i),
+    .s4_dat_o(s4_dat_o),
+    .s4_adr_o(s4_adr_o),
+    .s4_sel_o(s4_sel_o),
+    .s4_we_o(s4_we_o),
+    .s4_cyc_o(s4_cyc_o),
+    .s4_stb_o(s4_stb_o),
+    .s4_ack_i(s4_ack_i),
+    .s4_err_i(s4_err_i),
+    .s4_rty_i(s4_rty_i),
+    .s4_cab_o(s4_cab_o),
+
+    .s5_dat_i(0), // s5..s7: unused slave ports, inputs tied to 0 and outputs left open
+    .s5_dat_o(),
+    .s5_adr_o(),
+    .s5_sel_o(),
+    .s5_we_o(),
+    .s5_cyc_o(),
+    .s5_stb_o(),
+    .s5_ack_i(0),
+    .s5_err_i(0),
+    .s5_rty_i(0),
+    .s5_cab_o(),
+
+    .s6_dat_i(0),
+    .s6_dat_o(),
+    .s6_adr_o(),
+    .s6_sel_o(),
+    .s6_we_o(),
+    .s6_cyc_o(),
+    .s6_stb_o(),
+    .s6_ack_i(0),
+    .s6_err_i(0),
+    .s6_rty_i(0),
+    .s6_cab_o(),
+
+    .s7_dat_i(0),
+    .s7_dat_o(),
+    .s7_adr_o(),
+    .s7_sel_o(),
+    .s7_we_o(),
+    .s7_cyc_o(),
+    .s7_stb_o(),
+    .s7_ack_i(0),
+    .s7_err_i(0),
+    .s7_rty_i(0),
+    .s7_cab_o()
+);
+	
+s1_top cpu ( // CPU instance; its wbm_* port is master 0 of the Wishbone interconnect
+    .sys_clock_i(wb_clk_i),
+    .sys_reset_i(wb_rst_i),
+    .eth_irq_i(1'b0), // no Ethernet controller is instantiated; previously fed by the undriven implicit net eth_irq
+    .wbm_ack_i(m0_ack_o),
+    .wbm_data_i(m0_dat_o), // read data returned by the interconnect
+    .wbm_cycle_o(m0_cyc_i),
+    .wbm_strobe_o(m0_stb_i),
+    .wbm_we_o(m0_we_i),
+    .wbm_addr_o(m0_adr_i),
+    .wbm_data_o(m0_dat_i), // write data sent to the interconnect
+    .wbm_sel_o(m0_sel_i)
+    );
+
+wire [7:0] fifo_used; // dram_wb status output; only consumed by the commented-out debug capture
+
+dram_wb dram_wb_inst ( // DDR3 controller bridge on Wishbone slave port 0
+    .clk200(sysclk),
+    .rup(rup),
+    .rdn(rdn),
+    .wb_clk_i(wb_clk_i),
+    .wb_rst_i(wb_rst_i),
+    .wb_dat_i(s0_dat_o), // write data from the interconnect
+    .wb_dat_o(s0_dat_i), // read data back to the interconnect
+    .wb_adr_i(s0_adr_o),
+    .wb_sel_i(s0_sel_o),
+    .wb_we_i(s0_we_o),
+    .wb_cyc_i(s0_cyc_o),
+    .wb_stb_i(s0_stb_o),
+    .wb_ack_o(s0_ack_i),
+    .wb_err_o(s0_err_i), // not fed back: the interconnect's s0_err_i input is tied to 0
+    .wb_rty_o(s0_rty_i), // not fed back: the interconnect's s0_rty_i input is tied to 0
+    .wb_cab_i(1'b0), // the interconnect's s0_cab_o is left open, so the net s0_cab_o formerly used here had no driver
+    .ddr3_dq(ddr3_dq),
+    .ddr3_dqs(ddr3_dqs),
+    .ddr3_dqs_n(ddr3_dqs_n),
+    .ddr3_ck(ddr3_ck),
+    .ddr3_ck_n(ddr3_ck_n),
+    .ddr3_reset(ddr3_reset),
+    .ddr3_a(ddr3_a),
+    .ddr3_ba(ddr3_ba),
+    .ddr3_ras_n(ddr3_ras_n),
+    .ddr3_cas_n(ddr3_cas_n),
+    .ddr3_we_n(ddr3_we_n),
+    .ddr3_cs_n(ddr3_cs_n),
+    .ddr3_odt(ddr3_odt),
+    .ddr3_ce(ddr3_ce),
+    .ddr3_dm(ddr3_dm),
+    .phy_init_done(phy_init_done), // also gates wb_rst_i and is exported as a top-level output
+    .dcm_locked(dcm_locked), // same net as the PLL LOCKED_OUT; presumably an input here -- TODO confirm in dram_wb
+    .fifo_used(fifo_used),
+    .sysrst(sysrst)
+);
+
+WBFLASH flash ( // Flash bridge with two Wishbone slave ports (wb_* and wb1_*) sharing one set of flash pins
+    .wb_rst_i    (wb_rst_i),
+    .wb_clk_i    (wb_clk_i),
+    // Port wb_*: interconnect slave 1
+    .wb_cyc_i    (s1_cyc_o),
+    .wb_stb_i    (s1_stb_o),
+    .wb_we_i     (s1_we_o),
+    .wb_cab_i    (s1_cab_o),
+    .wb_adr_i    (s1_adr_o),
+    .wb_sel_i    (s1_sel_o),
+    .wb_dat_i    (s1_dat_o),
+    .wb_dat_o    (s1_dat_i),
+    .wb_ack_o    (s1_ack_i),
+    .wb_err_o    (s1_err_i),
+    .wb_rty_o    (s1_rty_i),
+    // Port wb1_*: interconnect slave 4 (second flash window)
+    .wb1_cyc_i   (s4_cyc_o),
+    .wb1_stb_i   (s4_stb_o),
+    .wb1_we_i    (s4_we_o),
+    .wb1_cab_i   (s4_cab_o),
+    .wb1_adr_i   (s4_adr_o),
+    .wb1_sel_i   (s4_sel_o),
+    .wb1_dat_i   (s4_dat_o),
+    .wb1_dat_o   (s4_dat_i),
+    .wb1_ack_o   (s4_ack_i),
+    .wb1_err_o   (s4_err_i),
+    .wb1_rty_o   (s4_rty_i),
+    // Flash device pins (top-level ports of W1)
+    .flash_rev   (flash_rev),
+    .flash_cen   (flash_cen),
+    .flash_oen   (flash_oen),
+    .flash_wen   (flash_wen),
+    .flash_addr  (flash_addr),
+    .flash_data  (flash_data)
+);
+
+uart_top uart16550 ( // Console UART on Wishbone slave port 3; its 32-bit port is adapted to the 64-bit bus below
+    .wb_clk_i(wb_clk_i), 
+    .wb_rst_i(wb_rst_i), 
+    .wb_adr_i({s3_adr_o[4:3],s3_sel_o[3:0]==4'h0 ? 1'b0:1'b1,2'b00}), // 5-bit address: bit 2 is 0 when no low byte lane (sel[3:0]) is active, 1 otherwise; bits 1:0 are always 0
+    .wb_dat_i(s3_sel_o[3:0]==4'h0 ? {s3_dat_o[39:32],s3_dat_o[47:40],s3_dat_o[55:48],s3_dat_o[63:56]}:{s3_dat_o[7:0],s3_dat_o[15:8],s3_dat_o[23:16],s3_dat_o[31:24]}), // write data: byte-reversed upper word when sel[3:0]==0, else byte-reversed lower word
+    .wb_dat_o({s3_dat_i[7:0],s3_dat_i[15:8],s3_dat_i[23:16],s3_dat_i[31:24]}), // read data lands byte-reversed in s3_dat_i[31:0]; s3_dat_i[63:32] is never driven
+    .wb_we_i(s3_we_o), 
+    .wb_stb_i(s3_stb_o), 
+    .wb_cyc_i(s3_cyc_o), 
+    .wb_ack_o(s3_ack_i), 
+    .wb_sel_i(s3_sel_o[3:0]==4'h0 ? {s3_sel_o[4],s3_sel_o[5],s3_sel_o[6],s3_sel_o[7]}:{s3_sel_o[0],s3_sel_o[1],s3_sel_o[2],s3_sel_o[3]}), // Big endian: the active 4-bit select group is bit-reversed to match the byte-reversed data
+    .int_o(int_o), // lands on an implicit net that nothing else in this module reads
+    .stx_pad_o(stx), 
+    .srx_pad_i(srx), 
+    .rts_pad_o(), 
+    .cts_pad_i(1), // modem-control inputs are constants: cts=1, dsr=1, ri=0, dcd=1
+    .dtr_pad_o(), 
+    .dsr_pad_i(1), 
+    .ri_pad_i(0), 
+    .dcd_pad_i(1),
+	 .baud_o(baud_o) // only referenced by the commented-out debug capture
+);
+
+/*
+// OpenCores 10/100 Ethernet MAC
+eth_top eth_mac (
+    .wb_clk_i(wb_clk_i), 
+    .wb_rst_i(wb_rst_i), 
+    
+    .wb_dat_i(wb_sel_i[7:4]==4'b0 ? {wb_dat_i[7:0],wb_dat_i[15:8],wb_dat_i[23:16],wb_dat_i[31:24]}:{wb_dat_i[39:32],wb_dat_i[47:40],wb_dat_i[55:48],wb_dat_i[63:56]}), 
+    .wb_dat_o(dat_o), 
+    .wb_adr_i(wb_adr_i[31:0]), 
+    .wb_sel_i(wb_sel_i[7:4]==4'b0 ? {wb_sel_i[0],wb_sel_i[1],wb_sel_i[2],wb_sel_i[3]}:{wb_sel_i[4],wb_sel_i[5],wb_sel_i[6],wb_sel_i[7]}), 
+    .wb_we_i(wb_we_i), 
+    .wb_cyc_i(wb_cyc_i), 
+    .wb_stb_i(wb_stb_i), 
+    .wb_ack_o(wb_ack_o), 
+    .wb_err_o(wb_err_o), 
+    .m_wb_adr_o(m_wb_adr_o[31:0]), 
+    .m_wb_sel_o(sel_o), 
+    .m_wb_we_o(m_wb_we_o), 
+    .m_wb_dat_o(mdat_o), 
+    .m_wb_dat_i(m_wb_adr_o[2] ? {m_wb_dat_i[7:0],m_wb_dat_i[15:8],m_wb_dat_i[23:16],m_wb_dat_i[31:24]}:{m_wb_dat_i[39:32],m_wb_dat_i[47:40],m_wb_dat_i[55:48],m_wb_dat_i[63:56]}), 
+    .m_wb_cyc_o(m_wb_cyc_o), 
+    .m_wb_stb_o(m_wb_stb_o), 
+    .m_wb_ack_i(m_wb_ack_i), 
+    .m_wb_err_i(m_wb_err_i), 
+    
+    .mtx_clk_pad_i(mtx_clk), 
+    .mtxd_pad_o(mtxd), 
+    .mtxen_pad_o(mtxen), 
+    .mtxerr_pad_o(mtxerr), 
+    .mrx_clk_pad_i(mrx_clk), 
+    .mrxd_pad_i(mrxd), 
+    .mrxdv_pad_i(mrxdv), 
+    .mrxerr_pad_i(mrxerr), 
+    .mcoll_pad_i(mcoll), 
+    .mcrs_pad_i(mcrs), 
+    .mdc_pad_o(mdc), 
+    .md_pad_i(md_i), 
+    .md_pad_o(md_o), 
+    .md_padoe_o(md_oe), 
+    .int_o(int_eth)
+); */
+
+/*eth_sgmii eth_ctrl (
+   .wb_clk_i(wb_clk_i), 
+    .wb_rst_i(wb_rst_i), 
+    .sysclk(sysclk),
+    
+    .wb_dat_i(s2_dat_o), 
+    .wb_dat_o(s2_dat_i), 
+    .wb_adr_i(s2_adr_o), 
+    .wb_sel_i(s2_sel_o), 
+    .wb_we_i(s2_we_o), 
+    .wb_cyc_i(s2_cyc_o), 
+    .wb_stb_i(s2_stb_o), 
+    .wb_ack_o(s2_ack_i), 
+    .wb_err_o(s2_err_i), 
+
+    .m_wb_adr_o(m1_adr_i), 
+    .m_wb_sel_o(m1_sel_i), 
+    .m_wb_we_o(m1_we_i), 
+    .m_wb_dat_o(m1_dat_i), 
+    .m_wb_dat_i(m1_dat_o), 
+    .m_wb_cyc_o(m1_cyc_i), 
+    .m_wb_stb_o(m1_stb_i), 
+    .m_wb_ack_i(m1_ack_o), 
+    .m_wb_err_i(m1_err_o), 
+    
+    .sgmii_tx(eth_tx),
+    .sgmii_rx(eth_rx),
+    .led_10(led_10),
+    .led_100(led_100),
+    .led_1000(led_1000),
+    .led_an(led_an),
+    .led_disp_err(led_disp_err),
+    .led_char_err(led_char_err),
+    .led_link(led_link),
+    
+    .md(md),
+    .mdc(mdc),
+    
+    .int_eth(eth_int)
+);
+*/
+//assign eth_rst=!wb_rst_i; // PHY reset -- disabled: the eth_rst port is commented out, so eth_rst is undeclared here (restore together with the port)
+// PLL reset is the inverse of the sysrst pin
+wire sysrst_p;
+assign sysrst_p=!sysrst;
+
+// Standard PLL: generates the Wishbone clock wb_clk_i from sysclk and reports lock on dcm_locked
+pll pll_inst(
+	.RST_IN(sysrst_p),
+	.CLKIN1_IN(sysclk),
+	.CLKOUT0_OUT(wb_clk_i), //Up to 75 MHz on Stratix IV
+	.LOCKED_OUT(dcm_locked)
+);
+// Hold the Wishbone side in reset until the PLL has locked and dram_wb reports phy_init_done
+assign wb_rst_i=(!dcm_locked || !phy_init_done);
+	 
+//reg [223:0] ILA_DATA;
+
+/*
+[63:0]    address
+[127:64]  data to core
+[191:128] data from core
+[199:192] sel
+[200]     cyc
+[201]     stb
+[202]     we
+[203]     ack
+*/
+
+// SignalTap II
+/*ST ila(
+	.acq_clk(wb_clk_i),
+	.acq_data_in(ILA_DATA),
+	.acq_trigger_in(ILA_DATA),
+	.storage_enable(ILA_DATA[203]) // wb_ack
+);*/
+
+// InSystem Sources
+/*VIO vio_inst(
+	.probe(0),
+	.source_clk(wb_clk_i),
+	.source(VIO_SIG)
+);*/
+
+/*always @(posedge wb_clk_i or posedge wb_rst_i)
+   if(wb_rst_i)
+	   cycle_count<=0;
+	else
+	   cycle_count<=cycle_count+1;
+
+always @( * )
+   begin
+      case(VIO_SIG)
+         2'b00:
+            begin
+               ILA_DATA[63:0]<=m0_adr_i;
+               ILA_DATA[127:64]<=m0_dat_o;
+               ILA_DATA[191:128]<=m0_dat_i;
+               ILA_DATA[199:192]<=m0_sel_i;
+               ILA_DATA[200]<=m0_cyc_i;
+               ILA_DATA[201]<=m0_stb_i;
+               ILA_DATA[202]<=m0_we_i;
+               ILA_DATA[203]<=m0_ack_o;
+            end
+         2'b01:
+            begin
+               ILA_DATA[63:0]<=m1_adr_i;
+               ILA_DATA[127:64]<=m1_dat_o;
+               ILA_DATA[191:128]<=m1_dat_i;
+               ILA_DATA[199:192]<=m1_sel_i;
+               ILA_DATA[200]<=m1_cyc_i;
+               ILA_DATA[201]<=m1_stb_i;
+               ILA_DATA[202]<=m1_we_i;
+               ILA_DATA[203]<=m1_ack_o;
+            end
+         2'b10:
+            begin
+               ILA_DATA[63:0]<=s2_adr_o;
+               ILA_DATA[127:64]<=s2_dat_o;
+               ILA_DATA[191:128]<=s2_dat_i;
+               ILA_DATA[199:192]<=s2_sel_o;
+               ILA_DATA[200]<=s2_cyc_o;
+               ILA_DATA[201]<=s2_stb_o;
+               ILA_DATA[202]<=s2_we_o;
+               ILA_DATA[203]<=s2_ack_i;
+            end
+         2'b11:
+            begin
+               ILA_DATA[63:0]<=s4_adr_o;
+               ILA_DATA[127:64]<=s4_dat_o;
+               ILA_DATA[191:128]<=s4_dat_i;
+               ILA_DATA[199:192]<=s4_sel_o;
+               ILA_DATA[200]<=s4_cyc_o;
+               ILA_DATA[201]<=s4_stb_o;
+               ILA_DATA[202]<=s4_we_o;
+               ILA_DATA[203]<=s4_ack_i;
+            end
+      endcase
+      ILA_DATA[204]<=stx;
+      ILA_DATA[205]<=srx;
+      ILA_DATA[206]<=baud_o;
+      //ILA_DATA[220:207]<=cycle_count[31:18];
+      ILA_DATA[220:213]<=fifo_used;
+      ILA_DATA[212:207]<=cycle_count[31:26];
+      ILA_DATA[221]<=dcm_locked;
+      ILA_DATA[222]<=wb_rst_i;
+      ILA_DATA[223]<=phy_init_done;
+   end
+*/
+endmodule
Index: /trunk/ml50x_U1_fpga.ucf
===================================================================
--- /trunk/ml50x_U1_fpga.ucf	(revision 6)
+++ /trunk/ml50x_U1_fpga.ucf	(revision 6)
@@ -0,0 +1,540 @@
+NET  AUDIO_BIT_CLK        LOC="AF18";  # Bank 4, Vcco=3.3V, No DCI    
+NET  AUDIO_SDATA_IN       LOC="AE18";  # Bank 4, Vcco=3.3V, No DCI    
+NET  AUDIO_SDATA_OUT      LOC="AG16";  # Bank 4, Vcco=3.3V, No DCI    
+NET  AUDIO_SYNC           LOC="AF19";  # Bank 4, Vcco=3.3V, No DCI    
+NET  BUS_ERROR_1          LOC="F6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  BUS_ERROR_2          LOC="T10";   # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  CFG_ADDR_OUT0        LOC="AE12";  # Bank 2, Vcco=3.3V      
+NET  CFG_ADDR_OUT1        LOC="AE13";  # Bank 2, Vcco=3.3V      
+NET  CLK_27MHZ_FPGA       LOC="AG18";  # Bank 4, Vcco=3.3V, No DCI      
+NET  CLK_33MHZ_FPGA       LOC="AH17";  # Bank 4, Vcco=3.3V, No DCI      
+NET  CLK_FPGA_N           LOC="K19";   # Bank 3, Vcco=2.5V, No DCI      
+NET  CLK_FPGA_P           LOC="L19";   # Bank 3, Vcco=2.5V, No DCI      
+NET  CLKBUF_Q0_N          LOC="H3";    # Bank 116, MGTREFCLKN_116, GTP_DUAL_X0Y4
+NET  CLKBUF_Q0_P          LOC="H4";    # Bank 116, MGTREFCLKP_116, GTP_DUAL_X0Y4
+NET  CLKBUF_Q1_N          LOC="J19";   # Bank 3, Vcco=2.5V, No DCI      
+NET  CLKBUF_Q1_P          LOC="K18";   # Bank 3, Vcco=2.5V, No DCI      
+NET  CPLD_IO_1            LOC="W10";   # Bank 18, Vcco=3.3V, No DCI      
+NET  CPU_TCK              LOC="E6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  CPU_TDO              LOC="E7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  CPU_TMS              LOC="U10";   # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  CPU_TRST             LOC="V10";   # Bank 18, Vcco=3.3V, No DCI      
+NET  DDR2_A0              LOC="L30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  DDR2_A1              LOC="M30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  DDR2_A2              LOC="N29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  DDR2_A3              LOC="P29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  DDR2_A4              LOC="K31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_A5              LOC="L31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_A6              LOC="P31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_A7              LOC="P30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_A8              LOC="M31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_A9              LOC="R28";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_A10             LOC="J31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  DDR2_A11             LOC="R29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  DDR2_A12             LOC="T31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  DDR2_A13             LOC="H29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  DDR2_BA0             LOC="G31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_BA1             LOC="J30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_BA2             LOC="R31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_CAS_B           LOC="E31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_CKE0            LOC="T28";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_CKE1            LOC="U30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_CLK0_N          LOC="AJ29";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_CLK0_P          LOC="AK29";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_CLK1_N          LOC="F28";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_CLK1_P          LOC="E28";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_CS0_B           LOC="L29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_CS1_B           LOC="J29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D0              LOC="AF30";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D1              LOC="AK31";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D2              LOC="AF31";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D3              LOC="AD30";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D4              LOC="AJ30";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D5              LOC="AF29";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D6              LOC="AD29";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D7              LOC="AE29";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D8              LOC="AH27";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D9              LOC="AF28";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D10             LOC="AH28";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D11             LOC="AA28";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D12             LOC="AG25";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D13             LOC="AJ26";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D14             LOC="AG28";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D15             LOC="AB28";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D16             LOC="AC28";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D17             LOC="AB25";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D18             LOC="AC27";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D19             LOC="AA26";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D20             LOC="AB26";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D21             LOC="AA24";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D22             LOC="AB27";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D23             LOC="AA25";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D24             LOC="AC29";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D25             LOC="AB30";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D26             LOC="W31";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D27             LOC="V30";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D28             LOC="AC30";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D29             LOC="W29";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D30             LOC="V27";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D31             LOC="W27";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D32             LOC="V29";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D33             LOC="Y27";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D34             LOC="Y26";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D35             LOC="W24";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D36             LOC="V28";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D37             LOC="W25";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D38             LOC="W26";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D39             LOC="V24";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D40             LOC="R24";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D41             LOC="P25";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D42             LOC="N24";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D43             LOC="P26";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D44             LOC="T24";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D45             LOC="N25";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D46             LOC="P27";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D47             LOC="N28";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D48             LOC="M28";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D49             LOC="L28";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D50             LOC="F25";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D51             LOC="H25";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D52             LOC="K27";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D53             LOC="K28";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D54             LOC="H24";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D55             LOC="G26";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D56             LOC="G25";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D57             LOC="M26";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D58             LOC="J24";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D59             LOC="L26";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D60             LOC="J27";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D61             LOC="M25";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D62             LOC="L25";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_D63             LOC="L24";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DM0             LOC="AJ31";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DM1             LOC="AE28";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DM2             LOC="Y24";   # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DM3             LOC="Y31";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DM4             LOC="V25";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DM5             LOC="P24";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DM6             LOC="F26";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DM7             LOC="J25";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS0_N          LOC="AA30";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS0_P          LOC="AA29";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS1_N          LOC="AK27";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS1_P          LOC="AK28";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS2_N          LOC="AJ27";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS2_P          LOC="AK26";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS3_N          LOC="AA31";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS3_P          LOC="AB31";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS4_N          LOC="Y29";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS4_P          LOC="Y28";   # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS5_N          LOC="E27";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS5_P          LOC="E26";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS6_N          LOC="G28";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS6_P          LOC="H28";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS7_N          LOC="H27";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_DQS7_P          LOC="G27";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_ODT0            LOC="F31";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_ODT1            LOC="F30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_RAS_B           LOC="H30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_SCL             LOC="E29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_SDA             LOC="F29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DDR2_WE_B            LOC="K29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DVI_D0               LOC="AB8";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D1               LOC="AC8";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D2               LOC="AN12";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D3               LOC="AP12";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D4               LOC="AA9";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D5               LOC="AA8";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D6               LOC="AM13";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D7               LOC="AN13";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D8               LOC="AA10";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D9               LOC="AB10";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D10              LOC="AP14";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_D11              LOC="AN14";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_DE               LOC="AE8";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_GPIO1            LOC="N30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  DVI_H                LOC="AM12";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_RESET_B          LOC="AK6";   # Bank 18, Vcco=3.3V, No DCI
+NET  DVI_V                LOC="AM11";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_XCLK_N           LOC="AL10";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  DVI_XCLK_P           LOC="AL11";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FAN_ALERT_B          LOC="T30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FLASH_ADV_B          LOC="F13";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FLASH_AUDIO_RESET_B  LOC="AG17";  # Bank 4, Vcco=3.3V, No DCI
+NET  FLASH_CE_B           LOC="AE14";  # Bank 2, Vcco=3.3V
+NET  FLASH_CLK            LOC="N9";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FLASH_OE_B           LOC="AF14";  # Bank 2, Vcco=3.3V
+NET  FLASH_WAIT           LOC="G13";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FPGA_AVDD            LOC="T18";   # Bank 0, Vcco=3.3V
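+NET  FPGA_CCLK-R          LOC="N15";   # Bank 0, Vcco=3.3V; NOTE(review): '-' cannot appear in an unescaped HDL port name, so this net may need quoting/renaming before use - confirm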
+NET  FPGA_CPU_RESET_B     LOC="E9";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FPGA_CS_B            LOC="N22";   # Bank 0, Vcco=3.3V
+NET  FPGA_CS0_B           LOC="AF21";  # Bank 2, Vcco=3.3V
+NET  FPGA_DIFF_CLK_OUT_N  LOC="J21";   # Bank 3, Vcco=2.5V, No DCI
+NET  FPGA_DIFF_CLK_OUT_P  LOC="J20";   # Bank 3, Vcco=2.5V, No DCI
+NET  FPGA_DIN             LOC="P15";   # Bank 0, Vcco=3.3V
+NET  FPGA_DONE            LOC="M15";   # Bank 0, Vcco=3.3V
+NET  FPGA_DOUT_BUSY       LOC="AD15";  # Bank 0, Vcco=3.3V
+NET  FPGA_DX_N            LOC="W17";   # Bank 0, Vcco=3.3V
+NET  FPGA_DX_P            LOC="W18";   # Bank 0, Vcco=3.3V
+NET  FPGA_EXP_TCK         LOC="AB15";  # Bank 0, Vcco=3.3V
+NET  FPGA_EXP_TMS         LOC="AC14";  # Bank 0, Vcco=3.3V
+NET  FPGA_HSWAPEN         LOC="M23";   # Bank 0, Vcco=3.3V
+NET  FPGA_INIT_B          LOC="N14";   # Bank 0, Vcco=3.3V
+NET  FPGA_M0              LOC="AD21";  # Bank 0, Vcco=3.3V
+NET  FPGA_M1              LOC="AC22";  # Bank 0, Vcco=3.3V
+NET  FPGA_M2              LOC="AD22";  # Bank 0, Vcco=3.3V
+NET  FPGA_PROG_B          LOC="M22";   # Bank 0, Vcco=3.3V
+NET  FPGA_RDWR_B          LOC="N23";   # Bank 0, Vcco=3.3V
+NET  FPGA_ROTARY_INCA     LOC="AH30";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FPGA_ROTARY_INCB     LOC="AG30";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FPGA_ROTARY_PUSH     LOC="AH29";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FPGA_SERIAL1_RX      LOC="AG15";  # Bank 4, Vcco=3.3V, No DCI
+NET  FPGA_SERIAL1_TX      LOC="AG20";  # Bank 4, Vcco=3.3V, No DCI
+NET  FPGA_SERIAL2_RX      LOC="G10";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FPGA_SERIAL2_TX      LOC="F10";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FPGA_TDI             LOC="AC15";  # Bank 0, Vcco=3.3V
+NET  FPGA_TDO             LOC="AD14";  # Bank 0, Vcco=3.3V
+NET  FPGA_V_N             LOC="V17";   # Bank 0, Vcco=3.3V (SYSMON External Input: VN) J9-10
+NET  FPGA_V_P             LOC="U18";   # Bank 0, Vcco=3.3V (SYSMON External Input: VP) J9-9 
+NET  FPGA_VBATT           LOC="L23";   # Bank 0, Vcco=3.3V
+NET  FPGA_VREFP           LOC="V18";   # Bank 0, Vcco=3.3V
+NET  FPGA_VRN_B11         LOC="N33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  FPGA_VRN_B13         LOC="AG33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  FPGA_VRN_B17         LOC="AD31";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FPGA_VRN_B19         LOC="N27";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FPGA_VRN_B20         LOC="L10";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FPGA_VRN_B21         LOC="AJ25";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FPGA_VRN_B22         LOC="AF8";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FPGA_VRP_B11         LOC="M33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  FPGA_VRP_B13         LOC="AH33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  FPGA_VRP_B17         LOC="AE31";  # Bank 17, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FPGA_VRP_B19         LOC="M27";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FPGA_VRP_B20         LOC="L11";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  FPGA_VRP_B21         LOC="AH25";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  FPGA_VRP_B22         LOC="AE9";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  GPIO_DIP_SW1         LOC="U25";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_DIP_SW2         LOC="AG27";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_DIP_SW3         LOC="AF25";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_DIP_SW4         LOC="AF26";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_DIP_SW5         LOC="AE27";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_DIP_SW6         LOC="AE26";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_DIP_SW7         LOC="AC25";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_DIP_SW8         LOC="AC24";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_LED_0           LOC="H18";   # Bank 3, Vcco=2.5V, No DCI
+NET  GPIO_LED_1           LOC="L18";   # Bank 3, Vcco=2.5V, No DCI
+NET  GPIO_LED_2           LOC="G15";   # Bank 3, Vcco=2.5V, No DCI
+NET  GPIO_LED_3           LOC="AD26";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_LED_4           LOC="G16";   # Bank 3, Vcco=2.5V, No DCI
+NET  GPIO_LED_5           LOC="AD25";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_LED_6           LOC="AD24";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_LED_7           LOC="AE24";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  GPIO_LED_C           LOC="E8";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  GPIO_LED_E           LOC="AG23";  # Bank 2, Vcco=3.3V
+NET  GPIO_LED_N           LOC="AF13";  # Bank 2, Vcco=3.3V
+NET  GPIO_LED_S           LOC="AG12";  # Bank 2, Vcco=3.3V
+NET  GPIO_LED_W           LOC="AF23";  # Bank 2, Vcco=3.3V
+NET  GPIO_SW_C            LOC="AJ6";   # Bank 18, Vcco=3.3V, No DCI
+NET  GPIO_SW_E            LOC="AK7";   # Bank 18, Vcco=3.3V, No DCI
+NET  GPIO_SW_N            LOC="U8";    # Bank 18, Vcco=3.3V, No DCI
+NET  GPIO_SW_S            LOC="V8";    # Bank 18, Vcco=3.3V, No DCI
+NET  GPIO_SW_W            LOC="AJ7";   # Bank 18, Vcco=3.3V, No DCI
+NET  HDR1_2               LOC="H33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_4               LOC="F34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_6               LOC="H34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_8               LOC="G33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_10              LOC="G32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_12              LOC="H32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_14              LOC="J32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_16              LOC="J34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_18              LOC ="L33";  # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_20              LOC="M32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_22              LOC="P34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_24              LOC="N34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_26              LOC="AA34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[5]) J6-26
+NET  HDR1_28              LOC="AD32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_30              LOC="Y34";   # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[5]) J6-30
+NET  HDR1_32              LOC="Y32";   # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_34              LOC="W32";   # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_36              LOC="AH34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_38              LOC="AE32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_40              LOC="AG32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_42              LOC="AH32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_44              LOC="AK34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_46              LOC="AK33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_48              LOC="AJ32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_50              LOC="AK32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_52              LOC="AL34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_54              LOC="AL33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_56              LOC="AM33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_58              LOC="AJ34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_60              LOC="AM32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_62              LOC="AN34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR1_64              LOC="AN33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR2_2_SM_8_N        LOC="K34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[15]) J4-2
+NET  HDR2_4_SM_8_P        LOC="L34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[15]) J4-4
+NET  HDR2_6_SM_7_N        LOC="K32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[14]) J4-6
+NET  HDR2_8_SM_7_P        LOC="K33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[14]) J4-8
+NET  HDR2_10_DIFF_0_N     LOC="N32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[13]) J4-10
+NET  HDR2_12_DIFF_0_P     LOC="P32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[13]) J4-12
+NET  HDR2_14_DIFF_1_N     LOC="R34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[12]) J4-14
+NET  HDR2_16_DIFF_1_P     LOC="T33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[12]) J4-16
+NET  HDR2_18_DIFF_2_N     LOC="R32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[11]) J4-18
+NET  HDR2_20_DIFF_2_P     LOC="R33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[11]) J4-20
+NET  HDR2_22_SM_10_N      LOC="T34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[10]) J4-22
+NET  HDR2_24_SM_10_P      LOC="U33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[10]) J4-24
+NET  HDR2_26_SM_11_N      LOC="U31";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[9]) J4-26
+NET  HDR2_28_SM_11_P      LOC="U32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[9]) J4-28
+NET  HDR2_30_DIFF_3_N     LOC="V33";   # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[8]) J4-30
+NET  HDR2_32_DIFF_3_P     LOC="V32";   # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[8]) J4-32
+NET  HDR2_34_SM_15_N      LOC="V34";   # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[7]) J4-34
+NET  HDR2_36_SM_15_P      LOC="W34";   # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[7]) J4-36
+NET  HDR2_38_SM_6_N       LOC="AA33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[6]) J4-38
+NET  HDR2_40_SM_6_P       LOC="Y33";   # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[6]) J4-40
+NET  HDR2_42_SM_14_N      LOC="AE34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[0]) J4-42
+NET  HDR2_44_SM_14_P      LOC="AF34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[0]) J4-44
+NET  HDR2_46_SM_12_N      LOC="AE33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[1]) J4-46
+NET  HDR2_48_SM_12_P      LOC="AF33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[1]) J4-48
+NET  HDR2_50_SM_5_N       LOC="AD34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[4]) J4-50
+NET  HDR2_52_SM_5_P       LOC="AC34";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[4]) J4-52
+NET  HDR2_54_SM_13_N      LOC="AB32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[3]) J4-54
+NET  HDR2_56_SM_13_P      LOC="AC32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[3]) J4-56
+NET  HDR2_58_SM_4_N       LOC="AB33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXN[2]) J4-58
+NET  HDR2_60_SM_4_P       LOC="AC33";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20 (SYSMON External Input: VAUXP[2]) J4-60
+NET  HDR2_62_SM_9_N       LOC="AP32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  HDR2_64_SM_9_P       LOC="AN32";  # Bank 13, Vcco=2.5V or 3.3V user selectable by J20
+NET  IIC_SCL_MAIN         LOC="F9";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  IIC_SCL_SFP          LOC="R26";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  IIC_SCL_VIDEO        LOC="U27";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  IIC_SDA_MAIN         LOC="F8";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  IIC_SDA_SFP          LOC="U28";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  IIC_SDA_VIDEO        LOC="T29";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  KEYBOARD_CLK         LOC="T26";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  KEYBOARD_DATA        LOC="T25";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors
+NET  LCD_FPGA_DB4         LOC="T9";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  LCD_FPGA_DB5         LOC="G7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  LCD_FPGA_DB6         LOC="G6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  LCD_FPGA_DB7         LOC="T11";   # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  LCD_FPGA_E           LOC="AC9";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  LCD_FPGA_RS          LOC="J17";   # Bank 3, Vcco=2.5V, No DCI      
+NET  LCD_FPGA_RW          LOC="AC10";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors      
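+NET  LOOPBK_114_N         LOC="AG1";   # Bank 118, MGTRXN1_118, GTP_DUAL_X0Y1
+NET  LOOPBK_114_N         LOC="AH2";   # Bank 118, MGTTXN1_118, GTP_DUAL_X0Y1; NOTE(review): same net name as previous line with a different LOC, and name says 114 but bank is 118 - confirm
+NET  LOOPBK_114_P         LOC="AH1";   # Bank 118, MGTRXP1_118, GTP_DUAL_X0Y1
+NET  LOOPBK_114_P         LOC="AJ2";   # Bank 118, MGTTXP1_118, GTP_DUAL_X0Y1; NOTE(review): same net name as previous line with a different LOC - give RX/TX ports distinct names before use; confirm
+NET  LOOPBK_116_N         LOC="R1";    # Bank 112, MGTRXN1_112, GTP_DUAL_X0Y3
+NET  LOOPBK_116_N         LOC="T2";    # Bank 112, MGTTXN1_112, GTP_DUAL_X0Y3; NOTE(review): same net name as previous line with a different LOC, and name says 116 but bank is 112 - confirm
+NET  LOOPBK_116_P         LOC="T1";    # Bank 112, MGTRXP1_112, GTP_DUAL_X0Y3
+NET  LOOPBK_116_P         LOC="U2";    # Bank 112, MGTTXP1_112, GTP_DUAL_X0Y3; NOTE(review): same net name as previous line with a different LOC - give RX/TX ports distinct names before use; confirm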
+NET  MOUSE_CLK            LOC="R27";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  MOUSE_DATA           LOC="U26";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  PC4_HALT_B           LOC="W9";    # Bank 18, Vcco=3.3V, No DCI      
+NET  PCIE_CLK_QO_N        LOC="AF3";   # Bank 118, MGTREFCLKN_118, GTP_DUAL_X0Y1
+NET  PCIE_CLK_QO_P        LOC="AF4";   # Bank 118, MGTREFCLKP_118, GTP_DUAL_X0Y1
+NET  PCIE_PRSNT_B_FPGA    LOC="AF24";  # Bank 21, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  PCIE_RX_N            LOC="AF1";   # Bank 118, MGTRXN0_118, GTP_DUAL_X0Y1
+NET  PCIE_RX_P            LOC="AE1";   # Bank 118, MGTRXP0_118, GTP_DUAL_X0Y1
+NET  PCIE_TX_N            LOC="AE2";   # Bank 118, MGTTXN0_118, GTP_DUAL_X0Y1
+NET  PCIE_TX_P            LOC="AD2";   # Bank 118, MGTTXP0_118, GTP_DUAL_X0Y1
+NET  PHY_COL              LOC="B32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20      
+NET  PHY_CRS              LOC="E34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20      
+NET  PHY_INT              LOC="H20";   # Bank 3, Vcco=2.5V, No DCI      
+NET  PHY_MDC              LOC="H19";   # Bank 3, Vcco=2.5V, No DCI      
+NET  PHY_MDIO             LOC="H13";   # Bank 3, Vcco=2.5V, No DCI      
+NET  PHY_RESET            LOC="J14";   # Bank 3, Vcco=2.5V, No DCI      
+NET  PHY_RXCLK            LOC="H17";   # Bank 3, Vcco=2.5V, No DCI      
+NET  PHY_RXCTL_RXDV       LOC="E32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20      
+NET  PHY_RXD0             LOC="A33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20      
+NET  PHY_RXD1             LOC="B33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20      
+NET  PHY_RXD2             LOC="C33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20      
+NET  PHY_RXD3             LOC="C32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  PHY_RXD4             LOC="D32";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  PHY_RXD5             LOC="C34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  PHY_RXD6             LOC="D34";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  PHY_RXD7             LOC="F33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  PHY_RXER             LOC="E33";   # Bank 11, Vcco=2.5V or 3.3V user selectable by J20
+NET  PHY_TXC_GTXCLK       LOC="J16";   # Bank 3, Vcco=2.5V, No DCI
+NET  PHY_TXCLK            LOC="K17";   # Bank 3, Vcco=2.5V, No DCI
+NET  PHY_TXCTL_TXEN       LOC="AJ10";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  PHY_TXD0             LOC="AF11";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  PHY_TXD1             LOC="AE11";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  PHY_TXD2             LOC="AH9";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  PHY_TXD3             LOC="AH10";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  PHY_TXD4             LOC="AG8";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  PHY_TXD5             LOC="AH8";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  PHY_TXD6             LOC="AG10";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  PHY_TXD7             LOC="AG11";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  PHY_TXER             LOC="AJ9";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  PIEZO_SPEAKER        LOC="G30";   # Bank 15, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  RESERVED1            LOC="AB23";  # Bank 0, Vcco=3.3V      
+NET  RESERVED2            LOC="AC23";  # Bank 0, Vcco=3.3V      
+NET  RREF                 LOC="V4";    # Bank 112, MGTRREF_112, GTP_DUAL_X0Y3
+NET  SATA1_RX_N           LOC="Y1";    # Bank 114, MGTRXN0_114, GTP_DUAL_X0Y2
+NET  SATA1_RX_P           LOC="W1";    # Bank 114, MGTRXP0_114, GTP_DUAL_X0Y2
+NET  SATA1_TX_N           LOC="W2";    # Bank 114, MGTTXN0_114, GTP_DUAL_X0Y2
+NET  SATA1_TX_P           LOC="V2";    # Bank 114, MGTTXP0_114, GTP_DUAL_X0Y2
+NET  SATA2_RX_N           LOC="AA1";   # Bank 114, MGTRXN1_114, GTP_DUAL_X0Y2
+NET  SATA2_RX_P           LOC="AB1";   # Bank 114, MGTRXP1_114, GTP_DUAL_X0Y2
+NET  SATA2_TX_N           LOC="AB2";   # Bank 114, MGTTXN1_114, GTP_DUAL_X0Y2
+NET  SATA2_TX_P           LOC="AC2";   # Bank 114, MGTTXP1_114, GTP_DUAL_X0Y2
+NET  SATACLK_QO_N         LOC="Y3";    # Bank 114, MGTREFCLKN_114, GTP_DUAL_X0Y2
+NET  SATACLK_QO_P         LOC="Y4";    # Bank 114, MGTREFCLKP_114, GTP_DUAL_X0Y2
+NET  SFP_RX_N             LOC="H1";    # Bank 116, MGTRXN0_116, GTP_DUAL_X0Y4
+NET  SFP_RX_P             LOC="G1";    # Bank 116, MGTRXP0_116, GTP_DUAL_X0Y4
+NET  SFP_TX_DISABLE_FPGA  LOC="K24";   # Bank 19, Vcco=1.8V, DCI using 49.9 ohm resistors      
+NET  SFP_TX_N             LOC="G2";    # Bank 116, MGTTXN0_116, GTP_DUAL_X0Y4
+NET  SFP_TX_P             LOC="F2";    # Bank 116, MGTTXP0_116, GTP_DUAL_X0Y4
+NET  SGMII_RX_N           LOC="P1";    # Bank 112, MGTRXN0_112, GTP_DUAL_X0Y3
+NET  SGMII_RX_P           LOC="N1";    # Bank 112, MGTRXP0_112, GTP_DUAL_X0Y3
+NET  SGMII_TX_N           LOC="N2";    # Bank 112, MGTTXN0_112, GTP_DUAL_X0Y3
+NET  SGMII_TX_P           LOC="M2";    # Bank 112, MGTTXP0_112, GTP_DUAL_X0Y3
+NET  SGMIICLK_QO_N        LOC="P3";    # Bank 112, MGTREFCLKN_112, GTP_DUAL_X0Y3
+NET  SGMIICLK_QO_P        LOC="P4";    # Bank 112, MGTREFCLKP_112, GTP_DUAL_X0Y3
+NET  SMA_DIFF_CLK_IN_N    LOC="H15";   # Bank 3, Vcco=2.5V, No DCI      
+NET  SMA_DIFF_CLK_IN_P    LOC="H14";   # Bank 3, Vcco=2.5V, No DCI      
+NET  SMA_RX_N             LOC="J1";    # Bank 116, MGTRXN1_116, GTP_DUAL_X0Y4
+NET  SMA_RX_P             LOC="K1";    # Bank 116, MGTRXP1_116, GTP_DUAL_X0Y4
+NET  SMA_TX_N             LOC="K2";    # Bank 116, MGTTXN1_116, GTP_DUAL_X0Y4
+NET  SMA_TX_P             LOC="L2";    # Bank 116, MGTTXP1_116, GTP_DUAL_X0Y4
+NET  SPI_CE_B             LOC="V9";    # Bank 18, Vcco=3.3V, No DCI      
+NET  SRAM_ADV_LD_B        LOC="H8";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_BW0             LOC="D10";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_BW1             LOC="D11";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_BW2             LOC="J11";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_BW3             LOC="K11";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_CLK             LOC="AG21";  # Bank 4, Vcco=3.3V, No DCI -- NOTE(review): SRAM_CLK is also LOC'ed to G8 on the next line; presumably this pin belongs to a differently named net (e.g. a clock feedback input) -- confirm against the board schematic
+NET  SRAM_CLK             LOC="G8";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors -- NOTE(review): second LOC for the same net name, see previous line
+NET  SRAM_CS_B            LOC="J10";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_D16             LOC="N10";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_D17             LOC="E13";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_D18             LOC="E12";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_D19             LOC="L9";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_D20             LOC="M10";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors      
+NET  SRAM_D21             LOC="E11";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D22             LOC="F11";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D23             LOC="L8";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D24             LOC="M8";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D25             LOC="G12";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D26             LOC="G11";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D27             LOC="C13";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D28             LOC="B13";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D29             LOC="K9";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D30             LOC="K8";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_D31             LOC="J9";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_DQP0            LOC="D12";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_DQP1            LOC="C12";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_DQP2            LOC="H10";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_DQP3            LOC="H9";    # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_FLASH_A0        LOC="K12";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A1        LOC="K13";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A2        LOC="H23";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A3        LOC="G23";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A4        LOC="H12";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A5        LOC="J12";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A6        LOC="K22";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A7        LOC="K23";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A8        LOC="K14";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A9        LOC="L14";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A10       LOC="H22";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A11       LOC="G22";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A12       LOC="J15";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A13       LOC="K16";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A14       LOC="K21";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A15       LOC="J22";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A16       LOC="L16";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A17       LOC="L15";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A18       LOC="L20";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A19       LOC="L21";   # Bank 1, Vcco=3.3V
+NET  SRAM_FLASH_A20       LOC="AE23";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_A21       LOC="AE22";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_D0        LOC="AD19";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_D1        LOC="AE19";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_D2        LOC="AE17";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_D3        LOC="AF16";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_D4        LOC="AD20";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_D5        LOC="AE21";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_D6        LOC="AE16";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_D7        LOC="AF15";  # Bank 2, Vcco=3.3V
+NET  SRAM_FLASH_D8        LOC="AH13";  # Bank 4, Vcco=3.3V, No DCI
+NET  SRAM_FLASH_D9        LOC="AH14";  # Bank 4, Vcco=3.3V, No DCI
+NET  SRAM_FLASH_D10       LOC="AH19";  # Bank 4, Vcco=3.3V, No DCI
+NET  SRAM_FLASH_D11       LOC="AH20";  # Bank 4, Vcco=3.3V, No DCI
+NET  SRAM_FLASH_D12       LOC="AG13";  # Bank 4, Vcco=3.3V, No DCI
+NET  SRAM_FLASH_D13       LOC="AH12";  # Bank 4, Vcco=3.3V, No DCI
+NET  SRAM_FLASH_D14       LOC="AH22";  # Bank 4, Vcco=3.3V, No DCI
+NET  SRAM_FLASH_D15       LOC="AG22";  # Bank 4, Vcco=3.3V, No DCI
+NET  SRAM_FLASH_WE_B      LOC="AF20";  # Bank 2, Vcco=3.3V
+NET  SRAM_MODE            LOC="A13";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SRAM_OE_B            LOC="B12";   # Bank 20, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPA00         LOC="G5";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPA01_USB_A0  LOC="N7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPA02_USB_A1  LOC="N5";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPA03         LOC="P5";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPA04         LOC="R6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPA05         LOC="M6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPA06         LOC="L6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPBRDY        LOC="H5";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPCE          LOC="M5";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPIRQ         LOC="M7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPOE_USB_RD_B LOC="N8";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_MPWE_USB_WR_B LOC="R9";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D0        LOC="P9";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D1        LOC="T8";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D2        LOC="J7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D3        LOC="H7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D4        LOC="R7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D5        LOC="U7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D6        LOC="P7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D7        LOC="P6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D8        LOC="R8";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D9        LOC="L5";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D10       LOC="L4";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D11       LOC="K6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D12       LOC="J5";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D13       LOC="T6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D14       LOC="K7";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  SYSACE_USB_D15       LOC="J6";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  TRC_CLK              LOC="AD9";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  TRC_TS1E             LOC="AK9";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  TRC_TS1O             LOC="AF10";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  TRC_TS2E             LOC="AK8";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  TRC_TS2O             LOC="AF9";   # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  TRC_TS3              LOC="AJ11";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  TRC_TS4              LOC="AK11";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  TRC_TS5              LOC="AD11";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  TRC_TS6              LOC="AD10";  # Bank 22, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  USB_CS_B             LOC="P10";   # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  USB_INT              LOC="F5";    # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  USB_RESET_B          LOC="R11";   # Bank 12, Vcco=3.3V, DCI using 49.9 ohm resistors
+NET  USER_CLK             LOC="AH15";  # Bank 4, Vcco=3.3V, No DCI
+NET  VGA_IN_BLUE0         LOC="AC4";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_BLUE1         LOC="AC5";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_BLUE2         LOC="AB6";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_BLUE3         LOC="AB7";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_BLUE4         LOC="AA5";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_BLUE5         LOC="AB5";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_BLUE6         LOC="AC7";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_BLUE7         LOC="AD7";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_CLAMP         LOC="AH7";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_COAST         LOC="AG7";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_DATA_CLK      LOC="AH18";  # Bank 4, Vcco=3.3V, No DCI
+NET  VGA_IN_GREEN0        LOC="Y8";    # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_GREEN1        LOC="Y9";    # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_GREEN2        LOC="AD4";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_GREEN3        LOC="AD5";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_GREEN4        LOC="AA6";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_GREEN5        LOC="Y7";    # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_GREEN6        LOC="AD6";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_GREEN7        LOC="AE6";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_HSOUT         LOC="AE7";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_ODD_EVEN_B    LOC="W6";    # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_RED0          LOC="AG5";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_RED1          LOC="AF5";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_RED2          LOC="W7";    # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_RED3          LOC="V7";    # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_RED4          LOC="AH5";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_RED5          LOC="AG6";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_RED6          LOC="Y11";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_RED7          LOC="W11";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_SOGOUT        LOC="AF6";   # Bank 18, Vcco=3.3V, No DCI
+NET  VGA_IN_VSOUT         LOC="Y6";    # Bank 18, Vcco=3.3V, No DCI
Index: /trunk/synplicity/proj_1.prj
===================================================================
--- /trunk/synplicity/proj_1.prj	(revision 6)
+++ /trunk/synplicity/proj_1.prj	(revision 6)
@@ -0,0 +1,337 @@
+#-- Synopsys, Inc.
+#-- Version E-2010.09-SP3
+#-- Project file /home/sal/Desktop/sparc64soc/synplicity/proj_1.prj
+
+#project files
+add_file -verilog "../trunk/Top/W1.v"
+add_file -verilog "../trunk/OC-UART/raminfr.v"
+add_file -verilog "../trunk/OC-UART/timescale.v"
+add_file -verilog "../trunk/OC-UART/uart_debug_if.v"
+add_file -verilog "../trunk/OC-UART/uart_defines.v"
+add_file -verilog "../trunk/OC-UART/uart_receiver.v"
+add_file -verilog "../trunk/OC-UART/uart_regs.v"
+add_file -verilog "../trunk/OC-UART/uart_rfifo.v"
+add_file -verilog "../trunk/OC-UART/uart_sync_flops.v"
+add_file -verilog "../trunk/OC-UART/uart_tfifo.v"
+add_file -verilog "../trunk/OC-UART/uart_top.v"
+add_file -verilog "../trunk/OC-UART/uart_transmitter.v"
+add_file -verilog "../trunk/OC-UART/uart_wb.v"
+add_file -verilog "../trunk/NOR-flash/WBFLASH.v"
+add_file -verilog "../trunk/os2wb/l1ddir.v"
+add_file -verilog "../trunk/os2wb/l1dir.v"
+add_file -verilog "../trunk/os2wb/l1idir.v"
+add_file -verilog "../trunk/os2wb/os2wb.v"
+add_file -verilog "../trunk/os2wb/os2wb_dual.v"
+add_file -verilog "../trunk/os2wb/rst_ctrl.v"
+add_file -verilog "../trunk/os2wb/s1_top.v"
+add_file -verilog "../trunk/T1-common/common/cluster_header.v"
+add_file -verilog "../trunk/T1-common/common/cluster_header_ctu.v"
+add_file -verilog "../trunk/T1-common/common/cluster_header_dup.v"
+add_file -verilog "../trunk/T1-common/common/cluster_header_sync.v"
+add_file -verilog "../trunk/T1-common/common/cmp_sram_redhdr.v"
+add_file -verilog "../trunk/T1-common/common/dbl_buf.v"
+add_file -verilog "../trunk/T1-common/common/swrvr_clib.v"
+add_file -verilog "../trunk/T1-common/common/swrvr_dlib.v"
+add_file -verilog "../trunk/T1-common/common/sync_pulse_synchronizer.v"
+add_file -verilog "../trunk/T1-common/common/synchronizer_asr.v"
+add_file -verilog "../trunk/T1-common/common/synchronizer_asr_dup.v"
+add_file -verilog "../trunk/T1-common/common/test_stub_bist.v"
+add_file -verilog "../trunk/T1-common/common/test_stub_scan.v"
+add_file -verilog "../trunk/T1-common/common/ucb_bus_in.v"
+add_file -verilog "../trunk/T1-common/common/ucb_bus_out.v"
+add_file -verilog "../trunk/T1-common/common/ucb_flow_2buf.v"
+add_file -verilog "../trunk/T1-common/common/ucb_flow_jbi.v"
+add_file -verilog "../trunk/T1-common/common/ucb_flow_spi.v"
+add_file -verilog "../trunk/T1-common/common/ucb_noflow.v"
+add_file -verilog "../trunk/T1-common/m1/m1.V"
+add_file -verilog "../trunk/T1-common/srams/bw_r_cm16x40.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_cm16x40b.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_dcd.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_dcm.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_efa.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_frf.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_icd.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_idct.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_irf.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_irf_fpga1.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_irf_register.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_l2d.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_l2d_32k.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_l2d_rep_bot.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_l2d_rep_top.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_l2t.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_rf16x128d.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_rf16x160.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_rf16x32.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_rf32x108.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_rf32x152b.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_rf32x80.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_scm.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_tlb.v"
+add_file -verilog "../trunk/T1-common/srams/bw_r_tlb_fpga.v"
+add_file -verilog "../trunk/T1-common/srams/bw_rf_16x65.v"
+add_file -verilog "../trunk/T1-common/srams/bw_rf_16x81.v"
+add_file -verilog "../trunk/T1-common/srams/regfile_1w_4r.v"
+add_file -verilog "../trunk/T1-common/u1/u1.V"
+add_file -verilog "../trunk/T1-FPU/bw_clk_cl_fpu_cmp.v"
+add_file -verilog "../trunk/T1-FPU/fpu.v"
+add_file -verilog "../trunk/T1-FPU/fpu_add.v"
+add_file -verilog "../trunk/T1-FPU/fpu_add_ctl.v"
+add_file -verilog "../trunk/T1-FPU/fpu_add_exp_dp.v"
+add_file -verilog "../trunk/T1-FPU/fpu_add_frac_dp.v"
+add_file -verilog "../trunk/T1-FPU/fpu_cnt_lead0_53b.v"
+add_file -verilog "../trunk/T1-FPU/fpu_cnt_lead0_64b.v"
+add_file -verilog "../trunk/T1-FPU/fpu_cnt_lead0_lvl1.v"
+add_file -verilog "../trunk/T1-FPU/fpu_cnt_lead0_lvl2.v"
+add_file -verilog "../trunk/T1-FPU/fpu_cnt_lead0_lvl3.v"
+add_file -verilog "../trunk/T1-FPU/fpu_cnt_lead0_lvl4.v"
+add_file -verilog "../trunk/T1-FPU/fpu_denorm_3b.v"
+add_file -verilog "../trunk/T1-FPU/fpu_denorm_3to1.v"
+add_file -verilog "../trunk/T1-FPU/fpu_denorm_frac.v"
+add_file -verilog "../trunk/T1-FPU/fpu_div.v"
+add_file -verilog "../trunk/T1-FPU/fpu_div_ctl.v"
+add_file -verilog "../trunk/T1-FPU/fpu_div_exp_dp.v"
+add_file -verilog "../trunk/T1-FPU/fpu_div_frac_dp.v"
+add_file -verilog "../trunk/T1-FPU/fpu_in.v"
+add_file -verilog "../trunk/T1-FPU/fpu_in2_gt_in1_2b.v"
+add_file -verilog "../trunk/T1-FPU/fpu_in2_gt_in1_3b.v"
+add_file -verilog "../trunk/T1-FPU/fpu_in2_gt_in1_3to1.v"
+add_file -verilog "../trunk/T1-FPU/fpu_in2_gt_in1_frac.v"
+add_file -verilog "../trunk/T1-FPU/fpu_in_ctl.v"
+add_file -verilog "../trunk/T1-FPU/fpu_in_dp.v"
+add_file -verilog "../trunk/T1-FPU/fpu_mul.v"
+add_file -verilog "../trunk/T1-FPU/fpu_mul_ctl.v"
+add_file -verilog "../trunk/T1-FPU/fpu_mul_exp_dp.v"
+add_file -verilog "../trunk/T1-FPU/fpu_mul_frac_dp.v"
+add_file -verilog "../trunk/T1-FPU/fpu_out.v"
+add_file -verilog "../trunk/T1-FPU/fpu_out_ctl.v"
+add_file -verilog "../trunk/T1-FPU/fpu_out_dp.v"
+add_file -verilog "../trunk/T1-FPU/fpu_rptr_groups.v"
+add_file -verilog "../trunk/T1-FPU/fpu_rptr_macros.v"
+add_file -verilog "../trunk/T1-FPU/fpu_rptr_min_global.v"
+add_file -verilog "../trunk/WB/wb_conbus_arb.v"
+add_file -verilog "../trunk/WB/wb_conbus_defines.v"
+add_file -verilog "../trunk/WB/wb_conbus_top.v"
+add_file -verilog "../trunk/WB2ALTDDR3/dram_wb.v"
+add_file -verilog "../xup5lx110t/cachedir.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram_fifo.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/pcx_fifo.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/dram.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_chipscope.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_ctrl.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_idelay_ctrl.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_infrastructure.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_mem_if_top.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_calib.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_ctl_io.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_dm_iob.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_dq_iob.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_dqs_iob.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_init.v"
+add_file -vhdl -lib work "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_init.vhd"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_io.v"
+add_file -vhdl -lib work "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_io.vhd"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_top.v"
+add_file -vhdl -lib work "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_top.vhd"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_write.v"
+add_file -vhdl -lib work "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_phy_write.vhd"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_top.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_usr_addr_fifo.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_usr_rd.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_usr_top.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/dram/user_design/rtl/ddr2_usr_wr.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_alu.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_alu_16eql.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_aluadder64.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_aluaddsub.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_alulogic.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_aluor32.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_aluspr.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_aluzcmp64.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_byp.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_byp_eccgen.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_div.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_div_32eql.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_div_yreg.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_ecc.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_ecc_dec.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_ecl.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_ecl_cnt6.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_ecl_divcntl.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_ecl_eccctl.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_ecl_mdqctl.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_ecl_wb.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_eclbyplog.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_eclbyplog_rs1.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_eclccr.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_eclcomp7.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_reg.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_rml.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_rml_cwp.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_rml_inc3.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_rndrob.v"
+add_file -verilog "../trunk/T1-CPU/exu/sparc_exu_shft.v"
+add_file -verilog "../trunk/T1-CPU/ffu/sparc_ffu.v"
+add_file -verilog "../trunk/T1-CPU/ffu/sparc_ffu_ctl.v"
+add_file -verilog "../trunk/T1-CPU/ffu/sparc_ffu_ctl_visctl.v"
+add_file -verilog "../trunk/T1-CPU/ffu/sparc_ffu_dp.v"
+add_file -verilog "../trunk/T1-CPU/ffu/sparc_ffu_part_add32.v"
+add_file -verilog "../trunk/T1-CPU/ffu/sparc_ffu_vis.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_cmp35.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_ctr5.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_dcl.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_dec.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_errctl.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_errdp.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_fcl.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_fdp.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_ifqctl.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_ifqdp.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_imd.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_incr46.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_invctl.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_lfsr5.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_lru4.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_mbist.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_milfsm.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_par16.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_par32.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_par34.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_rndrob.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_sscan.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_swl.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_swpla.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_thrcmpl.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_thrfsm.v"
+add_file -verilog "../trunk/T1-CPU/ifu/sparc_ifu_wseldp.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_asi_decode.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_dc_parity_gen.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_dcache_lfsr.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_dcdp.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_dctl.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_dctldp.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_excpctl.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_pcx_qmon.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_qctl1.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_qctl2.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_qdp1.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_qdp2.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_rrobin_picker2.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_stb_ctl.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_stb_ctldp.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_stb_rwctl.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_stb_rwdp.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_tagdp.v"
+add_file -verilog "../trunk/T1-CPU/lsu/lsu_tlbdp.v"
+add_file -verilog "../trunk/T1-CPU/mul/mul64.v"
+add_file -verilog "../trunk/T1-CPU/mul/sparc_mul_cntl.v"
+add_file -verilog "../trunk/T1-CPU/mul/sparc_mul_dp.v"
+add_file -verilog "../trunk/T1-CPU/mul/sparc_mul_top.v"
+add_file -verilog "../trunk/T1-CPU/rtl/bw_clk_cl_sparc_cmp.v"
+add_file -verilog "../trunk/T1-CPU/rtl/cpx_spc_buf.v"
+add_file -verilog "../trunk/T1-CPU/rtl/cpx_spc_rpt.v"
+add_file -verilog "../trunk/T1-CPU/rtl/sparc.v"
+add_file -verilog "../trunk/T1-CPU/rtl/spc_pcx_buf.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_ctl.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_lsurpt.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_lsurpt1.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_maaddr.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_maaeqb.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_mactl.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_madp.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_maexp.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_mald.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_mamul.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_mared.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_mast.v"
+add_file -verilog "../trunk/T1-CPU/spu/spu_wen.v"
+add_file -verilog "../trunk/T1-CPU/tlu/sparc_tlu_dec64.v"
+add_file -verilog "../trunk/T1-CPU/tlu/sparc_tlu_intctl.v"
+add_file -verilog "../trunk/T1-CPU/tlu/sparc_tlu_intdp.v"
+add_file -verilog "../trunk/T1-CPU/tlu/sparc_tlu_penc64.v"
+add_file -verilog "../trunk/T1-CPU/tlu/sparc_tlu_zcmp64.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_addern_32.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_hyperv.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_incr64.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_misctl.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_mmu_ctl.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_mmu_dp.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_pib.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_prencoder16.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_rrobin_picker.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_tcl.v"
+add_file -verilog "../trunk/T1-CPU/tlu/tlu_tdp.v"
+add_file -verilog "../xup5lx110t/ipcore_dir/pll.v"
+
+
+#implementation: "rev_1" (added here, made active at the end of this file)
+impl -add rev_1 -type fpga
+
+# Include path is given relative to this project file, like the add_file entries, so the project is not tied to one user's home directory.
+#implementation attributes: Verilog-2001, FPGA and FPGA_SYN defined for every source, project-relative includes
+
+set_option -vlog_std v2001
+set_option -project_relative_includes 1
+set_option -enable_nfilter 0
+set_option -hdl_define -set "FPGA FPGA_SYN"
+set_option -include_path "../trunk/T1-common/include/"
+
+#device options -- NOTE(review): sources are taken from ../xup5lx110t, yet the part/package below is XC5VLX20T/FF323; this looks like an unchanged tool default -- confirm the intended device
+set_option -technology Virtex5
+set_option -part XC5VLX20T
+set_option -package FF323
+set_option -speed_grade -1
+set_option -part_companion ""
+
+#compilation/mapping options: top-level module is W1, FSM explorer off
+set_option -use_fsm_explorer 0
+set_option -top_module "W1"
+
+# mapper_options: automatic target frequency; write_verilog / write_vhdl outputs switched off
+set_option -frequency auto
+set_option -write_verilog 0
+set_option -write_vhdl 0
+
+# Xilinx Virtex2 -- NOTE(review): heading looks tool-generated; -technology is set to Virtex5 above, so these options presumably still apply to rev_1 -- confirm
+set_option -run_prop_extract 1
+set_option -maxfan 10000
+set_option -disable_io_insertion 0
+set_option -pipe 1
+set_option -update_models_cp 0
+set_option -retiming 0
+set_option -no_sequential_opt 0
+set_option -fixgatedclocks 3
+set_option -fixgeneratedclocks 3
+
+# Xilinx Virtex5
+set_option -enable_prepacking 1
+
+# NFilter (enable_nfilter is 0 in the implementation attributes, all three sub-options are 0 as well)
+set_option -popfeed 0
+set_option -constprop 0
+set_option -createhierarchy 0
+
+# sequential_optimization_options
+set_option -symbolic_fsm_compiler 1
+
+# Compiler Options
+set_option -compiler_compatible 0
+set_option -resource_sharing 1
+
+#VIF options
+set_option -write_vif 1
+
+#automatic place and route (vendor) options
+set_option -write_apr_constraint 1
+
+#set result format/file last: result file is ./rev_1/W1.edf, relative to this project file
+project -result_file "./rev_1/W1.edf"
+
+#design plan options, then make rev_1 the active implementation
+set_option -nfilter_user_path ""
+impl -active "rev_1"
Index: /trunk/WB/wb_conbus_defines.v
===================================================================
--- /trunk/WB/wb_conbus_defines.v	(revision 6)
+++ /trunk/WB/wb_conbus_defines.v	(revision 6)
@@ -0,0 +1,42 @@
+/////////////////////////////////////////////////////////////////////
+////                                                             ////
+////  WISHBONE Connection ShareBus Definitions                   ////
+////                                                             ////
+////                                                             ////
+////  Author: Johny Chi	                                         ////
+////          chisuhua@yahoo.com.cn                              ////
+////                                                             ////
+////                                                             ////
+////  Downloaded from: http://www.opencores.org/cores/wb_conmax/ ////
+////                                                             ////
+/////////////////////////////////////////////////////////////////////
+///                                                              ////
+//// Copyright (C) 2000 Authors and OPENCORES.ORG                 ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+
+
+
+`timescale 1ns / 10ps
+
Index: /trunk/WB/wb_conbus_top.v
===================================================================
--- /trunk/WB/wb_conbus_top.v	(revision 6)
+++ /trunk/WB/wb_conbus_top.v	(revision 6)
@@ -0,0 +1,660 @@
+/////////////////////////////////////////////////////////////////////
+////                                                             ////
+////  WISHBONE Connection Bus Top Level		                 ////
+////                                                             ////
+////                                                             ////
+////  Author: Johny Chi			                         ////
+////          chisuhua@yahoo.com.cn                              ////
+////                                                             ////
+////                                                             ////
+////                                                             ////
+/////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000 Authors and OPENCORES.ORG                 ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+//  Description
+//	1. Up to 8 masters and 8 slaves share one Wishbone bus connection.
+//	2. No priority arbiter; the 8 masters are served in a round-robin
+//	   way.
+//	3. If WB_USE_TRISTATE is defined, the shared bus is a tri-state
+//	   bus and uses fewer logic resources.
+//	4. wb_conbus was synthesized for XC2S100-5-PQ208 using Synplify:
+//	   max speed >60 MHz, and 374 slices when using the multiplexer bus
+//	   or 150 slices when using the tri-state bus.
+//
+`include "wb_conbus_defines.v"
+`define			dw	 64		// Data bus Width
+`define			aw	 64		// Address bus Width
+`define			sw   (`dw / 8)	// Number of Select Lines; parenthesised so the macro expands safely inside larger expressions
+`define			mbusw  (`aw + `sw + `dw + 4) 	// address width + byte select width + data width + cyc + we + stb + cab, input from master interface
+`define			sbusw	 3	//  ack + err + rty, input from slave interface
+`define			mselectw  8	// number of masters
+`define			sselectw  8	// number of slaves
+
+//`define 		WB_USE_TRISTATE
+
+
+module wb_conbus_top(
+	clk_i, rst_i,
+
+	// Master 0 Interface
+	m0_dat_i, m0_dat_o, m0_adr_i, m0_sel_i, m0_we_i, m0_cyc_i,
+	m0_stb_i, m0_ack_o, m0_err_o, m0_rty_o, m0_cab_i,
+
+	// Master 1 Interface
+	m1_dat_i, m1_dat_o, m1_adr_i, m1_sel_i, m1_we_i, m1_cyc_i,
+	m1_stb_i, m1_ack_o, m1_err_o, m1_rty_o, m1_cab_i,
+
+	// Master 2 Interface
+	m2_dat_i, m2_dat_o, m2_adr_i, m2_sel_i, m2_we_i, m2_cyc_i,
+	m2_stb_i, m2_ack_o, m2_err_o, m2_rty_o, m2_cab_i,
+
+	// Master 3 Interface
+	m3_dat_i, m3_dat_o, m3_adr_i, m3_sel_i, m3_we_i, m3_cyc_i,
+	m3_stb_i, m3_ack_o, m3_err_o, m3_rty_o, m3_cab_i,
+
+	// Master 4 Interface
+	m4_dat_i, m4_dat_o, m4_adr_i, m4_sel_i, m4_we_i, m4_cyc_i,
+	m4_stb_i, m4_ack_o, m4_err_o, m4_rty_o, m4_cab_i,
+
+	// Master 5 Interface
+	m5_dat_i, m5_dat_o, m5_adr_i, m5_sel_i, m5_we_i, m5_cyc_i,
+	m5_stb_i, m5_ack_o, m5_err_o, m5_rty_o, m5_cab_i,
+
+	// Master 6 Interface
+	m6_dat_i, m6_dat_o, m6_adr_i, m6_sel_i, m6_we_i, m6_cyc_i,
+	m6_stb_i, m6_ack_o, m6_err_o, m6_rty_o, m6_cab_i,
+
+	// Master 7 Interface
+	m7_dat_i, m7_dat_o, m7_adr_i, m7_sel_i, m7_we_i, m7_cyc_i,
+	m7_stb_i, m7_ack_o, m7_err_o, m7_rty_o, m7_cab_i,
+
+	// Slave 0 Interface
+	s0_dat_i, s0_dat_o, s0_adr_o, s0_sel_o, s0_we_o, s0_cyc_o,
+	s0_stb_o, s0_ack_i, s0_err_i, s0_rty_i, s0_cab_o,
+
+	// Slave 1 Interface
+	s1_dat_i, s1_dat_o, s1_adr_o, s1_sel_o, s1_we_o, s1_cyc_o,
+	s1_stb_o, s1_ack_i, s1_err_i, s1_rty_i, s1_cab_o,
+
+	// Slave 2 Interface
+	s2_dat_i, s2_dat_o, s2_adr_o, s2_sel_o, s2_we_o, s2_cyc_o,
+	s2_stb_o, s2_ack_i, s2_err_i, s2_rty_i, s2_cab_o,
+
+	// Slave 3 Interface
+	s3_dat_i, s3_dat_o, s3_adr_o, s3_sel_o, s3_we_o, s3_cyc_o,
+	s3_stb_o, s3_ack_i, s3_err_i, s3_rty_i, s3_cab_o,
+
+	// Slave 4 Interface
+	s4_dat_i, s4_dat_o, s4_adr_o, s4_sel_o, s4_we_o, s4_cyc_o,
+	s4_stb_o, s4_ack_i, s4_err_i, s4_rty_i, s4_cab_o,
+
+	// Slave 5 Interface
+	s5_dat_i, s5_dat_o, s5_adr_o, s5_sel_o, s5_we_o, s5_cyc_o,
+	s5_stb_o, s5_ack_i, s5_err_i, s5_rty_i, s5_cab_o,
+
+	// Slave 6 Interface
+	s6_dat_i, s6_dat_o, s6_adr_o, s6_sel_o, s6_we_o, s6_cyc_o,
+	s6_stb_o, s6_ack_i, s6_err_i, s6_rty_i, s6_cab_o,
+
+	// Slave 7 Interface
+	s7_dat_i, s7_dat_o, s7_adr_o, s7_sel_o, s7_we_o, s7_cyc_o,
+	s7_stb_o, s7_ack_i, s7_err_i, s7_rty_i, s7_cab_o
+
+	);
+// The granted master drives the shared bus; address decode picks the target slave.
+////////////////////////////////////////////////////////////////////
+//
+// Module Parameters
+//
+// sN_addr_w : number of most-significant address bits decoded for slave N
+// sN_addr   : value those sN_addr_w bits must equal to select slave N
+parameter		s0_addr_w = 1 ;		   	// slave 0 address decode width
+parameter		s0_addr = 1'b0;	// slave 0 address
+parameter		s1_addr_w = 41 ;	   		// slave 1 address decode width
+parameter		s1_addr = {40'h800000FFF0,1'b0};	// slave 1 address 
+parameter		s2_addr_w = 56 ;  		   	// slave 2 address decode width
+parameter		s2_addr = {56'h800000FFF0C2C1};	// slave 2 address
+parameter		s3_addr_w = 60 ;  		   	// slave 3 address decode width
+parameter		s3_addr = {60'h800000FFF0C2C00};	// slave 3 address
+parameter		s4_addr_w = 37 ;  		   	// slave 4 address decode width
+parameter		s4_addr = {36'h800000FFF,1'b1};	// slave 4 address
+parameter		s5_addr_w = 60 ;  		   	// slave 5 address decode width
+parameter		s5_addr = {60'h400000F00000000};	// slave 5 address
+parameter		s6_addr_w = 60 ;  		   	// slave 6 address decode width
+parameter		s6_addr = {60'h500000F00000000};	// slave 6 address
+parameter		s7_addr_w = 60 ;  		   	// slave 7 address decode width
+parameter		s7_addr = {60'h600000F00000000};	// slave 7 address
+
+
+////////////////////////////////////////////////////////////////////
+//
+// Module IOs
+//
+
+input		clk_i, rst_i;
+
+// Master 0 Interface
+input	[`dw-1:0]	m0_dat_i;
+output	[`dw-1:0]	m0_dat_o;
+input	[`aw-1:0]	m0_adr_i;
+input	[`sw-1:0]	m0_sel_i;
+input			m0_we_i;
+input			m0_cyc_i;
+input			m0_stb_i;
+input			m0_cab_i;
+output			m0_ack_o;
+output			m0_err_o;
+output			m0_rty_o;
+
+// Master 1 Interface
+input	[`dw-1:0]	m1_dat_i;
+output	[`dw-1:0]	m1_dat_o;
+input	[`aw-1:0]	m1_adr_i;
+input	[`sw-1:0]	m1_sel_i;
+input			m1_we_i;
+input			m1_cyc_i;
+input			m1_stb_i;
+input			m1_cab_i;
+output			m1_ack_o;
+output			m1_err_o;
+output			m1_rty_o;
+
+// Master 2 Interface
+input	[`dw-1:0]	m2_dat_i;
+output	[`dw-1:0]	m2_dat_o;
+input	[`aw-1:0]	m2_adr_i;
+input	[`sw-1:0]	m2_sel_i;
+input			m2_we_i;
+input			m2_cyc_i;
+input			m2_stb_i;
+input			m2_cab_i;
+output			m2_ack_o;
+output			m2_err_o;
+output			m2_rty_o;
+
+// Master 3 Interface
+input	[`dw-1:0]	m3_dat_i;
+output	[`dw-1:0]	m3_dat_o;
+input	[`aw-1:0]	m3_adr_i;
+input	[`sw-1:0]	m3_sel_i;
+input			m3_we_i;
+input			m3_cyc_i;
+input			m3_stb_i;
+input			m3_cab_i;
+output			m3_ack_o;
+output			m3_err_o;
+output			m3_rty_o;
+
+// Master 4 Interface
+input	[`dw-1:0]	m4_dat_i;
+output	[`dw-1:0]	m4_dat_o;
+input	[`aw-1:0]	m4_adr_i;
+input	[`sw-1:0]	m4_sel_i;
+input			m4_we_i;
+input			m4_cyc_i;
+input			m4_stb_i;
+input			m4_cab_i;
+output			m4_ack_o;
+output			m4_err_o;
+output			m4_rty_o;
+
+// Master 5 Interface
+input	[`dw-1:0]	m5_dat_i;
+output	[`dw-1:0]	m5_dat_o;
+input	[`aw-1:0]	m5_adr_i;
+input	[`sw-1:0]	m5_sel_i;
+input			m5_we_i;
+input			m5_cyc_i;
+input			m5_stb_i;
+input			m5_cab_i;
+output			m5_ack_o;
+output			m5_err_o;
+output			m5_rty_o;
+
+// Master 6 Interface
+input	[`dw-1:0]	m6_dat_i;
+output	[`dw-1:0]	m6_dat_o;
+input	[`aw-1:0]	m6_adr_i;
+input	[`sw-1:0]	m6_sel_i;
+input			m6_we_i;
+input			m6_cyc_i;
+input			m6_stb_i;
+input			m6_cab_i;
+output			m6_ack_o;
+output			m6_err_o;
+output			m6_rty_o;
+
+// Master 7 Interface
+input	[`dw-1:0]	m7_dat_i;
+output	[`dw-1:0]	m7_dat_o;
+input	[`aw-1:0]	m7_adr_i;
+input	[`sw-1:0]	m7_sel_i;
+input			m7_we_i;
+input			m7_cyc_i;
+input			m7_stb_i;
+input			m7_cab_i;
+output			m7_ack_o;
+output			m7_err_o;
+output			m7_rty_o;
+
+// Slave 0 Interface
+input	[`dw-1:0]	s0_dat_i;
+output	[`dw-1:0]	s0_dat_o;
+output	[`aw-1:0]	s0_adr_o;
+output	[`sw-1:0]	s0_sel_o;
+output			s0_we_o;
+output			s0_cyc_o;
+output			s0_stb_o;
+output			s0_cab_o;
+input			s0_ack_i;
+input			s0_err_i;
+input			s0_rty_i;
+
+// Slave 1 Interface
+input	[`dw-1:0]	s1_dat_i;
+output	[`dw-1:0]	s1_dat_o;
+output	[`aw-1:0]	s1_adr_o;
+output	[`sw-1:0]	s1_sel_o;
+output			s1_we_o;
+output			s1_cyc_o;
+output			s1_stb_o;
+output			s1_cab_o;
+input			s1_ack_i;
+input			s1_err_i;
+input			s1_rty_i;
+
+// Slave 2 Interface
+input	[`dw-1:0]	s2_dat_i;
+output	[`dw-1:0]	s2_dat_o;
+output	[`aw-1:0]	s2_adr_o;
+output	[`sw-1:0]	s2_sel_o;
+output			s2_we_o;
+output			s2_cyc_o;
+output			s2_stb_o;
+output			s2_cab_o;
+input			s2_ack_i;
+input			s2_err_i;
+input			s2_rty_i;
+
+// Slave 3 Interface
+input	[`dw-1:0]	s3_dat_i;
+output	[`dw-1:0]	s3_dat_o;
+output	[`aw-1:0]	s3_adr_o;
+output	[`sw-1:0]	s3_sel_o;
+output			s3_we_o;
+output			s3_cyc_o;
+output			s3_stb_o;
+output			s3_cab_o;
+input			s3_ack_i;
+input			s3_err_i;
+input			s3_rty_i;
+
+// Slave 4 Interface
+input	[`dw-1:0]	s4_dat_i;
+output	[`dw-1:0]	s4_dat_o;
+output	[`aw-1:0]	s4_adr_o;
+output	[`sw-1:0]	s4_sel_o;
+output			s4_we_o;
+output			s4_cyc_o;
+output			s4_stb_o;
+output			s4_cab_o;
+input			s4_ack_i;
+input			s4_err_i;
+input			s4_rty_i;
+
+// Slave 5 Interface
+input	[`dw-1:0]	s5_dat_i;
+output	[`dw-1:0]	s5_dat_o;
+output	[`aw-1:0]	s5_adr_o;
+output	[`sw-1:0]	s5_sel_o;
+output			s5_we_o;
+output			s5_cyc_o;
+output			s5_stb_o;
+output			s5_cab_o;
+input			s5_ack_i;
+input			s5_err_i;
+input			s5_rty_i;
+
+// Slave 6 Interface
+input	[`dw-1:0]	s6_dat_i;
+output	[`dw-1:0]	s6_dat_o;
+output	[`aw-1:0]	s6_adr_o;
+output	[`sw-1:0]	s6_sel_o;
+output			s6_we_o;
+output			s6_cyc_o;
+output			s6_stb_o;
+output			s6_cab_o;
+input			s6_ack_i;
+input			s6_err_i;
+input			s6_rty_i;
+
+// Slave 7 Interface
+input	[`dw-1:0]	s7_dat_i;
+output	[`dw-1:0]	s7_dat_o;
+output	[`aw-1:0]	s7_adr_o;
+output	[`sw-1:0]	s7_sel_o;
+output			s7_we_o;
+output			s7_cyc_o;
+output			s7_stb_o;
+output			s7_cab_o;
+input			s7_ack_i;
+input			s7_err_i;
+input			s7_rty_i;
+
+
+////////////////////////////////////////////////////////////////////
+//
+// Local wires
+//
+
+wire	[`mselectw -1:0]	i_gnt_arb;		// one-hot decode of gnt
+wire	[2:0]	gnt;					// index of the granted master, driven by wb_conbus_arb
+reg	[`sselectw -1:0]	i_ssel_dec;		// slave-select decode belonging to the granted master
+`ifdef	WB_USE_TRISTATE
+wire	[`mbusw -1:0]	i_bus_m;
+`else
+reg		[`mbusw -1:0]	i_bus_m;		// internal share bus, master data and control to slave
+`endif
+wire		[`dw -1:0]		i_dat_s;	// internal share bus , slave data to master
+wire	[`sbusw -1:0]	i_bus_s;			// internal share bus , slave control to master
+
+
+
+////////////////////////////////////////////////////////////////////
+//
+// Master output Interfaces
+//
+// i_dat_s is fanned out to every master; ack/err/rty are gated by the one-hot grant.
+// master0
+assign	m0_dat_o = i_dat_s;
+assign  {m0_ack_o, m0_err_o, m0_rty_o} = i_bus_s & {3{i_gnt_arb[0]}};
+
+// master1
+assign	m1_dat_o = i_dat_s;
+assign  {m1_ack_o, m1_err_o, m1_rty_o} = i_bus_s & {3{i_gnt_arb[1]}};
+
+// master2
+
+assign	m2_dat_o = i_dat_s;
+assign  {m2_ack_o, m2_err_o, m2_rty_o} = i_bus_s & {3{i_gnt_arb[2]}};
+
+// master3
+
+assign	m3_dat_o = i_dat_s;
+assign  {m3_ack_o, m3_err_o, m3_rty_o} = i_bus_s & {3{i_gnt_arb[3]}};
+
+// master4
+
+assign	m4_dat_o = i_dat_s;
+assign  {m4_ack_o, m4_err_o, m4_rty_o} = i_bus_s & {3{i_gnt_arb[4]}};
+
+// master5
+
+assign	m5_dat_o = i_dat_s;
+assign  {m5_ack_o, m5_err_o, m5_rty_o} = i_bus_s & {3{i_gnt_arb[5]}};
+
+// master6
+
+assign	m6_dat_o = i_dat_s;
+assign  {m6_ack_o, m6_err_o, m6_rty_o} = i_bus_s & {3{i_gnt_arb[6]}};
+
+// master7
+
+assign	m7_dat_o = i_dat_s;
+assign  {m7_ack_o, m7_err_o, m7_rty_o} = i_bus_s & {3{i_gnt_arb[7]}};
+
+// The responses of all slaves are ORed together; they are not qualified by i_ssel_dec here.
+assign  i_bus_s = {s0_ack_i | s1_ack_i | s2_ack_i | s3_ack_i | s4_ack_i | s5_ack_i | s6_ack_i | s7_ack_i ,
+				   s0_err_i | s1_err_i | s2_err_i | s3_err_i | s4_err_i | s5_err_i | s6_err_i | s7_err_i ,
+				   s0_rty_i | s1_rty_i | s2_rty_i | s3_rty_i | s4_rty_i | s5_rty_i | s6_rty_i | s7_rty_i };
+
+////////////////////////////////
+//	Slave output interface
+//	adr/sel/dat/we/cab/cyc are broadcast to all slaves; only stb is gated by the decode.
+// slave0
+assign  {s0_adr_o, s0_sel_o, s0_dat_o, s0_we_o, s0_cab_o,s0_cyc_o} = i_bus_m[`mbusw -1:1];
+assign	s0_stb_o = i_bus_m[1] & i_bus_m[0] & i_ssel_dec[0];  // stb_o = cyc_i & stb_i & i_ssel_dec
+
+// slave1
+
+assign  {s1_adr_o, s1_sel_o, s1_dat_o, s1_we_o, s1_cab_o, s1_cyc_o} = i_bus_m[`mbusw -1:1];
+assign	s1_stb_o = i_bus_m[1] & i_bus_m[0] & i_ssel_dec[1];
+
+// slave2
+
+assign  {s2_adr_o, s2_sel_o, s2_dat_o, s2_we_o, s2_cab_o, s2_cyc_o} = i_bus_m[`mbusw -1:1];
+assign	s2_stb_o = i_bus_m[1] & i_bus_m[0] & i_ssel_dec[2];
+
+// slave3
+
+assign  {s3_adr_o, s3_sel_o, s3_dat_o, s3_we_o, s3_cab_o, s3_cyc_o} = i_bus_m[`mbusw -1:1];
+assign	s3_stb_o = i_bus_m[1] & i_bus_m[0] & i_ssel_dec[3];
+
+// slave4
+
+assign  {s4_adr_o, s4_sel_o, s4_dat_o, s4_we_o, s4_cab_o, s4_cyc_o} = i_bus_m[`mbusw -1:1];
+assign	s4_stb_o = i_bus_m[1] & i_bus_m[0] & i_ssel_dec[4];
+
+// slave5
+
+assign  {s5_adr_o, s5_sel_o, s5_dat_o, s5_we_o, s5_cab_o, s5_cyc_o} = i_bus_m[`mbusw -1:1];
+assign	s5_stb_o = i_bus_m[1] & i_bus_m[0] & i_ssel_dec[5];
+
+// slave6
+
+assign  {s6_adr_o, s6_sel_o, s6_dat_o, s6_we_o, s6_cab_o, s6_cyc_o} = i_bus_m[`mbusw -1:1];
+assign	s6_stb_o = i_bus_m[1] & i_bus_m[0] & i_ssel_dec[6];
+
+// slave7
+
+assign  {s7_adr_o, s7_sel_o, s7_dat_o, s7_we_o, s7_cab_o, s7_cyc_o} = i_bus_m[`mbusw -1:1];
+assign	s7_stb_o = i_bus_m[1] & i_bus_m[0] & i_ssel_dec[7];
+
+///////////////////////////////////////
+//	Master and Slave input interface
+//
+
+`ifdef	WB_USE_TRISTATE
+// input from master interface; the released value spans the whole bus so no bit is forced to 0
+assign	i_bus_m = i_gnt_arb[0] ? {m0_adr_i, m0_sel_i, m0_dat_i, m0_we_i, m0_cab_i, m0_cyc_i, m0_stb_i} : {(`mbusw){1'bz}} ;
+assign	i_bus_m = i_gnt_arb[1] ? {m1_adr_i, m1_sel_i, m1_dat_i, m1_we_i, m1_cab_i, m1_cyc_i, m1_stb_i} : {(`mbusw){1'bz}} ;
+assign	i_bus_m = i_gnt_arb[2] ? {m2_adr_i, m2_sel_i, m2_dat_i, m2_we_i, m2_cab_i, m2_cyc_i, m2_stb_i} : {(`mbusw){1'bz}} ;
+assign	i_bus_m = i_gnt_arb[3] ? {m3_adr_i, m3_sel_i, m3_dat_i, m3_we_i, m3_cab_i, m3_cyc_i, m3_stb_i} : {(`mbusw){1'bz}} ;
+assign	i_bus_m = i_gnt_arb[4] ? {m4_adr_i, m4_sel_i, m4_dat_i, m4_we_i, m4_cab_i, m4_cyc_i, m4_stb_i} : {(`mbusw){1'bz}} ;
+assign	i_bus_m = i_gnt_arb[5] ? {m5_adr_i, m5_sel_i, m5_dat_i, m5_we_i, m5_cab_i, m5_cyc_i, m5_stb_i} : {(`mbusw){1'bz}} ;
+assign	i_bus_m = i_gnt_arb[6] ? {m6_adr_i, m6_sel_i, m6_dat_i, m6_we_i, m6_cab_i, m6_cyc_i, m6_stb_i} : {(`mbusw){1'bz}} ;
+assign	i_bus_m = i_gnt_arb[7] ? {m7_adr_i, m7_sel_i, m7_dat_i, m7_we_i, m7_cab_i, m7_cyc_i, m7_stb_i} : {(`mbusw){1'bz}} ;
+// input from slave interface; the released value is sized to the data bus width
+assign  i_dat_s = i_ssel_dec[0] ? s0_dat_i: {`dw{1'bz}};
+assign  i_dat_s = i_ssel_dec[1] ? s1_dat_i: {`dw{1'bz}};
+assign  i_dat_s = i_ssel_dec[2] ? s2_dat_i: {`dw{1'bz}};
+assign  i_dat_s = i_ssel_dec[3] ? s3_dat_i: {`dw{1'bz}};
+assign  i_dat_s = i_ssel_dec[4] ? s4_dat_i: {`dw{1'bz}};
+assign  i_dat_s = i_ssel_dec[5] ? s5_dat_i: {`dw{1'bz}};
+assign  i_dat_s = i_ssel_dec[6] ? s6_dat_i: {`dw{1'bz}};
+assign  i_dat_s = i_ssel_dec[7] ? s7_dat_i: {`dw{1'bz}};
+
+`else
+// Multiplexer implementation of the shared master bus.
+// i_bus_m layout, MSB first: adr | sel | dat | we | cab | cyc | stb,
+// i.e. bit 0 = stb, bit 1 = cyc, bit 2 = cab, bit 3 = we.
+// The slave-side slices of i_bus_m (everything above bit 0, plus bits 1
+// and 0 for the stb gating) rely on this ordering.
+// gnt is 3 bits wide, so the eight case items are exhaustive; the default
+// arm is reached only when gnt carries x/z in simulation and then drives
+// an idle (all-zero) bus sized to the full bus width.
+always @*
+		case(gnt)
+			3'h0:	i_bus_m = {m0_adr_i, m0_sel_i, m0_dat_i, m0_we_i, m0_cab_i, m0_cyc_i,m0_stb_i};
+			3'h1:	i_bus_m = {m1_adr_i, m1_sel_i, m1_dat_i, m1_we_i, m1_cab_i, m1_cyc_i,m1_stb_i};
+			3'h2:	i_bus_m = {m2_adr_i, m2_sel_i, m2_dat_i, m2_we_i, m2_cab_i, m2_cyc_i,m2_stb_i};
+			3'h3:	i_bus_m = {m3_adr_i, m3_sel_i, m3_dat_i, m3_we_i, m3_cab_i, m3_cyc_i,m3_stb_i};
+			3'h4:	i_bus_m = {m4_adr_i, m4_sel_i, m4_dat_i, m4_we_i, m4_cab_i, m4_cyc_i,m4_stb_i};
+			3'h5:	i_bus_m = {m5_adr_i, m5_sel_i, m5_dat_i, m5_we_i, m5_cab_i, m5_cyc_i,m5_stb_i};
+			3'h6:	i_bus_m = {m6_adr_i, m6_sel_i, m6_dat_i, m6_we_i, m6_cab_i, m6_cyc_i,m6_stb_i};
+			3'h7:	i_bus_m = {m7_adr_i, m7_sel_i, m7_dat_i, m7_we_i, m7_cab_i, m7_cyc_i,m7_stb_i};
+			default:i_bus_m = {(`mbusw){1'b0}};
+endcase			
+
+assign	i_dat_s = i_ssel_dec[0] ? s0_dat_i :
+				  i_ssel_dec[1] ? s1_dat_i :
+				  i_ssel_dec[2] ? s2_dat_i :
+				  i_ssel_dec[3] ? s3_dat_i :
+				  i_ssel_dec[4] ? s4_dat_i :
+				  i_ssel_dec[5] ? s5_dat_i :
+				  i_ssel_dec[6] ? s6_dat_i :
+				  i_ssel_dec[7] ? s7_dat_i : {`dw{1'b0}}; 
+`endif
+//
+// arbiter: gnt is the granted master index, i_gnt_arb its one-hot form
+//
+assign i_gnt_arb[0] = (gnt == 3'd0);
+assign i_gnt_arb[1] = (gnt == 3'd1);
+assign i_gnt_arb[2] = (gnt == 3'd2);
+assign i_gnt_arb[3] = (gnt == 3'd3);
+assign i_gnt_arb[4] = (gnt == 3'd4);
+assign i_gnt_arb[5] = (gnt == 3'd5);
+assign i_gnt_arb[6] = (gnt == 3'd6);
+assign i_gnt_arb[7] = (gnt == 3'd7);
+
+wb_conbus_arb	wb_conbus_arb(
+	.clk(clk_i), 
+	.rst(rst_i),
+	.req({	m7_cyc_i,
+		m6_cyc_i,
+		m5_cyc_i,
+		m4_cyc_i,
+		m3_cyc_i,
+		m2_cyc_i,
+		m1_cyc_i,
+		m0_cyc_i}),
+	.gnt(gnt)
+);
+
+//////////////////////////////////
+// 		address decode logic
+//
+wire [7:0]	m0_ssel_dec, m1_ssel_dec, m2_ssel_dec, m3_ssel_dec, m4_ssel_dec, m5_ssel_dec, m6_ssel_dec, m7_ssel_dec;
+always @*
+	case(gnt)
+		3'h0: i_ssel_dec = m0_ssel_dec;
+		3'h1: i_ssel_dec = m1_ssel_dec;
+		3'h2: i_ssel_dec = m2_ssel_dec;
+		3'h3: i_ssel_dec = m3_ssel_dec;
+		3'h4: i_ssel_dec = m4_ssel_dec;
+		3'h5: i_ssel_dec = m5_ssel_dec;
+		3'h6: i_ssel_dec = m6_ssel_dec;
+		3'h7: i_ssel_dec = m7_ssel_dec;
+		default: i_ssel_dec = {`sselectw{1'b0}};	// x/z on gnt in simulation: select no slave
+endcase
+//
+//	Every master address is decoded ahead of the grant mux (the original
+//	author's stated reason: "for running faster"); gnt then picks one result.
+assign m0_ssel_dec[0] = (m0_adr_i[`aw -1 : `aw - s0_addr_w ] == s0_addr);
+assign m0_ssel_dec[1] = (m0_adr_i[`aw -1 : `aw - s1_addr_w ] == s1_addr);
+assign m0_ssel_dec[2] = (m0_adr_i[`aw -1 : `aw - s2_addr_w ] == s2_addr);
+assign m0_ssel_dec[3] = (m0_adr_i[`aw -1 : `aw - s3_addr_w ] == s3_addr);
+assign m0_ssel_dec[4] = (m0_adr_i[`aw -1 : `aw - s4_addr_w ] == s4_addr);
+assign m0_ssel_dec[5] = (m0_adr_i[`aw -1 : `aw - s5_addr_w ] == s5_addr);
+assign m0_ssel_dec[6] = (m0_adr_i[`aw -1 : `aw - s6_addr_w ] == s6_addr);
+assign m0_ssel_dec[7] = (m0_adr_i[`aw -1 : `aw - s7_addr_w ] == s7_addr);
+
+assign m1_ssel_dec[0] = (m1_adr_i[`aw -1 : `aw - s0_addr_w ] == s0_addr);
+assign m1_ssel_dec[1] = (m1_adr_i[`aw -1 : `aw - s1_addr_w ] == s1_addr);
+assign m1_ssel_dec[2] = (m1_adr_i[`aw -1 : `aw - s2_addr_w ] == s2_addr);
+assign m1_ssel_dec[3] = (m1_adr_i[`aw -1 : `aw - s3_addr_w ] == s3_addr);
+assign m1_ssel_dec[4] = (m1_adr_i[`aw -1 : `aw - s4_addr_w ] == s4_addr);
+assign m1_ssel_dec[5] = (m1_adr_i[`aw -1 : `aw - s5_addr_w ] == s5_addr);
+assign m1_ssel_dec[6] = (m1_adr_i[`aw -1 : `aw - s6_addr_w ] == s6_addr);
+assign m1_ssel_dec[7] = (m1_adr_i[`aw -1 : `aw - s7_addr_w ] == s7_addr);
+
+assign m2_ssel_dec[0] = (m2_adr_i[`aw -1 : `aw - s0_addr_w ] == s0_addr);
+assign m2_ssel_dec[1] = (m2_adr_i[`aw -1 : `aw - s1_addr_w ] == s1_addr);
+assign m2_ssel_dec[2] = (m2_adr_i[`aw -1 : `aw - s2_addr_w ] == s2_addr);
+assign m2_ssel_dec[3] = (m2_adr_i[`aw -1 : `aw - s3_addr_w ] == s3_addr);
+assign m2_ssel_dec[4] = (m2_adr_i[`aw -1 : `aw - s4_addr_w ] == s4_addr);
+assign m2_ssel_dec[5] = (m2_adr_i[`aw -1 : `aw - s5_addr_w ] == s5_addr);
+assign m2_ssel_dec[6] = (m2_adr_i[`aw -1 : `aw - s6_addr_w ] == s6_addr);
+assign m2_ssel_dec[7] = (m2_adr_i[`aw -1 : `aw - s7_addr_w ] == s7_addr);
+
+assign m3_ssel_dec[0] = (m3_adr_i[`aw -1 : `aw - s0_addr_w ] == s0_addr);
+assign m3_ssel_dec[1] = (m3_adr_i[`aw -1 : `aw - s1_addr_w ] == s1_addr);
+assign m3_ssel_dec[2] = (m3_adr_i[`aw -1 : `aw - s2_addr_w ] == s2_addr);
+assign m3_ssel_dec[3] = (m3_adr_i[`aw -1 : `aw - s3_addr_w ] == s3_addr);
+assign m3_ssel_dec[4] = (m3_adr_i[`aw -1 : `aw - s4_addr_w ] == s4_addr);
+assign m3_ssel_dec[5] = (m3_adr_i[`aw -1 : `aw - s5_addr_w ] == s5_addr);
+assign m3_ssel_dec[6] = (m3_adr_i[`aw -1 : `aw - s6_addr_w ] == s6_addr);
+assign m3_ssel_dec[7] = (m3_adr_i[`aw -1 : `aw - s7_addr_w ] == s7_addr);
+
+assign m4_ssel_dec[0] = (m4_adr_i[`aw -1 : `aw - s0_addr_w ] == s0_addr);
+assign m4_ssel_dec[1] = (m4_adr_i[`aw -1 : `aw - s1_addr_w ] == s1_addr);
+assign m4_ssel_dec[2] = (m4_adr_i[`aw -1 : `aw - s2_addr_w ] == s2_addr);
+assign m4_ssel_dec[3] = (m4_adr_i[`aw -1 : `aw - s3_addr_w ] == s3_addr);
+assign m4_ssel_dec[4] = (m4_adr_i[`aw -1 : `aw - s4_addr_w ] == s4_addr);
+assign m4_ssel_dec[5] = (m4_adr_i[`aw -1 : `aw - s5_addr_w ] == s5_addr);
+assign m4_ssel_dec[6] = (m4_adr_i[`aw -1 : `aw - s6_addr_w ] == s6_addr);
+assign m4_ssel_dec[7] = (m4_adr_i[`aw -1 : `aw - s7_addr_w ] == s7_addr);
+
+assign m5_ssel_dec[0] = (m5_adr_i[`aw -1 : `aw - s0_addr_w ] == s0_addr);
+assign m5_ssel_dec[1] = (m5_adr_i[`aw -1 : `aw - s1_addr_w ] == s1_addr);
+assign m5_ssel_dec[2] = (m5_adr_i[`aw -1 : `aw - s2_addr_w ] == s2_addr);
+assign m5_ssel_dec[3] = (m5_adr_i[`aw -1 : `aw - s3_addr_w ] == s3_addr);
+assign m5_ssel_dec[4] = (m5_adr_i[`aw -1 : `aw - s4_addr_w ] == s4_addr);
+assign m5_ssel_dec[5] = (m5_adr_i[`aw -1 : `aw - s5_addr_w ] == s5_addr);
+assign m5_ssel_dec[6] = (m5_adr_i[`aw -1 : `aw - s6_addr_w ] == s6_addr);
+assign m5_ssel_dec[7] = (m5_adr_i[`aw -1 : `aw - s7_addr_w ] == s7_addr);
+
+assign m6_ssel_dec[0] = (m6_adr_i[`aw -1 : `aw - s0_addr_w ] == s0_addr);
+assign m6_ssel_dec[1] = (m6_adr_i[`aw -1 : `aw - s1_addr_w ] == s1_addr);
+assign m6_ssel_dec[2] = (m6_adr_i[`aw -1 : `aw - s2_addr_w ] == s2_addr);
+assign m6_ssel_dec[3] = (m6_adr_i[`aw -1 : `aw - s3_addr_w ] == s3_addr);
+assign m6_ssel_dec[4] = (m6_adr_i[`aw -1 : `aw - s4_addr_w ] == s4_addr);
+assign m6_ssel_dec[5] = (m6_adr_i[`aw -1 : `aw - s5_addr_w ] == s5_addr);
+assign m6_ssel_dec[6] = (m6_adr_i[`aw -1 : `aw - s6_addr_w ] == s6_addr);
+assign m6_ssel_dec[7] = (m6_adr_i[`aw -1 : `aw - s7_addr_w ] == s7_addr);
+
+assign m7_ssel_dec[0] = (m7_adr_i[`aw -1 : `aw - s0_addr_w ] == s0_addr);
+assign m7_ssel_dec[1] = (m7_adr_i[`aw -1 : `aw - s1_addr_w ] == s1_addr);
+assign m7_ssel_dec[2] = (m7_adr_i[`aw -1 : `aw - s2_addr_w ] == s2_addr);
+assign m7_ssel_dec[3] = (m7_adr_i[`aw -1 : `aw - s3_addr_w ] == s3_addr);
+assign m7_ssel_dec[4] = (m7_adr_i[`aw -1 : `aw - s4_addr_w ] == s4_addr);
+assign m7_ssel_dec[5] = (m7_adr_i[`aw -1 : `aw - s5_addr_w ] == s5_addr);
+assign m7_ssel_dec[6] = (m7_adr_i[`aw -1 : `aw - s6_addr_w ] == s6_addr);
+assign m7_ssel_dec[7] = (m7_adr_i[`aw -1 : `aw - s7_addr_w ] == s7_addr);
+
+// Notes on the decode above:
+//  - Each comparison uses only the top sN_addr_w bits of the master address,
+//    so slave N owns every address sharing that prefix.
+//  - Regions are not required to be disjoint: when several decodes match, all
+//    matching slaves see stb and i_dat_s (multiplexer build) returns the data
+//    of the lowest-numbered one.
+//  - NOTE(review): the default s5/s6/s7 prefixes have address bit 63 clear, so
+//    they also match slave 0 (selected whenever bit 63 is 0) - confirm intent.
+
+
+endmodule
+
Index: /trunk/WB/wb_conbus_arb.v
===================================================================
--- /trunk/WB/wb_conbus_arb.v	(revision 6)
+++ /trunk/WB/wb_conbus_arb.v	(revision 6)
@@ -0,0 +1,252 @@
+/////////////////////////////////////////////////////////////////////
+////                                                             ////
+////  General Round Robin Arbiter                                ////
+////                                                             ////
+////                                                             ////
+////  Author: Rudolf Usselmann                                   ////
+////          rudi@asics.ws                                      ////
+////                                                             ////
+////                                                             ////
+////  Downloaded from: http://www.opencores.org/cores/wb_conmax/ ////
+////                                                             ////
+/////////////////////////////////////////////////////////////////////
+////                                                             ////
+//// Copyright (C) 2000-2002 Rudolf Usselmann                    ////
+////                         www.asics.ws                        ////
+////                         rudi@asics.ws                       ////
+////                                                             ////
+//// This source file may be used and distributed without        ////
+//// restriction provided that this copyright statement is not   ////
+//// removed from the file and that any derivative work contains ////
+//// the original copyright notice and the associated disclaimer.////
+////                                                             ////
+////     THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY     ////
+//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED   ////
+//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS   ////
+//// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR      ////
+//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,         ////
+//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES    ////
+//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE   ////
+//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR        ////
+//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  ////
+//// LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT  ////
+//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT  ////
+//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE         ////
+//// POSSIBILITY OF SUCH DAMAGE.                                 ////
+////                                                             ////
+/////////////////////////////////////////////////////////////////////
+
+
+//
+//	copy from wb_conmax
+//
+//
+//
+//
+//                        
+
+`include "wb_conbus_defines.v"
+
+module wb_conbus_arb(clk, rst, req, gnt);
+// Eight-way round-robin arbiter: gnt is the index of the requester that currently owns the grant.
+input		clk;		// clock of the grant (state) register
+input		rst;		// asynchronous, active-high reset; forces grant0
+input	[7:0]	req;		// Req input, one active-high bit per requester
+output	[2:0]	gnt; 		// Grant output, index of the granted requester
+//input		next;		// Next Target (input is disabled in this copy)
+
+///////////////////////////////////////////////////////////////////////
+// State encodings; each value equals the requester index driven on gnt
+// Parameters
+//
+
+
+parameter	[2:0]
+                grant0 = 3'h0,
+                grant1 = 3'h1,
+                grant2 = 3'h2,
+                grant3 = 3'h3,
+                grant4 = 3'h4,
+                grant5 = 3'h5,
+                grant6 = 3'h6,
+                grant7 = 3'h7;
+
+///////////////////////////////////////////////////////////////////////
+//
+// Local Registers and Wires
+//   state is the registered grant, next_state its combinational successor
+
+reg [2:0]	state, next_state;
+
+///////////////////////////////////////////////////////////////////////
+//
+//  Misc Logic 
+//   - gnt mirrors the state register; the #1 delays only affect simulation timing
+
+assign	gnt = state;
+
+always@(posedge clk or posedge rst)
+	if(rst)		state <= #1 grant0;
+	else		state <= #1 next_state;
+
+///////////////////////////////////////////////////////////////////////
+//
+// Next State Logic
+//   - implements round robin arbitration algorithm
+//   - switches grant only when the current req is dropped (the 'next' input is commented out)
+//   - parks at last grant
+//   - the search starts at the requester after the current one and wraps around
+
+always@(state or req )
+   begin
+	next_state = state;	// Default Keep State
+	case(state)		// synopsys parallel_case full_case
+ 	   grant0:
+		// req[0] dropped: look at req[1]..req[7] in that order; grant0 is kept if none is set
+		if(!req[0] )
+		   begin
+			if(req[1])	next_state = grant1;
+			else
+			if(req[2])	next_state = grant2;
+			else
+			if(req[3])	next_state = grant3;
+			else
+			if(req[4])	next_state = grant4;
+			else
+			if(req[5])	next_state = grant5;
+			else
+			if(req[6])	next_state = grant6;
+			else
+			if(req[7])	next_state = grant7;
+		   end
+ 	   grant1:
+		// req[1] dropped: look at req[2]..req[7], then req[0]; grant1 is kept if none is set
+		if(!req[1] )
+		   begin
+			if(req[2])	next_state = grant2;
+			else
+			if(req[3])	next_state = grant3;
+			else
+			if(req[4])	next_state = grant4;
+			else
+			if(req[5])	next_state = grant5;
+			else
+			if(req[6])	next_state = grant6;
+			else
+			if(req[7])	next_state = grant7;
+			else
+			if(req[0])	next_state = grant0;
+		   end
+ 	   grant2:
+		// req[2] dropped: look at req[3]..req[7], then req[0]..req[1]; grant2 is kept if none is set
+		if(!req[2] )
+		   begin
+			if(req[3])	next_state = grant3;
+			else
+			if(req[4])	next_state = grant4;
+			else
+			if(req[5])	next_state = grant5;
+			else
+			if(req[6])	next_state = grant6;
+			else
+			if(req[7])	next_state = grant7;
+			else
+			if(req[0])	next_state = grant0;
+			else
+			if(req[1])	next_state = grant1;
+		   end
+ 	   grant3:
+		// req[3] dropped: look at req[4]..req[7], then req[0]..req[2]; grant3 is kept if none is set
+		if(!req[3] )
+		   begin
+			if(req[4])	next_state = grant4;
+			else
+			if(req[5])	next_state = grant5;
+			else
+			if(req[6])	next_state = grant6;
+			else
+			if(req[7])	next_state = grant7;
+			else
+			if(req[0])	next_state = grant0;
+			else
+			if(req[1])	next_state = grant1;
+			else
+			if(req[2])	next_state = grant2;
+		   end
+ 	   grant4:
+		// req[4] dropped: look at req[5]..req[7], then req[0]..req[3]; grant4 is kept if none is set
+		if(!req[4] )
+		   begin
+			if(req[5])	next_state = grant5;
+			else
+			if(req[6])	next_state = grant6;
+			else
+			if(req[7])	next_state = grant7;
+			else
+			if(req[0])	next_state = grant0;
+			else
+			if(req[1])	next_state = grant1;
+			else
+			if(req[2])	next_state = grant2;
+			else
+			if(req[3])	next_state = grant3;
+		   end
+ 	   grant5:
+		// req[5] dropped: look at req[6]..req[7], then req[0]..req[4]; grant5 is kept if none is set
+		if(!req[5] )
+		   begin
+			if(req[6])	next_state = grant6;
+			else
+			if(req[7])	next_state = grant7;
+			else
+			if(req[0])	next_state = grant0;
+			else
+			if(req[1])	next_state = grant1;
+			else
+			if(req[2])	next_state = grant2;
+			else
+			if(req[3])	next_state = grant3;
+			else
+			if(req[4])	next_state = grant4;
+		   end
+ 	   grant6:
+		// req[6] dropped: look at req[7], then req[0]..req[5]; grant6 is kept if none is set
+		if(!req[6] )
+		   begin
+			if(req[7])	next_state = grant7;
+			else
+			if(req[0])	next_state = grant0;
+			else
+			if(req[1])	next_state = grant1;
+			else
+			if(req[2])	next_state = grant2;
+			else
+			if(req[3])	next_state = grant3;
+			else
+			if(req[4])	next_state = grant4;
+			else
+			if(req[5])	next_state = grant5;
+		   end
+ 	   grant7:
+		// req[7] dropped: look at req[0]..req[6] in that order; grant7 is kept if none is set
+		if(!req[7] )
+		   begin
+			if(req[0])	next_state = grant0;
+			else
+			if(req[1])	next_state = grant1;
+			else
+			if(req[2])	next_state = grant2;
+			else
+			if(req[3])	next_state = grant3;
+			else
+			if(req[4])	next_state = grant4;
+			else
+			if(req[5])	next_state = grant5;
+			else
+			if(req[6])	next_state = grant6;
+		   end
+	endcase
+   end
+
+endmodule 
+
Index: /trunk/WB2ALTDDR3/dram_wb.v
===================================================================
--- /trunk/WB2ALTDDR3/dram_wb.v	(revision 6)
+++ /trunk/WB2ALTDDR3/dram_wb.v	(revision 6)
@@ -0,0 +1,346 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:  (C) Athree, 2009
+// Engineer: Dmitry Rozhdestvenskiy 
+// Email dmitry.rozhdestvenskiy@srisc.com dmitryr@a3.spb.ru divx4log@narod.ru
+// 
+// Design Name:    Bridge from Wishbone to Altera DDR3 controller
+// Module Name:    wb2altddr3 
+// Project Name:   SPARC SoC single-core
+//
+// LICENSE:
+// This is a Free Hardware Design; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// version 2 as published by the Free Software Foundation.
+// The above named program is distributed in the hope that it will
+// be useful, but WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU General Public License for more details.
+//
+//////////////////////////////////////////////////////////////////////////////////
+
+module dram_wb( // Wishbone slave that queues 64-bit accesses in a dual-clock FIFO for the "dram" controller
+   input             clk200,        // reference clock, wired to dram sys_clk and idly_clk_200
+   input             rup,           // not used (referenced only by the commented-out OCT block)
+   input             rdn,           // not used (referenced only by the commented-out OCT block)
+
+   input             wb_clk_i,      // Wishbone clock (FIFO write side)
+   input             wb_rst_i,      // asynchronous, active-high reset of the read-line address cache
+    
+   input      [63:0] wb_dat_i,      // write data
+   output reg [63:0] wb_dat_o,      // read data: 64-bit word picked from the latched 256-bit line
+   input      [63:0] wb_adr_i,      // byte address; [33:3] is queued, [28:5] tags the line, [4:3] picks the word
+   input      [ 7:0] wb_sel_i,      // byte selects, queued alongside the write data
+   input             wb_we_i,       // 1 = write, 0 = read
+   input             wb_cyc_i, 
+   input             wb_stb_i, 
+   output            wb_ack_o,      // see the wb_ack_o assign near the end of the module
+   output            wb_err_o,      // tied low
+   output            wb_rty_o,      // tied low
+   input             wb_cab_i,      // not used
+
+   inout      [63:0] ddr3_dq,       // memory pins below are passed straight to the dram instance
+   inout      [ 7:0] ddr3_dqs,
+   inout      [ 7:0] ddr3_dqs_n,
+   inout             ddr3_ck,
+   inout             ddr3_ck_n,
+   output            ddr3_reset,
+   output     [12:0] ddr3_a,
+   output     [ 2:0] ddr3_ba,
+   output            ddr3_ras_n,
+   output            ddr3_cas_n,
+   output            ddr3_we_n,
+   output            ddr3_cs_n,
+   output            ddr3_odt,
+   output            ddr3_ce,
+   output     [ 7:0] ddr3_dm,
+
+   output            phy_init_done, // from dram; while low, ddr_rst holds the FIFO and the FSM in reset
+   
+   output     [ 7:0] fifo_used,     // wr_data_count of the command FIFO
+    
+   input             dcm_locked,    // not used
+   input             sysrst         // wired to dram sys_rst_n
+);
+
+wire [255:0] rd_data_fifo_out;  // read line returned by dram
+reg  [ 23:0] rd_addr_cache;     // wb_adr_i[28:5] of the cached read line; 24'hFFFFFF after reset/invalidation
+wire [ 71:0] wr_dout;           // FIFO output: {wb_sel_i, wb_dat_i}
+wire [ 31:0] cmd_out;           // FIFO output: {wb_we_i, wb_adr_i[33:3]}
+reg          wb_stb_i_d;        // wb_stb_i delayed by one wb_clk_i cycle
+reg  [ 31:0] mask_data;         // byte selects placed in the lane of the addressed 64-bit word
+wire ddr_clk, ddr_rst, rd_data_valid, fifo_full; // formerly implicit 1-bit nets, now declared explicitly
+wire dram_ready;
+wire fifo_empty;
+reg       push_tran;            // set/cleared by the FSM; its only readers are in commented-out connections
+// NOTE(review): ddr_clk only connects to dram_fifo.rd_clk and dram_ready connects to nothing; the dram ports that fed them are commented out below -- confirm
+//wire [13:0] parallelterminationcontrol;
+//wire [13:0] seriesterminationcontrol;
+
+dram dram_ctrl(
+    .sys_clk(clk200),
+    .sys_rst_n(sysrst),  // Resets all
+    .phy_init_done(phy_init_done),
+    .app_wdf_mask_data(mask_data),  // NOTE(review): carries byte selects; confirm whether dram expects enables or masks
+    .app_af_addr(cmd_out[25:2]),    // = queued wb_adr_i[28:5], i.e. the 256-bit line address
+    .rd_data_valid(rd_data_valid),
+    .rd_data_fifo_out(rd_data_fifo_out),    
+    .app_wdf_data(wr_dout[63:0]),
+	 
+	 // doubtful connections, to be confirmed
+	 .app_wdf_wren(1'b1),
+         .app_af_wren(1'b1),
+	 .app_af_afull(),
+	 .app_wdf_afull(),
+         .app_af_cmd(),
+	 .clk0_tb(),
+	 .idly_clk_200(clk200),
+	 
+    .rst0_tb(ddr3_reset),
+    .ddr2_dqs(ddr3_dqs),
+    .ddr2_dqs_n(ddr3_dqs_n),
+    .ddr2_ck(ddr3_ck),
+    .ddr2_ck_n(ddr3_ck_n),
+    .ddr2_dq(ddr3_dq),
+    .ddr2_ras_n(ddr3_ras_n),
+    .ddr2_cas_n(ddr3_cas_n),
+    .ddr2_odt(ddr3_odt),
+    .ddr2_cs_n(ddr3_cs_n),
+    .ddr2_cke(ddr3_ce),
+    .ddr2_we_n(ddr3_we_n),
+    .ddr2_ba(ddr3_ba),
+	 .ddr2_a(ddr3_a),
+    .ddr2_dm(ddr3_dm)
+//               |
+//not replaced  \|/  (connections of the previous controller interface with no counterpart yet)
+//               V 
+//    .phy_clk(ddr_clk),         // User clock
+//    .local_ready(dram_ready),
+//    .local_burstbegin(push_tran),
+//    .local_read_req(!cmd_out[31] && push_tran),
+//    .local_write_req(cmd_out[31] && push_tran),
+//    .local_wdata({wr_dout[63:0],wr_dout[63:0],wr_dout[63:0],wr_dout[63:0]}),
+//    .local_size(3'b001)
+    
+);
+
+/* Previous controller instantiation, commented out by sal; kept for reference
+dram dram_ctrl(
+    .pll_ref_clk(clk200),
+    .global_reset_n(sysrst),  // Resets all
+    .soft_reset_n(1),    // Resets all but PLL
+    
+    .reset_request_n(), // Active when not ready (PLL not locked)
+    .reset_phy_clk_n(), // Reset input sync to phy_clk
+
+    .phy_clk(ddr_clk),         // User clock
+    .dll_reference_clk(), // For external DLL
+
+    .dqs_delay_ctrl_export(),
+    .aux_scan_clk(),
+    .aux_scan_clk_reset_n(),
+    .aux_full_rate_clk(),
+    .aux_half_rate_clk(),
+    
+    .oct_ctl_rs_value(seriesterminationcontrol),
+    .oct_ctl_rt_value(parallelterminationcontrol),
+
+    .local_init_done(phy_init_done),
+
+    .local_ready(dram_ready),
+    .local_address(cmd_out[25:2]),
+    .local_burstbegin(push_tran),
+    .local_read_req(!cmd_out[31] && push_tran),
+    .local_write_req(cmd_out[31] && push_tran),
+    .local_wdata_req(),
+    .local_wdata({wr_dout[63:0],wr_dout[63:0],wr_dout[63:0],wr_dout[63:0]}),
+    .local_be(mask_data),
+    .local_size(3'b001),
+    .local_rdata_valid(rd_data_valid),
+    .local_rdata(rd_data_fifo_out),
+    .local_refresh_ack(),
+    
+    .mem_clk(ddr3_ck),
+    .mem_clk_n(ddr3_ck_n),
+    .mem_reset_n(ddr3_reset),
+    .mem_dq(ddr3_dq),
+    .mem_dqs(ddr3_dqs),
+    .mem_dqsn(ddr3_dqs_n),
+    .mem_odt(ddr3_odt),
+    .mem_cs_n(ddr3_cs_n),
+    .mem_cke(ddr3_ce),
+    .mem_addr(ddr3_a),
+    .mem_ba(ddr3_ba),
+    .mem_ras_n(ddr3_ras_n),
+    .mem_cas_n(ddr3_cas_n),
+    .mem_we_n(ddr3_we_n),
+    .mem_dm(ddr3_dm)
+);
+*/
+assign ddr_rst=!phy_init_done; // bridge stays in reset until the controller reports init done
+
+/*oct_alt_oct_power_f4c oct
+( 
+    .parallelterminationcontrol(parallelterminationcontrol),
+    .seriesterminationcontrol(seriesterminationcontrol),
+    .rdn(rdn),
+    .rup(rup)
+) ; */
+
+always @( * ) // move the 8 queued byte selects to the lane chosen by cmd_out[1:0] (= queued wb_adr_i[4:3])
+   case(cmd_out[1:0])
+      2'b00:mask_data={24'h000000,wr_dout[71:64]};
+      2'b01:mask_data={16'h0000,wr_dout[71:64],8'h00};
+      2'b10:mask_data={8'h00,wr_dout[71:64],16'h0000};
+      2'b11:mask_data={wr_dout[71:64],24'h000000};
+   endcase
+
+//wire [254:0] trig0;
+
+/*ila1 ila1_inst (
+    .CONTROL(CONTROL),
+    .CLK(ddr_clk),
+    .TRIG0(trig0)
+);*/
+
+/*assign trig0[127:0]=rd_data_fifo_out;
+assign trig0[199:128]=wr_dout;
+assign trig0[231:200]=cmd_out;
+assign trig0[232]=0;
+assign trig0[233]=0;
+assign trig0[234]=rd_data_valid;
+assign trig0[235]=0;
+assign trig0[236]=fifo_empty;
+assign trig0[237]=0;
+assign trig0[238]=0;
+assign trig0[254:239]=0;
+*/
+
+reg fifo_full_d;   // fifo_full delayed by one wb_clk_i cycle
+reg written;       // set when the FIFO leaves the full state, cleared on wb_ack_o
+reg       fifo_read; // one-cycle pop request from the FSM
+
+dram_fifo fifo( // command/data queue: written in the wb_clk_i domain, read in the ddr_clk domain
+   .rst(ddr_rst),
+   .wr_clk(wb_clk_i),
+   .rd_clk(ddr_clk),
+   .din({wb_sel_i,wb_dat_i,wb_we_i,wb_adr_i[33:3]}),
+   .wr_en(wb_cyc_i && wb_stb_i && (!wb_stb_i_d || (fifo_full_d && !written)) && !fifo_full && !(rd_addr_cache==wb_adr_i[28:5] && !wb_we_i)), // push on a new strobe (or once the FIFO stops being full), never for a read that hits the cached line
+   .full(fifo_full),
+   .rd_en(fifo_read),
+   .dout({wr_dout,cmd_out}),
+   .wr_data_count(fifo_used),
+   .empty(fifo_empty)
+);
+
+`define DDR_IDLE    3'b000
+`define DDR_WRITE_1 3'b001
+`define DDR_WRITE_2 3'b010
+`define DDR_READ_1  3'b011
+`define DDR_READ_2  3'b100
+
+reg [2:0] ddr_state;
+reg rd_data_valid_stb; // held from read-data arrival until the Wishbone ack has been seen on ddr_clk
+reg wb_ack_d1;         // wb_ack_o after two ddr_clk register stages
+
+always @(posedge ddr_clk or posedge ddr_rst) // controller-side FSM: handles one queued command at a time
+   if(ddr_rst)
+      begin
+         ddr_state<=`DDR_IDLE;
+         fifo_read<=0;
+         push_tran<=0;
+         rd_data_valid_stb<=0;
+      end
+   else
+      case(ddr_state)
+         `DDR_IDLE:
+            if(!fifo_empty && dram_ready) // a command is at the FIFO output and the controller is ready
+               begin
+                  push_tran<=1;
+                  if(cmd_out[31]) // queued wb_we_i set: write
+                     begin
+                        ddr_state<=`DDR_WRITE_1;
+                        fifo_read<=1; // writes are popped immediately
+                     end
+                  else
+                     ddr_state<=`DDR_READ_1; // reads stay at the FIFO head until data returns
+               end
+         `DDR_WRITE_1:
+            begin
+               push_tran<=0;
+               fifo_read<=0;
+               ddr_state<=`DDR_WRITE_2; // Protect against FIFO empty signal latency
+            end
+         `DDR_WRITE_2:
+            ddr_state<=`DDR_IDLE;
+         `DDR_READ_1:
+            begin
+               push_tran<=0;
+               if(rd_data_valid) // read line has arrived: flag it to the Wishbone side and pop the command
+                  begin
+                     rd_data_valid_stb<=1;
+                     fifo_read<=1;
+                     ddr_state<=`DDR_READ_2;
+                  end
+            end
+         `DDR_READ_2:
+            begin
+               fifo_read<=0;
+               if(wb_ack_d1) // Enough delay to protect against FIFO empty signal latency
+                  begin
+                     rd_data_valid_stb<=0;
+                     ddr_state<=`DDR_IDLE;
+                  end
+            end
+      endcase
+
+reg rd_data_valid_stb_d1; // rd_data_valid_stb re-registered on wb_clk_i (stages d1..d4)
+reg rd_data_valid_stb_d2;
+reg rd_data_valid_stb_d3;
+reg rd_data_valid_stb_d4; // written but never read in this module
+reg [255:0] rd_data_fifo_out_d; // last read line, captured on ddr_clk when rd_data_valid is high
+reg wb_ack_d;
+
+always @( * ) // select the addressed 64-bit word of the latched 256-bit line
+   case(wb_adr_i[4:3])
+      2'b00:wb_dat_o=rd_data_fifo_out_d[63:0];
+      2'b01:wb_dat_o=rd_data_fifo_out_d[127:64];
+      2'b10:wb_dat_o=rd_data_fifo_out_d[191:128];
+      2'b11:wb_dat_o=rd_data_fifo_out_d[255:192];
+   endcase
+
+always @(posedge wb_clk_i or posedge wb_rst_i) // Wishbone-side bookkeeping; only rd_addr_cache has a reset value
+   if(wb_rst_i)
+      rd_addr_cache<=24'hFFFFFF;
+   else
+   begin
+      wb_stb_i_d<=wb_stb_i;
+      if(wb_cyc_i && wb_stb_i)
+         if(!wb_we_i)
+            rd_addr_cache<=wb_ack_o ? wb_adr_i[28:5]:rd_addr_cache; // remember the line of an acknowledged read
+         else
+            if(rd_addr_cache==wb_adr_i[28:5])
+               rd_addr_cache<=24'hFFFFFF; // a write to the cached line invalidates it
+      rd_data_valid_stb_d1<=rd_data_valid_stb;
+      rd_data_valid_stb_d2<=rd_data_valid_stb_d1;
+      rd_data_valid_stb_d3<=rd_data_valid_stb_d2;
+      rd_data_valid_stb_d4<=rd_data_valid_stb_d3;
+      fifo_full_d<=fifo_full;
+      if(wb_ack_o)
+         written<=0;
+      else
+         if(!fifo_full && fifo_full_d)
+            written<=1;
+   end
+// Writes are acked as soon as the FIFO can take them; reads on the rising edge of the re-registered data strobe or on a cached-line hit (the read term is not qualified by cyc/stb)
+assign wb_ack_o=wb_we_i ? (wb_cyc_i && wb_stb_i && !fifo_full):(rd_data_valid_stb_d2 && !rd_data_valid_stb_d3) || (rd_addr_cache==wb_adr_i[28:5]);
+assign wb_err_o=1'b0, wb_rty_o=1'b0; // error/retry are never signalled; these outputs used to be left undriven
+always @(posedge ddr_clk) // ddr_clk side: re-register the ack and capture returned read data
+   begin
+      wb_ack_d<=wb_ack_o;
+      wb_ack_d1<=wb_ack_d;
+      if(rd_data_valid)
+         rd_data_fifo_out_d<=rd_data_fifo_out;
+   end
+    
+endmodule
Index: /trunk/NOR-flash/WBFLASH.v
===================================================================
--- /trunk/NOR-flash/WBFLASH.v	(revision 6)
+++ /trunk/NOR-flash/WBFLASH.v	(revision 6)
@@ -0,0 +1,154 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:  (C) Athree, 2009
+// Engineer: Dmitry Rozhdestvenskiy 
+// Email dmitry.rozhdestvenskiy@srisc.com dmitryr@a3.spb.ru divx4log@narod.ru
+// 
+// Design Name:    Wishbone NOR flash controller
+// Module Name:    wbflash 
+// Project Name:   SPARC SoC single-core
+//
+// LICENSE:
+// This is a Free Hardware Design; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// version 2 as published by the Free Software Foundation.
+// The above named program is distributed in the hope that it will
+// be useful, but WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU General Public License for more details.
+//
+//////////////////////////////////////////////////////////////////////////////////
+module WBFLASH( // Two-port Wishbone front end that reads a 16-bit flash and assembles 64-bit words
+    input             wb_clk_i,
+    input             wb_rst_i,     // asynchronous, active-high reset; flash_rev is captured while it is high
+    // Wishbone port 0: flash_addr offset selected by the captured flash_rev
+    input      [63:0] wb_dat_i,     // not used (wb_we_i is ignored, every access is served as a read)
+    output     [63:0] wb_dat_o,     // last 64-bit word assembled for port 0
+    input      [63:0] wb_adr_i,     // bits [25:3] select the 64-bit word; the full value is the cache tag
+    input      [ 7:0] wb_sel_i,     // not used
+    input             wb_we_i,      // not used
+    input             wb_cyc_i, 
+    input             wb_stb_i, 
+    output reg        wb_ack_o,     // set when the word is available, cleared when both ports are idle
+    output            wb_err_o,     // tied low
+    output            wb_rty_o,     // tied low
+    input             wb_cab_i,     // not used
+    // Wishbone port 1: fixed flash_addr offset 25'h0400000
+    input      [63:0] wb1_dat_i,    // not used
+    output     [63:0] wb1_dat_o,    // last 64-bit word assembled for port 1
+    input      [63:0] wb1_adr_i,    // bits [25:3] select the 64-bit word; the full value is the cache tag
+    input      [ 7:0] wb1_sel_i,    // not used
+    input             wb1_we_i,     // not used
+    input             wb1_cyc_i, 
+    input             wb1_stb_i, 
+    output reg        wb1_ack_o, 
+    output            wb1_err_o,    // tied low
+    output            wb1_rty_o,    // tied low
+    input             wb1_cab_i,    // not used
+    // Flash device pins
+    output reg [24:0] flash_addr,   // {64-bit word index, wordcnt} plus a per-port offset
+    input      [15:0] flash_data,
+    output            flash_oen,    // 0 while either port has cyc and stb asserted, 1 otherwise
+    output            flash_wen,    // constant 1
+    output            flash_cen,    // constant 0
+    input      [ 1:0] flash_rev     // selects the port-0 offset (0, 1, 2 or 3 times 25'h0100000)
+     //output            flash_ldn
+);
+assign wb_err_o=0;
+assign wb_rty_o=0;
+assign wb1_err_o=0; // port 1 error/retry were left undriven; tie them off like port 0
+assign wb1_rty_o=0;
+reg  [1:0] wordcnt;     // which 16-bit quarter of the 64-bit word is being fetched
+reg  [2:0] cyclecnt;    // clocks spent on the current flash read (data is sampled when it reaches 7)
+reg [63:0] wb_dat;
+reg [63:0] wb1_dat;
+reg [63:0] cache_addr;  // address whose data is held in wb_dat
+reg [63:0] cache_addr1; // address whose data is held in wb1_dat
+
+always @(posedge wb_clk_i or posedge wb_rst_i) // fetch FSM shared by both ports
+   if(wb_rst_i)
+      begin
+         wordcnt<=2'b00;   // counters and acks are now cleared by reset as well, so that
+         cyclecnt<=3'b000; // no stale ack or half-finished fetch survives a reset
+         wb_ack_o<=0;
+         wb1_ack_o<=0;
+         cache_addr<=64'b0;  // NOTE(review): tags have no valid bit, so address 0 "hits" right after reset
+         cache_addr1<=64'b0; // although wb_dat/wb1_dat were never loaded -- confirm 0 is never decoded to this slave
+      end
+   else
+      if((!wb_cyc_i || !wb_stb_i) && (!wb1_cyc_i || !wb1_stb_i)) // neither port is requesting
+         begin
+            wordcnt<=2'b00;
+            cyclecnt<=3'b000;
+            wb_ack_o<=0;
+            wb1_ack_o<=0;
+         end
+      else
+         if(wb_stb_i) // port 0 is served whenever its strobe is high (NOTE(review): the flash_addr mux keys on wb1_stb_i instead -- confirm both strobes are never high together)
+            if(wb_adr_i==cache_addr)
+               wb_ack_o<=1; // hit: wb_dat already holds this address
+            else
+               if(cyclecnt!=3'b111)
+                  cyclecnt<=cyclecnt+1; // give the flash 8 clocks per 16-bit read
+               else
+                  begin
+                     cyclecnt<=0;
+                     case(wordcnt) // first read fills the top 16 bits; the two bytes of each read are swapped
+                        2'b00:wb_dat[63:48]<={flash_data[7:0],flash_data[15:8]};
+                        2'b01:wb_dat[47:32]<={flash_data[7:0],flash_data[15:8]};
+                        2'b10:wb_dat[31:16]<={flash_data[7:0],flash_data[15:8]};
+                        2'b11:wb_dat[15: 0]<={flash_data[7:0],flash_data[15:8]};
+                     endcase
+                     if(wordcnt!=2'b11)
+                        wordcnt<=wordcnt+1;
+                     else
+                        begin
+                           wb_ack_o<=1; // fourth quarter stored: acknowledge and remember the address
+                           cache_addr<=wb_adr_i;
+                        end
+                  end      
+         else // port 1: same sequence using wb1_dat / cache_addr1
+            if(wb1_adr_i==cache_addr1)
+               wb1_ack_o<=1;
+            else
+               if(cyclecnt!=3'b111)
+                  cyclecnt<=cyclecnt+1;
+               else
+                   begin
+                      cyclecnt<=0;
+                      case(wordcnt)
+                         2'b00:wb1_dat[63:48]<={flash_data[7:0],flash_data[15:8]};
+                         2'b01:wb1_dat[47:32]<={flash_data[7:0],flash_data[15:8]};
+                         2'b10:wb1_dat[31:16]<={flash_data[7:0],flash_data[15:8]};
+                         2'b11:wb1_dat[15: 0]<={flash_data[7:0],flash_data[15:8]};
+                      endcase
+                      if(wordcnt!=2'b11)
+                         wordcnt<=wordcnt+1;
+                      else
+                         begin
+                            wb1_ack_o<=1;
+                            cache_addr1<=wb1_adr_i;
+                         end
+                   end      
+
+assign wb_dat_o=wb_dat;
+assign wb1_dat_o=wb1_dat;
+wire [1:0] flash_rev_d; // flash_rev as seen during reset; held through combinational feedback (acts as a latch)
+assign flash_rev_d=wb_rst_i ? flash_rev:flash_rev_d;
+
+always @( * ) // flash address: port 1 whenever wb1_stb_i is high, otherwise port 0 with the revision offset
+   case({wb1_stb_i,flash_rev_d})
+      3'b000:flash_addr={wb_adr_i[25:3],wordcnt}+25'h0000000;
+      3'b001:flash_addr={wb_adr_i[25:3],wordcnt}+25'h0100000;
+      3'b010:flash_addr={wb_adr_i[25:3],wordcnt}+25'h0200000;
+      3'b011:flash_addr={wb_adr_i[25:3],wordcnt}+25'h0300000;
+      3'b100:flash_addr={wb1_adr_i[25:3],wordcnt}+25'h0400000;
+      3'b101:flash_addr={wb1_adr_i[25:3],wordcnt}+25'h0400000;
+      3'b110:flash_addr={wb1_adr_i[25:3],wordcnt}+25'h0400000;
+      3'b111:flash_addr={wb1_adr_i[25:3],wordcnt}+25'h0400000;
+   endcase
+
+assign flash_oen=((wb_cyc_i && wb_stb_i) || (wb1_cyc_i && wb1_stb_i) ? 0:1);
+assign flash_wen=1;
+assign flash_cen=0;
+endmodule
Index: /trunk/T1-CPU/ffu/sparc_ffu_dp.v
===================================================================
--- /trunk/T1-CPU/ffu/sparc_ffu_dp.v	(revision 6)
+++ /trunk/T1-CPU/ffu/sparc_ffu_dp.v	(revision 6)
@@ -0,0 +1,541 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ffu_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ffu_dp
+//	Description: This is the ffu datapath.  It stores the 2 128 bit operands
+//  	and the result (puts result in the 1st source to save space). 
+*/
+
+`include "iop.h"
+
+module sparc_ffu_dp (/*AUTOARG*/
+   // Outputs
+   so, dp_frf_data, ffu_lsu_data, dp_vis_rs1_data, dp_vis_rs2_data, 
+   dp_ctl_rs2_sign, dp_ctl_fsr_fcc, dp_ctl_fsr_rnd, dp_ctl_fsr_tem, 
+   dp_ctl_fsr_aexc, dp_ctl_fsr_cexc, dp_ctl_ld_fcc, 
+   dp_ctl_gsr_mask_e, dp_ctl_gsr_scale_e, dp_ctl_synd_out_low, 
+   dp_ctl_synd_out_high, 
+   // Inputs
+   rclk, se, si, ctl_dp_rst_l, frf_dp_data, cpx_fpu_data, 
+   lsu_ffu_ld_data, vis_dp_rd_data, ctl_dp_wsr_data_w2, ctl_dp_sign, 
+   ctl_dp_exc_w2, ctl_dp_fcc_w2, ctl_dp_ftt_w2, ctl_dp_noshift64_frf, 
+   ctl_dp_shift_frf_right, ctl_dp_shift_frf_left, 
+   ctl_dp_zero_low32_frf, ctl_dp_output_sel_rs1, 
+   ctl_dp_output_sel_rs2, ctl_dp_output_sel_frf, 
+   ctl_dp_output_sel_fsr, ctl_dp_noflip_lsu, ctl_dp_flip_lsu, 
+   ctl_dp_noflip_fpu, ctl_dp_flip_fpu, ctl_dp_rs2_frf_read, 
+   ctl_dp_rs2_sel_vis, ctl_dp_rs2_sel_fpu_lsu, ctl_dp_rs2_keep_data, 
+   ctl_dp_rd_ecc, ctl_dp_fp_thr, ctl_dp_fsr_sel_old, 
+   ctl_dp_fsr_sel_ld, ctl_dp_fsr_sel_fpu, ctl_dp_gsr_wsr_w2, 
+   ctl_dp_thr_e, ctl_dp_new_rs1, ctl_dp_ecc_sel_frf
+   ) ;
+   input rclk;
+   input se;
+   input si;
+   input ctl_dp_rst_l;
+   input [77:0] frf_dp_data;
+   input [63:0]   cpx_fpu_data;
+   input [63:0] lsu_ffu_ld_data;
+   input [63:0] vis_dp_rd_data;
+
+   input [36:0] ctl_dp_wsr_data_w2;
+
+   input [1:0]   ctl_dp_sign;  // sign after abs or neg
+   input [9:0] 	 ctl_dp_exc_w2;
+   input [7:0] 	 ctl_dp_fcc_w2;
+   input [2:0] 	 ctl_dp_ftt_w2;
+
+   // mux selects
+   input         ctl_dp_noshift64_frf;     // choose output from FRF
+   input         ctl_dp_shift_frf_right;
+   input         ctl_dp_shift_frf_left;
+
+   input         ctl_dp_zero_low32_frf;
+   
+   input         ctl_dp_output_sel_rs1;  // choose output to lsu
+   input         ctl_dp_output_sel_rs2;
+   input         ctl_dp_output_sel_frf;
+   input         ctl_dp_output_sel_fsr;
+
+   input         ctl_dp_noflip_lsu;// inputs from lsu and fpu
+   input         ctl_dp_flip_lsu;
+   input         ctl_dp_noflip_fpu;
+   input         ctl_dp_flip_fpu;
+   
+   input         ctl_dp_rs2_frf_read;    // choose r2
+   input         ctl_dp_rs2_sel_vis;
+   input         ctl_dp_rs2_sel_fpu_lsu;
+   input         ctl_dp_rs2_keep_data;
+   input         ctl_dp_rd_ecc;
+   
+   input [3:0] 	 ctl_dp_fp_thr;
+
+   input [3:0] 	 ctl_dp_fsr_sel_old,     // choose what to update FSR with
+		 ctl_dp_fsr_sel_ld,
+		 ctl_dp_fsr_sel_fpu;
+
+   input [3:0]   ctl_dp_gsr_wsr_w2;
+   input [3:0]   ctl_dp_thr_e;
+   
+
+   // rs1 selects
+   input         ctl_dp_new_rs1;
+   
+   // 2:1 mux selects
+   input      ctl_dp_ecc_sel_frf;
+
+   // outputs
+   output       so;
+   output [63:0] dp_frf_data;
+   output [63:0] ffu_lsu_data;
+   output [63:0] dp_vis_rs1_data;
+   output [63:0] dp_vis_rs2_data;
+   output [1:0]  dp_ctl_rs2_sign;  // sign for rs2
+   output [7:0]  dp_ctl_fsr_fcc;
+   output [1:0]  dp_ctl_fsr_rnd;
+   output [4:0]  dp_ctl_fsr_tem;
+   output [4:0]  dp_ctl_fsr_aexc;
+   output [4:0]  dp_ctl_fsr_cexc;
+   
+   output [7:0]  dp_ctl_ld_fcc;
+
+   output [31:0] dp_ctl_gsr_mask_e;
+   output [4:0]  dp_ctl_gsr_scale_e;
+   
+   
+   output [6:0] dp_ctl_synd_out_low;   // signals for ecc errors
+   output [6:0] dp_ctl_synd_out_high;
+ 
+   wire         clk;
+   wire         reset;
+   // local signals
+   wire [63:0] 	 fpu_ffu_data;
+   wire [63:0]   lsu_ffu_ld_data_d1;
+   wire [63:0]   rs2_rd_data;  // stores both the rs2 and rd data
+   wire [63:0]   rs2_rd_data_next;
+   wire [63:0]   write_data;    // needed since block loads are pipelined
+   wire [63:0]   rs2_data_changed;
+   wire [63:0]   local_rd_data;
+   wire [63:0]   rs1_data;
+   wire [63:0]   rs1_data_next;
+   wire [63:0]   shifted_frf_data;
+   wire [63:0]   new_frf_data;
+   wire [63:0]   lsu_fpu_data;
+   wire [63:0]   frf_data_in;
+   wire [6:0]    synd_in_low;   // input ecc for lower word
+   wire [6:0]    synd_in_h;   // input ecc for upper word
+   wire [63:0] corr_data_next;
+   wire [63:0] corr_data;
+   wire [63:0] ecc_data_in;
+
+   wire [27:0] 	 current_fsr,
+		 t0_fsr,
+		 t1_fsr,
+		 t2_fsr,
+		 t3_fsr;
+   wire [27:0] 	 t0_fsr_nxt,
+		 t1_fsr_nxt,
+		 t2_fsr_nxt,
+		 t3_fsr_nxt;
+   wire [27:0] 	 t0_ldfsr_data,
+		             t0_fpufsr_data;
+   wire [27:0] 	 t1_ldfsr_data,
+		             t1_fpufsr_data;
+   wire [27:0] 	 t2_ldfsr_data,
+		             t2_fpufsr_data;
+   wire [27:0] 	 t3_ldfsr_data,
+		             t3_fpufsr_data;
+
+   wire [36:0]   gsr_e;
+   wire [36:0]   t0_gsr;
+   wire [36:0]   t0_gsr_nxt;
+   wire [36:0]   t1_gsr;
+   wire [36:0]   t1_gsr_nxt;
+   wire [36:0]   t2_gsr;
+   wire [36:0]   t2_gsr_nxt;
+   wire [36:0]   t3_gsr;
+   wire [36:0]   t3_gsr_nxt;
+
+   assign        reset = ~ctl_dp_rst_l;
+   assign        clk= rclk;
+
+   dff_s #(64) cpx_reg(.din(cpx_fpu_data[63:0]),
+			               .q   (fpu_ffu_data[63:0]),
+			               .clk (clk), .se(se), .si(), .so());
+
+   // flop for lsu data.  the data is flopped in ffu, but the vld is flopped in the lsu.
+   // This is for timing reasons on the valid bit and Sanjay didn't want to redo the
+   // lsu dp for the data portion
+   dff_s #(64) lsu_data_dff(.din(lsu_ffu_ld_data[63:0]), .clk(clk), .q(lsu_ffu_ld_data_d1[63:0]),
+                          .se(se), .si(), .so());
+   assign dp_ctl_ld_fcc[7:0] = {lsu_ffu_ld_data_d1[37:32], lsu_ffu_ld_data_d1[11:10]};
+   
+   ///////////////////////////////////////////////
+   //	Input from FRF (shift as needed for singles)
+   //	The data needs to be shifted around because these are 64 bit reads but
+   //	the required data might be in either the upper or lower 32 bits for
+   //	singles.  If it is a double then the data is left alone.
+   //	If it is a single move and the source and target have the same alignment
+   //	then no change happens.  If it is a single move and the source and target
+   //	have different alignments the operands get moved into place for the write.
+   //	If it is data that will be sent to the lsu the data is moved into the lower
+   //	32 bits.  If the data will be sent to the fpu the data is moved to the upper
+   //	32 bits (if not there already)
+   ///////////////////////////////////////////////
+   assign  frf_data_in[63:32] = frf_dp_data[70:39];
+   assign  frf_data_in[31:0] = frf_dp_data[31:0];
+   mux3ds #(64) frf_input_mux(.dout(shifted_frf_data[63:0]),
+                              .in0(frf_data_in[63:0]),
+                              .in1({32'b0, frf_data_in[63:32]}),
+                              .in2({frf_data_in[31:0], 32'b0}),
+                              .sel0(ctl_dp_noshift64_frf),
+                              .sel1(ctl_dp_shift_frf_right),
+                              .sel2(ctl_dp_shift_frf_left));
+   assign  new_frf_data[63:32] = shifted_frf_data[63:32];
+   assign  new_frf_data[31:0] = shifted_frf_data[31:0] & {32{~ctl_dp_zero_low32_frf}};
+
+   mux4ds #(64) lsu_fpu_input_mux(.dout(lsu_fpu_data[63:0]),
+                                  .in0(lsu_ffu_ld_data_d1[63:0]),
+                                  .in1({lsu_ffu_ld_data_d1[31:0], 32'b0}),
+                                  .in2(fpu_ffu_data[63:0]),
+                                  .in3({32'b0, fpu_ffu_data[63:32]}),
+                                  .sel0(ctl_dp_noflip_lsu),
+                                  .sel1(ctl_dp_flip_lsu),
+                                  .sel2(ctl_dp_noflip_fpu),
+                                  .sel3(ctl_dp_flip_fpu));
+
+   // Data to FRF
+   dp_buffer #(64) frf_out_buf(.in(write_data[63:0]), .dout (dp_frf_data[63:0]));
+
+
+
+   // Data to LSU
+   // Mux for lsu data between two sets of data and the direct 
+   // frf output for stores
+   mux4ds #(64) output_mux(.dout (ffu_lsu_data[63:0]),
+                           .in0  (rs2_rd_data[63:0]),
+                           .in1  (rs1_data[63:0]),
+                           .in2  (shifted_frf_data[63:0]),
+			                     .in3  ({26'b0, current_fsr[27:20], 2'b0, current_fsr[19:15], 6'b0, current_fsr[14:12], 2'b0, current_fsr[11:0]}),
+                           .sel0 (ctl_dp_output_sel_rs2),
+                           .sel1 (ctl_dp_output_sel_rs1),
+                           .sel2 (ctl_dp_output_sel_frf),
+			                     .sel3 (ctl_dp_output_sel_fsr));
+
+   // RS2 can take value from frf  (with modification to sign), from lsu 
+   // or keep value
+   // The modification to the sign bits allows for FABS and FNEG
+   assign dp_ctl_rs2_sign[1:0] = {new_frf_data[63], new_frf_data[31]};
+   
+   assign rs2_data_changed[63:0] = {ctl_dp_sign[1], new_frf_data[62:32], 
+				    ctl_dp_sign[0], new_frf_data[30:0]};
+
+   dp_mux2es #(64) local_rd_mux(.dout(local_rd_data[63:0]),
+                             .in0(rs2_data_changed[63:0]),
+                             .in1(corr_data[63:0]),
+                             .sel(ctl_dp_rd_ecc));
+
+   mux4ds #(64) rs2_rd_mux(.dout (rs2_rd_data_next[63:0]),
+                           .in0  (local_rd_data[63:0]),
+                           .in1  (vis_dp_rd_data[63:0]),
+                           .in2  (lsu_fpu_data[63:0]),
+                           .in3  (rs2_rd_data[63:0]),
+                           .sel0 (ctl_dp_rs2_frf_read),
+                           .sel1 (ctl_dp_rs2_sel_vis),
+                           .sel2 (ctl_dp_rs2_sel_fpu_lsu),
+                           .sel3 (ctl_dp_rs2_keep_data));
+   
+   dff_s #(64) rs2_rd_dff(.din (rs2_rd_data_next[63:0]), 
+			                  .q   (rs2_rd_data[63:0]),
+			                  .clk (clk), .se(se), .si(), .so());
+   assign dp_vis_rs2_data[63:0] = rs2_rd_data[63:0];
+   dff_s #(64) write_data_dff(.din(rs2_rd_data[63:0]),
+                            .q(write_data[63:0]),
+                            .clk(clk), .se(se), .si(), .so());
+   
+   ////////////////////////////////////////////////////////
+   //			RS1
+   ////////////////////////////////////////////////////////
+   // RS1 next either takes value from frf or keeps value
+   dp_mux2es #(64) rs1_mux(.dout (rs1_data_next[63:0]),
+                           .in0  (rs1_data[63:0]),
+                           .in1  (new_frf_data[63:0]),
+                           .sel  (ctl_dp_new_rs1));
+   
+   dff_s #(64) rs1_dff(.din (rs1_data_next[63:0]), 
+		                 .q   (rs1_data[63:0]),
+		                 .clk (clk), .se(se), .si(), .so());
+   assign dp_vis_rs1_data[63:0] = rs1_data[63:0];
+
+
+   /////////////////////////////////////////////////////////
+   //			FSR
+   /////////////////////////////////////////////////////////
+   // FSR takes data from load
+   // fsr is set by ldfsr, ldxfsr, or any fpu operation
+   assign t0_ldfsr_data[27:0] = {ctl_dp_fcc_w2[7:2],     // fcc3,2,1
+			                           lsu_ffu_ld_data_d1[31:30],   // RND mode
+			                           //2'b0,                     // rsvd
+			                           lsu_ffu_ld_data_d1[27:23],   // TEM
+			                           //6'b0,                     // NS, rsvd, ver
+			                           t0_fsr[14:12],       // ftt
+			                           //2'b0,                     // qne, rsvd
+			                           lsu_ffu_ld_data_d1[11:0]};   // fcc0, aexc, cexc
+   
+   assign t0_fpufsr_data[27:0] = {ctl_dp_fcc_w2[7:2],
+			                            t0_fsr[21:20],  // rnd
+			                            t0_fsr[19:15],  // TEM
+			                            ctl_dp_ftt_w2[2:0],  // ftt
+			                            ctl_dp_fcc_w2[1:0],
+			                            ctl_dp_exc_w2[9:0]};
+
+   assign t1_ldfsr_data[27:0] = {ctl_dp_fcc_w2[7:2],     // fcc3,2,1
+			                           lsu_ffu_ld_data_d1[31:30],   // RND mode
+			                           //2'b0,                     // rsvd
+			                           lsu_ffu_ld_data_d1[27:23],   // TEM
+			                           //6'b0,                     // NS, rsvd, ver
+			                           t1_fsr[14:12],       // ftt
+			                           //2'b0,                     // qne, rsvd
+			                           lsu_ffu_ld_data_d1[11:0]};   // fcc0, aexc, cexc
+   
+   assign t1_fpufsr_data[27:0] = {ctl_dp_fcc_w2[7:2],
+			                            t1_fsr[21:20],  // rnd
+			                            t1_fsr[19:15],  // TEM
+			                            ctl_dp_ftt_w2[2:0],  // ftt
+			                            ctl_dp_fcc_w2[1:0],
+			                            ctl_dp_exc_w2[9:0]};
+
+   assign t2_ldfsr_data[27:0] = {ctl_dp_fcc_w2[7:2],     // fcc3,2,1
+			                           lsu_ffu_ld_data_d1[31:30],   // RND mode
+			                           //2'b0,                     // rsvd
+			                           lsu_ffu_ld_data_d1[27:23],   // TEM
+			                           //6'b0,                     // NS, rsvd, ver
+			                           t2_fsr[14:12],       // ftt
+			                           //2'b0,                     // qne, rsvd
+			                           lsu_ffu_ld_data_d1[11:0]};   // fcc0, aexc, cexc
+   
+   assign t2_fpufsr_data[27:0] = {ctl_dp_fcc_w2[7:2],
+			                            t2_fsr[21:20],  // rnd
+			                            t2_fsr[19:15],  // TEM
+			                            ctl_dp_ftt_w2[2:0],  // ftt
+			                            ctl_dp_fcc_w2[1:0],
+			                            ctl_dp_exc_w2[9:0]};
+
+   assign t3_ldfsr_data[27:0] = {ctl_dp_fcc_w2[7:2],     // fcc3,2,1
+			                           lsu_ffu_ld_data_d1[31:30],   // RND mode
+			                           //2'b0,                     // rsvd
+			                           lsu_ffu_ld_data_d1[27:23],   // TEM
+			                           //6'b0,                     // NS, rsvd, ver
+			                           t3_fsr[14:12],       // ftt
+			                           //2'b0,                     // qne, rsvd
+			                           lsu_ffu_ld_data_d1[11:0]};   // fcc0, aexc, cexc
+   
+   assign t3_fpufsr_data[27:0] = {ctl_dp_fcc_w2[7:2],
+			                            t3_fsr[21:20],  // rnd
+			                            t3_fsr[19:15],  // TEM
+			                            ctl_dp_ftt_w2[2:0],  // ftt
+			                            ctl_dp_fcc_w2[1:0],
+			                            ctl_dp_exc_w2[9:0]};
+
+`ifdef FPGA_SYN_1THREAD
+   
+   mux3ds #28 fsr0_mux(.dout (t0_fsr_nxt[27:0]),
+		       .in0  (t0_fsr[27:0]),
+		       .in1  (t0_ldfsr_data[27:0]),
+		       .in2  (t0_fpufsr_data[27:0]),
+		       .sel0 (ctl_dp_fsr_sel_old[0]),
+		       .sel1 (ctl_dp_fsr_sel_ld[0]),
+		       .sel2 (ctl_dp_fsr_sel_fpu[0]));   
+   // FSR registers
+   // need only 28 flops for FSR since rest are always 0
+   dffr_s #28 fsr0_reg(.din (t0_fsr_nxt[27:0]),
+		                 .q   (t0_fsr[27:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   assign current_fsr[27:0] = t0_fsr[27:0];
+   
+`else
+   
+   mux3ds #28 fsr0_mux(.dout (t0_fsr_nxt[27:0]),
+		       .in0  (t0_fsr[27:0]),
+		       .in1  (t0_ldfsr_data[27:0]),
+		       .in2  (t0_fpufsr_data[27:0]),
+		       .sel0 (ctl_dp_fsr_sel_old[0]),
+		       .sel1 (ctl_dp_fsr_sel_ld[0]),
+		       .sel2 (ctl_dp_fsr_sel_fpu[0]));
+   mux3ds #28 fsr1_mux(.dout (t1_fsr_nxt[27:0]),
+		       .in0  (t1_fsr[27:0]),
+		       .in1  (t1_ldfsr_data[27:0]),
+		       .in2  (t1_fpufsr_data[27:0]),
+		       .sel0 (ctl_dp_fsr_sel_old[1]),
+		       .sel1 (ctl_dp_fsr_sel_ld[1]),
+		       .sel2 (ctl_dp_fsr_sel_fpu[1]));
+   mux3ds #28 fsr2_mux(.dout (t2_fsr_nxt[27:0]),
+		       .in0  (t2_fsr[27:0]),
+		       .in1  (t2_ldfsr_data[27:0]),
+		       .in2  (t2_fpufsr_data[27:0]),
+		       .sel0 (ctl_dp_fsr_sel_old[2]),
+		       .sel1 (ctl_dp_fsr_sel_ld[2]),
+		       .sel2 (ctl_dp_fsr_sel_fpu[2]));
+   mux3ds #28 fsr3_mux(.dout (t3_fsr_nxt[27:0]),
+		       .in0  (t3_fsr[27:0]),
+		       .in1  (t3_ldfsr_data[27:0]),
+		       .in2  (t3_fpufsr_data[27:0]),
+		       .sel0 (ctl_dp_fsr_sel_old[3]),
+		       .sel1 (ctl_dp_fsr_sel_ld[3]),
+		       .sel2 (ctl_dp_fsr_sel_fpu[3]));
+   
+   // FSR registers
+   // need only 28 flops for FSR since rest are always 0
+   dffr_s #28 fsr0_reg(.din (t0_fsr_nxt[27:0]),
+		                 .q   (t0_fsr[27:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   dffr_s #28 fsr1_reg(.din (t1_fsr_nxt[27:0]),
+		                 .q   (t1_fsr[27:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   dffr_s #28 fsr2_reg(.din (t2_fsr_nxt[27:0]),
+		                 .q   (t2_fsr[27:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   dffr_s #28 fsr3_reg(.din (t3_fsr_nxt[27:0]),
+		                 .q   (t3_fsr[27:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   // Current FSR
+   mux4ds #28 curr_fsr_mux(.dout (current_fsr[27:0]),
+			   .in0  (t0_fsr[27:0]),
+			   .in1  (t1_fsr[27:0]),
+			   .in2  (t2_fsr[27:0]),
+			   .in3  (t3_fsr[27:0]),
+			   .sel0 (ctl_dp_fp_thr[0]),
+			   .sel1 (ctl_dp_fp_thr[1]),
+			   .sel2 (ctl_dp_fp_thr[2]),
+			   .sel3 (ctl_dp_fp_thr[3]));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+   assign dp_ctl_fsr_fcc = {current_fsr[27:22], current_fsr[11:10]};
+   assign dp_ctl_fsr_rnd = current_fsr[21:20];
+   assign dp_ctl_fsr_tem = current_fsr[19:15];
+   assign dp_ctl_fsr_aexc = current_fsr[9:5];
+   assign dp_ctl_fsr_cexc = current_fsr[4:0];
+   
+   ////////////////////////////////////////////////////////////
+   //		ECC generation and correction
+   ////////////////////////////////////////////////////////////
+   dp_mux2es #(64) ecc_mux(.dout(ecc_data_in[63:0]),
+                           .in0(rs2_rd_data[63:0]),
+                           .in1({frf_dp_data[70:39], frf_dp_data[31:0]}),
+                           .sel(ctl_dp_ecc_sel_frf));
+
+   assign     synd_in_low[6:0] = {7{ctl_dp_ecc_sel_frf}} & frf_dp_data[38:32];
+   assign     synd_in_h[6:0] = {7{ctl_dp_ecc_sel_frf}} & frf_dp_data[77:71];
+   
+   zzecc_sctag_ecc39 ecccor_low(.din(ecc_data_in[31:0]),
+                                .parity(synd_in_low[6:0]),
+                                .dout(corr_data_next[31:0]),
+                                .pflag(dp_ctl_synd_out_low[6]),
+                                .cflag(dp_ctl_synd_out_low[5:0]));
+   
+   zzecc_sctag_ecc39 ecccor_high(.din(ecc_data_in[63:32]),
+                                .parity(synd_in_h[6:0]),
+                                .dout(corr_data_next[63:32]),
+                                .pflag(dp_ctl_synd_out_high[6]),
+                                .cflag(dp_ctl_synd_out_high[5:0]));
+
+   
+   dff_s #(64) ecc_corr_data(.din(corr_data_next[63:0]), .q(corr_data[63:0]),
+                           .clk(clk), .se(se), .si(), .so());
+
+
+   ////////////////////////////////////////////////
+   // GSR Storage
+   ////////////////////////////////////////////////
+   // GSR registers
+   // need only 37 flops for GSR since rest are always 0
+   // and the align and rnd fields are in the ctl block
+`ifdef FPGA_SYN_1THREAD
+   dffr_s #37 gsr0_reg(.din (t0_gsr_nxt[36:0]),
+		                 .q   (t0_gsr[36:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   assign     t0_gsr_nxt[36:0] = t0_gsr[36:0];
+   assign     gsr_e[36:0] = t0_gsr[36:0];
+   
+`else
+   
+   dffr_s #37 gsr0_reg(.din (t0_gsr_nxt[36:0]),
+		                 .q   (t0_gsr[36:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   dffr_s #37 gsr1_reg(.din (t1_gsr_nxt[36:0]),
+		                 .q   (t1_gsr[36:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   dffr_s #37 gsr2_reg(.din (t2_gsr_nxt[36:0]),
+		                 .q   (t2_gsr[36:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   dffr_s #37 gsr3_reg(.din (t3_gsr_nxt[36:0]),
+		                 .q   (t3_gsr[36:0]),
+                     .rst(reset),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   dp_mux2es #(37) gsr0_mux(.dout(t0_gsr_nxt[36:0]),
+                            .in0(t0_gsr[36:0]),
+                            .in1(ctl_dp_wsr_data_w2[36:0]),
+                            .sel(ctl_dp_gsr_wsr_w2[0]));
+   dp_mux2es #(37) gsr1_mux(.dout(t1_gsr_nxt[36:0]),
+                            .in0(t1_gsr[36:0]),
+                            .in1(ctl_dp_wsr_data_w2[36:0]),
+                            .sel(ctl_dp_gsr_wsr_w2[1]));
+   dp_mux2es #(37) gsr2_mux(.dout(t2_gsr_nxt[36:0]),
+                            .in0(t2_gsr[36:0]),
+                            .in1(ctl_dp_wsr_data_w2[36:0]),
+                            .sel(ctl_dp_gsr_wsr_w2[2]));
+   dp_mux2es #(37) gsr3_mux(.dout(t3_gsr_nxt[36:0]),
+                            .in0(t3_gsr[36:0]),
+                            .in1(ctl_dp_wsr_data_w2[36:0]),
+                            .sel(ctl_dp_gsr_wsr_w2[3]));
+   
+   
+   // GSR_E
+   mux4ds #37 curr_gsr_mux(.dout (gsr_e[36:0]),
+			   .in0  (t0_gsr[36:0]),
+			   .in1  (t1_gsr[36:0]),
+			   .in2  (t2_gsr[36:0]),
+			   .in3  (t3_gsr[36:0]),
+			   .sel0 (ctl_dp_thr_e[0]),
+			   .sel1 (ctl_dp_thr_e[1]),
+			   .sel2 (ctl_dp_thr_e[2]),
+			   .sel3 (ctl_dp_thr_e[3]));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+   assign     dp_ctl_gsr_scale_e[4:0] = gsr_e[4:0];
+   assign     dp_ctl_gsr_mask_e[31:0] = gsr_e[36:5];
+
+   
+   
+
+endmodule // sparc_ffu_dp
Index: /trunk/T1-CPU/ffu/sparc_ffu_part_add32.v
===================================================================
--- /trunk/T1-CPU/ffu/sparc_ffu_part_add32.v	(revision 6)
+++ /trunk/T1-CPU/ffu/sparc_ffu_part_add32.v	(revision 6)
@@ -0,0 +1,48 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ffu_part_add32.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ffu_part_add32
+//      Description: This is the ffu VIS adder.  It can do either 
+//				2 16 bit adds or 1 32 bit add.
+*/
+module sparc_ffu_part_add32 (/*AUTOARG*/
+   // Outputs
+   z,
+   // Inputs
+   a, b, cin, add32
+   );
+   // Partitioned adder: z = a + b + cin as one 32-bit add when add32 is
+   // high, or as two independent 16-bit adds (each fed cin) when it is low.
+   output [31:0] z;
+   input  [31:0] a, b;
+   input         cin, add32;
+
+   wire   [16:0] lo_sum;   // {carry out, result} of the lower 16-bit add
+   wire          hi_cin;   // carry in to the upper 16-bit add
+
+   // Lower half always takes cin; bit 16 holds its carry out.
+   assign lo_sum[16:0] = {1'b0, a[15:0]} + {1'b0, b[15:0]} + {16'b0, cin};
+   assign z[15:0]      = lo_sum[15:0];
+   // Chain the halves for a 32-bit add, otherwise reuse cin for the upper add.
+   assign hi_cin       = add32 ? lo_sum[16] : cin;
+   assign z[31:16]     = a[31:16] + b[31:16] + {15'b0, hi_cin};
+endmodule // sparc_ffu_part_add32
Index: /trunk/T1-CPU/ffu/sparc_ffu_ctl_visctl.v
===================================================================
--- /trunk/T1-CPU/ffu/sparc_ffu_ctl_visctl.v	(revision 6)
+++ /trunk/T1-CPU/ffu/sparc_ffu_ctl_visctl.v	(revision 6)
@@ -0,0 +1,509 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ffu_ctl_visctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ffu_ctl_visctl
+//	Description: This is the ffu vis control block.
+*/
+module sparc_ffu_ctl_visctl (/*AUTOARG*/
+   // Outputs
+   ctl_vis_sel_add, ctl_vis_sel_log, ctl_vis_sel_align, 
+   ctl_vis_add32, ctl_vis_subtract, ctl_vis_cin, ctl_vis_align0, 
+   ctl_vis_align2, ctl_vis_align4, ctl_vis_align6, ctl_vis_align_odd, 
+   ctl_vis_log_sel_pass, ctl_vis_log_sel_nand, ctl_vis_log_sel_nor, 
+   ctl_vis_log_sel_xor, ctl_vis_log_invert_rs1, 
+   ctl_vis_log_invert_rs2, ctl_vis_log_constant, 
+   ctl_vis_log_pass_const, ctl_vis_log_pass_rs1, 
+   ctl_vis_log_pass_rs2, vis_result, illegal_vis_e, vis_nofrf_e, 
+   visop_m, visop_w_vld, vis_wen_next, fpu_rnd, 
+   ffu_exu_rsr_data_hi_m, ffu_exu_rsr_data_mid_m, 
+   ffu_exu_rsr_data_lo_m, ctl_dp_wsr_data_w2, ctl_dp_gsr_wsr_w2, 
+   ctl_dp_thr_e, 
+   // Inputs
+   clk, se, reset, opf, tid_w2, tid_e, tid, visop_e, kill_w, 
+   ifu_tlu_sraddr_d, exu_ffu_wsr_inst_e, exu_ffu_gsr_align_m, 
+   exu_ffu_gsr_rnd_m, exu_ffu_gsr_mask_m, exu_ffu_gsr_scale_m, 
+   ifu_ffu_rnd_e, dp_ctl_fsr_rnd, flush_w2, thr_match_mw2, 
+   thr_match_ww2, ifu_tlu_inst_vld_w, ue_trap_w3, frs1_e, frs2_e, 
+   frd_e, rollback_c3, rollback_rs2_w2, visop, rollback_rs1_w3, 
+   dp_ctl_gsr_mask_e, dp_ctl_gsr_scale_e
+   ) ;
+   input clk;
+   input se;
+   input reset;
+   input [8:0] opf;
+   input [1:0] tid_w2;
+   input [1:0] tid_e;
+   input [1:0] tid;
+   input      visop_e;
+   input       kill_w;
+   input [6:0] ifu_tlu_sraddr_d; 
+   input       exu_ffu_wsr_inst_e;
+   input [2:0] exu_ffu_gsr_align_m;
+   input [2:0] exu_ffu_gsr_rnd_m;
+   input [31:0] exu_ffu_gsr_mask_m;
+   input [4:0]  exu_ffu_gsr_scale_m;
+   input [2:0] ifu_ffu_rnd_e;
+   input [1:0] dp_ctl_fsr_rnd;
+   input      flush_w2;
+   input      thr_match_mw2;
+   input      thr_match_ww2;
+   input      ifu_tlu_inst_vld_w;
+   input      ue_trap_w3;
+   input [4:0] frs1_e;
+   input [4:0] frs2_e;
+   input [4:0] frd_e;
+   input       rollback_c3;
+   input       rollback_rs2_w2;
+   input       visop;
+   input       rollback_rs1_w3;
+   input [31:0] dp_ctl_gsr_mask_e;
+   input [4:0]  dp_ctl_gsr_scale_e;
+   
+   output      ctl_vis_sel_add;
+   output      ctl_vis_sel_log;
+   output      ctl_vis_sel_align;
+   output      ctl_vis_add32;
+   output      ctl_vis_subtract;
+   output      ctl_vis_cin;
+   output      ctl_vis_align0;
+   output      ctl_vis_align2;
+   output      ctl_vis_align4;
+   output      ctl_vis_align6;
+   output      ctl_vis_align_odd;
+   output      ctl_vis_log_sel_pass;
+   output      ctl_vis_log_sel_nand;
+   output      ctl_vis_log_sel_nor;
+   output      ctl_vis_log_sel_xor;
+   output      ctl_vis_log_invert_rs1;
+   output      ctl_vis_log_invert_rs2;
+   output      ctl_vis_log_constant;
+   output      ctl_vis_log_pass_const;
+   output      ctl_vis_log_pass_rs1;
+   output      ctl_vis_log_pass_rs2;
+   output      vis_result;
+   output      illegal_vis_e;
+   output      vis_nofrf_e;
+   output      visop_m;
+   output      visop_w_vld;
+   output      vis_wen_next;
+   output [1:0] fpu_rnd;
+   output [31:0] ffu_exu_rsr_data_hi_m;
+   output [2:0]  ffu_exu_rsr_data_mid_m;
+   output [7:0] ffu_exu_rsr_data_lo_m;
+
+   output [36:0] ctl_dp_wsr_data_w2;
+   output [3:0] ctl_dp_gsr_wsr_w2;
+   output [3:0] ctl_dp_thr_e;
+   
+   wire         illegal_rs1_e;
+   wire         illegal_rs2_e;
+   wire         illegal_siam_e;
+   wire         rs2_check_nonzero_e;
+   wire         rs1_check_nonzero_e;
+   wire        visop_e;
+   wire        issue_visop_e;
+   wire        visop_m;
+   wire        visop_w;
+   wire        visop_w_vld;
+   wire        visop_w2_vld;
+   wire        visop_w2;
+   wire        visop_w3;
+   wire        visop_w3_vld;
+   wire        add;
+   wire        align;
+   wire        logic;
+   wire        siam;
+   wire        alignaddr;
+
+   wire        opf_log_zero;
+   wire        opf_log_one;
+   wire        opf_log_src1;
+   wire        opf_log_src2;
+   wire        opf_log_not1;
+   wire        opf_log_not2;
+   wire        opf_log_or;
+   wire        opf_log_nor;
+   wire        opf_log_and;
+   wire        opf_log_nand;
+   wire        opf_log_xor;
+   wire        opf_log_xnor;
+   wire        opf_log_ornot1;
+   wire        opf_log_ornot2;
+   wire        opf_log_andnot1;
+   wire        opf_log_andnot2;
+   wire        invert_rs1_next;
+   wire        invert_rs2_next;
+   wire        log_pass_rs1_next;
+   wire        log_pass_rs2_next;
+   wire        log_pass_rs1;
+   wire        log_pass_rs2;
+   
+   wire [2:0] t0_gsr_rnd;
+   wire [2:0] t1_gsr_rnd;
+   wire [2:0] t2_gsr_rnd;
+   wire [2:0] t3_gsr_rnd;
+   wire [2:0] t0_gsr_align;
+   wire [2:0] t1_gsr_align;
+   wire [2:0] t2_gsr_align;
+   wire [2:0] t3_gsr_align;
+   wire [2:0] t0_gsr_rnd_next;
+   wire [2:0] t1_gsr_rnd_next;
+   wire [2:0] t2_gsr_rnd_next;
+   wire [2:0] t3_gsr_rnd_next;
+   wire [2:0] t0_gsr_align_next;
+   wire [2:0] t1_gsr_align_next;
+   wire [2:0] t2_gsr_align_next;
+   wire [2:0] t3_gsr_align_next;
+   wire [2:0] gsr_rnd_e;
+   wire [2:0] gsr_align_e;
+   wire       t0_rnd_wen_l;
+   wire       t0_gsr_wsr_w2;
+   wire       t0_siam_w2;
+   wire       t0_align_wen_l;
+   wire       t0_alignaddr_w2;
+   wire       t1_rnd_wen_l;
+   wire       t1_gsr_wsr_w2;
+   wire       t1_siam_w2;
+   wire       t1_align_wen_l;
+   wire       t1_alignaddr_w2;
+   wire       t2_rnd_wen_l;
+   wire       t2_gsr_wsr_w2;
+   wire       t2_siam_w2;
+   wire       t2_align_wen_l;
+   wire       t2_alignaddr_w2;
+   wire       t3_rnd_wen_l;
+   wire       t3_gsr_wsr_w2;
+   wire       t3_siam_w2;
+   wire       t3_align_wen_l;
+   wire       t3_alignaddr_w2;
+
+   wire [2:0] siam_rnd;
+   wire [3:0] thr_w2;
+   wire [3:0] ctl_dp_thr_e;
+   wire [3:0] thr_fp;
+   wire       gsr_addr_d;
+   wire       gsr_addr_e;
+   wire       wgsr_e;
+   wire       wgsr_m;
+   wire       wgsr_w;
+   wire       wgsr_vld_m;
+   wire       wgsr_vld_w;
+   wire       wgsr_vld_w2;
+   wire       wgsr_w2;
+   wire [2:0] gsr_rnd;
+   wire [1:0] fpu_rnd_next;
+   wire [2:0]  gsr_align;
+   wire [2:0]  gsr_align_d1;
+
+   wire [2:0] align_addr_data_w2;
+   wire [2:0] wgsr_align_offset_w;
+   wire [2:0] wgsr_rnd_w;
+   wire [2:0] wgsr_align_offset_w2;
+   wire [2:0] wgsr_rnd_w2;
+
+   wire [36:0] wsr_data_m;
+   wire [36:0] wsr_data_w;
+
+
+   //////////////////////////////////////
+   // VIS PIPELINE
+   //------------------------------------
+   // Note: rs2_ce, rs2_ue, rs1_ue will kill vis instruction
+   //       in addition to any traps, etc.
+   //       These are incorporated into the "kill" signals
+   // E: ren rs2
+   // M: ren rs1
+   // W: rs2 data ready, check rs2 ecc
+   // W2: rs1 data ready, check rs1 ecc
+   // W3: execute vis operation (result written to rs2/rd flop)
+   // W4: gen ecc and write to frf
+   
+   dff_s visop_e2m(.din(issue_visop_e), .clk(clk), .q(visop_m), .si(), .so(), .se(se));
+   dff_s visop_m2w(.din(visop_m), .clk(clk), .q(visop_w), .si(), .so(), .se(se));
+   dff_s visop_w2w2(.din(visop_w_vld), .clk(clk), .q(visop_w2), .si(), .so(), .se(se));
+   dff_s visop_w22w3(.din(visop_w2_vld), .clk(clk), .q(visop_w3), .si(), .so(), .se(se));
+
+   assign     issue_visop_e = visop_e | visop & rollback_c3;
+   // only check kills in w since they are accumulated into kill_w
+   assign      visop_w_vld = visop_w & ~kill_w;
+   assign      visop_w2_vld = visop_w2 & ~flush_w2 & ~rollback_rs2_w2;
+   assign      visop_w3_vld = visop_w3 & ~ue_trap_w3 & ~rollback_rs1_w3;
+
+   assign      vis_result = visop_w3_vld;
+   assign      vis_wen_next = vis_result & ~siam & ~alignaddr;
+
+   ////////////////////////////////////
+   // Decode opf
+   ////////////////////////////////////
+   assign      add = ~opf[8] & ~opf[7] & opf[6] & ~opf[5] & opf[4] & ~opf[3];
+   assign      align = ~opf[8] & ~opf[7] & opf[6] & ~opf[5] & ~opf[4] & opf[3] & ~opf[2] & ~opf[1] & ~opf[0];
+   assign      logic = ~opf[8] & ~opf[7] & opf[6] & opf[5];
+   assign siam = ~opf[8] & opf[7] & ~opf[6] & ~opf[5] & ~opf[4] & ~opf[3] & ~opf[2] & ~opf[1] & opf[0];
+   assign alignaddr = ~opf[8] & ~opf[7] & ~opf[6] & ~opf[5] & opf[4] & opf[3] & ~opf[2] & ~opf[0]; //alignaddress
+
+   assign illegal_vis_e = (visop_e & ~(add | align | logic | siam | alignaddr) | 
+                           illegal_rs1_e | illegal_rs2_e | illegal_siam_e);
+   assign rs1_check_nonzero_e = visop_e & (siam | (logic & (opf_log_zero | opf_log_one | opf_log_src2 | opf_log_not2)));
+   assign rs2_check_nonzero_e = visop_e & logic & (opf_log_zero | opf_log_one | opf_log_src1 | opf_log_not1);
+   assign illegal_rs1_e = (frs1_e[4:0] != 5'b00000) & rs1_check_nonzero_e;
+   assign illegal_rs2_e = (frs2_e[4:0] != 5'b00000) & rs2_check_nonzero_e;
+   assign illegal_siam_e = ((frd_e[4:0] != 5'b00000) | frs2_e[4] | frs2_e[3]) & siam & visop_e;
+
+   assign vis_nofrf_e = visop_e & (siam | alignaddr | opf_log_zero | opf_log_one);
+   
+   // controls for add
+   // Make subtract come out of its own flop for loading purposes (very critical timing)
+   dff_s sub_dff(.din(opf[2]), .clk(clk), .q(ctl_vis_subtract), .se(se), .si(), .so());
+   assign ctl_vis_cin = opf[2];
+   assign ctl_vis_add32 = opf[1];
+
+   // controls for logic
+   assign opf_log_zero = ~opf[4] & ~opf[3] & ~opf[2] & ~opf[1];
+   assign opf_log_nor = ~opf[4] & ~opf[3] & ~opf[2] & opf[1];
+   assign opf_log_andnot2 = ~opf[4] & ~opf[3] & opf[2] & ~opf[1];
+   assign opf_log_not2 = ~opf[4] & ~opf[3] & opf[2] & opf[1];
+   assign opf_log_andnot1 = ~opf[4] & opf[3] & ~opf[2] & ~opf[1];
+   assign opf_log_not1 = ~opf[4] & opf[3] & ~opf[2] & opf[1];
+   assign opf_log_xor = ~opf[4] & opf[3] & opf[2] & ~opf[1];
+   assign opf_log_nand = ~opf[4] & opf[3] & opf[2] & opf[1];
+   assign opf_log_and = opf[4] & ~opf[3] & ~opf[2] & ~opf[1];
+   assign opf_log_xnor = opf[4] & ~opf[3] & ~opf[2] & opf[1];
+   assign opf_log_src1 = opf[4] & ~opf[3] & opf[2] & ~opf[1];
+   assign opf_log_ornot2 = opf[4] & ~opf[3] & opf[2] & opf[1];
+   assign opf_log_src2 = opf[4] & opf[3] & ~opf[2] & ~opf[1];
+   assign opf_log_ornot1 = opf[4] & opf[3] & ~opf[2] & opf[1];
+   assign opf_log_or = opf[4] & opf[3] & opf[2] & ~opf[1];
+   assign opf_log_one = opf[4] & opf[3] & opf[2] & opf[1];
+
+   // selects for logic mux
+   assign ctl_vis_log_sel_nand = opf_log_or | opf_log_nand | opf_log_ornot1 | opf_log_ornot2;
+   assign ctl_vis_log_sel_xor = opf_log_xor | opf_log_xnor;
+   assign ctl_vis_log_sel_nor = opf_log_and | opf_log_nor | opf_log_andnot1 | opf_log_andnot2;
+   assign ctl_vis_log_sel_pass = (opf_log_zero | opf_log_one | opf_log_src1 | opf_log_src2 |
+                                  opf_log_not1 | opf_log_not2);
+
+   assign invert_rs1_next = (opf_log_not1 | opf_log_or | opf_log_and | 
+                                    opf_log_ornot2 | opf_log_andnot2);
+   assign invert_rs2_next = (opf_log_not2 | opf_log_or | opf_log_and | 
+                                    opf_log_ornot1 | opf_log_andnot1 | opf_log_xnor);
+   dff_s invert_rs1_dff(.din(invert_rs1_next), .clk(clk), .q(ctl_vis_log_invert_rs1),
+                      .se(se), .si(), .so());
+   dff_s invert_rs2_dff(.din(invert_rs2_next), .clk(clk), .q(ctl_vis_log_invert_rs2),
+                      .se(se), .si(), .so());
+   // precalculate to help timing
+   assign log_pass_rs1_next = opf_log_src1 | opf_log_not1;
+   assign log_pass_rs2_next = opf_log_src2 | opf_log_not2;
+   dff_s #(2) log_pass_dff(.din({log_pass_rs1_next,log_pass_rs2_next}), .clk(clk),
+                         .q({log_pass_rs1,log_pass_rs2}), .se(se), .si(), .so());
+   
+   assign ctl_vis_log_pass_rs1 = log_pass_rs1;
+   assign ctl_vis_log_pass_rs2 = log_pass_rs2 & ~log_pass_rs1;
+   assign ctl_vis_log_constant = opf_log_one;
+   assign ctl_vis_log_pass_const = ~(ctl_vis_log_pass_rs1 | ctl_vis_log_pass_rs2);
+   
+   // controls for falign
+   assign ctl_vis_align0 = ~gsr_align_d1[2] & ~gsr_align_d1[1];
+   assign ctl_vis_align2 = ~gsr_align_d1[2] & gsr_align_d1[1];
+   assign ctl_vis_align4 = gsr_align_d1[2] & ~gsr_align_d1[1];
+   assign ctl_vis_align6 = gsr_align_d1[2] & gsr_align_d1[1];
+   assign ctl_vis_align_odd = gsr_align_d1[0];
+
+   // controls for output mux
+   assign ctl_vis_sel_add = add;
+   assign ctl_vis_sel_align = align;
+   assign ctl_vis_sel_log = ~(add | align);
+
+   ///////////////////////////////////////////////////////////
+   // GSR.alignaddr_offset, GSR.IM, GSR.IRND
+   ///////////////////////////////////////////////////////////
+
+   mux4ds #(6) curr_gsr_mux(.dout({gsr_rnd[2:0], gsr_align[2:0]}), // per-thread rnd/align, picked by thr_fp
+                            .in0({t0_gsr_rnd[2:0], t0_gsr_align[2:0]}),
+                            .in1({t1_gsr_rnd[2:0], t1_gsr_align[2:0]}),
+                            .in2({t2_gsr_rnd[2:0], t2_gsr_align[2:0]}),
+                            .in3({t3_gsr_rnd[2:0], t3_gsr_align[2:0]}),
+                            .sel0(thr_fp[0]),
+                            .sel1(thr_fp[1]),
+                            .sel2(thr_fp[2]),
+                            .sel3(thr_fp[3]));
+   mux4ds #(6) gsr_e_mux(.dout({gsr_rnd_e[2:0], gsr_align_e[2:0]}), // same four sources, picked by ctl_dp_thr_e
+                            .in0({t0_gsr_rnd[2:0], t0_gsr_align[2:0]}),
+                            .in1({t1_gsr_rnd[2:0], t1_gsr_align[2:0]}),
+                            .in2({t2_gsr_rnd[2:0], t2_gsr_align[2:0]}),
+                            .in3({t3_gsr_rnd[2:0], t3_gsr_align[2:0]}),
+                            .sel0(ctl_dp_thr_e[0]),
+                            .sel1(ctl_dp_thr_e[1]),
+                            .sel2(ctl_dp_thr_e[2]),
+                            .sel3(ctl_dp_thr_e[3]));
+   dff_s #(43) gsr_e2m(.din({dp_ctl_gsr_mask_e[31:0],gsr_rnd_e[2:0], // 43 bits = 32 mask + 3 rnd + 5 scale + 3 align
+                          dp_ctl_gsr_scale_e[4:0],gsr_align_e[2:0]}), .clk(clk),
+                    .q({ffu_exu_rsr_data_hi_m[31:0],ffu_exu_rsr_data_mid_m[2:0], ffu_exu_rsr_data_lo_m[7:0]}), // hi = mask, mid = rnd, lo = {scale, align}
+                    .se(se), .si(), .so());
+   dff_s #(3) gsr_align_dff(.din(gsr_align[2:0]), .clk(clk), .q(gsr_align_d1[2:0]), .se(se), .si(), .so()); // gsr_align_d1 feeds the falign controls
+
+   // put in to help timing for sending to lsu
+   dff_s #(2) fpu_rnd_dff(.din(fpu_rnd_next[1:0]), .clk(clk), .q(fpu_rnd[1:0]), .si(), .so(), .se(se));
+   assign      fpu_rnd_next[1:0] = (gsr_rnd[2])? gsr_rnd[1:0]: dp_ctl_fsr_rnd[1:0]; // gsr_rnd[2] set: use gsr_rnd[1:0], else the FSR value
+
+   // if alignaddress_little (selected by opf[1]) then write the 2's complement
+   assign     align_addr_data_w2[2:0] = (opf[1])? (~wgsr_align_offset_w2[2:0] + 3'b001):
+                                                  wgsr_align_offset_w2[2:0];
+   
+   assign     gsr_addr_d = (ifu_tlu_sraddr_d[6:0] == 7'b0010011); // 7'b0010011 = 7'h13
+   assign     wgsr_e = exu_ffu_wsr_inst_e & gsr_addr_e;           // wsr inst in e whose address matched in d
+   dff_s gsr_addr_d2e(.din(gsr_addr_d), .clk(clk), .q(gsr_addr_e), .se(se), .si(), .so());
+
+   // need independent kill checks because this isn't killed by new fpop
+   assign     wgsr_vld_m = wgsr_m & ~(thr_match_mw2 & flush_w2);                      // dropped when thr_match_mw2 & flush_w2
+   assign     wgsr_vld_w = wgsr_w & ifu_tlu_inst_vld_w & ~(thr_match_ww2 & flush_w2); // also requires ifu_tlu_inst_vld_w
+   assign     wgsr_vld_w2 = wgsr_w2 & ~flush_w2;                                      // dropped on any flush_w2
+   dff_s wgsr_e2m(.din(wgsr_e), .clk(clk), .q(wgsr_m), .si(), .so(), .se(se));        // e -> m
+   dff_s wgsr_m2w(.din(wgsr_vld_m), .clk(clk), .q(wgsr_w), .si(), .so(), .se(se));    // m -> w, after the m kill check
+   dff_s wgsr_w2w2(.din(wgsr_vld_w), .clk(clk), .q(wgsr_w2), .si(), .so(), .se(se));  // w -> w2, after the w kill check
+
+   assign     thr_w2[3] = (tid_w2[1:0] == 2'b11);     // thr_w2: one-hot decode of tid_w2[1:0]
+   assign     thr_w2[2] = (tid_w2[1:0] == 2'b10);
+   assign     thr_w2[1] = (tid_w2[1:0] == 2'b01);
+   assign     thr_w2[0] = (tid_w2[1:0] == 2'b00);
+   assign     ctl_dp_thr_e[3] = (tid_e[1:0] == 2'b11); // ctl_dp_thr_e: one-hot decode of tid_e[1:0]
+   assign     ctl_dp_thr_e[2] = (tid_e[1:0] == 2'b10);
+   assign     ctl_dp_thr_e[1] = (tid_e[1:0] == 2'b01);
+   assign     ctl_dp_thr_e[0] = (tid_e[1:0] == 2'b00);
+   assign     thr_fp[3] = (tid[1:0] == 2'b11);        // thr_fp: one-hot decode of tid[1:0]
+   assign     thr_fp[2] = (tid[1:0] == 2'b10);
+   assign     thr_fp[1] = (tid[1:0] == 2'b01);
+   assign     thr_fp[0] = (tid[1:0] == 2'b00);
+   
+   assign     t0_siam_w2 = thr_fp[0] & siam & visop_w2_vld;           // siam write, thread 0 (qualified by thr_fp)
+   assign     t0_gsr_wsr_w2 = thr_w2[0] & wgsr_vld_w2;                // wsr write, thread 0 (qualified by thr_w2)
+   assign     t0_alignaddr_w2 = thr_fp[0] & alignaddr & visop_w2_vld; // alignaddr write, thread 0 (qualified by thr_fp)
+   assign     t0_rnd_wen_l = ~(t0_gsr_wsr_w2 | t0_siam_w2);           // low when rnd is written by wsr or siam
+   assign     t0_align_wen_l = ~(t0_gsr_wsr_w2 | t0_alignaddr_w2);    // low when align is written by wsr or alignaddr
+   assign     t1_siam_w2 = thr_fp[1] & siam & visop_w2_vld;           // threads 1-3 repeat the thread 0 equations
+   assign     t1_gsr_wsr_w2 = thr_w2[1] & wgsr_vld_w2;
+   assign     t1_alignaddr_w2 = thr_fp[1] & alignaddr & visop_w2_vld;
+   assign     t1_rnd_wen_l = ~(t1_gsr_wsr_w2 | t1_siam_w2);
+   assign     t1_align_wen_l = ~(t1_gsr_wsr_w2 | t1_alignaddr_w2);
+   assign     t2_siam_w2 = thr_fp[2] & siam & visop_w2_vld;
+   assign     t2_gsr_wsr_w2 = thr_w2[2] & wgsr_vld_w2;
+   assign     t2_alignaddr_w2 = thr_fp[2] & alignaddr & visop_w2_vld;
+   assign     t2_rnd_wen_l = ~(t2_gsr_wsr_w2 | t2_siam_w2);
+   assign     t2_align_wen_l = ~(t2_gsr_wsr_w2 | t2_alignaddr_w2);
+   assign     t3_siam_w2 = thr_fp[3] & siam & visop_w2_vld;
+   assign     t3_gsr_wsr_w2 = thr_w2[3] & wgsr_vld_w2;
+   assign     t3_alignaddr_w2 = thr_fp[3] & alignaddr & visop_w2_vld;
+   assign     t3_rnd_wen_l = ~(t3_gsr_wsr_w2 | t3_siam_w2);
+   assign     t3_align_wen_l = ~(t3_gsr_wsr_w2 | t3_alignaddr_w2);
+
+   assign     ctl_dp_gsr_wsr_w2[3:0] = {t3_gsr_wsr_w2,t2_gsr_wsr_w2,t1_gsr_wsr_w2,t0_gsr_wsr_w2}; // bit N = wsr write for thread N
+
+   // Storage flops and muxes: in0 = hold current value, in1 = wsr data, in2 = siam/alignaddr data
+   mux3ds #(3) t0_rnd_mux(.dout(t0_gsr_rnd_next[2:0]),
+                          .in0(t0_gsr_rnd[2:0]),           // current stored value (hold)
+                          .in1(wgsr_rnd_w2[2:0]),          // rnd bits from the wsr pipeline
+                          .in2(siam_rnd[2:0]),             // rnd bits held in siam_rnd_dff
+                          .sel0(t0_rnd_wen_l),             // set when neither write below fires
+                          .sel1(t0_gsr_wsr_w2),
+                          .sel2(t0_siam_w2));
+   mux3ds #(3) t0_align_mux(.dout(t0_gsr_align_next[2:0]),
+                            .in0(t0_gsr_align[2:0]),       // current stored value (hold)
+                            .in1(wgsr_align_offset_w2[2:0]), // offset from the wsr pipeline
+                            .in2(align_addr_data_w2[2:0]), // offset, or its 2's complement when opf[1] is set
+                            .sel0(t0_align_wen_l),         // set when neither write below fires
+                            .sel1(t0_gsr_wsr_w2),
+                            .sel2(t0_alignaddr_w2));
+   mux3ds #(3) t1_rnd_mux(.dout(t1_gsr_rnd_next[2:0]),   // threads 1-3 repeat the thread 0 muxes
+                          .in0(t1_gsr_rnd[2:0]),
+                          .in1(wgsr_rnd_w2[2:0]),
+                          .in2(siam_rnd[2:0]),
+                          .sel0(t1_rnd_wen_l),
+                          .sel1(t1_gsr_wsr_w2),
+                          .sel2(t1_siam_w2));
+   mux3ds #(3) t1_align_mux(.dout(t1_gsr_align_next[2:0]),
+                            .in0(t1_gsr_align[2:0]),
+                            .in1(wgsr_align_offset_w2[2:0]),
+                            .in2(align_addr_data_w2[2:0]),
+                            .sel0(t1_align_wen_l),
+                            .sel1(t1_gsr_wsr_w2),
+                            .sel2(t1_alignaddr_w2));
+   mux3ds #(3) t2_rnd_mux(.dout(t2_gsr_rnd_next[2:0]),
+                          .in0(t2_gsr_rnd[2:0]),
+                          .in1(wgsr_rnd_w2[2:0]),
+                          .in2(siam_rnd[2:0]),
+                          .sel0(t2_rnd_wen_l),
+                          .sel1(t2_gsr_wsr_w2),
+                          .sel2(t2_siam_w2));
+   mux3ds #(3) t2_align_mux(.dout(t2_gsr_align_next[2:0]),
+                            .in0(t2_gsr_align[2:0]),
+                            .in1(wgsr_align_offset_w2[2:0]),
+                            .in2(align_addr_data_w2[2:0]),
+                            .sel0(t2_align_wen_l),
+                            .sel1(t2_gsr_wsr_w2),
+                            .sel2(t2_alignaddr_w2));
+   mux3ds #(3) t3_rnd_mux(.dout(t3_gsr_rnd_next[2:0]),
+                          .in0(t3_gsr_rnd[2:0]),
+                          .in1(wgsr_rnd_w2[2:0]),
+                          .in2(siam_rnd[2:0]),
+                          .sel0(t3_rnd_wen_l),
+                          .sel1(t3_gsr_wsr_w2),
+                          .sel2(t3_siam_w2));
+   mux3ds #(3) t3_align_mux(.dout(t3_gsr_align_next[2:0]),
+                            .in0(t3_gsr_align[2:0]),
+                            .in1(wgsr_align_offset_w2[2:0]),
+                            .in2(align_addr_data_w2[2:0]),
+                            .sel0(t3_align_wen_l),
+                            .sel1(t3_gsr_wsr_w2),
+                            .sel2(t3_alignaddr_w2));
+
+
+   dffr_s #(6) t0_gsr_dff(.din({t0_gsr_rnd_next[2:0], t0_gsr_align_next[2:0]}), .clk(clk), // per-thread {rnd, align} storage
+                       .q({t0_gsr_rnd[2:0], t0_gsr_align[2:0]}), .se(se),
+                       .si(), .so(), .rst(reset));                                        // all four storage cells share .rst(reset)
+   dffr_s #(6) t1_gsr_dff(.din({t1_gsr_rnd_next[2:0], t1_gsr_align_next[2:0]}), .clk(clk),
+                       .q({t1_gsr_rnd[2:0], t1_gsr_align[2:0]}), .se(se),
+                       .si(), .so(), .rst(reset));
+   dffr_s #(6) t2_gsr_dff(.din({t2_gsr_rnd_next[2:0], t2_gsr_align_next[2:0]}), .clk(clk),
+                       .q({t2_gsr_rnd[2:0], t2_gsr_align[2:0]}), .se(se),
+                       .si(), .so(), .rst(reset));
+   dffr_s #(6) t3_gsr_dff(.din({t3_gsr_rnd_next[2:0], t3_gsr_align_next[2:0]}), .clk(clk),
+                       .q({t3_gsr_rnd[2:0], t3_gsr_align[2:0]}), .se(se),
+                       .si(), .so(), .rst(reset));
+
+   dffre_s #(3) siam_rnd_dff(.din(ifu_ffu_rnd_e[2:0]), .clk(clk), // siam_rnd: ifu_ffu_rnd_e, with visop_e on the .en pin
+                          .q(siam_rnd), .se(se), .si(), .so(),
+                           .rst(reset), .en(visop_e));
+   dff_s #(3) align_offset_dff1(.din(exu_ffu_gsr_align_m[2:0]), .clk(clk), // align offset: m -> w
+                             .q(wgsr_align_offset_w[2:0]), .se(se), .si(), .so());
+   dff_s #(3) align_offset_dff2(.din(wgsr_align_offset_w[2:0]), .clk(clk), // align offset: w -> w2
+                             .q(wgsr_align_offset_w2[2:0]), .se(se), .si(), .so());
+   dff_s #(3) rnd_dff1(.din(exu_ffu_gsr_rnd_m[2:0]), .clk(clk),            // rnd: m -> w
+                      .q(wgsr_rnd_w[2:0]), .se(se), .si(), .so());
+   dff_s #(3) rnd_dff2(.din(wgsr_rnd_w[2:0]), .clk(clk),                   // rnd: w -> w2
+                      .q(wgsr_rnd_w2[2:0]), .se(se), .si(), .so());
+   assign     wsr_data_m[36:0] = {exu_ffu_gsr_mask_m[31:0], exu_ffu_gsr_scale_m[4:0]}; // 37 bits = 32 mask + 5 scale
+   dff_s #(37) wsr_data_m2w(.din(wsr_data_m[36:0]), .clk(clk), .q(wsr_data_w[36:0]),   // mask/scale: m -> w
+                          .se(se), .si(), .so());
+   dff_s #(37) wsr_data_w2w2(.din(wsr_data_w[36:0]), .clk(clk), .q(ctl_dp_wsr_data_w2[36:0]), // mask/scale: w -> w2
+                          .se(se), .si(), .so());
+           
+   
+endmodule // sparc_ffu_ctl_visctl
Index: /trunk/T1-CPU/ffu/sparc_ffu.v
===================================================================
--- /trunk/T1-CPU/ffu/sparc_ffu.v	(revision 6)
+++ /trunk/T1-CPU/ffu/sparc_ffu.v	(revision 6)
@@ -0,0 +1,501 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ffu.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ffu
+//  Description: This is the top level for the floating point frontend unit (ffu).
+//  It instantiates the control (ffu_ctl), datapath (ffu_dp), and register file
+//			(frf).
+*/
+
+`include "iop.h"
+`define FPRET_CMP    69
+`define FPRET_CC_HI  68
+`define FPRET_CC_LO  67
+`define FPRET_EXC_HI 76
+`define FPRET_EXC_LO 72
+
+
+module sparc_ffu (/*AUTOARG*/
+   // Outputs
+   so, ffu_tlu_trap_ue, ffu_tlu_trap_other, ffu_tlu_trap_ieee754, 
+   ffu_tlu_ill_inst_m, ffu_tlu_fpu_tid, ffu_tlu_fpu_cmplt, 
+   ffu_lsu_kill_fst_w, ffu_lsu_fpop_rq_vld, ffu_lsu_blk_st_va_e, 
+   ffu_lsu_blk_st_e, ffu_ifu_tid_w2, ffu_ifu_stallreq, 
+   ffu_ifu_inj_ack, ffu_ifu_fst_ce_w, ffu_ifu_fpop_done_w2, 
+   ffu_ifu_err_synd_w2, ffu_ifu_err_reg_w2, ffu_ifu_ecc_ue_w2, 
+   ffu_ifu_ecc_ce_w2, ffu_ifu_cc_w2, ffu_ifu_cc_vld_w2, ffu_lsu_data, 
+   short_so0, ffu_exu_rsr_data_m, 
+   // Inputs
+   si, sehold, se, rclk, lsu_ffu_stb_full3, lsu_ffu_stb_full2, 
+   lsu_ffu_stb_full1, lsu_ffu_stb_full0, lsu_ffu_ld_vld, 
+   lsu_ffu_ld_data, lsu_ffu_flush_pipe_w, lsu_ffu_blk_asi_e, 
+   lsu_ffu_bld_cnt_w, lsu_ffu_ack, ifu_tlu_sraddr_d, 
+   ifu_tlu_inst_vld_w, ifu_tlu_flush_w, ifu_tlu_flsh_inst_e, 
+   ifu_lsu_ld_inst_e, ifu_ffu_visop_d, ifu_ffu_tid_d, 
+   ifu_ffu_stfsr_d, ifu_ffu_quad_op_e, ifu_ffu_mvcnd_m, 
+   ifu_ffu_ldxfsr_d, ifu_ffu_ldst_single_d, ifu_ffu_ldfsr_d, 
+   ifu_ffu_inj_frferr, ifu_ffu_fst_d, ifu_ffu_frs2_d, ifu_ffu_frs1_d, 
+   ifu_ffu_frd_d, ifu_ffu_fpopcode_d, ifu_ffu_fpop2_d, 
+   ifu_ffu_fpop1_d, ifu_ffu_fld_d, ifu_ffu_fcc_num_d, 
+   ifu_exu_nceen_e, ifu_exu_ecc_mask, ifu_exu_disable_ce_e, grst_l, 
+   exu_ffu_wsr_inst_e, exu_ffu_ist_e, exu_ffu_gsr_scale_m, 
+   exu_ffu_gsr_rnd_m, exu_ffu_gsr_mask_m, exu_ffu_gsr_align_m, 
+   cpx_vld, cpx_req, cpx_fpu_data, cpx_fpexc, cpx_fcmp, cpx_fccval, 
+   arst_l, mux_drive_disable, mem_write_disable, short_si0,
+//sotheas,8/17/04: eco 6529
+   lsu_ffu_st_dtlb_perr_g
+//////////////////////////////////
+   ) ;
+
+   output [80:0]        ffu_lsu_data;           // From dp of sparc_ffu_dp.v, ...
+   output               short_so0;
+
+   input                mux_drive_disable;
+   input                mem_write_disable;
+   input                short_si0;
+   /*AUTOINPUT*/
+   // Beginning of automatic inputs (from unused autoinst inputs)
+   input                arst_l;                 // To ctl of sparc_ffu_ctl.v
+   input [1:0]          cpx_fccval;             // To ctl of sparc_ffu_ctl.v
+   input                cpx_fcmp;               // To ctl of sparc_ffu_ctl.v
+   input [4:0]          cpx_fpexc;              // To ctl of sparc_ffu_ctl.v
+   input [63:0]         cpx_fpu_data;           // To dp of sparc_ffu_dp.v
+   input [3:0]          cpx_req;                // To ctl of sparc_ffu_ctl.v
+   input                cpx_vld;                // To ctl of sparc_ffu_ctl.v
+   input [2:0]          exu_ffu_gsr_align_m;    // To ctl of sparc_ffu_ctl.v
+   input [31:0]         exu_ffu_gsr_mask_m;     // To ctl of sparc_ffu_ctl.v
+   input [2:0]          exu_ffu_gsr_rnd_m;      // To ctl of sparc_ffu_ctl.v
+   input [4:0]          exu_ffu_gsr_scale_m;    // To ctl of sparc_ffu_ctl.v
+   input                exu_ffu_ist_e;          // To ctl of sparc_ffu_ctl.v
+   input                exu_ffu_wsr_inst_e;     // To ctl of sparc_ffu_ctl.v
+   input                grst_l;                 // To ctl of sparc_ffu_ctl.v
+   input                ifu_exu_disable_ce_e;   // To ctl of sparc_ffu_ctl.v
+   input [6:0]          ifu_exu_ecc_mask;       // To ctl of sparc_ffu_ctl.v
+   input                ifu_exu_nceen_e;        // To ctl of sparc_ffu_ctl.v
+   input [1:0]          ifu_ffu_fcc_num_d;      // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_fld_d;          // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_fpop1_d;        // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_fpop2_d;        // To ctl of sparc_ffu_ctl.v
+   input [8:0]          ifu_ffu_fpopcode_d;     // To ctl of sparc_ffu_ctl.v
+   input [4:0]          ifu_ffu_frd_d;          // To ctl of sparc_ffu_ctl.v
+   input [4:0]          ifu_ffu_frs1_d;         // To ctl of sparc_ffu_ctl.v
+   input [4:0]          ifu_ffu_frs2_d;         // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_fst_d;          // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_inj_frferr;     // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_ldfsr_d;        // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_ldst_single_d;  // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_ldxfsr_d;       // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_mvcnd_m;        // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_quad_op_e;      // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_stfsr_d;        // To ctl of sparc_ffu_ctl.v
+   input [1:0]          ifu_ffu_tid_d;          // To ctl of sparc_ffu_ctl.v
+   input                ifu_ffu_visop_d;        // To ctl of sparc_ffu_ctl.v
+   input                ifu_lsu_ld_inst_e;      // To ctl of sparc_ffu_ctl.v
+   input                ifu_tlu_flsh_inst_e;    // To ctl of sparc_ffu_ctl.v
+   input                ifu_tlu_flush_w;        // To ctl of sparc_ffu_ctl.v
+   input                ifu_tlu_inst_vld_w;     // To ctl of sparc_ffu_ctl.v
+   input [6:0]          ifu_tlu_sraddr_d;       // To ctl of sparc_ffu_ctl.v
+   input                lsu_ffu_ack;            // To ctl of sparc_ffu_ctl.v
+   input [2:0]          lsu_ffu_bld_cnt_w;      // To ctl of sparc_ffu_ctl.v
+   input                lsu_ffu_blk_asi_e;      // To ctl of sparc_ffu_ctl.v
+   input                lsu_ffu_flush_pipe_w;   // To ctl of sparc_ffu_ctl.v
+   input [63:0]         lsu_ffu_ld_data;        // To dp of sparc_ffu_dp.v
+   input                lsu_ffu_ld_vld;         // To ctl of sparc_ffu_ctl.v
+   input                lsu_ffu_stb_full0;      // To ctl of sparc_ffu_ctl.v
+   input                lsu_ffu_stb_full1;      // To ctl of sparc_ffu_ctl.v
+   input                lsu_ffu_stb_full2;      // To ctl of sparc_ffu_ctl.v
+   input                lsu_ffu_stb_full3;      // To ctl of sparc_ffu_ctl.v
+   input                lsu_ffu_st_dtlb_perr_g; // sotheas,8/17/04: fixed eco 6529, signal to sparc_ffu_ctl.v
+   input                rclk;                   // To frf of bw_r_frf.v, ...
+   input                se;                     // To frf of bw_r_frf.v, ...
+   input                sehold;                 // To frf of bw_r_frf.v
+   input                si;                     // To dp of sparc_ffu_dp.v
+   // End of automatics
+   /*AUTOOUTPUT*/
+   // Beginning of automatic outputs (from unused autoinst outputs)
+   output [3:0]         ffu_ifu_cc_vld_w2;      // From ctl of sparc_ffu_ctl.v
+   output [7:0]         ffu_ifu_cc_w2;          // From ctl of sparc_ffu_ctl.v
+   output               ffu_ifu_ecc_ce_w2;      // From ctl of sparc_ffu_ctl.v
+   output               ffu_ifu_ecc_ue_w2;      // From ctl of sparc_ffu_ctl.v
+   output [5:0]         ffu_ifu_err_reg_w2;     // From ctl of sparc_ffu_ctl.v
+   output [13:0]        ffu_ifu_err_synd_w2;    // From ctl of sparc_ffu_ctl.v
+   output               ffu_ifu_fpop_done_w2;   // From ctl of sparc_ffu_ctl.v
+   output               ffu_ifu_fst_ce_w;       // From ctl of sparc_ffu_ctl.v
+   output               ffu_ifu_inj_ack;        // From ctl of sparc_ffu_ctl.v
+   output               ffu_ifu_stallreq;       // From ctl of sparc_ffu_ctl.v
+   output [1:0]         ffu_ifu_tid_w2;         // From ctl of sparc_ffu_ctl.v
+   output               ffu_lsu_blk_st_e;       // From ctl of sparc_ffu_ctl.v
+   output [5:3]         ffu_lsu_blk_st_va_e;    // From ctl of sparc_ffu_ctl.v
+   output               ffu_lsu_fpop_rq_vld;    // From ctl of sparc_ffu_ctl.v
+   output               ffu_lsu_kill_fst_w;     // From ctl of sparc_ffu_ctl.v
+   output               ffu_tlu_fpu_cmplt;      // From ctl of sparc_ffu_ctl.v
+   output [1:0]         ffu_tlu_fpu_tid;        // From ctl of sparc_ffu_ctl.v
+   output               ffu_tlu_ill_inst_m;     // From ctl of sparc_ffu_ctl.v
+   output               ffu_tlu_trap_ieee754;   // From ctl of sparc_ffu_ctl.v
+   output               ffu_tlu_trap_other;     // From ctl of sparc_ffu_ctl.v
+   output               ffu_tlu_trap_ue;        // From ctl of sparc_ffu_ctl.v
+   output               so;                     // From dp of sparc_ffu_dp.v
+   // End of automatics
+   /*AUTOWIRE*/
+   // Beginning of automatic wires (for undeclared instantiated-module outputs)
+   wire                 ctl_dp_ecc_sel_frf;     // From ctl of sparc_ffu_ctl.v
+   wire [9:0]           ctl_dp_exc_w2;          // From ctl of sparc_ffu_ctl.v
+   wire [7:0]           ctl_dp_fcc_w2;          // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_flip_fpu;        // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_flip_lsu;        // From ctl of sparc_ffu_ctl.v
+   wire [3:0]           ctl_dp_fp_thr;          // From ctl of sparc_ffu_ctl.v
+   wire [3:0]           ctl_dp_fsr_sel_fpu;     // From ctl of sparc_ffu_ctl.v
+   wire [3:0]           ctl_dp_fsr_sel_ld;      // From ctl of sparc_ffu_ctl.v
+   wire [3:0]           ctl_dp_fsr_sel_old;     // From ctl of sparc_ffu_ctl.v
+   wire [2:0]           ctl_dp_ftt_w2;          // From ctl of sparc_ffu_ctl.v
+   wire [3:0]           ctl_dp_gsr_wsr_w2;      // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_new_rs1;         // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_noflip_fpu;      // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_noflip_lsu;      // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_noshift64_frf;   // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_output_sel_frf;  // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_output_sel_fsr;  // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_output_sel_rs1;  // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_output_sel_rs2;  // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_rd_ecc;          // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_rs2_frf_read;    // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_rs2_keep_data;   // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_rs2_sel_fpu_lsu; // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_rs2_sel_vis;     // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_rst_l;           // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_shift_frf_left;  // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_shift_frf_right; // From ctl of sparc_ffu_ctl.v
+   wire [1:0]           ctl_dp_sign;            // From ctl of sparc_ffu_ctl.v
+   wire [3:0]           ctl_dp_thr_e;           // From ctl of sparc_ffu_ctl.v
+   wire [36:0]          ctl_dp_wsr_data_w2;     // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_dp_zero_low32_frf;  // From ctl of sparc_ffu_ctl.v
+   wire [6:0]           ctl_frf_addr;           // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_frf_ren;            // From ctl of sparc_ffu_ctl.v
+   wire [1:0]           ctl_frf_wen;            // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_add32;          // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_align0;         // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_align2;         // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_align4;         // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_align6;         // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_align_odd;      // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_cin;            // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_constant;   // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_invert_rs1; // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_invert_rs2; // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_pass_const; // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_pass_rs1;   // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_pass_rs2;   // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_sel_nand;   // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_sel_nor;    // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_sel_pass;   // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_log_sel_xor;    // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_sel_add;        // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_sel_align;      // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_sel_log;        // From ctl of sparc_ffu_ctl.v
+   wire                 ctl_vis_subtract;       // From ctl of sparc_ffu_ctl.v
+   wire [4:0]           dp_ctl_fsr_aexc;        // From dp of sparc_ffu_dp.v
+   wire [4:0]           dp_ctl_fsr_cexc;        // From dp of sparc_ffu_dp.v
+   wire [7:0]           dp_ctl_fsr_fcc;         // From dp of sparc_ffu_dp.v
+   wire [1:0]           dp_ctl_fsr_rnd;         // From dp of sparc_ffu_dp.v
+   wire [4:0]           dp_ctl_fsr_tem;         // From dp of sparc_ffu_dp.v
+   wire [31:0]          dp_ctl_gsr_mask_e;      // From dp of sparc_ffu_dp.v
+   wire [4:0]           dp_ctl_gsr_scale_e;     // From dp of sparc_ffu_dp.v
+   wire [7:0]           dp_ctl_ld_fcc;          // From dp of sparc_ffu_dp.v
+   wire [1:0]           dp_ctl_rs2_sign;        // From dp of sparc_ffu_dp.v
+   wire [6:0]           dp_ctl_synd_out_high;   // From dp of sparc_ffu_dp.v
+   wire [6:0]           dp_ctl_synd_out_low;    // From dp of sparc_ffu_dp.v
+   wire [63:0]          dp_vis_rs1_data;        // From dp of sparc_ffu_dp.v
+   wire [63:0]          dp_vis_rs2_data;        // From dp of sparc_ffu_dp.v
+   wire [77:0]          frf_dp_data;            // From frf of bw_r_frf.v
+   wire [63:0]          vis_dp_rd_data;         // From vis of sparc_ffu_vis.v
+   // End of automatics
+   wire [77:0]          dp_frf_data;
+   
+   output [63:0]        ffu_exu_rsr_data_m;     // assembled below from the hi/mid/lo pieces
+   wire [31:0]          ffu_exu_rsr_data_hi_m;  // connected at the ctl instance
+   wire [2:0]          ffu_exu_rsr_data_mid_m;  // connected at the ctl instance
+   wire [7:0]          ffu_exu_rsr_data_lo_m;   // connected at the ctl instance
+
+   wire                short_scan_1;            // links frf .so to ctl .si
+   
+   assign              ffu_exu_rsr_data_m[63:0] = {ffu_exu_rsr_data_hi_m[31:0], 4'b0, // [63:32] = hi, [31:28] = 0
+                                                   ffu_exu_rsr_data_mid_m[2:0], 17'b0, // [27:25] = mid, [24:8] = 0
+                                                   ffu_exu_rsr_data_lo_m[7:0]};        // [7:0] = lo
+
+   bw_r_frf frf(
+                .si(short_si0),       // scan path: short_si0 -> frf -> short_scan_1 -> ctl -> short_so0
+                .so(short_scan_1),
+                .dp_frf_data            (dp_frf_data[77:0]),   // full 78-bit bus; its slices are connected at the dp and ctl instances
+                .rst_tri_en             (mem_write_disable),   // note: the ctl instance ties its rst_tri_en to mux_drive_disable instead
+                /*AUTOINST*/
+                // Outputs
+                .frf_dp_data            (frf_dp_data[77:0]),
+                // Inputs
+                .rclk                   (rclk),
+                .se                     (se),
+                .sehold                 (sehold),
+                .ctl_frf_wen            (ctl_frf_wen[1:0]),
+                .ctl_frf_ren            (ctl_frf_ren),
+                .ctl_frf_addr           (ctl_frf_addr[6:0]));
+
+   sparc_ffu_dp dp(
+                   .dp_frf_data         ({dp_frf_data[70:39],dp_frf_data[31:0]}), // 64 of the 78 bits; [77:71],[38:32] go to ctl_frf_write_synd on ctl
+                   /*AUTOINST*/
+                   // Outputs
+                   .so                  (so),
+                   .ffu_lsu_data        (ffu_lsu_data[63:0]), // bits [80:64] are connected at the ctl instance
+                   .dp_vis_rs1_data     (dp_vis_rs1_data[63:0]),
+                   .dp_vis_rs2_data     (dp_vis_rs2_data[63:0]),
+                   .dp_ctl_rs2_sign     (dp_ctl_rs2_sign[1:0]),
+                   .dp_ctl_fsr_fcc      (dp_ctl_fsr_fcc[7:0]),
+                   .dp_ctl_fsr_rnd      (dp_ctl_fsr_rnd[1:0]),
+                   .dp_ctl_fsr_tem      (dp_ctl_fsr_tem[4:0]),
+                   .dp_ctl_fsr_aexc     (dp_ctl_fsr_aexc[4:0]),
+                   .dp_ctl_fsr_cexc     (dp_ctl_fsr_cexc[4:0]),
+                   .dp_ctl_ld_fcc       (dp_ctl_ld_fcc[7:0]),
+                   .dp_ctl_gsr_mask_e   (dp_ctl_gsr_mask_e[31:0]),
+                   .dp_ctl_gsr_scale_e  (dp_ctl_gsr_scale_e[4:0]),
+                   .dp_ctl_synd_out_low (dp_ctl_synd_out_low[6:0]),
+                   .dp_ctl_synd_out_high(dp_ctl_synd_out_high[6:0]),
+                   // Inputs
+                   .rclk                (rclk),
+                   .se                  (se),
+                   .si                  (si),
+                   .ctl_dp_rst_l        (ctl_dp_rst_l),
+                   .frf_dp_data         (frf_dp_data[77:0]),
+                   .cpx_fpu_data        (cpx_fpu_data[63:0]),
+                   .lsu_ffu_ld_data     (lsu_ffu_ld_data[63:0]),
+                   .vis_dp_rd_data      (vis_dp_rd_data[63:0]),
+                   .ctl_dp_wsr_data_w2  (ctl_dp_wsr_data_w2[36:0]),
+                   .ctl_dp_sign         (ctl_dp_sign[1:0]),
+                   .ctl_dp_exc_w2       (ctl_dp_exc_w2[9:0]),
+                   .ctl_dp_fcc_w2       (ctl_dp_fcc_w2[7:0]),
+                   .ctl_dp_ftt_w2       (ctl_dp_ftt_w2[2:0]),
+                   .ctl_dp_noshift64_frf(ctl_dp_noshift64_frf),
+                   .ctl_dp_shift_frf_right(ctl_dp_shift_frf_right),
+                   .ctl_dp_shift_frf_left(ctl_dp_shift_frf_left),
+                   .ctl_dp_zero_low32_frf(ctl_dp_zero_low32_frf),
+                   .ctl_dp_output_sel_rs1(ctl_dp_output_sel_rs1),
+                   .ctl_dp_output_sel_rs2(ctl_dp_output_sel_rs2),
+                   .ctl_dp_output_sel_frf(ctl_dp_output_sel_frf),
+                   .ctl_dp_output_sel_fsr(ctl_dp_output_sel_fsr),
+                   .ctl_dp_noflip_lsu   (ctl_dp_noflip_lsu),
+                   .ctl_dp_flip_lsu     (ctl_dp_flip_lsu),
+                   .ctl_dp_noflip_fpu   (ctl_dp_noflip_fpu),
+                   .ctl_dp_flip_fpu     (ctl_dp_flip_fpu),
+                   .ctl_dp_rs2_frf_read (ctl_dp_rs2_frf_read),
+                   .ctl_dp_rs2_sel_vis  (ctl_dp_rs2_sel_vis),
+                   .ctl_dp_rs2_sel_fpu_lsu(ctl_dp_rs2_sel_fpu_lsu),
+                   .ctl_dp_rs2_keep_data(ctl_dp_rs2_keep_data),
+                   .ctl_dp_rd_ecc       (ctl_dp_rd_ecc),
+                   .ctl_dp_fp_thr       (ctl_dp_fp_thr[3:0]),
+                   .ctl_dp_fsr_sel_old  (ctl_dp_fsr_sel_old[3:0]),
+                   .ctl_dp_fsr_sel_ld   (ctl_dp_fsr_sel_ld[3:0]),
+                   .ctl_dp_fsr_sel_fpu  (ctl_dp_fsr_sel_fpu[3:0]),
+                   .ctl_dp_gsr_wsr_w2   (ctl_dp_gsr_wsr_w2[3:0]),
+                   .ctl_dp_thr_e        (ctl_dp_thr_e[3:0]),
+                   .ctl_dp_new_rs1      (ctl_dp_new_rs1),
+                   .ctl_dp_ecc_sel_frf  (ctl_dp_ecc_sel_frf));
+
+   sparc_ffu_ctl ctl(   // FFU control block instance; ports above the AUTOINST marker are wired by hand
+                     .si(short_scan_1),   // scan-in net (name suggests a short scan chain - confirm against chain stitching)
+                     .so                (short_so0),   // scan-out net
+                     .ffu_exu_rsr_data_hi_m(ffu_exu_rsr_data_hi_m[31:0]),   // hi/lo/mid are declared as outputs of sparc_ffu_ctl
+                     .ffu_exu_rsr_data_lo_m(ffu_exu_rsr_data_lo_m[7:0]),
+                     .ffu_exu_rsr_data_mid_m(ffu_exu_rsr_data_mid_m[2:0]),
+                     .ctl_frf_write_synd({dp_frf_data[77:71],dp_frf_data[38:32]}),   // 14-bit port mapped onto two 7-bit slices of dp_frf_data; NOTE(review): presumably the check-bit fields - confirm
+                     .rst_tri_en        (mux_drive_disable),   // port and net names differ: rst_tri_en is fed from mux_drive_disable
+                     /*AUTOINST*/
+                     // Outputs (every port below drives the same-named net of this module)
+                     .ctl_dp_gsr_wsr_w2 (ctl_dp_gsr_wsr_w2[3:0]),
+                     .ctl_dp_thr_e      (ctl_dp_thr_e[3:0]),
+                     .ctl_dp_wsr_data_w2(ctl_dp_wsr_data_w2[36:0]),
+                     .ctl_vis_add32     (ctl_vis_add32),
+                     .ctl_vis_align0    (ctl_vis_align0),
+                     .ctl_vis_align2    (ctl_vis_align2),
+                     .ctl_vis_align4    (ctl_vis_align4),
+                     .ctl_vis_align6    (ctl_vis_align6),
+                     .ctl_vis_align_odd (ctl_vis_align_odd),
+                     .ctl_vis_cin       (ctl_vis_cin),
+                     .ctl_vis_log_constant(ctl_vis_log_constant),
+                     .ctl_vis_log_invert_rs1(ctl_vis_log_invert_rs1),
+                     .ctl_vis_log_invert_rs2(ctl_vis_log_invert_rs2),
+                     .ctl_vis_log_pass_const(ctl_vis_log_pass_const),
+                     .ctl_vis_log_pass_rs1(ctl_vis_log_pass_rs1),
+                     .ctl_vis_log_pass_rs2(ctl_vis_log_pass_rs2),
+                     .ctl_vis_log_sel_nand(ctl_vis_log_sel_nand),
+                     .ctl_vis_log_sel_nor(ctl_vis_log_sel_nor),
+                     .ctl_vis_log_sel_pass(ctl_vis_log_sel_pass),
+                     .ctl_vis_log_sel_xor(ctl_vis_log_sel_xor),
+                     .ctl_vis_sel_add   (ctl_vis_sel_add),
+                     .ctl_vis_sel_align (ctl_vis_sel_align),
+                     .ctl_vis_sel_log   (ctl_vis_sel_log),
+                     .ctl_vis_subtract  (ctl_vis_subtract),
+                     .ctl_dp_rst_l      (ctl_dp_rst_l),
+                     .ffu_ifu_fpop_done_w2(ffu_ifu_fpop_done_w2),
+                     .ffu_ifu_cc_vld_w2 (ffu_ifu_cc_vld_w2[3:0]),
+                     .ffu_ifu_cc_w2     (ffu_ifu_cc_w2[7:0]),
+                     .ffu_ifu_tid_w2    (ffu_ifu_tid_w2[1:0]),
+                     .ffu_ifu_stallreq  (ffu_ifu_stallreq),
+                     .ffu_ifu_ecc_ce_w2 (ffu_ifu_ecc_ce_w2),
+                     .ffu_ifu_ecc_ue_w2 (ffu_ifu_ecc_ue_w2),
+                     .ffu_ifu_err_reg_w2(ffu_ifu_err_reg_w2[5:0]),
+                     .ffu_ifu_err_synd_w2(ffu_ifu_err_synd_w2[13:0]),
+                     .ffu_ifu_fst_ce_w  (ffu_ifu_fst_ce_w),
+                     .ffu_lsu_kill_fst_w(ffu_lsu_kill_fst_w),
+                     .ffu_ifu_inj_ack   (ffu_ifu_inj_ack),
+                     .ffu_lsu_data      (ffu_lsu_data[80:64]),   // ctl drives only bits 80:64 of this bus
+                     .ffu_lsu_fpop_rq_vld(ffu_lsu_fpop_rq_vld),
+                     .ffu_lsu_blk_st_va_e(ffu_lsu_blk_st_va_e[5:3]),
+                     .ffu_lsu_blk_st_e  (ffu_lsu_blk_st_e),
+                     .ffu_tlu_trap_ieee754(ffu_tlu_trap_ieee754),
+                     .ffu_tlu_trap_other(ffu_tlu_trap_other),
+                     .ffu_tlu_trap_ue   (ffu_tlu_trap_ue),
+                     .ffu_tlu_ill_inst_m(ffu_tlu_ill_inst_m),
+                     .ffu_tlu_fpu_tid   (ffu_tlu_fpu_tid[1:0]),
+                     .ffu_tlu_fpu_cmplt (ffu_tlu_fpu_cmplt),
+                     .ctl_frf_ren       (ctl_frf_ren),
+                     .ctl_frf_wen       (ctl_frf_wen[1:0]),
+                     .ctl_frf_addr      (ctl_frf_addr[6:0]),
+                     .ctl_dp_fp_thr     (ctl_dp_fp_thr[3:0]),
+                     .ctl_dp_fcc_w2     (ctl_dp_fcc_w2[7:0]),
+                     .ctl_dp_ftt_w2     (ctl_dp_ftt_w2[2:0]),
+                     .ctl_dp_exc_w2     (ctl_dp_exc_w2[9:0]),
+                     .ctl_dp_ecc_sel_frf(ctl_dp_ecc_sel_frf),
+                     .ctl_dp_output_sel_rs1(ctl_dp_output_sel_rs1),
+                     .ctl_dp_output_sel_rs2(ctl_dp_output_sel_rs2),
+                     .ctl_dp_output_sel_frf(ctl_dp_output_sel_frf),
+                     .ctl_dp_output_sel_fsr(ctl_dp_output_sel_fsr),
+                     .ctl_dp_rs2_frf_read(ctl_dp_rs2_frf_read),
+                     .ctl_dp_rs2_sel_vis(ctl_dp_rs2_sel_vis),
+                     .ctl_dp_rs2_sel_fpu_lsu(ctl_dp_rs2_sel_fpu_lsu),
+                     .ctl_dp_rs2_keep_data(ctl_dp_rs2_keep_data),
+                     .ctl_dp_rd_ecc     (ctl_dp_rd_ecc),
+                     .ctl_dp_fsr_sel_ld (ctl_dp_fsr_sel_ld[3:0]),
+                     .ctl_dp_fsr_sel_fpu(ctl_dp_fsr_sel_fpu[3:0]),
+                     .ctl_dp_fsr_sel_old(ctl_dp_fsr_sel_old[3:0]),
+                     .ctl_dp_noshift64_frf(ctl_dp_noshift64_frf),
+                     .ctl_dp_shift_frf_right(ctl_dp_shift_frf_right),
+                     .ctl_dp_shift_frf_left(ctl_dp_shift_frf_left),
+                     .ctl_dp_zero_low32_frf(ctl_dp_zero_low32_frf),
+                     .ctl_dp_new_rs1    (ctl_dp_new_rs1),
+                     .ctl_dp_sign       (ctl_dp_sign[1:0]),
+                     .ctl_dp_flip_fpu   (ctl_dp_flip_fpu),
+                     .ctl_dp_flip_lsu   (ctl_dp_flip_lsu),
+                     .ctl_dp_noflip_fpu (ctl_dp_noflip_fpu),
+                     .ctl_dp_noflip_lsu (ctl_dp_noflip_lsu),
+                     // Inputs (every port below is fed from the same-named net of this module)
+                     .dp_ctl_gsr_mask_e (dp_ctl_gsr_mask_e[31:0]),
+                     .dp_ctl_gsr_scale_e(dp_ctl_gsr_scale_e[4:0]),
+                     .exu_ffu_gsr_align_m(exu_ffu_gsr_align_m[2:0]),
+                     .exu_ffu_gsr_mask_m(exu_ffu_gsr_mask_m[31:0]),
+                     .exu_ffu_gsr_rnd_m (exu_ffu_gsr_rnd_m[2:0]),
+                     .exu_ffu_gsr_scale_m(exu_ffu_gsr_scale_m[4:0]),
+                     .exu_ffu_wsr_inst_e(exu_ffu_wsr_inst_e),
+                     .ifu_tlu_sraddr_d  (ifu_tlu_sraddr_d[6:0]),
+                     .lsu_ffu_st_dtlb_perr_g  (lsu_ffu_st_dtlb_perr_g), //sotheas,8/17/04: fixed eco 6529
+                     .rclk              (rclk),
+                     .se                (se),
+                     .grst_l            (grst_l),
+                     .arst_l            (arst_l),
+                     .dp_ctl_rs2_sign   (dp_ctl_rs2_sign[1:0]),
+                     .cpx_vld           (cpx_vld),
+                     .cpx_fcmp          (cpx_fcmp),
+                     .cpx_req           (cpx_req[3:0]),
+                     .cpx_fccval        (cpx_fccval[1:0]),
+                     .cpx_fpexc         (cpx_fpexc[4:0]),
+                     .dp_ctl_fsr_fcc    (dp_ctl_fsr_fcc[7:0]),
+                     .dp_ctl_fsr_rnd    (dp_ctl_fsr_rnd[1:0]),
+                     .dp_ctl_fsr_tem    (dp_ctl_fsr_tem[4:0]),
+                     .dp_ctl_fsr_aexc   (dp_ctl_fsr_aexc[4:0]),
+                     .dp_ctl_fsr_cexc   (dp_ctl_fsr_cexc[4:0]),
+                     .dp_ctl_synd_out_low(dp_ctl_synd_out_low[6:0]),
+                     .dp_ctl_synd_out_high(dp_ctl_synd_out_high[6:0]),
+                     .ifu_ffu_fpop1_d   (ifu_ffu_fpop1_d),
+                     .ifu_ffu_fpop2_d   (ifu_ffu_fpop2_d),
+                     .ifu_ffu_visop_d   (ifu_ffu_visop_d),
+                     .ifu_ffu_fpopcode_d(ifu_ffu_fpopcode_d[8:0]),
+                     .ifu_ffu_frs1_d    (ifu_ffu_frs1_d[4:0]),
+                     .ifu_ffu_frs2_d    (ifu_ffu_frs2_d[4:0]),
+                     .ifu_ffu_frd_d     (ifu_ffu_frd_d[4:0]),
+                     .ifu_ffu_fld_d     (ifu_ffu_fld_d),
+                     .ifu_ffu_fst_d     (ifu_ffu_fst_d),
+                     .ifu_ffu_ldst_single_d(ifu_ffu_ldst_single_d),
+                     .ifu_ffu_tid_d     (ifu_ffu_tid_d[1:0]),
+                     .ifu_ffu_fcc_num_d (ifu_ffu_fcc_num_d[1:0]),
+                     .ifu_ffu_mvcnd_m   (ifu_ffu_mvcnd_m),
+                     .ifu_ffu_inj_frferr(ifu_ffu_inj_frferr),
+                     .ifu_exu_ecc_mask  (ifu_exu_ecc_mask[6:0]),
+                     .ifu_ffu_ldfsr_d   (ifu_ffu_ldfsr_d),
+                     .ifu_ffu_ldxfsr_d  (ifu_ffu_ldxfsr_d),
+                     .ifu_ffu_stfsr_d   (ifu_ffu_stfsr_d),
+                     .ifu_ffu_quad_op_e (ifu_ffu_quad_op_e),
+                     .ifu_tlu_inst_vld_w(ifu_tlu_inst_vld_w),
+                     .lsu_ffu_flush_pipe_w(lsu_ffu_flush_pipe_w),
+                     .ifu_tlu_flush_w   (ifu_tlu_flush_w),
+                     .lsu_ffu_ack       (lsu_ffu_ack),
+                     .lsu_ffu_ld_vld    (lsu_ffu_ld_vld),
+                     .lsu_ffu_bld_cnt_w (lsu_ffu_bld_cnt_w[2:0]),
+                     .dp_ctl_ld_fcc     (dp_ctl_ld_fcc[7:0]),
+                     .ifu_exu_nceen_e   (ifu_exu_nceen_e),
+                     .ifu_exu_disable_ce_e(ifu_exu_disable_ce_e),
+                     .lsu_ffu_blk_asi_e (lsu_ffu_blk_asi_e),
+                     .exu_ffu_ist_e     (exu_ffu_ist_e),
+                     .ifu_tlu_flsh_inst_e(ifu_tlu_flsh_inst_e),
+                     .ifu_lsu_ld_inst_e (ifu_lsu_ld_inst_e),
+                     .lsu_ffu_stb_full0 (lsu_ffu_stb_full0),
+                     .lsu_ffu_stb_full1 (lsu_ffu_stb_full1),
+                     .lsu_ffu_stb_full2 (lsu_ffu_stb_full2),
+                     .lsu_ffu_stb_full3 (lsu_ffu_stb_full3));
+
+   sparc_ffu_vis vis(/*AUTOINST*/
+                     // Outputs: the 64-bit VIS result (drives the same-named net here)
+                     .vis_dp_rd_data    (vis_dp_rd_data[63:0]),
+                     // Inputs: two 64-bit operands, then the ctl_vis_* controls that the ctl instance lists among its outputs
+                     .dp_vis_rs1_data   (dp_vis_rs1_data[63:0]),
+                     .dp_vis_rs2_data   (dp_vis_rs2_data[63:0]),
+                     .ctl_vis_sel_add   (ctl_vis_sel_add),   // sel_add/sel_log/sel_align feed the final result mux inside vis
+                     .ctl_vis_sel_log   (ctl_vis_sel_log),
+                     .ctl_vis_sel_align (ctl_vis_sel_align),
+                     .ctl_vis_add32     (ctl_vis_add32),   // add32/subtract/cin: partitioned-adder controls
+                     .ctl_vis_subtract  (ctl_vis_subtract),
+                     .ctl_vis_cin       (ctl_vis_cin),
+                     .ctl_vis_align0    (ctl_vis_align0),   // align0/2/4/6 + align_odd: FALIGNDATA mux selects
+                     .ctl_vis_align2    (ctl_vis_align2),
+                     .ctl_vis_align4    (ctl_vis_align4),
+                     .ctl_vis_align6    (ctl_vis_align6),
+                     .ctl_vis_align_odd (ctl_vis_align_odd),
+                     .ctl_vis_log_sel_pass(ctl_vis_log_sel_pass),   // remaining ctl_vis_log_* ports steer the logical unit
+                     .ctl_vis_log_sel_nand(ctl_vis_log_sel_nand),
+                     .ctl_vis_log_sel_nor(ctl_vis_log_sel_nor),
+                     .ctl_vis_log_sel_xor(ctl_vis_log_sel_xor),
+                     .ctl_vis_log_invert_rs1(ctl_vis_log_invert_rs1),
+                     .ctl_vis_log_invert_rs2(ctl_vis_log_invert_rs2),
+                     .ctl_vis_log_constant(ctl_vis_log_constant),
+                     .ctl_vis_log_pass_const(ctl_vis_log_pass_const),
+                     .ctl_vis_log_pass_rs1(ctl_vis_log_pass_rs1),
+                     .ctl_vis_log_pass_rs2(ctl_vis_log_pass_rs2));
+endmodule // sparc_ffu
+// Local Variables:
+// verilog-library-directories:("." "../../../srams/rtl")
+// End:
+
Index: /trunk/T1-CPU/ffu/sparc_ffu_vis.v
===================================================================
--- /trunk/T1-CPU/ffu/sparc_ffu_vis.v	(revision 6)
+++ /trunk/T1-CPU/ffu/sparc_ffu_vis.v	(revision 6)
@@ -0,0 +1,177 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ffu_vis.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ffu_vis
+//      Description: This is the FFU VIS block.
+//	It implements FALIGNDATA, partitioned add and the VIS logical operations.
+*/
+
+module sparc_ffu_vis(/*AUTOARG*/
+   // Outputs: one 64-bit result bus
+   vis_dp_rd_data, 
+   // Inputs: two 64-bit operands, then the ctl_vis_* selects and modifiers
+   dp_vis_rs1_data, dp_vis_rs2_data, ctl_vis_sel_add, 
+   ctl_vis_sel_log, ctl_vis_sel_align, ctl_vis_add32, 
+   ctl_vis_subtract, ctl_vis_cin, ctl_vis_align0, ctl_vis_align2, 
+   ctl_vis_align4, ctl_vis_align6, ctl_vis_align_odd, 
+   ctl_vis_log_sel_pass, ctl_vis_log_sel_nand, ctl_vis_log_sel_nor, 
+   ctl_vis_log_sel_xor, ctl_vis_log_invert_rs1, 
+   ctl_vis_log_invert_rs2, ctl_vis_log_constant, 
+   ctl_vis_log_pass_const, ctl_vis_log_pass_rs1, 
+   ctl_vis_log_pass_rs2
+   );
+   // No clock or reset port here: adder, aligner and logic unit all see the operands at once and one output mux picks the result.
+   input [63:0] dp_vis_rs1_data;          // operand 1
+   input [63:0] dp_vis_rs2_data;          // operand 2
+   input        ctl_vis_sel_add;          // output_mux sel0: take add_out
+   input        ctl_vis_sel_log;          // output_mux sel1: take log_out
+   input        ctl_vis_sel_align;        // output_mux sel2: take align_out
+   input        ctl_vis_add32;            // forwarded unchanged to the add32 port of both part adders
+   input        ctl_vis_subtract;         // 1 = bitwise-invert rs2 ahead of the adders
+   input        ctl_vis_cin;              // carry-in, shared by both 32-bit adder halves
+   input         ctl_vis_align0;          // falign_mux1 sel0: window starts at rs1[63]
+   input         ctl_vis_align2;          // falign_mux1 sel1: window starts at rs1[47]
+   input         ctl_vis_align4;          // falign_mux1 sel2: window starts at rs1[31]
+   input         ctl_vis_align6;          // falign_mux1 sel3: window starts at rs1[15]
+   input         ctl_vis_align_odd;       // falign_mux2 select between window[71:8] and window[63:0]
+   input         ctl_vis_log_sel_pass;    // logic_mux sel3: take logic_pass
+   input         ctl_vis_log_sel_nand;    // logic_mux sel1: take logic_nand
+   input         ctl_vis_log_sel_nor;     // logic_mux sel0: take logic_nor
+   input         ctl_vis_log_sel_xor;     // logic_mux sel2: take logic_xor
+   input         ctl_vis_log_invert_rs1;  // 1 = invert rs1 before the logic ops and the pass mux
+   input         ctl_vis_log_invert_rs2;  // 1 = invert rs2 before the logic ops and the pass mux
+   input         ctl_vis_log_constant;    // bit value replicated 64x for the constant result
+   input         ctl_vis_log_pass_const;  // pass_mux sel0: take the replicated constant
+   input         ctl_vis_log_pass_rs1;    // pass_mux sel1: take logic_rs1
+   input         ctl_vis_log_pass_rs2;    // pass_mux sel2: take logic_rs2
+
+   output [63:0] vis_dp_rd_data;          // result chosen by output_mux
+
+   wire [71:0]   align_data1;             // 72-bit window produced by falign_mux1
+   wire [63:0]   align_rs1;               // rs1 after dp_buffer
+   wire [63:8]   align_rs2;               // rs2 after dp_buffer; bits 7:0 are never needed by the aligner
+
+   wire [63:0]   add_out;                 // {part_adder_hi.z, part_adder_lo.z}
+   wire [63:0]   log_out;                 // logic_mux result
+   wire [63:0]   align_out;               // falign_mux2 result
+   wire [63:0]   add_in_rs1;
+   wire [63:0]   add_in_rs2;
+
+   wire [63:0]   logic_nor;
+   wire [63:0]   logic_pass;
+   wire [63:0]   logic_xor;
+   wire [63:0]   logic_nand;
+   wire [63:0]   logic_rs1;               // rs1, optionally inverted
+   wire [63:0]   logic_rs2;               // rs2, optionally inverted
+   
+   /////////////////////////////////////////////////////////////////
+   // Logic for partitioned addition.
+   //----------------------------------
+   // RS1 is normal RS1 data, RS2 is inverted by subtraction signal.
+   /////////////////////////////////////////////////////////////////
+   assign        add_in_rs1[63:0] = dp_vis_rs1_data[63:0];
+   assign        add_in_rs2[63:0] = dp_vis_rs2_data[63:0] ^ {64{ctl_vis_subtract}};   // XOR with all-ones = bitwise NOT when subtracting
+   sparc_ffu_part_add32 part_adder_hi(.z(add_out[63:32]),   // upper 32 result bits
+                                   .add32(ctl_vis_add32),   // NOTE(review): partition-size semantics live in sparc_ffu_part_add32 - confirm there
+                                   .a(add_in_rs1[63:32]),
+                                   .b(add_in_rs2[63:32]),
+                                   .cin(ctl_vis_cin));   // same cin as the low half; no carry is passed between the halves here
+   sparc_ffu_part_add32 part_adder_lo(.z(add_out[31:0]),   // lower 32 result bits
+                                   .add32(ctl_vis_add32),
+                                   .a(add_in_rs1[31:0]),
+                                   .b(add_in_rs2[31:0]),
+                                   .cin(ctl_vis_cin));
+
+   ///////////////////////////////////////////////////////////////////////////
+   // Datapath for FALIGNDATA
+   //---------------------------------------------------------------
+   // FALIGNDATA concatenates rs1 and rs2 and shifts them by byte to create
+   // an 8 byte value.  The first mux creates a 72 bit value and the
+   // 2nd mux picks 64 bits out of these for the output.
+   ///////////////////////////////////////////////////////////////////////////
+   dp_buffer #(64) align_rs1_buf(.dout(align_rs1[63:0]), .in(dp_vis_rs1_data[63:0]));
+   dp_buffer #(56) align_rs2_buf(.dout(align_rs2[63:8]), .in(dp_vis_rs2_data[63:8]));   // only 56 bits: rs2[7:0] cannot reach the output
+   mux4ds #(72) falign_mux1(.dout(align_data1[71:0]),   // NOTE(review): sel0..sel3 presumably one-hot (mux4ds is defined elsewhere) - confirm
+                            .in0({align_rs1[63:0], align_rs2[63:56]}),   // nothing dropped from rs1, 1 byte of rs2 appended
+                            .in1({align_rs1[47:0], align_rs2[63:40]}),   // top 2 bytes of rs1 dropped, 3 bytes of rs2 appended
+                            .in2({align_rs1[31:0], align_rs2[63:24]}),   // top 4 bytes of rs1 dropped, 5 bytes of rs2 appended
+                            .in3({align_rs1[15:0], align_rs2[63:8]}),    // top 6 bytes of rs1 dropped, 7 bytes of rs2 appended
+                            .sel0(ctl_vis_align0),
+                            .sel1(ctl_vis_align2),
+                            .sel2(ctl_vis_align4),
+                            .sel3(ctl_vis_align6));
+   dp_mux2es #(64) falign_mux2(.dout(align_out[63:0]),
+                              .in0(align_data1[71:8]),   // upper 64 bits of the window (no extra byte shift)
+                              .in1(align_data1[63:0]),   // window shifted by one more byte
+                              .sel(ctl_vis_align_odd));  // NOTE(review): presumably sel=1 picks in1 - confirm in dp_mux2es
+
+   ///////////////////////////////////////////////////////////////////////////
+   // Datapath for VIS logicals
+   //-----------------------------------------------------------------------
+   // VIS logicals perform 3 fundamental ops: NAND, NOR and XOR plus inverted
+   // versions of the inputs to create the other versions.  These 3 outputs are
+   // muxed with a choice of 1, 0, rs1 or rs2.
+   ///////////////////////////////////////////////////////////////////////////
+
+   // create inverted versions of data if desired
+   assign        logic_rs1[63:0] = dp_vis_rs1_data[63:0] ^ {64{ctl_vis_log_invert_rs1}};
+   assign        logic_rs2[63:0] = dp_vis_rs2_data[63:0] ^ {64{ctl_vis_log_invert_rs2}};
+
+   // 3 basic logical operations
+   assign        logic_nor[63:0] = ~(logic_rs1[63:0] | logic_rs2[63:0]);
+   assign        logic_nand[63:0] = ~(logic_rs1[63:0] & logic_rs2[63:0]);
+   assign        logic_xor[63:0] = (logic_rs1[63:0] ^ logic_rs2[63:0]);
+   
+   // mux for pass through data
+   mux3ds #(64) pass_mux(.dout(logic_pass[63:0]),
+                         .in0({64{ctl_vis_log_constant}}),   // all zeros or all ones
+                         .in1(logic_rs1[63:0]),              // taken after the optional inversion, so this path can also yield ~rs1
+                         .in2(logic_rs2[63:0]),              // likewise rs2 or ~rs2
+                         .sel0(ctl_vis_log_pass_const),
+                         .sel1(ctl_vis_log_pass_rs1),
+                         .sel2(ctl_vis_log_pass_rs2));
+
+   // pick between logic outputs
+   mux4ds #(64) logic_mux(.dout(log_out[63:0]),
+                          .in0(logic_nor[63:0]),
+                          .in1(logic_nand[63:0]),
+                          .in2(logic_xor[63:0]),
+                          .in3(logic_pass[63:0]),
+                          .sel0(ctl_vis_log_sel_nor),
+                          .sel1(ctl_vis_log_sel_nand),
+                          .sel2(ctl_vis_log_sel_xor),
+                          .sel3(ctl_vis_log_sel_pass));
+
+
+   
+   //////////////////////////////////////////////////////////
+   // output mux: chooses which of the three units drives vis_dp_rd_data
+   //////////////////////////////////////////////////////////
+   mux3ds #(64) output_mux(.dout(vis_dp_rd_data[63:0]),
+                           .in0(add_out[63:0]),
+                           .in1(log_out[63:0]),
+                           .in2(align_out[63:0]),
+                           .sel0(ctl_vis_sel_add),
+                           .sel1(ctl_vis_sel_log),
+                           .sel2(ctl_vis_sel_align));
+   
+endmodule // sparc_ffu_vis
Index: /trunk/T1-CPU/ffu/sparc_ffu_ctl.v
===================================================================
--- /trunk/T1-CPU/ffu/sparc_ffu_ctl.v	(revision 6)
+++ /trunk/T1-CPU/ffu/sparc_ffu_ctl.v	(revision 6)
@@ -0,0 +1,1896 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ffu_ctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ffu_ctl
+//	Description: This is the ffu control block.
+*/
+
+`include "iop.h"
+`define FSR_NVA   9   // FSR_* values equal the SPARC V9 FSR bit numbers of aexc[9:5]/cexc[4:0]; NVA = accrued invalid
+`define FSR_OFA   8   // accrued overflow
+`define FSR_UFA   7   // accrued underflow
+`define FSR_DZA   6   // accrued divide-by-zero
+`define FSR_NXA   5   // accrued inexact
+`define FSR_NVC   4   // current invalid
+`define FSR_OFC   3   // current overflow
+`define FSR_UFC   2   // current underflow
+`define FSR_DZC   1   // current divide-by-zero
+`define FSR_NXC   0   // current inexact; NOTE(review): presumably these index a 10-bit {aexc,cexc} vector such as ctl_dp_exc_w2[9:0] - confirm at use sites
+
+
+module sparc_ffu_ctl (/*AUTOARG*/
+   // Outputs
+   ffu_exu_rsr_data_mid_m, ffu_exu_rsr_data_lo_m, 
+   ffu_exu_rsr_data_hi_m, ctl_vis_subtract, ctl_vis_sel_log, 
+   ctl_vis_sel_align, ctl_vis_sel_add, ctl_vis_log_sel_xor, 
+   ctl_vis_log_sel_pass, ctl_vis_log_sel_nor, ctl_vis_log_sel_nand, 
+   ctl_vis_log_pass_rs2, ctl_vis_log_pass_rs1, 
+   ctl_vis_log_pass_const, ctl_vis_log_invert_rs2, 
+   ctl_vis_log_invert_rs1, ctl_vis_log_constant, ctl_vis_cin, 
+   ctl_vis_align_odd, ctl_vis_align6, ctl_vis_align4, ctl_vis_align2, 
+   ctl_vis_align0, ctl_vis_add32, ctl_dp_wsr_data_w2, ctl_dp_thr_e, 
+   ctl_dp_gsr_wsr_w2, so, ctl_dp_rst_l, ffu_ifu_fpop_done_w2, 
+   ffu_ifu_cc_vld_w2, ffu_ifu_cc_w2, ffu_ifu_tid_w2, 
+   ffu_ifu_stallreq, ffu_ifu_ecc_ce_w2, ffu_ifu_ecc_ue_w2, 
+   ffu_ifu_err_reg_w2, ffu_ifu_err_synd_w2, ffu_ifu_fst_ce_w, 
+   ffu_lsu_kill_fst_w, ffu_ifu_inj_ack, ffu_lsu_data, 
+   ffu_lsu_fpop_rq_vld, ffu_lsu_blk_st_va_e, ffu_lsu_blk_st_e, 
+   ffu_tlu_trap_ieee754, ffu_tlu_trap_other, ffu_tlu_trap_ue, 
+   ffu_tlu_ill_inst_m, ffu_tlu_fpu_tid, ffu_tlu_fpu_cmplt, 
+   ctl_frf_ren, ctl_frf_wen, ctl_frf_addr, ctl_dp_fp_thr, 
+   ctl_dp_fcc_w2, ctl_dp_ftt_w2, ctl_dp_exc_w2, ctl_dp_ecc_sel_frf, 
+   ctl_dp_output_sel_rs1, ctl_dp_output_sel_rs2, 
+   ctl_dp_output_sel_frf, ctl_dp_output_sel_fsr, ctl_dp_rs2_frf_read, 
+   ctl_dp_rs2_sel_vis, ctl_dp_rs2_sel_fpu_lsu, ctl_dp_rs2_keep_data, 
+   ctl_dp_rd_ecc, ctl_dp_fsr_sel_ld, ctl_dp_fsr_sel_fpu, 
+   ctl_dp_fsr_sel_old, ctl_dp_noshift64_frf, ctl_dp_shift_frf_right, 
+   ctl_dp_shift_frf_left, ctl_dp_zero_low32_frf, ctl_dp_new_rs1, 
+   ctl_dp_sign, ctl_dp_flip_fpu, ctl_dp_flip_lsu, ctl_dp_noflip_fpu, 
+   ctl_dp_noflip_lsu, ctl_frf_write_synd, 
+   // Inputs
+   ifu_tlu_sraddr_d, exu_ffu_wsr_inst_e, exu_ffu_gsr_scale_m, 
+   exu_ffu_gsr_rnd_m, exu_ffu_gsr_mask_m, exu_ffu_gsr_align_m, 
+   dp_ctl_gsr_scale_e, dp_ctl_gsr_mask_e, rclk, si, se, grst_l, 
+   arst_l, rst_tri_en, dp_ctl_rs2_sign, cpx_vld, cpx_fcmp, cpx_req, 
+   cpx_fccval, cpx_fpexc, dp_ctl_fsr_fcc, dp_ctl_fsr_rnd, 
+   dp_ctl_fsr_tem, dp_ctl_fsr_aexc, dp_ctl_fsr_cexc, 
+   dp_ctl_synd_out_low, dp_ctl_synd_out_high, ifu_ffu_fpop1_d, 
+   ifu_ffu_fpop2_d, ifu_ffu_visop_d, ifu_ffu_fpopcode_d, 
+   ifu_ffu_frs1_d, ifu_ffu_frs2_d, ifu_ffu_frd_d, ifu_ffu_fld_d, 
+   ifu_ffu_fst_d, ifu_ffu_ldst_single_d, ifu_ffu_tid_d, 
+   ifu_ffu_fcc_num_d, ifu_ffu_mvcnd_m, ifu_ffu_inj_frferr, 
+   ifu_exu_ecc_mask, ifu_ffu_ldfsr_d, ifu_ffu_ldxfsr_d, 
+   ifu_ffu_stfsr_d, ifu_ffu_quad_op_e, ifu_tlu_inst_vld_w, 
+   lsu_ffu_flush_pipe_w, ifu_tlu_flush_w, lsu_ffu_ack, 
+   lsu_ffu_ld_vld, lsu_ffu_bld_cnt_w, dp_ctl_ld_fcc, ifu_exu_nceen_e, 
+   ifu_exu_disable_ce_e, lsu_ffu_blk_asi_e, exu_ffu_ist_e, 
+   ifu_tlu_flsh_inst_e, ifu_lsu_ld_inst_e, lsu_ffu_stb_full0, 
+   lsu_ffu_stb_full1, lsu_ffu_stb_full2, lsu_ffu_stb_full3,
+//sotheas,8/17/04: fixed eco 6529
+   lsu_ffu_st_dtlb_perr_g
+//////////////////////////////
+   ) ;
+   /*AUTOINPUT*/
+   // Beginning of automatic inputs (from unused autoinst inputs)
+   input [31:0]         dp_ctl_gsr_mask_e;      // To visctl of sparc_ffu_ctl_visctl.v
+   input [4:0]          dp_ctl_gsr_scale_e;     // To visctl of sparc_ffu_ctl_visctl.v
+   input [2:0]          exu_ffu_gsr_align_m;    // To visctl of sparc_ffu_ctl_visctl.v
+   input [31:0]         exu_ffu_gsr_mask_m;     // To visctl of sparc_ffu_ctl_visctl.v
+   input [2:0]          exu_ffu_gsr_rnd_m;      // To visctl of sparc_ffu_ctl_visctl.v
+   input [4:0]          exu_ffu_gsr_scale_m;    // To visctl of sparc_ffu_ctl_visctl.v
+   input                exu_ffu_wsr_inst_e;     // To visctl of sparc_ffu_ctl_visctl.v
+   input [6:0]          ifu_tlu_sraddr_d;       // To visctl of sparc_ffu_ctl_visctl.v
+   // End of automatics
+   input rclk;
+   input si;
+   input se;
+   input grst_l;
+   input arst_l;
+   input rst_tri_en;
+   input [1:0] dp_ctl_rs2_sign;
+
+   input          cpx_vld;
+   input          cpx_fcmp;
+   input [3:0]    cpx_req;
+   input [1:0]    cpx_fccval;
+   input [4:0]    cpx_fpexc;
+   input [7:0] dp_ctl_fsr_fcc;
+   input [1:0] dp_ctl_fsr_rnd;
+   input [4:0] dp_ctl_fsr_tem;
+	 input [4:0] dp_ctl_fsr_aexc;
+	 input [4:0] dp_ctl_fsr_cexc;
+
+   input [6:0] dp_ctl_synd_out_low;   // signals for ecc errors
+   input [6:0] dp_ctl_synd_out_high;
+
+   input       ifu_ffu_fpop1_d;
+   input       ifu_ffu_fpop2_d;
+   input       ifu_ffu_visop_d;
+   input [8:0] ifu_ffu_fpopcode_d;
+   input [4:0] ifu_ffu_frs1_d;
+   input [4:0] ifu_ffu_frs2_d;
+   input [4:0] ifu_ffu_frd_d;
+   input       ifu_ffu_fld_d;
+   input       ifu_ffu_fst_d;
+   input       ifu_ffu_ldst_single_d;
+   input [1:0] ifu_ffu_tid_d;
+   input [1:0] ifu_ffu_fcc_num_d;
+   input       ifu_ffu_mvcnd_m;
+
+   input       ifu_ffu_inj_frferr;
+   input [6:0] ifu_exu_ecc_mask;
+
+   input       ifu_ffu_ldfsr_d,
+	       ifu_ffu_ldxfsr_d,
+	       ifu_ffu_stfsr_d;
+   input       ifu_ffu_quad_op_e;
+   
+   input       ifu_tlu_inst_vld_w;
+   input       lsu_ffu_flush_pipe_w;
+   input       ifu_tlu_flush_w;
+   
+   input        lsu_ffu_ack;
+   input        lsu_ffu_ld_vld;
+   input [2:0]  lsu_ffu_bld_cnt_w;
+   input [7:0]  dp_ctl_ld_fcc;
+
+   input        ifu_exu_nceen_e;// enable ecc traps
+   input        ifu_exu_disable_ce_e; // all ce are treated as ue
+   input        lsu_ffu_blk_asi_e;
+   input        exu_ffu_ist_e;
+   input        ifu_tlu_flsh_inst_e;
+   input        ifu_lsu_ld_inst_e;
+   input        lsu_ffu_stb_full0;
+   input        lsu_ffu_stb_full1;
+   input        lsu_ffu_stb_full2;
+   input        lsu_ffu_stb_full3;
+
+   input        lsu_ffu_st_dtlb_perr_g; //sotheas,8/17/04: fixed eco 6529, when asserted terminated
+                                        //                 block store
+
+   /*AUTOOUTPUT*/
+   // Beginning of automatic outputs (from unused autoinst outputs)
+   output [3:0]         ctl_dp_gsr_wsr_w2;      // From visctl of sparc_ffu_ctl_visctl.v
+   output [3:0]         ctl_dp_thr_e;           // From visctl of sparc_ffu_ctl_visctl.v
+   output [36:0]        ctl_dp_wsr_data_w2;     // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_add32;          // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_align0;         // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_align2;         // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_align4;         // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_align6;         // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_align_odd;      // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_cin;            // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_constant;   // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_invert_rs1; // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_invert_rs2; // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_pass_const; // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_pass_rs1;   // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_pass_rs2;   // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_sel_nand;   // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_sel_nor;    // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_sel_pass;   // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_log_sel_xor;    // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_sel_add;        // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_sel_align;      // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_sel_log;        // From visctl of sparc_ffu_ctl_visctl.v
+   output               ctl_vis_subtract;       // From visctl of sparc_ffu_ctl_visctl.v
+   output [31:0]        ffu_exu_rsr_data_hi_m;  // From visctl of sparc_ffu_ctl_visctl.v
+   output [7:0]         ffu_exu_rsr_data_lo_m;  // From visctl of sparc_ffu_ctl_visctl.v
+   output [2:0]         ffu_exu_rsr_data_mid_m; // From visctl of sparc_ffu_ctl_visctl.v
+   // End of automatics
+   output               so;
+   output               ctl_dp_rst_l;
+   output       ffu_ifu_fpop_done_w2;
+   output [3:0] ffu_ifu_cc_vld_w2;// one hot valid for each set of fcc
+   output [7:0] ffu_ifu_cc_w2;// all 4 sets of fcc
+   output [1:0] ffu_ifu_tid_w2;
+   output       ffu_ifu_stallreq; // stall pipe so blk st can issue
+   
+   output       ffu_ifu_ecc_ce_w2;  // correctable ecc error
+   output       ffu_ifu_ecc_ue_w2;  // uncorrectable ecc error
+   output [5:0] ffu_ifu_err_reg_w2;
+   output [13:0] ffu_ifu_err_synd_w2;
+   output       ffu_ifu_fst_ce_w;
+   output       ffu_lsu_kill_fst_w;
+   output       ffu_ifu_inj_ack;
+
+   output [80:64] ffu_lsu_data;
+   output 	  ffu_lsu_fpop_rq_vld ;   // ffu dispatches fpop issue request.
+   output [5:3]  ffu_lsu_blk_st_va_e;
+   output        ffu_lsu_blk_st_e;
+
+   output 	  ffu_tlu_trap_ieee754;
+   output 	  ffu_tlu_trap_other;
+   output     ffu_tlu_trap_ue;
+   output     ffu_tlu_ill_inst_m;
+
+   output [1:0] ffu_tlu_fpu_tid;
+   output       ffu_tlu_fpu_cmplt;
+   
+   output       ctl_frf_ren;
+   output [1:0] ctl_frf_wen;
+   output [6:0] ctl_frf_addr;
+
+   output [3:0] ctl_dp_fp_thr;
+   
+   output [7:0] ctl_dp_fcc_w2;
+   output [2:0] ctl_dp_ftt_w2;
+   output [9:0] ctl_dp_exc_w2;
+
+   output ctl_dp_ecc_sel_frf;
+   
+
+   // mux selects
+   output       ctl_dp_output_sel_rs1;
+   output 	ctl_dp_output_sel_rs2;
+   output 	ctl_dp_output_sel_frf;
+   output 	ctl_dp_output_sel_fsr;
+   
+   output 	ctl_dp_rs2_frf_read;
+   output 	ctl_dp_rs2_sel_vis;
+   output 	ctl_dp_rs2_sel_fpu_lsu;
+   output 	ctl_dp_rs2_keep_data;
+   output   ctl_dp_rd_ecc;
+
+   output [3:0] ctl_dp_fsr_sel_ld,
+		ctl_dp_fsr_sel_fpu,
+		ctl_dp_fsr_sel_old;
+   
+   output 	ctl_dp_noshift64_frf;
+   output 	ctl_dp_shift_frf_right;
+   output 	ctl_dp_shift_frf_left;
+   output 	ctl_dp_zero_low32_frf;
+   
+   output ctl_dp_new_rs1;
+   
+   output [1:0] ctl_dp_sign;
+   
+   output       ctl_dp_flip_fpu;
+   output       ctl_dp_flip_lsu;
+   output       ctl_dp_noflip_fpu;
+   output       ctl_dp_noflip_lsu;
+
+
+   wire     clk;
+   wire     reset;
+   wire     ffu_reset_l;
+   // FPOP is broken into parts:
+   // [8:4] fpop_high
+   // [3:2] fpop_mid
+   // [1:0] fpop_size
+   wire         fpop_size_0;    // 2 lsbs of fpop
+   wire         fpop_size_1;
+
+   wire         fpop_high_0;    // 4 msbs of fpop
+   wire         fpop_high_2;
+   wire         fpop_high_4;
+   wire         fpop_high_5; 
+   wire         fpop_high_6; 
+   wire         fpop_high_8;
+   wire         fpop_high_a;
+   wire         fpop_high_c;
+   wire         fpop_high_d;
+   wire         fpop_high_e;
+   wire         fpop_high_10;
+   wire         fpop_high_18;
+   wire    fpop_low_1;
+   wire    fpop_low_2;
+   wire    fpop_low_4;
+   wire    fpop_low_5;
+   wire    fpop_low_6;
+   wire    fpop_low_8;
+   wire    fpop_low_9;
+   wire    fpop_low_a;
+   wire    fpop_low_d;
+   wire    fpop_low_e;
+
+   wire         source_single_e;
+   wire         source_single_next;
+   wire         source_single;
+
+   wire         dest_single_e;
+   wire         dest_single_next;
+   wire         dest_single;
+
+   wire [4:0]   frs1_e;
+   wire [5:0]   rs1_e;
+   wire [5:0]   rs1_next;
+   wire [5:0]   rs1;
+   wire [4:0]   frs2_e;
+   wire [5:0]   rs2_e;
+   wire [4:0]   frd_e;
+   wire         ldst_single_e;
+   wire [5:0]   rd_e;
+   wire [5:1]   st_rd_d;
+   wire [5:1]   write_addr;
+
+   wire [5:0] 	rs2_next;
+   wire [5:0] 	rs2;
+   wire [5:0] 	rd_next;
+   wire [5:0] 	rd;
+   wire [5:1]   blk_rd;
+
+   wire         is_fpop_d;
+   wire    shift_frf_rs2_m;
+   wire    shift_frf_rs1_w;
+   wire    shift_frf_right_next;
+   wire    shift_frf_right;
+   wire    shift_frf_left_next;
+   wire    shift_frf_left;
+   wire    noshift64_frf_next;
+   wire    noshift64_frf;
+
+   wire 	abs_w;
+   wire 	neg_w;
+   wire 	cond_move_e;
+   wire 	cond_move_m;
+   wire 	move_e;
+   wire 	move_m;
+   wire 	move_m_valid;
+   wire 	move_w;
+   wire   move_w_valid;
+   wire   move_w2;
+   wire   move_w2_vld;
+   wire   move_wen_m;
+   wire   move_wen_w;
+   wire 	move_wen_w2;
+   wire 	move_wen_w2_valid;
+
+   wire   vis_nofrf_e;
+   wire 	ren_rs2_e;
+   wire 	ren_rs2_e_vld;
+   wire 	ren_rs2_m;
+   wire 	ren_rs2_m_vld;
+   wire 	ren_rs2_w;
+   wire 	ren_rs2_w2;
+   wire 	ren_rs2_w3;
+   wire 	ren_rs2_w4;
+   wire 	ren_rs1_e;
+   wire 	ren_rs1_m;
+   wire 	ren_rs1_w;
+   wire 	ren_rs1_w_vld;
+   wire 	ren_rs1_w2_vld;
+   wire 	ren_rs1_w2;
+   wire 	ren_rs1_w3;
+   wire 	ren_rs1_w4;
+   wire 	ren_rs1_w5;
+   wire 	read_rs1;
+   wire 	read_rs2;
+   wire   read_rd;
+   wire 	read_bst;
+
+   wire 	fpu_op_e;
+   wire 	fpu_op_m;
+   wire 	fpu_op_w_vld;
+   wire 	fpu_op_w;
+   wire 	fpu_op_w2;
+   wire 	fpu_op_w2_vld;
+   wire 	fpu_op_w3;
+   wire 	fpu_op_w3_vld;
+   wire 	any_op_e;
+   wire 	any_op_m;
+   wire 	any_op_w;
+   wire 	any_op_w2;
+   wire 	any_op_m_valid;
+   wire   visop_e;
+   wire   visop_m;
+   wire   visop_w_vld;
+   
+   wire 	fld_e;
+   wire 	fld_m;
+   wire 	fst_e;
+   wire 	fst_m;
+   wire 	fst_w;
+   wire 	zero_lower_data_next;
+
+   wire   fpop1_e;
+   wire   fpop2_e;
+   wire   fpop1_next;
+   wire   fpop1;
+   wire   fpop2_next;
+   wire   fpop2;
+   wire   visop_next;
+   wire   visop;
+   wire 	kill_m;
+   wire 	killed_w;
+   wire 	kill_w;
+   wire 	kill_unimpl_w;
+   wire 	kill_fp;
+   wire 	kill_eccchk_w;
+   wire   flush_w;
+   wire   flush_w2;
+   
+   wire [1:0] 	tid_next;
+   wire [1:0]   tid;
+   wire [1:0]   extra_tid;
+   wire [8:0] 	opf_next;
+   wire [8:0] 	opf;
+   wire [5:1] 	early_frf_rnum;
+   wire [5:1] 	frf_rnum;
+   wire [1:0]   frf_tid;
+   wire [1:0]   fpu_rnd;
+
+   wire 	thr_match_mw2;
+   wire 	thr_match_ww2;
+   wire 	thr_match_fpw2;
+   wire [1:0] 	tid_e,
+		tid_m,
+		tid_w,
+                tid_w2;
+
+   wire 	fpop1_ready_w2_next;
+   wire 	fpop2_ready_w3_next;
+   wire 	fpop1_ready_w2;
+   wire 	fpop2_ready_w3;
+   wire   issue_fpop2;
+   
+   wire 	ldfsr,
+		ldxfsr,
+		stfsr_e,
+		stfsr_w,
+		stfsr_qual_w,
+		ldfsr_vld,
+		ldxfsr_vld;
+   wire   stfsr_w2;
+   wire   stfsr_w2_vld;
+   
+   wire   clear_ftt;
+   
+   wire [1:0] 	ldfsr_next;
+   
+   wire 	is_fpu_result;
+   
+   wire   output_sel_rs1_next;
+   wire   output_sel_frf_next;
+   wire   output_sel_fsr_next;
+   wire   output_sel_rs2_next;
+   wire   output_sel_rs1;
+   wire   output_sel_frf;
+   wire   output_sel_fsr;
+   wire   output_sel_rs2;
+
+   wire   ffu_op_done_next;
+   wire   ffu_op_done;
+   wire   ffu_op_done_vld;
+   wire 	external_wen_next;
+   wire 	lsu_pkt_vld;
+   wire [1:0] 	lsu_pkt_type;
+   wire 	store_ready;
+   wire 	load_pending;
+   wire 	load_pending_next;
+   wire   blk_ld_done;
+   wire   blk_ld_m;
+   wire 	blk_load_pending;
+   wire 	blk_load_pending_next;
+   wire 	fp_pending,
+		fp_pending_next;
+
+   wire [3:0] 	fcc_num_dec;
+   wire [1:0] 	fcc_num,
+		fcc_num_next;
+   
+   wire [7:0] 	fpu_fcc;
+   wire [9:0] 	fp_exc_w2;
+
+   wire         fcc_sel_fpu;
+   wire         fcc_sel_ldx;
+   wire         fcc_sel_ld;
+   wire         fcc_sel_old;
+   wire 	cc_changed;
+   wire 	rf_wen;
+   wire   rf_wen_next;
+   wire   rf_ecc_gen_next;
+   wire   rf_ecc_gen;
+   wire   vis_wen_next;
+   wire   vis_result;
+
+   wire [4:0] fsr_tem_d1;
+   wire       ieee_trap;
+   wire  	take_ieee_trap;
+   wire 	take_other_trap;
+   wire [4:0] 	ieee_trap_vec;
+   wire 	fpexc_nxc;
+   wire   fpexc_ofc;
+   wire   fpexc_ufc;
+   
+   wire [1:0] error_detected;
+   wire [1:0] possible_ue;
+   wire [1:0] ce;
+   wire [1:0] prev_err_detected;
+   wire [1:0] prev_poss_ue;
+   wire       rollback_fst_m;
+   wire       rollback_fst_w;
+   wire       rollback_rs2_w2;
+   wire       rollback_rs1_w3;
+   wire       rollback_c1_next;
+   wire   rollback_c1;
+   wire   rollback_c1_vld;
+   wire   rollback_c2;
+   wire   rollback_c3;
+   wire   rolled_back_next;
+   wire   rolled_back;
+   wire   chk_rs1_w2;
+   wire 	check_ecc_next;
+   wire [1:0] chk_ecc_m;
+   wire [1:0] chk_ecc_w;
+   wire [1:0] chk_ecc_w2;
+   wire [1:0] chk_ecc;
+   wire [1:0] chk_ecc_prev;
+   wire   disable_ce_m;
+   wire   disable_ce_w;
+   wire 	fst_ce_w;
+   wire 	fst_ue_w;
+   wire 	fst_ce_w2;
+   wire 	fst_ue_w2;
+   wire 	rs2_ce_w2;
+   wire 	rs2_ue_w2;
+   wire 	rs2_fst_ce_w2_vld;
+   wire 	rs2_fst_ue_w2_vld;
+   wire 	rs2_fst_ce_w3;
+   wire 	rs2_fst_ue_w3;
+   wire   rs1_ce_w3;
+   wire 	ce_w3;
+   wire 	ue_w3;
+   wire   nceen;
+   wire   nceen_next;
+   wire   ue_trap_w3;
+   wire [1:0] 	previous_ce;
+   wire         previous_ue;
+   wire [1:0] 	ecc_wen_next;
+   wire [1:0] 	ecc_wen_gen_next;
+   wire [1:0] 	ecc_wen_gen;
+   wire [1:0] 	ecc_wen;
+   wire 	inject_err_next;
+   wire [6:0] 	err_data;
+   wire 	inject_err;
+   wire   wen_rs1_ecc;
+   wire   wen_rs2_ecc;
+   wire   ecc_kill_rs2_w2;
+   wire [13:0] new_err_synd;
+   wire [13:0] err_synd_d1;
+   wire [13:0] err_synd_next;
+   wire [5:0]  new_err_reg;
+   wire [5:0]  err_reg_next;
+   wire [5:0]  err_reg_d1;
+   wire        log_new_err;
+   wire   kill_st_ce_w;
+   wire   possible_kill_st_ue_m;
+   wire   possible_kill_st_ce_m;
+   wire   possible_kill_st_ue_w;
+   wire   possible_kill_st_ce_w;
+   
+   wire 	unimpl_op_e,
+		unimpl_op_all_e,
+		unimpl_op_m,
+		unimpl_op_w,
+		unimpl_qual_w,
+		unimpl_qual_w2,
+		unimpl_op_w2;
+   wire    illegal_vis_e;
+   wire    illegal_vis_m;
+   wire    illegal_blk_m;
+   wire    illegal_rs1_e;
+   wire    illegal_field_e;
+   wire    convert_op_e;
+   
+   wire          cpx_vld_d1;
+   wire          cpx_fcmp_d1;
+   wire [3:0]    cpx_req_d1;
+   wire [1:0]    cpx_fccval_d1;
+   wire [4:0]    cpx_fpexc_d1;
+
+   wire   bst_m;
+   wire   bst_w;
+   wire   st_dtlb_perr_w2_l;
+   wire   can_issue_bst_c2;
+   wire   other_mem_op_e;
+   wire [5:0] bst_rs;
+   wire [2:0] bst_cnt;
+   wire [2:0] bst_cnt_next;
+   wire       bst_read_req;
+   wire       bst_issue_c1;
+   wire       bst_issue_c2;
+   wire       bst_issue_c3;
+   wire       bst_issue_c4;
+   wire       bst_issue_c5;
+   wire       bst_issue_c6;
+   wire       bst_issue_c1_next;
+   wire       bst_issue_c2_next;
+   wire       bst_issue_c3_next;
+   wire       bst_issue_c4_next;
+   wire       bst_issue_c5_next;
+   wire       bst_issue_c6_next;
+   wire       bst_done;
+   wire [2:0] bld_cnt_d1;
+   wire [2:0] bld_cnt_d2;
+   wire [2:0] bld_cnt_d3;
+   wire       stb_full0;
+   wire       stb_full1;
+   wire       stb_full2;
+   wire       stb_full3;
+   wire       stb_full_c2;
+   wire [5:0] bst_stall_cnt;
+   wire [5:0] bst_stall_cnt_next;
+   wire       bst_stall_req;
+   wire       bst_stall_req_next;
+   wire       fld_done;
+   wire       ld_ret;
+   wire   bst_ce_c4;
+   wire   bst_ue_c4;
+   wire   fixed_bst_ce;
+   wire   fixed_bst_ce_next;
+   wire   blk_asi_m;
+   
+//
+// Code begins here
+//
+   // Local clock is a plain alias of the incoming rclk.
+   assign clk = rclk;
+   // Reset flop: grst_l is sampled on clk; arst_l drives the cell's rst_l pin
+   // (presumably an asynchronous clear, going by the cell name - confirm in the cell library).
+   dffrl_async rstff(.clk(clk), .rst_l(arst_l), .din(grst_l), .q(ffu_reset_l),
+                     .se(se), .si(), .so());
+   assign reset        = ~ffu_reset_l;   // active-high reset used locally
+   assign ctl_dp_rst_l = ffu_reset_l;    // active-low copy sent to the datapath
+   // Stage cpx data by one cycle; delay the LSU block-load count by one, two and three cycles
+   dff_s #(13) cpx_dff(.clk(clk), .se(se), .si(), .so(),
+                       .din({cpx_vld, cpx_fcmp, cpx_req[3:0], cpx_fccval[1:0], cpx_fpexc[4:0]}),
+                       .q  ({cpx_vld_d1, cpx_fcmp_d1, cpx_req_d1[3:0], cpx_fccval_d1[1:0], cpx_fpexc_d1[4:0]}));
+   dff_s #(3) lsu_bld_cnt1_dff(.clk(clk), .din(lsu_ffu_bld_cnt_w[2:0]), .q(bld_cnt_d1[2:0]),
+                               .se(se), .si(), .so());
+   dff_s #(3) lsu_bld_cnt2_dff(.clk(clk), .din(bld_cnt_d1[2:0]), .q(bld_cnt_d2[2:0]),
+                               .se(se), .si(), .so());
+   dff_s #(3) lsu_bld_cnt3_dff(.clk(clk), .din(bld_cnt_d2[2:0]), .q(bld_cnt_d3[2:0]),
+                               .se(se), .si(), .so());
+
+//----------------------------------------
+// Decode Instruction From IFU
+//----------------------------------------
+   // D -> E staging of the three op-class strobes from the IFU.
+   dff_s #(3) fpop_d2e(.clk(clk), .din({ifu_ffu_fpop1_d, ifu_ffu_fpop2_d, ifu_ffu_visop_d}),
+                       .q({fpop1_e, fpop2_e, visop_e}), .se(se), .si(), .so());
+   // Held copies: reloaded from E whenever an op is in E (or on reset), otherwise recirculated.
+   assign {fpop1_next, fpop2_next, visop_next} =
+          (any_op_e | reset) ? {fpop1_e, fpop2_e, visop_e} : {fpop1, fpop2, visop};
+   dff_s #(3) fpop_dff(.clk(clk), .din({fpop1_next, fpop2_next, visop_next}),
+                       .q({fpop1, fpop2, visop}), .se(se), .si(), .so());
+   
+   assign  fpop_size_0 = (opf[1:0] == 2'b00);
+   assign  fpop_size_1 = (opf[1:0] == 2'b01);
+   // full decodes of the low nibble opf[3:0]; the suffix is the hex value matched
+   assign  fpop_low_1 = (opf[3:0] == 4'h1);
+   assign  fpop_low_2 = (opf[3:0] == 4'h2);
+   assign  fpop_low_4 = (opf[3:0] == 4'h4);
+   assign  fpop_low_5 = (opf[3:0] == 4'h5);
+   assign  fpop_low_6 = (opf[3:0] == 4'h6);
+   assign  fpop_low_8 = (opf[3:0] == 4'h8);
+   assign  fpop_low_9 = (opf[3:0] == 4'h9);
+   assign  fpop_low_a = (opf[3:0] == 4'ha);
+   assign  fpop_low_d = (opf[3:0] == 4'hd);
+   assign  fpop_low_e = (opf[3:0] == 4'he);
+   // full decodes of the upper five bits opf[8:4]; the suffix is the hex value matched
+   assign  fpop_high_0  = (opf[8:4] == 5'h00);
+   assign  fpop_high_2  = (opf[8:4] == 5'h02);
+   assign  fpop_high_4  = (opf[8:4] == 5'h04);
+   assign  fpop_high_5  = (opf[8:4] == 5'h05);
+   assign  fpop_high_6  = (opf[8:4] == 5'h06);
+   assign  fpop_high_8  = (opf[8:4] == 5'h08);
+   assign  fpop_high_a  = (opf[8:4] == 5'h0a);
+   assign  fpop_high_c  = (opf[8:4] == 5'h0c);
+   assign  fpop_high_d  = (opf[8:4] == 5'h0d);
+   assign  fpop_high_e  = (opf[8:4] == 5'h0e);
+   assign  fpop_high_10 = (opf[8:4] == 5'h10);
+   assign  fpop_high_18 = (opf[8:4] == 5'h18);
+   // An fpop1/fpop2 is unimplemented unless its (opf[3:0], opf[8:4]) decode pair appears in the list inside ~( ... ).
+   assign  unimpl_op_e = ~((fpop_low_1 | fpop_low_2) & (fpop_high_0 | fpop_high_4 | fpop_high_8 |
+                                                        fpop1_e & fpop_high_d |
+                                                        fpop2_e & (fpop_high_5 | fpop_high_c |
+                                                                           fpop_high_10 | fpop_high_18)) |
+                           (fpop_low_4 | fpop_low_8) & fpop1_e & (fpop_high_8 | fpop_high_c) |
+                           (fpop_low_5 | fpop_low_6) & (fpop_high_4 |
+                                                        fpop1_e & fpop_high_0 |
+                                                        fpop2_e & (fpop_high_2 | fpop_high_5 |
+                                                                           fpop_high_6 | fpop_high_a |
+                                                                           fpop_high_c | fpop_high_e)) |
+                           fpop_low_6 & fpop1_e & fpop_high_c |
+                           fpop_low_9 & fpop1_e & (fpop_high_0 | fpop_high_4 | fpop_high_6 |
+                                                           fpop_high_c) |
+                           fpop_low_a & fpop1_e & (fpop_high_0 | fpop_high_4) |
+                           (fpop_low_d | fpop_low_e) & fpop1_e & fpop_high_4) & (fpop1_e | fpop2_e); // never set for visops or ld/st
+   assign illegal_field_e = fpop2_e & (fpop_high_5 & |rd_e[4:2] |// bits 29:27 must be zero on fcmp
+                                       ~opf[4] & ~opf[2] & rs1_e[4]);// bit 18 must be zero on fmovcc
+                                       
+   // convert_op_e: any fpop1 with opf[7] set
+   assign  convert_op_e = fpop1_e & opf[7];
+   assign  illegal_rs1_e = (frs1_e[4:0] != 5'b00000) & (move_e & ~rollback_c3 | convert_op_e); // rs1 field must be zero on moves (outside rollback_c3) and converts
+      
+//
+// Decode size of source and destination. don't care for unimplemented ops
+//
+   assign source_single_e = (fpop_high_c & fpop_size_0) | //32b int
+                                 (opf[0]);// single (also quad, but those are illegal)
+
+/* -----\/----- EXCLUDED -----\/-----
+   assign  convert_op = (ifu_ffu_fpopcode_d[7] |fpop_high_6_d) & ifu_ffu_fpop1_d;
+   assign dest_single_d = (is_fpop_d)? (fpop_size_1_d & ~convert_op) | // sgl and not conv
+                                         (ifu_ffu_fpop1_d & ifu_ffu_fpopcode_d[7] & ~ifu_ffu_fpopcode_d[3] &
+                                          (~fpop_high_8_d | ifu_ffu_fpopcode_d[2])) |// int to s or float to short int
+                                           (ifu_ffu_visop_d & ifu_ffu_fpopcode_d[0]) :// vis single
+                                        ifu_ffu_ldst_single_d;
+ -----/\----- EXCLUDED -----/\----- */
+   assign dest_single_e = (fpop1_e & (~opf[1] & opf[0] & ~(opf[7] |fpop_high_6) | // sgl and not conv
+                                              opf[7] & ~opf[3] &
+                                              (~fpop_high_8 | opf[2])) |// int to s or float to short int
+                           fpop2_e & fpop_size_1 | // fpop2 with single size field
+                           visop_e & opf[0] | // vis single
+                           (fst_e | fld_e) & ldst_single_e); // FP load/store: size flag staged from the IFU
+
+   // Every reason an E-stage op must be reported as unimplemented/illegal.
+   assign unimpl_op_all_e = (unimpl_op_e | ifu_ffu_quad_op_e | 
+                             illegal_rs1_e | illegal_field_e);
+   // Pipe the flag E -> M -> W.  Width overrides use the standard #(N) form.
+   dff_s #(1) qopm_ff(.din (unimpl_op_all_e),
+                      .q   (unimpl_op_m),
+                      .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) qopw_ff(.din (unimpl_op_m),
+                      .q   (unimpl_op_w),
+                      .clk (clk), .se(se), .si(), .so());
+   // Qualify in W with kill_unimpl_w, carry into W2, where flush_w2 can still cancel it.
+   assign unimpl_qual_w = unimpl_op_w & ~kill_unimpl_w;
+   assign unimpl_qual_w2 = unimpl_op_w2 & ~flush_w2;
+   dff_s #(1) qopw2_ff(.din (unimpl_qual_w),
+                       .q   (unimpl_op_w2),
+                       .clk (clk), .se(se), .si(), .so());
+   
+   
+   // Decode register encoding (bit[5] wrapped to bit[0] for non singles)
+   // Also the storage is flopped around so odd regs are at even addresses
+   // in the regfile for singles.  this helps because everything external
+   // expects data to be [63:0] not [31:0, 63:32] on doubles.
+   //
+   // Each 6-bit number is {field[0] if not single, field[4:1], field[0] if single}.
+   // rs1 and rs2 use the source size, rd uses the destination size.
+
+   assign rs1_e[5:0] = {frs1_e[0] & ~source_single_e,   // [5]   zero for singles
+                        frs1_e[4:1],                     // [4:1] passed straight through
+                        frs1_e[0] & source_single_e};    // [0]   only nonzero for singles
+
+   assign rs2_e[5:0] = {frs2_e[0] & ~source_single_e,   // [5]   zero for singles
+                        frs2_e[4:1],                     // [4:1] passed straight through
+                        frs2_e[0] & source_single_e};    // [0]   only nonzero for singles
+
+   assign rd_e[5:0]  = {frd_e[0] & ~dest_single_e,      // [5]   zero for singles
+                        frd_e[4:1],                      // [4:1] passed straight through
+                        frd_e[0] & dest_single_e};       // [0]   only nonzero for singles
+
+
+   // Decode general type of operation
+   assign is_fpop_d = ifu_ffu_fpop1_d | ifu_ffu_fpop2_d | ifu_ffu_visop_d;
+	  
+   // Do locally: fpop1 with opf[8:4]==0, or the held fpop1/fpop2 replayed while rollback_c3 is set
+   assign move_e = fpop_high_0 & (fpop1_e | (fpop1 & rollback_c3)) |
+                                                (rollback_c3 & fpop2 & ~opf[4]);// rollback cond_move
+   // cond moves don't get rollback because they either don't happen or become unconditional
+   assign cond_move_e = fpop2_e & ~opf[4];
+
+   assign abs_w = move_w & fpop_high_0 & opf[3]; // W-stage move with opf[8:4]==0 and opf[3] set
+   assign neg_w = move_w & fpop_high_0 & opf[2]; // W-stage move with opf[8:4]==0 and opf[2] set
+
+   // Send to FPU: fpop1 outside the opf[8:4]==0 group, or fpop2 with opf[8:4]==5, for a new op in E or a rollback_c3 replay
+   assign fpu_op_e = ((~fpop_high_0 & fpop1_next) | (fpop_high_5 & fpop2_next)) & (any_op_e | rollback_c3);
+
+   // FRF read indication
+   assign ren_rs2_e = (fpop1_e | fpop2_e | visop_e) | rollback_c3;
+   assign ren_rs2_e_vld = ren_rs2_e & ~vis_nofrf_e;
+   assign ren_rs2_m_vld = ren_rs2_m & (cond_move_m & ifu_ffu_mvcnd_m | ~cond_move_m); // a cond move only reads rs2 when its condition holds
+   assign ren_rs1_e = ((~vis_nofrf_e & visop_e) | // all vis except siam read rs1
+                       (rollback_c3 & visop) |
+                       ((fpop2_next & opf[4]) | //FCMP
+                        (fpop1_next & ~opf[7] & opf[6])) &	// add,sub,mul,div
+                       (any_op_e | rollback_c3 & ~reset));
+   assign ren_rs1_w_vld = ren_rs1_w & ~kill_eccchk_w;
+   assign ren_rs1_w2_vld = ren_rs1_w2 & ~flush_w2;
+
+   //------------------------------------
+   // Store and wait for FPop to complete
+   //------------------------------------
+   // Storage of control signals
+
+   // >>>>> added ~kill_fp
+   // set these in e so that kill_fp doesn't kill spuriously
+   // Scalar FP load pending: set by a load in E (set wins); cleared by an LSU return, kill_fp, or a block load in M.
+   assign load_pending_next = fld_e |
+          (load_pending & ~(lsu_ffu_ld_vld | kill_fp | blk_ld_m));
+   dffr_s ldpend_dff(.clk(clk), .rst(reset),
+                     .din(load_pending_next), .q(load_pending),
+                     .se(se), .si(), .so());
+
+   assign fld_done = load_pending & lsu_ffu_ld_vld & ~kill_fp;
+
+   // Block load pending: set by an unkilled block load in M; held until kill_fp or ffu_ifu_fpop_done_w2.
+   assign blk_ld_m = fld_m & blk_asi_m;
+   assign blk_load_pending_next = (blk_ld_m & ~kill_m) |
+          (blk_load_pending & ~(kill_fp | ffu_ifu_fpop_done_w2));
+   dffr_s blk_ldpend_dff(.clk(clk), .rst(reset),
+                         .din(blk_load_pending_next), .q(blk_load_pending),
+                         .se(se), .si(), .so());
+
+   // Done when an unkilled LSU return arrives with bld_cnt_d1 == 7 while a block load is pending.
+   assign blk_ld_done = blk_load_pending & lsu_ffu_ld_vld & ~kill_fp & (bld_cnt_d1[2:0] == 3'b111);
+
+   // FPU op pending: set when an op is sent in E (set wins); cleared by a returning result or kill_fp.
+   assign fp_pending_next = fpu_op_e |
+          (fp_pending & ~(is_fpu_result | kill_fp));
+   dffr_s fppend_dff(.clk(clk), .rst(reset),
+                     .din(fp_pending_next), .q(fp_pending),
+                     .se(se), .si(), .so());
+
+   // rs1: the raw 5-bit field is staged D -> E; the decoded 6-bit number (rs1_e) is
+   // loaded into the holding register when an op is in E and recirculated otherwise.
+   dff_s #(5) rs1_d2e(.clk(clk), .din(ifu_ffu_frs1_d[4:0]), .q(frs1_e[4:0]),
+                      .se(se), .si(), .so());
+   mux2ds #(6) rs1_mux(.dout(rs1_next[5:0]),
+                       .sel0(~any_op_e), .in0(rs1[5:0]),      // hold
+                       .sel1(any_op_e),  .in1(rs1_e[5:0]));   // load
+   dff_s #(6) rs1_dff(.clk(clk),
+                      .din(rs1_next[5:0]),
+                      .q(rs1[5:0]),
+                      .se(se), .si(), .so());
+   // rs2: same arrangement as rs1, using the rs2 field.
+   dff_s #(5) rs2_d2e(.clk(clk), .din(ifu_ffu_frs2_d[4:0]), .q(frs2_e[4:0]),
+                      .se(se), .si(), .so());
+   mux2ds #(6) rs2_mux(.dout(rs2_next[5:0]),
+                       .sel0(~any_op_e), .in0(rs2[5:0]),      // hold
+                       .sel1(any_op_e),  .in1(rs2_e[5:0]));   // load
+   dff_s #(6) rs2_dff(.clk(clk),
+                      .din(rs2_next[5:0]),
+                      .q(rs2[5:0]),
+                      .se(se), .si(), .so());
+   // rd: the destination field is staged D -> E together with the IFU's load/store
+   // single flag; the decoded rd_e is then held in the same way as rs1/rs2.
+   dff_s #(6) rd_d2e(.clk(clk),
+                     .din({ifu_ffu_ldst_single_d, ifu_ffu_frd_d[4:0]}),
+                     .q({ldst_single_e, frd_e[4:0]}),
+                     .se(se), .si(), .so());
+   mux2ds #(6) rd_mux(.dout(rd_next[5:0]),
+                      .sel0(~any_op_e), .in0(rd[5:0]),        // hold
+                      .sel1(any_op_e),  .in1(rd_e[5:0]));     // load
+   dff_s #(6) rd_dff(.clk(clk),
+                     .din(rd_next[5:0]),
+                     .q(rd[5:0]),
+                     .se(se), .si(), .so());
+   // rs size: source_single_e is loaded when an op is in E, otherwise the old value is held
+   mux2ds source_single_mux(.dout(source_single_next),
+                            .sel0(~any_op_e), .in0(source_single),
+                            .sel1(any_op_e),  .in1(source_single_e));
+   dff_s source_single_dff(.clk(clk),
+                           .din(source_single_next), .q(source_single),
+                           .se(se), .si(), .so());
+
+   // rd size: same load/hold behaviour, written as a conditional assign
+   assign dest_single_next = any_op_e ? dest_single_e
+                                      : dest_single;
+   dff_s dest_single_dff(.clk(clk),
+                         .din(dest_single_next), .q(dest_single),
+                         .se(se), .si(), .so());
+
+   // thread: tid_e is loaded when an op is in E, otherwise the old value is held
+   mux2ds #(2) tid_mux(.dout(tid_next[1:0]),
+                       .sel0(~any_op_e), .in0(tid[1:0]),
+                       .sel1(any_op_e),  .in1(tid_e[1:0]));
+   dff_s #(2) tid_dff(.clk(clk),
+                      .din(tid_next[1:0]), .q(tid[1:0]),
+                      .se(se), .si(), .so());
+
+   // extra tid to help fanout for critical signals
+   // (a second flop fed by the same tid_next value)
+   dff_s #(2) extra_tid_dff(.clk(clk),
+                            .din(tid_next[1:0]),
+                            .q(extra_tid[1:0]),
+                            .se(se), .si(), .so());
+   // fcc num: loaded straight from the D-stage IFU value when an fpop/visop is in D, otherwise held
+   mux2ds #(2) fcc_mux(.dout(fcc_num_next[1:0]),
+                       .sel0(~is_fpop_d),
+                       .in0 (fcc_num[1:0]),
+                       .sel1(is_fpop_d),
+                       .in1 (ifu_ffu_fcc_num_d[1:0]));
+   dff_s #(2) fcc_dff(.clk(clk),
+                      .din(fcc_num_next[1:0]),
+                      .q  (fcc_num[1:0]),
+                      .se(se), .si(), .so());
+
+   // ldfsr: the {ldfsr, ldxfsr} pair is loaded when an FP load is in D, otherwise held
+   mux2ds #(2) ldfsr_mux(.dout(ldfsr_next[1:0]),
+                         .sel0(~ifu_ffu_fld_d),
+                         .in0 ({ldfsr, ldxfsr}),
+                         .sel1(ifu_ffu_fld_d),
+                         .in1 ({ifu_ffu_ldfsr_d, ifu_ffu_ldxfsr_d}));
+   dff_s #(2) ldfsr_dff(.clk(clk),
+                        .din(ldfsr_next[1:0]),
+                        .q  ({ldfsr, ldxfsr}),
+                        .se(se), .si(), .so());
+
+   // op code: the 9-bit opf field is loaded when an fpop/visop is in D, otherwise held
+   mux2ds #(9) opf_mux(.dout(opf_next[8:0]),
+                       .sel0(~is_fpop_d),
+                       .in0 (opf[8:0]),
+                       .sel1(is_fpop_d),
+                       .in1 (ifu_ffu_fpopcode_d[8:0]));
+   dff_s #(9) opf_dff(.clk(clk),
+                      .din(opf_next[8:0]),
+                      .q  (opf[8:0]),
+                      .se(se), .si(), .so());
+   //----------
+   // FP Pipe
+   //----------
+   dff_s fop_e2m(.clk(clk),
+                 .din(any_op_e), .q(any_op_m),
+                 .se(se), .si(), .so());
+   // M -> W is fed by any_op_m_valid rather than the raw M-stage strobe
+   dff_s fop_m2w(.clk(clk),
+                 .din(any_op_m_valid), .q(any_op_w),
+                 .se(se), .si(), .so());
+   dff_s fop_w2w2(.clk(clk),
+                  .din(any_op_w), .q(any_op_w2),
+                  .se(se), .si(), .so());
+   // FP store strobe D -> E -> M -> W; FP load strobe D -> E -> M
+   dff_s fst_d2e(.clk(clk),
+                 .din(ifu_ffu_fst_d), .q(fst_e),
+                 .se(se), .si(), .so());
+   dff_s fst_e2m(.clk(clk),
+                 .din(fst_e), .q(fst_m),
+                 .se(se), .si(), .so());
+   dff_s fst_m2w(.clk(clk),
+                 .din(fst_m), .q(fst_w),
+                 .se(se), .si(), .so());
+   dff_s fld_d2e(.clk(clk),
+                 .din(ifu_ffu_fld_d), .q(fld_e),
+                 .se(se), .si(), .so());
+   dff_s fld_e2m(.clk(clk),
+                 .din(fld_e), .q(fld_m),
+                 .se(se), .si(), .so());
+
+   dff_s ren_rs2_e2m(.clk(clk),                       // rs2 read enable: E (qualified) -> M
+                     .din(ren_rs2_e_vld), .q(ren_rs2_m),
+                     .se(se), .si(), .so());
+   dff_s ren_rs2_m2w(.clk(clk),                       // M (qualified) -> W
+                     .din(ren_rs2_m_vld), .q(ren_rs2_w),
+                     .se(se), .si(), .so());
+   dff_s ren_rs2_w2w2(.clk(clk),                      // W -> W2
+                      .din(ren_rs2_w), .q(ren_rs2_w2),
+                      .se(se), .si(), .so());
+   dff_s ren_rs2_w22w3(.clk(clk),                     // W2 -> W3
+                       .din(ren_rs2_w2), .q(ren_rs2_w3),
+                       .se(se), .si(), .so());
+   dff_s ren_rs2_w32w4(.clk(clk),                     // W3 -> W4
+                       .din(ren_rs2_w3), .q(ren_rs2_w4),
+                       .se(se), .si(), .so());
+   // rs1 read enable runs one stage deeper (to W5) and is qualified in W and W2
+   dff_s ren_rs1_e2m(.clk(clk),                       // E -> M
+                     .din(ren_rs1_e), .q(ren_rs1_m),
+                     .se(se), .si(), .so());
+   dff_s ren_rs1_m2w(.clk(clk),                       // M -> W
+                     .din(ren_rs1_m), .q(ren_rs1_w),
+                     .se(se), .si(), .so());
+   dff_s ren_rs1_w2w2(.clk(clk),                      // W (qualified) -> W2
+                      .din(ren_rs1_w_vld), .q(ren_rs1_w2),
+                      .se(se), .si(), .so());
+   dff_s ren_rs1_w22w3(.clk(clk),                     // W2 (qualified) -> W3
+                       .din(ren_rs1_w2_vld), .q(ren_rs1_w3),
+                       .se(se), .si(), .so());
+   dff_s ren_rs1_w32w4(.clk(clk),                     // W3 -> W4
+                       .din(ren_rs1_w3), .q(ren_rs1_w4),
+                       .se(se), .si(), .so());
+   dff_s ren_rs1_w42w5(.clk(clk),                     // W4 -> W5
+                       .din(ren_rs1_w4), .q(ren_rs1_w5),
+                       .se(se), .si(), .so());
+  
+   // Conditional-move strobe, E -> M
+   dff_s cond_move_e2m(.clk(clk), .din(cond_move_e), .q(cond_move_m),
+                       .se(se), .si(), .so());
+
+   // Move strobe: E -> M, then M -> W fed by move_m_valid
+   dff_s move_e2m(.clk(clk), .din(move_e), .q(move_m),
+                  .se(se), .si(), .so());
+   dff_s move_m2w(.clk(clk), .din(move_m_valid), .q(move_w),
+                  .se(se), .si(), .so());
+
+   // Move write enable: M -> W -> W2
+   dff_s move_wen_m2w(.clk(clk), .din(move_wen_m), .q(move_wen_w),
+                      .se(se), .si(), .so());
+   dff_s move_wen_w2w2(.clk(clk), .din(move_wen_w), .q(move_wen_w2),
+                       .se(se), .si(), .so());
+
+   // Qualified W-stage move (move_w_valid) carried into W2
+   dff_s move_wdff(.clk(clk), .din(move_w_valid), .q(move_w2),
+                   .se(se), .si(), .so());
+
+   // STFSR strobe, D -> E
+   dff_s stfsr_d2e(.clk(clk), .din(ifu_ffu_stfsr_d), .q(stfsr_e),
+                   .se(se), .si(), .so());
+
+   // FPU-op strobe: E -> M -> W, then W2 and W3 fed by the qualified W and W2 versions
+   dff_s fpu_op_e2m(.clk(clk), .din(fpu_op_e), .q(fpu_op_m),
+                    .se(se), .si(), .so());
+   dff_s fpu_op_m2w(.clk(clk), .din(fpu_op_m), .q(fpu_op_w),
+                    .se(se), .si(), .so());
+   dff_s fpu_op_w2w2(.clk(clk), .din(fpu_op_w_vld), .q(fpu_op_w2),
+                     .se(se), .si(), .so());
+   dff_s fpu_op_w22w3(.clk(clk), .din(fpu_op_w2_vld), .q(fpu_op_w3),
+                      .se(se), .si(), .so());
+
+   // Thread ID pipeline: D -> E -> M -> W -> W2
+   dff_s #(2) tid_d2e(.clk(clk),
+                      .din(ifu_ffu_tid_d[1:0]), .q(tid_e[1:0]),
+                      .se(se), .si(), .so());
+
+   dff_s #(2) tid_e2m(.clk(clk),
+                      .din(tid_e[1:0]), .q(tid_m[1:0]),
+                      .se(se), .si(), .so());
+
+   dff_s #(2) tid_m2w(.clk(clk),
+                      .din(tid_m[1:0]), .q(tid_w[1:0]),
+                      .se(se), .si(), .so());
+
+   dff_s #(2) tid_w2w2(.clk(clk),
+                       .din(tid_w[1:0]), .q(tid_w2[1:0]),
+                       .se(se), .si(), .so());
+
+   // kill_m delayed by one cycle into W
+   dff_s dff_killed_w(.clk(clk),
+                      .din(kill_m), .q(killed_w),
+                      .se(se), .si(), .so());
+
+   // flush_w delayed by one cycle into W2
+   dff_s dff_flush_w2(.clk(clk),
+                      .din(flush_w), .q(flush_w2),
+                      .se(se), .si(), .so());
+   
+   // Thread-ID comparisons against the thread ID in W2.
+   assign  thr_match_mw2  = (tid_m[1:0] == tid_w2[1:0]);   // M-stage thread
+
+   assign  thr_match_ww2  = (tid_w[1:0] == tid_w2[1:0]);   // W-stage thread
+
+   // tid[] is the held thread ID captured when an op was last in E
+   assign  thr_match_fpw2 = (tid[1:0] == tid_w2[1:0]);
+
+   // new fpops squash previous ones (only possible in m or w, but w will also have ifu_tlu_flush_w)
+   // all kill_w signals do not include lsu_ffu_flush_pipe_w.  This must be included at the final destination
+   assign flush_w = (lsu_ffu_flush_pipe_w | ifu_tlu_flush_w) & ~rolled_back;
+   assign any_op_e = fpop1_e | fpop2_e | fst_e | fld_e | visop_e; // any FFU-relevant op class present in E
+   assign any_op_m_valid = any_op_m & ~any_op_e; // the op in M is dropped when another op is in E
+   assign  kill_m = (thr_match_mw2 & flush_w2) | any_op_e; // same-thread flush in W2, or an op in E
+   assign  kill_eccchk_w = (~ifu_tlu_inst_vld_w | killed_w | unimpl_op_w | any_op_e | 
+                            (thr_match_ww2 & flush_w2)) & ~rolled_back;
+   // unimplemented ops don't check rolled_back because they trap before rollback happens
+   assign  kill_unimpl_w = (~ifu_tlu_inst_vld_w | killed_w | any_op_e 
+	                  | (thr_match_ww2 & flush_w2));
+   assign  kill_w = (~ifu_tlu_inst_vld_w | killed_w | any_op_e |
+	                  unimpl_op_w | ffu_lsu_kill_fst_w | (thr_match_ww2 & flush_w2)) & ~rolled_back;
+   // this kills the "pending" signals that are set in the E stage.
+   // Since they are set in E all the kills can be delayed by one cycle without
+   // squashing a new, valid op
+   assign  kill_fp = (thr_match_fpw2 & flush_w2 | any_op_e |
+		                  any_op_w & (~ifu_tlu_inst_vld_w | unimpl_op_w)) & ~rolled_back;
+   
+
+//----------------------------
+// Control for muxes that manipulate data to/from FRF
+//----------------------------
+   // implement fmov/fmovcc: a cond move only writes when ifu_ffu_mvcnd_m is set
+   assign  move_wen_m = move_m | (cond_move_m & ifu_ffu_mvcnd_m);
+   assign  move_wen_w2_valid = move_wen_w2 & move_w2_vld;
+   assign  move_m_valid = (move_m | cond_move_m);
+   // used for updating fsr
+   assign  move_w_valid = move_w & ~kill_w;
+   assign  move_w2_vld = move_w2 & ~flush_w2 & ~rollback_rs2_w2 & ~(rs2_ue_w2 & nceen); // also dropped on rs2 rollback or an rs2 UE with nceen set
+
+   // negation or absolute value happen to rs2 in the m_stage if needed (NOTE(review): the qualifiers below are abs_w/neg_w - confirm the stage)
+   assign  ctl_dp_sign[1] = (dp_ctl_rs2_sign[1] ^ neg_w) & ~abs_w;
+   assign  ctl_dp_sign[0] = (source_single) ? 
+			     (dp_ctl_rs2_sign[0] ^ neg_w) & ~abs_w : 
+	                      dp_ctl_rs2_sign[0]; // for non-single sources sign[0] passes through unchanged
+
+   //
+   // Shifts to align sgl precision 32b data
+   //
+   // mux for moving around single data from frf
+   
+   // shift on moves or stores
+   assign shift_frf_rs2_m = (rs2[0] ^ rd[0]) & (move_m | cond_move_m | visop_m) & ~fst_e;
+   assign shift_frf_rs1_w = (rs1[0] ^ rd[0]) & visop_w_vld; //check for squash
+   // right: single source with bit[0]==0 that has to move, or a single store in E with rd_e[0]==0
+   assign  shift_frf_right_next = (source_single & shift_frf_rs2_m & ~rs2[0]) |
+                                      (source_single & shift_frf_rs1_w & ~rs1[0]) |
+                                      (dest_single_e & fst_e & ~rd_e[0]);
+   // left: single source with bit[0]==1 that has to move or goes to the FPU; right takes priority
+   assign  shift_frf_left_next = ((source_single & rs2[0] & (shift_frf_rs2_m | fpu_op_m & ~fst_e) |
+                                   source_single & rs1[0] & (shift_frf_rs1_w | fpu_op_w_vld)) 
+                                  & ~shift_frf_right_next);
+   // by construction exactly one of right / left / noshift is set
+   assign  noshift64_frf_next = ~(shift_frf_right_next | shift_frf_left_next);
+   // rst_tri_en turns both shift selects off and forces the noshift select on
+   assign  ctl_dp_shift_frf_right = shift_frf_right & ~rst_tri_en;
+   assign  ctl_dp_shift_frf_left = shift_frf_left & ~rst_tri_en;
+   assign  ctl_dp_noshift64_frf = noshift64_frf | rst_tri_en;
+
+   // fpu expects lower 32 bits to be zero on single operands
+   assign  zero_lower_data_next = ((source_single & ~rs1[0] & fpu_op_w_vld) | 
+                                   (source_single & ~rs2[0] & fpu_op_m));
+   // all four selects are registered one cycle before they reach the datapath
+   dff_s shift_frf_right_dff(.din(shift_frf_right_next), .clk(clk), .q(shift_frf_right),
+                           .se(se), .si(), .so());
+   dff_s shift_frf_left_dff(.din(shift_frf_left_next), .clk(clk), .q(shift_frf_left),
+                           .se(se), .si(), .so());
+   dff_s noshift64_dff(.din(noshift64_frf_next), .clk(clk), .q(noshift64_frf),
+                     .se(se), .si(), .so());
+   dff_s noshift32_dff(.din(zero_lower_data_next), .clk(clk), .q(ctl_dp_zero_low32_frf),
+                     .se(se), .si(), .so());
+   
+   // Selects for rearranging returning data.  ld_ret picks between the two
+   // lsu selects; when ld_ret is low one of the two fpu selects is active.
+   wire    flip_fpu, flip_lsu;
+
+   // data from fpu
+   // data comes in with msb always at b63.  This means that singles with an odd
+   // rd must be flipped so that the data ends up in the correct registers.
+   assign  flip_fpu = rd[0] & dest_single;   // single with odd rd
+
+   // data from lsu
+   // data comes in [63:0].  This means that singles with an even rd must be
+   // flipped so that the data ends up in the correct registers.
+   assign  flip_lsu = ~rd[0] & dest_single;  // single with even rd
+
+   assign  ctl_dp_flip_lsu   =  flip_lsu & ld_ret;
+   assign  ctl_dp_noflip_lsu = ~flip_lsu & ld_ret;
+   // noflip_fpu needs a real (non-fcmp) fpu result; flip_fpu is the fallback
+   assign  ctl_dp_noflip_fpu = is_fpu_result & ~cpx_fcmp_d1 & ~(ld_ret | flip_fpu);
+   assign  ctl_dp_flip_fpu   = ~(ld_ret | ctl_dp_noflip_fpu);
+   
+
+//---------------------------------
+// LSU Interface
+//---------------------------------
+
+   // Stores ride the standard pipeline, so they are accepted automatically
+   // and need no ACK.  The lsu will check for kills in m and w.
+   assign  store_ready = bst_issue_c3 | fst_m;
+
+   // fpu op valid bits, qualified stage by stage
+   assign  fpu_op_w_vld  = fpu_op_w  & ~kill_w;
+   assign  fpu_op_w2_vld = fpu_op_w2 & ~(flush_w2 | ecc_kill_rs2_w2);
+   assign  fpu_op_w3_vld = fpu_op_w3 & ~(ue_trap_w3 | rollback_rs1_w3);
+
+   // request in W2 and wait till an ack is received
+   //    Will never receive ack in the same cycle req was first made
+   assign  ffu_lsu_fpop_rq_vld = fpu_op_w3_vld;
+
+   // Operand 1 becomes ready on a valid op and stays ready until the lsu acks.
+   // don't qual with inst_vld since it takes too much time?
+   // Resolved with Sanjay:
+   //    Will never receive ack in the same cycle req was first made
+   assign  fpop1_ready_w2_next = (~lsu_ffu_ack & fpop1_ready_w2) | fpu_op_w3_vld;
+
+   dffr_s #(1) fpop1_w2_dff(.din (fpop1_ready_w2_next),
+                            .q   (fpop1_ready_w2),
+                            .rst (reset),
+                            .clk (clk),
+                            .se  (se), .si(), .so());
+
+   // once op1 has been accepted, move to w2, and in the next cycle op2 is ready
+   //
+   // C1 -- recv ack, send op1 (fpop1_ready_w2)
+   // C2 -- send op2 (fpop2_ready_w3)
+   assign  fpop2_ready_w3_next = lsu_ffu_ack & fpop1_ready_w2;
+
+   dff_s fpop2_w22w3(.din (fpop2_ready_w3_next),
+                     .q   (fpop2_ready_w3),
+                     .clk (clk),
+                     .se  (se), .si(), .so());
+
+   // valid pkt sent to lsu (after request)
+   assign  issue_fpop2 = ~opf[7] & fpop2_ready_w3;  // not conversion op
+   assign  lsu_pkt_vld = store_ready | issue_fpop2 | fpu_op_w3_vld | fpop1_ready_w2;
+
+   // {store, second operand}
+   assign  lsu_pkt_type[1:0] = {store_ready, fpop2_ready_w3};
+
+   // Create packet for LSU: ffu_lsu_data[80:0].  Only the header bits [80:64] are driven by this assign.
+   //     80 = vld
+   //     79:78 = type (00 = fpu operand 1, 01 = fpu operand 2, 10 = fp store)
+   //     77:76 = tid
+   //     75:68 = floating point opcode
+   //     67:66 = fcc
+   //     65:64 = rounding mode
+   //     63:0 = data
+   assign ffu_lsu_data[80:64] = {lsu_pkt_vld,    // [80]    packet valid
+				 lsu_pkt_type[1:0],    // [79:78] {store_ready, fpop2_ready_w3}
+				 extra_tid[1:0],    // [77:76] thread id
+				 opf[7:0],    // [75:68] floating point opcode
+				 fcc_num[1:0],    // [67:66] fcc
+				 fpu_rnd[1:0]};  // [65:64] rounding mode
+
+   // Select data to send to LSU.  This is calculated one cycle early and flopped.  rs2 is the default source.
+   assign output_sel_rs1_next = fpop2_ready_w3_next & ~fst_e & ~bst_issue_c3_next;     // rs1 goes out as the second operand; rs2 is sent first (fpop1)
+   assign output_sel_frf_next = fst_e & ~stfsr_e | bst_issue_c3_next;   // store data (fp store that is not stfsr, or block store)
+   assign output_sel_fsr_next = fst_e & stfsr_e & ~bst_issue_c3_next;   // stfsr sends the fsr instead of frf data
+   assign output_sel_rs2_next = ~(fpop2_ready_w3_next | fst_e | bst_issue_c3_next);   // default when none of the above applies
+   assign ctl_dp_output_sel_rs1 = output_sel_rs1 & ~rst_tri_en;   // rst_tri_en disables this select
+   assign ctl_dp_output_sel_frf = output_sel_frf & ~rst_tri_en;
+   assign ctl_dp_output_sel_fsr = output_sel_fsr & ~rst_tri_en;
+   assign ctl_dp_output_sel_rs2 = output_sel_rs2 | rst_tri_en;    // rst_tri_en forces the rs2 select
+   dff_s #(4) output_sel_dff(.din({output_sel_rs1_next,output_sel_rs2_next,output_sel_frf_next,output_sel_fsr_next}),   // din and q must list the selects in the same order
+                           .q({output_sel_rs1,output_sel_rs2,output_sel_frf,output_sel_fsr}),
+                           .clk(clk), .se(se), .si(), .so());
+
+   // stfsr tracking: fsr select is flopped into stfsr_w, qualified with ~kill_w, flopped into stfsr_w2, then qualified with ~flush_w2
+   dff_s #1 sfsrw_ff(.din (ctl_dp_output_sel_fsr),
+		               .q   (stfsr_w),
+		               .clk (clk), .se(se), .si(), .so());
+   dff_s stfsr_wdff(.din(stfsr_qual_w), .clk(clk), .q(stfsr_w2), .se(se), .si(), .so());
+   assign stfsr_qual_w =  (stfsr_w & ~kill_w);
+   assign stfsr_w2_vld = stfsr_w2 & ~flush_w2;
+
+
+   //------------------------------------------------------
+   //  Block Stores
+   //------------------------------------------------------
+   // interface with lsu.  bst packet issues in c3
+   // check for stb_full so it doesn't confuse lsu.  Don't have to count inflight packets
+   // because none exist by bst_issue_c2
+   assign        ffu_lsu_blk_st_e = bst_issue_c2 & ~stb_full_c2;
+   assign        ffu_lsu_blk_st_va_e[5:3] = bst_cnt[2:0];   // packet counter supplies va bits [5:3]
+   assign        bst_done = bst_issue_c4 & (bst_cnt[2:0] == 3'b111) & ~bst_ce_c4 & ~(bst_ue_c4 & nceen);   // last packet (count 7) in c4 with no ce and no trapping ue
+
+   mux4ds stb_full_mux (.dout(stb_full_c2),   // pick the store-buffer-full flag of the current fp thread
+                        .in0(stb_full0),
+                        .in1(stb_full1),
+                        .in2(stb_full2),
+                        .in3(stb_full3),
+                        .sel0(ctl_dp_fp_thr[0]),
+                        .sel1(ctl_dp_fp_thr[1]),
+                        .sel2(ctl_dp_fp_thr[2]),
+                        .sel3(ctl_dp_fp_thr[3]));
+
+   assign        other_mem_op_e = exu_ffu_ist_e | ifu_tlu_flsh_inst_e | ifu_lsu_ld_inst_e;   // NOTE(review): presumably an integer store, flush or load in E -- confirm against the driving units
+   assign        can_issue_bst_c2  = (~other_mem_op_e & ~stb_full_c2);   // no competing memory op and the store buffer has room
+   assign bst_m = fst_m & blk_asi_m;   // fp store with a block asi
+
+   assign bst_rs[5:0] = {rd[5:4], bst_cnt[2:0], 1'b0};   // register for the current packet: rd[5:4], then the packet counter, bit 0 tied low
+
+   // bst starts when bst hits w and is done when the 7th pckt has issued.  c1 is re-entered after every non-final packet and after a ce replay (c6).
+   assign bst_issue_c1_next = ((bst_w & ~kill_w) | 
+                               (bst_issue_c4 & ~(bst_cnt[2:0] == 3'b111) & ~bst_ce_c4 & 
+                                ~(bst_ue_c4 & nceen)) | bst_issue_c6);
+
+   // sotheas,9/14/04: fixed eco 6910, suppress block store start on dtlb perr
+   //                  using registered version of lsu_ffu_st_dtlb_perr_g
+   //   WAS:
+   //        assign bst_issue_c2_next = ((bst_issue_c1 & ~(any_op_w2 & flush_w2)) | (bst_issue_c2 & ~can_issue_bst_c2)) & ~reset;
+   //   IS:
+   dff_s #1 st_dtlbperr_ff(.din (!lsu_ffu_st_dtlb_perr_g),   // stored inverted: q is active low
+		               .q   (st_dtlb_perr_w2_l),
+		               .clk (clk), .se(se), .si(), .so());
+   assign bst_issue_c2_next = ((bst_issue_c1 & ~(any_op_w2 & flush_w2) & st_dtlb_perr_w2_l) 
+                             | (bst_issue_c2 & ~can_issue_bst_c2)) & ~reset;   // second term holds c2 while the packet cannot issue
+   assign bst_issue_c3_next = bst_issue_c2 & can_issue_bst_c2 & ~reset;
+   assign bst_issue_c4_next = bst_issue_c3 & ~reset;
+   assign bst_issue_c5_next = bst_issue_c4 & bst_ce_c4 & ~reset;   // c5/c6 are only entered on a correctable error
+   assign bst_issue_c6_next = bst_issue_c5 & ~reset;
+
+   // bst keeps reading in both c1 and c2 in case it stalls in c2
+   assign bst_read_req = bst_issue_c1 | bst_issue_c2;
+   // counter resets to 1 when bst hits w, increments when one is issued to lsu without ce
+   assign bst_cnt_next[2:0] = (bst_w? 				3'b001:
+                               (bst_issue_c4 & ~bst_ce_c4)?  (bst_cnt[2:0] + 3'b001):
+                                               bst_cnt[2:0]);   // otherwise hold
+
+   ///////////////////
+   // bst starvation
+   //----------------
+   // when six bit counter saturates then a req to stall inst issue is made
+   // The request stays high until a bst gets issued (it is held while other_mem_op_e is set)
+   ///////////////////
+   assign ffu_ifu_stallreq = bst_stall_req;
+   assign bst_stall_req_next = ((bst_stall_cnt[5:0] == 6'b111111) & bst_issue_c2 & ~can_issue_bst_c2 |   // set: counter saturated while still blocked in c2
+                                bst_stall_req & other_mem_op_e);   // hold: another memory op is still in E
+   assign bst_stall_cnt_next[5:0] = (~bst_issue_c2)? 6'd0: bst_stall_cnt[5:0] + 6'd1;   // counts cycles spent in c2, cleared when not in c2
+
+   /////////////////////
+   // bst ecc control
+   /////////////////////
+   // if a ce occurs even after correction then it is converted to a ue
+   assign bst_ue_c4 = bst_issue_c4 & (previous_ue | (fixed_bst_ce & |previous_ce[1:0]));
+   assign bst_ce_c4 = bst_issue_c4 & |previous_ce[1:0] & ~fixed_bst_ce & ~previous_ue;   // first ce on this packet, and no ue
+   assign fixed_bst_ce_next = bst_ce_c4 | (fixed_bst_ce & ~bst_issue_c4);   // set on a ce, cleared the next time a packet reaches c4
+
+   dff_s #(4) stb_full_dff(.din({lsu_ffu_stb_full0,lsu_ffu_stb_full1,lsu_ffu_stb_full2,lsu_ffu_stb_full3}),   // one registered full flag per thread
+                         .q({stb_full0, stb_full1, stb_full2, stb_full3}),
+                         .clk(clk), .se(se), .si(), .so());
+   dff_s blk_asi_dff(.din(lsu_ffu_blk_asi_e), .clk(clk), .q(blk_asi_m),
+                   .se(se), .si(), .so());
+   dffr_s bst_fix_ce_dff(.din(fixed_bst_ce_next), .clk(clk), .q(fixed_bst_ce), 
+                       .se(se), .si(), .so(), .rst(reset));
+   dff_s #(3) bst_cnt_dff(.din(bst_cnt_next[2:0]), .clk(clk), .q(bst_cnt[2:0]),
+                        .se(se), .si(), .so());
+   dff_s bst_m2w(.din(bst_m), .clk(clk), .q(bst_w), .se(se), .si(), .so());
+   dff_s bst_issue_c1_dff(.din(bst_issue_c1_next), .clk(clk), .q(bst_issue_c1), .se(se),   // c1..c6 state flops; c2..c6 next-state terms are gated with ~reset
+                     .si(), .so());
+   dff_s bst_issue_c2_dff(.din(bst_issue_c2_next), .clk(clk), .q(bst_issue_c2), .se(se),
+                     .si(), .so());
+   dff_s bst_issue_c3_dff(.din(bst_issue_c3_next), .clk(clk), .q(bst_issue_c3), .se(se),
+                     .si(), .so());
+   dff_s bst_issue_c4_dff(.din(bst_issue_c4_next), .clk(clk), .q(bst_issue_c4), .se(se),
+                     .si(), .so());
+   dff_s bst_issue_c5_dff(.din(bst_issue_c5_next), .clk(clk), .q(bst_issue_c5), .se(se),
+                     .si(), .so());
+   dff_s bst_issue_c6_dff(.din(bst_issue_c6_next), .clk(clk), .q(bst_issue_c6), .se(se),
+                     .si(), .so());
+   dff_s #(6) bst_stall_cntdff(.din(bst_stall_cnt_next[5:0]), .clk(clk), .q(bst_stall_cnt[5:0]),
+                             .se(se), .si(), .so());
+   dffr_s bst_stall_reqdff(.din(bst_stall_req_next), .clk(clk), .q(bst_stall_req),
+                        .se(se), .si(), .so(), .rst(reset));
+   
+//----------------------------------------
+// Decode Returning FPU/LSU packets
+//----------------------------------------
+
+   // FPU result pulled off of cpx: request type is FP_RET, the packet is valid and an fp op is pending
+   assign is_fpu_result = (cpx_req_d1 == `FP_RET) ?
+                           cpx_vld_d1 & fp_pending : 1'b0;
+   assign ld_ret = lsu_ffu_ld_vld & ~(thr_match_fpw2 & flush_w2) & (blk_load_pending | load_pending);   // load return, dropped on a flush of the matching thread
+	  
+   // select frf write data.  Priority: fpu/lsu return, then vis result, then frf read, else keep
+   // don't write data on Fcompares
+   assign ctl_dp_rs2_sel_fpu_lsu = is_fpu_result & ~cpx_fcmp_d1 | ld_ret;
+   assign ctl_dp_rs2_sel_vis = vis_result & ~ctl_dp_rs2_sel_fpu_lsu;
+   assign ctl_dp_rs2_frf_read = (ren_rs2_w | ctl_dp_rd_ecc) & ~ctl_dp_rs2_sel_fpu_lsu & ~vis_result;
+   assign ctl_dp_rs2_keep_data = ~(ren_rs2_w | ctl_dp_rd_ecc | vis_result | 
+				   ctl_dp_rs2_sel_fpu_lsu);   // none of the other three selects is active
+
+   // selects for rs2 result mux: set for any ce/rollback case (fst, rs2, bst, rs1)
+   assign ctl_dp_rd_ecc = fst_ce_w | rollback_rs2_w2 | bst_ce_c4 | rollback_rs1_w3;
+   
+   // Selects for rs1 mux
+   assign ctl_dp_new_rs1 = ren_rs1_w2;
+
+//----------------------------------------
+// FRF Controls
+//----------------------------------------   
+   // WEN for frf from load, FPU result or mov
+   assign external_wen_next = ld_ret & ~(ldfsr | ldxfsr) | (is_fpu_result & ~cpx_fcmp_d1 & ~take_ieee_trap);   // fsr loads, fcompares and trapping results do not write the frf
+   assign rf_ecc_gen_next = external_wen_next | vis_wen_next;
+   
+   dff_s rf_eccgen_dff(.din(rf_ecc_gen_next), .q(rf_ecc_gen), .clk(clk), .se(se), .si(), .so());
+   dff_s rf_wen_dff(.din(rf_wen_next), .q(rf_wen), .clk(clk), .se(se), .si(), .so());
+   // check for flush_pipe for moves
+   assign rf_wen_next = rf_ecc_gen & ~(any_op_w2 & flush_w2) | move_wen_w2_valid;
+
+
+   // REN and WEN must be mutually exclusive.  This works because WEN is always after W
+   // if a new fpop has arrived to cancel it.  Both write enables below are gated with ~ctl_frf_ren.
+   // The even register is the upper half, odd is the lower half
+   assign ctl_frf_wen[1] = ((rf_wen & ~rd[0]) | ecc_wen[1]) & ~ctl_frf_ren;   // double or even sgl
+   assign ctl_frf_wen[0] = ((rf_wen & (~dest_single | rd[0])) | ecc_wen[0]) & ~ctl_frf_ren; // dbl or odd sgl
+
+   // REN for frf  -- rd rs2 in D, rs1 in E
+   assign read_rs2 = ren_rs2_e;
+   assign read_rs1 = ren_rs1_m;
+   assign read_rd = ifu_ffu_fst_d;
+   assign read_bst = bst_read_req;
+   // expanded out the terms for reading rs2 to help timing (uses ifu_ffu_fst_d and bst_read_req directly instead of read_rd/read_bst)
+   assign ctl_frf_ren = (read_rs1 | read_rs2 |
+                         ifu_ffu_fst_d | bst_read_req);
+
+   assign early_frf_rnum[5:1] = read_rs2?  rs2_next[5:1]:   // priority: rs2, rs1, block store, else the write address
+                                read_rs1?  rs1[5:1]:
+                                read_bst?  bst_rs[5:1]:
+                                           write_addr[5:1];
+   assign     st_rd_d[5:1] = {ifu_ffu_frd_d[0] & ~ifu_ffu_ldst_single_d, ifu_ffu_frd_d[4:1]};   // frd bit 0 becomes register bit 5 unless the store is single precision
+   mux2ds #5 frf_rnum_mux(.dout(frf_rnum[5:1]),   // a store in D (read_rd) overrides the early register number
+                            .in0(early_frf_rnum[5:1]),
+                            .in1(st_rd_d[5:1]),
+                            .sel0(~read_rd),
+                            .sel1(read_rd));
+
+   assign frf_tid[1:0] = (read_rd)? ifu_ffu_tid_d[1:0]: tid_next[1:0];
+                          
+   assign wen_rs2_ecc = |ecc_wen[1:0] & ren_rs2_w4;
+   assign wen_rs1_ecc = |ecc_wen[1:0] & ren_rs1_w5;
+   assign blk_rd[5:1] = rd[5:1] + {2'b0, bld_cnt_d3[2:0]};   // block load destination: rd plus the block load counter
+   assign write_addr[5:1] = wen_rs2_ecc?          rs2[5:1] :   // priority: rs2 ecc writeback, rs1 ecc writeback, bst replay, block load, else rd
+                            wen_rs1_ecc?          rs1[5:1] :
+                            bst_issue_c6?         bst_rs[5:1]:
+                            blk_load_pending?     blk_rd[5:1]:
+                                                  rd[5:1];
+
+   // Address is combination of tid and rnum
+   assign ctl_frf_addr[6:0] = {frf_tid[1:0], frf_rnum[5:1]}; 
+  
+//----------------------------------------
+// Data from FPU forwarded to IFU
+//----------------------------------------
+   // Send thrid to IFU
+   assign ffu_ifu_tid_w2[1:0] = tid;
+
+   // completion is always signalled after the w-stage so that flush_pipe, etc.
+   // can be checked.  For lds and fpops this is signalled after they write.
+   // ecc_kill_rs2_w is checked for move and fst because the "completion"
+   // is signalled over a separate interface so rollback can occur.
+   // This is staged 2 cycles to allow for the cycle of ecc generation.
+   //
+   assign ffu_op_done_next = ((is_fpu_result & ~take_ieee_trap) | fld_done |
+                              blk_ld_done | bst_done | vis_result |
+                              fst_w & ~bst_w & ~kill_w);   // plain (non-block) fp store that was not killed in W
+   dff_s ffu_op_done_dff(.din(ffu_op_done_next), .clk(clk), .q(ffu_op_done),
+                       .se(se), .si(), .so());
+   // sotheas,9/14/04: fixed eco 6910, send done on dtlb perr for block store
+   //                  using registered version of lsu_ffu_st_dtlb_perr_g
+   //   WAS:
+   //     assign ffu_op_done_vld = ffu_op_done & ~(any_op_w2 & flush_w2) | move_w2_vld;
+   assign ffu_op_done_vld = (ffu_op_done | (bst_issue_c1 & !st_dtlb_perr_w2_l) ) 
+                            & ~(any_op_w2 & flush_w2) | move_w2_vld;   // st_dtlb_perr_w2_l is active low, so the added term is a block store in c1 with a dtlb parity error
+   dff_s ffu_op_done2_dff(.din(ffu_op_done_vld), .clk(clk), .q(ffu_ifu_fpop_done_w2),
+                        .se(se), .si(), .so());
+
+//------------------------------------------
+// FSR Controls
+//------------------------------------------
+
+   // one-hot decode of the thread id (bit n is set for thread n)
+   assign ctl_dp_fp_thr[3:0] = { extra_tid[1] &  extra_tid[0],
+                                 extra_tid[1] & ~extra_tid[0],
+                                ~extra_tid[1] &  extra_tid[0],
+                                ~extra_tid[1] & ~extra_tid[0]};
+
+   // CC's: one-hot decode of the fcc number
+   assign fcc_num_dec[3:0] = { fcc_num[1] &  fcc_num[0],
+                               fcc_num[1] & ~fcc_num[0],
+                              ~fcc_num[1] &  fcc_num[0],
+                              ~fcc_num[1] & ~fcc_num[0]};
+
+   // stfsr or fmov always clear ftt (as does any returning fpu result)
+   assign clear_ftt = is_fpu_result | move_w2_vld | stfsr_w2_vld;
+
+   // selects to load next fsr from, per thread.  Only the current thread can
+   // select the fpu or load source; every other thread keeps its old value.
+   assign ctl_dp_fsr_sel_fpu[3:0] = ctl_dp_fp_thr[3:0] &
+                                    {4{clear_ftt | take_other_trap}};
+   assign ctl_dp_fsr_sel_ld[3:0]  = ctl_dp_fp_thr[3:0] & ~ctl_dp_fsr_sel_fpu[3:0] &
+                                    {4{(ldfsr | ldxfsr) & ld_ret}};
+   assign ctl_dp_fsr_sel_old[3:0] = ~(ctl_dp_fsr_sel_fpu[3:0] | ctl_dp_fsr_sel_ld[3:0]);
+   
+   // align fcc depending on which fcc_num was used: the 2-bit fcc value from cpx replaces the selected field, the rest keep the current fsr fcc
+   mux4ds #8 fcc_ret_mux(.dout (fpu_fcc[7:0]),
+			 .in0  ({dp_ctl_fsr_fcc[7:2], cpx_fccval_d1[1:0]}),
+			 .in1  ({dp_ctl_fsr_fcc[7:4], cpx_fccval_d1[1:0], dp_ctl_fsr_fcc[1:0]}),
+			 .in2  ({dp_ctl_fsr_fcc[7:6], cpx_fccval_d1[1:0], dp_ctl_fsr_fcc[3:0]}),
+			 .in3  ({cpx_fccval_d1[1:0], dp_ctl_fsr_fcc[5:0]}),
+			 .sel0 (fcc_num_dec[0]),
+			 .sel1 (fcc_num_dec[1]),
+			 .sel2 (fcc_num_dec[2]),
+			 .sel3 (fcc_num_dec[3]));
+
+   // set fcc if this was an fcmp instruction
+   assign fcc_sel_fpu = cpx_fcmp_d1 & ~ieee_trap & is_fpu_result;
+   assign fcc_sel_ld = ~is_fpu_result & ldfsr_vld;   // ldfsr wins over ldxfsr when both are valid
+   assign fcc_sel_ldx = ~is_fpu_result & ~ldfsr_vld & ldxfsr_vld;
+   assign fcc_sel_old = ~fcc_sel_fpu & ~fcc_sel_ld & ~fcc_sel_ldx;   // default: keep the current fcc
+   mux4ds #8 fcc_set_mux(.dout (ctl_dp_fcc_w2[7:0]),
+			                   .in0  (dp_ctl_fsr_fcc[7:0]),
+			                   .in1  (fpu_fcc[7:0]),
+			                   .in2  (dp_ctl_ld_fcc[7:0]),    // ldxfsr: all 8 fcc bits come from the load
+			                   .in3  ({dp_ctl_fsr_fcc[7:2], dp_ctl_ld_fcc[1:0]}),    // ldfsr: only bits [1:0] come from the load
+			                   .sel0 (fcc_sel_old),
+			                   .sel1 (fcc_sel_fpu),
+			                   .sel2 (fcc_sel_ldx),
+			                   .sel3 (fcc_sel_ld));
+
+   // get fcc's from ldfsr instruction
+   assign ldfsr_vld = ldfsr & load_pending;
+   assign ldxfsr_vld = ldxfsr & load_pending;
+
+   wire   cc_changed_w2;
+
+   // fcc set by fcmp, ldfsr or ldxfsr
+   assign cc_changed = fcc_sel_fpu |
+	                     fld_done & (ldfsr_vld | ldxfsr_vld);
+
+   dff_s cc_changed_dff(.din(cc_changed), .clk(clk), .q(cc_changed_w2),
+                      .se(se), .si(), .so());
+   dff_s #(8) cc_next(.din(ctl_dp_fcc_w2[7:0]), .clk(clk), .q(ffu_ifu_cc_w2[7:0]),
+                    .se(se), .si(), .so());
+
+   assign ffu_ifu_cc_vld_w2[3:0] = ctl_dp_fp_thr[3:0] & {4{cc_changed_w2}};   // per-thread valid, set only for the current fp thread
+   
+//-----------------------------------
+// Traps
+//-----------------------------------   
+
+   // illegal instruction if blk ld/st is not 8 dp regs aligned (any of rd[3:0] set on a non-single block op)
+   dff_s illegal_vis_e2m (.din(illegal_vis_e), .clk(clk), .q(illegal_vis_m),
+                        .se(se), .si(), .so());
+   assign illegal_blk_m = (blk_ld_m | bst_m) & (rd[0] | rd[1] | rd[2] | rd[3]) & ~dest_single;
+   assign ffu_tlu_ill_inst_m = illegal_blk_m | illegal_vis_m;
+   
+   assign ctl_dp_ftt_w2 = take_ieee_trap      ? 3'b001 :    // ieee trap has priority
+	                  unimpl_qual_w2        ? 3'b011 :    // then the unimplemented-op case
+	                  clear_ftt           ? 3'b000 :    // otherwise cleared by stfsr, move or fpu result
+	                                        3'bxxx;     // NOTE(review): don't-care default, presumably never latched -- confirm in the datapath
+   
+// SPARC V9 Underflow, Overflow, Inexact behavior: 
+//
+//      Exception(s)             |            Current
+//      Detected    Trap Enable  | fp_        Exception
+//      in f.p.     Mask Bits    | exception_ Bits (in
+//      operation   (in FSR.TEM) | ieee_754   FSR.cexc)   
+//      ----------- ------------ | Trap       ----------- 
+//      of  uf  nx  OFM UFM NXM  | Occurs?    ofc ufc nxc  Notes
+//      --- --- --- --- --- ---  | ---------- --- --- ---  -----
+//       -   -   -   x   x   x   |  no         0   0   0   
+//       -   -   *   x   x   0   |  no         0   0   1 
+//       -   *   *   x   0   0   |  no         0   1   1
+//       *   -   *   0   x   0   |  no         1   0   1    (2)
+//                               | 
+//       -   -   *   x   x   1   |  yes        0   0   1 
+//       -   *   *   x   0   1   |  yes        0   0   1 
+//       -   *   -   x   1   x   |  yes        0   1   0
+//       -   *   *   x   1   x   |  yes        0   1   0 
+//       *   -   *   1   x   x   |  yes        1   0   0    (2)
+//       *   -   *   0   x   1   |  yes        0   0   1    (2)
+//
+//      (2) Overflow is always accompanied by inexact.
+//
+//
+// The FPU does not receive FSR.TEM bits. FSR.TEM bits are used within
+// the FFU for the following cases:
+// (1) fp_exception_ieee_754 trap detection
+//     If a FPop generates an IEEE exception (nv, of, uf, dz, nx) for
+//     which the corresponding trap enable (TEM) is set, then a
+//     fp_exception_ieee_754 trap is caused. FSR.cexc field has one bit
+//     set corresponding to the IEEE exception, and FSR.aexc field is
+//     unchanged.
+// (2) Clear FSR.nxc if an overflow (underflow) exception does trap
+//     because FSR.OFM (FSR.UFM) is set, regardless of whether FSR.NXM
+//     is set. Set FSR.ofc (FSR.ufc).
+// (3) Clear FSR.ofc (FSR.ufc) if overflow (underflow) exception traps
+//     and FSR.OFM (FSR.UFM) is not set and FSR.NXM is set. Set FSR.nxc.
+// (4) Clear FSR.ufc if the result is exact (FSR.nxc is not set) and
+//     FSR.UFM is not set. This case represents an exact denormalized
+//     result.
+//
+// Note: - FPU will signal underflow to the FFU for all "tiny" results.
+//       - FPU always reports inexact along with overflow.
+   // registered copy of the trap enable mask
+   dff_s #(5) tem_dff(.din (dp_ctl_fsr_tem[4:0]),
+                      .q   (fsr_tem_d1),
+                      .clk (clk),
+                      .se  (se), .si(), .so());
+
+   // nx is dropped when an enabled overflow or an enabled underflow is reported
+   assign fpexc_nxc = cpx_fpexc_d1[`FSR_NXC] &
+                      ~((cpx_fpexc_d1[`FSR_OFC] & fsr_tem_d1[3]) |    // enabled  of
+                        (cpx_fpexc_d1[`FSR_UFC] & fsr_tem_d1[2]));    // enabled  uf
+
+   // of is dropped when of is disabled while nx is enabled and reported
+   assign fpexc_ofc = cpx_fpexc_d1[`FSR_OFC] &
+                      ~(cpx_fpexc_d1[`FSR_NXC] & fsr_tem_d1[0] & ~fsr_tem_d1[3]);
+
+   // uf is dropped when uf is disabled and either
+   //   - nx is enabled and reported, or
+   //   - no inexact was reported (i.e. exact denorm w/ UFM=0)
+   assign fpexc_ufc = cpx_fpexc_d1[`FSR_UFC] &
+                      ~(~fsr_tem_d1[2] &
+                        ((fsr_tem_d1[0] & cpx_fpexc_d1[`FSR_NXC]) |
+                         ~cpx_fpexc_d1[`FSR_NXC]));
+
+   // current exception
+   assign fp_exc_w2[4:0] = {cpx_fpexc_d1[4],
+                            fpexc_ofc,
+                            fpexc_ufc,
+                            cpx_fpexc_d1[1],
+                            fpexc_nxc};
+
+   // an exception traps when its enable bit is set
+   assign ieee_trap_vec[4:0] = fsr_tem_d1[4:0] & fp_exc_w2[4:0];
+
+   // ieee trap has least priority.  Put through a flop for timing reasons
+   assign ieee_trap      = |ieee_trap_vec[4:0];
+   assign take_ieee_trap = is_fpu_result & ieee_trap;
+   dff_s trap_ieee_dff(.din (take_ieee_trap),
+                       .q   (ffu_tlu_trap_ieee754),
+                       .clk (clk),
+                       .se  (se), .si(), .so());
+
+   assign take_other_trap    = unimpl_qual_w2;
+   assign ffu_tlu_trap_other = take_other_trap;
+   assign ffu_tlu_trap_ue    = ue_trap_w3;
+
+   // accrued exceptions
+   // a bit accrues only for a valid fpu result whose trap enable is clear
+   assign fp_exc_w2[`FSR_NVA] = (is_fpu_result & ~fsr_tem_d1[`FSR_NVC] & cpx_fpexc_d1[`FSR_NVC]) |
+                                dp_ctl_fsr_aexc[`FSR_NVC];
+   assign fp_exc_w2[`FSR_DZA] = (is_fpu_result & ~fsr_tem_d1[`FSR_DZC] & cpx_fpexc_d1[`FSR_DZC]) |
+                                dp_ctl_fsr_aexc[`FSR_DZC];
+   assign fp_exc_w2[`FSR_UFA] = (is_fpu_result & ~fsr_tem_d1[`FSR_UFC] & fpexc_ufc) |
+                                dp_ctl_fsr_aexc[`FSR_UFC];
+   assign fp_exc_w2[`FSR_OFA] = (is_fpu_result & ~fsr_tem_d1[`FSR_OFC] & fpexc_ofc) |
+                                dp_ctl_fsr_aexc[`FSR_OFC];
+   assign fp_exc_w2[`FSR_NXA] = (is_fpu_result & ~fsr_tem_d1[`FSR_NXC] & fpexc_nxc) |
+                                dp_ctl_fsr_aexc[`FSR_NXC];
+
+   assign ctl_dp_exc_w2[9:5] = fp_exc_w2[9:5];
+
+   // move, abs, etc will clear cexc, fpu_results will update, all else will leave unchanged
+   wire   update_cexc = is_fpu_result | move_w2_vld;
+   assign ctl_dp_exc_w2[4:0] = update_cexc ? (fp_exc_w2[4:0] & {5{is_fpu_result}})
+                                           : dp_ctl_fsr_cexc[4:0];
+
+
+
+   ////////////////////////////////
+   // ECC control
+   ////////////////////////////////
+   // Generation of the parity bit for writes: bits 13 and 6 of the write syndrome are replaced by the xor of each flopped 7-bit half
+   wire [13:0] gen_synd_d1;
+   wire        gen_par_hi;
+   wire        gen_par_low;
+   wire [6:0]  error_inj_data;
+   output [13:0] ctl_frf_write_synd;
+   dff_s #(14) gen_synd_dff (.din({dp_ctl_synd_out_high[6:0],dp_ctl_synd_out_low[6:0]}),
+                           .q(gen_synd_d1[13:0]), .clk(clk), .se(se), .si(), .so());
+   assign  gen_par_hi = ^gen_synd_d1[13:7];
+   assign  gen_par_low = ^gen_synd_d1[6:0];
+   assign  ctl_frf_write_synd[13:0] = ({gen_par_hi,gen_synd_d1[12:7],gen_par_low,gen_synd_d1[5:0]} ^ 
+                                       {error_inj_data[6:0],error_inj_data[6:0]});   // the same injection mask is applied to both halves
+   /////////////////////////////////
+   // error injection
+   /////////////////////////////////
+   // injection doesn't check for flush on wen
+   assign  inject_err_next = ifu_ffu_inj_frferr & rf_wen_next;
+   assign error_inj_data[6:0] = {7{inject_err}} & err_data[6:0];   // mask is zero unless an injection is active
+   dff_s #(7) err_data_dff(.din(ifu_exu_ecc_mask[6:0]),
+                     .q(err_data[6:0]),
+                     .clk(clk), .se(se), .si(), .so());
+   dff_s err_cntl(.din(inject_err_next),
+                .q(inject_err),
+                .clk(clk), .se(se), .si(), .so());
+   // speculate on error injection (don't check flush_pipe etc)
+   assign ffu_ifu_inj_ack = inject_err;
+
+
+   // check the upper half on a double or a single with an even reg num
+   // check the lower half on a double or a single with an odd reg num
+   // ecc block will run on frf input for reads
+   // otherwise it will run on the rd_data
+   assign check_ecc_next = ren_rs2_m | fst_e | ren_rs1_w | bst_issue_c3_next;
+   dff_s check_ecc_dff(.din(check_ecc_next), .clk(clk), .q(ctl_dp_ecc_sel_frf),
+                     .se(se), .si(), .so());
+   // rs1 will not be checked if a ce was detected on rs2.  If there was a ue on rs2
+   // rs1 will be checked and a ce will be corrected, but the error on rs2 will be logged
+   assign chk_rs1_w2 = ren_rs1_w2;
+   assign chk_ecc_m[1] = fst_m & ~rd[0] & ~output_sel_fsr;   // fp store data; stfsr (fsr select) is not checked
+   assign chk_ecc_m[0] = fst_m & (~dest_single | rd[0]) & ~output_sel_fsr;
+   assign chk_ecc_w[1] = ren_rs2_w & ~kill_eccchk_w & ~rs2[0];   // rs2 read
+   assign chk_ecc_w[0] = ren_rs2_w & ~kill_eccchk_w & (~source_single | rs2[0]);
+   assign chk_ecc_w2[1] = chk_rs1_w2 & ~rs1[0];   // rs1 read
+   assign chk_ecc_w2[0] = chk_rs1_w2 & (~source_single | rs1[0]);
+
+   assign chk_ecc[1:0] = chk_ecc_m[1:0] | chk_ecc_w[1:0] | chk_ecc_w2[1:0] | {2{bst_issue_c3}};   // block stores check both halves
+   dff_s #(2) chk_ecc_dff(.din(chk_ecc[1:0]), .clk(clk), .q(chk_ecc_prev[1:0]),
+                        .se(se), .si(), .so());
+
+   assign     error_detected[1] = |dp_ctl_synd_out_high[5:0];   // any of syndrome bits [5:0] set
+   assign     error_detected[0] = |dp_ctl_synd_out_low[5:0];
+   // syndrome bit 6 separates the two cases on a checked half: set means ce, clear means possible ue
+   assign     possible_ue[1] = ~dp_ctl_synd_out_high[6] & chk_ecc[1];
+   assign     possible_ue[0] = ~dp_ctl_synd_out_low[6] & chk_ecc[0];
+   assign     ce[1] = dp_ctl_synd_out_high[6] & chk_ecc[1];
+   assign     ce[0] = dp_ctl_synd_out_low[6] & chk_ecc[0];
+
+   assign rollback_fst_m = ((dp_ctl_synd_out_high[6] & chk_ecc_m[1] & ~disable_ce_m) |
+                        (dp_ctl_synd_out_low[6] & chk_ecc_m[0] & ~disable_ce_m));   // ce on store data, unless ce handling is disabled
+   dff_s rollback_m2w(.din(rollback_fst_m), .clk(clk), .q(rollback_fst_w), .se(se), .si(), .so());
+   dff_s #(2) possible_ue_dff(.din(possible_ue[1:0]), .clk(clk), .q(prev_poss_ue[1:0]),
+                            .se(se), .si(), .so());
+   dff_s #(2) ce_dff(.din(ce[1:0]), .clk(clk), .q(previous_ce[1:0]),
+                            .se(se), .si(), .so());
+   dff_s #(2) err_det_dff(.din(error_detected[1:0]), .clk(clk), .q(prev_err_detected[1:0]),
+                            .se(se), .si(), .so());
+   assign previous_ue = |(prev_err_detected[1:0] & prev_poss_ue[1:0]);   // ue = error detected on a half that was flagged as a possible ue
+   
+   dff_s #(2) ecc_wen1_dff(.din(ecc_wen_gen_next[1:0]), .clk(clk), .q(ecc_wen_gen[1:0]),   // two-stage pipe: ecc_wen_gen, then ecc_wen
+                        .se(se), .si(), .so());
+   dff_s #(2) ecc_wen2_dff(.din(ecc_wen_next[1:0]), .clk(clk), .q(ecc_wen[1:0]),
+                        .se(se), .si(), .so());
+   // if the ecc error was in the m stage we need to check for a kill
+   // if the ecc error was in the w stage we need to check flush
+   // ECC errors on rs1 will not be written back to the frf.  The data that is used will be corrected.
+   assign ecc_wen_gen_next[1:0] = previous_ce[1:0] & 
+          {2{bst_issue_c4 | fst_ce_w | rollback_rs2_w2 | rollback_rs1_w3}};
+   assign ecc_wen_next = ecc_wen_gen[1:0] & {2{~(fst_ce_w2 & flush_w2)}};   // drop the write for a flushed fst ce
+   
+   // pass along ce and ue so trap can be signalled to ffu and tlu
+   // if disable_ce_m then don't tell ifu reissue ce.  Instead convert to a ue.
+   dff_s disable_ce_e2m(.din(ifu_exu_disable_ce_e), .clk(clk), .q(disable_ce_m),
+                      .se(se), .si(), .so());
+   dff_s disable_ce_m2w(.din(disable_ce_m), .clk(clk), .q(disable_ce_w),
+                      .se(se), .si(), .so());
+   assign fst_ce_w = rollback_fst_w & ~kill_eccchk_w;
+   assign fst_ue_w = fst_w & (previous_ue | (disable_ce_w & |(previous_ce[1:0])))  & ~kill_eccchk_w;
+   assign rollback_rs2_w2 = (ren_rs2_w2 & ~flush_w2 & |previous_ce[1:0] 
+                             & ~rolled_back);
+   assign rs2_ce_w2 = ren_rs2_w2 & |previous_ce[1:0] & ~rolled_back & ~previous_ue;
+   assign rs2_ue_w2 = ren_rs2_w2 & (previous_ue | (rolled_back & |previous_ce[1:0]));   // a ce seen again after a rollback is reported as a ue
+   // must check for flush because eccchk doesn't do this
+   assign rs2_fst_ce_w2_vld = (rs2_ce_w2 | fst_ce_w2) & ~flush_w2;
+   assign rs2_fst_ue_w2_vld = (rs2_ue_w2 | fst_ue_w2) & ~flush_w2;
+   dff_s ce_w2w2(.din(fst_ce_w), .clk(clk), .q(fst_ce_w2),
+               .se(se), .si(), .so());
+   dff_s ue_w2w2(.din(fst_ue_w), .clk(clk), .q(fst_ue_w2),
+               .se(se), .si(), .so());
+   dff_s ce_w22w3(.din(rs2_fst_ce_w2_vld), .clk(clk), .q(rs2_fst_ce_w3),
+               .se(se), .si(), .so());
+   dff_s ue_w22w3(.din(rs2_fst_ue_w2_vld), .clk(clk), .q(rs2_fst_ue_w3),
+               .se(se), .si(), .so());
+
+   assign rs1_ce_w3 = ren_rs1_w3 & |previous_ce[1:0] & ~previous_ue & ~rolled_back;
+   assign rollback_rs1_w3 = rs1_ce_w3 & ~ue_trap_w3;
+   assign ce_w3 = (rs1_ce_w3 | rs2_fst_ce_w3);
+   assign ue_w3 = (ren_rs1_w3 & (previous_ue | (rolled_back & |previous_ce[1:0]))) | rs2_fst_ue_w3;
+
+   assign ffu_ifu_ecc_ce_w2 = (ce_w3 | bst_ce_c4);   // reported to the ifu: regular ops or block store
+   assign ffu_ifu_ecc_ue_w2 = (ue_w3 | bst_ue_c4);
+   
+   // error logging signals.  The error register priority is fst, bst, rs1_ue, rs2_ue, rs1_ce, rs2_ce
+   assign log_new_err = fst_w | bst_issue_c4 | ren_rs2_w2 |
+                        (previous_ue & ren_rs1_w3) |
+                        (~rs2_fst_ue_w3 & rs1_ce_w3);
+
+   // register number to log: fst, then bst, then rs2, with rs1 as the default
+   assign new_err_reg[5:0] = fst_w        ? rd[5:0]     :
+                             bst_issue_c4 ? bst_rs[5:0] :
+                             ren_rs2_w2   ? rs2[5:0]    :
+                                            rs1[5:0];
+
+   // the logged register number is held until a new error is logged
+   assign err_reg_next[5:0] = log_new_err ? new_err_reg[5:0] : err_reg_d1[5:0];
+   dff_s #(6) err_reg_dff(.din (err_reg_next[5:0]),
+                          .q   (err_reg_d1[5:0]),
+                          .clk (clk),
+                          .se  (se), .si(), .so());
+   assign ffu_ifu_err_reg_w2[5:0] = err_reg_d1[5:0];
+
+   // storage of error syndrome for logging
+   // For singles the invalid half of the syndrome is zeroed out.
+   // The syndrome reported to the ifu will be latched until a new error is detected
+   assign new_err_synd[13:0] = gen_synd_d1[13:0] &
+                               {{7{chk_ecc_prev[1]}}, {7{chk_ecc_prev[0]}}};
+   assign err_synd_next[13:0] = log_new_err ? new_err_synd : err_synd_d1;
+   dff_s #(14) err_synd_d1ff(.din (err_synd_next[13:0]),
+                             .q   (err_synd_d1[13:0]),
+                             .clk (clk),
+                             .se  (se), .si(), .so());
+   assign ffu_ifu_err_synd_w2[13:0] = err_synd_d1[13:0];
+
+   // kill moves and fpu ops
+   assign ecc_kill_rs2_w2 = rollback_rs2_w2 | (rs2_ue_w2 & nceen);
+   
+
+   // pipe along enable signal for ue traps
+   assign nceen_next = (any_op_e)? ifu_exu_nceen_e: (nceen & ~rollback_fst_w);
+   dff_s nceen_dff(.din(nceen_next), .clk(clk), .q(nceen),
+                 .se(se), .si(), .so());
+   assign ue_trap_w3 = (ue_w3 | bst_ue_c4) & nceen;
+
+   // signals for killing stores on ecc
+   // use this to kill any rs2/rd ce   
+   assign ffu_ifu_fst_ce_w = rollback_fst_w;
+
+   // These signals kill the entry in the store buffer for any ce or trapping ue.  Very critical timing.
+   assign possible_kill_st_ce_m = ((fst_m & ~output_sel_fsr & ~(disable_ce_m & ~nceen)) |
+                                   (bst_issue_c3 & ~(fixed_bst_ce & ~nceen)));
+   assign possible_kill_st_ue_m = (fst_m & ~output_sel_fsr & nceen | bst_issue_c3 & nceen);
+   assign kill_st_ce_w = (|previous_ce[1:0]) & possible_kill_st_ce_w;
+   assign ffu_lsu_kill_fst_w = (previous_ue)? possible_kill_st_ue_w: kill_st_ce_w;
+   dff_s kill_fst_ce_dff(.din(possible_kill_st_ce_m), .clk(clk), .q(possible_kill_st_ce_w),
+                       .se(se), .si(), .so());
+   dff_s kill_fst_ue_dff(.din(possible_kill_st_ue_m), .clk(clk), .q(possible_kill_st_ue_w),
+                       .se(se), .si(), .so());
+
+   // rollback signals
+   assign rollback_c1_next = rollback_rs2_w2 | rollback_rs1_w3;
+   dffr_s #(3) rollback_dff(.din({rollback_c1_next,rollback_c1_vld,rollback_c2}),
+                         .q({rollback_c1, rollback_c2, rollback_c3}),
+                         .clk(clk), .se(se), .si(), .so(), .rst(reset));
+   // if both rs1 and rs2 rollback then the state machine needs to start on rs1
+   assign rollback_c1_vld = rollback_c1 & ~ue_trap_w3 & ~rollback_rs1_w3;
+
+   assign rolled_back_next = rollback_c1_vld | rolled_back & ~any_op_e;
+   dffr_s rollback_state(.din(rolled_back_next), .q(rolled_back),
+                        .rst(reset), .clk(clk),
+                        .se(se), .si(), .so());
+   
+   //////////////////////////////
+   // Performance counter signals
+   //////////////////////////////
+   assign ffu_tlu_fpu_tid[1:0] = tid[1:0] & {2{is_fpu_result}}; // don't toggle wire if not needed
+   assign ffu_tlu_fpu_cmplt = is_fpu_result;
+
+   sparc_ffu_ctl_visctl visctl(
+                               .illegal_vis_e(illegal_vis_e),                               
+                               .vis_nofrf_e(vis_nofrf_e),
+                               .visop_e (visop_e),
+                               .visop_m (visop_m),
+                               .visop_w_vld (visop_w_vld),
+                               .vis_wen_next (vis_wen_next),
+                               .ifu_ffu_rnd_e(frs2_e[2:0]),
+                               .fpu_rnd (fpu_rnd[1:0]),
+                               .vis_result(vis_result),
+                               /*AUTOINST*/
+                               // Outputs
+                               .ctl_vis_sel_add(ctl_vis_sel_add),
+                               .ctl_vis_sel_log(ctl_vis_sel_log),
+                               .ctl_vis_sel_align(ctl_vis_sel_align),
+                               .ctl_vis_add32(ctl_vis_add32),
+                               .ctl_vis_subtract(ctl_vis_subtract),
+                               .ctl_vis_cin(ctl_vis_cin),
+                               .ctl_vis_align0(ctl_vis_align0),
+                               .ctl_vis_align2(ctl_vis_align2),
+                               .ctl_vis_align4(ctl_vis_align4),
+                               .ctl_vis_align6(ctl_vis_align6),
+                               .ctl_vis_align_odd(ctl_vis_align_odd),
+                               .ctl_vis_log_sel_pass(ctl_vis_log_sel_pass),
+                               .ctl_vis_log_sel_nand(ctl_vis_log_sel_nand),
+                               .ctl_vis_log_sel_nor(ctl_vis_log_sel_nor),
+                               .ctl_vis_log_sel_xor(ctl_vis_log_sel_xor),
+                               .ctl_vis_log_invert_rs1(ctl_vis_log_invert_rs1),
+                               .ctl_vis_log_invert_rs2(ctl_vis_log_invert_rs2),
+                               .ctl_vis_log_constant(ctl_vis_log_constant),
+                               .ctl_vis_log_pass_const(ctl_vis_log_pass_const),
+                               .ctl_vis_log_pass_rs1(ctl_vis_log_pass_rs1),
+                               .ctl_vis_log_pass_rs2(ctl_vis_log_pass_rs2),
+                               .ffu_exu_rsr_data_hi_m(ffu_exu_rsr_data_hi_m[31:0]),
+                               .ffu_exu_rsr_data_mid_m(ffu_exu_rsr_data_mid_m[2:0]),
+                               .ffu_exu_rsr_data_lo_m(ffu_exu_rsr_data_lo_m[7:0]),
+                               .ctl_dp_wsr_data_w2(ctl_dp_wsr_data_w2[36:0]),
+                               .ctl_dp_gsr_wsr_w2(ctl_dp_gsr_wsr_w2[3:0]),
+                               .ctl_dp_thr_e(ctl_dp_thr_e[3:0]),
+                               // Inputs
+                               .clk     (clk),
+                               .se      (se),
+                               .reset   (reset),
+                               .opf     (opf[8:0]),
+                               .tid_w2  (tid_w2[1:0]),
+                               .tid_e   (tid_e[1:0]),
+                               .tid     (tid[1:0]),
+                               .kill_w  (kill_w),
+                               .ifu_tlu_sraddr_d(ifu_tlu_sraddr_d[6:0]),
+                               .exu_ffu_wsr_inst_e(exu_ffu_wsr_inst_e),
+                               .exu_ffu_gsr_align_m(exu_ffu_gsr_align_m[2:0]),
+                               .exu_ffu_gsr_rnd_m(exu_ffu_gsr_rnd_m[2:0]),
+                               .exu_ffu_gsr_mask_m(exu_ffu_gsr_mask_m[31:0]),
+                               .exu_ffu_gsr_scale_m(exu_ffu_gsr_scale_m[4:0]),
+                               .dp_ctl_fsr_rnd(dp_ctl_fsr_rnd[1:0]),
+                               .flush_w2(flush_w2),
+                               .thr_match_mw2(thr_match_mw2),
+                               .thr_match_ww2(thr_match_ww2),
+                               .ifu_tlu_inst_vld_w(ifu_tlu_inst_vld_w),
+                               .ue_trap_w3(ue_trap_w3),
+                               .frs1_e  (frs1_e[4:0]),
+                               .frs2_e  (frs2_e[4:0]),
+                               .frd_e   (frd_e[4:0]),
+                               .rollback_c3(rollback_c3),
+                               .rollback_rs2_w2(rollback_rs2_w2),
+                               .visop   (visop),
+                               .rollback_rs1_w3(rollback_rs1_w3),
+                               .dp_ctl_gsr_mask_e(dp_ctl_gsr_mask_e[31:0]),
+                               .dp_ctl_gsr_scale_e(dp_ctl_gsr_scale_e[4:0]));
+
+endmodule // sparc_ffu_ctl
Index: /trunk/T1-CPU/exu/sparc_exu_shft.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_shft.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_shft.v	(revision 6)
@@ -0,0 +1,162 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_shft.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_shft
+//	Description: This block implements right and left shifting of any amount
+//								from 0 to 63.
+*/
+
+
+module sparc_exu_shft (/*AUTOARG*/
+   // Outputs
+   shft_alu_shift_out_e, 
+   // Inputs
+   ecl_shft_lshift_e_l, ecl_shft_op32_e, ecl_shft_shift4_e, 
+   ecl_shft_shift1_e, byp_alu_rs1_data_e, byp_alu_rs2_data_e, 
+   ecl_shft_enshift_e_l, ecl_shft_extendbit_e, 
+   ecl_shft_extend32bit_e_l
+   ) ;
+   input 	ecl_shft_lshift_e_l;    // if 0 do left shift.  else right shift
+   input  ecl_shft_op32_e;      // 32 bit operation: masks shift-amount bit 5 and selects the source of rshifterinput[63:32]
+   //input [3:0] ecl_shft_shift16_e;// [48, 32, 16, 0] shift (now decoded locally as shift16_e)
+   input [3:0] ecl_shft_shift4_e;// [12, 8, 4, 0] shift
+   input [3:0] ecl_shft_shift1_e;// [3, 2, 1, 0] shift
+   input [63:0] byp_alu_rs1_data_e; // value to be shifted
+   input [5:4] byp_alu_rs2_data_e; // shift-amount bits 5:4 only; decoded into shift16_e below
+   input        ecl_shft_enshift_e_l;// enables inputs to shifter (active low; forces shifter_input to 0 when high)
+   input        ecl_shft_extendbit_e; // fill value for the bits vacated by a right shift
+   input    ecl_shft_extend32bit_e_l; // active low; its inverse is the alternate source for rshifterinput[63:32]
+   
+   output [63:0] shft_alu_shift_out_e; // left- or right-shift result, chosen by ecl_shft_lshift_e_l
+
+   wire [63:0]   shifter_input; // enabled input
+   wire [63:0]   shifter_input_b1;// buffered input
+   wire [63:0]   rshifterinput; // masked for 32-bit operation
+   wire [63:0]   rshifterinput_b1; // masked for 32-bit operation
+   wire [63:0]   lshift16;      // output of the respective mux
+   wire [63:0]   rshift16;
+   wire [63:0]   lshift4;
+   wire [63:0]   rshift4;
+   wire [63:0]   lshift1;
+   wire [63:0]   rshift1;
+   wire [63:0]   lshift16_b1;      // buffed output of the respective mux
+   wire [63:0]   rshift16_b1;
+   wire [63:0]   lshift4_b1;
+   wire [63:0]   rshift4_b1;
+   wire [47:0]   shft_extendbit_e; // ecl_shft_extendbit_e replicated 48 times
+   wire [3:0]    shift16_e;        // one-hot decode of the coarse shift: [3]=48, [2]=32, [1]=16, [0]=0
+   wire          shiftby_msb;      // shift-amount bit 5, forced low for 32-bit operations
+   wire          extend32bit_e;    // active-high version of ecl_shft_extend32bit_e_l
+   // Decode shift-amount bits [5:4]; exactly one shift16_e bit is high for any 0/1 input.
+   assign        shiftby_msb = byp_alu_rs2_data_e[5] & ~ecl_shft_op32_e;
+   assign        shift16_e[0] = ~shiftby_msb & ~byp_alu_rs2_data_e[4]; // coarse shift of 0
+   assign        shift16_e[1] = ~shiftby_msb & byp_alu_rs2_data_e[4];  // coarse shift of 16
+   assign        shift16_e[2] = shiftby_msb & ~byp_alu_rs2_data_e[4];  // coarse shift of 32
+   assign        shift16_e[3] = shiftby_msb & byp_alu_rs2_data_e[4];   // coarse shift of 48
+   // enable inputs
+   assign   shifter_input[63:0] = byp_alu_rs1_data_e[63:0] & {64{~ecl_shft_enshift_e_l}};
+   
+   // mux between left and right shifts (per the port comment, sel = 0 yields the left-shift result on in0)
+   dp_mux2es #(64) mux_shiftout(.dout(shft_alu_shift_out_e[63:0]), .in0(lshift1[63:0]),
+                           .in1(rshift1[63:0]),
+                           .sel(ecl_shft_lshift_e_l));
+   
+   // upper half of the right-shift input: either the operand's own bits or 32 copies of extend32bit_e
+   assign   extend32bit_e = ~ecl_shft_extend32bit_e_l;
+   dp_mux2es #(32) mux_rshift_extend(.dout(rshifterinput[63:32]),
+                                     .in0(byp_alu_rs1_data_e[63:32]), // NOTE: raw operand, not gated by ecl_shft_enshift_e_l
+                                     .in1({32{extend32bit_e}}),
+                                     .sel(ecl_shft_op32_e));
+   assign rshifterinput[31:0] = shifter_input[31:0];
+
+   assign shft_extendbit_e[47:0] = {48{ecl_shft_extendbit_e}};
+   // Three cascaded 4:1 stages (coarse 0/16/32/48, then 0/4/8/12, then 0/1/2/3).
+   // right shift muxes: vacated upper bits are filled from shft_extendbit_e (assumes selN picks inN -- confirm in mux4ds)
+   mux4ds #(64) mux_right16(.dout(rshift16[63:0]),
+                          .in0({shft_extendbit_e[47:0], rshifterinput_b1[63:48]}),  // >> 48
+                          .in1({shft_extendbit_e[47:16], rshifterinput_b1[63:32]}), // >> 32
+                          .in2({shft_extendbit_e[47:32], rshifterinput_b1[63:16]}), // >> 16
+                          .in3(rshifterinput_b1[63:0]),                             // >> 0
+                          .sel0(shift16_e[3]),
+                          .sel1(shift16_e[2]),
+                          .sel2(shift16_e[1]),
+                          .sel3(shift16_e[0]));
+   mux4ds #(64) mux_right4(.dout(rshift4[63:0]),
+                         .in0({shft_extendbit_e[47:36], rshift16_b1[63:12]}), // >> 12
+                         .in1({shft_extendbit_e[47:40], rshift16_b1[63:8]}),  // >> 8
+                         .in2({shft_extendbit_e[47:44], rshift16_b1[63:4]}),  // >> 4
+                         .in3(rshift16_b1[63:0]),                             // >> 0
+                         .sel0(ecl_shft_shift4_e[3]),
+                         .sel1(ecl_shft_shift4_e[2]),
+                         .sel2(ecl_shft_shift4_e[1]),
+                         .sel3(ecl_shft_shift4_e[0]));
+   mux4ds #(64) mux_right1(.dout(rshift1[63:0]),
+                         .in0({shft_extendbit_e[47:45], rshift4_b1[63:3]}), // >> 3
+                         .in1({shft_extendbit_e[47:46], rshift4_b1[63:2]}), // >> 2
+                         .in2({shft_extendbit_e[47], rshift4_b1[63:1]}),    // >> 1
+                         .in3(rshift4_b1[63:0]),                            // >> 0
+                         .sel0(ecl_shft_shift1_e[3]),
+                         .sel1(ecl_shft_shift1_e[2]),
+                         .sel2(ecl_shft_shift1_e[1]),
+                         .sel3(ecl_shft_shift1_e[0]));
+
+   // buffer signals to right muxes (each stage reads the *_b1 copy of the previous stage)
+   dp_buffer #(64) buf_rshiftin(.dout(rshifterinput_b1[63:0]), .in(rshifterinput[63:0]));
+   dp_buffer #(64) buf_rshift16(.dout(rshift16_b1[63:0]), .in(rshift16[63:0]));
+   dp_buffer #(64) buf_rshift4(.dout(rshift4_b1[63:0]), .in(rshift4[63:0]));
+
+   // left shift muxes: same three-stage cascade as the right shifter, with zeros shifted in at the bottom
+   mux4ds #(64) mux_left16(.dout(lshift16[63:0]),
+                         .in0({shifter_input_b1[15:0], {48{1'b0}}}), // << 48
+                         .in1({shifter_input_b1[31:0], {32{1'b0}}}), // << 32
+                         .in2({shifter_input_b1[47:0], {16{1'b0}}}), // << 16
+                         .in3(shifter_input_b1[63:0]),               // << 0
+                         .sel0(shift16_e[3]),
+                         .sel1(shift16_e[2]),
+                         .sel2(shift16_e[1]),
+                         .sel3(shift16_e[0]));
+   mux4ds #(64) mux_left4(.dout(lshift4[63:0]),
+                        .in0({lshift16_b1[51:0], {12{1'b0}}}), // << 12
+                        .in1({lshift16_b1[55:0], {8{1'b0}}}),  // << 8
+                        .in2({lshift16_b1[59:0], {4{1'b0}}}),  // << 4
+                        .in3(lshift16_b1[63:0]),               // << 0
+                        .sel0(ecl_shft_shift4_e[3]),
+                        .sel1(ecl_shft_shift4_e[2]),
+                        .sel2(ecl_shft_shift4_e[1]),
+                        .sel3(ecl_shft_shift4_e[0]));
+   mux4ds #(64) mux_left1(.dout(lshift1[63:0]),
+                        .in0({lshift4_b1[60:0], {3{1'b0}}}), // << 3
+                        .in1({lshift4_b1[61:0], {2{1'b0}}}), // << 2
+                        .in2({lshift4_b1[62:0], {1{1'b0}}}), // << 1
+                        .in3(lshift4_b1[63:0]),              // << 0
+                        .sel0(ecl_shft_shift1_e[3]),
+                        .sel1(ecl_shft_shift1_e[2]),
+                        .sel2(ecl_shft_shift1_e[1]),
+                        .sel3(ecl_shft_shift1_e[0]));
+
+   // buffer signals to left muxes (each stage reads the *_b1 copy of the previous stage)
+   dp_buffer #(64) buf_lshiftin(.dout(shifter_input_b1[63:0]), .in(shifter_input[63:0]));
+   dp_buffer #(64) buf_lshift16(.dout(lshift16_b1[63:0]), .in(lshift16[63:0]));
+   dp_buffer #(64) buf_lshift4(.dout(lshift4_b1[63:0]), .in(lshift4[63:0]));
+
+    
+endmodule // sparc_exu_shft
Index: /trunk/T1-CPU/exu/sparc_exu_aluaddsub.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_aluaddsub.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_aluaddsub.v	(revision 6)
@@ -0,0 +1,91 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_aluaddsub.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_aluaddsub
+//	Description:		This block implements addition and subtraction.
+//            It takes two operands, a carry_in, plus two control signals
+//            (subtract and use_cin).  If subtract is high, then rs2_data
+//            is subtracted from rs1_data.  If use_cin is high, then
+//            carry_in is added to the sum (addition) or subtracted from
+//            the result (subtraction).  It outputs the result of the 
+//            specified operation.  To keep the cin calculation from
+//	      being in the critical path, it is moved into the d-stage.
+//	      All other calculations are in the e-stage.
+*/
+
+module sparc_exu_aluaddsub
+  (/*AUTOARG*/
+   // Outputs
+   adder_out, spr_out, alu_ecl_cout64_e_l, alu_ecl_cout32_e, 
+   alu_ecl_adderin2_63_e, alu_ecl_adderin2_31_e, 
+   // Inputs
+   clk, se, byp_alu_rs1_data_e, byp_alu_rs2_data_e, ecl_alu_cin_e, 
+   ifu_exu_invert_d
+   );
+   // ---- results ----
+   output [63:0] adder_out;              // result of adder
+   output [63:0] spr_out;                // result of sum predict
+   output        alu_ecl_cout64_e_l;     // inverse of the adder's cout64
+   output        alu_ecl_cout32_e;       // the adder's cout32
+   output        alu_ecl_adderin2_63_e;  // bit 63 of the 2nd adder input (after the xor)
+   output        alu_ecl_adderin2_31_e;  // bit 31 of the 2nd adder input (after the xor)
+
+   // ---- operands and controls ----
+   input  [63:0] byp_alu_rs1_data_e;     // 1st input operand
+   input  [63:0] byp_alu_rs2_data_e;     // 2nd input operand
+   input         ecl_alu_cin_e;          // carry in
+   input         ifu_exu_invert_d;       // subtract used by adder; registered by sub_dff before use
+   input         clk;
+   input         se;
+
+   wire [63:0]   invert_mask_d;          // invert request fanned out to all 64 bits
+   wire [63:0]   invert_mask_e;          // invert_mask_d after sub_dff
+   wire [63:0]   addend_a;               // 1st input to adder
+   wire [63:0]   addend_b;               // 2nd input to adder
+   wire          carry64;                // cout64 from the adder, active high
+
+   // ---- register the invert control ----
+   assign invert_mask_d[63:0] = {64{ifu_exu_invert_d}};
+   dff_s #(64) sub_dff(.clk(clk), .se(se), .si(), .so(),
+                     .din(invert_mask_d[63:0]), .q(invert_mask_e[63:0]));
+
+   // ---- condition the operands: rs1 passes through, rs2 is xor-ed with the mask ----
+   assign addend_a[63:0] = byp_alu_rs1_data_e[63:0];
+   assign addend_b[63:0] = invert_mask_e[63:0] ^ byp_alu_rs2_data_e[63:0];
+
+   assign alu_ecl_adderin2_31_e = addend_b[31];
+   assign alu_ecl_adderin2_63_e = addend_b[63];
+
+   // ---- adder; its cout64 is exported active low ----
+   sparc_exu_aluadder64 adder(.rs1_data(addend_a[63:0]), .rs2_data(addend_b[63:0]),
+                              .cin(ecl_alu_cin_e), .cout32(alu_ecl_cout32_e),
+                              .cout64(carry64), .adder_out(adder_out[63:0]));
+   assign alu_ecl_cout64_e_l = ~carry64;
+
+   // ---- sum predict, fed with the same operands and carry in ----
+   sparc_exu_aluspr spr(.spr_out(spr_out[63:0]), .cin(ecl_alu_cin_e),
+                        .rs1_data(addend_a[63:0]), .rs2_data(addend_b[63:0]));
+endmodule // sparc_exu_aluaddsub
+
+
+
+
Index: /trunk/T1-CPU/exu/sparc_exu_eclbyplog_rs1.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_eclbyplog_rs1.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_eclbyplog_rs1.v	(revision 6)
@@ -0,0 +1,166 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_eclbyplog_rs1.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_eclbyplog_rs1
+//	Description: This block implements the bypass logic for a single
+//	operand.  It takes the destination registers of all
+//	four forwarding sources and the rs.  It also has the
+//	thread for the instruction in each stage and whether
+//	the instruction writes to the register file.  It won't
+//	bypass if bypass_enable is low or rs =0.  This is for the
+//	special case of rs1 which has two bypass sets.  One uses
+//	the pc as an input (other) and one does not.  
+*/
+module sparc_exu_eclbyplog_rs1 (/*AUTOARG*/
+   // Outputs
+   rs_sel_mux1_m, rs_sel_mux1_w, rs_sel_mux1_w2, rs_sel_mux1_other, 
+   rs_sel_mux2_usemux1, rs_sel_mux2_rf, rs_sel_mux2_e, 
+   rs_sel_mux2_ld, rs_sel_longmux_g2, rs_sel_longmux_w2, 
+   rs_sel_longmux_ldxa, ecl_byp_rcc_mux1_sel_m, 
+   ecl_byp_rcc_mux1_sel_w, ecl_byp_rcc_mux1_sel_w2, 
+   ecl_byp_rcc_mux1_sel_other, ecl_byp_rcc_mux2_sel_usemux1, 
+   ecl_byp_rcc_mux2_sel_rf, ecl_byp_rcc_mux2_sel_e, 
+   ecl_byp_rcc_mux2_sel_ld, 
+   // Inputs
+   sehold, use_other, rs, rd_e, rd_m, ecl_irf_rd_w, ld_rd_g, 
+   wb_byplog_rd_w2, wb_byplog_rd_g2, tid_d, thr_match_de, 
+   thr_match_dm, ecl_irf_tid_w, ld_thr_match_dg, wb_byplog_tid_w2, 
+   ld_thr_match_dg2, ifu_exu_kill_e, wb_e, bypass_m, 
+   lsu_exu_dfill_vld_g, bypass_w, wb_byplog_wen_w2, wb_byplog_wen_g2, 
+   ecl_byp_ldxa_g
+   ) ;
+   input sehold;                // when high, 'bypass' is forced low and use_other no longer selects mux1
+   input use_other;             // select the "other" input of mux1 instead of a forwarded value
+   input [4:0] rs;              // source register
+   input [4:0] rd_e;            // destination regs for all stages
+   input [4:0] rd_m;
+   input [4:0] ecl_irf_rd_w;
+   input [4:0] ld_rd_g;
+   input [4:0] wb_byplog_rd_w2;
+   input [4:0] wb_byplog_rd_g2;
+   input [1:0] tid_d;           // thread id paired with rs in the W and W2 comparators
+   input       thr_match_de;    // thread-match qualifier for the rd_e comparison
+   input       thr_match_dm;    // thread-match qualifier for the rd_m comparison
+   input [1:0] ecl_irf_tid_w;
+   input       ld_thr_match_dg; // thread-match qualifier for the ld_rd_g comparison
+   input [1:0] wb_byplog_tid_w2;
+   input       ld_thr_match_dg2;// thread-match qualifier for the wb_byplog_rd_g2 comparison
+   input       ifu_exu_kill_e;  // blocks forwarding from E
+   input       wb_e;            // whether each stage writes to reg
+   input       bypass_m;            // file
+   input       lsu_exu_dfill_vld_g; // qualifies a load match (use_ld)
+   input       bypass_w;
+   input       wb_byplog_wen_w2;
+   input       wb_byplog_wen_g2;
+   input       ecl_byp_ldxa_g;  // turns a load match into use_ldxa instead of use_ld
+
+   output      rs_sel_mux1_m;
+   output      rs_sel_mux1_w;
+   output      rs_sel_mux1_w2;
+   output      rs_sel_mux1_other;
+   output      rs_sel_mux2_usemux1;
+   output      rs_sel_mux2_rf;
+   output      rs_sel_mux2_e;
+   output      rs_sel_mux2_ld;
+   output      rs_sel_longmux_g2;
+   output      rs_sel_longmux_w2;
+   output      rs_sel_longmux_ldxa;
+   output      ecl_byp_rcc_mux1_sel_m;
+   output      ecl_byp_rcc_mux1_sel_w;
+   output      ecl_byp_rcc_mux1_sel_w2;
+   output      ecl_byp_rcc_mux1_sel_other;
+   output      ecl_byp_rcc_mux2_sel_usemux1;
+   output      ecl_byp_rcc_mux2_sel_rf;
+   output      ecl_byp_rcc_mux2_sel_e;
+   output      ecl_byp_rcc_mux2_sel_ld;
+   
+
+   wire         use_e, use_m, use_w, use_w2, use_rf, use_ld, use_ldxa; // prioritized forwarding choices
+   wire         match_e, match_m, match_w, match_w2, match_ld; // outputs of comparison
+   wire         match_g2;
+   wire         bypass;         // boolean that allows bypassing
+   wire         rs_is_nonzero;
+   wire   rcc_bypass;           // bypass enable for the rcc selects (rs != 0 only)
+
+   // Don't bypass if rs == 0 or we are supposed to use other
+   assign       rs_is_nonzero = rs[0]|rs[1]|rs[2]|rs[3]|rs[4];
+   assign       bypass = rs_is_nonzero & ~use_other & ~sehold;
+
+   // Normal pipe priority: E, M, W, RF
+   // Ld priority: LD, RF
+   // W2 priority: W2, RF
+   assign       use_e = match_e & wb_e & ~ifu_exu_kill_e;
+   assign       use_m = match_m & bypass_m & ~use_e;
+   assign       use_w = match_w & bypass_w & ~use_m & ~use_e;
+   assign       use_ld = match_ld & lsu_exu_dfill_vld_g & ~ecl_byp_ldxa_g;
+   assign       use_ldxa = match_ld & ecl_byp_ldxa_g;
+   assign       use_w2 = (match_w2 & wb_byplog_wen_w2 | match_g2 & wb_byplog_wen_g2) & ~use_e & ~use_m;
+   assign       use_rf = ~use_w2 & ~use_w & ~use_m & ~use_e & ~use_ld & ~use_ldxa; // no forwarding source chosen
+
+   // mux1[M, W, W2, OTHER(optional)]
+   // mux2[mux1, RF, E, LD]
+   assign       rs_sel_mux2_e = (use_e & bypass);
+   assign       rs_sel_mux2_rf = ((use_rf | ~bypass) & ~use_other);
+   assign       rs_sel_mux2_ld = (use_ld & ~use_e  & ~use_w & ~use_m & ~use_w2 & bypass);
+   assign       rs_sel_mux2_usemux1 = (use_other & ~sehold) | (~rs_sel_mux1_other & ~use_e);
+   assign rs_sel_mux1_other = ~((use_m | use_w | use_w2 | use_ldxa) & bypass); // default leg of mux1
+   assign rs_sel_mux1_w2 = ((use_ldxa | use_w2) & bypass);                     // W2/ldxa win over W and M inside mux1
+   assign rs_sel_mux1_w = (use_w & ~use_w2 & ~use_ldxa & bypass);
+   assign rs_sel_mux1_m = (use_m & ~use_w2 & ~use_ldxa & bypass);
+   // Long mux selects: by construction exactly one of ldxa / g2 / w2 is high; w2 is the default.
+   assign rs_sel_longmux_ldxa = use_ldxa;
+   assign rs_sel_longmux_g2 = match_g2 & wb_byplog_wen_g2 & ~use_ldxa;
+   assign rs_sel_longmux_w2 = ~use_ldxa & ~(match_g2 & wb_byplog_wen_g2);
+   
+   // Bypassing for cc generation (don't use other input): same use_* terms, but use_other and sehold are ignored
+   assign rcc_bypass = rs_is_nonzero;
+   assign ecl_byp_rcc_mux2_sel_e = use_e & rcc_bypass;
+   assign ecl_byp_rcc_mux2_sel_rf = use_rf | ~rcc_bypass;
+   assign ecl_byp_rcc_mux2_sel_ld = use_ld & ~use_e  & ~use_w & ~use_m & ~use_w2 & rcc_bypass;
+   assign ecl_byp_rcc_mux2_sel_usemux1 = (use_m | use_w | use_w2 | use_ldxa) & rcc_bypass & ~use_e;
+   assign ecl_byp_rcc_mux1_sel_other = ~(use_m | use_w | use_w2 | use_ldxa); // NOTE: the mux1 rcc selects are not gated by rcc_bypass
+   assign ecl_byp_rcc_mux1_sel_w2 = use_w2 | use_ldxa;
+   assign ecl_byp_rcc_mux1_sel_w = use_w & ~use_w2 & ~use_ldxa;
+   assign ecl_byp_rcc_mux1_sel_m = use_m & ~use_w2 & ~use_ldxa;
+   
+   // Comparisons: E, M, LD and G2 combine a thread-match input with a 5-bit register compare; W and W2 use eclcomp7 on {tid, reg}
+   assign match_e = thr_match_de & (rs[4:0] == rd_e[4:0]);
+//   sparc_exu_eclcomp7 e_comp7(.out(match_e), .in1({tid_d[1:0],rs[4:0]}),
+//                              .in2({ecl_rml_tid_e[1:0],rd_e[4:0]}));
+   assign match_m = thr_match_dm & (rs[4:0] == rd_m[4:0]);
+//   sparc_exu_eclcomp7 m_comp7(.out(match_m), .in1({tid_d[1:0],rs[4:0]}),
+//                              .in2({tid_m[1:0],rd_m[4:0]}));
+   sparc_exu_eclcomp7 w_comp7(.out(match_w), .in1({tid_d[1:0],rs[4:0]}),
+                              .in2({ecl_irf_tid_w[1:0],ecl_irf_rd_w[4:0]}));
+   sparc_exu_eclcomp7 w2_comp7(.out(match_w2), .in1({tid_d[1:0],rs[4:0]}),
+                               .in2({wb_byplog_tid_w2[1:0],wb_byplog_rd_w2[4:0]}));
+   assign match_ld = ld_thr_match_dg & (rs[4:0] == ld_rd_g[4:0]);
+   assign match_g2 = ld_thr_match_dg2 & (rs[4:0] == wb_byplog_rd_g2[4:0]);
+/* -----\/----- EXCLUDED -----\/-----
+   sparc_exu_eclcomp7 ld_comp7(.out(match_ld), .in1({tid_d[1:0],rs[4:0]}),
+                               .in2({ld_tid_g[1:0],ld_rd_g[4:0]}));
+   sparc_exu_eclcomp7 g2_comp7(.out(match_g2), .in1({tid_d[1:0],rs[4:0]}),
+                               .in2({wb_byplog_tid_g2[1:0],wb_byplog_rd_g2[4:0]}));
+ -----/\----- EXCLUDED -----/\----- */
+
+   
+endmodule // sparc_exu_eclbyplog_rs1
Index: /trunk/T1-CPU/exu/sparc_exu_rml_inc3.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_rml_inc3.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_rml_inc3.v	(revision 6)
@@ -0,0 +1,42 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_rml_inc3.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module sparc_exu_rml_inc3 (/*AUTOARG*/
+   // Outputs
+   dout, 
+   // Inputs
+   din, inc
+   ) ;
+   input  [2:0] din;    // current 3-bit value
+   input        inc;    // 1: add one, 0: subtract one
+   output [2:0] dout;   // din + 1 or din - 1, wrapping modulo 8
+
+   // Ripple form of a 3-bit step by one.  Bit 0 always toggles.  The
+   // carry (inc) or borrow (~inc) keeps moving up while every lower bit
+   // is 1 (when adding) or 0 (when subtracting).
+   wire         step_into_1;   // carry/borrow out of bit 0
+   wire         step_into_2;   // carry/borrow out of bit 1
+
+   assign step_into_1 = inc ? din[0] : ~din[0];
+   assign step_into_2 = step_into_1 & (inc ? din[1] : ~din[1]);
+
+   assign dout[2:0] = {din[2] ^ step_into_2, din[1] ^ step_into_1, ~din[0]};
+   
+endmodule // sparc_exu_rml_inc3
Index: /trunk/T1-CPU/exu/sparc_exu_aluspr.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_aluspr.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_aluspr.v	(revision 6)
@@ -0,0 +1,51 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_aluspr.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_aluspr
+//	Description:		This block implements the sum predict for the sparc alu.
+//            It takes two operands and produces the correct result if the
+//            sum is zero.  If not, the output is undefined, but non-zero.
+*/
+
+module sparc_exu_aluspr(/*AUTOARG*/
+   // Outputs
+   spr_out, 
+   // Inputs
+   rs1_data, rs2_data, cin
+   );
+
+   input  [63:0] rs1_data;   // first operand
+   input  [63:0] rs2_data;   // second operand
+   input         cin;        // carry in, used as bit 0 of the shifted OR term
+   output [63:0] spr_out;    // sum-predict vector
+
+   // Per-bit XOR of the operands, and per-bit OR of their low 63 bits.
+   wire [63:0]   bit_xor = rs1_data[63:0] ^ rs2_data[63:0];
+   wire [62:0]   bit_or  = rs1_data[62:0] | rs2_data[62:0];
+
+   // Result bit i is bit_xor[i] ^ bit_or[i-1], with cin standing in for
+   // bit_or[-1]; i.e. the OR term is shifted left by one and cin fills
+   // the vacated bit 0.
+
+   assign spr_out[63:0] = bit_xor[63:0] ^ {bit_or[62:0], cin};
+
+endmodule  // sparc_exu_aluspr
Index: /trunk/T1-CPU/exu/sparc_exu_div_yreg.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_div_yreg.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_div_yreg.v	(revision 6)
@@ -0,0 +1,148 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_div_yreg.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_div_yreg
+//	Description: The 4 32 bit y registers.  It can be written to
+// 		twice each cycle because by definition the writes must come
+//		from different threads.  There is no bypassing because wry switches out.
+*/
+module sparc_exu_div_yreg (/*AUTOARG*/
+   // Outputs
+   yreg_mdq_y_e, div_ecl_yreg_0_l, 
+   // Inputs
+   clk, se, byp_div_yreg_data_w, mul_div_yreg_data_g, ecl_div_thr_e, 
+   ecl_div_yreg_wen_w, ecl_div_yreg_wen_g, ecl_div_yreg_wen_l, 
+   ecl_div_yreg_data_31_g, ecl_div_yreg_shift_g
+   ) ;
+   input clk;                         // clocks every dff_s in this module
+   input se;                          // wired to the .se pin of every dff_s (presumably scan enable)
+   input [31:0] byp_div_yreg_data_w;  // write data; flopped once (yreg_data_w1) before it reaches the input muxes
+   input [31:0] mul_div_yreg_data_g;  // write data chosen by ecl_div_yreg_wen_g
+   input [3:0]  ecl_div_thr_e;        // one bit per thread; selects which y register is read out
+   input [3:0]  ecl_div_yreg_wen_w;   // one bit per thread; selects yreg_data_w1 as the next value
+   input [3:0]  ecl_div_yreg_wen_g;   // one bit per thread; selects mul_div_yreg_data_g as the next value
+   input [3:0]  ecl_div_yreg_wen_l;// w or w2 (original note); one bit per thread, selects the current value again
+   input        ecl_div_yreg_data_31_g;// bit shifted in (at bit 31) on muls
+   input [3:0]  ecl_div_yreg_shift_g;// yreg should be shifted (right by one, one bit per thread)
+   
+   output [31:0] yreg_mdq_y_e;        // y register of the thread picked by ecl_div_thr_e
+   output [3:0]  div_ecl_yreg_0_l;    // inverted bit 0 of each thread's y register
+
+   wire [31:0]   next_yreg_thr0;// next value for yreg
+   wire [31:0]   next_yreg_thr1;
+   wire [31:0]   next_yreg_thr2;
+   wire [31:0]   next_yreg_thr3;
+   wire [31:0]   yreg_thr0;     // current value of yreg
+   wire [31:0]   yreg_thr1;
+   wire [31:0]   yreg_thr2;
+   wire [31:0]   yreg_thr3;
+   wire [3:0]    div_ecl_yreg_0;   // bit 0 of each y register, thread 3 in the MSB
+   wire [31:0]   yreg_data_w1;     // byp_div_yreg_data_w delayed by one clock
+
+
+   //////////////////////////////////
+   //  Output selection for yreg
+   //////////////////////////////////
+   // output the LSB of all 4 regs
+   assign        div_ecl_yreg_0[3:0] = {yreg_thr3[0],yreg_thr2[0],yreg_thr1[0],yreg_thr0[0]};
+   assign        div_ecl_yreg_0_l[3:0] = ~div_ecl_yreg_0[3:0];
+
+`ifdef FPGA_SYN_1THREAD
+   // Single-thread build: only thread 0's register is ever read out.
+   assign 	 yreg_mdq_y_e[31:0] = yreg_thr0[31:0];
+
+`else
+   // NOTE(review): mux4ds is defined elsewhere; presumably it expects one-hot selects - confirm.
+   // mux between the 4 yregs
+   mux4ds #(32) mux_yreg_out(.dout(yreg_mdq_y_e[31:0]), .sel0(ecl_div_thr_e[0]),
+                         .sel1(ecl_div_thr_e[1]), .sel2(ecl_div_thr_e[2]),
+                         .sel3(ecl_div_thr_e[3]), .in0(yreg_thr0[31:0]),
+                         .in1(yreg_thr1[31:0]), .in2(yreg_thr2[31:0]),
+                         .in3(yreg_thr3[31:0]));
+`endif
+   
+   //////////////////////////////////////
+   //  Storage of yreg
+   //////////////////////////////////////
+   // pass along yreg w to w2 (for control signal timing)
+   dff_s #(32) yreg_dff_w2w2(.din(byp_div_yreg_data_w[31:0]), .clk(clk), .q(yreg_data_w1[31:0]),
+                           .se(se), .si(), .so());
+   // The scan pins .si/.so are left unconnected on every flop in this module.
+
+   // mux between yreg_w, yreg_g, old value
+   mux4ds #(32) mux_yregin0(.dout(next_yreg_thr0[31:0]), 
+                          .sel0(ecl_div_yreg_wen_w[0]),
+                          .sel1(ecl_div_yreg_wen_g[0]), 
+                          .sel2(ecl_div_yreg_wen_l[0]),
+                          .sel3(ecl_div_yreg_shift_g[0]),
+                          .in0(yreg_data_w1[31:0]),
+                          .in1(mul_div_yreg_data_g[31:0]), 
+                          .in2(yreg_thr0[31:0]),
+                          .in3({ecl_div_yreg_data_31_g, yreg_thr0[31:1]}));
+`ifdef FPGA_SYN_1THREAD
+   assign 	 next_yreg_thr1[31:0] = yreg_data_w1[31:0];
+   assign 	 next_yreg_thr2[31:0] = yreg_data_w1[31:0];
+   assign 	 next_yreg_thr3[31:0] = yreg_data_w1[31:0];
+   // Threads 1-3 have no input mux in this build; their flops simply track yreg_data_w1.
+`else
+   // Threads 1-3 repeat the thread 0 mux, each with its own select bits and register.
+   mux4ds #(32) mux_yregin1(.dout(next_yreg_thr1[31:0]), 
+                          .sel0(ecl_div_yreg_wen_w[1]),
+                          .sel1(ecl_div_yreg_wen_g[1]), 
+                          .sel2(ecl_div_yreg_wen_l[1]),
+                          .sel3(ecl_div_yreg_shift_g[1]),
+                          .in0(yreg_data_w1[31:0]),
+                          .in1(mul_div_yreg_data_g[31:0]), 
+                          .in2(yreg_thr1[31:0]),
+                          .in3({ecl_div_yreg_data_31_g, yreg_thr1[31:1]}));
+   mux4ds #(32) mux_yregin2(.dout(next_yreg_thr2[31:0]), 
+                          .sel0(ecl_div_yreg_wen_w[2]),
+                          .sel1(ecl_div_yreg_wen_g[2]), 
+                          .sel2(ecl_div_yreg_wen_l[2]),
+                          .sel3(ecl_div_yreg_shift_g[2]),
+                          .in0(yreg_data_w1[31:0]),
+                          .in1(mul_div_yreg_data_g[31:0]), 
+                          .in2(yreg_thr2[31:0]),
+                          .in3({ecl_div_yreg_data_31_g, yreg_thr2[31:1]}));
+   mux4ds #(32) mux_yregin3(.dout(next_yreg_thr3[31:0]), 
+                          .sel0(ecl_div_yreg_wen_w[3]),
+                          .sel1(ecl_div_yreg_wen_g[3]), 
+                          .sel2(ecl_div_yreg_wen_l[3]),
+                          .sel3(ecl_div_yreg_shift_g[3]),
+                          .in0(yreg_data_w1[31:0]),
+                          .in1(mul_div_yreg_data_g[31:0]), 
+                          .in2(yreg_thr3[31:0]),
+                          .in3({ecl_div_yreg_data_31_g, yreg_thr3[31:1]}));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+   // store new value
+   dff_s #(32) dff_yreg_thr0(.din(next_yreg_thr0[31:0]), .clk(clk), .q(yreg_thr0[31:0]),
+                       .se(se), .si(), .so());
+   dff_s #(32) dff_yreg_thr1(.din(next_yreg_thr1[31:0]), .clk(clk), .q(yreg_thr1[31:0]),
+                       .se(se), .si(), .so());
+   dff_s #(32) dff_yreg_thr2(.din(next_yreg_thr2[31:0]), .clk(clk), .q(yreg_thr2[31:0]),
+                       .se(se), .si(), .so());
+   dff_s #(32) dff_yreg_thr3(.din(next_yreg_thr3[31:0]), .clk(clk), .q(yreg_thr3[31:0]),
+                       .se(se), .si(), .so());
+   
+   
+endmodule // sparc_exu_div_yreg
Index: /trunk/T1-CPU/exu/sparc_exu_eclcomp7.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_eclcomp7.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_eclcomp7.v	(revision 6)
@@ -0,0 +1,52 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_eclcomp7.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_eclcomp7
+//	Description: This block is a 7 bit comparator.  It takes 2 inputs
+// 		and outputs a 1 on out if they are equal.
+*/
+
+module sparc_exu_eclcomp7 (/*AUTOARG*/
+   // Outputs
+   out, 
+   // Inputs
+   in1, in2
+   ) ;
+   // The two 7-bit values being compared.
+   input [6:0] in1;
+   input [6:0] in2;
+   // High when every bit of in1 equals the same bit of in2.
+   output      out;
+
+   // Mismatch vector: bit i is set where in1[i] and in2[i] differ.
+   wire [6:0]  mismatch;
+   assign      mismatch = in1 ^ in2;
+
+   // Equivalent gate-level form, with m = mismatch:
+   //    nor1 = ~(m[0]|m[1]), nor2 = ~(m[2]|m[3]), nor3 = ~(m[4]|m[5]),
+   //    nand = ~(nor1 & nor2 & nor3), out = ~(m[6] | nand)
+   // By De Morgan that tree is a NOR across all seven mismatch bits,
+   // which the reduction operator states directly.
+   assign      out = ~(|mismatch);
+
+
+endmodule // sparc_exu_eclcomp7
Index: /trunk/T1-CPU/exu/sparc_exu_eclbyplog.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_eclbyplog.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_eclbyplog.v	(revision 6)
@@ -0,0 +1,139 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_eclbyplog.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_eclbyplog
+//	Description: This block implements the bypass logic for a single
+//	operand.  It takes the destination registers of all
+//	four forwarding sources and the rs.  It also has the
+//	thread for the instruction in each stage and whether
+//	the instruction writes to the register file.  It won't
+//	bypass if rs =0.
+*/
+module sparc_exu_eclbyplog (/*AUTOARG*/
+   // Outputs
+   rs_sel_mux1_m, rs_sel_mux1_w, rs_sel_mux1_w2, rs_sel_mux1_other, 
+   rs_sel_mux2_usemux1, rs_sel_mux2_rf, rs_sel_mux2_e, 
+   rs_sel_mux2_ld, rs_sel_longmux_g2, rs_sel_longmux_w2, 
+   rs_sel_longmux_ldxa, 
+   // Inputs
+   sehold, use_other, rs, rd_e, rd_m, ecl_irf_rd_w, ld_rd_g, 
+   wb_byplog_rd_w2, wb_byplog_rd_g2, tid_d, thr_match_de, 
+   thr_match_dm, ecl_irf_tid_w, ld_thr_match_dg, wb_byplog_tid_w2, 
+   ld_thr_match_dg2, ifu_exu_kill_e, wb_e, bypass_m, 
+   lsu_exu_dfill_vld_g, bypass_w, wb_byplog_wen_w2, wb_byplog_wen_g2, 
+   ecl_byp_ldxa_g
+   ) ;
+   // Global qualifiers: either one switches register bypassing off.
+   input        sehold;
+   input        use_other;
+   // Source register, then the destination register of each forwarding source.
+   input  [4:0] rs;
+   input  [4:0] rd_e;
+   input  [4:0] rd_m;
+   input  [4:0] ecl_irf_rd_w;
+   input  [4:0] ld_rd_g;
+   input  [4:0] wb_byplog_rd_w2;
+   input  [4:0] wb_byplog_rd_g2;
+   // Thread ids fed to the W and W2 comparators.
+   input  [1:0] tid_d;
+   input  [1:0] ecl_irf_tid_w;
+   input  [1:0] wb_byplog_tid_w2;
+   // Thread-match results supplied from outside for the E, M, LD and G2 sources.
+   input        thr_match_de;
+   input        thr_match_dm;
+   input        ld_thr_match_dg;
+   input        ld_thr_match_dg2;
+   // Qualifiers deciding whether a matching source may actually be forwarded.
+   input        ifu_exu_kill_e;
+   input        wb_e;
+   input        bypass_m;
+   input        bypass_w;
+   input        lsu_exu_dfill_vld_g;
+   input        wb_byplog_wen_w2;
+   input        wb_byplog_wen_g2;
+   input        ecl_byp_ldxa_g;
+
+   // mux1 legs: M, W, W2, OTHER
+   output       rs_sel_mux1_m;
+   output       rs_sel_mux1_w;
+   output       rs_sel_mux1_w2;
+   output       rs_sel_mux1_other;
+   // mux2 legs: mux1, RF, E, LD
+   output       rs_sel_mux2_usemux1;
+   output       rs_sel_mux2_rf;
+   output       rs_sel_mux2_e;
+   output       rs_sel_mux2_ld;
+   // long mux legs: G2, W2, LDXA
+   output       rs_sel_longmux_g2;
+   output       rs_sel_longmux_w2;
+   output       rs_sel_longmux_ldxa;
+
+   wire         hit_e, hit_m, hit_w, hit_w2, hit_ld, hit_g2;   // raw register/thread matches
+   wire         fwd_e, fwd_m, fwd_w, fwd_w2, fwd_ld, fwd_ldxa; // matches after qualification and priority
+   wire         from_rf;        // no forwarding source claimed the operand
+   wire         g2_write;       // G2 source matches and its write enable is set
+   wire         late_src;       // W2/G2 or LDXA data, both steered through the mux1 W2 leg
+   wire         force_other;    // use_other request, masked by sehold
+   wire         allow_bypass;   // bypassing permitted for this operand
+
+   // ---- Register / thread comparisons --------------------------------
+   // E, M, LD and G2 pair a 5-bit register compare with a thread-match input.
+   assign       hit_e  = thr_match_de     & (rs[4:0] == rd_e[4:0]);
+   assign       hit_m  = thr_match_dm     & (rs[4:0] == rd_m[4:0]);
+   assign       hit_ld = ld_thr_match_dg  & (rs[4:0] == ld_rd_g[4:0]);
+   assign       hit_g2 = ld_thr_match_dg2 & (rs[4:0] == wb_byplog_rd_g2[4:0]);
+   // W and W2 compare {thread id, register} as a single 7-bit value.
+   sparc_exu_eclcomp7 w_comp7(.out(hit_w), .in1({tid_d[1:0],rs[4:0]}),
+                              .in2({ecl_irf_tid_w[1:0],ecl_irf_rd_w[4:0]}));
+   sparc_exu_eclcomp7 w2_comp7(.out(hit_w2), .in1({tid_d[1:0],rs[4:0]}),
+                               .in2({wb_byplog_tid_w2[1:0],wb_byplog_rd_w2[4:0]}));
+   // ---- Gating: no bypass for register 0, for use_other, or under sehold
+   assign       allow_bypass = (|rs[4:0]) & ~(use_other | sehold);
+   assign       force_other  = use_other & ~sehold;
+   // ---- Priority: E over M over W; W2/G2 give way only to E and M ------
+   assign       fwd_e    = hit_e & wb_e & ~ifu_exu_kill_e;
+   assign       fwd_m    = hit_m & bypass_m & ~fwd_e;
+   assign       fwd_w    = hit_w & bypass_w & ~(fwd_e | fwd_m);
+   assign       fwd_ld   = hit_ld & lsu_exu_dfill_vld_g & ~ecl_byp_ldxa_g;
+   assign       fwd_ldxa = hit_ld & ecl_byp_ldxa_g;
+   assign       g2_write = hit_g2 & wb_byplog_wen_g2;
+   assign       fwd_w2   = ((hit_w2 & wb_byplog_wen_w2) | g2_write) & ~(fwd_e | fwd_m);
+   assign       from_rf  = ~(fwd_e | fwd_m | fwd_w | fwd_w2 | fwd_ld | fwd_ldxa);
+   assign       late_src = fwd_w2 | fwd_ldxa;
+
+   // ---- mux1 selects: the OTHER leg is the default when nothing is forwarded
+   assign       rs_sel_mux1_other = ~((fwd_m | fwd_w | fwd_w2 | fwd_ldxa) & allow_bypass);
+   assign       rs_sel_mux1_w2    = late_src & allow_bypass;
+   assign       rs_sel_mux1_w     = fwd_w & ~late_src & allow_bypass;
+   assign       rs_sel_mux1_m     = fwd_m & ~late_src & allow_bypass;
+
+   // ---- mux2 selects: E first, then mux1 (or a forced OTHER), then LD, else RF
+   assign       rs_sel_mux2_e       = fwd_e & allow_bypass;
+   assign       rs_sel_mux2_rf      = (from_rf | ~allow_bypass) & ~force_other;
+   assign       rs_sel_mux2_ld      = fwd_ld & ~(fwd_e | fwd_m | fwd_w | fwd_w2) & allow_bypass;
+   assign       rs_sel_mux2_usemux1 = force_other | (~rs_sel_mux1_other & ~fwd_e);
+
+   // ---- long mux selects: LDXA over G2, with W2 as the fallback leg
+   assign       rs_sel_longmux_ldxa = fwd_ldxa;
+   assign       rs_sel_longmux_g2   = g2_write & ~fwd_ldxa;
+   assign       rs_sel_longmux_w2   = ~(fwd_ldxa | g2_write);
+
+endmodule // sparc_exu_eclbyplog
Index: /trunk/T1-CPU/exu/sparc_exu_ecl_eccctl.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_ecl_eccctl.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_ecl_eccctl.v	(revision 6)
@@ -0,0 +1,232 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_ecl_eccctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_ecl_eccctl
+//	Description:  Implements the control logic for ecc checking.
+//		This includes picking which error to fix (only one fixed per instruction),
+//		enabling the checks, and signalling the errors.
+*/
+
+module sparc_exu_ecl_eccctl (/*AUTOARG*/
+   // Outputs
+   ue_trap_m, ecl_ecc_sel_rs1_m_l, ecl_ecc_sel_rs2_m_l, 
+   ecl_ecc_sel_rs3_m_l, ecl_ecc_log_rs1_m, ecl_ecc_log_rs2_m, 
+   ecl_ecc_log_rs3_m, ecl_byp_sel_ecc_m, ecl_ecc_rs1_use_rf_e, 
+   ecl_ecc_rs2_use_rf_e, ecl_ecc_rs3_use_rf_e, eccctl_wb_rd_m, 
+   exu_ifu_ecc_ce_m, exu_ifu_ecc_ue_m, exu_ifu_err_reg_m, 
+   ecl_byp_ecc_mask_m_l, exu_ifu_inj_ack, exu_ifu_err_synd_7_m, 
+   // Inputs
+   clk, se, rst_tri_en, ecc_ecl_rs1_ce, ecc_ecl_rs1_ue, 
+   ecc_ecl_rs2_ce, ecc_ecl_rs2_ue, ecc_ecl_rs3_ce, ecc_ecl_rs3_ue, 
+   ecl_byp_rcc_mux2_sel_rf, ecl_byp_rs2_mux2_sel_rf, 
+   ecl_byp_rs3_mux2_sel_rf, rs1_vld_e, rs2_vld_e, rs3_vld_e, 
+   ifu_exu_rs1_m, ifu_exu_rs2_m, ifu_exu_rs3_m, rml_ecl_cwp_d, 
+   ifu_exu_ecc_mask, ifu_exu_inj_irferr, ifu_exu_disable_ce_e, 
+   wb_eccctl_spec_wen_next, ifu_exu_nceen_e, ifu_exu_inst_vld_e, 
+   rml_ecl_gl_e, cancel_rs3_ecc_e
+   ) ;
+   input clk;                         // clocks every dff_s in this module
+   input se;                          // wired to the .se pin of every dff_s (presumably scan enable)
+   input rst_tri_en;                  // forces the rs1 decoder select on and the rs2/rs3 selects off
+   input       ecc_ecl_rs1_ce;        // per-operand correctable (ce) / uncorrectable (ue) error flags
+   input       ecc_ecl_rs1_ue;
+   input       ecc_ecl_rs2_ce;
+   input       ecc_ecl_rs2_ue;
+   input       ecc_ecl_rs3_ce;
+   input       ecc_ecl_rs3_ue;
+   input       ecl_byp_rcc_mux2_sel_rf;   // flopped once to give rs1_sel_rf_e
+   input       ecl_byp_rs2_mux2_sel_rf;   // flopped once to give rs2_sel_rf_e
+   input       ecl_byp_rs3_mux2_sel_rf;   // flopped once to give rs3_sel_rf_e
+   input       rs1_vld_e;
+   input       rs2_vld_e;
+   input       rs3_vld_e;
+   input [4:0] ifu_exu_rs1_m;         // register numbers, candidates for both rd and the error log
+   input [4:0] ifu_exu_rs2_m;
+   input [4:0] ifu_exu_rs3_m;
+   input [2:0] rml_ecl_cwp_d;         // piped two stages to cwp_m for error logging
+   input [7:0] ifu_exu_ecc_mask;      // error-injection mask, gated by inj_irferr_m
+   input       ifu_exu_inj_irferr;    // error injection enable
+   input       ifu_exu_disable_ce_e;  // when set, correctable errors are flagged as uncorrectable
+   input       wb_eccctl_spec_wen_next;
+   input       ifu_exu_nceen_e;       // piped to nceen_m; gates ue_trap_m
+   input       ifu_exu_inst_vld_e;
+   input [1:0] rml_ecl_gl_e;          // piped to gl_m for error logging
+   input       cancel_rs3_ecc_e;      // masks both rs3 error flags
+   
+   output      ue_trap_m;
+   output      ecl_ecc_sel_rs1_m_l;   // active-low decoder selects
+   output      ecl_ecc_sel_rs2_m_l;
+   output      ecl_ecc_sel_rs3_m_l;
+   output      ecl_ecc_log_rs1_m;     // which operand is reported in the error log
+   output      ecl_ecc_log_rs2_m;
+   output      ecl_ecc_log_rs3_m;
+   output      ecl_byp_sel_ecc_m;
+   output      ecl_ecc_rs1_use_rf_e;
+   output      ecl_ecc_rs2_use_rf_e;
+   output      ecl_ecc_rs3_use_rf_e;
+   output [4:0] eccctl_wb_rd_m;       // register number of the operand being corrected
+   output       exu_ifu_ecc_ce_m;
+   output       exu_ifu_ecc_ue_m;
+   output [7:0] exu_ifu_err_reg_m;    // {gl or cwp, logged register number}
+   output [7:0] ecl_byp_ecc_mask_m_l; // active-low injection mask
+   output       exu_ifu_inj_ack;
+   output    exu_ifu_err_synd_7_m;
+   
+   wire      sel_rs1_e;
+   wire      sel_rs2_e;
+   wire      sel_rs3_e;
+   wire        sel_rs1_m;
+   wire        sel_rs2_m;
+   wire        sel_rs3_m;
+   wire        safe_sel_rs1_m;
+   wire        safe_sel_rs2_m;
+   wire        safe_sel_rs3_m;
+   wire [2:0]  cwp_e;
+   wire [2:0]  cwp_m;
+   wire [1:0]  gl_m;
+   wire        inj_irferr_m;
+   wire        inj_irferr_w;
+   wire        detect_ce_e;
+   wire        detect_ue_e;
+   wire        flag_ecc_ce_e;
+   wire        flag_ecc_ue_e;
+   wire [4:0]  log_rs_m;
+   wire        rs1_ce_m;
+   wire        rs1_ue_m;
+   wire        rs2_ce_m;
+   wire        rs2_ue_m;
+   wire        rs3_ue_m;
+   wire        rs1_sel_rf_e;
+   wire        rs2_sel_rf_e;
+   wire        rs3_sel_rf_e;
+   wire        vld_rs3_ce_e;
+   wire        vld_rs3_ue_e;
+   wire        nceen_m;        // ifu_exu_nceen_e piped to M; declared explicitly instead of relying on an implicit net
+   // Store whether rf value was used for ecc checking
+   assign      ecl_ecc_rs1_use_rf_e = rs1_sel_rf_e & rs1_vld_e & ifu_exu_inst_vld_e;
+   assign      ecl_ecc_rs2_use_rf_e = rs2_sel_rf_e & rs2_vld_e & ifu_exu_inst_vld_e;
+   assign      ecl_ecc_rs3_use_rf_e = rs3_sel_rf_e & rs3_vld_e & ifu_exu_inst_vld_e; 
+
+   dff_s rs1_rf_dff(.din(ecl_byp_rcc_mux2_sel_rf), .clk(clk),
+                  .q(rs1_sel_rf_e), .se(se), .si(), .so());
+   dff_s rs2_rf_dff(.din(ecl_byp_rs2_mux2_sel_rf), .clk(clk),
+                  .q(rs2_sel_rf_e), .se(se), .si(), .so());
+   dff_s rs3_rf_dff(.din(ecl_byp_rs3_mux2_sel_rf), .clk(clk),
+                  .q(rs3_sel_rf_e), .se(se), .si(), .so());
+   // rs3 errors only count when not cancelled
+   assign      vld_rs3_ce_e = ecc_ecl_rs3_ce & ~cancel_rs3_ecc_e;
+   assign      vld_rs3_ue_e = ecc_ecl_rs3_ue & ~cancel_rs3_ecc_e;
+   assign    detect_ce_e = (ecc_ecl_rs1_ce | ecc_ecl_rs2_ce | vld_rs3_ce_e);
+   assign    detect_ue_e = (ecc_ecl_rs1_ue | ecc_ecl_rs2_ue | vld_rs3_ue_e);
+   // Generate trap signals
+   assign    flag_ecc_ue_e = (detect_ue_e | 
+                                    detect_ce_e & ifu_exu_disable_ce_e); // convert ce to ue
+   assign    flag_ecc_ce_e = detect_ce_e & ~ifu_exu_disable_ce_e;
+
+   // Pass along signal to fix errors
+   dff_s byp_sel_ecc_e2m(.din(flag_ecc_ce_e), .clk(clk), .q(ecl_byp_sel_ecc_m),
+                       .se(se), .si(), .so());
+   dff_s ecc_ue_e2m(.din(flag_ecc_ue_e), .clk(clk), .q(exu_ifu_ecc_ue_m),
+                  .se(se), .si(), .so());
+   dff_s nceen_e2m(.din(ifu_exu_nceen_e), .clk(clk), .q(nceen_m), .se(se), .si(), .so());
+   assign    ue_trap_m = exu_ifu_ecc_ue_m & nceen_m;   // ue only traps while nceen_m is set
+   // only report ce (and replay) if no ue
+   assign      exu_ifu_ecc_ce_m = ecl_byp_sel_ecc_m & ~exu_ifu_ecc_ue_m;
+   // if globals (logged register number < 8) then report %gl.  otherwise log %cwp
+   assign      exu_ifu_err_reg_m[7:5] = (~log_rs_m[4] & ~log_rs_m[3])? {1'b0,gl_m[1:0]}: cwp_m[2:0];
+   assign      exu_ifu_err_reg_m[4:0] = log_rs_m[4:0];
+   
+   // Control for mux to ecc decoder (just ce): rs1 first, then rs2, rs3 by default
+   assign      sel_rs1_e = ecc_ecl_rs1_ce;
+   assign      sel_rs2_e = ~ecc_ecl_rs1_ce & ecc_ecl_rs2_ce;
+   assign      sel_rs3_e = ~(ecc_ecl_rs1_ce | ecc_ecl_rs2_ce);
+   
+   dff_s ecc_sel_rs1_dff(.din(sel_rs1_e), .clk(clk), .q(sel_rs1_m),
+                       .se(se), .si(), .so());
+   dff_s ecc_sel_rs2_dff(.din(sel_rs2_e), .clk(clk), .q(sel_rs2_m),
+                       .se(se), .si(), .so());
+   dff_s ecc_sel_rs3_dff(.din(sel_rs3_e), .clk(clk), .q(sel_rs3_m),
+                       .se(se), .si(), .so());
+   // Make selects one hot: rst_tri_en forces rs1 and blocks rs2/rs3
+   assign      safe_sel_rs1_m = sel_rs1_m | rst_tri_en;
+   assign      safe_sel_rs2_m = sel_rs2_m & ~rst_tri_en;
+   assign      safe_sel_rs3_m = sel_rs3_m & ~rst_tri_en;
+   assign      ecl_ecc_sel_rs1_m_l = ~safe_sel_rs1_m;
+   assign      ecl_ecc_sel_rs2_m_l = ~safe_sel_rs2_m;
+   assign      ecl_ecc_sel_rs3_m_l = ~safe_sel_rs3_m;
+
+   // Mux to generate the rd for fixed value
+   mux3ds #(5) ecc_rd_mux(.dout(eccctl_wb_rd_m[4:0]),
+                          .in0(ifu_exu_rs1_m[4:0]),
+                          .in1(ifu_exu_rs2_m[4:0]),
+                          .in2(ifu_exu_rs3_m[4:0]),
+                          .sel0(safe_sel_rs1_m),
+                          .sel1(safe_sel_rs2_m),
+                          .sel2(safe_sel_rs3_m));
+
+   // Control for muxes for logging errors: any ue outranks a ce; rs3 is the default leg
+   assign      ecl_ecc_log_rs1_m = rs1_ue_m | (rs1_ce_m & ~rs2_ue_m & ~rs3_ue_m);
+   assign      ecl_ecc_log_rs2_m = (rs2_ue_m & ~rs1_ue_m) | (rs2_ce_m & ~rs1_ue_m & ~rs1_ce_m & ~rs3_ue_m);
+   assign      ecl_ecc_log_rs3_m = ~(ecl_ecc_log_rs1_m | ecl_ecc_log_rs2_m);
+   // Mux to generate the rs for error_logging
+   mux3ds #(5) ecc_rdlog_mux(.dout(log_rs_m[4:0]),
+                          .in0(ifu_exu_rs1_m[4:0]),
+                          .in1(ifu_exu_rs2_m[4:0]),
+                          .in2(ifu_exu_rs3_m[4:0]),
+                          .sel0(ecl_ecc_log_rs1_m),
+                          .sel1(ecl_ecc_log_rs2_m),
+                          .sel2(ecl_ecc_log_rs3_m));
+
+   dff_s #(3) cwp_d2e(.din(rml_ecl_cwp_d[2:0]), .clk(clk), .q(cwp_e[2:0]),
+                    .se(se), .si(), .so());
+   dff_s #(3) cwp_e2m(.din(cwp_e[2:0]), .clk(clk), .q(cwp_m[2:0]),
+                    .se(se), .si(), .so());
+   dff_s #(2) gl_e2m(.din(rml_ecl_gl_e[1:0]), .clk(clk), .q(gl_m[1:0]),
+                   .se(se), .si(), .so());
+
+   // Syndrome needs to know if it was really a ce or ue
+   mux3ds ecc_synd7_mux(.dout(exu_ifu_err_synd_7_m),
+                        .in0(rs1_ce_m),
+                        .in1(rs2_ce_m),
+                        .in2(~rs3_ue_m),
+                        .sel0(ecl_ecc_log_rs1_m),
+                        .sel1(ecl_ecc_log_rs2_m),
+                        .sel2(ecl_ecc_log_rs3_m));
+
+
+   // signals for injecting errors
+   // inject error if it is enabled and a write will probably happen
+   // (don't bother to check kill_w)
+   assign      inj_irferr_m = wb_eccctl_spec_wen_next & ifu_exu_inj_irferr;
+   assign      ecl_byp_ecc_mask_m_l = ~(ifu_exu_ecc_mask[7:0] & {8{inj_irferr_m}});
+   dff_s inj_irferr_m2w(.din(inj_irferr_m), .clk(clk), .q(inj_irferr_w),
+                      .se(se), .si(), .so());
+   assign      exu_ifu_inj_ack = inj_irferr_w;   // acknowledge one cycle after the injection
+
+   // Pipeline Flops (there is no rs3 ce flop; only the masked rs3 ue is carried to M)
+   dff_s rs1_ue_e2m(.din(ecc_ecl_rs1_ue), .clk(clk), .q(rs1_ue_m), .se(se), .si(), .so());
+   dff_s rs1_ce_e2m(.din(ecc_ecl_rs1_ce), .clk(clk), .q(rs1_ce_m), .se(se), .si(), .so());
+   dff_s rs2_ue_e2m(.din(ecc_ecl_rs2_ue), .clk(clk), .q(rs2_ue_m), .se(se), .si(), .so());
+   dff_s rs2_ce_e2m(.din(ecc_ecl_rs2_ce), .clk(clk), .q(rs2_ce_m), .se(se), .si(), .so());
+   dff_s rs3_ue_e2m(.din(vld_rs3_ue_e), .clk(clk), .q(rs3_ue_m), .se(se), .si(), .so());
+   
+endmodule // sparc_exu_ecl_eccctl
Index: /trunk/T1-CPU/exu/sparc_exu.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu.v	(revision 6)
@@ -0,0 +1,1447 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu
+//	Description: Execution unit containing register file (IRF),
+//			execution control (ECL), ALU, shifting (SHFT).
+*/
+module sparc_exu (/*AUTOARG*/
+   // Outputs
+   exu_tlu_wsr_data_m, exu_tlu_va_oor_m, exu_tlu_va_oor_jl_ret_m, 
+   exu_tlu_ue_trap_m, exu_tlu_ttype_vld_m, exu_tlu_ttype_m, 
+   exu_tlu_spill_wtype, exu_tlu_spill_tid, exu_tlu_spill_other, 
+   exu_tlu_spill, exu_tlu_misalign_addr_jmpl_rtn_m, 
+   exu_tlu_cwp_retry, exu_tlu_cwp_cmplt_tid, exu_tlu_cwp_cmplt, 
+   exu_tlu_cwp3_w, exu_tlu_cwp2_w, exu_tlu_cwp1_w, exu_tlu_cwp0_w, 
+   exu_tlu_ccr3_w, exu_tlu_ccr2_w, exu_tlu_ccr1_w, exu_tlu_ccr0_w, 
+   exu_spu_rs3_data_e, exu_mul_rs2_data, exu_mul_rs1_data, 
+   exu_mul_input_vld, exu_mmu_early_va_e, exu_lsu_rs3_data_e, 
+   exu_lsu_rs2_data_e, exu_lsu_priority_trap_m, exu_lsu_ldst_va_e, 
+   exu_lsu_early_va_e, exu_ifu_va_oor_m, exu_ifu_spill_e, 
+   exu_ifu_regz_e, exu_ifu_regn_e, exu_ifu_oddwin_s, 
+   exu_ifu_longop_done_g, exu_ifu_inj_ack, exu_ifu_err_reg_m, 
+   exu_ifu_ecc_ue_m, exu_ifu_ecc_ce_m, exu_ifu_cc_d, exu_ifu_brpc_e, 
+   exu_ffu_wsr_inst_e, short_so0, short_so1, so0, exu_ifu_err_synd_m, 
+   // Inputs
+   tlu_exu_rsr_data_m, tlu_exu_priv_trap_m, tlu_exu_pic_twobelow_m, 
+   tlu_exu_pic_onebelow_m, tlu_exu_cwpccr_update_m, 
+   tlu_exu_cwp_retry_m, tlu_exu_cwp_m, tlu_exu_ccr_m, 
+   tlu_exu_agp_tid, tlu_exu_agp_swap, tlu_exu_agp, sehold, se, rclk, 
+   mul_exu_data_g, mul_exu_ack, lsu_exu_thr_m, 
+   lsu_exu_st_dtlb_perr_g, lsu_exu_rd_m, lsu_exu_ldxa_m, 
+   lsu_exu_ldxa_data_g, lsu_exu_ldst_miss_g2, lsu_exu_flush_pipe_w, 
+   lsu_exu_dfill_vld_g, lsu_exu_dfill_data_g, ifu_tlu_wsr_inst_d, 
+   ifu_tlu_sraddr_d, ifu_tlu_flush_m, ifu_exu_wen_d, 
+   ifu_exu_useimm_d, ifu_exu_usecin_d, ifu_exu_use_rsr_e_l, 
+   ifu_exu_tv_d, ifu_exu_ttype_vld_m, ifu_exu_tid_s2, ifu_exu_tcc_e, 
+   ifu_exu_tagop_d, ifu_exu_shiftop_d, ifu_exu_sethi_inst_d, 
+   ifu_exu_setcc_d, ifu_exu_saved_e, ifu_exu_save_d, 
+   ifu_exu_rs3o_vld_d, ifu_exu_rs3e_vld_d, ifu_exu_rs3_s, 
+   ifu_exu_rs2_vld_d, ifu_exu_rs2_s, ifu_exu_rs1_vld_d, 
+   ifu_exu_rs1_s, ifu_exu_return_d, ifu_exu_restored_e, 
+   ifu_exu_restore_d, ifu_exu_ren3_s, ifu_exu_ren2_s, ifu_exu_ren1_s, 
+   ifu_exu_rd_ifusr_e, ifu_exu_rd_ffusr_e, ifu_exu_rd_exusr_e, 
+   ifu_exu_rd_d, ifu_exu_range_check_other_d, 
+   ifu_exu_range_check_jlret_d, ifu_exu_pcver_e, ifu_exu_pc_d, 
+   ifu_exu_nceen_e, ifu_exu_muls_d, ifu_exu_muldivop_d, 
+   ifu_exu_kill_e, ifu_exu_invert_d, ifu_exu_inst_vld_w, 
+   ifu_exu_inst_vld_e, ifu_exu_inj_irferr, ifu_exu_imm_data_d, 
+   ifu_exu_ialign_d, ifu_exu_flushw_e, ifu_exu_enshift_d, 
+   ifu_exu_ecc_mask, ifu_exu_dontmv_regz1_e, ifu_exu_dontmv_regz0_e, 
+   ifu_exu_disable_ce_e, ifu_exu_dbrinst_d, ifu_exu_casa_d, 
+   ifu_exu_aluop_d, ifu_exu_addr_mask_d, grst_l, ffu_exu_rsr_data_m, 
+   arst_l, mux_drive_disable, mem_write_disable, short_si0, 
+   short_si1, si0
+   ) ;
+
+   input mux_drive_disable;
+   input mem_write_disable;
+   input short_si0;
+   input short_si1;
+   input si0;
+   output short_so0;
+   output short_so1;
+   output so0;
+   /*AUTOINPUT*/
+   // Beginning of automatic inputs (from unused autoinst inputs)
+   input                arst_l;                 // To ecl of sparc_exu_ecl.v, ...
+   input [63:0]         ffu_exu_rsr_data_m;     // To bypass of sparc_exu_byp.v
+   input                grst_l;                 // To ecl of sparc_exu_ecl.v, ...
+   input                ifu_exu_addr_mask_d;    // To ecl of sparc_exu_ecl.v
+   input [2:0]          ifu_exu_aluop_d;        // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_casa_d;         // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_dbrinst_d;      // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_disable_ce_e;   // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_dontmv_regz0_e; // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_dontmv_regz1_e; // To ecl of sparc_exu_ecl.v
+   input [7:0]          ifu_exu_ecc_mask;       // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_enshift_d;      // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_flushw_e;       // To rml of sparc_exu_rml.v
+   input                ifu_exu_ialign_d;       // To ecl of sparc_exu_ecl.v
+   input [31:0]         ifu_exu_imm_data_d;     // To bypass of sparc_exu_byp.v
+   input                ifu_exu_inj_irferr;     // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_inst_vld_e;     // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_inst_vld_w;     // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_invert_d;       // To ecl of sparc_exu_ecl.v, ...
+   input                ifu_exu_kill_e;         // To ecl of sparc_exu_ecl.v
+   input [4:0]          ifu_exu_muldivop_d;     // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_muls_d;         // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_nceen_e;        // To ecl of sparc_exu_ecl.v
+   input [47:0]         ifu_exu_pc_d;           // To bypass of sparc_exu_byp.v
+   input [63:0]         ifu_exu_pcver_e;        // To bypass of sparc_exu_byp.v
+   input                ifu_exu_range_check_jlret_d;// To ecl of sparc_exu_ecl.v
+   input                ifu_exu_range_check_other_d;// To ecl of sparc_exu_ecl.v
+   input [4:0]          ifu_exu_rd_d;           // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_rd_exusr_e;     // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_rd_ffusr_e;     // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_rd_ifusr_e;     // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_ren1_s;         // To irf of bw_r_irf.v
+   input                ifu_exu_ren2_s;         // To irf of bw_r_irf.v
+   input                ifu_exu_ren3_s;         // To irf of bw_r_irf.v
+   input                ifu_exu_restore_d;      // To ecl of sparc_exu_ecl.v, ...
+   input                ifu_exu_restored_e;     // To rml of sparc_exu_rml.v
+   input                ifu_exu_return_d;       // To ecl of sparc_exu_ecl.v
+   input [4:0]          ifu_exu_rs1_s;          // To irf of bw_r_irf.v, ...
+   input                ifu_exu_rs1_vld_d;      // To ecl of sparc_exu_ecl.v
+   input [4:0]          ifu_exu_rs2_s;          // To irf of bw_r_irf.v, ...
+   input                ifu_exu_rs2_vld_d;      // To ecl of sparc_exu_ecl.v
+   input [4:0]          ifu_exu_rs3_s;          // To irf of bw_r_irf.v, ...
+   input                ifu_exu_rs3e_vld_d;     // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_rs3o_vld_d;     // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_save_d;         // To ecl of sparc_exu_ecl.v, ...
+   input                ifu_exu_saved_e;        // To rml of sparc_exu_rml.v
+   input                ifu_exu_setcc_d;        // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_sethi_inst_d;   // To ecl of sparc_exu_ecl.v
+   input [2:0]          ifu_exu_shiftop_d;      // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_tagop_d;        // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_tcc_e;          // To ecl of sparc_exu_ecl.v
+   input [1:0]          ifu_exu_tid_s2;         // To irf of bw_r_irf.v, ...
+   input                ifu_exu_ttype_vld_m;    // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_tv_d;           // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_use_rsr_e_l;    // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_usecin_d;       // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_useimm_d;       // To ecl of sparc_exu_ecl.v
+   input                ifu_exu_wen_d;          // To ecl of sparc_exu_ecl.v
+   input                ifu_tlu_flush_m;        // To ecl of sparc_exu_ecl.v
+   input [6:0]          ifu_tlu_sraddr_d;       // To ecl of sparc_exu_ecl.v
+   input                ifu_tlu_wsr_inst_d;     // To ecl of sparc_exu_ecl.v
+   input [63:0]         lsu_exu_dfill_data_g;   // To bypass of sparc_exu_byp.v
+   input                lsu_exu_dfill_vld_g;    // To ecl of sparc_exu_ecl.v
+   input                lsu_exu_flush_pipe_w;   // To ecl of sparc_exu_ecl.v
+   input                lsu_exu_ldst_miss_g2;   // To ecl of sparc_exu_ecl.v
+   input [63:0]         lsu_exu_ldxa_data_g;    // To bypass of sparc_exu_byp.v
+   input                lsu_exu_ldxa_m;         // To ecl of sparc_exu_ecl.v
+   input [4:0]          lsu_exu_rd_m;           // To ecl of sparc_exu_ecl.v
+   input                lsu_exu_st_dtlb_perr_g; // To ecl of sparc_exu_ecl.v
+   input [1:0]          lsu_exu_thr_m;          // To ecl of sparc_exu_ecl.v
+   input                mul_exu_ack;            // To ecl of sparc_exu_ecl.v
+   input [63:0]         mul_exu_data_g;         // To div of sparc_exu_div.v
+   input                rclk;                   // To irf of bw_r_irf.v, ...
+   input                se;                     // To irf of bw_r_irf.v, ...
+   input                sehold;                 // To irf of bw_r_irf.v, ...
+   input [1:0]          tlu_exu_agp;            // To rml of sparc_exu_rml.v
+   input                tlu_exu_agp_swap;       // To rml of sparc_exu_rml.v
+   input [1:0]          tlu_exu_agp_tid;        // To rml of sparc_exu_rml.v
+   input [7:0]          tlu_exu_ccr_m;          // To ecl of sparc_exu_ecl.v
+   input [2:0]          tlu_exu_cwp_m;          // To rml of sparc_exu_rml.v
+   input                tlu_exu_cwp_retry_m;    // To rml of sparc_exu_rml.v
+   input                tlu_exu_cwpccr_update_m;// To ecl of sparc_exu_ecl.v, ...
+   input                tlu_exu_pic_onebelow_m; // To ecl of sparc_exu_ecl.v
+   input                tlu_exu_pic_twobelow_m; // To ecl of sparc_exu_ecl.v
+   input                tlu_exu_priv_trap_m;    // To ecl of sparc_exu_ecl.v
+   input [63:0]         tlu_exu_rsr_data_m;     // To bypass of sparc_exu_byp.v
+   // End of automatics
+   /*AUTOOUTPUT*/
+   // Beginning of automatic outputs (from unused autoinst outputs)
+   output               exu_ffu_wsr_inst_e;     // From ecl of sparc_exu_ecl.v
+   output [47:0]        exu_ifu_brpc_e;         // From alu of sparc_exu_alu.v
+   output [7:0]         exu_ifu_cc_d;           // From ecl of sparc_exu_ecl.v
+   output               exu_ifu_ecc_ce_m;       // From ecl of sparc_exu_ecl.v
+   output               exu_ifu_ecc_ue_m;       // From ecl of sparc_exu_ecl.v
+   output [7:0]         exu_ifu_err_reg_m;      // From ecl of sparc_exu_ecl.v
+   output               exu_ifu_inj_ack;        // From ecl of sparc_exu_ecl.v
+   output [3:0]         exu_ifu_longop_done_g;  // From ecl of sparc_exu_ecl.v
+   output [3:0]         exu_ifu_oddwin_s;       // From rml of sparc_exu_rml.v
+   output               exu_ifu_regn_e;         // From alu of sparc_exu_alu.v
+   output               exu_ifu_regz_e;         // From alu of sparc_exu_alu.v
+   output               exu_ifu_spill_e;        // From rml of sparc_exu_rml.v
+   output               exu_ifu_va_oor_m;       // From ecl of sparc_exu_ecl.v
+   output [10:3]        exu_lsu_early_va_e;     // From alu of sparc_exu_alu.v
+   output [47:0]        exu_lsu_ldst_va_e;      // From alu of sparc_exu_alu.v
+   output               exu_lsu_priority_trap_m;// From ecl of sparc_exu_ecl.v
+   output [63:0]        exu_lsu_rs2_data_e;     // From bypass of sparc_exu_byp.v
+   output [63:0]        exu_lsu_rs3_data_e;     // From bypass of sparc_exu_byp.v
+   output [7:0]         exu_mmu_early_va_e;     // From alu of sparc_exu_alu.v
+   output               exu_mul_input_vld;      // From ecl of sparc_exu_ecl.v
+   output [63:0]        exu_mul_rs1_data;       // From div of sparc_exu_div.v
+   output [63:0]        exu_mul_rs2_data;       // From div of sparc_exu_div.v
+   output [63:0]        exu_spu_rs3_data_e;     // From bypass of sparc_exu_byp.v
+   output [7:0]         exu_tlu_ccr0_w;         // From ecl of sparc_exu_ecl.v
+   output [7:0]         exu_tlu_ccr1_w;         // From ecl of sparc_exu_ecl.v
+   output [7:0]         exu_tlu_ccr2_w;         // From ecl of sparc_exu_ecl.v
+   output [7:0]         exu_tlu_ccr3_w;         // From ecl of sparc_exu_ecl.v
+   output [2:0]         exu_tlu_cwp0_w;         // From rml of sparc_exu_rml.v
+   output [2:0]         exu_tlu_cwp1_w;         // From rml of sparc_exu_rml.v
+   output [2:0]         exu_tlu_cwp2_w;         // From rml of sparc_exu_rml.v
+   output [2:0]         exu_tlu_cwp3_w;         // From rml of sparc_exu_rml.v
+   output               exu_tlu_cwp_cmplt;      // From rml of sparc_exu_rml.v
+   output [1:0]         exu_tlu_cwp_cmplt_tid;  // From rml of sparc_exu_rml.v
+   output               exu_tlu_cwp_retry;      // From rml of sparc_exu_rml.v
+   output               exu_tlu_misalign_addr_jmpl_rtn_m;// From ecl of sparc_exu_ecl.v
+   output               exu_tlu_spill;          // From rml of sparc_exu_rml.v
+   output               exu_tlu_spill_other;    // From rml of sparc_exu_rml.v
+   output [1:0]         exu_tlu_spill_tid;      // From rml of sparc_exu_rml.v
+   output [2:0]         exu_tlu_spill_wtype;    // From rml of sparc_exu_rml.v
+   output [8:0]         exu_tlu_ttype_m;        // From ecl of sparc_exu_ecl.v
+   output               exu_tlu_ttype_vld_m;    // From ecl of sparc_exu_ecl.v
+   output               exu_tlu_ue_trap_m;      // From ecl of sparc_exu_ecl.v
+   output               exu_tlu_va_oor_jl_ret_m;// From ecl of sparc_exu_ecl.v
+   output               exu_tlu_va_oor_m;       // From ecl of sparc_exu_ecl.v
+   output [63:0]        exu_tlu_wsr_data_m;     // From bypass of sparc_exu_byp.v
+   // End of automatics
+   /*AUTOWIRE*/
+   // Beginning of automatic wires (for undeclared instantiated-module outputs)
+   wire [63:0]          alu_byp_rd_data_e;      // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_add_n32_e;      // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_add_n64_e;      // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_adder_out_63_e; // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_adderin2_31_e;  // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_adderin2_63_e;  // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_cout32_e;       // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_cout64_e_l;     // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_log_n32_e;      // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_log_n64_e;      // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_mem_addr_invalid_e_l;// From alu of sparc_exu_alu.v
+   wire                 alu_ecl_zhigh_e;        // From alu of sparc_exu_alu.v
+   wire                 alu_ecl_zlow_e;         // From alu of sparc_exu_alu.v
+   wire [63:0]          byp_alu_rcc_data_e;     // From bypass of sparc_exu_byp.v
+   wire [63:0]          byp_alu_rs1_data_e;     // From bypass of sparc_exu_byp.v
+   wire [63:0]          byp_alu_rs2_data_e_l;   // From bypass of sparc_exu_byp.v
+   wire [63:0]          byp_ecc_rcc_data_e;     // From bypass of sparc_exu_byp.v
+   wire [7:0]           byp_ecc_rs1_synd_d;     // From bypass of sparc_exu_byp.v
+   wire [7:0]           byp_ecc_rs2_synd_d;     // From bypass of sparc_exu_byp.v
+   wire [63:0]          byp_ecc_rs3_data_e;     // From bypass of sparc_exu_byp.v
+   wire [7:0]           byp_ecc_rs3_synd_d;     // From bypass of sparc_exu_byp.v
+   wire [2:0]           byp_ecl_rs1_2_0_e;      // From bypass of sparc_exu_byp.v
+   wire                 byp_ecl_rs1_31_e;       // From bypass of sparc_exu_byp.v
+   wire                 byp_ecl_rs1_63_e;       // From bypass of sparc_exu_byp.v
+   wire                 byp_ecl_rs2_31_e;       // From bypass of sparc_exu_byp.v
+   wire [3:0]           byp_ecl_rs2_3_0_e;      // From bypass of sparc_exu_byp.v
+   wire [71:0]          byp_irf_rd_data_w;      // From bypass of sparc_exu_byp.v
+   wire [71:0]          byp_irf_rd_data_w2;     // From bypass of sparc_exu_byp.v
+   wire [63:0]          div_byp_muldivout_g;    // From div of sparc_exu_div.v
+   wire [31:0]          div_byp_yreg_e;         // From div of sparc_exu_div.v
+   wire                 div_ecl_adder_out_31;   // From div of sparc_exu_div.v
+   wire                 div_ecl_cout32;         // From div of sparc_exu_div.v
+   wire                 div_ecl_cout64;         // From div of sparc_exu_div.v
+   wire                 div_ecl_d_62;           // From div of sparc_exu_div.v
+   wire                 div_ecl_d_msb;          // From div of sparc_exu_div.v
+   wire                 div_ecl_detect_zero_high;// From div of sparc_exu_div.v
+   wire                 div_ecl_detect_zero_low;// From div of sparc_exu_div.v
+   wire                 div_ecl_dividend_msb;   // From div of sparc_exu_div.v
+   wire                 div_ecl_gencc_in_31;    // From div of sparc_exu_div.v
+   wire                 div_ecl_gencc_in_msb_l; // From div of sparc_exu_div.v
+   wire                 div_ecl_low32_nonzero;  // From div of sparc_exu_div.v
+   wire                 div_ecl_upper32_equal;  // From div of sparc_exu_div.v
+   wire                 div_ecl_x_msb;          // From div of sparc_exu_div.v
+   wire                 div_ecl_xin_msb_l;      // From div of sparc_exu_div.v
+   wire [3:0]           div_ecl_yreg_0_l;       // From div of sparc_exu_div.v
+   wire [63:0]          ecc_byp_ecc_result_m;   // From ecc of sparc_exu_ecc.v
+   wire                 ecc_ecl_rs1_ce;         // From ecc of sparc_exu_ecc.v
+   wire                 ecc_ecl_rs1_ue;         // From ecc of sparc_exu_ecc.v
+   wire                 ecc_ecl_rs2_ce;         // From ecc of sparc_exu_ecc.v
+   wire                 ecc_ecl_rs2_ue;         // From ecc of sparc_exu_ecc.v
+   wire                 ecc_ecl_rs3_ce;         // From ecc of sparc_exu_ecc.v
+   wire                 ecc_ecl_rs3_ue;         // From ecc of sparc_exu_ecc.v
+   wire                 ecl_alu_cin_e;          // From ecl of sparc_exu_ecl.v
+   wire                 ecl_alu_log_sel_and_e;  // From ecl of sparc_exu_ecl.v
+   wire                 ecl_alu_log_sel_move_e; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_alu_log_sel_or_e;   // From ecl of sparc_exu_ecl.v
+   wire                 ecl_alu_log_sel_xor_e;  // From ecl of sparc_exu_ecl.v
+   wire                 ecl_alu_out_sel_logic_e_l;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_alu_out_sel_rs3_e_l;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_alu_out_sel_shift_e_l;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_alu_out_sel_sum_e_l;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_alu_sethi_inst_e;   // From ecl of sparc_exu_ecl.v
+   wire [2:0]           ecl_byp_3lsb_m;         // From ecl of sparc_exu_ecl.v
+   wire [7:0]           ecl_byp_ecc_mask_m_l;   // From ecl of sparc_exu_ecl.v
+   wire [7:0]           ecl_byp_eclpr_e;        // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_ldxa_g;         // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rcc_mux1_sel_m; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rcc_mux1_sel_other;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rcc_mux1_sel_w; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rcc_mux1_sel_w2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rcc_mux2_sel_e; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rcc_mux2_sel_ld;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rcc_mux2_sel_rf;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rcc_mux2_sel_usemux1;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_restore_m;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_longmux_sel_g2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_longmux_sel_ldxa;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_longmux_sel_w2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_mux1_sel_m; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_mux1_sel_other;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_mux1_sel_w; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_mux1_sel_w2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_mux2_sel_e; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_mux2_sel_ld;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_mux2_sel_rf;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs1_mux2_sel_usemux1;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_longmux_sel_g2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_longmux_sel_ldxa;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_longmux_sel_w2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_mux1_sel_m; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_mux1_sel_other;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_mux1_sel_w; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_mux1_sel_w2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_mux2_sel_e; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_mux2_sel_ld;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_mux2_sel_rf;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs2_mux2_sel_usemux1;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_longmux_sel_g2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_longmux_sel_ldxa;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_longmux_sel_w2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_mux1_sel_m; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_mux1_sel_other;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_mux1_sel_w; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_mux1_sel_w2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_mux2_sel_e; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_mux2_sel_ld;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_mux2_sel_rf;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3_mux2_sel_usemux1;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_longmux_sel_g2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_longmux_sel_ldxa;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_longmux_sel_w2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_mux1_sel_m;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_mux1_sel_other;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_mux1_sel_w;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_mux1_sel_w2;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_mux2_sel_e;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_mux2_sel_ld;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_mux2_sel_rf;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_rs3h_mux2_sel_usemux1;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_alu_e;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_ecc_m;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_eclpr_e;    // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_ffusr_m;    // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_ifex_m;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_ifusr_e;    // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_load_g;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_load_m;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_muldiv_g;   // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_pipe_m;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_restore_g;  // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_restore_m;  // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_tlusr_m;    // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_sel_yreg_e;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_byp_std_e_l;        // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_almostlast_cycle;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_cin;            // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_div64;          // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_dividend_sign;  // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_keep_d;         // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_keepx;          // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_last_cycle;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_ld_inputs;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_mul_get_32bit_data;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_mul_get_new_data;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_mul_keep_data;  // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_mul_sext_rs1_e; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_mul_sext_rs2_e; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_mul_wen;        // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_muls;           // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_muls_rs1_31_e_l;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_newq;           // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_sel_64b;        // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_sel_adder;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_sel_div;        // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_sel_neg32;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_sel_pos32;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_sel_u32;        // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_subtract_l;     // From ecl of sparc_exu_ecl.v
+   wire [3:0]           ecl_div_thr_e;          // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_upper32_zero;   // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_upper33_one;    // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_upper33_zero;   // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_xinmask;        // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_yreg_data_31_g; // From ecl of sparc_exu_ecl.v
+   wire [3:0]           ecl_div_yreg_shift_g;   // From ecl of sparc_exu_ecl.v
+   wire [3:0]           ecl_div_yreg_wen_g;     // From ecl of sparc_exu_ecl.v
+   wire [3:0]           ecl_div_yreg_wen_l;     // From ecl of sparc_exu_ecl.v
+   wire [3:0]           ecl_div_yreg_wen_w;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_div_zero_rs2_e;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_ecc_log_rs1_m;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_ecc_log_rs2_m;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_ecc_log_rs3_m;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_ecc_rs1_use_rf_e;   // From ecl of sparc_exu_ecl.v
+   wire                 ecl_ecc_rs2_use_rf_e;   // From ecl of sparc_exu_ecl.v
+   wire                 ecl_ecc_rs3_use_rf_e;   // From ecl of sparc_exu_ecl.v
+   wire                 ecl_ecc_sel_rs1_m_l;    // From ecl of sparc_exu_ecl.v
+   wire                 ecl_ecc_sel_rs2_m_l;    // From ecl of sparc_exu_ecl.v
+   wire                 ecl_ecc_sel_rs3_m_l;    // From ecl of sparc_exu_ecl.v
+   wire [4:0]           ecl_irf_rd_g;           // From ecl of sparc_exu_ecl.v
+   wire [4:0]           ecl_irf_rd_m;           // From ecl of sparc_exu_ecl.v
+   wire [1:0]           ecl_irf_tid_g;          // From ecl of sparc_exu_ecl.v
+   wire [1:0]           ecl_irf_tid_m;          // From ecl of sparc_exu_ecl.v
+   wire                 ecl_irf_wen_w;          // From ecl of sparc_exu_ecl.v
+   wire                 ecl_irf_wen_w2;         // From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_canrestore_wen_w;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_cansave_wen_w;  // From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_cleanwin_wen_w; // From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_cwp_wen_e;      // From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_early_flush_w;  // From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_inst_vld_w;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_kill_e;         // From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_kill_w;         // From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_otherwin_wen_w; // From ecl of sparc_exu_ecl.v
+   wire [3:0]           ecl_rml_thr_m;          // From ecl of sparc_exu_ecl.v
+   wire [3:0]           ecl_rml_thr_w;          // From ecl of sparc_exu_ecl.v
+   wire                 ecl_rml_wstate_wen_w;   // From ecl of sparc_exu_ecl.v
+   wire [2:0]           ecl_rml_xor_data_e;     // From ecl of sparc_exu_ecl.v
+   wire                 ecl_shft_enshift_e_l;   // From ecl of sparc_exu_ecl.v
+   wire                 ecl_shft_extend32bit_e_l;// From ecl of sparc_exu_ecl.v
+   wire                 ecl_shft_extendbit_e;   // From ecl of sparc_exu_ecl.v
+   wire                 ecl_shft_lshift_e_l;    // From ecl of sparc_exu_ecl.v
+   wire                 ecl_shft_op32_e;        // From ecl of sparc_exu_ecl.v
+   wire [3:0]           ecl_shft_shift1_e;      // From ecl of sparc_exu_ecl.v
+   wire [3:0]           ecl_shft_shift4_e;      // From ecl of sparc_exu_ecl.v
+   wire [71:0]          irf_byp_rs1_data_d_l;   // From irf of bw_r_irf.v
+   wire [71:0]          irf_byp_rs2_data_d_l;   // From irf of bw_r_irf.v
+   wire [71:0]          irf_byp_rs3_data_d_l;   // From irf of bw_r_irf.v
+   wire [31:0]          irf_byp_rs3h_data_d_l;  // From irf of bw_r_irf.v
+   wire [2:0]           rml_ecl_canrestore_d;   // From rml of sparc_exu_rml.v
+   wire [2:0]           rml_ecl_cansave_d;      // From rml of sparc_exu_rml.v
+   wire                 rml_ecl_clean_window_e; // From rml of sparc_exu_rml.v
+   wire [2:0]           rml_ecl_cleanwin_d;     // From rml of sparc_exu_rml.v
+   wire [2:0]           rml_ecl_cwp_d;          // From rml of sparc_exu_rml.v
+   wire                 rml_ecl_fill_e;         // From rml of sparc_exu_rml.v
+   wire [1:0]           rml_ecl_gl_e;           // From rml of sparc_exu_rml.v
+   wire                 rml_ecl_kill_m;         // From rml of sparc_exu_rml.v
+   wire                 rml_ecl_other_e;        // From rml of sparc_exu_rml.v
+   wire [2:0]           rml_ecl_otherwin_d;     // From rml of sparc_exu_rml.v
+   wire                 rml_ecl_rmlop_done_e;   // From rml of sparc_exu_rml.v
+   wire [3:0]           rml_ecl_swap_done;      // From rml of sparc_exu_rml.v
+   wire [5:0]           rml_ecl_wstate_d;       // From rml of sparc_exu_rml.v
+   wire [2:0]           rml_ecl_wtype_e;        // From rml of sparc_exu_rml.v
+   wire [1:0]           rml_irf_cwpswap_tid_e;  // From rml of sparc_exu_rml.v
+   wire [1:0]           rml_irf_global_tid;     // From rml of sparc_exu_rml.v
+   wire                 rml_irf_kill_restore_w; // From rml of sparc_exu_rml.v
+   wire [1:0]           rml_irf_new_agp;        // From rml of sparc_exu_rml.v
+   wire [2:0]           rml_irf_new_lo_cwp_e;   // From rml of sparc_exu_rml.v
+   wire [1:0]           rml_irf_old_agp;        // From rml of sparc_exu_rml.v
+   wire [2:0]           rml_irf_old_lo_cwp_e;   // From rml of sparc_exu_rml.v
+   wire                 rml_irf_swap_even_e;    // From rml of sparc_exu_rml.v
+   wire                 rml_irf_swap_global;    // From rml of sparc_exu_rml.v
+   wire                 rml_irf_swap_local_e;   // From rml of sparc_exu_rml.v
+   wire                 rml_irf_swap_odd_e;     // From rml of sparc_exu_rml.v
+   wire [63:0]          shft_alu_shift_out_e;   // From shft of sparc_exu_shft.v
+   // End of automatics
+   wire                 short_scan0_1;          // short scan link: irf .so -> ecl .si
+   wire                 scan0_1,scan0_2,scan0_3; // scan chain links; scan0_1 is connected to ecc .so
+
+   wire                 ecl_alu_casa_e;         // connected to the ecl_alu_casa_e port of ecl
+   wire [63:0]          byp_alu_rs2_data_e;     // shared net: connected to both bypass and ecc
+   output [7:0]         exu_ifu_err_synd_m;     // [6:0] is an output of ecc; [7] connects to ecl port exu_ifu_err_synd_7_m
+   wire [1:0]           rml_irf_old_e_cwp_e;    // connected to the irf port of the same name
+   wire [1:0]           rml_irf_new_e_cwp_e;    // connected to the irf port of the same name
+`ifdef FPGA_NEW_IRF
+
+wire [ 71:0]irf_byp_rs1_data_d_l_fpga;   // rs1 read data from bw_r_irf_fpga1; forwarded to irf_byp_rs1_data_d_l
+wire [ 71:0]irf_byp_rs2_data_d_l_fpga;   // rs2 read data from bw_r_irf_fpga1; forwarded to irf_byp_rs2_data_d_l
+wire [ 71:0]irf_byp_rs3_data_d_l_fpga;   // rs3 read data from bw_r_irf_fpga1; forwarded to irf_byp_rs3_data_d_l
+wire [ 31:0]irf_byp_rs3h_data_d_l_fpga;  // rs3h read data from bw_r_irf_fpga1; forwarded to irf_byp_rs3h_data_d_l
+
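+/* Disabled debug harness, kept for reference only. It compared the read data
+   of a reference bw_r_irf (the *_ref wires) against bw_r_irf_fpga1 (the *_fpga
+   wires), raised the matching syndrome bit on a mismatch, and exported the
+   signals through ILA_DATA to the ST1 capture core and the VIO1 core.
+clkdbl clkdbl_inst(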
+	.inclk0(rclk),
+	.c0(rclk2x)
+);
+
+wire [738:0] ILA_DATA;
+wire [ 71:0]irf_byp_rs1_data_d_l_ref;
+wire [ 71:0]irf_byp_rs2_data_d_l_ref;
+wire [ 71:0]irf_byp_rs3_data_d_l_ref;
+wire [ 31:0]irf_byp_rs3h_data_d_l_ref;
+
+ST1 ila(
+	.acq_clk(rclk),
+	.acq_data_in(ILA_DATA),
+	.acq_trigger_in(ILA_DATA)
+);
+
+wire [35:0] ref_cnt;
+wire [ 3:0] allow;
+reg  [35:0] cnt;
+reg  [15:0] err_cnt;
+
+VIO1 vio(
+	.probe(cnt>ref_cnt),
+	.source({allow,ref_cnt})
+);
+
+reg ifu_exu_ren1_s_d;
+reg ifu_exu_ren2_s_d;
+reg ifu_exu_ren3_s_d;
+reg [71:0] irf_byp_rs1_data_d_l_fpga_d;
+reg [71:0] irf_byp_rs2_data_d_l_fpga_d;
+reg [71:0] irf_byp_rs3_data_d_l_fpga_d;
+reg [31:0] irf_byp_rs3h_data_d_l_fpga_d;
+reg [71:0] irf_byp_rs1_data_d_l_d;
+reg [71:0] irf_byp_rs2_data_d_l_d;
+reg [71:0] irf_byp_rs3_data_d_l_d;
+reg [31:0] irf_byp_rs3h_data_d_l_d;
+reg [31:0] written;
+reg        swap_d;
+reg        swap_d1;
+reg [ 4:0] ecl_irf_rd_m_d;
+reg [ 4:0] ecl_irf_rd_g_d;
+reg [ 2:0] current_window;
+reg [ 2:0] new_lo_cwp_d;
+reg [ 4:0] ifu_exu_rs1_s_d;
+reg [ 4:0] ifu_exu_rs2_s_d;
+reg [ 4:0] ifu_exu_rs3_s_d;
+
+wire [4:0] wraddr0=current_window[0] && ecl_irf_rd_m_d[3] ? {~ecl_irf_rd_m_d[4],ecl_irf_rd_m_d[3:0]}:ecl_irf_rd_m_d;
+wire [4:0] wraddr1=current_window[0] && ecl_irf_rd_g_d[3] ? {~ecl_irf_rd_g_d[4],ecl_irf_rd_g_d[3:0]}:ecl_irf_rd_g_d;
+wire [4:0] rdaddr0=current_window[0] && ifu_exu_rs1_s_d[3] ? {~ifu_exu_rs1_s_d[4],ifu_exu_rs1_s_d[3:0]}:ifu_exu_rs1_s_d;
+wire [4:0] rdaddr1=current_window[0] && ifu_exu_rs2_s_d[3] ? {~ifu_exu_rs2_s_d[4],ifu_exu_rs2_s_d[3:0]}:ifu_exu_rs2_s_d;
+wire [4:0] rdaddr2=current_window[0] && ifu_exu_rs3_s_d[3] ? {~ifu_exu_rs3_s_d[4],ifu_exu_rs3_s_d[3:0]}:ifu_exu_rs3_s_d;
+
+wire [3:0] syndrome;
+
+always @(posedge rclk or negedge arst_l)
+   if(~arst_l)
+      begin
+         written<=0;
+         current_window<=0;
+         swap_d<=0;
+         swap_d1<=0;
+         err_cnt<=0;
+      end
+   else
+   begin
+      if(rml_irf_swap_local_e && (current_window!=rml_irf_old_lo_cwp_e))
+         cnt<=36'b0;
+      else
+         cnt<=cnt+1;
+      if(err_cnt==16'h0360)
+         err_cnt<=16'b0;
+      else
+         if(rml_irf_swap_local_e && (current_window!=rml_irf_old_lo_cwp_e))
+            err_cnt<=err_cnt+1;
+      ifu_exu_ren1_s_d<=ifu_exu_ren1_s;
+      ifu_exu_ren2_s_d<=ifu_exu_ren2_s;
+      ifu_exu_ren3_s_d<=ifu_exu_ren3_s;
+      irf_byp_rs1_data_d_l_fpga_d<=irf_byp_rs1_data_d_l_fpga;
+      irf_byp_rs2_data_d_l_fpga_d<=irf_byp_rs2_data_d_l_fpga;
+      irf_byp_rs3_data_d_l_fpga_d<=irf_byp_rs3_data_d_l_fpga;
+      irf_byp_rs3h_data_d_l_fpga_d<=irf_byp_rs3h_data_d_l_fpga;
+      irf_byp_rs1_data_d_l_d<=irf_byp_rs1_data_d_l;
+      irf_byp_rs2_data_d_l_d<=irf_byp_rs2_data_d_l;
+      irf_byp_rs3_data_d_l_d<=irf_byp_rs3_data_d_l;
+      irf_byp_rs3h_data_d_l_d<=irf_byp_rs3h_data_d_l;
+      swap_d<=rml_irf_swap_local_e;
+      swap_d1<=swap_d;
+      ecl_irf_rd_m_d<=ecl_irf_rd_m;
+      ecl_irf_rd_g_d<=ecl_irf_rd_g;
+      new_lo_cwp_d<=rml_irf_new_lo_cwp_e;
+      ifu_exu_rs1_s_d<=ifu_exu_rs1_s;
+      ifu_exu_rs2_s_d<=ifu_exu_rs2_s;
+      ifu_exu_rs3_s_d<=ifu_exu_rs3_s;
+      if(swap_d)
+         current_window<=new_lo_cwp_d;
+      if(swap_d1)
+         if(rml_irf_kill_restore_w) // SAVE
+            written<=(ecl_irf_wen_w<<wraddr0) | (ecl_irf_wen_w2<<wraddr1);
+         else // restore
+            written<=32'hFFFFFFFF;
+      else
+         begin
+            if(ecl_irf_wen_w)
+               written[wraddr0]<=1;
+            if(ecl_irf_wen_w2)
+               written[wraddr1]<=1;
+         end
+   end 
+
+wire read_lo0=(rdaddr0>5'd7) && (rdaddr0<5'd24);
+wire read_lo1=(rdaddr1>5'd7) && (rdaddr1<5'd24);
+wire read_lo2=(rdaddr2>5'd7) && (rdaddr2<5'd24);
+wire read_known0=(!read_lo0) || written[rdaddr0];
+wire read_known1=(!read_lo1) || written[rdaddr1];
+wire read_known2=(!read_lo2) || written[rdaddr2];
+
+assign syndrome[0]=ifu_exu_ren1_s_d && (irf_byp_rs1_data_d_l_ref!=irf_byp_rs1_data_d_l_fpga);
+assign syndrome[1]=ifu_exu_ren2_s_d && (irf_byp_rs2_data_d_l_ref!=irf_byp_rs2_data_d_l_fpga);
+assign syndrome[2]=ifu_exu_ren3_s_d && (irf_byp_rs3_data_d_l_ref!=irf_byp_rs3_data_d_l_fpga);
+assign syndrome[3]=ifu_exu_ren3_s_d && (irf_byp_rs3h_data_d_l_ref!=irf_byp_rs3h_data_d_l_fpga);
+
+assign ILA_DATA[1:0]=ifu_exu_tid_s2;
+assign ILA_DATA[6:2]=ifu_exu_rs1_s;
+assign ILA_DATA[11:7]=ifu_exu_rs2_s;
+assign ILA_DATA[16:12]=ifu_exu_rs3_s;
+assign ILA_DATA[17]=ifu_exu_ren1_s;
+assign ILA_DATA[18]=ifu_exu_ren2_s;
+assign ILA_DATA[19]=ifu_exu_ren3_s;
+assign ILA_DATA[20]=ecl_irf_wen_w;
+assign ILA_DATA[21]=ecl_irf_wen_w2;
+assign ILA_DATA[26:22]=ecl_irf_rd_m_d;
+assign ILA_DATA[31:27]=ecl_irf_rd_g_d;
+assign ILA_DATA[103:32]=byp_irf_rd_data_w;
+assign ILA_DATA[175:104]=byp_irf_rd_data_w2;
+assign ILA_DATA[177:176]=ecl_irf_tid_m;
+assign ILA_DATA[179:178]=ecl_irf_tid_g;
+assign ILA_DATA[182:180]=rml_irf_old_lo_cwp_e;
+assign ILA_DATA[185:183]=rml_irf_new_lo_cwp_e;
+assign ILA_DATA[187:186]=rml_irf_old_e_cwp_e;
+assign ILA_DATA[189:188]=rml_irf_new_e_cwp_e;
+assign ILA_DATA[190]=rml_irf_swap_even_e;
+assign ILA_DATA[191]=rml_irf_swap_odd_e;
+assign ILA_DATA[192]=rml_irf_swap_local_e;
+assign ILA_DATA[193]=rml_irf_kill_restore_w;
+assign ILA_DATA[195:194]=rml_irf_cwpswap_tid_e;
+assign ILA_DATA[197:196]=rml_irf_old_agp;
+assign ILA_DATA[199:198]=rml_irf_new_agp;
+assign ILA_DATA[200]=rml_irf_swap_global;
+assign ILA_DATA[202:201]=rml_irf_global_tid;
+assign ILA_DATA[274:203]=irf_byp_rs1_data_d_l_ref;
+assign ILA_DATA[346:275]=irf_byp_rs2_data_d_l_ref;
+assign ILA_DATA[418:347]=irf_byp_rs3_data_d_l_ref;
+assign ILA_DATA[450:419]=irf_byp_rs3h_data_d_l_ref;
+assign ILA_DATA[522:451]=irf_byp_rs1_data_d_l_fpga;
+assign ILA_DATA[594:523]=irf_byp_rs2_data_d_l_fpga;
+assign ILA_DATA[666:595]=irf_byp_rs3_data_d_l_fpga;
+assign ILA_DATA[698:667]=irf_byp_rs3h_data_d_l_fpga;
+assign ILA_DATA[702:699]=syndrome;// && read_known0;
+assign ILA_DATA[705:703]=current_cwp[2:0];
+assign ILA_DATA[706]=0;
+assign ILA_DATA[737:707]={cnt[14:0],err_cnt};
+assign ILA_DATA[738]=rml_irf_swap_local_e && (current_window!=rml_irf_old_lo_cwp_e);
+//assign ILA_DATA[699]=(irf_byp_rs1_data_d_l_fpga!=irf_byp_rs1_data_d_l_fpga_d) && (irf_byp_rs1_data_d_l==irf_byp_rs1_data_d_l_d);
+//assign ILA_DATA[700]=(irf_byp_rs2_data_d_l_fpga!=irf_byp_rs2_data_d_l_fpga_d) && (irf_byp_rs2_data_d_l==irf_byp_rs2_data_d_l_d);
+//assign ILA_DATA[701]=(irf_byp_rs3_data_d_l_fpga!=irf_byp_rs3_data_d_l_fpga_d) && (irf_byp_rs3_data_d_l==irf_byp_rs3_data_d_l_d);
+//assign ILA_DATA[702]=(irf_byp_rs3h_data_d_l_fpga!=irf_byp_rs3h_data_d_l_fpga_d) && (irf_byp_rs3h_data_d_l==irf_byp_rs3h_data_d_l_d);
+*/
+wire [11:0] current_cwp;  // connected to the current_cwp port of bw_r_irf_fpga1; not used by any live logic in this section
+
+   bw_r_irf_fpga1 irf(   // FPGA register file: same connections as the bw_r_irf instance in the `else branch, plus current_cwp
+                
+                .current_cwp(current_cwp),  // extra port that bw_r_irf does not have
+                .so                     (short_scan0_1),  // continues into ecl .si
+                .si                     (short_si0),
+                .reset_l (arst_l),
+                .rst_tri_en             (mem_write_disable),
+                .rml_irf_old_e_cwp_e    (rml_irf_old_e_cwp_e[1:0]),
+                .rml_irf_new_e_cwp_e    (rml_irf_new_e_cwp_e[1:0]),
+                /*AUTOINST*/
+                // Outputs: routed to the *_fpga wires, then forwarded to irf_byp_* by the assigns after this instance
+                .irf_byp_rs1_data_d_l   (irf_byp_rs1_data_d_l_fpga[71:0]),
+                .irf_byp_rs2_data_d_l   (irf_byp_rs2_data_d_l_fpga[71:0]),
+                .irf_byp_rs3_data_d_l   (irf_byp_rs3_data_d_l_fpga[71:0]),
+                .irf_byp_rs3h_data_d_l  (irf_byp_rs3h_data_d_l_fpga[31:0]),
+                // Inputs
+                .rclk                   (rclk),
+                //.rclk2x                   (rclk2x),   (disabled: the rclk2x port is left unconnected)
+                .se                     (se),
+                .sehold                 (sehold),
+                .ifu_exu_tid_s2         (ifu_exu_tid_s2[1:0]),
+                .ifu_exu_rs1_s          (ifu_exu_rs1_s[4:0]),
+                .ifu_exu_rs2_s          (ifu_exu_rs2_s[4:0]),
+                .ifu_exu_rs3_s          (ifu_exu_rs3_s[4:0]),
+                .ifu_exu_ren1_s         (ifu_exu_ren1_s),
+                .ifu_exu_ren2_s         (ifu_exu_ren2_s),
+                .ifu_exu_ren3_s         (ifu_exu_ren3_s),
+                .ecl_irf_wen_w          (ecl_irf_wen_w),
+                .ecl_irf_wen_w2         (ecl_irf_wen_w2),
+                .ecl_irf_rd_m           (ecl_irf_rd_m[4:0]),
+                .ecl_irf_rd_g           (ecl_irf_rd_g[4:0]),
+                .byp_irf_rd_data_w      (byp_irf_rd_data_w[71:0]),   // listed under Outputs on the bypass instance
+                .byp_irf_rd_data_w2     (byp_irf_rd_data_w2[71:0]),  // listed under Outputs on the bypass instance
+                .ecl_irf_tid_m          (ecl_irf_tid_m[1:0]),
+                .ecl_irf_tid_g          (ecl_irf_tid_g[1:0]),
+                .rml_irf_old_lo_cwp_e   (rml_irf_old_lo_cwp_e[2:0]),
+                .rml_irf_new_lo_cwp_e   (rml_irf_new_lo_cwp_e[2:0]),
+                .rml_irf_swap_even_e    (rml_irf_swap_even_e),
+                .rml_irf_swap_odd_e     (rml_irf_swap_odd_e),
+                .rml_irf_swap_local_e   (rml_irf_swap_local_e),
+                .rml_irf_kill_restore_w (rml_irf_kill_restore_w),
+                .rml_irf_cwpswap_tid_e  (rml_irf_cwpswap_tid_e[1:0]),
+                .rml_irf_old_agp        (rml_irf_old_agp[1:0]),
+                .rml_irf_new_agp        (rml_irf_new_agp[1:0]),
+                .rml_irf_swap_global    (rml_irf_swap_global),
+                .rml_irf_global_tid     (rml_irf_global_tid[1:0]));
+
+/*   bw_r_irf irf(
+                .so                     (short_scan0_1),
+                .si                     (short_si0),
+                .reset_l (arst_l),
+                .rst_tri_en             (mem_write_disable),
+                .rml_irf_old_e_cwp_e    (rml_irf_old_e_cwp_e[1:0]),
+                .rml_irf_new_e_cwp_e    (rml_irf_new_e_cwp_e[1:0]),
+                // Outputs
+                .irf_byp_rs1_data_d_l   (irf_byp_rs1_data_d_l_ref[71:0]),
+                .irf_byp_rs2_data_d_l   (irf_byp_rs2_data_d_l_ref[71:0]),
+                .irf_byp_rs3_data_d_l   (irf_byp_rs3_data_d_l_ref[71:0]),
+                .irf_byp_rs3h_data_d_l  (irf_byp_rs3h_data_d_l_ref[31:0]),
+                // Inputs
+                .rclk                   (rclk),
+                .se                     (se),
+                .sehold                 (sehold),
+                .ifu_exu_tid_s2         (ifu_exu_tid_s2[1:0]),
+                .ifu_exu_rs1_s          (ifu_exu_rs1_s[4:0]),
+                .ifu_exu_rs2_s          (ifu_exu_rs2_s[4:0]),
+                .ifu_exu_rs3_s          (ifu_exu_rs3_s[4:0]),
+                .ifu_exu_ren1_s         (ifu_exu_ren1_s),
+                .ifu_exu_ren2_s         (ifu_exu_ren2_s),
+                .ifu_exu_ren3_s         (ifu_exu_ren3_s),
+                .ecl_irf_wen_w          (ecl_irf_wen_w),
+                .ecl_irf_wen_w2         (ecl_irf_wen_w2),
+                .ecl_irf_rd_m           (ecl_irf_rd_m[4:0]),
+                .ecl_irf_rd_g           (ecl_irf_rd_g[4:0]),
+                .byp_irf_rd_data_w      (byp_irf_rd_data_w[71:0]),
+                .byp_irf_rd_data_w2     (byp_irf_rd_data_w2[71:0]),
+                .ecl_irf_tid_m          (ecl_irf_tid_m[1:0]),
+                .ecl_irf_tid_g          (ecl_irf_tid_g[1:0]),
+                .rml_irf_old_lo_cwp_e   (rml_irf_old_lo_cwp_e[2:0]),
+                .rml_irf_new_lo_cwp_e   (rml_irf_new_lo_cwp_e[2:0]),
+                .rml_irf_swap_even_e    (rml_irf_swap_even_e),
+                .rml_irf_swap_odd_e     (rml_irf_swap_odd_e),
+                .rml_irf_swap_local_e   (rml_irf_swap_local_e),
+                .rml_irf_kill_restore_w (rml_irf_kill_restore_w),
+                .rml_irf_cwpswap_tid_e  (rml_irf_cwpswap_tid_e[1:0]),
+                .rml_irf_old_agp        (rml_irf_old_agp[1:0]),
+                .rml_irf_new_agp        (rml_irf_new_agp[1:0]),
+                .rml_irf_swap_global    (rml_irf_swap_global),
+                .rml_irf_global_tid     (rml_irf_global_tid[1:0]));*/
+
+assign irf_byp_rs1_data_d_l=/*((err_cnt>=ref_cnt[15:0]) && (cnt[19:0]>=ref_cnt[35:16])) && allow[0] ? irf_byp_rs1_data_d_l_ref:*/irf_byp_rs1_data_d_l_fpga; // always the FPGA irf data; the commented-out condition used to select the *_ref data
+assign irf_byp_rs2_data_d_l=/*((err_cnt>=ref_cnt[15:0]) && (cnt[19:0]>=ref_cnt[35:16])) && allow[1] ? irf_byp_rs2_data_d_l_ref:*/irf_byp_rs2_data_d_l_fpga; // same for rs2
+assign irf_byp_rs3_data_d_l=/*((err_cnt>=ref_cnt[15:0]) && (cnt[19:0]>=ref_cnt[35:16])) && allow[2] ? irf_byp_rs3_data_d_l_ref:*/irf_byp_rs3_data_d_l_fpga; // same for rs3
+assign irf_byp_rs3h_data_d_l=/*((err_cnt>=ref_cnt[15:0]) && (cnt[19:0]>=ref_cnt[35:16])) && allow[3] ? irf_byp_rs3h_data_d_l_ref:*/irf_byp_rs3h_data_d_l_fpga; // same for rs3h
+
+`else 
+   bw_r_irf irf(   // register file used when FPGA_NEW_IRF is not defined; drives irf_byp_* directly
+                .so                     (short_scan0_1),  // continues into ecl .si
+                .si                     (short_si0),
+                .reset_l (arst_l),
+                .rst_tri_en             (mem_write_disable),
+                .rml_irf_old_e_cwp_e    (rml_irf_old_e_cwp_e[1:0]),
+                .rml_irf_new_e_cwp_e    (rml_irf_new_e_cwp_e[1:0]),
+                /*AUTOINST*/
+                // Outputs: read data consumed by the bypass instance
+                .irf_byp_rs1_data_d_l   (irf_byp_rs1_data_d_l[71:0]),
+                .irf_byp_rs2_data_d_l   (irf_byp_rs2_data_d_l[71:0]),
+                .irf_byp_rs3_data_d_l   (irf_byp_rs3_data_d_l[71:0]),
+                .irf_byp_rs3h_data_d_l  (irf_byp_rs3h_data_d_l[31:0]),
+                // Inputs
+                .rclk                   (rclk),
+                .se                     (se),
+                .sehold                 (sehold),
+                .ifu_exu_tid_s2         (ifu_exu_tid_s2[1:0]),
+                .ifu_exu_rs1_s          (ifu_exu_rs1_s[4:0]),
+                .ifu_exu_rs2_s          (ifu_exu_rs2_s[4:0]),
+                .ifu_exu_rs3_s          (ifu_exu_rs3_s[4:0]),
+                .ifu_exu_ren1_s         (ifu_exu_ren1_s),
+                .ifu_exu_ren2_s         (ifu_exu_ren2_s),
+                .ifu_exu_ren3_s         (ifu_exu_ren3_s),
+                .ecl_irf_wen_w          (ecl_irf_wen_w),
+                .ecl_irf_wen_w2         (ecl_irf_wen_w2),
+                .ecl_irf_rd_m           (ecl_irf_rd_m[4:0]),
+                .ecl_irf_rd_g           (ecl_irf_rd_g[4:0]),
+                .byp_irf_rd_data_w      (byp_irf_rd_data_w[71:0]),   // listed under Outputs on the bypass instance
+                .byp_irf_rd_data_w2     (byp_irf_rd_data_w2[71:0]),  // listed under Outputs on the bypass instance
+                .ecl_irf_tid_m          (ecl_irf_tid_m[1:0]),
+                .ecl_irf_tid_g          (ecl_irf_tid_g[1:0]),
+                .rml_irf_old_lo_cwp_e   (rml_irf_old_lo_cwp_e[2:0]),
+                .rml_irf_new_lo_cwp_e   (rml_irf_new_lo_cwp_e[2:0]),
+                .rml_irf_swap_even_e    (rml_irf_swap_even_e),
+                .rml_irf_swap_odd_e     (rml_irf_swap_odd_e),
+                .rml_irf_swap_local_e   (rml_irf_swap_local_e),
+                .rml_irf_kill_restore_w (rml_irf_kill_restore_w),
+                .rml_irf_cwpswap_tid_e  (rml_irf_cwpswap_tid_e[1:0]),
+                .rml_irf_old_agp        (rml_irf_old_agp[1:0]),
+                .rml_irf_new_agp        (rml_irf_new_agp[1:0]),
+                .rml_irf_swap_global    (rml_irf_swap_global),
+                .rml_irf_global_tid     (rml_irf_global_tid[1:0]));
+`endif
+   
+   sparc_exu_byp bypass(   // takes the irf_byp_* read data as inputs and outputs byp_irf_rd_data_w/w2 (inputs of irf)
+                        .so             (short_so1),
+                        .si             (short_si1),
+                        .byp_alu_rs2_data_e(byp_alu_rs2_data_e[63:0]),  // manual connection; the same net is also connected to ecc
+                        /*AUTOINST*/
+                        // Outputs
+                        .byp_alu_rs1_data_e(byp_alu_rs1_data_e[63:0]),
+                        .byp_alu_rs2_data_e_l(byp_alu_rs2_data_e_l[63:0]),
+                        .exu_lsu_rs3_data_e(exu_lsu_rs3_data_e[63:0]),
+                        .exu_spu_rs3_data_e(exu_spu_rs3_data_e[63:0]),
+                        .exu_lsu_rs2_data_e(exu_lsu_rs2_data_e[63:0]),
+                        .byp_alu_rcc_data_e(byp_alu_rcc_data_e[63:0]),
+                        .byp_irf_rd_data_w(byp_irf_rd_data_w[71:0]),   // to irf; bits [7:0] also connect to ecl port byp_ecl_wrccr_data_w
+                        .exu_tlu_wsr_data_m(exu_tlu_wsr_data_m[63:0]), // bits [2:0] also connect to ecl port byp_ecl_rd_data_3lsb_m
+                        .byp_irf_rd_data_w2(byp_irf_rd_data_w2[71:0]), // to irf
+                        .byp_ecc_rs3_data_e(byp_ecc_rs3_data_e[63:0]),
+                        .byp_ecc_rcc_data_e(byp_ecc_rcc_data_e[63:0]),
+                        .byp_ecl_rs2_31_e(byp_ecl_rs2_31_e),
+                        .byp_ecl_rs1_31_e(byp_ecl_rs1_31_e),
+                        .byp_ecl_rs1_63_e(byp_ecl_rs1_63_e),
+                        .byp_ecl_rs1_2_0_e(byp_ecl_rs1_2_0_e[2:0]),
+                        .byp_ecl_rs2_3_0_e(byp_ecl_rs2_3_0_e[3:0]),
+                        .byp_ecc_rs1_synd_d(byp_ecc_rs1_synd_d[7:0]),  // consumed by ecc
+                        .byp_ecc_rs2_synd_d(byp_ecc_rs2_synd_d[7:0]),  // consumed by ecc
+                        .byp_ecc_rs3_synd_d(byp_ecc_rs3_synd_d[7:0]),  // consumed by ecc
+                        // Inputs
+                        .rclk           (rclk),
+                        .se             (se),
+                        .sehold         (sehold),
+                        .ecl_byp_rs1_mux2_sel_e(ecl_byp_rs1_mux2_sel_e),
+                        .ecl_byp_rs1_mux2_sel_rf(ecl_byp_rs1_mux2_sel_rf),
+                        .ecl_byp_rs1_mux2_sel_ld(ecl_byp_rs1_mux2_sel_ld),
+                        .ecl_byp_rs1_mux2_sel_usemux1(ecl_byp_rs1_mux2_sel_usemux1),
+                        .ecl_byp_rs1_mux1_sel_m(ecl_byp_rs1_mux1_sel_m),
+                        .ecl_byp_rs1_mux1_sel_w(ecl_byp_rs1_mux1_sel_w),
+                        .ecl_byp_rs1_mux1_sel_w2(ecl_byp_rs1_mux1_sel_w2),
+                        .ecl_byp_rs1_mux1_sel_other(ecl_byp_rs1_mux1_sel_other),
+                        .ecl_byp_rcc_mux2_sel_e(ecl_byp_rcc_mux2_sel_e),
+                        .ecl_byp_rcc_mux2_sel_rf(ecl_byp_rcc_mux2_sel_rf),
+                        .ecl_byp_rcc_mux2_sel_ld(ecl_byp_rcc_mux2_sel_ld),
+                        .ecl_byp_rcc_mux2_sel_usemux1(ecl_byp_rcc_mux2_sel_usemux1),
+                        .ecl_byp_rcc_mux1_sel_m(ecl_byp_rcc_mux1_sel_m),
+                        .ecl_byp_rcc_mux1_sel_w(ecl_byp_rcc_mux1_sel_w),
+                        .ecl_byp_rcc_mux1_sel_w2(ecl_byp_rcc_mux1_sel_w2),
+                        .ecl_byp_rcc_mux1_sel_other(ecl_byp_rcc_mux1_sel_other),
+                        .ecl_byp_rs2_mux2_sel_e(ecl_byp_rs2_mux2_sel_e),
+                        .ecl_byp_rs2_mux2_sel_rf(ecl_byp_rs2_mux2_sel_rf),
+                        .ecl_byp_rs2_mux2_sel_ld(ecl_byp_rs2_mux2_sel_ld),
+                        .ecl_byp_rs2_mux2_sel_usemux1(ecl_byp_rs2_mux2_sel_usemux1),
+                        .ecl_byp_rs2_mux1_sel_m(ecl_byp_rs2_mux1_sel_m),
+                        .ecl_byp_rs2_mux1_sel_w(ecl_byp_rs2_mux1_sel_w),
+                        .ecl_byp_rs2_mux1_sel_w2(ecl_byp_rs2_mux1_sel_w2),
+                        .ecl_byp_rs2_mux1_sel_other(ecl_byp_rs2_mux1_sel_other),
+                        .ecl_byp_rs3_mux2_sel_e(ecl_byp_rs3_mux2_sel_e),
+                        .ecl_byp_rs3_mux2_sel_rf(ecl_byp_rs3_mux2_sel_rf),
+                        .ecl_byp_rs3_mux2_sel_ld(ecl_byp_rs3_mux2_sel_ld),
+                        .ecl_byp_rs3_mux2_sel_usemux1(ecl_byp_rs3_mux2_sel_usemux1),
+                        .ecl_byp_rs3_mux1_sel_m(ecl_byp_rs3_mux1_sel_m),
+                        .ecl_byp_rs3_mux1_sel_w(ecl_byp_rs3_mux1_sel_w),
+                        .ecl_byp_rs3_mux1_sel_w2(ecl_byp_rs3_mux1_sel_w2),
+                        .ecl_byp_rs3_mux1_sel_other(ecl_byp_rs3_mux1_sel_other),
+                        .ecl_byp_rs3h_mux2_sel_e(ecl_byp_rs3h_mux2_sel_e),
+                        .ecl_byp_rs3h_mux2_sel_rf(ecl_byp_rs3h_mux2_sel_rf),
+                        .ecl_byp_rs3h_mux2_sel_ld(ecl_byp_rs3h_mux2_sel_ld),
+                        .ecl_byp_rs3h_mux2_sel_usemux1(ecl_byp_rs3h_mux2_sel_usemux1),
+                        .ecl_byp_rs3h_mux1_sel_m(ecl_byp_rs3h_mux1_sel_m),
+                        .ecl_byp_rs3h_mux1_sel_w(ecl_byp_rs3h_mux1_sel_w),
+                        .ecl_byp_rs3h_mux1_sel_w2(ecl_byp_rs3h_mux1_sel_w2),
+                        .ecl_byp_rs3h_mux1_sel_other(ecl_byp_rs3h_mux1_sel_other),
+                        .ecl_byp_rs1_longmux_sel_g2(ecl_byp_rs1_longmux_sel_g2),
+                        .ecl_byp_rs1_longmux_sel_w2(ecl_byp_rs1_longmux_sel_w2),
+                        .ecl_byp_rs1_longmux_sel_ldxa(ecl_byp_rs1_longmux_sel_ldxa),
+                        .ecl_byp_rs2_longmux_sel_g2(ecl_byp_rs2_longmux_sel_g2),
+                        .ecl_byp_rs2_longmux_sel_w2(ecl_byp_rs2_longmux_sel_w2),
+                        .ecl_byp_rs2_longmux_sel_ldxa(ecl_byp_rs2_longmux_sel_ldxa),
+                        .ecl_byp_rs3_longmux_sel_g2(ecl_byp_rs3_longmux_sel_g2),
+                        .ecl_byp_rs3_longmux_sel_w2(ecl_byp_rs3_longmux_sel_w2),
+                        .ecl_byp_rs3_longmux_sel_ldxa(ecl_byp_rs3_longmux_sel_ldxa),
+                        .ecl_byp_rs3h_longmux_sel_g2(ecl_byp_rs3h_longmux_sel_g2),
+                        .ecl_byp_rs3h_longmux_sel_w2(ecl_byp_rs3h_longmux_sel_w2),
+                        .ecl_byp_rs3h_longmux_sel_ldxa(ecl_byp_rs3h_longmux_sel_ldxa),
+                        .ecl_byp_sel_load_m(ecl_byp_sel_load_m),
+                        .ecl_byp_sel_pipe_m(ecl_byp_sel_pipe_m),
+                        .ecl_byp_sel_ecc_m(ecl_byp_sel_ecc_m),
+                        .ecl_byp_sel_muldiv_g(ecl_byp_sel_muldiv_g),
+                        .ecl_byp_sel_load_g(ecl_byp_sel_load_g),
+                        .ecl_byp_sel_restore_g(ecl_byp_sel_restore_g),
+                        .ecl_byp_std_e_l(ecl_byp_std_e_l),
+                        .ecl_byp_ldxa_g (ecl_byp_ldxa_g),
+                        .alu_byp_rd_data_e(alu_byp_rd_data_e[63:0]),
+                        .ifu_exu_imm_data_d(ifu_exu_imm_data_d[31:0]),
+                        .irf_byp_rs1_data_d_l(irf_byp_rs1_data_d_l[71:0]),   // register-file read data (from irf, either `ifdef branch)
+                        .irf_byp_rs2_data_d_l(irf_byp_rs2_data_d_l[71:0]),
+                        .irf_byp_rs3_data_d_l(irf_byp_rs3_data_d_l[71:0]),
+                        .irf_byp_rs3h_data_d_l(irf_byp_rs3h_data_d_l[31:0]),
+                        .lsu_exu_dfill_data_g(lsu_exu_dfill_data_g[63:0]),
+                        .lsu_exu_ldxa_data_g(lsu_exu_ldxa_data_g[63:0]),
+                        .div_byp_muldivout_g(div_byp_muldivout_g[63:0]),
+                        .ecc_byp_ecc_result_m(ecc_byp_ecc_result_m[63:0]),   // listed under Outputs on the ecc instance
+                        .ecl_byp_ecc_mask_m_l(ecl_byp_ecc_mask_m_l[7:0]),
+                        .ifu_exu_pc_d   (ifu_exu_pc_d[47:0]),
+                        .ecl_byp_3lsb_m (ecl_byp_3lsb_m[2:0]),
+                        .ecl_byp_restore_m(ecl_byp_restore_m),
+                        .ecl_byp_sel_restore_m(ecl_byp_sel_restore_m),
+                        .ecl_byp_eclpr_e(ecl_byp_eclpr_e[7:0]),
+                        .div_byp_yreg_e (div_byp_yreg_e[31:0]),
+                        .ifu_exu_pcver_e(ifu_exu_pcver_e[63:0]),
+                        .tlu_exu_rsr_data_m(tlu_exu_rsr_data_m[63:0]),
+                        .ffu_exu_rsr_data_m(ffu_exu_rsr_data_m[63:0]),
+                        .ecl_byp_sel_yreg_e(ecl_byp_sel_yreg_e),
+                        .ecl_byp_sel_eclpr_e(ecl_byp_sel_eclpr_e),
+                        .ecl_byp_sel_ifusr_e(ecl_byp_sel_ifusr_e),
+                        .ecl_byp_sel_alu_e(ecl_byp_sel_alu_e),
+                        .ecl_byp_sel_ifex_m(ecl_byp_sel_ifex_m),
+                        .ecl_byp_sel_ffusr_m(ecl_byp_sel_ffusr_m),
+                        .ecl_byp_sel_tlusr_m(ecl_byp_sel_tlusr_m));
+
+   sparc_exu_ecc ecc(   // outputs the ecc_ecl_rs*_ce/ue flags, ecc_byp_ecc_result_m and exu_ifu_err_synd_m[6:0]
+                     .so                (scan0_1),
+                     .si                (si0),
+                     .byp_alu_rs2_data_e(byp_alu_rs2_data_e[63:0]),  // manual connection; the same net is also connected to bypass
+                     /*AUTOINST*/
+                     // Outputs
+                     .ecc_ecl_rs1_ce    (ecc_ecl_rs1_ce),
+                     .ecc_ecl_rs1_ue    (ecc_ecl_rs1_ue),
+                     .ecc_ecl_rs2_ce    (ecc_ecl_rs2_ce),
+                     .ecc_ecl_rs2_ue    (ecc_ecl_rs2_ue),
+                     .ecc_ecl_rs3_ce    (ecc_ecl_rs3_ce),
+                     .ecc_ecl_rs3_ue    (ecc_ecl_rs3_ue),
+                     .ecc_byp_ecc_result_m(ecc_byp_ecc_result_m[63:0]),  // an input of bypass
+                     .exu_ifu_err_synd_m(exu_ifu_err_synd_m[6:0]),   // only bits [6:0]; bit [7] connects to ecl port exu_ifu_err_synd_7_m
+                     // Inputs
+                     .rclk              (rclk),
+                     .se                (se),
+                     .byp_ecc_rcc_data_e(byp_ecc_rcc_data_e[63:0]),
+                     .ecl_ecc_rs1_use_rf_e(ecl_ecc_rs1_use_rf_e),
+                     .byp_ecc_rs1_synd_d(byp_ecc_rs1_synd_d[7:0]),   // listed under Outputs on the bypass instance
+                     .ecl_ecc_rs2_use_rf_e(ecl_ecc_rs2_use_rf_e),
+                     .byp_ecc_rs2_synd_d(byp_ecc_rs2_synd_d[7:0]),   // listed under Outputs on the bypass instance
+                     .byp_ecc_rs3_data_e(byp_ecc_rs3_data_e[63:0]),
+                     .ecl_ecc_rs3_use_rf_e(ecl_ecc_rs3_use_rf_e),
+                     .byp_ecc_rs3_synd_d(byp_ecc_rs3_synd_d[7:0]),   // listed under Outputs on the bypass instance
+                     .ecl_ecc_sel_rs1_m_l(ecl_ecc_sel_rs1_m_l),      // listed under Outputs on the ecl instance
+                     .ecl_ecc_sel_rs2_m_l(ecl_ecc_sel_rs2_m_l),
+                     .ecl_ecc_sel_rs3_m_l(ecl_ecc_sel_rs3_m_l),
+                     .ecl_ecc_log_rs1_m (ecl_ecc_log_rs1_m),         // listed under Outputs on the ecl instance
+                     .ecl_ecc_log_rs2_m (ecl_ecc_log_rs2_m),
+                     .ecl_ecc_log_rs3_m (ecl_ecc_log_rs3_m));
+   
+   sparc_exu_ecl ecl(
+                     .so                (short_so0),
+                     .si                (short_scan0_1),
+                     .rst_tri_en        (mux_drive_disable),
+                     .byp_ecl_wrccr_data_w(byp_irf_rd_data_w[7:0]),
+                     .alu_ecl_adder_out_31_e(exu_ifu_brpc_e[31]),
+                     .byp_ecl_rd_data_3lsb_m(exu_tlu_wsr_data_m[2:0]),                     
+                     .alu_ecl_adder_out_7_0_e(exu_ifu_brpc_e[7:0]),
+                     .exu_ifu_regz_e    (exu_ifu_regz_e),
+                     .ecl_alu_casa_e    (ecl_alu_casa_e),
+                     .exu_ifu_err_synd_7_m (exu_ifu_err_synd_m[7]),
+                     /*AUTOINST*/
+                     // Outputs
+                     .ecl_byp_ecc_mask_m_l(ecl_byp_ecc_mask_m_l[7:0]),
+                     .ecl_byp_eclpr_e   (ecl_byp_eclpr_e[7:0]),
+                     .ecl_byp_sel_load_g(ecl_byp_sel_load_g),
+                     .ecl_byp_sel_load_m(ecl_byp_sel_load_m),
+                     .ecl_byp_sel_muldiv_g(ecl_byp_sel_muldiv_g),
+                     .ecl_byp_sel_pipe_m(ecl_byp_sel_pipe_m),
+                     .ecl_byp_sel_restore_g(ecl_byp_sel_restore_g),
+                     .ecl_byp_sel_restore_m(ecl_byp_sel_restore_m),
+                     .ecl_div_almostlast_cycle(ecl_div_almostlast_cycle),
+                     .ecl_div_cin       (ecl_div_cin),
+                     .ecl_div_dividend_sign(ecl_div_dividend_sign),
+                     .ecl_div_keep_d    (ecl_div_keep_d),
+                     .ecl_div_keepx     (ecl_div_keepx),
+                     .ecl_div_last_cycle(ecl_div_last_cycle),
+                     .ecl_div_mul_get_32bit_data(ecl_div_mul_get_32bit_data),
+                     .ecl_div_mul_get_new_data(ecl_div_mul_get_new_data),
+                     .ecl_div_mul_keep_data(ecl_div_mul_keep_data),
+                     .ecl_div_mul_sext_rs1_e(ecl_div_mul_sext_rs1_e),
+                     .ecl_div_mul_sext_rs2_e(ecl_div_mul_sext_rs2_e),
+                     .ecl_div_newq      (ecl_div_newq),
+                     .ecl_div_sel_64b   (ecl_div_sel_64b),
+                     .ecl_div_sel_adder (ecl_div_sel_adder),
+                     .ecl_div_sel_neg32 (ecl_div_sel_neg32),
+                     .ecl_div_sel_pos32 (ecl_div_sel_pos32),
+                     .ecl_div_sel_u32   (ecl_div_sel_u32),
+                     .ecl_div_subtract_l(ecl_div_subtract_l),
+                     .ecl_div_upper32_zero(ecl_div_upper32_zero),
+                     .ecl_div_upper33_one(ecl_div_upper33_one),
+                     .ecl_div_upper33_zero(ecl_div_upper33_zero),
+                     .ecl_div_xinmask   (ecl_div_xinmask),
+                     .ecl_div_yreg_shift_g(ecl_div_yreg_shift_g[3:0]),
+                     .ecl_div_yreg_wen_g(ecl_div_yreg_wen_g[3:0]),
+                     .ecl_div_yreg_wen_l(ecl_div_yreg_wen_l[3:0]),
+                     .ecl_div_yreg_wen_w(ecl_div_yreg_wen_w[3:0]),
+                     .ecl_ecc_log_rs1_m (ecl_ecc_log_rs1_m),
+                     .ecl_ecc_log_rs2_m (ecl_ecc_log_rs2_m),
+                     .ecl_ecc_log_rs3_m (ecl_ecc_log_rs3_m),
+                     .ecl_ecc_sel_rs1_m_l(ecl_ecc_sel_rs1_m_l),
+                     .ecl_ecc_sel_rs2_m_l(ecl_ecc_sel_rs2_m_l),
+                     .ecl_ecc_sel_rs3_m_l(ecl_ecc_sel_rs3_m_l),
+                     .ecl_rml_canrestore_wen_w(ecl_rml_canrestore_wen_w),
+                     .ecl_rml_cansave_wen_w(ecl_rml_cansave_wen_w),
+                     .ecl_rml_cleanwin_wen_w(ecl_rml_cleanwin_wen_w),
+                     .ecl_rml_cwp_wen_e (ecl_rml_cwp_wen_e),
+                     .ecl_rml_otherwin_wen_w(ecl_rml_otherwin_wen_w),
+                     .ecl_rml_wstate_wen_w(ecl_rml_wstate_wen_w),
+                     .exu_ffu_wsr_inst_e(exu_ffu_wsr_inst_e),
+                     .exu_ifu_ecc_ce_m  (exu_ifu_ecc_ce_m),
+                     .exu_ifu_ecc_ue_m  (exu_ifu_ecc_ue_m),
+                     .exu_ifu_err_reg_m (exu_ifu_err_reg_m[7:0]),
+                     .exu_ifu_inj_ack   (exu_ifu_inj_ack),
+                     .exu_ifu_longop_done_g(exu_ifu_longop_done_g[3:0]),
+                     .exu_mul_input_vld (exu_mul_input_vld),
+                     .exu_tlu_ccr0_w    (exu_tlu_ccr0_w[7:0]),
+                     .exu_tlu_ccr1_w    (exu_tlu_ccr1_w[7:0]),
+                     .exu_tlu_ccr2_w    (exu_tlu_ccr2_w[7:0]),
+                     .exu_tlu_ccr3_w    (exu_tlu_ccr3_w[7:0]),
+                     .ecl_byp_sel_alu_e (ecl_byp_sel_alu_e),
+                     .ecl_byp_sel_eclpr_e(ecl_byp_sel_eclpr_e),
+                     .ecl_byp_sel_yreg_e(ecl_byp_sel_yreg_e),
+                     .ecl_byp_sel_ifusr_e(ecl_byp_sel_ifusr_e),
+                     .ecl_byp_sel_ffusr_m(ecl_byp_sel_ffusr_m),
+                     .ecl_byp_sel_ifex_m(ecl_byp_sel_ifex_m),
+                     .ecl_byp_sel_tlusr_m(ecl_byp_sel_tlusr_m),
+                     .exu_ifu_va_oor_m  (exu_ifu_va_oor_m),
+                     .ecl_alu_out_sel_sum_e_l(ecl_alu_out_sel_sum_e_l),
+                     .ecl_alu_out_sel_rs3_e_l(ecl_alu_out_sel_rs3_e_l),
+                     .ecl_alu_out_sel_shift_e_l(ecl_alu_out_sel_shift_e_l),
+                     .ecl_alu_out_sel_logic_e_l(ecl_alu_out_sel_logic_e_l),
+                     .ecl_alu_log_sel_and_e(ecl_alu_log_sel_and_e),
+                     .ecl_alu_log_sel_or_e(ecl_alu_log_sel_or_e),
+                     .ecl_alu_log_sel_xor_e(ecl_alu_log_sel_xor_e),
+                     .ecl_alu_log_sel_move_e(ecl_alu_log_sel_move_e),
+                     .ecl_alu_sethi_inst_e(ecl_alu_sethi_inst_e),
+                     .ecl_alu_cin_e     (ecl_alu_cin_e),
+                     .ecl_shft_lshift_e_l(ecl_shft_lshift_e_l),
+                     .ecl_shft_op32_e   (ecl_shft_op32_e),
+                     .ecl_shft_shift4_e (ecl_shft_shift4_e[3:0]),
+                     .ecl_shft_shift1_e (ecl_shft_shift1_e[3:0]),
+                     .ecl_shft_enshift_e_l(ecl_shft_enshift_e_l),
+                     .ecl_byp_restore_m (ecl_byp_restore_m),
+                     .ecl_byp_rs1_mux2_sel_e(ecl_byp_rs1_mux2_sel_e),
+                     .ecl_byp_rs1_mux2_sel_rf(ecl_byp_rs1_mux2_sel_rf),
+                     .ecl_byp_rs1_mux2_sel_ld(ecl_byp_rs1_mux2_sel_ld),
+                     .ecl_byp_rs1_mux2_sel_usemux1(ecl_byp_rs1_mux2_sel_usemux1),
+                     .ecl_byp_rs1_mux1_sel_m(ecl_byp_rs1_mux1_sel_m),
+                     .ecl_byp_rs1_mux1_sel_w(ecl_byp_rs1_mux1_sel_w),
+                     .ecl_byp_rs1_mux1_sel_w2(ecl_byp_rs1_mux1_sel_w2),
+                     .ecl_byp_rs1_mux1_sel_other(ecl_byp_rs1_mux1_sel_other),
+                     .ecl_byp_rcc_mux2_sel_e(ecl_byp_rcc_mux2_sel_e),
+                     .ecl_byp_rcc_mux2_sel_rf(ecl_byp_rcc_mux2_sel_rf),
+                     .ecl_byp_rcc_mux2_sel_ld(ecl_byp_rcc_mux2_sel_ld),
+                     .ecl_byp_rcc_mux2_sel_usemux1(ecl_byp_rcc_mux2_sel_usemux1),
+                     .ecl_byp_rcc_mux1_sel_m(ecl_byp_rcc_mux1_sel_m),
+                     .ecl_byp_rcc_mux1_sel_w(ecl_byp_rcc_mux1_sel_w),
+                     .ecl_byp_rcc_mux1_sel_w2(ecl_byp_rcc_mux1_sel_w2),
+                     .ecl_byp_rcc_mux1_sel_other(ecl_byp_rcc_mux1_sel_other),
+                     .ecl_byp_rs2_mux2_sel_e(ecl_byp_rs2_mux2_sel_e),
+                     .ecl_byp_rs2_mux2_sel_rf(ecl_byp_rs2_mux2_sel_rf),
+                     .ecl_byp_rs2_mux2_sel_ld(ecl_byp_rs2_mux2_sel_ld),
+                     .ecl_byp_rs2_mux2_sel_usemux1(ecl_byp_rs2_mux2_sel_usemux1),
+                     .ecl_byp_rs2_mux1_sel_m(ecl_byp_rs2_mux1_sel_m),
+                     .ecl_byp_rs2_mux1_sel_w(ecl_byp_rs2_mux1_sel_w),
+                     .ecl_byp_rs2_mux1_sel_w2(ecl_byp_rs2_mux1_sel_w2),
+                     .ecl_byp_rs2_mux1_sel_other(ecl_byp_rs2_mux1_sel_other),
+                     .ecl_byp_rs3_mux2_sel_e(ecl_byp_rs3_mux2_sel_e),
+                     .ecl_byp_rs3_mux2_sel_rf(ecl_byp_rs3_mux2_sel_rf),
+                     .ecl_byp_rs3_mux2_sel_ld(ecl_byp_rs3_mux2_sel_ld),
+                     .ecl_byp_rs3_mux2_sel_usemux1(ecl_byp_rs3_mux2_sel_usemux1),
+                     .ecl_byp_rs3_mux1_sel_m(ecl_byp_rs3_mux1_sel_m),
+                     .ecl_byp_rs3_mux1_sel_w(ecl_byp_rs3_mux1_sel_w),
+                     .ecl_byp_rs3_mux1_sel_w2(ecl_byp_rs3_mux1_sel_w2),
+                     .ecl_byp_rs3_mux1_sel_other(ecl_byp_rs3_mux1_sel_other),
+                     .ecl_byp_rs3h_mux2_sel_e(ecl_byp_rs3h_mux2_sel_e),
+                     .ecl_byp_rs3h_mux2_sel_rf(ecl_byp_rs3h_mux2_sel_rf),
+                     .ecl_byp_rs3h_mux2_sel_ld(ecl_byp_rs3h_mux2_sel_ld),
+                     .ecl_byp_rs3h_mux2_sel_usemux1(ecl_byp_rs3h_mux2_sel_usemux1),
+                     .ecl_byp_rs3h_mux1_sel_m(ecl_byp_rs3h_mux1_sel_m),
+                     .ecl_byp_rs3h_mux1_sel_w(ecl_byp_rs3h_mux1_sel_w),
+                     .ecl_byp_rs3h_mux1_sel_w2(ecl_byp_rs3h_mux1_sel_w2),
+                     .ecl_byp_rs3h_mux1_sel_other(ecl_byp_rs3h_mux1_sel_other),
+                     .ecl_byp_rs1_longmux_sel_g2(ecl_byp_rs1_longmux_sel_g2),
+                     .ecl_byp_rs1_longmux_sel_w2(ecl_byp_rs1_longmux_sel_w2),
+                     .ecl_byp_rs1_longmux_sel_ldxa(ecl_byp_rs1_longmux_sel_ldxa),
+                     .ecl_byp_rs2_longmux_sel_g2(ecl_byp_rs2_longmux_sel_g2),
+                     .ecl_byp_rs2_longmux_sel_w2(ecl_byp_rs2_longmux_sel_w2),
+                     .ecl_byp_rs2_longmux_sel_ldxa(ecl_byp_rs2_longmux_sel_ldxa),
+                     .ecl_byp_rs3_longmux_sel_g2(ecl_byp_rs3_longmux_sel_g2),
+                     .ecl_byp_rs3_longmux_sel_w2(ecl_byp_rs3_longmux_sel_w2),
+                     .ecl_byp_rs3_longmux_sel_ldxa(ecl_byp_rs3_longmux_sel_ldxa),
+                     .ecl_byp_rs3h_longmux_sel_g2(ecl_byp_rs3h_longmux_sel_g2),
+                     .ecl_byp_rs3h_longmux_sel_w2(ecl_byp_rs3h_longmux_sel_w2),
+                     .ecl_byp_rs3h_longmux_sel_ldxa(ecl_byp_rs3h_longmux_sel_ldxa),
+                     .ecl_byp_std_e_l   (ecl_byp_std_e_l),
+                     .ecl_byp_ldxa_g    (ecl_byp_ldxa_g),
+                     .ecl_byp_3lsb_m    (ecl_byp_3lsb_m[2:0]),
+                     .ecl_ecc_rs1_use_rf_e(ecl_ecc_rs1_use_rf_e),
+                     .ecl_ecc_rs2_use_rf_e(ecl_ecc_rs2_use_rf_e),
+                     .ecl_ecc_rs3_use_rf_e(ecl_ecc_rs3_use_rf_e),
+                     .ecl_irf_rd_m      (ecl_irf_rd_m[4:0]),
+                     .ecl_irf_tid_m     (ecl_irf_tid_m[1:0]),
+                     .ecl_irf_wen_w     (ecl_irf_wen_w),
+                     .ecl_irf_wen_w2    (ecl_irf_wen_w2),
+                     .ecl_irf_rd_g      (ecl_irf_rd_g[4:0]),
+                     .ecl_irf_tid_g     (ecl_irf_tid_g[1:0]),
+                     .ecl_div_thr_e     (ecl_div_thr_e[3:0]),
+                     .ecl_rml_thr_m     (ecl_rml_thr_m[3:0]),
+                     .ecl_rml_thr_w     (ecl_rml_thr_w[3:0]),
+                     .ecl_rml_xor_data_e(ecl_rml_xor_data_e[2:0]),
+                     .ecl_div_ld_inputs (ecl_div_ld_inputs),
+                     .ecl_div_sel_div   (ecl_div_sel_div),
+                     .ecl_div_div64     (ecl_div_div64),
+                     .exu_ifu_cc_d      (exu_ifu_cc_d[7:0]),
+                     .ecl_shft_extendbit_e(ecl_shft_extendbit_e),
+                     .ecl_shft_extend32bit_e_l(ecl_shft_extend32bit_e_l),
+                     .ecl_div_zero_rs2_e(ecl_div_zero_rs2_e),
+                     .ecl_div_muls_rs1_31_e_l(ecl_div_muls_rs1_31_e_l),
+                     .ecl_div_yreg_data_31_g(ecl_div_yreg_data_31_g),
+                     .exu_tlu_va_oor_m  (exu_tlu_va_oor_m),
+                     .exu_tlu_va_oor_jl_ret_m(exu_tlu_va_oor_jl_ret_m),
+                     .ecl_rml_kill_e    (ecl_rml_kill_e),
+                     .ecl_rml_kill_w    (ecl_rml_kill_w),
+                     .ecl_byp_sel_ecc_m (ecl_byp_sel_ecc_m),
+                     .exu_tlu_ttype_m   (exu_tlu_ttype_m[8:0]),
+                     .exu_tlu_ttype_vld_m(exu_tlu_ttype_vld_m),
+                     .exu_tlu_ue_trap_m (exu_tlu_ue_trap_m),
+                     .exu_tlu_misalign_addr_jmpl_rtn_m(exu_tlu_misalign_addr_jmpl_rtn_m),
+                     .exu_lsu_priority_trap_m(exu_lsu_priority_trap_m),
+                     .ecl_div_mul_wen   (ecl_div_mul_wen),
+                     .ecl_div_muls      (ecl_div_muls),
+                     .ecl_rml_early_flush_w(ecl_rml_early_flush_w),
+                     .ecl_rml_inst_vld_w(ecl_rml_inst_vld_w),
+                     // Inputs
+                     .div_ecl_adder_out_31(div_ecl_adder_out_31),
+                     .div_ecl_cout32    (div_ecl_cout32),
+                     .div_ecl_cout64    (div_ecl_cout64),
+                     .div_ecl_d_62      (div_ecl_d_62),
+                     .div_ecl_d_msb     (div_ecl_d_msb),
+                     .div_ecl_detect_zero_high(div_ecl_detect_zero_high),
+                     .div_ecl_detect_zero_low(div_ecl_detect_zero_low),
+                     .div_ecl_dividend_msb(div_ecl_dividend_msb),
+                     .div_ecl_gencc_in_31(div_ecl_gencc_in_31),
+                     .div_ecl_gencc_in_msb_l(div_ecl_gencc_in_msb_l),
+                     .div_ecl_low32_nonzero(div_ecl_low32_nonzero),
+                     .div_ecl_upper32_equal(div_ecl_upper32_equal),
+                     .div_ecl_x_msb     (div_ecl_x_msb),
+                     .div_ecl_xin_msb_l (div_ecl_xin_msb_l),
+                     .ecc_ecl_rs1_ce    (ecc_ecl_rs1_ce),
+                     .ecc_ecl_rs1_ue    (ecc_ecl_rs1_ue),
+                     .ecc_ecl_rs2_ce    (ecc_ecl_rs2_ce),
+                     .ecc_ecl_rs2_ue    (ecc_ecl_rs2_ue),
+                     .ecc_ecl_rs3_ce    (ecc_ecl_rs3_ce),
+                     .ecc_ecl_rs3_ue    (ecc_ecl_rs3_ue),
+                     .ifu_exu_disable_ce_e(ifu_exu_disable_ce_e),
+                     .ifu_exu_ecc_mask  (ifu_exu_ecc_mask[7:0]),
+                     .ifu_exu_inj_irferr(ifu_exu_inj_irferr),
+                     .ifu_exu_inst_vld_e(ifu_exu_inst_vld_e),
+                     .ifu_exu_inst_vld_w(ifu_exu_inst_vld_w),
+                     .ifu_exu_muldivop_d(ifu_exu_muldivop_d[4:0]),
+                     .ifu_exu_return_d  (ifu_exu_return_d),
+                     .ifu_tlu_sraddr_d  (ifu_tlu_sraddr_d[6:0]),
+                     .ifu_tlu_wsr_inst_d(ifu_tlu_wsr_inst_d),
+                     .lsu_exu_ldst_miss_g2(lsu_exu_ldst_miss_g2),
+                     .mul_exu_ack       (mul_exu_ack),
+                     .rml_ecl_canrestore_d(rml_ecl_canrestore_d[2:0]),
+                     .rml_ecl_cansave_d (rml_ecl_cansave_d[2:0]),
+                     .rml_ecl_cleanwin_d(rml_ecl_cleanwin_d[2:0]),
+                     .rml_ecl_cwp_d     (rml_ecl_cwp_d[2:0]),
+                     .rml_ecl_gl_e      (rml_ecl_gl_e[1:0]),
+                     .rml_ecl_kill_m    (rml_ecl_kill_m),
+                     .rml_ecl_otherwin_d(rml_ecl_otherwin_d[2:0]),
+                     .rml_ecl_rmlop_done_e(rml_ecl_rmlop_done_e),
+                     .rml_ecl_swap_done (rml_ecl_swap_done[3:0]),
+                     .rml_ecl_wstate_d  (rml_ecl_wstate_d[5:0]),
+                     .sehold            (sehold),
+                     .tlu_exu_ccr_m     (tlu_exu_ccr_m[7:0]),
+                     .tlu_exu_cwpccr_update_m(tlu_exu_cwpccr_update_m),
+                     .rclk              (rclk),
+                     .se                (se),
+                     .grst_l            (grst_l),
+                     .arst_l            (arst_l),
+                     .ifu_exu_dbrinst_d (ifu_exu_dbrinst_d),
+                     .ifu_exu_aluop_d   (ifu_exu_aluop_d[2:0]),
+                     .ifu_exu_shiftop_d (ifu_exu_shiftop_d[2:0]),
+                     .ifu_exu_invert_d  (ifu_exu_invert_d),
+                     .ifu_exu_usecin_d  (ifu_exu_usecin_d),
+                     .ifu_exu_enshift_d (ifu_exu_enshift_d),
+                     .byp_ecl_rs2_3_0_e (byp_ecl_rs2_3_0_e[3:0]),
+                     .byp_ecl_rs1_2_0_e (byp_ecl_rs1_2_0_e[2:0]),
+                     .ifu_exu_use_rsr_e_l(ifu_exu_use_rsr_e_l),
+                     .ifu_exu_rd_exusr_e(ifu_exu_rd_exusr_e),
+                     .ifu_exu_rd_ifusr_e(ifu_exu_rd_ifusr_e),
+                     .ifu_exu_rd_ffusr_e(ifu_exu_rd_ffusr_e),
+                     .ifu_exu_rs1_vld_d (ifu_exu_rs1_vld_d),
+                     .ifu_exu_rs2_vld_d (ifu_exu_rs2_vld_d),
+                     .ifu_exu_rs3e_vld_d(ifu_exu_rs3e_vld_d),
+                     .ifu_exu_rs3o_vld_d(ifu_exu_rs3o_vld_d),
+                     .ifu_exu_dontmv_regz0_e(ifu_exu_dontmv_regz0_e),
+                     .ifu_exu_dontmv_regz1_e(ifu_exu_dontmv_regz1_e),
+                     .ifu_exu_rd_d      (ifu_exu_rd_d[4:0]),
+                     .ifu_exu_tid_s2    (ifu_exu_tid_s2[1:0]),
+                     .ifu_exu_kill_e    (ifu_exu_kill_e),
+                     .ifu_exu_wen_d     (ifu_exu_wen_d),
+                     .ifu_exu_ialign_d  (ifu_exu_ialign_d),
+                     .alu_ecl_add_n64_e (alu_ecl_add_n64_e),
+                     .alu_ecl_add_n32_e (alu_ecl_add_n32_e),
+                     .alu_ecl_log_n64_e (alu_ecl_log_n64_e),
+                     .alu_ecl_log_n32_e (alu_ecl_log_n32_e),
+                     .alu_ecl_zhigh_e   (alu_ecl_zhigh_e),
+                     .alu_ecl_zlow_e    (alu_ecl_zlow_e),
+                     .ifu_exu_setcc_d   (ifu_exu_setcc_d),
+                     .lsu_exu_dfill_vld_g(lsu_exu_dfill_vld_g),
+                     .lsu_exu_rd_m      (lsu_exu_rd_m[4:0]),
+                     .lsu_exu_thr_m     (lsu_exu_thr_m[1:0]),
+                     .lsu_exu_ldxa_m    (lsu_exu_ldxa_m),
+                     .byp_ecl_rs1_31_e  (byp_ecl_rs1_31_e),
+                     .byp_ecl_rs2_31_e  (byp_ecl_rs2_31_e),
+                     .byp_ecl_rs1_63_e  (byp_ecl_rs1_63_e),
+                     .alu_ecl_cout64_e_l(alu_ecl_cout64_e_l),
+                     .alu_ecl_cout32_e  (alu_ecl_cout32_e),
+                     .alu_ecl_adder_out_63_e(alu_ecl_adder_out_63_e),
+                     .alu_ecl_adderin2_63_e(alu_ecl_adderin2_63_e),
+                     .alu_ecl_adderin2_31_e(alu_ecl_adderin2_31_e),
+                     .ifu_exu_rs1_s     (ifu_exu_rs1_s[4:0]),
+                     .ifu_exu_rs2_s     (ifu_exu_rs2_s[4:0]),
+                     .ifu_exu_rs3_s     (ifu_exu_rs3_s[4:0]),
+                     .ifu_exu_tagop_d   (ifu_exu_tagop_d),
+                     .ifu_exu_tv_d      (ifu_exu_tv_d),
+                     .ifu_exu_muls_d    (ifu_exu_muls_d),
+                     .div_ecl_yreg_0_l  (div_ecl_yreg_0_l[3:0]),
+                     .alu_ecl_mem_addr_invalid_e_l(alu_ecl_mem_addr_invalid_e_l),
+                     .ifu_exu_range_check_jlret_d(ifu_exu_range_check_jlret_d),
+                     .ifu_exu_range_check_other_d(ifu_exu_range_check_other_d),
+                     .ifu_exu_addr_mask_d(ifu_exu_addr_mask_d),
+                     .ifu_exu_save_d    (ifu_exu_save_d),
+                     .ifu_exu_restore_d (ifu_exu_restore_d),
+                     .ifu_exu_casa_d    (ifu_exu_casa_d),
+                     .rml_ecl_clean_window_e(rml_ecl_clean_window_e),
+                     .rml_ecl_fill_e    (rml_ecl_fill_e),
+                     .rml_ecl_other_e   (rml_ecl_other_e),
+                     .rml_ecl_wtype_e   (rml_ecl_wtype_e[2:0]),
+                     .ifu_exu_tcc_e     (ifu_exu_tcc_e),
+                     .ifu_exu_useimm_d  (ifu_exu_useimm_d),
+                     .ifu_exu_nceen_e   (ifu_exu_nceen_e),
+                     .ifu_tlu_flush_m   (ifu_tlu_flush_m),
+                     .ifu_exu_ttype_vld_m(ifu_exu_ttype_vld_m),
+                     .tlu_exu_priv_trap_m(tlu_exu_priv_trap_m),
+                     .tlu_exu_pic_onebelow_m(tlu_exu_pic_onebelow_m),
+                     .tlu_exu_pic_twobelow_m(tlu_exu_pic_twobelow_m),
+                     .lsu_exu_flush_pipe_w(lsu_exu_flush_pipe_w),
+                     .ifu_exu_sethi_inst_d(ifu_exu_sethi_inst_d),
+                     .lsu_exu_st_dtlb_perr_g(lsu_exu_st_dtlb_perr_g));
+   
+   sparc_exu_alu alu(  // sparc_exu_alu instance; the four ports above AUTOINST are hand-wired
+                     .byp_alu_rs3_data_e(exu_lsu_rs3_data_e[63:0]),  // rs3 operand input is taken from the exu_lsu_rs3_data_e net
+                     .so                (scan0_2),  // scan out; scan0_2 is the si of the div instance
+                     .si                (scan0_1),  // scan in
+                     .ifu_lsu_casa_e (ecl_alu_casa_e),  // port ifu_lsu_casa_e is driven by the ecl_alu_casa_e net
+                     /*AUTOINST*/
+                     // Outputs
+                     .alu_byp_rd_data_e (alu_byp_rd_data_e[63:0]),
+                     .exu_ifu_brpc_e    (exu_ifu_brpc_e[47:0]),
+                     .exu_lsu_ldst_va_e (exu_lsu_ldst_va_e[47:0]),
+                     .exu_lsu_early_va_e(exu_lsu_early_va_e[10:3]),
+                     .exu_mmu_early_va_e(exu_mmu_early_va_e[7:0]),
+                     .alu_ecl_add_n64_e (alu_ecl_add_n64_e),
+                     .alu_ecl_add_n32_e (alu_ecl_add_n32_e),
+                     .alu_ecl_log_n64_e (alu_ecl_log_n64_e),
+                     .alu_ecl_log_n32_e (alu_ecl_log_n32_e),
+                     .alu_ecl_zhigh_e   (alu_ecl_zhigh_e),
+                     .alu_ecl_zlow_e    (alu_ecl_zlow_e),
+                     .exu_ifu_regz_e    (exu_ifu_regz_e),
+                     .exu_ifu_regn_e    (exu_ifu_regn_e),
+                     .alu_ecl_adderin2_63_e(alu_ecl_adderin2_63_e),
+                     .alu_ecl_adderin2_31_e(alu_ecl_adderin2_31_e),
+                     .alu_ecl_adder_out_63_e(alu_ecl_adder_out_63_e),
+                     .alu_ecl_cout32_e  (alu_ecl_cout32_e),
+                     .alu_ecl_cout64_e_l(alu_ecl_cout64_e_l),
+                     .alu_ecl_mem_addr_invalid_e_l(alu_ecl_mem_addr_invalid_e_l),
+                     // Inputs
+                     .rclk              (rclk),
+                     .se                (se),
+                     .byp_alu_rs1_data_e(byp_alu_rs1_data_e[63:0]),
+                     .byp_alu_rs2_data_e_l(byp_alu_rs2_data_e_l[63:0]),  // alu takes the _l rs2 net; shft and div use byp_alu_rs2_data_e
+                     .byp_alu_rcc_data_e(byp_alu_rcc_data_e[63:0]),
+                     .ecl_alu_cin_e     (ecl_alu_cin_e),
+                     .ifu_exu_invert_d  (ifu_exu_invert_d),
+                     .ecl_alu_log_sel_and_e(ecl_alu_log_sel_and_e),
+                     .ecl_alu_log_sel_or_e(ecl_alu_log_sel_or_e),
+                     .ecl_alu_log_sel_xor_e(ecl_alu_log_sel_xor_e),
+                     .ecl_alu_log_sel_move_e(ecl_alu_log_sel_move_e),
+                     .ecl_alu_out_sel_sum_e_l(ecl_alu_out_sel_sum_e_l),
+                     .ecl_alu_out_sel_rs3_e_l(ecl_alu_out_sel_rs3_e_l),
+                     .ecl_alu_out_sel_shift_e_l(ecl_alu_out_sel_shift_e_l),
+                     .ecl_alu_out_sel_logic_e_l(ecl_alu_out_sel_logic_e_l),
+                     .shft_alu_shift_out_e(shft_alu_shift_out_e[63:0]),  // output of the shft instance
+                     .ecl_alu_sethi_inst_e(ecl_alu_sethi_inst_e));
+   sparc_exu_shft shft(/*AUTOINST*/  // sparc_exu_shft instance; no hand-wired ports, no so/si connection listed
+                       // Outputs
+                       .shft_alu_shift_out_e(shft_alu_shift_out_e[63:0]),  // consumed by the alu instance
+                       // Inputs
+                       .ecl_shft_lshift_e_l(ecl_shft_lshift_e_l),
+                       .ecl_shft_op32_e (ecl_shft_op32_e),
+                       .ecl_shft_shift4_e(ecl_shft_shift4_e[3:0]),
+                       .ecl_shft_shift1_e(ecl_shft_shift1_e[3:0]),
+                       .byp_alu_rs1_data_e(byp_alu_rs1_data_e[63:0]),
+                       .byp_alu_rs2_data_e(byp_alu_rs2_data_e[5:4]),  // only bits [5:4] of rs2 are connected here
+                       .ecl_shft_enshift_e_l(ecl_shft_enshift_e_l),
+                       .ecl_shft_extendbit_e(ecl_shft_extendbit_e),
+                       .ecl_shft_extend32bit_e_l(ecl_shft_extend32bit_e_l));
+
+   sparc_exu_div div(  // sparc_exu_div instance; the five ports above AUTOINST are hand-wired
+                     .so                (scan0_3),  // scan chain: scan0_2 -> div -> scan0_3
+                     .si                (scan0_2),
+                     .byp_div_rs1_data_e(byp_alu_rs1_data_e[63:0]),  // same rs1 net that feeds alu and shft
+                     .byp_div_rs2_data_e(byp_alu_rs2_data_e[63:0]),  // full 64-bit rs2 net (shft gets only [5:4])
+                     .byp_div_yreg_data_w(byp_irf_rd_data_w[31:0]),  // low 32 bits of byp_irf_rd_data_w
+                     /*AUTOINST*/
+                     // Outputs
+                     .div_ecl_xin_msb_l (div_ecl_xin_msb_l),
+                     .div_ecl_x_msb     (div_ecl_x_msb),
+                     .div_ecl_d_msb     (div_ecl_d_msb),
+                     .div_ecl_cout64    (div_ecl_cout64),
+                     .div_ecl_cout32    (div_ecl_cout32),
+                     .div_ecl_gencc_in_msb_l(div_ecl_gencc_in_msb_l),
+                     .div_ecl_gencc_in_31(div_ecl_gencc_in_31),
+                     .div_ecl_upper32_equal(div_ecl_upper32_equal),
+                     .div_ecl_low32_nonzero(div_ecl_low32_nonzero),
+                     .div_ecl_dividend_msb(div_ecl_dividend_msb),
+                     .div_byp_muldivout_g(div_byp_muldivout_g[63:0]),
+                     .div_byp_yreg_e    (div_byp_yreg_e[31:0]),
+                     .div_ecl_yreg_0_l  (div_ecl_yreg_0_l[3:0]),
+                     .exu_mul_rs1_data  (exu_mul_rs1_data[63:0]),
+                     .exu_mul_rs2_data  (exu_mul_rs2_data[63:0]),
+                     .div_ecl_adder_out_31(div_ecl_adder_out_31),
+                     .div_ecl_detect_zero_low(div_ecl_detect_zero_low),
+                     .div_ecl_detect_zero_high(div_ecl_detect_zero_high),
+                     .div_ecl_d_62      (div_ecl_d_62),
+                     // Inputs
+                     .ecl_div_thr_e     (ecl_div_thr_e[3:0]),
+                     .ecl_div_yreg_data_31_g(ecl_div_yreg_data_31_g),
+                     .ecl_div_yreg_shift_g(ecl_div_yreg_shift_g[3:0]),
+                     .ecl_div_yreg_wen_g(ecl_div_yreg_wen_g[3:0]),
+                     .ecl_div_yreg_wen_l(ecl_div_yreg_wen_l[3:0]),
+                     .ecl_div_yreg_wen_w(ecl_div_yreg_wen_w[3:0]),
+                     .rclk              (rclk),
+                     .se                (se),
+                     .ecl_div_keep_d    (ecl_div_keep_d),
+                     .ecl_div_ld_inputs (ecl_div_ld_inputs),
+                     .ecl_div_sel_adder (ecl_div_sel_adder),
+                     .ecl_div_last_cycle(ecl_div_last_cycle),
+                     .ecl_div_almostlast_cycle(ecl_div_almostlast_cycle),
+                     .ecl_div_div64     (ecl_div_div64),
+                     .ecl_div_sel_u32   (ecl_div_sel_u32),
+                     .ecl_div_sel_pos32 (ecl_div_sel_pos32),
+                     .ecl_div_sel_neg32 (ecl_div_sel_neg32),
+                     .ecl_div_sel_64b   (ecl_div_sel_64b),
+                     .ecl_div_upper32_zero(ecl_div_upper32_zero),
+                     .ecl_div_upper33_one(ecl_div_upper33_one),
+                     .ecl_div_upper33_zero(ecl_div_upper33_zero),
+                     .mul_exu_data_g    (mul_exu_data_g[63:0]),
+                     .ecl_div_sel_div   (ecl_div_sel_div),
+                     .ecl_div_mul_wen   (ecl_div_mul_wen),
+                     .ecl_div_dividend_sign(ecl_div_dividend_sign),
+                     .ecl_div_subtract_l(ecl_div_subtract_l),
+                     .ecl_div_cin       (ecl_div_cin),
+                     .ecl_div_newq      (ecl_div_newq),
+                     .ecl_div_xinmask   (ecl_div_xinmask),
+                     .ecl_div_keepx     (ecl_div_keepx),
+                     .ecl_div_mul_get_new_data(ecl_div_mul_get_new_data),
+                     .ecl_div_mul_keep_data(ecl_div_mul_keep_data),
+                     .ecl_div_mul_get_32bit_data(ecl_div_mul_get_32bit_data),
+                     .ecl_div_mul_sext_rs2_e(ecl_div_mul_sext_rs2_e),
+                     .ecl_div_mul_sext_rs1_e(ecl_div_mul_sext_rs1_e),
+                     .ecl_div_muls_rs1_31_e_l(ecl_div_muls_rs1_31_e_l),
+                     .ecl_div_muls      (ecl_div_muls),
+                     .ecl_div_zero_rs2_e(ecl_div_zero_rs2_e));
+
+   sparc_exu_rml rml(  // sparc_exu_rml instance; the seven ports above AUTOINST are hand-wired
+                     .current_cwp(current_cwp),
+                     .so                (so0),  // scan chain: scan0_3 -> rml -> so0
+                     .si                (scan0_3),
+                  .rst_tri_en        (mux_drive_disable),  // port rst_tri_en is driven by the mux_drive_disable net
+                     .exu_tlu_wsr_data_w(byp_irf_rd_data_w[5:0]),  // low 6 bits of byp_irf_rd_data_w
+                     .rml_irf_old_e_cwp_e(rml_irf_old_e_cwp_e[1:0]),
+                     .rml_irf_new_e_cwp_e(rml_irf_new_e_cwp_e[1:0]),
+                     /*AUTOINST*/
+                     // Outputs
+                     .exu_tlu_cwp0_w    (exu_tlu_cwp0_w[2:0]),
+                     .exu_tlu_cwp1_w    (exu_tlu_cwp1_w[2:0]),
+                     .exu_tlu_cwp2_w    (exu_tlu_cwp2_w[2:0]),
+                     .exu_tlu_cwp3_w    (exu_tlu_cwp3_w[2:0]),
+                     .exu_tlu_cwp_retry (exu_tlu_cwp_retry),
+                     .exu_tlu_spill_other(exu_tlu_spill_other),
+                     .exu_tlu_spill_wtype(exu_tlu_spill_wtype[2:0]),
+                     .exu_tlu_cwp_cmplt (exu_tlu_cwp_cmplt),
+                     .exu_tlu_cwp_cmplt_tid(exu_tlu_cwp_cmplt_tid[1:0]),
+                     .rml_ecl_cwp_d     (rml_ecl_cwp_d[2:0]),
+                     .rml_ecl_cansave_d (rml_ecl_cansave_d[2:0]),
+                     .rml_ecl_canrestore_d(rml_ecl_canrestore_d[2:0]),
+                     .rml_ecl_otherwin_d(rml_ecl_otherwin_d[2:0]),
+                     .rml_ecl_wstate_d  (rml_ecl_wstate_d[5:0]),
+                     .rml_ecl_cleanwin_d(rml_ecl_cleanwin_d[2:0]),
+                     .rml_ecl_fill_e    (rml_ecl_fill_e),
+                     .rml_ecl_clean_window_e(rml_ecl_clean_window_e),
+                     .rml_ecl_other_e   (rml_ecl_other_e),
+                     .rml_ecl_wtype_e   (rml_ecl_wtype_e[2:0]),
+                     .exu_ifu_spill_e   (exu_ifu_spill_e),
+                     .rml_ecl_gl_e      (rml_ecl_gl_e[1:0]),
+                     .rml_irf_old_lo_cwp_e(rml_irf_old_lo_cwp_e[2:0]),
+                     .rml_irf_new_lo_cwp_e(rml_irf_new_lo_cwp_e[2:0]),
+                     .rml_irf_swap_even_e(rml_irf_swap_even_e),
+                     .rml_irf_swap_odd_e(rml_irf_swap_odd_e),
+                     .rml_irf_swap_local_e(rml_irf_swap_local_e),
+                     .rml_irf_kill_restore_w(rml_irf_kill_restore_w),
+                     .rml_irf_cwpswap_tid_e(rml_irf_cwpswap_tid_e[1:0]),
+                     .rml_ecl_swap_done (rml_ecl_swap_done[3:0]),
+                     .rml_ecl_rmlop_done_e(rml_ecl_rmlop_done_e),
+                     .exu_ifu_oddwin_s  (exu_ifu_oddwin_s[3:0]),
+                     .exu_tlu_spill     (exu_tlu_spill),
+                     .exu_tlu_spill_tid (exu_tlu_spill_tid[1:0]),
+                     .rml_ecl_kill_m    (rml_ecl_kill_m),
+                     .rml_irf_old_agp   (rml_irf_old_agp[1:0]),
+                     .rml_irf_new_agp   (rml_irf_new_agp[1:0]),
+                     .rml_irf_swap_global(rml_irf_swap_global),
+                     .rml_irf_global_tid(rml_irf_global_tid[1:0]),
+                     // Inputs
+                     .rclk              (rclk),
+                     .se                (se),
+                     .grst_l            (grst_l),
+                     .arst_l            (arst_l),
+                     .ifu_exu_tid_s2    (ifu_exu_tid_s2[1:0]),
+                     .ifu_exu_save_d    (ifu_exu_save_d),
+                     .ifu_exu_restore_d (ifu_exu_restore_d),
+                     .ifu_exu_saved_e   (ifu_exu_saved_e),
+                     .ifu_exu_restored_e(ifu_exu_restored_e),
+                     .ifu_exu_flushw_e  (ifu_exu_flushw_e),
+                     .ecl_rml_thr_m     (ecl_rml_thr_m[3:0]),
+                     .ecl_rml_thr_w     (ecl_rml_thr_w[3:0]),
+                     .ecl_rml_cwp_wen_e (ecl_rml_cwp_wen_e),
+                     .ecl_rml_cansave_wen_w(ecl_rml_cansave_wen_w),
+                     .ecl_rml_canrestore_wen_w(ecl_rml_canrestore_wen_w),
+                     .ecl_rml_otherwin_wen_w(ecl_rml_otherwin_wen_w),
+                     .ecl_rml_wstate_wen_w(ecl_rml_wstate_wen_w),
+                     .ecl_rml_cleanwin_wen_w(ecl_rml_cleanwin_wen_w),
+                     .ecl_rml_xor_data_e(ecl_rml_xor_data_e[2:0]),
+                     .ecl_rml_kill_e    (ecl_rml_kill_e),
+                     .ecl_rml_kill_w    (ecl_rml_kill_w),
+                     .ecl_rml_early_flush_w(ecl_rml_early_flush_w),
+                     .tlu_exu_agp       (tlu_exu_agp[1:0]),
+                     .tlu_exu_agp_swap  (tlu_exu_agp_swap),
+                     .tlu_exu_agp_tid   (tlu_exu_agp_tid[1:0]),
+                     .tlu_exu_cwp_m     (tlu_exu_cwp_m[2:0]),
+                     .tlu_exu_cwpccr_update_m(tlu_exu_cwpccr_update_m),
+                     .ecl_rml_inst_vld_w(ecl_rml_inst_vld_w),
+                     .tlu_exu_cwp_retry_m(tlu_exu_cwp_retry_m));
+endmodule // sparc_exu
+// Local Variables:
+// verilog-library-directories:("." "../../../srams/rtl")
+// End:
Index: /trunk/T1-CPU/exu/sparc_exu_alu.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_alu.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_alu.v	(revision 6)
@@ -0,0 +1,187 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_alu.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_alu
+*/
+
+module sparc_exu_alu   // EXU ALU datapath: adder, logic block, result mux, zero/negative flags, VA outputs
+(
+ /*AUTOARG*/
+   // Outputs
+   so, alu_byp_rd_data_e, exu_ifu_brpc_e, exu_lsu_ldst_va_e, 
+   exu_lsu_early_va_e, exu_mmu_early_va_e, alu_ecl_add_n64_e, 
+   alu_ecl_add_n32_e, alu_ecl_log_n64_e, alu_ecl_log_n32_e, 
+   alu_ecl_zhigh_e, alu_ecl_zlow_e, exu_ifu_regz_e, exu_ifu_regn_e, 
+   alu_ecl_adderin2_63_e, alu_ecl_adderin2_31_e, 
+   alu_ecl_adder_out_63_e, alu_ecl_cout32_e, alu_ecl_cout64_e_l, 
+   alu_ecl_mem_addr_invalid_e_l, 
+   // Inputs
+   rclk, se, si, byp_alu_rs1_data_e, byp_alu_rs2_data_e_l, 
+   byp_alu_rs3_data_e, byp_alu_rcc_data_e, ecl_alu_cin_e, 
+   ifu_exu_invert_d, ecl_alu_log_sel_and_e, ecl_alu_log_sel_or_e, 
+   ecl_alu_log_sel_xor_e, ecl_alu_log_sel_move_e, 
+   ecl_alu_out_sel_sum_e_l, ecl_alu_out_sel_rs3_e_l, 
+   ecl_alu_out_sel_shift_e_l, ecl_alu_out_sel_logic_e_l, 
+   shft_alu_shift_out_e, ecl_alu_sethi_inst_e, ifu_lsu_casa_e
+   );
+   input rclk;                        // renamed to clk internally
+   input se;
+   input si;                          // NOTE(review): not referenced anywhere in this module
+   input [63:0] byp_alu_rs1_data_e;   // source operand 1
+   input [63:0] byp_alu_rs2_data_e_l;  // source operand 2, inverted (re-inverted below)
+   input [63:0] byp_alu_rs3_data_e;  // source operand 3
+   input [63:0] byp_alu_rcc_data_e;  // source operand for reg condition codes
+   input        ecl_alu_cin_e;            // cin for adder
+   input        ifu_exu_invert_d;         // passed to the adder; also flopped to invert_e for the logic block
+   input  ecl_alu_log_sel_and_e;// These 4 wires are select lines for the logic
+   input  ecl_alu_log_sel_or_e;// block mux.  They are active high and choose the
+   input  ecl_alu_log_sel_xor_e;// output they describe.
+   input  ecl_alu_log_sel_move_e;
+   input  ecl_alu_out_sel_sum_e_l;// The following 4 are select lines for 
+   input  ecl_alu_out_sel_rs3_e_l;// the output stage mux.  They are active low
+   input  ecl_alu_out_sel_shift_e_l;// here (inverted below before use) and choose
+   input  ecl_alu_out_sel_logic_e_l;// the output of the respective block.
+   input [63:0] shft_alu_shift_out_e;// result from shifter
+   input        ecl_alu_sethi_inst_e;     // forwarded to the logic block
+   input        ifu_lsu_casa_e;           // select of lsu_va_mux (adder_out vs. rs1 data)
+   
+   output       so;                       // NOTE(review): never driven inside this module
+   output [63:0] alu_byp_rd_data_e;          // alu result
+   output [47:0] exu_ifu_brpc_e;// branch pc output
+   output [47:0] exu_lsu_ldst_va_e; // address for lsu
+   output [10:3] exu_lsu_early_va_e; // faster bits for cache
+   output [7:0]  exu_mmu_early_va_e;
+   output        alu_ecl_add_n64_e;    // adder_out[63]
+   output        alu_ecl_add_n32_e;    // adder_out[31]
+   output        alu_ecl_log_n64_e;    // logic_out[63]
+   output        alu_ecl_log_n32_e;    // logic_out[31]
+   output        alu_ecl_zhigh_e;      // zcomp_in[63:32] == 0
+   output        alu_ecl_zlow_e;       // zcomp_in[31:0] == 0
+   output    exu_ifu_regz_e;              // byp_alu_rcc_data_e[63:0] == 0
+   output    exu_ifu_regn_e;              // byp_alu_rcc_data_e[63]
+   output    alu_ecl_adderin2_63_e;
+   output    alu_ecl_adderin2_31_e;
+   output    alu_ecl_adder_out_63_e;
+   output    alu_ecl_cout32_e;       // To ecl of sparc_exu_ecl.v
+   output    alu_ecl_cout64_e_l;       // To ecl of sparc_exu_ecl.v
+   output    alu_ecl_mem_addr_invalid_e_l;// 'equal' of sparc_exu_alu_16eql on va_e[63:47]; NOTE(review): earlier text said adder_out[63:48]
+                                
+   wire         clk;
+   wire [63:0] logic_out;       // result of logic block
+   wire [63:0] adder_out;       // result of adder
+   wire [63:0] spr_out;         // result of sum predict
+   wire [63:0] zcomp_in;        // result going to zcompare
+   wire [63:0] va_e;            // complete va
+   wire [63:0] byp_alu_rs2_data_e;
+   wire        invert_e;
+   wire        ecl_alu_out_sel_sum_e;
+   wire        ecl_alu_out_sel_rs3_e;
+   wire        ecl_alu_out_sel_shift_e;
+   wire        ecl_alu_out_sel_logic_e;
+   assign      clk = rclk;
+   assign      byp_alu_rs2_data_e[63:0] = ~byp_alu_rs2_data_e_l[63:0];
+   assign      ecl_alu_out_sel_sum_e = ~ecl_alu_out_sel_sum_e_l;
+   assign      ecl_alu_out_sel_rs3_e = ~ecl_alu_out_sel_rs3_e_l;
+   assign      ecl_alu_out_sel_shift_e = ~ecl_alu_out_sel_shift_e_l;
+   assign      ecl_alu_out_sel_logic_e = ~ecl_alu_out_sel_logic_e_l;
+
+   // Zero comparison for exu_ifu_regz_e (zero32 of the comparator is left unconnected)
+   sparc_exu_aluzcmp64 regzcmp(.in(byp_alu_rcc_data_e[63:0]), .zero64(exu_ifu_regz_e));
+   assign     exu_ifu_regn_e = byp_alu_rcc_data_e[63];
+
+   // mux between adder output and rs1 (for casa) for lsu va
+   dp_mux2es #(64)  lsu_va_mux(.dout(va_e[63:0]),
+                               .in0(adder_out[63:0]),
+                               .in1(byp_alu_rs1_data_e[63:0]),
+                               .sel(ifu_lsu_casa_e));
+   assign     exu_lsu_ldst_va_e[47:0] = va_e[47:0];
+   // for bits 10:3 we have a separate bus taken straight from the adder (not muxed for cas)
+   assign     exu_lsu_early_va_e[10:3] = adder_out[10:3];
+   // mmu needs bits 7:0
+   assign     exu_mmu_early_va_e[7:0] = adder_out[7:0];
+   
+   
+   // Adder
+   assign     exu_ifu_brpc_e[47:0] = adder_out[47:0];
+   assign     alu_ecl_adder_out_63_e = adder_out[63];
+   sparc_exu_aluaddsub addsub(.adder_out(adder_out[63:0]),
+                              /*AUTOINST*/
+                              // Outputs
+                              .spr_out  (spr_out[63:0]),
+                              .alu_ecl_cout64_e_l(alu_ecl_cout64_e_l),
+                              .alu_ecl_cout32_e(alu_ecl_cout32_e),
+                              .alu_ecl_adderin2_63_e(alu_ecl_adderin2_63_e),
+                              .alu_ecl_adderin2_31_e(alu_ecl_adderin2_31_e),
+                              // Inputs
+                              .clk      (clk),
+                              .se       (se),
+                              .byp_alu_rs1_data_e(byp_alu_rs1_data_e[63:0]),
+                              .byp_alu_rs2_data_e(byp_alu_rs2_data_e[63:0]),
+                              .ecl_alu_cin_e(ecl_alu_cin_e),
+                              .ifu_exu_invert_d(ifu_exu_invert_d));
+
+   // Logic/pass rs2_data (invert_d2e flops ifu_exu_invert_d into invert_e; scan pins left open)
+   dff_s invert_d2e(.din(ifu_exu_invert_d), .clk(clk), .q(invert_e), .se(se), .si(), .so());
+   sparc_exu_alulogic logic(.rs1_data(byp_alu_rs1_data_e[63:0]),
+                            .rs2_data(byp_alu_rs2_data_e[63:0]),
+                            .isand(ecl_alu_log_sel_and_e),
+                            .isor(ecl_alu_log_sel_or_e),
+                            .isxor(ecl_alu_log_sel_xor_e),
+                            .pass_rs2_data(ecl_alu_log_sel_move_e),
+                            .inv_logic(invert_e), .logic_out(logic_out[63:0]),
+                            .ifu_exu_sethi_inst_e(ecl_alu_sethi_inst_e));
+
+   // Mux between sum predict and logic outputs for zcc
+   dp_mux2es #(64)  zcompmux(.dout(zcomp_in[63:0]),
+                           .in0(logic_out[63:0]),
+                           .in1(spr_out[63:0]),
+                           .sel(ecl_alu_out_sel_sum_e));
+
+   // Zero comparison for zero cc (done with reduction ORs; the instance below is disabled)
+//   sparc_exu_aluzcmp64 zcccmp(.in(zcomp_in[63:0]), .zero64(alu_ecl_z64_e),
+//                          .zero32(alu_ecl_z32_e));
+   assign        alu_ecl_zlow_e = ~(|zcomp_in[31:0]);
+   assign        alu_ecl_zhigh_e = ~(|zcomp_in[63:32]);
+
+   // Get Negative ccs
+   assign   alu_ecl_add_n64_e = adder_out[63];
+   assign   alu_ecl_add_n32_e = adder_out[31];
+   assign   alu_ecl_log_n64_e = logic_out[63];
+   assign   alu_ecl_log_n32_e = logic_out[31];
+
+   
+   // Mux for output (selects are the re-inverted, active-high copies of the _l inputs)
+   mux4ds #(64) output_mux(.dout(alu_byp_rd_data_e[63:0]), 
+                         .in0(adder_out[63:0]),
+                         .in1(byp_alu_rs3_data_e[63:0]),
+                         .in2(shft_alu_shift_out_e[63:0]),
+                         .in3(logic_out[63:0]), 
+                         .sel0(ecl_alu_out_sel_sum_e),
+                         .sel1(ecl_alu_out_sel_rs3_e),
+                         .sel2(ecl_alu_out_sel_shift_e),
+                         .sel3(ecl_alu_out_sel_logic_e));
+
+   // memory address checks: the 17 bits va_e[63:47] go to sparc_exu_alu_16eql
+   sparc_exu_alu_16eql chk_mem_addr(.equal(alu_ecl_mem_addr_invalid_e_l),
+                                    .in(va_e[63:47]));
+   
+endmodule  // sparc_exu_alu
Index: /trunk/T1-CPU/exu/sparc_exu_aluzcmp64.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_aluzcmp64.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_aluzcmp64.v	(revision 6)
@@ -0,0 +1,54 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_aluzcmp64.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//
+//  Module Name: sparc_exu_aluzcmp64
+//	Description: This block determines if the input 'source' is zero.
+//		It provides two outputs.  zero64 is high if all 64 bits
+//		are zero, while zero32 is high if the lower 32 bits are
+//		zero.  It uses 2 32 bit or gates and then NORs them.
+*/
+
+module sparc_exu_aluzcmp64 (in, zero64, zero32);
+
+   // Zero detect on a 64-bit operand.  Each 32-bit half goes through a
+   // sparc_exu_aluor32 instance; the two "nonzero" results are then
+   // combined into the 64-bit flag and inverted for the 32-bit flag.
+
+   input  [63:0] in;             // operand under test
+   output        zero64;         // 1 when every bit of in[63:0] is 0
+   output        zero32;         // 1 when every bit of in[31:0] is 0
+
+   wire          high_nonzero;   // out of highcmp: in[63:32]
+   wire          low_nonzero;    // out of lowcmp:  in[31:0]
+
+   // one OR block per half of the operand
+   sparc_exu_aluor32 highcmp (.out(high_nonzero), .in(in[63:32]));
+   sparc_exu_aluor32 lowcmp  (.out(low_nonzero),  .in(in[31:0]));
+
+   // zero64 is the De Morgan form of ~(low_nonzero | high_nonzero)
+   assign zero64 = ~high_nonzero & ~low_nonzero;
+
+   assign zero32 = ~low_nonzero;
+
+endmodule // sparc_exu_aluzcmp64
+   
Index: /trunk/T1-CPU/exu/sparc_exu_rndrob.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_rndrob.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_rndrob.v	(revision 6)
@@ -0,0 +1,84 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_rndrob.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_rndrob
+//  Description:	
+//  Round robin scheduler.  Least priority to the last granted
+//  customer.  If no requests, the priority remains the same. 
+*/
+////////////////////////////////////////////////////////////////////////
+
+module sparc_exu_rndrob (/*AUTOARG*/
+   // Outputs
+   grant_vec,
+   // Inputs
+   clk, reset, se, req_vec, advance
+   );
+
+   // Four-way round-robin arbiter.  park_reg remembers grant_vec from the
+   // last cycle in which 'advance' was high; the requester just after the
+   // remembered one is tried first, the remembered one itself is tried last.
+   input        clk, reset, se;
+   input  [3:0] req_vec;     // one request line per requester
+   input        advance;     // 1: load grant_vec into park_reg, 0: hold
+   output [3:0] grant_vec;
+
+   wire   [3:0] park_vec;    // registered grant, as used by the equations
+   wire   [3:0] pv;          // q of park_reg
+   wire   [3:0] next_pv;     // d of park_reg
+
+   assign park_vec = pv;
+   assign next_pv  = advance ? grant_vec : park_vec;
+
+   dffr_s #(4) park_reg (.q   (pv[3:0]),
+                         .din (next_pv[3:0]),
+                         .rst (reset),
+                         .clk (clk),
+                         .se  (se), .si(), .so());
+
+   // grant_vec[0] is also raised when lines 1..3 are idle, even without req_vec[0]
+   // (original note: with no requester, don't advance, or priority returns to 0).
+   // NOTE(review): park_vec == 0 with two or more requests yields no grant - confirm.
+   assign grant_vec[0] = (req_vec[0] & park_vec[3]) |
+                         (req_vec[0] & ~req_vec[3] & park_vec[2]) |
+                         (req_vec[0] & ~req_vec[3] & ~req_vec[2] & park_vec[1]) |
+                         (~req_vec[3] & ~req_vec[2] & ~req_vec[1]);
+
+   assign grant_vec[1] = (req_vec[1] & park_vec[0]) |
+                         (req_vec[1] & ~req_vec[0] & park_vec[3]) |
+                         (req_vec[1] & ~req_vec[0] & ~req_vec[3] & park_vec[2]) |
+                         (req_vec[1] & ~req_vec[0] & ~req_vec[3] & ~req_vec[2]);
+
+   assign grant_vec[2] = (req_vec[2] & park_vec[1]) |
+                         (req_vec[2] & ~req_vec[1] & park_vec[0]) |
+                         (req_vec[2] & ~req_vec[1] & ~req_vec[0] & park_vec[3]) |
+                         (req_vec[2] & ~req_vec[1] & ~req_vec[0] & ~req_vec[3]);
+
+   assign grant_vec[3] = (req_vec[3] & park_vec[2]) |
+                         (req_vec[3] & ~req_vec[2] & park_vec[1]) |
+                         (req_vec[3] & ~req_vec[2] & ~req_vec[1] & park_vec[0]) |
+                         (req_vec[3] & ~req_vec[2] & ~req_vec[1] & ~req_vec[0]);
+
+endmodule // sparc_exu_rndrob
+
+   
+   
Index: /trunk/T1-CPU/exu/sparc_exu_ecl_divcntl.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_ecl_divcntl.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_ecl_divcntl.v	(revision 6)
@@ -0,0 +1,405 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_ecl_divcntl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_ecl_divcntl
+//	Description: Control block for div.  Division takes 1 cycle to load
+//		the values, 65 cycles to calculate the result, and 1 cycle to 
+//    calculate the ccs and check for overflow.
+//   	Controlled by a one hot state machine and a 6 bit counter.
+*/
+
+`define IDLE 0
+`define RUN 1
+`define LAST_CALC 2
+`define CHK_OVFL 3
+`define FIX_OVFL 4
+`define DONE 5
+// The six macros above are bit positions in the 6-bit one-hot div_state/next_state vectors.
+module sparc_exu_ecl_divcntl (/*AUTOARG*/
+   // Outputs
+   ecl_div_xinmask, ecl_div_keep_d, ecl_div_ld_inputs, 
+   ecl_div_sel_adder, ecl_div_last_cycle, ecl_div_almostlast_cycle, 
+   ecl_div_sel_div, divcntl_wb_req_g, divcntl_ccr_cc_w2, 
+   ecl_div_sel_64b, ecl_div_sel_u32, ecl_div_sel_pos32, 
+   ecl_div_sel_neg32, ecl_div_upper32_zero, ecl_div_upper33_one, 
+   ecl_div_upper33_zero, ecl_div_dividend_sign, ecl_div_newq, 
+   ecl_div_subtract_l, ecl_div_keepx, ecl_div_cin, 
+   // Inputs
+   clk, se, reset, mdqctl_divcntl_input_vld, wb_divcntl_ack_g, 
+   mdqctl_divcntl_reset_div, div_ecl_gencc_in_msb_l, 
+   div_ecl_gencc_in_31, div_ecl_upper32_equal, div_ecl_low32_nonzero, 
+   ecl_div_signed_div, div_ecl_dividend_msb, div_ecl_xin_msb_l, 
+   div_ecl_x_msb, div_ecl_d_msb, div_ecl_cout64, 
+   div_ecl_divisorin_31, ecl_div_div64, mdqctl_divcntl_muldone, 
+   ecl_div_muls, div_ecl_adder_out_31, muls_rs1_31_m_l, 
+   div_ecl_cout32, rs2_data_31_m, div_ecl_detect_zero_high, 
+   div_ecl_detect_zero_low, div_ecl_d_62
+   ) ;  // Divide control: one-hot state machine, next-q/subtract/cin selection, overflow selects and cc bits
+   input     clk;
+   input     se;
+   input     reset;                       // forces next_state to IDLE only
+   input     mdqctl_divcntl_input_vld;    // becomes ecl_div_ld_inputs; takes IDLE to RUN
+   input     wb_divcntl_ack_g;            // takes DONE back to IDLE
+   input     mdqctl_divcntl_reset_div;    // forces next_state to IDLE only
+   input     div_ecl_gencc_in_msb_l;
+   input     div_ecl_gencc_in_31;
+   input     div_ecl_upper32_equal;
+   input     div_ecl_low32_nonzero;
+   input     ecl_div_signed_div;
+   input     div_ecl_dividend_msb;
+   input     div_ecl_xin_msb_l;
+   input     div_ecl_x_msb;
+   input     div_ecl_d_msb;
+   input     div_ecl_cout64;
+   input     div_ecl_divisorin_31;
+   input     ecl_div_div64;
+   input     mdqctl_divcntl_muldone;
+   input     ecl_div_muls;                // when set, RUN goes straight to FIX_OVFL
+   input  div_ecl_adder_out_31;
+   input  muls_rs1_31_m_l;
+   input  div_ecl_cout32;
+   input  rs2_data_31_m;
+   input         div_ecl_detect_zero_high;
+   input         div_ecl_detect_zero_low;
+   input         div_ecl_d_62;
+   
+   output    ecl_div_xinmask;
+   output    ecl_div_keep_d;
+   output    ecl_div_ld_inputs;
+   output    ecl_div_sel_adder;
+   output    ecl_div_last_cycle;   // last cycle of calculation
+   output    ecl_div_almostlast_cycle;// RUN with the counter at 63 and no input load
+   output    ecl_div_sel_div;
+   output    divcntl_wb_req_g;
+   output [7:0] divcntl_ccr_cc_w2;     // {xcc[3:0], icc[3:0]}; bit 3 negative, 2 zero, 1 overflow, 0 carry
+   output       ecl_div_sel_64b;
+   output       ecl_div_sel_u32;
+   output       ecl_div_sel_pos32;
+   output       ecl_div_sel_neg32;
+   output       ecl_div_upper32_zero;
+   output       ecl_div_upper33_one;
+   output       ecl_div_upper33_zero;
+   output       ecl_div_dividend_sign;
+   output       ecl_div_newq;
+   output       ecl_div_subtract_l;
+   output       ecl_div_keepx;
+   output        ecl_div_cin;
+   
+   wire         firstq;
+   wire         q_next;        // next q bit
+   wire         adderin1_64;   // msbs for adder
+   wire         adderin2_64;
+   wire         firstlast_sub; // subtract for first and last cycle
+   wire         sub_next;      // next cycle will subtract
+   wire         subtract;
+   wire         bit64_halfadd; // partial result for qpredict
+   wire         partial_qpredict;
+   wire [1:0]   q_next_nocout;
+   wire [1:0]   sub_next_nocout;
+   wire         partial_qpredict_l;
+   wire          divisor_sign;
+   wire          detect_zero;
+   wire          new_zero_rem_with_zero;
+   wire          new_zero_rem_no_zero;
+   wire          zero_rem_d;
+   wire          zero_rem_q;
+   wire          last_cin_with_zero;
+   wire          last_cin_no_zero;
+   wire          last_cin;
+   wire          last_cin_next;
+   
+   // overflow correction wires
+   wire          upper32_equal_d1;
+   wire          gencc_in_msb_l_d1;
+   wire          gencc_in_31_d1;
+   wire          sel_div_d1;
+   wire          low32_nonzero_d1;
+   
+   // Condition code generation wires
+   wire [3:0]   xcc;
+   wire [3:0]   icc;
+   wire         unsign_ovfl;
+   wire         pos_ovfl;
+   wire         neg_ovfl;
+   wire         muls_c;
+   wire         next_muls_c;
+   wire         muls_v;
+   wire         next_muls_v;
+   wire         muls_rs1_data_31_m;
+   wire         div_adder_out_31_w;
+   wire         rs2_data_31_w;
+   wire         muls_rs1_data_31_w;
+   wire         ovfl_32;
+   wire         div_v;
+   
+   wire [5:0]   div_state;
+   wire [5:0]   next_state;
+   wire         go_idle,
+                stay_idle,
+                go_run,
+                stay_run,
+                go_last_calc,
+                go_chk_ovfl,
+                go_fix_ovfl,
+                go_done,
+                stay_done;
+
+
+   wire         reset_cnt;
+   wire [5:0]   cntr;
+   wire         cntris63;
+
+   /////////////////////////////////
+   // G arbitration between MUL/DIV
+   /////////////////////////////////
+   assign        divcntl_wb_req_g = div_state[`DONE] | 
+                      (~(div_state[`DONE] | div_state[`CHK_OVFL] | div_state[`FIX_OVFL]) &mdqctl_divcntl_muldone);
+   assign        ecl_div_sel_div = ~(~(div_state[`DONE] | div_state[`CHK_OVFL] | div_state[`FIX_OVFL]) & 
+                                    mdqctl_divcntl_muldone);
+   // ecl_div_sel_div drops to 0 only when muldone is set and the state is not CHK_OVFL/FIX_OVFL/DONE
+   // state flop
+   dff_s #(6) divstate_dff(.din(next_state[5:0]), .clk(clk), .q(div_state[5:0]), .se(se), .si(),
+                    .so());
+
+   // output logic and state decode
+   assign        ecl_div_almostlast_cycle = go_last_calc & ~ecl_div_ld_inputs;
+   assign        ecl_div_sel_adder = (div_state[`RUN] | div_state[`LAST_CALC]) & ~ecl_div_ld_inputs;
+   assign        ecl_div_last_cycle = div_state[`LAST_CALC];
+   assign        ecl_div_ld_inputs = mdqctl_divcntl_input_vld;
+   assign        ecl_div_keep_d = ~(ecl_div_sel_adder | ecl_div_ld_inputs);
+   assign        reset_cnt = ~div_state[`RUN];   // cnt6 is held in reset in every state except RUN
+   // next state logic: IDLE -> RUN on input_vld; RUN -> LAST_CALC when cntris63 -> CHK_OVFL
+   // -> FIX_OVFL -> DONE -> IDLE on wb ack; RUN -> FIX_OVFL directly when ecl_div_muls is set
+   assign        stay_idle = div_state[`IDLE] & ~mdqctl_divcntl_input_vld;
+   assign        go_idle = div_state[`DONE] & wb_divcntl_ack_g;
+   assign        next_state[`IDLE] = go_idle | stay_idle | mdqctl_divcntl_reset_div | reset;
+   // reset and mdqctl_divcntl_reset_div set IDLE above and clear every other state bit below
+   assign        stay_run = div_state[`RUN] & ~cntris63 & ~ecl_div_muls;
+   assign        go_run = (div_state[`IDLE] & mdqctl_divcntl_input_vld);
+   assign        next_state[`RUN] = (go_run | stay_run) & 
+                                      ~mdqctl_divcntl_reset_div & ~reset;
+
+   assign        go_last_calc = div_state[`RUN] & (cntris63);
+   assign        next_state[`LAST_CALC] = go_last_calc & ~mdqctl_divcntl_reset_div & ~reset;
+
+   // chk_ovfl and fix_ovfl are place holders to guarantee that the overflow checking
+   // takes place on the result.  No special logic occurs in them compared to the done state.
+   assign        go_chk_ovfl = div_state[`LAST_CALC];
+   assign        next_state[`CHK_OVFL] = go_chk_ovfl & ~mdqctl_divcntl_reset_div & ~reset;
+
+   assign        go_fix_ovfl = div_state[`CHK_OVFL] | (div_state[`RUN] & ecl_div_muls);
+   assign        next_state[`FIX_OVFL] = go_fix_ovfl & ~mdqctl_divcntl_reset_div & ~reset;
+
+   assign        go_done = div_state[`FIX_OVFL];
+   assign        stay_done = div_state[`DONE] & ~wb_divcntl_ack_g;
+   assign        next_state[`DONE] = (go_done | stay_done) & ~mdqctl_divcntl_reset_div & ~reset;
+   
+   // counter
+   sparc_exu_ecl_cnt6 cnt6(.reset       (reset_cnt),
+                           /*AUTOINST*/
+                           // Outputs
+                           .cntr        (cntr[5:0]),
+                           // Inputs
+                           .clk         (clk),
+                           .se          (se));
+
+   assign        cntris63 = cntr[5] & cntr[4] & cntr[3] & cntr[2] & cntr[1] & cntr[0];   // all six bits set
+
+
+   ///////////////////////////////
+   // Random logic for divider
+   ///////////////////////////////
+   // Generation of sign extension of dividend and divisor
+   assign        ecl_div_dividend_sign = ecl_div_signed_div & div_ecl_dividend_msb;
+   assign        ecl_div_xinmask = div_ecl_divisorin_31 & ecl_div_signed_div;
+
+   assign        divisor_sign = div_ecl_x_msb & ecl_div_signed_div;
+   
+   // Generation of next bit of quotient
+   ////////////////////////////////////////////////////////////////
+   //	Calculate the next q.  Requires calculating the result
+   // of the 65th bit of the adder and xoring it with the sign of
+   // the divisor.  The order of these xors is switched for critical
+   // path considerations.
+   ////////////////////////////////////////////////////////////////
+   assign        adderin1_64 = div_ecl_d_msb;
+   assign        adderin2_64 = (ecl_div_signed_div & div_ecl_x_msb) ^ subtract;
+   assign        bit64_halfadd = adderin1_64 ^ adderin2_64;
+   assign        partial_qpredict = bit64_halfadd ^ ~(div_ecl_x_msb & ecl_div_signed_div);
+   assign        partial_qpredict_l = ~partial_qpredict;
+   //assign        qpredict = partial_qpredict ^ div_ecl_cout64;
+   //assign        firstq = ~ecl_div_signed_div | div_ecl_xin_msb_l; 
+   assign        firstq = ecl_div_dividend_sign;
+
+   mux2ds #(2) qnext_mux(.dout(q_next_nocout[1:0]), 
+                            .in0({partial_qpredict, partial_qpredict_l}),
+                            .in1({2{firstq}}),
+                            .sel0(~ecl_div_ld_inputs),
+                            .sel1(ecl_div_ld_inputs));
+   dp_mux2es qnext_cout_mux(.dout(q_next),
+                            .in0(q_next_nocout[1]),
+                            .in1(q_next_nocout[0]),
+                            .sel(div_ecl_cout64));
+
+   dff_s q_dff(.din(q_next), .clk(clk), .q(ecl_div_newq), .se(se), .si(),
+               .so());
+
+
+   ////////////////////////////
+   // Subtraction logic and subtract flop
+   //-------------------------------------
+   // To take the subtraction calc out of the critical path,
+   // it is done in the previous cycle and part is done with a
+   // mux.  The result is put into a flop.
+   ////////////////////////////
+   assign firstlast_sub = ~ecl_div_almostlast_cycle & ~ecl_div_muls &
+          (~ecl_div_signed_div | ~(div_ecl_dividend_msb ^ ~div_ecl_xin_msb_l));
+                                                       
+   assign        ecl_div_keepx = ~(ecl_div_ld_inputs |
+                                  ecl_div_almostlast_cycle);
+
+   mux2ds #(2) subnext_mux(.dout(sub_next_nocout[1:0]), 
+                              .in0({2{firstlast_sub}}),
+                              .in1({partial_qpredict, partial_qpredict_l}),
+                              .sel0(~ecl_div_keepx),
+                              .sel1(ecl_div_keepx));
+   dp_mux2es subtract_cout_mux(.dout(sub_next),
+                            .in0(sub_next_nocout[1]),
+                            .in1(sub_next_nocout[0]),
+                            .sel(div_ecl_cout64));
+   
+   dff_s sub_dff(.din(sub_next), .clk(clk), .q(subtract), .se(se), .si(),
+               .so());
+
+   assign        ecl_div_subtract_l = ~subtract;
+
+
+   /////////////////////////////////////////////
+   // Carry in logic
+   //--------------------------------------------
+   // The carry is usually just subtract.  The
+   // quotient correction for signed division
+   // sometimes has to adjust it though.
+   /////////////////////////////////////////////
+   assign        detect_zero = div_ecl_detect_zero_low & div_ecl_detect_zero_high;
+
+   assign ecl_div_cin = (ecl_div_last_cycle)? last_cin: subtract;
+   // stores if the partial remainder was ever zero.
+/* -----\/----- EXCLUDED -----\/-----
+   // changed for timing
+    assign        zero_rem_d = ~ecl_div_ld_inputs & (div_ecl_detect_zero | zero_rem_q) & 
+                                                     (~div_ecl_d_62 | ecl_div_almostlast_cycle);
+ -----/\----- EXCLUDED -----/\----- */
+   assign new_zero_rem_with_zero = ~ecl_div_ld_inputs & (~div_ecl_d_62 | ecl_div_almostlast_cycle);
+   assign new_zero_rem_no_zero = zero_rem_q & new_zero_rem_with_zero;
+   assign zero_rem_d = (detect_zero)? new_zero_rem_with_zero: new_zero_rem_no_zero;
+   dff_s zero_rem_dff(.din(zero_rem_d), .clk(clk), .q(zero_rem_q),
+                    .se(se), .si(), .so());
+   
+/* -----\/----- EXCLUDED -----\/-----
+   // changed for timing
+   assign last_cin_next = ecl_div_signed_div & (divisor_sign & ~div_ecl_d_62 | 
+                                                ~divisor_sign &div_ecl_d_62&~zero_rem_d |
+                                                divisor_sign &div_ecl_d_62&zero_rem_d);
+ -----/\----- EXCLUDED -----/\----- */
+   assign last_cin_with_zero = ecl_div_signed_div & (divisor_sign & ~div_ecl_d_62 | 
+                                                ~divisor_sign &div_ecl_d_62&~new_zero_rem_with_zero |
+                                                divisor_sign &div_ecl_d_62&new_zero_rem_with_zero);
+   assign last_cin_no_zero = ecl_div_signed_div & (divisor_sign & ~div_ecl_d_62 | 
+                                                ~divisor_sign &div_ecl_d_62&~new_zero_rem_no_zero |
+                                                divisor_sign &div_ecl_d_62&new_zero_rem_no_zero);
+   assign last_cin_next = (detect_zero)? last_cin_with_zero: last_cin_no_zero;
+   dff_s last_cin_dff(.din(last_cin_next), .clk(clk), .q(last_cin),
+                    .se(se), .si(), .so());
+   
+   ///////////////////////////////
+   // Condition code generation
+   ///////////////////////////////
+   // There is a special case:
+   // For 64 bit signed division largest neg/-1 = largest neg
+   // However for 32 bit division this will give us positive overflow.
+   // This is detected by a sign switch on this case.
+   wire   inputs_neg_d;
+   wire   inputs_neg_q;
+   wire   large_neg_ovfl;
+   assign inputs_neg_d = div_ecl_dividend_msb & div_ecl_divisorin_31;
+   assign large_neg_ovfl = inputs_neg_q & ~gencc_in_msb_l_d1;
+   dffe_s inputs_neg_dff(.din(inputs_neg_d), .clk(clk), .q(inputs_neg_q), 
+                       .en(ecl_div_ld_inputs), .se(se), .si(), .so());
+   dff_s #(5) cc_sig_dff(.din({div_ecl_upper32_equal, div_ecl_gencc_in_msb_l,
+                             div_ecl_gencc_in_31, ecl_div_sel_div, div_ecl_low32_nonzero}),
+                         .q({upper32_equal_d1, gencc_in_msb_l_d1,
+                             gencc_in_31_d1, sel_div_d1, low32_nonzero_d1}),
+                         .clk(clk), .se(se), .si(), .so());
+   // selects for correcting divide overflow
+   assign        ecl_div_sel_64b = ecl_div_div64 | ecl_div_muls;
+   assign        ecl_div_sel_u32 = ~ecl_div_sel_64b & ~ecl_div_signed_div;
+   assign 			 ecl_div_sel_pos32 = (~ecl_div_sel_64b & ecl_div_signed_div & 
+                                      (gencc_in_msb_l_d1 | large_neg_ovfl));
+   assign        ecl_div_sel_neg32 = (~ecl_div_sel_64b & ecl_div_signed_div & 
+                                      ~gencc_in_msb_l_d1 & ~large_neg_ovfl);
+
+   // results of checking are staged one cycle for timing reasons
+   // this is the reason for the chk and fix ovfl states
+   assign        ecl_div_upper32_zero = upper32_equal_d1 & gencc_in_msb_l_d1;
+   assign        ecl_div_upper33_zero = (upper32_equal_d1 & gencc_in_msb_l_d1 & 
+                                         ~gencc_in_31_d1);
+   assign        ecl_div_upper33_one = (upper32_equal_d1 & ~gencc_in_msb_l_d1 & 
+                                        gencc_in_31_d1);
+
+   // divide overflow
+   assign        unsign_ovfl = ecl_div_sel_u32 & ~ecl_div_upper32_zero & sel_div_d1;
+   assign        pos_ovfl = ecl_div_sel_pos32 & ~ecl_div_upper33_zero & sel_div_d1;
+   assign        neg_ovfl = ecl_div_sel_neg32 & ~ecl_div_upper33_one & sel_div_d1;
+   assign        div_v = pos_ovfl | unsign_ovfl | neg_ovfl;
+
+   // muls carry and overflow (carry sampled while in RUN, overflow while in FIX_OVFL, held otherwise)
+   assign next_muls_c = (div_state[`RUN]) ? div_ecl_cout32: muls_c;
+
+   assign        muls_rs1_data_31_m = ~muls_rs1_31_m_l;
+   dff_s #(3) muls_overlow_dff(.din({muls_rs1_data_31_m, rs2_data_31_m, div_ecl_adder_out_31}),
+                             .q({muls_rs1_data_31_w, rs2_data_31_w, div_adder_out_31_w}),
+                             .clk(clk), .se(se), .si(), .so());
+   assign ovfl_32 = ((muls_rs1_data_31_w & rs2_data_31_w & ~div_adder_out_31_w) |
+                     (~muls_rs1_data_31_w & ~rs2_data_31_w & div_adder_out_31_w));
+   assign next_muls_v = (div_state[`FIX_OVFL]) ? ovfl_32: muls_v;
+   dff_s muls_c_dff(.din(next_muls_c), .clk(clk), .q(muls_c),
+                  .se(se), .si(), .so());
+   dff_s muls_v_dff(.din(next_muls_v), .clk(clk), .q(muls_v),
+                  .se(se), .si(), .so());
+  
+   // negative
+   assign xcc[3] = ~gencc_in_msb_l_d1 & ~unsign_ovfl & ~pos_ovfl;
+   assign icc[3] = (gencc_in_31_d1 & ~pos_ovfl) | neg_ovfl | unsign_ovfl;
+   // zero
+   assign xcc[2] = upper32_equal_d1 & gencc_in_msb_l_d1 & ~low32_nonzero_d1;
+   assign icc[2] = ~low32_nonzero_d1 & ~div_v; // nonzero checks before ovfl
+   //overflow
+   assign xcc[1] = 1'b0;
+   assign icc[1] = (ecl_div_muls & sel_div_d1) ? muls_v: div_v;
+   // carry
+   assign xcc[0] = 1'b0;
+   assign icc[0] = ecl_div_muls & sel_div_d1 & muls_c;
+
+   assign divcntl_ccr_cc_w2 = {xcc, icc};
+
+endmodule // sparc_exu_ecl_divcntl
Index: /trunk/T1-CPU/exu/sparc_exu_aluor32.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_aluor32.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_aluor32.v	(revision 6)
@@ -0,0 +1,104 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_aluor32.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_aluor32
+//	Description: This block performs a 32 bit OR of the input source.
+//			The output is 1 when any input bit is nonzero, 0 otherwise.
+*/
+
+
+module sparc_exu_aluor32
+  (/*AUTOARG*/
+   // Outputs
+   out, 
+   // Inputs
+   in
+   );
+
+   // ------------------------------------------------------------------
+   // 32-bit nonzero detector.
+   //
+   // Ports
+   //   in  [31:0] : operand to be tested against zero
+   //   out        : 1 when at least one bit of 'in' is 1,
+   //                0 when all 32 bits are 0
+   //
+   // Implementation
+   //   Purely combinational three-level reduction tree:
+   //     level 1 - sixteen 2-input NORs, one per adjacent bit pair
+   //     level 2 - four 4-input NANDs, one per byte of 'in'
+   //     level 3 - inverters followed by a final 4-input NAND
+   //   The result is equivalent to the reduction OR of in[31:0].
+   //   No clock, reset or scan connection is used.
+   // ------------------------------------------------------------------
+
+   input  [31:0] in;
+   output        out;
+
+   wire [15:0]   pair_clr;  // pair_clr[i]: in[2i+1] and in[2i] are both 0
+   wire [3:0]    byte_set;  // byte_set[j]: byte j of 'in' has a bit set
+   wire [3:0]    byte_clr;  // byte_clr[j]: complement of byte_set[j]
+
+   // Level 1: NOR of each adjacent bit pair.
+   assign       pair_clr[0]  = ~(|in[1:0]);
+   assign       pair_clr[1]  = ~(|in[3:2]);
+   assign       pair_clr[2]  = ~(|in[5:4]);
+   assign       pair_clr[3]  = ~(|in[7:6]);
+   assign       pair_clr[4]  = ~(|in[9:8]);
+   assign       pair_clr[5]  = ~(|in[11:10]);
+   assign       pair_clr[6]  = ~(|in[13:12]);
+   assign       pair_clr[7]  = ~(|in[15:14]);
+   assign       pair_clr[8]  = ~(|in[17:16]);
+   assign       pair_clr[9]  = ~(|in[19:18]);
+   assign       pair_clr[10] = ~(|in[21:20]);
+   assign       pair_clr[11] = ~(|in[23:22]);
+   assign       pair_clr[12] = ~(|in[25:24]);
+   assign       pair_clr[13] = ~(|in[27:26]);
+   assign       pair_clr[14] = ~(|in[29:28]);
+   assign       pair_clr[15] = ~(|in[31:30]);
+
+   // Level 2: NAND of the four pair flags that make up each byte.
+   assign       byte_set[0] = ~(&pair_clr[3:0]);
+   assign       byte_set[1] = ~(&pair_clr[7:4]);
+   assign       byte_set[2] = ~(&pair_clr[11:8]);
+   assign       byte_set[3] = ~(&pair_clr[15:12]);
+
+   // Level 3a: invert each byte flag.
+   assign       byte_clr[0] = ~byte_set[0];
+   assign       byte_clr[1] = ~byte_set[1];
+   assign       byte_clr[2] = ~byte_set[2];
+   assign       byte_clr[3] = ~byte_set[3];
+
+   // Level 3b: final NAND. A byte with any bit set has byte_clr = 0,
+   // which forces 'out' to 1; 'out' is 0 only when all bytes are clear.
+   assign       out = ~(&byte_clr[3:0]);
+
+endmodule // sparc_exu_aluor32
+
+   
+
+   
+                           
+
+                            
+                            
+                          
Index: /trunk/T1-CPU/exu/sparc_exu_alu_16eql.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_alu_16eql.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_alu_16eql.v	(revision 6)
@@ -0,0 +1,72 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_alu_16eql.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_alu_16eql
+//	Description: Takes a 17 bit input and generates an active high output
+//			signifying that all 17 bits have the same value.
+*/
+module sparc_exu_alu_16eql (/*AUTOARG*/
+   // Outputs
+   equal, 
+   // Inputs
+   in
+   ) ;
+   // ------------------------------------------------------------------
+   // 17-bit "all bits identical" detector.
+   //
+   // Ports
+   //   in    [16:0] : bits to be compared with each other
+   //   equal        : 1 when all 17 bits of 'in' carry the same value,
+   //                  0 when any two neighbouring bits differ
+   //
+   // Implementation
+   //   Purely combinational: 16 neighbour XORs reduced by a
+   //   NOR / NAND / NOR tree.
+   // ------------------------------------------------------------------
+   input  [16:0] in;
+   output        equal;
+
+   wire [15:0]   nbr_diff;   // nbr_diff[i]: in[i+1] differs from in[i]
+   wire [7:0]    pair_same;  // pair_same[k]: nbr_diff[2k+1] and nbr_diff[2k] are 0
+   wire [1:0]    half_diff;  // half_diff[h]: some nbr_diff bit in half h is 1
+
+   // Stage 1: compare every bit with its lower neighbour.
+   assign       nbr_diff[15:0] = in[16:1] ^ in[15:0];
+
+   // Stage 2: NOR of each pair of comparison results.
+   assign       pair_same[0] = ~(|nbr_diff[1:0]);
+   assign       pair_same[1] = ~(|nbr_diff[3:2]);
+   assign       pair_same[2] = ~(|nbr_diff[5:4]);
+   assign       pair_same[3] = ~(|nbr_diff[7:6]);
+   assign       pair_same[4] = ~(|nbr_diff[9:8]);
+   assign       pair_same[5] = ~(|nbr_diff[11:10]);
+   assign       pair_same[6] = ~(|nbr_diff[13:12]);
+   assign       pair_same[7] = ~(|nbr_diff[15:14]);
+
+   // Stage 3: NAND of four pair flags, one per group of eight comparisons.
+   assign       half_diff[0] = ~(&pair_same[3:0]);
+   assign       half_diff[1] = ~(&pair_same[7:4]);
+
+   // Stage 4: no difference in either half means all 17 bits are equal.
+   assign       equal = ~(|half_diff[1:0]);
+
+endmodule // sparc_exu_alu_16eql
Index: /trunk/T1-CPU/exu/sparc_exu_ecc.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_ecc.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_ecc.v	(revision 6)
@@ -0,0 +1,165 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_ecc.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_ecc
+//	Description: This block performs the ecc check of rs1, rs2 and rs3 and corrects the selected one.
+// 			NOTE: no w2 write port arbitration or w2 ecc generation is implemented here.
+*/
+module sparc_exu_ecc (/*AUTOARG*/
+   // Outputs
+   so, ecc_ecl_rs1_ce, ecc_ecl_rs1_ue, ecc_ecl_rs2_ce, 
+   ecc_ecl_rs2_ue, ecc_ecl_rs3_ce, ecc_ecl_rs3_ue, 
+   ecc_byp_ecc_result_m, exu_ifu_err_synd_m, 
+   // Inputs
+   rclk, se, si, byp_ecc_rcc_data_e, ecl_ecc_rs1_use_rf_e, 
+   byp_ecc_rs1_synd_d, byp_alu_rs2_data_e, ecl_ecc_rs2_use_rf_e, 
+   byp_ecc_rs2_synd_d, byp_ecc_rs3_data_e, ecl_ecc_rs3_use_rf_e, 
+   byp_ecc_rs3_synd_d, ecl_ecc_sel_rs1_m_l, ecl_ecc_sel_rs2_m_l, 
+   ecl_ecc_sel_rs3_m_l, ecl_ecc_log_rs1_m, ecl_ecc_log_rs2_m, 
+   ecl_ecc_log_rs3_m
+   ) ;  // Checks rs1/rs2/rs3 data in E, corrects the selected port's data in M
+   input rclk;                         // clock; aliased to 'clk' below
+   input se;                           // scan enable, passed to every dff_s
+   input si;                           // scan in; not connected inside this module
+   input [63:0] byp_ecc_rcc_data_e;    // data checked by chk_rs1 (rs1 port)
+   input        ecl_ecc_rs1_use_rf_e;  // 'vld' input of chk_rs1
+   input [7:0]  byp_ecc_rs1_synd_d;    // check bits for rs1, flopped D -> E before use
+   input [63:0] byp_alu_rs2_data_e;    // data checked by chk_rs2
+   input        ecl_ecc_rs2_use_rf_e;  // 'vld' input of chk_rs2
+   input [7:0]  byp_ecc_rs2_synd_d;    // check bits for rs2, flopped D -> E before use
+   input [63:0] byp_ecc_rs3_data_e;    // data checked by chk_rs3
+   input        ecl_ecc_rs3_use_rf_e;  // 'vld' input of chk_rs3
+   input [7:0]  byp_ecc_rs3_synd_d;    // check bits for rs3, flopped D -> E before use
+   input        ecl_ecc_sel_rs1_m_l;   // active-low: pick rs1 for correction
+   input        ecl_ecc_sel_rs2_m_l;   // active-low: pick rs2 for correction
+   input        ecl_ecc_sel_rs3_m_l;   // active-low: pick rs3 for correction
+   input        ecl_ecc_log_rs1_m;     // pick rs1 syndrome for exu_ifu_err_synd_m
+   input        ecl_ecc_log_rs2_m;     // pick rs2 syndrome for exu_ifu_err_synd_m
+   input        ecl_ecc_log_rs3_m;     // pick rs3 syndrome for exu_ifu_err_synd_m
+
+   output       so;                    // scan out; not driven inside this module
+   output       ecc_ecl_rs1_ce;        // 'ce' output of chk_rs1 (presumably correctable error)
+   output       ecc_ecl_rs1_ue;        // 'ue' output of chk_rs1 (presumably uncorrectable error)
+   output       ecc_ecl_rs2_ce;        // 'ce' output of chk_rs2
+   output       ecc_ecl_rs2_ue;        // 'ue' output of chk_rs2
+   output       ecc_ecl_rs3_ce;        // 'ce' output of chk_rs3
+   output       ecc_ecl_rs3_ue;        // 'ue' output of chk_rs3
+
+   output [63:0] ecc_byp_ecc_result_m; // selected M-stage data XOR decoded error mask
+   output [6:0]  exu_ifu_err_synd_m;   // syndrome chosen by the ecl_ecc_log_* selects
+
+   wire          clk;                  // alias of rclk
+   wire         sel_rs1_m;             // active-high copy of ecl_ecc_sel_rs1_m_l
+   wire         sel_rs2_m;             // active-high copy of ecl_ecc_sel_rs2_m_l
+   wire         sel_rs3_m;             // active-high copy of ecl_ecc_sel_rs3_m_l
+   wire [7:0]   rs1_ecc_e;             // byp_ecc_rs1_synd_d delayed by one flop
+   wire [6:0]   rs1_err_e;      // syndrome generated by checker
+   wire [6:0]   rs1_err_m;      // syndrome generated by checker
+   wire [7:0]   rs2_ecc_e;             // byp_ecc_rs2_synd_d delayed by one flop
+   wire [6:0]   rs2_err_e;      // syndrome generated by checker
+   wire [6:0]   rs2_err_m;      // syndrome generated by checker
+   wire [7:0]   rs3_ecc_e;             // byp_ecc_rs3_synd_d delayed by one flop
+   wire [6:0]   rs3_err_e;      // syndrome generated by checker
+   wire [6:0]   rs3_err_m;      // syndrome generated by checker
+   wire [6:0]   err_m;                 // syndrome of the port picked by sel_rs*_m
+   wire [63:0]  ecc_datain_m;          // data of the port picked by sel_rs*_m
+   wire [63:0]  byp_ecc_rcc_data_m;    // byp_ecc_rcc_data_e delayed by one flop
+   wire [63:0]  byp_alu_rs2_data_m;    // byp_alu_rs2_data_e delayed by one flop
+   wire [63:0]  exu_lsu_rs3_data_m;    // byp_ecc_rs3_data_e delayed by one flop
+   wire [63:0]  error_data_m;          // 'e' output of the syndrome decoder
+
+   assign       clk = rclk;
+   // Pass along ecc parity bits from RF
+   dff_s #(8) rs1_ecc_d2e(.din(byp_ecc_rs1_synd_d[7:0]), .clk(clk), .q(rs1_ecc_e[7:0]),
+                      .se(se), .si(), .so());
+   dff_s #(8) rs2_ecc_d2e(.din(byp_ecc_rs2_synd_d[7:0]), .clk(clk), .q(rs2_ecc_e[7:0]),
+                      .se(se), .si(), .so());
+   dff_s #(8) rs3_ecc_d2e(.din(byp_ecc_rs3_synd_d[7:0]), .clk(clk), .q(rs3_ecc_e[7:0]),
+                      .se(se), .si(), .so());
+   
+   // Check the ecc of the RF read data; three checkers are instantiated here (rs1, rs2, rs3)
+   zzecc_exu_chkecc2 chk_rs1(.d(byp_ecc_rcc_data_e[63:0]),
+                            .vld(ecl_ecc_rs1_use_rf_e),
+                            .p(rs1_ecc_e[7:0]),
+                            .q(rs1_err_e[6:0]),
+                            .ce(ecc_ecl_rs1_ce), .ue(ecc_ecl_rs1_ue), .ne());  // 'ne' left unconnected
+   zzecc_exu_chkecc2 chk_rs2(.d(byp_alu_rs2_data_e[63:0]),
+                            .vld(ecl_ecc_rs2_use_rf_e),
+                            .p(rs2_ecc_e[7:0]),
+                            .q(rs2_err_e[6:0]),
+                            .ce(ecc_ecl_rs2_ce), .ue(ecc_ecl_rs2_ue), .ne());  // 'ne' left unconnected
+   zzecc_exu_chkecc2 chk_rs3(.d(byp_ecc_rs3_data_e[63:0]),
+                                .vld(ecl_ecc_rs3_use_rf_e),
+                                .p(rs3_ecc_e[7:0]),
+                                .q(rs3_err_e[6:0]),
+                                .ce(ecc_ecl_rs3_ce), .ue(ecc_ecl_rs3_ue), .ne());  // 'ne' left unconnected
+
+   // Put results from checkers into flops
+   dff_s #(7) rs1_err_e2m(.din(rs1_err_e[6:0]), .clk(clk), .q(rs1_err_m[6:0]),
+                      .se(se), .si(), .so());
+   dff_s #(7) rs2_err_e2m(.din(rs2_err_e[6:0]), .clk(clk), .q(rs2_err_m[6:0]),
+                      .se(se), .si(), .so());
+   dff_s #(7) rs3o_err_e2m(.din(rs3_err_e[6:0]), .clk(clk), .q(rs3_err_m[6:0]),
+                      .se(se), .si(), .so());
+
+   // Pass along RF data to M stage
+   dff_s #(64) rs1_data_e2m(.din(byp_ecc_rcc_data_e[63:0]), .clk(clk), .q(byp_ecc_rcc_data_m[63:0]),
+                        .se(se), .si(), .so());
+   dff_s #(64) rs2_data_e2m(.din(byp_alu_rs2_data_e[63:0]), .clk(clk), .q(byp_alu_rs2_data_m[63:0]),
+                        .se(se), .si(), .so());
+   dff_s #(64) rs3_data_e2m(.din(byp_ecc_rs3_data_e[63:0]), .clk(clk), 
+                         .q(exu_lsu_rs3_data_m[63:0]),
+                         .se(se), .si(), .so());
+
+   // Mux between 3 different ports for syndrome and data
+   assign       sel_rs1_m = ~ecl_ecc_sel_rs1_m_l;  // selects arrive active-low; muxes take active-high
+   assign       sel_rs2_m = ~ecl_ecc_sel_rs2_m_l;
+   assign       sel_rs3_m = ~ecl_ecc_sel_rs3_m_l;
+   mux3ds #(7) syn_mux(.dout(err_m[6:0]),
+                     .in0(rs1_err_m[6:0]),
+                     .in1(rs2_err_m[6:0]),
+                     .in2(rs3_err_m[6:0]),
+                     .sel0(sel_rs1_m),
+                     .sel1(sel_rs2_m),
+                     .sel2(sel_rs3_m));
+   mux3ds #(64) data_m_mux(.dout(ecc_datain_m[63:0]),
+                     .in0(byp_ecc_rcc_data_m[63:0]),
+                     .in1(byp_alu_rs2_data_m[63:0]),
+                     .in2(exu_lsu_rs3_data_m[63:0]),
+                     .sel0(sel_rs1_m),
+                     .sel1(sel_rs2_m),
+                     .sel2(sel_rs3_m));
+   // Second syndrome mux, steered by the ecl_ecc_log_* selects, drives exu_ifu_err_synd_m
+   mux3ds #(7) syn_log_mux(.dout(exu_ifu_err_synd_m[6:0]),
+                           .in0(rs1_err_m[6:0]),
+                           .in1(rs2_err_m[6:0]),
+                           .in2(rs3_err_m[6:0]),
+                           .sel0(ecl_ecc_log_rs1_m),
+                           .sel1(ecl_ecc_log_rs2_m),
+                           .sel2(ecl_ecc_log_rs3_m));
+   // Decode syndrome from checker
+   sparc_exu_ecc_dec decode(.e          (error_data_m[63:0]),
+                            .q          (err_m[6:0]));
+   assign       ecc_byp_ecc_result_m[63:0] = ecc_datain_m[63:0] ^ error_data_m[63:0];  // invert every bit the decoder flags
+
+
+endmodule // sparc_exu_ecc
Index: /trunk/T1-CPU/exu/sparc_exu_ecl_wb.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_ecl_wb.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_ecl_wb.v	(revision 6)
@@ -0,0 +1,557 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_ecl_wb.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_ecl_wb
+//	Description:  Implements the writeback logic for the exu.
+//		This includes the control signals for the w1 and w2 input 
+//   	muxes as well as keeping track of the wen signal for ALU ops.
+*/
+
+module sparc_exu_ecl_wb (/*AUTOARG*/
+   // Outputs
+   wb_ccr_wrccr_w, ecl_rml_cwp_wen_e, ecl_rml_cansave_wen_w, 
+   ecl_rml_canrestore_wen_w, ecl_rml_otherwin_wen_w, 
+   ecl_rml_wstate_wen_w, ecl_rml_cleanwin_wen_w, ecl_byp_sel_load_m, 
+   ecl_byp_sel_restore_m, ecl_byp_sel_pipe_m, ecl_byp_restore_m, 
+   ecl_irf_tid_m, ecl_irf_rd_m, ecl_irf_rd_g, ecl_irf_wen_w2, 
+   ecl_irf_tid_g, wb_e, bypass_m, ecl_irf_wen_w, ecl_byp_sel_load_g, 
+   ecl_byp_sel_muldiv_g, ecl_byp_sel_restore_g, wb_divcntl_ack_g, 
+   wb_ccr_setcc_g, ecl_byp_eclpr_e, exu_ifu_longop_done_g, 
+   ecl_div_yreg_wen_w, ecl_div_yreg_wen_g, ecl_div_yreg_shift_g, 
+   ecl_div_yreg_wen_l, wb_eccctl_spec_wen_next, bypass_w, 
+   wb_byplog_rd_w2, wb_byplog_tid_w2, wb_byplog_wen_w2, 
+   wb_byplog_rd_g2, wb_byplog_wen_g2, read_yreg_e, 
+   exu_ffu_wsr_inst_e, 
+   // Inputs
+   clk, se, reset, sehold, ld_rd_g, ld_tid_g, lsu_exu_dfill_vld_g, 
+   lsu_exu_ldst_miss_g2, rd_m, tid_m, thr_m, tid_w1, ifu_exu_wen_d, 
+   ifu_exu_kill_e, ecl_exu_kill_m, rml_ecl_kill_m, ifu_tlu_flush_w, 
+   flush_w1, divcntl_wb_req_g, mdqctl_wb_divrd_g, mdqctl_wb_divthr_g, 
+   mdqctl_wb_mulrd_g, mdqctl_wb_multhr_g, mdqctl_wb_divsetcc_g, 
+   mdqctl_wb_mulsetcc_g, ecl_div_sel_div, ifu_tlu_wsr_inst_d, 
+   ifu_tlu_sraddr_d, rml_ecl_cwp_d, rml_ecl_cansave_d, 
+   rml_ecl_canrestore_d, rml_ecl_otherwin_d, rml_ecl_wstate_d, 
+   rml_ecl_cleanwin_d, exu_ifu_cc_d, rml_ecl_swap_done, 
+   rml_ecl_rmlop_done_e, mdqctl_wb_yreg_wen_g, 
+   mdqctl_wb_yreg_shift_g, ecl_byp_sel_ecc_m, eccctl_wb_rd_m, 
+   ifu_exu_inst_vld_e, ifu_exu_inst_vld_w, ifu_exu_return_d, 
+   restore_e, rml_ecl_fill_e, early_flush_w, ecl_byp_ldxa_g
+   ) ;
+   input clk;
+   input se;
+   input reset;
+   input sehold;
+   input [4:0] ld_rd_g;
+   input [1:0] ld_tid_g;
+   input       lsu_exu_dfill_vld_g;
+   input        lsu_exu_ldst_miss_g2;
+   input [4:0]  rd_m;
+   input [1:0]  tid_m;
+   input [3:0]  thr_m;
+   input [1:0]  tid_w1;
+   input        ifu_exu_wen_d;
+   input        ifu_exu_kill_e;
+   input        ecl_exu_kill_m;
+   input        rml_ecl_kill_m; // kill from spill or fill trap
+   input        ifu_tlu_flush_w;
+   input        flush_w1;
+   input        divcntl_wb_req_g;
+   input [4:0]  mdqctl_wb_divrd_g;
+   input [1:0]  mdqctl_wb_divthr_g;
+   input [4:0]  mdqctl_wb_mulrd_g;
+   input [1:0]  mdqctl_wb_multhr_g;
+   input        mdqctl_wb_divsetcc_g;
+   input        mdqctl_wb_mulsetcc_g;
+   input        ecl_div_sel_div;
+   input        ifu_tlu_wsr_inst_d;
+   input [6:0] ifu_tlu_sraddr_d;
+   input [2:0] rml_ecl_cwp_d;
+   input [2:0] rml_ecl_cansave_d;
+   input [2:0] rml_ecl_canrestore_d;
+   input [2:0] rml_ecl_otherwin_d;
+   input [5:0] rml_ecl_wstate_d;
+   input [2:0] rml_ecl_cleanwin_d;
+   input [7:0] exu_ifu_cc_d;
+   input [3:0] rml_ecl_swap_done;
+   input       rml_ecl_rmlop_done_e;
+   input         mdqctl_wb_yreg_wen_g;
+   input         mdqctl_wb_yreg_shift_g;
+   input         ecl_byp_sel_ecc_m;
+   input  [4:0] eccctl_wb_rd_m;
+   input        ifu_exu_inst_vld_e;
+   input        ifu_exu_inst_vld_w;
+   input        ifu_exu_return_d;
+   input  restore_e;
+   input  rml_ecl_fill_e;
+   input  early_flush_w;
+   input        ecl_byp_ldxa_g;
+   
+   output      wb_ccr_wrccr_w;
+   output      ecl_rml_cwp_wen_e;
+   output      ecl_rml_cansave_wen_w;
+   output      ecl_rml_canrestore_wen_w;
+   output      ecl_rml_otherwin_wen_w;
+   output      ecl_rml_wstate_wen_w;
+   output      ecl_rml_cleanwin_wen_w;
+   output      ecl_byp_sel_load_m;
+   output      ecl_byp_sel_restore_m;
+   output      ecl_byp_sel_pipe_m;
+   output      ecl_byp_restore_m;
+   output [1:0] ecl_irf_tid_m;
+   output [4:0] ecl_irf_rd_m;
+   output [4:0] ecl_irf_rd_g;
+   output       ecl_irf_wen_w2;
+   output [1:0] ecl_irf_tid_g;
+   output       wb_e;
+   output       bypass_m;
+   output       ecl_irf_wen_w;
+   output       ecl_byp_sel_load_g;
+   output       ecl_byp_sel_muldiv_g;
+   output       ecl_byp_sel_restore_g;
+   output       wb_divcntl_ack_g;
+   output       wb_ccr_setcc_g;
+   output [7:0] ecl_byp_eclpr_e;
+   output [3:0]  exu_ifu_longop_done_g;
+   output [3:0]  ecl_div_yreg_wen_w;
+   output [3:0]  ecl_div_yreg_wen_g;
+   output [3:0]  ecl_div_yreg_shift_g;
+   output [3:0]  ecl_div_yreg_wen_l;// w or w2 or shift
+   output        wb_eccctl_spec_wen_next;
+   output        bypass_w;
+   output [4:0] wb_byplog_rd_w2;
+   output [1:0] wb_byplog_tid_w2;
+   output       wb_byplog_wen_w2;
+   output [4:0] wb_byplog_rd_g2;
+   output       wb_byplog_wen_g2;
+   output       read_yreg_e;
+   output       exu_ffu_wsr_inst_e;
+
+   wire          wb_e;
+   wire          wb_m;
+   wire          wb_w;
+   wire          inst_vld_noflush_wen_m;
+   wire          inst_vld_noflush_wen_w;
+   wire       ecl_irf_wen_g;
+   wire      yreg_wen_w;
+   wire      yreg_wen_w1;
+   wire      yreg_wen_w1_vld;
+   wire      wen_no_inst_vld_m;         // load or restore or ce wen
+   wire        wen_no_inst_vld_w;
+   wire        wen_w_inst_vld;
+   wire        valid_e;
+   wire        valid_m;
+   wire    valid_w;
+   wire    ecl_sel_mul_g;
+   wire    ecl_sel_div_g;
+   wire [1:0] muldiv_tid;
+   wire        setcc_g;        // without wen from divcntl
+   wire    wrsr_e;
+   wire    wrsr_m;
+   wire    wrsr_w;
+   wire    [6:0] sraddr_e;
+   wire    [6:0] sraddr_m;
+   wire    [6:0] sraddr_w;
+   wire    sraddr_ccr_w;
+   wire    sraddr_y_w;
+   wire    sraddr_cwp_e;
+   wire    sraddr_cansave_w;
+   wire    sraddr_canrestore_w;
+   wire    sraddr_cleanwin_w;
+   wire    sraddr_otherwin_w;
+   wire    sraddr_wstate_w;
+   wire    sel_cleanwin_d;
+   wire    sel_otherwin_d;
+   wire    sel_wstate_d;
+   wire    sel_canrestore_d;
+   wire    sel_ccr_d;
+   wire    sel_cansave_d;
+   wire    sel_cwp_d;
+   wire    sel_rdpr_mux1_d;
+   wire [2:0] rdpr_mux1_out;
+   wire [7:0] rdpr_mux2_out;
+   wire [3:0] muldiv_done_g;
+   wire [3:0]    multhr_dec_g;
+   wire [3:0]    divthr_dec_g;
+   wire [3:0]    thrdec_w1;
+   wire   short_longop_done_e;
+   wire   short_longop_done_m;
+   wire [3:0] short_longop_done;
+   wire       return_e;
+   wire   restore_m;
+   wire   restore_w;
+   wire   vld_restore_e;
+   wire   vld_restore_w;
+   wire   restore_request;
+   wire   restore_wen;
+   wire   restore_ready;
+   wire   restore_ready_next;
+   wire   restore_picked;
+   wire [3:0]  restore_done;
+   wire [1:0] restore_tid;
+   wire [4:0] restore_rd;
+   wire [3:0] restore_thr;
+   wire [3:0] ecl_longop_done_kill_m;
+   wire [3:0] ecl_longop_done_nokill_m;
+   wire       dfill_vld_g2;
+   wire       ld_g;
+   wire       ld_g2;
+   wire [1:0] dfill_tid_g2;
+   wire [4:0] dfill_rd_g2;
+   wire       kill_ld_g2;
+   wire [1:0] tid_w2;
+   wire [4:0] rd_w2;
+   
+   ////////////////////////////////////////////
+   // Pass along result of load for one cycle
+   ////////////////////////////////////////////
+   assign     ld_g = lsu_exu_dfill_vld_g | ecl_byp_ldxa_g;
+   dff_s dfill_vld_dff (.din(ld_g), .clk(clk), .q(ld_g2),
+                      .se(se), .si(), .so());
+   assign     kill_ld_g2 = flush_w1 & (dfill_tid_g2[1:0] == tid_w1[1:0]);
+   assign     dfill_vld_g2 = ld_g2 & ~kill_ld_g2 & ~lsu_exu_ldst_miss_g2;
+   dff_s #(2) dfill_tid_dff(.din(ld_tid_g[1:0]), .clk(clk), .q(dfill_tid_g2[1:0]),
+                          .se(se), .si(), .so());
+   dff_s #(5) dfill_rd_dff(.din(ld_rd_g[4:0]), .clk(clk), .q(dfill_rd_g2[4:0]),
+                         .se(se), .si(), .so());
+
+   ///////////////////////////////////////////
+   // Help with bypassing of long latency ops
+   ///////////////////////////////////////////
+   assign       wb_byplog_rd_w2[4:0] = rd_w2[4:0];
+   assign       wb_byplog_wen_w2 = ecl_irf_wen_w2;
+   assign       wb_byplog_tid_w2[1:0] = tid_w2[1:0];
+   assign       wb_byplog_rd_g2[4:0] = dfill_rd_g2[4:0];
+   assign       wb_byplog_wen_g2 = ld_g2;
+   
+   
+   ////////////////////////////////////////////////////////////////
+   // G selection logic (picks between LOAD and MUL/DIV outputs)
+   ////////////////////////////////////////////////////////////////
+   // select signals: priority LOAD, RESTORE, MUL, DIV
+   assign      ecl_byp_sel_load_g = (ld_g2 & (wb_m | wrsr_m | ecl_byp_sel_ecc_m));
+   assign      ecl_byp_sel_restore_g = restore_request & ((wb_m | wrsr_m | ecl_byp_sel_ecc_m) ^ ld_g2);
+   assign      ecl_byp_sel_muldiv_g = ~(ecl_byp_sel_load_g | ecl_byp_sel_restore_g);
+   assign      ecl_sel_mul_g = ~ecl_div_sel_div & ecl_byp_sel_muldiv_g;
+   assign      ecl_sel_div_g = ecl_div_sel_div & ecl_byp_sel_muldiv_g;
+   assign      wb_divcntl_ack_g = ecl_byp_sel_muldiv_g;
+
+   assign      muldiv_tid[1:0] = (ecl_div_sel_div)? mdqctl_wb_divthr_g[1:0]: mdqctl_wb_multhr_g[1:0];
+   assign muldiv_done_g[3] = ((wb_divcntl_ack_g & divcntl_wb_req_g) & 
+                              muldiv_tid[1] & muldiv_tid[0]); 
+   assign muldiv_done_g[2] = ((wb_divcntl_ack_g & divcntl_wb_req_g) &
+                              muldiv_tid[1] & ~muldiv_tid[0]); 
+   assign muldiv_done_g[1] = ((wb_divcntl_ack_g & divcntl_wb_req_g) &
+                              ~muldiv_tid[1] & muldiv_tid[0]); 
+   assign muldiv_done_g[0] = ((wb_divcntl_ack_g & divcntl_wb_req_g) &
+                              ~muldiv_tid[1] & ~muldiv_tid[0]); 
+
+   assign ecl_irf_wen_g = (sehold)? ecl_irf_wen_w2: 
+                                   (ecl_byp_sel_load_g & dfill_vld_g2 |
+                                    (ecl_byp_sel_restore_g & restore_wen) |
+                                    (ecl_byp_sel_muldiv_g & divcntl_wb_req_g));
+
+   dff_s wen_w2_dff(.din(ecl_irf_wen_g), .clk(clk), .q(ecl_irf_wen_w2),
+                  .se(se), .si(), .so());
+   mux4ds #(5) rd_g_mux(.dout(ecl_irf_rd_g[4:0]), .in0(dfill_rd_g2[4:0]),
+                       .in1(mdqctl_wb_divrd_g[4:0]),
+                       .in2(mdqctl_wb_mulrd_g[4:0]),
+                        .in3(restore_rd[4:0]),
+                       .sel0(ecl_byp_sel_load_g),
+                       .sel1(ecl_sel_div_g),
+                        .sel2(ecl_sel_mul_g),
+                        .sel3(ecl_byp_sel_restore_g));
+   mux4ds #(2) thr_g_mux(.dout(ecl_irf_tid_g[1:0]), .in0(dfill_tid_g2[1:0]),
+                        .in1(mdqctl_wb_divthr_g[1:0]),
+                        .in2(mdqctl_wb_multhr_g[1:0]),
+                         .in3(restore_tid[1:0]),
+                        .sel0(ecl_byp_sel_load_g),
+                        .sel1(ecl_sel_div_g),
+                         .sel2(ecl_sel_mul_g),
+                         .sel3(ecl_byp_sel_restore_g));
+   mux2ds setcc_g_mux(.dout(setcc_g),
+                         .in0(mdqctl_wb_mulsetcc_g),
+                         .in1(mdqctl_wb_divsetcc_g),
+                         .sel0(~ecl_div_sel_div),
+                         .sel1(ecl_div_sel_div));
+   dff_s #(2) dff_thr_g2w2(.din(ecl_irf_tid_g[1:0]), .clk(clk), .q(tid_w2[1:0]), .se(se),
+                      .si(), .so());
+   dff_s #(5) dff_rd_g2w2(.din(ecl_irf_rd_g[4:0]), .clk(clk), .q(rd_w2[4:0]), .se(se),
+                     .si(), .so());
+   // needs wen to setcc
+   assign wb_ccr_setcc_g = wb_divcntl_ack_g & divcntl_wb_req_g & setcc_g;
+   
+
+   ///////////////////
+   // W1 port control
+   ///////////////////
+   // sehold will turn off in pipe writes and put the hold functionality through
+   // the non inst_vld part
+   // Mux between load and ALU for rd, thr, and wen
+   assign      ecl_byp_sel_load_m = ~(wb_m | wrsr_m | ecl_byp_sel_ecc_m) & ld_g2;
+   assign      ecl_byp_sel_pipe_m = (wb_m | wrsr_m) & ~ecl_byp_sel_ecc_m; 
+   assign      ecl_byp_sel_restore_m = ~(wb_m | wrsr_m | ld_g2 | ecl_byp_sel_ecc_m);
+   assign      wen_no_inst_vld_m = (sehold)? ecl_irf_wen_w: 
+                                             ((dfill_vld_g2 & ecl_byp_sel_load_m) |
+                                              (ecl_byp_sel_restore_m & restore_wen));
+   dff_s dff_lsu_wen_m2w(.din(wen_no_inst_vld_m), .clk(clk), .q(wen_no_inst_vld_w), .se(se), .si(),
+                       .so());
+   // ecc_wen must be kept separate because it needs to check inst_vld but not flush
+   assign      inst_vld_noflush_wen_m = ecl_byp_sel_ecc_m & ~sehold;
+   dff_s ecc_wen_m2w(.din(inst_vld_noflush_wen_m), .clk(clk), .q(inst_vld_noflush_wen_w), .se(se), .si(), .so());
+   
+   assign ecl_irf_tid_m[1:0] = ((ecl_byp_sel_load_m)? dfill_tid_g2[1:0]:
+                                (ecl_byp_sel_restore_m)? restore_tid[1:0]:
+                                tid_m[1:0]);
+
+   mux4ds #(5) rd_mux(.dout(ecl_irf_rd_m[4:0]), 
+                      .in0(rd_m[4:0]), 
+                      .in1(dfill_rd_g2[4:0]),
+                      .in2(eccctl_wb_rd_m[4:0]),
+                      .in3(restore_rd[4:0]),
+                      .sel0(ecl_byp_sel_pipe_m), 
+                      .sel1(ecl_byp_sel_load_m),
+                      .sel2(ecl_byp_sel_ecc_m),
+                      .sel3(ecl_byp_sel_restore_m));
+   assign wen_w_inst_vld = valid_w | inst_vld_noflush_wen_w;
+   assign ecl_irf_wen_w = ifu_exu_inst_vld_w & wen_w_inst_vld | wen_no_inst_vld_w;
+
+   // bypass valid logic and flops
+   dff_s dff_wb_d2e(.din(ifu_exu_wen_d), .clk(clk), .q(wb_e), .se(se),
+                  .si(), .so());
+   dff_s dff_wb_e2m(.din(valid_e), .clk(clk), .q(wb_m), .se(se),
+                  .si(), .so());
+   dffr_s dff_wb_m2w(.din(valid_m), .clk(clk), .q(wb_w), .se(se),
+                  .si(), .so(), .rst(reset));
+   assign  valid_e = wb_e & ~ifu_exu_kill_e & ~restore_e & ~wrsr_e;// restore doesn't finish on time
+   assign  bypass_m = wb_m;// bypass doesn't need to check for traps or sehold
+   assign  valid_m = bypass_m & ~rml_ecl_kill_m & ~sehold;// sehold turns off writes from this path
+   assign  valid_w = (wb_w & ~early_flush_w & ~ifu_tlu_flush_w);// check inst_vld later
+   // don't check flush for bypass
+   assign  bypass_w = wb_w | inst_vld_noflush_wen_w | wen_no_inst_vld_w;
+
+   // speculative wen for ecc injection
+   assign  wb_eccctl_spec_wen_next = valid_m | dfill_vld_g2 | restore_request |  divcntl_wb_req_g;
+
+   ///////////////////////////////////////////////////////
+   // Privileged register read and write flops and logic
+   ///////////////////////////////////////////////////////
+/* -----\/----- EXCLUDED -----\/-----
+   Decoded sraddr
+   sraddr[5] = 1-priv, 0-state
+   Y -   0
+   CCR - 2
+   CWP - 9
+   CANSAVE - a
+   CANRESTORE - b
+   CLEANWIN - c
+   OTHERWIN - d
+   WSTATE - e 
+   GSR - 0x13
+ -----/\----- EXCLUDED -----/\----- */
+   assign  ecl_rml_cwp_wen_e = sraddr_cwp_e & wrsr_e;
+   assign  sraddr_cwp_e = ~sraddr_e[6] & sraddr_e[5] & ~sraddr_e[4] & sraddr_e[3] & ~sraddr_e[2] &
+           ~sraddr_e[1] & sraddr_e[0];
+
+   assign  sraddr_y_w = ~sraddr_w[6] & ~sraddr_w[5] & ~sraddr_w[4] & ~sraddr_w[3] & ~sraddr_w[2] & 
+           ~sraddr_w[1] & ~sraddr_w[0];
+   assign  sraddr_ccr_w = ~sraddr_w[6] & ~sraddr_w[5] & ~sraddr_w[4] & ~sraddr_w[3] & ~sraddr_w[2] &
+           sraddr_w[1] & ~sraddr_w[0];
+   assign  sraddr_cansave_w = ~sraddr_w[6] & sraddr_w[5] & ~sraddr_w[4] & sraddr_w[3] & ~sraddr_w[2] &
+           sraddr_w[1] & ~sraddr_w[0];
+   assign  sraddr_canrestore_w = ~sraddr_w[6] & sraddr_w[5] & ~sraddr_w[4] & sraddr_w[3] & ~sraddr_w[2] &
+           sraddr_w[1] & sraddr_w[0];
+   assign  sraddr_cleanwin_w = ~sraddr_w[6] & sraddr_w[5] & ~sraddr_w[4] & sraddr_w[3] & sraddr_w[2] &
+           ~sraddr_w[1] & ~sraddr_w[0];
+   assign  sraddr_otherwin_w = ~sraddr_w[6] & sraddr_w[5] & ~sraddr_w[4] & sraddr_w[3] & sraddr_w[2] &
+           ~sraddr_w[1] & sraddr_w[0];
+   assign  sraddr_wstate_w = ~sraddr_w[6] & sraddr_w[5] & ~sraddr_w[4] & sraddr_w[3] & sraddr_w[2] &
+           sraddr_w[1] & ~sraddr_w[0];
+
+   // yreg writes cycle after w and checks flush in that cycle
+   assign  yreg_wen_w = sraddr_y_w & wrsr_w & ifu_exu_inst_vld_w;
+   assign  yreg_wen_w1_vld = yreg_wen_w1 & ~flush_w1;
+
+   // controls for all other writes (and flush checks) are in their respective blocks
+   assign  wb_ccr_wrccr_w = sraddr_ccr_w & wrsr_w;
+   assign  ecl_rml_cansave_wen_w = sraddr_cansave_w & wrsr_w;
+   assign  ecl_rml_canrestore_wen_w = sraddr_canrestore_w & wrsr_w;
+   assign  ecl_rml_cleanwin_wen_w = sraddr_cleanwin_w & wrsr_w;
+   assign  ecl_rml_otherwin_wen_w = sraddr_otherwin_w & wrsr_w;
+   assign  ecl_rml_wstate_wen_w = sraddr_wstate_w & wrsr_w;
+   
+
+   dff_s dff_wrsr_d2e(.din(ifu_tlu_wsr_inst_d), .clk(clk), .q(wrsr_e), .se(se),
+                   .si(), .so());
+   assign  exu_ffu_wsr_inst_e = wrsr_e;
+   dff_s dff_wrsr_e2m(.din(wrsr_e), .clk(clk), .q(wrsr_m), .se(se),
+                   .si(), .so());
+   dff_s dff_wrsr_m2w(.din(wrsr_m), .clk(clk), .q(wrsr_w), .se(se),
+                   .si(), .so());
+   dff_s #(7) dff_sraddr_d2e(.din(ifu_tlu_sraddr_d[6:0]), .clk(clk), .q(sraddr_e[6:0]), .se(se),
+                       .si(), .so());
+   dff_s #(7) dff_sraddr_e2m(.din(sraddr_e[6:0]), .clk(clk), .q(sraddr_m[6:0]), .se(se),
+                       .si(), .so());
+   dff_s #(7) dff_sraddr_m2w(.din(sraddr_m[6:0]), .clk(clk), .q(sraddr_w[6:0]), .se(se),
+                       .si(), .so());
+   dff_s dff_yreg_wen_w2w1(.din(yreg_wen_w), .clk(clk), .q(yreg_wen_w1), .se(se), .si(), .so());
+   
+   // Logic for rdpr/rdsr
+   // This mux takes advantage of the fact that these 4 encodings don't overlap
+   assign sel_cleanwin_d = ~ifu_tlu_sraddr_d[1] & ~ifu_tlu_sraddr_d[0];
+   assign sel_otherwin_d = ~ifu_tlu_sraddr_d[1] & ifu_tlu_sraddr_d[0];
+   assign sel_cansave_d = ifu_tlu_sraddr_d[1] & ~ifu_tlu_sraddr_d[0];
+   assign sel_canrestore_d = ifu_tlu_sraddr_d[1] & ifu_tlu_sraddr_d[0];
+   mux4ds #(3) rdpr_mux1(.dout(rdpr_mux1_out[2:0]),
+                       .in0(rml_ecl_canrestore_d[2:0]),
+                       .in1(rml_ecl_cleanwin_d[2:0]),
+                       .in2(rml_ecl_cansave_d[2:0]),
+                       .in3(rml_ecl_otherwin_d[2:0]),
+                       .sel0(sel_canrestore_d),
+                       .sel1(sel_cleanwin_d),
+                       .sel2(sel_cansave_d),
+                       .sel3(sel_otherwin_d));
+   assign sel_ccr_d = ~ifu_tlu_sraddr_d[3];
+   assign sel_cwp_d = ifu_tlu_sraddr_d[3] & ~ifu_tlu_sraddr_d[2] & ~ifu_tlu_sraddr_d[1] & ifu_tlu_sraddr_d[0];
+   assign sel_wstate_d = ifu_tlu_sraddr_d[3] & ifu_tlu_sraddr_d[2] & ifu_tlu_sraddr_d[1] & ~ifu_tlu_sraddr_d[0];
+   assign sel_rdpr_mux1_d = ~(sel_ccr_d | sel_cwp_d | sel_wstate_d);
+   mux4ds #(8) rdpr_mux2(.dout(rdpr_mux2_out[7:0]),
+                       .in0(exu_ifu_cc_d[7:0]),
+                       .in1({5'b0, rml_ecl_cwp_d[2:0]}),
+                       .in2({2'b0, rml_ecl_wstate_d[5:0]}),
+                       .in3({5'b0, rdpr_mux1_out[2:0]}),
+                       .sel0(sel_ccr_d),
+                       .sel1(sel_cwp_d),
+                       .sel2(sel_wstate_d),
+                       .sel3(sel_rdpr_mux1_d));
+
+   assign read_yreg_e = ~(sraddr_e[3] | sraddr_e[1]);
+   dff_s #(8) rdpr_dff(.din(rdpr_mux2_out[7:0]), .clk(clk), .q(ecl_byp_eclpr_e[7:0]),
+                   .se(se), .si(), .so());
+
+
+   ///////////////////////////////
+   // YREG write enable logic
+   ///////////////////////////////
+   // decode thr_g for mux select
+   assign multhr_dec_g[0] = ~mdqctl_wb_multhr_g[1] & ~mdqctl_wb_multhr_g[0];
+   assign multhr_dec_g[1] = ~mdqctl_wb_multhr_g[1] & mdqctl_wb_multhr_g[0];
+   assign multhr_dec_g[2] = mdqctl_wb_multhr_g[1] & ~mdqctl_wb_multhr_g[0];
+   assign multhr_dec_g[3] = mdqctl_wb_multhr_g[1] & mdqctl_wb_multhr_g[0];
+
+   assign divthr_dec_g[0] = ~mdqctl_wb_divthr_g[1] & ~mdqctl_wb_divthr_g[0];
+   assign divthr_dec_g[1] = ~mdqctl_wb_divthr_g[1] & mdqctl_wb_divthr_g[0];
+   assign divthr_dec_g[2] = mdqctl_wb_divthr_g[1] & ~mdqctl_wb_divthr_g[0];
+   assign divthr_dec_g[3] = mdqctl_wb_divthr_g[1] & mdqctl_wb_divthr_g[0];
+
+   assign thrdec_w1[0] = ~tid_w1[1] & ~tid_w1[0];
+   assign thrdec_w1[1] = ~tid_w1[1] & tid_w1[0];
+   assign thrdec_w1[2] = tid_w1[1] & ~tid_w1[0];
+   assign thrdec_w1[3] = tid_w1[1] & tid_w1[0];
+
+   // enable input for each thread
+   
+   assign ecl_div_yreg_shift_g[0] = divthr_dec_g[0] & mdqctl_wb_yreg_shift_g;
+   assign ecl_div_yreg_wen_w[0] = (thrdec_w1[0] & yreg_wen_w1_vld &
+                                   ~ecl_div_yreg_shift_g[0] &
+                                   ~ecl_div_yreg_wen_g[0]);
+   assign ecl_div_yreg_wen_g[0] = (multhr_dec_g[0] & mdqctl_wb_yreg_wen_g & 
+                                   ~ecl_div_yreg_shift_g[0]);
+   assign ecl_div_yreg_wen_l[0] = ~(ecl_div_yreg_wen_w[0] | ecl_div_yreg_wen_g[0]
+                                    | ecl_div_yreg_shift_g[0]);
+   assign ecl_div_yreg_shift_g[1] = divthr_dec_g[1] & mdqctl_wb_yreg_shift_g;
+   assign ecl_div_yreg_wen_w[1] = (thrdec_w1[1] & yreg_wen_w1_vld &
+                                   ~ecl_div_yreg_shift_g[1] &
+                                   ~ecl_div_yreg_wen_g[1]);
+   assign ecl_div_yreg_wen_g[1] = (multhr_dec_g[1] & mdqctl_wb_yreg_wen_g & 
+                                   ~ecl_div_yreg_shift_g[1]);
+   assign ecl_div_yreg_wen_l[1] = ~(ecl_div_yreg_wen_w[1] | ecl_div_yreg_wen_g[1]
+                                    | ecl_div_yreg_shift_g[1]);
+   assign ecl_div_yreg_shift_g[2] = divthr_dec_g[2] & mdqctl_wb_yreg_shift_g;
+   assign ecl_div_yreg_wen_w[2] = (thrdec_w1[2] & yreg_wen_w1_vld &
+                                   ~ecl_div_yreg_shift_g[2] &
+                                   ~ecl_div_yreg_wen_g[2]);
+   assign ecl_div_yreg_wen_g[2] = (multhr_dec_g[2] & mdqctl_wb_yreg_wen_g & 
+                                   ~ecl_div_yreg_shift_g[2]);
+   assign ecl_div_yreg_wen_l[2] = ~(ecl_div_yreg_wen_w[2] | ecl_div_yreg_wen_g[2]
+                                    | ecl_div_yreg_shift_g[2]);
+   assign ecl_div_yreg_shift_g[3] = divthr_dec_g[3] & mdqctl_wb_yreg_shift_g;
+   assign ecl_div_yreg_wen_w[3] = (thrdec_w1[3] & yreg_wen_w1_vld &
+                                   ~ecl_div_yreg_shift_g[3] &
+                                   ~ecl_div_yreg_wen_g[3]);
+   assign ecl_div_yreg_wen_g[3] = (multhr_dec_g[3] & mdqctl_wb_yreg_wen_g & 
+                                   ~ecl_div_yreg_shift_g[3]);
+   assign ecl_div_yreg_wen_l[3] = ~(ecl_div_yreg_wen_w[3] | ecl_div_yreg_wen_g[3]
+                                    | ecl_div_yreg_shift_g[3]);
+
+   //////////////////////////////////////////////////////////
+   // Completion logic for restore
+   //////////////////////////////////////////////////////////
+
+   // only worry about restores.  Returns are automatically switched back in
+   assign ecl_byp_restore_m = restore_m;
+   assign vld_restore_e = restore_e & wb_e & ~return_e & ~rml_ecl_fill_e & ifu_exu_inst_vld_e;
+   assign vld_restore_w = (restore_w & ~ifu_tlu_flush_w & ~early_flush_w 
+                           & ifu_exu_inst_vld_w & ~reset);
+
+   assign restore_request = restore_w | restore_ready;
+   assign restore_wen = vld_restore_w | restore_ready;
+   assign restore_picked = ecl_byp_sel_restore_m | ecl_byp_sel_restore_g;
+   assign restore_done[3:0] = restore_thr[3:0] & {4{restore_picked & restore_request}};
+   // restore request waits for kills in the w stage.  they
+   // won't start until after the flop
+   assign restore_ready_next = (vld_restore_w  | restore_ready) & ~restore_picked;
+
+   dffe_s #(2) restore_tid_dff(.din(tid_m[1:0]), .clk(clk), .q(restore_tid[1:0]),
+                             .se(se), .si(), .so(), .en(restore_m));
+   dffe_s #(5) restore_rd_dff(.din(rd_m[4:0]), .clk(clk), .q(restore_rd[4:0]),
+                            .se(se), .si(), .so(), .en(restore_m));
+   dff_s return_d2e(.din(ifu_exu_return_d), .clk(clk), .q(return_e),
+                   .se(se), .si(), .so());
+   dff_s restore_e2m(.din(vld_restore_e), .clk(clk), .q(restore_m),
+                   .se(se), .si(), .so());
+   dff_s restore_m2w(.din(restore_m), .clk(clk), .q(restore_w),
+                   .se(se), .si(), .so());
+   dff_s restore_ready_dff(.din(restore_ready_next), .q(restore_ready),
+                         .clk(clk), .se(se), .so(), .si());
+
+   //////////////////////////////////////////////////////////
+   // Completion logic for non integer-pipeline operations
+   //////////////////////////////////////////////////////////
+   // short_longops must check inst_vld_e to protect against invalid completion signal
+   assign short_longop_done_e = (rml_ecl_rmlop_done_e | (restore_e & ~wb_e & ~return_e)) & 
+                                  ifu_exu_inst_vld_e & ~ifu_exu_kill_e;
+   dff_s longop_done_e2m (.din(short_longop_done_e), .clk(clk), .q(short_longop_done_m), .se(se), .si(), .so());
+   assign short_longop_done[3:0] = thr_m[3:0] & {4{short_longop_done_m}};
+   
+   assign ecl_longop_done_nokill_m[3:0] = (muldiv_done_g[3:0] | restore_done[3:0] | short_longop_done[3:0] | 
+                                           rml_ecl_swap_done[3:0]);
+   assign ecl_longop_done_kill_m[3:0] = (muldiv_done_g[3:0] | restore_done[3:0] | rml_ecl_swap_done[3:0]);
+   assign exu_ifu_longop_done_g[3:0] = (ecl_exu_kill_m)? ecl_longop_done_kill_m[3:0]: ecl_longop_done_nokill_m[3:0];
+   
+
+   // decode tid
+   assign restore_thr[3] = restore_tid[1] & restore_tid[0];
+   assign restore_thr[2] = restore_tid[1] & ~restore_tid[0];
+   assign restore_thr[1] = ~restore_tid[1] & restore_tid[0];
+   assign restore_thr[0] = ~restore_tid[1] & ~restore_tid[0];
+
+endmodule // sparc_exu_ecl_wb
Index: /trunk/T1-CPU/exu/sparc_exu_eclccr.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_eclccr.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_eclccr.v	(revision 6)
@@ -0,0 +1,264 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_eclccr.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_eclccr
+//	Description: Per-thread 8 bit condition code registers ({xcc, icc}, 4 bits
+//	each) with forwarding.  Takes the e_stage result and writes on the w stage.
+*/
+
+module sparc_exu_eclccr (/*AUTOARG*/
+   // Outputs
+   exu_ifu_cc_d, exu_tlu_ccr0_w, exu_tlu_ccr1_w, exu_tlu_ccr2_w, 
+   exu_tlu_ccr3_w, 
+   // Inputs
+   clk, se, alu_xcc_e, alu_icc_e, tid_d, thrdec_d, thr_match_dm, 
+   thr_match_de, tid_w, thr_w, ifu_exu_kill_e, ifu_exu_setcc_d, 
+   byp_ecl_wrccr_data_w, wb_ccr_wrccr_w, wb_ccr_setcc_g, 
+   divcntl_ccr_cc_w2, wb_ccr_thr_g, tlu_exu_cwpccr_update_m, 
+   tlu_exu_ccr_m, ifu_exu_inst_vld_w, ifu_tlu_flush_w, early_flush_w
+   ) ;
+   input clk;                // clock for every flop in this module
+   input se;                 // wired to the .se port of every flop below
+   input [3:0] alu_xcc_e;    // condition codes from the alu (upper nibble of alu_cc_e)
+   input [3:0] alu_icc_e;    // condition codes from the alu (lower nibble of alu_cc_e)
+   input [1:0] tid_d;   // thread for each stage
+   input [3:0] thrdec_d;   // decoded tid_d for mux select
+   input       thr_match_dm; // qualifies the M stage bypass (use_cc_m)
+   input       thr_match_de; // qualifies the E stage bypass (use_cc_e)
+   input [1:0] tid_w;        // compared with tid_d to form thrmatch_w
+   input [3:0] thr_w;        // decoded tid_w
+   input       ifu_exu_kill_e;  // masks setcc_e when forming valid_setcc_e
+   input       ifu_exu_setcc_d; // flopped into setcc_e
+   input [7:0] byp_ecl_wrccr_data_w;// write data for the WRCCR operation (LSBs of ALU result)
+   input       wb_ccr_wrccr_w; // write enable for the WRCCR data
+   input       wb_ccr_setcc_g;  // flopped into setcc_w2: enables the w2 write
+   input [7:0] divcntl_ccr_cc_w2; // value stored by the w2 write
+   input [1:0] wb_ccr_thr_g;    // thread id of the w2 write, flopped into thr_w2
+   input       tlu_exu_cwpccr_update_m; // selects tlu_exu_ccr_m instead of alu_cc_m in M
+   input [7:0] tlu_exu_ccr_m;   // ccr value supplied by the tlu in M
+   input       ifu_exu_inst_vld_w; // gates both bypass_cc_w and valid_setcc_w
+   input       ifu_tlu_flush_w;    // blocks the W stage write (valid_setcc_w)
+   input       early_flush_w;      // blocks the W stage write (valid_setcc_w)
+   
+   output [7:0] exu_ifu_cc_d;   // condition codes for current thread
+   output [7:0] exu_tlu_ccr0_w; // stored ccr of thread 0 (ccr_thr0)
+   output [7:0] exu_tlu_ccr1_w; // stored ccr of thread 1 (ccr_thr1)
+   output [7:0] exu_tlu_ccr2_w; // stored ccr of thread 2 (ccr_thr2)
+   output [7:0] exu_tlu_ccr3_w; // stored ccr of thread 3 (ccr_thr3)
+
+   wire [7:0]   partial_cc_d;   // partial bypassed ccr
+   wire [7:0]   alu_cc_e;   // alu combined condition codes
+   wire [7:0]   alu_cc_m;   // m stage alu ccs
+   wire [7:0]   alu_cc_w;   // flopped ccr_m: alu ccs or the tlu supplied value
+   wire [7:0]   exu_ifu_cc_w;   // writeback data
+   wire         setcc_e;        // from previous stage
+   wire         setcc_m;
+   wire         setcc_w;
+   wire         valid_setcc_e;  // after comparing with kill
+   wire         valid_setcc_m;
+   wire         valid_setcc_w;
+   wire         setcc_w2;
+   wire [7:0]   ccrin_thr0;     // next value of each thread's ccr flop
+   wire [7:0]   ccrin_thr1;
+   wire [7:0]   ccrin_thr2;
+   wire [7:0]   ccrin_thr3;
+   wire [7:0]   ccr_d;          // stored ccr selected for the D stage thread
+   wire [7:0]   ccr_thr0;       // stored ccr of each thread
+   wire [7:0]   ccr_thr1;
+   wire [7:0]   ccr_thr2;
+   wire [7:0]   ccr_thr3;
+   wire         use_alu_cc;
+   wire         use_ccr;
+   wire         use_cc_e;
+   wire         use_cc_m;
+   wire         use_cc_w;
+   wire  [1:0]   tid_dxorw;
+   wire         thr_match_de;
+   wire         thrmatch_w;
+   wire [1:0]   thr_w2;
+   wire          thr0_w2;
+   wire          thr1_w2;
+   wire          thr2_w2;
+   wire          thr3_w2;
+   wire          wen_thr0_w;    // write enable for each input/thread
+   wire          wen_thr0_w2;
+   wire          wen_thr1_w;
+   wire          wen_thr1_w2;
+   wire          wen_thr2_w;
+   wire          wen_thr2_w2;
+   wire          wen_thr3_w;
+   wire          wen_thr3_w2;
+   wire          wen_thr0_l;      // high when neither write is active: selects the old value
+   wire          wen_thr1_l;
+   wire          wen_thr2_l;
+   wire          wen_thr3_l;
+   wire          bypass_cc_w;
+
+   wire [7:0]    ccr_m;
+
+
+   // D2E flops
+   dff_s dff_setcc_d2e(.din(ifu_exu_setcc_d), .clk(clk), .q(setcc_e),
+                     .se(se), .si(), .so());
+   
+   // E stage
+   assign       alu_cc_e = {alu_xcc_e, alu_icc_e};
+   assign       valid_setcc_e = setcc_e & ~ifu_exu_kill_e;
+
+   dff_s #(8) dff_cc_e2m(.din(alu_cc_e[7:0]), .clk(clk), .q(alu_cc_m[7:0]),
+                  .se(se), .si(), .so());
+   dff_s dff_setcc_e2m(.din(valid_setcc_e), .clk(clk), .q(setcc_m),
+                     .se(se), .si(), .so());
+   
+   // M stage
+   assign       valid_setcc_m = setcc_m | tlu_exu_cwpccr_update_m; // a tlu update travels to W as a setcc
+   mux2ds #(8) mux_ccr_m(.dout(ccr_m[7:0]),
+                            .in0(alu_cc_m[7:0]),
+                            .in1(tlu_exu_ccr_m[7:0]),
+                            .sel0(~tlu_exu_cwpccr_update_m),
+                            .sel1(tlu_exu_cwpccr_update_m));
+
+   dff_s #(8) dff_cc_m2w(.din(ccr_m[7:0]), .clk(clk), .q(alu_cc_w[7:0]),
+                  .se(se), .si(), .so());
+   dff_s dff_setcc_m2w(.din(valid_setcc_m), .clk(clk), .q(setcc_w),
+                     .se(se), .si(), .so());
+
+   // W stage
+   assign bypass_cc_w = ifu_exu_inst_vld_w & setcc_w; // bypass qualifier: does not look at flush or wrccr
+   assign valid_setcc_w = ~ifu_tlu_flush_w & ~early_flush_w & ifu_exu_inst_vld_w & (setcc_w | wb_ccr_wrccr_w);
+
+   // mux with wrccr
+   assign        use_alu_cc = ~(wb_ccr_wrccr_w);
+   mux2ds #(8) mux_ccrin_cc(.dout(exu_ifu_cc_w[7:0]), .sel0(wb_ccr_wrccr_w),
+                          .sel1(use_alu_cc),
+                          .in0(byp_ecl_wrccr_data_w[7:0]),
+                          .in1(alu_cc_w[7:0]));
+
+   dff_s #(3) setcc_g2w2 (.din({wb_ccr_setcc_g, wb_ccr_thr_g[1:0]}), .clk(clk), 
+                        .q({setcc_w2, thr_w2[1:0]}),
+                        .se(se), .si(), .so());
+
+   
+   /////////////////////////
+   // Storage of ccr
+   /////////////////////////
+`ifdef FPGA_SYN_1THREAD
+   // NOTE(review): only ccr_thr0 is stored here; ccr_thr1..3 get no driver in this branch - confirm intended
+   assign          thr0_w2 = ~thr_w2[1] & ~thr_w2[0];
+   assign          wen_thr0_w = (thr_w[0] & valid_setcc_w & ~wen_thr0_w2); // the w2 write wins over the W write
+   assign 	   wen_thr0_w2 = thr0_w2 & setcc_w2;
+   assign 	   wen_thr0_l = ~(wen_thr0_w | wen_thr0_w2);    
+   // mux between cc_w (alu, wrccr or tlu value), cc_w2 and the old value
+   mux3ds #(8) mux_ccrin0(.dout(ccrin_thr0[7:0]), .sel0(wen_thr0_w),
+                          .sel1(wen_thr0_w2), .sel2(wen_thr0_l),
+                          .in0(exu_ifu_cc_w[7:0]),
+                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr0[7:0]));
+   // store new value
+   dff_s #(8) dff_ccr_thr0(.din(ccrin_thr0[7:0]), .clk(clk), .q(ccr_thr0[7:0]),
+                       .se(se), .si(), .so());
+   assign 	   ccr_d[7:0] = ccr_thr0[7:0]; // thrdec_d is not used in this branch
+   
+`else // !`ifdef FPGA_SYN_1THREAD
+   
+   // decode thr_w2 for mux select
+   assign        thr0_w2 = ~thr_w2[1] & ~thr_w2[0];
+   assign        thr1_w2 = ~thr_w2[1] & thr_w2[0];
+   assign        thr2_w2 = thr_w2[1] & ~thr_w2[0];
+   assign        thr3_w2 = thr_w2[1] & thr_w2[0];
+   // enable input for each thread
+   assign        wen_thr0_w = (thr_w[0] & valid_setcc_w & ~wen_thr0_w2); // the w2 write wins over the W write
+   assign        wen_thr0_w2 = thr0_w2 & setcc_w2;
+   assign        wen_thr0_l = ~(wen_thr0_w | wen_thr0_w2);  // hold when neither write is active
+   assign        wen_thr1_w = (thr_w[1] & valid_setcc_w & ~wen_thr1_w2);
+   assign        wen_thr1_w2 = (thr1_w2 & setcc_w2);
+   assign        wen_thr1_l = ~(wen_thr1_w | wen_thr1_w2);  
+   assign        wen_thr2_w = (thr_w[2] & valid_setcc_w & ~wen_thr2_w2);
+   assign        wen_thr2_w2 = (thr2_w2 & setcc_w2);
+   assign        wen_thr2_l = ~(wen_thr2_w | wen_thr2_w2);  
+   assign        wen_thr3_w = (thr_w[3] & valid_setcc_w & ~wen_thr3_w2);
+   assign        wen_thr3_w2 = (thr3_w2 & setcc_w2);
+   assign        wen_thr3_l = ~(wen_thr3_w | wen_thr3_w2);  
+
+   // mux between cc_w (alu, wrccr or tlu value), cc_w2 and the old value
+   mux3ds #(8) mux_ccrin0(.dout(ccrin_thr0[7:0]), .sel0(wen_thr0_w),
+                          .sel1(wen_thr0_w2), .sel2(wen_thr0_l),
+                          .in0(exu_ifu_cc_w[7:0]),
+                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr0[7:0]));
+   mux3ds #(8) mux_ccrin1(.dout(ccrin_thr1[7:0]), .sel0(wen_thr1_w),
+                          .sel1(wen_thr1_w2), .sel2(wen_thr1_l),
+                          .in0(exu_ifu_cc_w[7:0]),
+                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr1[7:0]));
+   mux3ds #(8) mux_ccrin2(.dout(ccrin_thr2[7:0]), .sel0(wen_thr2_w),
+                          .sel1(wen_thr2_w2), .sel2(wen_thr2_l),
+                          .in0(exu_ifu_cc_w[7:0]),
+                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr2[7:0]));
+   mux3ds #(8) mux_ccrin3(.dout(ccrin_thr3[7:0]), .sel0(wen_thr3_w),
+                          .sel1(wen_thr3_w2), .sel2(wen_thr3_l),
+                          .in0(exu_ifu_cc_w[7:0]),
+                          .in1(divcntl_ccr_cc_w2[7:0]), .in2(ccr_thr3[7:0]));
+
+   // store new value
+   dff_s #(8) dff_ccr_thr0(.din(ccrin_thr0[7:0]), .clk(clk), .q(ccr_thr0[7:0]),
+                       .se(se), .si(), .so());
+   dff_s #(8) dff_ccr_thr1(.din(ccrin_thr1[7:0]), .clk(clk), .q(ccr_thr1[7:0]),
+                       .se(se), .si(), .so());
+   dff_s #(8) dff_ccr_thr2(.din(ccrin_thr2[7:0]), .clk(clk), .q(ccr_thr2[7:0]),
+                       .se(se), .si(), .so());
+   dff_s #(8) dff_ccr_thr3(.din(ccrin_thr3[7:0]), .clk(clk), .q(ccr_thr3[7:0]),
+                       .se(se), .si(), .so());
+
+
+   // mux between the 4 sets of ccrs
+   mux4ds #(8) mux_ccr_out(.dout(ccr_d[7:0]), .sel0(thrdec_d[0]),
+                         .sel1(thrdec_d[1]), .sel2(thrdec_d[2]),
+                         .sel3(thrdec_d[3]), .in0(ccr_thr0[7:0]),
+                         .in1(ccr_thr1[7:0]), .in2(ccr_thr2[7:0]),
+                         .in3(ccr_thr3[7:0]));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+   // bypass the ccs to the output.  Priority is E, then M, then W, then the stored ccr.
+   assign        exu_ifu_cc_d[7:0] = (use_cc_e)? alu_cc_e[7:0]: partial_cc_d[7:0]; 
+   mux3ds #(8) mux_ccr_bypass1(.dout(partial_cc_d[7:0]), 
+                               .sel0(use_ccr),
+                               .sel1(use_cc_m),
+                               .sel2(use_cc_w), 
+                               .in0(ccr_d[7:0]),
+                               .in1(alu_cc_m[7:0]),
+                               .in2(alu_cc_w[7:0]));
+
+   assign        use_cc_e = valid_setcc_e & thr_match_de;
+   assign        use_cc_m = setcc_m & thr_match_dm;
+   assign        use_cc_w = bypass_cc_w & thrmatch_w & ~use_cc_m; // M stage bypass wins over W
+   assign        use_ccr = ~(use_cc_m | use_cc_w);
+
+   assign        tid_dxorw = tid_w ^ tid_d;
+   // thrmatch_w is high when tid_w and tid_d are equal
+   assign        thrmatch_w = ~(tid_dxorw[1] | tid_dxorw[0]);
+
+   // generate ccr_w for the tlu
+   assign        exu_tlu_ccr0_w[7:0] = ccr_thr0[7:0];
+   assign        exu_tlu_ccr1_w[7:0] = ccr_thr1[7:0]; // NOTE(review): ccr_thr1..3 undriven under FPGA_SYN_1THREAD
+   assign        exu_tlu_ccr2_w[7:0] = ccr_thr2[7:0];
+   assign        exu_tlu_ccr3_w[7:0] = ccr_thr3[7:0];
+
+   
+endmodule // sparc_exu_eclccr
Index: /trunk/T1-CPU/exu/sparc_exu_byp_eccgen.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_byp_eccgen.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_byp_eccgen.v	(revision 6)
@@ -0,0 +1,186 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_byp_eccgen.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////////////
+//  Module Name: sparc_exu_byp_eccgen
+//      Description: This block generates the 8 bit ecc for a 64 bit input.
+//                              It is split over 2 cycles to accommodate the timing requirements of
+//                              the other blocks.
+
+module sparc_exu_byp_eccgen ( p, d, msk, clk, se); // partial XOR terms (*_g) -> flops -> final XOR (*_w) -> p
+   input [63:0] d;      // data word the check bits are computed over
+   input [7:0]  msk;    // one bit per check bit, XORed into that check bit
+   input        clk;    // clock for the intermediate flops
+   input        se;     // wired to the .se port of every flop below
+   output [7:0] p;      // check bits, formed from the flopped partial terms
+
+   wire [7:0]   p0_g;   // pN_g: partial XOR terms computed directly from d
+   wire [7:0]   p0_w;   // pN_w: flopped copy of pN_g
+   wire [7:0]   p1_g;
+   wire [7:0]   p1_w;
+   wire [7:0]   p2_g;
+   wire [7:0]   p2_w;
+   wire [7:0]   p3_g;
+   wire [7:0]   p3_w;
+   wire [3:0]   p4_g;
+   wire [3:0]   p4_w;
+   wire [1:0]   p5_g;
+   wire [1:0]   p5_w;
+   wire [1:0]   p6_g;
+   wire [1:0]   p6_w;
+   wire [7:0]   p7_g;
+   wire [7:0]   p7_w;
+   wire 	msk_w5;  // flopped msk[5], XORed into p[5] after the flops
+   wire 	msk_w4;  // flopped msk[4], XORed into p[4] after the flops
+
+   // Flops to store intermediate results
+  dff_s Imsk_5_  ( .q(msk_w5),  .din(msk[5]),  .clk(clk), .se(se), .si(), .so());
+  dff_s Imsk_4_  ( .q(msk_w4),  .din(msk[4]),  .clk(clk), .se(se), .si(), .so());
+  dff_s Ip0ff_7_ ( .q(p0_w[7]), .din(p0_g[7]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip0ff_6_ ( .q(p0_w[6]), .din(p0_g[6]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip0ff_5_ ( .q(p0_w[5]), .din(p0_g[5]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip0ff_4_ ( .q(p0_w[4]), .din(p0_g[4]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip0ff_3_ ( .q(p0_w[3]), .din(p0_g[3]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip0ff_2_ ( .q(p0_w[2]), .din(p0_g[2]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip0ff_1_ ( .q(p0_w[1]), .din(p0_g[1]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip0ff_0_ ( .q(p0_w[0]), .din(p0_g[0]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip1ff_7_ ( .q(p1_w[7]), .din(p1_g[7]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip1ff_6_ ( .q(p1_w[6]), .din(p1_g[6]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip1ff_5_ ( .q(p1_w[5]), .din(p1_g[5]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip1ff_4_ ( .q(p1_w[4]), .din(p1_g[4]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip1ff_3_ ( .q(p1_w[3]), .din(p1_g[3]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip1ff_2_ ( .q(p1_w[2]), .din(p1_g[2]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip1ff_1_ ( .q(p1_w[1]), .din(p1_g[1]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip1ff_0_ ( .q(p1_w[0]), .din(p1_g[0]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip2ff_7_ ( .q(p2_w[7]), .din(p2_g[7]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip2ff_6_ ( .q(p2_w[6]), .din(p2_g[6]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip2ff_5_ ( .q(p2_w[5]), .din(p2_g[5]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip2ff_4_ ( .q(p2_w[4]), .din(p2_g[4]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip2ff_3_ ( .q(p2_w[3]), .din(p2_g[3]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip2ff_2_ ( .q(p2_w[2]), .din(p2_g[2]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip2ff_1_ ( .q(p2_w[1]), .din(p2_g[1]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip2ff_0_ ( .q(p2_w[0]), .din(p2_g[0]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip3ff_7_ ( .q(p3_w[7]), .din(p3_g[7]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip3ff_6_ ( .q(p3_w[6]), .din(p3_g[6]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip3ff_5_ ( .q(p3_w[5]), .din(p3_g[5]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip3ff_4_ ( .q(p3_w[4]), .din(p3_g[4]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip3ff_3_ ( .q(p3_w[3]), .din(p3_g[3]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip3ff_2_ ( .q(p3_w[2]), .din(p3_g[2]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip3ff_1_ ( .q(p3_w[1]), .din(p3_g[1]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip3ff_0_ ( .q(p3_w[0]), .din(p3_g[0]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip4ff_3_ ( .q(p4_w[3]), .din(p4_g[3]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip4ff_2_ ( .q(p4_w[2]), .din(p4_g[2]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip4ff_1_ ( .q(p4_w[1]), .din(p4_g[1]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip4ff_0_ ( .q(p4_w[0]), .din(p4_g[0]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip5ff_1_ ( .q(p5_w[1]), .din(p5_g[1]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip5ff_0_ ( .q(p5_w[0]), .din(p5_g[0]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip6ff_1_ ( .q(p6_w[1]), .din(p6_g[1]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip6ff_0_ ( .q(p6_w[0]), .din(p6_g[0]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip7ff_7_ ( .q(p7_w[7]), .din(p7_g[7]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip7ff_6_ ( .q(p7_w[6]), .din(p7_g[6]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip7ff_5_ ( .q(p7_w[5]), .din(p7_g[5]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip7ff_4_ ( .q(p7_w[4]), .din(p7_g[4]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip7ff_3_ ( .q(p7_w[3]), .din(p7_g[3]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip7ff_2_ ( .q(p7_w[2]), .din(p7_g[2]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip7ff_1_ ( .q(p7_w[1]), .din(p7_g[1]), .clk(clk), .se(se), .si(), .so());
+  dff_s Ip7ff_0_ ( .q(p7_w[0]), .din(p7_g[0]), .clk(clk), .se(se), .si(), .so());
+
+   // Each p[N] below is the XOR of its flopped partial terms; each pN_g term is an XOR of d bits.
+   // p[0] (msk[0] is folded into the last partial term before the flop)
+   assign p[0] = p0_w[0] ^ p0_w[1] ^ p0_w[2] ^ p0_w[3] ^ p0_w[4] ^ p0_w[5] ^ p0_w[6] ^ p0_w[7]; 
+   assign p0_g[0] = d[0]  ^ d[1]  ^ d[3]  ^ d[4]; 
+   assign p0_g[1] = d[6]  ^ d[8]  ^ d[10] ^ d[11];
+   assign p0_g[2] = d[13] ^ d[15] ^ d[17] ^ d[19];
+   assign p0_g[3] = d[21] ^ d[23] ^ d[25] ^ d[26];
+   assign p0_g[4] = d[28] ^ d[30] ^ d[32] ^ d[34];
+   assign p0_g[5] = d[36] ^ d[38] ^ d[40] ^ d[42];
+   assign p0_g[6] = d[44] ^ d[46] ^ d[48] ^ d[50];
+   assign p0_g[7] = d[52] ^ d[54] ^ d[56] ^ d[57] ^ d[59] ^ d[61] ^ d[63] ^ msk[0];
+
+   // p[1] (msk[1] is folded into the last partial term before the flop)
+   assign p[1] = p1_w[0] ^ p1_w[1] ^ p1_w[2] ^ p1_w[3] ^ p1_w[4] ^ p1_w[5] ^ p1_w[6] ^ p1_w[7];
+   assign p1_g[0] = d[0]  ^ d[2]  ^ d[3]  ^ d[5]; 
+   assign p1_g[1] = d[6]  ^ d[9]  ^ d[10] ^ d[12];
+   assign p1_g[2] = d[13] ^ d[16] ^ d[17] ^ d[20];
+   assign p1_g[3] = d[21] ^ d[24] ^ d[25] ^ d[27];
+   assign p1_g[4] = d[28] ^ d[31] ^ d[32] ^ d[35];
+   assign p1_g[5] = d[36] ^ d[39] ^ d[40] ^ d[43];
+   assign p1_g[6] = d[44] ^ d[47] ^ d[48] ^ d[51];
+   assign p1_g[7] = d[52] ^ d[55] ^ d[56] ^ d[58] ^ d[59] ^ d[62] ^ d[63] ^ msk[1];
+
+   // p[2] (msk[2] is folded into the last partial term before the flop)
+   assign p[2] = p2_w[0] ^ p2_w[1] ^ p2_w[2] ^ p2_w[3] ^ p2_w[4] ^ p2_w[5] ^ p2_w[6] ^ p2_w[7];
+   assign p2_g[0] = d[1]  ^ d[2]  ^ d[3]  ^ d[7]; 
+   assign p2_g[1] = d[8]  ^ d[9]  ^ d[10] ^ d[14];
+   assign p2_g[2] = d[15] ^ d[16] ^ d[17] ^ d[22];
+   assign p2_g[3] = d[23] ^ d[24] ^ d[25] ^ d[29];
+   assign p2_g[4] = d[30] ^ d[31] ^ d[32] ^ d[37];
+   assign p2_g[5] = d[38] ^ d[39] ^ d[40] ^ d[45];
+   assign p2_g[6] = d[46] ^ d[47] ^ d[48] ^ d[53];
+   assign p2_g[7] = d[54] ^ d[55] ^ d[56] ^ d[60] ^ d[61] ^ d[62] ^ d[63] ^ msk[2];
+   
+   // p[3] (msk[3] is folded into the last partial term before the flop)
+   assign p[3] =  p3_w[0] ^ p3_w[1] ^ p3_w[2] ^ p3_w[3] ^ p3_w[4] ^ p3_w[5] ^ p3_w[6] ^ p3_w[7];
+   assign p3_g[0] = d[4]  ^ d[5]  ^ d[6]  ^ d[7]; 
+   assign p3_g[1] = d[8]  ^ d[9]  ^ d[10] ^ d[18];
+   assign p3_g[2] = d[19] ^ d[20] ^ d[21] ^ d[22];
+   assign p3_g[3] = d[23] ^ d[24] ^ d[25] ^ d[33];
+   assign p3_g[4] = d[34] ^ d[35] ^ d[36] ^ d[37];
+   assign p3_g[5] = d[38] ^ d[39] ^ d[40] ^ d[49];
+   assign p3_g[6] = d[50] ^ d[51] ^ d[52] ^ d[53];
+   assign p3_g[7] = d[54] ^ d[55] ^ d[56] ^ msk[3];
+
+   // p[4] (the mask bit is flopped on its own as msk_w4 and XORed in after the flops)
+   assign p[4] =  p4_w[0] ^ p4_w[1] ^ p4_w[2] ^ p4_w[3] ^ msk_w4;
+
+   assign p4_g[0] = d[11] ^ d[12] ^ d[13] ^ d[14] ^ d[15] ^ d[16] ^ d[17] ^ d[18];
+   assign p4_g[1] = d[19] ^ d[20] ^ d[21] ^ d[22] ^ d[23] ^ d[24] ^ d[25] ^ d[41];
+   assign p4_g[2] = d[42] ^ d[43] ^ d[44] ^ d[45] ^ d[46] ^ d[47] ^ d[48] ^ d[49];
+   assign p4_g[3] = d[50] ^ d[51] ^ d[52] ^ d[53] ^ d[54] ^ d[55] ^ d[56];
+
+   // p[5] (reuses p4_w[2] and p4_w[3]: the excluded p5_g[2]/p5_g[3] below equal p4_g[2]/p4_g[3])
+   assign p[5] =  p5_w[0] ^ p5_w[1] ^ p4_w[2] ^ p4_w[3] ^ msk_w5;
+   assign p5_g[0] = d[26] ^ d[27] ^ d[28] ^ d[29] ^ d[30] ^ d[31] ^ d[32] ^ d[33];
+   assign p5_g[1] = d[34] ^ d[35] ^ d[36] ^ d[37] ^ d[38] ^ d[39] ^ d[40] ^ d[41];
+/* -----\/----- EXCLUDED -----\/-----
+   assign p5_g[2] = (d[42]  ^ d[43]  ^ d[44]  ^ d[45]  ^
+                     d[46]  ^ d[47]  ^ d[48]  ^ d[49]);
+   assign p5_g[3] = (d[50]  ^ d[51]  ^ d[52]  ^ d[53]  ^
+                     d[54]  ^ d[55]  ^ d[56]); 
+ -----/\----- EXCLUDED -----/\----- */
+
+   // p[6] (msk[6] is folded into p6_g[1] before the flop)
+   assign p[6] =  p6_w[0] ^ p6_w[1];
+   assign p6_g[0] = d[57] ^ d[58] ^ d[59] ^ d[60];
+   assign p6_g[1] = d[61] ^ d[62] ^ d[63] ^ msk[6]; 
+
+   // p[7] (msk[7] is folded into the last partial term before the flop)
+   assign p[7] = p7_w[0] ^ p7_w[1] ^ p7_w[2] ^ p7_w[3] ^ p7_w[4] ^ p7_w[5] ^ p7_w[6] ^ p7_w[7];
+   assign p7_g[0] = d[0]  ^ d[1]  ^ d[2]  ^ d[4]; 
+   assign p7_g[1] = d[5]  ^ d[7]  ^ d[10] ^ d[11];
+   assign p7_g[2] = d[12] ^ d[14] ^ d[17] ^ d[18];
+   assign p7_g[3] = d[21] ^ d[23] ^ d[24] ^ d[26];
+   assign p7_g[4] = d[27] ^ d[29] ^ d[32] ^ d[33];
+   assign p7_g[5] = d[36] ^ d[38] ^ d[39] ^ d[41];
+   assign p7_g[6] = d[44] ^ d[46] ^ d[47] ^ d[50];
+   assign p7_g[7] = d[51] ^ d[53] ^ d[56] ^ d[57] ^ d[58] ^ d[60] ^ d[63] ^ msk[7];
+       
+endmodule // sparc_exu_byp_eccgen
+
Index: /trunk/T1-CPU/exu/sparc_exu_ecl.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_ecl.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_ecl.v	(revision 6)
@@ -0,0 +1,1525 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_ecl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_ecl
+//	Description:  Implements all the control logic for the exu.
+//		This includes: bypass logic, ccr control and ccr bypassing,
+//			  w2 arbitration logic, mux selects for alu and shift.
+//				Also implements the ccrs.
+*/
+
+module sparc_exu_ecl
+(/*AUTOARG*/
+   // Outputs
+   exu_tlu_ccr3_w, exu_tlu_ccr2_w, exu_tlu_ccr1_w, exu_tlu_ccr0_w, 
+   exu_mul_input_vld, exu_ifu_longop_done_g, exu_ifu_inj_ack, 
+   exu_ifu_err_synd_7_m, exu_ifu_err_reg_m, exu_ifu_ecc_ue_m, 
+   exu_ifu_ecc_ce_m, exu_ffu_wsr_inst_e, ecl_rml_wstate_wen_w, 
+   ecl_rml_otherwin_wen_w, ecl_rml_cwp_wen_e, ecl_rml_cleanwin_wen_w, 
+   ecl_rml_cansave_wen_w, ecl_rml_canrestore_wen_w, 
+   ecl_ecc_sel_rs3_m_l, ecl_ecc_sel_rs2_m_l, ecl_ecc_sel_rs1_m_l, 
+   ecl_ecc_log_rs3_m, ecl_ecc_log_rs2_m, ecl_ecc_log_rs1_m, 
+   ecl_div_yreg_wen_w, ecl_div_yreg_wen_l, ecl_div_yreg_wen_g, 
+   ecl_div_yreg_shift_g, ecl_div_xinmask, ecl_div_upper33_zero, 
+   ecl_div_upper33_one, ecl_div_upper32_zero, ecl_div_subtract_l, 
+   ecl_div_sel_u32, ecl_div_sel_pos32, ecl_div_sel_neg32, 
+   ecl_div_sel_adder, ecl_div_sel_64b, ecl_div_newq, 
+   ecl_div_mul_sext_rs2_e, ecl_div_mul_sext_rs1_e, 
+   ecl_div_mul_keep_data, ecl_div_mul_get_new_data, 
+   ecl_div_mul_get_32bit_data, ecl_div_last_cycle, ecl_div_keepx, 
+   ecl_div_keep_d, ecl_div_dividend_sign, ecl_div_cin, 
+   ecl_div_almostlast_cycle, ecl_byp_sel_restore_m, 
+   ecl_byp_sel_restore_g, ecl_byp_sel_pipe_m, ecl_byp_sel_muldiv_g, 
+   ecl_byp_sel_load_m, ecl_byp_sel_load_g, ecl_byp_eclpr_e, 
+   ecl_byp_ecc_mask_m_l, so, ecl_byp_sel_alu_e, ecl_byp_sel_eclpr_e, 
+   ecl_byp_sel_yreg_e, ecl_byp_sel_ifusr_e, ecl_byp_sel_ffusr_m, 
+   ecl_byp_sel_ifex_m, ecl_byp_sel_tlusr_m, exu_ifu_va_oor_m, 
+   ecl_alu_out_sel_sum_e_l, ecl_alu_out_sel_rs3_e_l, 
+   ecl_alu_out_sel_shift_e_l, ecl_alu_out_sel_logic_e_l, 
+   ecl_alu_log_sel_and_e, ecl_alu_log_sel_or_e, 
+   ecl_alu_log_sel_xor_e, ecl_alu_log_sel_move_e, 
+   ecl_alu_sethi_inst_e, ecl_alu_cin_e, ecl_shft_lshift_e_l, 
+   ecl_shft_op32_e, ecl_shft_shift4_e, ecl_shft_shift1_e, 
+   ecl_shft_enshift_e_l, ecl_byp_restore_m, ecl_byp_rs1_mux2_sel_e, 
+   ecl_byp_rs1_mux2_sel_rf, ecl_byp_rs1_mux2_sel_ld, 
+   ecl_byp_rs1_mux2_sel_usemux1, ecl_byp_rs1_mux1_sel_m, 
+   ecl_byp_rs1_mux1_sel_w, ecl_byp_rs1_mux1_sel_w2, 
+   ecl_byp_rs1_mux1_sel_other, ecl_byp_rcc_mux2_sel_e, 
+   ecl_byp_rcc_mux2_sel_rf, ecl_byp_rcc_mux2_sel_ld, 
+   ecl_byp_rcc_mux2_sel_usemux1, ecl_byp_rcc_mux1_sel_m, 
+   ecl_byp_rcc_mux1_sel_w, ecl_byp_rcc_mux1_sel_w2, 
+   ecl_byp_rcc_mux1_sel_other, ecl_byp_rs2_mux2_sel_e, 
+   ecl_byp_rs2_mux2_sel_rf, ecl_byp_rs2_mux2_sel_ld, 
+   ecl_byp_rs2_mux2_sel_usemux1, ecl_byp_rs2_mux1_sel_m, 
+   ecl_byp_rs2_mux1_sel_w, ecl_byp_rs2_mux1_sel_w2, 
+   ecl_byp_rs2_mux1_sel_other, ecl_byp_rs3_mux2_sel_e, 
+   ecl_byp_rs3_mux2_sel_rf, ecl_byp_rs3_mux2_sel_ld, 
+   ecl_byp_rs3_mux2_sel_usemux1, ecl_byp_rs3_mux1_sel_m, 
+   ecl_byp_rs3_mux1_sel_w, ecl_byp_rs3_mux1_sel_w2, 
+   ecl_byp_rs3_mux1_sel_other, ecl_byp_rs3h_mux2_sel_e, 
+   ecl_byp_rs3h_mux2_sel_rf, ecl_byp_rs3h_mux2_sel_ld, 
+   ecl_byp_rs3h_mux2_sel_usemux1, ecl_byp_rs3h_mux1_sel_m, 
+   ecl_byp_rs3h_mux1_sel_w, ecl_byp_rs3h_mux1_sel_w2, 
+   ecl_byp_rs3h_mux1_sel_other, ecl_byp_rs1_longmux_sel_g2, 
+   ecl_byp_rs1_longmux_sel_w2, ecl_byp_rs1_longmux_sel_ldxa, 
+   ecl_byp_rs2_longmux_sel_g2, ecl_byp_rs2_longmux_sel_w2, 
+   ecl_byp_rs2_longmux_sel_ldxa, ecl_byp_rs3_longmux_sel_g2, 
+   ecl_byp_rs3_longmux_sel_w2, ecl_byp_rs3_longmux_sel_ldxa, 
+   ecl_byp_rs3h_longmux_sel_g2, ecl_byp_rs3h_longmux_sel_w2, 
+   ecl_byp_rs3h_longmux_sel_ldxa, ecl_byp_std_e_l, ecl_byp_ldxa_g, 
+   ecl_byp_3lsb_m, ecl_ecc_rs1_use_rf_e, ecl_ecc_rs2_use_rf_e, 
+   ecl_ecc_rs3_use_rf_e, ecl_irf_rd_m, ecl_irf_tid_m, ecl_irf_wen_w, 
+   ecl_irf_wen_w2, ecl_irf_rd_g, ecl_irf_tid_g, ecl_div_thr_e, 
+   ecl_rml_thr_m, ecl_rml_thr_w, ecl_rml_xor_data_e, 
+   ecl_div_ld_inputs, ecl_div_sel_div, ecl_div_div64, exu_ifu_cc_d, 
+   ecl_shft_extendbit_e, ecl_shft_extend32bit_e_l, 
+   ecl_div_zero_rs2_e, ecl_div_muls_rs1_31_e_l, 
+   ecl_div_yreg_data_31_g, exu_tlu_va_oor_m, exu_tlu_va_oor_jl_ret_m, 
+   ecl_rml_kill_e, ecl_rml_kill_w, ecl_byp_sel_ecc_m, 
+   exu_tlu_ttype_m, exu_tlu_ttype_vld_m, exu_tlu_ue_trap_m, 
+   exu_tlu_misalign_addr_jmpl_rtn_m, exu_lsu_priority_trap_m, 
+   ecl_div_mul_wen, ecl_div_muls, ecl_rml_early_flush_w, 
+   ecl_rml_inst_vld_w, ecl_alu_casa_e, 
+   // Inputs
+   tlu_exu_cwpccr_update_m, tlu_exu_ccr_m, sehold, rst_tri_en, 
+   rml_ecl_wstate_d, rml_ecl_swap_done, rml_ecl_rmlop_done_e, 
+   rml_ecl_otherwin_d, rml_ecl_kill_m, rml_ecl_gl_e, rml_ecl_cwp_d, 
+   rml_ecl_cleanwin_d, rml_ecl_cansave_d, rml_ecl_canrestore_d, 
+   mul_exu_ack, lsu_exu_ldst_miss_g2, ifu_tlu_wsr_inst_d, 
+   ifu_tlu_sraddr_d, ifu_exu_return_d, ifu_exu_muldivop_d, 
+   ifu_exu_inst_vld_w, ifu_exu_inst_vld_e, ifu_exu_inj_irferr, 
+   ifu_exu_ecc_mask, ifu_exu_disable_ce_e, ecc_ecl_rs3_ue, 
+   ecc_ecl_rs3_ce, ecc_ecl_rs2_ue, ecc_ecl_rs2_ce, ecc_ecl_rs1_ue, 
+   ecc_ecl_rs1_ce, div_ecl_xin_msb_l, div_ecl_x_msb, 
+   div_ecl_upper32_equal, div_ecl_low32_nonzero, 
+   div_ecl_gencc_in_msb_l, div_ecl_gencc_in_31, div_ecl_dividend_msb, 
+   div_ecl_detect_zero_low, div_ecl_detect_zero_high, div_ecl_d_msb, 
+   div_ecl_d_62, div_ecl_cout64, div_ecl_cout32, 
+   div_ecl_adder_out_31, byp_ecl_wrccr_data_w, rclk, se, si, grst_l, 
+   arst_l, ifu_exu_dbrinst_d, ifu_exu_aluop_d, ifu_exu_shiftop_d, 
+   ifu_exu_invert_d, ifu_exu_usecin_d, ifu_exu_enshift_d, 
+   byp_ecl_rs2_3_0_e, byp_ecl_rs1_2_0_e, byp_ecl_rd_data_3lsb_m, 
+   ifu_exu_use_rsr_e_l, ifu_exu_rd_exusr_e, ifu_exu_rd_ifusr_e, 
+   ifu_exu_rd_ffusr_e, ifu_exu_rs1_vld_d, ifu_exu_rs2_vld_d, 
+   ifu_exu_rs3e_vld_d, ifu_exu_rs3o_vld_d, ifu_exu_dontmv_regz0_e, 
+   ifu_exu_dontmv_regz1_e, ifu_exu_rd_d, ifu_exu_tid_s2, 
+   ifu_exu_kill_e, ifu_exu_wen_d, ifu_exu_ialign_d, exu_ifu_regz_e, 
+   alu_ecl_add_n64_e, alu_ecl_add_n32_e, alu_ecl_log_n64_e, 
+   alu_ecl_log_n32_e, alu_ecl_zhigh_e, alu_ecl_zlow_e, 
+   ifu_exu_setcc_d, lsu_exu_dfill_vld_g, lsu_exu_rd_m, lsu_exu_thr_m, 
+   lsu_exu_ldxa_m, byp_ecl_rs1_31_e, byp_ecl_rs2_31_e, 
+   byp_ecl_rs1_63_e, alu_ecl_cout64_e_l, alu_ecl_cout32_e, 
+   alu_ecl_adder_out_63_e, alu_ecl_adder_out_31_e, 
+   alu_ecl_adderin2_63_e, alu_ecl_adderin2_31_e, ifu_exu_rs1_s, 
+   ifu_exu_rs2_s, ifu_exu_rs3_s, ifu_exu_tagop_d, ifu_exu_tv_d, 
+   ifu_exu_muls_d, div_ecl_yreg_0_l, alu_ecl_mem_addr_invalid_e_l, 
+   ifu_exu_range_check_jlret_d, ifu_exu_range_check_other_d, 
+   ifu_exu_addr_mask_d, ifu_exu_save_d, ifu_exu_restore_d, 
+   ifu_exu_casa_d, rml_ecl_clean_window_e, rml_ecl_fill_e, 
+   rml_ecl_other_e, rml_ecl_wtype_e, ifu_exu_tcc_e, 
+   alu_ecl_adder_out_7_0_e, ifu_exu_useimm_d, ifu_exu_nceen_e, 
+   ifu_tlu_flush_m, ifu_exu_ttype_vld_m, tlu_exu_priv_trap_m, 
+   tlu_exu_pic_onebelow_m, tlu_exu_pic_twobelow_m, 
+   lsu_exu_flush_pipe_w, ifu_exu_sethi_inst_d, 
+   lsu_exu_st_dtlb_perr_g
+   );
+   
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+input [7:0]             byp_ecl_wrccr_data_w;   // To ccr of sparc_exu_eclccr.v
+input                   div_ecl_adder_out_31;   // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_cout32;         // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_cout64;         // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_d_62;           // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_d_msb;          // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_detect_zero_high;// To divcntl of sparc_exu_ecl_divcntl.v, ...
+input                   div_ecl_detect_zero_low;// To divcntl of sparc_exu_ecl_divcntl.v, ...
+input                   div_ecl_dividend_msb;   // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_gencc_in_31;    // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_gencc_in_msb_l; // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_low32_nonzero;  // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_upper32_equal;  // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_x_msb;          // To divcntl of sparc_exu_ecl_divcntl.v
+input                   div_ecl_xin_msb_l;      // To divcntl of sparc_exu_ecl_divcntl.v
+input                   ecc_ecl_rs1_ce;         // To eccctl of sparc_exu_ecl_eccctl.v
+input                   ecc_ecl_rs1_ue;         // To eccctl of sparc_exu_ecl_eccctl.v
+input                   ecc_ecl_rs2_ce;         // To eccctl of sparc_exu_ecl_eccctl.v
+input                   ecc_ecl_rs2_ue;         // To eccctl of sparc_exu_ecl_eccctl.v
+input                   ecc_ecl_rs3_ce;         // To eccctl of sparc_exu_ecl_eccctl.v
+input                   ecc_ecl_rs3_ue;         // To eccctl of sparc_exu_ecl_eccctl.v
+input                   ifu_exu_disable_ce_e;   // To eccctl of sparc_exu_ecl_eccctl.v
+input [7:0]             ifu_exu_ecc_mask;       // To eccctl of sparc_exu_ecl_eccctl.v
+input                   ifu_exu_inj_irferr;     // To eccctl of sparc_exu_ecl_eccctl.v
+input                   ifu_exu_inst_vld_e;     // To writeback of sparc_exu_ecl_wb.v, ...
+input                   ifu_exu_inst_vld_w;     // To ccr of sparc_exu_eclccr.v, ...
+input [4:0]             ifu_exu_muldivop_d;     // To mdqctl of sparc_exu_ecl_mdqctl.v
+input                   ifu_exu_return_d;       // To writeback of sparc_exu_ecl_wb.v
+input [6:0]             ifu_tlu_sraddr_d;       // To writeback of sparc_exu_ecl_wb.v
+input                   ifu_tlu_wsr_inst_d;     // To writeback of sparc_exu_ecl_wb.v
+input                   lsu_exu_ldst_miss_g2;   // To writeback of sparc_exu_ecl_wb.v
+input                   mul_exu_ack;            // To mdqctl of sparc_exu_ecl_mdqctl.v
+input [2:0]             rml_ecl_canrestore_d;   // To writeback of sparc_exu_ecl_wb.v
+input [2:0]             rml_ecl_cansave_d;      // To writeback of sparc_exu_ecl_wb.v
+input [2:0]             rml_ecl_cleanwin_d;     // To writeback of sparc_exu_ecl_wb.v
+input [2:0]             rml_ecl_cwp_d;          // To writeback of sparc_exu_ecl_wb.v, ...
+input [1:0]             rml_ecl_gl_e;           // To eccctl of sparc_exu_ecl_eccctl.v
+input                   rml_ecl_kill_m;         // To writeback of sparc_exu_ecl_wb.v
+input [2:0]             rml_ecl_otherwin_d;     // To writeback of sparc_exu_ecl_wb.v
+input                   rml_ecl_rmlop_done_e;   // To writeback of sparc_exu_ecl_wb.v
+input [3:0]             rml_ecl_swap_done;      // To writeback of sparc_exu_ecl_wb.v
+input [5:0]             rml_ecl_wstate_d;       // To writeback of sparc_exu_ecl_wb.v
+input                   rst_tri_en;             // To eccctl of sparc_exu_ecl_eccctl.v
+input                   sehold;                 // To writeback of sparc_exu_ecl_wb.v, ...
+input [7:0]             tlu_exu_ccr_m;          // To ccr of sparc_exu_eclccr.v
+input                   tlu_exu_cwpccr_update_m;// To ccr of sparc_exu_eclccr.v
+// End of automatics
+   input 				rclk;
+   input        se;
+   input        si;
+   input        grst_l;
+   input        arst_l;
+   input        ifu_exu_dbrinst_d;// rs1 bypass should use pc
+   input [2:0]  ifu_exu_aluop_d;// partially decoded op for exu operation
+   input [2:0]  ifu_exu_shiftop_d;
+   input        ifu_exu_invert_d;       // invert logic output
+   input        ifu_exu_usecin_d;        // use cin for add ops
+   input        ifu_exu_enshift_d;     // enable shifter
+   input [3:0]  byp_ecl_rs2_3_0_e;
+   input [2:0]  byp_ecl_rs1_2_0_e;
+   input [2:0]  byp_ecl_rd_data_3lsb_m;
+   input        ifu_exu_use_rsr_e_l;      // e stage instruction uses sr
+   input        ifu_exu_rd_exusr_e;
+   input        ifu_exu_rd_ifusr_e;
+   input        ifu_exu_rd_ffusr_e;
+   input        ifu_exu_rs1_vld_d;
+   input        ifu_exu_rs2_vld_d;
+   input        ifu_exu_rs3e_vld_d;
+   input        ifu_exu_rs3o_vld_d;
+   input        ifu_exu_dontmv_regz0_e;// a move instruction got killed
+   input        ifu_exu_dontmv_regz1_e;
+   input [4:0]  ifu_exu_rd_d;           // destination register
+   input [1:0]  ifu_exu_tid_s2;          // thread of inst in s stage
+   input        ifu_exu_kill_e;         // kill instruction in e-stage
+   input        ifu_exu_wen_d;  // instruction in d-stage writes to regfile
+   input        ifu_exu_ialign_d;// instruction is alignaddress
+   input        exu_ifu_regz_e;
+   input        alu_ecl_add_n64_e;
+   input        alu_ecl_add_n32_e;
+   input        alu_ecl_log_n64_e;
+   input        alu_ecl_log_n32_e;
+   input        alu_ecl_zhigh_e;
+   input        alu_ecl_zlow_e;
+   input        ifu_exu_setcc_d;
+   input        lsu_exu_dfill_vld_g; // load data is valid
+   input [4:0]  lsu_exu_rd_m;  // load destination register
+   input [1:0]  lsu_exu_thr_m; // load thread
+   input        lsu_exu_ldxa_m;
+   input  byp_ecl_rs1_31_e;
+   input  byp_ecl_rs2_31_e;
+   input  byp_ecl_rs1_63_e;
+   input       alu_ecl_cout64_e_l;
+   input       alu_ecl_cout32_e;
+   input       alu_ecl_adder_out_63_e;
+   input       alu_ecl_adder_out_31_e;
+   input       alu_ecl_adderin2_63_e;
+   input       alu_ecl_adderin2_31_e;
+   input [4:0]  ifu_exu_rs1_s;  // source addresses
+   input [4:0]  ifu_exu_rs2_s;
+   input [4:0]  ifu_exu_rs3_s;
+   input        ifu_exu_tagop_d;// add or sub sets icc.v with tagged overflow
+   input        ifu_exu_tv_d;   // 32 bit overflow causes exception
+   input  ifu_exu_muls_d;
+   input  [3:0] div_ecl_yreg_0_l;
+   input  alu_ecl_mem_addr_invalid_e_l;
+   input  ifu_exu_range_check_jlret_d;
+   input  ifu_exu_range_check_other_d;
+   input  ifu_exu_addr_mask_d;
+   input      ifu_exu_save_d;
+   input      ifu_exu_restore_d;
+   input      ifu_exu_casa_d;
+   input  rml_ecl_clean_window_e;
+   input  rml_ecl_fill_e;
+   input  rml_ecl_other_e;
+   input  [2:0] rml_ecl_wtype_e;
+   input        ifu_exu_tcc_e;
+   input [7:0]  alu_ecl_adder_out_7_0_e;
+   input       ifu_exu_useimm_d;
+   input       ifu_exu_nceen_e;
+   input       ifu_tlu_flush_m;
+   input       ifu_exu_ttype_vld_m;
+   input        tlu_exu_priv_trap_m;
+   input        tlu_exu_pic_onebelow_m;
+   input        tlu_exu_pic_twobelow_m;
+   input       lsu_exu_flush_pipe_w;
+   input       ifu_exu_sethi_inst_d;
+   input       lsu_exu_st_dtlb_perr_g;
+  
+   /*AUTOOUTPUT*/
+   // Beginning of automatic outputs (from unused autoinst outputs)
+   output [7:0]         ecl_byp_ecc_mask_m_l;   // From eccctl of sparc_exu_ecl_eccctl.v
+   output [7:0]         ecl_byp_eclpr_e;        // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_byp_sel_load_g;     // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_byp_sel_load_m;     // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_byp_sel_muldiv_g;   // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_byp_sel_pipe_m;     // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_byp_sel_restore_g;  // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_byp_sel_restore_m;  // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_div_almostlast_cycle;// From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_cin;            // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_dividend_sign;  // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_keep_d;         // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_keepx;          // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_last_cycle;     // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_mul_get_32bit_data;// From mdqctl of sparc_exu_ecl_mdqctl.v
+   output               ecl_div_mul_get_new_data;// From mdqctl of sparc_exu_ecl_mdqctl.v
+   output               ecl_div_mul_keep_data;  // From mdqctl of sparc_exu_ecl_mdqctl.v
+   output               ecl_div_mul_sext_rs1_e; // From mdqctl of sparc_exu_ecl_mdqctl.v
+   output               ecl_div_mul_sext_rs2_e; // From mdqctl of sparc_exu_ecl_mdqctl.v
+   output               ecl_div_newq;           // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_sel_64b;        // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_sel_adder;      // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_sel_neg32;      // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_sel_pos32;      // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_sel_u32;        // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_subtract_l;     // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_upper32_zero;   // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_upper33_one;    // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_upper33_zero;   // From divcntl of sparc_exu_ecl_divcntl.v
+   output               ecl_div_xinmask;        // From divcntl of sparc_exu_ecl_divcntl.v
+   output [3:0]         ecl_div_yreg_shift_g;   // From writeback of sparc_exu_ecl_wb.v
+   output [3:0]         ecl_div_yreg_wen_g;     // From writeback of sparc_exu_ecl_wb.v
+   output [3:0]         ecl_div_yreg_wen_l;     // From writeback of sparc_exu_ecl_wb.v
+   output [3:0]         ecl_div_yreg_wen_w;     // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_ecc_log_rs1_m;      // From eccctl of sparc_exu_ecl_eccctl.v
+   output               ecl_ecc_log_rs2_m;      // From eccctl of sparc_exu_ecl_eccctl.v
+   output               ecl_ecc_log_rs3_m;      // From eccctl of sparc_exu_ecl_eccctl.v
+   output               ecl_ecc_sel_rs1_m_l;    // From eccctl of sparc_exu_ecl_eccctl.v
+   output               ecl_ecc_sel_rs2_m_l;    // From eccctl of sparc_exu_ecl_eccctl.v
+   output               ecl_ecc_sel_rs3_m_l;    // From eccctl of sparc_exu_ecl_eccctl.v
+   output               ecl_rml_canrestore_wen_w;// From writeback of sparc_exu_ecl_wb.v
+   output               ecl_rml_cansave_wen_w;  // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_rml_cleanwin_wen_w; // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_rml_cwp_wen_e;      // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_rml_otherwin_wen_w; // From writeback of sparc_exu_ecl_wb.v
+   output               ecl_rml_wstate_wen_w;   // From writeback of sparc_exu_ecl_wb.v
+   output               exu_ffu_wsr_inst_e;     // From writeback of sparc_exu_ecl_wb.v
+   output               exu_ifu_ecc_ce_m;       // From eccctl of sparc_exu_ecl_eccctl.v
+   output               exu_ifu_ecc_ue_m;       // From eccctl of sparc_exu_ecl_eccctl.v
+   output [7:0]         exu_ifu_err_reg_m;      // From eccctl of sparc_exu_ecl_eccctl.v
+   output               exu_ifu_err_synd_7_m;   // From eccctl of sparc_exu_ecl_eccctl.v
+   output               exu_ifu_inj_ack;        // From eccctl of sparc_exu_ecl_eccctl.v
+   output [3:0]         exu_ifu_longop_done_g;  // From writeback of sparc_exu_ecl_wb.v
+   output               exu_mul_input_vld;      // From mdqctl of sparc_exu_ecl_mdqctl.v
+   output [7:0]         exu_tlu_ccr0_w;         // From ccr of sparc_exu_eclccr.v
+   output [7:0]         exu_tlu_ccr1_w;         // From ccr of sparc_exu_eclccr.v
+   output [7:0]         exu_tlu_ccr2_w;         // From ccr of sparc_exu_eclccr.v
+   output [7:0]         exu_tlu_ccr3_w;         // From ccr of sparc_exu_eclccr.v
+   // End of automatics
+   output               so;
+   output               ecl_byp_sel_alu_e;
+   output               ecl_byp_sel_eclpr_e;
+   output               ecl_byp_sel_yreg_e;
+   output               ecl_byp_sel_ifusr_e;
+   output               ecl_byp_sel_ffusr_m;
+   output               ecl_byp_sel_ifex_m;
+   output               ecl_byp_sel_tlusr_m;
+   output   exu_ifu_va_oor_m;
+   output ecl_alu_out_sel_sum_e_l;
+   output ecl_alu_out_sel_rs3_e_l;
+   output ecl_alu_out_sel_shift_e_l;
+   output ecl_alu_out_sel_logic_e_l;
+   output ecl_alu_log_sel_and_e;
+   output ecl_alu_log_sel_or_e;
+   output ecl_alu_log_sel_xor_e;
+   output ecl_alu_log_sel_move_e;
+   output ecl_alu_sethi_inst_e;
+   output ecl_alu_cin_e;    // cin for add/sub operations
+   output  ecl_shft_lshift_e_l;  // if 0 do left shift.  else right shift
+   output  ecl_shft_op32_e;      // indicates 32 bit operation so upper 32 = 0
+   //output [3:0] ecl_shft_shift16_e;// [48, 32, 16, 0] shift
+   output [3:0] ecl_shft_shift4_e;// [12, 8, 4, 0] shift
+   output [3:0] ecl_shft_shift1_e;// [3, 2, 1, 0] shift
+   output        ecl_shft_enshift_e_l;// enables inputs to shifter
+   output        ecl_byp_restore_m;
+   output ecl_byp_rs1_mux2_sel_e;// select lines for bypass muxes for rs1
+   output ecl_byp_rs1_mux2_sel_rf;
+   output ecl_byp_rs1_mux2_sel_ld;
+   output ecl_byp_rs1_mux2_sel_usemux1;
+   output ecl_byp_rs1_mux1_sel_m;
+   output ecl_byp_rs1_mux1_sel_w;
+   output ecl_byp_rs1_mux1_sel_w2;
+   output ecl_byp_rs1_mux1_sel_other;
+   output ecl_byp_rcc_mux2_sel_e;// select lines for bypass muxes for rcc
+   output ecl_byp_rcc_mux2_sel_rf;
+   output ecl_byp_rcc_mux2_sel_ld;
+   output ecl_byp_rcc_mux2_sel_usemux1;
+   output ecl_byp_rcc_mux1_sel_m;
+   output ecl_byp_rcc_mux1_sel_w;
+   output ecl_byp_rcc_mux1_sel_w2;
+   output ecl_byp_rcc_mux1_sel_other;
+   output ecl_byp_rs2_mux2_sel_e;// select lines for bypass muxes for rs2
+   output ecl_byp_rs2_mux2_sel_rf;
+   output ecl_byp_rs2_mux2_sel_ld;
+   output ecl_byp_rs2_mux2_sel_usemux1;
+   output ecl_byp_rs2_mux1_sel_m;
+   output ecl_byp_rs2_mux1_sel_w;
+   output ecl_byp_rs2_mux1_sel_w2;
+   output ecl_byp_rs2_mux1_sel_other;
+   output ecl_byp_rs3_mux2_sel_e; // select lines for bypass muxes for rs3
+   output ecl_byp_rs3_mux2_sel_rf;
+   output ecl_byp_rs3_mux2_sel_ld;
+   output ecl_byp_rs3_mux2_sel_usemux1;
+   output ecl_byp_rs3_mux1_sel_m;
+   output ecl_byp_rs3_mux1_sel_w;
+   output ecl_byp_rs3_mux1_sel_w2;
+   output ecl_byp_rs3_mux1_sel_other;
+   output ecl_byp_rs3h_mux2_sel_e; // select lines for bypass muxes for rs3 double
+   output ecl_byp_rs3h_mux2_sel_rf;
+   output ecl_byp_rs3h_mux2_sel_ld;
+   output ecl_byp_rs3h_mux2_sel_usemux1;
+   output ecl_byp_rs3h_mux1_sel_m;
+   output ecl_byp_rs3h_mux1_sel_w;
+   output ecl_byp_rs3h_mux1_sel_w2;
+   output ecl_byp_rs3h_mux1_sel_other;
+   output ecl_byp_rs1_longmux_sel_g2;
+   output ecl_byp_rs1_longmux_sel_w2;
+   output ecl_byp_rs1_longmux_sel_ldxa;
+   output ecl_byp_rs2_longmux_sel_g2;
+   output ecl_byp_rs2_longmux_sel_w2;
+   output ecl_byp_rs2_longmux_sel_ldxa;
+   output ecl_byp_rs3_longmux_sel_g2;
+   output ecl_byp_rs3_longmux_sel_w2;
+   output ecl_byp_rs3_longmux_sel_ldxa;
+   output ecl_byp_rs3h_longmux_sel_g2;
+   output ecl_byp_rs3h_longmux_sel_w2;
+   output ecl_byp_rs3h_longmux_sel_ldxa;
+   output ecl_byp_std_e_l;
+   output ecl_byp_ldxa_g;       // use the ldxa return data
+   output [2:0] ecl_byp_3lsb_m;
+   output                ecl_ecc_rs1_use_rf_e;
+   output                ecl_ecc_rs2_use_rf_e;
+   output                ecl_ecc_rs3_use_rf_e;
+   output  [4:0] ecl_irf_rd_m;
+   output  [1:0] ecl_irf_tid_m;
+   output        ecl_irf_wen_w;
+   output        ecl_irf_wen_w2;// write enable for w2
+   output  [4:0] ecl_irf_rd_g; // w2 destination register
+   output [1:0]  ecl_irf_tid_g;     // thread of inst in long w stage
+   output [3:0]  ecl_div_thr_e;
+   output [3:0] ecl_rml_thr_m;
+   output [3:0] ecl_rml_thr_w;
+   output [2:0] ecl_rml_xor_data_e;
+   output        ecl_div_ld_inputs;
+   output        ecl_div_sel_div;
+   output        ecl_div_div64;
+   output [7:0]  exu_ifu_cc_d;
+   output ecl_shft_extendbit_e;     // bit that gets appended on right shifts
+   output ecl_shft_extend32bit_e_l;   // bit that gets appended on 32 bit right shifts
+   output         ecl_div_zero_rs2_e;// used on muls ops
+   output         ecl_div_muls_rs1_31_e_l;
+   output         ecl_div_yreg_data_31_g;
+   output         exu_tlu_va_oor_m;
+   output         exu_tlu_va_oor_jl_ret_m;
+   output        ecl_rml_kill_e;
+   output        ecl_rml_kill_w;
+   output        ecl_byp_sel_ecc_m;
+   output [8:0] exu_tlu_ttype_m;
+   output       exu_tlu_ttype_vld_m;
+   output       exu_tlu_ue_trap_m;
+   output   exu_tlu_misalign_addr_jmpl_rtn_m;
+   output   exu_lsu_priority_trap_m;
+   output   ecl_div_mul_wen;
+   output   ecl_div_muls;
+   output   ecl_rml_early_flush_w;
+   output   ecl_rml_inst_vld_w;
+   output   ecl_alu_casa_e;
+
+   
+   /*AUTOWIRE*/
+   // Beginning of automatic wires (for undeclared instantiated-module outputs)
+   wire                 bypass_m;               // From writeback of sparc_exu_ecl_wb.v
+   wire                 bypass_w;               // From writeback of sparc_exu_ecl_wb.v
+   wire [7:0]           divcntl_ccr_cc_w2;      // From divcntl of sparc_exu_ecl_divcntl.v
+   wire                 divcntl_wb_req_g;       // From divcntl of sparc_exu_ecl_divcntl.v
+   wire [4:0]           eccctl_wb_rd_m;         // From eccctl of sparc_exu_ecl_eccctl.v
+   wire                 ecl_div_signed_div;     // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire                 mdqctl_divcntl_input_vld;// From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire                 mdqctl_divcntl_muldone; // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire                 mdqctl_divcntl_reset_div;// From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire [4:0]           mdqctl_wb_divrd_g;      // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire                 mdqctl_wb_divsetcc_g;   // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire [1:0]           mdqctl_wb_divthr_g;     // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire [4:0]           mdqctl_wb_mulrd_g;      // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire                 mdqctl_wb_mulsetcc_g;   // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire [1:0]           mdqctl_wb_multhr_g;     // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire                 mdqctl_wb_yreg_shift_g; // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire                 mdqctl_wb_yreg_wen_g;   // From mdqctl of sparc_exu_ecl_mdqctl.v
+   wire [4:0]           wb_byplog_rd_g2;        // From writeback of sparc_exu_ecl_wb.v
+   wire [4:0]           wb_byplog_rd_w2;        // From writeback of sparc_exu_ecl_wb.v
+   wire [1:0]           wb_byplog_tid_w2;       // From writeback of sparc_exu_ecl_wb.v
+   wire                 wb_byplog_wen_g2;       // From writeback of sparc_exu_ecl_wb.v
+   wire                 wb_byplog_wen_w2;       // From writeback of sparc_exu_ecl_wb.v
+   wire                 wb_ccr_setcc_g;         // From writeback of sparc_exu_ecl_wb.v
+   wire                 wb_ccr_wrccr_w;         // From writeback of sparc_exu_ecl_wb.v
+   wire                 wb_divcntl_ack_g;       // From writeback of sparc_exu_ecl_wb.v
+   wire                 wb_e;                   // From writeback of sparc_exu_ecl_wb.v
+   wire                 wb_eccctl_spec_wen_next;// From writeback of sparc_exu_ecl_wb.v
+   // End of automatics
+   wire                 clk;
+   wire                 reset;
+   wire                 ecl_reset_l;
+   wire                ecl_byp_rs1_mux2_sel_rf;// To eccctl of sparc_exu_ecl_eccctl.v
+   wire                ecl_byp_rs2_mux2_sel_rf;// To eccctl of sparc_exu_ecl_eccctl.v
+   wire                ecl_byp_rs3_mux2_sel_rf;// To eccctl of sparc_exu_ecl_eccctl.v
+   wire                ldxa_g;
+   wire                ecl_byp_ldxa_g;
+   wire                rs1_vld_e;
+   wire                rs2_vld_e;
+   wire                std_d;
+   wire                std_e;
+   wire                rs3_vld_d;
+   wire                rs3_vld_e;
+   wire                cancel_rs3_ecc_e;
+   wire [4:0]  ifu_exu_rs1_d;  // source addresses
+   wire [4:0]  ifu_exu_rs2_d;
+   wire [4:0]  ifu_exu_rs3_d;
+   wire [2:0]   ifu_exu_aluop_e;
+   wire [2:0]   shiftop_d;
+   wire [2:0]   shiftop_e;
+   wire         enshift_e;
+   wire         sel_sum_d;
+   wire         sel_sum_e;
+   wire         sub_e;        // Do subtraction for add ops
+   wire         shft_sext_e;     // sign extend for R shift.  must be 0 for left
+   wire         is_logic_e;       // opcode is for logic op
+   wire         dont_move_e;
+   wire         sethi_e;
+   wire [4:0]   rd_e;
+   wire  [4:0] rd_m;
+   wire  [4:0] ecl_irf_rd_w;
+   wire [1:0]  tid_d;
+   wire [3:0] thr_d;
+   wire [1:0]  tid_e;
+   wire  [1:0] tid_m;
+   wire [1:0]  tid_w;
+   wire [1:0]  tid_w1;
+   wire  [1:0] ecl_irf_tid_w;
+   wire [3:0]  thr_m;
+   wire [3:0]  ecl_rml_thr_w;
+   wire        ecl_irf_wen_w;
+   wire          extend64bit;   // bit that gets appended on 64 bit right shifts
+   wire         c_used_d;       // actual c_in calculated in d_stage
+   wire [1:0]  adder_xcc;
+   wire [1:0]  adder_icc;
+   wire        cc_e_3;          // cc_e for muls
+   wire        cc_e_1;
+   wire [3:0]  alu_xcc_e; // 64 bit ccs NZVC
+   wire [3:0]  alu_icc_e; // 32 bit ccs NZVC
+   wire        ialign_e;
+   wire        ialign_m;
+   wire        ifu_exu_tv_e;
+   wire        ifu_exu_tagop_e;
+   wire        tag_overflow;    // tag overflow has occurred
+   wire     tag_overflow_trap_e;   
+   wire           ifu_exu_range_check_jlret_e;
+   wire           ifu_exu_range_check_other_e;
+   wire           addr_mask_e;
+   wire           valid_range_check_jlret_e;
+   wire           ifu_exu_range_check_jlret_m;
+   wire           ifu_exu_range_check_other_m;
+   wire           alu_ecl_mem_addr_invalid_m_l;
+   wire           misalign_addr_e;
+   wire           muls_rs1_31_m_l;
+   wire           rs2_data_31_m;
+   wire       save_e;
+   wire       restore_e;
+   wire [4:0] real_rd_e;
+   wire       ifu_tlu_flush_w;
+   wire          flush_w;
+   wire          flush_w1;
+   wire          part_early_flush_m;
+   wire          part_early_flush_w;
+   wire          pic_trap_m;
+   wire          inst_vld_w1;
+   wire          tlu_priv_trap_w;
+   wire          early_flush_w;
+   wire          thr_match_ew;
+   wire          thr_match_mw1;
+   wire          thr_match_mw;
+   wire          thr_match_sd;
+   wire          thr_match_de;
+   wire          thr_match_se;
+   wire          thr_match_dm;
+   wire          ld_thr_match_sm;
+   wire          ld_thr_match_dg;
+   wire          ld_thr_match_sg;
+   wire          ld_thr_match_dg2;
+   wire	  	     ecl_exu_kill_m;
+   wire	  	     kill_rml_m;
+   wire          kill_rml_w;
+   wire [3:0]    perr_store_next;
+   wire [3:0]    perr_store;
+   wire [3:0]    perr_kill;
+   wire [4:0]    ld_rd_g;
+   wire [1:0]    ld_tid_g;
+
+   wire          read_yreg_e;
+   wire          read_ffusr_e;
+   wire          read_tlusr_e;
+   wire          read_ffusr_m;
+   wire          read_tlusr_m;
+   
+   // trap logic
+   wire          ue_trap_m;
+   wire [8:0]    early1_ttype_e;
+   wire [8:0]    early2_ttype_e;
+   wire [8:0]    early_ttype_m;
+   wire          early_ttype_vld_e;
+   wire          early_ttype_vld_m;
+   wire          pick_not_aligned;
+   wire          pick_tcc;
+   wire          pick_normal_ttype;   
+   wire          fill_trap_e;
+   wire          fill_trap_m;
+   wire       next_yreg_data_31;
+   wire       muls_e;
+   wire       zero_rs2_d;
+   wire       div_e;
+   wire       div_zero_m;
+
+
+   wire [4:0]  ifu_exu_rs1_e;
+   wire [4:0]  ifu_exu_rs1_m;
+   wire [4:0]  ifu_exu_rs2_e;
+   wire [4:0]  ifu_exu_rs2_m;
+   wire [4:0]  ifu_exu_rs3_e;
+   wire [4:0]  ifu_exu_rs3_m;
+   wire [3:0]  div_ecl_yreg_0;
+   wire   div_ecl_yreg_0_d;
+   
+   assign clk = rclk;   // clk used throughout this logic is a plain alias of rclk
+   // Reset flop: registers grst_l into ecl_reset_l on clk; arst_l is wired to the flop's rst_l pin
+    dffrl_async rstff(.din (grst_l),
+                        .q   (ecl_reset_l),
+                        .clk (clk),
+                        .rst_l (arst_l), .se(se), .si(), .so());
+   assign reset = ~ecl_reset_l;   // reset is the inverse of the registered ecl_reset_l
+   
+   // Pipeline flops for irf control signals: the 5-bit rs1/rs2/rs3 specifiers are staged s -> d -> e -> m
+   dff_s #(5) dff_rs1_s2d(.din(ifu_exu_rs1_s[4:0]), .clk(clk), .q(ifu_exu_rs1_d[4:0]), .se(se),
+                      .si(),.so());   // rs1: s -> d
+   dff_s #(5) dff_rs2_s2d(.din(ifu_exu_rs2_s[4:0]), .clk(clk), .q(ifu_exu_rs2_d[4:0]), .se(se),
+                      .si(),.so());   // rs2: s -> d
+   dff_s #(5) dff_rs3_s2d(.din(ifu_exu_rs3_s[4:0]), .clk(clk), .q(ifu_exu_rs3_d[4:0]), .se(se),
+                      .si(),.so());   // rs3: s -> d
+   dff_s #(5) dff_rs1_d2e(.din(ifu_exu_rs1_d[4:0]), .clk(clk), .q(ifu_exu_rs1_e[4:0]), .se(se),
+                      .si(),.so());   // rs1: d -> e
+   dff_s #(5) dff_rs2_d2e(.din(ifu_exu_rs2_d[4:0]), .clk(clk), .q(ifu_exu_rs2_e[4:0]), .se(se),
+                      .si(),.so());   // rs2: d -> e
+   dff_s #(5) dff_rs3_d2e(.din(ifu_exu_rs3_d[4:0]), .clk(clk), .q(ifu_exu_rs3_e[4:0]), .se(se),
+                      .si(),.so());   // rs3: d -> e
+   dff_s #(5) dff_rs1_e2m(.din(ifu_exu_rs1_e[4:0]), .clk(clk), .q(ifu_exu_rs1_m[4:0]), .se(se),
+                      .si(),.so());   // rs1: e -> m
+   dff_s #(5) dff_rs2_e2m(.din(ifu_exu_rs2_e[4:0]), .clk(clk), .q(ifu_exu_rs2_m[4:0]), .se(se),
+                      .si(),.so());   // rs2: e -> m
+   dff_s #(5) dff_rs3_e2m(.din(ifu_exu_rs3_e[4:0]), .clk(clk), .q(ifu_exu_rs3_m[4:0]), .se(se),
+                      .si(),.so());   // rs3: e -> m
+   dff_s #(5) dff_ld_rd_m2g(.din(lsu_exu_rd_m[4:0]), .clk(clk), .q(ld_rd_g[4:0]), .se(se), .si(), .so());  // lsu_exu_rd_m registered into ld_rd_g
+   dff_s #(2) dff_ld_tid_m2g(.din(lsu_exu_thr_m[1:0]), .clk(clk), .q(ld_tid_g[1:0]), .se(se), .si(), .so());   // lsu_exu_thr_m registered into ld_tid_g
+   
+   // Pipeline flops for control signals (mostly d -> e; ialign also e -> m; ldxa is m -> g)
+   dff_s #(3) dff_aluop_d2e(.din(ifu_exu_aluop_d[2:0]), .clk(clk), .q(ifu_exu_aluop_e[2:0]),
+                        .se(se), .si(), .so());
+   dff_s #(3) dff_shiftop_d2e(.din(shiftop_d[2:0]), .clk(clk),
+                          .q(shiftop_e[2:0]), .se(se),
+                          .si(), .so());   // shiftop_d is already gated by ifu_exu_enshift_d (see its assign)
+   dff_s dff_enshift_d2e(.din(ifu_exu_enshift_d), .clk(clk), .q(enshift_e),
+                       .se(se), .si(), .so());
+   dff_s dff_sel_sum_d2e(.din(sel_sum_d), .clk(clk), .q(sel_sum_e),
+                       .se(se), .si(), .so());
+   dff_s dff_tv_d2e(.din(ifu_exu_tv_d), .clk(clk), .q(ifu_exu_tv_e),
+                  .se(se), .si(), .so());
+   dff_s dff_tagop_d2e(.din(ifu_exu_tagop_d), .clk(clk), .q(ifu_exu_tagop_e),
+                  .se(se), .si(), .so());
+   dff_s dff_ialign_d2e(.din(ifu_exu_ialign_d), .clk(clk), .q(ialign_e),
+                      .se(se), .si(), .so());
+   dff_s dff_ialign_e2m(.din(ialign_e), .clk(clk), .q(ialign_m),
+                      .se(se), .si(), .so());   // ialign_m zeroes ecl_byp_3lsb_m when set
+   dff_s ldxa_dff(.din(lsu_exu_ldxa_m), .clk(clk), .q(ldxa_g), .se(se), .si(), .so());   // m -> g
+   dff_s sethi_d2e(.din(ifu_exu_sethi_inst_d), .clk(clk), .q(sethi_e), .se(se), .si(), .so());
+   dff_s rs1_vld_d2e(.din(ifu_exu_rs1_vld_d), .clk(clk), .q(rs1_vld_e), .se(se), .si(), .so());
+   dff_s rs2_vld_d2e(.din(ifu_exu_rs2_vld_d), .clk(clk), .q(rs2_vld_e), .se(se), .si(), .so());
+   assign rs3_vld_d = ifu_exu_rs3e_vld_d | ifu_exu_rs3o_vld_d;   // rs3 valid when either the rs3e or the rs3o valid is set
+   dff_s rs3_vld_d2e(.din(rs3_vld_d), .q(rs3_vld_e), .clk(clk), .se(se), .si(), .so());
+   dff_s casa_d2e(.din(ifu_exu_casa_d), .q(ecl_alu_casa_e), .clk(clk), .se(se), .si(), .so());
+   
+   ///////////////////////////////
+   // ALU Control
+   ///////////////////////////////
+   // Decode opcode for ALU
+   // aluop: [move, log1, log0]
+   // ADD = 00, AND = 01, OR = 10, XOR = 11
+   // log_sel: [and, or, xor, pass]
+   // out_sel: [sum, logic, shift]
+   assign ecl_alu_log_sel_and_e = // aluop == 3'b001
+          (~ifu_exu_aluop_e[2] & ~ifu_exu_aluop_e[1] & ifu_exu_aluop_e[0]);
+   assign ecl_alu_log_sel_or_e = (~ifu_exu_aluop_e[2] & ifu_exu_aluop_e[1]
+                                     & ~ifu_exu_aluop_e[0]);   // aluop == 3'b010
+   assign ecl_alu_log_sel_xor_e = (~ifu_exu_aluop_e[2] & ifu_exu_aluop_e[1]
+                                      & ifu_exu_aluop_e[0]);   // aluop == 3'b011
+   assign ecl_alu_log_sel_move_e = // move bit set, or aluop[1:0] == 00 (no and/or/xor selected)
+          (ifu_exu_aluop_e[2] | ~(ifu_exu_aluop_e[1] | ifu_exu_aluop_e[0]));
+
+   assign is_logic_e = ifu_exu_aluop_e[2] | ifu_exu_aluop_e[1] |
+          ifu_exu_aluop_e[0];   // any aluop bit set (includes the move bit)
+
+   assign ecl_alu_sethi_inst_e = sethi_e;// | ifu_exu_sethi_inst_e;
+
+   assign dont_move_e = (exu_ifu_regz_e)? ifu_exu_dontmv_regz1_e:ifu_exu_dontmv_regz0_e;   // pick the regz1/regz0 variant by exu_ifu_regz_e
+
+   assign sel_sum_d = ~(ifu_exu_enshift_d | ifu_exu_aluop_d[2] |ifu_exu_aluop_d[1] |ifu_exu_aluop_d[0]); // sum when not a shift and aluop == 0
+   assign ecl_alu_out_sel_sum_e_l = ~sel_sum_e;   // the four out_sel_*_e_l signals are the inverses of their select conditions
+   assign ecl_alu_out_sel_shift_e_l = ~(~is_logic_e & ~sel_sum_e);
+   assign ecl_alu_out_sel_logic_e_l = ~(is_logic_e & ~dont_move_e & ~sel_sum_e);
+   assign ecl_alu_out_sel_rs3_e_l = ~(is_logic_e & dont_move_e & ~sel_sum_e);// dontmove includes is_logic
+
+   assign ecl_byp_sel_alu_e = ifu_exu_use_rsr_e_l;   // remaining selects require ~ifu_exu_use_rsr_e_l; priority: ifusr, yreg, eclpr
+   assign ecl_byp_sel_ifusr_e = ~ifu_exu_use_rsr_e_l & ifu_exu_rd_ifusr_e;
+   assign ecl_byp_sel_yreg_e = ~ifu_exu_use_rsr_e_l & ~ifu_exu_rd_ifusr_e & read_yreg_e;
+   assign ecl_byp_sel_eclpr_e = ~ifu_exu_use_rsr_e_l & ~ifu_exu_rd_ifusr_e & ~read_yreg_e;
+        
+   assign read_ffusr_e = ~ifu_exu_use_rsr_e_l & ifu_exu_rd_ffusr_e;
+   assign read_tlusr_e = ~ifu_exu_use_rsr_e_l & ~ifu_exu_rd_ffusr_e & ~ifu_exu_rd_ifusr_e & ~ifu_exu_rd_exusr_e;   // none of ffusr/ifusr/exusr requested
+   assign ecl_byp_sel_ffusr_m = read_ffusr_m;   // m-stage selects: ffusr has priority over tlusr; ifex is the default
+   assign ecl_byp_sel_tlusr_m = read_tlusr_m & ~read_ffusr_m;
+   assign ecl_byp_sel_ifex_m = ~read_tlusr_m & ~read_ffusr_m;
+
+   dff_s #(2) rsr_e2m(.din({read_ffusr_e, read_tlusr_e}), .clk(clk),
+                    .q({read_ffusr_m, read_tlusr_m}), .se(se), .si(), .so());   // e -> m
+ 
+   // ecc checking on rs3 will be cancelled if mov happens
+   assign cancel_rs3_ecc_e = ~dont_move_e & is_logic_e;
+   
+   // compute xor for write to cwp (low 3 bits of rs1 xor low 3 bits of rs2)
+   assign ecl_rml_xor_data_e = byp_ecl_rs1_2_0_e[2:0] ^ byp_ecl_rs2_3_0_e[2:0];
+   // Logic for muls control signals
+   // icc.v ^ icc.n
+   assign         ecl_div_muls_rs1_31_e_l = ~(cc_e_3 ^ cc_e_1);   // cc_e_3/cc_e_1 are registered exu_ifu_cc_d[3]/[1]
+   assign div_ecl_yreg_0[3:0] = ~div_ecl_yreg_0_l[3:0];
+   mux4ds yreg0_mux(.dout(div_ecl_yreg_0_d),
+                    .in0(div_ecl_yreg_0[0]),
+                    .in1(div_ecl_yreg_0[1]),
+                    .in2(div_ecl_yreg_0[2]),
+                    .in3(div_ecl_yreg_0[3]),
+                    .sel0(thr_d[0]),
+                    .sel1(thr_d[1]),
+                    .sel2(thr_d[2]),
+                    .sel3(thr_d[3]));   // thr_d is the decoded (one-hot) form of tid_d
+                       
+   assign zero_rs2_d = ifu_exu_muls_d & ~div_ecl_yreg_0_d;   // muls with the selected yreg bit 0 clear
+
+   assign next_yreg_data_31 = (muls_e)? byp_ecl_rs1_2_0_e[0]:ecl_div_yreg_data_31_g; // capture rs1 bit 0 on muls, otherwise hold
+   dff_s dff_rs1_b0_m2w(.din(next_yreg_data_31), .clk(clk), .q(ecl_div_yreg_data_31_g),
+                .se(se), .si(), .so());
+   
+   // Logic for carryin and subtract
+   assign      c_used_d = ~(ifu_exu_invert_d ^ ~(exu_ifu_cc_d[0] & ifu_exu_usecin_d));   // equals invert ^ (cc[0] & usecin)
+   // Pipeline flops
+   dff_s sub_dff(.din(ifu_exu_invert_d), .clk(clk), .q(sub_e), .se(se),
+               .si(), .so());
+   dff_s c_used_dff(.din(c_used_d), .clk(clk), .q(ecl_alu_cin_e), .se(se),
+                 .si(), .so());
+   dff_s dff_muls_d2e(.din(ifu_exu_muls_d), .clk(clk), .q(muls_e),
+                .se(se), .si(), .so());
+   dff_s zero_rs2_dff(.din(zero_rs2_d), .clk(clk), .q(ecl_div_zero_rs2_e),
+                    .se(se), .si(), .so());
+   dff_s #(2) cc_d2e(.din({exu_ifu_cc_d[3],exu_ifu_cc_d[1]}), .clk(clk), .q({cc_e_3,cc_e_1}),
+                   .se(se), .si(), .so());
+   dff_s mulsrs131_e2m(.din(ecl_div_muls_rs1_31_e_l), .clk(clk),
+                     .q(muls_rs1_31_m_l),
+                     .se(se), .si(), .so());
+   dff_s rs2_31_e2m(.din(byp_ecl_rs2_31_e), .clk(clk),
+                  .q(rs2_data_31_m), .se(se), .si(), .so());
+   
+   dff_s save_dff(.din(ifu_exu_save_d), .clk(clk), .q(save_e), .se(se),
+                .si(), .so());
+   dff_s restore_dff(.din(ifu_exu_restore_d), .clk(clk), .q(restore_e), .se(se),
+                .si(), .so());
+   
+   // Condition code generation (bit 0 of each cc is carry: adder carry-out xor sub_e, only when sel_sum_e)
+   assign      adder_xcc[0] = (~alu_ecl_cout64_e_l ^ sub_e) & sel_sum_e;
+   assign      adder_icc[0] = (alu_ecl_cout32_e ^ sub_e) & sel_sum_e;
+/* -----\/----- EXCLUDED -----\/-----
+   assign adder_xcc[1] = ((byp_ecl_rs1_63_e & alu_ecl_adderin2_63_e & 
+                             ~alu_ecl_adder_out_63_e) |
+                           (~byp_ecl_rs1_63_e & ~alu_ecl_adderin2_63_e &
+                             alu_ecl_adder_out_63_e));
+   assign adder_icc[1] = ((byp_ecl_rs1_31_e & alu_ecl_adderin2_31_e & 
+                             ~alu_ecl_adder_out_31_e) |
+                           (~byp_ecl_rs1_31_e & ~alu_ecl_adderin2_31_e &
+                             alu_ecl_adder_out_31_e));
+ -----/\----- EXCLUDED -----/\----- */
+   assign adder_xcc[1] = (alu_ecl_adder_out_63_e) ? (~byp_ecl_rs1_63_e & ~alu_ecl_adderin2_63_e & sel_sum_e):
+                                                      (byp_ecl_rs1_63_e & alu_ecl_adderin2_63_e & sel_sum_e);   // set when both bit-63 inputs agree and differ from result bit 63
+   assign adder_icc[1] = (alu_ecl_adder_out_31_e) ? ((~byp_ecl_rs1_31_e & ~alu_ecl_adderin2_31_e | tag_overflow) 
+                                                     & sel_sum_e):
+                                                      ((byp_ecl_rs1_31_e & alu_ecl_adderin2_31_e | tag_overflow)
+                                                       & sel_sum_e);   // same test on bit 31, or tag_overflow; gated by sel_sum_e
+   // Tagged overflow: a tagged op (ifu_exu_tagop_e) with any of the two low bits of rs1 or rs2 set
+   assign tag_overflow = (byp_ecl_rs1_2_0_e[0] | byp_ecl_rs1_2_0_e[1] |
+                          byp_ecl_rs2_3_0_e[0] | byp_ecl_rs2_3_0_e[1]) & ifu_exu_tagop_e;
+
+   // Set V C ccs assuming they are 0s for logic and shifting
+   assign alu_xcc_e[3] = (sel_sum_e)? alu_ecl_add_n64_e: alu_ecl_log_n64_e;
+   assign alu_xcc_e[2] = alu_ecl_zlow_e & alu_ecl_zhigh_e;   // xcc zero flag needs both zlow and zhigh
+   assign alu_xcc_e[1:0] = adder_xcc[1:0]; // includes sel_sum
+   
+   assign alu_icc_e[3] = (sel_sum_e)? alu_ecl_add_n32_e: alu_ecl_log_n32_e;
+   assign alu_icc_e[2] = alu_ecl_zlow_e;   // icc zero flag uses zlow only
+   assign alu_icc_e[1:0] = adder_icc[1:0]; // includes sel_sum
+
+   // Tag overflow exception on TV instruction with icc.v
+   assign   tag_overflow_trap_e = ifu_exu_tv_e & adder_icc[1];
+   
+   // Mem address exception generation and flops
+   assign   misalign_addr_e = (alu_ecl_adder_out_7_0_e[1] | alu_ecl_adder_out_7_0_e[0]) & ifu_exu_range_check_jlret_e;   // either of the two low adder-out bits set on a jlret check
+   // jlret is used for misalign (E stage) and va hole (M stage).
+   // if address mask is on then the va hole is not checked
+   assign   valid_range_check_jlret_e = ifu_exu_range_check_jlret_e & ~addr_mask_e;
+   assign   exu_ifu_va_oor_m = ~alu_ecl_mem_addr_invalid_m_l;   // unqualified inverse of the registered invalid_m_l
+   assign exu_tlu_va_oor_m = (~alu_ecl_mem_addr_invalid_m_l &
+                              ifu_exu_range_check_other_m);
+   assign exu_tlu_va_oor_jl_ret_m = (~alu_ecl_mem_addr_invalid_m_l &
+                                     ifu_exu_range_check_jlret_m);   // jlret_m is already masked by ~addr_mask_e in the e stage
+   dff_s dff_addr_mask_d2e (.din(ifu_exu_addr_mask_d), .clk(clk), .q(addr_mask_e),
+                          .se(se), .si(), .so());
+   dff_s dff_mem_invalid_e2m(.din(alu_ecl_mem_addr_invalid_e_l), .clk(clk),
+                           .q(alu_ecl_mem_addr_invalid_m_l), .se(se),
+                           .si(), .so());
+   dff_s dff_misalign_addr_e2m(.din(misalign_addr_e), .clk(clk),
+                           .q(exu_tlu_misalign_addr_jmpl_rtn_m), .se(se),
+                           .si(), .so());
+   dff_s dff_range_check_jlret_d2e(.din(ifu_exu_range_check_jlret_d), .clk(clk),
+                            .q(ifu_exu_range_check_jlret_e), .se(se),
+                            .si(), .so());
+   dff_s dff_range_check_jlret_e2m(.din(valid_range_check_jlret_e), .clk(clk),
+                            .q(ifu_exu_range_check_jlret_m), .se(se),
+                            .si(), .so());   // note: din is the addr-mask-qualified version, not the raw _e signal
+   dff_s dff_range_check_other_d2e(.din(ifu_exu_range_check_other_d), .clk(clk),
+                            .q(ifu_exu_range_check_other_e), .se(se),
+                            .si(), .so());
+   dff_s dff_range_check_other_e2m(.din(ifu_exu_range_check_other_e), .clk(clk),
+                            .q(ifu_exu_range_check_other_m), .se(se),
+                            .si(), .so());
+
+   // 3lsbs can be zeroes for ialign
+   assign ecl_byp_3lsb_m[2:0] = (ialign_m)? 3'b0: byp_ecl_rd_data_3lsb_m[2:0];
+
+   /////////////////////////////
+   // Generate Shift control
+   /////////////////////////////
+   assign shiftop_d[2:0] = ifu_exu_shiftop_d[2:0] & {3{ifu_exu_enshift_d}};   // forced to 0 when shifting is not enabled
+   // shiftop:
+   //   2 = 64bit shift
+   //   1 = Rshift (1), LShift (0)
+   //   0 = arithmetic shift
+   assign ecl_shft_lshift_e_l = shiftop_e[1];
+   assign shft_sext_e = shiftop_e[0];
+   assign ecl_shft_op32_e = ~shiftop_e[2];
+   assign ecl_shft_enshift_e_l = ~enshift_e;
+   // decide what sign extension for right shifts should be (in parallel w/
+   // masking operation)
+   assign ecl_shft_extend32bit_e_l = ~(ecl_shft_op32_e & byp_ecl_rs1_31_e
+                                   & shft_sext_e);   // low when a 32-bit arithmetic shift has rs1 bit 31 set
+   assign extend64bit = shft_sext_e & byp_ecl_rs1_63_e &
+          ~ecl_shft_op32_e;   // 64-bit arithmetic shift with rs1 bit 63 set
+   assign ecl_shft_extendbit_e = (extend64bit | ~ecl_shft_extend32bit_e_l);
+   
+   // Get rid of top bit for 32 bit instructions
+   //assign mod_shiftby_e[5]  = shiftop_e[2] & byp_ecl_rs2_3_0_e[5];
+   // decode shiftby input into mux control signals
+   //assign ecl_shft_shift16_e[0] = (~mod_shiftby_e[5] & ~mod_shiftby_e[4]);
+   //assign ecl_shft_shift16_e[1] = (~mod_shiftby_e[5] & mod_shiftby_e[4]);
+   //assign ecl_shft_shift16_e[2] = (mod_shiftby_e[5] & ~mod_shiftby_e[4]);
+   //assign ecl_shft_shift16_e[3] = (mod_shiftby_e[5] & mod_shiftby_e[4]);
+
+   assign ecl_shft_shift4_e[0] = (~byp_ecl_rs2_3_0_e[3] & ~byp_ecl_rs2_3_0_e[2]);   // one-hot decode of rs2[3:2]
+   assign ecl_shft_shift4_e[1] = (~byp_ecl_rs2_3_0_e[3] & byp_ecl_rs2_3_0_e[2]);
+   assign ecl_shft_shift4_e[2] = (byp_ecl_rs2_3_0_e[3] & ~byp_ecl_rs2_3_0_e[2]);
+   assign ecl_shft_shift4_e[3] = (byp_ecl_rs2_3_0_e[3] & byp_ecl_rs2_3_0_e[2]);
+
+   assign ecl_shft_shift1_e[0] = (~byp_ecl_rs2_3_0_e[1] & ~byp_ecl_rs2_3_0_e[0]);   // one-hot decode of rs2[1:0]
+   assign ecl_shft_shift1_e[1] = (~byp_ecl_rs2_3_0_e[1] & byp_ecl_rs2_3_0_e[0]);
+   assign ecl_shft_shift1_e[2] = (byp_ecl_rs2_3_0_e[1] & ~byp_ecl_rs2_3_0_e[0]);
+   assign ecl_shft_shift1_e[3] = (byp_ecl_rs2_3_0_e[1] & byp_ecl_rs2_3_0_e[0]);
+
+
+   // pipeline flops for bypass data (destination register and thread id per stage)
+   dff_s #(5) dff_rd_d2e(.din(ifu_exu_rd_d[4:0]), .clk(clk), .q(rd_e[4:0]), .se(se),
+                     .si(), .so());
+   // account for switch of ins outs on save/restore
+   assign real_rd_e[4] = rd_e[4] ^ (rd_e[3] & (save_e | restore_e));   // flip bit 4 when rd_e[3] is set on a save or restore
+   assign real_rd_e[3:0] = rd_e[3:0];
+   dff_s #(5) dff_rd_e2m(.din(real_rd_e[4:0]), .clk(clk), .q(rd_m[4:0]), .se(se),
+                     .si(), .so());
+   dff_s #(5) dff_rd_m2w(.din(ecl_irf_rd_m[4:0]), .clk(clk), .q(ecl_irf_rd_w[4:0]), .se(se),
+                     .si(), .so());   // din is ecl_irf_rd_m (driven by the writeback block), not rd_m
+   dff_s #(2) dff_thr_s2d(.din(ifu_exu_tid_s2[1:0]), .clk(clk), .q(tid_d[1:0]), .se(se),
+                      .si(), .so());
+   dff_s #(2) dff_tid_d2e(.din(tid_d[1:0]), .clk(clk), .q(tid_e[1:0]), .se(se),
+                      .si(), .so());
+   dff_s #(2) dff_thr_e2m(.din(tid_e[1:0]), .clk(clk), .q(tid_m[1:0]), .se(se),
+                      .si(), .so());
+   // Need the original thr and the one with ld thr muxed in
+   dff_s #(2) dff_tid_m2w(.din(tid_m[1:0]), .clk(clk), .q(tid_w[1:0]), .se(se),
+                      .si(), .so());   // original pipeline tid
+   dff_s #(2) dff_tid_w2w1(.din(tid_w[1:0]), .clk(clk), .q(tid_w1[1:0]), .se(se),
+                      .si(), .so());
+   dff_s #(2) dff_irf_thr_m2w(.din(ecl_irf_tid_m[1:0]), .clk(clk), .q(ecl_irf_tid_w[1:0]), .se(se),
+                      .si(), .so());   // tid as output by the writeback block (ecl_irf_tid_m)
+
+   // Thread decode: each 2-bit tid is expanded into a 4-bit one-hot vector (bit n set when tid == n)
+   // decode tid_d
+   assign        thr_d[0] = ~tid_d[1] & ~tid_d[0];
+   assign        thr_d[1] = ~tid_d[1] & tid_d[0];
+   assign        thr_d[2] = tid_d[1] & ~tid_d[0];
+   assign        thr_d[3] = tid_d[1] & tid_d[0];
+
+   // decode thr_e
+   assign        ecl_div_thr_e[0] = ~tid_e[1] & ~tid_e[0];
+   assign        ecl_div_thr_e[1] = ~tid_e[1] & tid_e[0];
+   assign        ecl_div_thr_e[2] = tid_e[1] & ~tid_e[0];
+   assign        ecl_div_thr_e[3] = tid_e[1] & tid_e[0];
+   
+   // decode thr_m
+   assign        thr_m[0] = ~tid_m[1] & ~tid_m[0];
+   assign        thr_m[1] = ~tid_m[1] & tid_m[0];
+   assign        thr_m[2] = tid_m[1] & ~tid_m[0];
+   assign        thr_m[3] = tid_m[1] & tid_m[0];
+   assign        ecl_rml_thr_m[3:0] = thr_m[3:0];   // same decoded value exported under the ecl_rml name
+   // decode tid_w
+   assign        ecl_rml_thr_w[0] = ~tid_w[1] & ~tid_w[0];
+   assign        ecl_rml_thr_w[1] = ~tid_w[1] & tid_w[0];
+   assign        ecl_rml_thr_w[2] = tid_w[1] & ~tid_w[0];
+   assign        ecl_rml_thr_w[3] = tid_w[1] & tid_w[0];
+
+   //////////////////////////////////////
+   // Kill logic
+   //////////////////////////////////////
+   // a parity error on a store should kill the next instruction on that thread
+   // perr_store_w sets the bit.  perr_kill_m says that the instruction in M should
+   // be killed.  However, it does not check inst_vld or flush so it might be killing
+   // an invalid instruction.  Therefore perr_store does not get cleared until W.  This
+   // might cause an extra perr_kill_m, but that is OK because subsequent instructions will
+   // be killed until the trap is taken.
+   wire [3:0]    perr_store_w;
+   wire [3:0]    perr_clear_w;
+   wire          perr_kill_m;
+   assign        perr_store_w[3] = tid_w[1] & tid_w[0] & lsu_exu_st_dtlb_perr_g;   // per-thread set: tid_w decode & lsu_exu_st_dtlb_perr_g
+   assign        perr_store_w[2] = tid_w[1] & ~tid_w[0] & lsu_exu_st_dtlb_perr_g;
+   assign        perr_store_w[1] = ~tid_w[1] & tid_w[0] & lsu_exu_st_dtlb_perr_g;
+   assign        perr_store_w[0] = ~tid_w[1] & ~tid_w[0] & lsu_exu_st_dtlb_perr_g;
+   assign        perr_store_next[3] = perr_store_w[3] | perr_store[3] & ~perr_clear_w[3];   // set wins over clear (& binds tighter than |)
+   assign        perr_store_next[2] = perr_store_w[2] | perr_store[2] & ~perr_clear_w[2];
+   assign        perr_store_next[1] = perr_store_w[1] | perr_store[1] & ~perr_clear_w[1];
+   assign        perr_store_next[0] = perr_store_w[0] | perr_store[0] & ~perr_clear_w[0];
+   assign        perr_kill[3] = tid_m[1] & tid_m[0] & perr_store[3];   // m-stage thread has its stored perr bit set
+   assign        perr_kill[2] = tid_m[1] & ~tid_m[0] & perr_store[2];
+   assign        perr_kill[1] = ~tid_m[1] & tid_m[0] & perr_store[1];
+   assign        perr_kill[0] = ~tid_m[1] & ~tid_m[0] & perr_store[0];
+   assign        perr_kill_m = |perr_kill[3:0] | lsu_exu_st_dtlb_perr_g & thr_match_mw;   // stored bit, or error reported now with m and w on the same thread
+   assign        perr_clear_w[3] = tid_w[1] & tid_w[0] & perr_store[3] & ifu_exu_inst_vld_w & ~ifu_tlu_flush_w;   // clear on a valid, unflushed w-stage inst of that thread
+   assign        perr_clear_w[2] = tid_w[1] & ~tid_w[0] & perr_store[2] & ifu_exu_inst_vld_w & ~ifu_tlu_flush_w;
+   assign        perr_clear_w[1] = ~tid_w[1] & tid_w[0] & perr_store[1] & ifu_exu_inst_vld_w & ~ifu_tlu_flush_w;
+   assign        perr_clear_w[0] = ~tid_w[1] & ~tid_w[0] & perr_store[0] & ifu_exu_inst_vld_w & ~ifu_tlu_flush_w;
+   
+   dffr_s #(4) perr_dff(.din(perr_store_next[3:0]), .clk(clk), .q(perr_store[3:0]), .si(), .so(), .se(se), .rst(reset));   // per-thread state; reset wired to the rst pin
+   
+   // calculate an early flush for killing writes in W
+   // the pic trap occurs if there are too many instructions on a given thread.
+   dff_s inst_vld_ww1(.din(ifu_exu_inst_vld_w), .clk(clk), .q(inst_vld_w1), .se(se), .si(), .so());   // w -> w1
+   assign pic_trap_m = ((tlu_exu_pic_onebelow_m & (thr_match_mw & ifu_exu_inst_vld_w | 
+                                                   thr_match_mw1 & inst_vld_w1)) | 
+                        (tlu_exu_pic_twobelow_m & thr_match_mw & ifu_exu_inst_vld_w &
+                         thr_match_mw1 & inst_vld_w1));   // onebelow: a valid same-thread inst in w or w1; twobelow: in both
+   assign        part_early_flush_m = (exu_tlu_ttype_vld_m | ifu_exu_ttype_vld_m | exu_tlu_va_oor_jl_ret_m | 
+                                       perr_kill_m | pic_trap_m);
+   dff_s priv_trap_dff(.din(tlu_exu_priv_trap_m), .clk(clk), .q(tlu_priv_trap_w), .se(se), .si(), .so());
+   dff_s early_flush_dff(.din(part_early_flush_m), .clk(clk), .q(part_early_flush_w), .se(se), .si(), .so());
+   assign        early_flush_w = part_early_flush_w | tlu_priv_trap_w;   // priv trap is ORed in after the flop
+   assign        ecl_rml_early_flush_w = early_flush_w;
+   
+   // buffer this off so it only sees one load from the ifu
+   assign        ecl_rml_inst_vld_w = ifu_exu_inst_vld_w & ~ifu_tlu_flush_w;
+
+   dff_s flush_m2w(.din(ifu_tlu_flush_m), .clk(clk), .q(ifu_tlu_flush_w), .se(se), .si(), .so());
+   assign        flush_w = ifu_tlu_flush_w | lsu_exu_flush_pipe_w;
+   dff_s flush_w_dff(.din(flush_w), .clk(clk), .q(flush_w1), .se(se), .si(), .so());
+   // allow misalign address on returns to kill the cwp switch
+   // ttype[7] is a fill_trap so the return misalign should be ignored
+   // UE trap should kill window ops.  This check is needed here because the
+   // window traps will override the flush_W signals.
+   assign        kill_rml_m = (ue_trap_m | ifu_exu_ttype_vld_m | perr_kill_m | pic_trap_m |
+                               (exu_tlu_misalign_addr_jmpl_rtn_m & ~exu_tlu_ttype_m[7]));
+   dff_s kill_rml_mw(.din(kill_rml_m), .clk(clk) , .q(kill_rml_w), .se(se), .si(), .so());
+   // include tlu_priv_trap to cancel window traps
+   assign        ecl_rml_kill_w = tlu_priv_trap_w | kill_rml_w;
+
+   // pass kill_e through to the rml
+   assign        ecl_rml_kill_e = ifu_exu_kill_e;
+   
+   assign        ecl_exu_kill_m = thr_match_mw1 & flush_w1;   // m-stage kill: w1 flush on the same thread
+   assign thr_match_mw = ~((tid_w[1] ^ tid_m[1]) |
+                           (tid_w[0] ^ tid_m[0]));   // tid_m == tid_w
+   assign thr_match_ew = ~((tid_e[1] ^ tid_w[1]) |
+                           (tid_e[0] ^ tid_w[0]));   // tid_e == tid_w
+   dff_s thr_match_ew_dff(.din(thr_match_ew), .clk(clk), .q(thr_match_mw1), .se(se), .si(), .so());   // e/w match one cycle later is the m/w1 match
+
+   // ldxa needs to check inst_vld and prior flushes
+   assign ecl_byp_ldxa_g = ldxa_g & ifu_exu_inst_vld_w;   // NOTE(review): only inst_vld is applied in this expression - confirm flush handling
+   
+   // controls for outputs to lsu
+   assign std_d = ifu_exu_rs3e_vld_d & ifu_exu_rs3o_vld_d;   // both rs3e and rs3o valid
+   dff_s std_d2e (.din(std_d), .q(std_e), .clk(clk), .se(se), .si(), .so());
+   assign ecl_byp_std_e_l = ~std_e;
+
+  
+   //////////////////////////////////////
+   // Trap output logic
+   //-----------------------
+   // In pipe traps (with priority order):
+   // 029h: uncorrected ecc trap
+   // 0C0h-0FFh: Fill trap
+   // 024h: clean window trap
+   // 034h: mem_address_not_aligned
+   // 023h: Tag Overflow
+   // 028h: Div by zero
+   // 100h-17Fh: Trap instruction
+   //////////////////////////////////////
+   // ecc traps must be enabled
+   assign fill_trap_e = rml_ecl_fill_e;
+   
+   assign early_ttype_vld_e = (rml_ecl_clean_window_e | rml_ecl_fill_e | 
+                                 tag_overflow_trap_e | ifu_exu_tcc_e |
+                                 misalign_addr_e);
+   // early1_ttype_e encodes fill, clean window, tag overflow and div zero; tcc and misaligned
+   // address are muxed in by ttype_mux below.  Note that this will be div_zero on any divide
+   // instruction; the valid for it (div_zero_m) is only ORed in at the M stage.
+   assign early1_ttype_e[8] = 1'b0;
+   assign early1_ttype_e[7] = fill_trap_e;   // bits 7:6 set only for fill (0C0h-0FFh)
+   assign early1_ttype_e[6] = fill_trap_e;
+   assign early1_ttype_e[5] = (rml_ecl_fill_e & rml_ecl_other_e) | 
+                                (~rml_ecl_fill_e & (rml_ecl_clean_window_e | tag_overflow_trap_e | div_e));   // bit 5 is common to 024h, 023h, 028h
+   assign early1_ttype_e[4] = fill_trap_e & rml_ecl_wtype_e[2];   // fill: bits 4:2 carry rml_ecl_wtype_e[2:0]
+   assign early1_ttype_e[3] = (rml_ecl_fill_e & rml_ecl_wtype_e[1]) |
+                           (~rml_ecl_fill_e & ~rml_ecl_clean_window_e & ~tag_overflow_trap_e & div_e);   // 028h only if nothing higher applies
+   assign early1_ttype_e[2] = (fill_trap_e & rml_ecl_wtype_e[0]) |
+                                 (~rml_ecl_fill_e & rml_ecl_clean_window_e);   // 024h
+   assign early1_ttype_e[1] = ~rml_ecl_fill_e & ~rml_ecl_clean_window_e & tag_overflow_trap_e;   // 023h sets bits 1 and 0
+   assign early1_ttype_e[0] = (~rml_ecl_fill_e & ~rml_ecl_clean_window_e & tag_overflow_trap_e);
+   
+   // mux together the ttypes
+   // tcc only can be combined with an ue which is caught later so it isn't qualified by other traps
+   assign pick_normal_ttype = ~pick_not_aligned & ~ifu_exu_tcc_e;
+   assign pick_tcc = ifu_exu_tcc_e;
+   assign pick_not_aligned = ~(rml_ecl_fill_e | rml_ecl_clean_window_e) & misalign_addr_e & ~ifu_exu_tcc_e;   // fill and clean window outrank misalign
+   
+   // the ue ttype is muxed in after the flop because it is so late
+   mux3ds #(9) ttype_mux(.dout(early2_ttype_e[8:0]),
+                         .in0(early1_ttype_e[8:0]),
+                         .in1({1'b1, alu_ecl_adder_out_7_0_e[7:0]}),
+                         .in2(9'h034),
+                         .sel0(pick_normal_ttype),
+                         .sel1(pick_tcc),
+                         .sel2(pick_not_aligned));   // in1: bit 8 set plus low 8 adder bits (tcc); in2: 034h (misalign)
+   assign exu_tlu_ttype_m[8:0] = (ue_trap_m)? 9'h029: early_ttype_m[8:0];   // ue overrides the registered early ttype
+   assign exu_tlu_ttype_vld_m = early_ttype_vld_m | ue_trap_m | div_zero_m;
+   assign exu_tlu_ue_trap_m = ue_trap_m;
+   
+   dff_s ttype_vld_e2m(.din(early_ttype_vld_e), .clk(clk), .q(early_ttype_vld_m),
+                     .se(se), .si(), .so());
+   dff_s #(9) ttype_e2m(.din(early2_ttype_e[8:0]), .clk(clk), .q(early_ttype_m[8:0]),
+                    .se(se), .si(), .so());
+   // lsu needs to know about spill and ue traps for squashing sfsr writes (NOTE(review): the signal used is fill_trap_m - confirm wording)
+   dff_s fill_e2m(.din(fill_trap_e), .clk(clk), .q(fill_trap_m), .se(se), .si(), .so());
+   assign exu_lsu_priority_trap_m = fill_trap_m | ue_trap_m;
+
+   // Condition code Register: sparc_exu_eclccr instance (module body is not in this part of the file)
+   sparc_exu_eclccr ccr(.wb_ccr_thr_g(ecl_irf_tid_g[1:0]),   // thread id comes from the writeback block's ecl_irf_tid_g
+                        .thrdec_d  (thr_d[3:0]),   // decoded (one-hot) d-stage thread
+                        .thr_w  (ecl_rml_thr_w[3:0]),   // decoded (one-hot) w-stage thread
+                       /*AUTOINST*/
+                        // Outputs
+                        .exu_ifu_cc_d   (exu_ifu_cc_d[7:0]),
+                        .exu_tlu_ccr0_w (exu_tlu_ccr0_w[7:0]),
+                        .exu_tlu_ccr1_w (exu_tlu_ccr1_w[7:0]),
+                        .exu_tlu_ccr2_w (exu_tlu_ccr2_w[7:0]),
+                        .exu_tlu_ccr3_w (exu_tlu_ccr3_w[7:0]),
+                        // Inputs
+                        .clk            (clk),
+                        .se             (se),
+                        .alu_xcc_e      (alu_xcc_e[3:0]),   // 64 bit ccs NZVC generated above
+                        .alu_icc_e      (alu_icc_e[3:0]),   // 32 bit ccs NZVC generated above
+                        .tid_d          (tid_d[1:0]),
+                        .thr_match_dm   (thr_match_dm),
+                        .thr_match_de   (thr_match_de),
+                        .tid_w          (tid_w[1:0]),
+                        .ifu_exu_kill_e (ifu_exu_kill_e),
+                        .ifu_exu_setcc_d(ifu_exu_setcc_d),
+                        .byp_ecl_wrccr_data_w(byp_ecl_wrccr_data_w[7:0]),
+                        .wb_ccr_wrccr_w (wb_ccr_wrccr_w),
+                        .wb_ccr_setcc_g (wb_ccr_setcc_g),
+                        .divcntl_ccr_cc_w2(divcntl_ccr_cc_w2[7:0]),
+                        .tlu_exu_cwpccr_update_m(tlu_exu_cwpccr_update_m),
+                        .tlu_exu_ccr_m  (tlu_exu_ccr_m[7:0]),
+                        .ifu_exu_inst_vld_w(ifu_exu_inst_vld_w),
+                        .ifu_tlu_flush_w(ifu_tlu_flush_w),
+                        .early_flush_w  (early_flush_w));   // part_early_flush_w | tlu_priv_trap_w
+   
+   // Writeback control logic
+   sparc_exu_ecl_wb writeback(
+                              .read_yreg_e(read_yreg_e),
+                              /*AUTOINST*/
+                              // Outputs
+                              .wb_ccr_wrccr_w(wb_ccr_wrccr_w),
+                              .ecl_rml_cwp_wen_e(ecl_rml_cwp_wen_e),
+                              .ecl_rml_cansave_wen_w(ecl_rml_cansave_wen_w),
+                              .ecl_rml_canrestore_wen_w(ecl_rml_canrestore_wen_w),
+                              .ecl_rml_otherwin_wen_w(ecl_rml_otherwin_wen_w),
+                              .ecl_rml_wstate_wen_w(ecl_rml_wstate_wen_w),
+                              .ecl_rml_cleanwin_wen_w(ecl_rml_cleanwin_wen_w),
+                              .ecl_byp_sel_load_m(ecl_byp_sel_load_m),
+                              .ecl_byp_sel_restore_m(ecl_byp_sel_restore_m),
+                              .ecl_byp_sel_pipe_m(ecl_byp_sel_pipe_m),
+                              .ecl_byp_restore_m(ecl_byp_restore_m),
+                              .ecl_irf_tid_m(ecl_irf_tid_m[1:0]),
+                              .ecl_irf_rd_m(ecl_irf_rd_m[4:0]),
+                              .ecl_irf_rd_g(ecl_irf_rd_g[4:0]),
+                              .ecl_irf_wen_w2(ecl_irf_wen_w2),
+                              .ecl_irf_tid_g(ecl_irf_tid_g[1:0]),
+                              .wb_e     (wb_e),
+                              .bypass_m (bypass_m),
+                              .ecl_irf_wen_w(ecl_irf_wen_w),
+                              .ecl_byp_sel_load_g(ecl_byp_sel_load_g),
+                              .ecl_byp_sel_muldiv_g(ecl_byp_sel_muldiv_g),
+                              .ecl_byp_sel_restore_g(ecl_byp_sel_restore_g),
+                              .wb_divcntl_ack_g(wb_divcntl_ack_g),
+                              .wb_ccr_setcc_g(wb_ccr_setcc_g),
+                              .ecl_byp_eclpr_e(ecl_byp_eclpr_e[7:0]),
+                              .exu_ifu_longop_done_g(exu_ifu_longop_done_g[3:0]),
+                              .ecl_div_yreg_wen_w(ecl_div_yreg_wen_w[3:0]),
+                              .ecl_div_yreg_wen_g(ecl_div_yreg_wen_g[3:0]),
+                              .ecl_div_yreg_shift_g(ecl_div_yreg_shift_g[3:0]),
+                              .ecl_div_yreg_wen_l(ecl_div_yreg_wen_l[3:0]),
+                              .wb_eccctl_spec_wen_next(wb_eccctl_spec_wen_next),
+                              .bypass_w (bypass_w),
+                              .wb_byplog_rd_w2(wb_byplog_rd_w2[4:0]),
+                              .wb_byplog_tid_w2(wb_byplog_tid_w2[1:0]),
+                              .wb_byplog_wen_w2(wb_byplog_wen_w2),
+                              .wb_byplog_rd_g2(wb_byplog_rd_g2[4:0]),
+                              .wb_byplog_wen_g2(wb_byplog_wen_g2),
+                              .exu_ffu_wsr_inst_e(exu_ffu_wsr_inst_e),
+                              // Inputs
+                              .clk      (clk),
+                              .se       (se),
+                              .reset    (reset),
+                              .sehold   (sehold),
+                              .ld_rd_g  (ld_rd_g[4:0]),
+                              .ld_tid_g (ld_tid_g[1:0]),
+                              .lsu_exu_dfill_vld_g(lsu_exu_dfill_vld_g),
+                              .lsu_exu_ldst_miss_g2(lsu_exu_ldst_miss_g2),
+                              .rd_m     (rd_m[4:0]),
+                              .tid_m    (tid_m[1:0]),
+                              .thr_m    (thr_m[3:0]),
+                              .tid_w1   (tid_w1[1:0]),
+                              .ifu_exu_wen_d(ifu_exu_wen_d),
+                              .ifu_exu_kill_e(ifu_exu_kill_e),
+                              .ecl_exu_kill_m(ecl_exu_kill_m),
+                              .rml_ecl_kill_m(rml_ecl_kill_m),
+                              .ifu_tlu_flush_w(ifu_tlu_flush_w),
+                              .flush_w1 (flush_w1),
+                              .divcntl_wb_req_g(divcntl_wb_req_g),
+                              .mdqctl_wb_divrd_g(mdqctl_wb_divrd_g[4:0]),
+                              .mdqctl_wb_divthr_g(mdqctl_wb_divthr_g[1:0]),
+                              .mdqctl_wb_mulrd_g(mdqctl_wb_mulrd_g[4:0]),
+                              .mdqctl_wb_multhr_g(mdqctl_wb_multhr_g[1:0]),
+                              .mdqctl_wb_divsetcc_g(mdqctl_wb_divsetcc_g),
+                              .mdqctl_wb_mulsetcc_g(mdqctl_wb_mulsetcc_g),
+                              .ecl_div_sel_div(ecl_div_sel_div),
+                              .ifu_tlu_wsr_inst_d(ifu_tlu_wsr_inst_d),
+                              .ifu_tlu_sraddr_d(ifu_tlu_sraddr_d[6:0]),
+                              .rml_ecl_cwp_d(rml_ecl_cwp_d[2:0]),
+                              .rml_ecl_cansave_d(rml_ecl_cansave_d[2:0]),
+                              .rml_ecl_canrestore_d(rml_ecl_canrestore_d[2:0]),
+                              .rml_ecl_otherwin_d(rml_ecl_otherwin_d[2:0]),
+                              .rml_ecl_wstate_d(rml_ecl_wstate_d[5:0]),
+                              .rml_ecl_cleanwin_d(rml_ecl_cleanwin_d[2:0]),
+                              .exu_ifu_cc_d(exu_ifu_cc_d[7:0]),
+                              .rml_ecl_swap_done(rml_ecl_swap_done[3:0]),
+                              .rml_ecl_rmlop_done_e(rml_ecl_rmlop_done_e),
+                              .mdqctl_wb_yreg_wen_g(mdqctl_wb_yreg_wen_g),
+                              .mdqctl_wb_yreg_shift_g(mdqctl_wb_yreg_shift_g),
+                              .ecl_byp_sel_ecc_m(ecl_byp_sel_ecc_m),
+                              .eccctl_wb_rd_m(eccctl_wb_rd_m[4:0]),
+                              .ifu_exu_inst_vld_e(ifu_exu_inst_vld_e),
+                              .ifu_exu_inst_vld_w(ifu_exu_inst_vld_w),
+                              .ifu_exu_return_d(ifu_exu_return_d),
+                              .restore_e(restore_e),
+                              .rml_ecl_fill_e(rml_ecl_fill_e),
+                              .early_flush_w(early_flush_w),
+                              .ecl_byp_ldxa_g(ecl_byp_ldxa_g));
+
+   ////////////////////////
+   // ECC control logic (instance of sparc_exu_ecl_eccctl)
+   ////////////////////////
+   sparc_exu_ecl_eccctl eccctl(
+                               .ue_trap_m(ue_trap_m), // listed by hand ahead of the AUTOINST marker
+                               /*AUTOINST*/
+                               // Outputs
+                               .ecl_ecc_sel_rs1_m_l(ecl_ecc_sel_rs1_m_l),
+                               .ecl_ecc_sel_rs2_m_l(ecl_ecc_sel_rs2_m_l),
+                               .ecl_ecc_sel_rs3_m_l(ecl_ecc_sel_rs3_m_l),
+                               .ecl_ecc_log_rs1_m(ecl_ecc_log_rs1_m),
+                               .ecl_ecc_log_rs2_m(ecl_ecc_log_rs2_m),
+                               .ecl_ecc_log_rs3_m(ecl_ecc_log_rs3_m),
+                               .ecl_byp_sel_ecc_m(ecl_byp_sel_ecc_m), // also an input of the instance instantiated just above
+                               .ecl_ecc_rs1_use_rf_e(ecl_ecc_rs1_use_rf_e),
+                               .ecl_ecc_rs2_use_rf_e(ecl_ecc_rs2_use_rf_e),
+                               .ecl_ecc_rs3_use_rf_e(ecl_ecc_rs3_use_rf_e),
+                               .eccctl_wb_rd_m(eccctl_wb_rd_m[4:0]), // also an input of the instance instantiated just above
+                               .exu_ifu_ecc_ce_m(exu_ifu_ecc_ce_m),
+                               .exu_ifu_ecc_ue_m(exu_ifu_ecc_ue_m),
+                               .exu_ifu_err_reg_m(exu_ifu_err_reg_m[7:0]),
+                               .ecl_byp_ecc_mask_m_l(ecl_byp_ecc_mask_m_l[7:0]),
+                               .exu_ifu_inj_ack(exu_ifu_inj_ack),
+                               .exu_ifu_err_synd_7_m(exu_ifu_err_synd_7_m),
+                               // Inputs
+                               .clk     (clk),
+                               .se      (se),
+                               .rst_tri_en(rst_tri_en),
+                               .ecc_ecl_rs1_ce(ecc_ecl_rs1_ce),
+                               .ecc_ecl_rs1_ue(ecc_ecl_rs1_ue),
+                               .ecc_ecl_rs2_ce(ecc_ecl_rs2_ce),
+                               .ecc_ecl_rs2_ue(ecc_ecl_rs2_ue),
+                               .ecc_ecl_rs3_ce(ecc_ecl_rs3_ce),
+                               .ecc_ecl_rs3_ue(ecc_ecl_rs3_ue),
+                               .ecl_byp_rcc_mux2_sel_rf(ecl_byp_rcc_mux2_sel_rf), // driven by byplog_rs1 below
+                               .ecl_byp_rs2_mux2_sel_rf(ecl_byp_rs2_mux2_sel_rf), // driven by byplog_rs2 (rs_sel_mux2_rf)
+                               .ecl_byp_rs3_mux2_sel_rf(ecl_byp_rs3_mux2_sel_rf), // driven by byplog_rs3 (rs_sel_mux2_rf)
+                               .rs1_vld_e(rs1_vld_e),
+                               .rs2_vld_e(rs2_vld_e),
+                               .rs3_vld_e(rs3_vld_e),
+                               .ifu_exu_rs1_m(ifu_exu_rs1_m[4:0]),
+                               .ifu_exu_rs2_m(ifu_exu_rs2_m[4:0]),
+                               .ifu_exu_rs3_m(ifu_exu_rs3_m[4:0]),
+                               .rml_ecl_cwp_d(rml_ecl_cwp_d[2:0]),
+                               .ifu_exu_ecc_mask(ifu_exu_ecc_mask[7:0]),
+                               .ifu_exu_inj_irferr(ifu_exu_inj_irferr),
+                               .ifu_exu_disable_ce_e(ifu_exu_disable_ce_e),
+                               .wb_eccctl_spec_wen_next(wb_eccctl_spec_wen_next), // output of the instance instantiated just above
+                               .ifu_exu_nceen_e(ifu_exu_nceen_e),
+                               .ifu_exu_inst_vld_e(ifu_exu_inst_vld_e),
+                               .rml_ecl_gl_e(rml_ecl_gl_e[1:0]),
+                               .cancel_rs3_ecc_e(cancel_rs3_ecc_e));
+   // Bypass logic
+   // Thread-id equality versus ifu_exu_tid_s2 is worked out early and flopped to help timing
+   assign thr_match_sd = (ifu_exu_tid_s2[1] ~^ tid_d[1]) &
+                         (ifu_exu_tid_s2[0] ~^ tid_d[0]);
+   dff_s thr_match_sd_dff(.clk(clk), .din(thr_match_sd), .q(thr_match_de),
+                          .se(se), .si(), .so());
+   assign thr_match_se = (ifu_exu_tid_s2[1] ~^ tid_e[1]) &
+                         (ifu_exu_tid_s2[0] ~^ tid_e[0]);
+   dff_s thr_match_se_dff(.clk(clk), .din(thr_match_se), .q(thr_match_dm),
+                          .se(se), .si(), .so());
+   assign ld_thr_match_sm = (ifu_exu_tid_s2[1] ~^ lsu_exu_thr_m[1]) &
+                            (ifu_exu_tid_s2[0] ~^ lsu_exu_thr_m[0]);
+   dff_s ld_thr_match_sm_dff(.clk(clk), .din(ld_thr_match_sm), .q(ld_thr_match_dg),
+                             .se(se), .si(), .so());
+   assign ld_thr_match_sg = (ifu_exu_tid_s2[1] ~^ ld_tid_g[1]) &
+                            (ifu_exu_tid_s2[0] ~^ ld_tid_g[0]);
+   dff_s ld_thr_match_sg_dff(.clk(clk), .din(ld_thr_match_sg), .q(ld_thr_match_dg2),
+                             .se(se), .si(), .so());
+   sparc_exu_eclbyplog_rs1 byplog_rs1(.rs_sel_mux1_m(ecl_byp_rs1_mux1_sel_m), // rs1 bypass selects; uses the _rs1 module variant
+                                  .rs_sel_mux1_w(ecl_byp_rs1_mux1_sel_w),
+                                  .rs_sel_mux1_w2(ecl_byp_rs1_mux1_sel_w2),
+                                  .rs_sel_mux1_other(ecl_byp_rs1_mux1_sel_other),
+                                  .rs_sel_mux2_e(ecl_byp_rs1_mux2_sel_e),
+                                  .rs_sel_mux2_rf(ecl_byp_rs1_mux2_sel_rf),
+                                  .rs_sel_mux2_ld(ecl_byp_rs1_mux2_sel_ld),
+                                  .rs_sel_mux2_usemux1(ecl_byp_rs1_mux2_sel_usemux1),
+                                  .rs_sel_longmux_g2(ecl_byp_rs1_longmux_sel_g2),
+                                  .rs_sel_longmux_w2(ecl_byp_rs1_longmux_sel_w2),
+                                  .rs_sel_longmux_ldxa(ecl_byp_rs1_longmux_sel_ldxa),
+                                  .rs   (ifu_exu_rs1_d[4:0]), // 5-bit rs1 specifier
+                                  .use_other(ifu_exu_dbrinst_d), // per-operand: rs2 uses ifu_exu_useimm_d, rs3/rs3h tie this to 1'b0
+                                  /*AUTOINST*/
+                                      // Outputs (rcc mux selects; the rs2/rs3/rs3h instances connect no such ports)
+                                      .ecl_byp_rcc_mux1_sel_m(ecl_byp_rcc_mux1_sel_m),
+                                      .ecl_byp_rcc_mux1_sel_w(ecl_byp_rcc_mux1_sel_w),
+                                      .ecl_byp_rcc_mux1_sel_w2(ecl_byp_rcc_mux1_sel_w2),
+                                      .ecl_byp_rcc_mux1_sel_other(ecl_byp_rcc_mux1_sel_other),
+                                      .ecl_byp_rcc_mux2_sel_usemux1(ecl_byp_rcc_mux2_sel_usemux1),
+                                      .ecl_byp_rcc_mux2_sel_rf(ecl_byp_rcc_mux2_sel_rf), // also an input of eccctl
+                                      .ecl_byp_rcc_mux2_sel_e(ecl_byp_rcc_mux2_sel_e),
+                                      .ecl_byp_rcc_mux2_sel_ld(ecl_byp_rcc_mux2_sel_ld),
+                                      // Inputs (same nets as on byplog_rs2/rs3/rs3h)
+                                      .sehold(sehold),
+                                      .rd_e(rd_e[4:0]),
+                                      .rd_m(rd_m[4:0]),
+                                      .ecl_irf_rd_w(ecl_irf_rd_w[4:0]),
+                                      .ld_rd_g(ld_rd_g[4:0]),
+                                      .wb_byplog_rd_w2(wb_byplog_rd_w2[4:0]),
+                                      .wb_byplog_rd_g2(wb_byplog_rd_g2[4:0]),
+                                      .tid_d(tid_d[1:0]),
+                                      .thr_match_de(thr_match_de), // flopped thread-match bits from the dff_s instances above
+                                      .thr_match_dm(thr_match_dm),
+                                      .ecl_irf_tid_w(ecl_irf_tid_w[1:0]),
+                                      .ld_thr_match_dg(ld_thr_match_dg),
+                                      .wb_byplog_tid_w2(wb_byplog_tid_w2[1:0]),
+                                      .ld_thr_match_dg2(ld_thr_match_dg2),
+                                      .ifu_exu_kill_e(ifu_exu_kill_e),
+                                      .wb_e(wb_e),
+                                      .bypass_m(bypass_m),
+                                      .lsu_exu_dfill_vld_g(lsu_exu_dfill_vld_g),
+                                      .bypass_w(bypass_w),
+                                      .wb_byplog_wen_w2(wb_byplog_wen_w2),
+                                      .wb_byplog_wen_g2(wb_byplog_wen_g2),
+                                      .ecl_byp_ldxa_g(ecl_byp_ldxa_g));
+
+   sparc_exu_eclbyplog byplog_rs2(.rs_sel_mux1_m(ecl_byp_rs2_mux1_sel_m), // rs2 bypass selects
+                                  .rs_sel_mux1_w(ecl_byp_rs2_mux1_sel_w),
+                                  .rs_sel_mux1_w2(ecl_byp_rs2_mux1_sel_w2),
+                                  .rs_sel_mux1_other(ecl_byp_rs2_mux1_sel_other),
+                                  .rs_sel_mux2_e(ecl_byp_rs2_mux2_sel_e),
+                                  .rs_sel_mux2_rf(ecl_byp_rs2_mux2_sel_rf), // also an input of eccctl
+                                  .rs_sel_mux2_ld(ecl_byp_rs2_mux2_sel_ld),
+                                  .rs_sel_mux2_usemux1(ecl_byp_rs2_mux2_sel_usemux1),
+                                  .rs_sel_longmux_g2(ecl_byp_rs2_longmux_sel_g2),
+                                  .rs_sel_longmux_w2(ecl_byp_rs2_longmux_sel_w2),
+                                  .rs_sel_longmux_ldxa(ecl_byp_rs2_longmux_sel_ldxa),
+                                  .rs   (ifu_exu_rs2_d[4:0]), // 5-bit rs2 specifier
+                                  .use_other(ifu_exu_useimm_d), // NOTE(review): presumably picks the immediate over a bypassed rs2 - confirm in sparc_exu_eclbyplog
+                                  /*AUTOINST*/
+                                  // Inputs (same nets as on the other byplog instances)
+                                  .sehold(sehold),
+                                  .rd_e (rd_e[4:0]),
+                                  .rd_m (rd_m[4:0]),
+                                  .ecl_irf_rd_w(ecl_irf_rd_w[4:0]),
+                                  .ld_rd_g(ld_rd_g[4:0]),
+                                  .wb_byplog_rd_w2(wb_byplog_rd_w2[4:0]),
+                                  .wb_byplog_rd_g2(wb_byplog_rd_g2[4:0]),
+                                  .tid_d(tid_d[1:0]),
+                                  .thr_match_de(thr_match_de),
+                                  .thr_match_dm(thr_match_dm),
+                                  .ecl_irf_tid_w(ecl_irf_tid_w[1:0]),
+                                  .ld_thr_match_dg(ld_thr_match_dg),
+                                  .wb_byplog_tid_w2(wb_byplog_tid_w2[1:0]),
+                                  .ld_thr_match_dg2(ld_thr_match_dg2),
+                                  .ifu_exu_kill_e(ifu_exu_kill_e),
+                                  .wb_e (wb_e),
+                                  .bypass_m(bypass_m),
+                                  .lsu_exu_dfill_vld_g(lsu_exu_dfill_vld_g),
+                                  .bypass_w(bypass_w),
+                                  .wb_byplog_wen_w2(wb_byplog_wen_w2),
+                                  .wb_byplog_wen_g2(wb_byplog_wen_g2),
+                                  .ecl_byp_ldxa_g(ecl_byp_ldxa_g));
+   sparc_exu_eclbyplog byplog_rs3(.rs_sel_mux1_m(ecl_byp_rs3_mux1_sel_m), // rs3 bypass selects
+                                  .rs_sel_mux1_w(ecl_byp_rs3_mux1_sel_w),
+                                  .rs_sel_mux1_w2(ecl_byp_rs3_mux1_sel_w2),
+                                  .rs_sel_mux1_other(ecl_byp_rs3_mux1_sel_other),
+                                  .rs_sel_mux2_e(ecl_byp_rs3_mux2_sel_e),
+                                  .rs_sel_mux2_rf(ecl_byp_rs3_mux2_sel_rf), // also an input of eccctl
+                                  .rs_sel_mux2_ld(ecl_byp_rs3_mux2_sel_ld),
+                                  .rs_sel_mux2_usemux1(ecl_byp_rs3_mux2_sel_usemux1),
+                                  .rs_sel_longmux_g2(ecl_byp_rs3_longmux_sel_g2),
+                                  .rs_sel_longmux_w2(ecl_byp_rs3_longmux_sel_w2),
+                                  .rs_sel_longmux_ldxa(ecl_byp_rs3_longmux_sel_ldxa),
+                                  .rs   ({ifu_exu_rs3_d[4:0]}), // unmodified rs3 specifier; byplog_rs3h forces bit 0 to 1
+                                  .use_other(1'b0), // tied low for rs3
+                                  /*AUTOINST*/
+                                  // Inputs (same nets as on the other byplog instances)
+                                  .sehold(sehold),
+                                  .rd_e (rd_e[4:0]),
+                                  .rd_m (rd_m[4:0]),
+                                  .ecl_irf_rd_w(ecl_irf_rd_w[4:0]),
+                                  .ld_rd_g(ld_rd_g[4:0]),
+                                  .wb_byplog_rd_w2(wb_byplog_rd_w2[4:0]),
+                                  .wb_byplog_rd_g2(wb_byplog_rd_g2[4:0]),
+                                  .tid_d(tid_d[1:0]),
+                                  .thr_match_de(thr_match_de),
+                                  .thr_match_dm(thr_match_dm),
+                                  .ecl_irf_tid_w(ecl_irf_tid_w[1:0]),
+                                  .ld_thr_match_dg(ld_thr_match_dg),
+                                  .wb_byplog_tid_w2(wb_byplog_tid_w2[1:0]),
+                                  .ld_thr_match_dg2(ld_thr_match_dg2),
+                                  .ifu_exu_kill_e(ifu_exu_kill_e),
+                                  .wb_e (wb_e),
+                                  .bypass_m(bypass_m),
+                                  .lsu_exu_dfill_vld_g(lsu_exu_dfill_vld_g),
+                                  .bypass_w(bypass_w),
+                                  .wb_byplog_wen_w2(wb_byplog_wen_w2),
+                                  .wb_byplog_wen_g2(wb_byplog_wen_g2),
+                                  .ecl_byp_ldxa_g(ecl_byp_ldxa_g));
+   sparc_exu_eclbyplog byplog_rs3h(.rs_sel_mux1_m(ecl_byp_rs3h_mux1_sel_m), // rs3h bypass selects
+                                  .rs_sel_mux1_w(ecl_byp_rs3h_mux1_sel_w),
+                                  .rs_sel_mux1_w2(ecl_byp_rs3h_mux1_sel_w2),
+                                  .rs_sel_mux1_other(ecl_byp_rs3h_mux1_sel_other),
+                                  .rs_sel_mux2_e(ecl_byp_rs3h_mux2_sel_e),
+                                  .rs_sel_mux2_rf(ecl_byp_rs3h_mux2_sel_rf),
+                                  .rs_sel_mux2_ld(ecl_byp_rs3h_mux2_sel_ld),
+                                  .rs_sel_mux2_usemux1(ecl_byp_rs3h_mux2_sel_usemux1),
+                                  .rs_sel_longmux_g2(ecl_byp_rs3h_longmux_sel_g2),
+                                  .rs_sel_longmux_w2(ecl_byp_rs3h_longmux_sel_w2),
+                                  .rs_sel_longmux_ldxa(ecl_byp_rs3h_longmux_sel_ldxa),
+                                  .rs   ({ifu_exu_rs3_d[4:1],1'b1}), // rs3 specifier with bit 0 forced to 1 (always an odd index)
+                                  .use_other(1'b0), // tied low, as on byplog_rs3
+                                  /*AUTOINST*/
+                                   // Inputs (same nets as on the other byplog instances)
+                                   .sehold(sehold),
+                                   .rd_e(rd_e[4:0]),
+                                   .rd_m(rd_m[4:0]),
+                                   .ecl_irf_rd_w(ecl_irf_rd_w[4:0]),
+                                   .ld_rd_g(ld_rd_g[4:0]),
+                                   .wb_byplog_rd_w2(wb_byplog_rd_w2[4:0]),
+                                   .wb_byplog_rd_g2(wb_byplog_rd_g2[4:0]),
+                                   .tid_d(tid_d[1:0]),
+                                   .thr_match_de(thr_match_de),
+                                   .thr_match_dm(thr_match_dm),
+                                   .ecl_irf_tid_w(ecl_irf_tid_w[1:0]),
+                                   .ld_thr_match_dg(ld_thr_match_dg),
+                                   .wb_byplog_tid_w2(wb_byplog_tid_w2[1:0]),
+                                   .ld_thr_match_dg2(ld_thr_match_dg2),
+                                   .ifu_exu_kill_e(ifu_exu_kill_e),
+                                   .wb_e(wb_e),
+                                   .bypass_m(bypass_m),
+                                   .lsu_exu_dfill_vld_g(lsu_exu_dfill_vld_g),
+                                   .bypass_w(bypass_w),
+                                   .wb_byplog_wen_w2(wb_byplog_wen_w2),
+                                   .wb_byplog_wen_g2(wb_byplog_wen_g2),
+                                   .ecl_byp_ldxa_g(ecl_byp_ldxa_g));
+
+   /////////////////////////
+   // Division control logic (instance of sparc_exu_ecl_divcntl)
+   /////////////////////////
+   sparc_exu_ecl_divcntl divcntl(
+                                 .div_ecl_divisorin_31(byp_ecl_rs2_31_e), // port and net names differ, so wired by hand; same net feeds mdqctl
+                                 /*AUTOINST*/
+                                 // Outputs
+                                 .ecl_div_xinmask(ecl_div_xinmask),
+                                 .ecl_div_keep_d(ecl_div_keep_d),
+                                 .ecl_div_ld_inputs(ecl_div_ld_inputs),
+                                 .ecl_div_sel_adder(ecl_div_sel_adder),
+                                 .ecl_div_last_cycle(ecl_div_last_cycle),
+                                 .ecl_div_almostlast_cycle(ecl_div_almostlast_cycle),
+                                 .ecl_div_sel_div(ecl_div_sel_div), // also an input of mdqctl
+                                 .divcntl_wb_req_g(divcntl_wb_req_g), // also an input of mdqctl
+                                 .divcntl_ccr_cc_w2(divcntl_ccr_cc_w2[7:0]),
+                                 .ecl_div_sel_64b(ecl_div_sel_64b),
+                                 .ecl_div_sel_u32(ecl_div_sel_u32),
+                                 .ecl_div_sel_pos32(ecl_div_sel_pos32),
+                                 .ecl_div_sel_neg32(ecl_div_sel_neg32),
+                                 .ecl_div_upper32_zero(ecl_div_upper32_zero),
+                                 .ecl_div_upper33_one(ecl_div_upper33_one),
+                                 .ecl_div_upper33_zero(ecl_div_upper33_zero),
+                                 .ecl_div_dividend_sign(ecl_div_dividend_sign),
+                                 .ecl_div_newq(ecl_div_newq),
+                                 .ecl_div_subtract_l(ecl_div_subtract_l),
+                                 .ecl_div_keepx(ecl_div_keepx),
+                                 .ecl_div_cin(ecl_div_cin),
+                                 // Inputs
+                                 .clk   (clk),
+                                 .se    (se),
+                                 .reset (reset),
+                                 .mdqctl_divcntl_input_vld(mdqctl_divcntl_input_vld), // output of mdqctl; aliased to div_e below
+                                 .wb_divcntl_ack_g(wb_divcntl_ack_g), // also an input of mdqctl
+                                 .mdqctl_divcntl_reset_div(mdqctl_divcntl_reset_div), // output of mdqctl
+                                 .div_ecl_gencc_in_msb_l(div_ecl_gencc_in_msb_l),
+                                 .div_ecl_gencc_in_31(div_ecl_gencc_in_31),
+                                 .div_ecl_upper32_equal(div_ecl_upper32_equal),
+                                 .div_ecl_low32_nonzero(div_ecl_low32_nonzero),
+                                 .ecl_div_signed_div(ecl_div_signed_div), // output of mdqctl
+                                 .div_ecl_dividend_msb(div_ecl_dividend_msb),
+                                 .div_ecl_xin_msb_l(div_ecl_xin_msb_l),
+                                 .div_ecl_x_msb(div_ecl_x_msb),
+                                 .div_ecl_d_msb(div_ecl_d_msb),
+                                 .div_ecl_cout64(div_ecl_cout64),
+                                 .ecl_div_div64(ecl_div_div64), // output of mdqctl
+                                 .mdqctl_divcntl_muldone(mdqctl_divcntl_muldone), // output of mdqctl
+                                 .ecl_div_muls(ecl_div_muls), // output of mdqctl
+                                 .div_ecl_adder_out_31(div_ecl_adder_out_31),
+                                 .muls_rs1_31_m_l(muls_rs1_31_m_l),
+                                 .div_ecl_cout32(div_ecl_cout32),
+                                 .rs2_data_31_m(rs2_data_31_m),
+                                 .div_ecl_detect_zero_high(div_ecl_detect_zero_high), // also an input of mdqctl
+                                 .div_ecl_detect_zero_low(div_ecl_detect_zero_low), // also an input of mdqctl
+                                 .div_ecl_d_62(div_ecl_d_62));
+
+   assign div_e = mdqctl_divcntl_input_vld; // div_e is a plain alias of mdqctl's input-valid output
+   sparc_exu_ecl_mdqctl mdqctl(.div_zero_m(div_zero_m), // hand-wired ports precede the AUTOINST marker
+                               .byp_alu_rs1_data_31_e(byp_ecl_rs1_31_e), // port and net names differ, so wired by hand
+                               .byp_alu_rs2_data_31_e(byp_ecl_rs2_31_e), // same net drives divcntl.div_ecl_divisorin_31
+                               /*AUTOINST*/
+                               // Outputs
+                               .mdqctl_divcntl_input_vld(mdqctl_divcntl_input_vld), // input of divcntl
+                               .mdqctl_divcntl_reset_div(mdqctl_divcntl_reset_div), // input of divcntl
+                               .mdqctl_divcntl_muldone(mdqctl_divcntl_muldone), // input of divcntl
+                               .ecl_div_div64(ecl_div_div64), // input of divcntl
+                               .ecl_div_signed_div(ecl_div_signed_div), // input of divcntl
+                               .ecl_div_muls(ecl_div_muls), // input of divcntl
+                               .mdqctl_wb_divthr_g(mdqctl_wb_divthr_g[1:0]),
+                               .mdqctl_wb_divrd_g(mdqctl_wb_divrd_g[4:0]),
+                               .mdqctl_wb_multhr_g(mdqctl_wb_multhr_g[1:0]),
+                               .mdqctl_wb_mulrd_g(mdqctl_wb_mulrd_g[4:0]),
+                               .mdqctl_wb_divsetcc_g(mdqctl_wb_divsetcc_g),
+                               .mdqctl_wb_mulsetcc_g(mdqctl_wb_mulsetcc_g),
+                               .mdqctl_wb_yreg_shift_g(mdqctl_wb_yreg_shift_g),
+                               .exu_mul_input_vld(exu_mul_input_vld),
+                               .mdqctl_wb_yreg_wen_g(mdqctl_wb_yreg_wen_g),
+                               .ecl_div_mul_sext_rs1_e(ecl_div_mul_sext_rs1_e),
+                               .ecl_div_mul_sext_rs2_e(ecl_div_mul_sext_rs2_e),
+                               .ecl_div_mul_get_new_data(ecl_div_mul_get_new_data),
+                               .ecl_div_mul_keep_data(ecl_div_mul_keep_data),
+                               .ecl_div_mul_get_32bit_data(ecl_div_mul_get_32bit_data),
+                               .ecl_div_mul_wen(ecl_div_mul_wen),
+                               // Inputs
+                               .clk     (clk),
+                               .se      (se),
+                               .reset   (reset),
+                               .ifu_exu_muldivop_d(ifu_exu_muldivop_d[4:0]),
+                               .tid_d   (tid_d[1:0]),
+                               .ifu_exu_rd_d(ifu_exu_rd_d[4:0]),
+                               .tid_w1  (tid_w1[1:0]),
+                               .flush_w1(flush_w1),
+                               .ifu_exu_inst_vld_w(ifu_exu_inst_vld_w),
+                               .wb_divcntl_ack_g(wb_divcntl_ack_g), // also an input of divcntl
+                               .divcntl_wb_req_g(divcntl_wb_req_g), // output of divcntl
+                               .mul_exu_ack(mul_exu_ack),
+                               .ecl_div_sel_div(ecl_div_sel_div), // output of divcntl
+                               .ifu_exu_muls_d(ifu_exu_muls_d),
+                               .div_ecl_detect_zero_high(div_ecl_detect_zero_high), // also an input of divcntl
+                               .div_ecl_detect_zero_low(div_ecl_detect_zero_low), // also an input of divcntl
+                               .ifu_tlu_flush_w(ifu_tlu_flush_w),
+                               .early_flush_w(early_flush_w));
+
+endmodule // sparc_exu_ecl
+
+
Index: /trunk/T1-CPU/exu/sparc_exu_rml_cwp.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_rml_cwp.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_rml_cwp.v	(revision 6)
@@ -0,0 +1,534 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_rml_cwp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_rml_cwp
+//	Description: Register management logic.  Contains CWP, CANSAVE, CANRESTORE
+//		and other window management registers.  Generates RF related traps
+//  		and switches the global registers to alternate globals.  All the registers
+//		are written in the W stage (there is no bypassing so they must
+//		swap out) and will either get a new value generated by a window management
+//		instruction or by a WRPR instruction.  The following traps can be generated:
+//			Fill: restore with canrestore == 0
+//			clean_window: save with cleanwin-canrestore == 0
+//			spill: flushw with cansave != nwindows -2 or
+//				save with cansave == 0
+//		It is assumed that the contents of the new window will get squashed
+//		on a clean_window or fill trap so the save or restore gets executed
+//		normally.  Spill traps or WRCWPs mean that all 16 windowed registers
+//		must be saved and restored (a 4 cycle operation).
+*/
+module sparc_exu_rml_cwp (/*AUTOARG*/
+   // Outputs
+   rml_ecl_cwp_d, rml_ecl_cwp_e, exu_tlu_cwp0_w, exu_tlu_cwp1_w, 
+   exu_tlu_cwp2_w, exu_tlu_cwp3_w, rml_irf_cwpswap_tid_e, old_cwp_e, 
+   new_cwp_e, swap_locals_ins, swap_outs, exu_tlu_spill, 
+   exu_tlu_spill_wtype, exu_tlu_spill_other, exu_tlu_spill_tid, 
+   rml_ecl_swap_done, exu_tlu_cwp_cmplt, exu_tlu_cwp_cmplt_tid, 
+   exu_tlu_cwp_retry, oddwin_w, 
+   // Inputs (note: next_cwp, listed last below, is declared as an output)
+   clk, se, reset, rst_tri_en, rml_ecl_wtype_e, rml_ecl_other_e, 
+   exu_tlu_spill_e, tlu_exu_cwpccr_update_m, tlu_exu_cwp_retry_m, 
+   tlu_exu_cwp_m, thr_d, ecl_rml_thr_m, ecl_rml_thr_w, tid_e, 
+   next_cwp_w, next_cwp_e, cwp_wen_w, save_e, restore_e, 
+   ifu_exu_flushw_e, ecl_rml_cwp_wen_e, full_swap_e, rml_kill_w, next_cwp
+   ) ;
+   input clk;
+   input se;          // wired to the .se pin of every flop instance and of the arbiter
+   input reset;       // resets the swap-slot flops, the slot arbiter and just_swapped
+   input rst_tri_en;  // when set, swap_sel is forced to select slot 0 only
+   input [2:0] rml_ecl_wtype_e;          // stored in swap slot bits 11:9
+   input       rml_ecl_other_e;          // stored in swap slot bit 8
+   input       exu_tlu_spill_e;          // stored in swap slot bit 6
+   input       tlu_exu_cwpccr_update_m;  // TLU CWP update request; flopped to W below
+   input       tlu_exu_cwp_retry_m;      // flopped to W, then stored in swap slot bit 12
+   input [2:0] tlu_exu_cwp_m; // for switching cwp on return from trap
+   input [3:0] thr_d;          // per-thread select for rml_ecl_cwp_d
+   input [3:0] ecl_rml_thr_m;  // per-thread select for trap_old_cwp_m
+   input [3:0] ecl_rml_thr_w;  // per-thread select for all W-stage writes
+   input [1:0] tid_e;          // encoded thread id, decoded into thr_e below
+   input [2:0] next_cwp_w;     // value written to the CWP register when cwp_wen_w is set
+   input [2:0] next_cwp_e;     // new CWP in E; also stored in swap slot bits 5:3
+   input       cwp_wen_w;
+   input       save_e;
+   input       restore_e;
+   input       ifu_exu_flushw_e;
+   input       ecl_rml_cwp_wen_e;
+   input       full_swap_e;    // loads a swap request for thread thr_e into its slot
+   input       rml_kill_w;     // cancels the W-stage TLU update / the W-stage full swap
+
+   output [2:0] rml_ecl_cwp_d;
+   output [2:0] rml_ecl_cwp_e;
+   output [2:0] exu_tlu_cwp0_w;
+   output [2:0] exu_tlu_cwp1_w;
+   output [2:0] exu_tlu_cwp2_w;
+   output [2:0] exu_tlu_cwp3_w;
+   output [1:0] rml_irf_cwpswap_tid_e;
+   output [2:0] old_cwp_e;
+   output [2:0] new_cwp_e;
+   output       swap_locals_ins;
+   output       swap_outs;
+   output      exu_tlu_spill;
+   output [2:0] exu_tlu_spill_wtype;
+   output       exu_tlu_spill_other;
+   output [1:0] exu_tlu_spill_tid;
+   output [3:0] rml_ecl_swap_done;
+   output       exu_tlu_cwp_cmplt;
+   output [1:0] exu_tlu_cwp_cmplt_tid;
+   output       exu_tlu_cwp_retry;
+   output [3:0] oddwin_w;
+   output [11:0] next_cwp;   // {thr3, thr2, thr1, thr0} next-cycle CWP values
+   
+   wire         can_swap;
+   wire         swapping;
+   wire         just_swapped;
+   wire         full_swap_m;
+   wire         full_swap_w;
+   wire [3:0]   swap_done_next_cycle;
+   wire [3:0] swap_sel_input;
+   wire [3:0] swap_sel_tlu;
+   wire [3:0] swap_keep_value;
+   wire [2:0]  trap_old_cwp_m;
+   wire   tlu_cwp_no_change;
+   wire [2:0] tlu_cwp_xor;
+   wire   cwp_cmplt_next;
+   wire [1:0] cwp_cmplt_tid_next;
+   wire       cwp_retry_next;
+   wire   cwp_fastcmplt_m;
+   wire   cwp_fastcmplt_w;
+   wire   cwpccr_update_w;
+   wire   valid_tlu_swap_w;
+   wire [2:0] tlu_exu_cwp_w;
+   wire       tlu_exu_cwp_retry_w;
+
+   wire [3:0] swap_thr;
+   wire [1:0] swap_tid;
+   wire [3:0] swap_req_vec;
+   wire       kill_swap_slot_w;
+   wire [3:0] thr_e;
+   
+   wire [1:0] swap_slot0_state;
+   wire [1:0] swap_slot1_state;
+   wire [1:0] swap_slot2_state;
+   wire [1:0] swap_slot3_state;
+   wire [1:0] swap_slot0_state_valid;
+   wire [1:0] swap_slot1_state_valid;
+   wire [1:0] swap_slot2_state_valid;
+   wire [1:0] swap_slot3_state_valid;
+   wire [1:0] next_slot0_state;
+   wire [1:0] next_slot1_state;
+   wire [1:0] next_slot2_state;
+   wire [1:0] next_slot3_state;
+   wire [3:0] swap_keep_state;
+   wire [3:0] swap_next_state;
+   wire [1:0] swap_state;
+
+   wire [3:0] next_swap_thr;
+   wire [12:0] swap_data;
+   wire [12:0] tlu_swap_data;
+   wire [12:0] swap_input_data;
+   wire [12:0] next_slot0_data;
+   wire [12:0] next_slot1_data;
+   wire [12:0] next_slot2_data;
+   wire [12:0] next_slot3_data;
+   wire [12:0] swap_slot0_data;
+   wire [12:0] swap_slot1_data;
+   wire [12:0] swap_slot2_data;
+   wire [12:0] swap_slot3_data;
+
+   wire        new_cwp_sel_swap;
+   wire [2:0]  old_swap_cwp;
+   wire [2:0]  new_swap_cwp;
+
+   
+   // wires for cwp register
+   wire [2:0]   cwp_thr0;
+   wire [2:0]   cwp_thr1;
+   wire [2:0]   cwp_thr2;
+   wire [2:0]   cwp_thr3;
+   wire [2:0]   cwp_thr0_next;
+   wire [2:0]   cwp_thr1_next;
+   wire [2:0]   cwp_thr2_next;
+   wire [2:0]   cwp_thr3_next;
+   wire          cwp_wen_thr0_w;
+   wire          cwp_wen_thr1_w;
+   wire          cwp_wen_thr2_w;
+   wire          cwp_wen_thr3_w;
+   wire [3:0]    cwp_wen_tlu_w;
+   wire [3:0] cwp_wen_spill;
+   wire [2:0] spill_cwp;
+   wire [3:0]    cwp_wen_l;
+   wire [2:0]    old_cwp_w;
+   wire        spill_next;
+   wire [1:0]  spill_tid_next;
+   wire        spill_other_next;
+   wire [2:0]  spill_wtype_next;
+
+   // decode tid_e into the per-thread vector thr_e (exactly one bit set)
+   assign        thr_e[0] = ~tid_e[1] & ~tid_e[0];
+   assign        thr_e[1] = ~tid_e[1] & tid_e[0];
+   assign        thr_e[2] = tid_e[1] & ~tid_e[0];
+   assign        thr_e[3] = tid_e[1] & tid_e[0];
+   
+   /////////////////////////////////
+   // CWP output to IRF
+   /////////////////////////////////
+   // Thread id sent to the IRF: tid_e while can_swap is low, else the queued swap's thread
+   mux2ds #(2) irf_thr_mux(.dout(rml_irf_cwpswap_tid_e[1:0]),
+                              .in0(tid_e[1:0]),
+                              .in1(swap_tid[1:0]),
+                              .sel0(~can_swap),
+                              .sel1(can_swap));
+   // Output cwp_e for save, restore, flushw
+   // and swap_cwp from queue for swap restores (default)
+   // Need to have an incremented cwp for swap of outs
+   assign        old_swap_cwp[2:0] = swap_data[2:0];
+   assign        new_swap_cwp[2:0] = swap_data[5:3];
+   
+   assign        new_cwp_sel_swap = can_swap;
+
+   assign new_cwp_e[2:0] = (new_cwp_sel_swap)?  new_swap_cwp[2:0]: next_cwp_e[2:0];
+   assign old_cwp_e[2:0] = (new_cwp_sel_swap)?  old_swap_cwp[2:0]: rml_ecl_cwp_e[2:0];
+   
+ 
+   /////////////////////////////////
+   // CWP register
+   /////////////////////////////////
+   assign exu_tlu_cwp0_w[2:0] = cwp_thr0[2:0];
+   assign exu_tlu_cwp1_w[2:0] = cwp_thr1[2:0];
+   assign exu_tlu_cwp2_w[2:0] = cwp_thr2[2:0];
+   assign exu_tlu_cwp3_w[2:0] = cwp_thr3[2:0];
+   // CWP of the thread in W; used as bits 2:0 (old CWP) of a TLU swap request
+   mux4ds #(3) mux_cwp_old_w(.dout(old_cwp_w[2:0]), .sel0(ecl_rml_thr_w[0]),
+                             .sel1(ecl_rml_thr_w[1]), .sel2(ecl_rml_thr_w[2]),
+                             .sel3(ecl_rml_thr_w[3]), .in0(cwp_thr0[2:0]),
+                             .in1(cwp_thr1[2:0]), .in2(cwp_thr2[2:0]),
+                             .in3(cwp_thr3[2:0]));
+
+   //  Read ports of the CWP registers for the threads in D, E and M
+   mux4ds #(3) mux_cwp_out_d(.dout(rml_ecl_cwp_d[2:0]), .sel0(thr_d[0]),
+                             .sel1(thr_d[1]), .sel2(thr_d[2]),
+                             .sel3(thr_d[3]), .in0(cwp_thr0[2:0]),
+                             .in1(cwp_thr1[2:0]), .in2(cwp_thr2[2:0]),
+                             .in3(cwp_thr3[2:0]));
+   mux4ds #(3) mux_cwp_out_e(.dout(rml_ecl_cwp_e[2:0]), .sel0(thr_e[0]),
+                             .sel1(thr_e[1]), .sel2(thr_e[2]),
+                             .sel3(thr_e[3]), .in0(cwp_thr0[2:0]),
+                             .in1(cwp_thr1[2:0]), .in2(cwp_thr2[2:0]),
+                             .in3(cwp_thr3[2:0]));
+   mux4ds #(3) mux_cwp_trap(.dout(trap_old_cwp_m[2:0]), .sel0(ecl_rml_thr_m[0]),
+                             .sel1(ecl_rml_thr_m[1]), .sel2(ecl_rml_thr_m[2]),
+                             .sel3(ecl_rml_thr_m[3]), .in0(cwp_thr0[2:0]),
+                             .in1(cwp_thr1[2:0]), .in2(cwp_thr2[2:0]),
+                             .in3(cwp_thr3[2:0]));
+
+   //////////////////////////////////////
+   //  Storage of cwp
+   //////////////////////////////////////
+   // write selects per thread; priority: spill completion > W-stage write > TLU update
+   assign     cwp_wen_spill[3:0] = swap_thr[3:0] & {4{spill_next}};
+   assign        cwp_wen_thr0_w = ((ecl_rml_thr_w[0] & cwp_wen_w)) & ~cwp_wen_spill[0];
+   assign        cwp_wen_thr1_w = ((ecl_rml_thr_w[1] & cwp_wen_w)) & ~cwp_wen_spill[1];
+   assign        cwp_wen_thr2_w = ((ecl_rml_thr_w[2] & cwp_wen_w)) & ~cwp_wen_spill[2];
+   assign        cwp_wen_thr3_w = ((ecl_rml_thr_w[3] & cwp_wen_w)) & ~cwp_wen_spill[3];
+   assign        cwp_wen_tlu_w[3:0] = ecl_rml_thr_w[3:0] & {4{valid_tlu_swap_w}} & ~cwp_wen_spill &
+                                       {~cwp_wen_thr3_w,~cwp_wen_thr2_w,~cwp_wen_thr1_w,~cwp_wen_thr0_w};
+   assign        cwp_wen_l[3:0] = ~(cwp_wen_tlu_w[3:0] | cwp_wen_spill[3:0] |
+                                    {cwp_wen_thr3_w,cwp_wen_thr2_w, cwp_wen_thr1_w,cwp_wen_thr0_w}); // set = no write, hold
+
+   // oddwin_w is the new value of cwp[0]
+   assign        oddwin_w[3:0] = {cwp_thr3_next[0],cwp_thr2_next[0],cwp_thr1_next[0],cwp_thr0_next[0]};
+   assign        next_cwp={cwp_thr3_next,cwp_thr2_next,cwp_thr1_next,cwp_thr0_next};
+   // next CWP per thread: hold (in0), W-stage write (in1), TLU value (in2) or spill NewCWP (in3)
+   mux4ds #(3) cwp_next0_mux(.dout(cwp_thr0_next[2:0]),
+                             .in0(cwp_thr0[2:0]),
+                             .in1(next_cwp_w[2:0]),
+                             .in2(tlu_exu_cwp_w[2:0]),
+                             .in3(spill_cwp[2:0]),
+                             .sel0(cwp_wen_l[0]),
+                             .sel1(cwp_wen_thr0_w),
+                             .sel2(cwp_wen_tlu_w[0]),
+                             .sel3(cwp_wen_spill[0]));
+   mux4ds #(3) cwp_next1_mux(.dout(cwp_thr1_next[2:0]),
+                             .in0(cwp_thr1[2:0]),
+                             .in1(next_cwp_w[2:0]),
+                             .in2(tlu_exu_cwp_w[2:0]),
+                             .in3(spill_cwp[2:0]),
+                             .sel0(cwp_wen_l[1]),
+                             .sel1(cwp_wen_thr1_w),
+                             .sel2(cwp_wen_tlu_w[1]),
+                             .sel3(cwp_wen_spill[1]));
+   mux4ds #(3) cwp_next2_mux(.dout(cwp_thr2_next[2:0]),
+                             .in0(cwp_thr2[2:0]),
+                             .in1(next_cwp_w[2:0]),
+                             .in2(tlu_exu_cwp_w[2:0]),
+                             .in3(spill_cwp[2:0]),
+                             .sel0(cwp_wen_l[2]),
+                             .sel1(cwp_wen_thr2_w),
+                             .sel2(cwp_wen_tlu_w[2]),
+                             .sel3(cwp_wen_spill[2]));
+   mux4ds #(3) cwp_next3_mux(.dout(cwp_thr3_next[2:0]),
+                             .in0(cwp_thr3[2:0]),
+                             .in1(next_cwp_w[2:0]),
+                             .in2(tlu_exu_cwp_w[2:0]),
+                             .in3(spill_cwp[2:0]),
+                             .sel0(cwp_wen_l[3]),
+                             .sel1(cwp_wen_thr3_w),
+                             .sel2(cwp_wen_tlu_w[3]),
+                             .sel3(cwp_wen_spill[3]));
+
+   // store new value (dff_s instances: no reset pin is connected to the CWP flops)
+   dff_s #(3) dff_cwp_thr0(.din(cwp_thr0_next[2:0]), .clk(clk), .q(cwp_thr0[2:0]),
+                       .se(se), .si(), .so());
+   dff_s #(3) dff_cwp_thr1(.din(cwp_thr1_next[2:0]), .clk(clk), .q(cwp_thr1[2:0]),
+                       .se(se), .si(), .so());
+   dff_s #(3) dff_cwp_thr2(.din(cwp_thr2_next[2:0]), .clk(clk), .q(cwp_thr2[2:0]),
+                       .se(se), .si(), .so());
+   dff_s #(3) dff_cwp_thr3(.din(cwp_thr3_next[2:0]), .clk(clk), .q(cwp_thr3[2:0]),
+                       .se(se), .si(), .so());
+
+
+
+   ////////////////////////////////////////////
+   // Queue for full window swaps
+   ////////////////////////////////////////////
+   // A full swap of the current window requires a 2 cycle operation.
+   // Each cycle must make sure that
+   // there isn't another instruction trying to save or restore on top of it.
+   // The same thread also cannot issue a swap to irf in back-to-back cycles.
+   // Data is stored as follows (one 13-bit entry per thread):
+   //   2:0 - CWP
+   //   5:3 - NewCWP
+   //   6   - !WRCWP/SPILL (from exu_tlu_spill_e; always 0 for TLU requests)
+   //   7   - Trap return (1 for TLU requests, 0 for E-stage requests)
+   //   8   - OTHER (for spill trap)
+   //   11:9- WTYPE (for spill trap)
+   //   12  - Retry (for trap return; from tlu_exu_cwp_retry_w)
+   dff_s full_swap_e2m(.din(full_swap_e), .clk(clk), .q(full_swap_m), .se(se), .si(), .so());
+   dff_s full_swap_m2w(.din(full_swap_m), .clk(clk), .q(full_swap_w), .se(se), .si(), .so());
+   assign     swap_input_data = {1'b0, rml_ecl_wtype_e[2:0], rml_ecl_other_e, 1'b0, exu_tlu_spill_e, 
+                                 next_cwp_e[2:0],rml_ecl_cwp_e[2:0]};
+   assign     tlu_swap_data = {tlu_exu_cwp_retry_w, 4'b0, 1'b1, 1'b0, tlu_exu_cwp_w[2:0], old_cwp_w[2:0]};
+
+   // slot load selects: an E-stage full swap wins over a W-stage TLU request for the same thread
+   assign     swap_sel_input[3:0] = thr_e[3:0] & {4{full_swap_e}};
+   assign     swap_sel_tlu[3:0] = ecl_rml_thr_w[3:0] & {4{cwpccr_update_w}} 
+                                    & ~swap_sel_input[3:0];
+   assign     swap_keep_value[3:0] = ~(swap_sel_tlu[3:0] | swap_sel_input[3:0]);
+   assign     swap_keep_state[3:0] = ~(swap_sel_tlu[3:0] | swap_sel_input[3:0]) & 
+                                        ~(swap_thr[3:0] & {4{can_swap}});
+   assign     swap_next_state[3:0] = ~(swap_sel_tlu[3:0] | swap_sel_input[3:0]) 
+                                         & (swap_thr[3:0] & {4{can_swap}});
+   mux3ds #(13) slot0_data_mux(.dout(next_slot0_data[12:0]),
+                               .in0(swap_input_data[12:0]),
+                               .in1(tlu_swap_data[12:0]),
+                               .in2(swap_slot0_data[12:0]),
+                               .sel0(swap_sel_input[0]),
+                               .sel1(swap_sel_tlu[0]),
+                               .sel2(swap_keep_value[0]));
+   mux3ds #(13) slot1_data_mux(.dout(next_slot1_data[12:0]),
+                               .in0(swap_input_data[12:0]),
+                               .in1(tlu_swap_data[12:0]),
+                               .in2(swap_slot1_data[12:0]),
+                               .sel0(swap_sel_input[1]),
+                               .sel1(swap_sel_tlu[1]),
+                               .sel2(swap_keep_value[1]));
+   mux3ds #(13) slot2_data_mux(.dout(next_slot2_data[12:0]),
+                               .in0(swap_input_data[12:0]),
+                               .in1(tlu_swap_data[12:0]),
+                               .in2(swap_slot2_data[12:0]),
+                               .sel0(swap_sel_input[2]),
+                               .sel1(swap_sel_tlu[2]),
+                               .sel2(swap_keep_value[2]));
+   mux3ds #(13) slot3_data_mux(.dout(next_slot3_data[12:0]),
+                               .in0(swap_input_data[12:0]),
+                               .in1(tlu_swap_data[12:0]),
+                               .in2(swap_slot3_data[12:0]),
+                               .sel0(swap_sel_input[3]),
+                               .sel1(swap_sel_tlu[3]),
+                               .sel2(swap_keep_value[3]));
+
+   // Muxes for slot state (00 = slot idle, it raises no request).
+   // There are 2 possible active states:
+   // No swap done (01): the locals/ins swap is issued next, then the state moves to 10
+   // Swap locals/ins done (10): the outs swap is issued next, then the slot goes idle
+   mux4ds #(2) slot0_state_mux(.dout(next_slot0_state[1:0]),
+                               .in0(2'b10),
+                               .in1({1'b0, valid_tlu_swap_w}),
+                               .in2(swap_slot0_state_valid[1:0]),
+                               .in3({swap_slot0_state_valid[0], 1'b0}),
+                               .sel0(swap_sel_input[0]),
+                               .sel1(swap_sel_tlu[0]),
+                               .sel2(swap_keep_state[0]),
+                               .sel3(swap_next_state[0]));
+   mux4ds #(2) slot1_state_mux(.dout(next_slot1_state[1:0]),
+                               .in0(2'b10),
+                               .in1({1'b0, valid_tlu_swap_w}),
+                               .in2(swap_slot1_state_valid[1:0]),
+                               .in3({swap_slot1_state_valid[0], 1'b0}),
+                               .sel0(swap_sel_input[1]),
+                               .sel1(swap_sel_tlu[1]),
+                               .sel2(swap_keep_state[1]),
+                               .sel3(swap_next_state[1]));
+   mux4ds #(2) slot2_state_mux(.dout(next_slot2_state[1:0]),
+                               .in0(2'b10),
+                               .in1({1'b0, valid_tlu_swap_w}),
+                               .in2(swap_slot2_state_valid[1:0]),
+                               .in3({swap_slot2_state_valid[0], 1'b0}),
+                               .sel0(swap_sel_input[2]),
+                               .sel1(swap_sel_tlu[2]),
+                               .sel2(swap_keep_state[2]),
+                               .sel3(swap_next_state[2]));
+   mux4ds #(2) slot3_state_mux(.dout(next_slot3_state[1:0]),
+                               .in0(2'b10),
+                               .in1({1'b0, valid_tlu_swap_w}),
+                               .in2(swap_slot3_state_valid[1:0]),
+                               .in3({swap_slot3_state_valid[0], 1'b0}),
+                               .sel0(swap_sel_input[3]),
+                               .sel1(swap_sel_tlu[3]),
+                               .sel2(swap_keep_state[3]),
+                               .sel3(swap_next_state[3]));
+
+   // The kill is only assessed in w1 because back to back swaps are not allowed.
+   // This means that a swap cannot start in the M or W stage.
+   assign     kill_swap_slot_w = rml_kill_w & full_swap_w;
+
+   assign     swap_slot0_state_valid[1:0] = {(swap_slot0_state[1] & ~(kill_swap_slot_w & ecl_rml_thr_w[0])),
+                                             (swap_slot0_state[0])};
+   assign     swap_slot1_state_valid[1:0] = {(swap_slot1_state[1] & ~(kill_swap_slot_w & ecl_rml_thr_w[1])),
+                                             (swap_slot1_state[0])};
+   assign     swap_slot2_state_valid[1:0] = {(swap_slot2_state[1] & ~(kill_swap_slot_w & ecl_rml_thr_w[2])),
+                                             (swap_slot2_state[0])};
+   assign     swap_slot3_state_valid[1:0] = {(swap_slot3_state[1] & ~(kill_swap_slot_w & ecl_rml_thr_w[3])),
+                                             (swap_slot3_state[0])};
+   
+   // Flops for cwp_swap data (15 bits per slot: 2 state bits + 13 data bits)
+   dffr_s #(15) slot0_data_dff(.din({next_slot0_state[1:0], next_slot0_data[12:0]}), .clk(clk), 
+                            .q({swap_slot0_state[1:0], swap_slot0_data[12:0]}), .rst(reset),
+                            .se(se), .si(), .so());
+   dffr_s #(15) slot1_data_dff(.din({next_slot1_state[1:0], next_slot1_data[12:0]}), .clk(clk), 
+                            .q({swap_slot1_state[1:0], swap_slot1_data[12:0]}), .rst(reset),
+                            .se(se), .si(), .so());
+   dffr_s #(15) slot2_data_dff(.din({next_slot2_state[1:0], next_slot2_data[12:0]}), .clk(clk), 
+                            .q({swap_slot2_state[1:0], swap_slot2_data[12:0]}), .rst(reset),
+                            .se(se), .si(), .so());
+   dffr_s #(15) slot3_data_dff(.din({next_slot3_state[1:0], next_slot3_data[12:0]}), .clk(clk), 
+                            .q({swap_slot3_state[1:0], swap_slot3_data[12:0]}), .rst(reset),
+                            .se(se), .si(), .so());
+
+   ////////////////////////////
+   // Control for queue output
+   // ==========================
+   // The queue results go into a flop
+   // so that they can meet timing.
+   ////////////////////////////
+   assign     swap_req_vec[0] = (swap_slot0_state[1] | swap_slot0_state[0]);
+   assign     swap_req_vec[1] = (swap_slot1_state[1] | swap_slot1_state[0]);
+   assign     swap_req_vec[2] = (swap_slot2_state[1] | swap_slot2_state[0]);
+   assign     swap_req_vec[3] = (swap_slot3_state[1] | swap_slot3_state[0]);
+   // arbiter grants one requesting slot; presumably round-robin, judging by its name (TODO confirm)
+   sparc_exu_rndrob cwp_output_queue(// Outputs
+                                     .grant_vec(next_swap_thr[3:0]),
+                                     // Inputs
+                                     .clk(clk),
+                                     .reset(reset),
+                                     .se(se),
+                                     .req_vec(swap_req_vec[3:0]),
+                                     .advance(can_swap));
+   dff_s #(4) dff_swap_thr(.din(next_swap_thr[3:0]), .clk(clk), .q(swap_thr[3:0]),
+                         .se(se), .si(), .so());
+   assign     swap_tid[1] = swap_thr[3] | swap_thr[2];
+   assign     swap_tid[0] = swap_thr[3] | swap_thr[1];
+
+   // make selects one hot: slot 0 is the default and the only choice while rst_tri_en is set
+   wire [3:0] swap_sel;
+   assign swap_sel[0] = ~(swap_thr[1] | swap_thr[2] | swap_thr[3]) | rst_tri_en;
+   assign swap_sel[3:1] = swap_thr[3:1] & {3{~rst_tri_en}};
+
+   mux4ds #(15) cwp_output_mux(.dout({swap_state[1:0], swap_data[12:0]}),
+                               .in0({swap_slot0_state[1:0], swap_slot0_data[12:0]}),
+                               .in1({swap_slot1_state[1:0], swap_slot1_data[12:0]}),
+                               .in2({swap_slot2_state[1:0], swap_slot2_data[12:0]}),
+                               .in3({swap_slot3_state[1:0], swap_slot3_data[12:0]}),
+                               .sel0(swap_sel[0]),
+                               .sel1(swap_sel[1]),
+                               .sel2(swap_sel[2]),
+                               .sel3(swap_sel[3]));
+
+   // To prevent back to back swap requests on the same thread, the queue cannot swap
+   // 2 cycles in a row.  Also swaps can't start in M or W to allow flush to be checked
+   dffr_s can_swap_flop(.din(swapping), .clk(clk), .q(just_swapped), .rst(reset), .se(se), .si(), .so());
+   assign     can_swap = ~(save_e | restore_e | ifu_exu_flushw_e | ecl_rml_cwp_wen_e | just_swapped);
+   assign      swap_locals_ins = can_swap & swap_state[0];
+   assign      swap_outs = can_swap & swap_state[1];
+   assign      swapping = (can_swap & |swap_state[1:0]) | full_swap_e | full_swap_m;
+
+   ///////////////////////////////////
+   // Signals for completion of swaps
+   ///////////////////////////////////
+   assign spill_next = swap_data[6] & ~swap_data[7] & swap_outs; // outs swap of a spill request
+   assign spill_tid_next[1:0] = swap_tid[1:0];
+   //assign exu_tlu_spill_ttype[8:0] = {3'b010, swap_data[8], swap_data[11:9], 2'b00};
+   assign spill_other_next = swap_data[8];
+   assign spill_wtype_next[2:0] = swap_data[11:9];
+   dff_s #(7) spill_dff(.din({spill_next,spill_tid_next[1:0], spill_other_next, spill_wtype_next[2:0]}),
+                      .q({exu_tlu_spill,exu_tlu_spill_tid[1:0], exu_tlu_spill_other, exu_tlu_spill_wtype[2:0]}),
+                      .clk(clk), .se(se), .si(), .so());
+   assign spill_cwp[2:0] = swap_data[5:3];
+/* -----\/----- EXCLUDED -----\/-----
+   dff_s #(3) spill_cwp_dff(.din(swap_data[5:3]), .clk(clk), .q(spill_cwp[2:0]),
+                          .se(se), .si(), .so());
+ -----/\----- EXCLUDED -----/\----- */
+   assign swap_done_next_cycle[3] = (swap_outs & ~swap_data[6] & ~swap_data[7] &
+                                     swap_tid[1] & swap_tid[0]); 
+   assign swap_done_next_cycle[2] = (swap_outs & ~swap_data[6] & ~swap_data[7] &
+                                     swap_tid[1] & ~swap_tid[0]); 
+   assign swap_done_next_cycle[1] = (swap_outs & ~swap_data[6] & ~swap_data[7] &
+                                     ~swap_tid[1] & swap_tid[0]); 
+   assign swap_done_next_cycle[0] = (swap_outs & ~swap_data[6] & ~swap_data[7] &
+                                     ~swap_tid[1] & ~swap_tid[0]); 
+
+   dff_s #(4) swap_done_dff(.din(swap_done_next_cycle[3:0]), .clk(clk),
+                        .q(rml_ecl_swap_done[3:0]), .se(se), .si(), .so());
+
+   dff_s #(4) cwp_cmplt_dff(.din({cwp_cmplt_next, cwp_cmplt_tid_next[1:0], cwp_retry_next}),
+                          .q({exu_tlu_cwp_cmplt,exu_tlu_cwp_cmplt_tid[1:0], exu_tlu_cwp_retry}),
+                          .clk(clk), .si(), .so(), .se(se));
+   assign cwp_cmplt_next = swap_outs & swap_data[7];
+   assign cwp_cmplt_tid_next[1:0] = swap_tid[1:0];
+   assign cwp_retry_next = swap_data[12];
+   // fast completion: the TLU value equals the thread's current CWP, so no swap gets queued
+   assign tlu_cwp_xor[2:0] = trap_old_cwp_m[2:0] ^ tlu_exu_cwp_m[2:0];
+   assign tlu_cwp_no_change = ~(tlu_cwp_xor[2] | tlu_cwp_xor[1] | tlu_cwp_xor[0]); 
+   assign cwp_fastcmplt_m = tlu_exu_cwpccr_update_m & tlu_cwp_no_change;
+
+   dff_s fastcmplt_dff(.din(cwp_fastcmplt_m), .clk(clk),
+                     .q(cwp_fastcmplt_w), .se(se), .si(), .so());
+
+   ///////////////////////////////////////////////////////////
+   // Pipe along tlu_exu_done/retry so inst_vld can be caught
+   ///////////////////////////////////////////////////////////
+   dff_s #(5) tlu_data_dff(.q({cwpccr_update_w,tlu_exu_cwp_w[2:0],tlu_exu_cwp_retry_w}),
+                         .din({tlu_exu_cwpccr_update_m,tlu_exu_cwp_m[2:0],tlu_exu_cwp_retry_m}),
+                         .clk(clk), .se(se), .si(), .so());
+   assign valid_tlu_swap_w = cwpccr_update_w & ~rml_kill_w & ~cwp_fastcmplt_w;
+   
+endmodule // sparc_exu_rml_cwp
Index: /trunk/T1-CPU/exu/sparc_exu_ecc_dec.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_ecc_dec.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_ecc_dec.v	(revision 6)
@@ -0,0 +1,102 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_ecc_dec.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_ecc_dec
+//	Description:  Decodes the result from the ecc checking block
+// 			into a 64 bit value that is used to correct single bit errors.
+//			Correction is performed by e ^ data.
+*/
+module sparc_exu_ecc_dec (/*AUTOARG*/
+   // Outputs
+   e, 
+   // Inputs
+   q
+   ) ; 
+   input  [6:0]  q;   // 7-bit result from the ecc checking block
+   output [63:0] e;   // e[i] is set only when q equals the 7-bit code listed for bit i
+   // Each e[i] is the AND-reduction of (q XNOR code): all seven bits of q must match.
+   // The codes run from 3 to 71 in increasing order, skipping 4, 8, 16, 32 and 64.
+   assign e[0]  = &(q ~^ 7'b0000011);
+   assign e[1]  = &(q ~^ 7'b0000101);
+   assign e[2]  = &(q ~^ 7'b0000110);
+   assign e[3]  = &(q ~^ 7'b0000111);
+   assign e[4]  = &(q ~^ 7'b0001001);
+   assign e[5]  = &(q ~^ 7'b0001010);
+   assign e[6]  = &(q ~^ 7'b0001011);
+   assign e[7]  = &(q ~^ 7'b0001100);
+   assign e[8]  = &(q ~^ 7'b0001101);
+   assign e[9]  = &(q ~^ 7'b0001110);
+   assign e[10] = &(q ~^ 7'b0001111);
+   assign e[11] = &(q ~^ 7'b0010001);
+   assign e[12] = &(q ~^ 7'b0010010);
+   assign e[13] = &(q ~^ 7'b0010011);
+   assign e[14] = &(q ~^ 7'b0010100);
+   assign e[15] = &(q ~^ 7'b0010101);
+   assign e[16] = &(q ~^ 7'b0010110);
+   assign e[17] = &(q ~^ 7'b0010111);
+   assign e[18] = &(q ~^ 7'b0011000);
+   assign e[19] = &(q ~^ 7'b0011001);
+   assign e[20] = &(q ~^ 7'b0011010);
+   assign e[21] = &(q ~^ 7'b0011011);
+   assign e[22] = &(q ~^ 7'b0011100);
+   assign e[23] = &(q ~^ 7'b0011101);
+   assign e[24] = &(q ~^ 7'b0011110);
+   assign e[25] = &(q ~^ 7'b0011111);
+   assign e[26] = &(q ~^ 7'b0100001);
+   assign e[27] = &(q ~^ 7'b0100010);
+   assign e[28] = &(q ~^ 7'b0100011);
+   assign e[29] = &(q ~^ 7'b0100100);
+   assign e[30] = &(q ~^ 7'b0100101);
+   assign e[31] = &(q ~^ 7'b0100110);
+   assign e[32] = &(q ~^ 7'b0100111);
+   assign e[33] = &(q ~^ 7'b0101000);
+   assign e[34] = &(q ~^ 7'b0101001);
+   assign e[35] = &(q ~^ 7'b0101010);
+   assign e[36] = &(q ~^ 7'b0101011);
+   assign e[37] = &(q ~^ 7'b0101100);
+   assign e[38] = &(q ~^ 7'b0101101);
+   assign e[39] = &(q ~^ 7'b0101110);
+   assign e[40] = &(q ~^ 7'b0101111);
+   assign e[41] = &(q ~^ 7'b0110000);
+   assign e[42] = &(q ~^ 7'b0110001);
+   assign e[43] = &(q ~^ 7'b0110010);
+   assign e[44] = &(q ~^ 7'b0110011);
+   assign e[45] = &(q ~^ 7'b0110100);
+   assign e[46] = &(q ~^ 7'b0110101);
+   assign e[47] = &(q ~^ 7'b0110110);
+   assign e[48] = &(q ~^ 7'b0110111);
+   assign e[49] = &(q ~^ 7'b0111000);
+   assign e[50] = &(q ~^ 7'b0111001);
+   assign e[51] = &(q ~^ 7'b0111010);
+   assign e[52] = &(q ~^ 7'b0111011);
+   assign e[53] = &(q ~^ 7'b0111100);
+   assign e[54] = &(q ~^ 7'b0111101);
+   assign e[55] = &(q ~^ 7'b0111110);
+   assign e[56] = &(q ~^ 7'b0111111);
+   assign e[57] = &(q ~^ 7'b1000001);
+   assign e[58] = &(q ~^ 7'b1000010);
+   assign e[59] = &(q ~^ 7'b1000011);
+   assign e[60] = &(q ~^ 7'b1000100);
+   assign e[61] = &(q ~^ 7'b1000101);
+   assign e[62] = &(q ~^ 7'b1000110);
+   assign e[63] = &(q ~^ 7'b1000111);
+endmodule // sparc_exu_ecc_dec
Index: /trunk/T1-CPU/exu/sparc_exu_ecl_mdqctl.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_ecl_mdqctl.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_ecl_mdqctl.v	(revision 6)
@@ -0,0 +1,302 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_ecl_mdqctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_ecl_mdqctl
+//	Description:  This block is the control logic for the multiply/divide
+// 	input buffer.  It generates the select lines for both the output
+//	to mul and div, as well as for moving the data within the buffer.
+//	There are 4 slots in the buffer, which is a modified FIFO.
+//	It will output 1 MUL and 1 DIV every cycle, as well as whether those
+// 	outputs are valid.  If none of the slots contain a valid entry, it
+//	will pass through the input to the output.  If a kill comes through
+//	and invalidates an entry, it will show up on the valid bit coming out
+//	of the mdq, but may cause a lost cycle as the kill won't affect the logic
+//	which chooses the output until the next cycle.  The block also
+//	stores the thr, rd, setcc and other control bits for each entry.
+*/
+
+`define MULS 10    // div_data bit set for a muls op (from ifu_exu_muls_d)
+`define IS64 9     // div_data/mul_data bit: 64-bit op (from ifu_exu_muldivop_d[2])
+`define SIGNED 8   // div_data/mul_data bit: signed op (from ifu_exu_muldivop_d[1])
+`define SET_CC 7   // div_data/mul_data bit: op sets cc (from ifu_exu_muldivop_d[0])
+// Tracks one pending divide/muls entry (div_data) and one pending multiply entry (mul_data).
+module sparc_exu_ecl_mdqctl (/*AUTOARG*/
+   // Outputs
+   mdqctl_divcntl_input_vld, mdqctl_divcntl_reset_div, 
+   mdqctl_divcntl_muldone, ecl_div_div64, ecl_div_signed_div, 
+   ecl_div_muls, mdqctl_wb_divthr_g, mdqctl_wb_divrd_g, 
+   mdqctl_wb_multhr_g, mdqctl_wb_mulrd_g, mdqctl_wb_divsetcc_g, 
+   mdqctl_wb_mulsetcc_g, mdqctl_wb_yreg_shift_g, exu_mul_input_vld, 
+   mdqctl_wb_yreg_wen_g, ecl_div_mul_sext_rs1_e, 
+   ecl_div_mul_sext_rs2_e, ecl_div_mul_get_new_data, 
+   ecl_div_mul_keep_data, ecl_div_mul_get_32bit_data, 
+   ecl_div_mul_wen, div_zero_m, 
+   // Inputs
+   clk, se, reset, ifu_exu_muldivop_d, tid_d, ifu_exu_rd_d, tid_w1, 
+   flush_w1, ifu_exu_inst_vld_w, wb_divcntl_ack_g, divcntl_wb_req_g, 
+   byp_alu_rs1_data_31_e, byp_alu_rs2_data_31_e, mul_exu_ack, 
+   ecl_div_sel_div, ifu_exu_muls_d, div_ecl_detect_zero_high, 
+   div_ecl_detect_zero_low, ifu_tlu_flush_w, early_flush_w
+   ) ;
+   input clk;
+   input se;     // wired to the .se pin of every flop in this module
+   input reset;  // drives .rst of div_data_dff, forces mul_kill, gates next_mul_done to 0
+   input [4:0] ifu_exu_muldivop_d; // [4]=mul, [3]=div, [2]=64bit, [1]=signed, [0]=setcc
+   input [1:0] tid_d;        // captured into bits [1:0] of the div/mul entry
+   input [4:0] ifu_exu_rd_d; // captured into bits [6:2] of the div/mul entry
+   input [1:0] tid_w1;       // compared with the stored tid to qualify flush_w1
+   input       flush_w1;     // kills an entry whose stored tid equals tid_w1
+   input       ifu_exu_inst_vld_w; // when low, a mul/div currently in W is invalidated
+   input       wb_divcntl_ack_g;   // with divcntl_wb_req_g and ecl_div_sel_div: div result used
+   input       divcntl_wb_req_g;
+   input       byp_alu_rs1_data_31_e; // gated with the mul SIGNED bit -> ecl_div_mul_sext_rs1_e
+   input       byp_alu_rs2_data_31_e; // gated with the mul SIGNED bit -> ecl_div_mul_sext_rs2_e
+   input       mul_exu_ack;           // ends mul_ready and starts the mul_done pipeline
+   input       ecl_div_sel_div;
+   input       ifu_exu_muls_d;        // muls op: loads the divide entry with the MULS bit set
+   input       div_ecl_detect_zero_high; // both zero-detects high for a non-muls div in E
+   input       div_ecl_detect_zero_low;  //   -> div_zero_e
+   input       ifu_tlu_flush_w;       // invalidates a div in W (see invalid_div_w)
+   input       early_flush_w;         // invalidates a div in W (see invalid_div_w)
+
+   
+   output      mdqctl_divcntl_input_vld;
+   output      mdqctl_divcntl_reset_div;
+   output      mdqctl_divcntl_muldone;
+   output      ecl_div_div64;
+   output      ecl_div_signed_div;
+   output      ecl_div_muls;
+   output [1:0] mdqctl_wb_divthr_g;
+   output [4:0] mdqctl_wb_divrd_g;
+   output [1:0] mdqctl_wb_multhr_g;
+   output [4:0] mdqctl_wb_mulrd_g;
+   output       mdqctl_wb_divsetcc_g;
+   output       mdqctl_wb_mulsetcc_g;
+   output       mdqctl_wb_yreg_shift_g;
+
+   
+   output       exu_mul_input_vld;
+   output       mdqctl_wb_yreg_wen_g;
+   output       ecl_div_mul_sext_rs1_e;
+   output       ecl_div_mul_sext_rs2_e;
+   output       ecl_div_mul_get_new_data;
+   output       ecl_div_mul_keep_data;
+   output       ecl_div_mul_get_32bit_data;
+   output       ecl_div_mul_wen;
+   output   div_zero_m;
+   // NOTE(review): the file header mentions a 4-slot FIFO; only one div_data and one mul_data register exist here.
+   wire [11:0] div_data_next;
+   wire [11:0] div_data;   // {valid, muls, 64bit, signed, setcc, rd[4:0], tid[1:0]}
+   wire        new_div_vld;
+   wire        curr_div_vld;
+   wire [11:0] div_input_data_d;
+   wire [9:0] mul_input_data_d;
+   wire [9:0] mul_data;    // {64bit, signed, setcc, rd[4:0], tid[1:0]}
+   wire [9:0] mul_data_next;
+   wire        new_mul_d;
+   wire        kill_thr_mul;
+   wire        mul_kill;
+   wire        invalid_mul_w;
+   wire        div_kill;
+   wire        kill_thr_div;
+   
+   wire        mul_ready_next;
+   wire        mul_ready;
+   wire        mul_done_valid_c0;
+   wire        mul_done_valid_c1;
+   wire        mul_done_ack;
+   wire        mul_done_c0;
+   wire        mul_done_c1;
+   wire        mul_done_c2;
+   wire        mul_done_c3;
+
+   wire        isdiv_e_valid;
+   wire        isdiv_m_valid;
+   wire        ismul_e_valid;
+   wire        ismul_m_valid;
+   wire        isdiv_e;
+   wire        isdiv_m;
+   wire        isdiv_w;
+   wire        ismul_e;
+   wire        ismul_m;
+   wire        ismul_w;
+   
+   wire        div_used;
+   wire        invalid_div_w;
+   wire        div_zero_e;
+
+   // Mul result state wires
+   wire        go_mul_done;
+   wire        stay_mul_done;
+   wire        mul_done;
+   wire        next_mul_done;
+   
+   
+   ////////////////////////
+   // Divide  output DATAPATH
+   ////////////////////////
+   // store control signals
+   assign div_used = divcntl_wb_req_g & wb_divcntl_ack_g & ecl_div_sel_div; // request acked while div is selected
+
+   assign new_div_vld = ifu_exu_muls_d | ifu_exu_muldivop_d[3]; // a muls or a div loads the divide entry
+   // entry layout: {valid, muls, 64bit, signed, setcc, rd[4:0], tid[1:0]}
+   assign div_input_data_d[11:0] = {1'b1, // isdiv
+                                    ifu_exu_muls_d,
+                                    ifu_exu_muldivop_d[2], // 64bit
+                                    ifu_exu_muldivop_d[1], // signed
+                                    ifu_exu_muldivop_d[0], // setcc
+                                    ifu_exu_rd_d[4:0],
+                                    tid_d[1:0]};
+   mux2ds #(12) div_data_mux(.dout(div_data_next[11:0]),
+                                .in0({curr_div_vld, div_data[10:0]}), // hold path: only the valid bit is recomputed
+                                .in1(div_input_data_d[11:0]),         // load path: newly issued op
+                                .sel0(~new_div_vld),
+                                .sel1(new_div_vld));
+
+   dffr_s #(12) div_data_dff(.din(div_data_next[11:0]), .clk(clk), .q(div_data[11:0]),
+                          .se(se), .si(), .so(), .rst(reset));
+
+   // div kill logic: the valid bit drops on divide by zero (M), a matching-thread flush, an invalid/flushed div in W, a new div/muls, or once the result is used
+   assign div_zero_e = isdiv_e & div_ecl_detect_zero_high & div_ecl_detect_zero_low & ~div_data[`MULS]; // never raised for muls
+   assign invalid_div_w = isdiv_w & (~ifu_exu_inst_vld_w | ifu_tlu_flush_w | early_flush_w);
+   assign kill_thr_div = ~(div_data[1] ^ tid_w1[1]) & ~(div_data[0] ^ tid_w1[0]); // stored tid == tid_w1
+   assign div_kill = (flush_w1 & kill_thr_div) | invalid_div_w | new_div_vld;
+   assign curr_div_vld = div_data[11] & ~div_zero_m & ~div_kill & ~div_used;
+
+   wire   div_zero_unqual_m;
+   assign div_zero_m = div_zero_unqual_m & isdiv_m; // requalified with isdiv_m, which is 0 if the div was killed in E
+   dff_s div_zero_e2m(.din(div_zero_e), .clk(clk), .q(div_zero_unqual_m), .se(se), .si(), .so());
+   
+   // pipelining for divide valid signal (for inst_vld checking)
+   dff_s isdiv_d2e(.din(new_div_vld), .clk(clk), .q(isdiv_e),
+                 .se(se), .si(), .so());
+   dff_s isdiv_e2m(.din(isdiv_e_valid), .clk(clk), .q(isdiv_m),
+                 .se(se), .si(), .so());
+   dff_s isdiv_m2w(.din(isdiv_m_valid), .clk(clk), .q(isdiv_w),
+                 .se(se), .si(), .so());
+   assign        isdiv_e_valid = isdiv_e & ~div_kill;
+   assign        isdiv_m_valid = isdiv_m & ~div_kill;
+
+   // control for div state machine
+   assign mdqctl_divcntl_reset_div = (~div_data[11] | div_kill); // no valid entry, or entry being killed
+   assign mdqctl_divcntl_input_vld = isdiv_e;                    // unqualified: not masked by div_kill
+
+   // control signals for div
+   assign ecl_div_div64 = div_data[`IS64];
+   assign ecl_div_signed_div = div_data[`SIGNED];
+   assign ecl_div_muls = div_data[`MULS];
+   
+   // control for writeback on completion
+   assign mdqctl_wb_divrd_g[4:0] = div_data[6:2];
+   assign mdqctl_wb_divthr_g[1:0] = div_data[1:0];
+   assign mdqctl_wb_divsetcc_g = div_data[`SET_CC] | div_data[`MULS]; // muls always reports setcc
+   assign mdqctl_wb_yreg_shift_g = div_used & div_data[`MULS];        // only when a muls result is used
+
+   
+   ////////////////////////////////////////////////////////////////////////////
+   // Multiply control
+   //----------------------
+   // The multiply will drop the current operation if a new request is issued.
+   // This requires additional checking to make sure that the kills are for the
+   // proper operation.
+   ////////////////////////////////////////////////////////////////////////////
+   dff_s ismul_d2e(.din(ifu_exu_muldivop_d[4]), .clk(clk), .q(ismul_e),
+                 .se(se), .si(), .so());
+   dff_s ismul_e2m(.din(ismul_e_valid), .clk(clk), .q(ismul_m),
+                 .se(se), .si(), .so());
+   dff_s ismul_m2w(.din(ismul_m_valid), .clk(clk), .q(ismul_w),
+                 .se(se), .si(), .so());
+   assign ismul_e_valid = ismul_e & ~mul_kill;
+   assign        ismul_m_valid = ismul_m & ~mul_kill & ~ismul_e; // a newer mul in E drops the one in M
+   
+   // store control signals
+  //   assign mul_used = divcntl_wb_req_g & wb_divcntl_ack_g & ~ecl_div_sel_div;
+   assign new_mul_d = ifu_exu_muldivop_d[4];
+   // entry layout: {64bit, signed, setcc, rd[4:0], tid[1:0]} (no valid bit; see mul_ready)
+   assign mul_input_data_d[9:0] = {ifu_exu_muldivop_d[2], // 64bit
+                                    ifu_exu_muldivop_d[1], // signed
+                                    ifu_exu_muldivop_d[0], // setcc
+                                    ifu_exu_rd_d[4:0],
+                                    tid_d[1:0]};
+   assign mul_data_next[9:0] = (new_mul_d)? mul_input_data_d[9:0]: mul_data[9:0];
+
+   dff_s #(10) mul_data_dff(.din(mul_data_next[9:0]), .clk(clk), .q(mul_data[9:0]),
+                          .se(se), .si(), .so());
+
+   // mul kill logic
+   assign kill_thr_mul = ~(mul_data[1] ^ tid_w1[1]) & ~(mul_data[0] ^ tid_w1[0]); // stored tid == tid_w1
+   assign mul_kill = (flush_w1 & kill_thr_mul) | reset;
+   assign invalid_mul_w = ismul_w & ~ifu_exu_inst_vld_w;
+   
+   // control signals for mul data in div unit
+   assign      ecl_div_mul_keep_data = ~ismul_e;
+   assign      ecl_div_mul_get_new_data = ismul_e & mul_data[`IS64];
+   assign      ecl_div_mul_get_32bit_data = ismul_e & ~mul_data[`IS64];
+   assign      ecl_div_mul_sext_rs1_e = byp_alu_rs1_data_31_e & mul_data[`SIGNED];
+   assign      ecl_div_mul_sext_rs2_e = byp_alu_rs2_data_31_e & mul_data[`SIGNED];
+
+   // control for writeback on completion
+   assign      mdqctl_wb_yreg_wen_g = ~mul_data[`IS64] & ecl_div_mul_wen; // only non-64-bit muls request a yreg write
+   assign      mdqctl_wb_multhr_g[1:0] = mul_data[1:0];
+   assign      mdqctl_wb_mulsetcc_g = mul_data[`SET_CC];
+   assign      mdqctl_wb_mulrd_g[4:0] = mul_data[6:2];
+
+   // interface with mul and state of pending mul
+   assign      mul_ready_next = ismul_e_valid | (mul_ready & ~mul_exu_ack & ~mul_kill & ~ismul_e & ~invalid_mul_w);
+   dff_s mul_ready_dff(.din(mul_ready_next), .clk(clk), .q(mul_ready), .se(se), .si(), .so());
+   
+   assign      exu_mul_input_vld = mul_ready;
+   
+   // If there was a valid request and an ack then start passing down pipe
+   assign      mul_done_ack = mul_ready & ~mul_kill & ~ismul_e & mul_exu_ack & ~invalid_mul_w;
+   dff_s dff_done_ack2c0(.din(mul_done_ack), .clk(clk), .q(mul_done_c0),
+                       .se(se), .si(), .so());
+   // need to check here because this stage could be w
+   assign        mul_done_valid_c0 = mul_done_c0 & ~mul_kill & ~invalid_mul_w & ~ismul_e;
+   dff_s dff_done_c02c1(.din(mul_done_valid_c0), .clk(clk), .q(mul_done_c1),
+                       .se(se), .si(), .so());
+   // need to check here because this stage could be w1
+   assign        mul_done_valid_c1 = mul_done_c1 & ~mul_kill & ~ismul_e;
+   dff_s dff_done_c1c2(.din(mul_done_valid_c1), .clk(clk), .q(mul_done_c2),
+                       .se(se), .si(), .so());
+   dff_s dff_done_c22c3(.din(mul_done_c2), .clk(clk), .q(mul_done_c3),
+                       .se(se), .si(), .so());
+   dff_s dff_done_c32c4(.din(mul_done_c3), .clk(clk), .q(ecl_div_mul_wen), // fifth flop after mul_done_ack; no kill checks past c1
+                       .se(se), .si(), .so());
+
+   // Mul result state machine
+   assign        go_mul_done = ~mul_done & ecl_div_mul_wen;                         // set when the result write enable fires
+   assign        stay_mul_done = mul_done & (~wb_divcntl_ack_g | ecl_div_sel_div);  // hold until acked with div not selected
+   assign        next_mul_done = ~reset & (go_mul_done | stay_mul_done);
+
+   assign        mdqctl_divcntl_muldone = mul_done;
+
+   // mul state flop
+   dff_s  mulstate_dff(.din(next_mul_done), .clk(clk), .q(mul_done), .se(se), .si(),
+                     .so());
+   
+   /////////////////////////////////////////
+   // Pipeline registers for control signals
+   /////////////////////////////////////////
+
+   
+endmodule // sparc_exu_ecl_mdqctl
Index: /trunk/T1-CPU/exu/sparc_exu_reg.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_reg.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_reg.v	(revision 6)
@@ -0,0 +1,117 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_reg.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module sparc_exu_reg (/*AUTOARG*/
+   // Outputs
+   data_out, 
+   // Inputs
+   clk, se, thr_out, wen_w, thr_w, data_in_w
+   ) ;
+   // Keeps one SIZE-bit value per thread.  A thread's copy takes data_in_w
+   // when wen_w and that thread's thr_w bit are both set and is recirculated
+   // otherwise.  data_out is thread 0's copy when FPGA_SYN_1THREAD is defined,
+   // else the output of a mux4ds steered by thr_out.
+   // No reset is connected to the storage flops.
+
+   parameter SIZE = 3;           // width of the stored value
+
+   // write side
+   input [SIZE-1:0]  data_in_w;   // value to store
+   input [3:0]       thr_w;       // one write-select bit per thread
+   input             wen_w;       // write enable, ANDed with each thr_w bit
+   // read side
+   input [3:0]       thr_out;     // sel0..sel3 of the output mux
+   output [SIZE-1:0] data_out;
+   // clock and scan enable
+   input             clk;
+   input             se;          // goes to the .se pin of each dff_s
+
+   // stored value / next value, one pair per thread
+   // (the thread 1-3 nets stay unused when FPGA_SYN_1THREAD is defined)
+   wire [SIZE-1:0]   data_thr0, data_thr0_next;
+   wire [SIZE-1:0]   data_thr1, data_thr1_next;
+   wire [SIZE-1:0]   data_thr2, data_thr2_next;
+   wire [SIZE-1:0]   data_thr3, data_thr3_next;
+
+   // per-thread qualified write enables
+   wire              wen_thr0_w;
+   wire              wen_thr1_w;
+   wire              wen_thr2_w;
+   wire              wen_thr3_w;
+
+   //////////////////////////////////////
+   //  Thread 0 storage (built in both variants)
+   //////////////////////////////////////
+   assign wen_thr0_w = wen_w & thr_w[0];
+   mux2ds #(SIZE) data_next0_mux(.sel0(~wen_thr0_w), .in0(data_thr0[SIZE-1:0]),
+                                 .sel1(wen_thr0_w),  .in1(data_in_w[SIZE-1:0]),
+                                 .dout(data_thr0_next[SIZE-1:0]));
+   dff_s #(SIZE) dff_reg_thr0(.clk(clk), .se(se), .si(), .so(),
+                              .din(data_thr0_next[SIZE-1:0]),
+                              .q(data_thr0[SIZE-1:0]));
+
+`ifdef FPGA_SYN_1THREAD
+   // single-thread build: data_out comes straight from thread 0;
+   // thr_out and thr_w[3:1] are not used
+   assign data_out[SIZE-1:0] = data_thr0[SIZE-1:0];
+`else // !`ifdef FPGA_SYN_1THREAD
+
+   //////////////////////////////////////
+   //  Threads 1-3 storage
+   //////////////////////////////////////
+   // thread 1
+   assign wen_thr1_w = wen_w & thr_w[1];
+   mux2ds #(SIZE) data_next1_mux(.sel0(~wen_thr1_w), .in0(data_thr1[SIZE-1:0]),
+                                 .sel1(wen_thr1_w),  .in1(data_in_w[SIZE-1:0]),
+                                 .dout(data_thr1_next[SIZE-1:0]));
+   dff_s #(SIZE) dff_reg_thr1(.clk(clk), .se(se), .si(), .so(),
+                              .din(data_thr1_next[SIZE-1:0]),
+                              .q(data_thr1[SIZE-1:0]));
+
+   // thread 2
+   assign wen_thr2_w = wen_w & thr_w[2];
+   mux2ds #(SIZE) data_next2_mux(.sel0(~wen_thr2_w), .in0(data_thr2[SIZE-1:0]),
+                                 .sel1(wen_thr2_w),  .in1(data_in_w[SIZE-1:0]),
+                                 .dout(data_thr2_next[SIZE-1:0]));
+   dff_s #(SIZE) dff_reg_thr2(.clk(clk), .se(se), .si(), .so(),
+                              .din(data_thr2_next[SIZE-1:0]),
+                              .q(data_thr2[SIZE-1:0]));
+
+   // thread 3
+   assign wen_thr3_w = wen_w & thr_w[3];
+   mux2ds #(SIZE) data_next3_mux(.sel0(~wen_thr3_w), .in0(data_thr3[SIZE-1:0]),
+                                 .sel1(wen_thr3_w),  .in1(data_in_w[SIZE-1:0]),
+                                 .dout(data_thr3_next[SIZE-1:0]));
+   dff_s #(SIZE) dff_reg_thr3(.clk(clk), .se(se), .si(), .so(),
+                              .din(data_thr3_next[SIZE-1:0]),
+                              .q(data_thr3[SIZE-1:0]));
+
+   //////////////////////////////////
+   //  Output selection for reg
+   //////////////////////////////////
+   // mux between the 4 regs
+   mux4ds #(SIZE) mux_data_out1(.dout(data_out[SIZE-1:0]),
+                                .sel0(thr_out[0]), .in0(data_thr0[SIZE-1:0]),
+                                .sel1(thr_out[1]), .in1(data_thr1[SIZE-1:0]),
+                                .sel2(thr_out[2]), .in2(data_thr2[SIZE-1:0]),
+                                .sel3(thr_out[3]), .in3(data_thr3[SIZE-1:0]));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+endmodule // sparc_exu_reg
Index: /trunk/T1-CPU/exu/sparc_exu_div.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_div.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_div.v	(revision 6)
@@ -0,0 +1,330 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_div.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*  Module Name: sparc_exu_div
+//  Description: divide datapath (d and x registers, 64-bit adder), staging of multiplier
+//  operands and result, and the sparc_exu_div_yreg instance.  */
+module sparc_exu_div (/*AUTOARG*/
+   // Outputs
+   so, div_ecl_xin_msb_l, div_ecl_x_msb, div_ecl_d_msb, 
+   div_ecl_cout64, div_ecl_cout32, div_ecl_gencc_in_msb_l, 
+   div_ecl_gencc_in_31, div_ecl_upper32_equal, div_ecl_low32_nonzero, 
+   div_ecl_dividend_msb, div_byp_muldivout_g, div_byp_yreg_e, 
+   div_ecl_yreg_0_l, exu_mul_rs1_data, exu_mul_rs2_data, 
+   div_ecl_adder_out_31, div_ecl_detect_zero_low, 
+   div_ecl_detect_zero_high, div_ecl_d_62, 
+   // Inputs
+   ecl_div_yreg_wen_w, ecl_div_yreg_wen_l, ecl_div_yreg_wen_g, 
+   ecl_div_yreg_shift_g, ecl_div_yreg_data_31_g, ecl_div_thr_e, 
+   byp_div_yreg_data_w, rclk, se, si, ecl_div_keep_d, 
+   ecl_div_ld_inputs, ecl_div_sel_adder, ecl_div_last_cycle, 
+   ecl_div_almostlast_cycle, ecl_div_div64, ecl_div_sel_u32, 
+   ecl_div_sel_pos32, ecl_div_sel_neg32, ecl_div_sel_64b, 
+   ecl_div_upper32_zero, ecl_div_upper33_one, ecl_div_upper33_zero, 
+   mul_exu_data_g, ecl_div_sel_div, ecl_div_mul_wen, 
+   ecl_div_dividend_sign, ecl_div_subtract_l, ecl_div_cin, 
+   ecl_div_newq, ecl_div_xinmask, ecl_div_keepx, 
+   ecl_div_mul_get_new_data, ecl_div_mul_keep_data, 
+   ecl_div_mul_get_32bit_data, ecl_div_mul_sext_rs2_e, 
+   ecl_div_mul_sext_rs1_e, byp_div_rs1_data_e, byp_div_rs2_data_e, 
+   ecl_div_muls_rs1_31_e_l, ecl_div_muls, ecl_div_zero_rs2_e
+   ) ;
+   /*AUTOINPUT*/
+   // Beginning of automatic inputs (from unused autoinst inputs)
+   input [31:0]         byp_div_yreg_data_w;    // To yreg of sparc_exu_div_yreg.v
+   input [3:0]          ecl_div_thr_e;          // To yreg of sparc_exu_div_yreg.v
+   input                ecl_div_yreg_data_31_g; // To yreg of sparc_exu_div_yreg.v
+   input [3:0]          ecl_div_yreg_shift_g;   // To yreg of sparc_exu_div_yreg.v
+   input [3:0]          ecl_div_yreg_wen_g;     // To yreg of sparc_exu_div_yreg.v
+   input [3:0]          ecl_div_yreg_wen_l;     // To yreg of sparc_exu_div_yreg.v
+   input [3:0]          ecl_div_yreg_wen_w;     // To yreg of sparc_exu_div_yreg.v
+   // End of automatics
+   input rclk;   // copied onto the internal clk net
+   input se;     // wired to the .se pin of every dff_s and of yreg
+   input si;     // NOTE(review): not read anywhere in this module
+   input        ecl_div_keep_d; // d should store (w/ overflow calcs)
+   input        ecl_div_ld_inputs;// load in d and x
+   input        ecl_div_sel_adder;// d should use adder output
+   input        ecl_div_last_cycle;// last cycle of computations
+   input         ecl_div_almostlast_cycle;// 2nd to last cycle of div
+   input   ecl_div_div64;         // select for dividendmux and xin_mux (upper 32 bits of the operands)
+   input         ecl_div_sel_u32;     // result_mux select for u32
+   input         ecl_div_sel_pos32;   // result_mux select for pos32
+   input         ecl_div_sel_neg32;   // result_mux select for neg32
+   input         ecl_div_sel_64b;     // result_mux select for the raw 64-bit quotient
+   input         ecl_div_upper32_zero; // when low, u32 is forced to all ones in [31:0]
+   input         ecl_div_upper33_one;  // when low, neg32[30:0] is forced to zero
+   input         ecl_div_upper33_zero; // when low, pos32[30:0] is forced to all ones
+   input [63:0]  mul_exu_data_g;       // captured into mul_result; [63:32] also feeds yreg
+   input         ecl_div_sel_div;      // select for output_mux and gencc_mux
+   input         ecl_div_mul_wen;      // select for mul_result_mux (capture mul_exu_data_g)
+   input         ecl_div_dividend_sign; // replicated into the upper bits of din
+   input         ecl_div_subtract_l;     // add/subtract to adder
+   input         ecl_div_cin;
+   input  ecl_div_newq;         // newest q bit
+   input         ecl_div_xinmask;      // fill value offered to xin[63:32] by xin_mux
+   input  ecl_div_keepx;        // x_mux select for holding x
+   input         ecl_div_mul_get_new_data;   // mul_data_mux sel0: take input_data_e
+   input         ecl_div_mul_keep_data;      // mul_data_mux sel2: hold mul_data_out
+   input         ecl_div_mul_get_32bit_data; // mul_data_mux sel1: take mul32_input_data_e
+   input         ecl_div_mul_sext_rs2_e;     // fill for bits [127:96] of mul32_input_data_e
+   input         ecl_div_mul_sext_rs1_e;     // fill for bits [63:32] of mul32_input_data_e
+   input [63:0]  byp_div_rs1_data_e;   // becomes input_data_e[63:0] (dividend side)
+   input [63:0]  byp_div_rs2_data_e;   // becomes input_data_e[127:64] (divisor side)
+   input         ecl_div_muls_rs1_31_e_l; // inverted into din[94] on the muls path of din_mux
+   input         ecl_div_muls;         // select for din_mux
+   input         ecl_div_zero_rs2_e;   // when high, xin[31:0] is forced to zero
+   
+   output        so;   // NOTE(review): never driven in this module (every .so() pin is left open)
+   output div_ecl_xin_msb_l;
+   output div_ecl_x_msb;
+   output div_ecl_d_msb;
+   output div_ecl_cout64;       // cout from adder
+   output div_ecl_cout32;       // cout from adder
+   output        div_ecl_gencc_in_msb_l;
+   output        div_ecl_gencc_in_31;
+   output        div_ecl_upper32_equal;
+   output        div_ecl_low32_nonzero;
+   output        div_ecl_dividend_msb;
+   output [63:0] div_byp_muldivout_g;
+   output [31:0] div_byp_yreg_e;
+   output [3:0]  div_ecl_yreg_0_l;
+   output [63:0]          exu_mul_rs1_data; 
+   output [63:0]          exu_mul_rs2_data;
+   output                 div_ecl_adder_out_31; 
+   output        div_ecl_detect_zero_low;
+   output        div_ecl_detect_zero_high;
+   output        div_ecl_d_62;
+
+   /*AUTOWIRE*/
+   // Beginning of automatic wires (for undeclared instantiated-module outputs)
+   wire [31:0]          yreg_mdq_y_e;           // From yreg of sparc_exu_div_yreg.v
+   // End of automatics
+   wire                 clk;
+   wire [127:0]  din;           // sign extended dividend
+   wire [127:0]  d;             // current dividend/quotient
+   wire [63:0]   adder_out;     // output of adder
+   wire [127:0]  dnext;         // input to d flop
+   wire [127:0]  adder_dnext;   // combination of adder out and quotient
+   wire [63:0]   x;             // divisor
+   wire [63:0]   xin;           // sign extended (for 32bit) divisor
+   wire [63:0]   xnext;         // input to divisor flop
+   wire [63:0]   adderin1;      // first input to adder
+   wire [63:0]   adderin2;      // 2nd input to adder
+   
+   wire [63:0]   curr_q;        // current quotient
+   wire [63:0]   out64;         // 64 bit result
+   wire [63:0]   pos32;         // positive 32 bit result w/ ovfl
+   wire [63:0]   neg32;         // negative 32 bit result w/ ovfl
+   wire [63:0]   u32;           // unsigned 32 bit result w/ ovfl
+   wire [63:0]   gencc_in;
+   wire [63:0]   mul_result;
+   wire [63:0]   mul_result_next;
+   wire [127:0]  input_data_e;
+   wire [63:0]   dividend;
+   wire [63:0]   divisor;
+   wire [127:0]  next_mul_data;
+   wire [127:0]  mul_data_out;
+   wire [127:0]  mul32_input_data_e;
+   wire          subtract;
+   wire [63:0]   spr_out;
+   wire [63:0]   z_in;
+
+   assign        clk = rclk;
+   ///////////////////////////////////////
+   // Input masking for 32 bit operations
+   ///////////////////////////////////////
+   dp_buffer #(128) buf_input_data(.dout(input_data_e[127:0]), 
+                                   .in({byp_div_rs2_data_e[63:0], byp_div_rs1_data_e[63:0]}));
+   // Mux in yreg into upper 32 bits on 32 bit divides
+   dp_mux2es #(32) dividendmux(.dout(dividend[63:32]),
+                             .in0(yreg_mdq_y_e[31:0]),
+                             .in1(input_data_e[63:32]),
+                             .sel(ecl_div_div64));
+   assign        dividend[31:0] = input_data_e[31:0];
+   assign        divisor[63:0] = input_data_e[127:64];
+
+   
+   /////////////////////
+   // Output assignment
+   /////////////////////
+   dp_mux2es #(64) output_mux(.dout(div_byp_muldivout_g[63:0]), .in1(d[63:0]), // in1: low half of d (div side)
+                         .in0(mul_result[63:0]),                                // in0: stored multiplier result
+                         .sel(ecl_div_sel_div));
+   ///////////////////////////
+   // Generate Condition Codes and divide by zero exception and overflow
+   ///////////////////////////
+   dp_mux2es #(64) gencc_mux(.dout(gencc_in[63:0]), 
+                          .in0(mul_result[63:0]),
+                          .in1(curr_q[63:0]),
+                          .sel(ecl_div_sel_div));
+   sparc_exu_div_32eql u32eql(.in(gencc_in[63:32]), .equal(div_ecl_upper32_equal));
+   sparc_exu_aluor32 low32or(// Outputs
+                             .out  (div_ecl_low32_nonzero),
+                             // Inputs
+                             .in    (gencc_in[31:0]));  
+   assign        div_ecl_gencc_in_msb_l = ~gencc_in[63];
+   assign        div_ecl_gencc_in_31 = gencc_in[31];
+   
+   
+   // Division overflow calculations
+   assign        curr_q = d[127:64];   // quotient is read from the upper half of d
+   assign        u32 = {32'b0, (curr_q[31:0] | {32{~ecl_div_upper32_zero}})};     // clamps to 64'h0000_0000_FFFF_FFFF
+   assign        pos32 = {33'b0, (curr_q[30:0] | {31{~ecl_div_upper33_zero}})};   // clamps to 64'h0000_0000_7FFF_FFFF
+   assign        neg32 = {{33{1'b1}}, (curr_q[30:0] & {31{ecl_div_upper33_one}})}; // clamps to 64'hFFFF_FFFF_8000_0000
+   
+   mux4ds #(64) result_mux(.dout(out64[63:0]), .in0(curr_q[63:0]), .in1(u32[63:0]),
+                         .in2(pos32[63:0]), .in3(neg32[63:0]), .sel0(ecl_div_sel_64b),
+                         .sel1(ecl_div_sel_u32), .sel2(ecl_div_sel_pos32),
+                         .sel3(ecl_div_sel_neg32));
+   
+   //////////////////////////
+   // Logic for D (dividend)
+   //////////////////////////
+   
+   // Build the 128-bit din: dividend[62:0] in the low bits, a muxed [94:63] field, ecl_div_dividend_sign in [127:95]
+   assign        div_ecl_dividend_msb = dividend[63];
+   assign        din[62:0] = dividend[62:0];
+   dp_mux2es #(32) din_mux(.dout(din[94:63]),
+                           .in0({{31{ecl_div_dividend_sign}}, dividend[63]}),   // sign fill above dividend[63]
+                           .in1({~ecl_div_muls_rs1_31_e_l, dividend[31:1]}),    // muls path: dividend[31:1] with the rs1_31 bit on top
+                           .sel(ecl_div_muls));
+   assign        din[127:95] = {33{ecl_div_dividend_sign}};
+//   assign        din = {{64{ecl_div_dividend_sign}}, dividend[63:0]};
+
+
+   // Select input to FF for d
+   mux3ds #(128) d_mux(.dout(dnext[127:0]), .in0({d[127:64], out64[63:0]}),  // in0: keep upper half, load out64 below
+                     .in1(adder_dnext[127:0]), .in2(din[127:0]),              // in1: adder step, in2: fresh operands
+                     .sel0(ecl_div_keep_d),
+                     .sel1(ecl_div_sel_adder),
+                     .sel2(ecl_div_ld_inputs));
+   assign        div_ecl_d_62 = d[62];
+
+   // FF for d
+   dff_s #(128) d_dff(.din(dnext[127:0]), .clk(clk), .q(d[127:0]), .se(se), .si(), .so());
+
+   ////////////////////////////
+   // Logic for X (divisor)
+   ////////////////////////////
+   // if signed div and 32 bits sign extend to upper 32 bits
+   dp_mux2es #(32) xin_mux(.dout(xin[63:32]), .in1(divisor[63:32]),
+                      .in0({32{ecl_div_xinmask}}),
+                      .sel(ecl_div_div64));
+   assign        xin[31:0] = divisor[31:0] & {32{~ecl_div_zero_rs2_e}};
+   //assign xin[31:0] = divisor[31:0];
+
+   // Pick between held x (in0), the new divisor xin (in1) and all zeros (in2, on ecl_div_almostlast_cycle)
+   mux3ds #(64) x_mux(.dout(xnext[63:0]), .in0(x[63:0]), .in1(xin[63:0]), .in2({64'b0}),
+                    .sel0(ecl_div_keepx),
+                    .sel1(ecl_div_ld_inputs),
+                    .sel2(ecl_div_almostlast_cycle));
+   // NOTE(review): the original comment called the third choice "1"; the value wired to in2 is 64'b0.
+   // FF for x
+   dff_s #(64) x_dff(.din(xnext[63:0]), .clk(clk), .q(x[63:0]), .se(se), .si(), .so());
+
+
+   ///////////////////////////
+   // Logic for inputs to adder
+   //////////////////////////
+   assign div_ecl_xin_msb_l = ~xin[63];
+   assign div_ecl_x_msb = x[63];
+   assign div_ecl_d_msb = d[127];
+   dp_mux2es #(64) in1_mux(.dout(adderin1[63:0]), .in0(d[126:63]),
+                      .in1({d[62:0], ecl_div_newq}), .sel(ecl_div_last_cycle));
+
+   assign subtract = ~ecl_div_subtract_l;
+   assign        adderin2[63:0] = x[63:0] ^ {64{subtract}};   // x is bit-inverted when subtracting
+
+   //////////////////////////
+   //  Adder
+   /////////////////////////
+   sparc_exu_aluadder64 add64(// Outputs
+                              .adder_out(adder_out[63:0]),
+                              .cout32   (div_ecl_cout32),
+                              .cout64   (div_ecl_cout64),
+                              // Inputs
+                              .rs1_data (adderin1[63:0]),
+                              .rs2_data (adderin2[63:0]),
+                              .cin      (ecl_div_cin));
+
+   assign        adder_dnext = {adder_out[63:0], d[62:0], ecl_div_newq}; // adder result on top, low bits shifted left with the new q bit
+   assign        div_ecl_adder_out_31 = adder_out[31];
+
+   // sum predict and zero detection
+   sparc_exu_aluspr spr(.rs1_data(adderin1[63:0]), .rs2_data(adderin2[63:0]), .cin(ecl_div_cin),
+                        .spr_out(spr_out[63:0]));   
+   dp_mux2es #(64) zero_detect_mux(.dout(z_in[63:0]),
+                                   .in0(spr_out[63:0]),
+                                   .in1(xin[63:0]),
+                                   .sel(ecl_div_ld_inputs));
+   //sparc_exu_aluzcmp64 regzcmp(.in(z_in[63:0]), .zero64(div_ecl_detect_zero));
+   assign        div_ecl_detect_zero_low = ~(|z_in[31:0]);    // z_in[31:0] is all zeros
+   assign        div_ecl_detect_zero_high = ~(|z_in[63:32]);  // z_in[63:32] is all zeros
+   
+
+   // y register
+   assign        div_byp_yreg_e = yreg_mdq_y_e;
+   sparc_exu_div_yreg yreg(.mul_div_yreg_data_g(mul_exu_data_g[63:32]),
+                           /*AUTOINST*/
+                           // Outputs
+                           .yreg_mdq_y_e(yreg_mdq_y_e[31:0]),
+                           .div_ecl_yreg_0_l(div_ecl_yreg_0_l[3:0]),
+                           // Inputs
+                           .clk         (clk),
+                           .se          (se),
+                           .byp_div_yreg_data_w(byp_div_yreg_data_w[31:0]),
+                           .ecl_div_thr_e(ecl_div_thr_e[3:0]),
+                           .ecl_div_yreg_wen_w(ecl_div_yreg_wen_w[3:0]),
+                           .ecl_div_yreg_wen_g(ecl_div_yreg_wen_g[3:0]),
+                           .ecl_div_yreg_wen_l(ecl_div_yreg_wen_l[3:0]),
+                           .ecl_div_yreg_data_31_g(ecl_div_yreg_data_31_g),
+                           .ecl_div_yreg_shift_g(ecl_div_yreg_shift_g[3:0]));
+   
+   
+   //////////////////////////////////
+   // MULTIPLIER inputs
+   //////////////////////////////////                  
+   assign        mul32_input_data_e[127:64] = {{32{ecl_div_mul_sext_rs2_e}}, input_data_e[95:64]}; // low 32 bits of rs2, upper half filled
+   assign        mul32_input_data_e[63:0] = {{32{ecl_div_mul_sext_rs1_e}}, input_data_e[31:0]};    // low 32 bits of rs1, upper half filled
+   mux3ds #(128) mul_data_mux(.dout(next_mul_data[127:0]),
+                              .in0(input_data_e[127:0]),
+                              .in1(mul32_input_data_e[127:0]),
+                              .in2(mul_data_out[127:0]),
+                              .sel0(ecl_div_mul_get_new_data),
+                              .sel1(ecl_div_mul_get_32bit_data),
+                              .sel2(ecl_div_mul_keep_data));
+   dff_s #(128) mul_data_dff(.din(next_mul_data[127:0]), .clk(clk), .q(mul_data_out[127:0]),
+                           .se(se), .si(), .so());
+   assign        exu_mul_rs1_data = mul_data_out[63:0];
+   assign        exu_mul_rs2_data = mul_data_out[127:64];
+
+   ///////////////////////////////////
+   // Store output from mul
+   //////////////////////////////////
+   dp_mux2es #(64) mul_result_mux(.dout(mul_result_next[63:0]), .in0(mul_result[63:0]), // in0: hold
+                           .in1(mul_exu_data_g[63:0]),                                   // in1: new multiplier output
+                           .sel(ecl_div_mul_wen));
+   dff_s #(64) mul_result_dff(.din(mul_result_next[63:0]), .clk(clk), .q(mul_result[63:0]),
+                        .se(se), .si(), .so());
+
+   
+endmodule // sparc_exu_div
Index: /trunk/T1-CPU/exu/sparc_exu_div_32eql.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_div_32eql.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_div_32eql.v	(revision 6)
@@ -0,0 +1,73 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_div_32eql.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module sparc_exu_div_32eql (/*AUTOARG*/
+   // Outputs
+   equal, 
+   // Inputs
+   in
+   ) ;
+   //------------------------------------------------------------------
+   // Adjacent-bit difference detector for a 32-bit word.
+   //
+   // Each neighbouring bit pair of `in` is XORed into one bit of
+   // `inxor`; a 1 there marks a position where the two bits differ.
+   // The difference vector is handed to sparc_exu_aluor32 (defined
+   // elsewhere; presumably a 32-input OR -- TODO confirm) and `equal`
+   // is the complement of that instance's output.  Under that
+   // assumption `equal` is high exactly when all 32 bits of `in`
+   // carry the same value.
+   //
+   // The module holds no state of its own: it consists of continuous
+   // assignments plus the single or32 instance.
+   //------------------------------------------------------------------
+
+   input  [31:0] in;        // word to examine
+   output        equal;     // complement of notequal
+
+   wire   [31:0] inxor;     // inxor[k] = in[32-k] ^ in[31-k], k = 1..31
+   wire          notequal;  // .out of the or32 instance
+
+   // Bit 0 has no pair to compare, so it is tied low.
+   assign inxor[0] = 1'b0;
+
+   // One XOR per adjacent pair, indexed from the MSB side:
+   //   k = 1  -> in[31] ^ in[30]
+   //   k = 16 -> in[16] ^ in[15]
+   //   k = 31 -> in[1]  ^ in[0]
+   genvar k;
+   generate
+      for (k = 1; k < 32; k = k + 1) begin : adj_xor
+         assign inxor[k] = in[32-k] ^ in[31-k];
+      end
+   endgenerate
+
+   // Collapse the 32 difference flags into the single notequal bit.
+   sparc_exu_aluor32 or32 (
+      // Outputs
+      .out (notequal),
+      // Inputs
+      .in  (inxor[31:0])
+   );
+
+   // Module output is the inverted reduction result.
+   assign equal = ~notequal;
+
+endmodule // sparc_exu_div_32eql
Index: /trunk/T1-CPU/exu/sparc_exu_byp.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_byp.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_byp.v	(revision 6)
@@ -0,0 +1,550 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_byp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_byp
+//	Description: This block includes the muxes for the bypassing for all
+//		3 register outputs.  It also includes the pipeline registers 
+//		for the output of the ALU.  All other operands come from
+// 		outside the bypass block.  Rs1_data chooses between the normal
+//		bypassing paths and the PC.  Rs2_data chooses between the normal
+//		bypassing paths and the immediate.
+*/
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module sparc_exu_byp   // bypass/forwarding muxes plus the rd_data and rs*_data pipeline flops
+( /*AUTOARG*/
+   // Outputs
+   so, byp_alu_rs1_data_e, byp_alu_rs2_data_e_l, byp_alu_rs2_data_e, 
+   exu_lsu_rs3_data_e, exu_spu_rs3_data_e, exu_lsu_rs2_data_e, 
+   byp_alu_rcc_data_e, byp_irf_rd_data_w, exu_tlu_wsr_data_m, 
+   byp_irf_rd_data_w2, byp_ecc_rs3_data_e, byp_ecc_rcc_data_e, 
+   byp_ecl_rs2_31_e, byp_ecl_rs1_31_e, byp_ecl_rs1_63_e, 
+   byp_ecl_rs1_2_0_e, byp_ecl_rs2_3_0_e, byp_ecc_rs1_synd_d, 
+   byp_ecc_rs2_synd_d, byp_ecc_rs3_synd_d, 
+   // Inputs
+   rclk, se, si, sehold, ecl_byp_rs1_mux2_sel_e, 
+   ecl_byp_rs1_mux2_sel_rf, ecl_byp_rs1_mux2_sel_ld, 
+   ecl_byp_rs1_mux2_sel_usemux1, ecl_byp_rs1_mux1_sel_m, 
+   ecl_byp_rs1_mux1_sel_w, ecl_byp_rs1_mux1_sel_w2, 
+   ecl_byp_rs1_mux1_sel_other, ecl_byp_rcc_mux2_sel_e, 
+   ecl_byp_rcc_mux2_sel_rf, ecl_byp_rcc_mux2_sel_ld, 
+   ecl_byp_rcc_mux2_sel_usemux1, ecl_byp_rcc_mux1_sel_m, 
+   ecl_byp_rcc_mux1_sel_w, ecl_byp_rcc_mux1_sel_w2, 
+   ecl_byp_rcc_mux1_sel_other, ecl_byp_rs2_mux2_sel_e, 
+   ecl_byp_rs2_mux2_sel_rf, ecl_byp_rs2_mux2_sel_ld, 
+   ecl_byp_rs2_mux2_sel_usemux1, ecl_byp_rs2_mux1_sel_m, 
+   ecl_byp_rs2_mux1_sel_w, ecl_byp_rs2_mux1_sel_w2, 
+   ecl_byp_rs2_mux1_sel_other, ecl_byp_rs3_mux2_sel_e, 
+   ecl_byp_rs3_mux2_sel_rf, ecl_byp_rs3_mux2_sel_ld, 
+   ecl_byp_rs3_mux2_sel_usemux1, ecl_byp_rs3_mux1_sel_m, 
+   ecl_byp_rs3_mux1_sel_w, ecl_byp_rs3_mux1_sel_w2, 
+   ecl_byp_rs3_mux1_sel_other, ecl_byp_rs3h_mux2_sel_e, 
+   ecl_byp_rs3h_mux2_sel_rf, ecl_byp_rs3h_mux2_sel_ld, 
+   ecl_byp_rs3h_mux2_sel_usemux1, ecl_byp_rs3h_mux1_sel_m, 
+   ecl_byp_rs3h_mux1_sel_w, ecl_byp_rs3h_mux1_sel_w2, 
+   ecl_byp_rs3h_mux1_sel_other, ecl_byp_rs1_longmux_sel_g2, 
+   ecl_byp_rs1_longmux_sel_w2, ecl_byp_rs1_longmux_sel_ldxa, 
+   ecl_byp_rs2_longmux_sel_g2, ecl_byp_rs2_longmux_sel_w2, 
+   ecl_byp_rs2_longmux_sel_ldxa, ecl_byp_rs3_longmux_sel_g2, 
+   ecl_byp_rs3_longmux_sel_w2, ecl_byp_rs3_longmux_sel_ldxa, 
+   ecl_byp_rs3h_longmux_sel_g2, ecl_byp_rs3h_longmux_sel_w2, 
+   ecl_byp_rs3h_longmux_sel_ldxa, ecl_byp_sel_load_m, 
+   ecl_byp_sel_pipe_m, ecl_byp_sel_ecc_m, ecl_byp_sel_muldiv_g, 
+   ecl_byp_sel_load_g, ecl_byp_sel_restore_g, ecl_byp_std_e_l, 
+   ecl_byp_ldxa_g, alu_byp_rd_data_e, ifu_exu_imm_data_d, 
+   irf_byp_rs1_data_d_l, irf_byp_rs2_data_d_l, irf_byp_rs3_data_d_l, 
+   irf_byp_rs3h_data_d_l, lsu_exu_dfill_data_g, lsu_exu_ldxa_data_g, 
+   div_byp_muldivout_g, ecc_byp_ecc_result_m, ecl_byp_ecc_mask_m_l, 
+   ifu_exu_pc_d, ecl_byp_3lsb_m, ecl_byp_restore_m, 
+   ecl_byp_sel_restore_m, ecl_byp_eclpr_e, div_byp_yreg_e, 
+   ifu_exu_pcver_e, tlu_exu_rsr_data_m, ffu_exu_rsr_data_m, 
+   ecl_byp_sel_yreg_e, ecl_byp_sel_eclpr_e, ecl_byp_sel_ifusr_e, 
+   ecl_byp_sel_alu_e, ecl_byp_sel_ifex_m, ecl_byp_sel_ffusr_m, 
+   ecl_byp_sel_tlusr_m
+   );
+
+   input rclk;                  // clock; aliased to clk below
+   input se;                    // scan enable
+   input si;                    // presumably scan-in; not referenced inside this module
+   input sehold;                // drives enb_l of the clock buffer, or (inverted) the en pin of the W/W2 flops
+   input ecl_byp_rs1_mux2_sel_e;// select lines for bypass muxes for rs1
+   input ecl_byp_rs1_mux2_sel_rf;
+   input ecl_byp_rs1_mux2_sel_ld;
+   input ecl_byp_rs1_mux2_sel_usemux1;
+   input ecl_byp_rs1_mux1_sel_m;
+   input ecl_byp_rs1_mux1_sel_w;
+   input ecl_byp_rs1_mux1_sel_w2;
+   input ecl_byp_rs1_mux1_sel_other;
+   input ecl_byp_rcc_mux2_sel_e;// select lines for bypass muxes for reg condition code
+   input ecl_byp_rcc_mux2_sel_rf;
+   input ecl_byp_rcc_mux2_sel_ld;
+   input ecl_byp_rcc_mux2_sel_usemux1;
+   input ecl_byp_rcc_mux1_sel_m;
+   input ecl_byp_rcc_mux1_sel_w;
+   input ecl_byp_rcc_mux1_sel_w2;
+   input ecl_byp_rcc_mux1_sel_other;
+   input ecl_byp_rs2_mux2_sel_e;// select lines for bypass muxes for rs2
+   input ecl_byp_rs2_mux2_sel_rf;
+   input ecl_byp_rs2_mux2_sel_ld;
+   input ecl_byp_rs2_mux2_sel_usemux1;
+   input ecl_byp_rs2_mux1_sel_m;
+   input ecl_byp_rs2_mux1_sel_w;
+   input ecl_byp_rs2_mux1_sel_w2;
+   input ecl_byp_rs2_mux1_sel_other;
+   input ecl_byp_rs3_mux2_sel_e;// select lines for bypass muxes for rs3
+   input ecl_byp_rs3_mux2_sel_rf;
+   input ecl_byp_rs3_mux2_sel_ld;
+   input ecl_byp_rs3_mux2_sel_usemux1;
+   input ecl_byp_rs3_mux1_sel_m;
+   input ecl_byp_rs3_mux1_sel_w;
+   input ecl_byp_rs3_mux1_sel_w2;
+   input ecl_byp_rs3_mux1_sel_other;
+   input ecl_byp_rs3h_mux2_sel_e;// select lines for bypass muxes for rs3 double
+   input ecl_byp_rs3h_mux2_sel_rf;
+   input ecl_byp_rs3h_mux2_sel_ld;
+   input ecl_byp_rs3h_mux2_sel_usemux1;
+   input ecl_byp_rs3h_mux1_sel_m;
+   input ecl_byp_rs3h_mux1_sel_w;
+   input ecl_byp_rs3h_mux1_sel_w2;
+   input ecl_byp_rs3h_mux1_sel_other;
+   input ecl_byp_rs1_longmux_sel_g2; // *_longmux_sel_*: selects of the rs*_w2_mux instances
+   input ecl_byp_rs1_longmux_sel_w2;
+   input ecl_byp_rs1_longmux_sel_ldxa;
+   input ecl_byp_rs2_longmux_sel_g2;
+   input ecl_byp_rs2_longmux_sel_w2;
+   input ecl_byp_rs2_longmux_sel_ldxa;
+   input ecl_byp_rs3_longmux_sel_g2;
+   input ecl_byp_rs3_longmux_sel_w2;
+   input ecl_byp_rs3_longmux_sel_ldxa;
+   input ecl_byp_rs3h_longmux_sel_g2;
+   input ecl_byp_rs3h_longmux_sel_w2;
+   input ecl_byp_rs3h_longmux_sel_ldxa;
+   input ecl_byp_sel_load_m;        // m instruction uses load in w1 port
+   input ecl_byp_sel_pipe_m;        // rd_data_m_mux sel0, paired with full_rd_data_m
+   input ecl_byp_sel_ecc_m;         // rd_data_m_mux sel2, paired with ecc_byp_ecc_result_m
+   input ecl_byp_sel_muldiv_g;      // mux_w2_data sel0, paired with div_byp_muldivout_g
+   input ecl_byp_sel_load_g;        // mux_w2_data sel1, paired with dfill_data_g2
+   input ecl_byp_sel_restore_g;     // mux_w2_data sel2, paired with restore_rd_data
+   input ecl_byp_std_e_l;           // inverted into ecl_byp_std_e (select of the rs2/rs3 output muxes)
+   input ecl_byp_ldxa_g;            // dfill_data_mux select (in1 = lsu_exu_ldxa_data_g)
+   input [63:0] alu_byp_rd_data_e;           // data from alu for bypass
+   input [31:0] ifu_exu_imm_data_d;     // immediate
+   input [71:0] irf_byp_rs1_data_d_l;  // RF rs1_data, complemented on entry; [71:64] -> byp_ecc_rs1_synd_d
+   input [71:0] irf_byp_rs2_data_d_l;  // RF rs2_data, complemented on entry; [71:64] -> byp_ecc_rs2_synd_d
+   input [71:0] irf_byp_rs3_data_d_l;  // RF rs3_data, complemented on entry; [71:64] -> byp_ecc_rs3_synd_d
+   input [31:0] irf_byp_rs3h_data_d_l;// RF rs3 double data
+   input [63:0] lsu_exu_dfill_data_g; // load data
+   input [63:0] lsu_exu_ldxa_data_g;  // dfill_data_mux in1 and in2 of every rs*_w2_mux
+   input [63:0] div_byp_muldivout_g;  // in0 of mux_w2_data
+   input [63:0] ecc_byp_ecc_result_m;// result from ecc
+   input [7:0]  ecl_byp_ecc_mask_m_l; // msk input of both w1_eccgen and w2_eccgen
+   input [47:0]  ifu_exu_pc_d;        // sign-extended from bit 47 into mux_rs1_data_1 in3
+   input [2:0]   ecl_byp_3lsb_m;      // substituted for rd_data_m[2:0] on sr_out_mux in0
+   input         ecl_byp_restore_m;   // restore_buf_mux select (in1 = rd_data_m)
+   input         ecl_byp_sel_restore_m; // rd_data_m_mux sel3, paired with restore_rd_data
+   input [7:0]   ecl_byp_eclpr_e;     // zero-extended to 64 bits on ifu_exu_sr_mux in1
+   input [31:0]  div_byp_yreg_e;      // zero-extended to 64 bits on ifu_exu_sr_mux in0
+   input [63:0]  ifu_exu_pcver_e;     // ifu_exu_sr_mux in2
+   input [63:0]  tlu_exu_rsr_data_m;  // sr_out_mux in2
+   input [63:0]  ffu_exu_rsr_data_m;  // sr_out_mux in1
+   input         ecl_byp_sel_yreg_e;
+   input         ecl_byp_sel_eclpr_e;
+   input         ecl_byp_sel_ifusr_e;
+   input         ecl_byp_sel_alu_e;
+   input         ecl_byp_sel_ifex_m;
+   input         ecl_byp_sel_ffusr_m;
+   input         ecl_byp_sel_tlusr_m;
+ 
+   output        so;                 // not driven inside this module
+   output [63:0] byp_alu_rs1_data_e; // rs1_data operand for alu
+   output [63:0] byp_alu_rs2_data_e_l; // rs2_data operand for alu
+   output [63:0] byp_alu_rs2_data_e;
+   output [63:0] exu_lsu_rs3_data_e; // rs3_data operand for lsu
+   output [63:0] exu_spu_rs3_data_e;// rs3 data for spu
+   output [63:0]  exu_lsu_rs2_data_e; // rs2_data_out_mux output (byp_alu_rs2_data_e / rs3_data_e)
+   output [63:0]  byp_alu_rcc_data_e;// data for reg condition codes
+   output [71:0] byp_irf_rd_data_w;  // [63:0] from the W flop, [71:64] = ~rd_synd_w_l
+   output [63:0] exu_tlu_wsr_data_m;          // rd_data_m passed through dp_buffer
+   output [71:0] byp_irf_rd_data_w2; // [63:0] from the W2 flop, [71:64] = ~rd_synd_w2_l
+   output [63:0] byp_ecc_rs3_data_e; // copy of rs3_data_e
+   output [63:0] byp_ecc_rcc_data_e; // copy of byp_alu_rcc_data_e
+   output        byp_ecl_rs2_31_e;
+   output        byp_ecl_rs1_31_e;
+   output        byp_ecl_rs1_63_e;
+   output [2:0]  byp_ecl_rs1_2_0_e;
+   output [3:0]  byp_ecl_rs2_3_0_e;
+   output [7:0]  byp_ecc_rs1_synd_d; // ~irf_byp_rs1_data_d_l[71:64]
+   output [7:0]  byp_ecc_rs2_synd_d; // ~irf_byp_rs2_data_d_l[71:64]
+   output [7:0]  byp_ecc_rs3_synd_d; // ~irf_byp_rs3_data_d_l[71:64]
+
+   wire          clk;                // = rclk
+   wire          sehold_clk;         // clken_buf output; only driven when FPGA_SYN_CLK_EN is undefined
+   wire [63:0] irf_byp_rs1_data_d;  // RF rs1_data
+   wire [63:0] irf_byp_rs2_data_d;  // RF rs2_data
+   wire [63:0] irf_byp_rs3_data_d;  // RF rs3_data
+   wire [31:0] irf_byp_rs3h_data_d;  // RF rs3_data double
+   wire [63:0] byp_alu_rs1_data_d; // rs1 operand for alu
+   wire [63:0] byp_alu_rcc_data_d; // rcc operand for alu
+   wire [63:0] byp_alu_rs2_data_d; // rs2_data operand for alu
+   wire [63:0]   rd_data_e;          // e stage rd_data
+   wire [63:0]   rd_data_m;          // m stage non-load rd_data
+   wire [63:0]   full_rd_data_m;          // m stage non-load rd_data including rdsr
+   wire [63:0]   rd_data_g;          // mux_w2_data output, input of the W2 flop
+   wire [63:0]   byp_irf_rd_data_m;// m stage rd_data
+   wire [63:0]   rs1_data_btwn_mux;  // intermediate net for rs1_data muxes
+   wire [63:0]   rcc_data_btwn_mux;  // intermediate net for rcc_data muxes
+   wire [63:0]   rs2_data_btwn_mux;  // intermediate net for rs2_data muxes
+   wire [63:0]   rs3_data_btwn_mux;  // intermediate net for rs3_data muxes
+   wire [31:0]   rs3h_data_btwn_mux;  // intermediate net for rs3h_data muxes
+   wire [63:0]   rs3_data_d;
+   wire [63:0]   rs3_data_e;
+   wire [31:0]   rs3h_data_d;
+   wire [31:0]   rs3h_data_e;
+   wire [63:0]   restore_rd_data;      // output of dff_restore_buf
+   wire [63:0]   restore_rd_data_next; // restore_buf_mux output
+   wire [63:0]   dfill_data_g;         // dfill_data_mux output
+   wire [63:0]   dfill_data_g2;        // dfill_data_g after dfill_data_dff
+   wire          ecl_byp_std_e;        // ~ecl_byp_std_e_l
+   wire [7:0]    rd_synd_w_l;          // p output of w1_eccgen
+   wire [7:0]    rd_synd_w2_l;         // p output of w2_eccgen
+
+   assign        clk = rclk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   clken_buf irf_write_clkbuf (	
+                                .rclk   (clk),
+                                .enb_l  (sehold),
+                                .tmb_l  (~se),
+                                .clk    (sehold_clk)
+                                ) ;
+`endif
+   
+   // RF bits [71:64] are complemented to form the byp_ecc_*_synd_d outputs
+   assign        byp_ecc_rs1_synd_d[7:0] = ~irf_byp_rs1_data_d_l[71:64];
+   assign        byp_ecc_rs2_synd_d[7:0] = ~irf_byp_rs2_data_d_l[71:64];
+   assign        byp_ecc_rs3_synd_d[7:0] = ~irf_byp_rs3_data_d_l[71:64];
+   /////////////////////////////////////////
+   // Load returns go straight into a flop after mux with ldxa_data
+   /////////////////////////////////////////
+   dp_mux2es #(64) dfill_data_mux (.dout(dfill_data_g[63:0]),
+                                   .in0(lsu_exu_dfill_data_g[63:0]),
+                                   .in1(lsu_exu_ldxa_data_g[63:0]),
+                                   .sel(ecl_byp_ldxa_g));
+   dff_s #(64) dfill_data_dff (.din(dfill_data_g[63:0]), .clk(clk),
+                             .q(dfill_data_g2[63:0]), .se(se), .si(), .so());
+   
+   //////////////////////////////////////////////////
+   // RD of PR or SR
+   //////////////////////////////////////////////////
+   
+   // Mux outputs for rdpr/rdsr
+   mux4ds #(64) ifu_exu_sr_mux(.dout(rd_data_e[63:0]),
+                               .in0({32'b0, div_byp_yreg_e[31:0]}),
+                               .in1({56'b0, ecl_byp_eclpr_e[7:0]}),
+                               .in2(ifu_exu_pcver_e[63:0]),
+                               .in3(alu_byp_rd_data_e[63:0]),
+                               .sel0(ecl_byp_sel_yreg_e),
+                               .sel1(ecl_byp_sel_eclpr_e),
+                               .sel2(ecl_byp_sel_ifusr_e),
+                               .sel3(ecl_byp_sel_alu_e));
+   
+   // mux in the rdsr data from ffu and tlu
+   mux3ds #(64) sr_out_mux(.dout(full_rd_data_m[63:0]),
+                           .in0({rd_data_m[63:3], ecl_byp_3lsb_m[2:0]}), // low 3 bits come from ecl_byp_3lsb_m
+                           .in1(ffu_exu_rsr_data_m[63:0]),
+                           .in2(tlu_exu_rsr_data_m[63:0]),
+                           .sel0(ecl_byp_sel_ifex_m),
+                           .sel1(ecl_byp_sel_ffusr_m),
+                           .sel2(ecl_byp_sel_tlusr_m));
+   
+   // Pipeline registers for rd_data
+   dff_s #(64) dff_rd_data_e2m(.din(rd_data_e[63:0]), .clk(clk), .q(rd_data_m[63:0]),
+                           .se(se), .si(), .so());
+   dp_buffer #(64) wsr_data_buf(.dout(exu_tlu_wsr_data_m[63:0]), .in(rd_data_m[63:0]));
+   
+   // Flop for storing result from restore
+   dp_mux2es #(64) restore_buf_mux(.dout(restore_rd_data_next[63:0]),
+                                   .in0(restore_rd_data[63:0]),
+                                   .in1(rd_data_m[63:0]),
+                                   .sel(ecl_byp_restore_m));
+   dff_s #(64) dff_restore_buf(.din(restore_rd_data_next[63:0]),
+                             .q(restore_rd_data[63:0]), .clk(clk),
+                             .se(se), .si(), .so());
+   // Mux for rd_data_m between ALU and load data and ECC result and restore result
+   mux4ds #(64) rd_data_m_mux(.dout(byp_irf_rd_data_m[63:0]), 
+                              .in0(full_rd_data_m[63:0]),
+                              .in1(dfill_data_g2[63:0]),
+                              .in2(ecc_byp_ecc_result_m[63:0]),
+                              .in3(restore_rd_data[63:0]),
+                              .sel0(ecl_byp_sel_pipe_m), 
+                              .sel1(ecl_byp_sel_load_m),
+                              .sel2(ecl_byp_sel_ecc_m),
+                              .sel3(ecl_byp_sel_restore_m));
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(64) dff_rd_data_m2w(.din(byp_irf_rd_data_m[63:0]), .en (~(sehold)), .clk(clk), .q(byp_irf_rd_data_w[63:0]),
+                           .se(se), .si(), .so());
+`else
+   dff_s #(64) dff_rd_data_m2w(.din(byp_irf_rd_data_m[63:0]), .clk(sehold_clk), .q(byp_irf_rd_data_w[63:0]),
+                           .se(se), .si(), .so());
+`endif
+   // W flop above, W2 flop below: dffe_s on clk with en(~sehold) under FPGA_SYN_CLK_DFF, else dff_s on sehold_clk
+   // W2 flop
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(64) dff_rd_data_g2w(.din(rd_data_g[63:0]), .en (~(sehold)), .clk(clk), .q(byp_irf_rd_data_w2[63:0]),
+                           .se(se), .si(), .so());
+`else
+   dff_s #(64) dff_rd_data_g2w(.din(rd_data_g[63:0]), .clk(sehold_clk), .q(byp_irf_rd_data_w2[63:0]),
+                           .se(se), .si(), .so());
+`endif
+   
+   
+   // D-E pipeline registers for rs_data
+   dff_s #(64) rs1_data_dff(.din(byp_alu_rs1_data_d[63:0]), .clk(clk),
+                        .q(byp_alu_rs1_data_e[63:0]), .se(se),
+                        .si(), .so());
+   dff_s #(64) rs2_data_dff(.din(byp_alu_rs2_data_d[63:0]), .clk(clk), 
+                        .q(byp_alu_rs2_data_e[63:0]), .se(se),
+                        .si(), .so());
+   assign        byp_alu_rs2_data_e_l[63:0] = ~byp_alu_rs2_data_e[63:0];
+   assign        byp_ecl_rs2_31_e = byp_alu_rs2_data_e[31];
+   assign        byp_ecl_rs1_63_e = byp_alu_rs1_data_e[63];
+   assign        byp_ecl_rs1_31_e = byp_alu_rs1_data_e[31];
+   assign        byp_ecl_rs1_2_0_e[2:0] = byp_alu_rs1_data_e[2:0];
+   assign        byp_ecl_rs2_3_0_e[3:0] = byp_alu_rs2_data_e[3:0];
+   
+
+   dff_s #(64) rs3_data_dff(.din(rs3_data_d[63:0]), .clk(clk), 
+                        .q(rs3_data_e[63:0]), .se(se),
+                        .si(), .so());
+   dff_s #(32) rs3h_data_dff(.din(rs3h_data_d[31:0]), .clk(clk), 
+                           .q(rs3h_data_e[31:0]), .se(se),
+                           .si(), .so());
+   dff_s #(64) rcc_data_dff(.din(byp_alu_rcc_data_d[63:0]), .clk(clk), 
+                        .q(byp_alu_rcc_data_e[63:0]), .se(se),
+                        .si(), .so());
+
+   assign        ecl_byp_std_e = ~ecl_byp_std_e_l;
+   dp_mux2es #(64) rs2_data_out_mux(.dout(exu_lsu_rs2_data_e[63:0]),
+                                    .in0(byp_alu_rs2_data_e[63:0]),
+                                    .in1(rs3_data_e[63:0]),
+                                    .sel(ecl_byp_std_e));
+   dp_mux2es #(64) rs3_data_out_mux(.dout(exu_lsu_rs3_data_e[63:0]),
+                                    .in0(rs3_data_e[63:0]),
+                                    .in1({32'b0,rs3h_data_e[31:0]}),
+                                    .sel(ecl_byp_std_e));
+   // rs3 also goes to spu and ecc.  These are plain assigns here; no buffer cell is instantiated
+   assign        exu_spu_rs3_data_e[63:0] = rs3_data_e[63:0];
+   
+   assign        byp_ecc_rs3_data_e[63:0] = rs3_data_e[63:0];
+   assign        byp_ecc_rcc_data_e[63:0] = byp_alu_rcc_data_e[63:0];
+   
+   // Forwarding Muxes
+   // Select lines are as follows:
+   // mux1[M, W, W2, OTHER(optional)]
+   // mux2[mux1, RF, E, LD]
+   assign        irf_byp_rs1_data_d[63:0] = ~irf_byp_rs1_data_d_l[63:0];
+   assign        irf_byp_rs2_data_d[63:0] = ~irf_byp_rs2_data_d_l[63:0];
+   assign        irf_byp_rs3_data_d[63:0] = ~irf_byp_rs3_data_d_l[63:0];
+   assign        irf_byp_rs3h_data_d[31:0] = ~irf_byp_rs3h_data_d_l[31:0];
+
+/* -----\/----- EXCLUDED -----\/-----
+   // the w2 bypass path is either what is being written that cycle
+   // or the load result that will be written next cycle.
+ -----/\----- EXCLUDED -----/\----- */
+   wire [63:0]   rs1_data_w2;
+   wire [63:0]   rs2_data_w2;
+   wire [63:0]   rs3_data_w2;
+   wire [31:0]   rs3h_data_w2;
+   mux3ds #(64) rs1_w2_mux(.dout(rs1_data_w2[63:0]),
+                           .in0(byp_irf_rd_data_w2[63:0]),
+                           .in1(dfill_data_g2[63:0]),
+                           .in2(lsu_exu_ldxa_data_g[63:0]),
+                           .sel0(ecl_byp_rs1_longmux_sel_w2),
+                           .sel1(ecl_byp_rs1_longmux_sel_g2),
+                           .sel2(ecl_byp_rs1_longmux_sel_ldxa));
+   mux3ds #(64) rs2_w2_mux(.dout(rs2_data_w2[63:0]),
+                           .in0(byp_irf_rd_data_w2[63:0]),
+                           .in1(dfill_data_g2[63:0]),
+                           .in2(lsu_exu_ldxa_data_g[63:0]),
+                           .sel0(ecl_byp_rs2_longmux_sel_w2),
+                           .sel1(ecl_byp_rs2_longmux_sel_g2),
+                           .sel2(ecl_byp_rs2_longmux_sel_ldxa));
+   mux3ds #(64) rs3_w2_mux(.dout(rs3_data_w2[63:0]),
+                           .in0(byp_irf_rd_data_w2[63:0]),
+                           .in1(dfill_data_g2[63:0]),
+                           .in2(lsu_exu_ldxa_data_g[63:0]),
+                           .sel0(ecl_byp_rs3_longmux_sel_w2),
+                           .sel1(ecl_byp_rs3_longmux_sel_g2),
+                           .sel2(ecl_byp_rs3_longmux_sel_ldxa));
+   mux3ds #(32) rs3h_w2_mux(.dout(rs3h_data_w2[31:0]),
+                            .in0(byp_irf_rd_data_w2[31:0]),
+                            .in1(dfill_data_g2[31:0]),
+                            .in2(lsu_exu_ldxa_data_g[31:0]),
+                            .sel0(ecl_byp_rs3h_longmux_sel_w2),
+                            .sel1(ecl_byp_rs3h_longmux_sel_g2),
+                            .sel2(ecl_byp_rs3h_longmux_sel_ldxa));
+                              
+   
+   // rs1_data muxes: RF and E are critical paths
+   mux4ds #(64) mux_rs1_data_1(.dout(rs1_data_btwn_mux[63:0]), 
+                               .in0(rd_data_m[63:0]),
+                               .in1(byp_irf_rd_data_w[63:0]),
+                               .in2(rs1_data_w2[63:0]), 
+                               .in3({{16{ifu_exu_pc_d[47]}}, ifu_exu_pc_d[47:0]}),
+                             .sel0(ecl_byp_rs1_mux1_sel_m),
+                             .sel1(ecl_byp_rs1_mux1_sel_w),
+                             .sel2(ecl_byp_rs1_mux1_sel_w2),
+                             .sel3(ecl_byp_rs1_mux1_sel_other));
+   mux4ds #(64) mux_rs1_data_2(.dout(byp_alu_rs1_data_d[63:0]),
+                             .in0(rs1_data_btwn_mux[63:0]),
+                             .in1(irf_byp_rs1_data_d[63:0]), 
+                             .in2(alu_byp_rd_data_e[63:0]),
+                             .in3(lsu_exu_dfill_data_g[63:0]),
+                             .sel0(ecl_byp_rs1_mux2_sel_usemux1),
+                             .sel1(ecl_byp_rs1_mux2_sel_rf),
+                             .sel2(ecl_byp_rs1_mux2_sel_e),
+                             .sel3(ecl_byp_rs1_mux2_sel_ld));
+   
+   // rcc_data muxes: RF and E are critical paths; W2 and RF legs reuse the rs1 sources, own selects
+   mux4ds #(64) mux_rcc_data_1(.dout(rcc_data_btwn_mux[63:0]), 
+                               .in0(rd_data_m[63:0]),
+                               .in1(byp_irf_rd_data_w[63:0]),
+                               .in2(rs1_data_w2[63:0]), 
+                               .in3({64{1'b0}}),
+                             .sel0(ecl_byp_rcc_mux1_sel_m),
+                             .sel1(ecl_byp_rcc_mux1_sel_w),
+                             .sel2(ecl_byp_rcc_mux1_sel_w2),
+                             .sel3(ecl_byp_rcc_mux1_sel_other));
+   mux4ds #(64) mux_rcc_data_2(.dout(byp_alu_rcc_data_d[63:0]),
+                             .in0(rcc_data_btwn_mux[63:0]),
+                             .in1(irf_byp_rs1_data_d[63:0]), 
+                             .in2(alu_byp_rd_data_e[63:0]),
+                             .in3(lsu_exu_dfill_data_g[63:0]),
+                             .sel0(ecl_byp_rcc_mux2_sel_usemux1),
+                             .sel1(ecl_byp_rcc_mux2_sel_rf),
+                             .sel2(ecl_byp_rcc_mux2_sel_e),
+                             .sel3(ecl_byp_rcc_mux2_sel_ld));
+
+   // rs2_data muxes: RF and E are critical paths, optional is imm
+   mux4ds #(64) mux_rs2_data_1(.dout(rs2_data_btwn_mux[63:0]), 
+                             .in0(rd_data_m[63:0]),
+                             .in1(byp_irf_rd_data_w[63:0]),
+                             .in2(rs2_data_w2[63:0]),
+                             .in3({{32{ifu_exu_imm_data_d[31]}},
+                                   ifu_exu_imm_data_d[31:0]}),
+                             .sel0(ecl_byp_rs2_mux1_sel_m),
+                             .sel1(ecl_byp_rs2_mux1_sel_w),
+                             .sel2(ecl_byp_rs2_mux1_sel_w2),
+                             .sel3(ecl_byp_rs2_mux1_sel_other));
+   mux4ds #(64) mux_rs2_data_2(.dout(byp_alu_rs2_data_d[63:0]),
+                             .in0(rs2_data_btwn_mux[63:0]),
+                             .in1(irf_byp_rs2_data_d[63:0]), 
+                             .in2(alu_byp_rd_data_e[63:0]),
+                             .in3(lsu_exu_dfill_data_g[63:0]),
+                             .sel0(ecl_byp_rs2_mux2_sel_usemux1),
+                             .sel1(ecl_byp_rs2_mux2_sel_rf),
+                             .sel2(ecl_byp_rs2_mux2_sel_e),
+                             .sel3(ecl_byp_rs2_mux2_sel_ld));
+   
+   // rs3_data muxes: RF and E are critical paths, no optional
+   mux4ds #(64) mux_rs3_data_1(.dout(rs3_data_btwn_mux[63:0]), 
+                                .in0(rd_data_m[63:0]),
+                             .in1(byp_irf_rd_data_w[63:0]),
+                             .in2(rs3_data_w2[63:0]), .in3({64{1'b0}}),
+                             .sel0(ecl_byp_rs3_mux1_sel_m),
+                             .sel1(ecl_byp_rs3_mux1_sel_w),
+                             .sel2(ecl_byp_rs3_mux1_sel_w2),
+                             .sel3(ecl_byp_rs3_mux1_sel_other));
+   mux4ds #(64) mux_rs3_data_2(.dout(rs3_data_d[63:0]), 
+                                .in0(rs3_data_btwn_mux[63:0]),
+                                .in1(irf_byp_rs3_data_d[63:0]), 
+                                .in2(alu_byp_rd_data_e[63:0]),
+                                .in3(lsu_exu_dfill_data_g[63:0]),
+                                .sel0(ecl_byp_rs3_mux2_sel_usemux1),
+                                .sel1(ecl_byp_rs3_mux2_sel_rf),
+                                .sel2(ecl_byp_rs3_mux2_sel_e),
+                                .sel3(ecl_byp_rs3_mux2_sel_ld));
+   
+   // rs3h_data muxes (32 bits wide, low halves of the sources): RF and E are critical paths, no optional
+   mux4ds #(32) mux_rs3h_data_1(.dout(rs3h_data_btwn_mux[31:0]), 
+                                .in0(rd_data_m[31:0]),
+                             .in1(byp_irf_rd_data_w[31:0]),
+                             .in2(rs3h_data_w2[31:0]), .in3({32{1'b0}}),
+                             .sel0(ecl_byp_rs3h_mux1_sel_m),
+                             .sel1(ecl_byp_rs3h_mux1_sel_w),
+                             .sel2(ecl_byp_rs3h_mux1_sel_w2),
+                             .sel3(ecl_byp_rs3h_mux1_sel_other));
+   mux4ds #(32) mux_rs3h_data_2(.dout(rs3h_data_d[31:0]), 
+                                .in0(rs3h_data_btwn_mux[31:0]),
+                                .in1(irf_byp_rs3h_data_d[31:0]), 
+                                .in2(alu_byp_rd_data_e[31:0]),
+                                .in3(lsu_exu_dfill_data_g[31:0]),
+                                .sel0(ecl_byp_rs3h_mux2_sel_usemux1),
+                                .sel1(ecl_byp_rs3h_mux2_sel_rf),
+                                .sel2(ecl_byp_rs3h_mux2_sel_e),
+                                .sel3(ecl_byp_rs3h_mux2_sel_ld));
+ 
+   // ECC for W1: p output of w1_eccgen is complemented onto byp_irf_rd_data_w[71:64]
+`ifdef FPGA_SYN_CLK_DFF
+   sparc_exu_byp_eccgen w1_eccgen(.d(byp_irf_rd_data_m[63:0]),
+                                   .msk(ecl_byp_ecc_mask_m_l[7:0]),
+                                   .p(rd_synd_w_l[7:0]),
+                                  .clk(clk), .se(se));
+`else
+   sparc_exu_byp_eccgen w1_eccgen(.d(byp_irf_rd_data_m[63:0]),
+                                   .msk(ecl_byp_ecc_mask_m_l[7:0]),
+                                   .p(rd_synd_w_l[7:0]),
+                                  .clk(sehold_clk), .se(se));
+`endif
+   assign        byp_irf_rd_data_w[71:64] = ~rd_synd_w_l[7:0];
+   
+   ////////////////////////
+   // G arbitration muxes and W2 ECC
+   ////////////////////////
+   mux3ds #(64) mux_w2_data(.dout(rd_data_g[63:0]),
+                            .in0(div_byp_muldivout_g[63:0]),
+                            .in1(dfill_data_g2[63:0]),
+                            .in2(restore_rd_data[63:0]),
+                            .sel0(ecl_byp_sel_muldiv_g),
+                            .sel1(ecl_byp_sel_load_g),
+                            .sel2(ecl_byp_sel_restore_g));
+`ifdef FPGA_SYN_CLK_DFF
+   sparc_exu_byp_eccgen w2_eccgen(.d(rd_data_g[63:0]),
+                                   .msk(ecl_byp_ecc_mask_m_l[7:0]), // same mask input as w1_eccgen
+                                  .p(rd_synd_w2_l[7:0]),
+                                  .clk(clk), .se(se));
+`else
+   sparc_exu_byp_eccgen w2_eccgen(.d(rd_data_g[63:0]),
+                                   .msk(ecl_byp_ecc_mask_m_l[7:0]), // same mask input as w1_eccgen
+                                  .p(rd_synd_w2_l[7:0]),
+                                  .clk(sehold_clk), .se(se));
+`endif
+   assign        byp_irf_rd_data_w2[71:64] = ~rd_synd_w2_l[7:0];
+   
+endmodule // sparc_exu_byp
+
+   
Index: /trunk/T1-CPU/exu/sparc_exu_rml.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_rml.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_rml.v	(revision 6)
@@ -0,0 +1,770 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_rml.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_rml
+//	Description: Register management logic.  Contains CWP, CANSAVE, CANRESTORE
+//		and other window management registers.  Generates RF related traps
+//		and switches the global registers to alternate globals.  All the registers
+//		are written in the W stage (there is no bypassing, so the thread must
+//		swap out) and will either get a new value generated by a window management
+//		instruction or by a WRPR instruction.  The following traps can be generated:
+//			fill: restore with canrestore == 0
+//			clean_window: save with cleanwin - canrestore == 0
+//			spill: flushw with cansave != nwindows - 2, or
+//				save with cansave == 0
+//		It is assumed that the contents of the new window will get squashed
+//		on a clean_window or fill trap so the save or restore gets executed
+//		normally.  Spill traps or writes to CWP mean that all 16 windowed registers
+//		must be saved and restored (a 4 cycle operation).
+*/
+module sparc_exu_rml (/*AUTOARG*/
+   // Outputs
+   exu_tlu_spill_wtype, exu_tlu_spill_other, exu_tlu_cwp_retry, 
+   exu_tlu_cwp3_w, exu_tlu_cwp2_w, exu_tlu_cwp1_w, exu_tlu_cwp0_w, 
+   so, exu_tlu_cwp_cmplt, exu_tlu_cwp_cmplt_tid, rml_ecl_cwp_d, 
+   rml_ecl_cansave_d, rml_ecl_canrestore_d, rml_ecl_otherwin_d, 
+   rml_ecl_wstate_d, rml_ecl_cleanwin_d, rml_ecl_fill_e, 
+   rml_ecl_clean_window_e, rml_ecl_other_e, rml_ecl_wtype_e, 
+   exu_ifu_spill_e, rml_ecl_gl_e, rml_irf_old_lo_cwp_e, 
+   rml_irf_new_lo_cwp_e, rml_irf_old_e_cwp_e, rml_irf_new_e_cwp_e, 
+   rml_irf_swap_even_e, rml_irf_swap_odd_e, rml_irf_swap_local_e, 
+   rml_irf_kill_restore_w, rml_irf_cwpswap_tid_e, rml_ecl_swap_done, 
+   rml_ecl_rmlop_done_e, exu_ifu_oddwin_s, exu_tlu_spill, 
+   exu_tlu_spill_tid, rml_ecl_kill_m, rml_irf_old_agp, 
+   rml_irf_new_agp, rml_irf_swap_global, rml_irf_global_tid, 
+   // Inputs
+   tlu_exu_cwp_retry_m, rst_tri_en, rclk, se, si, grst_l, arst_l, 
+   ifu_exu_tid_s2, ifu_exu_save_d, ifu_exu_restore_d, 
+   ifu_exu_saved_e, ifu_exu_restored_e, ifu_exu_flushw_e, 
+   ecl_rml_thr_m, ecl_rml_thr_w, ecl_rml_cwp_wen_e, 
+   ecl_rml_cansave_wen_w, ecl_rml_canrestore_wen_w, 
+   ecl_rml_otherwin_wen_w, ecl_rml_wstate_wen_w, 
+   ecl_rml_cleanwin_wen_w, ecl_rml_xor_data_e, ecl_rml_kill_e, 
+   ecl_rml_kill_w, ecl_rml_early_flush_w, exu_tlu_wsr_data_w, 
+   tlu_exu_agp, tlu_exu_agp_swap, tlu_exu_agp_tid, tlu_exu_cwp_m, 
+   tlu_exu_cwpccr_update_m, ecl_rml_inst_vld_w,current_cwp
+   ) ;
+   input rclk;
+   input se;
+   input si;
+   input grst_l;
+   input arst_l;
+   input [1:0] ifu_exu_tid_s2;
+   input       ifu_exu_save_d;
+   input       ifu_exu_restore_d;
+   input       ifu_exu_saved_e;
+   input       ifu_exu_restored_e;
+   input       ifu_exu_flushw_e;
+   input [3:0] ecl_rml_thr_m;
+   input [3:0] ecl_rml_thr_w;
+   input       ecl_rml_cwp_wen_e;
+   input       ecl_rml_cansave_wen_w;
+   input       ecl_rml_canrestore_wen_w;
+   input       ecl_rml_otherwin_wen_w;
+   input       ecl_rml_wstate_wen_w;
+   input       ecl_rml_cleanwin_wen_w;
+   input [2:0] ecl_rml_xor_data_e;
+   input       ecl_rml_kill_e;// needed for oddwin updates
+   input       ecl_rml_kill_w;
+   input       ecl_rml_early_flush_w;
+   input [5:0] exu_tlu_wsr_data_w; // for wstate
+   input [1:0]   tlu_exu_agp;   // alternate global pointer
+   input         tlu_exu_agp_swap;// switch globals
+   input [1:0]   tlu_exu_agp_tid;// thread that agp refers to
+   input [2:0] tlu_exu_cwp_m;   // for switching cwp on return from trap
+   input       tlu_exu_cwpccr_update_m;
+   input       ecl_rml_inst_vld_w;
+   /*AUTOINPUT*/
+   // Beginning of automatic inputs (from unused autoinst inputs)
+   input                rst_tri_en;             // To cwp of sparc_exu_rml_cwp.v
+   input                tlu_exu_cwp_retry_m;    // To cwp of sparc_exu_rml_cwp.v
+   // End of automatics
+
+   /*AUTOOUTPUT*/
+   // Beginning of automatic outputs (from unused autoinst outputs)
+   output [2:0]         exu_tlu_cwp0_w;         // From cwp of sparc_exu_rml_cwp.v
+   output [2:0]         exu_tlu_cwp1_w;         // From cwp of sparc_exu_rml_cwp.v
+   output [2:0]         exu_tlu_cwp2_w;         // From cwp of sparc_exu_rml_cwp.v
+   output [2:0]         exu_tlu_cwp3_w;         // From cwp of sparc_exu_rml_cwp.v
+   output               exu_tlu_cwp_retry;      // From cwp of sparc_exu_rml_cwp.v
+   output               exu_tlu_spill_other;    // From cwp of sparc_exu_rml_cwp.v
+   output [2:0]         exu_tlu_spill_wtype;    // From cwp of sparc_exu_rml_cwp.v
+   // End of automatics
+   output               so;
+   output      exu_tlu_cwp_cmplt;
+   output [1:0] exu_tlu_cwp_cmplt_tid;
+   output [2:0]  rml_ecl_cwp_d;
+   output [2:0]  rml_ecl_cansave_d;
+   output [2:0]  rml_ecl_canrestore_d;
+   output [2:0]  rml_ecl_otherwin_d;
+   output [5:0]  rml_ecl_wstate_d;
+   output [2:0]  rml_ecl_cleanwin_d;
+   output        rml_ecl_fill_e;
+   output        rml_ecl_clean_window_e;
+   output        rml_ecl_other_e;
+   output [2:0] rml_ecl_wtype_e;
+   output       exu_ifu_spill_e;
+   output [1:0] rml_ecl_gl_e;
+
+   output [2:0]  rml_irf_old_lo_cwp_e;  // old window pointer (old_cwp_e) for locals and odds
+   output [2:0]  rml_irf_new_lo_cwp_e;  // new window pointer (new_cwp_e) for locals and odds
+   output [1:0]  rml_irf_old_e_cwp_e;  // old window pointer for evens (2 bits, derived from old_cwp_e)
+   output [1:0]  rml_irf_new_e_cwp_e;  // new window pointer for evens (2 bits, derived from new_cwp_e)
+   output        rml_irf_swap_even_e;
+   output        rml_irf_swap_odd_e;
+   output        rml_irf_swap_local_e;
+   output        rml_irf_kill_restore_w;
+   output [1:0]  rml_irf_cwpswap_tid_e;
+
+   output [3:0] rml_ecl_swap_done;
+   output       rml_ecl_rmlop_done_e;   
+   output [3:0] exu_ifu_oddwin_s;
+   output       exu_tlu_spill;
+   output [1:0] exu_tlu_spill_tid;
+   output       rml_ecl_kill_m;
+   
+   output [1:0]  rml_irf_old_agp; // alternate global pointer
+   output [1:0]  rml_irf_new_agp; // alternate global pointer
+   output        rml_irf_swap_global;
+   output [1:0]  rml_irf_global_tid;
+   output reg [11:0] current_cwp;
+   
+   wire          clk;
+   wire [1:0]    tid_d;
+   wire [3:0]    thr_d;
+   wire [1:0]    tid_e;
+   wire          rml_reset_l;
+   wire          reset;
+   wire          save_e;
+   wire          save_m;
+   wire          restore_e;
+   wire          swap_e;
+   wire          agp_wen;
+   wire [1:0]    agp_thr0;
+   wire [1:0]    agp_thr1;
+   wire [1:0]    agp_thr2;
+   wire [1:0]    agp_thr3;
+   wire [1:0]    agp_thr0_next;
+   wire [1:0]    agp_thr1_next;
+   wire [1:0]    agp_thr2_next;
+   wire [1:0]    agp_thr3_next;
+   wire          agp_wen_thr0_w;
+   wire          agp_wen_thr1_w;
+   wire          agp_wen_thr2_w;
+   wire          agp_wen_thr3_w;
+   wire [1:0]    new_agp;   
+   wire [1:0]    agp_tid;
+   wire [3:0]    agp_thr;
+   wire        full_swap_e;
+   wire   did_restore_m;
+   wire   did_restore_w;
+   wire   kill_restore_m;
+   wire   kill_restore_w;
+
+   wire [2:0]  rml_ecl_cwp_e;
+   wire [2:0]  rml_ecl_cansave_e;
+   wire [2:0]  rml_ecl_canrestore_e;
+   wire [2:0]  rml_ecl_otherwin_e;
+   wire [2:0]  rml_ecl_cleanwin_e;
+
+   wire [2:0]  rml_next_cwp_e;        
+   wire [2:0]  rml_next_cansave_e;// e-stage of rml generated new data
+   wire [2:0]  rml_next_canrestore_e;
+   wire [2:0]  rml_next_otherwin_e;
+   wire [2:0]  rml_next_cleanwin_e;
+   
+   wire [2:0]  next_cwp_e;      
+   wire [2:0]  next_cansave_e;  // e-stage of new data
+   wire [2:0]  next_canrestore_e;
+   wire [2:0]  next_otherwin_e;
+   wire [2:0]  next_cleanwin_e;
+   wire [2:0]  next_cwp_m;      // m-stage of new data
+   wire [2:0]  next_cansave_m;
+   wire [2:0]  next_canrestore_m;
+   wire [2:0]  next_otherwin_m;
+   wire [2:0]  next_cleanwin_m;
+   wire [2:0]  next_cansave_w;// w-stage of new data
+   wire [2:0]  next_canrestore_w;
+   wire [2:0]  next_otherwin_w;
+   wire [2:0]  next_cleanwin_w;
+   wire [2:0]  next_cwp_noreset_w;
+   wire [2:0]  next_cwp_w;
+
+   wire   rml_cwp_wen_e;        // wen for cwp from rml
+   wire   rml_cwp_wen_m;        // wen for cwp from rml
+   wire [2:0] spill_cwp_e;      // next cwp if there is a spill trap 
+   wire       spill_cwp_carry0; // carry bit from spill cwp computations
+   wire       spill_cwp_carry1;
+   wire       next_cwp_sel_inc; // select line to next_cwp mux
+
+   wire        rml_cansave_wen_w;// rml generated wen
+   wire        rml_canrestore_wen_w;
+   wire        rml_otherwin_wen_w;
+   wire        rml_cleanwin_wen_w;
+
+   wire        cansave_wen_w;// wen to registers
+   wire        canrestore_wen_w;
+   wire        otherwin_wen_w;
+   wire        cleanwin_wen_w;
+   wire        cwp_wen_nokill_w;
+   wire        cwp_wen_w;
+   wire        wstate_wen_w;
+
+   wire        cwp_wen_m;       // rml generated wen w/o kills
+   wire        cansave_wen_m;
+   wire        canrestore_wen_m;
+   wire        otherwin_wen_m;
+   wire        cleanwin_wen_m;
+   wire        cansave_wen_valid_m;	// rml generated wen w/ kills
+   wire        canrestore_wen_valid_m;
+   wire        otherwin_wen_valid_m;
+   wire        cleanwin_wen_valid_m;
+
+   wire      	 cwp_wen_e;       // rml generated wen_e
+   wire        cansave_wen_e;
+   wire        canrestore_wen_e;
+   wire        otherwin_wen_e;
+   wire        cleanwin_wen_e;
+
+   wire        cansave_inc_e;
+   wire        canrestore_inc_e;
+
+   wire        spill_trap_save;
+   wire        spill_trap_flush;
+   wire        spill_m;
+   wire [2:0]  cleanwin_xor_canrestore;
+
+   wire        otherwin_is0_e;
+   wire        cansave_is0_e;
+   wire        canrestore_is0_e;
+
+   wire        swap_locals_ins;
+   wire        swap_outs;
+   wire [2:0]  old_cwp_e;
+   wire [2:0]  new_cwp_e;
+
+   wire [2:0]   rml_ecl_wtype_d;
+   wire [2:0]   rml_ecl_wtype_e;
+   wire         rml_ecl_other_d;
+   wire         rml_ecl_other_e;
+   wire        exu_tlu_spill_e;
+   wire         rml_ecl_kill_e;
+   wire         rml_kill_w;
+   wire         vld_w;
+   wire         win_trap_e;
+   wire         win_trap_m;
+   wire         win_trap_w;
+
+   assign       clk = rclk;
+   // Reset flop
+    dffrl_async rstff(.din (grst_l),
+                        .q   (rml_reset_l),
+                        .clk (clk),
+                        .rst_l (arst_l), .se(se), .si(), .so());
+   assign       reset = ~rml_reset_l;
+ 
+   dff_s #(2) tid_s2d(.din(ifu_exu_tid_s2[1:0]), .clk(clk), .q(tid_d[1:0]), .se(se), .si(), .so());
+   dff_s #(2) tid_d2e(.din(tid_d[1:0]), .clk(clk), .q(tid_e[1:0]), .se(se), .si(), .so());
+   assign       thr_d[3] = tid_d[1] & tid_d[0];
+   assign       thr_d[2] = tid_d[1] & ~tid_d[0];
+   assign       thr_d[1] = ~tid_d[1] & tid_d[0];
+   assign       thr_d[0] = ~tid_d[1] & ~tid_d[0];
+   
+   dff_s save_d2e(.din(ifu_exu_save_d), .clk(clk), .q(save_e), .se(se), .si(), .so());
+   dff_s save_e2m(.din(save_e), .clk(clk), .q(save_m), .se(se), .si(), .so());
+   dff_s restore_d2e(.din(ifu_exu_restore_d), .clk(clk), .q(restore_e), .se(se), .si(), .so());
+
+   // don't check flush_pipe in w if caused by rml trap.  Things with a higher priority
+   // than a window trap have been accumulated into ecl_rml_kill_w
+   assign       vld_w = ecl_rml_inst_vld_w & (~ecl_rml_early_flush_w | win_trap_w);
+   assign     rml_kill_w = ecl_rml_kill_w | ~vld_w;
+
+   assign     win_trap_e = rml_ecl_fill_e | exu_tlu_spill_e | rml_ecl_clean_window_e;
+   dff_s win_trap_e2m(.din(win_trap_e), .clk(clk), .q(win_trap_m), .se(se), .si(), .so());
+   dff_s win_trap_m2w(.din(win_trap_m), .clk(clk), .q(win_trap_w), .se(se), .si(), .so());
+   
+   assign canrestore_is0_e = (~rml_ecl_canrestore_e[0] & ~rml_ecl_canrestore_e[1] 
+                              & ~rml_ecl_canrestore_e[2]);
+   assign cansave_is0_e = (~rml_ecl_cansave_e[0] & ~rml_ecl_cansave_e[1] & 
+                           ~rml_ecl_cansave_e[2]);
+   assign otherwin_is0_e = ~rml_ecl_other_e;
+
+   ///////////////////////////////////////
+   // Signals that operations are done
+   // restore/return is not signalled here
+   // because it depends on the write to the
+   // irf (computed in ecl_wb)
+   ////////////////////////////////////////
+   assign rml_ecl_rmlop_done_e = (ifu_exu_saved_e | ifu_exu_restored_e |
+                                  (ifu_exu_flushw_e & ~spill_trap_flush));
+   
+   //////////////////////////
+   // Trap generation
+   //////////////////////////
+   // Fill trap generated on restore and canrestore == 0
+   assign rml_ecl_fill_e = restore_e & canrestore_is0_e; 
+   
+   // Spill trap on save with cansave == 0
+   assign spill_trap_save = save_e & cansave_is0_e;
+   assign exu_ifu_spill_e = spill_trap_save; // only the save-caused spill goes out here, not the flushw one
+   // Spill trap on flushw with cansave != (NWINDOWS - 2 = 6), i.e. cansave != 3'b110
+   assign spill_trap_flush = (ifu_exu_flushw_e & ~(rml_ecl_cansave_e[2] &
+                                                 rml_ecl_cansave_e[1] & 
+                                                 ~rml_ecl_cansave_e[0]));
+   assign exu_tlu_spill_e = (spill_trap_save | spill_trap_flush);
+   dff_s spill_e2m(.din(exu_tlu_spill_e), .clk(clk), .q(spill_m), .se(se), .si(), .so());
+
+   // Clean window trap on save w/ cleanwin - canrestore == 0,
+   // i.e. cleanwin == canrestore (detected as an all-zero bitwise XOR)
+   // (not signalled on spill traps because spill is higher priority)
+   assign cleanwin_xor_canrestore = rml_ecl_cleanwin_e ^ rml_ecl_canrestore_e;
+   assign rml_ecl_clean_window_e = ~(cleanwin_xor_canrestore[2] |
+                                cleanwin_xor_canrestore[1] |
+                                cleanwin_xor_canrestore[0]) & save_e & ~exu_tlu_spill_e;
+
+   // Kill signal for w1 wen bit (all others don't care)
+   assign rml_ecl_kill_e = rml_ecl_fill_e | exu_tlu_spill_e;
+   dff_s rml_kill_e2m(.din(rml_ecl_kill_e), .clk(clk), .q(rml_ecl_kill_m),
+                    .se(se), .si(), .so());
+   
+
+   // WTYPE generation
+   assign rml_ecl_other_d = (rml_ecl_otherwin_d[0] | rml_ecl_otherwin_d[1] 
+                            | rml_ecl_otherwin_d[2]);
+   dff_s other_d2e(.din(rml_ecl_other_d), .clk(clk), .q(rml_ecl_other_e), .se(se),
+                 .si(), .so());
+   mux2ds #(3) wtype_mux(.dout(rml_ecl_wtype_d[2:0]),
+                          .in0(rml_ecl_wstate_d[2:0]),
+                          .in1(rml_ecl_wstate_d[5:3]),
+                          .sel0(~rml_ecl_other_d),
+                          .sel1(rml_ecl_other_d));
+   dff_s #(3) wtype_d2e(.din(rml_ecl_wtype_d[2:0]), .clk(clk), .q(rml_ecl_wtype_e[2:0]),
+                    .se(se), .si(), .so());
+
+
+   ////////////////////////////
+   // Interface with IRF
+   ////////////////////////////
+   assign rml_irf_old_lo_cwp_e[2:0] = old_cwp_e[2:0]; // locals/odds get the full 3-bit pointer
+   assign rml_irf_new_lo_cwp_e[2:0] = new_cwp_e[2:0];
+   assign rml_irf_old_e_cwp_e[1:0] = (old_cwp_e[0])? old_cwp_e[2:1] + 2'b01: old_cwp_e[2:1];
+   assign rml_irf_new_e_cwp_e[1:0] = (new_cwp_e[0])? new_cwp_e[2:1] + 2'b01: new_cwp_e[2:1];
+   // Even pointers above are 2 bits: cwp[2:1], plus one (wrapping mod 4) when cwp[0] is set.
+   assign rml_irf_swap_local_e = (swap_e | swap_locals_ins);
+   assign rml_irf_swap_odd_e = ((save_e | ecl_rml_cwp_wen_e | spill_trap_flush | swap_locals_ins) & old_cwp_e[0]) | 
+                                 ((restore_e | swap_outs) & ~old_cwp_e[0]);
+   assign rml_irf_swap_even_e = ((save_e | ecl_rml_cwp_wen_e | spill_trap_flush | swap_locals_ins) & ~old_cwp_e[0]) |
+                                  ((restore_e | swap_outs) & old_cwp_e[0]);
+   // NOTE: did_restore_m/_w below stage swap_e, so they flag any swap (save, restore, CWP write, flushw spill).
+   assign swap_e = save_e | restore_e | ecl_rml_cwp_wen_e | spill_trap_flush;
+   dff_s dff_did_restore_e2m(.din(swap_e), .clk(clk),
+                       .q(did_restore_m), .se(se),
+                       .si(), .so());
+   dff_s dff_did_restore_m2w(.din(did_restore_m), .clk(clk),
+                       .q(did_restore_w), .se(se),
+                       .si(), .so());
+   // kill restore on all saves (except those that spill) and on any swap whose
+   // instruction is killed in W (did_restore_w & rml_kill_w)
+   assign kill_restore_m = (~spill_m & save_m);
+   dff_s dff_kill_restore_m2w(.din(kill_restore_m), .clk(clk), .q(kill_restore_w),
+                            .se(se), .si(), .so());
+   assign rml_irf_kill_restore_w = kill_restore_w | (did_restore_w & rml_kill_w);
+
+
+   ///////////////////////////////
+   // CWP logic
+   ///////////////////////////////
+   // Logic to compute next_cwp on spill trap.
+   //  CWP = CWP + CANSAVE + 2 (3-bit result; the +2 is a constant 1 added into bit 1)
+   assign spill_cwp_e[0] = rml_ecl_cwp_e[0] ^ rml_ecl_cansave_e[0];
+   assign spill_cwp_carry0 = rml_ecl_cwp_e[0] & rml_ecl_cansave_e[0]; // carry out of bit 0
+   assign spill_cwp_e[1] = rml_ecl_cwp_e[1] ^ rml_ecl_cansave_e[1] ^ ~spill_cwp_carry0;
+   assign spill_cwp_carry1 = (rml_ecl_cwp_e[1] | rml_ecl_cansave_e[1] |
+                              spill_cwp_carry0) & ~(rml_ecl_cwp_e[1] &
+                                                    rml_ecl_cansave_e[1] &
+                                                    spill_cwp_carry0); // all three set: carry lands in bit 3 (dropped)
+   assign spill_cwp_e[2] = rml_ecl_cwp_e[2] ^ rml_ecl_cansave_e[2] ^ spill_cwp_carry1;
+   // save/restore write CWP unless the instruction spills; ecl_rml_kill_e blocks any E-stage CWP write
+   assign rml_cwp_wen_e = (save_e | restore_e) & ~exu_tlu_spill_e;
+   assign cwp_wen_e = (rml_cwp_wen_e | ecl_rml_cwp_wen_e) & ~ecl_rml_kill_e;
+   sparc_exu_rml_inc3 cwp_inc(.dout(rml_next_cwp_e[2:0]), .din(rml_ecl_cwp_e[2:0]),
+                                  .inc(save_e)); // presumably +1 when inc is set, -1 otherwise -- confirm in sparc_exu_rml_inc3
+   // NOTE(review): sel1 and sel2 below are not exclusive by construction; presumably never both set -- confirm
+   assign     next_cwp_sel_inc = ~(ecl_rml_cwp_wen_e | exu_tlu_spill_e);
+   mux3ds #(3) next_cwp_mux(.dout(next_cwp_e[2:0]), 
+                          .in0(rml_next_cwp_e[2:0]),
+                          .in1(ecl_rml_xor_data_e[2:0]),
+                          .in2(spill_cwp_e[2:0]),
+                          .sel0(next_cwp_sel_inc),
+                          .sel1(ecl_rml_cwp_wen_e),
+                          .sel2(exu_tlu_spill_e));
+   // Stage write enable and data E -> M -> W; the W-stage enable is then gated by rml_kill_w
+   dff_s cwp_wen_e2m(.din(cwp_wen_e), .clk(clk), .q(rml_cwp_wen_m),
+                       .se(se), .si(), .so());
+   dff_s #(3) next_cwp_e2m(.din(next_cwp_e[2:0]), .clk(clk), .q(next_cwp_m[2:0]),
+                           .se(se), .si(), .so());
+   assign     cwp_wen_m = rml_cwp_wen_m;
+   dff_s #(3) next_cwp_m2w(.din(next_cwp_m[2:0]), .clk(clk), .q(next_cwp_noreset_w[2:0]),
+                         .se(se), .si(), .so());
+   dff_s cwp_wen_m2w(.din(cwp_wen_m), .clk(clk), .q(cwp_wen_nokill_w),
+                       .se(se), .si(), .so());
+   assign cwp_wen_w = cwp_wen_nokill_w & ~rml_kill_w;
+   assign next_cwp_w[2:0] = next_cwp_noreset_w[2:0]; // straight copy: no reset term is applied here
+   // full swap: spill trap or a CWP write through ecl_rml_cwp_wen_e
+   assign full_swap_e = (exu_tlu_spill_e | ecl_rml_cwp_wen_e);
+
+
+   // oddwin signal for the ifu needs a bypass from W.  It is computed in M and staged for timing:
+   // per thread, bit 0 of the M-stage next CWP is taken when that thread writes CWP, else oddwin_w.
+   // This is possible because the thread is switched out so there is only one bypass condition.
+   // Only save/return will switch in fast enough for a bypass so this is the only write condition checked.
+   wire [3:0] oddwin_m;
+   wire [3:0] oddwin_w;
+   assign     oddwin_m[3] = (cwp_wen_m & ecl_rml_thr_m[3])? next_cwp_m[0]: oddwin_w[3];
+   assign     oddwin_m[2] = (cwp_wen_m & ecl_rml_thr_m[2])? next_cwp_m[0]: oddwin_w[2];
+   assign     oddwin_m[1] = (cwp_wen_m & ecl_rml_thr_m[1])? next_cwp_m[0]: oddwin_w[1];
+   assign     oddwin_m[0] = (cwp_wen_m & ecl_rml_thr_m[0])? next_cwp_m[0]: oddwin_w[0];
+   dff_s #(4) oddwin_dff(.din(oddwin_m[3:0]), .clk(clk), .q(exu_ifu_oddwin_s[3:0]),
+                       .se(se), .si(), .so());
+   // current_cwp: registered 3-bit CWP per thread, thread 0 in [2:0] up to thread 3 in [11:9].
+   integer i; // NOTE(review): not referenced in the visible part of this module -- remove if unused below
+   wire [11:0] next_cwp; // tied to the cwp instance's next_cwp port; presumably per-thread CWPs -- confirm
+   always @(posedge clk) // no reset term; same M-stage select as oddwin_m (cwp_wen_m is not gated by rml_kill_w)
+      begin
+         current_cwp[2:0]<=(cwp_wen_m & ecl_rml_thr_m[0])? next_cwp_m: next_cwp[2:0];
+         current_cwp[5:3]<=(cwp_wen_m & ecl_rml_thr_m[1])? next_cwp_m: next_cwp[5:3];
+         current_cwp[8:6]<=(cwp_wen_m & ecl_rml_thr_m[2])? next_cwp_m: next_cwp[8:6];
+         current_cwp[11:9]<=(cwp_wen_m & ecl_rml_thr_m[3])? next_cwp_m: next_cwp[11:9];
+      end
+      
+   sparc_exu_rml_cwp cwp(
+                         .swap_outs     (swap_outs),
+                         .swap_locals_ins(swap_locals_ins),
+                         .rml_ecl_cwp_e (rml_ecl_cwp_e[2:0]),
+                         .old_cwp_e     (old_cwp_e[2:0]),
+                         .new_cwp_e     (new_cwp_e[2:0]),
+                         .oddwin_w     (oddwin_w[3:0]),
+                         .next_cwp     (next_cwp),
+                         /*AUTOINST*/
+                         // Outputs
+                         .rml_ecl_cwp_d (rml_ecl_cwp_d[2:0]),
+                         .exu_tlu_cwp0_w(exu_tlu_cwp0_w[2:0]),
+                         .exu_tlu_cwp1_w(exu_tlu_cwp1_w[2:0]),
+                         .exu_tlu_cwp2_w(exu_tlu_cwp2_w[2:0]),
+                         .exu_tlu_cwp3_w(exu_tlu_cwp3_w[2:0]),
+                         .rml_irf_cwpswap_tid_e(rml_irf_cwpswap_tid_e[1:0]),
+                         .exu_tlu_spill (exu_tlu_spill),
+                         .exu_tlu_spill_wtype(exu_tlu_spill_wtype[2:0]),
+                         .exu_tlu_spill_other(exu_tlu_spill_other),
+                         .exu_tlu_spill_tid(exu_tlu_spill_tid[1:0]),
+                         .rml_ecl_swap_done(rml_ecl_swap_done[3:0]),
+                         .exu_tlu_cwp_cmplt(exu_tlu_cwp_cmplt),
+                         .exu_tlu_cwp_cmplt_tid(exu_tlu_cwp_cmplt_tid[1:0]),
+                         .exu_tlu_cwp_retry(exu_tlu_cwp_retry),
+                         // Inputs
+                         .clk           (clk),
+                         .se            (se),
+                         .reset         (reset),
+                         .rst_tri_en    (rst_tri_en),
+                         .rml_ecl_wtype_e(rml_ecl_wtype_e[2:0]),
+                         .rml_ecl_other_e(rml_ecl_other_e),
+                         .exu_tlu_spill_e(exu_tlu_spill_e),
+                         .tlu_exu_cwpccr_update_m(tlu_exu_cwpccr_update_m),
+                         .tlu_exu_cwp_retry_m(tlu_exu_cwp_retry_m),
+                         .tlu_exu_cwp_m (tlu_exu_cwp_m[2:0]),
+                         .thr_d         (thr_d[3:0]),
+                         .ecl_rml_thr_m (ecl_rml_thr_m[3:0]),
+                         .ecl_rml_thr_w (ecl_rml_thr_w[3:0]),
+                         .tid_e         (tid_e[1:0]),
+                         .next_cwp_w    (next_cwp_w[2:0]),
+                         .next_cwp_e    (next_cwp_e[2:0]),
+                         .cwp_wen_w     (cwp_wen_w),
+                         .save_e        (save_e),
+                         .restore_e     (restore_e),
+                         .ifu_exu_flushw_e(ifu_exu_flushw_e),
+                         .ecl_rml_cwp_wen_e(ecl_rml_cwp_wen_e),
+                         .full_swap_e   (full_swap_e),
+                         .rml_kill_w    (rml_kill_w));
+
+   ///////////////////////////////
+   // Cansave logic
+   ///////////////////////////////
+   assign cansave_wen_e = ((save_e & ~cansave_is0_e & ~rml_ecl_clean_window_e) |
+                           ifu_exu_saved_e |
+                           (restore_e & ~canrestore_is0_e) |
+                           (ifu_exu_restored_e & otherwin_is0_e));
+   sparc_exu_rml_inc3 cansave_inc(.dout(rml_next_cansave_e[2:0]), .din(rml_ecl_cansave_e[2:0]),
+                                  .inc(cansave_inc_e));
+   assign cansave_inc_e = restore_e | ifu_exu_saved_e;
+
+   mux2ds #(3) next_cansave_mux(.dout(next_cansave_e[2:0]),
+                              .in0(ecl_rml_xor_data_e[2:0]),
+                              .in1(rml_next_cansave_e[2:0]),
+                              .sel0(~cansave_wen_e),
+                              .sel1(cansave_wen_e));
+   dff_s cansave_wen_e2m(.din(cansave_wen_e), .clk(clk), .q(cansave_wen_m),
+                       .se(se), .si(), .so());
+   dff_s #(3) next_cansave_e2m(.din(next_cansave_e[2:0]), .clk(clk), .q(next_cansave_m[2:0]),
+                           .se(se), .si(), .so());
+   assign cansave_wen_valid_m = cansave_wen_m;
+   dff_s cansave_wen_m2w(.din(cansave_wen_valid_m), .clk(clk), .q(rml_cansave_wen_w),
+                       .se(se), .si(), .so());
+   dff_s #(3) next_cansave_m2w(.din(next_cansave_m[2:0]), .clk(clk), .q(next_cansave_w[2:0]),
+                           .se(se), .si(), .so());
+   assign cansave_wen_w = (rml_cansave_wen_w | ecl_rml_cansave_wen_w) & ~rml_kill_w;
+
+   ///////////////////////////////
+   // Canrestore logic
+   ///////////////////////////////
+   assign canrestore_wen_e = ((save_e & ~cansave_is0_e & ~rml_ecl_clean_window_e) |
+                              ifu_exu_restored_e |
+                              (restore_e & ~canrestore_is0_e) |
+                              (ifu_exu_saved_e & otherwin_is0_e));
+   sparc_exu_rml_inc3 canrestore_inc(.dout(rml_next_canrestore_e[2:0]),
+                                     .din(rml_ecl_canrestore_e[2:0]),
+                                     .inc(canrestore_inc_e));
+   assign canrestore_inc_e = ifu_exu_restored_e | save_e;
+   
+   mux2ds #(3) next_canrestore_mux(.dout(next_canrestore_e[2:0]),
+                                    .in0(ecl_rml_xor_data_e[2:0]),
+                                    .in1(rml_next_canrestore_e[2:0]),
+                                    .sel0(~canrestore_wen_e),
+                                    .sel1(canrestore_wen_e));
+   dff_s canrestore_wen_e2m(.din(canrestore_wen_e), .clk(clk), .q(canrestore_wen_m),
+                       .se(se), .si(), .so());
+   dff_s #(3) next_canrestore_e2m(.din(next_canrestore_e[2:0]), .clk(clk), .q(next_canrestore_m[2:0]),
+                           .se(se), .si(), .so());
+   assign canrestore_wen_valid_m = canrestore_wen_m;
+   dff_s canrestore_wen_m2w(.din(canrestore_wen_valid_m), .clk(clk), .q(rml_canrestore_wen_w),
+                       .se(se), .si(), .so());
+   dff_s #(3) next_canrestore_m2w(.din(next_canrestore_m[2:0]), .clk(clk), .q(next_canrestore_w[2:0]),
+                           .se(se), .si(), .so());
+   assign canrestore_wen_w = (rml_canrestore_wen_w | ecl_rml_canrestore_wen_w) & ~rml_kill_w;
+
+   ///////////////////////////////
+   // Otherwin logic
+   ///////////////////////////////
+   // Decrements on saved or restored if otherwin != 0 (otherwise the mux below ignores this value)
+   assign otherwin_wen_e = ((ifu_exu_saved_e | ifu_exu_restored_e) 
+                            & ~otherwin_is0_e);
+   assign rml_next_otherwin_e[2] = ((rml_ecl_otherwin_e[2] & rml_ecl_otherwin_e[1]) |
+                                (rml_ecl_otherwin_e[2] & rml_ecl_otherwin_e[0])); // stays set unless [1:0] == 00 (borrow)
+   assign rml_next_otherwin_e[1] = rml_ecl_otherwin_e[1] ^ ~rml_ecl_otherwin_e[0]; // toggles when bit 0 borrows
+   assign rml_next_otherwin_e[0] = ~rml_ecl_otherwin_e[0];
+
+   mux2ds #(3) next_otherwin_mux(.dout(next_otherwin_e[2:0]),
+                               .in0(ecl_rml_xor_data_e[2:0]),
+                               .in1(rml_next_otherwin_e[2:0]),
+                               .sel0(~otherwin_wen_e),
+                               .sel1(otherwin_wen_e));
+   dff_s otherwin_wen_e2m(.din(otherwin_wen_e), .clk(clk), .q(otherwin_wen_m),
+                       .se(se), .si(), .so());
+   dff_s #(3) next_otherwin_e2m(.din(next_otherwin_e[2:0]), .clk(clk), .q(next_otherwin_m[2:0]),
+                           .se(se), .si(), .so());
+   assign otherwin_wen_valid_m = otherwin_wen_m;
+   dff_s otherwin_wen_m2w(.din(otherwin_wen_valid_m), .clk(clk), .q(rml_otherwin_wen_w),
+                       .se(se), .si(), .so());
+   dff_s #(3) next_otherwin_m2w(.din(next_otherwin_m[2:0]), .clk(clk), .q(next_otherwin_w[2:0]),
+                           .se(se), .si(), .so());
+   assign otherwin_wen_w = (rml_otherwin_wen_w | ecl_rml_otherwin_wen_w) & ~rml_kill_w;
+
+   ///////////////////////////////
+   // Cleanwin logic
+   ///////////////////////////////
+   // increments on restored if cleanwin != 7 (hand-built 3-bit +1; the 7 case is excluded by the enable)
+   assign cleanwin_wen_e = (ifu_exu_restored_e &
+                            ~(rml_ecl_cleanwin_e[2] & rml_ecl_cleanwin_e[1] 
+                              & rml_ecl_cleanwin_e[0]));
+   assign rml_next_cleanwin_e[2] = ((~rml_ecl_cleanwin_e[2] & rml_ecl_cleanwin_e[1] 
+                                 & rml_ecl_cleanwin_e[0]) | rml_ecl_cleanwin_e[2]); // sets on 3 -> 4, then stays set
+   assign rml_next_cleanwin_e[1] = rml_ecl_cleanwin_e[1] ^ rml_ecl_cleanwin_e[0]; // toggles on carry from bit 0
+   assign rml_next_cleanwin_e[0] = ~rml_ecl_cleanwin_e[0];
+   
+   mux2ds #(3) next_cleanwin_mux(.dout(next_cleanwin_e[2:0]),
+                                  .in0(ecl_rml_xor_data_e[2:0]),
+                                  .in1(rml_next_cleanwin_e[2:0]),
+                                  .sel0(~cleanwin_wen_e),
+                                  .sel1(cleanwin_wen_e));
+   dff_s cleanwin_wen_e2m(.din(cleanwin_wen_e), .clk(clk), .q(cleanwin_wen_m),
+                       .se(se), .si(), .so());
+   dff_s #(3) next_cleanwin_e2m(.din(next_cleanwin_e[2:0]), .clk(clk), .q(next_cleanwin_m[2:0]),
+                           .se(se), .si(), .so());
+   assign cleanwin_wen_valid_m = cleanwin_wen_m;
+   dff_s cleanwin_wen_m2w(.din(cleanwin_wen_valid_m), .clk(clk), .q(rml_cleanwin_wen_w),
+                       .se(se), .si(), .so());
+   dff_s #(3) next_cleanwin_m2w(.din(next_cleanwin_m[2:0]), .clk(clk), .q(next_cleanwin_w[2:0]),
+                           .se(se), .si(), .so());
+   assign cleanwin_wen_w = (rml_cleanwin_wen_w | ecl_rml_cleanwin_wen_w) & ~rml_kill_w;
+
+   ///////////////////////////////
+   // WSTATE logic
+   ///////////////////////////////
+   assign wstate_wen_w = ecl_rml_wstate_wen_w & ~rml_kill_w;
+
+   ///////////////////////////////
+   // Storage of other WMRs
+   ///////////////////////////////
+   sparc_exu_reg  cansave_reg(.clk(clk), .se(se),
+                                .data_out(rml_ecl_cansave_d[2:0]), .thr_out(thr_d[3:0]), 
+                                .thr_w(ecl_rml_thr_w[3:0]),
+                              .wen_w(cansave_wen_w), .data_in_w(next_cansave_w[2:0]));
+   dff_s #(3) cansave_d2e(.din(rml_ecl_cansave_d[2:0]), .clk(clk), .q(rml_ecl_cansave_e[2:0]), .se(se),
+                  .si(), .so());
+   sparc_exu_reg  canrestore_reg(.clk(clk), .se(se),
+                                   .data_out(rml_ecl_canrestore_d[2:0]), .thr_out(thr_d[3:0]),
+                                   .thr_w(ecl_rml_thr_w[3:0]),
+                                   .wen_w(canrestore_wen_w),
+                                   .data_in_w(next_canrestore_w[2:0]));
+   dff_s #(3) canrestore_d2e(.din(rml_ecl_canrestore_d[2:0]), .clk(clk), .q(rml_ecl_canrestore_e[2:0]),
+                         .se(se), .si(), .so());
+   sparc_exu_reg  otherwin_reg(.clk(clk), .se(se),
+                                 .data_out(rml_ecl_otherwin_d[2:0]), .thr_out(thr_d[3:0]),
+                                 .thr_w(ecl_rml_thr_w[3:0]),
+                                 .wen_w(otherwin_wen_w), .data_in_w(next_otherwin_w[2:0]));
+   dff_s #(3) otherwin_d2e(.din(rml_ecl_otherwin_d[2:0]), .clk(clk), .q(rml_ecl_otherwin_e[2:0]),
+                       .se(se), .si(), .so());
+   sparc_exu_reg  cleanwin_reg(.clk(clk), .se(se),
+                                 .data_out(rml_ecl_cleanwin_d[2:0]), .thr_out(thr_d[3:0]),
+                                 .thr_w(ecl_rml_thr_w[3:0]),
+                                 .wen_w(cleanwin_wen_w), .data_in_w(next_cleanwin_w[2:0]));
+   dff_s #(3) cleanwin_d2e(.din(rml_ecl_cleanwin_d[2:0]), .clk(clk), .q(rml_ecl_cleanwin_e[2:0]),
+                       .se(se), .si(), .so());
+   sparc_exu_reg hi_wstate_reg(.clk(clk), .se(se),
+                               .data_out(rml_ecl_wstate_d[5:3]), .thr_out(thr_d[3:0]),
+                               .thr_w(ecl_rml_thr_w[3:0]),
+                               .wen_w(wstate_wen_w), 
+                               .data_in_w(exu_tlu_wsr_data_w[5:3]));
+   sparc_exu_reg lo_wstate_reg(.clk(clk), .se(se),
+                               .data_out(rml_ecl_wstate_d[2:0]), .thr_out(thr_d[3:0]),
+                               .thr_w(ecl_rml_thr_w[3:0]),
+                               .wen_w(wstate_wen_w), 
+                               .data_in_w(exu_tlu_wsr_data_w[2:0]));
+
+
+   /////////////////////////////////
+   // Alternate Globals control
+   //----------------------------
+   /////////////////////////////////
+   assign rml_irf_new_agp[1:0] = tlu_exu_agp[1:0];
+   assign agp_tid[1:0] = tlu_exu_agp_tid[1:0];
+
+`ifdef FPGA_SYN_1THREAD
+   assign rml_irf_old_agp[1:0] = agp_thr0[1:0];
+   assign        agp_wen_thr0_w = (agp_thr[0] & agp_wen) | reset;   
+   // mux between new and current value
+   mux2ds #(2) agp_next0_mux(.dout(agp_thr0_next[1:0]),
+                               .in0(agp_thr0[1:0]),
+                               .in1(new_agp[1:0]),
+                               .sel0(~agp_wen_thr0_w),
+                               .sel1(agp_wen_thr0_w));
+   dff_s #(2) dff_agp_thr0(.din(agp_thr0_next[1:0]), .clk(clk), .q(agp_thr0[1:0]),
+                       .se(se), .si(), .so());
+   // generation of controls
+   assign        agp_wen = tlu_exu_agp_swap;
+   assign        rml_irf_swap_global = agp_wen;
+   assign        rml_irf_global_tid[1:0] = agp_tid[1:0];
+
+   // decode tids
+   assign        agp_thr[0] = ~agp_tid[1] & ~agp_tid[0];
+      // Decode agp input
+   assign new_agp[1:0] = rml_irf_new_agp[1:0] | {2{reset}};
+
+   // send current global level to ecl for error logging
+   assign rml_ecl_gl_e[1:0] = agp_thr0[1:0];
+   
+`else
+	   
+   //  Output selection for current agp
+   mux4ds #(2) mux_agp_out1(.dout(rml_irf_old_agp[1:0]), 
+                            .sel0(agp_thr[0]),
+                            .sel1(agp_thr[1]),
+                            .sel2(agp_thr[2]),
+                            .sel3(agp_thr[3]),
+                            .in0(agp_thr0[1:0]),
+                            .in1(agp_thr1[1:0]),
+                            .in2(agp_thr2[1:0]),
+                            .in3(agp_thr3[1:0]));
+
+   //////////////////////////////////////
+   //  Storage of agp
+   //////////////////////////////////////
+   
+   // enable input for each thread
+   assign        agp_wen_thr0_w = (agp_thr[0] & agp_wen) | reset;
+   assign        agp_wen_thr1_w = (agp_thr[1] & agp_wen) | reset;
+   assign        agp_wen_thr2_w = (agp_thr[2] & agp_wen) | reset;
+   assign        agp_wen_thr3_w = (agp_thr[3] & agp_wen) | reset;
+
+   // mux between new and current value
+   mux2ds #(2) agp_next0_mux(.dout(agp_thr0_next[1:0]),
+                               .in0(agp_thr0[1:0]),
+                               .in1(new_agp[1:0]),
+                               .sel0(~agp_wen_thr0_w),
+                               .sel1(agp_wen_thr0_w));
+   mux2ds #(2) agp_next1_mux(.dout(agp_thr1_next[1:0]),
+                               .in0(agp_thr1[1:0]),
+                               .in1(new_agp[1:0]),
+                               .sel0(~agp_wen_thr1_w),
+                               .sel1(agp_wen_thr1_w));
+   mux2ds #(2) agp_next2_mux(.dout(agp_thr2_next[1:0]),
+                               .in0(agp_thr2[1:0]),
+                               .in1(new_agp[1:0]),
+                               .sel0(~agp_wen_thr2_w),
+                               .sel1(agp_wen_thr2_w));
+   mux2ds #(2) agp_next3_mux(.dout(agp_thr3_next[1:0]),
+                               .in0(agp_thr3[1:0]),
+                               .in1(new_agp[1:0]),
+                               .sel0(~agp_wen_thr3_w),
+                               .sel1(agp_wen_thr3_w));
+
+   // store new value
+   dff_s #(2) dff_agp_thr0(.din(agp_thr0_next[1:0]), .clk(clk), .q(agp_thr0[1:0]),
+                       .se(se), .si(), .so());
+   dff_s #(2) dff_agp_thr1(.din(agp_thr1_next[1:0]), .clk(clk), .q(agp_thr1[1:0]),
+                       .se(se), .si(), .so());
+   dff_s #(2) dff_agp_thr2(.din(agp_thr2_next[1:0]), .clk(clk), .q(agp_thr2[1:0]),
+                       .se(se), .si(), .so());
+   dff_s #(2) dff_agp_thr3(.din(agp_thr3_next[1:0]), .clk(clk), .q(agp_thr3[1:0]),
+                       .se(se), .si(), .so());
+   
+   // generation of controls
+   assign        agp_wen = tlu_exu_agp_swap;
+   assign        rml_irf_swap_global = agp_wen;
+   assign        rml_irf_global_tid[1:0] = agp_tid[1:0];
+
+   // decode tids
+   assign        agp_thr[0] = ~agp_tid[1] & ~agp_tid[0];
+   assign        agp_thr[1] = ~agp_tid[1] & agp_tid[0];
+   assign        agp_thr[2] = agp_tid[1] & ~agp_tid[0];
+   assign        agp_thr[3] = agp_tid[1] & agp_tid[0];
+   
+   // Decode agp input
+   assign new_agp[1:0] = rml_irf_new_agp[1:0] | {2{reset}};
+
+   // send current global level to ecl for error logging
+   assign rml_ecl_gl_e[1:0] = ((tid_e[1:0] == 2'b00)? agp_thr0[1:0]:
+                               (tid_e[1:0] == 2'b01)? agp_thr1[1:0]:
+                               (tid_e[1:0] == 2'b10)? agp_thr2[1:0]:
+                                                              agp_thr3[1:0]);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+endmodule // sparc_exu_rml
Index: /trunk/T1-CPU/exu/sparc_exu_aluadder64.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_aluadder64.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_aluadder64.v	(revision 6)
@@ -0,0 +1,62 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_aluadder64.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_aluadder64
+//	Description:		This block implements the adder for the sparc alu.
+//            It takes two operands and a carry bit.  It adds them together
+//            and sends the output to adder_out.  It also outputs the carry
+//            out of the low 32 bits (cout32) and of the full 64 bits (cout64).
+*/
+
+module sparc_exu_aluadder64 (rs1_data, rs2_data, cin, adder_out, cout32, cout64);
+
+   // Operands and carry-in
+   input  [63:0] rs1_data;    // first addend
+   input  [63:0] rs2_data;    // second addend
+   input         cin;         // carry into bit 0
+
+   // Sum and the two carry taps
+   output [63:0] adder_out;   // low 64 bits of rs1_data + rs2_data + cin
+   output        cout32;      // carry out of the low 32-bit add
+   output        cout64;      // carry out of the high 32-bit add
+
+   // Each half is summed at 33 bits so that bit 32 of the partial sum is
+   // the carry out of that half.  Operands are zero-extended explicitly.
+   wire [32:0]   sum_lo;
+   wire [32:0]   sum_hi;
+
+   // Low word: consumes the external carry-in.
+   assign sum_lo = {1'b0, rs1_data[31:0]} + {1'b0, rs2_data[31:0]} + {32'b0, cin};
+
+   // High word: the low-word carry ripples in as its carry-in.
+   assign sum_hi = {1'b0, rs1_data[63:32]} + {1'b0, rs2_data[63:32]} + {32'b0, sum_lo[32]};
+
+   // Drive the ports from the two partial sums.
+   assign adder_out = {sum_hi[31:0], sum_lo[31:0]};
+   assign cout32    = sum_lo[32];
+   assign cout64    = sum_hi[32];
+
+endmodule // sparc_exu_aluadder64
+
+
+
+
Index: /trunk/T1-CPU/exu/sparc_exu_ecl_cnt6.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_ecl_cnt6.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_ecl_cnt6.v	(revision 6)
@@ -0,0 +1,60 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_ecl_cnt6.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_exu_ecl_cnt6
+//	Description: 6 bit binary counter
+*/
+module sparc_exu_ecl_cnt6 (/*AUTOARG*/
+   // Outputs
+   cntr, 
+   // Inputs
+   reset, clk, se
+   ) ;
+
+   input        reset;    // when high, the next count value is forced to 0
+   input        clk;      // clock pin of the count register
+   input        se;       // forwarded to the se pin of the count register
+   output [5:0] cntr;     // current count
+
+   wire [5:0]   cnt_nxt;  // value presented to the count register
+   wire [5:1]   flip;     // flip[i] = all of cntr[i-1:0] are 1
+
+   // A bit of a binary up-counter toggles when every lower bit is 1.
+   assign flip[1] = cntr[0];
+   assign flip[2] = &cntr[1:0];
+   assign flip[3] = &cntr[2:0];
+   assign flip[4] = &cntr[3:0];
+   assign flip[5] = &cntr[4:0];
+
+   // Bit 0 toggles on every update; bit i toggles when flip[i] is set.
+   // reset takes priority over the increment and clears all six bits.
+   assign cnt_nxt[0] = ~(reset | cntr[0]);
+   assign cnt_nxt[1] = ~reset & (cntr[1] ^ flip[1]);
+   assign cnt_nxt[2] = ~reset & (cntr[2] ^ flip[2]);
+   assign cnt_nxt[3] = ~reset & (cntr[3] ^ flip[3]);
+   assign cnt_nxt[4] = ~reset & (cntr[4] ^ flip[4]);
+   assign cnt_nxt[5] = ~reset & (cntr[5] ^ flip[5]);
+
+   // Count register.  dff_s is defined elsewhere in the project; its si/so
+   // pins are left unconnected here.
+   dff_s #(6) cntr_dff(.din(cnt_nxt[5:0]), .clk(clk), .q(cntr[5:0]), .se(se), .si(), .so());
+endmodule // sparc_exu_ecl_cnt6
Index: /trunk/T1-CPU/exu/sparc_exu_alulogic.v
===================================================================
--- /trunk/T1-CPU/exu/sparc_exu_alulogic.v	(revision 6)
+++ /trunk/T1-CPU/exu/sparc_exu_alulogic.v	(revision 6)
@@ -0,0 +1,89 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_exu_alulogic.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//
+//  Module Name: sparc_exu_alulogic
+//	Description: This block implements and, or, xor, andn, orn, xnor
+//		and pass_rs2_data.  rs2_data is first xored with the inv_logic
+//		signal, so that and, or and xor become andn, orn and xnor; the
+//		three results and the pass data are then muxed together.  Both
+//		inputs are buffered before being used, and the low half of
+//		rs2_data is buffered again before going to the mux.
+*/
+
+module sparc_exu_alulogic (/*AUTOARG*/
+   // Outputs
+   logic_out, 
+   // Inputs
+   rs1_data, rs2_data, isand, isor, isxor, pass_rs2_data, inv_logic, 
+   ifu_exu_sethi_inst_e
+   );
+
+   // Bitwise logic unit: and/or/xor of rs1 with (optionally inverted) rs2,
+   // plus a pass-through of rs2 whose upper 32 bits can be forced to zero.
+
+   input  [63:0] rs1_data;              // first operand
+   input  [63:0] rs2_data;              // second operand
+   input         isand;                 // select paired with the AND result
+   input         isor;                  // select paired with the OR result
+   input         isxor;                 // select paired with the XOR result
+   input         pass_rs2_data;         // select paired with the pass data
+   input         inv_logic;             // complement rs2 before and/or/xor
+   input         ifu_exu_sethi_inst_e;  // clear rs2[63:32] on the pass path
+
+   output [63:0] logic_out;             // output of the result mux
+
+   wire   [63:0] rs1_data_bf1;          // rs1_data after dp_buffer
+   wire   [63:0] rs2_data_bf1;          // rs2_data after dp_buffer
+   wire   [63:0] rs2_maybe_inv;         // rs2_data_bf1, complemented if inv_logic
+   wire   [63:0] and_res;               // rs1 & rs2_maybe_inv
+   wire   [63:0] or_res;                // rs1 | rs2_maybe_inv
+   wire   [63:0] xor_res;               // rs1 ^ rs2_maybe_inv
+   wire   [63:0] mov_data;              // rs2 as presented to mux input in3
+   wire          keep_upper;            // low when the upper word must be zeroed
+
+   // ---- buffer both operands ------------------------------------------
+   dp_buffer #(64) rs1_data_buf(.dout(rs1_data_bf1[63:0]), .in(rs1_data[63:0]));
+   dp_buffer #(64) rs2_data_buf(.dout(rs2_data_bf1[63:0]), .in(rs2_data[63:0]));
+
+   // ---- optional inversion of rs2 (turns and/or/xor into andn/orn/xnor)
+   assign rs2_maybe_inv[63:0] = {64{inv_logic}} ^ rs2_data_bf1[63:0];
+
+   // ---- the three bitwise operations ----------------------------------
+   assign and_res = rs2_maybe_inv & rs1_data_bf1;
+   assign or_res  = rs2_maybe_inv | rs1_data_bf1;
+   assign xor_res = rs2_maybe_inv ^ rs1_data_bf1;
+
+   // ---- pass path: un-inverted rs2, upper half masked for sethi -------
+   assign keep_upper      = ~ifu_exu_sethi_inst_e;
+   assign mov_data[63:32] = {32{keep_upper}} & rs2_data_bf1[63:32];
+   dp_buffer #(32) rs2_data_buf2(.dout(mov_data[31:0]), .in(rs2_data_bf1[31:0]));
+
+   // ---- result mux (mux4ds is defined elsewhere in the project) -------
+   mux4ds #(64) logic_mux(.dout(logic_out[63:0]),
+                          .in0(and_res[63:0]),   .sel0(isand),
+                          .in1(or_res[63:0]),    .sel1(isor),
+                          .in2(xor_res[63:0]),   .sel2(isxor),
+                          .in3(mov_data[63:0]),  .sel3(pass_rs2_data));
+
+endmodule
+
Index: /trunk/T1-CPU/rtl/cpx_spc_buf.v
===================================================================
--- /trunk/T1-CPU/rtl/cpx_spc_buf.v	(revision 6)
+++ /trunk/T1-CPU/rtl/cpx_spc_buf.v	(revision 6)
@@ -0,0 +1,43 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: cpx_spc_buf.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+`include "sys.h"
+`include "iop.h"
+`include "ifu.h"
+
+module cpx_spc_buf (/*AUTOARG*/
+   // Outputs
+   cpx_spc_data_cx2_buf, cpx_spc_data_rdy_cx2_buf, 
+   // Inputs
+   cpx_spc_data_cx2, cpx_spc_data_rdy_cx2
+   );
+
+   // Combinational feed-through of the CPX data bus and its rdy signal.
+   output [`CPX_WIDTH-1:0] cpx_spc_data_cx2_buf;      // copy of cpx_spc_data_cx2
+   output                  cpx_spc_data_rdy_cx2_buf;  // copy of cpx_spc_data_rdy_cx2
+
+   input  [`CPX_WIDTH-1:0] cpx_spc_data_cx2;
+   input                   cpx_spc_data_rdy_cx2;
+
+   assign cpx_spc_data_cx2_buf[`CPX_WIDTH-1:0] = cpx_spc_data_cx2[`CPX_WIDTH-1:0];
+   assign cpx_spc_data_rdy_cx2_buf             = cpx_spc_data_rdy_cx2;
+
+endmodule
+  
Index: /trunk/T1-CPU/rtl/spc_pcx_buf.v
===================================================================
--- /trunk/T1-CPU/rtl/spc_pcx_buf.v	(revision 6)
+++ /trunk/T1-CPU/rtl/spc_pcx_buf.v	(revision 6)
@@ -0,0 +1,51 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spc_pcx_buf.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+`include "sys.h"
+`include "iop.h"
+
+module spc_pcx_buf (/*AUTOARG*/
+   // Outputs
+   spc_pcx_data_pa, spc_pcx_atom_pq, spc_pcx_req_pq, 
+   pcx_spc_grant_px_buf, 
+   // Inputs
+   spc_pcx_data_pa_buf, spc_pcx_atom_pq_buf, spc_pcx_req_pq_buf, 
+   pcx_spc_grant_px
+   );
+
+   // Pure wiring: every output is a continuous copy of exactly one input.
+
+   // These three *_buf inputs are copied to the un-suffixed outputs.
+   input  [`PCX_WIDTH-1:0] spc_pcx_data_pa_buf;
+   output [`PCX_WIDTH-1:0] spc_pcx_data_pa;
+   input                   spc_pcx_atom_pq_buf;
+   output                  spc_pcx_atom_pq;
+   input  [4:0]            spc_pcx_req_pq_buf;
+   output [4:0]            spc_pcx_req_pq;
+
+   // The grant is named the other way round: plain input, *_buf output.
+   input  [4:0]            pcx_spc_grant_px;
+   output [4:0]            pcx_spc_grant_px_buf;
+
+   assign {spc_pcx_data_pa, spc_pcx_atom_pq, spc_pcx_req_pq} =
+          {spc_pcx_data_pa_buf, spc_pcx_atom_pq_buf, spc_pcx_req_pq_buf};
+   assign pcx_spc_grant_px_buf = pcx_spc_grant_px;
+endmodule
+  
Index: /trunk/T1-CPU/rtl/cpx_spc_rpt.v
===================================================================
--- /trunk/T1-CPU/rtl/cpx_spc_rpt.v	(revision 6)
+++ /trunk/T1-CPU/rtl/cpx_spc_rpt.v	(revision 6)
@@ -0,0 +1,145 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: cpx_spc_rpt.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+`include "sys.h"
+`include "iop.h"
+`include "ifu.h"
+`include "lsu.h"
+
+module cpx_spc_rpt (/*AUTOARG*/
+   // Outputs
+   so, cpx_spc_data_cx3, cpx_spc_data_rdy_cx3, 
+   cpx_spc_data_cx3_b144to140, cpx_spc_data_cx3_b120to118, 
+   cpx_spc_data_cx3_b0, cpx_spc_data_cx3_b4, cpx_spc_data_cx3_b8, 
+   cpx_spc_data_cx3_b12, cpx_spc_data_cx3_b16, cpx_spc_data_cx3_b20, 
+   cpx_spc_data_cx3_b24, cpx_spc_data_cx3_b28, cpx_spc_data_cx3_b32, 
+   cpx_spc_data_cx3_b35, cpx_spc_data_cx3_b38, cpx_spc_data_cx3_b41, 
+   cpx_spc_data_cx3_b44, cpx_spc_data_cx3_b47, cpx_spc_data_cx3_b50, 
+   cpx_spc_data_cx3_b53, cpx_spc_data_cx3_b56, cpx_spc_data_cx3_b60, 
+   cpx_spc_data_cx3_b64, cpx_spc_data_cx3_b68, cpx_spc_data_cx3_b72, 
+   cpx_spc_data_cx3_b76, cpx_spc_data_cx3_b80, cpx_spc_data_cx3_b84, 
+   cpx_spc_data_cx3_b88, cpx_spc_data_cx3_b91, cpx_spc_data_cx3_b94, 
+   cpx_spc_data_cx3_b97, cpx_spc_data_cx3_b100, 
+   cpx_spc_data_cx3_b103, cpx_spc_data_cx3_b106, 
+   cpx_spc_data_cx3_b109, 
+   // Inputs
+   rclk, si, se, cpx_spc_data_cx2, cpx_spc_data_rdy_cx2
+   );
+
+   // Registers the incoming CPX bus and its rdy flag once on rclk:
+   // cpx_spc_data_cx2 / cpx_spc_data_rdy_cx2 are sampled on each rising
+   // edge and appear on the *_cx3 outputs.  A number of bits of the
+   // registered bus are additionally exported on ports of their own.
+   //
+   // NOTE(review): si and se are unused here and so is never driven in
+   // this module -- confirm the scan ports are meant to be left open.
+
+   input                   rclk;
+   input                   si;
+   input                   se;
+   input  [`CPX_WIDTH-1:0] cpx_spc_data_cx2;
+   input                   cpx_spc_data_rdy_cx2;
+
+   // declared only; nothing in this module assigns it
+   output                  so;
+
+   // registered copies of the two inputs
+   output [`CPX_WIDTH-1:0] cpx_spc_data_cx3;
+   reg    [`CPX_WIDTH-1:0] cpx_spc_data_cx3;
+   output                  cpx_spc_data_rdy_cx3;
+   reg                     cpx_spc_data_rdy_cx3;
+
+   // range taps: bit 140 upwards, and CPX_INV_CID_HI down to CPX_INV_CID_LO
+   output [`CPX_WIDTH-1:140]                cpx_spc_data_cx3_b144to140;
+   output [`CPX_INV_CID_HI:`CPX_INV_CID_LO] cpx_spc_data_cx3_b120to118;
+
+   // single-bit taps, bits 0..28 in steps of 4
+   output cpx_spc_data_cx3_b0,   cpx_spc_data_cx3_b4;
+   output cpx_spc_data_cx3_b8,   cpx_spc_data_cx3_b12;
+   output cpx_spc_data_cx3_b16,  cpx_spc_data_cx3_b20;
+   output cpx_spc_data_cx3_b24,  cpx_spc_data_cx3_b28;
+
+   // single-bit taps, bits 32..53 in steps of 3
+   output cpx_spc_data_cx3_b32,  cpx_spc_data_cx3_b35;
+   output cpx_spc_data_cx3_b38,  cpx_spc_data_cx3_b41;
+   output cpx_spc_data_cx3_b44,  cpx_spc_data_cx3_b47;
+   output cpx_spc_data_cx3_b50,  cpx_spc_data_cx3_b53;
+
+   // single-bit taps, bits 56..84 in steps of 4
+   output cpx_spc_data_cx3_b56,  cpx_spc_data_cx3_b60;
+   output cpx_spc_data_cx3_b64,  cpx_spc_data_cx3_b68;
+   output cpx_spc_data_cx3_b72,  cpx_spc_data_cx3_b76;
+   output cpx_spc_data_cx3_b80,  cpx_spc_data_cx3_b84;
+
+   // single-bit taps, bits 88..109 in steps of 3
+   output cpx_spc_data_cx3_b88,  cpx_spc_data_cx3_b91;
+   output cpx_spc_data_cx3_b94,  cpx_spc_data_cx3_b97;
+   output cpx_spc_data_cx3_b100, cpx_spc_data_cx3_b103;
+   output cpx_spc_data_cx3_b106, cpx_spc_data_cx3_b109;
+
+   // Capture stage: data and rdy are loaded together on the rising edge
+   // of rclk.  There is no reset or enable term.
+   always @(posedge rclk) begin
+      {cpx_spc_data_rdy_cx3, cpx_spc_data_cx3} <= {cpx_spc_data_rdy_cx2, cpx_spc_data_cx2};
+   end
+
+   // Timing fix (9/5/03): separate buffers to the lsu for signals that are
+   // used in bypass, i.e. isolated from spu/ffu loading.
+   assign cpx_spc_data_cx3_b144to140[`CPX_WIDTH-1:140] = cpx_spc_data_cx3[`CPX_WIDTH-1:140];
+   assign cpx_spc_data_cx3_b120to118[`CPX_INV_CID_HI:`CPX_INV_CID_LO] = cpx_spc_data_cx3[`CPX_INV_CID_HI:`CPX_INV_CID_LO];
+
+   // per-bit copies of the registered bus
+   assign cpx_spc_data_cx3_b0   = cpx_spc_data_cx3[0];
+   assign cpx_spc_data_cx3_b4   = cpx_spc_data_cx3[4];
+   assign cpx_spc_data_cx3_b8   = cpx_spc_data_cx3[8];
+   assign cpx_spc_data_cx3_b12  = cpx_spc_data_cx3[12];
+   assign cpx_spc_data_cx3_b16  = cpx_spc_data_cx3[16];
+   assign cpx_spc_data_cx3_b20  = cpx_spc_data_cx3[20];
+   assign cpx_spc_data_cx3_b24  = cpx_spc_data_cx3[24];
+   assign cpx_spc_data_cx3_b28  = cpx_spc_data_cx3[28];
+
+   assign cpx_spc_data_cx3_b32  = cpx_spc_data_cx3[32];
+   assign cpx_spc_data_cx3_b35  = cpx_spc_data_cx3[35];
+   assign cpx_spc_data_cx3_b38  = cpx_spc_data_cx3[38];
+   assign cpx_spc_data_cx3_b41  = cpx_spc_data_cx3[41];
+   assign cpx_spc_data_cx3_b44  = cpx_spc_data_cx3[44];
+   assign cpx_spc_data_cx3_b47  = cpx_spc_data_cx3[47];
+   assign cpx_spc_data_cx3_b50  = cpx_spc_data_cx3[50];
+   assign cpx_spc_data_cx3_b53  = cpx_spc_data_cx3[53];
+
+   assign cpx_spc_data_cx3_b56  = cpx_spc_data_cx3[56];
+   assign cpx_spc_data_cx3_b60  = cpx_spc_data_cx3[60];
+   assign cpx_spc_data_cx3_b64  = cpx_spc_data_cx3[64];
+   assign cpx_spc_data_cx3_b68  = cpx_spc_data_cx3[68];
+   assign cpx_spc_data_cx3_b72  = cpx_spc_data_cx3[72];
+   assign cpx_spc_data_cx3_b76  = cpx_spc_data_cx3[76];
+   assign cpx_spc_data_cx3_b80  = cpx_spc_data_cx3[80];
+   assign cpx_spc_data_cx3_b84  = cpx_spc_data_cx3[84];
+
+   assign cpx_spc_data_cx3_b88  = cpx_spc_data_cx3[88];
+   assign cpx_spc_data_cx3_b91  = cpx_spc_data_cx3[91];
+   assign cpx_spc_data_cx3_b94  = cpx_spc_data_cx3[94];
+   assign cpx_spc_data_cx3_b97  = cpx_spc_data_cx3[97];
+   assign cpx_spc_data_cx3_b100 = cpx_spc_data_cx3[100];
+   assign cpx_spc_data_cx3_b103 = cpx_spc_data_cx3[103];
+   assign cpx_spc_data_cx3_b106 = cpx_spc_data_cx3[106];
+   assign cpx_spc_data_cx3_b109 = cpx_spc_data_cx3[109];
+
+endmodule
Index: /trunk/T1-CPU/rtl/bw_clk_cl_sparc_cmp.v
===================================================================
--- /trunk/T1-CPU/rtl/bw_clk_cl_sparc_cmp.v	(revision 6)
+++ /trunk/T1-CPU/rtl/bw_clk_cl_sparc_cmp.v	(revision 6)
@@ -0,0 +1,85 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_clk_cl_sparc_cmp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+
+module bw_clk_cl_sparc_cmp(/*AUTOARG*/
+   // Outputs
+   so, rclk, dbginit_l, cluster_grst_l, 
+   // Inputs
+   si, se, grst_l, gdbginit_l, gclk, cluster_cken, arst_l, 
+   adbginit_l
+   );
+   // Wrapper: each port below connects 1:1 to the same-named port of cluster_header I0.
+   /*AUTOOUTPUT*/
+   // Beginning of automatic outputs (from unused autoinst outputs)
+   output               cluster_grst_l;         // From I0 of cluster_header.v
+   output               dbginit_l;              // From I0 of cluster_header.v
+   output               rclk;                   // From I0 of cluster_header.v
+   output               so;                     // From I0 of cluster_header.v
+   // End of automatics
+   /*AUTOINPUT*/
+   // Beginning of automatic inputs (from unused autoinst inputs)
+   input                adbginit_l;             // To I0 of cluster_header.v
+   input                arst_l;                 // To I0 of cluster_header.v
+   input                cluster_cken;           // To I0 of cluster_header.v
+   input                gclk;                   // To I0 of cluster_header.v
+   input                gdbginit_l;             // To I0 of cluster_header.v
+   input                grst_l;                 // To I0 of cluster_header.v
+   input                se;                     // To I0 of cluster_header.v
+   input                si;                     // To I0 of cluster_header.v
+   // End of automatics
+   /*AUTOWIRE*/
+   // Beginning of automatic wires (for undeclared instantiated-module outputs)
+   // End of automatics
+   // The instance below is the module's only content; no logic is added around it.
+   cluster_header I0 (/*AUTOINST*/
+                      // Outputs
+                      .dbginit_l           (dbginit_l),
+                      .cluster_grst_l      (cluster_grst_l),
+                      .rclk                (rclk),
+                      .so                  (so),
+                      // Inputs
+                      .gclk                (gclk),
+                      .cluster_cken        (cluster_cken),
+                      .arst_l              (arst_l),
+                      .grst_l              (grst_l),
+                      .adbginit_l          (adbginit_l),
+                      .gdbginit_l          (gdbginit_l),
+                      .si                  (si),
+                      .se                  (se));
+// Commented-out manual port declarations; the active ones are the AUTO-generated lists above.
+//output          so ;
+//output          dbginit_l ;
+//output          cluster_grst_l ;
+//output          rclk ;
+//input           si ;
+//input           se ;
+//input           adbginit_l ;
+//input           gdbginit_l ;
+//input           arst_l ;
+//input           grst_l ;
+//input           cluster_cken ;
+//input           gclk ;
+   
+endmodule // bw_clk_cl_sparc_cmp
+
+// Local Variables:
+// verilog-library-directories:("." "../../common/rtl")
+// End:
Index: /trunk/T1-CPU/rtl/sparc.v
===================================================================
--- /trunk/T1-CPU/rtl/sparc.v	(revision 6)
+++ /trunk/T1-CPU/rtl/sparc.v	(revision 6)
@@ -0,0 +1,3352 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+`include "sys.h"
+`include "iop.h"
+`include "ifu.h"
+`include "tlu.h"
+`include "lsu.h"
+	 
+module sparc (/*AUTOARG*/
+   // Outputs
+   spc_pcx_req_pq, spc_pcx_atom_pq, spc_pcx_data_pa,
+	//spc_sscan_so, spc_scanout0, spc_scanout1,
+	tst_ctu_mbist_done, 
+   tst_ctu_mbist_fail, spc_efc_ifuse_data, spc_efc_dfuse_data, 
+   // Inputs
+   pcx_spc_grant_px, cpx_spc_data_rdy_cx2, cpx_spc_data_cx2, 
+   const_cpuid, const_maskid, ctu_tck, ctu_sscan_se, ctu_sscan_snap, 
+   ctu_sscan_tid, ctu_tst_mbist_enable, efc_spc_fuse_clk1, 
+   efc_spc_fuse_clk2, efc_spc_ifuse_ashift, efc_spc_ifuse_dshift, 
+   efc_spc_ifuse_data, efc_spc_dfuse_ashift, efc_spc_dfuse_dshift, 
+   efc_spc_dfuse_data, ctu_tst_macrotest, ctu_tst_scan_disable, 
+   ctu_tst_short_chain, global_shift_enable, ctu_tst_scanmode, 
+   spc_scanin0, spc_scanin1, cluster_cken, gclk, cmp_grst_l, 
+   cmp_arst_l, ctu_tst_pre_grst_l, adbginit_l, gdbginit_l
+   );
+
+   // these are the only legal IOs
+
+   // pcx
+   output [4:0]   spc_pcx_req_pq;    // processor to pcx request
+   output         spc_pcx_atom_pq;   // processor to pcx atomic request
+   output [`PCX_WIDTH-1:0] spc_pcx_data_pa;  // processor to pcx packet
+
+   // shadow scan (kept as internal wires; the output ports are commented out)
+   wire     spc_sscan_so;         // From ifu of sparc_ifu.v
+   wire     spc_scanout0;         // From test_stub of test_stub_bist.v
+   wire     spc_scanout1;         // From test_stub of test_stub_bist.v
+   //output     spc_sscan_so;         // From ifu of sparc_ifu.v
+   //output     spc_scanout0;         // From test_stub of test_stub_bist.v
+   //output     spc_scanout1;         // From test_stub of test_stub_bist.v
+
+   // bist
+   output     tst_ctu_mbist_done;  // From test_stub of test_stub_bist.v
+   output     tst_ctu_mbist_fail;  // From test_stub of test_stub_bist.v
+
+   // fuse
+   output     spc_efc_ifuse_data;     // From ifu of sparc_ifu.v
+   output     spc_efc_dfuse_data;     // From ifu of sparc_ifu.v
+
+
+   // pcx grant and cpx interface
+   input [4:0] pcx_spc_grant_px; // pcx to processor grant info  
+   input       cpx_spc_data_rdy_cx2; // cpx data inflight to sparc  
+   input [`CPX_WIDTH-1:0] cpx_spc_data_cx2;     // cpx to sparc data packet
+
+   input [3:0]  const_cpuid;
+   input [7:0]  const_maskid;           // To ifu of sparc_ifu.v
+
+   // sscan
+   input        ctu_tck;                // To ifu of sparc_ifu.v
+   input        ctu_sscan_se;           // To ifu of sparc_ifu.v
+   input        ctu_sscan_snap;         // To ifu of sparc_ifu.v
+   input [3:0]  ctu_sscan_tid;          // To ifu of sparc_ifu.v
+
+   // bist
+   input        ctu_tst_mbist_enable;   // To test_stub of test_stub_bist.v
+
+   // efuse
+   input        efc_spc_fuse_clk1;
+   input        efc_spc_fuse_clk2;
+   input        efc_spc_ifuse_ashift;
+   input        efc_spc_ifuse_dshift;
+   input        efc_spc_ifuse_data;
+   input        efc_spc_dfuse_ashift;
+   input        efc_spc_dfuse_dshift;
+   input        efc_spc_dfuse_data;
+   
+   // scan and macro test
+   input        ctu_tst_macrotest;      // To test_stub of test_stub_bist.v
+   input        ctu_tst_scan_disable;   // To test_stub of test_stub_bist.v
+   input        ctu_tst_short_chain;    // To test_stub of test_stub_bist.v
+   input        global_shift_enable;    // To test_stub of test_stub_bist.v
+   input        ctu_tst_scanmode;       // To test_stub of test_stub_bist.v
+   input        spc_scanin0;
+   input        spc_scanin1;
+   
+   // clk
+   input        cluster_cken;           // To spc_hdr of cluster_header.v
+   input        gclk;                   // To spc_hdr of cluster_header.v
+
+   // reset
+   input        cmp_grst_l;
+   input        cmp_arst_l;
+   input        ctu_tst_pre_grst_l;     // To test_stub of test_stub_bist.v
+
+   input        adbginit_l;             // To spc_hdr of cluster_header.v
+   input        gdbginit_l;             // To spc_hdr of cluster_header.v
+
+
+   // ----------------- End of IOs -------------------------- //
+
+   /* AUTOOUTPUT*/
+   /*AUTOINPUT*/
+   // Beginning of automatic inputs (from unused autoinst inputs)
+   // End of automatics
+
+   // not hooked up yet
+   wire [3:0]   tlu_dsfsr_flt_vld;      // To lsu of lsu.v
+   wire         tlu_lsu_int_ld_ill_va_w2;// To lsu of lsu.v
+   wire [9:0]   lsu_tlu_ldst_va_m;// To lsu of lsu.v
+   wire [47:0]  lsu_ifu_stxa_data;	// From lsu of lsu.v
+
+   wire         lsu_tlu_misalign_addr_ldst_atm_m ;// To tlu of tlu.v
+
+   // name change at top level
+   wire [3:0] 	tlu_ifu_sftint_vld;
+   wire [3:0] 	tlu_hintp_vld;
+   wire [3:0] 	tlu_rerr_vld;
+   wire         lsu_exu_ldst_miss_g2;	// To exu of sparc_exu.v
+   wire         lsu_ifu_ldst_miss_w;	// To exu of sparc_exu.v
+   wire         lsu_exu_dfill_vld_g;
+   wire [63:0]  lsu_exu_dfill_data_g;	// From lsu of lsu.v
+   wire [62:0]  tlu_sscan_test_data;
+   wire         lsu_ifu_tlb_data_ue;      // dtlb data asi rd parity error
+   wire         lsu_ifu_tlb_tag_ue;       // dtlb tag asi rd parity error
+   wire [8:0]   ifu_tlu_imm_asi_d;      // From ifu of sparc_ifu.v
+   
+   wire        ifu_lsu_wsr_inst_d;     // To lsu of lsu.v, ...
+   wire        ifu_exu_wsr_inst_d; 
+
+   // hypervisor stuff
+   wire [3:0]   tlu_hpstate_enb,
+                tlu_hpstate_priv,
+                tlu_hpstate_ibe;
+
+   // scan chain
+   wire                    short_scan0_1;
+   wire                    short_scan0_2;
+   wire                    short_scan0_3;
+   wire                    short_scan0_4;
+   wire                    short_scan0_5;
+   wire                    short_scan0_6;
+   wire                    scan0_1;
+   wire                    scan0_2;
+   wire                    scan0_3;
+   wire                    scan0_4;
+   wire                    scan0_5;
+   wire                    scan0_6;
+   wire                    scan0_7;
+
+   wire                    short_scan1_1;
+   wire                    short_scan1_2;
+   wire                    short_scan1_3;
+   wire                    short_scan1_4;
+   wire                    short_scan1_5;
+   wire                    scan1_1;
+   wire                    scan1_2;
+   wire                    scan1_3;
+   wire                    scan1_4;
+   wire                    scan1_5;
+
+   
+   // bus width difference
+   wire [12:0]	lsu_t0_pctxt_state;	// From lsu of lsu.v
+   wire [12:0]	lsu_t1_pctxt_state;	// From lsu of lsu.v
+   wire [12:0]	lsu_t2_pctxt_state;	// From lsu of lsu.v
+   wire [12:0]	lsu_t3_pctxt_state;	// From lsu of lsu.v
+
+   wire [6:0]  bist_ctl_reg_in;
+
+   /*AUTOWIRE*/
+   // Beginning of automatic wires (for undeclared instantiated-module outputs)
+   wire [10:0]          bist_ctl_reg_out;       // From test_stub of test_stub_bist.v
+   wire                 bist_ctl_reg_wr_en;     // From lsu of lsu.v
+   wire [`CPX_WIDTH-1:0]cpx_spc_data_cx2_buf;   // From buf_cpx of cpx_spc_buf.v
+   wire [`CPX_WIDTH-1:0]cpx_spc_data_cx3;       // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_rdy_cx2_buf;// From buf_cpx of cpx_spc_buf.v
+   wire                 cpx_spc_data_rdy_cx3;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 exu_ffu_wsr_inst_e;     // From exu of sparc_exu.v
+   wire [47:0]          exu_ifu_brpc_e;         // From exu of sparc_exu.v
+   wire [7:0]           exu_ifu_cc_d;           // From exu of sparc_exu.v
+   wire                 exu_ifu_ecc_ce_m;       // From exu of sparc_exu.v
+   wire                 exu_ifu_ecc_ue_m;       // From exu of sparc_exu.v
+   wire [7:0]           exu_ifu_err_reg_m;      // From exu of sparc_exu.v
+   wire [7:0]           exu_ifu_err_synd_m;     // From exu of sparc_exu.v
+   wire                 exu_ifu_inj_ack;        // From exu of sparc_exu.v
+   wire [3:0]           exu_ifu_longop_done_g;  // From exu of sparc_exu.v
+   wire [3:0]           exu_ifu_oddwin_s;       // From exu of sparc_exu.v
+   wire                 exu_ifu_regn_e;         // From exu of sparc_exu.v
+   wire                 exu_ifu_regz_e;         // From exu of sparc_exu.v
+   wire                 exu_ifu_spill_e;        // From exu of sparc_exu.v
+   wire                 exu_ifu_va_oor_m;       // From exu of sparc_exu.v
+   wire [10:3]          exu_lsu_early_va_e;     // From exu of sparc_exu.v
+   wire [47:0]          exu_lsu_ldst_va_e;      // From exu of sparc_exu.v
+   wire                 exu_lsu_priority_trap_m;// From exu of sparc_exu.v
+   wire [63:0]          exu_lsu_rs2_data_e;     // From exu of sparc_exu.v
+   wire [63:0]          exu_lsu_rs3_data_e;     // From exu of sparc_exu.v
+   wire [7:0]           exu_mmu_early_va_e;     // From exu of sparc_exu.v
+   wire                 exu_mul_input_vld;      // From exu of sparc_exu.v
+   wire [63:0]          exu_mul_rs1_data;       // From exu of sparc_exu.v
+   wire [63:0]          exu_mul_rs2_data;       // From exu of sparc_exu.v
+   wire [63:0]          exu_spu_rs3_data_e;     // From exu of sparc_exu.v
+   wire [7:0]           exu_tlu_ccr0_w;         // From exu of sparc_exu.v
+   wire [7:0]           exu_tlu_ccr1_w;         // From exu of sparc_exu.v
+   wire [7:0]           exu_tlu_ccr2_w;         // From exu of sparc_exu.v
+   wire [7:0]           exu_tlu_ccr3_w;         // From exu of sparc_exu.v
+   wire [2:0]           exu_tlu_cwp0_w;         // From exu of sparc_exu.v
+   wire [2:0]           exu_tlu_cwp1_w;         // From exu of sparc_exu.v
+   wire [2:0]           exu_tlu_cwp2_w;         // From exu of sparc_exu.v
+   wire [2:0]           exu_tlu_cwp3_w;         // From exu of sparc_exu.v
+   wire                 exu_tlu_cwp_cmplt;      // From exu of sparc_exu.v
+   wire [1:0]           exu_tlu_cwp_cmplt_tid;  // From exu of sparc_exu.v
+   wire                 exu_tlu_cwp_retry;      // From exu of sparc_exu.v
+   wire                 exu_tlu_misalign_addr_jmpl_rtn_m;// From exu of sparc_exu.v
+   wire                 exu_tlu_spill;          // From exu of sparc_exu.v
+   wire                 exu_tlu_spill_other;    // From exu of sparc_exu.v
+   wire [1:0]           exu_tlu_spill_tid;      // From exu of sparc_exu.v
+   wire [2:0]           exu_tlu_spill_wtype;    // From exu of sparc_exu.v
+   wire [8:0]           exu_tlu_ttype_m;        // From exu of sparc_exu.v
+   wire                 exu_tlu_ttype_vld_m;    // From exu of sparc_exu.v
+   wire                 exu_tlu_va_oor_jl_ret_m;// From exu of sparc_exu.v
+   wire                 exu_tlu_va_oor_m;       // From exu of sparc_exu.v
+   wire [63:0]          exu_tlu_wsr_data_m;     // From exu of sparc_exu.v
+   wire [63:0]          ffu_exu_rsr_data_m;     // From ffu of sparc_ffu.v
+   wire [3:0]           ffu_ifu_cc_vld_w2;      // From ffu of sparc_ffu.v
+   wire [7:0]           ffu_ifu_cc_w2;          // From ffu of sparc_ffu.v
+   wire                 ffu_ifu_ecc_ce_w2;      // From ffu of sparc_ffu.v
+   wire                 ffu_ifu_ecc_ue_w2;      // From ffu of sparc_ffu.v
+   wire [5:0]           ffu_ifu_err_reg_w2;     // From ffu of sparc_ffu.v
+   wire [13:0]          ffu_ifu_err_synd_w2;    // From ffu of sparc_ffu.v
+   wire                 ffu_ifu_fpop_done_w2;   // From ffu of sparc_ffu.v
+   wire                 ffu_ifu_fst_ce_w;       // From ffu of sparc_ffu.v
+   wire                 ffu_ifu_inj_ack;        // From ffu of sparc_ffu.v
+   wire                 ffu_ifu_stallreq;       // From ffu of sparc_ffu.v
+   wire [1:0]           ffu_ifu_tid_w2;         // From ffu of sparc_ffu.v
+   wire                 ffu_lsu_blk_st_e;       // From ffu of sparc_ffu.v
+   wire [5:3]           ffu_lsu_blk_st_va_e;    // From ffu of sparc_ffu.v
+   wire [80:0]          ffu_lsu_data;           // From ffu of sparc_ffu.v
+   wire                 ffu_lsu_fpop_rq_vld;    // From ffu of sparc_ffu.v
+   wire                 ffu_lsu_kill_fst_w;     // From ffu of sparc_ffu.v
+   wire                 ffu_tlu_fpu_cmplt;      // From ffu of sparc_ffu.v
+   wire [1:0]           ffu_tlu_fpu_tid;        // From ffu of sparc_ffu.v
+   wire                 ffu_tlu_ill_inst_m;     // From ffu of sparc_ffu.v
+   wire                 ffu_tlu_trap_ieee754;   // From ffu of sparc_ffu.v
+   wire                 ffu_tlu_trap_other;     // From ffu of sparc_ffu.v
+   wire                 ffu_tlu_trap_ue;        // From ffu of sparc_ffu.v
+   wire                 ifu_exu_addr_mask_d;    // From ifu of sparc_ifu.v
+   wire [2:0]           ifu_exu_aluop_d;        // From ifu of sparc_ifu.v
+   wire                 ifu_exu_casa_d;         // From ifu of sparc_ifu.v
+   wire                 ifu_exu_dbrinst_d;      // From ifu of sparc_ifu.v
+   wire                 ifu_exu_disable_ce_e;   // From ifu of sparc_ifu.v
+   wire                 ifu_exu_dontmv_regz0_e; // From ifu of sparc_ifu.v
+   wire                 ifu_exu_dontmv_regz1_e; // From ifu of sparc_ifu.v
+   wire [7:0]           ifu_exu_ecc_mask;       // From ifu of sparc_ifu.v
+   wire                 ifu_exu_enshift_d;      // From ifu of sparc_ifu.v
+   wire                 ifu_exu_flushw_e;       // From ifu of sparc_ifu.v
+   wire                 ifu_exu_ialign_d;       // From ifu of sparc_ifu.v
+   wire [31:0]          ifu_exu_imm_data_d;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_inj_irferr;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_inst_vld_e;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_inst_vld_w;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_invert_d;       // From ifu of sparc_ifu.v
+   wire                 ifu_exu_kill_e;         // From ifu of sparc_ifu.v
+   wire [4:0]           ifu_exu_muldivop_d;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_muls_d;         // From ifu of sparc_ifu.v
+   wire                 ifu_exu_nceen_e;        // From ifu of sparc_ifu.v
+   wire [47:0]          ifu_exu_pc_d;           // From ifu of sparc_ifu.v
+   wire [63:0]          ifu_exu_pcver_e;        // From ifu of sparc_ifu.v
+   wire                 ifu_exu_range_check_jlret_d;// From ifu of sparc_ifu.v
+   wire                 ifu_exu_range_check_other_d;// From ifu of sparc_ifu.v
+   wire [4:0]           ifu_exu_rd_d;           // From ifu of sparc_ifu.v
+   wire                 ifu_exu_rd_exusr_e;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_rd_ffusr_e;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_rd_ifusr_e;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_ren1_s;         // From ifu of sparc_ifu.v
+   wire                 ifu_exu_ren2_s;         // From ifu of sparc_ifu.v
+   wire                 ifu_exu_ren3_s;         // From ifu of sparc_ifu.v
+   wire                 ifu_exu_restore_d;      // From ifu of sparc_ifu.v
+   wire                 ifu_exu_restored_e;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_return_d;       // From ifu of sparc_ifu.v
+   wire [4:0]           ifu_exu_rs1_s;          // From ifu of sparc_ifu.v
+   wire                 ifu_exu_rs1_vld_d;      // From ifu of sparc_ifu.v
+   wire [4:0]           ifu_exu_rs2_s;          // From ifu of sparc_ifu.v
+   wire                 ifu_exu_rs2_vld_d;      // From ifu of sparc_ifu.v
+   wire [4:0]           ifu_exu_rs3_s;          // From ifu of sparc_ifu.v
+   wire                 ifu_exu_rs3e_vld_d;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_rs3o_vld_d;     // From ifu of sparc_ifu.v
+   wire                 ifu_exu_save_d;         // From ifu of sparc_ifu.v
+   wire                 ifu_exu_saved_e;        // From ifu of sparc_ifu.v
+   wire                 ifu_exu_setcc_d;        // From ifu of sparc_ifu.v
+   wire                 ifu_exu_sethi_inst_d;   // From ifu of sparc_ifu.v
+   wire [2:0]           ifu_exu_shiftop_d;      // From ifu of sparc_ifu.v
+   wire                 ifu_exu_tagop_d;        // From ifu of sparc_ifu.v
+   wire                 ifu_exu_tcc_e;          // From ifu of sparc_ifu.v
+   wire [1:0]           ifu_exu_tid_s2;         // From ifu of sparc_ifu.v
+   wire                 ifu_exu_ttype_vld_m;    // From ifu of sparc_ifu.v
+   wire                 ifu_exu_tv_d;           // From ifu of sparc_ifu.v
+   wire                 ifu_exu_use_rsr_e_l;    // From ifu of sparc_ifu.v
+   wire                 ifu_exu_usecin_d;       // From ifu of sparc_ifu.v
+   wire                 ifu_exu_useimm_d;       // From ifu of sparc_ifu.v
+   wire                 ifu_exu_wen_d;          // From ifu of sparc_ifu.v
+   wire [1:0]           ifu_ffu_fcc_num_d;      // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_fld_d;          // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_fpop1_d;        // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_fpop2_d;        // From ifu of sparc_ifu.v
+   wire [8:0]           ifu_ffu_fpopcode_d;     // From ifu of sparc_ifu.v
+   wire [4:0]           ifu_ffu_frd_d;          // From ifu of sparc_ifu.v
+   wire [4:0]           ifu_ffu_frs1_d;         // From ifu of sparc_ifu.v
+   wire [4:0]           ifu_ffu_frs2_d;         // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_fst_d;          // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_inj_frferr;     // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_ldfsr_d;        // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_ldst_size_d;    // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_ldxfsr_d;       // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_mvcnd_m;        // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_quad_op_e;      // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_stfsr_d;        // From ifu of sparc_ifu.v
+   wire                 ifu_ffu_visop_d;        // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_alt_space_d;    // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_alt_space_e;    // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_asi_ack;        // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_asi_rd_unc;     // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_casa_e;         // From ifu of sparc_ifu.v
+   wire [2:0]           ifu_lsu_destid_s;       // From ifu of sparc_ifu.v
+   wire [3:0]           ifu_lsu_error_inj;      // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_fwd_data_vld;   // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_fwd_wr_ack;     // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_ibuf_busy;      // From ifu of sparc_ifu.v
+   wire [7:0]           ifu_lsu_imm_asi_d;      // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_imm_asi_vld_d;  // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_inv_clear;      // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_ld_inst_e;      // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_ldst_dbl_e;     // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_ldst_fp_e;      // From ifu of sparc_ifu.v
+   wire [1:0]           ifu_lsu_ldst_size_e;    // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_ldstub_e;       // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_ldxa_data_vld_w2;// From ifu of sparc_ifu.v
+   wire [63:0]          ifu_lsu_ldxa_data_w2;   // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_ldxa_illgl_va_w2;// From ifu of sparc_ifu.v
+   wire [1:0]           ifu_lsu_ldxa_tid_w2;    // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_memref_d;       // From ifu of sparc_ifu.v
+   wire [3:0]           ifu_lsu_nceen;          // From ifu of sparc_ifu.v
+   wire [51:0]          ifu_lsu_pcxpkt_e;       // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_pcxreq_d;       // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_pref_inst_e;    // From ifu of sparc_ifu.v
+   wire [4:0]           ifu_lsu_rd_e;           // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_sign_ext_e;     // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_st_inst_e;      // From ifu of sparc_ifu.v
+   wire                 ifu_lsu_swap_e;         // From ifu of sparc_ifu.v
+   wire [1:0]           ifu_lsu_thrid_s;        // From ifu of sparc_ifu.v
+   wire                 ifu_mmu_trap_m;         // From ifu of sparc_ifu.v
+   wire                 ifu_spu_inst_vld_w;     // From ifu of sparc_ifu.v
+   wire [3:0]           ifu_spu_nceen;          // From ifu of sparc_ifu.v
+   wire                 ifu_spu_trap_ack;       // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_alt_space_d;    // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_done_inst_d;    // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_flsh_inst_e;    // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_flush_fd2_w;    // From lsu of lsu.v
+   wire                 ifu_tlu_flush_fd3_w;    // From lsu of lsu.v
+   wire                 ifu_tlu_flush_fd_w;     // From lsu of lsu.v
+   wire                 ifu_tlu_flush_m;        // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_flush_w;        // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_hwint_m;        // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_icmiss_e;       // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_immu_miss_m;    // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_inst_vld_m;     // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_inst_vld_m_bf1; // From lsu of lsu.v
+   wire                 ifu_tlu_inst_vld_w;     // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_itlb_done;      // From ifu of sparc_ifu.v
+   wire [3:0]           ifu_tlu_l2imiss;        // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_mb_inst_e;      // From ifu of sparc_ifu.v
+   wire [48:0]          ifu_tlu_npc_m;          // From ifu of sparc_ifu.v
+   wire [48:0]          ifu_tlu_pc_m;           // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_pc_oor_e;       // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_priv_violtn_m;  // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_retry_inst_d;   // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_rsr_inst_d;     // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_rstint_m;       // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_sftint_m;       // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_sir_inst_m;     // From ifu of sparc_ifu.v
+   wire [6:0]           ifu_tlu_sraddr_d;       // From ifu of sparc_ifu.v
+   wire [6:0]           ifu_tlu_sraddr_d_v2;    // From ifu of sparc_ifu.v
+   wire [1:0]           ifu_tlu_thrid_d;        // From ifu of sparc_ifu.v
+   wire [1:0]           ifu_tlu_thrid_e;        // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_trap_m;         // From ifu of sparc_ifu.v
+   wire [8:0]           ifu_tlu_ttype_m;        // From ifu of sparc_ifu.v
+   wire                 ifu_tlu_ttype_vld_m;    // From ifu of sparc_ifu.v
+   wire [7:0]           lsu_asi_reg0;           // From lsu of lsu.v
+   wire [7:0]           lsu_asi_reg1;           // From lsu of lsu.v
+   wire [7:0]           lsu_asi_reg2;           // From lsu of lsu.v
+   wire [7:0]           lsu_asi_reg3;           // From lsu of lsu.v
+   wire [7:0]           lsu_asi_state;          // From lsu of lsu.v
+   wire [3:0]           lsu_dmmu_sfsr_trp_wr;   // From lsu of lsu.v
+   wire [23:0]          lsu_dsfsr_din_g;        // From lsu of lsu.v
+   wire                 lsu_exu_flush_pipe_w;   // From lsu of lsu.v
+   wire [63:0]          lsu_exu_ldxa_data_g;    // From tlu of tlu.v
+   wire                 lsu_exu_ldxa_m;         // From tlu of tlu.v
+   wire [4:0]           lsu_exu_rd_m;           // From lsu of lsu.v
+   wire                 lsu_exu_st_dtlb_perr_g; // From lsu of lsu.v
+   wire [1:0]           lsu_exu_thr_m;          // From lsu of lsu.v
+   wire                 lsu_ffu_ack;            // From lsu of lsu.v
+   wire [2:0]           lsu_ffu_bld_cnt_w;      // From lsu of lsu.v
+   wire                 lsu_ffu_blk_asi_e;      // From lsu of lsu.v
+   wire                 lsu_ffu_flush_pipe_w;   // From lsu of lsu.v
+   wire [63:0]          lsu_ffu_ld_data;        // From lsu of lsu.v
+   wire                 lsu_ffu_ld_vld;         // From lsu of lsu.v
+   wire                 lsu_ffu_stb_full0;      // From lsu of lsu.v
+   wire                 lsu_ffu_stb_full1;      // From lsu of lsu.v
+   wire                 lsu_ffu_stb_full2;      // From lsu of lsu.v
+   wire                 lsu_ffu_stb_full3;      // From lsu of lsu.v
+   wire [3:0]           lsu_ictag_mrgn;         // From lsu of lsu.v
+   wire [17:0]          lsu_ifu_asi_addr;       // From lsu of lsu.v
+   wire                 lsu_ifu_asi_load;       // From lsu of lsu.v
+   wire [7:0]           lsu_ifu_asi_state;      // From lsu of lsu.v
+   wire [1:0]           lsu_ifu_asi_thrid;      // From lsu of lsu.v
+   wire                 lsu_ifu_asi_vld;        // From lsu of lsu.v
+   wire [`CPX_VLD-1:0]  lsu_ifu_cpxpkt_i1;      // From lsu of lsu.v
+   wire                 lsu_ifu_cpxpkt_vld_i1;  // From lsu of lsu.v
+   wire                 lsu_ifu_dc_parity_error_w2;// From lsu of lsu.v
+   wire                 lsu_ifu_dcache_data_perror;// From lsu of lsu.v
+   wire                 lsu_ifu_dcache_tag_perror;// From lsu of lsu.v
+   wire                 lsu_ifu_direct_map_l1;  // From lsu of lsu.v
+   wire [47:4]          lsu_ifu_err_addr;       // From lsu of lsu.v
+   wire [1:0]           lsu_ifu_error_tid;      // From lsu of lsu.v
+   wire                 lsu_ifu_flush_pipe_w;   // From lsu of lsu.v
+   wire [3:0]           lsu_ifu_icache_en;      // From lsu of lsu.v
+   wire [3:0]           lsu_ifu_inj_ack;        // From tlu of tlu.v
+   wire                 lsu_ifu_io_error;       // From lsu of lsu.v
+   wire [3:0]           lsu_ifu_itlb_en;        // From lsu of lsu.v
+   wire                 lsu_ifu_l2_corr_error;  // From lsu of lsu.v
+   wire                 lsu_ifu_l2_unc_error;   // From lsu of lsu.v
+   wire [11:5]          lsu_ifu_ld_icache_index;// From lsu of lsu.v
+   wire [1:0]           lsu_ifu_ld_pcxpkt_tid;  // From lsu of lsu.v
+   wire                 lsu_ifu_ld_pcxpkt_vld;  // From lsu of lsu.v
+   wire [3:0]           lsu_ifu_ldst_cmplt;     // From lsu of lsu.v
+   wire                 lsu_ifu_ldsta_internal_e;// From lsu of lsu.v
+   wire                 lsu_ifu_pcxpkt_ack_d;   // From lsu of lsu.v
+   wire                 lsu_ifu_stallreq;       // From lsu of lsu.v
+   wire [3:0]           lsu_ifu_stbcnt0;        // From lsu of lsu.v
+   wire [3:0]           lsu_ifu_stbcnt1;        // From lsu of lsu.v
+   wire [3:0]           lsu_ifu_stbcnt2;        // From lsu of lsu.v
+   wire [3:0]           lsu_ifu_stbcnt3;        // From lsu of lsu.v
+   wire                 lsu_ifu_tlb_data_su;    // From lsu of lsu.v
+   wire [7:0]           lsu_itlb_mrgn;          // From lsu of lsu.v
+   wire [3:0]           lsu_mamem_mrgn;         // From lsu of lsu.v
+   wire                 lsu_mmu_defr_trp_taken_g;// From lsu of lsu.v
+   wire                 lsu_mmu_flush_pipe_w;   // From lsu of lsu.v
+   wire [63:0]          lsu_mmu_rs3_data_g;     // From lsu of lsu.v
+   wire [2:0]           lsu_pid_state0;         // From lsu of lsu.v
+   wire [2:0]           lsu_pid_state1;         // From lsu of lsu.v
+   wire [2:0]           lsu_pid_state2;         // From lsu of lsu.v
+   wire [2:0]           lsu_pid_state3;         // From lsu of lsu.v
+   wire [7:0]           lsu_spu_asi_state_e;    // From lsu of lsu.v
+   wire                 lsu_spu_early_flush_g;  // From lsu of lsu.v
+   wire                 lsu_spu_ldst_ack;       // From lsu of lsu.v
+   wire [3:0]           lsu_spu_stb_empty;      // From lsu of lsu.v
+   wire [1:0]           lsu_spu_strm_ack_cmplt; // From lsu of lsu.v
+   wire [15:0]          lsu_sscan_data;         // From lsu of lsu.v
+   wire [1:0]           lsu_tlu_async_tid_w2;   // From lsu of lsu.v
+   wire                 lsu_tlu_async_ttype_vld_w2;// From lsu of lsu.v
+   wire [6:0]           lsu_tlu_async_ttype_w2; // From lsu of lsu.v
+   wire [3:0]           lsu_tlu_cpx_req;        // From lsu of lsu.v
+   wire                 lsu_tlu_cpx_vld;        // From lsu of lsu.v
+   wire                 lsu_tlu_daccess_excptn_g;// From lsu of lsu.v
+   wire                 lsu_tlu_daccess_prot_g; // From lsu of lsu.v
+   wire [3:0]           lsu_tlu_dcache_miss_w2; // From lsu of lsu.v
+   wire                 lsu_tlu_defr_trp_taken_g;// From lsu of lsu.v
+   wire                 lsu_tlu_dmmu_miss_g;    // From lsu of lsu.v
+   wire [12:0]          lsu_tlu_dside_ctxt_m;   // From lsu of lsu.v
+   wire                 lsu_tlu_dtlb_done;      // From lsu of lsu.v
+   wire                 lsu_tlu_early_flush2_w; // From lsu of lsu.v
+   wire                 lsu_tlu_early_flush_w;  // From lsu of lsu.v
+   wire [17:0]          lsu_tlu_intpkt;         // From lsu of lsu.v
+   wire [3:0]           lsu_tlu_l2_dmiss;       // From lsu of lsu.v
+   wire                 lsu_tlu_nucleus_ctxt_m; // From lsu of lsu.v
+   wire [12:0]          lsu_tlu_pctxt_m;        // From lsu of lsu.v
+   wire                 lsu_tlu_pcxpkt_ack;     // From lsu of lsu.v
+   wire                 lsu_tlu_priv_action_g;  // From lsu of lsu.v
+   wire [63:0]          lsu_tlu_rs3_data_g;     // From lsu of lsu.v
+   wire [7:0]           lsu_tlu_rsr_data_e;     // From lsu of lsu.v
+   wire                 lsu_tlu_squash_va_oor_m;// From lsu of lsu.v
+   wire [3:0]           lsu_tlu_stb_full_w2;    // From lsu of lsu.v
+   wire [1:0]           lsu_tlu_thrid_d;        // From lsu of lsu.v
+   wire [1:0]           lsu_tlu_tlb_access_tid_m;// From lsu of lsu.v
+   wire [7:0]           lsu_tlu_tlb_asi_state_m;// From lsu of lsu.v
+   wire [47:13]         lsu_tlu_tlb_dmp_va_m;   // From lsu of lsu.v
+   wire                 lsu_tlu_tlb_ld_inst_m;  // From lsu of lsu.v
+   wire [10:0]          lsu_tlu_tlb_ldst_va_m;  // From lsu of lsu.v
+   wire                 lsu_tlu_tlb_st_inst_m;  // From lsu of lsu.v
+   wire [2:0]           lsu_tlu_tte_pg_sz_g;    // From lsu of lsu.v
+   wire [8:0]           lsu_tlu_ttype_m2;       // From lsu of lsu.v
+   wire                 lsu_tlu_ttype_vld_m2;   // From lsu of lsu.v
+   wire                 lsu_tlu_wsr_inst_e;     // From lsu of lsu.v
+   wire                 lsu_tlu_wtchpt_trp_g;   // From lsu of lsu.v
+   wire                 mbist_bisi_mode;        // From test_stub of test_stub_bist.v
+   wire [71:0]          mbist_dcache_data_in;   // From lsu of lsu.v
+   wire                 mbist_dcache_fail;      // From ifu of sparc_ifu.v
+   wire [6:0]           mbist_dcache_index;     // From ifu of sparc_ifu.v
+   wire                 mbist_dcache_read;      // From ifu of sparc_ifu.v
+   wire [1:0]           mbist_dcache_way;       // From ifu of sparc_ifu.v
+   wire                 mbist_dcache_word;      // From ifu of sparc_ifu.v
+   wire                 mbist_dcache_write;     // From ifu of sparc_ifu.v
+   wire                 mbist_done;             // From ifu of sparc_ifu.v
+   wire                 mbist_icache_fail;      // From ifu of sparc_ifu.v
+   wire                 mbist_loop_mode;        // From test_stub of test_stub_bist.v
+   wire                 mbist_loop_on_addr;     // From test_stub of test_stub_bist.v
+   wire                 mbist_start;            // From test_stub of test_stub_bist.v
+   wire                 mbist_stop_on_fail;     // From test_stub of test_stub_bist.v
+   wire                 mbist_stop_on_next_fail;// From test_stub of test_stub_bist.v
+   wire                 mbist_userdata_mode;    // From test_stub of test_stub_bist.v
+   wire [7:0]           mbist_write_data;       // From ifu of sparc_ifu.v
+   wire                 mem_bypass;             // From test_stub of test_stub_bist.v
+   wire                 mem_write_disable;      // From test_stub of test_stub_bist.v
+   wire [63:0]          mul_data_out;           // From mul of sparc_mul_top.v
+   wire                 mul_exu_ack;            // From mul of sparc_mul_top.v
+   wire                 mul_spu_ack;            // From mul of sparc_mul_top.v
+   wire                 mul_spu_shf_ack;        // From mul of sparc_mul_top.v
+   wire                 mux_drive_disable;      // From test_stub of test_stub_bist.v
+   wire                 rclk;                   // From spc_hdr of bw_clk_cl_sparc_cmp.v
+   wire                 se;                     // From test_stub of test_stub_bist.v
+   wire                 sehold;                 // From test_stub of test_stub_bist.v
+   wire                 spc_dbginit_l;          // From spc_hdr of bw_clk_cl_sparc_cmp.v
+   wire                 spc_grst_l;             // From spc_hdr of bw_clk_cl_sparc_cmp.v
+   wire                 spu_ifu_corr_err_w2;    // From spu of spu.v
+   wire [39:4]          spu_ifu_err_addr_w2;    // From spu of spu.v
+   wire                 spu_ifu_int_w2;         // From spu of spu.v
+   wire                 spu_ifu_mamem_err_w1;   // From spu of spu.v
+   wire [1:0]           spu_ifu_ttype_tid_w2;   // From spu of spu.v
+   wire                 spu_ifu_ttype_vld_w2;   // From spu of spu.v
+   wire                 spu_ifu_ttype_w2;       // From spu of spu.v
+   wire                 spu_ifu_unc_err_w1;     // From spu of spu.v
+   wire [123:0]         spu_lsu_ldst_pckt;      // From spu of spu.v
+   wire                 spu_lsu_ldxa_data_vld_w2;// From spu of spu.v
+   wire [63:0]          spu_lsu_ldxa_data_w2;   // From spu of spu.v
+   wire                 spu_lsu_ldxa_illgl_va_w2;// From spu of spu.v
+   wire [1:0]           spu_lsu_ldxa_tid_w2;    // From spu of spu.v
+   wire                 spu_lsu_stxa_ack;       // From spu of spu.v
+   wire [1:0]           spu_lsu_stxa_ack_tid;   // From spu of spu.v
+   wire                 spu_lsu_unc_error_w2;   // From spu of spu.v
+   wire                 spu_mul_acc;            // From spu of spu.v
+   wire                 spu_mul_areg_rst;       // From spu of spu.v
+   wire                 spu_mul_areg_shf;       // From spu of spu.v
+   wire                 spu_mul_mulres_lshft;   // From spu of spu.v
+   wire [63:0]          spu_mul_op1_data;       // From spu of spu.v
+   wire [63:0]          spu_mul_op2_data;       // From spu of spu.v
+   wire                 spu_mul_req_vld;        // From spu of spu.v
+   wire                 testmode_l;             // From test_stub of test_stub_bist.v
+   wire                 tlu_dtlb_data_rd_g;     // From tlu of tlu.v
+   wire                 tlu_dtlb_dmp_actxt_g;   // From tlu of tlu.v
+   wire                 tlu_dtlb_dmp_all_g;     // From tlu of tlu.v
+   wire                 tlu_dtlb_dmp_nctxt_g;   // From tlu of tlu.v
+   wire                 tlu_dtlb_dmp_pctxt_g;   // From tlu of tlu.v
+   wire                 tlu_dtlb_dmp_sctxt_g;   // From tlu of tlu.v
+   wire                 tlu_dtlb_dmp_vld_g;     // From tlu of tlu.v
+   wire                 tlu_dtlb_invalidate_all_g;// From tlu of tlu.v
+   wire [5:0]           tlu_dtlb_rw_index_g;    // From tlu of tlu.v
+   wire                 tlu_dtlb_rw_index_vld_g;// From tlu of tlu.v
+   wire                 tlu_dtlb_tag_rd_g;      // From tlu of tlu.v
+   wire [42:0]          tlu_dtlb_tte_data_w2;   // From tlu of tlu.v
+   wire [58:0]          tlu_dtlb_tte_tag_w2;    // From tlu of tlu.v
+   wire                 tlu_early_flush_pipe2_w;// From tlu of tlu.v
+   wire                 tlu_early_flush_pipe_w; // From tlu of tlu.v
+   wire [`TSA_GLOBAL_WIDTH-1:0]tlu_exu_agp;     // From tlu of tlu.v
+   wire                 tlu_exu_agp_swap;       // From tlu of tlu.v
+   wire [1:0]           tlu_exu_agp_tid;        // From tlu of tlu.v
+   wire [7:0]           tlu_exu_ccr_m;          // From tlu of tlu.v
+   wire [2:0]           tlu_exu_cwp_m;          // From tlu of tlu.v
+   wire                 tlu_exu_cwp_retry_m;    // From tlu of tlu.v
+   wire                 tlu_exu_cwpccr_update_m;// From tlu of tlu.v
+   wire                 tlu_exu_early_flush_pipe_w;// From tlu of tlu.v
+   wire                 tlu_exu_pic_onebelow_m; // From tlu of tlu.v
+   wire                 tlu_exu_pic_twobelow_m; // From tlu of tlu.v
+   wire                 tlu_exu_priv_trap_m;    // From tlu of tlu.v
+   wire [`TLU_ASR_DATA_WIDTH-1:0]tlu_exu_rsr_data_m;// From tlu of tlu.v
+   wire [40:0]          tlu_idtlb_dmp_key_g;    // From tlu of tlu.v
+   wire [1:0]           tlu_idtlb_dmp_thrid_g;  // From tlu of tlu.v
+   wire [3:0]           tlu_ifu_hwint_i3;       // From tlu of tlu.v
+   wire                 tlu_ifu_nukeint_i2;     // From tlu of tlu.v
+   wire [3:0]           tlu_ifu_pstate_ie;      // From tlu of tlu.v
+   wire [3:0]           tlu_ifu_pstate_pef;     // From tlu of tlu.v
+   wire                 tlu_ifu_resumint_i2;    // From tlu of tlu.v
+   wire                 tlu_ifu_rstint_i2;      // From tlu of tlu.v
+   wire [3:0]           tlu_ifu_rstthr_i2;      // From tlu of tlu.v
+   wire [1:0]           tlu_ifu_trap_tid_w1;    // From tlu of tlu.v
+   wire                 tlu_ifu_trapnpc_vld_w1; // From tlu of tlu.v
+   wire [48:0]          tlu_ifu_trapnpc_w2;     // From tlu of tlu.v
+   wire                 tlu_ifu_trappc_vld_w1;  // From tlu of tlu.v
+   wire [48:0]          tlu_ifu_trappc_w2;      // From tlu of tlu.v
+   wire                 tlu_itlb_data_rd_g;     // From tlu of tlu.v
+   wire                 tlu_itlb_dmp_actxt_g;   // From tlu of tlu.v
+   wire                 tlu_itlb_dmp_all_g;     // From tlu of tlu.v
+   wire                 tlu_itlb_dmp_nctxt_g;   // From tlu of tlu.v
+   wire                 tlu_itlb_dmp_vld_g;     // From tlu of tlu.v
+   wire                 tlu_itlb_invalidate_all_g;// From tlu of tlu.v
+   wire [5:0]           tlu_itlb_rw_index_g;    // From tlu of tlu.v
+   wire                 tlu_itlb_rw_index_vld_g;// From tlu of tlu.v
+   wire                 tlu_itlb_tag_rd_g;      // From tlu of tlu.v
+   wire [42:0]          tlu_itlb_tte_data_w2;   // From tlu of tlu.v
+   wire [58:0]          tlu_itlb_tte_tag_w2;    // From tlu of tlu.v
+   wire                 tlu_itlb_wr_vld_g;      // From tlu of tlu.v
+   wire [7:0]           tlu_lsu_asi_m;          // From tlu of tlu.v
+   wire                 tlu_lsu_asi_update_m;   // From tlu of tlu.v
+   wire [63:0]          tlu_lsu_int_ldxa_data_w2;// From tlu of tlu.v
+   wire                 tlu_lsu_int_ldxa_vld_w2;// From tlu of tlu.v
+   wire                 tlu_lsu_ldxa_async_data_vld;// From tlu of tlu.v
+   wire [1:0]           tlu_lsu_ldxa_tid_w2;    // From tlu of tlu.v
+   wire [25:0]          tlu_lsu_pcxpkt;         // From tlu of tlu.v
+   wire                 tlu_lsu_priv_trap_m;    // From tlu of tlu.v
+   wire [3:0]           tlu_lsu_pstate_am;      // From tlu of tlu.v
+   wire [3:0]           tlu_lsu_pstate_cle;     // From tlu of tlu.v
+   wire [3:0]           tlu_lsu_pstate_priv;    // From tlu of tlu.v
+   wire [3:0]           tlu_lsu_redmode;        // From tlu of tlu.v
+   wire [3:0]           tlu_lsu_redmode_rst_d1; // From tlu of tlu.v
+   wire                 tlu_lsu_stxa_ack;       // From tlu of tlu.v
+   wire [1:0]           tlu_lsu_stxa_ack_tid;   // From tlu of tlu.v
+   wire [1:0]           tlu_lsu_tid_m;          // From tlu of tlu.v
+   wire [`TLU_THRD_NUM-1:0]tlu_lsu_tl_zero;     // From tlu of tlu.v
+   // End of automatics
+   // Hand-declared nets (after the automatic list): single-bit / sub-range taps of the cx3 CPX packet, connected to the ff_cpx instance below.
+   wire                 cpx_spc_data_cx3_b0;    // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b100;  // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b103;  // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b106;  // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b109;  // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b12;   // From ff_cpx of cpx_spc_rpt.v
+   wire [`CPX_INV_CID_HI:`CPX_INV_CID_LO]cpx_spc_data_cx3_b120to118;// From ff_cpx of cpx_spc_rpt.v
+   wire [`CPX_WIDTH-1:140]cpx_spc_data_cx3_b144to140;// From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b16;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b20;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b24;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b28;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b32;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b35;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b38;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b4;    // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b41;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b44;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b47;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b50;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b53;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b56;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b60;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b64;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b68;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b72;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b76;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b8;    // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b80;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b84;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b88;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b91;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b94;   // From ff_cpx of cpx_spc_rpt.v
+   wire                 cpx_spc_data_cx3_b97;   // From ff_cpx of cpx_spc_rpt.v
+   wire			lsu_ffu_st_dtlb_perr_g; // NOTE(review): no driver visible in this section; presumably from lsu -- TODO confirm
+   wire			spu_tlu_rsrv_illgl_m;   // NOTE(review): no driver visible in this section; presumably from spu -- TODO confirm how it is driven when FPGA_SYN_NO_SPU is defined
+
+   // buf_cpx: takes the incoming cpx_spc_data_cx2 packet and its cpx_spc_data_rdy_cx2 flag and drives the
+   // *_cx2_buf copies that feed ff_cpx further down. (Presumably a plain buffer stage -- confirm in cpx_spc_buf.v.)
+   cpx_spc_buf buf_cpx (/*AUTOINST*/
+                        // Outputs
+                        .cpx_spc_data_cx2_buf(cpx_spc_data_cx2_buf[`CPX_WIDTH-1:0]),
+                        .cpx_spc_data_rdy_cx2_buf(cpx_spc_data_rdy_cx2_buf),
+                        // Inputs
+                        .cpx_spc_data_cx2(cpx_spc_data_cx2[`CPX_WIDTH-1:0]),
+                        .cpx_spc_data_rdy_cx2(cpx_spc_data_rdy_cx2));
+
+/*   cpx_spc_rpt AUTO_TEMPLATE  (
+                       .cpx_spc_data_cx2(cpx_spc_data_cx2_buf[`CPX_WIDTH-1:0]),
+                       .cpx_spc_data_rdy_cx2(cpx_spc_data_rdy_cx2_buf),
+                        .si             (short_scan0_6),
+                        .so             (scan0_1));
+*/
+   // ff_cpx: receives rclk and the buffered cx2 packet/ready from buf_cpx; drives cpx_spc_data_cx3, cpx_spc_data_rdy_cx3 and the hand-listed cx3 tap nets. Scan: si = short_scan0_6, so = scan0_1.
+   cpx_spc_rpt ff_cpx  (
+                         .cpx_spc_data_cx3_b144to140(cpx_spc_data_cx3_b144to140[`CPX_WIDTH-1:140]),
+                         .cpx_spc_data_cx3_b120to118(cpx_spc_data_cx3_b120to118[`CPX_INV_CID_HI:`CPX_INV_CID_LO]),
+                         .cpx_spc_data_cx3_b0(cpx_spc_data_cx3_b0),
+                         .cpx_spc_data_cx3_b4(cpx_spc_data_cx3_b4),
+                         .cpx_spc_data_cx3_b8(cpx_spc_data_cx3_b8),
+                         .cpx_spc_data_cx3_b12(cpx_spc_data_cx3_b12),
+                         .cpx_spc_data_cx3_b16(cpx_spc_data_cx3_b16),
+                         .cpx_spc_data_cx3_b20(cpx_spc_data_cx3_b20),
+                         .cpx_spc_data_cx3_b24(cpx_spc_data_cx3_b24),
+                         .cpx_spc_data_cx3_b28(cpx_spc_data_cx3_b28),
+                         .cpx_spc_data_cx3_b32(cpx_spc_data_cx3_b32),
+                         .cpx_spc_data_cx3_b35(cpx_spc_data_cx3_b35),
+                         .cpx_spc_data_cx3_b38(cpx_spc_data_cx3_b38),
+                         .cpx_spc_data_cx3_b41(cpx_spc_data_cx3_b41),
+                         .cpx_spc_data_cx3_b44(cpx_spc_data_cx3_b44),
+                         .cpx_spc_data_cx3_b47(cpx_spc_data_cx3_b47),
+                         .cpx_spc_data_cx3_b50(cpx_spc_data_cx3_b50),
+                         .cpx_spc_data_cx3_b53(cpx_spc_data_cx3_b53),
+                         .cpx_spc_data_cx3_b56(cpx_spc_data_cx3_b56),
+                         .cpx_spc_data_cx3_b60(cpx_spc_data_cx3_b60),
+                         .cpx_spc_data_cx3_b64(cpx_spc_data_cx3_b64),
+                         .cpx_spc_data_cx3_b68(cpx_spc_data_cx3_b68),
+                         .cpx_spc_data_cx3_b72(cpx_spc_data_cx3_b72),
+                         .cpx_spc_data_cx3_b76(cpx_spc_data_cx3_b76),
+                         .cpx_spc_data_cx3_b80(cpx_spc_data_cx3_b80),
+                         .cpx_spc_data_cx3_b84(cpx_spc_data_cx3_b84),
+                         .cpx_spc_data_cx3_b88(cpx_spc_data_cx3_b88),
+                         .cpx_spc_data_cx3_b91(cpx_spc_data_cx3_b91),
+                         .cpx_spc_data_cx3_b94(cpx_spc_data_cx3_b94),
+                         .cpx_spc_data_cx3_b97(cpx_spc_data_cx3_b97),
+                         .cpx_spc_data_cx3_b100(cpx_spc_data_cx3_b100),
+                         .cpx_spc_data_cx3_b103(cpx_spc_data_cx3_b103),
+                         .cpx_spc_data_cx3_b106(cpx_spc_data_cx3_b106),
+                         .cpx_spc_data_cx3_b109(cpx_spc_data_cx3_b109),
+                         // Tap ports above are listed by hand; the connections after the marker below tagged "Templated" match the template comment above this instance.
+                         /*AUTOINST*/
+                        // Outputs
+                        .so             (scan0_1),               // Templated
+                        .cpx_spc_data_cx3(cpx_spc_data_cx3[`CPX_WIDTH-1:0]),
+                        .cpx_spc_data_rdy_cx3(cpx_spc_data_rdy_cx3),
+                        // Inputs
+                        .rclk           (rclk),
+                        .si             (short_scan0_6),         // Templated
+                        .se             (se),
+                        .cpx_spc_data_cx2(cpx_spc_data_cx2_buf[`CPX_WIDTH-1:0]), // Templated
+                        .cpx_spc_data_rdy_cx2(cpx_spc_data_rdy_cx2_buf)); // Templated
+
+`ifdef FPGA_SYN_NO_SPU
+
+      sparc_ifu ifu(
+                 // scan
+                 .short_si0              (spc_scanin0),
+                 .short_si1              (spc_scanin1),
+                 .short_so0              (short_scan0_1),
+                 .short_so1              (short_scan1_1),
+                 .si0                    (scan0_1),
+                 .so0                    (scan0_2),
+                 // reset stuff and clk 
+                 .grst_l                 (spc_grst_l),
+                 .arst_l                 (cmp_arst_l),
+                 .gdbginit_l             (spc_dbginit_l),
+                 
+                 // quad ldst disabled
+                 .lsu_ifu_quad_asi_e    (1'b0),
+
+                 // tlb on condition changes with hypervisor
+                 // itlb_en is the bit from the lsu control register
+                 // with no additional logic
+                 .lsu_ifu_addr_real_l  (lsu_ifu_itlb_en[3:0]),
+                 
+                 // name change
+		             .lsu_ifu_dtlb_data_ue	(lsu_ifu_tlb_data_ue),
+		             .lsu_ifu_dtlb_tag_ue	(lsu_ifu_tlb_tag_ue),
+                 .lsu_ifu_dtlb_data_su  (lsu_ifu_tlb_data_su),
+
+	               .tlu_ifu_hintp_vld	    (tlu_hintp_vld[3:0]),
+	               .tlu_ifu_rerr_vld	    (tlu_rerr_vld[3:0]),
+
+		             .lsu_ifu_t0_tlz    	(tlu_lsu_tl_zero[0]),
+		             .lsu_ifu_t1_tlz    	(tlu_lsu_tl_zero[1]),
+		             .lsu_ifu_t2_tlz	    (tlu_lsu_tl_zero[2]),
+		             .lsu_ifu_t3_tlz	    (tlu_lsu_tl_zero[3]),
+                 
+                 .lsu_ifu_ldst_miss_g   (lsu_ifu_ldst_miss_w),
+                 .tlu_ifu_flush_pipe_w  (lsu_ifu_flush_pipe_w),
+
+                 .lsu_idtlb_mrgn        (lsu_itlb_mrgn[7:0]),
+
+                 .mbist_loop_on_address (mbist_loop_on_addr),
+                 
+                 .tlu_sscan_data        (tlu_sscan_test_data[62:0]),
+                 .sparc_sscan_so        (spc_sscan_so),
+                 .ifu_tlu_imm_asi_d     (ifu_tlu_imm_asi_d[8:0]),
+
+                 // bus width difference
+                 .lsu_ifu_cpxpkt_i1     ({lsu_ifu_cpxpkt_vld_i1,
+                                          lsu_ifu_cpxpkt_i1[`CPX_VLD-1:0]}),
+                 
+		             /*AUTOINST*/
+                 // Outputs
+                 .ifu_exu_addr_mask_d   (ifu_exu_addr_mask_d),
+                 .ifu_tlu_inst_vld_w    (ifu_tlu_inst_vld_w),
+                 .ifu_tlu_flush_w       (ifu_tlu_flush_w),
+                 .ifu_lsu_alt_space_e   (ifu_lsu_alt_space_e),
+                 .ifu_tlu_ttype_vld_m   (ifu_tlu_ttype_vld_m),
+                 .ifu_exu_muldivop_d    (ifu_exu_muldivop_d[4:0]),
+                 .ifu_lsu_thrid_s       (ifu_lsu_thrid_s[1:0]),
+                 .mbist_write_data      (mbist_write_data[7:0]),
+                 .ifu_exu_aluop_d       (ifu_exu_aluop_d[2:0]),
+                 .ifu_exu_casa_d        (ifu_exu_casa_d),
+                 .ifu_exu_dbrinst_d     (ifu_exu_dbrinst_d),
+                 .ifu_exu_disable_ce_e  (ifu_exu_disable_ce_e),
+                 .ifu_exu_dontmv_regz0_e(ifu_exu_dontmv_regz0_e),
+                 .ifu_exu_dontmv_regz1_e(ifu_exu_dontmv_regz1_e),
+                 .ifu_exu_ecc_mask      (ifu_exu_ecc_mask[7:0]),
+                 .ifu_exu_enshift_d     (ifu_exu_enshift_d),
+                 .ifu_exu_flushw_e      (ifu_exu_flushw_e),
+                 .ifu_exu_ialign_d      (ifu_exu_ialign_d),
+                 .ifu_exu_imm_data_d    (ifu_exu_imm_data_d[31:0]),
+                 .ifu_exu_inj_irferr    (ifu_exu_inj_irferr),
+                 .ifu_exu_inst_vld_e    (ifu_exu_inst_vld_e),
+                 .ifu_exu_inst_vld_w    (ifu_exu_inst_vld_w),
+                 .ifu_exu_invert_d      (ifu_exu_invert_d),
+                 .ifu_exu_kill_e        (ifu_exu_kill_e),
+                 .ifu_exu_muls_d        (ifu_exu_muls_d),
+                 .ifu_exu_nceen_e       (ifu_exu_nceen_e),
+                 .ifu_exu_pc_d          (ifu_exu_pc_d[47:0]),
+                 .ifu_exu_pcver_e       (ifu_exu_pcver_e[63:0]),
+                 .ifu_exu_range_check_jlret_d(ifu_exu_range_check_jlret_d),
+                 .ifu_exu_range_check_other_d(ifu_exu_range_check_other_d),
+                 .ifu_exu_rd_d          (ifu_exu_rd_d[4:0]),
+                 .ifu_exu_rd_exusr_e    (ifu_exu_rd_exusr_e),
+                 .ifu_exu_rd_ffusr_e    (ifu_exu_rd_ffusr_e),
+                 .ifu_exu_rd_ifusr_e    (ifu_exu_rd_ifusr_e),
+                 .ifu_exu_ren1_s        (ifu_exu_ren1_s),
+                 .ifu_exu_ren2_s        (ifu_exu_ren2_s),
+                 .ifu_exu_ren3_s        (ifu_exu_ren3_s),
+                 .ifu_exu_restore_d     (ifu_exu_restore_d),
+                 .ifu_exu_restored_e    (ifu_exu_restored_e),
+                 .ifu_exu_return_d      (ifu_exu_return_d),
+                 .ifu_exu_rs1_s         (ifu_exu_rs1_s[4:0]),
+                 .ifu_exu_rs1_vld_d     (ifu_exu_rs1_vld_d),
+                 .ifu_exu_rs2_s         (ifu_exu_rs2_s[4:0]),
+                 .ifu_exu_rs2_vld_d     (ifu_exu_rs2_vld_d),
+                 .ifu_exu_rs3_s         (ifu_exu_rs3_s[4:0]),
+                 .ifu_exu_rs3e_vld_d    (ifu_exu_rs3e_vld_d),
+                 .ifu_exu_rs3o_vld_d    (ifu_exu_rs3o_vld_d),
+                 .ifu_exu_save_d        (ifu_exu_save_d),
+                 .ifu_exu_saved_e       (ifu_exu_saved_e),
+                 .ifu_exu_setcc_d       (ifu_exu_setcc_d),
+                 .ifu_exu_sethi_inst_d  (ifu_exu_sethi_inst_d),
+                 .ifu_exu_shiftop_d     (ifu_exu_shiftop_d[2:0]),
+                 .ifu_exu_tagop_d       (ifu_exu_tagop_d),
+                 .ifu_exu_tcc_e         (ifu_exu_tcc_e),
+                 .ifu_exu_tid_s2        (ifu_exu_tid_s2[1:0]),
+                 .ifu_exu_ttype_vld_m   (ifu_exu_ttype_vld_m),
+                 .ifu_exu_tv_d          (ifu_exu_tv_d),
+                 .ifu_exu_use_rsr_e_l   (ifu_exu_use_rsr_e_l),
+                 .ifu_exu_usecin_d      (ifu_exu_usecin_d),
+                 .ifu_exu_useimm_d      (ifu_exu_useimm_d),
+                 .ifu_exu_wen_d         (ifu_exu_wen_d),
+                 .ifu_exu_wsr_inst_d    (ifu_exu_wsr_inst_d),
+                 .ifu_ffu_fcc_num_d     (ifu_ffu_fcc_num_d[1:0]),
+                 .ifu_ffu_fld_d         (ifu_ffu_fld_d),
+                 .ifu_ffu_fpop1_d       (ifu_ffu_fpop1_d),
+                 .ifu_ffu_fpop2_d       (ifu_ffu_fpop2_d),
+                 .ifu_ffu_fpopcode_d    (ifu_ffu_fpopcode_d[8:0]),
+                 .ifu_ffu_frd_d         (ifu_ffu_frd_d[4:0]),
+                 .ifu_ffu_frs1_d        (ifu_ffu_frs1_d[4:0]),
+                 .ifu_ffu_frs2_d        (ifu_ffu_frs2_d[4:0]),
+                 .ifu_ffu_fst_d         (ifu_ffu_fst_d),
+                 .ifu_ffu_inj_frferr    (ifu_ffu_inj_frferr),
+                 .ifu_ffu_ldfsr_d       (ifu_ffu_ldfsr_d),
+                 .ifu_ffu_ldst_size_d   (ifu_ffu_ldst_size_d),
+                 .ifu_ffu_ldxfsr_d      (ifu_ffu_ldxfsr_d),
+                 .ifu_ffu_mvcnd_m       (ifu_ffu_mvcnd_m),
+                 .ifu_ffu_quad_op_e     (ifu_ffu_quad_op_e),
+                 .ifu_ffu_stfsr_d       (ifu_ffu_stfsr_d),
+                 .ifu_ffu_visop_d       (ifu_ffu_visop_d),
+                 .ifu_lsu_alt_space_d   (ifu_lsu_alt_space_d),
+                 .ifu_lsu_asi_ack       (ifu_lsu_asi_ack),
+                 .ifu_lsu_asi_rd_unc    (ifu_lsu_asi_rd_unc),
+                 .ifu_lsu_casa_e        (ifu_lsu_casa_e),
+                 .ifu_lsu_destid_s      (ifu_lsu_destid_s[2:0]),
+                 .ifu_lsu_error_inj     (ifu_lsu_error_inj[3:0]),
+                 .ifu_lsu_fwd_data_vld  (ifu_lsu_fwd_data_vld),
+                 .ifu_lsu_fwd_wr_ack    (ifu_lsu_fwd_wr_ack),
+                 .ifu_lsu_ibuf_busy     (ifu_lsu_ibuf_busy),
+                 .ifu_lsu_imm_asi_d     (ifu_lsu_imm_asi_d[7:0]),
+                 .ifu_lsu_imm_asi_vld_d (ifu_lsu_imm_asi_vld_d),
+                 .ifu_lsu_inv_clear     (ifu_lsu_inv_clear),
+                 .ifu_lsu_ld_inst_e     (ifu_lsu_ld_inst_e),
+                 .ifu_lsu_ldst_dbl_e    (ifu_lsu_ldst_dbl_e),
+                 .ifu_lsu_ldst_fp_e     (ifu_lsu_ldst_fp_e),
+                 .ifu_lsu_ldst_size_e   (ifu_lsu_ldst_size_e[1:0]),
+                 .ifu_lsu_ldstub_e      (ifu_lsu_ldstub_e),
+                 .ifu_lsu_ldxa_data_vld_w2(ifu_lsu_ldxa_data_vld_w2),
+                 .ifu_lsu_ldxa_data_w2  (ifu_lsu_ldxa_data_w2[63:0]),
+                 .ifu_lsu_ldxa_illgl_va_w2(ifu_lsu_ldxa_illgl_va_w2),
+                 .ifu_lsu_ldxa_tid_w2   (ifu_lsu_ldxa_tid_w2[1:0]),
+                 .ifu_lsu_memref_d      (ifu_lsu_memref_d),
+                 .ifu_lsu_nceen         (ifu_lsu_nceen[3:0]),
+                 .ifu_lsu_pcxpkt_e      (ifu_lsu_pcxpkt_e[51:0]),
+                 .ifu_lsu_pcxreq_d      (ifu_lsu_pcxreq_d),
+                 .ifu_lsu_pref_inst_e   (ifu_lsu_pref_inst_e),
+                 .ifu_lsu_rd_e          (ifu_lsu_rd_e[4:0]),
+                 .ifu_lsu_sign_ext_e    (ifu_lsu_sign_ext_e),
+                 .ifu_lsu_st_inst_e     (ifu_lsu_st_inst_e),
+                 .ifu_lsu_swap_e        (ifu_lsu_swap_e),
+                 .ifu_lsu_wsr_inst_d    (ifu_lsu_wsr_inst_d),
+                 .ifu_mmu_trap_m        (ifu_mmu_trap_m),
+                 .ifu_spu_inst_vld_w    (),
+                 .ifu_spu_nceen         (),
+                 .ifu_spu_trap_ack      (),
+                 .ifu_tlu_alt_space_d   (ifu_tlu_alt_space_d),
+                 .ifu_tlu_done_inst_d   (ifu_tlu_done_inst_d),
+                 .ifu_tlu_flsh_inst_e   (ifu_tlu_flsh_inst_e),
+                 .ifu_tlu_flush_m       (ifu_tlu_flush_m),
+                 .ifu_tlu_hwint_m       (ifu_tlu_hwint_m),
+                 .ifu_tlu_icmiss_e      (ifu_tlu_icmiss_e),
+                 .ifu_tlu_immu_miss_m   (ifu_tlu_immu_miss_m),
+                 .ifu_tlu_inst_vld_m    (ifu_tlu_inst_vld_m),
+                 .ifu_tlu_itlb_done     (ifu_tlu_itlb_done),
+                 .ifu_tlu_l2imiss       (ifu_tlu_l2imiss[3:0]),
+                 .ifu_tlu_mb_inst_e     (ifu_tlu_mb_inst_e),
+                 .ifu_tlu_npc_m         (ifu_tlu_npc_m[48:0]),
+                 .ifu_tlu_pc_m          (ifu_tlu_pc_m[48:0]),
+                 .ifu_tlu_pc_oor_e      (ifu_tlu_pc_oor_e),
+                 .ifu_tlu_priv_violtn_m (ifu_tlu_priv_violtn_m),
+                 .ifu_tlu_retry_inst_d  (ifu_tlu_retry_inst_d),
+                 .ifu_tlu_rsr_inst_d    (ifu_tlu_rsr_inst_d),
+                 .ifu_tlu_rstint_m      (ifu_tlu_rstint_m),
+                 .ifu_tlu_sftint_m      (ifu_tlu_sftint_m),
+                 .ifu_tlu_sir_inst_m    (ifu_tlu_sir_inst_m),
+                 .ifu_tlu_sraddr_d      (ifu_tlu_sraddr_d[6:0]),
+                 .ifu_tlu_sraddr_d_v2   (ifu_tlu_sraddr_d_v2[6:0]),
+                 .ifu_tlu_thrid_d       (ifu_tlu_thrid_d[1:0]),
+                 .ifu_tlu_thrid_e       (ifu_tlu_thrid_e[1:0]),
+                 .ifu_tlu_trap_m        (ifu_tlu_trap_m),
+                 .ifu_tlu_ttype_m       (ifu_tlu_ttype_m[8:0]),
+                 .mbist_dcache_fail     (mbist_dcache_fail),
+                 .mbist_dcache_index    (mbist_dcache_index[6:0]),
+                 .mbist_dcache_read     (mbist_dcache_read),
+                 .mbist_dcache_way      (mbist_dcache_way[1:0]),
+                 .mbist_dcache_word     (mbist_dcache_word),
+                 .mbist_dcache_write    (mbist_dcache_write),
+                 .mbist_done            (mbist_done),
+                 .mbist_icache_fail     (mbist_icache_fail),
+                 .spc_efc_ifuse_data    (spc_efc_ifuse_data),
+                 // Inputs
+                 .mem_write_disable     (mem_write_disable),
+                 .mux_drive_disable     (mux_drive_disable),
+                 .exu_tlu_wsr_data_m    (exu_tlu_wsr_data_m[2:0]),
+                 .lsu_ictag_mrgn        (lsu_ictag_mrgn[3:0]),
+                 .tlu_itlb_tte_tag_w2   (tlu_itlb_tte_tag_w2[58:0]),
+                 .tlu_itlb_tte_data_w2  (tlu_itlb_tte_data_w2[42:0]),
+                 .tlu_itlb_rw_index_vld_g(tlu_itlb_rw_index_vld_g),
+                 .tlu_itlb_rw_index_g   (tlu_itlb_rw_index_g[5:0]),
+                 .tlu_idtlb_dmp_key_g   (tlu_idtlb_dmp_key_g[40:0]),
+                 .tlu_itlb_dmp_all_g    (tlu_itlb_dmp_all_g),
+                 .lsu_sscan_data        (lsu_sscan_data[15:0]),
+                 .const_cpuid           (const_cpuid[3:0]),
+                 .const_maskid          (const_maskid[7:0]),
+                 .ctu_sscan_se          (ctu_sscan_se),
+                 .ctu_sscan_snap        (ctu_sscan_snap),
+                 .ctu_sscan_tid         (ctu_sscan_tid[3:0]),
+                 .ctu_tck               (ctu_tck),
+                 .efc_spc_fuse_clk1     (efc_spc_fuse_clk1),
+                 .efc_spc_fuse_clk2     (efc_spc_fuse_clk2),
+                 .efc_spc_ifuse_ashift  (efc_spc_ifuse_ashift),
+                 .efc_spc_ifuse_data    (efc_spc_ifuse_data),
+                 .efc_spc_ifuse_dshift  (efc_spc_ifuse_dshift),
+                 .exu_ifu_brpc_e        (exu_ifu_brpc_e[47:0]),
+                 .exu_ifu_cc_d          (exu_ifu_cc_d[7:0]),
+                 .exu_ifu_ecc_ce_m      (exu_ifu_ecc_ce_m),
+                 .exu_ifu_ecc_ue_m      (exu_ifu_ecc_ue_m),
+                 .exu_ifu_err_reg_m     (exu_ifu_err_reg_m[7:0]),
+                 .exu_ifu_err_synd_m    (exu_ifu_err_synd_m[7:0]),
+                 .exu_ifu_inj_ack       (exu_ifu_inj_ack),
+                 .exu_ifu_longop_done_g (exu_ifu_longop_done_g[3:0]),
+                 .exu_ifu_oddwin_s      (exu_ifu_oddwin_s[3:0]),
+                 .exu_ifu_regn_e        (exu_ifu_regn_e),
+                 .exu_ifu_regz_e        (exu_ifu_regz_e),
+                 .exu_ifu_spill_e       (exu_ifu_spill_e),
+                 .exu_ifu_va_oor_m      (exu_ifu_va_oor_m),
+                 .ffu_ifu_cc_vld_w2     (ffu_ifu_cc_vld_w2[3:0]),
+                 .ffu_ifu_cc_w2         (ffu_ifu_cc_w2[7:0]),
+                 .ffu_ifu_ecc_ce_w2     (ffu_ifu_ecc_ce_w2),
+                 .ffu_ifu_ecc_ue_w2     (ffu_ifu_ecc_ue_w2),
+                 .ffu_ifu_err_reg_w2    (ffu_ifu_err_reg_w2[5:0]),
+                 .ffu_ifu_err_synd_w2   (ffu_ifu_err_synd_w2[13:0]),
+                 .ffu_ifu_fpop_done_w2  (ffu_ifu_fpop_done_w2),
+                 .ffu_ifu_fst_ce_w      (ffu_ifu_fst_ce_w),
+                 .ffu_ifu_inj_ack       (ffu_ifu_inj_ack),
+                 .ffu_ifu_stallreq      (ffu_ifu_stallreq),
+                 .ffu_ifu_tid_w2        (ffu_ifu_tid_w2[1:0]),
+                 .lsu_ifu_asi_addr      (lsu_ifu_asi_addr[17:0]),
+                 .lsu_ifu_asi_load      (lsu_ifu_asi_load),
+                 .lsu_ifu_asi_state     (lsu_ifu_asi_state[7:0]),
+                 .lsu_ifu_asi_thrid     (lsu_ifu_asi_thrid[1:0]),
+                 .lsu_ifu_asi_vld       (lsu_ifu_asi_vld),
+                 .lsu_ifu_dc_parity_error_w2(lsu_ifu_dc_parity_error_w2),
+                 .lsu_ifu_dcache_data_perror(lsu_ifu_dcache_data_perror),
+                 .lsu_ifu_dcache_tag_perror(lsu_ifu_dcache_tag_perror),
+                 .lsu_ifu_direct_map_l1 (lsu_ifu_direct_map_l1),
+                 .lsu_ifu_err_addr      (lsu_ifu_err_addr[47:4]),
+                 .lsu_ifu_error_tid     (lsu_ifu_error_tid[1:0]),
+                 .lsu_ifu_icache_en     (lsu_ifu_icache_en[3:0]),
+                 .lsu_ifu_inj_ack       (lsu_ifu_inj_ack[3:0]),
+                 .lsu_ifu_io_error      (lsu_ifu_io_error),
+                 .lsu_ifu_l2_corr_error (lsu_ifu_l2_corr_error),
+                 .lsu_ifu_l2_unc_error  (lsu_ifu_l2_unc_error),
+                 .lsu_ifu_ld_icache_index(lsu_ifu_ld_icache_index[`IC_IDX_HI:5]),
+                 .lsu_ifu_ld_pcxpkt_tid (lsu_ifu_ld_pcxpkt_tid[1:0]),
+                 .lsu_ifu_ld_pcxpkt_vld (lsu_ifu_ld_pcxpkt_vld),
+                 .lsu_ifu_ldst_cmplt    (lsu_ifu_ldst_cmplt[3:0]),
+                 .lsu_ifu_ldsta_internal_e(lsu_ifu_ldsta_internal_e),
+                 .lsu_ifu_pcxpkt_ack_d  (lsu_ifu_pcxpkt_ack_d),
+                 .lsu_ifu_stallreq      (lsu_ifu_stallreq),
+                 .lsu_ifu_stbcnt0       (lsu_ifu_stbcnt0[3:0]),
+                 .lsu_ifu_stbcnt1       (lsu_ifu_stbcnt1[3:0]),
+                 .lsu_ifu_stbcnt2       (lsu_ifu_stbcnt2[3:0]),
+                 .lsu_ifu_stbcnt3       (lsu_ifu_stbcnt3[3:0]),
+                 .lsu_ifu_stxa_data     (lsu_ifu_stxa_data[47:0]),
+                 .lsu_pid_state0        (lsu_pid_state0[2:0]),
+                 .lsu_pid_state1        (lsu_pid_state1[2:0]),
+                 .lsu_pid_state2        (lsu_pid_state2[2:0]),
+                 .lsu_pid_state3        (lsu_pid_state3[2:0]),
+                 .lsu_t0_pctxt_state    (lsu_t0_pctxt_state[12:0]),
+                 .lsu_t1_pctxt_state    (lsu_t1_pctxt_state[12:0]),
+                 .lsu_t2_pctxt_state    (lsu_t2_pctxt_state[12:0]),
+                 .lsu_t3_pctxt_state    (lsu_t3_pctxt_state[12:0]),
+                 .mbist_bisi_mode       (mbist_bisi_mode),
+                 .mbist_dcache_data_in  (mbist_dcache_data_in[71:0]),
+                 .mbist_loop_mode       (mbist_loop_mode),
+                 .mbist_start           (mbist_start),
+                 .mbist_stop_on_fail    (mbist_stop_on_fail),
+                 .mbist_stop_on_next_fail(mbist_stop_on_next_fail),
+                 .mbist_userdata_mode   (mbist_userdata_mode),
+                 .rclk                  (rclk),
+                 .se                    (se),
+                 .sehold                (sehold),
+                 .spu_ifu_corr_err_w2   (1'b0),
+                 .spu_ifu_err_addr_w2   (36'h000000000),
+                 .spu_ifu_int_w2        (1'b0),
+                 .spu_ifu_mamem_err_w1  (1'b0),
+                 .spu_ifu_ttype_tid_w2  (2'b00),
+                 .spu_ifu_ttype_vld_w2  (1'b0),
+                 .spu_ifu_ttype_w2      (1'b0),
+                 .spu_ifu_unc_err_w1    (1'b0),
+                 .testmode_l            (testmode_l),
+                 .tlu_hpstate_enb       (tlu_hpstate_enb[3:0]),
+                 .tlu_hpstate_ibe       (tlu_hpstate_ibe[3:0]),
+                 .tlu_hpstate_priv      (tlu_hpstate_priv[3:0]),
+                 .tlu_idtlb_dmp_thrid_g (tlu_idtlb_dmp_thrid_g[1:0]),
+                 .tlu_ifu_hwint_i3      (tlu_ifu_hwint_i3[3:0]),
+                 .tlu_ifu_nukeint_i2    (tlu_ifu_nukeint_i2),
+                 .tlu_ifu_pstate_ie     (tlu_ifu_pstate_ie[3:0]),
+                 .tlu_ifu_pstate_pef    (tlu_ifu_pstate_pef[3:0]),
+                 .tlu_ifu_resumint_i2   (tlu_ifu_resumint_i2),
+                 .tlu_ifu_rstint_i2     (tlu_ifu_rstint_i2),
+                 .tlu_ifu_rstthr_i2     (tlu_ifu_rstthr_i2[3:0]),
+                 .tlu_ifu_sftint_vld    (tlu_ifu_sftint_vld[3:0]),
+                 .tlu_ifu_trap_tid_w1   (tlu_ifu_trap_tid_w1[1:0]),
+                 .tlu_ifu_trapnpc_vld_w1(tlu_ifu_trapnpc_vld_w1),
+                 .tlu_ifu_trapnpc_w2    (tlu_ifu_trapnpc_w2[48:0]),
+                 .tlu_ifu_trappc_vld_w1 (tlu_ifu_trappc_vld_w1),
+                 .tlu_ifu_trappc_w2     (tlu_ifu_trappc_w2[48:0]),
+                 .tlu_itlb_data_rd_g    (tlu_itlb_data_rd_g),
+                 .tlu_itlb_dmp_actxt_g  (tlu_itlb_dmp_actxt_g),
+                 .tlu_itlb_dmp_nctxt_g  (tlu_itlb_dmp_nctxt_g),
+                 .tlu_itlb_dmp_vld_g    (tlu_itlb_dmp_vld_g),
+                 .tlu_itlb_invalidate_all_g(tlu_itlb_invalidate_all_g),
+                 .tlu_itlb_tag_rd_g     (tlu_itlb_tag_rd_g),
+                 .tlu_itlb_wr_vld_g     (tlu_itlb_wr_vld_g),
+                 .tlu_lsu_pstate_am     (tlu_lsu_pstate_am[3:0]),
+                 .tlu_lsu_pstate_priv   (tlu_lsu_pstate_priv[3:0]),
+                 .tlu_lsu_redmode       (tlu_lsu_redmode[3:0]));
+
+`else
+   
+   sparc_ifu ifu(  // `else branch of FPGA_SYN_NO_SPU: IFU instance whose spu_ifu_* inputs use real nets
+                 // scan ports: short_* pair uses spc_scanin0/1 and short_scan0_1/1_1; si0/so0 use scan0_1/scan0_2
+                 .short_si0              (spc_scanin0),
+                 .short_si1              (spc_scanin1),
+                 .short_so0              (short_scan0_1),
+                 .short_so1              (short_scan1_1),
+                 .si0                    (scan0_1),
+                 .so0                    (scan0_2),
+                 // resets and debug-init (the rclk connection is further down, in the AUTOINST list)
+                 .grst_l                 (spc_grst_l),
+                 .arst_l                 (cmp_arst_l),
+                 .gdbginit_l             (spc_dbginit_l),
+                 
+                 // quad ldst disabled: this port is tied to constant 0
+                 .lsu_ifu_quad_asi_e    (1'b0),
+
+                 // tlb-on condition changes with hypervisor:
+                 // the addr_real_l port is connected straight to itlb_en, the bit
+                 // from the lsu control register, with no additional logic in between
+                 .lsu_ifu_addr_real_l  (lsu_ifu_itlb_en[3:0]),
+                 
+                 // name change: the ports say dtlb_*, the connected nets are named tlb_*
+		             .lsu_ifu_dtlb_data_ue	(lsu_ifu_tlb_data_ue),
+		             .lsu_ifu_dtlb_tag_ue	(lsu_ifu_tlb_tag_ue),
+                 .lsu_ifu_dtlb_data_su  (lsu_ifu_tlb_data_su),
+
+	               .tlu_ifu_hintp_vld	    (tlu_hintp_vld[3:0]),
+	               .tlu_ifu_rerr_vld	    (tlu_rerr_vld[3:0]),
+
+		             .lsu_ifu_t0_tlz    	(tlu_lsu_tl_zero[0]), // one bit of tlu_lsu_tl_zero per tN port
+		             .lsu_ifu_t1_tlz    	(tlu_lsu_tl_zero[1]),
+		             .lsu_ifu_t2_tlz	    (tlu_lsu_tl_zero[2]),
+		             .lsu_ifu_t3_tlz	    (tlu_lsu_tl_zero[3]),
+                 
+                 .lsu_ifu_ldst_miss_g   (lsu_ifu_ldst_miss_w), // port suffix _g, net suffix _w
+                 .tlu_ifu_flush_pipe_w  (lsu_ifu_flush_pipe_w), // tlu_-named port connected to an lsu_-named net
+
+                 .lsu_idtlb_mrgn        (lsu_itlb_mrgn[7:0]),
+
+                 .mbist_loop_on_address (mbist_loop_on_addr),
+                 
+                 .tlu_sscan_data        (tlu_sscan_test_data[62:0]),
+                 .sparc_sscan_so        (spc_sscan_so),
+                 .ifu_tlu_imm_asi_d     (ifu_tlu_imm_asi_d[8:0]),
+
+                 // bus width difference: the vld net is concatenated above bits [`CPX_VLD-1:0] of the packet net
+                 .lsu_ifu_cpxpkt_i1     ({lsu_ifu_cpxpkt_vld_i1,
+                                          lsu_ifu_cpxpkt_i1[`CPX_VLD-1:0]}),
+                 
+		             /*AUTOINST*/
+                 // Outputs (from here to the end of the list every port connects to a net of the same name)
+                 .ifu_exu_addr_mask_d   (ifu_exu_addr_mask_d),
+                 .ifu_tlu_inst_vld_w    (ifu_tlu_inst_vld_w),
+                 .ifu_tlu_flush_w       (ifu_tlu_flush_w),
+                 .ifu_lsu_alt_space_e   (ifu_lsu_alt_space_e),
+                 .ifu_tlu_ttype_vld_m   (ifu_tlu_ttype_vld_m),
+                 .ifu_exu_muldivop_d    (ifu_exu_muldivop_d[4:0]),
+                 .ifu_lsu_thrid_s       (ifu_lsu_thrid_s[1:0]),
+                 .mbist_write_data      (mbist_write_data[7:0]),
+                 .ifu_exu_aluop_d       (ifu_exu_aluop_d[2:0]),
+                 .ifu_exu_casa_d        (ifu_exu_casa_d),
+                 .ifu_exu_dbrinst_d     (ifu_exu_dbrinst_d),
+                 .ifu_exu_disable_ce_e  (ifu_exu_disable_ce_e),
+                 .ifu_exu_dontmv_regz0_e(ifu_exu_dontmv_regz0_e),
+                 .ifu_exu_dontmv_regz1_e(ifu_exu_dontmv_regz1_e),
+                 .ifu_exu_ecc_mask      (ifu_exu_ecc_mask[7:0]),
+                 .ifu_exu_enshift_d     (ifu_exu_enshift_d),
+                 .ifu_exu_flushw_e      (ifu_exu_flushw_e),
+                 .ifu_exu_ialign_d      (ifu_exu_ialign_d),
+                 .ifu_exu_imm_data_d    (ifu_exu_imm_data_d[31:0]),
+                 .ifu_exu_inj_irferr    (ifu_exu_inj_irferr),
+                 .ifu_exu_inst_vld_e    (ifu_exu_inst_vld_e),
+                 .ifu_exu_inst_vld_w    (ifu_exu_inst_vld_w),
+                 .ifu_exu_invert_d      (ifu_exu_invert_d),
+                 .ifu_exu_kill_e        (ifu_exu_kill_e),
+                 .ifu_exu_muls_d        (ifu_exu_muls_d),
+                 .ifu_exu_nceen_e       (ifu_exu_nceen_e),
+                 .ifu_exu_pc_d          (ifu_exu_pc_d[47:0]),
+                 .ifu_exu_pcver_e       (ifu_exu_pcver_e[63:0]),
+                 .ifu_exu_range_check_jlret_d(ifu_exu_range_check_jlret_d),
+                 .ifu_exu_range_check_other_d(ifu_exu_range_check_other_d),
+                 .ifu_exu_rd_d          (ifu_exu_rd_d[4:0]),
+                 .ifu_exu_rd_exusr_e    (ifu_exu_rd_exusr_e),
+                 .ifu_exu_rd_ffusr_e    (ifu_exu_rd_ffusr_e),
+                 .ifu_exu_rd_ifusr_e    (ifu_exu_rd_ifusr_e),
+                 .ifu_exu_ren1_s        (ifu_exu_ren1_s),
+                 .ifu_exu_ren2_s        (ifu_exu_ren2_s),
+                 .ifu_exu_ren3_s        (ifu_exu_ren3_s),
+                 .ifu_exu_restore_d     (ifu_exu_restore_d),
+                 .ifu_exu_restored_e    (ifu_exu_restored_e),
+                 .ifu_exu_return_d      (ifu_exu_return_d),
+                 .ifu_exu_rs1_s         (ifu_exu_rs1_s[4:0]),
+                 .ifu_exu_rs1_vld_d     (ifu_exu_rs1_vld_d),
+                 .ifu_exu_rs2_s         (ifu_exu_rs2_s[4:0]),
+                 .ifu_exu_rs2_vld_d     (ifu_exu_rs2_vld_d),
+                 .ifu_exu_rs3_s         (ifu_exu_rs3_s[4:0]),
+                 .ifu_exu_rs3e_vld_d    (ifu_exu_rs3e_vld_d),
+                 .ifu_exu_rs3o_vld_d    (ifu_exu_rs3o_vld_d),
+                 .ifu_exu_save_d        (ifu_exu_save_d),
+                 .ifu_exu_saved_e       (ifu_exu_saved_e),
+                 .ifu_exu_setcc_d       (ifu_exu_setcc_d),
+                 .ifu_exu_sethi_inst_d  (ifu_exu_sethi_inst_d),
+                 .ifu_exu_shiftop_d     (ifu_exu_shiftop_d[2:0]),
+                 .ifu_exu_tagop_d       (ifu_exu_tagop_d),
+                 .ifu_exu_tcc_e         (ifu_exu_tcc_e),
+                 .ifu_exu_tid_s2        (ifu_exu_tid_s2[1:0]),
+                 .ifu_exu_ttype_vld_m   (ifu_exu_ttype_vld_m),
+                 .ifu_exu_tv_d          (ifu_exu_tv_d),
+                 .ifu_exu_use_rsr_e_l   (ifu_exu_use_rsr_e_l),
+                 .ifu_exu_usecin_d      (ifu_exu_usecin_d),
+                 .ifu_exu_useimm_d      (ifu_exu_useimm_d),
+                 .ifu_exu_wen_d         (ifu_exu_wen_d),
+                 .ifu_exu_wsr_inst_d    (ifu_exu_wsr_inst_d),
+                 .ifu_ffu_fcc_num_d     (ifu_ffu_fcc_num_d[1:0]),
+                 .ifu_ffu_fld_d         (ifu_ffu_fld_d),
+                 .ifu_ffu_fpop1_d       (ifu_ffu_fpop1_d),
+                 .ifu_ffu_fpop2_d       (ifu_ffu_fpop2_d),
+                 .ifu_ffu_fpopcode_d    (ifu_ffu_fpopcode_d[8:0]),
+                 .ifu_ffu_frd_d         (ifu_ffu_frd_d[4:0]),
+                 .ifu_ffu_frs1_d        (ifu_ffu_frs1_d[4:0]),
+                 .ifu_ffu_frs2_d        (ifu_ffu_frs2_d[4:0]),
+                 .ifu_ffu_fst_d         (ifu_ffu_fst_d),
+                 .ifu_ffu_inj_frferr    (ifu_ffu_inj_frferr),
+                 .ifu_ffu_ldfsr_d       (ifu_ffu_ldfsr_d),
+                 .ifu_ffu_ldst_size_d   (ifu_ffu_ldst_size_d),
+                 .ifu_ffu_ldxfsr_d      (ifu_ffu_ldxfsr_d),
+                 .ifu_ffu_mvcnd_m       (ifu_ffu_mvcnd_m),
+                 .ifu_ffu_quad_op_e     (ifu_ffu_quad_op_e),
+                 .ifu_ffu_stfsr_d       (ifu_ffu_stfsr_d),
+                 .ifu_ffu_visop_d       (ifu_ffu_visop_d),
+                 .ifu_lsu_alt_space_d   (ifu_lsu_alt_space_d),
+                 .ifu_lsu_asi_ack       (ifu_lsu_asi_ack),
+                 .ifu_lsu_asi_rd_unc    (ifu_lsu_asi_rd_unc),
+                 .ifu_lsu_casa_e        (ifu_lsu_casa_e),
+                 .ifu_lsu_destid_s      (ifu_lsu_destid_s[2:0]),
+                 .ifu_lsu_error_inj     (ifu_lsu_error_inj[3:0]),
+                 .ifu_lsu_fwd_data_vld  (ifu_lsu_fwd_data_vld),
+                 .ifu_lsu_fwd_wr_ack    (ifu_lsu_fwd_wr_ack),
+                 .ifu_lsu_ibuf_busy     (ifu_lsu_ibuf_busy),
+                 .ifu_lsu_imm_asi_d     (ifu_lsu_imm_asi_d[7:0]),
+                 .ifu_lsu_imm_asi_vld_d (ifu_lsu_imm_asi_vld_d),
+                 .ifu_lsu_inv_clear     (ifu_lsu_inv_clear),
+                 .ifu_lsu_ld_inst_e     (ifu_lsu_ld_inst_e),
+                 .ifu_lsu_ldst_dbl_e    (ifu_lsu_ldst_dbl_e),
+                 .ifu_lsu_ldst_fp_e     (ifu_lsu_ldst_fp_e),
+                 .ifu_lsu_ldst_size_e   (ifu_lsu_ldst_size_e[1:0]),
+                 .ifu_lsu_ldstub_e      (ifu_lsu_ldstub_e),
+                 .ifu_lsu_ldxa_data_vld_w2(ifu_lsu_ldxa_data_vld_w2),
+                 .ifu_lsu_ldxa_data_w2  (ifu_lsu_ldxa_data_w2[63:0]),
+                 .ifu_lsu_ldxa_illgl_va_w2(ifu_lsu_ldxa_illgl_va_w2),
+                 .ifu_lsu_ldxa_tid_w2   (ifu_lsu_ldxa_tid_w2[1:0]),
+                 .ifu_lsu_memref_d      (ifu_lsu_memref_d),
+                 .ifu_lsu_nceen         (ifu_lsu_nceen[3:0]),
+                 .ifu_lsu_pcxpkt_e      (ifu_lsu_pcxpkt_e[51:0]),
+                 .ifu_lsu_pcxreq_d      (ifu_lsu_pcxreq_d),
+                 .ifu_lsu_pref_inst_e   (ifu_lsu_pref_inst_e),
+                 .ifu_lsu_rd_e          (ifu_lsu_rd_e[4:0]),
+                 .ifu_lsu_sign_ext_e    (ifu_lsu_sign_ext_e),
+                 .ifu_lsu_st_inst_e     (ifu_lsu_st_inst_e),
+                 .ifu_lsu_swap_e        (ifu_lsu_swap_e),
+                 .ifu_lsu_wsr_inst_d    (ifu_lsu_wsr_inst_d),
+                 .ifu_mmu_trap_m        (ifu_mmu_trap_m),
+                 .ifu_spu_inst_vld_w    (ifu_spu_inst_vld_w),
+                 .ifu_spu_nceen         (ifu_spu_nceen[3:0]),
+                 .ifu_spu_trap_ack      (ifu_spu_trap_ack),
+                 .ifu_tlu_alt_space_d   (ifu_tlu_alt_space_d),
+                 .ifu_tlu_done_inst_d   (ifu_tlu_done_inst_d),
+                 .ifu_tlu_flsh_inst_e   (ifu_tlu_flsh_inst_e),
+                 .ifu_tlu_flush_m       (ifu_tlu_flush_m),
+                 .ifu_tlu_hwint_m       (ifu_tlu_hwint_m),
+                 .ifu_tlu_icmiss_e      (ifu_tlu_icmiss_e),
+                 .ifu_tlu_immu_miss_m   (ifu_tlu_immu_miss_m),
+                 .ifu_tlu_inst_vld_m    (ifu_tlu_inst_vld_m),
+                 .ifu_tlu_itlb_done     (ifu_tlu_itlb_done),
+                 .ifu_tlu_l2imiss       (ifu_tlu_l2imiss[3:0]),
+                 .ifu_tlu_mb_inst_e     (ifu_tlu_mb_inst_e),
+                 .ifu_tlu_npc_m         (ifu_tlu_npc_m[48:0]),
+                 .ifu_tlu_pc_m          (ifu_tlu_pc_m[48:0]),
+                 .ifu_tlu_pc_oor_e      (ifu_tlu_pc_oor_e),
+                 .ifu_tlu_priv_violtn_m (ifu_tlu_priv_violtn_m),
+                 .ifu_tlu_retry_inst_d  (ifu_tlu_retry_inst_d),
+                 .ifu_tlu_rsr_inst_d    (ifu_tlu_rsr_inst_d),
+                 .ifu_tlu_rstint_m      (ifu_tlu_rstint_m),
+                 .ifu_tlu_sftint_m      (ifu_tlu_sftint_m),
+                 .ifu_tlu_sir_inst_m    (ifu_tlu_sir_inst_m),
+                 .ifu_tlu_sraddr_d      (ifu_tlu_sraddr_d[6:0]),
+                 .ifu_tlu_sraddr_d_v2   (ifu_tlu_sraddr_d_v2[6:0]),
+                 .ifu_tlu_thrid_d       (ifu_tlu_thrid_d[1:0]),
+                 .ifu_tlu_thrid_e       (ifu_tlu_thrid_e[1:0]),
+                 .ifu_tlu_trap_m        (ifu_tlu_trap_m),
+                 .ifu_tlu_ttype_m       (ifu_tlu_ttype_m[8:0]),
+                 .mbist_dcache_fail     (mbist_dcache_fail),
+                 .mbist_dcache_index    (mbist_dcache_index[6:0]),
+                 .mbist_dcache_read     (mbist_dcache_read),
+                 .mbist_dcache_way      (mbist_dcache_way[1:0]),
+                 .mbist_dcache_word     (mbist_dcache_word),
+                 .mbist_dcache_write    (mbist_dcache_write),
+                 .mbist_done            (mbist_done),
+                 .mbist_icache_fail     (mbist_icache_fail),
+                 .spc_efc_ifuse_data    (spc_efc_ifuse_data),
+                 // Inputs
+                 .mem_write_disable     (mem_write_disable),
+                 .mux_drive_disable     (mux_drive_disable),
+                 .exu_tlu_wsr_data_m    (exu_tlu_wsr_data_m[2:0]), // only bits [2:0] here; the lsu instance further down connects [7:0]
+                 .lsu_ictag_mrgn        (lsu_ictag_mrgn[3:0]),
+                 .tlu_itlb_tte_tag_w2   (tlu_itlb_tte_tag_w2[58:0]),
+                 .tlu_itlb_tte_data_w2  (tlu_itlb_tte_data_w2[42:0]),
+                 .tlu_itlb_rw_index_vld_g(tlu_itlb_rw_index_vld_g),
+                 .tlu_itlb_rw_index_g   (tlu_itlb_rw_index_g[5:0]),
+                 .tlu_idtlb_dmp_key_g   (tlu_idtlb_dmp_key_g[40:0]),
+                 .tlu_itlb_dmp_all_g    (tlu_itlb_dmp_all_g),
+                 .lsu_sscan_data        (lsu_sscan_data[15:0]),
+                 .const_cpuid           (const_cpuid[3:0]),
+                 .const_maskid          (const_maskid[7:0]),
+                 .ctu_sscan_se          (ctu_sscan_se),
+                 .ctu_sscan_snap        (ctu_sscan_snap),
+                 .ctu_sscan_tid         (ctu_sscan_tid[3:0]),
+                 .ctu_tck               (ctu_tck),
+                 .efc_spc_fuse_clk1     (efc_spc_fuse_clk1),
+                 .efc_spc_fuse_clk2     (efc_spc_fuse_clk2),
+                 .efc_spc_ifuse_ashift  (efc_spc_ifuse_ashift),
+                 .efc_spc_ifuse_data    (efc_spc_ifuse_data),
+                 .efc_spc_ifuse_dshift  (efc_spc_ifuse_dshift),
+                 .exu_ifu_brpc_e        (exu_ifu_brpc_e[47:0]),
+                 .exu_ifu_cc_d          (exu_ifu_cc_d[7:0]),
+                 .exu_ifu_ecc_ce_m      (exu_ifu_ecc_ce_m),
+                 .exu_ifu_ecc_ue_m      (exu_ifu_ecc_ue_m),
+                 .exu_ifu_err_reg_m     (exu_ifu_err_reg_m[7:0]),
+                 .exu_ifu_err_synd_m    (exu_ifu_err_synd_m[7:0]),
+                 .exu_ifu_inj_ack       (exu_ifu_inj_ack),
+                 .exu_ifu_longop_done_g (exu_ifu_longop_done_g[3:0]),
+                 .exu_ifu_oddwin_s      (exu_ifu_oddwin_s[3:0]),
+                 .exu_ifu_regn_e        (exu_ifu_regn_e),
+                 .exu_ifu_regz_e        (exu_ifu_regz_e),
+                 .exu_ifu_spill_e       (exu_ifu_spill_e),
+                 .exu_ifu_va_oor_m      (exu_ifu_va_oor_m),
+                 .ffu_ifu_cc_vld_w2     (ffu_ifu_cc_vld_w2[3:0]),
+                 .ffu_ifu_cc_w2         (ffu_ifu_cc_w2[7:0]),
+                 .ffu_ifu_ecc_ce_w2     (ffu_ifu_ecc_ce_w2),
+                 .ffu_ifu_ecc_ue_w2     (ffu_ifu_ecc_ue_w2),
+                 .ffu_ifu_err_reg_w2    (ffu_ifu_err_reg_w2[5:0]),
+                 .ffu_ifu_err_synd_w2   (ffu_ifu_err_synd_w2[13:0]),
+                 .ffu_ifu_fpop_done_w2  (ffu_ifu_fpop_done_w2),
+                 .ffu_ifu_fst_ce_w      (ffu_ifu_fst_ce_w),
+                 .ffu_ifu_inj_ack       (ffu_ifu_inj_ack),
+                 .ffu_ifu_stallreq      (ffu_ifu_stallreq),
+                 .ffu_ifu_tid_w2        (ffu_ifu_tid_w2[1:0]),
+                 .lsu_ifu_asi_addr      (lsu_ifu_asi_addr[17:0]),
+                 .lsu_ifu_asi_load      (lsu_ifu_asi_load),
+                 .lsu_ifu_asi_state     (lsu_ifu_asi_state[7:0]),
+                 .lsu_ifu_asi_thrid     (lsu_ifu_asi_thrid[1:0]),
+                 .lsu_ifu_asi_vld       (lsu_ifu_asi_vld),
+                 .lsu_ifu_dc_parity_error_w2(lsu_ifu_dc_parity_error_w2),
+                 .lsu_ifu_dcache_data_perror(lsu_ifu_dcache_data_perror),
+                 .lsu_ifu_dcache_tag_perror(lsu_ifu_dcache_tag_perror),
+                 .lsu_ifu_direct_map_l1 (lsu_ifu_direct_map_l1),
+                 .lsu_ifu_err_addr      (lsu_ifu_err_addr[47:4]),
+                 .lsu_ifu_error_tid     (lsu_ifu_error_tid[1:0]),
+                 .lsu_ifu_icache_en     (lsu_ifu_icache_en[3:0]),
+                 .lsu_ifu_inj_ack       (lsu_ifu_inj_ack[3:0]),
+                 .lsu_ifu_io_error      (lsu_ifu_io_error),
+                 .lsu_ifu_l2_corr_error (lsu_ifu_l2_corr_error),
+                 .lsu_ifu_l2_unc_error  (lsu_ifu_l2_unc_error),
+                 .lsu_ifu_ld_icache_index(lsu_ifu_ld_icache_index[`IC_IDX_HI:5]), // NOTE(review): the lsu instance connects this net as [11:5]; confirm `IC_IDX_HI matches
+                 .lsu_ifu_ld_pcxpkt_tid (lsu_ifu_ld_pcxpkt_tid[1:0]),
+                 .lsu_ifu_ld_pcxpkt_vld (lsu_ifu_ld_pcxpkt_vld),
+                 .lsu_ifu_ldst_cmplt    (lsu_ifu_ldst_cmplt[3:0]),
+                 .lsu_ifu_ldsta_internal_e(lsu_ifu_ldsta_internal_e),
+                 .lsu_ifu_pcxpkt_ack_d  (lsu_ifu_pcxpkt_ack_d),
+                 .lsu_ifu_stallreq      (lsu_ifu_stallreq),
+                 .lsu_ifu_stbcnt0       (lsu_ifu_stbcnt0[3:0]),
+                 .lsu_ifu_stbcnt1       (lsu_ifu_stbcnt1[3:0]),
+                 .lsu_ifu_stbcnt2       (lsu_ifu_stbcnt2[3:0]),
+                 .lsu_ifu_stbcnt3       (lsu_ifu_stbcnt3[3:0]),
+                 .lsu_ifu_stxa_data     (lsu_ifu_stxa_data[47:0]),
+                 .lsu_pid_state0        (lsu_pid_state0[2:0]),
+                 .lsu_pid_state1        (lsu_pid_state1[2:0]),
+                 .lsu_pid_state2        (lsu_pid_state2[2:0]),
+                 .lsu_pid_state3        (lsu_pid_state3[2:0]),
+                 .lsu_t0_pctxt_state    (lsu_t0_pctxt_state[12:0]),
+                 .lsu_t1_pctxt_state    (lsu_t1_pctxt_state[12:0]),
+                 .lsu_t2_pctxt_state    (lsu_t2_pctxt_state[12:0]),
+                 .lsu_t3_pctxt_state    (lsu_t3_pctxt_state[12:0]),
+                 .mbist_bisi_mode       (mbist_bisi_mode),
+                 .mbist_dcache_data_in  (mbist_dcache_data_in[71:0]),
+                 .mbist_loop_mode       (mbist_loop_mode),
+                 .mbist_start           (mbist_start),
+                 .mbist_stop_on_fail    (mbist_stop_on_fail),
+                 .mbist_stop_on_next_fail(mbist_stop_on_next_fail),
+                 .mbist_userdata_mode   (mbist_userdata_mode),
+                 .rclk                  (rclk),
+                 .se                    (se),
+                 .sehold                (sehold),
+                 .spu_ifu_corr_err_w2   (spu_ifu_corr_err_w2), // spu_ifu_* group: real nets here, constants in the FPGA_SYN_NO_SPU instance
+                 .spu_ifu_err_addr_w2   (spu_ifu_err_addr_w2[39:4]),
+                 .spu_ifu_int_w2        (spu_ifu_int_w2),
+                 .spu_ifu_mamem_err_w1  (spu_ifu_mamem_err_w1),
+                 .spu_ifu_ttype_tid_w2  (spu_ifu_ttype_tid_w2[1:0]),
+                 .spu_ifu_ttype_vld_w2  (spu_ifu_ttype_vld_w2),
+                 .spu_ifu_ttype_w2      (spu_ifu_ttype_w2),
+                 .spu_ifu_unc_err_w1    (spu_ifu_unc_err_w1),
+                 .testmode_l            (testmode_l),
+                 .tlu_hpstate_enb       (tlu_hpstate_enb[3:0]),
+                 .tlu_hpstate_ibe       (tlu_hpstate_ibe[3:0]),
+                 .tlu_hpstate_priv      (tlu_hpstate_priv[3:0]),
+                 .tlu_idtlb_dmp_thrid_g (tlu_idtlb_dmp_thrid_g[1:0]),
+                 .tlu_ifu_hwint_i3      (tlu_ifu_hwint_i3[3:0]),
+                 .tlu_ifu_nukeint_i2    (tlu_ifu_nukeint_i2),
+                 .tlu_ifu_pstate_ie     (tlu_ifu_pstate_ie[3:0]),
+                 .tlu_ifu_pstate_pef    (tlu_ifu_pstate_pef[3:0]),
+                 .tlu_ifu_resumint_i2   (tlu_ifu_resumint_i2),
+                 .tlu_ifu_rstint_i2     (tlu_ifu_rstint_i2),
+                 .tlu_ifu_rstthr_i2     (tlu_ifu_rstthr_i2[3:0]),
+                 .tlu_ifu_sftint_vld    (tlu_ifu_sftint_vld[3:0]),
+                 .tlu_ifu_trap_tid_w1   (tlu_ifu_trap_tid_w1[1:0]),
+                 .tlu_ifu_trapnpc_vld_w1(tlu_ifu_trapnpc_vld_w1),
+                 .tlu_ifu_trapnpc_w2    (tlu_ifu_trapnpc_w2[48:0]),
+                 .tlu_ifu_trappc_vld_w1 (tlu_ifu_trappc_vld_w1),
+                 .tlu_ifu_trappc_w2     (tlu_ifu_trappc_w2[48:0]),
+                 .tlu_itlb_data_rd_g    (tlu_itlb_data_rd_g),
+                 .tlu_itlb_dmp_actxt_g  (tlu_itlb_dmp_actxt_g),
+                 .tlu_itlb_dmp_nctxt_g  (tlu_itlb_dmp_nctxt_g),
+                 .tlu_itlb_dmp_vld_g    (tlu_itlb_dmp_vld_g),
+                 .tlu_itlb_invalidate_all_g(tlu_itlb_invalidate_all_g),
+                 .tlu_itlb_tag_rd_g     (tlu_itlb_tag_rd_g),
+                 .tlu_itlb_wr_vld_g     (tlu_itlb_wr_vld_g),
+                 .tlu_lsu_pstate_am     (tlu_lsu_pstate_am[3:0]),
+                 .tlu_lsu_pstate_priv   (tlu_lsu_pstate_priv[3:0]),
+                 .tlu_lsu_redmode       (tlu_lsu_redmode[3:0]));
+
+`endif //  `ifdef FPGA_SYN_NO_SPU
+   
+
+`ifdef FPGA_SYN_NO_SPU
+
+      lsu lsu(
+           // temp - name change
+           .ifu_tlu_wsr_inst_d          (ifu_lsu_wsr_inst_d),
+	   // eco 6529 .
+	   .lsu_ffu_st_dtlb_perr_g		(lsu_ffu_st_dtlb_perr_g),
+	   // Bug 4799.
+	   .tlu_lsu_priv_trap_m		(tlu_lsu_priv_trap_m),
+
+           .short_si0              (short_scan0_1),
+           .short_si1              (short_scan1_1),
+           .short_so0              (short_scan0_2),
+           .short_so1              (short_scan1_2),
+           .si0                          (scan0_3),
+           .si1                          (short_scan1_4),
+           .so0                          (scan0_4),
+           .so1                          (scan1_1),
+           // reset stuff
+           .grst_l                       (spc_grst_l),
+           .arst_l                       (cmp_arst_l),
+           .clk                          (rclk),
+	         .lsu_exu_dfill_data_w2	(lsu_exu_dfill_data_g[63:0]),
+	         .lsu_exu_dfill_vld_w2	(lsu_exu_dfill_vld_g),
+	         .lsu_exu_ldst_miss_w2	(lsu_exu_ldst_miss_g2),
+           //.cpx_spc_data_cx             (cpx_spc_data_cx3[`CPX_WIDTH-1:0]),
+           .cpx_spc_data_cx             ({cpx_spc_data_cx3_b144to140[`CPX_WIDTH-1:140],
+                                          cpx_spc_data_cx3[139:121],
+                                          cpx_spc_data_cx3_b120to118[`CPX_INV_CID_HI:`CPX_INV_CID_LO],
+                                          cpx_spc_data_cx3[117:110],
+                                          cpx_spc_data_cx3_b109,
+                                          cpx_spc_data_cx3[108:107],
+                                          cpx_spc_data_cx3_b106,
+                                          cpx_spc_data_cx3[105:104],
+                                          cpx_spc_data_cx3_b103,
+                                          cpx_spc_data_cx3[102:101],
+                                          cpx_spc_data_cx3_b100,
+                                          cpx_spc_data_cx3[99:98],
+                                          cpx_spc_data_cx3_b97,
+                                          cpx_spc_data_cx3[96:95],
+                                          cpx_spc_data_cx3_b94,
+                                          cpx_spc_data_cx3[93:92],
+                                          cpx_spc_data_cx3_b91,
+                                          cpx_spc_data_cx3[90:89],
+                                          cpx_spc_data_cx3_b88,
+                                          cpx_spc_data_cx3[87:85],
+                                          cpx_spc_data_cx3_b84,
+                                          cpx_spc_data_cx3[83:81],
+                                          cpx_spc_data_cx3_b80,
+                                          cpx_spc_data_cx3[79:77],
+                                          cpx_spc_data_cx3_b76,
+                                          cpx_spc_data_cx3[75:73],
+                                          cpx_spc_data_cx3_b72,
+                                          cpx_spc_data_cx3[71:69],
+                                          cpx_spc_data_cx3_b68,
+                                          cpx_spc_data_cx3[67:65],
+                                          cpx_spc_data_cx3_b64,
+                                          cpx_spc_data_cx3[63:61],
+                                          cpx_spc_data_cx3_b60,
+                                          cpx_spc_data_cx3[59:57],
+                                          cpx_spc_data_cx3_b56,
+                                          cpx_spc_data_cx3[55:54],
+                                          cpx_spc_data_cx3_b53,
+                                          cpx_spc_data_cx3[52:51],
+                                          cpx_spc_data_cx3_b50,
+                                          cpx_spc_data_cx3[49:48],
+                                          cpx_spc_data_cx3_b47,
+                                          cpx_spc_data_cx3[46:45],
+                                          cpx_spc_data_cx3_b44,
+                                          cpx_spc_data_cx3[43:42],
+                                          cpx_spc_data_cx3_b41,
+                                          cpx_spc_data_cx3[40:39],
+                                          cpx_spc_data_cx3_b38,
+                                          cpx_spc_data_cx3[37:36],
+                                          cpx_spc_data_cx3_b35,
+                                          cpx_spc_data_cx3[34:33],
+                                          cpx_spc_data_cx3_b32,
+                                          cpx_spc_data_cx3[31:29],
+                                          cpx_spc_data_cx3_b28,
+                                          cpx_spc_data_cx3[27:25],
+                                          cpx_spc_data_cx3_b24,
+                                          cpx_spc_data_cx3[23:21],
+                                          cpx_spc_data_cx3_b20,
+                                          cpx_spc_data_cx3[19:17],
+                                          cpx_spc_data_cx3_b16,
+                                          cpx_spc_data_cx3[15:13],
+                                          cpx_spc_data_cx3_b12,
+                                          cpx_spc_data_cx3[11:9],
+                                          cpx_spc_data_cx3_b8,
+                                          cpx_spc_data_cx3[7:5],
+                                          cpx_spc_data_cx3_b4,
+                                          cpx_spc_data_cx3[3:1],
+                                          cpx_spc_data_cx3_b0}),
+           .exu_tlu_wsr_data_m          (exu_tlu_wsr_data_m[7:0]),
+           
+	   // Hypervisor related
+      	   .tlu_lsu_hpv_priv      	(tlu_hpstate_priv[3:0]),
+           .tlu_lsu_hpstate_en     	(tlu_hpstate_enb[3:0]),
+
+	         .spu_lsu_int_w2		(1'b0),
+           .gdbginit_l             	(spc_dbginit_l),
+           /*AUTOINST*/
+           // Outputs
+           .bist_ctl_reg_in             (bist_ctl_reg_in[6:0]),
+           .bist_ctl_reg_wr_en          (bist_ctl_reg_wr_en),
+           .ifu_tlu_flush_fd2_w         (ifu_tlu_flush_fd2_w),
+           .ifu_tlu_flush_fd3_w         (ifu_tlu_flush_fd3_w),
+           .ifu_tlu_flush_fd_w          (ifu_tlu_flush_fd_w),
+           .lsu_asi_reg0                (lsu_asi_reg0[7:0]),
+           .lsu_asi_reg1                (lsu_asi_reg1[7:0]),
+           .lsu_asi_reg2                (lsu_asi_reg2[7:0]),
+           .lsu_asi_reg3                (lsu_asi_reg3[7:0]),
+           .lsu_dmmu_sfsr_trp_wr        (lsu_dmmu_sfsr_trp_wr[3:0]),
+           .lsu_dsfsr_din_g             (lsu_dsfsr_din_g[23:0]),
+           .lsu_exu_flush_pipe_w        (lsu_exu_flush_pipe_w),
+           .lsu_exu_rd_m                (lsu_exu_rd_m[4:0]),
+           .lsu_exu_st_dtlb_perr_g      (lsu_exu_st_dtlb_perr_g),
+           .lsu_exu_thr_m               (lsu_exu_thr_m[1:0]),
+           .lsu_ffu_ack                 (lsu_ffu_ack),
+           .lsu_ffu_blk_asi_e           (lsu_ffu_blk_asi_e),
+           .lsu_ffu_flush_pipe_w        (lsu_ffu_flush_pipe_w),
+           .lsu_ffu_ld_data             (lsu_ffu_ld_data[63:0]),
+           .lsu_ffu_ld_vld              (lsu_ffu_ld_vld),
+           .lsu_ffu_stb_full0           (lsu_ffu_stb_full0),
+           .lsu_ffu_stb_full1           (lsu_ffu_stb_full1),
+           .lsu_ffu_stb_full2           (lsu_ffu_stb_full2),
+           .lsu_ffu_stb_full3           (lsu_ffu_stb_full3),
+           .lsu_ictag_mrgn              (lsu_ictag_mrgn[3:0]),
+           .lsu_ifu_asi_addr            (lsu_ifu_asi_addr[17:0]),
+           .lsu_ifu_asi_load            (lsu_ifu_asi_load),
+           .lsu_ifu_asi_state           (lsu_ifu_asi_state[7:0]),
+           .lsu_ifu_asi_thrid           (lsu_ifu_asi_thrid[1:0]),
+           .lsu_ifu_asi_vld             (lsu_ifu_asi_vld),
+           .lsu_ifu_cpxpkt_i1           (lsu_ifu_cpxpkt_i1[`CPX_VLD-1:0]),
+           .lsu_ifu_cpxpkt_vld_i1       (lsu_ifu_cpxpkt_vld_i1),
+           .lsu_ifu_dc_parity_error_w2  (lsu_ifu_dc_parity_error_w2),
+           .lsu_ifu_dcache_data_perror  (lsu_ifu_dcache_data_perror),
+           .lsu_ifu_dcache_tag_perror   (lsu_ifu_dcache_tag_perror),
+           .lsu_ifu_direct_map_l1       (lsu_ifu_direct_map_l1),
+           .lsu_ifu_error_tid           (lsu_ifu_error_tid[1:0]),
+           .lsu_ifu_flush_pipe_w        (lsu_ifu_flush_pipe_w),
+           .lsu_ifu_icache_en           (lsu_ifu_icache_en[3:0]),
+           .lsu_ifu_io_error            (lsu_ifu_io_error),
+           .lsu_ifu_itlb_en             (lsu_ifu_itlb_en[3:0]),
+           .lsu_ifu_l2_corr_error       (lsu_ifu_l2_corr_error),
+           .lsu_ifu_l2_unc_error        (lsu_ifu_l2_unc_error),
+           .lsu_ifu_ld_icache_index     (lsu_ifu_ld_icache_index[11:5]),
+           .lsu_ifu_ld_pcxpkt_tid       (lsu_ifu_ld_pcxpkt_tid[1:0]),
+           .lsu_ifu_ld_pcxpkt_vld       (lsu_ifu_ld_pcxpkt_vld),
+           .lsu_ifu_ldst_cmplt          (lsu_ifu_ldst_cmplt[3:0]),
+           .lsu_ifu_ldst_miss_w         (lsu_ifu_ldst_miss_w),
+           .lsu_ifu_ldsta_internal_e    (lsu_ifu_ldsta_internal_e),
+           .lsu_ifu_pcxpkt_ack_d        (lsu_ifu_pcxpkt_ack_d),
+           .lsu_ifu_stallreq            (lsu_ifu_stallreq),
+           .lsu_ifu_stbcnt0             (lsu_ifu_stbcnt0[3:0]),
+           .lsu_ifu_stbcnt1             (lsu_ifu_stbcnt1[3:0]),
+           .lsu_ifu_stbcnt2             (lsu_ifu_stbcnt2[3:0]),
+           .lsu_ifu_stbcnt3             (lsu_ifu_stbcnt3[3:0]),
+           .lsu_ifu_stxa_data           (lsu_ifu_stxa_data[47:0]),
+           .lsu_ifu_tlb_data_su         (lsu_ifu_tlb_data_su),
+           .lsu_ifu_tlb_data_ue         (lsu_ifu_tlb_data_ue),
+           .lsu_ifu_tlb_tag_ue          (lsu_ifu_tlb_tag_ue),
+           .lsu_itlb_mrgn               (lsu_itlb_mrgn[7:0]),
+           .lsu_mamem_mrgn              (),
+           .lsu_mmu_defr_trp_taken_g    (lsu_mmu_defr_trp_taken_g),
+           .lsu_mmu_flush_pipe_w        (lsu_mmu_flush_pipe_w),
+           .lsu_mmu_rs3_data_g          (lsu_mmu_rs3_data_g[63:0]),
+           .lsu_pid_state0              (lsu_pid_state0[2:0]),
+           .lsu_pid_state1              (lsu_pid_state1[2:0]),
+           .lsu_pid_state2              (lsu_pid_state2[2:0]),
+           .lsu_pid_state3              (lsu_pid_state3[2:0]),
+           .lsu_spu_asi_state_e         (),
+           .lsu_spu_early_flush_g       (),
+           .lsu_spu_ldst_ack            (),
+           .lsu_spu_stb_empty           (),
+           .lsu_spu_strm_ack_cmplt      (),
+           .lsu_t0_pctxt_state          (lsu_t0_pctxt_state[12:0]),
+           .lsu_t1_pctxt_state          (lsu_t1_pctxt_state[12:0]),
+           .lsu_t2_pctxt_state          (lsu_t2_pctxt_state[12:0]),
+           .lsu_t3_pctxt_state          (lsu_t3_pctxt_state[12:0]),
+           .lsu_tlu_async_tid_w2        (lsu_tlu_async_tid_w2[1:0]),
+           .lsu_tlu_async_ttype_vld_w2  (lsu_tlu_async_ttype_vld_w2),
+           .lsu_tlu_async_ttype_w2      (lsu_tlu_async_ttype_w2[6:0]),
+           .lsu_tlu_cpx_req             (lsu_tlu_cpx_req[3:0]),
+           .lsu_tlu_cpx_vld             (lsu_tlu_cpx_vld),
+           .lsu_tlu_daccess_excptn_g    (lsu_tlu_daccess_excptn_g),
+           .lsu_tlu_dcache_miss_w2      (lsu_tlu_dcache_miss_w2[3:0]),
+           .lsu_tlu_defr_trp_taken_g    (lsu_tlu_defr_trp_taken_g),
+           .lsu_tlu_dmmu_miss_g         (lsu_tlu_dmmu_miss_g),
+           .lsu_tlu_dside_ctxt_m        (lsu_tlu_dside_ctxt_m[12:0]),
+           .lsu_tlu_dtlb_done           (lsu_tlu_dtlb_done),
+           .lsu_tlu_early_flush2_w      (lsu_tlu_early_flush2_w),
+           .lsu_tlu_early_flush_w       (lsu_tlu_early_flush_w),
+           .lsu_tlu_intpkt              (lsu_tlu_intpkt[17:0]),
+           .lsu_tlu_l2_dmiss            (lsu_tlu_l2_dmiss[3:0]),
+           .lsu_tlu_ldst_va_m           (lsu_tlu_ldst_va_m[9:0]),
+           .lsu_tlu_misalign_addr_ldst_atm_m(lsu_tlu_misalign_addr_ldst_atm_m),
+           .lsu_tlu_pctxt_m             (lsu_tlu_pctxt_m[12:0]),
+           .lsu_tlu_pcxpkt_ack          (lsu_tlu_pcxpkt_ack),
+           .lsu_tlu_rs3_data_g          (lsu_tlu_rs3_data_g[63:0]),
+           .lsu_tlu_rsr_data_e          (lsu_tlu_rsr_data_e[7:0]),
+           .lsu_tlu_stb_full_w2         (lsu_tlu_stb_full_w2[3:0]),
+           .lsu_tlu_thrid_d             (lsu_tlu_thrid_d[1:0]),
+           .lsu_tlu_tlb_access_tid_m    (lsu_tlu_tlb_access_tid_m[1:0]),
+           .lsu_tlu_tlb_asi_state_m     (lsu_tlu_tlb_asi_state_m[7:0]),
+           .lsu_tlu_tlb_dmp_va_m        (lsu_tlu_tlb_dmp_va_m[47:13]),
+           .lsu_tlu_tlb_ld_inst_m       (lsu_tlu_tlb_ld_inst_m),
+           .lsu_tlu_tlb_ldst_va_m       (lsu_tlu_tlb_ldst_va_m[10:0]),
+           .lsu_tlu_tlb_st_inst_m       (lsu_tlu_tlb_st_inst_m),
+           .lsu_tlu_ttype_m2            (lsu_tlu_ttype_m2[8:0]),
+           .lsu_tlu_ttype_vld_m2        (lsu_tlu_ttype_vld_m2),
+           .lsu_tlu_wsr_inst_e          (lsu_tlu_wsr_inst_e),
+           .mbist_dcache_data_in        (mbist_dcache_data_in[71:0]),
+           .spc_efc_dfuse_data          (spc_efc_dfuse_data),
+           .spc_pcx_atom_pq             (spc_pcx_atom_pq),
+           .spc_pcx_data_pa             (spc_pcx_data_pa[`PCX_WIDTH-1:0]),
+           .spc_pcx_req_pq              (spc_pcx_req_pq[4:0]),
+           .lsu_asi_state               (lsu_asi_state[7:0]),
+           .lsu_ifu_err_addr            (lsu_ifu_err_addr[47:4]),
+           .lsu_sscan_data              (lsu_sscan_data[15:0]),
+           .ifu_tlu_inst_vld_m_bf1      (ifu_tlu_inst_vld_m_bf1),
+           .lsu_ffu_bld_cnt_w           (lsu_ffu_bld_cnt_w[2:0]),
+           .lsu_tlu_nucleus_ctxt_m      (lsu_tlu_nucleus_ctxt_m),
+           .lsu_tlu_tte_pg_sz_g         (lsu_tlu_tte_pg_sz_g[2:0]),
+           .lsu_tlu_squash_va_oor_m     (lsu_tlu_squash_va_oor_m),
+           .lsu_tlu_wtchpt_trp_g        (lsu_tlu_wtchpt_trp_g),
+           .lsu_tlu_daccess_prot_g      (lsu_tlu_daccess_prot_g),
+           .lsu_tlu_priv_action_g       (lsu_tlu_priv_action_g),
+           // Inputs
+           .bist_ctl_reg_out            (bist_ctl_reg_out[10:0]),
+           .const_cpuid                 (const_cpuid[2:0]),
+           .ctu_sscan_tid               (ctu_sscan_tid[3:0]),
+           .efc_spc_dfuse_ashift        (efc_spc_dfuse_ashift),
+           .efc_spc_dfuse_data          (efc_spc_dfuse_data),
+           .efc_spc_dfuse_dshift        (efc_spc_dfuse_dshift),
+           .efc_spc_fuse_clk1           (efc_spc_fuse_clk1),
+           .efc_spc_fuse_clk2           (efc_spc_fuse_clk2),
+           .exu_lsu_rs2_data_e          (exu_lsu_rs2_data_e[63:0]),
+           .exu_lsu_rs3_data_e          (exu_lsu_rs3_data_e[63:0]),
+           .exu_tlu_misalign_addr_jmpl_rtn_m(exu_tlu_misalign_addr_jmpl_rtn_m),
+           .exu_tlu_va_oor_m            (exu_tlu_va_oor_m),
+           .ffu_lsu_blk_st_e            (ffu_lsu_blk_st_e),
+           .ffu_lsu_blk_st_va_e         (ffu_lsu_blk_st_va_e[5:3]),
+           .ffu_lsu_fpop_rq_vld         (ffu_lsu_fpop_rq_vld),
+           .ffu_lsu_kill_fst_w          (ffu_lsu_kill_fst_w),
+           .ifu_lsu_alt_space_d         (ifu_lsu_alt_space_d),
+           .ifu_lsu_alt_space_e         (ifu_lsu_alt_space_e),
+           .ifu_lsu_asi_ack             (ifu_lsu_asi_ack),
+           .ifu_lsu_asi_rd_unc          (ifu_lsu_asi_rd_unc),
+           .ifu_lsu_casa_e              (ifu_lsu_casa_e),
+           .ifu_lsu_destid_s            (ifu_lsu_destid_s[2:0]),
+           .ifu_lsu_fwd_data_vld        (ifu_lsu_fwd_data_vld),
+           .ifu_lsu_fwd_wr_ack          (ifu_lsu_fwd_wr_ack),
+           .ifu_lsu_ibuf_busy           (ifu_lsu_ibuf_busy),
+           .ifu_lsu_imm_asi_d           (ifu_lsu_imm_asi_d[7:0]),
+           .ifu_lsu_imm_asi_vld_d       (ifu_lsu_imm_asi_vld_d),
+           .ifu_lsu_inv_clear           (ifu_lsu_inv_clear),
+           .ifu_lsu_ld_inst_e           (ifu_lsu_ld_inst_e),
+           .ifu_lsu_ldst_dbl_e          (ifu_lsu_ldst_dbl_e),
+           .ifu_lsu_ldst_fp_e           (ifu_lsu_ldst_fp_e),
+           .ifu_lsu_ldst_size_e         (ifu_lsu_ldst_size_e[1:0]),
+           .ifu_lsu_ldstub_e            (ifu_lsu_ldstub_e),
+           .ifu_lsu_ldxa_data_vld_w2    (ifu_lsu_ldxa_data_vld_w2),
+           .ifu_lsu_ldxa_data_w2        (ifu_lsu_ldxa_data_w2[63:0]),
+           .ifu_lsu_ldxa_illgl_va_w2    (ifu_lsu_ldxa_illgl_va_w2),
+           .ifu_lsu_ldxa_tid_w2         (ifu_lsu_ldxa_tid_w2[1:0]),
+           .ifu_lsu_memref_d            (ifu_lsu_memref_d),
+           .ifu_lsu_nceen               (ifu_lsu_nceen[3:0]),
+           .ifu_lsu_pcxpkt_e            (ifu_lsu_pcxpkt_e[51:0]),
+           .ifu_lsu_pcxreq_d            (ifu_lsu_pcxreq_d),
+           .ifu_lsu_pref_inst_e         (ifu_lsu_pref_inst_e),
+           .ifu_lsu_rd_e                (ifu_lsu_rd_e[4:0]),
+           .ifu_lsu_sign_ext_e          (ifu_lsu_sign_ext_e),
+           .ifu_lsu_st_inst_e           (ifu_lsu_st_inst_e),
+           .ifu_lsu_swap_e              (ifu_lsu_swap_e),
+           .ifu_lsu_thrid_s             (ifu_lsu_thrid_s[1:0]),
+           .ifu_tlu_flsh_inst_e         (ifu_tlu_flsh_inst_e),
+           .ifu_tlu_flush_m             (ifu_tlu_flush_m),
+           .ifu_tlu_inst_vld_m          (ifu_tlu_inst_vld_m),
+           .ifu_tlu_mb_inst_e           (ifu_tlu_mb_inst_e),
+           .ifu_tlu_sraddr_d            (ifu_tlu_sraddr_d[6:0]),
+           .ifu_tlu_thrid_e             (ifu_tlu_thrid_e[1:0]),
+           .mbist_dcache_index          (mbist_dcache_index[6:0]),
+           .mbist_dcache_read           (mbist_dcache_read),
+           .mbist_dcache_way            (mbist_dcache_way[1:0]),
+           .mbist_dcache_word           (mbist_dcache_word),
+           .mbist_dcache_write          (mbist_dcache_write),
+           .mbist_write_data            (mbist_write_data[7:0]),
+           .mem_write_disable           (mem_write_disable),
+           .mux_drive_disable           (mux_drive_disable),
+           .pcx_spc_grant_px            (pcx_spc_grant_px[4:0]),
+           .se                          (se),
+           .sehold                      (sehold),
+           .spu_lsu_ldxa_data_vld_w2    (1'b0),
+           .spu_lsu_ldxa_data_w2        (64'h0000000000000000),
+           .spu_lsu_ldxa_illgl_va_w2    (1'b0),
+           .spu_lsu_ldxa_tid_w2         (2'b00),
+           .spu_lsu_stxa_ack            (1'b0),
+           .spu_lsu_stxa_ack_tid        (2'b00),
+           .spu_lsu_unc_error_w2        (1'b0),
+           .testmode_l                  (testmode_l),
+           .tlu_dsfsr_flt_vld           (tlu_dsfsr_flt_vld[3:0]),
+           .tlu_dtlb_data_rd_g          (tlu_dtlb_data_rd_g),
+           .tlu_dtlb_dmp_actxt_g        (tlu_dtlb_dmp_actxt_g),
+           .tlu_dtlb_dmp_all_g          (tlu_dtlb_dmp_all_g),
+           .tlu_dtlb_dmp_nctxt_g        (tlu_dtlb_dmp_nctxt_g),
+           .tlu_dtlb_dmp_pctxt_g        (tlu_dtlb_dmp_pctxt_g),
+           .tlu_dtlb_dmp_sctxt_g        (tlu_dtlb_dmp_sctxt_g),
+           .tlu_dtlb_dmp_vld_g          (tlu_dtlb_dmp_vld_g),
+           .tlu_dtlb_invalidate_all_g   (tlu_dtlb_invalidate_all_g),
+           .tlu_dtlb_rw_index_g         (tlu_dtlb_rw_index_g[5:0]),
+           .tlu_dtlb_rw_index_vld_g     (tlu_dtlb_rw_index_vld_g),
+           .tlu_dtlb_tag_rd_g           (tlu_dtlb_tag_rd_g),
+           .tlu_dtlb_tte_data_w2        (tlu_dtlb_tte_data_w2[42:0]),
+           .tlu_dtlb_tte_tag_w2         (tlu_dtlb_tte_tag_w2[58:0]),
+           .tlu_early_flush_pipe2_w     (tlu_early_flush_pipe2_w),
+           .tlu_early_flush_pipe_w      (tlu_early_flush_pipe_w),
+           .tlu_exu_early_flush_pipe_w  (tlu_exu_early_flush_pipe_w),
+           .tlu_idtlb_dmp_key_g         (tlu_idtlb_dmp_key_g[40:0]),
+           .tlu_idtlb_dmp_thrid_g       (tlu_idtlb_dmp_thrid_g[1:0]),
+           .tlu_lsu_asi_m               (tlu_lsu_asi_m[7:0]),
+           .tlu_lsu_asi_update_m        (tlu_lsu_asi_update_m),
+           .tlu_lsu_int_ld_ill_va_w2    (tlu_lsu_int_ld_ill_va_w2),
+           .tlu_lsu_int_ldxa_data_w2    (tlu_lsu_int_ldxa_data_w2[63:0]),
+           .tlu_lsu_int_ldxa_vld_w2     (tlu_lsu_int_ldxa_vld_w2),
+           .tlu_lsu_ldxa_async_data_vld (tlu_lsu_ldxa_async_data_vld),
+           .tlu_lsu_ldxa_tid_w2         (tlu_lsu_ldxa_tid_w2[1:0]),
+           .tlu_lsu_pcxpkt              (tlu_lsu_pcxpkt[25:0]),
+           .tlu_lsu_pstate_am           (tlu_lsu_pstate_am[3:0]),
+           .tlu_lsu_pstate_cle          (tlu_lsu_pstate_cle[3:0]),
+           .tlu_lsu_pstate_priv         (tlu_lsu_pstate_priv[3:0]),
+           .tlu_lsu_redmode             (tlu_lsu_redmode[3:0]),
+           .tlu_lsu_redmode_rst_d1      (tlu_lsu_redmode_rst_d1[3:0]),
+           .tlu_lsu_stxa_ack            (tlu_lsu_stxa_ack),
+           .tlu_lsu_stxa_ack_tid        (tlu_lsu_stxa_ack_tid[1:0]),
+           .tlu_lsu_tid_m               (tlu_lsu_tid_m[1:0]),
+           .tlu_lsu_tl_zero             (tlu_lsu_tl_zero[3:0]),
+           .spu_lsu_ldst_pckt           (124'h0000000000000000000000000000000),
+           .exu_lsu_ldst_va_e           (exu_lsu_ldst_va_e[47:0]),
+           .exu_lsu_early_va_e          (exu_lsu_early_va_e[10:3]),
+           .ffu_lsu_data                (ffu_lsu_data[80:0])); 
+
+`else
+     
+   lsu lsu(
+           // temp - name change
+           .ifu_tlu_wsr_inst_d          (ifu_lsu_wsr_inst_d),
+	   // eco 6529 .
+	   .lsu_ffu_st_dtlb_perr_g		(lsu_ffu_st_dtlb_perr_g),
+	   // Bug 4799.
+	   .tlu_lsu_priv_trap_m		(tlu_lsu_priv_trap_m),
+
+           .short_si0              (short_scan0_1),
+           .short_si1              (short_scan1_1),
+           .short_so0              (short_scan0_2),
+           .short_so1              (short_scan1_2),
+           .si0                          (scan0_3),
+           .si1                          (short_scan1_5),
+           .so0                          (scan0_4),
+           .so1                          (scan1_1),
+           // reset stuff
+           .grst_l                       (spc_grst_l),
+           .arst_l                       (cmp_arst_l),
+           .clk                          (rclk),
+	         .lsu_exu_dfill_data_w2	(lsu_exu_dfill_data_g[63:0]),
+	         .lsu_exu_dfill_vld_w2	(lsu_exu_dfill_vld_g),
+	         .lsu_exu_ldst_miss_w2	(lsu_exu_ldst_miss_g2),
+           //.cpx_spc_data_cx             (cpx_spc_data_cx3[`CPX_WIDTH-1:0]),
+           .cpx_spc_data_cx             ({cpx_spc_data_cx3_b144to140[`CPX_WIDTH-1:140],
+                                          cpx_spc_data_cx3[139:121],
+                                          cpx_spc_data_cx3_b120to118[`CPX_INV_CID_HI:`CPX_INV_CID_LO],
+                                          cpx_spc_data_cx3[117:110],
+                                          cpx_spc_data_cx3_b109,
+                                          cpx_spc_data_cx3[108:107],
+                                          cpx_spc_data_cx3_b106,
+                                          cpx_spc_data_cx3[105:104],
+                                          cpx_spc_data_cx3_b103,
+                                          cpx_spc_data_cx3[102:101],
+                                          cpx_spc_data_cx3_b100,
+                                          cpx_spc_data_cx3[99:98],
+                                          cpx_spc_data_cx3_b97,
+                                          cpx_spc_data_cx3[96:95],
+                                          cpx_spc_data_cx3_b94,
+                                          cpx_spc_data_cx3[93:92],
+                                          cpx_spc_data_cx3_b91,
+                                          cpx_spc_data_cx3[90:89],
+                                          cpx_spc_data_cx3_b88,
+                                          cpx_spc_data_cx3[87:85],
+                                          cpx_spc_data_cx3_b84,
+                                          cpx_spc_data_cx3[83:81],
+                                          cpx_spc_data_cx3_b80,
+                                          cpx_spc_data_cx3[79:77],
+                                          cpx_spc_data_cx3_b76,
+                                          cpx_spc_data_cx3[75:73],
+                                          cpx_spc_data_cx3_b72,
+                                          cpx_spc_data_cx3[71:69],
+                                          cpx_spc_data_cx3_b68,
+                                          cpx_spc_data_cx3[67:65],
+                                          cpx_spc_data_cx3_b64,
+                                          cpx_spc_data_cx3[63:61],
+                                          cpx_spc_data_cx3_b60,
+                                          cpx_spc_data_cx3[59:57],
+                                          cpx_spc_data_cx3_b56,
+                                          cpx_spc_data_cx3[55:54],
+                                          cpx_spc_data_cx3_b53,
+                                          cpx_spc_data_cx3[52:51],
+                                          cpx_spc_data_cx3_b50,
+                                          cpx_spc_data_cx3[49:48],
+                                          cpx_spc_data_cx3_b47,
+                                          cpx_spc_data_cx3[46:45],
+                                          cpx_spc_data_cx3_b44,
+                                          cpx_spc_data_cx3[43:42],
+                                          cpx_spc_data_cx3_b41,
+                                          cpx_spc_data_cx3[40:39],
+                                          cpx_spc_data_cx3_b38,
+                                          cpx_spc_data_cx3[37:36],
+                                          cpx_spc_data_cx3_b35,
+                                          cpx_spc_data_cx3[34:33],
+                                          cpx_spc_data_cx3_b32,
+                                          cpx_spc_data_cx3[31:29],
+                                          cpx_spc_data_cx3_b28,
+                                          cpx_spc_data_cx3[27:25],
+                                          cpx_spc_data_cx3_b24,
+                                          cpx_spc_data_cx3[23:21],
+                                          cpx_spc_data_cx3_b20,
+                                          cpx_spc_data_cx3[19:17],
+                                          cpx_spc_data_cx3_b16,
+                                          cpx_spc_data_cx3[15:13],
+                                          cpx_spc_data_cx3_b12,
+                                          cpx_spc_data_cx3[11:9],
+                                          cpx_spc_data_cx3_b8,
+                                          cpx_spc_data_cx3[7:5],
+                                          cpx_spc_data_cx3_b4,
+                                          cpx_spc_data_cx3[3:1],
+                                          cpx_spc_data_cx3_b0}),
+           .exu_tlu_wsr_data_m          (exu_tlu_wsr_data_m[7:0]),
+           
+	   // Hypervisor related
+      	   .tlu_lsu_hpv_priv      	(tlu_hpstate_priv[3:0]),
+           .tlu_lsu_hpstate_en     	(tlu_hpstate_enb[3:0]),
+
+	         .spu_lsu_int_w2		(1'b0),
+           .gdbginit_l             	(spc_dbginit_l),
+           /*AUTOINST*/
+           // Outputs
+           .bist_ctl_reg_in             (bist_ctl_reg_in[6:0]),
+           .bist_ctl_reg_wr_en          (bist_ctl_reg_wr_en),
+           .ifu_tlu_flush_fd2_w         (ifu_tlu_flush_fd2_w),
+           .ifu_tlu_flush_fd3_w         (ifu_tlu_flush_fd3_w),
+           .ifu_tlu_flush_fd_w          (ifu_tlu_flush_fd_w),
+           .lsu_asi_reg0                (lsu_asi_reg0[7:0]),
+           .lsu_asi_reg1                (lsu_asi_reg1[7:0]),
+           .lsu_asi_reg2                (lsu_asi_reg2[7:0]),
+           .lsu_asi_reg3                (lsu_asi_reg3[7:0]),
+           .lsu_dmmu_sfsr_trp_wr        (lsu_dmmu_sfsr_trp_wr[3:0]),
+           .lsu_dsfsr_din_g             (lsu_dsfsr_din_g[23:0]),
+           .lsu_exu_flush_pipe_w        (lsu_exu_flush_pipe_w),
+           .lsu_exu_rd_m                (lsu_exu_rd_m[4:0]),
+           .lsu_exu_st_dtlb_perr_g      (lsu_exu_st_dtlb_perr_g),
+           .lsu_exu_thr_m               (lsu_exu_thr_m[1:0]),
+           .lsu_ffu_ack                 (lsu_ffu_ack),
+           .lsu_ffu_blk_asi_e           (lsu_ffu_blk_asi_e),
+           .lsu_ffu_flush_pipe_w        (lsu_ffu_flush_pipe_w),
+           .lsu_ffu_ld_data             (lsu_ffu_ld_data[63:0]),
+           .lsu_ffu_ld_vld              (lsu_ffu_ld_vld),
+           .lsu_ffu_stb_full0           (lsu_ffu_stb_full0),
+           .lsu_ffu_stb_full1           (lsu_ffu_stb_full1),
+           .lsu_ffu_stb_full2           (lsu_ffu_stb_full2),
+           .lsu_ffu_stb_full3           (lsu_ffu_stb_full3),
+           .lsu_ictag_mrgn              (lsu_ictag_mrgn[3:0]),
+           .lsu_ifu_asi_addr            (lsu_ifu_asi_addr[17:0]),
+           .lsu_ifu_asi_load            (lsu_ifu_asi_load),
+           .lsu_ifu_asi_state           (lsu_ifu_asi_state[7:0]),
+           .lsu_ifu_asi_thrid           (lsu_ifu_asi_thrid[1:0]),
+           .lsu_ifu_asi_vld             (lsu_ifu_asi_vld),
+           .lsu_ifu_cpxpkt_i1           (lsu_ifu_cpxpkt_i1[`CPX_VLD-1:0]),
+           .lsu_ifu_cpxpkt_vld_i1       (lsu_ifu_cpxpkt_vld_i1),
+           .lsu_ifu_dc_parity_error_w2  (lsu_ifu_dc_parity_error_w2),
+           .lsu_ifu_dcache_data_perror  (lsu_ifu_dcache_data_perror),
+           .lsu_ifu_dcache_tag_perror   (lsu_ifu_dcache_tag_perror),
+           .lsu_ifu_direct_map_l1       (lsu_ifu_direct_map_l1),
+           .lsu_ifu_error_tid           (lsu_ifu_error_tid[1:0]),
+           .lsu_ifu_flush_pipe_w        (lsu_ifu_flush_pipe_w),
+           .lsu_ifu_icache_en           (lsu_ifu_icache_en[3:0]),
+           .lsu_ifu_io_error            (lsu_ifu_io_error),
+           .lsu_ifu_itlb_en             (lsu_ifu_itlb_en[3:0]),
+           .lsu_ifu_l2_corr_error       (lsu_ifu_l2_corr_error),
+           .lsu_ifu_l2_unc_error        (lsu_ifu_l2_unc_error),
+           .lsu_ifu_ld_icache_index     (lsu_ifu_ld_icache_index[11:5]),
+           .lsu_ifu_ld_pcxpkt_tid       (lsu_ifu_ld_pcxpkt_tid[1:0]),
+           .lsu_ifu_ld_pcxpkt_vld       (lsu_ifu_ld_pcxpkt_vld),
+           .lsu_ifu_ldst_cmplt          (lsu_ifu_ldst_cmplt[3:0]),
+           .lsu_ifu_ldst_miss_w         (lsu_ifu_ldst_miss_w),
+           .lsu_ifu_ldsta_internal_e    (lsu_ifu_ldsta_internal_e),
+           .lsu_ifu_pcxpkt_ack_d        (lsu_ifu_pcxpkt_ack_d),
+           .lsu_ifu_stallreq            (lsu_ifu_stallreq),
+           .lsu_ifu_stbcnt0             (lsu_ifu_stbcnt0[3:0]),
+           .lsu_ifu_stbcnt1             (lsu_ifu_stbcnt1[3:0]),
+           .lsu_ifu_stbcnt2             (lsu_ifu_stbcnt2[3:0]),
+           .lsu_ifu_stbcnt3             (lsu_ifu_stbcnt3[3:0]),
+           .lsu_ifu_stxa_data           (lsu_ifu_stxa_data[47:0]),
+           .lsu_ifu_tlb_data_su         (lsu_ifu_tlb_data_su),
+           .lsu_ifu_tlb_data_ue         (lsu_ifu_tlb_data_ue),
+           .lsu_ifu_tlb_tag_ue          (lsu_ifu_tlb_tag_ue),
+           .lsu_itlb_mrgn               (lsu_itlb_mrgn[7:0]),
+           .lsu_mamem_mrgn              (lsu_mamem_mrgn[3:0]),
+           .lsu_mmu_defr_trp_taken_g    (lsu_mmu_defr_trp_taken_g),
+           .lsu_mmu_flush_pipe_w        (lsu_mmu_flush_pipe_w),
+           .lsu_mmu_rs3_data_g          (lsu_mmu_rs3_data_g[63:0]),
+           .lsu_pid_state0              (lsu_pid_state0[2:0]),
+           .lsu_pid_state1              (lsu_pid_state1[2:0]),
+           .lsu_pid_state2              (lsu_pid_state2[2:0]),
+           .lsu_pid_state3              (lsu_pid_state3[2:0]),
+           .lsu_spu_asi_state_e         (lsu_spu_asi_state_e[7:0]),
+           .lsu_spu_early_flush_g       (lsu_spu_early_flush_g),
+           .lsu_spu_ldst_ack            (lsu_spu_ldst_ack),
+           .lsu_spu_stb_empty           (lsu_spu_stb_empty[3:0]),
+           .lsu_spu_strm_ack_cmplt      (lsu_spu_strm_ack_cmplt[1:0]),
+           .lsu_t0_pctxt_state          (lsu_t0_pctxt_state[12:0]),
+           .lsu_t1_pctxt_state          (lsu_t1_pctxt_state[12:0]),
+           .lsu_t2_pctxt_state          (lsu_t2_pctxt_state[12:0]),
+           .lsu_t3_pctxt_state          (lsu_t3_pctxt_state[12:0]),
+           .lsu_tlu_async_tid_w2        (lsu_tlu_async_tid_w2[1:0]),
+           .lsu_tlu_async_ttype_vld_w2  (lsu_tlu_async_ttype_vld_w2),
+           .lsu_tlu_async_ttype_w2      (lsu_tlu_async_ttype_w2[6:0]),
+           .lsu_tlu_cpx_req             (lsu_tlu_cpx_req[3:0]),
+           .lsu_tlu_cpx_vld             (lsu_tlu_cpx_vld),
+           .lsu_tlu_daccess_excptn_g    (lsu_tlu_daccess_excptn_g),
+           .lsu_tlu_dcache_miss_w2      (lsu_tlu_dcache_miss_w2[3:0]),
+           .lsu_tlu_defr_trp_taken_g    (lsu_tlu_defr_trp_taken_g),
+           .lsu_tlu_dmmu_miss_g         (lsu_tlu_dmmu_miss_g),
+           .lsu_tlu_dside_ctxt_m        (lsu_tlu_dside_ctxt_m[12:0]),
+           .lsu_tlu_dtlb_done           (lsu_tlu_dtlb_done),
+           .lsu_tlu_early_flush2_w      (lsu_tlu_early_flush2_w),
+           .lsu_tlu_early_flush_w       (lsu_tlu_early_flush_w),
+           .lsu_tlu_intpkt              (lsu_tlu_intpkt[17:0]),
+           .lsu_tlu_l2_dmiss            (lsu_tlu_l2_dmiss[3:0]),
+           .lsu_tlu_ldst_va_m           (lsu_tlu_ldst_va_m[9:0]),
+           .lsu_tlu_misalign_addr_ldst_atm_m(lsu_tlu_misalign_addr_ldst_atm_m),
+           .lsu_tlu_pctxt_m             (lsu_tlu_pctxt_m[12:0]),
+           .lsu_tlu_pcxpkt_ack          (lsu_tlu_pcxpkt_ack),
+           .lsu_tlu_rs3_data_g          (lsu_tlu_rs3_data_g[63:0]),
+           .lsu_tlu_rsr_data_e          (lsu_tlu_rsr_data_e[7:0]),
+           .lsu_tlu_stb_full_w2         (lsu_tlu_stb_full_w2[3:0]),
+           .lsu_tlu_thrid_d             (lsu_tlu_thrid_d[1:0]),
+           .lsu_tlu_tlb_access_tid_m    (lsu_tlu_tlb_access_tid_m[1:0]),
+           .lsu_tlu_tlb_asi_state_m     (lsu_tlu_tlb_asi_state_m[7:0]),
+           .lsu_tlu_tlb_dmp_va_m        (lsu_tlu_tlb_dmp_va_m[47:13]),
+           .lsu_tlu_tlb_ld_inst_m       (lsu_tlu_tlb_ld_inst_m),
+           .lsu_tlu_tlb_ldst_va_m       (lsu_tlu_tlb_ldst_va_m[10:0]),
+           .lsu_tlu_tlb_st_inst_m       (lsu_tlu_tlb_st_inst_m),
+           .lsu_tlu_ttype_m2            (lsu_tlu_ttype_m2[8:0]),
+           .lsu_tlu_ttype_vld_m2        (lsu_tlu_ttype_vld_m2),
+           .lsu_tlu_wsr_inst_e          (lsu_tlu_wsr_inst_e),
+           .mbist_dcache_data_in        (mbist_dcache_data_in[71:0]),
+           .spc_efc_dfuse_data          (spc_efc_dfuse_data),
+           .spc_pcx_atom_pq             (spc_pcx_atom_pq),
+           .spc_pcx_data_pa             (spc_pcx_data_pa[`PCX_WIDTH-1:0]),
+           .spc_pcx_req_pq              (spc_pcx_req_pq[4:0]),
+           .lsu_asi_state               (lsu_asi_state[7:0]),
+           .lsu_ifu_err_addr            (lsu_ifu_err_addr[47:4]),
+           .lsu_sscan_data              (lsu_sscan_data[15:0]),
+           .ifu_tlu_inst_vld_m_bf1      (ifu_tlu_inst_vld_m_bf1),
+           .lsu_ffu_bld_cnt_w           (lsu_ffu_bld_cnt_w[2:0]),
+           .lsu_tlu_nucleus_ctxt_m      (lsu_tlu_nucleus_ctxt_m),
+           .lsu_tlu_tte_pg_sz_g         (lsu_tlu_tte_pg_sz_g[2:0]),
+           .lsu_tlu_squash_va_oor_m     (lsu_tlu_squash_va_oor_m),
+           .lsu_tlu_wtchpt_trp_g        (lsu_tlu_wtchpt_trp_g),
+           .lsu_tlu_daccess_prot_g      (lsu_tlu_daccess_prot_g),
+           .lsu_tlu_priv_action_g       (lsu_tlu_priv_action_g),
+           // Inputs
+           .bist_ctl_reg_out            (bist_ctl_reg_out[10:0]),
+           .const_cpuid                 (const_cpuid[2:0]),
+           .ctu_sscan_tid               (ctu_sscan_tid[3:0]),
+           .efc_spc_dfuse_ashift        (efc_spc_dfuse_ashift),
+           .efc_spc_dfuse_data          (efc_spc_dfuse_data),
+           .efc_spc_dfuse_dshift        (efc_spc_dfuse_dshift),
+           .efc_spc_fuse_clk1           (efc_spc_fuse_clk1),
+           .efc_spc_fuse_clk2           (efc_spc_fuse_clk2),
+           .exu_lsu_rs2_data_e          (exu_lsu_rs2_data_e[63:0]),
+           .exu_lsu_rs3_data_e          (exu_lsu_rs3_data_e[63:0]),
+           .exu_tlu_misalign_addr_jmpl_rtn_m(exu_tlu_misalign_addr_jmpl_rtn_m),
+           .exu_tlu_va_oor_m            (exu_tlu_va_oor_m),
+           .ffu_lsu_blk_st_e            (ffu_lsu_blk_st_e),
+           .ffu_lsu_blk_st_va_e         (ffu_lsu_blk_st_va_e[5:3]),
+           .ffu_lsu_fpop_rq_vld         (ffu_lsu_fpop_rq_vld),
+           .ffu_lsu_kill_fst_w          (ffu_lsu_kill_fst_w),
+           .ifu_lsu_alt_space_d         (ifu_lsu_alt_space_d),
+           .ifu_lsu_alt_space_e         (ifu_lsu_alt_space_e),
+           .ifu_lsu_asi_ack             (ifu_lsu_asi_ack),
+           .ifu_lsu_asi_rd_unc          (ifu_lsu_asi_rd_unc),
+           .ifu_lsu_casa_e              (ifu_lsu_casa_e),
+           .ifu_lsu_destid_s            (ifu_lsu_destid_s[2:0]),
+           .ifu_lsu_fwd_data_vld        (ifu_lsu_fwd_data_vld),
+           .ifu_lsu_fwd_wr_ack          (ifu_lsu_fwd_wr_ack),
+           .ifu_lsu_ibuf_busy           (ifu_lsu_ibuf_busy),
+           .ifu_lsu_imm_asi_d           (ifu_lsu_imm_asi_d[7:0]),
+           .ifu_lsu_imm_asi_vld_d       (ifu_lsu_imm_asi_vld_d),
+           .ifu_lsu_inv_clear           (ifu_lsu_inv_clear),
+           .ifu_lsu_ld_inst_e           (ifu_lsu_ld_inst_e),
+           .ifu_lsu_ldst_dbl_e          (ifu_lsu_ldst_dbl_e),
+           .ifu_lsu_ldst_fp_e           (ifu_lsu_ldst_fp_e),
+           .ifu_lsu_ldst_size_e         (ifu_lsu_ldst_size_e[1:0]),
+           .ifu_lsu_ldstub_e            (ifu_lsu_ldstub_e),
+           .ifu_lsu_ldxa_data_vld_w2    (ifu_lsu_ldxa_data_vld_w2),
+           .ifu_lsu_ldxa_data_w2        (ifu_lsu_ldxa_data_w2[63:0]),
+           .ifu_lsu_ldxa_illgl_va_w2    (ifu_lsu_ldxa_illgl_va_w2),
+           .ifu_lsu_ldxa_tid_w2         (ifu_lsu_ldxa_tid_w2[1:0]),
+           .ifu_lsu_memref_d            (ifu_lsu_memref_d),
+           .ifu_lsu_nceen               (ifu_lsu_nceen[3:0]),
+           .ifu_lsu_pcxpkt_e            (ifu_lsu_pcxpkt_e[51:0]),
+           .ifu_lsu_pcxreq_d            (ifu_lsu_pcxreq_d),
+           .ifu_lsu_pref_inst_e         (ifu_lsu_pref_inst_e),
+           .ifu_lsu_rd_e                (ifu_lsu_rd_e[4:0]),
+           .ifu_lsu_sign_ext_e          (ifu_lsu_sign_ext_e),
+           .ifu_lsu_st_inst_e           (ifu_lsu_st_inst_e),
+           .ifu_lsu_swap_e              (ifu_lsu_swap_e),
+           .ifu_lsu_thrid_s             (ifu_lsu_thrid_s[1:0]),
+           .ifu_tlu_flsh_inst_e         (ifu_tlu_flsh_inst_e),
+           .ifu_tlu_flush_m             (ifu_tlu_flush_m),
+           .ifu_tlu_inst_vld_m          (ifu_tlu_inst_vld_m),
+           .ifu_tlu_mb_inst_e           (ifu_tlu_mb_inst_e),
+           .ifu_tlu_sraddr_d            (ifu_tlu_sraddr_d[6:0]),
+           .ifu_tlu_thrid_e             (ifu_tlu_thrid_e[1:0]),
+           .mbist_dcache_index          (mbist_dcache_index[6:0]),
+           .mbist_dcache_read           (mbist_dcache_read),
+           .mbist_dcache_way            (mbist_dcache_way[1:0]),
+           .mbist_dcache_word           (mbist_dcache_word),
+           .mbist_dcache_write          (mbist_dcache_write),
+           .mbist_write_data            (mbist_write_data[7:0]),
+           .mem_write_disable           (mem_write_disable),
+           .mux_drive_disable           (mux_drive_disable),
+           .pcx_spc_grant_px            (pcx_spc_grant_px[4:0]),
+           .se                          (se),
+           .sehold                      (sehold),
+           .spu_lsu_ldxa_data_vld_w2    (spu_lsu_ldxa_data_vld_w2),
+           .spu_lsu_ldxa_data_w2        (spu_lsu_ldxa_data_w2[63:0]),
+           .spu_lsu_ldxa_illgl_va_w2    (spu_lsu_ldxa_illgl_va_w2),
+           .spu_lsu_ldxa_tid_w2         (spu_lsu_ldxa_tid_w2[1:0]),
+           .spu_lsu_stxa_ack            (spu_lsu_stxa_ack),
+           .spu_lsu_stxa_ack_tid        (spu_lsu_stxa_ack_tid[1:0]),
+           .spu_lsu_unc_error_w2        (spu_lsu_unc_error_w2),
+           .testmode_l                  (testmode_l),
+           .tlu_dsfsr_flt_vld           (tlu_dsfsr_flt_vld[3:0]),
+           .tlu_dtlb_data_rd_g          (tlu_dtlb_data_rd_g),
+           .tlu_dtlb_dmp_actxt_g        (tlu_dtlb_dmp_actxt_g),
+           .tlu_dtlb_dmp_all_g          (tlu_dtlb_dmp_all_g),
+           .tlu_dtlb_dmp_nctxt_g        (tlu_dtlb_dmp_nctxt_g),
+           .tlu_dtlb_dmp_pctxt_g        (tlu_dtlb_dmp_pctxt_g),
+           .tlu_dtlb_dmp_sctxt_g        (tlu_dtlb_dmp_sctxt_g),
+           .tlu_dtlb_dmp_vld_g          (tlu_dtlb_dmp_vld_g),
+           .tlu_dtlb_invalidate_all_g   (tlu_dtlb_invalidate_all_g),
+           .tlu_dtlb_rw_index_g         (tlu_dtlb_rw_index_g[5:0]),
+           .tlu_dtlb_rw_index_vld_g     (tlu_dtlb_rw_index_vld_g),
+           .tlu_dtlb_tag_rd_g           (tlu_dtlb_tag_rd_g),
+           .tlu_dtlb_tte_data_w2        (tlu_dtlb_tte_data_w2[42:0]),
+           .tlu_dtlb_tte_tag_w2         (tlu_dtlb_tte_tag_w2[58:0]),
+           .tlu_early_flush_pipe2_w     (tlu_early_flush_pipe2_w),
+           .tlu_early_flush_pipe_w      (tlu_early_flush_pipe_w),
+           .tlu_exu_early_flush_pipe_w  (tlu_exu_early_flush_pipe_w),
+           .tlu_idtlb_dmp_key_g         (tlu_idtlb_dmp_key_g[40:0]),
+           .tlu_idtlb_dmp_thrid_g       (tlu_idtlb_dmp_thrid_g[1:0]),
+           .tlu_lsu_asi_m               (tlu_lsu_asi_m[7:0]),
+           .tlu_lsu_asi_update_m        (tlu_lsu_asi_update_m),
+           .tlu_lsu_int_ld_ill_va_w2    (tlu_lsu_int_ld_ill_va_w2),
+           .tlu_lsu_int_ldxa_data_w2    (tlu_lsu_int_ldxa_data_w2[63:0]),
+           .tlu_lsu_int_ldxa_vld_w2     (tlu_lsu_int_ldxa_vld_w2),
+           .tlu_lsu_ldxa_async_data_vld (tlu_lsu_ldxa_async_data_vld),
+           .tlu_lsu_ldxa_tid_w2         (tlu_lsu_ldxa_tid_w2[1:0]),
+           .tlu_lsu_pcxpkt              (tlu_lsu_pcxpkt[25:0]),
+           .tlu_lsu_pstate_am           (tlu_lsu_pstate_am[3:0]),
+           .tlu_lsu_pstate_cle          (tlu_lsu_pstate_cle[3:0]),
+           .tlu_lsu_pstate_priv         (tlu_lsu_pstate_priv[3:0]),
+           .tlu_lsu_redmode             (tlu_lsu_redmode[3:0]),
+           .tlu_lsu_redmode_rst_d1      (tlu_lsu_redmode_rst_d1[3:0]),
+           .tlu_lsu_stxa_ack            (tlu_lsu_stxa_ack),
+           .tlu_lsu_stxa_ack_tid        (tlu_lsu_stxa_ack_tid[1:0]),
+           .tlu_lsu_tid_m               (tlu_lsu_tid_m[1:0]),
+           .tlu_lsu_tl_zero             (tlu_lsu_tl_zero[3:0]),
+           .spu_lsu_ldst_pckt           (spu_lsu_ldst_pckt[`PCX_WIDTH-1:0]),
+           .exu_lsu_ldst_va_e           (exu_lsu_ldst_va_e[47:0]),
+           .exu_lsu_early_va_e          (exu_lsu_early_va_e[10:3]),
+           .ffu_lsu_data                (ffu_lsu_data[80:0])); 
+
+`endif //  `ifdef FPGA_SYN_NO_SPU
+
+`ifdef FPGA_SYN_NO_SPU
+
+   sparc_exu exu   (  // sparc_exu instance "exu" used when FPGA_SYN_NO_SPU is defined
+                 .short_si0              (short_scan0_2),
+                 .short_so0              (short_scan0_3),  // short_scan0_3 is also wired to tlu.short_si0 below
+                 .short_si1 (short_scan1_2),
+                 .short_so1 (short_scan1_3),  // short_scan1_3 is also wired to tlu.short_si1 below
+                 .si0 (scan0_2),
+                 .so0 (scan0_3),
+                 // Reset connections
+                 .grst_l                (spc_grst_l),
+                 .arst_l                (cmp_arst_l),
+                 .mul_exu_data_g (mul_data_out[63:0]),  // net name (mul_data_out) differs from the port name
+                 .ifu_tlu_wsr_inst_d (ifu_exu_wsr_inst_d),  // port is named ifu_tlu_*, connected net is ifu_exu_*
+                 // exu_tlu_ue_trap_m is left unconnected; the tlu instance below ties its port of that name to 1'b0 (bug 5953)
+                 .exu_tlu_ue_trap_m     (),
+                 
+		             /*AUTOINST*/
+                 // Outputs
+                 .exu_ffu_wsr_inst_e    (exu_ffu_wsr_inst_e),
+                 .exu_ifu_brpc_e        (exu_ifu_brpc_e[47:0]),
+                 .exu_ifu_cc_d          (exu_ifu_cc_d[7:0]),
+                 .exu_ifu_ecc_ce_m      (exu_ifu_ecc_ce_m),
+                 .exu_ifu_ecc_ue_m      (exu_ifu_ecc_ue_m),
+                 .exu_ifu_err_reg_m     (exu_ifu_err_reg_m[7:0]),
+                 .exu_ifu_inj_ack       (exu_ifu_inj_ack),
+                 .exu_ifu_longop_done_g (exu_ifu_longop_done_g[3:0]),
+                 .exu_ifu_oddwin_s      (exu_ifu_oddwin_s[3:0]),
+                 .exu_ifu_regn_e        (exu_ifu_regn_e),
+                 .exu_ifu_regz_e        (exu_ifu_regz_e),
+                 .exu_ifu_spill_e       (exu_ifu_spill_e),
+                 .exu_ifu_va_oor_m      (exu_ifu_va_oor_m),
+                 .exu_lsu_early_va_e    (exu_lsu_early_va_e[10:3]),  // only bits [10:3] of this net are used
+                 .exu_lsu_ldst_va_e     (exu_lsu_ldst_va_e[47:0]),
+                 .exu_lsu_priority_trap_m(exu_lsu_priority_trap_m),
+                 .exu_lsu_rs2_data_e    (exu_lsu_rs2_data_e[63:0]),
+                 .exu_lsu_rs3_data_e    (exu_lsu_rs3_data_e[63:0]),
+                 .exu_mmu_early_va_e    (exu_mmu_early_va_e[7:0]),
+                 .exu_mul_input_vld     (exu_mul_input_vld),
+                 .exu_mul_rs1_data      (exu_mul_rs1_data[63:0]),
+                 .exu_mul_rs2_data      (exu_mul_rs2_data[63:0]),
+                 .exu_spu_rs3_data_e    (),  // open in this variant; the else-branch instance connects exu_spu_rs3_data_e[63:0]
+                 .exu_tlu_ccr0_w        (exu_tlu_ccr0_w[7:0]),
+                 .exu_tlu_ccr1_w        (exu_tlu_ccr1_w[7:0]),
+                 .exu_tlu_ccr2_w        (exu_tlu_ccr2_w[7:0]),
+                 .exu_tlu_ccr3_w        (exu_tlu_ccr3_w[7:0]),
+                 .exu_tlu_cwp0_w        (exu_tlu_cwp0_w[2:0]),
+                 .exu_tlu_cwp1_w        (exu_tlu_cwp1_w[2:0]),
+                 .exu_tlu_cwp2_w        (exu_tlu_cwp2_w[2:0]),
+                 .exu_tlu_cwp3_w        (exu_tlu_cwp3_w[2:0]),
+                 .exu_tlu_cwp_cmplt     (exu_tlu_cwp_cmplt),
+                 .exu_tlu_cwp_cmplt_tid (exu_tlu_cwp_cmplt_tid[1:0]),
+                 .exu_tlu_cwp_retry     (exu_tlu_cwp_retry),
+                 .exu_tlu_misalign_addr_jmpl_rtn_m(exu_tlu_misalign_addr_jmpl_rtn_m),
+                 .exu_tlu_spill         (exu_tlu_spill),
+                 .exu_tlu_spill_other   (exu_tlu_spill_other),
+                 .exu_tlu_spill_tid     (exu_tlu_spill_tid[1:0]),
+                 .exu_tlu_spill_wtype   (exu_tlu_spill_wtype[2:0]),
+                 .exu_tlu_ttype_m       (exu_tlu_ttype_m[8:0]),
+                 .exu_tlu_ttype_vld_m   (exu_tlu_ttype_vld_m),
+                 .exu_tlu_va_oor_jl_ret_m(exu_tlu_va_oor_jl_ret_m),
+                 .exu_tlu_va_oor_m      (exu_tlu_va_oor_m),
+                 .exu_tlu_wsr_data_m    (exu_tlu_wsr_data_m[63:0]),
+                 .exu_ifu_err_synd_m    (exu_ifu_err_synd_m[7:0]),
+                 // Inputs
+                 .mux_drive_disable     (mux_drive_disable),
+                 .mem_write_disable     (mem_write_disable),
+                 .ffu_exu_rsr_data_m    (ffu_exu_rsr_data_m[63:0]),
+                 .ifu_exu_addr_mask_d   (ifu_exu_addr_mask_d),
+                 .ifu_exu_aluop_d       (ifu_exu_aluop_d[2:0]),
+                 .ifu_exu_casa_d        (ifu_exu_casa_d),
+                 .ifu_exu_dbrinst_d     (ifu_exu_dbrinst_d),
+                 .ifu_exu_disable_ce_e  (ifu_exu_disable_ce_e),
+                 .ifu_exu_dontmv_regz0_e(ifu_exu_dontmv_regz0_e),
+                 .ifu_exu_dontmv_regz1_e(ifu_exu_dontmv_regz1_e),
+                 .ifu_exu_ecc_mask      (ifu_exu_ecc_mask[7:0]),
+                 .ifu_exu_enshift_d     (ifu_exu_enshift_d),
+                 .ifu_exu_flushw_e      (ifu_exu_flushw_e),
+                 .ifu_exu_ialign_d      (ifu_exu_ialign_d),
+                 .ifu_exu_imm_data_d    (ifu_exu_imm_data_d[31:0]),
+                 .ifu_exu_inj_irferr    (ifu_exu_inj_irferr),
+                 .ifu_exu_inst_vld_e    (ifu_exu_inst_vld_e),
+                 .ifu_exu_inst_vld_w    (ifu_exu_inst_vld_w),
+                 .ifu_exu_invert_d      (ifu_exu_invert_d),
+                 .ifu_exu_kill_e        (ifu_exu_kill_e),
+                 .ifu_exu_muldivop_d    (ifu_exu_muldivop_d[4:0]),
+                 .ifu_exu_muls_d        (ifu_exu_muls_d),
+                 .ifu_exu_nceen_e       (ifu_exu_nceen_e),
+                 .ifu_exu_pc_d          (ifu_exu_pc_d[47:0]),
+                 .ifu_exu_pcver_e       (ifu_exu_pcver_e[63:0]),
+                 .ifu_exu_range_check_jlret_d(ifu_exu_range_check_jlret_d),
+                 .ifu_exu_range_check_other_d(ifu_exu_range_check_other_d),
+                 .ifu_exu_rd_d          (ifu_exu_rd_d[4:0]),
+                 .ifu_exu_rd_exusr_e    (ifu_exu_rd_exusr_e),
+                 .ifu_exu_rd_ffusr_e    (ifu_exu_rd_ffusr_e),
+                 .ifu_exu_rd_ifusr_e    (ifu_exu_rd_ifusr_e),
+                 .ifu_exu_ren1_s        (ifu_exu_ren1_s),
+                 .ifu_exu_ren2_s        (ifu_exu_ren2_s),
+                 .ifu_exu_ren3_s        (ifu_exu_ren3_s),
+                 .ifu_exu_restore_d     (ifu_exu_restore_d),
+                 .ifu_exu_restored_e    (ifu_exu_restored_e),
+                 .ifu_exu_return_d      (ifu_exu_return_d),
+                 .ifu_exu_rs1_s         (ifu_exu_rs1_s[4:0]),
+                 .ifu_exu_rs1_vld_d     (ifu_exu_rs1_vld_d),
+                 .ifu_exu_rs2_s         (ifu_exu_rs2_s[4:0]),
+                 .ifu_exu_rs2_vld_d     (ifu_exu_rs2_vld_d),
+                 .ifu_exu_rs3_s         (ifu_exu_rs3_s[4:0]),
+                 .ifu_exu_rs3e_vld_d    (ifu_exu_rs3e_vld_d),
+                 .ifu_exu_rs3o_vld_d    (ifu_exu_rs3o_vld_d),
+                 .ifu_exu_save_d        (ifu_exu_save_d),
+                 .ifu_exu_saved_e       (ifu_exu_saved_e),
+                 .ifu_exu_setcc_d       (ifu_exu_setcc_d),
+                 .ifu_exu_sethi_inst_d  (ifu_exu_sethi_inst_d),
+                 .ifu_exu_shiftop_d     (ifu_exu_shiftop_d[2:0]),
+                 .ifu_exu_tagop_d       (ifu_exu_tagop_d),
+                 .ifu_exu_tcc_e         (ifu_exu_tcc_e),
+                 .ifu_exu_tid_s2        (ifu_exu_tid_s2[1:0]),
+                 .ifu_exu_ttype_vld_m   (ifu_exu_ttype_vld_m),
+                 .ifu_exu_tv_d          (ifu_exu_tv_d),
+                 .ifu_exu_use_rsr_e_l   (ifu_exu_use_rsr_e_l),
+                 .ifu_exu_usecin_d      (ifu_exu_usecin_d),
+                 .ifu_exu_useimm_d      (ifu_exu_useimm_d),
+                 .ifu_exu_wen_d         (ifu_exu_wen_d),
+                 .ifu_tlu_flush_m       (ifu_tlu_flush_m),
+                 .ifu_tlu_sraddr_d      (ifu_tlu_sraddr_d[6:0]),
+                 .lsu_exu_dfill_data_g  (lsu_exu_dfill_data_g[63:0]),
+                 .lsu_exu_dfill_vld_g   (lsu_exu_dfill_vld_g),
+                 .lsu_exu_flush_pipe_w  (lsu_exu_flush_pipe_w),
+                 .lsu_exu_ldst_miss_g2  (lsu_exu_ldst_miss_g2),
+                 .lsu_exu_ldxa_data_g   (lsu_exu_ldxa_data_g[63:0]),
+                 .lsu_exu_ldxa_m        (lsu_exu_ldxa_m),
+                 .lsu_exu_rd_m          (lsu_exu_rd_m[4:0]),
+                 .lsu_exu_st_dtlb_perr_g(lsu_exu_st_dtlb_perr_g),
+                 .lsu_exu_thr_m         (lsu_exu_thr_m[1:0]),
+                 .mul_exu_ack           (mul_exu_ack),
+                 .rclk                  (rclk),
+                 .se                    (se),
+                 .sehold                (sehold),
+                 .tlu_exu_agp           (tlu_exu_agp[1:0]),
+                 .tlu_exu_agp_swap      (tlu_exu_agp_swap),
+                 .tlu_exu_agp_tid       (tlu_exu_agp_tid[1:0]),
+                 .tlu_exu_ccr_m         (tlu_exu_ccr_m[7:0]),
+                 .tlu_exu_cwp_m         (tlu_exu_cwp_m[2:0]),
+                 .tlu_exu_cwp_retry_m   (tlu_exu_cwp_retry_m),
+                 .tlu_exu_cwpccr_update_m(tlu_exu_cwpccr_update_m),
+                 .tlu_exu_pic_onebelow_m(tlu_exu_pic_onebelow_m),
+                 .tlu_exu_pic_twobelow_m(tlu_exu_pic_twobelow_m),
+                 .tlu_exu_priv_trap_m   (tlu_exu_priv_trap_m),
+                 .tlu_exu_rsr_data_m    (tlu_exu_rsr_data_m[63:0]));
+
+`else
+   
+sparc_exu exu   (  // sparc_exu instance "exu" used when FPGA_SYN_NO_SPU is NOT defined
+                 .short_si0              (short_scan0_2),
+                 .short_so0              (short_scan0_3),
+                 .short_si1 (short_scan1_2),
+                 .short_so1 (short_scan1_3),
+                 .si0 (scan0_2),
+                 .so0 (scan0_3),
+                 // Reset connections
+                 .grst_l                (spc_grst_l),
+                 .arst_l                (cmp_arst_l),
+                 .mul_exu_data_g (mul_data_out[63:0]),  // net name (mul_data_out) differs from the port name
+                 .ifu_tlu_wsr_inst_d (ifu_exu_wsr_inst_d),  // port is named ifu_tlu_*, connected net is ifu_exu_*
+                 // exu_tlu_ue_trap_m is left unconnected in this instance
+                 .exu_tlu_ue_trap_m     (),
+                 
+		             /*AUTOINST*/
+                 // Outputs
+                 .exu_ffu_wsr_inst_e    (exu_ffu_wsr_inst_e),
+                 .exu_ifu_brpc_e        (exu_ifu_brpc_e[47:0]),
+                 .exu_ifu_cc_d          (exu_ifu_cc_d[7:0]),
+                 .exu_ifu_ecc_ce_m      (exu_ifu_ecc_ce_m),
+                 .exu_ifu_ecc_ue_m      (exu_ifu_ecc_ue_m),
+                 .exu_ifu_err_reg_m     (exu_ifu_err_reg_m[7:0]),
+                 .exu_ifu_inj_ack       (exu_ifu_inj_ack),
+                 .exu_ifu_longop_done_g (exu_ifu_longop_done_g[3:0]),
+                 .exu_ifu_oddwin_s      (exu_ifu_oddwin_s[3:0]),
+                 .exu_ifu_regn_e        (exu_ifu_regn_e),
+                 .exu_ifu_regz_e        (exu_ifu_regz_e),
+                 .exu_ifu_spill_e       (exu_ifu_spill_e),
+                 .exu_ifu_va_oor_m      (exu_ifu_va_oor_m),
+                 .exu_lsu_early_va_e    (exu_lsu_early_va_e[10:3]),  // only bits [10:3] of this net are used
+                 .exu_lsu_ldst_va_e     (exu_lsu_ldst_va_e[47:0]),
+                 .exu_lsu_priority_trap_m(exu_lsu_priority_trap_m),
+                 .exu_lsu_rs2_data_e    (exu_lsu_rs2_data_e[63:0]),
+                 .exu_lsu_rs3_data_e    (exu_lsu_rs3_data_e[63:0]),
+                 .exu_mmu_early_va_e    (exu_mmu_early_va_e[7:0]),
+                 .exu_mul_input_vld     (exu_mul_input_vld),
+                 .exu_mul_rs1_data      (exu_mul_rs1_data[63:0]),
+                 .exu_mul_rs2_data      (exu_mul_rs2_data[63:0]),
+                 .exu_spu_rs3_data_e    (exu_spu_rs3_data_e[63:0]),  // the one connection that differs: the FPGA_SYN_NO_SPU instance leaves this port open
+                 .exu_tlu_ccr0_w        (exu_tlu_ccr0_w[7:0]),
+                 .exu_tlu_ccr1_w        (exu_tlu_ccr1_w[7:0]),
+                 .exu_tlu_ccr2_w        (exu_tlu_ccr2_w[7:0]),
+                 .exu_tlu_ccr3_w        (exu_tlu_ccr3_w[7:0]),
+                 .exu_tlu_cwp0_w        (exu_tlu_cwp0_w[2:0]),
+                 .exu_tlu_cwp1_w        (exu_tlu_cwp1_w[2:0]),
+                 .exu_tlu_cwp2_w        (exu_tlu_cwp2_w[2:0]),
+                 .exu_tlu_cwp3_w        (exu_tlu_cwp3_w[2:0]),
+                 .exu_tlu_cwp_cmplt     (exu_tlu_cwp_cmplt),
+                 .exu_tlu_cwp_cmplt_tid (exu_tlu_cwp_cmplt_tid[1:0]),
+                 .exu_tlu_cwp_retry     (exu_tlu_cwp_retry),
+                 .exu_tlu_misalign_addr_jmpl_rtn_m(exu_tlu_misalign_addr_jmpl_rtn_m),
+                 .exu_tlu_spill         (exu_tlu_spill),
+                 .exu_tlu_spill_other   (exu_tlu_spill_other),
+                 .exu_tlu_spill_tid     (exu_tlu_spill_tid[1:0]),
+                 .exu_tlu_spill_wtype   (exu_tlu_spill_wtype[2:0]),
+                 .exu_tlu_ttype_m       (exu_tlu_ttype_m[8:0]),
+                 .exu_tlu_ttype_vld_m   (exu_tlu_ttype_vld_m),
+                 .exu_tlu_va_oor_jl_ret_m(exu_tlu_va_oor_jl_ret_m),
+                 .exu_tlu_va_oor_m      (exu_tlu_va_oor_m),
+                 .exu_tlu_wsr_data_m    (exu_tlu_wsr_data_m[63:0]),
+                 .exu_ifu_err_synd_m    (exu_ifu_err_synd_m[7:0]),
+                 // Inputs
+                 .mux_drive_disable     (mux_drive_disable),
+                 .mem_write_disable     (mem_write_disable),
+                 .ffu_exu_rsr_data_m    (ffu_exu_rsr_data_m[63:0]),
+                 .ifu_exu_addr_mask_d   (ifu_exu_addr_mask_d),
+                 .ifu_exu_aluop_d       (ifu_exu_aluop_d[2:0]),
+                 .ifu_exu_casa_d        (ifu_exu_casa_d),
+                 .ifu_exu_dbrinst_d     (ifu_exu_dbrinst_d),
+                 .ifu_exu_disable_ce_e  (ifu_exu_disable_ce_e),
+                 .ifu_exu_dontmv_regz0_e(ifu_exu_dontmv_regz0_e),
+                 .ifu_exu_dontmv_regz1_e(ifu_exu_dontmv_regz1_e),
+                 .ifu_exu_ecc_mask      (ifu_exu_ecc_mask[7:0]),
+                 .ifu_exu_enshift_d     (ifu_exu_enshift_d),
+                 .ifu_exu_flushw_e      (ifu_exu_flushw_e),
+                 .ifu_exu_ialign_d      (ifu_exu_ialign_d),
+                 .ifu_exu_imm_data_d    (ifu_exu_imm_data_d[31:0]),
+                 .ifu_exu_inj_irferr    (ifu_exu_inj_irferr),
+                 .ifu_exu_inst_vld_e    (ifu_exu_inst_vld_e),
+                 .ifu_exu_inst_vld_w    (ifu_exu_inst_vld_w),
+                 .ifu_exu_invert_d      (ifu_exu_invert_d),
+                 .ifu_exu_kill_e        (ifu_exu_kill_e),
+                 .ifu_exu_muldivop_d    (ifu_exu_muldivop_d[4:0]),
+                 .ifu_exu_muls_d        (ifu_exu_muls_d),
+                 .ifu_exu_nceen_e       (ifu_exu_nceen_e),
+                 .ifu_exu_pc_d          (ifu_exu_pc_d[47:0]),
+                 .ifu_exu_pcver_e       (ifu_exu_pcver_e[63:0]),
+                 .ifu_exu_range_check_jlret_d(ifu_exu_range_check_jlret_d),
+                 .ifu_exu_range_check_other_d(ifu_exu_range_check_other_d),
+                 .ifu_exu_rd_d          (ifu_exu_rd_d[4:0]),
+                 .ifu_exu_rd_exusr_e    (ifu_exu_rd_exusr_e),
+                 .ifu_exu_rd_ffusr_e    (ifu_exu_rd_ffusr_e),
+                 .ifu_exu_rd_ifusr_e    (ifu_exu_rd_ifusr_e),
+                 .ifu_exu_ren1_s        (ifu_exu_ren1_s),
+                 .ifu_exu_ren2_s        (ifu_exu_ren2_s),
+                 .ifu_exu_ren3_s        (ifu_exu_ren3_s),
+                 .ifu_exu_restore_d     (ifu_exu_restore_d),
+                 .ifu_exu_restored_e    (ifu_exu_restored_e),
+                 .ifu_exu_return_d      (ifu_exu_return_d),
+                 .ifu_exu_rs1_s         (ifu_exu_rs1_s[4:0]),
+                 .ifu_exu_rs1_vld_d     (ifu_exu_rs1_vld_d),
+                 .ifu_exu_rs2_s         (ifu_exu_rs2_s[4:0]),
+                 .ifu_exu_rs2_vld_d     (ifu_exu_rs2_vld_d),
+                 .ifu_exu_rs3_s         (ifu_exu_rs3_s[4:0]),
+                 .ifu_exu_rs3e_vld_d    (ifu_exu_rs3e_vld_d),
+                 .ifu_exu_rs3o_vld_d    (ifu_exu_rs3o_vld_d),
+                 .ifu_exu_save_d        (ifu_exu_save_d),
+                 .ifu_exu_saved_e       (ifu_exu_saved_e),
+                 .ifu_exu_setcc_d       (ifu_exu_setcc_d),
+                 .ifu_exu_sethi_inst_d  (ifu_exu_sethi_inst_d),
+                 .ifu_exu_shiftop_d     (ifu_exu_shiftop_d[2:0]),
+                 .ifu_exu_tagop_d       (ifu_exu_tagop_d),
+                 .ifu_exu_tcc_e         (ifu_exu_tcc_e),
+                 .ifu_exu_tid_s2        (ifu_exu_tid_s2[1:0]),
+                 .ifu_exu_ttype_vld_m   (ifu_exu_ttype_vld_m),
+                 .ifu_exu_tv_d          (ifu_exu_tv_d),
+                 .ifu_exu_use_rsr_e_l   (ifu_exu_use_rsr_e_l),
+                 .ifu_exu_usecin_d      (ifu_exu_usecin_d),
+                 .ifu_exu_useimm_d      (ifu_exu_useimm_d),
+                 .ifu_exu_wen_d         (ifu_exu_wen_d),
+                 .ifu_tlu_flush_m       (ifu_tlu_flush_m),
+                 .ifu_tlu_sraddr_d      (ifu_tlu_sraddr_d[6:0]),
+                 .lsu_exu_dfill_data_g  (lsu_exu_dfill_data_g[63:0]),
+                 .lsu_exu_dfill_vld_g   (lsu_exu_dfill_vld_g),
+                 .lsu_exu_flush_pipe_w  (lsu_exu_flush_pipe_w),
+                 .lsu_exu_ldst_miss_g2  (lsu_exu_ldst_miss_g2),
+                 .lsu_exu_ldxa_data_g   (lsu_exu_ldxa_data_g[63:0]),
+                 .lsu_exu_ldxa_m        (lsu_exu_ldxa_m),
+                 .lsu_exu_rd_m          (lsu_exu_rd_m[4:0]),
+                 .lsu_exu_st_dtlb_perr_g(lsu_exu_st_dtlb_perr_g),
+                 .lsu_exu_thr_m         (lsu_exu_thr_m[1:0]),
+                 .mul_exu_ack           (mul_exu_ack),
+                 .rclk                  (rclk),
+                 .se                    (se),
+                 .sehold                (sehold),
+                 .tlu_exu_agp           (tlu_exu_agp[1:0]),
+                 .tlu_exu_agp_swap      (tlu_exu_agp_swap),
+                 .tlu_exu_agp_tid       (tlu_exu_agp_tid[1:0]),
+                 .tlu_exu_ccr_m         (tlu_exu_ccr_m[7:0]),
+                 .tlu_exu_cwp_m         (tlu_exu_cwp_m[2:0]),
+                 .tlu_exu_cwp_retry_m   (tlu_exu_cwp_retry_m),
+                 .tlu_exu_cwpccr_update_m(tlu_exu_cwpccr_update_m),
+                 .tlu_exu_pic_onebelow_m(tlu_exu_pic_onebelow_m),
+                 .tlu_exu_pic_twobelow_m(tlu_exu_pic_twobelow_m),
+                 .tlu_exu_priv_trap_m   (tlu_exu_priv_trap_m),
+                 .tlu_exu_rsr_data_m    (tlu_exu_rsr_data_m[63:0]));
+
+`endif
+   
+`ifdef FPGA_SYN_NO_SPU
+
+      tlu tlu(
+           .short_si0              (short_scan0_3),
+           .short_si1              (short_scan1_3),
+           .short_so0              (short_scan0_4),
+           .short_so1              (short_scan1_4),
+           .si0 (scan0_4),
+           .si1 (scan1_1),
+           .so0 (scan0_5),
+           .so1 (scan1_2),
+           .grst_l                (spc_grst_l),
+           .arst_l                (cmp_arst_l),
+	       .tlu_sftint_vld		    (tlu_ifu_sftint_vld[3:0]),
+	       .ifu_tlu_swint_m		    (ifu_tlu_sftint_m),
+           .exu_tlu_cwp0                (exu_tlu_cwp0_w[2:0]),
+           .exu_tlu_cwp1                (exu_tlu_cwp1_w[2:0]),
+           .exu_tlu_cwp2                (exu_tlu_cwp2_w[2:0]),
+           .exu_tlu_cwp3                (exu_tlu_cwp3_w[2:0]),
+
+           // fix for bug 5953
+           .exu_tlu_ue_trap_m           (1'b0),
+
+           // temporary fix for bug 5863
+           // TBD: change for TO 2.0
+	   // fixed for eco 6660
+           .spu_tlu_rsrv_illgl_m        (1'b0),
+
+           // new interface to the pib block
+           .ifu_lsu_imm_asi_d           (ifu_tlu_imm_asi_d[8:0]),
+           
+           .ifu_tlu_imiss_e       (ifu_tlu_icmiss_e),
+           // MMU_ASI_RD_CHANGE
+           .ifu_tlu_thrid_d             (lsu_tlu_thrid_d[1:0]),
+           .lsu_tlu_st_rs3_data_g       (lsu_mmu_rs3_data_g[63:0]),
+           .lsu_tlu_async_ttype_g       (lsu_tlu_async_ttype_w2[6:0]),
+           .lsu_tlu_async_tid_g         (lsu_tlu_async_tid_w2[1:0]),
+           .lsu_tlu_async_ttype_vld_g   (lsu_tlu_async_ttype_vld_w2),
+       // end of new interface to the pib
+	   /*AUTOINST*/
+           // Outputs
+           .tlu_lsu_int_ldxa_data_w2    (tlu_lsu_int_ldxa_data_w2[63:0]),
+           .tlu_lsu_int_ld_ill_va_w2    (tlu_lsu_int_ld_ill_va_w2),
+           .tlu_lsu_int_ldxa_vld_w2     (tlu_lsu_int_ldxa_vld_w2),
+           .tlu_dtlb_data_rd_g          (tlu_dtlb_data_rd_g),
+           .tlu_dtlb_dmp_actxt_g        (tlu_dtlb_dmp_actxt_g),
+           .tlu_dtlb_dmp_all_g          (tlu_dtlb_dmp_all_g),
+           .tlu_dtlb_dmp_nctxt_g        (tlu_dtlb_dmp_nctxt_g),
+           .tlu_dtlb_dmp_pctxt_g        (tlu_dtlb_dmp_pctxt_g),
+           .tlu_dtlb_dmp_sctxt_g        (tlu_dtlb_dmp_sctxt_g),
+           .tlu_dtlb_dmp_vld_g          (tlu_dtlb_dmp_vld_g),
+           .tlu_dtlb_invalidate_all_g   (tlu_dtlb_invalidate_all_g),
+           .tlu_dtlb_rw_index_g         (tlu_dtlb_rw_index_g[5:0]),
+           .tlu_dtlb_rw_index_vld_g     (tlu_dtlb_rw_index_vld_g),
+           .tlu_dtlb_tag_rd_g           (tlu_dtlb_tag_rd_g),
+           .tlu_dtlb_tte_data_w2        (tlu_dtlb_tte_data_w2[42:0]),
+           .tlu_dtlb_tte_tag_w2         (tlu_dtlb_tte_tag_w2[58:0]),
+           .lsu_ifu_inj_ack             (lsu_ifu_inj_ack[3:0]),
+           .tlu_exu_agp                 (tlu_exu_agp[`TSA_GLOBAL_WIDTH-1:0]),
+           .tlu_exu_agp_swap            (tlu_exu_agp_swap),
+           .tlu_exu_agp_tid             (tlu_exu_agp_tid[1:0]),
+           .tlu_exu_ccr_m               (tlu_exu_ccr_m[7:0]),
+           .tlu_exu_cwp_m               (tlu_exu_cwp_m[2:0]),
+           .tlu_exu_cwp_retry_m         (tlu_exu_cwp_retry_m),
+           .tlu_exu_cwpccr_update_m     (tlu_exu_cwpccr_update_m),
+           .tlu_exu_rsr_data_m          (tlu_exu_rsr_data_m[`TLU_ASR_DATA_WIDTH-1:0]),
+           .tlu_idtlb_dmp_key_g         (tlu_idtlb_dmp_key_g[40:0]),
+           .tlu_idtlb_dmp_thrid_g       (tlu_idtlb_dmp_thrid_g[1:0]),
+           .tlu_ifu_hwint_i3            (tlu_ifu_hwint_i3[3:0]),
+           .tlu_ifu_nukeint_i2          (tlu_ifu_nukeint_i2),
+           .tlu_ifu_pstate_ie           (tlu_ifu_pstate_ie[3:0]),
+           .tlu_ifu_pstate_pef          (tlu_ifu_pstate_pef[3:0]),
+           .tlu_ifu_resumint_i2         (tlu_ifu_resumint_i2),
+           .tlu_ifu_rstint_i2           (tlu_ifu_rstint_i2),
+           .tlu_ifu_rstthr_i2           (tlu_ifu_rstthr_i2[3:0]),
+           .tlu_ifu_trap_tid_w1         (tlu_ifu_trap_tid_w1[1:0]),
+           .tlu_ifu_trapnpc_vld_w1      (tlu_ifu_trapnpc_vld_w1),
+           .tlu_ifu_trapnpc_w2          (tlu_ifu_trapnpc_w2[48:0]),
+           .tlu_ifu_trappc_w2           (tlu_ifu_trappc_w2[48:0]),
+           .tlu_ifu_trappc_vld_w1       (tlu_ifu_trappc_vld_w1),
+           .tlu_itlb_data_rd_g          (tlu_itlb_data_rd_g),
+           .tlu_itlb_dmp_actxt_g        (tlu_itlb_dmp_actxt_g),
+           .tlu_itlb_dmp_all_g          (tlu_itlb_dmp_all_g),
+           .tlu_itlb_dmp_nctxt_g        (tlu_itlb_dmp_nctxt_g),
+           .tlu_itlb_dmp_vld_g          (tlu_itlb_dmp_vld_g),
+           .tlu_itlb_invalidate_all_g   (tlu_itlb_invalidate_all_g),
+           .tlu_itlb_rw_index_g         (tlu_itlb_rw_index_g[5:0]),
+           .tlu_itlb_rw_index_vld_g     (tlu_itlb_rw_index_vld_g),
+           .tlu_itlb_tag_rd_g           (tlu_itlb_tag_rd_g),
+           .tlu_itlb_tte_data_w2        (tlu_itlb_tte_data_w2[42:0]),
+           .tlu_itlb_tte_tag_w2         (tlu_itlb_tte_tag_w2[58:0]),
+           .tlu_itlb_wr_vld_g           (tlu_itlb_wr_vld_g),
+           .tlu_lsu_asi_m               (tlu_lsu_asi_m[7:0]),
+           .tlu_lsu_asi_update_m        (tlu_lsu_asi_update_m),
+           .tlu_sscan_test_data         (tlu_sscan_test_data[62:0]),
+           .tlu_lsu_ldxa_tid_w2         (tlu_lsu_ldxa_tid_w2[1:0]),
+           .tlu_lsu_pcxpkt              (tlu_lsu_pcxpkt[25:0]),
+           .tlu_lsu_pstate_am           (tlu_lsu_pstate_am[3:0]),
+           .tlu_lsu_pstate_cle          (tlu_lsu_pstate_cle[3:0]),
+           .tlu_lsu_pstate_priv         (tlu_lsu_pstate_priv[3:0]),
+           .tlu_lsu_redmode             (tlu_lsu_redmode[3:0]),
+           .tlu_lsu_redmode_rst_d1      (tlu_lsu_redmode_rst_d1[3:0]),
+           .tlu_lsu_stxa_ack            (tlu_lsu_stxa_ack),
+           .tlu_lsu_stxa_ack_tid        (tlu_lsu_stxa_ack_tid[1:0]),
+           .tlu_lsu_tid_m               (tlu_lsu_tid_m[1:0]),
+           .tlu_lsu_tl_zero             (tlu_lsu_tl_zero[`TLU_THRD_NUM-1:0]),
+           .tlu_hintp_vld               (tlu_hintp_vld[`TLU_THRD_NUM-1:0]),
+           .tlu_rerr_vld                (tlu_rerr_vld[`TLU_THRD_NUM-1:0]),
+           .tlu_early_flush_pipe_w      (tlu_early_flush_pipe_w),
+           .tlu_early_flush_pipe2_w     (tlu_early_flush_pipe2_w),
+           .tlu_exu_early_flush_pipe_w  (tlu_exu_early_flush_pipe_w),
+           .tlu_lsu_ldxa_async_data_vld (tlu_lsu_ldxa_async_data_vld),
+           .tlu_hpstate_priv            (tlu_hpstate_priv[`TLU_THRD_NUM-1:0]),
+           .tlu_hpstate_enb             (tlu_hpstate_enb[`TLU_THRD_NUM-1:0]),
+           .tlu_hpstate_ibe             (tlu_hpstate_ibe[`TLU_THRD_NUM-1:0]),
+           .tlu_exu_priv_trap_m         (tlu_exu_priv_trap_m),
+           .tlu_lsu_priv_trap_m         (tlu_lsu_priv_trap_m),
+           .tlu_exu_pic_onebelow_m      (tlu_exu_pic_onebelow_m),
+           .tlu_exu_pic_twobelow_m      (tlu_exu_pic_twobelow_m),
+           .lsu_exu_ldxa_m              (lsu_exu_ldxa_m),
+           .lsu_exu_ldxa_data_g         (lsu_exu_ldxa_data_g[63:0]),
+           .tlu_dsfsr_flt_vld           (tlu_dsfsr_flt_vld[3:0]),
+           // Inputs
+           .rclk                        (rclk),
+           .const_cpuid                 (const_cpuid[3:0]),
+           .exu_lsu_ldst_va_e           (exu_lsu_ldst_va_e[`ASI_VA_WIDTH-1:0]),
+           .lsu_tlu_ldst_va_m           (lsu_tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]),
+           .exu_mmu_early_va_e          (exu_mmu_early_va_e[7:0]),
+           .exu_tlu_ccr0_w              (exu_tlu_ccr0_w[7:0]),
+           .exu_tlu_ccr1_w              (exu_tlu_ccr1_w[7:0]),
+           .exu_tlu_ccr2_w              (exu_tlu_ccr2_w[7:0]),
+           .exu_tlu_ccr3_w              (exu_tlu_ccr3_w[7:0]),
+           .exu_tlu_cwp_cmplt           (exu_tlu_cwp_cmplt),
+           .exu_tlu_cwp_cmplt_tid       (exu_tlu_cwp_cmplt_tid[1:0]),
+           .exu_tlu_cwp_retry           (exu_tlu_cwp_retry),
+           .exu_tlu_misalign_addr_jmpl_rtn_m(exu_tlu_misalign_addr_jmpl_rtn_m),
+           .exu_tlu_spill               (exu_tlu_spill),
+           .exu_tlu_spill_tid           (exu_tlu_spill_tid[1:0]),
+           .exu_tlu_spill_other         (exu_tlu_spill_other),
+           .exu_tlu_spill_wtype         (exu_tlu_spill_wtype[2:0]),
+           .exu_tlu_ttype_m             (exu_tlu_ttype_m[8:0]),
+           .exu_tlu_ttype_vld_m         (exu_tlu_ttype_vld_m),
+           .exu_tlu_va_oor_jl_ret_m     (exu_tlu_va_oor_jl_ret_m),
+           .exu_tlu_va_oor_m            (exu_tlu_va_oor_m),
+           .ffu_tlu_ill_inst_m          (ffu_tlu_ill_inst_m),
+           .ffu_ifu_tid_w2              (ffu_ifu_tid_w2[1:0]),
+           .ffu_tlu_trap_ieee754        (ffu_tlu_trap_ieee754),
+           .ffu_tlu_trap_other          (ffu_tlu_trap_other),
+           .ffu_tlu_trap_ue             (ffu_tlu_trap_ue),
+           .ifu_lsu_ld_inst_e           (ifu_lsu_ld_inst_e),
+           .ifu_lsu_memref_d            (ifu_lsu_memref_d),
+           .ifu_lsu_st_inst_e           (ifu_lsu_st_inst_e),
+           .ifu_tlu_done_inst_d         (ifu_tlu_done_inst_d),
+           .ifu_tlu_flush_m             (ifu_tlu_flush_m),
+           .ifu_tlu_flush_fd_w          (ifu_tlu_flush_fd_w),
+           .ifu_tlu_flush_fd2_w         (ifu_tlu_flush_fd2_w),
+           .ifu_tlu_flush_fd3_w         (ifu_tlu_flush_fd3_w),
+           .lsu_tlu_early_flush_w       (lsu_tlu_early_flush_w),
+           .lsu_tlu_early_flush2_w      (lsu_tlu_early_flush2_w),
+           .ifu_tlu_hwint_m             (ifu_tlu_hwint_m),
+           .ifu_tlu_immu_miss_m         (ifu_tlu_immu_miss_m),
+           .ifu_tlu_pc_oor_e            (ifu_tlu_pc_oor_e),
+           .ifu_tlu_l2imiss             (ifu_tlu_l2imiss[`TLU_THRD_NUM-1:0]),
+           .ifu_tlu_inst_vld_m          (ifu_tlu_inst_vld_m),
+           .ifu_tlu_inst_vld_m_bf1      (ifu_tlu_inst_vld_m_bf1),
+           .ifu_tlu_itlb_done           (ifu_tlu_itlb_done),
+           .ifu_tlu_npc_m               (ifu_tlu_npc_m[48:0]),
+           .ifu_tlu_pc_m                (ifu_tlu_pc_m[48:0]),
+           .ifu_tlu_priv_violtn_m       (ifu_tlu_priv_violtn_m),
+           .ifu_tlu_retry_inst_d        (ifu_tlu_retry_inst_d),
+           .ifu_tlu_rstint_m            (ifu_tlu_rstint_m),
+           .ifu_tlu_sir_inst_m          (ifu_tlu_sir_inst_m),
+           .ifu_lsu_thrid_s             (ifu_lsu_thrid_s[1:0]),
+           .ifu_tlu_ttype_m             (ifu_tlu_ttype_m[8:0]),
+           .ifu_tlu_ttype_vld_m         (ifu_tlu_ttype_vld_m),
+           .ifu_mmu_trap_m              (ifu_mmu_trap_m),
+           .ifu_tlu_trap_m              (ifu_tlu_trap_m),
+           .lsu_asi_reg0                (lsu_asi_reg0[7:0]),
+           .lsu_asi_reg1                (lsu_asi_reg1[7:0]),
+           .lsu_asi_reg2                (lsu_asi_reg2[7:0]),
+           .lsu_asi_reg3                (lsu_asi_reg3[7:0]),
+           .lsu_asi_state               (lsu_asi_state[`TLU_ASI_STATE_WIDTH-1:0]),
+           .lsu_tlu_defr_trp_taken_g    (lsu_tlu_defr_trp_taken_g),
+           .lsu_mmu_defr_trp_taken_g    (lsu_mmu_defr_trp_taken_g),
+           .lsu_tlu_cpx_req             (lsu_tlu_cpx_req[3:0]),
+           .lsu_tlu_cpx_vld             (lsu_tlu_cpx_vld),
+           .lsu_tlu_daccess_excptn_g    (lsu_tlu_daccess_excptn_g),
+           .lsu_tlu_daccess_prot_g      (lsu_tlu_daccess_prot_g),
+           .lsu_tlu_dmmu_miss_g         (lsu_tlu_dmmu_miss_g),
+           .lsu_tlu_dside_ctxt_m        (lsu_tlu_dside_ctxt_m[12:0]),
+           .lsu_tlu_dtlb_done           (lsu_tlu_dtlb_done),
+           .lsu_tlu_intpkt              (lsu_tlu_intpkt[17:0]),
+           .ctu_sscan_tid               (ctu_sscan_tid[`TLU_THRD_NUM-1:0]),
+           .lsu_tlu_misalign_addr_ldst_atm_m(lsu_tlu_misalign_addr_ldst_atm_m),
+           .lsu_tlu_pctxt_m             (lsu_tlu_pctxt_m[12:0]),
+           .lsu_tlu_pcxpkt_ack          (lsu_tlu_pcxpkt_ack),
+           .lsu_tlu_priv_action_g       (lsu_tlu_priv_action_g),
+           .lsu_tlu_rs3_data_g          (lsu_tlu_rs3_data_g[63:0]),
+           .lsu_tlu_tlb_access_tid_m    (lsu_tlu_tlb_access_tid_m[1:0]),
+           .lsu_tlu_tlb_asi_state_m     (lsu_tlu_tlb_asi_state_m[7:0]),
+           .lsu_tlu_tlb_dmp_va_m        (lsu_tlu_tlb_dmp_va_m[47:13]),
+           .lsu_tlu_tlb_ld_inst_m       (lsu_tlu_tlb_ld_inst_m),
+           .lsu_tlu_tlb_ldst_va_m       (lsu_tlu_tlb_ldst_va_m[10:0]),
+           .lsu_tlu_tlb_st_inst_m       (lsu_tlu_tlb_st_inst_m),
+           .lsu_tlu_ttype_m2            (lsu_tlu_ttype_m2[8:0]),
+           .lsu_tlu_ttype_vld_m2        (lsu_tlu_ttype_vld_m2),
+           .lsu_tlu_wtchpt_trp_g        (lsu_tlu_wtchpt_trp_g),
+           .mem_write_disable           (mem_write_disable),
+           .mux_drive_disable           (mux_drive_disable),
+           .sehold                      (sehold),
+           .se                          (se),
+           .ifu_tlu_sraddr_d            (ifu_tlu_sraddr_d[`TLU_ASR_ADDR_WIDTH-1:0]),
+           .ifu_tlu_sraddr_d_v2         (ifu_tlu_sraddr_d_v2[`TLU_ASR_ADDR_WIDTH-1:0]),
+           .ifu_tlu_rsr_inst_d          (ifu_tlu_rsr_inst_d),
+           .lsu_tlu_wsr_inst_e          (lsu_tlu_wsr_inst_e),
+           .exu_tlu_wsr_data_m          (exu_tlu_wsr_data_m[63:0]),
+           .lsu_tlu_rsr_data_e          (lsu_tlu_rsr_data_e[7:0]),
+           .ifu_lsu_alt_space_e         (ifu_lsu_alt_space_e),
+           .ifu_tlu_alt_space_d         (ifu_tlu_alt_space_d),
+           .lsu_tlu_squash_va_oor_m     (lsu_tlu_squash_va_oor_m),
+           .lsu_tlu_dcache_miss_w2      (lsu_tlu_dcache_miss_w2[3:0]),
+           .lsu_tlu_l2_dmiss            (lsu_tlu_l2_dmiss[3:0]),
+           .lsu_tlu_stb_full_w2         (lsu_tlu_stb_full_w2[3:0]),
+           .ffu_tlu_fpu_tid             (ffu_tlu_fpu_tid[1:0]),
+           .ffu_tlu_fpu_cmplt           (ffu_tlu_fpu_cmplt),
+           .lsu_pid_state0              (lsu_pid_state0[2:0]),
+           .lsu_pid_state1              (lsu_pid_state1[2:0]),
+           .lsu_pid_state2              (lsu_pid_state2[2:0]),
+           .lsu_pid_state3              (lsu_pid_state3[2:0]),
+           .lsu_tlu_nucleus_ctxt_m      (lsu_tlu_nucleus_ctxt_m),
+           .lsu_tlu_tte_pg_sz_g         (lsu_tlu_tte_pg_sz_g[2:0]),
+           .ifu_lsu_error_inj           (ifu_lsu_error_inj[3:0]),
+           .ifu_lsu_imm_asi_vld_d       (ifu_lsu_imm_asi_vld_d),
+           .lsu_dsfsr_din_g             (lsu_dsfsr_din_g[23:0]),
+           .lsu_dmmu_sfsr_trp_wr        (lsu_dmmu_sfsr_trp_wr[3:0]),
+           .lsu_mmu_flush_pipe_w        (lsu_mmu_flush_pipe_w),
+           .exu_lsu_priority_trap_m     (exu_lsu_priority_trap_m));
+   
+`else
+   
+   tlu tlu(   // else-branch variant of the tlu instance; the endif comment below names FPGA_SYN_NO_SPU as the condition
+           .short_si0              (short_scan0_3),   // scan hookup (si*/so* presumably scan in/out -- confirm in tlu)
+           .short_si1              (short_scan1_3),
+           .short_so0              (short_scan0_4),
+           .short_so1              (short_scan1_4),
+           .si0 (scan0_4),
+           .si1 (scan1_1),
+           .so0 (scan0_5),
+           .so1 (scan1_2),
+           .grst_l                (spc_grst_l),   // reset ports are fed from spc_grst_l / cmp_arst_l
+           .arst_l                (cmp_arst_l),
+	       .tlu_sftint_vld		    (tlu_ifu_sftint_vld[3:0]),   // net carries an extra "ifu" in its name
+	       .ifu_tlu_swint_m		    (ifu_tlu_sftint_m),   // port is spelled swint, net is spelled sftint
+           .exu_tlu_cwp0                (exu_tlu_cwp0_w[2:0]),   // cwp0..cwp3 ports take the *_w nets
+           .exu_tlu_cwp1                (exu_tlu_cwp1_w[2:0]),
+           .exu_tlu_cwp2                (exu_tlu_cwp2_w[2:0]),
+           .exu_tlu_cwp3                (exu_tlu_cwp3_w[2:0]),
+
+           // fix for bug 5953: exu_tlu_ue_trap_m is tied to constant 0 here
+           .exu_tlu_ue_trap_m           (1'b0),
+
+           // temporary fix for bug 5863
+           // TBD: change for TO 2.0
+	   // fixed for eco 6660
+           .spu_tlu_rsrv_illgl_m        (spu_tlu_rsrv_illgl_m),
+
+           // new interface to the pib block
+           .ifu_lsu_imm_asi_d           (ifu_tlu_imm_asi_d[8:0]),   // port is ifu_lsu_*, net is ifu_tlu_*
+           
+           .ifu_tlu_imiss_e       (ifu_tlu_icmiss_e),   // port says imiss, net says icmiss
+           // MMU_ASI_RD_CHANGE
+           .ifu_tlu_thrid_d             (lsu_tlu_thrid_d[1:0]),   // fed from the lsu_tlu_* net, not an ifu_* one
+           .lsu_tlu_st_rs3_data_g       (lsu_mmu_rs3_data_g[63:0]),   // fed from the lsu_mmu_* net
+           .lsu_tlu_async_ttype_g       (lsu_tlu_async_ttype_w2[6:0]),   // the three *_g async ports take *_w2 nets
+           .lsu_tlu_async_tid_g         (lsu_tlu_async_tid_w2[1:0]),
+           .lsu_tlu_async_ttype_vld_g   (lsu_tlu_async_ttype_vld_w2),
+       // end of new interface to the pib
+	   /*AUTOINST*/
+           // Outputs (from here to the end of the list every port connects to a net of the same name)
+           .tlu_lsu_int_ldxa_data_w2    (tlu_lsu_int_ldxa_data_w2[63:0]),
+           .tlu_lsu_int_ld_ill_va_w2    (tlu_lsu_int_ld_ill_va_w2),
+           .tlu_lsu_int_ldxa_vld_w2     (tlu_lsu_int_ldxa_vld_w2),
+           .tlu_dtlb_data_rd_g          (tlu_dtlb_data_rd_g),
+           .tlu_dtlb_dmp_actxt_g        (tlu_dtlb_dmp_actxt_g),
+           .tlu_dtlb_dmp_all_g          (tlu_dtlb_dmp_all_g),
+           .tlu_dtlb_dmp_nctxt_g        (tlu_dtlb_dmp_nctxt_g),
+           .tlu_dtlb_dmp_pctxt_g        (tlu_dtlb_dmp_pctxt_g),
+           .tlu_dtlb_dmp_sctxt_g        (tlu_dtlb_dmp_sctxt_g),
+           .tlu_dtlb_dmp_vld_g          (tlu_dtlb_dmp_vld_g),
+           .tlu_dtlb_invalidate_all_g   (tlu_dtlb_invalidate_all_g),
+           .tlu_dtlb_rw_index_g         (tlu_dtlb_rw_index_g[5:0]),
+           .tlu_dtlb_rw_index_vld_g     (tlu_dtlb_rw_index_vld_g),
+           .tlu_dtlb_tag_rd_g           (tlu_dtlb_tag_rd_g),
+           .tlu_dtlb_tte_data_w2        (tlu_dtlb_tte_data_w2[42:0]),
+           .tlu_dtlb_tte_tag_w2         (tlu_dtlb_tte_tag_w2[58:0]),
+           .lsu_ifu_inj_ack             (lsu_ifu_inj_ack[3:0]),
+           .tlu_exu_agp                 (tlu_exu_agp[`TSA_GLOBAL_WIDTH-1:0]),
+           .tlu_exu_agp_swap            (tlu_exu_agp_swap),
+           .tlu_exu_agp_tid             (tlu_exu_agp_tid[1:0]),
+           .tlu_exu_ccr_m               (tlu_exu_ccr_m[7:0]),
+           .tlu_exu_cwp_m               (tlu_exu_cwp_m[2:0]),
+           .tlu_exu_cwp_retry_m         (tlu_exu_cwp_retry_m),
+           .tlu_exu_cwpccr_update_m     (tlu_exu_cwpccr_update_m),
+           .tlu_exu_rsr_data_m          (tlu_exu_rsr_data_m[`TLU_ASR_DATA_WIDTH-1:0]),
+           .tlu_idtlb_dmp_key_g         (tlu_idtlb_dmp_key_g[40:0]),
+           .tlu_idtlb_dmp_thrid_g       (tlu_idtlb_dmp_thrid_g[1:0]),
+           .tlu_ifu_hwint_i3            (tlu_ifu_hwint_i3[3:0]),
+           .tlu_ifu_nukeint_i2          (tlu_ifu_nukeint_i2),
+           .tlu_ifu_pstate_ie           (tlu_ifu_pstate_ie[3:0]),
+           .tlu_ifu_pstate_pef          (tlu_ifu_pstate_pef[3:0]),
+           .tlu_ifu_resumint_i2         (tlu_ifu_resumint_i2),
+           .tlu_ifu_rstint_i2           (tlu_ifu_rstint_i2),
+           .tlu_ifu_rstthr_i2           (tlu_ifu_rstthr_i2[3:0]),
+           .tlu_ifu_trap_tid_w1         (tlu_ifu_trap_tid_w1[1:0]),
+           .tlu_ifu_trapnpc_vld_w1      (tlu_ifu_trapnpc_vld_w1),
+           .tlu_ifu_trapnpc_w2          (tlu_ifu_trapnpc_w2[48:0]),
+           .tlu_ifu_trappc_w2           (tlu_ifu_trappc_w2[48:0]),
+           .tlu_ifu_trappc_vld_w1       (tlu_ifu_trappc_vld_w1),
+           .tlu_itlb_data_rd_g          (tlu_itlb_data_rd_g),
+           .tlu_itlb_dmp_actxt_g        (tlu_itlb_dmp_actxt_g),
+           .tlu_itlb_dmp_all_g          (tlu_itlb_dmp_all_g),
+           .tlu_itlb_dmp_nctxt_g        (tlu_itlb_dmp_nctxt_g),
+           .tlu_itlb_dmp_vld_g          (tlu_itlb_dmp_vld_g),
+           .tlu_itlb_invalidate_all_g   (tlu_itlb_invalidate_all_g),
+           .tlu_itlb_rw_index_g         (tlu_itlb_rw_index_g[5:0]),
+           .tlu_itlb_rw_index_vld_g     (tlu_itlb_rw_index_vld_g),
+           .tlu_itlb_tag_rd_g           (tlu_itlb_tag_rd_g),
+           .tlu_itlb_tte_data_w2        (tlu_itlb_tte_data_w2[42:0]),
+           .tlu_itlb_tte_tag_w2         (tlu_itlb_tte_tag_w2[58:0]),
+           .tlu_itlb_wr_vld_g           (tlu_itlb_wr_vld_g),
+           .tlu_lsu_asi_m               (tlu_lsu_asi_m[7:0]),
+           .tlu_lsu_asi_update_m        (tlu_lsu_asi_update_m),
+           .tlu_sscan_test_data         (tlu_sscan_test_data[62:0]),
+           .tlu_lsu_ldxa_tid_w2         (tlu_lsu_ldxa_tid_w2[1:0]),
+           .tlu_lsu_pcxpkt              (tlu_lsu_pcxpkt[25:0]),
+           .tlu_lsu_pstate_am           (tlu_lsu_pstate_am[3:0]),
+           .tlu_lsu_pstate_cle          (tlu_lsu_pstate_cle[3:0]),
+           .tlu_lsu_pstate_priv         (tlu_lsu_pstate_priv[3:0]),
+           .tlu_lsu_redmode             (tlu_lsu_redmode[3:0]),
+           .tlu_lsu_redmode_rst_d1      (tlu_lsu_redmode_rst_d1[3:0]),
+           .tlu_lsu_stxa_ack            (tlu_lsu_stxa_ack),
+           .tlu_lsu_stxa_ack_tid        (tlu_lsu_stxa_ack_tid[1:0]),
+           .tlu_lsu_tid_m               (tlu_lsu_tid_m[1:0]),
+           .tlu_lsu_tl_zero             (tlu_lsu_tl_zero[`TLU_THRD_NUM-1:0]),
+           .tlu_hintp_vld               (tlu_hintp_vld[`TLU_THRD_NUM-1:0]),
+           .tlu_rerr_vld                (tlu_rerr_vld[`TLU_THRD_NUM-1:0]),
+           .tlu_early_flush_pipe_w      (tlu_early_flush_pipe_w),
+           .tlu_early_flush_pipe2_w     (tlu_early_flush_pipe2_w),
+           .tlu_exu_early_flush_pipe_w  (tlu_exu_early_flush_pipe_w),
+           .tlu_lsu_ldxa_async_data_vld (tlu_lsu_ldxa_async_data_vld),
+           .tlu_hpstate_priv            (tlu_hpstate_priv[`TLU_THRD_NUM-1:0]),
+           .tlu_hpstate_enb             (tlu_hpstate_enb[`TLU_THRD_NUM-1:0]),
+           .tlu_hpstate_ibe             (tlu_hpstate_ibe[`TLU_THRD_NUM-1:0]),
+           .tlu_exu_priv_trap_m         (tlu_exu_priv_trap_m),
+           .tlu_lsu_priv_trap_m         (tlu_lsu_priv_trap_m),
+           .tlu_exu_pic_onebelow_m      (tlu_exu_pic_onebelow_m),
+           .tlu_exu_pic_twobelow_m      (tlu_exu_pic_twobelow_m),
+           .lsu_exu_ldxa_m              (lsu_exu_ldxa_m),
+           .lsu_exu_ldxa_data_g         (lsu_exu_ldxa_data_g[63:0]),
+           .tlu_dsfsr_flt_vld           (tlu_dsfsr_flt_vld[3:0]),
+           // Inputs (same-named nets; several widths come from TLU_* / ASI_* macros defined elsewhere)
+           .rclk                        (rclk),
+           .const_cpuid                 (const_cpuid[3:0]),
+           .exu_lsu_ldst_va_e           (exu_lsu_ldst_va_e[`ASI_VA_WIDTH-1:0]),
+           .lsu_tlu_ldst_va_m           (lsu_tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]),
+           .exu_mmu_early_va_e          (exu_mmu_early_va_e[7:0]),
+           .exu_tlu_ccr0_w              (exu_tlu_ccr0_w[7:0]),
+           .exu_tlu_ccr1_w              (exu_tlu_ccr1_w[7:0]),
+           .exu_tlu_ccr2_w              (exu_tlu_ccr2_w[7:0]),
+           .exu_tlu_ccr3_w              (exu_tlu_ccr3_w[7:0]),
+           .exu_tlu_cwp_cmplt           (exu_tlu_cwp_cmplt),
+           .exu_tlu_cwp_cmplt_tid       (exu_tlu_cwp_cmplt_tid[1:0]),
+           .exu_tlu_cwp_retry           (exu_tlu_cwp_retry),
+           .exu_tlu_misalign_addr_jmpl_rtn_m(exu_tlu_misalign_addr_jmpl_rtn_m),
+           .exu_tlu_spill               (exu_tlu_spill),
+           .exu_tlu_spill_tid           (exu_tlu_spill_tid[1:0]),
+           .exu_tlu_spill_other         (exu_tlu_spill_other),
+           .exu_tlu_spill_wtype         (exu_tlu_spill_wtype[2:0]),
+           .exu_tlu_ttype_m             (exu_tlu_ttype_m[8:0]),
+           .exu_tlu_ttype_vld_m         (exu_tlu_ttype_vld_m),
+           .exu_tlu_va_oor_jl_ret_m     (exu_tlu_va_oor_jl_ret_m),
+           .exu_tlu_va_oor_m            (exu_tlu_va_oor_m),
+           .ffu_tlu_ill_inst_m          (ffu_tlu_ill_inst_m),
+           .ffu_ifu_tid_w2              (ffu_ifu_tid_w2[1:0]),
+           .ffu_tlu_trap_ieee754        (ffu_tlu_trap_ieee754),
+           .ffu_tlu_trap_other          (ffu_tlu_trap_other),
+           .ffu_tlu_trap_ue             (ffu_tlu_trap_ue),
+           .ifu_lsu_ld_inst_e           (ifu_lsu_ld_inst_e),
+           .ifu_lsu_memref_d            (ifu_lsu_memref_d),
+           .ifu_lsu_st_inst_e           (ifu_lsu_st_inst_e),
+           .ifu_tlu_done_inst_d         (ifu_tlu_done_inst_d),
+           .ifu_tlu_flush_m             (ifu_tlu_flush_m),
+           .ifu_tlu_flush_fd_w          (ifu_tlu_flush_fd_w),
+           .ifu_tlu_flush_fd2_w         (ifu_tlu_flush_fd2_w),
+           .ifu_tlu_flush_fd3_w         (ifu_tlu_flush_fd3_w),
+           .lsu_tlu_early_flush_w       (lsu_tlu_early_flush_w),
+           .lsu_tlu_early_flush2_w      (lsu_tlu_early_flush2_w),
+           .ifu_tlu_hwint_m             (ifu_tlu_hwint_m),
+           .ifu_tlu_immu_miss_m         (ifu_tlu_immu_miss_m),
+           .ifu_tlu_pc_oor_e            (ifu_tlu_pc_oor_e),
+           .ifu_tlu_l2imiss             (ifu_tlu_l2imiss[`TLU_THRD_NUM-1:0]),
+           .ifu_tlu_inst_vld_m          (ifu_tlu_inst_vld_m),
+           .ifu_tlu_inst_vld_m_bf1      (ifu_tlu_inst_vld_m_bf1),
+           .ifu_tlu_itlb_done           (ifu_tlu_itlb_done),
+           .ifu_tlu_npc_m               (ifu_tlu_npc_m[48:0]),
+           .ifu_tlu_pc_m                (ifu_tlu_pc_m[48:0]),
+           .ifu_tlu_priv_violtn_m       (ifu_tlu_priv_violtn_m),
+           .ifu_tlu_retry_inst_d        (ifu_tlu_retry_inst_d),
+           .ifu_tlu_rstint_m            (ifu_tlu_rstint_m),
+           .ifu_tlu_sir_inst_m          (ifu_tlu_sir_inst_m),
+           .ifu_lsu_thrid_s             (ifu_lsu_thrid_s[1:0]),
+           .ifu_tlu_ttype_m             (ifu_tlu_ttype_m[8:0]),
+           .ifu_tlu_ttype_vld_m         (ifu_tlu_ttype_vld_m),
+           .ifu_mmu_trap_m              (ifu_mmu_trap_m),
+           .ifu_tlu_trap_m              (ifu_tlu_trap_m),
+           .lsu_asi_reg0                (lsu_asi_reg0[7:0]),
+           .lsu_asi_reg1                (lsu_asi_reg1[7:0]),
+           .lsu_asi_reg2                (lsu_asi_reg2[7:0]),
+           .lsu_asi_reg3                (lsu_asi_reg3[7:0]),
+           .lsu_asi_state               (lsu_asi_state[`TLU_ASI_STATE_WIDTH-1:0]),
+           .lsu_tlu_defr_trp_taken_g    (lsu_tlu_defr_trp_taken_g),
+           .lsu_mmu_defr_trp_taken_g    (lsu_mmu_defr_trp_taken_g),
+           .lsu_tlu_cpx_req             (lsu_tlu_cpx_req[3:0]),
+           .lsu_tlu_cpx_vld             (lsu_tlu_cpx_vld),
+           .lsu_tlu_daccess_excptn_g    (lsu_tlu_daccess_excptn_g),
+           .lsu_tlu_daccess_prot_g      (lsu_tlu_daccess_prot_g),
+           .lsu_tlu_dmmu_miss_g         (lsu_tlu_dmmu_miss_g),
+           .lsu_tlu_dside_ctxt_m        (lsu_tlu_dside_ctxt_m[12:0]),
+           .lsu_tlu_dtlb_done           (lsu_tlu_dtlb_done),
+           .lsu_tlu_intpkt              (lsu_tlu_intpkt[17:0]),
+           .ctu_sscan_tid               (ctu_sscan_tid[`TLU_THRD_NUM-1:0]),
+           .lsu_tlu_misalign_addr_ldst_atm_m(lsu_tlu_misalign_addr_ldst_atm_m),
+           .lsu_tlu_pctxt_m             (lsu_tlu_pctxt_m[12:0]),
+           .lsu_tlu_pcxpkt_ack          (lsu_tlu_pcxpkt_ack),
+           .lsu_tlu_priv_action_g       (lsu_tlu_priv_action_g),
+           .lsu_tlu_rs3_data_g          (lsu_tlu_rs3_data_g[63:0]),
+           .lsu_tlu_tlb_access_tid_m    (lsu_tlu_tlb_access_tid_m[1:0]),
+           .lsu_tlu_tlb_asi_state_m     (lsu_tlu_tlb_asi_state_m[7:0]),
+           .lsu_tlu_tlb_dmp_va_m        (lsu_tlu_tlb_dmp_va_m[47:13]),
+           .lsu_tlu_tlb_ld_inst_m       (lsu_tlu_tlb_ld_inst_m),
+           .lsu_tlu_tlb_ldst_va_m       (lsu_tlu_tlb_ldst_va_m[10:0]),
+           .lsu_tlu_tlb_st_inst_m       (lsu_tlu_tlb_st_inst_m),
+           .lsu_tlu_ttype_m2            (lsu_tlu_ttype_m2[8:0]),
+           .lsu_tlu_ttype_vld_m2        (lsu_tlu_ttype_vld_m2),
+           .lsu_tlu_wtchpt_trp_g        (lsu_tlu_wtchpt_trp_g),
+           .mem_write_disable           (mem_write_disable),
+           .mux_drive_disable           (mux_drive_disable),
+           .sehold                      (sehold),
+           .se                          (se),
+           .ifu_tlu_sraddr_d            (ifu_tlu_sraddr_d[`TLU_ASR_ADDR_WIDTH-1:0]),
+           .ifu_tlu_sraddr_d_v2         (ifu_tlu_sraddr_d_v2[`TLU_ASR_ADDR_WIDTH-1:0]),
+           .ifu_tlu_rsr_inst_d          (ifu_tlu_rsr_inst_d),
+           .lsu_tlu_wsr_inst_e          (lsu_tlu_wsr_inst_e),
+           .exu_tlu_wsr_data_m          (exu_tlu_wsr_data_m[63:0]),
+           .lsu_tlu_rsr_data_e          (lsu_tlu_rsr_data_e[7:0]),
+           .ifu_lsu_alt_space_e         (ifu_lsu_alt_space_e),
+           .ifu_tlu_alt_space_d         (ifu_tlu_alt_space_d),
+           .lsu_tlu_squash_va_oor_m     (lsu_tlu_squash_va_oor_m),
+           .lsu_tlu_dcache_miss_w2      (lsu_tlu_dcache_miss_w2[3:0]),
+           .lsu_tlu_l2_dmiss            (lsu_tlu_l2_dmiss[3:0]),
+           .lsu_tlu_stb_full_w2         (lsu_tlu_stb_full_w2[3:0]),
+           .ffu_tlu_fpu_tid             (ffu_tlu_fpu_tid[1:0]),
+           .ffu_tlu_fpu_cmplt           (ffu_tlu_fpu_cmplt),
+           .lsu_pid_state0              (lsu_pid_state0[2:0]),
+           .lsu_pid_state1              (lsu_pid_state1[2:0]),
+           .lsu_pid_state2              (lsu_pid_state2[2:0]),
+           .lsu_pid_state3              (lsu_pid_state3[2:0]),
+           .lsu_tlu_nucleus_ctxt_m      (lsu_tlu_nucleus_ctxt_m),
+           .lsu_tlu_tte_pg_sz_g         (lsu_tlu_tte_pg_sz_g[2:0]),
+           .ifu_lsu_error_inj           (ifu_lsu_error_inj[3:0]),
+           .ifu_lsu_imm_asi_vld_d       (ifu_lsu_imm_asi_vld_d),
+           .lsu_dsfsr_din_g             (lsu_dsfsr_din_g[23:0]),
+           .lsu_dmmu_sfsr_trp_wr        (lsu_dmmu_sfsr_trp_wr[3:0]),
+           .lsu_mmu_flush_pipe_w        (lsu_mmu_flush_pipe_w),
+           .exu_lsu_priority_trap_m     (exu_lsu_priority_trap_m));
+
+`endif //  `ifdef FPGA_SYN_NO_SPU
+
+
+   // Instance "spu" of module spu.
+   // Hand-wired connections come first; everything after the AUTOINST marker
+   // binds a port to a net of the same name.
+   // NOTE(review): unlike the tlu, mul and ffu instances around it, this
+   // instance is not placed inside an FPGA_SYN_NO_SPU conditional in the
+   // visible code -- confirm that it is meant to be built in both cases.
+   spu spu (
+      // scan nets
+      .short_si0                (short_scan0_4),
+      .short_si1                (short_scan1_4),
+      .si1                      (scan1_2),
+      .short_so0                (short_scan0_5),
+      .short_so1                (short_scan1_5),
+      .so1                      (scan1_3),
+
+      // reset stuff
+      .grst_l                   (spc_grst_l),
+      .arst_l                   (cmp_arst_l),
+      .mem_bypass               (mem_bypass),
+      .mux_drive_disable        (mux_drive_disable),
+
+      // flush connections: port names differ from the nets that feed them
+      .tlu_spu_flush_w          (tlu_exu_early_flush_pipe_w),
+      .ifu_spu_flush_w          (ifu_tlu_flush_w),
+
+      // 135 bits taken from cpx_spc_data_cx3: 144:140, 138:137 and 127:0
+      // (bits 139 and 136:128 are not passed on)
+      .cpx_spu_data_cx          ({cpx_spc_data_cx3[144:140],
+                                  cpx_spc_data_cx3[138:137],
+                                  cpx_spc_data_cx3[127:0]}),
+
+      // both ports read exu_spu_rs3_data_e: bits 8:6 and the full 64 bits
+      .exu_spu_rsrv_data_e      (exu_spu_rs3_data_e[8:6]),
+      .exu_lsu_rs3_data_e       (exu_spu_rs3_data_e[63:0]),
+
+      .lsu_spu_strm_ack_cmplt   (lsu_spu_strm_ack_cmplt[1:0]),
+      .spu_tlu_rsrv_illgl_m     (spu_tlu_rsrv_illgl_m),
+
+      /*AUTOINST*/
+      // Outputs
+      // -- towards ifu
+      .spu_ifu_ttype_w2         (spu_ifu_ttype_w2),
+      .spu_ifu_ttype_vld_w2     (spu_ifu_ttype_vld_w2),
+      .spu_ifu_ttype_tid_w2     (spu_ifu_ttype_tid_w2[1:0]),
+      .spu_ifu_corr_err_w2      (spu_ifu_corr_err_w2),
+      .spu_ifu_unc_err_w1       (spu_ifu_unc_err_w1),
+      .spu_ifu_err_addr_w2      (spu_ifu_err_addr_w2[39:4]),
+      .spu_ifu_mamem_err_w1     (spu_ifu_mamem_err_w1),
+      .spu_ifu_int_w2           (spu_ifu_int_w2),
+      // -- towards lsu
+      .spu_lsu_ldst_pckt        (spu_lsu_ldst_pckt[123:0]),
+      .spu_lsu_ldxa_data_w2     (spu_lsu_ldxa_data_w2[63:0]),
+      .spu_lsu_ldxa_data_vld_w2 (spu_lsu_ldxa_data_vld_w2),
+      .spu_lsu_ldxa_tid_w2      (spu_lsu_ldxa_tid_w2[1:0]),
+      .spu_lsu_ldxa_illgl_va_w2 (spu_lsu_ldxa_illgl_va_w2),
+      .spu_lsu_stxa_ack         (spu_lsu_stxa_ack),
+      .spu_lsu_stxa_ack_tid     (spu_lsu_stxa_ack_tid[1:0]),
+      .spu_lsu_unc_error_w2     (spu_lsu_unc_error_w2),
+      // -- towards mul
+      .spu_mul_req_vld          (spu_mul_req_vld),
+      .spu_mul_areg_shf         (spu_mul_areg_shf),
+      .spu_mul_areg_rst         (spu_mul_areg_rst),
+      .spu_mul_acc              (spu_mul_acc),
+      .spu_mul_op1_data         (spu_mul_op1_data[63:0]),
+      .spu_mul_op2_data         (spu_mul_op2_data[63:0]),
+      .spu_mul_mulres_lshft     (spu_mul_mulres_lshft),
+      // Inputs
+      // -- clock, scan control, configuration
+      .rclk                     (rclk),
+      .se                       (se),
+      .sehold                   (sehold),
+      .mem_write_disable        (mem_write_disable),
+      .const_cpuid              (const_cpuid[2:0]),
+      // -- from mul
+      .mul_spu_ack              (mul_spu_ack),
+      .mul_spu_shf_ack          (mul_spu_shf_ack),
+      .mul_data_out             (mul_data_out[63:0]),
+      // -- from ifu
+      .ifu_spu_inst_vld_w       (ifu_spu_inst_vld_w),
+      .ifu_spu_trap_ack         (ifu_spu_trap_ack),
+      .ifu_spu_nceen            (ifu_spu_nceen[3:0]),
+      .ifu_lsu_ld_inst_e        (ifu_lsu_ld_inst_e),
+      .ifu_lsu_st_inst_e        (ifu_lsu_st_inst_e),
+      .ifu_lsu_alt_space_e      (ifu_lsu_alt_space_e),
+      .ifu_tlu_thrid_e          (ifu_tlu_thrid_e[1:0]),
+      // -- from exu (only the low 8 bits of the address are used here)
+      .exu_lsu_ldst_va_e        (exu_lsu_ldst_va_e[7:0]),
+      // -- from lsu
+      .lsu_spu_ldst_ack         (lsu_spu_ldst_ack),
+      .lsu_spu_asi_state_e      (lsu_spu_asi_state_e[7:0]),
+      .lsu_spu_stb_empty        (lsu_spu_stb_empty[3:0]),
+      .lsu_spu_early_flush_g    (lsu_spu_early_flush_g),
+      .lsu_mamem_mrgn           (lsu_mamem_mrgn[3:0])
+   );
+  
+
+`ifdef FPGA_SYN_NO_SPU
+
+      // Multiplier instance used when FPGA_SYN_NO_SPU is defined.
+      // The SPU-facing side is stubbed out: the two mul_spu_* acknowledge
+      // outputs are left unconnected and the spu_mul_* request, control and
+      // operand inputs are tied to constant zero.
+      // NOTE(review): spu_mul_mulres_lshft is still wired to a net rather
+      // than to a constant -- confirm something drives it in this build.
+      // The scan input comes from scan1_2 here (scan1_3 in the other variant).
+      sparc_mul_top mul (
+         // scan nets
+         .si                   (scan1_2),
+         .so                   (scan1_4),
+         // resets
+         .grst_l               (spc_grst_l),
+         .arst_l               (cmp_arst_l),
+         /*AUTOINST*/
+         // Outputs
+         .mul_exu_ack          (mul_exu_ack),
+         .mul_data_out         (mul_data_out[63:0]),
+         .mul_spu_ack          (),              // intentionally open
+         .mul_spu_shf_ack      (),              // intentionally open
+         // Inputs
+         .rclk                 (rclk),
+         .se                   (se),
+         // -- exu request
+         .exu_mul_input_vld    (exu_mul_input_vld),
+         .exu_mul_rs1_data     (exu_mul_rs1_data[63:0]),
+         .exu_mul_rs2_data     (exu_mul_rs2_data[63:0]),
+         // -- spu request: tied off
+         .spu_mul_req_vld      (1'b0),
+         .spu_mul_acc          (1'b0),
+         .spu_mul_areg_shf     (1'b0),
+         .spu_mul_areg_rst     (1'b0),
+         .spu_mul_op1_data     (64'h0),
+         .spu_mul_op2_data     (64'h0),
+         .spu_mul_mulres_lshft (spu_mul_mulres_lshft)
+      );
+   
+`else
+   
+   // Multiplier instance used when FPGA_SYN_NO_SPU is not defined.
+   // Both requesters are wired: the exu_mul_* and spu_mul_* inputs connect to
+   // same-named nets, and the mul_spu_* acknowledges are brought out.
+   // The scan input comes from scan1_3 here (scan1_2 in the other variant).
+   sparc_mul_top mul (
+      // scan nets
+      .si                   (scan1_3),
+      .so                   (scan1_4),
+      // resets
+      .grst_l               (spc_grst_l),
+      .arst_l               (cmp_arst_l),
+      /*AUTOINST*/
+      // Outputs
+      .mul_exu_ack          (mul_exu_ack),
+      .mul_data_out         (mul_data_out[63:0]),
+      .mul_spu_ack          (mul_spu_ack),
+      .mul_spu_shf_ack      (mul_spu_shf_ack),
+      // Inputs
+      .rclk                 (rclk),
+      .se                   (se),
+      // -- exu request
+      .exu_mul_input_vld    (exu_mul_input_vld),
+      .exu_mul_rs1_data     (exu_mul_rs1_data[63:0]),
+      .exu_mul_rs2_data     (exu_mul_rs2_data[63:0]),
+      // -- spu request
+      .spu_mul_req_vld      (spu_mul_req_vld),
+      .spu_mul_acc          (spu_mul_acc),
+      .spu_mul_areg_shf     (spu_mul_areg_shf),
+      .spu_mul_areg_rst     (spu_mul_areg_rst),
+      .spu_mul_op1_data     (spu_mul_op1_data[63:0]),
+      .spu_mul_op2_data     (spu_mul_op2_data[63:0]),
+      .spu_mul_mulres_lshft (spu_mul_mulres_lshft)
+   );
+   
+`endif //  `ifdef FPGA_SYN_NO_SPU
+
+`ifdef FPGA_SYN_NO_SPU
+
+   // sparc_ffu instance used when FPGA_SYN_NO_SPU is defined.
+   // In this variant short_si0 is taken from short_scan0_4.
+   // Hand-wired connections come first; everything after the AUTOINST marker
+   // binds a port to a net of the same name.
+   sparc_ffu ffu (
+      // scan nets
+      .short_si0               (short_scan0_4),
+      .short_so0               (short_scan0_6),
+      .si                      (scan0_5),
+      .so                      (scan0_6),
+
+      // reset stuff
+      .grst_l                  (spc_grst_l),
+      .arst_l                  (cmp_arst_l),
+
+      // eco 6529 .
+      .lsu_ffu_st_dtlb_perr_g  (lsu_ffu_st_dtlb_perr_g),
+
+      // ports whose names differ from the nets that feed them
+      .exu_ffu_ist_e           (ifu_lsu_st_inst_e),
+      .ifu_ffu_tid_d           (ifu_tlu_thrid_d[1:0]),
+      .ifu_ffu_ldst_single_d   (ifu_ffu_ldst_size_d),
+
+      // fields sliced out of cpx_spc_data_cx2_buf
+      .cpx_vld                 (cpx_spc_data_cx2_buf[`CPX_VLD]),
+      .cpx_req                 (cpx_spc_data_cx2_buf[`CPX_RQ_HI:`CPX_RQ_LO]),
+      .cpx_fpexc               (cpx_spc_data_cx2_buf[76:72]),
+      .cpx_fcmp                (cpx_spc_data_cx2_buf[69]),
+      .cpx_fccval              (cpx_spc_data_cx2_buf[68:67]),
+      .cpx_fpu_data            (cpx_spc_data_cx2_buf[63:0]),
+
+      // fields sliced out of exu_tlu_wsr_data_m
+      .exu_ffu_gsr_mask_m      (exu_tlu_wsr_data_m[63:32]),
+      .exu_ffu_gsr_rnd_m       (exu_tlu_wsr_data_m[27:25]),
+      .exu_ffu_gsr_scale_m     (exu_tlu_wsr_data_m[7:3]),
+      .exu_ffu_gsr_align_m     (exu_tlu_wsr_data_m[2:0]),
+
+      /*AUTOINST*/
+      // Outputs
+      // -- towards ifu
+      .ffu_ifu_cc_vld_w2       (ffu_ifu_cc_vld_w2[3:0]),
+      .ffu_ifu_cc_w2           (ffu_ifu_cc_w2[7:0]),
+      .ffu_ifu_ecc_ce_w2       (ffu_ifu_ecc_ce_w2),
+      .ffu_ifu_ecc_ue_w2       (ffu_ifu_ecc_ue_w2),
+      .ffu_ifu_err_reg_w2      (ffu_ifu_err_reg_w2[5:0]),
+      .ffu_ifu_err_synd_w2     (ffu_ifu_err_synd_w2[13:0]),
+      .ffu_ifu_fpop_done_w2    (ffu_ifu_fpop_done_w2),
+      .ffu_ifu_fst_ce_w        (ffu_ifu_fst_ce_w),
+      .ffu_ifu_inj_ack         (ffu_ifu_inj_ack),
+      .ffu_ifu_stallreq        (ffu_ifu_stallreq),
+      .ffu_ifu_tid_w2          (ffu_ifu_tid_w2[1:0]),
+      // -- towards lsu
+      .ffu_lsu_data            (ffu_lsu_data[80:0]),
+      .ffu_lsu_blk_st_e        (ffu_lsu_blk_st_e),
+      .ffu_lsu_blk_st_va_e     (ffu_lsu_blk_st_va_e[5:3]),
+      .ffu_lsu_fpop_rq_vld     (ffu_lsu_fpop_rq_vld),
+      .ffu_lsu_kill_fst_w      (ffu_lsu_kill_fst_w),
+      // -- towards tlu
+      .ffu_tlu_fpu_cmplt       (ffu_tlu_fpu_cmplt),
+      .ffu_tlu_fpu_tid         (ffu_tlu_fpu_tid[1:0]),
+      .ffu_tlu_ill_inst_m      (ffu_tlu_ill_inst_m),
+      .ffu_tlu_trap_ieee754    (ffu_tlu_trap_ieee754),
+      .ffu_tlu_trap_other      (ffu_tlu_trap_other),
+      .ffu_tlu_trap_ue         (ffu_tlu_trap_ue),
+      // -- towards exu
+      .ffu_exu_rsr_data_m      (ffu_exu_rsr_data_m[63:0]),
+      // Inputs
+      // -- clock, scan control, memory/mux control
+      .rclk                    (rclk),
+      .se                      (se),
+      .sehold                  (sehold),
+      .mux_drive_disable       (mux_drive_disable),
+      .mem_write_disable       (mem_write_disable),
+      // -- from exu
+      .exu_ffu_wsr_inst_e      (exu_ffu_wsr_inst_e),
+      // -- from ifu
+      .ifu_exu_disable_ce_e    (ifu_exu_disable_ce_e),
+      .ifu_exu_ecc_mask        (ifu_exu_ecc_mask[6:0]),
+      .ifu_exu_nceen_e         (ifu_exu_nceen_e),
+      .ifu_ffu_fcc_num_d       (ifu_ffu_fcc_num_d[1:0]),
+      .ifu_ffu_fld_d           (ifu_ffu_fld_d),
+      .ifu_ffu_fpop1_d         (ifu_ffu_fpop1_d),
+      .ifu_ffu_fpop2_d         (ifu_ffu_fpop2_d),
+      .ifu_ffu_fpopcode_d      (ifu_ffu_fpopcode_d[8:0]),
+      .ifu_ffu_frd_d           (ifu_ffu_frd_d[4:0]),
+      .ifu_ffu_frs1_d          (ifu_ffu_frs1_d[4:0]),
+      .ifu_ffu_frs2_d          (ifu_ffu_frs2_d[4:0]),
+      .ifu_ffu_fst_d           (ifu_ffu_fst_d),
+      .ifu_ffu_inj_frferr      (ifu_ffu_inj_frferr),
+      .ifu_ffu_ldfsr_d         (ifu_ffu_ldfsr_d),
+      .ifu_ffu_ldxfsr_d        (ifu_ffu_ldxfsr_d),
+      .ifu_ffu_mvcnd_m         (ifu_ffu_mvcnd_m),
+      .ifu_ffu_quad_op_e       (ifu_ffu_quad_op_e),
+      .ifu_ffu_stfsr_d         (ifu_ffu_stfsr_d),
+      .ifu_ffu_visop_d         (ifu_ffu_visop_d),
+      .ifu_lsu_ld_inst_e       (ifu_lsu_ld_inst_e),
+      .ifu_tlu_flsh_inst_e     (ifu_tlu_flsh_inst_e),
+      .ifu_tlu_flush_w         (ifu_tlu_flush_w),
+      .ifu_tlu_inst_vld_w      (ifu_tlu_inst_vld_w),
+      .ifu_tlu_sraddr_d        (ifu_tlu_sraddr_d[6:0]),
+      // -- from lsu
+      .lsu_ffu_ack             (lsu_ffu_ack),
+      .lsu_ffu_bld_cnt_w       (lsu_ffu_bld_cnt_w[2:0]),
+      .lsu_ffu_blk_asi_e       (lsu_ffu_blk_asi_e),
+      .lsu_ffu_flush_pipe_w    (lsu_ffu_flush_pipe_w),
+      .lsu_ffu_ld_data         (lsu_ffu_ld_data[63:0]),
+      .lsu_ffu_ld_vld          (lsu_ffu_ld_vld),
+      .lsu_ffu_stb_full0       (lsu_ffu_stb_full0),
+      .lsu_ffu_stb_full1       (lsu_ffu_stb_full1),
+      .lsu_ffu_stb_full2       (lsu_ffu_stb_full2),
+      .lsu_ffu_stb_full3       (lsu_ffu_stb_full3)
+   );
+  
+`else
+  
+   sparc_ffu ffu( // else-branch FFU instance: hand-wired ports first, same-name AUTOINST ports after
+                 .short_si0             (short_scan0_5), // short scan path: in on short_scan0_5, out on short_scan0_6
+                 .short_so0             (short_scan0_6),
+                 .si                    (scan0_5),       // scan path: in on scan0_5, out on scan0_6
+                 .so                    (scan0_6),
+                 // reset stuff
+                 .grst_l                (spc_grst_l),
+                 .arst_l                (cmp_arst_l),
+                 
+	   // eco 6529 .
+	   .lsu_ffu_st_dtlb_perr_g		(lsu_ffu_st_dtlb_perr_g),
+                 // ports below are driven by nets whose names differ from the port name
+                 .exu_ffu_ist_e         (ifu_lsu_st_inst_e),
+                 .ifu_ffu_tid_d         (ifu_tlu_thrid_d[1:0]),
+                 .cpx_fpu_data          (cpx_spc_data_cx2_buf[63:0]), // all cpx_* ports are slices of cpx_spc_data_cx2_buf
+                 .cpx_vld             (cpx_spc_data_cx2_buf[`CPX_VLD]),
+                 .cpx_fcmp            (cpx_spc_data_cx2_buf[69]),
+                 .cpx_req             (cpx_spc_data_cx2_buf[`CPX_RQ_HI:`CPX_RQ_LO]),
+                 .cpx_fccval          (cpx_spc_data_cx2_buf[68:67]),
+                 .cpx_fpexc           (cpx_spc_data_cx2_buf[76:72]),                   
+                 .exu_ffu_gsr_mask_m  (exu_tlu_wsr_data_m[63:32]), // all gsr_* ports are slices of exu_tlu_wsr_data_m
+                 .exu_ffu_gsr_scale_m (exu_tlu_wsr_data_m[7:3]),
+                 .exu_ffu_gsr_align_m (exu_tlu_wsr_data_m[2:0]),
+                 .exu_ffu_gsr_rnd_m   (exu_tlu_wsr_data_m[27:25]),
+                 .ifu_ffu_ldst_single_d   (ifu_ffu_ldst_size_d),
+		             /*AUTOINST*/
+                 // Outputs
+                 .ffu_lsu_data          (ffu_lsu_data[80:0]),
+                 .ffu_ifu_cc_vld_w2     (ffu_ifu_cc_vld_w2[3:0]),
+                 .ffu_ifu_cc_w2         (ffu_ifu_cc_w2[7:0]),
+                 .ffu_ifu_ecc_ce_w2     (ffu_ifu_ecc_ce_w2),
+                 .ffu_ifu_ecc_ue_w2     (ffu_ifu_ecc_ue_w2),
+                 .ffu_ifu_err_reg_w2    (ffu_ifu_err_reg_w2[5:0]),
+                 .ffu_ifu_err_synd_w2   (ffu_ifu_err_synd_w2[13:0]),
+                 .ffu_ifu_fpop_done_w2  (ffu_ifu_fpop_done_w2),
+                 .ffu_ifu_fst_ce_w      (ffu_ifu_fst_ce_w),
+                 .ffu_ifu_inj_ack       (ffu_ifu_inj_ack),
+                 .ffu_ifu_stallreq      (ffu_ifu_stallreq),
+                 .ffu_ifu_tid_w2        (ffu_ifu_tid_w2[1:0]),
+                 .ffu_lsu_blk_st_e      (ffu_lsu_blk_st_e),
+                 .ffu_lsu_blk_st_va_e   (ffu_lsu_blk_st_va_e[5:3]),
+                 .ffu_lsu_fpop_rq_vld   (ffu_lsu_fpop_rq_vld),
+                 .ffu_lsu_kill_fst_w    (ffu_lsu_kill_fst_w),
+                 .ffu_tlu_fpu_cmplt     (ffu_tlu_fpu_cmplt),
+                 .ffu_tlu_fpu_tid       (ffu_tlu_fpu_tid[1:0]),
+                 .ffu_tlu_ill_inst_m    (ffu_tlu_ill_inst_m),
+                 .ffu_tlu_trap_ieee754  (ffu_tlu_trap_ieee754),
+                 .ffu_tlu_trap_other    (ffu_tlu_trap_other),
+                 .ffu_tlu_trap_ue       (ffu_tlu_trap_ue),
+                 .ffu_exu_rsr_data_m    (ffu_exu_rsr_data_m[63:0]),
+                 // Inputs
+                 .mux_drive_disable     (mux_drive_disable),
+                 .mem_write_disable     (mem_write_disable),
+                 .exu_ffu_wsr_inst_e    (exu_ffu_wsr_inst_e),
+                 .ifu_exu_disable_ce_e  (ifu_exu_disable_ce_e),
+                 .ifu_exu_ecc_mask      (ifu_exu_ecc_mask[6:0]),
+                 .ifu_exu_nceen_e       (ifu_exu_nceen_e),
+                 .ifu_ffu_fcc_num_d     (ifu_ffu_fcc_num_d[1:0]),
+                 .ifu_ffu_fld_d         (ifu_ffu_fld_d),
+                 .ifu_ffu_fpop1_d       (ifu_ffu_fpop1_d),
+                 .ifu_ffu_fpop2_d       (ifu_ffu_fpop2_d),
+                 .ifu_ffu_fpopcode_d    (ifu_ffu_fpopcode_d[8:0]),
+                 .ifu_ffu_frd_d         (ifu_ffu_frd_d[4:0]),
+                 .ifu_ffu_frs1_d        (ifu_ffu_frs1_d[4:0]),
+                 .ifu_ffu_frs2_d        (ifu_ffu_frs2_d[4:0]),
+                 .ifu_ffu_fst_d         (ifu_ffu_fst_d),
+                 .ifu_ffu_inj_frferr    (ifu_ffu_inj_frferr),
+                 .ifu_ffu_ldfsr_d       (ifu_ffu_ldfsr_d),
+                 .ifu_ffu_ldxfsr_d      (ifu_ffu_ldxfsr_d),
+                 .ifu_ffu_mvcnd_m       (ifu_ffu_mvcnd_m),
+                 .ifu_ffu_quad_op_e     (ifu_ffu_quad_op_e),
+                 .ifu_ffu_stfsr_d       (ifu_ffu_stfsr_d),
+                 .ifu_ffu_visop_d       (ifu_ffu_visop_d),
+                 .ifu_lsu_ld_inst_e     (ifu_lsu_ld_inst_e),
+                 .ifu_tlu_flsh_inst_e   (ifu_tlu_flsh_inst_e),
+                 .ifu_tlu_flush_w       (ifu_tlu_flush_w),
+                 .ifu_tlu_inst_vld_w    (ifu_tlu_inst_vld_w),
+                 .ifu_tlu_sraddr_d      (ifu_tlu_sraddr_d[6:0]),
+                 .lsu_ffu_ack           (lsu_ffu_ack),
+                 .lsu_ffu_bld_cnt_w     (lsu_ffu_bld_cnt_w[2:0]),
+                 .lsu_ffu_blk_asi_e     (lsu_ffu_blk_asi_e),
+                 .lsu_ffu_flush_pipe_w  (lsu_ffu_flush_pipe_w),
+                 .lsu_ffu_ld_data       (lsu_ffu_ld_data[63:0]),
+                 .lsu_ffu_ld_vld        (lsu_ffu_ld_vld),
+                 .lsu_ffu_stb_full0     (lsu_ffu_stb_full0),
+                 .lsu_ffu_stb_full1     (lsu_ffu_stb_full1),
+                 .lsu_ffu_stb_full2     (lsu_ffu_stb_full2),
+                 .lsu_ffu_stb_full3     (lsu_ffu_stb_full3),
+                 .rclk                  (rclk),
+                 .se                    (se),
+                 .sehold                (sehold));
+
+`endif //  `ifdef FPGA_SYN_NO_SPU
+   
+/*   test_stub_bist AUTO_TEMPLATE(
+                                  // Outputs
+                                  .so_0 (spc_scanout0),
+                                  .so_1 (spc_scanout1),
+                                  .mbist_data_mode(mbist_userdata_mode),
+ 
+                                  // Inputs
+                                  .mbist_err ({1'b0, mbist_dcache_fail, mbist_icache_fail}),
+                                  .cluster_grst_l  (spc_grst_l),
+                                  .arst_l (cmp_arst_l));
+ */
+   
+`ifdef FPGA_SYN_NO_SPU
+
+      test_stub_bist test_stub( // variant instantiated when FPGA_SYN_NO_SPU is defined
+                            // unused chain 2: so_2 output left open, chain-2 inputs tied to 1'b0
+                            .so_2             (),
+                            .long_chain_so_2  (1'b0),
+                            .short_chain_so_2 (1'b0),
+
+                            // connect with scan stitch
+                            .si(scan1_4),
+                            .so (scan1_5),
+                            .long_chain_so_0  (scan0_7),
+                            .short_chain_so_0 (short_scan0_6),
+                            .long_chain_so_1  (scan1_5),       // same net as this instance's own .so
+                            .short_chain_so_1 (short_scan1_4), // else-branch instance uses short_scan1_5 here
+                            
+                            // from LSU
+                            .bist_ctl_reg_in(bist_ctl_reg_in[6:0]),
+                            
+                            /*AUTOINST*/
+                            // Outputs
+                            .mux_drive_disable(mux_drive_disable),
+                            .mem_write_disable(mem_write_disable),
+                            .sehold     (sehold),
+                            .se         (se),
+                            .testmode_l (testmode_l),
+                            .mem_bypass (),                      // left unconnected in this variant; else-branch instance drives mem_bypass
+                            .so_0       (spc_scanout0),          // Templated
+                            .so_1       (spc_scanout1),          // Templated
+                            .tst_ctu_mbist_done(tst_ctu_mbist_done),
+                            .tst_ctu_mbist_fail(tst_ctu_mbist_fail),
+                            .bist_ctl_reg_out(bist_ctl_reg_out[10:0]),
+                            .mbist_bisi_mode(mbist_bisi_mode),
+                            .mbist_stop_on_next_fail(mbist_stop_on_next_fail),
+                            .mbist_stop_on_fail(mbist_stop_on_fail),
+                            .mbist_loop_mode(mbist_loop_mode),
+                            .mbist_loop_on_addr(mbist_loop_on_addr),
+                            .mbist_data_mode(mbist_userdata_mode), // Templated
+                            .mbist_start(mbist_start),
+                            // Inputs
+                            .ctu_tst_pre_grst_l(ctu_tst_pre_grst_l),
+                            .arst_l     (cmp_arst_l),            // Templated
+                            .cluster_grst_l(spc_grst_l),         // Templated
+                            .global_shift_enable(global_shift_enable),
+                            .ctu_tst_scan_disable(ctu_tst_scan_disable),
+                            .ctu_tst_scanmode(ctu_tst_scanmode),
+                            .ctu_tst_macrotest(ctu_tst_macrotest),
+                            .ctu_tst_short_chain(ctu_tst_short_chain),
+                            .ctu_tst_mbist_enable(ctu_tst_mbist_enable),
+                            .rclk       (rclk),
+                            .bist_ctl_reg_wr_en(bist_ctl_reg_wr_en),
+                            .mbist_done (mbist_done),
+                            .mbist_err  ({1'b0, mbist_dcache_fail, mbist_icache_fail})); // Templated
+
+`else
+
+   test_stub_bist test_stub( // variant instantiated when FPGA_SYN_NO_SPU is not defined
+                            // unused chain 2: so_2 output left open, chain-2 inputs tied to 1'b0
+                            .so_2             (),
+                            .long_chain_so_2  (1'b0),
+                            .short_chain_so_2 (1'b0),
+
+                            // connect with scan stitch
+                            .si(scan1_4),
+                            .so (scan1_5),
+                            .long_chain_so_0  (scan0_7),
+                            .short_chain_so_0 (short_scan0_6),
+                            .long_chain_so_1  (scan1_5),       // same net as this instance's own .so
+                            .short_chain_so_1 (short_scan1_5), // FPGA_SYN_NO_SPU instance uses short_scan1_4 here
+                            
+                            // from LSU
+                            .bist_ctl_reg_in(bist_ctl_reg_in[6:0]),
+                            
+                            /*AUTOINST*/
+                            // Outputs
+                            .mux_drive_disable(mux_drive_disable),
+                            .mem_write_disable(mem_write_disable),
+                            .sehold     (sehold),
+                            .se         (se),
+                            .testmode_l (testmode_l),
+                            .mem_bypass (mem_bypass),            // connected in this variant; left open in the FPGA_SYN_NO_SPU instance
+                            .so_0       (spc_scanout0),          // Templated
+                            .so_1       (spc_scanout1),          // Templated
+                            .tst_ctu_mbist_done(tst_ctu_mbist_done),
+                            .tst_ctu_mbist_fail(tst_ctu_mbist_fail),
+                            .bist_ctl_reg_out(bist_ctl_reg_out[10:0]),
+                            .mbist_bisi_mode(mbist_bisi_mode),
+                            .mbist_stop_on_next_fail(mbist_stop_on_next_fail),
+                            .mbist_stop_on_fail(mbist_stop_on_fail),
+                            .mbist_loop_mode(mbist_loop_mode),
+                            .mbist_loop_on_addr(mbist_loop_on_addr),
+                            .mbist_data_mode(mbist_userdata_mode), // Templated
+                            .mbist_start(mbist_start),
+                            // Inputs
+                            .ctu_tst_pre_grst_l(ctu_tst_pre_grst_l),
+                            .arst_l     (cmp_arst_l),            // Templated
+                            .cluster_grst_l(spc_grst_l),         // Templated
+                            .global_shift_enable(global_shift_enable),
+                            .ctu_tst_scan_disable(ctu_tst_scan_disable),
+                            .ctu_tst_scanmode(ctu_tst_scanmode),
+                            .ctu_tst_macrotest(ctu_tst_macrotest),
+                            .ctu_tst_short_chain(ctu_tst_short_chain),
+                            .ctu_tst_mbist_enable(ctu_tst_mbist_enable),
+                            .rclk       (rclk),
+                            .bist_ctl_reg_wr_en(bist_ctl_reg_wr_en),
+                            .mbist_done (mbist_done),
+                            .mbist_err  ({1'b0, mbist_dcache_fail, mbist_icache_fail})); // Templated
+
+`endif //  `ifdef FPGA_SYN_NO_SPU
+   
+
+/*  bw_clk_cl_sparc_cmp AUTO_TEMPLATE(
+                               .si      (scan0_6),
+                               .so      (scan0_7),                               
+                           .arst_l       (cmp_arst_l),
+                           .grst_l       (cmp_grst_l),
+                           // Outputs
+                           .dbginit_l    (spc_dbginit_l),
+                           .cluster_grst_l   (spc_grst_l));
+ */
+   bw_clk_cl_sparc_cmp spc_hdr(/*AUTOINST*/
+                               // Outputs: this instance drives spc_grst_l, spc_dbginit_l and rclk
+                               .cluster_grst_l(spc_grst_l),      // Templated
+                               .dbginit_l(spc_dbginit_l),        // Templated
+                               .rclk    (rclk),                  // rclk is produced here; gclk is listed under Inputs
+                               .so      (scan0_7),               // Templated
+                               // Inputs
+                               .adbginit_l(adbginit_l),
+                               .arst_l  (cmp_arst_l),            // Templated
+                               .cluster_cken(cluster_cken),
+                               .gclk    (gclk),
+                               .gdbginit_l(gdbginit_l),
+                               .grst_l  (cmp_grst_l),            // Templated
+                               .se      (se),
+                               .si      (scan0_6));               // Templated
+endmodule // sparc
+
+
+// Local Variables:
+// verilog-library-directories:("../tlu/rtl" "../ifu/rtl" "../exu/rtl" "../lsu/rtl" "../spu/rtl" "../mul/rtl" "../ffu/rtl/" "../../common/rtl" ".")
+// End:
+
+     
Index: /trunk/T1-CPU/ifu/sparc_ifu.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu.v	(revision 6)
@@ -0,0 +1,2324 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Description:	
+//    The instruction fetch unit (IFU) contains the icache, ifq and
+//    fetch dp.
+*/
+
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include "sys.h"
+`include "iop.h"
+`include "ifu.h"
+`include "lsu.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module sparc_ifu (/*AUTOARG*/
+   // Outputs
+   spc_efc_ifuse_data, sparc_sscan_so, mbist_icache_fail, mbist_done, 
+   mbist_dcache_write, mbist_dcache_word, mbist_dcache_way, 
+   mbist_dcache_read, mbist_dcache_index, mbist_dcache_fail, 
+   ifu_tlu_ttype_m, ifu_tlu_trap_m, ifu_tlu_thrid_e, ifu_tlu_thrid_d, 
+   ifu_tlu_sraddr_d_v2, ifu_tlu_sraddr_d, ifu_tlu_sir_inst_m, 
+   ifu_tlu_sftint_m, ifu_tlu_rstint_m, ifu_tlu_rsr_inst_d, 
+   ifu_tlu_retry_inst_d, ifu_tlu_priv_violtn_m, ifu_tlu_pc_oor_e, 
+   ifu_tlu_pc_m, ifu_tlu_npc_m, ifu_tlu_mb_inst_e, ifu_tlu_l2imiss, 
+   ifu_tlu_itlb_done, ifu_tlu_inst_vld_m, ifu_tlu_immu_miss_m, 
+   ifu_tlu_imm_asi_d, ifu_tlu_icmiss_e, ifu_tlu_hwint_m, 
+   ifu_tlu_flush_m, ifu_tlu_flsh_inst_e, ifu_tlu_done_inst_d, 
+   ifu_tlu_alt_space_d, ifu_spu_trap_ack, ifu_spu_nceen, 
+   ifu_spu_inst_vld_w, ifu_mmu_trap_m, ifu_lsu_wsr_inst_d, 
+   ifu_lsu_swap_e, ifu_lsu_st_inst_e, ifu_lsu_sign_ext_e, 
+   ifu_lsu_rd_e, ifu_lsu_pref_inst_e, ifu_lsu_pcxreq_d, 
+   ifu_lsu_pcxpkt_e, ifu_lsu_nceen, ifu_lsu_memref_d, 
+   ifu_lsu_ldxa_tid_w2, ifu_lsu_ldxa_illgl_va_w2, 
+   ifu_lsu_ldxa_data_w2, ifu_lsu_ldxa_data_vld_w2, ifu_lsu_ldstub_e, 
+   ifu_lsu_ldst_size_e, ifu_lsu_ldst_fp_e, ifu_lsu_ldst_dbl_e, 
+   ifu_lsu_ld_inst_e, ifu_lsu_inv_clear, ifu_lsu_imm_asi_vld_d, 
+   ifu_lsu_imm_asi_d, ifu_lsu_ibuf_busy, ifu_lsu_fwd_wr_ack, 
+   ifu_lsu_fwd_data_vld, ifu_lsu_error_inj, ifu_lsu_destid_s, 
+   ifu_lsu_casa_e, ifu_lsu_asi_rd_unc, ifu_lsu_asi_ack, 
+   ifu_lsu_alt_space_d, ifu_ffu_visop_d, ifu_ffu_stfsr_d, 
+   ifu_ffu_quad_op_e, ifu_ffu_mvcnd_m, ifu_ffu_ldxfsr_d, 
+   ifu_ffu_ldst_size_d, ifu_ffu_ldfsr_d, ifu_ffu_inj_frferr, 
+   ifu_ffu_fst_d, ifu_ffu_frs2_d, ifu_ffu_frs1_d, ifu_ffu_frd_d, 
+   ifu_ffu_fpopcode_d, ifu_ffu_fpop2_d, ifu_ffu_fpop1_d, 
+   ifu_ffu_fld_d, ifu_ffu_fcc_num_d, ifu_exu_wsr_inst_d, 
+   ifu_exu_wen_d, ifu_exu_useimm_d, ifu_exu_usecin_d, 
+   ifu_exu_use_rsr_e_l, ifu_exu_tv_d, ifu_exu_ttype_vld_m, 
+   ifu_exu_tid_s2, ifu_exu_tcc_e, ifu_exu_tagop_d, ifu_exu_shiftop_d, 
+   ifu_exu_sethi_inst_d, ifu_exu_setcc_d, ifu_exu_saved_e, 
+   ifu_exu_save_d, ifu_exu_rs3o_vld_d, ifu_exu_rs3e_vld_d, 
+   ifu_exu_rs3_s, ifu_exu_rs2_vld_d, ifu_exu_rs2_s, 
+   ifu_exu_rs1_vld_d, ifu_exu_rs1_s, ifu_exu_return_d, 
+   ifu_exu_restored_e, ifu_exu_restore_d, ifu_exu_ren3_s, 
+   ifu_exu_ren2_s, ifu_exu_ren1_s, ifu_exu_rd_ifusr_e, 
+   ifu_exu_rd_ffusr_e, ifu_exu_rd_exusr_e, ifu_exu_rd_d, 
+   ifu_exu_range_check_other_d, ifu_exu_range_check_jlret_d, 
+   ifu_exu_pcver_e, ifu_exu_pc_d, ifu_exu_nceen_e, ifu_exu_muls_d, 
+   ifu_exu_kill_e, ifu_exu_invert_d, ifu_exu_inst_vld_w, 
+   ifu_exu_inst_vld_e, ifu_exu_inj_irferr, ifu_exu_imm_data_d, 
+   ifu_exu_ialign_d, ifu_exu_flushw_e, ifu_exu_enshift_d, 
+   ifu_exu_ecc_mask, ifu_exu_dontmv_regz1_e, ifu_exu_dontmv_regz0_e, 
+   ifu_exu_disable_ce_e, ifu_exu_dbrinst_d, ifu_exu_casa_d, 
+   ifu_exu_aluop_d, ifu_exu_addr_mask_d, so0, short_so0, short_so1, 
+   ifu_tlu_inst_vld_w, ifu_tlu_flush_w, ifu_lsu_alt_space_e, 
+   ifu_tlu_ttype_vld_m, ifu_exu_muldivop_d, ifu_lsu_thrid_s, 
+   mbist_write_data, 
+   // Inputs
+   tlu_lsu_redmode, tlu_lsu_pstate_priv, tlu_lsu_pstate_am, 
+   tlu_itlb_wr_vld_g, tlu_itlb_tag_rd_g, tlu_itlb_invalidate_all_g, 
+   tlu_itlb_dmp_vld_g, tlu_itlb_dmp_nctxt_g, tlu_itlb_dmp_actxt_g, 
+   tlu_itlb_data_rd_g, tlu_ifu_trappc_w2, tlu_ifu_trappc_vld_w1, 
+   tlu_ifu_trapnpc_w2, tlu_ifu_trapnpc_vld_w1, tlu_ifu_trap_tid_w1, 
+   tlu_ifu_sftint_vld, tlu_ifu_rstthr_i2, tlu_ifu_rstint_i2, 
+   tlu_ifu_resumint_i2, tlu_ifu_rerr_vld, tlu_ifu_pstate_pef, 
+   tlu_ifu_pstate_ie, tlu_ifu_nukeint_i2, tlu_ifu_hwint_i3, 
+   tlu_ifu_hintp_vld, tlu_ifu_flush_pipe_w, tlu_idtlb_dmp_thrid_g, 
+   tlu_hpstate_priv, tlu_hpstate_ibe, tlu_hpstate_enb, testmode_l, 
+   spu_ifu_unc_err_w1, spu_ifu_ttype_w2, spu_ifu_ttype_vld_w2, 
+   spu_ifu_ttype_tid_w2, spu_ifu_mamem_err_w1, spu_ifu_int_w2, 
+   spu_ifu_err_addr_w2, spu_ifu_corr_err_w2, sehold, se, rclk, 
+   mbist_userdata_mode, mbist_stop_on_next_fail, mbist_stop_on_fail, 
+   mbist_start, mbist_loop_on_address, mbist_loop_mode, 
+   mbist_dcache_data_in, mbist_bisi_mode, lsu_t3_pctxt_state, 
+   lsu_t2_pctxt_state, lsu_t1_pctxt_state, lsu_t0_pctxt_state, 
+   lsu_pid_state3, lsu_pid_state2, lsu_pid_state1, lsu_pid_state0, 
+   lsu_ifu_t3_tlz, lsu_ifu_t2_tlz, lsu_ifu_t1_tlz, lsu_ifu_t0_tlz, 
+   lsu_ifu_stxa_data, lsu_ifu_stbcnt3, lsu_ifu_stbcnt2, 
+   lsu_ifu_stbcnt1, lsu_ifu_stbcnt0, lsu_ifu_stallreq, 
+   lsu_ifu_quad_asi_e, lsu_ifu_pcxpkt_ack_d, 
+   lsu_ifu_ldsta_internal_e, lsu_ifu_ldst_miss_g, lsu_ifu_ldst_cmplt, 
+   lsu_ifu_ld_pcxpkt_vld, lsu_ifu_ld_pcxpkt_tid, 
+   lsu_ifu_ld_icache_index, lsu_ifu_l2_unc_error, 
+   lsu_ifu_l2_corr_error, lsu_ifu_io_error, lsu_ifu_inj_ack, 
+   lsu_ifu_icache_en, lsu_ifu_error_tid, lsu_ifu_err_addr, 
+   lsu_ifu_dtlb_tag_ue, lsu_ifu_dtlb_data_ue, lsu_ifu_dtlb_data_su, 
+   lsu_ifu_direct_map_l1, lsu_ifu_dcache_tag_perror, 
+   lsu_ifu_dcache_data_perror, lsu_ifu_dc_parity_error_w2, 
+   lsu_ifu_cpxpkt_i1, lsu_ifu_asi_vld, lsu_ifu_asi_thrid, 
+   lsu_ifu_asi_state, lsu_ifu_asi_load, lsu_ifu_asi_addr, 
+   lsu_ifu_addr_real_l, grst_l, gdbginit_l, ffu_ifu_tid_w2, 
+   ffu_ifu_stallreq, ffu_ifu_inj_ack, ffu_ifu_fst_ce_w, 
+   ffu_ifu_fpop_done_w2, ffu_ifu_err_synd_w2, ffu_ifu_err_reg_w2, 
+   ffu_ifu_ecc_ue_w2, ffu_ifu_ecc_ce_w2, ffu_ifu_cc_w2, 
+   ffu_ifu_cc_vld_w2, exu_ifu_va_oor_m, exu_ifu_spill_e, 
+   exu_ifu_regz_e, exu_ifu_regn_e, exu_ifu_oddwin_s, 
+   exu_ifu_longop_done_g, exu_ifu_inj_ack, exu_ifu_err_synd_m, 
+   exu_ifu_err_reg_m, exu_ifu_ecc_ue_m, exu_ifu_ecc_ce_m, 
+   exu_ifu_cc_d, exu_ifu_brpc_e, efc_spc_ifuse_dshift, 
+   efc_spc_ifuse_data, efc_spc_ifuse_ashift, efc_spc_fuse_clk2, 
+   efc_spc_fuse_clk1, ctu_tck, ctu_sscan_tid, ctu_sscan_snap, 
+   ctu_sscan_se, const_maskid, const_cpuid, arst_l, 
+   mem_write_disable, mux_drive_disable, exu_tlu_wsr_data_m, 
+   lsu_ictag_mrgn, lsu_idtlb_mrgn, si0, short_si0, short_si1, 
+   tlu_itlb_tte_tag_w2, tlu_itlb_tte_data_w2, 
+   tlu_itlb_rw_index_vld_g, tlu_itlb_rw_index_g, tlu_idtlb_dmp_key_g, 
+   tlu_itlb_dmp_all_g, lsu_sscan_data, tlu_sscan_data
+   );
+
+
+   input          mem_write_disable;
+   input          mux_drive_disable;
+   
+   input [2:0] 	  exu_tlu_wsr_data_m;
+   input [3:0]    lsu_ictag_mrgn;
+   input [7:0]    lsu_idtlb_mrgn;
+
+   // eco 5362
+   output         ifu_exu_addr_mask_d;
+   
+
+   // scan ports
+   input                si0, short_si0,short_si1;      
+   output               so0,short_so0,short_so1;
+   output         ifu_tlu_inst_vld_w;	// From fcl of sparc_ifu_fcl.v
+   output         ifu_tlu_flush_w;	  // From fcl of sparc_ifu_fcl.v
+   output         ifu_lsu_alt_space_e;    // From dec of sparc_ifu_dec.v
+   output         ifu_tlu_ttype_vld_m;// From fcl of sparc_ifu_fcl.v
+   output [4:0]   ifu_exu_muldivop_d;
+   output [1:0]   ifu_lsu_thrid_s;
+
+   // itlb inputs
+   input [58:0]   tlu_itlb_tte_tag_w2;
+   input [42:0]   tlu_itlb_tte_data_w2;
+   input          tlu_itlb_rw_index_vld_g;
+   input [5:0]    tlu_itlb_rw_index_g;
+   input [40:0]   tlu_idtlb_dmp_key_g;
+   input          tlu_itlb_dmp_all_g;
+
+   // sscan rename
+   input [15:0]   lsu_sscan_data;
+   input [62:0]   tlu_sscan_data;
+
+   output [7:0]   mbist_write_data;     // From mbist of sparc_ifu_mbist.v
+   
+   /*AUTOINPUT*/
+   // Beginning of automatic inputs (from unused autoinst inputs)
+   input                arst_l;                 // To swl of sparc_ifu_swl.v, ...
+   input [3:0]          const_cpuid;            // To swl of sparc_ifu_swl.v, ...
+   input [7:0]          const_maskid;           // To fdp of sparc_ifu_fdp.v
+   input                ctu_sscan_se;           // To sscan of sparc_ifu_sscan.v
+   input                ctu_sscan_snap;         // To sscan of sparc_ifu_sscan.v
+   input [3:0]          ctu_sscan_tid;          // To swl of sparc_ifu_swl.v, ...
+   input                ctu_tck;                // To sscan of sparc_ifu_sscan.v
+   input                efc_spc_fuse_clk1;      // To icdhdr of cmp_sram_redhdr.v, ...
+   input                efc_spc_fuse_clk2;      // To icdhdr of cmp_sram_redhdr.v
+   input                efc_spc_ifuse_ashift;   // To icdhdr of cmp_sram_redhdr.v
+   input                efc_spc_ifuse_data;     // To icdhdr of cmp_sram_redhdr.v
+   input                efc_spc_ifuse_dshift;   // To icdhdr of cmp_sram_redhdr.v
+   input [47:0]         exu_ifu_brpc_e;         // To fdp of sparc_ifu_fdp.v
+   input [7:0]          exu_ifu_cc_d;           // To dcl of sparc_ifu_dcl.v
+   input                exu_ifu_ecc_ce_m;       // To fcl of sparc_ifu_fcl.v, ...
+   input                exu_ifu_ecc_ue_m;       // To errctl of sparc_ifu_errctl.v
+   input [7:0]          exu_ifu_err_reg_m;      // To errdp of sparc_ifu_errdp.v
+   input [7:0]          exu_ifu_err_synd_m;     // To errdp of sparc_ifu_errdp.v
+   input                exu_ifu_inj_ack;        // To errctl of sparc_ifu_errctl.v
+   input [3:0]          exu_ifu_longop_done_g;  // To swl of sparc_ifu_swl.v
+   input [3:0]          exu_ifu_oddwin_s;       // To fcl of sparc_ifu_fcl.v
+   input                exu_ifu_regn_e;         // To dcl of sparc_ifu_dcl.v
+   input                exu_ifu_regz_e;         // To fcl of sparc_ifu_fcl.v
+   input                exu_ifu_spill_e;        // To swl of sparc_ifu_swl.v
+   input                exu_ifu_va_oor_m;       // To fcl of sparc_ifu_fcl.v
+   input [3:0]          ffu_ifu_cc_vld_w2;      // To dcl of sparc_ifu_dcl.v
+   input [7:0]          ffu_ifu_cc_w2;          // To dcl of sparc_ifu_dcl.v
+   input                ffu_ifu_ecc_ce_w2;      // To errctl of sparc_ifu_errctl.v
+   input                ffu_ifu_ecc_ue_w2;      // To errctl of sparc_ifu_errctl.v
+   input [5:0]          ffu_ifu_err_reg_w2;     // To errdp of sparc_ifu_errdp.v
+   input [13:0]         ffu_ifu_err_synd_w2;    // To errdp of sparc_ifu_errdp.v
+   input                ffu_ifu_fpop_done_w2;   // To swl of sparc_ifu_swl.v
+   input                ffu_ifu_fst_ce_w;       // To swl of sparc_ifu_swl.v, ...
+   input                ffu_ifu_inj_ack;        // To errctl of sparc_ifu_errctl.v
+   input                ffu_ifu_stallreq;       // To fcl of sparc_ifu_fcl.v
+   input [1:0]          ffu_ifu_tid_w2;         // To swl of sparc_ifu_swl.v, ...
+   input                gdbginit_l;             // To swl of sparc_ifu_swl.v, ...
+   input                grst_l;                 // To swl of sparc_ifu_swl.v, ...
+   input [3:0]          lsu_ifu_addr_real_l;    // To fcl of sparc_ifu_fcl.v
+   input [17:0]         lsu_ifu_asi_addr;       // To ifqdp of sparc_ifu_ifqdp.v
+   input                lsu_ifu_asi_load;       // To ifqctl of sparc_ifu_ifqctl.v
+   input [7:0]          lsu_ifu_asi_state;      // To ifqctl of sparc_ifu_ifqctl.v
+   input [1:0]          lsu_ifu_asi_thrid;      // To ifqctl of sparc_ifu_ifqctl.v
+   input                lsu_ifu_asi_vld;        // To ifqctl of sparc_ifu_ifqctl.v
+   input [`CPX_WIDTH-1:0]lsu_ifu_cpxpkt_i1;     // To ifqdp of sparc_ifu_ifqdp.v
+   input                lsu_ifu_dc_parity_error_w2;// To swl of sparc_ifu_swl.v, ...
+   input                lsu_ifu_dcache_data_perror;// To errctl of sparc_ifu_errctl.v
+   input                lsu_ifu_dcache_tag_perror;// To errctl of sparc_ifu_errctl.v
+   input                lsu_ifu_direct_map_l1;  // To ifqctl of sparc_ifu_ifqctl.v
+   input                lsu_ifu_dtlb_data_su;   // To errctl of sparc_ifu_errctl.v
+   input                lsu_ifu_dtlb_data_ue;   // To errctl of sparc_ifu_errctl.v
+   input                lsu_ifu_dtlb_tag_ue;    // To errctl of sparc_ifu_errctl.v
+   input [47:4]         lsu_ifu_err_addr;       // To errdp of sparc_ifu_errdp.v
+   input [1:0]          lsu_ifu_error_tid;      // To errctl of sparc_ifu_errctl.v
+   input [3:0]          lsu_ifu_icache_en;      // To fcl of sparc_ifu_fcl.v
+   input [3:0]          lsu_ifu_inj_ack;        // To errctl of sparc_ifu_errctl.v
+   input                lsu_ifu_io_error;       // To errctl of sparc_ifu_errctl.v
+   input                lsu_ifu_l2_corr_error;  // To errctl of sparc_ifu_errctl.v
+   input                lsu_ifu_l2_unc_error;   // To errctl of sparc_ifu_errctl.v
+   input [`IC_IDX_HI:5] lsu_ifu_ld_icache_index;// To invctl of sparc_ifu_invctl.v
+   input [1:0]          lsu_ifu_ld_pcxpkt_tid;  // To invctl of sparc_ifu_invctl.v
+   input                lsu_ifu_ld_pcxpkt_vld;  // To invctl of sparc_ifu_invctl.v
+   input [3:0]          lsu_ifu_ldst_cmplt;     // To swl of sparc_ifu_swl.v
+   input                lsu_ifu_ldst_miss_g;    // To swl of sparc_ifu_swl.v
+   input                lsu_ifu_ldsta_internal_e;// To dec of sparc_ifu_dec.v, ...
+   input                lsu_ifu_pcxpkt_ack_d;   // To ifqctl of sparc_ifu_ifqctl.v
+   input                lsu_ifu_quad_asi_e;     // To swl of sparc_ifu_swl.v
+   input                lsu_ifu_stallreq;       // To fcl of sparc_ifu_fcl.v
+   input [3:0]          lsu_ifu_stbcnt0;        // To swl of sparc_ifu_swl.v
+   input [3:0]          lsu_ifu_stbcnt1;        // To swl of sparc_ifu_swl.v
+   input [3:0]          lsu_ifu_stbcnt2;        // To swl of sparc_ifu_swl.v
+   input [3:0]          lsu_ifu_stbcnt3;        // To swl of sparc_ifu_swl.v
+   input [47:0]         lsu_ifu_stxa_data;      // To ifqdp of sparc_ifu_ifqdp.v
+   input                lsu_ifu_t0_tlz;         // To fcl of sparc_ifu_fcl.v
+   input                lsu_ifu_t1_tlz;         // To fcl of sparc_ifu_fcl.v
+   input                lsu_ifu_t2_tlz;         // To fcl of sparc_ifu_fcl.v
+   input                lsu_ifu_t3_tlz;         // To fcl of sparc_ifu_fcl.v
+   input [2:0]          lsu_pid_state0;         // To fcl of sparc_ifu_fcl.v
+   input [2:0]          lsu_pid_state1;         // To fcl of sparc_ifu_fcl.v
+   input [2:0]          lsu_pid_state2;         // To fcl of sparc_ifu_fcl.v
+   input [2:0]          lsu_pid_state3;         // To fcl of sparc_ifu_fcl.v
+   input [12:0]         lsu_t0_pctxt_state;     // To fdp of sparc_ifu_fdp.v
+   input [12:0]         lsu_t1_pctxt_state;     // To fdp of sparc_ifu_fdp.v
+   input [12:0]         lsu_t2_pctxt_state;     // To fdp of sparc_ifu_fdp.v
+   input [12:0]         lsu_t3_pctxt_state;     // To fdp of sparc_ifu_fdp.v
+   input                mbist_bisi_mode;        // To mbist of sparc_ifu_mbist.v
+   input [71:0]         mbist_dcache_data_in;   // To mbist of sparc_ifu_mbist.v
+   input                mbist_loop_mode;        // To mbist of sparc_ifu_mbist.v
+   input                mbist_loop_on_address;  // To mbist of sparc_ifu_mbist.v
+   input                mbist_start;            // To mbist of sparc_ifu_mbist.v
+   input                mbist_stop_on_fail;     // To mbist of sparc_ifu_mbist.v
+   input                mbist_stop_on_next_fail;// To mbist of sparc_ifu_mbist.v
+   input                mbist_userdata_mode;    // To mbist of sparc_ifu_mbist.v
+   input                rclk;                   // To dec of sparc_ifu_dec.v, ...
+   input                se;                     // To dec of sparc_ifu_dec.v, ...
+   input                sehold;                 // To fcl of sparc_ifu_fcl.v, ...
+   input                spu_ifu_corr_err_w2;    // To errctl of sparc_ifu_errctl.v
+   input [39:4]         spu_ifu_err_addr_w2;    // To errdp of sparc_ifu_errdp.v
+   input                spu_ifu_int_w2;         // To errctl of sparc_ifu_errctl.v
+   input                spu_ifu_mamem_err_w1;   // To errctl of sparc_ifu_errctl.v
+   input [1:0]          spu_ifu_ttype_tid_w2;   // To fcl of sparc_ifu_fcl.v, ...
+   input                spu_ifu_ttype_vld_w2;   // To fcl of sparc_ifu_fcl.v
+   input                spu_ifu_ttype_w2;       // To fcl of sparc_ifu_fcl.v
+   input                spu_ifu_unc_err_w1;     // To errctl of sparc_ifu_errctl.v
+   input                testmode_l;             // To icdhdr of cmp_sram_redhdr.v
+   input [3:0]          tlu_hpstate_enb;        // To fcl of sparc_ifu_fcl.v
+   input [3:0]          tlu_hpstate_ibe;        // To swl of sparc_ifu_swl.v
+   input [3:0]          tlu_hpstate_priv;       // To fcl of sparc_ifu_fcl.v
+   input [1:0]          tlu_idtlb_dmp_thrid_g;  // To fcl of sparc_ifu_fcl.v
+   input                tlu_ifu_flush_pipe_w;   // To swl of sparc_ifu_swl.v, ...
+   input [3:0]          tlu_ifu_hintp_vld;      // To fcl of sparc_ifu_fcl.v
+   input [3:0]          tlu_ifu_hwint_i3;       // To fcl of sparc_ifu_fcl.v
+   input                tlu_ifu_nukeint_i2;     // To fcl of sparc_ifu_fcl.v
+   input [3:0]          tlu_ifu_pstate_ie;      // To fcl of sparc_ifu_fcl.v
+   input [3:0]          tlu_ifu_pstate_pef;     // To swl of sparc_ifu_swl.v
+   input [3:0]          tlu_ifu_rerr_vld;       // To fcl of sparc_ifu_fcl.v
+   input                tlu_ifu_resumint_i2;    // To fcl of sparc_ifu_fcl.v
+   input                tlu_ifu_rstint_i2;      // To fcl of sparc_ifu_fcl.v
+   input [3:0]          tlu_ifu_rstthr_i2;      // To fcl of sparc_ifu_fcl.v
+   input [3:0]          tlu_ifu_sftint_vld;     // To fcl of sparc_ifu_fcl.v
+   input [1:0]          tlu_ifu_trap_tid_w1;    // To swl of sparc_ifu_swl.v, ...
+   input                tlu_ifu_trapnpc_vld_w1; // To fcl of sparc_ifu_fcl.v
+   input [48:0]         tlu_ifu_trapnpc_w2;     // To fdp of sparc_ifu_fdp.v
+   input                tlu_ifu_trappc_vld_w1;  // To swl of sparc_ifu_swl.v, ...
+   input [48:0]         tlu_ifu_trappc_w2;      // To fdp of sparc_ifu_fdp.v
+   input                tlu_itlb_data_rd_g;     // To fcl of sparc_ifu_fcl.v
+   input                tlu_itlb_dmp_actxt_g;   // To fdp of sparc_ifu_fdp.v
+   input                tlu_itlb_dmp_nctxt_g;   // To fdp of sparc_ifu_fdp.v
+   input                tlu_itlb_dmp_vld_g;     // To fcl of sparc_ifu_fcl.v
+   input                tlu_itlb_invalidate_all_g;// To fcl of sparc_ifu_fcl.v
+   input                tlu_itlb_tag_rd_g;      // To fcl of sparc_ifu_fcl.v
+   input                tlu_itlb_wr_vld_g;      // To fcl of sparc_ifu_fcl.v
+   input [3:0]          tlu_lsu_pstate_am;      // To fcl of sparc_ifu_fcl.v
+   input [3:0]          tlu_lsu_pstate_priv;    // To fcl of sparc_ifu_fcl.v, ...
+   input [3:0]          tlu_lsu_redmode;        // To fcl of sparc_ifu_fcl.v
+   // End of automatics
+
+   // Module output ports: the list between the Beginning/End markers is generated by verilog-mode from the AUTOOUTPUT marker (do not hand-edit); each trailing comment names the instance and source file driving that port.
+   /*AUTOOUTPUT*/
+   // Beginning of automatic outputs (from unused autoinst outputs)
+   output [2:0]         ifu_exu_aluop_d;        // From dec of sparc_ifu_dec.v
+   output               ifu_exu_casa_d;         // From dec of sparc_ifu_dec.v
+   output               ifu_exu_dbrinst_d;      // From dcl of sparc_ifu_dcl.v
+   output               ifu_exu_disable_ce_e;   // From fcl of sparc_ifu_fcl.v
+   output               ifu_exu_dontmv_regz0_e; // From dcl of sparc_ifu_dcl.v
+   output               ifu_exu_dontmv_regz1_e; // From dcl of sparc_ifu_dcl.v
+   output [7:0]         ifu_exu_ecc_mask;       // From errctl of sparc_ifu_errctl.v
+   output               ifu_exu_enshift_d;      // From dec of sparc_ifu_dec.v
+   output               ifu_exu_flushw_e;       // From dec of sparc_ifu_dec.v
+   output               ifu_exu_ialign_d;       // From dec of sparc_ifu_dec.v
+   output [31:0]        ifu_exu_imm_data_d;     // From imd of sparc_ifu_imd.v
+   output               ifu_exu_inj_irferr;     // From errctl of sparc_ifu_errctl.v
+   output               ifu_exu_inst_vld_e;     // From fcl of sparc_ifu_fcl.v
+   output               ifu_exu_inst_vld_w;     // From fcl of sparc_ifu_fcl.v
+   output               ifu_exu_invert_d;       // From dec of sparc_ifu_dec.v
+   output               ifu_exu_kill_e;         // From dcl of sparc_ifu_dcl.v
+   output               ifu_exu_muls_d;         // From dec of sparc_ifu_dec.v
+   output               ifu_exu_nceen_e;        // From errctl of sparc_ifu_errctl.v
+   output [47:0]        ifu_exu_pc_d;           // From fdp of sparc_ifu_fdp.v
+   output [63:0]        ifu_exu_pcver_e;        // From fdp of sparc_ifu_fdp.v
+   output               ifu_exu_range_check_jlret_d;// From dec of sparc_ifu_dec.v
+   output               ifu_exu_range_check_other_d;// From dec of sparc_ifu_dec.v
+   output [4:0]         ifu_exu_rd_d;           // From imd of sparc_ifu_imd.v
+   output               ifu_exu_rd_exusr_e;     // From dec of sparc_ifu_dec.v
+   output               ifu_exu_rd_ffusr_e;     // From dec of sparc_ifu_dec.v
+   output               ifu_exu_rd_ifusr_e;     // From dec of sparc_ifu_dec.v
+   output               ifu_exu_ren1_s;         // From fcl of sparc_ifu_fcl.v
+   output               ifu_exu_ren2_s;         // From fcl of sparc_ifu_fcl.v
+   output               ifu_exu_ren3_s;         // From fcl of sparc_ifu_fcl.v
+   output               ifu_exu_restore_d;      // From dec of sparc_ifu_dec.v
+   output               ifu_exu_restored_e;     // From dec of sparc_ifu_dec.v
+   output               ifu_exu_return_d;       // From dec of sparc_ifu_dec.v
+   output [4:0]         ifu_exu_rs1_s;          // From fdp of sparc_ifu_fdp.v
+   output               ifu_exu_rs1_vld_d;      // From dec of sparc_ifu_dec.v
+   output [4:0]         ifu_exu_rs2_s;          // From fdp of sparc_ifu_fdp.v
+   output               ifu_exu_rs2_vld_d;      // From dec of sparc_ifu_dec.v
+   output [4:0]         ifu_exu_rs3_s;          // From fdp of sparc_ifu_fdp.v
+   output               ifu_exu_rs3e_vld_d;     // From dec of sparc_ifu_dec.v
+   output               ifu_exu_rs3o_vld_d;     // From dec of sparc_ifu_dec.v
+   output               ifu_exu_save_d;         // From dec of sparc_ifu_dec.v
+   output               ifu_exu_saved_e;        // From dec of sparc_ifu_dec.v
+   output               ifu_exu_setcc_d;        // From dec of sparc_ifu_dec.v
+   output               ifu_exu_sethi_inst_d;   // From dec of sparc_ifu_dec.v
+   output [2:0]         ifu_exu_shiftop_d;      // From dec of sparc_ifu_dec.v
+   output               ifu_exu_tagop_d;        // From dec of sparc_ifu_dec.v
+   output               ifu_exu_tcc_e;          // From dcl of sparc_ifu_dcl.v
+   output [1:0]         ifu_exu_tid_s2;         // From fcl of sparc_ifu_fcl.v
+   output               ifu_exu_ttype_vld_m;    // From fcl of sparc_ifu_fcl.v
+   output               ifu_exu_tv_d;           // From dec of sparc_ifu_dec.v
+   output               ifu_exu_use_rsr_e_l;    // From dec of sparc_ifu_dec.v
+   output               ifu_exu_usecin_d;       // From dec of sparc_ifu_dec.v
+   output               ifu_exu_useimm_d;       // From dec of sparc_ifu_dec.v
+   output               ifu_exu_wen_d;          // From dec of sparc_ifu_dec.v
+   output               ifu_exu_wsr_inst_d;     // From dec of sparc_ifu_dec.v
+   output [1:0]         ifu_ffu_fcc_num_d;      // From imd of sparc_ifu_imd.v
+   output               ifu_ffu_fld_d;          // From dec of sparc_ifu_dec.v
+   output               ifu_ffu_fpop1_d;        // From dec of sparc_ifu_dec.v
+   output               ifu_ffu_fpop2_d;        // From dec of sparc_ifu_dec.v
+   output [8:0]         ifu_ffu_fpopcode_d;     // From imd of sparc_ifu_imd.v
+   output [4:0]         ifu_ffu_frd_d;          // From imd of sparc_ifu_imd.v
+   output [4:0]         ifu_ffu_frs1_d;         // From imd of sparc_ifu_imd.v
+   output [4:0]         ifu_ffu_frs2_d;         // From imd of sparc_ifu_imd.v
+   output               ifu_ffu_fst_d;          // From dec of sparc_ifu_dec.v
+   output               ifu_ffu_inj_frferr;     // From errctl of sparc_ifu_errctl.v
+   output               ifu_ffu_ldfsr_d;        // From dec of sparc_ifu_dec.v
+   output               ifu_ffu_ldst_size_d;    // From dec of sparc_ifu_dec.v
+   output               ifu_ffu_ldxfsr_d;       // From dec of sparc_ifu_dec.v
+   output               ifu_ffu_mvcnd_m;        // From dcl of sparc_ifu_dcl.v
+   output               ifu_ffu_quad_op_e;      // From dec of sparc_ifu_dec.v
+   output               ifu_ffu_stfsr_d;        // From dec of sparc_ifu_dec.v
+   output               ifu_ffu_visop_d;        // From dec of sparc_ifu_dec.v
+   output               ifu_lsu_alt_space_d;    // From dec of sparc_ifu_dec.v
+   output               ifu_lsu_asi_ack;        // From ifqctl of sparc_ifu_ifqctl.v
+   output               ifu_lsu_asi_rd_unc;     // From errctl of sparc_ifu_errctl.v
+   output               ifu_lsu_casa_e;         // From dec of sparc_ifu_dec.v
+   output [2:0]         ifu_lsu_destid_s;       // From ifqctl of sparc_ifu_ifqctl.v
+   output [3:0]         ifu_lsu_error_inj;      // From errctl of sparc_ifu_errctl.v
+   output               ifu_lsu_fwd_data_vld;   // From errctl of sparc_ifu_errctl.v
+   output               ifu_lsu_fwd_wr_ack;     // From ifqctl of sparc_ifu_ifqctl.v
+   output               ifu_lsu_ibuf_busy;      // From ifqctl of sparc_ifu_ifqctl.v
+   output [7:0]         ifu_lsu_imm_asi_d;      // From imd of sparc_ifu_imd.v
+   output               ifu_lsu_imm_asi_vld_d;  // From imd of sparc_ifu_imd.v
+   output               ifu_lsu_inv_clear;      // From ifqctl of sparc_ifu_ifqctl.v
+   output               ifu_lsu_ld_inst_e;      // From dec of sparc_ifu_dec.v
+   output               ifu_lsu_ldst_dbl_e;     // From dec of sparc_ifu_dec.v
+   output               ifu_lsu_ldst_fp_e;      // From dec of sparc_ifu_dec.v
+   output [1:0]         ifu_lsu_ldst_size_e;    // From dec of sparc_ifu_dec.v
+   output               ifu_lsu_ldstub_e;       // From dec of sparc_ifu_dec.v
+   output               ifu_lsu_ldxa_data_vld_w2;// From errctl of sparc_ifu_errctl.v
+   output [63:0]        ifu_lsu_ldxa_data_w2;   // From errdp of sparc_ifu_errdp.v
+   output               ifu_lsu_ldxa_illgl_va_w2;// From ifqctl of sparc_ifu_ifqctl.v
+   output [1:0]         ifu_lsu_ldxa_tid_w2;    // From errctl of sparc_ifu_errctl.v
+   output               ifu_lsu_memref_d;       // From dec of sparc_ifu_dec.v
+   output [3:0]         ifu_lsu_nceen;          // From errctl of sparc_ifu_errctl.v
+   output [51:0]        ifu_lsu_pcxpkt_e;       // From ifqdp of sparc_ifu_ifqdp.v
+   output               ifu_lsu_pcxreq_d;       // From ifqctl of sparc_ifu_ifqctl.v
+   output               ifu_lsu_pref_inst_e;    // From dec of sparc_ifu_dec.v
+   output [4:0]         ifu_lsu_rd_e;           // From imd of sparc_ifu_imd.v
+   output               ifu_lsu_sign_ext_e;     // From dec of sparc_ifu_dec.v
+   output               ifu_lsu_st_inst_e;      // From dec of sparc_ifu_dec.v
+   output               ifu_lsu_swap_e;         // From dec of sparc_ifu_dec.v
+   output               ifu_lsu_wsr_inst_d;     // From dec of sparc_ifu_dec.v
+   output               ifu_mmu_trap_m;         // From fcl of sparc_ifu_fcl.v
+   output               ifu_spu_inst_vld_w;     // From fcl of sparc_ifu_fcl.v
+   output [3:0]         ifu_spu_nceen;          // From errctl of sparc_ifu_errctl.v
+   output               ifu_spu_trap_ack;       // From fcl of sparc_ifu_fcl.v
+   output               ifu_tlu_alt_space_d;    // From dec of sparc_ifu_dec.v
+   output               ifu_tlu_done_inst_d;    // From dec of sparc_ifu_dec.v
+   output               ifu_tlu_flsh_inst_e;    // From dec of sparc_ifu_dec.v
+   output               ifu_tlu_flush_m;        // From fcl of sparc_ifu_fcl.v
+   output               ifu_tlu_hwint_m;        // From fcl of sparc_ifu_fcl.v
+   output               ifu_tlu_icmiss_e;       // From fcl of sparc_ifu_fcl.v
+   output [8:0]         ifu_tlu_imm_asi_d;      // From imd of sparc_ifu_imd.v
+   output               ifu_tlu_immu_miss_m;    // From fcl of sparc_ifu_fcl.v
+   output               ifu_tlu_inst_vld_m;     // From fcl of sparc_ifu_fcl.v
+   output               ifu_tlu_itlb_done;      // From fcl of sparc_ifu_fcl.v
+   output [3:0]         ifu_tlu_l2imiss;        // From ifqctl of sparc_ifu_ifqctl.v
+   output               ifu_tlu_mb_inst_e;      // From dec of sparc_ifu_dec.v
+   output [48:0]        ifu_tlu_npc_m;          // From fdp of sparc_ifu_fdp.v
+   output [48:0]        ifu_tlu_pc_m;           // From fdp of sparc_ifu_fdp.v
+   output               ifu_tlu_pc_oor_e;       // From fdp of sparc_ifu_fdp.v
+   output               ifu_tlu_priv_violtn_m;  // From fcl of sparc_ifu_fcl.v
+   output               ifu_tlu_retry_inst_d;   // From dec of sparc_ifu_dec.v
+   output               ifu_tlu_rsr_inst_d;     // From dec of sparc_ifu_dec.v
+   output               ifu_tlu_rstint_m;       // From fcl of sparc_ifu_fcl.v
+   output               ifu_tlu_sftint_m;       // From fcl of sparc_ifu_fcl.v
+   output               ifu_tlu_sir_inst_m;     // From dec of sparc_ifu_dec.v
+   output [6:0]         ifu_tlu_sraddr_d;       // From imd of sparc_ifu_imd.v
+   output [6:0]         ifu_tlu_sraddr_d_v2;    // From imd of sparc_ifu_imd.v
+   output [1:0]         ifu_tlu_thrid_d;        // From fcl of sparc_ifu_fcl.v
+   output [1:0]         ifu_tlu_thrid_e;        // From fcl of sparc_ifu_fcl.v
+   output               ifu_tlu_trap_m;         // From fcl of sparc_ifu_fcl.v
+   output [8:0]         ifu_tlu_ttype_m;        // From fcl of sparc_ifu_fcl.v
+   output               mbist_dcache_fail;      // From mbist of sparc_ifu_mbist.v
+   output [6:0]         mbist_dcache_index;     // From mbist of sparc_ifu_mbist.v
+   output               mbist_dcache_read;      // From mbist of sparc_ifu_mbist.v
+   output [1:0]         mbist_dcache_way;       // From mbist of sparc_ifu_mbist.v
+   output               mbist_dcache_word;      // From mbist of sparc_ifu_mbist.v
+   output               mbist_dcache_write;     // From mbist of sparc_ifu_mbist.v
+   output               mbist_done;             // From mbist of sparc_ifu_mbist.v
+   output               mbist_icache_fail;      // From mbist of sparc_ifu_mbist.v
+   output               sparc_sscan_so;         // From sscan of sparc_ifu_sscan.v
+   output               spc_efc_ifuse_data;     // From icdhdr of cmp_sram_redhdr.v
+   // End of automatics
+
+   /*AUTOWIRE*/
+   // Beginning of automatic wires (for undeclared instantiated-module outputs)
+   wire                 dcl_fcl_bcregz0_e;      // From dcl of sparc_ifu_dcl.v
+   wire                 dcl_fcl_bcregz1_e;      // From dcl of sparc_ifu_dcl.v
+   wire                 dcl_imd_broff_sel_bcc_d_l;// From dcl of sparc_ifu_dcl.v
+   wire                 dcl_imd_broff_sel_bpcc_d_l;// From dcl of sparc_ifu_dcl.v
+   wire                 dcl_imd_broff_sel_br_d_l;// From dcl of sparc_ifu_dcl.v
+   wire                 dcl_imd_broff_sel_call_d_l;// From dcl of sparc_ifu_dcl.v
+   wire                 dcl_imd_immbr_sel_br_d; // From dcl of sparc_ifu_dcl.v
+   wire                 dcl_imd_immdata_sel_movcc_d_l;// From dcl of sparc_ifu_dcl.v
+   wire                 dcl_imd_immdata_sel_movr_d_l;// From dcl of sparc_ifu_dcl.v
+   wire                 dcl_imd_immdata_sel_sethi_d_l;// From dcl of sparc_ifu_dcl.v
+   wire                 dcl_imd_immdata_sel_simm13_d_l;// From dcl of sparc_ifu_dcl.v
+   wire                 dcl_swl_tcc_done_m;     // From dcl of sparc_ifu_dcl.v
+   wire [2:0]           dec_dcl_cctype_d;       // From dec of sparc_ifu_dec.v
+   wire                 dec_fcl_rdsr_sel_pc_d;  // From dec of sparc_ifu_dec.v
+   wire                 dec_fcl_rdsr_sel_thr_d; // From dec of sparc_ifu_dec.v
+   wire                 dec_imd_call_inst_d;    // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_allfp_d;        // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_br_done_d;      // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_div_inst_d;     // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_fpop_d;         // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_frf_lower_d;    // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_frf_upper_d;    // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_ld_inst_d;      // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_ll_done_d;      // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_mul_inst_d;     // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_rdsr_sel_thr_d; // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_st_inst_d;      // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_sta_inst_e;     // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_std_inst_d;     // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_wrt_tcr_w;      // From dec of sparc_ifu_dec.v
+   wire                 dec_swl_wrtfprs_w;      // From dec of sparc_ifu_dec.v
+   wire                 dtu_fcl_br_inst_d;      // From dec of sparc_ifu_dec.v
+   wire                 dtu_fcl_flush_sonly_e;  // From dec of sparc_ifu_dec.v
+   wire                 dtu_fcl_fpdis_e;        // From dec of sparc_ifu_dec.v
+   wire                 dtu_fcl_illinst_e;      // From dec of sparc_ifu_dec.v
+   wire                 dtu_fcl_imask_hit_e;    // From dec of sparc_ifu_dec.v
+   wire [3:0]           dtu_fcl_nextthr_bf;     // From swl of sparc_ifu_swl.v
+   wire                 dtu_fcl_ntr_s;          // From swl of sparc_ifu_swl.v
+   wire                 dtu_fcl_privop_e;       // From dec of sparc_ifu_dec.v
+   wire                 dtu_fcl_retract_d;      // From swl of sparc_ifu_swl.v
+   wire                 dtu_fcl_rollback_g;     // From swl of sparc_ifu_swl.v
+   wire                 dtu_fcl_running_s;      // From swl of sparc_ifu_swl.v
+   wire                 dtu_fcl_sir_inst_e;     // From dec of sparc_ifu_dec.v
+   wire [3:0]           dtu_fcl_thr_active;     // From swl of sparc_ifu_swl.v
+   wire [40:0]          dtu_fdp_thrconf_e;      // From swl of sparc_ifu_swl.v
+   wire                 dtu_ifq_kill_latest_d;  // From dec of sparc_ifu_dec.v
+   wire                 dtu_inst_anull_e;       // From dcl of sparc_ifu_dcl.v
+   wire [31:0]          dtu_inst_d;             // From imd of sparc_ifu_imd.v
+   wire                 dtu_reset;              // From swl of sparc_ifu_swl.v
+   wire                 erb_dtu_ifeterr_d1;     // From errctl of sparc_ifu_errctl.v
+   wire [38:0]          erb_dtu_imask;          // From errdp of sparc_ifu_errdp.v
+   wire [3:0]           erb_fcl_ce_trapvec;     // From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erb_fcl_ifet_uevec_d1;  // From errctl of sparc_ifu_errctl.v
+   wire                 erb_fcl_itlb_ce_d1;     // From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erb_fcl_spu_uetrap;     // From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erb_fcl_ue_trapvec;     // From errctl of sparc_ifu_errctl.v
+   wire                 erb_ifq_ifeterr_d1;     // From errctl of sparc_ifu_errctl.v
+   wire                 erb_ifq_itlberr_s1;     // From errctl of sparc_ifu_errctl.v
+   wire                 erb_reset;              // From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_asi_thr_l;      // From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_asisrc_sel_err_s_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_asisrc_sel_icd_s_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_asisrc_sel_itlb_s_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_asisrc_sel_misc_s_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_asiway_s1_l;    // From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr0_sel_frf_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr0_sel_irf_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr0_sel_itlb_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr0_sel_lsu_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr1_sel_l1pa_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr1_sel_l2pa_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr1_sel_other_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr1_sel_pcd1_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr2_sel_mx0_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr2_sel_mx1_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr2_sel_old_l;// From errctl of sparc_ifu_errctl.v
+   wire [3:0]           erc_erd_eadr2_sel_wrt_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_errasi_sel_addr_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_errasi_sel_en_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_errasi_sel_inj_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_errasi_sel_stat_l;// From errctl of sparc_ifu_errctl.v
+   wire [1:0]           erc_erd_erren_asidata;  // From errctl of sparc_ifu_errctl.v
+   wire [31:0]          erc_erd_errinj_asidata; // From errctl of sparc_ifu_errctl.v
+   wire [22:0]          erc_erd_errstat_asidata;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_ld_imask;       // From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_miscasi_sel_ict_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_miscasi_sel_imask_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_miscasi_sel_other_l;// From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_pgsz_b0;        // From errctl of sparc_ifu_errctl.v
+   wire                 erc_erd_pgsz_b1;        // From errctl of sparc_ifu_errctl.v
+   wire                 erd_erc_fetpe_s1;       // From errdp of sparc_ifu_errdp.v
+   wire                 erd_erc_nirpe_s1;       // From errdp of sparc_ifu_errdp.v
+   wire [3:0]           erd_erc_tagpe_s1;       // From errdp of sparc_ifu_errdp.v
+   wire [1:0]           erd_erc_tlbd_pe_s1;     // From errdp of sparc_ifu_errdp.v
+   wire [1:0]           erd_erc_tlbt_pe_s1;     // From errdp of sparc_ifu_errdp.v
+   wire [2:0]           erd_erc_tte_pgsz;       // From errdp of sparc_ifu_errdp.v
+   wire                 fcl_dcl_regz_e;         // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dec_dslot_s;        // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dec_intr_vld_d;     // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_ely_inst_vld_d; // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_hprivmode_d;    // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_hprivmode_w2;   // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_inst_vld_d;     // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_inst_vld_e;     // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_intr_vld_e;     // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_nuke_thr_w;     // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_privmode_d;     // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_resum_thr_w;    // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_rst_thr_w;      // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_stall_bf;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_sync_intr_d;    // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_dtu_thr_f;          // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_dtu_tlzero_d;       // From fcl of sparc_ifu_fcl.v
+   wire [1:0]           fcl_erb_asi_tid_f;      // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_erb_clear_iferr;    // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_erb_ievld_s1;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_erb_immuevld_s1;    // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_erb_inst_issue_d;   // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_erb_inst_vld_d1;    // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_erb_itlbrd_data_s;  // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_erb_itlbrd_vld_s;   // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_erb_tevld_s1;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_ctxt_sel_curr_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_ctxt_sel_dmp_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_ctxt_sel_sw_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_dmpthr_l;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_inst_sel_curr_s_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_inst_sel_nir_s_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_inst_sel_nop_s_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_inst_sel_switch_s_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_mask32b_f;      // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_next_ctxt_bf_l; // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_next_thr_bf_l;  // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_nextpcs_sel_pcd_f_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_nextpcs_sel_pce_f_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_nextpcs_sel_pcf_f_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_nextpcs_sel_pcs_f_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_nirthr_s1_l;    // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_noswpc_sel_inc_l_bf;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_noswpc_sel_old_l_bf;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_noswpc_sel_tnpc_l_bf;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_oddwin_s;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_pcbf_sel_br_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_pcbf_sel_nosw_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_pcbf_sel_swpc_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_pcoor_f;        // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_pcoor_vec_f;    // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_rbinst_sel_inste_s;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_rdsr_sel_pc_e_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_rdsr_sel_thr_e_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_rdsr_sel_ver_e_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_tctxt_sel_prim; // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_thr_s1_l;       // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_thr_s2_l;       // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_thrtnpc_sel_npcw_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_thrtnpc_sel_old_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_thrtnpc_sel_pcf_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_thrtnpc_sel_tnpc_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_tinst_sel_curr_s_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_tinst_sel_ifq_s_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_tinst_sel_old_s_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_tinst_sel_rb_s_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_tpcbf_sel_brpc_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_tpcbf_sel_old_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_tpcbf_sel_pcp4_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_tpcbf_sel_trap_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_trrbpc_sel_err_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_trrbpc_sel_pcs_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_trrbpc_sel_rb_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_fdp_trrbpc_sel_trap_bf_l;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_fdp_usenir_sel_nir_s1;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_icd_index_sel_ifq_bf;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_icd_rdreq_bf;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_icd_wrreq_bf;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_ict_wrreq_bf;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_icv_rdreq_bf;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_icv_wrreq_bf;       // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_ifq_canthr;         // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_ifq_grant_bf;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_ifq_icache_en_s_l;  // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_ifq_icmiss_s1;      // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_ifq_rdreq_s1;       // From fcl of sparc_ifu_fcl.v
+   wire [1:0]           fcl_ifq_thr_s1;         // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_imd_oddwin_d;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_swl_flush_w;        // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_swl_flush_wake_w;   // From fcl of sparc_ifu_fcl.v
+   wire [3:0]           fcl_swl_int_activate_i3;// From fcl of sparc_ifu_fcl.v
+   wire                 fcl_swl_swcvld_s;       // From fcl of sparc_ifu_fcl.v
+   wire                 fcl_swl_swout_f;        // From fcl of sparc_ifu_fcl.v
+   wire [31:0]          fdp_dtu_inst_s;         // From fdp of sparc_ifu_fdp.v
+   wire [47:0]          fdp_erb_pc_f;           // From fdp of sparc_ifu_fdp.v
+   wire                 fdp_fcl_ibit_s;         // From fdp of sparc_ifu_fdp.v
+   wire [5:2]           fdp_fcl_op3_s;          // From fdp of sparc_ifu_fdp.v
+   wire [1:0]           fdp_fcl_op_s;           // From fdp of sparc_ifu_fdp.v
+   wire                 fdp_fcl_pc_oor_e;       // From fdp of sparc_ifu_fdp.v
+   wire [3:0]           fdp_fcl_pc_oor_vec_f;   // From fdp of sparc_ifu_fdp.v
+   wire                 fdp_fcl_swc_s2;         // From fdp of sparc_ifu_fdp.v
+   wire [11:5]          fdp_icv_index_bf;       // From fdp of sparc_ifu_fdp.v
+   wire [1:0]           fuse_icd_repair_en;     // From icdhdr of cmp_sram_redhdr.v
+   wire [7:0]           fuse_icd_repair_value;  // From icdhdr of cmp_sram_redhdr.v
+   wire [5:0]           fuse_icd_rid;           // From icdhdr of cmp_sram_redhdr.v
+   wire                 fuse_icd_wren;          // From icdhdr of cmp_sram_redhdr.v
+   wire [1:0]           icd_fuse_repair_en;     // From icd of bw_r_icd.v
+   wire [7:0]           icd_fuse_repair_value;  // From icd of bw_r_icd.v
+   wire [135:0]         icd_wsel_fetdata_s1;    // From icd of bw_r_icd.v
+   wire [135:0]         icd_wsel_topdata_s1;    // From icd of bw_r_icd.v
+   wire [3:0]           icv_itlb_valid_f;       // From icv of bw_r_rf16x32.v
+   wire                 ifc_ifd_addr_sel_asi_i2_l;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_addr_sel_bist_i2_l;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_addr_sel_fill_i2_l;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_addr_sel_old_i2_l;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_errinv_e;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_filladdr4_i2;   // From ifqctl of sparc_ifu_ifqctl.v
+   wire [3:0]           ifc_ifd_finst_sel_l;    // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_idx_sel_fwd_i2; // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_ifqbyp_en_l;    // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_ifqbyp_sel_asi_l;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_ifqbyp_sel_fwd_l;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_ifqbyp_sel_inq_l;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_ifqbyp_sel_lsu_l;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_ld_inq_i1;      // From ifqctl of sparc_ifu_ifqctl.v
+   wire [3:0]           ifc_ifd_ldmil_sel_new;  // From ifqctl of sparc_ifu_ifqctl.v
+   wire [3:0]           ifc_ifd_milfill_sel_i2_l;// From ifqctl of sparc_ifu_ifqctl.v
+   wire [3:0]           ifc_ifd_milreq_sel_d_l; // From ifqctl of sparc_ifu_ifqctl.v
+   wire [4:2]           ifc_ifd_pcxline_adj_d;  // From ifqctl of sparc_ifu_ifqctl.v
+   wire [1:0]           ifc_ifd_repway_s;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_reqvalid_e;     // From ifqctl of sparc_ifu_ifqctl.v
+   wire [1:0]           ifc_ifd_thrid_e;        // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_ifd_uncached_e;     // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_inv_asireq_i2;      // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifc_inv_ifqadv_i2;      // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifd_ifc_4bpkt_i2;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire                 ifd_ifc_asi_vachklo_i2; // From ifqdp of sparc_ifu_ifqdp.v
+   wire [3:2]           ifd_ifc_asiaddr_i2;     // From ifqdp of sparc_ifu_ifqdp.v
+   wire                 ifd_ifc_cpxce_i2;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire                 ifd_ifc_cpxms_i2;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire                 ifd_ifc_cpxnc_i2;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire [`CPX_RQ_SIZE:0]ifd_ifc_cpxreq_i1;      // From ifqdp of sparc_ifu_ifqdp.v
+   wire [3:0]           ifd_ifc_cpxreq_nxt;     // From ifqdp of sparc_ifu_ifqdp.v
+   wire [1:0]           ifd_ifc_cpxthr_nxt;     // From ifqdp of sparc_ifu_ifqdp.v
+   wire                 ifd_ifc_cpxue_i2;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire                 ifd_ifc_cpxvld_i2;      // From ifqdp of sparc_ifu_ifqdp.v
+   wire [2:0]           ifd_ifc_destid0;        // From ifqdp of sparc_ifu_ifqdp.v
+   wire [2:0]           ifd_ifc_destid1;        // From ifqdp of sparc_ifu_ifqdp.v
+   wire [2:0]           ifd_ifc_destid2;        // From ifqdp of sparc_ifu_ifqdp.v
+   wire [2:0]           ifd_ifc_destid3;        // From ifqdp of sparc_ifu_ifqdp.v
+   wire                 ifd_ifc_fwd2ic_i2;      // From ifqdp of sparc_ifu_ifqdp.v
+   wire [1:0]           ifd_ifc_instoffset0;    // From ifqdp of sparc_ifu_ifqdp.v
+   wire [1:0]           ifd_ifc_instoffset1;    // From ifqdp of sparc_ifu_ifqdp.v
+   wire [1:0]           ifd_ifc_instoffset2;    // From ifqdp of sparc_ifu_ifqdp.v
+   wire [1:0]           ifd_ifc_instoffset3;    // From ifqdp of sparc_ifu_ifqdp.v
+   wire                 ifd_ifc_iobpkt_i2;      // From ifqdp of sparc_ifu_ifqdp.v
+   wire [3:0]           ifd_ifc_miladdr4_i2;    // From ifqdp of sparc_ifu_ifqdp.v
+   wire [3:0]           ifd_ifc_milhit_s;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire [2:0]           ifd_ifc_newdestid_s;    // From ifqdp of sparc_ifu_ifqdp.v
+   wire [4:2]           ifd_ifc_pcxline_d;      // From ifqdp of sparc_ifu_ifqdp.v
+   wire [`CPX_WIDTH-1:0]ifd_inv_ifqop_i2;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire [1:0]           ifd_inv_wrway_i2;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire [3:0]           ifq_dtu_pred_rdy;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire [3:0]           ifq_dtu_thrrdy;         // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_asi_erraddr_i2; // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_asi_erren_i2;   // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_asi_errinj_i2;  // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_asi_errstat_i2; // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_asi_imask_i2;   // From ifqctl of sparc_ifu_ifqctl.v
+   wire [47:0]          ifq_erb_asidata_i2;     // From ifqdp of sparc_ifu_ifqdp.v
+   wire [1:0]           ifq_erb_asiway_f;       // From invctl of sparc_ifu_invctl.v
+   wire                 ifq_erb_asiwr_i2;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_ce_rep;         // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_fwdrd_bf;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_ifet_ce;        // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_io_ue;          // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_l2_ue;          // From ifqctl of sparc_ifu_ifqctl.v
+   wire [1:0]           ifq_erb_l2err_tid;      // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_rdinst_f;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_rdtag_f;        // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_erb_ue_rep;         // From ifqctl of sparc_ifu_ifqctl.v
+   wire [`IC_IDX_HI:4]  ifq_erb_wrindex_f;      // From ifqdp of sparc_ifu_ifqdp.v
+   wire [1:0]           ifq_fcl_asi_tid_bf;     // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_fcl_asird_bf;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire [3:0]           ifq_fcl_fill_thr;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_fcl_flush_sonly_e;  // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_fcl_icd_wrreq_bf;   // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_fcl_ictv_wrreq_bf;  // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_fcl_invreq_bf;      // From invctl of sparc_ifu_invctl.v
+   wire                 ifq_fcl_rdreq_bf;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_fcl_stallreq;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_fcl_wrreq_bf;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire [32:0]          ifq_fdp_fill_inst;      // From ifqdp of sparc_ifu_ifqdp.v
+   wire                 ifq_icd_data_sel_bist_i2;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_icd_data_sel_fill_i2;// From ifqctl of sparc_ifu_ifqctl.v
+   wire                 ifq_icd_data_sel_old_i2;// From ifqctl of sparc_ifu_ifqctl.v
+   wire [`IC_IDX_HI:2]  ifq_icd_index_bf;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire [3:0]           ifq_icd_worden_bf;      // From ifqctl of sparc_ifu_ifqctl.v
+   wire [135:0]         ifq_icd_wrdata_i2;      // From ifqdp of sparc_ifu_ifqdp.v
+   wire [1:0]           ifq_icd_wrway_bf;       // From ifqdp of sparc_ifu_ifqdp.v
+   wire [3:0]           ifq_ict_dec_wrway_bf;   // From invctl of sparc_ifu_invctl.v
+   wire                 ifq_icv_wrdata_bf;      // From ifqctl of sparc_ifu_ifqctl.v
+   wire [15:0]          ifq_icv_wren_bf;        // From invctl of sparc_ifu_invctl.v
+   wire [`IC_IDX_HI:5]  ifq_icv_wrindex_bf;     // From invctl of sparc_ifu_invctl.v
+   wire                 ifq_swl_stallreq;       // From ifqctl of sparc_ifu_ifqctl.v
+   wire                 imd_dcl_abit_d;         // From imd of sparc_ifu_imd.v
+   wire [3:0]           imd_dcl_brcond_d;       // From imd of sparc_ifu_imd.v
+   wire [7:0]           imd_dcl_mvcond_d;       // From imd of sparc_ifu_imd.v
+   wire                 inv_ifc_inv_pending;    // From invctl of sparc_ifu_invctl.v
+   wire [7:0]           mbist_icache_index;     // From mbist of sparc_ifu_mbist.v
+   wire                 mbist_icache_read;      // From mbist of sparc_ifu_mbist.v
+   wire [1:0]           mbist_icache_way;       // From mbist of sparc_ifu_mbist.v
+   wire                 mbist_icache_word;      // From mbist of sparc_ifu_mbist.v
+   wire                 mbist_icache_write;     // From mbist of sparc_ifu_mbist.v
+   wire                 mbist_ifq_run_bist;     // From mbist of sparc_ifu_mbist.v
+   wire [3:0]           swl_dcl_thr_d;          // From swl of sparc_ifu_swl.v
+   wire [3:0]           swl_dcl_thr_w2;         // From swl of sparc_ifu_swl.v
+   wire                 swl_dec_divbusy_e;      // From swl of sparc_ifu_swl.v
+   wire                 swl_dec_fp_enable_d;    // From swl of sparc_ifu_swl.v
+   wire                 swl_dec_fpbusy_e;       // From swl of sparc_ifu_swl.v
+   wire                 swl_dec_ibe_e;          // From swl of sparc_ifu_swl.v
+   wire                 swl_dec_mulbusy_e;      // From swl of sparc_ifu_swl.v
+   wire [10:0]          swl_sscan_thrstate;     // From swl of sparc_ifu_swl.v
+   wire [33:0]          wsel_fdp_fetdata_s1;    // From wseldp of sparc_ifu_wseldp.v
+   wire [33:0]          wsel_fdp_topdata_s1;    // From wseldp of sparc_ifu_wseldp.v
+   wire                 wsr_fixed_inst_w;       // From dec of sparc_ifu_dec.v
+   // End of automatics
+
+   // The TLB is not auto-instantiated, so the wires on its interface are declared by hand
+   wire           fcl_itlb_invall_f_l;	// From fcl of sparc_ifu_fcl.v
+
+   wire           itlb_fcl_imiss_s_l;     // To fcl of sparc_ifu_fcl.v
+   wire           itlb_fcl_tlbmiss_f_l;   // To fcl of sparc_ifu_fcl.v
+   wire [3:0]     itlb_wsel_waysel_s1;     // To icd of sparc_ifu_icd.v (NOTE(review): AUTOWIRE comments above give icd as bw_r_icd.v -- confirm)
+   wire [39:10]   itlb_ifq_paddr_s;       // To ifqdp of sparc_ifu_ifqdp.v, ...
+   wire [42:0]    itlb_rd_tte_data;       // To errdp of sparc_ifu_errdp.v
+   wire [58:0]    itlb_rd_tte_tag;        // To errdp of sparc_ifu_errdp.v
+   
+   wire           fcl_itlb_addr_mask_l;   // From fcl of sparc_ifu_fcl.v
+   wire           fcl_itlb_cam_bypass_bf; // From fcl of sparc_ifu_fcl.v
+   wire [2:0]     fcl_itlb_cam_pid_bf;    // From fcl of sparc_ifu_fcl.v
+   wire           fcl_itlb_cam_real_bf;   // From fcl of sparc_ifu_fcl.v
+   wire           fcl_itlb_cam_vld_bf;    // From fcl of sparc_ifu_fcl.v
+   wire           fcl_itlb_data_rd_vld_bf;// From fcl of sparc_ifu_fcl.v
+   wire           fcl_itlb_dmp_vld_bf;    // From fcl of sparc_ifu_fcl.v
+   wire           fcl_itlb_dmp_all_bf;    // From fcl of sparc_ifu_fcl.v
+   wire           fcl_itlb_tag_rd_vld_bf; // From fcl of sparc_ifu_fcl.v
+   wire           fcl_itlb_wr_vld_bf;     // From fcl of sparc_ifu_fcl.v
+   wire [47:2]    fdp_icd_vaddr_bf;       // From fdp of sparc_ifu_fdp.v
+   wire [12:0]    fdp_itlb_ctxt_bf;       // From fdp of sparc_ifu_fdp.v
+   wire [32:0]    ict_itlb_tag0_f;        // From ict of bw_r_idct.v
+   wire [32:0]    ict_itlb_tag1_f;        // From ict of bw_r_idct.v
+   wire [32:0]    ict_itlb_tag2_f;        // From ict of bw_r_idct.v
+   wire [32:0]    ict_itlb_tag3_f;        // From ict of bw_r_idct.v
+
+   // sscan rename (net declared by hand, outside the AUTOWIRE region)
+   wire [3:0]       ifq_sscan_data;         // From ifqctl of sparc_ifu_ifqctl.v
+   // bist rename (net declared by hand, outside the AUTOWIRE region)
+   wire [7:0]       mbist_icache_wdata;
+   
+   
+   // rptr bus for BIST reads of the icache
+   wire [67:0]      wsel_mbist_icache_data;
+
+   // bus width mismatch: declared by hand with an explicit [`IC_TAG_SZ:0] range
+   wire [`IC_TAG_SZ:0] ifq_ict_wrtag_f;        // From ifqdp of sparc_ifu_ifqdp.v
+
+   // scan wires: each scan0_N net joins the so port of one instance to the si port of the next
+   wire                scan0_1;   // dec.so -> swl.si
+   wire                scan0_2;   // swl.so -> dcl.si
+   wire                scan0_3;   // dcl.so -> imd.si
+   wire                scan0_4;   // imd.so -> fdp.si
+   wire                scan0_5;   // fdp.so
+   wire                scan0_6;
+   wire                scan0_7;
+   wire                scan0_8;
+   wire                scan0_9;
+   wire                scan0_10;
+   wire                scan0_11;
+   wire                scan0_12;
+   wire                scan0_13;
+
+   wire                short_scan1_1;
+   wire                short_scan0_1;
+   wire                short_scan0_2;
+   
+   
+//----------------------------------------------------------------------
+// Code starts here
+//----------------------------------------------------------------------
+
+//   sparc_ifu_dtu dtu(
+//		     .thr_config_in_w (exu_tlu_wsr_data_w[2:0]),
+//		      /*AUTOINST*/);
+
+   // Decode (sparc_ifu_dec): takes dtu_inst_d[31:0] (driven by imd) and the swl_dec_* status nets from swl
+   sparc_ifu_dec dec(
+                     .so                (scan0_1),   // scan chain output, feeds swl.si
+                     .si                (si0),       // scan chain input, taken from si0
+			/*AUTOINST*/
+                     // Outputs
+                     .ifu_exu_aluop_d   (ifu_exu_aluop_d[2:0]),
+                     .ifu_exu_invert_d  (ifu_exu_invert_d),
+                     .ifu_exu_useimm_d  (ifu_exu_useimm_d),
+                     .ifu_exu_usecin_d  (ifu_exu_usecin_d),
+                     .ifu_exu_enshift_d (ifu_exu_enshift_d),
+                     .ifu_exu_tagop_d   (ifu_exu_tagop_d),
+                     .ifu_exu_tv_d      (ifu_exu_tv_d),
+                     .ifu_exu_muls_d    (ifu_exu_muls_d),
+                     .ifu_exu_ialign_d  (ifu_exu_ialign_d),
+                     .ifu_exu_range_check_jlret_d(ifu_exu_range_check_jlret_d),
+                     .ifu_exu_range_check_other_d(ifu_exu_range_check_other_d),
+                     .ifu_exu_shiftop_d (ifu_exu_shiftop_d[2:0]),
+                     .ifu_exu_muldivop_d(ifu_exu_muldivop_d[4:0]),
+                     .ifu_exu_wen_d     (ifu_exu_wen_d),
+                     .ifu_exu_setcc_d   (ifu_exu_setcc_d),
+                     .ifu_exu_rd_ifusr_e(ifu_exu_rd_ifusr_e),
+                     .ifu_exu_rd_exusr_e(ifu_exu_rd_exusr_e),
+                     .ifu_exu_rd_ffusr_e(ifu_exu_rd_ffusr_e),
+                     .ifu_exu_rs1_vld_d (ifu_exu_rs1_vld_d),
+                     .ifu_exu_rs2_vld_d (ifu_exu_rs2_vld_d),
+                     .ifu_exu_rs3e_vld_d(ifu_exu_rs3e_vld_d),
+                     .ifu_exu_rs3o_vld_d(ifu_exu_rs3o_vld_d),
+                     .ifu_exu_use_rsr_e_l(ifu_exu_use_rsr_e_l),
+                     .ifu_exu_save_d    (ifu_exu_save_d),
+                     .ifu_exu_restore_d (ifu_exu_restore_d),
+                     .ifu_exu_return_d  (ifu_exu_return_d),
+                     .ifu_exu_flushw_e  (ifu_exu_flushw_e),
+                     .ifu_exu_saved_e   (ifu_exu_saved_e),
+                     .ifu_exu_restored_e(ifu_exu_restored_e),
+                     .ifu_tlu_rsr_inst_d(ifu_tlu_rsr_inst_d),
+                     .ifu_lsu_wsr_inst_d(ifu_lsu_wsr_inst_d),
+                     .ifu_exu_wsr_inst_d(ifu_exu_wsr_inst_d),
+                     .ifu_tlu_done_inst_d(ifu_tlu_done_inst_d),
+                     .ifu_tlu_retry_inst_d(ifu_tlu_retry_inst_d),
+                     .ifu_lsu_ld_inst_e (ifu_lsu_ld_inst_e),
+                     .ifu_lsu_st_inst_e (ifu_lsu_st_inst_e),
+                     .ifu_lsu_pref_inst_e(ifu_lsu_pref_inst_e),
+                     .ifu_lsu_alt_space_e(ifu_lsu_alt_space_e),
+                     .ifu_lsu_alt_space_d(ifu_lsu_alt_space_d),
+                     .ifu_tlu_alt_space_d(ifu_tlu_alt_space_d),
+                     .ifu_lsu_memref_d  (ifu_lsu_memref_d),
+                     .ifu_lsu_sign_ext_e(ifu_lsu_sign_ext_e),
+                     .ifu_lsu_ldstub_e  (ifu_lsu_ldstub_e),
+                     .ifu_lsu_casa_e    (ifu_lsu_casa_e),
+                     .ifu_exu_casa_d    (ifu_exu_casa_d),
+                     .ifu_lsu_swap_e    (ifu_lsu_swap_e),
+                     .ifu_tlu_mb_inst_e (ifu_tlu_mb_inst_e),
+                     .ifu_tlu_sir_inst_m(ifu_tlu_sir_inst_m),
+                     .ifu_tlu_flsh_inst_e(ifu_tlu_flsh_inst_e),
+                     .ifu_lsu_ldst_dbl_e(ifu_lsu_ldst_dbl_e),
+                     .ifu_lsu_ldst_fp_e (ifu_lsu_ldst_fp_e),
+                     .ifu_lsu_ldst_size_e(ifu_lsu_ldst_size_e[1:0]),
+                     .ifu_ffu_fpop1_d   (ifu_ffu_fpop1_d),
+                     .ifu_ffu_visop_d   (ifu_ffu_visop_d),
+                     .ifu_ffu_fpop2_d   (ifu_ffu_fpop2_d),
+                     .ifu_ffu_fld_d     (ifu_ffu_fld_d),
+                     .ifu_ffu_fst_d     (ifu_ffu_fst_d),
+                     .ifu_ffu_ldst_size_d(ifu_ffu_ldst_size_d),
+                     .ifu_ffu_ldfsr_d   (ifu_ffu_ldfsr_d),
+                     .ifu_ffu_ldxfsr_d  (ifu_ffu_ldxfsr_d),
+                     .ifu_ffu_stfsr_d   (ifu_ffu_stfsr_d),
+                     .ifu_ffu_quad_op_e (ifu_ffu_quad_op_e),
+                     .dec_fcl_rdsr_sel_pc_d(dec_fcl_rdsr_sel_pc_d),
+                     .dec_fcl_rdsr_sel_thr_d(dec_fcl_rdsr_sel_thr_d),
+                     .dec_imd_call_inst_d(dec_imd_call_inst_d),
+                     .dtu_fcl_flush_sonly_e(dtu_fcl_flush_sonly_e),
+                     .dtu_fcl_illinst_e (dtu_fcl_illinst_e),
+                     .dtu_fcl_fpdis_e   (dtu_fcl_fpdis_e),
+                     .dtu_fcl_privop_e  (dtu_fcl_privop_e),
+                     .dtu_fcl_imask_hit_e(dtu_fcl_imask_hit_e),
+                     .dtu_fcl_br_inst_d (dtu_fcl_br_inst_d),
+                     .dtu_fcl_sir_inst_e(dtu_fcl_sir_inst_e),
+                     .dtu_ifq_kill_latest_d(dtu_ifq_kill_latest_d),
+                     .dec_swl_wrt_tcr_w (dec_swl_wrt_tcr_w),
+                     .dec_swl_wrtfprs_w (dec_swl_wrtfprs_w),
+                     .dec_swl_ll_done_d (dec_swl_ll_done_d),
+                     .dec_swl_br_done_d (dec_swl_br_done_d),
+                     .dec_swl_rdsr_sel_thr_d(dec_swl_rdsr_sel_thr_d),
+                     .dec_swl_ld_inst_d (dec_swl_ld_inst_d),
+                     .dec_swl_sta_inst_e(dec_swl_sta_inst_e),
+                     .dec_swl_std_inst_d(dec_swl_std_inst_d),
+                     .dec_swl_st_inst_d (dec_swl_st_inst_d),
+                     .dec_swl_fpop_d    (dec_swl_fpop_d),
+                     .dec_swl_allfp_d   (dec_swl_allfp_d),
+                     .dec_swl_frf_upper_d(dec_swl_frf_upper_d),
+                     .dec_swl_frf_lower_d(dec_swl_frf_lower_d),
+                     .dec_swl_div_inst_d(dec_swl_div_inst_d),
+                     .dec_swl_mul_inst_d(dec_swl_mul_inst_d),
+                     .wsr_fixed_inst_w  (wsr_fixed_inst_w),
+                     .ifu_exu_sethi_inst_d(ifu_exu_sethi_inst_d),
+                     .dec_dcl_cctype_d  (dec_dcl_cctype_d[2:0]),
+                     // Inputs
+                     .rclk              (rclk),
+                     .se                (se),
+                     .dtu_inst_d        (dtu_inst_d[31:0]),
+                     .erb_dtu_imask     (erb_dtu_imask[38:0]),
+                     .swl_dec_ibe_e     (swl_dec_ibe_e),
+                     .dtu_inst_anull_e  (dtu_inst_anull_e),
+                     .lsu_ifu_ldsta_internal_e(lsu_ifu_ldsta_internal_e),
+                     .fcl_dtu_tlzero_d  (fcl_dtu_tlzero_d),
+                     .fcl_dtu_privmode_d(fcl_dtu_privmode_d),
+                     .fcl_dtu_hprivmode_d(fcl_dtu_hprivmode_d),
+                     .fcl_dtu_inst_vld_d(fcl_dtu_inst_vld_d),
+                     .fcl_dtu_ely_inst_vld_d(fcl_dtu_ely_inst_vld_d),
+                     .fcl_dec_intr_vld_d(fcl_dec_intr_vld_d),
+                     .fcl_dtu_inst_vld_e(fcl_dtu_inst_vld_e),
+                     .fcl_dec_dslot_s   (fcl_dec_dslot_s),
+                     .swl_dec_mulbusy_e (swl_dec_mulbusy_e),
+                     .swl_dec_fpbusy_e  (swl_dec_fpbusy_e),
+                     .swl_dec_divbusy_e (swl_dec_divbusy_e),
+                     .swl_dec_fp_enable_d(swl_dec_fp_enable_d));
+
+
+   // Pipeline Control and Switch Logic (sparc_ifu_swl): second link of the scan0 chain (scan0_1 -> swl -> scan0_2)
+   sparc_ifu_swl swl(
+                     .so                (scan0_2),   // scan chain output, feeds dcl.si
+                     .si                (scan0_1),   // scan chain input, from dec.so
+		                 .thr_config_in_m	(exu_tlu_wsr_data_m[2:0]),   // hand-wired: low three bits of exu_tlu_wsr_data_m
+                     .extra_longlat_compl(4'b0),   // hand-wired: all four bits tied to zero
+                     
+		                 /*AUTOINST*/
+                     // Outputs
+                     .swl_sscan_thrstate(swl_sscan_thrstate[10:0]),
+                     .dtu_reset         (dtu_reset),
+                     .swl_dec_mulbusy_e (swl_dec_mulbusy_e),
+                     .swl_dec_divbusy_e (swl_dec_divbusy_e),
+                     .swl_dec_fpbusy_e  (swl_dec_fpbusy_e),
+                     .swl_dec_fp_enable_d(swl_dec_fp_enable_d),
+                     .swl_dec_ibe_e     (swl_dec_ibe_e),
+                     .dtu_fcl_ntr_s     (dtu_fcl_ntr_s),
+                     .dtu_fcl_running_s (dtu_fcl_running_s),
+                     .dtu_fcl_rollback_g(dtu_fcl_rollback_g),
+                     .dtu_fcl_retract_d (dtu_fcl_retract_d),
+                     .dtu_fcl_thr_active(dtu_fcl_thr_active[3:0]),
+                     .dtu_fcl_nextthr_bf(dtu_fcl_nextthr_bf[3:0]),
+                     .swl_dcl_thr_d     (swl_dcl_thr_d[3:0]),
+                     .swl_dcl_thr_w2    (swl_dcl_thr_w2[3:0]),
+                     .dtu_fdp_thrconf_e (dtu_fdp_thrconf_e[40:0]),
+                     // Inputs
+                     .rclk              (rclk),
+                     .se                (se),
+                     .gdbginit_l        (gdbginit_l),
+                     .arst_l            (arst_l),
+                     .grst_l            (grst_l),
+                     .ctu_sscan_tid     (ctu_sscan_tid[3:0]),
+                     .ifq_dtu_thrrdy    (ifq_dtu_thrrdy[3:0]),
+                     .ifq_dtu_pred_rdy  (ifq_dtu_pred_rdy[3:0]),
+                     .ifu_tlu_inst_vld_w(ifu_tlu_inst_vld_w),
+                     .ifu_tlu_ttype_vld_m(ifu_tlu_ttype_vld_m),
+                     .fcl_dtu_hprivmode_d(fcl_dtu_hprivmode_d),
+                     .fcl_dtu_hprivmode_w2(fcl_dtu_hprivmode_w2),
+                     .tlu_ifu_flush_pipe_w(tlu_ifu_flush_pipe_w),
+                     .fcl_swl_flush_w   (fcl_swl_flush_w),
+                     .fcl_dtu_sync_intr_d(fcl_dtu_sync_intr_d),
+                     .fcl_dtu_nuke_thr_w(fcl_dtu_nuke_thr_w),
+                     .fcl_dtu_rst_thr_w (fcl_dtu_rst_thr_w),
+                     .fcl_dtu_resum_thr_w(fcl_dtu_resum_thr_w),
+                     .fcl_dtu_thr_f     (fcl_dtu_thr_f[3:0]),
+                     .tlu_hpstate_ibe   (tlu_hpstate_ibe[3:0]),
+                     .lsu_ifu_ldsta_internal_e(lsu_ifu_ldsta_internal_e),
+                     .tlu_ifu_trappc_vld_w1(tlu_ifu_trappc_vld_w1),
+                     .dec_swl_ll_done_d (dec_swl_ll_done_d),
+                     .dec_swl_br_done_d (dec_swl_br_done_d),
+                     .dec_swl_rdsr_sel_thr_d(dec_swl_rdsr_sel_thr_d),
+                     .dec_swl_std_inst_d(dec_swl_std_inst_d),
+                     .dec_swl_sta_inst_e(dec_swl_sta_inst_e),
+                     .wsr_fixed_inst_w  (wsr_fixed_inst_w),
+                     .dec_swl_ld_inst_d (dec_swl_ld_inst_d),
+                     .dec_swl_mul_inst_d(dec_swl_mul_inst_d),
+                     .dec_swl_div_inst_d(dec_swl_div_inst_d),
+                     .dec_swl_fpop_d    (dec_swl_fpop_d),
+                     .dec_swl_allfp_d   (dec_swl_allfp_d),
+                     .dec_swl_frf_upper_d(dec_swl_frf_upper_d),
+                     .dec_swl_frf_lower_d(dec_swl_frf_lower_d),
+                     .dec_swl_wrtfprs_w (dec_swl_wrtfprs_w),
+                     .dcl_swl_tcc_done_m(dcl_swl_tcc_done_m),
+                     .exu_ifu_longop_done_g(exu_ifu_longop_done_g[3:0]),
+                     .exu_ifu_spill_e   (exu_ifu_spill_e),
+                     .lsu_ifu_ldst_cmplt(lsu_ifu_ldst_cmplt[3:0]),
+                     .lsu_ifu_dc_parity_error_w2(lsu_ifu_dc_parity_error_w2),
+                     .lsu_ifu_stbcnt0   (lsu_ifu_stbcnt0[3:0]),
+                     .lsu_ifu_stbcnt1   (lsu_ifu_stbcnt1[3:0]),
+                     .lsu_ifu_stbcnt2   (lsu_ifu_stbcnt2[3:0]),
+                     .lsu_ifu_stbcnt3   (lsu_ifu_stbcnt3[3:0]),
+                     .lsu_ifu_quad_asi_e(lsu_ifu_quad_asi_e),
+                     .ffu_ifu_fpop_done_w2(ffu_ifu_fpop_done_w2),
+                     .ffu_ifu_tid_w2    (ffu_ifu_tid_w2[1:0]),
+                     .ffu_ifu_fst_ce_w  (ffu_ifu_fst_ce_w),
+                     .tlu_ifu_trap_tid_w1(tlu_ifu_trap_tid_w1[1:0]),
+                     .tlu_ifu_pstate_pef(tlu_ifu_pstate_pef[3:0]),
+                     .lsu_ifu_ldst_miss_g(lsu_ifu_ldst_miss_g),
+                     .fcl_swl_int_activate_i3(fcl_swl_int_activate_i3[3:0]),
+                     .fcl_swl_flush_wake_w(fcl_swl_flush_wake_w),
+                     .ifq_swl_stallreq  (ifq_swl_stallreq),
+                     .fcl_dtu_stall_bf  (fcl_dtu_stall_bf),
+                     .fcl_swl_swout_f   (fcl_swl_swout_f),
+                     .fcl_swl_swcvld_s  (fcl_swl_swcvld_s),
+                     .fdp_fcl_swc_s2    (fdp_fcl_swc_s2),
+                     .fcl_ifq_icmiss_s1 (fcl_ifq_icmiss_s1),
+                     .fcl_dtu_inst_vld_e(fcl_dtu_inst_vld_e),
+                     .fcl_dtu_intr_vld_e(fcl_dtu_intr_vld_e),
+                     .fcl_dtu_inst_vld_d(fcl_dtu_inst_vld_d),
+                     .erb_dtu_ifeterr_d1(erb_dtu_ifeterr_d1),
+                     .dtu_inst_anull_e  (dtu_inst_anull_e),
+                     .const_cpuid       (const_cpuid[3:0]),
+                     .dec_swl_wrt_tcr_w (dec_swl_wrt_tcr_w),
+                     .dec_swl_st_inst_d (dec_swl_st_inst_d));
+   
+   // Branch Logic (sparc_ifu_dcl): third link of the scan0 chain (scan0_2 -> dcl -> scan0_3)
+   sparc_ifu_dcl  dcl(
+                      .so               (scan0_3),   // scan chain output, feeds imd.si
+                      .si               (scan0_2),   // scan chain input, from swl.so
+		                  .dtu_dcl_opf2_d	(dtu_inst_d[7]),   // hand-wired: bit 7 of dtu_inst_d (driven by imd)
+                      .fdp_dcl_op_s     (fdp_dtu_inst_s[31:30]),   // hand-wired: op field, bits [31:30] of fdp_dtu_inst_s
+                      .fdp_dcl_op3_s    (fdp_dtu_inst_s[24:19]),   // hand-wired: op3 field, bits [24:19] of fdp_dtu_inst_s
+                      
+		                  /*AUTOINST*/
+                      // Outputs
+                      .ifu_exu_kill_e   (ifu_exu_kill_e),
+                      .ifu_exu_dontmv_regz0_e(ifu_exu_dontmv_regz0_e),
+                      .ifu_exu_dontmv_regz1_e(ifu_exu_dontmv_regz1_e),
+                      .ifu_exu_tcc_e    (ifu_exu_tcc_e),
+                      .ifu_exu_dbrinst_d(ifu_exu_dbrinst_d),
+                      .ifu_ffu_mvcnd_m  (ifu_ffu_mvcnd_m),
+                      .dcl_fcl_bcregz0_e(dcl_fcl_bcregz0_e),
+                      .dcl_fcl_bcregz1_e(dcl_fcl_bcregz1_e),
+                      .dtu_inst_anull_e (dtu_inst_anull_e),
+                      .dcl_swl_tcc_done_m(dcl_swl_tcc_done_m),
+                      .dcl_imd_immdata_sel_simm13_d_l(dcl_imd_immdata_sel_simm13_d_l),
+                      .dcl_imd_immdata_sel_movcc_d_l(dcl_imd_immdata_sel_movcc_d_l),
+                      .dcl_imd_immdata_sel_sethi_d_l(dcl_imd_immdata_sel_sethi_d_l),
+                      .dcl_imd_immdata_sel_movr_d_l(dcl_imd_immdata_sel_movr_d_l),
+                      .dcl_imd_broff_sel_call_d_l(dcl_imd_broff_sel_call_d_l),
+                      .dcl_imd_broff_sel_br_d_l(dcl_imd_broff_sel_br_d_l),
+                      .dcl_imd_broff_sel_bcc_d_l(dcl_imd_broff_sel_bcc_d_l),
+                      .dcl_imd_broff_sel_bpcc_d_l(dcl_imd_broff_sel_bpcc_d_l),
+                      .dcl_imd_immbr_sel_br_d(dcl_imd_immbr_sel_br_d),
+                      // Inputs
+                      .rclk             (rclk),
+                      .se               (se),
+                      .dtu_reset        (dtu_reset),
+                      .exu_ifu_cc_d     (exu_ifu_cc_d[7:0]),
+                      .fcl_dcl_regz_e   (fcl_dcl_regz_e),
+                      .exu_ifu_regn_e   (exu_ifu_regn_e),
+                      .ffu_ifu_cc_w2    (ffu_ifu_cc_w2[7:0]),
+                      .ffu_ifu_cc_vld_w2(ffu_ifu_cc_vld_w2[3:0]),
+                      .tlu_ifu_flush_pipe_w(tlu_ifu_flush_pipe_w),
+                      .swl_dcl_thr_d    (swl_dcl_thr_d[3:0]),
+                      .swl_dcl_thr_w2   (swl_dcl_thr_w2[3:0]),
+                      .imd_dcl_brcond_d (imd_dcl_brcond_d[3:0]),
+                      .imd_dcl_mvcond_d (imd_dcl_mvcond_d[7:0]),
+                      .imd_dcl_abit_d   (imd_dcl_abit_d),
+                      .dec_dcl_cctype_d (dec_dcl_cctype_d[2:0]),
+                      .fcl_dtu_inst_vld_e(fcl_dtu_inst_vld_e),
+                      .fcl_dtu_intr_vld_e(fcl_dtu_intr_vld_e),
+                      .ifu_tlu_flush_w  (ifu_tlu_flush_w));
+
+/*   sparc_ifu_imd AUTO_TEMPLATE( 
+                         .dcl_imd_call_inst_d (dec_imd_call_inst_d),
+                         );
+ 
+*/   
+   sparc_ifu_imd  imd(   // takes fdp_dtu_inst_s[31:0] and the dcl_imd_* selects; drives dtu_inst_d[31:0] and ifu_exu_imm_data_d[31:0] among others
+                      .so               (scan0_4),                      // scan chain output, feeds fdp.si
+                      .si               (scan0_3),   // scan chain input, from dcl.so
+                      
+                      /*AUTOINST*/
+                      // Outputs
+                      .ifu_exu_imm_data_d(ifu_exu_imm_data_d[31:0]),
+                      .dtu_inst_d       (dtu_inst_d[31:0]),
+                      .ifu_exu_rd_d     (ifu_exu_rd_d[4:0]),
+                      .ifu_lsu_rd_e     (ifu_lsu_rd_e[4:0]),
+                      .ifu_lsu_imm_asi_d(ifu_lsu_imm_asi_d[7:0]),
+                      .ifu_tlu_imm_asi_d(ifu_tlu_imm_asi_d[8:0]),
+                      .ifu_lsu_imm_asi_vld_d(ifu_lsu_imm_asi_vld_d),
+                      .ifu_tlu_sraddr_d (ifu_tlu_sraddr_d[6:0]),
+                      .ifu_tlu_sraddr_d_v2(ifu_tlu_sraddr_d_v2[6:0]),
+                      .imd_dcl_brcond_d (imd_dcl_brcond_d[3:0]),
+                      .imd_dcl_mvcond_d (imd_dcl_mvcond_d[7:0]),
+                      .imd_dcl_abit_d   (imd_dcl_abit_d),
+                      .ifu_ffu_frs1_d   (ifu_ffu_frs1_d[4:0]),
+                      .ifu_ffu_frs2_d   (ifu_ffu_frs2_d[4:0]),
+                      .ifu_ffu_frd_d    (ifu_ffu_frd_d[4:0]),
+                      .ifu_ffu_fpopcode_d(ifu_ffu_fpopcode_d[8:0]),
+                      .ifu_ffu_fcc_num_d(ifu_ffu_fcc_num_d[1:0]),
+                      // Inputs
+                      .rclk             (rclk),
+                      .se               (se),
+                      .fdp_dtu_inst_s   (fdp_dtu_inst_s[31:0]),
+                      .fcl_imd_oddwin_d (fcl_imd_oddwin_d),
+                      .dcl_imd_immdata_sel_simm13_d_l(dcl_imd_immdata_sel_simm13_d_l),
+                      .dcl_imd_immdata_sel_movcc_d_l(dcl_imd_immdata_sel_movcc_d_l),
+                      .dcl_imd_immdata_sel_sethi_d_l(dcl_imd_immdata_sel_sethi_d_l),
+                      .dcl_imd_immdata_sel_movr_d_l(dcl_imd_immdata_sel_movr_d_l),
+                      .dcl_imd_broff_sel_call_d_l(dcl_imd_broff_sel_call_d_l),
+                      .dcl_imd_broff_sel_br_d_l(dcl_imd_broff_sel_br_d_l),
+                      .dcl_imd_broff_sel_bcc_d_l(dcl_imd_broff_sel_bcc_d_l),
+                      .dcl_imd_broff_sel_bpcc_d_l(dcl_imd_broff_sel_bpcc_d_l),
+                      .dcl_imd_immbr_sel_br_d(dcl_imd_immbr_sel_br_d),
+                      .dcl_imd_call_inst_d(dec_imd_call_inst_d)); // Templated
+   
+   sparc_ifu_fdp  fdp(  // instance fdp: hand-wired ports first; ports below /*AUTOINST*/ connect to same-named nets
+                      .so               (scan0_5),  // scan out; scan0_5 is the scan input of wseldp
+                      .si               (scan0_4),  // scan in
+                     .fdp_itlb_ctxt_bf (fdp_itlb_ctxt_bf[12:0]),  // also wired to itlb.tlb_ctxt
+                      .fdp_icd_vaddr_bf (fdp_icd_vaddr_bf[47:2]),  // slices of this bus are also wired to fcl, itlb and icd
+                      .icd_fdp_fetdata_s1(wsel_fdp_fetdata_s1[32:0]),  // from wseldp; bit 33 of the 34-bit wsel bus is not connected here
+                      .icd_fdp_topdata_s1(wsel_fdp_topdata_s1[32:0]),  // from wseldp; bit 33 of the 34-bit wsel bus is not connected here
+                      // eco 5362
+                      .fcl_fdp_addr_mask_d(ifu_exu_addr_mask_d),  // same net is wired to the fcl port of the same name
+                      /*AUTOINST*/
+                      // Outputs
+                      .fdp_icv_index_bf (fdp_icv_index_bf[11:5]),
+                      .fdp_erb_pc_f     (fdp_erb_pc_f[47:0]),
+                      .fdp_dtu_inst_s   (fdp_dtu_inst_s[31:0]),
+                      .ifu_exu_pc_d     (ifu_exu_pc_d[47:0]),
+                      .ifu_exu_rs1_s    (ifu_exu_rs1_s[4:0]),
+                      .ifu_exu_rs2_s    (ifu_exu_rs2_s[4:0]),
+                      .ifu_exu_rs3_s    (ifu_exu_rs3_s[4:0]),
+                      .ifu_tlu_pc_m     (ifu_tlu_pc_m[48:0]),
+                      .ifu_tlu_npc_m    (ifu_tlu_npc_m[48:0]),
+                      .ifu_tlu_pc_oor_e (ifu_tlu_pc_oor_e),
+                      .ifu_exu_pcver_e  (ifu_exu_pcver_e[63:0]),
+                      .fdp_fcl_swc_s2   (fdp_fcl_swc_s2),
+                      .fdp_fcl_pc_oor_vec_f(fdp_fcl_pc_oor_vec_f[3:0]),
+                      .fdp_fcl_pc_oor_e (fdp_fcl_pc_oor_e),
+                      .fdp_fcl_op_s     (fdp_fcl_op_s[1:0]),
+                      .fdp_fcl_op3_s    (fdp_fcl_op3_s[5:2]),
+                      .fdp_fcl_ibit_s   (fdp_fcl_ibit_s),
+                      // Inputs
+                      .rclk             (rclk),
+                      .se               (se),
+                      .const_maskid     (const_maskid[7:0]),
+                      .lsu_t0_pctxt_state(lsu_t0_pctxt_state[12:0]),
+                      .lsu_t1_pctxt_state(lsu_t1_pctxt_state[12:0]),
+                      .lsu_t2_pctxt_state(lsu_t2_pctxt_state[12:0]),
+                      .lsu_t3_pctxt_state(lsu_t3_pctxt_state[12:0]),
+                      .exu_ifu_brpc_e   (exu_ifu_brpc_e[47:0]),
+                      .tlu_ifu_trappc_w2(tlu_ifu_trappc_w2[48:0]),
+                      .tlu_ifu_trapnpc_w2(tlu_ifu_trapnpc_w2[48:0]),
+                      .tlu_itlb_dmp_nctxt_g(tlu_itlb_dmp_nctxt_g),
+                      .tlu_itlb_dmp_actxt_g(tlu_itlb_dmp_actxt_g),
+                      .tlu_itlb_tte_tag_w2(tlu_itlb_tte_tag_w2[12:0]),  // low 13 bits only; itlb receives the full [58:0]
+                      .dtu_fdp_thrconf_e(dtu_fdp_thrconf_e[40:0]),
+                      .ifq_fdp_fill_inst(ifq_fdp_fill_inst[32:0]),
+                      .fcl_fdp_oddwin_s (fcl_fdp_oddwin_s),
+                      .fcl_fdp_pcoor_vec_f(fcl_fdp_pcoor_vec_f[3:0]),
+                      .fcl_fdp_pcoor_f  (fcl_fdp_pcoor_f),
+                      .fcl_fdp_mask32b_f(fcl_fdp_mask32b_f),
+                      .fcl_fdp_tctxt_sel_prim(fcl_fdp_tctxt_sel_prim[3:0]),
+                      .fcl_fdp_usenir_sel_nir_s1(fcl_fdp_usenir_sel_nir_s1),
+                      .fcl_fdp_rbinst_sel_inste_s(fcl_fdp_rbinst_sel_inste_s[3:0]),
+                      .fcl_fdp_thrtnpc_sel_tnpc_l(fcl_fdp_thrtnpc_sel_tnpc_l[3:0]),
+                      .fcl_fdp_thrtnpc_sel_npcw_l(fcl_fdp_thrtnpc_sel_npcw_l[3:0]),
+                      .fcl_fdp_thrtnpc_sel_pcf_l(fcl_fdp_thrtnpc_sel_pcf_l[3:0]),
+                      .fcl_fdp_thrtnpc_sel_old_l(fcl_fdp_thrtnpc_sel_old_l[3:0]),
+                      .fcl_fdp_thr_s1_l (fcl_fdp_thr_s1_l[3:0]),
+                      .fcl_fdp_next_thr_bf_l(fcl_fdp_next_thr_bf_l[3:0]),
+                      .fcl_fdp_next_ctxt_bf_l(fcl_fdp_next_ctxt_bf_l[3:0]),
+                      .fcl_fdp_thr_s2_l (fcl_fdp_thr_s2_l[3:0]),
+                      .fcl_fdp_nirthr_s1_l(fcl_fdp_nirthr_s1_l[3:0]),
+                      .fcl_fdp_tpcbf_sel_pcp4_bf_l(fcl_fdp_tpcbf_sel_pcp4_bf_l[3:0]),
+                      .fcl_fdp_tpcbf_sel_brpc_bf_l(fcl_fdp_tpcbf_sel_brpc_bf_l[3:0]),
+                      .fcl_fdp_tpcbf_sel_trap_bf_l(fcl_fdp_tpcbf_sel_trap_bf_l[3:0]),
+                      .fcl_fdp_tpcbf_sel_old_bf_l(fcl_fdp_tpcbf_sel_old_bf_l[3:0]),
+                      .fcl_fdp_pcbf_sel_swpc_bf_l(fcl_fdp_pcbf_sel_swpc_bf_l),
+                      .fcl_fdp_pcbf_sel_nosw_bf_l(fcl_fdp_pcbf_sel_nosw_bf_l),
+                      .fcl_fdp_pcbf_sel_br_bf_l(fcl_fdp_pcbf_sel_br_bf_l),
+                      .fcl_fdp_trrbpc_sel_trap_bf_l(fcl_fdp_trrbpc_sel_trap_bf_l[3:0]),
+                      .fcl_fdp_trrbpc_sel_rb_bf_l(fcl_fdp_trrbpc_sel_rb_bf_l[3:0]),
+                      .fcl_fdp_trrbpc_sel_err_bf_l(fcl_fdp_trrbpc_sel_err_bf_l[3:0]),
+                      .fcl_fdp_trrbpc_sel_pcs_bf_l(fcl_fdp_trrbpc_sel_pcs_bf_l[3:0]),
+                      .fcl_fdp_noswpc_sel_tnpc_l_bf(fcl_fdp_noswpc_sel_tnpc_l_bf),
+                      .fcl_fdp_noswpc_sel_old_l_bf(fcl_fdp_noswpc_sel_old_l_bf),
+                      .fcl_fdp_noswpc_sel_inc_l_bf(fcl_fdp_noswpc_sel_inc_l_bf),
+                      .fcl_fdp_nextpcs_sel_pce_f_l(fcl_fdp_nextpcs_sel_pce_f_l[3:0]),
+                      .fcl_fdp_nextpcs_sel_pcd_f_l(fcl_fdp_nextpcs_sel_pcd_f_l[3:0]),
+                      .fcl_fdp_nextpcs_sel_pcs_f_l(fcl_fdp_nextpcs_sel_pcs_f_l[3:0]),
+                      .fcl_fdp_nextpcs_sel_pcf_f_l(fcl_fdp_nextpcs_sel_pcf_f_l[3:0]),
+                      .fcl_fdp_rdsr_sel_pc_e_l(fcl_fdp_rdsr_sel_pc_e_l),
+                      .fcl_fdp_rdsr_sel_ver_e_l(fcl_fdp_rdsr_sel_ver_e_l),
+                      .fcl_fdp_rdsr_sel_thr_e_l(fcl_fdp_rdsr_sel_thr_e_l),
+                      .fcl_fdp_inst_sel_curr_s_l(fcl_fdp_inst_sel_curr_s_l),
+                      .fcl_fdp_inst_sel_switch_s_l(fcl_fdp_inst_sel_switch_s_l),
+                      .fcl_fdp_inst_sel_nir_s_l(fcl_fdp_inst_sel_nir_s_l),
+                      .fcl_fdp_inst_sel_nop_s_l(fcl_fdp_inst_sel_nop_s_l),
+                      .fcl_fdp_tinst_sel_curr_s_l(fcl_fdp_tinst_sel_curr_s_l[3:0]),
+                      .fcl_fdp_tinst_sel_rb_s_l(fcl_fdp_tinst_sel_rb_s_l[3:0]),
+                      .fcl_fdp_tinst_sel_old_s_l(fcl_fdp_tinst_sel_old_s_l[3:0]),
+                      .fcl_fdp_tinst_sel_ifq_s_l(fcl_fdp_tinst_sel_ifq_s_l[3:0]),
+                      .fcl_fdp_dmpthr_l (fcl_fdp_dmpthr_l[3:0]),
+                      .fcl_fdp_ctxt_sel_dmp_bf_l(fcl_fdp_ctxt_sel_dmp_bf_l),
+                      .fcl_fdp_ctxt_sel_sw_bf_l(fcl_fdp_ctxt_sel_sw_bf_l),
+                      .fcl_fdp_ctxt_sel_curr_bf_l(fcl_fdp_ctxt_sel_curr_bf_l));
+
+   sparc_ifu_fcl fcl(  // instance fcl: hand-wired ports first; ports below /*AUTOINST*/ connect to same-named nets
+                     .so                (short_scan1_1),  // scan out onto the short_scan1 nets
+                     .si                (short_si1),  // scan in from short_si1
+                     .rst_tri_en        (mux_drive_disable),
+
+                     // keep around in case we need it later
+                     .ifu_reset_l       (),  // deliberately left unconnected
+
+		                 .fdp_fcl_va2_bf    (fdp_icd_vaddr_bf[2]),  // bit 2 of the fdp_icd_vaddr_bf bus driven by fdp
+                     .itlb_fcl_priv_s1  (itlb_rd_tte_data[`STLB_DATA_P]),  // one bit of the itlb read data, index given by the STLB_DATA_P macro
+                     .tlu_fcl_dmp_pid_bf (tlu_itlb_tte_tag_w2[58:56]),  // bits [58:56] of the bus that itlb receives as tlb_wr_tte_tag
+                     .tlu_fcl_dmp_real_bf (tlu_itlb_tte_tag_w2[55]),  // bit 55 of the same bus
+                     .itlb_fcl_cp_s1    (itlb_rd_tte_data[`STLB_DATA_CP]),  // one bit of the itlb read data, index given by the STLB_DATA_CP macro
+                     // need these here since itlb is not auto inst'ed
+		                 .fcl_itlb_invall_f_l(fcl_itlb_invall_f_l),  // wired to itlb.rst_soft_l
+                     .fcl_itlb_cam_vld_bf(fcl_itlb_cam_vld_bf),
+                     .fcl_itlb_cam_bypass_bf(fcl_itlb_cam_bypass_bf),
+                     .fcl_itlb_addr_mask_l(fcl_itlb_addr_mask_l),
+                     .fcl_itlb_cam_real_bf(fcl_itlb_cam_real_bf),
+                     .fcl_itlb_cam_pid_bf(fcl_itlb_cam_pid_bf[2:0]),
+                     .fcl_itlb_wr_vld_bf(fcl_itlb_wr_vld_bf),
+                     .fcl_itlb_dmp_vld_bf(fcl_itlb_dmp_vld_bf),
+                     .fcl_itlb_dmp_all_bf(fcl_itlb_dmp_all_bf),
+                     .fcl_itlb_tag_rd_vld_bf(fcl_itlb_tag_rd_vld_bf),
+                     .fcl_itlb_data_rd_vld_bf(fcl_itlb_data_rd_vld_bf),
+
+                     // eco 5362
+                     .fcl_fdp_addr_mask_d(ifu_exu_addr_mask_d),  // same net is wired to the fdp port of the same name
+                     
+		      /*AUTOINST*/
+                     // Outputs
+                     .fcl_icd_rdreq_bf  (fcl_icd_rdreq_bf),
+                     .fcl_icv_rdreq_bf  (fcl_icv_rdreq_bf),
+                     .fcl_icd_wrreq_bf  (fcl_icd_wrreq_bf),
+                     .fcl_ict_wrreq_bf  (fcl_ict_wrreq_bf),
+                     .fcl_icv_wrreq_bf  (fcl_icv_wrreq_bf),
+                     .fcl_icd_index_sel_ifq_bf(fcl_icd_index_sel_ifq_bf),
+                     .fcl_ifq_grant_bf  (fcl_ifq_grant_bf),
+                     .fcl_ifq_icmiss_s1 (fcl_ifq_icmiss_s1),
+                     .fcl_ifq_rdreq_s1  (fcl_ifq_rdreq_s1),
+                     .fcl_ifq_icache_en_s_l(fcl_ifq_icache_en_s_l),
+                     .fcl_ifq_thr_s1    (fcl_ifq_thr_s1[1:0]),
+                     .fcl_ifq_canthr    (fcl_ifq_canthr[3:0]),
+                     .fcl_erb_ievld_s1  (fcl_erb_ievld_s1),
+                     .fcl_erb_tevld_s1  (fcl_erb_tevld_s1),
+                     .fcl_erb_immuevld_s1(fcl_erb_immuevld_s1),
+                     .ifu_lsu_thrid_s   (ifu_lsu_thrid_s[1:0]),
+                     .fcl_erb_asi_tid_f (fcl_erb_asi_tid_f[1:0]),
+                     .fcl_erb_clear_iferr(fcl_erb_clear_iferr[3:0]),
+                     .fcl_erb_itlbrd_vld_s(fcl_erb_itlbrd_vld_s),
+                     .fcl_erb_itlbrd_data_s(fcl_erb_itlbrd_data_s),
+                     .fcl_dec_dslot_s   (fcl_dec_dslot_s),
+                     .fcl_dtu_inst_vld_e(fcl_dtu_inst_vld_e),
+                     .fcl_dtu_intr_vld_e(fcl_dtu_intr_vld_e),
+                     .fcl_dtu_inst_vld_d(fcl_dtu_inst_vld_d),
+                     .fcl_dtu_ely_inst_vld_d(fcl_dtu_ely_inst_vld_d),
+                     .fcl_dec_intr_vld_d(fcl_dec_intr_vld_d),
+                     .fcl_erb_inst_issue_d(fcl_erb_inst_issue_d),
+                     .fcl_erb_inst_vld_d1(fcl_erb_inst_vld_d1),
+                     .ifu_tlu_inst_vld_m(ifu_tlu_inst_vld_m),
+                     .ifu_exu_inst_vld_e(ifu_exu_inst_vld_e),
+                     .ifu_exu_inst_vld_w(ifu_exu_inst_vld_w),
+                     .ifu_spu_inst_vld_w(ifu_spu_inst_vld_w),
+                     .ifu_tlu_inst_vld_w(ifu_tlu_inst_vld_w),
+                     .ifu_tlu_flush_w   (ifu_tlu_flush_w),
+                     .ifu_tlu_flush_m   (ifu_tlu_flush_m),
+                     .fcl_swl_int_activate_i3(fcl_swl_int_activate_i3[3:0]),
+                     .fcl_swl_flush_wake_w(fcl_swl_flush_wake_w),
+                     .fcl_swl_flush_w   (fcl_swl_flush_w),
+                     .fcl_dcl_regz_e    (fcl_dcl_regz_e),
+                     .ifu_tlu_thrid_e   (ifu_tlu_thrid_e[1:0]),
+                     .ifu_tlu_thrid_d   (ifu_tlu_thrid_d[1:0]),
+                     .ifu_tlu_immu_miss_m(ifu_tlu_immu_miss_m),
+                     .ifu_tlu_priv_violtn_m(ifu_tlu_priv_violtn_m),
+                     .ifu_tlu_icmiss_e  (ifu_tlu_icmiss_e),
+                     .ifu_tlu_ttype_vld_m(ifu_tlu_ttype_vld_m),
+                     .ifu_exu_ttype_vld_m(ifu_exu_ttype_vld_m),
+                     .ifu_mmu_trap_m    (ifu_mmu_trap_m),
+                     .ifu_tlu_trap_m    (ifu_tlu_trap_m),
+                     .ifu_tlu_ttype_m   (ifu_tlu_ttype_m[8:0]),
+                     .ifu_tlu_hwint_m   (ifu_tlu_hwint_m),
+                     .ifu_tlu_sftint_m  (ifu_tlu_sftint_m),
+                     .ifu_tlu_rstint_m  (ifu_tlu_rstint_m),
+                     .fcl_dtu_rst_thr_w (fcl_dtu_rst_thr_w),
+                     .fcl_dtu_resum_thr_w(fcl_dtu_resum_thr_w),
+                     .ifu_tlu_itlb_done (ifu_tlu_itlb_done),
+                     .ifu_spu_trap_ack  (ifu_spu_trap_ack),
+                     .ifu_exu_tid_s2    (ifu_exu_tid_s2[1:0]),
+                     .ifu_exu_ren1_s    (ifu_exu_ren1_s),
+                     .ifu_exu_ren2_s    (ifu_exu_ren2_s),
+                     .ifu_exu_ren3_s    (ifu_exu_ren3_s),
+                     .ifu_exu_disable_ce_e(ifu_exu_disable_ce_e),
+                     .fcl_dtu_sync_intr_d(fcl_dtu_sync_intr_d),
+                     .fcl_dtu_tlzero_d  (fcl_dtu_tlzero_d),
+                     .fcl_dtu_privmode_d(fcl_dtu_privmode_d),
+                     .fcl_dtu_hprivmode_d(fcl_dtu_hprivmode_d),
+                     .fcl_dtu_hprivmode_w2(fcl_dtu_hprivmode_w2),
+                     .fcl_dtu_nuke_thr_w(fcl_dtu_nuke_thr_w),
+                     .fcl_swl_swout_f   (fcl_swl_swout_f),
+                     .fcl_dtu_stall_bf  (fcl_dtu_stall_bf),
+                     .fcl_swl_swcvld_s  (fcl_swl_swcvld_s),
+                     .fcl_dtu_thr_f     (fcl_dtu_thr_f[3:0]),
+                     .fcl_imd_oddwin_d  (fcl_imd_oddwin_d),
+                     .fcl_fdp_oddwin_s  (fcl_fdp_oddwin_s),
+                     .fcl_fdp_pcoor_vec_f(fcl_fdp_pcoor_vec_f[3:0]),
+                     .fcl_fdp_pcoor_f   (fcl_fdp_pcoor_f),
+                     .fcl_fdp_mask32b_f (fcl_fdp_mask32b_f),
+                     .fcl_fdp_tctxt_sel_prim(fcl_fdp_tctxt_sel_prim[3:0]),
+                     .fcl_fdp_usenir_sel_nir_s1(fcl_fdp_usenir_sel_nir_s1),
+                     .fcl_fdp_rbinst_sel_inste_s(fcl_fdp_rbinst_sel_inste_s[3:0]),
+                     .fcl_fdp_thrtnpc_sel_tnpc_l(fcl_fdp_thrtnpc_sel_tnpc_l[3:0]),
+                     .fcl_fdp_thrtnpc_sel_npcw_l(fcl_fdp_thrtnpc_sel_npcw_l[3:0]),
+                     .fcl_fdp_thrtnpc_sel_pcf_l(fcl_fdp_thrtnpc_sel_pcf_l[3:0]),
+                     .fcl_fdp_thrtnpc_sel_old_l(fcl_fdp_thrtnpc_sel_old_l[3:0]),
+                     .fcl_fdp_thr_s1_l  (fcl_fdp_thr_s1_l[3:0]),
+                     .fcl_fdp_next_thr_bf_l(fcl_fdp_next_thr_bf_l[3:0]),
+                     .fcl_fdp_next_ctxt_bf_l(fcl_fdp_next_ctxt_bf_l[3:0]),
+                     .fcl_fdp_nirthr_s1_l(fcl_fdp_nirthr_s1_l[3:0]),
+                     .fcl_fdp_thr_s2_l  (fcl_fdp_thr_s2_l[3:0]),
+                     .fcl_fdp_tpcbf_sel_pcp4_bf_l(fcl_fdp_tpcbf_sel_pcp4_bf_l[3:0]),
+                     .fcl_fdp_tpcbf_sel_brpc_bf_l(fcl_fdp_tpcbf_sel_brpc_bf_l[3:0]),
+                     .fcl_fdp_tpcbf_sel_trap_bf_l(fcl_fdp_tpcbf_sel_trap_bf_l[3:0]),
+                     .fcl_fdp_tpcbf_sel_old_bf_l(fcl_fdp_tpcbf_sel_old_bf_l[3:0]),
+                     .fcl_fdp_pcbf_sel_nosw_bf_l(fcl_fdp_pcbf_sel_nosw_bf_l),
+                     .fcl_fdp_pcbf_sel_swpc_bf_l(fcl_fdp_pcbf_sel_swpc_bf_l),
+                     .fcl_fdp_pcbf_sel_br_bf_l(fcl_fdp_pcbf_sel_br_bf_l),
+                     .fcl_fdp_trrbpc_sel_trap_bf_l(fcl_fdp_trrbpc_sel_trap_bf_l[3:0]),
+                     .fcl_fdp_trrbpc_sel_rb_bf_l(fcl_fdp_trrbpc_sel_rb_bf_l[3:0]),
+                     .fcl_fdp_trrbpc_sel_err_bf_l(fcl_fdp_trrbpc_sel_err_bf_l[3:0]),
+                     .fcl_fdp_trrbpc_sel_pcs_bf_l(fcl_fdp_trrbpc_sel_pcs_bf_l[3:0]),
+                     .fcl_fdp_noswpc_sel_tnpc_l_bf(fcl_fdp_noswpc_sel_tnpc_l_bf),
+                     .fcl_fdp_noswpc_sel_old_l_bf(fcl_fdp_noswpc_sel_old_l_bf),
+                     .fcl_fdp_noswpc_sel_inc_l_bf(fcl_fdp_noswpc_sel_inc_l_bf),
+                     .fcl_fdp_nextpcs_sel_pce_f_l(fcl_fdp_nextpcs_sel_pce_f_l[3:0]),
+                     .fcl_fdp_nextpcs_sel_pcd_f_l(fcl_fdp_nextpcs_sel_pcd_f_l[3:0]),
+                     .fcl_fdp_nextpcs_sel_pcs_f_l(fcl_fdp_nextpcs_sel_pcs_f_l[3:0]),
+                     .fcl_fdp_nextpcs_sel_pcf_f_l(fcl_fdp_nextpcs_sel_pcf_f_l[3:0]),
+                     .fcl_fdp_inst_sel_curr_s_l(fcl_fdp_inst_sel_curr_s_l),
+                     .fcl_fdp_inst_sel_switch_s_l(fcl_fdp_inst_sel_switch_s_l),
+                     .fcl_fdp_inst_sel_nir_s_l(fcl_fdp_inst_sel_nir_s_l),
+                     .fcl_fdp_inst_sel_nop_s_l(fcl_fdp_inst_sel_nop_s_l),
+                     .fcl_fdp_tinst_sel_curr_s_l(fcl_fdp_tinst_sel_curr_s_l[3:0]),
+                     .fcl_fdp_tinst_sel_rb_s_l(fcl_fdp_tinst_sel_rb_s_l[3:0]),
+                     .fcl_fdp_tinst_sel_old_s_l(fcl_fdp_tinst_sel_old_s_l[3:0]),
+                     .fcl_fdp_tinst_sel_ifq_s_l(fcl_fdp_tinst_sel_ifq_s_l[3:0]),
+                     .fcl_fdp_dmpthr_l  (fcl_fdp_dmpthr_l[3:0]),
+                     .fcl_fdp_ctxt_sel_dmp_bf_l(fcl_fdp_ctxt_sel_dmp_bf_l),
+                     .fcl_fdp_ctxt_sel_sw_bf_l(fcl_fdp_ctxt_sel_sw_bf_l),
+                     .fcl_fdp_ctxt_sel_curr_bf_l(fcl_fdp_ctxt_sel_curr_bf_l),
+                     .fcl_fdp_rdsr_sel_pc_e_l(fcl_fdp_rdsr_sel_pc_e_l),
+                     .fcl_fdp_rdsr_sel_thr_e_l(fcl_fdp_rdsr_sel_thr_e_l),
+                     .fcl_fdp_rdsr_sel_ver_e_l(fcl_fdp_rdsr_sel_ver_e_l),
+                     // Inputs
+                     .rclk              (rclk),
+                     .grst_l            (grst_l),
+                     .arst_l            (arst_l),
+                     .se                (se),
+                     .sehold            (sehold),
+                     .tlu_ifu_flush_pipe_w(tlu_ifu_flush_pipe_w),
+                     .exu_ifu_va_oor_m  (exu_ifu_va_oor_m),
+                     .exu_ifu_oddwin_s  (exu_ifu_oddwin_s[3:0]),
+                     .spu_ifu_ttype_tid_w2(spu_ifu_ttype_tid_w2[1:0]),
+                     .spu_ifu_ttype_vld_w2(spu_ifu_ttype_vld_w2),
+                     .spu_ifu_ttype_w2  (spu_ifu_ttype_w2),
+                     .erb_fcl_spu_uetrap(erb_fcl_spu_uetrap[3:0]),
+                     .exu_ifu_regz_e    (exu_ifu_regz_e),
+                     .dcl_fcl_bcregz0_e (dcl_fcl_bcregz0_e),
+                     .dcl_fcl_bcregz1_e (dcl_fcl_bcregz1_e),
+                     .dtu_fcl_rollback_g(dtu_fcl_rollback_g),
+                     .dtu_fcl_retract_d (dtu_fcl_retract_d),
+                     .dtu_fcl_br_inst_d (dtu_fcl_br_inst_d),
+                     .dtu_fcl_sir_inst_e(dtu_fcl_sir_inst_e),
+                     .dtu_fcl_privop_e  (dtu_fcl_privop_e),
+                     .dtu_fcl_fpdis_e   (dtu_fcl_fpdis_e),
+                     .dtu_fcl_imask_hit_e(dtu_fcl_imask_hit_e),
+                     .dtu_fcl_illinst_e (dtu_fcl_illinst_e),
+                     .dtu_fcl_thr_active(dtu_fcl_thr_active[3:0]),
+                     .dec_fcl_rdsr_sel_pc_d(dec_fcl_rdsr_sel_pc_d),
+                     .dec_fcl_rdsr_sel_thr_d(dec_fcl_rdsr_sel_thr_d),
+                     .ifq_fcl_wrreq_bf  (ifq_fcl_wrreq_bf),
+                     .ifq_fcl_icd_wrreq_bf(ifq_fcl_icd_wrreq_bf),
+                     .ifq_fcl_ictv_wrreq_bf(ifq_fcl_ictv_wrreq_bf),
+                     .ifq_fcl_rdreq_bf  (ifq_fcl_rdreq_bf),
+                     .ifq_fcl_asi_tid_bf(ifq_fcl_asi_tid_bf[1:0]),
+                     .ifq_fcl_asird_bf  (ifq_fcl_asird_bf),
+                     .ifq_fcl_invreq_bf (ifq_fcl_invreq_bf),
+                     .erb_fcl_itlb_ce_d1(erb_fcl_itlb_ce_d1),
+                     .erb_dtu_ifeterr_d1(erb_dtu_ifeterr_d1),
+                     .erb_fcl_ifet_uevec_d1(erb_fcl_ifet_uevec_d1[3:0]),
+                     .erb_fcl_ue_trapvec(erb_fcl_ue_trapvec[3:0]),
+                     .erb_fcl_ce_trapvec(erb_fcl_ce_trapvec[3:0]),
+                     .dtu_fcl_nextthr_bf(dtu_fcl_nextthr_bf[3:0]),
+                     .dtu_fcl_ntr_s     (dtu_fcl_ntr_s),
+                     .dtu_fcl_running_s (dtu_fcl_running_s),
+                     .dtu_fcl_flush_sonly_e(dtu_fcl_flush_sonly_e),
+                     .fdp_fcl_swc_s2    (fdp_fcl_swc_s2),
+                     .itlb_fcl_tlbmiss_f_l(itlb_fcl_tlbmiss_f_l),  // driven by itlb.tlb_cam_hit
+                     .itlb_fcl_imiss_s_l(itlb_fcl_imiss_s_l),  // driven by itlb.cache_hit
+                     .fdp_fcl_pc_oor_vec_f(fdp_fcl_pc_oor_vec_f[3:0]),
+                     .fdp_fcl_pc_oor_e  (fdp_fcl_pc_oor_e),
+                     .fdp_fcl_op_s      (fdp_fcl_op_s[1:0]),
+                     .fdp_fcl_op3_s     (fdp_fcl_op3_s[5:2]),
+                     .fdp_fcl_ibit_s    (fdp_fcl_ibit_s),
+                     .lsu_ifu_stallreq  (lsu_ifu_stallreq),
+                     .ffu_ifu_stallreq  (ffu_ifu_stallreq),
+                     .ifq_fcl_stallreq  (ifq_fcl_stallreq),
+                     .dtu_inst_anull_e  (dtu_inst_anull_e),
+                     .ifq_fcl_fill_thr  (ifq_fcl_fill_thr[3:0]),
+                     .ifq_fcl_flush_sonly_e(ifq_fcl_flush_sonly_e),
+                     .tlu_ifu_trap_tid_w1(tlu_ifu_trap_tid_w1[1:0]),
+                     .tlu_ifu_trappc_vld_w1(tlu_ifu_trappc_vld_w1),
+                     .tlu_ifu_trapnpc_vld_w1(tlu_ifu_trapnpc_vld_w1),
+                     .tlu_lsu_pstate_priv(tlu_lsu_pstate_priv[3:0]),
+                     .tlu_lsu_pstate_am (tlu_lsu_pstate_am[3:0]),
+                     .tlu_hpstate_priv  (tlu_hpstate_priv[3:0]),
+                     .tlu_lsu_redmode   (tlu_lsu_redmode[3:0]),
+                     .tlu_hpstate_enb   (tlu_hpstate_enb[3:0]),
+                     .lsu_ifu_addr_real_l(lsu_ifu_addr_real_l[3:0]),
+                     .lsu_pid_state0    (lsu_pid_state0[2:0]),
+                     .lsu_pid_state1    (lsu_pid_state1[2:0]),
+                     .lsu_pid_state2    (lsu_pid_state2[2:0]),
+                     .lsu_pid_state3    (lsu_pid_state3[2:0]),
+                     .lsu_ifu_icache_en (lsu_ifu_icache_en[3:0]),
+                     .lsu_ifu_dc_parity_error_w2(lsu_ifu_dc_parity_error_w2),
+                     .lsu_ifu_t0_tlz    (lsu_ifu_t0_tlz),
+                     .lsu_ifu_t1_tlz    (lsu_ifu_t1_tlz),
+                     .lsu_ifu_t2_tlz    (lsu_ifu_t2_tlz),
+                     .lsu_ifu_t3_tlz    (lsu_ifu_t3_tlz),
+                     .tlu_ifu_hwint_i3  (tlu_ifu_hwint_i3[3:0]),
+                     .tlu_ifu_pstate_ie (tlu_ifu_pstate_ie[3:0]),
+                     .tlu_ifu_sftint_vld(tlu_ifu_sftint_vld[3:0]),
+                     .tlu_ifu_hintp_vld (tlu_ifu_hintp_vld[3:0]),
+                     .tlu_ifu_rerr_vld  (tlu_ifu_rerr_vld[3:0]),
+                     .tlu_ifu_rstthr_i2 (tlu_ifu_rstthr_i2[3:0]),
+                     .tlu_ifu_rstint_i2 (tlu_ifu_rstint_i2),
+                     .tlu_ifu_resumint_i2(tlu_ifu_resumint_i2),
+                     .tlu_ifu_nukeint_i2(tlu_ifu_nukeint_i2),
+                     .tlu_itlb_wr_vld_g (tlu_itlb_wr_vld_g),
+                     .tlu_itlb_dmp_vld_g(tlu_itlb_dmp_vld_g),
+                     .tlu_itlb_dmp_all_g(tlu_itlb_dmp_all_g),
+                     .tlu_itlb_data_rd_g(tlu_itlb_data_rd_g),
+                     .tlu_itlb_tag_rd_g (tlu_itlb_tag_rd_g),
+                     .tlu_itlb_invalidate_all_g(tlu_itlb_invalidate_all_g),
+                     .tlu_idtlb_dmp_thrid_g(tlu_idtlb_dmp_thrid_g[1:0]),
+                     .exu_ifu_ecc_ce_m  (exu_ifu_ecc_ce_m),
+                     .ffu_ifu_fst_ce_w  (ffu_ifu_fst_ce_w));
+
+//   sparc_ifu_itlb itlb(
+//                        .adj    (lsu_idtlb_mrgn[7:0]),
+//			                  .reset  (fcl_itlb_invall_bf),
+//                        .tlu_itlb_dmp_actxt_g(tlu_itlb_dmp_actxt_g),
+//                        .itlb_vaddr_offset_f (fdp_erb_pc_f[`IC_IDX_HI:(`IC_IDX_HI-1)]),
+//			                  /*AUTOINST*/
+//                        // Outputs
+//                        .ifu_lsu_tlb_writeable(ifu_lsu_tlb_writeable),
+//                        .itlb_ifq_paddr_s(itlb_ifq_paddr_s[39:10]),
+//                        .itlb_icd_waysel_s1(itlb_icd_waysel_s1[3:0]),
+//                        .itlb_fcl_imiss_s_l(itlb_fcl_imiss_s_l),
+//                        .itlb_fcl_tlbmiss_f_l(itlb_fcl_tlbmiss_f_l),
+//                        .itlb_fcl_priv_s1(itlb_fcl_priv_s1),
+//                        .itlb_rd_tte_data(itlb_rd_tte_data[42:0]),
+//                        .itlb_rd_tte_tag(itlb_rd_tte_tag[58:0]),
+//                        .so             (so),
+                        // Inputs
+//                        .clk            (clk),
+//                        .se             (se),
+//                        .si             (si),
+//                        .fdp_icd_vaddr_bf(fdp_icd_vaddr_bf[47:10]),
+//                        .fdp_itlb_ctxt_bf(fdp_itlb_ctxt_bf[12:0]),
+//                        .ict_itlb_tags_f(ict_itlb_tags_f[`IC_TAG_ALL_HI:0]),
+//                        .icv_itlb_valid_f(icv_itlb_valid_f[3:0]),
+//                        .fcl_itlb_cam_vld_bf(fcl_itlb_cam_vld_bf),
+//                        .fcl_itlb_wr_vld_bf(fcl_itlb_wr_vld_bf),
+//                        .fcl_itlb_addr_mask_l(fcl_itlb_addr_mask_l),
+//                        .fcl_itlb_dmp_vld_bf(fcl_itlb_dmp_vld_bf),
+//                        .fcl_itlb_tag_rd_vld_bf(fcl_itlb_tag_rd_vld_bf),
+//                        .fcl_itlb_data_rd_vld_bf(fcl_itlb_data_rd_vld_bf),
+//                        .fcl_itlb_cam_real_bf(fcl_itlb_cam_real_bf),
+//                        .fcl_itlb_cam_pid_bf(fcl_itlb_cam_pid_bf[2:0]),
+//                        .tlu_itlb_tte_tag_w2(tlu_itlb_tte_tag_w2[58:0]),
+//                        .tlu_itlb_tte_data_w2(tlu_itlb_tte_data_w2[42:0]),
+//                        .tlu_itlb_rw_index_vld_g(tlu_itlb_rw_index_vld_g),
+//                        .tlu_itlb_rw_index_g(tlu_itlb_rw_index_g[5:0]),
+//                        .tlu_idtlb_dmp_key_g(tlu_idtlb_dmp_key_g[40:0]),
+//                        .tlu_itlb_dmp_by_ctxt_g(tlu_itlb_dmp_by_ctxt_g),
+//                        .tlu_itlb_dmp_all_g(tlu_itlb_dmp_all_g));
+
+   bw_r_tlb_fpga itlb(  // instance itlb, fully hand-wired (no AUTOINST); same instance name as the commented-out sparc_ifu_itlb above
+		              .tlb_pgnum_crit	(),  // left unconnected
+		              // Outputs  (NOTE(review): the trailing "// 0", "// 1", "// 2" tags on ports are not explained in this section -- confirm their meaning)
+		              .tlb_rd_tte_tag	 (itlb_rd_tte_tag[58:0]),  // 2
+		              .tlb_rd_tte_data (itlb_rd_tte_data[42:0]), // 2
+		              .tlb_pgnum	     (itlb_ifq_paddr_s[`IC_TAG_HI:10]), // 2
+		              .tlb_cam_hit	   (itlb_fcl_tlbmiss_f_l),    // 1
+		              .cache_way_hit   (itlb_wsel_waysel_s1[3:0]), // 2
+		              .cache_hit       (itlb_fcl_imiss_s_l),      // 2
+		              .so		(short_scan0_1),  // scan out; the matching scan in is short_si0
+
+		                  // Inputs
+                  .rclk          (rclk),
+                  .rst_tri_en    (mem_write_disable),
+		              .tlb_cam_vld	(fcl_itlb_cam_vld_bf),       // 0
+
+//`ifdef SPARC_HPV_EN               (conditional is commented out: the key layout below is always used)
+                  .tlb_cam_key   ({fdp_icd_vaddr_bf[47:28],  // 0
+                                   1'b1,  // a constant 1 follows each VA field; the key totals 41 bits (20+1+6+1+6+1+3+1+1+1)
+                                   fdp_icd_vaddr_bf[27:22],
+                                   1'b1,
+                                   fdp_icd_vaddr_bf[21:16],
+                                   1'b1,
+                                   fdp_icd_vaddr_bf[15:13],
+                                   1'b1,
+                                   fcl_itlb_cam_real_bf,  // g is the same as r
+                                   fcl_itlb_cam_real_bf}),// this is the r bit
+                  
+//`else // !`ifdef SPARC_HPV_EN
+//                  .tlb_cam_key ({1'b0, // unused          // 0
+//                                 fdp_icd_vaddr_bf[47:35], 
+//					                       1'b1, // v47_22
+//					                       fdp_icd_vaddr_bf[34:22],
+//					                       fdp_icd_vaddr_bf[21:20],
+//					                       1'b1, // v21_19
+//					                       fdp_icd_vaddr_bf[19],
+//					                       fdp_icd_vaddr_bf[18:17],
+//					                       1'b1, // v18_16
+//					                       fdp_icd_vaddr_bf[16],
+//					                       fdp_icd_vaddr_bf[15:14],
+//					                       1'b1, // v15_13
+//					                       fdp_icd_vaddr_bf[13], // global bit
+//					                       1'b0}),  // all r's are zero
+//                  
+//                  .tlb_cam_real     (fcl_itlb_cam_real_bf),    // 0
+//		              .tlb_demap_ctxt 	(tlu_itlb_dmp_by_ctxt_g),  // 0
+//`endif
+                      
+                  .tlb_cam_pid    (fcl_itlb_cam_pid_bf[2:0]),  // 0
+		              .tlb_demap_key	(tlu_idtlb_dmp_key_g[40:0]), // 0
+   
+		              .tlb_addr_mask_l (fcl_itlb_addr_mask_l),      // 0
+		              .tlb_ctxt		     (fdp_itlb_ctxt_bf[12:0]),    // 0
+
+		              .tlb_wr_vld	    (fcl_itlb_wr_vld_bf),         // 0
+		              .tlb_wr_tte_tag	(tlu_itlb_tte_tag_w2[58:0]),  // 1
+		              .tlb_wr_tte_data(tlu_itlb_tte_data_w2[42:0]), // 1
+
+		              .tlb_rd_tag_vld	 (fcl_itlb_tag_rd_vld_bf),    // 0
+		              .tlb_rd_data_vld (fcl_itlb_data_rd_vld_bf),   // 0
+		              .tlb_rw_index_vld(tlu_itlb_rw_index_vld_g),   // 0
+		              .tlb_rw_index	   (tlu_itlb_rw_index_g[5:0]),  // 0
+	 
+		              .tlb_demap	    (fcl_itlb_dmp_vld_bf),        // 0
+		              .tlb_demap_all	(fcl_itlb_dmp_all_bf),        // 0
+                  .tlb_demap_auto (tlu_itlb_dmp_actxt_g),
+
+		              .cache_ptag_w3	({ict_itlb_tag3_f[27:0],   // 1
+					                          fdp_erb_pc_f[`IC_IDX_HI:(`IC_IDX_HI-1)]}),  // each cache_ptag_wN = 28 tag bits of way N followed by the same two fdp_erb_pc_f bits
+		              .cache_ptag_w2	({ict_itlb_tag2_f[27:0],    // 1
+					                          fdp_erb_pc_f[`IC_IDX_HI:(`IC_IDX_HI-1)]}),
+		              .cache_ptag_w1	({ict_itlb_tag1_f[27:0],    // 1
+                                    fdp_erb_pc_f[`IC_IDX_HI:(`IC_IDX_HI-1)]}),
+		              .cache_ptag_w0	({ict_itlb_tag0_f[27:0],     // 1
+                                    fdp_erb_pc_f[`IC_IDX_HI:(`IC_IDX_HI-1)]}),
+
+		              .cache_set_vld	(icv_itlb_valid_f[3:0]),     // 1
+	 
+		              .tlb_bypass	    (fcl_itlb_cam_bypass_bf),    // 0
+		              .tlb_bypass_va	(fdp_icd_vaddr_bf[12:10]),   // 0
+	 
+		              .si		(short_si0),  // scan in
+		              .se		(se),
+                  .hold (sehold),
+		              .adj		    (lsu_idtlb_mrgn[7:0]),
+                 // tlb expects this to be asynchronous reset!
+                  .arst_l     (arst_l),
+		              .rst_soft_l (fcl_itlb_invall_f_l));  // 1
+   
+
+   sparc_ifu_wseldp wseldp(  // instance wseldp: receives the icd_wsel_* data buses and the itlb way-hit vector, drives the wsel_fdp_* buses
+                           .so          (scan0_6),  // scan out; scan0_6 is the scanin of icdhdr
+                           .si          (scan0_5),  // scan in; scan0_5 is the scan out of fdp
+                         .wsel_mbist_icache_data(wsel_mbist_icache_data[67:0]),
+                         /*AUTOINST*/
+                           // Outputs
+                           .wsel_fdp_fetdata_s1(wsel_fdp_fetdata_s1[33:0]),  // fdp connects only bits [32:0] of this bus
+                           .wsel_fdp_topdata_s1(wsel_fdp_topdata_s1[33:0]),  // fdp connects only bits [32:0] of this bus
+                           // Inputs
+                           .rclk        (rclk),
+                           .se          (se),
+                           .icd_wsel_fetdata_s1(icd_wsel_fetdata_s1[135:0]),  // from icd; 136 = 4 x 34 -- presumably one 34-bit word per way, TODO confirm
+                           .icd_wsel_topdata_s1(icd_wsel_topdata_s1[135:0]),  // from icd
+                           .itlb_wsel_waysel_s1(itlb_wsel_waysel_s1[3:0]),  // driven by itlb.cache_way_hit
+                           .ifq_erb_asiway_f(ifq_erb_asiway_f[1:0]));
+   
+
+/*   cmp_sram_redhdr AUTO_TEMPLATE( 
+                            .fuse_ary_wren(fuse_icd_wren),
+                            .fuse_ary_rid(fuse_icd_rid[5:0]),
+                            .fuse_ary_repair_value(fuse_icd_repair_value[7:0]),
+                            .fuse_ary_repair_en(fuse_icd_repair_en[1:0]),
+                            .spc_efc_xfuse_data(spc_efc_ifuse_data),
+
+                            .efc_spc_xfuse_data(efc_spc_ifuse_data),
+                            .efc_spc_xfuse_ashift(efc_spc_ifuse_ashift),
+                            .efc_spc_xfuse_dshift(efc_spc_ifuse_dshift),
+                            .ary_fuse_repair_value(icd_fuse_repair_value[7:0]),
+                            .ary_fuse_repair_en(icd_fuse_repair_en[1:0]),
+                            .scanin   (scan0_6));
+ */
+                        
+   cmp_sram_redhdr icdhdr(  // instance icdhdr; connections marked "Templated" follow the cmp_sram_redhdr AUTO_TEMPLATE comment above
+                          .scanout      (scan0_7),  // scan0_7 is the scan input (.si) of icd
+                          /*AUTOINST*/
+                          // Outputs
+                          .fuse_ary_wren(fuse_icd_wren),         // Templated
+                          .fuse_ary_rid (fuse_icd_rid[5:0]),     // Templated
+                          .fuse_ary_repair_value(fuse_icd_repair_value[7:0]), // Templated
+                          .fuse_ary_repair_en(fuse_icd_repair_en[1:0]), // Templated
+                          .spc_efc_xfuse_data(spc_efc_ifuse_data), // Templated
+                          // Inputs
+                          .rclk         (rclk),
+                          .se           (se),
+                          .scanin       (scan0_6),               // Templated; scan0_6 is the scan out of wseldp
+                          .arst_l       (arst_l),
+                          .testmode_l   (testmode_l),
+                          .efc_spc_fuse_clk1(efc_spc_fuse_clk1),
+                          .efc_spc_fuse_clk2(efc_spc_fuse_clk2),
+                          .efc_spc_xfuse_data(efc_spc_ifuse_data), // Templated
+                          .efc_spc_xfuse_ashift(efc_spc_ifuse_ashift), // Templated
+                          .efc_spc_xfuse_dshift(efc_spc_ifuse_dshift), // Templated
+                          .ary_fuse_repair_value(icd_fuse_repair_value[7:0]), // Templated; driven by icd
+                          .ary_fuse_repair_en(icd_fuse_repair_en[1:0])); // Templated; driven by icd
+//   sparc_ifu_icd icd
+   bw_r_icd icd(   // presumably the I-cache data array (TODO confirm): produces the 136-bit fetdata/topdata buses consumed by wseldp and the fuse repair read-back consumed by icdhdr
+                .so                     (scan0_8),   // scan chain out, to ifqctl.si
+                .si                     (scan0_7),   // scan chain in, from icdhdr.scanout
+			           .fdp_icd_index_bf  (fdp_icd_vaddr_bf[`IC_IDX_HI:2]),   // index from fdp; ict uses bits [`IC_IDX_HI:5] of the same bus
+                 .ifq_icd_index_bf  ({ifq_icd_index_bf[`IC_IDX_HI:2]}),   // index from ifqdp; the single-element concatenation is equivalent to the plain part-select
+                 .bist_ic_data      (mbist_icache_wdata[7:0]),   // BIST write data comes from mbist_icache_wdata
+                 .rst_tri_en        (mem_write_disable),   // same net as rst_tri_en on ict and icv
+                 .reset_l           (arst_l),   // reset_l is driven by arst_l, as on ict and icv
+			           /*AUTOINST*/
+                // Outputs
+                .icd_wsel_fetdata_s1    (icd_wsel_fetdata_s1[135:0]),
+                .icd_wsel_topdata_s1    (icd_wsel_topdata_s1[135:0]),
+                .icd_fuse_repair_value  (icd_fuse_repair_value[7:0]),
+                .icd_fuse_repair_en     (icd_fuse_repair_en[1:0]),
+                // Inputs
+                .rclk                   (rclk),
+                .se                     (se),
+                .sehold                 (sehold),
+                .fcl_icd_index_sel_ifq_bf(fcl_icd_index_sel_ifq_bf),
+                .ifq_icd_wrway_bf       (ifq_icd_wrway_bf[1:0]),
+                .ifq_icd_worden_bf      (ifq_icd_worden_bf[3:0]),
+                .ifq_icd_wrdata_i2      (ifq_icd_wrdata_i2[135:0]),
+                .fcl_icd_rdreq_bf       (fcl_icd_rdreq_bf),
+                .fcl_icd_wrreq_bf       (fcl_icd_wrreq_bf),
+                .ifq_icd_data_sel_old_i2(ifq_icd_data_sel_old_i2),
+                .ifq_icd_data_sel_fill_i2(ifq_icd_data_sel_fill_i2),
+                .ifq_icd_data_sel_bist_i2(ifq_icd_data_sel_bist_i2),
+                .fuse_icd_wren          (fuse_icd_wren),
+                .fuse_icd_rid           (fuse_icd_rid[3:0]),
+                .fuse_icd_repair_value  (fuse_icd_repair_value[7:0]),
+                .fuse_icd_repair_en     (fuse_icd_repair_en[1:0]),
+                .efc_spc_fuse_clk1      (efc_spc_fuse_clk1));
+
+/*   bw_r_idct AUTO_TEMPLATE( 
+                     // Inputs
+                     .adj          (lsu_ictag_mrgn[3:0]),
+                     .reset_l      (arst_l),
+		                 .index0_x     (fdp_icd_vaddr_bf[`IC_IDX_HI:5]),
+                     .index1_x     (ifq_icd_index_bf[`IC_IDX_HI:5]),
+                     .index_sel_x  (fcl_icd_index_sel_ifq_bf),
+                     .dec_wrway_x  (ifq_ict_dec_wrway_bf[3:0]),
+                     .wrtag_w0_y   ({4'b0, ifq_ict_wrtag_f[`IC_TAG_SZ:0]}),
+                     .wrtag_w1_y   ({4'b0, ifq_ict_wrtag_f[`IC_TAG_SZ:0]}),
+                     .wrtag_w2_y   ({4'b0, ifq_ict_wrtag_f[`IC_TAG_SZ:0]}),
+                     .wrtag_w3_y   ({4'b0, ifq_ict_wrtag_f[`IC_TAG_SZ:0]}),
+                     .rdreq_x      (fcl_icd_rdreq_bf),
+                     .wrreq_x      (fcl_ict_wrreq_bf));
+ */
+
+   bw_r_idct ict(   // presumably the I-cache tag array (TODO confirm): shares index buses, index select and rdreq with icd; all four wrtag_wN_y ports get the same {4'b0, ifq_ict_wrtag_f} value
+                 .so                    (short_scan0_2),   // "short" scan chain out, to icv.si
+                 .si                    (short_scan0_1),   // "short" scan chain in
+                 .rdtag_w0_y    (ict_itlb_tag0_f[32:0]),   // per-way read tags; errdp consumes only bits [28:0] of each
+                 .rdtag_w1_y    (ict_itlb_tag1_f[32:0]),
+                 .rdtag_w2_y    (ict_itlb_tag2_f[32:0]),
+                 .rdtag_w3_y    (ict_itlb_tag3_f[32:0]),
+                 .rst_tri_en        (mem_write_disable),   // same net as rst_tri_en on icd and icv
+
+                     /*AUTOINST*/
+                 // Inputs
+                 .rclk                  (rclk),
+                 .se                    (se),
+                 .reset_l               (arst_l),                // Templated
+                 .sehold                (sehold),
+                 .index0_x              (fdp_icd_vaddr_bf[`IC_IDX_HI:5]), // Templated
+                 .index1_x              (ifq_icd_index_bf[`IC_IDX_HI:5]), // Templated
+                 .index_sel_x           (fcl_icd_index_sel_ifq_bf), // Templated
+                 .dec_wrway_x           (ifq_ict_dec_wrway_bf[3:0]), // Templated
+                 .rdreq_x               (fcl_icd_rdreq_bf),      // Templated
+                 .wrreq_x               (fcl_ict_wrreq_bf),      // Templated
+                 .wrtag_w0_y            ({4'b0, ifq_ict_wrtag_f[`IC_TAG_SZ:0]}), // Templated
+                 .wrtag_w1_y            ({4'b0, ifq_ict_wrtag_f[`IC_TAG_SZ:0]}), // Templated
+                 .wrtag_w2_y            ({4'b0, ifq_ict_wrtag_f[`IC_TAG_SZ:0]}), // Templated
+                 .wrtag_w3_y            ({4'b0, ifq_ict_wrtag_f[`IC_TAG_SZ:0]}), // Templated
+                 .adj                   (lsu_ictag_mrgn[3:0]));   // Templated
+//     sparc_ifu_icv icv
+/*   bw_r_rf16x32 AUTO_TEMPLATE(
+                    // Outputs
+                    .dout          (icv_itlb_valid_f[3:0]),
+                    .so            (short_so0),
+                    // Inputs
+                    .clk           (clk),
+                    .se            (se),
+                    .sehold        (sehold),
+                    .si            (short_scan0_2),
+                    .rst_tri_en        (mem_write_disable),
+                    .reset_l       (arst_l),
+		                .rd_adr2       (fdp_icv_index_bf[11:5]),
+                    .rd_adr1       (ifq_icv_wrindex_bf[`IC_IDX_HI:5]),
+                    .wr_adr        (ifq_icv_wrindex_bf[`IC_IDX_HI:7]),
+                    .rd_adr1_sel   (fcl_ifq_grant_bf),
+                    .din           (ifq_icv_wrdata_bf),
+                    .bit_wen       (ifq_icv_wren_bf[15:0]),
+                    .rd_en         (fcl_icv_rdreq_bf),
+                    .wr_en         (fcl_icv_wrreq_bf));
+ */
+   bw_r_rf16x32  icv(/* presumably the I-cache valid-bit array (TODO confirm): dout drives icv_itlb_valid_f[3:0]; write index and bit enables come from invctl. NOTE(review): rd_adr2 hard-codes [11:5] while rd_adr1 uses `IC_IDX_HI:5 -- assumes `IC_IDX_HI == 11; the AUTO_TEMPLATE above lists .clk(clk) but the instance connects .rclk(rclk). */ /*AUTOINST*/
+                     // Outputs
+                     .dout              (icv_itlb_valid_f[3:0]), // Templated
+                     .so                (short_so0),             // Templated
+                     // Inputs
+                     .rclk              (rclk),
+                     .se                (se),                    // Templated
+                     .si                (short_scan0_2),         // Templated
+                     .reset_l           (arst_l),                // Templated
+                     .sehold            (sehold),                // Templated
+                     .rst_tri_en        (mem_write_disable),     // Templated
+                     .rd_adr1           (ifq_icv_wrindex_bf[`IC_IDX_HI:5]), // Templated
+                     .rd_adr2           (fdp_icv_index_bf[11:5]), // Templated
+                     .rd_adr1_sel       (fcl_ifq_grant_bf),      // Templated
+                     .rd_en             (fcl_icv_rdreq_bf),      // Templated
+                     .wr_adr            (ifq_icv_wrindex_bf[`IC_IDX_HI:7]), // Templated
+                     .wr_en             (fcl_icv_wrreq_bf),      // Templated
+                     .bit_wen           (ifq_icv_wren_bf[15:0]), // Templated
+                     .din               (ifq_icv_wrdata_bf));     // Templated
+   sparc_ifu_ifqdp ifqdp(   // datapath partner of ifqctl: exports ifd_ifc_* / ifd_inv_* and takes ifc_ifd_* back; also drives the icd write data/index/way, the ict write tag and the ifu_lsu_pcxpkt_e packet
+                         .so            (short_so1),   // "short" scan chain 1 out
+                         .si            (short_scan1_1),   // "short" scan chain 1 in
+                        .ifq_ict_wrtag_f(ifq_ict_wrtag_f[`IC_TAG_SZ:0]),   // hand-wired; this net feeds ict.wrtag_w*_y and, minus the top bit, errdp.ifq_erb_wrtag_f
+                         .fdp_ifq_paddr_f(fdp_erb_pc_f[9:2]),   // only bits [9:2] of fdp_erb_pc_f are connected here (errdp gets [47:0])
+                         /*AUTOINST*/
+                         // Outputs
+                         .ifu_lsu_pcxpkt_e(ifu_lsu_pcxpkt_e[51:0]),
+                         .ifq_fdp_fill_inst(ifq_fdp_fill_inst[32:0]),
+                         .ifq_erb_asidata_i2(ifq_erb_asidata_i2[47:0]),
+                         .ifd_inv_ifqop_i2(ifd_inv_ifqop_i2[`CPX_WIDTH-1:0]),
+                         .ifq_icd_index_bf(ifq_icd_index_bf[`IC_IDX_HI:2]),
+                         .ifq_icd_wrdata_i2(ifq_icd_wrdata_i2[135:0]),
+                         .ifq_erb_wrindex_f(ifq_erb_wrindex_f[`IC_IDX_HI:4]),
+                         .ifq_icd_wrway_bf(ifq_icd_wrway_bf[1:0]),
+                         .ifd_ifc_milhit_s(ifd_ifc_milhit_s[3:0]),
+                         .ifd_ifc_instoffset0(ifd_ifc_instoffset0[1:0]),
+                         .ifd_ifc_instoffset1(ifd_ifc_instoffset1[1:0]),
+                         .ifd_ifc_instoffset2(ifd_ifc_instoffset2[1:0]),
+                         .ifd_ifc_instoffset3(ifd_ifc_instoffset3[1:0]),
+                         .ifd_ifc_cpxthr_nxt(ifd_ifc_cpxthr_nxt[1:0]),
+                         .ifd_ifc_cpxreq_nxt(ifd_ifc_cpxreq_nxt[3:0]),
+                         .ifd_ifc_cpxreq_i1(ifd_ifc_cpxreq_i1[`CPX_RQ_SIZE:0]),
+                         .ifd_ifc_destid0(ifd_ifc_destid0[2:0]),
+                         .ifd_ifc_destid1(ifd_ifc_destid1[2:0]),
+                         .ifd_ifc_destid2(ifd_ifc_destid2[2:0]),
+                         .ifd_ifc_destid3(ifd_ifc_destid3[2:0]),
+                         .ifd_ifc_newdestid_s(ifd_ifc_newdestid_s[2:0]),
+                         .ifd_ifc_pcxline_d(ifd_ifc_pcxline_d[4:2]),
+                         .ifd_ifc_asi_vachklo_i2(ifd_ifc_asi_vachklo_i2),
+                         .ifd_ifc_cpxvld_i2(ifd_ifc_cpxvld_i2),
+                         .ifd_ifc_asiaddr_i2(ifd_ifc_asiaddr_i2[3:2]),
+                         .ifd_ifc_iobpkt_i2(ifd_ifc_iobpkt_i2),
+                         .ifd_ifc_fwd2ic_i2(ifd_ifc_fwd2ic_i2),
+                         .ifd_ifc_4bpkt_i2(ifd_ifc_4bpkt_i2),
+                         .ifd_ifc_cpxnc_i2(ifd_ifc_cpxnc_i2),
+                         .ifd_ifc_cpxce_i2(ifd_ifc_cpxce_i2),
+                         .ifd_ifc_cpxue_i2(ifd_ifc_cpxue_i2),
+                         .ifd_ifc_cpxms_i2(ifd_ifc_cpxms_i2),
+                         .ifd_ifc_miladdr4_i2(ifd_ifc_miladdr4_i2[3:0]),
+                         .ifd_inv_wrway_i2(ifd_inv_wrway_i2[1:0]),
+                         // Inputs
+                         .rclk          (rclk),
+                         .se            (se),
+                         .lsu_ifu_cpxpkt_i1(lsu_ifu_cpxpkt_i1[`CPX_WIDTH-1:0]),
+                         .lsu_ifu_asi_addr(lsu_ifu_asi_addr[17:0]),
+                         .lsu_ifu_stxa_data(lsu_ifu_stxa_data[47:0]),
+                         .itlb_ifq_paddr_s(itlb_ifq_paddr_s[39:10]),
+                         .ifc_ifd_reqvalid_e(ifc_ifd_reqvalid_e),
+                         .ifc_ifd_filladdr4_i2(ifc_ifd_filladdr4_i2),
+                         .ifc_ifd_repway_s(ifc_ifd_repway_s[1:0]),
+                         .ifc_ifd_uncached_e(ifc_ifd_uncached_e),
+                         .ifc_ifd_thrid_e(ifc_ifd_thrid_e[1:0]),
+                         .ifc_ifd_pcxline_adj_d(ifc_ifd_pcxline_adj_d[4:2]),
+                         .ifc_ifd_errinv_e(ifc_ifd_errinv_e),
+                         .ifc_ifd_ldmil_sel_new(ifc_ifd_ldmil_sel_new[3:0]),
+                         .ifc_ifd_ld_inq_i1(ifc_ifd_ld_inq_i1),
+                         .ifc_ifd_idx_sel_fwd_i2(ifc_ifd_idx_sel_fwd_i2),
+                         .ifc_ifd_milreq_sel_d_l(ifc_ifd_milreq_sel_d_l[3:0]),
+                         .ifc_ifd_milfill_sel_i2_l(ifc_ifd_milfill_sel_i2_l[3:0]),
+                         .ifc_ifd_finst_sel_l(ifc_ifd_finst_sel_l[3:0]),
+                         .ifc_ifd_ifqbyp_sel_fwd_l(ifc_ifd_ifqbyp_sel_fwd_l),
+                         .ifc_ifd_ifqbyp_sel_inq_l(ifc_ifd_ifqbyp_sel_inq_l),
+                         .ifc_ifd_ifqbyp_sel_asi_l(ifc_ifd_ifqbyp_sel_asi_l),
+                         .ifc_ifd_ifqbyp_sel_lsu_l(ifc_ifd_ifqbyp_sel_lsu_l),
+                         .ifc_ifd_ifqbyp_en_l(ifc_ifd_ifqbyp_en_l),
+                         .ifc_ifd_addr_sel_bist_i2_l(ifc_ifd_addr_sel_bist_i2_l),
+                         .ifc_ifd_addr_sel_asi_i2_l(ifc_ifd_addr_sel_asi_i2_l),
+                         .ifc_ifd_addr_sel_old_i2_l(ifc_ifd_addr_sel_old_i2_l),
+                         .ifc_ifd_addr_sel_fill_i2_l(ifc_ifd_addr_sel_fill_i2_l),
+                         .mbist_icache_way(mbist_icache_way[1:0]),
+                         .mbist_icache_word(mbist_icache_word),
+                         .mbist_icache_index(mbist_icache_index[7:0]));
+
+   sparc_ifu_ifqctl ifqctl(   // control partner of ifqdp: consumes ifd_ifc_* and drives the ifc_ifd_* selects back, plus ifq_fcl_*, ifq_erb_*, ifq_icd_* and ifc_inv_* controls
+                           .so          (scan0_9),   // scan chain out, to invctl.si
+                           .si          (scan0_8),   // scan chain in, from icd.so
+                           .ifd_ifc_cpxvalid_i1(lsu_ifu_cpxpkt_i1[`CPX_VLD]),   // single bit picked straight out of the LSU cpx packet rather than routed through ifqdp
+			   .lsu_ifu_cpxpkt_wayvld_i1 (lsu_ifu_cpxpkt_i1[`CPX_WYVLD]),   // likewise a single bit of the same packet
+                           .ifq_sscan_data(ifq_sscan_data[3:0]),   // direction not visible here (listed ahead of AUTOINST) -- TODO confirm against module ports
+                           .rst_tri_en  (mux_drive_disable),   // note: mux_drive_disable here, whereas icd/ict/icv tie rst_tri_en to mem_write_disable
+                           
+                           /*AUTOINST*/
+                           // Outputs
+                           .ifu_lsu_inv_clear(ifu_lsu_inv_clear),
+                           .ifu_lsu_ibuf_busy(ifu_lsu_ibuf_busy),
+                           .ifu_lsu_asi_ack(ifu_lsu_asi_ack),
+                           .ifu_lsu_ldxa_illgl_va_w2(ifu_lsu_ldxa_illgl_va_w2),
+                           .ifu_lsu_fwd_wr_ack(ifu_lsu_fwd_wr_ack),
+                           .ifu_lsu_pcxreq_d(ifu_lsu_pcxreq_d),
+                           .ifu_lsu_destid_s(ifu_lsu_destid_s[2:0]),
+                           .ifu_tlu_l2imiss(ifu_tlu_l2imiss[3:0]),
+                           .ifq_fcl_stallreq(ifq_fcl_stallreq),
+                           .ifq_swl_stallreq(ifq_swl_stallreq),
+                           .ifq_fcl_flush_sonly_e(ifq_fcl_flush_sonly_e),
+                           .ifq_fcl_wrreq_bf(ifq_fcl_wrreq_bf),
+                           .ifq_fcl_rdreq_bf(ifq_fcl_rdreq_bf),
+                           .ifq_fcl_icd_wrreq_bf(ifq_fcl_icd_wrreq_bf),
+                           .ifq_fcl_ictv_wrreq_bf(ifq_fcl_ictv_wrreq_bf),
+                           .ifq_erb_fwdrd_bf(ifq_erb_fwdrd_bf),
+                           .ifq_erb_rdtag_f(ifq_erb_rdtag_f),
+                           .ifq_erb_rdinst_f(ifq_erb_rdinst_f),
+                           .ifq_erb_asi_erren_i2(ifq_erb_asi_erren_i2),
+                           .ifq_erb_asi_errstat_i2(ifq_erb_asi_errstat_i2),
+                           .ifq_erb_asi_errinj_i2(ifq_erb_asi_errinj_i2),
+                           .ifq_erb_asi_erraddr_i2(ifq_erb_asi_erraddr_i2),
+                           .ifq_erb_asi_imask_i2(ifq_erb_asi_imask_i2),
+                           .ifq_erb_asiwr_i2(ifq_erb_asiwr_i2),
+                           .ifq_fcl_asird_bf(ifq_fcl_asird_bf),
+                           .ifq_fcl_asi_tid_bf(ifq_fcl_asi_tid_bf[1:0]),
+                           .ifq_erb_ue_rep(ifq_erb_ue_rep),
+                           .ifq_erb_ce_rep(ifq_erb_ce_rep),
+                           .ifq_erb_l2_ue(ifq_erb_l2_ue),
+                           .ifq_erb_io_ue(ifq_erb_io_ue),
+                           .ifq_erb_ifet_ce(ifq_erb_ifet_ce),
+                           .ifq_erb_l2err_tid(ifq_erb_l2err_tid[1:0]),
+                           .ifq_icv_wrdata_bf(ifq_icv_wrdata_bf),
+                           .ifq_icd_worden_bf(ifq_icd_worden_bf[3:0]),
+                           .ifq_fcl_fill_thr(ifq_fcl_fill_thr[3:0]),
+                           .ifq_dtu_thrrdy(ifq_dtu_thrrdy[3:0]),
+                           .ifq_dtu_pred_rdy(ifq_dtu_pred_rdy[3:0]),
+                           .ifc_ifd_filladdr4_i2(ifc_ifd_filladdr4_i2),
+                           .ifc_ifd_reqvalid_e(ifc_ifd_reqvalid_e),
+                           .ifc_ifd_idx_sel_fwd_i2(ifc_ifd_idx_sel_fwd_i2),
+                           .ifc_ifd_errinv_e(ifc_ifd_errinv_e),
+                           .ifc_ifd_uncached_e(ifc_ifd_uncached_e),
+                           .ifc_ifd_thrid_e(ifc_ifd_thrid_e[1:0]),
+                           .ifc_ifd_pcxline_adj_d(ifc_ifd_pcxline_adj_d[4:2]),
+                           .ifc_inv_asireq_i2(ifc_inv_asireq_i2),
+                           .ifc_ifd_repway_s(ifc_ifd_repway_s[1:0]),
+                           .ifc_ifd_milfill_sel_i2_l(ifc_ifd_milfill_sel_i2_l[3:0]),
+                           .ifc_ifd_finst_sel_l(ifc_ifd_finst_sel_l[3:0]),
+                           .ifc_ifd_milreq_sel_d_l(ifc_ifd_milreq_sel_d_l[3:0]),
+                           .ifc_ifd_ifqbyp_sel_fwd_l(ifc_ifd_ifqbyp_sel_fwd_l),
+                           .ifc_ifd_ifqbyp_sel_inq_l(ifc_ifd_ifqbyp_sel_inq_l),
+                           .ifc_ifd_ifqbyp_sel_asi_l(ifc_ifd_ifqbyp_sel_asi_l),
+                           .ifc_ifd_ifqbyp_sel_lsu_l(ifc_ifd_ifqbyp_sel_lsu_l),
+                           .ifc_ifd_ifqbyp_en_l(ifc_ifd_ifqbyp_en_l),
+                           .ifc_ifd_addr_sel_bist_i2_l(ifc_ifd_addr_sel_bist_i2_l),
+                           .ifc_ifd_addr_sel_asi_i2_l(ifc_ifd_addr_sel_asi_i2_l),
+                           .ifc_ifd_addr_sel_old_i2_l(ifc_ifd_addr_sel_old_i2_l),
+                           .ifc_ifd_addr_sel_fill_i2_l(ifc_ifd_addr_sel_fill_i2_l),
+                           .ifq_icd_data_sel_bist_i2(ifq_icd_data_sel_bist_i2),
+                           .ifq_icd_data_sel_fill_i2(ifq_icd_data_sel_fill_i2),
+                           .ifq_icd_data_sel_old_i2(ifq_icd_data_sel_old_i2),
+                           .ifc_ifd_ldmil_sel_new(ifc_ifd_ldmil_sel_new[3:0]),
+                           .ifc_ifd_ld_inq_i1(ifc_ifd_ld_inq_i1),
+                           .ifc_inv_ifqadv_i2(ifc_inv_ifqadv_i2),
+                           // Inputs
+                           .ifd_ifc_milhit_s(ifd_ifc_milhit_s[3:0]),
+                           .ifd_ifc_instoffset0(ifd_ifc_instoffset0[1:0]),
+                           .ifd_ifc_instoffset1(ifd_ifc_instoffset1[1:0]),
+                           .ifd_ifc_instoffset2(ifd_ifc_instoffset2[1:0]),
+                           .ifd_ifc_instoffset3(ifd_ifc_instoffset3[1:0]),
+                           .ifd_ifc_cpxreq_i1(ifd_ifc_cpxreq_i1[`CPX_RQ_SIZE:0]),
+                           .ifd_ifc_cpxreq_nxt(ifd_ifc_cpxreq_nxt[3:0]),
+                           .ifd_ifc_cpxthr_nxt(ifd_ifc_cpxthr_nxt[1:0]),
+                           .ifd_ifc_cpxvld_i2(ifd_ifc_cpxvld_i2),
+                           .ifd_ifc_iobpkt_i2(ifd_ifc_iobpkt_i2),
+                           .ifd_ifc_4bpkt_i2(ifd_ifc_4bpkt_i2),
+                           .ifd_ifc_cpxnc_i2(ifd_ifc_cpxnc_i2),
+                           .ifd_ifc_fwd2ic_i2(ifd_ifc_fwd2ic_i2),
+                           .ifd_ifc_cpxce_i2(ifd_ifc_cpxce_i2),
+                           .ifd_ifc_cpxue_i2(ifd_ifc_cpxue_i2),
+                           .ifd_ifc_cpxms_i2(ifd_ifc_cpxms_i2),
+                           .ifd_ifc_miladdr4_i2(ifd_ifc_miladdr4_i2[3:0]),
+                           .ifd_ifc_asiaddr_i2(ifd_ifc_asiaddr_i2[3:2]),
+                           .ifd_ifc_asi_vachklo_i2(ifd_ifc_asi_vachklo_i2),
+                           .ifd_ifc_destid0(ifd_ifc_destid0[2:0]),
+                           .ifd_ifc_destid1(ifd_ifc_destid1[2:0]),
+                           .ifd_ifc_destid2(ifd_ifc_destid2[2:0]),
+                           .ifd_ifc_destid3(ifd_ifc_destid3[2:0]),
+                           .ifd_ifc_newdestid_s(ifd_ifc_newdestid_s[2:0]),
+                           .ifd_ifc_pcxline_d(ifd_ifc_pcxline_d[4:2]),
+                           .inv_ifc_inv_pending(inv_ifc_inv_pending),
+                           .fcl_ifq_icmiss_s1(fcl_ifq_icmiss_s1),
+                           .fcl_ifq_rdreq_s1(fcl_ifq_rdreq_s1),
+                           .fcl_ifq_thr_s1(fcl_ifq_thr_s1[1:0]),
+                           .fcl_ifq_canthr(fcl_ifq_canthr[3:0]),
+                           .fcl_ifq_grant_bf(fcl_ifq_grant_bf),
+                           .dtu_ifq_kill_latest_d(dtu_ifq_kill_latest_d),
+                           .erb_ifq_ifeterr_d1(erb_ifq_ifeterr_d1),
+                           .erb_ifq_itlberr_s1(erb_ifq_itlberr_s1),
+                           .lsu_ifu_pcxpkt_ack_d(lsu_ifu_pcxpkt_ack_d),
+                           .lsu_ifu_direct_map_l1(lsu_ifu_direct_map_l1),
+                           .lsu_ifu_asi_vld(lsu_ifu_asi_vld),
+                           .lsu_ifu_asi_state(lsu_ifu_asi_state[7:0]),
+                           .lsu_ifu_asi_load(lsu_ifu_asi_load),
+                           .lsu_ifu_asi_thrid(lsu_ifu_asi_thrid[1:0]),
+                           .fcl_ifq_icache_en_s_l(fcl_ifq_icache_en_s_l),
+                           .mbist_ifq_run_bist(mbist_ifq_run_bist),
+                           .mbist_icache_write(mbist_icache_write),
+                           .mbist_icache_read(mbist_icache_read),
+                           .ctu_sscan_tid(ctu_sscan_tid[3:0]),
+                           .rclk        (rclk),
+                           .se          (se),
+                           .gdbginit_l  (gdbginit_l),
+                           .arst_l      (arst_l),
+                           .grst_l      (grst_l),
+                           .sehold      (sehold));
+
+   sparc_ifu_invctl invctl(   // drives the icv write index/bit enables, the decoded ict write way, ifq_erb_asiway_f (to wseldp) and inv_ifc_inv_pending (to ifqctl); takes ifc_inv_* from ifqctl and ifd_inv_* from ifqdp
+                           .so          (scan0_10),   // scan chain out, to errdp.si
+                           .si          (scan0_9),   // scan chain in, from ifqctl.so
+                           /*AUTOINST*/
+                           // Outputs
+                           .inv_ifc_inv_pending(inv_ifc_inv_pending),
+                           .ifq_icv_wrindex_bf(ifq_icv_wrindex_bf[`IC_IDX_HI:5]),
+                           .ifq_icv_wren_bf(ifq_icv_wren_bf[15:0]),
+                           .ifq_ict_dec_wrway_bf(ifq_ict_dec_wrway_bf[3:0]),
+                           .ifq_fcl_invreq_bf(ifq_fcl_invreq_bf),
+                           .ifq_erb_asiway_f(ifq_erb_asiway_f[1:0]),
+                           // Inputs
+                           .rclk        (rclk),
+                           .se          (se),
+                           .const_cpuid (const_cpuid[2:0]),
+                           .mbist_icache_write(mbist_icache_write),
+                           .lsu_ifu_ld_icache_index(lsu_ifu_ld_icache_index[`IC_IDX_HI:5]),
+                           .lsu_ifu_ld_pcxpkt_vld(lsu_ifu_ld_pcxpkt_vld),
+                           .lsu_ifu_ld_pcxpkt_tid(lsu_ifu_ld_pcxpkt_tid[1:0]),
+                           .ifc_inv_ifqadv_i2(ifc_inv_ifqadv_i2),
+                           .ifc_inv_asireq_i2(ifc_inv_asireq_i2),
+                           .ifq_icd_index_bf(ifq_icd_index_bf[`IC_IDX_HI:5]),
+                           .ifd_inv_ifqop_i2(ifd_inv_ifqop_i2[`CPX_WIDTH-1:0]),
+                           .ifd_inv_wrway_i2(ifd_inv_wrway_i2[1:0]));
+   
+
+   sparc_ifu_errdp  errdp(   // datapath partner of errctl: takes the erc_erd_* selects and returns erd_erc_*; also drives ifu_lsu_ldxa_data_w2[63:0] and erb_dtu_imask[38:0]
+                          .so           (scan0_11),   // scan chain out, to errctl.si
+                          .si           (scan0_10),   // scan chain in, from invctl.so
+                         .ifq_erb_wrtag_f(ifq_ict_wrtag_f[`IC_TAG_SZ-1:0]),   // NOTE(review): one bit narrower than the [`IC_TAG_SZ:0] slice written to ict -- confirm the top bit is intentionally dropped
+                          .ict_itlb_tags_f({ict_itlb_tag3_f[28:0],   // low 29 bits of each way's ict read tag, way 3 in the MSBs (4 x 29 = 116 bits)
+                                            ict_itlb_tag2_f[28:0],
+                                            ict_itlb_tag1_f[28:0],
+                                            ict_itlb_tag0_f[28:0]}),
+                          .wsel_erb_asidata_s({wsel_mbist_icache_data[65:64],   // 34 bits: bits [65:64] concatenated above bits [31:0] of the wseldp bus
+                                               wsel_mbist_icache_data[31:0]}),
+                          
+			                    /*AUTOINST*/
+                          // Outputs
+                          .ifu_lsu_ldxa_data_w2(ifu_lsu_ldxa_data_w2[63:0]),
+                          .erb_dtu_imask(erb_dtu_imask[38:0]),
+                          .erd_erc_tlbt_pe_s1(erd_erc_tlbt_pe_s1[1:0]),
+                          .erd_erc_tlbd_pe_s1(erd_erc_tlbd_pe_s1[1:0]),
+                          .erd_erc_tagpe_s1(erd_erc_tagpe_s1[3:0]),
+                          .erd_erc_nirpe_s1(erd_erc_nirpe_s1),
+                          .erd_erc_fetpe_s1(erd_erc_fetpe_s1),
+                          .erd_erc_tte_pgsz(erd_erc_tte_pgsz[2:0]),
+                          // Inputs
+                          .rclk         (rclk),
+                          .se           (se),
+                          .erb_reset    (erb_reset),
+                          .itlb_rd_tte_data(itlb_rd_tte_data[42:0]),
+                          .itlb_rd_tte_tag(itlb_rd_tte_tag[58:0]),
+                          .itlb_ifq_paddr_s(itlb_ifq_paddr_s[39:10]),
+                          .wsel_fdp_fetdata_s1(wsel_fdp_fetdata_s1[33:0]),
+                          .wsel_fdp_topdata_s1(wsel_fdp_topdata_s1[33:0]),
+                          .icv_itlb_valid_f(icv_itlb_valid_f[3:0]),
+                          .lsu_ifu_err_addr(lsu_ifu_err_addr[47:4]),
+                          .spu_ifu_err_addr_w2(spu_ifu_err_addr_w2[39:4]),
+                          .fdp_erb_pc_f (fdp_erb_pc_f[47:0]),
+                          .exu_ifu_err_reg_m(exu_ifu_err_reg_m[7:0]),
+                          .exu_ifu_err_synd_m(exu_ifu_err_synd_m[7:0]),
+                          .ffu_ifu_err_reg_w2(ffu_ifu_err_reg_w2[5:0]),
+                          .ffu_ifu_err_synd_w2(ffu_ifu_err_synd_w2[13:0]),
+                          .tlu_itlb_rw_index_g(tlu_itlb_rw_index_g[5:0]),
+                          .erc_erd_pgsz_b0(erc_erd_pgsz_b0),
+                          .erc_erd_pgsz_b1(erc_erd_pgsz_b1),
+                          .erc_erd_erren_asidata(erc_erd_erren_asidata[1:0]),
+                          .erc_erd_errstat_asidata(erc_erd_errstat_asidata[22:0]),
+                          .erc_erd_errinj_asidata(erc_erd_errinj_asidata[31:0]),
+                          .ifq_erb_asidata_i2(ifq_erb_asidata_i2[47:0]),
+                          .ifq_erb_wrindex_f(ifq_erb_wrindex_f[`IC_IDX_HI:4]),
+                          .erc_erd_asiway_s1_l(erc_erd_asiway_s1_l[3:0]),
+                          .fcl_erb_itlbrd_data_s(fcl_erb_itlbrd_data_s),
+                          .erc_erd_ld_imask(erc_erd_ld_imask),
+                          .erc_erd_asisrc_sel_icd_s_l(erc_erd_asisrc_sel_icd_s_l),
+                          .erc_erd_asisrc_sel_misc_s_l(erc_erd_asisrc_sel_misc_s_l),
+                          .erc_erd_asisrc_sel_err_s_l(erc_erd_asisrc_sel_err_s_l),
+                          .erc_erd_asisrc_sel_itlb_s_l(erc_erd_asisrc_sel_itlb_s_l),
+                          .erc_erd_errasi_sel_en_l(erc_erd_errasi_sel_en_l),
+                          .erc_erd_errasi_sel_stat_l(erc_erd_errasi_sel_stat_l),
+                          .erc_erd_errasi_sel_inj_l(erc_erd_errasi_sel_inj_l),
+                          .erc_erd_errasi_sel_addr_l(erc_erd_errasi_sel_addr_l),
+                          .erc_erd_miscasi_sel_ict_l(erc_erd_miscasi_sel_ict_l),
+                          .erc_erd_miscasi_sel_imask_l(erc_erd_miscasi_sel_imask_l),
+                          .erc_erd_miscasi_sel_other_l(erc_erd_miscasi_sel_other_l),
+                          .erc_erd_asi_thr_l(erc_erd_asi_thr_l[3:0]),
+                          .erc_erd_eadr0_sel_irf_l(erc_erd_eadr0_sel_irf_l[3:0]),
+                          .erc_erd_eadr0_sel_itlb_l(erc_erd_eadr0_sel_itlb_l[3:0]),
+                          .erc_erd_eadr0_sel_frf_l(erc_erd_eadr0_sel_frf_l[3:0]),
+                          .erc_erd_eadr0_sel_lsu_l(erc_erd_eadr0_sel_lsu_l[3:0]),
+                          .erc_erd_eadr1_sel_pcd1_l(erc_erd_eadr1_sel_pcd1_l[3:0]),
+                          .erc_erd_eadr1_sel_l1pa_l(erc_erd_eadr1_sel_l1pa_l[3:0]),
+                          .erc_erd_eadr1_sel_l2pa_l(erc_erd_eadr1_sel_l2pa_l[3:0]),
+                          .erc_erd_eadr1_sel_other_l(erc_erd_eadr1_sel_other_l[3:0]),
+                          .erc_erd_eadr2_sel_mx1_l(erc_erd_eadr2_sel_mx1_l[3:0]),
+                          .erc_erd_eadr2_sel_wrt_l(erc_erd_eadr2_sel_wrt_l[3:0]),
+                          .erc_erd_eadr2_sel_mx0_l(erc_erd_eadr2_sel_mx0_l[3:0]),
+                          .erc_erd_eadr2_sel_old_l(erc_erd_eadr2_sel_old_l[3:0]));
+
+   sparc_ifu_errctl errctl(
+                           .so          (scan0_12),
+                           .si          (scan0_11),
+                          .ifu_tlu_inst_vld_w(ifu_spu_inst_vld_w),
+                           
+                           /*AUTOINST*/
+                           // Outputs
+                           .erc_erd_pgsz_b0(erc_erd_pgsz_b0),
+                           .erc_erd_pgsz_b1(erc_erd_pgsz_b1),
+                           .ifu_lsu_asi_rd_unc(ifu_lsu_asi_rd_unc),
+                           .ifu_lsu_ldxa_tid_w2(ifu_lsu_ldxa_tid_w2[1:0]),
+                           .ifu_lsu_ldxa_data_vld_w2(ifu_lsu_ldxa_data_vld_w2),
+                           .ifu_lsu_fwd_data_vld(ifu_lsu_fwd_data_vld),
+                           .ifu_lsu_error_inj(ifu_lsu_error_inj[3:0]),
+                           .ifu_exu_ecc_mask(ifu_exu_ecc_mask[7:0]),
+                           .ifu_exu_inj_irferr(ifu_exu_inj_irferr),
+                           .ifu_ffu_inj_frferr(ifu_ffu_inj_frferr),
+                           .ifu_exu_nceen_e(ifu_exu_nceen_e),
+                           .ifu_lsu_nceen(ifu_lsu_nceen[3:0]),
+                           .ifu_spu_nceen(ifu_spu_nceen[3:0]),
+                           .erb_fcl_spu_uetrap(erb_fcl_spu_uetrap[3:0]),
+                           .erb_ifq_itlberr_s1(erb_ifq_itlberr_s1),
+                           .erb_ifq_ifeterr_d1(erb_ifq_ifeterr_d1),
+                           .erb_dtu_ifeterr_d1(erb_dtu_ifeterr_d1),
+                           .erb_fcl_itlb_ce_d1(erb_fcl_itlb_ce_d1),
+                           .erb_fcl_ce_trapvec(erb_fcl_ce_trapvec[3:0]),
+                           .erb_fcl_ue_trapvec(erb_fcl_ue_trapvec[3:0]),
+                           .erb_fcl_ifet_uevec_d1(erb_fcl_ifet_uevec_d1[3:0]),
+                           .erc_erd_errstat_asidata(erc_erd_errstat_asidata[22:0]),
+                           .erc_erd_errinj_asidata(erc_erd_errinj_asidata[31:0]),
+                           .erc_erd_erren_asidata(erc_erd_erren_asidata[1:0]),
+                           .erc_erd_eadr0_sel_irf_l(erc_erd_eadr0_sel_irf_l[3:0]),
+                           .erc_erd_eadr0_sel_itlb_l(erc_erd_eadr0_sel_itlb_l[3:0]),
+                           .erc_erd_eadr0_sel_frf_l(erc_erd_eadr0_sel_frf_l[3:0]),
+                           .erc_erd_eadr0_sel_lsu_l(erc_erd_eadr0_sel_lsu_l[3:0]),
+                           .erc_erd_asiway_s1_l(erc_erd_asiway_s1_l[3:0]),
+                           .erc_erd_eadr1_sel_pcd1_l(erc_erd_eadr1_sel_pcd1_l[3:0]),
+                           .erc_erd_eadr1_sel_l1pa_l(erc_erd_eadr1_sel_l1pa_l[3:0]),
+                           .erc_erd_eadr1_sel_l2pa_l(erc_erd_eadr1_sel_l2pa_l[3:0]),
+                           .erc_erd_eadr1_sel_other_l(erc_erd_eadr1_sel_other_l[3:0]),
+                           .erc_erd_eadr2_sel_mx1_l(erc_erd_eadr2_sel_mx1_l[3:0]),
+                           .erc_erd_eadr2_sel_wrt_l(erc_erd_eadr2_sel_wrt_l[3:0]),
+                           .erc_erd_eadr2_sel_mx0_l(erc_erd_eadr2_sel_mx0_l[3:0]),
+                           .erc_erd_eadr2_sel_old_l(erc_erd_eadr2_sel_old_l[3:0]),
+                           .erc_erd_asi_thr_l(erc_erd_asi_thr_l[3:0]),
+                           .erc_erd_asisrc_sel_icd_s_l(erc_erd_asisrc_sel_icd_s_l),
+                           .erc_erd_asisrc_sel_misc_s_l(erc_erd_asisrc_sel_misc_s_l),
+                           .erc_erd_asisrc_sel_err_s_l(erc_erd_asisrc_sel_err_s_l),
+                           .erc_erd_asisrc_sel_itlb_s_l(erc_erd_asisrc_sel_itlb_s_l),
+                           .erc_erd_errasi_sel_en_l(erc_erd_errasi_sel_en_l),
+                           .erc_erd_errasi_sel_stat_l(erc_erd_errasi_sel_stat_l),
+                           .erc_erd_errasi_sel_inj_l(erc_erd_errasi_sel_inj_l),
+                           .erc_erd_errasi_sel_addr_l(erc_erd_errasi_sel_addr_l),
+                           .erc_erd_miscasi_sel_ict_l(erc_erd_miscasi_sel_ict_l),
+                           .erc_erd_miscasi_sel_imask_l(erc_erd_miscasi_sel_imask_l),
+                           .erc_erd_miscasi_sel_other_l(erc_erd_miscasi_sel_other_l),
+                           .erc_erd_ld_imask(erc_erd_ld_imask),
+                           .erb_reset   (erb_reset),
+                           // Inputs
+                           .rclk        (rclk),
+                           .se          (se),
+                           .arst_l      (arst_l),
+                           .grst_l      (grst_l),
+                           .erd_erc_tte_pgsz(erd_erc_tte_pgsz[2:0]),
+                           .icv_itlb_valid_f(icv_itlb_valid_f[3:0]),
+                           .fcl_erb_ievld_s1(fcl_erb_ievld_s1),
+                           .fcl_erb_tevld_s1(fcl_erb_tevld_s1),
+                           .fcl_erb_immuevld_s1(fcl_erb_immuevld_s1),
+                           .fcl_erb_inst_issue_d(fcl_erb_inst_issue_d),
+                           .fcl_erb_inst_vld_d1(fcl_erb_inst_vld_d1),
+                           .ifu_lsu_thrid_s(ifu_lsu_thrid_s[1:0]),
+                           .fcl_erb_asi_tid_f(fcl_erb_asi_tid_f[1:0]),
+                           .ifq_fcl_asi_tid_bf(ifq_fcl_asi_tid_bf[1:0]),
+                           .fcl_erb_clear_iferr(fcl_erb_clear_iferr[3:0]),
+                           .fcl_erb_itlbrd_vld_s(fcl_erb_itlbrd_vld_s),
+                           .fcl_erb_itlbrd_data_s(fcl_erb_itlbrd_data_s),
+                           .erd_erc_tagpe_s1(erd_erc_tagpe_s1[3:0]),
+                           .erd_erc_nirpe_s1(erd_erc_nirpe_s1),
+                           .erd_erc_fetpe_s1(erd_erc_fetpe_s1),
+                           .erd_erc_tlbt_pe_s1(erd_erc_tlbt_pe_s1[1:0]),
+                           .erd_erc_tlbd_pe_s1(erd_erc_tlbd_pe_s1[1:0]),
+                           .tlu_lsu_pstate_priv(tlu_lsu_pstate_priv[3:0]),
+			   .tlu_hpstate_priv  (tlu_hpstate_priv[3:0]),			   
+                           .lsu_ifu_dtlb_data_su(lsu_ifu_dtlb_data_su),
+                           .lsu_ifu_dtlb_data_ue(lsu_ifu_dtlb_data_ue),
+                           .lsu_ifu_dtlb_tag_ue(lsu_ifu_dtlb_tag_ue),
+                           .lsu_ifu_dcache_data_perror(lsu_ifu_dcache_data_perror),
+                           .lsu_ifu_dcache_tag_perror(lsu_ifu_dcache_tag_perror),
+                           .lsu_ifu_l2_unc_error(lsu_ifu_l2_unc_error),
+                           .lsu_ifu_l2_corr_error(lsu_ifu_l2_corr_error),
+                           .lsu_ifu_io_error(lsu_ifu_io_error),
+                           .lsu_ifu_error_tid(lsu_ifu_error_tid[1:0]),
+                           .spu_ifu_unc_err_w1(spu_ifu_unc_err_w1),
+                           .spu_ifu_mamem_err_w1(spu_ifu_mamem_err_w1),
+                           .spu_ifu_corr_err_w2(spu_ifu_corr_err_w2),
+                           .spu_ifu_int_w2(spu_ifu_int_w2),
+                           .spu_ifu_ttype_tid_w2(spu_ifu_ttype_tid_w2[1:0]),
+                           .lsu_ifu_inj_ack(lsu_ifu_inj_ack[3:0]),
+                           .ffu_ifu_ecc_ce_w2(ffu_ifu_ecc_ce_w2),
+                           .ffu_ifu_ecc_ue_w2(ffu_ifu_ecc_ue_w2),
+                           .ffu_ifu_inj_ack(ffu_ifu_inj_ack),
+                           .ffu_ifu_tid_w2(ffu_ifu_tid_w2[1:0]),
+                           .exu_ifu_ecc_ce_m(exu_ifu_ecc_ce_m),
+                           .exu_ifu_ecc_ue_m(exu_ifu_ecc_ue_m),
+                           .exu_ifu_inj_ack(exu_ifu_inj_ack),
+                           .ifq_erb_ue_rep(ifq_erb_ue_rep),
+                           .ifq_erb_ce_rep(ifq_erb_ce_rep),
+                           .ifq_erb_l2_ue(ifq_erb_l2_ue),
+                           .ifq_erb_io_ue(ifq_erb_io_ue),
+                           .ifq_erb_ifet_ce(ifq_erb_ifet_ce),
+                           .ifq_erb_l2err_tid(ifq_erb_l2err_tid[1:0]),
+                           .ifq_erb_rdtag_f(ifq_erb_rdtag_f),
+                           .ifq_erb_rdinst_f(ifq_erb_rdinst_f),
+                           .ifq_erb_asi_erren_i2(ifq_erb_asi_erren_i2),
+                           .ifq_erb_asi_errstat_i2(ifq_erb_asi_errstat_i2),
+                           .ifq_erb_asi_errinj_i2(ifq_erb_asi_errinj_i2),
+                           .ifq_erb_asi_erraddr_i2(ifq_erb_asi_erraddr_i2),
+                           .ifq_erb_asi_imask_i2(ifq_erb_asi_imask_i2),
+                           .ifq_erb_asiwr_i2(ifq_erb_asiwr_i2),
+                           .ifq_fcl_asird_bf(ifq_fcl_asird_bf),
+                           .ifq_erb_fwdrd_bf(ifq_erb_fwdrd_bf),
+                           .ifq_erb_asidata_i2(ifq_erb_asidata_i2[31:0]),
+                           .ifq_erb_asiway_f(ifq_erb_asiway_f[1:0]));
+   
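+   // BIST controller: the sparc_ifu_icd_arr_bist instance below is commented out; sparc_ifu_mbist (further down) is the BIST block actually instantiated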
+//   sparc_ifu_icd_arr_bist bist(// Outputs
+//			       .Test_fdp_icd_index_bf_0 (bist_ic_index[8:0]),
+//			       .Test_ifq_icd_wrway_bf_0 (bist_ic_way),
+//			       .Test_ifq_icd_wrdata_f_0 ({null_data[135:2], 
+//							  bist_ic_data[1:0]}),
+//			       .Test_fcl_icd_rdreq_bf_0 (bist_ic_read),
+//			       .Test_fcl_icd_wrreq_bf_0 (bist_ic_write),
+//			       .Test_ifq_icd_worden_bf_0 (bist_ic_worden),
+//			       .tselect (),
+//			       .tst_done (),
+//			       .fail_h (),
+//			       .scan_out (),
+//			       // Inputs
+//			       .clk (clk),
+//			       .diag_clk(1'b0),
+//			       .rst_l (rst_l),
+//			       .test_h (1'b0),
+//			       .debugz (1'b0),
+//			       .hold_l (1'b1),
+//			       .Test_icd_fdp_topdata_s1_0 (icd_fdp_topdata_s1),
+//			       .Test_icd_fdp_fetdata_s1_0 (icd_fdp_fetdata_s1));
+
+   sparc_ifu_mbist mbist(	// hand-wired ports come first; everything after /*AUTOINST*/ is the generated same-name hookup
+                         .mbist_icache_data_in(wsel_mbist_icache_data[67:0]),  // 68-bit net, named differently from the port
+                         .mbist_si      (scan0_12),                            // scan-in taken from net scan0_12
+                         .mbist_se      (se),
+                         
+                         .mbist_icache_wdata(mbist_icache_wdata[7:0]),
+                         .mbist_dcache_wdata(mbist_write_data[7:0]),           // port is tied to net mbist_write_data (name differs from port)
+
+                         .mbist_so      (scan0_13),                            // scan0_13 is also the .si of the sscan instance below
+                         .rclk           (rclk),
+                         
+                         /*AUTOINST*/
+                         // Outputs
+                         .mbist_dcache_read(mbist_dcache_read),
+                         .mbist_dcache_write(mbist_dcache_write),
+                         .mbist_dcache_word(mbist_dcache_word),
+                         .mbist_dcache_index(mbist_dcache_index[6:0]),
+                         .mbist_dcache_way(mbist_dcache_way[1:0]),
+                         .mbist_icache_read(mbist_icache_read),
+                         .mbist_icache_write(mbist_icache_write),
+                         .mbist_icache_index(mbist_icache_index[7:0]),
+                         .mbist_icache_word(mbist_icache_word),
+                         .mbist_icache_way(mbist_icache_way[1:0]),
+                         .mbist_ifq_run_bist(mbist_ifq_run_bist),
+                         .mbist_done    (mbist_done),
+                         .mbist_dcache_fail(mbist_dcache_fail),
+                         .mbist_icache_fail(mbist_icache_fail),
+                         // Inputs
+                         .grst_l        (grst_l),
+                         .arst_l        (arst_l),
+                         .mbist_start   (mbist_start),
+                         .mbist_userdata_mode(mbist_userdata_mode),
+                         .mbist_bisi_mode(mbist_bisi_mode),
+                         .mbist_loop_mode(mbist_loop_mode),
+                         .mbist_loop_on_address(mbist_loop_on_address),
+                         .mbist_stop_on_fail(mbist_stop_on_fail),
+                         .mbist_stop_on_next_fail(mbist_stop_on_next_fail),
+                         .mbist_dcache_data_in(mbist_dcache_data_in[71:0]));   // 72 bits, versus the 68-bit icache data_in above
+   
+
+   sparc_ifu_sscan sscan(   // hand-wired ports first; the /*AUTOINST*/ section lists sparc_sscan_so as the only generated output
+                         .so            (so0),                 // scan-out leaves on net so0
+                         .si            (scan0_13),            // scan-in: same net as .mbist_so of the mbist instance above
+                         .ifq_sscan_test_data(ifq_sscan_data[3:0]),    // 4-bit bus; net name drops the "_test" of the port name
+                         .lsu_sscan_test_data(lsu_sscan_data[15:0]),   // 16-bit bus
+                         .tlu_sscan_test_data(tlu_sscan_data[62:0]),   // 63-bit bus
+                         /*AUTOINST*/
+                         // Outputs
+                         .sparc_sscan_so(sparc_sscan_so),
+                         // Inputs
+                         .ctu_sscan_snap(ctu_sscan_snap),
+                         .ctu_sscan_se  (ctu_sscan_se),
+                         .ctu_tck       (ctu_tck),
+                         .se            (se),
+                         .swl_sscan_thrstate(swl_sscan_thrstate[10:0]),
+                         .rclk          (rclk));
+
+   // floating outputs: each bit slice below is wired to the .in port of a sink instance whose #(N) parameter matches the slice width (4, 4, 4, 4, 2). NOTE(review): sink presumably only loads otherwise-unused nets -- confirm against its definition
+   sink #(4) s0(.in (ict_itlb_tag0_f[32:29]));
+   sink #(4) s1(.in (ict_itlb_tag1_f[32:29]));
+   sink #(4) s2(.in (ict_itlb_tag2_f[32:29]));
+   sink #(4) s3(.in (ict_itlb_tag3_f[32:29]));
+   sink #(2) s4(.in (fuse_icd_rid[5:4]));
+   
+				
+endmodule
+// Local Variables:
+// verilog-library-directories:("." "../../../srams/rtl" "../../../common/rtl")
+// End:
Index: /trunk/T1-CPU/ifu/sparc_ifu_ifqctl.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_ifqctl.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_ifqctl.v	(revision 6)
@@ -0,0 +1,1861 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_ifqctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_ifqctl
+//  Description:	
+//  Contains the control logic for the ifq and mil.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+`include "iop.h"
+`include "ifu.h"
+
+
+module sparc_ifu_ifqctl(/*AUTOARG*/
+   // Outputs
+   ifu_lsu_inv_clear, ifu_lsu_ibuf_busy, ifu_lsu_asi_ack, 
+   ifu_lsu_ldxa_illgl_va_w2, ifu_lsu_fwd_wr_ack, ifu_lsu_pcxreq_d, 
+   ifu_lsu_destid_s, ifu_tlu_l2imiss, ifq_fcl_stallreq, 
+   ifq_swl_stallreq, ifq_fcl_flush_sonly_e, ifq_fcl_wrreq_bf, 
+   ifq_fcl_rdreq_bf, ifq_fcl_icd_wrreq_bf, ifq_fcl_ictv_wrreq_bf, 
+   ifq_erb_fwdrd_bf, ifq_erb_rdtag_f, ifq_erb_rdinst_f, 
+   ifq_erb_asi_erren_i2, ifq_erb_asi_errstat_i2, 
+   ifq_erb_asi_errinj_i2, ifq_erb_asi_erraddr_i2, 
+   ifq_erb_asi_imask_i2, ifq_erb_asiwr_i2, ifq_fcl_asird_bf, 
+   ifq_fcl_asi_tid_bf, ifq_erb_ue_rep, ifq_erb_ce_rep, ifq_erb_l2_ue, 
+   ifq_erb_io_ue, ifq_erb_ifet_ce, ifq_erb_l2err_tid, 
+   ifq_icv_wrdata_bf, ifq_icd_worden_bf, ifq_fcl_fill_thr, 
+   ifq_dtu_thrrdy, ifq_dtu_pred_rdy, ifc_ifd_filladdr4_i2, 
+   ifc_ifd_reqvalid_e, ifc_ifd_idx_sel_fwd_i2, ifc_ifd_errinv_e, 
+   ifc_ifd_uncached_e, ifc_ifd_thrid_e, ifc_ifd_pcxline_adj_d, 
+   ifc_inv_asireq_i2, ifc_ifd_repway_s, ifq_sscan_data, 
+   ifc_ifd_milfill_sel_i2_l, ifc_ifd_finst_sel_l, 
+   ifc_ifd_milreq_sel_d_l, ifc_ifd_ifqbyp_sel_fwd_l, 
+   ifc_ifd_ifqbyp_sel_inq_l, ifc_ifd_ifqbyp_sel_asi_l, 
+   ifc_ifd_ifqbyp_sel_lsu_l, ifc_ifd_ifqbyp_en_l, 
+   ifc_ifd_addr_sel_bist_i2_l, ifc_ifd_addr_sel_asi_i2_l, 
+   ifc_ifd_addr_sel_old_i2_l, ifc_ifd_addr_sel_fill_i2_l, 
+   ifq_icd_data_sel_bist_i2, ifq_icd_data_sel_fill_i2, 
+   ifq_icd_data_sel_old_i2, ifc_ifd_ldmil_sel_new, ifc_ifd_ld_inq_i1, 
+   ifc_inv_ifqadv_i2, so, 
+   // Inputs
+   lsu_ifu_cpxpkt_wayvld_i1, ifd_ifc_milhit_s, ifd_ifc_instoffset0, ifd_ifc_instoffset1, 
+   ifd_ifc_instoffset2, ifd_ifc_instoffset3, ifd_ifc_cpxvalid_i1, 
+   ifd_ifc_cpxreq_i1, ifd_ifc_cpxreq_nxt, ifd_ifc_cpxthr_nxt, 
+   ifd_ifc_cpxvld_i2, ifd_ifc_iobpkt_i2, ifd_ifc_4bpkt_i2, 
+   ifd_ifc_cpxnc_i2, ifd_ifc_fwd2ic_i2, ifd_ifc_cpxce_i2, 
+   ifd_ifc_cpxue_i2, ifd_ifc_cpxms_i2, ifd_ifc_miladdr4_i2, 
+   ifd_ifc_asiaddr_i2, ifd_ifc_asi_vachklo_i2, ifd_ifc_destid0, 
+   ifd_ifc_destid1, ifd_ifc_destid2, ifd_ifc_destid3, 
+   ifd_ifc_newdestid_s, ifd_ifc_pcxline_d, inv_ifc_inv_pending, 
+   fcl_ifq_icmiss_s1, fcl_ifq_rdreq_s1, fcl_ifq_thr_s1, 
+   fcl_ifq_canthr, fcl_ifq_grant_bf, dtu_ifq_kill_latest_d, 
+   erb_ifq_ifeterr_d1, erb_ifq_itlberr_s1, lsu_ifu_pcxpkt_ack_d, 
+   lsu_ifu_direct_map_l1, lsu_ifu_asi_vld, lsu_ifu_asi_state, 
+   lsu_ifu_asi_load, lsu_ifu_asi_thrid, fcl_ifq_icache_en_s_l, 
+   mbist_ifq_run_bist, mbist_icache_write, mbist_icache_read, 
+   ctu_sscan_tid, rclk, se, si, gdbginit_l, arst_l, grst_l, 
+   rst_tri_en, sehold
+   );
+
+   input          lsu_ifu_cpxpkt_wayvld_i1;
+   input [3:0]	  ifd_ifc_milhit_s;      // if an Imiss hits in MIL
+   input [1:0]    ifd_ifc_instoffset0; // to select inst to TIR
+   input [1:0]    ifd_ifc_instoffset1; // to select inst to TIR
+   input [1:0]    ifd_ifc_instoffset2; // to select inst to TIR
+   input [1:0]    ifd_ifc_instoffset3; // to select inst to TIR
+
+   input         ifd_ifc_cpxvalid_i1;
+   input [`CPX_RQ_SIZE:0] ifd_ifc_cpxreq_i1;
+
+   input [3:0]   ifd_ifc_cpxreq_nxt;
+   input [1:0]   ifd_ifc_cpxthr_nxt;
+   input         ifd_ifc_cpxvld_i2;
+   
+   input         ifd_ifc_iobpkt_i2;
+   input         ifd_ifc_4bpkt_i2;
+   input         ifd_ifc_cpxnc_i2;
+   input         ifd_ifc_fwd2ic_i2;
+   input         ifd_ifc_cpxce_i2,
+		             ifd_ifc_cpxue_i2,
+                 ifd_ifc_cpxms_i2;
+   
+   input [3:0]   ifd_ifc_miladdr4_i2;
+
+   input [3:2]   ifd_ifc_asiaddr_i2;
+   input         ifd_ifc_asi_vachklo_i2;
+
+   input [2:0]   ifd_ifc_destid0,
+		             ifd_ifc_destid1,
+		             ifd_ifc_destid2,
+		             ifd_ifc_destid3,
+		             ifd_ifc_newdestid_s;
+   input [4:2]   ifd_ifc_pcxline_d;
+//   input [7:0]   ifd_ifc_mil_repway_s;   
+
+   input         inv_ifc_inv_pending;
+   
+   input         fcl_ifq_icmiss_s1;   // icache miss
+   input         fcl_ifq_rdreq_s1;
+
+   input [1:0]   fcl_ifq_thr_s1;
+
+   input [3:0]   fcl_ifq_canthr;        // cancel the imiss reqs to
+                                        // these threads
+   input         fcl_ifq_grant_bf;
+
+   input         dtu_ifq_kill_latest_d;
+   input         erb_ifq_ifeterr_d1;
+   input         erb_ifq_itlberr_s1;
+   
+   input         lsu_ifu_pcxpkt_ack_d;
+   input         lsu_ifu_direct_map_l1;
+
+   input         lsu_ifu_asi_vld;
+   input [7:0]   lsu_ifu_asi_state;
+   input         lsu_ifu_asi_load;
+   input [1:0]   lsu_ifu_asi_thrid;
+
+   input         fcl_ifq_icache_en_s_l;
+   
+   input         mbist_ifq_run_bist,
+                 mbist_icache_write,
+		             mbist_icache_read;
+
+   input [3:0]   ctu_sscan_tid;
+
+   input         rclk, 
+                 se, 
+                 si,
+                 gdbginit_l,
+                 arst_l,
+                 grst_l;
+
+   input         rst_tri_en;
+   input         sehold;
+   
+   // outputs
+   output        ifu_lsu_inv_clear;
+   output        ifu_lsu_ibuf_busy;
+   output        ifu_lsu_asi_ack;
+   output        ifu_lsu_ldxa_illgl_va_w2;
+
+   output        ifu_lsu_fwd_wr_ack;
+   
+   output        ifu_lsu_pcxreq_d;
+   output [2:0]  ifu_lsu_destid_s;
+
+   output [3:0]  ifu_tlu_l2imiss;
+   
+   output        ifq_fcl_stallreq;
+   output        ifq_swl_stallreq;
+   output        ifq_fcl_flush_sonly_e;
+   
+   output        ifq_fcl_wrreq_bf;
+   output        ifq_fcl_rdreq_bf;
+   
+   output        ifq_fcl_icd_wrreq_bf,
+		             ifq_fcl_ictv_wrreq_bf;
+   output        ifq_erb_fwdrd_bf;
+   output        ifq_erb_rdtag_f;
+   output        ifq_erb_rdinst_f;
+   output        ifq_erb_asi_erren_i2;
+   output        ifq_erb_asi_errstat_i2;
+   output        ifq_erb_asi_errinj_i2;
+   output        ifq_erb_asi_erraddr_i2;
+   output        ifq_erb_asi_imask_i2;
+   output        ifq_erb_asiwr_i2;
+   
+   output        ifq_fcl_asird_bf;
+   output [1:0]  ifq_fcl_asi_tid_bf;
+
+   output        ifq_erb_ue_rep;
+   output        ifq_erb_ce_rep;
+   output        ifq_erb_l2_ue;
+   output        ifq_erb_io_ue;
+   output        ifq_erb_ifet_ce;
+   output [1:0]  ifq_erb_l2err_tid;
+
+   output        ifq_icv_wrdata_bf;
+   output [3:0]  ifq_icd_worden_bf;
+
+   output [3:0]  ifq_fcl_fill_thr;     // should be same stage as 
+   // fill_inst
+   output [3:0]  ifq_dtu_thrrdy;
+   output [3:0]  ifq_dtu_pred_rdy;
+   
+   output        ifc_ifd_filladdr4_i2;
+   output        ifc_ifd_reqvalid_e;
+   output        ifc_ifd_idx_sel_fwd_i2;
+
+   output        ifc_ifd_errinv_e;
+   output        ifc_ifd_uncached_e;
+   output [1:0]  ifc_ifd_thrid_e;
+   output [4:2]  ifc_ifd_pcxline_adj_d;
+
+   output        ifc_inv_asireq_i2;
+
+   output [1:0]  ifc_ifd_repway_s;
+
+   output [3:0]  ifq_sscan_data;
+   
+   // mux selects
+   output [3:0]  ifc_ifd_milfill_sel_i2_l;
+   output [3:0]  ifc_ifd_finst_sel_l;
+   output [3:0]  ifc_ifd_milreq_sel_d_l;
+   output        ifc_ifd_ifqbyp_sel_fwd_l, // select next input to ifq pipe
+		             ifc_ifd_ifqbyp_sel_inq_l,
+		             ifc_ifd_ifqbyp_sel_asi_l,
+		             ifc_ifd_ifqbyp_sel_lsu_l;
+	 output        ifc_ifd_ifqbyp_en_l;
+   
+   output        ifc_ifd_addr_sel_bist_i2_l,
+		             ifc_ifd_addr_sel_asi_i2_l,
+                 ifc_ifd_addr_sel_old_i2_l,
+		             ifc_ifd_addr_sel_fill_i2_l;
+
+   output        ifq_icd_data_sel_bist_i2,
+		             ifq_icd_data_sel_fill_i2,
+		             ifq_icd_data_sel_old_i2;
+	 
+   // 2:1 mux selects
+   output [3:0]  ifc_ifd_ldmil_sel_new;     // mil load enable
+
+   
+   output        ifc_ifd_ld_inq_i1;
+   
+   output        ifc_inv_ifqadv_i2;         // move a new op from ifq
+	 // pipe to icache
+
+   output        so;
+   
+   //----------------------------------------------------------------------
+   // Declarations
+   //----------------------------------------------------------------------   
+   // local signals
+   wire [3:0]    thr_s1,      // s1 thread which missed in I$
+		             thr_d1,
+                 thr_e1,
+		             dfthr_f,     // thread currently being filled in I$
+                 dfthr_next_i2,
+		             dfthr_i2,    // next thread to be filled from CPX
+                 milfthr_i2,
+		             dpcxthr_s,
+		             dpcxthr_d;   // thread being transmitted to lsu
+
+   wire [1:0]    thrid_d,
+                 thrid_e;
+   
+   wire [3:0]    pcx_accept_d;
+
+   wire          req_pending_d,
+//		             req_pending_e,
+		             req_accept_d,
+//                 can_pcx_d,
+                 pcxreq_vbit_d;
+   
+   wire [3:0]    comp_valid_s,
+		             mil_valid_s,
+                 mil_cancel;
+
+   wire [3:0]    finst_i2,
+                 finst0,
+                 finst1,
+                 finst2,
+                 finst3;
+
+   wire [2:0]    milchld0,
+		             milchld1,
+		             milchld2,
+		             milchld3,
+		             next_milchld,
+		             milchld_d1;
+   wire          milchld_vld_f,
+		             next_milchld_i2,
+		             milchld_vld_i2;
+
+   wire [3:0]    mil0_state,
+                 mil1_state,
+                 mil2_state,
+                 mil3_state;
+
+   wire [2:0]    i2out;         // state machine output in i2 stage
+   
+   wire          any_milhit_qual_s,     // hit in MIL
+                 any_qualhit_or_io_s,
+		             icmiss_qual_s,
+//		             reqq_empty,    // no pending requests in MIL
+		             oldreq_valid,
+		             next_wrreq_i2,
+		             wrreq_f;
+   wire          block_fetch_s1,
+                 block_fetch_d1;
+
+   wire [3:0]    mil_thr_ready,
+                 all_retry_rdy_e1,
+                 all_retry_rdy_m1;
+   wire          retry_rdy_final_d1,
+                 retry_rdy_e1;
+
+   wire          rst_starv_ctr_l;
+   wire          starv_alert;
+
+   wire [3:0]    milhit_vec_s;
+   wire          any_milhit_s;
+
+   wire [1:0]    rand_repway_s;
+//   wire [1:0]    mil_repway_s;
+
+   wire [3:0]    errthr_d1,
+		             err_vec_d1,
+		             err_req;
+   wire          errinv_d1;
+
+   wire          ifeterr_qual_d1,
+                 ifeterr_e1;
+   wire          thr_match_d1e1;
+   wire          ifqadv_i1;
+
+   wire          ifqadvi2_nxt,
+                 ifqadv_i2_ff;
+   wire          access_grant_l;
+
+   wire          addrbit4_i2;
+   wire          addrbit4_nxt_i2;
+   
+   wire [3:0]    cpxreq_i2;
+   wire [1:0]    cpxthr_i2;
+   
+
+   wire          uncached_i2,
+                 uncached_s,
+                 mil_vld_i2,
+                 mil_uncan_i2,
+                 mil_nc_i2,
+                 mil_nc_e,
+                 mil_nc_d,
+		             uncached_fill_i2,
+		             uncached_f;
+
+   wire [3:0]    mil_nc_vec,
+                 mil_nc_vec_nxt;
+                 
+   
+   wire [3:0]    pcxreq_s,          // req bit from MIL
+                 pcxreq_qual_s,
+//		             newpcxreq_s,       // valid new request from latest miss
+//		             oldpcxreq_s,
+		             rr_gnt,          // round robin grant signal
+		             fill_addr4_i2;    // fill address bit 4 - determines
+	 // which 16B of the 32B line gets
+	 // written. Assume 0 first then 1
+
+   wire          newreq_valid,    // latest S stage miss creates request.
+		             nextreq_valid_s, // if either a new req from i$ or old
+				         // req from MIL is made in this cycle.
+		             req_valid_d;     // req to LSU is valid
+
+   wire          inq_vld,
+		             inq_vld_nxt;
+
+   wire          ic_pkt_i1;
+   
+//   wire          fill_this16b;
+
+   wire [1:0]    filltid_i2,
+		             next_filltid_i2,
+		             filltid_f;
+
+   wire          imissrtn_i2,   // input cpx is ifill return
+                 imissrtn_next_i2,
+                 imissrtn_f,
+		             imissrtn_i1;   // pkt in inq is ifill ret
+
+   wire          invalidate_i1;
+
+   wire [3:0]    icmiss_thr_s,
+		             icmiss_thr_d;
+   wire          icmiss_d1,
+                 icmiss_qual_d1;
+   wire          canthr_s1,
+                 canthr_d1,
+                 canthr_s1_del1;
+   wire          itlberr_d1;
+
+   wire [2:0]    old_destid_s;
+
+   wire          destid_iob_s,
+                 destid_iob_d;
+
+   wire          iosp_d1_l,
+                 n763;
+   
+   wire [3:0]    wrt_tir;
+
+   wire [3:0]    wr_complete_f;
+   wire [3:0]    pred_rdy_i2;
+   wire [3:0]    fill_retn_thr_i2;
+
+   wire          filladdr4_f;
+   
+   wire [3:0]    milhit_to_thr_s,
+		             icmiss_for_milchk,
+                 qualhit_pe_s,
+                 qualhit_or_io_s;
+//		             milhit_qual_s;
+
+   wire          l2_ue_i2,
+		             l2_ce_i2,
+		             io_ue_i2;
+   wire          l2_miss_i2,
+                 l2_miss_f;
+//   wire [3:0]    l2ms_thr;
+   
+   wire          ce_rep_i2,
+		             ue_rep_i2;
+
+   wire          fwdreq_i2,
+                 fwdreq_i3,
+                 fwd_stall,
+                 fwdwr_i3,
+                 fwdrd_i3;
+
+   wire          cpxnc_i3;
+
+   wire          stallreq_d0,
+                 stallreq_d1;
+
+   wire          ifu_asireq_i1,
+                 ifu_asireq_i0,
+                 byp_sel_asi_l,
+                 asird_i1,
+		             asireq_i2,
+                 asireq_i2_l,
+		             asi_load_i1,
+		             asi_load_i2,
+		             asi_vld_next,
+                 asi_vld_i0,
+                 asi_vld_qual_i0;
+   
+   wire [7:0]    asi_state_i1;
+   wire          asi_ic_data_i1,
+		             asi_ic_data_i2,
+		             asi_ic_tag_i1,
+		             asi_ic_tag_i2;
+
+   wire          asi_erren_i1,
+		             asi_errstat_i1,
+		             asi_errinj_i1,
+		             asi_erraddr_i1,
+		             asi_imask_i1;
+
+   wire          asi_ic_data_unchk_i1,
+		             asi_ic_tag_unchk_i1;
+
+   wire          asi_erren_unchk_i1,
+		             asi_errstat_unchk_i1,
+		             asi_errinj_unchk_i1,
+		             asi_erraddr_unchk_i1,
+		             asi_imask_unchk_i1;
+
+   wire          illva_i0,
+                 illva_i1,
+                 illva_i2,
+                 illva_f,
+                 illva_s,
+                 illva_w2;
+
+   wire [3:0]    word_sel_i2;
+   wire          bist_op;
+
+   wire          rdinst_bf,
+		             rd_tag_bf;
+   
+   wire          errpkt_i1;
+   
+   wire          stpkt_i1,
+                 strmack_i1,
+		             ldpkt_i1,
+		             evpkt_i1,
+		             errpkt_i2;
+   
+   wire          icv_wrdata_i2,
+		             icv_wbit_i2,
+		             icv_wrdata_f;
+
+   wire          rst_way_lfsr;
+
+   wire          inq_wayvld_i1;
+   wire          inq_wayvld_i1_nxt;
+   wire          ldinv_i1;
+   wire          ldinv_i2_nxt;
+   wire          ldinv_i2;
+	 
+   wire          ifq_reset,
+                 rnd_reset,
+                 ifq_reset_l;
+   
+
+   wire          clk;
+   
+   
+//----------------------------------------------------------------------
+// Code starts here
+//----------------------------------------------------------------------
+
+   assign        clk = rclk;
+   
+
+   // reset buffer: grst_l goes through the dffrl_async cell rstff (clocked by clk, rst_l tied to arst_l) and comes out as ifq_reset_l; the scan pins si/so are left unconnected here
+   dffrl_async rstff(.din (grst_l),
+                     .q   (ifq_reset_l),
+                     .clk (clk), .se(se), .si(), .so(),
+                     .rst_l (arst_l));
+   // inverted (active-high) form of the buffered reset; this is the net wired to .reset of the milfsm instances below
+   assign       ifq_reset = ~ifq_reset_l;
+
+   
+//---------
+// MIL fsm
+//---------   
+   sparc_ifu_milfsm mil0(   // milfsm instance 0: every indexed connection below uses bit [0] of its vector
+ 	                       .ifc_fsm_can_thisthr      (fcl_ifq_canthr[0]),        // bit 0 of the "cancel the imiss reqs" input vector
+                         //	.ifc_fsm_orphan_thisthr (orphan_thr_d1[0]),
+                         // (the orphan_thisthr hookup above is commented out in this revision)
+
+	                       .ifc_fsm_fill_thisthr_i2  (fill_retn_thr_i2[0]),    
+	                       .ifc_fsm_wr_complete_f    (wr_complete_f[0]),
+
+                         .ifqadv_i2  (ifc_inv_ifqadv_i2),                       // not indexed: the same net also feeds mil1 and mil2
+
+                         .ifd_ifc_4bpkt_i2         (ifd_ifc_4bpkt_i2),          // not indexed: shared with the other milfsm instances
+	                       .fcl_ifq_thr_s1           (fcl_ifq_thr_s1),            // full [1:0] bus, not a per-instance bit
+	                       .ifc_fsm_imiss_thisthr_s  (icmiss_thr_s[0]),          
+	                       .ifc_fsm_milhit_s         (any_milhit_qual_s),         // not indexed: shared with the other milfsm instances
+
+	                       .ifc_fsm_hiton_thismil_s  (milhit_to_thr_s[0]),   
+
+	                       .ifc_fsm_pcxaccept_thisthr(pcx_accept_d[0]),
+                         .ifc_fsm_miladdr4         (ifd_ifc_miladdr4_i2[0]),    // bit 0 of the [3:0] module input
+
+	                       .clk                      (clk),                       // clk is assigned from rclk earlier in this module
+                         .se                       (se),
+                         .si                       (si),                        // module-level si; mil1 and mil2 are wired to the same net
+	                       .reset                    (ifq_reset),                 // ifq_reset = ~ifq_reset_l (output of rstff)
+
+                         .so                       (),                          // scan-out left unconnected
+
+	                       .ifc_fsm_err_thisthr      (errthr_d1[0]),
+			
+	                       // outputs	 
+                         .fsm_ifc_errreq           (err_req[0]),
+		  
+	                       .fsm_ifc_wrt_tir          (wrt_tir[0]),
+	                       .fsm_ifc_comp_valid       (comp_valid_s[0]),
+	                       .fsm_ifc_mil_valid        (mil_valid_s[0]),
+	                       .fsm_ifc_mil_cancel       (mil_cancel[0]),
+                         .fsm_ifc_milstate         (mil0_state[3:0]),           // dedicated 4-bit wire for this instance
+			
+	                       .fsm_ifc_thr_ready        (mil_thr_ready[0]),
+	                       .fsm_ifc_pred_rdy         (pred_rdy_i2[0]),
+	                       .fsm_ifc_pcxreq           (pcxreq_s[0]),       
+	                       .fsm_ifc_addrbit4_i2      (fill_addr4_i2[0]),
+	                       .fsm_ifc_milchld          (milchld0[2:0]));            // dedicated 3-bit wire for this instance
+   
+
+   sparc_ifu_milfsm mil1(   // milfsm instance 1: every indexed connection below uses bit [1] of its vector
+                         .ifc_fsm_can_thisthr      (fcl_ifq_canthr[1]),        // bit 1 of the "cancel the imiss reqs" input vector
+                         //	   .ifc_fsm_orphan_thisthr   (orphan_thr_d1[1]),     
+
+	                       .ifc_fsm_fill_thisthr_i2  (fill_retn_thr_i2[1]),   
+	                       .ifc_fsm_wr_complete_f    (wr_complete_f[1]),          
+      
+                         .ifqadv_i2  (ifc_inv_ifqadv_i2),                       // not indexed: the same net also feeds mil0 and mil2
+
+                         .ifd_ifc_4bpkt_i2         (ifd_ifc_4bpkt_i2),          // not indexed: shared with the other milfsm instances
+	                       .fcl_ifq_thr_s1           (fcl_ifq_thr_s1),             // full [1:0] bus, not a per-instance bit
+	                       .ifc_fsm_milhit_s         (any_milhit_qual_s),           // not indexed: shared with the other milfsm instances
+	                       .ifc_fsm_hiton_thismil_s (milhit_to_thr_s[1]),   
+	                       .ifc_fsm_imiss_thisthr_s   (icmiss_thr_s[1]),          
+
+	                       .ifc_fsm_pcxaccept_thisthr   (pcx_accept_d[1]),     
+                         //	   .ifc_fsm_reqq_empty       (reqq_empty),         
+                         .ifc_fsm_miladdr4         (ifd_ifc_miladdr4_i2[1]),    // bit 1 of the [3:0] module input
+
+	                       .clk                      (clk),                       // clk is assigned from rclk earlier in this module
+                         .se                       (se),
+                         .si                       (si),                        // module-level si; mil0 and mil2 are wired to the same net
+	                       .reset                    (ifq_reset),                 // ifq_reset = ~ifq_reset_l (output of rstff)
+
+	                       .ifc_fsm_err_thisthr      (errthr_d1[1]),
+			
+	                       // outputs	 
+                         .fsm_ifc_errreq           (err_req[1]),
+
+	                       .fsm_ifc_wrt_tir          (wrt_tir[1]),
+                         .so                       (),	    // scan-out left unconnected
+                         //	   .fsm_ifc_cm_pending       (can_miss_pending[1]),
+                         //	   .fsm_ifc_delay_mil        (delay_mil[1]),
+	                       .fsm_ifc_comp_valid       (comp_valid_s[1]), 
+	                       .fsm_ifc_mil_valid        (mil_valid_s[1]),
+	                       .fsm_ifc_mil_cancel       (mil_cancel[1]),
+                         .fsm_ifc_milstate         (mil1_state[3:0]),           // dedicated 4-bit wire for this instance
+
+	                       .fsm_ifc_pcxreq           (pcxreq_s[1]),       
+	                       .fsm_ifc_thr_ready        (mil_thr_ready[1]),
+	                       .fsm_ifc_pred_rdy         (pred_rdy_i2[1]),
+	                       .fsm_ifc_addrbit4_i2      (fill_addr4_i2[1]), 
+	                       .fsm_ifc_milchld          (milchld1[2:0]));            // dedicated 3-bit wire for this instance
+
+   // MIL state machine for thread 2: each per-thread vector is tapped at bit [2].
+   // Ports are connected by name, so their order here is free.
+   sparc_ifu_milfsm mil2(
+                         // clock, reset, scan
+                         .clk                       (clk),
+                         .reset                     (ifq_reset),
+                         .se                        (se),
+                         .si                        (si),
+                         .so                        (),
+
+                         // fill-return side (i2 / f signals)
+                         .ifqadv_i2                 (ifc_inv_ifqadv_i2),
+                         .ifd_ifc_4bpkt_i2          (ifd_ifc_4bpkt_i2),
+                         .ifc_fsm_fill_thisthr_i2   (fill_retn_thr_i2[2]),
+                         .ifc_fsm_wr_complete_f     (wr_complete_f[2]),
+                         .ifc_fsm_miladdr4          (ifd_ifc_miladdr4_i2[2]),
+
+                         // miss-request side (s / d signals), cancel and error
+                         .fcl_ifq_thr_s1            (fcl_ifq_thr_s1),
+                         .ifc_fsm_milhit_s          (any_milhit_qual_s),
+                         .ifc_fsm_hiton_thismil_s   (milhit_to_thr_s[2]),
+                         .ifc_fsm_imiss_thisthr_s   (icmiss_thr_s[2]),
+                         .ifc_fsm_pcxaccept_thisthr (pcx_accept_d[2]),
+                         .ifc_fsm_can_thisthr       (fcl_ifq_canthr[2]),
+                         .ifc_fsm_err_thisthr       (errthr_d1[2]),
+
+                         // hookups that were already commented out
+                         //	   .ifc_fsm_orphan_thisthr   (orphan_thr_d1[2]),
+                         //	   .ifc_fsm_reqq_empty       (reqq_empty),
+                         //	   .fsm_ifc_cm_pending       (can_miss_pending[2]),
+                         //	   .fsm_ifc_delay_mil        (delay_mil[2]),
+
+                         // outputs
+                         .fsm_ifc_milstate          (mil2_state[3:0]),
+                         .fsm_ifc_milchld           (milchld2[2:0]),
+                         .fsm_ifc_mil_valid         (mil_valid_s[2]),
+                         .fsm_ifc_comp_valid        (comp_valid_s[2]),
+                         .fsm_ifc_mil_cancel        (mil_cancel[2]),
+                         .fsm_ifc_pcxreq            (pcxreq_s[2]),
+                         .fsm_ifc_errreq            (err_req[2]),
+                         .fsm_ifc_wrt_tir           (wrt_tir[2]),
+                         .fsm_ifc_thr_ready         (mil_thr_ready[2]),
+                         .fsm_ifc_pred_rdy          (pred_rdy_i2[2]),
+                         .fsm_ifc_addrbit4_i2       (fill_addr4_i2[2]));
+
+
+   // MIL state machine for thread 3: each per-thread vector is tapped at bit [3].
+   // Ports are connected by name, so their order here is free.
+   sparc_ifu_milfsm mil3(
+                         // clock, reset, scan
+                         .clk                       (clk),
+                         .reset                     (ifq_reset),
+                         .se                        (se),
+                         .si                        (si),
+                         .so                        (),
+
+                         // fill-return side (i2 / f signals)
+                         .ifqadv_i2                 (ifc_inv_ifqadv_i2),
+                         .ifd_ifc_4bpkt_i2          (ifd_ifc_4bpkt_i2),
+                         .ifc_fsm_fill_thisthr_i2   (fill_retn_thr_i2[3]),
+                         .ifc_fsm_wr_complete_f     (wr_complete_f[3]),
+                         .ifc_fsm_miladdr4          (ifd_ifc_miladdr4_i2[3]),
+
+                         // miss-request side (s / d signals), cancel and error
+                         .fcl_ifq_thr_s1            (fcl_ifq_thr_s1),
+                         .ifc_fsm_milhit_s          (any_milhit_qual_s),
+                         .ifc_fsm_hiton_thismil_s   (milhit_to_thr_s[3]),
+                         .ifc_fsm_imiss_thisthr_s   (icmiss_thr_s[3]),
+                         .ifc_fsm_pcxaccept_thisthr (pcx_accept_d[3]),
+                         .ifc_fsm_can_thisthr       (fcl_ifq_canthr[3]),
+                         .ifc_fsm_err_thisthr       (errthr_d1[3]),
+
+                         // hookups that were already commented out
+                         //	   .ifc_fsm_orphan_thisthr   (orphan_thr_d1[3]),
+                         //	   .ifc_fsm_reqq_empty       (reqq_empty),
+                         //	   .fsm_ifc_cm_pending       (can_miss_pending[3]),
+                         //	   .fsm_ifc_delay_mil        (delay_mil[3]),
+
+                         // outputs
+                         .fsm_ifc_milstate          (mil3_state[3:0]),
+                         .fsm_ifc_milchld           (milchld3[2:0]),
+                         .fsm_ifc_mil_valid         (mil_valid_s[3]),
+                         .fsm_ifc_comp_valid        (comp_valid_s[3]),
+                         .fsm_ifc_mil_cancel        (mil_cancel[3]),
+                         .fsm_ifc_pcxreq            (pcxreq_s[3]),
+                         .fsm_ifc_errreq            (err_req[3]),
+                         .fsm_ifc_wrt_tir           (wrt_tir[3]),
+                         .fsm_ifc_thr_ready         (mil_thr_ready[3]),
+                         .fsm_ifc_pred_rdy          (pred_rdy_i2[3]),
+                         .fsm_ifc_addrbit4_i2       (fill_addr4_i2[3]));
+
+
+   
+//-------------------------------------------
+// Fill Return Control (IFU interface to CPX)
+//-------------------------------------------
+
+   // i2 copies of the CPX request type (4b) and thread id (2b), both enabled by ifqadv_i1.  Cell note: use soffm2 for lower setup
+   dffe_s #(4) cpxreq_reg(.clk (clk),
+                          .en  (ifqadv_i1),
+                          .din (ifd_ifc_cpxreq_nxt),
+                          .q   (cpxreq_i2), .se(se), .si(), .so());
+   dffe_s #(2) cpxthr_reg(.clk (clk),
+                          .en  (ifqadv_i1),
+                          .din (ifd_ifc_cpxthr_nxt),
+                          .q   (cpxthr_i2), .se(se), .si(), .so());
+   
+
+   // Decode CPX request: i1 flags the ifill packet type, i2 additionally passes the CPX valid bit
+   assign imissrtn_i1 = (ifd_ifc_cpxreq_i1 == `CPX_IFILLPKT);
+   assign imissrtn_i2 = (cpxreq_i2 == `IFILL_RET) ? ifd_ifc_cpxvld_i2 : 1'b0;
+
+   // f copy: follows imissrtn_i2 while ifc_inv_ifqadv_i2 is high, otherwise recirculates imissrtn_f
+   assign imissrtn_next_i2 = ifc_inv_ifqadv_i2 ? imissrtn_i2 : imissrtn_f;
+   dff_s #(1) imsf_ff(.clk (clk),
+                      .din (imissrtn_next_i2), .q (imissrtn_f),
+                      .se  (se), .si(), .so());
+   
+   // Determine if this is an IFILL RET to one of the threads
+   assign fill_retn_thr_i2 = dfthr_i2 & {4{imissrtn_i2}};  // i2 fill-thread select, all zero unless imissrtn_i2
+   
+   // decode current icache fill thread: 2-bit filltid_f -> 4-bit one-hot dfthr_f
+   assign dfthr_f[0] = ~filltid_f[1] & ~filltid_f[0];  // filltid_f = 2'b00
+   assign dfthr_f[1] = ~filltid_f[1] &  filltid_f[0];  // filltid_f = 2'b01
+   assign dfthr_f[2] =  filltid_f[1] & ~filltid_f[0];  // filltid_f = 2'b10
+   assign dfthr_f[3] =  filltid_f[1] &  filltid_f[0];  // filltid_f = 2'b11
+
+//`ifdef IFU_SAT
+//   assign ifc_ifd_uncached_s = fcl_ifq_icache_en_s_l;   
+//`else   
+//`endif
+
+   assign uncached_s = ifd_ifc_newdestid_s[2] | fcl_ifq_icache_en_s_l;  // destid bit 2 set, or fcl_ifq_icache_en_s_l high
+   // mil_nc_vec holds one non-cacheable (nc) bit per thread; it is registered in nc_reg below.
+   // timing fix: keep nc bit locally instead of in DP
+   assign mil_nc_vec_nxt = ({4{uncached_s & fcl_ifq_rdreq_s1}} &   // set: uncached read request ...
+                              thr_s1 & ~errthr_d1 |                // ... for the s1 thread, unless errthr_d1 flags it
+                              mil_nc_vec & (mil_valid_s |          // hold: keep the bit while mil_valid_s is set
+                                            errthr_d1));           //       or errthr_d1 is set for that thread
+
+   dff_s #(4) nc_reg(.din (mil_nc_vec_nxt),
+                   .q   (mil_nc_vec),
+                   .clk (clk), .se(se), .si(), .so());  // no reset port is connected on this flop
+
+   // nc bit of the thread being filled in i2 (dfthr_i2 is the one-hot fill-thread select)
+   assign mil_nc_i2 = |(dfthr_i2 & mil_nc_vec);
+
+   // nc bit of the thread selected for the PCX request in d (dpcxthr_d select vector)
+   assign mil_nc_d = |(dpcxthr_d & mil_nc_vec);
+
+   // register the d-stage nc bit and send it out as ifc_ifd_uncached_e
+   dff_s #(1) nce_ff(.clk (clk),
+                     .din (mil_nc_d),
+                     .q   (mil_nc_e),
+                     .se  (se),
+                     .si  (),
+                     .so  ());
+   assign ifc_ifd_uncached_e = mil_nc_e;
+   
+//   assign uncached_fill_i2 = ifd_ifc_uncached_i2 | ifd_ifc_cpxnc_i2;
+   assign uncached_fill_i2 = mil_nc_i2 | ifd_ifc_cpxnc_i2;   // locally kept nc bit OR ifd_ifc_cpxnc_i2
+   
+   // uncached fill -- do not write to icache
+   assign uncached_i2 = ifc_inv_ifqadv_i2 ?                   // take the new value when ifc_inv_ifqadv_i2 is high,
+	                          uncached_fill_i2 : uncached_f;   // otherwise hold the registered one
+   // unc_ff has no width override; presumably dff_s defaults to 1 bit -- TODO confirm
+   dff_s unc_ff(.din (uncached_i2),
+	            .q   (uncached_f),
+	            .clk (clk),
+	            .se  (se), .si(), .so());
+
+   // Determine if Icache write is done or 
+   // if none is necessary (i.e. if this is a child process or NC); result is steered to the f fill thread by dfthr_f
+   assign wr_complete_f = dfthr_f & {4{(wrreq_f & ifc_inv_ifqadv_i2 |   // write request pending and the i2 advance is high, or
+                                        milchld_vld_f |                 // f-stage child-valid bit set, or
+				                                uncached_f) & imissrtn_f}};  // uncached fill; all gated by imissrtn_f
+
+   // State machine outputs: one 3-bit milchld value per thread.
+   // One is chosen for I2 stage operation by dfthr_i2 (presumably selN gates inN -- confirm in mux4ds).
+   mux4ds #(3)  i2out_mux(.sel0  (dfthr_i2[0]),
+		       .in0   (milchld0),
+		       .sel1  (dfthr_i2[1]),
+		       .in1   (milchld1),
+		       .sel2  (dfthr_i2[2]),
+		       .in2   (milchld2),
+		       .sel3  (dfthr_i2[3]),
+		       .in3   (milchld3),
+		       .dout  (i2out));
+
+   // MIL-valid bit of the thread currently being filled in i2:
+   // AND each thread's bit with the fill-thread select, then OR the four results.
+   assign mil_vld_i2 = |(mil_valid_s & dfthr_i2);
+
+   // Same selection, but the entry must also not be cancelled
+   // (valid AND NOT cancel for the i2 fill thread).
+   assign mil_uncan_i2 = |(mil_valid_s &
+                           ~mil_cancel &
+                           dfthr_i2);
+
+   // Don't make a wrreq if this is a child entry.  However, if this is
+   // a child and the parent was cancelled, then go ahead and
+   // write... is this really necessary?  Not for functionality.
+   // 3/19: parent will write even if cancelled.  So never write child
+   assign next_wrreq_i2 = imissrtn_i2 & mil_vld_i2 & ~uncached_fill_i2 &  // ifill return, MIL entry valid, fill not uncached,
+	                  ~milchld_vld_i2 & ~ifd_ifc_4bpkt_i2; // no child pending and not a 4B packet (was: iobpkt_i2)
+
+   // Address bit 4 for the fill, taken from the MIL FSM of the CPX thread
+   // (milfthr_i2 is decoded from cpxthr_i2, not from the child-aware filltid_i2).
+   assign addrbit4_i2 = |(milfthr_i2 & fill_addr4_i2);
+
+   // Recirculate the registered value unless ifc_inv_ifqadv_i2 is high.
+   assign addrbit4_nxt_i2= ifc_inv_ifqadv_i2 ? addrbit4_i2 : filladdr4_f;
+   dff_s #(1) ab4_ff(.clk (clk),
+                     .din (addrbit4_nxt_i2),
+                     .q   (filladdr4_f),
+                     .se  (se), .si(), .so());
+   // ifc_ifd_filladdr4_i2 is driven by the pre-flop (next) value.
+   assign ifc_ifd_filladdr4_i2 = addrbit4_nxt_i2;
+
+   assign next_milchld = ifc_inv_ifqadv_i2 ?                              // on i2 advance load the selected FSM's child info:
+	                          {(i2out[2] & imissrtn_i2), i2out[1:0]} :  // bit 2 gated by imissrtn_i2, bits 1:0 passed through
+	                          milchld_d1;                                // otherwise hold
+   // milchld_d1[2] is used below as the child-valid bit; [1:0] is used as a thread id by filltid_i2.
+   // After the packet is processed, the child entry in the MIL,
+   // pointed to by the reg below is processed next (if valid)
+   dffr_s #(3)  milchldd_reg(.din  (next_milchld),
+			                     .clk  (clk),
+			                     .rst  (ifq_reset),
+			                     .q    (milchld_d1),
+			                     .se   (se), .si(), .so());
+
+   assign milchld_vld_i2 = milchld_d1[2];                        // child pending in i2
+   assign next_milchld_i2 = ifc_inv_ifqadv_i2 ? milchld_d1[2] :  // f copy of the child-valid bit, updated on i2 advance,
+	                                              milchld_vld_f;   // held otherwise
+   
+   dffr_s #(1) milchldf_ff(.din  (next_milchld_i2),
+		       .q    (milchld_vld_f),
+		       .clk  (clk),
+		       .rst  (ifq_reset),
+		       .se   (se), .si(), .so());
+
+   // need this to avoid x's in the simulation
+//   assign cpxthrid_adj_i2 = ifd_ifc_cpxthr_i2 &
+//	                          {2{ifd_ifc_cpxreq_i2[`CPX_RQ_SIZE]}};
+
+   // Determine if we should process the child or a new entry
+//   assign next_thr_sel_milchld_i2 = ifc_inv_ifqadv_i2 & milchld_vld_i2 & 
+//	                            ~errpkt_i2;
+//   assign next_thr_sel_milchld_i2 = milchld_vld_i2 & ~errpkt_i2;
+   
+   
+   // if previous mil entry had a child, process that next (the mux below was replaced by the ternary assign)
+//   mux2ds  #(2) filltid_mux(.dout  (filltid_i2),
+//		                        .in0   (cpxthrid_adj_i2),
+//		                        .in1   (milchld_d1[1:0]),
+//		                        .sel0  (~milchld_vld_i2),
+//		                        .sel1  (milchld_vld_i2));
+   assign filltid_i2 = milchld_vld_i2 ? milchld_d1[1:0] :  // pending child: use the thread id stored with it,
+                                        cpxthr_i2[1:0];   // otherwise use the registered CPX thread id
+
+   // decode fill thread  (either cpx thread or MIL child thread from above)
+   // need to qual with valid bit to avoid X's in simulation  (NOTE(review): the qualifier below is commented out; the active decode is unqualified)
+//   assign cpxvld_or_milc_i2 = ifd_ifc_cpxreq_i2[`CPX_RQ_SIZE] | milchld_vld_i2;
+   assign dfthr_i2[0] = ~filltid_i2[1] & ~filltid_i2[0];  // filltid_i2 = 2'b00
+   assign dfthr_i2[1] = ~filltid_i2[1] &  filltid_i2[0];  // filltid_i2 = 2'b01
+   assign dfthr_i2[2] =  filltid_i2[1] & ~filltid_i2[0];  // filltid_i2 = 2'b10
+   assign dfthr_i2[3] =  filltid_i2[1] &  filltid_i2[0];  // filltid_i2 = 2'b11
+
+   // Next fill thread id: in0 recirculates filltid_f, in1 brings in filltid_i2;
+   // ifc_inv_ifqadv_i2 is the select (presumably sel=1 picks in1 -- confirm in dp_mux2es).
+   dp_mux2es  #(2)  thren_mux(.sel  (ifc_inv_ifqadv_i2),
+			                        .in1  (filltid_i2),
+			                        .in0  (filltid_f),
+			                        .dout (next_filltid_i2));
+   dff_s #(2) wrthr_reg(.clk  (clk),
+		                  .din  (next_filltid_i2),
+		                  .q    (filltid_f),
+		                  .se   (se), .si(), .so());   
+   // Same arrangement for the 4-bit decoded thread vector; dfthr_next_i2 feeds ifq_dtu_pred_rdy.
+   dp_mux2es  #(4)  dthren_mux(.sel  (ifc_inv_ifqadv_i2),
+			                         .in1  (dfthr_i2),
+			                         .in0  (dfthr_f),
+			                         .dout (dfthr_next_i2));
+   
+
+   // Early start of threads: per-thread pred_rdy_i2 from the MIL FSMs, masked to the next fill thread
+   // Do we need a control bit to turn this off?
+   // -- do it in SWL
+   assign ifq_dtu_pred_rdy =  pred_rdy_i2 & {dfthr_next_i2[3:0]} &  // dfthr_next_i2: muxed i2/f fill-thread vector
+                              {4{imissrtn_next_i2}};                // only when the (muxed) ifill-return flag is set
+
+// If timing is a problem resort to:
+//   assign ifq_dtu_pred_rdy =  pred_rdy_i2 & {4{ifc_inv_ifqadv_i2}} &
+//                              dfthr_i2 & {4{imissrtn_i2}};
+   
+   
+
+   // pick 16B half cache line which contains the instruction we want
+//   assign fill_this16b = ~(ifc_ifd_filladdr4_i2 ^ ifd_ifc_missaddr4_i2);
+                         // | ifd_ifc_4bpkt_i2;
+
+   // write to thread instruction register (the two commented-out assigns are earlier variants)
+//   assign ifq_fcl_fill_thr = wrt_tir & {4{fill_this16b | ifd_ifc_4bpkt_i2}};
+//   assign ifq_fcl_fill_thr = wrt_tir & {4{fill_this16b}};
+   assign ifq_fcl_fill_thr = wrt_tir | thr_d1 & {4{itlberr_d1 &        // FSM requests, plus the d1 thread when: registered ITLB error,
+                                                   ~canthr_d1 &        // thread not cancelled (d1 thread select),
+                                                   icmiss_d1 &         // registered icache miss,
+                                                   ~canthr_s1_del1}};  // and not cancelled in the previous cycle (registered canthr_s1)
+
+   // Select instruction to send to TIR
+   // TBD: Need to find out how the inst from boot PROM is aligned -- Done
+   // From kinkee 02/21/03: It is aligned to the correct 4B of the 16B 
+   // packet.  The other locations are X.
+   assign finst0[0] = ~ifd_ifc_instoffset0[1] & ~ifd_ifc_instoffset0[0];  // offset = 2'b00
+   assign finst0[1] = ~ifd_ifc_instoffset0[1] &  ifd_ifc_instoffset0[0];  // offset = 2'b01
+   assign finst0[2] =  ifd_ifc_instoffset0[1] & ~ifd_ifc_instoffset0[0];  // offset = 2'b10
+   assign finst0[3] =  ifd_ifc_instoffset0[1] &  ifd_ifc_instoffset0[0];  // offset = 2'b11
+   // finst<n> is the one-hot decode of the 2-bit ifd_ifc_instoffset<n>; the same pattern repeats for n = 1..3
+   assign finst1[0] = ~ifd_ifc_instoffset1[1] & ~ifd_ifc_instoffset1[0];
+   assign finst1[1] = ~ifd_ifc_instoffset1[1] &  ifd_ifc_instoffset1[0];
+   assign finst1[2] =  ifd_ifc_instoffset1[1] & ~ifd_ifc_instoffset1[0];
+   assign finst1[3] =  ifd_ifc_instoffset1[1] &  ifd_ifc_instoffset1[0];
+
+   assign finst2[0] = ~ifd_ifc_instoffset2[1] & ~ifd_ifc_instoffset2[0];
+   assign finst2[1] = ~ifd_ifc_instoffset2[1] &  ifd_ifc_instoffset2[0];
+   assign finst2[2] =  ifd_ifc_instoffset2[1] & ~ifd_ifc_instoffset2[0];
+   assign finst2[3] =  ifd_ifc_instoffset2[1] &  ifd_ifc_instoffset2[0];
+
+   assign finst3[0] = ~ifd_ifc_instoffset3[1] & ~ifd_ifc_instoffset3[0];
+   assign finst3[1] = ~ifd_ifc_instoffset3[1] &  ifd_ifc_instoffset3[0];
+   assign finst3[2] =  ifd_ifc_instoffset3[1] & ~ifd_ifc_instoffset3[0];
+   assign finst3[3] =  ifd_ifc_instoffset3[1] &  ifd_ifc_instoffset3[0];
+
+//   mux4ds #(4) finst_mx(.dout (finst_i2),
+//                        .in0  (finst0),
+//                        .in1  (finst1),
+//                        .in2  (finst2),
+//                        .in3  (finst3),
+//                        .sel0 (dfthr_i2[0]),
+//                        .sel1 (dfthr_i2[1]),
+//                        .sel2 (dfthr_i2[2]),
+//                        .sel3 (dfthr_i2[3]));
+
+   wire [3:0] finst_ev,      // pick between threads 0 and 2
+              finst_od,      // pick between threads 1 and 3
+              finst_i2_l;    // inverted final selection
+   wire [1:0] filltid_i2_l;  // buffered copy of filltid_i2 (presumably inverted by the inv cells -- confirm in cell library)
+   bw_u1_inv_10x UZsize_ftid_bf0(.z (filltid_i2_l[0]),
+                                 .a (filltid_i2[0]));
+   bw_u1_inv_20x UZsize_ftid_bf1(.z (filltid_i2_l[1]),
+                                 .a (filltid_i2[1]));
+   // use bw_u1_muxi21_4x
+   assign finst_ev = filltid_i2_l[1] ? finst0 : finst2;              // select is filltid_i2_l[1]: high picks thread 0
+   assign finst_od = filltid_i2_l[1] ? finst1 : finst3;              // select is filltid_i2_l[1]: high picks thread 1
+   assign finst_i2_l = filltid_i2_l[0] ? (~finst_ev) : (~finst_od);  // select is filltid_i2_l[0]; output is inverted
+   assign finst_i2 = ~finst_i2_l;                                    // back to true polarity
+
+   assign ifc_ifd_finst_sel_l = ~finst_i2;                           // inverted copy of finst_i2
+
+   // pick MIL entry corresponding to current thread: one-hot decode of the registered CPX thread id
+   assign milfthr_i2[0] = ~cpxthr_i2[1] & ~cpxthr_i2[0];  // cpxthr_i2 = 2'b00
+   assign milfthr_i2[1] = ~cpxthr_i2[1] &  cpxthr_i2[0];  // cpxthr_i2 = 2'b01
+   assign milfthr_i2[2] =  cpxthr_i2[1] & ~cpxthr_i2[0];  // cpxthr_i2 = 2'b10
+   assign milfthr_i2[3] =  cpxthr_i2[1] &  cpxthr_i2[0];  // cpxthr_i2 = 2'b11
+   assign ifc_ifd_milfill_sel_i2_l = ~milfthr_i2;         // inverted copy of the one-hot select
+
+   // write request (the two commented-out assigns are earlier formulations)
+   // assign ifq_fcl_wrreq_bf = ifc_inv_ifqadv_i2 ? next_wrreq_i2 : wrreq_f;
+   // assign ifq_fcl_wrreq_bf = ~ifc_inv_ifqadv_i2 | next_wrreq_i2;
+   assign ifq_fcl_wrreq_bf = wrreq_f & ~ifc_inv_ifqadv_i2 | next_wrreq_i2;  // hold a pending request until i2 advances, or raise a new one
+   // wrreq_f is the registered request; the flop is reset by ifq_reset
+   dffr_s #(1) wrreq_ff(.din (ifq_fcl_wrreq_bf),
+		                  .clk (clk),
+		                  .q   (wrreq_f),
+		                  .rst (ifq_reset),
+		                  .se  (se), .si(), .so());
+
+   // starvation check
+   // if a write is not granted for 24 cycles, sound the alarm (the limit itself is inside sparc_ifu_ctr5, not visible here)
+   sparc_ifu_ctr5 starv_ctr(
+			                      // Outputs
+			                      .limit	(starv_alert),
+			                      .so	(so),  // NOTE(review): the lfsr instance in this file also connects its so to this net -- confirm both are not driven
+			                      // Inputs
+			                      .clk	(clk),
+			                      .se	(se),
+			                      .si	(si),
+			                      .rst_ctr_l (rst_starv_ctr_l));
+   assign rst_starv_ctr_l = ~ifq_reset & wrreq_f;  // low on ifq_reset or while no write request is registered (wrreq_f low)
+
+   // advance in i2 when a write ack is received or if not a fill
+   // Can help timing of this signal by doing
+   //  ifqadv_nxt = ~ifq_fcl_wrreq_bf | fcl_icd_index_sel_ifq_bf
+   assign access_grant_l = ~fcl_ifq_grant_bf;                // inverted grant
+   bw_u1_nand2_2x UZsize_acc_n2(.z (ifqadvi2_nxt),           // assuming a 2-input NAND cell: z = ~(wrreq_bf & ~grant)
+                                .a (ifq_fcl_wrreq_bf),       //   i.e. advance when no write is requested
+                                .b (access_grant_l));        //   or the request is granted
+   dff_s #(1) qadv_ff(.din (ifqadvi2_nxt),
+                    .q   (ifqadv_i2_ff),
+                    .clk (clk), .se(se), .si(), .so());
+   assign ifc_inv_ifqadv_i2 = ifqadv_i2_ff;                  // registered advance, used throughout this section
+
+   
+
+   // advance in i1 when a write ack is received AND there are no
+   // child threads to be taken care of (and fwd_stall is low); forced high during ifq_reset
+   assign ifqadv_i1 = (ifc_inv_ifqadv_i2 & ~next_milchld[2] & ~fwd_stall) |  // next_milchld[2]: child-valid bit about to be registered
+                        ifq_reset; 
+
+//-----------------------------------
+// Errors and Error Packet
+//-----------------------------------   
+
+   // Error-packet decode: i1 on packet type, i2 qualified by the CPX valid bit
+   assign errpkt_i1 = (ifd_ifc_cpxreq_i1 == `CPX_ERRPKT);
+   assign errpkt_i2 = (cpxreq_i2 == `ERR_RET) ? ifd_ifc_cpxvld_i2 : 1'b0;
+
+   // Reported Errors are not logged in ERB
+   // ue wins over ce: ce is reported only when the ue bit is clear.
+   assign ue_rep_i2 = ifc_inv_ifqadv_i2 & errpkt_i2 & ifd_ifc_cpxue_i2;
+   assign ce_rep_i2 = ifc_inv_ifqadv_i2 & errpkt_i2 & ifd_ifc_cpxce_i2 & ~ifd_ifc_cpxue_i2;
+
+   // registered copies drive the ifq_erb_*_rep outputs
+   dff_s #(1) cerep_ff(.clk (clk), .din (ce_rep_i2),
+		                 .q   (ifq_erb_ce_rep), .se(se), .si(), .so());
+   dff_s #(1) uerep_ff(.clk (clk), .din (ue_rep_i2),
+		                 .q   (ifq_erb_ue_rep), .se(se), .si(), .so());
+
+//   dff #(2) ertid_reg(.din (filltid_i2),
+//		                  .q   (ifq_erb_l2err_tid),
+//		                  .clk (clk), .se(se), .si(), .so());
+   // send thread id one cycle earlier to help crit path (unregistered; replaces the commented-out ertid_reg flop above)
+   assign ifq_erb_l2err_tid = filltid_i2;
+   
+   // Ifetch Errors are logged in ERB; all three require an ifill return, i2 advance and an uncancelled valid MIL entry
+   assign l2_ce_i2 = ifd_ifc_cpxce_i2 & ~ifd_ifc_cpxue_i2 & imissrtn_i2 &   // ce bit set and ue bit clear
+	                   ifc_inv_ifqadv_i2 & mil_uncan_i2;
+   assign l2_ue_i2 = ifd_ifc_cpxue_i2 & imissrtn_i2 & ~ifd_ifc_iobpkt_i2 &  // ue bit set, packet not flagged iobpkt
+	                   ifc_inv_ifqadv_i2 & mil_uncan_i2;
+   assign io_ue_i2 = ifd_ifc_cpxue_i2 & imissrtn_i2 & ifd_ifc_iobpkt_i2 &   // ue bit set, packet flagged iobpkt
+	                   ifc_inv_ifqadv_i2 & mil_uncan_i2;
+   // registered copies drive the ifq_erb_* outputs
+   dff_s #(1) l2ce_ff(.din (l2_ce_i2),
+		                .q   (ifq_erb_ifet_ce),
+		                .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) l2ue_ff(.din (l2_ue_i2),
+		                .q   (ifq_erb_l2_ue),
+		                .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) ioue_ff(.din (io_ue_i2),
+		                .q   (ifq_erb_io_ue),
+		                .clk (clk), .se(se), .si(), .so());
+
+   assign l2_miss_i2 = ifd_ifc_cpxms_i2 & imissrtn_i2 & ifc_inv_ifqadv_i2;  // cpxms bit on an ifill return, at i2 advance
+   dff_s #(1) l2ms_ff(.din (l2_miss_i2),
+		                .q   (l2_miss_f),
+		                .clk (clk), .se(se), .si(), .so());
+   // per-thread output: the registered flag steered by the f-stage fill thread
+   assign ifu_tlu_l2imiss = dfthr_f & {4{l2_miss_f}};
+   
+//--------------------------------------------
+// Miss Request Control (IFU interface to PCX)
+//--------------------------------------------
+
+   // decode imiss thread: 2-bit fcl_ifq_thr_s1 -> 4-bit one-hot thr_s1 (bit 0 of the id is tested first on each line)
+   assign thr_s1[0] = ~fcl_ifq_thr_s1[0] & ~fcl_ifq_thr_s1[1];  // id = 2'b00
+   assign thr_s1[1] =  fcl_ifq_thr_s1[0] & ~fcl_ifq_thr_s1[1];  // id = 2'b01
+   assign thr_s1[2] = ~fcl_ifq_thr_s1[0] & fcl_ifq_thr_s1[1];   // id = 2'b10
+   assign thr_s1[3] =  fcl_ifq_thr_s1[0] & fcl_ifq_thr_s1[1];   // id = 2'b11
+
+   // signal ic miss to thread MIL state machines
+   assign icmiss_thr_s = {4{fcl_ifq_icmiss_s1 & ~block_fetch_s1}} & thr_s1 &   // miss in s1 that is not blocked, for the s1 thread,
+	                       ~icmiss_thr_d;                                        // ignored if that thread is already flagged in icmiss_thr_d
+
+//   dff #(4) icmsreg(.din  (icmiss_thr_s),
+//		                .clk  (clk),
+//		                .q    (icmiss_thr_d),
+//		                .se   (se), .si(), .so());
+
+   dff_s #(1) icmsd_ff(.din  (fcl_ifq_icmiss_s1),
+		                 .clk  (clk),
+		                 .q    (icmiss_d1),
+		                 .se   (se), .si(), .so());  // icmiss_d1: registered copy of the s1 miss
+
+   assign icmiss_qual_d1 = icmiss_d1 & ~(thr_match_d1e1 & ifeterr_e1);  // dropped when the same thread has an ifetch error in e1
+
+   // bug 5926: iosp_d1_l is the registered inverse of destid bit 2 and gates erb_ifq_ifeterr_d1 below
+   assign n763 = ~ifd_ifc_newdestid_s[2];
+   dff_s #(1) iosp_ff(.din (n763),
+		                .q   (iosp_d1_l),
+		                .clk (clk), .se(se), .si(), .so());
+   // per-thread flag: miss or (gated) ifetch error for the d1 thread, or ifetch error for the e1 thread
+   assign icmiss_thr_d = {4{icmiss_d1 | erb_ifq_ifeterr_d1 & iosp_d1_l}} & thr_d1 |
+                         {4{ifeterr_e1}} & thr_e1;
+   
+   // Thread select pipeline: thr_s1 -> thr_d1 -> thr_e1, one dff_s stage each.
+   dff_s #(4) thrdreg(.clk  (clk),
+		                .din  (thr_s1),
+		                .q    (thr_d1),
+		                .se   (se), .si(), .so());
+   dff_s #(4) threreg(.clk  (clk),
+		                .din  (thr_d1),
+		                .q    (thr_e1),
+		                .se   (se), .si(), .so());
+
+   // e1 copy of the qualified ifetch-error flag
+   dff_s #(1) erre_ff(.clk (clk), .din (ifeterr_qual_d1),
+                      .q   (ifeterr_e1), .se(se), .si(), .so());
+
+   // high when the d1 and e1 stages hold the same thread
+   assign thr_match_d1e1 = |(thr_d1 & thr_e1);
+
+   // An ifetch error in d1 is dropped if the same thread already flagged one in e1,
+   // and it only counts while iosp_d1_l is high.  (The commented-out variant gated with ~canthr_d1 instead.)
+//   assign ifeterr_qual_d1 = ~(thr_match_d1e1 & ifeterr_e1) & ~canthr_d1 & 
+//                               erb_ifq_ifeterr_d1;
+   assign ifeterr_qual_d1 = iosp_d1_l & erb_ifq_ifeterr_d1 &
+                            ~(ifeterr_e1 & thr_match_d1e1);
+   assign errthr_d1 = {4{ifeterr_qual_d1 & ~block_fetch_d1}} & thr_d1;  // per-thread error, suppressed by block_fetch_d1
+   
+   // If misses to same thread, (in successive cycles), ignore
+   assign ifc_ifd_ldmil_sel_new = (thr_s1 & {4{fcl_ifq_rdreq_s1}} &   // s1 thread with a read request,
+				                           ~errthr_d1 & ~mil_valid_s);      // not flagged in errthr_d1 and MIL entry not already valid
+
+   // Check hit in MIL -- a thread cannot hit 
+   //   1. its own MIL                   (~thr_s1)
+   //   2. an MIL that is being filled   (~fill_retn_thr_i2)
+   //   3. if it is to an IOB line       (destid bit 2 must be clear)
+   // and the compared entry must have its comp_valid_s bit set.
+   assign qualhit_or_io_s = {4{~ifd_ifc_newdestid_s[2]}} &
+                            ~fill_retn_thr_i2 &
+                            ~thr_s1 &
+                            comp_valid_s &
+                            ifd_ifc_milhit_s;
+
+   // set when any thread's MIL entry gives a qualified hit
+   assign any_qualhit_or_io_s = (|qualhit_or_io_s);
+   
+//   assign milhit_qual_s = ifd_ifc_milhit_s & comp_valid_s & 
+//	                        ~thr_s1 & 
+//                          ~fill_retn_thr_i2 & 
+//                          {4{~ifd_ifc_newdestid_s[2]}};
+   
+//   assign any_milhit_qual_s = any_qualhit_or_io_s & ~ifd_ifc_newdestid_s[2];
+   assign any_milhit_qual_s = any_qualhit_or_io_s;   // plain alias; qualhit_or_io_s is already masked with ~ifd_ifc_newdestid_s[2]
+   
+   // Generate Replacement Way
+   // Make sure a req doesn't go out to a different way than 
+   // what is pending  (NOTE(review): the MIL-hit override is commented out below; the active code always uses rand_repway_s)
+   assign milhit_vec_s = ifd_ifc_milhit_s & (mil_valid_s | errthr_d1);  // raw MIL hits on entries that are valid or flagged in errthr_d1
+   assign any_milhit_s = (|milhit_vec_s[3:0]);                          // any such hit
+   
+//   assign mil_repway_s = (ifd_ifc_mil_repway_s[7:6] & {2{milhit_vec_s[3]}} | 
+//                          ifd_ifc_mil_repway_s[5:4] & {2{milhit_vec_s[2]}} | 
+//                          ifd_ifc_mil_repway_s[3:2] & {2{milhit_vec_s[1]}} | 
+//                          ifd_ifc_mil_repway_s[1:0] & {2{milhit_vec_s[0]}});
+
+//   assign ifc_ifd_repway_s = any_milhit_s ? mil_repway_s : rand_repway_s;
+   assign ifc_ifd_repway_s = rand_repway_s;   // replacement way comes straight from the LFSR output
+
+   // pick any way at random
+   // reset with dbg_init as well
+   sparc_ifu_lfsr5  lfsr(.out (rand_repway_s),
+			                   .clk  (clk),
+			                   .advance (fcl_ifq_icmiss_s1),  // advance input is tied to the s1 icache-miss signal
+			                   .reset (rst_way_lfsr),
+			                   .se (se),
+			                   .si (si),
+			                   .so (so));  // NOTE(review): starv_ctr in this file also connects its so to this net -- confirm both are not driven
+   // LFSR reset sources: ifq_reset, lsu_ifu_direct_map_l1 high, or gdbginit_l low
+   assign rst_way_lfsr = ifq_reset | lsu_ifu_direct_map_l1 | ~gdbginit_l;
+
+   // check if miss req is valid in a given pipe stage:
+   // OR-reduce the per-thread cancel vector masked by that stage's
+   // thread select.  Both stages read the same fcl_ifq_canthr vector;
+   // only the thread select (thr_s1 / thr_d1) differs.
+   assign canthr_s1 = |(fcl_ifq_canthr &
+                        thr_s1);
+
+   assign canthr_d1 = |(fcl_ifq_canthr &
+                        thr_d1);
+   
+   // retry a fetch if the imiss occurs while it is being filled
+//   assign block_fetch_s1 = any_milhit_s & 
+//                           ~(any_qualhit_or_io_s | ifd_ifc_newdestid_s[2]) |
+//                           dtu_ifq_kill_latest_d;
+   assign block_fetch_s1 = any_milhit_s & ~ifd_ifc_newdestid_s[2] &  // MIL hit with destid bit 2 clear ...
+                           ~any_qualhit_or_io_s |                    // ... that did not qualify as a usable hit, or
+                           dtu_ifq_kill_latest_d |                   // dtu_ifq_kill_latest_d asserted, or
+                           erb_ifq_itlberr_s1;                       // ITLB error in s1
+
+   dff_s #(1) bfd_ff(.din (block_fetch_s1),
+                   .q   (block_fetch_d1),
+                   .clk (clk), .se(se), .si(), .so());  // d1 copy of block_fetch_s1
+
+   dff_s #(1) tlbe_ff(.din (erb_ifq_itlberr_s1),
+                    .q   (itlberr_d1),
+                    .clk (clk), .se(se), .si(), .so());  // d1 copy of the ITLB error
+   
+//   assign retry_rdy_s1 = block_fetch_s1 & fcl_ifq_icmiss_s1;
+//   dff #(1) retrd_ff(.din (retry_rdy_s1),
+//                     .q   (retry_rdy_d1),
+//                     .clk (clk), .se(se), .si(), .so());
+   
+   assign retry_rdy_final_d1 = block_fetch_d1 & (icmiss_qual_d1 |   // blocked fetch in d1 that carried a qualified miss
+                                                 ifeterr_qual_d1);  // or a qualified ifetch error
+   dff_s #(1) retre_ff(.din (retry_rdy_final_d1),
+                     .q   (retry_rdy_e1),
+                     .clk (clk), .se(se), .si(), .so());
+   // expand to the e1 thread, then register once more (m1)
+   assign all_retry_rdy_e1 = {4{retry_rdy_e1}} & thr_e1;
+   dff_s #(4) retrm_reg(.din (all_retry_rdy_e1),
+                      .q   (all_retry_rdy_m1),
+                     .clk (clk), .se(se), .si(), .so());
+   
+   assign ifq_dtu_thrrdy = mil_thr_ready | all_retry_rdy_m1;  // thread ready from its MIL FSM or from the retry path
+
+//   assign retry_fetch_s1 = block_fetch_s1 & fcl_ifq_icmiss_s1 & 
+//                           ~canthr_s1;
+   dff_s #(1) cans_ff(.din (canthr_s1),
+                    .q   (canthr_s1_del1),
+                    .clk (clk), .se(se), .si(), .so());  // canthr_s1 delayed by one flop
+   // flush request for a blocked fetch in d1, unless the d1 thread is cancelled or an ITLB error was registered
+   assign ifq_fcl_flush_sonly_e = (block_fetch_d1 & 
+                                   (icmiss_qual_d1 & ~canthr_s1_del1 |  // qualified miss that was not cancelled a cycle earlier
+                                    ifeterr_qual_d1) &                  // or qualified ifetch error
+                                   ~canthr_d1 & ~itlberr_d1);
+
+   // Determine which thread's MIL was hit, if at all
+   // first check if this really was an imiss
+   assign icmiss_for_milchk = thr_s1 & ~icmiss_thr_d & ~errthr_d1;               // s1 thread not already flagged in icmiss_thr_d / errthr_d1
+   assign icmiss_qual_s = (|icmiss_for_milchk[3:0]) & fcl_ifq_icmiss_s1 &        // such a thread exists and s1 reports a miss,
+                          ~dtu_ifq_kill_latest_d & ~erb_ifq_itlberr_s1;          // with no kill and no ITLB error
+
+   // since multiple requests can be outstanding when an error is
+   // encountered, need to prioritise the mil hits.
+   // TBD: there must be a cleaner way to do this!  (fixed priority: bit 0 highest, bit 3 lowest)
+   assign qualhit_pe_s[0] = qualhit_or_io_s[0];
+   assign qualhit_pe_s[1] = ~qualhit_or_io_s[0] & qualhit_or_io_s[1];
+   assign qualhit_pe_s[2] = ~qualhit_or_io_s[0] & ~qualhit_or_io_s[1] &
+                             qualhit_or_io_s[2];
+   assign qualhit_pe_s[3] = ~qualhit_or_io_s[0] & ~qualhit_or_io_s[1] &
+                            ~qualhit_or_io_s[2] & qualhit_or_io_s[3];
+   
+   // A thread cannot hit on an MIL to the IOB
+   assign milhit_to_thr_s = qualhit_pe_s & {4{icmiss_qual_s &               // at most one bit set, only for a qualified miss
+                                                 ~ifd_ifc_newdestid_s[2]}};  // with destid bit 2 clear
+
+   // Make Request to PCX if miss in Icache and MIL
+   // determine if we need to send req to L2 (the commented-out block is the earlier formulation)
+//   assign newpcxreq_s = icmiss_for_milchk & ~fcl_ifq_canthr;
+//   assign newreq_valid = fcl_ifq_icmiss_s1 & ~dtu_ifq_kill_latest_d & 
+//	                       (newpcxreq_s[0] | 
+//			                    newpcxreq_s[1] | 
+//			                    newpcxreq_s[2] | 
+//			                    newpcxreq_s[3]) & 
+//                           (~any_milhit_s | ifd_ifc_newdestid_s[2]);
+
+   assign newreq_valid = icmiss_qual_s &                              // qualified miss in s that
+                          (~any_milhit_s | ifd_ifc_newdestid_s[2]);   // missed every MIL entry, or has destid bit 2 set
+
+   // check if there are any old requests outstanding, that are not
+   // currently in the D stage. 
+   assign pcxreq_qual_s = pcxreq_s & ~(dpcxthr_d & {4{req_valid_d}});  // mask out the thread whose request is valid in d
+   
+//   assign reqq_empty = ~(|pcxreq_qual_s[3:0]);
+//   assign oldpcxreq_s = pcxreq_qual_s & rr_gnt & ~fcl_ifq_canthr;
+//   assign oldreq_valid = (|oldpcxreq_s);
+//   assign oldpcxreq_s = pcxreq_qual_s & rr_gnt;   
+   assign oldreq_valid = (|pcxreq_qual_s);  // at least one other thread still has a request pending
+	  
+   // Send out PCX request in round robin order if there are other
+   // requests pending.  If the request queue is empty send this req
+//   assign nextreq_valid_s = ~reqq_empty | newreq_valid;
+   // a request is valid next cycle if an old one is queued, a new one arrives, or one is still pending in d
+   assign nextreq_valid_s = oldreq_valid | newreq_valid | req_pending_d;
+
+   assign rnd_reset = ifq_reset | ~gdbginit_l;  // arbiter reset: ifq_reset or gdbginit_l low
+   
+   // round robin assignment to pcx
+   sparc_ifu_rndrob  pcxrndrob(.req_vec   (pcxreq_qual_s),  // pending requests, excluding the one valid in d
+			                         .grant_vec (rr_gnt),         // granted thread, used for thread and destid selection
+			                         .advance   (req_accept_d),
+			                         .rst_tri_enable (rst_tri_en),
+			                         .clk       (clk),
+			                         .reset     (rnd_reset),
+			                         .se  (se),
+			                         .si (si),
+			                         .so ());
+
+   // if req queue is empty forward the new request to pcx
+   // if not store it in the MIL
+   assign dpcxthr_s  = req_pending_d ? dpcxthr_d :   // a pending request keeps its thread
+	                     ~oldreq_valid ? thr_s1    :   // nothing queued: use the s1 thread
+	                                     rr_gnt;      // otherwise the round-robin grant
+   dff_s #(4) pcxthr_ff(.din (dpcxthr_s),
+		                  .clk (clk),
+		                  .q   (dpcxthr_d),
+		                  .se  (se), .si(), .so());
+   // encode the 4-bit select into a 2-bit thread id
+   assign thrid_d[0] = dpcxthr_d[3] | dpcxthr_d[1];  // id bit 0 set for threads 1 and 3
+   assign thrid_d[1] = dpcxthr_d[3] | dpcxthr_d[2];  // id bit 1 set for threads 2 and 3
+   dff_s #(2) tide_reg(.din (thrid_d),
+                     .q   (thrid_e),
+                     .clk (clk), .se(se), .si(), .so());
+   assign ifc_ifd_thrid_e = thrid_e;                 // registered (e) thread id output
+   
+   // Determine the destination to which the request is made:
+   mux4ds #(3) dest_mux(.dout (old_destid_s),
+		                    .in0  (ifd_ifc_destid0[2:0]),
+		                    .in1  (ifd_ifc_destid1[2:0]),
+		                    .in2  (ifd_ifc_destid2[2:0]),
+		                    .in3  (ifd_ifc_destid3[2:0]),
+		                    .sel0 (rr_gnt[0]),
+		                    .sel1 (rr_gnt[1]),
+		                    .sel2 (rr_gnt[2]),
+		                    .sel3 (rr_gnt[3]));
+   
+//   mux2ds #(3) fdest_mux(.dout (destid_s),
+//		                     .in0  (ifd_ifc_newdestid_s),
+//		                     .in1  (old_destid_s),
+//		                     .sel0 (~oldreq_valid),
+//		                     .sel1 (oldreq_valid));
+
+//   assign destid_s  = req_pending_d ? ifu_lsu_destid_d    :
+//	                    ~oldreq_valid ? ifd_ifc_newdestid_s : 
+//	                                    old_destid_s;
+   assign ifu_lsu_destid_s = oldreq_valid ? old_destid_s :
+                                            ifd_ifc_newdestid_s;
+
+   // remove this
+   assign destid_iob_s  = req_pending_d ? destid_iob_d :
+                                          ifu_lsu_destid_s[2];
+   dff_s #(1) destd_reg(.din (destid_iob_s),
+		                  .q   (destid_iob_d),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   // If this is going to any L2 bank, zero out the line address 
+   // for Rams 
+   assign ifc_ifd_pcxline_adj_d[4:2] = ifd_ifc_pcxline_d[4:2] & 
+                                       {3{destid_iob_d}};
+   
+   // advance req
+   dffr_s #(1) pcxreqvd_ff(.din  (nextreq_valid_s),
+		                     .clk  (clk),
+		                     .rst  (ifq_reset),
+		                     .q    (req_valid_d),
+		                     .se   (se), .si(), .so());
+
+   assign ifu_lsu_pcxreq_d = req_valid_d;
+
+//   assign req_pending_d = req_valid_d & ~can_pcx_d & ~lsu_ifu_pcxpkt_ack_d;
+   assign req_pending_d = req_valid_d & ~(lsu_ifu_pcxpkt_ack_d & ~errinv_d1);
+   assign req_accept_d = req_valid_d & lsu_ifu_pcxpkt_ack_d;
+//   assign rr_advance_d = req_accept_d & ~errinv_d1
+
+   // Signal to FSM if pcx request has been accepted by LSU
+   assign pcx_accept_d = dpcxthr_d & {4{req_accept_d}};
+   // Alternate implementation with canthr delayed by a cycle
+//   assign pcxreq_vbit_d = req_valid_d & ~can_pcx_d;
+//   assign pcx_accept_d = dpcxthr_d & {4{req_accept_d}} & ~fcl_ifq_canthr;
+
+   // check if there was an error to this thread
+   assign err_vec_d1 = dpcxthr_d & (errthr_d1 | err_req);
+   assign errinv_d1 = (|err_vec_d1[3:0]);
+
+   dff_s #(1) errinv_ff(.din (errinv_d1),
+		                  .q   (ifc_ifd_errinv_e),
+		                  .clk (clk), .se(se), .si(), .so());
+   
+   assign pcxreq_vbit_d = req_valid_d;   
+   dff_s #(1) pcxreqve_ff(.din  (pcxreq_vbit_d),  // same as ifu_lsu_pcxreq_d
+		                    .clk  (clk),
+		                    .q    (ifc_ifd_reqvalid_e),
+		                    .se   (se), .si(), .so());
+
+//   dff #(1) pcxreqpe_ff(.din  (req_pending_d),
+//		                    .clk  (clk),
+//		                    .q    (req_pending_e),
+//		                    .se   (se), .si(), .so());
+
+   // advance pcx request if there is no prev request pending
+   // the data is deliberately held valid for one extra cycle.  this
+   // is legacy stuff.  LSU guarantees that the data is picked up
+   // minimum 1 cycle after request is made. 
+//   assign ifc_ifd_nxtpcx_sel_new_d = ~req_pending_e;
+//   assign ifc_ifd_nxtpcx_sel_new_d = 1'b1;   
+
+   // Select which MIL request to send out to PCX
+   assign ifc_ifd_milreq_sel_d_l[0] = ~dpcxthr_d[0] & ~rst_tri_en;
+   assign ifc_ifd_milreq_sel_d_l[1] = ~dpcxthr_d[1] | rst_tri_en;
+   assign ifc_ifd_milreq_sel_d_l[2] = ~dpcxthr_d[2] | rst_tri_en;
+   assign ifc_ifd_milreq_sel_d_l[3] = ~dpcxthr_d[3] | rst_tri_en;
+
+
+//-----------------------------
+// Invalidate Controls
+//----------------------------
+   assign stpkt_i1 = (ifd_ifc_cpxreq_i1 == `CPX_STRPKT) ? 1'b1 : 1'b0;
+   assign strmack_i1 = (ifd_ifc_cpxreq_i1 == `CPX_STRMACK) ? 1'b1 : 1'b0;
+   assign evpkt_i1 = (ifd_ifc_cpxreq_i1 == `CPX_EVPKT) ? 1'b1 : 1'b0;
+   assign ldpkt_i1 = (ifd_ifc_cpxreq_i1 == `CPX_LDPKT) ? 1'b1 : 1'b0;
+   
+   assign invalidate_i1 = (stpkt_i1 | strmack_i1 | evpkt_i1 | ldpkt_i1);
+   assign ifu_lsu_inv_clear = ~(invalidate_i1 | inv_ifc_inv_pending);
+//	 assign ifc_inv_wrreq_i2 = (imissrtn_i2 |
+//                              asireq_i2 & asi_ic_tag_i2 & ~asi_load_i2 |
+//			                        mbist_icache_write);
+   
+//   assign wrt_en_wd0_i2 = inv_ifc_word0_inv_i2 & (stpkt_i2 | evpkt_i2) |
+//                          ldinv_i2 & ~ifd_ifc_ldaddr5_i2 |
+//		                      (imissrtn_i2 |
+//                           asireq_i2 & asi_ic_tag_i2 & ~asi_load_i2 |
+//			                     mbist_icache_write) & 
+//	                       ~ifd_ifc_missaddr5_i2;
+//
+//   assign wrt_en_wd1_i2 = inv_ifc_word1_inv_i2 & (stpkt_i2 | evpkt_i2) |
+//			                      ldinv_i2 & ifd_ifc_ldaddr5_i2 |
+//		                        (imissrtn_i2 |
+//			                       asireq_i2 & asi_ic_tag_i2 & ~asi_load_i2 |
+//			                       mbist_icache_write) & 
+//	                        ifd_ifc_missaddr5_i2;
+   
+   // calculate the ICV write data
+   assign icv_wbit_i2 = imissrtn_i2 & ifc_ifd_filladdr4_i2 |
+	                   asireq_i2 & asi_ic_tag_i2 & ~asi_load_i2 & 
+	                   cpxreq_i2[2];
+   
+   assign icv_wrdata_i2 = ifc_inv_ifqadv_i2 ? icv_wbit_i2 : icv_wrdata_f;
+   
+//   mux2ds #(2) icv_damux(.dout (icv_wrdata_i2),
+//		                     .in0  (icv_wrdata_f),
+//		                     .in1  (icv_wbit_i2),
+//		                     .sel0 (~ifc_inv_ifqadv_i2),
+//		                     .sel1 (ifc_inv_ifqadv_i2));
+
+   dff_s #(1) icv_daff(.din  (icv_wrdata_i2),
+		                 .q    (icv_wrdata_f),
+		                 .clk  (clk),
+		                 .se   (se), .si(), .so());
+   assign ifq_icv_wrdata_bf = icv_wrdata_i2;
+
+
+// Begin ECO7010
+   dp_mux2es #(1) wayvld_mux (.dout (inq_wayvld_i1_nxt),        //done
+			      .in0 (lsu_ifu_cpxpkt_wayvld_i1),
+			      .in1 (inq_wayvld_i1),
+			      .sel(inq_vld));
+
+   dff_s #(1) wayvld_ff (.din (inq_wayvld_i1_nxt),             //done
+                       .q   (inq_wayvld_i1),
+                       .clk (clk), .se(se), .si(), .so());
+   
+   assign ldinv_i1 = ldpkt_i1 & inq_wayvld_i1;  //done
+
+   dp_mux2es #(1) ldinv_i2_mux (.dout (ldinv_i2_nxt),  //done
+			      .in0 (ldinv_i1),
+			      .in1 (ldinv_i2),
+			      .sel(ifc_ifd_ifqbyp_en_l));
+
+   dff_s #(1) ldinv_i2_ff (.din (ldinv_i2_nxt),   //done
+			 .q   (ldinv_i2),
+			 .clk (clk), .se(se), .si(), .so());   
+
+//End ECO7010
+
+//------------------------------------------------
+// Fwd Request to read/write Icache
+//------------------------------------------------
+   // is this a fwd req to the L1I?
+   assign fwdreq_i2 = (cpxreq_i2 == `FWD_RQ_RET) ? 
+                        (ifd_ifc_fwd2ic_i2 & ifd_ifc_4bpkt_i2 &
+                         ifd_ifc_cpxvld_i2) : 1'b0;
+
+   // detect first cycle of fwdpkt and stall
+   assign fwd_stall = fwdreq_i2 & ~fwdreq_i3;
+   
+   dff_s #(1) freq_ff(.din (fwd_stall),
+                    .q   (fwdreq_i3),
+                    .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) cpx3_ff(.din (ifd_ifc_cpxnc_i2),
+                    .q   (cpxnc_i3),
+                    .clk (clk), .se(se), .si(), .so());
+
+   // NC bit is also R/W_bar bit
+   assign fwdrd_i3 = fwdreq_i3 & cpxnc_i3;
+   assign fwdwr_i3 = fwdreq_i3 & ~cpxnc_i3;
+
+   // ack back to the LSU to send fwd reply
+   assign ifu_lsu_fwd_wr_ack = fwdwr_i3;
+   assign ifc_ifd_idx_sel_fwd_i2 = fwdreq_i2;
+
+   // let errctl know a fwd packet is coming
+   assign ifq_erb_fwdrd_bf = fwdrd_i3;
+   
+//----------------------------------
+// INQ controls -- now ibuf controls
+//----------------------------------
+
+// INQ removed 2/13/02   
+
+   // Is the pkt in the inq a pkt that affects the icache?
+   assign ic_pkt_i1 = invalidate_i1 | imissrtn_i1 | errpkt_i1;
+   
+//   assign inq_vld_nxt = ~inq_vld & ifd_ifc_cpxvalid_i1 & 
+//	                      (~ifqadv_i1 | asireq_i1) | 
+//	                      inq_vld & ((~ifqadv_i1 | asireq_i1) & ic_pkt_i1 |
+//				                             ifd_ifc_cpxvalid_i1);
+
+   // cut this down to 1 aoi gate
+   assign inq_vld_nxt = (ifd_ifc_cpxvalid_i1 |
+                         inq_vld & ic_pkt_i1) & (~ifqadv_i1 | ifu_asireq_i1);
+   
+   dffr_s #(1) inqv_ff(.din (inq_vld_nxt),
+		                 .q   (inq_vld),
+		                 .rst (ifq_reset),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   assign ifc_ifd_ifqbyp_en_l = ~(ifqadv_i1 | fwd_stall);
+   
+   assign ifc_ifd_ifqbyp_sel_fwd_l = ~(fwd_stall & ~ifq_reset);
+   assign ifc_ifd_ifqbyp_sel_asi_l = ~(~fwd_stall & ~ifq_reset & 
+                                       ifu_asireq_i1);
+   assign ifc_ifd_ifqbyp_sel_inq_l = ~(~fwd_stall & ~ifq_reset & 
+                                       ~ifu_asireq_i1 & inq_vld);
+   assign ifc_ifd_ifqbyp_sel_lsu_l = ~(~fwd_stall & ~ifu_asireq_i1 & 
+                                       ~inq_vld | ifq_reset);
+
+   assign byp_sel_asi_l = ~(ifqadv_i1 & ifu_asireq_i1);
+   
+//   assign ifu_lsu_ibuf_busy = inq_vld & (~ifqadv_i1 | asireq_i1);
+//   assign ifc_ifd_ld_inq_i1 = ~inq_vld | ifqadv_i1 & ~asireq_i1;
+
+   assign ifu_lsu_ibuf_busy = inq_vld;
+   assign ifc_ifd_ld_inq_i1 = ~inq_vld;
+
+//-----------------------------------------
+// ASI access controls
+//-----------------------------------------
+
+   // need this to help with timing
+   // - asi_vld is asserted only if the asi transaction is to an IFU asi
+   //   register AND that register is not in the IMMU.
+   // - it is held valid until an ack is signalled .
+   // - the ack is not signalled for at least 2 cycles
+   assign asi_vld_next = lsu_ifu_asi_vld & byp_sel_asi_l & 
+	                       ~asireq_i2 & ~illva_i2;  // not when ack is sent
+   
+   dff_s #(1) asiv0_ff(.din (asi_vld_next),
+		                .q   (asi_vld_i0),
+		                .clk (clk), .se(se), .si(), .so());
+   assign asi_vld_qual_i0 = asi_vld_i0 & ~asireq_i2 & ~illva_i2 &
+                            byp_sel_asi_l & ~illva_i1 &
+                            lsu_ifu_asi_vld;
+
+   dff_s #(8) asi_reg(.din (lsu_ifu_asi_state[7:0]),
+		                .q   (asi_state_i1),
+		                .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(2) asi_tid_reg(.din (lsu_ifu_asi_thrid[1:0]),
+		                    .q   (ifq_fcl_asi_tid_bf[1:0]),
+		                    .clk (clk), .se(se), .si(), .so());
+
+//   assign ifu_lsu_asi_ack = ~byp_sel_asi_l;
+   // Decided to wait one more cycle before sending the ack.
+   assign ifu_lsu_asi_ack = asireq_i2 | illva_i2;   
+   
+   // ifu ASIs
+   // icache data = 0x66
+   assign asi_ic_data_unchk_i1 = ~asi_state_i1[7] & 
+	        asi_state_i1[6] & 
+	        asi_state_i1[5] & 
+	        ~asi_state_i1[4] & 
+	        ~asi_state_i1[3] & 
+		      asi_state_i1[2] & 
+		      asi_state_i1[1] & 
+		      ~asi_state_i1[0];
+   assign asi_ic_data_i1 = asi_ic_data_unchk_i1;
+   
+   // icache tags = 0x67
+   // writing to tag also writes to vbits
+   assign asi_ic_tag_unchk_i1 = ~asi_state_i1[7] & 
+		      asi_state_i1[6] & 
+		      asi_state_i1[5] & 
+		      ~asi_state_i1[4] & 
+		      ~asi_state_i1[3] & 
+		      asi_state_i1[2] & 
+		      asi_state_i1[1] & 
+		      asi_state_i1[0];
+   assign asi_ic_tag_i1 = asi_ic_tag_unchk_i1;
+
+   // error enable 0x4B
+   assign asi_erren_unchk_i1 = ~asi_state_i1[7] & 
+		      asi_state_i1[6] & 
+		      ~asi_state_i1[5] & 
+		      ~asi_state_i1[4] & 
+		      asi_state_i1[3] & 
+		      ~asi_state_i1[2] & 
+		      asi_state_i1[1] & 
+		      asi_state_i1[0];
+   assign asi_erren_i1 =  asi_erren_unchk_i1 &
+          ~ifd_ifc_asi_vachklo_i2 &
+          ~ifd_ifc_asiaddr_i2[2];
+   
+   // error status 0x4C
+   assign asi_errstat_unchk_i1 = ~asi_state_i1[7] & 
+		      asi_state_i1[6] & 
+		      ~asi_state_i1[5] & 
+		      ~asi_state_i1[4] & 
+		      asi_state_i1[3] & 
+		      asi_state_i1[2] & 
+		      ~asi_state_i1[1] & 
+		      ~asi_state_i1[0];
+   assign asi_errstat_i1 = asi_errstat_unchk_i1 &
+          ~ifd_ifc_asi_vachklo_i2 &
+          ~ifd_ifc_asiaddr_i2[2];
+
+   // error addr 0x4D
+   assign asi_erraddr_unchk_i1 = ~asi_state_i1[7] & 
+		      asi_state_i1[6] & 
+		      ~asi_state_i1[5] & 
+		      ~asi_state_i1[4] & 
+		      asi_state_i1[3] & 
+		      asi_state_i1[2] & 
+		      ~asi_state_i1[1] & 
+		      asi_state_i1[0];
+   assign asi_erraddr_i1 =  asi_erraddr_unchk_i1 &
+          ~ifd_ifc_asi_vachklo_i2 &
+          ~ifd_ifc_asiaddr_i2[2];
+
+   // error inject 0x43
+   assign asi_errinj_unchk_i1 = ~asi_state_i1[7] & 
+		      asi_state_i1[6] & 
+		      ~asi_state_i1[5] & 
+		      ~asi_state_i1[4] & 
+		      ~asi_state_i1[3] & 
+		      ~asi_state_i1[2] & 
+		      asi_state_i1[1] & 
+		      asi_state_i1[0];
+   assign asi_errinj_i1 =  asi_errinj_unchk_i1 &
+          ~ifd_ifc_asi_vachklo_i2 &
+          ~ifd_ifc_asiaddr_i2[2];
+
+   // imask 0x42, va=0x8
+   assign asi_imask_unchk_i1 = ~asi_state_i1[7] & 
+		      asi_state_i1[6] & 
+		      ~asi_state_i1[5] & 
+		      ~asi_state_i1[4] & 
+		      ~asi_state_i1[3] & 
+		      ~asi_state_i1[2] & 
+		      asi_state_i1[1] & 
+		      ~asi_state_i1[0];
+   assign asi_imask_i1 = asi_imask_unchk_i1  &
+          ~ifd_ifc_asi_vachklo_i2 &
+          ifd_ifc_asiaddr_i2[2];  // this is actually va[3]
+
+   // illegal va check
+   assign illva_i0 = ((asi_erren_unchk_i1 |
+                       asi_errstat_unchk_i1 |
+                       asi_errinj_unchk_i1 |
+                       asi_erraddr_unchk_i1) & (ifd_ifc_asi_vachklo_i2 |
+                                                ifd_ifc_asiaddr_i2[2])) &
+                       asi_vld_qual_i0;
+
+   dff_s #(1) illvai1_ff(.din (illva_i0),
+                       .q   (illva_i1),
+                       .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) illvabf_ff(.din (illva_i1),
+                     .q   (illva_i2),
+                     .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) illvaf_ff(.din (illva_i2),
+                     .q   (illva_f),
+                     .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) illvas_ff(.din (illva_f),
+                      .q   (illva_s),
+                      .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) illvaw2_ff(.din (illva_s),
+                       .q   (illva_w2),
+                       .clk (clk), .se(se), .si(), .so());
+   assign ifu_lsu_ldxa_illgl_va_w2 = illva_w2;
+
+   dff_s #(1) tagasi_ff(.din (asi_ic_tag_i1),
+		                  .q   (asi_ic_tag_i2),
+		                  .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) datasi_ff(.din (asi_ic_data_i1),
+		                  .q   (asi_ic_data_i2),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asieeni2_ff(.din (asi_erren_i1),
+		                    .q   (ifq_erb_asi_erren_i2),
+		                    .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) asieini2_ff(.din (asi_errinj_i1),
+		                    .q   (ifq_erb_asi_errinj_i2),
+		                    .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) asiesti2_ff(.din (asi_errstat_i1),
+		                    .q   (ifq_erb_asi_errstat_i2),
+		                    .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) asieadi2_ff(.din (asi_erraddr_i1),
+		                    .q   (ifq_erb_asi_erraddr_i2),
+		                    .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) imaski2_ff(.din (asi_imask_i1),
+		                   .q   (ifq_erb_asi_imask_i2),
+		                   .clk (clk), .se(se), .si(), .so());
+
+   // All IFU asi requests
+   assign ifu_asireq_i0 = (asi_ic_tag_i1 | asi_ic_data_i1 | asi_erren_i1 |
+		                       asi_errinj_i1 | asi_errstat_i1 | asi_erraddr_i1 |
+		                       asi_imask_i1) & asi_vld_qual_i0;
+
+   dff_s #(1) asireq1_ff(.din  (ifu_asireq_i0),
+		                   .q    (ifu_asireq_i1),
+		                   .clk  (clk), .se(se), .si(), .so());
+   
+   dff_s #(1) asivld_ff(.din  (byp_sel_asi_l),
+		                  .q    (asireq_i2_l),
+		                  .clk  (clk), .se(se), .si(), .so());
+   assign asireq_i2 = ~asireq_i2_l;
+   assign ifc_inv_asireq_i2 = asireq_i2;
+
+   // Stall if we are doing an asi op or fwdreq
+  assign stallreq_d0 = (ifu_asireq_i0 | 
+                         ~byp_sel_asi_l |
+                         fwdreq_i2) | 
+                          starv_alert | 
+                          mbist_ifq_run_bist |
+	                  ldinv_i1 & ~ifqadv_i1 |        //ECO 7010
+                  	  ldinv_i2 & ~ifc_inv_ifqadv_i2; //ECO 7010
+
+   dff_s #(1) stal_ff(.din (stallreq_d0),
+                    .q   (stallreq_d1),
+                    .clk (clk), .se(se), .si(), .so());
+
+   // split into two to save repeater
+   assign ifq_fcl_stallreq = stallreq_d1;
+   assign ifq_swl_stallreq = stallreq_d1;
+
+   dff_s #(1) asil1_ff(.din (lsu_ifu_asi_load),
+		                 .q   (asi_load_i1),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asil2_ff(.din (asi_load_i1),
+		                 .q   (asi_load_i2),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   // insert parity error in data and/or tag
+   // Don't need to qualify with asireq and imissrtn...
+   //   -- moved this to the DP since the qual is not necessary
+//   assign ifc_ifd_insert_pe = (asireq_i2 | imissrtn_i2) & 
+//				ifd_ifc_cpxue_i2;
+
+   // decode asi
+   // generate word selects
+   // can use finst instead of word_sel_i2, but it screws up timing
+   
+   assign word_sel_i2[0] = ~ifd_ifc_asiaddr_i2[3] & ~ifd_ifc_asiaddr_i2[2];
+   assign word_sel_i2[1] = ~ifd_ifc_asiaddr_i2[3] &  ifd_ifc_asiaddr_i2[2];
+   assign word_sel_i2[2] =  ifd_ifc_asiaddr_i2[3] & ~ifd_ifc_asiaddr_i2[2];
+   assign word_sel_i2[3] =  ifd_ifc_asiaddr_i2[3] &  ifd_ifc_asiaddr_i2[2];
+
+   // this assumes asi requests are never stalled
+   assign ifq_icd_worden_bf = (word_sel_i2 | {4{~asireq_i2 & ~fwdwr_i3  |
+//                                                ~ifc_inv_ifqadv_i2 | 
+                                                mbist_icache_write}});
+// & (mbist_icache_worden | {4{~bist_op}});
+
+   // choose where the ic address should come from
+//   assign bist_op = (mbist_icache_read | mbist_icache_write);
+   dff_s #(1) bist_run_ff(.din (mbist_ifq_run_bist),
+                        .q   (bist_op),
+                        .clk (clk), .se(se), .si(), .so());
+   
+   assign ifc_ifd_addr_sel_bist_i2_l = ~bist_op | sehold;
+   assign ifc_ifd_addr_sel_old_i2_l = (bist_op | ifc_inv_ifqadv_i2) & ~sehold;
+   assign ifc_ifd_addr_sel_asi_i2_l = bist_op | ~ifc_inv_ifqadv_i2 | 
+                                      sehold | ~(asireq_i2 | fwdreq_i3);
+   assign ifc_ifd_addr_sel_fill_i2_l = bist_op | ~ifc_inv_ifqadv_i2 | 
+                                       sehold | asireq_i2 | fwdreq_i3;
+
+   // choose where the data should come from
+   assign ifq_icd_data_sel_bist_i2 = mbist_icache_write & ~sehold;
+   assign ifq_icd_data_sel_fill_i2 = ~mbist_icache_write & ifc_inv_ifqadv_i2 &
+                                     ~sehold;
+   assign ifq_icd_data_sel_old_i2 = ~mbist_icache_write & ~ifc_inv_ifqadv_i2 |
+                                     sehold;
+   
+   // generate icache controls
+   assign ifq_fcl_rdreq_bf = asireq_i2 & asi_load_i2 & 
+	                           (asi_ic_data_i2 | asi_ic_tag_i2) |
+	                           mbist_icache_read |
+                             fwdrd_i3;
+
+   assign ifq_fcl_icd_wrreq_bf = asi_ic_data_i2 & asireq_i2 & ~asi_load_i2 |
+	                               mbist_icache_write |
+                                 fwdwr_i3;
+
+   assign ifq_fcl_ictv_wrreq_bf = asi_ic_tag_i2 & asireq_i2 & ~asi_load_i2;
+
+   assign rd_tag_bf = asi_ic_tag_i2 & asi_load_i2;
+   dff_s #(1) asi_srcf_ff(.din (rd_tag_bf),
+		               .q   (ifq_erb_rdtag_f),
+		               .clk (clk), .se(se), .si(), .so());
+   
+   assign rdinst_bf = asi_ic_data_i2 & asi_load_i2;
+   dff_s #(1) asi_inst_ff(.din (rdinst_bf),
+		               .q   (ifq_erb_rdinst_f),
+		               .clk (clk), .se(se), .si(), .so());
+
+   assign asird_i1 = asi_load_i1 & (~byp_sel_asi_l | illva_i1);
+   dff_s #(1) asirdq_ff(.din (asird_i1),
+                      .q   (ifq_fcl_asird_bf),
+                      .clk (clk), .se(se), .si(), .so());
+
+   assign ifq_erb_asiwr_i2 = ~asi_load_i2 & asireq_i2;
+
+
+
+   // Shadow scan mux
+   mux4ds #(4) milss_mux(.dout (ifq_sscan_data[3:0]),
+                         .in0  (mil0_state),
+                         .in1  (mil1_state),
+                         .in2  (mil2_state),
+                         .in3  (mil3_state),
+                         .sel0 (ctu_sscan_tid[0]),
+                         .sel1 (ctu_sscan_tid[1]),
+                         .sel2 (ctu_sscan_tid[2]),
+                         .sel3 (ctu_sscan_tid[3]));
+   
+
+   
+endmodule // sparc_ifu_ifqctl
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_swpla.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_swpla.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_swpla.v	(revision 6)
@@ -0,0 +1,107 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_swpla.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_swpla
+//  Description:	
+//  Combinational decode of a 32-bit instruction word into a single
+//  output bit.  Per the inline comments, the bit is set for calls,
+//  branches, loads, mul/div and the other groups decoded to 1 below.
+*/
+////////////////////////////////////////////////////////////////////////
+
+module sparc_ifu_swpla(/*AUTOARG*/
+   // Outputs
+   out, 
+   // Inputs
+   in
+   );
+
+   // in  : 32-bit instruction word to be classified
+   // out : single-bit result of the decode below
+   input [31:0]  in;
+   output 	 out;
+
+   wire [31:0] 	 in;
+   reg 		 out;
+
+   // Combinational decode.  in[31:30] selects the instruction group;
+   // bits within in[24:19] refine the result inside each group.
+   always @ (in)
+     begin
+        case (in[31:30])
+          2'b01:                                   // call
+            out = 1'b1;
+
+          2'b00:                                   // branch, sethi, nop
+            begin
+               if (in[24:22] == 3'b100)            // nop/sethi
+                 out = 1'b0;
+               else                                // branch
+                 out = 1'b1;
+            end
+
+          2'b10:                                   // arith, shift, mem#, mov
+            begin
+               if (in[24:23] == 2'b11)             // wrpr, vis, save, jmpl
+                 out = 1'b1;
+               else if (in[24] == 1'b0)            // arith
+                 begin
+                    // alu op, or subc/addc
+                    if ((in[22] == 1'b0) ||
+                        ((in[22] == 1'b1) && (in[20:19] == 2'b00)))
+                      out = 1'b0;
+                    else                           // mul, div
+                      out = 1'b1;
+                 end
+               else                                // shft, mov, rdpr, tag
+                 begin
+                    if (in[22:19] == 4'h4)         // mulscc
+                      out = 1'b1;
+                    // shft/tag (in[22] clear) and mov (0xc, 0xf)
+                    else if ((in[22] == 1'b0) ||
+                             (in[22:19] == 4'hc) ||
+                             (in[22:19] == 4'hf))
+                      out = 1'b0;
+                    else        // rdsr, mem#, popc, flushw, rdpr
+                      out = 1'b1;
+                 end
+            end
+
+          default:                                 // ld st
+            begin
+               if (in[24] | in[23] | ~in[21])      // fp, alt space or ld
+                 out = 1'b1;
+               // stx, or any other st
+               else if (((~in[23]) && (in[22:19] == 4'he)) ||
+                        (in[22:21] == 2'b01))
+                 out = 1'b0;
+               else                                // other atomic
+                 out = 1'b1;
+            end
+        endcase
+     end // always @ (in)
+
+   sink #(32) s0(.in (in));
+
+endmodule // sparc_ifu_swpla
+
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_rndrob.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_rndrob.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_rndrob.v	(revision 6)
@@ -0,0 +1,95 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_rndrob.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_rndrob
+//  Description:	
+//  Round robin scheduler.  Least priority to the last granted
+//  customer.  If no requests, the priority remains the same. 
+//
+*/
+////////////////////////////////////////////////////////////////////////
+
+module sparc_ifu_rndrob(/*AUTOARG*/
+   // Outputs
+   grant_vec, so, 
+   // Inputs
+   clk, reset, se, si, req_vec, advance, rst_tri_enable
+   );
+
+   input        clk, reset, se, si;
+   input [3:0]  req_vec;         // one request bit per requester
+   input        advance;         // park on the grant being issued
+   input        rst_tri_enable;  // forces grant_vec to 4'b0001
+
+   output [3:0] grant_vec;
+   output       so;              // not driven inside this module
+
+   wire [3:0]   park_vec;        // registered park position
+   wire [3:0]   pv;              // park value before the reset override
+   wire [3:0]   next_pv;         // input of park_reg
+   wire [3:0]   gv;              // grant before the rst_tri_enable override
+   wire [3:0]   idle;            // idle[i]: requester i is not requesting
+
+   assign idle = ~req_vec;
+
+   // Park position: take the grant when advance is set, else hold.
+   // reset parks on requester 0.
+   assign pv      = advance ? grant_vec : park_vec;
+   assign next_pv = reset ? 4'b0001 : pv;
+
+   dff_s #4  park_reg(.din  (next_pv),
+                      .q    (park_vec),
+                      .clk  (clk),
+                      .se   (se), .si(), .so());
+
+   // The requester after the parked one is looked at first and the
+   // parked one last.  Each grant has one term per park position that
+   // lets it win, plus a term for being the only requester.  If no one
+   // requests, don't advance: gv[0] is the default grant (its last term
+   // ignores req_vec[0]), so the park would fall back to requester 0.
+   assign gv[0] = req_vec[0] & (park_vec[3] |
+                                park_vec[2] & idle[3] |
+                                park_vec[1] & idle[3] & idle[2]) |
+                  idle[1] & idle[2] & idle[3];
+
+   assign gv[1] = req_vec[1] & (park_vec[0] |
+                                park_vec[3] & idle[0] |
+                                park_vec[2] & idle[0] & idle[3] |
+                                idle[0] & idle[2] & idle[3]);
+
+   assign gv[2] = req_vec[2] & (park_vec[1] |
+                                park_vec[0] & idle[1] |
+                                park_vec[3] & idle[1] & idle[0] |
+                                idle[0] & idle[1] & idle[3]);
+
+   assign gv[3] = req_vec[3] & (park_vec[2] |
+                                park_vec[1] & idle[2] |
+                                park_vec[0] & idle[2] & idle[1] |
+                                idle[0] & idle[1] & idle[2]);
+
+   // rst_tri_enable overrides the arbitration and selects requester 0.
+   assign grant_vec = rst_tri_enable ? 4'b0001 : gv;
+
+endmodule // sparc_ifu_rndrob
+
+   
+   
Index: /trunk/T1-CPU/ifu/sparc_ifu_lru4.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_lru4.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_lru4.v	(revision 6)
@@ -0,0 +1,269 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_lru4.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_lru4
+//  Description:	
+//  LRU scheduler.  Least priority to the last granted
+//  customer.  If no requests, the priority remains the same. 
+*/
+////////////////////////////////////////////////////////////////////////
+
+module sparc_ifu_lru4(/*AUTOARG*/
+   // Outputs
+   grant_vec, so, 
+   // Inputs
+   clk, reset, se, si, recent_vec, load_recent, req_vec, spec_vec, 
+   use_spec
+   );
+   // 4-requester LRU arbiter: four 4-bit order registers plus grant muxes.
+   input     clk, reset, se, si;   // si is never referenced in this module
+   // recent_vec is written into the MRU slot in a cycle with load_recent=1.
+   input [3:0] recent_vec;
+   input       load_recent;
+   input [3:0] req_vec,            // requests examined for nospec_grant
+               spec_vec;           // requests examined for spec_grant
+
+   input       use_spec;           // gates sp_used* on, nosp_used* off
+   
+   output [3:0] grant_vec;         // spec_grant | nospec_grant
+   
+   output       so;                // NOTE(review): not driven in this module
+
+
+   wire [3:0]   used0,  // used0 is mru
+		            used1,
+		            used2,
+		            used3;  // used3 is lru
+
+   wire [3:0]   used23,       // flopped copy of used2_nxt | used3_nxt
+                used23_nxt;
+
+   wire [3:0]   used0_buf, 
+		            used1_buf,
+		            used2_buf,
+		            used3_buf; 
+
+   wire [3:0]   sp_used0, 
+		            sp_used1,
+		            sp_used2,
+		            sp_used3; 
+   wire [3:0]   nosp_used0, 
+		            nosp_used1,
+		            nosp_used2,
+		            nosp_used3; 
+
+   wire [3:0]   used0_calc,
+		            used0_nxt,
+ 		            used1_calc,
+		            used1_nxt,
+ 		            used2_calc,
+		            used2_nxt,
+ 		            used3_calc,
+		            used3_nxt;
+
+   wire         hit1,
+		            hit2,
+		            hit3;
+
+   wire [3:0]   nospec_grant,
+                spec_grant;
+   
+   wire         reqhit1,
+		            reqhit2,
+		            reqhit3,
+                reqhit23;
+
+   wire         spechit1,
+		            spechit2,
+		            spechit3,
+                spechit23;
+
+   wire         sel_u0,
+		            sel_u1,
+		            sel_u2,
+		            sel_u3;
+
+   wire         sel_su0,
+		            sel_su1,
+		            sel_su2,
+		            sel_su3;
+   // usedN_buf feeds only the reorder logic; the grant logic reads usedN.
+   dp_buffer #(4) use_buf0(.dout(used0_buf),
+                      .in  (used0));
+   dp_buffer #(4) use_buf1(.dout(used1_buf),
+                      .in  (used1));
+   dp_buffer #(4) use_buf2(.dout(used2_buf),
+                      .in  (used2));
+   dp_buffer #(4) use_buf3(.dout(used3_buf),
+                      .in  (used3));
+   
+   // hitN = slot N shares a set bit with recent_vec in a load_recent cycle.
+   // determine lru order for next cycle
+//   assign hit0 = (used0_buf[0] & recent_vec[0] |
+//		              used0_buf[1] & recent_vec[1] |
+//		              used0_buf[2] & recent_vec[2] |
+//		              used0_buf[3] & recent_vec[3]) & load_recent;
+
+   assign hit1 = (used1_buf[0] & recent_vec[0] |
+		              used1_buf[1] & recent_vec[1] |
+		              used1_buf[2] & recent_vec[2] |
+		              used1_buf[3] & recent_vec[3]) & load_recent;
+
+   assign hit2 = (used2_buf[0] & recent_vec[0] |
+		              used2_buf[1] & recent_vec[1] |
+		              used2_buf[2] & recent_vec[2] |
+		              used2_buf[3] & recent_vec[3]) & load_recent;
+
+   assign hit3 = (used3_buf[0] & recent_vec[0] |
+		              used3_buf[1] & recent_vec[1] |
+		              used3_buf[2] & recent_vec[2] |
+		              used3_buf[3] & recent_vec[3]) & load_recent;
+   
+   // Slots above the hit slot move down one place; slots below it hold.
+   assign  used0_calc = load_recent          ?  recent_vec : used0_buf;
+   assign  used1_calc = (hit3 | hit2 | hit1) ?  used0_buf  : used1_buf;
+   assign  used2_calc = (hit3 | hit2)        ?  used1_buf  : used2_buf;
+   assign  used3_calc = (hit3)               ?  used2_buf  : used3_buf;
+   // reset forces the order: bit 0 in the mru slot ... bit 3 in the lru slot.
+   assign  used0_nxt = reset ? 4'b0001 : used0_calc;
+   assign  used1_nxt = reset ? 4'b0010 : used1_calc;
+   assign  used2_nxt = reset ? 4'b0100 : used2_calc;
+   assign  used3_nxt = reset ? 4'b1000 : used3_calc;
+
+   // use 4X4 matrix to hold lru info
+   dff_s #(4) use0_reg(.din (used0_nxt),
+		                 .q   (used0),
+		                 .clk (clk), .se(se), .si(), .so());
+   
+   dff_s #(4) use1_reg(.din (used1_nxt),
+		                 .q   (used1),
+		                 .clk (clk), .se(se), .si(), .so());
+   
+   dff_s #(4) use2_reg(.din (used2_nxt),
+		                 .q   (used2),
+		                 .clk (clk), .se(se), .si(), .so());
+   
+   // used3 is lru
+   dff_s #(4) use3_reg(.din (used3_nxt),
+		                 .q   (used3),
+		                 .clk (clk), .se(se), .si(), .so());
+   // used23 is flopped from the same next values that feed used2 and used3.
+   assign  used23_nxt = used2_nxt | used3_nxt;
+   
+   dff_s #(4) use23_reg(.din (used23_nxt),
+		                 .q   (used23),
+		                 .clk (clk), .se(se), .si(), .so());
+   
+
+   // grant request based on lru
+   // reqhitN = some bit set in usedN is also set in req_vec.
+// save some loading on req_vec by not doing this   
+//   assign  reqhit0 = (used0[0] & req_vec[0] |
+//		      used0[1] & req_vec[1] |
+//		      used0[2] & req_vec[2] |
+//		      used0[3] & req_vec[3]);
+   
+   assign  reqhit1 = (used1[0] & req_vec[0] |
+		                  used1[1] & req_vec[1] |
+		                  used1[2] & req_vec[2] |
+		                  used1[3] & req_vec[3]);
+   
+   assign  reqhit2 = (used2[0] & req_vec[0] |
+		                  used2[1] & req_vec[1] |
+		                  used2[2] & req_vec[2] |
+		                  used2[3] & req_vec[3]);
+
+   assign  reqhit3 = (used3[0] & req_vec[0] |
+		                  used3[1] & req_vec[1] |
+		                  used3[2] & req_vec[2] |
+		                  used3[3] & req_vec[3]);
+
+   assign  reqhit23 = (used23[0] & req_vec[0] |
+		                   used23[1] & req_vec[1] |
+		                   used23[2] & req_vec[2] |
+		                   used23[3] & req_vec[3]);
+   // Priority runs used3 -> used0; used0 is picked without testing req_vec.
+   assign  sel_u3 = reqhit3;
+   assign  sel_u2 = ~reqhit3 & reqhit2;
+   assign  sel_u1 = ~reqhit23 & reqhit1;
+   assign  sel_u0 = ~reqhit23 & ~reqhit1;
+   // Mux data inputs are forced to zero while use_spec is 1.
+   assign  nosp_used0 = used0 & {4{~use_spec}};
+   assign  nosp_used1 = used1 & {4{~use_spec}};
+   assign  nosp_used2 = used2 & {4{~use_spec}};
+   assign  nosp_used3 = used3 & {4{~use_spec}};
+   // mux4ds is defined elsewhere; presumably a one-hot-select mux - TODO confirm
+   mux4ds #(4) nsgnt_mux(.dout (nospec_grant),
+		                     .in0  (nosp_used0),
+		                     .in1  (nosp_used1),
+		                     .in2  (nosp_used2),
+		                     .in3  (nosp_used3),
+		                     .sel0 (sel_u0),
+		                     .sel1 (sel_u1),
+		                     .sel2 (sel_u2),
+		                     .sel3 (sel_u3));
+   // Same selection again, with spec_vec in place of req_vec.
+   assign  spechit1 = (used1[0] & spec_vec[0] |
+		                   used1[1] & spec_vec[1] |
+		                   used1[2] & spec_vec[2] |
+		                   used1[3] & spec_vec[3]);
+   
+   assign  spechit2 = (used2[0] & spec_vec[0] |
+		                   used2[1] & spec_vec[1] |
+		                   used2[2] & spec_vec[2] |
+		                   used2[3] & spec_vec[3]);
+
+   assign  spechit3 = (used3[0] & spec_vec[0] |
+		                   used3[1] & spec_vec[1] |
+		                   used3[2] & spec_vec[2] |
+		                   used3[3] & spec_vec[3]);
+
+   assign  spechit23 = (used23[0] & spec_vec[0] |
+		                    used23[1] & spec_vec[1] |
+		                    used23[2] & spec_vec[2] |
+		                    used23[3] & spec_vec[3]);
+   
+   assign  sel_su3 = spechit3;
+   assign  sel_su2 = ~spechit3 & spechit2;
+   assign  sel_su1 = ~spechit23 & spechit1;
+   assign  sel_su0 = ~spechit23 & ~spechit1;
+   // Mux data inputs are forced to zero while use_spec is 0.
+   assign  sp_used0 = used0 & {4{use_spec}};
+   assign  sp_used1 = used1 & {4{use_spec}};
+   assign  sp_used2 = used2 & {4{use_spec}};
+   assign  sp_used3 = used3 & {4{use_spec}};
+   
+   mux4ds #(4) sgnt_mux(.dout (spec_grant),
+		                  .in0  (sp_used0),
+		                  .in1  (sp_used1),
+		                  .in2  (sp_used2),
+		                  .in3  (sp_used3),
+		                  .sel0 (sel_su0),
+		                  .sel1 (sel_su1),
+		                  .sel2 (sel_su2),
+		                  .sel3 (sel_su3));
+   // The two grants are simply ORed; each side's mux inputs are use_spec-gated.
+   assign  grant_vec = spec_grant | nospec_grant;
+   
+
+endmodule // sparc_ifu_lru4
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_par32.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_par32.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_par32.v	(revision 6)
@@ -0,0 +1,42 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_par32.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_par32
+//  Description:        
+//    Generates 32b parity.  Odd number of ones => out = 1
+*/
+
+// XOR-reduces a 32-bit word: out = 1 for an odd number of ones in 'in'.
+module sparc_ifu_par32 (/*AUTOARG*/
+   // Outputs
+   out, 
+   // Inputs
+   in
+   );
+   input  [31:0] in;    // word to take parity over
+   output        out;   // XOR of in[31:0]
+
+   // Fold the two half-word parities together.
+   assign out = (^in[31:16]) ^ (^in[15:0]);
+endmodule // sparc_ifu_par32
+
+   
Index: /trunk/T1-CPU/ifu/sparc_ifu_milfsm.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_milfsm.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_milfsm.v	(revision 6)
@@ -0,0 +1,397 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_milfsm.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_milfsm
+//  Description:	
+//  Missed Instruction List (MIL) state machine of the IFQ, the icache
+//  fill queue that sits between the IFU and the outside world and
+//  handles icache misses and invalidate requests from the crossbar.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+`include "ifu.h"
+
+//`define MILFSM_NULL   4'b0000
+//`define MILFSM_WAIT   4'b1000
+//`define MILFSM_REQ    4'b1100
+//`define MILFSM_FILL0  4'b1001
+//`define MILFSM_FILL1  4'b1011
+
+//`define MIL_V  3
+//`define MIL_R  2
+//`define MIL_A  1
+//`define MIL_F  0
+
+
+module sparc_ifu_milfsm(/*AUTOARG*/
+   // Outputs
+   so, fsm_ifc_errreq, fsm_ifc_wrt_tir, fsm_ifc_comp_valid, 
+   fsm_ifc_mil_valid, fsm_ifc_mil_cancel, fsm_ifc_thr_ready, 
+   fsm_ifc_pred_rdy, fsm_ifc_pcxreq, fsm_ifc_addrbit4_i2, 
+   fsm_ifc_milchld, fsm_ifc_milstate, 
+   // Inputs
+   ifc_fsm_can_thisthr, ifc_fsm_fill_thisthr_i2, 
+   ifc_fsm_wr_complete_f, ifqadv_i2, ifd_ifc_4bpkt_i2, 
+   fcl_ifq_thr_s1, ifc_fsm_imiss_thisthr_s, ifc_fsm_milhit_s, 
+   ifc_fsm_hiton_thismil_s, ifc_fsm_pcxaccept_thisthr, 
+   ifc_fsm_miladdr4, clk, se, si, reset, ifc_fsm_err_thisthr
+   );
+   // Missed Instruction List state machine; see the state encoding below.
+   input       ifc_fsm_can_thisthr,        // cancel: sets cancel_mil, blocks wrt_tir
+	             ifc_fsm_fill_thisthr_i2;    // fill returning: leaves the wait state
+   
+   input       ifc_fsm_wr_complete_f;      // steps fill0 -> fill1 -> null
+
+   input       ifqadv_i2;                  // gates wait -> null for a 4B packet
+
+   input       ifd_ifc_4bpkt_i2;           // 4B ifill from IOB
+   input [1:0] fcl_ifq_thr_s1;             // latched as child thr ptr on a hit
+   input       ifc_fsm_imiss_thisthr_s;    // new miss: null -> wait or req
+   input       ifc_fsm_milhit_s;           // miss hit the MIL: wait, not req
+	 input       ifc_fsm_hiton_thismil_s,    // secondary hit: sets child valid
+               ifc_fsm_pcxaccept_thisthr;  // req -> wait when no error is pending
+   input       ifc_fsm_miladdr4;           // compared with MIL_F for fill_this16b
+
+   input       clk, 
+               se, 
+               si,                         // not referenced in this module
+               reset;
+
+   input       ifc_fsm_err_thisthr;        // error: takes req path, sets err_pending
+   
+
+   output      so;                         // NOTE(review): not driven in this module
+
+   output      fsm_ifc_errreq;
+   output      fsm_ifc_wrt_tir;
+
+   output      fsm_ifc_comp_valid,
+	             fsm_ifc_mil_valid,
+               fsm_ifc_mil_cancel,
+	             fsm_ifc_thr_ready;
+   output      fsm_ifc_pred_rdy,
+	             fsm_ifc_pcxreq,
+	             fsm_ifc_addrbit4_i2;
+	 
+   output [2:0] fsm_ifc_milchld;
+
+   output [3:0] fsm_ifc_milstate;
+	 
+
+//----------------------------------------------------------------------
+// Declarations
+//----------------------------------------------------------------------
+
+
+   // local variables
+   reg [3:0] 	next_state;
+
+   wire [3:0] 	milstate;
+
+   wire [2:0]   local_milchld;
+   
+   wire 	milchld_valid;
+
+   wire   fill_this16b;
+   
+
+   wire 	cancel_mil,
+		      cancel_next;
+   wire 	err_pending,
+		      err_pending_next;
+   
+   wire 	valid_d1,
+		valid_i2;
+   
+   wire [2:0] 	next_milchld;
+
+
+   // Missed Instruction List State Machine
+   // 3   - valid
+   // 2   - req
+   // 1   - addr for fill (1/0)
+   // 0   - fill
+   //
+   // 2   - child valid
+   // 1:0 - child thr ptr
+   //
+   // States: 0000 null, 1100 req, 1000 wait, 1001 fill0, 1011 fill1.
+   // next_state is purely combinational; milst_reg below registers it.
+   always @(/*AUTOSENSE*/err_pending or ifc_fsm_err_thisthr
+            or ifc_fsm_fill_thisthr_i2 or ifc_fsm_imiss_thisthr_s
+            or ifc_fsm_milhit_s or ifc_fsm_pcxaccept_thisthr
+            or ifc_fsm_wr_complete_f or ifd_ifc_4bpkt_i2 or ifqadv_i2
+            or milstate)
+     begin
+	      case (milstate)  //  synopsys parallel_case
+	        4'b0000: // null
+	          begin
+	             //ic_wrreq_i2 = 1'b0;	       
+	             //  orphan_chld = 1'b0;
+               next_state[1:0] = 2'b0;
+               if (ifc_fsm_err_thisthr | ifc_fsm_imiss_thisthr_s)
+		             begin
+		                next_state[`MIL_V] = 1'b1;
+		                if (ifc_fsm_milhit_s & ~ifc_fsm_err_thisthr) 
+		                  next_state[`MIL_R] = 1'b0;  // MILFSM_WAIT
+		                else
+		                  next_state[`MIL_R] = 1'b1;  // MILFSM_REQ;
+		             end
+	             else
+		             next_state = milstate;
+	          end // case: 4'b0000
+	        
+	        4'b1100:  // req
+	          begin
+	             // ic_wrreq_i2 = 1'b0;		
+	             // if canthr=1, the request will not be sent out in this cycle.
+               if ((ifc_fsm_pcxaccept_thisthr) &
+                   ~(ifc_fsm_err_thisthr | err_pending))
+                 // two requests are made when there is an error.
+                 // one, with errbit=1 gets back in invalidate response, 
+                 // the other, with errbit=0, gets the regular ifill 
+                 // return
+		             begin
+		                // we invalidate the icache on detecting an error
+		                // only if this wasn't an MIL hit as well.  If it
+		                // was an MIL we would have gone to the wait state
+		                // already and it is too late to invalidate the cache
+		                next_state = `MILFSM_WAIT;
+                    //		    orphan_chld = 1'b0;		    
+		             end
+//	             else if ((cancel_mil | ifc_fsm_can_thisthr) & 
+//                        ~milchld_valid & ~ifc_fsm_hiton_thismil_s)
+//		             begin
+//		                next_state = `MILFSM_NULL;
+//		             end
+	             else
+		             begin
+		                next_state = milstate;
+                    //		    orphan_chld = 1'b0;
+		             end 
+	          end // case: 4'b1100
+          
+	        4'b1000: // wait
+	          begin
+               //	       orphan_chld = 1'b0;
+	             if (ifc_fsm_fill_thisthr_i2)
+		             begin
+		                // ic_wrreq_i2 = 1'b1;
+                    if (ifd_ifc_4bpkt_i2 & ifqadv_i2) // 4B ifill from IOB
+                      // don't want to advance too quickly and get a false completion
+                      next_state = `MILFSM_NULL;
+                    else if (~ifd_ifc_4bpkt_i2)
+		                  next_state = `MILFSM_FILL0;
+                    else
+                      next_state = milstate;
+		             end
+	             else
+		             begin
+		                next_state = milstate;
+		                //ic_wrreq_i2 = 1'b0;		    
+		             end
+	          end // case: 4'b1000
+	        
+	        4'b1001: // fill0
+	          begin
+               //	       orphan_chld = 1'b0;
+               if (ifc_fsm_wr_complete_f)
+		             begin
+		                next_state = `MILFSM_FILL1;
+		                //ic_wrreq_i2 = 1'b1;
+		             end
+	             else
+		             begin
+		                next_state = milstate;
+		                //ic_wrreq_i2 = 1'b1;
+		             end
+	          end // case: 4'b1001
+          
+	        4'b1011: // fill1
+	          // Do we really need this state??  yes, to start thr
+	          begin
+               //	       orphan_chld = 1'b0;		    
+	             if (ifc_fsm_wr_complete_f)
+		             begin
+		                //ic_wrreq_i2 = 1'b0; 		    
+                    //		    if (delay_mil | ifc_fsm_imiss_thisthr_s)
+                    //		      next_state = `MILFSM_REQ;
+                    //		    else
+		                next_state = `MILFSM_NULL;
+		             end
+	             else
+		             begin
+		                //ic_wrreq_i2 = 1'b1;
+		                next_state = milstate;
+		             end // else: !if(ifc_fsm_wr_complete_f)
+	          end // case: 4'b1011
+          
+	        default:
+	          begin
+               // synopsys translate_off
+		     // 0in <fire -message "MILSTATE, Error: SPARC/IFU/MILFSM: unknown state!"
+`ifdef DEFINE_0IN
+`else
+               if ($time > (4* `CMP_CLK_PERIOD))
+                 begin
+				`ifdef MODELSIM
+					  $display ("MILSTATE", 
+                            "Error: SPARC/IFU/MILFSM: unknown state! %b\n",milstate);
+				`else
+	                  $error ("MILSTATE", 
+                            "Error: SPARC/IFU/MILFSM: unknown state! %b\n",milstate);
+				`endif
+                 end
+`endif
+               // synopsys translate_on               
+	             next_state = milstate;
+	             //ic_wrreq_i2 = 1'b0;		    
+               //	       orphan_chld = 1'b0;	
+	          end // case: default
+	      endcase // case (milstate)
+     end // always @ (...
+   
+
+   // MIL state reg
+   dffr_s #(4) milst_reg(.din  (next_state[3:0]),
+		                   .q    (milstate[3:0]),
+		                   .clk  (clk),
+		                   .rst  (reset),
+		                   .se   (se), .si(), .so());
+
+   // Cancel - Delay state machine
+   // -- not used anymore
+   // C D
+   // 0 0  - null
+   // 1 0  - current thread cancelled but pending from L2
+   // 1 1  - one ifill pending from L2, current thread will be sent
+   //        out after that.
+   
+//   assign cancel_next = (ifc_fsm_can_thisthr | 
+//			                   cancel_mil) & next_state[`MIL_V];    // reset wins
+   // cancel_mil stays set while the entry is valid or is being allocated.
+   assign cancel_next = (ifc_fsm_can_thisthr | cancel_mil) & 
+                          (milstate[`MIL_V] | ifc_fsm_imiss_thisthr_s |
+                           ifc_fsm_err_thisthr);    // reset wins
+
+   dffr_s #(1) can_ff(.din  (cancel_next),
+		                .q    (cancel_mil),
+		                .clk  (clk),
+		                .rst  (reset),
+		                .se   (se), .si(), .so());
+
+   // track if we need to send out an error request
+   assign err_pending_next = (ifc_fsm_err_thisthr & 
+	                      (milstate[`MIL_R] | ~milstate[`MIL_V]) |
+//	                      err_pending & next_state[`MIL_V]) &	
+                        err_pending & milstate[`MIL_V]) &
+	                      ~ifc_fsm_pcxaccept_thisthr;
+                         // & ~ifc_fsm_can_thisthr;
+   
+   dffr_s #(1) err_ff(.din (err_pending_next),
+		                .q   (err_pending),
+		                .clk (clk),
+		                .rst (reset), .se(se), .si(), .so());
+   assign fsm_ifc_errreq = err_pending;
+
+   // Track secondary hits
+   assign next_milchld[2] = ifc_fsm_hiton_thismil_s |    // hit on MIL by
+   	                                                     // someone else
+              			    fsm_ifc_milchld[2] & milstate[`MIL_V]; // reset
+
+   assign next_milchld[1:0] = ifc_fsm_hiton_thismil_s ? fcl_ifq_thr_s1 :
+			                                                  fsm_ifc_milchld[1:0];
+
+   dffr_s #(3) milchld_reg(.din  (next_milchld),
+		                     .clk  (clk),
+		                     .rst  (reset),
+		                     .q    (local_milchld),
+		                     .se   (se), .si(), .so());
+   // child valid is reported only while the entry itself is valid
+   assign fsm_ifc_milchld[2] = local_milchld[2] & milstate[`MIL_V];
+   assign fsm_ifc_milchld[1:0] = local_milchld[1:0];
+   
+   assign milchld_valid = local_milchld[2] & milstate[`MIL_V];
+   // high in fill1, and in fill0 during the cycle wr_complete is seen
+//   assign fsm_ifc_addrbit4_i2 = milstate[`MIL_F];
+   assign fsm_ifc_addrbit4_i2 = milstate[`MIL_F] & milstate[`MIL_V] & 
+                                (milstate[`MIL_A] | ifc_fsm_wr_complete_f); 
+
+   // determine if we want to fill from the first pkt or second pkt
+   assign fill_this16b = ~(milstate[`MIL_F] ^ ifc_fsm_miladdr4) |
+                            ifd_ifc_4bpkt_i2;
+
+   // write to thread inst reg (TIR)
+//   assign fsm_ifc_wrt_tir =  (next_state[`MIL_F]) & ~cancel_mil & 
+//	                     ifc_fsm_fill_thisthr_i2;
+   assign fsm_ifc_wrt_tir =  (milstate[`MIL_V] & ~milstate[`MIL_R]) & 
+			                         ~(cancel_mil | ifc_fsm_can_thisthr) & 
+                               ifc_fsm_fill_thisthr_i2 &
+                               fill_this16b;
+	  
+   // write to Icache 
+//   assign fsm_ifc_wrreq_i2 = ic_wrreq_i2;
+   assign valid_i2 = milstate[`MIL_V] & ~fsm_ifc_thr_ready;
+   
+   dff_s vld_ff(.din (valid_i2),
+	            .q   (valid_d1),
+	            .clk (clk),
+	            .se  (se), .si(), .so());
+   // ready on fill1 completion, or when MIL_V drops before ready was signalled
+   // signal thread completion
+   assign fsm_ifc_thr_ready = milstate[`MIL_V] & milstate[`MIL_F] &
+	                      milstate[`MIL_A] & ifc_fsm_wr_complete_f |
+	                      ~milstate[`MIL_V] & valid_d1;
+
+   // predict ready assuming 2nd ifill happens in the next cycle
+   assign fsm_ifc_pred_rdy =  milstate[`MIL_V] & milstate[`MIL_F] &
+                              (ifc_fsm_wr_complete_f |
+                               milstate[`MIL_A]); //  & ifc_fsm_fill_thisthr_i2
+   
+   // set compare valid for mil hit signal
+   assign fsm_ifc_comp_valid = milstate[`MIL_V] &     // valid entry
+	                             ~milstate[`MIL_F] &    // not f0 or f1
+                               ~milchld_valid;        // no chld already
+
+   assign fsm_ifc_mil_valid = milstate[`MIL_V];
+   assign fsm_ifc_mil_cancel = cancel_mil;
+   
+   // In the request state or if we need to send an error invalidate, 
+   // ask for bus from LSU. 
+//   assign fsm_ifc_pcxreq = (milstate[`MIL_V] & milstate[`MIL_R] |
+//			    err_pending | ifc_fsm_err_thisthr) & 
+//	                   ~ifc_fsm_pcxaccept_thisthr & 
+//                            (milchld_valid | ~cancel_mil);
+
+//   assign fsm_ifc_pcxreq = (milstate[`MIL_V] & milstate[`MIL_R] & 
+//	                          ~ifc_fsm_pcxaccept_thisthr & 
+//                            (milchld_valid | ~cancel_mil));
+
+   // removed pcx_accept from critical path
+   assign fsm_ifc_pcxreq = milstate[`MIL_V] & milstate[`MIL_R];
+
+   assign fsm_ifc_milstate = milstate;
+
+   
+endmodule
Index: /trunk/T1-CPU/ifu/sparc_ifu_cmp35.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_cmp35.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_cmp35.v	(revision 6)
@@ -0,0 +1,54 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_cmp35.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_cmp35
+//  Description:	
+//  35 bit comparator for MIL hit detection
+*/
+////////////////////////////////////////////////////////////////////////
+
+// Valid-qualified 35-bit equality compare: hit = 1 only when valid is 1
+// and a equals b; any other (including unknown) condition gives 0.
+module sparc_ifu_cmp35(/*AUTOARG*/
+   // Outputs
+   hit, 
+   // Inputs
+   a, b, valid
+   );
+
+   input  [34:0] a;       // first operand
+   input  [34:0] b;       // second operand
+   input         valid;   // qualifier for the comparison
+   output        hit;     // qualified match flag
+
+   reg           hit;
+
+   // The case item matches only a known 1, so an unknown compare result
+   // falls to the default arm exactly as it would take an else branch.
+   always @ (valid or a or b)
+     case (valid & (a == b))
+       1'b1:    hit = 1'b1;
+       default: hit = 1'b0;
+     endcase
+
+endmodule // sparc_ifu_cmp35
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_par34.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_par34.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_par34.v	(revision 6)
@@ -0,0 +1,42 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_par34.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_par34
+//  Description:        
+//    Generates 34b parity.  Odd number of ones => out = 1
+*/
+
+// XOR-reduces a 34-bit word: out = 1 for an odd number of ones in 'in'.
+module sparc_ifu_par34 (/*AUTOARG*/
+   // Outputs
+   out, 
+   // Inputs
+   in
+   );
+   input  [33:0] in;    // word to take parity over
+   output        out;   // XOR of in[33:0]
+
+   // Fold the parities of the upper and lower 17 bits together.
+   assign out = (^in[33:17]) ^ (^in[16:0]);
+endmodule // sparc_ifu_par34
+
+   
Index: /trunk/T1-CPU/ifu/sparc_ifu_par16.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_par16.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_par16.v	(revision 6)
@@ -0,0 +1,42 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_par16.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_par16
+//  Description:        
+//    Generates 16b parity.  Odd number of ones => out = 1
+*/
+
+// XOR-reduces a 16-bit word: out = 1 for an odd number of ones in 'in'.
+module sparc_ifu_par16 (/*AUTOARG*/
+   // Outputs
+   out, 
+   // Inputs
+   in
+   );
+   input  [15:0] in;    // word to take parity over
+   output        out;   // XOR of in[15:0]
+
+   // Fold the two byte parities together.
+   assign out = (^in[15:8]) ^ (^in[7:0]);
+endmodule // sparc_ifu_par16
+
+   
Index: /trunk/T1-CPU/ifu/sparc_ifu_dec.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_dec.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_dec.v	(revision 6)
@@ -0,0 +1,1528 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_dec.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_dec
+//  Description:	
+//  The decode block implements the sparc instruction decode ROM
+//  It has a purely combinational part and some staging flops
+*/
+////////////////////////////////////////////////////////////////////////
+
+module sparc_ifu_dec(/*AUTOARG*/
+   // Outputs
+   so, ifu_exu_aluop_d, ifu_exu_invert_d, ifu_exu_useimm_d, 
+   ifu_exu_usecin_d, ifu_exu_enshift_d, ifu_exu_tagop_d, 
+   ifu_exu_tv_d, ifu_exu_muls_d, ifu_exu_ialign_d, 
+   ifu_exu_range_check_jlret_d, ifu_exu_range_check_other_d, 
+   ifu_exu_shiftop_d, ifu_exu_muldivop_d, ifu_exu_wen_d, 
+   ifu_exu_setcc_d, ifu_exu_rd_ifusr_e, ifu_exu_rd_exusr_e, 
+   ifu_exu_rd_ffusr_e, ifu_exu_rs1_vld_d, ifu_exu_rs2_vld_d, 
+   ifu_exu_rs3e_vld_d, ifu_exu_rs3o_vld_d, ifu_exu_use_rsr_e_l, 
+   ifu_exu_save_d, ifu_exu_restore_d, ifu_exu_return_d, 
+   ifu_exu_flushw_e, ifu_exu_saved_e, ifu_exu_restored_e, 
+   ifu_tlu_rsr_inst_d, ifu_lsu_wsr_inst_d, ifu_exu_wsr_inst_d, 
+   ifu_tlu_done_inst_d, ifu_tlu_retry_inst_d, ifu_lsu_ld_inst_e, 
+   ifu_lsu_st_inst_e, ifu_lsu_pref_inst_e, ifu_lsu_alt_space_e, 
+   ifu_lsu_alt_space_d, ifu_tlu_alt_space_d, ifu_lsu_memref_d, 
+   ifu_lsu_sign_ext_e, ifu_lsu_ldstub_e, ifu_lsu_casa_e, 
+   ifu_exu_casa_d, ifu_lsu_swap_e, ifu_tlu_mb_inst_e, 
+   ifu_tlu_sir_inst_m, ifu_tlu_flsh_inst_e, ifu_lsu_ldst_dbl_e, 
+   ifu_lsu_ldst_fp_e, ifu_lsu_ldst_size_e, ifu_ffu_fpop1_d, 
+   ifu_ffu_visop_d, ifu_ffu_fpop2_d, ifu_ffu_fld_d, ifu_ffu_fst_d, 
+   ifu_ffu_ldst_size_d, ifu_ffu_ldfsr_d, ifu_ffu_ldxfsr_d, 
+   ifu_ffu_stfsr_d, ifu_ffu_quad_op_e, dec_fcl_rdsr_sel_pc_d, 
+   dec_fcl_rdsr_sel_thr_d, dec_imd_call_inst_d, 
+   dtu_fcl_flush_sonly_e, dtu_fcl_illinst_e, dtu_fcl_fpdis_e, 
+   dtu_fcl_privop_e, dtu_fcl_imask_hit_e, dtu_fcl_br_inst_d, 
+   dtu_fcl_sir_inst_e, dtu_ifq_kill_latest_d, dec_swl_wrt_tcr_w, 
+   dec_swl_wrtfprs_w, dec_swl_ll_done_d, dec_swl_br_done_d, 
+   dec_swl_rdsr_sel_thr_d, dec_swl_ld_inst_d, dec_swl_sta_inst_e, 
+   dec_swl_std_inst_d, dec_swl_st_inst_d, dec_swl_fpop_d, 
+   dec_swl_allfp_d, dec_swl_frf_upper_d, dec_swl_frf_lower_d, 
+   dec_swl_div_inst_d, dec_swl_mul_inst_d, wsr_fixed_inst_w, 
+   ifu_exu_sethi_inst_d, dec_dcl_cctype_d, 
+   // Inputs
+   rclk, se, si, dtu_inst_d, erb_dtu_imask, swl_dec_ibe_e, 
+   dtu_inst_anull_e, lsu_ifu_ldsta_internal_e, fcl_dtu_tlzero_d, 
+   fcl_dtu_privmode_d, fcl_dtu_hprivmode_d, fcl_dtu_inst_vld_d, 
+   fcl_dtu_ely_inst_vld_d, fcl_dec_intr_vld_d, fcl_dtu_inst_vld_e, 
+   fcl_dec_dslot_s, swl_dec_mulbusy_e, swl_dec_fpbusy_e, 
+   swl_dec_divbusy_e, swl_dec_fp_enable_d
+   );
+
+   input          rclk, 
+                  se, 
+                  si;
+   input [31:0]   dtu_inst_d;	// fed in at Switch (S) stage.
+   input [38:0]   erb_dtu_imask;
+   input          swl_dec_ibe_e;
+   input          dtu_inst_anull_e;
+   input          lsu_ifu_ldsta_internal_e;
+   input          fcl_dtu_tlzero_d;
+   input          fcl_dtu_privmode_d;
+   input          fcl_dtu_hprivmode_d;
+   input          fcl_dtu_inst_vld_d,
+                  fcl_dtu_ely_inst_vld_d,
+                  fcl_dec_intr_vld_d,
+		              fcl_dtu_inst_vld_e;  // qual with this is not necessary
+
+   input          fcl_dec_dslot_s;
+
+   input          swl_dec_mulbusy_e;
+   input          swl_dec_fpbusy_e;
+   input          swl_dec_divbusy_e;
+   input          swl_dec_fp_enable_d;
+
+
+   output         so;
+   
+   // to EXU
+   output [2:0]   ifu_exu_aluop_d;// 000 - add/sub
+                                  // 001 - and
+                                  // 010 - or
+                                  // 011 - xor
+                                  // 1X0 - movcc
+                                  // 1x1 - movr
+   output         ifu_exu_invert_d;   // invert rs2 operand
+   output         ifu_exu_useimm_d;
+   output         ifu_exu_usecin_d;   // use c from icc
+   output         ifu_exu_enshift_d;  // turn on shifter
+
+   output         ifu_exu_tagop_d,     
+		              ifu_exu_tv_d,     
+		              ifu_exu_muls_d,
+                  ifu_exu_ialign_d,
+		              ifu_exu_range_check_jlret_d,		
+		              ifu_exu_range_check_other_d;
+
+   output [2:0] ifu_exu_shiftop_d;  // b2 - 32b(0) or 64b(1)
+                                    // b1 - left(0) or right(1) shift
+                                    // b0 - unsigned(0) or signed(1)
+   
+   output [4:0] ifu_exu_muldivop_d; // b4 - is_mul
+                                    // b3 - is_div
+                                    // b2 - 64b if 1, 32b if 0
+                                    // b1 - signed if 1, unsigned if 0
+                                    // b0 - set cc's
+   
+   output       ifu_exu_wen_d;      // write to rd
+   output       ifu_exu_setcc_d;    // b0 - write to icc/xcc
+
+   output       ifu_exu_rd_ifusr_e,
+		            ifu_exu_rd_exusr_e,
+		            ifu_exu_rd_ffusr_e;
+
+   output       ifu_exu_rs1_vld_d,
+		            ifu_exu_rs2_vld_d,
+		            ifu_exu_rs3e_vld_d,
+		            ifu_exu_rs3o_vld_d;
+   
+   output       ifu_exu_use_rsr_e_l;
+   
+   output       ifu_exu_save_d, 
+		            ifu_exu_restore_d,
+		            ifu_exu_return_d,
+		            ifu_exu_flushw_e,
+		            ifu_exu_saved_e,
+		            ifu_exu_restored_e;
+
+   // to TLU
+   output       ifu_tlu_rsr_inst_d,
+		            ifu_lsu_wsr_inst_d,
+		            ifu_exu_wsr_inst_d,
+		            ifu_tlu_done_inst_d,
+		            ifu_tlu_retry_inst_d;
+   
+   // to LSU 
+   output       ifu_lsu_ld_inst_e,   // ld inst or atomic
+		            ifu_lsu_st_inst_e,   // store or atomic
+                ifu_lsu_pref_inst_e,
+		            ifu_lsu_alt_space_e, // alt space -- to be removed
+		            ifu_lsu_alt_space_d, // never x -- to be removed
+		            ifu_tlu_alt_space_d, // sometimes x but faster
+		            ifu_lsu_memref_d;    // alerts lsu of upcoming ldst
+//		            ifu_lsu_imm_asi_vld_d;
+
+   output       ifu_lsu_sign_ext_e,
+		            ifu_lsu_ldstub_e,
+		            ifu_lsu_casa_e,
+		            ifu_exu_casa_d,
+		            ifu_lsu_swap_e;
+
+   output       ifu_tlu_mb_inst_e,
+		            ifu_tlu_sir_inst_m,
+		            ifu_tlu_flsh_inst_e;
+
+   output       ifu_lsu_ldst_dbl_e,
+		            ifu_lsu_ldst_fp_e;
+   
+   output [1:0] ifu_lsu_ldst_size_e;
+   
+   // to SPU
+//   output 	ifu_spu_scpy_inst_e,
+//		ifu_spu_scmp_inst_e;
+
+   // to FFU
+   output       ifu_ffu_fpop1_d;
+   output       ifu_ffu_visop_d;
+   output       ifu_ffu_fpop2_d;
+   output       ifu_ffu_fld_d;
+   output       ifu_ffu_fst_d;
+   output       ifu_ffu_ldst_size_d;
+   
+   output       ifu_ffu_ldfsr_d,
+		            ifu_ffu_ldxfsr_d,
+		            ifu_ffu_stfsr_d;
+   output       ifu_ffu_quad_op_e;
+
+   // within IFU
+   output       dec_fcl_rdsr_sel_pc_d,
+		            dec_fcl_rdsr_sel_thr_d;
+
+   output       dec_imd_call_inst_d;
+
+   output       dtu_fcl_flush_sonly_e,
+//                dec_fcl_kill4sta_e,
+		            dtu_fcl_illinst_e,
+		            dtu_fcl_fpdis_e,
+		            dtu_fcl_privop_e,
+		            dtu_fcl_imask_hit_e,
+		            dtu_fcl_br_inst_d,
+		            dtu_fcl_sir_inst_e;
+
+   output       dtu_ifq_kill_latest_d;
+   
+   // within DTU
+   output       dec_swl_wrt_tcr_w,
+		            dec_swl_wrtfprs_w,
+		            dec_swl_ll_done_d,
+                dec_swl_br_done_d,
+		            dec_swl_rdsr_sel_thr_d,
+		            dec_swl_ld_inst_d,
+		            dec_swl_sta_inst_e,
+		            dec_swl_std_inst_d,
+		            dec_swl_st_inst_d,
+		            dec_swl_fpop_d,
+		            dec_swl_allfp_d,
+		            dec_swl_frf_upper_d,
+		            dec_swl_frf_lower_d,
+		            dec_swl_div_inst_d,
+		            dec_swl_mul_inst_d,
+		            wsr_fixed_inst_w,
+		            ifu_exu_sethi_inst_d;   // can be sethi or no-op
+
+   output [2:0] dec_dcl_cctype_d;       // 0yy - fcc(yy)
+                                        // 100 - icc
+                                        // 110 - xcc
+                                        // 1X1 - illegal inst!
+   
+   //------------------------------------------------------------
+   // Declarations
+   //------------------------------------------------------------
+   // Internal Signals
+   wire [1:0]   op;
+   wire [2:0]   op2;
+   wire [5:0]   op3;
+   wire [8:0]   opf;
+
+   wire         brsethi_inst,  // op types
+		            call_inst,
+		            arith_inst,
+		            mem_inst;
+   wire         sethi_or_nop;
+   wire [15:0]  op3_lo;        // decode op3[3:0]
+   wire [3:0]   op3_hi;        // decode op3[5:4]
+
+   wire         dbr_inst_d, 
+		            ibr_inst_d,    // jmpl or return
+		            jmpl_inst_d,
+		            retn_inst_d,
+		            sethi_inst_d;
+
+   wire         rdsr_done_d,
+                rdpr_done_d;
+   wire         dslot_d;
+   
+   wire         use_rsr_d_l;
+   
+   wire         flushw_d,
+		            saved_d,
+		            restored_d;
+   wire         save_retn_done_d;
+   wire         privop_d,
+		            privop_e,
+                hprivop_d,
+                hprivop_e,
+                valid_hp_rs_d,
+                valid_hp_rd_d,
+		            inv_reg_access_d,
+                rsvchk_fail_d,
+		            ill_inst_d,
+		            ill_inst_e;
+
+   wire         inst12_5_nonzero_d,
+                inst11_8_nonzero_d,
+                inst9_5_nonzero_d,
+                rs2_nonzero_d;
+
+   wire         state_chg_inst_d,
+		            state_chg_inst_e,
+		            flush_inst_d;
+
+   wire         cctype_sel_imov,   // select which CC's to use
+		            cctype_sel_fmov,
+		            cctype_sel_bcc,
+		            cctype_sel_bpcc;
+
+   wire         rs1_vld_d,
+		            rs2_vld_d,
+		            rs3_vld_d,
+		            rs4_vld_d;
+
+   wire [4:0]   rs1,
+		            rd;
+   
+   wire         rs1_00,           // decoded rs1
+		            rs1_01,
+		            rs1_02,
+		            rs1_05,
+		            rs1_06,
+		            rs1_07,
+		            rs1_09_0e,
+		            rs1_0f,
+                rs1_10,
+		            rs1_12,
+		            rs1_13,
+		            rs1_14_15,
+		            rs1_16_17,
+                rs1_19,
+		            rs1_1a,
+		            rs1_1b,
+		            rs1_1c_1f,
+		            rd_00,
+		            rd_01,
+		            rd_04,
+		            rd_05,
+		            rd_06,
+		            rd_07,
+		            rd_09,
+		            rd_0f,
+                rd_10,
+		            rd_11,
+		            rd_12,
+                rd_13,
+                rd_18,
+		            rd_1a,
+		            rd_1b,
+		            rd_1c_1f,
+                rd_1f,
+		            rs1_1f;
+
+   wire         fcn0;
+   
+   wire         rd_ifusr_d,
+		            rd_ifusr_e,
+                rd_ffusr_d,
+		            rd_ffusr_e,
+		            rd_exusr_d,
+		            rd_exusr_e;
+   wire         wsr_fixed_inst_e,
+		            wsr_fixed_inst_m,
+		            wsr_inst_next_e,
+		            wsr_fixed_inst_d;
+
+   wire         wrt_tcr_d,
+		            wrt_tcr_e,
+		            wrt_tcr_qual_e,
+		            wrt_tcr_m;
+	 
+   wire         wrt_fprs_d,
+		            wrt_fprs_e,
+		            wrt_fprs_qual_e,
+		            wrt_fprs_m;
+	 
+   wire         prefetch,
+                impl_prefetch,
+                illegal_prefetch,
+                noop_prefetch,
+                pref_done_d,
+                prefetch_d,
+                prefetch_e;
+
+   wire [2:0]   lstype_d,
+		            lstype_e;
+   wire [1:0]   lssize_d;
+   wire         ldst_dbl_d,
+		            ldst_fp_d,
+		            sta_inst_d,
+//                kill_for_sta_d,
+                sta_nostf_d,
+		            sta_inst_e,
+                sta_nostf_e,
+		            sext_d,
+		            ldstub_d,
+		            casa_d,
+                casa_e,
+		            swap_d;
+
+   wire         mb_mask_d,
+		            mb_inst_d,
+		            mb_inst_e;
+
+   wire         sir_inst_d,
+		            sir_inst_e,
+//		            kill_sir_d,
+		            flag_sir_d,
+		            flag_sir_e;
+   
+   wire         fpld_d,
+		            fpop1_d,
+                visop_d,
+                int_align_d,
+		            fpop2_d;
+   wire         quad_ffuop_d;
+
+   wire         allfp_d,
+		            any_fpinst_d,
+		            fpdis_trap_d,
+		            fpdis_trap_e,
+		            fcc_mov_d,
+		            fcc_branch_d;
+   
+   wire         rs2_hit,
+		            opf_hit,
+		            ibit_hit,
+		            rs1_hit,
+		            op3_hit,
+		            rd_hit,
+		            op_hit,
+		            imask_hit,
+		            imask_hit_e;
+
+   wire         clk;
+   
+
+//----------------------------------------------------------------------
+// Code Begins Here
+//----------------------------------------------------------------------
+   assign       clk = rclk;
+
+   
+   assign   op  = dtu_inst_d[31:30];
+   assign   op2 = dtu_inst_d[24:22];
+   assign   op3 = dtu_inst_d[24:19];
+   assign   opf = dtu_inst_d[13:5];
+
+   // decode op
+   assign   brsethi_inst = ~op[1] & ~op[0];
+   assign   call_inst    = ~op[1] &  op[0];
+   assign   arith_inst   =  op[1] & ~op[0];
+   assign   mem_inst     =  op[1] &  op[0];
+
+   // partial decode op2
+   assign   sethi_or_nop = op2[2] & ~op2[1] & ~op2[0];
+   
+   // decode op3
+   assign   op3_hi[0] = ~op3[5] & ~op3[4];
+   assign   op3_hi[1] = ~op3[5] &  op3[4];
+   assign   op3_hi[2] =  op3[5] & ~op3[4];
+   assign   op3_hi[3] =  op3[5] &  op3[4];
+   
+   assign   op3_lo[0]  = ~op3[3] & ~op3[2] & ~op3[1] & ~op3[0];
+   assign   op3_lo[1]  = ~op3[3] & ~op3[2] & ~op3[1] &  op3[0];
+   assign   op3_lo[2]  = ~op3[3] & ~op3[2] &  op3[1] & ~op3[0];
+   assign   op3_lo[3]  = ~op3[3] & ~op3[2] &  op3[1] &  op3[0];
+   assign   op3_lo[4]  = ~op3[3] &  op3[2] & ~op3[1] & ~op3[0];
+   assign   op3_lo[5]  = ~op3[3] &  op3[2] & ~op3[1] &  op3[0];
+   assign   op3_lo[6]  = ~op3[3] &  op3[2] &  op3[1] & ~op3[0];
+   assign   op3_lo[7]  = ~op3[3] &  op3[2] &  op3[1] &  op3[0];
+   assign   op3_lo[8]  =  op3[3] & ~op3[2] & ~op3[1] & ~op3[0];
+   assign   op3_lo[9]  =  op3[3] & ~op3[2] & ~op3[1] &  op3[0];
+   assign   op3_lo[10] =  op3[3] & ~op3[2] &  op3[1] & ~op3[0];
+   assign   op3_lo[11] =  op3[3] & ~op3[2] &  op3[1] &  op3[0];
+   assign   op3_lo[12] =  op3[3] &  op3[2] & ~op3[1] & ~op3[0];
+   assign   op3_lo[13] =  op3[3] &  op3[2] & ~op3[1] &  op3[0];
+   assign   op3_lo[14] =  op3[3] &  op3[2] &  op3[1] & ~op3[0];
+   assign   op3_lo[15] =  op3[3] &  op3[2] &  op3[1] &  op3[0];
+
+   //-------------------------
+   // Branch and Move Controls
+   //-------------------------
+   // brtype
+   assign dbr_inst_d = brsethi_inst & (op2[1] | op2[0]) |   // regular branch
+	                     call_inst;
+   assign jmpl_inst_d = arith_inst & op3_hi[3] & op3_lo[8];  // jmpl
+   assign retn_inst_d = arith_inst & op3_hi[3] & op3_lo[9];  // retn
+   assign ibr_inst_d = jmpl_inst_d | retn_inst_d;
+   assign sethi_inst_d = brsethi_inst & sethi_or_nop;
+   assign ifu_exu_sethi_inst_d = sethi_inst_d;
+   
+   assign dec_swl_br_done_d = (dbr_inst_d | jmpl_inst_d); // br compl.
+                                                  // retn has separate
+                                                  // completion signal
+   assign dtu_fcl_br_inst_d = dbr_inst_d | ibr_inst_d;
+   assign dec_imd_call_inst_d = call_inst;
+
+   // MV-BR Condition
+   assign   cctype_sel_imov  =  op[1] & ~op3[4];
+   assign   cctype_sel_fmov  =  op[1] & op3[4];
+   assign   cctype_sel_bcc  = ~op[1] &  op2[1];
+   assign   cctype_sel_bpcc = ~op[1] & ~op2[1];
+
+   mux4ds  #(3) cctype_mux(.dout  (dec_dcl_cctype_d),
+			                   .in0   ({dtu_inst_d[18], dtu_inst_d[12:11]}),
+			                   .in1   (dtu_inst_d[13:11]),
+			                   // op2[2]=1 for fp branch
+			                   .in2   ({~op2[2], 2'b00}),
+			                   .in3   ({~op2[2], dtu_inst_d[21:20]}),
+			                   .sel0  (cctype_sel_imov),
+			                   .sel1  (cctype_sel_fmov),
+			                   .sel2  (cctype_sel_bcc),
+			                   .sel3  (cctype_sel_bpcc));
+
+   //-------------
+   // ALU Controls
+   //-------------
+   // mov bit
+   assign ifu_exu_aluop_d[2] = brsethi_inst & sethi_or_nop |   // sethi
+                       	    arith_inst & op3_hi[2] & op3[3];   // mov, rd
+
+   // aluop
+   assign ifu_exu_aluop_d[1] = (arith_inst & 
+	                              ((op3_hi[3] & (op3_lo[0] |   // wr
+                                               op3_lo[2] |   // wrpr
+                                               op3_lo[3])) | // wrhpr
+				                         (~op3[5] & op3[1]))         // xor, or
+                                );
+				
+   // aluop/mov type
+   assign ifu_exu_aluop_d[0] = (arith_inst & 
+	                              ((op3_hi[3] & (op3_lo[0] |
+                                               op3_lo[2] |
+                                               op3_lo[3])) | // wr
+				                         (~op3[5] & op3[0])        | // xor, and
+				                         (op3_hi[2] & op3_lo[15]))   // movr
+                                );
+   
+   // invert rs2
+   assign ifu_exu_invert_d  = arith_inst &
+	                      (~op3[5] & op3[2]  |   // sub, andn, orn, xorn
+	                       op3_hi[2] & (op3_lo[3] | op3_lo[1])); // tag sub
+
+   assign ifu_exu_usecin_d   = arith_inst & ~op3[5] & op3[3];   // addc, subc
+
+   // tagged ops
+   assign ifu_exu_tagop_d = arith_inst & op3_hi[2] & ~op3[3] & ~op3[2] &
+	                          fcl_dtu_inst_vld_d;
+   assign ifu_exu_tv_d = ifu_exu_tagop_d & op3[1];
+   assign ifu_exu_muls_d  = arith_inst & op3_hi[2] & op3_lo[4] & 
+	                          ~swl_dec_divbusy_e & fcl_dtu_ely_inst_vld_d;
+
+   // memory for ibr and ldst address range check
+   assign ifu_exu_range_check_other_d = mem_inst & ~prefetch; // ld, st, atom
+   assign ifu_exu_range_check_jlret_d = arith_inst & op3_hi[3] &    
+                                        (op3_lo[8] | op3_lo[9]); // jmpl, retn
+   
+   //--------------
+   // SHFT Controls
+   //--------------
+   // enable shifter and choose shift output
+   // This can be simplified a great deal if MULScc could also be
+   // decoded as a shift instruction.
+   // 9/26/01: No can do! Mulscc is implemented now
+   assign ifu_exu_enshift_d = arith_inst & op3_hi[2] &
+                       	       (op3_lo[5] | op3_lo[6] | op3_lo[7]); 
+   // unsigned or signed (1 => signed)
+   assign ifu_exu_shiftop_d[0] = op3[0];
+   // left or right (1 => right)
+   assign ifu_exu_shiftop_d[1] = op3[1];
+   // shift 32b or 64b (1 => 64)
+   assign ifu_exu_shiftop_d[2] = dtu_inst_d[12];   // was sh32_64
+
+
+   //-------------------
+   // Writeback Controls
+   //-------------------
+   //  write to icc/xcc
+   assign ifu_exu_setcc_d = arith_inst & 
+	                     (op3_hi[1] & (~op3[3] | ~op3[1] & ~op3[0]) |
+	                      op3_hi[2] & (~op3[3] & ~op3[2])); // tagged op
+   //  write to rd
+   assign ifu_exu_wen_d = ((~rd_00) & brsethi_inst & sethi_or_nop | // sethi
+	                         (~rd_00) & arith_inst &  // all single cycle insts
+	                         (~op3[5] & ~op3[3]    |     // alu ops
+			                      ~op3[5] & op3_lo[8]  |     // addC
+			                      ~op3[5] & op3_lo[12] |     // subC
+			                      op3_hi[2] &                
+			                      (~op3[3] & ~op3_lo[4] |    // shft, tag, ~muls
+			                       // need to kill if rd to invalid reg
+			                       // all vld regs will retn in W stage
+			                       op3_lo[8] & ~rs1_0f | op3_lo[10]  | // rd
+                             op3_lo[9] | // rdhpr
+			                       op3_lo[12] | op3_lo[15])| // mov
+			                      op3_hi[3] &
+			                      (op3_lo[8]  |    // jmpl
+			                       op3_lo[12] |    // save
+			                       op3_lo[13] |    // restore
+                             op3_lo[6] & int_align_d)  // vis int align
+			                      )           |    
+	                         call_inst);
+   
+   //-----------------
+   // MUL/DIV Controls
+   //-----------------
+   // is mul
+   assign dec_swl_mul_inst_d = (arith_inst &
+				                        ((op3_hi[0] &
+				                          (op3_lo[9]  |      // mulx
+				                           op3_lo[10] |      // umul
+				                           op3_lo[11])) |    // smul
+				                         (op3_hi[1] &        
+				                          (op3_lo[10] |      // umulcc
+				                           op3_lo[11])))     // smulcc
+				                        );
+
+   assign ifu_exu_muldivop_d[4] = dec_swl_mul_inst_d & ~swl_dec_mulbusy_e & 
+                                  fcl_dtu_ely_inst_vld_d;
+   
+   // is div
+   assign dec_swl_div_inst_d = (arith_inst &
+				                        ((op3_hi[0] &
+				                          (op3_lo[13]  |     // udivx
+				                           op3_lo[14]  |     // udiv
+				                           op3_lo[15]))  |   // sdiv
+				                         (op3_hi[2] & 
+				                          (op3_lo[13]   |    // sdivx
+				                           op3_lo[4]))  |    // muls
+				                         (op3_hi[1] &
+				                          (op3_lo[14] |      // udivcc
+				                           op3_lo[15])))     // sdivcc
+				                        );
+
+   assign ifu_exu_muldivop_d[3] = dec_swl_div_inst_d & ~swl_dec_divbusy_e &
+				                          op3[3] & fcl_dtu_ely_inst_vld_d; // not muls
+   
+   // 64b or 32b (1 => 64b)
+   assign ifu_exu_muldivop_d[2] = ~op3[1];
+
+   // signed or unsigned (1 => signed)
+   assign ifu_exu_muldivop_d[1] = op3_hi[2] |          // sdivx
+	                                (op3[1] & op3[0]);   // smul, sdiv
+   // set cc as well?
+   assign ifu_exu_muldivop_d[0] = op3[4];
+
+   //-------------------------
+   // FP controls
+   //-------------------------
+
+   // portion of vis that is actually an int instruction
+   assign int_align_d = (~opf[8] & ~opf[7] & ~opf[6] & ~opf[5] & 
+                         opf[4] & opf[3] & ~opf[2] & ~opf[0]);
+   assign ifu_exu_ialign_d = arith_inst & op3_hi[3] & op3_lo[6] &
+                             int_align_d & fcl_dtu_inst_vld_d;
+   
+
+   assign fpop1_d = arith_inst & op3_hi[3] & op3_lo[4];
+   assign fpop2_d = arith_inst & op3_hi[3] & op3_lo[5];
+   assign visop_d = arith_inst & op3_hi[3] & op3_lo[6];
+   assign fpld_d = mem_inst & op3[5] & ~op3[3] & ~op3[2];
+
+   // FP stores don't switch out and don't block the fpu
+   assign dec_swl_fpop_d = (fpop1_d | fpop2_d | fpld_d | visop_d);
+   assign allfp_d = (fpop1_d | fpop2_d | fpld_d | visop_d |
+	                   mem_inst & op3[5] & ~op3[3] & op3[2]);
+   assign dec_swl_allfp_d = allfp_d;
+
+   assign ifu_ffu_ldfsr_d = op3_lo[1] & op3_hi[2] & ~rd[0];
+   assign ifu_ffu_ldxfsr_d = op3_lo[1] & op3_hi[2] & rd[0];	  
+   assign ifu_ffu_stfsr_d = op3_lo[5] & op3_hi[2];	  
+
+   assign ifu_ffu_fpop1_d = fpop1_d & ~swl_dec_fpbusy_e & 
+                            fcl_dtu_ely_inst_vld_d;
+   assign ifu_ffu_fpop2_d = fpop2_d & ~swl_dec_fpbusy_e & 
+                            fcl_dtu_ely_inst_vld_d;
+   assign ifu_ffu_visop_d = visop_d & ~swl_dec_fpbusy_e & 
+                            fcl_dtu_ely_inst_vld_d;
+
+   assign ifu_ffu_fld_d =  mem_inst & op3[5] & ~op3[3] & ~op3[2] & 
+			                     fcl_dtu_ely_inst_vld_d & ~swl_dec_fpbusy_e;
+   
+   assign ifu_ffu_fst_d =  mem_inst & op3[5] & ~op3[3] & op3[2] & 
+		                       fcl_dtu_ely_inst_vld_d & ~swl_dec_fpbusy_e;
+
+   // ldqf and stqf are not fpops
+   assign quad_ffuop_d = (opf[1] & opf[0] & (fpop1_d | fpop2_d) |
+	                        fpop1_d & opf[3] & opf[2] & // exc div
+	                        ~(~opf[7] & opf[6] & ~opf[5])) &
+	                         swl_dec_fp_enable_d & fcl_dtu_inst_vld_d;
+
+   dff_s #(1) qope_ff(.din  (quad_ffuop_d),
+		              .q    (ifu_ffu_quad_op_e),
+		              .clk  (clk), .se(se), .si(), .so());
+
+   // quiet traps -- flush the pipe but don't take a trap till later
+//   assign dec_swl_qtrap_d = mem_inst & op3[5] & op3_lo[6] & fcl_dtu_inst_vld_d;  // stq
+//   dff #(1) qtrpe_ff(.din  (dec_swl_qtrap_d),
+//		   .q    (dtu_fcl_qtrap_e),
+//		   .clk  (clk), .se(se), .si(), .so());
+
+   // FP Enabled check
+   assign fcc_branch_d = ~op[1] & ~op[0] & op3[5] & (op3[4] | op3[3]);
+   assign fcc_mov_d = op[1] & ~op[0] & op3_hi[2] & op3_lo[12] & 
+	                    ~dtu_inst_d[18];
+   
+   assign any_fpinst_d = allfp_d | fcc_branch_d | fcc_mov_d | 
+                         arith_inst & op3_hi[2] & op3_lo[8] & rs1_13 | // rd gsr
+                         arith_inst & op3_hi[3] & op3_lo[0] & rd_13;   // wr gsr
+   
+   assign fpdis_trap_d = any_fpinst_d & ~swl_dec_fp_enable_d & 
+                         fcl_dtu_inst_vld_d;
+   dff_s #(1) fpdise_ff(.din (fpdis_trap_d),
+		                .q   (fpdis_trap_e),
+		                .clk (clk), .se(se), .si(), .so());
+   assign dtu_fcl_fpdis_e = fpdis_trap_e;
+
+   // FRF dirty bits
+   // bit 0 is the msb for double and quad
+   assign dec_swl_frf_upper_d = rd[0] & (mem_inst & op3_lo[3] |  // fld
+					                    fpop1_d & (opf[7] | opf[6] & opf[5]) & 
+                                         (opf[3] | ~opf[2]) |
+					                    fpop1_d & ~opf[7] & opf[1] & ~opf[0] |
+					                    fpop2_d & opf[1] | 
+                              visop_d & ~opf[0]);
+   
+   assign dec_swl_frf_lower_d = ~rd[0] & (mem_inst & op3_lo[3] |  // fld
+					                    fpop1_d & (opf[7] | opf[6] & opf[5]) & 
+                                          (opf[3] | ~opf[2]) |
+					                    fpop1_d & ~opf[7] & opf[1] & ~opf[0] |
+					                    fpop2_d & opf[1] | 
+                              visop_d & ~opf[0]) |
+				                      (fpop2_d & ~opf[1] |  // all sgl ops wrt lower
+	                             fpop1_d & opf[7] & ~opf[3] & opf[2] |
+                                     //bug 6470 - fdtoi,fstoi,fqtoi
+	                             (fpop1_d & (opf[7:6]==2'b11) & (opf[3:2]==2'b00)) |
+                                     //bug6470 - end
+				                       fpop1_d & ~opf[7] & ~opf[1] & opf[0] |
+                               visop_d & opf[0] |
+				                       mem_inst & op3_lo[0] & op3[5]);
+   
+   
+   //-------------------------
+   // Special Reg R/W Controls
+   //-------------------------
+   // decode rs1
+   assign rs1 = dtu_inst_d[18:14];
+   assign rs1_00 = ~rs1[4] & ~rs1[3] & ~rs1[2] & ~rs1[1] & ~rs1[0]; // Y
+   assign rs1_01 = ~rs1[4] & ~rs1[3] & ~rs1[2] & ~rs1[1] & rs1[0]; //
+   assign rs1_02 = ~rs1[4] & ~rs1[3] & ~rs1[2] & rs1[1] & ~rs1[0];  // CCR
+   assign rs1_05 = ~rs1[4] & ~rs1[3] & rs1[2] & ~rs1[1] & rs1[0]; // PC
+   assign rs1_06 = ~rs1[4] & ~rs1[3] & rs1[2] & rs1[1] & ~rs1[0]; // fprs
+   assign rs1_07 = ~rs1[4] & ~rs1[3] & rs1[2] & rs1[1] & rs1[0]; //
+   assign rs1_0f = ~rs1[4] & rs1[3] & rs1[2] & rs1[1] & rs1[0];   // mem#
+   assign rs1_10 = rs1[4] & ~rs1[3] & ~rs1[2] & ~rs1[1] & ~rs1[0];   
+   assign rs1_12 = rs1[4] & ~rs1[3] & ~rs1[2] & rs1[1] & ~rs1[0];
+   assign rs1_13 = rs1[4] & ~rs1[3] & ~rs1[2] & rs1[1] & rs1[0];
+   assign rs1_14_15 = rs1[4] & ~rs1[3] & rs1[2] & ~rs1[1];   
+   assign rs1_16_17 = rs1[4] & ~rs1[3] & rs1[2] & rs1[1];
+   assign rs1_19 = rs1[4] & rs1[3] & ~rs1[2] & ~rs1[1] & rs1[0];
+   assign rs1_1f = rs1[4] & rs1[3] & rs1[2] & rs1[1] & rs1[0];    // 
+   assign rs1_1a = rs1[4] & rs1[3] & ~rs1[2] & rs1[1] & ~rs1[0]; // THR
+   assign rs1_1b = rs1[4] & rs1[3] & ~rs1[2] & rs1[1] & rs1[0];
+   assign rs1_1c_1f = rs1[4] & rs1[3] & rs1[2];
+   assign rs1_09_0e = ~rs1[4] & rs1[3] &          // all window mgmt regs
+	              (rs1[2] & ~rs1[1] | rs1[1] & ~rs1[0] |
+		             rs1[0] & ~rs1[2]);
+
+   // decode rd
+   assign rd = dtu_inst_d[29:25];
+   assign rd_00 = ~rd[4] & ~rd[3] & ~rd[2] & ~rd[1] & ~rd[0]; // mem#
+   assign rd_01 = ~rd[4] & ~rd[3] & ~rd[2] & ~rd[1] & rd[0]; // 
+   assign rd_04 = ~rd[4] & ~rd[3] & rd[2] & ~rd[1] & ~rd[0]; //  tick
+   assign rd_05 = ~rd[4] & ~rd[3] & rd[2] & ~rd[1] & rd[0];  //  PC
+   assign rd_06 = ~rd[4] & ~rd[3] & rd[2] & rd[1] & ~rd[0]; 
+   assign rd_07 = ~rd[4] & ~rd[3] & rd[2] & rd[1] & rd[0]; 
+   assign rd_09 = ~rd[4] & rd[3] & ~rd[2] & ~rd[1] & rd[0]; // CWP
+   assign rd_0f = ~rd[4] & rd[3] & rd[2] & rd[1] & rd[0];   // not impl
+   assign rd_10 = rd[4] & ~rd[3] & ~rd[2] & ~rd[1] & ~rd[0];   // gl
+   assign rd_11 = rd[4] & ~rd[3] & ~rd[2] & ~rd[1] & rd[0];   // pic
+   assign rd_12 = rd[4] & ~rd[3] & ~rd[2] & rd[1] & ~rd[0];   // not impl
+   assign rd_13 = rd[4] & ~rd[3] & ~rd[2] & rd[1] & rd[0];   // GSR
+   assign rd_18 = rd[4] & rd[3] & ~rd[2] & ~rd[1] & ~rd[0]; // stick   
+   assign rd_1a = rd[4] & rd[3] & ~rd[2] & rd[1] & ~rd[0]; // Thr
+   assign rd_1b = rd[4] & rd[3] & ~rd[2] & rd[1] & rd[0];
+   assign rd_1c_1f = rd[4] & rd[3] & rd[2];
+   assign rd_1f = rd[4] & rd[3] & rd[2] & rd[1] & rd[0];
+   
+   assign ifu_lsu_wsr_inst_d = arith_inst & fcl_dtu_ely_inst_vld_d & 
+	                       op3_hi[3] & (op3_lo[0] | op3_lo[2] | op3_lo[3]);
+   assign ifu_exu_wsr_inst_d = ifu_lsu_wsr_inst_d;
+
+   assign ifu_tlu_rsr_inst_d = arith_inst & fcl_dtu_ely_inst_vld_d & 
+	                       op3_hi[2] & (op3_lo[8] & ~rs1_0f | // ~membar
+                                      op3_lo[9] |           // hpr
+					                            op3_lo[10]);          // pr
+
+   assign rdsr_done_d = arith_inst & op3_hi[2] & op3_lo[8] & ~mb_inst_d;
+   assign rdpr_done_d = arith_inst & op3_hi[2] & (op3_lo[10] | op3_lo[9]);
+   
+   // all wrpr's except cwp are fixed length
+   // (wr to thr, rd 1a, is excluded as well; wrhpr is always included)
+   assign wsr_fixed_inst_d = arith_inst & op3_hi[3] & 
+                               (op3_lo[2] & ~rd_09 |  // wrpr exc. cwp 
+	                              op3_lo[0] & ~rd_1a |  // wr exc. thr
+                                op3_lo[3]);           // wrhpr
+   
+	                     
+   dff_s #(1) wre_ff(.din (wsr_fixed_inst_d),
+		             .clk (clk),
+		             .q   (wsr_fixed_inst_e),
+		             .se  (se), .si(), .so());
+   // drop the write if the E-stage inst is annulled or not valid
+   assign wsr_inst_next_e = (~dtu_inst_anull_e) & wsr_fixed_inst_e & 
+	                          fcl_dtu_inst_vld_e;
+   
+   dff_s #(1) wrm_ff(.din (wsr_inst_next_e),
+		             .clk (clk),
+		             .q   (wsr_fixed_inst_m),
+		             .se  (se), .si(), .so());
+   // M -> W: no further qualification
+   dff_s #(1) wrw_ff(.din (wsr_fixed_inst_m),
+		             .clk (clk),
+		             .q   (wsr_fixed_inst_w),
+		             .se  (se), .si(), .so());
+
+   assign use_rsr_d_l = ~(ifu_tlu_rsr_inst_d | ibr_inst_d | call_inst); // active low
+   
+   dff_s #(1) rdsre_ff(.din (use_rsr_d_l),
+		               .clk (clk),
+		               .q   (ifu_exu_use_rsr_e_l),
+		               .se  (se), .si(), .so());
+   // flush inst (same op3 decode as the "flsh" terms in rsvchk_fail_d), valid in D
+   assign flush_inst_d = fcl_dtu_inst_vld_d & arith_inst & 
+	                       op3_hi[3] & op3_lo[11];
+
+   dff_s #(1) flsh_ff(.din (flush_inst_d),
+		              .q   (ifu_tlu_flsh_inst_e),
+		              .clk (clk),
+		              .se  (se), .si(), .so());
+   
+
+   // If machine state is changed kill the already fetched instructions
+   assign state_chg_inst_d = ifu_lsu_wsr_inst_d | flush_inst_d |
+	                           ifu_tlu_done_inst_d | ifu_tlu_retry_inst_d;
+
+   
+   dff_s #(1) schg_ff(.din (state_chg_inst_d),
+		                .clk (clk),
+		                .q   (state_chg_inst_e),
+//		               .rst (reset),
+		                .se  (se), .si(), .so());
+
+//   assign sta_inst_d = mem_inst & op3[4] & fcl_dtu_inst_vld_d & 
+//                       (~op3[5] & ~op3[3] & op3[2] |
+//			                  op3[5] & (op3_lo[4] | op3_lo[7]) |
+//			                  ~op3[5] & op3_lo[14]);
+   assign sta_inst_d = mem_inst & op3[4] & op3[2] & fcl_dtu_inst_vld_d & ~prefetch;
+   // sta_inst_d replaces the commented-out decode above; sta_nostf_d keeps only its ~op3[5] terms
+   assign sta_nostf_d = mem_inst & op3[4] & fcl_dtu_inst_vld_d & 
+                       (~op3[5] & ~op3[3] & op3[2] |
+			                  ~op3[5] & op3_lo[14]);
+
+   dff_s #(1) sta_ff (.din (sta_inst_d),
+		              .clk (clk),
+		              .q   (sta_inst_e),
+		              .se  (se), .si(), .so());
+
+   dff_s #(1) stanf_ff (.din (sta_nostf_d),
+		                  .clk (clk),
+		                  .q   (sta_nostf_e),
+		                  .se  (se), .si(), .so());
+
+  assign dec_swl_sta_inst_e = sta_nostf_e;
+ 
+   // Move qualification with inst_vld_e to FCL?  Done
+   assign dtu_fcl_flush_sonly_e = (sta_inst_e & lsu_ifu_ldsta_internal_e |
+                                   state_chg_inst_e);
+   // dslot_d: fcl_dec_dslot_s delayed by one flop
+   dff_s #(1) ds_ff(.din (fcl_dec_dslot_s),
+                  .q   (dslot_d),
+                  .clk (clk), .se(se), .si(), .so());
+
+   // suppress ifetch if arch state changes in delay slot (if not 
+   // suppressed, it may cause an error in the L2 for an unmapped 
+   // address). 
+//   assign dtu_ifq_kill_latest_d = ifu_lsu_wsr_inst_d | flush_inst_d |
+//	                              ifu_tlu_done_inst_d | ifu_tlu_retry_inst_d |  
+//                                sta_inst_d;
+   assign dtu_ifq_kill_latest_d = (((arith_inst & op3_hi[3] & 
+                                     (op3_lo[2] | op3_lo[3] | op3_lo[0]) |// wr
+                                     mem_inst & op3[4] & op3[2]) & // alt-space store (cf. sta_inst_d)
+                                    fcl_dtu_ely_inst_vld_d) |
+                                   fcl_dec_intr_vld_d) & dslot_d; // in DS
+   // unlike the old decode above: no flush/done/retry terms, and fcl_dec_intr_vld_d is added
+//   assign kill_for_sta_d = mem_inst & op3[4] & op3[2] & 
+                           // ~op3[5] &  // sta - excl stf and pref
+//                           fcl_dtu_ely_inst_vld_d & dslot_d; // in DS
+   
+//   dff #(1) kfste_ff(.din (kill_for_sta_d),
+//                     .q   (kill_for_sta_e),
+//                     .clk (clk), .se(se), .si(), .so());
+//   assign dec_fcl_kill4sta_e = kill_for_sta_e;
+   
+   assign rd_ifusr_d = arith_inst & op3_hi[2] & 
+	                     (op3_lo[9] & rs1_06    |  // rdhpr - ver
+			                  op3_lo[8] & (rs1_05 |    // rdsr - pc
+				                             rs1_1a |    // rdsr - thr
+				                             rs1_06))  | // rdsr - fprs
+		                     jmpl_inst_d |                    // jmpl
+	                     call_inst;                         // call
+   
+   assign rd_exusr_d = arith_inst & op3_hi[2] &
+	        (op3_lo[10] & rs1_09_0e |         // rdpr (wind)
+			     op3_lo[8] & (rs1_00 | rs1_02));  // rdsr (y + ccr)
+
+   assign rd_ffusr_d = arith_inst & op3_hi[2] &
+			                 op3_lo[8] & rs1_13;  // rdsr (gsr)
+   // stage the three read-source decodes to E (none is qualified with inst valid here)
+   dff_s #(1) ifusr_ff(.din  (rd_ifusr_d),
+		               .clk  (clk),
+		               .q    (rd_ifusr_e),
+		               .se   (se), .si(), .so());
+   dff_s #(1) ffusr_ff(.din  (rd_ffusr_d),
+		               .clk  (clk),
+		               .q    (rd_ffusr_e),
+		               .se   (se), .si(), .so());
+   dff_s #(1) exusr_ff(.din  (rd_exusr_d),
+		               .clk  (clk),
+		               .q    (rd_exusr_e),
+		               .se   (se), .si(), .so());
+
+   // make sure they are exclusive
+   assign ifu_exu_rd_ifusr_e = rd_ifusr_e;
+   assign ifu_exu_rd_exusr_e = rd_exusr_e;
+   assign ifu_exu_rd_ffusr_e = rd_ffusr_e;
+
+   // choose which of IFU special regs should be read
+   // call, jmpl or rdpc (05) => pc
+   assign dec_fcl_rdsr_sel_pc_d = ~dtu_inst_d[31] | op3_hi[3] | ~rs1[1] & rs1[0];
+   // read thread conf (1a) or fprs (06)
+   assign dec_fcl_rdsr_sel_thr_d = dtu_inst_d[31] & op3_hi[2] & op3_lo[8] & ~rs1[0];
+   // else version reg (rdhpr 06)
+
+   //   dff #(1) rdfprs_ff(.din (rs1[4]),
+   //		    .q   (dec_swl_rdsr_sel_thr_e),
+   //		    .clk (clk), .se(se), .si(), .so());
+   assign dec_swl_rdsr_sel_thr_d = rs1[4]; // unflopped; the E-stage flop above is commented out
+
+   // Thread conf reg write: wr (op3_hi[3] & op3_lo[0]) with rd = 1a, valid in D
+   assign wrt_tcr_d = rd_1a & op3_lo[0] & op3_hi[3] & arith_inst & fcl_dtu_inst_vld_d;
+
+   dff_s #(1) tcre_ff(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (wrt_tcr_d),
+      .q   (wrt_tcr_e));
+
+   // squash the write when the E-stage inst is annulled or not valid
+   assign wrt_tcr_qual_e = fcl_dtu_inst_vld_e & (~dtu_inst_anull_e) & wrt_tcr_e;
+
+   dff_s #(1) tcrm_ff(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (wrt_tcr_qual_e),
+      .q   (wrt_tcr_m));
+
+   dff_s #(1) tcrw_ff(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (wrt_tcr_m),
+      .q   (dec_swl_wrt_tcr_w));
+
+   // FPRS write: same wr decode with rd = 06, staged D -> E -> M -> W the same way
+   assign wrt_fprs_d = rd_06 & op3_lo[0] & op3_hi[3] & arith_inst & fcl_dtu_inst_vld_d;
+
+   dff_s #(1) fprse_ff(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (wrt_fprs_d),
+      .q   (wrt_fprs_e));
+   assign wrt_fprs_qual_e = fcl_dtu_inst_vld_e & (~dtu_inst_anull_e) & wrt_fprs_e;
+
+   dff_s #(1) fprsm_ff(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (wrt_fprs_qual_e),
+      .q   (wrt_fprs_m));
+   dff_s #(1) fprsw_ff(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (wrt_fprs_m),
+      .q   (dec_swl_wrtfprs_w));
+
+   //----------------
+   // RF/RML Controls
+   //----------------
+   // Reg Valid Signals for doing ECC
+   // useimm: inst[31] = 0, or i-bit (inst[13]) set unless the inst decodes as cas
+   //bug6777 - cas decode is incorrect
+   //         ~(op3_hi[3] & dtu_inst_d[22] & mem_inst); // CAS
+   assign ifu_exu_useimm_d = ~dtu_inst_d[31] | 
+	                      dtu_inst_d[13] & 
+	                     ~(op3_hi[3] & dtu_inst_d[22] & ~op3[0] & mem_inst); // CAS
+
+   // alternately try: ~ren2_d | ~23 | ~22
+   assign rs1_vld_d = (brsethi_inst & op2[1] & op2[0] |
+		                   arith_inst & (~op3[5] |
+				                             op3_hi[2] & ~op3[3] |
+				                             op3_hi[2] & op3_lo[13] |
+				                             op3_hi[2] & op3_lo[15] |
+				                             op3_hi[3] & op3_lo[0]  |
+				                             op3_hi[3] & op3_lo[2]  |
+				                             op3_hi[3] & op3_lo[3]  |
+				                             op3_hi[3] & op3_lo[5] & 
+                                     ~opf[4] & ~opf[3] & opf[2] |  // fmovr
+				                             op3_hi[3] & op3_lo[6] & int_align_d  |
+				                             op3_hi[3] & op3[3] & ~op3[2] |
+				                             op3_hi[3] & op3[3] & ~op3[1]) |
+		                   mem_inst & (~op3[5] |
+				                           ~op3[3] |
+				                           op3[4] & op3[3] & op3[2]));
+
+   assign ifu_exu_rs1_vld_d = rs1_vld_d;
+   // rs2 is a register source only when the i-bit (inst[13]) is 0, except for the cas terms
+   assign rs2_vld_d = (arith_inst & (~op3[5] |
+				                             op3_hi[2] & (~op3[3] | op3[2]) |
+				                             op3_hi[3] & op3_lo[0]  |
+				                             op3_hi[3] & op3_lo[2]  |
+				                             op3_hi[3] & op3_lo[3]  |
+				                             op3_hi[3] & op3_lo[6] & int_align_d  |
+				                             op3_hi[3] & op3[3] & ~op3[2] |
+				                             op3_hi[3] & op3[3] & ~op3[1]) &
+		                               ~dtu_inst_d[13] |
+		                   mem_inst & (~op3[5] |
+				                           ~op3[3]) & ~dtu_inst_d[13] |
+		                   mem_inst & op3_hi[3] & (op3_lo[12] |   // cas
+					                                     op3_lo[14]));
+
+   assign ifu_exu_rs2_vld_d = rs2_vld_d;
+
+   assign rs3_vld_d = mem_inst & (~op3[5] & ~op3[3] & op3[2] | // all st
+				                          ~op3[5] & op3[3] & op3[2] | // swp,stx,ldstub
+				                          op3_hi[3] & (op3_lo[12] |   // cas
+					                                     op3_lo[14])) |
+                      arith_inst & (op3_hi[2] & (op3_lo[12] | // movcc
+                                                 op3_lo[15])); // movr
+   
+   // for std and stq(=stda) & ~stf
+   assign rs4_vld_d = mem_inst & ~op3[5] & op3_lo[7];
+
+   // rs3even = rs4 + rs3 & ~rd[0]
+   // rs3odd = rs4 + rs3 & rd[0]
+   // needs both to distinguish std
+   assign ifu_exu_rs3e_vld_d = rs4_vld_d | rs3_vld_d & ~rd[0];
+   assign ifu_exu_rs3o_vld_d = rs4_vld_d | rs3_vld_d & rd[0];
+
+   // RML control signals
+   assign ifu_exu_save_d = arith_inst & op3_hi[3] & op3_lo[12] & 
+                           fcl_dtu_inst_vld_d;
+   assign ifu_exu_restore_d = arith_inst & op3_hi[3] & fcl_dtu_inst_vld_d &
+              	        (op3_lo[13] |       // restore
+		                     op3_lo[9]);        // return
+   assign ifu_exu_return_d = arith_inst & op3_hi[3] & op3_lo[9] & 
+                             fcl_dtu_inst_vld_d;   
+   // save or return, not qualified with inst valid (feeds dec_swl_ll_done_d)
+   assign save_retn_done_d = arith_inst & op3_hi[3] & (op3_lo[12] | op3_lo[9]);
+   assign saved_d = arith_inst & op3_hi[3] & op3_lo[1] & ~dtu_inst_d[25] & 
+                    fcl_dtu_inst_vld_d;
+   assign restored_d = arith_inst & op3_hi[3] & op3_lo[1] & dtu_inst_d[25] & 
+                     fcl_dtu_inst_vld_d;
+   assign flushw_d = arith_inst & op3_hi[2] & op3_lo[11] & fcl_dtu_inst_vld_d;
+   // saved/restored share op3_lo[1] and are told apart by inst[25]; all three go to E
+   dff_s #(1) savd_ff(.din  (saved_d),
+		              .clk  (clk),
+		              .q    (ifu_exu_saved_e),
+		              .se   (se), .si(), .so());
+   dff_s #(1) restd_ff(.din  (restored_d),
+		               .clk  (clk),
+		               .q    (ifu_exu_restored_e),
+		               .se   (se), .si(), .so());
+   dff_s #(1) flushw_ff(.din  (flushw_d),
+		                .clk  (clk),
+		                .q    (ifu_exu_flushw_e),
+		                .se   (se), .si(), .so());
+
+   //-----------------------
+   // TLU Controls and Traps
+   //-----------------------
+   // Done/Retry
+   assign ifu_tlu_done_inst_d = arith_inst & op3_hi[3] & op3_lo[14] &
+	                        fcl_dtu_ely_inst_vld_d & 
+	                        ~dtu_inst_d[25];   // inst_d[25] = lsb of fcn
+   assign ifu_tlu_retry_inst_d = arith_inst & op3_hi[3] & op3_lo[14] & 
+	                        fcl_dtu_ely_inst_vld_d & dtu_inst_d[25];
+
+   // SIR
+   assign sir_inst_d = arith_inst & op3_hi[3] & op3_lo[0] & 
+	               rs1_00 & rd_0f & dtu_inst_d[13];
+   // sir is flagged to E only in hpriv mode; outside hpriv mode it raises hprivop_d instead
+//   assign kill_sir_d = sir_inst_d & ~(fcl_dtu_privmode_d | fcl_dtu_hprivmode_d);
+//bug 6484 - flag sir instrn. to tlu only in hpriv mode
+//   assign flag_sir_d = sir_inst_d & (fcl_dtu_privmode_d | fcl_dtu_hprivmode_d);
+
+   assign flag_sir_d = sir_inst_d & fcl_dtu_hprivmode_d;
+
+   dff_s #(1) sire_ff(.din (flag_sir_d),
+		              .q   (sir_inst_e),
+		              .clk (clk), .se(se), .si(), .so());
+   assign dtu_fcl_sir_inst_e = sir_inst_e;
+
+//bug 6484 - kill sir instrn. to tlu if bits other than opcode bits cause illegal instrn.
+//   assign flag_sir_e = sir_inst_e & fcl_dtu_inst_vld_e & ~dtu_inst_anull_e;
+
+   assign flag_sir_e = sir_inst_e & fcl_dtu_inst_vld_e & ~dtu_inst_anull_e & ~ill_inst_e;
+
+   dff_s #(1) sirm_ff(.din (flag_sir_e),
+		              .q   (ifu_tlu_sir_inst_m),
+		              .clk (clk), .se(se), .si(), .so());
+   
+
+   // Privileged opcode trap
+   assign fcn0 = ~rd[4] & ~rd[3] & ~rd[2] & ~rd[1]; // rd is 0 or 1 (rd[0] not checked)
+
+	 //op3_lo[0] & rd[4] & ~(rd_12 | rd_13 | 
+	 //rd[3] & ~rd_1a |
+   //rd_1f)) | 
+   // privop_d can only be set when neither privmode nor hprivmode is active
+   assign privop_d = (op3_hi[3] & (op3_lo[14] | // done/retr
+				                           op3_lo[2] |         // wrpr
+                                   // & (~rd[4] & ~rd_0f | rd_10) |
+                                   // op3_lo[3] |         // wrhpr
+                                   // & valid_hp_rd_d |  
+				                           op3_lo[1] |  // savd,restd
+				                           op3_lo[0] & ((rd[4] & ~(rd_11 | rd_12 | rd_13 | rd_1b |
+							                                             rd[3] & rd[2])) |
+                                                rd_04)) | // wrasr
+		                                                   
+		                  op3_hi[2] & (op3_lo[10] |        // rdpr
+                                   // & (~rs1[4] | rs1_10 | rs1_1f) |
+                                   // op3_lo[9] |         // rdhpr
+                                   // & valid_hp_rs_d |
+				                           op3_lo[8] &           // rdasr & ~mem#
+				                           (rs1_10 | rs1_16_17 | // perf + sftint
+                                    rs1_19 | rs1_1a))    // stick, thrd stat
+		                  ) & arith_inst & ~(fcl_dtu_privmode_d | 
+                                         fcl_dtu_hprivmode_d);
+
+   // hp regs 0,1,3 and 5, 6 and 1f are implemented
+   // 6 is read only
+
+   assign valid_hp_rd_d = (~rd[4] & ~rd[3] & rd[0] & 
+                           (~rd[2] | ~rd[1]) |      // 1,3,5
+                           rd_00 | rd_1f );          // 0, 1f
+   assign valid_hp_rs_d = (~rs1[4] & ~rs1[3] & rs1[0] & 
+                           (~rs1[2] | ~rs1[1]) |    // 1,3,5
+                           rs1_00 | rs1_06 | rs1_1f); // 0, 6, 1f
+   
+   assign hprivop_d = arith_inst & ~fcl_dtu_hprivmode_d & 
+                      (op3_hi[3] & op3_lo[3] |      // wrhpr
+                       // & valid_hp_rd_d 
+                       op3_hi[2] & op3_lo[9] |      // rdhpr
+                       // & valid_hp_rs_d 
+                       // bug 6484 ----
+                       sir_inst_d |
+                       //  ----
+                       op3_hi[3] & op3_lo[0] & 
+                          (rd_04 | rd_18) & fcl_dtu_privmode_d |   // wrsr s/tick
+                       op3_hi[3] & op3_lo[2] & 
+                          rd_04 & fcl_dtu_privmode_d);             // wrpr
+          
+   dff_s #(1) prope_ff(.din (privop_d),
+		                 .q   (privop_e),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) hprope_ff(.din (hprivop_d),
+		                 .q   (hprivop_e),
+		                 .clk (clk), .se(se), .si(), .so());
+   assign dtu_fcl_privop_e = privop_e & ~hprivop_e; // hprivop_e suppresses privop
+//   assign dtu_fcl_hprivop_e = hprivop_e & ~dtu_inst_anull_e;   
+
+   // Illegal Instruction Trap
+   // inv_reg_access_d: the asr numbers listed below flag an illegal inst when they are accessed.
+   assign inv_reg_access_d = arith_inst & (op3_hi[2] & op3_lo[8] &    // rd asr
+					                                 (rs1_01 | rs1_07 |         // 1, 7
+                                            ~rs1[4] & rs1[3] & ~rs1_0f |  // 8-E, F=mem#
+					                                  rs1_12 | rs1_14_15 |      // 13 is gsr
+                                                                   // 14-15 are WO
+                                            rs1_1b | 
+                                            rs1_1c_1f) |
+   
+					                                 op3_hi[3] & op3_lo[0] &   // wr asr
+					                                 (rd_01 | rd_05 | rd_07 |
+                                            ~rd[4] & rd[3] & ~rd_0f | // 8-E, F is sir
+					                                  rd_12 | rd_1b | 
+                                            rd_1c_1f)
+                                           );
+
+   // reserved field checking (more done in ill_inst check below)
+   assign inst12_5_nonzero_d = (|dtu_inst_d[12:5]);
+   assign inst11_8_nonzero_d = (|dtu_inst_d[11:8]);
+   assign inst9_5_nonzero_d = (|dtu_inst_d[9:5]);
+   assign rs2_nonzero_d = (|dtu_inst_d[4:0]);
+
+   // rsv check is not complete, but most of the important 
+   // holes are covered
+   assign rsvchk_fail_d = (arith_inst & ((~op3[5] |  // arith
+                                          op3_hi[2] & op3_lo[13] | // sdivx
+                                          op3_hi[2] & ~op3[3] & ~op3[2] | // tag
+                                          op3_hi[2] & op3_lo[4] |  // mulscc
+                                          op3_hi[3] & op3_lo[11] | // flsh
+                                          op3_hi[3] & op3[3] & ~op3[1]) & // sav,jmp
+                                         ~dtu_inst_d[13] & inst12_5_nonzero_d |
+   
+                                         op3_hi[2] & (op3[3] & ~op3[2] &  // rd/rdpr
+                                                      ~(rs1_0f & 
+                                                        dtu_inst_d[13]) | // ~mem#
+                                                      op3_lo[11]) &  // flshw
+                                         (inst12_5_nonzero_d | 
+                                          rs2_nonzero_d | 
+                                          dtu_inst_d[13]) |
+   
+                                         op3_hi[3] & ~op3[3] & ~op3[2] & // wr/pr
+                                         ~rd_0f &  // ~sir
+                                         ~dtu_inst_d[13] & inst12_5_nonzero_d |
+   
+				                                 op3_hi[2] & op3_lo[12] &  // movcc
+                                         ~dtu_inst_d[13] & 
+                                         (inst9_5_nonzero_d | dtu_inst_d[10]) |
+   
+                                         op3_hi[2] & op3_lo[15] &  // movr
+                                         ~dtu_inst_d[13] & 
+                                         inst9_5_nonzero_d |
+
+                                         op3_hi[3] & op3_lo[11] &  // flsh
+                                         ~rd_00 |
+
+                                         op3_hi[2] & op3_lo[11] &  // flshw
+                                         (~rd_00 | ~rs1_00) |
+
+				                                 op3_hi[2] & 
+                                         (op3_lo[5] | op3_lo[6] | 
+                                          op3_lo[7]) & // shft
+                                         (inst11_8_nonzero_d |
+                                          dtu_inst_d[7] |
+                                          dtu_inst_d[6] |
+                                          dtu_inst_d[5] & (~dtu_inst_d[12] |
+                                                           ~dtu_inst_d[13]))
+                                         ) |
+   
+		                       mem_inst & (~op3[4] & ~dtu_inst_d[13] &  // ld/st
+                                       inst12_5_nonzero_d |
+                                       op3_hi[3] & 
+                                       (op3_lo[12] | op3_lo[14]) &  // cas
+                                       dtu_inst_d[13] & inst12_5_nonzero_d) |
+				       
+				       ((op[1] & ~op[0]) & (op3[5] & op3[4] & op3[3] & ~op3[2] & ~op3[1] & op3[0]) & |rd) // RETURN(rd != 0), bug 4490
+
+                           );
+   
+
+
+   assign ill_inst_d = (brsethi_inst & (~op2[2] & ~op2[1] & ~op2[0] |  // ill inst
+					                              op2[2] & op2[1] & op2[0] | // null
+					                              op2[1] & op2[0] &  // wrong bpr cond
+					                              (~dtu_inst_d[25] & ~dtu_inst_d[26] |
+                                         dtu_inst_d[28]) | // b28 has to be 0
+					                              ~op2[2] & ~op2[1] & op2[0] & // bpcc
+					                              dtu_inst_d[20]) |
+			                  arith_inst & (op3_hi[1] & (op3_lo[9] | op3_lo[13]) |
+				                              op3_hi[3] & (op3_lo[15] | // null
+						                                       op3_lo[7]) | // impdep2
+				                              op3_hi[3] & op3_lo[14] & 
+				                              (~fcn0 | fcl_dtu_tlzero_d |
+                                       rs2_nonzero_d | inst12_5_nonzero_d |
+                                       dtu_inst_d[13] | ~rs1_00) |  // done/retr
+				                              op3_hi[2] & op3_lo[12] & // movcc
+				                              dtu_inst_d[18] & dtu_inst_d[11] |
+				                              op3_hi[2] & op3_lo[15] &  // movr
+				                              ~dtu_inst_d[11] & ~dtu_inst_d[10] |
+				                              op3_hi[2] & op3_lo[14] | // popc
+				                              op3_hi[2] & op3_lo[9] &  //rdhpr
+                                      (~valid_hp_rs_d | rs1_01 & fcl_dtu_tlzero_d) |
+				                              op3_hi[2] & op3_lo[10] &      // rdpr
+				                              (rs1[4] & ~rs1_10 | // not gl
+                                       rs1_0f | 
+				                               ~rs1[4] & ~rs1[3] & ~rs1[2] & fcl_dtu_tlzero_d) |
+				                              op3_hi[2] & op3_lo[8] & 
+                                      (rs1_0f & (~rd_00 |
+                                                 dtu_inst_d[12] |
+                                                 dtu_inst_d[7] |
+                                                 inst11_8_nonzero_d)) | // mem#
+				                              op3_hi[3] & op3_lo[1] & 
+                                      (~fcn0 | ~rs1_00 | rs2_nonzero_d |
+                                       inst12_5_nonzero_d |
+                                       dtu_inst_d[13]) | // savd, restd
+				                              op3_hi[3] & op3_lo[10] & 
+                                      (inst11_8_nonzero_d |
+                                       dtu_inst_d[7] & 
+                                       ~(fcl_dtu_privmode_d |
+                                         fcl_dtu_hprivmode_d) |
+                                       dtu_inst_d[29] |
+                                       ~dtu_inst_d[13] & (dtu_inst_d[6] |
+                                                          dtu_inst_d[5])) |// tcc
+                                      op3_hi[3] & op3_lo[3] &   // wrhpr
+                                      (~valid_hp_rd_d | rd_01 & fcl_dtu_tlzero_d) |
+				                              op3_hi[3] & op3_lo[2] &   // wrpr
+				                              (rd[4] & ~rd_10 | rd_0f | // gl=0x10
+				                               ~rd[4] & ~rd[3] & ~rd[2] & fcl_dtu_tlzero_d) |
+				                              op3_hi[3] & op3_lo[0] &  
+				                              (rd_0f & ~(rs1_00 & dtu_inst_d[13]))  // sir
+                                      ) |
+			                  mem_inst & (~op3[5] & op3_lo[12] |
+				                            op3[5] & (op3_lo[2] | op3_lo[6]) | // stqf, ldqf
+				                            op3[5] & op3[3] & ~op3[2] |
+				                            op3_hi[2] & (op3_lo[12] | 
+						                                     op3_lo[14] | op3_lo[15]) |
+				                            op3_hi[3] & (op3_lo[1] | op3_lo[5] |
+						                                     op3_lo[15]) |    //  null
+				                            ~op3[5] & op3_lo[3] & rd[0] | // ldd w/ odd rd
+				                            op3[5] & op3_lo[1] & ~fcn0 |  // ldfsr
+				                            op3[5] & op3_lo[13] & // prefetch
+				                              illegal_prefetch |
+				                            op3[5] & op3_lo[5] & ~fcn0 | // stfsr
+				                            ~op3[5] & op3_lo[7] & rd[0])  | // std w/ odd rd
+			                  inv_reg_access_d |
+                        rsvchk_fail_d);
+   // flop to E; hprivop_e is reported on the same illinst output (imask_hit_e is commented out)
+   dff_s #(1) illinste_ff(.din (ill_inst_d),
+		                  .q   (ill_inst_e),
+		                  .clk (clk), .se(se), .si(), .so());
+   assign dtu_fcl_illinst_e = (ill_inst_e | 
+//                               imask_hit_e |
+                               hprivop_e);
+   
+				    
+   //-------------
+   // LSU Controls
+   //-------------
+   // prefetch
+   assign prefetch    = op3_lo[13] & op3[5];
+   // the prefetch fcn (rd) falls in one of three classes: implemented, no-op, illegal
+   assign impl_prefetch = ~rd[4] & ~rd[3] & ~rd[2] | // 0-3
+                           rd[4] & ~rd[3];           // 10-17 (hex)
+   assign noop_prefetch = rd_04 | rd[4] & rd[3];     // 4, 18-1f
+      
+   assign illegal_prefetch = ~rd[4] & rd[2] & (rd[1] | rd[0]) | // 5-7
+                             ~rd[4] & rd[3];                    // 8-f
+   
+   assign prefetch_d = mem_inst & prefetch & impl_prefetch & 
+                       fcl_dtu_inst_vld_d;
+   assign pref_done_d = mem_inst & prefetch & noop_prefetch; // no inst-valid qualification
+
+   assign dec_swl_ll_done_d = (pref_done_d | rdsr_done_d | rdpr_done_d |
+                               save_retn_done_d);
+
+   // alt space
+   assign lstype_d[2] = mem_inst & op3[4] & fcl_dtu_inst_vld_d;
+   // store
+   assign lstype_d[1] = mem_inst & op3[2] & ~prefetch & fcl_dtu_inst_vld_d &
+                        ~quad_ffuop_d;
+   // load
+   assign lstype_d[0] = mem_inst & ~prefetch & fcl_dtu_inst_vld_d &
+	                      (~op3[2] | op3[3]) & (~op3_lo[14] | op3[5]) &
+                        ~quad_ffuop_d;
+
+   
+   // only the stda instruction needs to be flagged
+   assign dec_swl_std_inst_d = mem_inst & op3_hi[1] & op3_lo[7] &
+	                             fcl_dtu_inst_vld_d;
+   
+   assign sext_d = op3[3] & ~op3[2]; // load sign extension
+   assign ldstub_d = mem_inst & ~op3[5] & op3_lo[13] & fcl_dtu_inst_vld_d;
+   assign casa_d = mem_inst & op3[5] & (op3_lo[12] | op3_lo[14]) & 
+                                         fcl_dtu_inst_vld_d;
+   assign swap_d = mem_inst & op3_lo[15] & fcl_dtu_inst_vld_d;
+
+   // load inst for speculation
+   // fp loads are not speculated on
+   // ldd is not speculated on
+   assign dec_swl_ld_inst_d = mem_inst & ~op3[2] & 
+	                            ~op3[5] & ~op3_lo[3];
+
+   // store, atomic or flush inst for stb flow control
+   // TBD: no need to full decode this if timing is a problem
+   assign dec_swl_st_inst_d = mem_inst & op3[2] |   // st, pref, cas, ldstb
+                              arith_inst & op3_hi[3] & op3_lo[11];  // flsh
+   
+   // size(out) 00-b 01-h 10-w 11-x/q/d
+   assign lssize_d[0] =  (op3[1] & ~op3_lo[15]) |   // stx,ld/sth,ld/stq, 
+                                                    // ldx, ldd, std, ~swap
+			                     op3[5] & (op3_lo[1] | op3_lo[5]) & rd[0]; // ldxfsr
+   
+
+   assign lssize_d[1] =  ~(op3[1]^op3[0])      |  // ~ld/st b,h
+                          (op3[5] & op3_lo[2]) |  // ld/stq 
+                          op3_lo[14]           |  // stx, casx
+	                        op3_hi[2] & (op3_lo[1] | op3_lo[5]);  // ldst fsr
+
+   assign ifu_ffu_ldst_size_d = ~(op3[1] & op3[0]); // ldf, stf
+//   assign ifu_ffu_ldst_size_d[1] = op3[1] & op3[0];    // lddf, stdf
+   
+
+   assign ldst_fp_d   =  op3[5] & ~op3[3] & fcl_dtu_inst_vld_d & 
+                         ~swl_dec_fpbusy_e;
+
+   assign ldst_dbl_d  =  (~op3[3] & op3[1] & op3[0] | // ldd(f), std(f)
+	                        op3[5] & (op3_lo[2] | op3_lo[6])) &   // ldq, stq
+			                     fcl_dtu_inst_vld_d;
+
+   // mem bar
+   assign mb_mask_d = dtu_inst_d[1] |   // #storeload
+	                    dtu_inst_d[5] |   // #memissue
+  	                  dtu_inst_d[6];    // #sync
+   // membar: rd asr decode with rs1 = 0f, rd = 0, i-bit set and at least one mb_mask_d bit
+   assign mb_inst_d = arith_inst & op3_hi[2] & op3_lo[8] & 
+	                    rs1_0f & rd_00 & fcl_dtu_inst_vld_d & 
+	                    dtu_inst_d[13] & mb_mask_d;
+
+
+   dff_s #(3) lstype_reg(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (lstype_d),
+      .q   (lstype_e));
+   dff_s #(2) lssize_reg(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (lssize_d),
+      .q   (ifu_lsu_ldst_size_e));
+   dff_s #(1) lsfp_reg(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (ldst_fp_d),
+      .q   (ifu_lsu_ldst_fp_e));
+   dff_s #(1) lsdbl_reg(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (ldst_dbl_d),
+      .q   (ifu_lsu_ldst_dbl_e));
+   // sign-extend and atomic (cas / ldstub / swap) decodes, D -> E
+   dff_s #(1) sext_reg(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (sext_d),
+      .q   (ifu_lsu_sign_ext_e));
+   dff_s #(1) casa_reg(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (casa_d),
+      .q   (casa_e));
+   dff_s #(1) ldstub_reg(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (ldstub_d),
+      .q   (ifu_lsu_ldstub_e));
+   dff_s #(1) swap_reg(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (swap_d),
+      .q   (ifu_lsu_swap_e));
+   // prefetch and membar decodes, D -> E
+   dff_s #(1) pfe_ff(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (prefetch_d), .q (prefetch_e));
+
+   dff_s #(1) mb_ff(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (mb_inst_d),
+      .q   (mb_inst_e));
+
+   assign ifu_lsu_casa_e = casa_e;
+   assign ifu_exu_casa_d = casa_d;
+   // the E-stage anull/valid qualification is commented out; the raw flop outputs are driven
+//   assign ifu_lsu_ld_inst_e = lstype_e[0] & (~dtu_inst_anull_e) & 
+//			                          ~ifu_ffu_quad_op_e & fcl_dtu_inst_vld_e;
+//   assign ifu_lsu_st_inst_e = lstype_e[1] & (~dtu_inst_anull_e) & 
+//			                          ~ifu_ffu_quad_op_e & fcl_dtu_inst_vld_e;
+
+   assign ifu_lsu_ld_inst_e = lstype_e[0];
+   assign ifu_lsu_st_inst_e = lstype_e[1];
+
+   // temporary
+//   assign ifu_lsu_alt_space_e = lstype_e[2] & ~dtu_inst_anull_e & 
+//	                              fcl_dtu_inst_vld_e;
+
+   assign ifu_lsu_alt_space_e = lstype_e[2];
+   assign ifu_lsu_alt_space_d = mem_inst & op3[4] & fcl_dtu_inst_vld_d;
+   assign ifu_tlu_alt_space_d = op3[4] & fcl_dtu_ely_inst_vld_d; // no mem_inst term here
+//   assign ifu_lsu_imm_asi_vld_d = op3[4] & ~dtu_inst_d[13];
+   assign ifu_lsu_memref_d = mem_inst & fcl_dtu_ely_inst_vld_d;
+   // membar and prefetch are still qualified with valid and not-annulled in E
+   assign ifu_tlu_mb_inst_e = mb_inst_e & ~dtu_inst_anull_e & 
+	                            fcl_dtu_inst_vld_e;
+
+   assign ifu_lsu_pref_inst_e = fcl_dtu_inst_vld_e & prefetch_e &
+                                ~dtu_inst_anull_e;
+
+   //------------------------------------------------------------
+   // IMASK: compare the D-stage inst with erb_dtu_imask[31:0],
+   // field by field; erb_dtu_imask[38:32] enable each compare
+   assign op_hit   = (erb_dtu_imask[31:30] == dtu_inst_d[31:30]);
+   assign rd_hit   = (erb_dtu_imask[29:25] == dtu_inst_d[29:25]);
+   assign op3_hit  = (erb_dtu_imask[24:19] == dtu_inst_d[24:19]);
+   assign rs1_hit  = (erb_dtu_imask[18:14] == dtu_inst_d[18:14]);
+   assign ibit_hit = (erb_dtu_imask[13]    == dtu_inst_d[13]);
+   assign opf_hit  = (erb_dtu_imask[12:5]  == dtu_inst_d[12:5]);
+   assign rs2_hit  = (erb_dtu_imask[4:0]   == dtu_inst_d[4:0]);
+   // a disabled field always passes; at least one enable must be set and the inst valid
+   assign imask_hit = fcl_dtu_inst_vld_d &
+                      (|erb_dtu_imask[38:32]) &
+                      (~erb_dtu_imask[38] | op_hit)   &
+                      (~erb_dtu_imask[37] | rd_hit)   &
+                      (~erb_dtu_imask[36] | op3_hit)  &
+                      (~erb_dtu_imask[35] | rs1_hit)  &
+                      (~erb_dtu_imask[34] | ibit_hit) &
+                      (~erb_dtu_imask[33] | opf_hit)  &
+                      (~erb_dtu_imask[32] | rs2_hit);
+
+   dff_s #(1) imsk_ff(
+      .clk (clk), .se (se), .si (), .so (),
+      .din (imask_hit), .q (imask_hit_e));
+   assign dtu_fcl_imask_hit_e = swl_dec_ibe_e & imask_hit_e;
+   
+
+   //-------------
+   // SPU Controls
+   //-------------
+
+//   assign scpy_inst_d = arith_inst & op3_hi[3] & op3_lo[7] & // use impdep 2
+//	                ~dtu_inst_d[13] & dtu_inst_d[12];    // i=0 always
+//   assign scmp_inst_d = arith_inst & op3_hi[3] & op3_lo[7] & 
+//	                ~dtu_inst_d[13] & ~dtu_inst_d[12];
+//
+//   dff #(1) scpy_ff (.din (scpy_inst_d),
+//		   .clk (clk),
+//		   .q   (scpy_inst_e),
+//		   .se  (se), .si(), .so());
+//
+//   assign ifu_spu_scpy_inst_e = scpy_inst_e & ~dtu_inst_anull_e & 
+//	                        fcl_dtu_inst_vld_e;
+//   
+//   dff #(1) scmp_ff (.din (scmp_inst_d),
+//		   .q   (scmp_inst_e),
+//		   .clk (clk), .se  (se), .si(), .so());
+//
+//   assign ifu_spu_scmp_inst_e = scmp_inst_e & ~dtu_inst_anull_e & 
+//	                        fcl_dtu_inst_vld_e;
+//
+//
+   
+endmodule // sparc_ifu_dec
+
+// Local Variables:
+// verilog-library-directories:("../rtl" ".")
+// End:
Index: /trunk/T1-CPU/ifu/sparc_ifu_errctl.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_errctl.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_errctl.v	(revision 6)
@@ -0,0 +1,1470 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_errctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:  sparc_ifu_errctl
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+`include "lsu.h"
+
+module sparc_ifu_errctl(/*AUTOARG*/
+   // Outputs
+   erc_erd_pgsz_b0, erc_erd_pgsz_b1, ifu_lsu_asi_rd_unc, 
+   ifu_lsu_ldxa_tid_w2, ifu_lsu_ldxa_data_vld_w2, 
+   ifu_lsu_fwd_data_vld, ifu_lsu_error_inj, ifu_exu_ecc_mask, 
+   ifu_exu_inj_irferr, ifu_ffu_inj_frferr, ifu_exu_nceen_e, 
+   ifu_lsu_nceen, ifu_spu_nceen, erb_fcl_spu_uetrap, 
+   erb_ifq_itlberr_s1, erb_ifq_ifeterr_d1, erb_dtu_ifeterr_d1, 
+   erb_fcl_itlb_ce_d1, erb_fcl_ce_trapvec, erb_fcl_ue_trapvec, 
+   erb_fcl_ifet_uevec_d1, erc_erd_errstat_asidata, 
+   erc_erd_errinj_asidata, erc_erd_erren_asidata, 
+   erc_erd_eadr0_sel_irf_l, erc_erd_eadr0_sel_itlb_l, 
+   erc_erd_eadr0_sel_frf_l, erc_erd_eadr0_sel_lsu_l, 
+   erc_erd_asiway_s1_l, erc_erd_eadr1_sel_pcd1_l, 
+   erc_erd_eadr1_sel_l1pa_l, erc_erd_eadr1_sel_l2pa_l, 
+   erc_erd_eadr1_sel_other_l, erc_erd_eadr2_sel_mx1_l, 
+   erc_erd_eadr2_sel_wrt_l, erc_erd_eadr2_sel_mx0_l, 
+   erc_erd_eadr2_sel_old_l, erc_erd_asi_thr_l, 
+   erc_erd_asisrc_sel_icd_s_l, erc_erd_asisrc_sel_misc_s_l, 
+   erc_erd_asisrc_sel_err_s_l, erc_erd_asisrc_sel_itlb_s_l, 
+   erc_erd_errasi_sel_en_l, erc_erd_errasi_sel_stat_l, 
+   erc_erd_errasi_sel_inj_l, erc_erd_errasi_sel_addr_l, 
+   erc_erd_miscasi_sel_ict_l, erc_erd_miscasi_sel_imask_l, 
+   erc_erd_miscasi_sel_other_l, erc_erd_ld_imask, erb_reset, so, 
+   // Inputs
+   rclk, se, si, arst_l, grst_l, erd_erc_tte_pgsz, icv_itlb_valid_f, 
+   fcl_erb_ievld_s1, fcl_erb_tevld_s1, fcl_erb_immuevld_s1, 
+   fcl_erb_inst_issue_d, fcl_erb_inst_vld_d1, ifu_tlu_inst_vld_w, 
+   ifu_lsu_thrid_s, fcl_erb_asi_tid_f, ifq_fcl_asi_tid_bf, 
+   fcl_erb_clear_iferr, fcl_erb_itlbrd_vld_s, fcl_erb_itlbrd_data_s, 
+   erd_erc_tagpe_s1, erd_erc_nirpe_s1, erd_erc_fetpe_s1, 
+   erd_erc_tlbt_pe_s1, erd_erc_tlbd_pe_s1, tlu_lsu_pstate_priv, tlu_hpstate_priv,
+   lsu_ifu_dtlb_data_su, lsu_ifu_dtlb_data_ue, lsu_ifu_dtlb_tag_ue, 
+   lsu_ifu_dcache_data_perror, lsu_ifu_dcache_tag_perror, 
+   lsu_ifu_l2_unc_error, lsu_ifu_l2_corr_error, lsu_ifu_io_error, 
+   lsu_ifu_error_tid, spu_ifu_unc_err_w1, spu_ifu_mamem_err_w1, 
+   spu_ifu_corr_err_w2, spu_ifu_int_w2, spu_ifu_ttype_tid_w2, 
+   lsu_ifu_inj_ack, ffu_ifu_ecc_ce_w2, ffu_ifu_ecc_ue_w2, 
+   ffu_ifu_inj_ack, ffu_ifu_tid_w2, exu_ifu_ecc_ce_m, 
+   exu_ifu_ecc_ue_m, exu_ifu_inj_ack, ifq_erb_ue_rep, ifq_erb_ce_rep, 
+   ifq_erb_l2_ue, ifq_erb_io_ue, ifq_erb_ifet_ce, ifq_erb_l2err_tid, 
+   ifq_erb_rdtag_f, ifq_erb_rdinst_f, ifq_erb_asi_erren_i2, 
+   ifq_erb_asi_errstat_i2, ifq_erb_asi_errinj_i2, 
+   ifq_erb_asi_erraddr_i2, ifq_erb_asi_imask_i2, ifq_erb_asiwr_i2, 
+   ifq_fcl_asird_bf, ifq_erb_fwdrd_bf, ifq_erb_asidata_i2, 
+   ifq_erb_asiway_f
+   );
+
+   input        rclk, 
+                se, 
+                si,
+                arst_l,
+                grst_l;
+
+   input [2:0]  erd_erc_tte_pgsz;
+
+   input [3:0]  icv_itlb_valid_f;
+
+   input        fcl_erb_ievld_s1,
+		            fcl_erb_tevld_s1,
+		            fcl_erb_immuevld_s1;
+
+   input        fcl_erb_inst_issue_d;
+   input        fcl_erb_inst_vld_d1;
+   
+   input        ifu_tlu_inst_vld_w;
+
+   input [1:0]  ifu_lsu_thrid_s,
+		            fcl_erb_asi_tid_f,
+		            ifq_fcl_asi_tid_bf;
+   
+   input [3:0]  fcl_erb_clear_iferr;
+
+   input        fcl_erb_itlbrd_vld_s,
+		            fcl_erb_itlbrd_data_s;	
+   
+   input [3:0]  erd_erc_tagpe_s1;
+   input        erd_erc_nirpe_s1,
+		            erd_erc_fetpe_s1;
+   input [1:0]  erd_erc_tlbt_pe_s1,
+		            erd_erc_tlbd_pe_s1;
+
+   input [3:0]  tlu_lsu_pstate_priv;
+   input [3:0]  tlu_hpstate_priv;   
+
+   input        lsu_ifu_dtlb_data_su,
+		            lsu_ifu_dtlb_data_ue,
+		            lsu_ifu_dtlb_tag_ue,
+		            lsu_ifu_dcache_data_perror,
+		            lsu_ifu_dcache_tag_perror,
+		            lsu_ifu_l2_unc_error,
+		            lsu_ifu_l2_corr_error,
+		            lsu_ifu_io_error;
+   input [1:0]  lsu_ifu_error_tid;
+
+   input        spu_ifu_unc_err_w1,  // 1 cycle earlier for timing reasons
+                spu_ifu_mamem_err_w1,// 1 cycle earlier for timing reasons
+                spu_ifu_corr_err_w2,
+                spu_ifu_int_w2;
+   
+   input [1:0]  spu_ifu_ttype_tid_w2;
+
+   input [3:0]  lsu_ifu_inj_ack;
+
+   input        ffu_ifu_ecc_ce_w2;
+   input        ffu_ifu_ecc_ue_w2;
+   input        ffu_ifu_inj_ack;
+   input [1:0]  ffu_ifu_tid_w2;
+   
+   input        exu_ifu_ecc_ce_m,
+		            exu_ifu_ecc_ue_m;
+   input        exu_ifu_inj_ack;
+   
+   input        ifq_erb_ue_rep;
+   input        ifq_erb_ce_rep;
+   input        ifq_erb_l2_ue;
+   input        ifq_erb_io_ue;
+   input        ifq_erb_ifet_ce;
+   input [1:0]  ifq_erb_l2err_tid;
+
+   input        ifq_erb_rdtag_f;
+   input        ifq_erb_rdinst_f;
+   input        ifq_erb_asi_erren_i2;
+   input        ifq_erb_asi_errstat_i2;
+   input        ifq_erb_asi_errinj_i2;
+   input        ifq_erb_asi_erraddr_i2;
+   input        ifq_erb_asi_imask_i2;
+   input        ifq_erb_asiwr_i2;
+   input        ifq_fcl_asird_bf;
+   input        ifq_erb_fwdrd_bf;
+   input [31:0] ifq_erb_asidata_i2;
+   input [1:0]  ifq_erb_asiway_f;
+
+   output       erc_erd_pgsz_b0;
+   output       erc_erd_pgsz_b1;
+
+   output       ifu_lsu_asi_rd_unc;
+   output [1:0] ifu_lsu_ldxa_tid_w2;
+   output       ifu_lsu_ldxa_data_vld_w2;
+   output       ifu_lsu_fwd_data_vld;
+   output [3:0] ifu_lsu_error_inj;
+
+   output [7:0] ifu_exu_ecc_mask;
+   output       ifu_exu_inj_irferr;
+   output       ifu_ffu_inj_frferr;
+   
+   output       ifu_exu_nceen_e;
+   output [3:0] ifu_lsu_nceen;
+   output [3:0] ifu_spu_nceen;  //  copy going north
+
+   output [3:0] erb_fcl_spu_uetrap;
+
+   output       erb_ifq_itlberr_s1;
+   output       erb_ifq_ifeterr_d1;
+   output       erb_dtu_ifeterr_d1;
+   output       erb_fcl_itlb_ce_d1;
+   output [3:0] erb_fcl_ce_trapvec;
+   output [3:0] erb_fcl_ue_trapvec;   
+   output [3:0] erb_fcl_ifet_uevec_d1;
+
+   output [22:0] erc_erd_errstat_asidata;
+   output [31:0] erc_erd_errinj_asidata;
+   output [1:0]  erc_erd_erren_asidata; 
+
+   // mux selects
+   output [3:0]  erc_erd_eadr0_sel_irf_l,
+		             erc_erd_eadr0_sel_itlb_l,
+		             erc_erd_eadr0_sel_frf_l,
+		             erc_erd_eadr0_sel_lsu_l;
+
+   output [3:0]  erc_erd_asiway_s1_l;
+   
+   output [3:0]  erc_erd_eadr1_sel_pcd1_l,
+		             erc_erd_eadr1_sel_l1pa_l,
+		             erc_erd_eadr1_sel_l2pa_l,
+		             erc_erd_eadr1_sel_other_l;
+   
+   output [3:0]  erc_erd_eadr2_sel_mx1_l,
+		             erc_erd_eadr2_sel_wrt_l,
+		             erc_erd_eadr2_sel_mx0_l,
+		             erc_erd_eadr2_sel_old_l;
+
+   output [3:0]  erc_erd_asi_thr_l;
+
+   output        erc_erd_asisrc_sel_icd_s_l,  
+		             erc_erd_asisrc_sel_misc_s_l,
+		             erc_erd_asisrc_sel_err_s_l,
+		             erc_erd_asisrc_sel_itlb_s_l;
+
+   output        erc_erd_errasi_sel_en_l,
+		             erc_erd_errasi_sel_stat_l,
+		             erc_erd_errasi_sel_inj_l,
+		             erc_erd_errasi_sel_addr_l;
+
+   output        erc_erd_miscasi_sel_ict_l,
+		             erc_erd_miscasi_sel_imask_l,
+		             erc_erd_miscasi_sel_other_l;
+
+   output        erc_erd_ld_imask;
+   output        erb_reset,
+                 so;
+
+   // Local Signals
+
+   wire          spu_unc_err_w2,
+                 spu_mamem_err_w2;
+   
+   wire          lsu_dtlb_data_su,        
+		             lsu_dtlb_data_ue,        
+		             lsu_dtlb_tag_ue,         
+		             lsu_dcache_data_perror,  
+		             lsu_dcache_tag_perror,   
+		             lsu_l2_unc_error,        
+		             lsu_l2_corr_error,       
+		             lsu_io_error;            
+   wire [1:0]    lsu_error_tid;           
+   
+   wire [3:0]    valid_s1;
+
+   wire [1:0]    tid_d,
+                 ffu_tid_w3,
+                 l2ierr_tid,
+                 spu_tid_w2,
+                 asi_tid_w2,
+		             asi_tid_s1;
+   
+   wire [3:0]    thr_e,
+                 thr_d,
+		             thr_d1,
+		             thr_m,
+		             thr_w,
+                 ffu_thr_w3,
+		             asi_thr_s,
+                 asi_thr_w2,
+		             asi_thr_i2,
+		             thr_lsu_err,
+                 thr_spu_err,
+		             thr_l2ie;
+   
+   wire          itlb_feterr_s1,
+		             tlb_feterr_d1,
+                 itlb_errtr_s1_l,
+                 itlb_errtr_d1_l;
+   
+   wire          tlb_fet_ce_d1,
+		             tlb_fet_ue_d1;
+   
+   wire [3:0]    alltag_err_s1;
+   
+   wire          itlb_tagerr_s1,
+		             itlb_dataerr_s1,
+		             insterr_d1,
+                 insterr_s1,
+		             insterr_qual_d1,
+		             ictagerr_s1,
+		             ictagerr_d1,
+		             ictagerr_qual_d1;
+
+   wire          asi_daterr_d1,
+		             asi_tagerr_d1,
+		             asi_rd_err_d1;
+
+   wire          asi_ttevld_s1,
+		             asi_tdevld_s1;
+   
+   wire [3:0]    any_tlbasi_err;
+
+   wire [3:0]    dmdu,
+		             dmdu_nxt,
+		             dmsu,
+		             dmsu_nxt,
+		             dmt,
+		             dmt_nxt,
+		             ddc,
+		             ddc_nxt,
+		             dtc,
+		             dtc_nxt,
+		             ldau,
+		             ldau_nxt,
+		             ncu,
+		             ncu_nxt,
+                 mau,
+                 mau_nxt,
+		             any_lsu_err,
+		             any_lsu_ue,
+		             any_lsu_ce,
+                 any_spu_ce,
+                 any_spu_ue,
+		             imt,
+		             imt_nxt,
+		             frc,
+		             frc_nxt,
+		             irc,
+		             irc_nxt,
+		             fru,
+		             fru_nxt,
+		             iru,
+		             iru_nxt,
+		             any_rf_err,
+		             any_rf_ce,
+		             any_rf_ue,
+		             any_irf_err,
+		             any_frf_err,
+		             idc,
+		             idc_nxt,
+		             itc,
+		             itc_nxt,
+		             imdu,
+		             imdu_nxt,
+		             any_err_vld,
+		             any_ue_vld,
+//		             any_ce_vld,
+		             early_idc,
+		             early_idc_nxt,
+		             early_itc,
+		             early_itc_nxt,
+		             early_imdu,
+		             early_imdu_nxt,
+		             early_ldau,
+		             early_ldau_nxt,
+		             early_ncu,
+		             early_ncu_nxt,
+		             early_l2ce,
+		             early_l2ce_nxt,
+		             any_ifu_ce,
+		             any_ifu_ue,
+		             any_ifu_err,
+		             any_iferr_vld;
+
+   wire [3:0]    meu,
+		             meu_nxt,
+		             mec,
+		             mec_nxt,
+		             priv,
+		             priv_nxt,
+		             early_meu,
+		             early_meu_nxt,
+		             early_mec,
+		             early_mec_nxt,
+		             early_priv,
+		             early_priv_nxt;
+
+   wire [22:0]   err_stat0,
+		             err_stat1,
+		             err_stat2,
+		             err_stat3;
+
+   wire [3:0]    ifet_ce_vld,
+                 ifet_ue_vld;
+
+   wire [3:0]    l2if_unc_err,
+		             l2if_corr_err;
+
+   wire [3:0]    ce_trapvec,
+		             ue_trapvec,
+		             ifu_ce_trap;
+
+   wire          wrt_errinj_i2;
+   wire [7:0]    ecc_mask,
+		             ecc_mask_nxt;
+
+   wire [1:0]    errinj_ctl,
+		             errinj_ctl_nxt;
+   wire [5:0]    errinj_vec,
+		             errinj_vec_nxt,
+		             corr_errinj_vec;
+   
+//   wire [3:0]    icache_pa_err_d1;
+
+   wire          irf_ce_w,
+                 irf_ce_unq,
+		             irf_ue_w,
+                 irf_ue_unq;
+
+   wire [3:0]    sel_lsu_err,
+		             sel_ifuspu_err,
+		             sel_rftlb_err;
+
+   wire          clr_err_qual_e,
+                 clr_elyff_e,
+                 clr_elyff_m,
+                 clr_elyff_w;
+   
+   wire [3:0]    early_err_vec_e,
+		             clear_ely_reg_w,
+		             mov_ely_reg_w;
+
+   wire [3:0]    clear_iferr_d1;
+
+   wire [1:0]    asi_way_s1;
+   wire [3:0]    dec_asiway_s1;
+
+   wire [3:0]    asi_wrt_err_stat,
+		             asi_wrt_err_en,
+//		             asi_wrt_err_inj,
+		             asi_wrt_err_addr;
+   
+   wire          dmdu_wrt_data,
+		             dmsu_wrt_data,
+		             imdu_wrt_data,
+		             idc_wrt_data,
+		             itc_wrt_data,
+		             ddc_wrt_data,
+		             dtc_wrt_data,
+		             imt_wrt_data,
+		             dmt_wrt_data,
+		             ldau_wrt_data,
+		             ncu_wrt_data,
+                 mau_wrt_data,
+		             fru_wrt_data,
+		             frc_wrt_data,
+		             iru_wrt_data,
+		             irc_wrt_data,
+		             meu_wrt_data,
+		             mec_wrt_data,
+		             priv_wrt_data;
+
+   wire          nceen_wrt_data,
+		             ceen_wrt_data;
+
+   wire [3:0]    ceen,
+		             ceen_nxt,
+		             nceen,
+		             nceen_nxt;
+
+   wire          nceen_d;
+
+   wire          rdtag_s,
+		             rdinst_s,
+		             asi_erren_f,
+		             asi_errstat_f,
+		             asi_errinj_f,
+		             asi_erraddr_f,
+		             asi_imask_f,
+		             asi_erren_s,
+		             asi_errstat_s,
+		             asi_errinj_s,
+		             asi_erraddr_s,
+		             asi_imask_s;
+
+   wire          asird_f,
+		             asird_s;
+   wire          fwdrd_f,
+                 fwdrd_s,
+                 asifwd_rd_s,
+                 rdinst_f,
+                 fwdrd_d;
+   
+   wire          ldxa_data_vld_s,
+                 ldxa_data_vld_d;
+
+   wire          err_asi_s;
+   wire          erb_reset_l;
+
+   wire          ffu_ce_w3;
+   wire          ffu_ue_w3;   
+
+   wire [3:0] 	 any_lsu_ue_priv_state;
+   wire [3:0] 	 any_priv_state;
+   
+   wire          clk;
+   
+   
+//   
+// Code Begins Here
+//
+   assign        clk = rclk;
+   
+   // reset buffer: rstff registers grst_l on clk; arst_l drives the cell's rst_l pin
+   dffrl_async rstff(.din (grst_l),
+                     .q   (erb_reset_l),
+                     .clk (clk), .se(se), .si(), .so(),
+                     .rst_l (arst_l));
+
+   assign       erb_reset = ~erb_reset_l;   // erb_reset is the inverse of the flopped erb_reset_l
+
+
+   
+   // need to encode page size before sending it back
+   assign erc_erd_pgsz_b0 = (erd_erc_tte_pgsz[2] | 
+	                            erd_erc_tte_pgsz[1] | 
+                              erd_erc_tte_pgsz[0]);
+
+   assign erc_erd_pgsz_b1 = (~erd_erc_tte_pgsz[2] & 
+	                           erd_erc_tte_pgsz[1]);
+   
+
+   // The lock flop below is not needed with SPARC_HPV_EN;
+   // default to tte_lock_d1 = 0.
+   // 05/30/03: TLB correctable errors are disabled,
+   // so treat as if lock = 1 and force a ue (uncorrectable error).
+//   dff #(1) lk_ff(.din (erd_erc_tte_lock_s1),
+//		              .q   (tte_lock_d1),
+//		              .clk (clk), .se(se), .si(), .so());
+
+//-----------------------
+// Basic pipeline signals
+//-----------------------
+   // thr_s1 also contains asi tid
+   dff_s #(2) tidd_reg(.din (ifu_lsu_thrid_s),      // register the 2-bit thread id: _s -> tid_d
+		                 .q   (tid_d),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   assign thr_d[0] = ~tid_d[1] & ~tid_d[0];   // one-hot decode: tid_d == 2'b00
+   assign thr_d[1] = ~tid_d[1] &  tid_d[0];   // tid_d == 2'b01
+   assign thr_d[2] =  tid_d[1] & ~tid_d[0];   // tid_d == 2'b10
+   assign thr_d[3] =  tid_d[1] &  tid_d[0];   // tid_d == 2'b11
+
+   dff_s #(4) thre_reg(.din (thr_d),                // one-hot thread vector: thr_d -> thr_e
+		                 .q   (thr_e),
+		                 .clk (clk), .se(se), .si(), .so());
+   assign thr_d1 = thr_e;                           // thr_d1 is a plain alias of thr_e (no extra flop)
+
+   dff_s #(4) thrm_reg(.din (thr_e),                // thr_e -> thr_m
+		                 .q   (thr_m),
+		                 .clk (clk),  .se(se), .si(), .so());
+   dff_s #(4) thrw_reg(.din (thr_m),                // thr_m -> thr_w
+		                 .q   (thr_w),
+		                 .clk (clk),  .se(se), .si(), .so());
+
+
+//-----------------------------
+// lsu flops (added for timing)
+//-----------------------------
+   // All the lsu signals go to the final mux in the errdp, which was
+   // done to help with timing.  That is no longer necessary (in fact
+   // no longer desired) now that the flop below stages all the lsu
+   // signals.  However, the design is left unchanged to save the extra
+   // effort in physical composition of ripping up the errdp.
+   dff_s #(10) lspipe_reg(.din ({lsu_ifu_dtlb_data_su,        
+                               lsu_ifu_dtlb_data_ue,        
+                               lsu_ifu_dtlb_tag_ue,         
+                               lsu_ifu_dcache_data_perror,  
+                               lsu_ifu_dcache_tag_perror,   
+                               lsu_ifu_l2_unc_error,        
+                               lsu_ifu_l2_corr_error,       
+                               lsu_ifu_io_error,            
+                               lsu_ifu_error_tid[1:0]}),
+                        .q   ({lsu_dtlb_data_su,        
+                               lsu_dtlb_data_ue,        
+                               lsu_dtlb_tag_ue,         
+                               lsu_dcache_data_perror,  
+                               lsu_dcache_tag_perror,   
+                               lsu_l2_unc_error,        
+                               lsu_l2_corr_error,       
+                               lsu_io_error,            
+                               lsu_error_tid[1:0]}),
+                        .clk (clk), .se(se), .si(), .so());
+
+   assign any_priv_state = tlu_lsu_pstate_priv | tlu_hpstate_priv;
+   
+   // Bug 6821: added so that lsu ue errors pick up the delayed priv level
+   dff_s #(4) lsu_priv_reg(.din (any_priv_state),
+                        .q   (any_lsu_ue_priv_state),
+                        .clk (clk), .se(se), .si(), .so());
+   
+   // thread from lsu: one-hot decode of the flopped 2-bit lsu_error_tid
+   assign thr_lsu_err[0] = ~lsu_error_tid[1] & ~lsu_error_tid[0];   // tid 2'b00
+   assign thr_lsu_err[1] = ~lsu_error_tid[1] &  lsu_error_tid[0];   // tid 2'b01
+   assign thr_lsu_err[2] =  lsu_error_tid[1] & ~lsu_error_tid[0];   // tid 2'b10
+   assign thr_lsu_err[3] =  lsu_error_tid[1] &  lsu_error_tid[0];   // tid 2'b11
+
+   // thread from spu: flop the tid and the early error flags, then decode the tid one-hot
+   // From Farnad: the tid is ready several cycles before everything else.
+   // In the ifu, it is assumed to arrive 1 cycle early, hence the sptid_reg flop below.
+   dff_s #(2) sptid_reg(.din (spu_ifu_ttype_tid_w2),
+                      .q   (spu_tid_w2),
+                      .clk (clk), .se(se), .so(), .si());
+   
+   dff_s #(2) spe1_reg(.din ({spu_ifu_unc_err_w1, 
+                            spu_ifu_mamem_err_w1}),
+                      .q   ({spu_unc_err_w2,           // _w1 inputs staged by one flop into _w2
+                             spu_mamem_err_w2}),
+                      .clk (clk), .se(se), .so(), .si());
+
+   assign thr_spu_err[0] = ~spu_tid_w2[1] & ~spu_tid_w2[0];   // tid 2'b00
+   assign thr_spu_err[1] = ~spu_tid_w2[1] &  spu_tid_w2[0];   // tid 2'b01
+   assign thr_spu_err[2] =  spu_tid_w2[1] & ~spu_tid_w2[0];   // tid 2'b10
+   assign thr_spu_err[3] =  spu_tid_w2[1] &  spu_tid_w2[0];   // tid 2'b11
+
+   // thread from ifq: flop the 2-bit ifq_erb_l2err_tid, then decode it one-hot
+   dff_s #(2) ifqthr_reg(.din (ifq_erb_l2err_tid),
+                       .q   (l2ierr_tid),
+                       .clk (clk), .se(se), .so(), .si());
+   
+   assign thr_l2ie[0] = ~l2ierr_tid[1] & ~l2ierr_tid[0];   // tid 2'b00
+   assign thr_l2ie[1] = ~l2ierr_tid[1] &  l2ierr_tid[0];   // tid 2'b01
+   assign thr_l2ie[2] =  l2ierr_tid[1] & ~l2ierr_tid[0];   // tid 2'b10
+   assign thr_l2ie[3] =  l2ierr_tid[1] &  l2ierr_tid[0];   // tid 2'b11
+   
+   
+//---------------------------------------
+// Error Detection -- icache errors
+//---------------------------------------
+   // itlb inst fetch errors
+   assign itlb_feterr_s1 = (erd_erc_tlbd_pe_s1[0] ^ erd_erc_tlbd_pe_s1[1]) & 
+			                       fcl_erb_immuevld_s1;
+   assign erb_ifq_itlberr_s1 = itlb_feterr_s1 & nceen_d;
+   dff_s #(1) itfete_ff(.din (itlb_feterr_s1),
+		                  .q   (tlb_feterr_d1),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   assign itlb_errtr_s1_l = ~erb_ifq_itlberr_s1;
+   dff_s #(1) itume_ff(.din (itlb_errtr_s1_l),
+		                 .q   (itlb_errtr_d1_l),
+		                 .clk (clk), .se(se), .si(), .so());
+   
+//   assign tlb_fet_ce_d1 = tlb_feterr_d1 & ~tte_lock_d1;
+//   assign tlb_fet_ue_d1 = tlb_feterr_d1 & tte_lock_d1;
+   assign tlb_fet_ce_d1 = 1'b0;            // ce path tied off; the lock-qualified version above is commented out
+   assign tlb_fet_ue_d1 = tlb_feterr_d1;   // every flopped itlb fetch parity error is reported as a ue
+   
+
+   // instruction errors
+//   assign insterr_s1 = (erd_erc_nirpe_s1 | erd_erc_fetpe_s1) & 
+//			                   fcl_erb_ievld_s1;
+//   dff #(1)  inserr_ff(.din (insterr_s1),
+//		                   .q   (insterr_d1),
+//		                   .clk (clk), .se(se), .si(), .so());
+
+   assign insterr_s1 = (erd_erc_fetpe_s1 | erd_erc_nirpe_s1) &
+                         fcl_erb_ievld_s1;
+   
+   dff_s #(1)  feterr_ff(.din (insterr_s1),
+		                   .q   (insterr_d1),
+		                   .clk (clk), .se(se), .si(), .so());
+//   dff #(1)  nirerr_ff(.din (erd_erc_nirpe_s1),
+//		                   .q   (nirpe_d1),
+//		                   .clk (clk), .se(se), .si(), .so());
+//   dff #(1)  ievld1_ff(.din (fcl_erb_ievld_s1),
+//		                   .q   (ievld_d1),
+//		                   .clk (clk), .se(se), .si(), .so());
+
+   assign insterr_qual_d1 = insterr_d1 & ~tlb_feterr_d1;
+
+   // tag errors
+   dff_s #(4)  vld_reg(.din (icv_itlb_valid_f),
+		                 .q   (valid_s1),
+		                 .clk (clk), .se(se), .si(), .so());
+   assign alltag_err_s1 = erd_erc_tagpe_s1 & valid_s1;
+
+   assign ictagerr_s1 = (|alltag_err_s1[3:0]) & fcl_erb_tevld_s1;
+   dff_s #(1)  itagerr_ff(.din (ictagerr_s1),
+		                    .q   (ictagerr_d1),
+		                    .clk (clk), .se(se), .si(), .so());
+
+   assign  ictagerr_qual_d1 = ictagerr_d1 & ~insterr_d1 & 
+	                            ~tlb_feterr_d1;
+
+   // Corrective action for IFU errors
+   // force an imiss if there is a inst/tag parity error
+   assign  erb_ifq_ifeterr_d1 = (ictagerr_d1 | insterr_d1) & itlb_errtr_d1_l;
+
+   // moved qualification with inst_vld_d1 to the dtu.
+   assign  erb_dtu_ifeterr_d1 = erb_ifq_ifeterr_d1;
+   //assign  erb_dtu_ifeterr_d1 = erb_ifq_ifeterr_d1 & fcl_erb_inst_vld_d1;
+//   assign  icache_pa_err_d1 = {4{ictagerr_d1 | insterr_d1}} & thr_d1;
+
+   // force a tlbmiss if there is a correctable tlb data parity error
+   assign  erb_fcl_itlb_ce_d1 = tlb_fet_ce_d1;
+
+   // take a precise trap if there is an uncorrectable error
+   assign  erb_fcl_ifet_uevec_d1 = ({4{tlb_fet_ue_d1 & fcl_erb_inst_vld_d1}} & 
+				                            thr_d1  | 
+				                            {4{ifq_erb_l2_ue | ifq_erb_io_ue}} & 
+				                            thr_l2ie) & nceen;
+
+   // errors in ifetch to l2 or iob
+   assign  l2if_unc_err = {4{ifq_erb_l2_ue | ifq_erb_io_ue}} & thr_l2ie;
+   assign  l2if_corr_err = {4{ifq_erb_ifet_ce}} & thr_l2ie;
+   
+   
+//-------------------------------------
+// Error Detection -- itlb asi errors
+//-------------------------------------   
+   assign  itlb_tagerr_s1 = (erd_erc_tlbt_pe_s1[0] ^ erd_erc_tlbt_pe_s1[1]) & 
+			                        asi_ttevld_s1;
+   assign  itlb_dataerr_s1 = (erd_erc_tlbd_pe_s1[0] ^ erd_erc_tlbd_pe_s1[1]) & 
+			                         asi_tdevld_s1;
+
+   dff_s #(1) itdate_ff(.din (itlb_dataerr_s1),
+		                  .q   (asi_daterr_d1),
+		                  .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) ittage_ff(.din (itlb_tagerr_s1),
+		                  .q   (asi_tagerr_d1),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   assign  asi_rd_err_d1 = asi_daterr_d1 | asi_tagerr_d1;
+   assign  ifu_lsu_asi_rd_unc = asi_rd_err_d1;
+   
+   assign  any_tlbasi_err = {4{asi_rd_err_d1}} & asi_thr_w2;
+
+
+//------------------------------
+// RF errors
+//------------------------------   
+   dff_s #(1) irfu_ff(.din (exu_ifu_ecc_ue_m),
+		                .q   (irf_ue_unq),
+		                .clk (clk), .se (se), .si(), .so());
+   dff_s #(1) irfc_ff(.din (exu_ifu_ecc_ce_m),
+		                .q   (irf_ce_unq),
+		                .clk (clk), .se (se), .si(), .so());
+   assign  irf_ce_w = irf_ce_unq & ifu_tlu_inst_vld_w;
+   assign  irf_ue_w = irf_ue_unq & ifu_tlu_inst_vld_w;
+   
+//------------------
+// Error Logging
+//------------------
+   // List of all logged errors (u = uncorrectable, c = correctable)
+   // itlbt  u
+   // itlbd  u/c
+   // ict  c
+   // icd  c
+   // irf  c/u
+   // frf  c/u
+   // dtlb d/t u
+   // dct  c
+   // dcd  c
+   // mau  u
+   // l2-d u
+   // l2-i u
+   // dram u -- not any more
+   // io   u
+   //
+   // Errors not logged but causing a trap
+   // l2-d c
+   // l2-i c
+   // l2-s c
+   // 
+
+   // latest errors have highest priority
+   // lsu is latest and sometimes asynchronous
+   // spu has low priority
+   // irf/frf are always "current"
+   // ifu errors are speculative ("early")
+   // All lsu errors are prioritised at the source
+   assign  dmdu_nxt =  {4{lsu_dtlb_data_ue & ~erb_reset}} & thr_lsu_err &
+		                    ~any_ue_vld |
+	                      dmdu & ~({4{dmdu_wrt_data}} & asi_wrt_err_stat);
+   // 6310
+   assign  dmsu_nxt =  {4{lsu_dtlb_data_su & ~erb_reset}} & thr_lsu_err &
+		                   ~any_ue_vld |
+	                     dmsu & ~({4{dmsu_wrt_data}} & asi_wrt_err_stat);
+   assign  dmt_nxt =  {4{lsu_dtlb_tag_ue & ~erb_reset}} & thr_lsu_err &
+		                  ~any_ue_vld |
+	                    dmt & ~({4{dmt_wrt_data}} & asi_wrt_err_stat);
+   assign  ddc_nxt =  {4{lsu_dcache_data_perror & ~erb_reset}} & thr_lsu_err &
+		                  ~any_err_vld |
+	                    ddc & ~({4{ddc_wrt_data}} & asi_wrt_err_stat);
+   assign  dtc_nxt =  {4{lsu_dcache_tag_perror & ~erb_reset}} & thr_lsu_err &
+		                  ~any_err_vld |
+	                    dtc & ~({4{dtc_wrt_data}} & asi_wrt_err_stat);
+
+   assign  ldau_nxt = (mov_ely_reg_w & early_ldau | 
+	                     {4{lsu_l2_unc_error}} & thr_lsu_err |
+	                     {4{spu_unc_err_w2}} & thr_spu_err) &
+		                    ~any_ue_vld |
+	                     ldau & ~({4{ldau_wrt_data}} & asi_wrt_err_stat);
+
+   assign  ncu_nxt = (mov_ely_reg_w & early_ncu |  
+	                    {4{lsu_io_error}} & thr_lsu_err) &
+		                   ~any_ue_vld |
+ 	                   ncu & ~({4{ncu_wrt_data}} & asi_wrt_err_stat);
+
+   assign  any_lsu_ue = thr_lsu_err & {4{lsu_dtlb_data_ue |
+                                         lsu_dtlb_data_su |
+					                               lsu_dtlb_tag_ue |
+					                               lsu_l2_unc_error |
+					                               lsu_io_error}};
+
+   assign  any_lsu_ce = thr_lsu_err & 
+	                      {4{(lsu_dcache_data_perror |
+			                      lsu_dcache_tag_perror |
+			                      lsu_l2_corr_error) &
+                           ~lsu_dtlb_data_ue & 
+                           ~lsu_dtlb_data_su}};
+
+   assign  any_lsu_err = (any_lsu_ue |
+			                    thr_lsu_err & {4{lsu_dcache_data_perror |
+					                                 lsu_dcache_tag_perror}});
+
+   // MAmem parity error
+   assign mau_nxt = {4{spu_mamem_err_w2}} & thr_spu_err & 
+                    ~any_ue_vld |
+                    mau &  ~({4{mau_wrt_data}} & asi_wrt_err_stat);
+
+   assign any_spu_ce = {4{spu_ifu_corr_err_w2 & ~spu_unc_err_w2}} & thr_spu_err;
+   assign any_spu_ue = {4{spu_unc_err_w2 |
+                          spu_mamem_err_w2}} & thr_spu_err;
+				 
+   // tlb asi read error
+   assign imt_nxt =  {4{asi_tagerr_d1 & ~erb_reset}} & asi_thr_w2 &
+		                 ~any_ue_vld |
+	                   imt & ~({4{imt_wrt_data}} & asi_wrt_err_stat);
+
+   dff_s #(2) ffu_err_reg(.din ({ffu_ifu_ecc_ce_w2,
+                               ffu_ifu_ecc_ue_w2}),
+                        .q   ({ffu_ce_w3,
+                               ffu_ue_w3}),
+                        .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(2) fptid_reg(.din (ffu_ifu_tid_w2[1:0]),
+		                  .q   (ffu_tid_w3[1:0]),
+		                  .clk (clk),  .se(se), .si(), .so());
+
+   assign ffu_thr_w3[0] = ~ffu_tid_w3[1] & ~ffu_tid_w3[0];
+   assign ffu_thr_w3[1] = ~ffu_tid_w3[1] &  ffu_tid_w3[0];
+   assign ffu_thr_w3[2] =  ffu_tid_w3[1] & ~ffu_tid_w3[0];
+   assign ffu_thr_w3[3] =  ffu_tid_w3[1] &  ffu_tid_w3[0];
+   
+   
+   // regfile error
+   //Bug6420: log frc and irc bits as well when fru and iru are detected simultaneously
+   assign frc_nxt =  {4{ffu_ce_w3 & ~erb_reset}} &    
+	              ffu_thr_w3 & ~any_err_vld & ~any_lsu_err |
+	              frc & ~({4{frc_wrt_data}} & asi_wrt_err_stat);
+   
+   assign fru_nxt =  {4{ffu_ue_w3 & ~erb_reset}} & ffu_thr_w3 &
+		            ~any_ue_vld & ~any_lsu_ue |
+	              fru & ~({4{fru_wrt_data}} & asi_wrt_err_stat);
+
+   //Bug6420
+   assign irc_nxt =  {4{irf_ce_w & ~erb_reset}} & thr_w &   
+		                ~any_err_vld & ~any_lsu_err |
+	                  irc & ~({4{irc_wrt_data}} & asi_wrt_err_stat);
+   
+   assign iru_nxt =  {4{irf_ue_w & ~erb_reset}} & thr_w &
+		               ~any_ue_vld & ~any_lsu_ue |
+	                 iru & ~({4{iru_wrt_data}} & asi_wrt_err_stat);
+
+   assign any_irf_err = thr_w & {4{irf_ce_w | irf_ue_w}};
+   assign any_frf_err = ffu_thr_w3 & {4{ffu_ce_w3 | ffu_ue_w3}};
+
+   //Bug6420
+   assign any_rf_ce = thr_w & {4{irf_ce_w}} |
+	                    ffu_thr_w3 & {4{ffu_ce_w3}};
+   assign any_rf_ue = thr_w & {4{irf_ue_w}} |
+	                    ffu_thr_w3 & {4{ffu_ue_w3}};
+   assign any_rf_err = any_irf_err | any_frf_err;
+
+
+   // ifu errors
+   assign idc_nxt =  mov_ely_reg_w & ~any_err_vld & early_idc |
+	              idc & ~({4{idc_wrt_data}} & asi_wrt_err_stat);
+   
+   assign itc_nxt =  mov_ely_reg_w & ~any_err_vld & early_itc | 
+	             itc & ~({4{itc_wrt_data}} & asi_wrt_err_stat);
+
+   // bug 6310
+   assign imdu_nxt =  (mov_ely_reg_w & early_imdu |
+		                   {4{asi_daterr_d1 & ~erb_reset}} & asi_thr_w2) &
+                        ~any_ue_vld |
+	                    imdu & ~({4{imdu_wrt_data}} & asi_wrt_err_stat);
+   
+//   assign imdc_nxt =  mov_ely_reg_w & ~any_err_vld & early_imdc |
+//	                    imdc & ~({4{imdc_wrt_data}} & asi_wrt_err_stat);
+   
+   dff_s #(64) errvec_reg(.din ({imt_nxt, imdu_nxt, idc_nxt, itc_nxt, 
+			                         iru_nxt, irc_nxt, fru_nxt, frc_nxt,
+  	                           dmt_nxt, dmdu_nxt, dmsu_nxt, ddc_nxt, dtc_nxt, 
+			                         ldau_nxt, ncu_nxt, mau_nxt}),
+		                    .q   ({imt, imdu, idc, itc, 
+			                         iru, irc, fru, frc,
+  	                           dmt, dmdu, dmsu, ddc, dtc, 
+			                         ldau, ncu, mau}),
+		                    .clk (clk),
+		                    .se  (se), .si(), .so());
+		      
+   assign any_err_vld = imt | imdu | idc | itc | iru | irc | fru | frc |
+  	                    dmt | dmdu | dmsu | ddc | dtc | ldau | ncu | mau;
+
+   assign any_ue_vld =  imt | imdu | iru | fru |
+  	                    dmt | dmdu | dmsu | ldau | ncu | mau;
+   
+//   assign any_ce_vld =  imdc | idc | itc | irc | frc |
+//  	                dmdc | ddc | dtc;
+
+   // IFU errors
+   assign any_ifu_ue = {4{(ifq_erb_l2_ue | ifq_erb_io_ue) & ~erb_reset}} & 
+	                     thr_l2ie |
+	                     {4{tlb_fet_ue_d1 & ~erb_reset}} & thr_d1;
+   
+   assign any_ifu_ce = {4{ifq_erb_ifet_ce & ~erb_reset}} & thr_l2ie |
+	                     {4{(tlb_fet_ce_d1 | insterr_qual_d1 | 
+			                     ictagerr_qual_d1) & ~erb_reset}} & 
+                       thr_d1;
+
+   assign any_ifu_err = any_ifu_ce | any_ifu_ue;
+   
+
+   assign ifet_ce_vld = early_idc | early_itc | early_l2ce;
+   assign ifet_ue_vld = early_imdu | early_ldau | early_ncu;
+
+   // l2ce's are not logged in sparc, so leave them out
+   assign any_iferr_vld = ifet_ue_vld | early_idc | early_itc;   
+
+   // Early errors
+   assign early_idc_nxt = {4{insterr_qual_d1}} & thr_d1 &
+	                  ~any_iferr_vld & ~any_rf_err & ~any_lsu_err |
+	                  early_idc & ~clear_iferr_d1 &
+	                  ~mov_ely_reg_w;
+   
+   assign early_itc_nxt =  {4{ictagerr_qual_d1}} & thr_d1 &
+		                ~any_iferr_vld & ~any_rf_err & ~any_lsu_err |
+	                  early_itc & ~clear_iferr_d1 &
+	                  ~mov_ely_reg_w;	  
+
+   assign early_imdu_nxt =  {4{tlb_fet_ue_d1}} & thr_d1 &
+		                 ~ifet_ue_vld & ~any_rf_ue & ~any_lsu_ue |
+	                   early_imdu & ~clear_iferr_d1 &
+	                   ~mov_ely_reg_w;
+	  
+//   assign early_imdc_nxt =  {4{tlb_fet_ce_d1}} & thr_d1 &
+//		                 ~any_iferr_vld & ~any_rf_err & ~any_lsu_err |
+//	                   early_imdc & ~clear_iferr_d1 &
+//	                   ~mov_ely_reg_w;
+
+   assign early_ldau_nxt = {4{ifq_erb_l2_ue & ~erb_reset}} & thr_l2ie & 
+	                   ~ifet_ue_vld & ~any_rf_ue & ~any_lsu_ue |
+	                   early_ldau & ~clear_iferr_d1 &
+	                   ~mov_ely_reg_w;
+   
+   assign early_ncu_nxt = {4{ifq_erb_io_ue & ~erb_reset}} & thr_l2ie & 
+	                   ~ifet_ue_vld & ~any_rf_ue & ~any_lsu_ue |
+	                   early_ncu & ~clear_iferr_d1 &
+	                   ~mov_ely_reg_w;
+
+   assign early_l2ce_nxt =  {4{ifq_erb_ifet_ce}} & thr_l2ie &
+		                 ~any_iferr_vld & ~any_rf_err & ~any_lsu_err |
+	                   early_l2ce & ~clear_iferr_d1 &
+	                   ~mov_ely_reg_w;
+   
+   dffr_s #(24) elyerr_reg(.din ({early_idc_nxt, 
+			                          early_itc_nxt, 
+			                          early_imdu_nxt,
+			                          early_ldau_nxt,
+			                          early_ncu_nxt,
+			                          early_l2ce_nxt}),
+		                     .q   ({early_idc,
+			                          early_itc,
+			                          early_imdu,
+			                          early_ldau,
+			                          early_ncu,
+			                          early_l2ce}),
+		                     .clk (clk),
+		                     .rst (erb_reset),
+		                     .se  (se), .si(), .so());
+
+   // Multiple errors
+   assign meu_nxt = any_ue_vld & (any_lsu_ue | any_rf_ue | any_tlbasi_err |
+                                  any_spu_ue |
+		                              mov_ely_reg_w & ifet_ue_vld) |
+// known bug - wontfix                            
+//                                    mov_ely_reg_w & early_meu |
+                    meu & ~({4{meu_wrt_data}} & asi_wrt_err_stat);
+
+   assign mec_nxt = any_err_vld & (any_lsu_ce | any_rf_ce |
+		                               mov_ely_reg_w & ifet_ce_vld) |
+// known bug - wontfix                     
+//                     mov_ely_reg_w & early_mec |
+	                  mec & ~({4{mec_wrt_data}} & asi_wrt_err_stat);
+
+   //Bug6821
+   assign priv_nxt = 	  ~any_err_vld & (any_lsu_ue_priv_state & any_lsu_ue | 
+					  any_priv_state & (any_lsu_ce | any_rf_err | any_tlbasi_err) |
+					  mov_ely_reg_w & early_priv) |
+			  priv & ~({4{priv_wrt_data}} & asi_wrt_err_stat);
+   
+   dffr_s #(12) me_reg(.din ({meu_nxt, 
+			                      mec_nxt, 
+			                      priv_nxt}),
+		                 .q   ({meu,
+			                      mec,
+			                      priv}),
+		                 .clk (clk),
+		                 .rst (erb_reset),
+		                 .se  (se), .si(), .so());
+
+   // Early multiple errors
+   assign early_meu_nxt = any_ifu_ue & ifet_ue_vld | 
+	                  early_meu & ~clear_iferr_d1 & ~mov_ely_reg_w;
+
+   assign early_mec_nxt = any_ifu_ce & any_iferr_vld |
+	                  early_mec & ~clear_iferr_d1 & ~mov_ely_reg_w;
+
+   // bug 6155 & 6821
+   assign early_priv_nxt = any_priv_state & ~any_iferr_vld & ~any_rf_err & ~any_lsu_err & any_ifu_err |
+	                   early_priv & ~clear_iferr_d1 & ~mov_ely_reg_w;
+   
+   dffr_s #(12) elyme_reg(.din ({early_meu_nxt, 
+			                         early_mec_nxt, 
+			                         early_priv_nxt}),
+		                    .q   ({early_meu,
+			                         early_mec,
+			                         early_priv}),
+		                    .clk (clk),
+		                    .rst (erb_reset),
+		                    .se  (se), .si(), .so());
+
+   // pipeline progress
+   dff_s #(1) clre_ff(.din (fcl_erb_inst_issue_d),
+                    .q   (clr_elyff_e),
+                    .clk (clk), .se(se), .si(), .so());
+   assign early_err_vec_e = (any_iferr_vld | early_l2ce) & thr_e;
+   assign clr_err_qual_e = (|early_err_vec_e[3:0]) & clr_elyff_e;
+   
+   dff_s #(1) clrm_ff(.din (clr_err_qual_e),
+                    .q   (clr_elyff_m),
+                    .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) clrw_ff(.din (clr_elyff_m),
+                    .q   (clr_elyff_w),
+                    .clk (clk), .se(se), .si(), .so());
+
+   // fix for 6142 and 6159
+   // delay err reg clear by one cycle to prevent clearing your own errors
+   dff_s #(4) clree_reg(.din (fcl_erb_clear_iferr),
+                      .q   (clear_iferr_d1),
+                      .clk (clk), .se(se), .si(), .so());
+   
+
+   assign clear_ely_reg_w = {4{clr_elyff_w}} & thr_w & 
+	                          (any_iferr_vld | early_l2ce);  // why again?
+   assign mov_ely_reg_w = clear_ely_reg_w & 
+                          {4{ifu_tlu_inst_vld_w & ~erb_reset}};
+
+
+   // asi error status output
+   assign err_stat0 = {meu[0], mec[0], priv[0],
+		                   3'b100,                    // rw, enc, ma
+		                   imdu[0], imt[0], 
+		                   dmdu[0], dmt[0],
+		                   idc[0], itc[0], ddc[0], dtc[0],
+		                   irc[0], iru[0], frc[0], fru[0],
+		                   ldau[0], ncu[0],
+                       dmsu[0], 1'b0, mau[0]};
+   
+   assign err_stat1 = {meu[1], mec[1], priv[1],
+		                   3'b100,
+		                   imdu[1], imt[1], 
+		                   dmdu[1], dmt[1],
+		                   idc[1], itc[1], ddc[1], dtc[1],
+		                   irc[1], iru[1], frc[1], fru[1],
+		                   ldau[1], ncu[1],
+                       dmsu[1], 1'b0, mau[1]};
+   
+   assign err_stat2 = {meu[2], mec[2], priv[2],
+		                   3'b100,
+		                   imdu[2], imt[2], 
+		                   dmdu[2], dmt[2],
+		                   idc[2], itc[2], ddc[2], dtc[2],
+		                   irc[2], iru[2], frc[2], fru[2],
+		                   ldau[2], ncu[2],
+                       dmsu[2], 1'b0, mau[2]};
+   
+   assign err_stat3 = {meu[3], mec[3], priv[3],
+		                   3'b100,
+		                   imdu[3], imt[3], 
+		                   dmdu[3], dmt[3],
+		                   idc[3], itc[3], ddc[3], dtc[3],
+		                   irc[3], iru[3], frc[3], fru[3],
+		                   ldau[3], ncu[3],
+                       dmsu[3], 1'b0, mau[3]};
+
+   mux4ds #(23) err_stat_asi(.dout (erc_erd_errstat_asidata),
+			                       .in0  (err_stat0),
+			                       .in1  (err_stat1),
+			                       .in2  (err_stat2),
+			                       .in3  (err_stat3),
+			                       .sel0 (asi_thr_s[0]),
+			                       .sel1 (asi_thr_s[1]),
+			                       .sel2 (asi_thr_s[2]),
+			                       .sel3 (asi_thr_s[3]));
+
+//----------------------------------
+// Error Address Selection   
+//----------------------------------
+
+   // TBD: Uncorrectable errors have to overwrite correctable errors
+   // mux 0
+   // FRF errors are mutex with everything else
+   // ITLB asi errors are mutex with everything else
+   // ASI writes are mutex with everything else
+   // only one of these errors could occur at a given time
+   assign  erc_erd_eadr0_sel_lsu_l = ~(sel_lsu_err);
+
+   assign  erc_erd_eadr0_sel_irf_l =  ~(~sel_lsu_err & any_irf_err);
+
+   assign  erc_erd_eadr0_sel_itlb_l = ~(~sel_lsu_err & ~any_irf_err & 
+					                              any_tlbasi_err);
+
+   assign  erc_erd_eadr0_sel_frf_l = ~(~sel_lsu_err & ~any_irf_err & 
+				                               ~any_tlbasi_err);
+
+   // mux 1
+   // l1 pa and tlb feterr can be simultaneous
+   // TBD: need to reorder and make spu lower priority?
+   assign  erc_erd_eadr1_sel_other_l = ~(any_spu_ue);
+   
+   assign  erc_erd_eadr1_sel_l2pa_l = ~(~any_spu_ue & 
+                                        (l2if_unc_err | l2if_corr_err));
+
+   assign  erc_erd_eadr1_sel_pcd1_l = ~(~l2if_unc_err & ~l2if_corr_err & 
+					                              ~any_spu_ue & 
+                                        thr_d1 & {4{tlb_feterr_d1}});
+
+   assign  erc_erd_eadr1_sel_l1pa_l = ~(~l2if_unc_err & ~l2if_corr_err & 
+					                              ({4{~tlb_feterr_d1}} | ~thr_d1) & 
+                                        ~any_spu_ue);
+
+//   assign  erc_erd_eadr1_sel_other_l = ~(~l2if_unc_err & ~l2if_corr_err & 
+//					                           {4{~tlb_feterr_d1}} & ~icache_pa_err_d1);
+
+
+   // mux2
+   assign sel_lsu_err = ~any_err_vld & any_lsu_err |
+	                      ~any_ue_vld & any_lsu_ue;
+
+   assign sel_ifuspu_err = (~any_err_vld & ~any_iferr_vld & any_ifu_err |
+                            ~any_ue_vld & any_spu_ue |
+			                      ~any_ue_vld & ~ifet_ue_vld & any_ifu_ue);
+
+   assign sel_rftlb_err = ~any_ue_vld & (any_rf_ue |
+					                               any_tlbasi_err) |
+			                    ~any_err_vld & any_rf_ce;
+   
+   
+   assign  erc_erd_eadr2_sel_wrt_l = ~(asi_wrt_err_addr);
+
+   assign  erc_erd_eadr2_sel_mx0_l = ~(~asi_wrt_err_addr & 
+				                               (sel_lsu_err | 
+					                              sel_rftlb_err));
+   
+   assign  erc_erd_eadr2_sel_mx1_l = ~(~sel_lsu_err & 
+				                               ~asi_wrt_err_addr &
+				                               ~sel_rftlb_err &
+				                               sel_ifuspu_err);
+   
+   assign  erc_erd_eadr2_sel_old_l = ~(~sel_lsu_err & 
+				                               ~asi_wrt_err_addr &
+				                               ~sel_rftlb_err &
+				                               ~sel_ifuspu_err);
+   
+//-----------------------------
+// Error Enable Reg
+//-----------------------------
+   assign nceen_nxt = asi_wrt_err_en & {4{nceen_wrt_data}} |
+	                    ~asi_wrt_err_en & nceen;
+   
+   assign ceen_nxt = asi_wrt_err_en & {4{ceen_wrt_data}} |
+	                   ~asi_wrt_err_en & ceen;
+
+   dffr_s #(8) err_en_reg(.din ({nceen_nxt, ceen_nxt}),
+		                    .q   ({nceen, ceen}),
+		                    .rst (erb_reset),
+		                    .clk (clk), .se(se), .si(), .so());
+
+   assign nceen_d = (thr_d[0] & nceen[0] |
+                     thr_d[1] & nceen[1] |
+                     thr_d[2] & nceen[2] |
+                     thr_d[3] & nceen[3]);
+   
+   dff_s #(1) nce_ff(.din (nceen_d),
+                   .q   (ifu_exu_nceen_e),
+                   .clk (clk), .se(se), .si(), .so());
+
+   assign ifu_lsu_nceen = nceen;
+   assign ifu_spu_nceen = nceen;
+
+   assign ifu_ce_trap = mov_ely_reg_w & ifet_ce_vld;
+   assign ce_trapvec =  (ifu_ce_trap |
+			                   any_rf_ce |
+			                   {4{ifq_erb_ce_rep}} & thr_l2ie |
+                         any_spu_ce |
+			                   any_lsu_ce) & ceen;
+
+   dff_s #(4) ceint_reg(.din (ce_trapvec),
+		                  .q   (erb_fcl_ce_trapvec),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   assign ue_trapvec =  ({4{ifq_erb_ue_rep}} & thr_l2ie |
+                         any_spu_ue & {4{spu_ifu_int_w2}}) & nceen;
+
+   assign erb_fcl_spu_uetrap = any_spu_ue & nceen;
+   
+   dff_s #(4) ueint_reg(.din (ue_trapvec),
+		                  .q   (erb_fcl_ue_trapvec),
+		                  .clk (clk), .se(se), .si(), .so());
+
+
+   mux4ds #(2) err_en_asi(.dout (erc_erd_erren_asidata),
+			                    .in0  ({nceen[0], ceen[0]}),
+			                    .in1  ({nceen[1], ceen[1]}),
+			                    .in2  ({nceen[2], ceen[2]}),
+			                    .in3  ({nceen[3], ceen[3]}),
+			                    .sel0 (asi_thr_s[0]),
+			                    .sel1 (asi_thr_s[1]),
+			                    .sel2 (asi_thr_s[2]),
+			                    .sel3 (asi_thr_s[3]));
+
+//-------------------------
+// Error Inject
+//-------------------------
+   assign wrt_errinj_i2 = (ifq_erb_asi_errinj_i2 & ifq_erb_asiwr_i2);
+   assign ecc_mask_nxt =  wrt_errinj_i2 ?  ifq_erb_asidata_i2[7:0] :
+                                           ecc_mask[7:0];
+   
+   assign errinj_ctl_nxt[1:0] = wrt_errinj_i2 ? ifq_erb_asidata_i2[31:30] :
+	                                              errinj_ctl[1:0];
+
+   // correct for single shot
+   assign errinj_vec_nxt[5:0] = wrt_errinj_i2 ? ifq_erb_asidata_i2[29:24] :
+	                              errinj_ctl[0] ? corr_errinj_vec :
+	                                              errinj_vec;
+
+   dffr_s #(16) errinj_reg(.din ({errinj_ctl_nxt,
+			                          errinj_vec_nxt,
+			                          ecc_mask_nxt}),
+		                     .q   ({errinj_ctl,
+			                          errinj_vec,
+			                          ecc_mask}),
+		                     .rst (erb_reset),
+		                     .clk (clk), .se (se), .si(), .so());
+   
+   assign ifu_exu_ecc_mask = ecc_mask;
+   assign ifu_exu_inj_irferr = errinj_vec[1] & errinj_ctl[1];
+   assign ifu_ffu_inj_frferr = errinj_vec[0] & errinj_ctl[1];
+   assign ifu_lsu_error_inj[3:0] = errinj_vec[5:2] & {4{errinj_ctl[1]}};
+
+   assign corr_errinj_vec[5:0] = errinj_vec[5:0] & ~{lsu_ifu_inj_ack[3:0], 
+						                                         exu_ifu_inj_ack,
+						                                         ffu_ifu_inj_ack};
+   assign erc_erd_errinj_asidata = {errinj_ctl,
+				                            errinj_vec,
+				                            16'b0,
+				                            ecc_mask};
+   
+//--------------------------
+//  ASI Stuff
+//--------------------------
+
+   dff_s #(2) asiways_reg(.din (ifq_erb_asiway_f),
+		                    .q   (asi_way_s1),
+		                    .clk (clk), .se(se), .si(), .so());
+
+   assign dec_asiway_s1[0] = ~asi_way_s1[1] & ~asi_way_s1[0];
+   assign dec_asiway_s1[1] = ~asi_way_s1[1] &  asi_way_s1[0];
+   assign dec_asiway_s1[2] =  asi_way_s1[1] & ~asi_way_s1[0];
+   assign dec_asiway_s1[3] =  asi_way_s1[1] &  asi_way_s1[0];
+   
+   assign erc_erd_asiway_s1_l = ~dec_asiway_s1;
+   
+   assign asi_thr_i2[0] = ~ifq_fcl_asi_tid_bf[1] & ~ifq_fcl_asi_tid_bf[0];
+   assign asi_thr_i2[1] = ~ifq_fcl_asi_tid_bf[1] &  ifq_fcl_asi_tid_bf[0];
+   assign asi_thr_i2[2] =  ifq_fcl_asi_tid_bf[1] & ~ifq_fcl_asi_tid_bf[0];
+   assign asi_thr_i2[3] =  ifq_fcl_asi_tid_bf[1] &  ifq_fcl_asi_tid_bf[0];
+
+   dff_s #(2) asi_tids_reg(.din (fcl_erb_asi_tid_f),
+			                    .q   (asi_tid_s1),
+			                    .clk (clk), .se(se), .si(), .so());
+   dff_s #(2) asi_tidw2_reg(.din (asi_tid_s1),
+			                    .q   (asi_tid_w2),
+			                    .clk (clk), .se(se), .si(), .so());
+   assign ifu_lsu_ldxa_tid_w2 = asi_tid_w2;
+
+   assign erc_erd_asi_thr_l = ~asi_thr_s;
+
+   assign asi_thr_s[0] = ~asi_tid_s1[1] & ~asi_tid_s1[0];
+   assign asi_thr_s[1] = ~asi_tid_s1[1] &  asi_tid_s1[0];
+   assign asi_thr_s[2] =  asi_tid_s1[1] & ~asi_tid_s1[0];
+   assign asi_thr_s[3] =  asi_tid_s1[1] &  asi_tid_s1[0];
+
+   assign asi_thr_w2[0] = ~asi_tid_w2[1] & ~asi_tid_w2[0];
+   assign asi_thr_w2[1] = ~asi_tid_w2[1] &  asi_tid_w2[0];
+   assign asi_thr_w2[2] =  asi_tid_w2[1] & ~asi_tid_w2[0];
+   assign asi_thr_w2[3] =  asi_tid_w2[1] &  asi_tid_w2[0];
+
+   
+   // F stage flops
+   dff_s #(1) asi_en_ff(.din (ifq_erb_asi_erren_i2),
+		                  .q   (asi_erren_f),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asi_stat_ff(.din (ifq_erb_asi_errstat_i2),
+		                    .q   (asi_errstat_f),
+		                    .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asi_addr_ff(.din (ifq_erb_asi_erraddr_i2),
+		                    .q   (asi_erraddr_f),
+		                    .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asi_inj_ff(.din (ifq_erb_asi_errinj_i2),
+		                   .q   (asi_errinj_f),
+		                   .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asi_im_ff(.din (ifq_erb_asi_imask_i2),
+		                  .q   (asi_imask_f),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   // S stage Flops
+   dff_s #(1) asi_ens_ff(.din (asi_erren_f),
+		                   .q   (asi_erren_s),
+		                   .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asi_stats_ff(.din (asi_errstat_f),
+		                     .q   (asi_errstat_s),
+		                     .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asi_addrs_ff(.din (asi_erraddr_f),
+		                     .q   (asi_erraddr_s),
+		                     .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asi_injs_ff(.din (asi_errinj_f),
+		                    .q   (asi_errinj_s),
+		                    .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) asi_ims_ff(.din (asi_imask_f),
+		                   .q   (asi_imask_s),
+		                   .clk (clk), .se(se), .si(), .so());
+
+   // ASI Reads
+   // All ASI reads except TLB
+   dff_s #(1) asi_rdf_ff(.din (ifq_fcl_asird_bf),
+		                   .q   (asird_f),
+		                   .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) asi_rds_ff(.din (asird_f),
+		                   .q   (asird_s),
+		                   .clk (clk), .se(se), .si(), .so());
+
+   // fwd reads
+   dff_s #(1) fwd_rdf_ff(.din (ifq_erb_fwdrd_bf),
+		                   .q   (fwdrd_f),
+		                   .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) fwd_rds_ff(.din (fwdrd_f),
+		                   .q   (fwdrd_s),
+		                   .clk (clk), .se(se), .si(), .so());
+   dff_s #(1) fwd_rdd_ff(.din (fwdrd_s),
+		                   .q   (fwdrd_d),
+		                   .clk (clk), .se(se), .si(), .so());
+
+   assign ifu_lsu_fwd_data_vld = fwdrd_d;
+   assign asifwd_rd_s = asird_s | fwdrd_s;
+
+   
+   // asi reads from icache
+   dff_s #(1) ic_rdts_ff(.din (ifq_erb_rdtag_f),
+		                   .q   (rdtag_s),
+		                   .clk (clk), .se(se), .si(), .so());
+
+   // forward requests also read instruction memory
+   assign rdinst_f = fwdrd_f | ifq_erb_rdinst_f;
+   
+   dff_s #(1) ic_rdds_ff(.din (rdinst_f),
+		                   .q   (rdinst_s),
+		                   .clk (clk), .se(se), .si(), .so());
+
+
+//   assign rst_tri_en = 1'b0;
+
+   // pick err asi source
+   assign erc_erd_errasi_sel_en_l   = ~asi_erren_s;
+   assign erc_erd_errasi_sel_stat_l = ~asi_errstat_s | asi_erren_s;
+   assign erc_erd_errasi_sel_inj_l  = ~asi_errinj_s | asi_errstat_s | 
+                                       asi_erren_s;
+   assign erc_erd_errasi_sel_addr_l =  asi_erren_s | asi_errstat_s | 
+	                                     asi_errinj_s;	
+
+   assign err_asi_s = (asi_erren_s | asi_errstat_s | asi_errinj_s | 
+	                     asi_erraddr_s);
+
+   // pick other asi source
+   assign erc_erd_miscasi_sel_ict_l = ~rdtag_s;
+   assign erc_erd_miscasi_sel_imask_l = ~asi_imask_s | rdtag_s;
+   assign erc_erd_miscasi_sel_other_l = rdtag_s | asi_imask_s;
+
+   // pick source for final asi loads
+   assign erc_erd_asisrc_sel_icd_s_l = ~(asifwd_rd_s & rdinst_s);
+   assign erc_erd_asisrc_sel_err_s_l = ~(asifwd_rd_s & ~rdinst_s & err_asi_s);
+   assign erc_erd_asisrc_sel_misc_s_l = ~(asifwd_rd_s & ~rdinst_s & ~err_asi_s);
+   assign erc_erd_asisrc_sel_itlb_s_l = ~(~asifwd_rd_s);
+
+   // is this asi read valid (for checking parity)
+   assign asi_ttevld_s1 = fcl_erb_itlbrd_vld_s & ~fcl_erb_itlbrd_data_s;
+   assign asi_tdevld_s1 = fcl_erb_itlbrd_vld_s & fcl_erb_itlbrd_data_s;
+
+   assign ldxa_data_vld_s = fcl_erb_itlbrd_vld_s | asird_s;
+//   assign ifu_lsu_ldxa_data_vld_w1 = ldxa_data_vld_s;
+   
+   dff_s #(1) asirdd_ff(.din (ldxa_data_vld_s),
+                      .q   (ldxa_data_vld_d),
+                      .clk (clk),
+                      .se  (se), .si(), .so());
+   
+   assign ifu_lsu_ldxa_data_vld_w2 = ldxa_data_vld_d;
+   
+
+   // ASI Writes
+   assign asi_wrt_err_en = asi_thr_i2 & {4{ifq_erb_asiwr_i2 &
+					                                 ifq_erb_asi_erren_i2}};
+   assign asi_wrt_err_stat = asi_thr_i2 & {4{ifq_erb_asiwr_i2 &
+					                                   ifq_erb_asi_errstat_i2}};
+   // err inj is common to the core
+//   assign asi_wrt_err_inj = asi_thr_i2 & {4{ifq_erb_asiwr_i2 &
+//					                                  ifq_erb_asi_errinj_i2}};
+   assign asi_wrt_err_addr = asi_thr_i2 & {4{ifq_erb_asiwr_i2 &
+					                                   ifq_erb_asi_erraddr_i2}};   
+
+   assign erc_erd_ld_imask = ifq_erb_asiwr_i2 & ifq_erb_asi_imask_i2;
+
+
+   // ASI Write Data
+   assign  meu_wrt_data  = ifq_erb_asidata_i2[31];
+   assign  mec_wrt_data  = ifq_erb_asidata_i2[30];
+   assign  priv_wrt_data = ifq_erb_asidata_i2[29];
+
+   assign  imdu_wrt_data = ifq_erb_asidata_i2[25];
+   assign  imt_wrt_data  = ifq_erb_asidata_i2[24];
+   assign  dmdu_wrt_data = ifq_erb_asidata_i2[23];
+   assign  dmt_wrt_data  = ifq_erb_asidata_i2[22];
+   assign  idc_wrt_data  = ifq_erb_asidata_i2[21];
+   assign  itc_wrt_data  = ifq_erb_asidata_i2[20];
+   assign  ddc_wrt_data  = ifq_erb_asidata_i2[19];
+   assign  dtc_wrt_data  = ifq_erb_asidata_i2[18];
+   assign  irc_wrt_data  = ifq_erb_asidata_i2[17];
+   assign  iru_wrt_data  = ifq_erb_asidata_i2[16];
+   assign  frc_wrt_data  = ifq_erb_asidata_i2[15];
+   assign  fru_wrt_data  = ifq_erb_asidata_i2[14];
+   assign  ldau_wrt_data = ifq_erb_asidata_i2[13];
+   assign  ncu_wrt_data  = ifq_erb_asidata_i2[12];  
+   assign  dmsu_wrt_data = ifq_erb_asidata_i2[11];
+   assign  mau_wrt_data  = ifq_erb_asidata_i2[9];
+  
+   assign nceen_wrt_data = ifq_erb_asidata_i2[1];  
+   assign ceen_wrt_data  = ifq_erb_asidata_i2[0];  
+
+   //
+   sink s0(.in (ifq_erb_asidata_i2[8]));
+   sink s1(.in (ifq_erb_asidata_i2[10]));
+   
+endmodule
+   
Index: /trunk/T1-CPU/ifu/sparc_ifu_lfsr5.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_lfsr5.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_lfsr5.v	(revision 6)
@@ -0,0 +1,87 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_lfsr5.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_lfsr5
+//  Description:	
+//  5-bit linear feedback shift register.  The state is set to all
+//  ones by reset, steps when advance is asserted and holds otherwise.
+//  out is the 2-bit value {q[0], q[2]} taken from the state.
+*/
+////////////////////////////////////////////////////////////////////////
+
+module sparc_ifu_lfsr5 (/*AUTOARG*/
+   // Outputs
+   out, 
+   // Inputs
+   advance, clk, se, si, so, reset
+   );
+   // 5-bit LFSR: next state is all ones under reset, a shift under advance.
+   input 	advance;               // step the LFSR when set (reset wins)
+   // NOTE(review): so is declared as an input; si and so are never used here.
+   input 	clk, se, si, so, reset;
+
+   output [1:0] out;              // {q[0], q[2]} of the current state
+
+   reg [4:0]    q_next;           // combinational next state
+   wire [4:0]   q;                // current state, output of lfsr_reg
+   
+
+/*
+   always @ (posedge clk)
+     begin
+	out = $random;
+     end // always @ posedge
+ */
+
+//   always @ (posedge clk)
+//     begin
+//	q[4:0] <= q_next[4:0];
+//     end
+   // Next state: reset has priority over advance; with neither set, hold q.
+   always @ (/*AUTOSENSE*/advance or q or reset)
+     begin
+	      if (reset)
+	        q_next = 5'b11111;
+	      else if (advance)
+	        begin
+	           // lfsr -- stable at 00000, period of 31 (2^5 - 1) otherwise
+	           q_next[1] = q[0];
+	           q_next[2] = q[1];
+	           q_next[3] = q[2];
+	           q_next[4] = q[3];
+	           q_next[0] = q[1] ^ q[4];
+	        end
+	      else
+	        q_next = q;
+     end // always @ (...
+
+   assign out = {q[0], q[2]};
+   // State register; dff_s is defined elsewhere, its si/so pins are left open.
+   dff_s #(5) lfsr_reg(.din  (q_next),
+                     .q    (q),
+                     .clk  (clk), .se(se), .si(), .so());
+   
+endmodule // sparc_ifu_lfsr5
+
+		
+	       
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_wseldp.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_wseldp.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_wseldp.v	(revision 6)
@@ -0,0 +1,176 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_wseldp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+//////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_wseldp
+//  Description:	
+//     Way selects removed from icache and done here 
+*/
+
+module sparc_ifu_wseldp (/*AUTOARG*/
+   // Outputs
+   wsel_fdp_fetdata_s1, wsel_fdp_topdata_s1, wsel_mbist_icache_data, 
+   so, 
+   // Inputs
+   rclk, se, si, icd_wsel_fetdata_s1, icd_wsel_topdata_s1, 
+   itlb_wsel_waysel_s1, ifq_erb_asiway_f
+   );
+   // Way-select datapath: reduces each 136-bit bus (four 34-bit ways) to 34 bits.
+   input          rclk, 
+                  se,
+                  si;
+   // Four 34-bit way slices per bus: way0 = [33:0] ... way3 = [135:102]
+   input  [135:0] icd_wsel_fetdata_s1,
+                  icd_wsel_topdata_s1;
+   // 4-bit per-way select (fetch path) and 2-bit encoded way (ASI path)
+   input [3:0]    itlb_wsel_waysel_s1;
+   input [1:0]    ifq_erb_asiway_f;
+   // Fetch-path results, gated by itlb_wsel_waysel_s1
+   output [33:0]  wsel_fdp_fetdata_s1;
+   output [33:0]  wsel_fdp_topdata_s1;
+   // ASI-path result: {top[33:32], fet[33:32], top[31:0], fet[31:0]}
+   output [67:0]  wsel_mbist_icache_data;
+   // NOTE(review): so is never driven and si is never read in this module
+   output         so;
+   
+   // local signals
+   wire [3:0]     dec_asiway_s_l,
+                  waysel_buf_s1;
+   wire [1:0]     asiway_s;
+   
+   wire [33:0]    rdc_fetdata_s1,
+                  rdc_topdata_s1,
+                  erb_asidata_s,                  
+                  asi_topdata_s;
+   wire           clk;
+   
+   //
+   // Code begins here
+   //
+   //------------------
+   // Control Portion
+   //------------------
+
+   assign   clk = rclk;
+   
+   // flop the 2-bit ASI way, then decode it into four active-low selects
+   dff_s #(2) asiway_reg(.din (ifq_erb_asiway_f),
+                       .q   (asiway_s),
+                       .clk (clk), .se(se), .si(), .so());
+   assign   dec_asiway_s_l[0] = ~(~asiway_s[1] & ~asiway_s[0]);
+   assign   dec_asiway_s_l[1] = ~(~asiway_s[1] &  asiway_s[0]);
+   assign   dec_asiway_s_l[2] = ~( asiway_s[1] & ~asiway_s[0]);
+   assign   dec_asiway_s_l[3] = ~( asiway_s[1] &  asiway_s[0]);
+
+   //--------------------------
+   // Datapath Section
+   //--------------------------
+
+   // buffer wayselect from itlb
+   // align these buffers with the corresponding pins in itlb
+   assign   waysel_buf_s1 = itlb_wsel_waysel_s1;
+
+   // Very Timing Critical Wayselect Muxes
+   // !!Cannot be a one-hot mux!!
+   // use ao2222
+//   bw_u1_ao2222_2x #(34) fetway_mx(.z   (rdc_fetdata_s1[33:0]),
+//                                   .a2  (icd_wsel_fetdata_s1[33:0]),
+//                                   .b2  (icd_wsel_fetdata_s1[67:34]),
+//                                   .c2  (icd_wsel_fetdata_s1[101:68]),
+//                                   .d2  (icd_wsel_fetdata_s1[135:102]),
+//                                   .a1  (waysel_buf_s1[0]),
+//                                   .b1  (waysel_buf_s1[1]),
+//                                   .c1  (waysel_buf_s1[2]),
+//                                   .d1  (waysel_buf_s1[3]));
+
+//   bw_u1_ao2222_2x #(34) topway_mx(.z   (rdc_topdata_s1[33:0]),
+//                                   .a2  (icd_wsel_topdata_s1[33:0]),
+//                                   .b2  (icd_wsel_topdata_s1[67:34]),
+//                                   .c2  (icd_wsel_topdata_s1[101:68]),
+//                                   .d2  (icd_wsel_topdata_s1[135:102]),
+//                                   .a1  (waysel_buf_s1[0]),
+//                                   .b1  (waysel_buf_s1[1]),
+//                                   .c1  (waysel_buf_s1[2]),
+//                                   .d1  (waysel_buf_s1[3]));
+   // AND-OR form used in place of the cells above; all-zero select yields 0
+   assign rdc_fetdata_s1 = icd_wsel_fetdata_s1[33:0] & {34{waysel_buf_s1[0]}} |
+                         icd_wsel_fetdata_s1[67:34] & {34{waysel_buf_s1[1]}}  |
+                         icd_wsel_fetdata_s1[101:68] & {34{waysel_buf_s1[2]}} |
+                         icd_wsel_fetdata_s1[135:102] & {34{waysel_buf_s1[3]}};
+   
+   assign rdc_topdata_s1 = icd_wsel_topdata_s1[33:0] & {34{waysel_buf_s1[0]}} |
+                         icd_wsel_topdata_s1[67:34] & {34{waysel_buf_s1[1]}}  |
+                         icd_wsel_topdata_s1[101:68] & {34{waysel_buf_s1[2]}} |
+                         icd_wsel_topdata_s1[135:102] & {34{waysel_buf_s1[3]}};
+
+   // buffer and send to fdp
+   assign   wsel_fdp_fetdata_s1 = rdc_fetdata_s1;
+   assign   wsel_fdp_topdata_s1 = rdc_topdata_s1;
+   
+   // mux for asi data, not critical
+   dp_mux4ds #(34) asid_mx(.dout (erb_asidata_s[33:0]),
+                           .in0  (icd_wsel_fetdata_s1[33:0]),
+                           .in1  (icd_wsel_fetdata_s1[67:34]),
+                           .in2  (icd_wsel_fetdata_s1[101:68]),
+                           .in3  (icd_wsel_fetdata_s1[135:102]),
+                           .sel0_l (dec_asiway_s_l[0]),
+                           .sel1_l (dec_asiway_s_l[1]),
+                           .sel2_l (dec_asiway_s_l[2]),
+                           .sel3_l (dec_asiway_s_l[3]));
+
+   dp_mux4ds #(34) asitop_mx(.dout (asi_topdata_s[33:0]),
+                           .in0  (icd_wsel_topdata_s1[33:0]),
+                           .in1  (icd_wsel_topdata_s1[67:34]),
+                           .in2  (icd_wsel_topdata_s1[101:68]),
+                           .in3  (icd_wsel_topdata_s1[135:102]),
+                           .sel0_l (dec_asiway_s_l[0]),
+                           .sel1_l (dec_asiway_s_l[1]),
+                           .sel2_l (dec_asiway_s_l[2]),
+                           .sel3_l (dec_asiway_s_l[3]));
+
+   // buffer before sending to bist/errdp
+   assign wsel_mbist_icache_data = {asi_topdata_s[33:32], 
+                                    erb_asidata_s[33:32], 
+                                    asi_topdata_s[31:0],
+                                    erb_asidata_s[31:0]};
+
+// Everything below can be ignored for physical implementation
+// monitor for waysel -- moved here from itlb
+// Keeping this around for 0-in. cmp level check is in icache_mutex_mon.v
+
+`ifdef DEFINE_0IN
+   always @ (negedge clk)
+     begin
+        if (!((waysel_buf_s1 == 4'b0001) ||
+              (waysel_buf_s1 == 4'b0010) || 
+              (waysel_buf_s1 == 4'b0100) ||
+              (waysel_buf_s1 == 4'b1000) || 
+              (waysel_buf_s1 == 4'b0000)))
+          begin
+             // 0in <fire -message "FATAL ERROR: icache waysel not mutex"
+             //$error("IC_WAYSEL", "FATAL ERROR: icache waysel not mutex %b",
+             //       waysel_buf_s1);
+          end
+     end // always @ (negedge clk)
+`endif
+ 
+endmodule // sparc_ifu_wseldp
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_invctl.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_invctl.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_invctl.v	(revision 6)
@@ -0,0 +1,597 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_invctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_invctl
+//  Description:	
+//  Control logic for handling invalidations to the icache
+//
+*/
+
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+`include "iop.h"
+`include "ifu.h"
+
+module sparc_ifu_invctl(/*AUTOARG*/
+   // Outputs
+   so, inv_ifc_inv_pending, ifq_icv_wrindex_bf, ifq_icv_wren_bf, 
+   ifq_ict_dec_wrway_bf, ifq_fcl_invreq_bf, ifq_erb_asiway_f, 
+   // Inputs
+   rclk, se, si, const_cpuid, mbist_icache_write, 
+   lsu_ifu_ld_icache_index, lsu_ifu_ld_pcxpkt_vld, 
+   lsu_ifu_ld_pcxpkt_tid, ifc_inv_ifqadv_i2, ifc_inv_asireq_i2, 
+   ifq_icd_index_bf, ifd_inv_ifqop_i2, ifd_inv_wrway_i2
+   );
+   // Builds the ICV write index, way write enables and invalidate request from the packet on ifd_inv_ifqop_i2.
+   input        rclk, 
+                se, 
+                si;
+   // NOTE(review): si is never read and so is never driven in this module;
+   // every dff_s below leaves its .si()/.so() ports unconnected.
+   input [2:0]  const_cpuid;          // selects which slice of the invalidate vector is used
+   input        mbist_icache_write;   // forces a write request and an invalidate request
+
+   input [`IC_IDX_HI:5]   lsu_ifu_ld_icache_index;  // captured per thread when lsu_ifu_ld_pcxpkt_vld
+   input                  lsu_ifu_ld_pcxpkt_vld;
+   input [1:0]            lsu_ifu_ld_pcxpkt_tid;
+   
+   input                  ifc_inv_ifqadv_i2;        // low: held (registered) way/enables/request are reused
+   input                  ifc_inv_asireq_i2;
+   
+   input [`IC_IDX_HI:5]   ifq_icd_index_bf;         // index used for the write (ifill/asi/bist) case
+   input [`CPX_WIDTH-1:0] ifd_inv_ifqop_i2;         // packet being decoded
+   input [1:0]            ifd_inv_wrway_i2;         // encoded way used when pick_wr is set
+   
+
+   output                 so;
+   
+   output                 inv_ifc_inv_pending;      // = invalidate_i2 (not registered)
+   
+   output [`IC_IDX_HI:5]  ifq_icv_wrindex_bf;
+   output [15:0]          ifq_icv_wren_bf;          // four 4-bit way-enable groups
+   output [3:0]           ifq_ict_dec_wrway_bf;     // one-hot decode of ifd_inv_wrway_i2
+   output                 ifq_fcl_invreq_bf;        // invreq_i2 | mbist_icache_write
+   output [1:0]           ifq_erb_asiway_f;         // ifd_inv_wrway_i2 after one flop
+   
+   
+//----------------------------------------------------------------------
+//  Local Signals
+//----------------------------------------------------------------------
+
+   wire [3:0]  cpu_sel,
+               invcpu21_sel_i2;
+   wire        invcpu0_sel_i2;
+   
+   wire [1:0]  inv_vec0,
+		           inv_vec1;
+   wire [1:0]  inv_way0_p1_i2,
+		           inv_way0_p0_i2,
+		           inv_way1_p1_i2,
+		           inv_way1_p0_i2,
+               invwd0_way_i2,
+               invwd1_way_i2,
+               inv0_way_i2,
+               inv1_way_i2;
+
+   wire [1:0]  asi_way_f;
+
+   wire        word0_inv_i2,
+               word1_inv_i2;
+
+   wire        ldinv_i2,
+               ldpkt_i2,
+               evpkt_i2,
+               stpkt_i2,
+               strmack_i2,
+               imissrtn_i2;
+
+   wire        invreq_i2,
+               invalidate_i2,
+               invalidate_f;
+
+   wire        invall_i2,
+               invpa5_i2;
+
+   wire [1:0]  cpxthrid_i2;
+   wire [3:0]  dcpxthr_i2;
+
+   wire [1:0]  ldinv_way_i2;
+   wire [1:0]  w0_way_i2,
+               w1_way_i2,
+               w0_way_f,
+               w1_way_f;
+
+   wire        pick_wr;
+   wire        icv_wrreq_i2;
+ 
+   wire [3:0]  wrt_en_wd_i2,
+               wrt_en_wd_bf,
+               wrt_en_wd_f;
+
+   wire [3:0]  w0_dec_way_i2,
+               w1_dec_way_i2;
+
+   wire [3:0]  dec_wrway;
+   
+   wire        icvidx_sel_wr_i2,
+               icvidx_sel_ld_i2,
+               icvidx_sel_inv_i2;
+
+   wire [15:0] wren_i2;
+
+   
+   wire [`IC_IDX_HI:6] inv_addr_i2;
+   wire [`IC_IDX_HI:5] icaddr_i2;
+
+   wire                missaddr5_i2;
+   wire                missaddr6_i2;
+   
+
+   wire [3:0]          ldthr,
+                       ldidx_sel_new;
+   
+   wire [`IC_IDX_HI:5] ldinv_addr_i2,
+                       ldindex0,
+                       ldindex1,
+                       ldindex2,
+                       ldindex3,
+                       ldindex0_nxt,
+                       ldindex1_nxt,
+                       ldindex2_nxt,
+                       ldindex3_nxt;
+
+   wire                clk;
+   
+   
+//
+// Code Begins Here
+//
+   assign              clk = rclk;
+   
+   //----------------------------------------------------------------------
+   // Extract Invalidate Packet For This Core
+   //----------------------------------------------------------------------
+
+   // mux the invalidate vector down to get this processors inv vector
+
+   // First decode cpu id bits [2:1] into a one-hot select for the 4:1 muxes
+   assign cpu_sel[0] = ~const_cpuid[2] & ~const_cpuid[1];
+   assign cpu_sel[1] = ~const_cpuid[2] &  const_cpuid[1];
+   assign cpu_sel[2] =  const_cpuid[2] & ~const_cpuid[1];
+   assign cpu_sel[3] =  const_cpuid[2] &  const_cpuid[1];
+
+   // 4:1 followed by 2:1 (on const_cpuid[0]) to get 8:1, to get invalidate way selects
+   assign invcpu21_sel_i2 = cpu_sel;
+   assign invcpu0_sel_i2 = const_cpuid[0];
+   // Word 0, core pair k=0..3: even vld=[8k+1] way=[8k+3:8k+2]; odd vld=[8k+5] way=[8k+7:8k+6]
+   // First do word 0 for even processors
+   mux4ds #(1)  v0p0_mux(.dout  (inv_vec0[0]),
+			                   .in0   (ifd_inv_ifqop_i2[1]),
+			                   .in1   (ifd_inv_ifqop_i2[9]),
+			                   .in2   (ifd_inv_ifqop_i2[17]),
+			                   .in3   (ifd_inv_ifqop_i2[25]),
+			                   .sel0 (invcpu21_sel_i2[0]),
+			                   .sel1 (invcpu21_sel_i2[1]),
+			                   .sel2 (invcpu21_sel_i2[2]),
+			                   .sel3 (invcpu21_sel_i2[3]));
+
+   mux4ds #(2)  w0p0_mux(.dout (inv_way0_p0_i2[1:0]),
+			                   .in0  (ifd_inv_ifqop_i2[3:2]),
+			                   .in1  (ifd_inv_ifqop_i2[11:10]),
+			                   .in2  (ifd_inv_ifqop_i2[19:18]),
+			                   .in3  (ifd_inv_ifqop_i2[27:26]),
+			                   .sel0 (invcpu21_sel_i2[0]),
+			                   .sel1 (invcpu21_sel_i2[1]),
+			                   .sel2 (invcpu21_sel_i2[2]),
+			                   .sel3 (invcpu21_sel_i2[3]));
+
+   // word 0 for odd processors
+   mux4ds #(1)  v0p1_mux(.dout  (inv_vec0[1]),
+			                   .in0   (ifd_inv_ifqop_i2[5]),
+			                   .in1   (ifd_inv_ifqop_i2[13]),
+			                   .in2   (ifd_inv_ifqop_i2[21]),
+			                   .in3   (ifd_inv_ifqop_i2[29]),
+			                   .sel0 (invcpu21_sel_i2[0]),
+			                   .sel1 (invcpu21_sel_i2[1]),
+			                   .sel2 (invcpu21_sel_i2[2]),
+			                   .sel3 (invcpu21_sel_i2[3]));
+
+   mux4ds #(2)  w0p1_mux(.dout (inv_way0_p1_i2[1:0]),
+			                   .in0  (ifd_inv_ifqop_i2[7:6]),
+			                   .in1  (ifd_inv_ifqop_i2[15:14]),
+			                   .in2  (ifd_inv_ifqop_i2[23:22]),
+			                   .in3  (ifd_inv_ifqop_i2[31:30]),
+			                   .sel0 (invcpu21_sel_i2[0]),
+			                   .sel1 (invcpu21_sel_i2[1]),
+			                   .sel2 (invcpu21_sel_i2[2]),
+			                   .sel3 (invcpu21_sel_i2[3]));
+   // Word 1 repeats the word 0 field layout 56 bits higher; core pair k=0..3:
+   // even vld=[8k+57] way=[8k+59:8k+58]; odd vld=[8k+61] way=[8k+63:8k+62]
+   // Word 1
+   // word 1 for even processors
+   mux4ds #(1)  v1p0_mux(.dout  (inv_vec1[0]),
+			                   .in0   (ifd_inv_ifqop_i2[57]),
+			                   .in1   (ifd_inv_ifqop_i2[65]),
+			                   .in2   (ifd_inv_ifqop_i2[73]),
+			                   .in3   (ifd_inv_ifqop_i2[81]),
+			                   .sel0 (invcpu21_sel_i2[0]),
+			                   .sel1 (invcpu21_sel_i2[1]),
+			                   .sel2 (invcpu21_sel_i2[2]),
+			                   .sel3 (invcpu21_sel_i2[3]));
+
+   mux4ds #(2)  w1p0_mux(.dout (inv_way1_p0_i2[1:0]),
+			                   .in0  (ifd_inv_ifqop_i2[59:58]),
+			                   .in1  (ifd_inv_ifqop_i2[67:66]),
+			                   .in2  (ifd_inv_ifqop_i2[75:74]),
+			                   .in3  (ifd_inv_ifqop_i2[83:82]),
+			                   .sel0 (invcpu21_sel_i2[0]),
+			                   .sel1 (invcpu21_sel_i2[1]),
+			                   .sel2 (invcpu21_sel_i2[2]),
+			                   .sel3 (invcpu21_sel_i2[3]));
+
+   // word 1 for odd processors (NOTE(review): instance is inv_v1p1_mux, unlike its v?p?_mux siblings)
+   mux4ds #(1)  inv_v1p1_mux(.dout  (inv_vec1[1]),
+			                       .in0   (ifd_inv_ifqop_i2[61]),
+			                       .in1   (ifd_inv_ifqop_i2[69]),
+			                       .in2   (ifd_inv_ifqop_i2[77]),
+			                       .in3   (ifd_inv_ifqop_i2[85]),
+			                       .sel0 (invcpu21_sel_i2[0]),
+			                       .sel1 (invcpu21_sel_i2[1]),
+			                       .sel2 (invcpu21_sel_i2[2]),
+			                       .sel3 (invcpu21_sel_i2[3]));
+
+   mux4ds #(2)  w1p1_mux(.dout (inv_way1_p1_i2[1:0]),
+			                   .in0  (ifd_inv_ifqop_i2[63:62]),
+			                   .in1  (ifd_inv_ifqop_i2[71:70]),
+			                   .in2  (ifd_inv_ifqop_i2[79:78]),
+			                   .in3  (ifd_inv_ifqop_i2[87:86]),
+			                   .sel0 (invcpu21_sel_i2[0]),
+			                   .sel1 (invcpu21_sel_i2[1]),
+			                   .sel2 (invcpu21_sel_i2[2]),
+			                   .sel3 (invcpu21_sel_i2[3]));
+   
+   // Mux odd and even values down to a single value for word0 and word1
+//   dp_mux2es #(1) v0_mux (.dout (word0_inv_i2),
+//			                    .in0  (inv_vec0[0]),
+//			                    .in1  (inv_vec0[1]),
+//			                    .sel  (invcpu0_sel_i2));
+   assign word0_inv_i2 = invcpu0_sel_i2 ? inv_vec0[1] : inv_vec0[0];
+
+//   dp_mux2es #(2) w0_mux (.dout (invwd0_way_i2[1:0]),
+//			                    .in0  (inv_way0_p0_i2[1:0]),
+//			                    .in1  (inv_way0_p1_i2[1:0]),
+//			                    .sel  (invcpu0_sel_i2));
+   assign invwd0_way_i2 = invcpu0_sel_i2 ? inv_way0_p1_i2[1:0] :
+                                           inv_way0_p0_i2[1:0];
+   
+   // word1
+//   dp_mux2es #(1) v1_mux (.dout (word1_inv_i2),
+//			                    .in0  (inv_vec1[0]),
+//			                    .in1  (inv_vec1[1]),
+//			                    .sel  (invcpu0_sel_i2));
+   assign word1_inv_i2 = invcpu0_sel_i2 ? inv_vec1[1] : inv_vec1[0];
+
+//   dp_mux2es #(2) w1_mux (.dout (invwd1_way_i2[1:0]),
+//			                    .in0  (inv_way1_p0_i2[1:0]),
+//			                    .in1  (inv_way1_p1_i2[1:0]),
+//			                    .sel  (invcpu0_sel_i2));
+   assign invwd1_way_i2 = invcpu0_sel_i2 ? inv_way1_p1_i2[1:0] :
+                                           inv_way1_p0_i2[1:0];
+
+   //-----------------------------
+   // Decode CPX Packet
+   //-----------------------------
+   // load
+   assign ldpkt_i2 = ({ifd_inv_ifqop_i2[`CPX_VLD], 
+                       ifd_inv_ifqop_i2[`CPX_REQFIELD]} == `CPX_LDPKT) ? 
+                       1'b1 : 1'b0;
+   assign ldinv_i2 = ldpkt_i2 & ifd_inv_ifqop_i2[`CPX_WYVLD];   // load packet with its CPX_WYVLD bit set
+   assign ldinv_way_i2= ifd_inv_ifqop_i2[`CPX_WY_HI:`CPX_WY_LO];
+
+   // ifill
+   assign imissrtn_i2 = ({ifd_inv_ifqop_i2[`CPX_VLD], 
+                          ifd_inv_ifqop_i2[`CPX_REQFIELD]} == `CPX_IFILLPKT) ?
+                          1'b1 : 1'b0;
+
+   // store ack
+   assign stpkt_i2 = ({ifd_inv_ifqop_i2[`CPX_VLD], 
+                       ifd_inv_ifqop_i2[`CPX_REQFIELD]} == `CPX_STRPKT) ? 
+                       1'b1 : 1'b0;
+   assign strmack_i2 = ({ifd_inv_ifqop_i2[`CPX_VLD], 
+                         ifd_inv_ifqop_i2[`CPX_REQFIELD]} == `CPX_STRMACK) ? 
+                         1'b1 : 1'b0;
+   assign invall_i2 = stpkt_i2 & ifd_inv_ifqop_i2[`CPX_IINV] & 
+                      ifc_inv_ifqadv_i2;                     // store packet with CPX_IINV set, only while ifqadv is high
+   assign invpa5_i2 = ifd_inv_ifqop_i2[`CPX_INVPA5];          // with inv_addr_i2[6], picks the 4-bit wren group hit by invall
+   
+   // evict 
+   assign evpkt_i2 = ({ifd_inv_ifqop_i2[`CPX_VLD], 
+                       ifd_inv_ifqop_i2[`CPX_REQFIELD]} == `CPX_EVPKT) ? 
+                       1'b1 : 1'b0;
+   
+   // get thread id and decode
+   assign  cpxthrid_i2 = ifd_inv_ifqop_i2[`CPX_THRFIELD];
+   
+   assign  dcpxthr_i2[0] = ~cpxthrid_i2[1] & ~cpxthrid_i2[0];
+   assign  dcpxthr_i2[1] = ~cpxthrid_i2[1] &  cpxthrid_i2[0];
+   assign  dcpxthr_i2[2] =  cpxthrid_i2[1] & ~cpxthrid_i2[0];
+   assign  dcpxthr_i2[3] =  cpxthrid_i2[1] &  cpxthrid_i2[0];
+
+   //-----------------------------------------------
+   // Generate Write Way and Write Enables
+   //-----------------------------------------------
+
+   // decode way for tags (2-bit encoded way to one-hot)
+   assign  dec_wrway[0] = ~ifd_inv_wrway_i2[1] & ~ifd_inv_wrway_i2[0];
+   assign  dec_wrway[1] = ~ifd_inv_wrway_i2[1] & ifd_inv_wrway_i2[0];
+   assign  dec_wrway[2] = ifd_inv_wrway_i2[1] & ~ifd_inv_wrway_i2[0];
+   assign  dec_wrway[3] = ifd_inv_wrway_i2[1] & ifd_inv_wrway_i2[0];
+
+   assign  ifq_ict_dec_wrway_bf = dec_wrway;
+
+   // way for asi: ifd_inv_wrway_i2 delayed by one flop
+   dff_s #(2) asiwayf_reg(.din (ifd_inv_wrway_i2),
+		                    .q   (asi_way_f),
+		                    .clk (clk), .se(se), .si(), .so());
+
+   assign  ifq_erb_asiway_f = asi_way_f;
+
+   
+   // Select which index/way to invalidate
+   assign icv_wrreq_i2 = imissrtn_i2 | ifc_inv_asireq_i2 | mbist_icache_write;
+   // invalidate way priority: held w?_way_f if ~ifqadv, else ldinv way, else way from the vector
+   assign inv0_way_i2 = ~ifc_inv_ifqadv_i2 ? w0_way_f :
+                        ldinv_i2           ? ldinv_way_i2 :
+                                             invwd0_way_i2;
+   assign inv1_way_i2 = ~ifc_inv_ifqadv_i2 ? w1_way_f :
+                        ldinv_i2           ? ldinv_way_i2 :
+                                             invwd1_way_i2;
+   // pick_wr replaces the invalidate way with the write way ifd_inv_wrway_i2
+   assign pick_wr = (imissrtn_i2 | ifc_inv_asireq_i2) & ifc_inv_ifqadv_i2 |
+                     mbist_icache_write;
+   assign w0_way_i2 = pick_wr ? ifd_inv_wrway_i2 :
+                                inv0_way_i2;
+   assign w1_way_i2 = pick_wr ? ifd_inv_wrway_i2 :
+                                inv1_way_i2;
+
+   dff_s #(4) wrway_reg(.din ({w0_way_i2, w1_way_i2}),
+                      .q   ({w0_way_f, w1_way_f}),
+                      .clk (clk), .se(se), .si(), .so());
+   
+   // determine the way in the ICV we are writing to
+//   mux3ds #(2) w0_waymux(.dout  (w0_way_i2),
+//		                     .in0   (ifd_inv_wrway_i2[1:0]),
+//		                     .in1   (invwd0_way_i2[1:0]),
+//		                     .in2   (ldinv_way_i2[1:0]),
+//		                     .sel0  (icvidx_sel_wr_i2),
+//		                     .sel1  (icvidx_sel_inv_i2),
+//		                     .sel2  (icvidx_sel_ld_i2));
+
+//   mux3ds #(2) w1_waymux(.dout  (w1_way_i2),
+//		                     .in0   (ifd_inv_wrway_i2[1:0]),
+//		                     .in1   (invwd1_way_i2[1:0]),
+//		                     .in2   (ldinv_way_i2[1:0]),
+//		                     .sel0  (icvidx_sel_wr_i2),
+//		                     .sel1  (icvidx_sel_inv_i2),
+//		                     .sel2  (icvidx_sel_ld_i2));
+
+   // decode write way
+   assign w0_dec_way_i2[0] = ~w0_way_i2[1] & ~w0_way_i2[0];
+   assign w0_dec_way_i2[1] = ~w0_way_i2[1] &  w0_way_i2[0];
+   assign w0_dec_way_i2[2] =  w0_way_i2[1] & ~w0_way_i2[0];
+   assign w0_dec_way_i2[3] =  w0_way_i2[1] &  w0_way_i2[0];
+
+   assign w1_dec_way_i2[0] = ~w1_way_i2[1] & ~w1_way_i2[0];
+   assign w1_dec_way_i2[1] = ~w1_way_i2[1] &  w1_way_i2[0];
+   assign w1_dec_way_i2[2] =  w1_way_i2[1] & ~w1_way_i2[0];
+   assign w1_dec_way_i2[3] =  w1_way_i2[1] &  w1_way_i2[0];
+
+   // wrt_en_wd_i2[n]: n[1] = address bit 6, n[0] = word select (vector) or address bit 5 (ldinv/write)
+   // determine if valid bit write to top 32B, bot 32B or both
+   assign wrt_en_wd_i2[0] = word0_inv_i2 & (stpkt_i2 | evpkt_i2 |strmack_i2) & 
+                                           ~inv_addr_i2[6] |
+                          ldinv_i2 & ~ldinv_addr_i2[5] & ~ldinv_addr_i2[6] |
+		                      icv_wrreq_i2 & ~missaddr5_i2 & ~missaddr6_i2;
+
+   assign wrt_en_wd_i2[1] = word1_inv_i2 & (stpkt_i2 | evpkt_i2 |strmack_i2) &
+                                           ~inv_addr_i2[6] |
+			                      ldinv_i2 & ldinv_addr_i2[5] & ~ldinv_addr_i2[6] |
+		                        icv_wrreq_i2 & missaddr5_i2 & ~missaddr6_i2;
+
+   assign wrt_en_wd_i2[2] = word0_inv_i2 & (stpkt_i2 | evpkt_i2 |strmack_i2) & 
+                                           inv_addr_i2[6] |
+                          ldinv_i2 & ~ldinv_addr_i2[5] & ldinv_addr_i2[6] |
+		                      icv_wrreq_i2 & ~missaddr5_i2 & missaddr6_i2;
+
+   assign wrt_en_wd_i2[3] = word1_inv_i2 & (stpkt_i2 | evpkt_i2 |strmack_i2) &
+                                           inv_addr_i2[6] |
+			                      ldinv_i2 & ldinv_addr_i2[5] & ldinv_addr_i2[6] |
+		                        icv_wrreq_i2 & missaddr5_i2 & missaddr6_i2;
+   // reuse the registered enables while ifc_inv_ifqadv_i2 is low
+   assign wrt_en_wd_bf = ifc_inv_ifqadv_i2 ? wrt_en_wd_i2 :
+                                              wrt_en_wd_f;
+   dff_s #(4) wrten_reg(.din (wrt_en_wd_bf),
+                      .q   (wrt_en_wd_f),
+                      .clk (clk), .se(se), .si(), .so());
+
+
+   // Final Write Enable to ICV: decoded way gated by its group enable, or all four ways of one group on invall
+   assign wren_i2[3:0] = (w0_dec_way_i2 & {4{wrt_en_wd_bf[0]}}) | 
+                           {4{invall_i2 & ~invpa5_i2 & ~inv_addr_i2[6]}};
+
+   assign wren_i2[7:4] = (w1_dec_way_i2 & {4{wrt_en_wd_bf[1]}}) | 
+                           {4{invall_i2 & invpa5_i2 & ~inv_addr_i2[6]}}; 
+
+   assign wren_i2[11:8] = (w0_dec_way_i2 & {4{wrt_en_wd_bf[2]}}) | 
+                            {4{invall_i2 & ~invpa5_i2 & inv_addr_i2[6]}};
+
+   assign wren_i2[15:12] = (w1_dec_way_i2 & {4{wrt_en_wd_bf[3]}}) |
+                             {4{invall_i2 & invpa5_i2 & inv_addr_i2[6]}};
+   
+   assign ifq_icv_wren_bf = wren_i2;
+   
+   // advance the wr way for the ICV array
+//   mux2ds #(8) wren_mux(.dout  (next_wren_i2),
+//		                    .in0   (wren_f),
+//		                    .in1   (wren_i2),
+//		                    .sel0  (~ifc_ifd_ifqadv_i2),
+//		                    .sel1  (ifc_ifd_ifqadv_i2));
+
+//   assign wren_bf = ifc_inv_ifqadv_i2 ? wren_i2 : wren_f;
+//   dff #(8) icv_weff(.din  (wren_bf),
+//		                 .q    (wren_f),
+//		                 .clk  (clk),
+//		                 .se   (se), .si(), .so());
+
+//   assign ifq_icv_wren_bf[7:0] = wren_bf[7:0] & {8{~icvaddr6_i2}};
+//   assign ifq_icv_wren_bf[15:8] = wren_bf[7:0] & {8{icvaddr6_i2}};
+   
+
+   //--------------------------
+   // Invalidates
+   //--------------------------
+   assign invalidate_i2 = (stpkt_i2 | evpkt_i2 | strmack_i2) & 
+			                      (word0_inv_i2 | 
+                             word1_inv_i2 |
+			                       ifd_inv_ifqop_i2[`CPX_IINV]) |  // all ways
+			                     ldinv_i2;
+   // invreq_i2: invalidate_i2 is paired with sel ifqadv, the held invalidate_f with sel ~ifqadv
+   mux2ds #(1) invf_mux(.dout (invreq_i2),
+		                    .in0  (invalidate_f),
+		                    .in1  (invalidate_i2),
+		                    .sel0  (~ifc_inv_ifqadv_i2),
+		                    .sel1  (ifc_inv_ifqadv_i2));
+   
+   dff_s #(1) invf_ff(.din  (invreq_i2),
+		                .q    (invalidate_f),
+		                .clk  (clk),
+		                .se   (se), .si(), .so());
+
+   // auto invalidate is done during bist
+   // no need to qualify bist_write with ifqadv_i2 since bist is done
+   // before anything else. 
+   assign ifq_fcl_invreq_bf = invreq_i2 | mbist_icache_write;
+
+   // don't really need to OR with invalidate_f, since this will be
+   // gone in a cycle 
+//   assign inv_ifc_inv_pending = invalidate_i2 | invalidate_f;
+   assign inv_ifc_inv_pending = invalidate_i2;
+   
+   //---------------------------------
+   // Get the ifill/invalidation index
+   //---------------------------------
+
+   // ifill index
+   assign icaddr_i2[`IC_IDX_HI:5] = ifq_icd_index_bf[`IC_IDX_HI:5];
+   assign missaddr5_i2 = ifq_icd_index_bf[5];
+   assign missaddr6_i2 = ifq_icd_index_bf[6];
+   
+   // evict invalidate index
+   //   assign    inv_addr_i2 = ifqop_i2[117:112];
+   assign inv_addr_i2 = ifd_inv_ifqop_i2[`CPX_INV_IDX_HI:`CPX_INV_IDX_LO];   
+
+   // index for invalidates caused by a load
+   // store dcache index when a load req is made
+   // one-hot decode of lsu_ifu_ld_pcxpkt_tid
+   assign ldthr[0] = ~lsu_ifu_ld_pcxpkt_tid[1] & ~lsu_ifu_ld_pcxpkt_tid[0];
+   assign ldthr[1] = ~lsu_ifu_ld_pcxpkt_tid[1] &  lsu_ifu_ld_pcxpkt_tid[0];
+   assign ldthr[2] =  lsu_ifu_ld_pcxpkt_tid[1] & ~lsu_ifu_ld_pcxpkt_tid[0];
+   assign ldthr[3] =  lsu_ifu_ld_pcxpkt_tid[1] &  lsu_ifu_ld_pcxpkt_tid[0];
+   // capture enable: only the decoded thread updates, and only when lsu_ifu_ld_pcxpkt_vld
+   assign ldidx_sel_new = ldthr & {4{lsu_ifu_ld_pcxpkt_vld}};
+
+//   dp_mux2es  #(`IC_IDX_SZ) t0_ldidx_mux(.dout (ldindex0_nxt),
+//			                                   .in0  (ldindex0),
+//			                                   .in1  (lsu_ifu_ld_icache_index),
+//			                                   .sel  (ldidx_sel_new[0]));
+   assign ldindex0_nxt = ldidx_sel_new[0] ? lsu_ifu_ld_icache_index :
+                                            ldindex0;
+   
+//   dp_mux2es  #(`IC_IDX_SZ) t1_ldidx_mux(.dout (ldindex1_nxt),
+//			                                   .in0  (ldindex1),
+//			                                   .in1  (lsu_ifu_ld_icache_index),
+//			                                   .sel  (ldidx_sel_new[1]));
+   assign ldindex1_nxt = ldidx_sel_new[1] ? lsu_ifu_ld_icache_index :
+                                            ldindex1;
+   
+//   dp_mux2es  #(`IC_IDX_SZ) t2_ldidx_mux(.dout (ldindex2_nxt),
+//			                                   .in0  (ldindex2),
+//			                                   .in1  (lsu_ifu_ld_icache_index),
+//			                                   .sel  (ldidx_sel_new[2]));
+   assign ldindex2_nxt = ldidx_sel_new[2] ? lsu_ifu_ld_icache_index :
+                                            ldindex2;
+   
+//   dp_mux2es  #(`IC_IDX_SZ) t3_ldidx_mux(.dout (ldindex3_nxt),
+//			                                   .in0  (ldindex3),
+//			                                   .in1  (lsu_ifu_ld_icache_index),
+//			                                   .sel  (ldidx_sel_new[3]));
+   assign ldindex3_nxt = ldidx_sel_new[3] ? lsu_ifu_ld_icache_index :
+                                            ldindex3;
+   
+   // per-thread saved load index; each register recirculates its value unless ldidx_sel_new[n]
+   dff_s #(`IC_IDX_SZ)  ldix0_reg(.din (ldindex0_nxt),
+		                            .q   (ldindex0),
+		                            .clk (clk), .se(se), .si(), .so());
+   dff_s #(`IC_IDX_SZ)  ldix1_reg(.din (ldindex1_nxt),
+		                            .q   (ldindex1),
+		                            .clk (clk), .se(se), .si(), .so());
+   dff_s #(`IC_IDX_SZ)  ldix2_reg(.din (ldindex2_nxt),
+		                            .q   (ldindex2),
+		                            .clk (clk), .se(se), .si(), .so());
+   dff_s #(`IC_IDX_SZ)  ldix3_reg(.din (ldindex3_nxt),
+		                            .q   (ldindex3),
+		                            .clk (clk), .se(se), .si(), .so());
+
+   // Pick dcache index corresponding to current thread (thread id decoded from the CPX packet)
+   mux4ds #(`IC_IDX_SZ) ldinv_mux(.dout (ldinv_addr_i2),
+			                            .in0  (ldindex0),
+			                            .in1  (ldindex1),
+			                            .in2  (ldindex2),
+			                            .in3  (ldindex3),
+			                            .sel0 (dcpxthr_i2[0]),
+			                            .sel1 (dcpxthr_i2[1]),
+			                            .sel2 (dcpxthr_i2[2]),
+			                            .sel3 (dcpxthr_i2[3]));
+
+   // Final Mux for Index
+   assign icvidx_sel_wr_i2 = imissrtn_i2 | ifc_inv_asireq_i2 | 
+                             mbist_icache_write | ~ifc_inv_ifqadv_i2;
+   assign icvidx_sel_ld_i2 = ldinv_i2 & ifc_inv_ifqadv_i2;
+   assign icvidx_sel_inv_i2 = ~imissrtn_i2 & ~ldinv_i2 & 
+                              ~ifc_inv_asireq_i2 & ifc_inv_ifqadv_i2 &
+                              ~mbist_icache_write;
+   // in1 pads bit 5 with 0 because inv_addr_i2 only carries bits [IC_IDX_HI:6]
+   mux3ds #(`IC_IDX_SZ) icv_idx_mux(
+                            .dout  (ifq_icv_wrindex_bf[`IC_IDX_HI:5]),
+			                      .in0   (icaddr_i2[`IC_IDX_HI:5]),
+			                      .in1   ({inv_addr_i2[`IC_IDX_HI:6], 1'b0}),
+			                      .in2   (ldinv_addr_i2[`IC_IDX_HI:5]),
+			                      .sel0  (icvidx_sel_wr_i2),
+			                      .sel1  (icvidx_sel_inv_i2),
+			                      .sel2  (icvidx_sel_ld_i2));
+   // NOTE(review): sink presumably just terminates the unused packet bits for lint; confirm against its definition
+   sink #(`CPX_WIDTH) s0(.in (ifd_inv_ifqop_i2));
+   
+
+endmodule // sparc_ifu_invctl
Index: /trunk/T1-CPU/ifu/sparc_ifu_dcl.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_dcl.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_dcl.v	(revision 6)
@@ -0,0 +1,764 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_dcl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_dcl
+//  Description:	
+//   The decode control logic block does branch condition evaluation,
+//   delay slot management, and appropriate condition code
+//   selection.  It also executes the tcc instruction and kills the E
+//   stage instruction if a move did not succeed.  The DCL block is
+//   also responsible for generating the correct select signals to
+//   choose the branch offset and immediate operand.
+//
+*/
+////////////////////////////////////////////////////////////////////////
+
+`define CC_N  3
+`define CC_Z  2
+`define CC_V  1
+`define CC_C  0
+
+`define FP_U  3
+`define FP_G  2
+`define FP_L  1
+`define FP_E  0
+
+`define FSR_FCC0_HI 11
+`define FSR_FCC0_LO 10
+`define FSR_FCC1_HI 33
+`define FSR_FCC1_LO 32
+`define FSR_FCC2_HI 35
+`define FSR_FCC2_LO 34
+`define FSR_FCC3_HI 37
+`define FSR_FCC3_LO 36
+
+
+module sparc_ifu_dcl(/*AUTOARG*/
+   // Outputs
+   ifu_exu_kill_e, ifu_exu_dontmv_regz0_e, ifu_exu_dontmv_regz1_e, 
+   ifu_exu_tcc_e, ifu_exu_dbrinst_d, ifu_ffu_mvcnd_m, 
+   dcl_fcl_bcregz0_e, dcl_fcl_bcregz1_e, dtu_inst_anull_e, 
+   dcl_swl_tcc_done_m, dcl_imd_immdata_sel_simm13_d_l, 
+   dcl_imd_immdata_sel_movcc_d_l, dcl_imd_immdata_sel_sethi_d_l, 
+   dcl_imd_immdata_sel_movr_d_l, dcl_imd_broff_sel_call_d_l, 
+   dcl_imd_broff_sel_br_d_l, dcl_imd_broff_sel_bcc_d_l, 
+   dcl_imd_broff_sel_bpcc_d_l, dcl_imd_immbr_sel_br_d, so, 
+   // Inputs
+   rclk, se, si, dtu_reset, exu_ifu_cc_d, fcl_dcl_regz_e, 
+   exu_ifu_regn_e, ffu_ifu_cc_w2, ffu_ifu_cc_vld_w2, 
+   tlu_ifu_flush_pipe_w, swl_dcl_thr_d, swl_dcl_thr_w2, 
+   imd_dcl_brcond_d, imd_dcl_mvcond_d, fdp_dcl_op_s, fdp_dcl_op3_s, 
+   imd_dcl_abit_d, dec_dcl_cctype_d, dtu_dcl_opf2_d, 
+   fcl_dtu_inst_vld_e, fcl_dtu_intr_vld_e, ifu_tlu_flush_w
+   );
+
+   input    rclk, 
+            se, 
+            si, 
+            dtu_reset;
+   
+   input [7:0] exu_ifu_cc_d;         // latest CCs from EXU
+   
+   input       fcl_dcl_regz_e,        // rs1=0
+	             exu_ifu_regn_e;        // rs1<0
+
+   input [7:0] ffu_ifu_cc_w2;
+   input [3:0] ffu_ifu_cc_vld_w2;
+
+   input       tlu_ifu_flush_pipe_w;
+   
+   input [3:0] swl_dcl_thr_d,
+	             swl_dcl_thr_w2;
+   
+   input [3:0] imd_dcl_brcond_d;     // branch condition type
+   input [7:0] imd_dcl_mvcond_d;     // mov condition type
+
+   input [1:0] fdp_dcl_op_s;
+   input [5:0] fdp_dcl_op3_s;
+   input       imd_dcl_abit_d;	      // anull bit for cond branch
+   input [2:0] dec_dcl_cctype_d;     // which cond codes to use
+   input       dtu_dcl_opf2_d;
+
+   input       fcl_dtu_inst_vld_e;
+   input       fcl_dtu_intr_vld_e;
+   input       ifu_tlu_flush_w;
+
+   output      ifu_exu_kill_e,
+		           ifu_exu_dontmv_regz0_e,
+		           ifu_exu_dontmv_regz1_e,
+		           ifu_exu_tcc_e;
+   output      ifu_exu_dbrinst_d;
+
+   output      ifu_ffu_mvcnd_m;
+   
+   output      dcl_fcl_bcregz0_e,
+               dcl_fcl_bcregz1_e;
+
+   output      dtu_inst_anull_e;
+   output      dcl_swl_tcc_done_m;
+
+   output      dcl_imd_immdata_sel_simm13_d_l,      // imm data select
+	             dcl_imd_immdata_sel_movcc_d_l,
+	             dcl_imd_immdata_sel_sethi_d_l,
+	             dcl_imd_immdata_sel_movr_d_l;
+
+   output      dcl_imd_broff_sel_call_d_l,      // dir branch offset select
+	             dcl_imd_broff_sel_br_d_l,
+	             dcl_imd_broff_sel_bcc_d_l,
+	             dcl_imd_broff_sel_bpcc_d_l;
+
+   output      dcl_imd_immbr_sel_br_d;
+   
+   output      so;
+
+//----------------------------------------------------------------------
+// Declarations
+//----------------------------------------------------------------------
+
+   wire [7:0]  cc_breval_e,
+	             fp_breval_d;
+
+   wire        abit_e;
+
+   wire        cond_brtaken_e,
+	             anull_all,
+	             anull_ubr,
+	             anull_cbr;
+
+   wire [3:0]  anull_next_e,
+               anull_e,
+               thr_anull_d;
+
+   wire        inst_anull_d,
+               inst_anull_e;
+
+   wire [3:0]  flush_abit;
+   wire        all_flush_w,
+               all_flush_w2;
+
+   wire        br_always_e;
+   
+   wire        sel_movcc,
+	             sel_movr;
+
+   wire [3:0]  br_cond_e,
+	             br_cond_d;
+   wire [3:0]  thr_vld_e;
+   
+   wire [3:0]  ls_brcond_d,
+               ls_brcond_e;
+   wire [1:0]  ccfp_sel;
+
+   wire [3:0]  cc_e;
+
+   wire [1:0]  curr_fcc_d;
+
+   wire [7:0]  fcc_d;
+
+   wire [7:0]  t0_fcc_d,
+	             t1_fcc_d,
+	             t2_fcc_d,
+	             t3_fcc_d,
+	             t0_fcc_nxt,
+	             t1_fcc_nxt,
+	             t2_fcc_nxt,
+	             t3_fcc_nxt;
+
+   wire        use_fcc0_d,
+	             use_fcc1_d,
+	             use_fcc2_d,
+	             use_fcc3_d;
+
+   wire [3:0]  thr_e,
+	             thr_dec_d;
+//	             fcc_dec_d,
+//	             fcc_dec_e;
+   
+   wire [1:0]  op_d;
+   wire [5:0]  op3_d;
+
+   wire        use_xcc_d,
+	             ltz_e,
+	             cc_eval0,
+	             cc_eval1,
+	             fp_eval0_d,
+	             fp_eval1_d,
+	             fp_eval_d,
+	             fp_eval_e,
+	             r_eval1,
+	             r_eval0,
+	             ccfp_eval,
+	             ccbr_taken_e,
+	             mvbr_sel_br_d,
+	             cc_mvbr_d,
+	             cc_mvbr_e,
+	             fpcond_mvbr_d,
+	             fpcond_mvbr_e;
+
+   wire        call_inst_e,
+               call_inst_d,
+	             dbr_inst_d,
+	             dbr_inst_e,
+	             ibr_inst_d,
+	             ibr_inst_e,
+	             mov_inst_d,
+	             mov_inst_e,
+               tcc_done_e,
+	             tcc_inst_d,
+	             tcc_inst_e;
+
+   wire        clk;
+   
+
+   
+//----------------------------------------------------------------------
+// Code starts here
+//----------------------------------------------------------------------
+   assign      clk = rclk;
+   
+
+   // S Stage Operands
+   dff_s #(2) opreg(.din  (fdp_dcl_op_s),
+		              .clk  (clk),
+		              .q    (op_d),
+		              .se   (se), .si(), .so());
+
+   dff_s #(6) op3_reg(.din  (fdp_dcl_op3_s),
+		                .clk  (clk),
+		                .q    (op3_d),
+		                .se   (se), .si(), .so());
+
+   dff_s abite_reg(.din  (imd_dcl_abit_d),
+		             .clk  (clk),
+		             .q    (abit_e),
+		             .se   (se), .si(), .so());
+
+   // need to protect from scan contention
+   dff_s #(4) thre_reg(.din (swl_dcl_thr_d),
+                     .q   (thr_e),
+                     .clk (clk), .se(se), .si(), .so());
+
+   //------------------------------
+   // Choose correct immediate data
+   //------------------------------
+   // movcc if op3 = 101100
+   assign dcl_imd_immdata_sel_movcc_d_l = ~(op_d[1] &
+					                                  op3_d[5] & ~op3_d[4] & 
+                                            op3_d[3] & ~op3_d[0]);
+
+   // movr if op3 = 101111
+   //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   // Reduced the number of terms in the eqn to help with timing 
+   // path, the result of which is that the immediate data sent to the
+   // exu for a FLUSH instruction is INCORRECT!  (It is decoded as a
+   // MOVR).  However, since our architecture completely ignores the
+   // address of the flush, this should be ok.  Confirmed with Sanjay
+   // 03/31/03. (v1.29 -> 1.30)
+   // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   assign dcl_imd_immdata_sel_movr_d_l = ~(op_d[1] &
+				                                   op3_d[5] & op3_d[3] &
+				                                   op3_d[1] & op3_d[0]);
+   
+   // sethi if op3 = 100xxx; decode is reduced to op_d[1] == 0 only
+   assign dcl_imd_immdata_sel_sethi_d_l = ~(~op_d[1]);
+
+   // everything else
+   assign dcl_imd_immdata_sel_simm13_d_l = 
+                   	        ~(dcl_imd_immdata_sel_movcc_d_l &
+                              dcl_imd_immdata_sel_movr_d_l  &
+	                            dcl_imd_immdata_sel_sethi_d_l);
+
+   //------------------------------
+   // Choose correct branch offset
+   //------------------------------
+   // call or ld/store
+   assign dcl_imd_broff_sel_call_d_l = ~(op_d[0]);
+
+   // branch on register
+   assign dcl_imd_broff_sel_br_d_l = ~(~op_d[0] & 
+				                               op3_d[4] & op3_d[3]);
+   // branch w/o prediction
+   assign dcl_imd_broff_sel_bcc_d_l = ~(~op_d[0] & 
+					                              op3_d[4] & ~op3_d[3]);
+   // everything else
+   assign dcl_imd_broff_sel_bpcc_d_l = ~(~op_d[0] & 
+					                               ~op3_d[4]);
+
+   //------------------------------------
+   // mark branch/conditional instructions
+   //------------------------------------
+   // call
+   assign call_inst_d = ~op_d[1] & op_d[0];
+   dff_s #(1) call_inste_reg(.din  (call_inst_d),
+			                     .clk  (clk),
+			                     .q    (call_inst_e),
+			                     .se   (se), .si(), .so());
+
+   // call or branch but not nop/sethi
+   assign dbr_inst_d = ~op_d[1] & (op_d[0] | op3_d[4] | op3_d[3]);
+   
+   // Choose between branch offset and immediate operand
+   assign dcl_imd_immbr_sel_br_d = dbr_inst_d;
+
+   // tell exu to use pc instead of rs1
+   assign ifu_exu_dbrinst_d = ~op_d[1];
+
+   dff_s #(1) dbr_inste_reg(.din  (dbr_inst_d),
+			                    .clk  (clk),
+			                    .q    (dbr_inst_e),
+			                    .se   (se), .si(), .so());
+
+   // jmpl + return
+   assign ibr_inst_d = op_d[1] & ~op_d[0] &
+		                    op3_d[5] &  op3_d[4] &  op3_d[3] & 
+                        ~op3_d[2] & ~op3_d[1];
+   dff_s #(1) ibr_inste_reg(.din  (ibr_inst_d),
+			                    .clk  (clk),
+			                    .q    (ibr_inst_e),
+			                    .se   (se), .si(), .so());   
+   // mov
+   assign mov_inst_d = (op_d[1] & ~op_d[0] &
+			                  op3_d[5] & ~op3_d[4] & op3_d[3] & op3_d[2] & 
+			                  (~op3_d[1] & ~op3_d[0] | op3_d[1] & op3_d[0]));
+
+   dff_s #(1) mov_inste_reg(.din  (mov_inst_d),
+			                    .clk  (clk),
+			                    .q    (mov_inst_e),
+			                    .se   (se), .si(), .so());
+   // tcc
+   assign tcc_inst_d = op_d[1] & ~op_d[0] &
+		                   op3_d[5] &  op3_d[4] &  op3_d[3] & 
+		                   ~op3_d[2] &  op3_d[1] & ~op3_d[0];
+   dff_s #(1) tcc_inste_reg(.din  (tcc_inst_d),
+			                    .clk  (clk),
+			                    .q    (tcc_inst_e),
+			                    .se   (se), .si(), .so());
+
+   assign mvbr_sel_br_d = ~op_d[1] & ~op_d[0] |          // br
+	                         op3_d[3] & ~op3_d[2] & op3_d[1] & ~op3_d[0]; // tcc
+   
+   assign cc_mvbr_d = ~(~op_d[1] & ~op_d[0] & op3_d[4] & op3_d[3] |  // bpr
+			                  op_d[1] & ~op_d[0] & op3_d[5] & ~op3_d[4] &
+			                  op3_d[3] & op3_d[2] & op3_d[1] & op3_d[0] |  // movr
+			                  op_d[1] & ~op_d[0] & op3_d[5] & op3_d[4] &
+			                  ~op3_d[3] & op3_d[2] & ~op3_d[1] & op3_d[0] &
+			                  dtu_dcl_opf2_d);                             // fmovr
+
+
+   //---------------------------
+   // FCC Logic
+   //--------------------------
+   // choose current fcc
+   assign use_fcc0_d = ~dec_dcl_cctype_d[1] & ~dec_dcl_cctype_d[0];
+   assign use_fcc1_d = ~dec_dcl_cctype_d[1] &  dec_dcl_cctype_d[0];
+   assign use_fcc2_d =  dec_dcl_cctype_d[1] & ~dec_dcl_cctype_d[0];
+   assign use_fcc3_d =  dec_dcl_cctype_d[1] &  dec_dcl_cctype_d[0];
+   
+   mux4ds #(2) fcc_mux(.dout (curr_fcc_d[1:0]),
+		                   .in0  (fcc_d[1:0]),
+		                   .in1  (fcc_d[3:2]),
+		                   .in2  (fcc_d[5:4]),
+		                   .in3  (fcc_d[7:6]),
+		                   .sel0 (use_fcc0_d),
+		                   .sel1 (use_fcc1_d),
+		                   .sel2 (use_fcc2_d),
+		                   .sel3 (use_fcc3_d));
+
+   // decode to make next step easier
+//   assign fcc_dec_d[0] = ~curr_fcc_d[1] & ~curr_fcc_d[0];
+//   assign fcc_dec_d[1] = ~curr_fcc_d[1] &  curr_fcc_d[0];
+//   assign fcc_dec_d[2] =  curr_fcc_d[1] & ~curr_fcc_d[0];
+//   assign fcc_dec_d[3] =  curr_fcc_d[1] &  curr_fcc_d[0];
+
+//   dff #(4) fcce_reg(.din (fcc_dec_d),
+//		                 .q   (fcc_dec_e),
+//		                 .clk (clk),
+//		                 .se  (se), .si(), .so());
+   
+   
+   //------------------
+   // CC Logic for BCC
+   //------------------
+   // Choose appropriate CCs
+   //
+   // dec_cctype is 3 bits
+   // 10X  icc
+   // 11X  xcc
+   // 000  fcc0
+   // 001  fcc1
+   // 010  fcc2
+   // 011  fcc3
+//   assign use_xcc_d = (dec_dcl_cctype_d[2] | op3_d[3]) & dec_dcl_cctype_d[1];
+   assign use_xcc_d = dec_dcl_cctype_d[1];   
+   assign fpcond_mvbr_d = ~dec_dcl_cctype_d[2] & ~tcc_inst_d;
+
+   dff_s fpbr_reg(.din  (fpcond_mvbr_d),
+		            .clk  (clk),
+		            .q    (fpcond_mvbr_e),
+		            .se   (se), .si(), .so());
+   
+   // mux between xcc and icc
+//   assign cc_d =  use_xcc_d ?  exu_ifu_cc_d[7:4] :      // xcc
+//			                         exu_ifu_cc_d[3:0];       // icc
+//   dff #(4)  ccreg_e(.din  (cc_d),
+//		                 .clk  (clk),
+//		                 .q    (cc_e),
+//		                 .se   (se),  .si(), .so());
+
+   bw_u1_soffm2_4x UZsize_ccreg0_e(.d0 (exu_ifu_cc_d[0]),
+                                   .d1 (exu_ifu_cc_d[4]),
+                                   .s  (use_xcc_d),
+                                   .q  (cc_e[0]),
+                                   .ck (clk), .se(se), .sd(), .so());
+   bw_u1_soffm2_4x UZsize_ccreg1_e(.d0 (exu_ifu_cc_d[1]),
+                                   .d1 (exu_ifu_cc_d[5]),
+                                   .s  (use_xcc_d),
+                                   .q  (cc_e[1]),
+                                   .ck (clk), .se(se), .sd(), .so());
+   bw_u1_soffm2_4x UZsize_ccreg2_e(.d0 (exu_ifu_cc_d[2]),
+                                   .d1 (exu_ifu_cc_d[6]),
+                                   .s  (use_xcc_d),
+                                   .q  (cc_e[2]),
+                                   .ck (clk), .se(se), .sd(), .so());
+   bw_u1_soffm2_4x UZsize_ccreg3_e(.d0 (exu_ifu_cc_d[3]),
+                                   .d1 (exu_ifu_cc_d[7]),
+                                   .s  (use_xcc_d),
+                                   .q  (cc_e[3]),
+                                   .ck (clk), .se(se), .sd(), .so());
+   
+   
+   //------------------------------
+   // Evaluate Branch
+   //------------------------------
+   // Select correct branch condition
+   assign sel_movcc = ~mvbr_sel_br_d & cc_mvbr_d;
+   assign sel_movr = ~mvbr_sel_br_d & ~cc_mvbr_d;
+   
+   // br_cond is the same as the "cond" field = inst[28:25] for bcc
+   mux3ds #(4)  brcond_mux(.dout   (br_cond_d),
+			                     .in0    (imd_dcl_brcond_d),  // br or tcc
+			                     .in1    (imd_dcl_mvcond_d[7:4]),  // movcc
+			                     .in2    (imd_dcl_mvcond_d[3:0]),  // movr
+			                     .sel0   (mvbr_sel_br_d),
+			                     .sel1   (sel_movcc),
+			                     .sel2   (sel_movr));
+   
+   dff_s #(4)  brcond_e_reg(.din  (br_cond_d),
+			                    .clk  (clk),
+			                    .q    (br_cond_e),
+			                    .se   (se), .si(), .so());
+
+   // Branch Type Decode
+   assign ls_brcond_d[0]  =  ~br_cond_d[1] & ~br_cond_d[0];
+   assign ls_brcond_d[1]  =  ~br_cond_d[1] &  br_cond_d[0];
+   assign ls_brcond_d[2]  =   br_cond_d[1] & ~br_cond_d[0];
+   assign ls_brcond_d[3]  =   br_cond_d[1] &  br_cond_d[0];
+
+   dff_s #(4)  lsbrc_e_reg(.din  (ls_brcond_d),
+			                   .clk  (clk),
+			                   .q    (ls_brcond_e),
+			                   .se   (se), .si(), .so());
+   
+   // Evaluate potential integer CC branches
+   assign ltz_e = (cc_e[`CC_N] ^ cc_e[`CC_V]);
+
+   assign cc_breval_e[0] = 1'b0;                       // BPN
+   assign cc_breval_e[1] = cc_e[`CC_Z];	               // BPE
+   assign cc_breval_e[2] = cc_e[`CC_Z] | ltz_e;        // BPLE
+   assign cc_breval_e[3] = ltz_e;                      // BPL
+   assign cc_breval_e[4] = cc_e[`CC_Z] | cc_e[`CC_C];  // BPLEU
+   assign cc_breval_e[5] = cc_e[`CC_C];                // BPCS
+   assign cc_breval_e[6] = cc_e[`CC_N];                // BPNEG
+   assign cc_breval_e[7] = cc_e[`CC_V];                // BPVS 
+
+   // mux to choose right condition
+   assign cc_eval0 = cc_breval_e[0] & ls_brcond_e[0] |
+		                 cc_breval_e[1] & ls_brcond_e[1] |
+		                 cc_breval_e[2] & ls_brcond_e[2] |
+		                 cc_breval_e[3] & ls_brcond_e[3];
+
+   assign cc_eval1 = cc_breval_e[4] & ls_brcond_e[0] |
+		                 cc_breval_e[5] & ls_brcond_e[1] |
+		                 cc_breval_e[6] & ls_brcond_e[2] |
+		                 cc_breval_e[7] & ls_brcond_e[3];
+
+   // Evaluate FP CC branches in D stage
+   assign fp_breval_d[0] = 1'b0;                            // FBN / A
+   assign fp_breval_d[1] = (curr_fcc_d[1] | curr_fcc_d[0]); // FBNE / E
+   assign fp_breval_d[2] = curr_fcc_d[1] ^ curr_fcc_d[0];   // FBLG / UE
+   assign fp_breval_d[3] = curr_fcc_d[0];                   // FBUL / GE
+   assign fp_breval_d[4] = ~curr_fcc_d[1] & curr_fcc_d[0];  // FBL / UGE
+   assign fp_breval_d[5] = curr_fcc_d[1];                   // FBUG / LE
+   assign fp_breval_d[6] = curr_fcc_d[1] & ~curr_fcc_d[0];  // FBG / ULE
+   assign fp_breval_d[7] = curr_fcc_d[1] & curr_fcc_d[0];   // FBU / O
+   
+   assign fp_eval0_d = fp_breval_d[0] & ls_brcond_d[0] |
+		                 fp_breval_d[1] & ls_brcond_d[1] |
+		                 fp_breval_d[2] & ls_brcond_d[2] |
+		                 fp_breval_d[3] & ls_brcond_d[3];
+
+   assign fp_eval1_d = fp_breval_d[4] & ls_brcond_d[0] |
+		                 fp_breval_d[5] & ls_brcond_d[1] |
+		                 fp_breval_d[6] & ls_brcond_d[2] |
+		                 fp_breval_d[7] & ls_brcond_d[3];
+
+   assign fp_eval_d = br_cond_d[2] ? fp_eval1_d :
+                                     fp_eval0_d;
+
+   dff_s #(1) fpev_ff(.din (fp_eval_d),
+		                .q   (fp_eval_e),
+		                .clk (clk),
+		                .se  (se), .si(), .so());
+
+   // merge eval0, eval1 and fp condition codes
+   assign ccfp_sel[0] = ~fpcond_mvbr_e & ~br_cond_e[2];
+   assign ccfp_sel[1] = ~fpcond_mvbr_e &  br_cond_e[2];
+//   assign ccfp_sel[2] =  fpcond_mvbr_e & ~br_cond_e[2];
+//   assign ccfp_sel[3] =  fpcond_mvbr_e &  br_cond_e[2];
+   
+   assign ccfp_eval = ccfp_sel[0] & cc_eval0 |
+		                  ccfp_sel[1] & cc_eval1 | 
+		                  fpcond_mvbr_e & fp_eval_e;
+   
+   // invert branch condition if this is an inverted br type
+//   assign ccbr_taken_e = (ccfp_eval ^ br_cond_e[3]) & cc_mvbr_e;
+   assign ccbr_taken_e = ccfp_eval ? (cc_mvbr_e & ~br_cond_e[3]) :
+                                       (cc_mvbr_e & br_cond_e[3]);
+
+   assign br_always_e = (~br_cond_e[0] & ~br_cond_e[1] & ~br_cond_e[2] & 
+	                       br_cond_e[3] & cc_mvbr_e);
+
+   //--------------
+   // For BRZ
+   // -------------
+   // Calculate Cond Assuming Z=1 And Z=0.  Then Mux
+//   assign r_eval1 = ((exu_ifu_regn_e | ~br_cond_e[1] | ~br_cond_e[0]) ^
+//		                  br_cond_e[2]) & ~cc_mvbr_e;
+   assign r_eval1 = exu_ifu_regn_e ? (~br_cond_e[2] & ~cc_mvbr_e) :
+                                       (((br_cond_e[1] & br_cond_e[0]) ^ 
+                                         ~br_cond_e[2]) & ~cc_mvbr_e);
+   
+//   assign r_eval0 = ((exu_ifu_regn_e & br_cond_e[1]) ^
+//                      br_cond_e[2]) & ~cc_mvbr_e;
+   assign r_eval0 = exu_ifu_regn_e ? ((br_cond_e[1] ^ br_cond_e[2]) & 
+                                       ~cc_mvbr_e) :
+                                       (br_cond_e[2] & ~cc_mvbr_e);
+
+   dff_s #(1) regcc_ff(.din  (cc_mvbr_d),
+		                 .clk  (clk),
+		                 .q    (cc_mvbr_e),
+		                 .se   (se), .si(), .so());
+
+   // Evaluate Final Branch condition
+   // 3:1 mux
+//   assign cond_brtaken_e = cc_mvbr_e      ?  ccbr_taken_e :
+//		                       exu_ifu_regz_e ?       r_eval1 :
+//		                                              r_eval0;
+   // 2:1 mux
+//   assign cond_brtaken_e = exu_ifu_regz_e ? (r_eval1 | ccbr_taken_e) :
+//                                              (r_eval0 | ccbr_taken_e);
+
+   //////// Chandra ////////
+
+   wire   temp0, temp1, cond_brtaken_e_l;
+
+   // limit loading on this signal
+//   wire   regz_buf_e;
+//   bw_u1_buf_5x UZfix_regz_bf(.a (exu_ifu_regz_e),
+//                              .z (regz_buf_e));
+   
+   assign temp0 = (r_eval0 | ccbr_taken_e);
+   assign temp1 = (r_eval1 | ccbr_taken_e);
+
+   bw_u1_muxi21_6x UZsize_cbtmux(.z(cond_brtaken_e_l), 
+                                  .d0(temp0), 
+                                  .d1(temp1), 
+                                  .s(fcl_dcl_regz_e));
+   
+   bw_u1_inv_20x UZsize_cbtinv(.z(cond_brtaken_e), 
+                                .a(cond_brtaken_e_l));
+
+   ////////////////////////
+
+   assign dcl_fcl_bcregz0_e = (temp0 & dbr_inst_e | ibr_inst_e | 
+                               call_inst_e) & ~dtu_inst_anull_e;
+   assign dcl_fcl_bcregz1_e = (temp1 & dbr_inst_e | ibr_inst_e | 
+                               call_inst_e) & ~dtu_inst_anull_e;
+
+//   assign ifu_exu_dontmove_e = mov_inst_e & ~cond_brtaken_e;
+   assign ifu_exu_dontmv_regz0_e = ~temp0 & mov_inst_e;
+   assign ifu_exu_dontmv_regz1_e = ~temp1 & mov_inst_e;
+
+   // branch condition to FPU
+   dff_s #(1) fpcond_ff(.din  (cond_brtaken_e),
+		                  .q    (ifu_ffu_mvcnd_m),
+		                  .clk  (clk),
+		                  .se   (se), .si(), .so());		    
+
+   // branch / move completion and anull signals
+//   assign dtu_fcl_brtaken_e = ~dtu_inst_anull_e & 
+//	                            (ibr_inst_e | call_inst_e |
+//			                         dbr_inst_e & cond_brtaken_e);
+
+   // if mov didn't succeed kill write back and bypass
+   // need to check thread as well
+//   assign ifu_exu_kill_e = dtu_inst_anull_e | 
+//	                   ~fcl_dtu_inst_vld_e;  // don't need this anymore
+   assign ifu_exu_kill_e = dtu_inst_anull_e;
+
+
+   // signal trap if tcc succeeds
+   assign ifu_exu_tcc_e = ~dtu_inst_anull_e & tcc_inst_e & ccbr_taken_e & 
+                          fcl_dtu_inst_vld_e;
+
+   assign tcc_done_e = ~dtu_inst_anull_e & tcc_inst_e & ~ccbr_taken_e & 
+                       fcl_dtu_inst_vld_e;   
+
+   dff_s #(1) tccm_ff(.din (tcc_done_e),
+                    .q   (dcl_swl_tcc_done_m),
+                    .clk (clk),
+                    .se  (se), .si(), .so());
+   
+   // logic to annul the delay slot, if this branch itself is not annulled
+   assign anull_cbr  =  abit_e & dbr_inst_e & ~br_always_e & ~call_inst_e;
+   assign anull_ubr  =  abit_e & dbr_inst_e & br_always_e & ~call_inst_e;
+
+   assign anull_all =  anull_ubr  | anull_cbr & ~cond_brtaken_e;
+
+   // check which thread to anull
+   assign thr_vld_e = thr_e & {4{fcl_dtu_inst_vld_e}};
+
+   assign all_flush_w = tlu_ifu_flush_pipe_w | ifu_tlu_flush_w;
+   dff_s #(1) flshw2_ff(.din (all_flush_w),
+                      .q   (all_flush_w2),
+                      .clk (clk), .se(se), .si(), .so());
+
+   assign flush_abit = swl_dcl_thr_w2 & {4{all_flush_w2}};
+   
+   assign anull_next_e = ((~anull_e & {4{anull_all}} & thr_vld_e) |
+ 	                        (anull_e & ~(thr_e & {4{fcl_dtu_inst_vld_e |
+                                                  fcl_dtu_intr_vld_e}}))) & 
+                           ~flush_abit;
+   
+   // anull_e needs to be per thread
+   dffr_s #(4) anull_ff(.din  (anull_next_e),
+		                  .clk  (clk),
+		                  .rst  (dtu_reset),
+		                  .q    (anull_e),
+		                  .se   (se), .si(), .so());
+
+   // 
+//   assign thr_dec_e[0] = swl_dcl_thr_e[0] | rst_tri_enable;
+//   assign thr_dec_e[3:1] = swl_dcl_thr_e[3:1] & {3{~rst_tri_enable}};
+
+   assign thr_anull_d = swl_dcl_thr_d & anull_next_e;
+   assign inst_anull_d =  (|thr_anull_d[3:0]);
+   dff_s #(1) ina_ff(.din (inst_anull_d),
+                   .q   (inst_anull_e),
+                   .clk (clk), .se (se), .si(), .so());
+                   
+   assign dtu_inst_anull_e = inst_anull_e;
+
+//   mux4ds dcla_mux(.dout (this_inst_anull_e),  
+//		               .in0  (anull_e[0]),
+//		               .in1  (anull_e[1]),
+//		               .in2  (anull_e[2]),
+//		               .in3  (anull_e[3]),
+//		               .sel0 (thr_dec_e[0]),
+//		               .sel1 (thr_dec_e[1]),
+//		               .sel2 (thr_dec_e[2]),
+//		               .sel3 (thr_dec_e[3]));
+//   assign dtu_inst_anull_e = this_inst_anull_e & fcl_dtu_inst_vld_e;
+
+
+//--------------------
+// Copy of FCC
+//--------------------
+   // FCC's are maintained in the ffu.  A copy is kept here to run the
+   // FP branch instructions.
+   
+   // load FCC from FFU
+   mux2ds #(8)  t0_fcc_mux(.dout (t0_fcc_nxt[7:0]),
+			                     .in0  (t0_fcc_d[7:0]),
+			                     .in1  (ffu_ifu_cc_w2[7:0]),
+			                     .sel0  (~ffu_ifu_cc_vld_w2[0]),
+			                     .sel1  (ffu_ifu_cc_vld_w2[0]));
+   
+   dffr_s #(8) t0_fcc_reg(.din (t0_fcc_nxt[7:0]),
+		                    .q   (t0_fcc_d[7:0]),
+		                    .rst (dtu_reset),
+		                    .clk (clk),  .se  (se), .si(), .so());
+`ifdef FPGA_SYN_1THREAD
+   assign fcc_d[7:0] = t0_fcc_d[7:0];
+`else
+   
+   mux2ds #(8)  t1_fcc_mux(.dout (t1_fcc_nxt[7:0]),
+			                     .in0  (t1_fcc_d[7:0]),
+			                     .in1  (ffu_ifu_cc_w2[7:0]),
+			                     .sel0  (~ffu_ifu_cc_vld_w2[1]),
+			                     .sel1  (ffu_ifu_cc_vld_w2[1]));
+   
+   mux2ds #(8)  t2_fcc_mux(.dout (t2_fcc_nxt[7:0]),
+			                     .in0  (t2_fcc_d[7:0]),
+			                     .in1  (ffu_ifu_cc_w2[7:0]),
+			                     .sel0  (~ffu_ifu_cc_vld_w2[2]),
+			                     .sel1  (ffu_ifu_cc_vld_w2[2]));
+   
+   mux2ds #(8)  t3_fcc_mux(.dout (t3_fcc_nxt[7:0]),
+			                     .in0  (t3_fcc_d[7:0]),
+			                     .in1  (ffu_ifu_cc_w2[7:0]),
+			                     .sel0  (~ffu_ifu_cc_vld_w2[3]),
+			                     .sel1  (ffu_ifu_cc_vld_w2[3]));
+   
+   // thread 1-3 fcc registers (the thread 0 register is instantiated before the `ifdef)
+
+   dffr_s #(8) t1_fcc_reg(.din (t1_fcc_nxt[7:0]),
+		                    .q   (t1_fcc_d[7:0]),
+		                    .rst (dtu_reset),
+		                    .clk (clk),  .se  (se), .si(), .so());
+   dffr_s #(8) t2_fcc_reg(.din (t2_fcc_nxt[7:0]),
+		                    .q   (t2_fcc_d[7:0]),
+		                    .rst (dtu_reset),
+		                    .clk (clk),  .se  (se), .si(), .so());
+   dffr_s #(8) t3_fcc_reg(.din (t3_fcc_nxt[7:0]),
+		                    .q   (t3_fcc_d[7:0]),
+		                    .rst (dtu_reset),
+		                    .clk (clk),  .se  (se), .si(), .so());
+
+   // choose thread
+   assign thr_dec_d[0] = swl_dcl_thr_d[0];
+   assign thr_dec_d[3:1] = swl_dcl_thr_d[3:1];
+   
+   mux4ds #(8) fcc0d_mx(.dout (fcc_d[7:0]),
+		                    .in0  (t0_fcc_d[7:0]),
+		                    .in1  (t1_fcc_d[7:0]),
+		                    .in2  (t2_fcc_d[7:0]),
+		                    .in3  (t3_fcc_d[7:0]),
+		                    .sel0 (thr_dec_d[0]),
+		                    .sel1 (thr_dec_d[1]),
+		                    .sel2 (thr_dec_d[2]),
+		                    .sel3 (thr_dec_d[3]));
+
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+endmodule // sparc_ifu_dcl
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_ifqdp.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_ifqdp.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_ifqdp.v	(revision 6)
@@ -0,0 +1,781 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_ifqdp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_ifqdp
+//  Description:	
+//  The IFQ is the icache fill queue.  This communicates between the
+//  IFU and the outside world.  It handles icache misses and
+//  invalidate requests from the crossbar.  
+//
+*/
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+`include "iop.h"
+`include "ifu.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module sparc_ifu_ifqdp(/*AUTOARG*/
+   // Outputs
+   so, ifu_lsu_pcxpkt_e, ifq_fdp_fill_inst, ifq_erb_asidata_i2, 
+   ifd_inv_ifqop_i2, ifq_icd_index_bf, ifq_icd_wrdata_i2, 
+   ifq_ict_wrtag_f, ifq_erb_wrindex_f, ifq_icd_wrway_bf, 
+   ifd_ifc_milhit_s, ifd_ifc_instoffset0, ifd_ifc_instoffset1, 
+   ifd_ifc_instoffset2, ifd_ifc_instoffset3, ifd_ifc_cpxthr_nxt, 
+   ifd_ifc_cpxreq_nxt, ifd_ifc_cpxreq_i1, ifd_ifc_destid0, 
+   ifd_ifc_destid1, ifd_ifc_destid2, ifd_ifc_destid3, 
+   ifd_ifc_newdestid_s, ifd_ifc_pcxline_d, ifd_ifc_asi_vachklo_i2, 
+   ifd_ifc_cpxvld_i2, ifd_ifc_asiaddr_i2, ifd_ifc_iobpkt_i2, 
+   ifd_ifc_fwd2ic_i2, ifd_ifc_4bpkt_i2, ifd_ifc_cpxnc_i2, 
+   ifd_ifc_cpxce_i2, ifd_ifc_cpxue_i2, ifd_ifc_cpxms_i2, 
+   ifd_ifc_miladdr4_i2, ifd_inv_wrway_i2, 
+   // Inputs
+   rclk, se, si, lsu_ifu_cpxpkt_i1, lsu_ifu_asi_addr, 
+   lsu_ifu_stxa_data, itlb_ifq_paddr_s, fdp_ifq_paddr_f, 
+   ifc_ifd_reqvalid_e, ifc_ifd_filladdr4_i2, ifc_ifd_repway_s, 
+   ifc_ifd_uncached_e, ifc_ifd_thrid_e, ifc_ifd_pcxline_adj_d, 
+   ifc_ifd_errinv_e, ifc_ifd_ldmil_sel_new, ifc_ifd_ld_inq_i1, 
+   ifc_ifd_idx_sel_fwd_i2, ifc_ifd_milreq_sel_d_l, 
+   ifc_ifd_milfill_sel_i2_l, ifc_ifd_finst_sel_l, 
+   ifc_ifd_ifqbyp_sel_fwd_l, ifc_ifd_ifqbyp_sel_inq_l, 
+   ifc_ifd_ifqbyp_sel_asi_l, ifc_ifd_ifqbyp_sel_lsu_l, 
+   ifc_ifd_ifqbyp_en_l, ifc_ifd_addr_sel_bist_i2_l, 
+   ifc_ifd_addr_sel_asi_i2_l, ifc_ifd_addr_sel_old_i2_l, 
+   ifc_ifd_addr_sel_fill_i2_l, mbist_icache_way, mbist_icache_word, 
+   mbist_icache_index
+   );
+
+   input 	 rclk, 
+           se, 
+           si;
+   
+   input [`CPX_WIDTH-1:0] lsu_ifu_cpxpkt_i1;
+   input [17:0]   lsu_ifu_asi_addr;
+   input [47:0]   lsu_ifu_stxa_data;
+   
+   input [39:10]  itlb_ifq_paddr_s;
+   input [9:2]    fdp_ifq_paddr_f;
+   
+   // from ifqctl
+   input         ifc_ifd_reqvalid_e;
+   input         ifc_ifd_filladdr4_i2;
+   input [1:0]   ifc_ifd_repway_s;
+   input         ifc_ifd_uncached_e;
+   input [1:0]   ifc_ifd_thrid_e;
+   input [4:2]   ifc_ifd_pcxline_adj_d;
+   
+   input         ifc_ifd_errinv_e;
+   
+   // 2:1 mux selects
+   input [3:0]   ifc_ifd_ldmil_sel_new;  // mil load enable
+   
+   input        ifc_ifd_ld_inq_i1;        // ld new cpxreq to in buffer
+   input        ifc_ifd_idx_sel_fwd_i2;
+   
+   // other mux selects
+   input [3:0]  ifc_ifd_milreq_sel_d_l,   // selects outgoing mil_req
+		            ifc_ifd_milfill_sel_i2_l; // selects the mil entry just
+	 // returned from the fill
+	 // port
+   input [3:0]  ifc_ifd_finst_sel_l;    // address to load to thr IR
+
+   input        ifc_ifd_ifqbyp_sel_fwd_l, // select next input to process
+		            ifc_ifd_ifqbyp_sel_inq_l,
+		            ifc_ifd_ifqbyp_sel_asi_l,
+		            ifc_ifd_ifqbyp_sel_lsu_l;
+	 input        ifc_ifd_ifqbyp_en_l;   
+
+   input        ifc_ifd_addr_sel_bist_i2_l,
+		            ifc_ifd_addr_sel_asi_i2_l,
+                ifc_ifd_addr_sel_old_i2_l,
+		            ifc_ifd_addr_sel_fill_i2_l;
+   
+   input [1:0]  mbist_icache_way;
+   input        mbist_icache_word;
+   input [7:0]  mbist_icache_index;
+   
+   output       so;
+   
+   output [51:0] ifu_lsu_pcxpkt_e;
+
+   output [32:0] ifq_fdp_fill_inst;
+   output [47:0] ifq_erb_asidata_i2;
+
+   output [`CPX_WIDTH-1:0] ifd_inv_ifqop_i2;
+
+   output [`IC_IDX_HI:2]  ifq_icd_index_bf;   // index for wr and bist
+   
+   output [135:0]         ifq_icd_wrdata_i2;
+   output [`IC_TAG_SZ:0]  ifq_ict_wrtag_f;      // fill tag
+//   output [`IC_TAG_SZ-1:0] ifq_erb_wrtag_f;      // tag w/o parity
+   output [`IC_IDX_HI:4]   ifq_erb_wrindex_f;
+   output [1:0]            ifq_icd_wrway_bf;     // fill data way
+   
+   output [3:0]           ifd_ifc_milhit_s;     // if an Imiss hits in MIL
+//   output [7:0]           ifd_ifc_mil_repway_s;
+   
+   output [1:0]           ifd_ifc_instoffset0;   // to select inst to TIR
+   output [1:0]           ifd_ifc_instoffset1;   // to select inst to TIR
+   output [1:0]           ifd_ifc_instoffset2;   // to select inst to TIR
+   output [1:0]           ifd_ifc_instoffset3;   // to select inst to TIR
+
+   output [1:0]            ifd_ifc_cpxthr_nxt;
+   output [3:0]            ifd_ifc_cpxreq_nxt;    // cpx reqtype + vbit
+   output [`CPX_RQ_SIZE:0] ifd_ifc_cpxreq_i1;    // cpx reqtype + vbit
+
+   
+   output [2:0]            ifd_ifc_destid0,
+		                       ifd_ifc_destid1,
+		                       ifd_ifc_destid2,
+		                       ifd_ifc_destid3,
+		                       ifd_ifc_newdestid_s;
+
+   output [4:2]            ifd_ifc_pcxline_d;
+
+   output                  ifd_ifc_asi_vachklo_i2;
+                         
+   output                  ifd_ifc_cpxvld_i2;
+   output [3:2]            ifd_ifc_asiaddr_i2;   
+   output                  ifd_ifc_iobpkt_i2;
+   output                  ifd_ifc_fwd2ic_i2;
+   output                  ifd_ifc_4bpkt_i2;
+   output                  ifd_ifc_cpxnc_i2;
+   output                  ifd_ifc_cpxce_i2,
+		                       ifd_ifc_cpxue_i2,
+                           ifd_ifc_cpxms_i2;
+   
+   output [3:0]            ifd_ifc_miladdr4_i2;
+   
+   output [1:0]            ifd_inv_wrway_i2;
+
+   
+ 	 
+   //----------------------------------------------------------------------
+   // Declarations
+   //----------------------------------------------------------------------   
+
+   // local signals
+   wire [39:0]             imiss_paddr_s;
+   wire [9:2]              lcl_paddr_s;
+   
+   wire [42:2]             mil_entry0,         // mil entries
+		                       mil_entry1,
+		                       mil_entry2,
+		                       mil_entry3;
+
+//   wire [42:2]             mil0_in_s,          // inputs to mil
+//		                       mil1_in_s,
+//		                       mil2_in_s,
+//		                       mil3_in_s;
+
+   wire                    tag_par_s,
+		                       tag_par_i2;
+
+   wire [42:2]             newmil_entry_s;
+   
+   wire [42:2]             mil_pcxreq_d,        // outgoing request from mil
+		                       pcxreq_d,            // mil or direct ic or prev req
+		                       pcxreq_e;          // outgoing request to lsu
+
+   wire [42:2]             fill_addr_i2,
+		                       fill_addr_adj,
+		                       icaddr_i2,
+		                       asi_addr_i2,
+		                       bist_addr_i2;
+
+   wire [42:4]             wraddr_f;
+
+
+   wire [`CPX_WIDTH-1:0]   inq_cpxpkt_i1,   // output from inq
+//			                     inq_cpxpkt_nxt,
+			                     stxa_data_pkt,
+                           fwd_data_pkt,
+			                     ifqop_i1,
+			                     ifqop_i2;        // ifq op currently being processed
+
+   wire [3:0]              swc_i2;
+   
+   wire [135:0]            icdata_i2;
+   
+   wire [3:0]              parity_i2,
+		                       par_i2;
+
+   wire [17:0]             asi_va_i2,
+                           asi_va_i1;
+   wire [13:2]             asi_fwd_index;
+   wire                    clk;
+   
+   
+//   wire [`IC_IDX_HI:6]     inv_addr_i2;
+   
+   //
+   // Code start here 
+   //
+
+   assign                  clk = rclk;
+   
+   //----------------------------------------------------------------------
+   // Instruction Miss - Fill Request Datapath
+   //----------------------------------------------------------------------
+
+   // new set of flops
+   dff_s #(8) pcs_reg(.din (fdp_ifq_paddr_f[9:2]),
+                    .q   (lcl_paddr_s[9:2]),
+                    .clk (clk), .se(se), .si(), .so());
+                    
+
+   // bits 1:0 are floating
+   assign  imiss_paddr_s = {itlb_ifq_paddr_s[39:10], 
+                            lcl_paddr_s[9:2],
+                            2'b0};
+   
+   // Check for hit in MIL
+   // Should we enable the comps to save power? -- timing problem
+
+   // compare only top 35 bits (bot 5 bits are line offset of 32B line)
+   sparc_ifu_cmp35 milcmp0 (.hit (ifd_ifc_milhit_s[0]),
+			                      .a (imiss_paddr_s[39:5]),
+			                      .b (mil_entry0[39:5]),
+			                      .valid (1'b1)
+			                      );
+   
+   sparc_ifu_cmp35 milcmp1 (.hit (ifd_ifc_milhit_s[1]),
+			                      .a (imiss_paddr_s[39:5]),
+			                      .b (mil_entry1[39:5]),
+			                      .valid (1'b1)
+			                      );
+   
+   sparc_ifu_cmp35 milcmp2 (.hit (ifd_ifc_milhit_s[2]),
+			                      .a (imiss_paddr_s[39:5]),
+			                      .b (mil_entry2[39:5]),
+			                      .valid (1'b1)
+			                      );
+   sparc_ifu_cmp35 milcmp3 (.hit (ifd_ifc_milhit_s[3]),
+			                      .a (imiss_paddr_s[39:5]),
+			                      .b (mil_entry3[39:5]),
+			                      .valid (1'b1)
+			                      );
+
+   // Send replacement way to ctl logic
+//   assign  ifd_ifc_mil_repway_s =  {mil_entry3[41:40],
+//	                                  mil_entry2[41:40],
+//	                                  mil_entry1[41:40],
+//	                                  mil_entry0[41:40]};
+
+
+   // calculate tag parity
+   sparc_ifu_par32 tag_par(.in  ({{`ICT_FILL_BITS{1'b0}}, 
+                                  imiss_paddr_s[`IC_TAG_HI:`IC_TAG_LO]}),
+			                     .out (tag_par_s));
+
+
+   // Missed Instruction List
+   // 43    - NOT cacheable (not held in the 41-bit entry [42:2] built below)
+   // 42    - tag parity
+   // 41:40 - repl way
+   // 39:2  - paddr (word address; bits 1:0 are not stored)
+
+   // Prepare Missed Instruction List entry
+   assign  newmil_entry_s = {tag_par_s,
+			                       ifc_ifd_repway_s,			    
+			                       imiss_paddr_s[39:2]};
+
+   // ldmil_sel is thr_s[3:0] & imiss_s
+//   dp_mux2es  #(41)    milin_mux0(.dout (mil0_in_s),
+//				                          .in0  (mil_entry0), 
+//				                          .in1  (newmil_entry_s),
+//				                          .sel  (ifc_ifd_ldmil_sel_new[0]));
+//   dp_mux2es  #(41)    milin_mux1(.dout (mil1_in_s),
+//				                        .in0  (mil_entry1), 
+//				                        .in1  (newmil_entry_s),
+//				                        .sel  (ifc_ifd_ldmil_sel_new[1]));
+//   dp_mux2es  #(41)    milin_mux2(.dout (mil2_in_s),
+//				                        .in0  (mil_entry2), 
+//				                        .in1  (newmil_entry_s),
+//				                        .sel  (ifc_ifd_ldmil_sel_new[2]));
+//   dp_mux2es  #(41)    milin_mux3(.dout (mil3_in_s),
+//				                        .in0  (mil_entry3), 
+//				                        .in1  (newmil_entry_s),
+//				                        .sel  (ifc_ifd_ldmil_sel_new[3]));
+
+   wire    clk_mil0;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   bw_u1_ckenbuf_6x  ckenmil0(.rclk (rclk),
+                              .clk  (clk_mil0),
+                              .en_l (~ifc_ifd_ldmil_sel_new[0]),
+                              .tm_l (~se));
+`endif
+   wire    clk_mil1;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   bw_u1_ckenbuf_6x  ckenmil1(.rclk (rclk),
+                              .clk  (clk_mil1),
+                              .en_l (~ifc_ifd_ldmil_sel_new[1]),
+                              .tm_l (~se));
+`endif
+   wire    clk_mil2;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   bw_u1_ckenbuf_6x  ckenmil2(.rclk (rclk),
+                              .clk  (clk_mil2),
+                              .en_l (~ifc_ifd_ldmil_sel_new[2]),
+                              .tm_l (~se));
+`endif
+   wire    clk_mil3;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   bw_u1_ckenbuf_6x  ckenmil3(.rclk (rclk),
+                              .clk  (clk_mil3),
+                              .en_l (~ifc_ifd_ldmil_sel_new[3]),
+                              .tm_l (~se));
+`endif
+   
+
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(41)   mil0(.din  (newmil_entry_s), 
+		                .en (~(~ifc_ifd_ldmil_sel_new[0])), .clk(rclk), 
+		                .q    (mil_entry0), 
+		                .se   (se), .si(), .so());
+`else
+   dff_s #(41)   mil0(.din  (newmil_entry_s), 
+		                .clk  (clk_mil0), 
+		                .q    (mil_entry0), 
+		                .se   (se), .si(), .so());
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(41)   mil1(.din (newmil_entry_s), 
+		                .en (~(~ifc_ifd_ldmil_sel_new[1])), .clk(rclk), 
+		                .q   (mil_entry1), 
+		                .se  (se), .si(), .so());
+`else
+   dff_s #(41)   mil1(.din (newmil_entry_s), 
+		                .clk (clk_mil1), 
+		                .q   (mil_entry1), 
+		                .se  (se), .si(), .so());
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(41)   mil2(.din (newmil_entry_s), 
+		                .en (~(~ifc_ifd_ldmil_sel_new[2])), .clk(rclk), 
+		                .q   (mil_entry2), 
+		                .se  (se), .si(), .so());
+`else
+   dff_s #(41)   mil2(.din (newmil_entry_s), 
+		                .clk (clk_mil2), 
+		                .q   (mil_entry2), 
+		                .se  (se), .si(), .so());
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(41)   mil3(.din (newmil_entry_s), 
+		                .en (~(~ifc_ifd_ldmil_sel_new[3])), .clk(rclk), 
+		                .q   (mil_entry3), 
+		                .se  (se), .si(), .so());
+`else
+   dff_s #(41)   mil3(.din (newmil_entry_s), 
+		                .clk (clk_mil3), 
+		                .q   (mil_entry3), 
+		                .se  (se), .si(), .so());
+`endif
+
+   assign  ifd_ifc_newdestid_s = {imiss_paddr_s[39], 
+				                          imiss_paddr_s[`BANK_ID_HI:`BANK_ID_LO]};
+   assign  ifd_ifc_destid0 = {mil_entry0[39], 
+			                        mil_entry0[`BANK_ID_HI:`BANK_ID_LO]};
+   assign  ifd_ifc_destid1 = {mil_entry1[39], 
+			                        mil_entry1[`BANK_ID_HI:`BANK_ID_LO]};
+   assign  ifd_ifc_destid2 = {mil_entry2[39], 
+			                        mil_entry2[`BANK_ID_HI:`BANK_ID_LO]};
+   assign  ifd_ifc_destid3 = {mil_entry3[39], 
+			                        mil_entry3[`BANK_ID_HI:`BANK_ID_LO]};
+
+   assign  ifd_ifc_instoffset0 = mil_entry0[3:2];
+   assign  ifd_ifc_instoffset1 = mil_entry1[3:2];
+   assign  ifd_ifc_instoffset2 = mil_entry2[3:2];
+   assign  ifd_ifc_instoffset3 = mil_entry3[3:2];
+   
+   
+   // MIL Request Out mux
+   dp_mux4ds  #(41)  milreq_mux (.dout (mil_pcxreq_d),
+			                         .in0  ({mil_entry0[42:2]}),
+			                         .in1  ({mil_entry1[42:2]}),
+			                         .in2  ({mil_entry2[42:2]}),
+			                         .in3  ({mil_entry3[42:2]}),
+			                         .sel0_l  (ifc_ifd_milreq_sel_d_l[0]),
+			                         .sel1_l  (ifc_ifd_milreq_sel_d_l[1]),
+			                         .sel2_l  (ifc_ifd_milreq_sel_d_l[2]),
+			                         .sel3_l  (ifc_ifd_milreq_sel_d_l[3]));
+   
+   // Next PCX Request Mux
+//   dp_mux3ds  #(44)  nxtpcx_mux (.dout  (pcxreq_d),
+//			                         .in0   (mil_pcxreq_d), 
+//			                         .in1   (44'bx),
+//			                         .in2   (pcxreq_e),
+//			                         .sel0_l  (ifc_ifd_nxtpcx_sel_new_d_l),
+//			                         .sel1_l  (ifc_ifd_nxtpcx_sel_err_d_l),
+//			                         .sel2_l  (ifc_ifd_nxtpcx_sel_prev_d_l));
+
+
+   // TBD: If destid == any L2 bank, need to zero out bit 4 for Rams
+   //    -- done
+   assign  ifd_ifc_pcxline_d[4:2] = mil_pcxreq_d[4:2];
+   
+   assign  pcxreq_d[42:5] = mil_pcxreq_d[42:5];
+   assign  pcxreq_d[4:2] = ifc_ifd_pcxline_adj_d[4:2];
+//   assign  pcxreq_d[1:0] = mil_pcxreq_d[1:0];  // dont need this
+
+   dff_s #(41) pcxreq_reg (.din  (pcxreq_d),
+			                    .clk  (clk),
+			                    .q    (pcxreq_e),
+			                    .se   (se), .si(), .so());
+
+// CHANGE to regular dff   
+//   dffe #(44) pcxreq_reg (.din  (pcxreq_d),
+//			                    .clk  (clk),
+//			                    .q    (pcxreq_e),
+//                          .en   (ifc_ifd_nxtpcx_sel_new_d),
+//			                    .se   (se), .si(), .so());
+   
+   // PCX Req Reg -- req type is 5 bits
+   assign   ifu_lsu_pcxpkt_e = {ifc_ifd_reqvalid_e,   // 51    - valid
+			                          ifc_ifd_errinv_e,     // 50 - inv all ways
+                                ifc_ifd_uncached_e,   // 49 - not cacheable
+			                          {`IMISS_RQ},          // 48:44 - req type
+			                          pcxreq_e[41:40],      // 43:42 - rep way
+			                          ifc_ifd_thrid_e[1:0], // 41:40 - thrid
+			                          pcxreq_e[39:2],       // 39:2  - word address
+			                          2'b0};                // force to zero
+   
+
+   //----------------------------------------------------------------------
+   // Fill Return Address
+   //----------------------------------------------------------------------
+
+   // MIL Fill Return Mux
+   dp_mux4ds  #(41)  milfill_mux(.dout (fill_addr_i2),
+			                         .in0 ( mil_entry0),
+			                         .in1 ( mil_entry1),
+			                         .in2 ( mil_entry2),
+			                         .in3 ( mil_entry3),
+			                         .sel0_l (ifc_ifd_milfill_sel_i2_l[0]),
+			                         .sel1_l (ifc_ifd_milfill_sel_i2_l[1]),
+			                         .sel2_l (ifc_ifd_milfill_sel_i2_l[2]),
+			                         .sel3_l (ifc_ifd_milfill_sel_i2_l[3]));
+
+   assign   ifd_ifc_miladdr4_i2[3:0]  = {mil_entry3[4],
+                                         mil_entry2[4],
+                                         mil_entry1[4],
+                                         mil_entry0[4]};
+   
+   assign   ifd_ifc_iobpkt_i2 = fill_addr_i2[39];
+   assign   fill_addr_adj = {fill_addr_i2[42:5], 
+			                       ifc_ifd_filladdr4_i2,
+			                       fill_addr_i2[3:2]};
+   // determine if this is cacheable in I$
+   // moved to ifqctl
+//   assign   ifd_ifc_uncached_i2 = fill_addr_i2[43];
+
+   // merged with addren mux to save some timing
+   dp_mux4ds #(41) icadr_mux(.dout (icaddr_i2),
+			                       .in0  (fill_addr_adj),
+			                       .in1  (asi_addr_i2),
+			                       .in2  (bist_addr_i2),
+                             .in3  ({wraddr_f[42:4], 2'b0}),
+			                       .sel0_l (ifc_ifd_addr_sel_fill_i2_l),
+			                       .sel1_l (ifc_ifd_addr_sel_asi_i2_l),
+			                       .sel2_l (ifc_ifd_addr_sel_bist_i2_l),
+                             .sel3_l (ifc_ifd_addr_sel_old_i2_l));
+   
+   // way, 32B line sel
+   assign ifd_inv_wrway_i2 =  icaddr_i2[41:40];
+
+//   dp_mux2es  #(39)  addren_mux(.dout (wraddr_i2),
+//			                        .in0  (wraddr_f),
+//			                        .in1  (icaddr_i2[42:4]),
+//			                        .sel  (ifc_ifd_ifqadv_i2));
+
+   
+   dff_s #(39) wraddr_reg(.din  (icaddr_i2[42:4]),
+		                    .clk  (clk),
+		                    .q    (wraddr_f[42:4]),
+		                    .se   (se), .si(), .so());
+
+   // tag = parity bit + `IC_TAG_SZ bits of address
+   assign  ifq_erb_wrindex_f = wraddr_f[`IC_IDX_HI:4];
+   assign  ifq_ict_wrtag_f = {wraddr_f[42], wraddr_f[`IC_TAG_HI:`IC_TAG_LO]};
+
+   assign  ifq_icd_index_bf = icaddr_i2[`IC_IDX_HI:2];
+   assign  ifq_icd_wrway_bf = icaddr_i2[41:40];
+
+   //----------------------------------------------------------------------
+   // Fill Return Data
+   //----------------------------------------------------------------------
+   // IFQ-IBUF
+   // inq is the same size as the cpx_width
+   // inq is replaced with a single flop, ibuf
+
+   // ibuf enable mux
+//   dp_mux2es  #(`CPX_WIDTH)  ifqen_mux(.dout (inq_cpxpkt_nxt),
+//				                             .in0 (inq_cpxpkt_i1),
+//				                             .in1 (lsu_ifu_cpxpkt_i1), 
+//				                             .sel (ifc_ifd_ld_inq_i1));
+
+   wire    clk_ibuf1;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   bw_u1_ckenbuf_6x  ckenibuf(.rclk (rclk),
+                              .clk  (clk_ibuf1),
+                              .en_l (~ifc_ifd_ld_inq_i1),
+                              .tm_l (~se));
+`endif
+                             
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(`CPX_WIDTH) ibuf(.din (lsu_ifu_cpxpkt_i1),
+			                  .q   (inq_cpxpkt_i1),
+			                  .en (~(~ifc_ifd_ld_inq_i1)), .clk(rclk),
+			                  .se  (se), .si(), .so());
+`else
+   dff_s #(`CPX_WIDTH) ibuf(.din (lsu_ifu_cpxpkt_i1),
+			                  .q   (inq_cpxpkt_i1),
+			                  .clk (clk_ibuf1),
+			                  .se  (se), .si(), .so());
+`endif
+
+   assign  ifd_ifc_cpxreq_i1 = {inq_cpxpkt_i1[`CPX_VLD], 
+			                          inq_cpxpkt_i1[`CPX_REQFIELD]};
+
+   // ifq operand bypass mux
+   // fill pkt is 128d+2w+2t+3iw+1v+1nc+4r = 140
+   dp_mux4ds  #(`CPX_WIDTH)  ifq_bypmux(.dout (ifqop_i1),
+				                              .in0 (fwd_data_pkt), 
+				                              .in1 (inq_cpxpkt_i1),
+				                              .in2 (stxa_data_pkt),
+				                              .in3 (lsu_ifu_cpxpkt_i1),
+				                              .sel0_l (ifc_ifd_ifqbyp_sel_fwd_l),
+				                              .sel1_l (ifc_ifd_ifqbyp_sel_inq_l),
+				                              .sel2_l (ifc_ifd_ifqbyp_sel_asi_l),
+				                              .sel3_l (ifc_ifd_ifqbyp_sel_lsu_l));
+   
+   wire    clk_ifqop;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   bw_u1_ckenbuf_6x  ckenifop(.rclk (rclk),
+                              .clk  (clk_ifqop),
+                              .en_l (ifc_ifd_ifqbyp_en_l),
+                              .tm_l (~se));
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(`CPX_WIDTH)  ifqop_reg(.din (ifqop_i1), 
+			                        .q   (ifqop_i2), 
+			                        .en (~(ifc_ifd_ifqbyp_en_l)), .clk(rclk), 
+			                        .se  (se), .si(), .so());
+`else
+   dff_s #(`CPX_WIDTH)  ifqop_reg(.din (ifqop_i1), 
+			                        .q   (ifqop_i2), 
+			                        .clk (clk_ifqop), 
+			                        .se  (se), .si(), .so());
+`endif
+   assign  ifd_inv_ifqop_i2 = ifqop_i2;
+   
+   // switch condition pre decode
+   sparc_ifu_swpla  swpla0(.in  (ifqop_i2[31:0]),
+			                     .out (swc_i2[0]));
+   sparc_ifu_swpla  swpla1(.in  (ifqop_i2[63:32]),
+			                     .out (swc_i2[1]));
+   sparc_ifu_swpla  swpla2(.in  (ifqop_i2[95:64]),
+			                     .out (swc_i2[2]));
+   sparc_ifu_swpla  swpla3(.in  (ifqop_i2[127:96]),
+			                     .out (swc_i2[3]));
+
+   // Add Parity to each inst.
+   sparc_ifu_par32 par0(.in  (ifqop_i2[31:0]),
+			                  .out (par_i2[0]));
+   sparc_ifu_par32 par1(.in  (ifqop_i2[63:32]),
+			                  .out (par_i2[1]));
+   sparc_ifu_par32 par2(.in  (ifqop_i2[95:64]),
+			                  .out (par_i2[2]));
+   sparc_ifu_par32 par3(.in  (ifqop_i2[127:96]),
+			                  .out (par_i2[3]));
+
+   // add 8 xor gates in the dp
+   //   assign parity_i2 = par_i2 ^ swc_i2 ^ {4{ifc_ifd_insert_pe}};
+   //   assign tag_par_i2 = par_i2[0] ^ ifc_ifd_insert_pe;
+
+   // Make the par32 cell above, par33 and include cpxue_i2
+   assign   parity_i2 = par_i2 ^ swc_i2 ^ {4{ifd_ifc_cpxue_i2}};
+   assign   tag_par_i2 = par_i2[0] ^ ifd_ifc_cpxue_i2;
+   
+   // parity, swc, inst[31:0]
+   assign   icdata_i2 = {parity_i2[3], ifqop_i2[127:96], swc_i2[3],
+		                     parity_i2[2], ifqop_i2[95:64],  swc_i2[2],
+		                     parity_i2[1], ifqop_i2[63:32],  swc_i2[1],
+		                     parity_i2[0], ifqop_i2[31:0],   swc_i2[0]};
+
+   // write data to icache
+   assign ifq_icd_wrdata_i2 = icdata_i2;
+
+
+   // very critical
+   assign ifd_ifc_cpxreq_nxt   = ifqop_i1[`CPX_REQFIELD];
+   assign ifd_ifc_cpxthr_nxt   = ifqop_i1[`CPX_THRFIELD];
+
+   assign ifd_ifc_cpxvld_i2   = ifqop_i2[`CPX_VLD];
+   assign ifd_ifc_4bpkt_i2    = ifqop_i2[`CPX_IF4B];
+   assign ifd_ifc_cpxce_i2    = ifqop_i2[`CPX_ERR_LO];
+   assign ifd_ifc_cpxue_i2    = ifqop_i2[(`CPX_ERR_LO + 1)];
+   assign ifd_ifc_cpxms_i2    = ifqop_i2[(`CPX_ERR_LO + 2)];
+   assign ifd_ifc_cpxnc_i2    = ifqop_i2[`CPX_NC];
+   assign ifd_ifc_fwd2ic_i2   = ifqop_i2[103];
+
+   // instr sel mux to write to thread inst register in S stage
+   // instr is always BIG ENDIAN
+   dp_mux4ds  #(33)  fillinst_mux(.dout (ifq_fdp_fill_inst),
+				                        .in0 (icdata_i2[134:102]),
+				                        .in1 (icdata_i2[100:68]),
+				                        .in2 (icdata_i2[66:34]),
+				                        .in3 (icdata_i2[32:0]),
+				                        .sel0_l (ifc_ifd_finst_sel_l[0]),
+				                        .sel1_l (ifc_ifd_finst_sel_l[1]),
+				                        .sel2_l (ifc_ifd_finst_sel_l[2]),
+				                        .sel3_l (ifc_ifd_finst_sel_l[3]));
+
+   // synopsys translate_off
+//`ifdef DEFINE_0IN
+//`else
+//   always @ (ifq_fdp_fill_inst or ifd_ifc_cpxreq_i2)
+//     if (((^ifq_fdp_fill_inst[32:0]) == 1'bx) && (ifd_ifc_cpxreq_i2 == `CPX_IFILLPKT))
+//       begin
+//          $display("ifqdp.v: Imiss Return val = %h\n", ifqop_i2);
+//          $error("IFQCPX", "Error: X's detected in Imiss Return Inst %h", 
+//                 ifq_fdp_fill_inst[31:0]);
+//       end
+//`endif
+   // synopsys translate_on
+   
+   
+   // TBD: 1. inv way in fill pkt -- DONE
+   //      2. inv packet -- DONE
+   //      3. DFT pkt from TAP -- NO NEED
+   //      4. Ld pkt to invalidate i$  -- DONE
+
+   //----------------------------------------------------------------------
+   // ASI Access
+   //----------------------------------------------------------------------
+   // mux stxa pkt into the cpx
+   assign  stxa_data_pkt[`CPX_VLD] = 1'b0;
+   // vbits and parity are muxed into the cpxreq
+   assign  stxa_data_pkt[`CPX_REQFIELD] = {1'b1, lsu_ifu_stxa_data[34:32]}; 
+//   assign  stxa_data_pkt[`CPX_THRFIELD] = lsu_ifu_asi_thrid[1:0];
+   assign  stxa_data_pkt[`CPX_THRFIELD] = 2'b0;   
+   // use parity to insert error in icache inst or tag
+   assign  stxa_data_pkt[(`CPX_ERR_LO + 1)] = lsu_ifu_stxa_data[32];   
+   assign  stxa_data_pkt[127:0] = {4{lsu_ifu_stxa_data[31:0]}};
+
+   // other bits need to be tied off
+   assign  stxa_data_pkt[133:128] = 6'b0;   
+   assign  stxa_data_pkt[137:136] = 2'b0;   
+   assign  stxa_data_pkt[139] = 1'b0;
+
+   // format fwd data pkt in a similar way
+   assign  fwd_data_pkt[`CPX_VLD:(`CPX_ERR_LO + 2)] = ifqop_i2[`CPX_VLD:(`CPX_ERR_LO + 2)];
+   assign  fwd_data_pkt[(`CPX_ERR_LO + 1)] = ifqop_i2[32];   
+   assign  fwd_data_pkt[`CPX_ERR_LO:128] = ifqop_i2[`CPX_ERR_LO:128];
+   assign  fwd_data_pkt[127:0] = {4{ifqop_i2[31:0]}};
+   
+
+   
+   dff_s #(16) stxa_ff(.din (lsu_ifu_stxa_data[47:32]),
+		                 .q   (ifq_erb_asidata_i2[47:32]), 
+		                 .clk (clk), .se(se), .si(), .so());
+   assign  ifq_erb_asidata_i2[31:0] = ifqop_i2[31:0];
+
+   // va[63:32] is truncated
+   // In this architecture we only need va[17:0]
+   // rest of the bits are here only for the address range check
+   // 12 new muxes (10 for addr, 2 for way)
+   // CHANGE: this mux has been moved before the asi_addr_reg, rather
+   // than after.
+   // Use mux flop soffm2?
+   dp_mux2es #(12) asifwd_mx(.dout (asi_fwd_index[13:2]),
+                             .in0  ({lsu_ifu_asi_addr[17:16],   // asi way
+                                     lsu_ifu_asi_addr[12:3]}),  // asi addr
+                             .in1  ({ifqop_i2[81:80],    // fwd rq way
+                                     ifqop_i2[76:67]}),  // fwd rq addr
+                             .sel  (ifc_ifd_idx_sel_fwd_i2));
+
+   assign asi_va_i1 = {asi_fwd_index[13:12],
+                       lsu_ifu_asi_addr[15:13],
+                       asi_fwd_index[11:2],
+                       lsu_ifu_asi_addr[2:0]};
+   
+   dff_s #(18) asi_addr_reg(.din (asi_va_i1[17:0]),  // 15:13 is not used
+			                    .q   (asi_va_i2[17:0]),
+			                    .clk (clk),
+			                    .se  (se), .si(), .so());
+
+   // 16b zero cmp: leave out bit 3!! (imask is 0x8)
+   assign  ifd_ifc_asi_vachklo_i2 = (|asi_va_i2[16:4]) | (|asi_va_i2[2:0]);
+
+   // mux in ifqop and asi_va_i2 to create new asi va?
+   // asi va is shifted by 1 bit to look like 64b op
+   assign    ifd_ifc_asiaddr_i2[3:2] = asi_va_i2[4:3];
+
+   assign    asi_addr_i2 = {tag_par_i2,           // tag parity 42
+			                      asi_va_i2[17:16],     // way 41:40
+			                      ifqop_i2[27:0],       // tag 39:12
+			                      asi_va_i2[12:3]       // index 11:2
+                            };                 
+
+   // bist has to go to icache in the same cycle
+   // cannot flop it
+   assign    bist_addr_i2 = {1'b0,                    // par
+			                       mbist_icache_way[1:0],   // way 41:40
+			                       28'b0,                   // tag 39:12
+			                       mbist_icache_index[7:0], // index 11:4
+                             mbist_icache_word,       // 3
+			                       1'b0
+			                       };
+
+   // floating signals
+   sink #(2) s0(.in (imiss_paddr_s[1:0]));
+   sink s1(.in (pcxreq_e[42]));
+   sink s2(.in (fill_addr_i2[4]));
+   
+   
+endmodule // sparc_ifu_ifqdp
+
+
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_thrfsm.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_thrfsm.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_thrfsm.v	(revision 6)
@@ -0,0 +1,205 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_thrfsm.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_thrfsm
+//  Description:	
+//  The switch logic thrfsm contains the per-thread state machine.  
+*/
+
+`include "ifu.h"
+
+module sparc_ifu_thrfsm(/*AUTOARG*/
+   // Outputs
+   so, thr_state, 
+   // Inputs
+   completion, schedule, spec_ld, ldhit, stall, int_activate, 
+   start_thread, thaw_thread, nuke_thread, rst_thread, switch_out, 
+   halt_thread, sw_cond, clk, se, si, reset
+   );
+
+   // thread specific input
+   input  completion,   // the op this thread was waiting for is complete
+	        schedule,     // this thread was just switched in
+	        spec_ld,      // speculative switch in
+	        ldhit,        // speculation was correct
+	        stall,        // stall thread for ldmiss, imiss or trap
+	        int_activate, // activate this thread
+          halt_thread,     // in WAIT state: park this thread in HALT
+	        start_thread,    // wake up this thread from dead state
+	        nuke_thread,     // send thread to IDLE (from HALT, WAIT or an invalid state)
+          thaw_thread,     // from IDLE or HALT: move thread to WAIT (same effect as rst_thread there)
+	        rst_thread;      // reset this thread
+
+   // common inputs
+   input  switch_out,   // this thread was just switched out
+	        sw_cond;	// wait until completion signal is received
+
+   input       clk, se, si, reset;
+
+   output      so;          // scan out; not driven in this module (the flop's .so() is left open)
+
+   output [4:0] thr_state;  // current thread state, registered in tcr below
+
+   // local signals
+   reg [4:0]    next_state; // combinational next state, driven by the always block
+   
+   //
+   // Code Begins Here
+   //
+   
+//   assign       spec_rdy     = thr_state[`TCR_READY];
+   // Next-state logic (combinational). Within each state the if/else-if order sets the priority.
+   always @ (/*AUTOSENSE*/ completion
+             or halt_thread or int_activate or ldhit or nuke_thread
+             or rst_thread or schedule or spec_ld or stall
+             or start_thread or sw_cond or switch_out or thaw_thread 
+             or thr_state)
+     begin
+	      case (thr_state[4:0])
+          `THRFSM_IDLE:  // 5'b00000
+	          begin
+	             if (rst_thread | thaw_thread)
+		             next_state = `THRFSM_WAIT;
+	             else if (start_thread)    
+		             next_state = `THRFSM_RDY;
+	             else  // all other interrupts ignored
+		             next_state = thr_state[4:0];
+	          end
+
+	        `THRFSM_HALT:  // 5'b00010
+	          begin
+	             if (nuke_thread)
+		             next_state = `THRFSM_IDLE;
+	             else if (rst_thread | thaw_thread)
+		             next_state = `THRFSM_WAIT;
+	             else if (int_activate | start_thread) 
+		             next_state = `THRFSM_RDY;
+	             else
+		             next_state = thr_state[4:0];
+	          end
+	        
+	        `THRFSM_RDY:       // 5'b11001
+	          begin
+	             if (stall)     
+		             // trap also kills inst_s2 and nir
+		             // Ldmiss should not happen in this state
+		             next_state = `THRFSM_WAIT;
+	             else if (schedule)
+		             next_state = `THRFSM_RUN;
+	             else
+		             next_state = thr_state[4:0];
+	          end // case: `THRFSM_RDY
+
+	        `THRFSM_RUN:       // 5'b00101
+	          begin
+	             if (stall | sw_cond)
+		             // trap also kills inst_s2 and nir
+		             // ldmiss should not happen in this state		 
+		             next_state = `THRFSM_WAIT;
+	             else if (switch_out)
+	               // on an interrupt or thread stall, the fcl has to
+	               // switch out the thread and inform the fsm 
+		             next_state = `THRFSM_RDY;
+	             else
+		             next_state = thr_state[4:0];
+	          end // case: `THRFSM_RUN
+
+	        `THRFSM_WAIT:       // 5'b00001
+	          begin
+	             if (nuke_thread) 
+		             next_state = `THRFSM_IDLE;
+	             else if (halt_thread) // exclusive with above
+		             next_state = `THRFSM_HALT;
+	             else if (stall) // excl. with above
+		             next_state = `THRFSM_WAIT;
+	             else if (spec_ld) // exclusive with above
+		             next_state = `THRFSM_SPEC_RDY;
+	             else if (completion & ~halt_thread) // ~halt_thread is redundant: halt_thread was tested above
+		             next_state = `THRFSM_RDY;
+	             else
+		             next_state = thr_state[4:0];
+	          end // case: `THRFSM_WAIT
+	        
+	        `THRFSM_SPEC_RDY:       // 5'b10011
+	          begin
+	             if (stall)
+		             next_state = `THRFSM_WAIT;
+	             else if (schedule & ~ldhit) // exclusive
+		             next_state = `THRFSM_SPEC_RUN;
+	             else if (schedule & ldhit)  // exclusive
+		             next_state = `THRFSM_RUN;
+	             else if (ldhit) // reached only when schedule is 0
+		             next_state = `THRFSM_RDY;
+	             else
+		             next_state = thr_state[4:0];
+	          end // case: `THRFSM_SPEC_RDY
+
+	        `THRFSM_SPEC_RUN:       // 5'b00111
+	          begin
+	             if (stall | sw_cond)
+		             next_state = `THRFSM_WAIT;
+	             else if ((ldhit) & switch_out)
+		             next_state = `THRFSM_RDY;
+	             else if ((ldhit) & ~switch_out)
+		             next_state = `THRFSM_RUN;
+	             else if (~(ldhit) & switch_out)
+		             next_state = `THRFSM_SPEC_RDY;
+	             // on an interrupt or thread stall, the fcl has to
+	             // switch out the thread and inform the fsm 
+	             else
+		             next_state = thr_state[4:0];
+	          end // case: `THRFSM_SPEC_RUN
+
+//VCS coverage off
+	        default: // any state encoding not listed above
+	          begin
+               // synopsys translate_off
+		     // 0in <fire -message "thrfsm.v: Error! Invalid State"
+`ifdef DEFINE_0IN
+`else           
+		`ifdef MODELSIM
+	             $display("ILLEGAL_THR_STATE", "thrfsm.v: Error! Invalid State %b\n", thr_state);
+		`else
+	             $error("ILLEGAL_THR_STATE", "thrfsm.v: Error! Invalid State %b\n", thr_state);
+		`endif		 
+`endif		     
+               // synopsys translate_on
+	             if (rst_thread) // only rst_thread or nuke_thread leave an invalid state
+		             next_state = `THRFSM_WAIT;
+	             else if (nuke_thread)
+		             next_state = `THRFSM_IDLE;		 
+	             else 
+		             next_state = thr_state[4:0];
+	          end
+//VCS coverage on
+	      endcase // case (thr_state[4:0])
+     end // always @ (...
+
+   // thread config register (tcr): holds thr_state and loads next_state
+   dffr_s #(5) tcr(.din  (next_state),
+	             .clk  (clk),
+	             .q    (thr_state),
+	             .rst  (reset),
+	             .se   (se), .so(), .si()); // scan in/out left unconnected here
+
+
+endmodule // sparc_ifu_thrfsm
Index: /trunk/T1-CPU/ifu/sparc_ifu_fcl.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_fcl.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_fcl.v	(revision 6)
@@ -0,0 +1,3725 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_fcl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+//////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_fcl
+//  Description:	
+//  The FCL is the fetch control logic.  It controls the PC datapath
+//  and the fetch/next instruction datapath.  It also manages access
+//  to the icache data, tags, vbits and to the tlb.
+//  The FCL starts fetching from the reset PC upon reset.  It is up to
+//  the DTU to specify which thread to fetch from.  Only T0 is set to
+//  the reset PC.  If the decode unit specifies any other thread, it
+//  will fetch from an indeterminate address.
+//  The fetch block automatically stalls the machine when an Imiss is
+//  detected and there is no thread to switch to.  
+// 
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+`include "ifu.h"
+
+
+module sparc_ifu_fcl(/*AUTOARG*/
+   // Outputs
+   fcl_icd_rdreq_bf, fcl_icv_rdreq_bf, fcl_icd_wrreq_bf, 
+   fcl_ict_wrreq_bf, fcl_icv_wrreq_bf, fcl_icd_index_sel_ifq_bf, 
+   fcl_ifq_grant_bf, fcl_ifq_icmiss_s1, fcl_ifq_rdreq_s1, 
+   fcl_ifq_icache_en_s_l, fcl_ifq_thr_s1, fcl_ifq_canthr, 
+   fcl_itlb_cam_vld_bf, fcl_itlb_cam_bypass_bf, fcl_itlb_addr_mask_l, 
+   fcl_itlb_cam_real_bf, fcl_itlb_cam_pid_bf, fcl_itlb_wr_vld_bf, 
+   fcl_itlb_dmp_vld_bf, fcl_itlb_dmp_all_bf, fcl_itlb_tag_rd_vld_bf, 
+   fcl_itlb_invall_f_l, fcl_itlb_data_rd_vld_bf, fcl_erb_ievld_s1, 
+   fcl_erb_tevld_s1, fcl_erb_immuevld_s1, ifu_lsu_thrid_s, 
+   fcl_erb_asi_tid_f, fcl_erb_clear_iferr, fcl_erb_itlbrd_vld_s, 
+   fcl_erb_itlbrd_data_s, fcl_dec_dslot_s, fcl_dtu_inst_vld_e, 
+   fcl_dtu_intr_vld_e, fcl_dtu_inst_vld_d, fcl_dtu_ely_inst_vld_d, 
+   fcl_dec_intr_vld_d, fcl_erb_inst_issue_d, fcl_erb_inst_vld_d1, 
+   ifu_tlu_inst_vld_m, ifu_exu_inst_vld_e, ifu_exu_inst_vld_w, 
+   ifu_spu_inst_vld_w, ifu_tlu_inst_vld_w, ifu_tlu_flush_w, 
+   ifu_tlu_flush_m, fcl_swl_int_activate_i3, fcl_swl_flush_wake_w, 
+   fcl_swl_flush_w, fcl_dcl_regz_e, ifu_tlu_thrid_e, ifu_tlu_thrid_d, 
+   ifu_tlu_immu_miss_m, ifu_tlu_priv_violtn_m, ifu_tlu_icmiss_e, 
+   ifu_tlu_ttype_vld_m, ifu_exu_ttype_vld_m, ifu_mmu_trap_m, 
+   ifu_tlu_trap_m, ifu_tlu_ttype_m, ifu_tlu_hwint_m, 
+   ifu_tlu_sftint_m, ifu_tlu_rstint_m, fcl_dtu_rst_thr_w, 
+   fcl_dtu_resum_thr_w, ifu_tlu_itlb_done, ifu_spu_trap_ack, 
+   ifu_exu_tid_s2, ifu_exu_ren1_s, ifu_exu_ren2_s, ifu_exu_ren3_s, 
+   ifu_exu_disable_ce_e, fcl_dtu_sync_intr_d, fcl_dtu_tlzero_d, 
+   fcl_dtu_privmode_d, fcl_dtu_hprivmode_d, fcl_dtu_hprivmode_w2, 
+   fcl_dtu_nuke_thr_w, fcl_swl_swout_f, fcl_dtu_stall_bf, 
+   fcl_swl_swcvld_s, fcl_dtu_thr_f, fcl_imd_oddwin_d, 
+   fcl_fdp_oddwin_s, fcl_fdp_pcoor_vec_f, fcl_fdp_pcoor_f, 
+   fcl_fdp_mask32b_f, fcl_fdp_addr_mask_d, fcl_fdp_tctxt_sel_prim, 
+   fcl_fdp_usenir_sel_nir_s1, fcl_fdp_rbinst_sel_inste_s, 
+   fcl_fdp_thrtnpc_sel_tnpc_l, fcl_fdp_thrtnpc_sel_npcw_l, 
+   fcl_fdp_thrtnpc_sel_pcf_l, fcl_fdp_thrtnpc_sel_old_l, 
+   fcl_fdp_thr_s1_l, fcl_fdp_next_thr_bf_l, fcl_fdp_next_ctxt_bf_l, 
+   fcl_fdp_nirthr_s1_l, fcl_fdp_thr_s2_l, 
+   fcl_fdp_tpcbf_sel_pcp4_bf_l, fcl_fdp_tpcbf_sel_brpc_bf_l, 
+   fcl_fdp_tpcbf_sel_trap_bf_l, fcl_fdp_tpcbf_sel_old_bf_l, 
+   fcl_fdp_pcbf_sel_nosw_bf_l, fcl_fdp_pcbf_sel_swpc_bf_l, 
+   fcl_fdp_pcbf_sel_br_bf_l, fcl_fdp_trrbpc_sel_trap_bf_l, 
+   fcl_fdp_trrbpc_sel_rb_bf_l, fcl_fdp_trrbpc_sel_err_bf_l, 
+   fcl_fdp_trrbpc_sel_pcs_bf_l, fcl_fdp_noswpc_sel_tnpc_l_bf, 
+   fcl_fdp_noswpc_sel_old_l_bf, fcl_fdp_noswpc_sel_inc_l_bf, 
+   fcl_fdp_nextpcs_sel_pce_f_l, fcl_fdp_nextpcs_sel_pcd_f_l, 
+   fcl_fdp_nextpcs_sel_pcs_f_l, fcl_fdp_nextpcs_sel_pcf_f_l, 
+   fcl_fdp_inst_sel_curr_s_l, fcl_fdp_inst_sel_switch_s_l, 
+   fcl_fdp_inst_sel_nir_s_l, fcl_fdp_inst_sel_nop_s_l, 
+   fcl_fdp_tinst_sel_curr_s_l, fcl_fdp_tinst_sel_rb_s_l, 
+   fcl_fdp_tinst_sel_old_s_l, fcl_fdp_tinst_sel_ifq_s_l, 
+   fcl_fdp_dmpthr_l, fcl_fdp_ctxt_sel_dmp_bf_l, 
+   fcl_fdp_ctxt_sel_sw_bf_l, fcl_fdp_ctxt_sel_curr_bf_l, 
+   fcl_fdp_rdsr_sel_pc_e_l, fcl_fdp_rdsr_sel_thr_e_l, 
+   fcl_fdp_rdsr_sel_ver_e_l, so, ifu_reset_l, 
+   // Inputs
+   rclk, grst_l, arst_l, se, sehold, si, rst_tri_en, 
+   tlu_ifu_flush_pipe_w, exu_ifu_va_oor_m, exu_ifu_oddwin_s, 
+   spu_ifu_ttype_tid_w2, spu_ifu_ttype_vld_w2, spu_ifu_ttype_w2, 
+   erb_fcl_spu_uetrap, exu_ifu_regz_e, dcl_fcl_bcregz0_e, 
+   dcl_fcl_bcregz1_e, dtu_fcl_rollback_g, dtu_fcl_retract_d, 
+   dtu_fcl_br_inst_d, dtu_fcl_sir_inst_e, dtu_fcl_privop_e, 
+   dtu_fcl_fpdis_e, dtu_fcl_imask_hit_e, dtu_fcl_illinst_e, 
+   dtu_fcl_thr_active, dec_fcl_rdsr_sel_pc_d, dec_fcl_rdsr_sel_thr_d, 
+   ifq_fcl_wrreq_bf, ifq_fcl_icd_wrreq_bf, ifq_fcl_ictv_wrreq_bf, 
+   ifq_fcl_rdreq_bf, ifq_fcl_asi_tid_bf, ifq_fcl_asird_bf, 
+   ifq_fcl_invreq_bf, erb_fcl_itlb_ce_d1, erb_dtu_ifeterr_d1, 
+   erb_fcl_ifet_uevec_d1, erb_fcl_ue_trapvec, erb_fcl_ce_trapvec, 
+   dtu_fcl_nextthr_bf, dtu_fcl_ntr_s, dtu_fcl_running_s, 
+   dtu_fcl_flush_sonly_e, fdp_fcl_swc_s2, fdp_fcl_va2_bf, 
+   itlb_fcl_tlbmiss_f_l, itlb_fcl_priv_s1, itlb_fcl_cp_s1, 
+   itlb_fcl_imiss_s_l, fdp_fcl_pc_oor_vec_f, fdp_fcl_pc_oor_e, 
+   fdp_fcl_op_s, fdp_fcl_op3_s, fdp_fcl_ibit_s, lsu_ifu_stallreq, 
+   ffu_ifu_stallreq, ifq_fcl_stallreq, dtu_inst_anull_e, 
+   ifq_fcl_fill_thr, ifq_fcl_flush_sonly_e, tlu_ifu_trap_tid_w1, 
+   tlu_ifu_trappc_vld_w1, tlu_ifu_trapnpc_vld_w1, 
+   tlu_lsu_pstate_priv, tlu_lsu_pstate_am, tlu_hpstate_priv, 
+   tlu_lsu_redmode, tlu_hpstate_enb, lsu_ifu_addr_real_l, 
+   lsu_pid_state0, lsu_pid_state1, lsu_pid_state2, lsu_pid_state3, 
+   lsu_ifu_icache_en, lsu_ifu_dc_parity_error_w2, lsu_ifu_t0_tlz, 
+   lsu_ifu_t1_tlz, lsu_ifu_t2_tlz, lsu_ifu_t3_tlz, tlu_ifu_hwint_i3, 
+   tlu_ifu_pstate_ie, tlu_ifu_sftint_vld, tlu_ifu_hintp_vld, 
+   tlu_ifu_rerr_vld, tlu_ifu_rstthr_i2, tlu_ifu_rstint_i2, 
+   tlu_ifu_resumint_i2, tlu_ifu_nukeint_i2, tlu_itlb_wr_vld_g, 
+   tlu_itlb_dmp_vld_g, tlu_itlb_dmp_all_g, tlu_itlb_data_rd_g, 
+   tlu_itlb_tag_rd_g, tlu_itlb_invalidate_all_g, tlu_fcl_dmp_pid_bf, 
+   tlu_fcl_dmp_real_bf, tlu_idtlb_dmp_thrid_g, exu_ifu_ecc_ce_m, 
+   ffu_ifu_fst_ce_w
+   );
+
+   input  rclk,
+	        grst_l,
+          arst_l,
+	        se,
+          sehold,
+	        si;
+
+   input  rst_tri_en;
+
+
+   input  tlu_ifu_flush_pipe_w;	// flush pipe on a trap
+   input  exu_ifu_va_oor_m;
+   input [3:0] exu_ifu_oddwin_s;
+
+   input [1:0] spu_ifu_ttype_tid_w2;
+   input       spu_ifu_ttype_vld_w2;
+   input       spu_ifu_ttype_w2;
+
+   input [3:0] erb_fcl_spu_uetrap;  // use m3
+   
+//   input       dtu_fcl_brtaken_e;    // branch taken
+   input       exu_ifu_regz_e;
+   input       dcl_fcl_bcregz0_e,
+               dcl_fcl_bcregz1_e;
+   
+   input       dtu_fcl_rollback_g;
+   input       dtu_fcl_retract_d;
+   input       dtu_fcl_br_inst_d;
+   input       dtu_fcl_sir_inst_e;
+   input       dtu_fcl_privop_e,
+		           dtu_fcl_fpdis_e,
+               dtu_fcl_imask_hit_e,
+		           dtu_fcl_illinst_e;
+   input [3:0] dtu_fcl_thr_active;
+
+   input       dec_fcl_rdsr_sel_pc_d,
+		           dec_fcl_rdsr_sel_thr_d;
+   
+   input       ifq_fcl_wrreq_bf;
+   input       ifq_fcl_icd_wrreq_bf,
+		           ifq_fcl_ictv_wrreq_bf,
+		           ifq_fcl_rdreq_bf;
+
+   input [1:0] ifq_fcl_asi_tid_bf;
+   input       ifq_fcl_asird_bf;
+   
+   input       ifq_fcl_invreq_bf;
+
+   input       erb_fcl_itlb_ce_d1;
+   input       erb_dtu_ifeterr_d1;
+   input [3:0] erb_fcl_ifet_uevec_d1,
+		           erb_fcl_ue_trapvec,
+		           erb_fcl_ce_trapvec;   
+   
+   input [3:0] dtu_fcl_nextthr_bf;   // thread to switch to
+   input       dtu_fcl_ntr_s,        // next thread ready for ex
+               dtu_fcl_running_s;
+
+   input       dtu_fcl_flush_sonly_e;
+//               dec_fcl_kill4sta_e;
+
+   input       fdp_fcl_swc_s2,       // instruction switch condition
+		           fdp_fcl_va2_bf,       // bit 2 of vaddr
+		           itlb_fcl_tlbmiss_f_l,  // itlb miss
+		           itlb_fcl_priv_s1,     // privileged access page
+		           itlb_fcl_cp_s1,       // uncached access page
+		           itlb_fcl_imiss_s_l;     // icache miss in s1
+
+   input [3:0] fdp_fcl_pc_oor_vec_f;
+   input       fdp_fcl_pc_oor_e;
+
+   input [1:0] fdp_fcl_op_s;
+   input [5:2] fdp_fcl_op3_s;
+   input       fdp_fcl_ibit_s;
+
+   input       lsu_ifu_stallreq,
+	             ffu_ifu_stallreq,
+               ifq_fcl_stallreq;
+   
+   input       dtu_inst_anull_e;
+
+   input [3:0] ifq_fcl_fill_thr;     // fill inst goes to this
+                                   	 // thread instruction register
+   input       ifq_fcl_flush_sonly_e;
+
+   input [1:0] tlu_ifu_trap_tid_w1;     // thr for which trappc is sent
+   input       tlu_ifu_trappc_vld_w1,   // ld pc on trap or done/retry
+		           tlu_ifu_trapnpc_vld_w1;  // ld Npc for a retry
+
+   input [3:0] tlu_lsu_pstate_priv;     // may need to flop these three
+   input [3:0] tlu_lsu_pstate_am;
+   input [3:0] tlu_hpstate_priv;
+   input [3:0] tlu_lsu_redmode;
+   input [3:0] tlu_hpstate_enb;
+
+   input [3:0] lsu_ifu_addr_real_l;
+   input [2:0] lsu_pid_state0,
+               lsu_pid_state1,
+               lsu_pid_state2,
+               lsu_pid_state3;
+   input [3:0] lsu_ifu_icache_en;
+
+   input       lsu_ifu_dc_parity_error_w2;
+   
+
+//   input       lsu_ifu_flush_ireg;  // not needed any more
+   input       lsu_ifu_t0_tlz,
+ 		           lsu_ifu_t1_tlz,
+ 		           lsu_ifu_t2_tlz,
+ 		           lsu_ifu_t3_tlz;
+
+   input [3:0] tlu_ifu_hwint_i3,        // normal interrupt
+		           tlu_ifu_pstate_ie,
+		           tlu_ifu_sftint_vld,
+		           tlu_ifu_hintp_vld,
+		           tlu_ifu_rerr_vld,
+		           tlu_ifu_rstthr_i2;       // reset or idle interrupt
+
+   input       tlu_ifu_rstint_i2,       // reset to a dead thread
+		           tlu_ifu_resumint_i2,
+		           tlu_ifu_nukeint_i2;
+
+   input       tlu_itlb_wr_vld_g,
+		           tlu_itlb_dmp_vld_g,
+               tlu_itlb_dmp_all_g,
+		           tlu_itlb_data_rd_g,
+		           tlu_itlb_tag_rd_g;
+   input       tlu_itlb_invalidate_all_g;
+
+   input [2:0] tlu_fcl_dmp_pid_bf;
+   input       tlu_fcl_dmp_real_bf;
+   input [1:0] tlu_idtlb_dmp_thrid_g;
+
+   input       exu_ifu_ecc_ce_m;
+   input       ffu_ifu_fst_ce_w;
+   
+   // to icd
+   output      fcl_icd_rdreq_bf,
+               fcl_icv_rdreq_bf,
+		           fcl_icd_wrreq_bf,
+		           fcl_ict_wrreq_bf,
+		           fcl_icv_wrreq_bf;
+
+   output      fcl_icd_index_sel_ifq_bf;
+   output      fcl_ifq_grant_bf;
+   
+   // to ifq
+   output      fcl_ifq_icmiss_s1;  // if icache turned off
+   output      fcl_ifq_rdreq_s1;
+   output      fcl_ifq_icache_en_s_l;
+   
+   output [1:0] fcl_ifq_thr_s1;
+   output [3:0] fcl_ifq_canthr;     // cancel ifetch to this thread
+
+   // to itlb
+   output       fcl_itlb_cam_vld_bf,
+                fcl_itlb_cam_bypass_bf,
+		            fcl_itlb_addr_mask_l,
+                fcl_itlb_cam_real_bf;
+
+   output [2:0] fcl_itlb_cam_pid_bf;
+   
+   output       fcl_itlb_wr_vld_bf,
+		            fcl_itlb_dmp_vld_bf,
+                fcl_itlb_dmp_all_bf,
+		            fcl_itlb_tag_rd_vld_bf,
+		            fcl_itlb_invall_f_l,
+		            fcl_itlb_data_rd_vld_bf;
+   
+   // to erb
+   output       fcl_erb_ievld_s1,
+		            fcl_erb_tevld_s1,
+		            fcl_erb_immuevld_s1;
+
+   output [1:0] ifu_lsu_thrid_s,
+		            fcl_erb_asi_tid_f;
+
+   output [3:0] fcl_erb_clear_iferr;
+   
+
+   output       fcl_erb_itlbrd_vld_s,
+		            fcl_erb_itlbrd_data_s;
+
+   output       fcl_dec_dslot_s;
+   output       fcl_dtu_inst_vld_e,
+                fcl_dtu_intr_vld_e,
+		            fcl_dtu_inst_vld_d,
+                fcl_dtu_ely_inst_vld_d,
+                fcl_dec_intr_vld_d,
+		            fcl_erb_inst_issue_d,
+		            fcl_erb_inst_vld_d1,
+		            ifu_tlu_inst_vld_m,
+//		            ifu_lsu_inst_vld_m,
+		            ifu_exu_inst_vld_e,
+		            ifu_exu_inst_vld_w,
+		            ifu_spu_inst_vld_w,
+		            ifu_tlu_inst_vld_w;
+   
+   output       ifu_tlu_flush_w;
+   output       ifu_tlu_flush_m;
+
+   output [3:0] fcl_swl_int_activate_i3;
+   output       fcl_swl_flush_wake_w;
+   output       fcl_swl_flush_w;
+
+   output       fcl_dcl_regz_e;
+   
+   // to tlu
+   output [1:0] ifu_tlu_thrid_e;
+   output [1:0] ifu_tlu_thrid_d;
+
+   output       ifu_tlu_immu_miss_m,
+		            ifu_tlu_priv_violtn_m;
+	 
+   output       ifu_tlu_icmiss_e;
+   output       ifu_tlu_ttype_vld_m;
+   output       ifu_exu_ttype_vld_m;
+   output       ifu_mmu_trap_m;
+   output       ifu_tlu_trap_m;
+   output [8:0] ifu_tlu_ttype_m;
+   
+   output       ifu_tlu_hwint_m;
+   output       ifu_tlu_sftint_m;
+//   output       ifu_tlu_hintp_m;
+//   output       ifu_tlu_rerr_m;
+   output       ifu_tlu_rstint_m;
+   output       fcl_dtu_rst_thr_w;
+   output       fcl_dtu_resum_thr_w;
+
+   output       ifu_tlu_itlb_done;
+
+   output       ifu_spu_trap_ack;
+   
+   // to exu
+   output [1:0] ifu_exu_tid_s2;
+   output       ifu_exu_ren1_s,
+		            ifu_exu_ren2_s,
+		            ifu_exu_ren3_s;
+
+   output       ifu_exu_disable_ce_e;  // to exu and ffu
+   
+   
+   // to dtu
+   output       fcl_dtu_sync_intr_d;
+   output       fcl_dtu_tlzero_d;
+   output       fcl_dtu_privmode_d;
+   output       fcl_dtu_hprivmode_d;
+   output       fcl_dtu_hprivmode_w2;
+   output       fcl_dtu_nuke_thr_w;
+   output       fcl_swl_swout_f;
+   output       fcl_dtu_stall_bf;
+//   output       fcl_dtu_switch_s;     // indicates to the DTU that a
+                                      // switch took place to next_thr
+   output       fcl_swl_swcvld_s;
+   output [3:0] fcl_dtu_thr_f;
+   output       fcl_imd_oddwin_d;
+   
+   // to fdp
+   output       fcl_fdp_oddwin_s;
+   output [3:0] fcl_fdp_pcoor_vec_f;
+   output       fcl_fdp_pcoor_f;
+   output       fcl_fdp_mask32b_f;
+   output       fcl_fdp_addr_mask_d;
+
+   output [3:0] fcl_fdp_tctxt_sel_prim;
+
+   
+   // 2:1 mux selects
+   output       fcl_fdp_usenir_sel_nir_s1;   // same as usenir_d2
+   output [3:0] fcl_fdp_rbinst_sel_inste_s;
+
+   output [3:0] fcl_fdp_thrtnpc_sel_tnpc_l,  // load npc
+		            fcl_fdp_thrtnpc_sel_npcw_l,
+		            fcl_fdp_thrtnpc_sel_pcf_l,
+		            fcl_fdp_thrtnpc_sel_old_l;
+   
+   output [3:0] fcl_fdp_thr_s1_l;            // s1 thr for thrNIR input mux
+   
+   // other mux selects   
+   output [3:0] fcl_fdp_next_thr_bf_l,  // for thrpc output mux
+                fcl_fdp_next_ctxt_bf_l, // for ctxt output mux
+		            fcl_fdp_nirthr_s1_l,    // select NIR in s1 stage
+		            fcl_fdp_thr_s2_l;       // s2 thr for thr_inst_reg
+
+   output [3:0] fcl_fdp_tpcbf_sel_pcp4_bf_l, // selects for thread PC muxes
+		            fcl_fdp_tpcbf_sel_brpc_bf_l,
+		            fcl_fdp_tpcbf_sel_trap_bf_l,
+		            fcl_fdp_tpcbf_sel_old_bf_l;
+
+   output       fcl_fdp_pcbf_sel_nosw_bf_l,      // F stage pc mux selects
+		            fcl_fdp_pcbf_sel_swpc_bf_l,
+		            fcl_fdp_pcbf_sel_br_bf_l;
+
+   output [3:0] fcl_fdp_trrbpc_sel_trap_bf_l, 
+		            fcl_fdp_trrbpc_sel_rb_bf_l,
+		            fcl_fdp_trrbpc_sel_err_bf_l,
+		            fcl_fdp_trrbpc_sel_pcs_bf_l;
+
+   output       fcl_fdp_noswpc_sel_tnpc_l_bf,    // next pc select,
+		            fcl_fdp_noswpc_sel_old_l_bf,		 // dont need anymore
+		            fcl_fdp_noswpc_sel_inc_l_bf;
+
+   output [3:0] fcl_fdp_nextpcs_sel_pce_f_l,  
+		            fcl_fdp_nextpcs_sel_pcd_f_l,
+		            fcl_fdp_nextpcs_sel_pcs_f_l,  
+		            fcl_fdp_nextpcs_sel_pcf_f_l;
+
+   output       fcl_fdp_inst_sel_curr_s_l,   // selects for inst_s2
+		            fcl_fdp_inst_sel_switch_s_l,
+		            fcl_fdp_inst_sel_nir_s_l,
+		            fcl_fdp_inst_sel_nop_s_l;
+
+   output [3:0] fcl_fdp_tinst_sel_curr_s_l,  // selects for tinst regs
+		            fcl_fdp_tinst_sel_rb_s_l,		
+		            fcl_fdp_tinst_sel_old_s_l,
+		            fcl_fdp_tinst_sel_ifq_s_l;
+
+   output [3:0] fcl_fdp_dmpthr_l;
+
+   output       fcl_fdp_ctxt_sel_dmp_bf_l,
+		            fcl_fdp_ctxt_sel_sw_bf_l,
+		            fcl_fdp_ctxt_sel_curr_bf_l;
+
+   output       fcl_fdp_rdsr_sel_pc_e_l,
+                fcl_fdp_rdsr_sel_thr_e_l,
+                fcl_fdp_rdsr_sel_ver_e_l;
+   
+   output       so,
+                ifu_reset_l;
+
+
+   //----------------------------------------------------------------------
+   // Declarations
+   //----------------------------------------------------------------------
+   reg [3:0]    fcl_fdp_tpcbf_sel_old_bf_l,
+		            fcl_fdp_tpcbf_sel_pcp4_bf_l,
+		            fcl_fdp_tpcbf_sel_trap_bf_l,
+		            fcl_fdp_tpcbf_sel_brpc_bf_l;
+   
+   wire         fcl_fdp_inst_sel_nop_s_l,
+                fcl_fdp_inst_sel_nir_s_l,
+		            fcl_fdp_inst_sel_curr_s_l,
+		            fcl_fdp_inst_sel_switch_s_l;
+   
+
+   // local signals
+   wire         //sw_itlb_on,
+                sw_itlb_real,
+		            sw_itlb_am,
+		            //this_itlb_on,
+                this_itlb_real,
+		            itlb_on;
+
+   wire [3:0]   xlate_en,
+                xlate_en_d1;
+   
+   wire [2:0]   sw_pid_bf,
+                curr_pid_bf;
+
+   wire         pid_sel_sw,
+                pid_sel_curr,
+                pid_sel_dmp;
+   
+   wire         itlb_access_gnt,
+		            itlb_access_en,
+                itlb_write_en,
+                ctxt_sel_dmp,
+		            itlb_access_done,
+                itlb_write_done,
+		            itlb_rd_access_done,
+		            itlb_rd_access_done_d1,
+		            itlb_rd_access_done_d2,
+		            itlb_rd_req_bf,
+		            itlb_rd_req_f,
+		            itlb_data_rd_f,
+                itlb_data_rd_s;
+   
+   wire [1:0]   asi_tid_bf;
+   wire [1:0]   spu_tid_w2;
+   
+   wire         fetch_bf,	// fetch an instruction next cycle
+                allow_ifq_access_icd_bf,
+                inst_access_bf,
+                ia1_bf,
+                ia0_bf,
+                no_instacc_bf;
+   
+   wire         cam_vld_bf,
+                tlb_invall_bf,
+                tlb_invall_f,
+//                tlb_invall_req_bf,
+                inst_vld_bf;
+   
+   wire         rdreq_bf,     // read from I$ next cycle
+		            rdreq_f;
+   
+   wire         ic_wrreq_bf;
+   
+   wire         running_s2,
+                valid_s,
+		            running_s1,
+                ely_running_s1,
+		            running_d,
+		            running_e,
+		            running_m,
+    		        inst_vld_f,
+		            inst_vld_s,
+                inst_vld_s_crit,
+		            inst_vld_s1,
+		            inst_vld_s2,    // valid bit of S stage
+	 	                            // instruction.  If this is 0,
+		                            // convert inst to no-op
+		            inst_vld_d,
+                inst_vld_d_crit,
+		            inst_vld_d1,
+		            inst_vld_e,
+		            inst_vld_qual_e,
+		            inst_vld_m,
+		            inst_vld_w;
+
+   wire         inst_vld_w_crit;
+   
+   wire         no_iftrap_m,
+                no_iftrap_w;
+
+   wire         stall_f,
+		            stall_s1,
+		            stall_s1_nxt,
+                ely_stall_thisthr_f,
+                part_stall_thisthr_f,
+		            stall_thisthr_f;
+   wire         rdreq_s1;
+
+   wire         usenir_bf,
+		            usenir_f,
+		            usenir_s1;
+   
+   wire [3:0]   tinst_vld_s,    // valid bit of thr instr register
+		                            // in s stage
+		            tinst_vld_nxt;
+
+   wire [3:0]   val_thr_s1,
+		            val_thr_f,
+                thr_e_v2,
+		            val_thr_e;
+   
+   wire         flush_sonly_qual_e,
+                flush_sonly_all_m,
+                flush_sonly_qual_m,
+                ims_flush_sonly_m,
+                ims_flush_sonly_w,
+                ims_flush_coll_m,
+                ims_flush_coll_w,
+                flush_sonly_m;
+
+   wire         flush_pipe_w;
+   
+   wire         kill_thread_d,
+//		            kill_thread_e,
+		            kill_thread_m,
+                kill_local_m,
+                ely_kill_thread_s2,
+                ely_kill_thread_m,
+		            kill_thread_s2;
+   
+   wire [3:0]   clear_s_d1,
+                flush_thr_w,
+                late_flush_w2;
+
+   wire         utrap_flush_w,
+                utrap_flush_m,
+                flush_pipe_w2;
+   
+   wire         kill_curr_f,
+                kill_curr_d,
+                kill_curr_e,
+                kill_curr_m;
+   
+   wire [3:0]   canthr_f,
+                canthr_s_early,
+		            canthr_s;
+   wire         canthr_sw;
+   wire         canthr_sm,
+                canthr_sd;
+
+   wire         forcemiss_f,   // force an icache miss (if icache is off)
+		            forcemiss_s1,
+                icmiss_for_perf,
+//                ic_miss_sw_s1,
+		            ic_miss_s1;    // icache miss (forced or not)
+
+   wire [3:0]   icache_en_d1;
+
+   wire         icache_on_bf,
+		            icache_on_f,
+		            icache_on_s1,
+                uncached_page_s1;
+//		            sw_icache_on,
+//		            this_icache_on;
+   
+   wire         imsto_thisthr_s1,
+                iferrto_thisthr_d1,
+                retract_iferr_d1,
+                retract_iferr_qual_d1,
+                retract_inst_d,
+                retract_iferr_e;
+//   wire         intrto_thisthr_d;
+//   wire         imsto_nextthr_s1;
+
+   wire         mark4rb_w,
+                mark4rb_m,
+                mark4rb_e,
+                mark4rb_d,
+                mark4rb_s;
+   
+   wire [3:0]   tlbmiss_s2,
+		            tlbmiss_d,
+		            nir_tlbmiss_vec,
+		            nir_tlbmiss_next;
+
+   wire [3:0]   delay_slot_vec,
+                delay_slot_vec_nxt;
+   
+   wire         tlb_cam_miss_f,
+                tlb_cam_miss_s1,
+                nir_tlbmiss_s1,
+		            tlbmiss_s1_crit,
+		            tlbmiss_s1;
+   
+   wire         cam_vld_f,
+		            cam_vld_s1;
+   
+   wire         immu_fault_f,
+		            immu_miss_d,
+		            immu_miss_crit_d,
+		            immu_miss_qual_d,
+		            immu_miss_e,
+//                immu_miss_qual_e,
+                immu_miss_m,
+                addr_real_e;
+   wire [3:0]   itlb_addr_real_l,
+                itlb_addr_real;
+   wire [3:0]   pstate_am_d1;
+   
+   wire         pc_oor_s1,
+                pc_oor_s2,
+                pc_oor_s,
+		            pc_oor_f;
+   wire         set_oor_m;
+   wire         addr_mask_32b_m;
+   
+   wire         priv_mode_s1,
+                priv_mode_f,
+                hpriv_mode_s1,
+                hpriv_mode_w,
+                hpriv_mode_w2,
+                hpriv_mode_f;
+
+   wire         inst_acc_exc_s1,
+		            inst_acc_exc_d,
+		            inst_acc_exc_e;
+   wire [3:0]   inst_acc_vec_s2,
+		            inst_acc_vec_d;
+   
+   wire         priv_violtn_e,
+                priv_violtn_m;
+
+   wire         trap_e,
+		            trap_m;
+   
+   wire         ttype_sel_spuma_e,
+ 		            ttype_sel_spuenc_e,
+		            ttype_sel_corr_err_e,
+		            ttype_sel_unc_err_e,		
+		            ttype_sel_res_err_e,		
+		            ttype_sel_hstk_cmp_e,		
+		            ttype_sel_pcoor_e,
+		            ttype_sel_immu_miss_e, 
+		            ttype_sel_real_trans_e, 
+		            ttype_sel_icache_err_e,
+		            ttype_sel_priv_viol_e, 
+		            ttype_sel_privop_e,    
+		            ttype_sel_illinst_e,
+		            ttype_sel_ibe_e,
+		            ttype_sel_sir_e,
+		            ttype_sel_fpdis_e;
+   
+   wire [8:0]   ttype_e;
+   
+   wire [3:0]   next_nir_privvec,
+		            nir_privvec;
+   wire         nir_priv_s1,
+		            priv_inst_s1;
+   
+   wire         tlzero_s2;
+   wire [3:0]   tlzero_vec_d1;
+
+   wire         nuke_thr_w,
+                resum_thr_w,
+                rst_thr_w;
+
+   wire [3:0]   spu_thr;
+//   wire [3:0]   rst_thr_bf;
+   
+   wire [3:0]   async_rst_i3,
+                async_rst_i4,
+		            next_rst_i2,
+		            rstint_i2,
+		            rstint_i3,
+		            resumint_i2,
+		            resumint_i3,
+                next_resum_i2,
+		            nuke_thr_i2,
+		            next_nuke_i2,
+		            nuke_thr_i3,
+		            next_sftint_i2,
+                next_hintp_i2,
+                next_rerr_i2,
+		            next_hwint_i3,
+		            sftint_i3,
+                hintp_i3,
+                rerr_i3,
+		            hwint_i4,
+		            next_ceint_i2,
+		            ceint_i3,
+		            next_ueint_i2,
+		            ueint_i3,
+		            next_spuint0_i2,
+		            spuint0_i3,
+		            next_spuint1_i2,
+		            spuint1_i3;
+
+   wire [3:0]   intr_in_pipe;
+
+   wire [3:0]   hypv_int_en,
+                hypv_int_en_d1;
+   wire [3:0]   supv_int_en,
+                supv_int_en_d1;
+
+   wire [3:0]   ifet_ue_vec_d1,
+		            ifet_ue_vec_e;
+   wire         ifet_ue_e;
+
+   wire [3:0]   any_intr_vec_f,
+                any_intr_vec_s,
+                intr_pending_nxt,
+                intr_pending_s,
+                supv_masked_intr_s,
+                hypv_masked_intr_s;
+
+   wire         spuint0_m,
+                spuint0_trap_m,
+//                spuint0_qual_m,
+		            spuint0_e,
+		            spuint0_qual_e,
+                spuint0_w,
+                spuint0_trap_w,
+ 		            spuint1_m,
+                spuint1_trap_m,
+//                spuint1_qual_m,
+		            spuint1_e,
+		            spuint1_qual_e,
+                spuint1_w,
+                spuint1_trap_w,
+                hwint_m,
+                hwint_e,
+                rstint_m,
+//                rstint_qual_m,
+                resumint_m,
+                resumint_qual_m,
+                sftint_m,
+                sftint_e,
+                sftint_qual_e,
+                hintp_e,
+                hintp_qual_e,
+                hintp_m,
+                rerr_e,
+                rerr_qual_e,
+                rerr_m,
+                nuke_thr_m,
+                nuke_thr_qual_m,
+		            ceint_m,
+                ceint_trap_m,
+                ceint_trap_w,
+//                ceint_qual_m,
+                ceint_qual_w,
+		            ceint_e,
+		            ceint_qual_e,
+		            ueint_m,
+                ueint_trap_m,
+                ueint_trap_w,
+//                ueint_qual_m,
+                ueint_qual_w,
+                ueint_qual_e,
+		            ueint_e;
+
+   wire         disr_trap_m,
+                rb_intr_m,
+                rb_intr_w,
+                any_intr_m;
+   
+   wire         force_intr_s;
+   wire         intr_vld_s,
+		            intr_vld_d,
+		            intr_vld_e,
+		            intr_vld_m,
+		            intr_vld_w,
+                intr_vld_qual_s,
+		            intr_vld_qual_d,
+		            intr_vld_qual_e,
+		            intr_vld_qual_m;
+   
+   wire         kill_intr_f,
+		            kill_intr_d,
+		            kill_intr_e;
+   
+//	 wire         kill_intr_m;
+   
+   wire         rst_stallreq,
+                rst_stallreq_l,
+                all_stallreq,
+                rst_itlb_stv_l,
+                arst_vld_f,
+                arst_vld_f_l,
+                arst_vld_s,
+                arst_vld_s_l,
+                async_intr_vld_s,
+                itlb_starv_alert,
+                rst_sw_bf,
+                rst_sw_bf_l,
+                sw_for_real_rst_bf,
+                rst_stallreq_d0,
+		            rst_stallreq_d1,
+		            rst_stallreq_d2;
+
+   wire         lsu_stallreq_d1,
+                ffu_stallreq_d1;
+   
+   wire [3:0]   rstint_penc;
+   
+   wire         usep_bf,
+                set_usen_bf,
+                usen_iso_bf,
+		            usen_bf;
+   wire         va2_f;
+   wire         ntpc_thisthr;
+   
+   wire [3:0]   thr_usen_nxt,
+ 		            thr_usen_bf;
+   
+   // Net declarations only; every net listed here is driven further down
+   // in this module.  4-bit vectors are indexed [3:0] throughout.
+   wire         brto_nxtthr_bf_l,  // intermediate signal for icadr sel
+//                brto_nxtthr_bf,
+//                thr_match_ne_norst,
+                sw_match_ne_norst,
+                brtaken_buf_e,
+                brtaken_unq_e,
+                brtaken_e,
+                brtaken_m;
+   
+   wire         switch_bf,   // switch in next cycle unless stall
+                switch_qual_bf,
+		            switch_s2;  // switch in this cycle
+   
+   wire         rstt,       // set thr_f to the reset pkt thread
+		            swt,        // switch to nextthr_bf
+		            samet;      // don't change thread
+
+   // NOTE(review): presumably alternate copies of thr_f (timing-critical,
+   // decoded, flopped) -- confirm against their drivers.
+   wire [3:0]   thr_f_crit,
+                thr_f_dec,
+                thr_f_flop;
+
+   // Thread vectors, one per pipe stage.
+   wire [3:0]   thr_f,      // = thr_s2
+		            thr_bf,
+		            thr_s1,     // = thr_d
+		            thr_s1_next,
+                dec_thr_s1_l,
+		            thr_d,
+		            thr_e,
+		            thr_m,
+		            thr_w2,
+		            thr_w;
+
+   wire         tm_fd_l;
+   
+   // Stage-to-stage thread comparison results (naming: thr_match_<a><b>).
+   wire         thr_match_fw,
+		            thr_match_fw2,
+		            thr_match_dw,
+		            thr_match_dw2,
+		            thr_match_em,
+		            thr_match_ew,
+		            thr_match_ew2,
+		            same_thr_mw2,
+		            thr_match_mw,
+		            thr_match_fm,
+		            thr_match_de,
+		            thr_match_dm,
+		            thr_match_fe,
+		            thr_match_fd,
+		            thr_match_fs1,
+		            thr_match_nw,
+		            thr_match_nd,
+		            thr_match_ne;
+//		            thr_match_ft;
+
+   wire         rb2_inst_d,
+		            rb2_inst_e,
+		            rb1_inst_s,
+		            rb1_inst_d,
+		            rb0_inst_bf,
+		            rb0_inst_s,
+		            rt2_inst_e,
+		            rt1_inst_s,
+		            rt1_inst_d,
+		            rt0_inst_bf,
+		            rt0_inst_s;
+   
+   wire [3:0]   rb_w2,
+                rb_for_iferr_e,
+		            rb_froms,
+		            rb_frome,
+		            rb_fromd;
+   
+   wire         rb_stg_s,
+		            rb_stg_d,
+                rb_stg_d_crit,
+		            rb_stg_e;
+   
+   wire         icadr_selbr_l,
+//		            icadr_selsw,
+//                icadr_selbr,
+                icadr_selsw_l;
+
+   wire         sw_or_async_stall;
+   
+   wire [3:0]   trap_thr;
+   
+   wire [3:0]   load_tpc,     // thread pc reg input select
+		            load_bpc,     // these should be exclusive in normal mode
+		            load_pcp4;    // but not during scan shift or reset
+   
+   wire         irf_ce_w,
+		            irf_ce_m,
+                any_ce_w,
+		            rb_stg_w;
+
+   wire [3:0]   ce_cnt0,
+                ce_cnt0_nxt,
+                ce_cnt1,
+                ce_cnt1_nxt,
+                ce_cnt_rst;
+   
+   wire         ce_val0_d,
+                ce_val1_d,
+                disable_ce_e,
+                disable_ce_d;
+   
+   wire [3:0]   ntpc_vld,     // use thr_nextpc_f
+		            ntpc_vld_nxt;
+   
+   wire [1:0]   sas_thrid_w;
+   
+   wire         rdsr_sel_pc_e,
+		    		    rdsr_sel_thr_e;
+
+   wire [1:0]   trap_tid_w2;
+   wire         trappc_vld_w2, 
+                trapnpc_vld_w2;
+
+   // fcl_reset is the inverse of fcl_reset_l (see the reset buffer below).
+   wire         fcl_reset,
+                fcl_reset_l;
+
+   // some monitor is looking for this signal
+//   wire         fcl_swl_flush_wait_w=1'b0;
+   wire         clk;
+   
+   wire [3:0]   nextthr_bf_buf,
+                nextthr_final_bf;
+
+
+//
+// Code starts here
+//
+   // clk is a local alias of the rclk input; every flop below uses clk.
+   assign       clk = rclk;
+   
+//----------------------------------------------------------------------
+// Fetch Unit Controls
+//----------------------------------------------------------------------
+
+   // reset buffer
+   // grst_l is registered into fcl_reset_l; the flop's rst_l pin is tied
+   // to arst_l.
+   dffrl_async rstff(.din (grst_l),
+                     .q   (fcl_reset_l),
+                     .clk (clk), .se(se), .si(), .so(),
+                     .rst_l (arst_l));
+
+   // Active-high reset used locally; the active-low form is also exported
+   // as ifu_reset_l.
+   assign   fcl_reset = ~fcl_reset_l;
+   assign   ifu_reset_l = fcl_reset_l;
+   
+
+//-----------------------------------
+// TLB Operations
+//-----------------------------------
+
+   // One-cycle delayed copy of lsu_ifu_addr_real_l; the active-high
+   // version is produced by inverting the flop output.
+   dff_s #(4) real_reg(.clk (clk),
+                       .din (lsu_ifu_addr_real_l),
+                       .q   (itlb_addr_real_l),
+                       .se  (se), .si(), .so());
+   assign   itlb_addr_real = ~itlb_addr_real_l;
+
+   // ITLB on signal
+   // Translation is enabled when not in red mode and either
+   //   - hpstate.enb is clear and the address is not real, or
+   //   - hpstate.enb is set and hpstate.priv is clear.
+//`ifdef SPARC_HPV_EN   
+   assign   xlate_en = ~tlu_lsu_redmode &
+                       (lsu_ifu_addr_real_l & ~tlu_hpstate_enb |
+                        ~tlu_hpstate_priv & tlu_hpstate_enb);
+//`else
+//   assign   xlate_en = lsu_ifu_addr_real_l;
+//`endif
+
+   dff_s #(4) xlate_reg(.clk (clk),
+                        .din (xlate_en),
+                        .q   (xlate_en_d1),
+                        .se  (se), .si(), .so());
+
+   // itlb_on is set when any bit of nextthr_final_bf lines up with a set
+   // bit of the delayed translate-enable vector.
+   assign itlb_on = |(nextthr_final_bf[3:0] & xlate_en_d1[3:0]);
+   
+
+   // flop xlate_en (done) addr_real and icache_en if timing is 
+   // not cutting it
+
+   // Hypervisor signals
+   // sw_itlb_real:   0 when (nextthr_bf_buf & itlb_addr_real) is all zero,
+   //                 1 otherwise.
+   // this_itlb_real: same test using thr_f instead of nextthr_bf_buf.
+   assign sw_itlb_real = ((nextthr_bf_buf & itlb_addr_real) == 4'b0) ?
+		                     1'b0 : 1'b1;
+   assign this_itlb_real = ((thr_f & itlb_addr_real) == 4'b0) ?
+			                     1'b0 : 1'b1;
+
+//   assign fcl_itlb_cam_real_bf = switch_bf ? sw_itlb_real : this_itlb_real;
+
+   // Three-way select between the switch-thread value, the current-thread
+   // value and tlu_fcl_dmp_real_bf, using the same pid_sel_* selects as the
+   // partition-ID mux (ipid_mx).
+   mux3ds  creal_mx(.dout (fcl_itlb_cam_real_bf),
+                    .in0  (sw_itlb_real),
+                    .in1  (this_itlb_real),
+                    .in2  (tlu_fcl_dmp_real_bf),
+                    .sel0 (pid_sel_sw),
+                    .sel1 (pid_sel_curr),
+                    .sel2 (pid_sel_dmp));
+
+   // Partition ID
+   // sw_pid_bf: one of lsu_pid_state0..3, selected by nextthr_bf_buf[3:0].
+   mux4ds #(3) swpid_mux (.dout (sw_pid_bf[2:0]),
+                          .in0  (lsu_pid_state0[2:0]),
+                          .in1  (lsu_pid_state1[2:0]),
+                          .in2  (lsu_pid_state2[2:0]),
+                          .in3  (lsu_pid_state3[2:0]),
+                          .sel0 (nextthr_bf_buf[0]),
+                          .sel1 (nextthr_bf_buf[1]),
+                          .sel2 (nextthr_bf_buf[2]),
+                          .sel3 (nextthr_bf_buf[3]));
+
+   // curr_pid_bf: one of lsu_pid_state0..3, selected by thr_f[3:0].
+   mux4ds #(3) currpid_mux (.dout (curr_pid_bf[2:0]),
+                            .in0  (lsu_pid_state0[2:0]),
+                            .in1  (lsu_pid_state1[2:0]),
+                            .in2  (lsu_pid_state2[2:0]),
+                            .in3  (lsu_pid_state3[2:0]),
+                            .sel0 (thr_f[0]),
+                            .sel1 (thr_f[1]),
+                            .sel2 (thr_f[2]),
+                            .sel3 (thr_f[3]));
+
+//   assign fcl_itlb_cam_pid_bf[2:0] = switch_bf ?  
+//                                       sw_pid_bf[2:0] : 
+//                                       curr_pid_bf[2:0];
+
+   // Select generation: as written, exactly one of the three selects is 1
+   // for any known values of ctxt_sel_dmp and switch_bf.  The demap select
+   // wins; otherwise switch_bf chooses between switch and current thread.
+//   assign pid_sel_dmp = tlu_itlb_dmp_actxt_g & ctxt_sel_dmp;
+   assign pid_sel_dmp = ctxt_sel_dmp;   
+   assign pid_sel_curr = ~pid_sel_dmp & ~switch_bf;
+   assign pid_sel_sw = ~pid_sel_dmp & switch_bf;
+   // Partition ID presented to the ITLB CAM.
+   mux3ds #(3) ipid_mx(.dout (fcl_itlb_cam_pid_bf[2:0]),
+                       .in0  (sw_pid_bf[2:0]),
+                       .in1  (curr_pid_bf[2:0]),
+                       .in2  (tlu_fcl_dmp_pid_bf[2:0]),
+                       .sel0 (pid_sel_sw),
+                       .sel1 (pid_sel_curr),
+                       .sel2 (pid_sel_dmp));
+   
+   // ITLB address mask
+   // pstate_am_d1 is tlu_lsu_pstate_am delayed by one flop.
+   dff_s #(4) am_reg(.din (tlu_lsu_pstate_am),
+                   .q   (pstate_am_d1),
+                   .clk (clk), .se(se), .si(), .so());
+   
+   // sw_itlb_am: 0 when (nextthr_bf_buf & pstate_am_d1) is all zero,
+   // 1 otherwise.
+   assign sw_itlb_am = ((nextthr_bf_buf & pstate_am_d1) == 4'b0) ?
+		                     1'b0 : 1'b1;
+   // Active-low mask: the switch-thread value is used when switch_bf is set,
+   // otherwise the inverse of fcl_fdp_mask32b_f.
+   assign fcl_itlb_addr_mask_l = switch_bf ? 
+                                 ~sw_itlb_am : ~fcl_fdp_mask32b_f;
+
+   // Registers the four lsu_ifu_tN_tlz inputs into tlzero_vec_d1, with t0
+   // in bit 0 and t3 in bit 3.
+   dff_s #(4) tlz_reg(.din ({lsu_ifu_t3_tlz,
+                           lsu_ifu_t2_tlz,
+                           lsu_ifu_t1_tlz,
+                           lsu_ifu_t0_tlz}),
+                    .q   (tlzero_vec_d1[3:0]),
+                    .clk (clk), .se (se), .si(), .so());
+                           
+
+   // TLB context select
+   // Bitwise AND of the registered tlz vector with the registered
+   // (active-low) addr_real vector.
+   assign fcl_fdp_tctxt_sel_prim = tlzero_vec_d1 & itlb_addr_real_l;
+//   assign fcl_fdp_tctxt_sel_prim[1] = lsu_ifu_t1_tlz & itlb_addr_real_l[1];
+//   assign fcl_fdp_tctxt_sel_prim[2] = lsu_ifu_t2_tlz & itlb_addr_real_l[2];
+//   assign fcl_fdp_tctxt_sel_prim[3] = lsu_ifu_t3_tlz & itlb_addr_real_l[3];
+   
+
+   // Access to TLB
+   // ITLB may be accessed even when icache is off
+   // A CAM lookup is valid when the ITLB is on and an instruction access is
+   // being made this cycle.
+   assign cam_vld_bf = itlb_on & inst_access_bf;
+                        
+   // The bypass output is simply the inverse of the CAM valid.
+   assign fcl_itlb_cam_vld_bf = cam_vld_bf;
+   assign fcl_itlb_cam_bypass_bf = ~cam_vld_bf;
+   
+   // cam_vld_f is cam_vld_bf delayed by one cycle.
+   dff_s #(1) itlb_onf_ff(.din (cam_vld_bf),
+		                    .q   (cam_vld_f),
+		                    .clk (clk),
+		                    .se  (se), .si(), .so());
+
+   // cam_vld_s1 is cam_vld_bf delayed by two cycles.
+   dff_s #(1) itlb_ons1_ff(.din (cam_vld_f),
+		                     .q   (cam_vld_s1),
+		                     .clk (clk),
+		                     .se  (se), .si(), .so());
+
+   // allow rd/wr/demap access to tlb
+   // itlb access is granted only every other cycle
+   // (not enough time to turn the request from mmu around)
+//   assign itlb_access_en = ~cam_vld_bf & ~ifq_fcl_asird_bf & 
+//                           ~itlb_access_done;
+//
+//   assign itlb_write_en = ~cam_vld_bf & ~ifq_fcl_asird_bf & 
+//                           ~itlb_write_done & 
+//                           (~tlu_itlb_dmp_vld_g | itlb_access_done);
+
+// Save some timing   
+//   assign itlb_write_en = (~itlb_on | no_instacc_bf) & ~ifq_fcl_asird_bf &
+//                          ~itlb_write_done & 
+//                          (~tlu_itlb_dmp_vld_g | itlb_access_done);
+
+   // Write enable: no instruction access, no ASI read, the previous cycle
+   // was not a write, and either no demap is requested or an access was
+   // granted in the previous cycle.
+   assign itlb_write_en = no_instacc_bf & ~ifq_fcl_asird_bf & 
+                          ~itlb_write_done &  
+                          (~tlu_itlb_dmp_vld_g | itlb_access_done);
+   // Access enable (read/demap/invalidate): no instruction access, no ASI
+   // read, and no access was granted in the previous cycle.
+   assign itlb_access_en = no_instacc_bf & ~ifq_fcl_asird_bf &
+                           ~itlb_access_done;
+
+   // reset tlb
+//   dff #(1) itlbrst_ff(.din (tlu_itlb_invalidate_all_g),
+//		                   .q   (tlb_invall_req_bf),
+//		                   .clk (clk), .se(se), .si(), .so());
+//   assign tlb_invall_bf = tlb_invall_req_bf & ~itlb_access_done;
+   // While sehold is set the flop recirculates its own output (holds its
+   // value); otherwise it captures invalidate-all request AND access enable.
+   assign tlb_invall_bf = sehold ? tlb_invall_f :
+                           (tlu_itlb_invalidate_all_g & itlb_access_en);
+   dff_s #(1) itlbrstf_ff(.din (tlb_invall_bf),
+		                    .q   (tlb_invall_f),
+		                    .clk (clk), .se(se), .si(), .so());
+
+   // Requests from the TLU are forwarded to the ITLB only when the matching
+   // enable is set.
+   assign fcl_itlb_wr_vld_bf = tlu_itlb_wr_vld_g & itlb_write_en;
+   assign fcl_itlb_dmp_vld_bf = tlu_itlb_dmp_vld_g & itlb_access_en;
+   assign fcl_itlb_dmp_all_bf = tlu_itlb_dmp_all_g & tlu_itlb_dmp_vld_g & 
+                                itlb_access_en;
+
+//   assign fcl_itlb_invall_bf = tlb_invall_bf & itlb_access_en | fcl_reset;
+   // Active-low invalidate-all, taken from the flopped request.
+   assign fcl_itlb_invall_f_l = ~tlb_invall_f;
+
+   // Data/tag reads are additionally blocked while a previous read is in
+   // its d1 or d2 completion cycle.
+   assign fcl_itlb_data_rd_vld_bf = tlu_itlb_data_rd_g & itlb_access_en & 
+	                                  ~itlb_rd_access_done_d2 &
+                                    ~itlb_rd_access_done_d1;
+ 
+   assign fcl_itlb_tag_rd_vld_bf = tlu_itlb_tag_rd_g & itlb_access_en &
+	                                 ~itlb_rd_access_done_d2 &
+                                   ~itlb_rd_access_done_d1;
+
+   // Drives rst_ctr_l of the starvation counter (starv_ctr).  It is 1 only
+   // while some TLU request is outstanding (requested but not yet marked
+   // done) and fcl_reset is low.
+   assign rst_itlb_stv_l = ((tlu_itlb_invalidate_all_g |
+                             tlu_itlb_dmp_vld_g |
+                             tlu_itlb_data_rd_g |
+                             tlu_itlb_tag_rd_g) & ~itlb_access_done |
+                            tlu_itlb_wr_vld_g & ~itlb_write_done) &
+                             ~fcl_reset;
+
+   // Counter instance whose limit output drives itlb_starv_alert.
+   // rst_ctr_l is driven by rst_itlb_stv_l.  Unlike the flops around it,
+   // this instance is connected to the module-level scan pins si/so.
+   // NOTE(review): presumably counts cycles a TLU request has been waiting
+   // and raises limit when the count saturates -- confirm in sparc_ifu_ctr5.
+   sparc_ifu_ctr5 starv_ctr(
+			                      // Outputs
+			                      .limit	(itlb_starv_alert),
+			                      .so	(so),
+			                      // Inputs
+			                      .clk	(clk),
+			                      .se	(se),
+			                      .si	(si),
+			                      .rst_ctr_l (rst_itlb_stv_l));
+
+   // A read request is either a data read or a tag read.
+   assign itlb_rd_req_bf = fcl_itlb_data_rd_vld_bf | fcl_itlb_tag_rd_vld_bf;
+
+   // tlb access request
+   // Grant covers reads, invalidate-all and demap; writes are tracked
+   // separately through itlb_write_done.
+   assign itlb_access_gnt = (fcl_itlb_data_rd_vld_bf |
+			                       fcl_itlb_tag_rd_vld_bf  |
+//                             tlb_invall_bf & itlb_access_en |
+                             tlb_invall_bf |
+                             fcl_itlb_dmp_vld_bf);
+
+   // itlb_access_done = grant delayed by one cycle.
+   dff_s #(1) tlb_gnt1_ff(.din (itlb_access_gnt),
+		                  .q   (itlb_access_done),
+		                  .clk (clk), .se  (se), .si(), .so());
+
+   // itlb_rd_req_f = read request delayed by one cycle.
+   dff_s #(1) tlb_rd_ff(.din (itlb_rd_req_bf),
+		                .q   (itlb_rd_req_f),
+		                .clk (clk), .se  (se), .si(), .so());
+
+   // itlb_write_done = write valid delayed by one cycle.
+   dff_s #(1) tlb_wrt1_ff(.din (fcl_itlb_wr_vld_bf),
+		                    .q   (itlb_write_done),
+		                    .clk (clk), .se  (se), .si(), .so());
+   
+
+   // TBD:
+   // reads need to wait one more cycle.  Others can ack without this
+   // second delay.
+   assign itlb_rd_access_done = itlb_rd_req_f & itlb_access_done;
+   
+   // Two further delay stages for read completion (d1, then d2).
+   dff_s #(1) tlb_rd1_ff(.din (itlb_rd_access_done),
+		                  .q   (itlb_rd_access_done_d1),
+		                  .clk (clk), .se  (se), .si(), .so());
+   dff_s #(1) tlb_rd2_ff(.din (itlb_rd_access_done_d1),
+		                   .q   (itlb_rd_access_done_d2),
+		                  .clk (clk), .se  (se), .si(), .so());
+   // Done to the TLU: non-read accesses one cycle after grant, writes one
+   // cycle after write valid, reads at the d2 stage.
+   assign ifu_tlu_itlb_done = ~itlb_rd_req_f & itlb_access_done |
+                               itlb_write_done | 
+	                             itlb_rd_access_done_d2;
+
+   // Read-data valid to the erb is the d1 stage of read completion.
+   assign fcl_erb_itlbrd_vld_s = itlb_rd_access_done_d1;
+   
+   // Thread id sent to the erb: the IFQ's ASI tid during an ASI read,
+   // otherwise the TLU demap thread id.
+   assign asi_tid_bf = ifq_fcl_asird_bf ? ifq_fcl_asi_tid_bf :
+	                                        tlu_idtlb_dmp_thrid_g;
+   
+   dff_s #(2) asi_tid_reg(.din (asi_tid_bf),
+		                  .q   (fcl_erb_asi_tid_f),
+		                  .clk (clk), .se(se), .si(), .so());
+   
+		    
+   // Remember if we read tag or data
+   // itlb_data_rd_f / itlb_data_rd_s are the data-read valid delayed by one
+   // and two cycles respectively.
+   dff_s #(1) tlb_rddf_ff(.din (fcl_itlb_data_rd_vld_bf),
+		                  .q   (itlb_data_rd_f),
+		                  .clk (clk), .se  (se), .si(), .so());
+
+   dff_s #(1) tlb_rdds_ff(.din (itlb_data_rd_f),
+		                  .q   (itlb_data_rd_s),
+		                  .clk (clk), .se  (se), .si(), .so());
+
+   // pick itlb ldxa data
+   assign fcl_erb_itlbrd_data_s =  itlb_data_rd_s;
+
+   // Demap thread
+   // Active-low decode of the 2-bit demap thread id: bit n of
+   // fcl_fdp_dmpthr_l is 0 only when tlu_idtlb_dmp_thrid_g equals n.
+   assign fcl_fdp_dmpthr_l[0] =  tlu_idtlb_dmp_thrid_g[1] |  tlu_idtlb_dmp_thrid_g[0];
+   assign fcl_fdp_dmpthr_l[1] =  tlu_idtlb_dmp_thrid_g[1] | ~tlu_idtlb_dmp_thrid_g[0];
+   assign fcl_fdp_dmpthr_l[2] = ~tlu_idtlb_dmp_thrid_g[1] |  tlu_idtlb_dmp_thrid_g[0];
+   assign fcl_fdp_dmpthr_l[3] = ~tlu_idtlb_dmp_thrid_g[1] | ~tlu_idtlb_dmp_thrid_g[0];
+
+   // Select appropriate context for TLB
+   // ctxt_sel_dmp is itlb_access_en without the asird signal
+   assign ctxt_sel_dmp = ~itlb_access_done & no_instacc_bf;
+
+   // Active-low selects: demap has priority; when demap is not selected,
+   // switch_bf picks the switch-thread context, else the current one.
+   assign fcl_fdp_ctxt_sel_dmp_bf_l  = ~ctxt_sel_dmp;
+   assign fcl_fdp_ctxt_sel_sw_bf_l   = ~(switch_bf & ~ctxt_sel_dmp);
+   assign fcl_fdp_ctxt_sel_curr_bf_l = ~(~switch_bf & ~ctxt_sel_dmp);
+   
+   
+//--------------------------
+// Fetch Request and Stall
+//--------------------------
+   
+   // Determine if we can continue fetching next cycle
+//   assign fetch_bf = (~all_stallreq & ~fcl_reset & ~rst_stallreq) &
+//                   (switch_bf |
+//                    ~(part_stall_thisthr_f | fdp_fcl_swc_s2));
+//                    ~(stall_thisthr_f | fdp_fcl_swc_s2 | immu_fault_f));
+
+   // Fetch when there is no global stall/reset and either a thread switch
+   // is happening or the current thread is neither part-stalled nor
+   // flagged by fdp_fcl_swc_s2.
+   assign fetch_bf = (~all_stallreq & ~fcl_reset & ~rst_stallreq) &
+                       (switch_bf |  // replace with ntr_s?
+                        ~(part_stall_thisthr_f  
+                          | fdp_fcl_swc_s2
+                          )
+                        );
+
+   // dtu_fcl_running_s should be a part of this eqn, since it is assumed
+   // by the ifill completion prediction logic in the swl
+//   assign inst_access_bf = (~all_stallreq & ~fcl_reset & ~rst_stallreq & 
+//                            (switch_bf & ~usen_iso_bf |
+//                             ~switch_bf & ~ely_stall_thisthr_f &
+//                             dtu_fcl_running_s & 
+//                             ~ely_kill_thread_s2 &
+//                             //~fdp_fcl_swc_s2 & // take out for tim reasons
+//                             ~usep_bf));
+
+   // ia0_bf: value of inst_access_bf when usen_iso_bf is 0.  A thread
+   // switch alone is enough to allow the access.
+   assign ia0_bf = (~all_stallreq & ~fcl_reset & ~rst_stallreq & 
+                    (switch_bf |
+                     ~ely_stall_thisthr_f &
+                     dtu_fcl_running_s & 
+                     ~ely_kill_thread_s2 &
+                     ~usep_bf));
+
+   // ia1_bf: value of inst_access_bf when usen_iso_bf is 1.  Access is
+   // allowed only for the non-switch (current thread) case.
+   assign ia1_bf = (~all_stallreq & ~fcl_reset & ~rst_stallreq & 
+                    (~switch_bf & ~ely_stall_thisthr_f &
+                     dtu_fcl_running_s & 
+                     ~ely_kill_thread_s2 &
+                     ~usep_bf));
+   
+
+   // Both candidates are computed in parallel and usen_iso_bf only drives
+   // the final 2:1 select.
+   assign inst_access_bf = usen_iso_bf ? ia1_bf : ia0_bf;
+   // needs to work even if usen_iso_bf is X - not nec. 11/06/03
+//   dp_mux2es #(1) ia_mx(.dout (inst_access_bf),
+//                        .in0  (ia0_bf),
+//                        .in1  (ia1_bf),
+//                        .sel  (usen_iso_bf));
+   
+   
+
+//   assign allow_ifq_access_icd_bf = (all_stallreq | rs
+//                                     ~switch_bf & 
+//                                     (usep_bf | stall_f) |
+//                                     switch_bf & usen_bf);
+   // The IFQ may use the icache data array whenever the pipe is not making
+   // an instruction access.
+   assign allow_ifq_access_icd_bf = ~inst_access_bf;
+   
+   // earlier version for critical stuff
+   // Note this is not the exact inverse of inst_access_bf: it uses
+   // dtu_fcl_ntr_s in place of switch_bf and omits the running/kill terms.
+   assign no_instacc_bf = all_stallreq | fcl_reset | rst_stallreq |
+                          ~dtu_fcl_ntr_s & (ely_stall_thisthr_f | usep_bf);
+   
+   // check if icache is on
+   // icache_en_d1 is lsu_ifu_icache_en delayed by one cycle.
+   dff_s #(4) ic_en_reg(.clk (clk),
+                        .din (lsu_ifu_icache_en),
+                        .q   (icache_en_d1),
+                        .se  (se), .si(), .so());
+
+   // The icache is on for this fetch when any bit of nextthr_final_bf
+   // lines up with a set bit of the delayed enable vector.
+   assign icache_on_bf = |(nextthr_final_bf[3:0] & icache_en_d1[3:0]);
+
+   // remember if icache was turned on (F stage, then S1 stage)
+   dff_s #(1) icef_ff(.clk (clk),
+                      .din (icache_on_bf),
+                      .q   (icache_on_f),
+                      .se  (se), .si(), .so());
+   dff_s #(1) ices_ff(.clk (clk),
+                      .din (icache_on_f),
+                      .q   (icache_on_s1),
+                      .se  (se), .si(), .so());
+
+   // check if cp is set
+   // A page counts as uncached when the CAM lookup was valid in S1 and the
+   // ITLB did not report cp.
+   assign uncached_page_s1 = cam_vld_s1 & ~itlb_fcl_cp_s1;
+   // Active-low icache enable to the IFQ: asserted (0) only when the icache
+   // was on and the page is not uncached.
+   assign fcl_ifq_icache_en_s_l = ~(icache_on_s1 & ~uncached_page_s1);
+   
+   // Read from the icache only if 
+   //   we need to fetch AND 
+   //   the icache is on AND 
+   //   we are not using the NIR 
+   assign rdreq_bf = icache_on_bf & inst_access_bf;
+
+   // The icache data array is read for either a pipe read or an IFQ read.
+   assign fcl_icd_rdreq_bf = rdreq_bf | ifq_fcl_rdreq_bf;
+
+   // split off driver to icv to reduce load
+   assign fcl_icv_rdreq_bf = rdreq_bf | ifq_fcl_rdreq_bf;
+   
+   // Read req pipe
+   // rdreq_f is cleared while fcl_reset is asserted.
+   dffr_s #(1)  rdreq_ff(.din  (rdreq_bf),
+		                 .clk  (clk),
+		                 .rst  (fcl_reset),
+		                 .q    (rdreq_f),
+		                 .se   (se), .si(), .so());
+   // Remember if we fetched in the last cycle
+   dff_s #(1)  rdreqs1_ff (.din  (rdreq_f),
+		                   .clk  (clk),
+		                   .q    (rdreq_s1),
+		                   .se   (se), .si(), .so());
+   // Note: this output is derived from stall_s1, not from rdreq_s1.
+   assign fcl_ifq_rdreq_s1 = ~stall_s1;
+
+   // Use NIR pipe
+   // usen_bf applies when switching threads, usep_bf otherwise.
+   assign usenir_bf = switch_bf ? usen_bf : usep_bf;
+   
+   dffr_s #(1)  unf_ff(.din  (usenir_bf),
+		                 .clk  (clk),
+		                 .rst  (fcl_reset),
+		                 .q    (usenir_f),
+		                 .se   (se), .si(), .so());
+   // Remember if the NIR was used in the last cycle
+   dff_s #(1)  uns1_ff (.din  (usenir_f),
+		                  .clk  (clk),
+		                  .q    (usenir_s1),
+		                  .se   (se), .si(), .so());
+
+
+   // Write signal to icache if no access from pipe
+   assign ic_wrreq_bf = allow_ifq_access_icd_bf & ifq_fcl_wrreq_bf;
+
+   // Per-array write requests.  The ifq_fcl_*_wrreq_bf inputs are OR-ed in
+   // directly, i.e. they are not gated by allow_ifq_access_icd_bf here.
+   assign fcl_icd_wrreq_bf = ic_wrreq_bf | ifq_fcl_icd_wrreq_bf;
+   assign fcl_ict_wrreq_bf = ic_wrreq_bf | ifq_fcl_ictv_wrreq_bf;
+   // The valid-bit array is also written for invalidate requests.
+   assign fcl_icv_wrreq_bf = ic_wrreq_bf | ifq_fcl_ictv_wrreq_bf |
+                             ifq_fcl_invreq_bf;
+
+   // synopsys translate_off
+   // Simulation-only check (excluded from synthesis by the translate_off /
+   // translate_on pragmas): report whenever the icache data array sees a
+   // read request and a write request in the same cycle.
+   always @ (posedge clk)
+   begin
+	    if (fcl_icd_rdreq_bf & fcl_icd_wrreq_bf)
+	      begin
+		 // 0in <fire -message "ERROR: sparc_ifu_fcl: rd and wr req to I$ at the same time"
+`ifdef DEFINE_0IN
+`else           
+		`ifdef MODELSIM
+			 // $display prints its arguments back to back with no
+			 // separator, so the tag and the message are emitted as a
+			 // single string with an explicit ": " between them.
+			 $display("CACHE_CONTENTION: ERROR: sparc_ifu_fcl: rd and wr req to I$ at the same time");
+		`else
+	         $error("CACHE_CONTENTION", "ERROR: sparc_ifu_fcl: rd and wr req to I$ at the same time");
+		`endif	 
+`endif
+	      end
+   end
+   // synopsys translate_on
+   
+
+   //-------------------------
+   // Valid Instruction Pipe
+   //-------------------------
+   // F stage
+   // inst_vld_f is fetch_bf delayed by one cycle.
+   assign inst_vld_bf = fetch_bf;
+   dff_s #(1) inst_vld_ff(.din (inst_vld_bf),
+		                  .clk (clk),
+		                  .q   (inst_vld_f),
+		                  .se  (se), .si(), .so());
+
+   // Three stall flavours for the F-stage thread, from most to least
+   // inclusive: stall_thisthr_f, part_stall_thisthr_f, ely_stall_thisthr_f.
+   // All of them include stall_f and rb_stg_s.
+   assign stall_f = ~inst_vld_f | kill_curr_f;
+   assign stall_thisthr_f = stall_f | imsto_thisthr_s1 | // intrto_thisthr_d |
+	                          kill_thread_s2 | rb_stg_s | ~dtu_fcl_running_s | 
+                            iferrto_thisthr_d1;
+
+   // Same as stall_thisthr_f but uses ely_kill_thread_s2 instead of
+   // kill_thread_s2 and leaves out iferrto_thisthr_d1.
+   assign part_stall_thisthr_f = stall_f | 
+                                 imsto_thisthr_s1 |
+                                 ~dtu_fcl_running_s | 
+                                 ely_kill_thread_s2 |
+	                               rb_stg_s;
+   
+   assign ely_stall_thisthr_f = stall_f | rb_stg_s;   
+
+//   assign stall_s1_nxt = stall_thisthr_f | intr_vld_s | tmsto_thisthr_f;
+   assign stall_s1_nxt = stall_thisthr_f; //| intr_vld_s;
+   
+   // S1 stage
+   // stall_s1 is stall_thisthr_f delayed by one cycle.
+   dff_s #(1) stalld_ff(.din (stall_s1_nxt),
+		                  .clk (clk),
+		                  .q   (stall_s1),
+		                  .se  (se), .si(), .so());
+
+   // S1 holds a valid instruction when it was not stalled, did not miss the
+   // icache and the D-stage thread is not being flushed.
+   assign inst_vld_s1 = ~stall_s1 & ~ic_miss_s1 & ~kill_curr_d;
+   assign val_thr_s1 = thr_s1 & {4{inst_vld_s1}}; // 4b
+
+   // S2 stage
+   // thr_f gated to all zeros when the F stage is stalled, rolled back or
+   // not running.
+   assign val_thr_f = thr_f & {4{~stall_f & ~rb_stg_s & dtu_fcl_running_s}};
+
+   // Tag the S stage thr inst register as containing a valid inst or not
+   // Set terms:   ifill for the thread, rollback (except the iferr case),
+   //              a valid S1 instruction that is not consumed this cycle,
+   //              or an already-tagged instruction that is not consumed.
+   // Reset terms: clear_s_d1, or a fetch error on a valid, non-rolled-back
+   //              instruction of the E-stage thread.  Reset wins over set.
+   assign tinst_vld_nxt = (ifq_fcl_fill_thr |  
+                           (rb_w2 & ~rb_for_iferr_e) | // set
+			                     val_thr_s1 & ~val_thr_f |
+                           //			   val_thr_s1 |
+			                     tinst_vld_s & ~val_thr_f) &
+			                      ~(clear_s_d1 |
+                              {4{erb_dtu_ifeterr_d1 & inst_vld_d1 & 
+                                 ~rb_stg_e}} & thr_e);   // reset
+
+   // Tag bits are cleared while fcl_reset is asserted.
+   dffr_s #(4) tinst_reg(.din  (tinst_vld_nxt),
+		                 .clk  (clk),
+		                 .rst  (fcl_reset),
+		                 .q    (tinst_vld_s),
+		                 .se   (se), .si(), .so());
+
+   // Does current thread have valid inst in s2
+   assign inst_vld_s2 = ((thr_f_crit & tinst_vld_s) == 4'b0000) ?
+			                    {1'b0} :  {1'b1};          
+
+   // On a thread switch use the tagged S2 valid, otherwise the S1 valid.
+   assign inst_vld_s = ~switch_s2 & inst_vld_s1 | 
+	                      switch_s2 & inst_vld_s2;
+   // Same as inst_vld_s but with the ic_miss_s1 term left out of the
+   // non-switch case.
+   assign inst_vld_s_crit = ~switch_s2 & ~stall_s1 & ~kill_curr_d | 
+	                      switch_s2 & inst_vld_s2;
+
+   assign valid_s = inst_vld_s & ~stall_f &  // f and s2 have same thread
+                       dtu_fcl_running_s &
+	                     ~(ely_kill_thread_s2 | rb_stg_s);
+
+   assign running_s2 = inst_vld_s & ~stall_thisthr_f;// f and s2 have 
+                                                     // same thread
+                                                     // same thread
+   // D stage
+   // inst_vld_d and inst_vld_d_crit are two separate flops fed by the same
+   // input (running_s2), so they always hold the same value.
+   dff_s #(1) rund_ff(.din (running_s2),
+		              .clk (clk),
+		              .q   (inst_vld_d),
+		              .se  (se), .si(), .so());
+   dff_s #(1) eivd_ff(.din (running_s2),
+		                .clk (clk),
+		                .q   (inst_vld_d_crit),
+		                .se  (se), .si(), .so());
+   assign fcl_erb_inst_issue_d = inst_vld_d & ~intr_vld_d;
+   // The D-stage instruction proceeds unless its thread is killed, it is
+   // being rolled back, or an interrupt is valid in D.
+   assign running_d = inst_vld_d & ~kill_thread_d & ~rb_stg_d & 
+	                    ~intr_vld_d;
+
+   // E stage
+   dff_s #(1) rune_ff(.din (running_d),
+		              .clk (clk),
+		              .q   (inst_vld_e),
+		              .se  (se), .si(), .so());
+
+   // The E-stage instruction proceeds unless annulled, killed, rolled back,
+   // or flushed by a same-thread flush from M.
+   assign running_e = inst_vld_e & ~dtu_inst_anull_e & 
+	                    ~kill_curr_e & ~rb_stg_e &
+                      ~(thr_match_em & ifu_tlu_flush_m);
+   assign inst_vld_qual_e = inst_vld_e & ~rb_stg_e;
+   // E-stage thread vector, zeroed for threads with a late flush or a
+   // utrap flush of the W-stage thread.
+   assign val_thr_e = thr_e_v2 & {4{inst_vld_qual_e}} & ~late_flush_w2 &
+                      ~(thr_w & {4{utrap_flush_w}});
+   
+   
+   // M stage
+   dff_s #(1) runm_ff(.din (running_e),
+		              .clk (clk),
+		              .q   (inst_vld_m),
+		              .se  (se), .si(), .so());
+   // An interrupt valid in M counts as a valid M-stage entry.
+   assign running_m = (inst_vld_m | intr_vld_m) & ~kill_thread_m;
+   
+   assign ifu_tlu_inst_vld_m = (inst_vld_m | intr_vld_m) & ~kill_curr_m;
+   // less critical
+   // assign ifu_lsu_inst_vld_m = ifu_tlu_inst_vld_m;
+   
+   // W stage
+   // As in D, two flops hold the same value (running_m): inst_vld_w and
+   // inst_vld_w_crit.
+   dff_s #(1) runw_ff(.din (running_m),
+		              .q   (inst_vld_w),
+		              .clk (clk), .se  (se), .si(), .so());   
+
+   dff_s #(1) iw_ff(.din (running_m),
+		              .q   (inst_vld_w_crit),
+		              .clk (clk), .se  (se), .si(), .so());   
+   
+   // synopsys translate_off
+//   wire         sas_m,
+//		            inst_done_w_for_sas;
+   
+//   assign sas_m = inst_vld_m & ~kill_thread_m & 
+//	                ~(exu_ifu_ecc_ce_m & inst_vld_m & ~trap_m);
+
+//   dff #(1) sasw_ff(.din (sas_m),
+//		              .clk (clk),
+//		              .q   (inst_done_w_for_sas),
+//		              .se  (se), .si(), .so());   
+   // synopsys translate_on
+
+   // need to kill branch by E stage, so qual with rb_stg_X
+   assign fcl_dtu_inst_vld_e = inst_vld_e & ~rb_stg_e & ~kill_curr_e;
+   assign fcl_dtu_intr_vld_e = intr_vld_e & ~rb_stg_e & ~kill_curr_e;
+   assign fcl_dtu_inst_vld_d = inst_vld_d & ~kill_curr_d &
+	                             ~rb_stg_d_crit & ~immu_miss_crit_d;
+   assign fcl_dtu_ely_inst_vld_d = inst_vld_d_crit;
+   // W-stage valid outputs: exu gets the _crit flop copy, tlu and spu share
+   // the other one.
+   assign ifu_tlu_inst_vld_w = inst_vld_w;
+   assign ifu_exu_inst_vld_w = inst_vld_w_crit;
+   assign ifu_spu_inst_vld_w = inst_vld_w;
+   assign ifu_exu_inst_vld_e = fcl_dtu_inst_vld_e;
+   
+   // dtu flush-S-only request, qualified so that it only counts for a valid
+   // E-stage instruction that is not rolled back, annulled or killed.
+   assign flush_sonly_qual_e = dtu_fcl_flush_sonly_e & inst_vld_e &
+//                               ~dec_fcl_kill4sta_e &
+                               ~rb_stg_e & ~dtu_inst_anull_e & ~kill_curr_e;
+   
+
+   // flush_sonly_m is the qualified E-stage request delayed by one cycle.
+   dff_s #(1) flshm_ff(.din (flush_sonly_qual_e),
+                     .q   (flush_sonly_m),
+                     .clk (clk),
+                     .se  (se), .si(), .so());
+
+   // ims_flush_sonly_m is the IFQ's flush-S-only request delayed by one
+   // cycle.
+   dff_s #(1) imflshm_ff(.din (ifq_fcl_flush_sonly_e),
+                       .q   (ims_flush_sonly_m),
+                       .clk (clk),
+                       .se  (se), .si(), .so());
+   // detect collision between two different types of retractions
+   assign ims_flush_coll_m = ims_flush_sonly_m & ~canthr_sm & 
+                             retract_iferr_e;
+   dff_s #(1) imflshw_ff(.din (ims_flush_coll_m),
+                       .q   (ims_flush_sonly_w),
+                       .clk (clk),
+                       .se  (se), .si(), .so());
+   assign ims_flush_coll_w = ims_flush_sonly_w & ~canthr_sw;
+   // Fully qualified M-stage flush: the IFQ request when its thread is not
+   // cancelled and there is no iferr retraction, or the dtu request when
+   // the M instruction is valid and not killed.
+   assign flush_sonly_qual_m = (ims_flush_sonly_m & ~canthr_sm & 
+                                ~retract_iferr_e |
+                                flush_sonly_m & inst_vld_m & ~kill_local_m &
+                                ~kill_curr_m);
+   // Less qualified version: drops the retract_iferr_e and kill terms.
+   assign flush_sonly_all_m = (ims_flush_sonly_m & ~canthr_sm | 
+                               flush_sonly_m & inst_vld_m);
+
+//   assign flush_sonly_qual_m = flush_sonly_m & ~canthr_sm;
+//   assign qtrap_flush_e = dtu_fcl_qtrap_e & inst_vld_e & ~dtu_inst_anull_e & 
+//	                  ~rb_stg_e;
+   
+//------------------------------   
+// Instruction Kill Logic
+//------------------------------
+   
+   // kill_s2 is the same as kill_f
+   // Kill the F/S2 instruction when its thread matches a rollback in W, a
+   // flush-S-only in M, or has a late flush pending (kill_curr_f).
+   assign kill_thread_s2 = thr_match_fw & rb_stg_w |
+//	                         thr_match_ft & trappc_vld_w2 |
+	                         thr_match_fm & (flush_sonly_all_m) |
+                           kill_curr_f;
+   
+   // Same as kill_thread_s2 but the W-stage term uses utrap_flush_w instead
+   // of rb_stg_w.
+   assign ely_kill_thread_s2 = thr_match_fw & utrap_flush_w |
+//	                             thr_match_ft & trappc_vld_w2 |
+	                             thr_match_fm & (flush_sonly_all_m) |
+                               kill_curr_f;
+   
+   // D-stage equivalent of kill_thread_s2.
+   assign kill_thread_d = thr_match_dw & rb_stg_w |
+	                        thr_match_dm & (flush_sonly_all_m) |
+                          kill_curr_d;
+
+   // M and E still need full qualification with flush pipe   
+//   assign kill_thread_e = thr_match_ew & utrap_flush_w | 
+//                          thr_match_ew & tlu_ifu_flush_pipe_w | 
+//                          kill_curr_e ;
+   assign ely_kill_thread_m = thr_match_mw & utrap_flush_w |
+//                              mark4rb_m |
+                              kill_curr_m;
+   // The full M-stage kill adds the TLU flush-pipe term to the early version.
+   assign kill_thread_m = ely_kill_thread_m | 
+                          thr_match_mw & tlu_ifu_flush_pipe_w;
+   
+   assign kill_local_m = thr_match_mw & (utrap_flush_w | intr_vld_w);
+
+   // A pipe flush in W is either a rollback in W or a TLU flush request.
+   assign flush_pipe_w = rb_stg_w | tlu_ifu_flush_pipe_w;
+//   assign part_flush_w = ifu_tlu_flush_w | tlu_ifu_flush_pipe_w;
+//   assign kill_nextthr_w = thr_match_nw & flush_pipe_w;
+   // Per-thread flush vector: thr_w when flushing, all zeros otherwise.
+   assign flush_thr_w = thr_w & {4{flush_pipe_w}};
+   // flush_pipe_w2 is flush_pipe_w delayed by one cycle.
+   dff_s #(1) fp_ff(.din (flush_pipe_w),
+                  .q   (flush_pipe_w2),
+                  .clk (clk), .se(se), .si(), .so());
+
+//   assign clear_s_stage =  thr_e & {4{flush_sonly_qual_e}};
+//   assign clear_s_stage =  trap_thr & {4{trappc_vld_w2}} |
+//	                         {4{dummy_flush_ireg}} | 
+//	                         thr_e & {4{flush_sonly_qual_e}};
+                           //  | flush_thr_w 
+
+   // Early per-thread cancel vector: the E-stage thread on a qualified
+   // flush-S-only, W2 rollbacks that are not for an iferr, and rollbacks
+   // from S.
+   assign canthr_f = thr_e & {4{flush_sonly_qual_e}} | 
+                     (rb_w2 & ~rb_for_iferr_e) | rb_froms;
+
+//   dff #(4) cls_reg(.din (clear_s_stage),
+//		                .q   (clear_s_early),
+//		                .clk (clk), .se(se), .si(), .so());
+
+   // ***NOTE***
+   // Don't use clear_s_d1 to generate fcl_ifq_canthr, since clear_s_d1 
+   // includes ifeterr!
+   // first term could be just flush_sonly_m & inst_vld_m & thr_m
+   assign clear_s_d1 = thr_m & {4{flush_sonly_all_m}} |
+                       late_flush_w2 | 
+                       trap_thr & {4{trappc_vld_w2}};
+
+   // Same shape as clear_s_d1, but the M-stage term uses the raw flopped
+   // flush requests rather than flush_sonly_all_m.
+   assign fcl_erb_clear_iferr = thr_m & {4{ims_flush_sonly_m | 
+                                           flush_sonly_m}} |
+                                late_flush_w2 | 
+                                trap_thr & {4{trappc_vld_w2}};
+
+
+   // canthr_s_early is canthr_f delayed by one cycle.
+   dff_s #(4) cm_reg(.din (canthr_f),
+		               .q   (canthr_s_early),
+		               .clk (clk),
+		               .se  (se), .si(), .so());
+
+   // Final cancel vector adds late flushes and the trapping thread.
+   assign canthr_s = canthr_s_early | late_flush_w2 | 
+                     trap_thr & {4{trappc_vld_w2}};          
+   
+//   assign fcl_ifq_canthr = clear_s_stage | rb_w2 | rb_froms | 
+//	                         canthr_s;
+   assign fcl_ifq_canthr = canthr_s;
+
+   // canthr_sm / canthr_sw / canthr_sd: 1 when the cancel vector has a bit
+   // set that is also set in the M, W or D stage thread vector.
+   assign canthr_sm = (canthr_s[0] & thr_m[0] | 
+                       canthr_s[1] & thr_m[1] | 
+                       canthr_s[2] & thr_m[2] | 
+                       canthr_s[3] & thr_m[3]);
+
+   assign canthr_sw = (canthr_s[0] & thr_w[0] | 
+                       canthr_s[1] & thr_w[1] | 
+                       canthr_s[2] & thr_w[2] | 
+                       canthr_s[3] & thr_w[3]);
+
+   // The D-stage version additionally includes a same-thread utrap flush
+   // from W.
+   assign canthr_sd = (canthr_s[0] & thr_d[0] | 
+                       canthr_s[1] & thr_d[1] | 
+                       canthr_s[2] & thr_d[2] | 
+                       canthr_s[3] & thr_d[3]) |
+                        thr_match_dw & utrap_flush_w;
+
+   // late_flush_w2 is the per-thread W-stage flush vector delayed by one
+   // cycle.
+   dff_s #(4) fpw2_reg(.din (flush_thr_w),
+                     .q   (late_flush_w2),
+                     .clk (clk), .se(se), .si(), .so());
+
+//   assign late_flush_w2 = thr_w2 & {4{flush_pipe_w2}};
+   
+   // kill_curr_<stage>: the instruction in that stage is killed when its
+   // thread has a bit set in the late (W2) flush vector.
+   assign kill_curr_f = |(thr_f_crit[3:0] & late_flush_w2[3:0]);
+   assign kill_curr_d = |(thr_d[3:0] & late_flush_w2[3:0]);
+
+   // The E stage is also killed by a same-thread utrap flush from W.
+   assign kill_curr_e = (|(thr_e_v2[3:0] & late_flush_w2[3:0])) |
+                        (utrap_flush_w & thr_match_ew);
+
+   // The M stage uses the precomputed M/W2 thread match with the flopped
+   // flush-pipe signal instead of the per-bit comparison.
+   assign kill_curr_m = flush_pipe_w2 & same_thr_mw2;
+   
+   //------------------------------
+   // track I$ misses
+   //------------------------------
+   // forcemiss_f is flopped (reset by fcl_reset) into forcemiss_s1 and ORed into ic_miss_s1 below.
+   // force a miss if a fetch and icache is off
+   // forcemiss triggers a fill vld_grequest to L2, so set to zero by default
+   assign forcemiss_f = inst_vld_f & ~icache_on_f;
+   dffr_s #(1)  miss_ff(.din (forcemiss_f),
+		                  .clk (clk),
+		                  .rst (fcl_reset),
+		                  .q   (forcemiss_s1),
+		                  .se  (se), .si(), .so());
+
+   //ooooooooooooooooooooooooooooooooooooooooooooooooooooooo
+   // removed imiss_s_l from this signal for timing fix  
+   // Perf Hit: 0.2% TPCC, 0.4% JBB
+//   assign ic_miss_sw_s1 = (~itlb_fcl_imiss_s_l & rdreq_s1 |
+//                             tlb_cam_miss_s1 | 
+//                             forcemiss_s1);
+//   assign ic_miss_sw_s1 =   tlb_cam_miss_s1 | 
+//                            forcemiss_s1;
+   //ooooooooooooooooooooooooooooooooooooooooooooooooooooooo
+   // ic_miss_s1: (~itlb_fcl_imiss_s_l on a read request, or a forced miss), suppressed by every ~term below.
+   assign ic_miss_s1 =  (~itlb_fcl_imiss_s_l & rdreq_s1 |
+			                   forcemiss_s1) & 
+                        ~stall_s1 & ~tlbmiss_s1_crit & ~pc_oor_s1 & 
+                        ~rb_stg_d_crit & ~canthr_sd;
+   // icmiss_for_perf: like ic_miss_s1 but without forcemiss_s1, and using rb_stg_d instead of rb_stg_d_crit.
+   assign icmiss_for_perf =  (~itlb_fcl_imiss_s_l & rdreq_s1) & 
+                        ~stall_s1 & ~tlbmiss_s1_crit & ~pc_oor_s1 & 
+                        ~rb_stg_d & ~canthr_sd;
+   // The miss is exported unqualified; the ely_kill_thread_d qualification is commented out.
+//   assign fcl_ifq_icmiss_s1 = ic_miss_s1 & ~ely_kill_thread_d;  // use buffer
+   assign fcl_ifq_icmiss_s1 = ic_miss_s1;  // use buffer   
+
+   // for perf counters (d1=e)
+   dff_s #(1) icmd1_ff(.din (icmiss_for_perf),
+                     .q   (ifu_tlu_icmiss_e),
+                     .clk (clk), .se(se), .si(), .so());
+   
+   // I$ miss is always to thr_s1.  Below we check to see if this is
+   // the same as thr_f (=thr_s2) which is the "current thread"
+//   assign imsto_thisthr_s1 = thr_match_fd & ic_miss_s1;
+//   assign imsto_nextthr_s1 = thr_match_nd & (ic_miss_s1 | tlbmiss_s1);
+
+   assign imsto_thisthr_s1 = thr_match_fd & ic_miss_s1;   // miss seen while thr_match_fd is set
+//   assign imsto_nextthr_s1 = thr_match_nd & (ic_miss_sw_s1);
+//   assign intrto_thisthr_d = thr_match_fd & fcl_dtu_sync_intr_d;
+   // iferrto_thisthr_d1: erb_dtu_ifeterr_d1 for a valid d1 instruction while thr_match_fe is set.
+   assign iferrto_thisthr_d1 = thr_match_fe & erb_dtu_ifeterr_d1 & 
+                               inst_vld_d1;
+   
+
+//------------------------------
+// track itlb misses
+//------------------------------
+// tlb_cam_miss_f is flopped to S1; tlbmiss_s2 / tlbmiss_d hold a 4-bit miss vector across cycles.
+   // default to hit when camming is turned off
+   assign tlb_cam_miss_f = ~itlb_fcl_tlbmiss_f_l & cam_vld_f;
+   dff_s #(1) tlbmsf_ff(.din (tlb_cam_miss_f),
+		                  .clk (clk),
+		                  .q   (tlb_cam_miss_s1),
+		                  .se  (se), .si(), .so());
+   // tlbmiss_s2 is the next state of tlbmiss_d: set terms are ORed, then masked by ~clear_s_d1.
+   // tlb miss logic
+   // va hole has higher priority than immu miss
+   assign tlbmiss_s2 = (({4{tlbmiss_s1 & ~pc_oor_s1 & ~rb_stg_d}} & thr_s1) |   // S1 miss, on thr_s1 bits
+			                  ({4{erb_fcl_itlb_ce_d1 & inst_vld_d1 &                  // itlb_ce term, on thr_e bits
+                            ~rb_stg_e}} & thr_e &
+			                   (~thr_d | {4{~inst_vld_d | ~thr_match_de}})) |
+			                  ({4{immu_miss_e}} & rb_frome) |                          // E miss, on rb_frome bits
+			                  ({4{immu_miss_d}} & rb_fromd & ~rb_frome) |  // set
+			                  tlbmiss_d & (~thr_d | {4{~inst_vld_d}}) & ~rb_w2) &   // hold term
+			                 ~(clear_s_d1); // reset
+   // Older formulation of tlbmiss_s2, retained for reference only.
+//   assign tlbmiss_s2 = (({4{tlbmiss_s1 & ~pc_oor_s1 & ~rb_stg_d}} & thr_s1) |
+//			({4{erb_fcl_itlb_ce_d1 & inst_vld_qual_d1}} & thr_e |
+//			 tlbmiss_d & (~thr_e | {4{~inst_vld_qual_e}}) & 
+//			 ~rb_w2) & ~(clear_s_stage); // reset
+   // State register for the miss vector; cleared by fcl_reset.
+   dffr_s #(4) tlbmiss_reg(.din (tlbmiss_s2),
+		                     .q   (tlbmiss_d),
+		                     .clk (clk),
+		                     .rst (fcl_reset),
+		                     .se  (se), .si(), .so());
+
+   assign immu_fault_f = (thr_f_crit[0] & (tlbmiss_d[0] | inst_acc_vec_d[0]) |   // stored fault bit picked by thr_f_crit
+			                    thr_f_crit[1] & (tlbmiss_d[1] | inst_acc_vec_d[1]) |
+			                    thr_f_crit[2] & (tlbmiss_d[2] | inst_acc_vec_d[2]) |
+			                    thr_f_crit[3] & (tlbmiss_d[3] | inst_acc_vec_d[3])) &
+                           switch_s2|  
+			                     // D stage miss
+			                     (tlbmiss_s1 | pc_oor_s1) & thr_match_fs1;
+	                         // S stage miss
+   // immu_miss_crit_d: tlbmiss_d bit picked by thr_d (no valid qualification).
+   assign immu_miss_crit_d = (thr_d[0] & tlbmiss_d[0] |
+			                        thr_d[1] & tlbmiss_d[1] |
+			                        thr_d[2] & tlbmiss_d[2] |
+			                        thr_d[3] & tlbmiss_d[3]);
+   
+   // TBD: move this to the E stage, post RB
+   assign immu_miss_d = immu_miss_crit_d & inst_vld_d |                       // stored miss on a valid D inst
+			                  thr_match_de & erb_fcl_itlb_ce_d1 & inst_vld_d1;   // or itlb_ce while thr_match_de
+   // immu_miss_qual_d drops the D miss when a live E miss already exists and thr_match_de is set.
+   // don't need to do this, once everyone switches to immu_miss_m
+   assign immu_miss_qual_d = immu_miss_d & ~kill_thread_d &
+	                            ~(immu_miss_e & thr_match_de & 
+                                inst_vld_e & ~dtu_inst_anull_e & ~rb_stg_e &
+                                ~kill_curr_e);
+   
+   dff_s immu_misse_ff(.din (immu_miss_qual_d),   // D -> E
+		                 .clk (clk),
+		                 .q   (immu_miss_e),
+		                 .se  (se), .si(), .so());
+
+   // The E-stage outputs below are disabled; the miss is flopped unqualified to M instead.
+   // flop this and send in M
+//   assign ifu_tlu_immu_miss_e = immu_miss_e & ~addr_real_e & 
+//                                inst_vld_e & ~dtu_inst_anull_e & ~rb_stg_e;
+//   assign ifu_tlu_immu_miss_e = 1'b0;
+   
+//   assign immu_miss_qual_e = immu_miss_e & //~addr_real_e & 
+// //                             ~(immu_miss_m & thr_match_em) &
+//                             inst_vld_e & ~dtu_inst_anull_e & ~rb_stg_e;
+
+//   dff #(1) immu_msm_ff(.din (immu_miss_qual_e),
+   dff_s #(1) immu_msm_ff(.din (immu_miss_e),     // E -> M
+                        .q   (immu_miss_m),
+                        .clk (clk), .se(se), .si(), .so());
+   // Exported miss: requires inst_vld_m and is suppressed by kill_curr_m.
+   assign ifu_tlu_immu_miss_m = immu_miss_m & inst_vld_m & ~kill_curr_m;
+
+   // Real-address indication for the E-stage thread: itlb_addr_real
+   // bits 3:0 selected by thr_e.
+   assign addr_real_e = |(itlb_addr_real[3:0] & thr_e[3:0]);
+
+   // NIR tlb-miss state, one bit per thr_s1 position.  When not stalled,
+   // the bit picked by thr_s1 takes tlb_cam_miss_s1; every other bit (and
+   // all bits during a stall) keeps its value.
+   assign nir_tlbmiss_next =
+             (nir_tlbmiss_vec & ({4{stall_s1}} | ~thr_s1)) |
+             (thr_s1 & {4{tlb_cam_miss_s1 & ~stall_s1}});
+
+   dffr_s #(4) nirtlbm_reg(.din (nir_tlbmiss_next),
+                           .q   (nir_tlbmiss_vec),
+                           .rst (fcl_reset),
+                           .clk (clk),
+                           .se  (se), .si(), .so());
+
+   // Stored miss bit for the current thr_s1 selection.
+   assign nir_tlbmiss_s1 = |(nir_tlbmiss_vec[3:0] & thr_s1[3:0]);
+
+   // Use the stored NIR bit when usenir_s1 is set, else the fresh CAM
+   // result; tlbmiss_s1 is that value gated by ~stall_s1.
+   assign tlbmiss_s1_crit = ~usenir_s1 ? tlb_cam_miss_s1 : nir_tlbmiss_s1;
+   assign tlbmiss_s1      = tlbmiss_s1_crit & ~stall_s1;
+
+//---------------------------------
+//  Privilege Mode and VA Hole
+//---------------------------------
+   // Address-mask bit (pstate_am_d1, bits 3:0) picked by the M-stage and
+   // F-stage thread vectors respectively.
+   assign addr_mask_32b_m   = |(thr_m[3:0] & pstate_am_d1[3:0]);
+   assign fcl_fdp_mask32b_f = |(thr_f[3:0] & pstate_am_d1[3:0]);
+
+   // One-cycle delayed copy of the F-stage mask bit.
+   dff_s #(1) amd_ff(.din (fcl_fdp_mask32b_f),
+                     .q   (fcl_fdp_addr_mask_d),
+                     .clk (clk),
+                     .se  (se),
+                     .si  (),
+                     .so  ());
+
+   // Keep track of whether the pc is outside the va hole.  A taken branch
+   // in M with exu_ifu_va_oor_m set marks the thr_m bits out of range,
+   // unless addr_mask_32b_m is set.
+   assign set_oor_m = brtaken_m & exu_ifu_va_oor_m & ~addr_mask_32b_m;
+   assign fcl_fdp_pcoor_vec_f = (thr_m & {4{set_oor_m}}) |
+                                fdp_fcl_pc_oor_vec_f;
+
+   // Out-of-range bit picked by the F-stage thread vector ...
+   assign fcl_fdp_pcoor_f = |(thr_f[3:0] & fcl_fdp_pcoor_vec_f[3:0]);
+
+   // ... dropped when part_stall_thisthr_f is set, then flopped to S1.
+   assign pc_oor_f = fcl_fdp_pcoor_f & ~part_stall_thisthr_f;
+   dff_s oors1_ff(.din (pc_oor_f),
+                  .q   (pc_oor_s1),
+                  .clk (clk),
+                  .se  (se),
+                  .si  (),
+                  .so  ());
+
+   // Privilege mode of the current page: F-stage selection, flopped to S1.
+   assign priv_mode_f = |(thr_f[3:0] & tlu_lsu_pstate_priv[3:0]);
+
+   dff_s #(1) priv_ff(.din (priv_mode_f),
+                      .q   (priv_mode_s1),
+                      .clk (clk),
+                      .se  (se),
+                      .si  (),
+                      .so  ());
+
+   // s1 and d are the same thread, so the S1 value is forwarded as-is.
+   assign fcl_dtu_privmode_d = priv_mode_s1;
+
+   // Hyper privilege: tlu_hpstate_priv selected by the F and W vectors.
+   assign hpriv_mode_f = |(thr_f[3:0] & tlu_hpstate_priv[3:0]);
+   assign hpriv_mode_w = |(thr_w[3:0] & tlu_hpstate_priv[3:0]);
+
+   // F -> S1 copy, exported to the D stage.
+   dff_s #(1) hprivd_ff(.din (hpriv_mode_f),
+                        .q   (hpriv_mode_s1),
+                        .clk (clk),
+                        .se  (se), .si(), .so());
+   assign fcl_dtu_hprivmode_d = hpriv_mode_s1;
+
+   // W -> W2 copy.
+   dff_s #(1) hprivw2_ff(.din (hpriv_mode_w),
+                         .q   (hpriv_mode_w2),
+                         .clk (clk),
+                         .se  (se), .si(), .so());
+   assign fcl_dtu_hprivmode_w2 = hpriv_mode_w2;
+
+   // determine if priv page has been accessed in non priv mode
+   // or if we have fallen into the VA hole
+   assign inst_acc_exc_s1 = (priv_inst_s1 & ~(priv_mode_s1 | hpriv_mode_s1) &   // priv page, neither priv nor hpriv
+			                       ~tlbmiss_s1_crit & cam_vld_s1 | 
+			                       pc_oor_s1) & ~stall_s1 & ~rb_stg_d;           // or pc_oor; both gated by stall/rollback
+   assign pc_oor_s2 = (thr_f[0] & inst_acc_vec_d[0] |   // stored exception bit picked by thr_f
+                       thr_f[1] & inst_acc_vec_d[1] |
+                       thr_f[2] & inst_acc_vec_d[2] |
+                       thr_f[3] & inst_acc_vec_d[3]);
+   assign pc_oor_s = (tm_fd_l) ? pc_oor_s2 : pc_oor_s1;   // tm_fd_l set selects the stored (s2) value
+   // inst_acc_vec_s2 is the next state of inst_acc_vec_d: set/hold terms ORed, then masked by ~clear_s_d1.
+   assign inst_acc_vec_s2 = (({4{inst_acc_exc_s1}} & thr_s1) |
+			                       ({4{inst_acc_exc_e}} & rb_frome) |
+			                       ({4{inst_acc_exc_d}} & rb_fromd & ~rb_frome) |
+	                           inst_acc_vec_d & (~thr_d | {4{~inst_vld_d}}) & 
+			                       ~rb_w2) &
+			                        ~(clear_s_d1);
+   // State register for the access-exception vector; cleared by fcl_reset.
+   dffr_s #(4) instaccd_reg(.din (inst_acc_vec_s2),
+			                  .q   (inst_acc_vec_d),
+			                  .rst (fcl_reset),
+			                  .clk (clk), .se (se), .si(), .so());
+   // inst_acc_exc_d: stored exception bit picked by thr_d, then flopped to E.
+   assign inst_acc_exc_d = (thr_d[0] & inst_acc_vec_d[0] |
+			                      thr_d[1] & inst_acc_vec_d[1] |
+			                      thr_d[2] & inst_acc_vec_d[2] |
+			                      thr_d[3] & inst_acc_vec_d[3]);
+
+   dff_s #(1) instacce_ff(.din (inst_acc_exc_d),
+		                  .q   (inst_acc_exc_e),
+		                  .clk (clk), .se(se), .si(), .so());
+   // An access exception with fdp_fcl_pc_oor_e clear is treated as a privilege violation.
+   // TLU needs to know if this is a priv violtn
+   assign priv_violtn_e = inst_acc_exc_e & ~fdp_fcl_pc_oor_e;
+   dff_s #(1) privm_ff(.din (priv_violtn_e),
+		               .q   (priv_violtn_m),
+		               .clk (clk),  .se  (se), .si(), .so());
+   // Exported violation: requires inst_vld_m and is suppressed by kill_curr_m.
+   assign ifu_tlu_priv_violtn_m = priv_violtn_m & inst_vld_m & ~kill_curr_m;
+   
+   // NIR privilege bit
+   assign next_nir_privvec = {4{itlb_fcl_priv_s1 & ~stall_s1 &       // load thr_s1 bit when not stalled
+                                cam_vld_s1}} & thr_s1 |
+	                           nir_privvec & (~thr_s1 | {4{stall_s1}});   // otherwise hold
+
+   dffr_s #(4) nir_priv_reg(.din (next_nir_privvec),
+			                  .q   (nir_privvec),
+			                  .rst (fcl_reset),
+			                  .clk (clk), .se(se), .si(), .so());
+   
+   assign nir_priv_s1 = (nir_privvec[0] & thr_s1[0] |   // stored bit picked by thr_s1
+			                   nir_privvec[1] & thr_s1[1] |
+			                   nir_privvec[2] & thr_s1[2] |
+			                   nir_privvec[3] & thr_s1[3]);
+   // priv_inst_s1: fresh itlb bit (when cam_vld_s1) unless usenir_s1 selects the stored NIR bit.
+   assign priv_inst_s1 = ~usenir_s1 ? (itlb_fcl_priv_s1 & cam_vld_s1) : 
+                                        nir_priv_s1;
+
+//-------------------------
+// Errors
+//-------------------------
+// running_s1 / ely_running_s1 qualify the error-valid outputs; ifet_ue_vec_* holds a 4-bit error vector.
+   // decide when the errors are valid
+   assign running_s1 = ~stall_s1 & ~kill_thread_d & ~rb_stg_d & ~pc_oor_s1 &
+	                     ~tlb_cam_miss_s1 & ~retract_inst_d;
+//   assign ely_running_s1 = ~stall_s1 & ~rb_stg_d & ~pc_oor_s1 & 
+//                           ~tlb_cam_miss_s1 & ~retract_inst_d & ~kill_curr_d;
+   assign ely_running_s1 = ~stall_s1 & ~rb_stg_d_crit & ~pc_oor_s1 &   // uses rb_stg_d_crit and kill_curr_d;
+                           ~tlb_cam_miss_s1 & ~kill_curr_d;             // no retract_inst_d term
+   assign fcl_erb_ievld_s1 = ely_running_s1 & rdreq_s1 & itlb_fcl_imiss_s_l;   // needs itlb_fcl_imiss_s_l high
+   assign fcl_erb_tevld_s1 = ely_running_s1 & rdreq_s1;
+
+   assign fcl_erb_immuevld_s1 = ely_running_s1 & cam_vld_s1;
+
+//   assign fcl_erb_ttevld_s1 = asird_s & rdtag_s;
+//   assign fcl_erb_tdevld_s1 = asird_s & ~rdtag_s;
+   // inst_vld_d1 is running_s1 delayed by one cycle.
+   dff_s #(1) d1vld_ff(.din (running_s1),
+		                 .q   (inst_vld_d1),
+		                 .clk (clk), .se(se), .si(), .so());
+//   assign inst_vld_qual_d1 = inst_vld_d1 & ~kill_thread_e & 
+//	                           ~flush_sonly_qual_e & ~rb_stg_e;
+   assign fcl_erb_inst_vld_d1 = inst_vld_d1;
+   
+
+   // ifetch unc. error
+   assign ifet_ue_vec_d1 = (erb_fcl_ifet_uevec_d1 |                // set
+	                          ifet_ue_vec_e & ~val_thr_e) &   // reset
+			                       ~(clear_s_d1);                 // wins
+
+   dffr_s #(4) ifuerr_reg(.din (ifet_ue_vec_d1),
+		                  .q   (ifet_ue_vec_e),
+		                  .rst (fcl_reset),
+		                  .clk (clk), .se(se), .si(), .so());
+   // ifet_ue_e: stored error bit picked by thr_e.
+   assign ifet_ue_e = (ifet_ue_vec_e[0] & thr_e[0] | 
+		                   ifet_ue_vec_e[1] & thr_e[1] | 
+		                   ifet_ue_vec_e[2] & thr_e[2] | 
+		                   ifet_ue_vec_e[3] & thr_e[3]);
+
+
+//----------------------
+// Other I side traps
+//----------------------
+   // Determine if we are in Trap Level 0
+   assign tlzero_s2 = (thr_f[0] & tlzero_vec_d1[0] |   // tlzero_vec_d1 bit picked by thr_f
+		                   thr_f[1] & tlzero_vec_d1[1] |
+		                   thr_f[2] & tlzero_vec_d1[2] |
+		                   thr_f[3] & tlzero_vec_d1[3]);
+   dff_s #(1) tlzd_ff(.din (tlzero_s2),
+		              .q   (fcl_dtu_tlzero_d),
+		              .clk (clk), .se(se), .si(), .so());
+
+   // Collect all IFU traps
+   assign trap_e = (immu_miss_e | inst_acc_exc_e | dtu_fcl_illinst_e |
+		                dtu_fcl_fpdis_e | dtu_fcl_privop_e | ifet_ue_e |
+		                dtu_fcl_imask_hit_e | dtu_fcl_sir_inst_e) & 
+                     inst_vld_e;
+   // trap_e is flopped to M; no_iftrap_m/w is the inverse of the qualified trap-valid, staged to W.
+   dff_s trapm_ff(.din (trap_e),
+		            .q   (trap_m),
+		            .clk (clk),
+		            .se  (se), .si(), .so());
+
+   assign no_iftrap_m = ~ifu_tlu_ttype_vld_m;
+   dff_s trapw_ff(.din (no_iftrap_m),
+		            .q   (no_iftrap_w),
+		            .clk (clk),
+		            .se  (se), .si(), .so());
+   
+   // south is very critical
+   assign ifu_tlu_ttype_vld_m = (trap_m & inst_vld_m |                          // valid M trap, or disr_trap_m,
+                                 disr_trap_m) & ~kill_curr_m & ~kill_local_m;   // unless killed
+   // less critical going east
+   assign ifu_exu_ttype_vld_m = trap_m & inst_vld_m;   // no kill qualification on this copy
+
+   // less critical going southwest
+   assign ifu_mmu_trap_m = trap_m;
+
+   // less critical going south   
+   assign ifu_tlu_trap_m = trap_m;
+   
+   // trap type priority encode
+   // Decreasing priority is
+   //   pc out of range           i_acc_exc
+   //   immu parity error         i_acc_err
+   //   immu miss                 i_acc_mmu_ms
+   //   icache/tag parity error   i_acc_err
+   //   privilege page            i_acc_exc
+   //   privilege opcode          priv_opc
+   //   illegal non-fp inst       ill_inst
+   //   soft reset                sir
+   //   fp disabled               fp_disabled
+   //   illegal fp instruction    ill_inst
+   // NOTE(review): the mux below has six *_qual_e selects ahead of pcoor and one error select after the miss selects - confirm vs. list.
+   // Clean this up!!
+   assign ttype_sel_spuma_e = spuint1_qual_e;
+   assign ttype_sel_spuenc_e = spuint0_qual_e;
+   assign ttype_sel_corr_err_e = ceint_qual_e;
+   assign ttype_sel_unc_err_e = ueint_qual_e;
+   assign ttype_sel_res_err_e = rerr_qual_e;
+   assign ttype_sel_hstk_cmp_e = hintp_qual_e;
+   // Instruction-side selects: each lower-priority select is masked by the higher-priority conditions it names.
+   assign ttype_sel_pcoor_e = fdp_fcl_pc_oor_e & inst_acc_exc_e;
+   assign ttype_sel_icache_err_e = ifet_ue_e; 
+   assign ttype_sel_immu_miss_e = ~fdp_fcl_pc_oor_e & immu_miss_e &     // miss with addr_real_e clear
+                                  ~addr_real_e;
+   assign ttype_sel_real_trans_e = ~fdp_fcl_pc_oor_e & immu_miss_e &    // miss with addr_real_e set
+                                   addr_real_e;    
+   assign ttype_sel_priv_viol_e = ~fdp_fcl_pc_oor_e & ~immu_miss_e & 
+	                           inst_acc_exc_e;
+   assign ttype_sel_ibe_e = ~fdp_fcl_pc_oor_e & ~immu_miss_e & 
+	                        ~inst_acc_exc_e & dtu_fcl_imask_hit_e;
+   assign ttype_sel_privop_e = ~fdp_fcl_pc_oor_e & ~immu_miss_e & 
+	                       ~inst_acc_exc_e & dtu_fcl_privop_e;
+   assign ttype_sel_illinst_e = ~fdp_fcl_pc_oor_e & ~immu_miss_e & 
+	                        ~inst_acc_exc_e & dtu_fcl_illinst_e;
+   assign ttype_sel_sir_e = ~fdp_fcl_pc_oor_e & ~immu_miss_e & 
+	                    ~inst_acc_exc_e & ~dtu_fcl_illinst_e &
+	                     dtu_fcl_sir_inst_e;
+   
+   assign ttype_sel_fpdis_e = ~fdp_fcl_pc_oor_e & ~immu_miss_e & 
+	                      ~inst_acc_exc_e & ~dtu_fcl_illinst_e &
+	                       dtu_fcl_fpdis_e;
+
+   // mux in the trap type
+   assign ttype_e[8:0] = ttype_sel_unc_err_e    ? `DATA_ERR  :      // first match wins, top to bottom
+                   ttype_sel_hstk_cmp_e   ? `HSTICK_CMP    :
+	                 ttype_sel_spuma_e      ? `SPU_MAINT     :
+	                 ttype_sel_spuenc_e     ? `SPU_ENCINT    :
+	                 ttype_sel_corr_err_e   ? `CORR_ECC_ERR  :
+                   ttype_sel_res_err_e    ? `RESUMABLE_ERR :
+          
+	                 ttype_sel_pcoor_e      ? `INST_ACC_EXC  :
+	                 ttype_sel_immu_miss_e  ? `FAST_MMU_MS   :
+	                 ttype_sel_real_trans_e ? `REAL_TRANS_MS :
+	                 ttype_sel_icache_err_e ? `INST_ACC_ERR  :
+	                 ttype_sel_priv_viol_e  ? `INST_ACC_EXC  :
+	                 ttype_sel_ibe_e        ? `INST_BRK_PT   :
+	                 ttype_sel_privop_e     ? `PRIV_OPC :
+	                 ttype_sel_illinst_e    ? `ILL_INST :
+	                 ttype_sel_sir_e        ? `SIR      :
+	                 ttype_sel_fpdis_e      ? `FP_DISABLED :
+	                                           9'h1ff;      // default when no select is active
+   // The selected 9-bit trap type is flopped from E to M.
+   dff_s #(9) ttype_reg(.din (ttype_e[8:0]),
+		                .q   (ifu_tlu_ttype_m[8:0]),
+		                .clk (clk), .se(se), .si(), .so());
+	 
+//------------------------------
+// Interrupts and Resets
+//------------------------------
+   // Process resets to see if they are sync or async
+   assign intr_in_pipe = ({4{intr_vld_d}} & thr_d |   // thread bits with an interrupt valid in D, E, M or W
+                          {4{intr_vld_e}} & thr_e |
+                          {4{intr_vld_m}} & thr_m |
+                          {4{intr_vld_w}} & thr_w);
+   // async_rst_i3: pending reset/idle/resume bits whose thread is not active and has no interrupt in the pipe.
+//   assign async_rst_i2 = tlu_ifu_rstthr_i2  & {4{tlu_ifu_rstint_i2}} &
+   assign async_rst_i3 = (rstint_i3 | nuke_thr_i3 | resumint_i3) & 
+                           ~dtu_fcl_thr_active & ~intr_in_pipe;
+
+   dff_s #(4) asyrst4_reg(.din (async_rst_i3),
+                        .q   (async_rst_i4),
+                        .clk (clk), .se(se), .si(), .so());
+   
+   // stall pipe before switching in rst thread
+   assign rst_stallreq_d0 = (|async_rst_i4[3:0]);
+   assign rst_stallreq = rst_stallreq_d0 | rst_stallreq_d1 | rst_stallreq_d2;
+   // lsu/ffu stall requests are registered for one cycle before use.
+   dff_s #(2) stlreq_reg(.din ({lsu_ifu_stallreq,
+                              ffu_ifu_stallreq}),
+                       .q   ({lsu_stallreq_d1,
+                              ffu_stallreq_d1}),
+                       .clk (clk), .se(se), .si(), .so());
+   
+   assign all_stallreq = ifq_fcl_stallreq | lsu_stallreq_d1 | 
+                         ffu_stallreq_d1 | itlb_starv_alert;
+
+   // leave out stall from ifq which goes directly to swl
+   assign fcl_dtu_stall_bf = lsu_stallreq_d1 | ffu_stallreq_d1 | 
+                             itlb_starv_alert | rst_stallreq;
+
+   // priority encode rst interrupts
+   // this could lead to obvious starvation of thr3, the assumption is that
+   // idle/resume/reset interrupts do not occur very frequently
+   assign rstint_penc[0] = async_rst_i4[0];
+   assign rstint_penc[1] = ~async_rst_i4[0] & async_rst_i4[1];
+   assign rstint_penc[2] = ~async_rst_i4[0] & ~async_rst_i4[1]  & 
+	                          async_rst_i4[2];
+   assign rstint_penc[3] = ~async_rst_i4[0] & ~async_rst_i4[1]  &    // bit 3 is the default when bits 0..2 are
+	                         ~async_rst_i4[2];                          // clear, so exactly one bit is always set
+
+   // BF - switch in rst thread
+   dff_s #(1) asyncr1_ff(.din (rst_stallreq_d0),
+		                   .q   (rst_stallreq_d1),
+		                   .clk (clk), .se(se), .si(), .so());
+   assign arst_vld_f_l = ~arst_vld_f;
+   assign arst_vld_s_l = ~arst_vld_s;
+   bw_u1_nand3_4x UZsize_rstsw_n3(.z (rst_sw_bf_l),      // presumably a 3-input NAND cell - confirm in the cell library
+                                  .a (arst_vld_f_l),
+                                  .b (arst_vld_s_l),
+                                  .c (rst_stallreq_d1));
+   assign rst_sw_bf = ~rst_sw_bf_l;
+
+   // double check if async intrs are still valid
+   assign sw_for_real_rst_bf = rst_sw_bf & rst_stallreq_d0;
+   
+   // F
+   dff_s #(1) asyncr2_ff(.din (sw_for_real_rst_bf),
+		                   .q   (rst_stallreq_d2),
+		                   .clk (clk), .se(se), .si(), .so());
+//   assign arst_vld_f = rst_stallreq_d2 & any_rstnuke_f;
+   assign arst_vld_f = rst_stallreq_d2;   
+
+   // hold thread till reset of curr thread is processed
+//   assign rst_thr_bf = arst_vld_f ? thr_f : rstint_penc;
+   
+   // S issue to pipe
+   dff_s #(1) rstvlds_ff(.din (arst_vld_f),
+		                   .q   (arst_vld_s),
+		                   .clk (clk), .se(se), .si(), .so());
+   assign async_intr_vld_s = arst_vld_s & ~kill_intr_f; // & any_rstnuke_f 
+
+   //
+   // thread wise interrupts
+   //
+   assign rstint_i2 = {4{tlu_ifu_rstint_i2}} & tlu_ifu_rstthr_i2;      // tlu_ifu_rstthr_i2 gated by each request type
+   assign resumint_i2 = {4{tlu_ifu_resumint_i2}} & tlu_ifu_rstthr_i2;
+   assign nuke_thr_i2 =  {4{tlu_ifu_nukeint_i2}} & tlu_ifu_rstthr_i2;
+   // Next state = new request | held bit, where the held bit is dropped for the thr_w thread once taken in W.
+   assign next_rst_i2 = rstint_i2 | 
+	                      rstint_i3 & (~(thr_w & {4{fcl_dtu_rst_thr_w}}));
+   assign next_resum_i2 = resumint_i2 | 
+	                        resumint_i3 & (~(thr_w & {4{fcl_dtu_resum_thr_w}})) 
+                          & ~rstint_i2;     // by precedence, ~rstint_i2 masks only the held (i3) term
+
+   assign next_nuke_i2  = (nuke_thr_i2 | nuke_thr_i3) &                  // here both new and held bits are masked
+                            (~(thr_w & {4{fcl_dtu_nuke_thr_w}})) & 
+	                       ~(rstint_i2 | resumint_i2);
+   // These four vectors are passed to their registers unmodified.
+   assign next_sftint_i2 = tlu_ifu_sftint_vld;
+   assign next_hwint_i3 = tlu_ifu_hwint_i3;
+   assign next_hintp_i2 = tlu_ifu_hintp_vld;
+   assign next_rerr_i2 = tlu_ifu_rerr_vld;
+   
+   assign next_ceint_i2 = erb_fcl_ce_trapvec |                           // set | hold-until-taken-in-W
+	                        ceint_i3 & (~(thr_w & {4{ceint_qual_w}}));
+
+   assign next_ueint_i2 = erb_fcl_ue_trapvec |
+	                        ueint_i3 & (~(thr_w & {4{ueint_qual_w}}));
+
+   // From Farnad: tid is ready several cycles before everything else
+   // I will assume 1 cycle before in the ifu
+   dff_s #(2) sptid_reg(.din (spu_ifu_ttype_tid_w2),
+                      .q   (spu_tid_w2),
+                      .clk (clk), .se(se), .so(), .si());
+   // Decode the 2-bit spu_tid_w2 into a one-hot 4-bit vector.
+   assign spu_thr[0] = ~spu_tid_w2[1] & ~spu_tid_w2[0];
+   assign spu_thr[1] = ~spu_tid_w2[1] &  spu_tid_w2[0];
+   assign spu_thr[2] =  spu_tid_w2[1] & ~spu_tid_w2[0];
+   assign spu_thr[3] =  spu_tid_w2[1] &  spu_tid_w2[0];
+   // spuint1 is set when spu_ifu_ttype_w2 is 1, spuint0 when it is 0; both are blocked by erb_fcl_spu_uetrap.
+   assign next_spuint1_i2 = {4{spu_ifu_ttype_vld_w2 & spu_ifu_ttype_w2}} & 
+	                          spu_thr & ~erb_fcl_spu_uetrap |
+	                          spuint1_i3 & ~({4{spuint1_w}} & thr_w);
+
+   assign next_spuint0_i2 = {4{spu_ifu_ttype_vld_w2 & ~spu_ifu_ttype_w2}} & 
+	                          spu_thr & ~erb_fcl_spu_uetrap |
+	                          spuint0_i3 & ~({4{spuint0_w}} & thr_w);
+
+
+   dffr_s #(4) rst_reg(.din  (next_rst_i2),        // reset requests (4 bits), cleared by fcl_reset
+		                 .q    (rstint_i3),
+		                 .clk  (clk),
+		                 .rst  (fcl_reset),
+		                 .se   (se), .si(), .so());
+
+   dffr_s #(4) resum_reg(.din  (next_resum_i2),    // resume requests
+		                   .q    (resumint_i3),
+		                   .clk  (clk),
+		                   .rst  (fcl_reset),
+		                   .se   (se), .si(), .so());
+
+   dffr_s #(4) nuke_reg(.din  (next_nuke_i2),      // idle (nuke) requests
+		                  .q    (nuke_thr_i3),
+		                  .rst  (fcl_reset),
+		                  .clk  (clk),
+		                  .se   (se), .si(), .so());
+   // The next four registers capture the pass-through tlu_ifu_* vectors.
+   dffr_s #(4) sfti_reg(.din  (next_sftint_i2),
+		                  .q    (sftint_i3),
+		                  .rst  (fcl_reset),
+		                  .clk  (clk), .se   (se), .si(), .so());
+   dffr_s #(4) hstki_reg(.din  (next_hintp_i2),
+		                  .q    (hintp_i3),
+		                  .rst  (fcl_reset),
+		                  .clk  (clk), .se   (se), .si(), .so());
+   dffr_s #(4) reri_reg(.din  (next_rerr_i2),
+		                  .q    (rerr_i3),
+		                  .rst  (fcl_reset),
+		                  .clk  (clk), .se   (se), .si(), .so());
+   dffr_s #(4) hwi_reg(.din  (next_hwint_i3),      // note: this one is i3 -> i4, the others are i2 -> i3
+		                 .q    (hwint_i4),
+		                 .rst  (fcl_reset),
+		                 .clk  (clk), .se   (se), .si(), .so());
+   // The remaining registers hold set/hold-style vectors computed above.
+   dffr_s #(4) spui0_reg(.din  (next_spuint0_i2),
+		                   .q    (spuint0_i3),
+		                   .rst  (fcl_reset),
+		                   .clk  (clk), .se   (se), .si(), .so());
+   
+   dffr_s #(4) spui1_reg(.din  (next_spuint1_i2),
+		                   .q    (spuint1_i3),
+		                   .rst  (fcl_reset),
+		                   .clk  (clk), .se   (se), .si(), .so());
+
+   dffr_s #(4) cei_reg(.din  (next_ceint_i2),
+		                 .q    (ceint_i3),
+		                 .rst  (fcl_reset),
+		                 .clk  (clk), .se   (se), .si(), .so());
+
+   dffr_s #(4) uei_reg(.din  (next_ueint_i2),
+		                 .q    (ueint_i3),
+		                 .rst  (fcl_reset),
+		                 .clk  (clk), .se   (se), .si(), .so());
+
+   assign supv_int_en = (~tlu_hpstate_priv | ~tlu_hpstate_enb) &     // needs pstate_ie and an active thread
+                         tlu_ifu_pstate_ie & dtu_fcl_thr_active;
+   assign hypv_int_en = ~tlu_hpstate_priv & tlu_hpstate_enb |        // by precedence: (~priv & enb) | (ie & active)
+                        tlu_ifu_pstate_ie & dtu_fcl_thr_active;
+   // Both enable vectors are registered for one cycle before use.
+   dff_s #(4) spvie_ff(.din (supv_int_en),
+                     .q   (supv_int_en_d1),
+                     .clk (clk), .se(se), .si(), .so());
+   dff_s #(4) hpvie_ff(.din (hypv_int_en),
+                     .q   (hypv_int_en_d1),
+                     .clk (clk), .se(se), .si(), .so());
+
+   // force an interrupt by putting nop on pipe
+   // use this signal instead of hw_int_s to help with crit path
+   assign supv_masked_intr_s = (sftint_i3        |
+			                          rerr_i3);
+   assign hypv_masked_intr_s = (hwint_i4         |
+			                          hintp_i3         |
+			                          ceint_i3         |
+			                          ueint_i3         |
+			                          spuint0_i3       |
+			                          spuint1_i3);
+   // Activation vector is the OR of both groups, before any enable gating.
+   assign fcl_swl_int_activate_i3 = hypv_masked_intr_s |
+                                    supv_masked_intr_s;
+
+    // keep track of rolled back interrupts
+   assign intr_pending_nxt =  (({4{intr_vld_e}} & rb_frome) |
+                               ({4{intr_vld_d}} & rb_fromd & ~rb_frome) |
+                                intr_pending_s) & ~clear_s_d1;   // held until cleared by clear_s_d1
+   
+   dffr_s #(4) ipend_reg(.din (intr_pending_nxt),
+                       .q   (intr_pending_s),
+                       .rst (fcl_reset),
+                       .clk (clk), .se(se), .si(), .so());
+   // any_intr_vec_f: enabled masked interrupts, pending rolled-back interrupts, and reset/resume/idle requests.
+   assign any_intr_vec_f = (supv_masked_intr_s & supv_int_en_d1 |
+                            hypv_masked_intr_s & hypv_int_en_d1 |
+                            intr_pending_s    |
+			                      rstint_i3         |
+                            resumint_i3       |
+			                      nuke_thr_i3);
+
+   dff_s #(4) anyints_reg(.din (any_intr_vec_f),
+                        .q   (any_intr_vec_s),
+                        .clk (clk), .se(se), .si(), .so());
+   // force_intr_s: registered interrupt bit picked by thr_f_crit, unless kill_intr_f is set.
+   assign force_intr_s = (thr_f_crit[0] & any_intr_vec_s[0] |
+                          thr_f_crit[1] & any_intr_vec_s[1] |
+                          thr_f_crit[2] & any_intr_vec_s[2] |
+                          thr_f_crit[3] & any_intr_vec_s[3]) &
+	                         ~kill_intr_f;
+
+   // interrupt and reset signal pipe
+   // VA hole trap has higher priority than interrupt
+   //   - since the VA hole marker is lost once the intr is taken
+   assign intr_vld_s = force_intr_s & (valid_s & ~pc_oor_s |     // valid S slot that is not pc_oor,
+                                       async_intr_vld_s);        // or an async reset slot
+   // Each stage re-qualifies the valid bit before flopping it to the next stage (S -> D -> E -> M -> W).
+   assign intr_vld_qual_s = intr_vld_s & ~iferrto_thisthr_d1;
+   dff_s #(1) any_intrd_ff(.din (intr_vld_qual_s),
+		                     .q   (intr_vld_d),	
+		                     .clk (clk),	
+		                     .se  (se), .so(), .si());
+   assign fcl_dec_intr_vld_d = intr_vld_d;                       // unqualified D-stage copy
+   assign intr_vld_qual_d = intr_vld_d & ~kill_intr_d & ~kill_thread_d & 
+	                           ~rb_stg_d;
+
+   dff_s #(1) intr_vlde_ff(.din (intr_vld_qual_d),
+		                   .q   (intr_vld_e),
+		                   .clk (clk), .se  (se), .so(), .si());
+   // E-stage qualification also drops the interrupt when ifu_tlu_flush_m is set while thr_match_em.
+   assign intr_vld_qual_e = intr_vld_e & ~kill_curr_e & ~rb_stg_e & 
+	                    ~kill_intr_e & ~dtu_inst_anull_e &
+                      ~(thr_match_em & ifu_tlu_flush_m);
+   
+   dff_s #(1) intr_vldm_ff(.din (intr_vld_qual_e),
+		                   .q   (intr_vld_m),
+		                   .clk (clk), .se  (se), .so(), .si());
+
+   assign intr_vld_qual_m = intr_vld_m & ~kill_thread_m & ~mark4rb_m;
+
+   dff_s #(1) intr_vldw_ff(.din (intr_vld_qual_m),
+		                   .q   (intr_vld_w),
+		                   .clk (clk), .se  (se), .so(), .si());
+
+   // Reset and Idle are prioritized in M.  All others in E
+   // reset interrupt: rstint_i3 bit of the thread currently in M
+   assign rstint_m = |{thr_m[3] & rstint_i3[3],
+                       thr_m[2] & rstint_i3[2],
+                       thr_m[1] & rstint_i3[1],
+                       thr_m[0] & rstint_i3[0]};
+
+   assign ifu_tlu_rstint_m = intr_vld_m & rstint_m &
+                             ~(kill_local_m | kill_curr_m);
+//   assign rstint_qual_m = rstint_m & ~ely_kill_thread_m & intr_vld_m;
+   dff_s #(1) rstw_ff(.clk (clk), .se(se), .si(), .so(),
+                      .din (rstint_m),
+                      .q   (rst_thr_w));
+   assign fcl_dtu_rst_thr_w = intr_vld_w & rst_thr_w;
+
+   // resume interrupt: masked by a simultaneous reset request
+   assign resumint_m = |{thr_m[3] & resumint_i3[3],
+                         thr_m[2] & resumint_i3[2],
+                         thr_m[1] & resumint_i3[1],
+                         thr_m[0] & resumint_i3[0]};
+   assign resumint_qual_m = ~rstint_m & resumint_m;
+   
+   dff_s #(1) resumw_ff(.clk (clk), .se(se), .si(), .so(),
+                        .din (resumint_qual_m),
+                        .q   (resum_thr_w));
+   assign fcl_dtu_resum_thr_w = intr_vld_w & resum_thr_w;
+
+   // idle interrupt: masked by a simultaneous reset or resume request
+   assign nuke_thr_m = |{thr_m[3] & nuke_thr_i3[3],
+                         thr_m[2] & nuke_thr_i3[2],
+                         thr_m[1] & nuke_thr_i3[1],
+                         thr_m[0] & nuke_thr_i3[0]};
+
+   assign nuke_thr_qual_m = nuke_thr_m & ~(rstint_m | resumint_m);
+
+   dff_s #(1) nukw_ff(.clk (clk),
+                      .se  (se), .si(), .so(),
+                      .din (nuke_thr_qual_m),
+                      .q   (nuke_thr_w));
+   assign fcl_dtu_nuke_thr_w = intr_vld_w & nuke_thr_w;
+   
+   // uncorrected ecc: request of the E-stage thread, gated by its hypv_int_en_d1 bit
+   assign ueint_e = (ueint_i3[0] & thr_e[0] & hypv_int_en_d1[0] |
+		                 ueint_i3[1] & thr_e[1] & hypv_int_en_d1[1] |
+		                 ueint_i3[2] & thr_e[2] & hypv_int_en_d1[2] |
+		                 ueint_i3[3] & thr_e[3] & hypv_int_en_d1[3]);
+   assign ueint_qual_e = ueint_e & intr_vld_e;   // only counts when an interrupt is valid in E
+
+   dff_s #(1) uem_ff (.din (ueint_qual_e),
+                    .q   (ueint_m),
+                    .clk (clk), .se (se), .si(), .so());
+                    
+//   assign ueint_m = (ueint_i3[0] & thr_m[0] |
+//		                 ueint_i3[1] & thr_m[1] |
+//		                 ueint_i3[2] & thr_m[2] |
+//		                 ueint_i3[3] & thr_m[3]);
+   
+   assign ueint_trap_m = ueint_m & intr_vld_m & 
+                         ~(rstint_m | resumint_m | nuke_thr_m);   // reset/resume/idle win in M
+   
+//   assign ueint_qual_m = ueint_trap_m & ~ely_kill_thread_m;
+   dff_s #(1) ueintw_ff(.din (ueint_trap_m),
+                      .q   (ueint_trap_w),
+                      .clk (clk), .se(se), .si(), .so());
+   assign ueint_qual_w = ueint_trap_w & intr_vld_w;   // re-qualified with the W-stage interrupt valid
+
+   // hstk match interrupt: request of the E-stage thread, gated by its hypv_int_en_d1 bit
+   assign hintp_e = (hintp_i3[0] & thr_e[0] & hypv_int_en_d1[0] |
+			               hintp_i3[1] & thr_e[1] & hypv_int_en_d1[1] |
+			               hintp_i3[2] & thr_e[2] & hypv_int_en_d1[2] |
+			               hintp_i3[3] & thr_e[3] & hypv_int_en_d1[3]);
+   assign hintp_qual_e = hintp_e & intr_vld_e & ~ueint_e;   // ueint_e outranks hintp in E
+
+   dff_s #(1) hintpm_ff (.din (hintp_qual_e),
+                       .q   (hintp_m),
+                       .clk (clk), .se (se), .si(), .so());
+
+//   assign ifu_tlu_hintp_m = hintp_m & ~kill_local_m & intr_vld_m & 
+//	                    ~(rstint_m | nuke_thr_m | ueint_m);
+   
+   // hw int: E-stage thread's hwint_i4 request, gated by its hypv_int_en_d1 bit
+   assign hwint_e = |{hypv_int_en_d1[3] & thr_e[3] & hwint_i4[3],
+                      hypv_int_en_d1[2] & thr_e[2] & hwint_i4[2],
+                      hypv_int_en_d1[1] & thr_e[1] & hwint_i4[1],
+                      hypv_int_en_d1[0] & thr_e[0] & hwint_i4[0]};
+   dff_s #(1) hwe_ff(.clk (clk), .se(se), .si(), .so(),
+                     .din (hwint_e),
+                     .q   (hwint_m));
+
+   assign ifu_tlu_hwint_m = intr_vld_m & hwint_m &
+                            ~(kill_local_m | kill_curr_m | rstint_m |
+                              resumint_m | nuke_thr_m | ueint_m | hintp_m);
+   
+
+   // spu interrupt (two requests; spuint0_qual_e is masked by spuint1_e, so spuint1 outranks spuint0)
+   assign spuint1_e = (spuint1_i3[0] & thr_e[0] & hypv_int_en_d1[0] |
+		                   spuint1_i3[1] & thr_e[1] & hypv_int_en_d1[1] |
+		                   spuint1_i3[2] & thr_e[2] & hypv_int_en_d1[2] |
+		                   spuint1_i3[3] & thr_e[3] & hypv_int_en_d1[3]);
+   assign spuint1_qual_e = spuint1_e & intr_vld_e & ~ueint_e & ~hintp_e;   // ueint and hintp win in E
+
+//   assign spuint1_m = (spuint1_i3[0] & thr_m[0] |
+//		                   spuint1_i3[1] & thr_m[1] |
+//		                   spuint1_i3[2] & thr_m[2] |
+//		                   spuint1_i3[3] & thr_m[3]);
+
+   dff_s #(1) spu1m_ff(.din (spuint1_qual_e),
+                     .q   (spuint1_m),
+                     .clk (clk), .se(se), .si(), .so());
+
+   assign spuint1_trap_m = spuint1_m & intr_vld_m & 
+	                    ~(rstint_m | resumint_m | nuke_thr_m | hwint_m);   // reset/resume/idle/hwint win in M
+
+//   assign spuint1_qual_m = spuint1_trap_m & ~ely_kill_thread_m;
+   
+   dff_s #(1) spiw1_ff(.din (spuint1_trap_m),
+		                 .q   (spuint1_trap_w),
+		                 .clk (clk), .se(se), .si(), .so());
+   assign spuint1_w = spuint1_trap_w & intr_vld_w;
+   
+   assign spuint0_e = (spuint0_i3[0] & thr_e[0] & hypv_int_en_d1[0] |
+		                   spuint0_i3[1] & thr_e[1] & hypv_int_en_d1[1] |
+		                   spuint0_i3[2] & thr_e[2] & hypv_int_en_d1[2] |
+		                   spuint0_i3[3] & thr_e[3] & hypv_int_en_d1[3]);
+   
+   assign spuint0_qual_e = spuint0_e & intr_vld_e & ~ueint_e &
+			                     ~spuint1_e & ~hintp_e;
+
+//   assign spuint0_m = (spuint0_i3[0] & thr_m[0] |
+//		                   spuint0_i3[1] & thr_m[1] |
+//		                   spuint0_i3[2] & thr_m[2] |
+//		                   spuint0_i3[3] & thr_m[3]);
+   dff_s #(1) spu0m_ff(.din (spuint0_qual_e),
+                     .q   (spuint0_m),
+                     .clk (clk), .se(se), .si(), .so());
+
+   assign spuint0_trap_m = spuint0_m & intr_vld_m &
+             	        ~(rstint_m | nuke_thr_m | resumint_m |
+		                    hwint_m);
+   
+//   assign spuint0_qual_m = spuint0_trap_m & ~kill_thread_m;
+
+   dff_s #(1) spiw0_ff(.din (spuint0_trap_m),
+		                 .q   (spuint0_trap_w),
+		                 .clk (clk), .se(se), .si(), .so());
+   assign spuint0_w = spuint0_trap_w & intr_vld_w;
+   
+//   assign ifu_spu_trap_ack = {spuint1_w, spuint0_w};
+   assign ifu_spu_trap_ack = spuint1_w;   // only spuint1_w drives the ack; the 2-bit form above is disabled
+   
+   
+   // software interrupts: gated by supv_int_en_d1 (not hypv_int_en_d1) of the E-stage thread
+   assign sftint_e = (sftint_i3[0] & thr_e[0] & supv_int_en_d1[0] |
+			                sftint_i3[1] & thr_e[1] & supv_int_en_d1[1] |
+			                sftint_i3[2] & thr_e[2] & supv_int_en_d1[2] |
+			                sftint_i3[3] & thr_e[3] & supv_int_en_d1[3]);
+
+   assign sftint_qual_e = sftint_e & ~spuint0_e & intr_vld_e & 
+                          ~ueint_e & ~spuint1_e & ~hintp_e;   // ueint, hintp and both spu requests win in E
+   
+   dff_s #(1) swm_ff(.din (sftint_qual_e),
+                   .q   (sftint_m),
+                   .clk (clk), .se(se), .si(), .so());
+
+   // if nothing else, signal sftint!
+//   assign ifu_tlu_sftint_m = (sftint_m & 
+//	                            ~(rstint_m | nuke_thr_m | hintp_m | resumint_m |
+//				                        hwint_m | spuint1_m | spuint0_m | ueint_m) |
+//                              ~(ceint_m | rerr_m)) & 
+//                               ~kill_local_m & intr_vld_m;
+
+   assign ifu_tlu_sftint_m = (sftint_m & 
+	                            ~(rstint_m | nuke_thr_m | hintp_m | resumint_m |
+				                        hwint_m | spuint1_m | spuint0_m | ueint_m)) & 
+                               ~kill_local_m & ~kill_curr_m & intr_vld_m;   // unlike the disabled version, ceint_m/rerr_m are not consulted
+   
+
+   // corrected ecc interrupt: request of the E-stage thread, gated by its hypv_int_en_d1 bit
+   assign ceint_e = (ceint_i3[0] & thr_e[0] & hypv_int_en_d1[0] |
+		                 ceint_i3[1] & thr_e[1] & hypv_int_en_d1[1] |
+		                 ceint_i3[2] & thr_e[2] & hypv_int_en_d1[2] |
+		                 ceint_i3[3] & thr_e[3] & hypv_int_en_d1[3]);
+   assign ceint_qual_e = ceint_e & intr_vld_e & ~ueint_e & 
+                          ~spuint1_e & ~spuint0_e & ~hintp_e;   // ueint, hintp and both spu requests win in E
+
+//   assign ceint_m = (ceint_i3[0] & thr_m[0] |
+//		                 ceint_i3[1] & thr_m[1] |
+//		                 ceint_i3[2] & thr_m[2] |
+//		                 ceint_i3[3] & thr_m[3]);
+   dff_s #(1) cem_ff(.din (ceint_qual_e),
+                   .q   (ceint_m),
+                   .clk (clk), .se(se), .si(), .so());
+   
+   assign ceint_trap_m = ceint_m & intr_vld_m & 
+	                 ~(rstint_m | nuke_thr_m | resumint_m |
+		                 sftint_m | hwint_m);   // reset/idle/resume/sftint/hwint win in M
+   
+//   assign ceint_qual_m = ceint_trap_m & ~ely_kill_thread_m;
+   dff_s #(1) ceintw_ff(.din (ceint_trap_m),
+                      .q   (ceint_trap_w),
+                      .clk (clk), .se(se), .si(), .so());
+   assign ceint_qual_w = ceint_trap_w & intr_vld_w;   // re-qualified with the W-stage interrupt valid
+
+   // resumable error interrupt: gated by supv_int_en_d1 of the E-stage thread
+   assign rerr_e = (rerr_i3[0] & thr_e[0] & supv_int_en_d1[0] |
+		                 rerr_i3[1] & thr_e[1] & supv_int_en_d1[1] |
+		                 rerr_i3[2] & thr_e[2] & supv_int_en_d1[2] |
+		                 rerr_i3[3] & thr_e[3] & supv_int_en_d1[3]);
+   assign rerr_qual_e = rerr_e & intr_vld_e & ~ueint_e & ~ceint_e &
+                        ~spuint1_e & ~spuint0_e & ~hintp_e;   // lowest of the E-prioritized error/spu group
+   
+   dff_s #(1) rem_ff(.din (rerr_qual_e),
+                   .q   (rerr_m),
+                   .clk (clk), .se(se), .si(), .so());
+   
+//   assign rerr_m = (rerr_i3[0] & thr_m[0] |
+//			              rerr_i3[1] & thr_m[1] |
+//			              rerr_i3[2] & thr_m[2] |
+//			              rerr_i3[3] & thr_m[3]);
+
+//   assign ifu_tlu_rerr_m = rerr_m & ~kill_local_m & intr_vld_m & 
+//	                    ~(rstint_m | nuke_thr_m | ueint_m | ceint_m);
+
+   assign disr_trap_m = intr_vld_m &
+                        ~(rstint_m | nuke_thr_m | resumint_m) &
+                        (ueint_m | hintp_m | spuint0_m | spuint1_m | ceint_m | rerr_m);
+
+   // an interrupt was scheduled, but did any request survive to M?
+   assign any_intr_m = |{ueint_m, ceint_m, spuint0_m, spuint1_m,
+                         hintp_m, rerr_m, sftint_m, hwint_m,
+                         rstint_m, nuke_thr_m, resumint_m};
+   
+   // if none did, the interrupt evaporated: roll back
+   assign rb_intr_m = intr_vld_m & ~any_intr_m;
+   dff_s #(1) rbint_ff(.clk (clk), .se(se), .si(), .so(),
+                       .din (rb_intr_m),
+                       .q   (rb_intr_w));
+
+   // the synchronous D-stage interrupt is what switches the thread out in swl
+//   assign fcl_dtu_sync_intr_d = (intr_vld_d | immu_miss_crit_d) & ~rb_stg_d;
+   assign fcl_dtu_sync_intr_d = ~rb_stg_d_crit & intr_vld_d;
+   
+   // an interrupt valid in W kills interrupts of the same thread in E, D and F.
+//   assign kill_intr_m = ((thr_m & thr_w) == 4'b0) ?
+//			                    1'b0 : (intr_vld_w);
+   assign kill_intr_e = ((thr_e & thr_w) != 4'b0) ?
+                          intr_vld_w : 1'b0;
+   assign kill_intr_d = ((thr_d & thr_w) != 4'b0) ?
+                          intr_vld_w : 1'b0;
+   assign kill_intr_f = ((thr_f & thr_w) != 4'b0) ?
+                          intr_vld_w : 1'b0;
+
+//--------------------------------
+// check if we are in a delay slot
+//--------------------------------
+   // remember, per thread, if the current instruction is a delay slot
+   assign delay_slot_vec_nxt = ({4{dtu_fcl_br_inst_d & inst_vld_d & 
+                                   ~rb_stg_d}} & thr_d | // set
+                                delay_slot_vec &         // hold ...
+                                ~(thr_d & {4{inst_vld_d &   // ... until the D thread has a valid,
+                                             ~rb_stg_d &    //     not rolled back,
+                                             ~intr_vld_d}})) &  //     non-interrupt instruction
+                                 ~(trap_thr & {4{trappc_vld_w2}});   // cleared for trap_thr when trappc_vld_w2
+                                // & ~late_flush_w2;
+   // Need to be a little pessimistic: can't clear the delay slot vec
+   // after a utrap, since we may still be in the delay slot when we
+   // re-execute
+
+   dffr_s #(4) ds_reg(.din (delay_slot_vec_nxt),
+                    .q   (delay_slot_vec),
+                    .rst (fcl_reset),
+                    .clk (clk), .se(se), .si(), .so());
+   assign fcl_dec_dslot_s = (delay_slot_vec[0] & thr_f[0] |   // delay-slot bit of the thr_f thread
+                             delay_slot_vec[1] & thr_f[1] |
+                             delay_slot_vec[2] & thr_f[2] |
+                             delay_slot_vec[3] & thr_f[3]);
+   
+
+//------------------------------
+// NIR control
+//------------------------------
+   // use nir if va[2] of previous fetch is a zero (i.e lower word)
+   dff_s #(1) va2_ff(.din (fdp_fcl_va2_bf),
+		               .clk (clk),
+		               .q   (va2_f),
+		               .se  (se), .si(), .so());
+   
+   assign usep_bf = rdreq_f & ~va2_f & ~ntpc_thisthr & ~stall_f;
+   assign set_usen_bf = usep_bf & ~ely_stall_thisthr_f & dtu_fcl_running_s;
+   
+   // need to kill usen if trap or interrupt or flush
+   // thr_usen_bf holds one usen bit per thread; the reset terms below override set/hold
+   assign thr_usen_nxt = ({4{set_usen_bf}} &  thr_f  |    // set usen
+			                    thr_usen_bf  & ~val_thr_f) &    // keep old value
+			                     ~((thr_d & {4{dtu_fcl_br_inst_d}})  |
+		                         (thr_s1 & {4{ic_miss_s1}})  |
+                             (thr_e & {4{erb_dtu_ifeterr_d1 & inst_vld_d1}}) |
+			                       (clear_s_d1) |
+			                       (ntpc_vld)  |
+			                       (rb_w2 | rb_froms));     // reset usen (wins)
+                         // & ~dtu_fcl_flush_nir
+   
+   dffr_s #(4) thr_usen_reg(.din  (thr_usen_nxt),
+			                  .clk  (clk),
+			                  .q    (thr_usen_bf),
+			                  .rst  (fcl_reset),
+			                  .se   (se), .si(), .so());
+   
+   /*
+   // Use hand instantiated mux
+   bw_u1_ao2222_4x UZsize_usn_mx(.z   (usen_iso_bf)
+		                            .a2  (thr_usen_bf[0]),
+		                            .b2  (thr_usen_bf[1]),
+		                            .c2  (thr_usen_bf[2]),
+		                            .d2  (thr_usen_bf[3]),
+		                            .a1  (nextthr_bf_buf[0]),
+		                            .b1  (nextthr_bf_buf[1]),
+		                            .c1  (nextthr_bf_buf[2]),
+		                            .d1  (nextthr_bf_buf[3]));
+
+    // isolate from critical path
+    bw_u1_buf_5x  UZsize_usn_iso(.z(usen_bf), .a(usen_iso_bf));
+    */
+   // behavioral AND-OR used in place of the disabled hand-instantiated mux above
+   assign usen_iso_bf = (thr_usen_bf[0] & nextthr_bf_buf[0] |
+                         thr_usen_bf[1] & nextthr_bf_buf[1] |
+                         thr_usen_bf[2] & nextthr_bf_buf[2] |
+                         thr_usen_bf[3] & nextthr_bf_buf[3]);
+   assign usen_bf = usen_iso_bf;
+
+   
+   
+//------------------------------
+// Switch Control
+//------------------------------   
+   // Switch IF
+   //   1. Another thread is ready OR
+   //   2. We hit a switch condition or Imiss and another thread is
+   //      speculatively ready
+   //   3. No thread is running and another thread is speculatively ready
+   //   4. The DTU calls for a thread switch and another thread is ready
+   //   (NOTE: if we hit a switch condition or Imiss and no thread is
+   //    speculatively or otherwise ready we stall the pipe).
+   //
+   //   New plan:  switch if another thread is ready or spec ready.
+   //
+
+//   assign switch_bf = dtu_fcl_ntr_s;
+   bw_u1_buf_20x UZsize_swbuf(.a (dtu_fcl_ntr_s),   // buffer cell in place of the assign above
+                              .z (switch_bf));
+   
+//   assign switch_bf = dtu_fcl_ntr_s & ~imsto_nextthr_s1;
+//   assign switch_bf = dtu_fcl_ntr_s & ~(imsto_nextthr_s1 | kill_nextthr_w | 
+//                                        intrto_nextthr_d);   
+   
+//   assign fcl_dtu_switch_s = switch_bf & ~all_stallreq & ~rst_stallreq;
+//   assign fcl_dtu_switch_s = switch_bf & ~kill_nextthr_w;
+
+   // TBD: No need to send this anymore, since switch_bf = ntr_s
+//   assign fcl_dtu_switch_s = switch_bf;  // sw out curr and sw in next
+
+   assign fcl_swl_swout_f = stall_f;     // sw out curr but don't sw in next
+   // Note: need fcl_swl_swout_f and dtu_fcl_running_s to sync swl and
+   // fcl at all times.  
+
+   assign switch_qual_bf = switch_bf & ~rst_stallreq;   // switch is suppressed while rst_stallreq is set
+   dff_s #(1) sw_ff (.din  (switch_qual_bf),
+		               .clk  (clk),
+		               .q    (switch_s2),
+		               .se   (se), .si(), .so());
+
+   // NOTE(review): tm_fd_l is a plain flopped copy of switch_bf (no inversion here),
+   // despite the _l suffix -- confirm the intended polarity against its users.
+   dff_s #(1) tmfn_ff (.din  (switch_bf),
+		                 .clk  (clk),
+		                 .q    (tm_fd_l),
+		                 .se   (se), .si(), .so());
+
+   // need to qual with immu_fault to avoid X's
+//   assign fcl_dtu_swc_s = fdp_fcl_swc_s2 & inst_vld_s_crit & 
+//                          ~immu_fault_f & ~part_stall_thisthr_f;
+//   assign fcl_dtu_swc_s = fdp_fcl_swc_s2 & inst_vld_s_crit & 
+//                          ~immu_fault_f & ~imsto_thisthr_s1 & ~rb_stg_s;
+   assign fcl_swl_swcvld_s = inst_vld_s_crit & ~immu_fault_f & 
+                             ~imsto_thisthr_s1 & ~rb_stg_s;
+     
+
+//------------------------------   
+// Thread pipe
+//------------------------------
+
+//`ifdef VERPLEX
+//   $constraint nthr_1h4 ($one_hot(dtu_fcl_nextthr_bf[3:0]));
+//   $constraint thrf_1h4 ($one_hot(thr_f[3:0]));
+//`endif
+   
+   // Keep track the thread in each pipe stage
+   // Mux selects for the next F-stage thread: rstt -> rstint_penc, swt -> nextthr_bf_buf, samet -> thr_f
+   assign rstt = (~fcl_reset & (rst_stallreq_d1 & ~arst_vld_f)) | rst_tri_en;
+   assign swt =  (~rst_stallreq_d1 & ~arst_vld_f & switch_bf | fcl_reset) & 
+                   ~rst_tri_en;
+   assign samet = (~rst_stallreq_d1 & ~switch_bf | arst_vld_f) & 
+                    ~fcl_reset & ~rst_tri_en;
+   
+   mux3ds #(4)  nxttthr_mux(.dout  (thr_bf[3:0]),      
+			                      .in0   (thr_f[3:0]),       
+			                      .in1   (nextthr_bf_buf[3:0]),
+			                      .in2   (rstint_penc[3:0]), 
+			                      .sel0  (samet),
+			                      .sel1  (swt),
+			                      .sel2  (rstt));
+
+   assign thr_match_nw = |{nextthr_bf_buf[3] & thr_w[3],    // next thread == W thread
+                           nextthr_bf_buf[2] & thr_w[2],
+                           nextthr_bf_buf[1] & thr_w[1],
+                           nextthr_bf_buf[0] & thr_w[0]};
+
+   assign thr_match_nd = |{nextthr_bf_buf[3] & thr_d[3],    // next thread == D thread
+                           nextthr_bf_buf[2] & thr_d[2],
+                           nextthr_bf_buf[1] & thr_d[1],
+                           nextthr_bf_buf[0] & thr_d[0]};
+
+//   assign thr_match_ne = (thr_e[0] & dtu_fcl_nextthr_bf[0] | 
+//			                    thr_e[1] & dtu_fcl_nextthr_bf[1] | 
+//			                    thr_e[2] & dtu_fcl_nextthr_bf[2] | 
+//			                    thr_e[3] & dtu_fcl_nextthr_bf[3]);
+   // qualify inst_vld_e in fcl itself
+
+//   bw_u1_ao2222_4x UZsize_tmne(.z  (thr_match_ne),
+//                               .a1 (val_thr_e[0]),
+//                               .b1 (val_thr_e[1]),
+//                               .c1 (val_thr_e[2]),
+//                               .d1 (val_thr_e[3]),
+//                               .a2 (dtu_fcl_nextthr_bf[0]),
+//                               .b2 (dtu_fcl_nextthr_bf[1]),
+//                               .c2 (dtu_fcl_nextthr_bf[2]),
+//                               .d2 (dtu_fcl_nextthr_bf[3]));
+
+   // thr_match_ne is built from two aoi22 cells feeding a nand2 in place of the
+   // disabled ao2222 above; presumably each aoi22 is ~(a1&a2 | b1&b2) -- confirm in the cell library.
+   wire   tmne_10,
+          tmne_32;
+   bw_u1_aoi22_2x UZsize_tmne10(.z (tmne_10),
+                                .a1 (dtu_fcl_nextthr_bf[0]),
+                                .b1 (dtu_fcl_nextthr_bf[1]),
+                                .a2 (val_thr_e[0]),
+                                .b2 (val_thr_e[1]));
+   bw_u1_aoi22_2x UZsize_tmne32(.z (tmne_32),
+                                .a1 (dtu_fcl_nextthr_bf[2]),
+                                .b1 (dtu_fcl_nextthr_bf[3]),
+                                .a2 (val_thr_e[2]),
+                                .b2 (val_thr_e[3]));
+   bw_u1_nand2_4x UZsize_tmne30(.z (thr_match_ne),
+                                .a (tmne_10),
+                                .b (tmne_32));
+   
+
+   dff_s #(4) thrf_reg(.din   (thr_bf),     // thr_f may be 4'b0000 but it has 
+		                 .clk   (clk),        // to reset to 4'b0001
+		                 .q     (thr_f_flop),
+		                 .se    (se),  .si(), .so());
+
+   bw_u1_buf_10x UZsize_tfcrit0(.a (thr_f_flop[0]), .z(thr_f_crit[0]));   // thr_f_crit: first buffered copy of thr_f_flop
+   bw_u1_buf_10x UZsize_tfcrit1(.a (thr_f_flop[1]), .z(thr_f_crit[1]));
+   bw_u1_buf_10x UZsize_tfcrit2(.a (thr_f_flop[2]), .z(thr_f_crit[2]));
+   bw_u1_buf_10x UZsize_tfcrit3(.a (thr_f_flop[3]), .z(thr_f_crit[3]));
+
+   bw_u1_buf_10x UZsize_tfncr0(.a (thr_f_flop[0]), .z(thr_f[0]));         // thr_f: second buffered copy of the same flop
+   bw_u1_buf_10x UZsize_tfncr1(.a (thr_f_flop[1]), .z(thr_f[1]));
+   bw_u1_buf_10x UZsize_tfncr2(.a (thr_f_flop[2]), .z(thr_f[2]));
+   bw_u1_buf_10x UZsize_tfncr3(.a (thr_f_flop[3]), .z(thr_f[3]));
+
+   assign     ifu_exu_tid_s2[1] = thr_f[3] | thr_f[2];   // thr_f (one bit per thread) -> 2-bit thread id
+   assign     ifu_exu_tid_s2[0] = thr_f[3] | thr_f[1];
+   assign     ifu_lsu_thrid_s = ifu_exu_tid_s2;
+   assign     fcl_dtu_thr_f = thr_f;
+   
+//   assign thr_s1_next = inst_vld_f ? thr_f : thr_s1;
+   assign     thr_s1_next[0] = thr_f[0];                               // lowest set bit of thr_f[2:0] wins;
+   assign     thr_s1_next[1] = ~thr_f[0] & thr_f[1];                   // bit 3 is the default when none is set,
+   assign     thr_s1_next[2] = ~thr_f[0] & ~thr_f[1] & thr_f[2];       // so exactly one bit of thr_s1_next is 1
+   assign     thr_s1_next[3] = ~thr_f[0] & ~thr_f[1] & ~thr_f[2];      // (thr_f[3] itself is not read)
+   
+//`ifdef VERPLEX
+//   $constraint thr_s1_1h4 ($one_hot(thr_s1_next[3:0]));
+//`endif
+   
+   dff_s #(4) thrs1_reg(.din   (thr_s1_next),
+		    .clk   (clk),
+		    .q     (thr_s1),
+		    .se    (se), .si(), .so());
+   
+   dff_s #(4) thrd_reg(.din    (thr_s1_next),    // thr_d is a second flop of the same thr_s1_next value
+		     .clk   (clk),
+		     .q     (thr_d),
+		     .se    (se), .si(), .so());
+   
+   assign fcl_ifq_thr_s1[0] = thr_s1[3] | thr_s1[1];   // thr_s1 -> 2-bit thread id
+   assign fcl_ifq_thr_s1[1] = thr_s1[3] | thr_s1[2];
+
+   assign ifu_tlu_thrid_d[1] = thr_d[3] | thr_d[2];    // thr_d -> 2-bit thread id
+   assign ifu_tlu_thrid_d[0] = thr_d[3] | thr_d[1];
+   
+   assign thr_match_fs1 = |{thr_f_crit[3] & thr_d[3],    // F thread (crit copy) == D thread
+                            thr_f_crit[2] & thr_d[2],
+                            thr_f_crit[1] & thr_d[1],
+                            thr_f_crit[0] & thr_d[0]};
+   assign thr_match_fd = thr_match_fs1;
+   assign thr_match_fe = |{thr_f[3] & thr_e[3],          // F thread == E thread
+                           thr_f[2] & thr_e[2],
+                           thr_f[1] & thr_e[1],
+                           thr_f[0] & thr_e[0]};
+   assign thr_match_fm = |{thr_f[3] & thr_m[3],          // F thread == M thread
+                           thr_f[2] & thr_m[2],
+                           thr_f[1] & thr_m[1],
+                           thr_f[0] & thr_m[0]};
+//   assign thr_match_ft = (trap_thr[0] & thr_f[0] | 
+//			  trap_thr[1] & thr_f[1] | 
+//			  trap_thr[2] & thr_f[2] | 
+//			  trap_thr[3] & thr_f[3]);
+   
+   dffr_s #(4) thre_reg(.din   (thr_d),    // E-stage thread, reset by fcl_reset
+		    .clk   (clk),
+		    .rst   (fcl_reset),
+		    .q     (thr_e),
+		    .se    (se), .si(), .so());
+
+   dffr_s #(4) thre2_reg(.din   (thr_d),    // duplicate E-stage register (same din/rst); read by thr_match_ew
+		    .clk   (clk),
+		    .rst   (fcl_reset),
+		    .q     (thr_e_v2),
+		    .se    (se), .si(), .so());
+
+   assign     ifu_tlu_thrid_e[1] = thr_e[3] | thr_e[2];   // thr_e -> 2-bit thread id
+   assign     ifu_tlu_thrid_e[0] = thr_e[3] | thr_e[1];
+
+   assign thr_match_de = |{thr_d[3] & thr_e[3],    // D thread == E thread
+                           thr_d[2] & thr_e[2],
+                           thr_d[1] & thr_e[1],
+                           thr_d[0] & thr_e[0]};
+
+   assign thr_match_dm = |{thr_d[3] & thr_m[3],    // D thread == M thread
+                           thr_d[2] & thr_m[2],
+                           thr_d[1] & thr_m[1],
+                           thr_d[0] & thr_m[0]};
+
+   dff_s #(4) thrm_reg(.clk   (clk),
+                       .se    (se), .si(), .so(),
+                       .din   (thr_e),
+                       .q     (thr_m));
+
+   dff_s #(4) thrw_reg(.clk   (clk),
+                       .se    (se), .si(), .so(),
+                       .din   (thr_m),
+                       .q     (thr_w));
+
+   assign sas_thrid_w[1] = thr_w[2] | thr_w[3];
+   assign sas_thrid_w[0] = thr_w[1] | thr_w[3];
+   
+   assign thr_match_fw = |{thr_w[3] & thr_f[3],      // F thread == W thread
+                           thr_w[2] & thr_f[2],
+                           thr_w[1] & thr_f[1],
+                           thr_w[0] & thr_f[0]};
+
+   assign thr_match_fw2 = |{thr_w2[3] & thr_f[3],    // F thread == W2 thread
+                            thr_w2[2] & thr_f[2],
+                            thr_w2[1] & thr_f[1],
+                            thr_w2[0] & thr_f[0]};
+
+   assign thr_match_dw = |{thr_w[3] & thr_d[3],      // D thread == W thread
+                           thr_w[2] & thr_d[2],
+                           thr_w[1] & thr_d[1],
+                           thr_w[0] & thr_d[0]};
+
+   assign thr_match_dw2 = |{thr_w2[3] & thr_d[3],    // D thread == W2 thread
+                            thr_w2[2] & thr_d[2],
+                            thr_w2[1] & thr_d[1],
+                            thr_w2[0] & thr_d[0]};
+
+   assign thr_match_em = |{thr_m[3] & thr_e[3],      // E thread == M thread
+                           thr_m[2] & thr_e[2],
+                           thr_m[1] & thr_e[1],
+                           thr_m[0] & thr_e[0]};
+
+   assign thr_match_ew = |{thr_w[3] & thr_e_v2[3],   // E thread (v2 copy) == W thread
+                           thr_w[2] & thr_e_v2[2],
+                           thr_w[1] & thr_e_v2[1],
+                           thr_w[0] & thr_e_v2[0]};
+
+   dff_s #(1) stmw2_ff(.clk (clk), .se (se), .si(), .so(),
+                       .din (thr_match_ew),
+                       .q   (same_thr_mw2));
+
+   assign thr_match_ew2 = |{thr_w2[3] & thr_e[3],    // E thread == W2 thread
+                            thr_w2[2] & thr_e[2],
+                            thr_w2[1] & thr_e[1],
+                            thr_w2[0] & thr_e[0]};
+
+   assign thr_match_mw = |{thr_w[3] & thr_m[3],      // M thread == W thread
+                           thr_w[2] & thr_m[2],
+                           thr_w[1] & thr_m[1],
+                           thr_w[0] & thr_m[0]};
+
+   dff_s #(4) thrw2_reg(.clk   (clk),
+                        .se    (se), .si(), .so(),
+                        .din   (thr_w),
+                        .q     (thr_w2));
+   
+
+//-------------------------
+// Rollback
+//-------------------------
+
+   // 04/05/02
+   // Looks like we made a mistake with rollback.  Should never
+   // rollback to S.  In the event of a dmiss or mul contention, just
+   // kill all the instructions and rollback to F.  This adds one
+   // cycle to the dmiss penalty and to the mul latency if we have to
+   // wait, both not a very high price to pay.  This would have saved
+   // lots of hours of design and verif time.
+   //    
+   assign rb2_inst_d = thr_match_dw & inst_vld_d & dtu_fcl_rollback_g;    // D inst of the W thread
+   assign rb1_inst_s = thr_match_fw & inst_vld_s & dtu_fcl_rollback_g;    // S inst of the W thread
+   assign rb0_inst_bf = thr_match_nw & switch_bf & dtu_fcl_rollback_g;    // switching to the W thread
+
+//   assign rt1_inst_s = thr_match_fd & inst_vld_s & retract_inst_d;
+//   assign rt0_inst_bf = thr_match_nd & dtu_fcl_ntr_s & retract_inst_d;
+
+//   assign retract_iferr_d = thr_match_de & erb_dtu_ifeterr_d1 & inst_vld_d1 &
+//                            ~kill_curr_e & fcl_dtu_inst_vld_d;
+   assign retract_iferr_d1 = erb_dtu_ifeterr_d1 & inst_vld_d1;
+
+   assign retract_inst_d = retract_iferr_d1 & thr_match_de & 
+                           fcl_dtu_inst_vld_d |
+                           mark4rb_d | 
+                           dtu_fcl_retract_d;
+
+   assign rt1_inst_s = thr_match_fd & inst_vld_s & dtu_fcl_retract_d | 
+                       mark4rb_s;
+//                     | thr_match_fe & inst_vld_s & retract_iferr_d1;
+
+   // TBD:  This is not necessary since the thread will switch out and
+   // stall whatever makes its way to the S stage.
+   // NOTE: rb0_inst *is needed* however.
+   assign rt0_inst_bf = thr_match_nd & switch_bf & dtu_fcl_retract_d;
+//                      | thr_match_ne & dtu_fcl_ntr_s & retract_iferr_d1;
+
+   assign retract_iferr_qual_d1 = retract_iferr_d1 & thr_match_de &
+                                  fcl_dtu_inst_vld_d &
+                                  ~(dtu_fcl_rollback_g & thr_match_ew);   // not when a rollback already hits the same (E == W) thread
+
+   dff_s rbe_ff(.clk (clk),
+                .se  (se), .si(), .so(),
+                .din (rb2_inst_d),
+                .q   (rb2_inst_e));
+
+   dff_s rte_ff(.clk (clk),
+                .se  (se), .si(), .so(),
+                .din (retract_inst_d),
+                .q   (rt2_inst_e));
+
+   dff_s rbd_ff(.clk (clk),
+                .se  (se), .si(), .so(),
+                .din (rb1_inst_s),
+                .q   (rb1_inst_d));
+
+   dff_s rtd_ff(.clk (clk),
+                .se  (se), .si(), .so(),
+                .din (rt1_inst_s),
+                .q   (rt1_inst_d));
+
+   dff_s rbs_ff(.clk (clk),
+                .se  (se), .si(), .so(),
+                .din (rb0_inst_bf),
+                .q   (rb0_inst_s));
+
+   // TBD: is this necessary?
+   dff_s rts_ff(.clk (clk),
+                .se  (se), .si(), .so(),
+                .din (rt0_inst_bf),
+                .q   (rt0_inst_s));
+
+   dff_s rtiferr_ff(.clk (clk),
+                    .se  (se), .si(), .so(),
+                    .din (retract_iferr_qual_d1),
+                    .q   (retract_iferr_e));
+   
+   assign rb_stg_s = (rb0_inst_s | rt0_inst_s) & tm_fd_l |       // tm_fd_l set: use the markers flopped from BF
+	                   (rb1_inst_d | rt1_inst_d) & ~tm_fd_l;      // tm_fd_l clear: use the markers flopped from S
+   assign rb_stg_d_crit = rb1_inst_d | rt1_inst_d;
+   assign rb_stg_e = rb2_inst_e | rt2_inst_e;
+
+   bw_u1_buf_5x UZsize_rbd_buf(.a (rb_stg_d_crit),   // rb_stg_d is a buffered copy of rb_stg_d_crit
+                               .z (rb_stg_d));
+   
+   // determine rollback amount (one bit per thread: rolled-back stage's marker ANDed with that stage's thread)
+   assign rb_frome = {4{(rb2_inst_e | rt2_inst_e) & 
+                        (inst_vld_e | intr_vld_e)}} & thr_e;
+   assign rb_fromd = {4{(rb1_inst_d | rt1_inst_d) & 
+                        (inst_vld_d | intr_vld_d)}} & thr_d;
+   assign rb_froms = {4{rb_stg_s & inst_vld_s_crit}} & thr_f;   
+   assign rb_w2 = rb_frome | rb_fromd;
+   assign rb_for_iferr_e = {4{retract_iferr_e}} & thr_e;
+
+//------------------------------   
+// Branch Control
+//------------------------------
+   // final portion of branch evaluation
+   // NOTE(review): cell functions below (muxi21 = inverting 2:1 mux, inv, nand2, buf)
+   // are inferred from the cell names and the _l suffixes -- confirm in the cell library.
+   wire brtaken_e_l;   
+   bw_u1_buf_20x UZsize_bcbf(.z(fcl_dcl_regz_e), 
+                             .a(exu_ifu_regz_e));
+      
+   bw_u1_muxi21_6x UZsize_bcmux(.z(brtaken_e_l), 
+                                .d0(dcl_fcl_bcregz0_e), 
+                                .d1(dcl_fcl_bcregz1_e), 
+                                .s(exu_ifu_regz_e));
+
+   bw_u1_inv_15x UZsize_bcinv(.z(brtaken_e), 
+                              .a(brtaken_e_l));
+
+   // Branch is taken in the E stage to thr_e.  Below we check to see
+   // if this is the same as the next thread we will switch to
+
+   // isolate non critical section
+   bw_u1_buf_5x UZsize_btbuf(.z (brtaken_unq_e),
+                             .a (brtaken_e));
+   assign brtaken_buf_e = brtaken_unq_e & inst_vld_qual_e & ~kill_curr_e;   // qualified copy; this is what gets flopped to M
+
+//   assign thr_match_ne_norst = thr_match_ne & ~rst_sw_bf;
+//   assign brto_nxtthr_bf  = thr_match_ne & brtaken_e;
+   bw_u1_nand2_4x UZsize_btkn_ntl(.a (brtaken_e),
+                                  .b (thr_match_ne),
+                                  .z (brto_nxtthr_bf_l));
+
+//   bw_u1_inv_8x UZsize_btkn_bf(.a (brto_nxtthr_bf_l),
+//                               .z (brto_nxtthr_bf));
+
+   dff_s #(1) br_ff(.din (brtaken_buf_e),
+		              .q   (brtaken_m),
+		              .clk (clk),
+		              .se  (se), .si(), .so());
+   
+
+//----------------------------------------------------------------------
+// PC related control
+//----------------------------------------------------------------------
+
+   // Choose next IC address
+   // IC address is chosen from
+   //    1. Next PC assuming no switch 
+   //    2. Branch PC if E stage branch is to next thread
+   //    3. Saved F stage Thread PC if we switch threads
+
+   assign fcl_icd_index_sel_ifq_bf = allow_ifq_access_icd_bf;   // both outputs are the same allow_ifq_access_icd_bf signal
+   assign fcl_ifq_grant_bf = allow_ifq_access_icd_bf;
+
+   // Select branch PC
+//   assign fcl_fdp_icaddr_sel_br_bf_l = ~(~all_stallreq &
+//					                               brto_nxtthr_bf    &
+//					                               switch_bf);
+//
+//   // Select the switch PC from thread PC register
+//   assign fcl_fdp_icaddr_sel_swpc_bf_l = ~(~all_stallreq &
+//					                                 ~usen_bf &
+//					                                 ~brto_nxtthr_bf &
+//					                                 switch_bf);
+//
+//   // Select current thread's next PC or IC write addr (PC/PC+4/I$ wraddr)
+//   assign fcl_fdp_icaddr_sel_curr_bf_l = ~(~all_stallreq &
+//					                                 ~(stall_f | usep_bf) & 
+//					                                 ~switch_bf);
+//   
+//   assign fcl_fdp_icaddr_sel_ifq_bf_l = ~(all_stallreq |
+//					                                (stall_f | usep_bf) & ~switch_bf |
+//					                                ~brto_nxtthr_bf & usen_bf &
+//					                                (switch_bf | stall_f | usep_bf));
+   
+   // The aoi21 cell below stands in for the commented equation that follows;
+   // presumably z = ~(a | b1 & b2) -- confirm in the cell library.
+//   assign sw_or_async_stall = (switch_bf & ~rst_stallreq | rst_sw_bf);
+   wire   sw_or_async_stall_l;
+   assign rst_stallreq_l = ~rst_stallreq;
+   bw_u1_aoi21_4x UZsize_swstl_aoi(.z  (sw_or_async_stall_l),
+                                   .a  (rst_sw_bf),
+                                   .b1 (switch_bf),
+                                   .b2 (rst_stallreq_l));
+   assign sw_or_async_stall = ~sw_or_async_stall_l;
+
+//   assign icadr_selbr = sw_or_async_stall & brto_nxtthr_bf;
+   assign sw_match_ne_norst = sw_or_async_stall & thr_match_ne;
+   bw_u1_nand2_10x UZfix_icad_br(.a (brtaken_e),
+                                 .b (sw_match_ne_norst),
+                                 .z (icadr_selbr_l));
+
+//   assign icadr_selsw = sw_or_async_stall & ~brto_nxtthr_bf;
+   bw_u1_nand2_15x UZfix_icad_sw(.a (brto_nxtthr_bf_l),
+                                 .b (sw_or_async_stall),
+                                 .z (icadr_selsw_l));
+   
+
+   // select next PC
+   assign fcl_fdp_pcbf_sel_br_bf_l = icadr_selbr_l;
+   assign fcl_fdp_pcbf_sel_swpc_bf_l = icadr_selsw_l ;
+   assign fcl_fdp_pcbf_sel_nosw_bf_l = ~sw_or_async_stall_l;   // same value as sw_or_async_stall
+
+   // Select PC to switch to in the event of a switch
+   // No need to protect during scan
+   // NOTE: SWL guarantees nextthr_bf is one hot
+//   assign fcl_fdp_next_thr_bf_l = rst_stallreq_d1 ? ~rstint_penc :
+//                                                    ~dtu_fcl_nextthr_bf;
+
+   wire [3:0] next_thr_bf_l;
+   wire       nt_sel_rst;
+   assign     nt_sel_rst = rst_stallreq_d1 | rst_tri_en;
+   
+   bw_u1_muxi21_2x UZfix_nthr_mx0(.z (next_thr_bf_l[0]),
+                                  .d0 (dtu_fcl_nextthr_bf[0]),
+                                  .d1 (rstint_penc[0]),
+                                  .s  (nt_sel_rst));
+   bw_u1_muxi21_2x UZfix_nthr_mx1(.z (next_thr_bf_l[1]),
+                                  .d0 (dtu_fcl_nextthr_bf[1]),
+                                  .d1 (rstint_penc[1]),
+                                  .s  (nt_sel_rst));
+   bw_u1_muxi21_2x UZfix_nthr_mx2(.z (next_thr_bf_l[2]),
+                                  .d0 (dtu_fcl_nextthr_bf[2]),
+                                  .d1 (rstint_penc[2]),
+                                  .s  (nt_sel_rst));
+   bw_u1_muxi21_2x UZfix_nthr_mx3(.z (next_thr_bf_l[3]),
+                                  .d0 (dtu_fcl_nextthr_bf[3]),
+                                  .d1 (rstint_penc[3]),
+                                  .s  (nt_sel_rst));
+   assign     fcl_fdp_next_thr_bf_l = next_thr_bf_l;
+   
+
+//   assign nextthr_bf_buf = dtu_fcl_nextthr_bf;
+   bw_u1_buf_20x UZsize_ntbf0(.a (dtu_fcl_nextthr_bf[0]),
+                              .z (nextthr_bf_buf[0]));
+   bw_u1_buf_20x UZsize_ntbf1(.a (dtu_fcl_nextthr_bf[1]),
+                              .z (nextthr_bf_buf[1]));
+   bw_u1_buf_20x UZsize_ntbf2(.a (dtu_fcl_nextthr_bf[2]),
+                              .z (nextthr_bf_buf[2]));
+   bw_u1_buf_20x UZsize_ntbf3(.a (dtu_fcl_nextthr_bf[3]),
+                              .z (nextthr_bf_buf[3]));
+   // use 6x
+   assign fcl_fdp_next_ctxt_bf_l[2:0] = ~nextthr_bf_buf[2:0] | {3{rst_tri_en}};
+   assign fcl_fdp_next_ctxt_bf_l[3] = ~nextthr_bf_buf[3] & ~rst_tri_en;
+
+//   assign nextthr_final_bf = switch_bf ? dtu_fcl_nextthr_bf : thr_f;
+   wire [3:0] nextthr_final_bf_l;
+   bw_u1_muxi21_2x UZfix_ntfmux0(.z  (nextthr_final_bf_l[0]),
+                                .d0 (thr_f[0]),
+                                .d1 (dtu_fcl_nextthr_bf[0]),
+                                .s  (switch_bf));
+   bw_u1_inv_8x UZsize_ntfin_buf0(.z (nextthr_final_bf[0]),
+                                 .a (nextthr_final_bf_l[0]));
+
+   bw_u1_muxi21_2x UZfix_ntfmux1(.z  (nextthr_final_bf_l[1]),
+                                .d0 (thr_f[1]),
+                                .d1 (dtu_fcl_nextthr_bf[1]),
+                                .s  (switch_bf));
+   bw_u1_inv_8x UZsize_ntfin_buf1(.z (nextthr_final_bf[1]),
+                                 .a (nextthr_final_bf_l[1]));
+
+   bw_u1_muxi21_2x UZfix_ntfmux2(.z  (nextthr_final_bf_l[2]),
+                                .d0 (thr_f[2]),
+                                .d1 (dtu_fcl_nextthr_bf[2]),
+                                .s  (switch_bf));
+   bw_u1_inv_8x UZsize_ntfin_buf2(.z (nextthr_final_bf[2]),
+                                 .a (nextthr_final_bf_l[2]));
+
+   bw_u1_muxi21_2x UZfix_ntfmux3(.z  (nextthr_final_bf_l[3]),
+                                .d0 (thr_f[3]),
+                                .d1 (dtu_fcl_nextthr_bf[3]),
+                                .s  (switch_bf));
+   bw_u1_inv_8x UZsize_ntfin_buf3(.z (nextthr_final_bf[3]),
+                                 .a (nextthr_final_bf_l[3]));
+   
+   
+   // decode trap thread
+   dff_s #(2) ld_trp_reg(.din ({tlu_ifu_trappc_vld_w1,
+                              tlu_ifu_trapnpc_vld_w1}),
+                       .q   ({trappc_vld_w2,
+                              trapnpc_vld_w2}),
+                       .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(2) trp_tid_reg(.din (tlu_ifu_trap_tid_w1[1:0]),
+                        .q   (trap_tid_w2[1:0]),
+                        .clk (clk), .se(se), .si(), .so());
+   
+   assign trap_thr[0] = ~trap_tid_w2[1] & ~trap_tid_w2[0];
+   assign trap_thr[1] = ~trap_tid_w2[1] &  trap_tid_w2[0];
+   assign trap_thr[2] =  trap_tid_w2[1] & ~trap_tid_w2[0];
+   assign trap_thr[3] =  trap_tid_w2[1] &  trap_tid_w2[0];
+
+   assign load_tpc[3:0] = {4{trappc_vld_w2}} & trap_thr |
+	                   rb_w2 |
+	                   {4{rb_stg_w | ims_flush_coll_w}} & thr_w |
+//                     {4{dec_fcl_kill4sta_e}} & thr_e |
+	                   {4{flush_sonly_qual_m}} & thr_m;
+   
+   assign load_bpc[3:0] = {4{brtaken_buf_e}} & thr_e;
+   assign load_pcp4[3:0] = {4{~part_stall_thisthr_f &
+                              ~iferrto_thisthr_d1 |
+                              arst_vld_f |
+                              async_intr_vld_s}}     & thr_f;
+
+   always @ (/*AUTOSENSE*/load_bpc or load_pcp4 or load_tpc)
+     begin // per-thread thread-PC input selects; equations give priority trap > pcp4 > brpc > old
+//	      if (fcl_reset)
+//	        begin // RESET PC is loaded to T0
+//	           fcl_fdp_tpcbf_sel_old_bf_l = 4'b0001;
+//	           fcl_fdp_tpcbf_sel_pcp4_bf_l = 4'b1110;
+//	           fcl_fdp_tpcbf_sel_trap_bf_l = 4'b1111;
+//	           fcl_fdp_tpcbf_sel_brpc_bf_l = 4'b1111;
+//	        end // if (reset)
+//	      else 
+//	        begin
+	      fcl_fdp_tpcbf_sel_old_bf_l = (load_bpc | load_tpc | load_pcp4);  // low only when no load is requested for the thread
+	      fcl_fdp_tpcbf_sel_brpc_bf_l = ~load_bpc | load_tpc | load_pcp4;  // low for load_bpc unless load_tpc or load_pcp4 is also set
+	      fcl_fdp_tpcbf_sel_pcp4_bf_l = ~load_pcp4 | load_tpc;             // low for load_pcp4 unless load_tpc is also set
+	      fcl_fdp_tpcbf_sel_trap_bf_l = ~load_tpc;                         // low whenever load_tpc is set (highest priority)
+     end // always @ (...
+
+   // Track correctible errors
+   assign irf_ce_m = exu_ifu_ecc_ce_m & ~trap_m & inst_vld_m & ~kill_curr_m;
+   dff_s #(1) irfcew_ff(.din (irf_ce_m),
+		                .q   (irf_ce_w),
+		                .clk (clk), .se(se), .si(), .so());
+
+   // track if ldhit was actually a miss
+   // D and S stage are rolled back through the normal D stage retract
+   // process.  
+   assign mark4rb_d = lsu_ifu_dc_parity_error_w2 & thr_match_dw2 & 
+                      (inst_vld_d | intr_vld_d);
+   assign mark4rb_s = lsu_ifu_dc_parity_error_w2 & thr_match_fw2 & 
+                      (inst_vld_s | intr_vld_s);
+
+   assign mark4rb_e = lsu_ifu_dc_parity_error_w2 & thr_match_ew2 & 
+                      (inst_vld_e | intr_vld_e) & 
+                        ~dtu_inst_anull_e & ~kill_curr_e;
+
+   dff_s #(2) markrb_reg(.din ({mark4rb_m,
+                              mark4rb_e}),
+                       .q   ({mark4rb_w,
+                              mark4rb_m}),
+                       .clk (clk),
+                       .se  (se), .si(), .so());
+   
+   // Rollback from W on irf/frf ce and on a dcache parity error
+   assign rb_stg_w = irf_ce_w & inst_vld_w & no_iftrap_w | 
+	                   ffu_ifu_fst_ce_w & inst_vld_w & no_iftrap_w |
+                     rb_intr_w & intr_vld_w |
+                     mark4rb_w |
+                     fcl_dtu_resum_thr_w | 
+		                 fcl_dtu_nuke_thr_w;
+
+   // flush after hardware micro trap
+//   assign ifu_tlu_flush_w = irf_ce_w | fcl_dtu_nuke_thr_w | mark4rb_w |
+//                            fcl_dtu_resum_thr_w;
+   // very critical
+   assign ifu_tlu_flush_m = (exu_ifu_ecc_ce_m & inst_vld_m & ~trap_m |
+                             (resumint_m | nuke_thr_m) & 
+                             intr_vld_m & ~rstint_m |
+                             rb_intr_m | 
+                             mark4rb_m);
+   assign utrap_flush_m =  ifu_tlu_flush_m & ~kill_local_m;
+   dff_s #(1) flw_ff(.din (utrap_flush_m),
+                   .q   (utrap_flush_w),
+                   .clk (clk), .se(se), .si(), .so());
+   assign ifu_tlu_flush_w = utrap_flush_w;
+   assign fcl_swl_flush_w =  (irf_ce_w & inst_vld_w & no_iftrap_w | 
+                              rb_intr_w & intr_vld_w |
+                              mark4rb_w |
+                              fcl_dtu_resum_thr_w | 
+		                          fcl_dtu_nuke_thr_w);
+
+   // tells swl to flush and then wake up
+   assign fcl_swl_flush_wake_w = fcl_swl_flush_w & ~mark4rb_w;
+   
+   // if the same instruction keeps hitting ce's disable ce detection
+   // count how many ce's occur to a given thread
+   assign any_ce_w = ffu_ifu_fst_ce_w | irf_ce_w;
+   
+   assign ce_cnt1_nxt = (({4{any_ce_w & inst_vld_w}} & thr_w & 
+                          ce_cnt0) ^ ce_cnt1) & ~ce_cnt_rst;   // upper count bit: toggles on a valid CE when the lower bit is already 1
+   assign ce_cnt0_nxt = (({4{any_ce_w & inst_vld_w}} & thr_w) ^ 
+                         ce_cnt0) & ~ce_cnt_rst;               // lower count bit: toggles on every valid CE for the bit selected by thr_w
+
+   assign ce_cnt_rst = thr_w & {4{inst_vld_w & ~any_ce_w}} | {4{fcl_reset}};  // clear: valid W inst without a CE (bit selected by thr_w), or fcl_reset (all bits)
+
+   dff_s #(8) cecnt_reg(.din ({ce_cnt1_nxt, ce_cnt0_nxt}),     // 4 x 2-bit counters; {ce_cnt1[i], ce_cnt0[i]} is the count for index i
+                      .q   ({ce_cnt1, ce_cnt0}),
+                      .clk (clk),
+                      .se(se), .si(), .so());                  // no reset pin: clearing is done through ce_cnt_rst in the next-state logic
+
+   // find the count for the current d stage thread
+   assign ce_val1_d = (thr_d[0] & ce_cnt1[0] | 
+                       thr_d[1] & ce_cnt1[1] | 
+                       thr_d[2] & ce_cnt1[2] | 
+                       thr_d[3] & ce_cnt1[3]);
+
+   assign ce_val0_d = (thr_d[0] & ce_cnt0[0] | 
+                       thr_d[1] & ce_cnt0[1] | 
+                       thr_d[2] & ce_cnt0[2] | 
+                       thr_d[3] & ce_cnt0[3]);
+
+   // if count hits 3 disable ce's
+   assign disable_ce_d = ce_val1_d & ce_val0_d;
+
+   dff_s #(1) disce_ff(.din (disable_ce_d),
+                     .q   (disable_ce_e),
+                     .clk (clk), .se(se), .si(), .so());
+   assign ifu_exu_disable_ce_e = disable_ce_e;
+   
+   // select error/trap/utrap rollback PC
+   assign fcl_fdp_trrbpc_sel_trap_bf_l = 
+		                ~({4{trappc_vld_w2}} & trap_thr);   // low for the trap_thr bit when trappc_vld_w2 is set (highest priority)
+
+   assign fcl_fdp_trrbpc_sel_err_bf_l = 
+                  ({4{trappc_vld_w2}} & trap_thr) |          // forced high when the trap select above is active
+	                 ~({4{rb_stg_w}} & thr_w);                 // otherwise low for the thr_w bit when rb_stg_w is set
+   
+   assign fcl_fdp_trrbpc_sel_rb_bf_l = 
+                  ({4{trappc_vld_w2}} & trap_thr) |          // forced high by the trap select
+		               ({4{rb_stg_w}} & thr_w) |		   // forced high by the err select
+	                ~(rb_frome & rb_fromd);   // NOTE(review): AND of rb_frome/rb_fromd, while the nextpcs selects treat them as separate conditions -- confirm '&' vs '|'
+   
+   assign fcl_fdp_trrbpc_sel_pcs_bf_l = 
+                 ({4{trappc_vld_w2}} & trap_thr) |           // forced high by the trap select
+		               ({4{rb_stg_w}} & thr_w) |		   // forced high by the err select
+	                 (rb_frome & rb_fromd);   // default: low only when no term above is set; must remain the exact complement of the rb term
+   
+   // select next S stage Thr PC
+   assign fcl_fdp_nextpcs_sel_pce_f_l = ~rb_frome;
+   assign fcl_fdp_nextpcs_sel_pcd_f_l = rb_frome | ~rb_fromd;
+   assign fcl_fdp_nextpcs_sel_pcf_f_l = rb_frome | rb_fromd |
+	                                ~(thr_f & {4{~part_stall_thisthr_f &
+                                               ~iferrto_thisthr_d1 |
+                                               arst_vld_f |
+                                               async_intr_vld_s}});
+   assign fcl_fdp_nextpcs_sel_pcs_f_l = rb_frome | rb_fromd | 
+	                                (thr_f & {4{~part_stall_thisthr_f &
+                                              ~iferrto_thisthr_d1 |
+                                              arst_vld_f |
+                                              async_intr_vld_s}});
+   
+   // next S2 stage pc and npc select
+   assign thr_f_dec[3:1] = thr_f_crit[3:1] & {3{~rst_tri_en}};
+   assign thr_f_dec[0] = thr_f_crit[0] | rst_tri_en;
+   assign fcl_fdp_thr_s2_l = ~thr_f_dec;     // thr_f = thr_s2
+
+
+   // Select NextPC from
+   //    1. Trap NextPC (if the tnpc is valid)
+   //    2. reset PC
+   //    3. incremented PC (PC+4)
+   //    4. old PC (in the event of a stall)
+
+   // Load the trap PC to the BF stage NPC.  (The BF stage NPC is used
+   // only for storing the next PC from the TLU.)
+   assign fcl_fdp_thrtnpc_sel_tnpc_l = ~({4{trapnpc_vld_w2}} & trap_thr);
+
+   assign fcl_fdp_thrtnpc_sel_npcw_l = ({4{trapnpc_vld_w2}} & trap_thr) |
+	                                       ~({4{rb_stg_w}} & thr_w);
+
+   assign fcl_fdp_thrtnpc_sel_pcf_l = ({4{trapnpc_vld_w2}} & trap_thr) |
+	                                      ({4{rb_stg_w}} & thr_w) |
+	                                        (~({4{ims_flush_coll_w}} & thr_w) &
+                                           ~({4{flush_sonly_qual_m}} & thr_m));
+                                       //   {4{dec_fcl_kill4sta_e}} & thr_e);
+   
+   assign fcl_fdp_thrtnpc_sel_old_l = ({4{trapnpc_vld_w2}} & trap_thr) |
+	                                      ({4{rb_stg_w}} & thr_w) |
+	                                      ({4{ims_flush_coll_w}} & thr_w) |
+                                        ({4{flush_sonly_qual_m}} & thr_m);
+                                         // {4{dec_fcl_kill4sta_e}} & thr_e);
+   
+   assign ntpc_vld_nxt = fcl_fdp_thrtnpc_sel_old_l | 
+	                       ntpc_vld & ({4{(part_stall_thisthr_f |
+                                         iferrto_thisthr_d1) & 
+                                        ~arst_vld_f &
+                                        ~async_intr_vld_s}} | ~thr_f) &
+                                  ~({4{trappc_vld_w2}} & trap_thr);
+   
+   dffr_s #(4) ntpcv_reg(.din  (ntpc_vld_nxt),
+		     .clk  (clk),
+		     .q    (ntpc_vld),
+		     .rst  (fcl_reset),
+		     .se   (se), .si(), .so());
+
+   assign ntpc_thisthr = (thr_f[0] & ntpc_vld[0] |
+			                    thr_f[1] & ntpc_vld[1] |
+			                    thr_f[2] & ntpc_vld[2] |
+			                    thr_f[3] & ntpc_vld[3]);
+
+//   assign fcl_fdp_noswpc_sel_rst_l_bf = 1'b1; 
+   assign fcl_fdp_noswpc_sel_tnpc_l_bf = ~ntpc_thisthr;
+   assign fcl_fdp_noswpc_sel_old_l_bf = ntpc_thisthr | inst_vld_f | arst_vld_f;
+   assign fcl_fdp_noswpc_sel_inc_l_bf = ntpc_thisthr | ~inst_vld_f & ~arst_vld_f;
+   
+   
+   // Don't need noswpc_sel_old anymore (this is always 1)
+//   always @(/*AUTOSENSE*/ntpc_vld or reset or thr_f)
+//     begin
+//	      if (reset)
+//	        begin
+//	           fcl_fdp_noswpc_sel_tnpc_l_bf = 1'b1;
+//	           fcl_fdp_noswpc_sel_rst_l_bf = 1'b0;	
+//	           fcl_fdp_noswpc_sel_inc_l_bf = 1'b1;
+//	           fcl_fdp_noswpc_sel_old_l_bf = 1'b1;
+//	        end
+//	      else if ((ntpc_vld & thr_f) != 4'b0000)
+//	        begin
+//	           fcl_fdp_noswpc_sel_tnpc_l_bf = 1'b0;
+//	           fcl_fdp_noswpc_sel_rst_l_bf = 1'b1;	
+//	           fcl_fdp_noswpc_sel_inc_l_bf = 1'b1;
+//	           fcl_fdp_noswpc_sel_old_l_bf = 1'b1;	
+//	        end // if ((ntpc_vld & thr_f) != 4'b0000)
+////	else if (ely_stall_thisthr_f)
+////	  begin
+////	     fcl_fdp_noswpc_sel_tnpc_l_bf = 1'b1;
+////	     fcl_fdp_noswpc_sel_rst_l_bf = 1'b1;	
+////	     fcl_fdp_noswpc_sel_inc_l_bf = 1'b1;
+////	     fcl_fdp_noswpc_sel_old_l_bf = 1'b0;	     
+////	  end // if (ely_stall_thisthr_f)
+//	      else 
+//	        begin
+//	           fcl_fdp_noswpc_sel_tnpc_l_bf = 1'b1;
+//	           fcl_fdp_noswpc_sel_rst_l_bf = 1'b1;	
+//	           fcl_fdp_noswpc_sel_inc_l_bf = 1'b0;
+//	           fcl_fdp_noswpc_sel_old_l_bf = 1'b1;	
+//	        end // else: 
+//	      
+//     end // always @ (...
+
+   // NOTE: direct branch vs indirect branch select goes from dtu to fdp
+
+//----------------------------------------------------------------------
+// Instruction Register Related Control
+//----------------------------------------------------------------------
+
+   // use NIR if no read previously
+   assign fcl_fdp_usenir_sel_nir_s1 = usenir_s1;
+
+
+   assign  fcl_fdp_inst_sel_nop_s_l = ~(ely_stall_thisthr_f | 
+                                        ~inst_vld_s_crit | 
+                                        force_intr_s | 
+                                        immu_fault_f);
+   
+   assign  fcl_fdp_inst_sel_switch_s_l = ~switch_s2 |
+                                        (ely_stall_thisthr_f | 
+                                         ~inst_vld_s_crit | 
+                                         force_intr_s | 
+                                         immu_fault_f);
+
+   assign  fcl_fdp_inst_sel_nir_s_l = ~usenir_s1 |
+                                        (switch_s2 |
+                                         ely_stall_thisthr_f | 
+                                         ~inst_vld_s_crit | 
+                                         force_intr_s | 
+                                         immu_fault_f);
+   
+   assign  fcl_fdp_inst_sel_curr_s_l = (usenir_s1 |
+                                        switch_s2 |
+                                        ely_stall_thisthr_f | 
+                                        ~inst_vld_s_crit | 
+                                        force_intr_s | 
+                                        immu_fault_f);
+   
+   
+   // Instruction Output Mux
+//   always @ (/*AUTOSENSE*/ely_stall_thisthr_f or force_intr_s
+//             or immu_fault_f or inst_vld_s_crit or switch_s2
+//             or usenir_s1)
+//     begin
+//	      if (ely_stall_thisthr_f | ~inst_vld_s_crit | force_intr_s | 
+//            immu_fault_f) 
+//	        begin // stalled or imiss
+//	           fcl_fdp_inst_sel_nop_s_l = 1'b0;
+//	           fcl_fdp_inst_sel_switch_s_l = 1'b1;
+//	           fcl_fdp_inst_sel_nir_s_l = 1'b1;
+//	           fcl_fdp_inst_sel_curr_s_l = 1'b1;
+//	        end
+//	      else if (switch_s2) 
+//	        begin
+//	           fcl_fdp_inst_sel_nop_s_l = 1'b1;
+//	           fcl_fdp_inst_sel_switch_s_l = 1'b0;
+//	           fcl_fdp_inst_sel_nir_s_l = 1'b1;
+//	           fcl_fdp_inst_sel_curr_s_l = 1'b1;
+//	        end
+//	      else if (usenir_s1) 
+//	        begin
+//	           fcl_fdp_inst_sel_nop_s_l = 1'b1;
+//	           fcl_fdp_inst_sel_switch_s_l = 1'b1;
+//	           fcl_fdp_inst_sel_nir_s_l = 1'b0;
+//	           fcl_fdp_inst_sel_curr_s_l = 1'b1;
+//	        end
+//	      else
+//	        begin
+//	           fcl_fdp_inst_sel_nop_s_l = 1'b1;
+//	           fcl_fdp_inst_sel_switch_s_l = 1'b1;
+//	           fcl_fdp_inst_sel_nir_s_l = 1'b1;
+//	           fcl_fdp_inst_sel_curr_s_l = 1'b0;
+//	        end // else: !if(switch_s2 | stall_s1)
+//     end // always @ (...
+
+   // thread IR input muxes
+   assign fcl_fdp_tinst_sel_rb_s_l   = ~rb_w2;
+   assign fcl_fdp_tinst_sel_ifq_s_l  = rb_w2 | ~ifq_fcl_fill_thr;
+   assign fcl_fdp_tinst_sel_curr_s_l = ~val_thr_s1 | rb_w2 | ifq_fcl_fill_thr; 
+   assign fcl_fdp_tinst_sel_old_s_l  = val_thr_s1 | rb_w2 | ifq_fcl_fill_thr; 
+
+   // Select rollback instruction
+   assign fcl_fdp_rbinst_sel_inste_s = {4{rb2_inst_e | rt2_inst_e}} & 
+                                       thr_e;
+
+   // thread NIR input muxes  (2:1 no need to protect)
+   assign fcl_fdp_thr_s1_l = ~thr_s1 | {4{stall_s1}};
+
+   // select appropriate NIR
+   assign dec_thr_s1_l[0] = ~(thr_s1[0] | rst_tri_en);
+   assign dec_thr_s1_l[3:1] = ~(thr_s1[3:1] & {3{~rst_tri_en}});
+   
+   assign fcl_fdp_nirthr_s1_l = dec_thr_s1_l; 
+   
+
+//--------------------
+// rdsr data to exu
+//--------------------   
+
+   dff_s #(1) pcrsr_ff(.din  (dec_fcl_rdsr_sel_pc_d),
+		               .clk  (clk),
+		               .q    (rdsr_sel_pc_e),
+		               .se   (se), .si(), .so());
+   dff_s #(1) thrrsr_ff(.din  (dec_fcl_rdsr_sel_thr_d),
+		                .clk  (clk),
+		                .q    (rdsr_sel_thr_e),
+		                .se   (se), .si(), .so());
+   // make sure they are exclusive
+   assign fcl_fdp_rdsr_sel_pc_e_l = ~rdsr_sel_pc_e;
+   assign fcl_fdp_rdsr_sel_thr_e_l = ~(~rdsr_sel_pc_e & rdsr_sel_thr_e);
+   assign fcl_fdp_rdsr_sel_ver_e_l = ~(~rdsr_sel_pc_e & ~rdsr_sel_thr_e);
+
+//--------------------------------------------------------------
+// Reg file control
+//--------------------------------------------------------------
+
+// Some decode is done here since these signals are in the crit path
+
+   // Regfile enables are only power-saving features, so they don't
+   // have to be exact as long as they are asserted for a superset of
+   // the cases in which they need to be on.
+
+   // Enable rs3 if store or atomic or mov
+   assign ifu_exu_ren3_s =  inst_vld_f & fdp_fcl_op_s[1] & fdp_fcl_op3_s[2] &
+	                    (fdp_fcl_op_s[0] | fdp_fcl_op3_s[5]);
+
+   // enable rs2 if i=0 and !branch or CAS
+   // cas not fully decoded;  i=inst[13];
+   assign ifu_exu_ren2_s = inst_vld_f & fdp_fcl_op_s[1] &
+			    (~fdp_fcl_ibit_s | 
+			     fdp_fcl_op_s[0] & fdp_fcl_op3_s[5]);
+
+   // rs1 is read if this is not (a branch on cc or no-op/sethi)
+   assign ifu_exu_ren1_s = inst_vld_f & (fdp_fcl_op_s[1] |     // not br/call
+			   fdp_fcl_op3_s[4] & fdp_fcl_op3_s[3]);  // BPR
+
+   //-------------------------------------
+   // Generate oddwin signal for rs and rd
+   //-------------------------------------
+   assign fcl_fdp_oddwin_s = (exu_ifu_oddwin_s[0] & thr_f[0] |
+                              exu_ifu_oddwin_s[1] & thr_f[1] |
+                              exu_ifu_oddwin_s[2] & thr_f[2] |
+                              exu_ifu_oddwin_s[3] & thr_f[3]);
+
+   dff_s #(1) oddwin_ff(.din (fcl_fdp_oddwin_s),
+		                  .clk (clk),
+		                  .q   (fcl_imd_oddwin_d),
+		                  .se  (se), .si(), .so());
+   
+
+   sink #(2) s0(.in (sas_thrid_w));
+endmodule // sparc_ifu_fcl
Index: /trunk/T1-CPU/ifu/sparc_ifu_incr46.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_incr46.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_incr46.v	(revision 6)
@@ -0,0 +1,45 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_incr46.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Description:	
+//  Contains the pc incrementer.
+*/
+
+module sparc_ifu_incr46(a, a_inc, ofl);   // combinational (unclocked) +1 incrementer for a 46-bit value
+   input  [45:0]  a;                       // value to increment
+   output [45:0]  a_inc;                   // a + 1, truncated to 46 bits
+   output 	  ofl;                       // 1 when bit 45 is 0 in a and 1 in a_inc
+   
+   reg [45:0] 	  a_inc;                    // declared reg only because it is assigned in the always block
+   reg 		  ofl;
+   
+   always @ (a)                            // re-evaluated on every change of a; no clock involved
+     begin
+	      a_inc = a + (46'b1);               // 46'b1 is a 46-bit literal with value 1
+	      ofl = (~a[45]) & a_inc[45];        // uses the a_inc just computed (blocking assignment above)
+     end
+   
+   
+   
+endmodule // sparc_ifu_incr46
+
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_swl.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_swl.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_swl.v	(revision 6)
@@ -0,0 +1,1831 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_swl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_swl
+//  Description:	
+//  The switch logic manages the 4 threads.  It schedules the next
+//  thread to be executed.   
+*/
+////////////////////////////////////////////////////////////////////////
+
+`include "ifu.h"
+
+module sparc_ifu_swl(/*AUTOARG*/
+   // Outputs
+   swl_sscan_thrstate, so, dtu_reset, swl_dec_mulbusy_e, 
+   swl_dec_divbusy_e, swl_dec_fpbusy_e, swl_dec_fp_enable_d, 
+   swl_dec_ibe_e, dtu_fcl_ntr_s, dtu_fcl_running_s, 
+   dtu_fcl_rollback_g, dtu_fcl_retract_d, dtu_fcl_thr_active, 
+   dtu_fcl_nextthr_bf, swl_dcl_thr_d, swl_dcl_thr_w2, 
+   dtu_fdp_thrconf_e, 
+   // Inputs
+   rclk, se, si, gdbginit_l, arst_l, grst_l, ctu_sscan_tid, 
+   ifq_dtu_thrrdy, ifq_dtu_pred_rdy, ifu_tlu_inst_vld_w, 
+   ifu_tlu_ttype_vld_m, fcl_dtu_hprivmode_d, fcl_dtu_hprivmode_w2, 
+   tlu_ifu_flush_pipe_w, fcl_swl_flush_w, fcl_dtu_sync_intr_d, 
+   fcl_dtu_nuke_thr_w, fcl_dtu_rst_thr_w, fcl_dtu_resum_thr_w, 
+   fcl_dtu_thr_f, tlu_hpstate_ibe, lsu_ifu_ldsta_internal_e, 
+   tlu_ifu_trappc_vld_w1, dec_swl_ll_done_d, dec_swl_br_done_d, 
+   dec_swl_rdsr_sel_thr_d, dec_swl_std_inst_d, dec_swl_sta_inst_e, 
+   wsr_fixed_inst_w, dec_swl_ld_inst_d, dec_swl_mul_inst_d, 
+   dec_swl_div_inst_d, dec_swl_fpop_d, dec_swl_allfp_d, 
+   dec_swl_frf_upper_d, dec_swl_frf_lower_d, dec_swl_wrtfprs_w, 
+   dcl_swl_tcc_done_m, exu_ifu_longop_done_g, exu_ifu_spill_e, 
+   lsu_ifu_ldst_cmplt, lsu_ifu_dc_parity_error_w2, lsu_ifu_stbcnt0, 
+   lsu_ifu_stbcnt1, lsu_ifu_stbcnt2, lsu_ifu_stbcnt3, 
+   lsu_ifu_quad_asi_e, ffu_ifu_fpop_done_w2, ffu_ifu_tid_w2, 
+   ffu_ifu_fst_ce_w, tlu_ifu_trap_tid_w1, tlu_ifu_pstate_pef, 
+   lsu_ifu_ldst_miss_g, fcl_swl_int_activate_i3, 
+   fcl_swl_flush_wake_w, ifq_swl_stallreq, fcl_dtu_stall_bf, 
+   fcl_swl_swout_f, fcl_swl_swcvld_s, fdp_fcl_swc_s2, 
+   fcl_ifq_icmiss_s1, fcl_dtu_inst_vld_e, fcl_dtu_intr_vld_e, 
+   fcl_dtu_inst_vld_d, erb_dtu_ifeterr_d1, dtu_inst_anull_e, 
+   const_cpuid, thr_config_in_m, dec_swl_wrt_tcr_w, 
+   dec_swl_st_inst_d, extra_longlat_compl
+   );
+
+   input       rclk, 
+               se, 
+               si,
+               gdbginit_l,
+               arst_l,
+               grst_l;
+
+   input [3:0] ctu_sscan_tid;   // guaranteed one-hot by ctu
+
+   input [3:0] ifq_dtu_thrrdy;	      // ifq completion signals
+   input [3:0] ifq_dtu_pred_rdy;	    // ifq almost done
+
+   input       ifu_tlu_inst_vld_w,    //
+	             ifu_tlu_ttype_vld_m;
+   input       fcl_dtu_hprivmode_d;
+   input       fcl_dtu_hprivmode_w2;
+   input       tlu_ifu_flush_pipe_w;  // flush after a trap
+   input       fcl_swl_flush_w;
+   input       fcl_dtu_sync_intr_d;   // interrupt
+   input       fcl_dtu_nuke_thr_w;    // sync suspend
+   input       fcl_dtu_rst_thr_w;    // 
+   input       fcl_dtu_resum_thr_w;    // 
+   input [3:0] fcl_dtu_thr_f;
+
+   input [3:0] tlu_hpstate_ibe;
+   
+   input       lsu_ifu_ldsta_internal_e,// sta to local reg
+	             tlu_ifu_trappc_vld_w1, // trap completion
+	             dec_swl_ll_done_d,   // rdsr completion
+               dec_swl_br_done_d,
+	             dec_swl_rdsr_sel_thr_d,
+	             dec_swl_std_inst_d,    //
+	             dec_swl_sta_inst_e,    // state change
+	             wsr_fixed_inst_w,      // wrspr completion
+	             dec_swl_ld_inst_d;     // load hit/compl. speculation
+
+   input       dec_swl_mul_inst_d,
+	             dec_swl_div_inst_d;
+   input       dec_swl_fpop_d,
+	             dec_swl_allfp_d;
+
+   input       dec_swl_frf_upper_d,
+	             dec_swl_frf_lower_d,
+	             dec_swl_wrtfprs_w;
+   
+   input       dcl_swl_tcc_done_m;
+
+   input [3:0] exu_ifu_longop_done_g; // save, restore, div, mul compl.
+   input       exu_ifu_spill_e;
+   input [3:0] lsu_ifu_ldst_cmplt;
+   input       lsu_ifu_dc_parity_error_w2;
+
+   input [3:0] lsu_ifu_stbcnt0,
+	             lsu_ifu_stbcnt1,
+	             lsu_ifu_stbcnt2,
+	             lsu_ifu_stbcnt3;
+
+//   input [3:0] lsu_ifu_stq_busy;
+   input       lsu_ifu_quad_asi_e;
+	 
+   input       ffu_ifu_fpop_done_w2;
+   input [1:0] ffu_ifu_tid_w2;
+   input       ffu_ifu_fst_ce_w;
+   
+   input [1:0] tlu_ifu_trap_tid_w1;
+
+   input [3:0] tlu_ifu_pstate_pef;
+
+   input       lsu_ifu_ldst_miss_g;   // dcache hit or miss
+   
+   input [3:0] fcl_swl_int_activate_i3; // wake up thread on interrupt
+   input       fcl_swl_flush_wake_w;
+
+   // TBD: with the latest changes fcl_dtu_switch_s = dtu_fcl_ntr_s, so 
+   // this input can be removed.
+//   input       fcl_dtu_switch_s; // switch out curr, sw in next
+
+   input       ifq_swl_stallreq;
+
+   input       fcl_dtu_stall_bf,
+               fcl_swl_swout_f,       // curr thread is stalled
+	             fcl_swl_swcvld_s,
+               fdp_fcl_swc_s2,        // thread stall condition
+	             fcl_ifq_icmiss_s1;     // icache miss
+   input       fcl_dtu_inst_vld_e,
+               fcl_dtu_intr_vld_e,
+	             fcl_dtu_inst_vld_d;
+
+   input       erb_dtu_ifeterr_d1;
+
+   input       dtu_inst_anull_e;      // anull delay slot
+
+   input [3:0] const_cpuid;           // use 4 bits to allow future
+                                      // expansion to 16 cores
+
+   input [2:0] thr_config_in_m;       // write data to thread status reg
+   input       dec_swl_wrt_tcr_w;     // write signal for thr status reg
+   input       dec_swl_st_inst_d;
+
+   input [3:0] extra_longlat_compl;   // spare signal, not used
+   
+   
+   output [10:0] swl_sscan_thrstate;
+   output        so;
+   
+   output      dtu_reset;
+
+   output      swl_dec_mulbusy_e,
+	             swl_dec_divbusy_e,
+	             swl_dec_fpbusy_e,
+	             swl_dec_fp_enable_d;
+
+   output      swl_dec_ibe_e;
+   
+   // to fcl
+   output      dtu_fcl_ntr_s,         // next thread ready
+               dtu_fcl_running_s,
+	             dtu_fcl_rollback_g,    // rollback on spec
+	             dtu_fcl_retract_d;     // rollback on hw hazard
+
+   output [3:0] dtu_fcl_thr_active;   // currently active threads
+   
+   output [3:0] dtu_fcl_nextthr_bf,   // thread to switch to when ntr=1
+		            swl_dcl_thr_d,
+		            swl_dcl_thr_w2;
+
+   // to fdp
+   output [40:0] dtu_fdp_thrconf_e;   // thread conf for RDSR inst
+
+//----------------------------------------------------------
+// Declarations
+//----------------------------------------------------------
+   // local signals
+//   wire [3:0]    count_nxt,
+//	               count;
+//   wire          proc0;
+//   wire          start_on_rst;
+
+   wire          ibe_d,
+                 ibe_e;
+
+   wire [3:0]    completion,
+		             wm_imiss,
+		             wm_other,
+		             spec_ld_d,
+		             issue_spec_ld,
+		             ldmiss,
+		             ldhit_thr,
+		             spec_ld_g,
+		             imiss,
+		             trap,
+                 ldmiss_non_crit,
+                 ldmiss_crit,
+		             trp_no_retr,
+//		             rb_thr_w,
+                 rt_st_thr_d,
+                 rt_st_thr_e,
+		             schedule,
+		             int_activate,
+		             start_thread,
+                 thaw_thread,
+		             resum_thread,
+		             nuke_thread,
+		             rst_thread;
+
+   wire          rollback_g,
+                 rb_en_g,
+                 rollback_g_l,
+                 rollback_w2;
+   
+
+   wire          sched_nt;
+   
+   wire [3:0]    fixedop_done,
+		             wsr_done_w3;
+
+   wire          wsr_inst_w2,
+                 wsr_inst_w3;
+   
+   wire          wsr_fixed_qual_w,
+                 wsr_fixed_w2;
+
+   wire          llinst_done_e;
+   
+   wire [3:0]    ll_done_e,
+                 branch_done_d,
+                 std_tcc_done_m;
+
+   wire          sta_done_e,
+		             killed_inst_done_e;
+   wire [3:0]    killed_uniop_done_e;
+
+   wire          uniop_d,
+		             uniop_e;
+
+//   wire          no_iftrap_m,
+//                 no_iftrap_w;
+
+   wire [3:0]    thr_s1,
+                 thr_s2,
+		             thr_f,
+		             thr_d,
+		             thr_e,
+		             thr_m,
+		             thr_w,
+                 st_thr_w2,
+		             st_thr_w3;
+
+   wire          flush_pipe_w_nxt,
+                 flush_all_w,
+                 flush_pipe_w2,
+                 flush_done_w,
+                 fp_flush_done_w2,
+                 fp_flush_done_w3,
+		             flush_done_w2;
+
+   wire          rbfor_fst_ce_w;
+   
+   wire [3:0]    flush_wake_w2,
+                 fp_flush_wake_w3,
+                 halt_thread;
+   
+   wire          wrt_spec_w,
+                 wrt_spec_w2,
+                 halt_w,
+                 halt_w2,
+		             en_spec_d,
+		             en_spec_m,
+		             en_spec_g,
+		             spec_next;
+   
+
+   wire          ld_inst_e,
+		             ld_inst_next_e,
+                 ld_inst_qual_d,
+		             ld_inst_m,
+		             ld_inst_unq_w,
+		             ld_inst_w,
+                 ld_inst_w2;
+	 
+   wire          switch_out,
+	               sw_cond_s,
+	               swc_d,
+	               swc_next_d,
+	               swc_e;
+
+   wire          trp_noretr_d;
+   
+   wire [3:0]    all_stall;
+
+   wire [3:0]    rdy,
+	               sprdy_or_urdy;
+
+   wire          running_s2,
+                 atr_s;
+
+   wire [4:0]    thr0_state,
+		             thr1_state,
+		             thr2_state,
+		             thr3_state;
+
+   wire          use_spec;
+
+   wire [3:0]    wrt_tcr_w2;
+
+   wire [1:0]    enc_thr_d;
+
+   wire          rd_thract_d,
+		             rd_thract_e;
+   wire [51:0]   fmt_thrconf_e,
+                 thrconf_out_e,
+                 fmt_thrconf_adj;
+
+   wire          hprivmode_e,
+                 rdsr_sel_thr_e;
+   
+   wire [2:0]    rd_tid_spec_e;
+
+   wire [3:0]    mul_busy_d,
+		             mul_busy_e,
+		             div_busy_d,
+		             div_busy_e,
+		             fp_busy_d,
+		             fp_busy_e;
+
+   wire          true_fpbusy_e,
+                 fpbusy_local_e,
+                 true_mulbusy_e,
+                 true_divbusy_e;
+
+   wire          fbusy_nxt_d,
+                 fbusy_crit_e,
+                 fbusy_d3,
+                 fbusy_d0,
+                 fbusy_d1,
+                 fbusy_d2,
+                 dbusy_d3,
+                 dbusy_d0,
+                 dbusy_d1,
+                 dbusy_d2,
+                 mbusy_d3,
+                 mbusy_d0,
+                 mbusy_d1,
+                 mbusy_d2;
+
+   wire [3:0]    mul_wait,
+		             mul_wait_nxt,
+		             div_wait,
+		             div_wait_nxt,
+		             fp_wait,
+		             fp_wait_nxt;
+
+   wire          mul_wait_any,
+		             div_wait_any,
+		             fp_wait_any;
+
+   wire [3:0]    mul_wake,
+		             div_wake,
+		             fp_wake;
+
+   wire [3:0]    exu_lop_done,
+		             mul_done,
+		             div_done,
+		             fp_done;
+
+   wire [3:0]    retr_thr_wakeup;
+   wire [3:0]    trap_thrrdy,
+		             trap_thr;
+
+   wire [3:0]    fp_thr,
+		             fp_thrrdy;
+
+   wire          same_thr_de,
+		             same_thr_dg,
+                 same_thr_fd,
+                 same_thr_fe,
+		             same_thr_fm,                 
+		             same_thr_fg;
+
+
+   wire          st_thisthr_e;
+   
+   wire          st_inst_e,
+		             st_inst_qual_d,
+		             st_inst_qual_e,
+ 		             st_inst_m,
+ 		             st_inst_g;
+   
+   wire          pipe_st_cnt_ge1,
+ 		             pipe_st_cnt_ge2,
+ 		             pipe_st_cnt_ge3;
+   
+   wire          pipe_st_d,
+                 pipe_st_e,
+ 		             pipe_st_m,
+ 		             pipe_st_g;
+
+   wire          all_dst_ge1,
+ 		             all_dst_ge2,
+ 		             all_dst_ge3,
+ 		             all_dst_eq4;
+
+   wire          dst_cnt_ge1,
+ 		             dst_cnt_ge2,
+ 		             dst_cnt_ge3;
+   
+   
+   wire [3:0]    stbcnt_s,
+		             stbcnt_d;
+
+   wire [3:0]    stb_stall,
+		             stb_blocked,
+		             stb_blocked_d1,
+		             st_in_pipe,
+		             stb_retry,
+		             wm_stbwait,
+		             stb_wait_nxt;
+   
+   wire          switch_store_d,
+//		             retract_stall_d,
+		             retract_store_e,
+                 retract_st_next_d,
+		             retract_store_d;
+
+   wire          retract_iferr_d,
+                 iferr_s,
+                 iferr_d;
+
+   wire          clear_wmo_e;
+
+   wire          sw_st_e,
+		             sw_st_m,
+		             sw_st_g,
+		             sw_st_w2;
+
+   wire          std_inst_e;
+//		             stq_inst_e,
+//		             stq_inst_m,
+//		             stq_inst_w,
+//		             stq_inst_w2;
+   wire          std_done_e;
+   wire          std_done_m;
+//   wire [3:0]    stq_busy,
+//		             stq_in_pipe,
+//		             stq_wait,
+//		             stq_wait_next,
+//		             stq_done_thr;
+
+   wire [2:0]    fprs0,
+		             fprs1,
+		             fprs2,
+		             fprs3,
+		             fprs_d,
+		             fprs_e,
+		             fprs_wrt_data,
+                 thr_config_in_w,
+                 thr_config_in_w2,
+		             fprs0_nxt,
+		             fprs1_nxt,
+		             fprs2_nxt,
+		             fprs3_nxt;
+
+   wire [1:0]    new_fprs;
+
+   wire [3:0]    fprs_en_s,
+		             fpen_vec_s;
+
+   wire          wrt_fprs_w,
+                 wrt_fprs_w2;
+   
+   wire [3:0]    sel_wrt,
+		             fprs_sel_set,
+		             fprs_sel_wrt,
+		             fprs_sel_old;
+
+   wire          fpen_s;
+
+   wire [1:0]    trap_tid_w2;
+   wire          trappc_vld_w2;
+
+   wire          dtu_reset_l;
+   wire          sched_reset;
+   
+   wire          clk;
+   
+   //
+   // Code Begins Here
+   //
+
+   assign        clk = rclk;
+   
+   // reset buffer
+   dffrl_async rstff(.din (grst_l),
+                        .q   (dtu_reset_l),
+                        .clk (clk), .se(se), .si(), .so(),
+                        .rst_l (arst_l));
+
+   assign       dtu_reset = ~dtu_reset_l;
+   
+
+   //---------------------------------------------
+   // Start off thread on reset using this counter
+   //---------------------------------------------
+//   dffr #(4) thrrdy_ctr(.din (count_nxt),
+//		                    .clk (clk),
+//		                    .q   (count),
+//		                    .rst (dtu_reset),
+//		                    .se (se), .si(), .so());
+//
+//   // count_nxt = count + 1, sticky at 8 = 1111
+//   assign count_nxt[0] = ~count[0] | count[3];
+//   assign count_nxt[1] = (count[1] ^ count[0]) | count[3];
+//   assign count_nxt[2] = (count[2] ^ (count[1] & count[0])) | count[3]; 
+//   assign count_nxt[3] = (count[3] ^ (count[2] & count[1] & count[0])) |
+//	                       count[3]; 
+//
+//   assign proc0 = (const_cpuid == 4'b0000) ? 1'b1 : 1'b0;
+//   assign start_on_rst = (~count[3] & count[2] & count[1] & count[0])
+//                          & proc0;
+
+//`ifdef IFU_SAT   
+//   // temporary hack to start threads
+//   reg [3:0]  auto_start;
+//   always @ (posedge clk)
+//     auto_start = 4'b0000;
+//`endif
+   
+   //-----------------
+   // completion logic
+   //-----------------
+   sparc_ifu_thrcmpl compl(
+                           .reset       (dtu_reset),
+			                     /*AUTOINST*/
+                           // Outputs
+                           .completion  (completion[3:0]),
+                           .wm_imiss    (wm_imiss[3:0]),
+                           .wm_other    (wm_other[3:0]),
+                           // Inputs
+                           .clk         (clk),
+                           .se          (se),
+                           .si          (si),
+                           .fcl_ifq_icmiss_s1(fcl_ifq_icmiss_s1),
+                           .erb_dtu_ifeterr_d1(erb_dtu_ifeterr_d1),
+                           .sw_cond_s   (sw_cond_s),
+                           .en_spec_g   (en_spec_g),
+                           .atr_s       (atr_s),
+                           .dtu_fcl_thr_active(dtu_fcl_thr_active[3:0]),
+                           .ifq_dtu_thrrdy(ifq_dtu_thrrdy[3:0]),
+                           .ifq_dtu_pred_rdy(ifq_dtu_pred_rdy[3:0]),
+                           .exu_lop_done(exu_lop_done[3:0]),
+                           .branch_done_d(branch_done_d[3:0]),
+                           .fixedop_done(fixedop_done[3:0]),
+                           .ldmiss      (ldmiss[3:0]),
+                           .spec_ld_d   (spec_ld_d[3:0]),
+                           .trap        (trap[3:0]),
+                           .retr_thr_wakeup(retr_thr_wakeup[3:0]),
+                           .flush_wake_w2(flush_wake_w2[3:0]),
+                           .ldhit_thr   (ldhit_thr[3:0]),
+                           .spec_ld_g   (spec_ld_g[3:0]),
+                           .clear_wmo_e (clear_wmo_e),
+                           .wm_stbwait  (wm_stbwait[3:0]),
+                           .stb_retry   (stb_retry[3:0]),
+                           .rst_thread  (rst_thread[3:0]),
+                           .trap_thrrdy (trap_thrrdy[3:0]),
+                           .thr_s2      (thr_s2[3:0]),
+                           .thr_e       (thr_e[3:0]),
+                           .thr_s1      (thr_s1[3:0]),
+                           .fp_thrrdy   (fp_thrrdy[3:0]),
+                           .lsu_ifu_ldst_cmplt(lsu_ifu_ldst_cmplt[3:0]),
+                           .sta_done_e  (sta_done_e),
+                           .killed_inst_done_e(killed_inst_done_e));
+
+   //------------
+   // Thread Pipe
+   //------------
+   assign thr_f = fcl_dtu_thr_f;
+
+//   assign thr_dec_f[0] = thr_f[0] | rst_tri_en;
+//   assign thr_dec_f[3:1] = thr_f[3:1] & {3{~rst_tri_en}};
+
+//   assign thr_dec_d[0] = thr_d[0] | rst_tri_en;
+//   assign thr_dec_d[3:1] = thr_d[3:1] & {3{~rst_tri_en}};
+
+
+   dff_s #(4) thrd_reg(.din  (thr_f[3:0]),
+		   .clk  (clk),
+		   .q    (thr_d[3:0]),
+		   .se   (se), .si(), .so());
+
+   assign     swl_dcl_thr_d = thr_d;
+
+   dff_s #(4) thre_reg(.din  (thr_d),
+		   .clk  (clk),
+		   .q    (thr_e),
+		   .se   (se), .si(), .so());
+
+   dff_s #(4) thrm_reg(.din  (thr_e),
+		   .clk  (clk),
+		   .q    (thr_m),
+		   .se   (se), .si(), .so());
+   dff_s #(4) thrw_reg(.din  (thr_m),
+		   .clk  (clk),
+		   .q    (thr_w),
+		   .se   (se), .si(), .so());
+
+   dff_s #(4) thrw2_reg(.din  (thr_w),
+		    .clk  (clk),
+		    .q    (st_thr_w2),
+		    .se   (se), .si(), .so());
+
+   dff_s #(4) thrw3_reg(.din  (st_thr_w2),
+		    .clk  (clk),
+		    .q    (st_thr_w3),
+		    .se   (se), .si(), .so());
+
+   assign     swl_dcl_thr_w2 = st_thr_w2;
+
+   // send ibe of curr thread to dec
+   assign ibe_d = (thr_d[0] & tlu_hpstate_ibe[0] |
+                   thr_d[1] & tlu_hpstate_ibe[1] |
+                   thr_d[2] & tlu_hpstate_ibe[2] |
+                   thr_d[3] & tlu_hpstate_ibe[3]);
+
+   dff_s #(1) ibee_ff(.din (ibe_d),
+                    .q   (ibe_e),
+                    .clk (clk), .se(se), .si(), .so());
+   assign swl_dec_ibe_e = ibe_e;
+
+//----------------------------------------------------------------------
+// Track Thread Execution
+//----------------------------------------------------------------------
+
+   // track instructions
+   dff_s #(1) lle_ff(.din (dec_swl_ll_done_d),
+                   .q   (llinst_done_e),
+                   .clk (clk), .se (se), .si(), .so());
+                   
+   assign ll_done_e = thr_e & {4{llinst_done_e & fcl_dtu_inst_vld_e & 
+                                 ~exu_ifu_spill_e}};
+   assign std_tcc_done_m = thr_m & {4{dcl_swl_tcc_done_m | std_done_m}};
+
+   assign wsr_fixed_qual_w  = wsr_fixed_inst_w & ifu_tlu_inst_vld_w & 
+                              ~fcl_swl_flush_w;
+   dff_s #(1) wsrw2_ff(.din (wsr_fixed_qual_w),
+                     .q   (wsr_fixed_w2),
+                     .clk (clk), .se(se), .si(), .so());
+                     
+   assign wsr_inst_w2  = wsr_fixed_w2 & ~flush_pipe_w2;
+
+   // delay one cycle to allow tlu to finish
+   dff_s #(1) wsw3_ff(.din (wsr_inst_w2),
+                    .q   (wsr_inst_w3),
+                    .clk (clk), .se(se), .si(), .so());
+
+   assign wsr_done_w3 = {4{wsr_inst_w3}} & st_thr_w3;
+
+   assign fixedop_done = (ll_done_e | wsr_done_w3 | std_tcc_done_m |
+	                        wrt_tcr_w2 | extra_longlat_compl);
+
+   assign branch_done_d = thr_d & {4{dec_swl_br_done_d & fcl_dtu_inst_vld_d}};
+
+   assign sta_done_e = dec_swl_sta_inst_e & fcl_dtu_inst_vld_e & 
+                       ~lsu_ifu_ldsta_internal_e;
+   assign ld_inst_qual_d = dec_swl_ld_inst_d & fcl_dtu_inst_vld_d & 
+                           ~iferr_d;
+   dff_s #(1) lde_ff(.din  (ld_inst_qual_d),
+		               .clk  (clk),
+		               .q    (ld_inst_e),
+		               .se   (se), .si(), .so());
+   assign ld_inst_next_e = ld_inst_e;
+//                         & ~dtu_inst_anull_e &    
+//                           ~(lsu_ifu_ldsta_internal_e & 
+//                             ifu_lsu_alt_space_e &
+//                             fcl_dtu_inst_vld_e);
+   
+//   assign ld_inst_internal_e = ~dtu_inst_anull_e & ld_inst_e &
+//                               (fcl_dtu_inst_vld_e & 
+//                                lsu_ifu_ldsta_internal_e & 
+//                                ifu_lsu_alt_space_e);
+   
+   dff_s #(1) ldm_ff(.din  (ld_inst_next_e),
+		               .clk  (clk),
+		               .q    (ld_inst_m),
+		               .se   (se), .si(), .so());
+   dff_s #(1) ldw_ff(.din  (ld_inst_m),
+		               .clk  (clk),
+		               .q    (ld_inst_unq_w),
+		               .se   (se), .si(), .so());
+   assign ld_inst_w = ifu_tlu_inst_vld_w & ld_inst_unq_w;
+   dff_s #(1) ldw2_ff(.din  (ld_inst_w),
+		               .clk  (clk),
+		               .q    (ld_inst_w2),
+		               .se   (se), .si(), .so());
+
+   // track instruction status
+   dff_s #(1) swcd_ff(.din  (sw_cond_s),
+		                .clk  (clk),
+		                .q    (swc_d),
+		                .se   (se), .si(), .so());
+
+   assign swc_next_d = (swc_d & ~dec_swl_br_done_d);
+//			                   | fcl_dtu_sync_intr_d;
+   
+   dff_s #(1) swce_ff(.din  (swc_next_d),
+		                .clk  (clk),
+		                .q    (swc_e),
+		                .se   (se), .si(), .so());
+
+//bug6838,bug6989 - interrupt issued in annulled delay slot resets wm_other mask in e-stage; this
+//                  reset causes switch logic to lose a long latency op(div) which set the wm_other mask
+//                  in s-stage. Note that the div is issued to FPU. the ifu re-issues the interrupt - 
+//                  which results in flush. this kills the long latency op and div is lost
+//
+//                  fix is to detect interrupt in anulled delay slot followed by long latency op and
+//                  not reset the wm_other mask.
+//
+//       10/07/04 - fix changed to delay setting of wm_other mask from d-cycle to e-cycle. hence
+//                  removing the kill in killed_inst_done_e
+//
+//   assign killed_inst_done_e = (fcl_dtu_inst_vld_e  & swc_e | //sw inst
+//                                fcl_dtu_intr_vld_e) &  // any intr
+//                                 dtu_inst_anull_e;
+
+   assign killed_inst_done_e = fcl_dtu_inst_vld_e  & swc_e & //sw inst
+                                 dtu_inst_anull_e;
+
+   // a uniop is something that stalls all threads (looks like a uni
+   // threaded machine)
+   assign uniop_d = (dec_swl_allfp_d | //& swl_dec_fp_enable_d  
+                     dec_swl_mul_inst_d | dec_swl_div_inst_d) & 
+                      fcl_dtu_inst_vld_d;
+   dff_s #(1) uniop_ff(.din  (uniop_d),
+		                 .clk  (clk),
+		                 .q    (uniop_e),
+		                 .se   (se), .si(), .so());
+   assign killed_uniop_done_e = thr_e & {4{dtu_inst_anull_e & uniop_e & 
+                                           fcl_dtu_inst_vld_e |
+                                           clear_wmo_e}};
+
+//   assign sched_nt = fcl_dtu_switch_s & ~fcl_dtu_stall_bf;
+   assign sched_nt = dtu_fcl_ntr_s & ~(fcl_dtu_stall_bf | ifq_swl_stallreq);   
+   assign schedule = dtu_fcl_nextthr_bf & {4{sched_nt}};
+
+   // speculate load hit if it is a load instruction
+   // FP loads are not speculated on 
+   assign spec_ld_d = thr_d & {4{ld_inst_qual_d & en_spec_d}};
+   assign issue_spec_ld = thr_d & {4{ld_inst_qual_d & en_spec_d}} &
+                          ~wm_imiss & ~wm_stbwait;
+
+//   assign issue_spec_ld = thr_d & {4{dec_swl_ld_inst_d & en_spec_d & 
+//                                     ~fcl_ifq_icmiss_s1}} & ~wm_imiss;
+   
+   assign spec_ld_g = (thr_w & {4{ld_inst_w & en_spec_g}});
+   
+   // actual load hit signal and load miss stall
+//   assign rollback_g = en_spec_g & ld_inst_w & lsu_ifu_ldst_miss_g;
+   // expand to gates:
+   assign rb_en_g = en_spec_g & ld_inst_w;
+   assign rollback_g_l = ~(lsu_ifu_ldst_miss_g & rb_en_g);
+//   bw_u1_nand2_7x UZsize_rbgen(.a (lsu_ifu_ldst_miss_g),
+//                               .b (rb_en_g),
+//                               .z (rollback_g_l));
+   assign rollback_g = ~rollback_g_l;
+//   bw_u1_invh_25x UZsize_rbbuf(.a (rollback_g_l),
+//                               .z (rollback_g));
+   assign dtu_fcl_rollback_g = rollback_g;
+
+   // delay restart of ldhit when en_spec=0 by 1 more cycle?
+   
+// assign rb_thr_w = thr_w & {4{en_spec_g & ld_inst_w & lsu_ifu_ldst_miss_g}};
+// assign spec_ldmiss = rb_thr_w; // to thrcmpl
+   
+   assign ldhit_thr = (thr_w & {4{ld_inst_w & ~lsu_ifu_ldst_miss_g}} | 
+                       thr_e & {4{ld_inst_e & dtu_inst_anull_e}});
+   assign ldmiss_crit = thr_w & {4{ld_inst_w & lsu_ifu_ldst_miss_g}};
+   assign ldmiss_non_crit = st_thr_w2 & {4{lsu_ifu_dc_parity_error_w2}};
+                            // | thr_e & {4{ld_inst_internal_e}} 
+                            
+   assign ldmiss = ldmiss_crit | ldmiss_non_crit;
+   
+   assign rt_st_thr_d = thr_d & {4{retract_store_d}};
+   assign rt_st_thr_e = thr_e & {4{retract_store_e}};
+
+   dff_s #(1) rbw2_ff(.din (rollback_g),
+                    .q   (rollback_w2),
+                    .clk (clk), .se(se), .si(), .so());
+
+   // traps and interrupts
+   dff_s #(1) ld_trp_reg(.din (tlu_ifu_trappc_vld_w1),
+                       .q   (trappc_vld_w2),
+                       .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(2) trp_tid_reg(.din (tlu_ifu_trap_tid_w1[1:0]),
+                        .q   (trap_tid_w2[1:0]),
+                        .clk (clk), .se(se), .si(), .so());
+   
+   assign trap_thr[0] = ~trap_tid_w2[1] & ~trap_tid_w2[0];
+   assign trap_thr[1] = ~trap_tid_w2[1] &  trap_tid_w2[0];
+   assign trap_thr[2] =  trap_tid_w2[1] & ~trap_tid_w2[0];
+   assign trap_thr[3] =  trap_tid_w2[1] &  trap_tid_w2[0];
+
+   assign trap_thrrdy = trap_thr & {4{trappc_vld_w2}};
+
+   // fst processed directly in swl
+   assign rbfor_fst_ce_w = ifu_tlu_inst_vld_w & ~tlu_ifu_flush_pipe_w & 
+                           ffu_ifu_fst_ce_w & ~fcl_swl_flush_w;
+
+//   dff #(1) fstce_ff(.din (rbfor_fst_ce_w),
+//                     .q   (rbfor_fst_ce_w2),
+//                     .clk (clk), .se(se), .si(), .so());
+   
+   assign flush_all_w = tlu_ifu_flush_pipe_w | fcl_swl_flush_w;
+//   assign flush_pipe_w_nxt = tlu_ifu_flush_pipe_w & ~fcl_swl_flush_w;
+   assign flush_pipe_w_nxt = tlu_ifu_flush_pipe_w & 
+                             ~fcl_swl_flush_wake_w;   
+   
+   dff_s #(1) flpw2_ff(.din (flush_pipe_w_nxt),
+                     .q   (flush_pipe_w2),
+                     .clk (clk), .se(se), .si(), .so());
+
+//   assign no_iftrap_m = ~ifu_tlu_ttype_vld_m;
+//   dff #(1) trpw_ff(.din (no_iftrap_m),
+//                    .q   (no_iftrap_w),
+//                    .clk (clk), .se(se), .si(), .so());
+   
+//bug6838,bug6989 - change setting of trap for interrupt from d-cycle to e-cycle
+//                  remove  thr_d & {4{fcl_dtu_sync_intr_d & ~iferr_d}} & ~rt_st_thr_e |
+   
+//   assign trap =  thr_w & {4{flush_all_w}} |
+   assign trap =  st_thr_w2 & {4{flush_pipe_w2}} |
+                  thr_w & {4{fcl_swl_flush_w}} |
+                        thr_e & {4{fcl_dtu_intr_vld_e & ~dtu_inst_anull_e}} |
+	                thr_m & {4{ifu_tlu_ttype_vld_m}};
+
+   assign trp_no_retr = st_thr_w2 & {4{flush_pipe_w2 |
+                                       lsu_ifu_dc_parity_error_w2 &
+                                       ld_inst_w2    | 
+                                       rollback_w2}}  |
+	                      trap_thr & {4{trappc_vld_w2}} |
+                        fp_flush_wake_w3 |
+                        thr_w & {4{fcl_swl_flush_w}};
+//	                       thr_m    & {4{ifu_tlu_ttype_vld_m}};
+
+   assign trp_noretr_d = (thr_d[0] & trp_no_retr[0] | 
+                          thr_d[1] & trp_no_retr[1] | 
+                          thr_d[2] & trp_no_retr[2] | 
+                          thr_d[3] & trp_no_retr[3]);
+   
+//   assign flush_done_w = fcl_swl_flush_w & ~fcl_swl_flush_wait_w;
+   assign flush_done_w = fcl_swl_flush_wake_w;   
+   dff_s #(1) flsh_ff(.din (flush_done_w),
+		                .q   (flush_done_w2),
+		                .clk (clk), .se(se), .si(), .so());
+   assign flush_wake_w2 = {4{flush_done_w2}} & st_thr_w2 | fp_flush_wake_w3;
+
+   // delay FP wakeup by one extra cycle to allow time for IRF CE
+   // to be corrected.
+   dff_s #(1) fpflsh_ff(.din (rbfor_fst_ce_w),
+		                  .q   (fp_flush_done_w2),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(1) fpflw_ff(.din (fp_flush_done_w2),
+		                 .q   (fp_flush_done_w3),
+		                 .clk (clk), .se(se), .si(), .so());
+   assign fp_flush_wake_w3 = st_thr_w3 & {4{fp_flush_done_w3}};
+   
+   // store buffer full
+//   assign stbfull_on_curr_thr = stb_stall & thr_f;
+//   assign stbfull_thisthr =  stbfull_on_curr_thr[0] |
+//	                           stbfull_on_curr_thr[1] |
+//	                           stbfull_on_curr_thr[2] |
+//	                           stbfull_on_curr_thr[3];
+
+//   assign stbfull_nextthr = ((stb_stall & dtu_fcl_nextthr_bf)
+//			                       == 4'b0) ?
+//			                        1'b0 : 1'b1;
+
+   // imiss stall condition (4-bit, one bit per thread)
+   assign thr_s1 = thr_d;   // plain alias: thr_s1 is the same vector as thr_d
+   assign imiss = (thr_s1 & {4{fcl_ifq_icmiss_s1}}) |   // fcl_ifq_icmiss_s1 masked by thr_s1
+	                  (thr_e & {4{erb_dtu_ifeterr_d1}});  // or erb_dtu_ifeterr_d1 masked by thr_e
+
+   // All Stall conditions: bitwise OR of the per-thread stall vectors; bit i drives thrfsm<i>.stall
+   assign all_stall = imiss | ldmiss_non_crit | trap | stb_stall | 
+                      ldmiss_crit;
+
+   // TBD: move to ifu -- done
+//   assign ext_stallreq = ifq_dtu_stallreq | lsu_ifu_stallreq | 
+//                         ffu_ifu_stallreq;  
+                         // | other stall reqs
+//   assign cpu_fcl_stallreq = ext_stallreq;
+   
+   // thread start and stop: switch_out is a single bit shared by all four thrfsm instances
+//   assign switch_out = fcl_dtu_switch_s | fcl_dtu_stall_bf | fcl_swl_swout_f;
+   assign switch_out = dtu_fcl_ntr_s | fcl_dtu_stall_bf | fcl_swl_swout_f |   // dtu_fcl_ntr_s replaces fcl_dtu_switch_s of the older form above
+                       ifq_swl_stallreq;   // extra term not present in the older form
+
+   // general stall condition (superseded; kept commented out for reference)
+//   assign cpu_fcl_stallreq = ~dtu_fcl_ntr_s & (stbfull_thisthr) |
+//                             dtu_fcl_ntr_s & (stbfull_nextthr) |
+//	                           ext_stallreq;
+
+   // ldmiss (i.e rollback) and flush_pipe are taken care of in FCL
+   assign sw_cond_s = fdp_fcl_swc_s2 & fcl_swl_swcvld_s & ~iferr_s;   // suppressed when iferr_s is set; drives every thrfsm .sw_cond
+
+   // thread reset and other interrupts
+   // added flop for timing reasons
+
+//   assign async_rst_i3 = rst_thread & ~dtu_fcl_thr_active;
+//   assign rst_thread   = rstthr_i3 & {4{rstint_i3}};
+  // assign nuke_thread  = (rstthr_i3 & {4{nukeint_i3}} & 
+//			                    ~dtu_fcl_thr_active) |   // if not active nuke 
+			                                             // immediately
+//			                     ({4{fcl_dtu_nuke_thr_ms}} & thr_m);
+                                                   // else wait for signal
+
+   assign resum_thread = ({4{fcl_dtu_resum_thr_w}} & thr_w);   // each 1-bit request is replicated and masked with thr_w
+   assign nuke_thread = ({4{fcl_dtu_nuke_thr_w}} & thr_w);
+   assign rst_thread = ({4{fcl_dtu_rst_thr_w}} & thr_w);
+
+   assign int_activate = fcl_swl_int_activate_i3 & ~wm_imiss;   // bitwise: blocked for any thread whose wm_imiss bit is set
+
+//`ifdef IFU_SAT   
+//   assign start_thread = {3'b0, start_on_rst} | auto_start | 
+//                         resum_thread & (~wm_imiss | ifq_dtu_thrrdy);
+//`else
+   assign start_thread = resum_thread & (~wm_imiss | ifq_dtu_thrrdy) &   // resume goes to start only if no imiss wait is outstanding (or ifq says ready)
+                                          (~wm_stbwait | stb_retry);   // and no stb wait is outstanding (or stb_retry is set)
+   assign thaw_thread = resum_thread & (wm_imiss & ~ifq_dtu_thrrdy |   // exact complement of the start condition (De Morgan),
+                                        wm_stbwait & ~stb_retry);   // so each resumed thread gets exactly one of start / thaw
+   
+//`endif
+   
+   
+//----------------------------------------------------------------------
+// Thread FSM
+//----------------------------------------------------------------------   
+   sparc_ifu_thrfsm  thrfsm0(   // state machine instance for thread 0; per-thread ports take bit [0]
+			     // Outputs
+`ifdef FPGA_SYN
+			     
+			     .so	(/*so*/),   // scan-out left unconnected when FPGA_SYN is defined
+`else
+			     .so	(so),   // NOTE(review): thrfsm1..3 and thr_sched also connect .so to net so - confirm scan stitching
+`endif
+			     .thr_state	(thr0_state[4:0]),   // 5-bit state, decoded with the `TCR_* indices below
+			     // Inputs
+			     .completion(completion[0]),
+			     .schedule	(schedule[0]),
+			     .spec_ld	(issue_spec_ld[0]),
+			     .ldhit	(ldhit_thr[0]),
+			     .switch_out(switch_out),   // shared 1-bit signal, same for all threads
+
+			     .stall     (all_stall[0]),
+			     .sw_cond	(sw_cond_s),   // shared 1-bit signal, same for all threads
+
+			     .int_activate(int_activate[0]),
+			     .start_thread(start_thread[0]),
+			     .thaw_thread(thaw_thread[0]),
+			     .nuke_thread (nuke_thread[0]),
+			     .rst_thread(rst_thread[0]),
+
+			     .halt_thread (halt_thread[0]),
+			     
+			     .clk	(clk),
+			     .se	(se),
+			     .si	(si),
+			     .reset	(dtu_reset));
+
+   sparc_ifu_thrfsm  thrfsm1(   // state machine instance for thread 1; per-thread ports take bit [1]
+			     // Outputs
+`ifdef FPGA_SYN
+			     
+			     .so	(/*so*/),   // scan-out left unconnected when FPGA_SYN is defined
+`else
+			     .so	(so),   // NOTE(review): the other thrfsm instances and thr_sched also connect .so to net so - confirm scan stitching
+`endif
+			     .thr_state	(thr1_state[4:0]),   // 5-bit state, decoded with the `TCR_* indices below
+			     // Inputs
+			     .completion(completion[1]),
+			     .schedule	(schedule[1]),
+			     .spec_ld	(issue_spec_ld[1]),
+			     .ldhit	(ldhit_thr[1]),
+			     .switch_out(switch_out),   // shared 1-bit signal, same for all threads
+
+			     .stall     (all_stall[1]),
+			     .sw_cond	(sw_cond_s),   // shared 1-bit signal, same for all threads
+
+			     .int_activate(int_activate[1]),
+			     .start_thread(start_thread[1]),
+			     .thaw_thread(thaw_thread[1]),
+			     .nuke_thread (nuke_thread[1]),
+			     .rst_thread(rst_thread[1]),
+
+			     .halt_thread (halt_thread[1]),
+			     
+			     .clk	(clk),
+			     .se	(se),
+			     .si	(si),
+			     .reset	(dtu_reset));
+   
+   sparc_ifu_thrfsm  thrfsm2(   // state machine instance for thread 2; per-thread ports take bit [2]
+			     // Outputs
+`ifdef FPGA_SYN
+			     
+			     .so	(/*so*/),   // scan-out left unconnected when FPGA_SYN is defined
+`else
+			     .so	(so),   // NOTE(review): the other thrfsm instances and thr_sched also connect .so to net so - confirm scan stitching
+`endif
+			     .thr_state	(thr2_state[4:0]),   // 5-bit state, decoded with the `TCR_* indices below
+			     // Inputs
+			     .completion(completion[2]),
+			     .schedule	(schedule[2]),
+			     .spec_ld	(issue_spec_ld[2]),
+			     .ldhit	(ldhit_thr[2]),
+			     .switch_out(switch_out),   // shared 1-bit signal, same for all threads
+
+			     .stall     (all_stall[2]),
+			     .sw_cond	(sw_cond_s),   // shared 1-bit signal, same for all threads
+
+			     .int_activate(int_activate[2]),
+			     .start_thread(start_thread[2]),
+			     .thaw_thread(thaw_thread[2]),
+			     .nuke_thread (nuke_thread[2]),
+			     .rst_thread(rst_thread[2]),
+
+			     .halt_thread (halt_thread[2]),
+			     
+			     .clk	(clk),
+			     .se	(se),
+			     .si	(si),
+			     .reset	(dtu_reset));
+   
+   sparc_ifu_thrfsm  thrfsm3(   // state machine instance for thread 3; per-thread ports take bit [3]
+			     // Outputs
+`ifdef FPGA_SYN
+			     
+			     .so	(/*so*/),   // scan-out left unconnected when FPGA_SYN is defined
+`else
+			     .so	(so),   // NOTE(review): the other thrfsm instances and thr_sched also connect .so to net so - confirm scan stitching
+`endif
+			     .thr_state	(thr3_state[4:0]),   // 5-bit state, decoded with the `TCR_* indices below
+			     // Inputs
+			     .completion(completion[3]),
+			     .schedule	(schedule[3]),
+			     .spec_ld	(issue_spec_ld[3]),
+			     .ldhit	(ldhit_thr[3]),
+			     .switch_out(switch_out),   // shared 1-bit signal, same for all threads
+
+			     .stall     (all_stall[3]),
+			     .sw_cond	(sw_cond_s),   // shared 1-bit signal, same for all threads
+
+			     .int_activate(int_activate[3]),
+			     .start_thread(start_thread[3]),
+			     .thaw_thread(thaw_thread[3]),
+			     .nuke_thread (nuke_thread[3]),
+			     .rst_thread(rst_thread[3]),
+
+			     .halt_thread (halt_thread[3]),
+			     
+			     .clk	(clk),
+			     .se	(se),
+			     .si	(si),
+			     .reset	(dtu_reset));
+
+//----------------------------------------------------------------------
+// Schedule Next Thread
+//----------------------------------------------------------------------
+   // status bits picked out of each thrfsm state vector using the `TCR_* bit indices
+   assign dtu_fcl_thr_active[0] = thr0_state[`TCR_ACTIVE];
+   assign dtu_fcl_thr_active[1] = thr1_state[`TCR_ACTIVE];
+   assign dtu_fcl_thr_active[2] = thr2_state[`TCR_ACTIVE];
+   assign dtu_fcl_thr_active[3] = thr3_state[`TCR_ACTIVE];
+
+   assign rdy[0] = thr0_state[`TCR_URDY];   // rdy feeds the scheduler's req_vec
+   assign rdy[1] = thr1_state[`TCR_URDY];
+   assign rdy[2] = thr2_state[`TCR_URDY];
+   assign rdy[3] = thr3_state[`TCR_URDY];
+
+   assign sprdy_or_urdy[0] = thr0_state[`TCR_READY];   // sprdy_or_urdy feeds the scheduler's spec_vec
+   assign sprdy_or_urdy[1] = thr1_state[`TCR_READY];
+   assign sprdy_or_urdy[2] = thr2_state[`TCR_READY];
+   assign sprdy_or_urdy[3] = thr3_state[`TCR_READY];
+   
+   assign running_s2 = (thr0_state[`TCR_RUNNING] |   // 1-bit: some thread has its RUNNING bit set
+			                  thr1_state[`TCR_RUNNING] |
+			                  thr2_state[`TCR_RUNNING] |
+			                  thr3_state[`TCR_RUNNING]);
+
+   assign dtu_fcl_running_s = running_s2;   // same signal exported under the dtu_fcl_ name
+
+   assign thr_s2 =  {thr3_state[`TCR_RUNNING],   // 4-bit vector of RUNNING bits, thread 3 in the MSB
+			               thr2_state[`TCR_RUNNING],
+			               thr1_state[`TCR_RUNNING],
+			               thr0_state[`TCR_RUNNING]};
+
+   // Next Thread Ready: OR of the four READY bits
+   assign dtu_fcl_ntr_s = (sprdy_or_urdy[0] | sprdy_or_urdy[1] | 
+				                   sprdy_or_urdy[2] | sprdy_or_urdy[3]);
+
+   // Any thread ready (or already running)
+   assign atr_s = dtu_fcl_ntr_s | running_s2;
+
+   // decide which scheduler to use
+   // timing note: see if use_spec can be generated in previous cycle
+   assign use_spec = ~(rdy[3] | rdy[2] | rdy[1] | rdy[0]);   // set only when no rdy bit is set
+
+   assign sched_reset = dtu_reset | ~gdbginit_l;   // scheduler is also reset while gdbginit_l is low
+   // schedule ready threads
+   sparc_ifu_lru4 thr_sched(// Outputs
+			                      .grant_vec	(dtu_fcl_nextthr_bf[3:0]),   // selected next thread
+			                      .so		(so),   // NOTE(review): same net so as the thrfsm scan-outs in the non-FPGA_SYN branch - confirm
+			                      // Inputs
+			                      .clk	(clk),
+			                      .reset	(sched_reset),
+			                      .se		(se),
+			                      .si		(si),
+			                      .recent_vec	(thr_e[3:0]),   // presumably marks the most recently used thread - confirm in sparc_ifu_lru4
+			                      .load_recent(fcl_dtu_inst_vld_e),
+			                      .req_vec	(rdy[3:0]),
+                            .spec_vec (sprdy_or_urdy[3:0]),
+                            .use_spec (use_spec));
+      
+//----------------------------------------------------------------------
+// Thread Status (Config) Register
+//----------------------------------------------------------------------
+   // Read thread config: encode thr_d to a 2-bit thread id (encoding assumes thr_d is one-hot)
+   assign enc_thr_d[1] = thr_d[3] | thr_d[2];
+   assign enc_thr_d[0] = thr_d[3] | thr_d[1];   
+
+   assign rd_thract_d = (thr0_state[0] & thr_d[0] |   // bit 0 of the state vector of the thread selected by thr_d
+                         thr1_state[0] & thr_d[1] |   // (presumably bit 0 is `TCR_ACTIVE; literal index used here - confirm)
+                         thr2_state[0] & thr_d[2] |
+                         thr3_state[0] & thr_d[3]);
+
+   dff_s #(1) rdthr_ff(.din (rd_thract_d),   // rd_thract_d -> rd_thract_e
+		                  .clk (clk),
+		                  .q   (rd_thract_e),
+		                  .se   (se), .si(), .so());
+
+   dff_s #(3) rdcf_reg(.din ({enc_thr_d, en_spec_d}),   // [2:1] = thread id, [0] = en_spec
+		                 .clk (clk),
+		                 .q   (rd_tid_spec_e),
+		                 .se   (se), .si(), .so());
+
+   dff_s #(1) hpe_ff(.din (fcl_dtu_hprivmode_d),   // fcl_dtu_hprivmode_d -> hprivmode_e
+		               .clk (clk),
+		               .q   (hprivmode_e),
+		               .se   (se), .si(), .so());
+   dff_s #(1) rdthre_ff(.din (dec_swl_rdsr_sel_thr_d),   // dec_swl_rdsr_sel_thr_d -> rdsr_sel_thr_e
+		                  .clk (clk),
+		                  .q   (rdsr_sel_thr_e),
+		                  .se   (se), .si(), .so());
+
+   // TBD: read out all thread state, not just the current thread
+   //      Done 9/26/02
+   assign fmt_thrconf_e = {wm_stbwait,         // 51:48
+                           wm_other,           // 47:44
+                           wm_imiss,           // 51:40 - wait mask (43:40 is wm_imiss)
+                           4'b0,               // 39:36 - rsvd
+                           thr0_state,         // 35:31
+                           thr1_state,         // 30:26
+                           thr2_state,         // 25:21
+                           thr3_state,         // 35:16 - thr state (20:16 is thr3)
+			                     {2'b0},             // 15:14 - rsvd
+			                     const_cpuid,        // 13:10 - 4b cpu id
+			                     rd_tid_spec_e[2:1], // 9:8 - 2b tid
+			                     {5'b0},             // 7:3 - rsvd
+			                     rd_tid_spec_e[0],   // 2 - en spec
+			                     {1'b0},             // 1 - QOS/rsvd
+			                     rd_thract_e};       // 0 - active
+
+//`ifdef SPARC_HPV_EN
+   assign fmt_thrconf_adj[51:1] = fmt_thrconf_e[51:1] & {51{hprivmode_e}};   // bits 51:1 read as zero unless hprivmode_e is set
+   assign fmt_thrconf_adj[0] = fmt_thrconf_e[0];   // bit 0 (active) is always visible
+//`else
+//   assign fmt_thrconf_adj[51:0] = fmt_thrconf_e[51:0];
+//`endif
+
+//   assign thrconf_out_e[51:16] = (fmt_thrconf_e[51:16] & 
+//                                      {36{hprivmode_e}});
+   
+//   mux2ds #(52) rdsr_mxe(.dout (thrconf_out_e[51:0]),
+//		                     .in0  ({49'b0, fprs_e}),
+//		                     .in1  (fmt_thrconf_adj[51:0]),
+//		                     .sel0 (~rdsr_sel_thr_e),
+//		                     .sel1 (rdsr_sel_thr_e));
+   assign thrconf_out_e[51:0] = rdsr_sel_thr_e ? fmt_thrconf_adj[51:0] :   // replaces the commented-out mux2ds above
+                                                 {49'b0, fprs_e};   // otherwise fprs_e zero-extended to 52 bits
+
+   // leave out the zeros (rsvd fields 39:36 and 15:14) before sending to fdp
+   assign dtu_fdp_thrconf_e = {thrconf_out_e[51:40], // 40:29
+                               thrconf_out_e[35:16], // 28:9
+                               thrconf_out_e[13:8],  // 8:3
+                               thrconf_out_e[2:0]};  // 2:0
+
+   // shadow scan outputs: 11-bit status word of the thread selected by ctu_sscan_tid
+   mux4ds #(11) sscan_mx(.dout (swl_sscan_thrstate[10:0]),
+                         .in0  ({thr0_state[4:0],   // [10:6]
+                                 wm_imiss[0],   // [5]
+                                 wm_other[0],   // [4]
+                                 wm_stbwait[0],   // [3]
+                                 mul_busy_e[0],   // [2]
+                                 div_busy_e[0],   // [1]
+                                 fp_busy_e[0]}),   // [0]
+                         .in1  ({thr1_state[4:0], 
+                                 wm_imiss[1],
+                                 wm_other[1],
+                                 wm_stbwait[1],
+                                 mul_busy_e[1],
+                                 div_busy_e[1],
+                                 fp_busy_e[1]}),
+                         .in2  ({thr2_state[4:0], 
+                                 wm_imiss[2],
+                                 wm_other[2],
+                                 wm_stbwait[2],
+                                 mul_busy_e[2],
+                                 div_busy_e[2],
+                                 fp_busy_e[2]}),
+                         .in3  ({thr3_state[4:0], 
+                                 wm_imiss[3],
+                                 wm_other[3],
+                                 wm_stbwait[3],
+                                 mul_busy_e[3],
+                                 div_busy_e[3],
+                                 fp_busy_e[3]}),
+                         .sel0 (ctu_sscan_tid[0]),   // selects are wired straight from ctu_sscan_tid bits (assumes one-hot - confirm)
+                         .sel1 (ctu_sscan_tid[1]),
+                         .sel2 (ctu_sscan_tid[2]),
+                         .sel3 (ctu_sscan_tid[3]));
+   
+   // write to TCR: qualified by instruction valid and not flushed
+   assign wrt_spec_w = dec_swl_wrt_tcr_w & ifu_tlu_inst_vld_w & 
+                       ~flush_all_w;
+
+   assign spec_next = (wrt_spec_w2 & fcl_dtu_hprivmode_w2) ?   // en_spec is updated only when fcl_dtu_hprivmode_w2 is set
+                             thr_config_in_w2[2] :   // new value comes from bit 2 of the written data
+                             en_spec_d;   // otherwise hold
+   
+   assign halt_w = wrt_spec_w & ~thr_config_in_w[0];   // a TCR write with bit 0 clear requests a halt
+   
+   dff_s #(1) wrsw2_ff(.din (wrt_spec_w),   // wrt_spec_w -> wrt_spec_w2
+                     .q   (wrt_spec_w2),
+                     .clk (clk), .se(se), .si(), .so());
+   
+   assign wrt_tcr_w2 = st_thr_w2 & {4{wrt_spec_w2}};   // per-thread TCR write strobe
+   
+   dff_s #(1) hlt_ff(.din (halt_w),   // halt_w -> halt_w2
+                   .q   (halt_w2),
+                   .clk (clk), .se(se), .si(), .so());
+   assign halt_thread = st_thr_w2 & {4{halt_w2}};   // per-thread halt request into the thrfsm instances
+   
+   dffr_s #(1) enspec_ff(.din (spec_next),   // en_spec state bit, with reset on dtu_reset
+		                   .clk (clk),
+		                   .q   (en_spec_d),
+		                   .rst (dtu_reset),
+		                   .se  (se), .si(), .so());
+   
+   dff_s #(1) enspecm_ff(.din (rd_tid_spec_e[0]),   // en_spec copy delayed: rd_tid_spec_e[0] -> en_spec_m
+		                   .clk (clk),
+		                   .q   (en_spec_m),
+		                   .se  (se), .si(), .so());
+
+   dff_s #(1) enspecw_ff(.din (en_spec_m),   // en_spec_m -> en_spec_g
+		                   .clk (clk),
+		                   .q   (en_spec_g),
+		                   .se  (se), .si(), .so());
+
+
+   //-----------------------------
+   // Instruction Flow Control
+   //-----------------------------
+
+   // mul and div control (1 each per cpu); *_busy_d is the next value of the per-thread busy bit
+   assign mul_busy_d = ({4{dec_swl_mul_inst_d & ~swl_dec_mulbusy_e &   // set: mul decoded while the unit is not busy,
+                           fcl_dtu_inst_vld_d & ~iferr_d}} & thr_d &   // valid, no ifetch error, for the thread in thr_d
+//			                  ~rb_thr_w & ~rt_st_thr_e |          // set
+                        ~rt_st_thr_e |   // and not masked by rt_st_thr_e
+			                  mul_busy_e & ~killed_uniop_done_e) &   // hold unless killed_uniop_done_e
+			                   ~exu_ifu_longop_done_g & (~trp_no_retr);  // reset wins
+
+   assign div_busy_d = ({4{dec_swl_div_inst_d & ~swl_dec_divbusy_e &   // same structure as mul_busy_d
+                           fcl_dtu_inst_vld_d & ~iferr_d}} & 
+			                  thr_d  & ~rt_st_thr_e |        // set
+			                  div_busy_e & ~killed_uniop_done_e) &   // hold
+			                   ~exu_ifu_longop_done_g & (~trp_no_retr); // reset wins
+
+   assign fp_busy_d = ({4{dec_swl_allfp_d & // swl_dec_fp_enable_d &
+                          fcl_dtu_inst_vld_d &
+                          ~fpbusy_local_e & ~iferr_d}} & thr_d &   // set uses fpbusy_local_e, not swl_dec_fpbusy_e
+		                   ~rt_st_thr_d & ~rt_st_thr_e |
+                                  // FP could be a st
+		                   fp_busy_e & ~killed_uniop_done_e) &   // hold
+			                  {4{~ffu_ifu_fpop_done_w2}} & ~trp_no_retr; // reset wins
+
+   dffr_s #(4) mulb_ff(.din (mul_busy_d),   // per-thread mul busy bits, with reset on dtu_reset
+		                 .q   (mul_busy_e),
+		                 .clk (clk),
+		                 .rst (dtu_reset),
+		                 .se  (se), .si(), .so());
+   assign true_mulbusy_e = (|mul_busy_e[3:0]);   // any thread holds the multiplier
+   assign mbusy_d0 = true_mulbusy_e & mul_wait_any;   // busy while some thread is waiting; head of the delay chain
+   
+   // keep the shared resource marked busy for extra cycles after it frees up (mbusy_d1..d3 delay taps),
+   // to allow waiting threads a fair chance at getting it.  NOTE(review): three taps are ORed in, older text said two cycles - confirm
+   assign swl_dec_mulbusy_e = true_mulbusy_e | mbusy_d3 | mbusy_d1 | mbusy_d2;
+   
+   dffr_s #(4) divb_ff(.din (div_busy_d),   // per-thread div busy bits, with reset on dtu_reset
+		                 .q   (div_busy_e),
+		                 .clk (clk),
+		                 .rst (dtu_reset),
+		                 .se  (se), .si(), .so());
+   assign true_divbusy_e = (|div_busy_e[3:0]);   // any thread holds the divider
+   assign dbusy_d0 = true_divbusy_e & div_wait_any;   // busy while some thread is waiting; head of the delay chain
+
+   // keep the shared resource marked busy for extra cycles after it frees up (dbusy_d1..d3 delay taps),
+   // to allow waiting threads a fair chance at getting it.  NOTE(review): three taps are ORed in, older text said two cycles - confirm
+   assign swl_dec_divbusy_e = true_divbusy_e | dbusy_d3 | dbusy_d1 | dbusy_d2;
+
+   dffr_s #(4) fpb_ff(.din (fp_busy_d),   // per-thread fp busy bits, with reset on dtu_reset
+		                .q   (fp_busy_e),
+		                .clk (clk),
+		                .rst (dtu_reset),
+		                .se  (se), .si(), .so());
+   assign true_fpbusy_e = (|fp_busy_e[3:0]);   // any thread holds the FP unit
+   assign fbusy_d0 = true_fpbusy_e & fp_wait_any;   // busy while some thread is waiting; head of the delay chain
+   
+   assign fbusy_nxt_d = (|fp_busy_d[3:0]) | fbusy_d0 | fbusy_d1 | fbusy_d2;   // built from the flop inputs of fp_busy_e and fbusy_d1..d3
+   dffr_s #(1) tfbe_ff(.din (fbusy_nxt_d),   // registered busy; presumably a timing-friendly copy of fpbusy_local_e - confirm
+                     .q   (fbusy_crit_e),
+                     .clk (clk), 
+                     .rst (dtu_reset), .se(se), .si(), .so());
+   
+   // keep the shared resource marked busy for extra cycles after it frees up (fbusy_d1..d3 delay taps),
+   // to allow waiting threads a fair chance at getting it.  NOTE(review): three taps are ORed in, older text said two cycles - confirm
+   assign swl_dec_fpbusy_e = fbusy_crit_e;
+   assign fpbusy_local_e = true_fpbusy_e | fbusy_d3 | fbusy_d1 | fbusy_d2;
+
+   dff_s #(3) bd1_reg(.din ({mbusy_d0, dbusy_d0, fbusy_d0}),   // delay chain stage 1 (mul, div, fp side by side)
+                    .q   ({mbusy_d1, dbusy_d1, fbusy_d1}),
+                    .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(3) bd2_reg(.din ({mbusy_d1, dbusy_d1, fbusy_d1}),   // delay chain stage 2
+                    .q   ({mbusy_d2, dbusy_d2, fbusy_d2}),
+                    .clk (clk), .se(se), .si(), .so());
+   
+   dff_s #(3) bd3_reg(.din ({mbusy_d2, dbusy_d2, fbusy_d2}),   // delay chain stage 3
+                    .q   ({mbusy_d3, dbusy_d3, fbusy_d3}),
+                    .clk (clk), .se(se), .si(), .so());
+   
+   // ifetch errors
+   // If there was an error in the ifetch, call back this instruction
+   assign iferr_d = erb_dtu_ifeterr_d1 & same_thr_de;   // error applies if thr_d and thr_e overlap
+   assign iferr_s = erb_dtu_ifeterr_d1 & same_thr_fe;   // error applies if thr_f and thr_e overlap
+   assign retract_iferr_d =  erb_dtu_ifeterr_d1 & fcl_dtu_inst_vld_d &   // iferr_d further qualified by inst valid
+                             same_thr_de;
+
+   // mul_inst is already qualified with inst_vld
+   // so is fpop
+   // don't set retract if there is an iferr, since this will cancel
+   // the pending imisses!  
+   assign dtu_fcl_retract_d = ((dec_swl_mul_inst_d & swl_dec_mulbusy_e |   // retract when the needed unit is busy
+			                          dec_swl_div_inst_d & swl_dec_divbusy_e |
+			                          dec_swl_allfp_d & fpbusy_local_e) &
+                                // & swl_dec_fp_enable_d 
+                               fcl_dtu_inst_vld_d |
+                               retract_store_d   // or when the store-buffer check asks for a retract
+//                             |  retract_iferr_d
+                               );
+
+   // what does this do???
+   // no need to stall after retract since thread is already switched out
+//   assign retract_stall_d = (dec_swl_mul_inst_d & swl_dec_mulbusy_e | 
+//			                       dec_swl_div_inst_d & swl_dec_divbusy_e |
+//			                       dec_swl_allfp_d & swl_dec_fp_enable_d & 
+//                             fpbusy_local_e);
+
+   assign mul_wait_nxt = ({4{dec_swl_mul_inst_d & swl_dec_mulbusy_e &   // set: mul decoded while the unit is busy (mirror of the busy set term)
+                             fcl_dtu_inst_vld_d & ~iferr_d}} &
+			                 thr_d  & ~rt_st_thr_e | mul_done |   // set
+			                 mul_wait & ~retr_thr_wakeup  & ~killed_uniop_done_e) &   // hold until woken or killed
+			                 (~trp_no_retr);   // trap clears the wait bit
+
+   dffr_s #(4) mw_ff(.din (mul_wait_nxt[3:0]),   // per-thread "waiting for mul" bits
+		               .q   (mul_wait[3:0]),
+		               .clk (clk),
+		               .rst (dtu_reset),
+		               .se(se), .si(), .so());
+   assign mul_wait_any = (|mul_wait[3:0]);   // some thread is waiting for the multiplier
+   
+   assign div_wait_nxt =  ({4{dec_swl_div_inst_d & swl_dec_divbusy_e &   // same structure as mul_wait_nxt
+                              fcl_dtu_inst_vld_d & ~iferr_d}} & 
+                        thr_d  & ~rt_st_thr_e | div_done | // set 
+			                  div_wait & ~retr_thr_wakeup & ~killed_uniop_done_e) &   // hold until woken or killed
+			                  (~trp_no_retr);   // trap clears the wait bit
+   
+   dffr_s #(4) dw_ff(.din (div_wait_nxt[3:0]),   // per-thread "waiting for div" bits
+		               .q   (div_wait[3:0]),
+		               .clk (clk),
+		               .rst (dtu_reset),
+		               .se(se), .si(), .so());
+   assign div_wait_any = (|div_wait[3:0]);   // some thread is waiting for the divider
+
+   assign fp_wait_nxt =  ({4{dec_swl_allfp_d & // swl_dec_fp_enable_d & 
+                             fcl_dtu_inst_vld_d & fpbusy_local_e &
+                             ~iferr_d}} &
+			                    thr_d  & ~rt_st_thr_d & ~rt_st_thr_e |   // fp set term is also masked by rt_st_thr_d
+                          fp_done |    // set 
+			                    fp_wait & ~retr_thr_wakeup & ~killed_uniop_done_e) &   // hold until woken or killed
+			                     (~trp_no_retr);   // trap clears the wait bit
+   
+   dffr_s #(4) fw_ff(.din (fp_wait_nxt[3:0]),   // per-thread "waiting for fp" bits
+		               .q   (fp_wait[3:0]),
+		               .clk (clk),
+		               .rst (dtu_reset),
+		               .se(se), .si(), .so());
+   assign fp_wait_any = (|fp_wait[3:0]);   // some thread is waiting for the FP unit
+
+   // wake up waiting threads when the unit is no longer busy
+   // need to qual with trp_no_retr since trp can occur at the same
+   // time as unit becoming unbusy.  
+   assign mul_wake = mul_wait & {4{~true_mulbusy_e}} & ~trp_no_retr;   // uses true_*busy_e, not the extended swl_dec_*busy_e
+   assign div_wake = div_wait & {4{~true_divbusy_e}} & ~trp_no_retr;
+   assign fp_wake  = fp_wait & {4{~true_fpbusy_e}} & ~trp_no_retr;
+			      
+//   assign retr_thr_wakeup = (mul_wait & {4{~swl_dec_mulbusy_e}} |
+//	                     div_wait & {4{~swl_dec_divbusy_e}} | 
+//	                     fp_wait & {4{~fpbusy_local_e}} | 
+//			     wm_stbwait & stb_retry);
+
+   assign retr_thr_wakeup = mul_wake | div_wake | fp_wake;   // per-thread wakeup for retracted threads; also clears the *_wait bits
+//	                  |  (wm_stbwait & stb_retry & ~wm_other);
+   
+   assign fp_thr[0] = ~ffu_ifu_tid_w2[1] & ~ffu_ifu_tid_w2[0];   // 2-to-4 decode of ffu_ifu_tid_w2 into a one-hot thread vector
+   assign fp_thr[1] = ~ffu_ifu_tid_w2[1] &  ffu_ifu_tid_w2[0];
+   assign fp_thr[2] =  ffu_ifu_tid_w2[1] & ~ffu_ifu_tid_w2[0];
+   assign fp_thr[3] =  ffu_ifu_tid_w2[1] &  ffu_ifu_tid_w2[0];
+
+   // Delay mul div completion to prevent one thread from hogging mul and div
+   assign mul_done = exu_ifu_longop_done_g & mul_busy_e & {4{mul_wait_any}};   // done while another thread waits: owner is put into mul_wait
+   assign div_done = exu_ifu_longop_done_g & div_busy_e & {4{div_wait_any}};   // same for div
+   assign fp_done = fp_thr & {4{ffu_ifu_fpop_done_w2}} & {4{fp_wait_any}};   // same for fp, thread taken from fp_thr
+
+   assign fp_thrrdy = fp_thr & {4{ffu_ifu_fpop_done_w2}} & {4{~fp_wait_any}};   // fp done with nobody waiting
+   
+   // don't complete if another mul/div is waiting
+   assign exu_lop_done = (exu_ifu_longop_done_g &   // bitwise with the busy vectors, so exu_ifu_longop_done_g is presumably 4 bits - confirm
+			                    (~mul_busy_e | {4{~mul_wait_any}}) &
+			                    (~div_busy_e | {4{~div_wait_any}}));
+
+   // TBD:
+   // 1.  Put in similar wakeup sequence for fp completion -- done
+   // 2.  Is it worth doing a round robin wakeup when a deadlock problem
+   //     exists even in that case? -- no need with lru scheduler
+
+   
+   //--------------------------
+   // Store buffer flow control
+   //--------------------------
+   // store pipe: track a valid store through st_inst_e -> st_inst_m -> st_inst_g
+   assign st_inst_qual_d = dec_swl_st_inst_d & fcl_dtu_inst_vld_d;   // store decode qualified by inst valid
+   dff_s ste_ff(.din (st_inst_qual_d),   // no width parameter given; uses the dff_s default
+	            .q   (st_inst_e),
+	            .clk (clk),
+	            .se  (se), .si(), .so());
+   assign st_inst_qual_e = st_inst_e & ~dtu_inst_anull_e;   // drop the store if dtu_inst_anull_e is set
+
+   dff_s stm_ff(.din (st_inst_qual_e),
+	            .q   (st_inst_m),
+	            .clk (clk),
+	            .se  (se), .si(), .so());
+   dff_s stg_ff(.din (st_inst_m),
+	            .q   (st_inst_g),
+	            .clk (clk),
+	            .se  (se), .si(), .so());
+
+//   assign st_inst_qual_g = st_inst_g & ifu_tlu_inst_vld_w;
+//   dff stw2_ff(.din (st_inst_qual_g),
+//	             .q   (st_inst_w2),
+//	             .clk (clk),
+//               .se  (se), .si(), .so());
+
+   // same_thr_xy is set when thread vectors thr_x and thr_y share a set bit (the _g names compare against thr_w)
+   assign same_thr_de = (thr_d[0] & thr_e[0] |
+			                   thr_d[1] & thr_e[1] |
+			                   thr_d[2] & thr_e[2] |
+			                   thr_d[3] & thr_e[3]);
+   assign same_thr_dg = (thr_d[0] & thr_w[0] |
+			                   thr_d[1] & thr_w[1] |
+			                   thr_d[2] & thr_w[2] |
+			                   thr_d[3] & thr_w[3]);
+
+   assign same_thr_fd = (thr_f[0] & thr_d[0] |
+			                   thr_f[1] & thr_d[1] |
+			                   thr_f[2] & thr_d[2] |
+			                   thr_f[3] & thr_d[3]);
+   assign same_thr_fe = (thr_f[0] & thr_e[0] |
+			                   thr_f[1] & thr_e[1] |
+			                   thr_f[2] & thr_e[2] |
+			                   thr_f[3] & thr_e[3]);
+   assign same_thr_fm = (thr_f[0] & thr_m[0] |
+			                   thr_f[1] & thr_m[1] |
+			                   thr_f[2] & thr_m[2] |
+			                   thr_f[3] & thr_m[3]);
+   assign same_thr_fg = (thr_f[0] & thr_w[0] |
+			                   thr_f[1] & thr_w[1] |
+			                   thr_f[2] & thr_w[2] |
+			                   thr_f[3] & thr_w[3]);
+
+   assign pipe_st_e = same_thr_fe & st_inst_e;   // store in the pipe belonging to the thread in thr_f
+   assign pipe_st_m = same_thr_fm & st_inst_m;
+   assign pipe_st_g = same_thr_fg & st_inst_g;
+   assign pipe_st_d = same_thr_fd & st_inst_qual_d;
+
+   dff_s #(1) pste_ff(.din (pipe_st_d),   // pipe_st_d delayed one cycle -> st_thisthr_e
+                    .q   (st_thisthr_e),
+                    .clk (clk), .se(se), .si(), .so());
+
+   // count the stores in the pipe to this thread: 0-3 here (E, M, G); st_thisthr_e is added later in all_dst_*
+   assign pipe_st_cnt_ge1 = pipe_st_e | pipe_st_m | pipe_st_g;   // at least one
+//	                          pipe_st_w2;
+
+   assign pipe_st_cnt_ge2 = (pipe_st_e & pipe_st_m |   // at least two: any pair
+			                       pipe_st_e & pipe_st_g |
+                             pipe_st_m & pipe_st_g);
+
+//   assign pipe_st_cnt_ge2 = (pipe_st_e & pipe_st_m |
+//			                       pipe_st_e & pipe_st_g |
+//			                       pipe_st_e & pipe_st_w2 |
+//			                       pipe_st_m & pipe_st_g |
+//			                       pipe_st_m & pipe_st_w2 |
+//	                           pipe_st_g & pipe_st_w2);
+
+   assign pipe_st_cnt_ge3 = (pipe_st_e & pipe_st_m & pipe_st_g);   // all three
+//			                       pipe_st_e & pipe_st_m & pipe_st_w2 |
+//			                       pipe_st_e & pipe_st_g & pipe_st_w2 |
+//			                       pipe_st_m & pipe_st_g & pipe_st_w2);
+
+//   assign pipe_st_cnt_eq4 = pipe_st_e & pipe_st_m & pipe_st_g & 
+//	                          pipe_st_w2;
+
+   dff_s #(3) pstc_reg(.din ({pipe_st_cnt_ge1,   // register the three thresholds -> dst_cnt_ge1..3
+                            pipe_st_cnt_ge2,
+                            pipe_st_cnt_ge3}),
+                     .q   ({dst_cnt_ge1,
+                            dst_cnt_ge2,
+                            dst_cnt_ge3}),
+                     .clk (clk), .se(se), .si(), .so());
+   
+   // get the number of taken store buffer entries to this thread (selected by thr_f)
+   mux4ds #(4) stbcnt_mux(.dout (stbcnt_s),
+			                    .in0  (lsu_ifu_stbcnt0),
+			                    .in1  (lsu_ifu_stbcnt1),
+			                    .in2  (lsu_ifu_stbcnt2),
+			                    .in3  (lsu_ifu_stbcnt3),
+			                    .sel0 (thr_f[0]),
+			                    .sel1 (thr_f[1]),
+			                    .sel2 (thr_f[2]),
+			                    .sel3 (thr_f[3]));
+
+   dff_s #(4) stbd_reg(.din (stbcnt_s),   // stbcnt_s -> stbcnt_d
+		                 .q   (stbcnt_d),
+		                 .clk (clk),
+		                 .se  (se), .si(), .so());
+
+   assign all_dst_ge1 = dst_cnt_ge1 | st_thisthr_e;   // registered pipe count plus st_thisthr_e: at least 1
+   assign all_dst_ge2 = dst_cnt_ge1 & st_thisthr_e | dst_cnt_ge2;   // at least 2
+   assign all_dst_ge3 = dst_cnt_ge2 & st_thisthr_e | dst_cnt_ge3;   // at least 3
+   assign all_dst_eq4 = dst_cnt_ge3 & st_thisthr_e;   // exactly 4 (the maximum)
+
+   // switch if taken entries + stores in pipe >= 8 (counting the store in D; the N + M comments give entries + in-flight stores)
+   assign switch_store_d = stbcnt_d[3] & fcl_dtu_inst_vld_d | // 8
+	            dec_swl_st_inst_d & fcl_dtu_inst_vld_d & 
+	            (stbcnt_d[2] & stbcnt_d[1] & stbcnt_d[0] | // 7
+		           stbcnt_d[2] & stbcnt_d[1] & all_dst_ge1 | // 6 + 1
+		           stbcnt_d[2] & stbcnt_d[0] & all_dst_ge2 | // 5 + 2
+		           stbcnt_d[2]               & all_dst_ge3 | // 4 + 3
+		           stbcnt_d[1] & stbcnt_d[0] & all_dst_eq4); // 3 + 4
+
+   assign stb_stall = {4{switch_store_d}} & thr_d;   // per-thread store-buffer stall, one term of all_stall
+   assign stb_blocked = {lsu_ifu_stbcnt3[3], lsu_ifu_stbcnt2[3],   // bit 3 of each thread's count (count >= 8), thread 3 in the MSB
+			                   lsu_ifu_stbcnt1[3], lsu_ifu_stbcnt0[3]};
+
+   dff_s #(4) stbb_reg(.din (stb_blocked),   // stb_blocked -> stb_blocked_d1
+		                 .q   (stb_blocked_d1),
+		                 .clk (clk), .se(se), .si(), .so());
+   
+   // retract this thread if taken entries + stores in pipe >= 9 (counting the store in D)
+   assign retract_store_d = dec_swl_st_inst_d & fcl_dtu_inst_vld_d &
+	        (stbcnt_d[3] | // 8
+	         stbcnt_d[2] & stbcnt_d[1] & stbcnt_d[0] & all_dst_ge1 | // 7 + 1
+	         stbcnt_d[2] & stbcnt_d[1] & all_dst_ge2 |  // 6 + 2
+	         stbcnt_d[2] & stbcnt_d[0] & all_dst_ge3 |  // 5 + 3
+	         stbcnt_d[2] & all_dst_eq4);                // 4 + 4
+
+   // remember if we retracted a store (or an iferr) so that we can clear wmo in 
+   // the next cycle
+   assign retract_st_next_d = (retract_store_d | retract_iferr_d) & 
+                              ~(same_thr_dg & rollback_g) &   // not if thr_d matches thr_w while rollback_g is set
+                              ~trp_noretr_d;   // not if the thr_d thread has a trp_no_retr event
+   
+   dff_s #(1) retr_se(.din (retract_st_next_d),   // retract_st_next_d -> retract_store_e
+		                .q   (retract_store_e),
+		                .clk (clk), .se (se), .si(), .so());
+
+   // clear wmo if you set it already
+   assign clear_wmo_e = retract_store_e & (swc_d & same_thr_de | swc_e);   // narrower than the commented-out form below
+// assign clear_wmo_e = retract_store_e;   
+
+
+   // mark a switched out thread for wakeup
+//   assign stb_wait_nxt = ({4{switch_store_d}} & thr_d & ~rb_thr_w |   // set
+//			                    wm_stbwait & ~stb_retry 
+//  			                ~(thr_d & {4{swc_d}}) & 
+//			                    ~(thr_e & {4{dec_swl_sta_inst_e & 
+//				                               lsu_ifu_ldsta_internal_e}})   // reset
+//			                    ) & ~trp_no_retr;  // this reset wins
+
+//   assign stb_wait_nxt = ({4{switch_store_d}} & thr_d & ~rb_thr_w |   // set
+//			                    wm_stbwait & ~stb_retry) & ~trp_no_retr;
+
+   assign stb_wait_nxt = ({4{switch_store_d}} & thr_d |   // set
+			                    wm_stbwait & ~stb_retry);   // hold until stb_retry; unlike the older forms above, no ~trp_no_retr term
+   
+   dffr_s #(4) stbw_reg(.din (stb_wait_nxt),   // per-thread store-buffer wait mask, with reset on dtu_reset
+		                  .q   (wm_stbwait),
+		                  .clk (clk),
+		                  .rst (dtu_reset),
+		                  .se  (se), .si(), .so());
+
+   // count to 4 and retry: switch_store_d is delayed through four flops (e, m, g, w2)
+   dff_s stbrete_ff(.din (switch_store_d),
+		              .q   (sw_st_e),
+		              .clk (clk),
+		              .se  (se), .si(), .so());
+   dff_s stbretm_ff(.din (sw_st_e),
+		              .q   (sw_st_m),
+		              .clk (clk),
+		              .se  (se), .si(), .so());
+   dff_s stbretg_ff(.din (sw_st_m),
+		              .q   (sw_st_g),
+		              .clk (clk),
+		              .se  (se), .si(), .so());
+   dff_s stbretw2_ff(.din (sw_st_g),
+		               .q   (sw_st_w2),
+		               .clk (clk),
+		               .se  (se), .si(), .so());
+//   assign stb_retry = {4{sw_st_w2}} & st_thr_w2 & ~stb_blocked;
+
+   assign st_in_pipe = ({4{sw_st_e}} & thr_e |   // per-thread: a switched-out store is still in one of the four delay stages
+			                  {4{sw_st_m}} & thr_m |
+			                  {4{sw_st_g}} & thr_w |
+			                  {4{sw_st_w2}} & st_thr_w2);
+
+   // don't really need to AND with wm_stbwait with current logic, but
+   // for future use, this is left as is 
+   assign stb_retry = ~stb_blocked_d1 & ~st_in_pipe & wm_stbwait;   // retry a waiting thread once its store has left the delay stages and its stb_blocked_d1 bit is clear
+
+
+   //
+   // Quad Stores
+   //
+   dff_s #(1) stde_ff(.din (dec_swl_std_inst_d),
+		                .q   (std_inst_e),
+		                .clk (clk), .se(se), .si(), .so());
+
+//   assign stq_inst_e = std_inst_e & lsu_ifu_quad_asi_e & fcl_dtu_inst_vld_e;
+   assign std_done_e = std_inst_e & ~lsu_ifu_quad_asi_e & fcl_dtu_inst_vld_e;
+   dff_s #(1) stdm_ff(.din (std_done_e),
+		                .q   (std_done_m),
+		                .clk (clk), .se(se), .si(), .so());
+   
+//   dff #(1) stqm_ff(.din (stq_inst_e),
+//		                .q   (stq_inst_m),
+//		                .clk (clk), .se(se), .si(), .so());
+//   dff #(1) stqw_ff(.din (stq_inst_m),
+//		                .q   (stq_inst_w),
+//		                .clk (clk), .se(se), .si(), .so());
+//   dff #(1) stqw2_ff(.din (stq_inst_w),
+//		                 .q   (stq_inst_w2),
+//		                 .clk (clk), .se(se), .si(), .so());
+
+//   assign stq_in_pipe = ({4{stq_inst_m}} & thr_m |
+//			                   {4{stq_inst_w}} & thr_w |
+//			                   {4{stq_inst_w2}} & st_thr_w2);
+
+//   assign stq_busy = (stq_in_pipe | lsu_ifu_stq_busy);
+//   assign stq_wait_next = thr_e & {4{stq_inst_e}} | 
+//                 			    stq_wait & stq_busy;
+   
+//   dffr #(4) stqwait_reg(.din (stq_wait_next),
+//		                     .q   (stq_wait),
+//		                     .rst (dtu_reset),
+//		                     .clk (clk), .se(se), .si(), .so());
+//
+//   assign stq_done_thr = stq_wait & ~stq_busy | thr_m & {4{std_done_m}};
+   
+   
+   //-----------------------------
+   // FPRS
+   //-----------------------------
+   dff_s #(3) wrtd_w_reg(.din (thr_config_in_m[2:0]),
+                        .q   (thr_config_in_w[2:0]),
+                        .clk (clk), .se(se), .si(), .so());
+
+   dff_s #(3) wrtd_w2_reg(.din (thr_config_in_w[2:0]),
+                        .q   (thr_config_in_w2[2:0]),
+                        .clk (clk), .se(se), .si(), .so());
+   
+   assign fprs_wrt_data = thr_config_in_w2;
+   mux3ds #(3) fprs_mx0(.dout (fprs0_nxt),
+		                    .in0  (fprs_wrt_data),
+		                    .in1  (fprs0),
+		                    .in2  ({fprs0[2], new_fprs[1:0]}),
+		                    .sel0 (fprs_sel_wrt[0]),
+		                    .sel1 (fprs_sel_old[0]),
+		                    .sel2 (fprs_sel_set[0]));
+   mux3ds #(3) fprs_mx1(.dout (fprs1_nxt),
+		                    .in0  (fprs_wrt_data),
+		                    .in1  (fprs1),
+		                    .in2  ({fprs1[2], new_fprs[1:0]}),
+		                    .sel0 (fprs_sel_wrt[1]),
+		                    .sel1 (fprs_sel_old[1]),
+		                    .sel2 (fprs_sel_set[1]));
+   mux3ds #(3) fprs_mx2(.dout (fprs2_nxt),
+		                    .in0  (fprs_wrt_data),
+		                    .in1  (fprs2),
+		                    .in2  ({fprs2[2], new_fprs[1:0]}),
+		                    .sel0 (fprs_sel_wrt[2]),
+		                    .sel1 (fprs_sel_old[2]),
+		                    .sel2 (fprs_sel_set[2]));
+   mux3ds #(3) fprs_mx3(.dout (fprs3_nxt),
+		                    .in0  (fprs_wrt_data),
+		                    .in1  (fprs3),
+		                    .in2  ({fprs3[2], new_fprs[1:0]}),
+		                    .sel0 (fprs_sel_wrt[3]),
+		                    .sel1 (fprs_sel_old[3]),
+		                    .sel2 (fprs_sel_set[3]));
+
+   // make resettable for now.  Eventually change to non-reset
+   // Done
+   dff_s #(3) t0_fprs(.din (fprs0_nxt),
+		                 .q   (fprs0),
+//		                 .rst (dtu_reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   dff_s #(3) t1_fprs(.din (fprs1_nxt),
+		                 .q   (fprs1),
+//		                 .rst (dtu_reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   dff_s #(3) t2_fprs(.din (fprs2_nxt),
+		                 .q   (fprs2),
+//		                 .rst (dtu_reset),
+		                 .clk (clk), .se(se), .si(), .so());
+   dff_s #(3) t3_fprs(.din (fprs3_nxt),
+		                 .q   (fprs3),
+//		                 .rst (dtu_reset),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   assign fprs_en_s = {fprs3[2],fprs2[2],fprs1[2],fprs0[2]};
+   assign fpen_vec_s = (tlu_ifu_pstate_pef & fprs_en_s & thr_f);
+   assign fpen_s = (|fpen_vec_s[3:0]);
+   dff_s #(1) fpend_ff(.din (fpen_s),
+		                 .q   (swl_dec_fp_enable_d),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   // unprotected since synopsys does not use one hot mux
+   mux4ds #(3) curr_fprs_mx(.dout (fprs_d),
+			                      .in0  (fprs0),
+			                      .in1  (fprs1),
+			                      .in2  (fprs2),
+			                      .in3  (fprs3),
+			                      .sel0 (thr_d[0]),
+			                      .sel1 (thr_d[1]),
+			                      .sel2 (thr_d[2]),
+			                      .sel3 (thr_d[3]));
+
+   dff_s #(3) fprse_reg(.din (fprs_d),
+		                  .q   (fprs_e),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   assign new_fprs[1] = dec_swl_frf_upper_d | fprs_d[1];
+   assign new_fprs[0] = dec_swl_frf_lower_d | fprs_d[0];
+
+   // writes to fprs are done by software
+   assign wrt_fprs_w = ifu_tlu_inst_vld_w & dec_swl_wrtfprs_w &
+			                 ~flush_all_w;
+
+   dff_s #(1) fpwr_ff(.din (wrt_fprs_w),
+                    .q   (wrt_fprs_w2),
+                    .clk (clk), .se(se), .si(), .so());
+   
+   assign sel_wrt = st_thr_w2 & {4{wrt_fprs_w2}};
+   assign fprs_sel_set = thr_d & {4{dec_swl_fpop_d & swl_dec_fp_enable_d &
+                                    fcl_dtu_inst_vld_d}};
+   assign fprs_sel_wrt = ~fprs_sel_set & sel_wrt;
+   assign fprs_sel_old = ~sel_wrt & ~fprs_sel_set;
+
+   sink #(52) s0(.in (thrconf_out_e));
+   
+   
+endmodule // sparc_ifu_swl
+
+// Local Variables:
+// verilog-library-directories:("../../rtl" ".")
+// End:
+   
Index: /trunk/T1-CPU/ifu/sparc_ifu_sscan.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_sscan.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_sscan.v	(revision 6)
@@ -0,0 +1,69 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_sscan.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module sparc_ifu_sscan( ctu_sscan_snap, ctu_sscan_se, ctu_tck, lsu_sscan_test_data, 
+tlu_sscan_test_data, swl_sscan_thrstate, ifq_sscan_test_data, sparc_sscan_so, rclk, si, so, se);
+// Captures a 94-bit bundle of test data on rclk after a snap request, then re-registers it on ctu_tck.
+input ctu_sscan_snap;                // snapshot request; registered on rclk into snap_f
+input ctu_sscan_se;                  // wired to the .se pin of the ctu_tck-clocked stage (snap_inst2)
+input ctu_tck;                       // clock of snap_inst2
+input si;                            // declared but not read anywhere in this module
+input se;                            // wired to the .se pin of the rclk-clocked flops
+input [10:0] swl_sscan_thrstate;     // becomes snap_data[10:0]
+input [3:0] ifq_sscan_test_data;     // becomes snap_data[93:90]
+input [15:0] lsu_sscan_test_data;    // becomes snap_data[26:11]
+input [62:0] tlu_sscan_test_data;    // becomes snap_data[89:27]
+input rclk;                          // clock of snap_inst0 and snap_inst1
+
+output sparc_sscan_so;               // top so bit of snap_inst2, or 1'b0 without CONNECT_SHADOW_SCAN
+output so;                           // declared but never driven in this module
+
+//////////////////////////////////////////////////////////////////
+
+wire snap_f;                         // ctu_sscan_snap after one rclk flop
+wire [93:0] snap_data, snap_data_f, snap_data_ff;   // bundle -> rclk capture -> ctu_tck stage
+
+`ifdef CONNECT_SHADOW_SCAN
+wire [92:0] sscan_shift_data;        // carries so[92:0] of snap_inst2 back to its si[93:1]
+`endif
+
+////////
+// Register the snap request on rclk.
+dff_s #(1) snap_inst0(.q(snap_f), .din(ctu_sscan_snap), .clk(rclk), .se(se), .si(), .so());
+// Bundle layout, MSB first: ifq[3:0], tlu[62:0], lsu[15:0], swl[10:0] (4 + 63 + 16 + 11 = 94 bits).
+assign snap_data = {ifq_sscan_test_data, tlu_sscan_test_data, lsu_sscan_test_data, swl_sscan_thrstate};
+// Capture register: snap_f drives the .en pin (presumably a load enable; dffe_s is defined elsewhere).
+dffe_s #(94) snap_inst1(.q(snap_data_f), .din(snap_data), .clk(rclk), .en(snap_f), .se(se), .si(), .so());
+// ctu_tck stage. If CONNECT_SHADOW_SCAN: si[0] = 1'b0, so[i] feeds si[i+1], so[93] drives sparc_sscan_so.
+`ifdef CONNECT_SHADOW_SCAN
+dff_sscan #(94) snap_inst2(.q(snap_data_ff), .din(snap_data_f), .clk(ctu_tck), .se(ctu_sscan_se), 
+		     .si({sscan_shift_data, 1'b0}),
+		     .so({sparc_sscan_so, sscan_shift_data}));
+`else
+dff_s #(94) snap_inst2(.q(snap_data_ff), .din(snap_data_f), .clk(ctu_tck), .se(ctu_sscan_se), 
+		     .si(), .so());
+
+assign sparc_sscan_so = 1'b0;
+`endif
+// snap_data_ff has no other reader in this module; it is only handed to the sink cell.
+sink #(94) s0(.in (snap_data_ff));
+   
+
+endmodule     
Index: /trunk/T1-CPU/ifu/sparc_ifu_imd.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_imd.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_imd.v	(revision 6)
@@ -0,0 +1,243 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_imd.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_imd
+//  Description:	
+//  Contains the immediate operand datapath.  Has two outputs:  The
+//  simm data to the EXU and the branch offset to the IFU.
+*/
+
+
+module sparc_ifu_imd(/*AUTOARG*/
+   // Outputs
+   ifu_exu_imm_data_d, dtu_inst_d, ifu_exu_rd_d, ifu_lsu_rd_e, 
+   ifu_lsu_imm_asi_d, ifu_tlu_imm_asi_d, ifu_lsu_imm_asi_vld_d, ifu_tlu_sraddr_d, 
+   ifu_tlu_sraddr_d_v2, imd_dcl_brcond_d, imd_dcl_mvcond_d, 
+   imd_dcl_abit_d, so, ifu_ffu_frs1_d, ifu_ffu_frs2_d, ifu_ffu_frd_d, 
+   ifu_ffu_fpopcode_d, ifu_ffu_fcc_num_d, 
+   // Inputs
+   rclk, se, si, fdp_dtu_inst_s, fcl_imd_oddwin_d, 
+   dcl_imd_immdata_sel_simm13_d_l, dcl_imd_immdata_sel_movcc_d_l, 
+   dcl_imd_immdata_sel_sethi_d_l, dcl_imd_immdata_sel_movr_d_l, 
+   dcl_imd_broff_sel_call_d_l, dcl_imd_broff_sel_br_d_l, 
+   dcl_imd_broff_sel_bcc_d_l, dcl_imd_broff_sel_bpcc_d_l, 
+   dcl_imd_immbr_sel_br_d, dcl_imd_call_inst_d
+   );
+   // rclk clocks every flop (through clk); se goes to each flop's .se pin; si is not read in this module.
+   input rclk, 
+         se, 
+         si;
+
+   input [31:0] fdp_dtu_inst_s;          // instruction from fetch
+
+   input        fcl_imd_oddwin_d;       // are we in an even or odd window
+   
+   input        dcl_imd_immdata_sel_simm13_d_l,  // imm data selects
+		            dcl_imd_immdata_sel_movcc_d_l,
+		            dcl_imd_immdata_sel_sethi_d_l,
+		            dcl_imd_immdata_sel_movr_d_l;
+   
+   input        dcl_imd_broff_sel_call_d_l,      // dir branch offset select
+		            dcl_imd_broff_sel_br_d_l,
+		            dcl_imd_broff_sel_bcc_d_l,
+		            dcl_imd_broff_sel_bpcc_d_l;
+
+   input        dcl_imd_immbr_sel_br_d;  // use branch offset or simm data
+   input        dcl_imd_call_inst_d;     // forces rd[3:0] to 4'hf and masks inst[29] out of rd[4]
+   
+   output [31:0] ifu_exu_imm_data_d;      // imm data to EXU
+   output [31:0] dtu_inst_d;              // D stage inst to DEC
+
+   output [4:0]  ifu_exu_rd_d,
+		             ifu_lsu_rd_e;
+   
+   output [7:0]  ifu_lsu_imm_asi_d;       // ASI for ldA and stA
+   output [8:0]  ifu_tlu_imm_asi_d;       // ASI for ldA and stA
+   output        ifu_lsu_imm_asi_vld_d;   // inverse of the registered instruction bit 13
+   output [6:0]  ifu_tlu_sraddr_d;        // {inst[19], inst[20] & ~inst[19], sraddr5[4:0]}
+   output [6:0]  ifu_tlu_sraddr_d_v2;     // copy of ifu_tlu_sraddr_d
+   output [3:0]  imd_dcl_brcond_d;        // inst[28:25]
+   output [7:0]  imd_dcl_mvcond_d;        // inst[17:10]
+
+   output        imd_dcl_abit_d;         // annul bit for cond branch (inst[29])
+
+   output        so;                     // declared but never driven in this module
+
+   output [4:0]  ifu_ffu_frs1_d,
+		             ifu_ffu_frs2_d,
+		             ifu_ffu_frd_d;
+
+   output [8:0]  ifu_ffu_fpopcode_d;
+   output [1:0]  ifu_ffu_fcc_num_d;
+	 
+
+//-----------------------------------
+// Declaration of local signals
+//----------------------------------
+   wire [4:0]  sraddr5;
+
+   wire [31:0] imm_data_d;       // imm data 
+
+   wire [31:0] dtu_inst_d,
+		           simm13,
+		           simm11,
+		           simm10,
+		           imm22,
+		           dbr16,
+		           dbcc22_nopred,
+		           dbcc19_pred,
+		           dcall,
+		           broffset_d;
+   
+   wire        clk, ifu_lsu_imm_asi_vld_f;   // clk = rclk; *_vld_f = registered fdp_dtu_inst_s[13]
+   
+   
+//----------------------------------------------------------------------
+// Code starts here 
+//----------------------------------------------------------------------
+   assign      clk = rclk;
+   
+   //--------
+   // S Stage
+   // Contains mostly random logic to help with decode in D stage
+   //--------
+   
+   // Regfile operations:
+   // REMOVED
+//   assign ifu_exu_rs1_s = fdp_dtu_inst_s[18:14] ^ 
+//			  {{fdp_dtu_inst_s[17] & dcl_imd_oddwin_s},  4'b0000};
+
+//   assign ifu_exu_rs2_s = fdp_dtu_inst_s[4:0] ^ 
+//			  {{fdp_dtu_inst_s[3] & dcl_imd_oddwin_s},  4'b0000};
+
+//   assign ifu_exu_rs3_s = fdp_dtu_inst_s[29:25] ^ 
+//			  {{fdp_dtu_inst_s[28] & dcl_imd_oddwin_s},  4'b0000};
+
+//   assign imd_dcl_op_s = fdp_dtu_inst_s[31:30];
+//   assign imd_dcl_op3_s = fdp_dtu_inst_s[24:19];
+   
+   //--------
+   // D stage
+   // Contains the immediate data and branch offset muxes
+   //--------
+   // Pipeline register: S-stage instruction word -> dtu_inst_d.
+   dff_s #(32) inst_d_reg(.din  (fdp_dtu_inst_s),
+		      .clk  (clk),
+		      .q    (dtu_inst_d),
+		      .se   (se), .si(), .so());
+   // Bit 13 is registered a second time on its own; its inverse becomes ifu_lsu_imm_asi_vld_d.
+   dff_s #(1) ifu_lsu_imm_asi_inst(.din  (fdp_dtu_inst_s[13]),
+                      .clk  (clk),
+                      .q    (ifu_lsu_imm_asi_vld_f),
+                      .se   (se), .si(), .so());
+   // Instruction bit 29 is forwarded unchanged as the annul bit.
+   assign imd_dcl_abit_d = dtu_inst_d[29];
+
+   // imm data select
+   // sext12:0 -- add/sub/and/or/xor/taggedOP/jmpl/ld/store/atomic/div/mul/popc
+   //             prefetch/return/restore/save/sir/wr/shft/flush
+   //  !!!CAS does not use Imm data!!!
+   //
+   // sext10:0 -- movcc
+   // sext9:0  -- movr
+   // 21:0,10'b0 -- sethi
+   // Each candidate is widened to 32 bits: sign-extended, except imm22 which is padded with 10 low zeros.
+   assign simm13 = {{19{dtu_inst_d[12]}},dtu_inst_d[12:0]};
+   assign simm11 = {{21{dtu_inst_d[10]}},dtu_inst_d[10:0]};
+   assign simm10 = {{22{dtu_inst_d[9]}},dtu_inst_d[9:0]};
+   assign imm22  = {dtu_inst_d[21:0], 10'b0};
+   // Selects are presumably active low and one-hot (_l suffix); dp_mux4ds is defined elsewhere.
+   dp_mux4ds  #(32) immdata_mux(.dout (imm_data_d),
+			  .in0  (simm13),
+			  .in1  (simm11),
+			  .in2  (simm10),
+			  .in3  (imm22),
+			  .sel0_l (dcl_imd_immdata_sel_simm13_d_l),
+			  .sel1_l (dcl_imd_immdata_sel_movcc_d_l),
+			  .sel2_l (dcl_imd_immdata_sel_movr_d_l),
+			  .sel3_l (dcl_imd_immdata_sel_sethi_d_l));
+
+   // Every offset below is 32 bits with 2'b0 in the low bits; dbr16 joins inst[21:20] with inst[13:0].
+   // branch offset select
+   assign dbr16 = {{14{dtu_inst_d[21]}}, dtu_inst_d[21:20], 
+		   dtu_inst_d[13:0], 2'b0};
+   assign dbcc22_nopred = {{8{dtu_inst_d[21]}}, dtu_inst_d[21:0], 2'b0};
+   assign dbcc19_pred = {{11{dtu_inst_d[18]}}, dtu_inst_d[18:0], 2'b0};
+   assign dcall = {dtu_inst_d[29:0], 2'b0};
+
+   dp_mux4ds  #(32) broffset_mux(.dout   (broffset_d[31:0]),
+			   .in0    (dcall[31:0]),          // call
+			   .in1    (dbr16[31:0]),          // br on reg
+			   .in2    (dbcc22_nopred[31:0]),  // branch w/o pred
+			   .in3    (dbcc19_pred[31:0]),    // branch w/ pred
+			   .sel0_l (dcl_imd_broff_sel_call_d_l),
+			   .sel1_l (dcl_imd_broff_sel_br_d_l),
+			   .sel2_l (dcl_imd_broff_sel_bcc_d_l),
+			   .sel3_l (dcl_imd_broff_sel_bpcc_d_l));
+   // Final choice between immediate data (in0) and branch offset (in1); presumably sel=1 picks in1.
+   dp_mux2es #(32) immbr_mux(.dout (ifu_exu_imm_data_d[31:0]),
+			   .in0  (imm_data_d[31:0]),
+			   .in1  (broffset_d[31:0]),
+			   .sel  (dcl_imd_immbr_sel_br_d));
+
+   // branch/move condition to dcl
+   assign imd_dcl_brcond_d = dtu_inst_d[28:25];
+   assign imd_dcl_mvcond_d = dtu_inst_d[17:10];
+   // rd[4] = inst[29] (masked to 0 for call) XOR (rd[3] & fcl_imd_oddwin_d).
+   // if call instruction set rd = 0f (15)
+   assign ifu_exu_rd_d[3:0] = dtu_inst_d[28:25] | {4{dcl_imd_call_inst_d}};
+   assign ifu_exu_rd_d[4] = (dtu_inst_d[29] & ~dcl_imd_call_inst_d) ^
+			      (ifu_exu_rd_d[3] & fcl_imd_oddwin_d);
+   // ifu_lsu_rd_e is ifu_exu_rd_d delayed by one flop.
+   dff_s #(5) rde_ff(.din (ifu_exu_rd_d[4:0]),
+		 .clk (clk),
+		 .q   (ifu_lsu_rd_e[4:0]),
+		 .se  (se), .si(), .so());
+
+   // read/write pr and read/write sr
+   dp_mux2es #(5)  sraddr_mux(.dout (sraddr5[4:0]),
+			    .in0  (dtu_inst_d[18:14]),  // rs1 for rdpr
+			    .in1  (dtu_inst_d[29:25]),  // rd  for wrpr
+			    .sel  (dtu_inst_d[23]));
+   // 7-bit address: {inst[19], inst[20] gated off when inst[19] is set, 5-bit register number}.
+   assign ifu_tlu_sraddr_d = {dtu_inst_d[19],                     // hpriv
+                              {dtu_inst_d[20] & ~dtu_inst_d[19]}, // priv
+                              sraddr5[4:0]};
+   assign ifu_tlu_sraddr_d_v2 = ifu_tlu_sraddr_d;
+   
+
+   // asi fields for stA, ldA
+   // same as fpopcode_d
+   // Both are raw instruction bits: [12:5] to the LSU, [13:5] to the TLU.
+   assign ifu_lsu_imm_asi_d[7:0] = dtu_inst_d[12:5];
+   assign ifu_tlu_imm_asi_d[8:0] = dtu_inst_d[13:5];
+   // Asserted when the registered instruction bit 13 is 0.
+   assign ifu_lsu_imm_asi_vld_d = ~ifu_lsu_imm_asi_vld_f;
+   
+   // fp reg fields
+   assign ifu_ffu_frd_d = dtu_inst_d[29:25];
+   assign ifu_ffu_fcc_num_d = dtu_inst_d[26:25];
+   assign ifu_ffu_frs1_d = dtu_inst_d[18:14];
+   assign ifu_ffu_fpopcode_d = dtu_inst_d[13:5];
+   assign ifu_ffu_frs2_d = dtu_inst_d[4:0];
+   
+endmodule // sparc_ifu_imd
Index: /trunk/T1-CPU/ifu/sparc_ifu_fdp.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_fdp.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_fdp.v	(revision 6)
@@ -0,0 +1,1115 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_fdp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:  sparc_ifu_fdp
+//  Description:	
+//    The fdp contains the pc's for all four threads and the PC and
+//    nPC for all pipestages register.  The fetcher also contains two
+//    adders for doing PC + br_offset and PC + 4.
+//    The fdp also holds the last fetched icache data for each thread
+//    and the next instruction register, which has the top half of the
+//    double instruction bundle which is fetched from the icache. 
+*/
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+`include "ifu.h"
+
+`define NOP            32'h01000000
+`define PO_RESET_PC    48'hfffff0000020
+`define VER_MANUF      16'h003e
+`define VER_IMPL       16'h0023
+`define VER_MAXGL      8'h03
+`define VER_MAXWIN     8'h07
+`define VER_MAXTL      8'h06
+
+//`define VER_MAXTL      {5'b0, fcl_fdp_hprivmode_e, 2'b10}
+//`define VER_IMPL_MASK  24'h002301
+
+//`define VERSION_REG_HPV  {`VER_MANUF, `VER_IMPL_MASK, `VER_MAXGL, 5'b0, fcl_fdp_hprivmode_e, 2'b10, `VER_MAXWIN}
+
+//`define VERSION_REG      {`VER_MANUF, `VER_IMPL_MASK, `VER_MAXGL, 8'h06, `VER_MAXWIN}
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module sparc_ifu_fdp(/*AUTOARG*/
+   // Outputs
+   so, fdp_itlb_ctxt_bf, fdp_icd_vaddr_bf, fdp_icv_index_bf, 
+   fdp_erb_pc_f, fdp_dtu_inst_s, ifu_exu_pc_d, ifu_exu_rs1_s, 
+   ifu_exu_rs2_s, ifu_exu_rs3_s, ifu_tlu_pc_m, ifu_tlu_npc_m, 
+   ifu_tlu_pc_oor_e, ifu_exu_pcver_e, fdp_fcl_swc_s2, 
+   fdp_fcl_pc_oor_vec_f, fdp_fcl_pc_oor_e, fdp_fcl_op_s, 
+   fdp_fcl_op3_s, fdp_fcl_ibit_s, 
+   // Inputs
+   rclk, se, si, const_maskid, lsu_t0_pctxt_state, 
+   lsu_t1_pctxt_state, lsu_t2_pctxt_state, lsu_t3_pctxt_state, 
+   exu_ifu_brpc_e, tlu_ifu_trappc_w2, tlu_ifu_trapnpc_w2, 
+   tlu_itlb_dmp_nctxt_g, tlu_itlb_dmp_actxt_g, tlu_itlb_tte_tag_w2, 
+   dtu_fdp_thrconf_e, icd_fdp_fetdata_s1, icd_fdp_topdata_s1, 
+   ifq_fdp_fill_inst, fcl_fdp_oddwin_s, fcl_fdp_pcoor_vec_f, 
+   fcl_fdp_pcoor_f, fcl_fdp_mask32b_f, fcl_fdp_addr_mask_d, 
+   fcl_fdp_tctxt_sel_prim, fcl_fdp_usenir_sel_nir_s1, 
+   fcl_fdp_rbinst_sel_inste_s, fcl_fdp_thrtnpc_sel_tnpc_l, 
+   fcl_fdp_thrtnpc_sel_npcw_l, fcl_fdp_thrtnpc_sel_pcf_l, 
+   fcl_fdp_thrtnpc_sel_old_l, fcl_fdp_thr_s1_l, 
+   fcl_fdp_next_thr_bf_l, fcl_fdp_next_ctxt_bf_l, fcl_fdp_thr_s2_l, 
+   fcl_fdp_nirthr_s1_l, fcl_fdp_tpcbf_sel_pcp4_bf_l, 
+   fcl_fdp_tpcbf_sel_brpc_bf_l, fcl_fdp_tpcbf_sel_trap_bf_l, 
+   fcl_fdp_tpcbf_sel_old_bf_l, fcl_fdp_pcbf_sel_swpc_bf_l, 
+   fcl_fdp_pcbf_sel_nosw_bf_l, fcl_fdp_pcbf_sel_br_bf_l, 
+   fcl_fdp_trrbpc_sel_trap_bf_l, fcl_fdp_trrbpc_sel_rb_bf_l, 
+   fcl_fdp_trrbpc_sel_err_bf_l, fcl_fdp_trrbpc_sel_pcs_bf_l, 
+   fcl_fdp_noswpc_sel_tnpc_l_bf, fcl_fdp_noswpc_sel_old_l_bf, 
+   fcl_fdp_noswpc_sel_inc_l_bf, fcl_fdp_nextpcs_sel_pce_f_l, 
+   fcl_fdp_nextpcs_sel_pcd_f_l, fcl_fdp_nextpcs_sel_pcs_f_l, 
+   fcl_fdp_nextpcs_sel_pcf_f_l, fcl_fdp_rdsr_sel_pc_e_l, 
+   fcl_fdp_rdsr_sel_ver_e_l, fcl_fdp_rdsr_sel_thr_e_l, 
+   fcl_fdp_inst_sel_curr_s_l, fcl_fdp_inst_sel_switch_s_l, 
+   fcl_fdp_inst_sel_nir_s_l, fcl_fdp_inst_sel_nop_s_l, 
+   fcl_fdp_tinst_sel_curr_s_l, fcl_fdp_tinst_sel_rb_s_l, 
+   fcl_fdp_tinst_sel_old_s_l, fcl_fdp_tinst_sel_ifq_s_l, 
+   fcl_fdp_dmpthr_l, fcl_fdp_ctxt_sel_dmp_bf_l, 
+   fcl_fdp_ctxt_sel_sw_bf_l, fcl_fdp_ctxt_sel_curr_bf_l
+   );
+
+   input       rclk, 
+	             se,
+	             si;
+
+   input [7:0] const_maskid;
+   
+   input [12:0] lsu_t0_pctxt_state,   // primary context
+		            lsu_t1_pctxt_state,
+		            lsu_t2_pctxt_state,
+		            lsu_t3_pctxt_state;
+
+   //   input 	 exu_ifu_va_oor_e;
+   input [47:0] exu_ifu_brpc_e;        // br address for dir branch
+
+   input [48:0] tlu_ifu_trappc_w2,     // trap/exception PC
+		            tlu_ifu_trapnpc_w2;    // next trap PC
+
+   input        tlu_itlb_dmp_nctxt_g,
+		            tlu_itlb_dmp_actxt_g;
+   
+   input [12:0] tlu_itlb_tte_tag_w2;
+   
+//   input [`IC_IDX_HI:4] ifq_fdp_icindex_bf;   // index + 1 bit for 16B write
+   
+   input [40:0]         dtu_fdp_thrconf_e;
+
+   input [32:0]         icd_fdp_fetdata_s1,    // 4 inst + 4 sw bits
+		                    icd_fdp_topdata_s1;    // next instruction
+   
+   input [32:0]         ifq_fdp_fill_inst;    // icache miss return
+
+   input                fcl_fdp_oddwin_s;
+   input [3:0]          fcl_fdp_pcoor_vec_f;
+   input                fcl_fdp_pcoor_f;
+   input                fcl_fdp_mask32b_f;
+   input                fcl_fdp_addr_mask_d;   
+   input [3:0]          fcl_fdp_tctxt_sel_prim;
+   
+   // 2:1 mux selects
+   input                fcl_fdp_usenir_sel_nir_s1;   // same as usenir_d2
+   input [3:0]          fcl_fdp_rbinst_sel_inste_s;  // rollback 1 or 2 
+
+   input [3:0]          fcl_fdp_thrtnpc_sel_tnpc_l,  // load npc
+	                      fcl_fdp_thrtnpc_sel_npcw_l,
+		                    fcl_fdp_thrtnpc_sel_pcf_l,
+	                      fcl_fdp_thrtnpc_sel_old_l;
+   
+   input [3:0]          fcl_fdp_thr_s1_l;            // s2 thr (64*5 muxes)
+   
+   // other mux selects
+   input [3:0]          fcl_fdp_next_thr_bf_l;  // for thrpc output mux
+   input [3:0]          fcl_fdp_next_ctxt_bf_l; // for ctxt output mux
+
+   input [3:0]          fcl_fdp_thr_s2_l;       // s2 thr (64*5 muxes)
+   input [3:0]          fcl_fdp_nirthr_s1_l;        // same as thr_s1, but protected
+   
+   input [3:0]          fcl_fdp_tpcbf_sel_pcp4_bf_l, // selects for thread PC muxes
+	                      fcl_fdp_tpcbf_sel_brpc_bf_l,
+	                      fcl_fdp_tpcbf_sel_trap_bf_l,
+	                      fcl_fdp_tpcbf_sel_old_bf_l;
+
+   input                fcl_fdp_pcbf_sel_swpc_bf_l,
+	                      fcl_fdp_pcbf_sel_nosw_bf_l,
+	                      fcl_fdp_pcbf_sel_br_bf_l;
+
+   input [3:0]          fcl_fdp_trrbpc_sel_trap_bf_l, 
+	                      fcl_fdp_trrbpc_sel_rb_bf_l,
+	                      fcl_fdp_trrbpc_sel_err_bf_l,	       
+	                      fcl_fdp_trrbpc_sel_pcs_bf_l;
+	 
+   input                fcl_fdp_noswpc_sel_tnpc_l_bf,    // next pc select from trap,
+	                      fcl_fdp_noswpc_sel_old_l_bf,			     
+	                      fcl_fdp_noswpc_sel_inc_l_bf;
+
+   input [3:0]          fcl_fdp_nextpcs_sel_pce_f_l,  
+	                      fcl_fdp_nextpcs_sel_pcd_f_l,
+	                      fcl_fdp_nextpcs_sel_pcs_f_l,  
+	                      fcl_fdp_nextpcs_sel_pcf_f_l;
+   
+   input                fcl_fdp_rdsr_sel_pc_e_l,      
+	                      fcl_fdp_rdsr_sel_ver_e_l,
+	                      fcl_fdp_rdsr_sel_thr_e_l;
+
+   input                fcl_fdp_inst_sel_curr_s_l,       // selects for inst_s2
+	                      fcl_fdp_inst_sel_switch_s_l,
+	                      fcl_fdp_inst_sel_nir_s_l,
+	                      fcl_fdp_inst_sel_nop_s_l;
+   
+   input [3:0]          fcl_fdp_tinst_sel_curr_s_l, // selects for tinst regs
+	                      fcl_fdp_tinst_sel_rb_s_l,	       
+	                      fcl_fdp_tinst_sel_old_s_l,
+	                      fcl_fdp_tinst_sel_ifq_s_l;
+
+   input [3:0]          fcl_fdp_dmpthr_l;
+
+   input                fcl_fdp_ctxt_sel_dmp_bf_l,
+	                      fcl_fdp_ctxt_sel_sw_bf_l,
+	                      fcl_fdp_ctxt_sel_curr_bf_l;
+
+
+   output               so;
+   output [12:0]        fdp_itlb_ctxt_bf;
+   output [47:2]        fdp_icd_vaddr_bf;   // 11:2 is index to ic
+   output [11:5]        fdp_icv_index_bf;   
+   output [47:0]        fdp_erb_pc_f;
+   output [31:0]        fdp_dtu_inst_s;     // 32b inst + switch bit 
+
+   output [47:0]        ifu_exu_pc_d;       // PC for rel branch
+   output [4:0]         ifu_exu_rs1_s,      // reg file read address
+		                    ifu_exu_rs2_s,
+		                    ifu_exu_rs3_s;
+		              
+   output [48:0]        ifu_tlu_pc_m,
+		                    ifu_tlu_npc_m;
+
+   output               ifu_tlu_pc_oor_e;
+   
+   output [63:0]        ifu_exu_pcver_e;    // PCs to different dests.
+
+   output               fdp_fcl_swc_s2;       // tells whether to switch or not
+   output [3:0]         fdp_fcl_pc_oor_vec_f; // PC va hole check
+   output               fdp_fcl_pc_oor_e;
+
+   output [1:0]         fdp_fcl_op_s;
+   output [5:2]         fdp_fcl_op3_s;
+   output               fdp_fcl_ibit_s;
+
+   
+   
+   
+//----------------------------------------------------------------------
+// Declarations
+//----------------------------------------------------------------------
+
+   // local signals
+
+   // Contexts
+   wire [12:0] 	curr_ctxt,
+		            sw_ctxt,
+		            dmp_ctxt,
+		            dmp_ctxt_unq,
+		            dmp_ctxt1,
+		            dmp_ctxt2,
+		            t0_ctxt_bf,
+		            t1_ctxt_bf,
+		            t2_ctxt_bf,
+		            t3_ctxt_bf;
+
+   // PCs
+   wire [48:0]  t0pc_f, t1pc_f, t2pc_f, t3pc_f,         // F stage thread PC
+		            t0pc_s, t1pc_s, t2pc_s, t3pc_s,         // S stage thr pc
+		            t0_next_pcs_f, t1_next_pcs_f, t2_next_pcs_f, t3_next_pcs_f,
+		            t0npc_bf, t1npc_bf, t2npc_bf, t3npc_bf, // Next PC in
+							                                          // BF stage
+		            pc_s, pc_d, pc_e, pc_m, pc_w,          
+		            npc_s, npc_d, npc_e, npc_m, npc_w,
+		            pc_d_adj, npc_d_adj;
+
+   wire [47:0]  pc_bf,
+		            swpc_bf,                // PC of next thread if not branch
+                pc_f;
+
+   wire [48:0]  nextpc_nosw_bf,         // next pc if no switch
+		            am_mask;
+   
+   // trap PCs and rollback PCs
+   wire [48:0]  t0_trap_rb_pc_bf,
+		            t1_trap_rb_pc_bf,
+		            t2_trap_rb_pc_bf,
+		            t3_trap_rb_pc_bf;
+
+   wire [48:0]  thr_trappc_bf,
+		            t0_trapnpc_f,
+		            t1_trapnpc_f,
+		            t2_trapnpc_f,
+		            t3_trapnpc_f,
+		            trapnpc0_bf,
+		            trapnpc1_bf,
+		            trapnpc2_bf,
+		            trapnpc3_bf;
+
+   // Branch PCs
+   wire [48:0]  pcinc_f;                // incr output
+
+   // Instruction Words
+   wire [32:0]  inst_s2,                // instruction to switch to in S
+		            fdp_inst_s,             // instruction to be sent to D
+		            t0inst_s1,              // input to thr inst reg in S
+		            t1inst_s1,
+		            t2inst_s1,
+		            t3inst_s1,
+		            t0inst_s2,              // thr inst reg output
+		            t1inst_s2,
+		            t2inst_s2,
+		            t3inst_s2;
+
+   wire [32:0]  inst_s1;                // fetched instruction in S
+   wire [32:0]  inst_s1_bf1;            // buf version of inst_s1
+
+   wire [32:0]  rb_inst0_s,             // instruction to rollback to
+		            rb_inst1_s,             // instruction to rollback to
+		            rb_inst2_s,             // instruction to rollback to
+		            rb_inst3_s,             // instruction to rollback to
+		            inst_d,                 //   rollback 1
+		            inst_e;                 //   rollback 2
+
+   // Next instruction word
+   wire [32:0]  nirdata_s1,             // next inst reg contents
+		            t0nir,                  // thread NIR reg output
+		            t1nir,
+		            t2nir,
+		            t3nir;
+
+   wire         clk;
+   
+
+   //
+   // Code starts here 
+   //
+   assign       clk = rclk;
+   
+//----------------------------------------------------------------------
+// Context Reg
+//----------------------------------------------------------------------
+   assign t0_ctxt_bf = lsu_t0_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[0]}};
+
+`ifdef FPGA_SYN_1THREAD
+
+   assign sw_ctxt = t0_ctxt_bf;
+   assign curr_ctxt = t0_ctxt_bf;
+   assign dmp_ctxt_unq = lsu_t0_pctxt_state;
+   
+`else
+
+   assign t1_ctxt_bf = lsu_t1_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[1]}};
+   assign t2_ctxt_bf = lsu_t2_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[2]}};
+   assign t3_ctxt_bf = lsu_t3_pctxt_state & {13{fcl_fdp_tctxt_sel_prim[3]}};
+
+   dp_mux4ds #(13) sw_ctxt_mux(.dout (sw_ctxt),
+			       .in0  (t0_ctxt_bf),
+			       .in1  (t1_ctxt_bf),
+			       .in2  (t2_ctxt_bf),
+			       .in3  (t3_ctxt_bf),
+			       .sel0_l (fcl_fdp_next_ctxt_bf_l[0]),
+			       .sel1_l (fcl_fdp_next_ctxt_bf_l[1]),
+			       .sel2_l (fcl_fdp_next_ctxt_bf_l[2]),
+			       .sel3_l (fcl_fdp_next_ctxt_bf_l[3]));
+   
+   dp_mux4ds #(13) curr_ctxt_mux(.dout (curr_ctxt),
+			     .in0  (t0_ctxt_bf),
+			     .in1  (t1_ctxt_bf),
+			     .in2  (t2_ctxt_bf),
+			     .in3  (t3_ctxt_bf),
+			     .sel0_l (fcl_fdp_thr_s2_l[0]),
+			     .sel1_l (fcl_fdp_thr_s2_l[1]),
+			     .sel2_l (fcl_fdp_thr_s2_l[2]),
+			     .sel3_l (fcl_fdp_thr_s2_l[3]));
+
+   dp_mux4ds #(13) dmp_ctxt_mux(.dout (dmp_ctxt_unq),
+			      .in0  (lsu_t0_pctxt_state),
+			      .in1  (lsu_t1_pctxt_state),
+			      .in2  (lsu_t2_pctxt_state),
+			      .in3  (lsu_t3_pctxt_state),
+			      .sel0_l (fcl_fdp_dmpthr_l[0]),
+			      .sel1_l (fcl_fdp_dmpthr_l[1]),
+			      .sel2_l (fcl_fdp_dmpthr_l[2]),
+			      .sel3_l (fcl_fdp_dmpthr_l[3]));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+   assign dmp_ctxt1 = dmp_ctxt_unq & {13{~(tlu_itlb_dmp_nctxt_g |
+					                                 tlu_itlb_dmp_actxt_g)}};
+//`ifdef SPARC_HPV_EN   
+   assign dmp_ctxt2 = {tlu_itlb_tte_tag_w2[12:7],tlu_itlb_tte_tag_w2[6:0]} & 
+	                    {13{tlu_itlb_dmp_actxt_g}};
+//`else
+//  assign dmp_ctxt2 = {tlu_itlb_tte_tag_w2[13:8],tlu_itlb_tte_tag_w2[6:0]} & 
+//	                    {13{tlu_itlb_dmp_actxt_g}};
+//`endif   
+
+   assign dmp_ctxt = dmp_ctxt1 | dmp_ctxt2;   // actxt_g set zeroes dmp_ctxt1, actxt_g clear zeroes dmp_ctxt2: at most one term is non-zero
+
+   dp_mux3ds #(13) ctxt_mux (.dout (fdp_itlb_ctxt_bf),
+			                     .in0  (curr_ctxt),
+			                     .in1  (sw_ctxt),
+			                     .in2  (dmp_ctxt),
+			                     .sel0_l  (fcl_fdp_ctxt_sel_curr_bf_l),
+			                     .sel1_l  (fcl_fdp_ctxt_sel_sw_bf_l),
+			                     .sel2_l  (fcl_fdp_ctxt_sel_dmp_bf_l));
+   
+   
+// ----------------------------------------------------------------------
+// PC datapath    
+// ----------------------------------------------------------------------
+
+   // pc/thr to exu for rdsr instruction
+   // this is the only 64 bit cell in the IFU
+   dp_mux3ds #(64) ver_mux(.dout (ifu_exu_pcver_e[63:0]),
+			                   .in0  ({{16{pc_e[47]}}, pc_e[47:0]}),
+			                   .in1  ({`VER_MANUF, 
+                                 `VER_IMPL,
+                                 const_maskid[7:0],
+                                 `VER_MAXGL, 
+                                 `VER_MAXTL,
+                                 `VER_MAXWIN}),
+			                   .in2  ({12'b0, 
+                                 dtu_fdp_thrconf_e[40:29],
+                                 4'b0,
+                                 dtu_fdp_thrconf_e[28:9],
+                                 2'b0,
+                                 dtu_fdp_thrconf_e[8:3],
+                                 5'b0,
+                                 dtu_fdp_thrconf_e[2:0]}),
+			                   .sel0_l  (fcl_fdp_rdsr_sel_pc_e_l),
+			                   .sel1_l  (fcl_fdp_rdsr_sel_ver_e_l),
+			                   .sel2_l  (fcl_fdp_rdsr_sel_thr_e_l));
+   
+   // Select the next thread pc (for F stage)
+   dp_mux4ds #(49) t0_pcbf_mux(.dout (t0npc_bf), 
+			                       .in0 ({fcl_fdp_pcoor_vec_f[0], t0pc_f[47:0]}), 
+			                       .in1 (nextpc_nosw_bf), 
+			                       .in2 (t0_trap_rb_pc_bf), 
+			                       .in3 ({1'b0, exu_ifu_brpc_e}),
+			                       .sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[0]),
+			                       .sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[0]),
+			                       .sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[0]),
+			                       .sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[0]));
+
+`ifdef FPGA_SYN_1THREAD
+`else
+   dp_mux4ds #(49) t1_pcbf_mux(.dout (t1npc_bf), 
+			                       .in0 ({fcl_fdp_pcoor_vec_f[1], t1pc_f[47:0]}), 
+			                       .in1 (nextpc_nosw_bf), 
+			                       .in2 (t1_trap_rb_pc_bf), 
+			                       .in3 ({1'b0, exu_ifu_brpc_e}),
+			                       .sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[1]),
+			                       .sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[1]),
+			                       .sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[1]),
+			                       .sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[1]));
+   
+   dp_mux4ds #(49) t2_pcbf_mux(.dout (t2npc_bf), 
+			                       .in0 ({fcl_fdp_pcoor_vec_f[2], t2pc_f[47:0]}), 
+			                       .in1 (nextpc_nosw_bf), 
+			                       .in2 (t2_trap_rb_pc_bf), 
+			                       .in3 ({1'b0, exu_ifu_brpc_e}),
+			                       .sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[2]),
+			                       .sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[2]),
+			                       .sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[2]),
+			                       .sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[2]));
+   
+   dp_mux4ds #(49) t3_pcbf_mux(.dout (t3npc_bf), 
+			                       .in0 ({fcl_fdp_pcoor_vec_f[3], t3pc_f[47:0]}), 
+			                       .in1 (nextpc_nosw_bf), 
+			                       .in2 (t3_trap_rb_pc_bf), 
+			                       .in3 ({1'b0, exu_ifu_brpc_e}),
+			                       .sel0_l (fcl_fdp_tpcbf_sel_old_bf_l[3]),
+			                       .sel1_l (fcl_fdp_tpcbf_sel_pcp4_bf_l[3]),
+			                       .sel2_l (fcl_fdp_tpcbf_sel_trap_bf_l[3]),
+			                       .sel3_l (fcl_fdp_tpcbf_sel_brpc_bf_l[3]));
+`endif
+   
+   // F stage thread PC regs;  use low power thr flop
+   dff_s  #(49)  t0_pcf_reg(.din (t0npc_bf), 
+			                  .clk (clk), 
+			                  .q   (t0pc_f), 
+			                  .se  (se), .si(), .so());
+`ifdef FPGA_SYN_1THREAD
+   assign fdp_fcl_pc_oor_vec_f = {3'b0, t0pc_f[48]};   // FPGA_SYN_1THREAD: only bit 0 is live, bits [3:1] are tied to 0
+   assign swpc_bf = t0pc_f[47:0];                      // FPGA_SYN_1THREAD: no swpc mux, always thread 0's F-stage PC
+`else
+   dff_s  #(49)  t1_pcf_reg(.din (t1npc_bf), 
+			                  .clk (clk), 
+			                  .q   (t1pc_f), 
+			                  .se  (se), .si(), .so());
+   dff_s  #(49)  t2_pcf_reg(.din (t2npc_bf), 
+			                  .clk (clk), 
+			                  .q   (t2pc_f), 
+			                  .se  (se), .si(), .so());
+   dff_s  #(49)  t3_pcf_reg(.din (t3npc_bf), 
+			                  .clk (clk), 
+			                  .q   (t3pc_f), 
+			                  .se  (se), .si(), .so());
+
+   assign fdp_fcl_pc_oor_vec_f = {t3pc_f[48], t2pc_f[48], 
+				                          t1pc_f[48], t0pc_f[48]};
+
+   
+   // select the pc to be used on a switch -- need to protect
+   dp_mux4ds #(48) swpc_mux(.dout (swpc_bf), 
+			                    .in0 (t0pc_f[47:0]), 
+			                    .in1 (t1pc_f[47:0]), 
+			                    .in2 (t2pc_f[47:0]), 
+			                    .in3 (t3pc_f[47:0]),
+			                    .sel0_l (fcl_fdp_next_thr_bf_l[0]),
+			                    .sel1_l (fcl_fdp_next_thr_bf_l[1]),
+			                    .sel2_l (fcl_fdp_next_thr_bf_l[2]),
+			                    .sel3_l (fcl_fdp_next_thr_bf_l[3]));
+`endif
+
+   // choose between I$ write address and read address
+   // need mux only for lower 11 bits (2+3 + ICINDEX_SIZE)
+//   dp_mux2es #(48) ifqfdp_mux(.dout (icaddr_nosw_bf[47:0]),
+//	     .in0  (nextpc_nosw_bf[47:0]), 
+//	     .in1  ({{37{1'b0}}, ifq_fdp_icindex_bf, 4'b0}),
+//	     .sel  (fcl_fdp_ifqfdp_sel_ifq_bf));  // 1=ifq
+
+   // implements switch and branch
+   // can we cut this down to 11 bits? No! tlb needs all 48
+
+//   dp_mux4ds #(48) nxt_icaddr_mux(.dout  (icaddr_bf),
+//				                        .in0   (swpc_bf[47:0]), 
+//				                        .in1   (nextpc_nosw_bf[47:0]),
+//				                        .in2   ({8'b0, {`IC_TAG_SZ{1'b0}}, 
+//                                         ifq_fdp_icindex_bf, 4'b0}),
+//				                        .in3   (exu_ifu_brpc_e[47:0]), 
+//				                        .sel0_l (fcl_fdp_icaddr_sel_swpc_bf_l),
+//				                        .sel1_l (fcl_fdp_icaddr_sel_curr_bf_l),
+//				                        .sel2_l (fcl_fdp_icaddr_sel_ifq_bf_l),
+//				                        .sel3_l (fcl_fdp_icaddr_sel_br_bf_l));
+
+//   assign fdp_icd_vaddr_bf = icaddr_bf[47:0];
+   // this goes to the itlb, icd and ict on top of fdp
+   // this is !!very critical!!
+   assign fdp_icd_vaddr_bf = pc_bf[47:2];   // pc_bf without its two low bits
+
+   // create separate output for the icv to the left
+   assign fdp_icv_index_bf = pc_bf[11:5];   // 7-bit slice of the same pc_bf
+
+   // Place this mux as close to the top (itlb) as possible
+   dp_mux3ds #(48) pcbf_mux(.dout  (pc_bf[47:0]),
+			  .in0   (swpc_bf[47:0]),        
+			  .in1   (nextpc_nosw_bf[47:0]),
+			  .in2   (exu_ifu_brpc_e[47:0]), 
+			  .sel0_l (fcl_fdp_pcbf_sel_swpc_bf_l),
+			  .sel1_l (fcl_fdp_pcbf_sel_nosw_bf_l),
+			  .sel2_l (fcl_fdp_pcbf_sel_br_bf_l));
+
+   dff_s #(48)  pcf_reg(.din  (pc_bf), 
+		    .clk  (clk), 
+		    .q    (pc_f), 
+		    .se   (se), .si(), .so());
+
+   assign fdp_erb_pc_f = pc_f[47:0];
+
+    // trappc mux (choose trap pc vs rollback/uTrap pc)
+   dp_mux4ds #(49) trap_pc0_mux(.dout (t0_trap_rb_pc_bf),
+			      .in0  (tlu_ifu_trappc_w2),
+			      .in1  (pc_d_adj),
+			      .in2  (t0pc_s),
+			      .in3  (pc_w),
+			      .sel0_l  (fcl_fdp_trrbpc_sel_trap_bf_l[0]),
+			      .sel1_l  (fcl_fdp_trrbpc_sel_rb_bf_l[0]),
+			      .sel2_l  (fcl_fdp_trrbpc_sel_pcs_bf_l[0]),
+			      .sel3_l  (fcl_fdp_trrbpc_sel_err_bf_l[0]));
+  
+`ifdef FPGA_SYN_1THREAD
+`else 
+   dp_mux4ds #(49) trap_pc1_mux(.dout (t1_trap_rb_pc_bf),
+			      .in0  (tlu_ifu_trappc_w2),
+			      .in1  (pc_d_adj),
+			      .in2  (t1pc_s),
+			      .in3  (pc_w),
+			      .sel0_l  (fcl_fdp_trrbpc_sel_trap_bf_l[1]),
+			      .sel1_l  (fcl_fdp_trrbpc_sel_rb_bf_l[1]),
+			      .sel2_l  (fcl_fdp_trrbpc_sel_pcs_bf_l[1]),
+			      .sel3_l  (fcl_fdp_trrbpc_sel_err_bf_l[1]));
+   
+   dp_mux4ds #(49) trap_pc2_mux(.dout (t2_trap_rb_pc_bf),
+			      .in0  (tlu_ifu_trappc_w2),
+			      .in1  (pc_d_adj),
+			      .in2  (t2pc_s),
+			      .in3  (pc_w),
+			      .sel0_l  (fcl_fdp_trrbpc_sel_trap_bf_l[2]),
+			      .sel1_l  (fcl_fdp_trrbpc_sel_rb_bf_l[2]),
+			      .sel2_l  (fcl_fdp_trrbpc_sel_pcs_bf_l[2]),
+			      .sel3_l  (fcl_fdp_trrbpc_sel_err_bf_l[2]));
+   
+   dp_mux4ds #(49) trap_pc3_mux(.dout (t3_trap_rb_pc_bf),
+			      .in0  (tlu_ifu_trappc_w2),
+			      .in1  (pc_d_adj),
+			      .in2  (t3pc_s),
+			      .in3  (pc_w),
+			      .sel0_l  (fcl_fdp_trrbpc_sel_trap_bf_l[3]),
+			      .sel1_l  (fcl_fdp_trrbpc_sel_rb_bf_l[3]),
+			      .sel2_l  (fcl_fdp_trrbpc_sel_pcs_bf_l[3]),
+			      .sel3_l  (fcl_fdp_trrbpc_sel_err_bf_l[3]));
+`endif
+   
+
+   // can reduce this to a 2:1 mux since reset pc is not used any more and
+   // pc_f is not needed.
+   dp_mux3ds #(49) pcp4_mux(.dout  (nextpc_nosw_bf),
+			  .in0   (pcinc_f),
+			  .in1   (thr_trappc_bf),
+			  .in2   ({fcl_fdp_pcoor_f, pc_f[47:0]}),
+			  .sel0_l (fcl_fdp_noswpc_sel_inc_l_bf),
+			  .sel1_l (fcl_fdp_noswpc_sel_tnpc_l_bf),
+			  .sel2_l (fcl_fdp_noswpc_sel_old_l_bf));
+
+
+   // next S stage thread pc mux per thread
+   // Use advtpcs signal which works for stall (Aug '01)
+   // Merged pc_e/pc_d into the eqn to allow for rollback
+   dp_mux4ds #(49) t0pcf_mux(.dout (t0_next_pcs_f), 
+			   .in0  (t0pc_s), 
+			   .in1  ({fcl_fdp_pcoor_vec_f[0], t0pc_f[47:0]}),
+			   .in2  (pc_d_adj),
+			   .in3  (pc_e),
+			   .sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[0]),
+			   .sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[0]),
+			   .sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[0]),
+			   .sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[0]));
+
+`ifdef FPGA_SYN_1THREAD
+`else  
+   dp_mux4ds #(49) t1pcf_mux(.dout (t1_next_pcs_f), 
+			   .in0  (t1pc_s), 
+			   .in1  ({fcl_fdp_pcoor_vec_f[1], t1pc_f[47:0]}),
+			   .in2  (pc_d_adj),
+			   .in3  (pc_e),
+			   .sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[1]),
+			   .sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[1]),
+			   .sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[1]),
+			   .sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[1]));
+   
+   dp_mux4ds #(49) t2pcf_mux(.dout (t2_next_pcs_f), 
+			   .in0  (t2pc_s), 
+			   .in1  ({fcl_fdp_pcoor_vec_f[2], t2pc_f[47:0]}),
+//			   .in1  ({fcl_fdp_pcoor_f, pc_f[47:0]}),
+			   .in2  (pc_d_adj),
+			   .in3  (pc_e),
+			   .sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[2]),
+			   .sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[2]),
+			   .sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[2]),
+			   .sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[2]));
+   
+   dp_mux4ds #(49) t3pcf_mux(.dout (t3_next_pcs_f), 
+			   .in0  (t3pc_s), 
+			   .in1  ({fcl_fdp_pcoor_vec_f[3], t3pc_f[47:0]}),
+//			   .in1  ({fcl_fdp_pcoor_f, pc_f[47:0]}),
+			   .in2  (pc_d_adj),
+			   .in3  (pc_e),
+			   .sel0_l (fcl_fdp_nextpcs_sel_pcs_f_l[3]),
+			   .sel1_l (fcl_fdp_nextpcs_sel_pcf_f_l[3]),
+			   .sel2_l (fcl_fdp_nextpcs_sel_pcd_f_l[3]),
+			   .sel3_l (fcl_fdp_nextpcs_sel_pce_f_l[3]));
+`endif
+   
+   
+   // S stage thread PC regs;  use low power thr flop
+   dff_s  #(49)  t0pcs_reg(.din  (t0_next_pcs_f),  
+		                   .q    (t0pc_s), 
+		                   .clk  (clk),  .se(se), .si(), .so());
+`ifdef FPGA_SYN_1THREAD
+   assign pc_s = t0pc_s;            // FPGA_SYN_1THREAD: no thread mux, use thread 0's S-stage PC reg output
+   assign npc_s = t0_next_pcs_f;    // note: this is the din of t0pcs_reg, not its q
+`else  
+   dff_s  #(49)  t1pcs_reg(.din  (t1_next_pcs_f),  
+		                   .q    (t1pc_s), 
+		                   .clk  (clk),  .se(se), .si(), .so());
+   dff_s  #(49)  t2pcs_reg(.din  (t2_next_pcs_f),  
+		                   .q    (t2pc_s), 
+		                   .clk  (clk),  .se(se), .si(), .so());
+   dff_s  #(49)  t3pcs_reg(.din  (t3_next_pcs_f),  
+		                   .q    (t3pc_s), 
+		                   .clk  (clk),  .se(se), .si(), .so());
+   
+   // S stage PC mux -- need to protect
+   dp_mux4ds #(49) pcs_mux(.dout (pc_s),
+			 .in0  (t0pc_s), 
+			 .in1  (t1pc_s), 
+			 .in2  (t2pc_s), 
+			 .in3  (t3pc_s),
+			 .sel0_l (fcl_fdp_thr_s2_l[0]),
+			 .sel1_l (fcl_fdp_thr_s2_l[1]),
+			 .sel2_l (fcl_fdp_thr_s2_l[2]),
+			 .sel3_l (fcl_fdp_thr_s2_l[3]));
+
+   // S stage next PC mux -- need to protect
+   dp_mux4ds #(49) npcs_mux(.dout (npc_s),
+			  .in0  (t0_next_pcs_f), 
+			  .in1  (t1_next_pcs_f), 
+			  .in2  (t2_next_pcs_f), 
+			  .in3  (t3_next_pcs_f),
+			  .sel0_l (fcl_fdp_thr_s2_l[0]),
+			  .sel1_l (fcl_fdp_thr_s2_l[1]),
+			  .sel2_l (fcl_fdp_thr_s2_l[2]),
+			  .sel3_l (fcl_fdp_thr_s2_l[3]));
+`endif
+
+   // D stage PC and nPC
+   dff_s  #(49)  pcd_reg(.din (pc_s), 
+		                 .q   (pc_d), 
+		                 .clk (clk),  .se(se), .si(), .so());
+   dff_s  #(49)  npcd_reg(.din  (npc_s), 
+		                  .q    (npc_d), 
+		                  .clk  (clk), .se(se), .si(), .so());
+
+   assign am_mask = {{17{~fcl_fdp_addr_mask_d}}, 32'hffffffff};   // 49 bits: [48:32] = ~fcl_fdp_addr_mask_d, [31:0] = all ones
+
+   // nand2 -- bits [48:32] of the D-stage PC/nPC are cleared whenever fcl_fdp_addr_mask_d is set
+   assign pc_d_adj = pc_d & am_mask;
+   assign npc_d_adj = npc_d & am_mask;
+   
+   assign ifu_exu_pc_d = pc_d_adj[47:0];   // low 48 bits of the masked D-stage PC (bit 48 is not exported here)
+
+   // E stage PC and nPC
+   dff_s  #(49)  pce_reg(.din (pc_d_adj), 
+		                 .q   (pc_e), 
+		                 .clk (clk), .se(se), .si(), .so());
+   dff_s  #(49)  npce_reg(.din  (npc_d_adj), 
+		                  .q    (npc_e), 
+		                  .clk (clk), .se(se), .si(), .so());
+
+   assign fdp_fcl_pc_oor_e = pc_e[48];   // bit 48 of the E-stage PC, exported as the pc_oor flag
+   assign ifu_tlu_pc_oor_e = pc_e[48];   // second copy of the same bit under the ifu_tlu_ name
+
+   // M stage PC and nPC
+   dff_s  #(49)  pcm_reg(.din  (pc_e), 
+		                 .q    (pc_m), 
+		                 .clk  (clk),  .se(se), .si(), .so());
+   dff_s  #(49)  npcm_reg(.din (npc_e), 
+		                  .q   (npc_m), 
+		                  .clk (clk), .se(se), .si(), .so());
+   assign ifu_tlu_pc_m = pc_m[48:0];
+   assign ifu_tlu_npc_m = npc_m[48:0];
+   
+   // W stage PC and nPC
+   dff_s  #(49)  pcw_reg(.din  (pc_m), 
+		                 .q    (pc_w), 
+		                 .clk  (clk),  .se(se), .si(), .so());
+   dff_s  #(49)  npcw_reg(.din (npc_m), 
+		                  .q   (npc_w), 
+		                  .clk (clk), .se(se), .si(), .so());
+   
+//   assign ifu_tlu_pc_w = pc_w;
+//   assign ifu_tlu_npc_w = npc_w;
+
+   // PC incrementer
+   // can we fit the ofl logic on the side of the incrementer?
+   assign pcinc_f[1:0] = pc_f[1:0];
+   sparc_ifu_incr46 pc_inc(.a     (pc_f[47:2]), 
+			                     .a_inc (pcinc_f[47:2]), 
+			                     .ofl   ());   // ofl output not needed
+   
+//   assign pcinc_f[48] = inc_ofl & ~fcl_fdp_mask32b_f | fcl_fdp_pcoor_f;
+   assign pcinc_f[48] = ~pc_f[47] & pcinc_f[47] & ~fcl_fdp_mask32b_f | // bit 47 went 0->1 across the increment and fcl_fdp_mask32b_f is clear
+                        fcl_fdp_pcoor_f;   // ...or fcl_fdp_pcoor_f is already set
+
+   // Next-value mux for each thr trapnpc reg (in3 recirculates the reg's own output, i.e. hold)
+   dp_mux4ds #(49) t0tnpc_mux(.dout (trapnpc0_bf),
+			                        .in0  (tlu_ifu_trapnpc_w2),
+			                        .in1  (npc_w),
+                              .in2  (t0pc_f),
+			                        .in3  (t0_trapnpc_f),
+			                        .sel0_l  (fcl_fdp_thrtnpc_sel_tnpc_l[0]),
+			                        .sel1_l  (fcl_fdp_thrtnpc_sel_npcw_l[0]),
+			                        .sel2_l  (fcl_fdp_thrtnpc_sel_pcf_l[0]),
+			                        .sel3_l  (fcl_fdp_thrtnpc_sel_old_l[0]));
+  
+`ifdef FPGA_SYN_1THREAD
+`else
+   dp_mux4ds #(49) t1tnpc_mux(.dout (trapnpc1_bf),
+			    .in0  (tlu_ifu_trapnpc_w2),
+			    .in1  (npc_w),
+          .in2  (t1pc_f),
+			    .in3  (t1_trapnpc_f), 
+			    .sel0_l  (fcl_fdp_thrtnpc_sel_tnpc_l[1]),
+			    .sel1_l  (fcl_fdp_thrtnpc_sel_npcw_l[1]),
+          .sel2_l  (fcl_fdp_thrtnpc_sel_pcf_l[1]),
+			    .sel3_l  (fcl_fdp_thrtnpc_sel_old_l[1]));
+   
+   dp_mux4ds #(49) t2tnpc_mux(.dout (trapnpc2_bf),
+			    .in0  (tlu_ifu_trapnpc_w2),
+			    .in1  (npc_w),
+          .in2  (t2pc_f),
+			    .in3  (t2_trapnpc_f), 
+			    .sel0_l  (fcl_fdp_thrtnpc_sel_tnpc_l[2]),
+			    .sel1_l  (fcl_fdp_thrtnpc_sel_npcw_l[2]),
+          .sel2_l  (fcl_fdp_thrtnpc_sel_pcf_l[2]),
+			    .sel3_l  (fcl_fdp_thrtnpc_sel_old_l[2]));
+   
+   dp_mux4ds #(49) t3tnpc_mux(.dout (trapnpc3_bf),
+			    .in0  (tlu_ifu_trapnpc_w2),
+			    .in1  (npc_w),
+          .in2  (t3pc_f),
+			    .in3  (t3_trapnpc_f), 
+			    .sel0_l  (fcl_fdp_thrtnpc_sel_tnpc_l[3]),
+			    .sel1_l  (fcl_fdp_thrtnpc_sel_npcw_l[3]),
+          .sel2_l  (fcl_fdp_thrtnpc_sel_pcf_l[3]),
+			    .sel3_l  (fcl_fdp_thrtnpc_sel_old_l[3]));
+`endif
+   
+   // thread next trap pc reg
+   dff_s #(49) t0tnpcf_reg(.din  (trapnpc0_bf),
+		                   .q    (t0_trapnpc_f),
+		                   .clk  (clk),  .se(se), .si(), .so());
+`ifdef FPGA_SYN_1THREAD
+   assign thr_trappc_bf = t0_trapnpc_f;
+`else
+   dff_s #(49) t1tnpcf_reg(.din  (trapnpc1_bf),
+		                   .q    (t1_trapnpc_f),
+		                   .clk  (clk),  .se(se), .si(), .so());
+   dff_s #(49) t2tnpcf_reg(.din  (trapnpc2_bf),
+		                   .q    (t2_trapnpc_f),
+		                   .clk  (clk),  .se(se), .si(), .so());
+   dff_s #(49) t3tnpcf_reg(.din  (trapnpc3_bf),
+		                   .q    (t3_trapnpc_f),
+		                   .clk  (clk),  .se(se), .si(), .so());
+
+   dp_mux4ds #(49) nxttpc_mux(.dout (thr_trappc_bf),
+			    .in0  (t0_trapnpc_f), 
+			    .in1  (t1_trapnpc_f),
+			    .in2  (t2_trapnpc_f),
+			    .in3  (t3_trapnpc_f),
+			    .sel0_l (fcl_fdp_thr_s2_l[0]), // thr_s2 = thr_f
+			    .sel1_l (fcl_fdp_thr_s2_l[1]),
+			    .sel2_l (fcl_fdp_thr_s2_l[2]),
+			    .sel3_l (fcl_fdp_thr_s2_l[3]));
+`endif
+
+   // During rst nextpc_nosw_bf = PO_RESET_PC.  All thread PC_f registers,
+   // the icaddr_f register and the nextpc register should be loaded
+   // with nextpc_nosw_bf during reset.
+   // Eventually, we will load the reset_pc from the trap logic unit,
+   // which will arrive on the trap_pc bus.
+
+
+   // TBD in PC datapath:
+   // 1.  Add useNIR bit to PCs  -- DONE
+   // 2.  Add support for ifq request grant -- DONE
+   // 3.  Generate icache read signal (from fcl?) -- DONE
+   // 4.  Rollback functionality -- DONE
+   // 5.  PC range checks -- DONE
+   // 6.  Change PC to 48 bit value -- DONE
+   
+   
+//----------------------------------------------------------------------
+// Fetched Instruction Datapath
+//----------------------------------------------------------------------
+
+// This is logically 33 bits wide.  The NIR and IR datapaths are laid
+// side by side, making this a 66bit datapath.  The NIR path is
+// potentially a little longer.
+
+   // choose between NIR data and fetched data
+   dp_mux2es #(33)  usenir_mux(.dout (inst_s1), 
+			                       .in0  (icd_fdp_fetdata_s1[32:0]), 
+			                       .in1  (nirdata_s1),
+			                       .sel  (fcl_fdp_usenir_sel_nir_s1));  // 1=nir
+
+   // Instruction Output Mux
+   // CHANGE: now 4:1
+   dp_mux4ds  #(33)  instout_mux(.dout (fdp_inst_s),  
+			                         .in0 (icd_fdp_fetdata_s1[32:0]), 
+			                         .in1 (inst_s2), 
+			                         .in2 ({`NOP, 1'b0}),
+			                         .in3 (nirdata_s1[32:0]), 
+			                         .sel0_l (fcl_fdp_inst_sel_curr_s_l),
+			                         .sel1_l (fcl_fdp_inst_sel_switch_s_l),
+			                         .sel2_l (fcl_fdp_inst_sel_nop_s_l),
+			                         .sel3_l (fcl_fdp_inst_sel_nir_s_l));
+
+   assign fdp_fcl_swc_s2 = fdp_inst_s[0];      // bit 0: the extra (33rd) bit carried below the 32-bit word
+
+   assign fdp_fcl_op_s = fdp_inst_s[32:31];    // word bits [31:30]
+   assign fdp_fcl_op3_s = fdp_inst_s[25:22];   // word bits [24:21]
+   assign fdp_fcl_ibit_s = fdp_inst_s[14];     // word bit  [13]
+   
+   assign fdp_dtu_inst_s = fdp_inst_s[32:1];   // the 32-bit word itself: fdp_inst_s[k+1] is word bit k
+
+   // CHANGE: Random logic to fix timing paths
+   // output pin on RHS, as close to IRF as possible
+   // 16x drivers
+   // nand2-xor-invert: bit 4 of each specifier is flipped when its bit 3 and fcl_fdp_oddwin_s are both 1
+   assign ifu_exu_rs1_s[4] = fdp_inst_s[19] ^ 
+			                       (fdp_inst_s[18] & fcl_fdp_oddwin_s);
+   assign ifu_exu_rs1_s[3:0] = fdp_inst_s[18:15];
+   
+   assign ifu_exu_rs2_s[4] = (fdp_inst_s[5] ^ 
+			                        (fdp_inst_s[4] & fcl_fdp_oddwin_s));
+   assign ifu_exu_rs2_s[3:0] = fdp_inst_s[4:1];
+
+   assign ifu_exu_rs3_s[4] = (fdp_inst_s[30] ^ 
+			                        (fdp_inst_s[29] & fcl_fdp_oddwin_s));
+   assign ifu_exu_rs3_s[3:0] = fdp_inst_s[29:26];
+
+
+   dp_buffer #(33) insts1_buf(inst_s1_bf1, inst_s1[32:0]);
+		
+   // Thread instruction muxes
+   dp_mux4ds #(33)  t0inst_mux(.dout (t0inst_s1),
+			     .in0 (ifq_fdp_fill_inst),  
+			     .in1 (inst_s1_bf1), 
+			     .in2 (t0inst_s2),
+			     .in3 (rb_inst0_s),
+			     .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[0]),
+			     .sel1_l (fcl_fdp_tinst_sel_curr_s_l[0]),
+			     .sel2_l (fcl_fdp_tinst_sel_old_s_l[0]),
+			     .sel3_l (fcl_fdp_tinst_sel_rb_s_l[0]));
+
+`ifdef FPGA_SYN_1THREAD
+`else
+   dp_mux4ds #(33)  t1inst_mux(.dout (t1inst_s1),
+			     .in0 (ifq_fdp_fill_inst),  
+			     .in1 (inst_s1_bf1), 
+			     .in2 (t1inst_s2),
+			     .in3 (rb_inst1_s),
+			     .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[1]),
+			     .sel1_l (fcl_fdp_tinst_sel_curr_s_l[1]),
+			     .sel2_l (fcl_fdp_tinst_sel_old_s_l[1]),
+			     .sel3_l (fcl_fdp_tinst_sel_rb_s_l[1]));
+
+   dp_mux4ds #(33)  t2inst_mux(.dout (t2inst_s1),
+			     .in0 (ifq_fdp_fill_inst),  
+			     .in1 (inst_s1_bf1), 
+			     .in2 (t2inst_s2),
+			     .in3 (rb_inst2_s),
+			     .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[2]),
+			     .sel1_l (fcl_fdp_tinst_sel_curr_s_l[2]),
+			     .sel2_l (fcl_fdp_tinst_sel_old_s_l[2]),
+			     .sel3_l (fcl_fdp_tinst_sel_rb_s_l[2]));
+
+   dp_mux4ds #(33)  t3inst_mux(.dout (t3inst_s1),
+			     .in0 (ifq_fdp_fill_inst),  
+			     .in1 (inst_s1_bf1), 
+			     .in2 (t3inst_s2),
+			     .in3 (rb_inst3_s),
+			     .sel0_l (fcl_fdp_tinst_sel_ifq_s_l[3]),
+			     .sel1_l (fcl_fdp_tinst_sel_curr_s_l[3]),
+			     .sel2_l (fcl_fdp_tinst_sel_old_s_l[3]),
+			     .sel3_l (fcl_fdp_tinst_sel_rb_s_l[3]));
+`endif
+
+   // Thread Instruction Register
+   dff_s #(33) t0_inst_reg(.din  (t0inst_s1), 
+		                   .q    (t0inst_s2),
+		                   .clk  (clk),  .se(se), .si(), .so());
+`ifdef FPGA_SYN_1THREAD
+   assign inst_s2 = t0inst_s2;
+`else
+   dff_s #(33) t1_inst_reg(.din  (t1inst_s1), 
+		                   .q    (t1inst_s2),
+		                   .clk  (clk),  .se(se), .si(), .so());
+   dff_s #(33) t2_inst_reg(.din  (t2inst_s1), 
+		                   .q    (t2inst_s2),
+		                   .clk  (clk),  .se(se), .si(), .so());
+   dff_s #(33) t3_inst_reg(.din  (t3inst_s1), 
+		                   .q    (t3inst_s2),
+		                   .clk  (clk),  .se(se), .si(), .so());
+   
+   // switch instruction mux -- choose the instruction to switch to
+   // fcl keeps track of which t*inst_s2 is valid
+   dp_mux4ds  #(33) swinst_mux(.dout (inst_s2),
+			     .in0  (t0inst_s2), 
+			     .in1  (t1inst_s2), 
+			     .in2  (t2inst_s2), 
+			     .in3  (t3inst_s2),
+			     .sel0_l (fcl_fdp_thr_s2_l[0]),
+			     .sel1_l (fcl_fdp_thr_s2_l[1]),
+			     .sel2_l (fcl_fdp_thr_s2_l[2]),
+			     .sel3_l (fcl_fdp_thr_s2_l[3]));
+`endif
+
+   // Rollback instruction
+   dff_s #(33) rbinst_d_reg(.din (fdp_inst_s[32:0]),
+			                  .q   (inst_d),
+			                  .clk (clk),
+			                  .se  (se), .si(), .so());
+   
+   dff_s #(33) rbinst_e_reg(.din (inst_d),
+			                  .q   (inst_e),
+			                  .clk (clk),
+			                  .se  (se), .si(), .so());
+
+   dp_mux2es #(33) rbinst0_mux(.dout (rb_inst0_s),
+			                       .in0  (inst_d),
+			                       .in1  (inst_e),
+			                       .sel  (fcl_fdp_rbinst_sel_inste_s[0]));
+
+`ifdef FPGA_SYN_1THREAD
+`else
+   dp_mux2es #(33) rbinst1_mux(.dout (rb_inst1_s),
+			                       .in0  (inst_d),
+			                       .in1  (inst_e),
+			                       .sel  (fcl_fdp_rbinst_sel_inste_s[1]));
+
+   dp_mux2es #(33) rbinst2_mux(.dout (rb_inst2_s),
+			                       .in0  (inst_d),
+			                       .in1  (inst_e),
+			                       .sel  (fcl_fdp_rbinst_sel_inste_s[2]));
+
+   dp_mux2es #(33) rbinst3_mux(.dout (rb_inst3_s),
+			                       .in0  (inst_d),
+			                       .in1  (inst_e),
+			                       .sel  (fcl_fdp_rbinst_sel_inste_s[3]));
+`endif
+
+//----------------------------------------------------------------------
+// Next Instruction Datapath
+//----------------------------------------------------------------------
+
+   // Thread next instruction muxes
+//   dp_mux2es #(33) t0nir_mux(.dout (t0nir_in),
+//			                     .in0 (icd_fdp_topdata_s1[32:0]), 
+//			                     .in1 (t0nir), 
+//			                     .sel (fcl_fdp_thr_s1_l[0]));  // 0=new
+//   dp_mux2es #(33) t1nir_mux(.dout (t1nir_in),
+//			                     .in0 (icd_fdp_topdata_s1[32:0]), 
+//			                     .in1 (t1nir), 
+//			                     .sel (fcl_fdp_thr_s1_l[1])); 
+//   dp_mux2es #(33) t2nir_mux(.dout (t2nir_in),
+//			                     .in0 (icd_fdp_topdata_s1[32:0]), 
+//			                     .in1 (t2nir), 
+//			                     .sel (fcl_fdp_thr_s1_l[2])); 
+//   dp_mux2es #(33) t3nir_mux(.dout (t3nir_in),
+//			                     .in0 (icd_fdp_topdata_s1[32:0]), 
+//			                     .in1 (t3nir), 
+//			                     .sel (fcl_fdp_thr_s1_l[3])); 
+
+   // Thread Next Instruction Register
+   wire   clk_nir0;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   
+   bw_u1_ckenbuf_6x  ckennir0(.rclk (rclk),
+                              .clk  (clk_nir0),
+                              .en_l (fcl_fdp_thr_s1_l[0]),
+                              .tm_l (~se));
+`endif
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(33) t0nir_reg(.din (icd_fdp_topdata_s1[32:0]), 
+		                   .q    (t0nir), 
+		                   .en  (~(fcl_fdp_thr_s1_l[0])), .clk(rclk), .se(se), .si(), .so());
+`else
+   
+   dff_s #(33) t0nir_reg(.din  (icd_fdp_topdata_s1[32:0]), 
+		                   .q    (t0nir), 
+		                   .clk  (clk_nir0), .se(se), .si(), .so());
+`endif
+   
+`ifdef FPGA_SYN_1THREAD
+   assign nirdata_s1 = t0nir; 
+`else
+   wire   clk_nir1;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   
+   bw_u1_ckenbuf_6x  ckennir1(.rclk (rclk),
+                              .clk  (clk_nir1),
+                              .en_l (fcl_fdp_thr_s1_l[1]),
+                              .tm_l (~se));
+`endif
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(33)  t1nir_reg(.din  (icd_fdp_topdata_s1[32:0]), 
+		                   .q    (t1nir), 
+		                   .en (~(fcl_fdp_thr_s1_l[1])), .clk  (rclk), .se(se), .si(), .so());
+`else
+   dff_s #(33) t1nir_reg(.din  (icd_fdp_topdata_s1[32:0]), 
+		                   .q    (t1nir), 
+		                   .clk  (clk_nir1), .se(se), .si(), .so());
+`endif
+   
+   wire   clk_nir2;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   
+   bw_u1_ckenbuf_6x  ckennir2(.rclk (rclk),
+                              .clk  (clk_nir2),
+                              .en_l (fcl_fdp_thr_s1_l[2]),
+                              .tm_l (~se));
+`endif
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(33) t2nir_reg(.din  (icd_fdp_topdata_s1[32:0]),
+		                   .q    (t2nir), 
+		                   .en (~(fcl_fdp_thr_s1_l[2])), .clk  (rclk), .se(se), .si(), .so());
+`else
+   dff_s #(33) t2nir_reg(.din  (icd_fdp_topdata_s1[32:0]),
+		                   .q    (t2nir), 
+		                   .clk  (clk_nir2), .se(se), .si(), .so());
+`endif
+   wire   clk_nir3;
+`ifdef FPGA_SYN_CLK_EN
+`else
+   
+   bw_u1_ckenbuf_6x  ckennir3(.rclk (rclk),
+                              .clk  (clk_nir3),
+                              .en_l (fcl_fdp_thr_s1_l[3]),
+                              .tm_l (~se));
+`endif
+`ifdef FPGA_SYN_CLK_DFF
+   dffe_s #(33) t3nir_reg(.din  (icd_fdp_topdata_s1[32:0]), 
+		                   .q    (t3nir), 
+		                   .en (~(fcl_fdp_thr_s1_l[3])), .clk  (rclk), .se(se), .si(), .so());
+`else
+   
+   dff_s #(33) t3nir_reg(.din  (icd_fdp_topdata_s1[32:0]), 
+		                   .q    (t3nir), 
+		                   .clk  (clk_nir3), .se(se), .si(), .so());
+`endif
+   
+   // Next thread NIR mux  (nir output mux)
+   dp_mux4ds  #(33) nextnir_mux(.dout (nirdata_s1),
+		                          .in0 (t0nir), 
+                              .in1 (t1nir), 
+                              .in2 (t2nir), 
+                              .in3 (t3nir),
+		                          .sel0_l (fcl_fdp_nirthr_s1_l[0]),
+		                          .sel1_l (fcl_fdp_nirthr_s1_l[1]),
+		                          .sel2_l (fcl_fdp_nirthr_s1_l[2]),
+		                          .sel3_l (fcl_fdp_nirthr_s1_l[3]));
+`endif
+
+   // TBD in fetched instruction DP:
+   // 1. Rollback -- DONE
+   // 2. Icache parity check (increase fet data and top data to 34 bits)
+
+endmodule // sparc_ifu_fdp
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_thrcmpl.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_thrcmpl.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_thrcmpl.v	(revision 6)
@@ -0,0 +1,204 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_thrcmpl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_thrcmpl
+//  Description:	
+//  The thread completion block processes the completion signals from
+//  the different cpu blocks and generates a unified completion
+//  signal. 
+*/
+
+module sparc_ifu_thrcmpl(/*AUTOARG*/
+   // Outputs
+   completion, wm_imiss, wm_other, 
+   // Inputs
+   clk, se, si, reset, fcl_ifq_icmiss_s1, erb_dtu_ifeterr_d1, 
+   sw_cond_s, en_spec_g, atr_s, dtu_fcl_thr_active, ifq_dtu_thrrdy, 
+   ifq_dtu_pred_rdy, exu_lop_done, branch_done_d, fixedop_done, 
+   ldmiss, spec_ld_d, trap, retr_thr_wakeup, flush_wake_w2, 
+   ldhit_thr, spec_ld_g, clear_wmo_e, wm_stbwait, stb_retry, 
+   rst_thread, trap_thrrdy, thr_s2, thr_e, thr_s1, fp_thrrdy, 
+   lsu_ifu_ldst_cmplt, sta_done_e, killed_inst_done_e
+   );
+   // Every equation below is bitwise on [3:0] vectors (presumably one bit per thread - confirm).
+   input     clk, se, si, reset;     // si is never referenced inside this module
+
+   input     fcl_ifq_icmiss_s1;      // sets wm_imiss for the thr_s1 bits
+   input     erb_dtu_ifeterr_d1;     // sets wm_imiss for the thr_e bits
+   
+   input     sw_cond_s;              // sets wm_other for the thr_s2 bits
+   input     en_spec_g;              // when 1, ldhit_thr is ignored (see ld_thrrdy)
+   input     atr_s;                  // when 1, ifq_dtu_pred_rdy is ignored (see pred_ifq_rdy)
+
+   input [3:0] dtu_fcl_thr_active;     // gates wm_other set/hold terms and pred_ifq_rdy
+   input [3:0] ifq_dtu_thrrdy,         // I$ miss completion
+               ifq_dtu_pred_rdy,       // qualified into pred_ifq_rdy below
+		           exu_lop_done,  // mul, div, wrpr, sav, rest
+               branch_done_d,
+		           fixedop_done;           // br, rdsr, wrs/pr, 
+   input [3:0] ldmiss,                 // sets wm_other
+		           spec_ld_d,              // drops the wm_other hold term
+		           trap,                   // sets wm_other
+		           retr_thr_wakeup,
+		           flush_wake_w2,
+		           ldhit_thr,
+		           spec_ld_g;              // masks lsu_ifu_ldst_cmplt
+
+   input       clear_wmo_e;            // with thr_e: blocks the sw_cond_s set and drops the wm_other hold
+   input [3:0] wm_stbwait,             // wait mask supplied from outside this module
+               stb_retry;              // ready term paired with wm_stbwait in completion
+
+   input [3:0] rst_thread,             // sets wm_other; not qualified by dtu_fcl_thr_active
+		           trap_thrrdy;            // one of the other_thrrdy terms
+
+   input [3:0] thr_s2,                 // masks ANDed with replicated scalar events
+		           thr_e,
+		           thr_s1;
+   
+   input [3:0] fp_thrrdy;
+
+   input [3:0] lsu_ifu_ldst_cmplt;	   // sta local, ld and atomic done
+   
+   input       sta_done_e,
+		           killed_inst_done_e;        // long lat op was killed
+   
+   // .. Other completion signals needed
+   // 1. STA completion from LSU -- real mem done 10/03, local TBD
+   // 2. Atomic completion  -- done
+   // 3. membar completion (lsu) -- done
+   // 4. flush completion (lsu)
+   // 5. FP op completion (ffu)
+   // 
+
+   output [3:0] completion;            // combinational; see the assign at the end of the module
+   output [3:0] wm_imiss;              // registered wait mask, released by imiss_thrrdy
+   output [3:0] wm_other;              // registered wait mask, released by other_thrrdy
+
+   // local signals
+   wire [3:0]   wm_imiss,
+		            wm_other,
+		            wmi_nxt,                // next-state value of wm_imiss
+		            wmo_nxt;                // next-state value of wm_other
+
+   wire [3:0]   clr_wmo_thr_e;
+   
+   wire [3:0]   ldst_thrrdy,
+		            ld_thrrdy,
+		            sta_thrrdy,
+		            killed_thrrdy,
+		            fp_thrrdy,              // also declared as an input above
+                pred_ifq_rdy,
+                imiss_thrrdy,
+		            other_thrrdy;
+   //   wire [3:0] 	can_imiss;
+   
+   //---------------------------------------------------------------------- 
+   // Code begins here
+   //----------------------------------------------------------------------
+   
+   // Thread completion
+   // Since an imiss can overlap with anything else, have to make sure
+   // the imiss condition has been cleared.
+   // Imiss itself has to make sure ALL OTHER conditions have been
+   // cleared.  In this code, I am not checking for branches being
+   // cleared, since Imiss is assumed to take much longer than a branch.
+   // -- may not be a valid assumption, since milhits could be faster
+
+//   assign  can_imiss = fcl_ifq_canthr;
+                        // & (wm_imiss | ({4{fcl_ifq_icmiss_s1}} & thr_s1));
+   // Wait-mask state: wm_imiss / wm_other register wmi_nxt / wmo_nxt; .rst is tied to reset.
+   dffr_s #(4) wmi_ff(.din (wmi_nxt),
+		              .clk (clk),
+		              .q   (wm_imiss),
+		              .rst (reset),
+		              .se  (se), .si(), .so());   // scan in/out left unconnected
+
+   dffr_s #(4) wmo_ff(.din (wmo_nxt),
+		              .clk (clk),
+		              .q   (wm_other),
+		              .rst (reset),
+		              .se  (se), .si(), .so());   // scan in/out left unconnected
+
+   assign  wmi_nxt = ({4{fcl_ifq_icmiss_s1}} & thr_s1) | // set
+		                   ({4{erb_dtu_ifeterr_d1}} & thr_e) |  // set
+			                   (wm_imiss & ~imiss_thrrdy);    // hold until imiss_thrrdy
+
+   // clear wm_other when we have a retracted store
+   assign  clr_wmo_thr_e = {4{clear_wmo_e}} & thr_e;
+   // Precedence note: & binds tighter than |, so wmo_nxt is an OR of three terms.
+   assign  wmo_nxt = (({4{sw_cond_s}} & thr_s2 & ~clr_wmo_thr_e) | 
+		                  trap | ldmiss) & dtu_fcl_thr_active | 
+                      rst_thread |  // set
+		                  wm_other & dtu_fcl_thr_active &
+		                  ~(other_thrrdy | spec_ld_d | clr_wmo_thr_e); // hold unless one of these
+
+   // A load hit signal is always for the load which is being filled
+   // to the RF.  If speculation is enabled, the load would have
+   // completed even before the hit signal.  So need to suppress the
+   // completions signal.
+
+   // load miss, st buf hit, ld/st alternate completion
+   assign ldst_thrrdy = lsu_ifu_ldst_cmplt & ~spec_ld_g;   
+   assign ld_thrrdy = ldhit_thr & {4{~en_spec_g}};         // only when en_spec_g is 0
+   assign sta_thrrdy = thr_e & {4{sta_done_e}};
+   assign killed_thrrdy = thr_e & {4{killed_inst_done_e}};
+
+   // everything else
+   assign other_thrrdy = (ldst_thrrdy     |     // ld, sta local, atomic
+                          branch_done_d   |     // br
+	                        ld_thrrdy       |     // load hit without spec
+	                        exu_lop_done    |     // mul, div, win mgmt
+	                        fixedop_done    |     // rdsr, wrspr
+	                        killed_thrrdy   |     // ll op was annulled
+    	                    retr_thr_wakeup |     // retract cond compl
+	                        flush_wake_w2   |     // wake up after ecc 
+	                        fp_thrrdy       |     // fp completion
+	                        sta_thrrdy      |     // sta to real memory
+                	        trap_thrrdy);         // trap
+
+   // Imiss predicted ready
+   assign pred_ifq_rdy = ifq_dtu_pred_rdy & {4{~atr_s}} & dtu_fcl_thr_active;
+   assign imiss_thrrdy = pred_ifq_rdy | ifq_dtu_thrrdy;
+   
+//   assign completion = imiss_thrrdy & (~(wm_other | wm_stbwait) |
+//					                               other_thrrdy) |       //see C1
+//		                   other_thrrdy & (~(wm_imiss | wmi_nxt));
+
+//   assign completion = (imiss_thrrdy & ~(wm_other | wm_stbwait) |
+//		                    other_thrrdy & ~(wm_stbwait | wm_imiss) |
+//                        stb_retry & ~(wm_other | wm_imiss) |
+//                        imiss_thrrdy & other_thrrdy & ~wm_stbwait |
+//                        imiss_thrrdy & stb_retry & ~wm_other |
+//                        stb_retry & other_thrrdy & ~wm_imiss);
+   // Per bit: at least one wait mask is set AND every set wait mask has its ready term this cycle.
+   assign completion = ((imiss_thrrdy | ~wm_imiss) &
+                        (other_thrrdy | ~wm_other) &
+                        (stb_retry | ~wm_stbwait) &
+                        (wm_imiss | wm_other | wm_stbwait));
+
+   // C1 (first commented-out expression above): should we do ~(wm_other | wmo_nxt)??
+   // When an imiss is pending, we cannot be doing another fetch, so I
+   // don't think so.  It seems nice and symmetric to put it in
+   // though, unfortunately this results in a timing problem on swc_s 
+   // and trap
+   
+endmodule // sparc_ifu_thrcmpl
Index: /trunk/T1-CPU/ifu/sparc_ifu_errdp.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_errdp.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_errdp.v	(revision 6)
@@ -0,0 +1,672 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_errdp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:  sparc_ifu_errdp
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+`include "lsu.h"
+`include "ifu.h"
+
+module sparc_ifu_errdp(/*AUTOARG*/
+   // Outputs
+   so, ifu_lsu_ldxa_data_w2, erb_dtu_imask, erd_erc_tlbt_pe_s1, 
+   erd_erc_tlbd_pe_s1, erd_erc_tagpe_s1, erd_erc_nirpe_s1, 
+   erd_erc_fetpe_s1, erd_erc_tte_pgsz, 
+   // Inputs
+   rclk, se, si, erb_reset, itlb_rd_tte_data, itlb_rd_tte_tag, 
+   itlb_ifq_paddr_s, wsel_fdp_fetdata_s1, wsel_fdp_topdata_s1, 
+   wsel_erb_asidata_s, ict_itlb_tags_f, icv_itlb_valid_f, 
+   lsu_ifu_err_addr, spu_ifu_err_addr_w2, fdp_erb_pc_f, 
+   exu_ifu_err_reg_m, exu_ifu_err_synd_m, ffu_ifu_err_reg_w2, 
+   ffu_ifu_err_synd_w2, tlu_itlb_rw_index_g, erc_erd_pgsz_b0, 
+   erc_erd_pgsz_b1, erc_erd_erren_asidata, erc_erd_errstat_asidata, 
+   erc_erd_errinj_asidata, ifq_erb_asidata_i2, ifq_erb_wrtag_f, 
+   ifq_erb_wrindex_f, erc_erd_asiway_s1_l, fcl_erb_itlbrd_data_s, 
+   erc_erd_ld_imask, erc_erd_asisrc_sel_icd_s_l, 
+   erc_erd_asisrc_sel_misc_s_l, erc_erd_asisrc_sel_err_s_l, 
+   erc_erd_asisrc_sel_itlb_s_l, erc_erd_errasi_sel_en_l, 
+   erc_erd_errasi_sel_stat_l, erc_erd_errasi_sel_inj_l, 
+   erc_erd_errasi_sel_addr_l, erc_erd_miscasi_sel_ict_l, 
+   erc_erd_miscasi_sel_imask_l, erc_erd_miscasi_sel_other_l, 
+   erc_erd_asi_thr_l, erc_erd_eadr0_sel_irf_l, 
+   erc_erd_eadr0_sel_itlb_l, erc_erd_eadr0_sel_frf_l, 
+   erc_erd_eadr0_sel_lsu_l, erc_erd_eadr1_sel_pcd1_l, 
+   erc_erd_eadr1_sel_l1pa_l, erc_erd_eadr1_sel_l2pa_l, 
+   erc_erd_eadr1_sel_other_l, erc_erd_eadr2_sel_mx1_l, 
+   erc_erd_eadr2_sel_wrt_l, erc_erd_eadr2_sel_mx0_l, 
+   erc_erd_eadr2_sel_old_l
+   );
+
+   input       rclk, 
+               se, 
+               si, 
+               erb_reset;
+
+   input [42:0] itlb_rd_tte_data;   // this is in s1
+   input [58:0] itlb_rd_tte_tag;    // this is in s1
+   input [39:10] itlb_ifq_paddr_s;
+   input [33:0] wsel_fdp_fetdata_s1,    
+		            wsel_fdp_topdata_s1;
+   input [33:0] wsel_erb_asidata_s;
+   
+   input [`IC_TAG_ALL_HI:0] ict_itlb_tags_f;
+   input [3:0]              icv_itlb_valid_f;
+
+   input [47:4]  lsu_ifu_err_addr;
+   input [39:4]  spu_ifu_err_addr_w2;
+   input [47:0]  fdp_erb_pc_f;
+   
+   input [7:0]   exu_ifu_err_reg_m;
+   input [7:0]   exu_ifu_err_synd_m;
+   input [5:0]   ffu_ifu_err_reg_w2;
+   input [13:0]  ffu_ifu_err_synd_w2;
+   input [5:0]   tlu_itlb_rw_index_g;
+
+   input         erc_erd_pgsz_b0,
+                 erc_erd_pgsz_b1;
+
+   input [1:0]   erc_erd_erren_asidata;
+   input [22:0]  erc_erd_errstat_asidata;
+   input [31:0]  erc_erd_errinj_asidata;   
+   input [47:0]  ifq_erb_asidata_i2;
+
+   input [`IC_TAG_SZ-1:0] ifq_erb_wrtag_f;
+   input [`IC_IDX_HI:4]   ifq_erb_wrindex_f;
+   
+   // mux selects
+   input [3:0]  erc_erd_asiway_s1_l;
+   input        fcl_erb_itlbrd_data_s;
+   input        erc_erd_ld_imask;
+   
+   input        erc_erd_asisrc_sel_icd_s_l,  
+		            erc_erd_asisrc_sel_misc_s_l,
+		            erc_erd_asisrc_sel_err_s_l,
+		            erc_erd_asisrc_sel_itlb_s_l;
+
+   input        erc_erd_errasi_sel_en_l,
+		            erc_erd_errasi_sel_stat_l,
+		            erc_erd_errasi_sel_inj_l,
+		            erc_erd_errasi_sel_addr_l;
+
+   input        erc_erd_miscasi_sel_ict_l,
+		            erc_erd_miscasi_sel_imask_l,
+		            erc_erd_miscasi_sel_other_l;
+
+   input [3:0]  erc_erd_asi_thr_l;   
+	 
+   input [3:0]  erc_erd_eadr0_sel_irf_l,
+		            erc_erd_eadr0_sel_itlb_l,
+		            erc_erd_eadr0_sel_frf_l,
+		            erc_erd_eadr0_sel_lsu_l;
+   
+   input [3:0]  erc_erd_eadr1_sel_pcd1_l,
+		            erc_erd_eadr1_sel_l1pa_l,
+		            erc_erd_eadr1_sel_l2pa_l,
+		            erc_erd_eadr1_sel_other_l;
+   
+   input [3:0]  erc_erd_eadr2_sel_mx1_l,
+		            erc_erd_eadr2_sel_wrt_l,
+		            erc_erd_eadr2_sel_mx0_l,
+		            erc_erd_eadr2_sel_old_l;
+
+   
+   output       so;
+   output [63:0] ifu_lsu_ldxa_data_w2;
+   output [38:0] erb_dtu_imask;
+//   output [9:0]  erb_ifq_paddr_s;
+   
+   output [1:0]  erd_erc_tlbt_pe_s1,
+		             erd_erc_tlbd_pe_s1;
+   output [3:0]  erd_erc_tagpe_s1;
+   output        erd_erc_nirpe_s1,
+		             erd_erc_fetpe_s1;
+
+   output [2:0]  erd_erc_tte_pgsz;
+
+
+//   
+// local signals   
+//
+
+   wire [47:4]   lsu_err_addr;
+   
+   wire [`IC_TAG_ALL_HI:0]  ictags_s1;
+   wire [3:0]               icv_data_s1;
+   wire [31:0]              tag_asi_data;
+
+   wire [47:4]              t0_eadr_mx0_out,
+		                        t1_eadr_mx0_out,
+		                        t2_eadr_mx0_out,
+		                        t3_eadr_mx0_out,
+ 		                        t0_eadr_mx1_out,
+		                        t1_eadr_mx1_out,
+		                        t2_eadr_mx1_out,
+		                        t3_eadr_mx1_out;
+   
+   wire [47:4]              t0_err_addr_nxt,
+		                        t0_err_addr,
+	 	                        t1_err_addr_nxt,
+		                        t1_err_addr,
+		                        t2_err_addr_nxt,
+		                        t2_err_addr,
+		                        t3_err_addr_nxt,
+		                        t3_err_addr;
+   
+   wire [47:4]              err_addr_asidata;
+	 
+   wire [63:0]              formatted_tte_data,
+		                        formatted_tte_tag,
+		                        tlb_asi_data,
+		                        misc_asi_data,
+		                        err_asi_data,
+                            ldxa_data_s,
+                            ldxa_data_d;
+   
+   wire [39:4]              paddr_s1,
+		                        paddr_d1;
+   
+   wire [39:4]              ifet_addr_f;
+   
+   wire [47:0]              pc_s1;
+   wire [47:4]              pc_d1;
+   wire [7:0]               irfaddr_w,
+                            irfsynd_w;
+   wire                     irfaddr_4_w;
+   wire [5:0]               itlb_asi_index;
+
+   wire [38:0]              imask_next;
+
+   wire                     clk;
+   
+   
+//
+// Code Begins Here
+//
+   assign                   clk = rclk;
+   
+//-------------
+// Tags
+//-------------   
+   dff_s #(`IC_TAG_ALL) tags_reg(.din (ict_itlb_tags_f),
+		                           .q   (ictags_s1),
+		                           .clk (clk),
+		                           .se  (se), .si(), .so());
+
+   dff_s #(4) vbits_reg(.din (icv_itlb_valid_f[3:0]),
+		                  .q   (icv_data_s1),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   // check parity
+   sparc_ifu_par32  tag_par0(.in  ({3'b0, ictags_s1[`IC_TAG_SZ:0]}),
+			                       .out (erd_erc_tagpe_s1[0]));
+   sparc_ifu_par32  tag_par1(.in  ({3'b0, ictags_s1[((2*`IC_TAG_SZ) + 1):(`IC_TAG_SZ+1)]}),
+			                       .out (erd_erc_tagpe_s1[1]));
+   sparc_ifu_par32  tag_par2(.in  ({3'b0, ictags_s1[((3*`IC_TAG_SZ) + 2):(2*(`IC_TAG_SZ)+2)]}),
+			                       .out (erd_erc_tagpe_s1[2]));
+   sparc_ifu_par32  tag_par3(.in  ({3'b0, ictags_s1[((4*`IC_TAG_SZ) + 3):(3*(`IC_TAG_SZ)+3)]}),
+			                       .out (erd_erc_tagpe_s1[3]));
+
+   dp_mux4ds #(32) asitag_mux(.dout (tag_asi_data[31:0]),
+			 .in0  ({icv_data_s1[0], 1'b0, ictags_s1[28], 1'b0, ictags_s1[27:0]}),
+			 .in1  ({icv_data_s1[1], 1'b0, ictags_s1[57], 1'b0, ictags_s1[56:29]}),
+			 .in2  ({icv_data_s1[2], 1'b0, ictags_s1[86], 1'b0, ictags_s1[85:58]}),
+			 .in3  ({icv_data_s1[3], 1'b0, ictags_s1[115], 1'b0, ictags_s1[114:87]}),
+			 .sel0_l (erc_erd_asiway_s1_l[0]),
+			 .sel1_l (erc_erd_asiway_s1_l[1]),
+			 .sel2_l (erc_erd_asiway_s1_l[2]),
+			 .sel3_l (erc_erd_asiway_s1_l[3]));
+
+//------------------
+// Data
+//------------------
+   // parity check on instruction
+   // This may have to be done in the next stage (at least partially)
+   
+   sparc_ifu_par34 nir_par(.in  (wsel_fdp_topdata_s1[33:0]),
+			                     .out (erd_erc_nirpe_s1));
+   sparc_ifu_par34 inst_par(.in  (wsel_fdp_fetdata_s1[33:0]),
+			                      .out (erd_erc_fetpe_s1));
+
+//----------------------------------------------------------------------
+// TLB read data
+//----------------------------------------------------------------------
+
+//`ifdef SPARC_HPV_EN
+   // don't include v(26) and u(24) bits in parity   
+   sparc_ifu_par32 tt_tag_par0(.in  ({itlb_rd_tte_tag[33:27],
+				                              itlb_rd_tte_tag[25],
+				                              itlb_rd_tte_tag[23:0]}),
+			                         .out (erd_erc_tlbt_pe_s1[0]));
+//`else
+//   // don't include v(28) and u(26) bits in parity
+//   sparc_ifu_par32 tt_tag_par0(.in  ({itlb_rd_tte_tag[33:29],
+//				                              itlb_rd_tte_tag[27],
+//				                              itlb_rd_tte_tag[25:0]}),
+//			                         .out (erd_erc_tlbt_pe_s1[0]));
+//`endif // !`ifdef SPARC_HPV_EN
+   
+   
+   sparc_ifu_par32 tt_tag_par1(.in  ({7'b0, itlb_rd_tte_tag[58:34]}),
+			                         .out (erd_erc_tlbt_pe_s1[1]));
+   
+   sparc_ifu_par32 tt_data_par0(.in  (itlb_rd_tte_data[31:0]),
+				                        .out (erd_erc_tlbd_pe_s1[0]));
+   sparc_ifu_par16 tt_data_par1(.in  ({5'b0, itlb_rd_tte_data[42:32]}),
+				                        .out (erd_erc_tlbd_pe_s1[1]));
+
+//   assign erd_erc_tte_lock_s1 = itlb_rd_tte_data[`STLB_DATA_L];
+
+   
+//`ifdef	SPARC_HPV_EN
+   assign erd_erc_tte_pgsz[2:0] = {itlb_rd_tte_data[`STLB_DATA_27_22_SEL],
+				                           itlb_rd_tte_data[`STLB_DATA_21_16_SEL],
+				                           itlb_rd_tte_data[`STLB_DATA_15_13_SEL]};
+
+   assign formatted_tte_tag[63:0] =
+          {
+//           `ifdef SUN4V_TAG_RD
+           // implement this!
+           itlb_rd_tte_tag[58:55],
+//           `else
+//         {4{itlb_rd_tte_tag[53]}},                                     // 4b
+//           `endif
+
+           itlb_rd_tte_tag[`STLB_TAG_PARITY],     // Parity                 1b
+           itlb_rd_tte_tag[`STLB_TAG_VA_27_22_V], // mxsel2 - b27:22 vld    1b
+           itlb_rd_tte_tag[`STLB_TAG_VA_21_16_V], // mxsel1 - b21:16 vld    1b
+           itlb_rd_tte_tag[`STLB_TAG_VA_15_13_V], // mxsel0 - b15:13 vld    1b
+
+           {8{itlb_rd_tte_tag[53]}},                                     // 8b
+           itlb_rd_tte_tag[`STLB_TAG_VA_47_28_HI:`STLB_TAG_VA_47_28_LO], // 20b
+           itlb_rd_tte_tag[`STLB_TAG_VA_27_22_HI:`STLB_TAG_VA_27_22_LO], // 6b
+           itlb_rd_tte_tag[`STLB_TAG_VA_21_16_HI:`STLB_TAG_VA_21_16_LO], // 6b
+           itlb_rd_tte_tag[`STLB_TAG_VA_15_13_HI:`STLB_TAG_VA_15_13_LO], // 3b
+           itlb_rd_tte_tag[`STLB_TAG_CTXT_12_0_HI:`STLB_TAG_CTXT_12_0_LO]// 13b
+           } ;
+//`else
+//   assign erd_erc_tte_pgsz[2:0] = {itlb_rd_tte_data[`STLB_DATA_21_19_SEL],
+//				                           itlb_rd_tte_data[`STLB_DATA_18_16_SEL],
+//				                           itlb_rd_tte_data[`STLB_DATA_15_13_SEL]};
+//
+//   assign formatted_tte_tag[63:0] =
+//          {
+//           {16{itlb_rd_tte_tag[54]}},                                    // 16b
+//           itlb_rd_tte_tag[`STLB_TAG_VA_47_22_HI:`STLB_TAG_VA_47_22_LO], // 26b
+//           itlb_rd_tte_tag[`STLB_TAG_VA_21_20_HI:`STLB_TAG_VA_21_20_LO], // 3b
+//           itlb_rd_tte_tag[`STLB_TAG_VA_19],
+//           itlb_rd_tte_tag[`STLB_TAG_VA_18_17_HI:`STLB_TAG_VA_18_17_LO], // 3b
+//           itlb_rd_tte_tag[`STLB_TAG_VA_16],
+//           itlb_rd_tte_tag[`STLB_TAG_VA_15_14_HI:`STLB_TAG_VA_15_14_LO], // 3b
+//           itlb_rd_tte_tag[`STLB_TAG_VA_13],
+//           itlb_rd_tte_tag[`STLB_TAG_CTXT_12_7_HI:`STLB_TAG_CTXT_12_7_LO],//13b
+//           itlb_rd_tte_tag[`STLB_TAG_CTXT_6_0_HI:`STLB_TAG_CTXT_6_0_LO]
+//           } ;
+//`endif // !`ifdef SPARC_HPV_EN
+   
+
+//`ifdef	SPARC_HPV_EN
+   assign formatted_tte_data[63:0] =
+          {      
+           itlb_rd_tte_tag[`STLB_TAG_V],           // V    (1b)
+           erc_erd_pgsz_b1,                        // pg SZ msb 4m or 512k
+           erc_erd_pgsz_b0,                        // pg sz lsb 4m or 64k
+           itlb_rd_tte_data[`STLB_DATA_NFO],       // NFO  (1b)
+           itlb_rd_tte_data[`STLB_DATA_IE],        // IE   (1b)
+           10'b0,                                  // soft2 
+           itlb_rd_tte_data[`STLB_DATA_27_22_SEL], // pgsz b2
+           itlb_rd_tte_tag[`STLB_TAG_U],
+
+           itlb_rd_tte_data[`STLB_DATA_PARITY],      // Parity   (1b)
+           itlb_rd_tte_data[`STLB_DATA_27_22_SEL],   // mxsel2_l (1b)
+           itlb_rd_tte_data[`STLB_DATA_21_16_SEL],   // mxsel1_l (1b)
+           itlb_rd_tte_data[`STLB_DATA_15_13_SEL],   // mxsel0_l (1b)
+  
+           2'b0,                                   // unused diag 2b
+           1'b0,                                   // ?? PA   (28b)
+           itlb_rd_tte_data[`STLB_DATA_PA_39_28_HI:`STLB_DATA_PA_39_28_LO],
+           itlb_rd_tte_data[`STLB_DATA_PA_27_22_HI:`STLB_DATA_PA_27_22_LO],
+           itlb_rd_tte_data[`STLB_DATA_PA_21_16_HI:`STLB_DATA_PA_21_16_LO],
+           itlb_rd_tte_data[`STLB_DATA_PA_15_13_HI:`STLB_DATA_PA_15_13_LO],
+           6'b0,                                   // ?? 12-7 (6b)
+           itlb_rd_tte_data[`STLB_DATA_L],         // L    (1b)
+           itlb_rd_tte_data[`STLB_DATA_CP],        // CP   (1b)
+           itlb_rd_tte_data[`STLB_DATA_CV],        // CV   (1b)
+           itlb_rd_tte_data[`STLB_DATA_E],         // E    (1b)
+           itlb_rd_tte_data[`STLB_DATA_P],         // P    (1b)
+           itlb_rd_tte_data[`STLB_DATA_W],         // W    (1b)
+	         1'b0
+        } ;
+//`else // !`ifdef SPARC_HPV_EN
+//
+//   assign formatted_tte_data[63:0] =
+//          {      
+//           itlb_rd_tte_tag[`STLB_TAG_V],           // V    (1b)
+//           erc_erd_pgsz_b1,                        // pg SZ msb 4m or 512k
+//           erc_erd_pgsz_b0,                        // pg sz lsb 4m or 64k
+//           itlb_rd_tte_data[`STLB_DATA_NFO],       // NFO  (1b)
+//           itlb_rd_tte_data[`STLB_DATA_IE],        // IE   (1b)
+//           9'b0,                                   // soft2 58-42 (17b)
+//           8'b0,                                   // diag 8b
+//	         itlb_rd_tte_tag[`STLB_TAG_U],           // U    (1b)
+//           1'b0,                                   // ?? PA   (28b)
+//           itlb_rd_tte_data[`STLB_DATA_PA_39_22_HI:`STLB_DATA_PA_39_22_LO],
+//           itlb_rd_tte_data[`STLB_DATA_PA_21_19_HI:`STLB_DATA_PA_21_19_LO],
+//           itlb_rd_tte_data[`STLB_DATA_PA_18_16_HI:`STLB_DATA_PA_18_16_LO],
+//           itlb_rd_tte_data[`STLB_DATA_PA_15_13_HI:`STLB_DATA_PA_15_13_LO],
+//           6'b0,                                   // ?? 12-7 (6b)
+//           itlb_rd_tte_data[`STLB_DATA_L],         // L    (1b)
+//           itlb_rd_tte_data[`STLB_DATA_CP],        // CP   (1b)
+//           itlb_rd_tte_data[`STLB_DATA_CV],        // CV   (1b)
+//           itlb_rd_tte_data[`STLB_DATA_E],         // E    (1b)
+//           itlb_rd_tte_data[`STLB_DATA_P],         // P    (1b)
+//           itlb_rd_tte_data[`STLB_DATA_W],         // W    (1b)
+//           itlb_rd_tte_data[`STLB_DATA_G]          // G    (1b)
+//        } ;
+//`endif // !`ifdef SPARC_HPV_EN
+   
+   
+
+   // mux in all asi values
+   dp_mux2es #(64) itlbrd_mux(.dout (tlb_asi_data[63:0]),
+			    .in0  (formatted_tte_tag[63:0]),
+			    .in1  (formatted_tte_data[63:0]),
+			    .sel  (fcl_erb_itlbrd_data_s));
+
+   dp_mux4ds #(64) err_mux(.dout (err_asi_data[63:0]),
+			 .in0  ({62'b0, erc_erd_erren_asidata}),
+			 .in1  ({32'b0, erc_erd_errstat_asidata, 9'b0}),
+			 .in2  ({32'b0, erc_erd_errinj_asidata}),
+			 .in3  ({16'b0, err_addr_asidata, 4'b0}),
+			 .sel0_l (erc_erd_errasi_sel_en_l),
+			 .sel1_l (erc_erd_errasi_sel_stat_l),
+			 .sel2_l (erc_erd_errasi_sel_inj_l),
+			 .sel3_l (erc_erd_errasi_sel_addr_l));
+
+   dp_mux3ds #(64) misc_asi_mux(.dout (misc_asi_data[63:0]),
+			      .in0  ({29'b0, 
+				            tag_asi_data[31:28], 
+				            3'b0, 
+				            tag_asi_data[27:0]}),
+			      .in1  ({25'b0, erb_dtu_imask}),
+			      .in2  (64'b0),
+			      .sel0_l (erc_erd_miscasi_sel_ict_l),
+			      .sel1_l (erc_erd_miscasi_sel_imask_l),
+			      .sel2_l (erc_erd_miscasi_sel_other_l));
+
+   // Final asi data
+   // May need to add a flop to this mux output before sending it to the LSU
+   dp_mux4ds #(64) final_asi_mux(.dout (ldxa_data_s),
+			       .in0  (tlb_asi_data[63:0]),
+			       .in1  (err_asi_data),
+			       .in2  (misc_asi_data),
+			       .in3  ({30'b0,
+				             wsel_erb_asidata_s[0],
+				             wsel_erb_asidata_s[33:1]}), 
+			       .sel0_l (erc_erd_asisrc_sel_itlb_s_l),
+			       .sel1_l (erc_erd_asisrc_sel_err_s_l),
+			       .sel2_l (erc_erd_asisrc_sel_misc_s_l),
+			       .sel3_l (erc_erd_asisrc_sel_icd_s_l));
+
+   dff_s #(64) ldxa_reg(.din (ldxa_data_s),
+                      .q   (ldxa_data_d),
+                      .clk (clk), .se(se), .si(), .so());
+   assign ifu_lsu_ldxa_data_w2 = ldxa_data_d;
+
+				   
+//----------------------------------------
+// Error Address
+//----------------------------------------   
+
+   assign ifet_addr_f = {ifq_erb_wrtag_f[`IC_TAG_SZ-1:0], 
+                         ifq_erb_wrindex_f[`IC_IDX_HI:4]};
+
+   // pc of latest access
+   dff_s #(48) pcs1_reg(.din (fdp_erb_pc_f[47:0]),
+		                  .q   (pc_s1[47:0]),
+		                  .clk (clk), .se(se), .si(), .so());
+   
+   // Physical address
+   assign paddr_s1[39:10] = itlb_ifq_paddr_s[39:10];
+   assign paddr_s1[9:4]   = pc_s1[9:4];
+   dff_s #(36) padd_reg(.din (paddr_s1[39:4]),
+		                  .q   (paddr_d1[39:4]),
+		                  .clk (clk), .se(se), .si(), .so());
+
+//   assign erb_ifq_paddr_s[9:0] = pc_s1[9:0];
+
+   // stage PC one more cycle
+   dff_s #(44) pcd1_reg(.din (pc_s1[47:4]),
+		                  .q   (pc_d1[47:4]),
+		                  .clk (clk), .se(se), .si(), .so());
+
+   // IRF address
+   dff_s #(16) irf_reg(.din ({exu_ifu_err_reg_m[7:0],
+                            exu_ifu_err_synd_m[7:0]}),
+		                 .q   ({irfaddr_w[7:5], 
+                            irfaddr_4_w,
+                            irfaddr_w[3:0],
+                            irfsynd_w[7:0]}),
+		                 .clk (clk), .se(se), .si(), .so());
+
+   // fix for bug 5594
+   // nand2 + xnor
+   assign irfaddr_w[4] = irfaddr_4_w ^ (irfaddr_w[5] & irfaddr_w[3]);
+
+   // itlb asi address
+   dff_s #(6) itlbidx_reg(.din (tlu_itlb_rw_index_g),
+                        .q   (itlb_asi_index),
+                        .clk (clk), .se(se), .si(), .so());
+
+
+   // lsu error address
+   dff_s #(44) lsadr_reg(.din (lsu_ifu_err_addr),
+                       .q   (lsu_err_addr),
+                       .clk (clk), .se(se), .si(), .so());
+
+	  
+   // mux in the different error addresses
+   // thread 0
+   dp_mux4ds #(44) t0_eadr_mx0(.dout  (t0_eadr_mx0_out),
+			     .in0   ({24'b0, irfsynd_w[7:0], 4'b0, irfaddr_w[7:0]}),
+			     .in1   ({38'b0, itlb_asi_index}),
+			     .in2   ({17'b0, ffu_ifu_err_synd_w2[13:7], 
+                    1'b0, ffu_ifu_err_synd_w2[6:0], 
+                    6'b0, ffu_ifu_err_reg_w2[5:0]}),
+			     .in3   (lsu_err_addr),
+			     .sel0_l (erc_erd_eadr0_sel_irf_l[0]),
+			     .sel1_l (erc_erd_eadr0_sel_itlb_l[0]),
+			     .sel2_l (erc_erd_eadr0_sel_frf_l[0]),
+			     .sel3_l (erc_erd_eadr0_sel_lsu_l[0]));
+
+   dp_mux4ds #(44) t0_eadr_mx1(.dout  (t0_eadr_mx1_out),
+			     .in0   (pc_d1[47:4]),
+			     .in1   ({8'b0, paddr_d1[39:4]}),
+			     .in2   ({8'b0, ifet_addr_f}),
+			     .in3   ({8'b0, spu_ifu_err_addr_w2[39:4]}),
+			     .sel0_l (erc_erd_eadr1_sel_pcd1_l[0]),
+			     .sel1_l (erc_erd_eadr1_sel_l1pa_l[0]),
+			     .sel2_l (erc_erd_eadr1_sel_l2pa_l[0]),
+			     .sel3_l (erc_erd_eadr1_sel_other_l[0]));
+
+   dp_mux4ds #(44) t0_eadr_mx2(.dout  (t0_err_addr_nxt),
+			     .in0   (t0_eadr_mx0_out),
+			     .in1   (t0_eadr_mx1_out),
+			     .in2   (ifq_erb_asidata_i2[47:4]),
+			     .in3   (t0_err_addr),
+			     .sel0_l (erc_erd_eadr2_sel_mx0_l[0]),
+			     .sel1_l (erc_erd_eadr2_sel_mx1_l[0]),
+			     .sel2_l (erc_erd_eadr2_sel_wrt_l[0]),
+			     .sel3_l (erc_erd_eadr2_sel_old_l[0]));
+
+   dff_s #(44) t0_eadr_reg(.din (t0_err_addr_nxt),
+		       .q   (t0_err_addr),
+		       .clk (clk), .se(se), .si(), .so());
+
+`ifdef FPGA_SYN_1THREAD
+	assign err_addr_asidata = t0_err_addr;
+`else
+   // thread 1
+   dp_mux4ds #(44) t1_eadr_mx0(.dout  (t1_eadr_mx0_out),
+			     .in0   ({24'b0, irfsynd_w[7:0], 4'b0, irfaddr_w[7:0]}),
+			     .in1   ({38'b0, itlb_asi_index}),
+			     .in2   ({17'b0, ffu_ifu_err_synd_w2[13:7], 
+                    1'b0, ffu_ifu_err_synd_w2[6:0], 
+                    6'b0, ffu_ifu_err_reg_w2[5:0]}),
+			     .in3   (lsu_err_addr),
+			     .sel0_l (erc_erd_eadr0_sel_irf_l[1]),
+			     .sel1_l (erc_erd_eadr0_sel_itlb_l[1]),
+			     .sel2_l (erc_erd_eadr0_sel_frf_l[1]),
+			     .sel3_l (erc_erd_eadr0_sel_lsu_l[1]));
+
+   dp_mux4ds #(44) t1_eadr_mx1(.dout  (t1_eadr_mx1_out),
+			     .in0   (pc_d1[47:4]),
+			     .in1   ({8'b0, paddr_d1[39:4]}),
+			     .in2   ({8'b0, ifet_addr_f}),
+			     .in3   ({8'b0, spu_ifu_err_addr_w2[39:4]}),
+//			     .in3   ({44'b0}),
+			     .sel0_l (erc_erd_eadr1_sel_pcd1_l[1]),
+			     .sel1_l (erc_erd_eadr1_sel_l1pa_l[1]),
+			     .sel2_l (erc_erd_eadr1_sel_l2pa_l[1]),
+			     .sel3_l (erc_erd_eadr1_sel_other_l[1]));
+
+   dp_mux4ds #(44) t1_eadr_mx2(.dout  (t1_err_addr_nxt),
+			     .in0   (t1_eadr_mx0_out),
+			     .in1   (t1_eadr_mx1_out),
+			     .in2   (ifq_erb_asidata_i2[47:4]),
+			     .in3   (t1_err_addr),
+			     .sel0_l (erc_erd_eadr2_sel_mx0_l[1]),
+			     .sel1_l (erc_erd_eadr2_sel_mx1_l[1]),
+			     .sel2_l (erc_erd_eadr2_sel_wrt_l[1]),
+			     .sel3_l (erc_erd_eadr2_sel_old_l[1]));
+
+   dff_s #(44) t1_eadr_reg(.din (t1_err_addr_nxt),
+		       .q   (t1_err_addr),
+		       .clk (clk), .se(se), .si(), .so());
+
+   // thread 2
+   dp_mux4ds #(44) t2_eadr_mx0(.dout  (t2_eadr_mx0_out),
+			     .in0   ({24'b0, irfsynd_w[7:0], 4'b0, irfaddr_w[7:0]}),
+			     .in1   ({38'b0, itlb_asi_index}),
+			     .in2   ({17'b0, ffu_ifu_err_synd_w2[13:7], 
+                    1'b0, ffu_ifu_err_synd_w2[6:0], 
+                    6'b0, ffu_ifu_err_reg_w2[5:0]}),
+			     .in3   (lsu_err_addr),
+			     .sel0_l (erc_erd_eadr0_sel_irf_l[2]),
+			     .sel1_l (erc_erd_eadr0_sel_itlb_l[2]),
+			     .sel2_l (erc_erd_eadr0_sel_frf_l[2]),
+			     .sel3_l (erc_erd_eadr0_sel_lsu_l[2]));
+
+   dp_mux4ds #(44) t2_eadr_mx1(.dout  (t2_eadr_mx1_out),
+			     .in0   (pc_d1[47:4]),
+			     .in1   ({8'b0, paddr_d1[39:4]}),
+			     .in2   ({8'b0, ifet_addr_f}),
+			     .in3   ({8'b0, spu_ifu_err_addr_w2[39:4]}),
+//			     .in3   ({44'b0}),
+			     .sel0_l (erc_erd_eadr1_sel_pcd1_l[2]),
+			     .sel1_l (erc_erd_eadr1_sel_l1pa_l[2]),
+			     .sel2_l (erc_erd_eadr1_sel_l2pa_l[2]),
+			     .sel3_l (erc_erd_eadr1_sel_other_l[2]));
+
+   dp_mux4ds #(44) t2_eadr_mx2(.dout  (t2_err_addr_nxt),
+			     .in0   (t2_eadr_mx0_out),
+			     .in1   (t2_eadr_mx1_out),
+			     .in2   (ifq_erb_asidata_i2[47:4]),
+			     .in3   (t2_err_addr),
+			     .sel0_l (erc_erd_eadr2_sel_mx0_l[2]),
+			     .sel1_l (erc_erd_eadr2_sel_mx1_l[2]),
+			     .sel2_l (erc_erd_eadr2_sel_wrt_l[2]),
+			     .sel3_l (erc_erd_eadr2_sel_old_l[2]));
+
+   dff_s #(44) t2_eadr_reg(.din (t2_err_addr_nxt),
+		       .q   (t2_err_addr),
+		       .clk (clk), .se(se), .si(), .so());
+
+   // thread 3
+   dp_mux4ds #(44) t3_eadr_mx0(.dout  (t3_eadr_mx0_out),
+			     .in0   ({24'b0, irfsynd_w[7:0], 4'b0, irfaddr_w[7:0]}),
+			     .in1   ({38'b0, itlb_asi_index}),
+			     .in2   ({17'b0, ffu_ifu_err_synd_w2[13:7], 
+                    1'b0, ffu_ifu_err_synd_w2[6:0], 
+                    6'b0, ffu_ifu_err_reg_w2[5:0]}),
+			     .in3   (lsu_err_addr),
+			     .sel0_l (erc_erd_eadr0_sel_irf_l[3]),
+			     .sel1_l (erc_erd_eadr0_sel_itlb_l[3]),
+			     .sel2_l (erc_erd_eadr0_sel_frf_l[3]),
+			     .sel3_l (erc_erd_eadr0_sel_lsu_l[3]));
+
+   dp_mux4ds #(44) t3_eadr_mx1(.dout  (t3_eadr_mx1_out),
+			     .in0   (pc_d1[47:4]),
+			     .in1   ({8'b0, paddr_d1[39:4]}),
+			     .in2   ({8'b0, ifet_addr_f}),
+			     .in3   ({8'b0, spu_ifu_err_addr_w2[39:4]}),
+//			     .in3   ({44'b0}),
+			     .sel0_l (erc_erd_eadr1_sel_pcd1_l[3]),
+			     .sel1_l (erc_erd_eadr1_sel_l1pa_l[3]),
+			     .sel2_l (erc_erd_eadr1_sel_l2pa_l[3]),
+			     .sel3_l (erc_erd_eadr1_sel_other_l[3]));
+
+   dp_mux4ds #(44) t3_eadr_mx2(.dout  (t3_err_addr_nxt),
+			     .in0   (t3_eadr_mx0_out),
+			     .in1   (t3_eadr_mx1_out),
+			     .in2   (ifq_erb_asidata_i2[47:4]),
+			     .in3   (t3_err_addr),
+			     .sel0_l (erc_erd_eadr2_sel_mx0_l[3]),
+			     .sel1_l (erc_erd_eadr2_sel_mx1_l[3]),
+			     .sel2_l (erc_erd_eadr2_sel_wrt_l[3]),
+			     .sel3_l (erc_erd_eadr2_sel_old_l[3]));
+
+   dff_s #(44) t3_eadr_reg(.din (t3_err_addr_nxt),
+		       .q   (t3_err_addr),
+		       .clk (clk), .se(se), .si(), .so());
+
+
+   // asi read
+   dp_mux4ds #(44) asi_eadr_mx(.dout (err_addr_asidata),
+			     .in0  (t0_err_addr),
+			     .in1  (t1_err_addr),
+			     .in2  (t2_err_addr),
+			     .in3  (t3_err_addr),
+			     .sel0_l (erc_erd_asi_thr_l[0]),
+			     .sel1_l (erc_erd_asi_thr_l[1]),
+			     .sel2_l (erc_erd_asi_thr_l[2]),
+			     .sel3_l (erc_erd_asi_thr_l[3]));
+`endif
+   
+   // Instruction Mask
+   dp_mux2es #(39) imask_en_mux(.dout (imask_next),
+			      .in0  (erb_dtu_imask),
+			      .in1  (ifq_erb_asidata_i2[38:0]),
+			      .sel  (erc_erd_ld_imask));
+
+   // need to reset top 7 bits only
+   dffr_s #(39) imask_reg(.din (imask_next),
+		      .q   (erb_dtu_imask),
+		      .rst (erb_reset),
+		      .clk (clk), .se(se), .si(), .so());
+
+   sink #(4) s0(.in (pc_s1[3:0]));
+   
+endmodule // sparc_ifu_erb
+
Index: /trunk/T1-CPU/ifu/sparc_ifu_ctr5.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_ctr5.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_ctr5.v	(revision 6)
@@ -0,0 +1,64 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_ctr5.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_ifu_ctr5
+//  Description:	
+//  5 bit counter for starvation detect
+*/
+
+module sparc_ifu_ctr5(/*AUTOARG*/
+   // Outputs
+   limit, so, 
+   // Inputs
+   clk, se, si, rst_ctr_l
+   );
+
+   // clock and scan controls (si is not connected to anything in this module)
+   input     clk;
+   input     se, si;
+   input     rst_ctr_l;   // active low: forces the next count value to zero
+
+   output    limit;       // high while the two upper count bits are both set
+   output    so;          // declared for the scan chain; left undriven here
+
+   wire [4:0] count;      // registered counter value
+   wire [4:0] sum;        // count + 1, written out bit by bit
+   wire [4:0] count_nxt;  // value loaded into the register on the next clock
+
+   // Incrementer: bit i flips when every lower bit is 1.
+   assign   sum[4] = (&count[3:0]) ^ count[4];
+   assign   sum[3] = (&count[2:0]) ^ count[3];
+   assign   sum[2] = (&count[1:0]) ^ count[2];
+   assign   sum[1] = count[0] ^ count[1];
+   assign   sum[0] = ~count[0];
+
+   // rst_ctr_l low gates every bit to 0; otherwise the counter steps by one.
+   assign   count_nxt = {5{rst_ctr_l}} & sum;
+
+   dff_s #(5) cnt_reg(.din(count_nxt), .q(count),
+                      .clk(clk), .se(se), .si(), .so());
+
+   // limit set to 24 for now (asserted for every count value from 24 to 31)
+   assign   limit = &count[4:3];
+
+endmodule
+   
Index: /trunk/T1-CPU/ifu/sparc_ifu_mbist.v
===================================================================
--- /trunk/T1-CPU/ifu/sparc_ifu_mbist.v	(revision 6)
+++ /trunk/T1-CPU/ifu/sparc_ifu_mbist.v	(revision 6)
@@ -0,0 +1,622 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_ifu_mbist.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//    Description:        Memory BIST Controller for the L1 ICache and DCache
+//    Block Type:         Control Block
+//    Module:             mbist_engine
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+module sparc_ifu_mbist(
+  mbist_dcache_read,
+  mbist_dcache_write,
+  mbist_dcache_word,
+  mbist_dcache_index,
+  mbist_dcache_way,
+  mbist_icache_read,
+  mbist_icache_write,
+  mbist_icache_index,
+  mbist_icache_word,
+  mbist_icache_way,
+  mbist_icache_wdata,
+  mbist_dcache_wdata,
+  mbist_done,
+  mbist_dcache_fail,
+  mbist_icache_fail,
+  rclk,
+  mbist_start,
+  mbist_ifq_run_bist,
+  mbist_userdata_mode,
+  mbist_bisi_mode,
+  mbist_loop_mode,
+  mbist_loop_on_address,
+  mbist_stop_on_fail,
+  mbist_stop_on_next_fail,
+  mbist_dcache_data_in,
+  mbist_icache_data_in,
+  grst_l,
+  arst_l,
+  mbist_si,
+  mbist_so,
+  mbist_se
+);
+
+// Memory BIST controller: a 21-bit control counter sequences march elements,
+// addresses and read/write phases for the dcache and icache arrays; read data
+// is compared against the write pattern delayed through four registers.
+// /////////////////////////////////////////////////////////////////////////////
+// Outputs
+// /////////////////////////////////////////////////////////////////////////////
+
+   output             mbist_dcache_read;
+   output             mbist_dcache_write;
+   output             mbist_dcache_word;
+   output[6:0]        mbist_dcache_index;
+   output[1:0]        mbist_dcache_way;
+
+   output             mbist_icache_read;
+   output             mbist_icache_write;
+   output[7:0]        mbist_icache_index;
+   output             mbist_icache_word;
+   output[1:0]        mbist_icache_way;
+   output             mbist_ifq_run_bist;   // copy of the internal run signal
+
+   output[7:0]        mbist_icache_wdata;
+   output[7:0]        mbist_dcache_wdata;
+
+   output             mbist_done;
+   output             mbist_dcache_fail;    // fail_reg_out[1]
+   output             mbist_icache_fail;    // fail_reg_out[0]
+
+   output             mbist_so;             // not driven inside this module
+ 
+
+
+// /////////////////////////////////////////////////////////////////////////////
+// Inputs
+// /////////////////////////////////////////////////////////////////////////////
+
+   input              rclk;
+   input              mbist_si;             // not referenced inside this module
+   input              mbist_se;
+
+   input              grst_l;
+   input              arst_l;
+
+   input              mbist_start;
+   input              mbist_userdata_mode;
+   input              mbist_bisi_mode;
+   input              mbist_loop_mode;
+   input              mbist_loop_on_address;
+   input              mbist_stop_on_fail;
+   input              mbist_stop_on_next_fail;
+
+   input[71:0]        mbist_dcache_data_in;
+   input[67:0]        mbist_icache_data_in;
+
+
+// /////////////////////////////////////////////////////////////////////////////
+// Wires
+// /////////////////////////////////////////////////////////////////////////////
+
+  wire [7:0] config_in; 
+  wire [7:0] config_out;        
+  wire start_transition;        
+  wire reset_engine;    
+  wire loop;    
+  wire run;     
+  wire bisi;    
+  wire userdata_mode;   
+  wire stop_on_fail;    
+  wire stop_on_next_fail;       
+  wire loop_on_address;
+  wire [7:0] userdata_in;       
+  wire [7:0] userdata_out;      
+  wire [6:0] useradd_in;        
+  wire [6:0] useradd_out;       
+  wire [20:0] control_in;       
+  wire [20:0] control_out;      
+  wire msb;     
+  wire array_sel;       
+  wire [1:0] data_control;      
+  wire address_mix;     
+  wire [2:0] march_element;     
+  wire [9:0] array_address;    
+  wire dcache_sel;      
+  wire [1:0] read_write_control;        
+  wire [20:0] qual_control_out; 
+  wire four_cycle_march;        
+  wire [9:0] add;      
+  wire upaddress_march; 
+  wire [10:0] mbist_address;    
+  wire array_write;     
+  wire array_read;      
+  wire initialize;      
+  wire fail;    
+  wire true_data;       
+  wire [7:0] data_pattern;      
+  wire second_time_through;     
+  wire icache_sel;      
+  wire dc_read_pipe_out1;       
+  wire dc_read_pipe_out2;       
+  wire dcache_piped_read;       
+  wire ic_read_pipe_out1;       
+  wire ic_read_pipe_out2;       
+  wire icache_piped_read;       
+  wire [7:0] data_pipe_out1;    
+  wire [7:0] data_pipe_out2;    
+  wire [10:0] add_pipe_out1;    
+  wire [10:0] add_pipe_out2;    
+  wire [9:0] dcache_piped_address;      
+  wire [10:0] icache_piped_address;     
+  wire [1:0] fail_reg_in;       
+  wire [1:0] fail_reg_out;      
+  wire qual_dcache_fail;        
+  wire qual_icache_fail;        
+  wire beyond_last_fail;
+  wire dcache_fail;     
+  wire icache_fail;     
+  wire mismatch, mbist_word_sel;        
+  wire [71:0] expect_data;      
+  wire [71:0] compare_data;     
+  wire qual_fail, dcache_data_sel;       
+  wire [10:0] fail_add_reg_in;  
+  wire [10:0] fail_add_reg_out; 
+  wire [71:0] fail_data_reg_in; 
+  wire [71:0] fail_data_reg_out;        
+  wire [20:0] fail_control_reg_in;
+  wire [20:0] fail_control_reg_out;     
+  wire mbist_icache_read_bf, mbist_icache_write_bf;
+  wire [71:0] compare_data_bf;
+  wire msb_rst, msb_d1_rst, msb_d2_rst, msb_d3_rst, mbist_done_int;
+  wire msb_d1, msb_d2, msb_d3, msb_d4, mbist_reset_l, mbist_reset;
+
+
+//// reset buffer ////
+// grst_l passes through the rstff flop (arst_l on its rst_l pin); the inverted output is mbist_reset.
+   dffrl_async rstff(.din (grst_l),
+                     .q   (mbist_reset_l),
+                     .clk (rclk), .se(mbist_se), .si(), .so(),
+                     .rst_l (arst_l));
+
+   assign       mbist_reset = ~mbist_reset_l;
+
+// /////////////////////////////////////////////////////////////////////////////
+//
+// MBIST Config Register
+//
+// /////////////////////////////////////////////////////////////////////////////
+//
+// A low to high transition on mbist_start will reset and start the engine.  
+// mbist_start must remain active high for the duration of MBIST.  
+// If mbist_start deasserts the engine will stop but not reset.
+// Once MBIST has completed mbist_done will assert and the fail status
+// signals will be valid.  
+// To run MBIST again the mbist_start signal must transition low then high.
+//
+// Loop on Address will disable the address mix function.
+//
+// /////////////////////////////////////////////////////////////////////////////
+
+// config_out[0] = mbist_start registered, [1] = [0] delayed one more clock,
+// [2]..[7] = mode inputs captured while start_transition is high.
+  dff_s #(8) config_reg (
+               .clk      ( rclk                  ),
+               .din      ( config_in[7:0]       ),
+               .q        ( config_out[7:0]      ), .se(mbist_se), .si(), .so());
+
+
+// start_transition is high while config_out[0] is 1 and config_out[1] is still 0.
+  assign config_in[0]        =    mbist_start;
+  assign config_in[1]        =    config_out[0];
+  assign start_transition    =    config_out[0]      &  ~config_out[1];
+  assign reset_engine        =    mbist_reset | start_transition   |  ((loop | loop_on_address)  &  mbist_done);
+  assign run                 =    config_out[1]      &  ~mbist_done_int;   // stops once mbist_done_int is set
+  assign mbist_ifq_run_bist  =    run;
+
+  assign config_in[2]        =    start_transition   ?   mbist_bisi_mode:      config_out[2];
+  assign bisi                =    config_out[2];
+
+  assign config_in[3]        =    start_transition   ?   mbist_userdata_mode:  config_out[3];
+  assign userdata_mode       =    config_out[3];
+
+  assign config_in[4]        =    start_transition   ?   mbist_loop_mode:  config_out[4];
+  assign loop                =    config_out[4];
+
+  assign config_in[5]        =    start_transition   ?   mbist_stop_on_fail:  config_out[5];
+  assign stop_on_fail        =    config_out[5];
+
+  assign config_in[6]        =    start_transition   ?   mbist_stop_on_next_fail:  config_out[6];
+  assign stop_on_next_fail   =    config_out[6];
+
+  assign config_in[7]        =    start_transition   ?   mbist_loop_on_address:  config_out[7];
+  assign loop_on_address     =    config_out[7];
+
+// userdata_reg only recirculates its own output; no other load path is visible in this module.
+// NOTE(review): presumably the value is written through the scan chain - confirm.
+  dff_s #(8) userdata_reg (
+                 .clk      ( rclk                    ),
+                 .din      ( userdata_in[7:0]       ),
+                 .q        ( userdata_out[7:0]      ), .se(mbist_se), .si(), .so());
+
+
+  assign userdata_in[7:0]    =    userdata_out[7:0];
+
+
+
+// user_address_reg likewise holds its own value (useradd_in = useradd_out).
+  dff_s #(7) user_address_reg (
+                 .clk      ( rclk                   ),
+                 .din      ( useradd_in[6:0]       ),
+                 .q        ( useradd_out[6:0]      ), .se(mbist_se), .si(), .so());
+
+  assign useradd_in[6:0]    =    useradd_out[6:0];
+
+
+// /////////////////////////////////////////////////////////////////////////////
+//
+// MBIST Control Register
+//
+// /////////////////////////////////////////////////////////////////////////////
+// Remove Address mix disable before delivery
+// /////////////////////////////////////////////////////////////////////////////
+// control_out fields: [20] msb, [19] array_sel, [18:17] data_control, [16] address_mix,
+// [15] word select, [14:12] march_element, [11:2] array_address, [1:0] read_write_control.
+   dff_s #(21) control_reg  (
+                      .clk   ( rclk                        ),
+                      .din   ( control_in[20:0]           ),
+                      .q     ( control_out[20:0]          ), .se(mbist_se), .si(), .so());
+
+  assign   msb                       =     control_out[20];
+  assign   array_sel                 =     control_out[19];
+  assign   data_control[1:0]         =     userdata_mode ? 2'b11 : control_out[18:17];
+  assign   address_mix               =     loop_on_address    ?   1'b1:   control_out[16];
+  assign   mbist_word_sel            =     loop_on_address ? 1'b1 : control_out[15]; 
+  assign   march_element[2:0]        =     control_out[14:12];
+  assign   array_address[9:0]       =      loop_on_address    ?   {6'h3f, control_out[5:2]}:
+                                           (dcache_sel & ~bisi)     ?   {1'd1, control_out[10:2]}: control_out[11:2];
+  assign   read_write_control[1:0]   =     control_out[1:0];
+
+  assign   qual_control_out[20:0]    =     {msb, array_sel, data_control[1:0], address_mix, mbist_word_sel, march_element[2:0], array_address[9:0], read_write_control[1:0]};
+
+// added by Chandra
+// Counter step: +1 during the four-cycle march elements (6 and 7), +2 otherwise.
+  wire [1:0] add_data_int;
+  wire [20:0] add_data;
+  wire [9:0] mbist_address_bf;
+// control_in: 0 while reset_engine, incremented while run, otherwise held at qual_control_out.
+  assign add_data_int[1:0] = four_cycle_march ? 2'b01: 2'b10;
+  assign add_data[20:0] = qual_control_out[20:0] + {19'd0, add_data_int};
+  assign control_in[20:0] = {21{~run & ~reset_engine}} & qual_control_out | {21{run & ~reset_engine}} & add_data;
+// add is array_address for the up-address march elements and its bitwise complement otherwise.
+  assign   add[9:0]                  =     upaddress_march     ?   array_address[9:0]:    ~array_address[9:0];
+  assign   mbist_address_bf[9:0]     =     loop_on_address     ?  {useradd_out[5:0], add[3:0]}: 
+                                           address_mix         ?  (dcache_sel ? ({add[9:8], add[0], add[7:1]}) : ({add[9:8], add[6:0], add[7]})) :
+                                                                    add[9:0];
+
+
+
+  assign   array_write               =    ~run                 ?    1'b0:
+                                           four_cycle_march    ?  (read_write_control[0] ^ read_write_control[1]):  read_write_control[1];
+  assign   array_read                =    ~array_write        &&  run  &&  ~initialize;
+
+  assign   mbist_done_int            =    (stop_on_fail  &&  fail)      ||  (stop_on_next_fail  &&  fail)         ||
+                                          (bisi  &&  march_element[0])  ||  msb;
+// mbist_done matches mbist_done_int except that msb is replaced by msb_d4 (msb after four flops).
+  assign   mbist_done 		     =    (stop_on_fail  &&  fail)      ||  (stop_on_next_fail  &&  fail)         ||
+                                          (bisi  &&  march_element[0])  ||  msb_d4;
+
+////////////
+////////////
+
+  wire [7:0] mbist_write_data_bf;
+
+  assign   mbist_write_data_bf[7:0]     =     true_data           ?   data_pattern[7:0]:      ~data_pattern[7:0];
+  assign   mbist_dcache_wdata[7:0]      =     mbist_write_data_bf[7:0];
+
+  assign   second_time_through       =    ~loop_on_address    &&   address_mix;
+  assign   initialize                =    (march_element[2:0] == 3'b000)  &&  ~second_time_through;
+  assign   four_cycle_march          =    (march_element[2:0] == 3'h6)    ||  (march_element[2:0] == 3'h7);
+  assign   upaddress_march           =    (march_element[2:0] == 3'h0)    ||  (march_element[2:0] == 3'h1) ||
+                                          (march_element[2:0] == 3'h2)    ||  (march_element[2:0] == 3'h6);
+
+  assign   true_data                 =     read_write_control[1] ^ ~march_element[0];
+  assign   data_pattern[7:0]         =     userdata_mode                ?    userdata_out[7:0]:
+                                           bisi                         ?    8'hFF:                    // true_data function will invert to 8'h00
+                                          (data_control[1:0] == 2'h0)   ?    8'hAA:
+                                          (data_control[1:0] == 2'h1)   ?    8'h99:
+                                          (data_control[1:0] == 2'h2)   ?    8'hCC:
+                                                                             8'h00;
+  assign   dcache_sel                =    ~array_sel;
+  assign   icache_sel                =     array_sel;
+
+////////////
+////////////
+// Array interface: index/way/word outputs are slices of mbist_address (= add_pipe_out1).
+  assign   mbist_dcache_index[6:0]     =     mbist_address[6:0];
+  assign   mbist_dcache_way[1:0]       =     (mbist_address[8:7] & {2{config_out[0]}});
+  assign   mbist_dcache_word           =     mbist_address[10];
+  assign   mbist_dcache_read           =     dcache_sel  &&  array_read;
+  assign   mbist_dcache_write          =    (dcache_sel  ||  bisi) &&  array_write;
+
+  assign   mbist_icache_index[7:0]     =     mbist_address[7:0];
+  assign   mbist_icache_way[1:0]       =     (mbist_address[9:8] & {2{config_out[0]}});
+  assign   mbist_icache_word           =     mbist_address[10];
+  assign   mbist_icache_read_bf        =     icache_sel  &&  array_read;
+  assign   mbist_icache_write_bf       =    (icache_sel  ||  bisi)  &&  array_write;
+
+////////////////////////
+////////////////////////
+// msb delayed through four flops; every stage input is gated with ~reset_engine.
+  assign msb_rst = msb & ~reset_engine;
+  dff_s #(1) msb_d1_inst(
+                   .clk      ( rclk                   ),
+                   .din      ( msb_rst ),
+                   .q        ( msb_d1 ), .se(mbist_se), .si(), .so());
+  assign msb_d1_rst = msb_d1 & ~reset_engine;
+  dff_s #(1) msb_d2_inst(
+                   .clk      ( rclk                   ),
+                   .din      ( msb_d1_rst ),
+                   .q        ( msb_d2 ), .se(mbist_se), .si(), .so());
+  assign msb_d2_rst = msb_d2 & ~reset_engine;
+  dff_s #(1) msb_d3_inst(
+                   .clk      ( rclk                   ),
+                   .din      ( msb_d2_rst ),
+                   .q        ( msb_d3 ), .se(mbist_se), .si(), .so());
+  assign msb_d3_rst = msb_d3 & ~reset_engine;
+  dff_s #(1) msb_d4_inst(
+                   .clk      ( rclk                   ),
+                   .din      ( msb_d3_rst ),
+                   .q        ( msb_d4 ), .se(mbist_se), .si(), .so());
+
+// /////////////////////////////////////////////////////////////////////////////
+// Pipeline for Read, Data, and Address
+// /////////////////////////////////////////////////////////////////////////////
+
+  wire dc_read_pipe_out3, dc_read_pipe_out4, ic_read_pipe_out3, ic_read_pipe_out4;
+  wire dc_read_pipe_out1_bf, dc_read_pipe_out2_bf, dc_read_pipe_out3_bf;
+  wire ic_read_pipe_out1_bf, ic_read_pipe_out2_bf, ic_read_pipe_out3_bf;
+
+  ////////////
+  ////////////
+  // dcache read strobe: four flops, each stage output gated with ~reset_engine.
+  dff_s #(1) dc_read_pipe_reg1 (
+                   .clk      ( rclk                   ),
+                   .din      ( mbist_dcache_read      ),
+                   .q        ( dc_read_pipe_out1      ), .se(mbist_se), .si(), .so());
+
+  assign dc_read_pipe_out1_bf = dc_read_pipe_out1 & ~reset_engine;
+ 
+  dff_s #(1) dc_read_pipe_reg2 (
+                   .clk      ( rclk                   ),
+                   .din      ( dc_read_pipe_out1_bf   ),
+                   .q        ( dc_read_pipe_out2      ), .se(mbist_se), .si(), .so());
+
+  assign dc_read_pipe_out2_bf = dc_read_pipe_out2 & ~reset_engine;
+
+  dff_s #(1) dc_read_pipe_reg3 (
+                   .clk      ( rclk                   ),
+                   .din      ( dc_read_pipe_out2_bf   ),
+                   .q        ( dc_read_pipe_out3      ), .se(mbist_se), .si(), .so());
+
+  assign dc_read_pipe_out3_bf = dc_read_pipe_out3 & ~reset_engine;
+  assign dcache_data_sel       =  dc_read_pipe_out3_bf;   // selects dcache data into compare_data_bf
+
+  dff_s #(1) dc_read_pipe_reg4 (
+                   .clk      ( rclk                   ),
+                   .din      ( dc_read_pipe_out3_bf   ),
+                   .q        ( dc_read_pipe_out4      ), .se(mbist_se), .si(), .so());
+
+  assign dcache_piped_read       =  dc_read_pipe_out4 & ~reset_engine;
+
+  ////////////
+  ////////////
+  // icache read strobe: same four-flop structure as the dcache one.
+  dff_s #(1) ic_read_pipe_reg1 (
+                   .clk      ( rclk                   ),
+                   .din      ( mbist_icache_read_bf   ),
+                   .q        ( ic_read_pipe_out1      ), .se(mbist_se), .si(), .so());
+
+  assign ic_read_pipe_out1_bf = ic_read_pipe_out1 & ~reset_engine;
+  assign mbist_icache_read = ic_read_pipe_out1;   // output is the registered read strobe
+
+  dff_s #(1) ic_read_pipe_reg2 (
+                   .clk      ( rclk                   ),
+                   .din      ( ic_read_pipe_out1_bf   ),
+                   .q        ( ic_read_pipe_out2      ), .se(mbist_se), .si(), .so());
+
+  assign ic_read_pipe_out2_bf = ic_read_pipe_out2 & ~reset_engine;
+
+  dff_s #(1) ic_read_pipe_reg3 (
+                   .clk      ( rclk                   ),
+                   .din      ( ic_read_pipe_out2_bf   ),
+                   .q        ( ic_read_pipe_out3      ), .se(mbist_se), .si(), .so());
+
+  assign ic_read_pipe_out3_bf = ic_read_pipe_out3 & ~reset_engine;
+
+  dff_s #(1) ic_read_pipe_reg4 (
+                   .clk      ( rclk                   ),
+                   .din      ( ic_read_pipe_out3_bf   ),
+                   .q        ( ic_read_pipe_out4      ), .se(mbist_se), .si(), .so());
+
+
+  assign icache_piped_read       =  ic_read_pipe_out4 & ~reset_engine;
+
+  ////////////
+  ////////////
+  // mbist_icache_write is the registered copy of mbist_icache_write_bf.
+  dff_s #(1) ic_write_pipe_reg1 (
+                   .clk      ( rclk                    ),
+                   .din      ( mbist_icache_write_bf  ),
+                   .q        ( mbist_icache_write     ), .se(mbist_se), .si(), .so());
+
+  ////////////
+  ////////////
+
+  wire [7:0] data_pipe_out3, data_pipe_out4;
+  // Write pattern delayed through four flops; stage 4 feeds expect_data.
+  dff_s #(8) data_pipe_reg1 (
+                   .clk      ( rclk                    ),
+                   .din      ( mbist_write_data_bf[7:0]  ),
+                   .q        ( data_pipe_out1[7:0]    ), .se(mbist_se), .si(), .so());
+
+  assign mbist_icache_wdata = data_pipe_out1;   // registered, unlike mbist_dcache_wdata
+
+  dff_s #(8) data_pipe_reg2 (
+                   .clk      ( rclk                    ),
+                   .din      ( data_pipe_out1[7:0]    ),
+                   .q        ( data_pipe_out2[7:0]    ), .se(mbist_se), .si(), .so());
+
+  dff_s #(8) data_pipe_reg3 (
+                   .clk      ( rclk                    ),
+                   .din      ( data_pipe_out2[7:0]    ),
+                   .q        ( data_pipe_out3[7:0]    ), .se(mbist_se), .si(), .so());
+
+  dff_s #(8) data_pipe_reg4 (
+                   .clk      ( rclk                    ),
+                   .din      ( data_pipe_out3[7:0]    ),
+                   .q        ( data_pipe_out4[7:0]    ), .se(mbist_se), .si(), .so());
+
+
+////////////
+////////////
+
+  wire [10:0] add_pipe_out3, add_pipe_out4;
+  wire mbist_word_sel_bf;
+  // In loop_on_address mode the word select comes from useradd_out[6].
+  assign mbist_word_sel_bf = loop_on_address ? useradd_out[6] : mbist_word_sel;
+
+  dff_s #(11) add_pipe_reg1 (
+                   .clk      ( rclk                        ),
+                   .din      ( {mbist_word_sel_bf, mbist_address_bf[9:0]}        ),
+                   .q        ( add_pipe_out1[10:0]        ), .se(mbist_se), .si(), .so());
+
+  assign mbist_address = add_pipe_out1;
+
+  dff_s #(11) add_pipe_reg2 (
+                   .clk      ( rclk                    ),
+                   .din      ( add_pipe_out1[10:0]    ),
+                   .q        ( add_pipe_out2[10:0]    ), .se(mbist_se), .si(), .so());
+
+  dff_s #(11) add_pipe_reg3 (
+                   .clk      ( rclk                    ),
+                   .din      ( add_pipe_out2[10:0]    ),
+                   .q        ( add_pipe_out3[10:0]    ), .se(mbist_se), .si(), .so());
+
+  dff_s #(11) add_pipe_reg4 (
+                   .clk      ( rclk                    ),
+                   .din      ( add_pipe_out3[10:0]    ),
+                   .q        ( add_pipe_out4[10:0]    ), .se(mbist_se), .si(), .so());
+
+  // The dcache fail address drops bit 9 of the piped address; the icache one keeps all 11 bits.
+  assign dcache_piped_address[9:0]   =  {add_pipe_out4[10], add_pipe_out4[8:0]};
+  assign icache_piped_address[10:0]  =  add_pipe_out4[10:0];
+
+// /////////////////////////////////////////////////////////////////////////////
+// Shared Fail Detection
+// /////////////////////////////////////////////////////////////////////////////
+
+  dff_s #(2) fail_reg       (
+                   .clk      ( rclk                ),
+                   .din      ( fail_reg_in[1:0]   ),
+                   .q        ( fail_reg_out[1:0]  ), .se(mbist_se), .si(), .so());
+
+// fail_reg is sticky: bit 1 = dcache, bit 0 = icache; it is zeroed only by reset_engine.
+  assign    fail_reg_in[1:0]      =    reset_engine      ?    2'b0: {qual_dcache_fail,qual_icache_fail}  |  fail_reg_out[1:0];
+
+// With stop_on_next_fail set, a fail only qualifies when beyond_last_fail is true.
+  assign    qual_dcache_fail      =  (!stop_on_next_fail  || (stop_on_next_fail &&  beyond_last_fail))  &&  dcache_fail;
+  assign    qual_icache_fail      =  (!stop_on_next_fail  || (stop_on_next_fail &&  beyond_last_fail))  &&  icache_fail;
+
+  assign    dcache_fail           =    dcache_piped_read  &&  mismatch;
+  assign    icache_fail           =    icache_piped_read  &&  mismatch;
+
+// added by Chandra
+
+//  assign expect_data[71:0] = { ({4{dcache_piped_read}} & data_pipe_out4[7:4]), 
+//			        (icache_piped_read ? {2{data_pipe_out4[1:0]}} : data_pipe_out4[3:0]), {8{data_pipe_out4[7:0]}}};
+// expect_data is built from data_pipe_out4; bits [71:68] are forced to 0 unless dcache_piped_read is set.
+  assign expect_data[71:0] = { ({4{dcache_piped_read}} & data_pipe_out4[7:4]), 
+			        (icache_piped_read ? {2{data_pipe_out4[1:0]}} : data_pipe_out4[3:0]), {7{data_pipe_out4[7:0]}},
+			        (icache_piped_read ? data_pipe_out4[7:4] : data_pipe_out4[3:0]), data_pipe_out4[3:0] };
+// The 68-bit icache read data is zero-extended to 72 bits before being registered.
+  assign    compare_data_bf[71:0]    =    dcache_data_sel ?      mbist_dcache_data_in[71:0]:  {4'h0,mbist_icache_data_in[67:0]};
+
+  dff_s #(72) compare_data_inst(
+                   .clk      ( rclk                        ),
+                   .din      ( compare_data_bf[71:0]   ),
+                   .q        ( compare_data[71:0]  ), .se(mbist_se), .si(), .so());
+
+  assign    mismatch              =    expect_data[71:0]   !=     compare_data[71:0];
+
+
+  assign    mbist_dcache_fail     =    fail_reg_out[1];
+  assign    mbist_icache_fail     =    fail_reg_out[0];
+
+  assign    fail                  =   |fail_reg_out[1:0];
+  assign    qual_fail             =    qual_dcache_fail  ||  qual_icache_fail;
+
+
+// /////////////////////////////////////////////////////////////////////////////
+// Fail Address and Data Capture and Control Reg Store
+// /////////////////////////////////////////////////////////////////////////////
+
+
+// Address of the most recent qualified fail (dcache has priority); zeroed by reset_engine.
+  dff_s #(11) fail_add_reg(
+                   .clk      ( rclk                        ),
+                   .din      ( fail_add_reg_in[10:0]   ),
+                   .q        ( fail_add_reg_out[10:0]  ), .se(mbist_se), .si(), .so());
+
+
+  assign fail_add_reg_in[10:0]     =  reset_engine              ?    11'b0:
+                                      qual_dcache_fail          ?    {1'b0,dcache_piped_address[9:0]}:
+                                      qual_icache_fail          ?    icache_piped_address[10:0]:
+                                                                     fail_add_reg_out[10:0];
+
+// Read data captured on the most recent qualified fail; zeroed by reset_engine.
+  dff_s #(72) fail_data_reg(
+                   .clk      ( rclk                      ),
+                   .din      ( fail_data_reg_in[71:0]   ),
+                   .q        ( fail_data_reg_out[71:0]  ), .se(mbist_se), .si(), .so());
+
+
+  assign fail_data_reg_in[71:0]     =  reset_engine     ?   72'b0:
+                                       qual_fail        ?   compare_data[71:0]:
+                                                            fail_data_reg_out[71:0];
+
+// Uses the raw mbist_stop_on_next_fail input: when it is high, reset_engine does not clear this register.
+  assign fail_control_reg_in[20:0]     = (reset_engine && !mbist_stop_on_next_fail)    ?   21'b0:
+                                          qual_fail                                    ?   qual_control_out[20:0]:
+                                                                                           fail_control_reg_out[20:0];
+
+  dff_s #(21) fail_control_reg_inst(
+                   .clk      ( rclk                         ),
+                   .din      ( fail_control_reg_in[20:0]   ),
+                   .q        ( fail_control_reg_out[20:0]  ), .se(mbist_se), .si(), .so());
+
+////////
+// True when the current control word is numerically greater than the stored failing one.
+  assign  beyond_last_fail  =  qual_control_out[20:0]    >    fail_control_reg_out[20:0];
+
+
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_stb_ctl.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_stb_ctl.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_stb_ctl.v	(revision 6)
@@ -0,0 +1,1026 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_stb_ctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//	Description:	Control for STB of LSU
+//				- Contains control for a single STB currently.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which contains the 
+					// time scale definition
+
+`include "iop.h"
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module lsu_stb_ctl (/*AUTOARG*/
+   // Outputs
+   so, stb_clk_en_l, stb_crnt_ack_id, lsu_stb_empty, stb_l2bnk_addr, 
+   stb_atm_rq_type, stb_wrptr, stb_rd_for_pcx, stb_pcx_rptr, 
+   stb_wrptr_prev, stb_state_ced_mod, stb_state_vld_out, 
+   lsu_stbcnt, stb_rmo_st_issue, stb_full, st_pcx_rq_kill_w2, 
+   // Inputs
+   rclk, grst_l, arst_l, si, se, thrd_en_g, cpx_st_ack_tid, 
+   pcx_rq_for_stb, st_ack_dq_stb, stb_flush_st_g, stb_cam_wvld_m, 
+   lsu_blk_st_m, tlb_pgnum_g, pcx_req_squash, flshinst_rst, 
+   lsu_stbctl_flush_pipe_w, flsh_inst_m, stb_state_si_0, 
+   stb_state_si_1, stb_state_si_2, stb_state_si_3, stb_state_si_4, 
+   stb_state_si_5, stb_state_si_6, stb_state_si_7, stb_state_rtype_0, 
+   stb_state_rtype_1, stb_state_rtype_2, stb_state_rtype_3, 
+   stb_state_rtype_4, stb_state_rtype_5, stb_state_rtype_6, 
+   stb_state_rtype_7, stb_state_rmo, stb_alt_sel, stb_alt_addr, 
+   lsu_dtlb_bypass_e, tlb_cam_hit, lsu_outstanding_rmo_st_max,
+   st_dtlb_perr_g
+   ) ;	
+
+
+   input rclk ;			
+   input grst_l;
+   input arst_l;
+   
+   input si;
+   input se;
+   output so;
+   
+input		thrd_en_g ;
+input		cpx_st_ack_tid ;	// st ack for given thread
+input		pcx_rq_for_stb ;	// stb's st selected for read for pcx
+input		st_ack_dq_stb ;		// store dequeued from stb
+input		stb_flush_st_g ;	// flush stb write in cycle g
+input		stb_cam_wvld_m ;	// stb write in cycle m
+
+input		lsu_blk_st_m ;		// blk st wr
+
+//input  [7:6]    lsu_ldst_va_m ;         // staging purposes
+//input  [2:1]    lsu_st_rq_type_m ;	// st request type
+//input		lsu_st_rmo_m ;		// rmo store in m-stage
+
+input  [39:37]  tlb_pgnum_g ;  	        // ldst access to io 
+input		pcx_req_squash ;	// pcx req is squashed
+
+input		flshinst_rst ;		// reset by flush inst on return
+input		lsu_stbctl_flush_pipe_w ;
+
+   input flsh_inst_m;
+   
+
+//from stb_ctldp
+   input [3:2] stb_state_si_0;
+   input [3:2] stb_state_si_1;
+   input [3:2] stb_state_si_2;
+   input [3:2] stb_state_si_3;
+   input [3:2] stb_state_si_4;
+   input [3:2] stb_state_si_5;
+   input [3:2] stb_state_si_6;
+   input [3:2] stb_state_si_7;
+
+   input [2:1] stb_state_rtype_0;
+   input [2:1] stb_state_rtype_1;
+   input [2:1] stb_state_rtype_2;
+   input [2:1] stb_state_rtype_3;
+   input [2:1] stb_state_rtype_4;
+   input [2:1] stb_state_rtype_5;
+   input [2:1] stb_state_rtype_6;
+   input [2:1] stb_state_rtype_7;
+
+   //input [7:0] stb_state_io;
+   input [7:0] stb_state_rmo;
+
+   input       stb_alt_sel ;
+   input [2:0] stb_alt_addr ;
+
+input          lsu_dtlb_bypass_e;
+input          tlb_cam_hit;		// m-cycle
+
+input		st_dtlb_perr_g ;	// enabled st dtlb parity err.
+ 
+   //output      stb_non_l2bnk;
+   output [7:0] stb_clk_en_l;
+  
+output  [2:0]   stb_crnt_ack_id ;       // ackid for current outstanding st.
+
+output		lsu_stb_empty ;		// stb is empty
+
+output	[2:0]	stb_l2bnk_addr ;	// l2bank address.	
+output	[2:1]	stb_atm_rq_type ;	// identify atomic transaction
+
+output	[2:0]	stb_wrptr ;		// write ptr - per thread
+//output	[2:0]	stb_dfq_rptr ;		// rptr for dfq - per thread
+output		stb_rd_for_pcx ; 	// rd vld for pcx - per thread
+output	[2:0]	stb_pcx_rptr ;		// rptr for pcx - per thread
+output	[2:0]	stb_wrptr_prev ;
+output  [7:0]   stb_state_ced_mod ;
+output  [7:0]   stb_state_vld_out ;
+
+output	[3:0]	lsu_stbcnt ;	// # of vld entries
+
+output		stb_rmo_st_issue ;		// rmo store issued from thread's stb.
+
+output		stb_full ;
+output		st_pcx_rq_kill_w2 ;
+
+   input  lsu_outstanding_rmo_st_max;
+   
+   wire [7:0] stb_state_rst;
+   
+   wire [7:0] stb_state_vld;
+   wire [7:0] stb_state_vld_din;
+   wire [7:0] stb_state_vld_set;
+   
+   wire [7:0] stb_state_ced;
+   wire [7:0] stb_state_ced_din;
+   wire [7:0] stb_state_ced_set;
+
+   wire [7:0] stb_state_ack;
+   wire [7:0] stb_state_ack_din;
+   wire [7:0] stb_state_ack_set;
+
+   wire [3:2] stb_state_si_0;	// removed 8x4 bits
+   wire [3:2] stb_state_si_1;
+   wire [3:2] stb_state_si_2;
+   wire [3:2] stb_state_si_3;
+   wire [3:2] stb_state_si_4;
+   wire [3:2] stb_state_si_5;
+   wire [3:2] stb_state_si_6;
+   wire [3:2] stb_state_si_7;
+/*
+   wire [3:2] stb_state_si_0_din;
+   wire [3:2] stb_state_si_1_din;
+   wire [3:2] stb_state_si_2_din;
+   wire [3:2] stb_state_si_3_din;
+   wire [3:2] stb_state_si_4_din;
+   wire [3:2] stb_state_si_5_din;
+   wire [3:2] stb_state_si_6_din;
+   wire [3:2] stb_state_si_7_din;
+*/   
+   wire [7:0] stb_state_io;
+   wire [7:0] stb_state_io_din;
+
+   wire [7:0] stb_state_rmo;
+//   wire [7:0] stb_state_rmo_din;
+   
+   wire [2:1] stb_state_rtype_0; // rm 8x1 bits
+   wire [2:1] stb_state_rtype_1;
+   wire [2:1] stb_state_rtype_2;
+   wire [2:1] stb_state_rtype_3;
+   wire [2:1] stb_state_rtype_4;
+   wire [2:1] stb_state_rtype_5;
+   wire [2:1] stb_state_rtype_6;
+   wire [2:1] stb_state_rtype_7;
+/*
+   wire [2:1] stb_state_rtype_0_din;
+   wire [2:1] stb_state_rtype_1_din;
+   wire [2:1] stb_state_rtype_2_din;
+   wire [2:1] stb_state_rtype_3_din;
+   wire [2:1] stb_state_rtype_4_din;
+   wire [2:1] stb_state_rtype_5_din;
+   wire [2:1] stb_state_rtype_6_din;
+   wire [2:1] stb_state_rtype_7_din;
+*/
+   wire [2:0] stb_l2bnk_addr;
+   wire [2:1] stb_atm_rq_type;
+      
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+wire	[3:0]	stb_wptr_prev ;
+wire		stb_rptr_dfq_en ;
+wire		update_stb_wptr ;
+//wire 	[1:0] 	st_enc_set_way ;
+wire	[3:0]	stb_rptr_dfq_new, stb_rptr_dfq ;
+wire 	valid_entry_for_pcx ;
+wire	[7:0]	dec_wptr_g, dec_rptr_dfq, dec_rptr_pcx, dec_ackptr ;
+wire	[7:0]	dec_wptr_m ;
+//wire		stb_wvld_g ;
+//wire	[5:0]	stb_inv_set0,stb_inv_set1;
+//wire	[5:0]	stb_inv_set2,stb_inv_set3;
+
+wire		ack_vld ;
+wire	[3:0]	stb_wptr_new, stb_wptr ;	
+wire		stb_cam_wvld_g ;
+wire	[7:0] 	inflight_vld_g ;
+wire		dq_vld_d1,dq_vld_d2 ;
+wire	[7:0]	dqptr_d1,dqptr_d2;
+wire		pcx_rq_for_stb_d1 ;
+wire    	pcx_rq_for_stb_d2,pcx_req_squash_d2 ;
+
+   wire       clk;
+   assign     clk = rclk;
+
+   wire       rst_l;
+   wire       stb_ctl_rst_l;
+   
+   dffrl_async rstff(.din (grst_l),          // grst_l is captured on clk ...
+                     .q   (stb_ctl_rst_l),   // ... to produce the block-local reset
+                     .clk (clk), .se(se), .si(), .so(),  // scan-in / scan-out are left unconnected
+                     .rst_l (arst_l));       // arst_l drives the cell's rst_l pin (presumably an async clear, going by the cell name - confirm)
+   assign     rst_l = stb_ctl_rst_l;        // active-low: inverted when building the reset term below
+   
+//=========================================================================================
+//	RESET
+//=========================================================================================
+
+// A flush will reset the vld bit in the stb - it should be the only one as
+// the stb has drained.
+
+   wire   reset;
+   //waiting int 3.0
+   //assign rst_l = stb_ctl_rst_l;
+
+   assign reset = ~rst_l | flshinst_rst ;
+
+//=========================================================================================
+//	STB READ FOR PCX
+//=========================================================================================
+
+// Assumes that an entry can be sent to the pcx iff the next oldest
+// entry has received its ack. This pointer will not look for L2Bank
+// overlap as the ptr calculation is much more complicated.
+
+// (1)--> Entry must be valid and not already sent to pcx.
+//		Includes squashing of speculative req
+// (2)--> Previous in linked list must be valid and acked (or invalid)
+// (3)--> This is to break the deadlock between oldest and youngest
+// entries when queue is full. Oldest entry can always exit to pcx.
+
+// This vector is one-hot. Assumption is that stb is a circular queue.
+// deadlock has to be broken between oldest and youngest entry when the
+// queue is full. The dfq ptr is used to mark oldest
+
+dff_s #(2)  rq_stgd1       (	// 2-bit stage feeding st_vld_rq_d2
+        .din    ({pcx_rq_for_stb_d1,pcx_req_squash}), 	// note: pcx_req_squash is staged only by this flop, yet its output carries the _d2 suffix
+        .q    	({pcx_rq_for_stb_d2,pcx_req_squash_d2}), 	// the two outputs are combined as pcx_rq_for_stb_d2 & ~pcx_req_squash_d2
+        .clk    (clk),
+        .se     (se), .si     (), .so ()	// scan-in / scan-out left unconnected
+        );
+
+wire	ffu_bst_wr_g ;
+dff_s #(1)  ff_bstg       (
+        .din    (lsu_blk_st_m),
+        .q    	(ffu_bst_wr_g),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+wire	full_flush_st_g ;	// flush of the stb write currently in the g stage
+// Asserted for stb_flush_st_g, or for the pipe flush unless the write is a blk st (ffu_bst_wr_g); always gated by stb_cam_wvld_g.
+assign	full_flush_st_g = (stb_flush_st_g | (lsu_stbctl_flush_pipe_w & ~ffu_bst_wr_g)) & stb_cam_wvld_g ;
+
+// timing fix: 5/6 -  begin
+// qual dec_rptr_pcx w/ tlb camhit and in qctl1 move kill qual after store pick
+wire      tlb_cam_hit_g, tlb_hit_g;
+wire      dtlb_bypass_m, dtlb_bypass_g ;
+
+dff_s #(1)  ff_dtlb_bypass_m       (
+        .din    (lsu_dtlb_bypass_e),
+        .q    	(dtlb_bypass_m),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+dff_s #(1)  ff_dtlb_bypass_g       (
+        .din    (dtlb_bypass_m),
+        .q    	(dtlb_bypass_g),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+dff_s #(1)  ff_tlb_cam_hit_g       (
+        .din    (tlb_cam_hit),
+        .q    	(tlb_cam_hit_g),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+assign  tlb_hit_g  =  tlb_cam_hit_g | dtlb_bypass_g | ffu_bst_wr_g; //bug6406/eco6610
+// timing fix: 5/6 -  end
+
+// st rq can now speculate on flush
+assign	inflight_vld_g[7:0] = 
+	dec_wptr_g[7:0] & {8{stb_cam_wvld_g & thrd_en_g}} ; 
+	// the latter term is for an inflight ld which gets squashed. It
+	// should not affect dec_rptr_pcx. This is related to a timing fix
+	// where the flush is taken out of inflight_vld_g.
+//assign	inflight_vld_g[7:0] = dec_wptr_g[7:0] & {8{stb_wvld_g & thrd_en_g}} ;
+
+//timing fix: 5/6/03 - kill inflight vld if tlb_hit_g=0; dec_rptr_pcx will be 0 and hence kill_w2 will be 0
+// leave inflight_vld_g as is, since it is used to set squash - which eventually reset state_vld
+wire [7:0] inflight_issue_g_tmp ;
+
+assign  inflight_issue_g_tmp[7:0]  =  inflight_vld_g[7:0] & {8{tlb_hit_g}};
+
+wire [7:0] inflight_issue_g ;
+assign	inflight_issue_g[7:0] = 
+	inflight_issue_g_tmp[7:0] & {8{~(|(stb_state_vld[7:0] & ~stb_state_ack[7:0]))}};  
+	//inflight_vld_g[7:0] & {8{~(|(stb_state_vld[7:0] & ~stb_state_ack[7:0]))}};  // timing fix : 5/6
+
+
+// Modified state ced includes in-flight pcx sel which is not squashed.
+// Timing : pcx_req_squash delayed. A st that is squashed can then make a request 3-cycles
+// later.
+wire	skid_ced, st_vld_rq_d2 ;
+assign	st_vld_rq_d2 = pcx_rq_for_stb_d2 & ~pcx_req_squash_d2 ;	// request selected two cycles ago and not squashed
+assign	skid_ced = pcx_rq_for_stb_d1 | st_vld_rq_d2 ;	// request selected last cycle, or an unsquashed one from two cycles ago
+// For squashing rawp: stored ced bits plus the entry at dec_ackptr while its unsquashed request is in the d2 stage.
+assign	stb_state_ced_mod[7:0] =
+	((dec_ackptr[7:0] & {8{st_vld_rq_d2}}) | stb_state_ced[7:0]) ;
+
+//RMO st counter saturated (the wire below keeps the historical spelling rmo_st_satuated)
+   
+wire  rmo_st_satuated;   
+//dff #(1) rmo_st_satuated_ff  (
+//    .din (lsu_outstanding_rmo_st_max),
+//    .q   (rmo_st_satuated),
+//    .clk    (clk),
+//    .se     (se), .si     (), .so ()
+//);
+                          
+   assign rmo_st_satuated  =  lsu_outstanding_rmo_st_max;
+                      
+wire	[7:0]	stb_state_ced_spec ;
+assign	stb_state_ced_spec[7:0] =
+	((dec_ackptr[7:0] & {8{skid_ced}}) | stb_state_ced[7:0]) | 
+   (stb_state_rmo[7:0] & {8{rmo_st_satuated}});
+   
+assign 	dec_rptr_pcx[7:0] =	// per-entry "may be read for pcx" vector; see conditions (1)-(3) above
+		 (inflight_issue_g[7:0] | stb_state_vld[7:0]) 	// entry is valid, or is the g-stage write allowed to issue
+		 //(inflight_vld_g[7:0] | stb_state_vld[7:0]) 
+			& ~stb_state_ced_spec[7:0] &	// -->(1) not already (speculatively) committed, not a blocked rmo
+		(({stb_state_vld[6:0],stb_state_vld[7]} &	// rotate by one: bit i looks at entry i-1 (entry 7 for bit 0)
+		  {stb_state_ack[6:0],stb_state_ack[7]}) // previous entry is valid and acked
+		| ~{stb_state_vld[6:0],stb_state_vld[7]}	// -->(2) or previous entry is not valid
+		| dec_rptr_dfq[7:0]) ;				// -->(3) entry at the dfq read pointer skips the previous-entry check
+
+
+// There should be only one such entry i.e., the vector is 1-hot.
+// Incorporate st dtlb parity error. It should not propagate to memory.
+// Tracing full_flush_st_g, note that the pointers will not be restored
+// correctly for timing reasons - anyway, this is considered unrecoverable.
+// Monitor !
+assign valid_entry_for_pcx = |dec_rptr_pcx[7:0] ;
+
+wire	any_inflight_iss_g,any_inflight_iss_w2 ;
+assign	any_inflight_iss_g = |inflight_vld_g[7:0] ;
+wire	pick_inflight_iss_g,pick_inflight_iss_w2 ;
+assign	pick_inflight_iss_g = |(dec_rptr_pcx[7:0] & inflight_issue_g[7:0]) ;
+
+wire	st_pcx_rq_kill_g ;
+assign	st_pcx_rq_kill_g = pick_inflight_iss_g & full_flush_st_g ;
+//assign	st_pcx_rq_kill_g = (|(dec_rptr_pcx[7:0] & inflight_issue_g[7:0])) & full_flush_st_g ;
+
+wire	st_vld_squash_g,st_vld_squash_w2 ;
+assign	st_vld_squash_g = any_inflight_iss_g & full_flush_st_g ;
+//assign	st_vld_squash_g = (|inflight_vld_g[7:0]) & full_flush_st_g ;
+
+wire st_pcx_rq_kill_tmp,st_vld_squash_tmp ;
+wire st_dtlb_perr_w2 ;
+dff_s #(5)  stkill_stgd1       (
+        .din    ({st_pcx_rq_kill_g,st_vld_squash_g,
+		any_inflight_iss_g,pick_inflight_iss_g,st_dtlb_perr_g}), 
+	.q      ({st_pcx_rq_kill_tmp,st_vld_squash_tmp,
+		any_inflight_iss_w2,pick_inflight_iss_w2,st_dtlb_perr_w2}), 
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+assign	st_pcx_rq_kill_w2 = 	// kill the pcx request of a store that was picked while still in flight
+		st_pcx_rq_kill_tmp | 	// staged: picked in-flight store was flushed (full_flush_st_g)
+		(pick_inflight_iss_w2 & st_dtlb_perr_w2); 	// or it took a dtlb parity error (staged st_dtlb_perr_g)
+
+assign	st_vld_squash_w2  =	// same two causes, but for any in-flight write (inflight_vld_g), picked or not
+		st_vld_squash_tmp  |
+		(any_inflight_iss_w2 & st_dtlb_perr_w2);
+
+
+// Encode pcx rptr: OR-based 8-to-3 encoder of dec_rptr_pcx; result is 0 when no bit, or only bit 0, is set.
+// ** Timing : Could put flop in rwctl. 
+assign stb_pcx_rptr[0] = dec_rptr_pcx[1] | dec_rptr_pcx[3] | dec_rptr_pcx[5] | dec_rptr_pcx[7] ;	// odd entries
+assign stb_pcx_rptr[1] = dec_rptr_pcx[2] | dec_rptr_pcx[3] | dec_rptr_pcx[6] | dec_rptr_pcx[7] ;	// entries with index bit 1 set
+assign stb_pcx_rptr[2] = dec_rptr_pcx[4] | dec_rptr_pcx[5] | dec_rptr_pcx[6] | dec_rptr_pcx[7] ;	// upper half
+
+// This is used in qctl.
+// Timing : flopped in qctl before use.
+assign	stb_rd_for_pcx = valid_entry_for_pcx ; 
+
+//=========================================================================================
+//	STB READ FOR DFQ
+//=========================================================================================
+
+
+// Read Pointer to generate the next available entry for the dfq.
+// Timing : This should be fine as st_ack_dq_stb is decode out of dfq byp flop.
+wire	incr_dfq_ptr ;
+// stb_rmo_st_issue added for rmo st bug - if critical then add flop.
+
+// bug2983: incr_dfq_ptr is set by both st_ack_dq_stb and stb_rmo_st_issue
+//          in the same cycle. this results in losing a dequeue.
+//
+//          fix is to detect rmo store after regular store. issue the rmo
+//          store and dont reset the rmo store vld until the dequeue of the older
+//          regular store.
+
+wire    stb_dq_rmo ;
+
+//assign	incr_dfq_ptr = st_ack_dq_stb | stb_rmo_st_issue ; //bug 2983
+assign	incr_dfq_ptr = st_ack_dq_stb | stb_dq_rmo ;	// advance on a regular dequeue or on an rmo dequeue
+
+assign	stb_rptr_dfq_new[3:0]	=	stb_rptr_dfq[3:0]  + {3'b0, incr_dfq_ptr} ;	// 4-bit pointer, +1 at most per cycle
+//assign	stb_rptr_dfq_new[3:0]	=	stb_rptr_dfq[3:0]  + {3'b0, st_ack_dq_stb} ;
+
+assign stb_rptr_dfq_en = st_ack_dq_stb | incr_dfq_ptr ; 	// st_ack_dq_stb is already a term of incr_dfq_ptr, so this equals incr_dfq_ptr
+
+dffre_s #(4)  rptr_d	(	// holds the 4-bit dfq read pointer stb_rptr_dfq
+	.din		(stb_rptr_dfq_new[3:0]),.q	(stb_rptr_dfq[3:0]),
+	.en		(stb_rptr_dfq_en),	.rst	(reset), 	// reset = ~rst_l | flshinst_rst; en/rst behaviour per the dffre_s cell (not shown here)
+	.clk		(clk), 
+	.se		(se),	.si	(), .so	()	// scan-in / scan-out left unconnected
+	);
+
+//assign	stb_dfq_rptr[2:0] = stb_rptr_dfq_new[2:0] ;
+
+// Decode Read Ptr: 3-to-8 one-hot decode of stb_rptr_dfq[2:0]; bit 3 of the pointer is not decoded here.
+// Generated cycle before actual read.
+assign	dec_rptr_dfq[0]	= ~stb_rptr_dfq[2] & ~stb_rptr_dfq[1] & ~stb_rptr_dfq[0] ;	// 3'b000
+assign	dec_rptr_dfq[1]	= ~stb_rptr_dfq[2] & ~stb_rptr_dfq[1] &  stb_rptr_dfq[0] ;	// 3'b001
+assign	dec_rptr_dfq[2]	= ~stb_rptr_dfq[2] &  stb_rptr_dfq[1] & ~stb_rptr_dfq[0] ;	// 3'b010
+assign	dec_rptr_dfq[3]	= ~stb_rptr_dfq[2] &  stb_rptr_dfq[1] &  stb_rptr_dfq[0] ;	// 3'b011
+assign	dec_rptr_dfq[4]	=  stb_rptr_dfq[2] & ~stb_rptr_dfq[1] & ~stb_rptr_dfq[0] ;	// 3'b100
+assign	dec_rptr_dfq[5]	=  stb_rptr_dfq[2] & ~stb_rptr_dfq[1] &  stb_rptr_dfq[0] ;	// 3'b101
+assign	dec_rptr_dfq[6]	=  stb_rptr_dfq[2] &  stb_rptr_dfq[1] & ~stb_rptr_dfq[0] ;	// 3'b110
+assign	dec_rptr_dfq[7]	=  stb_rptr_dfq[2] &  stb_rptr_dfq[1] &  stb_rptr_dfq[0] ;	// 3'b111
+
+// Stage dfq ptr and dq vld by 2 cycles to the appropriate invalidation point
+dff_s #(9)  dq_stgd1       (
+        .din    ({dec_rptr_dfq[7:0],st_ack_dq_stb}), 
+	.q      ({dqptr_d1[7:0],dq_vld_d1}),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+dff_s #(9)  dq_stgd2       (
+        .din    ({dqptr_d1[7:0],dq_vld_d1}), 
+	.q      ({dqptr_d2[7:0],dq_vld_d2}),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+//=========================================================================================
+//	WPTR FOR STB
+//=========================================================================================
+
+// It is assumed that if there is a store in the pipe, there is a
+// free entry in the corresponding stb. Otherwise, the pipe would
+// have stalled for the thread. This is maintained locally instead of in
+// stb rw ctl.
+
+// {flush-or-perr, wr}: 00 - no update, 01 - wptr+1, 10 - wptr-1 (load stb_wptr_prev), 11 - no update (the two cancel)
+// cam or data wr ptr would do. 
+//assign  update_stb_wptr         =       stb_cam_wvld_m |  stb_flush_st_g ;
+assign  update_stb_wptr         =       stb_cam_wvld_m ^  (full_flush_st_g | st_dtlb_perr_g);	// XOR: enable only when exactly one of write / flush-or-perr is active
+
+assign  stb_wptr_new[3:0]       =       (full_flush_st_g | st_dtlb_perr_g) ?	// flush or dtlb parity error on the g-stage store
+                                                        stb_wptr_prev[3:0] :	// back up by one (stb_wptr - 1)
+                                                        stb_wptr[3:0] + {3'b0, stb_cam_wvld_m} ;	// otherwise advance on an m-stage write
+
+dff_s  wvld_stgg       (
+        .din    (stb_cam_wvld_m), .q      (stb_cam_wvld_g),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+
+//assign	stb_wvld_g = stb_cam_wvld_g & ~full_flush_st_g ;
+
+dffre_s #(4)  wptr_new    (
+        .din            (stb_wptr_new[3:0]),    .q      (stb_wptr[3:0]),
+        .en             (update_stb_wptr),    .rst    (reset),
+        .clk            (clk),
+        .se             (se), .si     (), .so ()
+        );
+
+assign	stb_wrptr[2:0]	= stb_wptr[2:0] ;
+
+wire [2:0] stb_wptr_m ;
+// flush should not be required. If the previous st is flushed then
+// the current st should be invalid.
+assign  stb_wptr_m[2:0]       =      stb_wptr[2:0] ;
+/*assign  stb_wptr_m[3:0]       =       (full_flush_st_g) ?
+                                                        stb_wptr_prev[3:0] :
+                                                        stb_wptr[3:0] ;*/
+
+// Decode wptr
+assign	dec_wptr_m[0] = ~stb_wptr_m[2] & ~stb_wptr_m[1] & ~stb_wptr_m[0] ;	
+assign	dec_wptr_m[1] = ~stb_wptr_m[2] & ~stb_wptr_m[1] &  stb_wptr_m[0] ;	
+assign	dec_wptr_m[2] = ~stb_wptr_m[2] &  stb_wptr_m[1] & ~stb_wptr_m[0] ;	
+assign	dec_wptr_m[3] = ~stb_wptr_m[2] &  stb_wptr_m[1] &  stb_wptr_m[0] ;	
+assign	dec_wptr_m[4] =  stb_wptr_m[2] & ~stb_wptr_m[1] & ~stb_wptr_m[0] ;	
+assign	dec_wptr_m[5] =  stb_wptr_m[2] & ~stb_wptr_m[1] &  stb_wptr_m[0] ;	
+assign	dec_wptr_m[6] =  stb_wptr_m[2] &  stb_wptr_m[1] & ~stb_wptr_m[0] ;	
+assign	dec_wptr_m[7] =  stb_wptr_m[2] &  stb_wptr_m[1] &  stb_wptr_m[0] ;
+
+dff_s #(8)  dwptr_stgg       (
+        .din    (dec_wptr_m[7:0]), .q      (dec_wptr_g[7:0]),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+// stb_wptr_prev represents the latest valid entry in stb
+/*dffre #(4)  wptr_prev   (
+        .din            (stb_wptr[3:0]),        .q      (stb_wptr_prev[3:0]),
+        .en             (update_stb_wptr),      .rst    (reset),
+        .clk            (clk),
+        .se             (se), .si     (), .so ()
+        );*/
+
+assign	stb_wptr_prev[3:0] = stb_wptr[3:0] - {4'b0001} ;
+
+// Bug 2419 - In case this is a critical path, a flop can be inserted.
+assign	stb_wrptr_prev[2:0]	= stb_wptr_prev[2:0] ;
+
+//=========================================================================================
+//	# OF STORES IN STB
+//=========================================================================================
+
+wire	[3:0]	stb_wptr_w2 ;
+
+// Count should not include stores in pipe-stages 'g' or before.
+dff_s #(4)  wptr_stgw2       (
+        .din    (stb_wptr[3:0]), .q      (stb_wptr_w2[3:0]),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+assign	lsu_stbcnt[3:0] =  (stb_wptr_w2[3:0] - stb_rptr_dfq[3:0]) ;	// 4-bit modular difference: staged write pointer minus dfq read pointer
+
+// Performance Cntr Info
+wire	stb_full_w2 ;
+assign	stb_full_w2 = lsu_stbcnt[2] & lsu_stbcnt[1] & lsu_stbcnt[0] ;	// set when the low 3 count bits are all ones (count 7 or 15). NOTE(review): a count of 8 does not set it - confirm intended
+dff_s   sfull (	// stb_full is stb_full_w2 staged through one more flop
+        .din    (stb_full_w2), .q      (stb_full),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+//=========================================================================================
+//	CONTROL STATE
+//=========================================================================================
+
+// (V) 	-	Valid State. Initialized by write and cleared once entry
+// 		has written DFQ and then written the cache. If the store
+//		will only bypass then it still needs to enter DFQ but 
+//		can be deallocated immediately on entry into DFQ. (1b)
+// (A)	-	(NA) Allocate. Determined on read of cache. May be modified by
+//		invalidate or st mv'ing to DFQ. The load would have to
+//		have same set index and same replacement way to clear A bit. (1b)
+// (SI) -	cache set index for invalidate/load cam'ing. (6b)
+// (WY) -	(NA) Allocate way for store. (2b)
+// (CED) -	Committed to SKB. Entry written to SKB. (1b)
+// (ACK) - 	Ack for store received from L2. (1b)
+// (UPD) -	(NA) Entry mv'ed to DFQ. (1b)
+// (W)   -  	(NA) Wrap bit. (1b) <--- Not used
+// * All state needs to be reset when entry is freed.
+//
+// Total - 14b.
+
+// ack_id is internally tracked: dec_ackptr remembers which entry was last read for pcx.
+// There can only be one outstanding
+dffre_s #(8)  ackptr_ff	(
+	.din		(dec_rptr_pcx[7:0]), .q	(dec_ackptr[7:0]),	// captures the decoded pcx read pointer
+	.en		(pcx_rq_for_stb), .rst (reset),	// loaded only when this stb's store is selected for pcx
+	.clk		(clk), 
+	.se		(se),	.si	(), .so	()	// scan-in / scan-out left unconnected
+	);
+
+   
+assign 	ack_vld = cpx_st_ack_tid ;
+//assign	st_dc_hit_g = lsu_st_hit_g ;
+
+assign  stb_crnt_ack_id[0] = dec_ackptr[1] | dec_ackptr[3] |	// OR-based 8-to-3 encode of dec_ackptr: odd entries
+                                dec_ackptr[5] | dec_ackptr[7] ;
+assign  stb_crnt_ack_id[1] = dec_ackptr[2] | dec_ackptr[3] |	// entries with index bit 1 set
+                                dec_ackptr[6] | dec_ackptr[7] ;
+assign  stb_crnt_ack_id[2] = dec_ackptr[4] | dec_ackptr[5] |	// upper half; id reads 0 when dec_ackptr is all zero or only bit 0 is set
+                                dec_ackptr[6] | dec_ackptr[7] ;       
+
+// Decode valid dequeue ids arriving from dfq.
+
+// pa[39:32]
+// 0x00-0x7f  dram
+// 0xa0-0xbf  l2csr
+// others are non-l2 accesses = b39 & ~(~b38 & b37)
+// timing fix: stb_non_l2bnk is delayed 1 cycle - gen in w/g cycle
+//assign	stb_non_l2bnk = stb_alt_sel ?
+//	stb_alt_addr[2] & ~(~stb_alt_addr[1] & stb_alt_addr[0]) :
+//	tlb_pgnum_m[39]  & ~(~tlb_pgnum_m[38]  & tlb_pgnum_m[37])  & ~flsh_inst_m;
+
+wire   [2:0]  stb_alt_addr_g;
+wire          stb_alt_sel_g;
+
+dff_s #(4) ff_alt_addr_g       (
+        .din    ({stb_alt_sel,stb_alt_addr[2:0]}), 
+        .q      ({stb_alt_sel_g,stb_alt_addr_g[2:0]}),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+wire  flsh_inst_g;
+dff_s #(1) ff_flsh_inst_g       (
+        .din    (flsh_inst_m),
+        .q      (flsh_inst_g),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+wire   stb_alt_io_g , tlb_pgnum_io_g ;	// "non-l2" decode of the staged alternate address and of the tlb page number
+
+assign  stb_alt_io_g  =  	// same b39 & ~(~b38 & b37) form, applied to stb_alt_addr_g[2:0]
+        stb_alt_addr_g[2] & ~(~stb_alt_addr_g[1] & stb_alt_addr_g[0]);
+assign  tlb_pgnum_io_g  =  	// b39 & ~(~b38 & b37), forced to 0 for a flush instruction (flsh_inst_g)
+        tlb_pgnum_g[39]  & ~(~tlb_pgnum_g[38]  & tlb_pgnum_g[37])  & ~flsh_inst_g;
+
+// used as input to state_io in stb_ctldp. NOTE(review): stb_non_l2bnk_g is not in this module's port list, so its consumer must be inside this module - confirm
+wire   stb_non_l2bnk_g;
+assign  stb_non_l2bnk_g  =  
+        stb_alt_sel_g ? stb_alt_io_g :	// alternate address selected
+                        tlb_pgnum_io_g ;	// otherwise the tlb page-number decode
+
+// used as output to qctl1 - this has to be qual'ed w/dec_rptr_pcx so no x's propagate
+//alt_sel_g  state_vld  comment
+// 0         0          select tlb_pgnum_io_g(bypass)
+// 0         1          select stb_state_io
+// 1         0          select stb_alt_io_g
+// 1         1          select stb_state_io (state_vld takes priority in the stb_l2bnk_addr_b2 muxes - bug3875)
+
+wire  [7:0]  stb_l2bnk_addr_b2;
+
+//  inflight (stb_alt / tlb)
+//  stb
+//  bug3875       
+assign  stb_l2bnk_addr_b2[0]  =  
+     stb_state_vld[0] ? stb_state_io[0] :
+        stb_alt_sel_g ? stb_alt_io_g :
+                        tlb_pgnum_io_g ;
+
+assign  stb_l2bnk_addr_b2[1]  =  
+     stb_state_vld[1] ? stb_state_io[1] :
+        stb_alt_sel_g ? stb_alt_io_g :
+                        tlb_pgnum_io_g ;
+
+assign  stb_l2bnk_addr_b2[2]  =  
+     stb_state_vld[2] ? stb_state_io[2] :
+        stb_alt_sel_g ? stb_alt_io_g :
+                        tlb_pgnum_io_g ;
+
+assign  stb_l2bnk_addr_b2[3]  =  
+     stb_state_vld[3] ? stb_state_io[3] :
+        stb_alt_sel_g ? stb_alt_io_g :
+                        tlb_pgnum_io_g ;
+
+assign  stb_l2bnk_addr_b2[4]  =  
+     stb_state_vld[4] ? stb_state_io[4] :
+        stb_alt_sel_g ? stb_alt_io_g :
+                        tlb_pgnum_io_g ;
+
+assign  stb_l2bnk_addr_b2[5]  =  
+     stb_state_vld[5] ? stb_state_io[5] :
+        stb_alt_sel_g ? stb_alt_io_g :
+                        tlb_pgnum_io_g ;
+
+assign  stb_l2bnk_addr_b2[6]  =  
+     stb_state_vld[6] ? stb_state_io[6] :
+        stb_alt_sel_g ? stb_alt_io_g :
+                        tlb_pgnum_io_g ;
+
+assign  stb_l2bnk_addr_b2[7]  =  
+     stb_state_vld[7] ? stb_state_io[7] :
+        stb_alt_sel_g ? stb_alt_io_g :
+                        tlb_pgnum_io_g ;
+
+
+dff_s  rqsel_stgg       (
+        .din    (pcx_rq_for_stb), .q      (pcx_rq_for_stb_d1),
+        .clk    (clk),
+        .se     (se), .si     (), .so ()
+        );
+
+// Use of tlb_pgnum_m will be critical !!! 
+
+//always @( posedge clk)
+//	begin
+//	for (i=0;i<8;i=i+1) 	
+//		begin
+//			if (reset                                                 // reset
+//                                | (dqptr_d2[i] & dq_vld_d2)                  	  // dequeue from stb
+//				  | (dec_ackptr[i] & pcx_rq_for_stb_d1 & 
+//						~pcx_req_squash & stb_state_rmo[i])) 
+//				// write will be visible in cache.
+//				begin
+//					stb_state_vld[i] <= 1'b0 ;
+//					stb_state_ced[i] <= 1'b0 ;
+//					stb_state_ack[i] <= 1'b0 ;
+//				end
+//			if (dec_wptr_g[i] & stb_wvld_g & thrd_en_g )
+//				begin
+//					stb_state_vld[i] <= 1'b1 ;
+//					stb_state_wy[i] <=  st_enc_set_way[1:0];
+//				end
+//			if (dec_wptr_m[i] & stb_cam_wvld_m)	// spec. write
+//				begin
+//					stb_state_si[i] <=  lsu_ldst_va_m[9:4] ;
+//					stb_state_rtype[i] <= lsu_st_rq_type_m[2:0] ;
+//					stb_state_io[i] <=  non_l2bnk ;
+//					stb_state_rmo[i] <= lsu_st_rmo_m ;
+//				end
+//			// atomic will not write to cache even if it hits.
+//			// rd_for_pcx needs to be gated for a cycle.
+//			// This is delayed by a cycle to take into account
+//			// squashing of speculative requests.
+//			// rmo's will dequeue entry immediately.
+//			if (dec_ackptr[i] & pcx_rq_for_stb_d1 & ~pcx_req_squash & ~stb_state_rmo[i]) 
+//				stb_state_ced[i] = 1'b1 ;
+//			if (dec_ackptr[i] & ack_vld)
+//				stb_state_ack[i] = 1'b1 ;
+			
+//		end
+//	end
+
+// UNIFY : mux select destination address of pcx pkt
+
+// always->dff translation begin
+
+   // =================================
+   // rst  set  din
+   // 0    0    q
+   // 1    0    0 (reset)
+   // x    1    1 (set)
+   // ==================================
+   // din = set | (~r & q)
+
+   //vld 
+   wire	[7:0]	stb_issue_rmo ;
+   wire	[7:0]	flush_vld_w2 ;
+   // Timing 
+   assign	stb_issue_rmo[7:0] = 	// one bit per entry: the entry at dec_ackptr is an rmo store whose request was not squashed
+	(dec_ackptr[7:0] & {8{st_vld_rq_d2}} & stb_state_rmo[7:0]) ;
+	// (dec_ackptr[7:0] & {8{pcx_rq_for_stb_d1}} & 
+	//	{8{~pcx_req_squash}} & stb_state_rmo[7:0]) ;
+   assign	stb_rmo_st_issue = |stb_issue_rmo[7:0] ;	// OR-reduce: some rmo store issued this cycle
+
+   //bug2983 - begin
+   wire        rmo_pend,rmo_pend_d1;
+   wire [7:0]  rmo_pend_ackptr , stb_dq_rmo_dfq_ptr;
+   // this will set 1 cycle after pcx_rq_for_stb and before the corresponding ced is set (which is 2 cycles
+   // after pcx_rq_for_stb)
+   //bug3249: dec_rptr_dfq catches up w/ dec_ackptr; i.e. dec_ackptr entry is the oldest. rmo_pend should not
+   //         be set in this case based on previous entry (since it will be the youngest)
+   //         fix - kill pend if issue and dq ptr are same (~{8{|(dec_ackptr[7:0] & dec_rptr_dfq[7:0])}})
+   assign rmo_pend_ackptr[7:0]  =	// marks the dec_ackptr entry when it is an rmo store queued behind a still-valid regular store
+          // is the current req RMO store
+          //(dec_ackptr[7:0] & stb_state_rmo[7:0]) &  //bug3249
+          //(dec_ackptr[7:0] & stb_state_rmo[7:0] & ~dec_rptr_dfq[7:0]) &    //bug7100 new fix, bug7117
+          (dec_ackptr[7:0] & stb_state_rmo[7:0] & ~dqptr_d2[7:0]) & 	// excluded when it matches the 2-cycle-staged dfq pointer
+          // is the older store a regular store
+          ({stb_state_vld[6:0],stb_state_vld[7]} & ~{stb_state_rmo[6:0],stb_state_rmo[7]});	// rotate by one: bit i tests entry i-1 (entry 7 for bit 0)
+
+   assign rmo_pend = |rmo_pend_ackptr[7:0];	// OR-reduce of the per-entry pending vector
+
+   wire   rmo_pend_rst;
+   assign rmo_pend_rst  =  reset | stb_dq_rmo;	// cleared by reset or when the rmo dequeue happens
+
+   dffre_s #(1)  ff_rmo_pend      (	// remembers rmo_pend as sampled when the request went valid
+         .din  (rmo_pend),
+         .q    (rmo_pend_d1),
+         .en   (st_vld_rq_d2),      	// sample only in the cycle of an unsquashed d2 request
+         .rst  (rmo_pend_rst),
+         .clk  (clk),
+         .se   (se), .si     (), .so ()	// scan-in / scan-out left unconnected
+         );
+
+   // ok to use either dec_ackptr[7:0] OR dec_rptr_dfq[7:0] 'cos the stores younger to 1st RMO store
+   // are not issued ('cos vld of RMO store is not reset). Hence ackptr and rptr_dfq will be the same
+   // when rmo_pend=0.
+   //
+   // has to be qual'ed w/ st_vld_rq_d2; otherwise vld can be reset before ced is set, and the next
+   // time the entry is used it will have ced=1 and not issue.
+   //
+   // cannot use rmo_pend_ackptr[7:0] instead of dec_ackptr[7:0] 'cos the former will be reset when
+   // rmo_pend=0 and will not dequeue the rmo stb entry. i.e if rmo_pend=1 when st_vld_rq_d2=1, use
+   // dec_ackptr[7:0]
+
+   //------------------------------------------------------------------------------------------------
+   // Case 1: NO older regular store vld dequeue pending
+   //------------------------------------------------------------------------------------------------
+   // |        1           |    2    |    3    |    4     |     5    |          |          |
+   // stb_state_vld=8'h1------------------------------------->8'h0
+   // stb_state_rmo=8'h1
+   //
+   // pcx_rq_for_stb=1-------->0                     
+   //
+   // dec_ackptr=8'h0--------->8'h1
+   //
+   // st_vld_rq_d2=0--------------------->1           0
+   // stb_issue_rmo=8'h0-------------->8'h1        8'h0
+   // stb_dq_rmo_dfq_ptr=8'h0--------->8'h1       8'h0
+   //
+   // rmo_pend=0
+   // rmo_pend_d1=0
+   //
+   // dq_vld_d2=0
+   // dqptr_d2=8'h0
+   //------------------------------------------------------------------------------------------------
+   // Case 2: older regular store vld dequeue pending(entry0-older reg store; entry1-rmo younger store)
+   //------------------------------------------------------------------------------------------------
+   // |        1              |    2     |   3    |    4    |    5    |    6    |          | 
+   // stb_state_vld=8'h3-------------------------------------->8'h2      8'h0
+   // stb_state_rmo=8'h2
+   // stb_state_ack=8'h1-------------------------------------->8'h0
+   //
+   // pcx_rq_for_stb=1-------------->0                     
+   //
+   // dec_ackptr=8'h1------------>8'h2
+   //
+   // st_vld_rq_d2=0-------------------------->1        0
+   // stb_issue_rmo=8'h0------------------->8'h1     8'h0
+   // stb_dq_rmo_dfq_ptr=8'h0--------------------------------->8'h2      8'h0 (dequeue rmo store)
+   //
+   // rmo_pend=0-------------------->1                           0
+   // rmo_pend_d1=0--------------------------->1                            0
+   //
+   // dq_vld_d2=0-------------------------------------->1        0
+   // dqptr_d2=8'h0--------------------------------->8'h1     8'h0 (dequeue regular store)
+   //------------------------------------------------------------------------------------------------
+
+   assign stb_dq_rmo_dfq_ptr[7:0] = 
+          (stb_issue_rmo[7:0]   & ~rmo_pend_ackptr[7:0]) |         // if rmo_pend=0 when st_vld_rq_d2=1
+          (dec_ackptr[7:0]      & {8{rmo_pend_d1 & ~rmo_pend}});   // if rmo_pend=1 when st_vld_rq_d2=1
+
+   assign stb_dq_rmo  =  |stb_dq_rmo_dfq_ptr[7:0];
+   //bug2983 - end
+
+   assign stb_state_rst[7:0] = 
+	{8{reset}} | (dqptr_d2[7:0] & {8{dq_vld_d2}})
+	// reset vld,ced,ack immed. on issue to pcx for rmo store.
+	| stb_dq_rmo_dfq_ptr[7:0] |  // fix for bug2983
+	// | stb_issue_rmo[7:0] |  // bug2983
+        flush_vld_w2[7:0] ;	// because of trap
+
+   // vld is now speculatively written
+   assign stb_state_vld_set[7:0] = dec_wptr_g[7:0] & {8{stb_cam_wvld_g & thrd_en_g}} ;
+   //assign stb_state_vld_set[7:0] = dec_wptr_g[7:0] & {8{stb_wvld_g & thrd_en_g}} ;
+   assign stb_state_vld_din[7:0] = stb_state_vld_set[7:0] | 
+                                  (~stb_state_rst[7:0] & stb_state_vld[7:0]);
+  
+   wire	[7:0] stb_state_vld_tmp ; 
+   dff_s #(8)  ff_stb_state_vld       (
+        .din    (stb_state_vld_din[7:0]), 
+        .q      (stb_state_vld_tmp[7:0]    ),
+        .clk    (clk),
+        .se     (se), .si (), .so ()
+        );
+	
+   assign stb_state_vld[7:0] = stb_state_vld_tmp[7:0] & ~flush_vld_w2[7:0] ;
+
+   wire	[7:0] stb_state_vld_set_w2 ;
+   dff_s #(8)  ff_stb_state_vld_set       (
+        .din    (stb_state_vld_set[7:0]), 
+        .q      (stb_state_vld_set_w2[7:0]    ),
+        .clk    (clk),
+        .se     (se), .si (), .so ()
+        );
+
+   assign flush_vld_w2[7:0] = stb_state_vld_set_w2[7:0] & {8{st_vld_squash_w2}} ;
+
+   // The stb valids for the scm need not include the intermediate flush condition
+   // (flush_vld_w2). It is assumed that the flush of the store will invalidate 
+   // a subsequent ld. (8 extra flops).
+   // Bug 3201 - rmo st are made invisible to loads.
+   
+   wire [7:0]  st_scm_vld ;
+   assign st_scm_vld[7:0] = stb_state_vld_din[7:0] & ~stb_state_rmo[7:0] ;
+   
+   dff_s #(8)  ff_st_scm_vld       (
+        .din    (st_scm_vld[7:0]), 
+        .q      (stb_state_vld_out[7:0]    ),
+        .clk    (clk),
+        .se     (se), .si (), .so ()
+        );
+
+   //ced
+   assign stb_state_ced_set[7:0] = dec_ackptr[7:0] & {8{st_vld_rq_d2}} ;
+   // Timing fix.
+   //assign stb_state_ced_set[7:0] = dec_ackptr[7:0] & {8{pcx_rq_for_stb_d1 & ~pcx_req_squash}};
+   // make reset dominant - specifically for coincident set and reset by rmo st.
+   assign stb_state_ced_din[7:0] = ~stb_state_rst[7:0] & 
+					(stb_state_ced_set[7:0] | stb_state_ced[7:0]);
+   //assign stb_state_ced_din[7:0] = stb_state_ced_set[7:0] | 
+   //                               (~stb_state_rst[7:0] & stb_state_ced[7:0]);
+   
+   dff_s #(8)  ff_stb_state_ced       (
+        .din    (stb_state_ced_din[7:0]), 
+        .q      (stb_state_ced[7:0]    ),
+        .clk    (clk),
+        .se     (se), .si (), .so ()
+        );
+
+   //ack
+   assign stb_state_ack_set[7:0] = dec_ackptr[7:0] & {8{ack_vld}};
+   assign stb_state_ack_din[7:0] = stb_state_ack_set[7:0] | 
+                                  (~stb_state_rst[7:0] & stb_state_ack[7:0]);
+   
+   dff_s #(8)  ff_stb_state_ack       (
+        .din    (stb_state_ack_din[7:0]), 
+        .q      (stb_state_ack[7:0]    ),
+        .clk    (clk),
+        .se     (se), .si (), .so ()
+        );
+
+   //spec. write
+   wire [7:0] spec_wrt;   
+   assign     spec_wrt [7:0] = dec_wptr_m[7:0] & {8{stb_cam_wvld_m}};
+   assign     stb_clk_en_l [7:0] = ~spec_wrt[7:0];
+
+  //spec write Ffs move to lsu_stb_ctldp to save area      
+ 
+
+  // moved state_io logic from ctldp 
+
+  assign stb_state_io_din[7:0]  =  (stb_state_vld_set[7:0] & {8{stb_non_l2bnk_g}}) |
+                                   (~stb_state_rst[7:0] & stb_state_io[7:0]);
+
+   dff_s #(8)  ff_stb_state_io       (
+        .din    (stb_state_io_din[7:0]), 
+        .q      (stb_state_io[7:0]    ),
+        .clk    (clk),
+        .se     (se), .si (), .so ()
+        );
+
+// always->dff translation end    
+// streaming unit does not have to care about outstanding rmo sparc-stores.
+// membar will take care of that. spu must insert appr. delay in sampling signal.
+assign	lsu_stb_empty = ~(|stb_state_vld[7:0]);
+
+//=========================================================================================
+//	SELECT L2BANK ADDRESS
+//=========================================================================================
+
+//reg [5:0] temp ;
+//reg [2:0] stb_l2bnk_addr ;
+
+//// This is modelling a mux. 
+//always @(/*AUTOSENSE*/ /*memory or*/ dec_rptr_pcx)
+//	begin
+//		for (j=0;j<8;j=j+1) 	
+//			if (dec_rptr_pcx[j])	// 1-hot
+//				begin
+//				temp[5:0] 		= stb_state_si[j] ;
+//				stb_l2bnk_addr[2:0] 	= {stb_state_io[j],temp[4:3]} ;
+//				stb_atm_rq_type[2:0] 	= stb_state_rtype[j] ;
+//				end
+//	end
+
+
+//always->and-or translation begin
+   assign stb_l2bnk_addr[2:0] = {3{dec_rptr_pcx[0]}} & {stb_l2bnk_addr_b2[0], stb_state_si_0[3:2]} |
+                                {3{dec_rptr_pcx[1]}} & {stb_l2bnk_addr_b2[1], stb_state_si_1[3:2]} |
+                                {3{dec_rptr_pcx[2]}} & {stb_l2bnk_addr_b2[2], stb_state_si_2[3:2]} |
+                                {3{dec_rptr_pcx[3]}} & {stb_l2bnk_addr_b2[3], stb_state_si_3[3:2]} |
+                                {3{dec_rptr_pcx[4]}} & {stb_l2bnk_addr_b2[4], stb_state_si_4[3:2]} |
+                                {3{dec_rptr_pcx[5]}} & {stb_l2bnk_addr_b2[5], stb_state_si_5[3:2]} |
+                                {3{dec_rptr_pcx[6]}} & {stb_l2bnk_addr_b2[6], stb_state_si_6[3:2]} |
+                                {3{dec_rptr_pcx[7]}} & {stb_l2bnk_addr_b2[7], stb_state_si_7[3:2]} ;
+   
+   assign stb_atm_rq_type[2:1]= {2{dec_rptr_pcx[0]}} &  stb_state_rtype_0[2:1] |
+                                {2{dec_rptr_pcx[1]}} &  stb_state_rtype_1[2:1] |
+                                {2{dec_rptr_pcx[2]}} &  stb_state_rtype_2[2:1] |
+                                {2{dec_rptr_pcx[3]}} &  stb_state_rtype_3[2:1] |
+                                {2{dec_rptr_pcx[4]}} &  stb_state_rtype_4[2:1] |
+                                {2{dec_rptr_pcx[5]}} &  stb_state_rtype_5[2:1] |
+                                {2{dec_rptr_pcx[6]}} &  stb_state_rtype_6[2:1] |
+                                {2{dec_rptr_pcx[7]}} &  stb_state_rtype_7[2:1] ;
+   
+//always->and-or translation end
+          
+   
+endmodule
+
Index: /trunk/T1-CPU/lsu/lsu_dcache_lfsr.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_dcache_lfsr.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_dcache_lfsr.v	(revision 6)
@@ -0,0 +1,83 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_dcache_lfsr.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: lsu_dcache_lfsr
+*/
+////////////////////////////////////////////////////////////////////////
+
+module lsu_dcache_lfsr (/*AUTOARG*/
+   // Outputs
+   out, 
+   // Inputs
+   advance, clk, se, si, so, reset
+   );
+
+   // 5-bit shift-register LFSR; state bits q[0] and q[2] drive out[1:0].
+   //   reset   : next state is forced to 5'b11111
+   //   advance : state shifts up one place, new bit 0 = q[1] ^ q[4]
+   //   neither : state holds
+   // Recurrence s(n) = s(n-2) ^ s(n-5) is maximal length: period 31 over
+   // the non-zero states. All-zeros is the lock-up state and cannot be
+   // reached from the reset value.
+
+   // control
+   input        advance;
+   input        reset;
+
+   // clock and scan; si/so are not used inside this module
+   // NOTE(review): so is declared as an input although the name suggests
+   // scan-out -- confirm against the instantiating module.
+   input        clk;
+   input        se;
+   input        si;
+   input        so;
+
+   output [1:0] out;
+
+   wire [4:0]   q;        // current state, output of lfsr_reg
+   reg  [4:0]   q_next;   // next state, combinational
+
+   // Next-state logic: reset has priority over advance, otherwise hold.
+   always @ (/*AUTOSENSE*/advance or q or reset)
+     begin
+        if (reset)
+          q_next = 5'b11111;
+        else if (advance)
+          q_next = {q[3:0], q[1] ^ q[4]};
+        else
+          q_next = q;
+     end
+
+   // State register; scan-in/scan-out pins are left unconnected.
+   dff_s #(5) lfsr_reg (
+        .din  (q_next),
+        .q    (q),
+        .clk  (clk),
+        .se   (se), .si (), .so ()
+        );
+
+   assign out = {q[0], q[2]};
+endmodule // lsu_dcache_lfsr
+
+		
+	       
+
Index: /trunk/T1-CPU/lsu/lsu_tagdp.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_tagdp.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_tagdp.v	(revision 6)
@@ -0,0 +1,199 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_tagdp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module lsu_tagdp( /*AUTOARG*/
+   // Outputs
+   so, lsu_misc_rdata_w2, lsu_rd_dtag_parity_g, 
+   // Inputs
+   rclk, si, se, lsu_va_wtchpt_addr, lsu_va_wtchpt_sel_g, dva_vld_m, 
+   dtag_rdata_w0_m, dtag_rdata_w1_m, dtag_rdata_w2_m, 
+   dtag_rdata_w3_m, lsu_dtag_rsel_m, lsu_local_ldxa_data_g, 
+   lsu_local_ldxa_sel_g, lsu_tlb_rd_data, lsu_local_ldxa_tlbrd_sel_g, 
+   lsu_local_diagnstc_tagrd_sel_g
+   );
+// lsu_tagdp: selects one of the four dtag read words, builds the 64b misc read-data word, and reduces tag parity per way.
+   input         rclk;
+   input         si;
+   input         se;
+   output        so;
+   // NOTE(review): so is never driven in this module and every flop's si/so pin is left open.
+input [47:3]  lsu_va_wtchpt_addr ;   // watchpoint va bits 47:3; in2 of the misc read-data mux
+input         lsu_va_wtchpt_sel_g;   // select for in2 of the misc read-data mux
+   
+input  [3:0]     dva_vld_m;	  // valid array read; bit N is paired with way N's tag below
+input  [29:0]    dtag_rdata_w0_m; // way 0 from dtag: bit 29 = parity, bits 28:0 = tag
+input  [29:0]    dtag_rdata_w1_m; // way 1 from dtag: bit 29 = parity, bits 28:0 = tag
+input  [29:0]    dtag_rdata_w2_m; // way 2 from dtag: bit 29 = parity, bits 28:0 = tag
+input  [29:0]    dtag_rdata_w3_m; // way 3 from dtag: bit 29 = parity, bits 28:0 = tag
+input  [3:0]     lsu_dtag_rsel_m; // select one of the above tags (bit N picks way N); source unit not recorded
+
+input  [47:0]    lsu_local_ldxa_data_g; // from dctl
+input            lsu_local_ldxa_sel_g;  // selects the ldxa data (in0) in the misc read-data mux; source unit not recorded
+
+input  [63:0]    lsu_tlb_rd_data; // from tlbdp - used in local ldxa mux
+input            lsu_local_ldxa_tlbrd_sel_g;       // selects lsu_tlb_rd_data (in1)
+input            lsu_local_diagnstc_tagrd_sel_g;   // selects the flopped tag/valid word (in3)
+
+
+output [63:0]    lsu_misc_rdata_w2; // to qdp1
+output [3:0]     lsu_rd_dtag_parity_g; // parity check on 4 tags. to dctl
+
+
+wire             dtag_rdata_w0_parity_g,
+                 dtag_rdata_w1_parity_g,
+                 dtag_rdata_w2_parity_g,
+                 dtag_rdata_w3_parity_g;
+
+wire   [29:0]    dtag_rdata_sel_m,
+                 dtag_rdata_sel_g;
+
+
+wire   [3:0]     dtag_rdata_w0_8b_parity_m,
+                 dtag_rdata_w1_8b_parity_m,
+                 dtag_rdata_w2_8b_parity_m,
+                 dtag_rdata_w3_8b_parity_m;
+
+wire   [3:0]     dtag_rdata_w0_8b_parity_g,
+                 dtag_rdata_w1_8b_parity_g,
+                 dtag_rdata_w2_8b_parity_g,
+                 dtag_rdata_w3_8b_parity_g;
+
+wire	[63:0]	 lsu_misc_rdata_g;
+
+wire		 dtag_vld_sel_m, dtag_vld_sel_g;
+
+   wire  clk;
+   assign clk = rclk;
+   
+//=================================================================================================
+//      Select Tag Read data / ldxa data
+//=================================================================================================
+
+// select 1 out of 4 {tag word, valid bit} pairs using lsu_dtag_rsel_m
+mux4ds  #(31) dtag_rdata_sel (
+        .in0    ({dtag_rdata_w0_m[29:0],dva_vld_m[0]}),
+        .in1    ({dtag_rdata_w1_m[29:0],dva_vld_m[1]}),
+        .in2    ({dtag_rdata_w2_m[29:0],dva_vld_m[2]}),
+        .in3    ({dtag_rdata_w3_m[29:0],dva_vld_m[3]}),
+        .sel0   (lsu_dtag_rsel_m[0]),  
+        .sel1   (lsu_dtag_rsel_m[1]),
+        .sel2   (lsu_dtag_rsel_m[2]),  
+        .sel3   (lsu_dtag_rsel_m[3]),
+        .dout   ({dtag_rdata_sel_m[29:0],dtag_vld_sel_m})
+);
+// flop the selected tag word and valid bit (m -> g)
+dff_s  #(31) dtag_rdata_sel_g_ff (
+           .din  ({dtag_rdata_sel_m[29:0],dtag_vld_sel_m}),
+           .q    ({dtag_rdata_sel_g[29:0],dtag_vld_sel_g}),
+           .clk  (clk),
+           .se   (se),       .si (),          .so ());
+// misc read-data mux; every input is zero-extended to 64b (in2 also has bits 2:0 forced to 0)
+mux4ds  #(64) lsu_misc_rdata_sel (
+        .in0    ({16'h0,lsu_local_ldxa_data_g[47:0]}),
+        .in1    (lsu_tlb_rd_data[63:0]),
+        .in2    ({16'h0,lsu_va_wtchpt_addr[47:3],3'b000}),                           
+        .in3    ({33'h0,dtag_rdata_sel_g[29:0],dtag_vld_sel_g}),
+        .sel0   (lsu_local_ldxa_sel_g),  
+        .sel1   (lsu_local_ldxa_tlbrd_sel_g),
+        .sel2   (lsu_va_wtchpt_sel_g),
+        .sel3   (lsu_local_diagnstc_tagrd_sel_g),
+        .dout   (lsu_misc_rdata_g[63:0])
+);
+// flop the misc read data (g -> w2)
+dff_s  #(64) lsu_misc_rdata_w2_ff (
+           .din  (lsu_misc_rdata_g[63:0]),
+           .q    (lsu_misc_rdata_w2[63:0]),
+           .clk  (clk),
+           .se   (se),       .si (),          .so ());
+
+
+//=================================================================================================
+//      Tag Parity Calculation
+//=================================================================================================
+
+// flop tag parity bits (bit 29 of each way's read word)
+dff_s  #(4) dtag_rdata_parity_g_ff (
+           .din  ({dtag_rdata_w0_m[29],
+                   dtag_rdata_w1_m[29],
+                   dtag_rdata_w2_m[29],
+                   dtag_rdata_w3_m[29]}),
+           .q    ({dtag_rdata_w0_parity_g,
+                   dtag_rdata_w1_parity_g,
+                   dtag_rdata_w2_parity_g,
+                   dtag_rdata_w3_parity_g}),
+           .clk  (clk),
+           .se   (se),       .si (),          .so ());
+
+
+// generate partial parity for all ways before g-flop: [0..2] are 8-bit groups, [3] covers only the 5 bits 28:24
+assign  dtag_rdata_w0_8b_parity_m[0] = ^dtag_rdata_w0_m[7:0] ;
+assign  dtag_rdata_w0_8b_parity_m[1] = ^dtag_rdata_w0_m[15:8] ;
+assign  dtag_rdata_w0_8b_parity_m[2] = ^dtag_rdata_w0_m[23:16] ;
+assign  dtag_rdata_w0_8b_parity_m[3] = ^dtag_rdata_w0_m[28:24] ;
+
+assign  dtag_rdata_w1_8b_parity_m[0] = ^dtag_rdata_w1_m[7:0] ;
+assign  dtag_rdata_w1_8b_parity_m[1] = ^dtag_rdata_w1_m[15:8] ;
+assign  dtag_rdata_w1_8b_parity_m[2] = ^dtag_rdata_w1_m[23:16] ;
+assign  dtag_rdata_w1_8b_parity_m[3] = ^dtag_rdata_w1_m[28:24] ;
+
+assign  dtag_rdata_w2_8b_parity_m[0] = ^dtag_rdata_w2_m[7:0] ;
+assign  dtag_rdata_w2_8b_parity_m[1] = ^dtag_rdata_w2_m[15:8] ;
+assign  dtag_rdata_w2_8b_parity_m[2] = ^dtag_rdata_w2_m[23:16] ;
+assign  dtag_rdata_w2_8b_parity_m[3] = ^dtag_rdata_w2_m[28:24] ;
+
+assign  dtag_rdata_w3_8b_parity_m[0] = ^dtag_rdata_w3_m[7:0] ;
+assign  dtag_rdata_w3_8b_parity_m[1] = ^dtag_rdata_w3_m[15:8] ;
+assign  dtag_rdata_w3_8b_parity_m[2] = ^dtag_rdata_w3_m[23:16] ;
+assign  dtag_rdata_w3_8b_parity_m[3] = ^dtag_rdata_w3_m[28:24] ;
+
+
+// g-flop for the partial parities of all 4 ways
+
+dff_s  #(4) dtag_rdata_w0_8b_parity_g_ff (
+           .din  (dtag_rdata_w0_8b_parity_m[3:0]),
+           .q    (dtag_rdata_w0_8b_parity_g[3:0]),
+           .clk  (clk),
+           .se   (se),       .si (),          .so ());
+
+dff_s  #(4) dtag_rdata_w1_8b_parity_g_ff (
+           .din  (dtag_rdata_w1_8b_parity_m[3:0]),
+           .q    (dtag_rdata_w1_8b_parity_g[3:0]),
+           .clk  (clk),
+           .se   (se),       .si (),          .so ());
+
+dff_s  #(4) dtag_rdata_w2_8b_parity_g_ff (
+           .din  (dtag_rdata_w2_8b_parity_m[3:0]),
+           .q    (dtag_rdata_w2_8b_parity_g[3:0]),
+           .clk  (clk),
+           .se   (se),       .si (),          .so ());
+
+dff_s  #(4) dtag_rdata_w3_8b_parity_g_ff (
+           .din  (dtag_rdata_w3_8b_parity_m[3:0]),
+           .q    (dtag_rdata_w3_8b_parity_g[3:0]),
+           .clk  (clk),
+           .se   (se),       .si (),          .so ());
+
+// Per way: XOR of the four flopped partial parities with the flopped parity bit, i.e. the XOR of
+// all 30 bits read for that way. NOTE(review): presumably 1 flags a parity mismatch -- confirm in dctl.
+assign  lsu_rd_dtag_parity_g[0]  =  ^({dtag_rdata_w0_8b_parity_g[3:0],dtag_rdata_w0_parity_g});
+assign  lsu_rd_dtag_parity_g[1]  =  ^({dtag_rdata_w1_8b_parity_g[3:0],dtag_rdata_w1_parity_g});
+assign  lsu_rd_dtag_parity_g[2]  =  ^({dtag_rdata_w2_8b_parity_g[3:0],dtag_rdata_w2_parity_g});
+assign  lsu_rd_dtag_parity_g[3]  =  ^({dtag_rdata_w3_8b_parity_g[3:0],dtag_rdata_w3_parity_g});
+
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_excpctl.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_excpctl.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_excpctl.v	(revision 6)
@@ -0,0 +1,1608 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_excpctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+/////////////////////////////////////////////////////////////////
+
+`include "sys.h"
+`include "lsu.h"
+
+module lsu_excpctl ( /*AUTOARG*/
+   // Outputs
+   so, lsu_exu_st_dtlb_perr_g, lsu_ffu_st_dtlb_perr_g, 
+   lsu_defr_trp_taken_g, lsu_tlu_defr_trp_taken_g, 
+   lsu_mmu_defr_trp_taken_g, lsu_st_dtlb_perr_g, 
+   lsu_dmmu_sfsr_trp_wr, lsu_dsfsr_din_g, lsu_tlb_perr_ld_rq_kill_w, 
+   lsu_spu_early_flush_g, lsu_local_early_flush_g, 
+   lsu_tlu_early_flush_w, lsu_tlu_early_flush2_w, lsu_ttype_vld_m2, 
+   lsu_ttype_vld_m2_bf1, lsu_ifu_flush_pipe_w, lsu_exu_flush_pipe_w, 
+   lsu_mmu_flush_pipe_w, lsu_ffu_flush_pipe_w, lsu_tlu_wtchpt_trp_g, 
+   lsu_tlu_dmmu_miss_g, lsu_tlu_misalign_addr_ldst_atm_m, 
+   lsu_tlu_daccess_excptn_g, lsu_tlu_daccess_prot_g, 
+   lsu_tlu_priv_action_g, lsu_ifu_tlb_data_su, lsu_ifu_tlb_data_ue, 
+   lsu_ifu_tlb_tag_ue, lsu_tlu_ttype_m2, lsu_tlu_ttype_vld_m2, 
+   stb_cam_sqsh_msk, stb_cam_hit_bf, stb_cam_hit_bf1, 
+   tte_data_perror_unc, asi_tte_data_perror, asi_tte_tag_perror, 
+   // Inputs
+   rclk, si, se, grst_l, arst_l, tlb_rd_tte_data_ebit, 
+   tlb_rd_tte_data_pbit, tlb_rd_tte_data_nfobit, 
+   tlb_rd_tte_data_wbit, tlb_cam_hit, tlb_pgnum_b39, 
+   lsu_ldst_va_b39_m, lsu_sun4r_va_m_l, lsu_sun4r_pgsz_b2t0_e, 
+   lsu_sun4v_pgsz_b2t0_e, tlu_early_flush_pipe_w, ifu_lsu_flush_w, 
+   ifu_lsu_nceen, lsu_tlb_asi_data_perr_g, lsu_tlb_asi_tag_perr_g, 
+   stb_state_vld0, stb_state_vld1, stb_state_vld2, stb_state_vld3, 
+   ifu_tlu_thrid_e, tlu_lsu_priv_trap_m, tlu_lsu_pstate_priv, 
+   st_inst_vld_e, ld_inst_vld_e, ifu_lsu_alt_space_e, lsu_ldst_va_m, 
+   hpv_priv_m, hpstate_en_m, stb_cam_hit, dtlb_bypass_m, 
+   lsu_alt_space_m, atomic_m, ldst_dbl_m, fp_ldst_m, lda_internal_m, 
+   sta_internal_m, cam_real_m, data_rd_vld_g, tag_rd_vld_g, 
+   ldst_sz_m, asi_internal_m, rd_only_ltlb_asi_e, wr_only_ltlb_asi_e, 
+   dfill_tlb_asi_e, ifill_tlb_asi_e, nofault_asi_m, as_if_user_asi_m, 
+   atomic_asi_m, phy_use_ec_asi_m, phy_byp_ec_asi_m, quad_asi_m, 
+   binit_quad_asi_m, blk_asi_m, recognized_asi_m, strm_asi_m, 
+   mmu_rd_only_asi_m, rd_only_asi_m, wr_only_asi_m, unimp_asi_m, 
+   lsu_nonalt_nucl_access_m, va_wtchpt_cmp_en_m, 
+   lsu_va_match_b47_b32_m, lsu_va_match_b31_b3_m, 
+   va_wtchpt_msk_match_m, ifu_tlu_inst_vld_m, 
+   exu_tlu_misalign_addr_jmpl_rtn_m, exu_tlu_va_oor_m, 
+   tlu_dsfsr_flt_vld, tlu_lsu_pstate_cle, tlu_lsu_pstate_am, 
+   lsu_excpctl_asi_state_m, lsu_tlu_nonalt_ldst_m, 
+   lsu_squash_va_oor_m, lsu_tlu_xslating_ldst_m, lsu_tlu_ctxt_sel_m, 
+   lsu_tlu_write_op_m, lsu_memref_m, lsu_flsh_inst_m, 
+   tte_data_parity_error, tte_tag_parity_error
+   );
+
+   
+   input rclk;   
+   input si;
+   input se;
+   input grst_l;
+   input arst_l;
+   output so;
+
+   //=================================================================
+   // input from tlb
+//   input [`STLB_DATA_NFO:`STLB_DATA_W] tlb_rd_tte_data ; // tte data from tlb
+   input  tlb_rd_tte_data_ebit;
+   input  tlb_rd_tte_data_pbit;
+   input  tlb_rd_tte_data_nfobit;
+   input  tlb_rd_tte_data_wbit;
+
+
+   input                               tlb_cam_hit;
+   input                               tlb_pgnum_b39;
+//   input                               tlb_rd_tte_data_locked ;    // lock bit from tte
+   //=================================================================
+
+   input	lsu_ldst_va_b39_m ;
+   input	lsu_sun4r_va_m_l ; 
+   input [2:0]	lsu_sun4r_pgsz_b2t0_e ;
+   input [2:0]	lsu_sun4v_pgsz_b2t0_e ;
+
+   input         tlu_early_flush_pipe_w;
+   input         ifu_lsu_flush_w;
+   input [3:0]   ifu_lsu_nceen ;             // uncorrectible error enable 
+
+   input       	lsu_tlb_asi_data_perr_g ;
+   input       	lsu_tlb_asi_tag_perr_g ;
+
+   input [7:0]  stb_state_vld0 ;  // valid bits - stb0
+   input [7:0]	stb_state_vld1 ;  // valid bits - stb1
+   input [7:0]  stb_state_vld2 ;  // valid bits - stb2
+   input [7:0]  stb_state_vld3 ;  // valid bits - stb3
+
+   input [1:0]  ifu_tlu_thrid_e ; // thread-id.
+
+   input	tlu_lsu_priv_trap_m ;	// daccess-excp in tlu
+
+   output 	lsu_exu_st_dtlb_perr_g ;
+   output 	lsu_ffu_st_dtlb_perr_g ;
+
+   output	lsu_defr_trp_taken_g ;
+   output	lsu_tlu_defr_trp_taken_g ;
+   output	lsu_mmu_defr_trp_taken_g ;
+
+   output [3:0]	lsu_st_dtlb_perr_g ;
+
+   output [3:0]  lsu_dmmu_sfsr_trp_wr;	   // sfsr wr based on trap.
+   output [23:0] lsu_dsfsr_din_g;
+
+
+   output lsu_tlb_perr_ld_rq_kill_w ;
+   output lsu_spu_early_flush_g;
+   output lsu_local_early_flush_g;   //to lsu
+   
+//   output lsu_dctl_early_flush_w;
+   output lsu_tlu_early_flush_w;
+   output lsu_tlu_early_flush2_w;
+
+   output lsu_ttype_vld_m2;
+   output lsu_ttype_vld_m2_bf1;
+   
+
+//   output 	lsu_stbctl_flush_pipe_w ;
+//   output 	lsu_stbrwctl_flush_pipe_w ;
+   //output lsu_flush_pipe_w;
+   output lsu_ifu_flush_pipe_w;
+   output lsu_exu_flush_pipe_w;
+   output lsu_mmu_flush_pipe_w;
+   output lsu_ffu_flush_pipe_w;
+
+   output lsu_tlu_wtchpt_trp_g ;        // watchpt trap has occurred.
+   output lsu_tlu_dmmu_miss_g;
+   output lsu_tlu_misalign_addr_ldst_atm_m ; // mem_addr unaligned
+//   output lsu_tlu_priv_violtn_g;
+   wire   lsu_tlu_priv_violtn_g;
+   output lsu_tlu_daccess_excptn_g;
+   output lsu_tlu_daccess_prot_g;
+   output lsu_tlu_priv_action_g;
+//   output lsu_tlu_tte_ebit_g;
+//   output lsu_tlu_spec_access_epage_g;
+//   output lsu_tlu_uncache_atomic_g;
+//   output lsu_tlu_illegal_asi_action_g;
+//   output lsu_tlu_flt_ld_nfo_pg_g;
+
+   //output lsu_tlu_asi_rd_unc;
+
+   output lsu_ifu_tlb_data_su ;	  // specific to st ue
+   output lsu_ifu_tlb_data_ue ;   // dtlb data asi rd parity error ; now ld ue
+   output lsu_ifu_tlb_tag_ue ;    // dtlb tag asi rd parity error
+  
+output [8:0]            lsu_tlu_ttype_m2;
+output                  lsu_tlu_ttype_vld_m2;
+
+   output  [7:0]   stb_cam_sqsh_msk ;  // squash spurious hits
+
+   output	stb_cam_hit_bf;		  // buffered stb_cam_hit for qctl1.
+   output	stb_cam_hit_bf1;		// buffered stb_cam_hit for stb_rwctl, dctl.
+
+   input [3:0]          tlu_lsu_pstate_priv ;   
+//   input [3:0]          tlu_lsu_hpv_priv;
+//   input [3:0]          tlu_lsu_hpstate_en;
+   
+   
+   input                st_inst_vld_e;
+   input                ld_inst_vld_e;
+   input                ifu_lsu_alt_space_e;        // alternate space ld/st
+
+   //interface between lsu_dctldp
+   input [7:0]          lsu_ldst_va_m;
+   
+   //interface between lsu_excpctl and lsu_dctl
+
+   output               tte_data_perror_unc;
+   //output               tte_data_perror_corr;
+   output		asi_tte_data_perror ;
+   output		asi_tte_tag_perror ;
+
+
+   input hpv_priv_m;
+   input hpstate_en_m;
+   
+   input		stb_cam_hit ;
+   
+   input                dtlb_bypass_m;
+   
+   input                lsu_alt_space_m;
+   input                atomic_m;
+//   input                atomic_g;
+   input                ldst_dbl_m;
+   input                fp_ldst_m;
+//   input                lsu_inst_vld_w;
+   input                lda_internal_m;
+   input                sta_internal_m;
+   input                cam_real_m;
+//   input                va_wtchpt_match;
+
+   input                data_rd_vld_g;
+   input                tag_rd_vld_g;
+   input [1:0]          ldst_sz_m;
+   input                asi_internal_m;
+
+//   input                dfill_thread0;
+//   input                dfill_thread1;
+//   input                dfill_thread2;
+//   input                dfill_thread3;
+
+   wire                ld_inst_vld_unflushed;
+   wire                st_inst_vld_unflushed;
+//   input                flsh_inst_g;
+//   input                unc_err_trap_g;
+   
+   //asi decode
+   input                rd_only_ltlb_asi_e;
+   input                wr_only_ltlb_asi_e;
+   input                dfill_tlb_asi_e;
+   input                ifill_tlb_asi_e;
+
+   input                nofault_asi_m;
+   input                as_if_user_asi_m;
+
+   input                atomic_asi_m;
+   input                phy_use_ec_asi_m;
+   input                phy_byp_ec_asi_m;
+//   input                tlb_byp_asi_m;
+   input                quad_asi_m;
+   input                binit_quad_asi_m;
+   input                blk_asi_m;
+//   input                blk_cmt_asi_m;
+   input                recognized_asi_m;
+   input                strm_asi_m;
+   input                mmu_rd_only_asi_m;
+   input                rd_only_asi_m;
+   input                wr_only_asi_m;
+   input                unimp_asi_m;
+   input		lsu_nonalt_nucl_access_m ;
+
+   input    va_wtchpt_cmp_en_m;    //from dctl
+   input    lsu_va_match_b47_b32_m;        //from qdp1
+   input    lsu_va_match_b31_b3_m;         //from qdp1
+
+   input    va_wtchpt_msk_match_m; //from dctldp
+
+   input		ifu_tlu_inst_vld_m ;
+
+input           exu_tlu_misalign_addr_jmpl_rtn_m;// misaligned addr - jmpl or return addr
+input           exu_tlu_va_oor_m;       	// ??? - to be used in sfsr
+input [3:0]     tlu_dsfsr_flt_vld;
+input [3:0]   	tlu_lsu_pstate_cle ;       // current little endian
+input [3:0]   	tlu_lsu_pstate_am ;        // address mask
+input  [7:0]    lsu_excpctl_asi_state_m ;   // ASI State + imm asi
+input          	lsu_tlu_nonalt_ldst_m ; // non-alternate load or store // FORCE
+input      	lsu_squash_va_oor_m ;   // squash va_oor for mem-op. // FORCE
+input           lsu_tlu_xslating_ldst_m ;// xslating ldst,atomic etc // FORCE
+input   [2:0]   lsu_tlu_ctxt_sel_m;           // context selected:0-p,1-s,2-n // FORCE
+input           lsu_tlu_write_op_m; // FORCE
+input		lsu_memref_m ;
+input           lsu_flsh_inst_m ;
+   
+
+input    tte_data_parity_error ;
+input    tte_tag_parity_error ;
+
+wire	other_flush_pipe_w ;
+wire	defr_trp_taken ;
+wire 	defr_trp_taken_m, defr_trp_taken_byp, defr_trp_taken_m_din ;   
+wire  	tlb_tte_vld_m, tlb_tte_vld_g ;
+wire 	priv_pg_usr_mode_m, priv_pg_usr_mode_g, priv_pg_usr_mode;
+wire 	nfo_pg_nonnfo_asi_m, nfo_pg_nonnfo_asi_g, nfo_pg_nonnfo_asi;
+wire  	spec_access_epage_m, spec_access_epage_g, spec_access_epage ;
+wire   	nonwr_pg_st_access;
+   
+//=========================================================================================
+// MISCELLANEOUS
+//=========================================================================================
+
+   wire       clk;
+   assign     clk = rclk;
+   wire       reset;
+
+   wire       dbb_reset_l;
+
+    dffrl_async rstff(.din (grst_l),
+                        .q   (dbb_reset_l),
+                        .clk (clk), .se(se), .si(), .so(),
+                        .rst_l (arst_l));
+
+   assign reset = ~dbb_reset_l ;
+
+bw_u1_buf_30x UZsize_stb_cam_hit_bf1  (.a(stb_cam_hit),   .z(stb_cam_hit_bf1));  //to dctl, stb_rwctl
+bw_u1_buf_30x UZsize_stb_cam_hit_bf   (.a(stb_cam_hit),   .z(stb_cam_hit_bf ));  //to qctl1
+
+wire                ld_inst_vld_m;
+wire                st_inst_vld_m;
+// First flop: ld/st_inst_vld_e -> ld/st_inst_vld_m.
+dff_s #(2) inst_vld_stgm (
+   .din ({ld_inst_vld_e, st_inst_vld_e}),
+   .q   ({ld_inst_vld_m, st_inst_vld_m}),
+   .clk    (clk),
+   .se     (se),       .si (),          .so ()
+);
+// Second flop: the _m copies are delayed again with no flush qualification, hence "_unflushed".
+dff_s #(2) inst_vld_stgg (
+   .din ({ld_inst_vld_m, st_inst_vld_m}),
+   .q   ({ld_inst_vld_unflushed, st_inst_vld_unflushed}),
+   .clk    (clk),
+   .se     (se),       .si (),          .so ()
+);
+// One-cycle delay of the TLU privilege-trap indication; it is later ORed into lsu_tlu_daccess_excptn_g.
+wire	tlu_priv_trap_g ;
+dff_s #(1) tprivtrp_g (
+   .din (tlu_lsu_priv_trap_m),
+   .q   (tlu_priv_trap_g),
+   .clk    (clk),
+   .se     (se),       .si (),          .so ()
+);
+   
+  
+//=========================================================================================
+//  Thread Staging
+//=========================================================================================
+
+wire [1:0] thrid_m, thrid_g ;
+// Register the E-stage thread id supplied by the IFU to obtain thrid_m.
+dff_s #(2)  tid_stgm (
+        .clk    (clk),
+        .din    (ifu_tlu_thrid_e[1:0]),
+        .q      (thrid_m[1:0]),
+        .se     (se),       .si (),          .so ()
+        );
+// One-hot decode of thrid_m; each select is high for exactly one id value.
+wire	thread0_m, thread1_m, thread2_m, thread3_m;
+assign	thread0_m = ~(thrid_m[1] | thrid_m[0]) ;	// id 2'b00
+assign	thread1_m = thrid_m[0] & ~thrid_m[1] ;	// id 2'b01
+assign	thread2_m = thrid_m[1] & ~thrid_m[0] ;	// id 2'b10
+assign	thread3_m = &thrid_m[1:0] ;		// id 2'b11
+// Delay the one-hot selects by one flop to obtain the threadN_g copies.
+wire thread0_g, thread1_g, thread2_g, thread3_g ;
+dff_s #(4)  tid_stgg (
+        .clk    (clk),
+        .din    ({thread0_m, thread1_m, thread2_m, thread3_m}),
+        .q      ({thread0_g, thread1_g, thread2_g, thread3_g}),
+        .se     (se),       .si (),          .so ()
+        );
+
+//=========================================================================================
+//  INST_VLD_W GENERATION
+//=========================================================================================
+
+
+assign	thrid_g[0] = thread1_g | thread3_g ;	// re-encode the one-hot threadN_g selects into a 2-bit id
+assign	thrid_g[1] = thread2_g | thread3_g ;
+   // The M-stage instruction stays valid unless a flush is active in W for the same thread id.
+wire    flush_w_inst_vld_m ;
+wire    lsu_inst_vld_w ;
+wire	lsu_flush_pipe_w;	// driven in the "Generate Flush Pipe" section
+assign  flush_w_inst_vld_m =
+        ifu_tlu_inst_vld_m &
+        ~(lsu_flush_pipe_w & (thrid_m[1:0] == thrid_g[1:0])) ; // really lsu_flush_pipe_w
+// Flop the flush-qualified valid to produce lsu_inst_vld_w.
+dff_s  stgw_ivld (
+        .din    (flush_w_inst_vld_m),
+        .q      (lsu_inst_vld_w),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+//========================================================================
+//	Miscellaneous
+//========================================================================
+
+
+// Moved to excpctl from stb_rwctl as excpctl is closer to stb-cam.
+mux4ds  #(8) stbvld_mx (	// NOTE(review): mux4ds is a library cell - presumably a 4:1 mux with one-hot selects; confirm
+  .in0  (~stb_state_vld0[7:0]),	// each input is the inverted 8-bit valid vector of one thread's store buffer
+  .in1  (~stb_state_vld1[7:0]),
+  .in2  (~stb_state_vld2[7:0]),
+  .in3  (~stb_state_vld3[7:0]),
+  .sel0 (thread0_g),	// selected by the threadN_g one-hot selects
+  .sel1 (thread1_g),
+  .sel2 (thread2_g),
+  .sel3 (thread3_g),
+  .dout (stb_cam_sqsh_msk[7:0])	// squash mask: a bit is set where the selected thread's entry is NOT valid
+);
+   
+//========================================================================
+//  Exception Handling Begin
+//========================================================================
+
+//va watch point
+   wire va_match_g;
+   wire va_wtchpt_msk_match_g;
+// Watchpoint compare is enabled in M only when all of the terms below hold:
+//   compare enable set, ASI acceptable, valid ld/st, not hypervisor, not a real-address access.
+wire	va_wtchpt_en_m ;
+
+assign	va_wtchpt_en_m = 
+va_wtchpt_cmp_en_m & 
+(((~asi_internal_m & recognized_asi_m) & lsu_alt_space_m) | ~lsu_alt_space_m) // Bug5226
+& (ld_inst_vld_m | st_inst_vld_m) & //bug 3681
+ ~(hpv_priv_m & hpstate_en_m)  // ECO 4178
+& ~cam_real_m ;                // ECO 5470 (TO_2_0)
+
+//bug6480   
+   wire lsu_va_match_m;
+   wire	pstate_am_m ;
+// With pstate_am_m set only the b31_b3 compare is required; otherwise the b47_b32 compare must match as well.
+assign lsu_va_match_m = ((lsu_va_match_b47_b32_m & lsu_va_match_b31_b3_m) & ~pstate_am_m) |
+                          (lsu_va_match_b31_b3_m & pstate_am_m);
+   // Flop the enable, the address match and the mask match together.
+dff_s #(3)  stgwtch_g (
+        .din    ({va_wtchpt_en_m,
+                  lsu_va_match_m,
+                  va_wtchpt_msk_match_m}), 
+        .q      ({va_wtchpt_en_g,
+                  va_match_g,
+                  va_wtchpt_msk_match_g}), 
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+
+// These signals will eventually generate exceptions.
+   wire va_wtchpt_match;
+   // A watchpoint hit needs address match, mask match, the staged enable and a still-valid instruction.
+assign  va_wtchpt_match = 
+        va_match_g &  va_wtchpt_msk_match_g & lsu_inst_vld_w & va_wtchpt_en_g;
+
+assign  lsu_tlu_wtchpt_trp_g = va_wtchpt_match ;	// reported to the TLU unchanged
+
+
+// tlb related exceptions/errors
+wire  tlb_daccess_excptn_e, tlb_daccess_excptn_m ; 
+wire  tlb_daccess_excptn_e_d1;
+wire	tlb_illgl_pgsz_m ;
+// Alternate-space store to a read-only ltlb ASI, or load from a write-only ltlb ASI.
+assign  tlb_daccess_excptn_e  =
+  ((rd_only_ltlb_asi_e &  st_inst_vld_e)  |
+   (wr_only_ltlb_asi_e &  ld_inst_vld_e)) & ifu_lsu_alt_space_e   ;
+  
+dff_s  #(1) tlbex_stgm (
+        .din    ({tlb_daccess_excptn_e}),
+        .q      ({tlb_daccess_excptn_e_d1}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// The flopped E-stage condition is merged with the illegal-page-size check, which is generated directly in M.
+assign tlb_daccess_excptn_m = tlb_daccess_excptn_e_d1 | tlb_illgl_pgsz_m;
+      
+wire pstate_priv_m;	// tlu_lsu_pstate_priv bit of the thread selected by threadN_m
+//wire pstate_priv;
+
+mux4ds  #(1) pstate_priv_m_mux (
+        .in0    (tlu_lsu_pstate_priv[0]),	// one bit per thread, index = thread number
+        .in1    (tlu_lsu_pstate_priv[1]),
+        .in2    (tlu_lsu_pstate_priv[2]),
+        .in3    (tlu_lsu_pstate_priv[3]),
+        .sel0   (thread0_m),  
+        .sel1   (thread1_m),
+        .sel2   (thread2_m),  
+        .sel3   (thread3_m),
+        .dout   (pstate_priv_m)
+);
+   
+//dff #(1)  priv_stgg (
+//        .din    (pstate_priv_m),
+//        .q      (pstate_priv),
+//        .clk    (clk),
+//        .se     (se),       .si (),          .so ()
+//        );
+
+// privilege violation - priv page accessed in user mode
+//timing 
+//assign  priv_pg_usr_mode =  // data access exception; TT=h30
+//  (ld_inst_vld_unflushed | st_inst_vld_unflushed) & ~(pstate_priv | hpv_priv) & tlb_rd_tte_data_pbit ;
+
+//SC2   wire hpv_priv_m;
+     
+   // M-stage qualifier: a valid load/store with neither pstate_priv_m nor hpv_priv_m set.
+   assign priv_pg_usr_mode_m = ~pstate_priv_m & ~hpv_priv_m & (st_inst_vld_m | ld_inst_vld_m) ;
+   // Flop the qualifier; the registered copy is combined with the TTE P bit below.
+dff_s #(1) priv_pg_usr_mode_stgg  (
+        .clk    (clk),
+        .din    (priv_pg_usr_mode_m),
+        .q      (priv_pg_usr_mode_g),
+        .se     (se),       .si (),          .so ()
+        );
+   assign priv_pg_usr_mode = tlb_rd_tte_data_pbit & priv_pg_usr_mode_g ;	// priv page accessed in user mode
+   
+// protection violation - store to a page that does not have write permission
+//timing
+//assign  nonwr_pg_st_access =  // data access protection; TT=h33
+//  st_inst_vld_unflushed   & 
+//  ~tlb_rd_tte_data_wbit & ~lsu_dtlb_bypass_g & tlb_cam_hit_g ;
+//   //lsu_dtlb_bypass_g) ; // W=1 in bypass mode - In bypass mode this trap will never happen !!!
+
+   assign nonwr_pg_st_access = ~tlb_rd_tte_data_wbit & st_inst_vld_unflushed & tlb_tte_vld_g;	// store, TTE valid, W bit clear
+   // daccess_prot is currently a plain alias; the bypass/cam-hit qualification is already folded into tlb_tte_vld_g.
+wire  daccess_prot ;
+assign  daccess_prot = nonwr_pg_st_access  ;
+    //((~lsu_dtlb_bypass_g & tlb_cam_hit_g) | (tlb_byp_asi_g & lsu_alt_space_g)) ;
+
+// access to a page marked with the nfo with an asi other than nfo asi.
+//timing
+//assign  nfo_pg_nonnfo_asi  =  // data access exception; TT=h30
+//  (ld_inst_vld_unflushed | st_inst_vld_unflushed) &   // any access
+//  ((~nofault_asi_g & lsu_alt_space_g) | ~lsu_alt_space_g) // in alternate space or not
+//  & tlb_rd_tte_data_nfobit ;
+
+assign nfo_pg_nonnfo_asi_m = (ld_inst_vld_m | st_inst_vld_m) &	// any valid load or store ...
+                             ((~nofault_asi_m & lsu_alt_space_m) | ~lsu_alt_space_m) ;	// ... not using a no-fault ASI
+   // Flop the M-stage qualifier, then combine it with the NFO bit read from the TTE.
+dff_s #(1) nfo_pg_nonnfo_asi_stgg   (
+        .din    (nfo_pg_nonnfo_asi_m),
+        .q      (nfo_pg_nonnfo_asi_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+assign    nfo_pg_nonnfo_asi = nfo_pg_nonnfo_asi_g & tlb_rd_tte_data_nfobit ;
+   
+// as_if_usr asi accesses priv page.
+//timing
+//assign  as_if_usr_priv_pg  =  // data access exception; TT=h30
+//  (ld_inst_vld_unflushed | st_inst_vld_unflushed) & as_if_user_asi_g & lsu_alt_space_g & 
+//      tlb_rd_tte_data_pbit ;
+
+   wire   as_if_usr_priv_pg_m, as_if_usr_priv_pg_g, as_if_usr_priv_pg;
+   assign as_if_usr_priv_pg_m = (ld_inst_vld_m | st_inst_vld_m) & as_if_user_asi_m & lsu_alt_space_m;	// valid alt-space ld/st with an as-if-user ASI
+ // Flop the M-stage qualifier, then combine it with the P bit read from the TTE.
+dff_s #(1) as_if_usr_priv_pg_stgg   (
+        .din    (as_if_usr_priv_pg_m),
+        .q      (as_if_usr_priv_pg_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   assign  as_if_usr_priv_pg =  as_if_usr_priv_pg_g & tlb_rd_tte_data_pbit ; 
+ 
+// non-cacheable address - iospace PA[39] = 1 
+// atomic access to non-cacheable space.
+   wire    atm_access_w_nc, atomic_g;
+// One-cycle delayed copy of atomic_m.
+dff_s #(1) atm_stgg (
+        .din    (atomic_m),
+        .q      (atomic_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+   // Atomic whose translated page number has bit 39 set.
+assign  atm_access_w_nc = atomic_g & tlb_pgnum_b39 ; // io space 
+
+// atomic inst with unsupported asi.
+//timing
+//assign  atm_access_unsup_asi = atomic_g & ~atomic_asi_g & lsu_alt_space_g ;
+   wire atm_access_unsup_asi_m, atm_access_unsup_asi;
+   // Computed in M and flopped, replacing the commented-out one-stage-later form above.
+assign  atm_access_unsup_asi_m = atomic_m & ~atomic_asi_m & lsu_alt_space_m;
+
+dff_s #(1) atm_access_unsup_asi_stgg   (
+        .din    (atm_access_unsup_asi_m),
+        .q      (atm_access_unsup_asi),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+   
+//timing
+//assign  tlb_tte_vld_g = ~lsu_dtlb_bypass_g & tlb_cam_hit_g ;
+
+wire	dmmu_va_oor_m ;
+assign  tlb_tte_vld_m = ~dtlb_bypass_m & tlb_cam_hit & 	// translating access that hit in the cam
+			~((unimp_asi_m | asi_internal_m | ~recognized_asi_m) & 	// excluded: unimp/internal/unrecognized ASI ...
+				lsu_alt_space_m) & // Bug 3541,5186
+			~dmmu_va_oor_m ; // Bug 5070
+	// Flopped copy; it qualifies the TTE-bit-based exception terms.
+dff_s #(1) tlb_tte_vld_stgg   (
+        .din    (tlb_tte_vld_m),
+        .q      (tlb_tte_vld_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   
+wire  pg_with_ebit_m, pg_with_ebit_g, pg_with_ebit  ;
+//timing   
+//assign	pg_with_ebit = 
+//	(tlb_rd_tte_data_ebit & tlb_tte_vld_g)  | // tte
+//        (lsu_dtlb_bypass_g & ~(phy_use_ec_asi_g & lsu_alt_space_g)) | // regular bypass 
+//        (tlb_byp_asi_g & ~phy_use_ec_asi_g & lsu_alt_space_g) ; // phy_byp
+// Bypass-mode contribution, generated in M and flopped (the live replacement for the commented-out form above).
+assign	pg_with_ebit_m = 
+        (dtlb_bypass_m & ~(phy_use_ec_asi_m & lsu_alt_space_m) & 
+	(lsu_ldst_va_b39_m & ~pstate_am_m)) |
+	// regular bypass // Bug 4296,5050 related.
+        (dtlb_bypass_m & (phy_byp_ec_asi_m & lsu_alt_space_m)) ; // phy_byp
+
+dff_s #(1) pg_with_ebit_stgg   (
+        .din    (pg_with_ebit_m),
+        .q      (pg_with_ebit_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+assign  pg_with_ebit = (tlb_rd_tte_data_ebit & tlb_tte_vld_g)  | // tte  
+	                      pg_with_ebit_g;	// or the flopped bypass-mode term
+   
+//timing
+//assign  spec_access_epage = 
+//  ((ld_inst_vld_unflushed & nofault_asi_g & lsu_alt_space_g) |  // spec load
+//  flsh_inst_g) & // flush inst
+//  pg_with_ebit ; // page with side effects
+////  tlb_rd_tte_data_ebit ; // page with side effects
+
+assign  spec_access_epage_m = 
+// Bug 5166
+((ld_inst_vld_m & ~atomic_m) & nofault_asi_m & lsu_alt_space_m);   // spec load
+dff_s #(1) spec_access_epage_stgg   (
+        .din    (spec_access_epage_m),
+        .q      (spec_access_epage_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// remove flsh_inst_g ??   
+//assign spec_access_epage = (spec_access_epage_g  | flsh_inst_g) & pg_with_ebit;
+assign spec_access_epage = (spec_access_epage_g) & pg_with_ebit;	// flush instructions no longer contribute (see the commented-out line)
+   
+
+   wire quad_asi_non_ldstda_m;
+   // covers regular quad asi AND binit. 
+   assign quad_asi_non_ldstda_m = 
+	quad_asi_m & lsu_alt_space_m & 
+		((~ldst_dbl_m & ld_inst_vld_m) | // only lddbl should use
+		(fp_ldst_m & (ld_inst_vld_m | st_inst_vld_m))) ; // float should not use
+
+   wire	true_quad_non_ldda_m ; 
+   // catches case where st or non-ldd uses asi
+   assign true_quad_non_ldda_m =
+	(quad_asi_m & ~binit_quad_asi_m) & lsu_alt_space_m & 	// quad ASI that is not a binit quad ASI
+  ((~ldst_dbl_m & ld_inst_vld_m) | st_inst_vld_m) ;
+   
+wire  blk_asi_non_ldstdfa_m ;
+// Block ASI used by a ld/st that is not both double (ldst_dbl_m) and floating-point (fp_ldst_m).
+assign  blk_asi_non_ldstdfa_m = blk_asi_m & lsu_alt_space_m & 
+     ~(ldst_dbl_m & fp_ldst_m) & (ld_inst_vld_m | st_inst_vld_m) ;
+
+// trap on illegal asi
+wire  illegal_asi_trap_m, illegal_asi_trap_g, illegal_asi_trap_m_d1 ;
+// OR of: unrecognized ASI, fp ld/st with an internal ASI, and the three misuse cases above.
+assign  illegal_asi_trap_m = 
+((ld_inst_vld_m | st_inst_vld_m) & lsu_alt_space_m & ~recognized_asi_m) | 
+((ld_inst_vld_m | st_inst_vld_m) & asi_internal_m & fp_ldst_m & lsu_alt_space_m) | // Bug 4382
+blk_asi_non_ldstdfa_m |
+quad_asi_non_ldstda_m |
+true_quad_non_ldda_m  ; 
+   
+dff_s #(1) illegal_asi_trap_stgg   (
+        .din    (illegal_asi_trap_m),
+        .q      (illegal_asi_trap_m_d1),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   //need lsu_inst_vld_w ??
+//   assign illegal_asi_trap_g = illegal_asi_trap_m_d1 & lsu_inst_vld_w;
+   assign illegal_asi_trap_g = illegal_asi_trap_m_d1;	// used without the lsu_inst_vld_w qualification (open question above)
+         
+wire wr_to_strm_sync_m ;
+//timing
+//assign	wr_to_strm_sync =  	
+//  strm_asi & ((ldst_va_g[7:0] == 8'hA0) | (ldst_va_g[7:0] == 8'h68)) &
+//  st_inst_vld_unflushed & lsu_alt_space_g ;
+// Live form: alt-space store with the stream ASI to VA[7:0] == 8'hA0 only (the 8'h68 case was dropped).
+assign	wr_to_strm_sync_m =  	// Bug 5742
+  strm_asi_m & (lsu_ldst_va_m[7:0] == 8'hA0) & st_inst_vld_m & lsu_alt_space_m ;
+
+/*dff #(1) wr_to_strm_sync_stgg   (
+        .din    (wr_to_strm_sync_m),
+        .q      (wr_to_strm_sync),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );*/
+   
+
+// HPV Changes 
+// Push back into previous stage.
+// qualification with hpv_priv and hpstate_en required to ensure hypervisor
+// is not trying to access.
+//SC2   wire hpv_priv_e;
+   
+//SC2 mux4ds  #(1) hpv_priv_e_mux (
+//SC2        .in0    (tlu_lsu_hpv_priv[0]),
+//SC2        .in1    (tlu_lsu_hpv_priv[1]),
+//SC2        .in2    (tlu_lsu_hpv_priv[2]),
+//SC2        .in3    (tlu_lsu_hpv_priv[3]),
+//SC2        .sel0   (thread0_e),  
+//SC2        .sel1   (thread1_e),
+//SC2        .sel2   (thread2_e),  
+//SC2        .sel3   (thread3_e),
+//SC2       .dout   (hpv_priv_e)
+//SC2);
+
+//SC2   wire hpstate_en_e;
+   
+//SC2 mux4ds  #(1) hpstate_en_e_mux (
+//SC2        .in0    (tlu_lsu_hpstate_en[0]),
+//SC2        .in1    (tlu_lsu_hpstate_en[1]),
+//SC2        .in2    (tlu_lsu_hpstate_en[2]),
+//SC2        .in3    (tlu_lsu_hpstate_en[3]),
+//SC2        .sel0   (thread0_e),  
+//SC2        .sel1   (thread1_e),
+//SC2        .sel2   (thread2_e),  
+//SC2        .sel3   (thread3_e),
+//SC2        .dout   (hpstate_en_e)
+//SC2);
+//SC2   wire hpstate_en_m;
+   
+//SC2 dff #(2) hpv_stgm (
+//SC2        .din    ({hpv_priv_e, hpstate_en_e}),
+//SC2        .q    	({hpv_priv_m, hpstate_en_m}),
+//SC2        .clk    (clk),
+//SC2        .se     (se),       .si (),          .so ()
+//SC2        );
+//SC2   wire hpv_priv, hpstate_en;
+
+   
+//SC2 dff #(2) hpv_stgg (
+//SC2        .din    ({hpv_priv_m, hpstate_en_m}),
+//SC2        .q     	({hpv_priv,   hpstate_en}),
+//SC2        .clk    (clk),
+//SC2        .se     (se),       .si (),          .so ()
+//SC2        );
+
+/*assign  priv_action = (ld_inst_vld_unflushed | st_inst_vld_unflushed) & ~lsu_asi_state[7] & 
+      ~pstate_priv & ~(hpv_priv & hpstate_en) & lsu_alt_space_g ;*/
+// Generate a stage earlier
+   wire priv_action_m, priv_action;
+   // Valid ld/st with pstate_priv_m clear and not (hpv_priv_m & hpstate_en_m), using either access class below.
+assign  priv_action_m = (ld_inst_vld_m | st_inst_vld_m) & 
+	((~lsu_excpctl_asi_state_m[7] & lsu_alt_space_m) |	// alt_space
+	lsu_nonalt_nucl_access_m) &		// non-alt space - nucleus ctxt
+      ~pstate_priv_m & ~(hpv_priv_m & hpstate_en_m) ;
+
+/*assign  priv_action_m = (ld_inst_vld_m | st_inst_vld_m) & ~lsu_excpctl_asi_state_m[7] & 
+      ~pstate_priv_m & ~(hpv_priv_m & hpstate_en_m) & lsu_alt_space_m ;*/
+// Flop the M-stage result to produce priv_action.
+dff_s  pact_stgg (
+        .din    (priv_action_m),
+        .q    	(priv_action),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Take data_access exception if supervisor uses hypervisor asi  
+   wire hpv_asi_range_m ;	// ASI value falls in the range decoded below
+   wire spv_use_hpv_m ;		// supervisor access that uses such an ASI
+
+// Range decode on the M-stage ASI, bit 7 clear in every case:
+//   asi[6]   = 1       -> 0x4?,5?,6?,7?
+//   asi[6:4] = 3'b011  -> 0x3?
+// Generated from lsu_excpctl_asi_state_m for timing.
+assign  hpv_asi_range_m =
+        ~lsu_excpctl_asi_state_m[7] &
+        ( lsu_excpctl_asi_state_m[6] |
+         (~lsu_excpctl_asi_state_m[6] & (&lsu_excpctl_asi_state_m[5:4]))) ;
+
+// Take data_access exception if supervisor uses hypervisor asi:
+// pstate_priv_m set, hpv_priv_m clear, valid alternate-space load/store.
+assign  spv_use_hpv_m =
+        lsu_alt_space_m & pstate_priv_m & ~hpv_priv_m &
+        hpv_asi_range_m &
+        (st_inst_vld_m | ld_inst_vld_m) ;
+
+// EARLY TRAPS
+
+// memory address not aligned
+wire  hw_align_addr, wd_align_addr, dw_align_addr ;
+wire  qw_align_addr, blk_align_addr ;
+
+// Alignment flags for the M-stage VA: high when all the listed
+// low-order address bits are zero.
+assign  hw_align_addr  = ~lsu_ldst_va_m[0] ;		// half-word addr
+assign  wd_align_addr  = ~(|lsu_ldst_va_m[1:0]) ;	// word addr
+assign  dw_align_addr  = ~(|lsu_ldst_va_m[2:0]) ;	// dw addr
+assign  qw_align_addr  = ~(|lsu_ldst_va_m[3:0]) ;	// qw addr
+assign  blk_align_addr = ~(|lsu_ldst_va_m[5:0]) ;	// 64B aligned addr for block ld/st
+
+// Access-size decode of ldst_sz_m[1:0]. The byte case (2'b00) is not
+// decoded because no alignment check uses it.
+wire  hw_size, wd_size, dw_size ;
+assign  hw_size = ldst_sz_m[0] & ~ldst_sz_m[1] ;	// half-word size
+assign  wd_size = ldst_sz_m[1] & ~ldst_sz_m[0] ;	// word size
+assign  dw_size = ldst_sz_m[0] &  ldst_sz_m[1] ;	// double-word size
+
+wire  mem_addr_not_align ;
+   // Size/alignment mismatches apply to valid loads and stores; the quad-ASI check on the last line applies to loads only.
+assign  mem_addr_not_align
+  = (((hw_size & ~hw_align_addr) | // half-word check
+    (wd_size & ~wd_align_addr)  | // word check
+    (dw_size & ~dw_align_addr)  | // double word check
+    //((quad_asi_m | binit_quad_asi_m) & lsu_alt_space_m & ldst_dbl_m & ~qw_align_addr) | // quad word check
+    (blk_asi_m & lsu_alt_space_m & fp_ldst_m & ldst_dbl_m & ~blk_align_addr)) & // 64B blk ld/st check
+    //(blk_asi_m & lsu_alt_space_m & blk_asi_m & ~blk_align_addr)) & // 64B blk ld/st check
+    (ld_inst_vld_m | st_inst_vld_m)) |
+    // check only for loads 
+    (((quad_asi_m | binit_quad_asi_m) & lsu_alt_space_m & ldst_dbl_m & ~qw_align_addr) & ld_inst_vld_m) ; // quad word check
+
+// To be removed !! Now supported for both ld and st thru unimp_asi.
+//wire	blkst_cmt_daccess_excp_m ;
+//assign	blkst_cmt_daccess_excp_m =
+//    (blk_cmt_asi_m & lsu_alt_space_m & fp_ldst_m & ldst_dbl_m & st_inst_vld_m) ;
+
+   wire    stdf_maddr_not_align, lddf_maddr_not_align ;
+// FP double store that is word-aligned but not double-word-aligned, excluding alt-space blk/quad ASIs.
+assign  stdf_maddr_not_align
+    = st_inst_vld_m & fp_ldst_m & ldst_dbl_m & wd_align_addr & ~dw_align_addr 
+      & ~((blk_asi_m | quad_asi_m) & lsu_alt_space_m);
+// Same condition for FP double loads.
+assign  lddf_maddr_not_align
+    = ld_inst_vld_m & fp_ldst_m & ldst_dbl_m & wd_align_addr & ~dw_align_addr 
+      & ~((blk_asi_m | quad_asi_m) & lsu_alt_space_m);
+
+// internal asi access by ld/st other than ldxa/stxa/lddfa/stdfa.
+wire  asi_internal_non_xdw ;
+// Allowed form is dw_size with either ~ldst_dbl_m or fp_ldst_m; anything else flags.
+assign  asi_internal_non_xdw 
+    = (st_inst_vld_m | ld_inst_vld_m) & lsu_alt_space_m & asi_internal_m  & 
+      ~(dw_size & (~ldst_dbl_m | fp_ldst_m)) ; //bug4149;
+
+
+// asi related
+// rd-only mmu asi requiring va decode.
+wire	mmu_rd_only_asi_wva_m ;
+assign	mmu_rd_only_asi_wva_m =
+	((lsu_excpctl_asi_state_m[7:0]==8'h58) & (
+		(lsu_ldst_va_m[7:0] == 8'h00) | 	// dtag_target
+		(lsu_ldst_va_m[7:0] == 8'h20))) | 	// dsync_far
+	((lsu_excpctl_asi_state_m[7:0]==8'h50) & 
+		(lsu_ldst_va_m[7:0] == 8'h00)) ; 	// itag_target
+
+wire  wr_to_rd_only_asi, rd_of_wr_only_asi, unimp_asi_used;   
+// Alt-space store to any read-only ASI (three sources), plus the stream-sync write case.
+assign  wr_to_rd_only_asi = 
+	((mmu_rd_only_asi_wva_m |// mmu with non-unique asi
+	mmu_rd_only_asi_m |	// mmu with unique asi
+	rd_only_asi_m)		// non mmu
+	 &  st_inst_vld_m & lsu_alt_space_m) |
+	wr_to_strm_sync_m ;	// Bug 5399
+
+assign  rd_of_wr_only_asi = wr_only_asi_m &  ld_inst_vld_m & lsu_alt_space_m ;	// alt-space load from a write-only ASI
+assign  unimp_asi_used = unimp_asi_m &  (ld_inst_vld_m | st_inst_vld_m) & lsu_alt_space_m ;	// alt-space ld/st with an unimplemented ASI
+
+   wire asi_related_trap_m ; // asi_related_trap_g;
+// Any of the four ASI misuse conditions.
+assign  asi_related_trap_m = wr_to_rd_only_asi | rd_of_wr_only_asi | unimp_asi_used | asi_internal_non_xdw ;
+
+// Illegal page size for tlb fill
+
+// Stage both page-size fields (sun4r and sun4v formats) into M.
+wire	[2:0]	pgszr_m, pgszv_m ;
+dff_s #(6)   pgsz_stgm (
+        .clk    (clk),
+        .din    ({lsu_sun4r_pgsz_b2t0_e[2:0], lsu_sun4v_pgsz_b2t0_e[2:0]}),
+        .q      ({pgszr_m[2:0], pgszv_m[2:0]}),
+        .se     (se),       .si (),          .so ()
+        );
+// Pick one format: lsu_sun4r_va_m_l high selects the sun4v field, low the sun4r field.
+wire	[2:0]	pgsz_m ;
+assign	pgsz_m[2:0] = lsu_sun4r_va_m_l ? pgszv_m[2:0] : pgszr_m[2:0] ;
+
+// Page-size encodings treated as illegal.
+wire	illgl_pgsz_m ;
+assign	illgl_pgsz_m =
+	(pgsz_m[2:0] == 3'b010) |	// 512K
+	(pgsz_m[2:0] == 3'b100) |	// 32M
+	(pgsz_m[2:0] == 3'b110) |	// 2G
+	(pgsz_m[2:0] == 3'b111) ;	// 16G
+// Stage the ifill/dfill tlb ASI decodes into M.
+wire	ifill_tlb_asi_m, dfill_tlb_asi_m ;
+dff_s #(2)   idfill_stgm (
+        .clk    (clk),
+        .din    ({ifill_tlb_asi_e, dfill_tlb_asi_e}),
+        .q      ({ifill_tlb_asi_m, dfill_tlb_asi_m}),
+        .se     (se),       .si (),          .so ()
+        );
+// Illegal page size for tlb fill: alt-space store with a fill ASI and an illegal encoding.
+assign	tlb_illgl_pgsz_m = illgl_pgsz_m & lsu_alt_space_m & st_inst_vld_m & (dfill_tlb_asi_m | ifill_tlb_asi_m) ;
+
+wire  [8:0] early_ttype_m,early_ttype_g ; 
+wire    early_trap_vld_m, early_trap_vld_g ;  
+assign  early_trap_vld_m =  	// any of the three misalignment conditions
+			stdf_maddr_not_align | lddf_maddr_not_align | 
+			mem_addr_not_align ;
+     
+wire	lsu_tlu_misalign_addr_ldst_atm_m ; 
+assign  lsu_tlu_misalign_addr_ldst_atm_m = early_trap_vld_m ;	// alias of early_trap_vld_m
+
+// mux select order must be maintained
+assign  early_ttype_m[8:0] = 	// priority: stdf, then lddf, then generic misalign; X when none is active
+      stdf_maddr_not_align ? 9'h036 :
+        lddf_maddr_not_align ? 9'h035 : 
+           mem_addr_not_align ?  9'h034 : 9'hxxx ; 
+   // Flop the trap type together with its valid.
+dff_s #(10)   etrp_stgg (
+        .din    ({early_ttype_m[8:0],early_trap_vld_m}),
+        .q      ({early_ttype_g[8:0],early_trap_vld_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+wire daccess_excptn_early_m, daccess_excptn_early_g ;
+
+wire atm_access_w_nc_byp_m,atm_access_w_nc_byp_g ;
+assign atm_access_w_nc_byp_m = 	// atomic in bypass mode with VA[39] set and pstate_am_m clear
+atomic_m & dtlb_bypass_m & (lsu_ldst_va_b39_m & ~pstate_am_m) ; 
+						//Bug 5050
+
+dff_s   atmbyp_stgg (
+        .din    (atm_access_w_nc_byp_m),
+        .q      (atm_access_w_nc_byp_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// Data-access exception causes that are fully known in M; they are flopped as one bit.
+assign daccess_excptn_early_m =
+    asi_related_trap_m | tlb_daccess_excptn_m |
+    spv_use_hpv_m |  
+    atm_access_w_nc_byp_m ; // Bug 4281.
+
+dff_s  #(1) dearly_stgg (
+        .din    (daccess_excptn_early_m),
+        .q      (daccess_excptn_early_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+   wire daccess_excptn;
+   // Only the first group (first two lines) is qualified by tlb_tte_vld_g: & binds tighter than |.
+assign  daccess_excptn =  
+    (priv_pg_usr_mode | as_if_usr_priv_pg | nfo_pg_nonnfo_asi | 
+      atm_access_w_nc ) & tlb_tte_vld_g | 
+      illegal_asi_trap_g | daccess_excptn_early_g | atm_access_unsup_asi | //bug4622
+    	spec_access_epage ; 
+ 
+   wire [3:0] lsu_nceen_d1;  	// one-cycle delayed copy of ifu_lsu_nceen[3:0]
+dff_s #(4)  nceen_d1_ff (
+        .din    (ifu_lsu_nceen[3:0]),
+        .q      (lsu_nceen_d1[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   // Pick the nceen bit of the thread selected by the threadN_g one-hot selects.
+wire nceen_pipe_g ;
+assign  nceen_pipe_g = 
+  (thread0_g & lsu_nceen_d1[0]) | (thread1_g & lsu_nceen_d1[1]) |
+  (thread2_g & lsu_nceen_d1[2]) | (thread3_g & lsu_nceen_d1[3]) ;
+
+ // correctable dtlb data parity error on cam will cause dmmu miss.
+// prefetch will rely on the ld_inst_vld/st_inst_vld not being asserted
+// to prevent mmu_miss from being signalled if prefetch does not translate.
+// Timing Change : Remove data perror from dmmu_miss ; to be treated as disrupting trap.
+   wire dmmu_miss_m, dmmu_miss_m_d1;
+   // Miss: translating (non-bypass) valid ld/st with no cam hit, excluding internal-ASI accesses and early traps.
+assign dmmu_miss_m = 
+  ~tlb_cam_hit & ~dtlb_bypass_m & 
+  (ld_inst_vld_m | st_inst_vld_m) & 
+  ~(lda_internal_m | sta_internal_m | early_trap_vld_m) ;
+
+dff_s #(1)  dmmu_miss_stgg (
+        .din    (dmmu_miss_m),
+        .q      (dmmu_miss_m_d1),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+//need lsu_inst_vld_w ??
+   wire dmmu_miss_g;
+   // The flopped miss is qualified with lsu_inst_vld_w.
+   assign dmmu_miss_g = dmmu_miss_m_d1 & lsu_inst_vld_w;
+   
+
+wire [8:0] dmiss_type ;
+   wire    cam_real_g;
+   // One-cycle delayed copy of cam_real_m; it picks between the two miss trap types.
+dff_s #(1) cam_real_stgg (
+   .din (cam_real_m),
+   .q   (cam_real_g),
+   .clk    (clk),
+   .se     (se),       .si (),          .so ()
+   );  
+ assign        dmiss_type[8:0] = cam_real_g ? 9'h03f : 9'h068 ;	// 9'h03f when cam_real_g is set, else 9'h068
+
+// two wtchpt matches
+//assign  lsu_tlu_ttype_m2[8:0] = 
+//  early_trap_vld_g ? early_ttype_g[8:0] : 
+//    priv_action ? 9'h037 : 
+//      va_wtchpt_match ? 9'h062 :
+//        daccess_excptn ? 9'h030 : 
+//          dmmu_miss_g ? dmiss_type[8:0] :  // dmmu_miss
+//            daccess_error ? 9'h032 : 
+//              daccess_prot ? 9'h06c :
+//	              spubyp_trap_active_g ? {3'b000,spubyp_ttype[5:0]} : // should be no other tttype to compare to. 
+//                  9'bx_xxxx_xxxx ;
+
+wire early_trap_vld_sel, priv_action_sel, va_wtchpt_match_sel, daccess_excptn_sel, dmmu_miss_sel,
+     daccess_prot_sel ;
+// Each select is its cause ANDed with the inverse of every higher-priority select, so at most one is high.
+// Need to maintain this order in selects. Based on priority of traps    
+   assign early_trap_vld_sel = early_trap_vld_g;
+   assign priv_action_sel = ~early_trap_vld_sel & priv_action;
+   assign va_wtchpt_match_sel = ~early_trap_vld_sel & ~priv_action_sel & va_wtchpt_match;
+   assign daccess_excptn_sel = ~early_trap_vld_sel & ~priv_action_sel & ~va_wtchpt_match_sel &
+                               daccess_excptn;
+   assign dmmu_miss_sel = ~early_trap_vld_sel & ~priv_action_sel & ~va_wtchpt_match_sel &
+                          ~daccess_excptn_sel & dmmu_miss_g;
+
+   assign daccess_prot_sel = ~early_trap_vld_sel & ~priv_action_sel & ~va_wtchpt_match_sel &
+                             ~daccess_excptn_sel & ~dmmu_miss_sel & daccess_prot;
+   // AND-OR mux of the trap type; the result is all zeros when no select is active.
+assign  lsu_tlu_ttype_m2[8:0] =
+          ({9{early_trap_vld_sel}}     &  early_ttype_g[8:0]) | 
+          ({9{priv_action_sel}}        &  9'h037            ) |
+          ({9{va_wtchpt_match_sel}}    &  9'h062            ) |
+          ({9{daccess_excptn_sel}}     &  9'h030            ) |
+          ({9{dmmu_miss_sel}}          &  dmiss_type[8:0]   ) |
+          ({9{daccess_prot_sel}}       &  9'h06c            ) ;
+      // Valid whenever any of the six causes is present.
+assign  lsu_tlu_ttype_vld_m2 =  dmmu_miss_g | daccess_excptn | daccess_prot |
+        priv_action | early_trap_vld_g  | 
+	      va_wtchpt_match ;
+// This copy additionally includes deferred traps.
+assign lsu_ttype_vld_m2 = lsu_tlu_ttype_vld_m2 | defr_trp_taken ;  //to stb_rwctl
+
+assign lsu_ttype_vld_m2_bf1 =    lsu_ttype_vld_m2; //to dctl, qctl1
+
+wire	squash_priority_g ; // Bug 4678
+assign	squash_priority_g = priv_action | early_trap_vld_g | va_wtchpt_match ;	// higher-priority causes that mask the outputs below
+   
+assign  lsu_tlu_dmmu_miss_g = dmmu_miss_g & ~squash_priority_g ;
+assign  lsu_tlu_priv_violtn_g = (priv_pg_usr_mode | as_if_usr_priv_pg) & tlb_tte_vld_g ; 
+wire	dmmu_va_oor_g ;
+assign  lsu_tlu_daccess_excptn_g = 
+(daccess_excptn | dmmu_va_oor_g  // Bug 5036
+| tlu_priv_trap_g) & ~squash_priority_g ;
+
+// prioritize daccess_excptn higher than daccess_prot. This may
+// be a critical path which needs to be resolved -> qual. now
+// in mmu.
+//assign  lsu_tlu_daccess_prot_g = daccess_prot ;
+   wire daccess_prot_g;	// NOTE(review): no use of this net is visible in this section - confirm whether it is still needed
+assign  daccess_prot_g = daccess_prot & 
+	~(tlu_priv_trap_g | daccess_excptn | squash_priority_g) ;   
+assign  lsu_tlu_daccess_prot_g = daccess_prot & ~squash_priority_g ; // Bug 5336.
+assign  lsu_tlu_priv_action_g = priv_action ; 
+//assign  lsu_tlu_tte_ebit_g = tlb_rd_tte_data_ebit & tlb_tte_vld_g ;
+wire	lsu_tlu_tte_ebit_g;
+assign  lsu_tlu_tte_ebit_g = pg_with_ebit ;
+//assign  lsu_tlu_spec_access_epage_g = spec_access_epage & tlb_tte_vld_g ; // page with side effects
+wire	lsu_tlu_spec_access_epage_g ;
+assign  lsu_tlu_spec_access_epage_g = spec_access_epage ; // page with side effects
+wire	lsu_tlu_uncache_atomic_g;
+assign  lsu_tlu_uncache_atomic_g = 	// atomic to io space: via a valid TTE, or via the flopped bypass-mode check
+	(atm_access_w_nc & tlb_tte_vld_g) |
+	(atm_access_w_nc_byp_g) ;
+// Define illegal asi actions
+// see sfsr description - excludes cases where 02 and 04 are set for ftype !!!
+wire lsu_tlu_flt_ld_nfo_pg_g;
+assign  lsu_tlu_flt_ld_nfo_pg_g = nfo_pg_nonnfo_asi & tlb_tte_vld_g ; 
+// M-stage OR of the ASI-related causes, flopped once below.
+wire illgl_asi_action_pre_m,illgl_asi_action_pre_g ;
+assign	illgl_asi_action_pre_m = asi_related_trap_m | tlb_daccess_excptn_m | illegal_asi_trap_m | spv_use_hpv_m ; // bug 4181; //bug3660	
+
+dff_s  illglasi_g (
+        .din    (illgl_asi_action_pre_m),
+        .q      (illgl_asi_action_pre_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// & binds tighter than |: only illgl_asi_action_pre_g is masked by the epage/uncache-atomic term; atm_access_unsup_asi is not.
+wire lsu_tlu_illegal_asi_action_g;
+assign  lsu_tlu_illegal_asi_action_g = 
+atm_access_unsup_asi | (illgl_asi_action_pre_g) & // Bug 4825
+~(lsu_tlu_spec_access_epage_g | lsu_tlu_uncache_atomic_g) ;
+//(illgl_asi_action_pre_g | (atm_access_unsup_asi)) & 
+//~(lsu_tlu_spec_access_epage_g | lsu_tlu_uncache_atomic_g) ;
+
+//=========================================================================================
+//  Generate Flush Pipe
+//=========================================================================================
+
+
+// Flush sources: deferred trap, TLU early flush, or an LSU trap on a valid W-stage instruction.
+assign	other_flush_pipe_w =
+	defr_trp_taken | tlu_early_flush_pipe_w | (lsu_inst_vld_w & lsu_tlu_ttype_vld_m2) ;
+// Identical copies, one per consumer (ffu, mmu, exu, ifu).
+assign	lsu_ffu_flush_pipe_w = other_flush_pipe_w ;
+assign	lsu_mmu_flush_pipe_w = other_flush_pipe_w ;
+assign	lsu_exu_flush_pipe_w = other_flush_pipe_w ;
+assign	lsu_ifu_flush_pipe_w = other_flush_pipe_w ;
+// The local flush additionally folds in the flush coming from the IFU.
+assign	lsu_flush_pipe_w = ifu_lsu_flush_w | other_flush_pipe_w ;
+
+//assign 	lsu_qctl1_flush_pipe_w = lsu_flush_pipe_w ;
+//assign 	lsu_stbctl_flush_pipe_w = lsu_flush_pipe_w ;
+//assign 	lsu_stbrwctl_flush_pipe_w = lsu_flush_pipe_w ;
+
+//=========================================================================================
+//  Early Traps to SPU
+//=========================================================================================
+
+// detect st to ma/strm sync - data-access exception.
+//wire	st_to_sync_dexcp_m ;
+// qual with alt_space not required - spu will do it.
+//assign	st_to_sync_dexcp_m = // Bug 5704
+//strm_asi_m & ((lsu_ldst_va_m[7:0] == 8'ha0) | (lsu_ldst_va_m[7:0] == 8'h68)) & st_inst_vld_m ;  
+
+wire	early_flush_m ;
+// OR of every flush cause that is already known in M.
+assign  early_flush_m =
+        (atomic_m & lsu_alt_space_m) |  // Bug 4650 - alt-space atomics should flush.
+        priv_action_m           |
+        early_trap_vld_m        |       // mem-addr-not-aligned.
+        illegal_asi_trap_m      |       // for fp non use of internal asi.
+        //st_to_sync_dexcp_m    |       // Bug 5742
+        //wr_to_strm_sync_m     |       // Bug 5890 - redundant - make room.
+        defr_trp_taken_m_din    |       // Bug 5890
+        daccess_excptn_early_m  ;
+        /*asi_related_trap_m    |       // Bug 2592
+        spv_use_hpv_m       |
+        wr_to_strm_sync_m;*/
+
+// Four flops with the same din: each consumer gets its own registered copy of early_flush_m.
+dff_s  eflushspu_g (
+        .din    (early_flush_m),
+        .q      (lsu_spu_early_flush_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dff_s  eflushspu2_g (
+        .din    (early_flush_m),
+        .q      (lsu_local_early_flush_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dff_s  eflushtlu_g (
+        .din    (early_flush_m),
+        .q      (lsu_tlu_early_flush_w),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dff_s  eflushtlu2_g (
+        .din    (early_flush_m),
+        .q      (lsu_tlu_early_flush2_w),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   
+
+//=========================================================================================
+//  Parity Error Checking
+//=========================================================================================
+
+// DTLB Parity Errors. 
+// ASI read of Tag/Data :
+//  - uncorrectible error
+//  - logging occurs on read.
+//  - precise trap is taken when ldxa completes if nceen set.
+//  - if not set then ldxa is allowed to complete.
+// CAM Read of Tag/Data :
+//  - correctible if locked bit not set.
+//    - takes disrupting trap later.
+//  - uncorrectible if locked bit set.
+//  - both are treated as precise traps.
+//  - if errors not enabled, then load completes as if hit in L1.
+// ** TLB error will cause a trap which will preclude concurrent dcache,dtag  **
+// ** parity errors.                **
+
+// cam related tte data parity error - error assumed correctible if locked
+// bit is not set. Will cause a dmmu_miss for correction.
+// qualify with cam_hit ??
+wire  tte_data_perror_unc ;
+// NOTE(review): the locked-bit wording above is historical; the _corr term is commented out below.
+assign	lsu_tlb_perr_ld_rq_kill_w =
+	//tte_data_perror_corr | (tte_data_perror_unc & nceen_pipe_g) ;
+	(tte_data_perror_unc & nceen_pipe_g) ;
+// The kill is driven only by the _unc term, and only when nceen_pipe_g is set.
+// correctible dtlb errors no longer supported.
+/*assign  tte_data_perror_corr = 
+  tte_data_parity_error & ~tlb_rd_tte_data_locked & tlb_tte_vld_g & 
+  (ld_inst_vld_unflushed | st_inst_vld_unflushed) & lsu_inst_vld_w ;*/
+// Active term: parity error with tlb_tte_vld_g, on a ld or st, lsu_inst_vld_w set, no lsu_flush_pipe_w.
+// caused for both locked and unlocked entries.
+assign  tte_data_perror_unc  = 
+  //tte_data_parity_error &  tlb_rd_tte_data_locked & tlb_tte_vld_g & 
+  tte_data_parity_error &  tlb_tte_vld_g & 
+  (ld_inst_vld_unflushed | st_inst_vld_unflushed) & lsu_inst_vld_w &
+  ~lsu_flush_pipe_w ;
+
+// Asi rd parity error detection
+wire  asi_tte_data_perror,asi_tte_tag_perror ;
+
+assign  asi_tte_data_perror =
+  tte_data_parity_error & data_rd_vld_g ;
+// For data tte read, both tag and data arrays are read.
+// Parity error on asi read of tag should not be reported.
+assign  asi_tte_tag_perror =
+  tte_tag_parity_error & tag_rd_vld_g & ~data_rd_vld_g ;
+
+wire	st_dtlb_perror ;
+assign	st_dtlb_perror =   tte_data_parity_error &  tlb_tte_vld_g & 
+   st_inst_vld_unflushed & lsu_inst_vld_w ;
+ // ~lsu_flush_pipe_w ;
+
+wire	cancel_err_flush ;
+assign	cancel_err_flush = // Bug 5165
+((priv_pg_usr_mode | nfo_pg_nonnfo_asi |
+atm_access_w_nc) & tlb_tte_vld_g) | // bug6052/eco6620
+spec_access_epage | 
+nonwr_pg_st_access ;
+// squash_err gates the su/ue terms below; cancel_err_flush removes the daccess_excptn/prot leg of it.
+// Bug 6877
+wire squash_err ;
+assign squash_err = 
+// assume always higher priority. BE - share common terms elsewhere.
+tlu_early_flush_pipe_w | defr_trp_taken | ifu_lsu_flush_w |
+// isolate to daccess_excptn/daccess_prot as per Bug 5165.
+(lsu_tlu_ttype_vld_m2 & ~(daccess_excptn_sel | daccess_prot_sel)) | 
+((daccess_excptn_sel | daccess_prot_sel) & ~cancel_err_flush) ;
+// su: store dtlb parity error, not atomic, not squashed.
+wire	tlb_data_su_g ;
+assign	tlb_data_su_g =   st_dtlb_perror & ~atomic_g &
+  ~squash_err ;
+  //~(lsu_flush_pipe_w & ~cancel_err_flush) ; // Bug 6877
+// Load-side parity error on a valid tte, qualified with the same ~squash_err.
+wire	ld_dtlb_perror ;
+assign	ld_dtlb_perror =   tte_data_parity_error &  tlb_tte_vld_g & 
+  ld_inst_vld_unflushed  & lsu_inst_vld_w &
+  ~squash_err ;
+// ue: the load parity error above OR the asi-read data parity error input.
+wire  tlb_data_ue_g ;
+assign  tlb_data_ue_g = 
+	ld_dtlb_perror |	// synchronous to pipe - xslate ; ue is for ld now.
+	lsu_tlb_asi_data_perr_g ; // asynchronous to pipe - asi rd
+
+/* Simplify for Bug 5888.
+wire	st_noatom_dtlb_perr ; // atomics not represented.
+assign	st_noatom_dtlb_perr = st_dtlb_perror & ~lsu_flush_pipe_w & ~atomic_g ;
+wire	st_noatom_dtlb_perr_en ; 
+assign	st_noatom_dtlb_perr_en = st_noatom_dtlb_perr & nceen_pipe_g ; */
+wire	st_noatom_dtlb_perr_en ; 
+wire	st_dtlb_perr_en ;
+assign	st_noatom_dtlb_perr_en = st_dtlb_perr_en & ~atomic_g ;
+
+// rm corr err. reporting
+dff_s  #(3) terr_stgd1 (
+        .din    ({tlb_data_su_g,tlb_data_ue_g,lsu_tlb_asi_tag_perr_g}),
+        //.din    ({st_noatom_dtlb_perr,tlb_data_ue_g,lsu_tlb_asi_tag_perr_g}),
+        .q      ({lsu_ifu_tlb_data_su,lsu_ifu_tlb_data_ue,lsu_ifu_tlb_tag_ue}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// If st dtlb parity error detected, then need to invalidate st in stb.
+// Considered unrecoverable for the thread itself.
+
+assign	st_dtlb_perr_en = st_dtlb_perror & ~lsu_flush_pipe_w & nceen_pipe_g ;
+
+// Kill will happen for atomics also.
+//assign	lsu_exu_st_dtlb_perr_g = st_dtlb_perr_en ;
+assign	lsu_exu_st_dtlb_perr_g = st_noatom_dtlb_perr_en ; // Bug 5888
+
+assign	lsu_ffu_st_dtlb_perr_g = st_noatom_dtlb_perr_en ; // Bug 5910/ECO 6529
+   
+assign	lsu_st_dtlb_perr_g[0] = st_dtlb_perr_en & thread0_g ;
+assign	lsu_st_dtlb_perr_g[1] = st_dtlb_perr_en & thread1_g ;
+assign	lsu_st_dtlb_perr_g[2] = st_dtlb_perr_en & thread2_g ;
+assign	lsu_st_dtlb_perr_g[3] = st_dtlb_perr_en & thread3_g ;
+
+//==========================================================================
+// DEFERRED TRAP DUE TO STORE 
+//==========================================================================
+
+// Cases :
+// defr_trp_m=1,ifu_flush_w=0. 
+//	- defr_trp is generated.
+//	- next inst will not take redundant deferred trap as
+//	its inst_vld will be annulled by trap flush.
+// defr_trp_m=1,ifu_flush_w=1. 
+//	- defr_trp is generated. TLU annuls.
+//	- Other units see redundant defr_trp flush ORed with ifu_flush_w.
+//	- next inst will not take redundant deferred trap as
+//	its inst_vld will be annulled by ifu_flush_w .
+
+
+// Log Deferred trap. Take on next available inst from thread.
+// Inst vld must be qualified with flush.
+
+wire    st_defr_trp_en0,st_defr_trp_en1,st_defr_trp_en2,st_defr_trp_en3 ;
+wire    st_defr_trp0,st_defr_trp1,st_defr_trp2,st_defr_trp3 ;
+// Per-thread set condition: enabled non-atomic store dtlb parity error, steered by thread*_g.
+assign  st_defr_trp_en0 = st_noatom_dtlb_perr_en & thread0_g ;
+assign  st_defr_trp_en1 = st_noatom_dtlb_perr_en & thread1_g ;
+assign  st_defr_trp_en2 = st_noatom_dtlb_perr_en & thread2_g ;
+assign  st_defr_trp_en3 = st_noatom_dtlb_perr_en & thread3_g ;
+// Per-thread clear request (m): reset, or pending/just-set trap with that thread in m and flush_w_inst_vld_m.
+wire    stpend_rst0_m,stpend_rst1_m,stpend_rst2_m,stpend_rst3_m;
+wire    stpend_rst0_w,stpend_rst1_w,stpend_rst2_w,stpend_rst3_w;
+wire    stpend_rst0,stpend_rst1,stpend_rst2,stpend_rst3;
+assign  stpend_rst0_m = reset | 
+((st_defr_trp0 | st_defr_trp_en0) & thread0_m & flush_w_inst_vld_m);
+assign  stpend_rst1_m = reset | 
+((st_defr_trp1 | st_defr_trp_en1) & thread1_m & flush_w_inst_vld_m);
+assign  stpend_rst2_m = reset | 
+((st_defr_trp2 | st_defr_trp_en2) & thread2_m & flush_w_inst_vld_m);
+assign  stpend_rst3_m = reset | 
+((st_defr_trp3 | st_defr_trp_en3) & thread3_m & flush_w_inst_vld_m);
+
+// Postpone reset by a cycle - 4916
+dff_s #(4)  stpend_d1 (
+           .din    ({stpend_rst3_m,stpend_rst2_m,stpend_rst1_m,stpend_rst0_m}),
+           .q      ({stpend_rst3_w,stpend_rst2_w,stpend_rst1_w,stpend_rst0_w}),
+           .clk    (clk),
+           .se     (se),       .si (),          .so ()
+           );
+
+// Prevent reset if inst is flushed by ifu.
+assign	stpend_rst3 = stpend_rst3_w & ~ifu_lsu_flush_w ;
+assign	stpend_rst2 = stpend_rst2_w & ~ifu_lsu_flush_w ;
+assign	stpend_rst1 = stpend_rst1_w & ~ifu_lsu_flush_w ;
+assign	stpend_rst0 = stpend_rst0_w & ~ifu_lsu_flush_w ;
+
+dffre_s #(1)  deftrp_t0 (	// thread 0 pending deferred-trap bit
+           .din    (st_defr_trp_en0),	// din and en share one signal (set-only load, assuming en gates the load)
+           .q      (st_defr_trp0),
+           .rst    (stpend_rst0),	// delayed, ifu-flush-qualified clear request
+           .en     (st_defr_trp_en0),
+           .clk    (clk),
+           .se     (se),       .si (),          .so ()
+           );
+// thread 1 - same structure. NOTE(review): dffre_s is defined outside this excerpt; rst/en priority not visible here.
+dffre_s #(1)  deftrp_t1 (
+           .din    (st_defr_trp_en1),
+           .q      (st_defr_trp1),
+           .rst    (stpend_rst1),
+           .en     (st_defr_trp_en1),
+           .clk    (clk),
+           .se     (se),       .si (),          .so ()
+           );
+// thread 2 - same structure.
+dffre_s #(1)  deftrp_t2 (
+           .din    (st_defr_trp_en2),
+           .q      (st_defr_trp2),
+           .rst    (stpend_rst2),
+           .en     (st_defr_trp_en2),
+           .clk    (clk),
+           .se     (se),       .si (),          .so ()
+           );
+// thread 3 - same structure.
+dffre_s #(1)  deftrp_t3 (
+           .din    (st_defr_trp_en3),
+           .q      (st_defr_trp3),
+           .rst    (stpend_rst3),
+           .en     (st_defr_trp_en3),
+           .clk    (clk),
+           .se     (se),       .si (),          .so ()
+           );
+
+// Deferred trap can be taken on any instruction.
+// Selection is based on next thread available.
+
+//instruction n+2, and the following...
+// Pending path: the flopped st_defr_trp* bit of the thread currently in m.
+assign  defr_trp_taken_m =
+        //ifu_tlu_inst_vld_m & (
+        flush_w_inst_vld_m & ( 	// <= rely on flush by defr-trp to clear
+				// pended defr-trp
+        (st_defr_trp0 & thread0_m) |
+        (st_defr_trp1 & thread1_m) |
+        (st_defr_trp2 & thread2_m) |
+        (st_defr_trp3 & thread3_m)) ;
+// Bypass path: same selection, but on the set conditions st_defr_trp_en* instead of the flop outputs.
+assign defr_trp_taken_byp = 
+        //ifu_tlu_inst_vld_m & (
+        flush_w_inst_vld_m & (
+        (st_defr_trp_en0 & thread0_m) |
+        (st_defr_trp_en1 & thread1_m) |
+        (st_defr_trp_en2 & thread2_m) |
+        (st_defr_trp_en3 & thread3_m) );
+// Either path raises the m-stage term (it also feeds early_flush_m in this file).
+// It is flopped once to give defr_trp_taken, which is a term of other_flush_pipe_w.
+assign defr_trp_taken_m_din = defr_trp_taken_m |  defr_trp_taken_byp;
+ 
+dff_s #(1) defr_trp_taken_stgg (
+     .din (defr_trp_taken_m_din),
+     .q   (defr_trp_taken),
+     .clk    (clk),
+     .se     (se),       .si (),          .so ()
+    );
+// The same bit is driven out under three names.
+assign	lsu_defr_trp_taken_g = defr_trp_taken ;
+assign	lsu_tlu_defr_trp_taken_g = defr_trp_taken ;
+assign	lsu_mmu_defr_trp_taken_g = defr_trp_taken ;
+
+//==========================================================================
+// DSFSR/SFAR WR 
+//==========================================================================
+
+
+
+wire	[3:0]	pstate_cle,pstate_am ;
+// flop'n use to prevent timing path.
+dff_s #(8)  cle_stg (
+        .din    ({tlu_lsu_pstate_cle[3:0],tlu_lsu_pstate_am[3:0]}),
+        .q      ({pstate_cle[3:0],pstate_am[3:0]}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+wire	pstate_cle_m ;
+assign  pstate_cle_m = 
+        (thread0_m & pstate_cle[0]) |
+        (thread1_m & pstate_cle[1]) |
+        (thread2_m & pstate_cle[2]) |
+        (thread3_m & pstate_cle[3]);
+
+wire	[3:0]	dsfsr_asi_sel_m ;
+wire	prim_asi_sel ;
+assign	prim_asi_sel = 
+exu_tlu_misalign_addr_jmpl_rtn_m | (lsu_tlu_nonalt_ldst_m & ~lsu_nonalt_nucl_access_m) ;
+assign  dsfsr_asi_sel_m[0] =  // ASI_PRIMARY
+		 prim_asi_sel & ~pstate_cle_m;
+// Does asi_primary_little make sense for jmpl/return ?
+assign  dsfsr_asi_sel_m[1] =  // ASI_PRIMARY_LITTLE
+        	prim_asi_sel  &  pstate_cle_m;
+assign  dsfsr_asi_sel_m[2] =  // ASI_NUCLEUS
+        	lsu_nonalt_nucl_access_m &  ~pstate_cle_m;
+assign  dsfsr_asi_sel_m[3] =  // ASI_NUCLEUS_LITTLE
+        	lsu_nonalt_nucl_access_m &   pstate_cle_m;
+/*assign  dsfsr_asi_sel_m[4] =  // assigned asi
+        ~(exu_tlu_misalign_addr_jmpl_rtn_m | lsu_tlu_nonalt_ldst_m);*/
+
+wire	[7:0]	asi_state_g ;
+// flop'n use to prevent timing path.
+dff_s #(8)  asistate_stgg (
+        .din    (lsu_excpctl_asi_state_m[7:0]),
+        .q      (asi_state_g[7:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+wire	[7:0]	dsfsr_asi_g ;
+wire	[3:0]	dsfsr_asi_sel_g ;
+// Priority mux, select bit 0 highest; with no select bit set the flopped asi_state_g is used.
+/*assign dsfsr_asi_g[7:0] =(dsfsr_asi_sel_g[0] ? 8'h80 : 8'h00) |
+                         (dsfsr_asi_sel_g[1] ? 8'h88 : 8'h00) |
+                         (dsfsr_asi_sel_g[2] ? asi_state_g[7:0] : 8'h00);*/
+// Bug 4212 - spec problem
+assign dsfsr_asi_g[7:0] =(dsfsr_asi_sel_g[0] ? 8'h80 :	// sel[0]: ASI_PRIMARY
+                         	(dsfsr_asi_sel_g[1] ? 8'h88 : 	// sel[1]: ASI_PRIMARY_LITTLE
+                         		(dsfsr_asi_sel_g[2] ? 8'h04 :	// sel[2]: ASI_NUCLEUS
+                         			(dsfsr_asi_sel_g[3] ?  8'h0C : asi_state_g[7:0]))));	// sel[3]: ASI_NUCLEUS_LITTLE
+ 
+assign  pstate_am_m = 
+        (thread0_m & pstate_am[0]) |
+        (thread1_m & pstate_am[1]) |
+        (thread2_m & pstate_am[2]) |
+        (thread3_m & pstate_am[3]);
+
+assign  dmmu_va_oor_m = exu_tlu_va_oor_m & ~pstate_am_m & lsu_memref_m & ~lsu_squash_va_oor_m;
+
+wire	[3:0]     dsfsr_flt_vld;
+dff_s #(4)  fltvld_stgd1 (
+        .din    (tlu_dsfsr_flt_vld[3:0]),
+        .q      (dsfsr_flt_vld[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+wire	dsfsr_flt_vld_m ;
+assign  dsfsr_flt_vld_m = 
+        (thread0_m & dsfsr_flt_vld[0]) |
+        (thread1_m & dsfsr_flt_vld[1]) |
+        (thread2_m & dsfsr_flt_vld[2]) |
+        (thread3_m & dsfsr_flt_vld[3]);
+
+wire	ldst_xslate_g,flsh_inst_g,dsfsr_flt_vld_g,dsfsr_wr_op_g ;
+wire	misalign_addr_jmpl_rtn_g,misalign_addr_ldst_atm_g ;
+wire	[2:0]	dsfsr_ctxt_sel ;
+
+// flop flt_vld and use
+dff_s #(14)  dsfsr_stgg (
+        .din    ({dsfsr_asi_sel_m[3:0],dmmu_va_oor_m,// memref_m,
+                lsu_tlu_xslating_ldst_m,lsu_flsh_inst_m,lsu_tlu_ctxt_sel_m[2:0],
+                dsfsr_flt_vld_m,lsu_tlu_write_op_m,exu_tlu_misalign_addr_jmpl_rtn_m,
+                lsu_tlu_misalign_addr_ldst_atm_m}),
+        .q      ({dsfsr_asi_sel_g[3:0],dmmu_va_oor_g,ldst_xslate_g,// memref_g,
+                flsh_inst_g,dsfsr_ctxt_sel[2:0],dsfsr_flt_vld_g, dsfsr_wr_op_g,
+                misalign_addr_jmpl_rtn_g,misalign_addr_ldst_atm_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// To be set only for data_access_exception traps - only one can be
+// reported at any time.        
+
+wire	[6:0]	dsfsr_ftype_g ;
+assign  dsfsr_ftype_g[6] = 1'b0;
+assign  dsfsr_ftype_g[5] = dmmu_va_oor_g | lsu_tlu_wtchpt_trp_g;
+assign  dsfsr_ftype_g[4] = lsu_tlu_flt_ld_nfo_pg_g;
+assign  dsfsr_ftype_g[3] = lsu_tlu_illegal_asi_action_g 
+			| tlu_priv_trap_g ; // Bug 4799
+//assign  dsfsr_ftype_g[3] = lsu_tlu_illegal_asi_action_g | tlu_mmu_sync_data_excp_g;
+assign  dsfsr_ftype_g[2] = (lsu_tlu_uncache_atomic_g & ~atm_access_unsup_asi);
+assign  dsfsr_ftype_g[1] = lsu_tlu_spec_access_epage_g;
+assign  dsfsr_ftype_g[0] = lsu_tlu_priv_violtn_g;
+
+wire	dsfsr_side_effect_g ;
+assign  dsfsr_side_effect_g = lsu_tlu_tte_ebit_g & (ldst_xslate_g | flsh_inst_g);
+
+// Fault Type based on Priority Encoding of Traps
+wire	[6:0]	dsfsr_pe_ftype_g ;
+wire	dsfsr_ftype_zero ;
+// Is this needed ? Doesn't it default to zero ?
+assign  dsfsr_pe_ftype_g[6:0] = dsfsr_ftype_zero ? 7'h00 : dsfsr_ftype_g[6:0]; 
+
+// set to 11 when the access does not have a translating asi.
+wire	[1:0]	dsfsr_ctxt_g ;
+assign  dsfsr_ctxt_g[1:0] =
+        dsfsr_ctxt_sel[0] ? 2'b00 :     
+                dsfsr_ctxt_sel[1] ? 2'b01 :     
+                        dsfsr_ctxt_sel[2] ? 2'b10 : 2'b11;      
+
+
+assign  lsu_dsfsr_din_g[23:0] =
+        {dsfsr_asi_g[7:0],
+        2'b0,
+        dsfsr_pe_ftype_g[6:0],
+        dsfsr_side_effect_g,
+        dsfsr_ctxt_g[1:0],
+	1'b0, // Bug 3323 - Arch change
+        //pstate_priv,  
+        dsfsr_wr_op_g,  // pipe
+        dsfsr_flt_vld_g,
+        1'b1};
+
+// This is going to be a critical path !!!
+// Assume that traps in front-end cause instructions to be no-op'ed
+// further down the pipeline. Thus there is no need to qualify writes
+// to dsfsr with writes to isfsr
+wire	dsfsr_trp_wr_g ;
+wire	dsfsr_trp_wr_pre_m,dsfsr_trp_wr_pre_g ;
+// m-stage part of the write trigger. It is flopped to dsfsr_trp_wr_pre_g and
+// ORed with the g-stage trap terms in dsfsr_trp_wr_g.
+assign	dsfsr_trp_wr_pre_m =
+	spv_use_hpv_m	| // Bug 3254 ; add new data-access-excp
+	// spec_access_epage_m | // Bug 3515
+	priv_action_m | 
+	exu_tlu_misalign_addr_jmpl_rtn_m |
+	lsu_tlu_misalign_addr_ldst_atm_m ;
+
+dff_s   dsfsrtrg_stgg (
+        .din    (dsfsr_trp_wr_pre_m),
+        .q      (dsfsr_trp_wr_pre_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// Write strobe: any listed trap source, qualified by lsu_inst_vld_w and not ifu_lsu_flush_w / defr_trp_taken.
+assign  dsfsr_trp_wr_g = 
+        ((lsu_tlu_priv_violtn_g  | 
+	lsu_tlu_spec_access_epage_g |	// Bug 3515 - uncomment out.
+        lsu_tlu_uncache_atomic_g | lsu_tlu_illegal_asi_action_g |
+        lsu_tlu_flt_ld_nfo_pg_g  | dmmu_va_oor_g) |     // data access exceptions
+        daccess_prot |	// daccess_excptn not excluded.
+        lsu_tlu_wtchpt_trp_g     |      // watchpoint trap
+	dsfsr_trp_wr_pre_g |
+	tlu_priv_trap_g 		// scratchpad/queue daccess;Bug 4799
+        ) &
+        lsu_inst_vld_w & ~(ifu_lsu_flush_w | defr_trp_taken) ; // Bug 4444,5196
+// NOTE(review): the strobe uses daccess_prot, the term below uses daccess_prot_g; both are defined outside this excerpt - confirm.
+assign  dsfsr_ftype_zero = 
+        daccess_prot_g | lsu_tlu_priv_action_g | lsu_tlu_wtchpt_trp_g |
+        misalign_addr_jmpl_rtn_g | misalign_addr_ldst_atm_g;
+// dsfsr_ftype_zero selects 7'h00 for dsfsr_pe_ftype_g. Per-thread write strobes follow.
+// terms below can be made common. (grape)
+assign  lsu_dmmu_sfsr_trp_wr[0] = dsfsr_trp_wr_g & thread0_g;
+assign  lsu_dmmu_sfsr_trp_wr[1] = dsfsr_trp_wr_g & thread1_g;
+assign  lsu_dmmu_sfsr_trp_wr[2] = dsfsr_trp_wr_g & thread2_g;
+assign  lsu_dmmu_sfsr_trp_wr[3] = dsfsr_trp_wr_g & thread3_g; 
+
+//==========================================================================
+// Exception Handling End
+//==========================================================================
+
+endmodule // lsu_dctl1
+
Index: /trunk/T1-CPU/lsu/lsu_stb_rwctl.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_stb_rwctl.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_stb_rwctl.v	(revision 6)
@@ -0,0 +1,1203 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_stb_rwctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////
+/*
+//  Description:  Control for Unified STB CAM/DATA of LSU
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include  "sys.h" // system level definition file which contains the 
+          // time scale definition
+
+`include "iop.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module lsu_stb_rwctl (/*AUTOARG*/
+   // Outputs
+   so, lsu_stbctl_flush_pipe_w, stb_cam_wr_no_ivld_m, 
+   ld_rawp_st_ced_w2, stb_data_wr_ptr, stb_data_wptr_vld, 
+   stb_data_rd_ptr, stb_data_rptr_vld, stb_wdata_ramd_b75_b64, 
+   stb_cam_cm_tid, stb_ldst_byte_msk, stb_ldst_byte_msk_min, 
+   stb_cam_rw_ptr, stb_cam_wptr_vld, stb_cam_rptr_vld, 
+   lsu_st_sz_bhww_m, lsu_st_sz_dw_m, lsu_st_sz_bhw_m, 
+   lsu_st_sz_wdw_m, lsu_st_sz_b_m, lsu_st_sz_w_m, lsu_st_sz_hw_m, 
+   lsu_st_sz_hww_m, ld_rawp_st_ackid_w2, stb_flush_st_g, 
+   stb_cam_wvld_m, lsu_st_rq_type_m, lsu_stb_data_early_sel_e, 
+   lsu_stb_data_final_sel_m, lsu_ldquad_inst_m, stb_thrd_en_g, 
+   flsh_inst_m, lsu_stb_va_m, lsu_stb_empty_buf, lsu_spu_stb_empty, 
+   ifu_tlu_inst_vld_m_bf1, ifu_tlu_inst_vld_m_bf2, lsu_ifu_stbcnt0, 
+   lsu_ifu_stbcnt1, lsu_ifu_stbcnt2, lsu_ifu_stbcnt3, 
+   lsu_ffu_stb_full0, lsu_ffu_stb_full1, lsu_ffu_stb_full2, 
+   lsu_ffu_stb_full3, 
+   // Inputs
+   rclk, rst_tri_en, si, se, ld_inst_vld_e, ldst_sz_e, st_inst_vld_e, 
+   stb_pcx_rptr0, stb_wrptr0, stb_pcx_rptr1, stb_wrptr1, 
+   stb_pcx_rptr2, stb_wrptr2, stb_pcx_rptr3, stb_wrptr3, 
+   stb_cam_hit_ptr, stb_cam_hit, lsu_ldst_va_m, sta_internal_m, 
+   ifu_tlu_thrid_e, tlu_exu_early_flush_pipe_w, lsu_ttype_vld_m2, 
+   ifu_lsu_flush_w, lsu_defr_trp_taken_g, ifu_lsu_casa_e, 
+   ifu_lsu_ldstub_e, ifu_lsu_swap_e, ifu_lsu_ldst_dbl_e, 
+   stb_state_ced0, stb_state_ced1, stb_state_ced2, stb_state_ced3, 
+   stb_ld_full_raw, stb_ld_partial_raw, stb_wrptr0_prev, 
+   stb_wrptr1_prev, stb_wrptr2_prev, stb_wrptr3_prev, 
+   ifu_lsu_alt_space_e, ifu_lsu_ldst_fp_e, lsu_quad_asi_e, 
+   lsu_st_rmo_m, lsu_bst_in_pipe_m, ffu_lsu_kill_fst_w, 
+   ffu_lsu_blk_st_e, ffu_lsu_blk_st_tid_m, ffu_lsu_blk_st_va_e, 
+   lsu_snap_blk_st_m, tlb_pgnum_b39_g, lsu_stb_empty, 
+   ifu_tlu_flsh_inst_e, stb_cam_mhit, ifu_tlu_inst_vld_m, 
+   lsu_st_pcx_rq_pick, lsu_st_pcx_rq_vld, stb_rdata_ramc_b8t0, 
+   lsu_stbcnt0, lsu_stbcnt1, lsu_stbcnt2, lsu_stbcnt3
+   ) ;  
+
+input     rclk ;     
+//input     grst_l ;   
+//input     arst_l ;   
+   input  rst_tri_en;
+   
+   input  si;
+   input  se;
+   output so;
+   
+
+input     ld_inst_vld_e ;   // load in pipe.
+input [1:0]   ldst_sz_e ;   // size of load.
+input     st_inst_vld_e ;   // store in pipe.
+// Currently bypass flop make request 
+//input [3:0]   pcx_rq_for_stb ;  // pcx request rd of dfq - threaded
+//input [2:0]   stb_dfq_rptr0 ;   // dfq rptr for stb0
+input [2:0]   stb_pcx_rptr0 ;   // pcx rptr for stb0
+input [2:0]   stb_wrptr0 ;    // wrt ptr - stb0
+//input [2:0]   stb_dfq_rptr1 ;   // dfq rptr for stb1
+input [2:0]   stb_pcx_rptr1 ;   // pcx rptr for stb1
+input [2:0]   stb_wrptr1 ;    // wrt ptr - stb1
+//input [2:0]   stb_dfq_rptr2 ;   // dfq rptr for stb2
+input [2:0]   stb_pcx_rptr2 ;   // pcx rptr for stb2
+input [2:0]   stb_wrptr2 ;    // wrt ptr - stb2
+//input [2:0]   stb_dfq_rptr3 ;   // dfq rptr for stb3
+input [2:0]   stb_pcx_rptr3 ;   // pcx rptr for stb3
+input [2:0]   stb_wrptr3 ;    // wrt ptr - stb3
+input [2:0]     stb_cam_hit_ptr ; // entry which hit
+input     stb_cam_hit ;   // hit has occurred
+//input [7:0]     stb_state_vld0 ;  // valid bits - stb0
+//input [7:0]     stb_state_vld1 ;  // valid bits - stb1
+//input [7:0]     stb_state_vld2 ;  // valid bits - stb2
+//input [7:0]     stb_state_vld3 ;  // valid bits - stb3
+input [9:0]    lsu_ldst_va_m ;
+input     sta_internal_m ;   // internal stxa
+input [1:0]   ifu_tlu_thrid_e ; // thread-id.
+
+//   output     lsu_stbrwctl_flush_pipe_w ;  // tmp for tso_mon
+   input      tlu_exu_early_flush_pipe_w;
+   input      lsu_ttype_vld_m2;
+   
+   input      ifu_lsu_flush_w;
+   input      lsu_defr_trp_taken_g;
+   output     lsu_stbctl_flush_pipe_w;
+   
+   
+input                   ifu_lsu_casa_e ;        // compare-swap instr
+input                   ifu_lsu_ldstub_e ;      // ldstub
+input                   ifu_lsu_swap_e ;        // swap
+input     ifu_lsu_ldst_dbl_e; // ldst dbl, specifically for stquad.
+//input   [63:0]          lsu_stb_st_data_g ;     // data to be written to stb
+input [7:0]   stb_state_ced0 ;
+input [7:0]   stb_state_ced1 ;
+input [7:0]   stb_state_ced2 ;
+input [7:0]   stb_state_ced3 ;
+input [7:0]   stb_ld_full_raw ;
+input [7:0]   stb_ld_partial_raw ;
+input   [2:0]   stb_wrptr0_prev ;
+input   [2:0]   stb_wrptr1_prev ;
+input   [2:0]     stb_wrptr2_prev ;
+input   [2:0]   stb_wrptr3_prev ;
+input     ifu_lsu_alt_space_e ; // alt_space inst
+input     ifu_lsu_ldst_fp_e ;
+//input     tlb_cam_hit ;   // tlb cam hit - mstage
+input     lsu_quad_asi_e ;  // quad ldst asi
+//input  [3:0]      lsu_st_ack_rq_stb ;
+//input     lsu_dtlb_bypass_e ;
+input	lsu_st_rmo_m ;	// rmo st in m cycle.
+input	lsu_bst_in_pipe_m ;	// 1st helper for bst.
+input           ffu_lsu_kill_fst_w ;	// ecc error on st.
+input  		ffu_lsu_blk_st_e ;     	// blk st helper signalled by ffu
+input  	[1:0]	ffu_lsu_blk_st_tid_m ;  // blk st tid - from ffu_lsu_data
+input	[5:3]  	ffu_lsu_blk_st_va_e ;	// bits 5:3 of va from increment
+input  		lsu_snap_blk_st_m ;     	// snap blk st state
+input		tlb_pgnum_b39_g ;
+
+input 	[3:0]   lsu_stb_empty ;         // thread's stb is empty
+input           ifu_tlu_flsh_inst_e;
+input		stb_cam_mhit ;
+input           ifu_tlu_inst_vld_m ;
+//input   [3:0]   lsu_st_pcx_rq_kill_w2 ;
+
+input [3:0]   lsu_st_pcx_rq_pick ;  
+
+input         lsu_st_pcx_rq_vld ;
+
+input	[8:0]	stb_rdata_ramc_b8t0 ;	// scan-only
+
+output          stb_cam_wr_no_ivld_m ;
+
+//output      ld_rawp_st_ced_g ;
+output      ld_rawp_st_ced_w2 ;
+output  [4:0]   stb_data_wr_ptr ; // write ptr - stb data
+output      stb_data_wptr_vld ; // wr vld for stb data
+output  [4:0]   stb_data_rd_ptr ; // rd ptr for stb data
+output      stb_data_rptr_vld ; // rptr vld for stb data
+output  [75:64]    stb_wdata_ramd_b75_b64 ;  // write data for DATA RAM. 
+
+// partial or full raw required
+output  [1:0]   stb_cam_cm_tid ;  // cam tid - stb cam
+//output  [7:0]   stb_cam_sqsh_msk ;  // squash spurious hits
+//output      stb_cam_vld ;
+output  [7:0]   stb_ldst_byte_msk ; // byte mask for write/cam
+output  [7:0]   stb_ldst_byte_msk_min ; // byte mask for write/cam for min path
+
+//output  [3:0]   stb_rd_for_pcx_sel ;    // stb's st selected for read for pcx
+output  [4:0]   stb_cam_rw_ptr ;        // rw ptr for shared stb cam port
+output          stb_cam_wptr_vld ;      // wr vld for stb write   
+output          stb_cam_rptr_vld ;      // rd vld for stb write   
+
+
+//output      lsu_stb_pcx_rvld_d1 ; // stb has been read-delayby1cycle
+//output      lsu_stb_dfq_rvld ;  // wr to dfq stb bypass ff
+
+output                  lsu_st_sz_bhww_m ;      // byte or hword or word
+output                  lsu_st_sz_dw_m ;        // double word
+output                  lsu_st_sz_bhw_m ;       // byte or hword
+output                  lsu_st_sz_wdw_m ;       // word or dword
+output                  lsu_st_sz_b_m ;         // byte
+output                  lsu_st_sz_w_m ;         // word
+output                  lsu_st_sz_hw_m ;        // hword
+output                  lsu_st_sz_hww_m ;       // hword or word
+
+//output     ld_stb_full_raw_g ;
+//output     ld_stb_partial_raw_g ;
+//output  [3:0]   ld_stb_full_raw_g ;
+//output  [3:0]   ld_stb_partial_raw_g ;
+
+output  [2:0]   ld_rawp_st_ackid_w2 ;
+
+//output  [2:0]   stb_dfq_rd_id ;   // stb entry being read for current thread for current thread
+
+output  [3:0]     stb_flush_st_g ;  // st is flushed in cycle g
+output  [3:0]     stb_cam_wvld_m ;
+
+output  [2:1]   lsu_st_rq_type_m ;
+
+output  [3:0]   lsu_stb_data_early_sel_e ;// select source of stb data.
+output      lsu_stb_data_final_sel_m ;// select source of stb data.
+
+output      lsu_ldquad_inst_m ; // stquad inst
+//output      lsu_stdbl_inst_m ;  // stdbl inst
+
+//output  [1:0]   lsu_stb_rd_tid ;  // thread for which stb read occurs
+
+output	[3:0]	stb_thrd_en_g ;	// thread id for current stb access
+
+   output     flsh_inst_m;
+
+   output [9:3] lsu_stb_va_m;
+
+output	[3:0]	lsu_stb_empty_buf ;
+output	[3:0]	lsu_spu_stb_empty ;
+
+   output     ifu_tlu_inst_vld_m_bf1;
+   output     ifu_tlu_inst_vld_m_bf2;
+
+   input [3:0] lsu_stbcnt0;
+   input [3:0] lsu_stbcnt1;
+   input [3:0] lsu_stbcnt2;
+   input [3:0] lsu_stbcnt3;
+
+   output [3:0] lsu_ifu_stbcnt0;
+   output [3:0] lsu_ifu_stbcnt1;
+   output [3:0] lsu_ifu_stbcnt2;
+   output [3:0] lsu_ifu_stbcnt3;
+
+   output       lsu_ffu_stb_full0;
+   output       lsu_ffu_stb_full1;
+   output       lsu_ffu_stb_full2;
+   output       lsu_ffu_stb_full3;
+ 
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+//wire  [4:0] stb_dequeue_ptr ;
+wire  [2:0] stb_wptr_prev ;
+wire  [1:0] st_thrid_m,st_thrid_g ;
+wire  [7:0] ld_any_raw_vld ;
+wire  [7:0] ld_any_raw_vld_d1 ;
+//wire    ld_raw_mhit ;
+wire  [2:0] st_rq_type_m,st_rq_type_g ;
+
+wire  [1:0] ldst_sz_m,ldst_sz_g, pipe_ldst_sz_m ;
+wire    ldst_byte, ldst_hwrd, ldst_word, ldst_dwrd ;
+wire  [7:0] ldst_byte_mask ;
+wire  [2:0] stb_wptr ;  
+wire  [1:0] thrid_m,thrid_g ;
+wire    ld_inst_vld_m, st_inst_vld_m ;
+
+wire    ldst_dbl_m;
+wire    atomic_m ;
+wire    ldstub_m ;
+wire    casa_m, casa_g ;
+wire    swap_m;
+wire    flush_st_g ;
+wire    cam_wptr_vld_g ;
+wire  [2:0] cam_wptr_d1 ;
+
+wire  [2:0] stb_rdptr0,stb_rdptr1 ;
+wire  [2:0] stb_rdptr2,stb_rdptr3 ;
+
+//wire  [3:0] stb_rd_mask ;
+wire  [3:0] stb_select_rptr ;
+wire  [1:0] stb_rd_thrid ;
+//wire    cam_vld_g ;
+wire  [9:0]  ldst_va_m, pipe_ldst_va_m ;
+wire  [3:0]  ldst_va_g ;
+wire  [2:0] cam_wr_ptr ;
+wire  thread0_m, thread1_m, thread2_m, thread3_m ;
+wire  thread0_g, thread1_g, thread2_g, thread3_g ;
+wire  [2:0]   ld_rawp_stb_id ;
+
+//wire  rd_for_dfq_granted ;
+wire  [7:0] stb_state_ced,stb_state_ced_d1 ;
+//wire    stq_wr_en ;
+//wire  [3:0] stq_wr_en_g ;
+//wire  [3:0] stquad_vld ;
+//wire  [2:0] stquad_ptr0,stquad_ptr1,stquad_ptr2,stquad_ptr3 ;
+//wire  [3:0] ld_stq_hit_g ;
+//wire  ldq_hit_g ;
+//wire  [3:0] ldq_hit_g ;
+wire  ldst_fp_m;
+wire  ldstub_e,casa_e,ldst_dbl_e;
+//wire  stb_data_final_sel_e ;
+wire  alt_space_e,alt_space_m ;
+wire  quad_asi_m ;
+//wire  stquad_e, stquad_m ;
+wire  stdbl_e ;
+//wire  dfq_any_rq_for_stb ;
+//wire  [3:0]   stb_rd_for_dfq ;  // read rq for dfq - threaded
+wire    blkst_m,blkst_g ;
+wire	stb_not_empty ;
+
+   wire       clk;
+   assign     clk = rclk;
+
+//   wire       rst_l;
+//   wire       stb_rwctl_rst_l;
+   
+//   dffrl_async rstff(.din (grst_l),
+//                     .q   (stb_rwctl_rst_l),
+//                     .clk (clk), .se(se), .si(), .so(),
+//                     .rst_l (arst_l));
+
+//=========================================================================================
+//  MISC
+//=========================================================================================
+
+// Scan-only flops.
+
+wire	[8:0]	stb_rdata_ramc_b8t0_so ;
+dff_s #(9)  scmscan_ff (
+        .din    (stb_rdata_ramc_b8t0[8:0]),
+        .q      (stb_rdata_ramc_b8t0_so[8:0]),
+        .clk    (clk),
+        .se   (se),       .si (),          .so ()
+        );
+
+//=========================================================================================
+//  INST_VLD_W GENERATION
+//=========================================================================================
+
+wire    flush_w_inst_vld_m ;
+wire    lsu_inst_vld_w ;
+wire    lsu_stbrwctl_flush_pipe_w;
+
+//=======================================
+//instaniate buffers
+//======================================
+
+   wire   ifu_tlu_inst_vld_m_bf0;
+   
+bw_u1_buf_10x UZfix_ifu_tlu_inst_vld_m_bf0 ( .a(ifu_tlu_inst_vld_m), .z(ifu_tlu_inst_vld_m_bf0) );
+bw_u1_buf_30x UZfix_ifu_tlu_inst_vld_m_bf1 ( .a(ifu_tlu_inst_vld_m_bf0), .z(ifu_tlu_inst_vld_m_bf1) );
+bw_u1_buf_20x UZfix_ifu_tlu_inst_vld_m_bf2 ( .a(ifu_tlu_inst_vld_m_bf0), .z(ifu_tlu_inst_vld_m_bf2) );
+   
+assign  flush_w_inst_vld_m =  // m-stage inst valid, dropped when a pipe flush hits the same thread
+        ifu_tlu_inst_vld_m_bf0 &  // output of the bw_u1_buf_10x driven by ifu_tlu_inst_vld_m
+        ~(lsu_stbrwctl_flush_pipe_w & (thrid_m[1:0] == thrid_g[1:0])) ; // really lsu_flush_pipe_w; kills only when m and g thread ids match
+
+dff_s  stgw_ivld (  // lsu_inst_vld_w is flush_w_inst_vld_m staged through this flop
+        .din    (flush_w_inst_vld_m),
+        .q      (lsu_inst_vld_w),
+        .clk    (clk),
+        .se   (se),       .si (),          .so ()  // scan-in/scan-out left unconnected here
+        );
+
+   wire other_flush_pipe_w;
+   wire tlu_early_flush_pipe_w;
+   assign tlu_early_flush_pipe_w = tlu_exu_early_flush_pipe_w;
+   
+assign	other_flush_pipe_w = 
+tlu_early_flush_pipe_w | (lsu_ttype_vld_m2 & lsu_inst_vld_w) |
+lsu_defr_trp_taken_g ;
+   
+   wire lsu_flush_pipe_w;
+   
+assign	lsu_flush_pipe_w = other_flush_pipe_w | ifu_lsu_flush_w ;
+assign 	lsu_stbctl_flush_pipe_w = lsu_flush_pipe_w ;
+assign 	lsu_stbrwctl_flush_pipe_w = lsu_flush_pipe_w ;   
+
+//=========================================================================================
+//  STB Array Addr/Ctl Generation
+//=========================================================================================
+
+assign  ldstub_e = ifu_lsu_ldstub_e ;
+assign  casa_e   = ifu_lsu_casa_e ;
+assign  ldst_dbl_e = ifu_lsu_ldst_dbl_e ;
+
+assign  alt_space_e = ifu_lsu_alt_space_e ;
+
+//assign  stdbl_e =  ldst_dbl_e & (~alt_space_e | (alt_space_e & ~lsu_quad_asi_e)) ;
+assign  stdbl_e =  ldst_dbl_e ;
+
+//   wire lsu_stdbl_inst_m;
+   
+//dff  stq_stgm (
+//  .din  (stdbl_e), 
+//  .q  	(lsu_stdbl_inst_m),  
+//  .clk  (clk), 
+//  .se (se), .si (), .so ()
+//  );
+
+// E-stage select for store data; rst_tri_en forces bit [2] on and the others off. This path can probably be eased.
+assign  lsu_stb_data_early_sel_e[0] = ldstub_e  & ~rst_tri_en;  // ldstub
+assign  lsu_stb_data_early_sel_e[1] = casa_e & ~rst_tri_en;  // casa
+assign  lsu_stb_data_early_sel_e[2] = ~(ldstub_e | casa_e |  stdbl_e) | rst_tri_en;  // default: none of ldstub/casa/stdbl, or rst_tri_en
+assign  lsu_stb_data_early_sel_e[3] = stdbl_e & ~rst_tri_en ;  // store double (stdbl_e == ldst_dbl_e)
+
+// modify for accepting bst data out of pipe.
+//assign  stb_data_final_sel_e = ~(ldst_fp_e | ffu_lsu_blk_st_e) ;
+
+/*dff  lsel_g (
+  .din  (stb_data_final_sel_e), 
+  .q  (lsu_stb_data_final_sel_m),
+  .clk  (clk), 
+  .se (se), .si (), .so ()
+  );*/
+
+assign	lsu_stb_data_final_sel_m = ~(ldst_fp_m | blkst_m) ;
+
+wire	real_st_m ;
+wire	flsh_inst_m, flsh_inst_g ;
+// !!! could qualify st_inst_vld_e with stxa_internal !!!
+dff_s #(13) stgm_vld  (
+  .din  ({ld_inst_vld_e,st_inst_vld_e,ldst_sz_e[1:0], 
+    ifu_lsu_swap_e, ifu_lsu_ldstub_e, ifu_lsu_casa_e,ifu_lsu_ldst_dbl_e,
+    ifu_tlu_thrid_e[1:0],ifu_lsu_ldst_fp_e,lsu_quad_asi_e,ifu_tlu_flsh_inst_e}),  
+  .q  ({ld_inst_vld_m,real_st_m,pipe_ldst_sz_m[1:0], 
+    swap_m,ldstub_m,casa_m,ldst_dbl_m,thrid_m[1:0],ldst_fp_m,quad_asi_m,flsh_inst_m}),
+  .clk  (clk), 
+  .se   (se), .si (), .so ()
+  );
+
+assign	st_inst_vld_m = real_st_m | flsh_inst_m ;
+
+// do we need ld/st unflushed ?
+   wire sta_internal_g;
+   
+dff_s #(7) stgw_vld  (
+  .din  ({sta_internal_m,   
+    casa_m, thrid_m[1:0],ldst_sz_m[1:0], flsh_inst_m}),  
+  .q    ({sta_internal_g,   
+    casa_g, thrid_g[1:0],ldst_sz_g[1:0], flsh_inst_g}),
+  .clk  (clk), 
+  .se   (se), .si (), .so ()
+  );
+
+
+// stb-cam will be written by st at rising edge of g-stage.
+// However, st can be flushed after write. To keep the stb state consistent,
+// The valid and write ptr will not be updated until the rising edge of w2.
+
+wire	early_flush_cond_g,partial_flush_st_g ;
+assign early_flush_cond_g =  // g-stage store flush terms that do not include the pipe flush
+(sta_internal_g | ~(lsu_inst_vld_w | blkst_g) | ffu_lsu_kill_fst_w) ;  // any one term marks the g-stage store as flushed
+assign	flush_st_g = (early_flush_cond_g | lsu_stbrwctl_flush_pipe_w) & cam_wptr_vld_g ;  // full flush; only meaningful if a cam write was valid in g
+
+//timing, send to stb_ctl and qualified by stb_cam_wvld_g (thread version of cam_wptr_vld_g)   
+//assign	partial_flush_st_g = early_flush_cond_g & cam_wptr_vld_g ;
+assign	partial_flush_st_g = early_flush_cond_g ;  // cam_wptr_vld_g qualification intentionally left out (see timing note above)
+
+assign  atomic_m = (casa_m | ldstub_m | swap_m) & st_inst_vld_m ;  // casa/ldstub/swap that is also flagged as a store in m
+
+// WRITE PTR VALID GENERATION.
+
+// meant specifically to squash pcx_rq_for_stb.
+assign  stb_cam_wr_no_ivld_m  // any store-like op in m; not qualified with the inst valid
+  = (st_inst_vld_m | casa_m | ldstub_m | swap_m | blkst_m) ;
+
+//bug3610 - kill cam write vld(==stb data write vld next cycle) to avoid data read and write in the same cycle
+//          to the same entry
+wire  b2b_st_detect ;
+
+assign  stb_cam_wptr_vld  // cam write valid for the m-stage store / atomic / blk-st
+  = (((st_inst_vld_m | atomic_m) & ifu_tlu_inst_vld_m_bf0) | blkst_m) & ~(flush_st_g & b2b_st_detect) ;  // killed when the g-stage store is flushed and m is on the same thread
+  //= ((st_inst_vld_m | atomic_m) & ifu_tlu_inst_vld_m_bf0) | blkst_m ;  // bug3610
+  //= (st_inst_vld_m | atomic_m | (ldst_dbl_m & st_inst_vld_m) | blkst_m) ;
+
+dff_s  wptr_g (  // cam_wptr_vld_g is stb_cam_wptr_vld staged through this flop
+  .din  (stb_cam_wptr_vld), .q  (cam_wptr_vld_g),
+  .clk  (clk), 
+  .se   (se), .si (), .so ()
+  );
+
+//flop move into mem cell (roll back)  
+assign  stb_data_wptr_vld = cam_wptr_vld_g ;  // data-array write valid is the staged cam write valid
+
+// WRITE PTR GENERATION
+
+// It is assumed that if there is a store in the pipe, there is a 
+// free entry in the corresponding stb. Otherwise, the pipe would've
+// stalled for the thread.      
+
+// If a store-like inst has been flushed, then the old ptr has to be restored
+// and used.  This is done within thread specific stb control
+
+assign  thread0_m = ~st_thrid_m[1] & ~st_thrid_m[0] ;
+assign  thread1_m = ~st_thrid_m[1] &  st_thrid_m[0] ;
+assign  thread2_m =  st_thrid_m[1] & ~st_thrid_m[0] ;
+assign  thread3_m =  st_thrid_m[1] &  st_thrid_m[0] ;
+
+dff_s #(4) stgg_thrd (
+  .din  ({thread0_m,thread1_m,thread2_m,thread3_m}),  
+  .q  ({thread0_g,thread1_g,thread2_g,thread3_g}),  
+  .clk  (clk), 
+  .se (se), .si (), .so ()
+  );
+
+assign	stb_thrd_en_g[0] = thread0_g ;
+assign	stb_thrd_en_g[1] = thread1_g ;
+assign	stb_thrd_en_g[2] = thread2_g ;
+assign	stb_thrd_en_g[3] = thread3_g ;
+
+//assign  stb_wptr[2:0] = 
+//  thread0_m ? stb_wrptr0[2:0] :
+//    thread1_m ? stb_wrptr1[2:0] :
+//      thread2_m ? stb_wrptr2[2:0] :
+//        thread3_m ? stb_wrptr3[2:0] : 3'bxxx ;
+
+assign  stb_wptr[2:0] =  // and-or select of the per-thread write ptr using the decoded m-stage store thread
+  (thread0_m ? stb_wrptr0[2:0] :  3'b000) |
+  (thread1_m ? stb_wrptr1[2:0] :  3'b000) |
+  (thread2_m ? stb_wrptr2[2:0] :  3'b000) |
+  (thread3_m ? stb_wrptr3[2:0] :  3'b000) ;
+   
+assign  b2b_st_detect =   // detect back-to-back store: m-stage and g-stage store threads are the same
+  (thread0_m & thread0_g) |
+  (thread1_m & thread1_g) |
+  (thread2_m & thread2_g) |
+  (thread3_m & thread3_g) ;
+
+assign  cam_wr_ptr[2:0] = (flush_st_g & b2b_st_detect) ? cam_wptr_d1[2:0] : stb_wptr[2:0] ;  // reuse last cycle's ptr when the g-stage store of this thread is flushed
+
+dff_s #(3)  wptr_d1 (  // cam_wptr_d1 holds the staged copy of cam_wr_ptr
+  .din  (cam_wr_ptr[2:0]),  .q  (cam_wptr_d1[2:0]),
+  .clk  (clk), 
+  .se (se), .si (), .so ()
+  );
+
+assign  stb_cam_wvld_m[0] = stb_cam_wptr_vld & thread0_m ;
+assign  stb_cam_wvld_m[1] = stb_cam_wptr_vld & thread1_m ;
+assign  stb_cam_wvld_m[2] = stb_cam_wptr_vld & thread2_m ;
+assign  stb_cam_wvld_m[3] = stb_cam_wptr_vld & thread3_m ;
+
+// contains potential flush conditions.
+assign  stb_flush_st_g[0] = partial_flush_st_g ;
+assign  stb_flush_st_g[1] = partial_flush_st_g ;
+assign  stb_flush_st_g[2] = partial_flush_st_g ;
+assign  stb_flush_st_g[3] = partial_flush_st_g ;
+
+// stb-data has a delayed write in w2. Alignment of stb data will be done on write
+// of 64b into stb. This allows write of stb cam and data to be done in the
+// same cycle, and thus read can occur simultaneously for pcx. 
+
+//mem cell change to bw_r_rf32x80, flop move into mem cell (roll back)
+//flop outside mem cell
+assign  stb_data_wr_ptr[4:0] =  {st_thrid_g[1:0],cam_wptr_d1[2:0]};
+   
+// RD PTR/VLD GENERATION
+
+// stb read for dfq dumps data into a bypass flop. Thus a read for the dfq can occur
+// if a thread's stb has an acked entry and the bypass flop is empty.
+// stb read for pcx occurs on availability of queue entry. 
+
+// Both dfq and pcx require a read of the cam and data. The reads
+// can thus not happen when load that hits in the stb is in the w2 (change to W3)
+// stage and a store is in the g-stage of the pipe. Both
+// probabilities are low.
+
+// ??Read for pcx takes priority over dfq. No deadlock can occur
+// ??as at some point the pcx reads will be exhausted and the stb
+// ??will have to drain itself. The stb is self-regulating in this regard.
+
+// priority of stb read: ld_cam_hit (full raw bypass) > dfq > pcx 
+
+//====================================================================================
+//raw bypass timing 
+//G/WB                          W2     W3                      W4
+//cam_hit(from stb_cam output)  flop   stb_data rd_ptr/rd_vld  read STB_DATA/BYP
+//====================================================================================
+
+   wire [1:0] thrid_w2;
+   wire [2:0] stb_cam_hit_ptr_w2;
+   wire       stb_cam_hit_w2;   
+   wire       stb_cam_hit_w;   
+   
+   //bug3503
+   assign stb_cam_hit_w  =  stb_cam_hit & lsu_inst_vld_w & ~lsu_stbrwctl_flush_pipe_w;
+
+dff_s #(6) stb_cam_hit_stg_w2 (
+  .din  ({thrid_g[1:0],  stb_cam_hit_ptr[2:0],    stb_cam_hit_w   }), 
+  .q  	({thrid_w2[1:0], stb_cam_hit_ptr_w2[2:0], stb_cam_hit_w2}),
+  .clk  (clk), 
+  .se   (se), .si (), .so ()
+  );
+   
+// logic moved to qctl1
+// pcx is making request for data in current cycle. Can be multi-hot.
+//assign  pcx_any_rq_for_stb = |pcx_rq_for_stb[3:0] ;
+//assign  pcx_any_rq_for_stb = 
+//	(pcx_rq_for_stb[0] & ~lsu_st_pcx_rq_kill_w2[0]) | 
+//	(pcx_rq_for_stb[1] & ~lsu_st_pcx_rq_kill_w2[1]) | 
+//	(pcx_rq_for_stb[2] & ~lsu_st_pcx_rq_kill_w2[2]) | 
+//	(pcx_rq_for_stb[3] & ~lsu_st_pcx_rq_kill_w2[3]) ; 
+
+// ??ld-cam hit based read takes precedence
+// ??Timing : This could be made pessimistic by using ld_inst_vld_g
+
+//assign  stb_select_rptr[3:0] =  pcx_rq_for_stb[3:0] ;  // timing fix
+assign  stb_select_rptr[3:0] =  lsu_st_pcx_rq_pick[3:0] ; 
+
+// This could be a critical path. Be careful !
+//assign  stb_rdptr0[2:0] = ~dfq_any_rq_for_stb ? stb_pcx_rptr0[2:0] : stb_dfq_rptr0[2:0] ; 
+assign  stb_rdptr0[2:0] = stb_pcx_rptr0[2:0] ;
+assign  stb_rdptr1[2:0] = stb_pcx_rptr1[2:0] ;
+assign  stb_rdptr2[2:0] = stb_pcx_rptr2[2:0] ;
+assign  stb_rdptr3[2:0] = stb_pcx_rptr3[2:0] ;
+
+// logic moved to qctl1
+//wire  [1:0] stb_rd_tid ;
+//
+//assign  stb_rd_tid[0] = pcx_rq_for_stb[1] | pcx_rq_for_stb[3] ;
+//assign  stb_rd_tid[1] = pcx_rq_for_stb[2] | pcx_rq_for_stb[3] ;
+//   
+//dff #(2) stbtid_stgd1 (
+//  .din    (stb_rd_tid[1:0]),  .q  (lsu_stb_rd_tid[1:0]),
+//  .clk    (clk), 
+//  .se   (se), .si (), .so ()
+//  );
+
+//assign  stb_dfq_rd_id[2:0] = stb_data_rd_ptr[2:0] ; // or cam rd ptr
+
+//timing fix:5/6/03
+//bug4988 - change the priority from 0->3 to 3->0; the reason is when select_rptr=0, the
+//          default thread id(rptr[4:3])=thread0 but the default rptr[2:0]=thread3. If
+//          thread0 and thread3 rptr are the same and the thread0 write is occurring, the
+//          rptr[4:0] is same as wptr[4:0]
+wire  [2:0]  stb_rdptr ;
+//assign  stb_rdptr[2:0] = 
+//  stb_select_rptr[0] ? stb_rdptr0[2:0] :
+//    stb_select_rptr[1] ? stb_rdptr1[2:0] :
+//      stb_select_rptr[2] ? stb_rdptr2[2:0] :
+//                             stb_rdptr3[2:0] ;
+
+//assign  stb_rdptr[2:0] = 
+//  stb_select_rptr[3] ? stb_rdptr3[2:0] :
+//    stb_select_rptr[2] ? stb_rdptr2[2:0] :
+//      stb_select_rptr[1] ? stb_rdptr1[2:0] :
+//                             stb_rdptr0[2:0] ;
+
+assign  stb_rdptr[2:0] = 
+  (stb_select_rptr[3] ? stb_rdptr3[2:0] : 3'b0) |
+  (stb_select_rptr[2] ? stb_rdptr2[2:0] : 3'b0) |
+  (stb_select_rptr[1] ? stb_rdptr1[2:0] : 3'b0) |
+  (stb_select_rptr[0] ? stb_rdptr0[2:0] : 3'b0) ;
+    
+//timing fix: 8/29/03 - remove the default select logic for stb_select_rptr since synthesis is forced to replace 
+//            4to1 mux w/ and-or mux or 2to1 mux
+//wire   stb_select_rptr_b3;
+//assign stb_select_rptr_b3 =  ~|stb_select_rptr[2:0];
+
+wire  [2:0]  stb_rdptr_l;
+
+assign stb_rdptr_l[2:0] =  ~stb_rdptr[2:0] ;
+//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b0_mux(
+//                  .z(stb_rdptr_l[0]), 
+//                  .d0(stb_rdptr0[0]), 
+//                  .d1(stb_rdptr1[0]), 
+//                  .d2(stb_rdptr2[0]), 
+//                  .d3(stb_rdptr3[0]), 
+//                  .s0(stb_select_rptr[0]), 
+//                  .s1(stb_select_rptr[1]), 
+//                  .s2(stb_select_rptr[2]), 
+//                  .s3(stb_select_rptr[3]));
+//   
+//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b1_mux(
+//                  .z(stb_rdptr_l[1]), 
+//                  .d0(stb_rdptr0[1]), 
+//                  .d1(stb_rdptr1[1]), 
+//                  .d2(stb_rdptr2[1]), 
+//                  .d3(stb_rdptr3[1]), 
+//                  .s0(stb_select_rptr[0]), 
+//                  .s1(stb_select_rptr[1]), 
+//                  .s2(stb_select_rptr[2]), 
+//                  .s3(stb_select_rptr[3]));
+//   
+//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b2_mux(
+//                  .z(stb_rdptr_l[2]), 
+//                  .d0(stb_rdptr0[2]), 
+//                  .d1(stb_rdptr1[2]), 
+//                  .d2(stb_rdptr2[2]), 
+//                  .d3(stb_rdptr3[2]), 
+//                  .s0(stb_select_rptr[0]), 
+//                  .s1(stb_select_rptr[1]), 
+//                  .s2(stb_select_rptr[2]), 
+//                  .s3(stb_select_rptr[3]));
+//   
+   
+assign  stb_rd_thrid[0] = stb_select_rptr[1] | stb_select_rptr[3] ;
+assign  stb_rd_thrid[1] = stb_select_rptr[2] | stb_select_rptr[3] ;
+
+// read
+// this mux will have to be accommodated in path !!! Talk to Satya. 
+// Timing : This could be made pessimistic by using ld_inst_vld_g
+
+// raw read STB at W3 (changed from W2); a staged cam hit takes the data-array read ptr over the pcx/dfq read
+assign  stb_data_rd_ptr[4:0] = stb_cam_hit_w2 ? 
+        {thrid_w2[1:0],stb_cam_hit_ptr_w2[2:0]} :  // rd based on ld hit
+        {stb_rd_thrid[1:0],~stb_rdptr_l[2:0]} ;       // rd for pcx or dfq (~stb_rdptr_l is stb_rdptr)
+   
+// Blk-st modification for thread: a blk-st in m uses the thread id supplied by ffu instead of the pipe thread id.
+assign	st_thrid_m[1:0] = blkst_m ? ffu_lsu_blk_st_tid_m[1:0] : thrid_m[1:0] ;
+dff_s #(2)  stid_stgg (  // st_thrid_g is the staged copy of st_thrid_m
+  .din  (st_thrid_m[1:0]), 
+  .q  	(st_thrid_g[1:0]),
+  .clk  (clk), 
+  .se (se), .si (), .so ()
+  );
+
+//timing fix: 5/6/03 - the mux below is rebuilt per bit from muxi21 cells fed with complemented inputs
+//assign  stb_cam_rw_ptr[4:0]  = stb_cam_wptr_vld ? 
+//        {st_thrid_m[1:0],cam_wr_ptr[2:0]} :  // write
+//        {stb_rd_thrid[1:0],stb_rdptr[2:0]} ;  // read
+
+wire [2:0] cam_wr_ptr_l;  // NOTE(review): muxi21 presumably inverts its output, so the _l inputs restore true polarity - confirm against the cell library
+wire [1:0] stb_rd_thrid_l;
+wire [1:0] st_thrid_m_l;
+
+assign cam_wr_ptr_l[2:0]  =  ~cam_wr_ptr[2:0];  // complemented write ptr
+assign stb_rd_thrid_l[1:0]  =  ~stb_rd_thrid[1:0];  // complemented read thread id
+assign st_thrid_m_l[1:0]  =  ~st_thrid_m[1:0];  // complemented write (store) thread id
+
+bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b0_mux(  // ptr bit 0: d0 = read side, d1 = write side, select = stb_cam_wptr_vld
+                  .z(stb_cam_rw_ptr[0]), 
+                  .d0(stb_rdptr_l[0]), 
+                  .d1(cam_wr_ptr_l[0]), 
+                  .s(stb_cam_wptr_vld));
+   
+bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b1_mux(  // ptr bit 1
+                  .z(stb_cam_rw_ptr[1]), 
+                  .d0(stb_rdptr_l[1]), 
+                  .d1(cam_wr_ptr_l[1]), 
+                  .s(stb_cam_wptr_vld));
+   
+bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b2_mux(  // ptr bit 2
+                  .z(stb_cam_rw_ptr[2]), 
+                  .d0(stb_rdptr_l[2]), 
+                  .d1(cam_wr_ptr_l[2]), 
+                  .s(stb_cam_wptr_vld));
+   
+bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b3_mux(  // ptr bit 3 = thread id bit 0
+                  .z(stb_cam_rw_ptr[3]), 
+                  .d0(stb_rd_thrid_l[0]), 
+                  .d1(st_thrid_m_l[0]), 
+                  .s(stb_cam_wptr_vld));
+   
+bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b4_mux(  // ptr bit 4 = thread id bit 1
+                  .z(stb_cam_rw_ptr[4]), 
+                  .d0(stb_rd_thrid_l[1]), 
+                  .d1(st_thrid_m_l[1]), 
+                  .s(stb_cam_wptr_vld));
+   
+
+
+//raw read STB at W3 (not W2)
+//timing fix: 9/2/03 - reduce fanout in stb_rwctl for lsu_st_pcx_rq_pick - gen separate signal for
+//                     stb_cam_rptr_vld and stb_data_rptr_vld
+
+//bug4988 - qual lsu_st_pcx_rq_vld w/ no write vld to stb_data. use stb_cam_wr_no_ivld_m instead of write vld.
+//          this is the same signal used to kill pcx_rq_for_stb
+//          stb_cam_rptr_vld is not set if stb_cam_wptr_vld=1
+     
+assign  stb_data_rptr_vld =  // data-array read valid
+  //(|stb_select_rptr[3:0]) |  // pcx/dfq rd - timing fix
+  //lsu_st_pcx_rq_vld |  // pcx/dfq rd  // bug4988
+   (lsu_st_pcx_rq_vld & ~stb_cam_wr_no_ivld_m) |  // pcx/dfq rd, dropped while any store-like op is in m
+    stb_cam_hit_w2 ;         // cam hit requires read whether single or multiple
+
+//raw read STB at W3 (not W2)      
+//timing fix: 9/2/03 - reduce fanout in stb_rwctl for lsu_st_pcx_rq_pick - gen separate signal for
+//                     stb_cam_rptr_vld and stb_data_rptr_vld
+assign  stb_cam_rptr_vld =  // cam read valid
+  //((|stb_select_rptr[3:0]) & ~(stb_cam_hit_w2)) & // only pcx read  - timing fix
+  (lsu_st_pcx_rq_vld & ~(stb_cam_hit_w2)) & // only pcx read, and not while a staged cam hit is reading the data array
+      ~stb_cam_wptr_vld ;   // cam read is not asserted while a st/st-like cam write is valid
+   
+// lsu_stb_rd_vld_d1 - not used
+//dff  stbrd_stgd1  (
+//  .din    (stb_cam_rptr_vld), .q  (lsu_stb_rd_vld_d1),
+//  .clk    (clk), 
+//  .se   (se), .si (), .so ()
+//  );
+
+// logic moved to qctl1
+//dff #(1)  prvld_stgd1 (
+//  .din  (pcx_any_rq_for_stb), 
+//  .q  (lsu_stb_pcx_rvld_d1),
+//  .clk  (clk), 
+//  .se (se), .si (), .so ()
+//  );
+
+assign  stb_cam_cm_tid[1:0] = thrid_m[1:0] ;
+
+
+//=========================================================================================
+//  BYTE MASK FORMATTING
+//=========================================================================================
+
+
+// Write/CAM Data for CAM RAM.
+// Physical dword aligned addr - PA[39:3] (37b)
+// Byte Mask - (8b)
+// Total - 45b
+
+//  | b7  |  b6 | b5  | b4  | b3  | b2  | b1  | b0  |
+//  |   hw3 |   hw2 |   hw1 |   hw0 |
+//  |     w1    |   w0    |
+//  |       dw        | 
+
+
+
+//dff  #(11) va_m (
+//  .din    (exu_lsu_ldst_va_e[10:0]),  .q  (pipe_ldst_va_m[10:0]),
+//  .clk    (clk), 
+//  .se   (se), .si (), .so ()
+//  );
+
+assign pipe_ldst_va_m[9:0] = lsu_ldst_va_m[9:0];
+
+// One-hot decode of the access size ldst_sz_m[1:0]. ldst_byte may not be needed
+assign ldst_byte = ~ldst_sz_m[1] & ~ldst_sz_m[0] ;  // 00
+assign ldst_hwrd = ~ldst_sz_m[1] &  ldst_sz_m[0] ;  // 01
+assign ldst_word =  ldst_sz_m[1] & ~ldst_sz_m[0] ;  // 10
+assign ldst_dwrd =  ldst_sz_m[1] &  ldst_sz_m[0] ;  // 11
+
+// Mask bit i covers the byte at dword offset 7-i; alignment is not checked here. Note : dword term is common. 
+assign ldst_byte_mask[0]  =  // byte at offset 7
+  ( ldst_va_m[2] &  ldst_va_m[1] &  ldst_va_m[0] )       |
+  ( ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0] & (ldst_hwrd)) |
+  ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_word))  |
+  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd))  ; 
+assign ldst_byte_mask[1]  =  // byte at offset 6
+  ( ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0])        |
+  ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_word))  |
+  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd))  ; 
+assign ldst_byte_mask[2]  =  // byte at offset 5
+  ( ldst_va_m[2] & ~ldst_va_m[1] &  ldst_va_m[0])         |
+  ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_hwrd | ldst_word))  |
+  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd))  ; 
+assign ldst_byte_mask[3]  =  // byte at offset 4
+  ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0])       |
+  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd))  ; 
+assign ldst_byte_mask[4]  =  // byte at offset 3
+  (~ldst_va_m[2] &  ldst_va_m[1] &  ldst_va_m[0])        |
+  (~ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0] & (ldst_hwrd)) |
+  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd | ldst_word)) ;
+assign ldst_byte_mask[5]  =  // byte at offset 2
+  (~ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0])         |
+  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] &  (ldst_dwrd | ldst_word))  ;
+assign ldst_byte_mask[6]  =  // byte at offset 1
+  (~ldst_va_m[2] & ~ldst_va_m[1] &  ldst_va_m[0])     |
+  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd | ldst_word | ldst_hwrd)) ;
+assign ldst_byte_mask[7]  =  // byte at offset 0: set for every size when va[2:0] is 0
+  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0])   ;
+
+assign  stb_ldst_byte_msk[7:0]  = ldst_byte_mask[7:0]; 
+
+   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b0 (.a(ldst_byte_mask[0]), .z(stb_ldst_byte_msk_min[0]));
+   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b1 (.a(ldst_byte_mask[1]), .z(stb_ldst_byte_msk_min[1]));
+   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b2 (.a(ldst_byte_mask[2]), .z(stb_ldst_byte_msk_min[2]));
+   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b3 (.a(ldst_byte_mask[3]), .z(stb_ldst_byte_msk_min[3]));
+   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b4 (.a(ldst_byte_mask[4]), .z(stb_ldst_byte_msk_min[4]));
+   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b5 (.a(ldst_byte_mask[5]), .z(stb_ldst_byte_msk_min[5]));
+   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b6 (.a(ldst_byte_mask[6]), .z(stb_ldst_byte_msk_min[6]));
+   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b7 (.a(ldst_byte_mask[7]), .z(stb_ldst_byte_msk_min[7]));
+   
+   
+// Generate selects to format st data
+assign  lsu_st_sz_bhww_m = ldst_byte | ldst_hwrd | ldst_word ;      // byte or hword or word
+assign  lsu_st_sz_dw_m   = ldst_dwrd ;            // double word
+assign  lsu_st_sz_bhw_m  = ldst_byte | ldst_hwrd ;      // byte or hword
+assign  lsu_st_sz_wdw_m  = ldst_word | ldst_dwrd ;      // word or dword
+assign  lsu_st_sz_b_m    = ldst_byte ;            // byte
+assign  lsu_st_sz_w_m    = ldst_word ;            // word
+assign  lsu_st_sz_hw_m   = ldst_hwrd ;            // hword
+assign  lsu_st_sz_hww_m  = ldst_hwrd | ldst_word ;      // hword or word
+
+//=========================================================================================
+//  BLK-ST HANDLING
+//=========================================================================================
+
+wire	blkst_m_tmp ;  // ffu_lsu_blk_st_e staged through stgm_bst, before the pipe-conflict qualification
+dff_s  stgm_bst (
+  .din (ffu_lsu_blk_st_e),
+  .q   (blkst_m_tmp),
+  .clk (clk),
+  .se   (se),       .si (),          .so ()
+);
+
+assign	blkst_m = blkst_m_tmp & ~(real_st_m  | flsh_inst_m |  // blk-st is dropped when a pipe store, flush inst
+		ld_inst_vld_m) ; // or load is valid in m - Bug 3444
+
+dff_s  stgg_bst (  // blkst_g is the staged copy of the qualified blkst_m
+  .din (blkst_m),
+  .q   (blkst_g),
+  .clk (clk),
+  .se   (se),       .si (),          .so ()
+);
+
+wire	snap_blk_st_local_m ;
+assign	snap_blk_st_local_m = lsu_snap_blk_st_m & ifu_tlu_inst_vld_m_bf0 ;
+
+wire	[1:0]	bst_sz_m ;
+wire	[9:0]	bst_va_m ;
+// output to be used in m-stage.
+dffe_s #(9) bst_state_m (
+        .din    ({ldst_sz_m[1:0],ldst_va_m[9:6],ldst_va_m[2:0]}),
+        .q      ({bst_sz_m[1:0],bst_va_m[9:6],bst_va_m[2:0]}),
+        .en     (snap_blk_st_local_m),
+        .clk    (clk),
+        .se   (se),       .si (),          .so ()
+        );
+
+dff_s #(3)  bsva_stgm (
+  .din    (ffu_lsu_blk_st_va_e[5:3]), .q (bst_va_m[5:3]),
+  .clk    (clk), 
+  .se   (se), .si (), .so ()
+  );
+
+//assign	bst_va_m[5:3]	= ffu_lsu_blk_st_va_e[5:3] ;
+
+//assign  ldst_va_m[10] =  pipe_ldst_va_m[10] ;
+assign  ldst_va_m[9:0] = blkst_m ?  bst_va_m[9:0] : pipe_ldst_va_m[9:0] ;
+
+assign	lsu_stb_va_m[9:3] = ldst_va_m[9:3] ;
+
+assign	ldst_sz_m[1:0]	=  blkst_m ? bst_sz_m[1:0] : pipe_ldst_sz_m[1:0] ;
+
+//=========================================================================================
+//  WRITE DATA FOR DATA RAM
+//=========================================================================================
+
+// Write Data for DATA RAM.
+// Data - (64b)
+// (8b parity is generated on read)
+// Rqtype - (3b)
+// Size - (3b). 
+// Addr - (3b). Lower 3b of 40b addr.
+// (set index and way available from ctl state.)
+// Total - 73b.
+
+// st-quad requires own encoding.
+// assume does not have to be changed for blk-st
+assign  st_rq_type_m[2:0] =
+                casa_m ? 3'b010 :                       // cas pkt 1
+                        (ldstub_m | swap_m) ? 3'b110 :  // ldstub/swap
+                          //(stquad_m)  ? 3'b111 :  // stquad-pkt1
+                                  3'b001 ;        // normal store or partial interrupt rq type
+
+//assign  lsu_st_rq_type_m[2:0] = st_rq_type_m[2:0] ;
+assign  lsu_st_rq_type_m[2:1] = st_rq_type_m[2:1] ;
+
+// Need ASI decode
+/*wire	lsu_stquad_inst_m ;
+assign  lsu_stquad_inst_m = ldst_dbl_m & st_inst_vld_m & quad_asi_m ; 
+*/
+
+wire	st_rmo_m,st_rmo_g ;
+assign	st_rmo_m = lsu_st_rmo_m | blkst_m ; // binit and blk rmo stores.
+dff_s #(9)  stgg_etc  (
+  .din    ({ldst_va_m[3:0],st_rq_type_m[2:0],st_rmo_m,lsu_bst_in_pipe_m}), 
+  .q      ({ldst_va_g[3:0],st_rq_type_g[2:0],st_rmo_g,bst_in_pipe_g}),
+  .clk    (clk), 
+  .se   (se), .si (), .so ()
+  );
+
+wire	bst_any_helper ;
+assign	bst_any_helper = blkst_g | bst_in_pipe_g ; // Bug 3934
+
+// Size will have to be changed to 2bits.
+// 7 more bits could be added to data ram to save read of cam in providing dfq pkt !!! 
+assign stb_wdata_ramd_b75_b64[75:64]   = 
+  {st_rmo_g,st_rq_type_g[2:0],flsh_inst_g,bst_any_helper,ldst_sz_g[1:0],ldst_va_g[3:0]}; 
+	// Bug3395, 3934
+
+//=========================================================================================
+//  FULL/PARTIAL RAW CALCULATION
+//=========================================================================================
+
+// io load cannot bypass from stb. A stb hit results in an io-ld being treated
+// as a partial-raw. (OR should it be serialized behind any io store ??)
+wire	io_ld,io_ld_w2 ;
+assign	io_ld = tlb_pgnum_b39_g ; // Bug 4362
+
+// full-raw is squashed on multiple hits in stb. Treated like partial raw.
+// Ensure that all ld and ld-like instructions signal ld_inst_vld. We can then
+// remove qualification with ld_inst_vld_g.
+/*assign  ld_stb_full_raw_g = 
+	(|stb_ld_full_raw[7:0]) & ~(stb_cam_mhit | ldq_hit_g | io_ld) ;
+assign  ld_stb_full_raw_g[0] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g & 
+          ~(stb_cam_mhit | ldq_hit_g[0] | io_ld) & thread0_g ;
+          //~(ld_raw_mhit | ld_stq_hit_g[0] | io_ld) & thread0_g ;
+assign  ld_stb_full_raw_g[1] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g & 
+          ~(stb_cam_mhit | ldq_hit_g[1] | io_ld) & thread1_g ;
+assign  ld_stb_full_raw_g[2] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g & 
+          ~(stb_cam_mhit | ldq_hit_g[2] | io_ld) & thread2_g ;
+assign  ld_stb_full_raw_g[3] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g & 
+          ~(stb_cam_mhit | ldq_hit_g[3] | io_ld) & thread3_g ; */
+// Multiple full raws are also treated like a partial.
+/*assign  ld_stb_partial_raw_g = 
+	((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g | (io_ld & stb_not_empty)) ;
+assign  ld_stb_partial_raw_g[0] = 
+	((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[0] | (io_ld & stb_not_empty)) 
+          & ld_inst_vld_g & thread0_g ;
+assign  ld_stb_partial_raw_g[1] = 
+	((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[1] | (io_ld & stb_not_empty)) 
+          & ld_inst_vld_g & thread1_g ;
+assign  ld_stb_partial_raw_g[2] = 
+	((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[2] | (io_ld & stb_not_empty)) 
+          & ld_inst_vld_g & thread2_g ;
+assign  ld_stb_partial_raw_g[3] = 
+	((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[3] | (io_ld & stb_not_empty)) 
+          & ld_inst_vld_g & thread3_g; */
+
+//=========================================================================================
+//  STQ HANDLING
+//=========================================================================================
+
+/*	REMOVE STQUAD */
+
+//=========================================================================================
+//	LD QUAD HANDLING
+//=========================================================================================
+
+dff_s  altsp_stgm (
+  .din    (alt_space_e), .q (alt_space_m),
+  .clk    (clk), 
+  .se   (se), .si (), .so ()
+  );
+
+assign  lsu_ldquad_inst_m = ldst_dbl_m & ld_inst_vld_m & quad_asi_m & alt_space_m ; 
+
+/*wire	ldquad_inst_g ;
+dff_s  ldq_stgg (
+  .din    (lsu_ldquad_inst_m), .q (ldquad_inst_g),
+  .clk    (clk), 
+  .se   (se), .si (), .so ()
+  );
+
+wire	ldq_stb_cam_hit ;
+assign	ldq_stb_cam_hit = stb_cam_hit & ldquad_inst_g ;
+// Terms can be made common.
+assign  ldq_hit_g = ldq_stb_cam_hit ; */
+/*assign  ldq_hit_g[0] = thread0_g & ldq_stb_cam_hit ;
+assign  ldq_hit_g[1] = thread1_g & ldq_stb_cam_hit ;
+assign  ldq_hit_g[2] = thread2_g & ldq_stb_cam_hit ;
+assign  ldq_hit_g[3] = thread3_g & ldq_stb_cam_hit ; */
+
+//=========================================================================================
+//  STB MULTIPLE HIT GENERATION
+//=========================================================================================
+
+// Multiple hits in stb is to be treated as a partial raw case. The ld however must wait
+// until the youngest store which hit exits the stb. A ptr needs to be calculated for this case.
+// A version of stb_wptr is used instead because it is easily available. (Would this have
+// any significant performance impact ? - No)
+
+assign  ld_any_raw_vld[7:0] = stb_ld_full_raw[7:0] | stb_ld_partial_raw[7:0] ;
+
+dff_s #(16)  stgw2_rvld (
+        .din    ({ld_any_raw_vld[7:0],stb_state_ced[7:0]}),
+        .q    	({ld_any_raw_vld_d1[7:0],stb_state_ced_d1[7:0]}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+
+// This equation can be optimized for the grape flow.
+// This can be obtained from stb.
+/*assign  ld_raw_mhit =
+  (ld_any_raw_vld[7] & |(ld_any_raw_vld[6:0])) |
+  (ld_any_raw_vld[6] & |(ld_any_raw_vld[5:0])) |
+  (ld_any_raw_vld[5] & |(ld_any_raw_vld[4:0])) |
+  (ld_any_raw_vld[4] & |(ld_any_raw_vld[3:0])) |
+  (ld_any_raw_vld[3] & |(ld_any_raw_vld[2:0])) |
+  (ld_any_raw_vld[2] & |(ld_any_raw_vld[1:0])) |
+  (ld_any_raw_vld[1] &   ld_any_raw_vld[0]) ; */
+
+//=========================================================================================
+//  STB Partial Raw ptr generation
+//=========================================================================================
+
+// The loading on the raw output of the stb cam will be significant if the signal 
+// has to fan out to all 4 ctl blocks. That's why the control has to be localized.
+
+// Using the ack bit may result in pessimistic issue of partial raw loads.
+// For a single partial raw or multiple hit case, detecting whether there is any
+// unacked store is sufficient. Calculation is for no unacked store.
+// Can we use cam_hit ptr instead !!!
+
+//assign  ld_rawp_st_ced_w2 = (~(|(ld_any_raw_vld_d1[7:0] & ~stb_state_ced_d1[7:0]))) ;
+wire [2:0] wptr_prev ;  // local alias of stb_wptr_prev
+assign	wptr_prev[2:0] = stb_wptr_prev[2:0] ;
+wire [7:0] wptr_dcd ; // Bug 4294 - 3-to-8 one-hot decode of wptr_prev
+assign	wptr_dcd[0] = ~wptr_prev[2] & ~wptr_prev[1] & ~wptr_prev[0] ;
+assign	wptr_dcd[1] = ~wptr_prev[2] & ~wptr_prev[1] &  wptr_prev[0] ;
+assign	wptr_dcd[2] = ~wptr_prev[2] &  wptr_prev[1] & ~wptr_prev[0] ;
+assign	wptr_dcd[3] = ~wptr_prev[2] &  wptr_prev[1] &  wptr_prev[0] ;
+assign	wptr_dcd[4] =  wptr_prev[2] & ~wptr_prev[1] & ~wptr_prev[0] ;
+assign	wptr_dcd[5] =  wptr_prev[2] & ~wptr_prev[1] &  wptr_prev[0] ;
+assign	wptr_dcd[6] =  wptr_prev[2] &  wptr_prev[1] & ~wptr_prev[0] ;
+assign  wptr_dcd[7] =  wptr_prev[2] &  wptr_prev[1] &  wptr_prev[0] ;
+
+wire iold_st_ced_g,iold_st_ced_w2 ;
+assign	iold_st_ced_g = |(wptr_dcd[7:0] & stb_state_ced[7:0]) ;  // ced state bit of the single entry selected by wptr_prev
+
+dff_s #(2)   ioldced_stgw2  (  // stage iold_st_ced_g and io_ld into w2
+  .din  ({iold_st_ced_g,io_ld}), 
+  .q 	({iold_st_ced_w2,io_ld_w2}),
+  .clk  (clk), 
+  .se   (se), .si (), .so ()
+  );
+
+assign  ld_rawp_st_ced_w2 =  // io load: ced bit of the wptr_prev entry; otherwise 1 only if no raw-hit entry has its ced bit clear
+	io_ld_w2 ? iold_st_ced_w2 :
+	(~(|(ld_any_raw_vld_d1[7:0] & ~stb_state_ced_d1[7:0]))) ;
+
+// For the case of a single partial raw.
+assign  ld_rawp_stb_id[0] = stb_cam_hit_ptr[0] ;
+assign  ld_rawp_stb_id[1] = stb_cam_hit_ptr[1] ;
+assign  ld_rawp_stb_id[2] = stb_cam_hit_ptr[2] ;
+/*assign  ld_rawp_stb_id[0] = stb_ld_partial_raw[1] | stb_ld_partial_raw[3] |
+        stb_ld_partial_raw[5] | stb_ld_partial_raw[7] ;
+assign  ld_rawp_stb_id[1] = stb_ld_partial_raw[2] | stb_ld_partial_raw[3] |
+        stb_ld_partial_raw[6] | stb_ld_partial_raw[7] ;
+assign  ld_rawp_stb_id[2] = stb_ld_partial_raw[4] | stb_ld_partial_raw[5] |
+        stb_ld_partial_raw[6] | stb_ld_partial_raw[7] ; */
+
+   wire [3:0] pipe_thread_g;
+   assign     pipe_thread_g[0] = ~thrid_g[1] & ~thrid_g[0];
+   assign     pipe_thread_g[1] = ~thrid_g[1] &  thrid_g[0];
+   assign     pipe_thread_g[2] =  thrid_g[1] & ~thrid_g[0];
+   assign     pipe_thread_g[3] =  thrid_g[1] &  thrid_g[0];
+ 
+assign  stb_state_ced[7:0] = 
+( pipe_thread_g[0] ? stb_state_ced0[7:0] : 8'b0 ) |
+( pipe_thread_g[1] ? stb_state_ced1[7:0] : 8'b0 ) |
+( pipe_thread_g[2] ? stb_state_ced2[7:0] : 8'b0 ) |
+( pipe_thread_g[3] ? stb_state_ced3[7:0] : 8'b0 );
+
+assign  stb_wptr_prev[2:0] = 
+  (pipe_thread_g[0] ? stb_wrptr0_prev[2:0] : 3'b0) |
+  (pipe_thread_g[1] ? stb_wrptr1_prev[2:0] : 3'b0) |
+  (pipe_thread_g[2] ? stb_wrptr2_prev[2:0] : 3'b0) |
+  (pipe_thread_g[3] ? stb_wrptr3_prev[2:0] : 3'b0);
+
+assign  stb_not_empty  =
+  (pipe_thread_g[0]  & ~lsu_stb_empty[0] ) | 
+  (pipe_thread_g[1]  & ~lsu_stb_empty[1] ) |
+  (pipe_thread_g[2]  & ~lsu_stb_empty[2] ) |
+  (pipe_thread_g[3]  & ~lsu_stb_empty[3] ) ;
+
+assign	lsu_stb_empty_buf[3:0] = lsu_stb_empty[3:0] ;
+assign	lsu_spu_stb_empty[3:0] = lsu_stb_empty[3:0] ;
+
+//wire ldstdbl_g ;
+// stdbl should be qualified with quad_asi_g !!!
+//assign  ldstdbl_g = ldst_dbl_g & (ld_inst_vld_g | st_inst_vld_g) & ~ldst_fp_g ;
+
+// casa_g and stdbl_g may not be required.
+//assign  ld_rawp_st_ackid_g[2:0] = 
+//  (casa_g | ldstdbl_g | stb_cam_mhit | (io_ld & stb_not_empty))
+//  ? stb_wptr_prev[2:0] : ld_rawp_stb_id[2:0] ;
+
+//===================================================
+//casa: need st-st order
+//st cam mhit: cannot figure out the youngest
+//io: side effect
+//remove int ldd and quad ldd, why need ldstdbl?
+//===================================================
+wire	[2:0]	ld_rawp_st_ackid_g ;  // stb entry id selected for a partial-raw load, g stage
+
+assign  ld_rawp_st_ackid_g[2:0] =  // casa, multiple cam hit, or io load with a non-empty stb: use stb_wptr_prev
+  (casa_g | stb_cam_mhit | (io_ld & stb_not_empty))?
+   stb_wptr_prev[2:0] : ld_rawp_stb_id[2:0] ;  // otherwise use the cam hit ptr (ld_rawp_stb_id == stb_cam_hit_ptr)
+   
+dff_s #(3)  rawpackid_w2 (  // ld_rawp_st_ackid_w2 is the staged copy of ld_rawp_st_ackid_g
+  .din  (ld_rawp_st_ackid_g[2:0]), 
+  .q 	(ld_rawp_st_ackid_w2[2:0]),
+  .clk  (clk), 
+  .se   (se), .si (), .so ()
+  );
+
+
+   assign lsu_ifu_stbcnt0[3:0] = lsu_stbcnt0[3:0] ;  // per-thread stb counts passed straight through to the ifu-named outputs
+   assign lsu_ifu_stbcnt1[3:0] = lsu_stbcnt1[3:0] ;
+   assign lsu_ifu_stbcnt2[3:0] = lsu_stbcnt2[3:0] ;
+   assign lsu_ifu_stbcnt3[3:0] = lsu_stbcnt3[3:0] ;
+
+   assign lsu_ffu_stb_full0 =    lsu_stbcnt0[3];  // full flag for ffu is bit 3 of the 4-bit count (set for counts of 8 and above)
+   assign lsu_ffu_stb_full1 =    lsu_stbcnt1[3];
+   assign lsu_ffu_stb_full2 =    lsu_stbcnt2[3];
+   assign lsu_ffu_stb_full3 =    lsu_stbcnt3[3];
+   
+endmodule
+
Index: /trunk/T1-CPU/lsu/lsu_tlbdp.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_tlbdp.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_tlbdp.v	(revision 6)
@@ -0,0 +1,271 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_tlbdp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+
+`include        "lsu.h"
+
+module lsu_tlbdp(/*AUTOARG*/
+   // Outputs
+   so, lsu_tlb_rd_data, tlb_pgnum_buf, tlb_pgnum_buf2, 
+   tlb_rd_tte_data_ie_buf, stb_cam_vld, tte_data_parity_error, 
+   tte_tag_parity_error, cache_way_hit_buf1, cache_way_hit_buf2, 
+   lsu_tlu_tte_pg_sz_g, 
+   // Inputs
+   rclk, si, se, tlb_rd_tte_tag, tlb_rd_tte_data, 
+   lsu_tlb_data_rd_vld_g, tlb_pgnum, asi_internal_m, lsu_alt_space_m, 
+   tlb_cam_hit, ifu_lsu_ld_inst_e, lsu_dtlb_bypass_e, 
+   tlb_rd_tte_data_parity, tlb_rd_tte_tag_parity, cache_way_hit
+   );
+// TLB read datapath: formats tte tag/data reads, derives the page size, checks tte parity, builds stb_cam_vld.
+   input  rclk;   // clock for the e->m staging flops below
+   input  si;     // scan in; not referenced inside this module
+   input  se;     // scan enable, wired to every dff_s instance
+   output so;     // scan out; not driven inside this module
+
+input   [58:0]          tlb_rd_tte_tag ;         // tte tag from tlb
+input   [42:0]          tlb_rd_tte_data ;        // tte data from tlb
+input                   lsu_tlb_data_rd_vld_g ;  // 1: read returns tte data, 0: tte tag
+
+input [39:10]           tlb_pgnum;               // page number, re-driven as tlb_pgnum_buf/buf2
+input                   asi_internal_m;          // together with lsu_alt_space_m, blocks stb_cam_vld
+   input                lsu_alt_space_m;
+
+// **new**
+output  [63:0]          lsu_tlb_rd_data ;        // tag or data rd from tlb
+
+
+
+   output [39:10]        tlb_pgnum_buf;
+   output [39:37]        tlb_pgnum_buf2;
+//   output [42:0]         tlb_rd_tte_data_buf;
+   output             tlb_rd_tte_data_ie_buf;
+
+//======================================================
+//stb cam vld moved from stb_rwctl
+input     tlb_cam_hit ;
+input     ifu_lsu_ld_inst_e;    // staged to m as ld_inst_vld_m
+input     lsu_dtlb_bypass_e;    // staged to m as dtlb_bypass_m
+output    stb_cam_vld;
+
+
+input    tlb_rd_tte_data_parity ; // parity bit delivered with the tte data
+input    tlb_rd_tte_tag_parity ;  // parity bit delivered with the tte tag
+output   tte_data_parity_error ;
+output   tte_tag_parity_error ;
+
+   input [3:0] cache_way_hit;
+   output [3:0] cache_way_hit_buf1;
+   output [3:0] cache_way_hit_buf2;
+
+output  [2:0]           lsu_tlu_tte_pg_sz_g ;   // page-size of tte 
+
+wire   tlb_rd_tte_data_27_22_sel_buf;
+wire   tlb_rd_tte_data_21_16_sel_buf;
+wire   tlb_rd_tte_data_15_13_sel_buf;
+wire   lsu_tte_pg_sz_b2, lsu_tte_pg_sz_b1, lsu_tte_pg_sz_b0;
+wire   pg_sz_b0, pg_sz_b1, pg_sz_b2;
+
+//===============================================================
+   wire   tlb_tte_data_mx_sel2, tlb_tte_data_mx_sel1, tlb_tte_data_mx_sel0;
+//tlb_tte_data_mx_sel2 ;  // copy of the tte data 27_22 select bit
+//tlb_tte_data_mx_sel1 ;  // copy of the tte data 21_16 select bit
+//tlb_tte_data_mx_sel0 ;  // copy of the tte data 15_13 select bit
+
+   assign tlb_tte_data_mx_sel2 = tlb_rd_tte_data_27_22_sel_buf;
+   assign tlb_tte_data_mx_sel1 = tlb_rd_tte_data_21_16_sel_buf;
+   assign tlb_tte_data_mx_sel0 = tlb_rd_tte_data_15_13_sel_buf;
+
+// assign  pg_sz_b0 = 
+//  (~tlb_tte_data_mx_sel1 & tlb_tte_data_mx_sel0) | // 64K
+//  ( tlb_tte_data_mx_sel1 & tlb_tte_data_mx_sel0) ; // 4M/256M
+// (the two terms above reduce to sel0 alone, so bit 0 follows tlb_tte_data_mx_sel0 directly)
+assign  pg_sz_b0 =  tlb_tte_data_mx_sel0;
+
+assign  pg_sz_b1 = 
+  (~tlb_tte_data_mx_sel2 & tlb_tte_data_mx_sel1 &  tlb_tte_data_mx_sel0) ; // 4M
+assign  pg_sz_b2 = 
+  ( tlb_tte_data_mx_sel2 & tlb_tte_data_mx_sel1 &  tlb_tte_data_mx_sel0) ; // 256M
+
+assign  lsu_tte_pg_sz_b2 = pg_sz_b2 ;
+assign  lsu_tte_pg_sz_b1 = pg_sz_b1 ;
+assign  lsu_tte_pg_sz_b0 = pg_sz_b0 ;
+
+assign lsu_tlu_tte_pg_sz_g[2:0] = {pg_sz_b2,pg_sz_b1,pg_sz_b0} ;
+
+// CAM VLD GENERATION
+
+// Unfortunately because of timing considerations, this cannot be qualified with
+// flush and inst_vld. Must exclude other conditions though such as internal asi
+// atomics etc !!! (NOTE : earlier version of inst_vld may be obtained.)
+   wire   clk;
+
+   assign   clk =rclk;
+
+wire  dtlb_bypass_m ;
+dff_s #(1) dtlb_bypass_stgm  (
+  .din    (lsu_dtlb_bypass_e), .q (dtlb_bypass_m),
+  .clk    (clk), 
+  .se     (se), .si (), .so ()
+  );
+wire  ld_inst_vld_m ;  // declared explicitly; it used to exist only as an implicit net
+dff_s #(1) ld_inst_vld_stgm  (
+  .din    (ifu_lsu_ld_inst_e), .q (ld_inst_vld_m),
+  .clk    (clk), 
+  .se     (se), .si (), .so ()
+  );
+// Load in m with tlb_cam_hit or dtlb bypass, and not an internal-asi alternate-space access.
+assign  stb_cam_vld = ld_inst_vld_m & (tlb_cam_hit | dtlb_bypass_m) & 
+                      ~(asi_internal_m  & lsu_alt_space_m); //bug 4635, revisit
+//======================================================================   
+
+//buffer all inputs first
+wire   [58:0]         tlb_rd_tte_tag_buf ;       
+wire   [42:0]         tlb_rd_tte_data_buf ;      
+wire                  lsu_tte_pg_sz_b1_buf;
+wire                  lsu_tte_pg_sz_b0_buf;
+wire                  lsu_tte_pg_sz_b2_buf;
+wire   [39:10]        tlb_pgnum_l;
+wire   [39:10]        tlb_pgnum_buf;  
+wire   [39:37]        tlb_pgnum_buf2;  
+
+//BUFFERS
+   assign             tlb_rd_tte_tag_buf[58:0] = tlb_rd_tte_tag[58:0];
+   assign             lsu_tte_pg_sz_b1_buf = lsu_tte_pg_sz_b1;
+   assign             lsu_tte_pg_sz_b0_buf = lsu_tte_pg_sz_b0;
+   assign             lsu_tte_pg_sz_b2_buf = lsu_tte_pg_sz_b2;
+
+   //tlb_pgnum buffer: inverted once into tlb_pgnum_l, then inverted back (outputs keep input polarity)
+   assign             tlb_pgnum_l [39:10] = ~ tlb_pgnum[39:10];
+   assign             tlb_pgnum_buf[39:10] = ~ tlb_pgnum_l[39:10];
+   assign             tlb_pgnum_buf2[39:37] = ~ tlb_pgnum_l[39:37];
+
+   assign             tlb_rd_tte_data_buf[42:0] = tlb_rd_tte_data[42:0];
+
+   assign tlb_rd_tte_data_ie_buf =  tlb_rd_tte_data_buf [`STLB_DATA_IE];
+   assign tlb_rd_tte_data_27_22_sel_buf = tlb_rd_tte_data_buf [`STLB_DATA_27_22_SEL];
+   assign tlb_rd_tte_data_21_16_sel_buf = tlb_rd_tte_data_buf [`STLB_DATA_21_16_SEL];
+   assign tlb_rd_tte_data_15_13_sel_buf = tlb_rd_tte_data_buf [`STLB_DATA_15_13_SEL];
+
+
+wire	[63:0]	formatted_tte_tag, formatted_tte_data;
+
+//=================================================================================================
+//      Format TLB Tag 
+//=================================================================================================
+
+assign  formatted_tte_tag[63:0] =
+        {
+	tlb_rd_tte_tag_buf[58:56],
+	tlb_rd_tte_tag_buf[55],
+	// ECO 4265 begin
+	tlb_rd_tte_tag_buf[`STLB_TAG_PARITY], 	  // Parity
+	tlb_rd_tte_tag_buf[`STLB_TAG_VA_27_22_V], // mxsel2 - b27:22 vld 
+	tlb_rd_tte_tag_buf[`STLB_TAG_VA_21_16_V], // mxsel1 - b21:16 vld
+	tlb_rd_tte_tag_buf[`STLB_TAG_VA_15_13_V], // mxsel0 - b15:13 vld
+        {8{tlb_rd_tte_tag_buf[53]}},                                        // (8b) tag bit 53 replicated
+	// ECO 4265 end
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_47_28_HI:`STLB_TAG_VA_47_28_LO],    // (20b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_27_22_HI:`STLB_TAG_VA_27_22_LO],    // (6b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_21_16_HI:`STLB_TAG_VA_21_16_LO],    // (6b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_15_13_HI:`STLB_TAG_VA_15_13_LO],    // (3b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_CTXT_12_0_HI:`STLB_TAG_CTXT_12_0_LO] // (13b)
+        } ;
+/*
+assign  formatted_tte_tag[63:0] =
+        {
+        {16{tlb_rd_tte_tag_buf[54]}},                                       // (16b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_47_22_HI:`STLB_TAG_VA_47_22_LO],    // (26b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_21_20_HI:`STLB_TAG_VA_21_20_LO],    // (3b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_19],
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_18_17_HI:`STLB_TAG_VA_18_17_LO],    // (3b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_16],
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_15_14_HI:`STLB_TAG_VA_15_14_LO],    // (3b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_VA_13],
+        tlb_rd_tte_tag_buf[`STLB_TAG_CTXT_12_7_HI:`STLB_TAG_CTXT_12_7_LO],  // (13b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_CTXT_6_0_HI:`STLB_TAG_CTXT_6_0_LO]
+        } ;
+*/
+
+
+//=================================================================================================
+//      Format TLB Data 
+//=================================================================================================
+
+assign  formatted_tte_data[63:0] =
+        {
+        tlb_rd_tte_tag_buf[`STLB_TAG_V],            // V    (1b) - taken from the tag, not the data
+        lsu_tte_pg_sz_b1_buf,                       // SZ   (2b)
+        lsu_tte_pg_sz_b0_buf,
+        tlb_rd_tte_data_buf[`STLB_DATA_NFO],        // NFO  (1b)
+        tlb_rd_tte_data_buf[`STLB_DATA_IE],         // IE   (1b)
+        9'd0,         				    // Soft2
+        1'b0,
+        lsu_tte_pg_sz_b2_buf,                       // SZ   (1b)
+        tlb_rd_tte_tag_buf[`STLB_TAG_U],            // U    (1b) - taken from the tag, not the data
+	// ECO 4265 - begin
+        tlb_rd_tte_data_buf[`STLB_DATA_PARITY],      // Parity   (1b)
+        tlb_rd_tte_data_buf[`STLB_DATA_27_22_SEL],   // mxsel2_l (1b)
+        tlb_rd_tte_data_buf[`STLB_DATA_21_16_SEL],   // mxsel1_l (1b)
+        tlb_rd_tte_data_buf[`STLB_DATA_15_13_SEL],   // mxsel0_l (1b)
+        2'd0,                                        // Unused Diag bits
+	// ECO 4265 - end 
+        1'b0,                                        // PA   (28b)
+        tlb_rd_tte_data_buf[`STLB_DATA_PA_39_28_HI:`STLB_DATA_PA_39_28_LO],
+        tlb_rd_tte_data_buf[`STLB_DATA_PA_27_22_HI:`STLB_DATA_PA_27_22_LO],
+        tlb_rd_tte_data_buf[`STLB_DATA_PA_21_16_HI:`STLB_DATA_PA_21_16_LO],
+        tlb_rd_tte_data_buf[`STLB_DATA_PA_15_13_HI:`STLB_DATA_PA_15_13_LO],
+        6'd0,                                   // ?? 12-7 (6b)
+        tlb_rd_tte_data_buf[`STLB_DATA_L],          // L    (1b)
+        tlb_rd_tte_data_buf[`STLB_DATA_CP],         // CP   (1b)
+        tlb_rd_tte_data_buf[`STLB_DATA_CV],         // CV   (1b)
+        tlb_rd_tte_data_buf[`STLB_DATA_E],          // E    (1b)
+        tlb_rd_tte_data_buf[`STLB_DATA_P],          // P    (1b)
+        tlb_rd_tte_data_buf[`STLB_DATA_W],          // W    (1b)
+        1'b0
+        } ;
+
+
+//=================================================================================================
+//      Select TLB Read data / TLB Read tag
+//=================================================================================================
+// lsu_tlb_data_rd_vld_g = 1 returns the formatted data word, 0 returns the formatted tag word.
+assign lsu_tlb_rd_data[63:0] =
+                lsu_tlb_data_rd_vld_g ? formatted_tte_data[63:0] : formatted_tte_tag[63:0];
+
+
+//=================================================================================================
+//      Calculate parity for TLB Tag and Data
+//=================================================================================================
+   wire lsu_rd_tte_data_parity, lsu_rd_tte_tag_parity;
+// Recomputed parity: data over bits [41:0] (bit 42 excluded); tag skips bits 54, 26 and 24.
+assign  lsu_rd_tte_data_parity = ^tlb_rd_tte_data_buf[41:0] ;
+assign  lsu_rd_tte_tag_parity =  ^{tlb_rd_tte_tag_buf[58:55],tlb_rd_tte_tag_buf[53:27],
+				tlb_rd_tte_tag_buf[25],tlb_rd_tte_tag_buf[23:0]} ;
+// An error is flagged when the parity input differs from the recomputed parity.
+assign  tte_data_parity_error = 
+  tlb_rd_tte_data_parity ^ lsu_rd_tte_data_parity ;
+assign  tte_tag_parity_error  = 
+  tlb_rd_tte_tag_parity ^ lsu_rd_tte_tag_parity ;
+
+   assign cache_way_hit_buf1[3:0] = cache_way_hit[3:0] ;
+   assign cache_way_hit_buf2[3:0] = cache_way_hit[3:0];
+
+
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_qdp1.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_qdp1.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_qdp1.v	(revision 6)
@@ -0,0 +1,1934 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_qdp1.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//  Description:  LSU PCX Datapath - QDP1
+*/
+////////////////////////////////////////////////////////////////////////
+// header file includes
+////////////////////////////////////////////////////////////////////////
+`include  "sys.h" // system level definition file which contains the 
+          // time scale definition
+`include  "iop.h" 
+`include  "lsu.h" 
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications: here it defines FPGA_SYN_CLK_EN and FPGA_SYN_CLK_DFF, which swap the clken_buf/dff_s LMQ flops for dffe_s with an enable
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module lsu_qdp1 ( /*AUTOARG*/
+   // Outputs
+   so, lsu_va_match_b47_b32_m, lsu_va_match_b31_b3_m, lsu_va_wtchpt_addr, spc_pcx_data_pa, 
+   dtag_wdata_m, lmq0_byp_misc_sz, lmq1_byp_misc_sz, 
+   lmq2_byp_misc_sz, lmq3_byp_misc_sz, lsu_byp_misc_sz_e, 
+   lsu_l2fill_sign_extend_m, lsu_l2fill_bendian_m, lmq0_l2fill_fpld, 
+   lmq1_l2fill_fpld, lmq2_l2fill_fpld, lmq3_l2fill_fpld, lmq_ld_rd1, 
+   lmq0_ncache_ld, lmq1_ncache_ld, lmq2_ncache_ld, lmq3_ncache_ld, 
+   lmq0_ld_rq_type, lmq1_ld_rq_type, lmq2_ld_rq_type, 
+   lmq3_ld_rq_type, lmq0_ldd_vld, lmq1_ldd_vld, lmq2_ldd_vld, 
+   lmq3_ldd_vld, ld_sec_hit_thrd0, ld_sec_hit_thrd1, 
+   ld_sec_hit_thrd2, ld_sec_hit_thrd3, lmq0_pcx_pkt_addr, 
+   lmq1_pcx_pkt_addr, lmq2_pcx_pkt_addr, lmq3_pcx_pkt_addr, 
+   lsu_mmu_rs3_data_g, lsu_tlu_rs3_data_g, lsu_diagnstc_wr_data_b0, 
+   lsu_diagnstc_wr_data_e, lsu_ifu_stxa_data, 
+   lsu_ifu_ld_icache_index, lsu_ifu_ld_pcxpkt_tid, lsu_error_pa_m, 
+   lsu_pref_pcx_req, st_rs3_data_g, lsu_ldst_va_way_g, 
+   dcache_alt_data_w0_m, 
+   // Inputs
+   rclk, si, se, lsu_dcache_iob_rd_w, lsu_ramtest_rd_w, 
+   lsu_pcx_rq_sz_b3, lsu_diagnstc_data_sel, pcx_pkt_src_sel, 
+   lsu_stb_pcx_rvld_d1, imiss_pcx_mx_sel, fwd_int_fp_pcx_mx_sel, 
+   spu_lsu_ldst_pckt, tlu_lsu_pcxpkt, const_cpuid, ifu_pcx_pkt, 
+   lmq_byp_data_en_w2, lmq_byp_data_sel0, lmq_byp_data_sel1, 
+   lmq_byp_data_sel2, lmq_byp_data_sel3, lmq_byp_ldxa_sel0, 
+   lmq_byp_ldxa_sel1, lmq_byp_ldxa_sel2, lmq_byp_ldxa_sel3, 
+   lmq_byp_data_fmx_sel, exu_lsu_rs3_data_e, ifu_lsu_ldxa_data_w2, 
+   tlu_lsu_int_ldxa_data_w2, spu_lsu_ldxa_data_w2, stb_rdata_ramd, 
+   stb_rdata_ramc, lmq_byp_misc_sel, dfq_byp_sel, ld_pcx_rq_sel, 
+   ld_pcx_thrd, lmq_enable, ld_pcx_pkt_g, ffu_lsu_data, 
+   lsu_tlb_st_sel_m, lsu_pcx_fwd_pkt, lsu_pcx_fwd_reply, 
+   lsu_diagnstc_dtagv_prty_invrt_e, lsu_misc_rdata_w2, 
+   lsu_stb_rd_tid, lsu_iobrdge_rply_data_sel, lsu_iobrdge_rd_data, 
+   lsu_atomic_pkt2_bsel_g, lsu_pcx_ld_dtag_perror_w2, 
+   lsu_dcache_rdata_w, lsu_va_wtchpt0_wr_en_l, 
+   lsu_va_wtchpt1_wr_en_l, lsu_va_wtchpt2_wr_en_l, 
+   lsu_va_wtchpt3_wr_en_l, thread0_m, thread1_m, thread2_m, 
+   thread3_m, lsu_thread_g, lsu_ldst_va_m, tlb_pgnum, lsu_bld_pcx_rq, 
+   lsu_bld_rq_addr, lmq0_pcx_pkt_way, lmq1_pcx_pkt_way, 
+   lmq2_pcx_pkt_way, lmq3_pcx_pkt_way, lsu_dfq_ld_vld, 
+   lsu_ifu_asi_data_en_l, lsu_ld0_spec_vld_kill_w2, 
+   lsu_ld1_spec_vld_kill_w2, lsu_ld2_spec_vld_kill_w2, 
+   lsu_ld3_spec_vld_kill_w2, lsu_fwd_rply_sz1_unc, rst_tri_en, 
+   lsu_l2fill_data, l2fill_vld_m, ld_thrd_byp_sel_m, sehold
+   ) ;  
+
+input                     rclk ;
+input                     si;
+input                     se;
+input			  sehold;
+//input			  tmb_l;
+
+output                    so;
+input 			  lsu_dcache_iob_rd_w ;	
+input 			  lsu_ramtest_rd_w ;
+
+input			 lsu_pcx_rq_sz_b3 ;
+
+input  [3:0]   		  lsu_diagnstc_data_sel ;
+
+input   [3:0]             pcx_pkt_src_sel ;       // sel 1/4 pkt src for pcx.
+input                     lsu_stb_pcx_rvld_d1 ;   // stb has been read-delayby1cycle
+input                     imiss_pcx_mx_sel ;      // select imiss over spu.
+input   [2:0]             fwd_int_fp_pcx_mx_sel ; // select fwd/intrpt/fpop
+   
+input   [`PCX_WIDTH-1:0]  spu_lsu_ldst_pckt ;     // stream ld/st pkt for pcx.
+input   [25:0]            tlu_lsu_pcxpkt ;        // truncated pcx interrupt pkt.
+input   [2:0]             const_cpuid ;           // cpu id
+input   [51:0]            ifu_pcx_pkt ;           // ifu imiss request.
+input   [3:0]             lmq_byp_data_en_w2 ;
+input   [3:0]             lmq_byp_data_sel0 ;     // ldxa/stb/cas bypass data sel.
+input   [3:0]             lmq_byp_data_sel1 ;     // ldxa/stb/cas bypass data sel.
+input   [3:0]             lmq_byp_data_sel2 ;     // ldxa/stb/cas bypass data sel.
+input   [3:0]             lmq_byp_data_sel3 ;     // ldxa/stb/cas bypass data sel.
+input   [2:0]             lmq_byp_ldxa_sel0 ;     // ldxa data sel - thread0
+input   [2:0]             lmq_byp_ldxa_sel1 ;     // ldxa data sel - thread1
+input   [2:0]             lmq_byp_ldxa_sel2 ;     // ldxa data sel - thread2
+input   [2:0]             lmq_byp_ldxa_sel3 ;     // ldxa data sel - thread3
+input	[3:0]		  lmq_byp_data_fmx_sel ;  // final sel for lmq data.
+input   [63:0]            exu_lsu_rs3_data_e ;    // rs3_data for cas pkt 2.
+input   [63:0]            ifu_lsu_ldxa_data_w2 ;  // ldxa data from ifu. 
+//input   [63:0]            tlu_lsu_ldxa_data_w2 ;  // ldxa data from tlu (mmu)
+input   [63:0]            tlu_lsu_int_ldxa_data_w2 ;  // ldxa data from tlu (intrpt/scpd)
+input   [63:0]            spu_lsu_ldxa_data_w2 ;  // ldxa data from spu 
+input   [75:0]            stb_rdata_ramd ;        // stb0 data ram output.
+input   [44:9]            stb_rdata_ramc ;        // stb0 tag ram output.
+input   [3:0]             lmq_byp_misc_sel ;      // select g-stage lmq source
+input   [3:0]             dfq_byp_sel ;
+input   [3:0]             ld_pcx_rq_sel ;
+input   [1:0]             ld_pcx_thrd ;
+ 
+input   [3:0]             lmq_enable ;             // 4 enables for lmq.
+input   [`LMQ_WIDTH-1:40]  ld_pcx_pkt_g ;           // ld miss pkt for thread.
+input   [80:0]            ffu_lsu_data ;
+input   [3:0]             lsu_tlb_st_sel_m ;
+//input   [3:0]             lsu_tlb_st_sel_g ;
+//input                     lsu_tlb_st_vld_g ;   
+input   [107:0]           lsu_pcx_fwd_pkt ;         // local fwd reply/req
+input                     lsu_pcx_fwd_reply ;       // fwd reply on pcx pkt 
+input                     lsu_diagnstc_dtagv_prty_invrt_e ;
+//input                     lsu_diagnstc_wr_src_sel_e ;// dcache/dtag/v write - diag   
+//input   [47:0]            lsu_local_ldxa_data_w2 ;   // local ldxa data 
+input   [63:0]            lsu_misc_rdata_w2 ;   // local ldxa data 
+input   [1:0]             lsu_stb_rd_tid ;           // thread for which stb rd occurs
+input   [2:0]             lsu_iobrdge_rply_data_sel ;
+input   [43:0]            lsu_iobrdge_rd_data ;
+input   [2:0]             lsu_atomic_pkt2_bsel_g ;
+input                     lsu_pcx_ld_dtag_perror_w2 ;
+input	[63:0]		  lsu_dcache_rdata_w ;
+//input   [47:0]            tlu_lsu_iobrdge_pc_data ;  // NOTE: unused: remove this in sync w/ tlu
+
+input         lsu_va_wtchpt0_wr_en_l;
+input         lsu_va_wtchpt1_wr_en_l;
+input         lsu_va_wtchpt2_wr_en_l;
+input         lsu_va_wtchpt3_wr_en_l;
+input         thread0_m;
+input         thread1_m;
+input         thread2_m;
+input         thread3_m;
+
+   input [3:0] lsu_thread_g;
+   
+
+//input         lsu_pa_wtchpt_wr_en_l;
+input [47:0]  lsu_ldst_va_m;
+input [39:13] tlb_pgnum;
+input         lsu_bld_pcx_rq ;        // cycle after request
+input [1:0]   lsu_bld_rq_addr ;       // cycle after request
+   
+//input  [1:0]           lsu_lmq_pkt_way_g;
+input  [1:0]           lmq0_pcx_pkt_way;
+input  [1:0]           lmq1_pcx_pkt_way;
+input  [1:0]           lmq2_pcx_pkt_way;
+input  [1:0]           lmq3_pcx_pkt_way;
+
+input         	lsu_dfq_ld_vld ;        
+input		lsu_ifu_asi_data_en_l ;
+
+input           lsu_ld0_spec_vld_kill_w2 ;
+input           lsu_ld1_spec_vld_kill_w2 ;
+input           lsu_ld2_spec_vld_kill_w2 ;
+input           lsu_ld3_spec_vld_kill_w2 ;
+
+input		lsu_fwd_rply_sz1_unc ;
+
+input           rst_tri_en ;
+
+output        lsu_va_match_b47_b32_m;
+output        lsu_va_match_b31_b3_m;
+
+//output        lsu_pa_match_b39_13_g;
+//output        lsu_pa_match_b12_3_m;
+output [47:3] lsu_va_wtchpt_addr;
+//output [39:3] lsu_pa_wtchpt_addr;
+   
+//output  [63:0]            ld_stb_bypass_data ;  // st to load bypass data.
+
+output  [`PCX_WIDTH-1:0]  spc_pcx_data_pa ;
+output  [29:0]            dtag_wdata_m ;            // tag to write to dtag.
+//output  [3:0]             lsu_byp_misc_addr_m ;     // lower 3bits of addr for ldxa/raw etc
+//output  [1:0]             lsu_byp_misc_sz_m ;       // size for ldxa/raw etc
+output  [1:0]             lmq0_byp_misc_sz ;    
+output  [1:0]             lmq1_byp_misc_sz ;    
+output  [1:0]             lmq2_byp_misc_sz ;    
+output  [1:0]             lmq3_byp_misc_sz ;    
+
+output  [1:0]             lsu_byp_misc_sz_e ;       // size for ldxa/raw etc
+output                    lsu_l2fill_sign_extend_m ;// requires sign-extend else zero extend
+output                    lsu_l2fill_bendian_m ;    // big endian fill/bypass.
+//output                    lsu_l2fill_fpld_e ;       // fp load 
+output                    lmq0_l2fill_fpld ;       // fp load 
+output                    lmq1_l2fill_fpld ;       // fp load 
+output                    lmq2_l2fill_fpld ;       // fp load 
+output                    lmq3_l2fill_fpld ;       // fp load 
+
+output  [4:0]             lmq_ld_rd1 ;              // rd for all loads
+//output                    lsu_ncache_ld_e ;         // non-cacheable ld from dfq
+output                    lmq0_ncache_ld ;         // non-cacheable ld from dfq
+output                    lmq1_ncache_ld ;         // non-cacheable ld from dfq
+output                    lmq2_ncache_ld ;         // non-cacheable ld from dfq
+output                    lmq3_ncache_ld ;         // non-cacheable ld from dfq
+//output  [2:0]             lsu_ld_rq_type_e ;        // for identifying atomic ld.
+
+output  [2:0]             lmq0_ld_rq_type ;        // for identifying atomic ld.
+output  [2:0]             lmq1_ld_rq_type ;        // for identifying atomic ld.
+output  [2:0]             lmq2_ld_rq_type ;        // for identifying atomic ld.
+output  [2:0]             lmq3_ld_rq_type ;        // for identifying atomic ld.
+
+output                    lmq0_ldd_vld ;             // ld double 
+output                    lmq1_ldd_vld ;             // ld double 
+output                    lmq2_ldd_vld ;             // ld double 
+output                    lmq3_ldd_vld ;             // ld double 
+
+output                    ld_sec_hit_thrd0 ;        // ld has sec. hit against th0
+output                    ld_sec_hit_thrd1 ;        // ld has sec. hit against th1
+output                    ld_sec_hit_thrd2 ;        // ld has sec. hit against th2
+output                    ld_sec_hit_thrd3 ;        // ld has sec. hit against th3
+//output  [1:0]             lmq_pcx_pkt_sz ;
+//output  [39:0]            lmq_pcx_pkt_addr ;  
+output  [10:0]            lmq0_pcx_pkt_addr;
+output  [10:0]            lmq1_pcx_pkt_addr;
+output  [10:0]            lmq2_pcx_pkt_addr;
+output  [10:0]            lmq3_pcx_pkt_addr;
+   
+//output  [63:0]            lsu_tlu_st_rs3_data_g ;
+output  [63:0]            lsu_mmu_rs3_data_g ;
+output  [63:0]            lsu_tlu_rs3_data_g ;
+
+output                    lsu_diagnstc_wr_data_b0 ; // diagnostic wr data - bit 0
+output  [63:0]            lsu_diagnstc_wr_data_e ;
+
+output  [47:0]            lsu_ifu_stxa_data ;       // stxa related data
+
+output  [11:5]            lsu_ifu_ld_icache_index ;
+output  [1:0]             lsu_ifu_ld_pcxpkt_tid ;
+
+//output  [1:0]             lmq_ld_way ;              // cache set way for ld fill
+
+output  [28:0]            lsu_error_pa_m ;          // error phy addr
+//output  [13:0]            lsu_spu_rsrv_data_m ;     // rs3 data for reserved fields.
+output                    lsu_pref_pcx_req ;        // pref sent to pcx
+
+   output [63:0]          st_rs3_data_g;
+
+output  [1:0]             lsu_ldst_va_way_g ;          // 12:11 for direct map
+//====================================================================   
+//dc_fill CP
+
+   input [63:0]           lsu_l2fill_data; //from qdp2
+   input                  l2fill_vld_m;    //from dctl
+   input   [3:0]          ld_thrd_byp_sel_m;//from dctl 
+
+   output [63:0]          dcache_alt_data_w0_m;  //to d$
+//   output [7:0]           lsu_l2fill_or_byp_msb_m;   //to dctl
+//====================================================================   
+   
+
+wire  [`STB_PCX_WIDTH-1:0]  store_pcx_pkt ;
+wire  [`PCX_WIDTH-1:0]  pcx_pkt_data ;
+wire  [`STB_PCX_WIDTH-1:0]  stb_pcx_pkt ;
+wire  [`PCX_WIDTH-1:0]  imiss_strm_pcx_pkt ;
+wire  [`PCX_WIDTH-1:0]  intrpt_full_pcxpkt ;
+wire  [`PCX_WIDTH-1:0]  ifu_full_pcx_pkt_e ;
+wire  [51:0]      ifu_pcx_pkt_e ;
+wire  [63:0]      cas_pkt2_data ;
+wire  [63:0]      lmq0_bypass_data_in,lmq1_bypass_data_in ;
+wire  [63:0]      lmq2_bypass_data_in,lmq3_bypass_data_in ;
+wire  [63:0]      lmq0_bypass_data, lmq1_bypass_data ;
+wire  [63:0]      lmq2_bypass_data, lmq3_bypass_data ;
+wire  [39:0]      lmq_ld_addr ;
+wire  [`LMQ_WIDTH:0]    load_pcx_pkt ;
+wire  [`LMQ_WIDTH-1:0]  lmq0_pcx_pkt, lmq1_pcx_pkt ;
+wire  [`LMQ_WIDTH-1:0]  lmq2_pcx_pkt, lmq3_pcx_pkt ;
+wire  [`PCX_WIDTH-1:0]  fpop_full_pcxpkt ;
+wire  [63:0]      tlb_st_data ;
+//wire    [63:0]      formatted_tte_tag ;
+//wire    [63:0]      formatted_tte_data ;
+wire  [63:0]      lmq0_bypass_ldxa_data ;
+wire  [63:0]      lmq1_bypass_ldxa_data ;
+wire  [63:0]      lmq2_bypass_ldxa_data ;
+wire  [63:0]      lmq3_bypass_ldxa_data ;
+wire  [`PCX_WIDTH-1:0]  fwd_full_pcxpkt ;
+wire  [47:3]            lsu_tlu_st_rs3_data_g ;
+
+
+//===================================================
+//  clock buffer   
+//===================================================
+//wire   lsu_qdp1_clk ;   
+wire   clk;
+assign  clk = rclk;
+
+wire         thread0_g;
+wire         thread1_g;
+wire         thread2_g;
+wire         thread3_g;
+
+   assign    thread0_g = lsu_thread_g[0];
+   assign    thread1_g = lsu_thread_g[1];
+   assign    thread2_g = lsu_thread_g[2];
+   assign    thread3_g = lsu_thread_g[3];
+   
+//=================================================================================================
+//    LMQ DP
+//=================================================================================================
+
+wire  [12:0]  ldst_va_g;
+
+dff_s  #(13) ff_ldst_va_g (
+        .din    (lsu_ldst_va_m[12:0]),
+        .q      (ldst_va_g[12:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+assign  lsu_ldst_va_way_g[1:0] =  ldst_va_g[12:11];
+
+wire  [`LMQ_VLD:0]  ld_pcx_pkt_g_tmp;
+
+assign ld_pcx_pkt_g_tmp[`LMQ_VLD:0] =  {ld_pcx_pkt_g[`LMQ_WIDTH-1:44],
+                                        2'b00,      // done after the flop
+                                        //lsu_lmq_pkt_way_g[1:0],
+                                        ld_pcx_pkt_g[41:40],
+                                        tlb_pgnum[39:13],ldst_va_g[12:0]};
+
+// Unfortunately ld_pcx_pkt_g is now 65 bits wide. Grape-mapper needs to give feedback.
+// THREAD 0.
+/*
+dffe_s  #(`LMQ_WIDTH) lmq0 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq0_pcx_pkt[`LMQ_VLD:0]),
+        .en     (lmq_enable[0]), .clk (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+*/
+wire lmq0_clk;   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lmq0_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~lmq_enable[0]),
+                .tmb_l  (~se),
+                .clk    (lmq0_clk)
+                ) ;   
+`endif
+wire  [`LMQ_VLD:0]  lmq0_pcx_pkt_tmp ;
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(`LMQ_WIDTH) lmq0 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq0_pcx_pkt_tmp[`LMQ_VLD:0]),
+        .en (~(~lmq_enable[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(`LMQ_WIDTH) lmq0 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq0_pcx_pkt_tmp[`LMQ_VLD:0]),
+        .clk    (lmq0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+ 
+//bug2705 - speculative pick in w-cycle
+wire    lmq0_pcx_pkt_vld ;
+assign  lmq0_pcx_pkt_vld  =  lmq0_pcx_pkt_tmp[`LMQ_VLD] & ~lsu_ld0_spec_vld_kill_w2 ;
+
+assign  lmq0_pcx_pkt[`LMQ_VLD:0]  = {lmq0_pcx_pkt_vld,
+                                     lmq0_pcx_pkt_tmp[`LMQ_VLD-1:44],
+                                     lmq0_pcx_pkt_way[1:0],
+                                     lmq0_pcx_pkt_tmp[41:0]};
+  
+// Needs to be multi-threaded.
+//assign lmq_pcx_pkt_sz[1:0] = lmq0_pcx_pkt[`LMQ_SZ_HI:`LMQ_SZ_LO]  ;
+
+assign  ld_sec_hit_thrd0 =  
+(ld_pcx_pkt_g_tmp[`LMQ_AD_HI:`LMQ_AD_LO+4] == lmq0_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO+4]) ;            
+
+`ifdef FPGA_SYN_1THREAD
+  assign load_pcx_pkt[`LMQ_WIDTH-1:0] = lmq0_pcx_pkt[`LMQ_WIDTH-1:0];  // single-thread build: thread 0 entry drives load_pcx_pkt, no thread mux
+`else
+// THREAD 1: register capturing ld_pcx_pkt_g_tmp under control of lmq_enable[1].
+/* Disabled reference form: a single enable flop.
+dffe_s  #(`LMQ_WIDTH) lmq1 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq1_pcx_pkt[`LMQ_VLD:0]),
+        .en     (lmq_enable[1]), .clk (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );              
+*/
+wire lmq1_clk;   // NOTE(review): only driver in view is lmq1_clkbuf, which is compiled out under FPGA_SYN_CLK_EN; presumably that define must be paired with FPGA_SYN_CLK_DFF - confirm
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lmq1_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~lmq_enable[1]),
+                .tmb_l  (~se),
+                .clk    (lmq1_clk)
+                ) ;   
+`endif
+// Raw register output; its top bit and bits [43:42] are overridden when lmq1_pcx_pkt is assembled further down.
+wire  [`LMQ_VLD:0]  lmq1_pcx_pkt_tmp;
+// FPGA_SYN_CLK_DFF build: enable flop on clk. Otherwise: plain flop clocked by lmq1_clk.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(`LMQ_WIDTH) lmq1 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq1_pcx_pkt_tmp[`LMQ_VLD:0]),
+        .en (~(~lmq_enable[1])), .clk(clk),  // double inversion: equals lmq_enable[1]
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(`LMQ_WIDTH) lmq1 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq1_pcx_pkt_tmp[`LMQ_VLD:0]),
+        .clk    (lmq1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+
+//bug2705 - speculative pick in w-cycle: the stored valid bit is masked while lsu_ld1_spec_vld_kill_w2 is set
+wire    lmq1_pcx_pkt_vld ;
+assign  lmq1_pcx_pkt_vld  =  lmq1_pcx_pkt_tmp[`LMQ_VLD] & ~lsu_ld1_spec_vld_kill_w2 ;
+// Thread 1 entry: masked valid on top, bits [43:42] taken from lmq1_pcx_pkt_way, everything else from the register.
+assign  lmq1_pcx_pkt[`LMQ_VLD:0]  =  {lmq1_pcx_pkt_vld,
+                                      lmq1_pcx_pkt_tmp[`LMQ_VLD-1:44],
+                                      lmq1_pcx_pkt_way[1:0],
+                                      lmq1_pcx_pkt_tmp[41:0]};
+// Thread 1 address compare of the incoming packet against the entry; the low 4 bits of the address field are excluded.
+assign  ld_sec_hit_thrd1 =  
+(ld_pcx_pkt_g_tmp[`LMQ_AD_HI:`LMQ_AD_LO+4] == lmq1_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO+4]) ;            
+
+// THREAD 2: register capturing ld_pcx_pkt_g_tmp under control of lmq_enable[2].
+/* Disabled reference form: a single enable flop.
+dffe_s  #(`LMQ_WIDTH) lmq2 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq2_pcx_pkt[`LMQ_VLD:0]),
+        .en     (lmq_enable[2]), .clk (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );              
+*/
+wire lmq2_clk;   // NOTE(review): only driver in view is lmq2_clkbuf, which is compiled out under FPGA_SYN_CLK_EN; presumably that define must be paired with FPGA_SYN_CLK_DFF - confirm
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lmq2_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~lmq_enable[2]),
+                .tmb_l  (~se),
+                .clk    (lmq2_clk)
+                ) ;   
+`endif
+// Raw register output; its top bit and bits [43:42] are overridden when lmq2_pcx_pkt is assembled further down.
+wire  [`LMQ_VLD:0]  lmq2_pcx_pkt_tmp;
+// FPGA_SYN_CLK_DFF build: enable flop on clk. Otherwise: plain flop clocked by lmq2_clk.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(`LMQ_WIDTH) lmq2 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq2_pcx_pkt_tmp[`LMQ_VLD:0]),
+        .en (~(~lmq_enable[2])), .clk(clk),  // double inversion: equals lmq_enable[2]
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(`LMQ_WIDTH) lmq2 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq2_pcx_pkt_tmp[`LMQ_VLD:0]),
+        .clk    (lmq2_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+
+//bug2705 - speculative pick in w-cycle: the stored valid bit is masked while lsu_ld2_spec_vld_kill_w2 is set
+wire    lmq2_pcx_pkt_vld ;
+assign  lmq2_pcx_pkt_vld  =  lmq2_pcx_pkt_tmp[`LMQ_VLD] & ~lsu_ld2_spec_vld_kill_w2 ;
+
+// Thread 2 entry: masked valid on top, bits [43:42] taken from lmq2_pcx_pkt_way, everything else from the register.
+assign  lmq2_pcx_pkt[`LMQ_VLD:0]  =  {lmq2_pcx_pkt_vld,
+                                      lmq2_pcx_pkt_tmp[`LMQ_VLD-1:44],
+                                      lmq2_pcx_pkt_way[1:0],
+                                      lmq2_pcx_pkt_tmp[41:0]};
+// Thread 2 address compare of the incoming packet against the entry; the low 4 bits of the address field are excluded.
+assign  ld_sec_hit_thrd2 =  
+(ld_pcx_pkt_g_tmp[`LMQ_AD_HI:`LMQ_AD_LO+4] == lmq2_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO+4]) ;            
+
+// THREAD 3: register capturing ld_pcx_pkt_g_tmp under control of lmq_enable[3].
+/* Disabled reference form: a single enable flop.
+dffe_s  #(`LMQ_WIDTH) lmq3 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq3_pcx_pkt[`LMQ_VLD:0]),
+        .en     (lmq_enable[3]), .clk (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );              
+*/
+wire lmq3_clk;   // NOTE(review): only driver in view is lmq3_clkbuf, which is compiled out under FPGA_SYN_CLK_EN; presumably that define must be paired with FPGA_SYN_CLK_DFF - confirm
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lmq3_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~lmq_enable[3]),
+                .tmb_l  (~se),
+                .clk    (lmq3_clk)
+                ) ;   
+`endif
+// Raw register output; its top bit and bits [43:42] are overridden when lmq3_pcx_pkt is assembled further down.
+wire  [`LMQ_VLD:0]  lmq3_pcx_pkt_tmp;
+// FPGA_SYN_CLK_DFF build: enable flop on clk. Otherwise: plain flop clocked by lmq3_clk.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(`LMQ_WIDTH) lmq3 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq3_pcx_pkt_tmp[`LMQ_VLD:0]),
+        .en (~(~lmq_enable[3])), .clk(clk),  // double inversion: equals lmq_enable[3]
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(`LMQ_WIDTH) lmq3 (
+        .din    (ld_pcx_pkt_g_tmp[`LMQ_VLD:0]),
+        .q      (lmq3_pcx_pkt_tmp[`LMQ_VLD:0]),
+        .clk    (lmq3_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+
+//bug2705 - speculative pick in w-cycle: the stored valid bit is masked while lsu_ld3_spec_vld_kill_w2 is set
+wire    lmq3_pcx_pkt_vld ;
+assign  lmq3_pcx_pkt_vld  =  lmq3_pcx_pkt_tmp[`LMQ_VLD] & ~lsu_ld3_spec_vld_kill_w2 ;
+
+// Thread 3 entry: masked valid on top, bits [43:42] taken from lmq3_pcx_pkt_way, everything else from the register.
+assign  lmq3_pcx_pkt[`LMQ_VLD:0]  =  {lmq3_pcx_pkt_vld,
+                                      lmq3_pcx_pkt_tmp[`LMQ_VLD-1:44],
+                                      lmq3_pcx_pkt_way[1:0],
+                                      lmq3_pcx_pkt_tmp[41:0]};
+
+// Thread 3 address compare of the incoming packet against the entry; the low 4 bits of the address field are excluded.
+assign  ld_sec_hit_thrd3 =  
+(ld_pcx_pkt_g_tmp[`LMQ_AD_HI:`LMQ_AD_LO+4] == lmq3_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO+4]) ;            
+
+// Select 1 of 4 LMQ entries onto load_pcx_pkt (multi-thread build only; see the enclosing `ifdef).
+// Selection is based on which thread's load is chosen for pcx (ld_pcx_rq_sel[3:0]; presumably one-hot - TODO confirm).
+mux4ds  #(`LMQ_WIDTH) lmq_pthrd_sel (
+  .in0  (lmq0_pcx_pkt[`LMQ_WIDTH-1:0]),
+  .in1  (lmq1_pcx_pkt[`LMQ_WIDTH-1:0]),
+  .in2  (lmq2_pcx_pkt[`LMQ_WIDTH-1:0]),
+  .in3  (lmq3_pcx_pkt[`LMQ_WIDTH-1:0]),
+  .sel0 (ld_pcx_rq_sel[0]),  
+  .sel1   (ld_pcx_rq_sel[1]),
+  .sel2 (ld_pcx_rq_sel[2]),  
+  .sel3   (ld_pcx_rq_sel[3]),
+  .dout (load_pcx_pkt[`LMQ_WIDTH-1:0])
+);
+`endif
+
+assign  lsu_pref_pcx_req = load_pcx_pkt[`LMQ_PREF] ;   // LMQ_PREF bit of the packet selected for pcx
+// Per-thread field taps from the LMQ entries. Under FPGA_SYN_1THREAD only thread 0 is tapped; threads 1..3 are tied to zero.
+// Choose data to src for fill/bypass.
+// E-stage muxing : required for fills specifically.
+// LMQ_RD2_VLD bit of each entry.
+   assign lmq0_ldd_vld =   lmq0_pcx_pkt[`LMQ_RD2_VLD];
+`ifdef FPGA_SYN_1THREAD
+   assign lmq1_ldd_vld =   1'b0;
+   assign lmq2_ldd_vld =   1'b0;
+   assign lmq3_ldd_vld =   1'b0;
+`else
+   assign lmq1_ldd_vld =   lmq1_pcx_pkt[`LMQ_RD2_VLD];
+   assign lmq2_ldd_vld =   lmq2_pcx_pkt[`LMQ_RD2_VLD];
+   assign lmq3_ldd_vld =   lmq3_pcx_pkt[`LMQ_RD2_VLD];
+`endif
+// Low 11 bits of each entry's address field.
+   assign lmq0_pcx_pkt_addr[10:0] =  lmq0_pcx_pkt[`LMQ_AD_LO + 10 :`LMQ_AD_LO];
+`ifdef FPGA_SYN_1THREAD
+   assign lmq1_pcx_pkt_addr[10:0] =  11'b0;
+   assign lmq2_pcx_pkt_addr[10:0] =  11'b0;
+   assign lmq3_pcx_pkt_addr[10:0] =  11'b0;
+`else
+   assign lmq1_pcx_pkt_addr[10:0] =  lmq1_pcx_pkt[`LMQ_AD_LO + 10 :`LMQ_AD_LO];
+   assign lmq2_pcx_pkt_addr[10:0] =  lmq2_pcx_pkt[`LMQ_AD_LO + 10 :`LMQ_AD_LO];
+   assign lmq3_pcx_pkt_addr[10:0] =  lmq3_pcx_pkt[`LMQ_AD_LO + 10 :`LMQ_AD_LO];
+`endif
+// 3-bit request-type field of each entry.
+   assign lmq0_ld_rq_type[2:0] = lmq0_pcx_pkt[`LMQ_RQ_HI:`LMQ_RQ_LO];
+`ifdef FPGA_SYN_1THREAD
+   assign lmq1_ld_rq_type[2:0] = 3'b0;
+   assign lmq2_ld_rq_type[2:0] = 3'b0;
+   assign lmq3_ld_rq_type[2:0] = 3'b0;
+`else
+   assign lmq1_ld_rq_type[2:0] = lmq1_pcx_pkt[`LMQ_RQ_HI:`LMQ_RQ_LO];
+   assign lmq2_ld_rq_type[2:0] = lmq2_pcx_pkt[`LMQ_RQ_HI:`LMQ_RQ_LO];
+   assign lmq3_ld_rq_type[2:0] = lmq3_pcx_pkt[`LMQ_RQ_HI:`LMQ_RQ_LO];
+`endif
+// LMQ_FPLD bit of each entry.
+    assign lmq0_l2fill_fpld =  lmq0_pcx_pkt[`LMQ_FPLD];
+`ifdef FPGA_SYN_1THREAD
+    assign lmq1_l2fill_fpld =  1'b0;
+    assign lmq2_l2fill_fpld =  1'b0;
+    assign lmq3_l2fill_fpld =  1'b0;
+`else
+    assign lmq1_l2fill_fpld =  lmq1_pcx_pkt[`LMQ_FPLD];
+    assign lmq2_l2fill_fpld =  lmq2_pcx_pkt[`LMQ_FPLD];
+    assign lmq3_l2fill_fpld =  lmq3_pcx_pkt[`LMQ_FPLD];
+`endif
+/* Disabled 44-bit form of lmq_dthrd_sel1 that also muxed the LMQ_NC and LMQ_FPLD bits; kept for reference.
+   wire    lsu_l2fill_fpld_e;
+ 
+mux4ds  #(44) lmq_dthrd_sel1 (
+  .in0  ({lmq0_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO], lmq0_pcx_pkt[`LMQ_NC],
+          lmq0_pcx_pkt[`LMQ_FPLD],lmq0_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO]}),
+  .in1  ({lmq1_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO], lmq1_pcx_pkt[`LMQ_NC],
+          lmq1_pcx_pkt[`LMQ_FPLD],lmq1_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO]}),
+  .in2  ({lmq2_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO], lmq2_pcx_pkt[`LMQ_NC],
+          lmq2_pcx_pkt[`LMQ_FPLD],lmq2_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO]}),
+  .in3  ({lmq3_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO], lmq3_pcx_pkt[`LMQ_NC],
+          lmq3_pcx_pkt[`LMQ_FPLD],lmq3_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO]}),
+  .sel0 (dfq_byp_sel[0]),  
+  .sel1 (dfq_byp_sel[1]),
+  .sel2 (dfq_byp_sel[2]),  
+  .sel3 (dfq_byp_sel[3]),
+  .dout ({lmq_ld_addr[39:0], lsu_ncache_ld_e,
+          lsu_l2fill_fpld_e, lsu_byp_misc_sz_e[1:0]})
+);
+*/
+// LMQ_NC bit of each entry.
+   assign  lmq0_ncache_ld =   lmq0_pcx_pkt[`LMQ_NC];
+`ifdef FPGA_SYN_1THREAD
+   assign  lmq1_ncache_ld =   1'b0;
+   assign  lmq2_ncache_ld =   1'b0;
+   assign  lmq3_ncache_ld =   1'b0;
+`else
+   assign  lmq1_ncache_ld =   lmq1_pcx_pkt[`LMQ_NC];
+   assign  lmq2_ncache_ld =   lmq2_pcx_pkt[`LMQ_NC];
+   assign  lmq3_ncache_ld =   lmq3_pcx_pkt[`LMQ_NC];
+`endif
+   
+`ifdef FPGA_SYN_1THREAD
+   assign lmq_ld_addr[39:0] =  lmq0_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO];           // single-thread build: all five outputs come straight from thread 0
+   assign lsu_byp_misc_sz_e[1:0] = lmq0_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO];
+   assign lmq_ld_rd1[4:0] = lmq0_pcx_pkt[`LMQ_RD1_HI: `LMQ_RD1_LO];
+   assign lsu_l2fill_bendian_m = lmq0_pcx_pkt[`LMQ_BIGEND];
+   assign lsu_l2fill_sign_extend_m = lmq0_pcx_pkt[`LMQ_SIGNEXT];
+`else
+mux4ds  #(42) lmq_dthrd_sel1 (   // 42 bits = 40-bit address + 2-bit size, thread chosen by dfq_byp_sel[3:0]
+  .in0  ({lmq0_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO],
+          lmq0_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO]}),
+  .in1  ({lmq1_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO], 
+          lmq1_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO]}),
+  .in2  ({lmq2_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO], 
+          lmq2_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO]}),
+  .in3  ({lmq3_pcx_pkt[`LMQ_AD_HI:`LMQ_AD_LO], 
+          lmq3_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO]}),
+  .sel0 (dfq_byp_sel[0]),  
+  .sel1 (dfq_byp_sel[1]),
+  .sel2 (dfq_byp_sel[2]),  
+  .sel3 (dfq_byp_sel[3]),
+  .dout ({lmq_ld_addr[39:0], lsu_byp_misc_sz_e[1:0]})
+);
+   
+// POR
+// M-stage muxing : required for alignment and bypassing to exu.
+// flopped then used in qctl/dctl G-stage  
+// lmq_ld_rd1 to lsu_qctl
+// others to lsu_dctl
+  
+// M-Stage Muxing : 7 bits = 5-bit rd1 + big-endian bit + sign-extend bit, thread chosen by lmq_byp_misc_sel[3:0]
+
+mux4ds  #(7) lmq_dthrd_sel2 (
+  .in0  ({lmq0_pcx_pkt[`LMQ_RD1_HI: `LMQ_RD1_LO],lmq0_pcx_pkt[`LMQ_BIGEND],
+    lmq0_pcx_pkt[`LMQ_SIGNEXT]}),
+  .in1  ({lmq1_pcx_pkt[`LMQ_RD1_HI: `LMQ_RD1_LO],lmq1_pcx_pkt[`LMQ_BIGEND],
+    lmq1_pcx_pkt[`LMQ_SIGNEXT]}),
+  .in2  ({lmq2_pcx_pkt[`LMQ_RD1_HI: `LMQ_RD1_LO],lmq2_pcx_pkt[`LMQ_BIGEND],
+    lmq2_pcx_pkt[`LMQ_SIGNEXT]}),
+  .in3  ({lmq3_pcx_pkt[`LMQ_RD1_HI: `LMQ_RD1_LO],lmq3_pcx_pkt[`LMQ_BIGEND],
+    lmq3_pcx_pkt[`LMQ_SIGNEXT]}),
+  .sel0 (lmq_byp_misc_sel[0]),  
+  .sel1 (lmq_byp_misc_sel[1]),
+  .sel2 (lmq_byp_misc_sel[2]),  
+  .sel3 (lmq_byp_misc_sel[3]),
+  .dout ({lmq_ld_rd1[4:0],lsu_l2fill_bendian_m,lsu_l2fill_sign_extend_m})
+);
+`endif
+// Unmuxed per-thread copies of the 2-bit size field; threads 1..3 are tied to zero under FPGA_SYN_1THREAD.
+   assign  lmq0_byp_misc_sz[1:0] = lmq0_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO];
+`ifdef FPGA_SYN_1THREAD
+   assign  lmq1_byp_misc_sz[1:0] = 2'b0;
+   assign  lmq2_byp_misc_sz[1:0] = 2'b0;
+   assign  lmq3_byp_misc_sz[1:0] = 2'b0;
+`else
+   assign  lmq1_byp_misc_sz[1:0] = lmq1_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO];
+   assign  lmq2_byp_misc_sz[1:0] = lmq2_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO];
+   assign  lmq3_byp_misc_sz[1:0] = lmq3_pcx_pkt[`LMQ_SZ_HI: `LMQ_SZ_LO];
+`endif
+   
+
+//assign  lmq_pcx_pkt_addr[10:0] = lmq_ld_addr[10:0] ;
+// D-tag write data: 29 data bits plus one parity bit, produced in the E stage and registered into the M stage.
+   
+   wire [28:0] dtag_wdata_e;
+// Source select: lsu_diagnstc_wr_data_e[29:1] while lsu_dfq_ld_vld is low, otherwise lmq_ld_addr[39:11].
+assign  dtag_wdata_e[28:0] = 
+        ~lsu_dfq_ld_vld ?
+        lsu_diagnstc_wr_data_e[29:1] : lmq_ld_addr[39:11] ;
+
+// Parity Generation for Tag. Match with macro.
+wire    dtag_wr_parity ;
+// Disabled single-stage form; the live logic below splits the XOR reduction across the E/M flop.
+//assign  dtag_wdata_e[29] = 
+//        ~lsu_dfq_ld_vld ?
+//        lsu_diagnstc_dtagv_prty_invrt_e^dtag_wr_parity : dtag_wr_parity ;
+// E stage: partial parities over four slices of dtag_wdata_e.
+   wire dtag_wr_parity_7_0, dtag_wr_parity_15_8, 
+        dtag_wr_parity_23_16,  dtag_wr_parity_28_24;
+
+   assign dtag_wr_parity_7_0  =  ^dtag_wdata_e[7:0];   //zzpar8
+   assign dtag_wr_parity_15_8 =  ^dtag_wdata_e[15:8];  //zzpar8
+   assign dtag_wr_parity_23_16 = ^dtag_wdata_e[23:16]; //zzpar8
+   assign dtag_wr_parity_28_24 = ^dtag_wdata_e[28:24]; //zzpar8 (this slice is only 5 bits wide)
+
+   wire   dtag_wr_parity_28_24_with_invrt;
+// Same top-slice parity, additionally XORed with lsu_diagnstc_dtagv_prty_invrt_e.
+   assign dtag_wr_parity_28_24_with_invrt = 
+           (^dtag_wdata_e[28:24]) ^ lsu_diagnstc_dtagv_prty_invrt_e; //zzpar8 (5-bit slice)
+   
+// M-stage copies of the partial parities and of lsu_dfq_ld_vld.
+   wire dtag_wr_parity_7_0_m, dtag_wr_parity_15_8_m, 
+        dtag_wr_parity_23_16_m,  dtag_wr_parity_28_24_m;
+   wire lsu_dfq_ld_vld_m;
+   wire dtag_wr_parity_28_24_with_invrt_m;
+
+
+// 12/12/03 : Change for Macrotest: while sehold is set the four partial-parity flops recirculate their own outputs.
+// (Original author's note: these 4 bits were not mentioned before - please check for a max time violation.)
+wire	dtag_wr_parity_7_0_din, dtag_wr_parity_15_8_din ;  
+wire	dtag_wr_parity_23_16_din, dtag_wr_parity_28_24_din ;
+assign	dtag_wr_parity_7_0_din = 
+sehold ? dtag_wr_parity_7_0_m : dtag_wr_parity_7_0 ;
+assign	dtag_wr_parity_15_8_din = 
+sehold ? dtag_wr_parity_15_8_m : dtag_wr_parity_15_8 ;
+assign	dtag_wr_parity_23_16_din = 
+sehold ? dtag_wr_parity_23_16_m : dtag_wr_parity_23_16 ;
+assign	dtag_wr_parity_28_24_din = 
+sehold ? dtag_wr_parity_28_24_m : dtag_wr_parity_28_24 ;
+// E->M register. Note lsu_dfq_ld_vld and the _with_invrt parity are registered without the sehold recirculation.
+dff_s #(6) tag_parity_m (
+     .din ({dtag_wr_parity_7_0_din, dtag_wr_parity_15_8_din,
+            dtag_wr_parity_23_16_din, dtag_wr_parity_28_24_din,
+            lsu_dfq_ld_vld,   dtag_wr_parity_28_24_with_invrt}),
+     .q   ({dtag_wr_parity_7_0_m, dtag_wr_parity_15_8_m,
+            dtag_wr_parity_23_16_m, dtag_wr_parity_28_24_m,
+            lsu_dfq_ld_vld_m, dtag_wr_parity_28_24_with_invrt_m}),
+     .clk  (clk),
+     .se   (1'b0),     .si (),          .so ()
+);
+// M stage: combine the registered partial parities into the full parity of the 29 data bits.
+assign dtag_wr_parity = dtag_wr_parity_7_0_m ^ dtag_wr_parity_15_8_m ^
+                        dtag_wr_parity_23_16_m ^ dtag_wr_parity_28_24_m;
+
+   wire dtag_wr_parity_with_invrt;
+// Same combination, using the top-slice parity that includes the invert term.
+assign dtag_wr_parity_with_invrt = 
+       dtag_wr_parity_7_0_m ^ dtag_wr_parity_15_8_m ^
+       dtag_wr_parity_23_16_m ^ dtag_wr_parity_28_24_with_invrt_m;
+   
+wire [29:0] dtag_wdata_m;
+
+// 12/12/03 : Change for Macrotest: bit 29 uses the invert-capable parity only when lsu_dfq_ld_vld_m and sehold are both low.
+assign dtag_wdata_m[29] = 
+        ~(lsu_dfq_ld_vld_m | sehold) ?
+        dtag_wr_parity_with_invrt : dtag_wr_parity ; 
+   
+// 12/12/03 : Change for Macrotest: while sehold is set the M-stage data register recirculates its own output.
+wire [28:0] dtag_wdata_e_din ;
+assign	dtag_wdata_e_din[28:0] = 
+sehold ? dtag_wdata_m[28:0] : dtag_wdata_e[28:0] ;
+
+dff_s  #(29) tag_stgm (
+        .din  (dtag_wdata_e_din[28:0]), 
+        .q    (dtag_wdata_m[28:0]),
+        .clk  (clk),
+        .se   (1'b0),     .si (),          .so ()
+        );
+// The registered 29 data bits are also driven out as lsu_error_pa_m.
+   assign      lsu_error_pa_m[28:0] =  dtag_wdata_m[28:0];
+   
+
+//=================================================================================================
+//    RS3 DATA ALIGNMENT FOR CAS
+//=================================================================================================
+// exu_lsu_rs3_data_e is registered twice (rs3_stgm, rs3_stgg); its bytes are then permuted per lsu_atomic_pkt2_bsel_g into atm_byte_g.
+wire  [7:0] rs3_byte0, rs3_byte1, rs3_byte2, rs3_byte3 ; 
+wire  [7:0] rs3_byte4, rs3_byte5, rs3_byte6, rs3_byte7 ; 
+wire  [63:0]  atm_byte_g ;
+wire  [63:0]  st_rs3_data_m,st_rs3_data_g ;
+// First register stage: _e -> _m.
+dff_s  #(64) rs3_stgm (
+        .din  (exu_lsu_rs3_data_e[63:0]), 
+        .q    (st_rs3_data_m[63:0]),
+        .clk  (clk),
+        .se   (1'b0),     .si (),          .so ()
+        ); 
+
+// rm (along with spu).
+//assign  lsu_spu_rsrv_data_m[13:0] =
+//  {st_rs3_data_m[27:23],st_rs3_data_m[21:16],st_rs3_data_m[8:6]} ;
+// Second register stage: _m -> _g.
+dff_s  #(64) rs3_stgg (
+        .din  (st_rs3_data_m[63:0]), 
+        .q    (st_rs3_data_g[63:0]),
+        .clk  (clk),
+        .se   (1'b0),     .si (),          .so ()
+        ); 
+// Byte lanes of the G-stage data: rs3_byteN = st_rs3_data_g[8N+7:8N].
+assign  rs3_byte0[7:0] = st_rs3_data_g[7:0] ;
+assign  rs3_byte1[7:0] = st_rs3_data_g[15:8] ;
+assign  rs3_byte2[7:0] = st_rs3_data_g[23:16] ;
+assign  rs3_byte3[7:0] = st_rs3_data_g[31:24] ;
+assign  rs3_byte4[7:0] = st_rs3_data_g[39:32] ;
+assign  rs3_byte5[7:0] = st_rs3_data_g[47:40] ;
+assign  rs3_byte6[7:0] = st_rs3_data_g[55:48] ;
+assign  rs3_byte7[7:0] = st_rs3_data_g[63:56] ;
+// Routing: bsel_g[2] -> out byte i = in byte i; bsel_g[1] -> out bytes 0..3 = in bytes 3..0 and out bytes 4..7 = in bytes 0..3; bsel_g[0] -> out byte i = in byte 7-i.
+//assign  atm_byte_g[7:0] =
+//lsu_atomic_pkt2_bsel_g[2] ? rs3_byte0[7:0] :
+//  lsu_atomic_pkt2_bsel_g[1] ? rs3_byte3[7:0] :
+//    lsu_atomic_pkt2_bsel_g[0] ? rs3_byte7[7:0] : 8'bxxxx_xxxx ;
+
+mux3ds #(8) mx_atm_byte_g_7_0 (
+    .in0 (rs3_byte0[7:0]),
+    .in1 (rs3_byte3[7:0]),
+    .in2 (rs3_byte7[7:0]),
+    .sel0(lsu_atomic_pkt2_bsel_g[2]),
+    .sel1(lsu_atomic_pkt2_bsel_g[1]),
+    .sel2(lsu_atomic_pkt2_bsel_g[0]),
+    .dout(atm_byte_g[7:0]));
+// Each commented-out assign in this section lists the same input/select pairing as the mux3ds instance that follows it.
+   
+//assign  atm_byte_g[15:8] =
+//lsu_atomic_pkt2_bsel_g[2] ? rs3_byte1[7:0] :
+//  lsu_atomic_pkt2_bsel_g[1] ? rs3_byte2[7:0] :
+//    lsu_atomic_pkt2_bsel_g[0] ? rs3_byte6[7:0] : 8'bxxxx_xxxx ;
+
+mux3ds #(8) mx_atm_byte_g_15_8 (
+    .in0 (rs3_byte1[7:0]),
+    .in1 (rs3_byte2[7:0]),
+    .in2 (rs3_byte6[7:0]),
+    .sel0(lsu_atomic_pkt2_bsel_g[2]),
+    .sel1(lsu_atomic_pkt2_bsel_g[1]),
+    .sel2(lsu_atomic_pkt2_bsel_g[0]),
+    .dout(atm_byte_g[15:8]));
+   
+//assign  atm_byte_g[23:16] =
+//lsu_atomic_pkt2_bsel_g[2] ? rs3_byte2[7:0] :
+//  lsu_atomic_pkt2_bsel_g[1] ? rs3_byte1[7:0] :
+//    lsu_atomic_pkt2_bsel_g[0] ? rs3_byte5[7:0] : 8'bxxxx_xxxx ;
+
+mux3ds #(8) mx_atm_byte_g_23_16 (
+    .in0 (rs3_byte2[7:0]),
+    .in1 (rs3_byte1[7:0]),
+    .in2 (rs3_byte5[7:0]),
+    .sel0(lsu_atomic_pkt2_bsel_g[2]),
+    .sel1(lsu_atomic_pkt2_bsel_g[1]),
+    .sel2(lsu_atomic_pkt2_bsel_g[0]),
+    .dout(atm_byte_g[23:16]));
+   
+//assign  atm_byte_g[31:24] =
+//lsu_atomic_pkt2_bsel_g[2] ? rs3_byte3[7:0] :
+//  lsu_atomic_pkt2_bsel_g[1] ? rs3_byte0[7:0] :
+//    lsu_atomic_pkt2_bsel_g[0] ? rs3_byte4[7:0] : 8'bxxxx_xxxx ;
+
+mux3ds #(8) mx_atm_byte_g_31_24 (
+    .in0 (rs3_byte3[7:0]),
+    .in1 (rs3_byte0[7:0]),
+    .in2 (rs3_byte4[7:0]),
+    .sel0(lsu_atomic_pkt2_bsel_g[2]),
+    .sel1(lsu_atomic_pkt2_bsel_g[1]),
+    .sel2(lsu_atomic_pkt2_bsel_g[0]),
+    .dout(atm_byte_g[31:24]));
+   
+//assign  atm_byte_g[39:32] =
+//lsu_atomic_pkt2_bsel_g[2] ? rs3_byte4[7:0] :
+//  lsu_atomic_pkt2_bsel_g[1] ? rs3_byte0[7:0] :
+//    lsu_atomic_pkt2_bsel_g[0] ? rs3_byte3[7:0] : 8'bxxxx_xxxx ;
+
+mux3ds #(8) mx_atm_byte_g_39_32 (
+    .in0 (rs3_byte4[7:0]),
+    .in1 (rs3_byte0[7:0]),
+    .in2 (rs3_byte3[7:0]),
+    .sel0(lsu_atomic_pkt2_bsel_g[2]),
+    .sel1(lsu_atomic_pkt2_bsel_g[1]),
+    .sel2(lsu_atomic_pkt2_bsel_g[0]),
+    .dout(atm_byte_g[39:32]));
+
+//assign  atm_byte_g[47:40] =
+//lsu_atomic_pkt2_bsel_g[2] ? rs3_byte5[7:0] :
+//  lsu_atomic_pkt2_bsel_g[1] ? rs3_byte1[7:0] :
+//    lsu_atomic_pkt2_bsel_g[0] ? rs3_byte2[7:0] : 8'bxxxx_xxxx ;
+
+mux3ds #(8) mx_atm_byte_g_47_40(
+    .in0 (rs3_byte5[7:0]),
+    .in1 (rs3_byte1[7:0]),
+    .in2 (rs3_byte2[7:0]),
+    .sel0(lsu_atomic_pkt2_bsel_g[2]),
+    .sel1(lsu_atomic_pkt2_bsel_g[1]),
+    .sel2(lsu_atomic_pkt2_bsel_g[0]),
+    .dout(atm_byte_g[47:40]));
+   
+//assign  atm_byte_g[55:48] =
+//lsu_atomic_pkt2_bsel_g[2] ? rs3_byte6[7:0] :
+//  lsu_atomic_pkt2_bsel_g[1] ? rs3_byte2[7:0] :
+//    lsu_atomic_pkt2_bsel_g[0] ? rs3_byte1[7:0] : 8'bxxxx_xxxx ;
+
+mux3ds #(8) mx_atm_byte_g_55_48(
+    .in0 (rs3_byte6[7:0]),
+    .in1 (rs3_byte2[7:0]),
+    .in2 (rs3_byte1[7:0]),
+    .sel0(lsu_atomic_pkt2_bsel_g[2]),
+    .sel1(lsu_atomic_pkt2_bsel_g[1]),
+    .sel2(lsu_atomic_pkt2_bsel_g[0]),
+    .dout(atm_byte_g[55:48]));
+   
+//assign  atm_byte_g[63:56] =
+//lsu_atomic_pkt2_bsel_g[2] ? rs3_byte7[7:0] :
+//  lsu_atomic_pkt2_bsel_g[1] ? rs3_byte3[7:0] :
+//    lsu_atomic_pkt2_bsel_g[0] ? rs3_byte0[7:0] : 8'bxxxx_xxxx ;
+
+mux3ds #(8) mx_atm_byte_g_63_56 (
+    .in0 (rs3_byte7[7:0]),
+    .in1 (rs3_byte3[7:0]),
+    .in2 (rs3_byte0[7:0]),
+    .sel0(lsu_atomic_pkt2_bsel_g[2]),
+    .sel1(lsu_atomic_pkt2_bsel_g[1]),
+    .sel2(lsu_atomic_pkt2_bsel_g[0]),
+    .dout(atm_byte_g[63:56]));
+        
+//=================================================================================================
+//    STB/LDXA DATA BYPASSING
+//=================================================================================================
+
+// Add STB to load bypass data flops.
+// Attempt is made to bypass data in G-stage for load. If not
+// possible then flop data and wait for next available bubble.
+// Once bypass occurs then load can be considered resolved.
+// Load Full Raw bypassing does not have to use DFQ.
+
+// ldxa data will reside in bypass flops until an opportunity
+// is available to write to irf. ldxa's must write to lmq
+// in order to provide information such as rd to irf.
+
+// ** The two conditions are mutually exclusive. **
+
+// lsu_local_ldxa_data_w2 w/ lsu_misc_rdata_w2 for all 4 threads
+
+// 1-hot fix: 8/1/03 - can be multihot during scan; rst_tri_en forces the select to 3'b100 (in2 only).
+// grape mapper convert the 1 of the inverter used for the select to the logic below
+wire  [2:0]  lmq_byp_ldxa_sel0_1hot ;
+assign  lmq_byp_ldxa_sel0_1hot[0]  =  lmq_byp_ldxa_sel0[0] & ~rst_tri_en;
+assign  lmq_byp_ldxa_sel0_1hot[1]  =  lmq_byp_ldxa_sel0[1] & ~rst_tri_en;
+assign  lmq_byp_ldxa_sel0_1hot[2]  =  lmq_byp_ldxa_sel0[2] |  rst_tri_en;
+
+
+// THREAD 0 : stage 1 of 3 - choose the ldxa data source.
+mux3ds  #(64) ldbyp0_ldxa_mx (
+  .in0  (ifu_lsu_ldxa_data_w2[63:0]), // ifu-ldxa bypass data
+  //.in1  (tlu_lsu_ldxa_data_w2[63:0]), // tlu-ldxa bypass data
+  .in1  (spu_lsu_ldxa_data_w2[63:0]), // spu-ldxa bypass data
+  .in2  (lsu_misc_rdata_w2[63:0]),    // local asi bypass data
+  .sel0 (lmq_byp_ldxa_sel0_1hot[0]),  
+  //.sel1 (lmq_byp_ldxa_sel0[1]),
+  .sel1 (lmq_byp_ldxa_sel0_1hot[1]),
+  .sel2 (lmq_byp_ldxa_sel0_1hot[2]),
+  .dout (lmq0_bypass_ldxa_data[63:0])
+);
+
+// 1-hot fix: 8/1/03 - can be multihot during scan
+// NOTE(review): despite the note above, these four assigns pass lmq_byp_data_sel0 through unchanged (no rst_tri_en gating here).
+wire  [3:0]  lmq_byp_data_sel0_1hot ;
+assign  lmq_byp_data_sel0_1hot[0]  =  lmq_byp_data_sel0[0] ;
+assign  lmq_byp_data_sel0_1hot[1]  =  lmq_byp_data_sel0[1] ;
+assign  lmq_byp_data_sel0_1hot[2]  =  lmq_byp_data_sel0[2] ;
+assign  lmq_byp_data_sel0_1hot[3]  =  lmq_byp_data_sel0[3] ;
+// Stage 2 of 3 - choose among stb data, rs3 data, cas-formatted data and the ldxa mux output.
+wire 	[63:0]	lmq0_bypass_misc_data ;
+mux4ds  #(64) ldbyp0_data_mx (
+  .in0  (stb_rdata_ramd[63:0]),   // stb bypass data
+  .in1  (exu_lsu_rs3_data_e[63:0]), // rs3 data
+  .in2  (atm_byte_g[63:0]),   // cas formatted data
+  .in3  (lmq0_bypass_ldxa_data[63:0]),  // ldxa bypass data
+  .sel0 (lmq_byp_data_sel0_1hot[0]),  
+  .sel1 (lmq_byp_data_sel0_1hot[1]),
+  .sel2 (lmq_byp_data_sel0_1hot[2]),
+  .sel3 (lmq_byp_data_sel0_1hot[3]),
+  .dout (lmq0_bypass_misc_data[63:0])
+);
+
+// Stage 3 of 3 - tlu_lsu_int_ldxa_data_w2 is taken when lmq_byp_data_fmx_sel[0] is set, else the stage-2 result.
+// 2:1 mux for additional data bus from tlu.
+// Grape : merge into mux-flop.
+mux2ds  #(64) ldbyp0_fmx (
+  .in0  (lmq0_bypass_misc_data[63:0]),
+  .in1  (tlu_lsu_int_ldxa_data_w2[63:0]),
+  .sel0 (~lmq_byp_data_fmx_sel[0]),  
+  .sel1 (lmq_byp_data_fmx_sel[0]),
+  .dout (lmq0_bypass_data_in[63:0])
+);
+
+/* Disabled reference form: a single enable flop.
+dffe_s  #(64) ldbyp0_data_ff (
+        .din    (lmq0_bypass_data_in[63:0]),
+        .q      (lmq0_bypass_data[63:0]),
+        .en     (lmq_byp_data_en_w2[0]), .clk (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+*/
+wire ldbyp0_data_clk;   // NOTE(review): only driver in view is ldbyp0_data_clkbuf, compiled out under FPGA_SYN_CLK_EN; presumably must be paired with FPGA_SYN_CLK_DFF - confirm
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf ldbyp0_data_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~lmq_byp_data_en_w2[0]),
+                .tmb_l  (~se),
+                .clk    (ldbyp0_data_clk)
+                ) ;
+`endif
+// Thread 0 bypass data register, controlled by lmq_byp_data_en_w2[0].
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) ldbyp0_data_ff (
+        .din    (lmq0_bypass_data_in[63:0]),
+        .q      (lmq0_bypass_data[63:0]),
+        .en (~(~lmq_byp_data_en_w2[0])), .clk(clk),  // double inversion: equals lmq_byp_data_en_w2[0]
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dff_s  #(64) ldbyp0_data_ff (
+        .din    (lmq0_bypass_data_in[63:0]),
+        .q      (lmq0_bypass_data[63:0]),
+        .clk    (ldbyp0_data_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+
+`ifdef FPGA_SYN_1THREAD
+`else
+// The bypass paths for threads 1..3 exist only when FPGA_SYN_1THREAD is not defined.
+// THREAD 1 : stage 1 of 3 - choose the ldxa data source.
+// 1-hot fix: 8/1/03 - can be multihot during scan; rst_tri_en forces the select to 3'b100 (in2 only).
+// grape mapper convert the 1 of the inverter used for the select to the logic below
+wire  [2:0]  lmq_byp_ldxa_sel1_1hot ;
+assign  lmq_byp_ldxa_sel1_1hot[0]  =  lmq_byp_ldxa_sel1[0] & ~rst_tri_en;
+assign  lmq_byp_ldxa_sel1_1hot[1]  =  lmq_byp_ldxa_sel1[1] & ~rst_tri_en;
+assign  lmq_byp_ldxa_sel1_1hot[2]  =  lmq_byp_ldxa_sel1[2] |  rst_tri_en;
+
+
+mux3ds  #(64) ldbyp1_ldxa_mx (
+        .in0    (ifu_lsu_ldxa_data_w2[63:0]),   // ifu-ldxa bypass data
+        //.in1    (tlu_lsu_ldxa_data_w2[63:0]),   // tlu-ldxa bypass data
+  	.in1  	(spu_lsu_ldxa_data_w2[63:0]), // spu-ldxa bypass data
+  	.in2  	(lsu_misc_rdata_w2[63:0]),// local asi bypass data
+        .sel0   (lmq_byp_ldxa_sel1_1hot[0]),
+        //.sel1   (lmq_byp_ldxa_sel1[1]),
+  	.sel1 	(lmq_byp_ldxa_sel1_1hot[1]),
+  	.sel2 	(lmq_byp_ldxa_sel1_1hot[2]),
+        .dout   (lmq1_bypass_ldxa_data[63:0])
+);     
+
+// 1-hot fix: 8/1/03 - can be multihot during scan
+// NOTE(review): despite the note above, these four assigns pass lmq_byp_data_sel1 through unchanged (no rst_tri_en gating here).
+wire  [3:0]  lmq_byp_data_sel1_1hot ;
+assign  lmq_byp_data_sel1_1hot[0]  =  lmq_byp_data_sel1[0] ;
+assign  lmq_byp_data_sel1_1hot[1]  =  lmq_byp_data_sel1[1] ;
+assign  lmq_byp_data_sel1_1hot[2]  =  lmq_byp_data_sel1[2] ;
+assign  lmq_byp_data_sel1_1hot[3]  =  lmq_byp_data_sel1[3] ;
+
+// Stage 2 of 3 - choose among stb data, rs3 data, cas-formatted data and the ldxa mux output.
+wire 	[63:0]	lmq1_bypass_misc_data ;
+mux4ds  #(64) ldbyp1_data_mx (
+  .in0  (stb_rdata_ramd[63:0]),   // stb bypass data
+  .in1  (exu_lsu_rs3_data_e[63:0]), // rs3 data
+  .in2  (atm_byte_g[63:0]),   // cas formatted data
+  .in3  (lmq1_bypass_ldxa_data[63:0]),  // ldxa bypass data
+  .sel0 (lmq_byp_data_sel1_1hot[0]),  
+  .sel1 (lmq_byp_data_sel1_1hot[1]),
+  .sel2 (lmq_byp_data_sel1_1hot[2]),
+  .sel3 (lmq_byp_data_sel1_1hot[3]),
+  .dout (lmq1_bypass_misc_data[63:0])
+);
+// Stage 3 of 3 - tlu_lsu_int_ldxa_data_w2 is taken when lmq_byp_data_fmx_sel[1] is set, else the stage-2 result.
+// 2:1 mux for additional data bus from tlu.
+// Grape : merge into mux-flop.
+mux2ds  #(64) ldbyp1_fmx (
+  .in0  (lmq1_bypass_misc_data[63:0]),
+  .in1  (tlu_lsu_int_ldxa_data_w2[63:0]),
+  .sel0 (~lmq_byp_data_fmx_sel[1]),  
+  .sel1 (lmq_byp_data_fmx_sel[1]),
+  .dout (lmq1_bypass_data_in[63:0])
+);
+
+/* Disabled reference form: a single enable flop.
+dffe_s  #(64) ldbyp1_data_ff (
+        .din    (lmq1_bypass_data_in[63:0]),
+        .q      (lmq1_bypass_data[63:0]),
+        .en     (lmq_byp_data_en_w2[1]), .clk (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+*/
+wire ldbyp1_data_clk;   // NOTE(review): only driver in view is ldbyp1_data_clkbuf, compiled out under FPGA_SYN_CLK_EN; presumably must be paired with FPGA_SYN_CLK_DFF - confirm
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf ldbyp1_data_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~lmq_byp_data_en_w2[1]),
+                .tmb_l  (~se),
+                .clk    (ldbyp1_data_clk)
+                ) ;
+`endif
+// Thread 1 bypass data register, controlled by lmq_byp_data_en_w2[1].
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) ldbyp1_data_ff (
+        .din    (lmq1_bypass_data_in[63:0]),
+        .q      (lmq1_bypass_data[63:0]),
+        .en (~(~lmq_byp_data_en_w2[1])), .clk(clk),  // double inversion: equals lmq_byp_data_en_w2[1]
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dff_s  #(64) ldbyp1_data_ff (
+        .din    (lmq1_bypass_data_in[63:0]),
+        .q      (lmq1_bypass_data[63:0]),
+        .clk    (ldbyp1_data_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+   
+// THREAD 2 : stage 1 of 3 - choose the ldxa data source.
+// 1-hot fix: 8/1/03 - can be multihot during scan; rst_tri_en forces the select to 3'b100 (in2 only).
+// grape mapper convert the 1 of the inverter used for the select to the logic below
+wire  [2:0]  lmq_byp_ldxa_sel2_1hot ;
+assign  lmq_byp_ldxa_sel2_1hot[0]  =  lmq_byp_ldxa_sel2[0] & ~rst_tri_en;
+assign  lmq_byp_ldxa_sel2_1hot[1]  =  lmq_byp_ldxa_sel2[1] & ~rst_tri_en;
+assign  lmq_byp_ldxa_sel2_1hot[2]  =  lmq_byp_ldxa_sel2[2] |  rst_tri_en;
+
+// NOTE(review): this ldxa-source mux is named ldbyp2_data_mx, while threads 0/1 name the equivalent instance ldbypN_ldxa_mx.
+mux3ds  #(64) ldbyp2_data_mx (
+        .in0    (ifu_lsu_ldxa_data_w2[63:0]),   // ifu-ldxa bypass data
+        //.in1    (tlu_lsu_ldxa_data_w2[63:0]),   // tlu-ldxa bypass data
+  	.in1  	(spu_lsu_ldxa_data_w2[63:0]), // spu-ldxa bypass data
+  	.in2  	(lsu_misc_rdata_w2[63:0]),// local asi bypass data
+        .sel0   (lmq_byp_ldxa_sel2_1hot[0]),
+        //.sel1   (lmq_byp_ldxa_sel2[1]),
+  	.sel1 (lmq_byp_ldxa_sel2_1hot[1]),
+  	.sel2 (lmq_byp_ldxa_sel2_1hot[2]),
+        .dout   (lmq2_bypass_ldxa_data[63:0])
+);     
+
+// 1-hot fix: 8/1/03 - can be multihot during scan
+// NOTE(review): despite the note above, these four assigns pass lmq_byp_data_sel2 through unchanged (no rst_tri_en gating here).
+wire  [3:0]  lmq_byp_data_sel2_1hot ;
+assign  lmq_byp_data_sel2_1hot[0]  =  lmq_byp_data_sel2[0] ;
+assign  lmq_byp_data_sel2_1hot[1]  =  lmq_byp_data_sel2[1] ;
+assign  lmq_byp_data_sel2_1hot[2]  =  lmq_byp_data_sel2[2] ;
+assign  lmq_byp_data_sel2_1hot[3]  =  lmq_byp_data_sel2[3] ;
+
+// Stage 2 of 3 - data-source mux. NOTE(review): named ldbyp2_ldxa_mx here, whereas threads 0/1 call it ldbypN_data_mx.
+wire 	[63:0]	lmq2_bypass_misc_data ;
+mux4ds  #(64) ldbyp2_ldxa_mx (
+  .in0  (stb_rdata_ramd[63:0]),   // stb bypass data
+  .in1  (exu_lsu_rs3_data_e[63:0]), // rs3 data
+  .in2  (atm_byte_g[63:0]),   // cas formatted data
+  .in3  (lmq2_bypass_ldxa_data[63:0]),  // ldxa bypass data
+  .sel0 (lmq_byp_data_sel2_1hot[0]),  
+  .sel1 (lmq_byp_data_sel2_1hot[1]),
+  .sel2 (lmq_byp_data_sel2_1hot[2]),
+  .sel3 (lmq_byp_data_sel2_1hot[3]),
+  .dout (lmq2_bypass_misc_data[63:0])
+);
+// Stage 3 of 3 - tlu_lsu_int_ldxa_data_w2 is taken when lmq_byp_data_fmx_sel[2] is set, else the stage-2 result.
+// 2:1 mux for additional data bus from tlu.
+// Grape : merge into mux-flop.
+mux2ds  #(64) ldbyp2_fmx (
+  .in0  (lmq2_bypass_misc_data[63:0]),
+  .in1  (tlu_lsu_int_ldxa_data_w2[63:0]),
+  .sel0 (~lmq_byp_data_fmx_sel[2]),  
+  .sel1 (lmq_byp_data_fmx_sel[2]),
+  .dout (lmq2_bypass_data_in[63:0])
+);
+
+/* Disabled reference form: a single enable flop.
+dffe_s  #(64) ldbyp2_data_ff (
+        .din    (lmq2_bypass_data_in[63:0]),
+        .q      (lmq2_bypass_data[63:0]),
+        .en     (lmq_byp_data_en_w2[2]), .clk (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+*/
+wire ldbyp2_data_clk;   // NOTE(review): only driver in view is ldbyp2_data_clkbuf, compiled out under FPGA_SYN_CLK_EN; presumably must be paired with FPGA_SYN_CLK_DFF - confirm
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf ldbyp2_data_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~lmq_byp_data_en_w2[2]),
+                .tmb_l  (~se),
+                .clk    (ldbyp2_data_clk)
+                ) ;
+`endif
+// Thread 2 bypass data register, controlled by lmq_byp_data_en_w2[2].
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) ldbyp2_data_ff (
+        .din    (lmq2_bypass_data_in[63:0]),
+        .q      (lmq2_bypass_data[63:0]),
+        .en (~(~lmq_byp_data_en_w2[2])), .clk(clk),  // double inversion: equals lmq_byp_data_en_w2[2]
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dff_s  #(64) ldbyp2_data_ff (
+        .din    (lmq2_bypass_data_in[63:0]),
+        .q      (lmq2_bypass_data[63:0]),
+        .clk    (ldbyp2_data_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+   
+// THREAD 3 : stage 1 of 3 - choose the ldxa data source.
+// 1-hot fix: 8/1/03 - can be multihot during scan; rst_tri_en forces the select to 3'b100 (in2 only).
+// grape mapper convert the 1 of the inverter used for the select to the logic below
+wire  [2:0]  lmq_byp_ldxa_sel3_1hot ;
+assign  lmq_byp_ldxa_sel3_1hot[0]  =  lmq_byp_ldxa_sel3[0] & ~rst_tri_en;
+assign  lmq_byp_ldxa_sel3_1hot[1]  =  lmq_byp_ldxa_sel3[1] & ~rst_tri_en;
+assign  lmq_byp_ldxa_sel3_1hot[2]  =  lmq_byp_ldxa_sel3[2] |  rst_tri_en;
+
+// NOTE(review): this ldxa-source mux is named ldbyp3_data_mx, while threads 0/1 name the equivalent instance ldbypN_ldxa_mx.
+mux3ds  #(64) ldbyp3_data_mx (
+        .in0    (ifu_lsu_ldxa_data_w2[63:0]),   // ifu-ldxa bypass data
+        //.in1    (tlu_lsu_ldxa_data_w2[63:0]),   // tlu-ldxa bypass data
+  	.in1  	(spu_lsu_ldxa_data_w2[63:0]), // spu-ldxa bypass data
+  	.in2  	(lsu_misc_rdata_w2[63:0]),// local asi bypass data
+        .sel0   (lmq_byp_ldxa_sel3_1hot[0]),
+        //.sel1   (lmq_byp_ldxa_sel3[1]),
+  	.sel1 	(lmq_byp_ldxa_sel3_1hot[1]),
+  	.sel2 	(lmq_byp_ldxa_sel3_1hot[2]),
+        .dout   (lmq3_bypass_ldxa_data[63:0])
+);     
+
+// 1-hot fix: 8/1/03 - can be multihot during scan
+// NOTE(review): despite the note above, these four assigns pass lmq_byp_data_sel3 through unchanged (no rst_tri_en gating here).
+wire  [3:0]  lmq_byp_data_sel3_1hot ;
+assign  lmq_byp_data_sel3_1hot[0]  =  lmq_byp_data_sel3[0] ;
+assign  lmq_byp_data_sel3_1hot[1]  =  lmq_byp_data_sel3[1] ;
+assign  lmq_byp_data_sel3_1hot[2]  =  lmq_byp_data_sel3[2] ;
+assign  lmq_byp_data_sel3_1hot[3]  =  lmq_byp_data_sel3[3] ;
+
+// Stage 2 of 3 - data-source mux. NOTE(review): named ldbyp3_ldxa_mx here, whereas threads 0/1 call it ldbypN_data_mx.
+wire 	[63:0]	lmq3_bypass_misc_data ;
+mux4ds  #(64) ldbyp3_ldxa_mx (
+  .in0  (stb_rdata_ramd[63:0]),   // stb bypass data
+  .in1  (exu_lsu_rs3_data_e[63:0]), // rs3 data
+  .in2  (atm_byte_g[63:0]),   // cas formatted data
+  .in3  (lmq3_bypass_ldxa_data[63:0]),  // ldxa bypass data
+  .sel0 (lmq_byp_data_sel3_1hot[0]),  
+  .sel1 (lmq_byp_data_sel3_1hot[1]),
+  .sel2 (lmq_byp_data_sel3_1hot[2]),
+  .sel3 (lmq_byp_data_sel3_1hot[3]),
+  .dout (lmq3_bypass_misc_data[63:0])
+);
+// Stage 3 of 3 - tlu_lsu_int_ldxa_data_w2 is taken when lmq_byp_data_fmx_sel[3] is set, else the stage-2 result.
+// 2:1 mux for additional data bus from tlu.
+// Grape : merge into mux-flop.
+mux2ds  #(64) ldbyp3_fmx (
+  .in0  (lmq3_bypass_misc_data[63:0]),
+  .in1  (tlu_lsu_int_ldxa_data_w2[63:0]),
+  .sel0 (~lmq_byp_data_fmx_sel[3]),  
+  .sel1 (lmq_byp_data_fmx_sel[3]),
+  .dout (lmq3_bypass_data_in[63:0])
+);
+
+/* Disabled reference form: a single enable flop.
+dffe_s  #(64) ldbyp3_data_ff (
+        .din    (lmq3_bypass_data_in[63:0]),
+        .q      (lmq3_bypass_data[63:0]),
+        .en     (lmq_byp_data_en_w2[3]), .clk (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+*/
+wire ldbyp3_data_clk;   // NOTE(review): only driver in view is ldbyp3_data_clkbuf, compiled out under FPGA_SYN_CLK_EN; presumably must be paired with FPGA_SYN_CLK_DFF - confirm
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf ldbyp3_data_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~lmq_byp_data_en_w2[3]),
+                .tmb_l  (~se),
+                .clk    (ldbyp3_data_clk)
+                ) ;
+`endif
+// Thread 3 bypass data register, controlled by lmq_byp_data_en_w2[3].
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) ldbyp3_data_ff (
+        .din    (lmq3_bypass_data_in[63:0]),
+        .q      (lmq3_bypass_data[63:0]),
+        .en (~(~lmq_byp_data_en_w2[3])), .clk(clk),  // double inversion: equals lmq_byp_data_en_w2[3]
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dff_s  #(64) ldbyp3_data_ff (
+        .din    (lmq3_bypass_data_in[63:0]),
+        .q      (lmq3_bypass_data[63:0]),
+        .clk    (ldbyp3_data_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+// Closes the non-FPGA_SYN_1THREAD region that holds the thread 1..3 bypass paths.
+`endif
+// cas_pkt2_data and tlb_st_data each pick one of the four lmqN_bypass_data buses.
+`ifdef FPGA_SYN_1THREAD
+  assign cas_pkt2_data[63:0] = lmq0_bypass_data[63:0];  // 1-thread build: always lmq0
+  assign tlb_st_data[63:0] = lmq0_bypass_data[63:0];    // 1-thread build: always lmq0
+`else
+// This can be merged with above mux !!!!
+mux4ds  #(64) ld_byp_cas_mx (
+  .in0  (lmq0_bypass_data[63:0]),
+  .in1  (lmq1_bypass_data[63:0]),
+  .in2  (lmq2_bypass_data[63:0]),
+  .in3  (lmq3_bypass_data[63:0]),
+  .sel0 (ld_pcx_rq_sel[0]),  
+  .sel1   (ld_pcx_rq_sel[1]),
+  .sel2 (ld_pcx_rq_sel[2]),  
+  .sel3   (ld_pcx_rq_sel[3]),
+  .dout (cas_pkt2_data[63:0])  // low 64 bits of the load input (in0) of pcx_pkt_src
+);
+// tlb_st_data: same four sources, selected by lsu_tlb_st_sel_m.
+// Can this be merged with above muxes ?
+mux4ds  #(64) tlb_st_mx (
+  .in0  (lmq0_bypass_data[63:0]),
+  .in1  (lmq1_bypass_data[63:0]),
+  .in2  (lmq2_bypass_data[63:0]),
+  .in3  (lmq3_bypass_data[63:0]),
+  .sel0 (lsu_tlb_st_sel_m[0]),  
+  .sel1   (lsu_tlb_st_sel_m[1]),
+  .sel2 (lsu_tlb_st_sel_m[2]),  
+  .sel3   (lsu_tlb_st_sel_m[3]),
+  .dout (tlb_st_data[63:0])  // din of the flops std_d1 and ifu_std_d1
+);
+`endif
+
+/*mux4ds  #(64) tlb_st_mx (
+  .in0  (lmq0_bypass_data[63:0]),
+  .in1  (lmq1_bypass_data[63:0]),
+  .in2  (lmq2_bypass_data[63:0]),
+  .in3  (lmq3_bypass_data[63:0]),
+  .sel0 (lsu_tlb_st_sel_g[0]),  
+  .sel1   (lsu_tlb_st_sel_g[1]),
+  .sel2 (lsu_tlb_st_sel_g[2]),  
+  .sel3   (lsu_tlb_st_sel_g[3]),
+  .dout (tlb_st_data[63:0])
+);*/
+
+wire [63:0] tlb_st_data_d1 ;   // tlb_st_data after the std_d1 flop
+dff_s #(64) std_d1 (
+    .clk (clk),
+    .din (tlb_st_data[63:0]),
+    .q   (tlb_st_data_d1[63:0]),
+    .se  (1'b0), .si (), .so ()   // se tied low; si/so left unconnected
+);
+
+// Begin - Bug3487. 
+// Separate 48b stxa-data flop for the IFU: captures tlb_st_data[47:0] under the
+// enable lsu_ifu_asi_data_en_l (used as enb_l of the clock gate, inverted for dffe_s.en).
+wire asi_data_clk;   // gated clock; only driven when FPGA_SYN_CLK_EN is not defined
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf asid_clkbuf (
+                .rclk   (clk),
+                .enb_l  (lsu_ifu_asi_data_en_l),
+                .tmb_l  (~se),
+                .clk    (asi_data_clk)
+                ) ;   
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) ifu_std_d1 (
+        .din    (tlb_st_data[47:0]), 
+        .q      (lsu_ifu_stxa_data[47:0]),
+        .en (~(lsu_ifu_asi_data_en_l)), .clk(clk),  // en is high when the _l enable is low
+        .se     (1'b0),     .si (),          .so ()
+        );
+`else
+dff_s  #(48) ifu_std_d1 (
+        .din    (tlb_st_data[47:0]), 
+        .q      (lsu_ifu_stxa_data[47:0]),
+        .clk    (asi_data_clk),
+        .se     (1'b0),     .si (),          .so ()
+        );
+`endif
+
+// select is now a stage earlier, which should be
+// fine as selects stay constant.
+//assign  lsu_ifu_stxa_data[47:0] = tlb_st_data_d1[47:0] ;
+
+// End - Bug3487. 
+
+
+//wire    [3:0]   lsu_diag_access_sel_d1 ;
+
+//dff #(4)  diagsel_stgd1 (
+//        .din    (lsu_diag_access_sel[3:0]),
+//        .q      (lsu_diag_access_sel_d1[3:0]),
+//        .clk    (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        ); 
+
+//mux4ds  #(64) diag_st_mx (
+//  .in0  (lmq0_bypass_data[63:0]),
+//  .in1  (lmq1_bypass_data[63:0]),
+//  .in2  (lmq2_bypass_data[63:0]),
+//  .in3  (lmq3_bypass_data[63:0]),
+//  .sel0 (lsu_diag_access_sel_d1[0]),  
+//  .sel1 (lsu_diag_access_sel_d1[1]),
+//  .sel2 (lsu_diag_access_sel_d1[2]),  
+//  .sel3 (lsu_diag_access_sel_d1[3]),
+//  .dout (lsu_diagnstc_wr_data_e[63:0])
+//);
+
+// 1-hot fix (8/1/03): the raw select can be multi-hot during scan.
+// While rst_tri_en is high the select is forced to 4'b1000 (input 3 only);
+// while it is low, lsu_diagnstc_data_sel passes through unchanged.
+wire  [3:0]  lsu_diagnstc_data_sel_1hot ;
+assign  lsu_diagnstc_data_sel_1hot[3:0]  =
+          {  lsu_diagnstc_data_sel[3]   |     rst_tri_en,
+             lsu_diagnstc_data_sel[2:0] & {3{~rst_tri_en}} } ;
+
+// Diagnostic write data: one lmqN_bypass_data bus, chosen by lsu_diagnstc_data_sel_1hot.
+`ifdef FPGA_SYN_1THREAD
+  assign lsu_diagnstc_wr_data_e[63:0] = lmq0_bypass_data[63:0];  // 1-thread build: always lmq0
+`else
+mux4ds  #(64) diag_st_mx (
+  .in0  (lmq0_bypass_data[63:0]),
+  .in1  (lmq1_bypass_data[63:0]),
+  .in2  (lmq2_bypass_data[63:0]),
+  .in3  (lmq3_bypass_data[63:0]),
+  .sel0 (lsu_diagnstc_data_sel_1hot[0]),  
+  .sel1 (lsu_diagnstc_data_sel_1hot[1]),
+  .sel2 (lsu_diagnstc_data_sel_1hot[2]),  
+  .sel3 (lsu_diagnstc_data_sel_1hot[3]),  // the only select left high while rst_tri_en is high
+  .dout (lsu_diagnstc_wr_data_e[63:0])
+);
+`endif
+   
+// Remove flops
+/*dff  #(64) dgndt_d1 (
+        .din    (tlb_st_data[63:0]), 
+        .q      (lsu_diagnstc_wr_data_e[63:0]),
+        .clk    (clk),
+        .se     (1'b0),     .si (),          .so ()
+        ); */
+
+assign lsu_diagnstc_wr_data_b0 = lsu_diagnstc_wr_data_e[0] ;
+
+// Move tte format and parity calc to tlbdp
+
+//assign lsu_tlu_st_rs3_data_g[63:0] = tlb_st_data_d1[63:0];
+assign lsu_tlu_st_rs3_data_g[47:3] = tlb_st_data_d1[47:3];
+assign lsu_mmu_rs3_data_g[63:0] = tlb_st_data_d1[63:0];
+assign lsu_tlu_rs3_data_g[63:0] = tlb_st_data_d1[63:0];
+
+// Removed Fast bypass as penalty is negligible.
+
+//=================================================================================================
+//    STQ PKT2 DATA
+//=================================================================================================
+
+//** stquad support removed **
+
+//=================================================================================================
+//    IMISS/SPU DP
+//=================================================================================================
+
+// Format of IFU pcx packet (52b, ifu_pcx_pkt[51:0]) as unpacked below :
+//  b51 - valid ; b50 - copied to PCX_BF_HI ; b49 - copied to PCX_NC
+//  b48:44 - req type
+//  b43:42 - rep way (for "eviction" - maintains directory consistency )
+//  b41:40 - mil id
+//  b39:0  - imiss address
+
+
+// ifu_pcx_pkt_e is a direct (unflopped) copy of ifu_pcx_pkt.
+// Original note: align ifu pkt with ldst pkt - temporary; no enable is needed.
+assign  ifu_pcx_pkt_e[51:0] = ifu_pcx_pkt[51:0] ;
+
+// Form pcx-wide ifu request packet.
+assign  ifu_full_pcx_pkt_e[`PCX_VLD] = ifu_pcx_pkt_e[51] ;
+assign  ifu_full_pcx_pkt_e[`PCX_RQ_HI:`PCX_RQ_LO] = ifu_pcx_pkt_e[48:44];
+assign  ifu_full_pcx_pkt_e[`PCX_NC] = ifu_pcx_pkt_e[49] ;
+assign  ifu_full_pcx_pkt_e[`PCX_CP_HI:`PCX_CP_LO] = const_cpuid[2:0] ;
+// thread-id unused - use mil id instead.
+assign  ifu_full_pcx_pkt_e[`PCX_TH_HI:`PCX_TH_LO] = ifu_pcx_pkt_e[41:40] ;
+assign  ifu_full_pcx_pkt_e[`PCX_BF_HI] =  ifu_pcx_pkt_e[50] ;
+assign  ifu_full_pcx_pkt_e[`PCX_BF_HI-1:`PCX_BF_LO] =  2'b00;   // remaining BF bits tied low
+assign  ifu_full_pcx_pkt_e[`PCX_WY_HI:`PCX_WY_LO] =  ifu_pcx_pkt_e[43:42] ;   // rep way
+// unused - always infer 32b
+assign  ifu_full_pcx_pkt_e[`PCX_SZ_HI:`PCX_SZ_LO] =  3'b000 ;
+assign  ifu_full_pcx_pkt_e[`PCX_AD_HI:`PCX_AD_LO] =  ifu_pcx_pkt_e[39:0] ;   // imiss address
+// no data
+assign  ifu_full_pcx_pkt_e[`PCX_DA_HI:`PCX_DA_LO] =  64'd0 ;
+
+// Form pcx-wide interrupt request packet from tlu_lsu_pcxpkt[25:0].
+assign  intrpt_full_pcxpkt[`PCX_VLD] = tlu_lsu_pcxpkt[25] ;
+assign  intrpt_full_pcxpkt[`PCX_RQ_HI:`PCX_RQ_LO] = tlu_lsu_pcxpkt[24:20];
+assign  intrpt_full_pcxpkt[`PCX_NC] = 1'b0 ;
+
+//tlu_lsu_pcxpkt[12:8] is the 5 bit interrupt destination thread id,
+//so [12:10] is the cpu id, and [9:8] is the thread id.   
+assign  intrpt_full_pcxpkt[`PCX_CP_HI:`PCX_CP_LO] = tlu_lsu_pcxpkt[12:10];
+
+// NOTE(review): TH is driven from [19:18], not the [9:8] thread id described above - confirm.
+assign  intrpt_full_pcxpkt[`PCX_TH_HI:`PCX_TH_LO] = tlu_lsu_pcxpkt[19:18] ;
+// Fields not used by this packet are tied to 0 (original note: may actually make them x).
+assign  intrpt_full_pcxpkt[`PCX_BF_HI:`PCX_BF_LO] =  3'b000;
+assign  intrpt_full_pcxpkt[`PCX_WY_HI:`PCX_WY_LO] =  2'b00 ;
+assign  intrpt_full_pcxpkt[`PCX_SZ_HI:`PCX_SZ_LO] =  3'b000 ;
+assign  intrpt_full_pcxpkt[`PCX_AD_HI:`PCX_AD_LO] =  40'd0 ;
+assign  intrpt_full_pcxpkt[`PCX_DA_HI:`PCX_DA_LO] =  {46'd0,tlu_lsu_pcxpkt[17:0]} ;   // low 18 bits, zero-extended to 64
+
+// Format fpop_full_pcxpkt from ffu_lsu_data.
+// Request type is {4'b0101, ffu_lsu_data[78]}; NC and the BF..SZ fields are tied to 0.
+assign  fpop_full_pcxpkt[`PCX_VLD] = ffu_lsu_data[80] ;
+assign  fpop_full_pcxpkt[`PCX_RQ_HI:`PCX_RQ_LO] = {4'b0101,ffu_lsu_data[78]} ;
+assign  fpop_full_pcxpkt[`PCX_NC] = 1'b0 ;
+assign  fpop_full_pcxpkt[`PCX_CP_HI:`PCX_CP_LO] = const_cpuid[2:0] ;
+assign  fpop_full_pcxpkt[`PCX_TH_HI:`PCX_TH_LO] = ffu_lsu_data[77:76] ;
+assign  fpop_full_pcxpkt[`PCX_BF_HI:`PCX_SZ_LO] = 8'd0 ;
+assign  fpop_full_pcxpkt[`PCX_AD_HI:`PCX_AD_LO+16] = 24'd0 ;      
+assign  fpop_full_pcxpkt[`PCX_AD_LO+15:`PCX_AD_LO+8] = ffu_lsu_data[75:68]; // 79:72
+assign  fpop_full_pcxpkt[`PCX_AD_LO+7:`PCX_AD_LO+4] = 4'b0000;      // 71:68
+assign  fpop_full_pcxpkt[`PCX_AD_LO+3:`PCX_AD_LO] = ffu_lsu_data[67:64] ; // 67:64
+assign  fpop_full_pcxpkt[`PCX_DA_HI:`PCX_DA_LO] = ffu_lsu_data[63:0] ;
+
+
+// RAMTest Data Merging: pick a 64b read-data source, then capture it when lsu_ramtest_rd_w is high.
+wire cacherd_clk;   // gated clock; only driven when FPGA_SYN_CLK_EN is not defined
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf cacherd_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~lsu_ramtest_rd_w),  // inverted copy of lsu_ramtest_rd_w
+                .tmb_l  (~se),
+                .clk    (cacherd_clk)
+                ) ;   
+`endif
+
+wire  [63:0]  cache_rdata_w,cache_rdata_w2 ;   // mux output and its flopped copy
+
+mux2ds  #(64) cacherd_sel (
+  .in0  (ifu_lsu_ldxa_data_w2[63:0]),
+  .in1  (lsu_dcache_rdata_w[63:0]),
+  .sel0 (~lsu_dcache_iob_rd_w),  // lsu_dcache_iob_rd_w low  -> ifu_lsu_ldxa_data_w2
+  .sel1 (lsu_dcache_iob_rd_w),   // lsu_dcache_iob_rd_w high -> lsu_dcache_rdata_w
+  .dout (cache_rdata_w[63:0])
+);
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) cachedata (
+        .din    (cache_rdata_w[63:0]),
+        .q      (cache_rdata_w2[63:0]), // references dcache rd staging
+        .en (~(~lsu_ramtest_rd_w)), .clk(clk),  // double inversion: en follows lsu_ramtest_rd_w
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(64) cachedata (
+        .din    (cache_rdata_w[63:0]),
+        .q      (cache_rdata_w2[63:0]), // references dcache rd staging
+        .clk    (cacherd_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+// Form pcx-wide forward packet: control and address come from lsu_pcx_fwd_pkt[107:64].
+assign  fwd_full_pcxpkt[`PCX_VLD] = 1'b1 ;   // valid bit is hardwired high in this packet
+assign  fwd_full_pcxpkt[`PCX_RQ_HI:`PCX_RQ_LO] = {3'b011,lsu_pcx_fwd_reply,~lsu_pcx_fwd_reply} ;   // 5'b01110 if reply, else 5'b01101
+assign  fwd_full_pcxpkt[`PCX_NC] = lsu_pcx_fwd_pkt[107] ;
+assign  fwd_full_pcxpkt[`PCX_CP_HI:`PCX_CP_LO] = lsu_pcx_fwd_pkt[106:104] ;
+assign  fwd_full_pcxpkt[`PCX_TH_HI:`PCX_TH_LO] = 2'b00 ;
+assign  fwd_full_pcxpkt[`PCX_BF_HI:`PCX_SZ_LO] = 
+			{6'b000000,lsu_fwd_rply_sz1_unc,1'b1} ;
+// All address bits should not be required !!!
+assign  fwd_full_pcxpkt[`PCX_AD_HI:`PCX_AD_LO] = lsu_pcx_fwd_pkt[103:64] ;
+
+//  Mux sources of TAP request data - margin,pc,defeature/debug/bist.
+// Be careful about pc - could be a critical path.
+// ** Assume read-data stays constant at output latches of dcache **
+//assign  fwd_full_pcxpkt[`PCX_DA_HI:`PCX_DA_LO] =
+//lsu_iobrdge_rply_data_sel[0] ?  {20'd0,lsu_iobrdge_rd_data[43:0]} :
+//	lsu_iobrdge_rply_data_sel[1] ?  cache_rdata_w2[63:0] : 
+//        		lsu_iobrdge_rply_data_sel[2] ?  lsu_pcx_fwd_pkt[63:0] : 
+//                            				64'hxxxx_xxxx_xxxx_xxxx ;
+// The 3:1 mux below replaces the commented-out priority select above.
+mux3ds #(64) mx_fwd_full_pcxpkt (
+    .in0 ({20'd0,lsu_iobrdge_rd_data[43:0]}),   // 44b iobrdge read data, zero-extended to 64
+    .in1 (cache_rdata_w2[63:0]),                // q of the cachedata flop
+    .in2 (lsu_pcx_fwd_pkt[63:0]),               // low 64 bits of the fwd packet
+    .sel0(lsu_iobrdge_rply_data_sel[0]),
+    .sel1(lsu_iobrdge_rply_data_sel[1]),
+    .sel2(lsu_iobrdge_rply_data_sel[2]),
+    .dout(fwd_full_pcxpkt[`PCX_DA_HI:`PCX_DA_LO]));
+        
+   
+wire  [`PCX_WIDTH-1:0]  spu_lsu_ldst_pckt_d1 ;   // spu_lsu_ldst_pckt after the flop below
+dff_s  #(`PCX_WIDTH) ff_spu_lsu_ldst_pckt_d1 (
+        .din  (spu_lsu_ldst_pckt[`PCX_WIDTH-1:0]), 
+        .q    (spu_lsu_ldst_pckt_d1[`PCX_WIDTH-1:0]),
+        .clk  (clk),
+        .se   (1'b0),     .si (),          .so ()
+        ); 
+// imiss_pcx_mx_sel high -> ifu packet; low -> flopped spu packet.
+assign  imiss_strm_pcx_pkt[`PCX_WIDTH-1:0] = imiss_pcx_mx_sel ?  
+          ifu_full_pcx_pkt_e[`PCX_WIDTH-1:0] : spu_lsu_ldst_pckt_d1[`PCX_WIDTH-1:0] ;
+// 3:1 mux of the fwd, interrupt and fpop packets; its output is in3 of pcx_pkt_src.
+wire  [`PCX_WIDTH-1:0]  fwd_int_fp_pcx_pkt ;   
+mux3ds #(`PCX_WIDTH) mux_fwd_int_fp_pcx_pkt (
+     .in0  (fwd_full_pcxpkt[`PCX_WIDTH-1:0]),                                        
+     .in1  (intrpt_full_pcxpkt[`PCX_WIDTH-1:0]),
+     .in2  (fpop_full_pcxpkt[`PCX_WIDTH-1:0]),
+     .sel0 (fwd_int_fp_pcx_mx_sel[0]),   // selects the fwd packet
+     .sel1 (fwd_int_fp_pcx_mx_sel[1]),   // selects the interrupt packet
+     .sel2 (fwd_int_fp_pcx_mx_sel[2]),   // selects the fpop packet
+     .dout (fwd_int_fp_pcx_pkt [`PCX_WIDTH-1:0])                                        
+);
+
+//=================================================================================================
+//    PCX PKT SELECTION
+//=================================================================================================
+
+assign stb_pcx_pkt[`STB_PCX_VLD] = lsu_stb_pcx_rvld_d1 ;                // Valid
+// Support stores for now.
+assign stb_pcx_pkt[`STB_PCX_RQ_HI:`STB_PCX_RQ_LO] = stb_rdata_ramd[74:72] ;     // Rq-type
+assign stb_pcx_pkt[`STB_PCX_NC] = 
+	// NC is a 3-input OR (extended from the earlier gate) of ramd[74], ramd[73] and ramd[71]
+	stb_rdata_ramd[74] | stb_rdata_ramd[73] | 	// atomics
+	stb_rdata_ramd[71] ;  				// flush inst 
+// cpu-id will be inserted on way out of core.
+assign  stb_pcx_pkt[`STB_PCX_TH_HI:`STB_PCX_TH_LO] = lsu_stb_rd_tid[1:0] ;    // TID
+// bf-id is not required.
+// mux will have to be placed elsewhere. (grape)
+assign  stb_pcx_pkt[`STB_PCX_FLSH] = stb_rdata_ramd[71] ;	// flush
+assign  stb_pcx_pkt[`STB_PCX_FLSH-1] = 1'b0 ;   // bit just below FLSH tied low
+//assign  stb_pcx_pkt[`STB_PCX_WY_HI:`STB_PCX_WY_LO] = 2'b00 ;
+
+//bug 2511   
+assign  stb_pcx_pkt[`STB_PCX_SZ_HI:`STB_PCX_SZ_LO] =
+                        stb_rdata_ramd[69:68];                          // Size
+
+//assign  stb_pcx_pkt[`STB_PCX_AD_HI:`STB_PCX_AD_LO] = stb_pcx_pkt[`STB_PCX_FLSH] ? 40'b0 :
+//                        {stb_rdata_ramc[44:9],stb_rdata_ramd[67:64]} ;// Addr
+// Address is no longer zeroed for flush (see the commented-out form above).
+assign  stb_pcx_pkt[`STB_PCX_AD_HI:`STB_PCX_AD_LO] = 
+                        {stb_rdata_ramc[44:9],stb_rdata_ramd[67:64]} ;// Addr
+
+
+assign  stb_pcx_pkt[`STB_PCX_DA_HI:`STB_PCX_DA_LO] =
+                        stb_rdata_ramd[63:0];                           // Data   
+// store_pcx_pkt is a straight copy of stb_pcx_pkt.
+assign  store_pcx_pkt[`STB_PCX_WIDTH-1:0] = stb_pcx_pkt[`STB_PCX_WIDTH-1:0] ;
+
+// bld addr select: load-packet address bits [5:4] come from lsu_bld_rq_addr when lsu_bld_pcx_rq is high.
+wire [1:0] bld_addr_b54 ;
+assign	bld_addr_b54[1:0] =
+	lsu_bld_pcx_rq ? lsu_bld_rq_addr[1:0] : load_pcx_pkt[`LMQ_AD_LO+5:`LMQ_AD_LO+4] ; 
+
+// Select the outbound pcx packet from four sources: load, store, imiss/stream, fwd/interrupt/fpop.
+// *** cpu-id currently hardwired in pkt
+// *** Thrd id currently hardwired.
+mux4ds  #(124) pcx_pkt_src (
+  .in0  ({load_pcx_pkt[`LMQ_VLD],2'b00,
+    load_pcx_pkt[`LMQ_RQ_HI: `LMQ_RQ_LO],
+    load_pcx_pkt[`LMQ_NC],const_cpuid[2:0],   
+    ld_pcx_thrd[1:0],lsu_pcx_ld_dtag_perror_w2,
+    load_pcx_pkt[`LMQ_PREF],load_pcx_pkt[`LMQ_DFLUSH],
+    load_pcx_pkt[`LMQ_WY_HI:`LMQ_WY_LO],lsu_pcx_rq_sz_b3,
+    //load_pcx_pkt[`LMQ_WY_HI:`LMQ_WY_LO],1'b0,
+    //load_pcx_pkt[`LMQ_SZ_HI:0],cas_pkt2_data[63:0]}), // load
+    load_pcx_pkt[`LMQ_SZ_HI:`LMQ_AD_LO+6], bld_addr_b54[1:0], 
+    load_pcx_pkt[`LMQ_AD_LO+3:`LMQ_AD_LO],cas_pkt2_data[63:0]}), // load
+  .in1  ({store_pcx_pkt[`STB_PCX_VLD],1'b0, 
+  store_pcx_pkt[`STB_PCX_FLSH],	// turn into interrupt request.
+    store_pcx_pkt[`STB_PCX_RQ_HI:`STB_PCX_RQ_LO],
+    store_pcx_pkt[`STB_PCX_NC], const_cpuid[2:0],
+    store_pcx_pkt[`STB_PCX_TH_HI:`STB_PCX_TH_LO],
+    1'b0,
+    stb_rdata_ramd[70], // blk-st : Bug 3395
+    stb_rdata_ramd[75], 
+    2'b00,
+    //store_pcx_pkt[`STB_PCX_WY_HI:`STB_PCX_WY_LO],
+    1'b0,store_pcx_pkt[`STB_PCX_SZ_HI:0]}),     // store
+  .in2  (imiss_strm_pcx_pkt[`PCX_WIDTH-1:0]),   // alt src : imiss,stream.
+  .in3  (fwd_int_fp_pcx_pkt[`PCX_WIDTH-1:0]),   // fwd, interrupt, fpop                           
+  .sel0 (pcx_pkt_src_sel[0]),  // selects the load packet
+  .sel1 (pcx_pkt_src_sel[1]),  // selects the store packet
+  .sel2 (pcx_pkt_src_sel[2]),  // selects the imiss/stream packet
+  .sel3 (pcx_pkt_src_sel[3]),  // selects the fwd/interrupt/fpop packet
+  .dout (pcx_pkt_data[`PCX_WIDTH-1:0])
+);
+// Output flop: pcx_pkt_data is registered on clk and driven out as spc_pcx_data_pa.
+dff_s  #(`PCX_WIDTH) pcx_xmit_ff (   // width follows `PCX_WIDTH like the port slices (was literal 124)
+        .din  (pcx_pkt_data[`PCX_WIDTH-1:0]), 
+        .q    (spc_pcx_data_pa[`PCX_WIDTH-1:0]),
+        .clk  (clk),
+        .se     (1'b0),     .si (),          .so ()   // se tied low; si/so left unconnected
+        ); 
+
+//  Stage to avoid critical path
+/*assign  lsu_ifu_ld_icache_index[11:5] = pcx_pkt_data[`PCX_AD_LO+11:`PCX_AD_LO+5] ;
+assign  lsu_ifu_ld_pcxpkt_tid[1:0] = pcx_pkt_data[`PCX_TH_HI:`PCX_TH_LO] ;*/
+// One 9b flop stages address bits [11:5] and the TH field of pcx_pkt_data for the IFU.
+dff_s #(9) stg_icindx (
+    .clk (clk),
+    .din ({pcx_pkt_data[`PCX_AD_LO+11:`PCX_AD_LO+5], pcx_pkt_data[`PCX_TH_HI:`PCX_TH_LO]}),
+    .q   ({lsu_ifu_ld_icache_index[11:5], lsu_ifu_ld_pcxpkt_tid[1:0]}),
+    .se  (1'b0), .si (), .so ()
+);
+
+//=========================================================================================
+//  VA Watchpt Reg per thread
+//=========================================================================================
+   
+// VA watchpoint, thread 0: va_wtchpt0_addr[47:3] loads lsu_tlu_st_rs3_data_g[47:3] when lsu_va_wtchpt0_wr_en_l is low.
+   wire        va_wtchpt0_clk ;   // gated clock; only driven when FPGA_SYN_CLK_EN is not defined
+   wire [47:3] va_wtchpt0_addr;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_va_wtchpt0 (
+                .rclk   (clk),
+                .enb_l  (lsu_va_wtchpt0_wr_en_l),
+                .tmb_l  (~se),
+                .clk    (va_wtchpt0_clk)
+                ) ; 
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(45) va_wtchpt0_ff (
+        .din    (lsu_tlu_st_rs3_data_g[47:3]),
+        .q      (va_wtchpt0_addr[47:3]),
+        .en (~(lsu_va_wtchpt0_wr_en_l)), .clk(clk),  // en is high when the _l write enable is low
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dff_s #(45) va_wtchpt0_ff (
+        .din    (lsu_tlu_st_rs3_data_g[47:3]),
+        .q      (va_wtchpt0_addr[47:3]),
+        .clk    (va_wtchpt0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+
+// VA watchpoint, thread 1: va_wtchpt1_addr[47:3] loads lsu_tlu_st_rs3_data_g[47:3] when lsu_va_wtchpt1_wr_en_l is low.
+   wire        va_wtchpt1_clk ;   // gated clock; only driven when FPGA_SYN_CLK_EN is not defined
+   wire [47:3] va_wtchpt1_addr;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_va_wtchpt1 (
+                .rclk   (clk),
+                .enb_l  (lsu_va_wtchpt1_wr_en_l),
+                .tmb_l  (~se),
+                .clk    (va_wtchpt1_clk)
+                ) ; 
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(45) va_wtchpt1_ff (
+        .din    (lsu_tlu_st_rs3_data_g[47:3]),
+        .q      (va_wtchpt1_addr[47:3]),
+        .en (~(lsu_va_wtchpt1_wr_en_l)), .clk(clk),  // en is high when the _l write enable is low
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dff_s #(45) va_wtchpt1_ff (
+        .din    (lsu_tlu_st_rs3_data_g[47:3]),
+        .q      (va_wtchpt1_addr[47:3]),
+        .clk    (va_wtchpt1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+
+// VA watchpoint, thread 2: va_wtchpt2_addr[47:3] loads lsu_tlu_st_rs3_data_g[47:3] when lsu_va_wtchpt2_wr_en_l is low.
+   wire        va_wtchpt2_clk ;   // gated clock; only driven when FPGA_SYN_CLK_EN is not defined
+   wire [47:3] va_wtchpt2_addr;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_va_wtchpt2 (
+                .rclk   (clk),
+                .enb_l  (lsu_va_wtchpt2_wr_en_l),
+                .tmb_l  (~se),
+                .clk    (va_wtchpt2_clk)
+                ) ; 
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(45) va_wtchpt2_ff (
+        .din    (lsu_tlu_st_rs3_data_g[47:3]),
+        .q      (va_wtchpt2_addr[47:3]),
+        .en (~(lsu_va_wtchpt2_wr_en_l)), .clk(clk),  // en is high when the _l write enable is low
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dff_s #(45) va_wtchpt2_ff (
+        .din    (lsu_tlu_st_rs3_data_g[47:3]),
+        .q      (va_wtchpt2_addr[47:3]),
+        .clk    (va_wtchpt2_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+   
+// VA watchpoint, thread 3: va_wtchpt3_addr[47:3] loads lsu_tlu_st_rs3_data_g[47:3] when lsu_va_wtchpt3_wr_en_l is low.
+   wire        va_wtchpt3_clk ;   // gated clock; only driven when FPGA_SYN_CLK_EN is not defined
+   wire [47:3] va_wtchpt3_addr;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_va_wtchpt3 (
+                .rclk   (clk),
+                .enb_l  (lsu_va_wtchpt3_wr_en_l),
+                .tmb_l  (~se),
+                .clk    (va_wtchpt3_clk)
+                ) ; 
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(45) va_wtchpt3_ff (
+        .din    (lsu_tlu_st_rs3_data_g[47:3]),
+        .q      (va_wtchpt3_addr[47:3]),
+        .en (~(lsu_va_wtchpt3_wr_en_l)), .clk(clk),  // en is high when the _l write enable is low
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dff_s #(45) va_wtchpt3_ff (
+        .din    (lsu_tlu_st_rs3_data_g[47:3]),
+        .q      (va_wtchpt3_addr[47:3]),
+        .clk    (va_wtchpt3_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+
+   wire [47:3] va_wtchpt_addr;   // watchpoint address picked by threadN_m; compared against lsu_ldst_va_m
+   
+mux4ds #(45)     va_wtchpt_mx_m (
+        .in0    (va_wtchpt0_addr[47:3]),
+        .in1    (va_wtchpt1_addr[47:3]),
+        .in2    (va_wtchpt2_addr[47:3]),
+        .in3    (va_wtchpt3_addr[47:3]),
+        .sel0   (thread0_m),
+        .sel1   (thread1_m),
+        .sel2   (thread2_m),
+        .sel3   (thread3_m),
+        .dout   (va_wtchpt_addr[47:3])
+        );
+// Same four watchpoint registers, selected by threadN_g, driving lsu_va_wtchpt_addr.
+mux4ds #(45)     va_wtchpt_mx_g (
+        .in0    (va_wtchpt0_addr[47:3]),
+        .in1    (va_wtchpt1_addr[47:3]),
+        .in2    (va_wtchpt2_addr[47:3]),
+        .in3    (va_wtchpt3_addr[47:3]),
+        .sel0   (thread0_g),
+        .sel1   (thread1_g),
+        .sel2   (thread2_g),
+        .sel3   (thread3_g),
+        .dout   (lsu_va_wtchpt_addr[47:3])
+        );
+      
+// VA watchpoint comparison at M stage, split into two partial matches ([47:32] and [31:3]).
+//assign lsu_va_match_m = (lsu_ldst_va_m[47:3] == va_wtchpt_addr[47:3]); 
+// bug6480/eco6623: the single 45b compare above was replaced by the two compares below.
+assign lsu_va_match_b47_b32_m = (lsu_ldst_va_m[47:32] == va_wtchpt_addr[47:32]); 
+assign lsu_va_match_b31_b3_m =  (lsu_ldst_va_m[31:3 ] == va_wtchpt_addr[31:3 ]); 
+
+//====================================================================   
+// dc_fill CP: dcache_alt_data_w0_m is the l2 fill data when l2fill_vld_m is high, else the ld bypass data.
+   wire [63:0] l2fill_data_m;
+   
+//dff #(64) stgm_l2fd (
+//        .din    (lsu_l2fill_data[63:0]),
+//        .q      (l2fill_data_m[63:0]),
+//        .clk    (clk),
+//        .se     (se),       .si (),          .so ()
+//        );
+   assign      l2fill_data_m[63:0] = lsu_l2fill_data[63:0];   // direct copy; the staging flop above is commented out
+   
+      
+   wire [63:0] ld_byp_data_m;   // lmqN_bypass_data chosen by ld_thrd_byp_sel_m
+   
+`ifdef FPGA_SYN_1THREAD
+  assign ld_byp_data_m[63:0] = lmq0_bypass_data[63:0];  // 1-thread build: always lmq0
+`else
+mux4ds  #(64) ld_byp_mx (
+  .in0  (lmq0_bypass_data[63:0]),
+  .in1  (lmq1_bypass_data[63:0]),
+  .in2  (lmq2_bypass_data[63:0]),
+  .in3  (lmq3_bypass_data[63:0]),
+  .sel0 (ld_thrd_byp_sel_m[0]),  
+  .sel1 (ld_thrd_byp_sel_m[1]),
+  .sel2 (ld_thrd_byp_sel_m[2]),  
+  .sel3 (ld_thrd_byp_sel_m[3]),
+  .dout (ld_byp_data_m[63:0])
+);
+`endif
+  
+assign dcache_alt_data_w0_m[63:0] =  
+       l2fill_vld_m ? l2fill_data_m[63:0] : 
+                      ld_byp_data_m[63:0];
+
+//assign	lsu_l2fill_or_byp_msb_m[7:0]
+//	= {lsu_l2fill_or_byp_data_m[63], 
+//     lsu_l2fill_or_byp_data_m[55], 
+//     lsu_l2fill_or_byp_data_m[47], 
+//     lsu_l2fill_or_byp_data_m[39],
+//	   lsu_l2fill_or_byp_data_m[31], 
+//     lsu_l2fill_or_byp_data_m[23], 
+//     lsu_l2fill_or_byp_data_m[15], 
+//     lsu_l2fill_or_byp_data_m[07]} ;
+//====================================================================   
+   
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_qdp2.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_qdp2.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_qdp2.v	(revision 6)
@@ -0,0 +1,1142 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_qdp2.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:  LSU_QDP2
+//  Description:  LSU CPX Datapath.
+*/
+////////////////////////////////////////////////////////////////////////
+// header file includes
+////////////////////////////////////////////////////////////////////////
+`include  "sys.h" // system level definition file which contains the 
+                  // time scale definition
+`include  "iop.h"
+ 
+`include  "lsu.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module lsu_qdp2 ( /*AUTOARG*/
+   // Outputs
+   so, lsu_l2fill_data, dfq_wdata, dfq_tid, lsu_dcache_fill_data_e, 
+   lsu_ifill_pkt, lsu_pcx_fwd_pkt, lsu_cpx_pkt_strm_ack, 
+   lsu_cpx_pkt_vld, lsu_cpx_pkt_atm_st_cmplt, lsu_cpx_pkt_tid, 
+   lsu_cpx_pkt_invwy, lsu_cpx_pkt_inv_pa, lsu_cpx_pkt_l2miss, 
+   lsu_dfq_byp_invwy_vld, lsu_dfq_byp_type, lsu_dfq_byp_flush, 
+   lsu_dfq_byp_tid, lsu_cpu_inv_data_b13to9, lsu_cpu_inv_data_b7to2, 
+   lsu_cpu_inv_data_b0, lsu_iobrdge_wr_data, lsu_iobrdge_tap_rq_type, 
+   lsu_cpx_pkt_perror_dinv, lsu_cpx_pkt_perror_iinv, 
+   lsu_cpx_pkt_perror_set, lsu_cpx_pkt_ld_err, lsu_dfq_byp_binit_st, 
+   lsu_cpx_pkt_binit_st, lsu_cpx_pkt_prefetch, lsu_cpx_pkt_prefetch2, 
+   lsu_dfq_byp_cpx_inv, lsu_dfq_byp_stack_adr_b54, 
+   lsu_dfq_byp_stack_wrway, lsu_dfq_byp_atm, dcache_iob_addr_e, 
+   st_dcfill_addr, lsu_st_way_e, lsu_dcache_iob_way_e, 
+   lsu_st_dcfill_size_e, lsu_cpx_pkt_ifill_type, lsu_cpx_pkt_atomic, 
+   // Inputs
+   rst_tri_en, rclk, si, se, lsu_dfill_data_sel_hi, dfq_byp_ff_en, 
+   dfq_rd_vld_d1, dfq_rdata, cpx_spc_data_cx, stb_rdata_ramd_buf, 
+   stb_rdata_ramd_b74_buf, stb_rdata_ramc_buf, lsu_stb_pcx_rvld_d1, 
+   lsu_diagnstc_wr_data_e, lsu_diagnstc_dc_prty_invrt_e, 
+   mbist_write_data, cpx_fwd_pkt_en_cx, lsu_cpu_dcd_sel, 
+   lsu_cpu_uhlf_sel, lsu_cpxpkt_type_dcd_cx, lsu_dc_iob_access_e, 
+   lsu_dcfill_data_mx_sel_e, lsu_cpx_spc_inv_vld, lsu_cpx_thrdid, 
+   lsu_cpx_stack_dcfill_vld, pcx_rq_for_stb_d1, lsu_dfq_ld_vld, 
+   lsu_dfq_st_vld, lsu_dfq_ldst_vld
+   ) ;  
+
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+// End of automatics
+//
+   input rst_tri_en;
+   
+input                     rclk ;
+input                     si;
+input                     se;
+output                    so;
+
+input                       lsu_dfill_data_sel_hi ; // select hi or low order 8B. 
+//input                       dcfill_src_dfq_sel ;
+input                       dfq_byp_ff_en ;
+input                       dfq_rd_vld_d1 ;
+input [`DFQ_WIDTH:0]        dfq_rdata ;             // dfq rd output
+input [`CPX_WIDTH-1:0]      cpx_spc_data_cx;        // cpx to processor pkt
+//input [2:0]                 stb_dfq_rd_id ;         // stb entry id 
+input [69:0]                stb_rdata_ramd_buf ;        // stb0 data ram output.
+input                       stb_rdata_ramd_b74_buf ;        // stb0 data ram output.
+input [14:9]                stb_rdata_ramc_buf ;        // stb0 tag ram output.
+input                       lsu_stb_pcx_rvld_d1 ;   // stb has been read-delayby1cycle
+//input                       lsu_stb_dfq_rvld ;      // wr to dfq stb bypass ff
+//input [1:0]                 lmq_pcx_pkt_sz ;
+//input [39:0]                lmq_pcx_pkt_addr ;
+
+// diagnostic write information
+//input                       lsu_diagnstc_wr_src_sel_e ;    // diagnstc write - diag/store
+input  [63:0]               lsu_diagnstc_wr_data_e ;       // Store data
+input  [7:0]                lsu_diagnstc_dc_prty_invrt_e ; // invert parity of dw
+//input  [3:0]                lsu_diagnstc_wr_way_e ;        // cache way to be written
+//input  [10:0]               lsu_diagnstc_wr_addr_e ;       // address
+
+//input                     lsu_ifill_pkt_vld ;     // ifill pkt vld
+//input                     lsu_bist_wvld_e ;       // bist write to dcache
+//input                     lsu_bist_rvld_e ;       // bist read from dcache  
+
+//input   [6:0]             mbist_dcache_index ;    // bist rd/wr address 
+//input                     mbist_dcache_word;
+//input   [1:0]             mbist_dcache_way;   
+input   [7:0]             mbist_write_data ;      // bist wdata
+   
+input                     cpx_fwd_pkt_en_cx ;     // cpx fwd reply/req
+input   [7:0]             lsu_cpu_dcd_sel ;
+input                     lsu_cpu_uhlf_sel ;
+input   [5:0]             lsu_cpxpkt_type_dcd_cx ;
+//input                     lsu_st_wr_sel_e ;
+//input   [1:0]             lmq_ld_way ;
+//input   [1:0]             lsu_st_ack_wrwy ;       // cache set way to write to.  
+//input   [1:0]             lsu_st_ack_addr_b54 ;
+//input   [1:0]             lsu_stb_rd_tid ;
+input			  lsu_dc_iob_access_e ;	// iob read/write of dcache
+
+//input                     tmb_l;
+//input   [3:0]             lsu_dcfill_mx_sel_e;
+//input                     lsu_dcfill_addr_mx_sel_e;
+input                     lsu_dcfill_data_mx_sel_e;
+
+input                     lsu_cpx_spc_inv_vld;
+input   [3:0]             lsu_cpx_thrdid;
+input                     lsu_cpx_stack_dcfill_vld ;
+input   [3:0]             pcx_rq_for_stb_d1;
+
+input                     lsu_dfq_ld_vld ;
+input                     lsu_dfq_st_vld ;
+input                     lsu_dfq_ldst_vld ;
+
+/*AUTOOUTPUT*/
+// Beginning of automatic outputs (from unused autoinst outputs)
+// End of automatics
+//
+
+output  [63:0]            lsu_l2fill_data ;       // dfill data for write to irf
+
+output  [`DFQ_WIDTH:0]    dfq_wdata ;
+output  [1:0]             dfq_tid ;               // thread-id for load at head of DFQ.
+
+output  [143:0]           lsu_dcache_fill_data_e ;// store-write/ld-miss fill 
+
+output  [`CPX_VLD-1:0]  lsu_ifill_pkt ;
+output  [107:0]           lsu_pcx_fwd_pkt ;       // local fwd reply/req 
+output               	  lsu_cpx_pkt_strm_ack ;
+output                    lsu_cpx_pkt_vld ;
+output                    lsu_cpx_pkt_atm_st_cmplt ;
+output  [1:0]             lsu_cpx_pkt_tid ;
+output  [1:0]             lsu_cpx_pkt_invwy ;     // invalidate way
+output  [4:0]             lsu_cpx_pkt_inv_pa ;    // invalidate pa [10:6]
+output			  lsu_cpx_pkt_l2miss ;	// ld req missed in L2
+output                    lsu_dfq_byp_invwy_vld ;
+output  [5:0]             lsu_dfq_byp_type ;
+output                    lsu_dfq_byp_flush ;
+//output  [2:0]             lsu_dfq_byp_cpuid ;
+output  [1:0]             lsu_dfq_byp_tid ;
+//output  [13:0]            lsu_cpu_inv_data ;
+output  [13:9]            lsu_cpu_inv_data_b13to9 ;
+output  [7:2]             lsu_cpu_inv_data_b7to2 ;
+output                    lsu_cpu_inv_data_b0 ;
+//output                    lsu_dfq_byp_stquad_pkt2 ;
+//output                    lsu_cpx_pkt_stquad_pkt2 ;
+output  [43:0]            lsu_iobrdge_wr_data ;
+output  [8:0]             lsu_iobrdge_tap_rq_type ;
+//output                    lsu_dfq_byp_perror_dinv ;  // dtag perror corr. st ack
+//output                    lsu_dfq_byp_perror_iinv ;  // itag perror corr. st ack
+output                    lsu_cpx_pkt_perror_dinv ;  // dtag perror corr. st ack
+output                    lsu_cpx_pkt_perror_iinv ;  // itag perror corr. st ack
+output  [1:0]             lsu_cpx_pkt_perror_set ;  // dtag perror - spec. b54
+output  [1:0]             lsu_cpx_pkt_ld_err ;      // err field - cpx ld pkt
+output			  lsu_dfq_byp_binit_st ;	// blk-init st in bypass.
+output			  lsu_cpx_pkt_binit_st ;    // blk-init store
+output			  lsu_cpx_pkt_prefetch;    // prefetch
+output			  lsu_cpx_pkt_prefetch2;   // prefetch - for dctl
+
+output                    lsu_dfq_byp_cpx_inv;
+//output			  lsu_dfq_byp_stack_dcfill_vld;
+output  [1:0]             lsu_dfq_byp_stack_adr_b54;
+output  [1:0]             lsu_dfq_byp_stack_wrway;
+output                    lsu_dfq_byp_atm;
+
+   //dcache_fill_addr_e change
+   output [7:0]           dcache_iob_addr_e;
+   output [10:0]          st_dcfill_addr;
+
+   output [1:0]           lsu_st_way_e;
+   output [1:0]           lsu_dcache_iob_way_e;
+
+   output [1:0]           lsu_st_dcfill_size_e;
+   
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+wire  [13:0]      cpx_cpulo_inv_data ;   // dout of invfld_lo_sel
+wire  [13:0]      cpx_cpuhi_inv_data ;   // dout of invfld_hi_sel
+//wire  [`STB_PCX_WIDTH-1:0]  stb_pcx_pkt ;
+//wire  [`STB_DFQ_WIDTH-1:0]  stb_dfq_pkt_data ;
+wire  [`STB_DFQ_WIDTH-1:0]  stb_dfq_data_in ;   // store-buffer read data formatted for the DFQ
+//wire  [`DFQ_WIDTH-1:0]  cpx_dfq_data ;
+//wire  [`DFQ_WIDTH-1:0]  cpx_dfq_data_d1 ;
+//wire  [`CPX_WIDTH-1:0]  cpx_data_cx_d1 ;
+//wire        cpx_st_cmplt_d1 ;
+wire  [`DFQ_WIDTH:0]  dfq_byp_mx_data ;   // dout of dfq_byp_mx
+wire  [`DFQ_WIDTH-1:0]    dfq_byp_ff_data ;   // dfq_byp_mx_data staged by dfq_data_stg (top bit not kept)
+//wire  [`STB_DFQ_WIDTH-1:0]  store_dfq_pkt ;
+wire  [127:0]   st_dcfill_data ;   // store fill data (dcache_wr_data in both 64b halves)
+wire  [63:0]      dcache_wr_data ;   // dout of dcwr_sel
+wire  [127:0]   ldinv_dcfill_data ;   // data field of the staged DFQ packet
+//wire  [`LMQ_WIDTH-1:0]  lmq0_pcx_pkt, lmq1_pcx_pkt ;
+//wire  [`LMQ_WIDTH-1:0]  lmq2_pcx_pkt, lmq3_pcx_pkt ;
+wire  [127:0] lsu_dcfill_data ;   // dout of dfq_pkt_src; input of parity_gen
+wire  [15:0]      dcache_wr_parity_mod ;
+//wire  [3:0]     bist_rsel_way_e ;
+wire  [107:0]     cpx_fwd_pkt_din ;   // din of fwdpkt_ff
+
+//wire [3:0]     bist_rsel_way_m ;
+//wire [3:0]     lsu_bist_rsel_way_wb ;  // way select for read
+wire  [1:0]  cpx_st_dcfill_wrway;   // dout of st_dcfill_wrway_sel_b54
+wire  [`STB_DFQ_VLD:0]   stb_dcfill_data_mx;   // dout of stb_data_mx
+wire           clk;   // local alias of rclk (see assign below)
+wire  [13:0]            lsu_cpu_inv_data ;   // dout of invfld_sel
+
+assign  clk = rclk;
+
+
+//=================================================================================================
+//      STB Datapath
+//=================================================================================================
+
+// PCX PKT FORMATTING
+// THREAD0
+//assign stb_pcx_pkt[`STB_PCX_VLD] = lsu_stb_pcx_rvld_d1 ;    // Valid
+// Support stores for now.
+//assign stb_pcx_pkt[`STB_PCX_RQ_HI:`STB_PCX_RQ_LO] = stb_rdata_ramd[74:72] ; // Rq-type
+//assign stb_pcx_pkt[`STB_PCX_NC] = stb_rdata_ramd[74] ;  // NC
+// cpu-id will be inserted on way out of core.
+//assign  stb_pcx_pkt[`STB_PCX_TH_HI:`STB_PCX_TH_LO] = lsu_stb_rd_tid[1:0] ;  // TID
+// bf-id is not required.
+//assign  stb_pcx_pkt[`STB_PCX_WY_HI:`STB_PCX_WY_LO] = stb_rdata_ramd[71:70] ;  // WAY
+//assign  stb_pcx_pkt[`STB_PCX_SZ_HI:`STB_PCX_SZ_LO] = 
+//      stb_rdata_ramd[69:68];        // Size
+//assign  stb_pcx_pkt[`STB_PCX_AD_HI:`STB_PCX_AD_LO] = 
+//      {stb_rdata_ramc[44:9],stb_rdata_ramd[67:64]} ;// Addr        
+//assign  stb_pcx_pkt[`STB_PCX_DA_HI:`STB_PCX_DA_LO] = 
+//      stb_rdata_ramd[63:0];         // Data   
+
+// STB to DFQ Data Formatting
+// One 83-bit packet, used as din by all four per-thread bypass flops (THREAD0..THREAD3) below.
+assign  stb_dfq_data_in[`STB_DFQ_WIDTH-1:0] =
+  {lsu_stb_pcx_rvld_d1,                         // 82:82 vld  //stb_pcx_pkt[`STB_PCX_VLD],
+  stb_rdata_ramd_b74_buf,                           // 81:81 buffered stb_rdata_ramd[74]; meaning not documented here (orig. note: ??)
+  2'b00,                                        // 80:79 not used
+  //stb_pcx_pkt[`STB_PCX_WY_HI:`STB_PCX_WY_LO],
+  3'b000,                                       // 78:76 instead of stb_dfq_rd_id[2:0],
+  stb_rdata_ramd_buf[69:68],                        // 75:74 size //stb_pcx_pkt[`STB_PCX_SZ_HI:`STB_PCX_SZ_LO], 
+  {stb_rdata_ramc_buf[14:9],stb_rdata_ramd_buf[67:64]}, // 73:64 Addr //stb_pcx_pkt[`STB_PCX_AD_LO+9:`STB_PCX_AD_LO],
+  stb_rdata_ramd_buf[63:0]};                        // 63:0  data  //stb_pcx_pkt[`STB_PCX_DA_HI:`STB_PCX_DA_LO]};
+
+
+// STB DATA BYPASS FLOP
+// Data is read out on read for pcx. The data is then
+// bypassed to the dfq when the st-ack is received.
+//wire  [3:0]   pcx_rq_for_stb_d1;
+wire  [3:0]   clk_stb_data;                       // per-thread clocks from the stb_dfq_bypN_clken buffers
+wire  [`STB_DFQ_VLD:0]  stb_dfq_pkt_data0,        // q of stb_dfq_byp0_ff
+                        stb_dfq_pkt_data1,        // q of stb_dfq_byp1_ff
+                        stb_dfq_pkt_data2,        // q of stb_dfq_byp2_ff
+                        stb_dfq_pkt_data3;        // q of stb_dfq_byp3_ff
+
+// timing fix: 9/15/03 - reduce loading on pcx_rq_for_stb[3:0] to stb_ctl[0-3]. It had FO2 (stb_ctl,qdp2 - cap=0.5-0.8)
+//             move the flop from qdp2 to qctl1
+
+//flop pcx rq to read stb data
+//dff  #(4) pcx_rq_for_stb_ff (                       
+//           .din  (pcx_rq_for_stb[3:0]),
+//           .q    (pcx_rq_for_stb_d1[3:0]),
+//           .clk  (clk), 
+//           .se   (1'b0),       .si (),          .so ());                                
+
+//dffe  #(83) stb_dfq_byp_ff (
+//        .din  (stb_dfq_data_in[`STB_DFQ_VLD:0]), 
+//  .q    (stb_dfq_pkt_data[`STB_DFQ_VLD:0]),
+//        .en   (lsu_stb_dfq_rvld), .clk (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        );
+
+//THREAD0 - bypass flop: loads stb_dfq_data_in when pcx_rq_for_stb_d1[0] is set (flop enable or gated clock, per build macros).
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf stb_dfq_byp0_clken(              // produces clk_stb_data[0] for the dff_s variant below
+          .clk(clk_stb_data[0]),
+          .rclk(clk),
+          .enb_l(~pcx_rq_for_stb_d1[0]),
+          .tmb_l(~se));
+`endif
+// NOTE(review): FPGA_SYN_CLK_EN without FPGA_SYN_CLK_DFF leaves clk_stb_data[0] with no driver here - confirm both are always set together.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(`STB_DFQ_VLD+1) stb_dfq_byp0_ff (   // width follows the din/q slices (was a literal 83)
+           .din  (stb_dfq_data_in[`STB_DFQ_VLD:0]),
+           .q    (stb_dfq_pkt_data0[`STB_DFQ_VLD:0]),
+           .en   (pcx_rq_for_stb_d1[0]), .clk(clk),   // same value as the former ~(~...) form
+           .se   (1'b0),       .si (),          .so ());
+`else
+dff_s  #(`STB_DFQ_VLD+1) stb_dfq_byp0_ff (    // width follows the din/q slices (was a literal 83)
+           .din  (stb_dfq_data_in[`STB_DFQ_VLD:0]),
+           .q    (stb_dfq_pkt_data0[`STB_DFQ_VLD:0]),
+           .clk  (clk_stb_data[0]),
+           .se   (1'b0),       .si (),          .so ());
+`endif
+
+//THREAD1 - bypass flop: loads stb_dfq_data_in when pcx_rq_for_stb_d1[1] is set (flop enable or gated clock, per build macros).
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf stb_dfq_byp1_clken(              // produces clk_stb_data[1] for the dff_s variant below
+          .clk(clk_stb_data[1]),
+          .rclk(clk),
+          .enb_l(~pcx_rq_for_stb_d1[1]),
+          .tmb_l(~se));
+`endif
+// NOTE(review): FPGA_SYN_CLK_EN without FPGA_SYN_CLK_DFF leaves clk_stb_data[1] with no driver here - confirm both are always set together.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(`STB_DFQ_VLD+1) stb_dfq_byp1_ff (   // width follows the din/q slices (was a literal 83)
+           .din  (stb_dfq_data_in[`STB_DFQ_VLD:0]),
+           .q    (stb_dfq_pkt_data1[`STB_DFQ_VLD:0]),
+           .en   (pcx_rq_for_stb_d1[1]), .clk(clk),   // same value as the former ~(~...) form
+           .se   (1'b0),       .si (),          .so ());
+`else
+dff_s  #(`STB_DFQ_VLD+1) stb_dfq_byp1_ff (    // width follows the din/q slices (was a literal 83)
+           .din  (stb_dfq_data_in[`STB_DFQ_VLD:0]),
+           .q    (stb_dfq_pkt_data1[`STB_DFQ_VLD:0]),
+           .clk  (clk_stb_data[1]),
+           .se   (1'b0),       .si (),          .so ());
+`endif
+
+//THREAD2 - bypass flop: loads stb_dfq_data_in when pcx_rq_for_stb_d1[2] is set (flop enable or gated clock, per build macros).
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf stb_dfq_byp2_clken(              // produces clk_stb_data[2] for the dff_s variant below
+          .clk(clk_stb_data[2]),
+          .rclk(clk),
+          .enb_l(~pcx_rq_for_stb_d1[2]),
+          .tmb_l(~se));
+`endif
+// NOTE(review): FPGA_SYN_CLK_EN without FPGA_SYN_CLK_DFF leaves clk_stb_data[2] with no driver here - confirm both are always set together.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(`STB_DFQ_VLD+1) stb_dfq_byp2_ff (   // width follows the din/q slices (was a literal 83)
+           .din  (stb_dfq_data_in[`STB_DFQ_VLD:0]),
+           .q    (stb_dfq_pkt_data2[`STB_DFQ_VLD:0]),
+           .en   (pcx_rq_for_stb_d1[2]), .clk(clk),   // same value as the former ~(~...) form
+           .se   (1'b0),       .si (),          .so ());
+`else
+dff_s  #(`STB_DFQ_VLD+1) stb_dfq_byp2_ff (    // width follows the din/q slices (was a literal 83)
+           .din  (stb_dfq_data_in[`STB_DFQ_VLD:0]),
+           .q    (stb_dfq_pkt_data2[`STB_DFQ_VLD:0]),
+           .clk  (clk_stb_data[2]),
+           .se   (1'b0),       .si (),          .so ());
+`endif
+
+//THREAD3 - bypass flop: loads stb_dfq_data_in when pcx_rq_for_stb_d1[3] is set (flop enable or gated clock, per build macros).
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf stb_dfq_byp3_clken(              // produces clk_stb_data[3] for the dff_s variant below
+          .clk(clk_stb_data[3]),
+          .rclk(clk),
+          .enb_l(~pcx_rq_for_stb_d1[3]),
+          .tmb_l(~se));
+`endif
+// NOTE(review): FPGA_SYN_CLK_EN without FPGA_SYN_CLK_DFF leaves clk_stb_data[3] with no driver here - confirm both are always set together.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(`STB_DFQ_VLD+1) stb_dfq_byp3_ff (   // width follows the din/q slices (was a literal 83)
+           .din  (stb_dfq_data_in[`STB_DFQ_VLD:0]),
+           .q    (stb_dfq_pkt_data3[`STB_DFQ_VLD:0]),
+           .en   (pcx_rq_for_stb_d1[3]), .clk(clk),   // same value as the former ~(~...) form
+           .se   (1'b0),       .si (),          .so ());
+`else
+dff_s  #(`STB_DFQ_VLD+1) stb_dfq_byp3_ff (    // width follows the din/q slices (was a literal 83)
+           .din  (stb_dfq_data_in[`STB_DFQ_VLD:0]),
+           .q    (stb_dfq_pkt_data3[`STB_DFQ_VLD:0]),
+           .clk  (clk_stb_data[3]),
+           .se   (1'b0),       .si (),          .so ());
+`endif
+
+// MUX the store data if cpx_pkt==st_ack w/ dcfill vld=1: thread N's bypass flop is picked by lsu_cpx_thrdid[N].
+mux4ds  #(`STB_DFQ_VLD+1) stb_data_mx (
+  .dout (stb_dcfill_data_mx[`STB_DFQ_VLD:0]),
+  .sel0 (lsu_cpx_thrdid[0]),
+  .in0  (stb_dfq_pkt_data0[`STB_DFQ_VLD:0]),
+  .sel1 (lsu_cpx_thrdid[1]),
+  .in1  (stb_dfq_pkt_data1[`STB_DFQ_VLD:0]),
+  .sel2 (lsu_cpx_thrdid[2]),
+  .in2  (stb_dfq_pkt_data2[`STB_DFQ_VLD:0]),
+  .sel3 (lsu_cpx_thrdid[3]),
+  .in3  (stb_dfq_pkt_data3[`STB_DFQ_VLD:0])
+);
+
+//NOTE: mux this raw data w/ modified data to generate dfq input and feed into dfq_wdata
+
+
+
+
+//=================================================================================================
+//    FWD PKT - REQ/REPLY
+//=================================================================================================
+
+// Design Note !! - Bus can be decreased - do not have to keep tag.
+
+// TAP ACCESS FORMAT
+// BEGIN (OLD)
+// Control bits :
+// R/W,TID,BIST,MARGIN,DEFEATURE,PC (R=1,W=0)
+// These 7b are mapped to bits 70:64 of the cpx pkt.
+// (R/W is the highest order bit). 
+// *Note that a write to pc is ignored by hardware.
+// *The cpx-reply will not contain the control information.
+// *TID(Thread id) applies only to pc and defeature.
+// Data bits :
+// PC(48b),Margin(36b),Bist-Ctl(14b),Defeature(4b).
+// The largest field of 48b is mapped to bits 47:0 of the cpx pkt.
+// END (OLD)
+
+// Control bits (mapped to data[127:96] of cpx packet):
+// L1I data,L1D data,BIST,MARGIN,DEFEATURE,PC,TID[1:0]
+// These 8b are mapped to bits 103:96 of the cpx pkt.
+// Unused bits are zeros.
+// (TID is the lowest order 2 bits).
+// *Note that a write to pc is ignored by hardware.
+// *The cpx-reply will not contain the control information.
+// *TID(Thread id) applies only to pc and defeature.
+//
+// Address bits (mapped to data[95:64] of cpx packet):
+// This is used to access the L1 cache arrays.  This field
+// is a dont-care for the bist/margin/defeature/pc ASIs.
+// Only the lower 32 address bits are specified here.
+// The core (lsu) will pad with zeros to create a 64-bit address.
+//
+// Data bits (mapped to data[63:0] of cpx packet):
+// PC(48b),Margin(36b),Bist-Ctl(14b),Defeature(4b).
+// The largest field of 48b is mapped to bits 47:0 of the cpx pkt.
+
+
+// Forward packet presented to fwdpkt_ff (108b), formatted for the fwd req, which is the largest case.
+// Truncate address !!! 40b should not be required.
+//   [107]     r/!w                 <- cpx_spc_data_cx[`CPX_NC]
+//   [106:104] src/tar              <- cpx_spc_data_cx[133:131]
+//   [103:0]   64b data + 40b addr  <- cpx_spc_data_cx[103:0]
+assign  cpx_fwd_pkt_din[107]     = cpx_spc_data_cx[`CPX_NC] ;
+assign  cpx_fwd_pkt_din[106:104] = cpx_spc_data_cx[133:131] ;
+assign  cpx_fwd_pkt_din[103:0]   = cpx_spc_data_cx[103:0] ;
+
+// Contains cpx fwd reply or req
+//dffe  #(108) fwdpkt_ff  (
+//        .din  (cpx_fwd_pkt_din[107:0]), 
+//  .q    (lsu_pcx_fwd_pkt[107:0]),
+//        .en   (cpx_fwd_pkt_en_cx), 
+//  .clk  (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        );
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf fwdpkt_clken(
+          .rclk  (clk),
+          .enb_l (~cpx_fwd_pkt_en_cx),
+          .tmb_l (~se),
+          .clk   (clk_cpx_fwd_pkt_en_cx));
+`endif
+// Holds the CPX forward request/reply; loaded under cpx_fwd_pkt_en_cx (flop enable or clken_buf clock, per build macros).
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(108) fwdpkt_ff  (
+            .clk  (clk),        .en   (cpx_fwd_pkt_en_cx),
+            .din  (cpx_fwd_pkt_din[107:0]),
+            .q    (lsu_pcx_fwd_pkt[107:0]),
+            .se   (1'b0),       .si (),          .so ());
+`else
+dff_s  #(108) fwdpkt_ff  (
+            .clk  (clk_cpx_fwd_pkt_en_cx),
+            .din  (cpx_fwd_pkt_din[107:0]),
+            .q    (lsu_pcx_fwd_pkt[107:0]),
+            .se   (1'b0),       .si (),          .so ());
+`endif
+
+
+// Field mapping of the staged forward packet (lsu_pcx_fwd_pkt) used below.
+// Obsolete mapping (tag removed), kept for reference:
+//   pkt[74:73] - Way, pkt[72:65] - Set Index, pkt[64] - Word
+// Current mapping - based on 0-in testing, alignment with PRM:
+//   pkt[76:75] - Way
+//   pkt[74:68] - Set Index
+//   pkt[67]    - DWord
+//   pkt[63:0]  - Data
+
+wire [7:0] dcache_iob_addr_e ;
+assign  dcache_iob_addr_e[7:0] = lsu_pcx_fwd_pkt[74:67] ;     // set index + dword bit
+
+// The way is exported in encoded form; the one-hot decode that used to live here
+// (dcache_iob_wy_e[3:0], built from pkt[76] and pkt[75]) is no longer generated.
+assign  lsu_dcache_iob_way_e[1:0] = lsu_pcx_fwd_pkt[76:75] ;
+
+wire [63:0] dcache_iob_data_e ;
+assign  dcache_iob_data_e[63:0] = lsu_pcx_fwd_pkt[63:0] ;
+
+// IO-bridge / TAP view of the same packet.
+assign  lsu_iobrdge_wr_data[43:0]    = lsu_pcx_fwd_pkt[43:0] ;
+// Request type = pkt[107] (the r/!w bit of cpx_fwd_pkt_din) followed by
+// the eight control bits pkt[103:96].
+assign  lsu_iobrdge_tap_rq_type[8]   = lsu_pcx_fwd_pkt[107] ;
+assign  lsu_iobrdge_tap_rq_type[7:0] = lsu_pcx_fwd_pkt[103:96] ;
+
+//=================================================================================================
+//    DFQ PKT SELECTION
+//=================================================================================================
+
+// There are two sources :
+// - from the ccx - load,inv 
+// - from the stb - ack'ed store update.
+// ** store updates do not have to be inserted into DFQ for ordering purposes. An inv will
+// clear stale data in the stb and bypass flops to ensure TSO.
+
+// to be written to dfq if bypass full else wr to byp mx.
+//assign  dfq_wdata[`DFQ_WIDTH:0] = 
+//  {lsu_cpx_spc_inv_vld,lsu_cpxpkt_type_dcd_cx[5:0],cpx_spc_data_cx[`CPX_WIDTH-1:0]};
+//  //{{(`DFQ_WIDTH-`CPX_WIDTH)1'b0},cpx_spc_data_cx[`CPX_WIDTH-1:0]},
+// Two candidate DFQ words share the header {inv_vld, decoded type[5:0]}; they differ only in bits 86:0.
+wire  [`DFQ_WIDTH:0]  dfq_st_data,dfq_cpx_raw_wdata;
+wire  [1:0]           cpx_st_ack_addr_b54;
+// Raw word: header followed by the unmodified CPX packet.
+assign  dfq_cpx_raw_wdata[`DFQ_WIDTH:0] = 
+  {lsu_cpx_spc_inv_vld,lsu_cpxpkt_type_dcd_cx[5:0],cpx_spc_data_cx[`CPX_WIDTH-1:0]};
+// Store-ack word: CPX bits 86:0 are replaced by address bits, write way and the bypassed store-buffer packet.
+assign  dfq_st_data[`DFQ_WIDTH:0]  =  
+        {lsu_cpx_spc_inv_vld,lsu_cpxpkt_type_dcd_cx[5:0],
+         cpx_spc_data_cx[`CPX_WIDTH-1:87],
+         cpx_st_ack_addr_b54[1:0],             // 86:85
+         cpx_st_dcfill_wrway[1:0],             // 84:83
+         stb_dcfill_data_mx[`STB_DFQ_VLD:0]};  // 82:0
+
+mux2ds  #(`DFQ_WIDTH+1) dfq_st_data_mx (   // DFQ write data: store-ack word when lsu_cpx_stack_dcfill_vld, else raw CPX word
+  .dout (dfq_wdata[`DFQ_WIDTH:0]),
+  .sel0 ( lsu_cpx_stack_dcfill_vld),
+  .in0  (dfq_st_data[`DFQ_WIDTH:0]),
+  .sel1 (~lsu_cpx_stack_dcfill_vld),
+  .in1  (dfq_cpx_raw_wdata[`DFQ_WIDTH:0])
+);
+
+// Bypass mux: DFQ read data when dfq_rd_vld_d1 is set, otherwise the raw CPX word.
+// timing fix: 05/31/03: decouple byp mux from lsu_cpx_stack_dcfill_vld,
+//             i.e. the bypass leg takes dfq_cpx_raw_wdata rather than dfq_wdata.
+mux2ds  #(`DFQ_WIDTH+1) dfq_byp_mx (
+  .dout (dfq_byp_mx_data[`DFQ_WIDTH:0]),
+  .sel0 ( dfq_rd_vld_d1),
+  .in0  (dfq_rdata[`DFQ_WIDTH:0]),
+  .sel1 (~dfq_rd_vld_d1),
+  .in1  (dfq_cpx_raw_wdata[`DFQ_WIDTH:0])
+);
+
+// Fields picked out of the bypass-mux output (DFQ head or raw CPX word).
+assign  lsu_dfq_byp_cpx_inv    = dfq_byp_mx_data[`DFQ_WIDTH] ;              // top bit of the DFQ word
+assign  lsu_dfq_byp_tid[1:0]   = dfq_byp_mx_data[`CPX_TH_HI:`CPX_TH_LO] ;
+assign  lsu_dfq_byp_flush      = dfq_byp_mx_data[`CPX_NC] ;
+assign  lsu_dfq_byp_invwy_vld  = dfq_byp_mx_data[`CPX_WYVLD] ;
+
+// Decoded request type (the six bits just below the top bit).  Bit 2 is only reported
+// when dfq_rd_vld_d1 is set, i.e. it is forced low while the raw CPX word is bypassed.
+assign  lsu_dfq_byp_type[5:0]  =
+        { dfq_byp_mx_data[`DFQ_WIDTH-1:`DFQ_WIDTH-3],
+          dfq_byp_mx_data[`DFQ_WIDTH-4] & dfq_rd_vld_d1,
+          dfq_byp_mx_data[`DFQ_WIDTH-5:`DFQ_WIDTH-6] } ;
+
+assign  lsu_dfq_byp_binit_st   = dfq_byp_mx_data[125] ;
+assign  lsu_dfq_byp_atm        = dfq_byp_mx_data[129] ;
+
+// Store-ack fill fields (positions match the 86:85 / 84:83 slots of dfq_st_data).
+assign  lsu_dfq_byp_stack_adr_b54[1:0] = dfq_byp_mx_data[86:85] ;
+assign  lsu_dfq_byp_stack_wrway[1:0]   = dfq_byp_mx_data[84:83] ;
+
+// I-fill packet: every bit below the CPX valid bit.
+// No longer exported from this mux: cpuid, stquad_pkt2 [130], perror iinv/dinv, stack_dcfill_vld [87].
+assign  lsu_ifill_pkt[`CPX_VLD-1:0] = dfq_byp_mx_data[`CPX_VLD-1:0] ;
+
+// Decode in qctl !!!
+//assign  dfq_byp_tid[1:0] = dfq_byp_mx_data[`CPX_TH_HI:`CPX_TH_LO] ;
+//assign  dfq_byp_tid[1:0] = dfq_byp_mx_data[`DFQ_TH_HI:`DFQ_TH_LO] ;
+
+// Stage dfq output
+// In case of multiple inv or other such cases, pkt will be held in
+// byp ff until pkt completely utilized.
+//
+// dfq_data_stg registers dfq_byp_mx_data[`DFQ_WIDTH-1:0] into dfq_byp_ff_data
+// under dfq_byp_ff_en.  The top bit, dfq_byp_mx_data[`DFQ_WIDTH], is not staged.
+// Two build variants are provided:
+//   FPGA_SYN_CLK_DFF defined - enabled flop (dffe_s) clocked by clk
+//   otherwise                - plain flop (dff_s) clocked by clk_dfq_byp_ff_en from the clken_buf
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf dfq_byp_ff_en_clken(
+          .rclk  (clk),
+          .enb_l (~dfq_byp_ff_en),
+          .tmb_l (~se),
+          .clk   (clk_dfq_byp_ff_en));
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(`DFQ_WIDTH) dfq_data_stg (
+                  .clk  (clk),      .en   (dfq_byp_ff_en),
+                  .din  (dfq_byp_mx_data[`DFQ_WIDTH-1:0]),
+                  .q    (dfq_byp_ff_data[`DFQ_WIDTH-1:0]),
+                  .se   (1'b0),     .si (),          .so ());
+`else
+dff_s  #(`DFQ_WIDTH) dfq_data_stg (
+                  .clk  (clk_dfq_byp_ff_en),
+                  .din  (dfq_byp_mx_data[`DFQ_WIDTH-1:0]),
+                  .q    (dfq_byp_ff_data[`DFQ_WIDTH-1:0]),
+                  .se   (1'b0),     .si (),          .so ());
+`endif
+
+
+// To be decoded in qctl
+//assign  lsu_st_cmplt_type = dfq_byp_ff_data[`DFQ_ST_CMPLT];
+// All fields below are taken from the staged packet dfq_byp_ff_data (q of dfq_data_stg).
+assign  dfq_tid[1:0] = dfq_byp_ff_data[`CPX_TH_HI:`CPX_TH_LO] ;   // same bits as lsu_cpx_pkt_tid
+
+output    lsu_cpx_pkt_ifill_type;
+output    lsu_cpx_pkt_atomic ;
+
+// Should some of these be in-flight ?
+//assign  lsu_cpx_pkt_rqtype[3:0]   = dfq_byp_ff_data[`CPX_RQ_HI:`CPX_RQ_LO] ;
+assign  lsu_cpx_pkt_ifill_type    = dfq_byp_ff_data[`DFQ_WIDTH-2];   // slot of lsu_cpxpkt_type_dcd_cx[4] (per dfq_cpx_raw_wdata layout)
+assign  lsu_cpx_pkt_tid[1:0]      = dfq_byp_ff_data[`CPX_TH_HI:`CPX_TH_LO] ;
+assign  lsu_cpx_pkt_vld     = dfq_byp_ff_data[`CPX_VLD] ;
+assign  lsu_cpx_pkt_atm_st_cmplt  = dfq_byp_ff_data[129] ;   // same bit as the first term of lsu_cpx_pkt_atomic
+assign  lsu_cpx_pkt_invwy[1:0]    = dfq_byp_ff_data[`CPX_WY_HI:`CPX_WY_LO] ;
+// Upper 6bits are used to store decoded request type information.
+assign  lsu_cpx_pkt_strm_ack   = dfq_byp_ff_data[`DFQ_WIDTH-5];   // slot of lsu_cpxpkt_type_dcd_cx[1] (per dfq_cpx_raw_wdata layout)
+//assign  lsu_cpx_pkt_inv_pa[4:0]   = dfq_byp_ff_data[`CPX_INV_PA_HI-1:`CPX_INV_PA_LO];  //!!
+assign  lsu_cpx_pkt_inv_pa[4:0]   = dfq_byp_ff_data[`CPX_INV_PA_HI:`CPX_INV_PA_LO];
+assign  lsu_cpx_pkt_atomic    = dfq_byp_ff_data[129]  | //atomic st ack
+            dfq_byp_ff_data[131]  ; //stquad pkt1
+//assign  lsu_cpx_pkt_stquad_pkt2   = dfq_byp_ff_data[130] ;
+assign  lsu_cpx_pkt_binit_st   = dfq_byp_ff_data[125] ;
+assign  lsu_cpx_pkt_prefetch = dfq_byp_ff_data[128] ; // for qctl2
+assign  lsu_cpx_pkt_prefetch2 = dfq_byp_ff_data[128] ;  // for dctl (duplicate of the same bit)
+//assign	lsu_spu_strm_st = dfq_byp_ff_data[134] ; // strm store ack (vs. ma)
+
+assign  lsu_cpx_pkt_perror_iinv    = dfq_byp_ff_data[`CPX_PERR_DINV+1] ;   // bit directly above the dinv flag
+assign  lsu_cpx_pkt_perror_dinv    = dfq_byp_ff_data[`CPX_PERR_DINV] ;
+assign  lsu_cpx_pkt_perror_set[1:0] = 
+	dfq_byp_ff_data[`CPX_PERR_DINV_AD5:`CPX_PERR_DINV_AD4] ;
+
+assign  lsu_cpx_pkt_ld_err[1:0] = dfq_byp_ff_data[138:137] ;   // err field - cpx ld pkt
+assign  lsu_cpx_pkt_l2miss = dfq_byp_ff_data[139] ;   // ld req missed in L2
+
+
+//=================================================================================================
+//      DFQ OUTPUT - LOCAL PROCESSING
+//=================================================================================================
+
+
+mux4ds  #(14) invfld_lo_sel (   // picks one 14b {A11,A10,A01,A00} group out of C0..C3 of the bypass-mux packet
+        .in0    ({dfq_byp_mx_data[`CPX_A11_C0_HI:`CPX_A11_C0_LO],   // C0 group, chosen by lsu_cpu_dcd_sel[0]
+                  dfq_byp_mx_data[`CPX_A10_C0_HI:`CPX_A10_C0_LO],
+                  dfq_byp_mx_data[`CPX_A01_C0_HI:`CPX_A01_C0_LO],
+                  dfq_byp_mx_data[`CPX_A00_C0_HI:`CPX_A00_C0_LO]}),
+        .in1    ({dfq_byp_mx_data[`CPX_A11_C1_HI:`CPX_A11_C1_LO],   // C1 group, chosen by lsu_cpu_dcd_sel[1]
+                  dfq_byp_mx_data[`CPX_A10_C1_HI:`CPX_A10_C1_LO],
+                  dfq_byp_mx_data[`CPX_A01_C1_HI:`CPX_A01_C1_LO],
+                  dfq_byp_mx_data[`CPX_A00_C1_HI:`CPX_A00_C1_LO]}),
+        .in2    ({dfq_byp_mx_data[`CPX_A11_C2_HI:`CPX_A11_C2_LO],   // C2 group, chosen by lsu_cpu_dcd_sel[2]
+                  dfq_byp_mx_data[`CPX_A10_C2_HI:`CPX_A10_C2_LO],
+                  dfq_byp_mx_data[`CPX_A01_C2_HI:`CPX_A01_C2_LO],
+                  dfq_byp_mx_data[`CPX_A00_C2_HI:`CPX_A00_C2_LO]}),
+        .in3    ({dfq_byp_mx_data[`CPX_A11_C3_HI:`CPX_A11_C3_LO],   // C3 group, chosen by lsu_cpu_dcd_sel[3]
+                  dfq_byp_mx_data[`CPX_A10_C3_HI:`CPX_A10_C3_LO],
+                  dfq_byp_mx_data[`CPX_A01_C3_HI:`CPX_A01_C3_LO],
+                  dfq_byp_mx_data[`CPX_A00_C3_HI:`CPX_A00_C3_LO]}),
+        .sel0   (lsu_cpu_dcd_sel[0]),
+        .sel1   (lsu_cpu_dcd_sel[1]),
+        .sel2   (lsu_cpu_dcd_sel[2]),
+        .sel3   (lsu_cpu_dcd_sel[3]),
+        .dout   (cpx_cpulo_inv_data[13:0])   // lower-half candidate for invfld_sel
+);
+
+mux4ds  #(14) invfld_hi_sel (   // picks one 14b {A11,A10,A01,A00} group out of C4..C7 of the bypass-mux packet
+        .in0    ({dfq_byp_mx_data[`CPX_A11_C4_HI:`CPX_A11_C4_LO],   // C4 group, chosen by lsu_cpu_dcd_sel[4]
+                  dfq_byp_mx_data[`CPX_A10_C4_HI:`CPX_A10_C4_LO],
+                  dfq_byp_mx_data[`CPX_A01_C4_HI:`CPX_A01_C4_LO],
+                  dfq_byp_mx_data[`CPX_A00_C4_HI:`CPX_A00_C4_LO]}),
+        .in1    ({dfq_byp_mx_data[`CPX_A11_C5_HI:`CPX_A11_C5_LO],   // C5 group, chosen by lsu_cpu_dcd_sel[5]
+                  dfq_byp_mx_data[`CPX_A10_C5_HI:`CPX_A10_C5_LO],
+                  dfq_byp_mx_data[`CPX_A01_C5_HI:`CPX_A01_C5_LO],
+                  dfq_byp_mx_data[`CPX_A00_C5_HI:`CPX_A00_C5_LO]}),
+        .in2    ({dfq_byp_mx_data[`CPX_A11_C6_HI:`CPX_A11_C6_LO],   // C6 group, chosen by lsu_cpu_dcd_sel[6]
+                  dfq_byp_mx_data[`CPX_A10_C6_HI:`CPX_A10_C6_LO],
+                  dfq_byp_mx_data[`CPX_A01_C6_HI:`CPX_A01_C6_LO],
+                  dfq_byp_mx_data[`CPX_A00_C6_HI:`CPX_A00_C6_LO]}),
+        .in3    ({dfq_byp_mx_data[`CPX_A11_C7_HI:`CPX_A11_C7_LO],   // C7 group, chosen by lsu_cpu_dcd_sel[7]
+                  dfq_byp_mx_data[`CPX_A10_C7_HI:`CPX_A10_C7_LO],
+                  dfq_byp_mx_data[`CPX_A01_C7_HI:`CPX_A01_C7_LO],
+                  dfq_byp_mx_data[`CPX_A00_C7_HI:`CPX_A00_C7_LO]}),
+        .sel0   (lsu_cpu_dcd_sel[4]),
+        .sel1   (lsu_cpu_dcd_sel[5]),
+        .sel2   (lsu_cpu_dcd_sel[6]),
+        .sel3   (lsu_cpu_dcd_sel[7]),
+        .dout   (cpx_cpuhi_inv_data[13:0])   // upper-half candidate for invfld_sel
+);
+
+
+mux2ds  #(14) invfld_sel (                       // upper-half result when lsu_cpu_uhlf_sel, else lower-half
+        .dout   (lsu_cpu_inv_data[13:0]),
+        .sel0   (~lsu_cpu_uhlf_sel),
+        .in0    (cpx_cpulo_inv_data[13:0]),
+        .sel1   ( lsu_cpu_uhlf_sel),
+        .in1    (cpx_cpuhi_inv_data[13:0])
+);
+// Only bits 13:9, 7:2 and 0 are driven onto outputs; bits 8 and 1 have no output here.
+assign  lsu_cpu_inv_data_b0             = lsu_cpu_inv_data[0] ;
+assign  lsu_cpu_inv_data_b7to2[7:2]     = lsu_cpu_inv_data[7:2] ;
+assign  lsu_cpu_inv_data_b13to9[13:9]   = lsu_cpu_inv_data[13:9] ;
+
+// same structure as above for st data write way
+wire  [13:0] cpx_cpulo_dcfill_wrway,    // dout of st_dcfill_wrway_lo
+             cpx_cpuhi_dcfill_wrway,    // dout of st_dcfill_wrway_hi
+             cpx_st_dcfill_wrway_sel;   // dout of st_dcfill_wrway_sel
+// Unlike invfld_lo_sel/invfld_hi_sel, these muxes read the raw CPX packet
+// (cpx_spc_data_cx), not the bypass-mux output.
+mux4ds  #(14) st_dcfill_wrway_lo (
+        .in0    ({cpx_spc_data_cx[`CPX_A11_C0_HI:`CPX_A11_C0_LO],   // C0 group, chosen by lsu_cpu_dcd_sel[0]
+                  cpx_spc_data_cx[`CPX_A10_C0_HI:`CPX_A10_C0_LO],
+                  cpx_spc_data_cx[`CPX_A01_C0_HI:`CPX_A01_C0_LO],
+                  cpx_spc_data_cx[`CPX_A00_C0_HI:`CPX_A00_C0_LO]}),
+        .in1    ({cpx_spc_data_cx[`CPX_A11_C1_HI:`CPX_A11_C1_LO],   // C1 group, chosen by lsu_cpu_dcd_sel[1]
+                  cpx_spc_data_cx[`CPX_A10_C1_HI:`CPX_A10_C1_LO],
+                  cpx_spc_data_cx[`CPX_A01_C1_HI:`CPX_A01_C1_LO],
+                  cpx_spc_data_cx[`CPX_A00_C1_HI:`CPX_A00_C1_LO]}),
+        .in2    ({cpx_spc_data_cx[`CPX_A11_C2_HI:`CPX_A11_C2_LO],   // C2 group, chosen by lsu_cpu_dcd_sel[2]
+                  cpx_spc_data_cx[`CPX_A10_C2_HI:`CPX_A10_C2_LO],
+                  cpx_spc_data_cx[`CPX_A01_C2_HI:`CPX_A01_C2_LO],
+                  cpx_spc_data_cx[`CPX_A00_C2_HI:`CPX_A00_C2_LO]}),
+        .in3    ({cpx_spc_data_cx[`CPX_A11_C3_HI:`CPX_A11_C3_LO],   // C3 group, chosen by lsu_cpu_dcd_sel[3]
+                  cpx_spc_data_cx[`CPX_A10_C3_HI:`CPX_A10_C3_LO],
+                  cpx_spc_data_cx[`CPX_A01_C3_HI:`CPX_A01_C3_LO],
+                  cpx_spc_data_cx[`CPX_A00_C3_HI:`CPX_A00_C3_LO]}),
+        .sel0   (lsu_cpu_dcd_sel[0]),
+        .sel1   (lsu_cpu_dcd_sel[1]),
+        .sel2   (lsu_cpu_dcd_sel[2]),
+        .sel3   (lsu_cpu_dcd_sel[3]),
+        .dout   (cpx_cpulo_dcfill_wrway[13:0])
+);
+
+mux4ds  #(14) st_dcfill_wrway_hi (   // C4..C7 counterpart of st_dcfill_wrway_lo, also fed from the raw CPX packet
+        .in0    ({cpx_spc_data_cx[`CPX_A11_C4_HI:`CPX_A11_C4_LO],   // C4 group, chosen by lsu_cpu_dcd_sel[4]
+                  cpx_spc_data_cx[`CPX_A10_C4_HI:`CPX_A10_C4_LO],
+                  cpx_spc_data_cx[`CPX_A01_C4_HI:`CPX_A01_C4_LO],
+                  cpx_spc_data_cx[`CPX_A00_C4_HI:`CPX_A00_C4_LO]}),
+        .in1    ({cpx_spc_data_cx[`CPX_A11_C5_HI:`CPX_A11_C5_LO],   // C5 group, chosen by lsu_cpu_dcd_sel[5]
+                  cpx_spc_data_cx[`CPX_A10_C5_HI:`CPX_A10_C5_LO],
+                  cpx_spc_data_cx[`CPX_A01_C5_HI:`CPX_A01_C5_LO],
+                  cpx_spc_data_cx[`CPX_A00_C5_HI:`CPX_A00_C5_LO]}),
+        .in2    ({cpx_spc_data_cx[`CPX_A11_C6_HI:`CPX_A11_C6_LO],   // C6 group, chosen by lsu_cpu_dcd_sel[6]
+                  cpx_spc_data_cx[`CPX_A10_C6_HI:`CPX_A10_C6_LO],
+                  cpx_spc_data_cx[`CPX_A01_C6_HI:`CPX_A01_C6_LO],
+                  cpx_spc_data_cx[`CPX_A00_C6_HI:`CPX_A00_C6_LO]}),
+        .in3    ({cpx_spc_data_cx[`CPX_A11_C7_HI:`CPX_A11_C7_LO],   // C7 group, chosen by lsu_cpu_dcd_sel[7]
+                  cpx_spc_data_cx[`CPX_A10_C7_HI:`CPX_A10_C7_LO],
+                  cpx_spc_data_cx[`CPX_A01_C7_HI:`CPX_A01_C7_LO],
+                  cpx_spc_data_cx[`CPX_A00_C7_HI:`CPX_A00_C7_LO]}),
+        .sel0   (lsu_cpu_dcd_sel[4]),
+        .sel1   (lsu_cpu_dcd_sel[5]),
+        .sel2   (lsu_cpu_dcd_sel[6]),
+        .sel3   (lsu_cpu_dcd_sel[7]),
+        .dout   (cpx_cpuhi_dcfill_wrway[13:0])
+);
+
+
+
+mux2ds  #(14) st_dcfill_wrway_sel (              // upper-half result when lsu_cpu_uhlf_sel, else lower-half
+        .dout   (cpx_st_dcfill_wrway_sel[13:0]),
+        .sel0   (~lsu_cpu_uhlf_sel),
+        .in0    (cpx_cpulo_dcfill_wrway[13:0]),
+        .sel1   ( lsu_cpu_uhlf_sel),
+        .in1    (cpx_cpuhi_dcfill_wrway[13:0])
+);
+
+// select the appropriate offset
+// Bits 0, 4 and 7 of cpx_st_dcfill_wrway_sel act as flags for sub-fields 0..2; sub-field 3 is the default.
+//bug3718 - 0in bug - cpx_st_dcfill_wrway_sel can be multi-hot for non-stack cpx responses
+//          hence qual w/ stack req type
+wire  [3:0]  st_dcfill_wrway_mxsel ;
+// rst_tri_en forces the default leg: selects 2:0 are cleared and select 3 is set.
+assign st_dcfill_wrway_mxsel[0] =  (lsu_cpxpkt_type_dcd_cx[2] & cpx_st_dcfill_wrway_sel[0]) & ~rst_tri_en ;
+assign st_dcfill_wrway_mxsel[1] =  (lsu_cpxpkt_type_dcd_cx[2] & cpx_st_dcfill_wrway_sel[4]) & ~rst_tri_en ;
+assign st_dcfill_wrway_mxsel[2] =  (lsu_cpxpkt_type_dcd_cx[2] & cpx_st_dcfill_wrway_sel[7]) & ~rst_tri_en ;
+assign st_dcfill_wrway_mxsel[3] =  ~|st_dcfill_wrway_mxsel[2:0] | rst_tri_en;   // (none of 2:0 set) OR rst_tri_en; bit 11 is not checked
+// 2-bit write way taken from the sub-field whose flag is set.
+mux4ds  #(2) st_dcfill_wrway_sel_b54 (
+        .in0    (cpx_st_dcfill_wrway_sel[3:2]),
+        .in1    (cpx_st_dcfill_wrway_sel[6:5]),
+        .in2    (cpx_st_dcfill_wrway_sel[10:9]),
+        .in3    (cpx_st_dcfill_wrway_sel[13:12]),
+        .sel0   (st_dcfill_wrway_mxsel[0]),
+        .sel1   (st_dcfill_wrway_mxsel[1]),
+        .sel2   (st_dcfill_wrway_mxsel[2]),
+        .sel3   (st_dcfill_wrway_mxsel[3]),
+        .dout   (cpx_st_dcfill_wrway[1:0])
+);
+// Address bits 5:4 encode which flag is set: bit 0 -> 00, bit 4 -> 01, bit 7 -> 10, bit 11 -> 11.
+// These flags are not qualified with lsu_cpxpkt_type_dcd_cx[2] or rst_tri_en, unlike the mux selects above.
+assign  cpx_st_ack_addr_b54[0] = cpx_st_dcfill_wrway_sel[4] | cpx_st_dcfill_wrway_sel[11] ;
+assign  cpx_st_ack_addr_b54[1] = cpx_st_dcfill_wrway_sel[7] | cpx_st_dcfill_wrway_sel[11] ;
+
+//=================================================================================================
+
+
+//assign store_dfq_pkt[`STB_DFQ_WIDTH-1:0] = stb_dfq_pkt_data[`STB_DFQ_WIDTH-1:0] ;
+
+// Items generated/prior to fill cycle (but after DFQ read).
+// This logic will be put in qctl and then be fwded to dcache.
+// - Parity (16b) - load & store.
+// - Byte Enable (16b) - store (8b), ld (16b) all high.
+// - Cache Tag (30b) - obtained from LMQ.
+// - RD1 (5b) - obtained from LMQ.
+// - RD2 (5b) - obtained from LMQ.
+// ** DFQ will contain either loads or inv.
+
+// Need to do alignment. Assume dw for now.
+// For a load, a bypass will always happen, a write is 
+  
+// Mux in diagnostic information. Only data is muxed in because
+// all other info is critical
+// The diagnostic write data is registered once before it reaches the mux.
+   wire [63:0] diagnstc_wr_data;
+   
+dff_s  #(64) diagnstc_wr_data_ff (
+        .din    (lsu_diagnstc_wr_data_e[63:0]), 
+        .q      (diagnstc_wr_data[63:0]),
+        .clk    (clk),
+        .se     (1'b0),     .si (),          .so ()
+        ); 
+// 64b D-cache write data: store data from the staged DFQ packet when lsu_dfq_st_vld, else the diagnostic data.
+mux2ds  #(64) dcwr_sel (
+  //.in0  ({store_dfq_pkt[`STB_DFQ_DA_HI:`STB_DFQ_DA_LO]}),
+  .in0  ({dfq_byp_ff_data[`STB_DFQ_DA_HI:`STB_DFQ_DA_LO]}),
+  .in1  ({diagnstc_wr_data[63:0]}),
+  .sel0 ( lsu_dfq_st_vld),  
+  .sel1 (~lsu_dfq_st_vld),
+  //.sel0 (~lsu_diagnstc_wr_src_sel_e),  
+  //.sel1 ( lsu_diagnstc_wr_src_sel_e),
+  .dout (dcache_wr_data[63:0])
+);
+
+
+// Store fill data.
+// store currently assumed to be dword.
+// st dword is duplicated across 16B: dcache_wr_data lands in both the
+// upper (127:64) and the lower (63:0) half of the fill word.
+// currently assume st and not atomics supported.
+// The width can be reduced !!!
+//
+// Earlier revisions also packed write way (131:130), size (129:128) and the
+// fill address above the data in this vector; those fields are now driven on
+// lsu_st_way_e, lsu_st_dcfill_size_e and st_dcfill_addr.
+assign st_dcfill_data[127:0] = {2{dcache_wr_data[63:0]}} ;
+
+// Store fill address, assembled from the staged DFQ packet:
+//   addr 10:6 - `CPX_INV_PA_HI:`CPX_INV_PA_LO field
+//   addr 5:4  - bits 86:85 (slot written from cpx_st_ack_addr_b54 in dfq_st_data)
+//   addr 3:0  - four bits starting at `STB_DFQ_AD_LO
+   assign st_dcfill_addr[10:0] = {
+      dfq_byp_ff_data[`CPX_INV_PA_HI:`CPX_INV_PA_LO],
+      dfq_byp_ff_data[86:85],
+      dfq_byp_ff_data[`STB_DFQ_AD_LO+3:`STB_DFQ_AD_LO]
+   } ;
+   
+// lmq0_pcx_pkt will have to be brought in. Same for lmq_ld_addr
+// The width can be reduced !!!
+
+// Store write way and size, read from the staged DFQ packet
+// (slots 84:83 and the `STB_DFQ_SZ field).
+// potentially we can take one cycle earlier version dfq_st_data
+   assign lsu_st_dcfill_size_e[1:0] = dfq_byp_ff_data[`STB_DFQ_SZ_HI:`STB_DFQ_SZ_LO] ;
+   assign lsu_st_way_e[1:0]         = dfq_byp_ff_data[84:83] ;
+
+// Load/invalidate fill data: just the data field of the staged DFQ packet.
+// Earlier revisions prefixed it with thread id, ld/inv type, a write flag,
+// way (131:130, from lmq_ld_way), size (129:128) and a 40b address; none of
+// that is carried in this vector any more.
+//
+// 127:0 - dfq_byp_ff_data[`DFQ_DA_HI:`DFQ_DA_LO]
+assign ldinv_dcfill_data[127:0] =
+   dfq_byp_ff_data[`DFQ_DA_HI:`DFQ_DA_LO] ;
+
+
+// Select between dfq-bypass (ld-inv) and store.
+// *** cpu-id currently hardwired in pkt
+// This may be further restricted in width !!!
+
+mux2ds  #(128) dfq_pkt_src (
+  .in0  (st_dcfill_data[127:0]),    
+  .in1  (ldinv_dcfill_data[127:0]), 
+  .sel0 (~lsu_dfq_ld_vld),  
+  .sel1 (lsu_dfq_ld_vld),
+  .dout (lsu_dcfill_data[127:0])
+);
+
+// Parity Generation for write data - from load or store.
+wire  [15:0]  dcache_wr_parity ;
+lsu_dc_parity_gen parity_gen (
+    .data_in  (lsu_dcfill_data[`DCFILL_DA_HI:`DCFILL_DA_LO]),
+    .parity_out (dcache_wr_parity[15:0])
+  );
+
+// Bug 4125. Corrupt parity if l2 unc err detected. Corrupt both upper and lower half
+// as subsequent read will pick up one of two halves.
+//wire	parity_byte0_flip ;
+//wire	parity_byte8_flip ;
+wire	ld_unc_error ;
+assign	ld_unc_error = (dfq_byp_ff_data[138] & dfq_byp_ff_data[`DFQ_WIDTH-1]); // not critical !
+
+//bug7021/ECO7022
+//assign	parity_byte0_flip = dcache_wr_parity[0] ^ ld_unc_error ;
+//assign	parity_byte8_flip = dcache_wr_parity[8] ^ ld_unc_error ;
+
+   wire [15:0] parity_byte_flip;
+   assign      parity_byte_flip[15:0] = dcache_wr_parity[15:0] ^ {16{ld_unc_error }};
+   
+//assign  dcache_wr_parity_mod[15:0]  =
+//    lsu_diagnstc_wr_src_sel_e ? 
+//    ({lsu_diagnstc_dc_prty_invrt_e[7:0],lsu_diagnstc_dc_prty_invrt_e[7:0]} ^ dcache_wr_parity[15:0]) :
+//    dcache_wr_parity[15:0] ;
+
+wire  [15:0]  diagnstc_wr_parity;
+
+assign diagnstc_wr_parity[15:0]  =  {lsu_diagnstc_dc_prty_invrt_e[7:0],lsu_diagnstc_dc_prty_invrt_e[7:0]} ^ dcache_wr_parity[15:0];
+
+mux2ds  #(16) dcache_wr_parity_mod_mux (
+              .in0(diagnstc_wr_parity[15:0]),
+//              .in1({dcache_wr_parity[15:9],parity_byte8_flip,dcache_wr_parity[7:1],parity_byte0_flip}),
+              .in1(parity_byte_flip[15:0]),        //bug7021/ECO7022                  
+              .sel0(~lsu_dfq_ldst_vld),
+              .sel1( lsu_dfq_ldst_vld),
+              //.sel0(lsu_diagnstc_wr_src_sel_e),
+              //.sel1(~lsu_diagnstc_wr_src_sel_e),
+              .dout(dcache_wr_parity_mod[15:0])
+);
+
+
+// Bist read and write address sent thru fill_addr
+//assign  lsu_dcache_fill_addr_e[10:0] = 
+//lsu_dc_iob_access_e ? {dcache_iob_addr_e[7:0],2'b00} :
+//(lsu_bist_wvld_e | lsu_bist_rvld_e) ? {1'b0, lsu_bist_addr_e[7:0],2'b00} :  //??FIX
+//  lsu_diagnstc_wr_src_sel_e ? lsu_diagnstc_wr_addr_e[10:0] :
+//    lsu_dcfill_data[`DCFILL_AD_LO+10:`DCFILL_AD_LO];
+
+//   wire [10:0] lsu_dcache_fill_addr_e;
+   
+//mux4ds  #(11) lsu_dcache_fill_addr_e_mux (
+//  .in0  ({dcache_iob_addr_e[8:0],2'b00}),
+//  .in1  ({mbist_dcache_index[6:0], mbist_dcache_word, 3'b00}),
+//  .in2  (lsu_diagnstc_wr_addr_e[10:0]),
+//  .in3  (lsu_dcfill_data[`DCFILL_AD_LO+10:`DCFILL_AD_LO]),
+//  .sel0 (lsu_dcfill_mx_sel_e[0]),
+//  .sel1 (lsu_dcfill_mx_sel_e[1]),
+//  .sel2 (lsu_dcfill_mx_sel_e[2]),
+//  .sel3 (lsu_dcfill_mx_sel_e[3]),
+//  .dout (lsu_dcache_fill_addr_e[10:0])
+//);
+
+wire	[63:0] misc_fill_data_e ;
+// Use smaller width mux to save area.
+//assign	misc_fill_data_e[63:0] =
+//lsu_dc_iob_access_e ? dcache_iob_data_e[63:0] :
+//	 		{32{lsu_bist_wdata_e[1:0]}} ;
+
+   wire [7:0] mbist_write_data_d1;
+
+dff_s #(8) mbist_write_data_ff (
+   .din (mbist_write_data[7:0]),
+   .q   (mbist_write_data_d1[7:0]),
+   .clk    (clk),
+   .se     (1'b0),     .si (),          .so ()
+); 
+    
+
+   wire      [3:0] misc_fill_parity_e;
+assign    misc_fill_parity_e[3:0] = {4{~lsu_dc_iob_access_e}} & mbist_write_data_d1[3:0];
+
+mux2ds  #(64) misc_fill_data_e_mux (
+              .in0(dcache_iob_data_e[63:0]),
+              .in1({8{mbist_write_data_d1[7:0]}}),
+              .sel0(lsu_dc_iob_access_e),
+              .sel1(~lsu_dc_iob_access_e),
+              .dout(misc_fill_data_e[63:0])
+);
+
+mux2ds  #(144) lsu_dcache_fill_data_e_mux (
+               .in0({misc_fill_data_e[63:0],misc_fill_data_e[63:0],{4{misc_fill_parity_e[3:0]}}}),
+               .in1({lsu_dcfill_data[`DCFILL_DA_HI:`DCFILL_DA_LO],dcache_wr_parity_mod[15:0]}),
+               .sel0(lsu_dcfill_data_mx_sel_e),
+               .sel1(~lsu_dcfill_data_mx_sel_e),
+               .dout(lsu_dcache_fill_data_e[143:0])
+);
+
+//assign  lsu_dcache_fill_size_e[1:0] = 
+//(lsu_dc_iob_access_e | lsu_bist_wvld_e | lsu_diagnstc_wr_src_sel_e) ? 2'b11 :
+//    lsu_dcfill_data[`DCFILL_SZ_HI:`DCFILL_SZ_LO] ;
+
+
+
+//   wire [1:0] bist_way_e;
+   
+//assign bist_way_e[1:0] = (lsu_bist_rvld_e | lsu_bist_wvld_e) ? 
+//                          mbist_dcache_way[1:0] : 2'b00;
+   
+//assign  bist_rsel_way_e[0] = ~bist_way_e[1] & ~bist_way_e[0] ;
+//assign  bist_rsel_way_e[1] = ~bist_way_e[1] &  bist_way_e[0] ;
+//assign  bist_rsel_way_e[2] =  bist_way_e[1] & ~bist_way_e[0] ;
+//assign  bist_rsel_way_e[3] =  bist_way_e[1] &  bist_way_e[0] ;
+
+//   assign lsu_bist_rsel_way_e[3:0] = bist_rsel_way_e[3:0];
+   
+ 
+// This staging may have to go elsewhere 
+//always @(posedge clk)
+//  begin
+//    bist_rsel_way_m[3:0] <= bist_rsel_way_e[3:0] ;  
+//  end
+
+//always @(posedge clk)
+//  begin
+//    lsu_bist_rsel_way_wb[3:0] <= bist_rsel_way_m[3:0] ; 
+//  end
+
+//dff #(4) bist_rsel_way_m_ff (
+//        .din    (bist_rsel_way_e[3:0]),
+//        .q      (bist_rsel_way_m[3:0]),
+//        .clk    (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        );
+
+//dff #(4) lsu_bist_rsel_way_wb_ff (
+//        .din    (bist_rsel_way_m[3:0]),
+//        .q      (lsu_bist_rsel_way_wb[3:0]),
+//        .clk    (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        );
+
+//assign  lsu_dcache_fill_way_e[0] = 
+//lsu_dc_iob_access_e ? dcache_iob_wy_e[0] : 
+//	(lsu_bist_wvld_e | lsu_bist_rvld_e) ? bist_rsel_way_e[0] :
+//  		lsu_diagnstc_wr_src_sel_e ? lsu_diagnstc_wr_way_e[0] : 
+//    			~lsu_dcfill_data[`DCFILL_WY_HI] & ~lsu_dcfill_data[`DCFILL_WY_LO] ;
+//assign  lsu_dcache_fill_way_e[1] = 
+//lsu_dc_iob_access_e ? dcache_iob_wy_e[1] : 
+//	(lsu_bist_wvld_e | lsu_bist_rvld_e) ? bist_rsel_way_e[1] :
+//  		lsu_diagnstc_wr_src_sel_e ? lsu_diagnstc_wr_way_e[1] : 
+//    			~lsu_dcfill_data[`DCFILL_WY_HI] &  lsu_dcfill_data[`DCFILL_WY_LO] ;
+//assign  lsu_dcache_fill_way_e[2] =  
+//lsu_dc_iob_access_e ? dcache_iob_wy_e[2] : 
+//	(lsu_bist_wvld_e | lsu_bist_rvld_e) ?  bist_rsel_way_e[2] :
+//  		lsu_diagnstc_wr_src_sel_e ? lsu_diagnstc_wr_way_e[2] : 
+//    			lsu_dcfill_data[`DCFILL_WY_HI] & ~lsu_dcfill_data[`DCFILL_WY_LO] ;
+//assign  lsu_dcache_fill_way_e[3] =  
+//lsu_dc_iob_access_e ? dcache_iob_wy_e[3] : 
+//	(lsu_bist_wvld_e | lsu_bist_rvld_e) ?  bist_rsel_way_e[3] :
+//  		lsu_diagnstc_wr_src_sel_e ? lsu_diagnstc_wr_way_e[3] : 
+//    			lsu_dcfill_data[`DCFILL_WY_HI] &  lsu_dcfill_data[`DCFILL_WY_LO] ;
+
+/*   
+mux4ds  #(1) lsu_dcache_fill_way0_e_mux (
+  .in0  (dcache_iob_wy_e[0]),
+  .in1  (bist_rsel_way_e[0]),        
+  .in2  (lsu_diagnstc_wr_way_e[0]),    
+  .in3  (~lsu_dcfill_data[131] & ~lsu_dcfill_data[130]),
+  .sel0 (lsu_dcfill_mx_sel_e[0]),
+  .sel1 (lsu_dcfill_mx_sel_e[1]),
+  .sel2 (lsu_dcfill_mx_sel_e[2]),
+  .sel3 (lsu_dcfill_mx_sel_e[3]),
+  .dout (lsu_dcache_fill_way_e[0]));                                 
+
+mux4ds  #(1) lsu_dcache_fill_way1_e_mux (
+  .in0  (dcache_iob_wy_e[1]),
+  .in1  (bist_rsel_way_e[1]),        
+  .in2  (lsu_diagnstc_wr_way_e[1]),    
+  .in3  (~lsu_dcfill_data[131] &  lsu_dcfill_data[130]),
+  .sel0 (lsu_dcfill_mx_sel_e[0]),
+  .sel1 (lsu_dcfill_mx_sel_e[1]),
+  .sel2 (lsu_dcfill_mx_sel_e[2]),
+  .sel3 (lsu_dcfill_mx_sel_e[3]),
+  .dout (lsu_dcache_fill_way_e[1]));                                 
+
+mux4ds  #(1) lsu_dcache_fill_way2_e_mux (
+  .in0  (dcache_iob_wy_e[2]),
+  .in1  (bist_rsel_way_e[2]),        
+  .in2  (lsu_diagnstc_wr_way_e[2]),    
+  .in3  ( lsu_dcfill_data[131] & ~lsu_dcfill_data[130]),
+  .sel0 (lsu_dcfill_mx_sel_e[0]),
+  .sel1 (lsu_dcfill_mx_sel_e[1]),
+  .sel2 (lsu_dcfill_mx_sel_e[2]),
+  .sel3 (lsu_dcfill_mx_sel_e[3]),
+  .dout (lsu_dcache_fill_way_e[2]));                                 
+
+
+mux4ds  #(1) lsu_dcache_fill_way3_e_mux (
+  .in0  (dcache_iob_wy_e[3]),
+  .in1  (bist_rsel_way_e[3]),        
+  .in2  (lsu_diagnstc_wr_way_e[3]),    
+  .in3  ( lsu_dcfill_data[131] &  lsu_dcfill_data[130]),
+  .sel0 (lsu_dcfill_mx_sel_e[0]),
+  .sel1 (lsu_dcfill_mx_sel_e[1]),
+  .sel2 (lsu_dcfill_mx_sel_e[2]),
+  .sel3 (lsu_dcfill_mx_sel_e[3]),
+  .dout (lsu_dcache_fill_way_e[3]));                
+*/
+//   assign lsu_dcache_fill_way_enc_e[0] =  lsu_dcache_fill_way_e[1] |  lsu_dcache_fill_way_e[3];
+//   assign lsu_dcache_fill_way_enc_e[1] =  lsu_dcache_fill_way_e[2] |  lsu_dcache_fill_way_e[3];
+
+wire [63:0] l2fill_data_e;
+ 
+mux2ds        #(64) half_sel (
+      .in0    (lsu_dcfill_data[`DCFILL_DA_HI:`DCFILL_DA_LO+64]),
+      .in1    (lsu_dcfill_data[`DCFILL_DA_LO+63:`DCFILL_DA_LO]),
+      .sel0   (lsu_dfill_data_sel_hi),  .sel1 (~lsu_dfill_data_sel_hi),
+      .dout   (l2fill_data_e[63:0])
+);
+
+dff_s #(64) stgm_l2fd (
+        .din    (l2fill_data_e[63:0]),
+        .q      (lsu_l2fill_data[63:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_dc_parity_gen.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_dc_parity_gen.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_dc_parity_gen.v	(revision 6)
@@ -0,0 +1,50 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_dc_parity_gen.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+//
+//  Module Name: lsu_dc_parity_gen.v
+//  Description: Parity Generator based on odd parity (each parity bit is the XOR of its data group: 1 when the group has an odd number of 1s)
+
+module lsu_dc_parity_gen (parity_out, data_in);
+
+// Emits one parity bit per WIDTH-bit group of data_in: parity_out[g] is the XOR
+// of data_in[WIDTH*g +: WIDTH], so it is 1 when that group has an odd number
+// of 1s. Defaults (8 x 16 = 128 bits) were changed to match the dcache width.
+parameter WIDTH = 8 ;   // data bits covered by each parity bit
+parameter NUM = 16 ;    // number of groups, i.e. parity bits produced
+
+input  [WIDTH * NUM - 1 : 0] data_in ;    // group 0 occupies the low WIDTH bits
+output [NUM - 1 : 0]         parity_out ; // parity_out[g] belongs to group g
+
+reg    [NUM - 1 : 0]         grp_xor ;    // per-group result, drives parity_out
+reg                          acc ;        // running XOR of the current group
+integer grp, ofs ;                        // group index, bit offset inside it
+
+always @(data_in)
+    for (grp = 0; grp < NUM; grp = grp + 1) begin
+        acc = 1'b0 ;
+        for (ofs = 0; ofs < WIDTH; ofs = ofs + 1)
+            acc = acc ^ data_in[WIDTH * grp + ofs] ;
+        grp_xor[grp] = acc ;
+    end
+
+assign parity_out = grp_xor ;
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_dcdp.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_dcdp.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_dcdp.v	(revision 6)
@@ -0,0 +1,947 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_dcdp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//	Description:	LSU Data Cache Data Path
+//			- Final Way-Select Mux.
+//			- Alignment, Sign-Extension, Endianness.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which contains the 
+					// time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module lsu_dcdp ( /*AUTOARG*/
+   // Outputs
+   so, dcache_rdata_wb_buf, mbist_dcache_data_in, 
+   lsu_exu_dfill_data_w2, lsu_ffu_ld_data, stb_rdata_ramc_buf, 
+   // Inputs
+   rclk, si, se, rst_tri_en, dcache_rdata_wb, dcache_rparity_wb, 
+   dcache_rdata_msb_w0_m, dcache_rdata_msb_w1_m, 
+   dcache_rdata_msb_w2_m, dcache_rdata_msb_w3_m, lsu_bist_rsel_way_e, 
+   dcache_alt_mx_sel_e, cache_way_hit_buf2, morphed_addr_m, 
+   signed_ldst_byte_m, signed_ldst_hw_m, signed_ldst_w_m, 
+   merge7_sel_byte0_m, merge7_sel_byte7_m, merge6_sel_byte1_m, 
+   merge6_sel_byte6_m, merge5_sel_byte2_m, merge5_sel_byte5_m, 
+   merge4_sel_byte3_m, merge4_sel_byte4_m, merge3_sel_byte0_m, 
+   merge3_sel_byte3_m, merge3_sel_byte4_m, 
+   merge3_sel_byte7_default_m, merge3_sel_byte_m, merge2_sel_byte1_m, 
+   merge2_sel_byte2_m, merge2_sel_byte5_m, 
+   merge2_sel_byte6_default_m, merge2_sel_byte_m, merge0_sel_byte0_m, 
+   merge0_sel_byte1_m, merge0_sel_byte2_m, 
+   merge0_sel_byte3_default_m, merge0_sel_byte4_m, 
+   merge0_sel_byte5_m, merge0_sel_byte6_m, 
+   merge0_sel_byte7_default_m, merge1_sel_byte0_m, 
+   merge1_sel_byte1_m, merge1_sel_byte2_m, 
+   merge1_sel_byte3_default_m, merge1_sel_byte4_m, 
+   merge1_sel_byte5_m, merge1_sel_byte6_m, 
+   merge1_sel_byte7_default_m, merge0_sel_byte_1h_m, 
+   merge1_sel_byte_1h_m, merge1_sel_byte_2h_m, stb_rdata_ramc
+   ) ;	
+
+   input rclk;
+   input si;
+   input se;
+   output so;
+   input  rst_tri_en;
+   
+input  [63:0]  dcache_rdata_wb;
+output [63:0]  dcache_rdata_wb_buf;
+
+input [7:0] dcache_rparity_wb;
+output [71:0] mbist_dcache_data_in;
+
+output [63:0]		lsu_exu_dfill_data_w2; 	// bypass data - d$ fill or hit
+output [63:0]		lsu_ffu_ld_data ;	      // ld data to frf
+   
+
+//=========================================
+//dc_fill CP
+//=========================================
+   input [7:0]           dcache_rdata_msb_w0_m;    //from D$
+   input [7:0]           dcache_rdata_msb_w1_m;    //from D$
+   input [7:0]           dcache_rdata_msb_w2_m;    //from D$
+   input [7:0]           dcache_rdata_msb_w3_m;    //from D$
+
+   input [3:0]           lsu_bist_rsel_way_e;     //from qdp2
+
+   input                 dcache_alt_mx_sel_e;
+   input [3:0]           cache_way_hit_buf2;    //from dtlb
+   
+   input [7:0]           morphed_addr_m;  //from dctl
+
+   input          signed_ldst_byte_m;    //from dctl
+//   input          unsigned_ldst_byte_m;  //from dctl 
+   input          signed_ldst_hw_m;      //from dctl
+//   input          unsigned_ldst_hw_m;    //from dctl
+   input          signed_ldst_w_m;       //from dctl
+//   input          unsigned_ldst_w_m;     //from dctl
+
+input                   merge7_sel_byte0_m;
+input                   merge7_sel_byte7_m;
+   
+input                   merge6_sel_byte1_m;
+input                   merge6_sel_byte6_m;
+
+input                   merge5_sel_byte2_m;   
+input                   merge5_sel_byte5_m;
+
+input                   merge4_sel_byte3_m;
+input                   merge4_sel_byte4_m;
+
+input                   merge3_sel_byte0_m;
+input                   merge3_sel_byte3_m;
+input                   merge3_sel_byte4_m;
+input                   merge3_sel_byte7_default_m;
+input                   merge3_sel_byte_m ;
+
+input                   merge2_sel_byte1_m;
+input                   merge2_sel_byte2_m;
+input                   merge2_sel_byte5_m;
+input                   merge2_sel_byte6_default_m;
+input                   merge2_sel_byte_m ;
+
+input                   merge0_sel_byte0_m, merge0_sel_byte1_m;
+input                   merge0_sel_byte2_m, merge0_sel_byte3_default_m;
+   
+input                   merge0_sel_byte4_m, merge0_sel_byte5_m;
+input                   merge0_sel_byte6_m, merge0_sel_byte7_default_m;
+                                                               
+input                   merge1_sel_byte0_m, merge1_sel_byte1_m;
+input                   merge1_sel_byte2_m, merge1_sel_byte3_default_m;
+input                   merge1_sel_byte4_m, merge1_sel_byte5_m;
+input                   merge1_sel_byte6_m, merge1_sel_byte7_default_m; 
+
+input			             merge0_sel_byte_1h_m ;
+   
+input			             merge1_sel_byte_1h_m, merge1_sel_byte_2h_m ;
+
+   input [14:9]        stb_rdata_ramc;
+   output [14:9]       stb_rdata_ramc_buf;
+   
+//wire   [3:1]           lsu_byp_byte_zero_extend ; // zero-extend for bypass bytes 7-1
+wire   [7:1]           lsu_byp_byte_sign_extend ; // sign-extend by 1 for byp bytes 7-1
+   
+wire	[7:0]		byte0,byte1,byte2,byte3;
+wire	[7:0]		byte4,byte5,byte6,byte7;
+//wire [3:1] zero_extend_g;
+wire [7:1] sign_extend_g;
+
+wire	[7:0]		align_byte3 ;
+wire	[7:0]		align_byte2 ;
+wire	[7:0]		align_byte1_1h,align_byte1_2h;
+wire	[7:0]		align_byte0_1h,align_byte0_2h ;
+wire	[63:0]	align_byte ;
+
+
+wire                   merge7_sel_byte0;
+wire                   merge7_sel_byte7;
+   
+wire                   merge6_sel_byte1;
+wire                   merge6_sel_byte6;
+
+wire                   merge5_sel_byte2;   
+wire                   merge5_sel_byte5;
+
+wire                   merge4_sel_byte3;
+wire                   merge4_sel_byte4;
+
+wire                   merge3_sel_byte0;
+wire                   merge3_sel_byte3;
+wire                   merge3_sel_byte4;
+wire                   merge3_sel_byte7;
+wire                   merge3_sel_byte ;
+
+wire                   merge2_sel_byte1;
+wire                   merge2_sel_byte2;
+wire                   merge2_sel_byte5;
+wire                   merge2_sel_byte6;
+wire                   merge2_sel_byte ;
+
+wire                   merge0_sel_byte0, merge0_sel_byte1;
+wire                   merge0_sel_byte2, merge0_sel_byte3;
+wire                   merge0_sel_byte4, merge0_sel_byte5;
+wire                   merge0_sel_byte6, merge0_sel_byte7;
+wire                   merge1_sel_byte0, merge1_sel_byte1;
+wire                   merge1_sel_byte2, merge1_sel_byte3;
+wire                   merge1_sel_byte4, merge1_sel_byte5;
+wire                   merge1_sel_byte6, merge1_sel_byte7; 
+
+wire			              merge0_sel_byte_1h ;
+wire			              merge1_sel_byte_1h, merge1_sel_byte_2h ;
+
+   wire       clk;
+   assign     clk = rclk;
+
+   assign     stb_rdata_ramc_buf[14:9] = stb_rdata_ramc[14:9];
+   
+//=========================================================================================
+//	Alignment of Fill Data
+//=========================================================================================
+
+// Alignment needs to be done for following reasons :
+// - Write of data to irf on ld hit in l1.
+// - Write of data to irf on ld fill to l1 after miss in l1.
+// - Store of irf data to memory.
+//	- Data must be aligned before write to stb.
+//	- If data is bypassed from stb by ld then it will
+//	need realignment thru dfq i.e., it looks like a fill.
+// This applies to data either read from the dcache (hit) or dfq(fill on miss). 
+
+
+assign	byte7[7:0] = dcache_rdata_wb[63:56];
+assign	byte6[7:0] = dcache_rdata_wb[55:48];
+assign	byte5[7:0] = dcache_rdata_wb[47:40];
+assign	byte4[7:0] = dcache_rdata_wb[39:32];
+assign	byte3[7:0] = dcache_rdata_wb[31:24];
+assign	byte2[7:0] = dcache_rdata_wb[23:16];
+assign	byte1[7:0] = dcache_rdata_wb[15:8];
+assign	byte0[7:0] = dcache_rdata_wb[7:0];
+
+//assign	zero_extend_g[3:1] = lsu_byp_byte_zero_extend[3:1] ;
+assign	sign_extend_g[7:1] = lsu_byp_byte_sign_extend[7:1] ;
+
+//buffer
+   assign     dcache_rdata_wb_buf[63:0] = dcache_rdata_wb[63:0];
+   assign     mbist_dcache_data_in[71:0] = {dcache_rdata_wb_buf[63:0], dcache_rparity_wb[7:0]};
+
+// Final endian/justified/sign-extend Byte 0.
+//assign	align_byte0_1h[7:0]
+//	= merge0_sel_byte0 ? byte0[7:0] :
+//		  merge0_sel_byte1 ? byte1[7:0] :
+//			  merge0_sel_byte2 ? byte2[7:0] :
+//				  merge0_sel_byte3 ?  byte3[7:0] :
+//					  8'hxx ;
+
+   wire       merge0_sel_byte0_mxsel0, merge0_sel_byte1_mxsel1, merge0_sel_byte2_mxsel2, merge0_sel_byte3_mxsel3;
+   assign     merge0_sel_byte0_mxsel0 = merge0_sel_byte0 & ~rst_tri_en;
+   assign     merge0_sel_byte1_mxsel1 = merge0_sel_byte1 & ~rst_tri_en;
+   assign     merge0_sel_byte2_mxsel2 = merge0_sel_byte2 & ~rst_tri_en;
+   assign     merge0_sel_byte3_mxsel3 = merge0_sel_byte3 |  rst_tri_en;
+   
+mux4ds #(8) align_byte0_1h_mx (
+      .in0 (byte0[7:0]),
+      .in1 (byte1[7:0]), 
+      .in2 (byte2[7:0]),
+      .in3 (byte3[7:0]),
+      .sel0(merge0_sel_byte0_mxsel0),
+      .sel1(merge0_sel_byte1_mxsel1),
+      .sel2(merge0_sel_byte2_mxsel2),
+      .sel3(merge0_sel_byte3_mxsel3),
+      .dout(align_byte0_1h[7:0])
+);
+                             
+//assign	align_byte0_2h[7:0]
+//	= merge0_sel_byte4 ? byte4[7:0] :
+//		  merge0_sel_byte5 ? byte5[7:0] :
+//			  merge0_sel_byte6 ? byte6[7:0] :
+//				  merge0_sel_byte7 ? byte7[7:0] :
+//					  8'hxx ;
+
+   wire       merge0_sel_byte4_mxsel0, merge0_sel_byte5_mxsel1, merge0_sel_byte6_mxsel2, merge0_sel_byte7_mxsel3;
+   assign     merge0_sel_byte4_mxsel0 = merge0_sel_byte4 & ~rst_tri_en;
+   assign     merge0_sel_byte5_mxsel1 = merge0_sel_byte5 & ~rst_tri_en;
+   assign     merge0_sel_byte6_mxsel2 = merge0_sel_byte6 & ~rst_tri_en;
+   assign     merge0_sel_byte7_mxsel3 = merge0_sel_byte7 |  rst_tri_en;
+   
+mux4ds #(8) align_byte0_2h_mx (
+      .in0 (byte4[7:0]),
+      .in1 (byte5[7:0]), 
+      .in2 (byte6[7:0]),
+      .in3 (byte7[7:0]),
+      .sel0(merge0_sel_byte4_mxsel0),
+      .sel1(merge0_sel_byte5_mxsel1),
+      .sel2(merge0_sel_byte6_mxsel2),
+      .sel3(merge0_sel_byte7_mxsel3),
+      .dout(align_byte0_2h[7:0])
+);
+   
+// No sign-extension or zero-extension for byte0
+//assign	align_byte[7:0]	
+//	= merge0_sel_byte_1h ? align_byte0_1h[7:0] :
+//					align_byte0_2h[7:0] ;
+   
+   assign align_byte[7:0] = merge0_sel_byte_1h ? align_byte0_1h[7:0] :
+                                                 align_byte0_2h[7:0];
+   
+
+// Final endian/justified/sign-extend Byte 1.
+// *** The path thru byte1 is the most critical ***
+//assign	align_byte1_1h[7:0]
+//	= merge1_sel_byte0 ? byte0[7:0] :
+//		  merge1_sel_byte1 ? byte1[7:0] :
+//			  merge1_sel_byte2 ? byte2[7:0] :
+//				  merge1_sel_byte3 ? byte3[7:0] :
+//						8'hxx ;
+
+   wire       merge1_sel_byte0_mxsel0, merge1_sel_byte1_mxsel1, merge1_sel_byte2_mxsel2, merge1_sel_byte3_mxsel3;
+   assign     merge1_sel_byte0_mxsel0 = merge1_sel_byte0 & ~rst_tri_en;
+   assign     merge1_sel_byte1_mxsel1 = merge1_sel_byte1 & ~rst_tri_en;
+   assign     merge1_sel_byte2_mxsel2 = merge1_sel_byte2 & ~rst_tri_en;
+   assign     merge1_sel_byte3_mxsel3 = merge1_sel_byte3 |  rst_tri_en;
+   
+mux4ds #(8) align_byte1_1h_mx (
+    .in0 (byte0[7:0]),
+    .in1 (byte1[7:0]),
+    .in2 (byte2[7:0]), 
+    .in3 (byte3[7:0]),
+    .sel0(merge1_sel_byte0_mxsel0),
+    .sel1(merge1_sel_byte1_mxsel1),
+    .sel2(merge1_sel_byte2_mxsel2),
+    .sel3(merge1_sel_byte3_mxsel3),
+    .dout(align_byte1_1h[7:0])
+);
+      
+//assign	align_byte1_2h[7:0]
+//	= merge1_sel_byte4 ? byte4[7:0] :
+//		  merge1_sel_byte5 ? byte5[7:0] :
+//			  merge1_sel_byte6 ? byte6[7:0] :
+//					merge1_sel_byte7 ? byte7[7:0] :
+//						8'hxx ; 
+
+   wire       merge1_sel_byte4_mxsel0, merge1_sel_byte5_mxsel1, merge1_sel_byte6_mxsel2, merge1_sel_byte7_mxsel3;
+   assign     merge1_sel_byte4_mxsel0 = merge1_sel_byte4 & ~rst_tri_en;
+   assign     merge1_sel_byte5_mxsel1 = merge1_sel_byte5 & ~rst_tri_en;
+   assign     merge1_sel_byte6_mxsel2 = merge1_sel_byte6 & ~rst_tri_en;
+   assign     merge1_sel_byte7_mxsel3 = merge1_sel_byte7 |  rst_tri_en;
+
+mux4ds #(8) align_byte1_2h_mx (
+    .in0 (byte4[7:0]),
+    .in1 (byte5[7:0]),
+    .in2 (byte6[7:0]), 
+    .in3 (byte7[7:0]),
+    .sel0(merge1_sel_byte4_mxsel0),
+    .sel1(merge1_sel_byte5_mxsel1),
+    .sel2(merge1_sel_byte6_mxsel2),
+    .sel3(merge1_sel_byte7_mxsel3),
+    .dout(align_byte1_2h[7:0])
+);
+   
+//assign	align_byte[15:8] = 	
+//	zero_extend_g[1] ? 8'h00 :
+//		sign_extend_g[1] ? 8'hff :
+//			merge1_sel_byte_1h ? align_byte1_1h[7:0] :
+//				merge1_sel_byte_2h ? align_byte1_2h[7:0] :
+//						8'hxx ;
+
+//mux4ds #(8) align_byte1_mx (
+//    .in0 (8'h00),
+//    .in1 (8'hff),
+//    .in2 (align_byte1_1h[7:0]), 
+//    .in3 (align_byte1_2h[7:0]),
+//    .sel0(zero_extend_g[1]),
+//    .sel1(sign_extend_g[1]),
+//    .sel2(merge1_sel_byte_1h),
+//    .sel3(merge1_sel_byte_2h),
+//    .dout(align_byte[15:8])
+//);
+
+   //change to aoi from pass gate
+   //don't need zero_extend
+   
+assign  align_byte[15:8] =
+ (sign_extend_g[1] ? 8'hff : 8'h00) |
+ (merge1_sel_byte_1h ? align_byte1_1h[7:0] : 8'h00) |
+ (merge1_sel_byte_2h ? align_byte1_2h[7:0] : 8'h00);
+ 
+// Final endian/justified/sign-extend Byte 2.
+//assign	align_byte2[7:0]
+//	= merge2_sel_byte1 ? byte1[7:0] :
+//		  merge2_sel_byte2 ? byte2[7:0] :
+//					merge2_sel_byte5 ? byte5[7:0] :
+//           merge2_sel_byte6 ?  byte6[7:0] :
+//							8'hxx ;
+
+   wire       merge2_sel_byte1_mxsel0, merge2_sel_byte2_mxsel1, merge2_sel_byte5_mxsel2, merge2_sel_byte6_mxsel3;
+   assign     merge2_sel_byte1_mxsel0 = merge2_sel_byte1 & ~rst_tri_en;
+   assign     merge2_sel_byte2_mxsel1 = merge2_sel_byte2 & ~rst_tri_en;
+   assign     merge2_sel_byte5_mxsel2 = merge2_sel_byte5 & ~rst_tri_en;
+   assign     merge2_sel_byte6_mxsel3 = merge2_sel_byte6 |  rst_tri_en;
+   
+mux4ds #(8) align_byte2_1st_mx (
+         .in0 (byte1[7:0]),
+         .in1 (byte2[7:0]),
+         .in2 (byte5[7:0]),
+         .in3 (byte6[7:0]),
+         .sel0(merge2_sel_byte1_mxsel0),
+         .sel1(merge2_sel_byte2_mxsel1),
+         .sel2(merge2_sel_byte5_mxsel2),
+         .sel3(merge2_sel_byte6_mxsel3),
+         .dout(align_byte2[7:0])                     
+                                );
+   
+//assign	align_byte[23:16] = 	
+//	zero_extend_g[2] ? 8'h00 :
+//		sign_extend_g[2] ? 8'hff :
+//				merge2_sel_byte ? align_byte2[7:0] :
+//								8'hxx ;
+
+//mux3ds #(8) align_byte2_2nd_mx  (
+//         .in0 (8'h00),
+//         .in1 (8'hff),
+//         .in2 (align_byte2[7:0]),
+//         .sel0(zero_extend_g[2]),
+//         .sel1(sign_extend_g[2]),
+//         .sel2(merge2_sel_byte),
+//         .dout(align_byte[23:16])
+//                                      );
+
+assign    align_byte[23:16] =
+( sign_extend_g[2] ? 8'hff : 8'h00) |
+(  merge2_sel_byte ? align_byte2[7:0] : 8'h00);
+                                 
+// Final endian/justified/sign-extend Byte 3.
+//assign	align_byte3[7:0]
+//	= merge3_sel_byte0 ? byte0[7:0] :
+//			merge3_sel_byte3 ? byte3[7:0] :
+//				merge3_sel_byte4 ? byte4[7:0] :
+// 				merge3_sel_byte7 ? byte7[7:0] :
+//					  8'hxx ;
+
+   wire       merge3_sel_byte0_mxsel0, merge3_sel_byte3_mxsel1, merge3_sel_byte4_mxsel2, merge3_sel_byte7_mxsel3;
+   assign     merge3_sel_byte0_mxsel0 = merge3_sel_byte0 & ~rst_tri_en;
+   assign     merge3_sel_byte3_mxsel1 = merge3_sel_byte3 & ~rst_tri_en;
+   assign     merge3_sel_byte4_mxsel2 = merge3_sel_byte4 & ~rst_tri_en;
+   assign     merge3_sel_byte7_mxsel3 = merge3_sel_byte7 |  rst_tri_en;
+   
+mux4ds #(8) align_byte3_1st_mx (
+         .in0 (byte0[7:0]),
+         .in1 (byte3[7:0]),
+         .in2 (byte4[7:0]),
+         .in3 (byte7[7:0]),
+         .sel0(merge3_sel_byte0_mxsel0),
+         .sel1(merge3_sel_byte3_mxsel1),
+         .sel2(merge3_sel_byte4_mxsel2),
+         .sel3(merge3_sel_byte7_mxsel3),
+         .dout(align_byte3[7:0])
+                                     );
+   
+//assign	align_byte[31:24] =	
+//	zero_extend_g[3] ? 8'h00 :
+//		sign_extend_g[3] ? 8'hff :
+//			merge3_sel_byte ? align_byte3[7:0] :
+//				8'hxx ;
+
+//mux3ds #(8) align_byte3_2nd_mx (
+//         .in0 (8'h00),
+//         .in1 (8'hff), 
+//         .in2 (align_byte3[7:0]),
+//         .sel0(zero_extend_g[3]),
+//         .sel1(sign_extend_g[3]),
+//         .sel2(merge3_sel_byte),
+//         .dout(align_byte[31:24])
+//                                     );
+
+assign    align_byte[31:24] =
+  (sign_extend_g[3] ? 8'hff : 8'h00 ) |
+  (merge3_sel_byte  ?  align_byte3[7:0] : 8'h00);
+        
+// Final endian/justified/sign-extend Byte 4.
+//assign	align_byte[39:32]
+//	= zero_extend_g[4] ? 8'h00 :
+//		 sign_extend_g[4] ? 8'hff :
+//       merge4_sel_byte3 ? byte3[7:0] : 
+//         merge4_sel_byte4 ? byte4[7:0] : 
+//           8'hxx;
+
+//mux4ds #(8) align_byte4_mx (
+//        .in0 (8'h00),
+//        .in1 (8'hff),
+//        .in2 (byte3[7:0]),
+//        .in3 (byte4[7:0]),
+//        .sel0(zero_extend_g[4]),
+//        .sel1(sign_extend_g[4]),
+//        .sel2(merge4_sel_byte3),
+//        .sel3(merge4_sel_byte4),
+//        .dout(align_byte[39:32])
+//                                 );
+
+assign align_byte[39:32] = 
+  (sign_extend_g[4] ? 8'hff : 8'h00) |
+  (merge4_sel_byte3 ? byte3[7:0] : 8'h00) |
+  (merge4_sel_byte4 ? byte4[7:0] : 8'h00);
+   
+// Final endian/justified/sign-extend Byte 5.
+//assign	align_byte[47:40]
+//  = zero_extend_g[5] ? 8'h00 :
+//		  sign_extend_g[5] ? 8'hff :
+//	      merge5_sel_byte2 ? byte2[7:0] : 
+//          merge5_sel_byte5 ? byte5[7:0] :
+//            8'hxx ;
+
+//mux4ds #(8) align_byte5_mx (
+//        .in0 (8'h00),
+//        .in1 (8'hff),
+//        .in2 (byte2[7:0]),
+//        .in3 (byte5[7:0]),
+//        .sel0(zero_extend_g[5]),
+//        .sel1(sign_extend_g[5]),
+//        .sel2(merge5_sel_byte2),
+//        .sel3(merge5_sel_byte5),
+//        .dout(align_byte[47:40])
+//                                 );
+ 
+assign align_byte[47:40] =                     // result byte 5: OR of gated terms, 8'h00 when no enable is set
+ {8{sign_extend_g[5]}} |                       // all ones when sign_extend_g[5] is set
+ ({8{merge5_sel_byte2}} & byte2[7:0]) |
+ ({8{merge5_sel_byte5}} & byte5[7:0]);
+   
+ 
+// Final endian/justified/sign-extend Byte 6.
+//assign	align_byte[55:48]
+//  = zero_extend_g[6] ? 8'h00 :
+//		  sign_extend_g[6] ? 8'hff :     
+//	      merge6_sel_byte1 ? byte1[7:0] : 
+//         merge6_sel_byte6 ? byte6[7:0] :
+//            8'hxx ;
+
+//mux4ds #(8) align_byte6_mx (
+//        .in0 (8'h00),
+//        .in1 (8'hff),
+//        .in2 (byte1[7:0]),
+//        .in3 (byte6[7:0]),
+//        .sel0(zero_extend_g[6]),
+//        .sel1(sign_extend_g[6]),
+//        .sel2(merge6_sel_byte1),
+//        .sel3(merge6_sel_byte6),
+//        .dout(align_byte[55:48])
+//                                 );
+
+assign  align_byte[55:48] =                    // result byte 6: OR of gated terms, 8'h00 when no enable is set
+ {8{sign_extend_g[6]}} |                       // all ones when sign_extend_g[6] is set
+ ({8{merge6_sel_byte1}} & byte1[7:0]) |
+ ({8{merge6_sel_byte6}} & byte6[7:0]);
+       
+ 
+// Final endian/justified/sign-extend Byte 7.
+//assign	align_byte[63:56] =	
+//	zero_extend_g[7] ? 8'h00 :
+//		sign_extend_g[7] ? 8'hff :
+//			merge7_sel_byte0 ? byte0[7:0] :
+//  			merge7_sel_byte7 ? byte7[7:0] :
+//					8'hxx ;
+
+//mux4ds #(8) align_byte7_mx (
+//        .in0 (8'h00),
+//        .in1 (8'hff),
+//        .in2 (byte0[7:0]),
+//        .in3 (byte7[7:0]),
+//        .sel0(zero_extend_g[7]),
+//        .sel1(sign_extend_g[7]),
+//        .sel2(merge7_sel_byte0),
+//        .sel3(merge7_sel_byte7),
+//        .dout(align_byte[63:56])
+//                                 );
+
+assign align_byte[63:56] =                     // result byte 7: OR of gated terms, 8'h00 when no enable is set
+  {8{sign_extend_g[7]}} |                      // all ones when sign_extend_g[7] is set
+  ({8{merge7_sel_byte0}} & byte0[7:0]) |
+  ({8{merge7_sel_byte7}} & byte7[7:0]);
+   
+//====================================================
+//dc_fill CP sign/zero control signals
+//====================================================
+   wire [7:0] ld_data_msb_w0_m;   // 8 bits per w0..w3, before the flop below
+   wire [7:0] ld_data_msb_w1_m;   // NOTE(review): presumably one most-significant bit per data byte - confirm at the source of dcache_rdata_msb_w*
+   wire [7:0] ld_data_msb_w2_m;
+   wire [7:0] ld_data_msb_w3_m;
+
+   wire [7:0] ld_data_msb_w0_g;   // the same bits one clk edge later (outputs of ld_data_msb_stgg)
+   wire [7:0] ld_data_msb_w1_g;
+   wire [7:0] ld_data_msb_w2_g;
+   wire [7:0] ld_data_msb_w3_g;
+   
+assign ld_data_msb_w0_m[7:0] = dcache_rdata_msb_w0_m[7:0];   // plain aliases of the dcache_rdata_msb_w*_m signals
+assign ld_data_msb_w1_m[7:0] = dcache_rdata_msb_w1_m[7:0];
+assign ld_data_msb_w2_m[7:0] = dcache_rdata_msb_w2_m[7:0];
+assign ld_data_msb_w3_m[7:0] = dcache_rdata_msb_w3_m[7:0];
+   
+dff_s #(32) ld_data_msb_stgg (     // 4 x 8 bits registered together on clk: *_m -> *_g
+        .din    ({ld_data_msb_w0_m[7:0], ld_data_msb_w1_m[7:0], ld_data_msb_w2_m[7:0], ld_data_msb_w3_m[7:0]}),
+        .q      ({ld_data_msb_w0_g[7:0], ld_data_msb_w1_g[7:0], ld_data_msb_w2_g[7:0], ld_data_msb_w3_g[7:0]}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()   // si/so are left unconnected
+        );
+
+   wire [3:0] dcache_alt_rsel_way_m;   // lsu_bist_rsel_way_e delayed by one flop
+   wire       dcache_alt_mx_sel_m;     // dcache_alt_mx_sel_e delayed by one flop
+   
+dff_s #(5) dcache_alt_stgm  (          // first of two back-to-back flops: *_e -> *_m
+        .din    ({lsu_bist_rsel_way_e[3:0],  dcache_alt_mx_sel_e}),
+        .q      ({dcache_alt_rsel_way_m[3:0], dcache_alt_mx_sel_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+   wire [3:0] dcache_alt_rsel_way_g;   // two flops after lsu_bist_rsel_way_e
+   wire       dcache_alt_mx_sel_g;     // two flops after dcache_alt_mx_sel_e
+   
+dff_s #(5) dcache_alt_stgg  (          // second flop: *_m -> *_g
+        .din    ({dcache_alt_rsel_way_m[3:0],  dcache_alt_mx_sel_m}),
+        .q      ({dcache_alt_rsel_way_g[3:0],  dcache_alt_mx_sel_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   wire [3:0] cache_way_mx_sel;        // 4-bit select used below to pick among the w0..w3 sign-extend vectors
+   
+   assign     cache_way_mx_sel [3:0] = dcache_alt_mx_sel_g ? dcache_alt_rsel_way_g[3:0] : cache_way_hit_buf2[3:0];   // delayed alternate select wins over cache_way_hit_buf2 when dcache_alt_mx_sel_g is set
+
+//   wire [7:0] align_bytes_msb;
+   
+//mux4ds  #(8) align_bytes_msb_mux (
+//        .in0    (ld_data_msb_w0_g[7:0]),
+//        .in1    (ld_data_msb_w1_g[7:0]),
+//        .in2    (ld_data_msb_w2_g[7:0]),
+//        .in3    (ld_data_msb_w3_g[7:0]),
+//        .sel0   (cache_way_mx_sel[0]),  
+//        .sel1   (cache_way_mx_sel[1]),
+//        .sel2   (cache_way_mx_sel[2]),  
+//        .sel3   (cache_way_mx_sel[3]),
+//        .dout   (align_bytes_msb[7:0])
+//);
+
+   wire       signed_ldst_byte_g;   // registered copies of the signed_ldst_*_m inputs;
+   wire       signed_ldst_hw_g;     // they gate the per-byte sign-extend enables further down
+   wire       signed_ldst_w_g;
+   
+dff_s #(3) ldst_size_stgg(           // byte / hw / w flags: *_m -> *_g on clk
+ .din    ({signed_ldst_byte_m, signed_ldst_hw_m, signed_ldst_w_m}),
+ .q      ({signed_ldst_byte_g, signed_ldst_hw_g, signed_ldst_w_g}),
+ .clk    (clk),
+ .se     (se),       .si (),          .so ()
+);
+
+wire [7:0] morphed_addr_g;           // registered morphed_addr_m; each bit gates one ld_data_msb_w*_g bit in the sign_bit_w*_g equations
+   
+dff_s #(8) stgg_morphadd(            // morphed_addr_m -> morphed_addr_g on clk
+        .din    (morphed_addr_m[7:0]),
+        .q      (morphed_addr_g[7:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+   wire       sign_bit_w0_g, sign_bit_w1_g, sign_bit_w2_g, sign_bit_w3_g;   // one picked bit per w0..w3; morphed_addr_g[i] gates ld_data_msb_wN_g[7-i]
+
+assign  sign_bit_w0_g = |(morphed_addr_g[7:0] & {
+            ld_data_msb_w0_g[0],     // lines up with morphed_addr_g[7]
+            ld_data_msb_w0_g[1],
+            ld_data_msb_w0_g[2],
+            ld_data_msb_w0_g[3],
+            ld_data_msb_w0_g[4],
+            ld_data_msb_w0_g[5],
+            ld_data_msb_w0_g[6],
+            ld_data_msb_w0_g[7]});   // lines up with morphed_addr_g[0]
+
+assign  sign_bit_w1_g = |(morphed_addr_g[7:0] & {
+            ld_data_msb_w1_g[0],     // lines up with morphed_addr_g[7]
+            ld_data_msb_w1_g[1],
+            ld_data_msb_w1_g[2],
+            ld_data_msb_w1_g[3],
+            ld_data_msb_w1_g[4],
+            ld_data_msb_w1_g[5],
+            ld_data_msb_w1_g[6],
+            ld_data_msb_w1_g[7]});   // lines up with morphed_addr_g[0]
+
+assign  sign_bit_w2_g = |(morphed_addr_g[7:0] & {
+            ld_data_msb_w2_g[0],     // lines up with morphed_addr_g[7]
+            ld_data_msb_w2_g[1],
+            ld_data_msb_w2_g[2],
+            ld_data_msb_w2_g[3],
+            ld_data_msb_w2_g[4],
+            ld_data_msb_w2_g[5],
+            ld_data_msb_w2_g[6],
+            ld_data_msb_w2_g[7]});   // lines up with morphed_addr_g[0]
+
+assign  sign_bit_w3_g = |(morphed_addr_g[7:0] & {
+            ld_data_msb_w3_g[0],     // lines up with morphed_addr_g[7]
+            ld_data_msb_w3_g[1],
+            ld_data_msb_w3_g[2],
+            ld_data_msb_w3_g[3],
+            ld_data_msb_w3_g[4],
+            ld_data_msb_w3_g[5],
+            ld_data_msb_w3_g[6],
+            ld_data_msb_w3_g[7]});   // lines up with morphed_addr_g[0]
+   
+//assign  sign_bit_g =
+//  (morphed_addr_g[0] & align_bytes_msb[7]) |
+//  (morphed_addr_g[1] & align_bytes_msb[6]) |
+//  (morphed_addr_g[2] & align_bytes_msb[5]) |
+//  (morphed_addr_g[3] & align_bytes_msb[4]) |
+//  (morphed_addr_g[4] & align_bytes_msb[3]) |
+//  (morphed_addr_g[5] & align_bytes_msb[2]) |
+//  (morphed_addr_g[6] & align_bytes_msb[1]) |
+//  (morphed_addr_g[7] & align_bytes_msb[0]) ;
+
+
+//dff #(4) ssign_bit_stgg (
+//        .din    ({sign_bit_w0_m, sign_bit_w1_m, sign_bit_w2_m, sign_bit_w3_m}),
+//        .q      ({sign_bit_w0_g, sign_bit_w1_g, sign_bit_w2_g, sign_bit_w3_g}),
+//        .clk    (clk),
+//        .se     (se),       .si (),          .so ()
+//        );
+   
+// byte0 never requires sign or zero extension.
+//w0 - sign-extend enables for bytes 1..7 derived from sign_bit_w0_g: [1] byte, [3:2] halfword, [7:4] word
+//   wire [3:1] lsu_byp_byte_zero_extend_w0;
+   wire [7:1] lsu_byp_byte_sign_extend_w0;
+   
+//assign  lsu_byp_byte_zero_extend_w0[1] =
+//        unsigned_ldst_byte_g | (signed_ldst_byte_g & ~sign_bit_w0_g);
+   
+assign  lsu_byp_byte_sign_extend_w0[1] =
+        signed_ldst_byte_g & sign_bit_w0_g;   // byte 1 is filled only for a signed byte access with the sign bit set
+ 
+//assign  lsu_byp_byte_zero_extend_w0[2] =
+//        unsigned_ldst_hw_g | (signed_ldst_hw_g & ~sign_bit_w0_g);
+
+assign  lsu_byp_byte_sign_extend_w0[2] =
+        signed_ldst_hw_g & sign_bit_w0_g;     // signed halfword access with the sign bit set
+   
+//assign  lsu_byp_byte_zero_extend_w0[3] =
+//        lsu_byp_byte_zero_extend_w0[2] ;
+
+assign  lsu_byp_byte_sign_extend_w0[3] =
+        lsu_byp_byte_sign_extend_w0[2] ;      // byte 3 follows byte 2
+
+//assign  lsu_byp_byte_zero_extend_w0[4] =
+//        unsigned_ldst_w_g | (signed_ldst_w_g & ~sign_bit_w0_g);
+   
+assign  lsu_byp_byte_sign_extend_w0[4] =
+        signed_ldst_w_g & sign_bit_w0_g;      // signed word access with the sign bit set
+        
+//assign  lsu_byp_byte_zero_extend_w0[5] =
+//    lsu_byp_byte_zero_extend_w0[4] ;
+assign  lsu_byp_byte_sign_extend_w0[5] =
+    lsu_byp_byte_sign_extend_w0[4] ;          // bytes 5..7 follow byte 4
+//assign  lsu_byp_byte_zero_extend_w0[6] =
+//    lsu_byp_byte_zero_extend_w0[4] ;
+assign  lsu_byp_byte_sign_extend_w0[6] =
+    lsu_byp_byte_sign_extend_w0[4] ;
+//assign  lsu_byp_byte_zero_extend_w0[7] =
+//    lsu_byp_byte_zero_extend_w0[4] ;
+assign  lsu_byp_byte_sign_extend_w0[7] =
+    lsu_byp_byte_sign_extend_w0[4] ;
+
+//w1 - sign-extend enables for bytes 1..7 derived from sign_bit_w1_g: [1] byte, [3:2] halfword, [7:4] word
+//   wire [3:1] lsu_byp_byte_zero_extend_w1;
+   wire [7:1] lsu_byp_byte_sign_extend_w1;   // chosen when cache_way_mx_sel[1] is set
+   
+//assign  lsu_byp_byte_zero_extend_w1[1] =
+//        unsigned_ldst_byte_g | (signed_ldst_byte_g & ~sign_bit_w1_g);
+   
+assign  lsu_byp_byte_sign_extend_w1[1] =
+        signed_ldst_byte_g & sign_bit_w1_g;
+ 
+//assign  lsu_byp_byte_zero_extend_w1[2] =
+//        unsigned_ldst_hw_g | (signed_ldst_hw_g & ~sign_bit_w1_g);
+
+assign  lsu_byp_byte_sign_extend_w1[2] =
+        signed_ldst_hw_g & sign_bit_w1_g;
+   
+//assign  lsu_byp_byte_zero_extend_w1[3] =
+//        lsu_byp_byte_zero_extend_w1[2] ;
+
+assign  lsu_byp_byte_sign_extend_w1[3] =
+        lsu_byp_byte_sign_extend_w1[2] ;
+
+//assign  lsu_byp_byte_zero_extend_w1[4] =
+//        unsigned_ldst_w_g | (signed_ldst_w_g & ~sign_bit_w1_g);
+   
+assign  lsu_byp_byte_sign_extend_w1[4] =
+        signed_ldst_w_g & sign_bit_w1_g;
+        
+//assign  lsu_byp_byte_zero_extend_w1[5] =
+//    lsu_byp_byte_zero_extend_w1[4] ;
+assign  lsu_byp_byte_sign_extend_w1[5] =
+    lsu_byp_byte_sign_extend_w1[4] ;
+//assign  lsu_byp_byte_zero_extend_w1[6] =
+//    lsu_byp_byte_zero_extend_w1[4] ;
+assign  lsu_byp_byte_sign_extend_w1[6] =
+    lsu_byp_byte_sign_extend_w1[4] ;
+//assign  lsu_byp_byte_zero_extend_w1[7] =
+//    lsu_byp_byte_zero_extend_w1[4] ;
+assign  lsu_byp_byte_sign_extend_w1[7] =
+    lsu_byp_byte_sign_extend_w1[4] ;
+
+//w2 - sign-extend enables for bytes 1..7 derived from sign_bit_w2_g: [1] byte, [3:2] halfword, [7:4] word
+//   wire [3:1] lsu_byp_byte_zero_extend_w2;
+   wire [7:1] lsu_byp_byte_sign_extend_w2;   // chosen when cache_way_mx_sel[2] is set
+   
+//assign  lsu_byp_byte_zero_extend_w2[1] =
+//        unsigned_ldst_byte_g | (signed_ldst_byte_g & ~sign_bit_w2_g);
+   
+assign  lsu_byp_byte_sign_extend_w2[1] =
+        signed_ldst_byte_g & sign_bit_w2_g;
+ 
+//assign  lsu_byp_byte_zero_extend_w2[2] =
+//        unsigned_ldst_hw_g | (signed_ldst_hw_g & ~sign_bit_w2_g);
+
+assign  lsu_byp_byte_sign_extend_w2[2] =
+        signed_ldst_hw_g & sign_bit_w2_g;
+   
+//assign  lsu_byp_byte_zero_extend_w2[3] =
+//        lsu_byp_byte_zero_extend_w2[2] ;
+
+assign  lsu_byp_byte_sign_extend_w2[3] =
+        lsu_byp_byte_sign_extend_w2[2] ;
+
+//assign  lsu_byp_byte_zero_extend_w2[4] =
+//        unsigned_ldst_w_g | (signed_ldst_w_g & ~sign_bit_w2_g);
+   
+assign  lsu_byp_byte_sign_extend_w2[4] =
+        signed_ldst_w_g & sign_bit_w2_g;
+        
+//assign  lsu_byp_byte_zero_extend_w2[5] =
+//    lsu_byp_byte_zero_extend_w2[4] ;
+assign  lsu_byp_byte_sign_extend_w2[5] =
+    lsu_byp_byte_sign_extend_w2[4] ;
+//assign  lsu_byp_byte_zero_extend_w2[6] =
+//    lsu_byp_byte_zero_extend_w2[4] ;
+assign  lsu_byp_byte_sign_extend_w2[6] =
+    lsu_byp_byte_sign_extend_w2[4] ;
+//assign  lsu_byp_byte_zero_extend_w2[7] =
+//    lsu_byp_byte_zero_extend_w2[4] ;
+assign  lsu_byp_byte_sign_extend_w2[7] =
+    lsu_byp_byte_sign_extend_w2[4] ;
+
+//w3 - sign-extend enables for bytes 1..7 derived from sign_bit_w3_g: [1] byte, [3:2] halfword, [7:4] word
+//   wire [3:1] lsu_byp_byte_zero_extend_w3;
+   wire [7:1] lsu_byp_byte_sign_extend_w3;   // chosen when cache_way_mx_sel[3] is set
+   
+//assign  lsu_byp_byte_zero_extend_w3[1] =
+//        unsigned_ldst_byte_g | (signed_ldst_byte_g & ~sign_bit_w3_g);
+   
+assign  lsu_byp_byte_sign_extend_w3[1] =
+        signed_ldst_byte_g & sign_bit_w3_g;
+ 
+//assign  lsu_byp_byte_zero_extend_w3[2] =
+//        unsigned_ldst_hw_g | (signed_ldst_hw_g & ~sign_bit_w3_g);
+
+assign  lsu_byp_byte_sign_extend_w3[2] =
+        signed_ldst_hw_g & sign_bit_w3_g;
+   
+//assign  lsu_byp_byte_zero_extend_w3[3] =
+//        lsu_byp_byte_zero_extend_w3[2] ;
+
+assign  lsu_byp_byte_sign_extend_w3[3] =
+        lsu_byp_byte_sign_extend_w3[2] ;
+
+//assign  lsu_byp_byte_zero_extend_w3[4] =
+//        unsigned_ldst_w_g | (signed_ldst_w_g & ~sign_bit_w3_g);
+   
+assign  lsu_byp_byte_sign_extend_w3[4] =
+        signed_ldst_w_g & sign_bit_w3_g;
+        
+//assign  lsu_byp_byte_zero_extend_w3[5] =
+//    lsu_byp_byte_zero_extend_w3[4] ;
+assign  lsu_byp_byte_sign_extend_w3[5] =
+    lsu_byp_byte_sign_extend_w3[4] ;
+//assign  lsu_byp_byte_zero_extend_w3[6] =
+//    lsu_byp_byte_zero_extend_w3[4] ;
+assign  lsu_byp_byte_sign_extend_w3[6] =
+    lsu_byp_byte_sign_extend_w3[4] ;
+//assign  lsu_byp_byte_zero_extend_w3[7] =
+//    lsu_byp_byte_zero_extend_w3[4] ;
+assign  lsu_byp_byte_sign_extend_w3[7] =
+    lsu_byp_byte_sign_extend_w3[4] ;
+
+
+//mux4ds  #(14) zero_sign_sel_mux (
+//        .in0    ({lsu_byp_byte_zero_extend_w0[7:1],lsu_byp_byte_sign_extend_w0[7:1]}),
+//        .in1    ({lsu_byp_byte_zero_extend_w1[7:1],lsu_byp_byte_sign_extend_w1[7:1]}),
+//        .in2    ({lsu_byp_byte_zero_extend_w2[7:1],lsu_byp_byte_sign_extend_w2[7:1]}),
+//        .in3    ({lsu_byp_byte_zero_extend_w3[7:1],lsu_byp_byte_sign_extend_w3[7:1]}),
+//        .sel0   (cache_way_mx_sel[0]),  
+//        .sel1   (cache_way_mx_sel[1]),
+//        .sel2   (cache_way_mx_sel[2]),  
+//        .sel3   (cache_way_mx_sel[3]),
+//        .dout   ({lsu_byp_byte_zero_extend[7:1],lsu_byp_byte_sign_extend[7:1]})
+//);
+
+//assign lsu_byp_byte_zero_extend[3:1] =
+//   (cache_way_mx_sel[0] ?  lsu_byp_byte_zero_extend_w0[3:1] : 3'b0 ) |   
+//   (cache_way_mx_sel[1] ?  lsu_byp_byte_zero_extend_w1[3:1] : 3'b0 ) |   
+//   (cache_way_mx_sel[2] ?  lsu_byp_byte_zero_extend_w2[3:1] : 3'b0 ) |   
+//   (cache_way_mx_sel[3] ?  lsu_byp_byte_zero_extend_w3[3:1] : 3'b0 ) ;
+
+assign lsu_byp_byte_sign_extend[7:1] =                              // AND-OR select over w0..w3; all zeros when no cache_way_mx_sel bit is set
+   ({7{cache_way_mx_sel[0]}} & lsu_byp_byte_sign_extend_w0[7:1]) |
+   ({7{cache_way_mx_sel[1]}} & lsu_byp_byte_sign_extend_w1[7:1]) |
+   ({7{cache_way_mx_sel[2]}} & lsu_byp_byte_sign_extend_w2[7:1]) |
+   ({7{cache_way_mx_sel[3]}} & lsu_byp_byte_sign_extend_w3[7:1]) ;
+   
+     
+
+dff_s #(37) stgg_mergesel(       // registers 37 one-bit merge selects on clk; .q lists them in the same order as .din
+        .din    ({
+         merge7_sel_byte0_m, merge7_sel_byte7_m,
+         merge6_sel_byte1_m, merge6_sel_byte6_m,
+         merge5_sel_byte2_m, merge5_sel_byte5_m,
+         merge4_sel_byte3_m, merge4_sel_byte4_m,
+         merge3_sel_byte0_m, merge3_sel_byte3_m,
+         merge3_sel_byte4_m, merge3_sel_byte7_default_m, merge3_sel_byte_m,
+         merge2_sel_byte1_m, merge2_sel_byte2_m,         merge2_sel_byte5_m,
+         merge2_sel_byte6_default_m, merge2_sel_byte_m,
+         merge0_sel_byte0_m, merge0_sel_byte1_m,
+         merge0_sel_byte2_m, merge0_sel_byte3_default_m,
+         merge0_sel_byte4_m, merge0_sel_byte5_m,
+         merge0_sel_byte6_m, merge0_sel_byte7_default_m,
+         merge1_sel_byte0_m, merge1_sel_byte1_m,
+         merge1_sel_byte2_m, merge1_sel_byte3_default_m,
+         merge1_sel_byte4_m, merge1_sel_byte5_m,
+         merge1_sel_byte6_m, merge1_sel_byte7_default_m,
+         merge0_sel_byte_1h_m,merge1_sel_byte_1h_m, merge1_sel_byte_2h_m
+                }),              // 8 (merge7..4) + 5 (merge3) + 5 (merge2) + 8 (merge0) + 8 (merge1) + 3 (_1h/_2h) = 37
+        .q      ({
+         merge7_sel_byte0, merge7_sel_byte7,
+         merge6_sel_byte1, merge6_sel_byte6,
+         merge5_sel_byte2, merge5_sel_byte5,
+         merge4_sel_byte3, merge4_sel_byte4,
+         merge3_sel_byte0, merge3_sel_byte3,
+         merge3_sel_byte4, merge3_sel_byte7,merge3_sel_byte,
+         merge2_sel_byte1, merge2_sel_byte2, merge2_sel_byte5,
+         merge2_sel_byte6, merge2_sel_byte,
+         merge0_sel_byte0, merge0_sel_byte1,
+         merge0_sel_byte2, merge0_sel_byte3,
+         merge0_sel_byte4, merge0_sel_byte5,
+         merge0_sel_byte6, merge0_sel_byte7,
+         merge1_sel_byte0, merge1_sel_byte1,
+         merge1_sel_byte2, merge1_sel_byte3,
+         merge1_sel_byte4, merge1_sel_byte5,
+         merge1_sel_byte6, merge1_sel_byte7,
+         merge0_sel_byte_1h,merge1_sel_byte_1h, merge1_sel_byte_2h
+                }),              // note: each *_default_m input comes out under the plain name (e.g. merge3_sel_byte7)
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+
+assign 	lsu_exu_dfill_data_w2[63:0] = align_byte[63:0] ; // both outputs are straight copies of the aligned 64-bit result
+assign	lsu_ffu_ld_data[63:0] = align_byte[63:0] ;
+
+endmodule
+
+
Index: /trunk/T1-CPU/lsu/lsu_dctldp.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_dctldp.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_dctldp.v	(revision 6)
@@ -0,0 +1,1836 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_dctldp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+/////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module lsu_dctldp (/*AUTOARG*/
+   // Outputs
+   so, asi_d, lsu_excpctl_asi_state_m, lsu_dctl_asi_state_m, 
+   lsu_spu_asi_state_e, lsu_tlu_rsr_data_e, lsu_asi_state, 
+   lsu_asi_reg0, lsu_asi_reg1, lsu_asi_reg2, lsu_asi_reg3, 
+   lsu_t0_pctxt_state, lsu_t1_pctxt_state, lsu_t2_pctxt_state, 
+   lsu_t3_pctxt_state, lsu_tlu_dside_ctxt_m, lsu_tlu_pctxt_m, 
+   tlb_ctxt, lsu_pid_state0, lsu_pid_state1, lsu_pid_state2, 
+   lsu_pid_state3, lsu_dtlb_cam_pid_e, bist_ctl_reg_in, 
+   lsu_ifu_direct_map_l1, dc_direct_map, lsu_iobrdge_rd_data, 
+   lsu_ictag_mrgn, lsu_dctag_mrgn, lsu_mamem_mrgn, lsu_dtlb_mrgn, 
+   lsu_itlb_mrgn, lsu_local_ldxa_data_g, lsu_ldst_va_m, 
+   lsu_ldst_va_m_buf, lsu_tlu_ldst_va_m, lsu_tlu_tlb_asi_state_m, 
+   lsu_ifu_asi_state, lsu_tlu_tlb_ldst_va_m, lsu_tlu_tlb_dmp_va_m, 
+   lsu_ifu_asi_addr, lsu_diagnstc_wr_addr_e, 
+   lsu_diagnstc_dc_prty_invrt_e, lsu_ifu_err_addr, 
+   va_wtchpt_msk_match_m, lsu_ldst_va_g, lsu_dp_ctl_reg0, 
+   lsu_dp_ctl_reg1, lsu_dp_ctl_reg2, lsu_dp_ctl_reg3, 
+   lsu_diagnstc_wr_way_e, lsu_diag_va_prty_invrt, 
+   // Inputs
+   rclk, rst_l, si, se, async_tlb_index, lsu_dtlb_dmp_vld_e, 
+   tlu_lsu_asi_m, exu_tlu_wsr_data_m, tlu_lsu_asi_update_g, 
+   asi_state_wr_thrd, ifu_lsu_imm_asi_d, thread0_d, thread1_d, 
+   thread2_d, thread3_d, ifu_lsu_imm_asi_vld_d, lsu_err_addr_sel, 
+   pctxt_state_wr_thrd, sctxt_state_wr_thrd, st_rs3_data_g, 
+   thread0_ctxt, thread1_ctxt, thread2_ctxt, thread3_ctxt, 
+   thread_pctxt, thread_sctxt, thread_actxt, thread_default, 
+   tlu_dtlb_tte_tag_w2, tlu_dtlb_tte_tag_b58t56, thread0_g, 
+   thread1_g, thread2_g, thread3_g, pid_state_wr_en, thread0_e, 
+   thread1_e, thread2_e, thread3_e, thread0_m, thread1_m, thread2_m, 
+   thread3_m, lsu_iobrdge_wr_data, dfture_tap_wr_mx_sel, lctl_rst, 
+   lsu_ctl_state_wr_en, lsuctl_ctlbits_wr_en, dfture_tap_rd_en, 
+   bist_tap_wr_en, bist_ctl_reg_out, mrgn_tap_wr_en, ldiagctl_wr_en, 
+   misc_ctl_sel_din, lsu_asi_sel_fmx1, lsu_asi_sel_fmx2, 
+   exu_lsu_ldst_va_e, tlb_access_en0_g, tlb_access_en1_g, 
+   tlb_access_en2_g, tlb_access_en3_g, tlb_access_sel_thrd0, 
+   tlb_access_sel_thrd1, tlb_access_sel_thrd2, 
+   tlb_access_sel_default, mrgnctl_wr_en, lsu_dcfill_addr_e, 
+   lsu_error_pa_m, stb_ldst_byte_msk, lsu_diagnstc_va_sel, 
+   rst_tri_en
+   );
+
+   input rclk;
+   input rst_l;
+   input si;
+   input se;
+//   input tmb_l ;
+
+   output so;
+
+//   input	async_error_sel ;
+   input [5:0]	async_tlb_index ;
+
+   input	lsu_dtlb_dmp_vld_e ;
+   
+   input [7:0] tlu_lsu_asi_m;
+   input [7:0] exu_tlu_wsr_data_m;
+   input       tlu_lsu_asi_update_g;
+   input [3:0] asi_state_wr_thrd;
+   input [7:0] ifu_lsu_imm_asi_d;
+   input       thread0_d;
+   input       thread1_d;
+   input       thread2_d;
+   input       thread3_d;
+   input       ifu_lsu_imm_asi_vld_d;
+
+   input [2:0]	lsu_err_addr_sel ;
+   
+   output [7:0] asi_d;
+   output [7:0] lsu_excpctl_asi_state_m;
+   output [7:0] lsu_dctl_asi_state_m;
+     
+   output [7:0] lsu_spu_asi_state_e;
+   output [7:0] lsu_tlu_rsr_data_e;
+
+   output  [7:0]   lsu_asi_state ;   // ASI State + imm asi
+   output  [7:0]   lsu_asi_reg0 ;    // ASI State Register.
+   output  [7:0]   lsu_asi_reg1 ;    // ASI State Register.
+   output  [7:0]   lsu_asi_reg2 ;    // ASI State Register.
+   output  [7:0]   lsu_asi_reg3 ;    // ASI State Register.
+
+input  [3:0] pctxt_state_wr_thrd ;
+input  [3:0] sctxt_state_wr_thrd ;   
+//input [63:0] st_rs3_data_g;
+//input [59:56] st_rs3_data_g_59_56;
+//input [51:48] st_rs3_data_g_51_48;
+//input [43:40] st_rs3_data_g_43_40;
+input [32:0]  st_rs3_data_g;
+
+   input     thread0_ctxt;  //should be one hot, force default
+   input     thread1_ctxt;
+   input     thread2_ctxt;
+   input     thread3_ctxt;
+
+   input     thread_pctxt;
+   input     thread_sctxt;
+//   input     thread_nctxt;	
+   input     thread_actxt;
+   input     thread_default;
+   
+input [12:0]  tlu_dtlb_tte_tag_w2 ;
+input [2:0]	tlu_dtlb_tte_tag_b58t56 ;
+
+   input       thread0_g;
+   input       thread1_g;
+   input       thread2_g;
+   input       thread3_g;
+ 
+output  [12:0]    lsu_t0_pctxt_state ;  // primary ctxt - thread0
+output  [12:0]    lsu_t1_pctxt_state ;  // primary ctxt - thread1
+output  [12:0]    lsu_t2_pctxt_state ;  // primary ctxt - thread2
+output  [12:0]    lsu_t3_pctxt_state ;  // primary ctxt - thread3
+
+output  [12:0]    lsu_tlu_dside_ctxt_m ;
+output  [12:0]    lsu_tlu_pctxt_m ;
+output  [12:0]    tlb_ctxt ;    // ctxt for xslate or demap.
+
+   input [3:0]    pid_state_wr_en;
+   input          thread0_e;
+   input          thread1_e;
+   input          thread2_e;
+   input          thread3_e;
+
+   input          thread0_m;
+   input          thread1_m;
+   input          thread2_m;
+   input          thread3_m;
+   
+output	[2:0]	lsu_pid_state0 ;	// pid thread0 ; global use
+output	[2:0]	lsu_pid_state1 ;	// pid thread1 ; global use
+output	[2:0]	lsu_pid_state2 ;	// pid thread2 ; global use
+output	[2:0]	lsu_pid_state3 ;	// pid thread3 ; global use
+output	[2:0] lsu_dtlb_cam_pid_e ;
+
+input [27:0]  lsu_iobrdge_wr_data ;
+   input      dfture_tap_wr_mx_sel;
+   input [3:0] lctl_rst;
+   input [3:0] lsu_ctl_state_wr_en;
+   input [3:0] lsuctl_ctlbits_wr_en;
+   input [3:0] dfture_tap_rd_en;
+
+   input      bist_tap_wr_en;
+//  input      bistctl_wr_en;
+   output [6:0] bist_ctl_reg_in;
+   
+   input [10:0] bist_ctl_reg_out;
+   
+   input      mrgn_tap_wr_en;
+
+   output		lsu_ifu_direct_map_l1 ;	// l1 icache set to direct map.
+   output   dc_direct_map;
+   input    ldiagctl_wr_en;
+
+   output [43:0] lsu_iobrdge_rd_data ;
+
+   input [3:0]  misc_ctl_sel_din ;  //should force default
+
+output	[3:0]	lsu_ictag_mrgn ;	// icache tag self-timed margin control
+output	[3:0]	lsu_dctag_mrgn ;	// dcache tag self-timed margin control
+
+output	[3:0]	lsu_mamem_mrgn ;	// mamem self-timed margin control
+output	[7:0]	lsu_dtlb_mrgn ;	  // dtlb self-timed margin control
+output	[7:0]	lsu_itlb_mrgn ;	  // itlb self-timed margin control
+
+output  [47:0]    lsu_local_ldxa_data_g ;  // local ldxa data
+
+//   input          misc_asi_rd_en;
+//input [47:3]  lsu_va_wtchpt_addr ;
+   input [2:0] lsu_asi_sel_fmx1;
+   input [2:0] lsu_asi_sel_fmx2;
+
+input  [47:0]  exu_lsu_ldst_va_e;      // sub VA for mem-ref (src-execute)
+
+output [12:0]  lsu_ldst_va_m;  
+output [47:0]  lsu_ldst_va_m_buf;  
+output [9:0]  lsu_tlu_ldst_va_m;  
+
+   input       tlb_access_en0_g;
+   input       tlb_access_en1_g;
+   input       tlb_access_en2_g;
+   input       tlb_access_en3_g;
+
+output  [7:0]   lsu_tlu_tlb_asi_state_m ;
+output  [7:0]   lsu_ifu_asi_state;  
+
+   input tlb_access_sel_thrd0;
+   input tlb_access_sel_thrd1;
+   input tlb_access_sel_thrd2;
+   input tlb_access_sel_default;
+
+output  [10:0]   lsu_tlu_tlb_ldst_va_m ;
+output  [47:13]         lsu_tlu_tlb_dmp_va_m ;
+output  [17:0]    lsu_ifu_asi_addr ;
+
+   output [10:0]  lsu_diagnstc_wr_addr_e ;
+   output [7:0]   lsu_diagnstc_dc_prty_invrt_e ;
+
+///  output [13:11] lsu_lngltncy_ldst_va;
+   
+   input mrgnctl_wr_en;
+input [10:4]  lsu_dcfill_addr_e ;         // data cache fill addr
+input [28:0]  lsu_error_pa_m ;            // error phy addr
+//   input      sync_error_sel;
+   output  [47:4]    lsu_ifu_err_addr ;    // error address
+
+input [7:0]   stb_ldst_byte_msk ;
+   output va_wtchpt_msk_match_m;
+
+   output [7:0]  lsu_ldst_va_g;
+
+   output [5:0] lsu_dp_ctl_reg0;
+   output [5:0] lsu_dp_ctl_reg1;
+   output [5:0] lsu_dp_ctl_reg2;
+   output [5:0] lsu_dp_ctl_reg3;
+
+   input   [3:0] lsu_diagnstc_va_sel ;
+   output  [1:0] lsu_diagnstc_wr_way_e ;
+   output	 lsu_diag_va_prty_invrt ;
+   input   rst_tri_en;
+   
+wire  [12:0]  pctxt_state;
+wire  [12:0]  sctxt_state;
+wire  [2:0]   pid_state;
+
+wire   [13:0] lsu_ctl_reg0;
+wire   [13:0] lsu_ctl_reg1;
+wire   [13:0] lsu_ctl_reg2;
+wire   [13:0] lsu_ctl_reg3;
+
+wire   [13:0] lsu_ctl_reg;
+   
+   wire       clk;
+   assign     clk = rclk;             // local alias: every flop in the visible part of this module is clocked from rclk
+
+/********************* ASI state ***********************/   
+   wire [7:0]  tlu_lsu_asi_g;         // tlu_lsu_asi_m delayed by one flop
+   
+dff_s #(8) asi_stgw (
+        .din    (tlu_lsu_asi_m[7:0]),
+        .q      (tlu_lsu_asi_g[7:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   
+   wire [7:0]  exu_tlu_wsr_data_w;    // exu_tlu_wsr_data_m delayed by one flop
+   
+dff_s #(8) ff_wsr_data_w (
+        .din    (exu_tlu_wsr_data_m[7:0]),
+        .q      (exu_tlu_wsr_data_w[7:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+   wire [7:0]  asi_wr_din;            // common data input of the four per-thread asi_stateN flops
+   
+assign  asi_wr_din[7:0] = tlu_lsu_asi_update_g ? tlu_lsu_asi_g[7:0] : exu_tlu_wsr_data_w[7:0] ;   // tlu_lsu_asi_update_g picks the delayed tlu value, otherwise the delayed wsr data
+
+// ASI - Thread0
+   wire [7:0] asi_state0;
+   wire [7:0] lsu_asi_reg0;
+
+   wire       asi0_state_clk;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf asi0_state_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~asi_state_wr_thrd[0]),
+                .tmb_l  (~se),
+                .clk    (asi0_state_clk)
+                ) ;
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(8) asi0_state_ff (             // FPGA_SYN_CLK_DFF build: dffe_s on clk, enabled by asi_state_wr_thrd[0]
+        .clk    (clk),
+        .en     (asi_state_wr_thrd[0]),
+        .din    (asi_wr_din[7:0]),
+        .q      (asi_state0[7:0]),      .se (se),  .si (),  .so ()
+        );
+`else
+dff_s #(8) asi0_state_ff (              // default build: dff_s clocked by asi0_state_clk
+        .clk    (asi0_state_clk),
+        .din    (asi_wr_din[7:0]),
+        .q      (asi_state0[7:0]),
+        .se     (se),       .si (),          .so ()
+        );
+`endif
+
+assign  lsu_asi_reg0[7:0] = asi_state0[7:0] ; 
+
+// ASI - Thread1
+   wire [7:0] asi_state1;
+   wire [7:0] lsu_asi_reg1;
+
+   wire       asi1_state_clk;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf asi1_state_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~asi_state_wr_thrd[1]),
+                .tmb_l  (~se),
+                .clk    (asi1_state_clk)
+                ) ;
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(8) asi1_state_ff (             // FPGA_SYN_CLK_DFF build: dffe_s on clk, enabled by asi_state_wr_thrd[1]
+        .clk    (clk),
+        .en     (asi_state_wr_thrd[1]),
+        .din    (asi_wr_din[7:0]),
+        .q      (asi_state1[7:0]),      .se (se),  .si (),  .so ()
+        );
+`else
+dff_s #(8) asi1_state_ff (              // default build: dff_s clocked by asi1_state_clk
+        .clk    (asi1_state_clk),
+        .din    (asi_wr_din[7:0]),
+        .q      (asi_state1[7:0]),
+        .se     (se),       .si (),          .so ()
+        );
+`endif
+   
+assign  lsu_asi_reg1[7:0] = asi_state1[7:0] ; 
+
+// ASI - Thread2
+   wire [7:0] asi_state2;
+   wire [7:0] lsu_asi_reg2;
+
+   wire       asi2_state_clk;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf asi2_state_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~asi_state_wr_thrd[2]),
+                .tmb_l  (~se),
+                .clk    (asi2_state_clk)
+                ) ;
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(8) asi2_state_ff (             // FPGA_SYN_CLK_DFF build: dffe_s on clk, enabled by asi_state_wr_thrd[2]
+        .clk    (clk),
+        .en     (asi_state_wr_thrd[2]),
+        .din    (asi_wr_din[7:0]),
+        .q      (asi_state2[7:0]),      .se (se),  .si (),  .so ()
+        );
+`else
+dff_s #(8) asi2_state_ff (              // default build: dff_s clocked by asi2_state_clk
+        .clk    (asi2_state_clk),
+        .din    (asi_wr_din[7:0]),
+        .q      (asi_state2[7:0]),
+        .se     (se),       .si (),          .so ()
+        );
+`endif
+
+assign  lsu_asi_reg2[7:0] = asi_state2[7:0] ; 
+
+// ASI - Thread3
+   wire [7:0] asi_state3;
+   wire [7:0] lsu_asi_reg3;
+   
+   wire       asi3_state_clk;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf asi3_state_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~asi_state_wr_thrd[3]),
+                .tmb_l  (~se),
+                .clk    (asi3_state_clk)
+                ) ;
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(8) asi3_state_ff (             // FPGA_SYN_CLK_DFF build: dffe_s on clk, enabled by asi_state_wr_thrd[3]
+        .clk    (clk),
+        .en     (asi_state_wr_thrd[3]),
+        .din    (asi_wr_din[7:0]),
+        .q      (asi_state3[7:0]),      .se (se),  .si (),  .so ()
+        );
+`else
+dff_s #(8) asi3_state_ff (              // default build: dff_s clocked by asi3_state_clk
+        .clk    (asi3_state_clk),
+        .din    (asi_wr_din[7:0]),
+        .q      (asi_state3[7:0]),
+        .se     (se),       .si (),          .so ()
+        );
+`endif
+
+assign  lsu_asi_reg3[7:0] = asi_state3[7:0] ; 
+
+   wire [7:0] asi_state;               // per-thread ASI state picked by the thread*_d selects
+   
+mux4ds #(8) lsu_asi_mux_d (            // 4:1 select among asi_state0..3
+   .in0 (asi_state0[7:0]),                        
+   .in1 (asi_state1[7:0]),                        
+   .in2 (asi_state2[7:0]),                        
+   .in3 (asi_state3[7:0]),                        
+   .sel0(thread0_d),
+   .sel1(thread1_d),
+   .sel2(thread2_d),
+   .sel3(thread3_d),
+   .dout(asi_state[7:0])                        
+   );
+
+assign  asi_d[7:0] = ifu_lsu_imm_asi_vld_d ?    // the immediate ASI overrides the stored state when flagged valid
+                     ifu_lsu_imm_asi_d[7:0] : asi_state[7:0];
+
+wire  [7:0] asi_state_e, asi_state_m ;   // asi_d after one and two flops respectively
+   
+dff_s #(8) asistate_stge (               // asi_d -> asi_state_e
+        .din    (asi_d[7:0]),
+        .q      (asi_state_e[7:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Make rsr_data independent of imm_asi.
+dff_s #(8) rdasi_stge (                  // registers asi_state (taken before the imm_asi override) straight onto lsu_tlu_rsr_data_e
+        .din    (asi_state[7:0]),
+        .q      (lsu_tlu_rsr_data_e[7:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+//assign lsu_tlu_rsr_data_e[7:0] =  asi_state_e[7:0] ;
+
+assign  lsu_spu_asi_state_e[7:0] = asi_state_e[7:0] ;
+
+dff_s #(8) asistate_stgm (               // asi_state_e -> asi_state_m
+        .din    (asi_state_e[7:0]),
+        .q      (asi_state_m[7:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+assign  lsu_excpctl_asi_state_m[7:0] = asi_state_m[7:0] ;   // two outputs carry the same asi_state_m value
+assign  lsu_dctl_asi_state_m[7:0]    = asi_state_m[7:0] ;
+
+   wire [7:0] lsu_asi_state;
+dff_s #(8) asistate_stgg (               // asi_state_m -> lsu_asi_state, the third flop after asi_d
+        .din    (asi_state_m[7:0]),
+        .q      (lsu_asi_state[7:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+
+/*********************context************************/
+// Per-thread primary and secondary context registers (13 bits each).
+wire  [12:0]  pctxt_state0, pctxt_state1, pctxt_state2, pctxt_state3;
+wire  [12:0]  sctxt_state0, sctxt_state1, sctxt_state2, sctxt_state3;
+
+// PRIMARY CONTEXT - Thread0
+// Updated from st_rs3_data_g[12:0] under control of pctxt_state_wr_thrd[0].
+   wire       pctxt0_state_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf pctxt0_state_clkbuf (
+        .clk    (pctxt0_state_clk),
+        .rclk   (clk),
+        .enb_l  (~pctxt_state_wr_thrd[0]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(13) pctxt_state0_ff (
+        .clk    (clk),
+        .en     (pctxt_state_wr_thrd[0]),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (pctxt_state0[12:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(13) pctxt_state0_ff (
+        .clk    (pctxt0_state_clk),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (pctxt_state0[12:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+assign  lsu_t0_pctxt_state[12:0] = pctxt_state0[12:0];
+
+// PRIMARY CONTEXT - Thread1
+// Updated from st_rs3_data_g[12:0] under control of pctxt_state_wr_thrd[1].
+   wire       pctxt1_state_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf pctxt1_state_clkbuf (
+        .clk    (pctxt1_state_clk),
+        .rclk   (clk),
+        .enb_l  (~pctxt_state_wr_thrd[1]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(13) pctxt_state1_ff (
+        .clk    (clk),
+        .en     (pctxt_state_wr_thrd[1]),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (pctxt_state1[12:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(13) pctxt_state1_ff (
+        .clk    (pctxt1_state_clk),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (pctxt_state1[12:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+assign  lsu_t1_pctxt_state[12:0] = pctxt_state1[12:0];
+
+// PRIMARY CONTEXT - Thread2
+// Updated from st_rs3_data_g[12:0] under control of pctxt_state_wr_thrd[2].
+   wire       pctxt2_state_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf pctxt2_state_clkbuf (
+        .clk    (pctxt2_state_clk),
+        .rclk   (clk),
+        .enb_l  (~pctxt_state_wr_thrd[2]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(13) pctxt_state2_ff (
+        .clk    (clk),
+        .en     (pctxt_state_wr_thrd[2]),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (pctxt_state2[12:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(13) pctxt_state2_ff (
+        .clk    (pctxt2_state_clk),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (pctxt_state2[12:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+assign  lsu_t2_pctxt_state[12:0] = pctxt_state2[12:0];
+
+// PRIMARY CONTEXT - Thread3
+// Updated from st_rs3_data_g[12:0] under control of pctxt_state_wr_thrd[3].
+   wire       pctxt3_state_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf pctxt3_state_clkbuf (
+        .clk    (pctxt3_state_clk),
+        .rclk   (clk),
+        .enb_l  (~pctxt_state_wr_thrd[3]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(13) pctxt_state3_ff (
+        .clk    (clk),
+        .en     (pctxt_state_wr_thrd[3]),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (pctxt_state3[12:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(13) pctxt_state3_ff (
+        .clk    (pctxt3_state_clk),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (pctxt_state3[12:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+assign  lsu_t3_pctxt_state[12:0] = pctxt_state3[12:0];
+   
+// SECONDARY CONTEXT - Thread0
+// Updated from st_rs3_data_g[12:0] under control of sctxt_state_wr_thrd[0].
+   wire       sctxt0_state_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf sctxt0_state_clkbuf (
+        .clk    (sctxt0_state_clk),
+        .rclk   (clk),
+        .enb_l  (~sctxt_state_wr_thrd[0]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(13) sctxt_state0_ff (
+        .clk    (clk),
+        .en     (sctxt_state_wr_thrd[0]),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (sctxt_state0[12:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(13) sctxt_state0_ff (
+        .clk    (sctxt0_state_clk),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (sctxt_state0[12:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// SECONDARY CONTEXT - Thread1
+// Updated from st_rs3_data_g[12:0] under control of sctxt_state_wr_thrd[1].
+   wire       sctxt1_state_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf sctxt1_state_clkbuf (
+        .clk    (sctxt1_state_clk),
+        .rclk   (clk),
+        .enb_l  (~sctxt_state_wr_thrd[1]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(13) sctxt_state1_ff (
+        .clk    (clk),
+        .en     (sctxt_state_wr_thrd[1]),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (sctxt_state1[12:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(13) sctxt_state1_ff (
+        .clk    (sctxt1_state_clk),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (sctxt_state1[12:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// SECONDARY CONTEXT - Thread2
+// Updated from st_rs3_data_g[12:0] under control of sctxt_state_wr_thrd[2].
+   wire       sctxt2_state_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf sctxt2_state_clkbuf (
+        .clk    (sctxt2_state_clk),
+        .rclk   (clk),
+        .enb_l  (~sctxt_state_wr_thrd[2]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(13) sctxt_state2_ff (
+        .clk    (clk),
+        .en     (sctxt_state_wr_thrd[2]),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (sctxt_state2[12:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(13) sctxt_state2_ff (
+        .clk    (sctxt2_state_clk),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (sctxt_state2[12:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// SECONDARY CONTEXT - Thread3
+// Updated from st_rs3_data_g[12:0] under control of sctxt_state_wr_thrd[3].
+   wire       sctxt3_state_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf sctxt3_state_clkbuf (
+        .clk    (sctxt3_state_clk),
+        .rclk   (clk),
+        .enb_l  (~sctxt_state_wr_thrd[3]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(13) sctxt_state3_ff (
+        .clk    (clk),
+        .en     (sctxt_state_wr_thrd[3]),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (sctxt_state3[12:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(13) sctxt_state3_ff (
+        .clk    (sctxt3_state_clk),
+        .din    (st_rs3_data_g[12:0]),
+        .q      (sctxt_state3[12:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// Current primary/secondary context, picked per thread by thread*_ctxt.
+wire  [12:0]  current_pctxt_e;
+wire  [12:0]  current_sctxt_e;
+wire  [12:0]  current_pctxt_m;
+wire  [12:0]  current_ctxt_e;
+wire  [12:0]  current_ctxt_m;
+
+mux4ds #(13) current_pctxt_e_mux (
+        .dout   (current_pctxt_e[12:0]),
+        .in0    (pctxt_state0[12:0]),   .sel0   (thread0_ctxt),
+        .in1    (pctxt_state1[12:0]),   .sel1   (thread1_ctxt),
+        .in2    (pctxt_state2[12:0]),   .sel2   (thread2_ctxt),
+        .in3    (pctxt_state3[12:0]),   .sel3   (thread3_ctxt)
+        );
+
+mux4ds #(13) current_sctxt_e_mux (
+        .dout   (current_sctxt_e[12:0]),
+        .in0    (sctxt_state0[12:0]),   .sel0   (thread0_ctxt),
+        .in1    (sctxt_state1[12:0]),   .sel1   (thread1_ctxt),
+        .in2    (sctxt_state2[12:0]),   .sel2   (thread2_ctxt),
+        .in3    (sctxt_state3[12:0]),   .sel3   (thread3_ctxt)
+        );
+ 
+   // Context value taken from the TLU-supplied TTE tag.
+   wire [12:0] tlb_actxt;
+
+assign tlb_actxt[12:0] = tlu_dtlb_tte_tag_w2[12:0];
+
+   // Context source select. rst_tri_en masks the first three legs and
+   // forces the fourth, which feeds all zeros into the mux.
+   wire [3:0] thread_sel;
+   assign     thread_sel[2:0] = {thread_actxt, thread_sctxt, thread_pctxt}
+                                & {3{~rst_tri_en}};
+   assign     thread_sel[3]   = thread_default | rst_tri_en;
+
+// change buffer to nand /nor
+
+mux4ds #(13) tlb_ctxt_mux (
+        .dout   (tlb_ctxt[12:0]),
+        .in0    (current_pctxt_e[12:0]),        .sel0   (thread_sel[0]),
+        .in1    (current_sctxt_e[12:0]),        .sel1   (thread_sel[1]),
+        .in2    (tlb_actxt[12:0]),              .sel2   (thread_sel[2]),
+        .in3    (13'b0),                        .sel3   (thread_sel[3])
+        );
+
+assign  current_ctxt_e[12:0] = tlb_ctxt[12:0];
+
+//Bug 3094
+// Primary context picked by thread*_e (not thread*_ctxt); it is staged
+// alongside the TLB context below.
+wire  [12:0]  itrap_pctxt_e;
+
+mux4ds #(13) itrap_pctxt_e_mux (
+        .dout   (itrap_pctxt_e[12:0]),
+        .in0    (pctxt_state0[12:0]),   .sel0   (thread0_e),
+        .in1    (pctxt_state1[12:0]),   .sel1   (thread1_e),
+        .in2    (pctxt_state2[12:0]),   .sel2   (thread2_e),
+        .in3    (pctxt_state3[12:0]),   .sel3   (thread3_e)
+        );
+
+// Create current ctxt for tlu purpose.
+// One 26-bit flop: upper half carries current_ctxt, lower half the
+// primary context.
+dff_s #(26) cctxt_stgm (
+        .clk    (clk),
+        .din    ({current_ctxt_e[12:0], itrap_pctxt_e[12:0]}),
+        .q      ({current_ctxt_m[12:0], current_pctxt_m[12:0]}),
+        .se (se), .si (), .so ()
+        );
+
+assign  lsu_tlu_dside_ctxt_m[12:0] = current_ctxt_m[12:0];
+assign  lsu_tlu_pctxt_m[12:0]      = current_pctxt_m[12:0];
+
+   // Primary Context, selected by thread*_g
+mux4ds #(13)     pctxt_mx (
+        .dout   (pctxt_state[12:0]),
+        .in0    (pctxt_state0[12:0]),   .sel0   (thread0_g),
+        .in1    (pctxt_state1[12:0]),   .sel1   (thread1_g),
+        .in2    (pctxt_state2[12:0]),   .sel2   (thread2_g),
+        .in3    (pctxt_state3[12:0]),   .sel3   (thread3_g)
+        );
+
+// Secondary Context, selected by thread*_g
+mux4ds #(13)     sctxt_mx (
+        .dout   (sctxt_state[12:0]),
+        .in0    (sctxt_state0[12:0]),   .sel0   (thread0_g),
+        .in1    (sctxt_state1[12:0]),   .sel1   (thread1_g),
+        .in2    (sctxt_state2[12:0]),   .sel2   (thread2_g),
+        .in3    (sctxt_state3[12:0]),   .sel3   (thread3_g)
+        );
+
+/********************partition id********************/
+// ** Reset put in temporarily to ensure pid is correctly initialized **
+// ** Env/diags should be set-up to initialize pid correctly **
+wire  [2:0]  pid_state0, pid_state1, pid_state2, pid_state3;
+
+// Write data shared by all four PID registers: zero while rst_l is low,
+// otherwise st_rs3_data_g[2:0].
+   wire [2:0] pid_state_din;
+   assign     pid_state_din[2:0] = st_rs3_data_g[2:0] & {3{rst_l}};
+
+// Thread0
+   wire       pid_state0_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf pid_state0_clkbuf (
+        .clk    (pid_state0_clk),
+        .rclk   (clk),
+        .enb_l  (~pid_state_wr_en[0]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(3) pid0_state (
+        .clk    (clk),
+        .en     (pid_state_wr_en[0]),
+        .din    (pid_state_din[2:0]),
+        .q      (pid_state0[2:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(3) pid0_state (
+        .clk    (pid_state0_clk),
+        .din    (pid_state_din[2:0]),
+        .q      (pid_state0[2:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+assign  lsu_pid_state0[2:0] = pid_state0[2:0];
+
+// Thread1
+// PID register updated from pid_state_din under control of pid_state_wr_en[1].
+   wire       pid_state1_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf pid_state1_clkbuf (
+        .clk    (pid_state1_clk),
+        .rclk   (clk),
+        .enb_l  (~pid_state_wr_en[1]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(3) pid1_state (
+        .clk    (clk),
+        .en     (pid_state_wr_en[1]),
+        .din    (pid_state_din[2:0]),
+        .q      (pid_state1[2:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(3) pid1_state (
+        .clk    (pid_state1_clk),
+        .din    (pid_state_din[2:0]),
+        .q      (pid_state1[2:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+assign  lsu_pid_state1[2:0] = pid_state1[2:0];
+
+// Thread2
+// PID register updated from pid_state_din under control of pid_state_wr_en[2].
+   wire       pid_state2_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf pid_state2_clkbuf (
+        .clk    (pid_state2_clk),
+        .rclk   (clk),
+        .enb_l  (~pid_state_wr_en[2]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(3) pid2_state (
+        .clk    (clk),
+        .en     (pid_state_wr_en[2]),
+        .din    (pid_state_din[2:0]),
+        .q      (pid_state2[2:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(3) pid2_state (
+        .clk    (pid_state2_clk),
+        .din    (pid_state_din[2:0]),
+        .q      (pid_state2[2:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+assign  lsu_pid_state2[2:0] = pid_state2[2:0];
+
+// Thread3
+// PID register updated from pid_state_din under control of pid_state_wr_en[3].
+   wire       pid_state3_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf pid_state3_clkbuf (
+        .clk    (pid_state3_clk),
+        .rclk   (clk),
+        .enb_l  (~pid_state_wr_en[3]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(3) pid3_state (
+        .clk    (clk),
+        .en     (pid_state_wr_en[3]),
+        .din    (pid_state_din[2:0]),
+        .q      (pid_state3[2:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(3) pid3_state (
+        .clk    (pid_state3_clk),
+        .din    (pid_state_din[2:0]),
+        .q      (pid_state3[2:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+assign  lsu_pid_state3[2:0] = pid_state3[2:0];
+
+// Hypervisor related cam inputs
+wire [2:0] cam_pid_e;
+
+mux4ds #(3)     cam_pid_mx (
+        .dout   (cam_pid_e[2:0]),
+        .in0    (pid_state0[2:0]),      .sel0   (thread0_e),
+        .in1    (pid_state1[2:0]),      .sel1   (thread1_e),
+        .in2    (pid_state2[2:0]),      .sel2   (thread2_e),
+        .in3    (pid_state3[2:0]),      .sel3   (thread3_e)
+        );
+
+// When lsu_dtlb_dmp_vld_e is set the PID comes from the TLU tag bits
+// instead of the thread's PID register.
+// (Earlier form of the select: thread_actxt ? tlu_dtlb_tte_tag_b58t56[2:0] : cam_pid_e[2:0])
+assign  lsu_dtlb_cam_pid_e[2:0] = lsu_dtlb_dmp_vld_e ? tlu_dtlb_tte_tag_b58t56[2:0]
+                                                     : cam_pid_e[2:0];
+
+// PID selected by thread*_g.
+mux4ds #(3)     pid_mx (
+        .dout   (pid_state[2:0]),
+        .in0    (pid_state0[2:0]),      .sel0   (thread0_g),
+        .in1    (pid_state1[2:0]),      .sel1   (thread1_g),
+        .in2    (pid_state2[2:0]),      .sel2   (thread2_g),
+        .in3    (pid_state3[2:0]),      .sel3   (thread3_g)
+        );
+
+  
+/***********************lsu ctl reg********************/
+// Contents of lsu_ctl_reg
+/*
+  IC. I-Cache Enable. b0           b0
+  DC. D-Cache Enable. b1           b1
+  IM. I-MMU Enable.   b2           b2
+  DM. D-MMU Enable.   b3           b3
+  FM. Parity Mask.(delete) b4-19   --    
+  Reserved    b20                  --
+  VW. VA Wtchpt Wr  b21            b4
+  VR. VA Wtchpt Rd  b22            b5
+  PW. PA Wtchpt Wr  b23            --
+  PR. PA Wtchpt Rd  b24            --
+  VM. VA Wtchpt BMask   b25-32     b6-13
+  PM. PA Wtchpt BMask   b33-40     --
+*/
+
+   // Low six bits of each thread's control register, exported as-is.
+   assign lsu_dp_ctl_reg0[5:0] = lsu_ctl_reg0[5:0];
+   assign lsu_dp_ctl_reg1[5:0] = lsu_ctl_reg1[5:0];
+   assign lsu_dp_ctl_reg2[5:0] = lsu_ctl_reg2[5:0];
+   assign lsu_dp_ctl_reg3[5:0] = lsu_ctl_reg3[5:0];
+   
+// Packed write data for bits [13:4] of the per-thread control registers:
+// {VM[7:0], VR, VW} taken from store-data bits 32:25, 22 and 21.
+wire  [9:0]  lsu_ctl_reg_din ;
+
+//assign  lsu_ctl_reg_din[19:0] = st_rs3_data_g[40:21] ;
+   wire   lsu_ctl_reg_vw_din, lsu_ctl_reg_vr_din;
+   wire [7:0] lsu_ctl_reg_vm_din;
+   
+assign  lsu_ctl_reg_vw_din = st_rs3_data_g[21] ;
+assign  lsu_ctl_reg_vr_din = st_rs3_data_g[22] ;
+assign  lsu_ctl_reg_vm_din[7:0] = st_rs3_data_g[32:25];
+
+// Bit 0 = VW, bit 1 = VR, bits 9:2 = VM.
+assign lsu_ctl_reg_din[9:0] = {lsu_ctl_reg_vm_din[7:0],
+                               lsu_ctl_reg_vr_din,
+                               lsu_ctl_reg_vw_din};
+   
+   
+// Write data for control bits [3:0]: the IO-bridge value is used when
+// dfture_tap_wr_mx_sel is set, otherwise the store data.
+wire [3:0]  lsuctl_ctlbits_wr_data ;  
+
+assign  lsuctl_ctlbits_wr_data[3:0] =
+          dfture_tap_wr_mx_sel ? lsu_iobrdge_wr_data[3:0] : st_rs3_data_g[3:0] ;
+
+// Thread0
+// Control register bits [13:4]; write data is zeroed while lctl_rst[0] is set.
+   wire [9:0]  lsu_ctl_reg0_din;
+   assign      lsu_ctl_reg0_din[9:0] = lsu_ctl_reg_din[9:0] & {10{~lctl_rst[0]}};
+
+   wire        lsu_ctl_state0_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lsu_ctl_state0_clkbuf (
+        .clk    (lsu_ctl_state0_clk),
+        .rclk   (clk),
+        .enb_l  (~lsu_ctl_state_wr_en[0]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(10) lsu_ctl_reg0_ff2 (
+        .clk    (clk),
+        .en     (lsu_ctl_state_wr_en[0]),
+        .din    (lsu_ctl_reg0_din[9:0]),
+        .q      (lsu_ctl_reg0[13:4]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(10) lsu_ctl_reg0_ff2 (
+        .clk    (lsu_ctl_state0_clk),
+        .din    (lsu_ctl_reg0_din[9:0]),
+        .q      (lsu_ctl_reg0[13:4]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// Control register bits [3:0]; separate write enable, same reset masking.
+   wire [3:0]  lsuctl_ctlbits0_wr_data_din;
+   assign      lsuctl_ctlbits0_wr_data_din[3:0] = lsuctl_ctlbits_wr_data[3:0] & {4{~lctl_rst[0]}};
+
+   wire        lsuctl_ctlbits0_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lsuctl_ctlbits0_clkbuf (
+        .clk    (lsuctl_ctlbits0_clk),
+        .rclk   (clk),
+        .enb_l  (~lsuctl_ctlbits_wr_en[0]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(4) lsu_ctl_reg0_ff1 (
+        .clk    (clk),
+        .en     (lsuctl_ctlbits_wr_en[0]),
+        .din    (lsuctl_ctlbits0_wr_data_din[3:0]),
+        .q      (lsu_ctl_reg0[3:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(4) lsu_ctl_reg0_ff1 (
+        .clk    (lsuctl_ctlbits0_clk),
+        .din    (lsuctl_ctlbits0_wr_data_din[3:0]),
+        .q      (lsu_ctl_reg0[3:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// Thread1
+// Control register bits [13:4]; write data is zeroed while lctl_rst[1] is set.
+   wire [9:0]  lsu_ctl_reg1_din;
+   assign      lsu_ctl_reg1_din[9:0] = lsu_ctl_reg_din[9:0] & {10{~lctl_rst[1]}};
+
+   wire        lsu_ctl_state1_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lsu_ctl_state1_clkbuf (
+        .clk    (lsu_ctl_state1_clk),
+        .rclk   (clk),
+        .enb_l  (~lsu_ctl_state_wr_en[1]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(10) lsu_ctl_reg1_ff2 (
+        .clk    (clk),
+        .en     (lsu_ctl_state_wr_en[1]),
+        .din    (lsu_ctl_reg1_din[9:0]),
+        .q      (lsu_ctl_reg1[13:4]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(10) lsu_ctl_reg1_ff2 (
+        .clk    (lsu_ctl_state1_clk),
+        .din    (lsu_ctl_reg1_din[9:0]),
+        .q      (lsu_ctl_reg1[13:4]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// Control register bits [3:0]; separate write enable, same reset masking.
+   wire [3:0]  lsuctl_ctlbits1_wr_data_din;
+   assign      lsuctl_ctlbits1_wr_data_din[3:0] = lsuctl_ctlbits_wr_data[3:0] & {4{~lctl_rst[1]}};
+
+   wire        lsuctl_ctlbits1_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lsuctl_ctlbits1_clkbuf (
+        .clk    (lsuctl_ctlbits1_clk),
+        .rclk   (clk),
+        .enb_l  (~lsuctl_ctlbits_wr_en[1]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(4) lsu_ctl_reg1_ff1 (
+        .clk    (clk),
+        .en     (lsuctl_ctlbits_wr_en[1]),
+        .din    (lsuctl_ctlbits1_wr_data_din[3:0]),
+        .q      (lsu_ctl_reg1[3:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(4) lsu_ctl_reg1_ff1 (
+        .clk    (lsuctl_ctlbits1_clk),
+        .din    (lsuctl_ctlbits1_wr_data_din[3:0]),
+        .q      (lsu_ctl_reg1[3:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// Thread2
+// Control register bits [13:4]; write data is zeroed while lctl_rst[2] is set.
+   wire [9:0]  lsu_ctl_reg2_din;
+   assign      lsu_ctl_reg2_din[9:0] = lsu_ctl_reg_din[9:0] & {10{~lctl_rst[2]}};
+
+   wire        lsu_ctl_state2_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lsu_ctl_state2_clkbuf (
+        .clk    (lsu_ctl_state2_clk),
+        .rclk   (clk),
+        .enb_l  (~lsu_ctl_state_wr_en[2]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(10) lsu_ctl_reg2_ff2 (
+        .clk    (clk),
+        .en     (lsu_ctl_state_wr_en[2]),
+        .din    (lsu_ctl_reg2_din[9:0]),
+        .q      (lsu_ctl_reg2[13:4]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(10) lsu_ctl_reg2_ff2 (
+        .clk    (lsu_ctl_state2_clk),
+        .din    (lsu_ctl_reg2_din[9:0]),
+        .q      (lsu_ctl_reg2[13:4]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// Control register bits [3:0]; separate write enable, same reset masking.
+   wire [3:0]  lsuctl_ctlbits2_wr_data_din;
+   assign      lsuctl_ctlbits2_wr_data_din[3:0] = lsuctl_ctlbits_wr_data[3:0] & {4{~lctl_rst[2]}};
+
+   wire        lsuctl_ctlbits2_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lsuctl_ctlbits2_clkbuf (
+        .clk    (lsuctl_ctlbits2_clk),
+        .rclk   (clk),
+        .enb_l  (~lsuctl_ctlbits_wr_en[2]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(4) lsu_ctl_reg2_ff1 (
+        .clk    (clk),
+        .en     (lsuctl_ctlbits_wr_en[2]),
+        .din    (lsuctl_ctlbits2_wr_data_din[3:0]),
+        .q      (lsu_ctl_reg2[3:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(4) lsu_ctl_reg2_ff1 (
+        .clk    (lsuctl_ctlbits2_clk),
+        .din    (lsuctl_ctlbits2_wr_data_din[3:0]),
+        .q      (lsu_ctl_reg2[3:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// Thread3
+// Control register bits [13:4]; write data is zeroed while lctl_rst[3] is set.
+   wire [9:0]  lsu_ctl_reg3_din;
+   assign      lsu_ctl_reg3_din[9:0] = lsu_ctl_reg_din[9:0] & {10{~lctl_rst[3]}};
+
+   wire        lsu_ctl_state3_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lsu_ctl_state3_clkbuf (
+        .clk    (lsu_ctl_state3_clk),
+        .rclk   (clk),
+        .enb_l  (~lsu_ctl_state_wr_en[3]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(10) lsu_ctl_reg3_ff2 (
+        .clk    (clk),
+        .en     (lsu_ctl_state_wr_en[3]),
+        .din    (lsu_ctl_reg3_din[9:0]),
+        .q      (lsu_ctl_reg3[13:4]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(10) lsu_ctl_reg3_ff2 (
+        .clk    (lsu_ctl_state3_clk),
+        .din    (lsu_ctl_reg3_din[9:0]),
+        .q      (lsu_ctl_reg3[13:4]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// Control register bits [3:0]; separate write enable, same reset masking.
+   wire [3:0]  lsuctl_ctlbits3_wr_data_din;
+   assign      lsuctl_ctlbits3_wr_data_din[3:0] = lsuctl_ctlbits_wr_data[3:0] & {4{~lctl_rst[3]}};
+
+   wire        lsuctl_ctlbits3_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lsuctl_ctlbits3_clkbuf (
+        .clk    (lsuctl_ctlbits3_clk),
+        .rclk   (clk),
+        .enb_l  (~lsuctl_ctlbits_wr_en[3]),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(4) lsu_ctl_reg3_ff1 (
+        .clk    (clk),
+        .en     (lsuctl_ctlbits_wr_en[3]),
+        .din    (lsuctl_ctlbits3_wr_data_din[3:0]),
+        .q      (lsu_ctl_reg3[3:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(4) lsu_ctl_reg3_ff1 (
+        .clk    (lsuctl_ctlbits3_clk),
+        .din    (lsuctl_ctlbits3_wr_data_din[3:0]),
+        .q      (lsu_ctl_reg3[3:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// LSU Ctl Reg, selected by thread*_g
+mux4ds #(14)     lctlrg_mx (
+        .dout   (lsu_ctl_reg[13:0]),
+        .in0    (lsu_ctl_reg0[13:0]),   .sel0   (thread0_g),
+        .in1    (lsu_ctl_reg1[13:0]),   .sel1   (thread1_g),
+        .in2    (lsu_ctl_reg2[13:0]),   .sel2   (thread2_g),
+        .in3    (lsu_ctl_reg3[13:0]),   .sel3   (thread3_g)
+        );
+
+// Read-back of control bits [3:0], one thread picked by dfture_tap_rd_en.
+   wire [3:0] dfture_tap_rd_data;
+
+mux4ds #(4)     dfture_tap_rd_data_mx (
+        .dout   (dfture_tap_rd_data[3:0]),
+        .in0    (lsu_ctl_reg0[3:0]),    .sel0   (dfture_tap_rd_en[0]),
+        .in1    (lsu_ctl_reg1[3:0]),    .sel1   (dfture_tap_rd_en[1]),
+        .in2    (lsu_ctl_reg2[3:0]),    .sel2   (dfture_tap_rd_en[2]),
+        .in3    (lsu_ctl_reg3[3:0]),    .sel3   (dfture_tap_rd_en[3])
+        );
+   
+   // Bits [13:6] of the control register of the thread picked by thread*_m.
+   wire [7:0] va_wtchpt_mask;
+
+mux4ds #(8)     va_wtchpt_mask_mx (
+        .in0    (lsu_ctl_reg0[13:6]),
+        .in1    (lsu_ctl_reg1[13:6]),
+        .in2    (lsu_ctl_reg2[13:6]),
+        .in3    (lsu_ctl_reg3[13:6]),
+        .sel0   (thread0_m),
+        .sel1   (thread1_m),
+        .sel2   (thread2_m),
+        .sel3   (thread3_m),
+        .dout   (va_wtchpt_mask[7:0])
+        );  
+
+// Bug 1671 fix
+//assign va_wtchpt_msk_match_m  =   (stb_ldst_byte_msk[7:0] == va_wtchpt_mask[7:0]);
+//assign va_wtchpt_msk_match_m  =   |(stb_ldst_byte_msk[7:0] & va_wtchpt_mask[7:0]);
+
+// Match if any byte-mask bit i overlaps watchpoint-mask bit (7 - i); the
+// two vectors are paired in opposite bit order, unlike the plain AND/OR
+// reduction commented out above.
+assign va_wtchpt_msk_match_m  =   
+       stb_ldst_byte_msk[0] & va_wtchpt_mask[7] |
+       stb_ldst_byte_msk[1] & va_wtchpt_mask[6] |
+       stb_ldst_byte_msk[2] & va_wtchpt_mask[5] |
+       stb_ldst_byte_msk[3] & va_wtchpt_mask[4] |
+       stb_ldst_byte_msk[4] & va_wtchpt_mask[3] |
+       stb_ldst_byte_msk[5] & va_wtchpt_mask[2] |
+       stb_ldst_byte_msk[6] & va_wtchpt_mask[1] |
+       stb_ldst_byte_msk[7] & va_wtchpt_mask[0] ;
+ 
+       
+   
+/***********************ldxa****************************/   
+// BIST_Controller ASI
+// tap wr takes precedence
+//wire  [10:0]  bistctl_data_in;
+//wire  [10:0]  bist_ctl_reg ;
+
+//assign  bistctl_data_in[13:0] =
+//  bist_tap_wr_en ? lsu_iobrdge_wr_data[13:0] : st_rs3_data_g[13:0] ;
+
+//assign  bistctl_data_in[10:7] = lsu_iobrdge_wr_data[10:7];
+//assign  bistctl_data_in[6:0] =
+//  bist_tap_wr_en ? lsu_iobrdge_wr_data[6:0] : st_rs3_data_g[6:0] ;
+   
+// Low seven bits of the BIST control write data: the IO-bridge value is
+// used while bist_tap_wr_en is set, otherwise the store data.
+assign  bist_ctl_reg_in[6:0] = bist_tap_wr_en ? lsu_iobrdge_wr_data[6:0]
+                                              : st_rs3_data_g[6:0];
+
+/*   wire bistctl_clk;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf bistctl_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~bistctl_wr_en),
+                .tmb_l  (tmb_l),
+                .clk    (bistctl_clk)
+                ) ;
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(11) bistctl_ff (
+        .din    (bistctl_data_in[10:0]),
+        .q      (bist_ctl_reg[10:0]),
+        .en (~(~bistctl_wr_en)), .clk(clk),
+        .se     (se),       .si (),          .so ()
+        );
+`else
+dff_s #(11) bistctl_ff (
+        .din    (bistctl_data_in[10:0]),
+        .q      (bist_ctl_reg[10:0]),
+        .clk    (bistctl_clk),
+        .se     (se),       .si (),          .so ()
+        );
+`endif
+*/
+   
+// Self-Timed Margin Control ASI
+// tap wr takes precedence
+//
+// Register layout (matches the field assigns at the end of this group):
+//   itlb         [27:20]
+//   dtlb         [19:12]
+//   idct (i)     [11: 8]
+//   idct (d)     [ 7: 4]
+//   idct (mamem) [ 3: 0]
+wire  [27:0]  mrgnctl_data_in;
+wire  [27:0]  spc_mrgnctl_data_in;
+wire  [27:0]  mrgn_ctl_reg;
+
+// Core-side write data: store data normally, fixed defaults while rst_l
+// is low.
+assign spc_mrgnctl_data_in[27:0] =
+        rst_l ? st_rs3_data_g[27:0]
+              : {8'b01011011,          // itlb
+                 8'b01011011,          // dtlb
+                 4'b0101,              // idct (i)
+                 4'b0101,              // idct (d)
+                 4'b0101};             // idct (mamem)
+
+assign mrgnctl_data_in[27:0] =
+        mrgn_tap_wr_en ? lsu_iobrdge_wr_data[27:0]
+                       : spc_mrgnctl_data_in[27:0];
+
+   wire mrgnctl_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf mrgnctl_clkbuf (
+        .clk    (mrgnctl_clk),
+        .rclk   (clk),
+        .enb_l  (~mrgnctl_wr_en),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(28) mrgnctl_ff (
+        .clk    (clk),
+        .en     (mrgnctl_wr_en),
+        .din    (mrgnctl_data_in[27:0]),
+        .q      (mrgn_ctl_reg[27:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(28) mrgnctl_ff (
+        .clk    (mrgnctl_clk),
+        .din    (mrgnctl_data_in[27:0]),
+        .q      (mrgn_ctl_reg[27:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+assign  lsu_itlb_mrgn[7:0]  = mrgn_ctl_reg[27:20];
+assign  lsu_dtlb_mrgn[7:0]  = mrgn_ctl_reg[19:12];
+assign  lsu_ictag_mrgn[3:0] = mrgn_ctl_reg[11:8];
+assign  lsu_dctag_mrgn[3:0] = mrgn_ctl_reg[7:4];
+assign  lsu_mamem_mrgn[3:0] = mrgn_ctl_reg[3:0];
+
+// LSU Diag Reg ASI
+// Two-bit register; write data is zeroed while rst_l is low.
+wire  [1:0] ldiagctl_data_in;
+wire  [1:0] ldiag_ctl_reg;
+
+assign  ldiagctl_data_in[1:0] = st_rs3_data_g[1:0] & {2{rst_l}};
+
+   wire ldiagctl_clk;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf ldiagctl_clkbuf (
+        .clk    (ldiagctl_clk),
+        .rclk   (clk),
+        .enb_l  (~ldiagctl_wr_en),
+        .tmb_l  (~se)
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(2) ldiagctl_ff (
+        .clk    (clk),
+        .en     (ldiagctl_wr_en),
+        .din    (ldiagctl_data_in[1:0]),
+        .q      (ldiag_ctl_reg[1:0]),
+        .se (se), .si (), .so ()
+        );
+`else
+dff_s #(2) ldiagctl_ff (
+        .clk    (ldiagctl_clk),
+        .din    (ldiagctl_data_in[1:0]),
+        .q      (ldiag_ctl_reg[1:0]),
+        .se (se), .si (), .so ()
+        );
+`endif
+
+// Bit 0 goes to the IFU, bit 1 is the D-cache direct-map control.
+assign  lsu_ifu_direct_map_l1 = ldiag_ctl_reg[0];
+assign  dc_direct_map         = ldiag_ctl_reg[1];
+
+   // Read-back mux for the misc control registers (BIST, margin, diag,
+   // control bits), zero-extended to 44 bits for the IO bridge.
+   wire [43:0] misc_ctl_reg;
+   wire [3:0]  misc_ctl_sel_q;
+   wire [3:0]  misc_ctl_sel;
+
+dff_s #(4) misc_ctl_sel_stgg (
+        .clk    (clk),
+        .din    (misc_ctl_sel_din[3:0]),
+        .q      (misc_ctl_sel_q[3:0]),
+        .se (se), .si (), .so ()
+        );
+
+   // rst_tri_en forces leg 2 and masks the other three.
+   assign     misc_ctl_sel[1:0] = misc_ctl_sel_q[1:0] & {2{~rst_tri_en}};
+   assign     misc_ctl_sel[2]   = misc_ctl_sel_q[2]   |  rst_tri_en;
+   assign     misc_ctl_sel[3]   = misc_ctl_sel_q[3]   & ~rst_tri_en;
+
+// Misc Ctl Registers
+mux4ds #(44)     miscrg_mx (
+        .dout   (misc_ctl_reg[43:0]),
+        .in0    ({33'b0,bist_ctl_reg_out[10:0]}),       .sel0   (misc_ctl_sel[0]),
+        .in1    ({16'b0,mrgn_ctl_reg[27:0]}),           .sel1   (misc_ctl_sel[1]),
+        .in2    ({42'd0,ldiag_ctl_reg[1:0]}),           .sel2   (misc_ctl_sel[2]),
+        .in3    ({40'd0,dfture_tap_rd_data[3:0]}),      .sel3   (misc_ctl_sel[3])
+        );
+
+assign  lsu_iobrdge_rd_data[43:0] = misc_ctl_reg[43:0];
+
+wire	[12:0]	ldxa_data_fmx1 ;
+   
+mux3ds #(13)     lsuasi_fmx1 (
+        .in0    (pctxt_state[12:0]),
+        .in1    (sctxt_state[12:0]),
+        .in2    ({10'd0,pid_state[2:0]}),
+        .sel0   (lsu_asi_sel_fmx1[0]),
+        .sel1   (lsu_asi_sel_fmx1[1]),
+        .sel2   (lsu_asi_sel_fmx1[2]),
+        .dout   (ldxa_data_fmx1[12:0])
+        );
+
+wire  [47:0]  final_ldxa_data_g ;
+   
+//mux3ds #(48)     lsuasi_fmx2 (
+//        .in0    ({35'd0,ldxa_data_fmx1[12:0]}),
+//        .in1    ({15'd0,lsu_ctl_reg[15:8],2'b00,lsu_ctl_reg[5:4],17'd0,lsu_ctl_reg[3:0]}),
+//        .in2    ({lsu_va_wtchpt_addr[47:3],3'b000}),
+//        .sel0   (lsu_asi_sel_fmx2[0]),
+//        .sel1   (lsu_asi_sel_fmx2[1]),
+//        .sel2   (lsu_asi_sel_fmx2[2]),
+//        .dout   (local_ldxa_data_g[47:0])
+//        );
+
+//mux2ds #(48)     lsuasi_final (
+//        .in0    (local_ldxa_data_g[47:0]),
+//        .in1    ({4'd0,misc_ctl_reg[43:0]}),
+//        .sel0   (~misc_asi_rd_en),
+//        .sel1   (misc_asi_rd_en),
+//        .dout   (final_ldxa_data_g[47:0])
+//        );
+
+mux3ds #(48)     lsuasi_fmx2 (
+        .in0    ({35'd0,ldxa_data_fmx1[12:0]}),
+        .in1    ({15'd0,lsu_ctl_reg[13:6],2'b00,lsu_ctl_reg[5:4],17'd0,lsu_ctl_reg[3:0]}),
+        .in2    ({4'd0,misc_ctl_reg[43:0]}),
+        .sel0   (lsu_asi_sel_fmx2[0]),
+        .sel1   (lsu_asi_sel_fmx2[1]),
+        .sel2   (lsu_asi_sel_fmx2[2]),
+        .dout   (final_ldxa_data_g[47:0])
+        );   
+
+assign        lsu_local_ldxa_data_g[47:0] =  final_ldxa_data_g[47:0];
+
+
+/****************va staging*******************/
+ wire [47:0] ldst_va_m;   
+dff_s  #(48) va_stgm (
+        .din    (exu_lsu_ldst_va_e[47:0]),
+        .q      (ldst_va_m[47:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+assign lsu_ldst_va_m[12:0] = ldst_va_m[12:0];   
+
+assign lsu_ldst_va_m_buf[47:0] = ldst_va_m[47:0];
+   
+       
+assign lsu_tlu_ldst_va_m[9:0] = ldst_va_m[9:0];   
+   
+wire [47:0] ldst_va_g;   
+dff_s  #(48) va_stgg (
+        .din    (ldst_va_m[47:0]),
+        .q      (ldst_va_g[47:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+assign  lsu_ldst_va_g[7:0] = ldst_va_g[7:0] ;
+
+
+wire  [7:0] asi_state_g ;
+assign  asi_state_g[7:0] = lsu_asi_state[7:0] ; 
+
+wire  [7:0] tlb_asi_state0,tlb_asi_state1,tlb_asi_state2,tlb_asi_state3 ;
+wire  [47:13] lngltncy_dmp_va ;
+   
+// Thread 0
+   wire [47:0] ldst_va0;
+
+   wire        tlb_access0_clk;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf tlb_access0_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~tlb_access_en0_g),
+                .tmb_l  (~se),
+                .clk    (tlb_access0_clk)
+                ) ;
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(56)  asi_thrd0 (
+        .din    ({asi_state_g[7:0],   ldst_va_g[47:0]}),
+        .q      ({tlb_asi_state0[7:0],ldst_va0[47:0]}),
+        .en (~(~tlb_access_en0_g)), .clk(clk),
+        .se     (se),       .si (),          .so ()
+        );
+`else
+dff_s #(56)  asi_thrd0 (
+        .din    ({asi_state_g[7:0],   ldst_va_g[47:0]}),
+        .q      ({tlb_asi_state0[7:0],ldst_va0[47:0]}),
+        .clk    (tlb_access0_clk),
+        .se     (se),       .si (),          .so ()
+        );
+`endif
+
+// Thread 1
+   wire [47:0] ldst_va1;
+
+   wire        tlb_access1_clk;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf tlb_access1_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~tlb_access_en1_g),
+                .tmb_l  (~se),
+                .clk    (tlb_access1_clk)
+                ) ;
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(56)  asi_thrd1 (
+        .din    ({asi_state_g[7:0],   ldst_va_g[47:0]}),
+        .q      ({tlb_asi_state1[7:0],ldst_va1[47:0]}),
+        .en (~(~tlb_access_en1_g)), .clk(clk),
+        .se     (se),       .si (),          .so ()
+        );
+`else
+dff_s #(56)  asi_thrd1 (
+        .din    ({asi_state_g[7:0],   ldst_va_g[47:0]}),
+        .q      ({tlb_asi_state1[7:0],ldst_va1[47:0]}),
+        .clk    (tlb_access1_clk),
+        .se     (se),       .si (),          .so ()
+        );
+`endif
+
+// Thread 2
+   wire [47:0] ldst_va2;
+
+   wire        tlb_access2_clk;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf tlb_access2_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~tlb_access_en2_g),
+                .tmb_l  (~se),
+                .clk    (tlb_access2_clk)
+                ) ;
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(56)  asi_thrd2 (
+        .din    ({asi_state_g[7:0],   ldst_va_g[47:0]}),
+        .q      ({tlb_asi_state2[7:0],ldst_va2[47:0]}),
+        .en (~(~tlb_access_en2_g)), .clk(clk),
+        .se     (se),       .si (),          .so ()
+        );
+`else
+dff_s #(56)  asi_thrd2 (
+        .din    ({asi_state_g[7:0],   ldst_va_g[47:0]}),
+        .q      ({tlb_asi_state2[7:0],ldst_va2[47:0]}),
+        .clk    (tlb_access2_clk),
+        .se     (se),       .si (),          .so ()
+        );
+`endif
+
+// Thread 3
+   wire [47:0] ldst_va3;
+
+   wire        tlb_access3_clk;
+   
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf tlb_access3_clkbuf (
+                .rclk   (clk),
+                .enb_l  (~tlb_access_en3_g),
+                .tmb_l  (~se),
+                .clk    (tlb_access3_clk)
+                ) ;
+`endif
+   
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(56)  asi_thrd3 (
+        .din    ({asi_state_g[7:0],   ldst_va_g[47:0]}),
+        .q      ({tlb_asi_state3[7:0],ldst_va3[47:0]}),
+        .en (~(~tlb_access_en3_g)), .clk(clk),
+        .se     (se),       .si (),          .so ()
+        );
+`else
+dff_s #(56)  asi_thrd3 (
+        .din    ({asi_state_g[7:0],   ldst_va_g[47:0]}),
+        .q      ({tlb_asi_state3[7:0],ldst_va3[47:0]}),
+        .clk    (tlb_access3_clk),
+        .se     (se),       .si (),          .so ()
+        );
+`endif
+
+   wire [47:0] ldst_va_dout;
+   
+mux4ds #(56)     ldst_va_mx (
+        .in0    ({tlb_asi_state0[7:0],ldst_va0[47:0]}),
+        .in1    ({tlb_asi_state1[7:0],ldst_va1[47:0]}),
+        .in2    ({tlb_asi_state2[7:0],ldst_va2[47:0]}),
+        .in3    ({tlb_asi_state3[7:0],ldst_va3[47:0]}),
+        .sel0   (tlb_access_sel_thrd0),
+        .sel1   (tlb_access_sel_thrd1),
+        .sel2   (tlb_access_sel_thrd2),
+        .sel3   (tlb_access_sel_default),
+        .dout   ({lsu_tlu_tlb_asi_state_m[7:0], ldst_va_dout[47:0]})
+        );
+
+assign  lsu_ifu_asi_state[7:0] = lsu_tlu_tlb_asi_state_m[7:0] ;
+
+wire [17:0] lngltncy_ldst_va ;
+
+assign  lngltncy_ldst_va[17:0] = ldst_va_dout[17:0];
+assign  lngltncy_dmp_va[47:13] = ldst_va_dout[47:13];   
+assign  lsu_tlu_tlb_ldst_va_m[10:0] = lngltncy_ldst_va[10:0] ;
+assign  lsu_tlu_tlb_dmp_va_m[47:13] = lngltncy_dmp_va[47:13] ;
+assign  lsu_ifu_asi_addr[17:0] = lngltncy_ldst_va[17:0] ;
+
+// Diagnostics
+
+//wire	[3:0]	lsu_diag_access_sel_d1 ;
+
+//dff #(4)  diagsel_stgd1 (
+//        .din    (lsu_diag_access_sel[3:0]),
+//        .q      (lsu_diag_access_sel_d1[3:0]),
+//        .clk    (clk),
+//        .se     (se),       .si (),          .so ()
+//        ); 
+  wire [3:0] diagnstc_va_sel;
+//change buffer to nand / nor 
+assign     diagnstc_va_sel[0] =   lsu_diagnstc_va_sel[0] & ~rst_tri_en;
+assign     diagnstc_va_sel[1] =   lsu_diagnstc_va_sel[1] & ~rst_tri_en;
+assign     diagnstc_va_sel[2] =   lsu_diagnstc_va_sel[2] & ~rst_tri_en;
+assign     diagnstc_va_sel[3] =   lsu_diagnstc_va_sel[3] |  rst_tri_en;
+   
+wire	[20:0] diag_va ;
+mux4ds #(21)     diag_va_mx (
+        .in0    (ldst_va0[20:0]),
+        .in1    (ldst_va1[20:0]),
+        .in2    (ldst_va2[20:0]),
+        .in3    (ldst_va3[20:0]),
+        .sel0   (diagnstc_va_sel[0]),
+        .sel1   (diagnstc_va_sel[1]),
+        .sel2   (diagnstc_va_sel[2]),
+        .sel3   (diagnstc_va_sel[3]),
+        .dout   (diag_va[20:0])
+        );
+
+assign  lsu_diagnstc_wr_addr_e[10:0] = diag_va[10:0] ;
+assign  lsu_diagnstc_dc_prty_invrt_e[7:0] = diag_va[20:13] ;   
+
+//assign  lsu_lngltncy_ldst_va[13:11]= lngltncy_ldst_va[13:11] ;
+
+//assign  lsu_diagnstc_wr_way_e[0] = ~diag_va[12] & ~diag_va[11] ;
+//assign  lsu_diagnstc_wr_way_e[1] = ~diag_va[12] &  diag_va[11] ;
+//assign  lsu_diagnstc_wr_way_e[2] =  diag_va[12] & ~diag_va[11] ;
+//assign  lsu_diagnstc_wr_way_e[3] =  diag_va[12] &  diag_va[11] ;
+
+assign  lsu_diagnstc_wr_way_e[1:0] =  {diag_va[12],  diag_va[11]};
+   
+   
+assign	lsu_diag_va_prty_invrt = diag_va[13] ;
+
+/***************error addr***************/
+wire  [10:4] dcfill_addr_m,dcfill_addr_g ;
+
+dff_s #(7)  filla_stgm (
+        .din    (lsu_dcfill_addr_e[10:4]),
+        .q      (dcfill_addr_m[10:4]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+dff_s #(7)  filla_stgg (
+        .din    (dcfill_addr_m[10:4]),
+        .q      (dcfill_addr_g[10:4]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+wire  [28:0]  error_pa_g ;
+dff_s #(29)  epa_stgg (
+        .din    (lsu_error_pa_m[28:0]),
+        .q      (error_pa_g[28:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+wire  [47:4]  err_addr_g ;
+
+mux3ds #(44)     erra_mx (
+        .in0    (ldst_va_g[47:4]),
+        .in1    ({38'd0,async_tlb_index[5:0]}),
+        .in2    ({8'd0,error_pa_g[28:0],dcfill_addr_g[10:4]}),
+        .sel0   (lsu_err_addr_sel[0]),
+        .sel1   (lsu_err_addr_sel[1]),
+        .sel2   (lsu_err_addr_sel[2]),
+        .dout   (err_addr_g[47:4])
+        );
+
+/*assign  err_addr_g[47:4] =
+  sync_error_sel ?  ldst_va_g[47:4] : 
+	async_error_sel ? {38'd0,async_tlb_index[5:0]} :
+			{8'd0,error_pa_g[28:0],dcfill_addr_g[10:4]} ;*/
+
+dff_s #(44)  errad_stgg (
+        .din    (err_addr_g[47:4]),
+        .q      (lsu_ifu_err_addr[47:4]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+     
+endmodule // lsu_dctldp
Index: /trunk/T1-CPU/lsu/lsu_rrobin_picker2.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_rrobin_picker2.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_rrobin_picker2.v	(revision 6)
@@ -0,0 +1,168 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_rrobin_picker2.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    Round-Robin Picker for 4 events.
+//                      (see description of picker at the end of this file)
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                                        // time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////    
+
+module lsu_rrobin_picker2 (/*AUTOARG*/
+   // Outputs
+   so, pick_one_hot, 
+   // Inputs
+   rclk, grst_l, arst_l, si, se, events, events_picked, thread_force
+   );
+// Picks one of up to four pending events: forced events first, else a history-based pick whose scan direction alternates.
+input           rclk ;		// clock; aliased to the internal clk below
+input           grst_l;		// registered by rstff; reset is the inverse of the registered value
+input           arst_l;		// wired to the rst_l port of rstff
+input           si;		// scan-in; not connected to anything in this module
+input           se;		// scan enable; wired to rstff only
+output          so;		// scan-out; not driven in this module
+
+
+input 	[3:0]	events ;		// multi-hot; events that could be chosen
+input 	[3:0]	events_picked ;		// one-hot; events that were picked - same cycle as pick
+input 	[3:0]	thread_force ;	        // multi-hot; thread events that have high priority
+
+output 	[3:0]	pick_one_hot ;		// one-hot
+
+wire         clk;
+wire         reset,dbb_reset_l ;
+   
+wire  [3:0]  thread_force_pe_mask ;
+wire  [3:0]  pick_thread_force_1hot ;
+wire         thread_force_events_sel ;
+
+wire  [3:0]  pick_rrobin_1hot, pick_rev_rrobin_1hot, pick_rrobin_1hot_mx ;
+wire         events_pick_dir_d1 ;	// registered pick direction: 0 = bit 0 has priority, 1 = bit 3 has priority
+wire         events_pick_dir ;
+wire  [3:0]  pick_rrobin_status_or_one_hot ;
+wire  [3:0]  pick_rrobin_din ;
+wire  [3:0]  pick_rrobin ;		// registered history: accumulates events_picked until pick_rrobin_reset clears it
+wire         pick_rrobin_reset ;
+wire         pick_rrobin_dir_upd ;
+wire  [3:0]  pick_rrobin_events ;
+
+   
+   // register grst_l through rstff; the internal reset below is the inverse of the registered value
+    dffrl_async rstff(.din (grst_l),
+                        .q   (dbb_reset_l),
+                        .clk (clk), .se(se), .si(), .so(),
+                        .rst_l (arst_l));
+
+assign  reset =  ~dbb_reset_l;
+assign  clk = rclk;
+
+
+//*******************************************************************************************************
+//PICK  
+//*******************************************************************************************************
+
+   //pick for thread force events - selected whenever any event is both pending and forced
+assign	thread_force_events_sel = |(events[3:0] & thread_force[3:0]) ;
+   //fixed priority among forced events: bit 0 wins over bit 1, which wins over bit 2, then bit 3
+assign  thread_force_pe_mask[3:0]  =  events[3:0] & thread_force[3:0] ;
+assign	pick_thread_force_1hot[0] = thread_force_pe_mask[0] ;
+assign	pick_thread_force_1hot[1] = thread_force_pe_mask[1] & ~thread_force_pe_mask[0] ;
+assign	pick_thread_force_1hot[2] = thread_force_pe_mask[2] & ~|thread_force_pe_mask[1:0] ;
+assign	pick_thread_force_1hot[3] = thread_force_pe_mask[3] & ~|thread_force_pe_mask[2:0] ;
+
+   //pick for round robin events - candidates are pending events not yet recorded in pick_rrobin
+assign  pick_rrobin_events[3:0]  =  events[3:0] & ~pick_rrobin[3:0] ;
+   //forward direction (events_pick_dir_d1 = 0): lowest-numbered candidate wins
+assign  pick_rrobin_1hot[0] = ~events_pick_dir_d1 & pick_rrobin_events[0] ;
+assign	pick_rrobin_1hot[1] = ~events_pick_dir_d1 & pick_rrobin_events[1] & ~pick_rrobin_events[0] ;
+assign	pick_rrobin_1hot[2] = ~events_pick_dir_d1 & pick_rrobin_events[2] & ~|pick_rrobin_events[1:0] ;
+assign	pick_rrobin_1hot[3] = ~events_pick_dir_d1 & pick_rrobin_events[3] & ~|pick_rrobin_events[2:0] ;
+
+   //pick for reverse round robin events (events_pick_dir_d1 = 1): highest-numbered candidate wins
+assign  pick_rev_rrobin_1hot[0] = events_pick_dir_d1 & pick_rrobin_events[0] & ~|pick_rrobin_events[3:1] ;
+assign	pick_rev_rrobin_1hot[1] = events_pick_dir_d1 & pick_rrobin_events[1] & ~|pick_rrobin_events[3:2] ;
+assign	pick_rev_rrobin_1hot[2] = events_pick_dir_d1 & pick_rrobin_events[2] & ~|pick_rrobin_events[3] ;
+assign	pick_rev_rrobin_1hot[3] = events_pick_dir_d1 & pick_rrobin_events[3] ;
+   //the two direction vectors are gated by opposite values of events_pick_dir_d1, so the OR acts as a 2:1 mux
+assign  pick_rrobin_1hot_mx[3:0]  =  pick_rev_rrobin_1hot[3:0] | pick_rrobin_1hot[3:0] ;
+assign  pick_one_hot[3:0]    =  thread_force_events_sel ? pick_thread_force_1hot[3:0] : 
+                                                          pick_rrobin_1hot_mx[3:0] ;
+
+//*******************************************************************************************************
+
+
+
+//*******************************************************************************************************
+//PICK ROUND ROBIN (bug4814)
+//*******************************************************************************************************
+// this is used if there are no requests to be picked based on pick_status[3:0]
+// NOTE(review): pick_status[3:0] is not declared in this module; the history kept here is pick_rrobin[3:0]
+assign pick_rrobin_status_or_one_hot[3:0] = pick_rrobin[3:0] | events_picked[3:0] ;
+assign pick_rrobin_reset = reset | ~|(events[3:0] & ~pick_rrobin_status_or_one_hot[3:0]) ;
+   //change direction bit only when events are non-zero
+assign pick_rrobin_dir_upd = |events[3:0] & (~|(events[3:0] & ~pick_rrobin_status_or_one_hot[3:0])) ;
+
+   // make reset dominant
+assign pick_rrobin_din[3:0] = pick_rrobin_status_or_one_hot[3:0] & ~{4{pick_rrobin_reset}};
+
+dff_s   #(4) ff_pick_rrobin (
+           .din    (pick_rrobin_din[3:0]),
+           .q      (pick_rrobin[3:0]    ),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()	// scan enable tied low, scan ports left open
+            );
+//*******************************************************************************************************
+
+
+//*******************************************************************************************************
+// PICK DIRECTION
+//*******************************************************************************************************
+
+   //bug4609 - change direction of pick when all events are picked in round robin pick
+   //          this is needed when the condition below occurs. assuming misc is less frequent
+   //          this should pick load/store in round robin fashion
+   //-------------------------------------------------------
+   // cycle                 0   1   2
+   //-------------------------------------------------------
+   // history{misc,st,ld}  010 011 011
+   // vld{misc,st,ld}      011 011 011
+   //-------------------------------------------------------
+   // next direction: forced to 0 by reset, toggled when pick_rrobin_dir_upd is set, held otherwise
+assign events_pick_dir  =  ~reset &
+                           (( ~pick_rrobin_dir_upd & events_pick_dir_d1) |		//hold
+                            (  pick_rrobin_dir_upd & ~events_pick_dir_d1)) ;		//set - invert direction
+   
+   dff_s   #(1) ff_events_pick_dir (
+        .din    (events_pick_dir),
+        .q      (events_pick_dir_d1),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()	// scan enable tied low, scan ports left open
+        );
+   
+//*******************************************************************************************************
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_stb_ctldp.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_stb_ctldp.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_stb_ctldp.v	(revision 6)
@@ -0,0 +1,318 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_stb_ctldp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+//FPGA_SYN enables all FPGA related modifications by defining both FPGA_SYN_CLK_EN and FPGA_SYN_CLK_DFF
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module lsu_stb_ctldp (/*AUTOARG*/
+   // Outputs
+   so, stb_state_si_0, stb_state_si_1, stb_state_si_2, 
+   stb_state_si_3, stb_state_si_4, stb_state_si_5, stb_state_si_6, 
+   stb_state_si_7, stb_state_rtype_0, stb_state_rtype_1, 
+   stb_state_rtype_2, stb_state_rtype_3, stb_state_rtype_4, 
+   stb_state_rtype_5, stb_state_rtype_6, stb_state_rtype_7, 
+   stb_state_rmo, 
+   // Inputs
+   rclk, si, se, stb_clk_en_l, lsu_stb_va_m, lsu_st_rq_type_m, 
+   lsu_st_rmo_m
+   );
+   // Eight 5-bit registers (entries 0-7); entry N stores {lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], lsu_st_rmo_m}, gated by stb_clk_en_l[N].
+   input rclk;		// clock; aliased to the internal clk below
+   input si;		// scan-in; not connected to anything in this module
+   input se;		// scan enable; fed to every flop and, inverted, to clken_buf .tmb_l
+//   input tmb_l;
+
+   output so;		// scan-out; not driven in this module
+   
+   input [7:0] stb_clk_en_l;	// per-entry enable: inverted onto dffe_s .en, or passed to clken_buf .enb_l
+
+   input [7:6] lsu_stb_va_m;
+   input [2:1] lsu_st_rq_type_m;
+   input       lsu_st_rmo_m;
+
+   output [3:2] stb_state_si_0;
+   output [3:2] stb_state_si_1;
+   output [3:2] stb_state_si_2;
+   output [3:2] stb_state_si_3;
+   output [3:2] stb_state_si_4;
+   output [3:2] stb_state_si_5;
+   output [3:2] stb_state_si_6;
+   output [3:2] stb_state_si_7;
+
+   output [2:1] stb_state_rtype_0;
+   output [2:1] stb_state_rtype_1;
+   output [2:1] stb_state_rtype_2;
+   output [2:1] stb_state_rtype_3;
+   output [2:1] stb_state_rtype_4;
+   output [2:1] stb_state_rtype_5;
+   output [2:1] stb_state_rtype_6;
+   output [2:1] stb_state_rtype_7;
+
+   output [7:0] stb_state_rmo;
+   // stb_clk[N] is the output of clken_buf N; it is only driven when FPGA_SYN_CLK_EN is not defined.
+   // NOTE(review): defining FPGA_SYN_CLK_EN without FPGA_SYN_CLK_DFF would leave the dff_s clocks undriven.
+   wire [7:0] stb_clk;
+
+   wire       clk;
+   assign     clk = rclk;
+   // Per-entry clock buffers; each instance is omitted when FPGA_SYN_CLK_EN is defined.
+`ifdef FPGA_SYN_CLK_EN
+`else
+   clken_buf stb0_clkbuf (
+                .rclk   (clk),
+                .enb_l  (stb_clk_en_l[0]),
+                .tmb_l  (~se),
+                .clk    (stb_clk[0])
+                ) ;
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+   clken_buf stb1_clkbuf (
+                .rclk   (clk),
+                .enb_l  (stb_clk_en_l[1]),
+                .tmb_l  (~se),
+                .clk    (stb_clk[1])
+                ) ;
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+   clken_buf stb2_clkbuf (
+                .rclk   (clk),
+                .enb_l  (stb_clk_en_l[2]),
+                .tmb_l  (~se),
+                .clk    (stb_clk[2])
+                ) ;
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+   clken_buf stb3_clkbuf (
+                .rclk   (clk),
+                .enb_l  (stb_clk_en_l[3]),
+                .tmb_l  (~se),
+                .clk    (stb_clk[3])
+                ) ;
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+   clken_buf stb4_clkbuf (
+                .rclk   (clk),
+                .enb_l  (stb_clk_en_l[4]),
+                .tmb_l  (~se),
+                .clk    (stb_clk[4])
+                ) ;
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+   clken_buf stb5_clkbuf (
+                .rclk   (clk),
+                .enb_l  (stb_clk_en_l[5]),
+                .tmb_l  (~se),
+                .clk    (stb_clk[5])
+                ) ;
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+   clken_buf stb6_clkbuf (
+                .rclk   (clk),
+                .enb_l  (stb_clk_en_l[6]),
+                .tmb_l  (~se),
+                .clk    (stb_clk[6])
+                ) ;
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+   clken_buf stb7_clkbuf (
+                .rclk   (clk),
+                .enb_l  (stb_clk_en_l[7]),
+                .tmb_l  (~se),
+                .clk    (stb_clk[7])
+                ) ;
+`endif
+
+   // Per-entry state flops. With FPGA_SYN_CLK_DFF: dffe_s on clk with .en = ~stb_clk_en_l[N].
+   // Otherwise: dff_s clocked by the buffered clock stb_clk[N]. Data and outputs are the same in both variants.
+`ifdef FPGA_SYN_CLK_DFF
+  dffe_s #(5)  ff_spec_write_0         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                      lsu_st_rmo_m}),
+        .q      ({stb_state_si_0[3:2], stb_state_rtype_0[2:1],     
+		                       stb_state_rmo[0]}    ),
+        .en (~(stb_clk_en_l[0])), .clk(clk),
+        .se     (se), .si (), .so ()
+        );
+`else
+  dff_s #(5)  ff_spec_write_0         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                      lsu_st_rmo_m}),
+        .q      ({stb_state_si_0[3:2], stb_state_rtype_0[2:1],     
+		                       stb_state_rmo[0]}    ),
+        .clk    (stb_clk[0]),
+        .se     (se), .si (), .so ()
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+  dffe_s #(5)  ff_spec_write_1         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                      lsu_st_rmo_m}),
+        .q      ({stb_state_si_1[3:2], stb_state_rtype_1[2:1],     
+		                   stb_state_rmo[1]}    ),
+        .en (~(stb_clk_en_l[1])), .clk(clk),
+        .se     (se), .si (), .so ()
+        );
+`else
+  dff_s #(5)  ff_spec_write_1         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                      lsu_st_rmo_m}),
+        .q      ({stb_state_si_1[3:2], stb_state_rtype_1[2:1],     
+		                   stb_state_rmo[1]}    ),
+        .clk    (stb_clk[1]),
+        .se     (se), .si (), .so ()
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+  dffe_s #(5)  ff_spec_write_2         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_2[3:2], stb_state_rtype_2[2:1],     
+		                   stb_state_rmo[2]}    ),
+        .en (~(stb_clk_en_l[2])), .clk(clk),
+        .se     (se), .si (), .so ()
+        );
+`else
+  dff_s #(5)  ff_spec_write_2         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_2[3:2], stb_state_rtype_2[2:1],     
+		                   stb_state_rmo[2]}    ),
+        .clk    (stb_clk[2]),
+        .se     (se), .si (), .so ()
+        );
+`endif
+`ifdef FPGA_SYN_CLK_DFF
+  dffe_s #(5)  ff_spec_write_3         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_3[3:2], stb_state_rtype_3[2:1],     
+		                   stb_state_rmo[3]}    ),
+        .en (~(stb_clk_en_l[3])), .clk(clk),
+        .se     (se), .si (), .so ()
+        );
+`else
+  dff_s #(5)  ff_spec_write_3         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_3[3:2], stb_state_rtype_3[2:1],     
+		                   stb_state_rmo[3]}    ),
+        .clk    (stb_clk[3]),
+        .se     (se), .si (), .so ()
+        );
+`endif
+`ifdef FPGA_SYN_CLK_DFF
+  dffe_s #(5)  ff_spec_write_4         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_4[3:2], stb_state_rtype_4[2:1],     
+		                   stb_state_rmo[4]}    ),
+        .en (~(stb_clk_en_l[4])), .clk(clk),
+        .se     (se), .si (), .so ()
+        );
+`else
+  dff_s #(5)  ff_spec_write_4         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_4[3:2], stb_state_rtype_4[2:1],     
+		                   stb_state_rmo[4]}    ),
+        .clk    (stb_clk[4]),
+        .se     (se), .si (), .so ()
+        );
+`endif
+`ifdef FPGA_SYN_CLK_DFF
+  dffe_s #(5)  ff_spec_write_5         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_5[3:2], stb_state_rtype_5[2:1],     
+		                   stb_state_rmo[5]}    ),
+        .en (~(stb_clk_en_l[5])), .clk(clk),
+        .se     (se), .si (), .so ()
+        );
+`else
+  dff_s #(5)  ff_spec_write_5         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_5[3:2], stb_state_rtype_5[2:1],     
+		                   stb_state_rmo[5]}    ),
+        .clk    (stb_clk[5]),
+        .se     (se), .si (), .so ()
+        );
+`endif
+`ifdef FPGA_SYN_CLK_DFF
+  dffe_s #(5)  ff_spec_write_6         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_6[3:2], stb_state_rtype_6[2:1],     
+		                   stb_state_rmo[6]}    ),
+        .en (~(stb_clk_en_l[6])), .clk(clk),
+        .se     (se), .si (), .so ()
+        );
+`else
+  dff_s #(5)  ff_spec_write_6         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_6[3:2], stb_state_rtype_6[2:1],     
+		                   stb_state_rmo[6]}    ),
+        .clk    (stb_clk[6]),
+        .se     (se), .si (), .so ()
+        );
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+  dffe_s #(5)  ff_spec_write_7         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_7[3:2], stb_state_rtype_7[2:1],     
+              		     stb_state_rmo[7]}    ),
+        .en (~(stb_clk_en_l[7])), .clk(clk),
+        .se     (se), .si (), .so ()
+        );
+`else
+  dff_s #(5)  ff_spec_write_7         (
+        .din    ({lsu_stb_va_m[7:6], lsu_st_rq_type_m[2:1], 
+		                    lsu_st_rmo_m}),
+        .q      ({stb_state_si_7[3:2], stb_state_rtype_7[2:1],     
+              		     stb_state_rmo[7]}    ),
+        .clk    (stb_clk[7]),
+        .se     (se), .si (), .so ()
+        );
+`endif
+
+
+endmodule // lsu_stb_ctldp
Index: /trunk/T1-CPU/lsu/lsu_asi_decode.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_asi_decode.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_asi_decode.v	(revision 6)
@@ -0,0 +1,467 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_asi_decode.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    ASI Decode for LSU
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                                        // time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////    
+
+module lsu_asi_decode (/*AUTOARG*/
+   // Outputs
+   asi_internal_d, nucleus_asi_d, primary_asi_d, secondary_asi_d, 
+   lendian_asi_d, nofault_asi_d, quad_asi_d, binit_quad_asi_d, 
+   dcache_byp_asi_d, tlb_lng_ltncy_asi_d, tlb_byp_asi_d, 
+   as_if_user_asi_d, atomic_asi_d, blk_asi_d, dc_diagnstc_asi_d, 
+   dtagv_diagnstc_asi_d, wr_only_asi_d, rd_only_asi_d, unimp_asi_d, 
+   ifu_nontlb_asi_d, recognized_asi_d, ifill_tlb_asi_d, 
+   dfill_tlb_asi_d, rd_only_ltlb_asi_d, wr_only_ltlb_asi_d, 
+   phy_use_ec_asi_d, phy_byp_ec_asi_d, mmu_rd_only_asi_d, 
+   intrpt_disp_asi_d, dmmu_asi58_d, immu_asi50_d, 
+   // Inputs
+   asi_d
+   );
+// Purely combinational decode of the 8-bit asi_d into per-class flags.
+input 	[7:0]	asi_d ;			// asi value to decode
+output		asi_internal_d ;	// internal-register asi
+output		nucleus_asi_d ;		// nucleus-context asi
+output		primary_asi_d ;		// primary-context asi
+output		secondary_asi_d ;	// secondary-context asi
+output		lendian_asi_d ;		// little-endian asi
+output		nofault_asi_d ;		// no-fault asi (0x82/0x8A/0x83/0x8B)
+output		quad_asi_d ;		// quad-ldd asi, blk-init ones included
+output		binit_quad_asi_d ;	// blk-init quad asi only
+output		dcache_byp_asi_d ;	// driven identically to tlb_byp_asi_d
+output		tlb_lng_ltncy_asi_d ;	// tlb demap/data-in/data-access/tag-read/invalidate-all
+output		tlb_byp_asi_d ;		// phys use/bypass ec and quad_ldd_real asi
+output		as_if_user_asi_d ;	// as-if-user asi, block variants included
+output		atomic_asi_d ;		// exact ctxt, as-if-user and phys-use-ec asi
+output		blk_asi_d ;		// block asi, block-commit excluded
+//output		blk_cmt_asi_d ;
+output		dc_diagnstc_asi_d;	// asi 0x46
+output		dtagv_diagnstc_asi_d;	// asi 0x47
+output		wr_only_asi_d ;		// asi 0x73
+output		rd_only_asi_d ;		// no-fault asi plus asi 0x74
+output		unimp_asi_d ;		// unimplemented 0xCx/0xDx and block-commit asi
+output		ifu_nontlb_asi_d ;	// non-tlb asi's in ifu
+output		recognized_asi_d ;	// OR of the recognized asi classes
+output		ifill_tlb_asi_d ;	// itlb fill asi
+output		dfill_tlb_asi_d ;	// dtlb fill asi
+output		rd_only_ltlb_asi_d ;	// read-only long-latency asi
+output		wr_only_ltlb_asi_d ;	// write-only long-latency asi
+output		phy_use_ec_asi_d ;	// asi 0x14/0x1C
+output		phy_byp_ec_asi_d ;	// asi 0x15/0x1D
+
+output		mmu_rd_only_asi_d ;	// does not include asi with va
+output		intrpt_disp_asi_d ;	// asi 0x73 (INTR_W)
+output		dmmu_asi58_d ;		// asi 0x58
+output    immu_asi50_d;			// asi 0x50
+   
+wire	quad_ldd_real, quad_ldd_real_little ;
+wire	asi_if_user_prim_all_d,asi_if_user_sec_all_d ;
+wire	asi_if_user_prim_d,asi_if_user_sec_d ;
+wire	nucleus_asi_exact_d ;
+wire	prim_asi_exact_d ;
+wire	phy_use_ec_asi ;
+wire	phy_byp_ec_asi ;
+wire	sec_asi_exact_d ;
+wire	idemap,ddemap,ddata_in,ddaccess ;
+wire	dtag_read,idata_in,idaccess,invld_all,itag_read ;
+wire	blk_asif_usr_plittle, blk_asif_usr_slittle ;
+wire	blk_plittle, blk_slittle ;
+wire	blk_asif_usr_p, blk_asif_usr_s ;
+wire	blk_cmt_p, blk_cmt_s; 
+wire	blk_p, blk_s ;
+wire	binit_nucleus_d, binit_nucleus_little_d ;
+wire	real_mem_little,real_io_little ;
+   wire unimp_CD_prm;
+   wire unimp_CD_sec;
+
+// Start decode in d-stage. Required late e-stage. The logic could
+// be moved to the e-stage to save staging flops.
+
+wire	dtsb_8k_ptr, dtsb_64k_ptr, dtsb_dir_ptr;
+wire	itsb_8k_ptr, itsb_64k_ptr;
+assign	dtsb_8k_ptr = (asi_d[7:0] == 8'h59) ;
+assign	dtsb_64k_ptr = (asi_d[7:0] == 8'h5A) ;
+assign	dtsb_dir_ptr = (asi_d[7:0] == 8'h5B) ;
+assign	itsb_8k_ptr = (asi_d[7:0] == 8'h51) ;
+assign	itsb_64k_ptr = (asi_d[7:0] == 8'h52) ;
+
+assign	mmu_rd_only_asi_d =
+	dtsb_8k_ptr | dtsb_64k_ptr | dtsb_dir_ptr | itsb_8k_ptr | itsb_64k_ptr ;
+
+assign intrpt_disp_asi_d = (asi_d[7:0] == 8'h73) ; // INTR_W 
+
+assign	dmmu_asi58_d =	(asi_d[7:0] == 8'h58) ; 
+assign  immu_asi50_d =  (asi_d[7:0] == 8'h50) ;
+   
+// ASI Internal Registers - switches out thread among other things
+assign	asi_internal_d =
+	(asi_d[7:0] == 8'h40) |	// streaming/ma
+	(asi_d[7:0] == 8'h45) |	// LSU Control 
+	immu_asi50_d	      | // I-TSB Tag Target/SFSR/TSB/Tag-Access (asi 0x50)
+	itsb_8k_ptr	      | // I-TSB 8K Ptr 
+	itsb_64k_ptr	      | // I-TSB 64K Ptr
+	dmmu_asi58_d	      | // asi 0x58, described on the next line
+	//(asi_d[7:0] == 8'h58) | // D-TSB Tag Target/SFSR/SFAR/TSB/Tag-Access/VA-PA-Watchpt
+	(asi_d[7:0] == 8'h21) | // Primary/Secondary Context
+	(asi_d[7:0] == 8'h20) | // Scratchpad.
+	(asi_d[7:0] == 8'h25) | // Queue
+	(asi_d[7:0] == 8'h4F) | // Hyp Scratchpad
+	dtsb_8k_ptr 	      | // D-TSB 8K Ptr
+	dtsb_64k_ptr	      | // D-TSB 64K Ptr
+	dtsb_dir_ptr	      | // D-TSB Direct Ptr
+	(asi_d[7:0] == 8'h72) | // INTR_RECEIVE
+	intrpt_disp_asi_d     | // INTR_W
+	(asi_d[7:0] == 8'h74) | // INTR_R
+	(asi_d[7:0] == 8'h44) | // Self-Timed Margin Ctl
+	(asi_d[7:0] == 8'h31) | // dmmu_zctxt_ps0_tsb
+	(asi_d[7:0] == 8'h32) | // dmmu_zctxt_ps1_tsb
+	(asi_d[7:0] == 8'h39) | // dmmu_nzctxt_ps0_tsb
+	(asi_d[7:0] == 8'h3A) | // dmmu_nzctxt_ps1_tsb
+	(asi_d[7:0] == 8'h33) | // dmmu_zctxt_cfg_tsb
+	(asi_d[7:0] == 8'h3B) | // dmmu_nzctxt_cfg_tsb
+	(asi_d[7:0] == 8'h35) | // immu_zctxt_ps0_tsb
+	(asi_d[7:0] == 8'h36) | // immu_zctxt_ps1_tsb
+	(asi_d[7:0] == 8'h3D) | // immu_nzctxt_ps0_tsb
+	(asi_d[7:0] == 8'h3E) | // immu_nzctxt_ps1_tsb
+	(asi_d[7:0] == 8'h37) | // immu_zctxt_cfg_tsb
+	(asi_d[7:0] == 8'h3F) | // immu_nzctxt_cfg_tsb
+	dc_diagnstc_asi_d     | // Dcache Diagnostic (asi 0x46)
+	dtagv_diagnstc_asi_d  | // asi 0x47; presumably dcache tag/valid diagnostic (per signal name)
+	tlb_lng_ltncy_asi_d   | // long-latency tlb asi
+	ifu_nontlb_asi_d      ;	// non-tlb asi's in ifu
+
+assign	ifu_nontlb_asi_d = 
+	(asi_d[7:0] == 8'h42) | // instruction-mask
+	(asi_d[7:0] == 8'h43) | // error-inj
+	(asi_d[7:0] == 8'h4B) | // sparc-error-enable
+	(asi_d[7:0] == 8'h4C) | // sparc-error-status
+	(asi_d[7:0] == 8'h4D) | // sparc-error-address
+	(asi_d[7:0] == 8'h66) | // icache-instr
+	(asi_d[7:0] == 8'h67) ; // icache-tag
+
+assign	dc_diagnstc_asi_d = (asi_d[7:0] == 8'h46) ;
+assign	dtagv_diagnstc_asi_d = (asi_d[7:0] == 8'h47) ;
+
+assign	idemap = (asi_d[7:0] == 8'h57) ; // I-MMU Demap Operation
+assign	ddemap = (asi_d[7:0] == 8'h5F) ; // D-MMU Demap Operation
+assign	ddata_in = (asi_d[7:0] == 8'h5C) ; // D-TLB Data-In
+assign	ddaccess = (asi_d[7:0] == 8'h5D) ; // D-TLB Data-Access
+assign	dtag_read = (asi_d[7:0] == 8'h5E) ; // D-TLB Tag Read
+assign	idata_in = (asi_d[7:0] == 8'h54) ; // I-TLB Data-In
+assign	idaccess = (asi_d[7:0] == 8'h55) ; // I-TLB Data-Access
+assign	invld_all = (asi_d[7:0] == 8'h60) ; // I/D Invalidate All
+assign	itag_read = (asi_d[7:0] == 8'h56) ; // I-TLB Tag Read
+
+assign	tlb_lng_ltncy_asi_d = 
+	idemap 		| ddemap 	| ddata_in 	| 
+	ddaccess 	| dtag_read 	| idata_in 	| 
+	idaccess 	| invld_all 	| itag_read 	;
+
+assign	wr_only_ltlb_asi_d = 
+	ddata_in 	|	idata_in 	|
+	idemap		|	ddemap		|
+	invld_all ;
+
+assign	rd_only_ltlb_asi_d =
+	dtag_read	|	itag_read	;
+
+assign	ifill_tlb_asi_d =	// itlb fill asi
+	idata_in	| 	idaccess	;
+
+assign	dfill_tlb_asi_d =	// dtlb fill asi
+	ddata_in	|	ddaccess	;
+
+assign	nucleus_asi_exact_d =
+	(asi_d[7:0] == 8'h04) |	// asi_nucleus
+	(asi_d[7:0] == 8'h0C) ; // asi_nucleus_little
+
+// Nucleus Ctxt
+assign	nucleus_asi_d =
+	 nucleus_asi_exact_d |
+	(asi_d[7:0] == 8'h24) | // asi_nucleus_quad_ldd
+	(asi_d[7:0] == 8'h2C) ; // asi_nucleus_quad_ldd_little
+
+assign	asi_if_user_prim_d =
+	(asi_d[7:0] == 8'h10) |	// asi_as_if_user_primary
+	(asi_d[7:0] == 8'h18) ;	// asi_as_if_user_primary_little
+
+// asi_if_user primary asi
+assign	asi_if_user_prim_all_d =
+	 asi_if_user_prim_d   |		
+	(asi_d[7:0] == 8'h22) |	// asi_as_if_user_primary_quad_ldd (blk-init)
+	(asi_d[7:0] == 8'h2A) ;	// asi_as_if_user_primary_quad_ldd_little (blk-init)
+
+assign	prim_asi_exact_d =
+	(asi_d[7:0] == 8'h80) |	// asi_primary
+	(asi_d[7:0] == 8'h88) ;	// asi_primary_little
+
+// Primary Ctxt
+assign	primary_asi_d =
+	 asi_if_user_prim_all_d   |	
+	 prim_asi_exact_d     |	
+	(asi_d[7:0] == 8'h82) |	// asi_primary_no_fault
+	(asi_d[7:0] == 8'h8A) |	// asi_primary_no_fault_little
+	(asi_d[7:0] == 8'hE2) |	// asi_primary_quad_ldd (blk-init)
+	(asi_d[7:0] == 8'hEA) |	// asi_primary_quad_ldd_little (blk-init)
+	blk_asif_usr_p | blk_asif_usr_plittle | 
+	blk_plittle | blk_p | // block primary asi
+	blk_cmt_p |	// Bug 4051
+  unimp_CD_prm ;  // Bug 4532
+   
+assign	asi_if_user_sec_d =
+	(asi_d[7:0] == 8'h11) | // asi_as_if_user_secondary
+	(asi_d[7:0] == 8'h19) ; // asi_as_if_user_secondary_little
+
+// asi_if_user secondary asi
+assign	asi_if_user_sec_all_d =
+	 asi_if_user_sec_d   |		
+	(asi_d[7:0] == 8'h23) |	// asi_as_if_user_secondary_quad_ldd (blk-init)
+	(asi_d[7:0] == 8'h2B) ;	// asi_as_if_user_secondary_quad_ldd_little (blk-init)
+
+assign	as_if_user_asi_d = asi_if_user_prim_all_d | asi_if_user_sec_all_d |
+blk_asif_usr_p | blk_asif_usr_plittle | blk_asif_usr_s | blk_asif_usr_slittle ;
+
+assign	sec_asi_exact_d =
+	(asi_d[7:0] == 8'h81) | // asi_secondary
+	(asi_d[7:0] == 8'h89) ; // asi_secondary_little
+
+// Secondary Ctxt
+assign	secondary_asi_d =
+	 asi_if_user_sec_all_d    |
+	 sec_asi_exact_d      |		
+	(asi_d[7:0] == 8'h83) | // asi_secondary_no_fault
+	(asi_d[7:0] == 8'h8B) | // asi_secondary_no_fault_little
+	(asi_d[7:0] == 8'hE3) |	// asi_secondary_quad_ldd (blk-init)
+	(asi_d[7:0] == 8'hEB) |	// asi_secondary_quad_ldd_little (blk-init)
+	blk_asif_usr_s | blk_asif_usr_slittle | 
+	blk_slittle |  blk_s | // block secondary asi
+	blk_cmt_s |  // Bug 4051
+  unimp_CD_sec; // Bug 4532
+
+// Little Endian
+assign	lendian_asi_d =
+	(asi_d[7:0] == 8'h0C) | // asi_nucleus_little
+	(asi_d[7:0] == 8'h2C) | // asi_nucleus_quad_ldd_little 
+	(asi_d[7:0] == 8'h18) |	// asi_as_if_user_primary_little
+	(asi_d[7:0] == 8'h8A) |	// asi_primary_no_fault_little
+	(asi_d[7:0] == 8'h8B) | // asi_secondary_no_fault_little
+	(asi_d[7:0] == 8'h2A) |	// asi_as_if_user_primary_quad_ldd_little (blk-init)
+	(asi_d[7:0] == 8'hEA) |	// asi_primary_quad_ldd_little (blk-init)
+	(asi_d[7:0] == 8'h19) | // asi_as_if_user_secondary_little
+	(asi_d[7:0] == 8'h89) | // asi_secondary_little
+	(asi_d[7:0] == 8'h88) |	// asi_primary_little
+	(asi_d[7:0] == 8'h2B) |	// asi_as_if_user_secondary_quad_ldd_little (blk-init)
+	(asi_d[7:0] == 8'hEB) |	// asi_secondary_quad_ldd_little (blk-init)
+	real_mem_little |
+	real_io_little	|
+	//(asi_d[7:0] == 8'h1D) |	// asi_phys_bypass_ec_with_ebit_littl
+	//(asi_d[7:0] == 8'h1C) |	// asi_phys_bypass_ec_with_ebit_littl
+	blk_asif_usr_plittle  | blk_asif_usr_slittle |	// little
+	blk_plittle	      | blk_slittle |		// little
+	quad_ldd_real_little  | // asi_quad_ldd_real_little
+	binit_nucleus_little_d ;// asi_nucleus_blk_init_st_quad_ldd_little
+
+// No Fault
+assign	nofault_asi_d =
+	(asi_d[7:0] == 8'h82) |	// asi_primary_no_fault
+	(asi_d[7:0] == 8'h8A) |	// asi_primary_no_fault_little
+	(asi_d[7:0] == 8'h83) | // asi_secondary_no_fault
+	(asi_d[7:0] == 8'h8B) ; // asi_secondary_no_fault_little
+
+assign	binit_nucleus_d =
+	(asi_d[7:0] == 8'h27) ;	// asi_nucleus_blk_init_st_quad_ldd
+assign	binit_nucleus_little_d =
+	(asi_d[7:0] == 8'h2F) ;	// asi_nucleus_blk_init_st_quad_ldd_little
+
+// Quad (These are duplicated - they can be shared)
+assign	binit_quad_asi_d =
+   	binit_nucleus_d |	// asi_nucleus_blk_init_st_quad_ldd
+	binit_nucleus_little_d |// asi_nucleus_blk_init_st_quad_ldd_little
+	(asi_d[7:0] == 8'h22) |	// asi_as_if_user_primary_quad_ldd (blk-init)
+	(asi_d[7:0] == 8'h2A) |	// asi_as_if_user_primary_quad_ldd_little (blk-init)
+	(asi_d[7:0] == 8'h23) |	// asi_as_if_user_secondary_quad_ldd (blk-init)
+	(asi_d[7:0] == 8'h2B) |	// asi_as_if_user_secondary_quad_ldd_little (blk-init)
+	(asi_d[7:0] == 8'hE2) |	// asi_primary_quad_ldd (blk-init)
+	(asi_d[7:0] == 8'hEA) |	// asi_primary_quad_ldd_little (blk-init)
+	(asi_d[7:0] == 8'hE3) |	// asi_secondary_quad_ldd (blk-init)
+	(asi_d[7:0] == 8'hEB) ;	// asi_secondary_quad_ldd_little (blk-init)
+
+assign	quad_ldd_real = 
+	(asi_d[7:0] == 8'h26) ; // asi_quad_ldd_real
+assign	quad_ldd_real_little = 
+	(asi_d[7:0] == 8'h2E) ; // asi_quad_ldd_real_little
+
+assign	quad_asi_d =
+	binit_quad_asi_d      | // blk-init quad asi
+	quad_ldd_real 	      | // asi_quad_ldd_real
+	quad_ldd_real_little  | // asi_quad_ldd_real_little
+	(asi_d[7:0] == 8'h24) | // asi_nucleus_quad_ldd 
+	(asi_d[7:0] == 8'h2C) ; // asi_nucleus_quad_ldd_little 
+	
+// EC
+assign	real_io_little = (asi_d[7:0] == 8'h1D) ;
+assign	real_mem_little = (asi_d[7:0] == 8'h1C) ;
+	
+assign	phy_byp_ec_asi =
+	(asi_d[7:0] == 8'h15) |	// asi_phys_bypass_ec_with_ebit(real_io)
+	real_io_little ;	// asi_phys_bypass_ec_with_ebit_little(real_io_little)
+	//(asi_d[7:0] == 8'h1D) ;	// asi_phys_bypass_ec_with_ebit_little(real_io_little)
+				// asi assumed for io address specifically !!
+
+
+assign	phy_use_ec_asi =
+	(asi_d[7:0] == 8'h14) |	// asi_phys_use_ec(real_mem)
+	real_mem_little ;	// asi_phys_use_ec_little(real_mem_little)
+	//(asi_d[7:0] == 8'h1C) ;	// asi_phys_use_ec_little(real_mem_little)
+
+assign	phy_use_ec_asi_d = phy_use_ec_asi ;
+assign	phy_byp_ec_asi_d = phy_byp_ec_asi ;
+
+// Physical Use - Always results in R->P xslation.
+assign	tlb_byp_asi_d = 
+		phy_byp_ec_asi | phy_use_ec_asi | 
+		quad_ldd_real  | quad_ldd_real_little ;
+
+// Atomic asi
+assign	atomic_asi_d = nucleus_asi_exact_d | prim_asi_exact_d | sec_asi_exact_d | 
+		asi_if_user_prim_d | asi_if_user_sec_d | phy_use_ec_asi ;
+
+assign	dcache_byp_asi_d = tlb_byp_asi_d ;
+
+// ASI causing Data Access Exceptions - (TBD)
+
+assign	rd_only_asi_d =
+	(asi_d[7:0] == 8'h82) |	// asi_primary_no_fault
+	(asi_d[7:0] == 8'h8A) |	// asi_primary_no_fault_little
+	(asi_d[7:0] == 8'h83) | // asi_secondary_no_fault
+	(asi_d[7:0] == 8'h8B) | // asi_secondary_no_fault_little
+	(asi_d[7:0] == 8'h74) ; // asi_swrvr_udb_intr_r !! Does not have to be done by intrpt blk !!
+
+assign	wr_only_asi_d =
+	(asi_d[7:0] == 8'h73) ; // asi_swrvr_udb_intr_w
+
+// Block Asi
+assign	blk_asif_usr_p = (asi_d[7:0] == 8'h16) ; // asi_block_as_if_user_primary
+assign	blk_asif_usr_plittle = (asi_d[7:0] == 8'h1E) ; // asi_block_as_if_user_primary_little
+assign	blk_asif_usr_s = (asi_d[7:0] == 8'h17) ; // asi_block_as_if_user_secondary
+assign	blk_asif_usr_slittle = (asi_d[7:0] == 8'h1F) ; // asi_block_as_if_user_secondary_little
+assign	blk_plittle = (asi_d[7:0] == 8'hF8) ; // asi_block_primary_little 
+assign	blk_slittle = (asi_d[7:0] == 8'hF9) ; // asi_block_secondary_little 
+assign	blk_cmt_p = (asi_d[7:0] == 8'hE0) ; // asi_block_commit_primary ?? behaviour 
+assign	blk_cmt_s = (asi_d[7:0] == 8'hE1) ; // asi_block_commit_secondary ?? behaviour 
+assign	blk_p = (asi_d[7:0] == 8'hF0) ; // asi_block_primary
+assign	blk_s = (asi_d[7:0] == 8'hF1) ; // asi_block_secondary
+
+//assign	blk_cmt_asi_d = blk_cmt_p | blk_cmt_s ;
+
+// Block-commit asi are left out here; they are flagged by unimp_asi_d.
+assign	blk_asi_d = 
+	blk_asif_usr_p 	| blk_asif_usr_s |
+	blk_asif_usr_plittle  | blk_asif_usr_slittle |	// little
+	//blk_cmt_p	| blk_cmt_s	 |
+	blk_p		| blk_s		 |
+	blk_plittle	      | blk_slittle ;		// little
+
+// add to little-endian decode
+// add to use_real ...
+//assign      as_if_supv = 
+//    (asi_d[7:0] == 8'h??) | // asi_if_supv_real 
+//    (asi_d[7:0] == 8'h??) ; // asi_if_supv_real_little 
+
+wire	unimp_C ;
+assign	unimp_C =
+	((asi_d[7:4]==4'hC) & 
+		~((asi_d[3:0]==4'h6) |
+		  (asi_d[3:0]==4'h7) |
+		  (asi_d[3:0]==4'hE) |
+		  (asi_d[3:0]==4'hF))) ;
+
+wire	unimp_D ;
+assign	unimp_D =
+	((asi_d[7:4]==4'hD) & 
+		~((asi_d[3:0]==4'h4) |
+		  (asi_d[3:0]==4'h5) |
+		  (asi_d[3:0]==4'h6) |
+		  (asi_d[3:0]==4'h7) |
+		  (asi_d[3:0]==4'hC) |
+		  (asi_d[3:0]==4'hD) |
+		  (asi_d[3:0]==4'hE) |
+		  (asi_d[3:0]==4'hF))) ;
+
+assign  unimp_CD_prm =
+(asi_d[7:0] == 8'hC0) |
+(asi_d[7:0] == 8'hC2) |
+(asi_d[7:0] == 8'hC4) |
+(asi_d[7:0] == 8'hC8) |
+(asi_d[7:0] == 8'hCA) |
+(asi_d[7:0] == 8'hCC) |
+(asi_d[7:0] == 8'hD0) |
+(asi_d[7:0] == 8'hD2) |
+(asi_d[7:0] == 8'hD8) |
+(asi_d[7:0] == 8'hDA) ;
+   
+assign  unimp_CD_sec = 
+(asi_d[7:0] == 8'hC1) |
+(asi_d[7:0] == 8'hC3) |
+(asi_d[7:0] == 8'hC5) |
+(asi_d[7:0] == 8'hC9) |
+(asi_d[7:0] == 8'hCB) |
+(asi_d[7:0] == 8'hCD) |
+(asi_d[7:0] == 8'hD1) |
+(asi_d[7:0] == 8'hD3) |
+(asi_d[7:0] == 8'hD9) |
+(asi_d[7:0] == 8'hDB) ;
+   
+   
+// Unimplemented asi
+assign	unimp_asi_d =
+// Bug 4692 - all unimplemented internal asi are now
+// illegal.
+//	(asi_d[7:0] == 8'h6E) | // asi_icache_pre_decode
+//	(asi_d[7:0] == 8'h6F) | // asi_icache_next_field
+//	(asi_d[7:0] == 8'h48) | // asi_intr_dispatch_status
+//	(asi_d[7:0] == 8'h49) | // asi_intr_receive
+//	(asi_d[7:0] == 8'h4A) | // asi_upa_config_register
+//	(asi_d[7:0] == 8'h4E) | // asi_ecache_tag_data
+//  	dflush_asi_d |      //Bug 4580
+ 	unimp_C | unimp_D | // Bug 4438	
+	blk_cmt_p | blk_cmt_s ;
+	
+// Set of recognized asi's
+assign	recognized_asi_d = 
+	asi_internal_d | nucleus_asi_d |  primary_asi_d | secondary_asi_d | lendian_asi_d |
+	nofault_asi_d | quad_asi_d | tlb_byp_asi_d | unimp_asi_d | blk_asi_d ;
+
+// Displacement Flush for L2
+//assign	dflush_asi_d =
+//	(asi_d[7:0] == 8'h30) ; // asi_direct_map_ecache
+
+endmodule // lsu_asi_decode
Index: /trunk/T1-CPU/lsu/lsu.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu.v	(revision 6)
@@ -0,0 +1,5099 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Description:  Load/Store Unit for Sparc Core  
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include  "sys.h" // system level definition file which contains the 
+          // time scale definition
+
+`include        "iop.h"
+`include        "lsu.h"
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module lsu ( /*AUTOARG*/
+   // Outputs
+   spc_pcx_req_pq, spc_pcx_data_pa, spc_pcx_atom_pq, 
+   spc_efc_dfuse_data, mbist_dcache_data_in, lsu_tlu_wsr_inst_e, 
+   lsu_tlu_ttype_vld_m2, lsu_tlu_ttype_m2, lsu_tlu_tlb_st_inst_m, 
+   lsu_tlu_tlb_ldst_va_m, lsu_tlu_tlb_ld_inst_m, 
+   lsu_tlu_tlb_dmp_va_m, lsu_tlu_tlb_asi_state_m, 
+   lsu_tlu_tlb_access_tid_m, lsu_tlu_thrid_d, lsu_tlu_stb_full_w2, 
+   lsu_tlu_rsr_data_e, lsu_tlu_rs3_data_g, lsu_tlu_pcxpkt_ack, 
+   lsu_tlu_pctxt_m, lsu_tlu_misalign_addr_ldst_atm_m, 
+   lsu_tlu_ldst_va_m, lsu_tlu_l2_dmiss, lsu_tlu_intpkt, 
+   lsu_tlu_early_flush_w, lsu_tlu_early_flush2_w, lsu_tlu_dtlb_done, 
+   lsu_tlu_dside_ctxt_m, lsu_tlu_dmmu_miss_g, 
+   lsu_tlu_defr_trp_taken_g, lsu_tlu_dcache_miss_w2, 
+   lsu_tlu_daccess_excptn_g, lsu_tlu_cpx_vld, lsu_tlu_cpx_req, 
+   lsu_tlu_async_ttype_w2, lsu_tlu_async_ttype_vld_w2, 
+   lsu_tlu_async_tid_w2, lsu_t3_pctxt_state, lsu_t2_pctxt_state, 
+   lsu_t1_pctxt_state, lsu_t0_pctxt_state, lsu_spu_strm_ack_cmplt, 
+   lsu_spu_stb_empty, lsu_spu_ldst_ack, lsu_spu_early_flush_g, 
+   lsu_spu_asi_state_e, lsu_pid_state3, lsu_pid_state2, 
+   lsu_pid_state1, lsu_pid_state0, lsu_mmu_rs3_data_g, 
+   lsu_mmu_flush_pipe_w, lsu_mmu_defr_trp_taken_g, lsu_mamem_mrgn, 
+   lsu_itlb_mrgn, lsu_ifu_tlb_tag_ue, lsu_ifu_tlb_data_ue, 
+   lsu_ifu_tlb_data_su, lsu_ifu_stxa_data, lsu_ifu_stbcnt3, 
+   lsu_ifu_stbcnt2, lsu_ifu_stbcnt1, lsu_ifu_stbcnt0, 
+   lsu_ifu_stallreq, lsu_ifu_pcxpkt_ack_d, lsu_ifu_ldsta_internal_e, 
+   lsu_ifu_ldst_miss_w, lsu_ifu_ldst_cmplt, lsu_ifu_ld_pcxpkt_vld, 
+   lsu_ifu_ld_pcxpkt_tid, lsu_ifu_ld_icache_index, 
+   lsu_ifu_l2_unc_error, lsu_ifu_l2_corr_error, lsu_ifu_itlb_en, 
+   lsu_ifu_io_error, lsu_ifu_icache_en, lsu_ifu_flush_pipe_w, 
+   lsu_ifu_error_tid, lsu_ifu_direct_map_l1, 
+   lsu_ifu_dcache_tag_perror, lsu_ifu_dcache_data_perror, 
+   lsu_ifu_dc_parity_error_w2, lsu_ifu_cpxpkt_vld_i1, 
+   lsu_ifu_cpxpkt_i1, lsu_ifu_asi_vld, lsu_ifu_asi_thrid, 
+   lsu_ifu_asi_state, lsu_ifu_asi_load, lsu_ifu_asi_addr, 
+   lsu_ictag_mrgn, lsu_ffu_stb_full3, lsu_ffu_stb_full2, 
+   lsu_ffu_stb_full1, lsu_ffu_stb_full0, lsu_ffu_st_dtlb_perr_g, 
+   lsu_ffu_ld_vld, lsu_ffu_ld_data, lsu_ffu_flush_pipe_w, 
+   lsu_ffu_blk_asi_e, lsu_ffu_ack, lsu_exu_thr_m, 
+   lsu_exu_st_dtlb_perr_g, lsu_exu_rd_m, lsu_exu_ldst_miss_w2, 
+   lsu_exu_flush_pipe_w, lsu_exu_dfill_vld_w2, lsu_exu_dfill_data_w2, 
+   lsu_dsfsr_din_g, lsu_dmmu_sfsr_trp_wr, lsu_asi_reg3, lsu_asi_reg2, 
+   lsu_asi_reg1, lsu_asi_reg0, ifu_tlu_flush_fd_w, 
+   ifu_tlu_flush_fd3_w, ifu_tlu_flush_fd2_w, bist_ctl_reg_wr_en, 
+   bist_ctl_reg_in, lsu_asi_state, lsu_ifu_err_addr, lsu_sscan_data, 
+   ifu_tlu_inst_vld_m_bf1, lsu_ffu_bld_cnt_w, so0, so1, short_so0, 
+   short_so1, lsu_tlu_nucleus_ctxt_m, lsu_tlu_tte_pg_sz_g, 
+   lsu_tlu_squash_va_oor_m, lsu_tlu_wtchpt_trp_g, 
+   lsu_tlu_daccess_prot_g, lsu_tlu_priv_action_g, 
+   // Inputs
+   tlu_lsu_tl_zero, tlu_lsu_tid_m, tlu_lsu_stxa_ack_tid, 
+   tlu_lsu_stxa_ack, tlu_lsu_redmode_rst_d1, tlu_lsu_redmode, 
+   tlu_lsu_pstate_priv, tlu_lsu_pstate_cle, tlu_lsu_pstate_am, 
+   tlu_lsu_priv_trap_m, tlu_lsu_pcxpkt, tlu_lsu_ldxa_tid_w2, 
+   tlu_lsu_ldxa_async_data_vld, tlu_lsu_int_ldxa_vld_w2, 
+   tlu_lsu_int_ldxa_data_w2, tlu_lsu_int_ld_ill_va_w2, 
+   tlu_lsu_hpv_priv, tlu_lsu_hpstate_en, tlu_lsu_asi_update_m, 
+   tlu_lsu_asi_m, tlu_idtlb_dmp_thrid_g, tlu_idtlb_dmp_key_g, 
+   tlu_exu_early_flush_pipe_w, tlu_early_flush_pipe_w, 
+   tlu_early_flush_pipe2_w, tlu_dtlb_tte_tag_w2, 
+   tlu_dtlb_tte_data_w2, tlu_dtlb_tag_rd_g, tlu_dtlb_rw_index_vld_g, 
+   tlu_dtlb_rw_index_g, tlu_dtlb_invalidate_all_g, 
+   tlu_dtlb_dmp_vld_g, tlu_dtlb_dmp_sctxt_g, tlu_dtlb_dmp_pctxt_g, 
+   tlu_dtlb_dmp_nctxt_g, tlu_dtlb_dmp_all_g, tlu_dtlb_dmp_actxt_g, 
+   tlu_dtlb_data_rd_g, tlu_dsfsr_flt_vld, testmode_l, 
+   spu_lsu_unc_error_w2, spu_lsu_stxa_ack_tid, spu_lsu_stxa_ack, 
+   spu_lsu_ldxa_tid_w2, spu_lsu_ldxa_illgl_va_w2, 
+   spu_lsu_ldxa_data_w2, spu_lsu_ldxa_data_vld_w2, spu_lsu_int_w2, 
+   sehold, se, pcx_spc_grant_px, mux_drive_disable, 
+   mem_write_disable, mbist_write_data, mbist_dcache_write, 
+   mbist_dcache_word, mbist_dcache_way, mbist_dcache_read, 
+   mbist_dcache_index, ifu_tlu_wsr_inst_d, ifu_tlu_thrid_e, 
+   ifu_tlu_sraddr_d, ifu_tlu_mb_inst_e, ifu_tlu_inst_vld_m, 
+   ifu_tlu_flush_m, ifu_tlu_flsh_inst_e, ifu_lsu_thrid_s, 
+   ifu_lsu_swap_e, ifu_lsu_st_inst_e, ifu_lsu_sign_ext_e, 
+   ifu_lsu_rd_e, ifu_lsu_pref_inst_e, ifu_lsu_pcxreq_d, 
+   ifu_lsu_pcxpkt_e, ifu_lsu_nceen, ifu_lsu_memref_d, 
+   ifu_lsu_ldxa_tid_w2, ifu_lsu_ldxa_illgl_va_w2, 
+   ifu_lsu_ldxa_data_w2, ifu_lsu_ldxa_data_vld_w2, ifu_lsu_ldstub_e, 
+   ifu_lsu_ldst_size_e, ifu_lsu_ldst_fp_e, ifu_lsu_ldst_dbl_e, 
+   ifu_lsu_ld_inst_e, ifu_lsu_inv_clear, ifu_lsu_imm_asi_vld_d, 
+   ifu_lsu_imm_asi_d, ifu_lsu_ibuf_busy, ifu_lsu_fwd_wr_ack, 
+   ifu_lsu_fwd_data_vld, ifu_lsu_destid_s, ifu_lsu_casa_e, 
+   ifu_lsu_asi_rd_unc, ifu_lsu_asi_ack, ifu_lsu_alt_space_e, 
+   ifu_lsu_alt_space_d, grst_l, gdbginit_l, ffu_lsu_kill_fst_w, 
+   ffu_lsu_fpop_rq_vld, ffu_lsu_blk_st_va_e, ffu_lsu_blk_st_e, 
+   exu_tlu_va_oor_m, exu_tlu_misalign_addr_jmpl_rtn_m, 
+   exu_lsu_rs3_data_e, exu_lsu_rs2_data_e, efc_spc_fuse_clk2, 
+   efc_spc_fuse_clk1, efc_spc_dfuse_dshift, efc_spc_dfuse_data, 
+   efc_spc_dfuse_ashift, ctu_sscan_tid, const_cpuid, clk, 
+   bist_ctl_reg_out, arst_l, cpx_spc_data_cx, spu_lsu_ldst_pckt, 
+   exu_lsu_ldst_va_e, exu_lsu_early_va_e, ffu_lsu_data, si0, si1, 
+   short_si1, short_si0, exu_tlu_wsr_data_m
+   );
+
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+input                   arst_l;                 // To qctl1 of lsu_qctl1.v, ...
+input [10:0]            bist_ctl_reg_out;       // To dctldp of lsu_dctldp.v
+input                   clk;                    // To qctl1 of lsu_qctl1.v, ...
+input [2:0]             const_cpuid;            // To qctl2 of lsu_qctl2.v, ...
+input [3:0]             ctu_sscan_tid;          // To dctl of lsu_dctl.v
+input                   efc_spc_dfuse_ashift;   // To dcdhdr of cmp_sram_redhdr.v
+input                   efc_spc_dfuse_data;     // To dcdhdr of cmp_sram_redhdr.v
+input                   efc_spc_dfuse_dshift;   // To dcdhdr of cmp_sram_redhdr.v
+input                   efc_spc_fuse_clk1;      // To dcdhdr of cmp_sram_redhdr.v, ...
+input                   efc_spc_fuse_clk2;      // To dcdhdr of cmp_sram_redhdr.v
+input [63:0]            exu_lsu_rs2_data_e;     // To stb_rwdp of lsu_stb_rwdp.v
+input [63:0]            exu_lsu_rs3_data_e;     // To excpctl of lsu_excpctl.v, ...
+input                   exu_tlu_misalign_addr_jmpl_rtn_m;// To excpctl of lsu_excpctl.v
+input                   exu_tlu_va_oor_m;       // To excpctl of lsu_excpctl.v
+input                   ffu_lsu_blk_st_e;       // To dctl of lsu_dctl.v, ...
+input [5:3]             ffu_lsu_blk_st_va_e;    // To stb_rwctl of lsu_stb_rwctl.v
+input                   ffu_lsu_fpop_rq_vld;    // To qctl1 of lsu_qctl1.v
+input                   ffu_lsu_kill_fst_w;     // To stb_rwctl of lsu_stb_rwctl.v
+input                   gdbginit_l;             // To qctl1 of lsu_qctl1.v, ...
+input                   grst_l;                 // To qctl2 of lsu_qctl2.v, ...
+input                   ifu_lsu_alt_space_d;    // To dctl of lsu_dctl.v
+input                   ifu_lsu_alt_space_e;    // To qctl1 of lsu_qctl1.v, ...
+input                   ifu_lsu_asi_ack;        // To dctl of lsu_dctl.v
+input                   ifu_lsu_asi_rd_unc;     // To dctl of lsu_dctl.v
+input                   ifu_lsu_casa_e;         // To qctl1 of lsu_qctl1.v, ...
+input [2:0]             ifu_lsu_destid_s;       // To qctl1 of lsu_qctl1.v
+input                   ifu_lsu_fwd_data_vld;   // To qctl1 of lsu_qctl1.v, ...
+input                   ifu_lsu_fwd_wr_ack;     // To qctl2 of lsu_qctl2.v
+input                   ifu_lsu_ibuf_busy;      // To qctl2 of lsu_qctl2.v
+input [7:0]             ifu_lsu_imm_asi_d;      // To dctldp of lsu_dctldp.v
+input                   ifu_lsu_imm_asi_vld_d;  // To dctldp of lsu_dctldp.v
+input                   ifu_lsu_inv_clear;      // To qctl2 of lsu_qctl2.v
+input                   ifu_lsu_ld_inst_e;      // To qctl1 of lsu_qctl1.v, ...
+input                   ifu_lsu_ldst_dbl_e;     // To qctl1 of lsu_qctl1.v, ...
+input                   ifu_lsu_ldst_fp_e;      // To qctl1 of lsu_qctl1.v, ...
+input [1:0]             ifu_lsu_ldst_size_e;    // To dctl of lsu_dctl.v, ...
+input                   ifu_lsu_ldstub_e;       // To dctl of lsu_dctl.v, ...
+input                   ifu_lsu_ldxa_data_vld_w2;// To dctl of lsu_dctl.v
+input [63:0]            ifu_lsu_ldxa_data_w2;   // To qdp1 of lsu_qdp1.v
+input                   ifu_lsu_ldxa_illgl_va_w2;// To dctl of lsu_dctl.v
+input [1:0]             ifu_lsu_ldxa_tid_w2;    // To dctl of lsu_dctl.v
+input                   ifu_lsu_memref_d;       // To qctl2 of lsu_qctl2.v, ...
+input [3:0]             ifu_lsu_nceen;          // To excpctl of lsu_excpctl.v, ...
+input [51:0]            ifu_lsu_pcxpkt_e;       // To qctl1 of lsu_qctl1.v, ...
+input                   ifu_lsu_pcxreq_d;       // To qctl1 of lsu_qctl1.v
+input                   ifu_lsu_pref_inst_e;    // To qctl1 of lsu_qctl1.v, ...
+input [4:0]             ifu_lsu_rd_e;           // To qctl2 of lsu_qctl2.v, ...
+input                   ifu_lsu_sign_ext_e;     // To dctl of lsu_dctl.v
+input                   ifu_lsu_st_inst_e;      // To excpctl of lsu_excpctl.v, ...
+input                   ifu_lsu_swap_e;         // To dctl of lsu_dctl.v, ...
+input [1:0]             ifu_lsu_thrid_s;        // To dctl of lsu_dctl.v
+input                   ifu_tlu_flsh_inst_e;    // To dctl of lsu_dctl.v, ...
+input                   ifu_tlu_flush_m;        // To dctl of lsu_dctl.v
+input                   ifu_tlu_inst_vld_m;     // To qctl1 of lsu_qctl1.v, ...
+input                   ifu_tlu_mb_inst_e;      // To dctl of lsu_dctl.v
+input [6:0]             ifu_tlu_sraddr_d;       // To dctl of lsu_dctl.v
+input [1:0]             ifu_tlu_thrid_e;        // To qctl1 of lsu_qctl1.v, ...
+input                   ifu_tlu_wsr_inst_d;     // To dctl of lsu_dctl.v
+input [6:0]             mbist_dcache_index;     // To dctl of lsu_dctl.v
+input                   mbist_dcache_read;      // To dctl of lsu_dctl.v
+input [1:0]             mbist_dcache_way;       // To dctl of lsu_dctl.v
+input                   mbist_dcache_word;      // To dctl of lsu_dctl.v
+input                   mbist_dcache_write;     // To dctl of lsu_dctl.v
+input [7:0]             mbist_write_data;       // To qdp2 of lsu_qdp2.v
+input                   mem_write_disable;      // To dcache of bw_r_dcd.v, ...
+input                   mux_drive_disable;      // To qctl1 of lsu_qctl1.v, ...
+input [4:0]             pcx_spc_grant_px;       // To qctl1 of lsu_qctl1.v
+input                   se;                     // To qctl1 of lsu_qctl1.v, ...
+input                   sehold;                 // To qctl1 of lsu_qctl1.v, ...
+input                   spu_lsu_int_w2;         // To dctl of lsu_dctl.v
+input                   spu_lsu_ldxa_data_vld_w2;// To dctl of lsu_dctl.v
+input [63:0]            spu_lsu_ldxa_data_w2;   // To qdp1 of lsu_qdp1.v
+input                   spu_lsu_ldxa_illgl_va_w2;// To dctl of lsu_dctl.v
+input [1:0]             spu_lsu_ldxa_tid_w2;    // To dctl of lsu_dctl.v
+input                   spu_lsu_stxa_ack;       // To dctl of lsu_dctl.v
+input [1:0]             spu_lsu_stxa_ack_tid;   // To dctl of lsu_dctl.v
+input                   spu_lsu_unc_error_w2;   // To dctl of lsu_dctl.v
+input                   testmode_l;             // To dcdhdr of cmp_sram_redhdr.v
+input [3:0]             tlu_dsfsr_flt_vld;      // To excpctl of lsu_excpctl.v
+input                   tlu_dtlb_data_rd_g;     // To dctl of lsu_dctl.v
+input                   tlu_dtlb_dmp_actxt_g;   // To dctl of lsu_dctl.v, ...
+input                   tlu_dtlb_dmp_all_g;     // To dctl of lsu_dctl.v
+input                   tlu_dtlb_dmp_nctxt_g;   // To dctl of lsu_dctl.v
+input                   tlu_dtlb_dmp_pctxt_g;   // To dctl of lsu_dctl.v
+input                   tlu_dtlb_dmp_sctxt_g;   // To dctl of lsu_dctl.v
+input                   tlu_dtlb_dmp_vld_g;     // To dctl of lsu_dctl.v
+input                   tlu_dtlb_invalidate_all_g;// To dctl of lsu_dctl.v
+input [5:0]             tlu_dtlb_rw_index_g;    // To dctl of lsu_dctl.v, ...
+input                   tlu_dtlb_rw_index_vld_g;// To dctl of lsu_dctl.v
+input                   tlu_dtlb_tag_rd_g;      // To dctl of lsu_dctl.v
+input [42:0]            tlu_dtlb_tte_data_w2;   // To dtlb of bw_r_tlb.v
+input [58:0]            tlu_dtlb_tte_tag_w2;    // To dctldp of lsu_dctldp.v, ...
+input                   tlu_early_flush_pipe2_w;// To qctl1 of lsu_qctl1.v, ...
+input                   tlu_early_flush_pipe_w; // To excpctl of lsu_excpctl.v
+input                   tlu_exu_early_flush_pipe_w;// To stb_rwctl of lsu_stb_rwctl.v
+input [40:0]            tlu_idtlb_dmp_key_g;    // To dtlb of bw_r_tlb.v
+input [1:0]             tlu_idtlb_dmp_thrid_g;  // To dctl of lsu_dctl.v
+input [7:0]             tlu_lsu_asi_m;          // To dctldp of lsu_dctldp.v
+input                   tlu_lsu_asi_update_m;   // To dctl of lsu_dctl.v
+input [3:0]             tlu_lsu_hpstate_en;     // To dctl of lsu_dctl.v
+input [3:0]             tlu_lsu_hpv_priv;       // To dctl of lsu_dctl.v
+input                   tlu_lsu_int_ld_ill_va_w2;// To dctl of lsu_dctl.v
+input [63:0]            tlu_lsu_int_ldxa_data_w2;// To qdp1 of lsu_qdp1.v
+input                   tlu_lsu_int_ldxa_vld_w2;// To dctl of lsu_dctl.v
+input                   tlu_lsu_ldxa_async_data_vld;// To dctl of lsu_dctl.v
+input [1:0]             tlu_lsu_ldxa_tid_w2;    // To dctl of lsu_dctl.v
+input [25:0]            tlu_lsu_pcxpkt;         // To qctl1 of lsu_qctl1.v, ...
+input                   tlu_lsu_priv_trap_m;    // To excpctl of lsu_excpctl.v
+input [3:0]             tlu_lsu_pstate_am;      // To excpctl of lsu_excpctl.v, ...
+input [3:0]             tlu_lsu_pstate_cle;     // To excpctl of lsu_excpctl.v, ...
+input [3:0]             tlu_lsu_pstate_priv;    // To excpctl of lsu_excpctl.v
+input [3:0]             tlu_lsu_redmode;        // To dctl of lsu_dctl.v
+input [3:0]             tlu_lsu_redmode_rst_d1; // To dctl of lsu_dctl.v
+input                   tlu_lsu_stxa_ack;       // To dctl of lsu_dctl.v
+input [1:0]             tlu_lsu_stxa_ack_tid;   // To dctl of lsu_dctl.v
+input [1:0]             tlu_lsu_tid_m;          // To dctl of lsu_dctl.v
+input [3:0]             tlu_lsu_tl_zero;        // To dctl of lsu_dctl.v
+// End of automatics
+/*AUTOOUTPUT*/
+// Beginning of automatic outputs (from unused autoinst outputs): generated output-port list for the AUTOOUTPUT marker above; each trailing comment names the instance and source file that drives the port. NOTE(review): presumably regenerated by the AUTO tool, so hand edits may be overwritten -- confirm.
+output [6:0]            bist_ctl_reg_in;        // From dctldp of lsu_dctldp.v
+output                  bist_ctl_reg_wr_en;     // From dctl of lsu_dctl.v
+output                  ifu_tlu_flush_fd2_w;    // From dctl of lsu_dctl.v
+output                  ifu_tlu_flush_fd3_w;    // From dctl of lsu_dctl.v
+output                  ifu_tlu_flush_fd_w;     // From dctl of lsu_dctl.v
+output [7:0]            lsu_asi_reg0;           // From dctldp of lsu_dctldp.v
+output [7:0]            lsu_asi_reg1;           // From dctldp of lsu_dctldp.v
+output [7:0]            lsu_asi_reg2;           // From dctldp of lsu_dctldp.v
+output [7:0]            lsu_asi_reg3;           // From dctldp of lsu_dctldp.v
+output [3:0]            lsu_dmmu_sfsr_trp_wr;   // From excpctl of lsu_excpctl.v
+output [23:0]           lsu_dsfsr_din_g;        // From excpctl of lsu_excpctl.v
+output [63:0]           lsu_exu_dfill_data_w2;  // From dcdp of lsu_dcdp.v
+output                  lsu_exu_dfill_vld_w2;   // From dctl of lsu_dctl.v
+output                  lsu_exu_flush_pipe_w;   // From excpctl of lsu_excpctl.v
+output                  lsu_exu_ldst_miss_w2;   // From dctl of lsu_dctl.v
+output [4:0]            lsu_exu_rd_m;           // From qctl2 of lsu_qctl2.v
+output                  lsu_exu_st_dtlb_perr_g; // From excpctl of lsu_excpctl.v
+output [1:0]            lsu_exu_thr_m;          // From dctl of lsu_dctl.v
+output                  lsu_ffu_ack;            // From qctl1 of lsu_qctl1.v
+output                  lsu_ffu_blk_asi_e;      // From dctl of lsu_dctl.v
+output                  lsu_ffu_flush_pipe_w;   // From excpctl of lsu_excpctl.v
+output [63:0]           lsu_ffu_ld_data;        // From dcdp of lsu_dcdp.v
+output                  lsu_ffu_ld_vld;         // From dctl of lsu_dctl.v
+output                  lsu_ffu_st_dtlb_perr_g; // From excpctl of lsu_excpctl.v
+output                  lsu_ffu_stb_full0;      // From stb_rwctl of lsu_stb_rwctl.v
+output                  lsu_ffu_stb_full1;      // From stb_rwctl of lsu_stb_rwctl.v
+output                  lsu_ffu_stb_full2;      // From stb_rwctl of lsu_stb_rwctl.v
+output                  lsu_ffu_stb_full3;      // From stb_rwctl of lsu_stb_rwctl.v
+output [3:0]            lsu_ictag_mrgn;         // From dctldp of lsu_dctldp.v
+output [17:0]           lsu_ifu_asi_addr;       // From dctldp of lsu_dctldp.v
+output                  lsu_ifu_asi_load;       // From dctl of lsu_dctl.v
+output [7:0]            lsu_ifu_asi_state;      // From dctldp of lsu_dctldp.v
+output [1:0]            lsu_ifu_asi_thrid;      // From dctl of lsu_dctl.v
+output                  lsu_ifu_asi_vld;        // From dctl of lsu_dctl.v
+output [`CPX_VLD-1:0]   lsu_ifu_cpxpkt_i1;      // From qdp2 of lsu_qdp2.v
+output                  lsu_ifu_cpxpkt_vld_i1;  // From qctl2 of lsu_qctl2.v
+output                  lsu_ifu_dc_parity_error_w2;// From dctl of lsu_dctl.v
+output                  lsu_ifu_dcache_data_perror;// From dctl of lsu_dctl.v
+output                  lsu_ifu_dcache_tag_perror;// From dctl of lsu_dctl.v
+output                  lsu_ifu_direct_map_l1;  // From dctldp of lsu_dctldp.v
+output [1:0]            lsu_ifu_error_tid;      // From dctl of lsu_dctl.v
+output                  lsu_ifu_flush_pipe_w;   // From excpctl of lsu_excpctl.v
+output [3:0]            lsu_ifu_icache_en;      // From dctl of lsu_dctl.v
+output                  lsu_ifu_io_error;       // From dctl of lsu_dctl.v
+output [3:0]            lsu_ifu_itlb_en;        // From dctl of lsu_dctl.v
+output                  lsu_ifu_l2_corr_error;  // From dctl of lsu_dctl.v
+output                  lsu_ifu_l2_unc_error;   // From dctl of lsu_dctl.v
+output [11:5]           lsu_ifu_ld_icache_index;// From qdp1 of lsu_qdp1.v
+output [1:0]            lsu_ifu_ld_pcxpkt_tid;  // From qdp1 of lsu_qdp1.v
+output                  lsu_ifu_ld_pcxpkt_vld;  // From qctl1 of lsu_qctl1.v
+output [3:0]            lsu_ifu_ldst_cmplt;     // From dctl of lsu_dctl.v
+output                  lsu_ifu_ldst_miss_w;    // From dctl of lsu_dctl.v
+output                  lsu_ifu_ldsta_internal_e;// From dctl of lsu_dctl.v
+output                  lsu_ifu_pcxpkt_ack_d;   // From qctl1 of lsu_qctl1.v
+output                  lsu_ifu_stallreq;       // From qctl2 of lsu_qctl2.v
+output [3:0]            lsu_ifu_stbcnt0;        // From stb_rwctl of lsu_stb_rwctl.v
+output [3:0]            lsu_ifu_stbcnt1;        // From stb_rwctl of lsu_stb_rwctl.v
+output [3:0]            lsu_ifu_stbcnt2;        // From stb_rwctl of lsu_stb_rwctl.v
+output [3:0]            lsu_ifu_stbcnt3;        // From stb_rwctl of lsu_stb_rwctl.v
+output [47:0]           lsu_ifu_stxa_data;      // From qdp1 of lsu_qdp1.v
+output                  lsu_ifu_tlb_data_su;    // From excpctl of lsu_excpctl.v
+output                  lsu_ifu_tlb_data_ue;    // From excpctl of lsu_excpctl.v
+output                  lsu_ifu_tlb_tag_ue;     // From excpctl of lsu_excpctl.v
+output [7:0]            lsu_itlb_mrgn;          // From dctldp of lsu_dctldp.v
+output [3:0]            lsu_mamem_mrgn;         // From dctldp of lsu_dctldp.v
+output                  lsu_mmu_defr_trp_taken_g;// From excpctl of lsu_excpctl.v
+output                  lsu_mmu_flush_pipe_w;   // From excpctl of lsu_excpctl.v
+output [63:0]           lsu_mmu_rs3_data_g;     // From qdp1 of lsu_qdp1.v
+output [2:0]            lsu_pid_state0;         // From dctldp of lsu_dctldp.v
+output [2:0]            lsu_pid_state1;         // From dctldp of lsu_dctldp.v
+output [2:0]            lsu_pid_state2;         // From dctldp of lsu_dctldp.v
+output [2:0]            lsu_pid_state3;         // From dctldp of lsu_dctldp.v
+output [7:0]            lsu_spu_asi_state_e;    // From dctldp of lsu_dctldp.v
+output                  lsu_spu_early_flush_g;  // From excpctl of lsu_excpctl.v
+output                  lsu_spu_ldst_ack;       // From qctl1 of lsu_qctl1.v
+output [3:0]            lsu_spu_stb_empty;      // From stb_rwctl of lsu_stb_rwctl.v
+output [1:0]            lsu_spu_strm_ack_cmplt; // From qctl2 of lsu_qctl2.v
+output [12:0]           lsu_t0_pctxt_state;     // From dctldp of lsu_dctldp.v
+output [12:0]           lsu_t1_pctxt_state;     // From dctldp of lsu_dctldp.v
+output [12:0]           lsu_t2_pctxt_state;     // From dctldp of lsu_dctldp.v
+output [12:0]           lsu_t3_pctxt_state;     // From dctldp of lsu_dctldp.v
+output [1:0]            lsu_tlu_async_tid_w2;   // From dctl of lsu_dctl.v
+output                  lsu_tlu_async_ttype_vld_w2;// From dctl of lsu_dctl.v
+output [6:0]            lsu_tlu_async_ttype_w2; // From dctl of lsu_dctl.v
+output [3:0]            lsu_tlu_cpx_req;        // From qctl2 of lsu_qctl2.v
+output                  lsu_tlu_cpx_vld;        // From qctl2 of lsu_qctl2.v
+output                  lsu_tlu_daccess_excptn_g;// From excpctl of lsu_excpctl.v
+output [3:0]            lsu_tlu_dcache_miss_w2; // From qctl1 of lsu_qctl1.v
+output                  lsu_tlu_defr_trp_taken_g;// From excpctl of lsu_excpctl.v
+output                  lsu_tlu_dmmu_miss_g;    // From excpctl of lsu_excpctl.v
+output [12:0]           lsu_tlu_dside_ctxt_m;   // From dctldp of lsu_dctldp.v
+output                  lsu_tlu_dtlb_done;      // From dctl of lsu_dctl.v
+output                  lsu_tlu_early_flush2_w; // From excpctl of lsu_excpctl.v
+output                  lsu_tlu_early_flush_w;  // From excpctl of lsu_excpctl.v
+output [17:0]           lsu_tlu_intpkt;         // From qctl2 of lsu_qctl2.v
+output [3:0]            lsu_tlu_l2_dmiss;       // From qctl2 of lsu_qctl2.v
+output [9:0]            lsu_tlu_ldst_va_m;      // From dctldp of lsu_dctldp.v
+output                  lsu_tlu_misalign_addr_ldst_atm_m;// From excpctl of lsu_excpctl.v
+output [12:0]           lsu_tlu_pctxt_m;        // From dctldp of lsu_dctldp.v
+output                  lsu_tlu_pcxpkt_ack;     // From qctl1 of lsu_qctl1.v
+output [63:0]           lsu_tlu_rs3_data_g;     // From qdp1 of lsu_qdp1.v
+output [7:0]            lsu_tlu_rsr_data_e;     // From dctldp of lsu_dctldp.v
+output [3:0]            lsu_tlu_stb_full_w2;    // From stb_ctl0 of lsu_stb_ctl.v, ...
+output [1:0]            lsu_tlu_thrid_d;        // From dctl of lsu_dctl.v
+output [1:0]            lsu_tlu_tlb_access_tid_m;// From dctl of lsu_dctl.v
+output [7:0]            lsu_tlu_tlb_asi_state_m;// From dctldp of lsu_dctldp.v
+output [47:13]          lsu_tlu_tlb_dmp_va_m;   // From dctldp of lsu_dctldp.v
+output                  lsu_tlu_tlb_ld_inst_m;  // From dctl of lsu_dctl.v
+output [10:0]           lsu_tlu_tlb_ldst_va_m;  // From dctldp of lsu_dctldp.v
+output                  lsu_tlu_tlb_st_inst_m;  // From dctl of lsu_dctl.v
+output [8:0]            lsu_tlu_ttype_m2;       // From excpctl of lsu_excpctl.v
+output                  lsu_tlu_ttype_vld_m2;   // From excpctl of lsu_excpctl.v
+output                  lsu_tlu_wsr_inst_e;     // From dctl of lsu_dctl.v
+output [71:0]           mbist_dcache_data_in;   // From dcdp of lsu_dcdp.v
+output                  spc_efc_dfuse_data;     // From dcdhdr of cmp_sram_redhdr.v
+output                  spc_pcx_atom_pq;        // From qctl1 of lsu_qctl1.v
+output [`PCX_WIDTH-1:0] spc_pcx_data_pa;        // From qdp1 of lsu_qdp1.v
+output [4:0]            spc_pcx_req_pq;         // From qctl1 of lsu_qctl1.v
+// End of automatics
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+   
+wire                    as_if_user_asi_m;       // From dctl of lsu_dctl.v
+wire [7:0]              asi_d;                  // From dctldp of lsu_dctldp.v
+wire                    asi_internal_m;         // From dctl of lsu_dctl.v
+wire [3:0]              asi_state_wr_thrd;      // From dctl of lsu_dctl.v
+wire                    asi_tte_data_perror;    // From excpctl of lsu_excpctl.v
+wire                    asi_tte_tag_perror;     // From excpctl of lsu_excpctl.v
+wire [5:0]              async_tlb_index;        // From dctl of lsu_dctl.v
+wire                    atomic_asi_m;           // From dctl of lsu_dctl.v
+wire                    atomic_m;               // From dctl of lsu_dctl.v
+wire                    binit_quad_asi_m;       // From dctl of lsu_dctl.v
+wire                    bist_tap_wr_en;         // From dctl of lsu_dctl.v
+wire                    blk_asi_m;              // From dctl of lsu_dctl.v
+wire                    cache_hit;              // From dtlb of bw_r_tlb.v
+wire [3:0]              cache_way_hit;          // From dtlb of bw_r_tlb.v
+wire [3:0]              cache_way_hit_buf1;     // From tlbdp of lsu_tlbdp.v
+wire [3:0]              cache_way_hit_buf2;     // From tlbdp of lsu_tlbdp.v
+wire                    cam_real_m;             // From dctl of lsu_dctl.v
+wire                    cpx_fwd_pkt_en_cx;      // From qctl2 of lsu_qctl2.v
+wire                    cpx_st_ack_tid0;        // From qctl2 of lsu_qctl2.v
+wire                    cpx_st_ack_tid1;        // From qctl2 of lsu_qctl2.v
+wire                    cpx_st_ack_tid2;        // From qctl2 of lsu_qctl2.v
+wire                    cpx_st_ack_tid3;        // From qctl2 of lsu_qctl2.v
+wire                    data_rd_vld_g;          // From dctl of lsu_dctl.v
+wire                    dc_direct_map;          // From dctldp of lsu_dctldp.v
+wire [63:0]             dcache_alt_data_w0_m;   // From qdp1 of lsu_qdp1.v
+wire                    dcache_alt_mx_sel_e;    // From dctl of lsu_dctl.v
+wire                    dcache_alt_mx_sel_e_bf; // From dctl of lsu_dctl.v
+wire                    dcache_arry_data_sel_m; // From dctl of lsu_dctl.v
+wire [15:0]             dcache_byte_wr_en_e;    // From dctl of lsu_dctl.v
+wire [7:0]              dcache_iob_addr_e;      // From qdp2 of lsu_qdp2.v
+wire [7:0]              dcache_rdata_msb_w0_m;  // From dcache of bw_r_dcd.v
+wire [7:0]              dcache_rdata_msb_w1_m;  // From dcache of bw_r_dcd.v
+wire [7:0]              dcache_rdata_msb_w2_m;  // From dcache of bw_r_dcd.v
+wire [7:0]              dcache_rdata_msb_w3_m;  // From dcache of bw_r_dcd.v
+wire [63:0]             dcache_rdata_wb;        // From dcache of bw_r_dcd.v
+wire [63:0]             dcache_rdata_wb_buf;    // From dcdp of lsu_dcdp.v
+wire                    dcache_rparity_err_wb;  // From dcache of bw_r_dcd.v
+wire [7:0]              dcache_rparity_wb;      // From dcache of bw_r_dcd.v
+wire                    dcache_rvld_e;          // From dctl of lsu_dctl.v
+wire [1:0]              dcd_fuse_repair_en;     // From dcache of bw_r_dcd.v
+wire [7:0]              dcd_fuse_repair_value;  // From dcache of bw_r_dcd.v
+wire                    dctl_rst_l;             // From dctl of lsu_dctl.v
+wire                    dfill_tlb_asi_e;        // From dctl of lsu_dctl.v
+wire                    dfq_byp_ff_en;          // From qctl2 of lsu_qctl2.v
+wire [3:0]              dfq_byp_sel;            // From qctl2 of lsu_qctl2.v
+wire [4:0]              dfq_rptr;               // From qctl2 of lsu_qctl2.v
+wire                    dfq_rptr_vld;           // From qctl2 of lsu_qctl2.v
+wire [1:0]              dfq_tid;                // From qdp2 of lsu_qdp2.v
+wire [4:0]              dfq_wptr;               // From qctl2 of lsu_qctl2.v
+wire                    dfq_wptr_vld;           // From qctl2 of lsu_qctl2.v
+wire [3:0]              dfture_tap_rd_en;       // From dctl of lsu_dctl.v
+wire                    dfture_tap_wr_mx_sel;   // From dctl of lsu_dctl.v
+wire [32:0]             dtag_rdata_w0_m;        // From dtag of bw_r_idct.v
+wire [32:0]             dtag_rdata_w1_m;        // From dtag of bw_r_idct.v
+wire [32:0]             dtag_rdata_w2_m;        // From dtag of bw_r_idct.v
+wire [32:0]             dtag_rdata_w3_m;        // From dtag of bw_r_idct.v
+wire                    dtlb_bypass_m;          // From dctl of lsu_dctl.v
+wire [15:0]             dva_bit_wr_en_e;        // From dctl of lsu_dctl.v
+wire                    dva_din_e;              // From dctl of lsu_dctl.v
+wire [4:0]              dva_snp_addr_e;         // From qctl2 of lsu_qctl2.v
+wire [15:0]             dva_snp_bit_wr_en_e;    // From qctl2 of lsu_qctl2.v
+wire                    dva_svld_e;             // From qctl2 of lsu_qctl2.v
+wire [3:0]              dva_vld_m;              // From dva of bw_r_rf16x32.v
+wire [3:0]              dva_vld_m_bf;           // From dctl of lsu_dctl.v
+wire [10:6]             dva_wr_adr_e;           // From dctl of lsu_dctl.v
+wire                    flsh_inst_m;            // From stb_rwctl of lsu_stb_rwctl.v
+wire                    fp_ldst_m;              // From dctl of lsu_dctl.v
+wire [1:0]              fuse_dcd_repair_en;     // From dcdhdr of cmp_sram_redhdr.v
+wire [7:0]              fuse_dcd_repair_value;  // From dcdhdr of cmp_sram_redhdr.v
+wire [5:0]              fuse_dcd_rid;           // From dcdhdr of cmp_sram_redhdr.v
+wire                    fuse_dcd_wren;          // From dcdhdr of cmp_sram_redhdr.v
+wire [2:0]              fwd_int_fp_pcx_mx_sel;  // From qctl1 of lsu_qctl1.v
+wire                    hpstate_en_m;           // From dctl of lsu_dctl.v
+wire                    hpv_priv_m;             // From dctl of lsu_dctl.v
+wire                    ifill_tlb_asi_e;        // From dctl of lsu_dctl.v
+wire                    ifu_lsu_flush_w;        // From dctl of lsu_dctl.v
+wire                    ifu_tlu_inst_vld_m_bf2; // From stb_rwctl of lsu_stb_rwctl.v
+wire                    imiss_pcx_mx_sel;       // From qctl1 of lsu_qctl1.v
+wire                    l2fill_vld_m;           // From dctl of lsu_dctl.v
+wire [3:0]              lctl_rst;               // From dctl of lsu_dctl.v
+wire [`LMQ_WIDTH-1:40]  ld_pcx_pkt_g;           // From dctl of lsu_dctl.v
+wire [1:0]              ld_pcx_thrd;            // From qctl1 of lsu_qctl1.v
+wire [2:0]              ld_rawp_st_ackid_w2;    // From stb_rwctl of lsu_stb_rwctl.v
+wire                    ld_rawp_st_ced_w2;      // From stb_rwctl of lsu_stb_rwctl.v
+wire                    ld_sec_active;          // From qctl2 of lsu_qctl2.v
+wire                    ld_sec_hit_thrd0;       // From qdp1 of lsu_qdp1.v
+wire                    ld_sec_hit_thrd1;       // From qdp1 of lsu_qdp1.v
+wire                    ld_sec_hit_thrd2;       // From qdp1 of lsu_qdp1.v
+wire                    ld_sec_hit_thrd3;       // From qdp1 of lsu_qdp1.v
+wire                    ld_stb_full_raw_w2;     // From qctl1 of lsu_qctl1.v
+wire [3:0]              ld_thrd_byp_mxsel_m;    // From dctl of lsu_dctl.v
+wire                    lda_internal_m;         // From dctl of lsu_dctl.v
+wire                    ldd_in_dfq_out;         // From qctl2 of lsu_qctl2.v
+wire                    ldiagctl_wr_en;         // From dctl of lsu_dctl.v
+wire                    ldst_dbl_m;             // From dctl of lsu_dctl.v
+wire [1:0]              ldst_sz_m;              // From dctl of lsu_dctl.v
+wire                    ldxa_internal;          // From dctl of lsu_dctl.v
+wire [1:0]              lmq0_byp_misc_sz;       // From qdp1 of lsu_qdp1.v
+wire                    lmq0_l2fill_fpld;       // From qdp1 of lsu_qdp1.v
+wire [2:0]              lmq0_ld_rq_type;        // From qdp1 of lsu_qdp1.v
+wire                    lmq0_ldd_vld;           // From qdp1 of lsu_qdp1.v
+wire                    lmq0_ncache_ld;         // From qdp1 of lsu_qdp1.v
+wire [10:0]             lmq0_pcx_pkt_addr;      // From qdp1 of lsu_qdp1.v
+wire [1:0]              lmq0_pcx_pkt_way;       // From qctl1 of lsu_qctl1.v
+wire [1:0]              lmq1_byp_misc_sz;       // From qdp1 of lsu_qdp1.v
+wire                    lmq1_l2fill_fpld;       // From qdp1 of lsu_qdp1.v
+wire [2:0]              lmq1_ld_rq_type;        // From qdp1 of lsu_qdp1.v
+wire                    lmq1_ldd_vld;           // From qdp1 of lsu_qdp1.v
+wire                    lmq1_ncache_ld;         // From qdp1 of lsu_qdp1.v
+wire [10:0]             lmq1_pcx_pkt_addr;      // From qdp1 of lsu_qdp1.v
+wire [1:0]              lmq1_pcx_pkt_way;       // From qctl1 of lsu_qctl1.v
+wire [1:0]              lmq2_byp_misc_sz;       // From qdp1 of lsu_qdp1.v
+wire                    lmq2_l2fill_fpld;       // From qdp1 of lsu_qdp1.v
+wire [2:0]              lmq2_ld_rq_type;        // From qdp1 of lsu_qdp1.v
+wire                    lmq2_ldd_vld;           // From qdp1 of lsu_qdp1.v
+wire                    lmq2_ncache_ld;         // From qdp1 of lsu_qdp1.v
+wire [10:0]             lmq2_pcx_pkt_addr;      // From qdp1 of lsu_qdp1.v
+wire [1:0]              lmq2_pcx_pkt_way;       // From qctl1 of lsu_qctl1.v
+wire [1:0]              lmq3_byp_misc_sz;       // From qdp1 of lsu_qdp1.v
+wire                    lmq3_l2fill_fpld;       // From qdp1 of lsu_qdp1.v
+wire [2:0]              lmq3_ld_rq_type;        // From qdp1 of lsu_qdp1.v
+wire                    lmq3_ldd_vld;           // From qdp1 of lsu_qdp1.v
+wire                    lmq3_ncache_ld;         // From qdp1 of lsu_qdp1.v
+wire [10:0]             lmq3_pcx_pkt_addr;      // From qdp1 of lsu_qdp1.v
+wire [1:0]              lmq3_pcx_pkt_way;       // From qctl1 of lsu_qctl1.v
+wire [3:0]              lmq_byp_data_en_w2;     // From dctl of lsu_dctl.v
+wire [3:0]              lmq_byp_data_fmx_sel;   // From dctl of lsu_dctl.v
+wire [3:0]              lmq_byp_data_mxsel0;    // From dctl of lsu_dctl.v
+wire [3:0]              lmq_byp_data_mxsel1;    // From dctl of lsu_dctl.v
+wire [3:0]              lmq_byp_data_mxsel2;    // From dctl of lsu_dctl.v
+wire [3:0]              lmq_byp_data_mxsel3;    // From dctl of lsu_dctl.v
+wire [2:0]              lmq_byp_ldxa_mxsel0;    // From dctl of lsu_dctl.v
+wire [2:0]              lmq_byp_ldxa_mxsel1;    // From dctl of lsu_dctl.v
+wire [2:0]              lmq_byp_ldxa_mxsel2;    // From dctl of lsu_dctl.v
+wire [2:0]              lmq_byp_ldxa_mxsel3;    // From dctl of lsu_dctl.v
+wire [3:0]              lmq_enable;             // From qctl1 of lsu_qctl1.v
+wire                    lmq_ld_addr_b3;         // From dctl of lsu_dctl.v
+wire [4:0]              lmq_ld_rd1;             // From qdp1 of lsu_qdp1.v
+wire                    lmq_ldd_vld;            // From dctl of lsu_dctl.v
+wire                    lsu_alt_space_m;        // From dctl of lsu_dctl.v
+wire [2:0]              lsu_asi_sel_fmx1;       // From dctl of lsu_dctl.v
+wire [2:0]              lsu_asi_sel_fmx2;       // From dctl of lsu_dctl.v
+wire                    lsu_atm_st_cmplt_e;     // From qctl2 of lsu_qctl2.v
+wire [2:0]              lsu_atomic_pkt2_bsel_g; // From dctl of lsu_dctl.v
+wire [3:0]              lsu_bist_rsel_way_e;    // From dctl of lsu_dctl.v
+wire [2:0]              lsu_bld_cnt_m;          // From qctl1 of lsu_qctl1.v
+wire                    lsu_bld_helper_cmplt_m; // From qctl1 of lsu_qctl1.v
+wire                    lsu_bld_pcx_rq;         // From qctl1 of lsu_qctl1.v
+wire                    lsu_bld_reset;          // From qctl1 of lsu_qctl1.v
+wire [1:0]              lsu_bld_rq_addr;        // From qctl1 of lsu_qctl1.v
+wire                    lsu_blk_asi_m;          // From dctl of lsu_dctl.v
+wire                    lsu_blk_st_m;           // From dctl of lsu_dctl.v
+wire [39:10]            lsu_blkst_pgnum_m;      // From dctl of lsu_dctl.v
+wire                    lsu_bst_in_pipe_m;      // From dctl of lsu_dctl.v
+wire                    lsu_byp_ldd_oddrd_m;    // From qctl2 of lsu_qctl2.v
+wire [1:0]              lsu_byp_misc_sz_e;      // From qdp1 of lsu_qdp1.v
+wire [7:0]              lsu_cpu_dcd_sel;        // From qctl2 of lsu_qctl2.v
+wire                    lsu_cpu_inv_data_b0;    // From qdp2 of lsu_qdp2.v
+wire [13:9]             lsu_cpu_inv_data_b13to9;// From qdp2 of lsu_qdp2.v
+wire [7:2]              lsu_cpu_inv_data_b7to2; // From qdp2 of lsu_qdp2.v
+wire                    lsu_cpu_uhlf_sel;       // From qctl2 of lsu_qctl2.v
+wire                    lsu_cpx_ld_dcache_perror_e;// From qctl2 of lsu_qctl2.v
+wire                    lsu_cpx_ld_dtag_perror_e;// From qctl2 of lsu_qctl2.v
+wire                    lsu_cpx_pkt_atm_st_cmplt;// From qdp2 of lsu_qdp2.v
+wire                    lsu_cpx_pkt_atomic;     // From qdp2 of lsu_qdp2.v
+wire                    lsu_cpx_pkt_binit_st;   // From qdp2 of lsu_qdp2.v
+wire                    lsu_cpx_pkt_ifill_type; // From qdp2 of lsu_qdp2.v
+wire [4:0]              lsu_cpx_pkt_inv_pa;     // From qdp2 of lsu_qdp2.v
+wire [1:0]              lsu_cpx_pkt_invwy;      // From qdp2 of lsu_qdp2.v
+wire                    lsu_cpx_pkt_l2miss;     // From qdp2 of lsu_qdp2.v
+wire [1:0]              lsu_cpx_pkt_ld_err;     // From qdp2 of lsu_qdp2.v
+wire                    lsu_cpx_pkt_perror_dinv;// From qdp2 of lsu_qdp2.v
+wire                    lsu_cpx_pkt_perror_iinv;// From qdp2 of lsu_qdp2.v
+wire [1:0]              lsu_cpx_pkt_perror_set; // From qdp2 of lsu_qdp2.v
+wire                    lsu_cpx_pkt_prefetch;   // From qdp2 of lsu_qdp2.v
+wire                    lsu_cpx_pkt_prefetch2;  // From qdp2 of lsu_qdp2.v
+wire                    lsu_cpx_pkt_strm_ack;   // From qdp2 of lsu_qdp2.v
+wire [1:0]              lsu_cpx_pkt_tid;        // From qdp2 of lsu_qdp2.v
+wire [3:0]              lsu_cpx_rmo_st_ack;     // From qctl2 of lsu_qctl2.v
+wire                    lsu_cpx_spc_inv_vld;    // From qctl2 of lsu_qctl2.v
+wire                    lsu_cpx_stack_dcfill_vld;// From qctl2 of lsu_qctl2.v
+wire [3:0]              lsu_cpx_thrdid;         // From qctl2 of lsu_qctl2.v
+wire [5:0]              lsu_cpxpkt_type_dcd_cx; // From qctl2 of lsu_qctl2.v
+wire [3:0]              lsu_ctl_state_wr_en;    // From dctl of lsu_dctl.v
+wire                    lsu_dc_iob_access_e;    // From dctl of lsu_dctl.v
+wire                    lsu_dcache_data_perror_g;// From dctl of lsu_dctl.v
+wire [10:3]             lsu_dcache_fill_addr_e; // From dctl of lsu_dctl.v
+wire [10:4]             lsu_dcache_fill_addr_e_err;// From dctl of lsu_dctl.v
+wire [143:0]            lsu_dcache_fill_data_e; // From qdp2 of lsu_qdp2.v
+wire [3:0]              lsu_dcache_fill_way_e;  // From dctl of lsu_dctl.v
+wire                    lsu_dcache_iob_rd_w;    // From qctl2 of lsu_qctl2.v
+wire [1:0]              lsu_dcache_iob_way_e;   // From qdp2 of lsu_qdp2.v
+wire [1:0]              lsu_dcache_rand;        // From dctl of lsu_dctl.v
+wire                    lsu_dcache_tag_perror_g;// From dctl of lsu_dctl.v
+wire                    lsu_dcache_wr_vld_e;    // From dctl of lsu_dctl.v
+wire                    lsu_dcfill_active_e;    // From qctl2 of lsu_qctl2.v
+wire                    lsu_dcfill_data_mx_sel_e;// From dctl of lsu_dctl.v
+wire [3:0]              lsu_dctag_mrgn;         // From dctldp of lsu_dctldp.v
+wire [7:0]              lsu_dctl_asi_state_m;   // From dctldp of lsu_dctldp.v
+wire                    lsu_dctldp_thread0_m;   // From dctl of lsu_dctl.v
+wire                    lsu_dctldp_thread1_m;   // From dctl of lsu_dctl.v
+wire                    lsu_dctldp_thread2_m;   // From dctl of lsu_dctl.v
+wire                    lsu_dctldp_thread3_m;   // From dctl of lsu_dctl.v
+wire                    lsu_defr_trp_taken_g;   // From excpctl of lsu_excpctl.v
+wire                    lsu_dfill_data_sel_hi;  // From qctl2 of lsu_qctl2.v
+wire [3:0]              lsu_dfill_dcd_thrd;     // From qctl2 of lsu_qctl2.v
+wire                    lsu_dfq_byp_atm;        // From qdp2 of lsu_qdp2.v
+wire                    lsu_dfq_byp_binit_st;   // From qdp2 of lsu_qdp2.v
+wire                    lsu_dfq_byp_cpx_inv;    // From qdp2 of lsu_qdp2.v
+wire                    lsu_dfq_byp_ff_en;      // From qctl2 of lsu_qctl2.v
+wire                    lsu_dfq_byp_flush;      // From qdp2 of lsu_qdp2.v
+wire                    lsu_dfq_byp_invwy_vld;  // From qdp2 of lsu_qdp2.v
+wire [1:0]              lsu_dfq_byp_stack_adr_b54;// From qdp2 of lsu_qdp2.v
+wire [1:0]              lsu_dfq_byp_stack_wrway;// From qdp2 of lsu_qdp2.v
+wire [1:0]              lsu_dfq_byp_tid;        // From qdp2 of lsu_qdp2.v
+wire [3:0]              lsu_dfq_byp_tid_d1_sel; // From qctl1 of lsu_qctl1.v
+wire [5:0]              lsu_dfq_byp_type;       // From qdp2 of lsu_qdp2.v
+wire [3:0]              lsu_dfq_flsh_cmplt;     // From qctl2 of lsu_qctl2.v
+wire                    lsu_dfq_ld_vld;         // From qctl2 of lsu_qctl2.v
+wire                    lsu_dfq_ldst_vld;       // From qctl2 of lsu_qctl2.v
+wire                    lsu_dfq_rd_vld_d1;      // From qctl2 of lsu_qctl2.v
+wire                    lsu_dfq_st_vld;         // From qctl2 of lsu_qctl2.v
+wire                    lsu_dfq_vld;            // From qctl2 of lsu_qctl2.v
+wire                    lsu_diag_va_prty_invrt; // From dctldp of lsu_dctldp.v
+wire [3:0]              lsu_diagnstc_data_sel;  // From dctl of lsu_dctl.v
+wire [7:0]              lsu_diagnstc_dc_prty_invrt_e;// From dctldp of lsu_dctldp.v
+wire                    lsu_diagnstc_dtagv_prty_invrt_e;// From dctl of lsu_dctl.v
+wire [3:0]              lsu_diagnstc_va_sel;    // From dctl of lsu_dctl.v
+wire [10:0]             lsu_diagnstc_wr_addr_e; // From dctldp of lsu_dctldp.v
+wire                    lsu_diagnstc_wr_data_b0;// From qdp1 of lsu_qdp1.v
+wire [63:0]             lsu_diagnstc_wr_data_e; // From qdp1 of lsu_qdp1.v
+wire [1:0]              lsu_diagnstc_wr_way_e;  // From dctldp of lsu_dctldp.v
+wire [5:0]              lsu_dp_ctl_reg0;        // From dctldp of lsu_dctldp.v
+wire [5:0]              lsu_dp_ctl_reg1;        // From dctldp of lsu_dctldp.v
+wire [5:0]              lsu_dp_ctl_reg2;        // From dctldp of lsu_dctldp.v
+wire [5:0]              lsu_dp_ctl_reg3;        // From dctldp of lsu_dctldp.v
+wire                    lsu_dtag_index_sel_x_e; // From dctl of lsu_dctl.v
+wire [3:0]              lsu_dtag_rsel_m;        // From dctl of lsu_dctl.v
+wire                    lsu_dtag_wrreq_x_e;     // From dctl of lsu_dctl.v
+wire                    lsu_dtagv_wr_vld_e;     // From dctl of lsu_dctl.v
+wire                    lsu_dtlb_addr_mask_l_e; // From dctl of lsu_dctl.v
+wire                    lsu_dtlb_bypass_e;      // From dctl of lsu_dctl.v
+wire [2:0]              lsu_dtlb_cam_pid_e;     // From dctldp of lsu_dctldp.v
+wire                    lsu_dtlb_data_rd_e;     // From dctl of lsu_dctl.v
+wire                    lsu_dtlb_dmp_all_e;     // From dctl of lsu_dctl.v
+wire                    lsu_dtlb_dmp_vld_e;     // From dctl of lsu_dctl.v
+wire                    lsu_dtlb_invalid_all_l_m;// From dctl of lsu_dctl.v
+wire [7:0]              lsu_dtlb_mrgn;          // From dctldp of lsu_dctldp.v
+wire                    lsu_dtlb_rwindex_vld_e; // From dctl of lsu_dctl.v
+wire                    lsu_dtlb_tag_rd_e;      // From dctl of lsu_dctl.v
+wire                    lsu_dtlb_wr_vld_e;      // From dctl of lsu_dctl.v
+wire [1:0]              lsu_encd_way_hit;       // From dctl of lsu_dctl.v
+wire [2:0]              lsu_err_addr_sel;       // From dctl of lsu_dctl.v
+wire [28:0]             lsu_error_pa_m;         // From qdp1 of lsu_qdp1.v
+wire [7:0]              lsu_excpctl_asi_state_m;// From dctldp of lsu_dctldp.v
+wire                    lsu_fldd_vld_en;        // From qctl2 of lsu_qctl2.v
+wire                    lsu_flsh_inst_m;        // From dctl of lsu_dctl.v
+wire                    lsu_fwd_rply_sz1_unc;   // From qctl2 of lsu_qctl2.v
+wire [4:0]              lsu_fwdpkt_dest;        // From qctl2 of lsu_qctl2.v
+wire                    lsu_fwdpkt_pcx_rq_sel;  // From qctl1 of lsu_qctl1.v
+wire                    lsu_fwdpkt_vld;         // From qctl2 of lsu_qctl2.v
+wire                    lsu_ifu_asi_data_en_l;  // From dctl of lsu_dctl.v
+wire                    lsu_imiss_pcx_rq_sel_d1;// From qctl1 of lsu_qctl1.v
+wire [3:0]              lsu_intrpt_cmplt;       // From qctl1 of lsu_qctl1.v
+wire                    lsu_iobrdge_fwd_pkt_vld;// From qctl2 of lsu_qctl2.v
+wire [2:0]              lsu_iobrdge_rply_data_sel;// From qctl2 of lsu_qctl2.v
+wire [8:0]              lsu_iobrdge_tap_rq_type;// From qdp2 of lsu_qdp2.v
+wire [43:0]             lsu_iobrdge_wr_data;    // From qdp2 of lsu_qdp2.v
+wire                    lsu_l2fill_bendian_m;   // From qdp1 of lsu_qdp1.v
+wire [63:0]             lsu_l2fill_data;        // From qdp2 of lsu_qdp2.v
+wire                    lsu_l2fill_fpld_e;      // From dctl of lsu_dctl.v
+wire                    lsu_l2fill_sign_extend_m;// From qdp1 of lsu_qdp1.v
+wire                    lsu_l2fill_vld;         // From qctl2 of lsu_qctl2.v
+wire                    lsu_ld0_spec_vld_kill_w2;// From qctl1 of lsu_qctl1.v
+wire                    lsu_ld1_spec_vld_kill_w2;// From qctl1 of lsu_qctl1.v
+wire                    lsu_ld2_spec_vld_kill_w2;// From qctl1 of lsu_qctl1.v
+wire                    lsu_ld3_spec_vld_kill_w2;// From qctl1 of lsu_qctl1.v
+wire [3:0]              lsu_ld_inst_vld_g;      // From dctl of lsu_dctl.v
+wire                    lsu_ld_miss_wb;         // From dctl of lsu_dctl.v
+wire [3:0]              lsu_ld_pcx_rq_mxsel;    // From qctl1 of lsu_qctl1.v
+wire [3:0]              lsu_ld_pcx_rq_sel_d2;   // From qctl1 of lsu_qctl1.v
+wire [2:0]              lsu_ld_thrd_byp_sel_e;  // From dctl of lsu_dctl.v
+wire                    lsu_ldquad_inst_m;      // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_ldst_inst_vld_e;    // From dctl of lsu_dctl.v
+wire [7:0]              lsu_ldst_va_g;          // From dctldp of lsu_dctldp.v
+wire [12:0]             lsu_ldst_va_m;          // From dctldp of lsu_dctldp.v
+wire [47:0]             lsu_ldst_va_m_buf;      // From dctldp of lsu_dctldp.v
+wire [1:0]              lsu_ldst_va_way_g;      // From qdp1 of lsu_qdp1.v
+wire                    lsu_ldstub_g;           // From dctl of lsu_dctl.v
+wire [3:0]              lsu_lmq_byp_misc_sel;   // From qctl1 of lsu_qctl1.v
+wire                    lsu_local_diagnstc_tagrd_sel_g;// From dctl of lsu_dctl.v
+wire                    lsu_local_early_flush_g;// From excpctl of lsu_excpctl.v
+wire                    lsu_local_ldxa_sel_g;   // From dctl of lsu_dctl.v
+wire                    lsu_local_ldxa_tlbrd_sel_g;// From dctl of lsu_dctl.v
+wire                    lsu_memref_m;           // From dctl of lsu_dctl.v
+wire [63:0]             lsu_misc_rdata_w2;      // From tagdp of lsu_tagdp.v
+wire [3:0]              lsu_no_spc_pref;        // From dctl of lsu_dctl.v
+wire                    lsu_nonalt_nucl_access_m;// From dctl of lsu_dctl.v
+wire [3:0]              lsu_outstanding_rmo_st_max;// From dctl of lsu_dctl.v
+wire [107:0]            lsu_pcx_fwd_pkt;        // From qdp2 of lsu_qdp2.v
+wire                    lsu_pcx_fwd_reply;      // From qctl2 of lsu_qctl2.v
+wire                    lsu_pcx_ld_dtag_perror_w2;// From qctl1 of lsu_qctl1.v
+wire                    lsu_pcx_req_squash0;    // From qctl1 of lsu_qctl1.v
+wire                    lsu_pcx_req_squash1;    // From qctl1 of lsu_qctl1.v
+wire                    lsu_pcx_req_squash2;    // From qctl1 of lsu_qctl1.v
+wire                    lsu_pcx_req_squash3;    // From qctl1 of lsu_qctl1.v
+wire                    lsu_pcx_req_squash_d1;  // From qctl1 of lsu_qctl1.v
+wire                    lsu_pcx_rq_sz_b3;       // From qctl1 of lsu_qctl1.v
+wire                    lsu_pref_pcx_req;       // From qdp1 of lsu_qdp1.v
+wire                    lsu_qdp2_dfq_ld_vld;    // From qctl2 of lsu_qctl2.v
+wire                    lsu_qdp2_dfq_st_vld;    // From qctl2 of lsu_qctl2.v
+wire                    lsu_quad_asi_e;         // From dctl of lsu_dctl.v
+wire                    lsu_quad_word_access_g; // From dctl of lsu_dctl.v
+wire                    lsu_ramtest_rd_w;       // From qctl1 of lsu_qctl1.v
+wire [3:0]              lsu_rd_dtag_parity_g;   // From tagdp of lsu_tagdp.v
+wire                    lsu_snap_blk_st_m;      // From dctl of lsu_dctl.v
+wire                    lsu_squash_va_oor_m;    // From dctl of lsu_dctl.v
+wire [3:0]              lsu_st_ack_dq_stb;      // From qctl2 of lsu_qctl2.v
+wire [1:0]              lsu_st_dcfill_size_e;   // From qdp2 of lsu_qdp2.v
+wire [3:0]              lsu_st_dtlb_perr_g;     // From excpctl of lsu_excpctl.v
+wire                    lsu_st_hw_le_g;         // From dctl of lsu_dctl.v
+wire [3:0]              lsu_st_pcx_rq_kill_w2;  // From stb_ctl0 of lsu_stb_ctl.v, ...
+wire [3:0]              lsu_st_pcx_rq_pick;     // From qctl1 of lsu_qctl1.v
+wire                    lsu_st_pcx_rq_vld;      // From qctl1 of lsu_qctl1.v
+wire                    lsu_st_rmo_m;           // From dctl of lsu_dctl.v
+wire [2:1]              lsu_st_rq_type_m;       // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_st_sz_b_m;          // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_st_sz_bhw_m;        // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_st_sz_bhww_m;       // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_st_sz_dw_m;         // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_st_sz_hw_m;         // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_st_sz_hww_m;        // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_st_sz_w_m;          // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_st_sz_wdw_m;        // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_st_w_or_dbl_le_g;   // From dctl of lsu_dctl.v
+wire [1:0]              lsu_st_way_e;           // From qdp2 of lsu_qdp2.v
+wire                    lsu_st_wr_dcache;       // From qctl2 of lsu_qctl2.v
+wire                    lsu_st_x_le_g;          // From dctl of lsu_dctl.v
+wire [3:0]              lsu_stb_data_early_sel_e;// From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_stb_data_final_sel_m;// From stb_rwctl of lsu_stb_rwctl.v
+wire [3:0]              lsu_stb_empty;          // From stb_ctl0 of lsu_stb_ctl.v, ...
+wire [3:0]              lsu_stb_empty_buf;      // From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_stb_pcx_rvld_d1;    // From qctl1 of lsu_qctl1.v
+wire [1:0]              lsu_stb_rd_tid;         // From qctl1 of lsu_qctl1.v
+wire [3:0]              lsu_stb_rmo_st_issue;   // From stb_ctl0 of lsu_stb_ctl.v, ...
+wire [9:3]              lsu_stb_va_m;           // From stb_rwctl of lsu_stb_rwctl.v
+wire [3:0]              lsu_stbcnt0;            // From stb_ctl0 of lsu_stb_ctl.v
+wire [3:0]              lsu_stbcnt1;            // From stb_ctl1 of lsu_stb_ctl.v
+wire [3:0]              lsu_stbcnt2;            // From stb_ctl2 of lsu_stb_ctl.v
+wire [3:0]              lsu_stbcnt3;            // From stb_ctl3 of lsu_stb_ctl.v
+wire                    lsu_stbctl_flush_pipe_w;// From stb_rwctl of lsu_stb_rwctl.v
+wire                    lsu_swap_g;             // From dctl of lsu_dctl.v
+wire                    lsu_swap_sel_default_byte_7_2_g;// From dctl of lsu_dctl.v
+wire                    lsu_swap_sel_default_g; // From dctl of lsu_dctl.v
+wire [3:0]              lsu_thread_g;           // From dctl of lsu_dctl.v
+wire                    lsu_tlb_asi_data_perr_g;// From dctl of lsu_dctl.v
+wire                    lsu_tlb_asi_tag_perr_g; // From dctl of lsu_dctl.v
+wire                    lsu_tlb_data_rd_vld_g;  // From dctl of lsu_dctl.v
+wire                    lsu_tlb_perr_ld_rq_kill_w;// From excpctl of lsu_excpctl.v
+wire [63:0]             lsu_tlb_rd_data;        // From tlbdp of lsu_tlbdp.v
+wire [3:0]              lsu_tlb_st_sel_m;       // From dctl of lsu_dctl.v
+wire                    lsu_tlbop_force_swo;    // From dctl of lsu_dctl.v
+wire [2:0]              lsu_tlu_ctxt_sel_m;     // From dctl of lsu_dctl.v
+wire                    lsu_tlu_nonalt_ldst_m;  // From dctl of lsu_dctl.v
+wire                    lsu_tlu_write_op_m;     // From dctl of lsu_dctl.v
+wire                    lsu_tlu_xslating_ldst_m;// From dctl of lsu_dctl.v
+wire                    lsu_ttype_vld_m2;       // From excpctl of lsu_excpctl.v
+wire                    lsu_ttype_vld_m2_bf1;   // From excpctl of lsu_excpctl.v
+wire                    lsu_va_match_b31_b3_m;  // From qdp1 of lsu_qdp1.v
+wire                    lsu_va_match_b47_b32_m; // From qdp1 of lsu_qdp1.v
+wire                    lsu_va_wtchpt0_wr_en_l; // From dctl of lsu_dctl.v
+wire                    lsu_va_wtchpt1_wr_en_l; // From dctl of lsu_dctl.v
+wire                    lsu_va_wtchpt2_wr_en_l; // From dctl of lsu_dctl.v
+wire                    lsu_va_wtchpt3_wr_en_l; // From dctl of lsu_dctl.v
+wire [47:3]             lsu_va_wtchpt_addr;     // From qdp1 of lsu_qdp1.v
+wire                    lsu_va_wtchpt_sel_g;    // From dctl of lsu_dctl.v
+wire                    lsu_way_hit_or;         // From dctl of lsu_dctl.v
+wire [3:0]              lsuctl_ctlbits_wr_en;   // From dctl of lsu_dctl.v
+wire                    merge0_sel_byte0_m;     // From dctl of lsu_dctl.v
+wire                    merge0_sel_byte1_m;     // From dctl of lsu_dctl.v
+wire                    merge0_sel_byte2_m;     // From dctl of lsu_dctl.v
+wire                    merge0_sel_byte3_default_m;// From dctl of lsu_dctl.v
+wire                    merge0_sel_byte4_m;     // From dctl of lsu_dctl.v
+wire                    merge0_sel_byte5_m;     // From dctl of lsu_dctl.v
+wire                    merge0_sel_byte6_m;     // From dctl of lsu_dctl.v
+wire                    merge0_sel_byte7_default_m;// From dctl of lsu_dctl.v
+wire                    merge0_sel_byte_1h_m;   // From dctl of lsu_dctl.v
+wire                    merge1_sel_byte0_m;     // From dctl of lsu_dctl.v
+wire                    merge1_sel_byte1_m;     // From dctl of lsu_dctl.v
+wire                    merge1_sel_byte2_m;     // From dctl of lsu_dctl.v
+wire                    merge1_sel_byte3_default_m;// From dctl of lsu_dctl.v
+wire                    merge1_sel_byte4_m;     // From dctl of lsu_dctl.v
+wire                    merge1_sel_byte5_m;     // From dctl of lsu_dctl.v
+wire                    merge1_sel_byte6_m;     // From dctl of lsu_dctl.v
+wire                    merge1_sel_byte7_default_m;// From dctl of lsu_dctl.v
+wire                    merge1_sel_byte_1h_m;   // From dctl of lsu_dctl.v
+wire                    merge1_sel_byte_2h_m;   // From dctl of lsu_dctl.v
+wire                    merge2_sel_byte1_m;     // From dctl of lsu_dctl.v
+wire                    merge2_sel_byte2_m;     // From dctl of lsu_dctl.v
+wire                    merge2_sel_byte5_m;     // From dctl of lsu_dctl.v
+wire                    merge2_sel_byte6_default_m;// From dctl of lsu_dctl.v
+wire                    merge2_sel_byte_m;      // From dctl of lsu_dctl.v
+wire                    merge3_sel_byte0_m;     // From dctl of lsu_dctl.v
+wire                    merge3_sel_byte3_m;     // From dctl of lsu_dctl.v
+wire                    merge3_sel_byte4_m;     // From dctl of lsu_dctl.v
+wire                    merge3_sel_byte7_default_m;// From dctl of lsu_dctl.v
+wire                    merge3_sel_byte_m;      // From dctl of lsu_dctl.v
+wire                    merge4_sel_byte3_m;     // From dctl of lsu_dctl.v
+wire                    merge4_sel_byte4_m;     // From dctl of lsu_dctl.v
+wire                    merge5_sel_byte2_m;     // From dctl of lsu_dctl.v
+wire                    merge5_sel_byte5_m;     // From dctl of lsu_dctl.v
+wire                    merge6_sel_byte1_m;     // From dctl of lsu_dctl.v
+wire                    merge6_sel_byte6_m;     // From dctl of lsu_dctl.v
+wire                    merge7_sel_byte0_m;     // From dctl of lsu_dctl.v
+wire                    merge7_sel_byte7_m;     // From dctl of lsu_dctl.v
+wire [3:0]              misc_ctl_sel_din;       // From dctl of lsu_dctl.v
+wire                    mmu_rd_only_asi_m;      // From dctl of lsu_dctl.v
+wire [7:0]              morphed_addr_m;         // From dctl of lsu_dctl.v
+wire                    mrgn_tap_wr_en;         // From dctl of lsu_dctl.v
+wire                    mrgnctl_wr_en;          // From dctl of lsu_dctl.v
+wire                    nofault_asi_m;          // From dctl of lsu_dctl.v
+wire [3:0]              pctxt_state_wr_thrd;    // From dctl of lsu_dctl.v
+wire [3:0]              pcx_pkt_src_sel;        // From qctl1 of lsu_qctl1.v
+wire [3:0]              pcx_rq_for_stb;         // From qctl1 of lsu_qctl1.v
+wire [3:0]              pcx_rq_for_stb_d1;      // From qctl1 of lsu_qctl1.v
+wire                    phy_byp_ec_asi_m;       // From dctl of lsu_dctl.v
+wire                    phy_use_ec_asi_m;       // From dctl of lsu_dctl.v
+wire [3:0]              pid_state_wr_en;        // From dctl of lsu_dctl.v
+wire                    quad_asi_m;             // From dctl of lsu_dctl.v
+wire                    rd_only_asi_m;          // From dctl of lsu_dctl.v
+wire                    rd_only_ltlb_asi_e;     // From dctl of lsu_dctl.v
+wire                    recognized_asi_m;       // From dctl of lsu_dctl.v
+wire [3:0]              sctxt_state_wr_thrd;    // From dctl of lsu_dctl.v
+wire                    signed_ldst_byte_m;     // From dctl of lsu_dctl.v
+wire                    signed_ldst_hw_m;       // From dctl of lsu_dctl.v
+wire                    signed_ldst_w_m;        // From dctl of lsu_dctl.v
+wire [10:0]             st_dcfill_addr;         // From qdp2 of lsu_qdp2.v
+wire [63:0]             st_rs3_data_g;          // From qdp1 of lsu_qdp1.v
+wire                    sta_internal_m;         // From dctl of lsu_dctl.v
+wire [2:1]              stb0_atm_rq_type;       // From stb_ctl0 of lsu_stb_ctl.v
+wire [7:0]              stb0_clk_en_l;          // From stb_ctl0 of lsu_stb_ctl.v
+wire [2:0]              stb0_crnt_ack_id;       // From stb_ctl0 of lsu_stb_ctl.v
+wire [2:0]              stb0_l2b_addr;          // From stb_ctl0 of lsu_stb_ctl.v
+wire [7:0]              stb0_state_rmo;         // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [2:1]              stb0_state_rtype_0;     // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [2:1]              stb0_state_rtype_1;     // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [2:1]              stb0_state_rtype_2;     // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [2:1]              stb0_state_rtype_3;     // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [2:1]              stb0_state_rtype_4;     // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [2:1]              stb0_state_rtype_5;     // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [2:1]              stb0_state_rtype_6;     // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [2:1]              stb0_state_rtype_7;     // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [3:2]              stb0_state_si_0;        // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [3:2]              stb0_state_si_1;        // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [3:2]              stb0_state_si_2;        // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [3:2]              stb0_state_si_3;        // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [3:2]              stb0_state_si_4;        // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [3:2]              stb0_state_si_5;        // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [3:2]              stb0_state_si_6;        // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [3:2]              stb0_state_si_7;        // From stb_ctldp0 of lsu_stb_ctldp.v
+wire [2:1]              stb1_atm_rq_type;       // From stb_ctl1 of lsu_stb_ctl.v
+wire [7:0]              stb1_clk_en_l;          // From stb_ctl1 of lsu_stb_ctl.v
+wire [2:0]              stb1_crnt_ack_id;       // From stb_ctl1 of lsu_stb_ctl.v
+wire [2:0]              stb1_l2b_addr;          // From stb_ctl1 of lsu_stb_ctl.v
+wire [7:0]              stb1_state_rmo;         // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [2:1]              stb1_state_rtype_0;     // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [2:1]              stb1_state_rtype_1;     // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [2:1]              stb1_state_rtype_2;     // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [2:1]              stb1_state_rtype_3;     // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [2:1]              stb1_state_rtype_4;     // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [2:1]              stb1_state_rtype_5;     // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [2:1]              stb1_state_rtype_6;     // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [2:1]              stb1_state_rtype_7;     // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [3:2]              stb1_state_si_0;        // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [3:2]              stb1_state_si_1;        // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [3:2]              stb1_state_si_2;        // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [3:2]              stb1_state_si_3;        // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [3:2]              stb1_state_si_4;        // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [3:2]              stb1_state_si_5;        // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [3:2]              stb1_state_si_6;        // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [3:2]              stb1_state_si_7;        // From stb_ctldp1 of lsu_stb_ctldp.v
+wire [2:1]              stb2_atm_rq_type;       // From stb_ctl2 of lsu_stb_ctl.v
+wire [7:0]              stb2_clk_en_l;          // From stb_ctl2 of lsu_stb_ctl.v
+wire [2:0]              stb2_crnt_ack_id;       // From stb_ctl2 of lsu_stb_ctl.v
+wire [2:0]              stb2_l2b_addr;          // From stb_ctl2 of lsu_stb_ctl.v
+wire [7:0]              stb2_state_rmo;         // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [2:1]              stb2_state_rtype_0;     // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [2:1]              stb2_state_rtype_1;     // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [2:1]              stb2_state_rtype_2;     // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [2:1]              stb2_state_rtype_3;     // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [2:1]              stb2_state_rtype_4;     // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [2:1]              stb2_state_rtype_5;     // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [2:1]              stb2_state_rtype_6;     // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [2:1]              stb2_state_rtype_7;     // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [3:2]              stb2_state_si_0;        // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [3:2]              stb2_state_si_1;        // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [3:2]              stb2_state_si_2;        // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [3:2]              stb2_state_si_3;        // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [3:2]              stb2_state_si_4;        // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [3:2]              stb2_state_si_5;        // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [3:2]              stb2_state_si_6;        // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [3:2]              stb2_state_si_7;        // From stb_ctldp2 of lsu_stb_ctldp.v
+wire [2:1]              stb3_atm_rq_type;       // From stb_ctl3 of lsu_stb_ctl.v
+wire [7:0]              stb3_clk_en_l;          // From stb_ctl3 of lsu_stb_ctl.v
+wire [2:0]              stb3_crnt_ack_id;       // From stb_ctl3 of lsu_stb_ctl.v
+wire [2:0]              stb3_l2b_addr;          // From stb_ctl3 of lsu_stb_ctl.v
+wire [7:0]              stb3_state_rmo;         // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [2:1]              stb3_state_rtype_0;     // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [2:1]              stb3_state_rtype_1;     // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [2:1]              stb3_state_rtype_2;     // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [2:1]              stb3_state_rtype_3;     // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [2:1]              stb3_state_rtype_4;     // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [2:1]              stb3_state_rtype_5;     // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [2:1]              stb3_state_rtype_6;     // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [2:1]              stb3_state_rtype_7;     // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [3:2]              stb3_state_si_0;        // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [3:2]              stb3_state_si_1;        // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [3:2]              stb3_state_si_2;        // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [3:2]              stb3_state_si_3;        // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [3:2]              stb3_state_si_4;        // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [3:2]              stb3_state_si_5;        // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [3:2]              stb3_state_si_6;        // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [3:2]              stb3_state_si_7;        // From stb_ctldp3 of lsu_stb_ctldp.v
+wire [1:0]              stb_cam_cm_tid;         // From stb_rwctl of lsu_stb_rwctl.v
+wire                    stb_cam_hit;            // From stb_cam of bw_r_scm.v
+wire                    stb_cam_hit_bf;         // From excpctl of lsu_excpctl.v
+wire                    stb_cam_hit_bf1;        // From excpctl of lsu_excpctl.v
+wire [2:0]              stb_cam_hit_ptr;        // From stb_cam of bw_r_scm.v
+wire                    stb_cam_mhit;           // From stb_cam of bw_r_scm.v
+wire                    stb_cam_rptr_vld;       // From stb_rwctl of lsu_stb_rwctl.v
+wire [4:0]              stb_cam_rw_ptr;         // From stb_rwctl of lsu_stb_rwctl.v
+wire [7:0]              stb_cam_sqsh_msk;       // From excpctl of lsu_excpctl.v
+wire                    stb_cam_vld;            // From tlbdp of lsu_tlbdp.v
+wire                    stb_cam_wptr_vld;       // From stb_rwctl of lsu_stb_rwctl.v
+wire                    stb_cam_wr_no_ivld_m;   // From stb_rwctl of lsu_stb_rwctl.v
+wire [3:0]              stb_cam_wvld_m;         // From stb_rwctl of lsu_stb_rwctl.v
+wire [4:0]              stb_data_rd_ptr;        // From stb_rwctl of lsu_stb_rwctl.v
+wire                    stb_data_rptr_vld;      // From stb_rwctl of lsu_stb_rwctl.v
+wire                    stb_data_wptr_vld;      // From stb_rwctl of lsu_stb_rwctl.v
+wire [4:0]              stb_data_wr_ptr;        // From stb_rwctl of lsu_stb_rwctl.v
+wire [3:0]              stb_flush_st_g;         // From stb_rwctl of lsu_stb_rwctl.v
+wire [7:0]              stb_ld_full_raw;        // From stb_cam of bw_r_scm.v
+wire [7:0]              stb_ld_partial_raw;     // From stb_cam of bw_r_scm.v
+wire [7:0]              stb_ldst_byte_msk;      // From stb_rwctl of lsu_stb_rwctl.v
+wire [2:0]              stb_pcx_rptr0;          // From stb_ctl0 of lsu_stb_ctl.v
+wire [2:0]              stb_pcx_rptr1;          // From stb_ctl1 of lsu_stb_ctl.v
+wire [2:0]              stb_pcx_rptr2;          // From stb_ctl2 of lsu_stb_ctl.v
+wire [2:0]              stb_pcx_rptr3;          // From stb_ctl3 of lsu_stb_ctl.v
+wire [3:0]              stb_rd_for_pcx;         // From stb_ctl0 of lsu_stb_ctl.v, ...
+wire [44:0]             stb_rdata_ramc;         // From stb_cam of bw_r_scm.v
+wire [14:9]             stb_rdata_ramc_buf;     // From dcdp of lsu_dcdp.v
+wire                    stb_rdata_ramd_b74_buf; // From stb_rwdp of lsu_stb_rwdp.v
+wire [69:0]             stb_rdata_ramd_buf;     // From stb_rwdp of lsu_stb_rwdp.v
+wire [7:0]              stb_state_ced0;         // From stb_ctl0 of lsu_stb_ctl.v
+wire [7:0]              stb_state_ced1;         // From stb_ctl1 of lsu_stb_ctl.v
+wire [7:0]              stb_state_ced2;         // From stb_ctl2 of lsu_stb_ctl.v
+wire [7:0]              stb_state_ced3;         // From stb_ctl3 of lsu_stb_ctl.v
+wire [7:0]              stb_state_vld0;         // From stb_ctl0 of lsu_stb_ctl.v
+wire [7:0]              stb_state_vld1;         // From stb_ctl1 of lsu_stb_ctl.v
+wire [7:0]              stb_state_vld2;         // From stb_ctl2 of lsu_stb_ctl.v
+wire [7:0]              stb_state_vld3;         // From stb_ctl3 of lsu_stb_ctl.v
+wire [3:0]              stb_thrd_en_g;          // From stb_rwctl of lsu_stb_rwctl.v
+wire [2:0]              stb_wrptr0;             // From stb_ctl0 of lsu_stb_ctl.v
+wire [2:0]              stb_wrptr0_prev;        // From stb_ctl0 of lsu_stb_ctl.v
+wire [2:0]              stb_wrptr1;             // From stb_ctl1 of lsu_stb_ctl.v
+wire [2:0]              stb_wrptr1_prev;        // From stb_ctl1 of lsu_stb_ctl.v
+wire [2:0]              stb_wrptr2;             // From stb_ctl2 of lsu_stb_ctl.v
+wire [2:0]              stb_wrptr2_prev;        // From stb_ctl2 of lsu_stb_ctl.v
+wire [2:0]              stb_wrptr3;             // From stb_ctl3 of lsu_stb_ctl.v
+wire [2:0]              stb_wrptr3_prev;        // From stb_ctl3 of lsu_stb_ctl.v
+wire                    strm_asi_m;             // From dctl of lsu_dctl.v
+wire                    tag_rd_vld_g;           // From dctl of lsu_dctl.v
+wire                    thread0_ctxt;           // From dctl of lsu_dctl.v
+wire                    thread0_d;              // From dctl of lsu_dctl.v
+wire                    thread0_e;              // From dctl of lsu_dctl.v
+wire                    thread0_g;              // From dctl of lsu_dctl.v
+wire                    thread0_m;              // From dctl of lsu_dctl.v
+wire                    thread1_ctxt;           // From dctl of lsu_dctl.v
+wire                    thread1_d;              // From dctl of lsu_dctl.v
+wire                    thread1_e;              // From dctl of lsu_dctl.v
+wire                    thread1_g;              // From dctl of lsu_dctl.v
+wire                    thread1_m;              // From dctl of lsu_dctl.v
+wire                    thread2_ctxt;           // From dctl of lsu_dctl.v
+wire                    thread2_d;              // From dctl of lsu_dctl.v
+wire                    thread2_e;              // From dctl of lsu_dctl.v
+wire                    thread2_g;              // From dctl of lsu_dctl.v
+wire                    thread2_m;              // From dctl of lsu_dctl.v
+wire                    thread3_ctxt;           // From dctl of lsu_dctl.v
+wire                    thread3_d;              // From dctl of lsu_dctl.v
+wire                    thread3_e;              // From dctl of lsu_dctl.v
+wire                    thread3_g;              // From dctl of lsu_dctl.v
+wire                    thread3_m;              // From dctl of lsu_dctl.v
+wire                    thread_actxt;           // From dctl of lsu_dctl.v
+wire                    thread_default;         // From dctl of lsu_dctl.v
+wire                    thread_pctxt;           // From dctl of lsu_dctl.v
+wire                    thread_sctxt;           // From dctl of lsu_dctl.v
+wire                    tlb_access_en0_g;       // From dctl of lsu_dctl.v
+wire                    tlb_access_en1_g;       // From dctl of lsu_dctl.v
+wire                    tlb_access_en2_g;       // From dctl of lsu_dctl.v
+wire                    tlb_access_en3_g;       // From dctl of lsu_dctl.v
+wire                    tlb_access_sel_default; // From dctl of lsu_dctl.v
+wire                    tlb_access_sel_thrd0;   // From dctl of lsu_dctl.v
+wire                    tlb_access_sel_thrd1;   // From dctl of lsu_dctl.v
+wire                    tlb_access_sel_thrd2;   // From dctl of lsu_dctl.v
+wire                    tlb_cam_hit;            // From dtlb of bw_r_tlb.v
+wire                    tlb_cam_hit_g;          // From dctl of lsu_dctl.v
+wire [12:0]             tlb_ctxt;               // From dctldp of lsu_dctldp.v
+wire                    tlb_ldst_cam_vld;       // From dctl of lsu_dctl.v
+wire [39:10]            tlb_pgnum;              // From dtlb of bw_r_tlb.v
+wire [39:10]            tlb_pgnum_buf;          // From tlbdp of lsu_tlbdp.v
+wire [39:37]            tlb_pgnum_buf2;         // From tlbdp of lsu_tlbdp.v
+wire [39:10]            tlb_pgnum_crit;         // From dtlb of bw_r_tlb.v
+wire [42:0]             tlb_rd_tte_data;        // From dtlb of bw_r_tlb.v
+wire                    tlb_rd_tte_data_ie_buf; // From tlbdp of lsu_tlbdp.v
+wire [58:0]             tlb_rd_tte_tag;         // From dtlb of bw_r_tlb.v
+wire                    tlu_lsu_asi_update_g;   // From dctl of lsu_dctl.v
+wire                    tte_data_parity_error;  // From tlbdp of lsu_tlbdp.v
+wire                    tte_data_perror_unc;    // From excpctl of lsu_excpctl.v
+wire                    tte_tag_parity_error;   // From tlbdp of lsu_tlbdp.v
+wire                    unimp_asi_m;            // From dctl of lsu_dctl.v
+wire                    va_wtchpt_cmp_en_m;     // From dctl of lsu_dctl.v
+wire                    va_wtchpt_msk_match_m;  // From dctldp of lsu_dctldp.v
+wire                    wr_only_asi_m;          // From dctl of lsu_dctl.v
+wire                    wr_only_ltlb_asi_e;     // From dctl of lsu_dctl.v
+// End of automatics
+
+
+// Split buses, declared by hand: per the original note, emacs (presumably the verilog-mode AUTOs) cannot handle them.
+input [`CPX_WIDTH-1:0]  cpx_spc_data_cx;   // cpx to processor pkt, CPX_WIDTH bits wide
+input [`PCX_WIDTH-1:0]  spu_lsu_ldst_pckt;   // PCX_WIDTH bits wide; presumably an SPU load/store request packet - TODO confirm
+input [47:0]            exu_lsu_ldst_va_e;  // VA for mem-ref (src-execute), 48 bits
+input [10:3]            exu_lsu_early_va_e;  // early partial VA for lookup; only bits [10:3] are brought in
+input	[80:0]		ffu_lsu_data ;  // 81-bit bus; presumably FFU-to-LSU data - TODO confirm
+
+  
+output [7:0]            lsu_asi_state;          // 8-bit ASI state exported from this module
+output [47:4]           lsu_ifu_err_addr;       // bits [47:4] only; presumably the error address reported to the IFU - TODO confirm
+output [15:0]		lsu_sscan_data ;	// 16 bits; fragmented across dbbs
+output                  ifu_tlu_inst_vld_m_bf1; // NOTE(review): ifu_tlu_* name but declared as an output here; presumably a buffered copy - confirm driver
+output [2:0]		lsu_ffu_bld_cnt_w ;	// 3-bit value; presumably a block-load count sent to the FFU - TODO confirm
+   
+wire [47:0]  lsu_local_ldxa_data_g;   // 48 bits; presumably data returned for LSU-local ldxa reads - TODO confirm
+wire [43:0]  lsu_iobrdge_rd_data;	// 44 bits, same width as lsu_iobrdge_wr_data in the automatic wire list
+wire [79:0]  stb_rdata_ramd;          // full 80-bit bus; the buffered copy stb_rdata_ramd_buf is declared as [69:0] only
+wire [75:64]  stb_wdata_ramd_b75_b64; // 12-bit slice indexed [75:64], matching the _b75_b64 suffix
+wire [63:0]   lsu_stb_st_data_g;      // 64 bits; presumably store data headed for the store buffer - TODO confirm
+   
+wire [151:0] dfq_rdata;                // 152 bits; presumably DFQ read data - TODO confirm
+wire [151:0] dfq_wdata;                // 152 bits, same width as dfq_rdata; presumably DFQ write data - TODO confirm
+wire         lsu_cpx_stack_icfill_vld; // single bit; its driver is not visible in this part of the file
+wire [29:0]  dtag_wdata_m;             // 30 bits; presumably D-cache tag write data - TODO confirm
+   wire      lsu_cpx_stack_dcfill_vld_b130; // NOTE(review): the _b130 suffix suggests bit 130 of a wider bus - confirm
+   wire [7:0] stb_ldst_byte_msk_min;   // 8 bits, same width as stb_ldst_byte_msk in the automatic wire list
+   
+// Scan chain: ports for chains 0 and 1 plus their "short" variants, and the numbered nets declared for each chain.
+input                   si0,si1,short_si1,short_si0; // scan-in, one input per chain
+output                  so0, so1,short_so0,short_so1; // scan-out, one output per chain
+   wire     short_scan1_1;   // short_scan1_1..8: presumably the links between short_si1 and short_so1 - TODO confirm
+   wire     short_scan1_2;
+   wire     short_scan1_3;
+   wire     short_scan1_4;
+   wire     short_scan1_5;
+   wire     short_scan1_6;
+   wire     short_scan1_7;
+   wire     short_scan1_8;
+
+   wire     short_scan0_1;   // short_scan0_1..9: presumably the links between short_si0 and short_so0 - TODO confirm
+   wire     short_scan0_2;
+   wire     short_scan0_3;
+   wire     short_scan0_4;
+   wire     short_scan0_5;
+   wire     short_scan0_6;
+   wire     short_scan0_7;
+   wire     short_scan0_8;
+   wire     short_scan0_9;
+
+   wire     scan1_1;         // scan1_1..4: presumably the links between si1 and so1 - TODO confirm
+   wire     scan1_2;
+   wire     scan1_3;
+   wire     scan1_4;
+   
+   wire     scan0_1;         // scan0_1..2: presumably the links between si0 and so0 - TODO confirm
+   wire     scan0_2;
+   
+/* Hand-declared input (not produced by the automatics above) */
+
+input [7:0]            exu_tlu_wsr_data_m; 
+
+/* Hand-declared outputs (not produced by the automatics above) */
+
+output                  lsu_tlu_nucleus_ctxt_m ;// access is nucleus context // NOTE(review): original author flagged "no driver" - confirm
+output  [2:0]           lsu_tlu_tte_pg_sz_g ;   // page-size of tte // NOTE(review): original author flagged "no driver" - confirm
+
+     
+// dsfsr support moved from tlu_tcl to lsu_excpctl ; becomes wire
+// !! first check if needed by iside in tlu_tcl, or mmu_ctl !!!
+//output			lsu_tlu_nonalt_ldst_m;	// From dctl of lsu_dctl.v
+output			lsu_tlu_squash_va_oor_m;// From dctl of lsu_dctl.v
+output			lsu_tlu_wtchpt_trp_g;	// From excpctl of lsu_excpctl.v
+//output			lsu_tlu_priv_violtn_g;	// From excpctl of lsu_excpctl.v
+output			lsu_tlu_daccess_prot_g;	// From excpctl of lsu_excpctl.v
+output			lsu_tlu_priv_action_g;	// From excpctl of lsu_excpctl.v
+// 1-thread support: the <name>_t nets below are the sources that the assigns underneath forward to the same-named signals without the _t suffix.
+wire [3:0]              lsu_tlu_stb_full_w2_t;  // only bit 0 is forwarded when FPGA_SYN_1THREAD is defined
+   wire [7:0]           lsu_asi_reg0_t;
+   wire [7:0]           lsu_asi_reg1_t;
+   wire [7:0]           lsu_asi_reg2_t;
+   wire [7:0]           lsu_asi_reg3_t;
+   wire [12:0]          lsu_t0_pctxt_state_t;
+   wire [12:0]          lsu_t1_pctxt_state_t;
+   wire [12:0]          lsu_t2_pctxt_state_t;
+   wire [12:0]          lsu_t3_pctxt_state_t;
+   wire [2:0]           lsu_pid_state0_t;
+   wire [2:0]           lsu_pid_state1_t;
+   wire [2:0]           lsu_pid_state2_t;
+   wire [2:0]           lsu_pid_state3_t;
+   // FPGA_SYN_1THREAD defined: forward only the index-0 sources and drive every index 1-3 signal with zeros.
+`ifdef FPGA_SYN_1THREAD
+   assign               lsu_tlu_stb_full_w2[3:0] = {3'b000, lsu_tlu_stb_full_w2_t[0]};
+   assign               lsu_asi_reg0[7:0] = lsu_asi_reg0_t[7:0];
+   assign               lsu_asi_reg1[7:0] = 8'b00000000;
+   assign               lsu_asi_reg2[7:0] = 8'b00000000;
+   assign               lsu_asi_reg3[7:0] = 8'b00000000;
+   assign               lsu_t0_pctxt_state[12:0] = lsu_t0_pctxt_state_t[12:0];
+   assign               lsu_t1_pctxt_state[12:0] = 13'b0000000000000;
+   assign               lsu_t2_pctxt_state[12:0] = 13'b0000000000000;
+   assign               lsu_t3_pctxt_state[12:0] = 13'b0000000000000;
+   assign               lsu_pid_state0[2:0] = lsu_pid_state0_t[2:0];
+   assign               lsu_pid_state1[2:0] = 3'b000;
+   assign               lsu_pid_state2[2:0] = 3'b000;
+   assign               lsu_pid_state3[2:0] = 3'b000;
+   // Each zero literal above is sized to the full width of its target slice (8, 13 and 3 bits), so no implicit extension is needed.
+`else
+   assign               lsu_tlu_stb_full_w2[3:0] = lsu_tlu_stb_full_w2_t[3:0];
+   assign               lsu_asi_reg0[7:0] = lsu_asi_reg0_t[7:0];
+   assign               lsu_asi_reg1[7:0] = lsu_asi_reg1_t[7:0];
+   assign               lsu_asi_reg2[7:0] = lsu_asi_reg2_t[7:0];
+   assign               lsu_asi_reg3[7:0] = lsu_asi_reg3_t[7:0];
+   assign               lsu_t0_pctxt_state[12:0] = lsu_t0_pctxt_state_t[12:0];
+   assign               lsu_t1_pctxt_state[12:0] = lsu_t1_pctxt_state_t[12:0];
+   assign               lsu_t2_pctxt_state[12:0] = lsu_t2_pctxt_state_t[12:0];
+   assign               lsu_t3_pctxt_state[12:0] = lsu_t3_pctxt_state_t[12:0];
+   assign               lsu_pid_state0[2:0] = lsu_pid_state0_t[2:0];
+   assign               lsu_pid_state1[2:0] = lsu_pid_state1_t[2:0];
+   assign               lsu_pid_state2[2:0] = lsu_pid_state2_t[2:0];
+   assign               lsu_pid_state3[2:0] = lsu_pid_state3_t[2:0];
+   // FPGA_SYN_1THREAD not defined: every signal is a straight copy of its _t source.
+`endif
+        
+   
+/* lsu_qctl1 AUTO_TEMPLATE (
+                .grst_l                 (gdbginit_l),
+                .rst_tri_en             (mux_drive_disable),
+                .lsu_ttype_vld_m2       (lsu_ttype_vld_m2_bf1),
+                .lsu_ldst_va_m          (lsu_ldst_va_m_buf[7:6]),
+                .ifu_lsu_pcxpkt_e_b50   (ifu_lsu_pcxpkt_e[50]),
+                .rclk                   (clk),
+                .lsu_sscan_data		(lsu_sscan_data[12:0]),     
+                .ld_inst_vld_e          (ifu_lsu_ld_inst_e), 
+                .lsu_ld_miss_g          (lsu_ld_miss_wb),        
+                .spu_lsu_ldst_pckt_vld  (spu_lsu_ldst_pckt[`PCX_VLD]), 
+                .lsu_stb_empty        	(lsu_stb_empty_buf[3:0]),
+                .tlb_pgnum_g            (tlb_pgnum_buf[39:37]),      
+                .tlu_lsu_pcxpkt_l2baddr (tlu_lsu_pcxpkt[11:10]), 
+                .tlu_lsu_pcxpkt_tid     (tlu_lsu_pcxpkt[19:18]), 
+                .tlu_lsu_pcxpkt_vld     (tlu_lsu_pcxpkt[25]),
+                .ld_thrd_byp_sel_e      (lsu_ld_thrd_byp_sel_e[2:0]));    
+*/
+
+
+`ifdef FPGA_SYN_1THREAD
+   
+lsu_qctl1 qctl1  (   // FPGA_SYN_1THREAD variant: the index 1-3 per-thread ports are left open (outputs) or tied to zero (inputs)
+                  .so                   (short_scan1_1),   // scan out onto short_scan1_1
+                  .si                   (short_si1),       // scan in from the short_si1 port
+                  /*AUTOINST*/
+                  // Outputs
+                  .lsu_bld_helper_cmplt_m(lsu_bld_helper_cmplt_m),
+                  .lsu_bld_cnt_m        (lsu_bld_cnt_m[2:0]),
+                  .lsu_bld_reset        (lsu_bld_reset),
+                  .lsu_pcx_rq_sz_b3     (lsu_pcx_rq_sz_b3),
+                  .lsu_ramtest_rd_w     (lsu_ramtest_rd_w),
+                  .ld_stb_full_raw_w2   (ld_stb_full_raw_w2),
+                  .lsu_ld_pcx_rq_sel_d2 (lsu_ld_pcx_rq_sel_d2[3:0]),
+                  .spc_pcx_req_pq       (spc_pcx_req_pq[4:0]),
+                  .spc_pcx_atom_pq      (spc_pcx_atom_pq),
+                  .lsu_ifu_pcxpkt_ack_d (lsu_ifu_pcxpkt_ack_d),
+                  .pcx_pkt_src_sel      (pcx_pkt_src_sel[3:0]),
+                  .lmq_enable           (lmq_enable[3:0]),
+                  .imiss_pcx_mx_sel     (imiss_pcx_mx_sel),
+                  .fwd_int_fp_pcx_mx_sel(fwd_int_fp_pcx_mx_sel[2:0]),
+                  .lsu_ffu_bld_cnt_w    (lsu_ffu_bld_cnt_w[2:0]),
+                  .lsu_ld_pcx_rq_mxsel  (lsu_ld_pcx_rq_mxsel[3:0]),
+                  .ld_pcx_thrd          (ld_pcx_thrd[1:0]),
+                  .lsu_spu_ldst_ack     (lsu_spu_ldst_ack),
+                  .pcx_rq_for_stb       (pcx_rq_for_stb[3:0]),
+                  .pcx_rq_for_stb_d1    (pcx_rq_for_stb_d1[3:0]),
+                  .lsu_ffu_ack          (lsu_ffu_ack),
+                  .lsu_ifu_ld_pcxpkt_vld(lsu_ifu_ld_pcxpkt_vld),
+                  .lsu_pcx_req_squash0  (lsu_pcx_req_squash0),
+                  .lsu_pcx_req_squash1  (lsu_pcx_req_squash1),
+                  .lsu_pcx_req_squash2  (lsu_pcx_req_squash2),
+                  .lsu_pcx_req_squash3  (lsu_pcx_req_squash3),
+                  .lsu_pcx_req_squash_d1(lsu_pcx_req_squash_d1),
+                  .lsu_pcx_ld_dtag_perror_w2(lsu_pcx_ld_dtag_perror_w2),
+                  .lsu_tlu_dcache_miss_w2(lsu_tlu_dcache_miss_w2[3:0]),
+                  .lsu_bld_pcx_rq       (lsu_bld_pcx_rq),
+                  .lsu_bld_rq_addr      (lsu_bld_rq_addr[1:0]),
+                  .lsu_fwdpkt_pcx_rq_sel(lsu_fwdpkt_pcx_rq_sel),
+                  .lsu_imiss_pcx_rq_sel_d1(lsu_imiss_pcx_rq_sel_d1),
+                  .lsu_tlu_pcxpkt_ack   (lsu_tlu_pcxpkt_ack),
+                  .lsu_intrpt_cmplt     (lsu_intrpt_cmplt[3:0]),
+                  .lsu_lmq_byp_misc_sel (lsu_lmq_byp_misc_sel[3:0]),
+                  .lsu_sscan_data       (lsu_sscan_data[12:0]),  // Templated
+                  .lsu_dfq_byp_tid_d1_sel(lsu_dfq_byp_tid_d1_sel[3:0]),
+                  .lmq0_pcx_pkt_way     (lmq0_pcx_pkt_way[1:0]),
+                  .lmq1_pcx_pkt_way     (),   // left unconnected in this build
+                  .lmq2_pcx_pkt_way     (),   // left unconnected in this build
+                  .lmq3_pcx_pkt_way     (),   // left unconnected in this build
+                  .lsu_st_pcx_rq_pick   (lsu_st_pcx_rq_pick[3:0]),
+                  .lsu_stb_pcx_rvld_d1  (lsu_stb_pcx_rvld_d1),
+                  .lsu_stb_rd_tid       (lsu_stb_rd_tid[1:0]),
+                  .lsu_ld0_spec_vld_kill_w2(lsu_ld0_spec_vld_kill_w2),
+                  .lsu_ld1_spec_vld_kill_w2(),   // left unconnected in this build
+                  .lsu_ld2_spec_vld_kill_w2(),   // left unconnected in this build
+                  .lsu_ld3_spec_vld_kill_w2(),   // left unconnected in this build
+                  .lsu_st_pcx_rq_vld    (lsu_st_pcx_rq_vld),
+                  // Inputs
+                  .rclk                 (clk),                   // Templated
+                  .se                   (se),
+                  .sehold               (sehold),
+                  .grst_l               (gdbginit_l),            // Templated
+                  .arst_l               (arst_l),
+                  .lsu_quad_word_access_g(lsu_quad_word_access_g),
+                  .pcx_spc_grant_px     (pcx_spc_grant_px[4:0]),
+                  .ld_inst_vld_e        (ifu_lsu_ld_inst_e),     // Templated
+                  .lsu_ldst_va_m        (lsu_ldst_va_m_buf[7:6]), // Templated
+                  .stb0_l2b_addr        (stb0_l2b_addr[2:0]),
+                  .stb1_l2b_addr        (3'b000),   // tied to zero in this build
+                  .stb2_l2b_addr        (3'b000),   // tied to zero in this build
+                  .stb3_l2b_addr        (3'b000),   // tied to zero in this build
+                  .lsu_ld_miss_g        (lsu_ld_miss_wb),        // Templated
+                  .ifu_lsu_ldst_fp_e    (ifu_lsu_ldst_fp_e),
+                  .ld_rawp_st_ced_w2    (ld_rawp_st_ced_w2),
+                  .ld_rawp_st_ackid_w2  (ld_rawp_st_ackid_w2[2:0]),
+                  .stb0_crnt_ack_id     (stb0_crnt_ack_id[2:0]),
+                  .stb1_crnt_ack_id     (3'b000),   // tied to zero in this build
+                  .stb2_crnt_ack_id     (3'b000),   // tied to zero in this build
+                  .stb3_crnt_ack_id     (3'b000),   // tied to zero in this build
+                  .ifu_tlu_thrid_e      (ifu_tlu_thrid_e[1:0]),
+                  .ldxa_internal        (ldxa_internal),
+                  .spu_lsu_ldst_pckt    (spu_lsu_ldst_pckt[`PCX_AD_LO+7:`PCX_AD_LO+6]),
+                  .spu_lsu_ldst_pckt_vld(spu_lsu_ldst_pckt[`PCX_VLD]), // Templated
+                  .ifu_tlu_inst_vld_m   (ifu_tlu_inst_vld_m),
+                  .ifu_lsu_flush_w      (ifu_lsu_flush_w),
+                  .ifu_lsu_casa_e       (ifu_lsu_casa_e),
+                  .lsu_ldstub_g         (lsu_ldstub_g),
+                  .lsu_swap_g           (lsu_swap_g),
+                  .stb0_atm_rq_type     (stb0_atm_rq_type[2:1]),
+                  .stb1_atm_rq_type     (2'b00),   // tied to zero in this build
+                  .stb2_atm_rq_type     (2'b00),   // tied to zero in this build
+                  .stb3_atm_rq_type     (2'b00),   // tied to zero in this build
+                  .tlb_pgnum_g          (tlb_pgnum_buf[39:37]),  // Templated
+                  .stb_rd_for_pcx       ({3'b000, stb_rd_for_pcx[0]}),   // only bit 0 is live; bits 3:1 are forced to zero
+                  .ffu_lsu_data         (ffu_lsu_data[80:79]),
+                  .ffu_lsu_fpop_rq_vld  (ffu_lsu_fpop_rq_vld),
+                  .ifu_lsu_ldst_dbl_e   (ifu_lsu_ldst_dbl_e),
+                  .ifu_lsu_pcxreq_d     (ifu_lsu_pcxreq_d),
+                  .ifu_lsu_destid_s     (ifu_lsu_destid_s[2:0]),
+                  .ifu_lsu_pref_inst_e  (ifu_lsu_pref_inst_e),
+                  .tlb_cam_hit_g        (tlb_cam_hit_g),
+                  .lsu_blk_asi_m        (lsu_blk_asi_m),
+                  .stb_cam_hit_bf       (stb_cam_hit_bf),
+                  .lsu_fwdpkt_vld       (lsu_fwdpkt_vld),
+                  .lsu_dcfill_active_e  (lsu_dcfill_active_e),
+                  .dfq_byp_sel          (dfq_byp_sel[3:0]),
+                  .lsu_dfq_ld_vld       (lsu_dfq_ld_vld),
+                  .lsu_fldd_vld_en      (lsu_fldd_vld_en),
+                  .lsu_dfill_dcd_thrd   (lsu_dfill_dcd_thrd[3:0]),
+                  .lsu_fwdpkt_dest      (lsu_fwdpkt_dest[4:0]),
+                  .tlu_lsu_pcxpkt_tid   (tlu_lsu_pcxpkt[19:18]), // Templated
+                  .lsu_stb_empty        (lsu_stb_empty_buf[3:0]), // Templated
+                  .tlu_lsu_pcxpkt_vld   (tlu_lsu_pcxpkt[25]),    // Templated
+                  .tlu_lsu_pcxpkt_l2baddr(tlu_lsu_pcxpkt[11:10]), // Templated
+                  .ld_sec_hit_thrd0     (ld_sec_hit_thrd0),
+                  .ld_sec_hit_thrd1     (1'b0),   // tied to zero in this build
+                  .ld_sec_hit_thrd2     (1'b0),   // tied to zero in this build
+                  .ld_sec_hit_thrd3     (1'b0),   // tied to zero in this build
+                  .ld_thrd_byp_sel_e    (lsu_ld_thrd_byp_sel_e[2:0]), // Templated
+                  .lsu_st_pcx_rq_kill_w2({3'b000, lsu_st_pcx_rq_kill_w2[0]}),   // only bit 0 is live; bits 3:1 are forced to zero
+                  .ifu_lsu_alt_space_e  (ifu_lsu_alt_space_e),
+                  .lsu_dfq_byp_tid      (lsu_dfq_byp_tid[1:0]),
+                  .dfq_byp_ff_en        (dfq_byp_ff_en),
+                  .stb_ld_full_raw      (stb_ld_full_raw[7:0]),
+                  .stb_ld_partial_raw   (stb_ld_partial_raw[7:0]),
+                  .stb_cam_mhit         (stb_cam_mhit),
+                  .lsu_ldquad_inst_m    (lsu_ldquad_inst_m),
+                  .stb_cam_wr_no_ivld_m (stb_cam_wr_no_ivld_m),
+                  .lsu_ldst_va_way_g    (lsu_ldst_va_way_g[1:0]),
+                  .lsu_dcache_rand      (lsu_dcache_rand[1:0]),
+                  .lsu_encd_way_hit     (lsu_encd_way_hit[1:0]),
+                  .lsu_way_hit_or       (lsu_way_hit_or),
+                  .dc_direct_map        (dc_direct_map),
+                  .lsu_tlb_perr_ld_rq_kill_w(lsu_tlb_perr_ld_rq_kill_w),
+                  .lsu_dcache_tag_perror_g(lsu_dcache_tag_perror_g),
+                  .lsu_ld_inst_vld_g    (lsu_ld_inst_vld_g[3:0]),
+                  .asi_internal_m       (asi_internal_m),
+                  .ifu_lsu_pcxpkt_e_b50 (ifu_lsu_pcxpkt_e[50]),  // Templated
+                  .lda_internal_m       (lda_internal_m),
+                  .atomic_m             (atomic_m),
+                  .lsu_dcache_iob_rd_w  (lsu_dcache_iob_rd_w),
+                  .ifu_lsu_fwd_data_vld (ifu_lsu_fwd_data_vld),
+                  .rst_tri_en           (mux_drive_disable),     // Templated
+                  .lsu_no_spc_pref      (lsu_no_spc_pref[3:0]),
+                  .tlu_early_flush_pipe2_w(tlu_early_flush_pipe2_w),
+                  .lsu_ttype_vld_m2     (lsu_ttype_vld_m2_bf1));  // Templated
+`else // !`ifdef FPGA_SYN_1THREAD
+
+   lsu_qctl1 qctl1  (   // variant used when FPGA_SYN_1THREAD is not defined: every index 0-3 per-thread port has its own net
+                  .so                   (short_scan1_1),   // scan out onto short_scan1_1
+                  .si                   (short_si1),       // scan in from the short_si1 port
+                  /*AUTOINST*/
+                  // Outputs
+                  .lsu_bld_helper_cmplt_m(lsu_bld_helper_cmplt_m),
+                  .lsu_bld_cnt_m        (lsu_bld_cnt_m[2:0]),
+                  .lsu_bld_reset        (lsu_bld_reset),
+                  .lsu_pcx_rq_sz_b3     (lsu_pcx_rq_sz_b3),
+                  .lsu_ramtest_rd_w     (lsu_ramtest_rd_w),
+                  .ld_stb_full_raw_w2   (ld_stb_full_raw_w2),
+                  .lsu_ld_pcx_rq_sel_d2 (lsu_ld_pcx_rq_sel_d2[3:0]),
+                  .spc_pcx_req_pq       (spc_pcx_req_pq[4:0]),
+                  .spc_pcx_atom_pq      (spc_pcx_atom_pq),
+                  .lsu_ifu_pcxpkt_ack_d (lsu_ifu_pcxpkt_ack_d),
+                  .pcx_pkt_src_sel      (pcx_pkt_src_sel[3:0]),
+                  .lmq_enable           (lmq_enable[3:0]),
+                  .imiss_pcx_mx_sel     (imiss_pcx_mx_sel),
+                  .fwd_int_fp_pcx_mx_sel(fwd_int_fp_pcx_mx_sel[2:0]),
+                  .lsu_ffu_bld_cnt_w    (lsu_ffu_bld_cnt_w[2:0]),
+                  .lsu_ld_pcx_rq_mxsel  (lsu_ld_pcx_rq_mxsel[3:0]),
+                  .ld_pcx_thrd          (ld_pcx_thrd[1:0]),
+                  .lsu_spu_ldst_ack     (lsu_spu_ldst_ack),
+                  .pcx_rq_for_stb       (pcx_rq_for_stb[3:0]),
+                  .pcx_rq_for_stb_d1    (pcx_rq_for_stb_d1[3:0]),
+                  .lsu_ffu_ack          (lsu_ffu_ack),
+                  .lsu_ifu_ld_pcxpkt_vld(lsu_ifu_ld_pcxpkt_vld),
+                  .lsu_pcx_req_squash0  (lsu_pcx_req_squash0),
+                  .lsu_pcx_req_squash1  (lsu_pcx_req_squash1),
+                  .lsu_pcx_req_squash2  (lsu_pcx_req_squash2),
+                  .lsu_pcx_req_squash3  (lsu_pcx_req_squash3),
+                  .lsu_pcx_req_squash_d1(lsu_pcx_req_squash_d1),
+                  .lsu_pcx_ld_dtag_perror_w2(lsu_pcx_ld_dtag_perror_w2),
+                  .lsu_tlu_dcache_miss_w2(lsu_tlu_dcache_miss_w2[3:0]),
+                  .lsu_bld_pcx_rq       (lsu_bld_pcx_rq),
+                  .lsu_bld_rq_addr      (lsu_bld_rq_addr[1:0]),
+                  .lsu_fwdpkt_pcx_rq_sel(lsu_fwdpkt_pcx_rq_sel),
+                  .lsu_imiss_pcx_rq_sel_d1(lsu_imiss_pcx_rq_sel_d1),
+                  .lsu_tlu_pcxpkt_ack   (lsu_tlu_pcxpkt_ack),
+                  .lsu_intrpt_cmplt     (lsu_intrpt_cmplt[3:0]),
+                  .lsu_lmq_byp_misc_sel (lsu_lmq_byp_misc_sel[3:0]),
+                  .lsu_sscan_data       (lsu_sscan_data[12:0]),  // Templated
+                  .lsu_dfq_byp_tid_d1_sel(lsu_dfq_byp_tid_d1_sel[3:0]),
+                  .lmq0_pcx_pkt_way     (lmq0_pcx_pkt_way[1:0]),
+                  .lmq1_pcx_pkt_way     (lmq1_pcx_pkt_way[1:0]),   // left open in the FPGA_SYN_1THREAD variant
+                  .lmq2_pcx_pkt_way     (lmq2_pcx_pkt_way[1:0]),   // left open in the FPGA_SYN_1THREAD variant
+                  .lmq3_pcx_pkt_way     (lmq3_pcx_pkt_way[1:0]),   // left open in the FPGA_SYN_1THREAD variant
+                  .lsu_st_pcx_rq_pick   (lsu_st_pcx_rq_pick[3:0]),
+                  .lsu_stb_pcx_rvld_d1  (lsu_stb_pcx_rvld_d1),
+                  .lsu_stb_rd_tid       (lsu_stb_rd_tid[1:0]),
+                  .lsu_ld0_spec_vld_kill_w2(lsu_ld0_spec_vld_kill_w2),
+                  .lsu_ld1_spec_vld_kill_w2(lsu_ld1_spec_vld_kill_w2),   // left open in the FPGA_SYN_1THREAD variant
+                  .lsu_ld2_spec_vld_kill_w2(lsu_ld2_spec_vld_kill_w2),   // left open in the FPGA_SYN_1THREAD variant
+                  .lsu_ld3_spec_vld_kill_w2(lsu_ld3_spec_vld_kill_w2),   // left open in the FPGA_SYN_1THREAD variant
+                  .lsu_st_pcx_rq_vld    (lsu_st_pcx_rq_vld),
+                  // Inputs
+                  .rclk                 (clk),                   // Templated
+                  .se                   (se),
+                  .sehold               (sehold),
+                  .grst_l               (gdbginit_l),            // Templated
+                  .arst_l               (arst_l),
+                  .lsu_quad_word_access_g(lsu_quad_word_access_g),
+                  .pcx_spc_grant_px     (pcx_spc_grant_px[4:0]),
+                  .ld_inst_vld_e        (ifu_lsu_ld_inst_e),     // Templated
+                  .lsu_ldst_va_m        (lsu_ldst_va_m_buf[7:6]), // Templated
+                  .stb0_l2b_addr        (stb0_l2b_addr[2:0]),
+                  .stb1_l2b_addr        (stb1_l2b_addr[2:0]),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .stb2_l2b_addr        (stb2_l2b_addr[2:0]),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .stb3_l2b_addr        (stb3_l2b_addr[2:0]),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .lsu_ld_miss_g        (lsu_ld_miss_wb),        // Templated
+                  .ifu_lsu_ldst_fp_e    (ifu_lsu_ldst_fp_e),
+                  .ld_rawp_st_ced_w2    (ld_rawp_st_ced_w2),
+                  .ld_rawp_st_ackid_w2  (ld_rawp_st_ackid_w2[2:0]),
+                  .stb0_crnt_ack_id     (stb0_crnt_ack_id[2:0]),
+                  .stb1_crnt_ack_id     (stb1_crnt_ack_id[2:0]),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .stb2_crnt_ack_id     (stb2_crnt_ack_id[2:0]),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .stb3_crnt_ack_id     (stb3_crnt_ack_id[2:0]),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .ifu_tlu_thrid_e      (ifu_tlu_thrid_e[1:0]),
+                  .ldxa_internal        (ldxa_internal),
+                  .spu_lsu_ldst_pckt    (spu_lsu_ldst_pckt[`PCX_AD_LO+7:`PCX_AD_LO+6]),
+                  .spu_lsu_ldst_pckt_vld(spu_lsu_ldst_pckt[`PCX_VLD]), // Templated
+                  .ifu_tlu_inst_vld_m   (ifu_tlu_inst_vld_m),
+                  .ifu_lsu_flush_w      (ifu_lsu_flush_w),
+                  .ifu_lsu_casa_e       (ifu_lsu_casa_e),
+                  .lsu_ldstub_g         (lsu_ldstub_g),
+                  .lsu_swap_g           (lsu_swap_g),
+                  .stb0_atm_rq_type     (stb0_atm_rq_type[2:1]),
+                  .stb1_atm_rq_type     (stb1_atm_rq_type[2:1]),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .stb2_atm_rq_type     (stb2_atm_rq_type[2:1]),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .stb3_atm_rq_type     (stb3_atm_rq_type[2:1]),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .tlb_pgnum_g          (tlb_pgnum_buf[39:37]),  // Templated
+                  .stb_rd_for_pcx       (stb_rd_for_pcx[3:0]),   // all four bits live here; only bit 0 in the FPGA_SYN_1THREAD variant
+                  .ffu_lsu_data         (ffu_lsu_data[80:79]),
+                  .ffu_lsu_fpop_rq_vld  (ffu_lsu_fpop_rq_vld),
+                  .ifu_lsu_ldst_dbl_e   (ifu_lsu_ldst_dbl_e),
+                  .ifu_lsu_pcxreq_d     (ifu_lsu_pcxreq_d),
+                  .ifu_lsu_destid_s     (ifu_lsu_destid_s[2:0]),
+                  .ifu_lsu_pref_inst_e  (ifu_lsu_pref_inst_e),
+                  .tlb_cam_hit_g        (tlb_cam_hit_g),
+                  .lsu_blk_asi_m        (lsu_blk_asi_m),
+                  .stb_cam_hit_bf       (stb_cam_hit_bf),
+                  .lsu_fwdpkt_vld       (lsu_fwdpkt_vld),
+                  .lsu_dcfill_active_e  (lsu_dcfill_active_e),
+                  .dfq_byp_sel          (dfq_byp_sel[3:0]),
+                  .lsu_dfq_ld_vld       (lsu_dfq_ld_vld),
+                  .lsu_fldd_vld_en      (lsu_fldd_vld_en),
+                  .lsu_dfill_dcd_thrd   (lsu_dfill_dcd_thrd[3:0]),
+                  .lsu_fwdpkt_dest      (lsu_fwdpkt_dest[4:0]),
+                  .tlu_lsu_pcxpkt_tid   (tlu_lsu_pcxpkt[19:18]), // Templated
+                  .lsu_stb_empty        (lsu_stb_empty_buf[3:0]), // Templated
+                  .tlu_lsu_pcxpkt_vld   (tlu_lsu_pcxpkt[25]),    // Templated
+                  .tlu_lsu_pcxpkt_l2baddr(tlu_lsu_pcxpkt[11:10]), // Templated
+                  .ld_sec_hit_thrd0     (ld_sec_hit_thrd0),
+                  .ld_sec_hit_thrd1     (ld_sec_hit_thrd1),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .ld_sec_hit_thrd2     (ld_sec_hit_thrd2),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .ld_sec_hit_thrd3     (ld_sec_hit_thrd3),   // tied to zero in the FPGA_SYN_1THREAD variant
+                  .ld_thrd_byp_sel_e    (lsu_ld_thrd_byp_sel_e[2:0]), // Templated
+                  .lsu_st_pcx_rq_kill_w2(lsu_st_pcx_rq_kill_w2[3:0]),   // all four bits live here; only bit 0 in the FPGA_SYN_1THREAD variant
+                  .ifu_lsu_alt_space_e  (ifu_lsu_alt_space_e),
+                  .lsu_dfq_byp_tid      (lsu_dfq_byp_tid[1:0]),
+                  .dfq_byp_ff_en        (dfq_byp_ff_en),
+                  .stb_ld_full_raw      (stb_ld_full_raw[7:0]),
+                  .stb_ld_partial_raw   (stb_ld_partial_raw[7:0]),
+                  .stb_cam_mhit         (stb_cam_mhit),
+                  .lsu_ldquad_inst_m    (lsu_ldquad_inst_m),
+                  .stb_cam_wr_no_ivld_m (stb_cam_wr_no_ivld_m),
+                  .lsu_ldst_va_way_g    (lsu_ldst_va_way_g[1:0]),
+                  .lsu_dcache_rand      (lsu_dcache_rand[1:0]),
+                  .lsu_encd_way_hit     (lsu_encd_way_hit[1:0]),
+                  .lsu_way_hit_or       (lsu_way_hit_or),
+                  .dc_direct_map        (dc_direct_map),
+                  .lsu_tlb_perr_ld_rq_kill_w(lsu_tlb_perr_ld_rq_kill_w),
+                  .lsu_dcache_tag_perror_g(lsu_dcache_tag_perror_g),
+                  .lsu_ld_inst_vld_g    (lsu_ld_inst_vld_g[3:0]),
+                  .asi_internal_m       (asi_internal_m),
+                  .ifu_lsu_pcxpkt_e_b50 (ifu_lsu_pcxpkt_e[50]),  // Templated
+                  .lda_internal_m       (lda_internal_m),
+                  .atomic_m             (atomic_m),
+                  .lsu_dcache_iob_rd_w  (lsu_dcache_iob_rd_w),
+                  .ifu_lsu_fwd_data_vld (ifu_lsu_fwd_data_vld),
+                  .rst_tri_en           (mux_drive_disable),     // Templated
+                  .lsu_no_spc_pref      (lsu_no_spc_pref[3:0]),
+                  .tlu_early_flush_pipe2_w(tlu_early_flush_pipe2_w),
+                  .lsu_ttype_vld_m2     (lsu_ttype_vld_m2_bf1));  // Templated
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+
+/* lsu_qctl2 AUTO_TEMPLATE (
+                .rst_tri_en             (mux_drive_disable),
+                .rclk                    (clk),
+                .lsu_dfq_rdata_b103      (dfq_rdata[103]),
+                .lsu_dfq_rdata_rq_type   (dfq_rdata[`CPX_WIDTH-2:`CPX_WIDTH-5]),
+                .lsu_dfq_rdata_type      (dfq_rdata[`DFQ_WIDTH-1:`DFQ_WIDTH-6]),
+                .lsu_dfq_rdata_invwy_vld (dfq_rdata[`CPX_WYVLD]),
+                .lsu_dfq_rdata_cpuid     (dfq_rdata[`CPX_INV_CID_HI:`CPX_INV_CID_LO]),
+                .lsu_dfq_rdata_stack_iinv_vld(dfq_rdata[128]),
+                .lsu_dfq_rdata_st_ack_type(dfq_rdata[`DFQ_WIDTH-4]), 
+                .lsu_dfq_rdata_stack_dcfill_vld(dfq_rdata[130]),
+                .lsu_ifill_pkt_vld      (lsu_ifu_cpxpkt_vld_i1),
+                .lsu_dfq_vld_entry_w 	(lsu_sscan_data[15]),     
+                .ifu_pcx_pkt_b10t5      (ifu_lsu_pcxpkt_e[10:5]), 
+                .ifu_pcx_pkt_b41t40     (ifu_lsu_pcxpkt_e[41:40]), 
+                .ifu_pcx_pkt_b51        (ifu_lsu_pcxpkt_e[51]),  
+                .ld_inst_vld_e          (ifu_lsu_ld_inst_e),     
+                .lsu_l2fill_fpld_e      (lsu_l2fill_fpld_e),
+                .lsu_dfq_rdata_flush_bit (dfq_rdata[136]),
+                .lsu_dfq_rdata_b17_b0    (dfq_rdata[17:0]),
+                .lsu_ld_miss_g          (lsu_ld_miss_wb));        
+*/
+
+`ifdef FPGA_SYN_1THREAD
+   
+lsu_qctl2 qctl2  (
+                  .so                   (scan1_1),
+                  .si                   (si1),
+                  .lsu_cpx_stack_dcfill_vld_b130  (lsu_cpx_stack_dcfill_vld_b130),
+
+                  .cpx_spc_data_cx_b144to140  (cpx_spc_data_cx[`CPX_WIDTH-1:140]),
+                  .cpx_spc_data_cx_b138       (cpx_spc_data_cx[138]),
+                  .cpx_spc_data_cx_b135to134  (cpx_spc_data_cx[`CPX_TH_HI:`CPX_TH_LO]),
+                  .cpx_spc_data_cx_b133       (cpx_spc_data_cx[`CPX_WYVLD]),
+                  .cpx_spc_data_cx_b130       (cpx_spc_data_cx[130]),
+                  .cpx_spc_data_cx_b129       (cpx_spc_data_cx[129]),
+                  .cpx_spc_data_cx_b128       (cpx_spc_data_cx[128]),
+                  .cpx_spc_data_cx_b125       (cpx_spc_data_cx[`CPX_BINIT_STACK]),
+                  .cpx_spc_data_cx_b124to123  (cpx_spc_data_cx[`CPX_PERR_DINV+1:`CPX_PERR_DINV]),
+                  .cpx_spc_data_cx_b120to118  (cpx_spc_data_cx[`CPX_INV_CID_HI:`CPX_INV_CID_LO]),
+                  .cpx_spc_data_cx_b71to70    (cpx_spc_data_cx[71:70]),
+                  .cpx_spc_data_cx_b0         (cpx_spc_data_cx[0]),
+                  .cpx_spc_data_cx_b4         (cpx_spc_data_cx[4]),
+                  .cpx_spc_data_cx_b8         (cpx_spc_data_cx[8]),
+                  .cpx_spc_data_cx_b12        (cpx_spc_data_cx[12]),
+                  .cpx_spc_data_cx_b16        (cpx_spc_data_cx[16]),
+                  .cpx_spc_data_cx_b20        (cpx_spc_data_cx[20]),
+                  .cpx_spc_data_cx_b24        (cpx_spc_data_cx[24]),
+                  .cpx_spc_data_cx_b28        (cpx_spc_data_cx[28]),
+
+                  .cpx_spc_data_cx_b32        (cpx_spc_data_cx[32]),
+                  .cpx_spc_data_cx_b35        (cpx_spc_data_cx[35]),
+                  .cpx_spc_data_cx_b38        (cpx_spc_data_cx[38]),
+                  .cpx_spc_data_cx_b41        (cpx_spc_data_cx[41]),
+                  .cpx_spc_data_cx_b44        (cpx_spc_data_cx[44]),
+                  .cpx_spc_data_cx_b47        (cpx_spc_data_cx[47]),
+                  .cpx_spc_data_cx_b50        (cpx_spc_data_cx[50]),
+                  .cpx_spc_data_cx_b53        (cpx_spc_data_cx[53]),
+
+                  .cpx_spc_data_cx_b56        (cpx_spc_data_cx[56]),
+                  .cpx_spc_data_cx_b60        (cpx_spc_data_cx[60]),
+                  .cpx_spc_data_cx_b64        (cpx_spc_data_cx[64]),
+                  .cpx_spc_data_cx_b68        (cpx_spc_data_cx[68]),
+                  .cpx_spc_data_cx_b72        (cpx_spc_data_cx[72]),
+                  .cpx_spc_data_cx_b76        (cpx_spc_data_cx[76]),
+                  .cpx_spc_data_cx_b80        (cpx_spc_data_cx[80]),
+                  .cpx_spc_data_cx_b84        (cpx_spc_data_cx[84]),
+
+                  .cpx_spc_data_cx_b88        (cpx_spc_data_cx[88]),
+                  .cpx_spc_data_cx_b91        (cpx_spc_data_cx[91]),
+                  .cpx_spc_data_cx_b94        (cpx_spc_data_cx[94]),
+                  .cpx_spc_data_cx_b97        (cpx_spc_data_cx[97]),
+                  .cpx_spc_data_cx_b100       (cpx_spc_data_cx[100]),
+                  .cpx_spc_data_cx_b103       (cpx_spc_data_cx[103]),
+                  .cpx_spc_data_cx_b106       (cpx_spc_data_cx[106]),
+                  .cpx_spc_data_cx_b109       (cpx_spc_data_cx[109]),
+
+                  .cpx_spc_data_cx_b1         (cpx_spc_data_cx[1]),
+                  .cpx_spc_data_cx_b5         (cpx_spc_data_cx[5]),
+                  .cpx_spc_data_cx_b9         (cpx_spc_data_cx[9]),
+                  .cpx_spc_data_cx_b13        (cpx_spc_data_cx[13]),
+                  .cpx_spc_data_cx_b17        (cpx_spc_data_cx[17]),
+                  .cpx_spc_data_cx_b21        (cpx_spc_data_cx[21]),
+                  .cpx_spc_data_cx_b25        (cpx_spc_data_cx[25]),
+                  .cpx_spc_data_cx_b29        (cpx_spc_data_cx[29]),
+
+                  .cpx_spc_data_cx_b57        (cpx_spc_data_cx[57]),
+                  .cpx_spc_data_cx_b61        (cpx_spc_data_cx[61]),
+                  .cpx_spc_data_cx_b65        (cpx_spc_data_cx[65]),
+                  .cpx_spc_data_cx_b69        (cpx_spc_data_cx[69]),
+                  .cpx_spc_data_cx_b73        (cpx_spc_data_cx[73]),
+                  .cpx_spc_data_cx_b77        (cpx_spc_data_cx[77]),
+                  .cpx_spc_data_cx_b81        (cpx_spc_data_cx[81]),
+                  .cpx_spc_data_cx_b85        (cpx_spc_data_cx[85]),
+
+		  .lsu_cpx_stack_icfill_vld(lsu_cpx_stack_icfill_vld),
+                  /*AUTOINST*/
+                  // Outputs
+                  .lsu_fwd_rply_sz1_unc (lsu_fwd_rply_sz1_unc),
+                  .lsu_dcache_iob_rd_w  (lsu_dcache_iob_rd_w),
+                  .ldd_in_dfq_out       (ldd_in_dfq_out),
+                  .lsu_dfq_rd_vld_d1    (lsu_dfq_rd_vld_d1),
+                  .dfq_byp_ff_en        (dfq_byp_ff_en),
+                  .lsu_dfill_data_sel_hi(lsu_dfill_data_sel_hi),
+                  .lsu_ifill_pkt_vld    (lsu_ifu_cpxpkt_vld_i1), // Templated
+                  .cpx_fwd_pkt_en_cx    (cpx_fwd_pkt_en_cx),
+                  .lsu_cpxpkt_type_dcd_cx(lsu_cpxpkt_type_dcd_cx[5:0]),
+                  .lsu_cpu_dcd_sel      (lsu_cpu_dcd_sel[7:0]),
+                  .lsu_cpu_uhlf_sel     (lsu_cpu_uhlf_sel),
+                  .lsu_iobrdge_rply_data_sel(lsu_iobrdge_rply_data_sel[2:0]),
+                  .lsu_iobrdge_fwd_pkt_vld(lsu_iobrdge_fwd_pkt_vld),
+                  .lsu_tlu_cpx_vld      (lsu_tlu_cpx_vld),
+                  .lsu_tlu_cpx_req      (lsu_tlu_cpx_req[3:0]),
+                  .lsu_tlu_intpkt       (lsu_tlu_intpkt[17:0]),
+                  .ld_sec_active        (ld_sec_active),
+                  .dfq_byp_sel          (dfq_byp_sel[3:0]),
+                  .lsu_cpx_ld_dtag_perror_e(lsu_cpx_ld_dtag_perror_e),
+                  .lsu_cpx_ld_dcache_perror_e(lsu_cpx_ld_dcache_perror_e),
+                  .lsu_exu_rd_m         (lsu_exu_rd_m[4:0]),
+                  .lsu_spu_strm_ack_cmplt(lsu_spu_strm_ack_cmplt[1:0]),
+                  .lsu_atm_st_cmplt_e   (lsu_atm_st_cmplt_e),
+                  .dva_svld_e           (dva_svld_e),
+                  .dfq_wptr_vld         (dfq_wptr_vld),
+                  .dfq_wptr             (dfq_wptr[4:0]),
+                  .lsu_dfq_flsh_cmplt   (lsu_dfq_flsh_cmplt[3:0]),
+                  .dfq_rptr_vld         (dfq_rptr_vld),
+                  .dfq_rptr             (dfq_rptr[4:0]),
+                  .lsu_ifu_stallreq     (lsu_ifu_stallreq),
+                  .dva_snp_addr_e       (dva_snp_addr_e[4:0]),
+                  .lsu_st_ack_dq_stb    (lsu_st_ack_dq_stb[3:0]),
+                  .lsu_cpx_rmo_st_ack   (lsu_cpx_rmo_st_ack[3:0]),
+                  .lsu_st_wr_dcache     (lsu_st_wr_dcache),
+                  .cpx_st_ack_tid0      (cpx_st_ack_tid0),
+                  .cpx_st_ack_tid1      (),
+                  .cpx_st_ack_tid2      (),
+                  .cpx_st_ack_tid3      (),
+                  .lsu_tlu_l2_dmiss     (lsu_tlu_l2_dmiss[3:0]),
+                  .lsu_l2fill_vld       (lsu_l2fill_vld),
+                  .lsu_byp_ldd_oddrd_m  (lsu_byp_ldd_oddrd_m),
+                  .lsu_pcx_fwd_reply    (lsu_pcx_fwd_reply),
+                  .lsu_fwdpkt_vld       (lsu_fwdpkt_vld),
+                  .lsu_dcfill_active_e  (lsu_dcfill_active_e),
+                  .lsu_dfq_ld_vld       (lsu_dfq_ld_vld),
+                  .lsu_fldd_vld_en      (lsu_fldd_vld_en),
+                  .lsu_dfill_dcd_thrd   (lsu_dfill_dcd_thrd[3:0]),
+                  .lsu_fwdpkt_dest      (lsu_fwdpkt_dest[4:0]),
+                  .dva_snp_bit_wr_en_e  (dva_snp_bit_wr_en_e[15:0]),
+                  .lsu_cpx_spc_inv_vld  (lsu_cpx_spc_inv_vld),
+                  .lsu_cpx_thrdid       (lsu_cpx_thrdid[3:0]),
+                  .lsu_cpx_stack_dcfill_vld(lsu_cpx_stack_dcfill_vld),
+                  .lsu_dfq_vld_entry_w  (lsu_sscan_data[15]),    // Templated
+                  .lsu_dfq_st_vld       (lsu_dfq_st_vld),
+                  .lsu_dfq_ldst_vld     (lsu_dfq_ldst_vld),
+                  .lsu_qdp2_dfq_ld_vld  (lsu_qdp2_dfq_ld_vld),
+                  .lsu_qdp2_dfq_st_vld  (lsu_qdp2_dfq_st_vld),
+                  .lsu_dfq_vld          (lsu_dfq_vld),
+                  .lsu_dfq_byp_ff_en    (lsu_dfq_byp_ff_en),
+                  // Inputs
+                  .rclk                 (clk),                   // Templated
+                  .grst_l               (grst_l),
+                  .arst_l               (arst_l),
+                  .se                   (se),
+                  .rst_tri_en           (mux_drive_disable),     // Templated
+                  .ld_inst_vld_e        (ifu_lsu_ld_inst_e),     // Templated
+                  .ifu_pcx_pkt_b51      (ifu_lsu_pcxpkt_e[51]),  // Templated
+                  .ifu_pcx_pkt_b41t40   (ifu_lsu_pcxpkt_e[41:40]), // Templated
+                  .ifu_pcx_pkt_b10t5    (ifu_lsu_pcxpkt_e[10:5]), // Templated
+                  .lsu_dfq_rdata_flush_bit(dfq_rdata[136]),      // Templated
+                  .lsu_dfq_rdata_b17_b0 (dfq_rdata[17:0]),       // Templated
+                  .ifu_lsu_rd_e         (ifu_lsu_rd_e[4:0]),
+                  .lmq_ld_rd1           (lmq_ld_rd1[4:0]),
+                  .lmq_ldd_vld          (lmq_ldd_vld),
+                  .dfq_tid              (dfq_tid[1:0]),
+                  .const_cpuid          (const_cpuid[2:0]),
+                  .lmq_ld_addr_b3       (lmq_ld_addr_b3),
+                  .ifu_lsu_ibuf_busy    (ifu_lsu_ibuf_busy),
+                  .ifu_lsu_inv_clear    (ifu_lsu_inv_clear),
+                  .lsu_byp_misc_sz_e    (lsu_byp_misc_sz_e[1:0]),
+                  .lsu_dfq_byp_tid      (lsu_dfq_byp_tid[1:0]),
+                  .lsu_cpx_pkt_atm_st_cmplt(lsu_cpx_pkt_atm_st_cmplt),
+                  .lsu_cpx_pkt_l2miss   (lsu_cpx_pkt_l2miss),
+                  .lsu_cpx_pkt_tid      (lsu_cpx_pkt_tid[1:0]),
+                  .lsu_cpx_pkt_invwy    (lsu_cpx_pkt_invwy[1:0]),
+                  .lsu_dfq_byp_flush    (lsu_dfq_byp_flush),
+                  .lsu_dfq_byp_type     (lsu_dfq_byp_type[5:0]),
+                  .lsu_dfq_byp_invwy_vld(lsu_dfq_byp_invwy_vld),
+                  .lsu_cpu_inv_data_b13to9(lsu_cpu_inv_data_b13to9[13:9]),
+                  .lsu_cpu_inv_data_b7to2(lsu_cpu_inv_data_b7to2[7:2]),
+                  .lsu_cpu_inv_data_b0  (lsu_cpu_inv_data_b0),
+                  .lsu_cpx_pkt_inv_pa   (lsu_cpx_pkt_inv_pa[4:0]),
+                  .lsu_cpx_pkt_ifill_type(lsu_cpx_pkt_ifill_type),
+                  .lsu_cpx_pkt_atomic   (lsu_cpx_pkt_atomic),
+                  .lsu_cpx_pkt_binit_st (lsu_cpx_pkt_binit_st),
+                  .lsu_cpx_pkt_prefetch (lsu_cpx_pkt_prefetch),
+                  .lsu_dfq_byp_binit_st (lsu_dfq_byp_binit_st),
+                  .lsu_tlbop_force_swo  (lsu_tlbop_force_swo),
+                  .lsu_iobrdge_tap_rq_type(lsu_iobrdge_tap_rq_type[7:3]),
+                  .lsu_dcache_tag_perror_g(lsu_dcache_tag_perror_g),
+                  .lsu_dcache_data_perror_g(lsu_dcache_data_perror_g),
+                  .lsu_cpx_pkt_perror_iinv(lsu_cpx_pkt_perror_iinv),
+                  .lsu_cpx_pkt_perror_dinv(lsu_cpx_pkt_perror_dinv),
+                  .lsu_cpx_pkt_perror_set(lsu_cpx_pkt_perror_set[1:0]),
+                  .lsu_l2fill_fpld_e    (lsu_l2fill_fpld_e),     // Templated
+                  .lsu_cpx_pkt_strm_ack (lsu_cpx_pkt_strm_ack),
+                  .ifu_lsu_memref_d     (ifu_lsu_memref_d),
+                  .lsu_fwdpkt_pcx_rq_sel(lsu_fwdpkt_pcx_rq_sel),
+                  .lsu_imiss_pcx_rq_sel_d1(lsu_imiss_pcx_rq_sel_d1),
+                  .lsu_dfq_byp_cpx_inv  (lsu_dfq_byp_cpx_inv),
+                  .lsu_dfq_byp_stack_adr_b54(lsu_dfq_byp_stack_adr_b54[1:0]),
+                  .lsu_dfq_byp_stack_wrway(lsu_dfq_byp_stack_wrway[1:0]),
+                  .lsu_dfq_rdata_st_ack_type(dfq_rdata[`DFQ_WIDTH-4]), // Templated
+                  .lsu_dfq_rdata_stack_dcfill_vld(dfq_rdata[130]), // Templated
+                  .lsu_dfq_rdata_stack_iinv_vld(dfq_rdata[128]), // Templated
+                  .lsu_dfq_rdata_cpuid  (dfq_rdata[`CPX_INV_CID_HI:`CPX_INV_CID_LO]), // Templated
+                  .lsu_dfq_byp_atm      (lsu_dfq_byp_atm),
+                  .lsu_ld_inst_vld_g    (lsu_ld_inst_vld_g[3:0]),
+                  .lsu_dfq_rdata_type   (dfq_rdata[`DFQ_WIDTH-1:`DFQ_WIDTH-6]), // Templated
+                  .lsu_dfq_rdata_invwy_vld(dfq_rdata[`CPX_WYVLD]), // Templated
+                  .ifu_lsu_fwd_data_vld (ifu_lsu_fwd_data_vld),
+                  .ifu_lsu_fwd_wr_ack   (ifu_lsu_fwd_wr_ack),
+                  .lsu_dfq_rdata_rq_type(dfq_rdata[`CPX_WIDTH-2:`CPX_WIDTH-5]), // Templated
+                  .lsu_dfq_rdata_b103   (dfq_rdata[103]),        // Templated
+                  .sehold               (sehold));
+
+`else // !`ifdef FPGA_SYN_1THREAD
+
+   lsu_qctl2 qctl2  (   // lsu_qctl2 instance used when FPGA_SYN_1THREAD is not defined (else branch of that ifdef)
+                  .so                   (scan1_1),   // so -> scan1_1, which is the .si of the dcache instance further down
+                  .si                   (si1),       // si <- si1 (its driver is outside this excerpt)
+                  .lsu_cpx_stack_dcfill_vld_b130  (lsu_cpx_stack_dcfill_vld_b130),
+
+                  .cpx_spc_data_cx_b144to140  (cpx_spc_data_cx[`CPX_WIDTH-1:140]),   // port name implies [144:140], i.e. CPX_WIDTH == 145 -- TODO confirm in the defines
+                  .cpx_spc_data_cx_b138       (cpx_spc_data_cx[138]),
+                  .cpx_spc_data_cx_b135to134  (cpx_spc_data_cx[`CPX_TH_HI:`CPX_TH_LO]),   // macro-indexed; port name implies [135:134] -- TODO confirm
+                  .cpx_spc_data_cx_b133       (cpx_spc_data_cx[`CPX_WYVLD]),   // macro-indexed; port name implies bit 133 -- TODO confirm
+                  .cpx_spc_data_cx_b130       (cpx_spc_data_cx[130]),
+                  .cpx_spc_data_cx_b129       (cpx_spc_data_cx[129]),
+                  .cpx_spc_data_cx_b128       (cpx_spc_data_cx[128]),
+                  .cpx_spc_data_cx_b125       (cpx_spc_data_cx[`CPX_BINIT_STACK]),   // macro-indexed; port name implies bit 125 -- TODO confirm
+                  .cpx_spc_data_cx_b124to123  (cpx_spc_data_cx[`CPX_PERR_DINV+1:`CPX_PERR_DINV]),   // macro-indexed; port name implies [124:123] -- TODO confirm
+                  .cpx_spc_data_cx_b120to118  (cpx_spc_data_cx[`CPX_INV_CID_HI:`CPX_INV_CID_LO]),   // macro-indexed; port name implies [120:118] -- TODO confirm
+                  .cpx_spc_data_cx_b71to70    (cpx_spc_data_cx[71:70]),
+                  .cpx_spc_data_cx_b0         (cpx_spc_data_cx[0]),   // from here on: single-bit taps, port _bN <- cpx_spc_data_cx[N]
+                  .cpx_spc_data_cx_b4         (cpx_spc_data_cx[4]),
+                  .cpx_spc_data_cx_b8         (cpx_spc_data_cx[8]),
+                  .cpx_spc_data_cx_b12        (cpx_spc_data_cx[12]),
+                  .cpx_spc_data_cx_b16        (cpx_spc_data_cx[16]),
+                  .cpx_spc_data_cx_b20        (cpx_spc_data_cx[20]),
+                  .cpx_spc_data_cx_b24        (cpx_spc_data_cx[24]),
+                  .cpx_spc_data_cx_b28        (cpx_spc_data_cx[28]),
+
+                  .cpx_spc_data_cx_b32        (cpx_spc_data_cx[32]),   // this group steps by 3 bits (32,35,...,53)
+                  .cpx_spc_data_cx_b35        (cpx_spc_data_cx[35]),
+                  .cpx_spc_data_cx_b38        (cpx_spc_data_cx[38]),
+                  .cpx_spc_data_cx_b41        (cpx_spc_data_cx[41]),
+                  .cpx_spc_data_cx_b44        (cpx_spc_data_cx[44]),
+                  .cpx_spc_data_cx_b47        (cpx_spc_data_cx[47]),
+                  .cpx_spc_data_cx_b50        (cpx_spc_data_cx[50]),
+                  .cpx_spc_data_cx_b53        (cpx_spc_data_cx[53]),
+
+                  .cpx_spc_data_cx_b56        (cpx_spc_data_cx[56]),   // this group steps by 4 bits (56,60,...,84)
+                  .cpx_spc_data_cx_b60        (cpx_spc_data_cx[60]),
+                  .cpx_spc_data_cx_b64        (cpx_spc_data_cx[64]),
+                  .cpx_spc_data_cx_b68        (cpx_spc_data_cx[68]),
+                  .cpx_spc_data_cx_b72        (cpx_spc_data_cx[72]),
+                  .cpx_spc_data_cx_b76        (cpx_spc_data_cx[76]),
+                  .cpx_spc_data_cx_b80        (cpx_spc_data_cx[80]),
+                  .cpx_spc_data_cx_b84        (cpx_spc_data_cx[84]),
+
+                  .cpx_spc_data_cx_b88        (cpx_spc_data_cx[88]),   // this group steps by 3 bits (88,91,...,109)
+                  .cpx_spc_data_cx_b91        (cpx_spc_data_cx[91]),
+                  .cpx_spc_data_cx_b94        (cpx_spc_data_cx[94]),
+                  .cpx_spc_data_cx_b97        (cpx_spc_data_cx[97]),
+                  .cpx_spc_data_cx_b100       (cpx_spc_data_cx[100]),
+                  .cpx_spc_data_cx_b103       (cpx_spc_data_cx[103]),
+                  .cpx_spc_data_cx_b106       (cpx_spc_data_cx[106]),
+                  .cpx_spc_data_cx_b109       (cpx_spc_data_cx[109]),
+
+                  .cpx_spc_data_cx_b1         (cpx_spc_data_cx[1]),   // this group steps by 4 bits (1,5,...,29)
+                  .cpx_spc_data_cx_b5         (cpx_spc_data_cx[5]),
+                  .cpx_spc_data_cx_b9         (cpx_spc_data_cx[9]),
+                  .cpx_spc_data_cx_b13        (cpx_spc_data_cx[13]),
+                  .cpx_spc_data_cx_b17        (cpx_spc_data_cx[17]),
+                  .cpx_spc_data_cx_b21        (cpx_spc_data_cx[21]),
+                  .cpx_spc_data_cx_b25        (cpx_spc_data_cx[25]),
+                  .cpx_spc_data_cx_b29        (cpx_spc_data_cx[29]),
+
+                  .cpx_spc_data_cx_b57        (cpx_spc_data_cx[57]),   // this group steps by 4 bits (57,61,...,85)
+                  .cpx_spc_data_cx_b61        (cpx_spc_data_cx[61]),
+                  .cpx_spc_data_cx_b65        (cpx_spc_data_cx[65]),
+                  .cpx_spc_data_cx_b69        (cpx_spc_data_cx[69]),
+                  .cpx_spc_data_cx_b73        (cpx_spc_data_cx[73]),
+                  .cpx_spc_data_cx_b77        (cpx_spc_data_cx[77]),
+                  .cpx_spc_data_cx_b81        (cpx_spc_data_cx[81]),
+                  .cpx_spc_data_cx_b85        (cpx_spc_data_cx[85]),
+
+		  .lsu_cpx_stack_icfill_vld(lsu_cpx_stack_icfill_vld),   // last hand-written connection; everything after the AUTOINST marker is generated by verilog-mode
+                  /*AUTOINST*/
+                  // Outputs (connected to the like-named net unless the line is marked Templated)
+                  .lsu_fwd_rply_sz1_unc (lsu_fwd_rply_sz1_unc),
+                  .lsu_dcache_iob_rd_w  (lsu_dcache_iob_rd_w),
+                  .ldd_in_dfq_out       (ldd_in_dfq_out),
+                  .lsu_dfq_rd_vld_d1    (lsu_dfq_rd_vld_d1),
+                  .dfq_byp_ff_en        (dfq_byp_ff_en),
+                  .lsu_dfill_data_sel_hi(lsu_dfill_data_sel_hi),
+                  .lsu_ifill_pkt_vld    (lsu_ifu_cpxpkt_vld_i1), // Templated: net name differs from the port name
+                  .cpx_fwd_pkt_en_cx    (cpx_fwd_pkt_en_cx),
+                  .lsu_cpxpkt_type_dcd_cx(lsu_cpxpkt_type_dcd_cx[5:0]),
+                  .lsu_cpu_dcd_sel      (lsu_cpu_dcd_sel[7:0]),
+                  .lsu_cpu_uhlf_sel     (lsu_cpu_uhlf_sel),
+                  .lsu_iobrdge_rply_data_sel(lsu_iobrdge_rply_data_sel[2:0]),
+                  .lsu_iobrdge_fwd_pkt_vld(lsu_iobrdge_fwd_pkt_vld),
+                  .lsu_tlu_cpx_vld      (lsu_tlu_cpx_vld),
+                  .lsu_tlu_cpx_req      (lsu_tlu_cpx_req[3:0]),
+                  .lsu_tlu_intpkt       (lsu_tlu_intpkt[17:0]),
+                  .ld_sec_active        (ld_sec_active),
+                  .dfq_byp_sel          (dfq_byp_sel[3:0]),
+                  .lsu_cpx_ld_dtag_perror_e(lsu_cpx_ld_dtag_perror_e),
+                  .lsu_cpx_ld_dcache_perror_e(lsu_cpx_ld_dcache_perror_e),
+                  .lsu_exu_rd_m         (lsu_exu_rd_m[4:0]),
+                  .lsu_spu_strm_ack_cmplt(lsu_spu_strm_ack_cmplt[1:0]),
+                  .lsu_atm_st_cmplt_e   (lsu_atm_st_cmplt_e),
+                  .dva_svld_e           (dva_svld_e),
+                  .dfq_wptr_vld         (dfq_wptr_vld),
+                  .dfq_wptr             (dfq_wptr[4:0]),
+                  .lsu_dfq_flsh_cmplt   (lsu_dfq_flsh_cmplt[3:0]),
+                  .dfq_rptr_vld         (dfq_rptr_vld),
+                  .dfq_rptr             (dfq_rptr[4:0]),
+                  .lsu_ifu_stallreq     (lsu_ifu_stallreq),
+                  .dva_snp_addr_e       (dva_snp_addr_e[4:0]),
+                  .lsu_st_ack_dq_stb    (lsu_st_ack_dq_stb[3:0]),
+                  .lsu_cpx_rmo_st_ack   (lsu_cpx_rmo_st_ack[3:0]),
+                  .lsu_st_wr_dcache     (lsu_st_wr_dcache),
+                  .cpx_st_ack_tid0      (cpx_st_ack_tid0),
+                  .cpx_st_ack_tid1      (cpx_st_ack_tid1),   // tid1..tid3 are wired here; the FPGA_SYN_1THREAD instance above leaves these three ports open
+                  .cpx_st_ack_tid2      (cpx_st_ack_tid2),
+                  .cpx_st_ack_tid3      (cpx_st_ack_tid3),
+                  .lsu_tlu_l2_dmiss     (lsu_tlu_l2_dmiss[3:0]),
+                  .lsu_l2fill_vld       (lsu_l2fill_vld),
+                  .lsu_byp_ldd_oddrd_m  (lsu_byp_ldd_oddrd_m),
+                  .lsu_pcx_fwd_reply    (lsu_pcx_fwd_reply),
+                  .lsu_fwdpkt_vld       (lsu_fwdpkt_vld),
+                  .lsu_dcfill_active_e  (lsu_dcfill_active_e),
+                  .lsu_dfq_ld_vld       (lsu_dfq_ld_vld),
+                  .lsu_fldd_vld_en      (lsu_fldd_vld_en),
+                  .lsu_dfill_dcd_thrd   (lsu_dfill_dcd_thrd[3:0]),
+                  .lsu_fwdpkt_dest      (lsu_fwdpkt_dest[4:0]),
+                  .dva_snp_bit_wr_en_e  (dva_snp_bit_wr_en_e[15:0]),
+                  .lsu_cpx_spc_inv_vld  (lsu_cpx_spc_inv_vld),
+                  .lsu_cpx_thrdid       (lsu_cpx_thrdid[3:0]),
+                  .lsu_cpx_stack_dcfill_vld(lsu_cpx_stack_dcfill_vld),
+                  .lsu_dfq_vld_entry_w  (lsu_sscan_data[15]),    // Templated: this output drives bit 15 of lsu_sscan_data
+                  .lsu_dfq_st_vld       (lsu_dfq_st_vld),
+                  .lsu_dfq_ldst_vld     (lsu_dfq_ldst_vld),
+                  .lsu_qdp2_dfq_ld_vld  (lsu_qdp2_dfq_ld_vld),
+                  .lsu_qdp2_dfq_st_vld  (lsu_qdp2_dfq_st_vld),
+                  .lsu_dfq_vld          (lsu_dfq_vld),
+                  .lsu_dfq_byp_ff_en    (lsu_dfq_byp_ff_en),
+                  // Inputs (fed from the like-named net unless the line is marked Templated)
+                  .rclk                 (clk),                   // Templated: rclk port is fed from the local clk net
+                  .grst_l               (grst_l),
+                  .arst_l               (arst_l),
+                  .se                   (se),
+                  .rst_tri_en           (mux_drive_disable),     // Templated: note the memory instances below use mem_write_disable instead
+                  .ld_inst_vld_e        (ifu_lsu_ld_inst_e),     // Templated
+                  .ifu_pcx_pkt_b51      (ifu_lsu_pcxpkt_e[51]),  // Templated: bit 51 of ifu_lsu_pcxpkt_e
+                  .ifu_pcx_pkt_b41t40   (ifu_lsu_pcxpkt_e[41:40]), // Templated: bits 41:40 of ifu_lsu_pcxpkt_e
+                  .ifu_pcx_pkt_b10t5    (ifu_lsu_pcxpkt_e[10:5]), // Templated: bits 10:5 of ifu_lsu_pcxpkt_e
+                  .lsu_dfq_rdata_flush_bit(dfq_rdata[136]),      // Templated: first of several lsu_dfq_rdata_* inputs that are slices of dfq_rdata
+                  .lsu_dfq_rdata_b17_b0 (dfq_rdata[17:0]),       // Templated
+                  .ifu_lsu_rd_e         (ifu_lsu_rd_e[4:0]),
+                  .lmq_ld_rd1           (lmq_ld_rd1[4:0]),
+                  .lmq_ldd_vld          (lmq_ldd_vld),
+                  .dfq_tid              (dfq_tid[1:0]),
+                  .const_cpuid          (const_cpuid[2:0]),
+                  .lmq_ld_addr_b3       (lmq_ld_addr_b3),
+                  .ifu_lsu_ibuf_busy    (ifu_lsu_ibuf_busy),
+                  .ifu_lsu_inv_clear    (ifu_lsu_inv_clear),
+                  .lsu_byp_misc_sz_e    (lsu_byp_misc_sz_e[1:0]),
+                  .lsu_dfq_byp_tid      (lsu_dfq_byp_tid[1:0]),
+                  .lsu_cpx_pkt_atm_st_cmplt(lsu_cpx_pkt_atm_st_cmplt),
+                  .lsu_cpx_pkt_l2miss   (lsu_cpx_pkt_l2miss),
+                  .lsu_cpx_pkt_tid      (lsu_cpx_pkt_tid[1:0]),
+                  .lsu_cpx_pkt_invwy    (lsu_cpx_pkt_invwy[1:0]),
+                  .lsu_dfq_byp_flush    (lsu_dfq_byp_flush),
+                  .lsu_dfq_byp_type     (lsu_dfq_byp_type[5:0]),
+                  .lsu_dfq_byp_invwy_vld(lsu_dfq_byp_invwy_vld),
+                  .lsu_cpu_inv_data_b13to9(lsu_cpu_inv_data_b13to9[13:9]),
+                  .lsu_cpu_inv_data_b7to2(lsu_cpu_inv_data_b7to2[7:2]),
+                  .lsu_cpu_inv_data_b0  (lsu_cpu_inv_data_b0),
+                  .lsu_cpx_pkt_inv_pa   (lsu_cpx_pkt_inv_pa[4:0]),
+                  .lsu_cpx_pkt_ifill_type(lsu_cpx_pkt_ifill_type),
+                  .lsu_cpx_pkt_atomic   (lsu_cpx_pkt_atomic),
+                  .lsu_cpx_pkt_binit_st (lsu_cpx_pkt_binit_st),
+                  .lsu_cpx_pkt_prefetch (lsu_cpx_pkt_prefetch),
+                  .lsu_dfq_byp_binit_st (lsu_dfq_byp_binit_st),
+                  .lsu_tlbop_force_swo  (lsu_tlbop_force_swo),
+                  .lsu_iobrdge_tap_rq_type(lsu_iobrdge_tap_rq_type[7:3]),
+                  .lsu_dcache_tag_perror_g(lsu_dcache_tag_perror_g),
+                  .lsu_dcache_data_perror_g(lsu_dcache_data_perror_g),
+                  .lsu_cpx_pkt_perror_iinv(lsu_cpx_pkt_perror_iinv),
+                  .lsu_cpx_pkt_perror_dinv(lsu_cpx_pkt_perror_dinv),
+                  .lsu_cpx_pkt_perror_set(lsu_cpx_pkt_perror_set[1:0]),
+                  .lsu_l2fill_fpld_e    (lsu_l2fill_fpld_e),     // Templated
+                  .lsu_cpx_pkt_strm_ack (lsu_cpx_pkt_strm_ack),
+                  .ifu_lsu_memref_d     (ifu_lsu_memref_d),
+                  .lsu_fwdpkt_pcx_rq_sel(lsu_fwdpkt_pcx_rq_sel),
+                  .lsu_imiss_pcx_rq_sel_d1(lsu_imiss_pcx_rq_sel_d1),
+                  .lsu_dfq_byp_cpx_inv  (lsu_dfq_byp_cpx_inv),
+                  .lsu_dfq_byp_stack_adr_b54(lsu_dfq_byp_stack_adr_b54[1:0]),
+                  .lsu_dfq_byp_stack_wrway(lsu_dfq_byp_stack_wrway[1:0]),
+                  .lsu_dfq_rdata_st_ack_type(dfq_rdata[`DFQ_WIDTH-4]), // Templated: bit index is relative to the DFQ_WIDTH macro
+                  .lsu_dfq_rdata_stack_dcfill_vld(dfq_rdata[130]), // Templated: same bit index as the cpx_spc_data_cx_b130 tap above
+                  .lsu_dfq_rdata_stack_iinv_vld(dfq_rdata[128]), // Templated: same bit index as the cpx_spc_data_cx_b128 tap above
+                  .lsu_dfq_rdata_cpuid  (dfq_rdata[`CPX_INV_CID_HI:`CPX_INV_CID_LO]), // Templated: same macro range as cpx_spc_data_cx_b120to118 above
+                  .lsu_dfq_byp_atm      (lsu_dfq_byp_atm),
+                  .lsu_ld_inst_vld_g    (lsu_ld_inst_vld_g[3:0]),
+                  .lsu_dfq_rdata_type   (dfq_rdata[`DFQ_WIDTH-1:`DFQ_WIDTH-6]), // Templated: top six bits of dfq_rdata as sized by DFQ_WIDTH
+                  .lsu_dfq_rdata_invwy_vld(dfq_rdata[`CPX_WYVLD]), // Templated: same macro index as cpx_spc_data_cx_b133 above
+                  .ifu_lsu_fwd_data_vld (ifu_lsu_fwd_data_vld),
+                  .ifu_lsu_fwd_wr_ack   (ifu_lsu_fwd_wr_ack),
+                  .lsu_dfq_rdata_rq_type(dfq_rdata[`CPX_WIDTH-2:`CPX_WIDTH-5]), // Templated: four-bit slice positioned relative to CPX_WIDTH
+                  .lsu_dfq_rdata_b103   (dfq_rdata[103]),        // Templated
+                  .sehold               (sehold));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+/*   cmp_sram_redhdr AUTO_TEMPLATE( 
+                            .fuse_ary_wren(fuse_dcd_wren),
+                            .fuse_ary_rid(fuse_dcd_rid[5:0]),
+                            .fuse_ary_repair_value(fuse_dcd_repair_value[7:0]),
+                            .fuse_ary_repair_en(fuse_dcd_repair_en[1:0]),
+                            .spc_efc_xfuse_data(spc_efc_dfuse_data),
+
+                            .efc_spc_xfuse_data(efc_spc_dfuse_data),
+                            .efc_spc_xfuse_ashift(efc_spc_dfuse_ashift),
+                            .efc_spc_xfuse_dshift(efc_spc_dfuse_dshift),
+                            .ary_fuse_repair_value(dcd_fuse_repair_value[7:0]),
+                            .ary_fuse_repair_en(dcd_fuse_repair_en[1:0]),
+                            .scanin   (si0),
+                            .rclk     (clk));
+ */
+   
+cmp_sram_redhdr dcdhdr(   // dcdhdr: exchanges the fuse_dcd_* / dcd_fuse_* repair signals with the dcache (bw_r_dcd) instance below
+                       .scanout         (scan0_1),   // scanout -> scan0_1 (consumer is outside this excerpt)
+                       /*AUTOINST*/
+                       // Outputs
+                       .fuse_ary_wren   (fuse_dcd_wren),         // Templated: consumed by the dcache instance's fuse_dcd_wren input
+                       .fuse_ary_rid    (fuse_dcd_rid[5:0]),     // Templated: 6 bits driven here; the dcache instance takes only [2:0]
+                       .fuse_ary_repair_value(fuse_dcd_repair_value[7:0]), // Templated
+                       .fuse_ary_repair_en(fuse_dcd_repair_en[1:0]), // Templated
+                       .spc_efc_xfuse_data(spc_efc_dfuse_data),  // Templated: generic xfuse port mapped onto the dfuse net
+                       // Inputs
+                       .rclk            (clk),                   // Templated
+                       .se              (se),
+                       .scanin          (si0),                   // Templated: scanin <- si0 (driver is outside this excerpt)
+                       .arst_l          (arst_l),
+                       .testmode_l      (testmode_l),
+                       .efc_spc_fuse_clk1(efc_spc_fuse_clk1),
+                       .efc_spc_fuse_clk2(efc_spc_fuse_clk2),
+                       .efc_spc_xfuse_data(efc_spc_dfuse_data),  // Templated: xfuse ports are mapped onto the dfuse nets by the template
+                       .efc_spc_xfuse_ashift(efc_spc_dfuse_ashift), // Templated
+                       .efc_spc_xfuse_dshift(efc_spc_dfuse_dshift), // Templated
+                       .ary_fuse_repair_value(dcd_fuse_repair_value[7:0]), // Templated: driven by the dcache instance's dcd_fuse_repair_value output
+                       .ary_fuse_repair_en(dcd_fuse_repair_en[1:0])); // Templated: driven by the dcache instance's dcd_fuse_repair_en output
+/* bw_r_dcd  AUTO_TEMPLATE (
+                   .rst_tri_en           (mem_write_disable),
+                   //.sehold               (),
+                   .rclk                 (clk),
+                   .dcache_alt_addr_e    (lsu_dcache_fill_addr_e[10:3]),
+                   .dcache_alt_rsel_way_e(lsu_bist_rsel_way_e[3:0]), 
+                   .dcache_rd_addr_e     (exu_lsu_early_va_e[10:3]), 
+                   .dcache_rsel_way_wb   (cache_way_hit[3:0]),
+                   .dcache_wdata_e       (lsu_dcache_fill_data_e[143:0]), 
+                   .dcache_wr_rway_e     (lsu_dcache_fill_way_e[3:0]),
+                   .dcache_wvld_e        (lsu_dcache_wr_vld_e));
+*/
+   
+bw_r_dcd dcache (   // dcache: bw_r_dcd instance; produces the dcache_rdata*/rparity* read outputs and the dcd_fuse_repair_* outputs
+                 .so                    (scan1_2),   // so -> scan1_2, which is the .si of the tlbdp instance below
+                 .si                    (scan1_1),   // si <- scan1_1, the .so of the qctl2 instance above
+                 /*AUTOINST*/
+                 // Outputs
+                 .dcache_rdata_wb       (dcache_rdata_wb[63:0]),
+                 .dcache_rparity_wb     (dcache_rparity_wb[7:0]),
+                 .dcache_rparity_err_wb (dcache_rparity_err_wb),
+                 .dcache_rdata_msb_w0_m (dcache_rdata_msb_w0_m[7:0]),
+                 .dcache_rdata_msb_w1_m (dcache_rdata_msb_w1_m[7:0]),
+                 .dcache_rdata_msb_w2_m (dcache_rdata_msb_w2_m[7:0]),
+                 .dcache_rdata_msb_w3_m (dcache_rdata_msb_w3_m[7:0]),
+                 .dcd_fuse_repair_value (dcd_fuse_repair_value[7:0]),   // feeds the dcdhdr instance's ary_fuse_repair_value input
+                 .dcd_fuse_repair_en    (dcd_fuse_repair_en[1:0]),      // feeds the dcdhdr instance's ary_fuse_repair_en input
+                 // Inputs
+                 .dcache_rd_addr_e      (exu_lsu_early_va_e[10:3]), // Templated: 8-bit slice of the same VA net that indexes dva/dtag with [10:4]
+                 .dcache_alt_addr_e     (lsu_dcache_fill_addr_e[10:3]), // Templated
+                 .dcache_rvld_e         (dcache_rvld_e),
+                 .dcache_wvld_e         (lsu_dcache_wr_vld_e),   // Templated
+                 .dcache_wdata_e        (lsu_dcache_fill_data_e[143:0]), // Templated: 144-bit write data bus
+                 .dcache_wr_rway_e      (lsu_dcache_fill_way_e[3:0]), // Templated: same net also drives dtag's dec_wrway_x
+                 .dcache_byte_wr_en_e   (dcache_byte_wr_en_e[15:0]),
+                 .dcache_alt_rsel_way_e (lsu_bist_rsel_way_e[3:0]), // Templated
+                 .dcache_rsel_way_wb    (cache_way_hit[3:0]),    // Templated: cache_way_hit is also an input of the tlbdp instance
+                 .dcache_alt_mx_sel_e   (dcache_alt_mx_sel_e),
+                 .se                    (se),
+                 .sehold                (sehold),
+                 .rst_tri_en            (mem_write_disable),     // Templated: mem_write_disable here, whereas qctl2 above uses mux_drive_disable
+                 .arst_l                (arst_l),
+                 .rclk                  (clk),                   // Templated
+                 .dcache_alt_data_w0_m  (dcache_alt_data_w0_m[63:0]),
+                 .dcache_arry_data_sel_m(dcache_arry_data_sel_m),
+                 .efc_spc_fuse_clk1     (efc_spc_fuse_clk1),
+                 .fuse_dcd_wren         (fuse_dcd_wren),   // fuse_dcd_* inputs are driven by the dcdhdr instance above
+                 .fuse_dcd_rid          (fuse_dcd_rid[2:0]),   // only the low 3 of the 6 bits that dcdhdr drives are used here
+                 .fuse_dcd_repair_value (fuse_dcd_repair_value[7:0]),
+                 .fuse_dcd_repair_en    (fuse_dcd_repair_en[1:0]));
+/* bw_r_rf16x32  AUTO_TEMPLATE (
+             .rst_tri_en           (mem_write_disable),
+             .rclk             (clk),
+             .bit_wen          (dva_bit_wr_en_e[15:0]),
+             .din                (dva_din_e), 
+             .dout               (dva_vld_m[3:0]),
+             .rd_adr1            (exu_lsu_early_va_e[10:4]),
+             .rd_adr1_sel        (1'b1),
+             .rd_adr2            (7'b0),
+             .rd_en              (ifu_lsu_ld_inst_e),
+             .reset_l            (arst_l),
+             //.sehold             (),
+             .wr_adr             (dva_wr_adr_e[10:6]),
+             .wr_en              (lsu_dtagv_wr_vld_e));
+*/   
+
+bw_r_rf16x32 dva ( // dva: bw_r_rf16x32 instance whose 4-bit dout is the dva_vld_m net
+                  .so                   (short_scan0_1),   // so -> short_scan0_1, the .si of the dtag instance below
+                  .si                   (short_si0),       // si <- short_si0 (driver is outside this excerpt)
+                   /*AUTOINST*/
+                  // Outputs
+                  .dout                 (dva_vld_m[3:0]),        // Templated: NOTE(review) tagdp below consumes dva_vld_m_bf, not this net directly -- the buffering is outside this excerpt
+                  // Inputs
+                  .rclk                 (clk),                   // Templated
+                  .se                   (se),
+                  .reset_l              (arst_l),                // Templated: reset_l port is fed from arst_l
+                  .sehold               (sehold),
+                  .rst_tri_en           (mem_write_disable),     // Templated
+                  .rd_adr1              (exu_lsu_early_va_e[10:4]), // Templated: 7-bit read address from VA bits 10:4
+                  .rd_adr2              (7'b0),                  // Templated: second read address tied to zero
+                  .rd_adr1_sel          (1'b1),                  // Templated: select tied high; presumably rd_adr1 is always chosen -- confirm in bw_r_rf16x32
+                  .rd_en                (ifu_lsu_ld_inst_e),     // Templated
+                  .wr_adr               (dva_wr_adr_e[10:6]),    // Templated: 5-bit write address (bits 10:6) versus the 7-bit read address
+                  .wr_en                (lsu_dtagv_wr_vld_e),    // Templated
+                  .bit_wen              (dva_bit_wr_en_e[15:0]), // Templated: 16-bit per-bit write enable
+                  .din                  (dva_din_e));             // Templated
+/* bw_r_idct  AUTO_TEMPLATE (
+               .rst_tri_en           (mem_write_disable),
+               //.sehold                  (),
+               .rclk                    (clk),
+               .adj                     (lsu_dctag_mrgn[3:0]),   
+               .index0_x                (exu_lsu_early_va_e[10:4]),
+               .index1_x                (lsu_dcache_fill_addr_e[10:4]),
+               .index_sel_x             (lsu_dtag_index_sel_x_e),
+               .rdreq_x                 (lsu_ldst_inst_vld_e),
+               .rdtag_w0_y              (dtag_rdata_w0_m[32:0]),
+               .rdtag_w1_y              (dtag_rdata_w1_m[32:0]),
+               .rdtag_w2_y              (dtag_rdata_w2_m[32:0]),
+               .rdtag_w3_y              (dtag_rdata_w3_m[32:0]),
+               .wrreq_x                 (lsu_dtag_wrreq_x_e),    
+               //.wrtag_w0_y                 ({3'b000,dtag_wdata_m[29:0]}),
+               //.wrtag_w1_y                 ({3'b000,dtag_wdata_m[29:0]}),
+               //.wrtag_w2_y                 ({3'b000,dtag_wdata_m[29:0]}),
+               //.wrtag_w3_y                 ({3'b000,dtag_wdata_m[29:0]}),
+               .dec_wrway_x                (lsu_dcache_fill_way_e[3:0]),
+               .reset_l                 (arst_l));    
+*/ 
+
+bw_r_idct dtag (   // dtag: bw_r_idct instance producing the four 33-bit dtag_rdata_w0..w3_m read buses
+                .so                     (short_scan0_2),   // so -> short_scan0_2, the .si of the excpctl instance below
+                .si                     (short_scan0_1),   // si <- short_scan0_1, the .so of the dva instance above
+		.wrtag_w0_y		({3'b000,dtag_wdata_m[29:0]}),   // hand-wired (commented out in the template): all four ways get the same zero-padded 33-bit value
+		.wrtag_w1_y		({3'b000,dtag_wdata_m[29:0]}),
+		.wrtag_w2_y		({3'b000,dtag_wdata_m[29:0]}),
+		.wrtag_w3_y		({3'b000,dtag_wdata_m[29:0]}),
+                /*AUTOINST*/
+                // Outputs
+                .rdtag_w0_y             (dtag_rdata_w0_m[32:0]), // Templated: 33 bits driven; the tagdp instance below reads only [29:0]
+                .rdtag_w1_y             (dtag_rdata_w1_m[32:0]), // Templated
+                .rdtag_w2_y             (dtag_rdata_w2_m[32:0]), // Templated
+                .rdtag_w3_y             (dtag_rdata_w3_m[32:0]), // Templated
+                // Inputs
+                .rclk                   (clk),                   // Templated
+                .se                     (se),
+                .reset_l                (arst_l),                // Templated: reset_l port is fed from arst_l
+                .sehold                 (sehold),
+                .rst_tri_en             (mem_write_disable),     // Templated
+                .index0_x               (exu_lsu_early_va_e[10:4]), // Templated: same VA slice as dva's rd_adr1
+                .index1_x               (lsu_dcache_fill_addr_e[10:4]), // Templated: slice of the fill address that also feeds dcache_alt_addr_e
+                .index_sel_x            (lsu_dtag_index_sel_x_e), // Templated: presumably picks index0_x vs index1_x -- confirm in bw_r_idct
+                .dec_wrway_x            (lsu_dcache_fill_way_e[3:0]), // Templated: same net as dcache's dcache_wr_rway_e
+                .rdreq_x                (lsu_ldst_inst_vld_e),   // Templated
+                .wrreq_x                (lsu_dtag_wrreq_x_e),    // Templated
+                .adj                    (lsu_dctag_mrgn[3:0]));   // Templated
+/*lsu_tlbdp  AUTO_TEMPLATE (
+               .rclk  (clk));
+*/ 
+
+lsu_tlbdp tlbdp (   // tlbdp: lsu_tlbdp instance; takes tlb_rd_tte_tag/data, tlb_pgnum and cache_way_hit and drives the outputs listed below
+                 .so                    (scan1_3),   // so -> scan1_3, the .si of the tagdp instance below
+                 .si                    (scan1_2),   // si <- scan1_2, the .so of the dcache instance above
+                 .tlb_rd_tte_data_parity  (tlb_rd_tte_data[42]),   // hand-wired: top bit of the 43-bit tlb_rd_tte_data bus, which is also passed whole below
+                 .tlb_rd_tte_tag_parity   (tlb_rd_tte_tag[54]),   // hand-wired: bit 54 of the 59-bit tlb_rd_tte_tag bus, which is also passed whole below
+                 /*AUTOINST*/
+                 // Outputs
+                 .lsu_tlb_rd_data       (lsu_tlb_rd_data[63:0]),   // also an input of the tagdp instance below
+                 .tlb_pgnum_buf         (tlb_pgnum_buf[39:10]),
+                 .tlb_pgnum_buf2        (tlb_pgnum_buf2[39:37]),
+                 .tlb_rd_tte_data_ie_buf(tlb_rd_tte_data_ie_buf),
+                 .stb_cam_vld           (stb_cam_vld),
+                 .tte_data_parity_error (tte_data_parity_error),
+                 .tte_tag_parity_error  (tte_tag_parity_error),
+                 .cache_way_hit_buf1    (cache_way_hit_buf1[3:0]),
+                 .cache_way_hit_buf2    (cache_way_hit_buf2[3:0]),
+                 .lsu_tlu_tte_pg_sz_g   (lsu_tlu_tte_pg_sz_g[2:0]),
+                 // Inputs
+                 .rclk                  (clk),                   // Templated: the only templated connection for this instance
+                 .se                    (se),
+                 .tlb_rd_tte_tag        (tlb_rd_tte_tag[58:0]),
+                 .tlb_rd_tte_data       (tlb_rd_tte_data[42:0]),
+                 .lsu_tlb_data_rd_vld_g (lsu_tlb_data_rd_vld_g),
+                 .tlb_pgnum             (tlb_pgnum[39:10]),
+                 .asi_internal_m        (asi_internal_m),
+                 .lsu_alt_space_m       (lsu_alt_space_m),
+                 .tlb_cam_hit           (tlb_cam_hit),
+                 .ifu_lsu_ld_inst_e     (ifu_lsu_ld_inst_e),
+                 .lsu_dtlb_bypass_e     (lsu_dtlb_bypass_e),
+                 .cache_way_hit         (cache_way_hit[3:0]));   // same net that selects the dcache read way (dcache_rsel_way_wb)
+
+/*
+lsu_tagdp AUTO_TEMPLATE (
+                  .dva_vld_m              (dva_vld_m_bf[3:0]),
+                  .rclk                   (clk));
+*/
+
+lsu_tagdp tagdp (   // tagdp: lsu_tagdp instance; consumes the dtag read buses and dva valid bits, drives lsu_misc_rdata_w2 and lsu_rd_dtag_parity_g
+                 .so                    (scan1_4),   // so -> scan1_4 (consumer is outside this excerpt)
+                 .si                    (scan1_3),   // si <- scan1_3, the .so of the tlbdp instance above
+		             .lsu_local_ldxa_data_g ({15'b0,lsu_local_ldxa_data_g[32:0]}),   // hand-wired: low 33 bits zero-extended to 48 bits
+                 /*AUTOINST*/
+                 // Outputs
+                 .lsu_misc_rdata_w2     (lsu_misc_rdata_w2[63:0]),
+                 .lsu_rd_dtag_parity_g  (lsu_rd_dtag_parity_g[3:0]),
+                 // Inputs
+                 .rclk                  (clk),                   // Templated
+                 .se                    (se),
+                 .lsu_va_wtchpt_addr    (lsu_va_wtchpt_addr[47:3]),
+                 .lsu_va_wtchpt_sel_g   (lsu_va_wtchpt_sel_g),
+                 .dva_vld_m             (dva_vld_m_bf[3:0]),     // Templated: takes the _bf net, not the dva_vld_m net driven by the dva instance
+                 .dtag_rdata_w0_m       (dtag_rdata_w0_m[29:0]),   // only [29:0] of each 33-bit bus driven by the dtag instance is used
+                 .dtag_rdata_w1_m       (dtag_rdata_w1_m[29:0]),
+                 .dtag_rdata_w2_m       (dtag_rdata_w2_m[29:0]),
+                 .dtag_rdata_w3_m       (dtag_rdata_w3_m[29:0]),
+                 .lsu_dtag_rsel_m       (lsu_dtag_rsel_m[3:0]),
+                 .lsu_local_ldxa_sel_g  (lsu_local_ldxa_sel_g),
+                 .lsu_tlb_rd_data       (lsu_tlb_rd_data[63:0]),   // driven by the tlbdp instance above
+                 .lsu_local_ldxa_tlbrd_sel_g(lsu_local_ldxa_tlbrd_sel_g),
+                 .lsu_local_diagnstc_tagrd_sel_g(lsu_local_diagnstc_tagrd_sel_g));
+
+/*
+lsu_excpctl AUTO_TEMPLATE (
+                .ifu_tlu_inst_vld_m     (ifu_tlu_inst_vld_m_bf2),
+                .tlb_rd_tte_data_ebit   (tlb_rd_tte_data[`STLB_DATA_E]),
+                .tlb_rd_tte_data_pbit   (tlb_rd_tte_data[`STLB_DATA_P]),
+                .tlb_rd_tte_data_nfobit (tlb_rd_tte_data[`STLB_DATA_NFO]),
+                .tlb_rd_tte_data_wbit   (tlb_rd_tte_data[`STLB_DATA_W]),
+		.lsu_ldst_va_b39_m	(lsu_ldst_va_m_buf[39]),
+                .tlb_pgnum_b39          (tlb_pgnum[39]),
+       	        .lsu_sun4r_va_m_l	      (lsu_ldst_va_m[10]),
+	              .lsu_sun4r_pgsz_b2t0_e  ({exu_lsu_rs3_data_e[48],exu_lsu_rs3_data_e[62:61]}),
+	              .lsu_sun4v_pgsz_b2t0_e  (exu_lsu_rs3_data_e[2:0]),
+                .ld_inst_vld_e          (ifu_lsu_ld_inst_e),     
+                .st_inst_vld_e          (ifu_lsu_st_inst_e),
+                .rclk                   (clk));     
+*/
+   
+lsu_excpctl excpctl ( // lsu_excpctl instance; ports marked "Templated" come from the AUTO_TEMPLATE above, the rest connect to same-named nets
+                     .so                (short_scan0_3), // si/so sit between nets short_scan0_2 and short_scan0_3
+                     .si                (short_scan0_2),
+
+                 /*AUTOINST*/
+                     // Outputs
+                     .lsu_exu_st_dtlb_perr_g(lsu_exu_st_dtlb_perr_g),
+                     .lsu_ffu_st_dtlb_perr_g(lsu_ffu_st_dtlb_perr_g),
+                     .lsu_defr_trp_taken_g(lsu_defr_trp_taken_g),
+                     .lsu_tlu_defr_trp_taken_g(lsu_tlu_defr_trp_taken_g),
+                     .lsu_mmu_defr_trp_taken_g(lsu_mmu_defr_trp_taken_g),
+                     .lsu_st_dtlb_perr_g(lsu_st_dtlb_perr_g[3:0]),
+                     .lsu_dmmu_sfsr_trp_wr(lsu_dmmu_sfsr_trp_wr[3:0]),
+                     .lsu_dsfsr_din_g   (lsu_dsfsr_din_g[23:0]),
+                     .lsu_tlb_perr_ld_rq_kill_w(lsu_tlb_perr_ld_rq_kill_w),
+                     .lsu_spu_early_flush_g(lsu_spu_early_flush_g),
+                     .lsu_local_early_flush_g(lsu_local_early_flush_g),
+                     .lsu_tlu_early_flush_w(lsu_tlu_early_flush_w),
+                     .lsu_tlu_early_flush2_w(lsu_tlu_early_flush2_w),
+                     .lsu_ttype_vld_m2  (lsu_ttype_vld_m2),
+                     .lsu_ttype_vld_m2_bf1(lsu_ttype_vld_m2_bf1), // the lsu_dctl AUTO_TEMPLATE below maps its lsu_ttype_vld_m2 port to this _bf1 net
+                     .lsu_ifu_flush_pipe_w(lsu_ifu_flush_pipe_w),
+                     .lsu_exu_flush_pipe_w(lsu_exu_flush_pipe_w),
+                     .lsu_mmu_flush_pipe_w(lsu_mmu_flush_pipe_w),
+                     .lsu_ffu_flush_pipe_w(lsu_ffu_flush_pipe_w),
+                     .lsu_tlu_wtchpt_trp_g(lsu_tlu_wtchpt_trp_g),
+                     .lsu_tlu_dmmu_miss_g(lsu_tlu_dmmu_miss_g),
+                     .lsu_tlu_misalign_addr_ldst_atm_m(lsu_tlu_misalign_addr_ldst_atm_m),
+                     .lsu_tlu_daccess_excptn_g(lsu_tlu_daccess_excptn_g),
+                     .lsu_tlu_daccess_prot_g(lsu_tlu_daccess_prot_g),
+                     .lsu_tlu_priv_action_g(lsu_tlu_priv_action_g),
+                     .lsu_ifu_tlb_data_su(lsu_ifu_tlb_data_su),
+                     .lsu_ifu_tlb_data_ue(lsu_ifu_tlb_data_ue),
+                     .lsu_ifu_tlb_tag_ue(lsu_ifu_tlb_tag_ue),
+                     .lsu_tlu_ttype_m2  (lsu_tlu_ttype_m2[8:0]),
+                     .lsu_tlu_ttype_vld_m2(lsu_tlu_ttype_vld_m2),
+                     .stb_cam_sqsh_msk  (stb_cam_sqsh_msk[7:0]),
+                     .stb_cam_hit_bf    (stb_cam_hit_bf), // _bf/_bf1 outputs sit alongside the stb_cam_hit input further down
+                     .stb_cam_hit_bf1   (stb_cam_hit_bf1), // the lsu_dctl AUTO_TEMPLATE below maps its stb_cam_hit port to this net
+                     .tte_data_perror_unc(tte_data_perror_unc),
+                     .asi_tte_data_perror(asi_tte_data_perror),
+                     .asi_tte_tag_perror(asi_tte_tag_perror),
+                     // Inputs
+                     .rclk              (clk),                   // Templated
+                     .se                (se),
+                     .grst_l            (grst_l),
+                     .arst_l            (arst_l),
+                     .tlb_rd_tte_data_ebit(tlb_rd_tte_data[`STLB_DATA_E]), // Templated
+                     .tlb_rd_tte_data_pbit(tlb_rd_tte_data[`STLB_DATA_P]), // Templated
+                     .tlb_rd_tte_data_nfobit(tlb_rd_tte_data[`STLB_DATA_NFO]), // Templated
+                     .tlb_rd_tte_data_wbit(tlb_rd_tte_data[`STLB_DATA_W]), // Templated
+                     .tlb_cam_hit       (tlb_cam_hit),
+                     .tlb_pgnum_b39     (tlb_pgnum[39]),         // Templated
+                     .lsu_ldst_va_b39_m (lsu_ldst_va_m_buf[39]), // Templated
+                     .lsu_sun4r_va_m_l  (lsu_ldst_va_m[10]),     // Templated
+                     .lsu_sun4r_pgsz_b2t0_e({exu_lsu_rs3_data_e[48],exu_lsu_rs3_data_e[62:61]}), // Templated
+                     .lsu_sun4v_pgsz_b2t0_e(exu_lsu_rs3_data_e[2:0]), // Templated
+                     .tlu_early_flush_pipe_w(tlu_early_flush_pipe_w),
+                     .ifu_lsu_flush_w   (ifu_lsu_flush_w), // this net is listed among the Outputs of the dctl instance below
+                     .ifu_lsu_nceen     (ifu_lsu_nceen[3:0]),
+                     .lsu_tlb_asi_data_perr_g(lsu_tlb_asi_data_perr_g),
+                     .lsu_tlb_asi_tag_perr_g(lsu_tlb_asi_tag_perr_g),
+                     .stb_state_vld0    (stb_state_vld0[7:0]), // four 8-bit vectors, stb_state_vld0..3
+                     .stb_state_vld1    (stb_state_vld1[7:0]),
+                     .stb_state_vld2    (stb_state_vld2[7:0]),
+                     .stb_state_vld3    (stb_state_vld3[7:0]),
+                     .ifu_tlu_thrid_e   (ifu_tlu_thrid_e[1:0]),
+                     .tlu_lsu_priv_trap_m(tlu_lsu_priv_trap_m),
+                     .tlu_lsu_pstate_priv(tlu_lsu_pstate_priv[3:0]),
+                     .st_inst_vld_e     (ifu_lsu_st_inst_e),     // Templated
+                     .ld_inst_vld_e     (ifu_lsu_ld_inst_e),     // Templated
+                     .ifu_lsu_alt_space_e(ifu_lsu_alt_space_e),
+                     .lsu_ldst_va_m     (lsu_ldst_va_m[7:0]), // low byte only; bit [10] of the same net feeds lsu_sun4r_va_m_l above
+                     .hpv_priv_m        (hpv_priv_m),
+                     .hpstate_en_m      (hpstate_en_m),
+                     .stb_cam_hit       (stb_cam_hit),
+                     .dtlb_bypass_m     (dtlb_bypass_m),
+                     .lsu_alt_space_m   (lsu_alt_space_m),
+                     .atomic_m          (atomic_m),
+                     .ldst_dbl_m        (ldst_dbl_m),
+                     .fp_ldst_m         (fp_ldst_m),
+                     .lda_internal_m    (lda_internal_m),
+                     .sta_internal_m    (sta_internal_m),
+                     .cam_real_m        (cam_real_m),
+                     .data_rd_vld_g     (data_rd_vld_g),
+                     .tag_rd_vld_g      (tag_rd_vld_g),
+                     .ldst_sz_m         (ldst_sz_m[1:0]),
+                     .asi_internal_m    (asi_internal_m),
+                     .rd_only_ltlb_asi_e(rd_only_ltlb_asi_e),
+                     .wr_only_ltlb_asi_e(wr_only_ltlb_asi_e),
+                     .dfill_tlb_asi_e   (dfill_tlb_asi_e),
+                     .ifill_tlb_asi_e   (ifill_tlb_asi_e),
+                     .nofault_asi_m     (nofault_asi_m),
+                     .as_if_user_asi_m  (as_if_user_asi_m),
+                     .atomic_asi_m      (atomic_asi_m),
+                     .phy_use_ec_asi_m  (phy_use_ec_asi_m),
+                     .phy_byp_ec_asi_m  (phy_byp_ec_asi_m),
+                     .quad_asi_m        (quad_asi_m),
+                     .binit_quad_asi_m  (binit_quad_asi_m),
+                     .blk_asi_m         (blk_asi_m),
+                     .recognized_asi_m  (recognized_asi_m),
+                     .strm_asi_m        (strm_asi_m),
+                     .mmu_rd_only_asi_m (mmu_rd_only_asi_m),
+                     .rd_only_asi_m     (rd_only_asi_m),
+                     .wr_only_asi_m     (wr_only_asi_m),
+                     .unimp_asi_m       (unimp_asi_m),
+                     .lsu_nonalt_nucl_access_m(lsu_nonalt_nucl_access_m),
+                     .va_wtchpt_cmp_en_m(va_wtchpt_cmp_en_m),
+                     .lsu_va_match_b47_b32_m(lsu_va_match_b47_b32_m),
+                     .lsu_va_match_b31_b3_m(lsu_va_match_b31_b3_m),
+                     .va_wtchpt_msk_match_m(va_wtchpt_msk_match_m), // listed among the Outputs of the dctldp instance below
+                     .ifu_tlu_inst_vld_m(ifu_tlu_inst_vld_m_bf2), // Templated
+                     .exu_tlu_misalign_addr_jmpl_rtn_m(exu_tlu_misalign_addr_jmpl_rtn_m),
+                     .exu_tlu_va_oor_m  (exu_tlu_va_oor_m),
+                     .tlu_dsfsr_flt_vld (tlu_dsfsr_flt_vld[3:0]),
+                     .tlu_lsu_pstate_cle(tlu_lsu_pstate_cle[3:0]),
+                     .tlu_lsu_pstate_am (tlu_lsu_pstate_am[3:0]),
+                     .lsu_excpctl_asi_state_m(lsu_excpctl_asi_state_m[7:0]), // listed among the Outputs of the dctldp instance below
+                     .lsu_tlu_nonalt_ldst_m(lsu_tlu_nonalt_ldst_m),
+                     .lsu_squash_va_oor_m(lsu_squash_va_oor_m),
+                     .lsu_tlu_xslating_ldst_m(lsu_tlu_xslating_ldst_m),
+                     .lsu_tlu_ctxt_sel_m(lsu_tlu_ctxt_sel_m[2:0]),
+                     .lsu_tlu_write_op_m(lsu_tlu_write_op_m),
+                     .lsu_memref_m      (lsu_memref_m),
+                     .lsu_flsh_inst_m   (lsu_flsh_inst_m),
+                     .tte_data_parity_error(tte_data_parity_error), // both parity-error nets are also connected to the instance preceding tagdp
+                     .tte_tag_parity_error(tte_tag_parity_error)); // last port; closes the excpctl instantiation
+   
+/*lsu_dctldp AUTO_TEMPLATE (
+               .thread0_m               (lsu_dctldp_thread0_m),
+               .thread1_m               (lsu_dctldp_thread1_m),
+               .thread2_m               (lsu_dctldp_thread2_m),
+               .thread3_m               (lsu_dctldp_thread3_m),
+               .rst_tri_en              (mux_drive_disable),
+               .rclk                    (clk),
+               .rst_l                   (dctl_rst_l),
+      	       .tlu_dtlb_tte_tag_b58t56 (tlu_dtlb_tte_tag_w2[58:56]),
+               .lsu_dcfill_addr_e       (lsu_dcache_fill_addr_e_err[10:4]));
+*/    
+
+`ifdef FPGA_SYN_1THREAD
+   
+lsu_dctldp dctldp ( // FPGA_SYN_1THREAD variant: thread 1..3 outputs are left open and thread 1..3 select/enable inputs are tied to 1'b0
+                   .so                  (short_scan1_2), // si/so sit between nets short_scan1_1 and short_scan1_2
+                   .si                  (short_scan1_1),
+		   .lsu_iobrdge_rd_data	(lsu_iobrdge_rd_data[43:0]),
+		   .lsu_local_ldxa_data_g(lsu_local_ldxa_data_g[47:0]), // full [47:0] slice here; the tagdp instance only attaches [32:0] of this net
+                   /*AUTOINST*/
+                   // Outputs
+                   .asi_d               (asi_d[7:0]),
+                   .lsu_excpctl_asi_state_m(lsu_excpctl_asi_state_m[7:0]), // also connected to the excpctl instance above
+                   .lsu_dctl_asi_state_m(lsu_dctl_asi_state_m[7:0]),
+                   .lsu_spu_asi_state_e (lsu_spu_asi_state_e[7:0]),
+                   .lsu_tlu_rsr_data_e  (lsu_tlu_rsr_data_e[7:0]),
+                   .lsu_asi_state       (lsu_asi_state[7:0]),
+                   .lsu_asi_reg0        (lsu_asi_reg0_t[7:0]),
+                   .lsu_asi_reg1        (), // unconnected in this 1-thread variant
+                   .lsu_asi_reg2        (), // unconnected in this 1-thread variant
+                   .lsu_asi_reg3        (), // unconnected in this 1-thread variant
+                   .lsu_t0_pctxt_state  (lsu_t0_pctxt_state_t[12:0]),
+                   .lsu_t1_pctxt_state  (), // unconnected in this 1-thread variant
+                   .lsu_t2_pctxt_state  (), // unconnected in this 1-thread variant
+                   .lsu_t3_pctxt_state  (), // unconnected in this 1-thread variant
+                   .lsu_tlu_dside_ctxt_m(lsu_tlu_dside_ctxt_m[12:0]),
+                   .lsu_tlu_pctxt_m     (lsu_tlu_pctxt_m[12:0]),
+                   .tlb_ctxt            (tlb_ctxt[12:0]),
+                   .lsu_pid_state0      (lsu_pid_state0_t[2:0]),
+                   .lsu_pid_state1      (), // unconnected in this 1-thread variant
+                   .lsu_pid_state2      (), // unconnected in this 1-thread variant
+                   .lsu_pid_state3      (), // unconnected in this 1-thread variant
+                   .lsu_dtlb_cam_pid_e  (lsu_dtlb_cam_pid_e[2:0]),
+                   .bist_ctl_reg_in     (bist_ctl_reg_in[6:0]),
+                   .lsu_ifu_direct_map_l1(lsu_ifu_direct_map_l1),
+                   .dc_direct_map       (dc_direct_map),
+                   .lsu_ictag_mrgn      (lsu_ictag_mrgn[3:0]),
+                   .lsu_dctag_mrgn      (lsu_dctag_mrgn[3:0]),
+                   .lsu_mamem_mrgn      (lsu_mamem_mrgn[3:0]),
+                   .lsu_dtlb_mrgn       (lsu_dtlb_mrgn[7:0]),
+                   .lsu_itlb_mrgn       (lsu_itlb_mrgn[7:0]),
+                   .lsu_ldst_va_m       (lsu_ldst_va_m[12:0]), // bits [10] and [7:0] of this net are connected to the excpctl instance above
+                   .lsu_ldst_va_m_buf   (lsu_ldst_va_m_buf[47:0]), // bit [39] of this net is connected to excpctl.lsu_ldst_va_b39_m
+                   .lsu_tlu_ldst_va_m   (lsu_tlu_ldst_va_m[9:0]),
+                   .lsu_tlu_tlb_asi_state_m(lsu_tlu_tlb_asi_state_m[7:0]),
+                   .lsu_ifu_asi_state   (lsu_ifu_asi_state[7:0]),
+                   .lsu_tlu_tlb_ldst_va_m(lsu_tlu_tlb_ldst_va_m[10:0]),
+                   .lsu_tlu_tlb_dmp_va_m(lsu_tlu_tlb_dmp_va_m[47:13]),
+                   .lsu_ifu_asi_addr    (lsu_ifu_asi_addr[17:0]),
+                   .lsu_diagnstc_wr_addr_e(lsu_diagnstc_wr_addr_e[10:0]),
+                   .lsu_diagnstc_dc_prty_invrt_e(lsu_diagnstc_dc_prty_invrt_e[7:0]),
+                   .lsu_ifu_err_addr    (lsu_ifu_err_addr[47:4]),
+                   .va_wtchpt_msk_match_m(va_wtchpt_msk_match_m), // also connected to the excpctl instance above
+                   .lsu_ldst_va_g       (lsu_ldst_va_g[7:0]),
+                   .lsu_dp_ctl_reg0     (lsu_dp_ctl_reg0[5:0]),
+                   .lsu_dp_ctl_reg1     (), // unconnected in this 1-thread variant
+                   .lsu_dp_ctl_reg2     (), // unconnected in this 1-thread variant
+                   .lsu_dp_ctl_reg3     (), // unconnected in this 1-thread variant
+                   .lsu_diagnstc_wr_way_e(lsu_diagnstc_wr_way_e[1:0]),
+                   .lsu_diag_va_prty_invrt(lsu_diag_va_prty_invrt),
+                   // Inputs
+                   .rclk                (clk),                   // Templated
+                   .rst_l               (dctl_rst_l),            // Templated
+                   .se                  (se),
+                   .async_tlb_index     (async_tlb_index[5:0]),
+                   .lsu_dtlb_dmp_vld_e  (lsu_dtlb_dmp_vld_e),
+                   .tlu_lsu_asi_m       (tlu_lsu_asi_m[7:0]),
+                   .exu_tlu_wsr_data_m  (exu_tlu_wsr_data_m[7:0]),
+                   .tlu_lsu_asi_update_g(tlu_lsu_asi_update_g),
+                   .asi_state_wr_thrd   (asi_state_wr_thrd[3:0]),
+                   .ifu_lsu_imm_asi_d   (ifu_lsu_imm_asi_d[7:0]),
+                   .thread0_d           (thread0_d),
+                   .thread1_d           (1'b0), // tied low in this 1-thread variant
+                   .thread2_d           (1'b0), // tied low in this 1-thread variant
+                   .thread3_d           (1'b0), // tied low in this 1-thread variant
+                   .ifu_lsu_imm_asi_vld_d(ifu_lsu_imm_asi_vld_d),
+                   .lsu_err_addr_sel    (lsu_err_addr_sel[2:0]), // listed among the Outputs of the dctl instance below
+                   .pctxt_state_wr_thrd (pctxt_state_wr_thrd[3:0]),
+                   .sctxt_state_wr_thrd (sctxt_state_wr_thrd[3:0]),
+                   .st_rs3_data_g       (st_rs3_data_g[32:0]),
+                   .thread0_ctxt        (thread0_ctxt),
+                   .thread1_ctxt        (1'b0), // tied low in this 1-thread variant
+                   .thread2_ctxt        (1'b0), // tied low in this 1-thread variant
+                   .thread3_ctxt        (1'b0), // tied low in this 1-thread variant
+                   .thread_pctxt        (thread_pctxt),
+                   .thread_sctxt        (thread_sctxt),
+                   .thread_actxt        (thread_actxt),
+                   .thread_default      (thread_default),
+                   .tlu_dtlb_tte_tag_w2 (tlu_dtlb_tte_tag_w2[12:0]), // bits [12:0] here; bits [58:56] of the same bus go to the next port
+                   .tlu_dtlb_tte_tag_b58t56(tlu_dtlb_tte_tag_w2[58:56]), // Templated
+                   .thread0_g           (thread0_g),
+                   .thread1_g           (1'b0), // tied low in this 1-thread variant
+                   .thread2_g           (1'b0), // tied low in this 1-thread variant
+                   .thread3_g           (1'b0), // tied low in this 1-thread variant
+                   .pid_state_wr_en     (pid_state_wr_en[3:0]),
+                   .thread0_e           (thread0_e),
+                   .thread1_e           (1'b0), // tied low in this 1-thread variant
+                   .thread2_e           (1'b0), // tied low in this 1-thread variant
+                   .thread3_e           (1'b0), // tied low in this 1-thread variant
+                   .thread0_m           (lsu_dctldp_thread0_m),  // Templated
+                   .thread1_m           (1'b0),  // tied low here; the AUTO_TEMPLATE above would map this port to lsu_dctldp_thread1_m
+                   .thread2_m           (1'b0),  // tied low here; the AUTO_TEMPLATE above would map this port to lsu_dctldp_thread2_m
+                   .thread3_m           (1'b0),  // tied low here; the AUTO_TEMPLATE above would map this port to lsu_dctldp_thread3_m
+                   .lsu_iobrdge_wr_data (lsu_iobrdge_wr_data[27:0]),
+                   .dfture_tap_wr_mx_sel(dfture_tap_wr_mx_sel),
+                   .lctl_rst            (lctl_rst[3:0]),
+                   .lsu_ctl_state_wr_en (lsu_ctl_state_wr_en[3:0]),
+                   .lsuctl_ctlbits_wr_en(lsuctl_ctlbits_wr_en[3:0]),
+                   .dfture_tap_rd_en    (dfture_tap_rd_en[3:0]),
+                   .bist_tap_wr_en      (bist_tap_wr_en),
+                   .bist_ctl_reg_out    (bist_ctl_reg_out[10:0]),
+                   .mrgn_tap_wr_en      (mrgn_tap_wr_en),
+                   .ldiagctl_wr_en      (ldiagctl_wr_en),
+                   .misc_ctl_sel_din    (misc_ctl_sel_din[3:0]),
+                   .lsu_asi_sel_fmx1    (lsu_asi_sel_fmx1[2:0]),
+                   .lsu_asi_sel_fmx2    (lsu_asi_sel_fmx2[2:0]),
+                   .exu_lsu_ldst_va_e   (exu_lsu_ldst_va_e[47:0]),
+                   .tlb_access_en0_g    (tlb_access_en0_g),
+                   .tlb_access_en1_g    (1'b0), // tied low in this 1-thread variant
+                   .tlb_access_en2_g    (1'b0), // tied low in this 1-thread variant
+                   .tlb_access_en3_g    (1'b0), // tied low in this 1-thread variant
+                   .tlb_access_sel_thrd0(tlb_access_sel_thrd0),
+                   .tlb_access_sel_thrd1(tlb_access_sel_thrd1), // NOTE(review): sel_thrd1/2 stay wired to nets although en1..3_g are tied low - confirm their driver in the 1-thread build
+                   .tlb_access_sel_thrd2(tlb_access_sel_thrd2),
+                   .tlb_access_sel_default(tlb_access_sel_default),
+                   .mrgnctl_wr_en       (mrgnctl_wr_en),
+                   .lsu_dcfill_addr_e   (lsu_dcache_fill_addr_e_err[10:4]), // Templated
+                   .lsu_error_pa_m      (lsu_error_pa_m[28:0]),
+                   .stb_ldst_byte_msk   (stb_ldst_byte_msk[7:0]),
+                   .lsu_diagnstc_va_sel (lsu_diagnstc_va_sel[3:0]), // listed among the Outputs of the dctl instance below
+                   .rst_tri_en          (mux_drive_disable));     // Templated
+
+`else // !`ifdef FPGA_SYN_1THREAD
+
+   
+   lsu_dctldp dctldp ( // variant used when FPGA_SYN_1THREAD is not defined: every thread0..3 port is connected to its own net
+                   .so                  (short_scan1_2), // si/so sit between nets short_scan1_1 and short_scan1_2
+                   .si                  (short_scan1_1),
+		   .lsu_iobrdge_rd_data	(lsu_iobrdge_rd_data[43:0]),
+		   .lsu_local_ldxa_data_g(lsu_local_ldxa_data_g[47:0]), // full [47:0] slice here; the tagdp instance only attaches [32:0] of this net
+                   /*AUTOINST*/
+                   // Outputs
+                   .asi_d               (asi_d[7:0]),
+                   .lsu_excpctl_asi_state_m(lsu_excpctl_asi_state_m[7:0]), // also connected to the excpctl instance above
+                   .lsu_dctl_asi_state_m(lsu_dctl_asi_state_m[7:0]),
+                   .lsu_spu_asi_state_e (lsu_spu_asi_state_e[7:0]),
+                   .lsu_tlu_rsr_data_e  (lsu_tlu_rsr_data_e[7:0]),
+                   .lsu_asi_state       (lsu_asi_state[7:0]),
+                   .lsu_asi_reg0        (lsu_asi_reg0_t[7:0]), // lsu_asi_reg0..3: one 8-bit net per index, each with a _t suffix
+                   .lsu_asi_reg1        (lsu_asi_reg1_t[7:0]),
+                   .lsu_asi_reg2        (lsu_asi_reg2_t[7:0]),
+                   .lsu_asi_reg3        (lsu_asi_reg3_t[7:0]),
+                   .lsu_t0_pctxt_state  (lsu_t0_pctxt_state_t[12:0]), // lsu_t0..t3_pctxt_state: one 13-bit net per index
+                   .lsu_t1_pctxt_state  (lsu_t1_pctxt_state_t[12:0]),
+                   .lsu_t2_pctxt_state  (lsu_t2_pctxt_state_t[12:0]),
+                   .lsu_t3_pctxt_state  (lsu_t3_pctxt_state_t[12:0]),
+                   .lsu_tlu_dside_ctxt_m(lsu_tlu_dside_ctxt_m[12:0]),
+                   .lsu_tlu_pctxt_m     (lsu_tlu_pctxt_m[12:0]),
+                   .tlb_ctxt            (tlb_ctxt[12:0]),
+                   .lsu_pid_state0      (lsu_pid_state0_t[2:0]), // lsu_pid_state0..3: one 3-bit net per index
+                   .lsu_pid_state1      (lsu_pid_state1_t[2:0]),
+                   .lsu_pid_state2      (lsu_pid_state2_t[2:0]),
+                   .lsu_pid_state3      (lsu_pid_state3_t[2:0]),
+                   .lsu_dtlb_cam_pid_e  (lsu_dtlb_cam_pid_e[2:0]),
+                   .bist_ctl_reg_in     (bist_ctl_reg_in[6:0]),
+                   .lsu_ifu_direct_map_l1(lsu_ifu_direct_map_l1),
+                   .dc_direct_map       (dc_direct_map),
+                   .lsu_ictag_mrgn      (lsu_ictag_mrgn[3:0]),
+                   .lsu_dctag_mrgn      (lsu_dctag_mrgn[3:0]),
+                   .lsu_mamem_mrgn      (lsu_mamem_mrgn[3:0]),
+                   .lsu_dtlb_mrgn       (lsu_dtlb_mrgn[7:0]),
+                   .lsu_itlb_mrgn       (lsu_itlb_mrgn[7:0]),
+                   .lsu_ldst_va_m       (lsu_ldst_va_m[12:0]), // bits [10] and [7:0] of this net are connected to the excpctl instance above
+                   .lsu_ldst_va_m_buf   (lsu_ldst_va_m_buf[47:0]), // bit [39] of this net is connected to excpctl.lsu_ldst_va_b39_m
+                   .lsu_tlu_ldst_va_m   (lsu_tlu_ldst_va_m[9:0]),
+                   .lsu_tlu_tlb_asi_state_m(lsu_tlu_tlb_asi_state_m[7:0]),
+                   .lsu_ifu_asi_state   (lsu_ifu_asi_state[7:0]),
+                   .lsu_tlu_tlb_ldst_va_m(lsu_tlu_tlb_ldst_va_m[10:0]),
+                   .lsu_tlu_tlb_dmp_va_m(lsu_tlu_tlb_dmp_va_m[47:13]),
+                   .lsu_ifu_asi_addr    (lsu_ifu_asi_addr[17:0]),
+                   .lsu_diagnstc_wr_addr_e(lsu_diagnstc_wr_addr_e[10:0]),
+                   .lsu_diagnstc_dc_prty_invrt_e(lsu_diagnstc_dc_prty_invrt_e[7:0]),
+                   .lsu_ifu_err_addr    (lsu_ifu_err_addr[47:4]),
+                   .va_wtchpt_msk_match_m(va_wtchpt_msk_match_m), // also connected to the excpctl instance above
+                   .lsu_ldst_va_g       (lsu_ldst_va_g[7:0]),
+                   .lsu_dp_ctl_reg0     (lsu_dp_ctl_reg0[5:0]), // lsu_dp_ctl_reg0..3: one 6-bit net per index
+                   .lsu_dp_ctl_reg1     (lsu_dp_ctl_reg1[5:0]),
+                   .lsu_dp_ctl_reg2     (lsu_dp_ctl_reg2[5:0]),
+                   .lsu_dp_ctl_reg3     (lsu_dp_ctl_reg3[5:0]),
+                   .lsu_diagnstc_wr_way_e(lsu_diagnstc_wr_way_e[1:0]),
+                   .lsu_diag_va_prty_invrt(lsu_diag_va_prty_invrt),
+                   // Inputs
+                   .rclk                (clk),                   // Templated
+                   .rst_l               (dctl_rst_l),            // Templated
+                   .se                  (se),
+                   .async_tlb_index     (async_tlb_index[5:0]),
+                   .lsu_dtlb_dmp_vld_e  (lsu_dtlb_dmp_vld_e),
+                   .tlu_lsu_asi_m       (tlu_lsu_asi_m[7:0]),
+                   .exu_tlu_wsr_data_m  (exu_tlu_wsr_data_m[7:0]),
+                   .tlu_lsu_asi_update_g(tlu_lsu_asi_update_g),
+                   .asi_state_wr_thrd   (asi_state_wr_thrd[3:0]),
+                   .ifu_lsu_imm_asi_d   (ifu_lsu_imm_asi_d[7:0]),
+                   .thread0_d           (thread0_d), // thread0..3 inputs with the _d suffix
+                   .thread1_d           (thread1_d),
+                   .thread2_d           (thread2_d),
+                   .thread3_d           (thread3_d),
+                   .ifu_lsu_imm_asi_vld_d(ifu_lsu_imm_asi_vld_d),
+                   .lsu_err_addr_sel    (lsu_err_addr_sel[2:0]), // listed among the Outputs of the dctl instance below
+                   .pctxt_state_wr_thrd (pctxt_state_wr_thrd[3:0]),
+                   .sctxt_state_wr_thrd (sctxt_state_wr_thrd[3:0]),
+                   .st_rs3_data_g       (st_rs3_data_g[32:0]),
+                   .thread0_ctxt        (thread0_ctxt), // thread0..3 inputs with the _ctxt suffix
+                   .thread1_ctxt        (thread1_ctxt),
+                   .thread2_ctxt        (thread2_ctxt),
+                   .thread3_ctxt        (thread3_ctxt),
+                   .thread_pctxt        (thread_pctxt),
+                   .thread_sctxt        (thread_sctxt),
+                   .thread_actxt        (thread_actxt),
+                   .thread_default      (thread_default),
+                   .tlu_dtlb_tte_tag_w2 (tlu_dtlb_tte_tag_w2[12:0]), // bits [12:0] here; bits [58:56] of the same bus go to the next port
+                   .tlu_dtlb_tte_tag_b58t56(tlu_dtlb_tte_tag_w2[58:56]), // Templated
+                   .thread0_g           (thread0_g), // thread0..3 inputs with the _g suffix
+                   .thread1_g           (thread1_g),
+                   .thread2_g           (thread2_g),
+                   .thread3_g           (thread3_g),
+                   .pid_state_wr_en     (pid_state_wr_en[3:0]),
+                   .thread0_e           (thread0_e), // thread0..3 inputs with the _e suffix
+                   .thread1_e           (thread1_e),
+                   .thread2_e           (thread2_e),
+                   .thread3_e           (thread3_e),
+                   .thread0_m           (lsu_dctldp_thread0_m),  // Templated
+                   .thread1_m           (lsu_dctldp_thread1_m),  // Templated
+                   .thread2_m           (lsu_dctldp_thread2_m),  // Templated
+                   .thread3_m           (lsu_dctldp_thread3_m),  // Templated
+                   .lsu_iobrdge_wr_data (lsu_iobrdge_wr_data[27:0]),
+                   .dfture_tap_wr_mx_sel(dfture_tap_wr_mx_sel),
+                   .lctl_rst            (lctl_rst[3:0]),
+                   .lsu_ctl_state_wr_en (lsu_ctl_state_wr_en[3:0]),
+                   .lsuctl_ctlbits_wr_en(lsuctl_ctlbits_wr_en[3:0]),
+                   .dfture_tap_rd_en    (dfture_tap_rd_en[3:0]),
+                   .bist_tap_wr_en      (bist_tap_wr_en),
+                   .bist_ctl_reg_out    (bist_ctl_reg_out[10:0]),
+                   .mrgn_tap_wr_en      (mrgn_tap_wr_en),
+                   .ldiagctl_wr_en      (ldiagctl_wr_en),
+                   .misc_ctl_sel_din    (misc_ctl_sel_din[3:0]),
+                   .lsu_asi_sel_fmx1    (lsu_asi_sel_fmx1[2:0]),
+                   .lsu_asi_sel_fmx2    (lsu_asi_sel_fmx2[2:0]),
+                   .exu_lsu_ldst_va_e   (exu_lsu_ldst_va_e[47:0]),
+                   .tlb_access_en0_g    (tlb_access_en0_g), // tlb_access_en0..3_g: one enable net per index
+                   .tlb_access_en1_g    (tlb_access_en1_g),
+                   .tlb_access_en2_g    (tlb_access_en2_g),
+                   .tlb_access_en3_g    (tlb_access_en3_g),
+                   .tlb_access_sel_thrd0(tlb_access_sel_thrd0), // selects are thrd0, thrd1, thrd2 plus a "default" line (no thrd3 port is connected here)
+                   .tlb_access_sel_thrd1(tlb_access_sel_thrd1),
+                   .tlb_access_sel_thrd2(tlb_access_sel_thrd2),
+                   .tlb_access_sel_default(tlb_access_sel_default),
+                   .mrgnctl_wr_en       (mrgnctl_wr_en),
+                   .lsu_dcfill_addr_e   (lsu_dcache_fill_addr_e_err[10:4]), // Templated
+                   .lsu_error_pa_m      (lsu_error_pa_m[28:0]),
+                   .stb_ldst_byte_msk   (stb_ldst_byte_msk[7:0]),
+                   .lsu_diagnstc_va_sel (lsu_diagnstc_va_sel[3:0]), // listed among the Outputs of the dctl instance below
+                   .rst_tri_en          (mux_drive_disable));     // Templated
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+/*
+lsu_dctl AUTO_TEMPLATE (
+               .rst_tri_en           (mux_drive_disable),
+               .stb_cam_hit             (stb_cam_hit_bf1),
+               .lsu_ttype_vld_m2        (lsu_ttype_vld_m2_bf1),
+               .ifu_tlu_inst_vld_m      (ifu_tlu_inst_vld_m_bf1),
+               .bistctl_wr_en           (),
+               .rclk                    (clk),
+               .lsu_iobrdge_tap_rq_type_b8    (lsu_iobrdge_tap_rq_type[8:8]),
+               .lsu_iobrdge_tap_rq_type_b6_b3 (lsu_iobrdge_tap_rq_type[6:3]),
+               .lsu_iobrdge_tap_rq_type_b1_b0 (lsu_iobrdge_tap_rq_type[1:0]),
+ 
+               .lsu_ifu_err_addr_b39    (lsu_ifu_err_addr[39]),
+               .ld_inst_vld_e           (ifu_lsu_ld_inst_e),     
+               .lsu_sscan_data		(lsu_sscan_data[14:13]),     
+               .ldst_sz_e               (ifu_lsu_ldst_size_e[1:0]), 
+               .lsu_l1hit_sign_extend_e (ifu_lsu_sign_ext_e),    
+               .lsu_tlb_invert_endian_g (tlb_rd_tte_data_ie_buf), 
+               .lsu_tte_data_cp_g	(tlb_rd_tte_data[`STLB_DATA_CP]), 
+               .st_inst_vld_e           (ifu_lsu_st_inst_e),     
+               .tlb_demap_actxt         (tlu_dtlb_dmp_actxt_g),  
+               .tlb_demap_nctxt         (tlu_dtlb_dmp_nctxt_g),  
+               .tlb_demap_pctxt         (tlu_dtlb_dmp_pctxt_g),  
+               .tlb_demap_sctxt         (tlu_dtlb_dmp_sctxt_g),  
+               .tlb_demap_thrid         (tlu_idtlb_dmp_thrid_g[1:0]), 
+       	       .lsu_dfill_tid_e		      (dfq_tid[1:0]),
+	             .tlb_pgnum		            ({tlb_pgnum_buf[39:10]}),
+               .lsu_ldst_va_b12_b11_m   (lsu_ldst_va_m[12:11]),
+               .lsu_ldst_va_b7_b0_m     (lsu_ldst_va_m[7:0]));
+
+*/
+
+`ifdef FPGA_SYN_1THREAD
+   
+lsu_dctl dctl (
+               .so                      (short_scan1_3),
+               .si                      (short_scan1_2),
+
+         .lsu_dtlb_cam_real_e     (lsu_dtlb_cam_real_e),
+
+		      /*AUTOINST*/
+               // Outputs
+               .lsu_tlu_nucleus_ctxt_m  (lsu_tlu_nucleus_ctxt_m),
+               .lsu_quad_word_access_g  (lsu_quad_word_access_g),
+               .dctl_rst_l              (dctl_rst_l),
+               .lsu_tlu_wsr_inst_e      (lsu_tlu_wsr_inst_e),
+               .lsu_l2fill_fpld_e       (lsu_l2fill_fpld_e),
+               .dva_vld_m_bf            (dva_vld_m_bf[3:0]),
+               .lsu_no_spc_pref         (lsu_no_spc_pref[3:0]),
+               .ifu_tlu_flush_fd_w      (ifu_tlu_flush_fd_w),
+               .ifu_tlu_flush_fd2_w     (ifu_tlu_flush_fd2_w),
+               .ifu_tlu_flush_fd3_w     (ifu_tlu_flush_fd3_w),
+               .ifu_lsu_flush_w         (ifu_lsu_flush_w),
+               .lsu_tlu_thrid_d         (lsu_tlu_thrid_d[1:0]),
+               .lsu_diagnstc_data_sel   (lsu_diagnstc_data_sel[3:0]),
+               .lsu_diagnstc_va_sel     (lsu_diagnstc_va_sel[3:0]),
+               .lsu_err_addr_sel        (lsu_err_addr_sel[2:0]),
+               .dva_bit_wr_en_e         (dva_bit_wr_en_e[15:0]),
+               .dva_wr_adr_e            (dva_wr_adr_e[10:6]),
+               .lsu_exu_ldst_miss_w2    (lsu_exu_ldst_miss_w2),
+               .lsu_exu_dfill_vld_w2    (lsu_exu_dfill_vld_w2),
+               .lsu_ffu_ld_vld          (lsu_ffu_ld_vld),
+               .lsu_ld_miss_wb          (lsu_ld_miss_wb),
+               .lsu_dtlb_bypass_e       (lsu_dtlb_bypass_e),
+               .ld_pcx_pkt_g            (ld_pcx_pkt_g[`LMQ_WIDTH-1:40]),
+               .tlb_ldst_cam_vld        (tlb_ldst_cam_vld),
+               .ldxa_internal           (ldxa_internal),
+               .lsu_ifu_ldsta_internal_e(lsu_ifu_ldsta_internal_e),
+               .lsu_ifu_ldst_cmplt      (lsu_ifu_ldst_cmplt[3:0]),
+               .lsu_ifu_itlb_en         (lsu_ifu_itlb_en[3:0]),
+               .lsu_ifu_icache_en       (lsu_ifu_icache_en[3:0]),
+               .lmq_byp_data_en_w2      (lmq_byp_data_en_w2[3:0]),
+               .lmq_byp_data_fmx_sel    (lmq_byp_data_fmx_sel[3:0]),
+               .lmq_byp_data_mxsel0     (lmq_byp_data_mxsel0[3:0]),
+               .lmq_byp_data_mxsel1     (lmq_byp_data_mxsel1[3:0]),
+               .lmq_byp_data_mxsel2     (lmq_byp_data_mxsel2[3:0]),
+               .lmq_byp_data_mxsel3     (lmq_byp_data_mxsel3[3:0]),
+               .lmq_byp_ldxa_mxsel0     (lmq_byp_ldxa_mxsel0[2:0]),
+               .lmq_byp_ldxa_mxsel1     (lmq_byp_ldxa_mxsel1[2:0]),
+               .lmq_byp_ldxa_mxsel2     (lmq_byp_ldxa_mxsel2[2:0]),
+               .lmq_byp_ldxa_mxsel3     (lmq_byp_ldxa_mxsel3[2:0]),
+               .lsu_ld_thrd_byp_sel_e   (lsu_ld_thrd_byp_sel_e[2:0]),
+               .dcache_byte_wr_en_e     (dcache_byte_wr_en_e[15:0]),
+               .lsu_dcache_wr_vld_e     (lsu_dcache_wr_vld_e),
+               .lsu_ldstub_g            (lsu_ldstub_g),
+               .lsu_swap_g              (lsu_swap_g),
+               .lsu_tlu_dtlb_done       (lsu_tlu_dtlb_done),
+               .lsu_exu_thr_m           (lsu_exu_thr_m[1:0]),
+               .merge7_sel_byte0_m      (merge7_sel_byte0_m),
+               .merge7_sel_byte7_m      (merge7_sel_byte7_m),
+               .merge6_sel_byte1_m      (merge6_sel_byte1_m),
+               .merge6_sel_byte6_m      (merge6_sel_byte6_m),
+               .merge5_sel_byte2_m      (merge5_sel_byte2_m),
+               .merge5_sel_byte5_m      (merge5_sel_byte5_m),
+               .merge4_sel_byte3_m      (merge4_sel_byte3_m),
+               .merge4_sel_byte4_m      (merge4_sel_byte4_m),
+               .merge3_sel_byte0_m      (merge3_sel_byte0_m),
+               .merge3_sel_byte3_m      (merge3_sel_byte3_m),
+               .merge3_sel_byte4_m      (merge3_sel_byte4_m),
+               .merge3_sel_byte7_default_m(merge3_sel_byte7_default_m),
+               .merge3_sel_byte_m       (merge3_sel_byte_m),
+               .merge2_sel_byte1_m      (merge2_sel_byte1_m),
+               .merge2_sel_byte2_m      (merge2_sel_byte2_m),
+               .merge2_sel_byte5_m      (merge2_sel_byte5_m),
+               .merge2_sel_byte6_default_m(merge2_sel_byte6_default_m),
+               .merge2_sel_byte_m       (merge2_sel_byte_m),
+               .merge0_sel_byte0_m      (merge0_sel_byte0_m),
+               .merge0_sel_byte1_m      (merge0_sel_byte1_m),
+               .merge0_sel_byte2_m      (merge0_sel_byte2_m),
+               .merge0_sel_byte3_default_m(merge0_sel_byte3_default_m),
+               .merge0_sel_byte4_m      (merge0_sel_byte4_m),
+               .merge0_sel_byte5_m      (merge0_sel_byte5_m),
+               .merge0_sel_byte6_m      (merge0_sel_byte6_m),
+               .merge0_sel_byte7_default_m(merge0_sel_byte7_default_m),
+               .merge1_sel_byte0_m      (merge1_sel_byte0_m),
+               .merge1_sel_byte1_m      (merge1_sel_byte1_m),
+               .merge1_sel_byte2_m      (merge1_sel_byte2_m),
+               .merge1_sel_byte3_default_m(merge1_sel_byte3_default_m),
+               .merge1_sel_byte4_m      (merge1_sel_byte4_m),
+               .merge1_sel_byte5_m      (merge1_sel_byte5_m),
+               .merge1_sel_byte6_m      (merge1_sel_byte6_m),
+               .merge1_sel_byte7_default_m(merge1_sel_byte7_default_m),
+               .merge0_sel_byte_1h_m    (merge0_sel_byte_1h_m),
+               .merge1_sel_byte_1h_m    (merge1_sel_byte_1h_m),
+               .merge1_sel_byte_2h_m    (merge1_sel_byte_2h_m),
+               .lsu_dtagv_wr_vld_e      (lsu_dtagv_wr_vld_e),
+               .lsu_dtag_wrreq_x_e      (lsu_dtag_wrreq_x_e),
+               .lsu_dtag_index_sel_x_e  (lsu_dtag_index_sel_x_e),
+               .lsu_dtlb_wr_vld_e       (lsu_dtlb_wr_vld_e),
+               .lsu_dtlb_tag_rd_e       (lsu_dtlb_tag_rd_e),
+               .lsu_dtlb_data_rd_e      (lsu_dtlb_data_rd_e),
+               .lsu_dtlb_dmp_vld_e      (lsu_dtlb_dmp_vld_e),
+               .lsu_dtlb_dmp_all_e      (lsu_dtlb_dmp_all_e),
+               .lsu_dtlb_rwindex_vld_e  (lsu_dtlb_rwindex_vld_e),
+               .lsu_dtlb_invalid_all_l_m(lsu_dtlb_invalid_all_l_m),
+               .lsu_tlu_tlb_ld_inst_m   (lsu_tlu_tlb_ld_inst_m),
+               .lsu_tlu_tlb_st_inst_m   (lsu_tlu_tlb_st_inst_m),
+               .lsu_tlu_tlb_access_tid_m(lsu_tlu_tlb_access_tid_m[1:0]),
+               .lsu_tlb_data_rd_vld_g   (lsu_tlb_data_rd_vld_g),
+               .lsu_tlb_st_sel_m        (lsu_tlb_st_sel_m[3:0]),
+               .lsu_va_wtchpt0_wr_en_l  (lsu_va_wtchpt0_wr_en_l),
+               .lsu_va_wtchpt1_wr_en_l  (lsu_va_wtchpt1_wr_en_l),
+               .lsu_va_wtchpt2_wr_en_l  (lsu_va_wtchpt2_wr_en_l),
+               .lsu_va_wtchpt3_wr_en_l  (lsu_va_wtchpt3_wr_en_l),
+               .thread0_m               (thread0_m),
+               .thread1_m               (),
+               .thread2_m               (),
+               .thread3_m               (),
+               .lsu_dctldp_thread0_m    (lsu_dctldp_thread0_m),
+               .lsu_dctldp_thread1_m    (),
+               .lsu_dctldp_thread2_m    (),
+               .lsu_dctldp_thread3_m    (),
+               .thread0_g               (thread0_g),
+               .thread1_g               (),
+               .thread2_g               (),
+               .thread3_g               (),
+               .lsu_tlu_nonalt_ldst_m   (lsu_tlu_nonalt_ldst_m),
+               .lsu_tlu_xslating_ldst_m (lsu_tlu_xslating_ldst_m),
+               .lsu_tlu_ctxt_sel_m      (lsu_tlu_ctxt_sel_m[2:0]),
+               .lsu_tlu_write_op_m      (lsu_tlu_write_op_m),
+               .lsu_dtlb_addr_mask_l_e  (lsu_dtlb_addr_mask_l_e),
+               .dva_din_e               (dva_din_e),
+               .lsu_diagnstc_dtagv_prty_invrt_e(lsu_diagnstc_dtagv_prty_invrt_e),
+               .lsu_ifu_asi_load        (lsu_ifu_asi_load),
+               .lsu_ifu_asi_thrid       (lsu_ifu_asi_thrid[1:0]),
+               .lsu_ifu_asi_vld         (lsu_ifu_asi_vld),
+               .lsu_quad_asi_e          (lsu_quad_asi_e),
+               .lsu_local_ldxa_sel_g    (lsu_local_ldxa_sel_g),
+               .lsu_dtag_rsel_m         (lsu_dtag_rsel_m[3:0]),
+               .lsu_tlbop_force_swo     (lsu_tlbop_force_swo),
+               .lsu_atomic_pkt2_bsel_g  (lsu_atomic_pkt2_bsel_g[2:0]),
+               .lsu_dcache_tag_perror_g (lsu_dcache_tag_perror_g),
+               .lsu_dcache_data_perror_g(lsu_dcache_data_perror_g),
+               .lsu_ifu_l2_unc_error    (lsu_ifu_l2_unc_error),
+               .lsu_ifu_l2_corr_error   (lsu_ifu_l2_corr_error),
+               .lsu_ifu_dcache_data_perror(lsu_ifu_dcache_data_perror),
+               .lsu_ifu_dcache_tag_perror(lsu_ifu_dcache_tag_perror),
+               .lsu_ifu_error_tid       (lsu_ifu_error_tid[1:0]),
+               .lsu_ifu_io_error        (lsu_ifu_io_error),
+               .lsu_tlu_squash_va_oor_m (lsu_tlu_squash_va_oor_m),
+               .lsu_squash_va_oor_m     (lsu_squash_va_oor_m),
+               .tlb_cam_hit_g           (tlb_cam_hit_g),
+               .lsu_st_hw_le_g          (lsu_st_hw_le_g),
+               .lsu_st_w_or_dbl_le_g    (lsu_st_w_or_dbl_le_g),
+               .lsu_st_x_le_g           (lsu_st_x_le_g),
+               .lsu_swap_sel_default_g  (lsu_swap_sel_default_g),
+               .lsu_swap_sel_default_byte_7_2_g(lsu_swap_sel_default_byte_7_2_g),
+               .lsu_st_rmo_m            (lsu_st_rmo_m),
+               .lsu_bst_in_pipe_m       (lsu_bst_in_pipe_m),
+               .lsu_snap_blk_st_m       (lsu_snap_blk_st_m),
+               .lsu_blk_st_m            (lsu_blk_st_m),
+               .lsu_blkst_pgnum_m       (lsu_blkst_pgnum_m[39:10]),
+               .lsu_ffu_blk_asi_e       (lsu_ffu_blk_asi_e),
+               .lsu_blk_asi_m           (lsu_blk_asi_m),
+               .lsu_nonalt_nucl_access_m(lsu_nonalt_nucl_access_m),
+               .dcache_alt_mx_sel_e     (dcache_alt_mx_sel_e),
+               .dcache_alt_mx_sel_e_bf  (dcache_alt_mx_sel_e_bf),
+               .dcache_rvld_e           (dcache_rvld_e),
+               .lsu_dc_iob_access_e     (lsu_dc_iob_access_e),
+               .lsu_ifu_ldst_miss_w     (lsu_ifu_ldst_miss_w),
+               .lsu_ifu_dc_parity_error_w2(lsu_ifu_dc_parity_error_w2),
+               .lsu_ldst_inst_vld_e     (lsu_ldst_inst_vld_e),
+               .lsu_local_ldxa_tlbrd_sel_g(lsu_local_ldxa_tlbrd_sel_g),
+               .lsu_local_diagnstc_tagrd_sel_g(lsu_local_diagnstc_tagrd_sel_g),
+               .lsu_va_wtchpt_sel_g     (lsu_va_wtchpt_sel_g),
+               .asi_state_wr_thrd       (asi_state_wr_thrd[3:0]),
+               .thread0_d               (thread0_d),
+               .thread1_d               (),
+               .thread2_d               (),
+               .thread3_d               (),
+               .tlu_lsu_asi_update_g    (tlu_lsu_asi_update_g),
+               .pctxt_state_wr_thrd     (pctxt_state_wr_thrd[3:0]),
+               .sctxt_state_wr_thrd     (sctxt_state_wr_thrd[3:0]),
+               .thread_pctxt            (thread_pctxt),
+               .thread_sctxt            (thread_sctxt),
+               .thread_actxt            (thread_actxt),
+               .thread_default          (thread_default),
+               .thread0_ctxt            (thread0_ctxt),
+               .thread1_ctxt            (),
+               .thread2_ctxt            (),
+               .thread3_ctxt            (),
+               .pid_state_wr_en         (pid_state_wr_en[3:0]),
+               .thread0_e               (thread0_e),
+               .thread1_e               (),
+               .thread2_e               (),
+               .thread3_e               (),
+               .dfture_tap_wr_mx_sel    (dfture_tap_wr_mx_sel),
+               .lctl_rst                (lctl_rst[3:0]),
+               .lsu_ctl_state_wr_en     (lsu_ctl_state_wr_en[3:0]),
+               .lsuctl_ctlbits_wr_en    (lsuctl_ctlbits_wr_en[3:0]),
+               .dfture_tap_rd_en        (dfture_tap_rd_en[3:0]),
+               .bist_tap_wr_en          (bist_tap_wr_en),
+               .bistctl_wr_en           (),                      // Templated
+               .bist_ctl_reg_wr_en      (bist_ctl_reg_wr_en),
+               .mrgn_tap_wr_en          (mrgn_tap_wr_en),
+               .ldiagctl_wr_en          (ldiagctl_wr_en),
+               .misc_ctl_sel_din        (misc_ctl_sel_din[3:0]),
+               .lsu_asi_sel_fmx1        (lsu_asi_sel_fmx1[2:0]),
+               .lsu_asi_sel_fmx2        (lsu_asi_sel_fmx2[2:0]),
+               .tlb_access_en0_g        (tlb_access_en0_g),
+               .tlb_access_en1_g        (),
+               .tlb_access_en2_g        (),
+               .tlb_access_en3_g        (),
+               .tlb_access_sel_thrd0    (tlb_access_sel_thrd0),
+               .tlb_access_sel_thrd1    (tlb_access_sel_thrd1),
+               .tlb_access_sel_thrd2    (tlb_access_sel_thrd2),
+               .tlb_access_sel_default  (tlb_access_sel_default),
+               .mrgnctl_wr_en           (mrgnctl_wr_en),
+               .hpv_priv_m              (hpv_priv_m),
+               .hpstate_en_m            (hpstate_en_m),
+               .dcache_arry_data_sel_m  (dcache_arry_data_sel_m),
+               .dtlb_bypass_m           (dtlb_bypass_m),
+               .lsu_alt_space_m         (lsu_alt_space_m),
+               .atomic_m                (atomic_m),
+               .ldst_dbl_m              (ldst_dbl_m),
+               .fp_ldst_m               (fp_ldst_m),
+               .lda_internal_m          (lda_internal_m),
+               .sta_internal_m          (sta_internal_m),
+               .cam_real_m              (cam_real_m),
+               .data_rd_vld_g           (data_rd_vld_g),
+               .tag_rd_vld_g            (tag_rd_vld_g),
+               .ldst_sz_m               (ldst_sz_m[1:0]),
+               .asi_internal_m          (asi_internal_m),
+               .rd_only_ltlb_asi_e      (rd_only_ltlb_asi_e),
+               .wr_only_ltlb_asi_e      (wr_only_ltlb_asi_e),
+               .dfill_tlb_asi_e         (dfill_tlb_asi_e),
+               .ifill_tlb_asi_e         (ifill_tlb_asi_e),
+               .nofault_asi_m           (nofault_asi_m),
+               .as_if_user_asi_m        (as_if_user_asi_m),
+               .atomic_asi_m            (atomic_asi_m),
+               .phy_use_ec_asi_m        (phy_use_ec_asi_m),
+               .phy_byp_ec_asi_m        (phy_byp_ec_asi_m),
+               .quad_asi_m              (quad_asi_m),
+               .binit_quad_asi_m        (binit_quad_asi_m),
+               .blk_asi_m               (blk_asi_m),
+               .recognized_asi_m        (recognized_asi_m),
+               .strm_asi_m              (strm_asi_m),
+               .mmu_rd_only_asi_m       (mmu_rd_only_asi_m),
+               .rd_only_asi_m           (rd_only_asi_m),
+               .wr_only_asi_m           (wr_only_asi_m),
+               .unimp_asi_m             (unimp_asi_m),
+               .va_wtchpt_cmp_en_m      (va_wtchpt_cmp_en_m),
+               .lsu_tlu_async_ttype_vld_w2(lsu_tlu_async_ttype_vld_w2),
+               .lsu_tlu_async_ttype_w2  (lsu_tlu_async_ttype_w2[6:0]),
+               .lsu_tlu_async_tid_w2    (lsu_tlu_async_tid_w2[1:0]),
+               .async_tlb_index         (async_tlb_index[5:0]),
+               .l2fill_vld_m            (l2fill_vld_m),
+               .ld_thrd_byp_mxsel_m     (ld_thrd_byp_mxsel_m[3:0]),
+               .morphed_addr_m          (morphed_addr_m[7:0]),
+               .signed_ldst_byte_m      (signed_ldst_byte_m),
+               .signed_ldst_hw_m        (signed_ldst_hw_m),
+               .signed_ldst_w_m         (signed_ldst_w_m),
+               .lsu_tlb_asi_data_perr_g (lsu_tlb_asi_data_perr_g),
+               .lsu_tlb_asi_tag_perr_g  (lsu_tlb_asi_tag_perr_g),
+               .lsu_sscan_data          (lsu_sscan_data[14:13]), // Templated
+               .lsu_ld_inst_vld_g       (lsu_ld_inst_vld_g[3:0]),
+               .lsu_dcache_rand         (lsu_dcache_rand[1:0]),
+               .lsu_encd_way_hit        (lsu_encd_way_hit[1:0]),
+               .lsu_way_hit_or          (lsu_way_hit_or),
+               .lsu_memref_m            (lsu_memref_m),
+               .lsu_flsh_inst_m         (lsu_flsh_inst_m),
+               .lsu_ifu_asi_data_en_l   (lsu_ifu_asi_data_en_l),
+               .lsu_dcache_fill_addr_e  (lsu_dcache_fill_addr_e[10:3]),
+               .lsu_dcache_fill_addr_e_err(lsu_dcache_fill_addr_e_err[10:4]),
+               .lsu_thread_g            (lsu_thread_g[3:0]),
+               .lmq_ldd_vld             (lmq_ldd_vld),
+               .lsu_bist_rsel_way_e     (lsu_bist_rsel_way_e[3:0]),
+               .lsu_dcache_fill_way_e   (lsu_dcache_fill_way_e[3:0]),
+               .lmq_ld_addr_b3          (lmq_ld_addr_b3),
+               .lsu_outstanding_rmo_st_max(lsu_outstanding_rmo_st_max[3:0]),
+               .lsu_dcfill_data_mx_sel_e(lsu_dcfill_data_mx_sel_e),
+               // Inputs
+               .se                      (se),
+               .sehold                  (sehold),
+               .rst_tri_en              (mux_drive_disable),     // Templated
+               .rclk                    (clk),                   // Templated
+               .grst_l                  (grst_l),
+               .arst_l                  (arst_l),
+               .lsu_diag_va_prty_invrt  (lsu_diag_va_prty_invrt),
+               .dva_svld_e              (dva_svld_e),
+               .dva_snp_bit_wr_en_e     (dva_snp_bit_wr_en_e[15:0]),
+               .dva_snp_addr_e          (dva_snp_addr_e[4:0]),
+               .lsu_tte_data_cp_g       (tlb_rd_tte_data[`STLB_DATA_CP]), // Templated
+               .lsu_l2fill_vld          (lsu_l2fill_vld),
+               .ld_inst_vld_e           (ifu_lsu_ld_inst_e),     // Templated
+               .st_inst_vld_e           (ifu_lsu_st_inst_e),     // Templated
+               .ifu_lsu_ldst_fp_e       (ifu_lsu_ldst_fp_e),
+               .ldst_sz_e               (ifu_lsu_ldst_size_e[1:0]), // Templated
+               .lsu_ldst_va_b12_b11_m   (lsu_ldst_va_m[12:11]),  // Templated
+               .lsu_ldst_va_b7_b0_m     (lsu_ldst_va_m[7:0]),    // Templated
+               .ifu_lsu_rd_e            (ifu_lsu_rd_e[4:0]),
+               .tlb_cam_hit             (tlb_cam_hit),
+               .ifu_tlu_sraddr_d        (ifu_tlu_sraddr_d[6:0]),
+               .ifu_tlu_wsr_inst_d      (ifu_tlu_wsr_inst_d),
+               .ifu_lsu_alt_space_d     (ifu_lsu_alt_space_d),
+               .tlu_lsu_int_ldxa_vld_w2 (tlu_lsu_int_ldxa_vld_w2),
+               .tlu_lsu_int_ld_ill_va_w2(tlu_lsu_int_ld_ill_va_w2),
+               .tlu_lsu_ldxa_tid_w2     (tlu_lsu_ldxa_tid_w2[1:0]),
+               .ifu_lsu_ldxa_data_vld_w2(ifu_lsu_ldxa_data_vld_w2),
+               .ifu_lsu_ldxa_illgl_va_w2(ifu_lsu_ldxa_illgl_va_w2),
+               .ifu_lsu_ldxa_tid_w2     (ifu_lsu_ldxa_tid_w2[1:0]),
+               .ifu_lsu_asi_rd_unc      (ifu_lsu_asi_rd_unc),
+               .tlu_lsu_tl_zero         (tlu_lsu_tl_zero[3:0]),
+               .ifu_lsu_thrid_s         (ifu_lsu_thrid_s[1:0]),
+               .ifu_lsu_ldst_dbl_e      (ifu_lsu_ldst_dbl_e),
+               .ld_stb_full_raw_w2      (ld_stb_full_raw_w2),
+               .ld_sec_active           (ld_sec_active),
+               .ifu_tlu_inst_vld_m      (ifu_tlu_inst_vld_m_bf1), // Templated
+               .lsu_l2fill_bendian_m    (lsu_l2fill_bendian_m),
+               .lmq0_l2fill_fpld        (lmq0_l2fill_fpld),
+               .lmq1_l2fill_fpld        (lmq1_l2fill_fpld),
+               .lmq2_l2fill_fpld        (lmq2_l2fill_fpld),
+               .lmq3_l2fill_fpld        (lmq3_l2fill_fpld),
+               .cache_way_hit_buf1      (cache_way_hit_buf1[3:0]),
+               .cache_hit               (cache_hit),
+               .lmq0_byp_misc_sz        (lmq0_byp_misc_sz[1:0]),
+               .lmq1_byp_misc_sz        (lmq1_byp_misc_sz[1:0]),
+               .lmq2_byp_misc_sz        (lmq2_byp_misc_sz[1:0]),
+               .lmq3_byp_misc_sz        (lmq3_byp_misc_sz[1:0]),
+               .lsu_l2fill_sign_extend_m(lsu_l2fill_sign_extend_m),
+               .lsu_l1hit_sign_extend_e (ifu_lsu_sign_ext_e),    // Templated
+               .tlu_lsu_pstate_cle      (tlu_lsu_pstate_cle[3:0]),
+               .tlu_lsu_pstate_am       (tlu_lsu_pstate_am[3:0]),
+               .tlb_pgnum               ({tlb_pgnum_buf[39:10]}), // Templated
+               .tlb_demap_nctxt         (tlu_dtlb_dmp_nctxt_g),  // Templated
+               .tlb_demap_pctxt         (tlu_dtlb_dmp_pctxt_g),  // Templated
+               .tlb_demap_sctxt         (tlu_dtlb_dmp_sctxt_g),  // Templated
+               .tlb_demap_actxt         (tlu_dtlb_dmp_actxt_g),  // Templated
+               .tlb_demap_thrid         (tlu_idtlb_dmp_thrid_g[1:0]), // Templated
+               .ifu_lsu_casa_e          (ifu_lsu_casa_e),
+               .ifu_lsu_ldstub_e        (ifu_lsu_ldstub_e),
+               .ifu_lsu_swap_e          (ifu_lsu_swap_e),
+               .lsu_atm_st_cmplt_e      (lsu_atm_st_cmplt_e),
+               .lsu_cpx_pkt_atm_st_cmplt(lsu_cpx_pkt_atm_st_cmplt),
+               .spu_lsu_ldxa_data_vld_w2(spu_lsu_ldxa_data_vld_w2),
+               .spu_lsu_ldxa_illgl_va_w2(spu_lsu_ldxa_illgl_va_w2),
+               .spu_lsu_ldxa_tid_w2     (spu_lsu_ldxa_tid_w2[1:0]),
+               .spu_lsu_stxa_ack_tid    (spu_lsu_stxa_ack_tid[1:0]),
+               .spu_lsu_stxa_ack        (spu_lsu_stxa_ack),
+               .spu_lsu_unc_error_w2    (spu_lsu_unc_error_w2),
+               .spu_lsu_int_w2          (spu_lsu_int_w2),
+               .tlu_lsu_stxa_ack        (tlu_lsu_stxa_ack),
+               .tlu_lsu_stxa_ack_tid    (tlu_lsu_stxa_ack_tid[1:0]),
+               .lsu_tlb_invert_endian_g (tlb_rd_tte_data_ie_buf), // Templated
+               .lmq0_ncache_ld          (lmq0_ncache_ld),
+               .lmq1_ncache_ld          (lmq1_ncache_ld),
+               .lmq2_ncache_ld          (lmq2_ncache_ld),
+               .lmq3_ncache_ld          (lmq3_ncache_ld),
+               .ifu_tlu_mb_inst_e       (ifu_tlu_mb_inst_e),
+               .ifu_tlu_flsh_inst_e     (ifu_tlu_flsh_inst_e),
+               .lsu_stb_empty           ({3'b000, lsu_stb_empty[0]}),
+               .tlu_dtlb_tag_rd_g       (tlu_dtlb_tag_rd_g),
+               .tlu_dtlb_data_rd_g      (tlu_dtlb_data_rd_g),
+               .tlu_dtlb_dmp_vld_g      (tlu_dtlb_dmp_vld_g),
+               .tlu_dtlb_dmp_all_g      (tlu_dtlb_dmp_all_g),
+               .tlu_dtlb_rw_index_vld_g (tlu_dtlb_rw_index_vld_g),
+               .tlu_dtlb_invalidate_all_g(tlu_dtlb_invalidate_all_g),
+               .lsu_st_wr_dcache        (lsu_st_wr_dcache),
+               .tlu_lsu_asi_update_m    (tlu_lsu_asi_update_m),
+               .tlu_lsu_tid_m           (tlu_lsu_tid_m[1:0]),
+               .lsu_rd_dtag_parity_g    (lsu_rd_dtag_parity_g[3:0]),
+               .dcache_rparity_err_wb   (dcache_rparity_err_wb),
+               .lsu_diagnstc_wr_data_b0 (lsu_diagnstc_wr_data_b0),
+               .lsu_byp_ldd_oddrd_m     (lsu_byp_ldd_oddrd_m),
+               .tlu_lsu_redmode         (tlu_lsu_redmode[3:0]),
+               .tlu_lsu_redmode_rst_d1  (tlu_lsu_redmode_rst_d1[3:0]),
+               .dva_vld_m               (dva_vld_m[3:0]),
+               .lsu_dfill_tid_e         (dfq_tid[1:0]),          // Templated
+               .ifu_lsu_asi_ack         (ifu_lsu_asi_ack),
+               .lsu_intrpt_cmplt        (lsu_intrpt_cmplt[3:0]),
+               .lsu_iobrdge_tap_rq_type_b8(lsu_iobrdge_tap_rq_type[8:8]), // Templated
+               .lsu_iobrdge_tap_rq_type_b6_b3(lsu_iobrdge_tap_rq_type[6:3]), // Templated
+               .lsu_iobrdge_tap_rq_type_b1_b0(lsu_iobrdge_tap_rq_type[1:0]), // Templated
+               .lsu_iobrdge_fwd_pkt_vld (lsu_iobrdge_fwd_pkt_vld),
+               .lsu_cpx_ld_dtag_perror_e(lsu_cpx_ld_dtag_perror_e),
+               .lsu_cpx_ld_dcache_perror_e(lsu_cpx_ld_dcache_perror_e),
+               .lsu_cpx_pkt_ld_err      (lsu_cpx_pkt_ld_err[1:0]),
+               .ifu_lsu_nceen           (ifu_lsu_nceen[3:0]),
+               .tlu_lsu_ldxa_async_data_vld(tlu_lsu_ldxa_async_data_vld),
+               .tlu_lsu_hpv_priv        (tlu_lsu_hpv_priv[3:0]),
+               .tlu_lsu_hpstate_en      (tlu_lsu_hpstate_en[3:0]),
+               .ifu_lsu_memref_d        (ifu_lsu_memref_d),
+               .ifu_lsu_pref_inst_e     (ifu_lsu_pref_inst_e),
+               .lsu_pref_pcx_req        (lsu_pref_pcx_req),
+               .lsu_cpx_pkt_prefetch2   (lsu_cpx_pkt_prefetch2),
+               .lsu_ld_pcx_rq_sel_d2    (lsu_ld_pcx_rq_sel_d2[3:0]),
+               .lsu_pcx_req_squash_d1   (lsu_pcx_req_squash_d1),
+               .lsu_bld_helper_cmplt_m  (lsu_bld_helper_cmplt_m),
+               .lsu_bld_cnt_m           (lsu_bld_cnt_m[2:0]),
+               .lsu_bld_reset           (lsu_bld_reset),
+               .ffu_lsu_blk_st_e        (ffu_lsu_blk_st_e),
+               .lsu_stb_rmo_st_issue    ({3'b000, lsu_stb_rmo_st_issue[0]}),
+               .lsu_cpx_rmo_st_ack      (lsu_cpx_rmo_st_ack[3:0]),
+               .lsu_dfq_flsh_cmplt      (lsu_dfq_flsh_cmplt[3:0]),
+               .stb_cam_hit             (stb_cam_hit_bf1),       // Templated
+               .ifu_tlu_flush_m         (ifu_tlu_flush_m),
+               .ctu_sscan_tid           (ctu_sscan_tid[3:0]),
+               .tte_data_perror_unc     (tte_data_perror_unc),
+               .asi_tte_data_perror     (asi_tte_data_perror),
+               .asi_tte_tag_perror      (asi_tte_tag_perror),
+               .tlu_dtlb_rw_index_g     (tlu_dtlb_rw_index_g[5:0]),
+               .lsu_local_early_flush_g (lsu_local_early_flush_g),
+               .lsu_dfq_vld             (lsu_dfq_vld),
+               .gdbginit_l              (gdbginit_l),
+               .dc_direct_map           (dc_direct_map),
+               .asi_d                   (asi_d[7:0]),
+               .lsu_dctl_asi_state_m    (lsu_dctl_asi_state_m[7:0]),
+               .lsu_ldst_va_g           (lsu_ldst_va_g[7:0]),
+               .lsu_ifu_err_addr_b39    (lsu_ifu_err_addr[39]),  // Templated
+               .lsu_dp_ctl_reg0         (lsu_dp_ctl_reg0[5:0]),
+               .lsu_dp_ctl_reg1         (6'b000000),
+               .lsu_dp_ctl_reg2         (6'b000000),
+               .lsu_dp_ctl_reg3         (6'b000000),
+               .ldd_in_dfq_out          (ldd_in_dfq_out),
+               .dcache_iob_addr_e       (dcache_iob_addr_e[7:0]),
+               .mbist_dcache_index      (mbist_dcache_index[6:0]),
+               .mbist_dcache_word       (mbist_dcache_word),
+               .lsu_diagnstc_wr_addr_e  (lsu_diagnstc_wr_addr_e[10:0]),
+               .st_dcfill_addr          (st_dcfill_addr[10:0]),
+               .lsu_dfq_ld_vld          (lsu_dfq_ld_vld),
+               .lsu_dfq_st_vld          (lsu_dfq_st_vld),
+               .lmq0_ldd_vld            (lmq0_ldd_vld),
+               .lmq1_ldd_vld            (lmq1_ldd_vld),
+               .lmq2_ldd_vld            (lmq2_ldd_vld),
+               .lmq3_ldd_vld            (lmq3_ldd_vld),
+               .lsu_dfq_byp_tid         (lsu_dfq_byp_tid[1:0]),
+               .dfq_byp_ff_en           (dfq_byp_ff_en),
+               .lsu_dcache_iob_way_e    (lsu_dcache_iob_way_e[1:0]),
+               .mbist_dcache_way        (mbist_dcache_way[1:0]),
+               .lsu_diagnstc_wr_way_e   (lsu_diagnstc_wr_way_e[1:0]),
+               .lsu_st_way_e            (lsu_st_way_e[1:0]),
+               .lmq0_pcx_pkt_way        (lmq0_pcx_pkt_way[1:0]),
+               .lmq1_pcx_pkt_way        (2'b00),
+               .lmq2_pcx_pkt_way        (2'b00),
+               .lmq3_pcx_pkt_way        (2'b00),
+               .lmq0_ld_rq_type         (lmq0_ld_rq_type[2:0]),
+               .lmq1_ld_rq_type         (lmq1_ld_rq_type[2:0]),
+               .lmq2_ld_rq_type         (lmq2_ld_rq_type[2:0]),
+               .lmq3_ld_rq_type         (lmq3_ld_rq_type[2:0]),
+               .lmq0_pcx_pkt_addr       (lmq0_pcx_pkt_addr[10:0]),
+               .lmq1_pcx_pkt_addr       (lmq1_pcx_pkt_addr[10:0]),
+               .lmq2_pcx_pkt_addr       (lmq2_pcx_pkt_addr[10:0]),
+               .lmq3_pcx_pkt_addr       (lmq3_pcx_pkt_addr[10:0]),
+               .lsu_ttype_vld_m2        (lsu_ttype_vld_m2_bf1),  // Templated
+               .tlu_early_flush_pipe2_w (tlu_early_flush_pipe2_w),
+               .lsu_st_dcfill_size_e    (lsu_st_dcfill_size_e[1:0]),
+               .mbist_dcache_write      (mbist_dcache_write),
+               .mbist_dcache_read       (mbist_dcache_read));
+
+`else // !FPGA_SYN_1THREAD: this dctl instance wires the thread1..thread3 outputs to nets instead of leaving them open
+      
+lsu_dctl dctl (
+               .so                      (short_scan1_3),
+               .si                      (short_scan1_2),
+
+         .lsu_dtlb_cam_real_e     (lsu_dtlb_cam_real_e),
+
+		      /*AUTOINST*/
+               // Outputs
+               .lsu_tlu_nucleus_ctxt_m  (lsu_tlu_nucleus_ctxt_m),
+               .lsu_quad_word_access_g  (lsu_quad_word_access_g),
+               .dctl_rst_l              (dctl_rst_l),
+               .lsu_tlu_wsr_inst_e      (lsu_tlu_wsr_inst_e),
+               .lsu_l2fill_fpld_e       (lsu_l2fill_fpld_e),
+               .dva_vld_m_bf            (dva_vld_m_bf[3:0]),
+               .lsu_no_spc_pref         (lsu_no_spc_pref[3:0]),
+               .ifu_tlu_flush_fd_w      (ifu_tlu_flush_fd_w),
+               .ifu_tlu_flush_fd2_w     (ifu_tlu_flush_fd2_w),
+               .ifu_tlu_flush_fd3_w     (ifu_tlu_flush_fd3_w),
+               .ifu_lsu_flush_w         (ifu_lsu_flush_w),
+               .lsu_tlu_thrid_d         (lsu_tlu_thrid_d[1:0]),
+               .lsu_diagnstc_data_sel   (lsu_diagnstc_data_sel[3:0]),
+               .lsu_diagnstc_va_sel     (lsu_diagnstc_va_sel[3:0]),
+               .lsu_err_addr_sel        (lsu_err_addr_sel[2:0]),
+               .dva_bit_wr_en_e         (dva_bit_wr_en_e[15:0]),
+               .dva_wr_adr_e            (dva_wr_adr_e[10:6]),
+               .lsu_exu_ldst_miss_w2    (lsu_exu_ldst_miss_w2),
+               .lsu_exu_dfill_vld_w2    (lsu_exu_dfill_vld_w2),
+               .lsu_ffu_ld_vld          (lsu_ffu_ld_vld),
+               .lsu_ld_miss_wb          (lsu_ld_miss_wb),
+               .lsu_dtlb_bypass_e       (lsu_dtlb_bypass_e),
+               .ld_pcx_pkt_g            (ld_pcx_pkt_g[`LMQ_WIDTH-1:40]),
+               .tlb_ldst_cam_vld        (tlb_ldst_cam_vld),
+               .ldxa_internal           (ldxa_internal),
+               .lsu_ifu_ldsta_internal_e(lsu_ifu_ldsta_internal_e),
+               .lsu_ifu_ldst_cmplt      (lsu_ifu_ldst_cmplt[3:0]),
+               .lsu_ifu_itlb_en         (lsu_ifu_itlb_en[3:0]),
+               .lsu_ifu_icache_en       (lsu_ifu_icache_en[3:0]),
+               .lmq_byp_data_en_w2      (lmq_byp_data_en_w2[3:0]),
+               .lmq_byp_data_fmx_sel    (lmq_byp_data_fmx_sel[3:0]),
+               .lmq_byp_data_mxsel0     (lmq_byp_data_mxsel0[3:0]),
+               .lmq_byp_data_mxsel1     (lmq_byp_data_mxsel1[3:0]),
+               .lmq_byp_data_mxsel2     (lmq_byp_data_mxsel2[3:0]),
+               .lmq_byp_data_mxsel3     (lmq_byp_data_mxsel3[3:0]),
+               .lmq_byp_ldxa_mxsel0     (lmq_byp_ldxa_mxsel0[2:0]),
+               .lmq_byp_ldxa_mxsel1     (lmq_byp_ldxa_mxsel1[2:0]),
+               .lmq_byp_ldxa_mxsel2     (lmq_byp_ldxa_mxsel2[2:0]),
+               .lmq_byp_ldxa_mxsel3     (lmq_byp_ldxa_mxsel3[2:0]),
+               .lsu_ld_thrd_byp_sel_e   (lsu_ld_thrd_byp_sel_e[2:0]),
+               .dcache_byte_wr_en_e     (dcache_byte_wr_en_e[15:0]),
+               .lsu_dcache_wr_vld_e     (lsu_dcache_wr_vld_e),
+               .lsu_ldstub_g            (lsu_ldstub_g),
+               .lsu_swap_g              (lsu_swap_g),
+               .lsu_tlu_dtlb_done       (lsu_tlu_dtlb_done),
+               .lsu_exu_thr_m           (lsu_exu_thr_m[1:0]),
+               .merge7_sel_byte0_m      (merge7_sel_byte0_m),
+               .merge7_sel_byte7_m      (merge7_sel_byte7_m),
+               .merge6_sel_byte1_m      (merge6_sel_byte1_m),
+               .merge6_sel_byte6_m      (merge6_sel_byte6_m),
+               .merge5_sel_byte2_m      (merge5_sel_byte2_m),
+               .merge5_sel_byte5_m      (merge5_sel_byte5_m),
+               .merge4_sel_byte3_m      (merge4_sel_byte3_m),
+               .merge4_sel_byte4_m      (merge4_sel_byte4_m),
+               .merge3_sel_byte0_m      (merge3_sel_byte0_m),
+               .merge3_sel_byte3_m      (merge3_sel_byte3_m),
+               .merge3_sel_byte4_m      (merge3_sel_byte4_m),
+               .merge3_sel_byte7_default_m(merge3_sel_byte7_default_m),
+               .merge3_sel_byte_m       (merge3_sel_byte_m),
+               .merge2_sel_byte1_m      (merge2_sel_byte1_m),
+               .merge2_sel_byte2_m      (merge2_sel_byte2_m),
+               .merge2_sel_byte5_m      (merge2_sel_byte5_m),
+               .merge2_sel_byte6_default_m(merge2_sel_byte6_default_m),
+               .merge2_sel_byte_m       (merge2_sel_byte_m),
+               .merge0_sel_byte0_m      (merge0_sel_byte0_m),
+               .merge0_sel_byte1_m      (merge0_sel_byte1_m),
+               .merge0_sel_byte2_m      (merge0_sel_byte2_m),
+               .merge0_sel_byte3_default_m(merge0_sel_byte3_default_m),
+               .merge0_sel_byte4_m      (merge0_sel_byte4_m),
+               .merge0_sel_byte5_m      (merge0_sel_byte5_m),
+               .merge0_sel_byte6_m      (merge0_sel_byte6_m),
+               .merge0_sel_byte7_default_m(merge0_sel_byte7_default_m),
+               .merge1_sel_byte0_m      (merge1_sel_byte0_m),
+               .merge1_sel_byte1_m      (merge1_sel_byte1_m),
+               .merge1_sel_byte2_m      (merge1_sel_byte2_m),
+               .merge1_sel_byte3_default_m(merge1_sel_byte3_default_m),
+               .merge1_sel_byte4_m      (merge1_sel_byte4_m),
+               .merge1_sel_byte5_m      (merge1_sel_byte5_m),
+               .merge1_sel_byte6_m      (merge1_sel_byte6_m),
+               .merge1_sel_byte7_default_m(merge1_sel_byte7_default_m),
+               .merge0_sel_byte_1h_m    (merge0_sel_byte_1h_m),
+               .merge1_sel_byte_1h_m    (merge1_sel_byte_1h_m),
+               .merge1_sel_byte_2h_m    (merge1_sel_byte_2h_m),
+               .lsu_dtagv_wr_vld_e      (lsu_dtagv_wr_vld_e),
+               .lsu_dtag_wrreq_x_e      (lsu_dtag_wrreq_x_e),
+               .lsu_dtag_index_sel_x_e  (lsu_dtag_index_sel_x_e),
+               .lsu_dtlb_wr_vld_e       (lsu_dtlb_wr_vld_e),
+               .lsu_dtlb_tag_rd_e       (lsu_dtlb_tag_rd_e),
+               .lsu_dtlb_data_rd_e      (lsu_dtlb_data_rd_e),
+               .lsu_dtlb_dmp_vld_e      (lsu_dtlb_dmp_vld_e),
+               .lsu_dtlb_dmp_all_e      (lsu_dtlb_dmp_all_e),
+               .lsu_dtlb_rwindex_vld_e  (lsu_dtlb_rwindex_vld_e),
+               .lsu_dtlb_invalid_all_l_m(lsu_dtlb_invalid_all_l_m),
+               .lsu_tlu_tlb_ld_inst_m   (lsu_tlu_tlb_ld_inst_m),
+               .lsu_tlu_tlb_st_inst_m   (lsu_tlu_tlb_st_inst_m),
+               .lsu_tlu_tlb_access_tid_m(lsu_tlu_tlb_access_tid_m[1:0]),
+               .lsu_tlb_data_rd_vld_g   (lsu_tlb_data_rd_vld_g),
+               .lsu_tlb_st_sel_m        (lsu_tlb_st_sel_m[3:0]),
+               .lsu_va_wtchpt0_wr_en_l  (lsu_va_wtchpt0_wr_en_l),
+               .lsu_va_wtchpt1_wr_en_l  (lsu_va_wtchpt1_wr_en_l),
+               .lsu_va_wtchpt2_wr_en_l  (lsu_va_wtchpt2_wr_en_l),
+               .lsu_va_wtchpt3_wr_en_l  (lsu_va_wtchpt3_wr_en_l),
+               .thread0_m               (thread0_m),
+               .thread1_m               (thread1_m),
+               .thread2_m               (thread2_m),
+               .thread3_m               (thread3_m),
+               .lsu_dctldp_thread0_m    (lsu_dctldp_thread0_m),
+               .lsu_dctldp_thread1_m    (lsu_dctldp_thread1_m),
+               .lsu_dctldp_thread2_m    (lsu_dctldp_thread2_m),
+               .lsu_dctldp_thread3_m    (lsu_dctldp_thread3_m),
+               .thread0_g               (thread0_g),
+               .thread1_g               (thread1_g),
+               .thread2_g               (thread2_g),
+               .thread3_g               (thread3_g),
+               .lsu_tlu_nonalt_ldst_m   (lsu_tlu_nonalt_ldst_m),
+               .lsu_tlu_xslating_ldst_m (lsu_tlu_xslating_ldst_m),
+               .lsu_tlu_ctxt_sel_m      (lsu_tlu_ctxt_sel_m[2:0]),
+               .lsu_tlu_write_op_m      (lsu_tlu_write_op_m),
+               .lsu_dtlb_addr_mask_l_e  (lsu_dtlb_addr_mask_l_e),
+               .dva_din_e               (dva_din_e),
+               .lsu_diagnstc_dtagv_prty_invrt_e(lsu_diagnstc_dtagv_prty_invrt_e),
+               .lsu_ifu_asi_load        (lsu_ifu_asi_load),
+               .lsu_ifu_asi_thrid       (lsu_ifu_asi_thrid[1:0]),
+               .lsu_ifu_asi_vld         (lsu_ifu_asi_vld),
+               .lsu_quad_asi_e          (lsu_quad_asi_e),
+               .lsu_local_ldxa_sel_g    (lsu_local_ldxa_sel_g),
+               .lsu_dtag_rsel_m         (lsu_dtag_rsel_m[3:0]),
+               .lsu_tlbop_force_swo     (lsu_tlbop_force_swo),
+               .lsu_atomic_pkt2_bsel_g  (lsu_atomic_pkt2_bsel_g[2:0]),
+               .lsu_dcache_tag_perror_g (lsu_dcache_tag_perror_g),
+               .lsu_dcache_data_perror_g(lsu_dcache_data_perror_g),
+               .lsu_ifu_l2_unc_error    (lsu_ifu_l2_unc_error),
+               .lsu_ifu_l2_corr_error   (lsu_ifu_l2_corr_error),
+               .lsu_ifu_dcache_data_perror(lsu_ifu_dcache_data_perror),
+               .lsu_ifu_dcache_tag_perror(lsu_ifu_dcache_tag_perror),
+               .lsu_ifu_error_tid       (lsu_ifu_error_tid[1:0]),
+               .lsu_ifu_io_error        (lsu_ifu_io_error),
+               .lsu_tlu_squash_va_oor_m (lsu_tlu_squash_va_oor_m),
+               .lsu_squash_va_oor_m     (lsu_squash_va_oor_m),
+               .tlb_cam_hit_g           (tlb_cam_hit_g),
+               .lsu_st_hw_le_g          (lsu_st_hw_le_g),
+               .lsu_st_w_or_dbl_le_g    (lsu_st_w_or_dbl_le_g),
+               .lsu_st_x_le_g           (lsu_st_x_le_g),
+               .lsu_swap_sel_default_g  (lsu_swap_sel_default_g),
+               .lsu_swap_sel_default_byte_7_2_g(lsu_swap_sel_default_byte_7_2_g),
+               .lsu_st_rmo_m            (lsu_st_rmo_m),
+               .lsu_bst_in_pipe_m       (lsu_bst_in_pipe_m),
+               .lsu_snap_blk_st_m       (lsu_snap_blk_st_m),
+               .lsu_blk_st_m            (lsu_blk_st_m),
+               .lsu_blkst_pgnum_m       (lsu_blkst_pgnum_m[39:10]),
+               .lsu_ffu_blk_asi_e       (lsu_ffu_blk_asi_e),
+               .lsu_blk_asi_m           (lsu_blk_asi_m),
+               .lsu_nonalt_nucl_access_m(lsu_nonalt_nucl_access_m),
+               .dcache_alt_mx_sel_e     (dcache_alt_mx_sel_e),
+               .dcache_alt_mx_sel_e_bf  (dcache_alt_mx_sel_e_bf),
+               .dcache_rvld_e           (dcache_rvld_e),
+               .lsu_dc_iob_access_e     (lsu_dc_iob_access_e),
+               .lsu_ifu_ldst_miss_w     (lsu_ifu_ldst_miss_w),
+               .lsu_ifu_dc_parity_error_w2(lsu_ifu_dc_parity_error_w2),
+               .lsu_ldst_inst_vld_e     (lsu_ldst_inst_vld_e),
+               .lsu_local_ldxa_tlbrd_sel_g(lsu_local_ldxa_tlbrd_sel_g),
+               .lsu_local_diagnstc_tagrd_sel_g(lsu_local_diagnstc_tagrd_sel_g),
+               .lsu_va_wtchpt_sel_g     (lsu_va_wtchpt_sel_g),
+               .asi_state_wr_thrd       (asi_state_wr_thrd[3:0]),
+               .thread0_d               (thread0_d),
+               .thread1_d               (thread1_d),
+               .thread2_d               (thread2_d),
+               .thread3_d               (thread3_d),
+               .tlu_lsu_asi_update_g    (tlu_lsu_asi_update_g),
+               .pctxt_state_wr_thrd     (pctxt_state_wr_thrd[3:0]),
+               .sctxt_state_wr_thrd     (sctxt_state_wr_thrd[3:0]),
+               .thread_pctxt            (thread_pctxt),
+               .thread_sctxt            (thread_sctxt),
+               .thread_actxt            (thread_actxt),
+               .thread_default          (thread_default),
+               .thread0_ctxt            (thread0_ctxt),
+               .thread1_ctxt            (thread1_ctxt),
+               .thread2_ctxt            (thread2_ctxt),
+               .thread3_ctxt            (thread3_ctxt),
+               .pid_state_wr_en         (pid_state_wr_en[3:0]),
+               .thread0_e               (thread0_e),
+               .thread1_e               (thread1_e),
+               .thread2_e               (thread2_e),
+               .thread3_e               (thread3_e),
+               .dfture_tap_wr_mx_sel    (dfture_tap_wr_mx_sel),
+               .lctl_rst                (lctl_rst[3:0]),
+               .lsu_ctl_state_wr_en     (lsu_ctl_state_wr_en[3:0]),
+               .lsuctl_ctlbits_wr_en    (lsuctl_ctlbits_wr_en[3:0]),
+               .dfture_tap_rd_en        (dfture_tap_rd_en[3:0]),
+               .bist_tap_wr_en          (bist_tap_wr_en),
+               .bistctl_wr_en           (),                      // Templated
+               .bist_ctl_reg_wr_en      (bist_ctl_reg_wr_en),
+               .mrgn_tap_wr_en          (mrgn_tap_wr_en),
+               .ldiagctl_wr_en          (ldiagctl_wr_en),
+               .misc_ctl_sel_din        (misc_ctl_sel_din[3:0]),
+               .lsu_asi_sel_fmx1        (lsu_asi_sel_fmx1[2:0]),
+               .lsu_asi_sel_fmx2        (lsu_asi_sel_fmx2[2:0]),
+               .tlb_access_en0_g        (tlb_access_en0_g),
+               .tlb_access_en1_g        (tlb_access_en1_g),
+               .tlb_access_en2_g        (tlb_access_en2_g),
+               .tlb_access_en3_g        (tlb_access_en3_g),
+               .tlb_access_sel_thrd0    (tlb_access_sel_thrd0),
+               .tlb_access_sel_thrd1    (tlb_access_sel_thrd1),
+               .tlb_access_sel_thrd2    (tlb_access_sel_thrd2),
+               .tlb_access_sel_default  (tlb_access_sel_default),
+               .mrgnctl_wr_en           (mrgnctl_wr_en),
+               .hpv_priv_m              (hpv_priv_m),
+               .hpstate_en_m            (hpstate_en_m),
+               .dcache_arry_data_sel_m  (dcache_arry_data_sel_m),
+               .dtlb_bypass_m           (dtlb_bypass_m),
+               .lsu_alt_space_m         (lsu_alt_space_m),
+               .atomic_m                (atomic_m),
+               .ldst_dbl_m              (ldst_dbl_m),
+               .fp_ldst_m               (fp_ldst_m),
+               .lda_internal_m          (lda_internal_m),
+               .sta_internal_m          (sta_internal_m),
+               .cam_real_m              (cam_real_m),
+               .data_rd_vld_g           (data_rd_vld_g),
+               .tag_rd_vld_g            (tag_rd_vld_g),
+               .ldst_sz_m               (ldst_sz_m[1:0]),
+               .asi_internal_m          (asi_internal_m),
+               .rd_only_ltlb_asi_e      (rd_only_ltlb_asi_e),
+               .wr_only_ltlb_asi_e      (wr_only_ltlb_asi_e),
+               .dfill_tlb_asi_e         (dfill_tlb_asi_e),
+               .ifill_tlb_asi_e         (ifill_tlb_asi_e),
+               .nofault_asi_m           (nofault_asi_m),
+               .as_if_user_asi_m        (as_if_user_asi_m),
+               .atomic_asi_m            (atomic_asi_m),
+               .phy_use_ec_asi_m        (phy_use_ec_asi_m),
+               .phy_byp_ec_asi_m        (phy_byp_ec_asi_m),
+               .quad_asi_m              (quad_asi_m),
+               .binit_quad_asi_m        (binit_quad_asi_m),
+               .blk_asi_m               (blk_asi_m),
+               .recognized_asi_m        (recognized_asi_m),
+               .strm_asi_m              (strm_asi_m),
+               .mmu_rd_only_asi_m       (mmu_rd_only_asi_m),
+               .rd_only_asi_m           (rd_only_asi_m),
+               .wr_only_asi_m           (wr_only_asi_m),
+               .unimp_asi_m             (unimp_asi_m),
+               .va_wtchpt_cmp_en_m      (va_wtchpt_cmp_en_m),
+               .lsu_tlu_async_ttype_vld_w2(lsu_tlu_async_ttype_vld_w2),
+               .lsu_tlu_async_ttype_w2  (lsu_tlu_async_ttype_w2[6:0]),
+               .lsu_tlu_async_tid_w2    (lsu_tlu_async_tid_w2[1:0]),
+               .async_tlb_index         (async_tlb_index[5:0]),
+               .l2fill_vld_m            (l2fill_vld_m),
+               .ld_thrd_byp_mxsel_m     (ld_thrd_byp_mxsel_m[3:0]),
+               .morphed_addr_m          (morphed_addr_m[7:0]),
+               .signed_ldst_byte_m      (signed_ldst_byte_m),
+               .signed_ldst_hw_m        (signed_ldst_hw_m),
+               .signed_ldst_w_m         (signed_ldst_w_m),
+               .lsu_tlb_asi_data_perr_g (lsu_tlb_asi_data_perr_g),
+               .lsu_tlb_asi_tag_perr_g  (lsu_tlb_asi_tag_perr_g),
+               .lsu_sscan_data          (lsu_sscan_data[14:13]), // Templated
+               .lsu_ld_inst_vld_g       (lsu_ld_inst_vld_g[3:0]),
+               .lsu_dcache_rand         (lsu_dcache_rand[1:0]),
+               .lsu_encd_way_hit        (lsu_encd_way_hit[1:0]),
+               .lsu_way_hit_or          (lsu_way_hit_or),
+               .lsu_memref_m            (lsu_memref_m),
+               .lsu_flsh_inst_m         (lsu_flsh_inst_m),
+               .lsu_ifu_asi_data_en_l   (lsu_ifu_asi_data_en_l),
+               .lsu_dcache_fill_addr_e  (lsu_dcache_fill_addr_e[10:3]),
+               .lsu_dcache_fill_addr_e_err(lsu_dcache_fill_addr_e_err[10:4]),
+               .lsu_thread_g            (lsu_thread_g[3:0]),
+               .lmq_ldd_vld             (lmq_ldd_vld),
+               .lsu_bist_rsel_way_e     (lsu_bist_rsel_way_e[3:0]),
+               .lsu_dcache_fill_way_e   (lsu_dcache_fill_way_e[3:0]),
+               .lmq_ld_addr_b3          (lmq_ld_addr_b3),
+               .lsu_outstanding_rmo_st_max(lsu_outstanding_rmo_st_max[3:0]),
+               .lsu_dcfill_data_mx_sel_e(lsu_dcfill_data_mx_sel_e),
+               // Inputs
+               .se                      (se),
+               .sehold                  (sehold),
+               .rst_tri_en              (mux_drive_disable),     // Templated
+               .rclk                    (clk),                   // Templated
+               .grst_l                  (grst_l),
+               .arst_l                  (arst_l),
+               .lsu_diag_va_prty_invrt  (lsu_diag_va_prty_invrt),
+               .dva_svld_e              (dva_svld_e),
+               .dva_snp_bit_wr_en_e     (dva_snp_bit_wr_en_e[15:0]),
+               .dva_snp_addr_e          (dva_snp_addr_e[4:0]),
+               .lsu_tte_data_cp_g       (tlb_rd_tte_data[`STLB_DATA_CP]), // Templated
+               .lsu_l2fill_vld          (lsu_l2fill_vld),
+               .ld_inst_vld_e           (ifu_lsu_ld_inst_e),     // Templated
+               .st_inst_vld_e           (ifu_lsu_st_inst_e),     // Templated
+               .ifu_lsu_ldst_fp_e       (ifu_lsu_ldst_fp_e),
+               .ldst_sz_e               (ifu_lsu_ldst_size_e[1:0]), // Templated
+               .lsu_ldst_va_b12_b11_m   (lsu_ldst_va_m[12:11]),  // Templated
+               .lsu_ldst_va_b7_b0_m     (lsu_ldst_va_m[7:0]),    // Templated
+               .ifu_lsu_rd_e            (ifu_lsu_rd_e[4:0]),
+               .tlb_cam_hit             (tlb_cam_hit),
+               .ifu_tlu_sraddr_d        (ifu_tlu_sraddr_d[6:0]),
+               .ifu_tlu_wsr_inst_d      (ifu_tlu_wsr_inst_d),
+               .ifu_lsu_alt_space_d     (ifu_lsu_alt_space_d),
+               .tlu_lsu_int_ldxa_vld_w2 (tlu_lsu_int_ldxa_vld_w2),
+               .tlu_lsu_int_ld_ill_va_w2(tlu_lsu_int_ld_ill_va_w2),
+               .tlu_lsu_ldxa_tid_w2     (tlu_lsu_ldxa_tid_w2[1:0]),
+               .ifu_lsu_ldxa_data_vld_w2(ifu_lsu_ldxa_data_vld_w2),
+               .ifu_lsu_ldxa_illgl_va_w2(ifu_lsu_ldxa_illgl_va_w2),
+               .ifu_lsu_ldxa_tid_w2     (ifu_lsu_ldxa_tid_w2[1:0]),
+               .ifu_lsu_asi_rd_unc      (ifu_lsu_asi_rd_unc),
+               .tlu_lsu_tl_zero         (tlu_lsu_tl_zero[3:0]),
+               .ifu_lsu_thrid_s         (ifu_lsu_thrid_s[1:0]),
+               .ifu_lsu_ldst_dbl_e      (ifu_lsu_ldst_dbl_e),
+               .ld_stb_full_raw_w2      (ld_stb_full_raw_w2),
+               .ld_sec_active           (ld_sec_active),
+               .ifu_tlu_inst_vld_m      (ifu_tlu_inst_vld_m_bf1), // Templated
+               .lsu_l2fill_bendian_m    (lsu_l2fill_bendian_m),
+               .lmq0_l2fill_fpld        (lmq0_l2fill_fpld),
+               .lmq1_l2fill_fpld        (lmq1_l2fill_fpld),
+               .lmq2_l2fill_fpld        (lmq2_l2fill_fpld),
+               .lmq3_l2fill_fpld        (lmq3_l2fill_fpld),
+               .cache_way_hit_buf1      (cache_way_hit_buf1[3:0]),
+               .cache_hit               (cache_hit),
+               .lmq0_byp_misc_sz        (lmq0_byp_misc_sz[1:0]),
+               .lmq1_byp_misc_sz        (lmq1_byp_misc_sz[1:0]),
+               .lmq2_byp_misc_sz        (lmq2_byp_misc_sz[1:0]),
+               .lmq3_byp_misc_sz        (lmq3_byp_misc_sz[1:0]),
+               .lsu_l2fill_sign_extend_m(lsu_l2fill_sign_extend_m),
+               .lsu_l1hit_sign_extend_e (ifu_lsu_sign_ext_e),    // Templated
+               .tlu_lsu_pstate_cle      (tlu_lsu_pstate_cle[3:0]),
+               .tlu_lsu_pstate_am       (tlu_lsu_pstate_am[3:0]),
+               .tlb_pgnum               ({tlb_pgnum_buf[39:10]}), // Templated
+               .tlb_demap_nctxt         (tlu_dtlb_dmp_nctxt_g),  // Templated
+               .tlb_demap_pctxt         (tlu_dtlb_dmp_pctxt_g),  // Templated
+               .tlb_demap_sctxt         (tlu_dtlb_dmp_sctxt_g),  // Templated
+               .tlb_demap_actxt         (tlu_dtlb_dmp_actxt_g),  // Templated
+               .tlb_demap_thrid         (tlu_idtlb_dmp_thrid_g[1:0]), // Templated
+               .ifu_lsu_casa_e          (ifu_lsu_casa_e),
+               .ifu_lsu_ldstub_e        (ifu_lsu_ldstub_e),
+               .ifu_lsu_swap_e          (ifu_lsu_swap_e),
+               .lsu_atm_st_cmplt_e      (lsu_atm_st_cmplt_e),
+               .lsu_cpx_pkt_atm_st_cmplt(lsu_cpx_pkt_atm_st_cmplt),
+               .spu_lsu_ldxa_data_vld_w2(spu_lsu_ldxa_data_vld_w2),
+               .spu_lsu_ldxa_illgl_va_w2(spu_lsu_ldxa_illgl_va_w2),
+               .spu_lsu_ldxa_tid_w2     (spu_lsu_ldxa_tid_w2[1:0]),
+               .spu_lsu_stxa_ack_tid    (spu_lsu_stxa_ack_tid[1:0]),
+               .spu_lsu_stxa_ack        (spu_lsu_stxa_ack),
+               .spu_lsu_unc_error_w2    (spu_lsu_unc_error_w2),
+               .spu_lsu_int_w2          (spu_lsu_int_w2),
+               .tlu_lsu_stxa_ack        (tlu_lsu_stxa_ack),
+               .tlu_lsu_stxa_ack_tid    (tlu_lsu_stxa_ack_tid[1:0]),
+               .lsu_tlb_invert_endian_g (tlb_rd_tte_data_ie_buf), // Templated
+               .lmq0_ncache_ld          (lmq0_ncache_ld),
+               .lmq1_ncache_ld          (lmq1_ncache_ld),
+               .lmq2_ncache_ld          (lmq2_ncache_ld),
+               .lmq3_ncache_ld          (lmq3_ncache_ld),
+               .ifu_tlu_mb_inst_e       (ifu_tlu_mb_inst_e),
+               .ifu_tlu_flsh_inst_e     (ifu_tlu_flsh_inst_e),
+               .lsu_stb_empty           (lsu_stb_empty[3:0]),
+               .tlu_dtlb_tag_rd_g       (tlu_dtlb_tag_rd_g),
+               .tlu_dtlb_data_rd_g      (tlu_dtlb_data_rd_g),
+               .tlu_dtlb_dmp_vld_g      (tlu_dtlb_dmp_vld_g),
+               .tlu_dtlb_dmp_all_g      (tlu_dtlb_dmp_all_g),
+               .tlu_dtlb_rw_index_vld_g (tlu_dtlb_rw_index_vld_g),
+               .tlu_dtlb_invalidate_all_g(tlu_dtlb_invalidate_all_g),
+               .lsu_st_wr_dcache        (lsu_st_wr_dcache),
+               .tlu_lsu_asi_update_m    (tlu_lsu_asi_update_m),
+               .tlu_lsu_tid_m           (tlu_lsu_tid_m[1:0]),
+               .lsu_rd_dtag_parity_g    (lsu_rd_dtag_parity_g[3:0]),
+               .dcache_rparity_err_wb   (dcache_rparity_err_wb),
+               .lsu_diagnstc_wr_data_b0 (lsu_diagnstc_wr_data_b0),
+               .lsu_byp_ldd_oddrd_m     (lsu_byp_ldd_oddrd_m),
+               .tlu_lsu_redmode         (tlu_lsu_redmode[3:0]),
+               .tlu_lsu_redmode_rst_d1  (tlu_lsu_redmode_rst_d1[3:0]),
+               .dva_vld_m               (dva_vld_m[3:0]),
+               .lsu_dfill_tid_e         (dfq_tid[1:0]),          // Templated
+               .ifu_lsu_asi_ack         (ifu_lsu_asi_ack),
+               .lsu_intrpt_cmplt        (lsu_intrpt_cmplt[3:0]),
+               .lsu_iobrdge_tap_rq_type_b8(lsu_iobrdge_tap_rq_type[8:8]), // Templated
+               .lsu_iobrdge_tap_rq_type_b6_b3(lsu_iobrdge_tap_rq_type[6:3]), // Templated
+               .lsu_iobrdge_tap_rq_type_b1_b0(lsu_iobrdge_tap_rq_type[1:0]), // Templated
+               .lsu_iobrdge_fwd_pkt_vld (lsu_iobrdge_fwd_pkt_vld),
+               .lsu_cpx_ld_dtag_perror_e(lsu_cpx_ld_dtag_perror_e),
+               .lsu_cpx_ld_dcache_perror_e(lsu_cpx_ld_dcache_perror_e),
+               .lsu_cpx_pkt_ld_err      (lsu_cpx_pkt_ld_err[1:0]),
+               .ifu_lsu_nceen           (ifu_lsu_nceen[3:0]),
+               .tlu_lsu_ldxa_async_data_vld(tlu_lsu_ldxa_async_data_vld),
+               .tlu_lsu_hpv_priv        (tlu_lsu_hpv_priv[3:0]),
+               .tlu_lsu_hpstate_en      (tlu_lsu_hpstate_en[3:0]),
+               .ifu_lsu_memref_d        (ifu_lsu_memref_d),
+               .ifu_lsu_pref_inst_e     (ifu_lsu_pref_inst_e),
+               .lsu_pref_pcx_req        (lsu_pref_pcx_req),
+               .lsu_cpx_pkt_prefetch2   (lsu_cpx_pkt_prefetch2),
+               .lsu_ld_pcx_rq_sel_d2    (lsu_ld_pcx_rq_sel_d2[3:0]),
+               .lsu_pcx_req_squash_d1   (lsu_pcx_req_squash_d1),
+               .lsu_bld_helper_cmplt_m  (lsu_bld_helper_cmplt_m),
+               .lsu_bld_cnt_m           (lsu_bld_cnt_m[2:0]),
+               .lsu_bld_reset           (lsu_bld_reset),
+               .ffu_lsu_blk_st_e        (ffu_lsu_blk_st_e),
+               .lsu_stb_rmo_st_issue    (lsu_stb_rmo_st_issue[3:0]),
+               .lsu_cpx_rmo_st_ack      (lsu_cpx_rmo_st_ack[3:0]),
+               .lsu_dfq_flsh_cmplt      (lsu_dfq_flsh_cmplt[3:0]),
+               .stb_cam_hit             (stb_cam_hit_bf1),       // Templated
+               .ifu_tlu_flush_m         (ifu_tlu_flush_m),
+               .ctu_sscan_tid           (ctu_sscan_tid[3:0]),
+               .tte_data_perror_unc     (tte_data_perror_unc),
+               .asi_tte_data_perror     (asi_tte_data_perror),
+               .asi_tte_tag_perror      (asi_tte_tag_perror),
+               .tlu_dtlb_rw_index_g     (tlu_dtlb_rw_index_g[5:0]),
+               .lsu_local_early_flush_g (lsu_local_early_flush_g),
+               .lsu_dfq_vld             (lsu_dfq_vld),
+               .gdbginit_l              (gdbginit_l),
+               .dc_direct_map           (dc_direct_map),
+               .asi_d                   (asi_d[7:0]),
+               .lsu_dctl_asi_state_m    (lsu_dctl_asi_state_m[7:0]),
+               .lsu_ldst_va_g           (lsu_ldst_va_g[7:0]),
+               .lsu_ifu_err_addr_b39    (lsu_ifu_err_addr[39]),  // Templated
+               .lsu_dp_ctl_reg0         (lsu_dp_ctl_reg0[5:0]),
+               .lsu_dp_ctl_reg1         (lsu_dp_ctl_reg1[5:0]),
+               .lsu_dp_ctl_reg2         (lsu_dp_ctl_reg2[5:0]),
+               .lsu_dp_ctl_reg3         (lsu_dp_ctl_reg3[5:0]),
+               .ldd_in_dfq_out          (ldd_in_dfq_out),
+               .dcache_iob_addr_e       (dcache_iob_addr_e[7:0]),
+               .mbist_dcache_index      (mbist_dcache_index[6:0]),
+               .mbist_dcache_word       (mbist_dcache_word),
+               .lsu_diagnstc_wr_addr_e  (lsu_diagnstc_wr_addr_e[10:0]),
+               .st_dcfill_addr          (st_dcfill_addr[10:0]),
+               .lsu_dfq_ld_vld          (lsu_dfq_ld_vld),
+               .lsu_dfq_st_vld          (lsu_dfq_st_vld),
+               .lmq0_ldd_vld            (lmq0_ldd_vld),
+               .lmq1_ldd_vld            (lmq1_ldd_vld),
+               .lmq2_ldd_vld            (lmq2_ldd_vld),
+               .lmq3_ldd_vld            (lmq3_ldd_vld),
+               .lsu_dfq_byp_tid         (lsu_dfq_byp_tid[1:0]),
+               .dfq_byp_ff_en           (dfq_byp_ff_en),
+               .lsu_dcache_iob_way_e    (lsu_dcache_iob_way_e[1:0]),
+               .mbist_dcache_way        (mbist_dcache_way[1:0]),
+               .lsu_diagnstc_wr_way_e   (lsu_diagnstc_wr_way_e[1:0]),
+               .lsu_st_way_e            (lsu_st_way_e[1:0]),
+               .lmq0_pcx_pkt_way        (lmq0_pcx_pkt_way[1:0]),
+               .lmq1_pcx_pkt_way        (lmq1_pcx_pkt_way[1:0]),
+               .lmq2_pcx_pkt_way        (lmq2_pcx_pkt_way[1:0]),
+               .lmq3_pcx_pkt_way        (lmq3_pcx_pkt_way[1:0]),
+               .lmq0_ld_rq_type         (lmq0_ld_rq_type[2:0]),
+               .lmq1_ld_rq_type         (lmq1_ld_rq_type[2:0]),
+               .lmq2_ld_rq_type         (lmq2_ld_rq_type[2:0]),
+               .lmq3_ld_rq_type         (lmq3_ld_rq_type[2:0]),
+               .lmq0_pcx_pkt_addr       (lmq0_pcx_pkt_addr[10:0]),
+               .lmq1_pcx_pkt_addr       (lmq1_pcx_pkt_addr[10:0]),
+               .lmq2_pcx_pkt_addr       (lmq2_pcx_pkt_addr[10:0]),
+               .lmq3_pcx_pkt_addr       (lmq3_pcx_pkt_addr[10:0]),
+               .lsu_ttype_vld_m2        (lsu_ttype_vld_m2_bf1),  // Templated
+               .tlu_early_flush_pipe2_w (tlu_early_flush_pipe2_w),
+               .lsu_st_dcfill_size_e    (lsu_st_dcfill_size_e[1:0]),
+               .mbist_dcache_write      (mbist_dcache_write),
+               .mbist_dcache_read       (mbist_dcache_read));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+/*lsu_dcdp AUTO_TEMPLATE (
+           .dcache_alt_mx_sel_e  (dcache_alt_mx_sel_e_bf),
+           .rst_tri_en           (mux_drive_disable),
+           .rclk                 (clk));
+*/
+// dcdp: instance of lsu_dcdp (presumably the D-cache data path, judging by the module name -- confirm in lsu_dcdp.v).
+lsu_dcdp dcdp ( // .so/.si are connected by hand; the list after the AUTOINST marker carries "Templated" overrides from the lsu_dcdp AUTO_TEMPLATE above
+               .so                      (scan0_2), // scan segment out (by the so/si naming): scan0_1 -> dcdp -> scan0_2
+               .si                      (scan0_1), // scan segment in
+                /*AUTOINST*/
+               // Outputs
+               .dcache_rdata_wb_buf     (dcache_rdata_wb_buf[63:0]),
+               .mbist_dcache_data_in    (mbist_dcache_data_in[71:0]),
+               .lsu_exu_dfill_data_w2   (lsu_exu_dfill_data_w2[63:0]),
+               .lsu_ffu_ld_data         (lsu_ffu_ld_data[63:0]),
+               .stb_rdata_ramc_buf      (stb_rdata_ramc_buf[14:9]),
+               // Inputs
+               .rclk                    (clk),                   // Templated
+               .se                      (se),
+               .rst_tri_en              (mux_drive_disable),     // Templated
+               .dcache_rdata_wb         (dcache_rdata_wb[63:0]),
+               .dcache_rparity_wb       (dcache_rparity_wb[7:0]),
+               .dcache_rdata_msb_w0_m   (dcache_rdata_msb_w0_m[7:0]),
+               .dcache_rdata_msb_w1_m   (dcache_rdata_msb_w1_m[7:0]),
+               .dcache_rdata_msb_w2_m   (dcache_rdata_msb_w2_m[7:0]),
+               .dcache_rdata_msb_w3_m   (dcache_rdata_msb_w3_m[7:0]),
+               .lsu_bist_rsel_way_e     (lsu_bist_rsel_way_e[3:0]),
+               .dcache_alt_mx_sel_e     (dcache_alt_mx_sel_e_bf), // Templated
+               .cache_way_hit_buf2      (cache_way_hit_buf2[3:0]),
+               .morphed_addr_m          (morphed_addr_m[7:0]), // this net, signed_ldst_* and the merge0..3 selects below appear ahead of the "// Inputs" marker in the port list ending just above, i.e. as its outputs (merge4..7 not visible there -- confirm)
+               .signed_ldst_byte_m      (signed_ldst_byte_m),
+               .signed_ldst_hw_m        (signed_ldst_hw_m),
+               .signed_ldst_w_m         (signed_ldst_w_m),
+               .merge7_sel_byte0_m      (merge7_sel_byte0_m),
+               .merge7_sel_byte7_m      (merge7_sel_byte7_m),
+               .merge6_sel_byte1_m      (merge6_sel_byte1_m),
+               .merge6_sel_byte6_m      (merge6_sel_byte6_m),
+               .merge5_sel_byte2_m      (merge5_sel_byte2_m),
+               .merge5_sel_byte5_m      (merge5_sel_byte5_m),
+               .merge4_sel_byte3_m      (merge4_sel_byte3_m),
+               .merge4_sel_byte4_m      (merge4_sel_byte4_m),
+               .merge3_sel_byte0_m      (merge3_sel_byte0_m),
+               .merge3_sel_byte3_m      (merge3_sel_byte3_m),
+               .merge3_sel_byte4_m      (merge3_sel_byte4_m),
+               .merge3_sel_byte7_default_m(merge3_sel_byte7_default_m),
+               .merge3_sel_byte_m       (merge3_sel_byte_m),
+               .merge2_sel_byte1_m      (merge2_sel_byte1_m),
+               .merge2_sel_byte2_m      (merge2_sel_byte2_m),
+               .merge2_sel_byte5_m      (merge2_sel_byte5_m),
+               .merge2_sel_byte6_default_m(merge2_sel_byte6_default_m),
+               .merge2_sel_byte_m       (merge2_sel_byte_m),
+               .merge0_sel_byte0_m      (merge0_sel_byte0_m),
+               .merge0_sel_byte1_m      (merge0_sel_byte1_m),
+               .merge0_sel_byte2_m      (merge0_sel_byte2_m),
+               .merge0_sel_byte3_default_m(merge0_sel_byte3_default_m),
+               .merge0_sel_byte4_m      (merge0_sel_byte4_m),
+               .merge0_sel_byte5_m      (merge0_sel_byte5_m),
+               .merge0_sel_byte6_m      (merge0_sel_byte6_m),
+               .merge0_sel_byte7_default_m(merge0_sel_byte7_default_m),
+               .merge1_sel_byte0_m      (merge1_sel_byte0_m),
+               .merge1_sel_byte1_m      (merge1_sel_byte1_m),
+               .merge1_sel_byte2_m      (merge1_sel_byte2_m),
+               .merge1_sel_byte3_default_m(merge1_sel_byte3_default_m),
+               .merge1_sel_byte4_m      (merge1_sel_byte4_m),
+               .merge1_sel_byte5_m      (merge1_sel_byte5_m),
+               .merge1_sel_byte6_m      (merge1_sel_byte6_m),
+               .merge1_sel_byte7_default_m(merge1_sel_byte7_default_m),
+               .merge0_sel_byte_1h_m    (merge0_sel_byte_1h_m),
+               .merge1_sel_byte_1h_m    (merge1_sel_byte_1h_m),
+               .merge1_sel_byte_2h_m    (merge1_sel_byte_2h_m),
+               .stb_rdata_ramc          (stb_rdata_ramc[14:9])); // only bits [14:9] go in; stb_rdata_ramc_buf[14:9] is the same-width output listed above
+
+/*
+bw_r_tlb  AUTO_TEMPLATE (
+                 .rst_tri_en            (mem_write_disable),
+                 .rclk                  (clk),
+                 .adj                   (lsu_dtlb_mrgn[7:0]),   
+                 .cache_set_vld         (dva_vld_m[3:0]),        
+                 .grst_l                (1'b1), // hard reset not to be used 
+                 .rst_soft_l            (lsu_dtlb_invalid_all_l_m), 
+                 .hold              	(sehold),                      
+                 .tlb_addr_mask_l       (lsu_dtlb_addr_mask_l_e), 
+                 .tlb_bypass            (lsu_dtlb_bypass_e),     
+                 .tlb_bypass_va         (exu_lsu_ldst_va_e[12:10]), 
+                 .tlb_cam_pid           (lsu_dtlb_cam_pid_e[2:0]), 
+                 //.tlb_cam_real          (lsu_dtlb_cam_real_e),   
+                 .tlb_cam_vld           (tlb_ldst_cam_vld),      
+                 .tlb_demap             (lsu_dtlb_dmp_vld_e),    
+                 .tlb_demap_all         (lsu_dtlb_dmp_all_e),    
+                 .tlb_demap_auto        (tlu_dtlb_dmp_actxt_g),  
+                 //.tlb_demap_ctxt        (tlu_dtlb_dmp_by_ctxt_g), 
+                 .tlb_demap_key         (tlu_idtlb_dmp_key_g[40:0]), 
+                 .tlb_rd_data_vld       (lsu_dtlb_data_rd_e),    
+                 .tlb_rd_tag_vld        (lsu_dtlb_tag_rd_e),     
+                 .tlb_rw_index          (tlu_dtlb_rw_index_g[5:0]), 
+                 .tlb_rw_index_vld      (lsu_dtlb_rwindex_vld_e), 
+                 .tlb_wr_tte_data       (tlu_dtlb_tte_data_w2[42:0]), 
+                 .tlb_wr_tte_tag        (tlu_dtlb_tte_tag_w2[58:0]), 
+                 .tlb_wr_vld            (lsu_dtlb_wr_vld_e),     
+                 .cache_ptag_w0   ({dtag_rdata_w0_m[28:0], lsu_ldst_va_m[10]}), 
+                 .cache_ptag_w1   ({dtag_rdata_w1_m[28:0], lsu_ldst_va_m[10]}),  
+                 .cache_ptag_w2   ({dtag_rdata_w2_m[28:0], lsu_ldst_va_m[10]}),  
+                 .cache_ptag_w3   ({dtag_rdata_w3_m[28:0], lsu_ldst_va_m[10]}));     
+*/
+// dtlb: instance of bw_r_tlb_fpga (presumably the data TLB, per the instance name). NOTE(review): the AUTO_TEMPLATE comment above is declared for bw_r_tlb, not bw_r_tlb_fpga -- confirm the "Templated" connections would survive an AUTOINST re-run.
+bw_r_tlb_fpga dtlb  ( // .so/.si and .tlb_cam_key are connected by hand ahead of the AUTOINST marker
+                .so                     (short_scan0_4), // scan segment out (by the so/si naming): short_scan0_3 -> dtlb -> short_scan0_4
+                .si                     (short_scan0_3), // scan segment in
+          .tlb_cam_key   ( {exu_lsu_ldst_va_e[47:28], 1'b1, // 41-bit key (20+1+6+1+6+1+3+1+1+1), same width as tlb_demap_key[40:0] below
+                            exu_lsu_ldst_va_e[27:22], 1'b1, // a constant 1 follows each VA field -- presumably a per-field valid/size bit; confirm against the TLB key layout
+			                      exu_lsu_ldst_va_e[21:16], 1'b1, 
+                            exu_lsu_ldst_va_e[15:13], 1'b1, 
+                            //1'b1,
+			                      lsu_dtlb_cam_real_e, // the same "real" bit fills both low-order key bits
+			                      lsu_dtlb_cam_real_e}
+                          ), 
+
+                 /*AUTOINST*/
+                // Outputs
+                .tlb_rd_tte_tag         (tlb_rd_tte_tag[58:0]),
+                .tlb_rd_tte_data        (tlb_rd_tte_data[42:0]),
+                .tlb_pgnum              (tlb_pgnum[39:10]),
+                .tlb_pgnum_crit         (tlb_pgnum_crit[39:10]),
+                .tlb_cam_hit            (tlb_cam_hit),
+                .cache_way_hit          (cache_way_hit[3:0]),
+                .cache_hit              (cache_hit),
+                // Inputs
+                .tlb_cam_vld            (tlb_ldst_cam_vld),      // Templated
+                .tlb_cam_pid            (lsu_dtlb_cam_pid_e[2:0]), // Templated
+                .tlb_demap_key          (tlu_idtlb_dmp_key_g[40:0]), // Templated
+                .tlb_addr_mask_l        (lsu_dtlb_addr_mask_l_e), // Templated
+                .tlb_ctxt               (tlb_ctxt[12:0]),
+                .tlb_wr_vld             (lsu_dtlb_wr_vld_e),     // Templated
+                .tlb_wr_tte_tag         (tlu_dtlb_tte_tag_w2[58:0]), // Templated
+                .tlb_wr_tte_data        (tlu_dtlb_tte_data_w2[42:0]), // Templated
+                .tlb_rd_tag_vld         (lsu_dtlb_tag_rd_e),     // Templated
+                .tlb_rd_data_vld        (lsu_dtlb_data_rd_e),    // Templated
+                .tlb_rw_index           (tlu_dtlb_rw_index_g[5:0]), // Templated
+                .tlb_rw_index_vld       (lsu_dtlb_rwindex_vld_e), // Templated
+                .tlb_demap              (lsu_dtlb_dmp_vld_e),    // Templated
+                .tlb_demap_auto         (tlu_dtlb_dmp_actxt_g),  // Templated
+                .tlb_demap_all          (lsu_dtlb_dmp_all_e),    // Templated
+                .cache_ptag_w0          ({dtag_rdata_w0_m[28:0], lsu_ldst_va_m[10]}), // Templated: 29 tag-read bits + VA bit 10 = 30 bits (same shape for w1..w3)
+                .cache_ptag_w1          ({dtag_rdata_w1_m[28:0], lsu_ldst_va_m[10]}), // Templated
+                .cache_ptag_w2          ({dtag_rdata_w2_m[28:0], lsu_ldst_va_m[10]}), // Templated
+                .cache_ptag_w3          ({dtag_rdata_w3_m[28:0], lsu_ldst_va_m[10]}), // Templated
+                .cache_set_vld          (dva_vld_m[3:0]),        // Templated
+                .tlb_bypass_va          (exu_lsu_ldst_va_e[12:10]), // Templated
+                .tlb_bypass             (lsu_dtlb_bypass_e),     // Templated
+                .se                     (se),
+                .hold                   (sehold),                // Templated
+                .adj                    (lsu_dtlb_mrgn[7:0]),    // Templated
+                .arst_l                 (arst_l), // the template above also lists .grst_l (1'b1), but no grst_l connection appears in this instance
+                .rst_soft_l             (lsu_dtlb_invalid_all_l_m), // Templated
+                .rclk                   (clk),                   // Templated
+                .rst_tri_en             (mem_write_disable));     // Templated
+/*
+lsu_stb_rwctl  AUTO_TEMPLATE  (
+                         .rst_tri_en              (mux_drive_disable),
+                         .rclk             (clk),
+                         .stb_cam_hit      (stb_cam_hit_bf1),
+ 			                   .lsu_st_ack_rq_stb(4'b0000),
+                         .ffu_lsu_blk_st_tid_m (ffu_lsu_data[77:76]),
+                         .ld_inst_vld_e (ifu_lsu_ld_inst_e),     
+                         .ldst_sz_e     (ifu_lsu_ldst_size_e[1:0]), 
+                         .st_inst_vld_e (ifu_lsu_st_inst_e),     
+                         .stb_rdata_ramc_b8t0 (stb_rdata_ramc[8:0]),     
+                         .tlb_pgnum_b39_g(tlb_pgnum_buf[39]));        
+*/ 
+`ifdef FPGA_SYN_1THREAD
+   
+lsu_stb_rwctl stb_rwctl (  // lsu_stb_rwctl instance compiled when FPGA_SYN_1THREAD is defined
+                         .so            (short_scan1_4),  // scan-out; connected to stb_ctldp0.si below
+                         .si            (short_scan1_3),  // scan-in; driver is outside this section
+                         .stb_wdata_ramd_b75_b64(stb_wdata_ramd_b75_b64[75:64]),                    // placed in bits [75:64] of stb_data.din
+                  		   .stb_ldst_byte_msk_min	(stb_ldst_byte_msk_min[7:0]),  // low 8 bits of stb_cam.stb_camwr_data
+                         /*AUTOINST*/
+                         // Outputs
+                         .lsu_stbctl_flush_pipe_w(lsu_stbctl_flush_pipe_w),  // read by stb_ctl0
+                         .stb_cam_wr_no_ivld_m(stb_cam_wr_no_ivld_m),
+                         .ld_rawp_st_ced_w2(ld_rawp_st_ced_w2),
+                         .stb_data_wr_ptr(stb_data_wr_ptr[4:0]),  // stb_data.wr_adr
+                         .stb_data_wptr_vld(stb_data_wptr_vld),  // stb_data.wr_en
+                         .stb_data_rd_ptr(stb_data_rd_ptr[4:0]),  // stb_data.rd_adr
+                         .stb_data_rptr_vld(stb_data_rptr_vld),  // stb_data.rd_en
+                         .stb_cam_cm_tid(stb_cam_cm_tid[1:0]),
+                         .stb_ldst_byte_msk(stb_ldst_byte_msk[7:0]),
+                         .stb_cam_rw_ptr(stb_cam_rw_ptr[4:0]),  // stb_cam takes [2:0] as rw_ptr and [4:3] as rw_tid
+                         .stb_cam_wptr_vld(stb_cam_wptr_vld),
+                         .stb_cam_rptr_vld(stb_cam_rptr_vld),
+                         .lsu_st_sz_bhww_m(lsu_st_sz_bhww_m),  // the lsu_st_sz_* outputs are inputs of stb_rwdp
+                         .lsu_st_sz_dw_m(lsu_st_sz_dw_m),
+                         .lsu_st_sz_bhw_m(lsu_st_sz_bhw_m),
+                         .lsu_st_sz_wdw_m(lsu_st_sz_wdw_m),
+                         .lsu_st_sz_b_m (lsu_st_sz_b_m),
+                         .lsu_st_sz_w_m (lsu_st_sz_w_m),
+                         .lsu_st_sz_hw_m(lsu_st_sz_hw_m),
+                         .lsu_st_sz_hww_m(lsu_st_sz_hww_m),
+                         .ld_rawp_st_ackid_w2(ld_rawp_st_ackid_w2[2:0]),
+                         .stb_flush_st_g(stb_flush_st_g[3:0]),  // bit 0 goes to stb_ctl0
+                         .stb_cam_wvld_m(stb_cam_wvld_m[3:0]),  // bit 0 goes to stb_ctl0
+                         .lsu_st_rq_type_m(lsu_st_rq_type_m[2:1]),  // read by stb_ctldp0
+                         .lsu_stb_data_early_sel_e(lsu_stb_data_early_sel_e[3:0]),  // read by stb_rwdp
+                         .lsu_stb_data_final_sel_m(lsu_stb_data_final_sel_m),  // read by stb_rwdp
+                         .lsu_ldquad_inst_m(lsu_ldquad_inst_m),  // stb_cam.stb_quad_ld_cam
+                         .stb_thrd_en_g (stb_thrd_en_g[3:0]),  // bit 0 goes to stb_ctl0.thrd_en_g
+                         .flsh_inst_m   (flsh_inst_m),
+                         .lsu_stb_va_m  (lsu_stb_va_m[9:3]),  // high 7 bits of stb_cam.stb_camwr_data; [7:6] go to stb_ctldp0
+                         .lsu_stb_empty_buf(lsu_stb_empty_buf[3:0]),
+                         .lsu_spu_stb_empty(lsu_spu_stb_empty[3:0]),
+                         .ifu_tlu_inst_vld_m_bf1(ifu_tlu_inst_vld_m_bf1),
+                         .ifu_tlu_inst_vld_m_bf2(ifu_tlu_inst_vld_m_bf2),
+                         .lsu_ifu_stbcnt0(lsu_ifu_stbcnt0[3:0]),
+                         .lsu_ifu_stbcnt1(lsu_ifu_stbcnt1[3:0]),
+                         .lsu_ifu_stbcnt2(lsu_ifu_stbcnt2[3:0]),
+                         .lsu_ifu_stbcnt3(lsu_ifu_stbcnt3[3:0]),
+                         .lsu_ffu_stb_full0(lsu_ffu_stb_full0),
+                         .lsu_ffu_stb_full1(lsu_ffu_stb_full1),
+                         .lsu_ffu_stb_full2(lsu_ffu_stb_full2),
+                         .lsu_ffu_stb_full3(lsu_ffu_stb_full3),
+                         // Inputs
+                         .rclk          (clk),                   // Templated
+                         .rst_tri_en    (mux_drive_disable),     // Templated
+                         .se            (se),
+                         .ld_inst_vld_e (ifu_lsu_ld_inst_e),     // Templated
+                         .ldst_sz_e     (ifu_lsu_ldst_size_e[1:0]), // Templated
+                         .st_inst_vld_e (ifu_lsu_st_inst_e),     // Templated
+                         .stb_pcx_rptr0 (stb_pcx_rptr0[2:0]),  // driven by stb_ctl0.stb_pcx_rptr
+                         .stb_wrptr0    (stb_wrptr0[2:0]),  // driven by stb_ctl0.stb_wrptr
+                         .stb_pcx_rptr1 (stb_pcx_rptr1[2:0]),
+                         .stb_wrptr1    (stb_wrptr1[2:0]),
+                         .stb_pcx_rptr2 (stb_pcx_rptr2[2:0]),
+                         .stb_wrptr2    (stb_wrptr2[2:0]),
+                         .stb_pcx_rptr3 (stb_pcx_rptr3[2:0]),
+                         .stb_wrptr3    (stb_wrptr3[2:0]),
+                         .stb_cam_hit_ptr(stb_cam_hit_ptr[2:0]),  // driven by stb_cam
+                         .stb_cam_hit   (stb_cam_hit_bf1),       // Templated; presumably a buffered copy of stb_cam's stb_cam_hit -- buffer not visible here, confirm
+                         .lsu_ldst_va_m (lsu_ldst_va_m[9:0]),
+                         .sta_internal_m(sta_internal_m),
+                         .ifu_tlu_thrid_e(ifu_tlu_thrid_e[1:0]),
+                         .tlu_exu_early_flush_pipe_w(tlu_exu_early_flush_pipe_w),
+                         .lsu_ttype_vld_m2(lsu_ttype_vld_m2),
+                         .ifu_lsu_flush_w(ifu_lsu_flush_w),
+                         .lsu_defr_trp_taken_g(lsu_defr_trp_taken_g),
+                         .ifu_lsu_casa_e(ifu_lsu_casa_e),
+                         .ifu_lsu_ldstub_e(ifu_lsu_ldstub_e),
+                         .ifu_lsu_swap_e(ifu_lsu_swap_e),
+                         .ifu_lsu_ldst_dbl_e(ifu_lsu_ldst_dbl_e),
+                         .stb_state_ced0(stb_state_ced0[7:0]),  // driven by stb_ctl0.stb_state_ced_mod
+                         .stb_state_ced1(stb_state_ced1[7:0]),
+                         .stb_state_ced2(stb_state_ced2[7:0]),
+                         .stb_state_ced3(stb_state_ced3[7:0]),
+                         .stb_ld_full_raw(stb_ld_full_raw[7:0]),  // driven by stb_cam
+                         .stb_ld_partial_raw(stb_ld_partial_raw[7:0]),  // driven by stb_cam
+                         .stb_wrptr0_prev(stb_wrptr0_prev[2:0]),  // driven by stb_ctl0.stb_wrptr_prev
+                         .stb_wrptr1_prev(stb_wrptr1_prev[2:0]),
+                         .stb_wrptr2_prev(stb_wrptr2_prev[2:0]),
+                         .stb_wrptr3_prev(stb_wrptr3_prev[2:0]),
+                         .ifu_lsu_alt_space_e(ifu_lsu_alt_space_e),
+                         .ifu_lsu_ldst_fp_e(ifu_lsu_ldst_fp_e),
+                         .lsu_quad_asi_e(lsu_quad_asi_e),
+                         .lsu_st_rmo_m  (lsu_st_rmo_m),
+                         .lsu_bst_in_pipe_m(lsu_bst_in_pipe_m),
+                         .ffu_lsu_kill_fst_w(ffu_lsu_kill_fst_w),
+                         .ffu_lsu_blk_st_e(ffu_lsu_blk_st_e),
+                         .ffu_lsu_blk_st_tid_m(ffu_lsu_data[77:76]), // Templated; bits [77:76] of ffu_lsu_data are used as this port's value
+                         .ffu_lsu_blk_st_va_e(ffu_lsu_blk_st_va_e[5:3]),
+                         .lsu_snap_blk_st_m(lsu_snap_blk_st_m),
+                         .tlb_pgnum_b39_g(tlb_pgnum_buf[39]),    // Templated
+                         .lsu_stb_empty ({3'b000, lsu_stb_empty[0]}),  // 1-thread build: bits [3:1] tied to 0, only lsu_stb_empty[0] (from stb_ctl0) is passed
+                         .ifu_tlu_flsh_inst_e(ifu_tlu_flsh_inst_e),
+                         .stb_cam_mhit  (stb_cam_mhit),  // driven by stb_cam
+                         .ifu_tlu_inst_vld_m(ifu_tlu_inst_vld_m),
+                         .lsu_st_pcx_rq_pick(lsu_st_pcx_rq_pick[3:0]),
+                         .lsu_st_pcx_rq_vld(lsu_st_pcx_rq_vld),
+                         .stb_rdata_ramc_b8t0(stb_rdata_ramc[8:0]), // Templated; low 9 bits of stb_cam's 45-bit stb_rdata_ramc
+                         .lsu_stbcnt0   (lsu_stbcnt0[3:0]),  // driven by stb_ctl0.lsu_stbcnt
+                         .lsu_stbcnt1   (lsu_stbcnt1[3:0]),
+                         .lsu_stbcnt2   (lsu_stbcnt2[3:0]),
+                         .lsu_stbcnt3   (lsu_stbcnt3[3:0]));
+`else
+   
+lsu_stb_rwctl stb_rwctl (  // lsu_stb_rwctl instance compiled when FPGA_SYN_1THREAD is NOT defined
+                         .so            (short_scan1_4),  // scan-out; connected to stb_ctldp0.si below
+                         .si            (short_scan1_3),  // scan-in; driver is outside this section
+                         .stb_wdata_ramd_b75_b64(stb_wdata_ramd_b75_b64[75:64]),                    // placed in bits [75:64] of stb_data.din
+                  		   .stb_ldst_byte_msk_min	(stb_ldst_byte_msk_min[7:0]),  // low 8 bits of stb_cam.stb_camwr_data
+                         /*AUTOINST*/
+                         // Outputs
+                         .lsu_stbctl_flush_pipe_w(lsu_stbctl_flush_pipe_w),  // read by stb_ctl0
+                         .stb_cam_wr_no_ivld_m(stb_cam_wr_no_ivld_m),
+                         .ld_rawp_st_ced_w2(ld_rawp_st_ced_w2),
+                         .stb_data_wr_ptr(stb_data_wr_ptr[4:0]),  // stb_data.wr_adr
+                         .stb_data_wptr_vld(stb_data_wptr_vld),  // stb_data.wr_en
+                         .stb_data_rd_ptr(stb_data_rd_ptr[4:0]),  // stb_data.rd_adr
+                         .stb_data_rptr_vld(stb_data_rptr_vld),  // stb_data.rd_en
+                         .stb_cam_cm_tid(stb_cam_cm_tid[1:0]),
+                         .stb_ldst_byte_msk(stb_ldst_byte_msk[7:0]),
+                         .stb_cam_rw_ptr(stb_cam_rw_ptr[4:0]),  // stb_cam takes [2:0] as rw_ptr and [4:3] as rw_tid
+                         .stb_cam_wptr_vld(stb_cam_wptr_vld),
+                         .stb_cam_rptr_vld(stb_cam_rptr_vld),
+                         .lsu_st_sz_bhww_m(lsu_st_sz_bhww_m),  // the lsu_st_sz_* outputs are inputs of stb_rwdp
+                         .lsu_st_sz_dw_m(lsu_st_sz_dw_m),
+                         .lsu_st_sz_bhw_m(lsu_st_sz_bhw_m),
+                         .lsu_st_sz_wdw_m(lsu_st_sz_wdw_m),
+                         .lsu_st_sz_b_m (lsu_st_sz_b_m),
+                         .lsu_st_sz_w_m (lsu_st_sz_w_m),
+                         .lsu_st_sz_hw_m(lsu_st_sz_hw_m),
+                         .lsu_st_sz_hww_m(lsu_st_sz_hww_m),
+                         .ld_rawp_st_ackid_w2(ld_rawp_st_ackid_w2[2:0]),
+                         .stb_flush_st_g(stb_flush_st_g[3:0]),  // bit 0 goes to stb_ctl0
+                         .stb_cam_wvld_m(stb_cam_wvld_m[3:0]),  // bit 0 goes to stb_ctl0
+                         .lsu_st_rq_type_m(lsu_st_rq_type_m[2:1]),  // read by stb_ctldp0
+                         .lsu_stb_data_early_sel_e(lsu_stb_data_early_sel_e[3:0]),  // read by stb_rwdp
+                         .lsu_stb_data_final_sel_m(lsu_stb_data_final_sel_m),  // read by stb_rwdp
+                         .lsu_ldquad_inst_m(lsu_ldquad_inst_m),  // stb_cam.stb_quad_ld_cam
+                         .stb_thrd_en_g (stb_thrd_en_g[3:0]),  // bit 0 goes to stb_ctl0.thrd_en_g
+                         .flsh_inst_m   (flsh_inst_m),
+                         .lsu_stb_va_m  (lsu_stb_va_m[9:3]),  // high 7 bits of stb_cam.stb_camwr_data; [7:6] go to stb_ctldp0
+                         .lsu_stb_empty_buf(lsu_stb_empty_buf[3:0]),
+                         .lsu_spu_stb_empty(lsu_spu_stb_empty[3:0]),
+                         .ifu_tlu_inst_vld_m_bf1(ifu_tlu_inst_vld_m_bf1),
+                         .ifu_tlu_inst_vld_m_bf2(ifu_tlu_inst_vld_m_bf2),
+                         .lsu_ifu_stbcnt0(lsu_ifu_stbcnt0[3:0]),
+                         .lsu_ifu_stbcnt1(lsu_ifu_stbcnt1[3:0]),
+                         .lsu_ifu_stbcnt2(lsu_ifu_stbcnt2[3:0]),
+                         .lsu_ifu_stbcnt3(lsu_ifu_stbcnt3[3:0]),
+                         .lsu_ffu_stb_full0(lsu_ffu_stb_full0),
+                         .lsu_ffu_stb_full1(lsu_ffu_stb_full1),
+                         .lsu_ffu_stb_full2(lsu_ffu_stb_full2),
+                         .lsu_ffu_stb_full3(lsu_ffu_stb_full3),
+                         // Inputs
+                         .rclk          (clk),                   // Templated
+                         .rst_tri_en    (mux_drive_disable),     // Templated
+                         .se            (se),
+                         .ld_inst_vld_e (ifu_lsu_ld_inst_e),     // Templated
+                         .ldst_sz_e     (ifu_lsu_ldst_size_e[1:0]), // Templated
+                         .st_inst_vld_e (ifu_lsu_st_inst_e),     // Templated
+                         .stb_pcx_rptr0 (stb_pcx_rptr0[2:0]),  // driven by stb_ctl0.stb_pcx_rptr
+                         .stb_wrptr0    (stb_wrptr0[2:0]),  // driven by stb_ctl0.stb_wrptr
+                         .stb_pcx_rptr1 (stb_pcx_rptr1[2:0]),
+                         .stb_wrptr1    (stb_wrptr1[2:0]),
+                         .stb_pcx_rptr2 (stb_pcx_rptr2[2:0]),
+                         .stb_wrptr2    (stb_wrptr2[2:0]),
+                         .stb_pcx_rptr3 (stb_pcx_rptr3[2:0]),
+                         .stb_wrptr3    (stb_wrptr3[2:0]),
+                         .stb_cam_hit_ptr(stb_cam_hit_ptr[2:0]),  // driven by stb_cam
+                         .stb_cam_hit   (stb_cam_hit_bf1),       // Templated; presumably a buffered copy of stb_cam's stb_cam_hit -- buffer not visible here, confirm
+                         .lsu_ldst_va_m (lsu_ldst_va_m[9:0]),
+                         .sta_internal_m(sta_internal_m),
+                         .ifu_tlu_thrid_e(ifu_tlu_thrid_e[1:0]),
+                         .tlu_exu_early_flush_pipe_w(tlu_exu_early_flush_pipe_w),
+                         .lsu_ttype_vld_m2(lsu_ttype_vld_m2),
+                         .ifu_lsu_flush_w(ifu_lsu_flush_w),
+                         .lsu_defr_trp_taken_g(lsu_defr_trp_taken_g),
+                         .ifu_lsu_casa_e(ifu_lsu_casa_e),
+                         .ifu_lsu_ldstub_e(ifu_lsu_ldstub_e),
+                         .ifu_lsu_swap_e(ifu_lsu_swap_e),
+                         .ifu_lsu_ldst_dbl_e(ifu_lsu_ldst_dbl_e),
+                         .stb_state_ced0(stb_state_ced0[7:0]),  // driven by stb_ctl0.stb_state_ced_mod
+                         .stb_state_ced1(stb_state_ced1[7:0]),
+                         .stb_state_ced2(stb_state_ced2[7:0]),
+                         .stb_state_ced3(stb_state_ced3[7:0]),
+                         .stb_ld_full_raw(stb_ld_full_raw[7:0]),  // driven by stb_cam
+                         .stb_ld_partial_raw(stb_ld_partial_raw[7:0]),  // driven by stb_cam
+                         .stb_wrptr0_prev(stb_wrptr0_prev[2:0]),  // driven by stb_ctl0.stb_wrptr_prev
+                         .stb_wrptr1_prev(stb_wrptr1_prev[2:0]),
+                         .stb_wrptr2_prev(stb_wrptr2_prev[2:0]),
+                         .stb_wrptr3_prev(stb_wrptr3_prev[2:0]),
+                         .ifu_lsu_alt_space_e(ifu_lsu_alt_space_e),
+                         .ifu_lsu_ldst_fp_e(ifu_lsu_ldst_fp_e),
+                         .lsu_quad_asi_e(lsu_quad_asi_e),
+                         .lsu_st_rmo_m  (lsu_st_rmo_m),
+                         .lsu_bst_in_pipe_m(lsu_bst_in_pipe_m),
+                         .ffu_lsu_kill_fst_w(ffu_lsu_kill_fst_w),
+                         .ffu_lsu_blk_st_e(ffu_lsu_blk_st_e),
+                         .ffu_lsu_blk_st_tid_m(ffu_lsu_data[77:76]), // Templated; bits [77:76] of ffu_lsu_data are used as this port's value
+                         .ffu_lsu_blk_st_va_e(ffu_lsu_blk_st_va_e[5:3]),
+                         .lsu_snap_blk_st_m(lsu_snap_blk_st_m),
+                         .tlb_pgnum_b39_g(tlb_pgnum_buf[39]),    // Templated
+                         .lsu_stb_empty (lsu_stb_empty[3:0]),  // all four bits passed; the FPGA_SYN_1THREAD branch instead ties bits [3:1] to 0
+                         .ifu_tlu_flsh_inst_e(ifu_tlu_flsh_inst_e),
+                         .stb_cam_mhit  (stb_cam_mhit),  // driven by stb_cam
+                         .ifu_tlu_inst_vld_m(ifu_tlu_inst_vld_m),
+                         .lsu_st_pcx_rq_pick(lsu_st_pcx_rq_pick[3:0]),
+                         .lsu_st_pcx_rq_vld(lsu_st_pcx_rq_vld),
+                         .stb_rdata_ramc_b8t0(stb_rdata_ramc[8:0]), // Templated; low 9 bits of stb_cam's 45-bit stb_rdata_ramc
+                         .lsu_stbcnt0   (lsu_stbcnt0[3:0]),  // driven by stb_ctl0.lsu_stbcnt
+                         .lsu_stbcnt1   (lsu_stbcnt1[3:0]),
+                         .lsu_stbcnt2   (lsu_stbcnt2[3:0]),
+                         .lsu_stbcnt3   (lsu_stbcnt3[3:0]));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+/*
+lsu_stb_rwdp AUTO_TEMPLATE (
+      .rst_tri_en           (mux_drive_disable),
+      .stb_rdata_ramd_b74     (stb_rdata_ramd[74]),
+ 			.rclk	(clk));
+*/
+
+lsu_stb_rwdp  stb_rwdp  (  // single lsu_stb_rwdp instance; its select/size controls come from stb_rwctl
+                         .so            (so0),  // scan-out drives so0
+                         .si            (scan0_2),  // scan-in; driver is outside this section
+                         .lsu_stb_st_data_g(lsu_stb_st_data_g[63:0]),  // placed in bits [63:0] of stb_data.din
+                         /*AUTOINST*/
+                         // Outputs
+                         .stb_rdata_ramd_buf(stb_rdata_ramd_buf[69:0]),
+                         .stb_rdata_ramd_b74_buf(stb_rdata_ramd_b74_buf),
+                         // Inputs
+                         .rclk          (clk),                   // Templated
+                         .se            (se),
+                         .rst_tri_en    (mux_drive_disable),     // Templated
+                         .exu_lsu_rs3_data_e(exu_lsu_rs3_data_e[63:0]),
+                         .lsu_stb_data_early_sel_e(lsu_stb_data_early_sel_e[3:0]),  // driven by stb_rwctl
+                         .lsu_stb_data_final_sel_m(lsu_stb_data_final_sel_m),  // driven by stb_rwctl
+                         .exu_lsu_rs2_data_e(exu_lsu_rs2_data_e[63:0]),
+                         .lsu_st_sz_bhww_m(lsu_st_sz_bhww_m),  // the lsu_st_sz_* inputs are stb_rwctl outputs
+                         .lsu_st_sz_dw_m(lsu_st_sz_dw_m),
+                         .lsu_st_sz_bhw_m(lsu_st_sz_bhw_m),
+                         .lsu_st_sz_wdw_m(lsu_st_sz_wdw_m),
+                         .lsu_st_sz_b_m (lsu_st_sz_b_m),
+                         .lsu_st_sz_w_m (lsu_st_sz_w_m),
+                         .lsu_st_sz_hw_m(lsu_st_sz_hw_m),
+                         .lsu_st_sz_hww_m(lsu_st_sz_hww_m),
+                         .ffu_lsu_data  (ffu_lsu_data[63:0]),  // only the low 64 bits; stb_rwctl separately taps [77:76]
+                         .lsu_st_hw_le_g(lsu_st_hw_le_g),
+                         .lsu_st_w_or_dbl_le_g(lsu_st_w_or_dbl_le_g),
+                         .lsu_st_x_le_g (lsu_st_x_le_g),
+                         .lsu_swap_sel_default_g(lsu_swap_sel_default_g),
+                         .lsu_swap_sel_default_byte_7_2_g(lsu_swap_sel_default_byte_7_2_g),
+                         .stb_rdata_ramd(stb_rdata_ramd[69:0]),  // low 70 bits of stb_data.dout[79:0]
+                         .stb_rdata_ramd_b74(stb_rdata_ramd[74])); // Templated; bit 74 of stb_data.dout
+/*
+bw_r_scm  AUTO_TEMPLATE (
+                         .rst_tri_en           (mem_write_disable),
+                         .rclk            (clk),
+                         .stb_quad_ld_cam (lsu_ldquad_inst_m),
+                         .stb_alt_wr_data (lsu_blkst_pgnum_m[39:10]),
+                         .stb_alt_wsel (lsu_blk_st_m),
+                         .stb_cam_data  (tlb_pgnum_crit[39:10]),
+                         .stb_cam_rw_tid(stb_cam_rw_ptr[4:3]));
+*/
+   
+bw_r_scm   stb_cam   (  // single bw_r_scm instance; pointers and valids come from stb_rwctl
+          .stb_camwr_data ({lsu_stb_va_m[9:3],stb_ldst_byte_msk_min[7:0]}),  // 15-bit write data: 7 address bits then 8 byte-mask bits, both from stb_rwctl
+          /*AUTOINST*/
+                      // Outputs
+                      .stb_rdata_ramc   (stb_rdata_ramc[44:0]),  // stb_rwctl reads bits [8:0]
+                      .stb_ld_full_raw  (stb_ld_full_raw[7:0]),  // read by stb_rwctl
+                      .stb_ld_partial_raw(stb_ld_partial_raw[7:0]),  // read by stb_rwctl
+                      .stb_cam_hit_ptr  (stb_cam_hit_ptr[2:0]),  // read by stb_rwctl
+                      .stb_cam_hit      (stb_cam_hit),  // stb_rwctl is wired to stb_cam_hit_bf1, not to this net directly
+                      .stb_cam_mhit     (stb_cam_mhit),  // read by stb_rwctl
+                      // Inputs
+                      .stb_cam_data     (tlb_pgnum_crit[39:10]), // Templated; tlb_pgnum_crit is an output of the TLB instance above
+                      .stb_alt_wr_data  (lsu_blkst_pgnum_m[39:10]), // Templated
+                      .stb_alt_wsel     (lsu_blk_st_m),          // Templated
+                      .stb_cam_vld      (stb_cam_vld),
+                      .stb_cam_cm_tid   (stb_cam_cm_tid[1:0]),  // driven by stb_rwctl
+                      .stb_cam_sqsh_msk (stb_cam_sqsh_msk[7:0]),
+                      .stb_cam_rw_ptr   (stb_cam_rw_ptr[2:0]),  // low 3 bits of stb_rwctl's 5-bit stb_cam_rw_ptr
+                      .stb_cam_wptr_vld (stb_cam_wptr_vld),  // driven by stb_rwctl
+                      .stb_cam_rptr_vld (stb_cam_rptr_vld),  // driven by stb_rwctl
+                      .stb_cam_rw_tid   (stb_cam_rw_ptr[4:3]),   // Templated; upper 2 bits of the same 5-bit pointer
+                      .stb_quad_ld_cam  (lsu_ldquad_inst_m),     // Templated; driven by stb_rwctl
+                      .rclk             (clk),                   // Templated
+                      .rst_tri_en       (mem_write_disable));     // Templated
+/*bw_r_rf32x80 AUTO_TEMPLATE (
+                         .rst_tri_en           (mem_write_disable),
+                         .dout   (stb_rdata_ramd[79:0]),
+                         .wr_adr (stb_data_wr_ptr[4:0]),
+                         .wr_en  (stb_data_wptr_vld),
+                         .nib_wr_en ({20{1'b1}}),
+                         .rd_adr (stb_data_rd_ptr[4:0]),
+                         .rd_en  (stb_data_rptr_vld),
+                         //.sehold (),
+                         .reset_l(arst_l),
+                         .rclk    (clk));
+ 
+*/    
+bw_r_rf32x80 stb_data(  // single bw_r_rf32x80 instance; addresses and enables come from stb_rwctl
+                      .din ({4'b0, stb_wdata_ramd_b75_b64[75:64], lsu_stb_st_data_g[63:0]}),  // 80-bit word: [79:76]=0, [75:64] from stb_rwctl, [63:0] from stb_rwdp
+                      .so               (short_scan0_5),  // scan-out; connected to stb_ctl0.si
+                      .si               (short_scan0_4),  // scan-in; same net as the .so of the instance that ends just above stb_rwctl
+                      /*AUTOINST*/
+                      // Outputs
+                      .dout             (stb_rdata_ramd[79:0]),  // Templated; stb_rwdp reads [69:0] and bit [74]
+                      // Inputs
+                      .rd_adr           (stb_data_rd_ptr[4:0]),  // Templated
+                      .rd_en            (stb_data_rptr_vld),     // Templated
+                      .wr_en            (stb_data_wptr_vld),     // Templated
+                      .nib_wr_en        ({20{1'b1}}),            // Templated; all 20 bits tied to 1
+                      .wr_adr           (stb_data_wr_ptr[4:0]),  // Templated
+                      .rclk             (clk),                   // Templated
+                      .reset_l          (arst_l),                // Templated
+                      .rst_tri_en       (mem_write_disable),     // Templated
+                      .sehold           (sehold),
+                      .se               (se));
+/*lsu_stb_ctl AUTO_TEMPLATE (
+         .rclk             	(clk),
+	 .st_dtlb_perr_g	(lsu_st_dtlb_perr_g[@]),
+         .lsu_outstanding_rmo_st_max (lsu_outstanding_rmo_st_max[@]),
+ 	       .st_pcx_rq_kill_w2    (lsu_st_pcx_rq_kill_w2[@]),
+      	 .flshinst_rst	       (lsu_dfq_flsh_cmplt[@]),
+      	 .stb_rmo_st_issue     (lsu_stb_rmo_st_issue[@]),
+         .lsu_stb_empty        (lsu_stb_empty[@]),
+         .stb_l2bnk_addr       (stb@_l2b_addr[2:0]),
+         .stb_atm_rq_type      (stb@_atm_rq_type[2:1]),
+         .stb_wrptr            (stb_wrptr@[2:0]),
+         .stb_wrptr_prev       (stb_wrptr@_prev[2:0]),
+         .stb_state_ced_mod    (stb_state_ced@[7:0]),
+         .stb_state_vld_out    (stb_state_vld@[7:0]),
+         .stb_rd_for_pcx       (stb_rd_for_pcx[@]),
+         .stb_dfq_rptr         (stb_dfq_rptr@[2:0]),
+         .stb_pcx_rptr         (stb_pcx_rptr@[2:0]),
+         .thrd_en_g            (stb_thrd_en_g[@]),
+         .pcx_rq_for_stb       (pcx_rq_for_stb[@]),
+         .stb_crnt_ack_id      (stb@_crnt_ack_id[2:0]),
+         .lsu_stbcnt           (lsu_stbcnt@[3:0]),
+         .pcx_req_squash       (lsu_pcx_req_squash@),
+         .cpx_st_ack_tid       (cpx_st_ack_tid@),
+         .st_ack_dq_stb        (lsu_st_ack_dq_stb[@]),
+         .stb_cam_wvld_m       (stb_cam_wvld_m[@]),
+         .stb_flush_st_g       (stb_flush_st_g[@]),
+         .cpx_st_ack_tid       (cpx_st_ack_tid@),
+	       .stb_full	           (lsu_tlu_stb_full_w2[@]),
+         .tlb_pgnum_g          (tlb_pgnum_buf2[39:37]), // timing fix
+	 .stb_alt_sel	       (lsu_blk_st_m),
+	 .stb_alt_addr	       (lsu_blkst_pgnum_m[39:37]),
+         .stb_clk_en_l         (stb@_clk_en_l[7:0]),
+         .stb_non_l2bnk        (stb@_non_l2bnk),
+         .stb_state_si_0       (stb@_state_si_0[3:2]),
+         .stb_state_si_1       (stb@_state_si_1[3:2]),
+         .stb_state_si_2       (stb@_state_si_2[3:2]),
+         .stb_state_si_3       (stb@_state_si_3[3:2]),
+         .stb_state_si_4       (stb@_state_si_4[3:2]),
+         .stb_state_si_5       (stb@_state_si_5[3:2]),
+         .stb_state_si_6       (stb@_state_si_6[3:2]),
+         .stb_state_si_7       (stb@_state_si_7[3:2]),
+         .stb_state_rtype_0    (stb@_state_rtype_0[2:1]),
+         .stb_state_rtype_1    (stb@_state_rtype_1[2:1]),
+         .stb_state_rtype_2    (stb@_state_rtype_2[2:1]),
+         .stb_state_rtype_3    (stb@_state_rtype_3[2:1]),
+         .stb_state_rtype_4    (stb@_state_rtype_4[2:1]),
+         .stb_state_rtype_5    (stb@_state_rtype_5[2:1]),
+         .stb_state_rtype_6    (stb@_state_rtype_6[2:1]),
+         .stb_state_rtype_7    (stb@_state_rtype_7[2:1]),
+         .stb_state_io         (stb@_state_io[7:0]), 
+         .stb_state_rmo        (stb@_state_rmo[7:0]));
+*/
+
+/*lsu_stb_ctldp AUTO_TEMPLATE (
+         .rclk         	       (clk),
+         .stb_clk_en_l         (stb@_clk_en_l[7:0]),
+         .stb_non_l2bnk        (stb@_non_l2bnk),
+         .stb_state_si_0       (stb@_state_si_0[3:2]),
+         .stb_state_si_1       (stb@_state_si_1[3:2]),
+         .stb_state_si_2       (stb@_state_si_2[3:2]),
+         .stb_state_si_3       (stb@_state_si_3[3:2]),
+         .stb_state_si_4       (stb@_state_si_4[3:2]),
+         .stb_state_si_5       (stb@_state_si_5[3:2]),
+         .stb_state_si_6       (stb@_state_si_6[3:2]),
+         .stb_state_si_7       (stb@_state_si_7[3:2]),
+         .stb_state_rtype_0    (stb@_state_rtype_0[2:1]),
+         .stb_state_rtype_1    (stb@_state_rtype_1[2:1]),
+         .stb_state_rtype_2    (stb@_state_rtype_2[2:1]),
+         .stb_state_rtype_3    (stb@_state_rtype_3[2:1]),
+         .stb_state_rtype_4    (stb@_state_rtype_4[2:1]),
+         .stb_state_rtype_5    (stb@_state_rtype_5[2:1]),
+         .stb_state_rtype_6    (stb@_state_rtype_6[2:1]),
+         .stb_state_rtype_7    (stb@_state_rtype_7[2:1]),
+         .stb_state_io         (stb@_state_io[7:0]), 
+         .stb_state_rmo        (stb@_state_rmo[7:0]));
+*/
+   
+lsu_stb_ctl stb_ctl0  (  // lsu_stb_ctl instance 0: the template's [@] / stb@ slots are resolved to index 0
+                       .so              (short_scan0_6),
+                       .si              (short_scan0_5),  // scan-in from stb_data.so
+                       /*AUTOINST*/
+                       // Outputs
+                       .stb_clk_en_l    (stb0_clk_en_l[7:0]),    // Templated; read by stb_ctldp0
+                       .stb_crnt_ack_id (stb0_crnt_ack_id[2:0]), // Templated
+                       .lsu_stb_empty   (lsu_stb_empty[0]),      // Templated; read by stb_rwctl.lsu_stb_empty
+                       .stb_l2bnk_addr  (stb0_l2b_addr[2:0]),    // Templated
+                       .stb_atm_rq_type (stb0_atm_rq_type[2:1]), // Templated
+                       .stb_wrptr       (stb_wrptr0[2:0]),       // Templated; read by stb_rwctl
+                       .stb_rd_for_pcx  (stb_rd_for_pcx[0]),     // Templated
+                       .stb_pcx_rptr    (stb_pcx_rptr0[2:0]),    // Templated; read by stb_rwctl
+                       .stb_wrptr_prev  (stb_wrptr0_prev[2:0]),  // Templated; read by stb_rwctl
+                       .stb_state_ced_mod(stb_state_ced0[7:0]),  // Templated; read by stb_rwctl.stb_state_ced0
+                       .stb_state_vld_out(stb_state_vld0[7:0]),  // Templated
+                       .lsu_stbcnt      (lsu_stbcnt0[3:0]),      // Templated; read by stb_rwctl
+                       .stb_rmo_st_issue(lsu_stb_rmo_st_issue[0]), // Templated
+                       .stb_full        (lsu_tlu_stb_full_w2_t[0]), // Templated -- NOTE(review): AUTO_TEMPLATE above maps stb_full to lsu_tlu_stb_full_w2[@]; the _t net looks hand-edited, confirm before re-running AUTOINST
+                       .st_pcx_rq_kill_w2(lsu_st_pcx_rq_kill_w2[0]), // Templated
+                       // Inputs
+                       .rclk            (clk),                   // Templated
+                       .grst_l          (grst_l),
+                       .arst_l          (arst_l),
+                       .se              (se),
+                       .thrd_en_g       (stb_thrd_en_g[0]),      // Templated; bit 0 of stb_rwctl's stb_thrd_en_g
+                       .cpx_st_ack_tid  (cpx_st_ack_tid0),       // Templated
+                       .pcx_rq_for_stb  (pcx_rq_for_stb[0]),     // Templated
+                       .st_ack_dq_stb   (lsu_st_ack_dq_stb[0]),  // Templated
+                       .stb_flush_st_g  (stb_flush_st_g[0]),     // Templated; bit 0 of stb_rwctl's stb_flush_st_g
+                       .stb_cam_wvld_m  (stb_cam_wvld_m[0]),     // Templated; bit 0 of stb_rwctl's stb_cam_wvld_m
+                       .lsu_blk_st_m    (lsu_blk_st_m),  // same net also drives .stb_alt_sel below
+                       .tlb_pgnum_g     (tlb_pgnum_buf2[39:37]), // Templated
+                       .pcx_req_squash  (lsu_pcx_req_squash0),   // Templated
+                       .flshinst_rst    (lsu_dfq_flsh_cmplt[0]), // Templated
+                       .lsu_stbctl_flush_pipe_w(lsu_stbctl_flush_pipe_w),  // driven by stb_rwctl
+                       .flsh_inst_m     (flsh_inst_m),  // driven by stb_rwctl
+                       .stb_state_si_0  (stb0_state_si_0[3:2]),  // Templated; the stb0_state_* inputs are stb_ctldp0 outputs
+                       .stb_state_si_1  (stb0_state_si_1[3:2]),  // Templated
+                       .stb_state_si_2  (stb0_state_si_2[3:2]),  // Templated
+                       .stb_state_si_3  (stb0_state_si_3[3:2]),  // Templated
+                       .stb_state_si_4  (stb0_state_si_4[3:2]),  // Templated
+                       .stb_state_si_5  (stb0_state_si_5[3:2]),  // Templated
+                       .stb_state_si_6  (stb0_state_si_6[3:2]),  // Templated
+                       .stb_state_si_7  (stb0_state_si_7[3:2]),  // Templated
+                       .stb_state_rtype_0(stb0_state_rtype_0[2:1]), // Templated
+                       .stb_state_rtype_1(stb0_state_rtype_1[2:1]), // Templated
+                       .stb_state_rtype_2(stb0_state_rtype_2[2:1]), // Templated
+                       .stb_state_rtype_3(stb0_state_rtype_3[2:1]), // Templated
+                       .stb_state_rtype_4(stb0_state_rtype_4[2:1]), // Templated
+                       .stb_state_rtype_5(stb0_state_rtype_5[2:1]), // Templated
+                       .stb_state_rtype_6(stb0_state_rtype_6[2:1]), // Templated
+                       .stb_state_rtype_7(stb0_state_rtype_7[2:1]), // Templated
+                       .stb_state_rmo   (stb0_state_rmo[7:0]),   // Templated
+                       .stb_alt_sel     (lsu_blk_st_m),          // Templated
+                       .stb_alt_addr    (lsu_blkst_pgnum_m[39:37]), // Templated; top 3 bits of the page number stb_cam receives as stb_alt_wr_data
+                       .lsu_dtlb_bypass_e(lsu_dtlb_bypass_e),
+                       .tlb_cam_hit     (tlb_cam_hit),  // output of the TLB instance above
+                       .st_dtlb_perr_g  (lsu_st_dtlb_perr_g[0]), // Templated
+                       .lsu_outstanding_rmo_st_max(lsu_outstanding_rmo_st_max[0])); // Templated
+lsu_stb_ctldp stb_ctldp0  (  // lsu_stb_ctldp instance 0; paired with stb_ctl0 through the stb0_* nets
+                           .so          (short_scan1_5),  // scan-out; connected to stb_ctl1.si in the FPGA_SYN_1THREAD code that follows
+                           .si          (short_scan1_4),  // scan-in from stb_rwctl.so
+                           /*AUTOINST*/
+                           // Outputs
+                           .stb_state_si_0(stb0_state_si_0[3:2]), // Templated; all outputs of this instance are inputs of stb_ctl0
+                           .stb_state_si_1(stb0_state_si_1[3:2]), // Templated
+                           .stb_state_si_2(stb0_state_si_2[3:2]), // Templated
+                           .stb_state_si_3(stb0_state_si_3[3:2]), // Templated
+                           .stb_state_si_4(stb0_state_si_4[3:2]), // Templated
+                           .stb_state_si_5(stb0_state_si_5[3:2]), // Templated
+                           .stb_state_si_6(stb0_state_si_6[3:2]), // Templated
+                           .stb_state_si_7(stb0_state_si_7[3:2]), // Templated
+                           .stb_state_rtype_0(stb0_state_rtype_0[2:1]), // Templated
+                           .stb_state_rtype_1(stb0_state_rtype_1[2:1]), // Templated
+                           .stb_state_rtype_2(stb0_state_rtype_2[2:1]), // Templated
+                           .stb_state_rtype_3(stb0_state_rtype_3[2:1]), // Templated
+                           .stb_state_rtype_4(stb0_state_rtype_4[2:1]), // Templated
+                           .stb_state_rtype_5(stb0_state_rtype_5[2:1]), // Templated
+                           .stb_state_rtype_6(stb0_state_rtype_6[2:1]), // Templated
+                           .stb_state_rtype_7(stb0_state_rtype_7[2:1]), // Templated
+                           .stb_state_rmo(stb0_state_rmo[7:0]),  // Templated
+                           // Inputs
+                           .rclk        (clk),                   // Templated
+                           .se          (se),
+                           .stb_clk_en_l(stb0_clk_en_l[7:0]),    // Templated; driven by stb_ctl0
+                           .lsu_stb_va_m(lsu_stb_va_m[7:6]),  // only bits [7:6] of the [9:3] bus driven by stb_rwctl
+                           .lsu_st_rq_type_m(lsu_st_rq_type_m[2:1]),  // driven by stb_rwctl
+                           .lsu_st_rmo_m(lsu_st_rmo_m));
+
+`ifdef FPGA_SYN_1THREAD
+   
+   
+// stb_ctl1, FPGA_SYN_1THREAD variant: the instance is kept in the
+// short_scan1 chain and still drives stb1_clk_en_l, but every input
+// other than clock, resets and scan enable is tied to constant zero
+// and every other output is left unconnected.
+lsu_stb_ctl stb_ctl1 (
+    // Scan chain: short_scan1_5 -> short_scan1_6
+    .si                         (short_scan1_5),
+    .so                         (short_scan1_6),
+    /*AUTOINST*/
+    // Inputs: clock, resets, scan enable
+    .rclk                       (clk),
+    .grst_l                     (grst_l),
+    .arst_l                     (arst_l),
+    .se                         (se),
+    // Inputs: tied off
+    .thrd_en_g                  (1'b0),
+    .cpx_st_ack_tid             (1'b0),
+    .pcx_rq_for_stb             (1'b0),
+    .st_ack_dq_stb              (1'b0),
+    .stb_flush_st_g             (1'b0),
+    .stb_cam_wvld_m             (1'b0),
+    .lsu_blk_st_m               (1'b0),
+    .tlb_pgnum_g                (3'b000),
+    .pcx_req_squash             (1'b0),
+    .flshinst_rst               (1'b0),
+    .lsu_stbctl_flush_pipe_w    (1'b0),
+    .flsh_inst_m                (1'b0),
+    .stb_state_si_0             (2'b00),
+    .stb_state_si_1             (2'b00),
+    .stb_state_si_2             (2'b00),
+    .stb_state_si_3             (2'b00),
+    .stb_state_si_4             (2'b00),
+    .stb_state_si_5             (2'b00),
+    .stb_state_si_6             (2'b00),
+    .stb_state_si_7             (2'b00),
+    .stb_state_rtype_0          (2'b00),
+    .stb_state_rtype_1          (2'b00),
+    .stb_state_rtype_2          (2'b00),
+    .stb_state_rtype_3          (2'b00),
+    .stb_state_rtype_4          (2'b00),
+    .stb_state_rtype_5          (2'b00),
+    .stb_state_rtype_6          (2'b00),
+    .stb_state_rtype_7          (2'b00),
+    .stb_state_rmo              (8'h00),
+    .stb_alt_sel                (1'b0),
+    .stb_alt_addr               (3'b000),
+    .lsu_dtlb_bypass_e          (1'b0),
+    .tlb_cam_hit                (1'b0),
+    .st_dtlb_perr_g             (1'b0),
+    .lsu_outstanding_rmo_st_max (1'b0),
+    // Output: the only one still connected (feeds stb_ctldp1)
+    .stb_clk_en_l               (stb1_clk_en_l[7:0]),
+    // Outputs: intentionally unconnected
+    .stb_crnt_ack_id            (),
+    .lsu_stb_empty              (),
+    .stb_l2bnk_addr             (),
+    .stb_atm_rq_type            (),
+    .stb_wrptr                  (),
+    .stb_rd_for_pcx             (),
+    .stb_pcx_rptr               (),
+    .stb_wrptr_prev             (),
+    .stb_state_ced_mod          (),
+    .stb_state_vld_out          (),
+    .lsu_stbcnt                 (),
+    .stb_rmo_st_issue           (),
+    .stb_full                   (),
+    .st_pcx_rq_kill_w2          ()
+);
+// stb_ctldp1, FPGA_SYN_1THREAD variant: receives stb1_clk_en_l from
+// stb_ctl1, has its remaining data inputs tied to zero and leaves all
+// outputs unconnected. It sits on the short_scan0 chain.
+lsu_stb_ctldp stb_ctldp1 (
+    // Scan chain: short_scan0_6 -> short_scan0_7
+    .si                (short_scan0_6),
+    .so                (short_scan0_7),
+    /*AUTOINST*/
+    // Inputs
+    .rclk              (clk),
+    .se                (se),
+    .stb_clk_en_l      (stb1_clk_en_l[7:0]),
+    .lsu_stb_va_m      (2'b00),
+    .lsu_st_rq_type_m  (2'b00),
+    .lsu_st_rmo_m      (1'b0),
+    // Outputs: intentionally unconnected
+    .stb_state_si_0    (),
+    .stb_state_si_1    (),
+    .stb_state_si_2    (),
+    .stb_state_si_3    (),
+    .stb_state_si_4    (),
+    .stb_state_si_5    (),
+    .stb_state_si_6    (),
+    .stb_state_si_7    (),
+    .stb_state_rtype_0 (),
+    .stb_state_rtype_1 (),
+    .stb_state_rtype_2 (),
+    .stb_state_rtype_3 (),
+    .stb_state_rtype_4 (),
+    .stb_state_rtype_5 (),
+    .stb_state_rtype_6 (),
+    .stb_state_rtype_7 (),
+    .stb_state_rmo     ()
+);
+
+   // stb_ctl2, FPGA_SYN_1THREAD variant: the instance is kept in the
+   // short_scan1 chain and still drives stb2_clk_en_l, but every input
+   // other than clock, resets and scan enable is tied to constant zero
+   // and every other output is left unconnected.
+   lsu_stb_ctl stb_ctl2 (
+       // Scan chain: short_scan1_6 -> short_scan1_7
+       .si                         (short_scan1_6),
+       .so                         (short_scan1_7),
+       /*AUTOINST*/
+       // Inputs: clock, resets, scan enable
+       .rclk                       (clk),
+       .grst_l                     (grst_l),
+       .arst_l                     (arst_l),
+       .se                         (se),
+       // Inputs: tied off
+       .thrd_en_g                  (1'b0),
+       .cpx_st_ack_tid             (1'b0),
+       .pcx_rq_for_stb             (1'b0),
+       .st_ack_dq_stb              (1'b0),
+       .stb_flush_st_g             (1'b0),
+       .stb_cam_wvld_m             (1'b0),
+       .lsu_blk_st_m               (1'b0),
+       .tlb_pgnum_g                (3'b000),
+       .pcx_req_squash             (1'b0),
+       .flshinst_rst               (1'b0),
+       .lsu_stbctl_flush_pipe_w    (1'b0),
+       .flsh_inst_m                (1'b0),
+       .stb_state_si_0             (2'b00),
+       .stb_state_si_1             (2'b00),
+       .stb_state_si_2             (2'b00),
+       .stb_state_si_3             (2'b00),
+       .stb_state_si_4             (2'b00),
+       .stb_state_si_5             (2'b00),
+       .stb_state_si_6             (2'b00),
+       .stb_state_si_7             (2'b00),
+       .stb_state_rtype_0          (2'b00),
+       .stb_state_rtype_1          (2'b00),
+       .stb_state_rtype_2          (2'b00),
+       .stb_state_rtype_3          (2'b00),
+       .stb_state_rtype_4          (2'b00),
+       .stb_state_rtype_5          (2'b00),
+       .stb_state_rtype_6          (2'b00),
+       .stb_state_rtype_7          (2'b00),
+       .stb_state_rmo              (8'h00),
+       .stb_alt_sel                (1'b0),
+       .stb_alt_addr               (3'b000),
+       .lsu_dtlb_bypass_e          (1'b0),
+       .tlb_cam_hit                (1'b0),
+       .st_dtlb_perr_g             (1'b0),
+       .lsu_outstanding_rmo_st_max (1'b0),
+       // Output: the only one still connected (feeds stb_ctldp2)
+       .stb_clk_en_l               (stb2_clk_en_l[7:0]),
+       // Outputs: intentionally unconnected
+       .stb_crnt_ack_id            (),
+       .lsu_stb_empty              (),
+       .stb_l2bnk_addr             (),
+       .stb_atm_rq_type            (),
+       .stb_wrptr                  (),
+       .stb_rd_for_pcx             (),
+       .stb_pcx_rptr               (),
+       .stb_wrptr_prev             (),
+       .stb_state_ced_mod          (),
+       .stb_state_vld_out          (),
+       .lsu_stbcnt                 (),
+       .stb_rmo_st_issue           (),
+       .stb_full                   (),
+       .st_pcx_rq_kill_w2          ()
+   );
+// stb_ctldp2, FPGA_SYN_1THREAD variant: receives stb2_clk_en_l from
+// stb_ctl2, has its remaining data inputs tied to zero and leaves all
+// outputs unconnected.
+lsu_stb_ctldp stb_ctldp2 (
+    // Scan chain: short_scan1_7 -> short_scan1_8
+    .si                (short_scan1_7),
+    .so                (short_scan1_8),
+    /*AUTOINST*/
+    // Inputs
+    .rclk              (clk),
+    .se                (se),
+    .stb_clk_en_l      (stb2_clk_en_l[7:0]),
+    .lsu_stb_va_m      (2'b00),
+    .lsu_st_rq_type_m  (2'b00),
+    .lsu_st_rmo_m      (1'b0),
+    // Outputs: intentionally unconnected
+    .stb_state_si_0    (),
+    .stb_state_si_1    (),
+    .stb_state_si_2    (),
+    .stb_state_si_3    (),
+    .stb_state_si_4    (),
+    .stb_state_si_5    (),
+    .stb_state_si_6    (),
+    .stb_state_si_7    (),
+    .stb_state_rtype_0 (),
+    .stb_state_rtype_1 (),
+    .stb_state_rtype_2 (),
+    .stb_state_rtype_3 (),
+    .stb_state_rtype_4 (),
+    .stb_state_rtype_5 (),
+    .stb_state_rtype_6 (),
+    .stb_state_rtype_7 (),
+    .stb_state_rmo     ()
+);
+
+   // stb_ctl3, FPGA_SYN_1THREAD variant: the instance is kept in the
+   // short_scan0 chain and still drives stb3_clk_en_l, but every input
+   // other than clock, resets and scan enable is tied to constant zero
+   // and every other output is left unconnected.
+   lsu_stb_ctl stb_ctl3 (
+       // Scan chain: short_scan0_7 -> short_scan0_8
+       .si                         (short_scan0_7),
+       .so                         (short_scan0_8),
+       /*AUTOINST*/
+       // Inputs: clock, resets, scan enable
+       .rclk                       (clk),
+       .grst_l                     (grst_l),
+       .arst_l                     (arst_l),
+       .se                         (se),
+       // Inputs: tied off
+       .thrd_en_g                  (1'b0),
+       .cpx_st_ack_tid             (1'b0),
+       .pcx_rq_for_stb             (1'b0),
+       .st_ack_dq_stb              (1'b0),
+       .stb_flush_st_g             (1'b0),
+       .stb_cam_wvld_m             (1'b0),
+       .lsu_blk_st_m               (1'b0),
+       .tlb_pgnum_g                (3'b000),
+       .pcx_req_squash             (1'b0),
+       .flshinst_rst               (1'b0),
+       .lsu_stbctl_flush_pipe_w    (1'b0),
+       .flsh_inst_m                (1'b0),
+       .stb_state_si_0             (2'b00),
+       .stb_state_si_1             (2'b00),
+       .stb_state_si_2             (2'b00),
+       .stb_state_si_3             (2'b00),
+       .stb_state_si_4             (2'b00),
+       .stb_state_si_5             (2'b00),
+       .stb_state_si_6             (2'b00),
+       .stb_state_si_7             (2'b00),
+       .stb_state_rtype_0          (2'b00),
+       .stb_state_rtype_1          (2'b00),
+       .stb_state_rtype_2          (2'b00),
+       .stb_state_rtype_3          (2'b00),
+       .stb_state_rtype_4          (2'b00),
+       .stb_state_rtype_5          (2'b00),
+       .stb_state_rtype_6          (2'b00),
+       .stb_state_rtype_7          (2'b00),
+       .stb_state_rmo              (8'h00),
+       .stb_alt_sel                (1'b0),
+       .stb_alt_addr               (3'b000),
+       .lsu_dtlb_bypass_e          (1'b0),
+       .tlb_cam_hit                (1'b0),
+       .st_dtlb_perr_g             (1'b0),
+       .lsu_outstanding_rmo_st_max (1'b0),
+       // Output: the only one still connected (feeds stb_ctldp3)
+       .stb_clk_en_l               (stb3_clk_en_l[7:0]),
+       // Outputs: intentionally unconnected
+       .stb_crnt_ack_id            (),
+       .lsu_stb_empty              (),
+       .stb_l2bnk_addr             (),
+       .stb_atm_rq_type            (),
+       .stb_wrptr                  (),
+       .stb_rd_for_pcx             (),
+       .stb_pcx_rptr               (),
+       .stb_wrptr_prev             (),
+       .stb_state_ced_mod          (),
+       .stb_state_vld_out          (),
+       .lsu_stbcnt                 (),
+       .stb_rmo_st_issue           (),
+       .stb_full                   (),
+       .st_pcx_rq_kill_w2          ()
+   );
+// stb_ctldp3, FPGA_SYN_1THREAD variant: receives stb3_clk_en_l from
+// stb_ctl3, has its remaining data inputs tied to zero and leaves all
+// outputs unconnected. Its scan output drives short_so1.
+lsu_stb_ctldp stb_ctldp3 (
+    // Scan chain: short_scan1_8 -> short_so1
+    .si                (short_scan1_8),
+    .so                (short_so1),
+    /*AUTOINST*/
+    // Inputs
+    .rclk              (clk),
+    .se                (se),
+    .stb_clk_en_l      (stb3_clk_en_l[7:0]),
+    .lsu_stb_va_m      (2'b00),
+    .lsu_st_rq_type_m  (2'b00),
+    .lsu_st_rmo_m      (1'b0),
+    // Outputs: intentionally unconnected
+    .stb_state_si_0    (),
+    .stb_state_si_1    (),
+    .stb_state_si_2    (),
+    .stb_state_si_3    (),
+    .stb_state_si_4    (),
+    .stb_state_si_5    (),
+    .stb_state_si_6    (),
+    .stb_state_si_7    (),
+    .stb_state_rtype_0 (),
+    .stb_state_rtype_1 (),
+    .stb_state_rtype_2 (),
+    .stb_state_rtype_3 (),
+    .stb_state_rtype_4 (),
+    .stb_state_rtype_5 (),
+    .stb_state_rtype_6 (),
+    .stb_state_rtype_7 (),
+    .stb_state_rmo     ()
+);
+`else // !`ifdef FPGA_SYN_1THREAD
+
+   
+// stb_ctl1, variant used when FPGA_SYN_1THREAD is not defined: fully
+// wired to the index-1 nets. It drives stb1_clk_en_l towards
+// stb_ctldp1 and receives the stb1_state_* nets that stb_ctldp1 drives.
+lsu_stb_ctl stb_ctl1 (
+    // Scan chain: short_scan1_5 -> short_scan1_6
+    .si                         (short_scan1_5),
+    .so                         (short_scan1_6),
+    /*AUTOINST*/
+    // Inputs: clock, resets, scan enable
+    .rclk                       (clk),
+    .grst_l                     (grst_l),
+    .arst_l                     (arst_l),
+    .se                         (se),
+    // Inputs: signals selected by index 1
+    .thrd_en_g                  (stb_thrd_en_g[1]),
+    .cpx_st_ack_tid             (cpx_st_ack_tid1),
+    .pcx_rq_for_stb             (pcx_rq_for_stb[1]),
+    .st_ack_dq_stb              (lsu_st_ack_dq_stb[1]),
+    .stb_flush_st_g             (stb_flush_st_g[1]),
+    .stb_cam_wvld_m             (stb_cam_wvld_m[1]),
+    .pcx_req_squash             (lsu_pcx_req_squash1),
+    .flshinst_rst               (lsu_dfq_flsh_cmplt[1]),
+    .st_dtlb_perr_g             (lsu_st_dtlb_perr_g[1]),
+    .lsu_outstanding_rmo_st_max (lsu_outstanding_rmo_st_max[1]),
+    // Inputs: nets shared with the other stb_ctl instances
+    .lsu_blk_st_m               (lsu_blk_st_m),
+    .tlb_pgnum_g                (tlb_pgnum_buf2[39:37]),
+    .lsu_stbctl_flush_pipe_w    (lsu_stbctl_flush_pipe_w),
+    .flsh_inst_m                (flsh_inst_m),
+    .stb_alt_sel                (lsu_blk_st_m),
+    .stb_alt_addr               (lsu_blkst_pgnum_m[39:37]),
+    .lsu_dtlb_bypass_e          (lsu_dtlb_bypass_e),
+    .tlb_cam_hit                (tlb_cam_hit),
+    // Inputs: per-entry state driven by stb_ctldp1
+    .stb_state_si_0             (stb1_state_si_0[3:2]),
+    .stb_state_si_1             (stb1_state_si_1[3:2]),
+    .stb_state_si_2             (stb1_state_si_2[3:2]),
+    .stb_state_si_3             (stb1_state_si_3[3:2]),
+    .stb_state_si_4             (stb1_state_si_4[3:2]),
+    .stb_state_si_5             (stb1_state_si_5[3:2]),
+    .stb_state_si_6             (stb1_state_si_6[3:2]),
+    .stb_state_si_7             (stb1_state_si_7[3:2]),
+    .stb_state_rtype_0          (stb1_state_rtype_0[2:1]),
+    .stb_state_rtype_1          (stb1_state_rtype_1[2:1]),
+    .stb_state_rtype_2          (stb1_state_rtype_2[2:1]),
+    .stb_state_rtype_3          (stb1_state_rtype_3[2:1]),
+    .stb_state_rtype_4          (stb1_state_rtype_4[2:1]),
+    .stb_state_rtype_5          (stb1_state_rtype_5[2:1]),
+    .stb_state_rtype_6          (stb1_state_rtype_6[2:1]),
+    .stb_state_rtype_7          (stb1_state_rtype_7[2:1]),
+    .stb_state_rmo              (stb1_state_rmo[7:0]),
+    // Outputs
+    .stb_clk_en_l               (stb1_clk_en_l[7:0]),
+    .stb_crnt_ack_id            (stb1_crnt_ack_id[2:0]),
+    .lsu_stb_empty              (lsu_stb_empty[1]),
+    .stb_l2bnk_addr             (stb1_l2b_addr[2:0]),
+    .stb_atm_rq_type            (stb1_atm_rq_type[2:1]),
+    .stb_wrptr                  (stb_wrptr1[2:0]),
+    .stb_rd_for_pcx             (stb_rd_for_pcx[1]),
+    .stb_pcx_rptr               (stb_pcx_rptr1[2:0]),
+    .stb_wrptr_prev             (stb_wrptr1_prev[2:0]),
+    .stb_state_ced_mod          (stb_state_ced1[7:0]),
+    .stb_state_vld_out          (stb_state_vld1[7:0]),
+    .lsu_stbcnt                 (lsu_stbcnt1[3:0]),
+    .stb_rmo_st_issue           (lsu_stb_rmo_st_issue[1]),
+    .stb_full                   (lsu_tlu_stb_full_w2_t[1]),
+    .st_pcx_rq_kill_w2          (lsu_st_pcx_rq_kill_w2[1])
+);
+// stb_ctldp1, variant used when FPGA_SYN_1THREAD is not defined:
+// takes stb1_clk_en_l[7:0] from stb_ctl1 and drives the stb1_state_si_*,
+// stb1_state_rtype_* and stb1_state_rmo nets that stb_ctl1 reads.
+// It sits on the short_scan0 chain.
+lsu_stb_ctldp stb_ctldp1 (
+    // Scan chain: short_scan0_6 -> short_scan0_7
+    .si                (short_scan0_6),
+    .so                (short_scan0_7),
+    /*AUTOINST*/
+    // Inputs
+    .rclk              (clk),
+    .se                (se),
+    .stb_clk_en_l      (stb1_clk_en_l[7:0]),
+    .lsu_stb_va_m      (lsu_stb_va_m[7:6]),
+    .lsu_st_rq_type_m  (lsu_st_rq_type_m[2:1]),
+    .lsu_st_rmo_m      (lsu_st_rmo_m),
+    // Outputs: one 2-bit "si" field per entry 0..7
+    .stb_state_si_0    (stb1_state_si_0[3:2]),
+    .stb_state_si_1    (stb1_state_si_1[3:2]),
+    .stb_state_si_2    (stb1_state_si_2[3:2]),
+    .stb_state_si_3    (stb1_state_si_3[3:2]),
+    .stb_state_si_4    (stb1_state_si_4[3:2]),
+    .stb_state_si_5    (stb1_state_si_5[3:2]),
+    .stb_state_si_6    (stb1_state_si_6[3:2]),
+    .stb_state_si_7    (stb1_state_si_7[3:2]),
+    // Outputs: one 2-bit "rtype" field per entry 0..7
+    .stb_state_rtype_0 (stb1_state_rtype_0[2:1]),
+    .stb_state_rtype_1 (stb1_state_rtype_1[2:1]),
+    .stb_state_rtype_2 (stb1_state_rtype_2[2:1]),
+    .stb_state_rtype_3 (stb1_state_rtype_3[2:1]),
+    .stb_state_rtype_4 (stb1_state_rtype_4[2:1]),
+    .stb_state_rtype_5 (stb1_state_rtype_5[2:1]),
+    .stb_state_rtype_6 (stb1_state_rtype_6[2:1]),
+    .stb_state_rtype_7 (stb1_state_rtype_7[2:1]),
+    // Output: 8-bit "rmo" vector
+    .stb_state_rmo     (stb1_state_rmo[7:0])
+);
+   
+// stb_ctl2, variant used when FPGA_SYN_1THREAD is not defined: fully
+// wired to the index-2 nets. It drives stb2_clk_en_l towards
+// stb_ctldp2 and receives the stb2_state_* nets that stb_ctldp2 drives.
+lsu_stb_ctl stb_ctl2 (
+    // Scan chain: short_scan1_6 -> short_scan1_7
+    .si                         (short_scan1_6),
+    .so                         (short_scan1_7),
+    /*AUTOINST*/
+    // Inputs: clock, resets, scan enable
+    .rclk                       (clk),
+    .grst_l                     (grst_l),
+    .arst_l                     (arst_l),
+    .se                         (se),
+    // Inputs: signals selected by index 2
+    .thrd_en_g                  (stb_thrd_en_g[2]),
+    .cpx_st_ack_tid             (cpx_st_ack_tid2),
+    .pcx_rq_for_stb             (pcx_rq_for_stb[2]),
+    .st_ack_dq_stb              (lsu_st_ack_dq_stb[2]),
+    .stb_flush_st_g             (stb_flush_st_g[2]),
+    .stb_cam_wvld_m             (stb_cam_wvld_m[2]),
+    .pcx_req_squash             (lsu_pcx_req_squash2),
+    .flshinst_rst               (lsu_dfq_flsh_cmplt[2]),
+    .st_dtlb_perr_g             (lsu_st_dtlb_perr_g[2]),
+    .lsu_outstanding_rmo_st_max (lsu_outstanding_rmo_st_max[2]),
+    // Inputs: nets shared with the other stb_ctl instances
+    .lsu_blk_st_m               (lsu_blk_st_m),
+    .tlb_pgnum_g                (tlb_pgnum_buf2[39:37]),
+    .lsu_stbctl_flush_pipe_w    (lsu_stbctl_flush_pipe_w),
+    .flsh_inst_m                (flsh_inst_m),
+    .stb_alt_sel                (lsu_blk_st_m),
+    .stb_alt_addr               (lsu_blkst_pgnum_m[39:37]),
+    .lsu_dtlb_bypass_e          (lsu_dtlb_bypass_e),
+    .tlb_cam_hit                (tlb_cam_hit),
+    // Inputs: per-entry state driven by stb_ctldp2
+    .stb_state_si_0             (stb2_state_si_0[3:2]),
+    .stb_state_si_1             (stb2_state_si_1[3:2]),
+    .stb_state_si_2             (stb2_state_si_2[3:2]),
+    .stb_state_si_3             (stb2_state_si_3[3:2]),
+    .stb_state_si_4             (stb2_state_si_4[3:2]),
+    .stb_state_si_5             (stb2_state_si_5[3:2]),
+    .stb_state_si_6             (stb2_state_si_6[3:2]),
+    .stb_state_si_7             (stb2_state_si_7[3:2]),
+    .stb_state_rtype_0          (stb2_state_rtype_0[2:1]),
+    .stb_state_rtype_1          (stb2_state_rtype_1[2:1]),
+    .stb_state_rtype_2          (stb2_state_rtype_2[2:1]),
+    .stb_state_rtype_3          (stb2_state_rtype_3[2:1]),
+    .stb_state_rtype_4          (stb2_state_rtype_4[2:1]),
+    .stb_state_rtype_5          (stb2_state_rtype_5[2:1]),
+    .stb_state_rtype_6          (stb2_state_rtype_6[2:1]),
+    .stb_state_rtype_7          (stb2_state_rtype_7[2:1]),
+    .stb_state_rmo              (stb2_state_rmo[7:0]),
+    // Outputs
+    .stb_clk_en_l               (stb2_clk_en_l[7:0]),
+    .stb_crnt_ack_id            (stb2_crnt_ack_id[2:0]),
+    .lsu_stb_empty              (lsu_stb_empty[2]),
+    .stb_l2bnk_addr             (stb2_l2b_addr[2:0]),
+    .stb_atm_rq_type            (stb2_atm_rq_type[2:1]),
+    .stb_wrptr                  (stb_wrptr2[2:0]),
+    .stb_rd_for_pcx             (stb_rd_for_pcx[2]),
+    .stb_pcx_rptr               (stb_pcx_rptr2[2:0]),
+    .stb_wrptr_prev             (stb_wrptr2_prev[2:0]),
+    .stb_state_ced_mod          (stb_state_ced2[7:0]),
+    .stb_state_vld_out          (stb_state_vld2[7:0]),
+    .lsu_stbcnt                 (lsu_stbcnt2[3:0]),
+    .stb_rmo_st_issue           (lsu_stb_rmo_st_issue[2]),
+    .stb_full                   (lsu_tlu_stb_full_w2_t[2]),
+    .st_pcx_rq_kill_w2          (lsu_st_pcx_rq_kill_w2[2])
+);
+// stb_ctldp2, variant used when FPGA_SYN_1THREAD is not defined:
+// takes stb2_clk_en_l[7:0] from stb_ctl2 and drives the stb2_state_si_*,
+// stb2_state_rtype_* and stb2_state_rmo nets that stb_ctl2 reads.
+lsu_stb_ctldp stb_ctldp2 (
+    // Scan chain: short_scan1_7 -> short_scan1_8
+    .si                (short_scan1_7),
+    .so                (short_scan1_8),
+    /*AUTOINST*/
+    // Inputs
+    .rclk              (clk),
+    .se                (se),
+    .stb_clk_en_l      (stb2_clk_en_l[7:0]),
+    .lsu_stb_va_m      (lsu_stb_va_m[7:6]),
+    .lsu_st_rq_type_m  (lsu_st_rq_type_m[2:1]),
+    .lsu_st_rmo_m      (lsu_st_rmo_m),
+    // Outputs: one 2-bit "si" field per entry 0..7
+    .stb_state_si_0    (stb2_state_si_0[3:2]),
+    .stb_state_si_1    (stb2_state_si_1[3:2]),
+    .stb_state_si_2    (stb2_state_si_2[3:2]),
+    .stb_state_si_3    (stb2_state_si_3[3:2]),
+    .stb_state_si_4    (stb2_state_si_4[3:2]),
+    .stb_state_si_5    (stb2_state_si_5[3:2]),
+    .stb_state_si_6    (stb2_state_si_6[3:2]),
+    .stb_state_si_7    (stb2_state_si_7[3:2]),
+    // Outputs: one 2-bit "rtype" field per entry 0..7
+    .stb_state_rtype_0 (stb2_state_rtype_0[2:1]),
+    .stb_state_rtype_1 (stb2_state_rtype_1[2:1]),
+    .stb_state_rtype_2 (stb2_state_rtype_2[2:1]),
+    .stb_state_rtype_3 (stb2_state_rtype_3[2:1]),
+    .stb_state_rtype_4 (stb2_state_rtype_4[2:1]),
+    .stb_state_rtype_5 (stb2_state_rtype_5[2:1]),
+    .stb_state_rtype_6 (stb2_state_rtype_6[2:1]),
+    .stb_state_rtype_7 (stb2_state_rtype_7[2:1]),
+    // Output: 8-bit "rmo" vector
+    .stb_state_rmo     (stb2_state_rmo[7:0])
+);
+
+lsu_stb_ctl stb_ctl3  (  // lsu_stb_ctl instance 3: per-instance nets use stb3_*/..3 names or bit [3] of shared vectors
+                       .so              (short_scan0_8),  // scan out; short_scan0_8 is also qdp1's .si
+                       .si              (short_scan0_7),
+                       /*AUTOINST*/
+                       // Outputs
+                       .stb_clk_en_l    (stb3_clk_en_l[7:0]),    // Templated
+                       .stb_crnt_ack_id (stb3_crnt_ack_id[2:0]), // Templated
+                       .lsu_stb_empty   (lsu_stb_empty[3]),      // Templated
+                       .stb_l2bnk_addr  (stb3_l2b_addr[2:0]),    // Templated
+                       .stb_atm_rq_type (stb3_atm_rq_type[2:1]), // Templated
+                       .stb_wrptr       (stb_wrptr3[2:0]),       // Templated
+                       .stb_rd_for_pcx  (stb_rd_for_pcx[3]),     // Templated
+                       .stb_pcx_rptr    (stb_pcx_rptr3[2:0]),    // Templated
+                       .stb_wrptr_prev  (stb_wrptr3_prev[2:0]),  // Templated
+                       .stb_state_ced_mod(stb_state_ced3[7:0]),  // Templated
+                       .stb_state_vld_out(stb_state_vld3[7:0]),  // Templated
+                       .lsu_stbcnt      (lsu_stbcnt3[3:0]),      // Templated
+                       .stb_rmo_st_issue(lsu_stb_rmo_st_issue[3]), // Templated
+                       .stb_full        (lsu_tlu_stb_full_w2_t[3]), // Templated
+                       .st_pcx_rq_kill_w2(lsu_st_pcx_rq_kill_w2[3]), // Templated
+                       // Inputs
+                       .rclk            (clk),                   // Templated
+                       .grst_l          (grst_l),
+                       .arst_l          (arst_l),
+                       .se              (se),
+                       .thrd_en_g       (stb_thrd_en_g[3]),      // Templated
+                       .cpx_st_ack_tid  (cpx_st_ack_tid3),       // Templated
+                       .pcx_rq_for_stb  (pcx_rq_for_stb[3]),     // Templated
+                       .st_ack_dq_stb   (lsu_st_ack_dq_stb[3]),  // Templated
+                       .stb_flush_st_g  (stb_flush_st_g[3]),     // Templated
+                       .stb_cam_wvld_m  (stb_cam_wvld_m[3]),     // Templated
+                       .lsu_blk_st_m    (lsu_blk_st_m),  // same net also drives .stb_alt_sel below
+                       .tlb_pgnum_g     (tlb_pgnum_buf2[39:37]), // Templated
+                       .pcx_req_squash  (lsu_pcx_req_squash3),   // Templated
+                       .flshinst_rst    (lsu_dfq_flsh_cmplt[3]), // Templated
+                       .lsu_stbctl_flush_pipe_w(lsu_stbctl_flush_pipe_w),
+                       .flsh_inst_m     (flsh_inst_m),
+                       .stb_state_si_0  (stb3_state_si_0[3:2]),  // Templated; the stb3_state_* nets are outputs of stb_ctldp3
+                       .stb_state_si_1  (stb3_state_si_1[3:2]),  // Templated
+                       .stb_state_si_2  (stb3_state_si_2[3:2]),  // Templated
+                       .stb_state_si_3  (stb3_state_si_3[3:2]),  // Templated
+                       .stb_state_si_4  (stb3_state_si_4[3:2]),  // Templated
+                       .stb_state_si_5  (stb3_state_si_5[3:2]),  // Templated
+                       .stb_state_si_6  (stb3_state_si_6[3:2]),  // Templated
+                       .stb_state_si_7  (stb3_state_si_7[3:2]),  // Templated
+                       .stb_state_rtype_0(stb3_state_rtype_0[2:1]), // Templated
+                       .stb_state_rtype_1(stb3_state_rtype_1[2:1]), // Templated
+                       .stb_state_rtype_2(stb3_state_rtype_2[2:1]), // Templated
+                       .stb_state_rtype_3(stb3_state_rtype_3[2:1]), // Templated
+                       .stb_state_rtype_4(stb3_state_rtype_4[2:1]), // Templated
+                       .stb_state_rtype_5(stb3_state_rtype_5[2:1]), // Templated
+                       .stb_state_rtype_6(stb3_state_rtype_6[2:1]), // Templated
+                       .stb_state_rtype_7(stb3_state_rtype_7[2:1]), // Templated
+                       .stb_state_rmo   (stb3_state_rmo[7:0]),   // Templated
+                       .stb_alt_sel     (lsu_blk_st_m),          // Templated
+                       .stb_alt_addr    (lsu_blkst_pgnum_m[39:37]), // Templated; same [39:37] slice width as .tlb_pgnum_g
+                       .lsu_dtlb_bypass_e(lsu_dtlb_bypass_e),
+                       .tlb_cam_hit     (tlb_cam_hit),
+                       .st_dtlb_perr_g  (lsu_st_dtlb_perr_g[3]), // Templated
+                       .lsu_outstanding_rmo_st_max(lsu_outstanding_rmo_st_max[3])); // Templated
+lsu_stb_ctldp stb_ctldp3  (  // lsu_stb_ctldp instance 3; its stb3_state_* outputs are the state inputs of stb_ctl3
+                           .so          (short_so1),
+                           .si          (short_scan1_8),
+                           /*AUTOINST*/
+                           // Outputs
+                           .stb_state_si_0(stb3_state_si_0[3:2]), // Templated
+                           .stb_state_si_1(stb3_state_si_1[3:2]), // Templated
+                           .stb_state_si_2(stb3_state_si_2[3:2]), // Templated
+                           .stb_state_si_3(stb3_state_si_3[3:2]), // Templated
+                           .stb_state_si_4(stb3_state_si_4[3:2]), // Templated
+                           .stb_state_si_5(stb3_state_si_5[3:2]), // Templated
+                           .stb_state_si_6(stb3_state_si_6[3:2]), // Templated
+                           .stb_state_si_7(stb3_state_si_7[3:2]), // Templated
+                           .stb_state_rtype_0(stb3_state_rtype_0[2:1]), // Templated
+                           .stb_state_rtype_1(stb3_state_rtype_1[2:1]), // Templated
+                           .stb_state_rtype_2(stb3_state_rtype_2[2:1]), // Templated
+                           .stb_state_rtype_3(stb3_state_rtype_3[2:1]), // Templated
+                           .stb_state_rtype_4(stb3_state_rtype_4[2:1]), // Templated
+                           .stb_state_rtype_5(stb3_state_rtype_5[2:1]), // Templated
+                           .stb_state_rtype_6(stb3_state_rtype_6[2:1]), // Templated
+                           .stb_state_rtype_7(stb3_state_rtype_7[2:1]), // Templated
+                           .stb_state_rmo(stb3_state_rmo[7:0]),  // Templated
+                           // Inputs
+                           .rclk        (clk),                   // Templated
+                           .se          (se),
+                           .stb_clk_en_l(stb3_clk_en_l[7:0]),    // Templated; driven by stb_ctl3's .stb_clk_en_l output
+                           .lsu_stb_va_m(lsu_stb_va_m[7:6]),  // these three nets are not per-instance; the preceding stb2 instance takes the same ones
+                           .lsu_st_rq_type_m(lsu_st_rq_type_m[2:1]),
+                           .lsu_st_rmo_m(lsu_st_rmo_m));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+/*
+lsu_qdp1 AUTO_TEMPLATE (
+        .tlb_pgnum            (tlb_pgnum_buf[39:13]),
+        .rst_tri_en           (mux_drive_disable),
+        .lsu_ldst_va_m         (lsu_ldst_va_m_buf[47:0]),
+        .rclk                  (clk),
+        .dfq_byp_sel           (lsu_dfq_byp_tid_d1_sel[3:0]),
+        .ld_pcx_rq_sel         (lsu_ld_pcx_rq_mxsel[3:0]),
+        .lmq_byp_ldxa_sel0     (lmq_byp_ldxa_mxsel0[2:0]),
+        .lmq_byp_ldxa_sel1     (lmq_byp_ldxa_mxsel1[2:0]),
+        .lmq_byp_ldxa_sel2     (lmq_byp_ldxa_mxsel2[2:0]),
+        .lmq_byp_ldxa_sel3     (lmq_byp_ldxa_mxsel3[2:0]),
+        .lmq_byp_data_sel0     (lmq_byp_data_mxsel0[3:0]),
+        .lmq_byp_data_sel1     (lmq_byp_data_mxsel1[3:0]),
+        .lmq_byp_data_sel2     (lmq_byp_data_mxsel2[3:0]),
+        .lmq_byp_data_sel3     (lmq_byp_data_mxsel3[3:0]),
+        .ld_thrd_byp_sel_m     (ld_thrd_byp_mxsel_m[3:0]),
+        .ifu_pcx_pkt           (ifu_lsu_pcxpkt_e[51:0]), 
+        .lmq_byp_misc_sel      (lsu_lmq_byp_misc_sel[3:0]),
+        .lsu_dcache_rdata_w    (dcache_rdata_wb_buf[63:0]));
+*/                   
+`ifdef FPGA_SYN_1THREAD
+                                                    
+lsu_qdp1  qdp1  (  // FPGA_SYN_1THREAD variant: the thread 1-3 inputs below are tied to constants
+                 .so                    (short_scan0_9),
+                 .si                    (short_scan0_8),
+		             .lsu_iobrdge_rd_data	  ({16'b0,lsu_iobrdge_rd_data[27:0]}), // 16 zero bits prepended to the 28-bit slice (44 bits total)
+		             .dtag_wdata_m		      (dtag_wdata_m[29:0]),
+                 /*AUTOINST*/
+                 // Outputs
+                 .lsu_va_match_b47_b32_m(lsu_va_match_b47_b32_m),
+                 .lsu_va_match_b31_b3_m (lsu_va_match_b31_b3_m),
+                 .lsu_va_wtchpt_addr    (lsu_va_wtchpt_addr[47:3]),
+                 .spc_pcx_data_pa       (spc_pcx_data_pa[`PCX_WIDTH-1:0]),
+                 .lmq0_byp_misc_sz      (lmq0_byp_misc_sz[1:0]),
+                 .lmq1_byp_misc_sz      (lmq1_byp_misc_sz[1:0]),
+                 .lmq2_byp_misc_sz      (lmq2_byp_misc_sz[1:0]),
+                 .lmq3_byp_misc_sz      (lmq3_byp_misc_sz[1:0]),
+                 .lsu_byp_misc_sz_e     (lsu_byp_misc_sz_e[1:0]),
+                 .lsu_l2fill_sign_extend_m(lsu_l2fill_sign_extend_m),
+                 .lsu_l2fill_bendian_m  (lsu_l2fill_bendian_m),
+                 .lmq0_l2fill_fpld      (lmq0_l2fill_fpld),
+                 .lmq1_l2fill_fpld      (lmq1_l2fill_fpld),
+                 .lmq2_l2fill_fpld      (lmq2_l2fill_fpld),
+                 .lmq3_l2fill_fpld      (lmq3_l2fill_fpld),
+                 .lmq_ld_rd1            (lmq_ld_rd1[4:0]),
+                 .lmq0_ncache_ld        (lmq0_ncache_ld),
+                 .lmq1_ncache_ld        (lmq1_ncache_ld),
+                 .lmq2_ncache_ld        (lmq2_ncache_ld),
+                 .lmq3_ncache_ld        (lmq3_ncache_ld),
+                 .lmq0_ld_rq_type       (lmq0_ld_rq_type[2:0]),
+                 .lmq1_ld_rq_type       (lmq1_ld_rq_type[2:0]),
+                 .lmq2_ld_rq_type       (lmq2_ld_rq_type[2:0]),
+                 .lmq3_ld_rq_type       (lmq3_ld_rq_type[2:0]),
+                 .lmq0_ldd_vld          (lmq0_ldd_vld),
+                 .lmq1_ldd_vld          (lmq1_ldd_vld),
+                 .lmq2_ldd_vld          (lmq2_ldd_vld),
+                 .lmq3_ldd_vld          (lmq3_ldd_vld),
+                 .ld_sec_hit_thrd0      (ld_sec_hit_thrd0),
+                 .ld_sec_hit_thrd1      (ld_sec_hit_thrd1),
+                 .ld_sec_hit_thrd2      (ld_sec_hit_thrd2),
+                 .ld_sec_hit_thrd3      (ld_sec_hit_thrd3),
+                 .lmq0_pcx_pkt_addr     (lmq0_pcx_pkt_addr[10:0]),
+                 .lmq1_pcx_pkt_addr     (lmq1_pcx_pkt_addr[10:0]),
+                 .lmq2_pcx_pkt_addr     (lmq2_pcx_pkt_addr[10:0]),
+                 .lmq3_pcx_pkt_addr     (lmq3_pcx_pkt_addr[10:0]),
+                 .lsu_mmu_rs3_data_g    (lsu_mmu_rs3_data_g[63:0]),
+                 .lsu_tlu_rs3_data_g    (lsu_tlu_rs3_data_g[63:0]),
+                 .lsu_diagnstc_wr_data_b0(lsu_diagnstc_wr_data_b0),
+                 .lsu_diagnstc_wr_data_e(lsu_diagnstc_wr_data_e[63:0]),
+                 .lsu_ifu_stxa_data     (lsu_ifu_stxa_data[47:0]),
+                 .lsu_ifu_ld_icache_index(lsu_ifu_ld_icache_index[11:5]),
+                 .lsu_ifu_ld_pcxpkt_tid (lsu_ifu_ld_pcxpkt_tid[1:0]),
+                 .lsu_error_pa_m        (lsu_error_pa_m[28:0]),
+                 .lsu_pref_pcx_req      (lsu_pref_pcx_req),
+                 .st_rs3_data_g         (st_rs3_data_g[63:0]),
+                 .lsu_ldst_va_way_g     (lsu_ldst_va_way_g[1:0]),
+                 .dcache_alt_data_w0_m  (dcache_alt_data_w0_m[63:0]),
+                 // Inputs
+                 .rclk                  (clk),                   // Templated
+                 .se                    (se),
+                 .sehold                (sehold),
+                 .lsu_dcache_iob_rd_w   (lsu_dcache_iob_rd_w),
+                 .lsu_ramtest_rd_w      (lsu_ramtest_rd_w),
+                 .lsu_pcx_rq_sz_b3      (lsu_pcx_rq_sz_b3),
+                 .lsu_diagnstc_data_sel (lsu_diagnstc_data_sel[3:0]),
+                 .pcx_pkt_src_sel       (pcx_pkt_src_sel[3:0]),
+                 .lsu_stb_pcx_rvld_d1   (lsu_stb_pcx_rvld_d1),
+                 .imiss_pcx_mx_sel      (imiss_pcx_mx_sel),
+                 .fwd_int_fp_pcx_mx_sel (fwd_int_fp_pcx_mx_sel[2:0]),
+                 .spu_lsu_ldst_pckt     (spu_lsu_ldst_pckt[`PCX_WIDTH-1:0]),
+                 .tlu_lsu_pcxpkt        (tlu_lsu_pcxpkt[25:0]),
+                 .const_cpuid           (const_cpuid[2:0]),
+                 .ifu_pcx_pkt           (ifu_lsu_pcxpkt_e[51:0]), // Templated
+                 .lmq_byp_data_en_w2    (lmq_byp_data_en_w2[3:0]),
+                 .lmq_byp_data_sel0     (lmq_byp_data_mxsel0[3:0]), // Templated
+                 .lmq_byp_data_sel1     (lmq_byp_data_mxsel1[3:0]), // Templated
+                 .lmq_byp_data_sel2     (lmq_byp_data_mxsel2[3:0]), // Templated
+                 .lmq_byp_data_sel3     (lmq_byp_data_mxsel3[3:0]), // Templated
+                 .lmq_byp_ldxa_sel0     (lmq_byp_ldxa_mxsel0[2:0]), // Templated
+                 .lmq_byp_ldxa_sel1     (lmq_byp_ldxa_mxsel1[2:0]), // Templated
+                 .lmq_byp_ldxa_sel2     (lmq_byp_ldxa_mxsel2[2:0]), // Templated
+                 .lmq_byp_ldxa_sel3     (lmq_byp_ldxa_mxsel3[2:0]), // Templated
+                 .lmq_byp_data_fmx_sel  (lmq_byp_data_fmx_sel[3:0]),
+                 .exu_lsu_rs3_data_e    (exu_lsu_rs3_data_e[63:0]),
+                 .ifu_lsu_ldxa_data_w2  (ifu_lsu_ldxa_data_w2[63:0]),
+                 .tlu_lsu_int_ldxa_data_w2(tlu_lsu_int_ldxa_data_w2[63:0]),
+                 .spu_lsu_ldxa_data_w2  (spu_lsu_ldxa_data_w2[63:0]),
+                 .stb_rdata_ramd        (stb_rdata_ramd[75:0]),
+                 .stb_rdata_ramc        (stb_rdata_ramc[44:9]),
+                 .lmq_byp_misc_sel      (lsu_lmq_byp_misc_sel[3:0]), // Templated
+                 .dfq_byp_sel           (lsu_dfq_byp_tid_d1_sel[3:0]), // Templated
+                 .ld_pcx_rq_sel         (lsu_ld_pcx_rq_mxsel[3:0]), // Templated
+                 .ld_pcx_thrd           (ld_pcx_thrd[1:0]),
+                 .lmq_enable            (lmq_enable[3:0]),
+                 .ld_pcx_pkt_g          (ld_pcx_pkt_g[`LMQ_WIDTH-1:40]),
+                 .ffu_lsu_data          (ffu_lsu_data[80:0]),
+                 .lsu_tlb_st_sel_m      (lsu_tlb_st_sel_m[3:0]),
+                 .lsu_pcx_fwd_pkt       (lsu_pcx_fwd_pkt[107:0]),
+                 .lsu_pcx_fwd_reply     (lsu_pcx_fwd_reply),
+                 .lsu_diagnstc_dtagv_prty_invrt_e(lsu_diagnstc_dtagv_prty_invrt_e),
+                 .lsu_misc_rdata_w2     (lsu_misc_rdata_w2[63:0]),
+                 .lsu_stb_rd_tid        (lsu_stb_rd_tid[1:0]),
+                 .lsu_iobrdge_rply_data_sel(lsu_iobrdge_rply_data_sel[2:0]),
+                 .lsu_atomic_pkt2_bsel_g(lsu_atomic_pkt2_bsel_g[2:0]),
+                 .lsu_pcx_ld_dtag_perror_w2(lsu_pcx_ld_dtag_perror_w2),
+                 .lsu_dcache_rdata_w    (dcache_rdata_wb_buf[63:0]), // Templated
+                 .lsu_va_wtchpt0_wr_en_l(lsu_va_wtchpt0_wr_en_l),
+                 .lsu_va_wtchpt1_wr_en_l(lsu_va_wtchpt1_wr_en_l),
+                 .lsu_va_wtchpt2_wr_en_l(lsu_va_wtchpt2_wr_en_l),
+                 .lsu_va_wtchpt3_wr_en_l(lsu_va_wtchpt3_wr_en_l),
+                 .thread0_m             (thread0_m),
+                 .thread1_m             (1'b0),  // tied off: single-thread build
+                 .thread2_m             (1'b0),  // tied off: single-thread build
+                 .thread3_m             (1'b0),  // tied off: single-thread build
+                 .lsu_thread_g          (lsu_thread_g[3:0]),
+                 .lsu_ldst_va_m         (lsu_ldst_va_m_buf[47:0]), // Templated
+                 .tlb_pgnum             (tlb_pgnum_buf[39:13]),  // Templated
+                 .lsu_bld_pcx_rq        (lsu_bld_pcx_rq),
+                 .lsu_bld_rq_addr       (lsu_bld_rq_addr[1:0]),
+                 .lmq0_pcx_pkt_way      (lmq0_pcx_pkt_way[1:0]),
+                 .lmq1_pcx_pkt_way      (2'b00),  // tied off: single-thread build
+                 .lmq2_pcx_pkt_way      (2'b00),  // tied off: single-thread build
+                 .lmq3_pcx_pkt_way      (2'b00),  // tied off: single-thread build
+                 .lsu_dfq_ld_vld        (lsu_dfq_ld_vld),
+                 .lsu_ifu_asi_data_en_l (lsu_ifu_asi_data_en_l),
+                 .lsu_ld0_spec_vld_kill_w2(lsu_ld0_spec_vld_kill_w2),
+                 .lsu_ld1_spec_vld_kill_w2(1'b0),  // tied off: single-thread build
+                 .lsu_ld2_spec_vld_kill_w2(1'b0),  // tied off: single-thread build
+                 .lsu_ld3_spec_vld_kill_w2(1'b0),  // tied off: single-thread build
+                 .lsu_fwd_rply_sz1_unc  (lsu_fwd_rply_sz1_unc),
+                 .rst_tri_en            (mux_drive_disable),     // Templated
+                 .lsu_l2fill_data       (lsu_l2fill_data[63:0]),
+                 .l2fill_vld_m          (l2fill_vld_m),
+                 .ld_thrd_byp_sel_m     (ld_thrd_byp_mxsel_m[3:0])); // Templated
+`else
+                                                     
+lsu_qdp1  qdp1  (  // variant built when FPGA_SYN_1THREAD is not defined: all four thread inputs are wired to nets
+                 .so                    (short_scan0_9),
+                 .si                    (short_scan0_8),
+		             .lsu_iobrdge_rd_data	  ({16'b0,lsu_iobrdge_rd_data[27:0]}), // 16 zero bits prepended to the 28-bit slice (44 bits total)
+		             .dtag_wdata_m		      (dtag_wdata_m[29:0]),
+                 /*AUTOINST*/
+                 // Outputs
+                 .lsu_va_match_b47_b32_m(lsu_va_match_b47_b32_m),
+                 .lsu_va_match_b31_b3_m (lsu_va_match_b31_b3_m),
+                 .lsu_va_wtchpt_addr    (lsu_va_wtchpt_addr[47:3]),
+                 .spc_pcx_data_pa       (spc_pcx_data_pa[`PCX_WIDTH-1:0]),
+                 .lmq0_byp_misc_sz      (lmq0_byp_misc_sz[1:0]),
+                 .lmq1_byp_misc_sz      (lmq1_byp_misc_sz[1:0]),
+                 .lmq2_byp_misc_sz      (lmq2_byp_misc_sz[1:0]),
+                 .lmq3_byp_misc_sz      (lmq3_byp_misc_sz[1:0]),
+                 .lsu_byp_misc_sz_e     (lsu_byp_misc_sz_e[1:0]),
+                 .lsu_l2fill_sign_extend_m(lsu_l2fill_sign_extend_m),
+                 .lsu_l2fill_bendian_m  (lsu_l2fill_bendian_m),
+                 .lmq0_l2fill_fpld      (lmq0_l2fill_fpld),
+                 .lmq1_l2fill_fpld      (lmq1_l2fill_fpld),
+                 .lmq2_l2fill_fpld      (lmq2_l2fill_fpld),
+                 .lmq3_l2fill_fpld      (lmq3_l2fill_fpld),
+                 .lmq_ld_rd1            (lmq_ld_rd1[4:0]),
+                 .lmq0_ncache_ld        (lmq0_ncache_ld),
+                 .lmq1_ncache_ld        (lmq1_ncache_ld),
+                 .lmq2_ncache_ld        (lmq2_ncache_ld),
+                 .lmq3_ncache_ld        (lmq3_ncache_ld),
+                 .lmq0_ld_rq_type       (lmq0_ld_rq_type[2:0]),
+                 .lmq1_ld_rq_type       (lmq1_ld_rq_type[2:0]),
+                 .lmq2_ld_rq_type       (lmq2_ld_rq_type[2:0]),
+                 .lmq3_ld_rq_type       (lmq3_ld_rq_type[2:0]),
+                 .lmq0_ldd_vld          (lmq0_ldd_vld),
+                 .lmq1_ldd_vld          (lmq1_ldd_vld),
+                 .lmq2_ldd_vld          (lmq2_ldd_vld),
+                 .lmq3_ldd_vld          (lmq3_ldd_vld),
+                 .ld_sec_hit_thrd0      (ld_sec_hit_thrd0),
+                 .ld_sec_hit_thrd1      (ld_sec_hit_thrd1),
+                 .ld_sec_hit_thrd2      (ld_sec_hit_thrd2),
+                 .ld_sec_hit_thrd3      (ld_sec_hit_thrd3),
+                 .lmq0_pcx_pkt_addr     (lmq0_pcx_pkt_addr[10:0]),
+                 .lmq1_pcx_pkt_addr     (lmq1_pcx_pkt_addr[10:0]),
+                 .lmq2_pcx_pkt_addr     (lmq2_pcx_pkt_addr[10:0]),
+                 .lmq3_pcx_pkt_addr     (lmq3_pcx_pkt_addr[10:0]),
+                 .lsu_mmu_rs3_data_g    (lsu_mmu_rs3_data_g[63:0]),
+                 .lsu_tlu_rs3_data_g    (lsu_tlu_rs3_data_g[63:0]),
+                 .lsu_diagnstc_wr_data_b0(lsu_diagnstc_wr_data_b0),
+                 .lsu_diagnstc_wr_data_e(lsu_diagnstc_wr_data_e[63:0]),
+                 .lsu_ifu_stxa_data     (lsu_ifu_stxa_data[47:0]),
+                 .lsu_ifu_ld_icache_index(lsu_ifu_ld_icache_index[11:5]),
+                 .lsu_ifu_ld_pcxpkt_tid (lsu_ifu_ld_pcxpkt_tid[1:0]),
+                 .lsu_error_pa_m        (lsu_error_pa_m[28:0]),
+                 .lsu_pref_pcx_req      (lsu_pref_pcx_req),
+                 .st_rs3_data_g         (st_rs3_data_g[63:0]),
+                 .lsu_ldst_va_way_g     (lsu_ldst_va_way_g[1:0]),
+                 .dcache_alt_data_w0_m  (dcache_alt_data_w0_m[63:0]),
+                 // Inputs
+                 .rclk                  (clk),                   // Templated
+                 .se                    (se),
+                 .sehold                (sehold),
+                 .lsu_dcache_iob_rd_w   (lsu_dcache_iob_rd_w),
+                 .lsu_ramtest_rd_w      (lsu_ramtest_rd_w),
+                 .lsu_pcx_rq_sz_b3      (lsu_pcx_rq_sz_b3),
+                 .lsu_diagnstc_data_sel (lsu_diagnstc_data_sel[3:0]),
+                 .pcx_pkt_src_sel       (pcx_pkt_src_sel[3:0]),
+                 .lsu_stb_pcx_rvld_d1   (lsu_stb_pcx_rvld_d1),
+                 .imiss_pcx_mx_sel      (imiss_pcx_mx_sel),
+                 .fwd_int_fp_pcx_mx_sel (fwd_int_fp_pcx_mx_sel[2:0]),
+                 .spu_lsu_ldst_pckt     (spu_lsu_ldst_pckt[`PCX_WIDTH-1:0]),
+                 .tlu_lsu_pcxpkt        (tlu_lsu_pcxpkt[25:0]),
+                 .const_cpuid           (const_cpuid[2:0]),
+                 .ifu_pcx_pkt           (ifu_lsu_pcxpkt_e[51:0]), // Templated
+                 .lmq_byp_data_en_w2    (lmq_byp_data_en_w2[3:0]),
+                 .lmq_byp_data_sel0     (lmq_byp_data_mxsel0[3:0]), // Templated
+                 .lmq_byp_data_sel1     (lmq_byp_data_mxsel1[3:0]), // Templated
+                 .lmq_byp_data_sel2     (lmq_byp_data_mxsel2[3:0]), // Templated
+                 .lmq_byp_data_sel3     (lmq_byp_data_mxsel3[3:0]), // Templated
+                 .lmq_byp_ldxa_sel0     (lmq_byp_ldxa_mxsel0[2:0]), // Templated
+                 .lmq_byp_ldxa_sel1     (lmq_byp_ldxa_mxsel1[2:0]), // Templated
+                 .lmq_byp_ldxa_sel2     (lmq_byp_ldxa_mxsel2[2:0]), // Templated
+                 .lmq_byp_ldxa_sel3     (lmq_byp_ldxa_mxsel3[2:0]), // Templated
+                 .lmq_byp_data_fmx_sel  (lmq_byp_data_fmx_sel[3:0]),
+                 .exu_lsu_rs3_data_e    (exu_lsu_rs3_data_e[63:0]),
+                 .ifu_lsu_ldxa_data_w2  (ifu_lsu_ldxa_data_w2[63:0]),
+                 .tlu_lsu_int_ldxa_data_w2(tlu_lsu_int_ldxa_data_w2[63:0]),
+                 .spu_lsu_ldxa_data_w2  (spu_lsu_ldxa_data_w2[63:0]),
+                 .stb_rdata_ramd        (stb_rdata_ramd[75:0]),
+                 .stb_rdata_ramc        (stb_rdata_ramc[44:9]),
+                 .lmq_byp_misc_sel      (lsu_lmq_byp_misc_sel[3:0]), // Templated
+                 .dfq_byp_sel           (lsu_dfq_byp_tid_d1_sel[3:0]), // Templated
+                 .ld_pcx_rq_sel         (lsu_ld_pcx_rq_mxsel[3:0]), // Templated
+                 .ld_pcx_thrd           (ld_pcx_thrd[1:0]),
+                 .lmq_enable            (lmq_enable[3:0]),
+                 .ld_pcx_pkt_g          (ld_pcx_pkt_g[`LMQ_WIDTH-1:40]),
+                 .ffu_lsu_data          (ffu_lsu_data[80:0]),
+                 .lsu_tlb_st_sel_m      (lsu_tlb_st_sel_m[3:0]),
+                 .lsu_pcx_fwd_pkt       (lsu_pcx_fwd_pkt[107:0]),
+                 .lsu_pcx_fwd_reply     (lsu_pcx_fwd_reply),
+                 .lsu_diagnstc_dtagv_prty_invrt_e(lsu_diagnstc_dtagv_prty_invrt_e),
+                 .lsu_misc_rdata_w2     (lsu_misc_rdata_w2[63:0]),
+                 .lsu_stb_rd_tid        (lsu_stb_rd_tid[1:0]),
+                 .lsu_iobrdge_rply_data_sel(lsu_iobrdge_rply_data_sel[2:0]),
+                 .lsu_atomic_pkt2_bsel_g(lsu_atomic_pkt2_bsel_g[2:0]),
+                 .lsu_pcx_ld_dtag_perror_w2(lsu_pcx_ld_dtag_perror_w2),
+                 .lsu_dcache_rdata_w    (dcache_rdata_wb_buf[63:0]), // Templated
+                 .lsu_va_wtchpt0_wr_en_l(lsu_va_wtchpt0_wr_en_l),
+                 .lsu_va_wtchpt1_wr_en_l(lsu_va_wtchpt1_wr_en_l),
+                 .lsu_va_wtchpt2_wr_en_l(lsu_va_wtchpt2_wr_en_l),
+                 .lsu_va_wtchpt3_wr_en_l(lsu_va_wtchpt3_wr_en_l),
+                 .thread0_m             (thread0_m),
+                 .thread1_m             (thread1_m),  // thread1-3_m are nets here; the FPGA_SYN_1THREAD variant ties them to 1'b0
+                 .thread2_m             (thread2_m),
+                 .thread3_m             (thread3_m),
+                 .lsu_thread_g          (lsu_thread_g[3:0]),
+                 .lsu_ldst_va_m         (lsu_ldst_va_m_buf[47:0]), // Templated
+                 .tlb_pgnum             (tlb_pgnum_buf[39:13]),  // Templated
+                 .lsu_bld_pcx_rq        (lsu_bld_pcx_rq),
+                 .lsu_bld_rq_addr       (lsu_bld_rq_addr[1:0]),
+                 .lmq0_pcx_pkt_way      (lmq0_pcx_pkt_way[1:0]),
+                 .lmq1_pcx_pkt_way      (lmq1_pcx_pkt_way[1:0]),  // lmq1-3 ways are nets here; 2'b00 in the FPGA_SYN_1THREAD variant
+                 .lmq2_pcx_pkt_way      (lmq2_pcx_pkt_way[1:0]),
+                 .lmq3_pcx_pkt_way      (lmq3_pcx_pkt_way[1:0]),
+                 .lsu_dfq_ld_vld        (lsu_dfq_ld_vld),
+                 .lsu_ifu_asi_data_en_l (lsu_ifu_asi_data_en_l),
+                 .lsu_ld0_spec_vld_kill_w2(lsu_ld0_spec_vld_kill_w2),
+                 .lsu_ld1_spec_vld_kill_w2(lsu_ld1_spec_vld_kill_w2),  // ld1-3 kills are nets here; 1'b0 in the FPGA_SYN_1THREAD variant
+                 .lsu_ld2_spec_vld_kill_w2(lsu_ld2_spec_vld_kill_w2),
+                 .lsu_ld3_spec_vld_kill_w2(lsu_ld3_spec_vld_kill_w2),
+                 .lsu_fwd_rply_sz1_unc  (lsu_fwd_rply_sz1_unc),
+                 .rst_tri_en            (mux_drive_disable),     // Templated
+                 .lsu_l2fill_data       (lsu_l2fill_data[63:0]),
+                 .l2fill_vld_m          (l2fill_vld_m),
+                 .ld_thrd_byp_sel_m     (ld_thrd_byp_mxsel_m[3:0])); // Templated
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+/* 
+lsu_qdp2 AUTO_TEMPLATE (
+       .rst_tri_en              (mux_drive_disable),
+       .dfq_byp_ff_en          (lsu_dfq_byp_ff_en),
+       .dfq_rd_vld_d1          (lsu_dfq_rd_vld_d1),
+       .lsu_dfq_ld_vld         (lsu_qdp2_dfq_ld_vld),
+       .lsu_dfq_st_vld         (lsu_qdp2_dfq_st_vld),
+       .rclk                    (clk),
+       .lsu_ifill_pkt          (lsu_ifu_cpxpkt_i1[`CPX_VLD-1:0]),
+       .lsu_cpx_pkt_vld        ());
+*/
+
+lsu_qdp2 qdp2  (  // lsu_qdp2 instance; "Templated" connections come from the lsu_qdp2 AUTO_TEMPLATE comment above
+                .so                     (so1),
+                .si                     (scan1_4),
+		.dfq_wdata		(dfq_wdata[`DFQ_WIDTH:0]), // hand-listed ahead of AUTOINST; dfq_wdata also feeds dfq.din (presumably an output here - confirm in lsu_qdp2)
+                /*AUTOINST*/
+                // Outputs
+                .lsu_l2fill_data        (lsu_l2fill_data[63:0]),
+                .dfq_tid                (dfq_tid[1:0]),
+                .lsu_dcache_fill_data_e (lsu_dcache_fill_data_e[143:0]),
+                .lsu_ifill_pkt          (lsu_ifu_cpxpkt_i1[`CPX_VLD-1:0]), // Templated
+                .lsu_pcx_fwd_pkt        (lsu_pcx_fwd_pkt[107:0]),
+                .lsu_cpx_pkt_strm_ack   (lsu_cpx_pkt_strm_ack),
+                .lsu_cpx_pkt_vld        (),                      // Templated: the AUTO_TEMPLATE leaves this output unconnected
+                .lsu_cpx_pkt_atm_st_cmplt(lsu_cpx_pkt_atm_st_cmplt),
+                .lsu_cpx_pkt_tid        (lsu_cpx_pkt_tid[1:0]),
+                .lsu_cpx_pkt_invwy      (lsu_cpx_pkt_invwy[1:0]),
+                .lsu_cpx_pkt_inv_pa     (lsu_cpx_pkt_inv_pa[4:0]),
+                .lsu_cpx_pkt_l2miss     (lsu_cpx_pkt_l2miss),
+                .lsu_dfq_byp_invwy_vld  (lsu_dfq_byp_invwy_vld),
+                .lsu_dfq_byp_type       (lsu_dfq_byp_type[5:0]),
+                .lsu_dfq_byp_flush      (lsu_dfq_byp_flush),
+                .lsu_dfq_byp_tid        (lsu_dfq_byp_tid[1:0]),
+                .lsu_cpu_inv_data_b13to9(lsu_cpu_inv_data_b13to9[13:9]),
+                .lsu_cpu_inv_data_b7to2 (lsu_cpu_inv_data_b7to2[7:2]),
+                .lsu_cpu_inv_data_b0    (lsu_cpu_inv_data_b0),
+                .lsu_iobrdge_wr_data    (lsu_iobrdge_wr_data[43:0]),
+                .lsu_iobrdge_tap_rq_type(lsu_iobrdge_tap_rq_type[8:0]),
+                .lsu_cpx_pkt_perror_dinv(lsu_cpx_pkt_perror_dinv),
+                .lsu_cpx_pkt_perror_iinv(lsu_cpx_pkt_perror_iinv),
+                .lsu_cpx_pkt_perror_set (lsu_cpx_pkt_perror_set[1:0]),
+                .lsu_cpx_pkt_ld_err     (lsu_cpx_pkt_ld_err[1:0]),
+                .lsu_dfq_byp_binit_st   (lsu_dfq_byp_binit_st),
+                .lsu_cpx_pkt_binit_st   (lsu_cpx_pkt_binit_st),
+                .lsu_cpx_pkt_prefetch   (lsu_cpx_pkt_prefetch),
+                .lsu_cpx_pkt_prefetch2  (lsu_cpx_pkt_prefetch2),
+                .lsu_dfq_byp_cpx_inv    (lsu_dfq_byp_cpx_inv),
+                .lsu_dfq_byp_stack_adr_b54(lsu_dfq_byp_stack_adr_b54[1:0]),
+                .lsu_dfq_byp_stack_wrway(lsu_dfq_byp_stack_wrway[1:0]),
+                .lsu_dfq_byp_atm        (lsu_dfq_byp_atm),
+                .dcache_iob_addr_e      (dcache_iob_addr_e[7:0]),
+                .st_dcfill_addr         (st_dcfill_addr[10:0]),
+                .lsu_st_way_e           (lsu_st_way_e[1:0]),
+                .lsu_dcache_iob_way_e   (lsu_dcache_iob_way_e[1:0]),
+                .lsu_st_dcfill_size_e   (lsu_st_dcfill_size_e[1:0]),
+                .lsu_cpx_pkt_ifill_type (lsu_cpx_pkt_ifill_type),
+                .lsu_cpx_pkt_atomic     (lsu_cpx_pkt_atomic),
+                // Inputs
+                .rst_tri_en             (mux_drive_disable),     // Templated
+                .rclk                   (clk),                   // Templated
+                .se                     (se),
+                .lsu_dfill_data_sel_hi  (lsu_dfill_data_sel_hi),
+                .dfq_byp_ff_en          (lsu_dfq_byp_ff_en),     // Templated
+                .dfq_rd_vld_d1          (lsu_dfq_rd_vld_d1),     // Templated
+                .dfq_rdata              (dfq_rdata[`DFQ_WIDTH:0]),  // dfq_rdata[151:0] is the dout of the dfq register file below
+                .cpx_spc_data_cx        (cpx_spc_data_cx[`CPX_WIDTH-1:0]),
+                .stb_rdata_ramd_buf     (stb_rdata_ramd_buf[69:0]),
+                .stb_rdata_ramd_b74_buf (stb_rdata_ramd_b74_buf),
+                .stb_rdata_ramc_buf     (stb_rdata_ramc_buf[14:9]),
+                .lsu_stb_pcx_rvld_d1    (lsu_stb_pcx_rvld_d1),
+                .lsu_diagnstc_wr_data_e (lsu_diagnstc_wr_data_e[63:0]),
+                .lsu_diagnstc_dc_prty_invrt_e(lsu_diagnstc_dc_prty_invrt_e[7:0]),
+                .mbist_write_data       (mbist_write_data[7:0]),
+                .cpx_fwd_pkt_en_cx      (cpx_fwd_pkt_en_cx),
+                .lsu_cpu_dcd_sel        (lsu_cpu_dcd_sel[7:0]),
+                .lsu_cpu_uhlf_sel       (lsu_cpu_uhlf_sel),
+                .lsu_cpxpkt_type_dcd_cx (lsu_cpxpkt_type_dcd_cx[5:0]),
+                .lsu_dc_iob_access_e    (lsu_dc_iob_access_e),
+                .lsu_dcfill_data_mx_sel_e(lsu_dcfill_data_mx_sel_e),
+                .lsu_cpx_spc_inv_vld    (lsu_cpx_spc_inv_vld),
+                .lsu_cpx_thrdid         (lsu_cpx_thrdid[3:0]),
+                .lsu_cpx_stack_dcfill_vld(lsu_cpx_stack_dcfill_vld),
+                .pcx_rq_for_stb_d1      (pcx_rq_for_stb_d1[3:0]),
+                .lsu_dfq_ld_vld         (lsu_qdp2_dfq_ld_vld),   // Templated; note qdp1's port of the same name takes lsu_dfq_ld_vld instead
+                .lsu_dfq_st_vld         (lsu_qdp2_dfq_st_vld),   // Templated
+                .lsu_dfq_ldst_vld       (lsu_dfq_ldst_vld));
+
+   
+/*
+bw_r_rf32x152b AUTO_TEMPLATE (
+                 .rst_tri_en           (mem_write_disable),
+                 .rclk                    (clk),
+                 .dout                  (dfq_rdata[151:0]),
+                 .wr_adr                (dfq_wptr[4:0]),
+                 .rd_adr                (dfq_rptr[4:0]),
+                 .wr_en                 (dfq_wptr_vld),
+                 .rd_en                 (dfq_rptr_vld),
+                 //.sehold                (),
+                 .reset_l               (arst_l));
+*/
+   
+bw_r_rf32x152b   dfq   ( // register file read back by qdp2 as dfq_rdata: 5-bit rd/wr addresses, 152-bit data
+                 .din            ({dfq_wdata[151:131],  // 152-bit write word = 21 + 1 + 1 + 1 + 128 bits
+                                   lsu_cpx_stack_dcfill_vld_b130,  // occupies bit 130 in place of dfq_wdata[130]
+                                   dfq_wdata[129],
+                                   lsu_cpx_stack_icfill_vld,  // occupies bit 128 in place of dfq_wdata[128]
+                                   dfq_wdata[127:0]}),
+                        .so             (short_so0),
+                        .si             (short_scan0_9),  // scan in from qdp1's .so
+                 /*AUTOINST*/
+                        // Outputs
+                        .dout           (dfq_rdata[151:0]),      // Templated
+                        // Inputs
+                        .rd_adr         (dfq_rptr[4:0]),         // Templated
+                        .rd_en          (dfq_rptr_vld),          // Templated
+                        .wr_en          (dfq_wptr_vld),          // Templated
+                        .wr_adr         (dfq_wptr[4:0]),         // Templated
+                        .rclk           (clk),                   // Templated
+                        .reset_l        (arst_l),                // Templated
+                        .rst_tri_en     (mem_write_disable),     // Templated
+                        .sehold         (sehold),
+                        .se             (se));
+endmodule
+
+// Local Variables:
+// verilog-library-directories:("." "../../../srams/rtl" "../../../common/rtl")
+// End:
Index: /trunk/T1-CPU/lsu/lsu_qctl1.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_qctl1.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_qctl1.v	(revision 6)
@@ -0,0 +1,4170 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_qctl1.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+//////////////////////////////////////////////////////////////////////
+/*
+//  Description:  LSU Queue Control for Sparc Core  
+//      - includes monitoring for pcx queues
+//      - control for lsu datapath
+//      - rd/wr control of dfq 
+*/
+////////////////////////////////////////////////////////////////////////
+// header file includes
+////////////////////////////////////////////////////////////////////////
+`include  "sys.h" // system level definition file which contains the 
+                  // time scale definition
+`include  "iop.h" 
+
+`include  "lsu.h" 
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module lsu_qctl1 ( /*AUTOARG*/
+   // Outputs
+   lsu_bld_helper_cmplt_m, lsu_bld_cnt_m, lsu_bld_reset, 
+   lsu_pcx_rq_sz_b3, lsu_ramtest_rd_w, ld_stb_full_raw_w2, 
+   lsu_ld_pcx_rq_sel_d2, spc_pcx_req_pq, spc_pcx_atom_pq, 
+   lsu_ifu_pcxpkt_ack_d, pcx_pkt_src_sel, lmq_enable, 
+   imiss_pcx_mx_sel, fwd_int_fp_pcx_mx_sel, lsu_ffu_bld_cnt_w, 
+   lsu_ld_pcx_rq_mxsel, ld_pcx_thrd, lsu_spu_ldst_ack, 
+   pcx_rq_for_stb, pcx_rq_for_stb_d1, lsu_ffu_ack, 
+   lsu_ifu_ld_pcxpkt_vld, lsu_pcx_req_squash0, lsu_pcx_req_squash1, 
+   lsu_pcx_req_squash2, lsu_pcx_req_squash3, lsu_pcx_req_squash_d1, 
+   lsu_pcx_ld_dtag_perror_w2, lsu_tlu_dcache_miss_w2, lsu_bld_pcx_rq, 
+   lsu_bld_rq_addr, lsu_fwdpkt_pcx_rq_sel, lsu_imiss_pcx_rq_sel_d1, 
+   lsu_tlu_pcxpkt_ack, lsu_intrpt_cmplt, lsu_lmq_byp_misc_sel, 
+   lsu_sscan_data, so, lsu_dfq_byp_tid_d1_sel, lmq0_pcx_pkt_way, 
+   lmq1_pcx_pkt_way, lmq2_pcx_pkt_way, lmq3_pcx_pkt_way, 
+   lsu_st_pcx_rq_pick, lsu_stb_pcx_rvld_d1, lsu_stb_rd_tid, 
+   lsu_ld0_spec_vld_kill_w2, lsu_ld1_spec_vld_kill_w2, 
+   lsu_ld2_spec_vld_kill_w2, lsu_ld3_spec_vld_kill_w2, 
+   lsu_st_pcx_rq_vld, 
+   // Inputs
+   rclk, si, se, sehold, grst_l, arst_l, lsu_quad_word_access_g, 
+   pcx_spc_grant_px, ld_inst_vld_e, lsu_ldst_va_m, stb0_l2b_addr, 
+   stb1_l2b_addr, stb2_l2b_addr, stb3_l2b_addr, lsu_ld_miss_g, 
+   ifu_lsu_ldst_fp_e, ld_rawp_st_ced_w2, ld_rawp_st_ackid_w2, 
+   stb0_crnt_ack_id, stb1_crnt_ack_id, stb2_crnt_ack_id, 
+   stb3_crnt_ack_id, ifu_tlu_thrid_e, ldxa_internal, 
+   spu_lsu_ldst_pckt, spu_lsu_ldst_pckt_vld, ifu_tlu_inst_vld_m, 
+   ifu_lsu_flush_w, ifu_lsu_casa_e, lsu_ldstub_g, lsu_swap_g, 
+   stb0_atm_rq_type, stb1_atm_rq_type, stb2_atm_rq_type, 
+   stb3_atm_rq_type, tlb_pgnum_g, stb_rd_for_pcx, ffu_lsu_data, 
+   ffu_lsu_fpop_rq_vld, ifu_lsu_ldst_dbl_e, ifu_lsu_pcxreq_d, 
+   ifu_lsu_destid_s, ifu_lsu_pref_inst_e, tlb_cam_hit_g, 
+   lsu_blk_asi_m, stb_cam_hit_bf, lsu_fwdpkt_vld, 
+   lsu_dcfill_active_e, dfq_byp_sel, lsu_dfq_ld_vld, lsu_fldd_vld_en, 
+   lsu_dfill_dcd_thrd, lsu_fwdpkt_dest, tlu_lsu_pcxpkt_tid, 
+   lsu_stb_empty, tlu_lsu_pcxpkt_vld, tlu_lsu_pcxpkt_l2baddr, 
+   ld_sec_hit_thrd0, ld_sec_hit_thrd1, ld_sec_hit_thrd2, 
+   ld_sec_hit_thrd3, ld_thrd_byp_sel_e, lsu_st_pcx_rq_kill_w2, 
+   ifu_lsu_alt_space_e, lsu_dfq_byp_tid, dfq_byp_ff_en, 
+   stb_ld_full_raw, stb_ld_partial_raw, stb_cam_mhit, 
+   lsu_ldquad_inst_m, stb_cam_wr_no_ivld_m, lsu_ldst_va_way_g, 
+   lsu_dcache_rand, lsu_encd_way_hit, lsu_way_hit_or, dc_direct_map, 
+   lsu_tlb_perr_ld_rq_kill_w, lsu_dcache_tag_perror_g, 
+   lsu_ld_inst_vld_g, asi_internal_m, ifu_lsu_pcxpkt_e_b50, 
+   lda_internal_m, atomic_m, lsu_dcache_iob_rd_w, 
+   ifu_lsu_fwd_data_vld, rst_tri_en, lsu_no_spc_pref, 
+   tlu_early_flush_pipe2_w, lsu_ttype_vld_m2
+   );
+
+
+input     rclk ;
+input                   si;
+input                   se;
+input                   sehold;
+input                   grst_l;
+input                   arst_l;
+
+//input [1:0] 		ld_pcx_pkt_wy_g ;
+input			lsu_quad_word_access_g ;
+
+// LSU <- PCX
+// bit5 - FP, bit4 - IO.
+input [4:0]             pcx_spc_grant_px ;    // pcx grants packet to destination.
+input                   ld_inst_vld_e;        // valid ld inst; d-stage
+input [7:6]             lsu_ldst_va_m ;           // Virt. Addr. of ld/st/atomic.
+
+input [2:0]             stb0_l2b_addr ;         // st's addr for pcx - thread0.
+input [2:0]             stb1_l2b_addr ;         // st's addr for pcx - thread1.
+input [2:0]             stb2_l2b_addr ;         // st's addr for pcx - thread2.
+input [2:0]             stb3_l2b_addr ;         // st's addr for pcx - thread3.
+input                   lsu_ld_miss_g ;         // load misses in dcache.
+//input                   lsu_ld_hit_g ;          // load hits in dcache.
+input                   ifu_lsu_ldst_fp_e ;     // fp load/store.
+
+//input                   ld_stb_full_raw_g ;    // full raw for load - thread0
+//input                   ld_stb_partial_raw_g ; // partial raw for load - thread0
+input                   ld_rawp_st_ced_w2 ;      // store has been acked - thread0
+//input                   ld_rawp_st_ced_g ;      // store has been acked - thread0
+input   [2:0]           ld_rawp_st_ackid_w2 ;    // ackid for acked store - thread0
+input [2:0]             stb0_crnt_ack_id ;      // ackid for crnt outstanding st. 
+input [2:0]             stb1_crnt_ack_id ;      // ackid for crnt outstanding st. 
+input [2:0]             stb2_crnt_ack_id ;      // ackid for crnt outstanding st. 
+input [2:0]             stb3_crnt_ack_id ;      // ackid for crnt outstanding st. 
+input [1:0]             ifu_tlu_thrid_e ;       // thread-id
+input                   ldxa_internal ;         // internal ldxa, stg g 
+
+input [`PCX_AD_LO+7:`PCX_AD_LO+6] spu_lsu_ldst_pckt ;  // addr bits
+input                   spu_lsu_ldst_pckt_vld ; // vld
+input                   ifu_tlu_inst_vld_m ;    // inst is vld - wstage
+
+input                   ifu_lsu_flush_w ;  	// ifu's flush
+input                   ifu_lsu_casa_e ;        // compare-swap instr
+input                   lsu_ldstub_g ;          // ldstub(a) instruction
+input                   lsu_swap_g ;            // swap(a) instruction 
+input  [2:1]            stb0_atm_rq_type ;      // stb pcx rq type - atomic
+input  [2:1]            stb1_atm_rq_type ;      // stb pcx rq type - atomic
+input  [2:1]            stb2_atm_rq_type ;      // stb pcx rq type - atomic
+input  [2:1]            stb3_atm_rq_type ;      // stb_pcx_rq_type - atomic
+input [39:37]           tlb_pgnum_g ;           // ldst access to io 
+input [3:0]             stb_rd_for_pcx ;        // rd for pcx can be scheduled
+input [80:79]           ffu_lsu_data ;
+input                   ffu_lsu_fpop_rq_vld ;   // ffu dispatches fpop issue request.
+input                   ifu_lsu_ldst_dbl_e ;    // ld/st double
+input                   ifu_lsu_pcxreq_d ;
+input   [2:0]           ifu_lsu_destid_s ;
+input			ifu_lsu_pref_inst_e ;	   // prefetch inst
+input 			tlb_cam_hit_g ;		   // tlb cam hit ; error included
+input			lsu_blk_asi_m ;
+//input                   stb_cam_wptr_vld;
+input		        stb_cam_hit_bf;
+
+input                   lsu_fwdpkt_vld;
+//input  [3:0]            lsu_error_rst;
+input                   lsu_dcfill_active_e;
+input  [3:0]            dfq_byp_sel ;	
+//input  [3:0]            lsu_dfq_byp_mxsel ;	
+//input  [3:0]            lsu_st_ack_rq_stb ;
+input                   lsu_dfq_ld_vld;
+input                   lsu_fldd_vld_en;
+input  [3:0]            lsu_dfill_dcd_thrd ;
+input  [4:0]            lsu_fwdpkt_dest ;
+   
+input [19:18]           tlu_lsu_pcxpkt_tid ;
+input   [3:0]           lsu_stb_empty ;
+input                   tlu_lsu_pcxpkt_vld ;
+input [11:10]           tlu_lsu_pcxpkt_l2baddr ;
+input                   ld_sec_hit_thrd0 ;      // ld has sec. hit against th0
+input                   ld_sec_hit_thrd1 ;      // ld has sec. hit against th1
+input                   ld_sec_hit_thrd2 ;      // ld has sec. hit against th2
+input                   ld_sec_hit_thrd3 ;      // ld has sec. hit against th3
+input [2:0]             ld_thrd_byp_sel_e ;       // stb,ldxa thread byp sel
+input   [3:0]   	lsu_st_pcx_rq_kill_w2 ;
+
+input			ifu_lsu_alt_space_e ;
+input   [1:0]           lsu_dfq_byp_tid;
+
+input                   dfq_byp_ff_en;
+
+//input	[3:0]		lsu_dtag_perror_w2 ;
+
+input [7:0]   		stb_ld_full_raw ;
+input [7:0]   		stb_ld_partial_raw ;
+
+input			stb_cam_mhit ;		// multiple hits in stb
+input      		lsu_ldquad_inst_m ; // ldquad inst
+
+input			stb_cam_wr_no_ivld_m ;
+
+input  [1:0]            lsu_ldst_va_way_g ;          // 12:11 for direct map
+input  [1:0]            lsu_dcache_rand;
+input  [1:0]            lsu_encd_way_hit;
+input                   lsu_way_hit_or;
+input                   dc_direct_map;
+//input                   lsu_quad_asi_g;
+
+input			lsu_tlb_perr_ld_rq_kill_w ;
+
+input                   lsu_dcache_tag_perror_g ;  // dcache tag parity error
+input   [3:0]           lsu_ld_inst_vld_g ;
+//input                  lsu_pcx_ld_dtag_perror_w2 ;	// from qctl2
+
+input			asi_internal_m ;
+
+input			ifu_lsu_pcxpkt_e_b50 ;
+
+input			lda_internal_m ;
+input			atomic_m ;
+
+input			lsu_dcache_iob_rd_w ;
+input			ifu_lsu_fwd_data_vld ;
+
+input                   rst_tri_en ;
+
+output			lsu_bld_helper_cmplt_m ;
+output	[2:0]		lsu_bld_cnt_m ;
+output			lsu_bld_reset ;
+
+output			lsu_pcx_rq_sz_b3 ;
+
+output			lsu_ramtest_rd_w ;
+
+
+output 	   		ld_stb_full_raw_w2 ;
+
+output 	[3:0]		lsu_ld_pcx_rq_sel_d2 ;
+   
+output  [4:0]           spc_pcx_req_pq;         // request destination for packet.
+              // FPU, IO, L2_BANK[3:0].
+              // 1-hot - create monitor !
+output                  spc_pcx_atom_pq ;       // atomic packet. 
+output                  lsu_ifu_pcxpkt_ack_d ;  // ack for I$ fill request.
+output  [3:0]           pcx_pkt_src_sel ;	// - qdp1
+output  [3:0]           lmq_enable ;	// - qdp1
+output                  imiss_pcx_mx_sel ;	// - qdp1
+output  [2:0]           fwd_int_fp_pcx_mx_sel ;	// - qdp1
+output	[2:0]		lsu_ffu_bld_cnt_w ;
+//output  [3:0]           ld_pcx_rq_sel ;	// - qctl2
+output  [3:0]           lsu_ld_pcx_rq_mxsel ;	// - qdp1
+output  [1:0]           ld_pcx_thrd ;	// - qdp1
+output                  lsu_spu_ldst_ack ;  // strm ld/st ack to spu
+//output                  strm_sldst_cam_vld; // strm ld/st xslate rq
+//output                  strm_sld_dc_rd_vld; // strm alloc. ld xslate rq.
+//output                  strm_sldst_cam_d2;  // strm ld/st xslate rq-d2
+output  [3:0]           pcx_rq_for_stb ;      // pcx demands rd for store - stb_ctl
+output  [3:0]           pcx_rq_for_stb_d1 ;  // pcx demands rd for store - qdp2
+output                  lsu_ffu_ack ;         // ack to ffu.
+output                  lsu_ifu_ld_pcxpkt_vld ;
+//output  [3:0]           lsu_iobrdge_rply_data_sel ;	// - qdp1
+//output                  lsu_pcx_req_squash ;
+output                  lsu_pcx_req_squash0 ;
+output                  lsu_pcx_req_squash1 ;
+output                  lsu_pcx_req_squash2 ;
+output                  lsu_pcx_req_squash3 ;
+output                  lsu_pcx_req_squash_d1 ;
+output                  lsu_pcx_ld_dtag_perror_w2 ;	// - qdp1
+output	[3:0]		lsu_tlu_dcache_miss_w2 ;
+output 			lsu_bld_pcx_rq ;	// cycle after request	// - qdp1
+output [1:0] 		lsu_bld_rq_addr ;	// cycle after request	// - qdp1
+//output			lsu_ifu_flush_ireg ;
+
+output                 lsu_fwdpkt_pcx_rq_sel ;
+//output                 lsu_ld0_pcx_rq_sel_d1, lsu_ld1_pcx_rq_sel_d1 ;
+//output                 lsu_ld2_pcx_rq_sel_d1, lsu_ld3_pcx_rq_sel_d1 ;
+output                 lsu_imiss_pcx_rq_sel_d1 ;
+output                 lsu_tlu_pcxpkt_ack;
+output  [3:0]           lsu_intrpt_cmplt ;      // intrpt can restart thread
+//output                  lsu_ld_sec_hit_l2access_g ;
+//output  [1:0]           lsu_ld_sec_hit_wy_g ;
+output  [3:0]           lsu_lmq_byp_misc_sel ;    // select g-stage lmq source
+
+output	[12:0]		lsu_sscan_data ;
+
+output                  so;
+output  [3:0]           lsu_dfq_byp_tid_d1_sel;
+
+
+   input [3:0]          lsu_no_spc_pref;
+   
+//output  [1:0]           lsu_lmq_pkt_way_g;
+output  [1:0]           lmq0_pcx_pkt_way;
+output  [1:0]           lmq1_pcx_pkt_way;
+output  [1:0]           lmq2_pcx_pkt_way;
+output  [1:0]           lmq3_pcx_pkt_way;
+output  [3:0]           lsu_st_pcx_rq_pick;
+
+// signals related to logic moved from stb_rwctl
+output                  lsu_stb_pcx_rvld_d1;
+output  [1:0]           lsu_stb_rd_tid;
+
+output                  lsu_ld0_spec_vld_kill_w2 ;
+output                  lsu_ld1_spec_vld_kill_w2 ;
+output                  lsu_ld2_spec_vld_kill_w2 ;
+output                  lsu_ld3_spec_vld_kill_w2 ;
+
+output                  lsu_st_pcx_rq_vld ;
+
+
+   input                tlu_early_flush_pipe2_w;
+   input                lsu_ttype_vld_m2;
+   
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+
+wire  thread0_e,thread1_e,thread2_e,thread3_e;
+wire  thread0_w2,thread1_w2,thread2_w2,thread3_w2;
+wire    ld0_inst_vld_e,ld1_inst_vld_e,ld2_inst_vld_e,ld3_inst_vld_e ;
+wire    ld0_inst_vld_g,ld1_inst_vld_g,ld2_inst_vld_g,ld3_inst_vld_g ;
+wire    ld0_inst_vld_w2,ld1_inst_vld_w2,ld2_inst_vld_w2,ld3_inst_vld_w2 ;
+//wire    st_inst_vld_m,st_inst_vld_g;
+wire  imiss_pcx_rq_sel_d1, strm_pcx_rq_sel_d1 ;
+wire  imiss_pcx_rq_sel_d2 ;
+wire    fpop_pcx_rq_sel_d1, fpop_pcx_rq_sel_d2 ;
+wire        imiss_pcx_rq_sel ;
+wire        imiss_pkt_vld ;
+wire  [2:0]     imiss_l2bnk_addr ;
+wire  [4:0]     imiss_l2bnk_dest ;
+wire    fpst_vld_m, fpst_vld_g ;
+wire    fpop_vld_reset ;
+wire    fpop_pcx_rq_sel ;
+wire    fpop_pcx_rq_sel_tmp ;
+wire    fpop_vld_en ;
+wire    fpop_pkt1 ;
+wire    fpop_pkt_vld,fpop_pkt_vld_unmasked ;
+wire    fpop_atom_req, fpop_atom_rq_pq ;
+wire  [4:0] fpop_l2bnk_dest ;
+wire        pcx_req_squash ;
+wire  [4:0] strm_l2bnk_dest ;
+wire    strm_pkt_vld;
+wire        st0_pkt_vld ;
+wire        st1_pkt_vld ;
+wire        st2_pkt_vld ;
+wire        st3_pkt_vld ;
+wire    st0_pcx_rq_sel_d1, st1_pcx_rq_sel_d1;
+wire    st2_pcx_rq_sel_d1, st3_pcx_rq_sel_d1;
+wire    st0_pcx_rq_sel_d2, st1_pcx_rq_sel_d2;
+wire    st2_pcx_rq_sel_d2, st3_pcx_rq_sel_d2;
+wire    st0_pcx_rq_sel_d3, st1_pcx_rq_sel_d3;
+wire    st2_pcx_rq_sel_d3, st3_pcx_rq_sel_d3;
+wire    st0_cas_vld, st1_cas_vld, st2_cas_vld, st3_cas_vld ;
+wire    st0_atomic_vld, st1_atomic_vld, st2_atomic_vld, st3_atomic_vld ;
+wire  [4:0]     st0_l2bnk_dest,st1_l2bnk_dest ;
+wire  [4:0]     st2_l2bnk_dest,st3_l2bnk_dest ;
+wire	bld_helper_cmplt_e, bld_helper_cmplt_m, bld_helper_cmplt_g ;	
+wire	bld_din,bld_dout ;
+wire	bld_g ;
+wire	bld_en ;
+wire	[1:0]	bld_cnt ;
+wire	[1:0]	bcnt_din ;
+wire	[2:0]	bld_rd_din, bld_rd_dout, bld_rd_dout_m ;
+wire	[3:0]	bld_annul,bld_annul_d1 ;
+wire	bld_rd_en ;
+wire    casa_m, casa_g ;
+wire        ld0_vld_reset, ld0_pkt_vld ;
+wire        ld0_pcx_rq_sel_d2, ld1_pcx_rq_sel_d2 ;
+wire        ld2_pcx_rq_sel_d2, ld3_pcx_rq_sel_d2 ;
+wire    ld0_fill_reset, ld1_fill_reset,ld2_fill_reset,ld3_fill_reset;
+wire    ld0_fill_reset_d1,ld1_fill_reset_d1,ld2_fill_reset_d1,ld3_fill_reset_d1;
+wire    ld0_fill_reset_d2,ld1_fill_reset_d2,ld2_fill_reset_d2,ld3_fill_reset_d2;
+wire    ld0_fill_reset_d2_tmp,ld1_fill_reset_d2_tmp,ld2_fill_reset_d2_tmp,ld3_fill_reset_d2_tmp;
+wire  [4:0]     ld0_l2bnk_dest, ld1_l2bnk_dest ;
+wire  [4:0]     ld2_l2bnk_dest, ld3_l2bnk_dest ;
+wire        ld1_vld_reset, ld1_pkt_vld ;
+wire        ld2_vld_reset, ld2_pkt_vld ;
+wire        ld3_vld_reset, ld3_pkt_vld ;
+//wire    casa0_g, casa1_g, casa2_g, casa3_g;
+wire    ld0_rawp_reset,ld0_rawp_en,ld0_rawp_disabled;
+wire    ld1_rawp_reset,ld1_rawp_en,ld1_rawp_disabled;
+wire    ld2_rawp_reset,ld2_rawp_en,ld2_rawp_disabled;
+wire    ld3_rawp_reset,ld3_rawp_en,ld3_rawp_disabled;
+wire  [2:0] ld0_rawp_ackid,ld1_rawp_ackid ;
+wire  [2:0] ld2_rawp_ackid,ld3_rawp_ackid ;
+wire        ld0_pcx_rq_vld, ld1_pcx_rq_vld ;
+wire        ld2_pcx_rq_vld, ld3_pcx_rq_vld ;
+wire  [4:0]     queue_write ;
+wire	mcycle_squash_d1 ;
+//wire        ld_pcx_rq_vld, st_pcx_rq_vld ;
+wire  [4:0] st0_q_wr,st1_q_wr,st2_q_wr,st3_q_wr ;
+wire  [4:0]     sel_qentry0 ;
+wire    st0_atom_rq,st1_atom_rq,st2_atom_rq,st3_atom_rq ;
+wire    st0_atom_rq_d1,st1_atom_rq_d1,st2_atom_rq_d1,st3_atom_rq_d1 ;
+wire    st0_cas_vld_d1,st1_cas_vld_d1,st2_cas_vld_d1,st3_cas_vld_d1 ;
+wire    st0_atom_rq_d2,st1_atom_rq_d2,st2_atom_rq_d2,st3_atom_rq_d2 ;
+wire    st0_cas_vld_d2,st1_cas_vld_d2,st2_cas_vld_d2,st3_cas_vld_d2 ;
+//wire    st_cas_rq_d2,st_quad_rq_d2;
+wire    st_cas_rq_d2 ;
+wire        st0_pcx_rq_vld, st1_pcx_rq_vld;
+wire        st2_pcx_rq_vld, st3_pcx_rq_vld;
+wire    st_atom_rq ;
+wire    st_atom_rq_d1 ;
+wire        imiss_pcx_rq_vld ;
+wire  [4:0] spc_pcx_req_update_g,spc_pcx_req_update_w2 ;
+wire    strm_pcx_rq_vld ;
+wire    fwdpkt_rq_vld ;
+wire    intrpt_pcx_rq_vld ;
+wire    fpop_pcx_rq_vld ;
+wire	[4:0]	pre_qwr ;
+wire        ld0_pcx_rq_sel, ld1_pcx_rq_sel ;
+wire        ld2_pcx_rq_sel, ld3_pcx_rq_sel ;
+wire    strm_pcx_rq_sel ;
+wire    intrpt_pcx_rq_sel ;
+//wire    imiss_strm_pcx_rq_sel ;
+//wire  [2:0]     dest_pkt_sel ;
+wire  [4:0] spc_pcx_req_g ;
+wire  [1:0]   strm_l2bnk_addr ;
+wire  [2:0]     ld0_l2bnk_addr, ld1_l2bnk_addr ;
+wire  [2:0]     ld2_l2bnk_addr, ld3_l2bnk_addr ;
+wire  [4:0]     current_pkt_dest ;
+wire  [7:6]      ldst_va_m, ldst_va_g ;
+wire  [4:0]     ld_pkt_dest ;
+wire  [4:0] st_pkt_dest  ;
+
+
+wire  [4:0]            intrpt_l2bnk_dest ;
+wire                   pcx_req_squash_d1, pcx_req_squash_d2 ;
+wire                   intrpt_pcx_rq_sel_d1 ;
+wire  [2:0] intrpt_l2bnk_addr ;
+//wire                  st0_stq_vld,st1_stq_vld,st2_stq_vld,st3_stq_vld ;
+wire        st0_pcx_rq_sel, st1_pcx_rq_sel;
+wire        st2_pcx_rq_sel, st3_pcx_rq_sel;
+//wire    ld0_sec_hit_g,ld1_sec_hit_g,ld2_sec_hit_g,ld3_sec_hit_g;
+wire    ld0_sec_hit_w2,ld1_sec_hit_w2,ld2_sec_hit_w2,ld3_sec_hit_w2;
+//wire  [3:0] dfq_byp_sel_m, dfq_byp_sel_g ;
+//wire  [3:0] dfq_byp_sel_m;
+wire                 ld0_unfilled,ld1_unfilled,ld2_unfilled,ld3_unfilled;
+wire                 ld0_unfilled_tmp,ld1_unfilled_tmp,ld2_unfilled_tmp,ld3_unfilled_tmp;
+wire  [1:0]          ld0_unfilled_wy,ld1_unfilled_wy,ld2_unfilled_wy,ld3_unfilled_wy ;
+wire        ld0_l2cache_rq,ld1_l2cache_rq ;
+wire        ld2_l2cache_rq,ld3_l2cache_rq ;
+wire                 ld0_pcx_rq_sel_d1, ld1_pcx_rq_sel_d1 ;
+wire                 ld2_pcx_rq_sel_d1, ld3_pcx_rq_sel_d1 ;
+wire         intrpt_pkt_vld;
+wire                 fwdpkt_pcx_rq_sel;
+wire		fwdpkt_pcx_rq_sel_d1,fwdpkt_pcx_rq_sel_d2,fwdpkt_pcx_rq_sel_d3 ;
+wire         reset,dbb_reset_l;
+wire         clk;
+//wire         st_inst_vld_unflushed;
+wire         ldst_dbl_g;
+//wire                  lsu_ld_sec_hit_l2access_g ;
+wire                  lsu_ld_sec_hit_l2access_w2 ;
+//wire  [1:0]           lsu_ld_sec_hit_wy_g ;
+wire  [1:0]           lsu_ld_sec_hit_wy_w2 ;
+//wire  [1:0]  ld_way;
+//wire [1:0]	      ld_pcx_pkt_wy_g ;
+
+wire	[3:0]		lsu_dtag_perror_w2 ;
+
+wire	[3:0]		lmq_enable_w2 ;
+wire  ld0_spec_pick_vld_g ,
+      ld0_spec_pick_vld_w2 ;
+wire  ld1_spec_pick_vld_g ,
+      ld1_spec_pick_vld_w2 ;
+wire  ld2_spec_pick_vld_g ,
+      ld2_spec_pick_vld_w2 ;
+wire  ld3_spec_pick_vld_g ,
+      ld3_spec_pick_vld_w2 ;
+wire  non_l2bnk_mx0_d1 ;
+wire  non_l2bnk_mx1_d1 ;
+wire  non_l2bnk_mx2_d1 ;
+wire  non_l2bnk_mx3_d1 ;
+wire  lsu_pcx_req_squash ;
+wire  spc_pcx_atom_pq_buf2 ;
+wire  [4:0]  spc_pcx_req_pq_buf2 ;
+wire         lsu_ld0_pcx_rq_sel_d1, lsu_ld1_pcx_rq_sel_d1 ;
+wire         lsu_ld2_pcx_rq_sel_d1, lsu_ld3_pcx_rq_sel_d1 ;
+
+wire  [3:0]  ld_thrd_force_d1 ;
+wire  [3:0]  st_thrd_force_d1 ;
+wire  [3:0]  misc_thrd_force_d1 ;
+wire  [3:0]  ld_thrd_force_vld ;
+wire  [3:0]  st_thrd_force_vld ;
+wire  [3:0]  misc_thrd_force_vld ;
+wire  [3:0]  all_thrd_force_vld ;
+wire  [3:0]  ld_thrd_pick_din ;
+wire  [3:0]  st_thrd_pick_din ;
+wire  [3:0]  misc_thrd_pick_din ;
+wire  [3:0]  ld_thrd_pick_status_din ;
+wire  [3:0]  st_thrd_pick_status_din ;
+wire  [3:0]  misc_thrd_pick_status_din ;
+wire  [3:0]  ld_thrd_pick_status ;
+wire  [3:0]  st_thrd_pick_status ;
+wire  [3:0]  misc_thrd_pick_status ;
+wire         ld_thrd_pick_rst ;
+wire         st_thrd_pick_rst ;
+wire         misc_thrd_pick_rst ;
+wire         all_thrd_pick_rst ;
+
+
+   
+   
+assign  clk = rclk;   // local alias of the rclk input
+
+    dffrl_async rstff(.din (grst_l),      // registers grst_l on clk; arst_l drives the cell's rst_l pin
+                        .q   (dbb_reset_l),
+                        .clk (clk), .se(se), .si(), .so(),
+                        .rst_l (arst_l));
+
+assign  reset  =  ~dbb_reset_l;   // active-high form of the registered, active-low dbb_reset_l
+
+
+//assign	lsu_ifu_flush_ireg = 1'b0 ;
+//=================================================================================================
+// TEMP !! rm from vlin.filter also !!
+//=================================================================================================
+
+wire atm_in_stb_g ;
+assign atm_in_stb_g = 1'b0 ;
+
+//=================================================================================================
+// LOGIC MOVED FROM STB_RWCTL
+//=================================================================================================
+
+// pcx is making request for data in current cycle. Can be multi-hot.
+//assign  pcx_any_rq_for_stb = |pcx_rq_for_stb[3:0] ;
+//assign  pcx_any_rq_for_stb = 
+//        (pcx_rq_for_stb[0] & ~lsu_st_pcx_rq_kill_w2[0]) | 
+//        (pcx_rq_for_stb[1] & ~lsu_st_pcx_rq_kill_w2[1]) | 
+//        (pcx_rq_for_stb[2] & ~lsu_st_pcx_rq_kill_w2[2]) | 
+//        (pcx_rq_for_stb[3] & ~lsu_st_pcx_rq_kill_w2[3]) ; 
+//
+//dff #(1)  prvld_stgd1 (
+//  .din  (pcx_any_rq_for_stb), 
+//  .q  (lsu_stb_pcx_rvld_d1),
+//  .clk  (clk), 
+//  .se (1'b0), .si (), .so ()
+//  );
+
+// replacement for above logic - pcx_rq_for_stb is already qual'ed w/ lsu_st_pcx_rq_kill_w2
+// this signal is used in qdp1 and qdp2 as pcx packet valids.
+assign  lsu_stb_pcx_rvld_d1  =  st3_pcx_rq_sel_d1 |    // OR of the four stN_pcx_rq_sel_d1 terms
+                                st2_pcx_rq_sel_d1 |
+                                st1_pcx_rq_sel_d1 | 
+                                st0_pcx_rq_sel_d1 ;
+
+
+//assign  stb_rd_tid[0] = pcx_rq_for_stb[1] | pcx_rq_for_stb[3] ;
+//assign  stb_rd_tid[1] = pcx_rq_for_stb[2] | pcx_rq_for_stb[3] ;
+//
+//dff #(2) stbtid_stgd1 (
+//  .din    (stb_rd_tid[1:0]),  .q  (lsu_stb_rd_tid[1:0]),
+//  .clk    (clk),
+//  .se   (1'b0), .si (), .so ()
+//  );
+
+assign  lsu_stb_rd_tid[0]  =  st1_pcx_rq_sel_d1 | st3_pcx_rq_sel_d1;   // bit 0 of the encoded id: set for st1 or st3 select
+assign  lsu_stb_rd_tid[1]  =  st2_pcx_rq_sel_d1 | st3_pcx_rq_sel_d1;   // bit 1 of the encoded id: set for st2 or st3 select
+
+//=================================================================================================
+
+assign	lsu_ramtest_rd_w = lsu_dcache_iob_rd_w | ifu_lsu_fwd_data_vld ;   // high when either source signal is high
+
+//=================================================================================================
+// LD PCX PKT WAY
+//=================================================================================================
+
+
+// For direct-map mode, assume that the additional set-index bits 12:11 are
+// used to select the way within the set.
+// timing fix: 5/19/03: move secondary hit way generation to w2
+//assign  ld_way[1:0] = 
+//    lsu_way_hit_or ? lsu_encd_way_hit[1:0]: 
+//                lsu_ld_sec_hit_l2access_g ? lsu_ld_sec_hit_wy_g[1:0] :
+//                        (dc_direct_map ? lsu_ldst_va_way_g[1:0] : lsu_dcache_rand[1:0]) ;
+//
+//assign  lsu_lmq_pkt_way_g[1:0] = 
+//(ldst_dbl_g & st_inst_vld_unflushed & lsu_quad_asi_g) ? 2'b01 :
+//        casa_g ? 2'b00 : ld_way[1:0] ;
+//
+//assign  ld_pcx_pkt_wy_g[1:0] = lsu_lmq_pkt_way_g[1:0];
+wire  [1:0]  ld_way_mx1_g , ld_way_mx2_g , ld_way_mx2_w2;
+
+assign  ld_way_mx1_g[1:0] =      // encoded hit way if lsu_way_hit_or; else va way bits (direct map) or random way
+    lsu_way_hit_or ? lsu_encd_way_hit[1:0]: 
+                        (dc_direct_map ? lsu_ldst_va_way_g[1:0] : lsu_dcache_rand[1:0]) ;
+
+assign  ld_way_mx2_g[1:0] = 
+//(ldst_dbl_g & st_inst_vld_unflushed & lsu_quad_asi_g) ? 2'b01 :  //quad st, obsolete
+        casa_g ? 2'b00 : ld_way_mx1_g[1:0] ;    // casa_g forces the way to 2'b00
+
+dff_s #(2)  ff_ld_way_mx2_w2 (       // registers ld_way_mx2_g into ld_way_mx2_w2
+        .din    (ld_way_mx2_g[1:0]),
+        .q      (ld_way_mx2_w2[1:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+wire  [1:0]  lsu_lmq_pkt_way_w2;
+assign  lsu_lmq_pkt_way_w2[1:0] =  lsu_ld_sec_hit_l2access_w2 ? lsu_ld_sec_hit_wy_w2[1:0] :   // secondary-hit way overrides the staged way
+                                         ld_way_mx2_w2[1:0];
+
+//bug2705 - add mx for way in w2-cycle
+wire  [1:0]  lmq0_pcx_pkt_way_tmp, lmq1_pcx_pkt_way_tmp, lmq2_pcx_pkt_way_tmp, lmq3_pcx_pkt_way_tmp ;
+
+assign  lmq0_pcx_pkt_way[1:0] =  ld0_spec_pick_vld_w2 ? lsu_lmq_pkt_way_w2[1:0] : lmq0_pcx_pkt_way_tmp[1:0] ;   // live w2 way when ldN_spec_pick_vld_w2, else the lmqN_pcx_pkt_way_tmp value
+assign  lmq1_pcx_pkt_way[1:0] =  ld1_spec_pick_vld_w2 ? lsu_lmq_pkt_way_w2[1:0] : lmq1_pcx_pkt_way_tmp[1:0] ;
+assign  lmq2_pcx_pkt_way[1:0] =  ld2_spec_pick_vld_w2 ? lsu_lmq_pkt_way_w2[1:0] : lmq2_pcx_pkt_way_tmp[1:0] ;
+assign  lmq3_pcx_pkt_way[1:0] =  ld3_spec_pick_vld_w2 ? lsu_lmq_pkt_way_w2[1:0] : lmq3_pcx_pkt_way_tmp[1:0] ;
+
+wire	qword_access0,qword_access1,qword_access2,qword_access3;
+
+// Extend by 1-b to add support for 3rd size bit for iospace.
+// move the flops from qdp1 to qctl1
+dffe_s #(2)  ff_lmq0_pcx_pkt_way (      // captures lsu_lmq_pkt_way_w2 when lmq_enable_w2[0]
+           .din    (lsu_lmq_pkt_way_w2[1:0]),
+           .q      (lmq0_pcx_pkt_way_tmp[1:0]),
+           .en     (lmq_enable_w2[0]),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()
+           );
+dffe_s #(2)  ff_lmq1_pcx_pkt_way (      // captures lsu_lmq_pkt_way_w2 when lmq_enable_w2[1]
+           .din    (lsu_lmq_pkt_way_w2[1:0]),
+           .q      (lmq1_pcx_pkt_way_tmp[1:0]),
+           .en     (lmq_enable_w2[1]),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()
+           );
+dffe_s #(2)  ff_lmq2_pcx_pkt_way (      // captures lsu_lmq_pkt_way_w2 when lmq_enable_w2[2]
+           .din    (lsu_lmq_pkt_way_w2[1:0]),
+           .q      (lmq2_pcx_pkt_way_tmp[1:0]),
+           .en     (lmq_enable_w2[2]),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()
+           );
+dffe_s #(2)  ff_lmq3_pcx_pkt_way (      // captures lsu_lmq_pkt_way_w2 when lmq_enable_w2[3]
+           .din    (lsu_lmq_pkt_way_w2[1:0]),
+           .q      (lmq3_pcx_pkt_way_tmp[1:0]),
+           .en     (lmq_enable_w2[3]),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()
+           );
+
+// Q Word Access to IO
+dffe_s   ff_lmq0_qw (                   // captures lsu_quad_word_access_g when lmq_enable[0]
+           .din    (lsu_quad_word_access_g),
+           .q      (qword_access0),
+           .en     (lmq_enable[0]),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()
+           );
+dffe_s   ff_lmq1_qw (                   // captures lsu_quad_word_access_g when lmq_enable[1]
+           .din    (lsu_quad_word_access_g),
+           .q      (qword_access1),
+           .en     (lmq_enable[1]),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()
+           );
+dffe_s   ff_lmq2_qw(                    // captures lsu_quad_word_access_g when lmq_enable[2]
+           .din    (lsu_quad_word_access_g),
+           .q      (qword_access2),
+           .en     (lmq_enable[2]),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()
+           );
+dffe_s   ff_lmq3_qw (                   // captures lsu_quad_word_access_g when lmq_enable[3]
+           .din    (lsu_quad_word_access_g),
+           .q      (qword_access3),
+           .en     (lmq_enable[3]),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()
+           );
+
+assign	lsu_pcx_rq_sz_b3 =              // stored qword flag of whichever ldN request is selected (d1)
+	(ld0_pcx_rq_sel_d1 & qword_access0) |
+	(ld1_pcx_rq_sel_d1 & qword_access1) |
+	(ld2_pcx_rq_sel_d1 & qword_access2) |
+	(ld3_pcx_rq_sel_d1 & qword_access3) ;
+
+//=================================================================================================
+// SHADOW SCAN
+//=================================================================================================
+
+
+// Monitors outstanding loads. This would hang a thread.
+assign	lsu_sscan_data[3:0] = 
+		{ld0_pcx_rq_vld, ld1_pcx_rq_vld , ld2_pcx_rq_vld , ld3_pcx_rq_vld} ;   // note order: ld0 -> bit 3 ... ld3 -> bit 0
+// Monitors outstanding stores. This would hang issue from stb
+assign	lsu_sscan_data[7:4] = 
+		{st0_pcx_rq_vld, st1_pcx_rq_vld, st2_pcx_rq_vld, st3_pcx_rq_vld} ;      // note order: st0 -> bit 7 ... st3 -> bit 4
+assign	lsu_sscan_data[8] = imiss_pcx_rq_vld ; 	// imiss
+assign	lsu_sscan_data[9] = strm_pcx_rq_vld ;  	// strm
+assign	lsu_sscan_data[10] = fwdpkt_rq_vld ;	// fwd rply/rq 
+assign	lsu_sscan_data[11] = intrpt_pcx_rq_vld ; // intrpt
+assign	lsu_sscan_data[12] = fpop_pcx_rq_vld ;	// fpop
+
+
+//=================================================================================================
+// QDP1 selects
+//=================================================================================================
+
+wire [3:0] dfq_byp_tid_sel;
+   
+assign dfq_byp_tid_sel[0]  =  (lsu_dfq_byp_tid[1:0]==2'b00);   // one-hot decode of the 2-bit lsu_dfq_byp_tid
+assign dfq_byp_tid_sel[1]  =  (lsu_dfq_byp_tid[1:0]==2'b01);
+assign dfq_byp_tid_sel[2]  =  (lsu_dfq_byp_tid[1:0]==2'b10);
+assign dfq_byp_tid_sel[3]  =  (lsu_dfq_byp_tid[1:0]==2'b11);
+//assign dfq_byp_tid__sel[3]  =  ~|(lsu_dfq_byp_d1_sel[2:0]);
+
+wire  [3:0]  lsu_dfq_byp_tid_d1_sel_tmp ;
+   
+dffe_s #(4)  dfq_byp_tid_sel_ff (      // holds the decoded select; loads only when dfq_byp_ff_en
+           .din    (dfq_byp_tid_sel[3:0]),
+           .q      (lsu_dfq_byp_tid_d1_sel_tmp[3:0]),
+           .en     (dfq_byp_ff_en),
+           .clk    (clk),
+           .se     (1'b0),       .si (),          .so ()
+           );
+
+//11/21/03 - add rst_tri_en to lsu_dfq_byp_tid_d1_sel[3:0] going to qdp1 as dfq_byp_sel[3:0]
+
+assign  lsu_dfq_byp_tid_d1_sel[2:0]  =  lsu_dfq_byp_tid_d1_sel_tmp[2:0]  & {3{~rst_tri_en}};   // rst_tri_en clears bits 2:0 ...
+assign  lsu_dfq_byp_tid_d1_sel[3]    =  lsu_dfq_byp_tid_d1_sel_tmp[3]    | rst_tri_en;          // ... and sets bit 3, so the select reads 4'b1000 while rst_tri_en is high
+
+
+//=================================================================================================
+// INST_VLD_W GENERATION
+//=================================================================================================
+
+
+wire [1:0] thrid_m, thrid_g ;
+dff_s #(2)  stgm_thrid (             // registers ifu_tlu_thrid_e into thrid_m
+        .din    (ifu_tlu_thrid_e[1:0]),
+        .q      (thrid_m[1:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s  #(2) stgg_thrid (             // registers thrid_m into thrid_g
+        .din    (thrid_m[1:0]),
+        .q      (thrid_g[1:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+wire	flush_w_inst_vld_m ;
+wire	lsu_inst_vld_w,lsu_inst_vld_tmp ;
+wire	other_flush_pipe_w ;
+wire  qctl1_flush_pipe_w;
+   
+assign	flush_w_inst_vld_m =         // m-stage valid, dropped when a flush is active and thrid_m equals thrid_g
+	ifu_tlu_inst_vld_m & 
+	~(qctl1_flush_pipe_w & (thrid_m[1:0] == thrid_g[1:0])) ; // really lsu_flush_pipe_w
+   
+dff_s  stgw_ivld (                   // registers flush_w_inst_vld_m into lsu_inst_vld_tmp
+        .din    (flush_w_inst_vld_m),
+        .q      (lsu_inst_vld_tmp),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+
+assign	other_flush_pipe_w = tlu_early_flush_pipe2_w | (lsu_ttype_vld_m2 & lsu_inst_vld_tmp);     
+assign	qctl1_flush_pipe_w = other_flush_pipe_w | ifu_lsu_flush_w ;   // combined flush: other_flush_pipe_w or the ifu flush
+
+assign	lsu_inst_vld_w = lsu_inst_vld_tmp & ~qctl1_flush_pipe_w ;    // registered valid, cleared while the combined flush is high
+
+
+//=================================================================================================
+// SECONDARY VS. PRIMARY LOADS
+//=================================================================================================
+
+// An incoming load can match addresses with an outstanding load request
+// from another thread. In this case, the secondary load must wait until the primary
+// load returns and then it will bypass (but not fill). There can only be one primary
+// load but multiple secondary loads. The secondary loads will not enter the dfq.
+// The primary load will however be recirculated until all secondary loads have bypassed.
+
+// Could have multiple secondary hits. Only one thread can be chosen
+// as primary though.
+
+//An incoming load can match addresses with any outstanding load request from other threads.
+//can be multiple hits
+// timing fix: 5/19/03: move secondary hit way generation to w2
+//
+//assign  ld0_sec_hit_g = ld_sec_hit_thrd0 & ld0_unfilled  ;
+//assign  ld1_sec_hit_g = ld_sec_hit_thrd1 & ld1_unfilled  ;
+//assign  ld2_sec_hit_g = ld_sec_hit_thrd2 & ld2_unfilled  ;
+//assign  ld3_sec_hit_g = ld_sec_hit_thrd3 & ld3_unfilled  ;
+//
+//
+// Fix for Bug1606
+//assign  lsu_ld_sec_hit_l2access_g =
+//         ld0_sec_hit_g | ld1_sec_hit_g | ld2_sec_hit_g | ld3_sec_hit_g ;
+//
+//phase 2
+//since can be multiple hits, it isn't one-hot mux, but fix priority-sel mux
+//assign  lsu_ld_sec_hit_wy_g[1:0] =
+//    ld0_sec_hit_g ? ld0_unfilled_wy[1:0] :
+//      ld1_sec_hit_g ? ld1_unfilled_wy[1:0] :
+//        ld2_sec_hit_g ? ld2_unfilled_wy[1:0] :
+//          ld3_sec_hit_g ? ld3_unfilled_wy[1:0] : 2'bxx ;
+
+// Per-thread secondary-hit indications, delayed by one cycle into W2.
+wire ld_sec_hit_thrd0_w2, ld_sec_hit_thrd1_w2,
+     ld_sec_hit_thrd2_w2, ld_sec_hit_thrd3_w2 ;
+
+dff_s #(4) ff_ld_sec_hit_thrd0to3_d1 (
+    .clk (clk),
+    .din ({ld_sec_hit_thrd0,    ld_sec_hit_thrd1,
+           ld_sec_hit_thrd2,    ld_sec_hit_thrd3}),
+    .q   ({ld_sec_hit_thrd0_w2, ld_sec_hit_thrd1_w2,
+           ld_sec_hit_thrd2_w2, ld_sec_hit_thrd3_w2}),
+    .se  (1'b0), .si (), .so ()
+);
+
+// A secondary hit against thread N only counts while ldN_unfilled is set.
+assign ld0_sec_hit_w2 = ld0_unfilled & ld_sec_hit_thrd0_w2 ;
+assign ld1_sec_hit_w2 = ld1_unfilled & ld_sec_hit_thrd1_w2 ;
+assign ld2_sec_hit_w2 = ld2_unfilled & ld_sec_hit_thrd2_w2 ;
+assign ld3_sec_hit_w2 = ld3_unfilled & ld_sec_hit_thrd3_w2 ;
+
+// Fix for Bug1606: secondary hit against any thread.
+assign lsu_ld_sec_hit_l2access_w2 =
+    |{ld3_sec_hit_w2, ld2_sec_hit_w2, ld1_sec_hit_w2, ld0_sec_hit_w2} ;
+
+// phase 2
+// Several threads may hit at once, so this is a fixed-priority select
+// (thread 0 highest) rather than a one-hot mux. With no hit the result is
+// left as don't-care.
+assign lsu_ld_sec_hit_wy_w2[1:0] =
+    ld0_sec_hit_w2 ? ld0_unfilled_wy[1:0] :
+    ld1_sec_hit_w2 ? ld1_unfilled_wy[1:0] :
+    ld2_sec_hit_w2 ? ld2_unfilled_wy[1:0] :
+    ld3_sec_hit_w2 ? ld3_unfilled_wy[1:0] :
+                     2'bxx ;
+
+//dff #(4)  stgm_dbypsel (
+//        .din    (dfq_byp_sel[3:0]),
+//        .q      (dfq_byp_sel_m[3:0]),
+//        .clk    (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        );
+
+//dff #(4)  stgg_dbypsel (
+//        .din    (dfq_byp_sel_m[3:0]),
+//        .q      (dfq_byp_sel_g[3:0]),
+//        .clk    (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        );
+
+// Select the g-stage lmq source.
+// These selects cover lmq contents shared by fill/hit and by alternate
+// sources such as ldxa/raw.
+// Is qualification of dfq_byp_sel_g by ld_thrd_byp_sel necessary ???
+
+wire [3:0] lmq_byp_misc_sel_e ;
+
+// Threads 0..2: selected either for ldxa/raw (ld_thrd_byp_sel_e) or for the
+// dfq (dfq_byp_sel).
+assign lmq_byp_misc_sel_e[2:0] = ld_thrd_byp_sel_e[2:0] | dfq_byp_sel[2:0] ;
+
+// Thread 3 is the default: chosen whenever none of bits [2:0] is set, rather
+// than from ld_thrd_byp_sel_e[3] | dfq_byp_sel[3].
+assign lmq_byp_misc_sel_e[3]   = ~|lmq_byp_misc_sel_e[2:0] ;
+
+/*
+assign  lmq_byp_misc_sel_e[0] = ld_thrd_byp_sel_e[0]  |        // select for ldxa/raw.
+          (dfq_byp_sel[0] & ~ld_thrd_byp_sel_e[0]) ;  // select for dfq.
+assign  lmq_byp_misc_sel_e[1] = ld_thrd_byp_sel_e[1]  |        // select for ldxa/raw.
+          (dfq_byp_sel[1] & ~ld_thrd_byp_sel_e[1]) ; // select for dfq.
+assign  lmq_byp_misc_sel_e[2] = ld_thrd_byp_sel_e[2]  |        // select for ldxa/raw.
+          (dfq_byp_sel[2] & ~ld_thrd_byp_sel_e[2]) ; // select for dfq.
+assign  lmq_byp_misc_sel_e[3] = ld_thrd_byp_sel_e[3]  |        // select for ldxa/raw.
+          (dfq_byp_sel[3] & ~ld_thrd_byp_sel_e[3]) ; // select for dfq.
+*/
+
+// M-Stage
+// 10/27/03 - rst_tri_en added to the lsu_lmq_byp_misc_sel select sent to qdp1.
+wire [3:0] lsu_lmq_byp_misc_sel_tmp ;
+
+dff_s #(4) stgg_lbsel (
+    .clk (clk),
+    .din (lmq_byp_misc_sel_e[3:0]),
+    .q   (lsu_lmq_byp_misc_sel_tmp[3:0]),
+    .se  (1'b0), .si (), .so ()
+);
+
+// While rst_tri_en is high, bits [2:0] are forced low and bit [3] high.
+assign lsu_lmq_byp_misc_sel[3]   = rst_tri_en | lsu_lmq_byp_misc_sel_tmp[3] ;
+assign lsu_lmq_byp_misc_sel[2:0] = {3{~rst_tri_en}} & lsu_lmq_byp_misc_sel_tmp[2:0] ;
+
+
+/*
+assign  lsu_lmq_byp_misc_sel[0] = ld_thrd_byp_sel[0]  |        // select for ldxa/raw.
+          (dfq_byp_sel_g[0] & ~ld_thrd_byp_sel[0]) ;  // select for dfq.
+assign  lsu_lmq_byp_misc_sel[1] = ld_thrd_byp_sel[1]  |        // select for ldxa/raw.
+          (dfq_byp_sel_g[1] & ~ld_thrd_byp_sel[1]) ; // select for dfq.
+assign  lsu_lmq_byp_misc_sel[2] = ld_thrd_byp_sel[2]  |        // select for ldxa/raw.
+          (dfq_byp_sel_g[2] & ~ld_thrd_byp_sel[2]) ; // select for dfq.
+assign  lsu_lmq_byp_misc_sel[3] = ld_thrd_byp_sel[3]  |        // select for ldxa/raw.
+          (dfq_byp_sel_g[3] & ~ld_thrd_byp_sel[3]) ; // select for dfq.
+*/
+
+
+
+//=================================================================================================
+//  Miscellaneous Staging
+//=================================================================================================
+
+   
+// E-stage one-hot thread decode of ifu_tlu_thrid_e[1:0].
+assign thread0_e = ~(ifu_tlu_thrid_e[1] | ifu_tlu_thrid_e[0]) ;
+assign thread1_e =  ifu_tlu_thrid_e[0] & ~ifu_tlu_thrid_e[1] ;
+assign thread2_e = ~ifu_tlu_thrid_e[0] &  ifu_tlu_thrid_e[1] ;
+assign thread3_e =  ifu_tlu_thrid_e[0] &  ifu_tlu_thrid_e[1] ;
+
+// Steer the E-stage load valid to the thread that issued it.
+assign ld0_inst_vld_e = thread0_e & ld_inst_vld_e ;
+assign ld1_inst_vld_e = thread1_e & ld_inst_vld_e ;
+assign ld2_inst_vld_e = thread2_e & ld_inst_vld_e ;
+assign ld3_inst_vld_e = thread3_e & ld_inst_vld_e ;
+
+// Local copy of bits [7:6] of the M-stage ld/st virtual address.
+assign ldst_va_m[7:6] = lsu_ldst_va_m[7:6];
+   
+// E -> M: per-thread load valids plus the fp and double attributes.
+dff_s #(6) stgm_ad_m (
+    .clk (clk),
+    .din ({ld0_inst_vld_e, ld1_inst_vld_e, ld2_inst_vld_e, ld3_inst_vld_e,
+           ifu_lsu_ldst_fp_e, ifu_lsu_ldst_dbl_e}),
+    .q   ({ld0_inst_vld_m, ld1_inst_vld_m, ld2_inst_vld_m, ld3_inst_vld_m,
+           ldst_fp_m,         ldst_dbl_m}),
+    .se  (1'b0), .si (), .so ()
+);
+
+// M -> G: va[7:6], the per-thread load valids (named "unflushed" on the
+// output side) and the fp/double attributes.
+// An earlier revision also staged ldst_va[8] and st_inst_vld through this
+// flop; those bits are no longer carried.
+dff_s #(8) stgm_ad_g (
+    .clk (clk),
+    .din ({ldst_va_m[7:6],
+           ld0_inst_vld_m, ld1_inst_vld_m, ld2_inst_vld_m, ld3_inst_vld_m,
+           ldst_fp_m, ldst_dbl_m}),
+    .q   ({ldst_va_g[7:6],
+           ld0_inst_vld_unflushed, ld1_inst_vld_unflushed,
+           ld2_inst_vld_unflushed, ld3_inst_vld_unflushed,
+           ldst_fp_g, ldst_dbl_g}),
+    .se  (1'b0), .si (), .so ()
+);
+
+// G-stage load valids: the staged (unflushed) valid qualified by the final
+// W-stage instruction valid.
+assign ld0_inst_vld_g = lsu_inst_vld_w & ld0_inst_vld_unflushed ;
+assign ld1_inst_vld_g = lsu_inst_vld_w & ld1_inst_vld_unflushed ;
+assign ld2_inst_vld_g = lsu_inst_vld_w & ld2_inst_vld_unflushed ;
+assign ld3_inst_vld_g = lsu_inst_vld_w & ld3_inst_vld_unflushed ;
+// (the equivalent store valid, st_inst_vld_g, is no longer generated here)
+
+// G -> W2 staging of the qualified load valids.
+dff_s #(4) ivld_stgw2 (
+    .clk (clk),
+    .din ({ld0_inst_vld_g,  ld1_inst_vld_g,  ld2_inst_vld_g,  ld3_inst_vld_g}),
+    .q   ({ld0_inst_vld_w2, ld1_inst_vld_w2, ld2_inst_vld_w2, ld3_inst_vld_w2}),
+    .se  (1'b0), .si (), .so ()
+);
+
+// One-hot thread indication pipelined E -> M -> G -> W2.
+dff_s #(4) th_stgm (
+    .clk (clk),
+    .din ({thread0_e, thread1_e, thread2_e, thread3_e}),
+    .q   ({thread0_m, thread1_m, thread2_m, thread3_m}),
+    .se  (1'b0), .si (), .so ()
+);
+
+dff_s #(4) th_stgg (
+    .clk (clk),
+    .din ({thread0_m, thread1_m, thread2_m, thread3_m}),
+    .q   ({thread0_g, thread1_g, thread2_g, thread3_g}),
+    .se  (1'b0), .si (), .so ()
+);
+
+dff_s #(4) th_stgw2 (
+    .clk (clk),
+    .din ({thread0_g,  thread1_g,  thread2_g,  thread3_g}),
+    .q   ({thread0_w2, thread1_w2, thread2_w2, thread3_w2}),
+    .se  (1'b0), .si (), .so ()
+);
+
+
+
+//=================================================================================================
+//
+// IMISS PCX PKT REQ CTL
+//
+//=================================================================================================
+
+
+// ** ifu request packet should be sent out in e-stage **
+// ** Prefer not to make dfq dual-ported **
+
+// Format of IFU pcx packet (50b) :
+//  b49 - valid
+//  b48:44 - req type
+//  b43:42 - rep way (for "eviction" - maintains directory consistency )
+//  b41:40 - mil id
+//  b39:0  - imiss address
+// *
+// destid :
+//  b2  - b39 of pa
+//  b1  - b8  of pa
+//  b0  - b7  of pa 
+// pcxpkt :
+//  b51 - valid
+//  b50 - reserved
+//  b49 - NC
+//  b48:44 - req type
+//  b43:42 - rep way (for "eviction" - maintains directory consistency )
+//  b41:40 - mil id
+//  b39:0  - imiss address
+
+// IMISS REQUEST CONTROL
+// Vld is reset if imiss pkt requests and request is not subsequently
+// squashed and new imiss pkt unavailable.
+
+// Request rate is 1/3 cycles.
+
+/*dff  iack_stg (
+        .din    (imiss_pcx_rq_sel),
+        .q      (lsu_ifu_pcxpkt_ack_d),
+        .clk  	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); */
+
+// Ack to the ifu: the imiss request was selected two cycles earlier and the
+// request was not squashed.
+assign lsu_ifu_pcxpkt_ack_d = ~pcx_req_squash_d1 & imiss_pcx_rq_sel_d2 ;
+
+// The imiss packet is offered for arbitration only while it is not in either
+// of the two cycles following its own selection.
+assign imiss_pkt_vld =
+    ifu_lsu_pcxreq_d & ~imiss_pcx_rq_sel_d1 & ~imiss_pcx_rq_sel_d2 ;
+
+// timing fix: 5/21/03 - ifu sends destid 1 cycle early (ifu_lsu_destid_s),
+// so it is captured locally instead of using ifu_lsu_destid_d directly.
+wire       ifu_destid_en ;
+wire [2:0] ifu_destid_d ;
+
+// Capture a new destid when no request is pending, or when the pending one
+// is acked and bit 50 of the ifu packet is clear.
+assign ifu_destid_en =
+    (lsu_ifu_pcxpkt_ack_d & ~ifu_lsu_pcxpkt_e_b50) | ~ifu_lsu_pcxreq_d ;
+
+dffe_s #(3) ff_ifu_destid_d (
+    .clk (clk),
+    .en  (ifu_destid_en),
+    .din (ifu_lsu_destid_s[2:0]),
+    .q   (ifu_destid_d[2:0]),
+    .se  (1'b0), .si (), .so ()
+);
+
+assign imiss_l2bnk_addr[2:0] = ifu_destid_d[2:0] ;
+
+// Destination decode: addr[2] alone selects dest[4]; otherwise addr[1:0]
+// picks one of dest[3:0].
+assign imiss_l2bnk_dest[4:0] =
+    { imiss_l2bnk_addr[2],
+     ~imiss_l2bnk_addr[2] &  imiss_l2bnk_addr[1] &  imiss_l2bnk_addr[0],
+     ~imiss_l2bnk_addr[2] &  imiss_l2bnk_addr[1] & ~imiss_l2bnk_addr[0],
+     ~imiss_l2bnk_addr[2] & ~imiss_l2bnk_addr[1] &  imiss_l2bnk_addr[0],
+     ~imiss_l2bnk_addr[2] & ~imiss_l2bnk_addr[1] & ~imiss_l2bnk_addr[0]} ;
+
+
+//=================================================================================================
+//  FPOP PCX RQ CTL
+//=================================================================================================
+
+
+// fp store valid in M: bits 80 and 79 of the ffu data bus both set.
+assign fpst_vld_m = ffu_lsu_data[79] & ffu_lsu_data[80] ;
+
+dff_s fpst_stg (
+    .clk (clk),
+    .din (fpst_vld_m),
+    .q   (fpst_vld_g),
+    .se  (1'b0), .si (), .so ()
+);
+
+// ffu req is never speculative as it must always begin with the queue empty.
+// An fpop is acked once it has been selected (the d1 copy is used for
+// timing); an fp store is acked immediately.
+assign lsu_ffu_ack = fpst_vld_g | fpop_pcx_rq_sel_d1 ;
+
+// req_squash needs to match up with rq_sel_d1 !!!
+// keep vld around for two cycles.
+// The valid is cleared on reset or when the fpop is selected (fpop_pcx_rq_sel,
+// not its d1 copy).
+assign fpop_vld_reset = fpop_pcx_rq_sel | reset ;
+
+assign fpop_vld_en = ffu_lsu_fpop_rq_vld ;
+
+// fpop valid
+dffre_s #(1) fpop_vld (
+    .clk (clk),
+    .rst (fpop_vld_reset),
+    .en  (fpop_vld_en),
+    .din (ffu_lsu_fpop_rq_vld),
+    .q   (fpop_pkt_vld_unmasked),
+    .se  (1'b0), .si (), .so ()
+);
+
+// ** fpop_pkt1 should not be required.
+assign fpop_pkt1 = ~fpop_pcx_rq_sel_d1 & fpop_pkt_vld_unmasked ;
+
+// The ffu_lsu_kill_fpop_rq qualification is not applied here.
+assign fpop_pkt_vld = fpop_pkt_vld_unmasked ;
+
+assign fpop_atom_req = fpop_pcx_rq_sel & fpop_pkt1 ;
+
+dff_s fpatm_stg (
+    .clk (clk),
+    .din (fpop_atom_req),
+    .q   (fpop_atom_rq_pq),
+    .se  (1'b0), .si (), .so ()
+);
+
+// fpop requests always use destination bit 4.
+assign fpop_l2bnk_dest[4:0] = 5'b10000 ;
+
+
+
+//=================================================================================================
+//  SPU PCX PKT REQ CONTROL
+//=================================================================================================
+  
+// If ack is sent in a given cycle, then the earliest the spu can send
+// a response is in the same cycle. 
+
+wire strm_pcx_rq_sel_d2 ;
+wire spu_ack_d1 ;
+wire strm_pkt_vld_unmasked ;
+
+// spu request sent to pcx: selected two cycles ago and not squashed.
+assign lsu_spu_ldst_ack = ~pcx_req_squash_d1 & strm_pcx_rq_sel_d2 ;
+
+dff_s #(1) rqsel_d2 (
+    .clk (clk),
+    .din (strm_pcx_rq_sel_d1),
+    .q   (strm_pcx_rq_sel_d2),
+    .se  (1'b0), .si (), .so ()
+);
+
+// One-cycle delayed copy of the ack, used to mask the packet valid.
+dff_s #(1) spuack_d1 (
+    .clk (clk),
+    .din (lsu_spu_ldst_ack),
+    .q   (spu_ack_d1),
+    .se  (1'b0), .si (), .so ()
+);
+
+// Register packet address bits PCX_AD_LO+7 : PCX_AD_LO+6 as the bank address.
+dff_s #(2) ff_spu_lsu_ldst_pckt_d1 (
+    .clk (clk),
+    .din (spu_lsu_ldst_pckt[`PCX_AD_LO+7:`PCX_AD_LO+6]),
+    .q   (strm_l2bnk_addr[1:0]),
+    .se  (1'b0), .si (), .so ()
+);
+
+// Streaming does not access io space, so dest[4] is tied off and only
+// dest[3:0] are decoded from the 2-bit bank address.
+assign strm_l2bnk_dest[4] = 1'b0 ;
+assign strm_l2bnk_dest[3] =  strm_l2bnk_addr[1] &  strm_l2bnk_addr[0] ;
+assign strm_l2bnk_dest[2] =  strm_l2bnk_addr[1] & ~strm_l2bnk_addr[0] ;
+assign strm_l2bnk_dest[1] = ~strm_l2bnk_addr[1] &  strm_l2bnk_addr[0] ;
+assign strm_l2bnk_dest[0] = ~strm_l2bnk_addr[1] & ~strm_l2bnk_addr[0] ;
+
+dff_s #(1) spu_pkt_vld_d1 (
+    .clk (clk),
+    .din (spu_lsu_ldst_pckt_vld),
+    .q   (strm_pkt_vld_unmasked),
+    .se  (1'b0), .si (), .so ()
+);
+
+// The registered valid is masked from the cycle after selection through the
+// cycle after the ack.
+assign strm_pkt_vld =
+    strm_pkt_vld_unmasked &
+    ~strm_pcx_rq_sel_d1 & ~lsu_spu_ldst_ack & ~spu_ack_d1 ;
+
+// temp = remove strming interface
+//assign strm_sldst_cam_vld = 1'b0 ;
+//assign strm_sld_dc_rd_vld = 1'b0 ;
+//assign strm_sldst_cam_d2 = 1'b0 ;
+// temp = remove strming interface
+
+
+//=================================================================================================
+//  STORE PCX PKT REQ CONTROL
+//=================================================================================================
+
+// Stage by a cycle.
+
+// Thread0
+// Per-thread request type bits [2:1] and l2 bank address, delayed one cycle.
+wire [2:1] stb0_rqtype, stb1_rqtype, stb2_rqtype, stb3_rqtype ;
+wire [2:0] stb0_rqaddr, stb1_rqaddr, stb2_rqaddr, stb3_rqaddr ;
+
+// Thread0
+dff_s #(5) stgd1_s0rq (
+    .clk (clk),
+    .din ({stb0_atm_rq_type[2:1], stb0_l2b_addr[2:0]}),
+    .q   ({stb0_rqtype[2:1],      stb0_rqaddr[2:0]}),
+    .se  (1'b0), .si (), .so ()
+);
+
+// Thread1
+dff_s #(5) stgd1_s1rq (
+    .clk (clk),
+    .din ({stb1_atm_rq_type[2:1], stb1_l2b_addr[2:0]}),
+    .q   ({stb1_rqtype[2:1],      stb1_rqaddr[2:0]}),
+    .se  (1'b0), .si (), .so ()
+);
+
+// Thread2
+dff_s #(5) stgd1_s2rq (
+    .clk (clk),
+    .din ({stb2_atm_rq_type[2:1], stb2_l2b_addr[2:0]}),
+    .q   ({stb2_rqtype[2:1],      stb2_rqaddr[2:0]}),
+    .se  (1'b0), .si (), .so ()
+);
+
+// Thread3
+dff_s #(5) stgd1_s3rq (
+    .clk (clk),
+    .din ({stb3_atm_rq_type[2:1], stb3_l2b_addr[2:0]}),
+    .q   ({stb3_rqtype[2:1],      stb3_rqaddr[2:0]}),
+    .se  (1'b0), .si (), .so ()
+);
+
+wire stb0_rd_for_pcx, stb1_rd_for_pcx, stb2_rd_for_pcx, stb3_rd_for_pcx ;
+wire stb0_rd_for_pcx_tmp, stb1_rd_for_pcx_tmp,
+     stb2_rd_for_pcx_tmp, stb3_rd_for_pcx_tmp ;
+
+// stb_rd_for_pcx[n] is staged into stbN_rd_for_pcx_tmp.
+dff_s #(4) stgd1_rdpcx (
+    .clk (clk),
+    .din (stb_rd_for_pcx[3:0]),
+    .q   ({stb3_rd_for_pcx_tmp, stb2_rd_for_pcx_tmp,
+           stb1_rd_for_pcx_tmp, stb0_rd_for_pcx_tmp}),
+    .se  (1'b0), .si (), .so ()
+);
+
+// timing fix: 5/6 - the lsu_st_pcx_rq_kill_w2 qualification was moved to
+// after the store pick, so the staged value passes through unqualified here.
+assign stb3_rd_for_pcx = stb3_rd_for_pcx_tmp ;
+assign stb2_rd_for_pcx = stb2_rd_for_pcx_tmp ;
+assign stb1_rd_for_pcx = stb1_rd_for_pcx_tmp ;
+assign stb0_rd_for_pcx = stb0_rd_for_pcx_tmp ;
+
+// STORE REQUEST CONTROL
+// ** Data must come from bypass mux output.
+// THREAD0
+
+// Reads for stores will have to be made non-speculative ????
+// or delay when ced bit is set such that there is no need
+// to replay store.
+// The size of atm_rq_type can be reduced in stb_ctl etc !!!
+// Store packet valid per thread: a staged stb read for pcx that is not in
+// the cycle right after its own selection.
+assign st0_pkt_vld = ~st0_pcx_rq_sel_d1 & stb0_rd_for_pcx ;
+assign st1_pkt_vld = ~st1_pcx_rq_sel_d1 & stb1_rd_for_pcx ;
+assign st2_pkt_vld = ~st2_pcx_rq_sel_d1 & stb2_rd_for_pcx ;
+assign st3_pkt_vld = ~st3_pcx_rq_sel_d1 & stb3_rd_for_pcx ;
+
+// cas is identified by request type bit 2 clear and bit 1 set.
+assign st0_cas_vld = stb0_rqtype[1] & ~stb0_rqtype[2] ;
+assign st1_cas_vld = stb1_rqtype[1] & ~stb1_rqtype[2] ;
+assign st2_cas_vld = stb2_rqtype[1] & ~stb2_rqtype[2] ;
+assign st3_cas_vld = stb3_rqtype[1] & ~stb3_rqtype[2] ;
+
+// stquad is not supported (no stN_stq_vld term), so the atomic indication
+// reduces to cas.
+assign st0_atomic_vld = st0_cas_vld ;
+assign st1_atomic_vld = st1_cas_vld ;
+assign st2_atomic_vld = st2_cas_vld ;
+assign st3_atomic_vld = st3_cas_vld ;
+
+// Can this be based on st0_pcx_rq_vld instead to ease critical path.
+
+//assign  pcx_rq_for_stb[0] = st_pcx_rq_mhot_sel[0] ;
+//assign  pcx_rq_for_stb[1] = st_pcx_rq_mhot_sel[1] ;
+//assign  pcx_rq_for_stb[2] = st_pcx_rq_mhot_sel[2] ;
+//assign  pcx_rq_for_stb[3] = st_pcx_rq_mhot_sel[3] ;
+
+   
+// Per-thread store destination decode. For each thread, rqaddr[2] alone
+// selects dest[4]; with rqaddr[2] clear, rqaddr[1:0] picks one of dest[3:0].
+
+// Thread0
+assign st0_l2bnk_dest[4:0] =
+    { stb0_rqaddr[2],
+     ~stb0_rqaddr[2] &  stb0_rqaddr[1] &  stb0_rqaddr[0],
+     ~stb0_rqaddr[2] &  stb0_rqaddr[1] & ~stb0_rqaddr[0],
+     ~stb0_rqaddr[2] & ~stb0_rqaddr[1] &  stb0_rqaddr[0],
+     ~stb0_rqaddr[2] & ~stb0_rqaddr[1] & ~stb0_rqaddr[0]} ;
+
+// Thread1
+assign st1_l2bnk_dest[4:0] =
+    { stb1_rqaddr[2],
+     ~stb1_rqaddr[2] &  stb1_rqaddr[1] &  stb1_rqaddr[0],
+     ~stb1_rqaddr[2] &  stb1_rqaddr[1] & ~stb1_rqaddr[0],
+     ~stb1_rqaddr[2] & ~stb1_rqaddr[1] &  stb1_rqaddr[0],
+     ~stb1_rqaddr[2] & ~stb1_rqaddr[1] & ~stb1_rqaddr[0]} ;
+
+// Thread2
+assign st2_l2bnk_dest[4:0] =
+    { stb2_rqaddr[2],
+     ~stb2_rqaddr[2] &  stb2_rqaddr[1] &  stb2_rqaddr[0],
+     ~stb2_rqaddr[2] &  stb2_rqaddr[1] & ~stb2_rqaddr[0],
+     ~stb2_rqaddr[2] & ~stb2_rqaddr[1] &  stb2_rqaddr[0],
+     ~stb2_rqaddr[2] & ~stb2_rqaddr[1] & ~stb2_rqaddr[0]} ;
+
+// Thread3
+assign st3_l2bnk_dest[4:0] =
+    { stb3_rqaddr[2],
+     ~stb3_rqaddr[2] &  stb3_rqaddr[1] &  stb3_rqaddr[0],
+     ~stb3_rqaddr[2] &  stb3_rqaddr[1] & ~stb3_rqaddr[0],
+     ~stb3_rqaddr[2] & ~stb3_rqaddr[1] &  stb3_rqaddr[0],
+     ~stb3_rqaddr[2] & ~stb3_rqaddr[1] & ~stb3_rqaddr[0]} ;
+
+//=================================================================================================
+//  	BLK-LOAD TRACKING
+//=================================================================================================
+
+// The 64B load request is divided into 4 16B requests, i.e., 4 pcx pkts.
+// The last bld request to the pcx must be marked as so. 
+// Only one bld can be processed at any time.
+
+   // Local nets for block-load tracking:
+   //   bld_thrd_din/dout - encoded thread id captured for the block load
+   //   bld_dcd_thrd      - one-hot decode of bld_thrd_dout
+   //   ld_03_inst_vld_g  - any-thread load valid, qualified by lsu_inst_vld_w
+   //   bld_pcx_rq_sel_d1 - ldN_pcx_rq_sel_d1 of the thread in bld_dcd_thrd
+   wire [1:0] bld_thrd_din;
+   wire [1:0] bld_thrd_dout;
+   wire [3:0] bld_dcd_thrd;
+   wire       ld_03_inst_vld_g;
+   wire       bld_pcx_rq_sel_d1;
+   
+// Stage the block-asi indication from M to G.
+dff_s  stgg_blkasi (
+        .din    (lsu_blk_asi_m),
+        .q      (blk_asi_g),
+        .clk  	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// A bld helper completes in E when lsu_fldd_vld_en is set, a block load is
+// active (bld_dout) and the dfill thread matches the block-load thread.
+assign	bld_helper_cmplt_e = lsu_fldd_vld_en & bld_dout & (
+                             bld_dcd_thrd[0] & lsu_dfill_dcd_thrd[0] |
+                             bld_dcd_thrd[1] & lsu_dfill_dcd_thrd[1] |
+                             bld_dcd_thrd[2] & lsu_dfill_dcd_thrd[2] |
+                             bld_dcd_thrd[3] & lsu_dfill_dcd_thrd[3] );
+
+   
+// Helper-complete staged E -> M.
+dff_s #(1) stgm_bldhlpr (
+        .din    (bld_helper_cmplt_e),
+        .q    	(bld_helper_cmplt_m),
+        .clk  	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// The M-stage copy is also exported under the lsu_ prefix.
+assign	lsu_bld_helper_cmplt_m = bld_helper_cmplt_m ;
+
+// Helper-complete staged M -> G; the G copy is used in bld_reset.
+dff_s #(1) stgg_bldhlpr (
+        .din    (bld_helper_cmplt_m),
+        .q    	(bld_helper_cmplt_g),
+        .clk  	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Alternate-space indication pipelined E -> M -> G -> W2.
+wire	alt_space_m, alt_space_g, alt_space_w2 ;
+dff_s stg_aspacem(
+        .din    (ifu_lsu_alt_space_e),
+        .q    	(alt_space_m),
+        .clk  	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s stg_aspaceg(
+        .din    (alt_space_m),
+        .q    	(alt_space_g),
+        .clk  	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s stg_aspacew2 (
+        .din    (alt_space_g),
+        .q    	(alt_space_w2),
+        .clk  	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+
+// PCX bld helper issue :
+// 00-1st->01-2nd->10-3rd->11-4th->00
+    
+   // Encode the one-hot per-thread load valids into a 2-bit thread id:
+   // bit 0 is set for threads 1 and 3, bit 1 for threads 2 and 3.
+   assign     bld_thrd_din[0] = ld1_inst_vld_unflushed | ld3_inst_vld_unflushed;
+   assign     bld_thrd_din[1] = ld2_inst_vld_unflushed | ld3_inst_vld_unflushed;
+
+
+   // Load valid for any thread, qualified once by lsu_inst_vld_w.
+   assign ld_03_inst_vld_g =  lsu_inst_vld_w & (
+                              ld0_inst_vld_unflushed | ld1_inst_vld_unflushed | 
+                              ld2_inst_vld_unflushed | ld3_inst_vld_unflushed );
+   
+   // Block load detected in G: block asi, fp, double, alternate space, and a
+   // valid load.
+   assign	bld_g = blk_asi_g & ldst_fp_g & ldst_dbl_g & alt_space_g & ld_03_inst_vld_g ;
+		//~lsu_tlb_perr_ld_rq_kill_w ; // Bug 4645
+
+// W2 copy of the block-load detect, used only to qualify the perr kill below.
+wire	bld_w2 ;
+dff_s #(1) bldstg (
+        .din    (bld_g),
+        .q      (bld_w2),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// NOTE(review): perr_ld_rq_kill_w2 is declared here but not driven in this
+// section; presumably it is assigned further down in the module - confirm.
+wire	perr_ld_rq_kill_w2 ;
+wire	bld_perr_kill_w2 ;
+// Kill of the block load in W2 (feeds bld_reset and masks bld_dout).
+assign	bld_perr_kill_w2 = bld_w2 & perr_ld_rq_kill_w2 ;
+   
+// Thread id of the block load: written when bld_g is set, cleared by bld_reset.
+dffre_s #(2) bld_thrd (
+        .din  (bld_thrd_din[1:0] ),
+        .q    (bld_thrd_dout[1:0]),
+  	.rst  (bld_reset), .en (bld_g),
+        .clk  (clk),
+        .se   (1'b0),       .si (),          .so ()
+        );                     
+   // One-hot decode of the captured thread id.
+   assign bld_dcd_thrd[0] = ~bld_thrd_dout[1] & ~bld_thrd_dout[0];
+   assign bld_dcd_thrd[1] = ~bld_thrd_dout[1] &  bld_thrd_dout[0];
+   assign bld_dcd_thrd[2] =  bld_thrd_dout[1] & ~bld_thrd_dout[0];
+   assign bld_dcd_thrd[3] =  bld_thrd_dout[1] &  bld_thrd_dout[0];
+
+//bug 2757    
+   // d1 request select of whichever thread owns the block load.
+   assign bld_pcx_rq_sel_d1 = ld0_pcx_rq_sel_d1 & bld_dcd_thrd[0] | 
+                              ld1_pcx_rq_sel_d1 & bld_dcd_thrd[1] |
+                              ld2_pcx_rq_sel_d1 & bld_dcd_thrd[2] | 
+                              ld3_pcx_rq_sel_d1 & bld_dcd_thrd[3];
+
+   //wire   bld_pcx_rq_sel_d2, bld_pcx_rq_sel;
+   wire    bld_pcx_rq_sel;
+   //bug 3322
+//   assign bld_pcx_rq_sel = bld_pcx_rq_sel_d2 & ~pcx_req_squash_d1;
+   
+//dff #(1) ff_bld_pcx_rq_sel_d2 (
+//        .din  (bld_pcx_rq_sel_d1),
+//        .q    (bld_pcx_rq_sel_d2),
+//        .clk  (clk),
+//        .se   (1'b0),       .si (),          .so ()
+//        );                  
+
+  // Block-load request actually sent: the owning thread's d2 request select,
+  // provided the request was not squashed. Built directly from the per-thread
+  // d2 selects instead of a locally staged copy of bld_pcx_rq_sel_d1.
+  assign bld_pcx_rq_sel = (ld0_pcx_rq_sel_d2 & bld_dcd_thrd[0] | 
+                           ld1_pcx_rq_sel_d2 & bld_dcd_thrd[1] |
+                           ld2_pcx_rq_sel_d2 & bld_dcd_thrd[2] | 
+                           ld3_pcx_rq_sel_d2 & bld_dcd_thrd[3] ) &
+                           ~pcx_req_squash_d1;
+     
+  // The counter flop updates when a new block load is detected, or when a
+  // request is sent for an active block load whose count is not yet 3.
+  assign	bld_en = bld_g |  (bld_pcx_rq_sel & bld_dout & ~(bld_cnt[1] & bld_cnt[0])) ; 
+  // Active flag: set by bld_g and then held (cleared only via bld_reset).
+  assign	bld_din = bld_g | bld_dout ;
+  // Request count advances by one for each request sent while active.
+  assign	bcnt_din[1:0] = bld_cnt[1:0] + {1'b0,(bld_pcx_rq_sel & bld_dout)} ;
+
+// Reset by last completing bld helper.  
+// Also reset by the global reset and by a perr kill of the block load; the
+// "last helper" condition is the staged return count at 7 together with
+// bld_helper_cmplt_g.
+  assign	bld_reset = 
+	reset | bld_perr_kill_w2 |
+	(bld_rd_dout[2] & bld_rd_dout[1] & bld_rd_dout[0] & bld_helper_cmplt_g) ;
+
+assign	lsu_bld_reset = bld_reset ;
+
+// {request count, active flag} register.
+wire	bld_dout_tmp ;
+dffre_s #(3)   bld_pcx_cnt (
+        .din  ({bcnt_din[1:0],bld_din}),
+        .q    ({bld_cnt[1:0], bld_dout_tmp}),
+       	.rst  (bld_reset), .en (bld_en),
+        .clk  (clk),
+        .se   (1'b0),       .si (),          .so ()
+        );
+
+// The active flag is masked combinationally in the cycle of a perr kill.
+assign	bld_dout = bld_dout_tmp & ~bld_perr_kill_w2 ;
+
+// Last one allows ld-rq-vld to be reset.
+// The annul for the owning thread is asserted while a block load is active
+// and the request count is not 3.
+assign  bld_annul[0] =  bld_dcd_thrd[0] & (bld_dout & ~(bld_cnt[1] & bld_cnt[0])) ;
+assign  bld_annul[1] =  bld_dcd_thrd[1] & (bld_dout & ~(bld_cnt[1] & bld_cnt[0])) ;
+assign  bld_annul[2] =  bld_dcd_thrd[2] & (bld_dout & ~(bld_cnt[1] & bld_cnt[0])) ;
+assign  bld_annul[3] =  bld_dcd_thrd[3] & (bld_dout & ~(bld_cnt[1] & bld_cnt[0])) ;
+
+// One-cycle delayed copy of the annul vector.
+dff_s #(4) bannul_d1 (
+        .din    (bld_annul[3:0]),
+        .q      (bld_annul_d1[3:0]),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+   
+// Maintain rd (cpx return pkt counter). This is based on when the blk ld helper completes.
+// lower 3b of rd have to start out as zero.
+// Should be asserted 8 times for the entire bld.
+// The counter uses the M-stage helper-complete; the G-stage form is kept
+// commented out below.
+assign	bld_rd_en = (bld_helper_cmplt_m & bld_dout) ;
+assign	bld_rd_din[2:0] = bld_rd_dout_m[2:0] + {2'b00,(bld_helper_cmplt_m & bld_dout)} ;
+//assign	bld_rd_en = (bld_helper_cmplt_g & bld_dout) ;
+//assign	bld_rd_din[2:0] = bld_rd_dout[2:0] + {2'b00,(bld_helper_cmplt_g & bld_dout)} ;
+
+// 3-bit return counter, cleared by bld_reset.
+dffre_s #(3)   bld_cpx_cnt (
+        .din  (bld_rd_din[2:0]),
+        .q    (bld_rd_dout_m[2:0]),
+  	    .rst  (bld_reset), .en   (bld_rd_en),
+        .clk  (clk),
+        .se   (1'b0),       .si (),          .so ()
+        );
+
+// One-cycle delayed copy of the counter; this copy is the one tested in
+// bld_reset.
+dff_s #(3) bld_cnt_stg (
+        .din    (bld_rd_dout_m[2:0]),
+        .q      (bld_rd_dout[2:0]),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Select appr. rd. (cpx return pkt counter)
+// The delayed copy is exported as the _w count, the undelayed one as _m.
+assign	lsu_ffu_bld_cnt_w[2:0] = bld_rd_dout[2:0] ;
+assign	lsu_bld_cnt_m[2:0] = bld_rd_dout_m[2:0] ;
+
+// pcx pkt address cntrl.
+// The request count is driven out as lsu_bld_rq_addr after one flop; the net
+// name suggests address bits 5:4 - TODO confirm against the packet builder.
+wire	[1:0]	addr_b54 ;
+assign   addr_b54[1:0] = bld_cnt[1:0];
+
+/*wire bld_rq_w2 ;
+assign bld_rq_w2 = bld_dout; */
+
+dff_s #(2) blkrq_d1 (
+        .din    ({addr_b54[1:0]}),
+        .q      ({lsu_bld_rq_addr[1:0]}),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Block-load pcx request indication: the owning thread's d1 request select
+// while the block load is active.
+assign	lsu_bld_pcx_rq = bld_pcx_rq_sel_d1 & bld_dout ;
+
+/*dff #(3) blkrq_d1 (
+        .din    ({addr_b54[1:0],bld_rq_w2}),
+        .q      ({lsu_bld_rq_addr[1:0],lsu_bld_pcx_rq}),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );*/
+
+
+//=================================================================================================
+//  	LOAD PCX PKT REQ CONTROL
+//=================================================================================================
+
+// Staging pref.
+// Prefetch indication staged E -> M -> G.
+wire pref_inst_m, pref_inst_g ;
+
+dff_s stgm_prf (
+    .clk (clk),
+    .din (ifu_lsu_pref_inst_e),
+    .q   (pref_inst_m),
+    .se  (1'b0), .si (), .so ()
+);
+
+dff_s stgg_prf (
+    .clk (clk),
+    .din (pref_inst_m),
+    .q   (pref_inst_g),
+    .se  (1'b0), .si (), .so ()
+);
+
+// Performance Ctr Info: per-thread l2cache request, registered into W2 and
+// sent to the tlu as the dcache-miss vector.
+dff_s #(4) stgg_dmiss (
+    .clk (clk),
+    .din ({ld3_l2cache_rq, ld2_l2cache_rq, ld1_l2cache_rq, ld0_l2cache_rq}),
+    .q   (lsu_tlu_dcache_miss_w2[3:0]),
+    .se  (1'b0), .si (), .so ()
+);
+
+// Per-thread aliases of the same W2 vector.
+wire ld0_l2cache_rq_w2, ld1_l2cache_rq_w2, ld2_l2cache_rq_w2, ld3_l2cache_rq_w2 ;
+
+assign {ld3_l2cache_rq_w2, ld2_l2cache_rq_w2,
+        ld1_l2cache_rq_w2, ld0_l2cache_rq_w2} = lsu_tlu_dcache_miss_w2[3:0] ;
+
+wire pref_vld_g ;
+wire pref_vld0_g,    pref_vld1_g,    pref_vld2_g,    pref_vld3_g ;
+wire pref_rq_vld0_g, pref_rq_vld1_g, pref_rq_vld2_g, pref_rq_vld3_g ;
+
+// Bug 4318: a prefetch is only considered valid on a tlb cam hit with
+// page-number bit 39 clear.
+assign pref_vld_g = tlb_cam_hit_g & ~tlb_pgnum_g[39] & pref_inst_g ;
+
+// Prefetch request valid per thread: also needs a valid W-stage instruction.
+assign pref_rq_vld0_g = lsu_inst_vld_w & thread0_g & pref_vld_g ;
+assign pref_rq_vld1_g = lsu_inst_vld_w & thread1_g & pref_vld_g ;
+assign pref_rq_vld2_g = lsu_inst_vld_w & thread2_g & pref_vld_g ;
+assign pref_rq_vld3_g = lsu_inst_vld_w & thread3_g & pref_vld_g ;
+
+// Raw per-thread prefetch indication, without the tlb or valid qualification.
+assign pref_vld0_g = thread0_g & pref_inst_g ;
+assign pref_vld1_g = thread1_g & pref_inst_g ;
+assign pref_vld2_g = thread2_g & pref_inst_g ;
+assign pref_vld3_g = thread3_g & pref_inst_g ;
+
+//=========================================================================================
+// Shift full-raw/partial-raw logic from rw_ctl to qctl1
+// The conditions computed below are flopped once into their *_w2 copies by stgw2_rawcond.
+wire    ldquad_inst_g ;
+dff_s  ldq_stgg (               // flop: lsu_ldquad_inst_m -> ldquad_inst_g
+  .din    (lsu_ldquad_inst_m), .q (ldquad_inst_g),
+  .clk    (clk),
+  .se   (1'b0), .si (), .so ()
+  );
+// io_ld is simply bit 39 of tlb_pgnum_g; the narrower decode is kept commented out for reference.
+wire    io_ld,io_ld_w2 ;
+assign  io_ld = tlb_pgnum_g[39] ; // Bug 4362
+//assign  io_ld = tlb_pgnum_g[39]  & ~(~tlb_pgnum_g[38]  & tlb_pgnum_g[37]) ;
+// Priority mux: inverted lsu_stb_empty bit of the thread flagged by threadN_g (thread 3 is the default leg).
+wire	stb_not_empty ;
+assign  stb_not_empty  =
+  thread0_g ? ~lsu_stb_empty[0] :
+    thread1_g ? ~lsu_stb_empty[1] :
+      thread2_g ? ~lsu_stb_empty[2] :
+                        ~lsu_stb_empty[3] ;
+// stb_cam_hit_bf only counts as a ldquad hit when the instruction in g is a ldquad.
+wire  	ldq_hit_g,ldq_hit_w2 ;
+wire    ldq_stb_cam_hit ;
+assign  ldq_stb_cam_hit = stb_cam_hit_bf & ldquad_inst_g ;
+// Terms can be made common.
+assign  ldq_hit_g = ldq_stb_cam_hit ;
+// OR-reduce the 8-bit full-raw and partial-raw vectors to a single bit each.
+wire	full_raw_g,partial_raw_g ;
+wire	full_raw_w2,partial_raw_w2 ;
+assign	full_raw_g = |stb_ld_full_raw[7:0] ;
+assign	partial_raw_g = |stb_ld_partial_raw[7:0] ;
+// Six conditions are flopped together; din and q list them in the same order.
+wire	stb_cam_mhit_w2 ;
+wire	stb_not_empty_w2 ;
+dff_s #(6) stgw2_rawcond (
+        .din    ({full_raw_g,partial_raw_g,stb_cam_mhit,ldq_hit_g,io_ld,stb_not_empty}),
+        .q      ({full_raw_w2,partial_raw_w2,stb_cam_mhit_w2,ldq_hit_w2,io_ld_w2,
+		stb_not_empty_w2}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// BEGIN !!! ld_stb_full_raw_g for SAS support only !!!
+//wire	ld_stb_full_raw_g ;
+//wire	ld_stb_partial_raw_g ;
+
+// END !!! ld_stb_full_raw_g for SAS support only !!!
+assign  ld_stb_full_raw_w2 =   // full raw stands only without a multiple CAM hit, a ldquad hit or an io load
+        (full_raw_w2 & ~(stb_cam_mhit_w2 | ldq_hit_w2 | io_ld_w2)) ;
+        //(full_raw_w2 & ~(stb_cam_mhit_w2 | ldq_hit_w2 | io_ld_w2)) ; // Bug 3624
+wire	ld_stb_partial_raw_w2 ;
+wire	stb_cam_hit_w2 ;        // NOTE(review): not driven or read in this part of the file - confirm it is still needed
+assign  ld_stb_partial_raw_w2 =        // partial raw, multiple CAM hit, ldquad hit, or io load with a non-empty store buffer
+        (partial_raw_w2 | stb_cam_mhit_w2 | ldq_hit_w2 | 
+	(io_ld_w2 & stb_not_empty_w2)) ;
+        //(partial_raw_w2 | stb_cam_mhit_w2 | ldq_hit_w2 | (io_ld_w2 & stb_not_empty_w2)) ;
+
+//=========================================================================================
+
+/*wire	ld_stb_full_raw_w2 ;
+dff_s #(1) stgw2_fraw (
+        .din    (ld_stb_full_raw_g),
+        .q      (ld_stb_full_raw_w2),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); */
+
+// THREAD0 LOAD PCX REQUEST CONTROL
+
+//=====
+// For delayed ld0,1,2,3_l2cache_rq, we need to delay certain
+// inputs to flops enabled by ld0,1,2,3_l2cache_rq.
+
+wire	ld0_ldbl_rq_w2 ;        // per-thread w2 request terms, assigned in the per-thread sections further down
+wire	ld1_ldbl_rq_w2 ;
+wire	ld2_ldbl_rq_w2 ;
+wire	ld3_ldbl_rq_w2 ;
+// wire [1:0] 	ld_pcx_pkt_wy_w2 ;
+ wire		pref_rq_vld0_w2,pref_rq_vld1_w2,pref_rq_vld2_w2,pref_rq_vld3_w2 ;
+ wire    	non_l2bnk ;
+ wire		non_l2bnk_w2 ;
+ wire [7:6]	ldst_va_w2 ;
+// 7 flopped bits: four prefetch request valids, non_l2bnk and ldst_va[7:6]; q mirrors the din order.
+dff_s #(7) stgw2_l2crqmx (
+        .din    ({
+                //ld_pcx_pkt_wy_g[1:0],
+ 		pref_rq_vld0_g,pref_rq_vld1_g,pref_rq_vld2_g,pref_rq_vld3_g,
+ 		non_l2bnk,
+		ldst_va_g[7:6]}),
+        .q    	({
+                //ld_pcx_pkt_wy_w2[1:0],
+ 		pref_rq_vld0_w2,pref_rq_vld1_w2,pref_rq_vld2_w2,pref_rq_vld3_w2,
+ 		non_l2bnk_w2,
+		ldst_va_w2[7:6]}),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// wire [1:0] 	ld_pcx_pkt_wy_mx0,ld_pcx_pkt_wy_mx1,ld_pcx_pkt_wy_mx2,ld_pcx_pkt_wy_mx3 ;
+ wire		pref_rq_vld0_mx,pref_rq_vld1_mx,pref_rq_vld2_mx,pref_rq_vld3_mx ;
+ wire		non_l2bnk_mx0,non_l2bnk_mx1,non_l2bnk_mx2,non_l2bnk_mx3 ;
+ wire [7:6]	ldst_va_mx0,ldst_va_mx1,ldst_va_mx2,ldst_va_mx3 ;
+// Each *_mx signal below picks, per thread, between the flopped w2 copy and the current g value.
+// timing fix: 5/19/03: move secondary hit way generation to w2
+//                      remove ld_pcx_pkt_wy_mx[0-3] and replace w/ lsu_lmq_pkt_way_w2
+// assign	ld_pcx_pkt_wy_mx0[1:0] = 
+//	ld0_ldbl_rq_w2 ? ld_pcx_pkt_wy_w2[1:0] : ld_pcx_pkt_wy_g[1:0] ;
+// assign	ld_pcx_pkt_wy_mx1[1:0] = 
+//	ld1_ldbl_rq_w2 ? ld_pcx_pkt_wy_w2[1:0] : ld_pcx_pkt_wy_g[1:0] ;
+// assign	ld_pcx_pkt_wy_mx2[1:0] = 
+//	ld2_ldbl_rq_w2 ? ld_pcx_pkt_wy_w2[1:0] : ld_pcx_pkt_wy_g[1:0] ;
+// assign	ld_pcx_pkt_wy_mx3[1:0] = 
+//	ld3_ldbl_rq_w2 ? ld_pcx_pkt_wy_w2[1:0] : ld_pcx_pkt_wy_g[1:0] ;
+// Prefetch-valid and non_l2bnk muxes: select is thread N's w2 request term ldN_ldbl_rq_w2;
+// when it is set the w2 copy is taken, otherwise the g value passes through.
+ assign	pref_rq_vld0_mx =
+	ld0_ldbl_rq_w2 ? pref_rq_vld0_w2 : pref_rq_vld0_g ;
+ assign	pref_rq_vld1_mx =
+	ld1_ldbl_rq_w2 ? pref_rq_vld1_w2 : pref_rq_vld1_g ;
+ assign	pref_rq_vld2_mx =
+	ld2_ldbl_rq_w2 ? pref_rq_vld2_w2 : pref_rq_vld2_g ;
+ assign	pref_rq_vld3_mx =
+	ld3_ldbl_rq_w2 ? pref_rq_vld3_w2 : pref_rq_vld3_g ;
+ assign	non_l2bnk_mx0 =
+	ld0_ldbl_rq_w2 ? non_l2bnk_w2 : non_l2bnk ;
+ assign	non_l2bnk_mx1 =
+	ld1_ldbl_rq_w2 ? non_l2bnk_w2 : non_l2bnk ;
+ assign	non_l2bnk_mx2 =
+	ld2_ldbl_rq_w2 ? non_l2bnk_w2 : non_l2bnk ;
+ assign	non_l2bnk_mx3 =
+	ld3_ldbl_rq_w2 ? non_l2bnk_w2 : non_l2bnk ;
+// Address-bit muxes: same data legs, but selected by ldN_inst_vld_w2 (see the timing fix note below).
+//timing fix: 10/13/03 - ldst_va_mx[0-3] is used in the same cycle 'cos of perf bug fix-bug2705
+//            this delays the ld request valid which in turn delays pcx_rq_for_stb
+//            fix is to isolate this mux and the following l2bank addr mux from ld?_ldbl_rq_w2;
+//            use ld[0-3]_inst_vld_w2 instead of ld[0-3]_ldbl_rq_w2 as select
+ assign	ldst_va_mx0[7:6] =
+	ld0_inst_vld_w2 ? ldst_va_w2[7:6] : ldst_va_g[7:6] ;
+ assign	ldst_va_mx1[7:6] =
+	ld1_inst_vld_w2 ? ldst_va_w2[7:6] : ldst_va_g[7:6] ;
+ assign	ldst_va_mx2[7:6] =
+	ld2_inst_vld_w2 ? ldst_va_w2[7:6] : ldst_va_g[7:6] ;
+ assign	ldst_va_mx3[7:6] =
+	ld3_inst_vld_w2 ? ldst_va_w2[7:6] : ldst_va_g[7:6] ;
+
+//=====
+
+wire	atomic_g ;
+assign  atomic_g = casa_g | lsu_swap_g | lsu_ldstub_g ;        // any of casa / swap / ldstub in g
+// dbl_force_l2access_g: ldst_dbl_g, excluding the ldst_fp_g case unless it is also alt_space_g & blk_asi_g.
+   wire dbl_force_l2access_g;
+   wire dbl_force_l2access_w2;
+   assign dbl_force_l2access_g = ldst_dbl_g & ~(ldst_fp_g & ~(alt_space_g & blk_asi_g));
+   // Flop both g-stage terms into their w2 copies.
+dff_s #(2) stgw2_atm (
+        .din    ({atomic_g, dbl_force_l2access_g}),
+        .q      ({atomic_w2,dbl_force_l2access_w2}),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+// Flop: lsu_tlb_perr_ld_rq_kill_w -> perr_ld_rq_kill_w2.
+dff_s #(1) stgw2_perrkill (
+        .din    (lsu_tlb_perr_ld_rq_kill_w),
+        .q      (perr_ld_rq_kill_w2),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+// asi_internal is flopped twice: m -> g -> w2.
+wire	asi_internal_g,asi_internal_w2;
+dff_s #(1) stgg_intasi (
+        .din    (asi_internal_m),
+        .q      (asi_internal_g),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+// Second flop of the pair: g -> w2.
+dff_s #(1) stgw2_intasi (
+        .din    (asi_internal_g),
+        .q      (asi_internal_w2),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+wire	ld0_l2cache_rq_kill ;   // kills thread0's request: valid w2 load with (full raw and no forced l2 access) or a perr kill
+assign	ld0_l2cache_rq_kill = 
+	ld0_inst_vld_w2 & ((ld_stb_full_raw_w2 & ~dbl_force_l2access_w2) | perr_ld_rq_kill_w2) ; 
+						// full-raw which looks like partial
+assign	ld0_ldbl_rq_w2 =        // w2 request: (full raw with forced l2 access) or partial raw, unless atomic, perr-killed or internal alt-space asi
+		((ld_stb_full_raw_w2 & dbl_force_l2access_w2) | ld_stb_partial_raw_w2)
+			& ~atomic_w2 & ~perr_ld_rq_kill_w2 & ~(asi_internal_w2 & alt_space_w2) 
+			& ld0_inst_vld_w2 ;
+// ld0_vld_reset below feeds the rst port of the ld0_vld flop.
+//bug:2877 - dtag parity error 2nd packet request; dont reset if dtag parity error 2nd pkt valid
+//           dtag error is reset 1 cycle after 1st pkt sent
+//----------------------------------------------------------------------------------------------------------
+// |       1       |        2       |         3          |        4       |        5        |        6    |
+//  spc_pcx_rq_pq=1   ld_err-pkt1                                           spc_pcx_rq_pq=1   ld_err-pkt2
+//                                     ld0_vld_reset=0       pick 2nd pkt
+//                                     error_rst=1
+//----------------------------------------------------------------------------------------------------------
+// Reset on: reset, a d2 pick that is not squashed / re-validated / annulled / awaiting a 2nd dtag-perr packet, or a kill.
+wire   [3:0]  dtag_perr_pkt2_vld_d1 ;  // bit N belongs to thread N; shared by all four ldN_vld_reset equations
+assign ld0_vld_reset = 
+  (reset | (ld0_pcx_rq_sel_d2 & ~(pcx_req_squash_d1 | ld0_inst_vld_g | bld_annul_d1[0] | dtag_perr_pkt2_vld_d1[0]))) |
+  ld0_l2cache_rq_kill ;
+  //(reset | (ld0_pcx_rq_sel_d2 & ~(pcx_req_squash_d1 | ld0_inst_vld_g | bld_annul_d1[0]))) |
+
+// The equation for partial raw has redundancy !! Change it.
+// prefetch will not bypass from stb
+/* prim vs sec phase 2 change
+assign ld0_l2cache_rq = 
+        (((lsu_ld_miss_g & ~ld_stb_full_raw_g  & ~ld_sec_hit_g & ~ldxa_internal) |
+        ((lsu_ld_hit_g | lsu_ld_miss_g) & (ld_stb_partial_raw_g | (ld_stb_full_raw_g & ldst_dbl_g))))
+        & ~atomic_g & ld0_inst_vld_g) |
+	| (pref_inst_g & tlb_cam_hit_g & thread0_g) ;
+*/
+
+
+wire  ld0_l2cache_rq_g;
+// g-stage request: load miss that is not ldxa_internal or atomic for a valid thread0 load, or a thread0 prefetch request.
+assign ld0_l2cache_rq_g = 
+        (((lsu_ld_miss_g & ~ldxa_internal))
+        //((lsu_ld_hit_g | lsu_ld_miss_g) & (ld_stb_partial_raw_g)))
+        & ~atomic_g & ld0_inst_vld_g)
+        | pref_rq_vld0_g; 
+// Combined request: the g-stage request or the w2 request term.
+assign ld0_l2cache_rq = ld0_l2cache_rq_g | ld0_ldbl_rq_w2 ;
+   // The other threads' unmasked valids are declared here because ld0_spec_vld_g reads them before their flops appear.
+wire  ld0_pkt_vld_unmasked ;
+wire  ld1_pkt_vld_unmasked ;
+wire  ld2_pkt_vld_unmasked ;
+wire  ld3_pkt_vld_unmasked ;
+   // ld0_vld holds {request valid, request-is-prefetch}; loaded when ld0_l2cache_rq is set, cleared via ld0_vld_reset.
+// ld valid until request made.
+wire pref_rq_vld0;         
+dffre_s #(2)  ld0_vld (
+        .din    ({ld0_l2cache_rq, pref_rq_vld0_mx}    ),
+        .q      ({ld0_pkt_vld_unmasked, pref_rq_vld0}),
+  .rst  (ld0_vld_reset),  .en (ld0_l2cache_rq),        // NOTE(review): rst-vs-en priority is set inside dffre_s - confirm
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// bug2705 - speculative pick in w-cycle -begin
+// dbl_force_l2access_g is set for ldd(f),std(f),ldq,stq
+//perf fix: 7/29/03 - kill spec vld if other thread non-spec valids are set
+//timing fix: 8/29/03 - flop atomic_m and ldxa_internal_m from dctl for spec req
+wire    atomic_or_ldxa_internal_rq_m ;
+assign  atomic_or_ldxa_internal_rq_m  =  atomic_m | lda_internal_m ;   // NOTE(review): code uses lda_internal_m, comment above says ldxa_internal_m - confirm
+// Flop the combined m-stage term into g; it is shared by all four ldN_spec_vld_g equations.
+dff_s #(1) ff_atomic_or_ldxa_internal_rq_g (
+        .din    (atomic_or_ldxa_internal_rq_m),
+        .q      (atomic_or_ldxa_internal_rq_g),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+// Speculative valid for thread0: unflushed valid load with a TLB CAM hit, no forced l2 access, not atomic/internal,
+wire    ld0_spec_vld_g ;       // and no other thread holding an unmasked packet valid.
+assign  ld0_spec_vld_g  =  ld0_inst_vld_unflushed & lsu_inst_vld_tmp & ~dbl_force_l2access_g & tlb_cam_hit_g &
+                           ~atomic_or_ldxa_internal_rq_g &
+                           ~(ld1_pkt_vld_unmasked | ld2_pkt_vld_unmasked | ld3_pkt_vld_unmasked);
+//assign  ld0_spec_vld_g  =  ld0_inst_vld_unflushed & lsu_inst_vld_tmp & ~dbl_force_l2access_g & tlb_cam_hit_g ;
+// Flop: ld0_spec_pick_vld_g (driven outside this section) -> ld0_spec_pick_vld_w2.
+dff_s #(1) ff_ld0_spec_pick_vld_w2 (
+        .din    (ld0_spec_pick_vld_g),
+        .q      (ld0_spec_pick_vld_w2),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// kill packet valid if spec req is picked in w and stb hits in w2
+// cannot use ld0_ldbl_rawp_en_w2 because it is late signal instead use ld0_ldbl_rq_w2
+//timing fix: 7/21/03 - kill pkt vld if spec pick in w-cycle was to non$ address
+//timing fix: 8/6/03 - kill pkt_vld if ld?_l2cache_rq_g=0 in w-cycle but spec_pick=1
+wire    ld0_pkt_vld_tmp ;
+//bug 3964 - replace ld0_pkt_vld_unmasked w/ ld0_l2cache_rq_w2
+//assign lsu_ld0_spec_vld_kill_w2  =  ld0_spec_pick_vld_w2 & (~ld0_pkt_vld_unmasked | ld0_l2cache_rq_kill | ld0_ldbl_rq_w2 | non_l2bnk_mx0_d1) ;
+assign lsu_ld0_spec_vld_kill_w2  =  ld0_spec_pick_vld_w2 & (~ld0_l2cache_rq_w2 | ld0_l2cache_rq_kill | ld0_ldbl_rq_w2 | non_l2bnk_mx0_d1) ;
+// Non-speculative valid: unmasked valid, not picked in d1/d2, not killed or re-requested in w2, not a blocked prefetch.
+assign  ld0_pkt_vld_tmp =  ld0_pkt_vld_unmasked & ~(ld0_pcx_rq_sel_d1 | ld0_pcx_rq_sel_d2) &
+                      ~(ld0_l2cache_rq_kill | ld0_ldbl_rq_w2) &
+                      ~(pref_rq_vld0 & lsu_no_spc_pref[0]) ;  // prefetch pending 
+                       // Final packet valid: the non-speculative valid or the speculative g-stage valid.
+assign  ld0_pkt_vld = ld0_pkt_vld_tmp | ld0_spec_vld_g ;
+// bug2705 - speculative pick in w-cycle -end
+
+//assign  ld0_pkt_vld = ld0_pkt_vld_unmasked & ~ld0_pcx_rq_sel_d1 ; 
+
+assign  ld0_fill_reset = reset | (lsu_dfq_ld_vld & lsu_dcfill_active_e & dfq_byp_sel[0]) ;    // reset, or a valid dfq load with dcfill active and dfq_byp_sel[0] set
+// All four threads' fill resets are flopped twice (d1, then d2_tmp) by the two flops below.
+
+dff_s #(4) stgm_lduwyd1 (
+        .din    ({ld0_fill_reset,ld1_fill_reset,ld2_fill_reset,ld3_fill_reset}),
+        .q      ({ld0_fill_reset_d1,ld1_fill_reset_d1,ld2_fill_reset_d1,ld3_fill_reset_d1}),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+// Second flop; the *_d2_tmp outputs are ORed with each thread's kill to form ldN_fill_reset_d2.
+dff_s #(4) stgm_lduwyd2 (
+        .din    ({ld0_fill_reset_d1,ld1_fill_reset_d1,ld2_fill_reset_d1,ld3_fill_reset_d1}),
+        .q      ({ld0_fill_reset_d2_tmp,ld1_fill_reset_d2_tmp,ld2_fill_reset_d2_tmp,ld3_fill_reset_d2_tmp}),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+wire  ld0_l2cache_rq_w2_tmp;
+wire  ld0_l2cache_rq_g_tmp;
+// Non-prefetch part of the g-stage request, flopped once into w2.
+assign ld0_l2cache_rq_g_tmp  =  ld0_l2cache_rq_g & ~pref_inst_g ;
+
+dff_s #(1) ff_ld0_l2cache_rq_w2 (
+        .din    (ld0_l2cache_rq_g_tmp),
+        .q      (ld0_l2cache_rq_w2_tmp),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Enable for the state and way flops: flopped non-prefetch request or the w2 request term.
+//wire ld0_unfilled_en ;
+//assign	ld0_unfilled_en = ld0_l2cache_rq & ~pref_inst_g ;
+wire ld0_unfilled_wy_en ;
+assign	ld0_unfilled_wy_en = ld0_l2cache_rq_w2_tmp | ld0_ldbl_rq_w2 ;
+// Data for the state flop: the enable with the kill masked out.
+wire  ld0_l2cache_rq_tmp;
+assign ld0_l2cache_rq_tmp  = ld0_unfilled_wy_en & ~ld0_l2cache_rq_kill;
+
+// ld valid until fill occurs.
+dffre_s #(1)  ld0out_state (
+        //.din    (ld0_l2cache_rq),
+        .din    (ld0_l2cache_rq_tmp),
+        .q      (ld0_unfilled_tmp),
+        .rst    (ld0_fill_reset_d2),  .en     (ld0_unfilled_wy_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+// Way flop: captures lsu_lmq_pkt_way_w2[1:0] under the same enable and reset as the state flop.
+dffre_s #(2)  ld0out_state_way (
+        //.din    (ld_pcx_pkt_wy_mx0[1:0]}),
+        .din    (lsu_lmq_pkt_way_w2[1:0]),
+        .q      (ld0_unfilled_wy[1:0]),
+        .rst    (ld0_fill_reset_d2),  .en     (ld0_unfilled_wy_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+// Reset for both flops: the twice-flopped fill reset or this thread's kill.
+assign	ld0_fill_reset_d2 = ld0_fill_reset_d2_tmp | ld0_l2cache_rq_kill ;
+//assign	ld0_unfilled = ld0_unfilled_tmp & ~ld0_l2cache_rq_kill ;
+assign	ld0_unfilled = ld0_unfilled_tmp ;
+
+//bug3516
+//assign  non_l2bnk = tlb_pgnum_g[39] & tlb_pgnum_g[38] ;
+assign  non_l2bnk = tlb_pgnum_g[39]  & ~(~tlb_pgnum_g[38]  & tlb_pgnum_g[37]) ;       // bit 39 set, except when bits [38:37] are 01
+// The captured 3-bit address is {non_l2bnk_mx0, ldst_va_mx0[7:6]}, loaded when ld0_l2cache_rq is set.
+// ld l2bank address
+dffe_s  #(3) ld0_l2bnka (
+        .din    ({non_l2bnk_mx0,ldst_va_mx0[7:6]}),
+        .q      (ld0_l2bnk_addr[2:0]),
+  .en (ld0_l2cache_rq),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//bug2705 - add byp for address to be available in w-cycle
+//7/21/03: timing fix - non_l2bnk_mx0 (uses tlb_pgnum_g[39:37] which arrives in qctl1 ~400ps)
+//         this will cause timing paths in spec pick in w-cycle; hence assume $able access for
+//         spec pick and kill pkt vld in w2 if non_l2bnk_mx0=1 (non$ access)
+wire  [2:0]  ld0_l2bnk_addr_mx ;       // flopped address once a packet valid is held, else the bypass with bit 2 forced to 0
+assign  ld0_l2bnk_addr_mx[2:0]  =  ld0_pkt_vld_unmasked ? ld0_l2bnk_addr[2:0] :
+                                                          {1'b0,ldst_va_mx0[7:6]} ;             // assume $able access for spec pick
+
+//assign  ld0_l2bnk_addr_mx[2:0]  =  (ld0_inst_vld_unflushed  & lsu_inst_vld_tmp) ? 
+//                                        {1'b0,ldst_va_mx0[7:6]} :             // assume $able access for spec pick
+//                                        //{non_l2bnk_mx0,ldst_va_mx0[7:6]} : 
+//				             ld0_l2bnk_addr[2:0] ;
+// The flop below feeds the non_l2bnk_mx0_d1 term of lsu_ld0_spec_vld_kill_w2.
+//7/21/03: timing fix - non_l2bnk_mx0 (uses tlb_pgnum_g[39:37] which arrives in qctl1 ~400ps)
+//         this will cause timing paths in spec pick in w-cycle; hence assume $able access for
+//         spec pick and kill pkt vld in w2
+dff_s #(1) ff_non_l2bnk_mx0_d1 (
+        .din    (non_l2bnk_mx0),
+        .q      (non_l2bnk_mx0_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// bug2705: one-hot decode of the bypassed address ld0_l2bnk_addr_mx[2:0]; bit 2 on its own selects dest[4].
+assign ld0_l2bnk_dest[0] = ~(ld0_l2bnk_addr_mx[2] | ld0_l2bnk_addr_mx[1] | ld0_l2bnk_addr_mx[0]) ;
+assign ld0_l2bnk_dest[1] = ~(ld0_l2bnk_addr_mx[2] | ld0_l2bnk_addr_mx[1]) & ld0_l2bnk_addr_mx[0] ;
+assign ld0_l2bnk_dest[2] = ~(ld0_l2bnk_addr_mx[2] | ld0_l2bnk_addr_mx[0]) & ld0_l2bnk_addr_mx[1] ;
+assign ld0_l2bnk_dest[3] = (&ld0_l2bnk_addr_mx[1:0]) & ~ld0_l2bnk_addr_mx[2] ;
+assign ld0_l2bnk_dest[4] = ld0_l2bnk_addr_mx[2] ;
+
+// THREAD1 LOAD PCX REQUEST CONTROL
+
+wire	ld1_l2cache_rq_kill ;   // kills thread1's request: valid w2 load with (full raw and no forced l2 access) or a perr kill
+assign	ld1_l2cache_rq_kill = 
+	ld1_inst_vld_w2 & ((ld_stb_full_raw_w2 & ~dbl_force_l2access_w2) | perr_ld_rq_kill_w2) ; 	
+					// full-raw which looks like partial
+assign	ld1_ldbl_rq_w2 =        // w2 request: (full raw with forced l2 access) or partial raw, unless atomic, perr-killed or internal alt-space asi
+		((ld_stb_full_raw_w2 & dbl_force_l2access_w2) | ld_stb_partial_raw_w2)
+			& ~atomic_w2 & ~perr_ld_rq_kill_w2 & ~(asi_internal_w2 & alt_space_w2) &
+			ld1_inst_vld_w2 ;
+// Reset on: reset, a d2 pick that is not squashed / re-validated / annulled / awaiting a 2nd dtag-perr packet, or a kill.
+assign ld1_vld_reset = 
+  (reset | (ld1_pcx_rq_sel_d2 & ~(pcx_req_squash_d1 | ld1_inst_vld_g | bld_annul_d1[1] | dtag_perr_pkt2_vld_d1[1]))) |
+  ld1_l2cache_rq_kill ;
+  //(reset | (ld1_pcx_rq_sel_d2 & ~(pcx_req_squash_d1 | ld1_inst_vld_g | bld_annul_d1[1]))) | // bug2877
+  //(reset | (ld1_pcx_rq_sel_d1 & ~(pcx_req_squash | ld1_inst_vld_g | bld_annul[1]))) ;
+
+wire  ld1_l2cache_rq_g;       // g-stage request: non-internal, non-atomic load miss for a valid thread1 load, or a thread1 prefetch
+assign ld1_l2cache_rq_g = 
+        (((lsu_ld_miss_g & ~ldxa_internal))
+        //((lsu_ld_hit_g | lsu_ld_miss_g) & (ld_stb_partial_raw_g))) // ldst_dbl always rqs
+        & ~atomic_g & ld1_inst_vld_g)  
+        | pref_rq_vld1_g ;
+// Combined request: the g-stage request or the w2 request term.
+assign ld1_l2cache_rq =  ld1_l2cache_rq_g | ld1_ldbl_rq_w2 ;
+// ld1_vld holds {request valid, request-is-prefetch}; loaded when ld1_l2cache_rq is set, cleared via ld1_vld_reset.
+   
+// ld valid
+wire pref_rq_vld1;   
+dffre_s #(2)  ld1_vld (
+        .din    ({ld1_l2cache_rq, pref_rq_vld1_mx}),
+        .q      ({ld1_pkt_vld_unmasked, pref_rq_vld1}),
+  .rst  (ld1_vld_reset),  .en (ld1_l2cache_rq),        // NOTE(review): rst-vs-en priority is set inside dffre_s - confirm
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// bug2705 - speculative pick in w-cycle-begin
+wire    ld1_spec_vld_g ;       // unflushed valid load, TLB CAM hit, no forced l2 access, not atomic/internal, no other thread's valid held
+assign  ld1_spec_vld_g  =  ld1_inst_vld_unflushed & lsu_inst_vld_tmp & ~dbl_force_l2access_g & tlb_cam_hit_g  &
+                           ~atomic_or_ldxa_internal_rq_g &
+                           ~(ld0_pkt_vld_unmasked | ld2_pkt_vld_unmasked | ld3_pkt_vld_unmasked);
+//assign  ld1_spec_vld_g  =  ld1_inst_vld_unflushed & lsu_inst_vld_tmp & ~dbl_force_l2access_g & tlb_cam_hit_g ;
+// Flop: ld1_spec_pick_vld_g (driven outside this section) -> ld1_spec_pick_vld_w2.
+dff_s #(1) ff_ld1_spec_pick_vld_w2 (
+        .din    (ld1_spec_pick_vld_g),
+        .q      (ld1_spec_pick_vld_w2),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// kill packet valid if spec req is picked in w and stb hits in w2
+wire    ld1_pkt_vld_tmp ;
+assign lsu_ld1_spec_vld_kill_w2  =  ld1_spec_pick_vld_w2 & (~ld1_l2cache_rq_w2 | ld1_l2cache_rq_kill | ld1_ldbl_rq_w2 | non_l2bnk_mx1_d1) ;
+// Non-speculative valid: unmasked valid, not picked in d1/d2, not killed or re-requested in w2, not a blocked prefetch.
+assign  ld1_pkt_vld_tmp =  ld1_pkt_vld_unmasked & ~(ld1_pcx_rq_sel_d1 | ld1_pcx_rq_sel_d2) &
+                      ~(ld1_l2cache_rq_kill | ld1_ldbl_rq_w2) &
+                      ~(pref_rq_vld1 & lsu_no_spc_pref[1]) ; 
+// Final packet valid: the non-speculative valid or the speculative g-stage valid.
+assign  ld1_pkt_vld = ld1_pkt_vld_tmp | ld1_spec_vld_g ;
+// bug2705 - speculative pick in w-cycle-end
+
+//assign  ld1_pkt_vld = ld1_pkt_vld_unmasked & ~ld1_pcx_rq_sel_d1 ; 
+
+
+assign  ld1_fill_reset = reset | (lsu_dfq_ld_vld & lsu_dcfill_active_e & dfq_byp_sel[1]) ;    // reset, or a valid dfq load with dcfill active and dfq_byp_sel[1] set
+
+wire  ld1_l2cache_rq_g_tmp;
+wire  ld1_l2cache_rq_w2_tmp;
+// Non-prefetch part of the g-stage request, flopped once into w2.
+assign ld1_l2cache_rq_g_tmp  =  ld1_l2cache_rq_g & ~pref_inst_g ;
+ 
+dff_s #(1) ff_ld1_l2cache_rq_w2 (
+         .din    (ld1_l2cache_rq_g_tmp),
+         .q      (ld1_l2cache_rq_w2_tmp),
+         .clk  (clk),
+         .se     (1'b0),       .si (),          .so ()
+         );
+// Enable for the state and way flops: flopped non-prefetch request or the w2 request term.
+//wire ld1_unfilled_en ;
+//assign	ld1_unfilled_en = ld1_l2cache_rq & ~pref_inst_g ;
+wire ld1_unfilled_wy_en ;
+assign	ld1_unfilled_wy_en = ld1_l2cache_rq_w2_tmp | ld1_ldbl_rq_w2 ;
+// Data for the state flop: the enable with the kill masked out.
+wire  ld1_l2cache_rq_tmp;
+assign ld1_l2cache_rq_tmp  = ld1_unfilled_wy_en & ~ld1_l2cache_rq_kill;
+
+// ld valid until fill occurs.
+dffre_s  #(1)  ld1out_state (
+        //.din    (ld1_l2cache_rq),
+        .din    (ld1_l2cache_rq_tmp),
+        .q      (ld1_unfilled_tmp),
+        .rst    (ld1_fill_reset_d2),  .en     (ld1_unfilled_wy_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+  );
+dffre_s  #(2)  ld1out_state_way (       // way flop: captures lsu_lmq_pkt_way_w2[1:0] under the same enable and reset
+        //.din    (ld_pcx_pkt_wy_mx1[1:0]),
+        .din    (lsu_lmq_pkt_way_w2[1:0]),
+        .q      (ld1_unfilled_wy[1:0]),
+        .rst    (ld1_fill_reset_d2),  .en     (ld1_unfilled_wy_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+  );
+
+// Reset for both flops: the twice-flopped fill reset or this thread's kill.
+assign	ld1_fill_reset_d2 = ld1_fill_reset_d2_tmp | ld1_l2cache_rq_kill ;
+//assign	ld1_unfilled = ld1_unfilled_tmp & ~ld1_l2cache_rq_kill ;
+assign	ld1_unfilled = ld1_unfilled_tmp ;
+
+// ld l2bank address: {non_l2bnk_mx1, ldst_va_mx1[7:6]}, loaded when ld1_l2cache_rq is set
+dffe_s  #(3) ld1_l2bnka (
+        .din    ({non_l2bnk_mx1,ldst_va_mx1[7:6]}),
+        .q      (ld1_l2bnk_addr[2:0]),
+  .en (ld1_l2cache_rq),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//bug2705 - add byp for address to be available in w-cycle
+//7/21/03: timing fix - non_l2bnk_mx1 (uses tlb_pgnum_g[39:37] which arrives in qctl1 ~400ps)
+//         this will cause timing paths in spec pick in w-cycle; hence assume $able access for
+//         spec pick and kill pkt vld in w2 if non_l2bnk_mx1=1 (non$ access)
+// Flopped address once a packet valid is held, else the bypass with bit 2 forced to 0.
+wire  [2:0]  ld1_l2bnk_addr_mx ;
+assign  ld1_l2bnk_addr_mx[2:0]  =  ld1_pkt_vld_unmasked  ? ld1_l2bnk_addr[2:0] :
+				           {1'b0,ldst_va_mx1[7:6]} ; 
+
+//assign  ld1_l2bnk_addr_mx[2:0]  =  (ld1_inst_vld_unflushed  & lsu_inst_vld_tmp) ? 
+//				           {1'b0,ldst_va_mx1[7:6]} : 
+//				           //{non_l2bnk_mx1,ldst_va_mx1[7:6]} : 
+//						ld1_l2bnk_addr[2:0] ;
+// The flop below feeds the non_l2bnk_mx1_d1 term of lsu_ld1_spec_vld_kill_w2.
+//7/21/03: timing fix - non_l2bnk_mx1 (uses tlb_pgnum_g[39:37] which arrives in qctl1 ~400ps)
+//         this will cause timing paths in spec pick in w-cycle; hence assume $able access for
+//         spec pick and kill pkt vld in w2
+dff_s #(1) ff_non_l2bnk_mx1_d1 (
+        .din    (non_l2bnk_mx1),
+        .q      (non_l2bnk_mx1_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// bug2705: one-hot decode of the bypassed address ld1_l2bnk_addr_mx[2:0]; bit 2 on its own selects dest[4].
+assign ld1_l2bnk_dest[0] = ~(ld1_l2bnk_addr_mx[2] | ld1_l2bnk_addr_mx[1] | ld1_l2bnk_addr_mx[0]) ;
+assign ld1_l2bnk_dest[1] = ~(ld1_l2bnk_addr_mx[2] | ld1_l2bnk_addr_mx[1]) & ld1_l2bnk_addr_mx[0] ;
+assign ld1_l2bnk_dest[2] = ~(ld1_l2bnk_addr_mx[2] | ld1_l2bnk_addr_mx[0]) & ld1_l2bnk_addr_mx[1] ;
+assign ld1_l2bnk_dest[3] = (&ld1_l2bnk_addr_mx[1:0]) & ~ld1_l2bnk_addr_mx[2] ;
+assign ld1_l2bnk_dest[4] = ld1_l2bnk_addr_mx[2] ;
+
+
+// THREAD2 LOAD PCX REQUEST CONTROL
+
+wire	ld2_l2cache_rq_kill ;   // kills thread2's request: valid w2 load with (full raw and no forced l2 access) or a perr kill
+assign	ld2_l2cache_rq_kill = 
+	ld2_inst_vld_w2 & ((ld_stb_full_raw_w2 & ~dbl_force_l2access_w2) | perr_ld_rq_kill_w2) ; 	
+						// full-raw which looks like partial
+assign	ld2_ldbl_rq_w2 =        // w2 request: (full raw with forced l2 access) or partial raw, unless atomic, perr-killed or internal alt-space asi
+		((ld_stb_full_raw_w2 & dbl_force_l2access_w2) | ld_stb_partial_raw_w2)
+			& ~atomic_w2 & ~perr_ld_rq_kill_w2 & ~(asi_internal_w2 & alt_space_w2) & 
+			ld2_inst_vld_w2 ;
+//assign	ld2_l2cache_rq_kill = ld2_inst_vld_w2 & ld_stb_full_raw_w2 & ~dbl_force_l2access_w2 ;
+//assign	ld2_ldbl_rq_w2 = ld_stb_full_raw_w2 & dbl_force_l2access_w2 & ~atomic_w2 & ld2_inst_vld_w2 ;
+// Reset on: reset, a d2 pick that is not squashed / re-validated / annulled / awaiting a 2nd dtag-perr packet, or a kill.
+assign ld2_vld_reset = 
+  (reset | (ld2_pcx_rq_sel_d2 & ~(pcx_req_squash_d1 | ld2_inst_vld_g | bld_annul_d1[2] | dtag_perr_pkt2_vld_d1[2]))) |
+  ld2_l2cache_rq_kill ;
+  //(reset | (ld2_pcx_rq_sel_d2 & ~(pcx_req_squash_d1 | ld2_inst_vld_g | bld_annul_d1[2]))) | // bug2877
+  //(reset | (ld2_pcx_rq_sel_d1 & ~(pcx_req_squash | ld2_inst_vld_g | bld_annul[2]))) ;
+
+wire  ld2_l2cache_rq_g;
+// g-stage request: non-internal, non-atomic load miss for a valid thread2 load, or a thread2 prefetch request.
+assign ld2_l2cache_rq_g = 
+        (((lsu_ld_miss_g & ~ldxa_internal)) 
+        //((lsu_ld_hit_g | lsu_ld_miss_g) & (ld_stb_partial_raw_g))) // ldst_dbl always rqs
+        & ~atomic_g & ld2_inst_vld_g )
+        | pref_rq_vld2_g ;
+   // Combined request: the g-stage request or the w2 request term.
+assign ld2_l2cache_rq = ld2_l2cache_rq_g | ld2_ldbl_rq_w2 ;
+// ld2_vld holds {request valid, request-is-prefetch}; loaded when ld2_l2cache_rq is set, cleared via ld2_vld_reset.
+   
+// ld valid
+wire pref_rq_vld2;         
+dffre_s #(2) ld2_vld (
+        .din    ({ld2_l2cache_rq, pref_rq_vld2_mx}),
+        .q      ({ld2_pkt_vld_unmasked, pref_rq_vld2} ),
+  .rst  (ld2_vld_reset),  .en (ld2_l2cache_rq),        // NOTE(review): rst-vs-en priority is set inside dffre_s - confirm
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// bug2705 - speculative pick in w-cycle - begin
+wire    ld2_spec_vld_g ;       // unflushed valid load, TLB CAM hit, no forced l2 access, not atomic/internal, no other thread's valid held
+assign  ld2_spec_vld_g  =  ld2_inst_vld_unflushed & lsu_inst_vld_tmp & ~dbl_force_l2access_g & tlb_cam_hit_g  &
+                           ~atomic_or_ldxa_internal_rq_g &
+                           ~(ld0_pkt_vld_unmasked | ld1_pkt_vld_unmasked | ld3_pkt_vld_unmasked);
+//assign  ld2_spec_vld_g  =  ld2_inst_vld_unflushed & lsu_inst_vld_tmp & ~dbl_force_l2access_g & tlb_cam_hit_g ;
+// Flop: ld2_spec_pick_vld_g (driven outside this section) -> ld2_spec_pick_vld_w2.
+dff_s #(1) ff_ld2_spec_pick_vld_w2 (
+        .din    (ld2_spec_pick_vld_g),
+        .q      (ld2_spec_pick_vld_w2),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// kill packet valid if spec req is picked in w and stb hits in w2
+wire    ld2_pkt_vld_tmp ;
+assign lsu_ld2_spec_vld_kill_w2  =  ld2_spec_pick_vld_w2 & (~ld2_l2cache_rq_w2 | ld2_l2cache_rq_kill | ld2_ldbl_rq_w2 | non_l2bnk_mx2_d1) ;
+// Non-speculative valid: unmasked valid, not picked in d1/d2, not killed or re-requested in w2, not a blocked prefetch.
+assign  ld2_pkt_vld_tmp = ld2_pkt_vld_unmasked & ~(ld2_pcx_rq_sel_d1 | ld2_pcx_rq_sel_d2) & 
+                      ~(ld2_l2cache_rq_kill | ld2_ldbl_rq_w2) &
+                      ~(pref_rq_vld2 & lsu_no_spc_pref[2]) ; 
+// Final packet valid: the non-speculative valid or the speculative g-stage valid.
+assign  ld2_pkt_vld = ld2_pkt_vld_tmp | ld2_spec_vld_g ;
+// bug2705 - speculative pick in w-cycle - end
+
+//assign  ld2_pkt_vld = ld2_pkt_vld_unmasked & ~ld2_pcx_rq_sel_d1 ; 
+
+
+assign  ld2_fill_reset = reset | (lsu_dfq_ld_vld & lsu_dcfill_active_e & dfq_byp_sel[2]) ;    // reset, or a valid dfq load with dcfill active and dfq_byp_sel[2] set
+
+wire  ld2_l2cache_rq_g_tmp;
+wire  ld2_l2cache_rq_w2_tmp;
+// Non-prefetch part of the g-stage request, flopped once into w2.
+assign ld2_l2cache_rq_g_tmp  =  ld2_l2cache_rq_g & ~pref_inst_g ;
+
+dff_s #(1) ff_ld2_l2cache_rq_w2 (
+         .din    (ld2_l2cache_rq_g_tmp),
+         .q      (ld2_l2cache_rq_w2_tmp),
+         .clk  (clk),
+         .se     (1'b0),       .si (),          .so ()
+         );
+// Enable for the state and way flops: flopped non-prefetch request or the w2 request term.
+//wire ld2_unfilled_en ;
+//assign	ld2_unfilled_en = ld2_l2cache_rq & ~pref_inst_g ;
+wire ld2_unfilled_wy_en ;
+assign	ld2_unfilled_wy_en = ld2_l2cache_rq_w2_tmp | ld2_ldbl_rq_w2 ;
+// Data for the state flop: the enable with the kill masked out.
+wire  ld2_l2cache_rq_tmp;
+assign ld2_l2cache_rq_tmp  = ld2_unfilled_wy_en & ~ld2_l2cache_rq_kill;
+
+// ld valid until fill occurs.
+dffre_s  #(1) ld2out_state (
+        //.din    (ld2_l2cache_rq),
+        .din    (ld2_l2cache_rq_tmp),
+        .q      (ld2_unfilled_tmp),
+        .rst    (ld2_fill_reset_d2),  .en     (ld2_unfilled_wy_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+dffre_s  #(2) ld2out_state_way (        // way flop: captures lsu_lmq_pkt_way_w2[1:0] under the same enable and reset
+        .din    (lsu_lmq_pkt_way_w2[1:0]),
+        .q      (ld2_unfilled_wy[1:0]),
+        .rst    (ld2_fill_reset_d2),  .en     (ld2_unfilled_wy_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Reset for both flops: the twice-flopped fill reset or this thread's kill.
+assign	ld2_fill_reset_d2 = ld2_fill_reset_d2_tmp | ld2_l2cache_rq_kill ;
+//assign	ld2_unfilled = ld2_unfilled_tmp & ~ld2_l2cache_rq_kill ;
+assign	ld2_unfilled = ld2_unfilled_tmp ;
+
+// ld l2bank address: {non_l2bnk_mx2, ldst_va_mx2[7:6]}, loaded when ld2_l2cache_rq is set
+dffe_s  #(3) ld2_l2bnka (
+        .din    ({non_l2bnk_mx2,ldst_va_mx2[7:6]}),
+        .q      (ld2_l2bnk_addr[2:0]),
+  .en (ld2_l2cache_rq),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//bug2705 - add byp for address to be available in w-cycle
+//7/21/03: timing fix - non_l2bnk_mx2 (uses tlb_pgnum_g[39:37] which arrives in qctl1 ~400ps)
+//         this will cause timing paths in spec pick in w-cycle; hence assume $able access for
+//         spec pick and kill pkt vld in w2 if non_l2bnk_mx2=1 (non$ access)
+// Flopped address once a packet valid is held, else the bypass with bit 2 forced to 0.
+wire  [2:0]  ld2_l2bnk_addr_mx ;
+assign  ld2_l2bnk_addr_mx[2:0]  =  ld2_pkt_vld_unmasked ? ld2_l2bnk_addr[2:0] :
+					{1'b0,ldst_va_mx2[7:6]} ; 
+
+//assign  ld2_l2bnk_addr_mx[2:0]  =  (ld2_inst_vld_unflushed  & lsu_inst_vld_tmp) ? 
+//					{1'b0,ldst_va_mx2[7:6]} : 
+//					//{non_l2bnk_mx2,ldst_va_mx2[7:6]} : 
+//						ld2_l2bnk_addr[2:0] ;
+// The flop below feeds the non_l2bnk_mx2_d1 term of lsu_ld2_spec_vld_kill_w2.
+//7/21/03: timing fix - non_l2bnk_mx2 (uses tlb_pgnum_g[39:37] which arrives in qctl1 ~400ps)
+//         this will cause timing paths in spec pick in w-cycle; hence assume $able access for
+//         spec pick and kill pkt vld in w2
+dff_s #(1) ff_non_l2bnk_mx2_d1 (
+        .din    (non_l2bnk_mx2),
+        .q      (non_l2bnk_mx2_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// bug2705: one-hot decode of the bypassed address ld2_l2bnk_addr_mx[2:0]; bit 2 on its own selects dest[4].
+assign ld2_l2bnk_dest[0] = ~(ld2_l2bnk_addr_mx[2] | ld2_l2bnk_addr_mx[1] | ld2_l2bnk_addr_mx[0]) ;
+assign ld2_l2bnk_dest[1] = ~(ld2_l2bnk_addr_mx[2] | ld2_l2bnk_addr_mx[1]) & ld2_l2bnk_addr_mx[0] ;
+assign ld2_l2bnk_dest[2] = ~(ld2_l2bnk_addr_mx[2] | ld2_l2bnk_addr_mx[0]) & ld2_l2bnk_addr_mx[1] ;
+assign ld2_l2bnk_dest[3] = (&ld2_l2bnk_addr_mx[1:0]) & ~ld2_l2bnk_addr_mx[2] ;
+assign ld2_l2bnk_dest[4] = ld2_l2bnk_addr_mx[2] ;
+
+// THREAD3 LOAD PCX REQUEST CONTROL
+
+wire	ld3_l2cache_rq_kill ;   // kills thread3's request: valid w2 load with (full raw and no forced l2 access) or a perr kill
+assign	ld3_l2cache_rq_kill = 
+	ld3_inst_vld_w2 & ((ld_stb_full_raw_w2 & ~dbl_force_l2access_w2) | perr_ld_rq_kill_w2) ; 	
+						// full-raw which looks like partial
+assign	ld3_ldbl_rq_w2 =        // w2 request: (full raw with forced l2 access) or partial raw, unless atomic, perr-killed or internal alt-space asi
+		((ld_stb_full_raw_w2 & dbl_force_l2access_w2) | ld_stb_partial_raw_w2)
+			& ~atomic_w2 & ~perr_ld_rq_kill_w2 & ~(asi_internal_w2 & alt_space_w2) &
+			ld3_inst_vld_w2 ;
+//assign	ld3_l2cache_rq_kill = ld3_inst_vld_w2 & ld_stb_full_raw_w2 & ~dbl_force_l2access_w2 ;
+//assign	ld3_ldbl_rq_w2 = ld_stb_full_raw_w2 & dbl_force_l2access_w2 & ~atomic_w2 & ld3_inst_vld_w2 ;
+// Reset on: reset, a d2 pick that is not squashed / re-validated / annulled / awaiting a 2nd dtag-perr packet, or a kill.
+assign ld3_vld_reset = 
+  (reset | (ld3_pcx_rq_sel_d2 & ~(pcx_req_squash_d1 | ld3_inst_vld_g | bld_annul_d1[3] | dtag_perr_pkt2_vld_d1[3]))) |
+  ld3_l2cache_rq_kill ;
+  //(reset | (ld3_pcx_rq_sel_d2 & ~(pcx_req_squash_d1 | ld3_inst_vld_g | bld_annul_d1[3]))) | // bug 2877
+  //(reset | (ld3_pcx_rq_sel_d1 & ~(pcx_req_squash | ld3_inst_vld_g | bld_annul[3]))) ;
+
+wire  ld3_l2cache_rq_g;       // g-stage request: non-internal, non-atomic load miss for a valid thread3 load, or a thread3 prefetch
+assign ld3_l2cache_rq_g = 
+        (((lsu_ld_miss_g & ~ldxa_internal))
+        //((lsu_ld_hit_g | lsu_ld_miss_g) & (ld_stb_partial_raw_g))) // ldst_dbl always rqs
+        & ~atomic_g & ld3_inst_vld_g) 
+        | pref_rq_vld3_g ;
+// Combined request: the g-stage request or the w2 request term.
+assign  ld3_l2cache_rq =  ld3_l2cache_rq_g | ld3_ldbl_rq_w2 ;
+   // ld3_vld holds {request valid, request-is-prefetch}; loaded when ld3_l2cache_rq is set, cleared via ld3_vld_reset.
+   
+// ld valid
+wire pref_rq_vld3;         
+dffre_s  #(2) ld3_vld (
+        .din    ({ld3_l2cache_rq, pref_rq_vld3_mx} ),
+        .q      ({ld3_pkt_vld_unmasked, pref_rq_vld3}),
+  .rst  (ld3_vld_reset),  .en (ld3_l2cache_rq),        // NOTE(review): rst-vs-en priority is set inside dffre_s - confirm
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// bug2705 - speculative pick in w-cycle - begin
+wire    ld3_spec_vld_g ;       // unflushed valid load, TLB CAM hit, no forced l2 access, not atomic/internal, no other thread's valid held
+assign  ld3_spec_vld_g  =  ld3_inst_vld_unflushed & lsu_inst_vld_tmp & ~dbl_force_l2access_g & tlb_cam_hit_g  &
+                           ~atomic_or_ldxa_internal_rq_g &
+                           ~(ld0_pkt_vld_unmasked | ld1_pkt_vld_unmasked | ld2_pkt_vld_unmasked);
+//assign  ld3_spec_vld_g  =  ld3_inst_vld_unflushed & lsu_inst_vld_tmp & ~dbl_force_l2access_g & tlb_cam_hit_g ;
+
+// Flop: ld3_spec_pick_vld_g (driven outside this section) -> ld3_spec_pick_vld_w2.
+dff_s #(1) ff_ld3_spec_pick_vld_w2 (
+        .din    (ld3_spec_pick_vld_g),
+        .q      (ld3_spec_pick_vld_w2),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// kill packet valid if spec req is picked in w and stb hits in w2
+wire    ld3_pkt_vld_tmp ;
+assign lsu_ld3_spec_vld_kill_w2  =  ld3_spec_pick_vld_w2 & (~ld3_l2cache_rq_w2 | ld3_l2cache_rq_kill | ld3_ldbl_rq_w2 | non_l2bnk_mx3_d1) ;
+// Non-speculative valid: unmasked valid, not picked in d1/d2, not killed or re-requested in w2, not a blocked prefetch.
+assign  ld3_pkt_vld_tmp = ld3_pkt_vld_unmasked & ~(ld3_pcx_rq_sel_d1 | ld3_pcx_rq_sel_d2) & 
+                      ~(ld3_l2cache_rq_kill | ld3_ldbl_rq_w2) &
+                      ~(pref_rq_vld3 & lsu_no_spc_pref[3]) ;
+// Final packet valid: the non-speculative valid or the speculative g-stage valid.
+assign  ld3_pkt_vld = ld3_pkt_vld_tmp | ld3_spec_vld_g ;
+// bug2705 - speculative pick in w-cycle - end
+
+//assign  ld3_pkt_vld = ld3_pkt_vld_unmasked & ~ld3_pcx_rq_sel_d1 ; 
+// ld3 fill reset: global reset, or (lsu_dfq_ld_vld & lsu_dcfill_active_e) with dfq_byp_sel[3] set.
+assign  ld3_fill_reset = reset | (lsu_dfq_ld_vld & lsu_dcfill_active_e & dfq_byp_sel[3]) ;
+
+wire  ld3_l2cache_rq_g_tmp;
+wire  ld3_l2cache_rq_w2_tmp;
+// ld3 request term with pref_inst_g masked out, then delayed by one flop.
+assign ld3_l2cache_rq_g_tmp  =  ld3_l2cache_rq_g & ~pref_inst_g ;
+
+dff_s #(1) ff_ld3_l2cache_rq_w2 (
+         .din    (ld3_l2cache_rq_g_tmp),
+         .q      (ld3_l2cache_rq_w2_tmp),  // feeds ld3_unfilled_wy_en
+         .clk  (clk),
+         .se     (1'b0),       .si (),          .so ()
+         );
+
+//wire ld3_unfilled_en ;
+//assign	ld3_unfilled_en = ld3_l2cache_rq & ~pref_inst_g ;
+wire ld3_unfilled_wy_en ;  // load enable for the ld3 unfilled-state and unfilled-way flops
+assign	ld3_unfilled_wy_en = ld3_l2cache_rq_w2_tmp | ld3_ldbl_rq_w2 ;
+// Value loaded into the unfilled-state flop: the enable, forced low when the request is killed.
+wire  ld3_l2cache_rq_tmp;
+assign ld3_l2cache_rq_tmp  = ld3_unfilled_wy_en & ~ld3_l2cache_rq_kill;
+
+// ld3 unfilled state: loaded when ld3_unfilled_wy_en is set and held until ld3_fill_reset_d2 (ld valid until fill occurs).
+dffre_s #(1)  ld3out_state (
+        //.din    (ld3_l2cache_rq),
+        .din    (ld3_l2cache_rq_tmp),
+        .q      (ld3_unfilled_tmp),
+        .rst    (ld3_fill_reset_d2),  .en     (ld3_unfilled_wy_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+dffre_s #(2)  ld3out_state_way (  // way bits captured with the same rst/en as the state flop above
+        .din    (lsu_lmq_pkt_way_w2[1:0]),
+        .q      (ld3_unfilled_wy[1:0]),
+        .rst    (ld3_fill_reset_d2),  .en     (ld3_unfilled_wy_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// The reset used by both flops also fires on ld3_l2cache_rq_kill, so a killed request clears the state.
+assign	ld3_fill_reset_d2 = ld3_fill_reset_d2_tmp | ld3_l2cache_rq_kill ;
+//assign	ld3_unfilled = ld3_unfilled_tmp & ~ld3_l2cache_rq_kill ;
+assign	ld3_unfilled = ld3_unfilled_tmp;
+
+// ld3 l2bank address: {non_l2bnk_mx3, ldst_va_mx3[7:6]} captured when ld3_l2cache_rq is set
+dffe_s  #(3) ld3_l2bnka (
+        .din    ({non_l2bnk_mx3,ldst_va_mx3[7:6]}),
+        .q      (ld3_l2bnk_addr[2:0]),
+  .en (ld3_l2cache_rq),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//bug2705 - add byp for address to be available in w-cycle
+//7/21/03: timing fix - non_l2bnk_mx3 (uses tlb_pgnum_g[39:37] which arrives in qctl1 ~400ps)
+//         this will cause timing paths in spec pick in w-cycle; hence assume $able access for
+//         spec pick and kill pkt vld in w2 if non_l2bnk_mx3=1 (non$ access)
+// Registered address is used once the packet is valid; otherwise the live VA bits with bit 2 forced to 0.
+wire  [2:0]  ld3_l2bnk_addr_mx ;
+assign  ld3_l2bnk_addr_mx[2:0]  =  ld3_pkt_vld_unmasked ? ld3_l2bnk_addr[2:0] :
+					{1'b0,ldst_va_mx3[7:6]} ; 
+
+//assign  ld3_l2bnk_addr_mx[2:0]  =  (ld3_inst_vld_unflushed  & lsu_inst_vld_tmp) ? 
+//					{1'b0,ldst_va_mx3[7:6]} : 
+//					//{non_l2bnk_mx3,ldst_va_mx3[7:6]} : 
+//						ld3_l2bnk_addr[2:0] ;
+
+//7/21/03: timing fix - non_l2bnk_mx3 (uses tlb_pgnum_g[39:37] which arrives in qctl1 ~400ps)
+//         this will cause timing paths in spec pick in w-cycle; hence assume $able access for
+//         spec pick and kill pkt vld in w2 (non_l2bnk_mx3_d1 is a term of lsu_ld3_spec_vld_kill_w2)
+dff_s #(1) ff_non_l2bnk_mx3_d1 (
+        .din    (non_l2bnk_mx3),
+        .q      (non_l2bnk_mx3_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+
+//bug2705 - decode ld3_l2bnk_addr_mx[2:0] (not ld3_l2bnk_addr[2:0]): dest[3:0] = addr[1:0] decode with bit2 clear, dest[4] = bit2
+assign ld3_l2bnk_dest[0] = ~ld3_l2bnk_addr_mx[2] & ~ld3_l2bnk_addr_mx[1] & ~ld3_l2bnk_addr_mx[0] ;
+assign ld3_l2bnk_dest[1] = ~ld3_l2bnk_addr_mx[2] & ~ld3_l2bnk_addr_mx[1] &  ld3_l2bnk_addr_mx[0] ;
+assign ld3_l2bnk_dest[2] = ~ld3_l2bnk_addr_mx[2] &  ld3_l2bnk_addr_mx[1] & ~ld3_l2bnk_addr_mx[0] ;
+assign ld3_l2bnk_dest[3] = ~ld3_l2bnk_addr_mx[2] &  ld3_l2bnk_addr_mx[1] &  ld3_l2bnk_addr_mx[0] ;
+assign ld3_l2bnk_dest[4] =  ld3_l2bnk_addr_mx[2] ;
+
+//=================================================================================================
+//  LMQ Miscellaneous Control
+//=================================================================================================
+
+dff_s #(1) stgm_cas (  // first delay of the casa indication: ifu_lsu_casa_e -> casa_m
+        .din    (ifu_lsu_casa_e),
+        .q      (casa_m),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s  #(1) stgg_cas (  // second delay: casa_m -> casa_g
+        .din    (casa_m),
+        .q      (casa_g),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//assign  casa0_g = casa_g & thread0_g ;
+//assign  casa1_g = casa_g & thread1_g ;
+//assign  casa2_g = casa_g & thread2_g ;
+//assign  casa3_g = casa_g & thread3_g ;
+
+// PARTIAL RAW BYPASSING.
+
+// Partial raw of load in stb. Even if the load hits in the dcache, it must follow
+// the st to the pcx, obtain merged data to bypass to the pipeline. This load will 
+// also fill the dcache. i.e., once the store is received it looks like a normal load.
+
+// This path is also used for 2nd cas pkt. rs1(addr) and rs2(cmp data) are in 1st
+// pkt which is written to stb. rd(swap value) is written to lmq as 2nd pkt. The
+// 2nd pkt will wait in the lmq until the 1st pkt is sent.
+
+// *** Atomics need to switch out the thread ***
+
+// THREAD0
+
+// timing fix: 9/15/03 - reduce loading on pcx_rq_for_stb[3:0] to stb_ctl[0-3]. it had FO2 (stb_ctl,qdp2 - cap=0.5-0.8)
+//             move the flop from qdp2 to qctl1
+// One-cycle delayed copy of pcx_rq_for_stb[3:0].
+dff_s #(4)  ff_pcx_rq_for_stb_d1 (
+        .din    (pcx_rq_for_stb[3:0]),
+        .q      (pcx_rq_for_stb_d1[3:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+dff_s #(4)  srqsel_d1 (  // same din as ff_pcx_rq_for_stb_d1; q is split into per-thread stN_pcx_rq_sel_d1
+        .din    (pcx_rq_for_stb[3:0]),
+        //.q      ({st3_pcx_rq_tmp, st2_pcx_rq_tmp,st1_pcx_rq_tmp, st0_pcx_rq_tmp}),
+        .q      ({st3_pcx_rq_sel_d1, st2_pcx_rq_sel_d1,st1_pcx_rq_sel_d1, st0_pcx_rq_sel_d1}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+dff_s #(4)  srqsel_d2 (  // second delay: stN_pcx_rq_sel_d1 -> stN_pcx_rq_sel_d2
+        .din      ({st3_pcx_rq_sel_d1, st2_pcx_rq_sel_d1,st1_pcx_rq_sel_d1, st0_pcx_rq_sel_d1}),
+        .q      ({st3_pcx_rq_sel_d2, st2_pcx_rq_sel_d2,st1_pcx_rq_sel_d2, st0_pcx_rq_sel_d2}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+dff_s #(4)  srqsel_d3 (  // third delay: stN_pcx_rq_sel_d2 -> stN_pcx_rq_sel_d3 (used by the ldN_rawp_reset terms)
+        .din    ({st3_pcx_rq_sel_d2, st2_pcx_rq_sel_d2,st1_pcx_rq_sel_d2, st0_pcx_rq_sel_d2}),
+        .q      ({st3_pcx_rq_sel_d3, st2_pcx_rq_sel_d3,st1_pcx_rq_sel_d3, st0_pcx_rq_sel_d3}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+wire	ld0_ldbl_rawp_en_w2 ;  // set condition for ld0_rawp_disabled; suppressed in the same cycle as the reset
+assign	ld0_ldbl_rawp_en_w2 = ld0_ldbl_rq_w2 & ~ld_rawp_st_ced_w2 & ~ld0_rawp_reset ;
+
+/*assign	st3_pcx_rq_sel_d1 = st3_pcx_rq_tmp & ~pcx_req_squash ;
+assign	st2_pcx_rq_sel_d1 = st2_pcx_rq_tmp & ~pcx_req_squash ;
+assign	st1_pcx_rq_sel_d1 = st1_pcx_rq_tmp & ~pcx_req_squash ;
+assign	st0_pcx_rq_sel_d1 = st0_pcx_rq_tmp & ~pcx_req_squash ;*/
+// Clear on reset, or on an unsquashed st0 pcx select (d3) while disabled and the saved ack id equals stb0_crnt_ack_id.
+assign ld0_rawp_reset =
+        (reset | (st0_pcx_rq_sel_d3 & ~pcx_req_squash_d2 & ld0_rawp_disabled & (ld0_rawp_ackid[2:0] == stb0_crnt_ack_id[2:0])));
+        //(reset | (st0_pcx_rq_sel_d2 & ~pcx_req_squash_d1 & ld0_rawp_disabled & (ld0_rawp_ackid[2:0] == stb0_crnt_ack_id[2:0])));
+
+// TO BE REMOVED ALONG WITH defines !!!
+//wire	ld_rawp_st_ced_g ;
+//assign 	ld_rawp_st_ced_g = 1'b0 ;
+
+// reset needs to be dominant in case ack comes on fly.
+// atomics will not set rawp_disabled
+assign ld0_rawp_en = 
+        //(((ld_stb_partial_raw_g) & ~ld_rawp_st_ced_g & ~ld0_rawp_reset)     // partial_raw
+        //& ~atomic_g & ld0_inst_vld_g) |          // cas inst - 2nd pkt
+	ld0_ldbl_rawp_en_w2 ;
+
+// ack-id and wait-for-ack disable - Thread 0
+dffre_s #(1)  ldrawp0_dis (  // din and en are the same signal, so the flop is set by ld0_rawp_en and cleared by rst
+        .din    (ld0_rawp_en),
+        .q      (ld0_rawp_disabled),
+        .rst    (ld0_rawp_reset),  .en     (ld0_rawp_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+dffe_s #(3)  ldrawp0_ackid (  // ack id saved whenever ld0_inst_vld_w2 is set
+        .din    (ld_rawp_st_ackid_w2[2:0]),
+        .q      (ld0_rawp_ackid[2:0]),
+        .en     (ld0_inst_vld_w2),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+// THREAD1
+
+wire	ld1_ldbl_rawp_en_w2 ;  // set condition for ld1_rawp_disabled; suppressed in the same cycle as the reset
+assign	ld1_ldbl_rawp_en_w2 = ld1_ldbl_rq_w2 & ~ld_rawp_st_ced_w2 & ~ld1_rawp_reset ;
+
+// 1st st ack for st-quad will not cause ack.
+// Clear on reset, or on an unsquashed st1 pcx select (d3) while disabled and the saved ack id equals stb1_crnt_ack_id.
+assign ld1_rawp_reset =
+        (reset | (st1_pcx_rq_sel_d3 & ~pcx_req_squash_d2 & ld1_rawp_disabled & 
+        //(reset | (st1_pcx_rq_sel_d2 & ~pcx_req_squash_d1 & ld1_rawp_disabled & 
+			(ld1_rawp_ackid[2:0] == stb1_crnt_ack_id[2:0])));
+
+// reset needs to be dominant in case ack comes on fly.
+// atomics will not set rawp_disabled
+assign ld1_rawp_en = 
+        //(((ld_stb_partial_raw_g) & ~ld_rawp_st_ced_g & ~ld1_rawp_reset) // partial raw
+        //(((ld_stb_partial_raw_g | (ld_stb_full_raw_g & ldst_dbl_g)) & ~ld_rawp_st_ced_g & ~ld1_rawp_reset) // partial raw
+        //& ~atomic_g  & ld1_inst_vld_g)  |                // cas inst - 2nd pkt
+	ld1_ldbl_rawp_en_w2 ;
+
+// ack-id and wait-for-ack disable - Thread 1
+dffre_s #(1)  ldrawp1_dis (  // din and en are the same signal, so the flop is set by ld1_rawp_en and cleared by rst
+        .din    (ld1_rawp_en),
+        .q      (ld1_rawp_disabled),
+        .rst    (ld1_rawp_reset),  .en     (ld1_rawp_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+dffe_s #(3)  ldrawp1_ackid (  // ack id saved whenever ld1_inst_vld_w2 is set
+        .din    (ld_rawp_st_ackid_w2[2:0]),
+        .q      (ld1_rawp_ackid[2:0]),
+        .en     (ld1_inst_vld_w2),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+// THREAD2
+
+wire	ld2_ldbl_rawp_en_w2 ;  // set condition for ld2_rawp_disabled; suppressed in the same cycle as the reset
+assign	ld2_ldbl_rawp_en_w2 = ld2_ldbl_rq_w2 & ~ld_rawp_st_ced_w2 & ~ld2_rawp_reset ;
+// Clear on reset, or on an unsquashed st2 pcx select (d3) while disabled and the saved ack id equals stb2_crnt_ack_id.
+assign ld2_rawp_reset =
+        (reset | (st2_pcx_rq_sel_d3 & ~pcx_req_squash_d2 & ld2_rawp_disabled & 
+        //(reset | (st2_pcx_rq_sel_d2 & ~pcx_req_squash_d1 & ld2_rawp_disabled & 
+			(ld2_rawp_ackid[2:0] == stb2_crnt_ack_id[2:0])));
+
+// reset needs to be dominant in case ack comes on fly.
+// atomics will not set rawp_disabled
+assign ld2_rawp_en = 
+        //(((ld_stb_partial_raw_g) & ~ld_rawp_st_ced_g & ~ld2_rawp_reset) // partial raw
+        //& ~atomic_g & ld2_inst_vld_g) |          // cas inst - 2nd pkt
+	ld2_ldbl_rawp_en_w2 ;
+
+// ack-id and wait-for-ack disable - Thread 2
+dffre_s #(1)  ldrawp2_dis (  // din and en are the same signal, so the flop is set by ld2_rawp_en and cleared by rst
+        .din    (ld2_rawp_en),
+        .q      (ld2_rawp_disabled),
+        .rst    (ld2_rawp_reset),  .en     (ld2_rawp_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+dffe_s #(3)  ldrawp2_ackid (  // ack id saved whenever ld2_inst_vld_w2 is set
+        .din    (ld_rawp_st_ackid_w2[2:0]),
+        .q      (ld2_rawp_ackid[2:0]),
+        .en     (ld2_inst_vld_w2),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+// THREAD3
+
+wire	ld3_ldbl_rawp_en_w2 ;  // set condition for ld3_rawp_disabled; suppressed in the same cycle as the reset
+assign	ld3_ldbl_rawp_en_w2 = ld3_ldbl_rq_w2 & ~ld_rawp_st_ced_w2 & ~ld3_rawp_reset ;
+// Clear on reset, or on an unsquashed st3 pcx select (d3) while disabled and the saved ack id equals stb3_crnt_ack_id.
+assign ld3_rawp_reset =
+        (reset | (st3_pcx_rq_sel_d3 & ~pcx_req_squash_d2 & ld3_rawp_disabled & 
+        //(reset | (st3_pcx_rq_sel_d2 & ~pcx_req_squash_d1 & ld3_rawp_disabled & 
+				(ld3_rawp_ackid[2:0] == stb3_crnt_ack_id[2:0])));
+
+// reset needs to be dominant in case ack comes on fly.
+// atomics will not set rawp_disabled
+assign ld3_rawp_en = 
+        //(((ld_stb_partial_raw_g) & ~ld_rawp_st_ced_g & ~ld3_rawp_reset) // partial raw
+        //& ~atomic_g & ld3_inst_vld_g) |          // cas inst - 2nd pkt
+	ld3_ldbl_rawp_en_w2 ;
+
+// ack-id and wait-for-ack disable - Thread 3
+dffre_s #(1)  ldrawp3_dis (  // din and en are the same signal, so the flop is set by ld3_rawp_en and cleared by rst
+        .din    (ld3_rawp_en),
+        .q      (ld3_rawp_disabled),
+        .rst    (ld3_rawp_reset),  .en     (ld3_rawp_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+dffe_s #(3)  ldrawp3_ackid (  // ack id saved whenever ld3_inst_vld_w2 is set
+        .din    (ld_rawp_st_ackid_w2[2:0]),
+        .q      (ld3_rawp_ackid[2:0]),
+        .en     (ld3_inst_vld_w2),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                
+
+
+
+//=================================================================================================
+//  INTERRUPT PCX PKT REQ CTL
+//=================================================================================================
+
+wire    intrpt_pcx_rq_sel_d2 ;
+wire    intrpt_vld_reset;
+wire    intrpt_vld_en ;
+wire  [3:0] intrpt_thread ;
+wire    intrpt_clr ;
+
+// Ack to tlu: the interrupt packet was selected two cycles ago and the request was not squashed.
+assign  lsu_tlu_pcxpkt_ack = intrpt_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+
+assign intrpt_vld_reset =  // the valid flop is cleared by reset or by the ack above
+        reset | lsu_tlu_pcxpkt_ack ;
+        //reset | (intrpt_pcx_rq_sel_d1 & ~pcx_req_squash);
+wire    intrpt_pkt_vld_unmasked ;
+// assumption is that pkt vld cannot be turned around in same cycle
+assign intrpt_vld_en = ~intrpt_pkt_vld_unmasked ;  // a new packet is accepted only while none is pending
+//assign intrpt_vld_en = ~lsu_intrpt_pkt_vld ;
+
+dff_s #(1) intpkt_stgd2 (  // intrpt_pcx_rq_sel_d1 -> intrpt_pcx_rq_sel_d2
+        .din    (intrpt_pcx_rq_sel_d1),
+        .q      (intrpt_pcx_rq_sel_d2),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// intrpt valid - loads tlu_lsu_pcxpkt_vld while intrpt_vld_en is set; rst is intrpt_vld_reset. Width is stated explicitly as #(1).
+dffre_s #(1) intrpt_vld (
+        .din    (tlu_lsu_pcxpkt_vld),
+        .q      (intrpt_pkt_vld_unmasked),
+        .rst    (intrpt_vld_reset),     .en     (intrpt_vld_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+assign  intrpt_thread[0] = ~tlu_lsu_pcxpkt_tid[19] & ~tlu_lsu_pcxpkt_tid[18] ;  // one-hot decode of tid bits [19:18]
+assign  intrpt_thread[1] = ~tlu_lsu_pcxpkt_tid[19] &  tlu_lsu_pcxpkt_tid[18] ;
+assign  intrpt_thread[2] =  tlu_lsu_pcxpkt_tid[19] & ~tlu_lsu_pcxpkt_tid[18] ;
+assign  intrpt_thread[3] =  tlu_lsu_pcxpkt_tid[19] &  tlu_lsu_pcxpkt_tid[18] ;
+// intrpt_clr: lsu_stb_empty bit of the decoded thread is set.
+assign  intrpt_clr =
+  (intrpt_thread[0] & lsu_stb_empty[0]) |
+  (intrpt_thread[1] & lsu_stb_empty[1]) |
+  (intrpt_thread[2] & lsu_stb_empty[2]) |
+  (intrpt_thread[3] & lsu_stb_empty[3]) ;
+
+wire	intrpt_clr_d1 ;  // one-cycle delayed intrpt_clr; gates intrpt_pkt_vld
+dff_s #(1) intclr_stgd1 (
+        .din    (intrpt_clr),
+        .q      (intrpt_clr_d1),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+wire	[3:0]	intrpt_cmplt ;
+// Per-thread completion: the ack qualified by the decoded thread, registered once into lsu_intrpt_cmplt.
+assign  intrpt_cmplt[0] = lsu_tlu_pcxpkt_ack & intrpt_thread[0] ;
+assign  intrpt_cmplt[1] = lsu_tlu_pcxpkt_ack & intrpt_thread[1] ;
+assign  intrpt_cmplt[2] = lsu_tlu_pcxpkt_ack & intrpt_thread[2] ;
+assign  intrpt_cmplt[3] = lsu_tlu_pcxpkt_ack & intrpt_thread[3] ;
+
+dff_s #(4) intrpt_stg (
+        .din    (intrpt_cmplt[3:0]),
+        .q      (lsu_intrpt_cmplt[3:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+assign  intrpt_pkt_vld =  // pending packet, not picked in the last two cycles, and intrpt_clr_d1 set
+intrpt_pkt_vld_unmasked & ~(intrpt_pcx_rq_sel_d1 | intrpt_pcx_rq_sel_d2) & intrpt_clr_d1 ;
+
+// ** enabled flop should not be required !!
+// intrpt l2bank address
+// ?? Can interrupt requests go to io-bridge ??
+// Using upper 3b of 5b thread field of INTR_W to address 4 l2 banks. NOTE(review): code takes 2 bits, l2baddr[11:10] - confirm
+dffe_s #(3) intrpt_l2bnka (
+        .din    ({1'b0,tlu_lsu_pcxpkt_l2baddr[11:10]}),  // bit 2 is loaded as constant 0
+        .q      (intrpt_l2bnk_addr[2:0]),
+        .en     (intrpt_vld_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// IO Requests should not go to iobridge: addr bit 2 is loaded as 0, so dest[4] stays 0 once the flop has been loaded.
+assign intrpt_l2bnk_dest[0] =
+~intrpt_l2bnk_addr[2] & ~intrpt_l2bnk_addr[1] & ~intrpt_l2bnk_addr[0] ;
+assign intrpt_l2bnk_dest[1] =
+~intrpt_l2bnk_addr[2] & ~intrpt_l2bnk_addr[1] &  intrpt_l2bnk_addr[0] ;
+assign intrpt_l2bnk_dest[2] =
+~intrpt_l2bnk_addr[2] & intrpt_l2bnk_addr[1] & ~intrpt_l2bnk_addr[0] ;
+assign intrpt_l2bnk_dest[3] =
+~intrpt_l2bnk_addr[2] & intrpt_l2bnk_addr[1] &  intrpt_l2bnk_addr[0] ;
+assign intrpt_l2bnk_dest[4] = intrpt_l2bnk_addr[2] ;
+
+//=================================================================================================
+//
+// QDP Specific Control
+//
+//=================================================================================================
+
+
+// Qualify with thread.
+// Write cas pckt 2 to lmq
+// Timing Change : ld0_l2cache_rq guarantees validity.
+//assign lmq_enable[0] = lsu_ld_miss_g & thread0_g ;
+//assign lmq_enable[0] = ld0_inst_vld_g | pref_vld0_g ;
+
+//assign lmq_enable[0] = (ld0_inst_vld_unflushed & lsu_inst_vld_w) | pref_vld0_g ;
+//assign lmq_enable[1] = (ld1_inst_vld_unflushed & lsu_inst_vld_w) | pref_vld1_g ;
+//assign lmq_enable[2] = (ld2_inst_vld_unflushed & lsu_inst_vld_w) | pref_vld2_g ;
+//assign lmq_enable[3] = (ld3_inst_vld_unflushed & lsu_inst_vld_w) | pref_vld3_g ;
+
+//bug 2771; timing path - remove flush-pipe, add ifu's flush signal (lsu_inst_vld_tmp & ~ifu_lsu_flush_w replaces lsu_inst_vld_w)
+//assign lmq_enable[0] = (ld0_inst_vld_unflushed | pref_vld0_g) & lsu_inst_vld_tmp & ~ifu_lsu_flush_w ;
+assign lmq_enable[0] = (ld0_inst_vld_unflushed | pref_vld0_g) & lsu_inst_vld_tmp & ~ifu_lsu_flush_w ;  // per-thread: load or prefetch valid, not flushed
+assign lmq_enable[1] = (ld1_inst_vld_unflushed | pref_vld1_g) & lsu_inst_vld_tmp & ~ifu_lsu_flush_w ;
+assign lmq_enable[2] = (ld2_inst_vld_unflushed | pref_vld2_g) & lsu_inst_vld_tmp & ~ifu_lsu_flush_w ;
+assign lmq_enable[3] = (ld3_inst_vld_unflushed | pref_vld3_g) & lsu_inst_vld_tmp & ~ifu_lsu_flush_w ; 
+        
+// timing fix: 5/19/03: move secondary hit way generation to w2 (lmq_enable delayed one cycle -> lmq_enable_w2)
+dff_s #(4) ff_lmq_enable_w2 (
+        .din    (lmq_enable[3:0]),
+        .q      (lmq_enable_w2[3:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+
+// needs to be 1-hot always.
+assign imiss_pcx_mx_sel = imiss_pcx_rq_sel_d1 ;
+//assign imiss_pcx_mx_sel[1] = strm_pcx_rq_sel_d1 ;
+//assign imiss_pcx_mx_sel[2] = intrpt_pcx_rq_sel_d1 ;
+//assign imiss_pcx_mx_sel[3] = fpop_pcx_rq_sel_d1 ;
+
+//11/7/03: add rst_tri_en - when set it forces select [2] high and selects [1:0] low
+wire  [2:0]  fwd_int_fp_pcx_mx_sel_tmp ;
+// Select [0] is the default: active only when neither final select [1] nor [2] is set.
+assign fwd_int_fp_pcx_mx_sel_tmp[0]= ~fwd_int_fp_pcx_mx_sel[1] & ~fwd_int_fp_pcx_mx_sel[2];
+assign fwd_int_fp_pcx_mx_sel_tmp[1]=  intrpt_pcx_rq_sel_d1 ;
+assign fwd_int_fp_pcx_mx_sel_tmp[2]=  fpop_pcx_rq_sel_d1 | fpop_pcx_rq_sel_d2 ;  // fpop holds the select for both d1 and d2
+
+assign fwd_int_fp_pcx_mx_sel[1:0] = fwd_int_fp_pcx_mx_sel_tmp[1:0] & ~{2{rst_tri_en}} ;
+assign fwd_int_fp_pcx_mx_sel[2]   = fwd_int_fp_pcx_mx_sel_tmp[2] | rst_tri_en ;
+
+
+//*************************************************************************************************
+// 			PCX REQUEST GENERATION (BEGIN)
+
+//=================================================================================================
+//  PCX REQUEST SELECTION CONTROL
+//=================================================================================================
+
+// LOAD - request valid when some destination bit lines up with queue_write, the packet is valid, and rawp is not disabling it
+// fpops have to squash other rqs in the 2nd cycle also.
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+assign ld0_pcx_rq_vld = 
+  (|(queue_write[4:0] & ld0_l2bnk_dest[4:0])) & 
+    ld0_pkt_vld & ~ld0_rawp_disabled;
+    //ld0_pkt_vld & ~ld0_rawp_disabled & ~mcycle_squash_d1;
+    //ld0_pkt_vld & ~ld0_rawp_disabled & ~st_atom_rq_d1 ;
+assign ld1_pcx_rq_vld = 
+  (|(queue_write[4:0] & ld1_l2bnk_dest[4:0])) & 
+    ld1_pkt_vld & ~ld1_rawp_disabled;
+    //ld1_pkt_vld & ~ld1_rawp_disabled & ~mcycle_squash_d1;
+    //ld1_pkt_vld & ~ld1_rawp_disabled & ~st_atom_rq_d1 ;
+assign ld2_pcx_rq_vld = 
+  (|(queue_write[4:0] & ld2_l2bnk_dest[4:0])) & 
+    ld2_pkt_vld & ~ld2_rawp_disabled ;
+    //ld2_pkt_vld & ~ld2_rawp_disabled & ~mcycle_squash_d1;
+    //ld2_pkt_vld & ~ld2_rawp_disabled & ~st_atom_rq_d1 ;
+assign ld3_pcx_rq_vld = 
+  (|(queue_write[4:0] & ld3_l2bnk_dest[4:0])) & 
+    ld3_pkt_vld & ~ld3_rawp_disabled;
+    //ld3_pkt_vld & ~ld3_rawp_disabled & ~mcycle_squash_d1;
+    //ld3_pkt_vld & ~ld3_rawp_disabled & ~st_atom_rq_d1 ;
+
+//assign  ld_pcx_rq_vld = ld0_pcx_rq_vld | ld1_pcx_rq_vld 
+//      | ld2_pcx_rq_vld | ld3_pcx_rq_vld ;
+
+wire    st0_atomic_pend_d1, st1_atomic_pend_d1, st2_atomic_pend_d1, st3_atomic_pend_d1 ;
+// Per-thread store queue-write vector: pre_qwr while that thread's atomic is pending, otherwise queue_write.
+assign  st0_q_wr[4:0] = st0_atomic_pend_d1 ? pre_qwr[4:0] : queue_write[4:0] ;
+assign  st1_q_wr[4:0] = st1_atomic_pend_d1 ? pre_qwr[4:0] : queue_write[4:0] ;
+assign  st2_q_wr[4:0] = st2_atomic_pend_d1 ? pre_qwr[4:0] : queue_write[4:0] ;
+assign  st3_q_wr[4:0] = st3_atomic_pend_d1 ? pre_qwr[4:0] : queue_write[4:0] ;
+// Atomic request per thread: the store was selected and its atomic-valid bit is set.
+assign  st0_atom_rq = (st0_pcx_rq_sel & st0_atomic_vld) ; 
+assign  st1_atom_rq = (st1_pcx_rq_sel & st1_atomic_vld) ;
+assign  st2_atom_rq = (st2_pcx_rq_sel & st2_atomic_vld) ;
+assign  st3_atom_rq = (st3_pcx_rq_sel & st3_atomic_vld) ;
+
+dff_s #(8)  avlds_d1 (  // first delay of {stN_atom_rq, stN_cas_vld}
+        .din    ({st0_atom_rq,st1_atom_rq,st2_atom_rq,st3_atom_rq,
+    st0_cas_vld,st1_cas_vld,st2_cas_vld,st3_cas_vld}),
+        .q      ({st0_atom_rq_d1,st1_atom_rq_d1,st2_atom_rq_d1,st3_atom_rq_d1,
+    st0_cas_vld_d1,st1_cas_vld_d1,st2_cas_vld_d1,st3_cas_vld_d1}),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s #(8)  avlds_d2 (  // second delay: _d1 -> _d2 (consumed by st_cas_rq_d2)
+        .din    ({st0_atom_rq_d1,st1_atom_rq_d1,st2_atom_rq_d1,st3_atom_rq_d1,
+    st0_cas_vld_d1,st1_cas_vld_d1,st2_cas_vld_d1,st3_cas_vld_d1}),
+        .q      ({st0_atom_rq_d2,st1_atom_rq_d2,st2_atom_rq_d2,st3_atom_rq_d2,
+    st0_cas_vld_d2,st1_cas_vld_d2,st2_cas_vld_d2,st3_cas_vld_d2}),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//timing fix : 7/28/03 - move the OR before flop (OR of the four per-thread atomic requests)
+assign st_atom_rq = st0_atom_rq | st1_atom_rq | st2_atom_rq | st3_atom_rq ; 
+//assign st_atom_rq_d1 = st0_atom_rq_d1 | st1_atom_rq_d1 | st2_atom_rq_d1 | st3_atom_rq_d1 ; 
+
+// timing fix: 7/28/03 - move the OR before flop (st_atom_rq_d1 is the registered OR)
+dff_s #(1)  ff_st_atom_pq (
+  .din (st_atom_rq),
+  .q   (st_atom_rq_d1),
+  .clk (clk),
+  .se  (1'b0), .si (), .so ()
+  );
+
+// st_cas_rq_d2: any thread whose d2 atomic request also has its d2 cas-valid bit set.
+assign st_cas_rq_d2 = 
+  (st0_atom_rq_d2 & st0_cas_vld_d2)  | 
+  (st1_atom_rq_d2 & st1_cas_vld_d2)  | 
+  (st2_atom_rq_d2 & st2_cas_vld_d2)  | 
+  (st3_atom_rq_d2 & st3_cas_vld_d2)  ; 
+//assign st_quad_rq_d2 = 
+//  (st0_atom_rq_d2 & ~st0_cas_vld_d2)  | 
+//  (st1_atom_rq_d2 & ~st1_cas_vld_d2)  | 
+//  (st2_atom_rq_d2 & ~st2_cas_vld_d2)  | 
+//  (st3_atom_rq_d2 & ~st3_cas_vld_d2)  ; 
+
+//timing fix: 9/17/03 - move the OR to previous cycle and add flop for spc_pcx_atom_pq
+//                      instantiate buf30 for flop output
+//assign  spc_pcx_atom_pq = 
+//    st_atom_rq_d1 |  
+//    fpop_atom_rq_pq ;
+// Store-atomic OR fpop-atomic request, formed before the flop.
+wire  spc_pcx_atom_w, spc_pcx_atom_pq_tmp ;
+assign spc_pcx_atom_w  =  st_atom_rq | fpop_atom_req ;
+
+dff_s #(1)  ff_spc_pcx_atom_pq (
+  .din (spc_pcx_atom_w),
+  .q   (spc_pcx_atom_pq_tmp),
+  .clk (clk),
+  .se  (1'b0), .si (), .so ()
+  );
+// Two buffer instances on the same flop output: spc_pcx_atom_pq and spc_pcx_atom_pq_buf2.
+bw_u1_buf_30x UZfix_spc_pcx_atom_pq_buf1 ( .a(spc_pcx_atom_pq_tmp), .z(spc_pcx_atom_pq) );
+bw_u1_buf_30x UZsize_spc_pcx_atom_pq_buf2 ( .a(spc_pcx_atom_pq_tmp), .z(spc_pcx_atom_pq_buf2) );
+
+// STORE - request valid when a destination bit lines up with the thread's stN_q_wr vector and the packet is valid
+// st will wait in pcx bypass until previous st in chain is acked !!!!
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+assign st0_pcx_rq_vld = 
+  (|(st0_q_wr[4:0] & st0_l2bnk_dest[4:0])) & st0_pkt_vld ;
+  //(|(st0_q_wr[4:0] & st0_l2bnk_dest[4:0])) & st0_pkt_vld & ~mcycle_squash_d1;
+  //(|(st0_q_wr[4:0] & st0_l2bnk_dest[4:0])) & st0_pkt_vld & ~st_atom_rq_d1 ;
+assign st1_pcx_rq_vld = 
+  (|(st1_q_wr[4:0] & st1_l2bnk_dest[4:0])) & st1_pkt_vld ;
+  //(|(st1_q_wr[4:0] & st1_l2bnk_dest[4:0])) & st1_pkt_vld & ~mcycle_squash_d1;
+  //(|(st1_q_wr[4:0] & st1_l2bnk_dest[4:0])) & st1_pkt_vld & ~st_atom_rq_d1 ;
+assign st2_pcx_rq_vld = 
+  (|(st2_q_wr[4:0] & st2_l2bnk_dest[4:0])) & st2_pkt_vld ;
+  //(|(st2_q_wr[4:0] & st2_l2bnk_dest[4:0])) & st2_pkt_vld & ~mcycle_squash_d1;
+  //(|(st2_q_wr[4:0] & st2_l2bnk_dest[4:0])) & st2_pkt_vld & ~st_atom_rq_d1 ;
+assign st3_pcx_rq_vld = 
+  (|(st3_q_wr[4:0] & st3_l2bnk_dest[4:0])) & st3_pkt_vld ;
+  //(|(st3_q_wr[4:0] & st3_l2bnk_dest[4:0])) & st3_pkt_vld & ~mcycle_squash_d1;
+  //(|(st3_q_wr[4:0] & st3_l2bnk_dest[4:0])) & st3_pkt_vld & ~st_atom_rq_d1 ;
+
+// IMISS - request valid when a destination bit lines up with queue_write and imiss_pkt_vld is set
+// imiss requests will not speculate - ** change !!!
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+assign imiss_pcx_rq_vld = 
+  (|(queue_write[4:0] & imiss_l2bnk_dest[4:0])) & imiss_pkt_vld ;
+  //(|(queue_write[4:0] & imiss_l2bnk_dest[4:0])) & imiss_pkt_vld & ~mcycle_squash_d1;
+  //(|((queue_write[4:0] & (sel_qentry0[4:0] | (~sel_qentry0[4:0] & ~spc_pcx_req_update_w2[4:0]))) & imiss_l2bnk_dest[4:0])) & imiss_pkt_vld & ~mcycle_squash_d1;
+
+// SPU - stream request valid, same form as imiss
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+assign strm_pcx_rq_vld = 
+  (|(queue_write[4:0] & strm_l2bnk_dest[4:0])) & strm_pkt_vld ;
+  //(|(queue_write[4:0] & strm_l2bnk_dest[4:0])) & strm_pkt_vld & ~mcycle_squash_d1;
+
+wire lsu_fwdpkt_vld_d1 ;
+wire	[4:0]	fwdpkt_dest_d1 ;
+// This delay is to compensate for the 1-cycle delay for internal rd/wr.
+dff_s #(6)  fvld_stgd1 (  // valid bit and 5-bit destination are delayed together
+  .din ({lsu_fwdpkt_vld,lsu_fwdpkt_dest[4:0]}),
+  .q   ({lsu_fwdpkt_vld_d1,fwdpkt_dest_d1[4:0]}),
+  .clk (clk),
+  .se  (1'b0), .si (), .so ()
+  );
+
+// FWD PKT - delayed valid/destination, masked for three cycles after a pick (sel_d1/d2/d3)
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+assign fwdpkt_rq_vld = 
+  (|(queue_write[4:0] & fwdpkt_dest_d1[4:0])) & 
+	lsu_fwdpkt_vld_d1 & 
+	~(fwdpkt_pcx_rq_sel_d1 | fwdpkt_pcx_rq_sel_d2 |  // screen vld until reset can be sent.
+	fwdpkt_pcx_rq_sel_d3) ;	// extra cycle since fwdpkt_vld is now flop delayed.
+	//~mcycle_squash_d1;
+
+// This to reset state. It must thus take into account speculative requests (hence the ~pcx_req_squash_d1 qualifier).
+assign lsu_fwdpkt_pcx_rq_sel =  fwdpkt_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+
+// INTERRUPT - request valid when a destination bit lines up with queue_write and intrpt_pkt_vld is set
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+assign intrpt_pcx_rq_vld = 
+  (|(queue_write[4:0] & intrpt_l2bnk_dest[4:0])) & intrpt_pkt_vld ;
+  //(|(queue_write[4:0] & intrpt_l2bnk_dest[4:0])) & intrpt_pkt_vld & ~mcycle_squash_d1;
+
+// FFU
+// fpop will never get squashed.
+// ** Should be able to simplify equation.
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+//for fpop pre_qwr is good enough to qual 'cos there are no ld/st atomics to IOB
+wire [4:0]  fpop_q_wr ;  // declared here; its driver is not in this section
+assign fpop_pcx_rq_vld = 
+  //sel_qentry0[4] & fpop_l2bnk_dest[4] & fpop_pkt_vld ;
+  //(|(queue_write[4:0] & fpop_l2bnk_dest[4:0])) & 
+  //(|(pre_qwr[4:0] & fpop_l2bnk_dest[4:0])) &
+  (|(fpop_q_wr[4:0] & fpop_l2bnk_dest[4:0])) & 
+	// change sel_qentry0[5] to sel_qentry0[4] for fpio merge
+      fpop_pkt_vld ;
+      //fpop_pkt_vld & ((sel_qentry0[4] & fpop_pkt1) | ~fpop_pkt1) ;
+	//~mcycle_squash_d1 ;
+
+
+//=================================================================================================
+// HIERARCHICAL PICKER FOR PCX REQ GENERATION
+//=================================================================================================
+
+// 13 requests to choose from :
+// - imiss, 4 ld, 4 st, (intrpt,strm,fpop,fwdpkt).
+// - 4 categories are thus formed, each with equal weight.
+// - As a consequence, imiss has the highest priority (because it is one vs. 4 in others)
+// - Fair scheduling thru round-robin is ensured between and within categories.
+// - Starvation for 2-cycle b2b ops (cas/fpop) is prevented.
+// - strm requests, even though they lie in the misc category, will get good 
+// throughput as the other misc requests will be infrequent.
+
+// LEVEL ONE - PICK WITHIN CATEGORIES
+
+// Note : picker defaults to 1-hot.
+
+wire	[3:0]	all_pcx_rq_pick ;
+wire	[3:0]	ld_events_raw ;  // NOTE(review): only consumer visible here is the commented-out rrobin_picker1 - confirm still used
+//wire	[3:0]	ld_events_final ;
+wire	ld3_pcx_rq_pick,ld2_pcx_rq_pick,ld1_pcx_rq_pick,ld0_pcx_rq_pick ;
+
+//bug6807 - kill load events raw when partial raw is detected (ldN_rawp_disabled masks the unmasked valid).
+assign ld_events_raw[0]  =  (ld0_pkt_vld_unmasked & ~ld0_rawp_disabled) | ld0_pcx_rq_sel_d1 | ld0_pcx_rq_sel_d2 ;
+assign ld_events_raw[1]  =  (ld1_pkt_vld_unmasked & ~ld1_rawp_disabled) | ld1_pcx_rq_sel_d1 | ld1_pcx_rq_sel_d2 ;
+assign ld_events_raw[2]  =  (ld2_pkt_vld_unmasked & ~ld2_rawp_disabled) | ld2_pcx_rq_sel_d1 | ld2_pcx_rq_sel_d2 ;
+assign ld_events_raw[3]  =  (ld3_pkt_vld_unmasked & ~ld3_rawp_disabled) | ld3_pcx_rq_sel_d1 | ld3_pcx_rq_sel_d2 ;
+
+//bug4814 - change rrobin_picker1 to rrobin_picker2
+// Choose one among 4 loads.
+//lsu_rrobin_picker1 ld4_rrobin  (
+//    .events   	  ({ld3_pcx_rq_vld,ld2_pcx_rq_vld,
+//		    ld1_pcx_rq_vld,ld0_pcx_rq_vld}),
+//    .events_raw   ({ld3_pkt_vld_unmasked,ld2_pkt_vld_unmasked,
+//		    ld1_pkt_vld_unmasked,ld0_pkt_vld_unmasked}),
+//    .pick_one_hot ({ld3_pcx_rq_pick,ld2_pcx_rq_pick,
+//		    ld1_pcx_rq_pick,ld0_pcx_rq_pick}),
+//    .events_final (ld_events_final[3:0]),
+//    .rclk         (rclk),
+//    .grst_l       (grst_l),
+//    .arst_l       (arst_l),
+//    .si(),
+//    .se(se),
+//    .so()
+//  );
+
+lsu_rrobin_picker2 ld4_rrobin  (  // picks one of the four load requests; pick_one_hot is later qualified into ldN_pcx_rq_sel
+    .events   	  ({ld3_pcx_rq_vld,ld2_pcx_rq_vld,ld1_pcx_rq_vld,ld0_pcx_rq_vld}),
+    .thread_force (ld_thrd_force_vld[3:0]),
+    .pick_one_hot ({ld3_pcx_rq_pick,ld2_pcx_rq_pick,ld1_pcx_rq_pick,ld0_pcx_rq_pick}),
+    .events_picked({ld3_pcx_rq_sel,ld2_pcx_rq_sel,ld1_pcx_rq_sel,ld0_pcx_rq_sel}),  // NOTE(review): these nets are driven by assigns below, so presumably a picker input - confirm
+    .rclk         (rclk),
+    .grst_l       (grst_l),
+    .arst_l       (arst_l),
+    .si(),
+    .se(se),
+    .so()
+  );
+
+
+
+
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+//assign	ld3_pcx_rq_sel = ld3_pcx_rq_pick & ld3_pcx_rq_vld & all_pcx_rq_pick[1] ;
+//assign	ld2_pcx_rq_sel = ld2_pcx_rq_pick & ld2_pcx_rq_vld & all_pcx_rq_pick[1] ;
+//assign	ld1_pcx_rq_sel = ld1_pcx_rq_pick & ld1_pcx_rq_vld & all_pcx_rq_pick[1] ;
+//assign	ld0_pcx_rq_sel = ld0_pcx_rq_pick & ld0_pcx_rq_vld & all_pcx_rq_pick[1] ;
+//bug2705 - add spec valid qualification
+//assign	ld3_pcx_rq_sel = ld3_pcx_rq_pick & ld3_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+//timing fix: 08/06/03 - tag_rdata->gen tag_parity_err->lsu_ld_miss_g arrives @625 in qctl1
+//                       cache_way_hit ->lsu_ld_miss_g arrives @525 in qctl1
+//                       cache_way_hit ->lsu_way_hit_or arrives @510 in qctl1
+//                       625ps + ld?_l2cache_rq_g (130ps) + urq_stgpq flop logic(100ps) (slack=-100ps)
+//assign  ld0_spec_pick_vld_g  =   ld0_spec_vld_g & ld0_l2cache_rq_g & ld0_pcx_rq_pick & ld0_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+wire    ld0_nspec_pick_vld ,
+        ld1_nspec_pick_vld ,
+        ld2_nspec_pick_vld ,
+        ld3_nspec_pick_vld ;
+// Per thread: a speculative pick additionally requires ~lsu_way_hit_or; a non-speculative pick applies when ldN_spec_vld_g is low.
+assign  ld0_spec_pick_vld_g  =   ld0_spec_vld_g & ~lsu_way_hit_or & ld0_pcx_rq_pick & ld0_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+assign  ld0_nspec_pick_vld   =  ~ld0_spec_vld_g &                   ld0_pcx_rq_pick & ld0_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+
+assign  ld1_spec_pick_vld_g  =   ld1_spec_vld_g & ~lsu_way_hit_or & ld1_pcx_rq_pick & ld1_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+assign  ld1_nspec_pick_vld   =  ~ld1_spec_vld_g &                   ld1_pcx_rq_pick & ld1_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+
+assign  ld2_spec_pick_vld_g  =   ld2_spec_vld_g & ~lsu_way_hit_or & ld2_pcx_rq_pick & ld2_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+assign  ld2_nspec_pick_vld   =  ~ld2_spec_vld_g &                   ld2_pcx_rq_pick & ld2_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+
+assign  ld3_spec_pick_vld_g  =   ld3_spec_vld_g & ~lsu_way_hit_or & ld3_pcx_rq_pick & ld3_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+assign  ld3_nspec_pick_vld   =  ~ld3_spec_vld_g &                   ld3_pcx_rq_pick & ld3_pcx_rq_vld & all_pcx_rq_pick[1] & ~mcycle_squash_d1 ;
+
+// Final per-thread load select: either pick flavour.
+assign	ld0_pcx_rq_sel = (ld0_spec_pick_vld_g | ld0_nspec_pick_vld) ;
+assign	ld1_pcx_rq_sel = (ld1_spec_pick_vld_g | ld1_nspec_pick_vld) ;
+assign	ld2_pcx_rq_sel = (ld2_spec_pick_vld_g | ld2_nspec_pick_vld) ;
+assign	ld3_pcx_rq_sel = (ld3_spec_pick_vld_g | ld3_nspec_pick_vld) ;
+
+//bug3506: set mask in the level1 pick in w3-cycle if picked by pcx
+//assign  ld_events_final[3] = ld3_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+//assign  ld_events_final[2] = ld2_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+//assign  ld_events_final[1] = ld1_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+//assign  ld_events_final[0] = ld0_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+
+
+
+// Choose one among 4 st: these nets connect to the pick_one_hot port of st4_rrobin.
+wire	st0_pcx_rq_pick, st1_pcx_rq_pick, st2_pcx_rq_pick, st3_pcx_rq_pick ;
+
+wire        pcx_rq_for_stb_en ;
+//wire [3:0]  st_events_final ;
+wire [3:0]  st_events_raw ;
+
+//8/20/03: bug3506 fix is incomplete - vld may not be held until d2 cycle,
+//         so each raw event also ORs in the d1 and d2 copies of the thread's select.
+assign st_events_raw[3:0]  =
+        {stb3_rd_for_pcx,   stb2_rd_for_pcx,   stb1_rd_for_pcx,   stb0_rd_for_pcx  } |
+        {st3_pcx_rq_sel_d1, st2_pcx_rq_sel_d1, st1_pcx_rq_sel_d1, st0_pcx_rq_sel_d1} |
+        {st3_pcx_rq_sel_d2, st2_pcx_rq_sel_d2, st1_pcx_rq_sel_d2, st0_pcx_rq_sel_d2} ;
+
+//bug4814 - change rrobin_picker1 to rrobin_picker2
+//lsu_rrobin_picker1 st4_rrobin  (
+//    .events   	  ({st3_pcx_rq_vld,st2_pcx_rq_vld,
+//		                st1_pcx_rq_vld,st0_pcx_rq_vld}),
+//    .events_raw	  (st_events_raw[3:0]),
+//    .pick_one_hot ({st3_pcx_rq_pick,st2_pcx_rq_pick,
+//		                st1_pcx_rq_pick,st0_pcx_rq_pick}),
+//    //.en           (pcx_rq_for_stb_en),                                                                
+//    .events_final (st_events_final[3:0]),
+//    .rclk         (rclk),
+//    .grst_l       (grst_l),
+//    .arst_l       (arst_l),
+//    .si(),
+//    .se(se),
+//    .so()
+//
+//  );
+
+lsu_rrobin_picker2 st4_rrobin  (
+    .rclk          (rclk),
+    .grst_l        (grst_l),
+    .arst_l        (arst_l),
+    .se            (se),
+    .si            (),
+    .so            (),
+    // events / events_picked / pick_one_hot are ordered {st3 .. st0}
+    .events        ({st3_pcx_rq_vld,st2_pcx_rq_vld,st1_pcx_rq_vld,st0_pcx_rq_vld}),
+    .thread_force  (st_thrd_force_vld[3:0]),
+    .events_picked (pcx_rq_for_stb[3:0]),
+    .pick_one_hot  ({st3_pcx_rq_pick,st2_pcx_rq_pick,st1_pcx_rq_pick,st0_pcx_rq_pick})
+  );
+
+
+
+// Level-one store picks exported as a {st3 .. st0} bus, plus the OR of the four store valids.
+assign lsu_st_pcx_rq_pick[3:0]  =  {st3_pcx_rq_pick, st2_pcx_rq_pick, st1_pcx_rq_pick, st0_pcx_rq_pick} ;
+//timing fix: 9/2/03 - reduce fanout in stb_rwctl for lsu_st_pcx_rq_pick - gen separate signal for stb_cam_rptr_vld and stb_data_rptr_vld
+assign lsu_st_pcx_rq_vld  =  |{st3_pcx_rq_vld, st2_pcx_rq_vld, st1_pcx_rq_vld, st0_pcx_rq_vld} ;
+
+//wire        st0_pcx_rq_sel_tmp, st1_pcx_rq_sel_tmp;
+//wire        st2_pcx_rq_sel_tmp, st3_pcx_rq_sel_tmp;
+   
+
+   wire stb_cam_hit_w;
+
+//bug3503 - stb_cam_hit_bf is qualified with lsu_inst_vld_w
+assign stb_cam_hit_w  =  lsu_inst_vld_w & stb_cam_hit_bf ;
+
+// Register the qualified cam hit into stb_cam_hit_w2 (scan enable tied to 1'b0).
+dff_s #(1)  stb_cam_hit_stg_w2  (
+  .clk (clk),
+  .din (stb_cam_hit_w),
+  .q   (stb_cam_hit_w2),
+  .se  (1'b0), .si (), .so ()
+  );
+
+//RAW read STB at W3 (not W2), so stb_cam_hit_w2 isn't critical   
+//assign pcx_rq_for_stb_en = ~(|lsu_st_ack_rq_stb[3:0]) &   ~stb_cam_hit_w2 & ~stb_cam_wptr_vld;
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+assign pcx_rq_for_stb_en = ~(stb_cam_hit_w2 | stb_cam_wr_no_ivld_m | mcycle_squash_d1) ;
+
+//timing fix : 5/6 - move kill_w2 after store pick
+//assign	pcx_rq_for_stb[3] = st3_pcx_rq_pick & st3_pcx_rq_vld & all_pcx_rq_pick[2] & pcx_rq_for_stb_en;
+//assign	pcx_rq_for_stb[2] = st2_pcx_rq_pick & st2_pcx_rq_vld & all_pcx_rq_pick[2] & pcx_rq_for_stb_en;
+//assign	pcx_rq_for_stb[1] = st1_pcx_rq_pick & st1_pcx_rq_vld & all_pcx_rq_pick[2] & pcx_rq_for_stb_en;
+//assign	pcx_rq_for_stb[0] = st0_pcx_rq_pick & st0_pcx_rq_vld & all_pcx_rq_pick[2] & pcx_rq_for_stb_en;
+
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+//bug4513 - kill pcx_rq_for_stb if atomic request is picked and 2 entries to the l2bank are not available
+
+wire  [3:0]  pcx_rq_for_stb_tmp ;
+wire   st0_qmon_2entry_avail,st1_qmon_2entry_avail,st2_qmon_2entry_avail,st3_qmon_2entry_avail ;
+
+// Terms shared by all four store threads: store category picked, store pick
+// enabled, and no multi-cycle squash. (pcx_rq_for_stb_en already contains
+// ~mcycle_squash_d1; the explicit term is kept as in the per-thread equations.)
+wire   st_l1_pick_qual ;
+assign st_l1_pick_qual = all_pcx_rq_pick[2] & pcx_rq_for_stb_en & ~mcycle_squash_d1 ;
+
+// Store request per thread before the atomic check: picked, valid, not killed in w2.
+// Earlier form, without the squash term:
+//   stN_pcx_rq_pick & stN_pcx_rq_vld & all_pcx_rq_pick[2] & pcx_rq_for_stb_en & ~lsu_st_pcx_rq_kill_w2[N];
+assign	pcx_rq_for_stb_tmp[3] = (st3_pcx_rq_pick & st3_pcx_rq_vld) & ~lsu_st_pcx_rq_kill_w2[3] & st_l1_pick_qual ;
+assign	pcx_rq_for_stb_tmp[2] = (st2_pcx_rq_pick & st2_pcx_rq_vld) & ~lsu_st_pcx_rq_kill_w2[2] & st_l1_pick_qual ;
+assign	pcx_rq_for_stb_tmp[1] = (st1_pcx_rq_pick & st1_pcx_rq_vld) & ~lsu_st_pcx_rq_kill_w2[1] & st_l1_pick_qual ;
+assign	pcx_rq_for_stb_tmp[0] = (st0_pcx_rq_pick & st0_pcx_rq_vld) & ~lsu_st_pcx_rq_kill_w2[0] & st_l1_pick_qual ;
+
+//bug4513 - kill pcx_rq_for_stb if atomic request is picked and 2 entries to the l2bank are not available
+// A non-atomic store (stN_atomic_vld low) passes unchanged;
+// an atomic store additionally needs stN_qmon_2entry_avail.
+assign	pcx_rq_for_stb[3] = pcx_rq_for_stb_tmp[3] & (~st3_atomic_vld | (st3_atomic_vld & st3_qmon_2entry_avail)) ;
+assign	pcx_rq_for_stb[2] = pcx_rq_for_stb_tmp[2] & (~st2_atomic_vld | (st2_atomic_vld & st2_qmon_2entry_avail)) ;
+assign	pcx_rq_for_stb[1] = pcx_rq_for_stb_tmp[1] & (~st1_atomic_vld | (st1_atomic_vld & st1_qmon_2entry_avail)) ;
+assign	pcx_rq_for_stb[0] = pcx_rq_for_stb_tmp[0] & (~st0_atomic_vld | (st0_atomic_vld & st0_qmon_2entry_avail)) ;
+
+//assign	st3_pcx_rq_sel_tmp = st3_pcx_rq_pick & st3_pcx_rq_vld & all_pcx_rq_pick[2] ;
+//assign	st2_pcx_rq_sel_tmp = st2_pcx_rq_pick & st2_pcx_rq_vld & all_pcx_rq_pick[2] ;
+//assign	st1_pcx_rq_sel_tmp = st1_pcx_rq_pick & st1_pcx_rq_vld & all_pcx_rq_pick[2] ;
+//assign	st0_pcx_rq_sel_tmp = st0_pcx_rq_pick & st0_pcx_rq_vld & all_pcx_rq_pick[2] ;
+
+//bug3506: set mask in the level1 pick in w3-cycle if picked by pcx
+//assign  st_events_final[3] = st3_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+//assign  st_events_final[2] = st2_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+//assign  st_events_final[1] = st1_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+//assign  st_events_final[0] = st0_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+
+
+
+wire	fwdpkt_pcx_rq_pick, intrpt_pcx_rq_pick, fpop_pcx_rq_pick, strm_pcx_rq_pick ;
+//wire [3:0]  misc_events_final ;
+wire [3:0]  misc_events_raw ;
+//8/20/03: bug3506 fix is incomplete - vld may not be held until d2 cycle
+// Bit order [3]=strm [2]=fpop [1]=intrpt [0]=fwdpkt; each bit ORs the raw valid with the d1/d2 select copies.
+assign  misc_events_raw[3]   =  strm_pcx_rq_sel_d1 | strm_pcx_rq_sel_d2 | strm_pkt_vld_unmasked ;
+assign  misc_events_raw[2]   =  fpop_pcx_rq_sel_d1 | fpop_pcx_rq_sel_d2 | fpop_pkt_vld_unmasked ;
+//bug6807 - kill interrupt events raw when store buffer is not empty i.e. interrupt clear=0
+assign  misc_events_raw[1]   =  intrpt_pcx_rq_sel_d1 | intrpt_pcx_rq_sel_d2 | (intrpt_clr_d1 & intrpt_pkt_vld_unmasked) ;
+assign  misc_events_raw[0]   =  fwdpkt_pcx_rq_sel_d1 | fwdpkt_pcx_rq_sel_d2 | lsu_fwdpkt_vld_d1 ;
+
+
+//bug4814 - change rrobin_picker1 to rrobin_picker2
+//lsu_rrobin_picker1 misc4_rrobin  (
+//    .events   	  ({strm_pcx_rq_vld,fpop_pcx_rq_vld,
+//		    intrpt_pcx_rq_vld,fwdpkt_rq_vld}),
+//    .events_raw   (misc_events_raw[3:0]),
+//    .pick_one_hot ({strm_pcx_rq_pick,fpop_pcx_rq_pick,
+//		    intrpt_pcx_rq_pick,fwdpkt_pcx_rq_pick}),
+//    .events_final (misc_events_final[3:0]),
+//    .rclk         (rclk),
+//    .grst_l       (grst_l),
+//    .arst_l       (arst_l),
+//    .si(),
+//    .se(se),
+//    .so()
+//  );
+
+lsu_rrobin_picker2 misc4_rrobin  (
+    .rclk          (rclk),
+    .grst_l        (grst_l),
+    .arst_l        (arst_l),
+    .se            (se),
+    .si            (),
+    .so            (),
+    // events / events_picked / pick_one_hot are ordered {strm, fpop, intrpt, fwdpkt}
+    .events        ({strm_pcx_rq_vld,fpop_pcx_rq_vld,intrpt_pcx_rq_vld,fwdpkt_rq_vld}),
+    .thread_force  (misc_thrd_force_vld[3:0]),
+    .events_picked ({strm_pcx_rq_sel,fpop_pcx_rq_sel,intrpt_pcx_rq_sel,fwdpkt_pcx_rq_sel}),
+    .pick_one_hot  ({strm_pcx_rq_pick,fpop_pcx_rq_pick,intrpt_pcx_rq_pick,fwdpkt_pcx_rq_pick})
+  );
+
+
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+//assign	strm_pcx_rq_sel = strm_pcx_rq_pick & strm_pcx_rq_vld & all_pcx_rq_pick[3] ;
+//assign	fpop_pcx_rq_sel = fpop_pcx_rq_pick & fpop_pcx_rq_vld & all_pcx_rq_pick[3] ;
+//assign	intrpt_pcx_rq_sel = intrpt_pcx_rq_pick & intrpt_pcx_rq_vld & all_pcx_rq_pick[3] ;
+//assign	fwdpkt_pcx_rq_sel = fwdpkt_pcx_rq_pick & fwdpkt_rq_vld & all_pcx_rq_pick[3] ;
+wire    misc_l1_pick_qual ;   // misc category picked and no multi-cycle squash
+assign  misc_l1_pick_qual = all_pcx_rq_pick[3] & ~mcycle_squash_d1 ;
+assign	strm_pcx_rq_sel   = (strm_pcx_rq_pick   & strm_pcx_rq_vld)   & misc_l1_pick_qual ;
+assign	intrpt_pcx_rq_sel = (intrpt_pcx_rq_pick & intrpt_pcx_rq_vld) & misc_l1_pick_qual ;
+assign	fwdpkt_pcx_rq_sel = (fwdpkt_pcx_rq_pick & fwdpkt_rq_vld)     & misc_l1_pick_qual ;
+//11/15/03 - change fpop atomic to be same as store atomic (bug4513)
+//assign	fpop_pcx_rq_sel = fpop_pcx_rq_pick & fpop_pcx_rq_vld & all_pcx_rq_pick[3] & ~mcycle_squash_d1 ;
+wire    fpop_qmon_2entry_avail ;
+assign	fpop_pcx_rq_sel_tmp = (fpop_pcx_rq_pick & fpop_pcx_rq_vld) & misc_l1_pick_qual ;
+assign	fpop_pcx_rq_sel = fpop_qmon_2entry_avail & fpop_pcx_rq_sel_tmp ;
+
+
+//bug3506: set mask in the level1 pick in w3-cycle if picked by pcx
+//assign  misc_events_final[3] = lsu_spu_ldst_ack ;
+//assign  misc_events_final[2] = lsu_tlu_pcxpkt_ack ;
+//assign  misc_events_final[1] = lsu_fwdpkt_pcx_rq_sel ;
+//assign  misc_events_final[0] = fpop_pcx_rq_sel_d2 & ~pcx_req_squash_d1 ;
+
+
+
+
+// LEVEL TWO - PICK AMONG CATEGORIES
+// In parallel with level one
+
+wire	misc_pcx_rq_all, st_pcx_rq_all, ld_pcx_rq_all ;   // one "any request valid" flag per category
+assign	ld_pcx_rq_all   = |{ld3_pcx_rq_vld, ld2_pcx_rq_vld, ld1_pcx_rq_vld, ld0_pcx_rq_vld} ;
+assign	st_pcx_rq_all   = |{st3_pcx_rq_vld, st2_pcx_rq_vld, st1_pcx_rq_vld, st0_pcx_rq_vld} ;
+assign	misc_pcx_rq_all = |{strm_pcx_rq_vld, fpop_pcx_rq_vld, intrpt_pcx_rq_vld, fwdpkt_rq_vld} ;
+
+//bug3506- raw valid used in resetting pick status
+//8/20/03: bug3506 fix is incomplete - vld may not be held until d2 cycle
+
+//wire all4_rrobin_en;
+//timing fix: 5/20/03 - pcx_rq_for_stb will be independent of ifu_lsu_pcxreq_d
+//assign all4_rrobin_en = ~(all_pcx_rq_pick[2] & ~pcx_rq_for_stb_en) ;
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+//assign all4_rrobin_en = ~((all_pcx_rq_pick[2] & ~pcx_rq_for_stb_en) | imiss_pcx_rq_vld );
+//bug3348 - setting history moved from w-stage to w3-stage(1-cycle after spc_pcx_req_pq)
+//          and hence there are no cases to disable logging of history
+//assign all4_rrobin_en = ~((all_pcx_rq_pick[2] & ~pcx_rq_for_stb_en) | imiss_pcx_rq_vld | mcycle_squash_d1);
+//wire   spc_pcx_req_vld_pq1 ;
+//assign all4_rrobin_en =  spc_pcx_req_vld_pq1 ;
+ 
+//wire  [3:1]  all_pcx_rq_pick_no_iqual;
+wire  [3:0]  all_pcx_rq_pick_no_iqual;  
+//wire  [3:0]  all_pcx_pick_status_d2;  // bug 3348
+//wire  [3:0]  all_pick_status_rst_d2;    //bug 3506
+wire  [3:0]  all_pick_status_set;
+
+//bug3506: set pick status in the same cycle
+assign  all_pick_status_set[3:0]  =  { (strm_pcx_rq_sel | intrpt_pcx_rq_sel | fpop_pcx_rq_sel | fwdpkt_pcx_rq_sel),  // [3] any misc select
+                                       (|pcx_rq_for_stb[3:0]),                                                     // [2] any store request
+                                       (ld0_pcx_rq_sel | ld1_pcx_rq_sel | ld2_pcx_rq_sel | ld3_pcx_rq_sel),        // [1] any load select
+                                       1'b0 } ;                                                                    // [0] tied low
+
+
+
+lsu_rrobin_picker2 all4_rrobin  (
+    .rclk          (rclk),
+    .grst_l        (grst_l),
+    .arst_l        (arst_l),
+    .se            (se),
+    .si            (),
+    .so            (),
+    // category order is {misc, store, load, bit0}; the bit0 event is tied to 1'b0
+    .events        ({misc_pcx_rq_all,st_pcx_rq_all,ld_pcx_rq_all,1'b0}),
+    .thread_force  (all_thrd_force_vld[3:0]),
+    .events_picked (all_pick_status_set[3:0]),
+    //.en           (all4_rrobin_en),      // bug 3348
+    .pick_one_hot  (all_pcx_rq_pick_no_iqual[3:0])
+  );
+
+
+// 5/22/03: cmp1_regr fail - qual all pick w/  ~mcycle_squash_d1; not doing this causes multi-hot select to
+//          pcx_pkt mux
+// Picks [3:1] come from the category picker but are forced low by imiss_pcx_rq_vld or mcycle_squash_d1.
+assign all_pcx_rq_pick[3:1]  =  {3{~(imiss_pcx_rq_vld | mcycle_squash_d1)}} & all_pcx_rq_pick_no_iqual[3:1] ;
+assign all_pcx_rq_pick[0]    =  ~mcycle_squash_d1 & imiss_pcx_rq_vld ;
+
+wire   all_pcx_rq_dest_sel3 ;  // high whenever none of picks [2:0] is set; used as sel3 of sel_final_dest
+assign all_pcx_rq_dest_sel3  =  ~(all_pcx_rq_pick[2] | all_pcx_rq_pick[1] | all_pcx_rq_pick[0]) ;
+//timing fix: 5/20/03 - pcx_rq_for_stb will be independent of ifu_lsu_pcxreq_d
+//assign	imiss_pcx_rq_sel = imiss_pcx_rq_vld & all_pcx_rq_pick[0] ;
+//timing fix: 05/20/03 - move mcycle_squash_d1 after pick instead of before pick
+//assign	imiss_pcx_rq_sel = imiss_pcx_rq_vld;
+assign	imiss_pcx_rq_sel = ~mcycle_squash_d1 & imiss_pcx_rq_vld ;
+
+//=================================================================================================
+
+// Select the appropriate load. Need a scheme which allows all threads to
+// make forward progress.
+/*assign  ld0_pcx_rq_sel = ld0_pcx_rq_vld ;
+assign  ld1_pcx_rq_sel = ld1_pcx_rq_vld & ~ld0_pcx_rq_vld ;
+assign  ld2_pcx_rq_sel = ld2_pcx_rq_vld & ~(ld0_pcx_rq_vld | ld1_pcx_rq_vld);
+assign  ld3_pcx_rq_sel = ld3_pcx_rq_vld & ~(ld0_pcx_rq_vld | ld1_pcx_rq_vld | ld2_pcx_rq_vld)   ; */
+
+// d1 copies of the per-thread load selects.
+dff_s #(4)  lrsel_stgd1 (
+        .clk  (clk),
+        .din    ({ld0_pcx_rq_sel, ld1_pcx_rq_sel, ld2_pcx_rq_sel, ld3_pcx_rq_sel}),
+        .q      ({ld0_pcx_rq_sel_d1, ld1_pcx_rq_sel_d1, ld2_pcx_rq_sel_d1, ld3_pcx_rq_sel_d1}),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+//bug2705- kill pcx pick if spec vld kill is set
+assign  lsu_ld0_pcx_rq_sel_d1  =  ~lsu_ld0_spec_vld_kill_w2 & ld0_pcx_rq_sel_d1 ;
+assign  lsu_ld1_pcx_rq_sel_d1  =  ~lsu_ld1_spec_vld_kill_w2 & ld1_pcx_rq_sel_d1 ;
+assign  lsu_ld2_pcx_rq_sel_d1  =  ~lsu_ld2_spec_vld_kill_w2 & ld2_pcx_rq_sel_d1 ;
+assign  lsu_ld3_pcx_rq_sel_d1  =  ~lsu_ld3_spec_vld_kill_w2 & ld3_pcx_rq_sel_d1 ;
+
+// d2 copies, taken from the kill-qualified d1 selects.
+dff_s #(4)  lrsel_stgd2 (
+        .clk  (clk),
+        .din    ({lsu_ld0_pcx_rq_sel_d1, lsu_ld1_pcx_rq_sel_d1, lsu_ld2_pcx_rq_sel_d1, lsu_ld3_pcx_rq_sel_d1}),
+        .q      ({ld0_pcx_rq_sel_d2, ld1_pcx_rq_sel_d2, ld2_pcx_rq_sel_d2, ld3_pcx_rq_sel_d2}),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+// Used to complete prefetch. Be careful ! ld could be squashed. Add pcx_req_squash.
+// Bus form of the d2 selects, bit N = thread N.
+assign	lsu_ld_pcx_rq_sel_d2[3:0] =
+        {ld3_pcx_rq_sel_d2, ld2_pcx_rq_sel_d2, ld1_pcx_rq_sel_d2, ld0_pcx_rq_sel_d2} ;
+
+//bug2705- kill pcx pick if spec vld kill is set
+wire	ld_pcxpkt_vld ;
+// Set when any thread's kill-qualified d1 load select is set; registered into lsu_ifu_ld_pcxpkt_vld.
+assign  ld_pcxpkt_vld = |{lsu_ld3_pcx_rq_sel_d1, lsu_ld2_pcx_rq_sel_d1, lsu_ld1_pcx_rq_sel_d1, lsu_ld0_pcx_rq_sel_d1} ;
+  //ld0_pcx_rq_sel_d1 | ld1_pcx_rq_sel_d1 | ld2_pcx_rq_sel_d1 | ld3_pcx_rq_sel_d1 ;
+
+dff_s #(1)  icindx_stgd1 (
+        .clk  	(clk),
+        .din    (ld_pcxpkt_vld), 
+        .q  	(lsu_ifu_ld_pcxpkt_vld),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+wire  [3:0]           ld_pcx_rq_sel ;	
+
+// Per thread: the (unqualified) d1 load select ORed with that thread's stN_atom_rq_d2.
+assign  ld_pcx_rq_sel[3:0] =  {ld3_pcx_rq_sel_d1, ld2_pcx_rq_sel_d1, ld1_pcx_rq_sel_d1, ld0_pcx_rq_sel_d1} |
+                              {st3_atom_rq_d2,    st2_atom_rq_d2,    st1_atom_rq_d2,    st0_atom_rq_d2   } ;
+
+//11/7/03: add rst_tri_en - with rst_tri_en set, mxsel[2:0] are low and mxsel[3] is high
+assign  lsu_ld_pcx_rq_mxsel[3]    =  rst_tri_en | ~(ld_pcx_rq_sel[2] | ld_pcx_rq_sel[1] | ld_pcx_rq_sel[0]) ;
+assign  lsu_ld_pcx_rq_mxsel[2:0]  =  {3{~rst_tri_en}} & ld_pcx_rq_sel[2:0] ;
+
+// Encodes the index of the set ld_pcx_rq_sel bit (assuming at most one bit is set).
+assign ld_pcx_thrd[1] = ld_pcx_rq_sel[3] | ld_pcx_rq_sel[2] ;
+assign ld_pcx_thrd[0] = ld_pcx_rq_sel[3] | ld_pcx_rq_sel[1] ;
+   
+// Assume a simple priority based scheme for now.
+// This should not be prioritized at this point. 
+//assign st_pcx_rq_mhot_sel[0] = st0_pcx_rq_sel_tmp ;
+//assign st_pcx_rq_mhot_sel[1] = st1_pcx_rq_sel_tmp ;
+//assign st_pcx_rq_mhot_sel[2] = st2_pcx_rq_sel_tmp ;
+//assign st_pcx_rq_mhot_sel[3] = st3_pcx_rq_sel_tmp ;
+   
+/*assign st_pcx_rq_mhot_sel[0] = 
+  ~ld_pcx_rq_vld  & st0_pcx_rq_vld ;
+assign st_pcx_rq_mhot_sel[1] = 
+  ~ld_pcx_rq_vld  & st1_pcx_rq_vld ;
+assign st_pcx_rq_mhot_sel[2] = 
+  ~ld_pcx_rq_vld  & st2_pcx_rq_vld ;
+assign st_pcx_rq_mhot_sel[3] = 
+  ~ld_pcx_rq_vld  & st3_pcx_rq_vld ;*/
+
+   
+// Per-thread store selects are the final store requests (after the kill, squash
+// and atomic 2-entry qualification applied to pcx_rq_for_stb).
+assign  {st3_pcx_rq_sel, st2_pcx_rq_sel, st1_pcx_rq_sel, st0_pcx_rq_sel} =
+        pcx_rq_for_stb[3:0] ;
+
+//assign  st_pcx_rq_vld =  (|pcx_rq_for_stb[3:0]);
+
+// Temporary.
+//assign  st0_pcx_rq_sel = stb_rd_for_pcx_sel[0] ;
+//assign  st1_pcx_rq_sel = stb_rd_for_pcx_sel[1] ;
+//assign  st2_pcx_rq_sel = stb_rd_for_pcx_sel[2] ;
+//assign  st3_pcx_rq_sel = stb_rd_for_pcx_sel[3] ;
+
+// This will be on a critical path. Massage !!!
+// Allows for speculative requests.
+//assign  st_pcx_rq_vld = 
+//    (st0_pcx_rq_sel & stb_rd_for_pcx_sel[0]) | 
+//    (st1_pcx_rq_sel & stb_rd_for_pcx_sel[1]) | 
+//    (st2_pcx_rq_sel & stb_rd_for_pcx_sel[2]) | 
+//    (st3_pcx_rq_sel & stb_rd_for_pcx_sel[3])  ;
+
+
+   
+/*assign imiss_pcx_rq_sel = 
+  imiss_pcx_rq_vld & ~(ld_pcx_rq_vld | st_pcx_rq_vld) ;
+assign strm_pcx_rq_sel = 
+  strm_pcx_rq_vld & ~(ld_pcx_rq_vld | st_pcx_rq_vld | imiss_pcx_rq_sel) ;
+assign fpop_pcx_rq_sel = 
+  fpop_pcx_rq_vld & ~(ld_pcx_rq_vld | st_pcx_rq_vld | imiss_pcx_rq_vld | strm_pcx_rq_vld) ;
+assign intrpt_pcx_rq_sel = 
+  intrpt_pcx_rq_vld & ~(ld_pcx_rq_vld | st_pcx_rq_vld | imiss_pcx_rq_vld | strm_pcx_rq_vld | fpop_pcx_rq_sel) ;
+assign fwdpkt_pcx_rq_sel = 
+  fwdpkt_rq_vld & ~(ld_pcx_rq_vld | st_pcx_rq_vld | imiss_pcx_rq_vld | strm_pcx_rq_vld | intrpt_pcx_rq_vld 
+                        | fpop_pcx_rq_sel) ; */
+
+
+//assign imiss_strm_pcx_rq_sel = imiss_pcx_rq_sel | strm_pcx_rq_sel ;
+
+// Squash: a request bit is set while neither pre_qwr nor pcx_spc_grant_px is set for that bit.
+assign  pcx_req_squash = 
+  |(spc_pcx_req_pq_buf2[4:0] & ~(pre_qwr[4:0] | pcx_spc_grant_px[4:0])) ;
+//(|(spc_pcx_req_pq[4:0] & ~queue_write[4:0] & ~pcx_spc_grant_px[4:0])) ;
+//  (|lsu_error_rst[3:0]) | // dtag parity error requires two ld pkts
+//  (st_atom_rq_d1) ; // cas,stq - 2 pkt requests
+
+//bug:2877 - dtag parity error 2nd packet request;
+//wire	error_rst ;
+
+//assign	error_rst =
+//	(ld0_pcx_rq_sel_d1 & lsu_dtag_perror_w2[0]) |
+//	(ld1_pcx_rq_sel_d1 & lsu_dtag_perror_w2[1]) |
+//	(ld2_pcx_rq_sel_d1 & lsu_dtag_perror_w2[2]) |
+//	(ld3_pcx_rq_sel_d1 & lsu_dtag_perror_w2[3]) ;
+
+//wire	error_rst_d1 ;
+//dff #(1)  erst_stgd1 (
+//        .din    (error_rst), 
+//	.q  	(error_rst_d1),
+//        .clk  	(clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        ); 
+
+wire   [3:0]  dtag_perr_pkt2_vld ;
+// Per thread: kill-qualified d1 load select coinciding with that thread's lsu_dtag_perror_w2 bit.
+assign  dtag_perr_pkt2_vld[3:0] =
+        {lsu_ld3_pcx_rq_sel_d1, lsu_ld2_pcx_rq_sel_d1, lsu_ld1_pcx_rq_sel_d1, lsu_ld0_pcx_rq_sel_d1} &
+        lsu_dtag_perror_w2[3:0] ;
+
+//bug:2877 - dtag parity error 2nd packet request; flop to sync w/ ld?_pcx_rq_sel_d2
+dff_s #(4) ff_dtag_perr_pkt2_vld_d1 (
+        .clk    (clk),
+        .din    (dtag_perr_pkt2_vld[3:0]), 
+        .q      (dtag_perr_pkt2_vld_d1[3:0]),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+
+
+//bug:2877 - dtag parity error 2nd packet request; error_rst can be removed from mcycle_mask_d1 since
+//           it does not behave like an atomic i.e. it is sent as 2 separate packets.
+// mcycle_squash_d1 is now just spc_pcx_atom_pq_buf2; the terms below were removed (bug2877):
+//   error_rst |             // dtag parity error requires two ld pkts
+//   (|lsu_error_rst[3:0]) | // dtag parity error requires two ld pkts
+assign	mcycle_squash_d1 = spc_pcx_atom_pq_buf2 ;   // cas/fpop
+
+// pcx_req_squash registered once (_d1) and twice (_d2).
+dff_s #(1)  sqsh_stgd1 (
+        .clk  	(clk),
+        .din    (pcx_req_squash), 
+        .q  	(pcx_req_squash_d1),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+dff_s #(1)  sqsh_stgd2 (
+        .clk  	(clk),
+        .din    (pcx_req_squash_d1), 
+        .q  	(pcx_req_squash_d2),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+//timing fix: 9/19/03 - split the lsu_pcx_req_squash to 4 signals to stb_ctl[0-3] to reduce loading
+assign  lsu_pcx_req_squash = ~st_atom_rq_d1 & pcx_req_squash ;
+assign  {lsu_pcx_req_squash3, lsu_pcx_req_squash2, lsu_pcx_req_squash1, lsu_pcx_req_squash0} =
+        {4{lsu_pcx_req_squash}} ;
+
+// Exported copy of the once-registered squash.
+assign  lsu_pcx_req_squash_d1 = pcx_req_squash_d1 ;
+
+// d1 copies of the imiss and misc selects (bit order: imiss, strm, intrpt, fpop, fwdpkt).
+dff_s #(5)  rsel_stgd1 (
+        .clk  (clk),
+        //.din    ({imiss_strm_pcx_rq_sel,
+        .din    ({imiss_pcx_rq_sel, strm_pcx_rq_sel, intrpt_pcx_rq_sel,
+                  fpop_pcx_rq_sel, fwdpkt_pcx_rq_sel}), 
+        //.q      ({imiss_strm_pcx_rq_sel_d1,
+        .q      ({imiss_pcx_rq_sel_d1, strm_pcx_rq_sel_d1, intrpt_pcx_rq_sel_d1,
+                  fpop_pcx_rq_sel_d1, fwdpkt_pcx_rq_sel_d1}), 
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+// Exported copy of the d1 imiss select.
+assign  lsu_imiss_pcx_rq_sel_d1  =  imiss_pcx_rq_sel_d1;
+
+// d2 copy of the imiss select.
+dff_s  imrqs_stgd2 (
+        .clk  	(clk),
+        .din    (imiss_pcx_rq_sel_d1), .q (imiss_pcx_rq_sel_d2),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+// d2 and d3 copies of the fwdpkt select.
+dff_s  fwdrqs_stgd2 (
+        .clk  	(clk),
+        .din    (fwdpkt_pcx_rq_sel_d1), .q (fwdpkt_pcx_rq_sel_d2),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+dff_s  fwdrqs_stgd3 (
+        .clk  	(clk),
+        .din    (fwdpkt_pcx_rq_sel_d2), .q (fwdpkt_pcx_rq_sel_d3),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+// d2 copy of the fpop select.
+dff_s  fpop_stgd2 (
+        .clk    (clk),
+        .din    (fpop_pcx_rq_sel_d1), .q  (fpop_pcx_rq_sel_d2),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+//bug4665: add sehold to pcx_pkt_src_sel[1]
+//wire	ld_pcx_rq_sel_d1,st_pcx_rq_sel_d1,misc_pcx_rq_sel_d1;
+wire	st_pcx_rq_sel_d1, ld_pcx_rq_sel_d1 ;
+wire    all_pcx_rq_pick_b2 ;
+// While sehold is set the store-select flop is fed its own output; otherwise it takes all_pcx_rq_pick[2].
+assign  all_pcx_rq_pick_b2  =  (~sehold) ? all_pcx_rq_pick[2] : st_pcx_rq_sel_d1 ;
+dff_s #(2)  pick_stgd1 (
+        .clk  (clk),
+        .din    ({all_pcx_rq_pick_b2, all_pcx_rq_pick[1]}), 
+        .q      ({st_pcx_rq_sel_d1,ld_pcx_rq_sel_d1}),
+        //.din    ({all_pcx_rq_pick[3], all_pcx_rq_pick_b2, all_pcx_rq_pick[1]}), 
+        //.q      ({misc_pcx_rq_sel_d1,st_pcx_rq_sel_d1,ld_pcx_rq_sel_d1}),
+        //.din    (all_pcx_rq_pick[2:1]), .q      ({st_pcx_rq_sel_d1,ld_pcx_rq_sel_d1}),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+// add other sources in such as interrupt and fpop.
+//bug:2877 - dtag parity error 2nd packet request; remove error_rst_d1 since dtag parity error does not
+//           behave as an atomic
+//assign pcx_pkt_src_sel[0] = ld_pcx_rq_sel_d1 | st_cas_rq_d2 | error_rst_d1 ;
+
+//11/7/03 - add rst_tri_en
+wire  [3:0]  pcx_pkt_src_sel_tmp ;
+// [0] load select or st_cas_rq_d2, [1] store select, [3] fpop/fwdpkt/intrpt, [2] set when no other final select is set.
+assign pcx_pkt_src_sel_tmp[0] = st_cas_rq_d2 | ld_pcx_rq_sel_d1 ;
+assign pcx_pkt_src_sel_tmp[1] = st_pcx_rq_sel_d1 ;
+assign pcx_pkt_src_sel_tmp[3] = |{fpop_pcx_rq_sel_d1, fpop_pcx_rq_sel_d2, fwdpkt_pcx_rq_sel_d1, intrpt_pcx_rq_sel_d1} ;
+assign pcx_pkt_src_sel_tmp[2] = ~(pcx_pkt_src_sel[3] | pcx_pkt_src_sel[1] | pcx_pkt_src_sel[0]) ;
+                                //imiss_strm_pcx_rq_sel_d1  ;
+
+//bug4888 -  change rst_tri_en to select b[1] instead of b[3]
+// With rst_tri_en set, bit 1 is forced high and bits 3, 2 and 0 are forced low.
+assign pcx_pkt_src_sel[3:2]  =  {2{~rst_tri_en}} & pcx_pkt_src_sel_tmp[3:2] ;
+assign pcx_pkt_src_sel[1]    =  rst_tri_en | pcx_pkt_src_sel_tmp[1] ;
+assign pcx_pkt_src_sel[0]    =  ~rst_tri_en & pcx_pkt_src_sel_tmp[0] ;
+
+//assign  dest_pkt_sel[0] = ld_pcx_rq_vld ;
+//assign  dest_pkt_sel[1] = st_pcx_rq_vld ;
+//assign  dest_pkt_sel[2] = ~(ld_pcx_rq_vld | st_pcx_rq_vld);
+
+//=================================================================================================
+//		SELECT DESTINATION
+//=================================================================================================
+
+// Select dest for load.
+mux4ds  #(5) ldsel_dest (
+  .dout (ld_pkt_dest[4:0]),        // destination of the load picked at level one
+  .sel0 (ld0_pcx_rq_pick),
+  .in0  (ld0_l2bnk_dest[4:0]),
+  .sel1 (ld1_pcx_rq_pick),
+  .in1  (ld1_l2bnk_dest[4:0]),
+  .sel2 (ld2_pcx_rq_pick),
+  .in2  (ld2_l2bnk_dest[4:0]),
+  .sel3 (ld3_pcx_rq_pick),
+  .in3  (ld3_l2bnk_dest[4:0])
+);
+
+// Select dest for store
+mux4ds  #(5) stsel_dest (
+  .dout (st_pkt_dest[4:0]),        // destination of the store picked at level one
+  .sel0 (st0_pcx_rq_pick),
+  .in0  (st0_l2bnk_dest[4:0]),
+  .sel1 (st1_pcx_rq_pick),
+  .in1  (st1_l2bnk_dest[4:0]),
+  .sel2 (st2_pcx_rq_pick),
+  .in2  (st2_l2bnk_dest[4:0]),
+  .sel3 (st3_pcx_rq_pick),
+  .in3  (st3_l2bnk_dest[4:0])
+);
+
+wire	[4:0]	misc_pkt_dest ;      // destination of the misc source picked at level one
+mux4ds  #(5) miscsel_dest (
+  .dout (misc_pkt_dest[4:0]),
+  .sel0 (strm_pcx_rq_pick),
+  .in0  (strm_l2bnk_dest[4:0]),
+  .sel1 (fpop_pcx_rq_pick),
+  .in1  (fpop_l2bnk_dest[4:0]),
+  .sel2 (intrpt_pcx_rq_pick),
+  .in2  (intrpt_l2bnk_dest[4:0]),
+  .sel3 (fwdpkt_pcx_rq_pick),
+  .in3  (fwdpkt_dest_d1[4:0])
+);
+
+// This is temporary until the req/ack path is restructured
+/*assign  imiss_strm_pkt_dest[4:0] =
+  imiss_pcx_rq_sel ? imiss_l2bnk_dest[4:0] :  
+    strm_pcx_rq_sel ? strm_l2bnk_dest[4:0] :
+      fpop_pcx_rq_sel ? fpop_l2bnk_dest[4:0] :
+        intrpt_pcx_rq_sel ? intrpt_l2bnk_dest[4:0] :
+                lsu_fwdpkt_dest[4:0] ;  */
+
+/*
+// This needs to be replaced with structural mux once rq/ack resolved.
+mux4ds  #(5) istrmsel_dest (
+  .in0  (imiss_l2bnk_dest[4:0]),
+  .in1  (strm_l2bnk_dest[4:0]),
+  .in2  (fpop_l2bnk_dest[4:0]),
+  .in3  (intrpt_l2bnk_dest[4:0]),
+  .sel0 (imiss_pcx_rq_sel),  
+  .sel1   (strm_pcx_rq_sel),
+  .sel2   (fpop_pcx_rq_sel),
+  .sel3   (intrpt_pcx_rq_sel),
+  .dout (imiss_strm_pkt_dest[4:0])
+);
+*/
+
+mux4ds  #(5) sel_final_dest (
+  .dout (current_pkt_dest[4:0]),   // final destination: imiss, load, store, or misc
+  .sel0 (all_pcx_rq_pick[0]),  
+  .in0  (imiss_l2bnk_dest[4:0]),
+  .sel1 (all_pcx_rq_pick[1]),
+  .in1  (ld_pkt_dest[4:0]),
+  .sel2 (all_pcx_rq_pick[2]), 
+  .in2  (st_pkt_dest[4:0]),
+  //.sel3 (all_pcx_rq_pick[3]), 
+  .sel3 (all_pcx_rq_dest_sel3), 
+  .in3  (misc_pkt_dest[4:0])
+);
+
+/*mux3ds  #(5) sel_dest (
+  .in0  (ld_pkt_dest[4:0]),
+  .in1  (st_pkt_dest[4:0]),
+  .in2    (imiss_strm_pkt_dest[4:0]),
+  .sel0 (dest_pkt_sel[0]),  
+  .sel1   (dest_pkt_sel[1]),
+  .sel2   (dest_pkt_sel[2]), 
+  .dout (current_pkt_dest[4:0])
+);*/
+
+wire	pcx_rq_sel ;
+// Set when any load, store, imiss or misc source select is set.
+assign	pcx_rq_sel = |{
+  ld3_pcx_rq_sel, ld2_pcx_rq_sel, ld1_pcx_rq_sel, ld0_pcx_rq_sel,
+  st3_pcx_rq_sel, st2_pcx_rq_sel, st1_pcx_rq_sel, st0_pcx_rq_sel,
+  imiss_pcx_rq_sel, strm_pcx_rq_sel, fpop_pcx_rq_sel, intrpt_pcx_rq_sel, fwdpkt_pcx_rq_sel } ;
+
+// Request vector: the selected destination, forced to zero when nothing is selected.
+assign spc_pcx_req_g[4:0] = {5{pcx_rq_sel}} & current_pkt_dest[4:0] ;
+  //(current_pkt_dest[4:0] & 
+  //{5{(ld_pcx_rq_vld | st_pcx_rq_vld | imiss_pcx_rq_vld | strm_pcx_rq_vld | intrpt_pcx_rq_vld | fpop_atom_req | fwdpkt_rq_vld)}}) ;
+
+//timing fix: 9/19/03 - instantiate buffer for spc_pcx_req_pq
+wire  [4:0]  spc_pcx_req_pq_tmp ;
+// Register the request vector; the flop output then drives two separately buffered copies.
+dff_s #(5)  rq_stgpq (
+        .clk  (clk),
+        .din    (spc_pcx_req_g[4:0]), .q  (spc_pcx_req_pq_tmp[4:0]),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+bw_u1_buf_30x UZfix_spc_pcx_req_pq0_buf1 ( .z(spc_pcx_req_pq[0]), .a(spc_pcx_req_pq_tmp[0]) );
+bw_u1_buf_30x UZfix_spc_pcx_req_pq1_buf1 ( .z(spc_pcx_req_pq[1]), .a(spc_pcx_req_pq_tmp[1]) );
+bw_u1_buf_30x UZfix_spc_pcx_req_pq2_buf1 ( .z(spc_pcx_req_pq[2]), .a(spc_pcx_req_pq_tmp[2]) );
+bw_u1_buf_30x UZfix_spc_pcx_req_pq3_buf1 ( .z(spc_pcx_req_pq[3]), .a(spc_pcx_req_pq_tmp[3]) );
+bw_u1_buf_30x UZfix_spc_pcx_req_pq4_buf1 ( .z(spc_pcx_req_pq[4]), .a(spc_pcx_req_pq_tmp[4]) );
+
+bw_u1_buf_30x UZsize_spc_pcx_req_pq0_buf2 ( .z(spc_pcx_req_pq_buf2[0]), .a(spc_pcx_req_pq_tmp[0]) );
+bw_u1_buf_30x UZsize_spc_pcx_req_pq1_buf2 ( .z(spc_pcx_req_pq_buf2[1]), .a(spc_pcx_req_pq_tmp[1]) );
+bw_u1_buf_30x UZsize_spc_pcx_req_pq2_buf2 ( .z(spc_pcx_req_pq_buf2[2]), .a(spc_pcx_req_pq_tmp[2]) );
+bw_u1_buf_30x UZsize_spc_pcx_req_pq3_buf2 ( .z(spc_pcx_req_pq_buf2[3]), .a(spc_pcx_req_pq_tmp[3]) );
+bw_u1_buf_30x UZsize_spc_pcx_req_pq4_buf2 ( .z(spc_pcx_req_pq_buf2[4]), .a(spc_pcx_req_pq_tmp[4]) );
+
+//bug3348 - not needed
+//wire   spc_pcx_req_vld_pq ;
+//assign spc_pcx_req_vld_pq =  |spc_pcx_req_pq[4:0];
+//
+//dff #(1)  rq_stgpq1 (
+//        .din    (spc_pcx_req_vld_pq), .q  (spc_pcx_req_vld_pq1),
+//        .clk  (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        ); 
+
+// Update vector: with st_atom_rq_d1 or fpop_atom_rq_pq set, recirculate the same request
+// (back to back case - stda, cas etc); otherwise use the standard request.
+assign spc_pcx_req_update_g[4:0] = 
+        (fpop_atom_rq_pq | st_atom_rq_d1) ? spc_pcx_req_pq_buf2[4:0]
+                                          : ({5{pcx_rq_sel}} & current_pkt_dest[4:0]) ;
+        //{5{(ld_pcx_rq_vld | st_pcx_rq_vld | imiss_pcx_rq_vld | strm_pcx_rq_vld | intrpt_pcx_rq_vld | fpop_pcx_rq_vld | fwdpkt_rq_vld)}}) ;
+
+dff_s #(5)  urq_stgpq (
+        .clk  (clk),
+        .din    (spc_pcx_req_update_g[4:0]),
+        .q      (spc_pcx_req_update_w2[4:0]),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+//=================================================================================================
+//	2-CYCLE OP HANDLING
+//=================================================================================================
+
+// cas,fpop,dtag-error pkt. dtag-error pkt does not have to be b2b.
+// prevent starvation, ensure requests are b2b.
+// fpop can only request to fpu.(bit4) cas can only request to L2 (b3:0) 
+// ** error rst needs to be handled correctly.
+
+// ** This needs to be massaged for timing.
+// timing fix: 5/7/03 - delay the mask 1 cycle for stores.
+wire	[4:0]	mcycle_mask_qwr_d1 ;   // 5 bits wide, one more than mcycle_mask_qwr
+wire	[3:0]	mcycle_mask_qwr ;      // bits 3:0 only; built from st?_l2bnk_dest[3:0]
+//assign	mcycle_mask_qwr[3:0] =
+//	({4{(stb0_rd_for_pcx & st0_atomic_vld)}} & st0_l2bnk_dest[3:0]) |
+//	({4{(stb1_rd_for_pcx & st1_atomic_vld)}} & st1_l2bnk_dest[3:0]) |
+//	({4{(stb2_rd_for_pcx & st2_atomic_vld)}} & st2_l2bnk_dest[3:0]) |
+//	({4{(stb3_rd_for_pcx & st3_atomic_vld)}} & st3_l2bnk_dest[3:0]) ;
+
+
+//bug4513- kill the atomic store pcx req in this cycle if only 1 entry is available -
+//         atomic packets have to be sent b2b to pcx.
+//
+// ex. thread0 to l2 bank0 atomic store - w/ only 1 bank0 entry available
+//---------------------------------------------------------------------------------
+//				1	2	3	4	5	6	7
+//---------------------------------------------------------------------------------
+// st0_atomic_vld-------------->1
+// pcx_rq_for_stb_tmp[0]------->1
+// pcx_rq_for_stb[0]----------->0                               1
+// st0_qmon_2entry_avail------->0                               1
+//---------------------------------------------------------------------------------
+// st0_atomic_pend------------->1                               0
+// st0_atomic_pend_d1------------------>1                               0
+// mcycle_mask_qwr_d1[0]--------------->1                               0
+//---------------------------------------------------------------------------------
+
+
+assign  st0_qmon_2entry_avail =  |(sel_qentry0[3:0] & st0_l2bnk_dest[3:0]) ;  // any dest bit whose sel_qentry0 bit is set
+assign  st1_qmon_2entry_avail =  |(sel_qentry0[3:0] & st1_l2bnk_dest[3:0]) ;
+assign  st2_qmon_2entry_avail =  |(sel_qentry0[3:0] & st2_l2bnk_dest[3:0]) ;
+assign  st3_qmon_2entry_avail =  |(sel_qentry0[3:0] & st3_l2bnk_dest[3:0]) ;
+assign  fpop_qmon_2entry_avail =  sel_qentry0[4] & fpop_l2bnk_dest[4] ;       // fpop uses bit 4 only
+
+
+//bug4513 - when atomic is picked, if 2 entries are not free, kill all requests until 2entries are free
+wire    st0_atomic_pend, st1_atomic_pend, st2_atomic_pend, st3_atomic_pend ;
+
+assign  st0_atomic_pend  =  (pcx_rq_for_stb_tmp[0] & st0_atomic_vld & ~st0_qmon_2entry_avail) |  //set
+                            (st0_atomic_pend_d1 & ~st0_qmon_2entry_avail) ;		     //recycle/reset
+
+assign  st1_atomic_pend  =  (pcx_rq_for_stb_tmp[1] & st1_atomic_vld & ~st1_qmon_2entry_avail) |  //set
+                            (st1_atomic_pend_d1 & ~st1_qmon_2entry_avail) ;		     //recycle/reset
+
+assign  st2_atomic_pend  =  (pcx_rq_for_stb_tmp[2] & st2_atomic_vld & ~st2_qmon_2entry_avail) |  //set
+                            (st2_atomic_pend_d1 & ~st2_qmon_2entry_avail) ;		     //recycle/reset
+
+assign  st3_atomic_pend  =  (pcx_rq_for_stb_tmp[3] & st3_atomic_vld & ~st3_qmon_2entry_avail) |  //set
+                            (st3_atomic_pend_d1 & ~st3_qmon_2entry_avail) ;		     //recycle/reset
+
+dff_s #(4)  ff_st0to3_atomic_pend_d1 (
+        .din ({st3_atomic_pend,st2_atomic_pend,st1_atomic_pend,st0_atomic_pend}),
+        .q   ({st3_atomic_pend_d1,st2_atomic_pend_d1,st1_atomic_pend_d1,st0_atomic_pend_d1}),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+//bug4513 - kill all requests after atomic if 2 entries to the bank are not available
+assign	mcycle_mask_qwr[3:0] =
+	({4{st0_atomic_pend}} & st0_l2bnk_dest[3:0]) |
+	({4{st1_atomic_pend}} & st1_l2bnk_dest[3:0]) |
+	({4{st2_atomic_pend}} & st2_l2bnk_dest[3:0]) |
+	({4{st3_atomic_pend}} & st3_l2bnk_dest[3:0]) ;
+
+//11/15/03 - change fpop atomic to be same as store atomic (bug4513)
+//assign	mcycle_mask_qwr[4] = fpop_pkt_vld | fpop_pcx_rq_sel_d1 ;
+
+wire    fpop_atomic_pend, fpop_atomic_pend_d1 ;
+
+
+assign fpop_atomic_pend = (fpop_pcx_rq_sel_tmp & ~fpop_qmon_2entry_avail) |
+                          (fpop_atomic_pend_d1 & ~fpop_qmon_2entry_avail) ;
+
+assign  fpop_q_wr[4:0] = fpop_atomic_pend_d1 ? pre_qwr[4:0] : queue_write[4:0] ;
+
+dff_s #(1)  ff_fpop_atomic_pend_d1 (
+        .din (fpop_atomic_pend),
+        .q   (fpop_atomic_pend_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+
+dff_s #(5)  ff_mcycle_mask_qwr_b4to0 (
+        .din ({fpop_atomic_pend,mcycle_mask_qwr[3:0]}), 
+        .q   (mcycle_mask_qwr_d1[4:0]),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+
+// 			PCX REQUEST GENERATION (END)
+//*************************************************************************************************
+
+//=================================================================================================
+//
+// CPX Packet Processing
+//
+//=================================================================================================
+
+
+// D-SIDE PROCESSING
+
+/*input  [3:0]   lsu_cpx_pkt_rqtype ;
+input          lsu_cpx_pkt_vld ;*/
+
+// non-cacheables are processed at the head of the dfq.
+// cpx_ld_type may not have to factor in strm load.
+
+//=================================================================================================
+//
+// PCX Queue Control
+//
+//=================================================================================================
+
+//timing fix: 5/7/03 - delay mask 1 cycle for stores
+//11/15/03 - change fpop atomic to be same as store atomic (bug4513)
+//assign	queue_write[4:0] = pre_qwr[4:0] & ~{mcycle_mask_qwr[4],mcycle_mask_qwr_d1[3:0]} ;
+assign	queue_write[4:0] = pre_qwr[4:0] & ~mcycle_mask_qwr_d1[4:0] ;
+
+//bug4513 - mcycle_mask_qwr will kill all requests other than stores. stores can be killed
+//          by fpop atomics
+//11/14/03- fix for bug4513 was incorrect ; st_queue_write[3:0] not needed 'cos st[0-3]_q_wr 
+//          has been changed to use st0_atomic_pend instead of st0_atomic_vld
+//assign	st_queue_write[4] = pre_qwr[4] & ~mcycle_mask_qwr[4] ;
+//assign	st_queue_write[3:0] = pre_qwr[3:0] ;
+
+//assign	queue_write[4:0] = pre_qwr[4:0] & ~mcycle_mask_qwr[4:0] ; // timing fix
+// assign	queue_write[4:0] = pre_qwr[4:0] ;
+
+// PCX Queue Control
+// - qctl tracks 2-input queue state for each of 6 destinations
+// through grant signals available from pcx.
+
+// L2 Bank0 Queue Monitor
+lsu_pcx_qmon  l2bank0_qmon (
+    .rclk         (rclk),
+    .grst_l       (grst_l),
+    .arst_l       (arst_l),
+    .si(),
+    .se(se),
+    .so(),
+  .send_by_pcx  (pcx_spc_grant_px[0]),
+  .send_to_pcx  (spc_pcx_req_update_w2[0]),
+  //.qwrite   (queue_write[0]),
+  .qwrite   (pre_qwr[0]),
+  .sel_qentry0  (sel_qentry0[0])
+);
+
+// L2 Bank1 Queue Monitor
+lsu_pcx_qmon  l2bank1_qmon (
+    .rclk         (rclk),
+    .grst_l       (grst_l),
+    .arst_l       (arst_l),
+    .si(),
+    .se(se),
+    .so(),
+  .send_by_pcx  (pcx_spc_grant_px[1]),
+  .send_to_pcx  (spc_pcx_req_update_w2[1]),
+  //.qwrite   (queue_write[1]),
+  .qwrite   (pre_qwr[1]),
+  .sel_qentry0  (sel_qentry0[1])
+);
+
+// L2 Bank2 Queue Monitor
+lsu_pcx_qmon  l2bank2_qmon (
+    .rclk         (rclk),
+    .grst_l       (grst_l),
+    .arst_l       (arst_l),
+    .si(),
+    .se(se),
+    .so(),
+  .send_by_pcx  (pcx_spc_grant_px[2]),
+  .send_to_pcx  (spc_pcx_req_update_w2[2]),
+  //.qwrite   (queue_write[2]),
+  .qwrite   (pre_qwr[2]),
+  .sel_qentry0  (sel_qentry0[2])
+);
+
+// L2 Bank3 Queue Monitor
+lsu_pcx_qmon  l2bank3_qmon (
+    .rclk         (rclk),
+    .grst_l       (grst_l),
+    .arst_l       (arst_l),
+    .si(),
+    .se(se),
+    .so(),
+  .send_by_pcx  (pcx_spc_grant_px[3]),
+  .send_to_pcx  (spc_pcx_req_update_w2[3]),
+  //.qwrite   (queue_write[3]),
+  .qwrite   (pre_qwr[3]),
+  .sel_qentry0  (sel_qentry0[3])
+);
+
+// FP/IO Bridge Queue Monitor
+lsu_pcx_qmon  fpiobridge_qmon (
+    .rclk         (rclk),
+    .grst_l       (grst_l),
+    .arst_l       (arst_l),
+    .si(),
+    .se(se),
+    .so(),
+  .send_by_pcx  (pcx_spc_grant_px[4]),
+  .send_to_pcx  (spc_pcx_req_update_w2[4]),
+  //.qwrite   (queue_write[4]),
+  .qwrite   (pre_qwr[4]),
+  .sel_qentry0  (sel_qentry0[4])
+);
+
+
+
+
+// 5/13/03: timing fix for lsu_dtag_perror_w2 thru st_pick
+wire  [3:0]  error_en;
+wire  [3:0]  error_rst_thrd;
+
+//assign  error_en[0] = lmq_enable[0] | (lsu_cpx_pkt_atm_st_cmplt & dcfill_active_e & dfq_byp_sel[0]);
+assign  error_en[0] = lsu_ld_inst_vld_g[0];
+assign  error_en[1] = lsu_ld_inst_vld_g[1];
+assign  error_en[2] = lsu_ld_inst_vld_g[2];
+assign  error_en[3] = lsu_ld_inst_vld_g[3];
+
+//assign  error_rst_thrd[0] = reset | (lsu_ld0_pcx_rq_sel_d1 & lsu_pcx_ld_dtag_perror_w2) ;
+//assign  error_rst_thrd[1] = reset | (lsu_ld1_pcx_rq_sel_d1 & lsu_pcx_ld_dtag_perror_w2) ;
+//assign  error_rst_thrd[2] = reset | (lsu_ld2_pcx_rq_sel_d1 & lsu_pcx_ld_dtag_perror_w2) ;
+//assign  error_rst_thrd[3] = reset | (lsu_ld3_pcx_rq_sel_d1 & lsu_pcx_ld_dtag_perror_w2) ;
+
+// reset moved to d2 'cos if 1st pkt is speculative and grant=0, error should not be reset.
+//bug4512 - stb_full_raw has to be qual w/ ld[0-3] inst_vld_w2
+//          also, need to qualify stb_full_raw w/ fp loads i.e. dont reset error if full raw is for fp double loads
+assign  error_rst_thrd[0] = reset | (ld0_pcx_rq_sel_d2 & ~pcx_req_squash_d1) 
+				| (ld0_inst_vld_w2 & ld_stb_full_raw_w2 & ~dbl_force_l2access_w2 & thread0_w2) ; // Bug4512
+				//| (ld_stb_full_raw_w2 & thread0_w2) ; // Bug 4361
+
+assign  error_rst_thrd[1] = reset | (ld1_pcx_rq_sel_d2 & ~pcx_req_squash_d1)
+				| (ld1_inst_vld_w2 & ld_stb_full_raw_w2 & ~dbl_force_l2access_w2 & thread1_w2) ;
+
+assign  error_rst_thrd[2] = reset | (ld2_pcx_rq_sel_d2 & ~pcx_req_squash_d1)
+				| (ld2_inst_vld_w2 & ld_stb_full_raw_w2 & ~dbl_force_l2access_w2 & thread2_w2) ;
+
+assign  error_rst_thrd[3] = reset | (ld3_pcx_rq_sel_d2 & ~pcx_req_squash_d1)
+				| (ld3_inst_vld_w2 & ld_stb_full_raw_w2 & ~dbl_force_l2access_w2 & thread3_w2) ;
+
+//assign  lsu_error_rst[3:0]  =  error_rst[3:0];
+
+wire	dtag_perror3,dtag_perror2,dtag_perror1,dtag_perror0;
+
+// Thread 0
+dffre_s  #(1) error_t0 (
+        .din    (lsu_dcache_tag_perror_g),
+        .q      (dtag_perror0),
+        .rst  (error_rst_thrd[0]), .en     (error_en[0]),               
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );            
+
+// Thread 1
+dffre_s  #(1) error_t1 (
+        .din    (lsu_dcache_tag_perror_g),
+        .q      (dtag_perror1),
+        .rst  (error_rst_thrd[1]), .en     (error_en[1]),               
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );            
+
+// Thread 2
+dffre_s  #(1) error_t2 (
+        .din    (lsu_dcache_tag_perror_g),
+        .q      (dtag_perror2),
+        .rst  (error_rst_thrd[2]), .en     (error_en[2]),               
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );            
+
+// Thread 3
+dffre_s  #(1) error_t3 (
+        .din    (lsu_dcache_tag_perror_g),
+        .q      (dtag_perror3),
+        .rst  (error_rst_thrd[3]), .en     (error_en[3]),               
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );            
+
+assign	lsu_dtag_perror_w2[3] = dtag_perror3 ;
+assign	lsu_dtag_perror_w2[2] = dtag_perror2 ;
+assign	lsu_dtag_perror_w2[1] = dtag_perror1 ;
+assign	lsu_dtag_perror_w2[0] = dtag_perror0 ;
+
+// Determine if ld pkt requires correction due to dtag parity error.
+assign  lsu_pcx_ld_dtag_perror_w2 =
+  ld_pcx_rq_sel[0] ? dtag_perror0 :
+    ld_pcx_rq_sel[1] ? dtag_perror1 :
+      ld_pcx_rq_sel[2] ? dtag_perror2 : dtag_perror3 ;
+
+
+//=================================================================================================
+//
+// THREAD RETRY DETECTION (picker related logic)
+//
+//=================================================================================================
+
+//bug4814 - move pick_status out of picker and reset pick status when all 12 valid requests have
+//          been picked and not squashed.
+
+assign  ld_thrd_pick_din[0]  =  ld_thrd_pick_status[0] | (ld0_pcx_rq_sel_d2 & ~pcx_req_squash_d1) ;
+assign  ld_thrd_pick_din[1]  =  ld_thrd_pick_status[1] | (ld1_pcx_rq_sel_d2 & ~pcx_req_squash_d1) ;
+assign  ld_thrd_pick_din[2]  =  ld_thrd_pick_status[2] | (ld2_pcx_rq_sel_d2 & ~pcx_req_squash_d1) ;
+assign  ld_thrd_pick_din[3]  =  ld_thrd_pick_status[3] | (ld3_pcx_rq_sel_d2 & ~pcx_req_squash_d1) ;
+
+assign  ld_thrd_pick_rst  =  ~|(ld_events_raw[3:0] & ~ld_thrd_pick_din[3:0]) ;
+   
+assign  ld_thrd_pick_status_din[3:0] =  ld_thrd_pick_din[3:0] & ~{4{all_thrd_pick_rst}} ;
+//assign  ld_thrd_pick_status_din[3:0]  =  ld_thrd_pick_din[3:0] & ~{4{ld_thrd_pick_rst}} ;
+
+assign  st_thrd_pick_din[0] = st_thrd_pick_status[0] | (st0_pcx_rq_sel_d2 & ~pcx_req_squash_d1) ;
+assign  st_thrd_pick_din[1] = st_thrd_pick_status[1] | (st1_pcx_rq_sel_d2 & ~pcx_req_squash_d1) ;
+assign  st_thrd_pick_din[2] = st_thrd_pick_status[2] | (st2_pcx_rq_sel_d2 & ~pcx_req_squash_d1) ;
+assign  st_thrd_pick_din[3] = st_thrd_pick_status[3] | (st3_pcx_rq_sel_d2 & ~pcx_req_squash_d1) ;
+
+assign  st_thrd_pick_rst  =  ~|(st_events_raw[3:0] & ~st_thrd_pick_din[3:0]) ;
+assign  st_thrd_pick_status_din[3:0]  =  st_thrd_pick_din[3:0] & ~{4{all_thrd_pick_rst}} ;
+//assign  st_thrd_pick_status_din[3:0]  =  st_thrd_pick_din[3:0] & ~{4{st_thrd_pick_rst}} ;
+
+assign  misc_thrd_pick_din[3] = misc_thrd_pick_status[3] | lsu_spu_ldst_ack ;
+assign  misc_thrd_pick_din[2] = misc_thrd_pick_status[2] | (fpop_pcx_rq_sel_d2 & ~pcx_req_squash_d1) ;
+assign  misc_thrd_pick_din[1] = misc_thrd_pick_status[1] | lsu_tlu_pcxpkt_ack ;
+assign  misc_thrd_pick_din[0] = misc_thrd_pick_status[0] | lsu_fwdpkt_pcx_rq_sel ;
+
+assign  misc_thrd_pick_rst  =  ~|(misc_events_raw[3:0] & ~misc_thrd_pick_din[3:0]) ;
+   
+assign  misc_thrd_pick_status_din[3:0]  =  misc_thrd_pick_din[3:0] & ~{4{all_thrd_pick_rst}} ;
+//assign  misc_thrd_pick_status_din[3:0]  =  misc_thrd_pick_din[3:0] & ~{4{misc_thrd_pick_rst}} ;
+
+assign  all_thrd_pick_rst  =  ld_thrd_pick_rst & st_thrd_pick_rst & misc_thrd_pick_rst ;
+
+
+dff_s    #(4) ff_ld_thrd_force(
+        .din    (ld_thrd_pick_status_din[3:0]),
+        .q      (ld_thrd_pick_status[3:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s    #(4) ff_st_thrd_force(
+        .din    (st_thrd_pick_status_din[3:0]),
+        .q      (st_thrd_pick_status[3:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s    #(4) ff_misc_thrd_force(
+        .din    (misc_thrd_pick_status_din[3:0]),
+        .q      (misc_thrd_pick_status[3:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+assign  ld_thrd_force_d1[3:0]  =  ~ld_thrd_pick_status[3:0] ;
+assign  st_thrd_force_d1[3:0]  =  ~st_thrd_pick_status[3:0] ;
+assign  misc_thrd_force_d1[3:0]  =  ~misc_thrd_pick_status[3:0] ;
+
+assign ld_thrd_force_vld[0]  =  ld_thrd_force_d1[0] & 
+                               ~(ld0_pcx_rq_sel_d1 | ld0_pcx_rq_sel_d2) ;
+
+assign ld_thrd_force_vld[1]  =  ld_thrd_force_d1[1] &
+                               ~(ld1_pcx_rq_sel_d1 | ld1_pcx_rq_sel_d2) ;
+
+assign ld_thrd_force_vld[2]  =  ld_thrd_force_d1[2] &
+                               ~(ld2_pcx_rq_sel_d1 | ld2_pcx_rq_sel_d2) ;
+
+assign ld_thrd_force_vld[3]  =  ld_thrd_force_d1[3] &
+                               ~(ld3_pcx_rq_sel_d1 | ld3_pcx_rq_sel_d2) ;
+
+
+// force valid to store picker if 1 entry is free and if it is not picked in d1/d2
+assign st_thrd_force_vld[0]  =  st_thrd_force_d1[0] &
+                               ~(st0_pcx_rq_sel_d1 | st0_pcx_rq_sel_d2) ;
+
+assign st_thrd_force_vld[1]  =  st_thrd_force_d1[1] &
+                               ~(st1_pcx_rq_sel_d1 | st1_pcx_rq_sel_d2) ;
+
+assign st_thrd_force_vld[2]  =  st_thrd_force_d1[2] &
+                               ~(st2_pcx_rq_sel_d1 | st2_pcx_rq_sel_d2) ;
+
+assign st_thrd_force_vld[3]  =  st_thrd_force_d1[3] &
+                               ~(st3_pcx_rq_sel_d1 | st3_pcx_rq_sel_d2) ;
+
+
+
+// force valid to misc picker if 1 entry is free and if it is not picked in d1/d2
+assign misc_thrd_force_vld[0]  =  misc_thrd_force_d1[0] &
+                                 ~(fwdpkt_pcx_rq_sel_d1 | fwdpkt_pcx_rq_sel_d2) ;
+
+assign misc_thrd_force_vld[1]  =  misc_thrd_force_d1[1] &
+                                 ~(intrpt_pcx_rq_sel_d1 | intrpt_pcx_rq_sel_d2);
+
+assign misc_thrd_force_vld[2]  =  misc_thrd_force_d1[2] &
+                                 ~(fpop_pcx_rq_sel_d1 | fpop_pcx_rq_sel_d2) ;
+
+assign misc_thrd_force_vld[3]  =  misc_thrd_force_d1[3] &
+                                 ~(strm_pcx_rq_sel_d1 | strm_pcx_rq_sel_d2) ;
+
+//2nd level pick thread force - force only if req are valid and l2bnk is free
+assign all_thrd_force_vld[0]  =  1'b0 ;
+
+assign all_thrd_force_vld[1]  =  
+                    |(ld_thrd_force_vld[3:0] & 
+                     {ld3_pcx_rq_vld,ld2_pcx_rq_vld,ld1_pcx_rq_vld,ld0_pcx_rq_vld}) ;
+
+assign all_thrd_force_vld[2]  =  
+                    |(st_thrd_force_vld[3:0] &
+                     {st3_pcx_rq_vld,st2_pcx_rq_vld,st1_pcx_rq_vld,st0_pcx_rq_vld}) ;
+
+assign all_thrd_force_vld[3]  =  
+                    |(misc_thrd_force_vld[3:0] &
+                     {strm_pcx_rq_vld,fpop_pcx_rq_vld,intrpt_pcx_rq_vld,fwdpkt_rq_vld}) ;
+
+
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_pcx_qmon.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_pcx_qmon.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_pcx_qmon.v	(revision 6)
@@ -0,0 +1,141 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_pcx_qmon.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    Monitors queue state of pcx.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                                        // time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module lsu_pcx_qmon (/*AUTOARG*/
+   // Outputs
+   so, qwrite, sel_qentry0, 
+   // Inputs
+   rclk, grst_l, arst_l, si, se, send_by_pcx, send_to_pcx
+   ) ;                                          
+// Tracks the occupancy of two queue entries (entry0 = oldest, entry1 = youngest).
+input           rclk ;		// clock; aliased to clk below.
+input           grst_l;		// din of the rstff reset flop.
+input           arst_l;		// rst_l of the rstff reset flop.
+input           si;		// not referenced inside this module.
+input           se;		// passed to rstff only; the qstate flops tie se to 1'b0.
+output          so;		// declared but never driven in this module.
+
+input 	send_by_pcx ;		// PCX sends packet to dest.
+input 	send_to_pcx ;		// SKB sends packet to PCX.
+ 	
+output 	qwrite ;		// PCX queue is writable (entry1 is not full).
+output 	sel_qentry0 ;		// 1 = entry0 is empty and send_to_pcx is low this cycle.
+
+wire       clk;
+wire 	reset ,dbb_reset_l ;
+wire	entry0_rst, entry1_rst ;
+wire	entry0_en, entry1_en ;
+wire	entry0_din, entry1_din ;
+wire	entry0_full,entry1_full;
+// Local reset: grst_l goes through the rstff instance; its output is inverted to form active-high reset.
+    dffrl_async rstff(.din (grst_l),
+                        .q   (dbb_reset_l),
+                        .clk (clk), .se(se), .si(), .so(),
+                        .rst_l (arst_l));
+
+assign  reset =  ~dbb_reset_l;
+assign  clk = rclk;
+
+
+//======================================================================================
+//
+//	Queue Monitor 
+//
+//======================================================================================
+
+//
+//	Pipeline :
+//--------------------------------------------------------------------------------------
+//
+//	| req to pcx 	| payload to pcx| 		|		|
+//	| qfull=0	|   arb/grant=1 | 		|		|
+//	| qentry=1	| 		| 		|		|
+//	|		|	      	| 		|		|
+//	|		| req to pcx 	| payload to pcx| 		|
+//	|		| qfull=0	|   arb/grant=0	|		|
+//	|		| qentry=2	|		|		|
+//	|		|		| req to pcx 	| payload to pcx| 
+//	|		|		| qfull=0	|     arb/grant	|
+//
+//	
+
+
+// OPERATION :
+// Monitors the state of one 2-entry input queue of the pcx for the given processor.
+// - Implemented as FIFO.
+// - The queue is cleared on reset. 
+// - A packet sent from the core to pcx will set a bit in the 
+// corresponding logical queue entry.
+// - A packet sent from pcx to dest, will cause entry0 to be cleared.
+// Only entry0 need be cleared as entry1 will shift to entry0 on
+// a grant by the pcx.
+// - The queue will never overflow as a packet will never be sent 
+// from the skb to the pcx unless at least one queue entry is free.
+// Timing : May have to flop grant and then use it.
+// entry0: reset term fires on a pcx send only when entry0 is not being (re)loaded in the same cycle.
+assign entry0_rst = 	reset | 
+			(send_by_pcx & ~entry0_en) ; 		// pcx sends to dest.
+assign entry0_en  = 	( entry1_full & send_by_pcx)  	| 	// shift entry1 to entry0
+			(~(entry0_full & ~send_by_pcx) & send_to_pcx) ;		// new packet, unless entry0 is full and not draining
+assign entry0_din = 	entry0_en ;	// din is 1 whenever the flop is enabled
+
+// represents oldest packet.
+dffre_s  qstate_entry0 (
+        .din    (entry0_din), .q  (entry0_full),
+        .rst    (entry0_rst), .en (entry0_en), .clk (clk),
+        .se     (1'b0),       .si (), 	       .so ()
+        );
+// entry1: loaded on send_to_pcx while entry0 is full, except when the pcx is sending and entry1 is empty.
+assign entry1_rst =	reset | 
+			(send_by_pcx & ~entry1_en) ;
+assign entry1_en  = 	entry0_full & send_to_pcx 
+			& ~(send_by_pcx & ~entry1_full) ; // new packet to entry1
+assign entry1_din = 	entry1_en ;
+
+// represents youngest packet.
+dffre_s  qstate_entry1 (
+        .din    (entry1_din), .q  (entry1_full),
+        .rst 	(entry1_rst), .en (entry1_en), 	.clk (clk),
+        .se     (1'b0), .si     (), .so ()
+        );
+
+assign qwrite = ~entry1_full ; 
+		//(entry1_full & send_by_pcx) ;		// look at top of stack only.
+assign sel_qentry0 = 
+	(~entry0_full & ~send_to_pcx) ; 
+	//(~entry0_full | 
+	//(~entry1_full & entry0_full & send_by_pcx)) & ~send_to_pcx ;					
+									// select which entry to write.
+
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_stb_rwdp.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_stb_rwdp.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_stb_rwdp.v	(revision 6)
@@ -0,0 +1,377 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_stb_rwdp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////
+/*
+//	Description:	Datapath for STB
+//				- Mainly for formatting stb data 
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which contains the 
+					// time scale definition
+
+`include "iop.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module lsu_stb_rwdp (/*AUTOARG*/
+   // Outputs
+   so, stb_rdata_ramd_buf, stb_rdata_ramd_b74_buf, lsu_stb_st_data_g, 
+   // Inputs
+   rclk, si, se, rst_tri_en, exu_lsu_rs3_data_e, 
+   lsu_stb_data_early_sel_e, lsu_stb_data_final_sel_m, 
+   exu_lsu_rs2_data_e, lsu_st_sz_bhww_m, lsu_st_sz_dw_m, 
+   lsu_st_sz_bhw_m, lsu_st_sz_wdw_m, lsu_st_sz_b_m, lsu_st_sz_w_m, 
+   lsu_st_sz_hw_m, lsu_st_sz_hww_m, ffu_lsu_data, lsu_st_hw_le_g, 
+   lsu_st_w_or_dbl_le_g, lsu_st_x_le_g, lsu_swap_sel_default_g, 
+   lsu_swap_sel_default_byte_7_2_g, stb_rdata_ramd, 
+   stb_rdata_ramd_b74
+   ) ;	
+
+   input  rclk ;
+   input  si;
+   output so;
+   input  se;
+   input  rst_tri_en;
+   
+input   [63:0]          exu_lsu_rs3_data_e ;    // data for store.
+input	[3:0]		lsu_stb_data_early_sel_e ;// early source of data for stb
+input			lsu_stb_data_final_sel_m ;// early source of data for stb
+input   [63:0]          exu_lsu_rs2_data_e ;    // rs2 data for cas.
+input			lsu_st_sz_bhww_m ;	// byte or hword or word
+input			lsu_st_sz_dw_m ;	// double word
+input			lsu_st_sz_bhw_m ;	// byte or hword
+input			lsu_st_sz_wdw_m ;	// word or dword
+input			lsu_st_sz_b_m ;		// byte
+input			lsu_st_sz_w_m ;		// word
+input			lsu_st_sz_hw_m ;	// hword
+input			lsu_st_sz_hww_m ;	// hword or word
+input	[63:0]		ffu_lsu_data ;	// fp store data - m stage
+//input			lsu_bendian_access_g ;	// bendian st
+//input			lsu_stdbl_inst_m ;	// stdbl
+
+   input        lsu_st_hw_le_g;
+   input        lsu_st_w_or_dbl_le_g;
+   input        lsu_st_x_le_g;
+   input        lsu_swap_sel_default_g;
+   input        lsu_swap_sel_default_byte_7_2_g;
+   
+   input [69:0] stb_rdata_ramd;
+   input        stb_rdata_ramd_b74;
+   
+   output [69:0] stb_rdata_ramd_buf;
+   output        stb_rdata_ramd_b74_buf;
+   
+output	[63:0]		lsu_stb_st_data_g ;	// data to be written to stb
+
+wire	[7:0]	byte0, byte1, byte2, byte3 ;
+wire	[7:0]	byte4, byte5, byte6, byte7 ;
+wire	[7:0]	swap_byte0, swap_byte1, swap_byte2, swap_byte3 ;
+wire	[7:0]	swap_byte4, swap_byte5, swap_byte6, swap_byte7 ;
+
+wire	[63:0]	stb_st_data_g ;
+wire	[63:0]	stb_st_data_early_e ;
+wire	[63:0]	stb_st_data_early_m ;
+wire	[63:0]	stb_st_data_final_m ;
+wire		st_sz_bhww_g ;
+wire		st_sz_dw_g ;
+wire		st_sz_bhw_g ;
+wire		st_sz_wdw_g ;
+wire		st_sz_b_g ;
+wire		st_sz_w_g ;
+wire		st_sz_hw_g ;
+wire		st_sz_hww_g ;
+//wire		bendian ;
+//wire		stdbl_g ;
+
+   wire clk;
+   assign clk = rclk;
+   
+//assign  stb_st_data_early_e[63:0] =       //@@ bw_u1_muxi41d_2x   
+//        lsu_stb_data_early_sel_e[0] ? 64'hffff_ffff_ffff_ffff :            		// ldstub writes all ones
+//                lsu_stb_data_early_sel_e[1] ? exu_lsu_rs2_data_e[63:0] :        	// cas pkt1 uses rs2
+//                	lsu_stb_data_early_sel_e[2] ? exu_lsu_rs3_data_e[63:0] :   	// use rs3/rd data.
+//                		lsu_stb_data_early_sel_e[3] ? {exu_lsu_rs2_data_e[31:0],exu_lsu_rs3_data_e[31:0]} :  
+											// else std non-alt
+//						64'hxxxx_xxxx_xxxx_xxxx ;				
+
+mux4ds #(64) stb_st_data_early_e_mx (
+ .in0 (64'hffff_ffff_ffff_ffff), 
+ .in1 (exu_lsu_rs2_data_e[63:0]),
+ .in2 (exu_lsu_rs3_data_e[63:0]),
+ .in3 ({exu_lsu_rs2_data_e[31:0],exu_lsu_rs3_data_e[31:0]}),
+ .sel0(lsu_stb_data_early_sel_e[0]),
+ .sel1(lsu_stb_data_early_sel_e[1]),
+ .sel2(lsu_stb_data_early_sel_e[2]),
+ .sel3(lsu_stb_data_early_sel_e[3]),
+ .dout(stb_st_data_early_e[63:0]));
+                                    
+
+// Stage early data to m
+dff_s #(64)  stgm_rs2     (             //@@ bw_u1_soffi_2x
+        .din            (stb_st_data_early_e[63:0]),
+        .q              (stb_st_data_early_m[63:0]),
+        .clk            (clk),
+        .se             (se), .si     (), .so ()
+        );
+
+assign  stb_st_data_final_m[63:0] =    //@@ bw_u1_muxi21_2x
+        lsu_stb_data_final_sel_m ? stb_st_data_early_m[63:0] : ffu_lsu_data[63:0] ; 	// mux in fpst data
+
+// Precursor of data to be stored in stb
+// For ldstub, all one's need to be written to stb.
+// For cas/swap, data remains unmodified.
+// Stage final data to g
+dff_s #(64)  stgg_rs2     (             //@@ bw_u1_soffi_2x
+        .din            (stb_st_data_final_m[63:0]),
+        .q              (stb_st_data_g[63:0]),
+        .clk            (clk),
+        .se             (se), .si     (), .so ()
+        );
+
+dff_s #(8)  stgm_sel     (             //@@ bw_u1_soff_8x
+	.din		({lsu_st_sz_bhww_m,lsu_st_sz_dw_m,lsu_st_sz_bhw_m,lsu_st_sz_wdw_m,
+			lsu_st_sz_b_m,lsu_st_sz_w_m,lsu_st_sz_hw_m,lsu_st_sz_hww_m}),
+	.q		({st_sz_bhww_g,st_sz_dw_g,st_sz_bhw_g,st_sz_wdw_g,
+			st_sz_b_g,st_sz_w_g,st_sz_hw_g,st_sz_hww_g}),
+        .clk            (clk),
+        .se             (se), .si     (), .so ()
+        );
+
+// Now format data for st data.
+assign	byte0[7:0] = stb_st_data_g[7:0] ; //@@ PASS
+assign	byte1[7:0] = stb_st_data_g[15:8] ; //@@ PASS
+assign	byte2[7:0] = stb_st_data_g[23:16] ; //@@ PASS
+assign	byte3[7:0] = stb_st_data_g[31:24] ; //@@ PASS
+assign	byte4[7:0] = stb_st_data_g[39:32] ; //@@ PASS
+assign	byte5[7:0] = stb_st_data_g[47:40] ; //@@ PASS
+assign	byte6[7:0] = stb_st_data_g[55:48] ; //@@ PASS
+assign	byte7[7:0] = stb_st_data_g[63:56] ; //@@ PASS
+
+
+//assign	bendian = lsu_bendian_access_g ;	// bendian store
+
+// Control needs to move to lsu_stb_rwctl once this is fully tested.
+
+// First do swap for big-endian vs little-endian case.
+
+//wire	swap_sel_default ;
+
+//assign	swap_sel_default = bendian | (~bendian & st_sz_b_g) ;
+ 
+// swap byte0
+//assign	swap_byte0[7:0] =               //@@ bw_u1_muxi41d_4x
+//	lsu_swap_sel_default_g ? byte0[7:0] : 
+//		lsu_st_hw_le_g ? byte1[7:0] :
+//			lsu_st_w_or_dbl_le_g ? byte3[7:0] :
+//				lsu_st_x_le_g ? byte7[7:0] : 8'bxxxx_xxxx ; 
+
+mux4ds #(8) swap_byte0_mx (
+  .in0 (byte0[7:0]), .sel0(lsu_swap_sel_default_g),
+  .in1 (byte1[7:0]), .sel1(lsu_st_hw_le_g),
+  .in2 (byte3[7:0]), .sel2(lsu_st_w_or_dbl_le_g),
+  .in3 (byte7[7:0]), .sel3(lsu_st_x_le_g),
+  .dout(swap_byte0[7:0]));
+                         
+// swap byte1
+//assign	swap_byte1[7:0] =               //@@ bw_u1_muxi41d_4x
+//	lsu_swap_sel_default_g ? byte1[7:0] : 
+//		lsu_st_hw_le_g ? byte0[7:0] :	
+//			 lsu_st_w_or_dbl_le_g ? byte2[7:0] :
+//				 lsu_st_x_le_g ? byte6[7:0] : 8'bxxxx_xxxx ; 
+
+mux4ds #(8) swap_byte1_mx (
+ .in0 (byte1[7:0]), .sel0(lsu_swap_sel_default_g),
+ .in1 (byte0[7:0]), .sel1(lsu_st_hw_le_g),
+ .in2 (byte2[7:0]), .sel2(lsu_st_w_or_dbl_le_g),
+ .in3 (byte6[7:0]), .sel3(lsu_st_x_le_g),
+ .dout (swap_byte1[7:0]));
+    
+// swap byte2
+//assign	swap_byte2[7:0] =                //@@ bw_u1_muxi31d_4x
+//	lsu_swap_sel_default_g ? byte2[7:0] : 
+//		lsu_st_w_or_dbl_le_g ? byte1[7:0] :
+//			lsu_st_x_le_g ? byte5[7:0] : 8'bxxxx_xxxx ; 
+   
+mux3ds #(8) swap_byte2_mx (
+  .in0 (byte2[7:0]), .sel0(lsu_swap_sel_default_byte_7_2_g),
+  .in1 (byte1[7:0]), .sel1(lsu_st_w_or_dbl_le_g),
+  .in2 (byte5[7:0]), .sel2(lsu_st_x_le_g),
+  .dout (swap_byte2[7:0]));
+      
+// swap byte3
+//assign	swap_byte3[7:0] =                 //@@ bw_u1_muxi31d_4x
+//	lsu_swap_sel_default_g ? byte3[7:0] : 
+//		lsu_st_w_or_dbl_le_g ? byte0[7:0] :
+//			lsu_st_x_le_g ? byte4[7:0] : 8'bxxxx_xxxx ; 
+
+mux3ds #(8) swap_byte3_mx (
+ .in0 (byte3[7:0]), .sel0(lsu_swap_sel_default_byte_7_2_g),
+ .in1 (byte0[7:0]), .sel1(lsu_st_w_or_dbl_le_g),
+ .in2 (byte4[7:0]), .sel2(lsu_st_x_le_g),
+ .dout(swap_byte3[7:0]));
+                          
+// swap byte4
+//assign	swap_byte4[7:0] =                 //@@ bw_u1_muxi31d_4x
+//	lsu_swap_sel_default_g ? byte4[7:0] : 
+//		 lsu_st_w_or_dbl_le_g ? byte7[7:0] :
+//			 lsu_st_x_le_g ? byte3[7:0] : 8'bxxxx_xxxx ; 
+
+mux3ds #(8) swap_byte4_mx (
+.in0 (byte4[7:0]), .sel0(lsu_swap_sel_default_byte_7_2_g),
+.in1 (byte7[7:0]), .sel1(lsu_st_w_or_dbl_le_g),
+.in2 (byte3[7:0]), .sel2(lsu_st_x_le_g),
+.dout(swap_byte4[7:0]));
+  
+// swap byte5
+//assign	swap_byte5[7:0] =                 //@@ bw_u1_muxi31d_4x
+//	lsu_swap_sel_default_g ? byte5[7:0] : 
+//		 lsu_st_w_or_dbl_le_g ? byte6[7:0] :
+//			  lsu_st_x_le_g ? byte2[7:0] : 8'bxxxx_xxxx ; 
+
+mux3ds #(8) swap_byte5_mx (
+ .in0 (byte5[7:0]), .sel0(lsu_swap_sel_default_byte_7_2_g),
+ .in1 (byte6[7:0]), .sel1(lsu_st_w_or_dbl_le_g),
+ .in2 (byte2[7:0]), .sel2(lsu_st_x_le_g),
+ .dout(swap_byte5[7:0]));
+ 
+// swap byte6
+//assign	swap_byte6[7:0] =                 //@@ bw_u1_muxi31d_4x
+//	lsu_swap_sel_default_g ? byte6[7:0] : 
+//		 lsu_st_w_or_dbl_le_g ? byte5[7:0] :
+//			  lsu_st_x_le_g ? byte1[7:0] : 8'bxxxx_xxxx ; 
+
+mux3ds #(8) swap_byte6_mx (                                     // swapped store data, lane 6
+  .dout (swap_byte6[7:0]),
+  .sel0 (lsu_swap_sel_default_byte_7_2_g), .in0 (byte6[7:0]),   // default: lane keeps byte6
+  .sel1 (lsu_st_w_or_dbl_le_g),            .in1 (byte5[7:0]),   // picks byte5
+  .sel2 (lsu_st_x_le_g),                   .in2 (byte1[7:0]));  // picks byte1
+  
+// swap byte7
+//assign	swap_byte7[7:0] =                 //@@ bw_u1_muxi31d_4x
+//	lsu_swap_sel_default_g ? byte7[7:0] : 
+//		 lsu_st_w_or_dbl_le_g ? byte4[7:0] :
+//		    lsu_st_x_le_g ? byte0[7:0] : 8'bxxxx_xxxx ; 
+
+mux3ds #(8) swap_byte7_mx (                                     // swapped store data, lane 7
+  .dout (swap_byte7[7:0]),
+  .sel0 (lsu_swap_sel_default_byte_7_2_g), .in0 (byte7[7:0]),   // default: lane keeps byte7
+  .sel1 (lsu_st_w_or_dbl_le_g),            .in1 (byte4[7:0]),   // picks byte4
+  .sel2 (lsu_st_x_le_g),                   .in2 (byte0[7:0]));  // picks byte0
+   
+// Now replicate data across 8 bytes.
+
+// replicated byte0: bits [7:0] carry swap_byte0 unconditionally
+assign	lsu_stb_st_data_g[7:0] = swap_byte0[7:0] ;	// all data sizes //@@ bw_u1_inv_8x
+
+// replicated byte1: swap_byte0 is copied up when st_sz_b_g is set, otherwise swap_byte1
+assign	lsu_stb_st_data_g[15:8] =                 //@@ bw_u1_muxi21_6x
+		st_sz_b_g ? swap_byte0[7:0] : swap_byte1[7:0] ;
+
+// replicated byte2: swap_byte0 is copied up when st_sz_bhw_g is set, otherwise swap_byte2
+assign	lsu_stb_st_data_g[23:16] =                //@@ bw_u1_muxi21_6x
+		st_sz_bhw_g ? swap_byte0[7:0] : swap_byte2[7:0] ;
+
+// replicated byte3
+//assign	lsu_stb_st_data_g[31:24] =                 //@@ bw_u1_muxi31d_6x
+//		st_sz_b_g ? swap_byte0 :			// swap_byte
+//			st_sz_hw_g ? swap_byte1 :	// hword
+//				st_sz_wdw_g ? swap_byte3 : // dword or word
+//					8'bxxxx_xxxx ;
+
+   // Lane-3 select qualification: when rst_tri_en is high, sel0/sel1 are forced low and sel2 high.
+   wire st_sz_b_g_sel   = ~rst_tri_en & st_sz_b_g;
+   wire st_sz_hw_g_sel  = ~rst_tri_en & st_sz_hw_g;
+   wire st_sz_wdw_g_sel =  rst_tri_en | st_sz_wdw_g;
+
+mux3ds #(8) rpl_byte3_mx (                            // drives store data bits [31:24]
+  .dout (lsu_stb_st_data_g[31:24]),
+  .sel0 (st_sz_b_g_sel),   .in0 (swap_byte0[7:0]),    // byte store: replicate byte0
+  .sel1 (st_sz_hw_g_sel),  .in1 (swap_byte1[7:0]),    // hword store: replicate byte1
+  .sel2 (st_sz_wdw_g_sel), .in2 (swap_byte3[7:0]));   // word or dword store: keep byte3
+                             
+// replicated byte4: swap_byte0 is copied up when st_sz_bhww_g is set, otherwise swap_byte4
+assign	lsu_stb_st_data_g[39:32] =                 //@@ bw_u1_muxi21_6x
+	st_sz_bhww_g ? swap_byte0[7:0] : swap_byte4[7:0] ;	// dword
+
+
+// replicated byte5
+//assign	lsu_stb_st_data_g[47:40] =                 //@@ bw_u1_muxi31d_6x
+//		st_sz_b_g ? swap_byte0 :			// swap_byte 
+//			st_sz_hww_g ? swap_byte1 :	// hword or word
+//				st_sz_dw_g ? swap_byte5 : // dword
+//					8'bxxxx_xxxx ;
+
+    // Lane-5 select qualification: rst_tri_en forces sel1 low and sel2 high.
+   wire st_sz_hww_g_sel = ~rst_tri_en & st_sz_hww_g;
+   wire st_sz_dw_g_sel  =  rst_tri_en | st_sz_dw_g;
+
+mux3ds #(8) rpl_byte5_mx (                            // drives store data bits [47:40]
+  .dout (lsu_stb_st_data_g[47:40]),
+  .sel0 (st_sz_b_g_sel),   .in0 (swap_byte0[7:0]),    // byte store: replicate byte0
+  .sel1 (st_sz_hww_g_sel), .in1 (swap_byte1[7:0]),    // hword or word store: replicate byte1
+  .sel2 (st_sz_dw_g_sel),  .in2 (swap_byte5[7:0]));   // dword store: keep byte5
+                           
+// replicated byte6
+//assign	lsu_stb_st_data_g[55:48] =                 //@@ bw_u1_muxi31d_6x
+//		st_sz_bhw_g ? swap_byte0 :		// swap_byte or hword
+//			st_sz_w_g ? swap_byte2 :		// word
+//				st_sz_wdw_g ? swap_byte6 : // dword
+//					8'bxxxx_xxxx ;
+
+   // Lane-6 select qualification: rst_tri_en forces both of these selects low.
+   // The third select used below, st_sz_dw_g_sel, is declared alongside the
+   // lane-5 mux and is forced high by rst_tri_en there.
+   wire st_sz_bhw_g_sel = ~rst_tri_en & st_sz_bhw_g;
+   wire st_sz_w_g_sel   = ~rst_tri_en & st_sz_w_g;
+
+mux3ds #(8) rpl_byte6_mx (                            // drives store data bits [55:48]
+  .dout (lsu_stb_st_data_g[55:48]),
+  // byte/hword stores replicate byte0, word stores replicate byte2,
+  // dword stores keep byte6.
+  .sel0 (st_sz_bhw_g_sel), .in0 (swap_byte0[7:0]),
+  .sel1 (st_sz_w_g_sel),   .in1 (swap_byte2[7:0]),
+  .sel2 (st_sz_dw_g_sel),  .in2 (swap_byte6[7:0]));
+ 
+// replicated byte7
+//assign	lsu_stb_st_data_g[63:56] =                //@@ bw_u1_muxi41d_6x
+//		st_sz_b_g ? swap_byte0 :			// swap_byte
+//			st_sz_hw_g ? swap_byte1 :	// hword
+//				st_sz_w_g ? swap_byte3 :	// word
+//					st_sz_dw_g ? swap_byte7 : // dword
+//						8'bxxxx_xxxx ;
+
+mux4ds #(8) rpl_byte7_mx (                            // drives store data bits [63:56]
+  .dout (lsu_stb_st_data_g[63:56]),
+  .sel0 (st_sz_b_g_sel),  .in0 (swap_byte0[7:0]),     // byte store: replicate byte0
+  .sel1 (st_sz_hw_g_sel), .in1 (swap_byte1[7:0]),     // hword store: replicate byte1
+  .sel2 (st_sz_w_g_sel),  .in2 (swap_byte3[7:0]),     // word store: replicate byte3
+  .sel3 (st_sz_dw_g_sel), .in3 (swap_byte7[7:0]));    // dword store: keep byte7
+    
+//=========================================================
+// stb rdata buffer: the *_buf outputs are plain pass-through copies of the stb read data
+   assign stb_rdata_ramd_buf[69:0] = stb_rdata_ramd[69:0];
+   assign stb_rdata_ramd_b74_buf = stb_rdata_ramd_b74;
+      
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_qctl2.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_qctl2.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_qctl2.v	(revision 6)
@@ -0,0 +1,2273 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_qctl2.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+/////////////////////////////////////////////////////////////////////
+/*
+//  Description:  LSU Queue Control for Sparc Core  
+//      - includes monitoring for pcx queues
+//      - control for lsu datapath
+//      - rd/wr control of dfq 
+//
+*/
+////////////////////////////////////////////////////////////////////////
+// header file includes
+////////////////////////////////////////////////////////////////////////
+`include  "sys.h" // system level definition file which contains the 
+                  // time scale definition
+`include  "iop.h" 
+
+`include  "lsu.h" 
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module lsu_qctl2 ( /*AUTOARG*/
+   // Outputs
+   so, lsu_fwd_rply_sz1_unc, lsu_dcache_iob_rd_w, ldd_in_dfq_out, 
+   lsu_dfq_rd_vld_d1, dfq_byp_ff_en, lsu_dfill_data_sel_hi, 
+   lsu_ifill_pkt_vld, cpx_fwd_pkt_en_cx, lsu_cpxpkt_type_dcd_cx, 
+   lsu_cpu_dcd_sel, lsu_cpu_uhlf_sel, lsu_iobrdge_rply_data_sel, 
+   lsu_iobrdge_fwd_pkt_vld, lsu_tlu_cpx_vld, lsu_tlu_cpx_req, 
+   lsu_tlu_intpkt, ld_sec_active, dfq_byp_sel, 
+   lsu_cpx_ld_dtag_perror_e, lsu_cpx_ld_dcache_perror_e, 
+   lsu_exu_rd_m, lsu_spu_strm_ack_cmplt, lsu_atm_st_cmplt_e, 
+   dva_svld_e, dfq_wptr_vld, dfq_wptr, lsu_dfq_flsh_cmplt, 
+   dfq_rptr_vld, dfq_rptr, lsu_ifu_stallreq, dva_snp_addr_e, 
+   lsu_st_ack_dq_stb, lsu_cpx_rmo_st_ack, lsu_st_wr_dcache, 
+   cpx_st_ack_tid0, cpx_st_ack_tid1, cpx_st_ack_tid2, 
+   cpx_st_ack_tid3, lsu_tlu_l2_dmiss, lsu_l2fill_vld, 
+   lsu_byp_ldd_oddrd_m, lsu_pcx_fwd_reply, lsu_fwdpkt_vld, 
+   lsu_dcfill_active_e, lsu_dfq_ld_vld, lsu_fldd_vld_en, 
+   lsu_dfill_dcd_thrd, lsu_fwdpkt_dest, dva_snp_bit_wr_en_e, 
+   lsu_cpx_spc_inv_vld, lsu_cpx_thrdid, lsu_cpx_stack_dcfill_vld, 
+   lsu_dfq_vld_entry_w, lsu_cpx_stack_icfill_vld, lsu_dfq_st_vld, 
+   lsu_dfq_ldst_vld, lsu_qdp2_dfq_ld_vld, lsu_qdp2_dfq_st_vld, 
+   lsu_cpx_stack_dcfill_vld_b130, lsu_dfq_vld, lsu_dfq_byp_ff_en, 
+   // Inputs
+   rclk, grst_l, arst_l, si, se, rst_tri_en, ld_inst_vld_e, 
+   ifu_pcx_pkt_b51, ifu_pcx_pkt_b41t40, ifu_pcx_pkt_b10t5, 
+   lsu_dfq_rdata_flush_bit, lsu_dfq_rdata_b17_b0, 
+   cpx_spc_data_cx_b144to140, cpx_spc_data_cx_b138, 
+   cpx_spc_data_cx_b135to134, 
+   cpx_spc_data_cx_b133, cpx_spc_data_cx_b130, cpx_spc_data_cx_b129, 
+   cpx_spc_data_cx_b128, cpx_spc_data_cx_b125, 
+   cpx_spc_data_cx_b124to123, cpx_spc_data_cx_b120to118, 
+   cpx_spc_data_cx_b71to70, cpx_spc_data_cx_b0, cpx_spc_data_cx_b4, 
+   cpx_spc_data_cx_b8, cpx_spc_data_cx_b12, cpx_spc_data_cx_b16, 
+   cpx_spc_data_cx_b20, cpx_spc_data_cx_b24, cpx_spc_data_cx_b28, 
+   cpx_spc_data_cx_b32, cpx_spc_data_cx_b35, cpx_spc_data_cx_b38, 
+   cpx_spc_data_cx_b41, cpx_spc_data_cx_b44, cpx_spc_data_cx_b47, 
+   cpx_spc_data_cx_b50, cpx_spc_data_cx_b53, cpx_spc_data_cx_b56, 
+   cpx_spc_data_cx_b60, cpx_spc_data_cx_b64, cpx_spc_data_cx_b68, 
+   cpx_spc_data_cx_b72, cpx_spc_data_cx_b76, cpx_spc_data_cx_b80, 
+   cpx_spc_data_cx_b84, cpx_spc_data_cx_b88, cpx_spc_data_cx_b91, 
+   cpx_spc_data_cx_b94, cpx_spc_data_cx_b97, cpx_spc_data_cx_b100, 
+   cpx_spc_data_cx_b103, cpx_spc_data_cx_b106, cpx_spc_data_cx_b109, 
+   cpx_spc_data_cx_b1, cpx_spc_data_cx_b5, cpx_spc_data_cx_b9, 
+   cpx_spc_data_cx_b13, cpx_spc_data_cx_b17, cpx_spc_data_cx_b21, 
+   cpx_spc_data_cx_b25, cpx_spc_data_cx_b29, cpx_spc_data_cx_b57, 
+   cpx_spc_data_cx_b61, cpx_spc_data_cx_b65, cpx_spc_data_cx_b69, 
+   cpx_spc_data_cx_b73, cpx_spc_data_cx_b77, cpx_spc_data_cx_b81, 
+   cpx_spc_data_cx_b85, ifu_lsu_rd_e, lmq_ld_rd1, lmq_ldd_vld, 
+   dfq_tid, const_cpuid, lmq_ld_addr_b3, ifu_lsu_ibuf_busy, 
+   ifu_lsu_inv_clear, lsu_byp_misc_sz_e, lsu_dfq_byp_tid, 
+   lsu_cpx_pkt_atm_st_cmplt, lsu_cpx_pkt_l2miss, lsu_cpx_pkt_tid, 
+   lsu_cpx_pkt_invwy, lsu_dfq_byp_flush, lsu_dfq_byp_type, 
+   lsu_dfq_byp_invwy_vld, lsu_cpu_inv_data_b13to9, 
+   lsu_cpu_inv_data_b7to2, lsu_cpu_inv_data_b0, lsu_cpx_pkt_inv_pa, 
+   lsu_cpx_pkt_ifill_type, lsu_cpx_pkt_atomic, lsu_cpx_pkt_binit_st, 
+   lsu_cpx_pkt_prefetch, lsu_dfq_byp_binit_st, lsu_tlbop_force_swo, 
+   lsu_iobrdge_tap_rq_type, lsu_dcache_tag_perror_g, 
+   lsu_dcache_data_perror_g, lsu_cpx_pkt_perror_iinv, 
+   lsu_cpx_pkt_perror_dinv, lsu_cpx_pkt_perror_set, 
+   lsu_l2fill_fpld_e, lsu_cpx_pkt_strm_ack, ifu_lsu_memref_d, 
+   lsu_fwdpkt_pcx_rq_sel, lsu_imiss_pcx_rq_sel_d1, 
+   lsu_dfq_byp_cpx_inv, lsu_dfq_byp_stack_adr_b54, 
+   lsu_dfq_byp_stack_wrway, lsu_dfq_rdata_st_ack_type, 
+   lsu_dfq_rdata_stack_dcfill_vld, lsu_dfq_rdata_stack_iinv_vld, 
+   lsu_dfq_rdata_cpuid, lsu_dfq_byp_atm, lsu_ld_inst_vld_g, 
+   lsu_dfq_rdata_type, lsu_dfq_rdata_invwy_vld, ifu_lsu_fwd_data_vld, 
+   ifu_lsu_fwd_wr_ack, lsu_dfq_rdata_rq_type, lsu_dfq_rdata_b103, 
+   sehold
+   ) ;  
+
+
+input     rclk ;
+input     grst_l;
+input     arst_l;
+input     si;
+input     se;
+input     rst_tri_en;
+output    so;
+
+input                   ld_inst_vld_e;        // valid ld inst; d-stage
+input                   ifu_pcx_pkt_b51;        // pcx pkt from ifu on imiss
+input [1:0]             ifu_pcx_pkt_b41t40;     // pcx pkt from ifu on imiss
+input [5:0]             ifu_pcx_pkt_b10t5;      // pcx pkt from ifu on imiss
+//input                   cpx_spc_data_rdy_cx ;   // data ready to processor
+//input [`CPX_WIDTH-1:71] cpx_spc_data_cx ;       // cpx to processor packet
+//input [`CPX_WIDTH-1:0] cpx_spc_data_cx ;       // cpx to processor packet
+//input [17:0]            cpx_spc_data_b17t0_cx ; // cpx to processor packet
+   input                lsu_dfq_rdata_flush_bit;
+   input [17:0]         lsu_dfq_rdata_b17_b0;
+   
+input [`CPX_WIDTH-1:140] cpx_spc_data_cx_b144to140 ;       // vld, req type
+input                   cpx_spc_data_cx_b138 ;  
+//input                   cpx_spc_data_cx_b136 ;  
+input [`CPX_TH_HI:`CPX_TH_LO] cpx_spc_data_cx_b135to134 ;  // thread id
+input                   cpx_spc_data_cx_b133 ;  
+input                   cpx_spc_data_cx_b130 ;  
+input                   cpx_spc_data_cx_b129 ;  
+input                   cpx_spc_data_cx_b128 ;  
+input                   cpx_spc_data_cx_b125 ;  
+input [`CPX_PERR_DINV+1:`CPX_PERR_DINV] cpx_spc_data_cx_b124to123 ;  // inv packet iinv,dinv
+input [`CPX_INV_CID_HI:`CPX_INV_CID_LO] cpx_spc_data_cx_b120to118 ;  // inv packet cpu id
+input [1:0]             cpx_spc_data_cx_b71to70 ;  
+
+input        cpx_spc_data_cx_b0 ;
+input        cpx_spc_data_cx_b4 ;
+input        cpx_spc_data_cx_b8 ;
+input        cpx_spc_data_cx_b12 ;
+input        cpx_spc_data_cx_b16 ;
+input        cpx_spc_data_cx_b20 ;
+input        cpx_spc_data_cx_b24 ;
+input        cpx_spc_data_cx_b28 ;
+
+input        cpx_spc_data_cx_b32 ;
+input        cpx_spc_data_cx_b35 ;
+input        cpx_spc_data_cx_b38 ;
+input        cpx_spc_data_cx_b41 ;
+input        cpx_spc_data_cx_b44 ;
+input        cpx_spc_data_cx_b47 ;
+input        cpx_spc_data_cx_b50 ;
+input        cpx_spc_data_cx_b53 ;
+
+input        cpx_spc_data_cx_b56 ;
+input        cpx_spc_data_cx_b60 ;
+input        cpx_spc_data_cx_b64 ;
+input        cpx_spc_data_cx_b68 ;
+input        cpx_spc_data_cx_b72 ;
+input        cpx_spc_data_cx_b76 ;
+input        cpx_spc_data_cx_b80 ;
+input        cpx_spc_data_cx_b84 ;
+
+input        cpx_spc_data_cx_b88 ;
+input        cpx_spc_data_cx_b91 ;
+input        cpx_spc_data_cx_b94 ;
+input        cpx_spc_data_cx_b97 ;
+input        cpx_spc_data_cx_b100 ;
+input        cpx_spc_data_cx_b103 ;
+input        cpx_spc_data_cx_b106 ;
+input        cpx_spc_data_cx_b109 ;
+
+input        cpx_spc_data_cx_b1 ;
+input        cpx_spc_data_cx_b5 ;
+input        cpx_spc_data_cx_b9 ;
+input        cpx_spc_data_cx_b13 ;
+input        cpx_spc_data_cx_b17 ;
+input        cpx_spc_data_cx_b21 ;
+input        cpx_spc_data_cx_b25 ;
+input        cpx_spc_data_cx_b29 ;
+
+input        cpx_spc_data_cx_b57 ;
+input        cpx_spc_data_cx_b61 ;
+input        cpx_spc_data_cx_b65 ;
+input        cpx_spc_data_cx_b69 ;
+input        cpx_spc_data_cx_b73 ;
+input        cpx_spc_data_cx_b77 ;
+input        cpx_spc_data_cx_b81 ;
+input        cpx_spc_data_cx_b85 ;
+
+input [4:0]             ifu_lsu_rd_e ;          // rd for current load request.
+//input                   lsu_ld_miss_g ;         // load misses in dcache.
+input  [4:0]            lmq_ld_rd1 ;            // rd for all loads
+input                   lmq_ldd_vld ;           // ld double   
+//input                   ld_stb_full_raw_g ;    // full raw for load - thread0
+//input                   ld_stb_partial_raw_g ; // partial raw for load - thread0
+/*
+input                   ld_sec_hit_thrd0 ;      // ld has sec. hit against th0
+input                   ld_sec_hit_thrd1 ;      // ld has sec. hit against th1
+input                   ld_sec_hit_thrd2 ;      // ld has sec. hit against th2
+input                   ld_sec_hit_thrd3 ;      // ld has sec. hit against th3
+*/
+input   [1:0]           dfq_tid ;               // thread-id for load at head of DFQ. 
+//input   [1:0]           dfq_byp_tid ;           // in-flight thread-id for load at head of DFQ. 
+//input                   ldxa_internal ;         // internal ldxa, stg g 
+//input [3:0]             ld_thrd_byp_sel ;       // stb,ldxa thread byp sel
+input [2:0]             const_cpuid ;           // cpu id
+input                   lmq_ld_addr_b3 ;        // bit3 of addr at head of queue.
+//input                   ifu_tlu_inst_vld_m ;    // inst is vld - wstage
+//input                   tlu_ifu_flush_pipe_w ;  // flush event in wstage
+//input                   lsu_ldstub_g ;          // ldstub(a) instruction
+//input                   lsu_swap_g ;            // swap(a) instruction 
+//input                   tlu_lsu_pcxpkt_vld ;
+//input [11:10]           tlu_lsu_pcxpkt_l2baddr ;
+//input [19:18]           tlu_lsu_pcxpkt_tid ;
+input                   ifu_lsu_ibuf_busy ;
+input                   ifu_lsu_inv_clear ;
+input   [1:0]           lsu_byp_misc_sz_e ;     // size for ldxa/raw etc
+input   [1:0]           lsu_dfq_byp_tid ;
+input                   lsu_cpx_pkt_atm_st_cmplt ;
+input                   lsu_cpx_pkt_l2miss ;
+input   [1:0]           lsu_cpx_pkt_tid ;
+input   [1:0]           lsu_cpx_pkt_invwy ;     // invalidate way
+input                   lsu_dfq_byp_flush ;
+input   [5:0]           lsu_dfq_byp_type ;
+input                   lsu_dfq_byp_invwy_vld ;
+//input   [13:0]          lsu_cpu_inv_data ;
+input   [13:9]          lsu_cpu_inv_data_b13to9 ;
+input   [7:2]           lsu_cpu_inv_data_b7to2 ;
+input                   lsu_cpu_inv_data_b0 ;
+//input   [2:0]           lsu_dfq_byp_cpuid ;
+input   [4:0]           lsu_cpx_pkt_inv_pa ;    // invalidate pa [10:6]
+input                   lsu_cpx_pkt_ifill_type ;
+//input                   stb_cam_hit ; REMOVED
+input                   lsu_cpx_pkt_atomic ;
+//input                   lsu_dfq_byp_stquad_pkt2 ;
+//input                   lsu_cpx_pkt_stquad_pkt2 ;
+input                   lsu_cpx_pkt_binit_st ;
+input                   lsu_cpx_pkt_prefetch ;
+input                   lsu_dfq_byp_binit_st ;
+//input   [3:0]           lsu_stb_empty ;
+input                   lsu_tlbop_force_swo ;
+input   [7:3]           lsu_iobrdge_tap_rq_type ; 
+input                   lsu_dcache_tag_perror_g ;  // dcache tag parity error
+input                   lsu_dcache_data_perror_g ; // dcache data parity error
+//input                   lsu_dfq_byp_perror_dinv ;  // dtag perror corr. st ack
+//input                   lsu_dfq_byp_perror_iinv ;  // itag perror corr. st ack
+
+
+input                   lsu_cpx_pkt_perror_iinv ;   // itag perror corr. st ack
+input                   lsu_cpx_pkt_perror_dinv ;   // dtag perror corr. st ack
+input   [1:0]           lsu_cpx_pkt_perror_set ;   // dtag perror - spec. b54
+//input                   lsu_diagnstc_wr_src_sel_e ;// dcache/dtag/vld
+input         		lsu_l2fill_fpld_e ;      // fp load
+input                   lsu_cpx_pkt_strm_ack ;
+   
+input                   ifu_lsu_memref_d ;
+//input   [3:0]           lmq_enable;
+//input   [3:0]           ld_pcx_rq_sel ;
+input                   lsu_fwdpkt_pcx_rq_sel ;
+//input                   lsu_ld0_pcx_rq_sel_d1, lsu_ld1_pcx_rq_sel_d1 ;
+//input                   lsu_ld2_pcx_rq_sel_d1, lsu_ld3_pcx_rq_sel_d1 ;
+input                   lsu_imiss_pcx_rq_sel_d1 ;
+
+//input                   lsu_dc_iob_access_e;
+
+//   input                mbist_dcache_write;
+//   input                mbist_dcache_read;
+   
+
+input                   lsu_dfq_byp_cpx_inv ;
+//input			lsu_dfq_byp_stack_dcfill_vld ;
+input  [1:0]            lsu_dfq_byp_stack_adr_b54;
+input  [1:0]            lsu_dfq_byp_stack_wrway;
+
+input                   lsu_dfq_rdata_st_ack_type;
+input                   lsu_dfq_rdata_stack_dcfill_vld;
+
+input                   lsu_dfq_rdata_stack_iinv_vld;
+
+input  [2:0]            lsu_dfq_rdata_cpuid;
+
+input                   lsu_dfq_byp_atm;
+
+input	[3:0]		lsu_ld_inst_vld_g ;
+
+input	[5:0]		lsu_dfq_rdata_type ;
+input			lsu_dfq_rdata_invwy_vld ;
+
+input			ifu_lsu_fwd_data_vld ; // icache ramtest read cmplt
+input			ifu_lsu_fwd_wr_ack ;   // icache ramtest wr cmplt
+
+input	[3:0]		lsu_dfq_rdata_rq_type ;
+input                   lsu_dfq_rdata_b103 ;
+
+input                   sehold ;
+
+output 			lsu_fwd_rply_sz1_unc ;
+output			lsu_dcache_iob_rd_w ;
+
+output     		ldd_in_dfq_out;
+   
+output                  lsu_dfq_rd_vld_d1 ;
+output                  dfq_byp_ff_en ;
+output                  lsu_dfill_data_sel_hi;// select hi or low order 8B. 
+output                  lsu_ifill_pkt_vld ;   // ifill pkt vld
+output                  cpx_fwd_pkt_en_cx ;
+output  [5:0]           lsu_cpxpkt_type_dcd_cx ;
+output  [7:0]           lsu_cpu_dcd_sel ;
+output                  lsu_cpu_uhlf_sel ;
+//output                  lsu_st_wr_sel_e ;
+//output  [1:0]           lsu_st_ack_addr_b54 ;
+//output  [1:0]           lsu_st_ack_wrwy ;       // cache set way to write to.
+
+output  [2:0]           lsu_iobrdge_rply_data_sel ;
+output                  lsu_iobrdge_fwd_pkt_vld ;
+output                  lsu_tlu_cpx_vld;    // cpx pkt vld
+output  [3:0]           lsu_tlu_cpx_req;    // cpx pkt rq type
+output  [17:0]          lsu_tlu_intpkt;     // cpx interrupt pkt
+//output                  lsu_tlu_pcxpkt_ack; // ack for intr pkt.
+//output  [3:0]           lsu_intrpt_cmplt ;      // intrpt can restart thread
+//output                  lsu_ld_sec_hit_l2access_g ;
+//output  [1:0]           lsu_ld_sec_hit_wy_g ;
+output                  ld_sec_active ;     // secondary bypassing
+output  [3:0]           dfq_byp_sel ;
+//output  [3:0]           lsu_dfq_byp_mxsel ; // to qdp1
+//output  [3:0]           lmq_byp_misc_sel ;    // select g-stage lmq source
+//output                  lsu_pcx_ld_dtag_perror_w2 ;
+output                  lsu_cpx_ld_dtag_perror_e ;
+output                  lsu_cpx_ld_dcache_perror_e ;
+//output  [1:0]           lsu_cpx_atm_st_err ;
+//output                  lsu_ignore_fill ;
+//output  [4:0]           lsu_exu_rd_w2 ;
+output  [4:0]           lsu_exu_rd_m ;
+output  [1:0]           lsu_spu_strm_ack_cmplt ;
+output	           	lsu_atm_st_cmplt_e ;  // atm st ack will restart thread
+output                  dva_svld_e ;        // snoop is valid
+output                  dfq_wptr_vld ;          // write pointer valid
+output  [4:0]           dfq_wptr ;              // encoded write pointer
+output  [3:0]           lsu_dfq_flsh_cmplt ;
+output                  dfq_rptr_vld ;          // read pointer valid
+output  [4:0]           dfq_rptr ;              // encoded read pointer
+output                  lsu_ifu_stallreq ;      // cfq has crossed high-water mark
+output  [4:0]           dva_snp_addr_e;         // Upper 5b of cache set index PA[10:6]
+//output  [3:0]           dva_snp_set_vld_e;      // Lower 2b of cache set index - decoded
+//output  [1:0]           dva_snp_wy0_e ;         // way for addr<5:4>=00
+//output  [1:0]           dva_snp_wy1_e ;         // way for addr<5:4>=01
+//output  [1:0]           dva_snp_wy2_e ;         // way for addr<5:4>=10
+//output  [1:0]           dva_snp_wy3_e ;         // way for addr<5:4>=11
+//output  [3:0]           lsu_st_ack_rq_stb ;
+output  [3:0]           lsu_st_ack_dq_stb ;
+output  [3:0]           lsu_cpx_rmo_st_ack ;    // rmo ack clears
+output                  lsu_st_wr_dcache ;
+output                  cpx_st_ack_tid0 ;   // st ack for thread0
+output                  cpx_st_ack_tid1 ;   // st ack for thread1
+output                  cpx_st_ack_tid2 ;   // st ack for thread2
+output                  cpx_st_ack_tid3 ;   // st ack for thread3
+output  [3:0]           lsu_tlu_l2_dmiss ;       // performance cntr
+//output  [3:0]           lsu_ifu_stq_busy ;         // thread is busy with 1 stq - not used
+output                  lsu_l2fill_vld ;        // dfill data vld
+output                  lsu_byp_ldd_oddrd_m ; // rd fill for non-alt ldd
+output                  lsu_pcx_fwd_reply ;   // fwd reply on pcx pkt
+//output                  lsu_intrpt_pkt_vld ;
+output                  lsu_fwdpkt_vld;
+//output  [3:0]           lsu_error_rst ;
+output                  lsu_dcfill_active_e;	// not same as dcfill_active_e; qual'ed w/ ignore_fill
+//output                  lsu_dfq_byp_vld ;
+output                  lsu_dfq_ld_vld;
+output                  lsu_fldd_vld_en;
+output  [3:0]           lsu_dfill_dcd_thrd ;
+output  [4:0]           lsu_fwdpkt_dest ;
+//output                  dcfill_src_dfq_sel ;    // ld-inv is src
+output [15:0]        dva_snp_bit_wr_en_e;
+
+//output [3:0]         lsu_dcfill_mx_sel_e;
+//output               lsu_dcfill_addr_mx_sel_e;
+//output               lsu_dcfill_data_mx_sel_e;
+//output               lsu_dcfill_size_mx_sel_e;
+
+output               lsu_cpx_spc_inv_vld;  // dfq write data in[152]
+output [3:0]         lsu_cpx_thrdid;
+output               lsu_cpx_stack_dcfill_vld ;
+
+//output	[3:0]		lsu_dtag_perror_w2 ;
+
+output  		lsu_dfq_vld_entry_w ;
+
+output  		lsu_cpx_stack_icfill_vld ;
+
+output                  lsu_dfq_st_vld;
+output                  lsu_dfq_ldst_vld;
+   //pref counter
+//   output [3:0] lsu_cpx_pref_ack;
+
+output                  lsu_qdp2_dfq_ld_vld;
+output                  lsu_qdp2_dfq_st_vld;
+
+output                  lsu_cpx_stack_dcfill_vld_b130;
+   
+output  		lsu_dfq_vld ;
+
+output                  lsu_dfq_byp_ff_en ;
+
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+
+
+wire        cpx_local_st_ack_type ;
+wire  [3:0] cpx_pkt_thrd_sel ;
+//wire  [3:0] tap_thread ;
+wire      cpx_reverse_req , cpx_reverse_req_d1 ;
+wire    cpx_fwd_req,cpx_fwd_reply;
+wire    fwdpkt_reset ;
+wire        dfq_inv_vld ;
+//wire    intrpt_vld_reset ;
+//wire    intrpt_vld_en ;
+//wire    ld0_sec_hit_g,ld1_sec_hit_g,ld2_sec_hit_g,ld3_sec_hit_g;
+//wire  [3:0] intrpt_thread ;
+wire    dfq_byp_ld_vld ;
+//wire    intrpt_clr ;
+wire    dfq_rptr_vld_d1 ;
+wire    dfq_rd_advance ;
+wire        dfq_wr_en, dfq_byp_full, dcfill_active_e ;
+wire    dfq_thread0,dfq_thread1,dfq_thread2,dfq_thread3;
+//wire    ld_any_thrd_byp_sel ;
+wire    stwr_active_e,stdq_active_e ;
+wire  [3:0] error_en ;
+wire        ldd_vld_reset, ldd_vld_en, ldd_in_dfq_out ;
+wire    ldd_non_alt_space ;
+wire    ldd_oddrd_e ;
+wire        inv_active_e ;
+wire    dfq_st_vld ;
+//wire    local_inv ;
+wire    dfq_local_inv ;
+//wire    st_ack_rq_stb_d1 ;
+//wire    cpx_inv ;
+wire    dfq_byp_inv_vld ;
+wire    dfq_invwy_vld;
+wire    local_pkt ;
+wire    dfq_byp_st_vld ;
+wire        dfq_vld_reset, dfq_vld_en ;
+//wire  [3:0] st_wrwy_sel ;
+//wire  [13:0]  cpx_cpu_inv_data ;
+wire        dfq_vld_entry_exists ;
+wire    cpx_st_ack_type,cpx_strm_st_ack_type,cpx_int_type;
+wire    cpx_ld_type,cpx_ifill_type,cpx_evict_type;
+wire  [5:0]     dfq_wptr_new_w_wrap ;   // 5b ptr with wrap bit.
+wire  [5:0]     dfq_rptr_new_w_wrap ;   // 5b ptr with wrap bit.
+wire  [5:0]     dfq_wptr_w_wrap ;   // 5b ptr with wrap bit.
+//wire    i_and_d_codepend ;
+wire    dfq_ld_type,dfq_ifill_type,dfq_evict_type ;
+wire    dfq_st_ack_type,dfq_strm_st_ack_type,dfq_int_type;
+wire  [5:0]     dfq_rptr_w_wrap ;   // 5b ptr with wrap bit.
+wire  [3:0]   imiss_dcd_b54 ;
+//wire    st_ack_rq_stb ;
+//wire  [1:0] st_ack_tid ;
+wire  [3:0] cpu_sel ;
+wire  [1:0] fwdpkt_l2bnk_addr ;
+//wire  [2:0] intrpt_l2bnk_addr ;
+//wire  [3:0] dfq_byp_sel_m, dfq_byp_sel_g ;
+//wire  [1:0] ld_error0,ld_error1,ld_error2,ld_error3 ;
+//wire  [4:0] ld_l1hit_rd_m,ld_l1hit_rd_g;
+wire  [4:0] ld_l1hit_rd_m;
+//wire  [13:0]  dfq_inv_data ;
+wire  [13:9]  dfq_inv_data_b13to9 ;
+wire  [7:2]   dfq_inv_data_b7to2 ;
+wire          dfq_inv_data_b0 ;
+wire          fwdpkt_vld;
+wire  [3:0]   dfill_dcd_thrd ;
+wire  [3:0]   error_rst ;
+wire          dfq_ld_vld;
+wire          dfq_byp_vld ;
+wire          reset;
+wire          st_rd_advance;
+wire	vld_dfq_pkt ;
+wire          dfq_vld_entry_exists_w;
+wire          dfq_rdata_local_pkt;
+wire 	      dfq_st_cmplt ;
+wire          cpx_fp_type ;
+wire	dfq_stall, dfq_stall_d1 ;
+wire          cpx_error_type ;
+wire          dfq_error_type ;
+wire          cpx_fwd_req_ic ;
+wire          dfq_fwd_req_ic_type ;
+wire          dfq_rd_vld_d1 ;
+
+
+    dffrl_async rstff(.din (grst_l),                           // registers grst_l on clk into dbb_reset_l
+                        .q   (dbb_reset_l),
+                        .clk (clk), .se(se), .si(), .so(),     // NOTE(review): clk/dbb_reset_l have no declaration in the visible part of this module - confirm whether they are implicit nets
+                        .rst_l (arst_l));                      // arst_l drives the flop's rst_l pin
+
+assign  reset  =  ~dbb_reset_l;    // inverted copy of the registered grst_l
+assign  clk = rclk;                // local name for the rclk input
+
+
+
+//wire                   lsu_bist_wvld_e;
+//wire                   lsu_bist_rvld_e;
+
+//dff #(2) mbist_stge (
+//   .din ({mbist_dcache_write, mbist_dcache_read}),
+//   .q   ({lsu_bist_wvld_e,    lsu_bist_rvld_e  }),
+//   .clk (clk),
+//   .se  (1'b0),       .si (),          .so ()
+//);   
+   
+//=================================================================================================
+// SHADOW SCAN
+//=================================================================================================
+
+// Monitors whether there is a valid entry in the dfq (output is a direct copy of dfq_vld_entry_exists_w).
+assign	lsu_dfq_vld_entry_w = dfq_vld_entry_exists_w ;
+// Monitors whether dfq_byp flop remains full
+//assign	lsu_sscan_data[?] = dfq_byp_full ;
+   
+//=================================================================================================
+//
+// QDP2 Specific Control
+//
+//=================================================================================================
+
+// Need to be careful. This may prevent stores
+//assign  dcfill_src_dfq_sel = dcfill_active_e ;
+
+
+
+
+
+//=================================================================================================
+//  IMISS X-INVALIDATION
+//=================================================================================================
+
+// Assume all imisses are aligned to a 32B boundary in L2 ?
+
+// Capture enables for the four imiss set-index registers (imiss_inv0..3).
+wire  imiss0_inv_en, imiss1_inv_en, imiss2_inv_en, imiss3_inv_en ;
+// Set-index registers, one per enable.
+// (8/28/03 - vlint cleanup: these were previously declared [10:4].)
+wire  [10:5] imiss0_set_index, imiss1_set_index, imiss2_set_index, imiss3_set_index ;
+
+// Enable n is asserted when lsu_imiss_pcx_rq_sel_d1 and ifu_pcx_pkt_b51 are both
+// set and ifu_pcx_pkt_b41t40[1:0] carries the value n.
+assign  imiss0_inv_en = lsu_imiss_pcx_rq_sel_d1 & ifu_pcx_pkt_b51 & ~ifu_pcx_pkt_b41t40[0] & ~ifu_pcx_pkt_b41t40[1] ;
+assign  imiss1_inv_en = lsu_imiss_pcx_rq_sel_d1 & ifu_pcx_pkt_b51 &  ifu_pcx_pkt_b41t40[0] & ~ifu_pcx_pkt_b41t40[1] ;
+assign  imiss2_inv_en = lsu_imiss_pcx_rq_sel_d1 & ifu_pcx_pkt_b51 & ~ifu_pcx_pkt_b41t40[0] &  ifu_pcx_pkt_b41t40[1] ;
+assign  imiss3_inv_en = lsu_imiss_pcx_rq_sel_d1 & ifu_pcx_pkt_b51 &  ifu_pcx_pkt_b41t40[0] &  ifu_pcx_pkt_b41t40[1] ;
+
+dffe_s #(6) imiss_inv0 (            // loads ifu_pcx_pkt_b10t5 while imiss0_inv_en is set
+  .clk (clk),
+  .en  (imiss0_inv_en),
+  .din (ifu_pcx_pkt_b10t5[5:0]),
+  .q   (imiss0_set_index[10:5]),
+  .se  (1'b0), .si (), .so ()        // scan enable tied low, scan in/out left open
+);
+
+dffe_s #(6) imiss_inv1 (            // loads ifu_pcx_pkt_b10t5 while imiss1_inv_en is set
+  .clk (clk),
+  .en  (imiss1_inv_en),
+  .din (ifu_pcx_pkt_b10t5[5:0]),
+  .q   (imiss1_set_index[10:5]),
+  .se  (1'b0), .si (), .so ()        // scan enable tied low, scan in/out left open
+);
+
+dffe_s #(6) imiss_inv2 (            // loads ifu_pcx_pkt_b10t5 while imiss2_inv_en is set
+  .clk (clk),
+  .en  (imiss2_inv_en),
+  .din (ifu_pcx_pkt_b10t5[5:0]),
+  .q   (imiss2_set_index[10:5]),
+  .se  (1'b0), .si (), .so ()        // scan enable tied low, scan in/out left open
+);
+
+dffe_s #(6) imiss_inv3 (            // loads ifu_pcx_pkt_b10t5 while imiss3_inv_en is set
+  .clk (clk),
+  .en  (imiss3_inv_en),
+  .din (ifu_pcx_pkt_b10t5[5:0]),
+  .q   (imiss3_set_index[10:5]),
+  .se  (1'b0), .si (), .so ()        // scan enable tied low, scan in/out left open
+);
+
+// Decode lsu_cpx_pkt_tid[1:0] into one select bit per thread.
+assign  cpx_pkt_thrd_sel[3:0] = {
+           lsu_cpx_pkt_tid[1] &  lsu_cpx_pkt_tid[0],      // [3] : tid == 3
+           lsu_cpx_pkt_tid[1] & ~lsu_cpx_pkt_tid[0],      // [2] : tid == 2
+          ~lsu_cpx_pkt_tid[1] &  lsu_cpx_pkt_tid[0],      // [1] : tid == 1
+          ~lsu_cpx_pkt_tid[1] & ~lsu_cpx_pkt_tid[0] } ;   // [0] : tid == 0
+// This needs to be included once the change for the stb bug is complete
+wire  [6:1] imiss_inv_set_index ;
+assign  imiss_inv_set_index[6:1] =     // set index of the thread picked by cpx_pkt_thrd_sel; x if none is set
+  cpx_pkt_thrd_sel[0] ? imiss0_set_index[10:5] : cpx_pkt_thrd_sel[1] ? imiss1_set_index[10:5] :
+  cpx_pkt_thrd_sel[2] ? imiss2_set_index[10:5] : cpx_pkt_thrd_sel[3] ? imiss3_set_index[10:5] : 6'bxx_xxxx ;
+
+
+
+//=================================================================================================
+//  FWD REPLY/REQUEST
+//=================================================================================================
+
+// cpx pkt decode. fwd req/reply do not go into dfq.
+
+
+//assign  tap_thread[0] = ~lsu_iobrdge_tap_rq_type[1] & ~lsu_iobrdge_tap_rq_type[0] ;
+//assign  tap_thread[1] = ~lsu_iobrdge_tap_rq_type[1] &  lsu_iobrdge_tap_rq_type[0] ;
+//assign  tap_thread[2] =  lsu_iobrdge_tap_rq_type[1] & ~lsu_iobrdge_tap_rq_type[0] ;
+//assign  tap_thread[3] =  lsu_iobrdge_tap_rq_type[1] &  lsu_iobrdge_tap_rq_type[0] ;
+
+// This is the pkt from the TAP to be returned to the TAP
+//assign  cpx_reverse_req = cpx_spc_data_cx[130] ;
+assign  cpx_reverse_req = cpx_spc_data_cx_b130;
+// Reply data select: bits 0 and 1 are mutually exclusive, bit 2 is set when neither of them is.
+// removed tap_rq_type[2] from the data_sel logic
+assign  lsu_iobrdge_rply_data_sel[0] =  // defeature, margin, bist
+  (|lsu_iobrdge_tap_rq_type[5:3]) & cpx_reverse_req_d1 ;
+assign  lsu_iobrdge_rply_data_sel[1] =  // i/dcache
+  (|lsu_iobrdge_tap_rq_type[7:6] & ~(|lsu_iobrdge_tap_rq_type[5:3])) & cpx_reverse_req_d1 ;
+// regular fwd pkt
+//  - sothea - 0in bug - can be 0-hot
+//assign  lsu_iobrdge_rply_data_sel[2] = ~((|lsu_iobrdge_tap_rq_type[7:3]) & cpx_reverse_req_d1) ;
+assign  lsu_iobrdge_rply_data_sel[2] = ~|lsu_iobrdge_rply_data_sel[1:0] ; // default: neither [0] nor [1] is selected
+
+// dcache read request from the IO bridge: tap_rq_type[6] qualified by the one-shot fwd pkt valid.
+wire dcache_iob_rd = lsu_iobrdge_tap_rq_type[6] & lsu_iobrdge_fwd_pkt_vld ;
+wire dcache_iob_rd_e, dcache_iob_rd_m, dcache_iob_rd_w ;
+
+// Three chained dff_s stages produce the _e, _m and _w copies of the request.
+dff_s  dciob_rd_e (
+        .clk (clk),
+        .din (dcache_iob_rd),
+        .q   (dcache_iob_rd_e),
+        .se  (1'b0), .si (), .so () );
+
+// _e copy -> _m copy
+dff_s  dciob_rd_m (
+        .clk (clk),
+        .din (dcache_iob_rd_e),
+        .q   (dcache_iob_rd_m),
+        .se  (1'b0), .si (), .so () );
+
+// _m copy -> _w copy
+dff_s  dciob_rd_w (
+        .clk (clk),
+        .din (dcache_iob_rd_m),
+        .q   (dcache_iob_rd_w),
+        .se  (1'b0), .si (), .so () );
+assign	lsu_dcache_iob_rd_w = dcache_iob_rd_w ; // exported copy of the last stage
+
+wire  cpx_fwd_rq_type ;
+assign  cpx_fwd_rq_type =
+        cpx_spc_data_cx_b144to140[`CPX_RQ_HI]   & ~cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] & // fwd req
+        cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] & ~cpx_spc_data_cx_b144to140[`CPX_RQ_LO];
+wire  cpx_fwd_rply_type ;
+assign  cpx_fwd_rply_type =
+        cpx_spc_data_cx_b144to140[`CPX_RQ_HI]   & ~cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] & // fwd reply
+        cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] &  cpx_spc_data_cx_b144to140[`CPX_RQ_LO] ;
+// The two type decodes above differ only in the CPX_RQ_LO bit: 0 = fwd req, 1 = fwd reply.
+// cpx pkt decode. fwd req/reply do not go into dfq.
+assign  cpx_fwd_req =
+         cpx_spc_data_cx_b144to140[`CPX_VLD] & ~cpx_reverse_req & cpx_fwd_rq_type ;
+
+//8/25/03: add fwd req to L1I$ for RAMTEST to dfq_wr_en, dfq_rd_advance
+//bug4293 - set fwd_req_ic based on cpx_fwd_req_type and not based on cpx_fwd_req. this causes the request to 
+//          be dropped i.e. not written into dfq 'cos cpx_fwd_req_ic is not set
+//assign  cpx_fwd_req_ic =  cpx_fwd_req & cpx_spc_data_cx_b103 ;
+// Valid reverse fwd request with bit 103 set; per the 8/25/03 note this is the fwd req to the L1I$.
+assign  cpx_fwd_req_ic =  cpx_spc_data_cx_b144to140[`CPX_VLD] & cpx_fwd_rq_type &
+                          cpx_reverse_req & cpx_spc_data_cx_b103 ;
+// Capture enable for fwdpkt_ff: any fwd request or fwd reply.
+assign  cpx_fwd_pkt_en_cx = cpx_fwd_req | cpx_fwd_reply ;
+// A reverse fwd request is handled as a reply, alongside true fwd replies.
+assign  cpx_fwd_reply =
+         cpx_spc_data_cx_b144to140[`CPX_VLD] & (cpx_fwd_rply_type | (cpx_fwd_rq_type & cpx_reverse_req)) ;
+
+dff_s #(1) fwdpkt_stgd1 (        // lsu_pcx_fwd_reply is fwd_reply_vld passed through one dff_s stage
+        .din    (fwd_reply_vld),
+        .q      (lsu_pcx_fwd_reply),
+        .clk  	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+// fwdpkt_reset clears the captured fwd pkt state, either on reset or when
+// lsu_fwdpkt_pcx_rq_sel is asserted.
+// Requests from iobrdge will not be speculative as core is expected to be quiescent.
+assign  fwdpkt_reset = 
+  (reset | lsu_fwdpkt_pcx_rq_sel) ; 
+  // (reset | (lsu_fwdpkt_pcx_rq_sel & ~pcx_req_squash)) ; 
+wire	fwdpkt_vld_unmasked,fwdpkt_vld_unmasked_d1 ;
+wire	fwd_unc_err ;
+// There can be only one outstanding fwd reply or request.
+dffre_s #(7)  fwdpkt_ff (        // 7 bits: vld, req, reply, b138 (-> fwd_unc_err), b71to70 (-> l2 bank addr), reverse_req
+        .din    ({cpx_fwd_pkt_en_cx,cpx_fwd_req,cpx_fwd_reply, 
+		cpx_spc_data_cx_b138,cpx_spc_data_cx_b71to70[1:0], cpx_reverse_req}),
+        .q      ({fwdpkt_vld_unmasked,fwd_req_vld,fwd_reply_vld, 
+		fwd_unc_err,fwdpkt_l2bnk_addr[1:0],cpx_reverse_req_d1}),
+  .rst  (fwdpkt_reset), .en (cpx_fwd_pkt_en_cx),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Either size[1] for fwd-rq or unc-err for fwd-rply.
+// A reply passes the captured fwd_unc_err bit; otherwise the value is forced to 1.
+wire	fwd_rply_sz1_unc = fwd_reply_vld ? fwd_unc_err : 1'b1 ;
+
+// Registered copy of fwd_rply_sz1_unc, exported as lsu_fwd_rply_sz1_unc.
+dff_s  fpktunc_d1 (
+        .clk (clk),
+        .din (fwd_rply_sz1_unc),
+        .q   (lsu_fwd_rply_sz1_unc),
+        .se  (1'b0), .si (), .so () );
+
+// Registered copy of fwdpkt_vld_unmasked; used to form the one-shot fwd pkt valid.
+dff_s  fpktv_d1 (
+        .clk (clk),
+        .din (fwdpkt_vld_unmasked),
+        .q   (fwdpkt_vld_unmasked_d1),
+        .se  (1'b0), .si (), .so () );
+
+// Registered ifu indications: fwd data valid -> icache_rd_done, fwd write ack -> icache_wr_done.
+wire icache_rd_done, icache_wr_done ;
+dff_s #(2) ifwd_d1 (
+        .clk (clk),
+        .din ({ifu_lsu_fwd_data_vld,ifu_lsu_fwd_wr_ack}),
+        .q   ({icache_rd_done,icache_wr_done}),
+        .se  (1'b0), .si (), .so () );
+
+// make one-shot : read data may be off. Asserted only in the first cycle fwdpkt_vld_unmasked is set, for reverse requests.
+assign  lsu_iobrdge_fwd_pkt_vld = fwdpkt_vld_unmasked & ~fwdpkt_vld_unmasked_d1 & cpx_reverse_req_d1 ;
+//assign  lsu_iobrdge_fwd_pkt_vld = fwdpkt_vld ;
+assign  fwdpkt_vld = 
+	// immediate for all but dcache rd. (this term is masked for reverse requests with tap_rq_type[7] or [6] set)
+	(fwdpkt_vld_unmasked & ~((|lsu_iobrdge_tap_rq_type[7:6]) & cpx_reverse_req_d1)) |
+        // dcache rd - wait until w. (held off while any of the dcache_iob_rd stage copies is set)
+	(fwdpkt_vld_unmasked &  lsu_iobrdge_tap_rq_type[6] & cpx_reverse_req_d1 & 
+		~(dcache_iob_rd | dcache_iob_rd_e | dcache_iob_rd_m | dcache_iob_rd_w)) |
+	// icache rd/wr - wait for either the rd-done or the wr-done indication
+	(fwdpkt_vld_unmasked &  lsu_iobrdge_tap_rq_type[7] & cpx_reverse_req_d1 &
+			(icache_rd_done | icache_wr_done)) ;
+// Exported copy of the masked valid.
+assign  lsu_fwdpkt_vld  =  fwdpkt_vld;
+// Destination select: bits 3:0 decode the captured l2 bank address for a fwd request, bit 4 marks a fwd reply.
+assign  lsu_fwdpkt_dest[0] = fwd_req_vld & ~fwdpkt_l2bnk_addr[1] & ~fwdpkt_l2bnk_addr[0] ; // l2bank=0
+assign  lsu_fwdpkt_dest[1] = fwd_req_vld & ~fwdpkt_l2bnk_addr[1] &  fwdpkt_l2bnk_addr[0] ; // l2bank=1
+assign  lsu_fwdpkt_dest[2] = fwd_req_vld &  fwdpkt_l2bnk_addr[1] & ~fwdpkt_l2bnk_addr[0] ; // l2bank=2
+assign  lsu_fwdpkt_dest[3] = fwd_req_vld &  fwdpkt_l2bnk_addr[1] &  fwdpkt_l2bnk_addr[0] ; // l2bank=3
+assign  lsu_fwdpkt_dest[4] = fwd_reply_vld ; // reply always goes back to IO Bridge
+
+//=================================================================================================
+//  INTERRUPT CPX PKT REQ CTL
+//=================================================================================================
+
+//bug6322
+//assign  lsu_tlu_cpx_vld = cpx_spc_data_cx_b144to140[`CPX_VLD] & ~cpx_spc_data_cx_b136 ;
+//assign  lsu_tlu_cpx_req[3:0] = cpx_spc_data_cx_b144to140[`CPX_RQ_HI:`CPX_RQ_LO] ;
+//assign  lsu_tlu_intpkt[17:0] = cpx_spc_data_b17t0_cx[17:0] ;
+
+   // Interrupt pkt to the tlu, taken from the dfq read data. Valid and request type are
+   // staged in inverted (_l) form and inverted back at the outputs.
+   wire        lsu_tlu_cpx_vld_din_l = ~(dfq_int_type & ~lsu_dfq_rdata_flush_bit & dfq_rd_advance) ;
+   wire [17:0] lsu_tlu_intpkt_din    = lsu_dfq_rdata_b17_b0[17:0] ;
+   wire [3:0]  lsu_tlu_cpx_req_din_l = ~lsu_dfq_rdata_rq_type[3:0] ;
+
+   wire        lsu_tlu_cpx_vld_l ;
+   wire [3:0]  lsu_tlu_cpx_req_l ;
+
+// 23 bits = 1 (vld_l) + 18 (intpkt) + 4 (req_l)
+dff_s  #(23) lsu_tlu_stg (
+        .clk (clk),
+        .din ({lsu_tlu_cpx_vld_din_l, lsu_tlu_intpkt_din[17:0], lsu_tlu_cpx_req_din_l[3:0]}),
+        .q   ({lsu_tlu_cpx_vld_l,     lsu_tlu_intpkt[17:0],     lsu_tlu_cpx_req_l[3:0]}),
+        .se  (1'b0), .si (), .so ()
+        );
+   // Restore active-high polarity on the staged valid and request type.
+   assign     lsu_tlu_cpx_vld      = ~lsu_tlu_cpx_vld_l ;
+   assign     lsu_tlu_cpx_req[3:0] = ~lsu_tlu_cpx_req_l[3:0] ;
+   
+   
+//=================================================================================================
+//  STQUAD PKT CONTROL
+//=================================================================================================
+
+
+
+
+
+//=================================================================================================
+// SECONDARY VS. PRIMARY LOADS
+//=================================================================================================
+
+   
+// NOT USED
+//wire  [1:0] dfq_sel_tid ;
+//assign  dfq_sel_tid[1:0] = 
+//  // select byp tid if ld from cfq or cpx will be latched in byp ff next cycle
+//  (dfq_byp_ld_vld & ((dfq_rptr_vld_d1 & dfq_rd_advance) | (cpx_spc_data_cx_b144to140[`CPX_VLD] & ~dfq_wr_en))) ? 
+//  dfq_byp_tid[1:0] : dfq_tid[1:0] ;
+
+//temp, send to dctl, phase 2     
+assign  ld_sec_active = 1'b0 ;
+// One-hot decode of dfq_tid[1:0] into per-thread selects.
+assign  dfq_thread0 = ~dfq_tid[1] & ~dfq_tid[0] ;
+assign  dfq_thread1 = ~dfq_tid[1] &  dfq_tid[0] ;
+assign  dfq_thread2 =  dfq_tid[1] & ~dfq_tid[0] ;
+assign  dfq_thread3 =  dfq_tid[1] &  dfq_tid[0] ;
+
+// NOT USED
+//assign  ld_any_thrd_byp_sel = |(ld_thrd_byp_sel[3:0]);
+
+// phase 2 change   
+// L2$ sends response for both prim and sec requests. Both will go into DFQ
+// and fill D$
+// can we eliminate dcfill_active_e ?
+// Per-thread load bypass select: the decoded thread, only while dcfill_active_e is set and the pkt is not a prefetch.
+//11/7/03 - add rst_tri_en
+wire  [3:0]  dfq_byp_sel_tmp ;
+   assign dfq_byp_sel_tmp[0]  = dfq_thread0  & dcfill_active_e & ~lsu_cpx_pkt_prefetch;
+   assign dfq_byp_sel_tmp[1]  = dfq_thread1  & dcfill_active_e & ~lsu_cpx_pkt_prefetch;
+   assign dfq_byp_sel_tmp[2]  = dfq_thread2  & dcfill_active_e & ~lsu_cpx_pkt_prefetch;
+   assign dfq_byp_sel_tmp[3]  = dfq_thread3  & dcfill_active_e & ~lsu_cpx_pkt_prefetch;
+// While rst_tri_en is set the select is forced to 4'b1000 (bits 2:0 cleared, bit 3 set).
+   assign dfq_byp_sel[2:0]  =  dfq_byp_sel_tmp[2:0]  & {3{~rst_tri_en}} ;
+   assign dfq_byp_sel[3]    =  dfq_byp_sel_tmp[3]    | rst_tri_en ;
+   
+//   assign lsu_dfq_byp_mxsel[0]  = dfq_thread0  & dcfill_active_e;
+//   assign lsu_dfq_byp_mxsel[1]  = dfq_thread1  & dcfill_active_e;
+//   assign lsu_dfq_byp_mxsel[2]  = dfq_thread2  & dcfill_active_e;
+//   assign lsu_dfq_byp_mxsel[3]  = ~|lsu_dfq_byp_mxsel[2:0];
+   
+// includes store cmplt tid also. 
+// Bit n is set by the load bypass select for thread n, or by the dfq
+// thread decode for thread n while stdq_active_e is asserted (store path).
+
+assign  dfill_dcd_thrd[0] = (stdq_active_e & dfq_thread0) | dfq_byp_sel[0] ;
+assign  dfill_dcd_thrd[1] = (stdq_active_e & dfq_thread1) | dfq_byp_sel[1] ;
+assign  dfill_dcd_thrd[2] = (stdq_active_e & dfq_thread2) | dfq_byp_sel[2] ;
+assign  dfill_dcd_thrd[3] = (stdq_active_e & dfq_thread3) | dfq_byp_sel[3] ;
+
+// Exported copy of the per-thread decode.
+assign  lsu_dfill_dcd_thrd[3:0]  =  dfill_dcd_thrd[3:0];
+
+//=================================================================================================
+//  Error Related Logic
+//=================================================================================================
+
+// Equivalent of lmq but lmq has run out of bits
+// Following bits need to be logged.
+// Dtag parity error 
+//  - output on bit 130 of equivalent ld pkt
+//  - when cpx pkt is at head of cfq, then log error
+//  and take corresponding trap synchronous to pipe.
+// DCache parity error
+//  - when cpx pkt is at head of cfq, then log error
+//  and take corresponding trap synchronous to pipe.
+
+
+// The load component of the cpx response for an atomic will
+// save its error info for the store component. The store
+// component will take the trap in the g stage, depending
+// on the error information from the ld. However, it can
+// always override the parity error info initially written,
+// as atomics do not lookup the cache or tag.
+
+
+// Per-thread enable of the error flops: thread n's bit of lsu_ld_inst_vld_g.
+// Earlier forms of the thread-0 enable, kept for reference (the atomic
+// store-complete term was apparently dropped for Bug 3624):
+//assign  error_en[0] = lmq_enable[0] | (lsu_cpx_pkt_atm_st_cmplt & dcfill_active_e & dfq_byp_sel[0]);
+	//lsu_ld_inst_vld_g[0] | (lsu_cpx_pkt_atm_st_cmplt & dcfill_active_e & dfq_byp_sel[0]); // Bug 3624
+assign  error_en[0] = lsu_ld_inst_vld_g[0] ;
+assign  error_en[1] = lsu_ld_inst_vld_g[1] ;
+assign  error_en[2] = lsu_ld_inst_vld_g[2] ;
+assign  error_en[3] = lsu_ld_inst_vld_g[3] ;
+
+// 10/15/03: error reset is set only by reset. lsu_ld[0-3]_pcx_rq_sel_d1 is not needed because
+//           the flop is used only for reporting error to ifu. Also, the error_en is set for new requests.
+//tmp fix for reset
+//wire              lsu_pcx_ld_dtag_perror_w2 ;
+//assign lsu_pcx_ld_dtag_perror_w2  = 1'b0;
+
+//assign  error_rst[0] = reset | (lsu_ld0_pcx_rq_sel_d1 & lsu_pcx_ld_dtag_perror_w2) ;
+//assign  error_rst[1] = reset | (lsu_ld1_pcx_rq_sel_d1 & lsu_pcx_ld_dtag_perror_w2) ;
+//assign  error_rst[2] = reset | (lsu_ld2_pcx_rq_sel_d1 & lsu_pcx_ld_dtag_perror_w2) ;
+//assign  error_rst[3] = reset | (lsu_ld3_pcx_rq_sel_d1 & lsu_pcx_ld_dtag_perror_w2) ;
+
+// All four per-thread resets are driven by reset alone.
+assign  error_rst[0] = reset ;
+assign  error_rst[1] = reset ;
+assign  error_rst[2] = reset ;
+assign  error_rst[3] = reset ;
+
+//assign  lsu_error_rst[3:0]  =  error_rst[3:0];
+
+wire	dtag_perror0, dtag_perror1, dtag_perror2, dtag_perror3 ;
+
+// Thread 0: {tag, data} parity error bits, loaded under error_en[0], cleared by error_rst[0].
+dffre_s  #(2) error_t0 (
+        .clk    (clk),
+        .rst    (error_rst[0]),
+        .en     (error_en[0]),
+        .din    ({lsu_dcache_tag_perror_g,lsu_dcache_data_perror_g}),
+    //lsu_cpx_pkt_ld_err[1:0]}),
+        .q      ({dtag_perror0,dcache_perror0}),
+        //.q      ({dtag_perror0,dcache_perror0,ld_error0[1:0]}),
+        .se     (1'b0),       .si (),          .so () );
+
+// Thread 1: {tag, data} parity error bits, loaded under error_en[1], cleared by error_rst[1].
+dffre_s  #(2) error_t1 (
+        .clk    (clk),
+        .rst    (error_rst[1]),
+        .en     (error_en[1]),
+        .din    ({lsu_dcache_tag_perror_g,lsu_dcache_data_perror_g}),
+    //lsu_cpx_pkt_ld_err[1:0]}),
+        .q      ({dtag_perror1,dcache_perror1}),
+        //.q      ({dtag_perror1,dcache_perror1,ld_error1[1:0]}),
+        .se     (1'b0),       .si (),          .so () );
+
+// Thread 2: {tag, data} parity error bits, loaded under error_en[2], cleared by error_rst[2].
+dffre_s  #(2) error_t2 (
+        .clk    (clk),
+        .rst    (error_rst[2]),
+        .en     (error_en[2]),
+        .din    ({lsu_dcache_tag_perror_g,lsu_dcache_data_perror_g}),
+    //lsu_cpx_pkt_ld_err[1:0]}),
+        .q      ({dtag_perror2,dcache_perror2}),
+        //.q      ({dtag_perror2,dcache_perror2,ld_error2[1:0]}),
+        .se     (1'b0),       .si (),          .so () );
+
+// Thread 3: {tag, data} parity error bits, loaded under error_en[3], cleared by error_rst[3].
+dffre_s  #(2) error_t3 (
+        .clk    (clk),
+        .rst    (error_rst[3]),
+        .en     (error_en[3]),
+        .din    ({lsu_dcache_tag_perror_g,lsu_dcache_data_perror_g}),
+    //lsu_cpx_pkt_ld_err[1:0]}),
+        .q      ({dtag_perror3,dcache_perror3}),
+        //.q      ({dtag_perror3,dcache_perror3,ld_error3[1:0]}),
+        .se     (1'b0),       .si (),          .so () );
+
+//assign	lsu_dtag_perror_w2[3] = dtag_perror3 ;
+//assign	lsu_dtag_perror_w2[2] = dtag_perror2 ;
+//assign	lsu_dtag_perror_w2[1] = dtag_perror1 ;
+//assign	lsu_dtag_perror_w2[0] = dtag_perror0 ;
+
+// Determine if ld pkt requires correction due to dtag parity error.
+//5/22/03: moved to qctl1
+//assign  lsu_pcx_ld_dtag_perror_w2 =
+//  ld_pcx_rq_sel[0] ? dtag_perror0 :
+//    ld_pcx_rq_sel[1] ? dtag_perror1 :
+//      ld_pcx_rq_sel[2] ? dtag_perror2 : dtag_perror3 ;
+
+// Now post sparc related errors and take traps
+// error is reset after it is sent to pcx. the logic below will never be set!!
+assign  lsu_cpx_ld_dtag_perror_e =   // saved tag parity error of the thread picked by dfq_byp_sel; 0 when no select bit is set
+  dfq_byp_sel[0] ? dtag_perror0 :
+    dfq_byp_sel[1] ? dtag_perror1 :
+      dfq_byp_sel[2] ? dtag_perror2 : (dfq_byp_sel[3] & dtag_perror3) ; // Bug 4655
+// NOTE(review): the "will never be set" remark above may predate error_rst being tied to reset only - confirm.
+assign  lsu_cpx_ld_dcache_perror_e = // same selection for the saved data parity error
+  dfq_byp_sel[0] ? dcache_perror0 :
+    dfq_byp_sel[1] ? dcache_perror1 :
+      dfq_byp_sel[2] ? dcache_perror2 : (dfq_byp_sel[3] & dcache_perror3) ; // Bug 4655
+
+//Bug 3624
+/*
+assign  lsu_cpx_atm_st_err[1:0] =
+  cpx_pkt_thrd_sel[0] ? ld_error0[1:0] :
+    cpx_pkt_thrd_sel[1] ? ld_error1[1:0] :
+      cpx_pkt_thrd_sel[2] ? ld_error2[1:0] : ld_error3[1:0] ;*/ 
+
+//===
+// memref_e: ifu_lsu_memref_d passed through one dff_s stage.
+wire memref_e;
+dff_s #(1) stge_ad_e (
+  .clk (clk),
+  .din (ifu_lsu_memref_d),
+  .q   (memref_e),
+  .se  (1'b0), .si (), .so ()
+);
+   
+  
+  
+  
+//=================================================================================================
+//  LDD HANDLING
+//=================================================================================================
+
+assign ldd_vld_reset =
+        (reset | (dcfill_active_e & ldd_in_dfq_out)); 
+// ldd_vld_reset: clears ldd_in_dfq_out on reset, or on a dcache fill cycle in which it is already set.
+// prefetch qual is required for case where prefetch may get interference
+// from lmq contents set by a later load that issues before the prefetch
+// is returned.
+// integer
+assign ldd_vld_en = lmq_ldd_vld & ~lsu_cpx_pkt_prefetch & dcfill_active_e ;
+// fp
+assign lsu_fldd_vld_en = lmq_ldd_vld & ~lsu_cpx_pkt_prefetch & lsu_l2fill_fpld_e & dcfill_active_e ;
+// ldd_in_dfq_out is loaded from lmq_ldd_vld on an enabled fill and cleared on a fill seen while it is set.
+// NOTE(review): this assumes rst takes priority over en inside dffre_s - confirm against the cell definition.
+dffre_s   ldd_in_dfq_ff (
+        .din    (lmq_ldd_vld), .q  (ldd_in_dfq_out),
+        .rst    (ldd_vld_reset),        .en     (ldd_vld_en),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+// lsu_ignore_fill: dcache fill cycle for an ldd (lmq_ldd_vld) while ldd_in_dfq_out is still clear.
+
+wire lsu_ignore_fill;
+//dfq_ld_vld is redundant   
+assign lsu_ignore_fill = dfq_ld_vld & lmq_ldd_vld & ~ldd_in_dfq_out & dcfill_active_e ;
+
+
+// ld_l1hit_rd_m: ifu_lsu_rd_e[4:0] passed through one dff_s stage.
+dff_s #(5)   dfq_rd_m (
+        .clk (clk),
+        .din (ifu_lsu_rd_e[4:0]), .q (ld_l1hit_rd_m[4:0]),
+        .se  (1'b0), .si (), .so () );
+
+//dff #(5)   dfq_rd_g (
+//        .din    (ld_l1hit_rd_m[4:0]), .q  (ld_l1hit_rd_g[4:0]),
+//        .clk  (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        ); 
+
+
+// ldd_in_dfq_out_d1: ldd_in_dfq_out passed through one dff_s stage.
+dff_s #(1)   stgd1_lrd (
+        .clk (clk),
+        .din (ldd_in_dfq_out),
+        .q   (ldd_in_dfq_out_d1),
+        .se  (1'b0), .si (), .so () );
+
+//dff #(1)   stgd2_lrd (
+//        .din    (ldd_in_dfq_out_d1), 
+//  .q    (ldd_in_dfq_out_d2),
+//        .clk  (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        ); 
+
+
+//wire [4:0] lmq_ld_rd1_g;   
+//dff #(5) ff_lmq_ld_rd1 (
+//        .din  (lmq_ld_rd1[4:0]), 
+//        .q    (lmq_ld_rd1_g[4:0]),
+//        .clk  (clk),
+//        .se   (1'b0),       .si (),          .so ()
+//        ); 
+   
+
+// Stage l2fill vld
+// l2fill_vld_e is assigned elsewhere in this module; l2fill_vld_m is its staged copy.
+//wire	l2fill_vld_m, l2fill_vld_g ;
+wire	l2fill_vld_e ;
+wire	l2fill_vld_m ;
+dff_s    	l2fv_stgm (
+        .clk (clk),
+        .din (l2fill_vld_e), .q (l2fill_vld_m),
+        .se  (1'b0), .si (), .so () );
+
+//dff    	l2fv_stgg (
+//        .din  (l2fill_vld_m), 
+//  	.q    (l2fill_vld_g),
+//        .clk  (clk),
+//        .se   (1'b0),       .si (),          .so ()
+//        ); 
+
+// ld_inst_vld_m: ld_inst_vld_e passed through one dff_s stage.
+wire	ld_inst_vld_m ;
+dff_s    	lvld_stgm (
+        .clk (clk),
+        .din (ld_inst_vld_e),
+        .q   (ld_inst_vld_m),
+        .se  (1'b0), .si (), .so () );
+
+//wire	ld_inst_vld_g ;
+//dff    	lvld_stgg (
+//        .din  (ld_inst_vld_m), 
+//  	.q    (ld_inst_vld_g),
+//        .clk  (clk),
+//        .se   (1'b0),       .si (),          .so ()
+//        ); 
+
+wire	ldd_in_dfq_out_vld ;   // staged ldd flag qualified by the staged l2 fill valid
+assign	ldd_in_dfq_out_vld = ldd_in_dfq_out_d1 & l2fill_vld_m ;
+assign lsu_exu_rd_m[4:0] =     // rd sent to exu: priority is ld_inst_vld_m, then the ldd case, then the plain lmq rd
+  ld_inst_vld_m ? ld_l1hit_rd_m[4:0] : 
+    		ldd_in_dfq_out_vld ?  {lmq_ld_rd1[4:1],~lmq_ld_rd1[0]} // lmq rd with bit 0 inverted
+						: lmq_ld_rd1[4:0];
+/*wire	ldd_in_dfq_out_vld ;
+assign	ldd_in_dfq_out_vld = ldd_in_dfq_out_d2 & l2fill_vld_g ;
+assign lsu_exu_rd_w2[4:0] = 
+  ld_inst_vld_g ? ld_l1hit_rd_g[4:0] : 
+    		ldd_in_dfq_out_vld ?  {lmq_ld_rd1_g[4:1],~lmq_ld_rd1_g[0]} 
+						: lmq_ld_rd1_g[4:0];*/
+
+
+// Generate data select for 128b. ldd will cause hi-order 8B followed by low order
+// 8B to be selected.
+// sel_hi is ~lmq_ld_addr_b3, inverted while ldd_in_dfq_out is set unless ldd_non_alt_space is set.
+// ldd will select from same 64b dw.
+assign  lsu_dfill_data_sel_hi = ~lmq_ld_addr_b3 ^ (ldd_in_dfq_out & ~ldd_non_alt_space) ;
+
+// ldd non-alternate space. sz distinguishes between quad, fp ldd and int ldd.
+// quad ldd, fp ldd sz = 2'b11, int ldd sz = 2'b10   
+assign  ldd_non_alt_space = lsu_byp_misc_sz_e[1] & ~lsu_byp_misc_sz_e[0] ;
+// ldd_oddrd_e: ldd_in_dfq_out set together with sz == 2'b10.
+assign  ldd_oddrd_e = ldd_in_dfq_out & ldd_non_alt_space ;
+// Staged copy of ldd_oddrd_e, exported as lsu_byp_ldd_oddrd_m.
+dff_s   ldd_stgm (
+        .din    (ldd_oddrd_e), 
+  .q    (lsu_byp_ldd_oddrd_m),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+// all incoming ld and inv packets must be written to dfq or its bypass flop.
+// wrt ptr must be updated in cycle that cpx pkt is sent.
+
+// invalidate does not need bubble, only ld bypass and/or fill.
+// fill bypass can only occur if bubble is in pipeline.
+
+//------
+// strm ack cmplt - needs to be visible in dcache
+//------
+
+//bug4460 - qualify stream store ack w/ local packet
+//Bug4969
+wire    dfq_local_pkt ;
+wire	strmack_cmplt1, strmack_cmplt2, strmack_cmplt3 ;   // three independent stream-store-ack completion sources
+wire	strmack_cmplt1_d1, strmack_cmplt2_d1, strmack_cmplt3_d1 ;   // staged copies, summed into a 2-bit count
+//wire	strm_ack_cmplt ;
+assign	strmack_cmplt1 =
+	// check inflight, no inv. if inv, write to dfq_byp. (the incoming pkt's cpu id field must equal const_cpuid)
+	(cpx_strm_st_ack_type & ~(dfq_wr_en | lsu_cpx_spc_inv_vld) & 
+         (const_cpuid[2:0] == cpx_spc_data_cx_b120to118[`CPX_INV_CID_HI:`CPX_INV_CID_LO])) ;
+assign	strmack_cmplt2 = 
+	// check dfq-rd - no inv, gets dropped. (qualified with local_pkt, the unregistered cpu id match)
+	(lsu_dfq_byp_type[1] & dfq_rd_advance & ~lsu_dfq_byp_cpx_inv & local_pkt) ;
+assign	strmack_cmplt3 = 
+	// check dfq-rd - inv, and thus process from dfq_bypass. (qualified with dfq_local_pkt, the registered match)
+	(lsu_cpx_pkt_strm_ack & inv_active_e & dfq_inv_vld & dfq_local_pkt) ;
+
+/*assign	strm_ack_cmplt =
+	// check inflight, no inv. if inv, write to dfq_byp.
+	(cpx_strm_st_ack_type & ~(dfq_wr_en | lsu_cpx_spc_inv_vld) & 
+         (const_cpuid[2:0] == cpx_spc_data_cx_b120to118[`CPX_INV_CID_HI:`CPX_INV_CID_LO])) |
+	// check dfq-rd - no inv, gets dropped.
+	(lsu_dfq_byp_type[1] & dfq_rd_advance & ~lsu_dfq_byp_cpx_inv & local_pkt) |
+	// check dfq-rd - inv, and thus process from dfq_bypass.
+	(lsu_cpx_pkt_strm_ack & inv_active_e & dfq_inv_vld & dfq_local_pkt) ;*/
+
+dff_s #(3)   strmackcnt_stg (        // stages the three completion sources before they are counted
+        .din  	({strmack_cmplt3,strmack_cmplt2,strmack_cmplt1}), 
+        .q  	({strmack_cmplt3_d1,strmack_cmplt2_d1,strmack_cmplt1_d1}), 
+        .clk  	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+// lsu_spu_strm_ack_cmplt[1:0] is the number of staged sources that are set (0 to 3). Bit 0 is set for an odd count.
+assign	lsu_spu_strm_ack_cmplt[0] =	// lsb  of cnt, 1 or 3.
+	(~strmack_cmplt1_d1 & ~strmack_cmplt2_d1 &  strmack_cmplt3_d1) | //001
+	(~strmack_cmplt1_d1 &  strmack_cmplt2_d1 & ~strmack_cmplt3_d1) | //010
+	( strmack_cmplt1_d1 &  strmack_cmplt2_d1 &  strmack_cmplt3_d1) | //111
+	( strmack_cmplt1_d1 & ~strmack_cmplt2_d1 & ~strmack_cmplt3_d1) ; //100
+// Bit 1 is set when at least two of the three sources are set.
+assign	lsu_spu_strm_ack_cmplt[1] =	// msb  of cnt, 2 or 3.
+	(strmack_cmplt1_d1 & strmack_cmplt2_d1) |
+	(strmack_cmplt2_d1 & strmack_cmplt3_d1) |
+	(strmack_cmplt1_d1 & strmack_cmplt3_d1) ;
+
+/*dff   strmack_d1 (
+        .din  (strm_ack_cmplt), 
+  	.q    (lsu_spu_strm_ack_cmplt),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); */
+	
+// Active as soon as it is visible in dfq byp ff.
+assign  inv_active_e = dfq_inv_vld ;      // evict/icache/strm-st
+//wire  st_atm_err ;
+// An atomic st is forced to punch a bubble in the pipe if
+// an error is encountered on the load. error en is not checked
+// at this point.
+/*assign  st_atm_err = 
+  ((|lsu_cpx_atm_st_err[1:0]) & lsu_cpx_pkt_atm_st_cmplt) ;*/
+// stwr_active_e: store pkt with dfq_local_inv set, no memref in e, and neither an atomic store completion nor a blk-init store.
+assign  stwr_active_e = 
+  dfq_st_vld & dfq_local_inv  & ~memref_e &
+  ~lsu_cpx_pkt_atm_st_cmplt & ~lsu_cpx_pkt_binit_st ;
+// & ~lsu_cpx_pkt_stquad_pkt2 ;  // fix for ifill_pkt_vld -b[130]
+//  dfq_st_vld & local_inv & ~st_ack_rq_stb_d1 & ~memref_e & //st ack timing fix
+//  ~lsu_cpx_pkt_stquad_pkt2 // bug 2942
+// stdq_active_e: store pkt is active at once when dfq_local_inv is clear; when it is set it waits for ~memref_e.
+assign  stdq_active_e = 
+  dfq_st_vld & 
+  //((~dfq_local_inv & (~st_atm_err | (st_atm_err & ~memref_e))) | //Bug 3624
+  ((~dfq_local_inv) | 
+   (dfq_local_inv & ~memref_e)) ;
+//  ((~local_inv & (~st_atm_err | (st_atm_err & ~memref_e))) | 
+//   (local_inv & (~st_ack_rq_stb_d1 & ~memref_e))) ;
+
+// dfq_st_cmplt: store completes through the store path or together with an active invalidate.
+assign	dfq_st_cmplt = stdq_active_e | (inv_active_e & dfq_st_vld) ;
+// Atomic store completion: a completing store whose pkt carries lsu_cpx_pkt_atm_st_cmplt.
+wire	atm_st_cmplt ;
+assign  atm_st_cmplt = dfq_st_cmplt & lsu_cpx_pkt_atm_st_cmplt ; 
+assign  lsu_atm_st_cmplt_e = atm_st_cmplt ;
+
+assign  dcfill_active_e = dfq_ld_vld & ~memref_e ;   // load pkt is valid in the dfq and there is no memref in e
+
+//bug3753 - qualify ld*_fill_reset w/ dcfill_active & ~ignore_fill
+//          in qctl1 this is qual'ed w/ dfq_ld_vld
+assign  lsu_dcfill_active_e  =  dcfill_active_e & ~lsu_ignore_fill;
+//assign  lsu_dcfill_active_e  =  dcfill_active_e;
+// dva_svld_e is the OR of three cases: active invalidate, parity-error invalidate, and local atomic store.
+assign  dva_svld_e = 
+  inv_active_e |      // evict/icache/strm-st
+  (dfq_st_vld & lsu_cpx_pkt_perror_dinv) |	// dtag parity error invalidation.
+  (dfq_local_inv & dfq_st_vld & // local st - atomic
+  lsu_cpx_pkt_atomic ) ;
+  //lsu_cpx_pkt_atomic & ~lsu_cpx_pkt_stquad_pkt2) ; // store quad pkt not present - cmp1_regr fail
+  //(local_inv & dfq_st_vld & // local st - stquad/atomic
+assign  l2fill_vld_e  = dcfill_active_e & 
+			~lsu_cpx_pkt_prefetch ; // prefetch will not fill
+// Unlike l2fill_vld_e, lsu_l2fill_vld is not qualified with ~lsu_cpx_pkt_prefetch.
+assign	lsu_l2fill_vld = dcfill_active_e ;
+
+//=================================================================================================
+//  DFQ RD/WR CONTROL
+//=================================================================================================
+  
+//assign  cpx_inv =
+//  lsu_cpu_inv_data[`CPX_AX0_INV_DVLD]   |   // line 0
+//  lsu_cpu_inv_data[`CPX_AX1_INV_DVLD+4] |   // line 1
+//  lsu_cpu_inv_data[`CPX_AX0_INV_DVLD+7] |   // line 2
+//  lsu_cpu_inv_data[`CPX_AX1_INV_DVLD+11] ;  // line 3
+
+// All invalidates go into byp buffer
+assign  dfq_byp_ld_vld = lsu_dfq_byp_type[5] ;
+// local store inv path is separate.
+assign  dfq_byp_inv_vld = 
+       (lsu_dfq_byp_type[4] & dfq_invwy_vld)  	| // icache x-inv
+       (lsu_dfq_byp_type[3]    			| // evict
+       (lsu_dfq_byp_type[2] & ~local_pkt)   	| // sparc st-ack - non-local
+        lsu_dfq_byp_type[1] 			| // strm st-ack
+	(lsu_dfq_byp_type[2] & local_pkt & lsu_dfq_byp_binit_st)) &     
+				// blk init st invalidates L1
+        lsu_dfq_byp_cpx_inv ;         // local invalidate
+        //cpx_inv ;         // local invalidate
+// Grouping note: lsu_dfq_byp_cpx_inv qualifies only the parenthesised evict/st-ack/strm/blk-init group, not the icache x-inv term.
+// Local store which writes to cache
+//timing fix: 7/14/03 - to improve setup of dfq_st_vld and dfq_ld_vld and move the flop to qdp2 -
+//            to eventually improve dcache_fill_data timing
+//            add byp mux for cpuid in qctl2
+wire  [2:0]  dfq_byp_cpuid ;
+assign  dfq_byp_cpuid[2:0]  =  dfq_rd_vld_d1 ? lsu_dfq_rdata_cpuid[2:0] : 
+                                   cpx_spc_data_cx_b120to118[`CPX_INV_CID_HI:`CPX_INV_CID_LO] ;
+// local_pkt: all three bits of the bypassed cpu id equal const_cpuid (bitwise XNOR, then reduction AND).
+//assign  local_pkt =  &(const_cpuid[2:0] ~^ lsu_dfq_byp_cpuid[2:0]) ;
+assign  local_pkt =  &(const_cpuid[2:0] ~^ dfq_byp_cpuid[2:0]) ;
+assign  dfq_rdata_local_pkt =  &(const_cpuid[2:0] ~^ lsu_dfq_rdata_cpuid[2:0]) ;
+assign  dfq_byp_st_vld = lsu_dfq_byp_type[2] & local_pkt ;
+// dfq_byp_vld: a ld/inv/st pkt is on the bypass, sourced from a dfq read or from an incoming pkt not being written to the dfq.
+// Add ifill invalidate
+// screen cpx data which gets written to dfq
+assign  dfq_byp_vld = 
+(dfq_byp_ld_vld | dfq_byp_inv_vld | dfq_byp_st_vld) & 
+(dfq_rd_vld_d1 | (~dfq_rd_vld_d1 & ~dfq_wr_en))  ;  
+
+//assign  lsu_dfq_byp_vld  =  dfq_byp_vld;
+
+/*assign dfq_vld_reset =
+        reset | ((dcfill_active_e | inv_active_e | stdq_active_e) & 
+    ~dfq_vld_en & // dside pkt in waiting
+    ~lsu_ignore_fill &  // ldd
+    ~ld_ignore_sec  // secondary loads
+    ) ; */
+
+/*wire  ld_sec_rst, ld_sec_rst_d1 ;
+assign  ld_sec_rst = dcfill_active_e & ld_ignore_sec_last ;
+dff_s   secl_d1 (
+        .din    (ld_sec_rst), .q  (ld_sec_rst_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); */
+
+/* phase 2 change
+assign dfq_vld_reset =
+    // dside pkt in waiting, ldd, secondary loads
+        reset | (dcfill_active_e & ~(dfq_vld_en | lsu_ignore_fill | (ld_ignore_sec & ~ld_ignore_sec_last))) |
+    // dside pkt in waiting
+          ((inv_active_e | stdq_active_e) & ~dfq_vld_en) ; 
+*/
+
+assign dfq_vld_reset =
+    // dside pkt in waiting, ldd, no need secondary loads waiting
+        reset | (dcfill_active_e & ~(dfq_vld_en | (lsu_ignore_fill & ~lsu_cpx_pkt_prefetch))) |
+    // dside pkt in waiting
+          ((inv_active_e | stdq_active_e) & ~dfq_vld_en) ; 
+// In both non-reset terms the clear is suppressed in a cycle where dfq_vld_en loads a new pkt.
+// vld is enabled only if both i and d side buffers are clear
+// for co-dependent events. co-dependent events are rare.
+wire    dfq_rd_advance_buf1 ;
+assign dfq_vld_en = dfq_byp_vld & 
+		(dfq_rd_advance_buf1 | 
+		(cpx_spc_data_cx_b144to140[`CPX_VLD] & vld_dfq_pkt & ~dfq_wr_en)) ;
+
+/* phase 2 change
+assign  dfq_byp_ff_en = 
+  (~dfq_byp_full |
+  ( dfq_byp_full & ((dcfill_active_e & ~(lsu_ignore_fill | ld_ignore_sec)) | 
+       (inv_active_e | stdq_active_e)))) ; 
+*/
+
+assign  dfq_byp_ff_en =    // bypass flop may load when it is empty, or when its current pkt is being consumed this cycle
+  (~dfq_byp_full |
+  ( dfq_byp_full & ((dcfill_active_e & ~lsu_ignore_fill) | 
+       (inv_active_e | stdq_active_e)))) ; 
+// The enable sent to qdp2 is additionally forced on while sehold is set.
+//bug4576: add sehold to the flop enable in qdp2
+assign lsu_dfq_byp_ff_en  =  sehold | dfq_byp_ff_en ;
+   
+   // i.e., byp currently filling.
+
+/*
+assign  dfq_byp_ff_en = 
+  (~dfq_byp_full |
+  (dfq_byp_full & (dcfill_active_e | inv_active_e | stdq_active_e) & ~(lsu_ignore_fill | ld_ignore_sec))) ; 
+  // i.e., byp currently filling.
+*/
+
+// dfq bypass valid
+//timing fix: 6/6/03: add duplicate flop for dfq_byp_ld_vld and dfq_byp_st_vld
+//timing fix: 10/3/03 - add separate flop for lsu_dfq_vld lsu_dfq_st_vld to dctl
+//bug4460:  qualify stream store ack w/ local packet - add local pkt flop
+dffre_s  #(10) dfq_vld (   // din and q are matched by position; several din signals are deliberately flopped more than once
+        .din({local_pkt,dfq_byp_st_vld,dfq_byp_vld,dfq_byp_vld,
+              dfq_byp_ld_vld,dfq_byp_inv_vld,dfq_byp_st_vld,
+              lsu_dfq_byp_cpx_inv,dfq_byp_ld_vld,dfq_byp_st_vld}),
+        .q  ({dfq_local_pkt,lsu_dfq_st_vld,lsu_dfq_vld,dfq_byp_full,
+              dfq_ld_vld,dfq_inv_vld,dfq_st_vld,
+              dfq_local_inv,lsu_qdp2_dfq_ld_vld,lsu_qdp2_dfq_st_vld}),
+//.din    ({dfq_byp_vld,dfq_byp_ld_vld,dfq_byp_inv_vld,dfq_byp_st_vld,cpx_inv,lsu_dfq_byp_cpx_inv}),
+//.q      ({dfq_byp_full,dfq_ld_vld,dfq_inv_vld,dfq_st_vld,local_inv,dfq_local_inv}),
+        .rst    (dfq_vld_reset),        .en     (dfq_vld_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );                                    
+// Duplicates: dfq_byp_vld -> lsu_dfq_vld, dfq_byp_full; dfq_byp_ld_vld -> dfq_ld_vld, lsu_qdp2_dfq_ld_vld; dfq_byp_st_vld -> three st_vld outputs.
+//bug4057: kill diagnostic write if dfq has valid requests to l1d$
+//timing fix: 10/3/03 - add separate flop for lsu_dfq_vld
+//assign  lsu_dfq_vld  =  dfq_byp_full ;
+// Exported copies of the flopped load valid and of the combined load/store valid.
+assign  lsu_dfq_ld_vld  =  dfq_ld_vld;
+//timing fix: 9/29/03 - instantiate buffer for dfq_st_vld to dctl
+//timing fix: 10/3/03 - remove buffer and add separate flop
+//assign  lsu_dfq_st_vld  =  dfq_st_vld;
+//bw_u1_buf_30x UZsize_lsu_dfq_st_vld_buf1 ( .a(dfq_st_vld), .z(lsu_dfq_st_vld) );
+assign  lsu_dfq_ldst_vld  =  lsu_qdp2_dfq_ld_vld | lsu_qdp2_dfq_st_vld;
+
+
+// Flop invalidate bits: 12 bits in total (slices b13to9, b7to2 and b0), loaded when dfq_vld_en is set.
+dffe_s  #(12) dfq_inv (
+        .clk    (clk),
+        .en     (dfq_vld_en),
+        .din    ({lsu_cpu_inv_data_b13to9,lsu_cpu_inv_data_b7to2,lsu_cpu_inv_data_b0}),
+        .q      ({dfq_inv_data_b13to9,dfq_inv_data_b7to2,dfq_inv_data_b0}),
+        // earlier full-vector connections, kept for reference:
+        //.din    (lsu_cpu_inv_data[13:0]),
+        //.q      (dfq_inv_data[13:0]),
+        .se     (1'b0),       .si (),          .so () );
+
+
+/*
+assign  lsu_st_ack_addr_b54[0] = dfq_inv_data[4] | dfq_inv_data[11] ;
+assign  lsu_st_ack_addr_b54[1] = dfq_inv_data[7] | dfq_inv_data[11] ;
+
+
+assign  st_wrwy_sel[0] = ~lsu_st_ack_addr_b54[1] & ~lsu_st_ack_addr_b54[0] ;
+assign  st_wrwy_sel[1] = ~lsu_st_ack_addr_b54[1] &  lsu_st_ack_addr_b54[0] ;
+assign  st_wrwy_sel[2] =  lsu_st_ack_addr_b54[1] & ~lsu_st_ack_addr_b54[0] ;
+assign  st_wrwy_sel[3] =  lsu_st_ack_addr_b54[1] &  lsu_st_ack_addr_b54[0] ;
+
+assign  lsu_st_ack_wrwy[1:0]   = 
+st_wrwy_sel[0] ? dfq_inv_data[`CPX_AX0_INV_WY_HI:`CPX_AX0_INV_WY_LO] :
+  st_wrwy_sel[1] ? dfq_inv_data[`CPX_AX1_INV_WY_HI+4:`CPX_AX1_INV_WY_LO+4] :
+    st_wrwy_sel[2] ? dfq_inv_data[`CPX_AX0_INV_WY_HI+7:`CPX_AX0_INV_WY_LO+7] :
+      st_wrwy_sel[3] ? dfq_inv_data[`CPX_AX1_INV_WY_HI+11:`CPX_AX1_INV_WY_LO+11] :
+            2'bxx ;
+*/
+
+// cpx invalidate data obtained via the cfq.
+// b[8] and b[1] are unused
+//8/28/03: vlint cleanup - remove cpx_cpu_inv_data and use dfq_inv_data directly
+//assign  cpx_cpu_inv_data[13:0] =  {dfq_inv_data_b13to9,1'b0,dfq_inv_data_b7to2,1'b0,dfq_inv_data_b0} ;
+//assign  cpx_cpu_inv_data[13:0] =  dfq_inv_data[13:0] ;
+
+// write control set up.   
+// All cpx pkts are written.
+// - unwanted pkts are explicitly overwritten by next incoming pkt.
+
+   /*wire stb_cam_hit_w2;
+   
+dff_s #(1)  stb_cam_hit_stg_w2  (
+  .din (stb_cam_hit), 
+  .q   (stb_cam_hit_w2),
+  .clk (clk), 
+  .se  (1'b0), .si (), .so ()
+  ); */
+
+// Need to include error pkt !!
+//8/25/03: add error type to dfq_wr_en, dfq_rd_advance
+//8/25/03: add fwd req to L1I$ for RAMTEST to dfq_wr_en, dfq_rd_advance
+assign	vld_dfq_pkt =   // set for every packet class named below
+(cpx_ld_type | cpx_ifill_type) | (cpx_evict_type | cpx_st_ack_type | cpx_strm_st_ack_type) | (cpx_int_type | cpx_error_type | cpx_fwd_req_ic) ;
+
+//NOTE: restore cpx_inv qualification after adding cpx_inv part of dfq read - done
+// dfq_wr_en: set when the incoming cpx packet is kept in the dfq; it is the enable of the write-pointer flop dfq_wptr_ff.
+assign  dfq_wr_en = 
+  // local st wr which writes to cache is put in dfq if cam-hit occurs.
+  //(cpx_local_st_ack_type & stb_cam_hit_w2 & cpx_inv) |
+  //(cpx_local_st_ack_type & stb_cam_hit_w2 & lsu_dfq_byp_cpx_inv) |
+  //(cpx_local_st_ack_type) |  //bug2623
+  (cpx_st_ack_type) |   // as coded, every store ack is queued (the cam-hit qualification above is disabled)
+  // always write under these conditions
+  //(vld_dfq_pkt & (dfq_vld_entry_exists | dfq_rptr_vld_d1)) | 
+  (vld_dfq_pkt & (dfq_vld_entry_exists_w | dfq_rptr_vld_d1)) |   // rptr != wptr, or a read is valid this cycle
+  //(cpx_spc_data_cx_b144to140[`CPX_VLD] & (dfq_vld_entry_exists | dfq_rptr_vld_d1)) | 
+  // interrupts always write to queue
+    cpx_int_type |
+  // error type or forward request to l1i$ - bypass
+   ((cpx_error_type | cpx_fwd_req_ic) & ifu_lsu_ibuf_busy)  |   // queued only while ifu_lsu_ibuf_busy is set
+  // selectively write under these conditions
+   ((cpx_ld_type & ~dfq_byp_ff_en)          |   // load, bypass-flop enable low
+    (cpx_ld_type &  cpx_spc_data_cx_b133 & ifu_lsu_ibuf_busy)  |   // load with packet bit 133 set, ibuf busy
+    (cpx_ifill_type & ifu_lsu_ibuf_busy)          |   // ifill, ibuf busy
+    (cpx_ifill_type & cpx_spc_data_cx_b133 & ~dfq_byp_ff_en) |   // ifill with packet bit 133 set, bypass-flop enable low
+    // the evictions/acks will wr to the dfq if any buffer is full
+    ((cpx_evict_type | cpx_st_ack_type | cpx_strm_st_ack_type) & (ifu_lsu_ibuf_busy | ~dfq_byp_ff_en))) ;
+      
+assign  dfq_wptr_new_w_wrap[5:0]  = {{5{1'b0}}, dfq_wr_en} + dfq_wptr_w_wrap[5:0] ;   // +1 only when dfq_wr_en is set
+//assign  dfq_wptr_vld = dfq_wr_en ;
+// The write-pointer valid follows the raw cpx valid bit, so every packet is presented for
+// writing; a packet is rejected simply by not updating the write pointer.
+assign  dfq_wptr_vld = cpx_spc_data_cx_b144to140[`CPX_VLD] ;
+// 6-bit write-pointer register: rst is tied to reset, en to dfq_wr_en.
+dffre_s #(6) dfq_wptr_ff (
+  .clk (clk), .rst (reset), .en (dfq_wr_en),
+  .din (dfq_wptr_new_w_wrap[5:0]), .q (dfq_wptr_w_wrap[5:0]),
+  .se  (1'b0), .si (), .so ()
+);
+
+//1/20/04: mintime fix - add minbuf to dfq_wptr
+//assign  dfq_wptr[4:0] = dfq_wptr_w_wrap[4:0] ;
+// dfq_wptr[4:0] is dfq_wptr_w_wrap[4:0] routed through two cells per bit (minbuf, then buf); presumably both non-inverting, as the replaced assign implies.
+wire  [4:0]  dfq_wptr_minbuf ;   // net between the two buffer stages
+bw_u1_minbuf_5x UZfix_dfq_wptr_b0_minbuf (.a(dfq_wptr_w_wrap[0]), .z(dfq_wptr_minbuf[0]));
+bw_u1_minbuf_5x UZfix_dfq_wptr_b1_minbuf (.a(dfq_wptr_w_wrap[1]), .z(dfq_wptr_minbuf[1]));
+bw_u1_minbuf_5x UZfix_dfq_wptr_b2_minbuf (.a(dfq_wptr_w_wrap[2]), .z(dfq_wptr_minbuf[2]));
+bw_u1_minbuf_5x UZfix_dfq_wptr_b3_minbuf (.a(dfq_wptr_w_wrap[3]), .z(dfq_wptr_minbuf[3]));
+bw_u1_minbuf_5x UZfix_dfq_wptr_b4_minbuf (.a(dfq_wptr_w_wrap[4]), .z(dfq_wptr_minbuf[4]));
+// second stage: drives dfq_wptr[4:0] from the minbuf outputs
+bw_u1_buf_10x UZsize_dfq_wptr_b0_buf2 ( .a(dfq_wptr_minbuf[0]), .z(dfq_wptr[0]) );
+bw_u1_buf_10x UZsize_dfq_wptr_b1_buf2 ( .a(dfq_wptr_minbuf[1]), .z(dfq_wptr[1]) );
+bw_u1_buf_10x UZsize_dfq_wptr_b2_buf2 ( .a(dfq_wptr_minbuf[2]), .z(dfq_wptr[2]) );
+bw_u1_buf_10x UZsize_dfq_wptr_b3_buf2 ( .a(dfq_wptr_minbuf[3]), .z(dfq_wptr[3]) );
+bw_u1_buf_10x UZsize_dfq_wptr_b4_buf2 ( .a(dfq_wptr_minbuf[4]), .z(dfq_wptr[4]) );
+
+// The pointers are 6 bits wide: [4:0] feed dfq_wptr/dfq_rptr and bit 5 serves as the wrap
+// bit (the original note said "Bit3"). Including it in the compare tells us whether the queue
+// is empty or not. It is assumed that the wptr will never runover the rptr because of flow control.
+// This will have to be fine-tuned once dfq is accurate !!!
+assign  dfq_vld_entry_exists = (dfq_rptr_new_w_wrap[5:0] != dfq_wptr_w_wrap[5:0]) ;   // compares the next (post-advance) read pointer
+
+assign  dfq_vld_entry_exists_w = (dfq_rptr_w_wrap[5:0] != dfq_wptr_w_wrap[5:0]) ;   // compares the current (flopped) read pointer
+
+// dfq is read iff bypass flop is empty and valid entry in dfq available. 
+// i.e., we need to initialize bypass ff such that it always contains
+// latest entry.
+//  (dfq_rptr_vld_d1 & (~i_and_d_codepend | (i_and_d_codepend & dfq_rd_advance))) |
+
+//assign  lsu_ifill_pkt_vld =   
+//  (dfq_rptr_vld_d1 & ~(dfq_st_ack_type & lsu_dfq_byp_stack_dcfill_vld) & (~i_and_d_codepend | (i_and_d_codepend & dfq_byp_ff_en))) |
+//        (cpx_spc_data_cx[`CPX_VLD] & ~dfq_wr_en) ;
+//
+//  (dfq_rptr_vld_d1 & ~(dfq_st_ack_type & lsu_dfq_byp_stack_dcfill_vld) & ~ifill_pkt_fwd_done_d1) |
+//
+//  (dfq_rptr_vld_d1 & ~(lsu_dfq_rdata_st_ack_type & lsu_dfq_rdata_stack_dcfill_vld) & ~ifill_pkt_fwd_done_d1) | // bug:2767
+//  change lsu_dfq_rdata_stack_dcfill_vld from b[87] to b[151] in the top level 
+//
+//timing fix: 6/16/03 - fix for ifill_pkt_vld - use b130 if store_ack_dcfill_vld=1
+//            change lsu_dfq_rdata_stack_dcfill_vld from b[151] to b[130] in the top level 
+//  (dfq_rptr_vld_d1 & ~(lsu_dfq_rdata_st_ack_type & dfq_rdata_local_pkt & lsu_dfq_rdata_stack_dcfill_vld) & ~ifill_pkt_fwd_done_d1) |
+//
+//bug3657 - kill ifill vld in bypass path when cpxtype=fp/fwd_reply
+//NOTE: stream loads should also be included
+//bug5080 - kill ifill vld in bypass path when cpxtype=strm load - similar to bug3657
+//          kill bypass when dfq_rptr_vld_d1=1
+//  (cpx_spc_data_cx_b144to140[`CPX_VLD] & ~(dfq_wr_en | cpx_fwd_rply_type | cpx_fp_type)) ;
+//
+//bug6372: ifill dcache x-inv causes incorrect dcache index to be invalidated.
+//         - this occurs 'cos the imiss index gets overwritten by another imiss to the same thread.
+//           the dcache x-inv(head of dfq) is stalled in dfq 'cos of load in bypass flop being stalled by memref_e=1
+//           but the ifill pkt vld is set to 1 and ifu starts issuing the next imiss for same thread
+//         
+//  (dfq_rptr_vld_d1 & ~(lsu_dfq_rdata_st_ack_type & lsu_dfq_rdata_stack_dcfill_vld) & ~ifill_pkt_fwd_done_d1) |
+
+wire   ifill_pkt_fwd_done ;
+wire   ifill_pkt_fwd_done_d1 ;
+wire   ifill_dinv_head_of_dfq_pend ;
+// Head-of-dfq packet with rdata type bit 4 and invwy_vld set, while the bypass-flop enable is low (see bug6372 note above).
+assign  ifill_dinv_head_of_dfq_pend  =  ~dfq_byp_ff_en & lsu_dfq_rdata_invwy_vld & lsu_dfq_rdata_type[4] ;
+// Packet valid: taken from the dfq head when a read is valid, otherwise straight from the cpx.
+assign  lsu_ifill_pkt_vld =
+  ( dfq_rptr_vld_d1 & ~( (lsu_dfq_rdata_st_ack_type & lsu_dfq_rdata_stack_dcfill_vld)
+                         | ifill_dinv_head_of_dfq_pend
+                         | ifill_pkt_fwd_done_d1 ) ) |
+  (~dfq_rptr_vld_d1 & ~dfq_wr_en & ~cpx_fwd_rply_type & ~cpx_fp_type & cpx_spc_data_cx_b144to140[`CPX_VLD]) ;
+
+// this signal acts as a mask i.e. fill valid will be asserted until the ifu_lsu_ibuf_busy=0. But certain packets need
+// both busy=0 and memref_e=0 - in which case it is safer to mask until the dfq_rd_advance=1.
+
+//bug5309: add reset to the flop; x's get recycled from flop o/p until a dfq_rd_advance occurs i.e. flop reset
+//         after first ifill; failed in cmp1.92 cmp8 regression w/ vcs7.1
+
+assign  ifill_pkt_fwd_done  =
+        ~(reset | dfq_rd_advance) &                                                    // cleared by reset or by a dfq read advance
+        ( ifill_pkt_fwd_done_d1 |                                                      // hold
+         (dfq_rptr_vld_d1 & ~(ifu_lsu_ibuf_busy | ifill_dinv_head_of_dfq_pend)) ) ;    // set
+// State register for the mask above; the flop has no rst pin, reset is folded into din (bug5309).
+dff_s #(1) ifill_pkt_fwd_done_ff (
+  .clk (clk),
+  .din (ifill_pkt_fwd_done),
+  .q   (ifill_pkt_fwd_done_d1),
+  .se  (1'b0), .si (), .so ()
+);
+
+
+
+// Note that this becomes valid in cycle of read. Flush will be continuously read
+// out of dfq until all intermediate buffers are clear.
+
+// timing fix: 06/04/03: dfq_rd_advance uses byp_mux output; instead use dfq read output
+//             i.e. dfq_rd_advance is valid only when there is a valid entry in dfq
+//             it is already qual'ed w/ dfq_rd_vld_d1 to determine this.
+
+//assign  dfq_ld_type     = lsu_dfq_byp_type[5] ;
+//assign  dfq_ifill_type    = lsu_dfq_byp_type[4] ;
+//assign  dfq_evict_type    = lsu_dfq_byp_type[3] ;
+//assign  dfq_st_ack_type   = lsu_dfq_byp_type[2] ;
+//assign  dfq_strm_st_ack_type  = lsu_dfq_byp_type[1] ;
+//assign  dfq_int_type    = lsu_dfq_byp_type[0] ;
+
+assign  { dfq_ld_type,                                       // lsu_dfq_rdata_type bit 5
+          dfq_ifill_type,                                    // bit 4
+          dfq_evict_type,                                    // bit 3
+          dfq_st_ack_type,                                   // bit 2
+          dfq_strm_st_ack_type,                              // bit 1
+          dfq_int_type }  =  lsu_dfq_rdata_type[5:0] ;       // bit 0
+
+//8/25/03: add error type to dfq_wr_en, dfq_rd_advance
+assign  dfq_error_type       =  (4'hC == lsu_dfq_rdata_rq_type[3:0]) ;
+//8/25/03: add fwd req to L1I$ for RAMTEST to dfq_wr_en, dfq_rd_advance
+assign  dfq_fwd_req_ic_type  =  lsu_dfq_rdata_b103 & (4'hA == lsu_dfq_rdata_rq_type[3:0]) ;
+// way-invalidate valid is taken from the bypass side (lsu_dfq_byp_*), not from rdata
+assign  dfq_invwy_vld        =  lsu_dfq_byp_invwy_vld ;
+
+// if there is a co-dependent event, then the ifu will not
+// be signalled vld until rd_advance is asserted.
+//assign  i_and_d_codepend = 
+//    ((dfq_ld_type | dfq_ifill_type) &  dfq_invwy_vld)   |
+//    (dfq_evict_type | dfq_st_ack_type | dfq_strm_st_ack_type) |
+//    dfq_int_type ;
+
+//NOTE: restore cpx_inv qualification after adding cpx_inv part of dfq read - done
+//assign  st_rd_advance  =  dfq_byp_st_vld & (~lsu_dfq_byp_cpx_inv | (lsu_dfq_byp_cpx_inv & ~stb_cam_hit_w2)) & dfq_byp_ff_en;
+//assign  st_rd_advance  =  dfq_byp_st_vld & dfq_byp_ff_en; // bug:2770
+//                          (dfq_byp_st_vld &  lsu_dfq_rdata_stack_iinv_vld & ~ifu_lsu_ibuf_busy) ; // bug:2775
+
+// timing fix: 06/04/03: dfq_rd_advance uses byp_mux output; instead use dfq read output
+//             i.e. dfq_rd_advance is valid only when there is a valid entry in dfq
+//             it is already qual'ed w/ dfq_rd_vld_d1 to determine this.
+
+
+assign  st_rd_advance  =
+        dfq_st_ack_type & dfq_rdata_local_pkt & dfq_byp_ff_en &     // local store ack with the bypass-flop enable high
+        (~lsu_dfq_rdata_stack_iinv_vld | (lsu_dfq_rdata_stack_iinv_vld & ~ifu_lsu_ibuf_busy)) ;   // with iinv_vld set, the ibuf must not be busy
+
+// The pointer is advanced based on pre-flop bypass data.
+
+wire inv_clear_d1 ;   // ifu_lsu_inv_clear after one flop stage
+dff_s #(1) invclr_d1 (
+  .clk (clk),
+  .din (ifu_lsu_inv_clear),
+  .q   (inv_clear_d1),
+  .se  (1'b0), .si (), .so ()
+);
+
+//---
+// Dealing with skid involving invalidate clear.
+// 1. No stall asserted. If the int is immed. preceded by an inv,
+// then the inv will not be visible thru inv_clear. For this
+// reason, int will always wait an additional cycle before examining
+// inv_clear.
+// 2. In case int has been dispatched to the ifu with stall asserted,
+// stalls are conditionally inserted. 
+// Note : interrupts are always written into dfq.
+//---
+
+wire	dfq_rd_advance_d1 ;   // dfq_rd_advance after one flop stage
+dff_s rda_d1 (
+  .clk (clk),
+  .din (dfq_rd_advance),
+  .q   (dfq_rd_advance_d1),
+  .se  (1'b0), .si (), .so ()
+);
+
+
+// Begin Bug 5583
+wire	dfq_int_type_d1 ;
+wire	int_skid_c1 ;
+wire	int_skid_c2 ;
+wire	int_skid_stall ;
+
+// dfq_int_type after one flop stage.
+dff_s itype_d1 (
+  .clk (clk),
+  .din (dfq_int_type),
+  .q   (dfq_int_type_d1),
+  .se  (1'b0), .si (), .so ()
+);
+
+// The interrupt was issued from the dfq even though inv-clear was not
+// high, so a stall is introduced for 2 more cycles (c1, then c2).
+assign int_skid_c1 = ~inv_clear_d1 & dfq_rd_advance_d1 & dfq_int_type_d1 ;
+
+dff_s iskid_c2 (
+  .clk (clk),
+  .din (int_skid_c1),
+  .q   (int_skid_c2),
+  .se  (1'b0), .si (), .so ()
+);
+
+assign	int_skid_stall = int_skid_c2 | int_skid_c1 ;
+// End Bug 5583
+
+// timing fix: 06/04/03: dfq_rd_advance uses byp_mux output; instead use dfq read output
+//             i.e. dfq_rd_advance is valid only when there is a valid entry in dfq
+//             it is already qual'ed w/ dfq_rd_vld_d1 to determine this.
+//8/25/03: add error type to dfq_wr_en, dfq_rd_advance
+//8/25/03: add fwd req to L1I$ for RAMTEST to dfq_wr_en, dfq_rd_advance
+// dfq_rd_advance: OR of the per-packet-type release conditions below, gated by dfq_rptr_vld_d1 & ~reset.
+assign  dfq_rd_advance   =  
+  // local st which writes to cache cannot advance if simultaneous cam hit. 
+  //((dfq_byp_st_vld & (~cpx_inv | (cpx_inv & ~stb_cam_hit_w2)) & dfq_byp_ff_en)  | 
+  (st_rd_advance |   // local store acks are handled by st_rd_advance
+  // advance beyond a dside ld if it can be written to the byp ff
+  (dfq_ld_type & ~lsu_dfq_rdata_invwy_vld & dfq_byp_ff_en) |
+  // advance beyond a dside & iside ld if it can be written to the byp ff/ibuf clr
+  (dfq_ld_type &  lsu_dfq_rdata_invwy_vld & (dfq_byp_ff_en & ~ifu_lsu_ibuf_busy))   |
+  // advance beyond a iside ifill if it can be written to the ibuf
+  (dfq_ifill_type & ~lsu_dfq_rdata_invwy_vld & ~ifu_lsu_ibuf_busy)      |
+  // advance beyond a dside & iside ifill if it can be written to the byp ff/ibuf clr
+  (dfq_ifill_type &  lsu_dfq_rdata_invwy_vld & (dfq_byp_ff_en & ~ifu_lsu_ibuf_busy))  |
+  // any form of invalidate could invalidate both i and dside.
+  ((dfq_evict_type | (dfq_st_ack_type & ~dfq_rdata_local_pkt) | dfq_strm_st_ack_type) & 
+        (dfq_byp_ff_en & ~ifu_lsu_ibuf_busy)) |
+  // interrupts and flushes have to ensure invalidates are visible in caches.
+  // interrupts do not enter d-side byp buffer.  flush needs to look at inv clear.
+  (dfq_int_type & (dfq_byp_ff_en & ~ifu_lsu_ibuf_busy & ((inv_clear_d1 & ~dfq_rd_advance_d1) | dfq_stall_d1))) | // Bug 3820.
+  //(dfq_int_type & (dfq_byp_ff_en & ~ifu_lsu_ibuf_busy & ((inv_clear_d1 & ~dfq_rd_advance_d1) | dfq_stall_d2))) | // Bug 3820.
+  ((dfq_error_type | dfq_fwd_req_ic_type) & ~ifu_lsu_ibuf_busy))   // error / fwd-req-to-i$ packets only need ibuf not busy
+    & dfq_rptr_vld_d1 & ~reset ;
+
+//timing fix: 9/16/03 - dfq_rd_advance is late signal; use it as mux select to pick the correct read pointer
+//            add duplicate signal for dfq_rd_advance - has FO16 - adds 3inv to this path
+//            fix for dfq_read -> dfq_rd_advance -> dfq_rptr to dfq
+wire   dfq_rd_advance_dup ;
+assign dfq_rd_advance_dup =  dfq_rd_advance ;   // logical copy; serves as the select of the dfq_rptr[4:0] mux
+
+//timing fix: 9/29/03 - instantiate buffer for dfq_rd_advance to dfq_vld_en
+bw_u1_buf_30x UZsize_dfq_rd_advance_buf1 ( .a(dfq_rd_advance), .z(dfq_rd_advance_buf1) );   // buffer-cell copy of dfq_rd_advance
+
+// local_flush: a local packet with byp type bit 0 and the flush flag set, in a cycle where the dfq read advances.
+wire	local_flush ;
+assign	local_flush = dfq_rd_advance & local_pkt & lsu_dfq_byp_flush & lsu_dfq_byp_type[0] ;
+
+// Decode lsu_dfq_byp_tid[1:0] into a one-hot completion vector, gated by local_flush.
+wire	[3:0]	dfq_flsh_cmplt ;
+assign	dfq_flsh_cmplt[3:0] = {4{local_flush}} &
+	{  lsu_dfq_byp_tid[1] &  lsu_dfq_byp_tid[0],  lsu_dfq_byp_tid[1] & ~lsu_dfq_byp_tid[0],
+	  ~lsu_dfq_byp_tid[1] &  lsu_dfq_byp_tid[0], ~lsu_dfq_byp_tid[1] & ~lsu_dfq_byp_tid[0] } ;
+// The flopped copy goes out as lsu_dfq_flsh_cmplt[3:0].
+dff_s #(4) flshcmplt (
+  .clk (clk),
+  .din (dfq_flsh_cmplt[3:0]), .q (lsu_dfq_flsh_cmplt[3:0]),
+  .se  (1'b0), .si (), .so ()
+);
+
+
+// Check for extra bubbles in pipeline.
+//timing fix: 10/3/03 - use dfq_rd_advance as mux select
+//assign  dfq_rptr_new_w_wrap[5:0] =  dfq_rptr_w_wrap[5:0] + {5'b00000, dfq_rd_advance} ;
+wire  [5:0]  dfq_rptr_new_w_wrap_inc ;   // read pointer + 1, computed unconditionally
+assign  dfq_rptr_new_w_wrap_inc[5:0] =  dfq_rptr_w_wrap[5:0] + 6'b000001 ;
+assign  dfq_rptr_new_w_wrap[5:0]  =  dfq_rd_advance ? dfq_rptr_new_w_wrap_inc[5:0] : dfq_rptr_w_wrap[5:0] ;   // late dfq_rd_advance only selects between the two
+
+// The dfq will always read as long as there is a valid entry.
+// ** Design note : If dfq output is held at latches, this is no longer required !! **
+//assign  dfq_rptr_vld  =   dfq_vld_entry_exists ;
+assign  dfq_rptr_vld  =   dfq_vld_entry_exists_w ;   // read valid = current rptr differs from wptr
+
+wire   dfq_rptr_vld_w_d1;
+// Two staged copies of "entry exists": rvld_stgd1_new flops the next-rptr compare
+// (dfq_vld_entry_exists), rvld_stgd1 flops the current-rptr compare (dfq_rptr_vld).
+dff_s   rvld_stgd1_new (
+        .din    (dfq_vld_entry_exists), .q  (dfq_vld_entry_exists_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+dff_s   rvld_stgd1 (
+        .din    (dfq_rptr_vld), .q  (dfq_rptr_vld_w_d1),
+        //.din    (dfq_rptr_vld), .q  (dfq_rptr_vld_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+//dff   rdad_stgd1 (
+//        .din    (dfq_rd_advance), .q  (dfq_rd_advance_d1),
+//        .clk  (clk),
+//        .se     (1'b0),       .si (),          .so ()
+//        ); 
+// 6-bit read-pointer register: rst is tied to reset, en to dfq_rd_advance.
+dffre_s  #(6) dfq_rptr_ff (
+        .din    (dfq_rptr_new_w_wrap[5:0]), .q  (dfq_rptr_w_wrap[5:0]),
+        .rst    (reset), .en (dfq_rd_advance), .clk (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+assign  dfq_rptr_vld_d1 = dfq_rptr_vld_w_d1 & dfq_vld_entry_exists_d1;   // both staged copies must agree that an entry exists
+assign  dfq_rd_vld_d1 = dfq_rptr_vld_d1 ;   // alias
+//bug4576: add sehold to the dfq_rdata mux select
+assign  lsu_dfq_rd_vld_d1 = sehold | dfq_rptr_vld_d1 ;   // forced high while sehold is set
+
+//timing fix: 9/16/03 - dfq_rd_advance is late signal; use it as mux select to pick the correct read pointer
+//            add duplicate signal for dfq_rd_advance - has FO16 - adds 3inv to this path
+//            fix for dfq_read -> dfq_rd_advance -> dfq_rptr to dfq
+//assign  dfq_rptr[4:0] = dfq_rptr_w_wrap[4:0] + {4'b0000, dfq_rd_advance} ;
+
+//1/20/04: mintime fix - add minbuf to dfq_rptr_w_wrap in dfq_rptr
+wire  [4:0]  dfq_rptr_w_wrap_minbuf ;   // minbuf copy of dfq_rptr_w_wrap[4:0], feeds only the non-advance leg of the mux below
+
+bw_u1_minbuf_5x UZfix_dfq_rptr_b0 (.a(dfq_rptr_w_wrap[0]), .z(dfq_rptr_w_wrap_minbuf[0]));
+bw_u1_minbuf_5x UZfix_dfq_rptr_b1 (.a(dfq_rptr_w_wrap[1]), .z(dfq_rptr_w_wrap_minbuf[1]));
+bw_u1_minbuf_5x UZfix_dfq_rptr_b2 (.a(dfq_rptr_w_wrap[2]), .z(dfq_rptr_w_wrap_minbuf[2]));
+bw_u1_minbuf_5x UZfix_dfq_rptr_b3 (.a(dfq_rptr_w_wrap[3]), .z(dfq_rptr_w_wrap_minbuf[3]));
+bw_u1_minbuf_5x UZfix_dfq_rptr_b4 (.a(dfq_rptr_w_wrap[4]), .z(dfq_rptr_w_wrap_minbuf[4]));
+
+wire  [4:0]  dfq_rptr_inc ;   // low 5 pointer bits + 1 (wraps within 5 bits)
+assign dfq_rptr_inc[4:0]  =  dfq_rptr_w_wrap[4:0] + 5'b00001 ;
+assign  dfq_rptr[4:0] = dfq_rd_advance_dup ? dfq_rptr_inc[4:0] : dfq_rptr_w_wrap_minbuf[4:0] ;   // read address: next entry when advancing, else current entry
+//assign  dfq_rptr[4:0] = dfq_rd_advance_dup ? dfq_rptr_inc[4:0] : dfq_rptr_w_wrap[4:0] ;
+
+// Determine whether cfq has crossed high-water mark. IFU must switchout all threads
+// for every cycle that this is valid.
+// Need to change wptr size once new cfq array description incorporated.
+// Wrap bit may not be needed !!!
+wire  [5:0] dfq_vld_entries ;   // occupancy: 6-bit (modulo 64) difference wptr - rptr
+assign  dfq_vld_entries[5:0] = (dfq_wptr_w_wrap[5:0] - dfq_rptr_w_wrap[5:0]) ;
+/*assign  dfq_vld_entries[3:0] =
+  (dfq_rptr_w_wrap[4] ^ dfq_wptr_w_wrap[4]) ? 
+  (dfq_rptr_w_wrap[3:0] - dfq_wptr_w_wrap[3:0]) : (dfq_wptr_w_wrap[3:0] - dfq_rptr_w_wrap[3:0]) ;*/
+
+// As coded, the stall trips at 4 or more valid entries. NOTE(review): the original note put the mark at 16-4 = 12 - confirm which is intended.
+assign	dfq_stall = (dfq_vld_entries[5:0] >= 6'd4) ;
+assign  lsu_ifu_stallreq = 
+	dfq_stall |  int_skid_stall | lsu_tlbop_force_swo ; 
+	//dfq_stall | dfq_stall_d1 | dfq_stall_d2 | int_skid_stall | lsu_tlbop_force_swo ; 
+// dfq_stall_d1 (flopped stall) is consumed by the interrupt term of dfq_rd_advance.
+dff_s   dfqst_d1 (
+        .din  (dfq_stall), .q  (dfq_stall_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+//=================================================================================================
+//  INVALIDATE PROCESSING
+//=================================================================================================
+
+assign  dva_snp_addr_e[4:0]  =  lsu_cpx_pkt_ifill_type
+  ? imiss_inv_set_index[6:2] : lsu_cpx_pkt_inv_pa[4:0] ;   // ifill packet: imiss set index bits 6:2; otherwise the packet's inv_pa bits
+
+//bug3356 - b4 never changed to invalidate the 2nd offset of the i$ fill.
+//          l2 now generates b4 in b129 of cpx ifill packet. for ifill pkt
+//          b[129] = 0 for 1st ifill packet, b[129]=1 for 2nd ifill packet.
+
+wire    cpxpkt_ifill_b4 ;
+assign  cpxpkt_ifill_b4  =  lsu_cpx_pkt_ifill_type & lsu_cpx_pkt_atm_st_cmplt ;
+// One-hot decode of {imiss_inv_set_index[1], cpxpkt_ifill_b4}; bit 3 is the 2'b11 case.
+assign  imiss_dcd_b54[3:0] = {  imiss_inv_set_index[1] &  cpxpkt_ifill_b4,
+                                imiss_inv_set_index[1] & ~cpxpkt_ifill_b4,
+                               ~imiss_inv_set_index[1] &  cpxpkt_ifill_b4,
+                               ~imiss_inv_set_index[1] & ~cpxpkt_ifill_b4 } ;
+
+wire  [3:0] perror_dcd_b54 ;   // one-hot decode of lsu_cpx_pkt_perror_set[1:0]
+assign  perror_dcd_b54[3:0] = {  lsu_cpx_pkt_perror_set[1] &  lsu_cpx_pkt_perror_set[0],
+                                 lsu_cpx_pkt_perror_set[1] & ~lsu_cpx_pkt_perror_set[0],
+                                ~lsu_cpx_pkt_perror_set[1] &  lsu_cpx_pkt_perror_set[0],
+                                ~lsu_cpx_pkt_perror_set[1] & ~lsu_cpx_pkt_perror_set[0] } ;
+
+wire   [3:0]           dva_snp_set_vld_e;      // Lower 2b of cache set index - decoded (one bit per addr<5:4> value)
+wire   [1:0]           dva_snp_wy0_e ;         // way to invalidate for addr<5:4>=00
+wire   [1:0]           dva_snp_wy1_e ;         // way to invalidate for addr<5:4>=01
+wire   [1:0]           dva_snp_wy2_e ;         // way to invalidate for addr<5:4>=10
+wire   [1:0]           dva_snp_wy3_e ;         // way to invalidate for addr<5:4>=11
+
+
+
+/*
+assign  dva_snp_set_vld_e[0] = 
+lsu_cpx_pkt_ifill_type ? imiss_dcd_b54[0] : 
+  lsu_cpx_pkt_perror_dinv ? perror_dcd_b54[0] : cpx_cpu_inv_data[`CPX_AX0_INV_DVLD] ;
+assign  dva_snp_set_vld_e[1] = 
+lsu_cpx_pkt_ifill_type ? imiss_dcd_b54[1] : 
+  lsu_cpx_pkt_perror_dinv ? perror_dcd_b54[1] : cpx_cpu_inv_data[`CPX_AX1_INV_DVLD+4] ;
+assign  dva_snp_set_vld_e[2] = 
+lsu_cpx_pkt_ifill_type ? imiss_dcd_b54[2] :
+  lsu_cpx_pkt_perror_dinv ? perror_dcd_b54[2] : cpx_cpu_inv_data[`CPX_AX0_INV_DVLD+7] ;
+assign  dva_snp_set_vld_e[3] = 
+lsu_cpx_pkt_ifill_type ? imiss_dcd_b54[3] : 
+  lsu_cpx_pkt_perror_dinv ? perror_dcd_b54[3] : cpx_cpu_inv_data[`CPX_AX1_INV_DVLD+11] ; 
+
+assign  dva_snp_wy0_e[1:0]   = 
+lsu_cpx_pkt_ifill_type ? lsu_cpx_pkt_invwy[1:0] : cpx_cpu_inv_data[`CPX_AX0_INV_WY_HI:`CPX_AX0_INV_WY_LO];
+assign  dva_snp_wy1_e[1:0]   = 
+lsu_cpx_pkt_ifill_type ? lsu_cpx_pkt_invwy[1:0] : cpx_cpu_inv_data[`CPX_AX1_INV_WY_HI+4:`CPX_AX1_INV_WY_LO+4];
+assign  dva_snp_wy2_e[1:0]   = 
+lsu_cpx_pkt_ifill_type ? lsu_cpx_pkt_invwy[1:0] : cpx_cpu_inv_data[`CPX_AX0_INV_WY_HI+7:`CPX_AX0_INV_WY_LO+7];
+assign  dva_snp_wy3_e[1:0]   = 
+lsu_cpx_pkt_ifill_type ? lsu_cpx_pkt_invwy[1:0] : cpx_cpu_inv_data[`CPX_AX1_INV_WY_HI+11:`CPX_AX1_INV_WY_LO+11];
+*/
+
+wire    stack_type_dcfill_vld,
+        stack_type_dcfill_vld_d1;
+//assign  stack_type_dcfill_vld  =  dfq_st_ack_type & lsu_dfq_byp_stack_dcfill_vld; // bug 2767
+//--------------------------------------------------------------
+// st_ack_type  local_pkt   b[87]  dcfill_vld==b[151]
+//--------------------------------------------------------------
+//   1           0          0          -      pkt not modified
+//   1           0          1          -      pkt not modified
+//--------------------------------------------------------------
+//   1           1          0          0      pkt not modified
+//   1           1          0          1      pkt modified
+//--------------------------------------------------------------
+//   1           1          1          0      pkt not modified  <---using b[87] will fail even w/ local pkt qual; hence use b[151]
+//   1           1          1          1      pkt modified 
+//--------------------------------------------------------------
+// The table above is the history for bug 2767; the active equation below no longer uses lsu_dfq_byp_stack_dcfill_vld.
+// 4/7/03: set dcfill_vld only for local dcache data write and not for invalidate
+//         atomic and bis do not write dcache and hence don't set dcfill_vld
+assign  stack_type_dcfill_vld  =  lsu_dfq_byp_type[2] & local_pkt & lsu_dfq_byp_cpx_inv & ~(lsu_dfq_byp_atm | lsu_dfq_byp_binit_st) ;   // byp type bit 2, local, cpx_inv set, neither atm nor binit_st
+
+wire  [1:0]  lsu_dfq_byp_stack_adr_b54_d1 ;
+wire  [1:0]  lsu_dfq_byp_stack_wrway_d1 ;
+
+// bug3375: enabled by dfq_vld_en; registers {dcfill_vld, adr_b54[1:0], wrway[1:0]} into their _d1 copies.
+dffe_s #(5) dfq_by_wrway_ad54_ff (
+  .clk (clk),
+  .en  (dfq_vld_en),
+  .din ({stack_type_dcfill_vld, lsu_dfq_byp_stack_adr_b54[1:0], lsu_dfq_byp_stack_wrway[1:0]}),
+  .q   ({stack_type_dcfill_vld_d1, lsu_dfq_byp_stack_adr_b54_d1[1:0], lsu_dfq_byp_stack_wrway_d1[1:0]}),
+  .se  (1'b0), .si (), .so ()
+);
+
+//cpx_cpu_inv_data[13:0] =  {dfq_inv_data_b13to9,1'b0,dfq_inv_data_b7to2,1'b0,dfq_inv_data_b0} 
+// Macro values assumed by the hard-coded bit picks below (added to field offsets 0, 4, 7, 11):
+//   CPX_AX0_INV_DVLD 0,  CPX_AX0_INV_WY_LO 2,  CPX_AX0_INV_WY_HI 3
+//   CPX_AX1_INV_DVLD 0,  CPX_AX1_INV_WY_LO 1,  CPX_AX1_INV_WY_HI 2
+// Priority of each mux below: ifill packet first, then (set-valid only) the parity-error d-side
+// invalidate, then the flopped local store-ack fill (stack_type_dcfill_vld_d1), and finally the
+// flopped cpx invalidate vector dfq_inv_data_*.
+
+assign  dva_snp_set_vld_e[0] = 
+lsu_cpx_pkt_ifill_type ? imiss_dcd_b54[0] : 
+  lsu_cpx_pkt_perror_dinv ? perror_dcd_b54[0] : 
+     stack_type_dcfill_vld_d1 ? (lsu_dfq_byp_stack_adr_b54_d1[1:0]==2'b00) : dfq_inv_data_b0 ;   // else invalidate-vector bit 0
+     //stack_type_dcfill_vld_d1 ? (lsu_dfq_byp_stack_adr_b54_d1[1:0]==2'b00) : cpx_cpu_inv_data[`CPX_AX0_INV_DVLD] ;
+assign  dva_snp_set_vld_e[1] = 
+lsu_cpx_pkt_ifill_type ? imiss_dcd_b54[1] : 
+  lsu_cpx_pkt_perror_dinv ? perror_dcd_b54[1] : 
+     stack_type_dcfill_vld_d1 ? (lsu_dfq_byp_stack_adr_b54_d1[1:0]==2'b01) : dfq_inv_data_b7to2[4] ;   // else invalidate-vector bit 4
+     //stack_type_dcfill_vld_d1 ? (lsu_dfq_byp_stack_adr_b54_d1[1:0]==2'b01) : cpx_cpu_inv_data[`CPX_AX1_INV_DVLD+4] ;
+assign  dva_snp_set_vld_e[2] = 
+lsu_cpx_pkt_ifill_type ? imiss_dcd_b54[2] :
+  lsu_cpx_pkt_perror_dinv ? perror_dcd_b54[2] : 
+     stack_type_dcfill_vld_d1 ? (lsu_dfq_byp_stack_adr_b54_d1[1:0]==2'b10) : dfq_inv_data_b7to2[7] ;   // else invalidate-vector bit 7
+     //stack_type_dcfill_vld_d1 ? (lsu_dfq_byp_stack_adr_b54_d1[1:0]==2'b10) : cpx_cpu_inv_data[`CPX_AX0_INV_DVLD+7] ;
+assign  dva_snp_set_vld_e[3] = 
+lsu_cpx_pkt_ifill_type ? imiss_dcd_b54[3] : 
+  lsu_cpx_pkt_perror_dinv ? perror_dcd_b54[3] : 
+      stack_type_dcfill_vld_d1 ? (lsu_dfq_byp_stack_adr_b54_d1[1:0]==2'b11) : dfq_inv_data_b13to9[11] ;   // else invalidate-vector bit 11
+      //stack_type_dcfill_vld_d1 ? (lsu_dfq_byp_stack_adr_b54_d1[1:0]==2'b11) : cpx_cpu_inv_data[`CPX_AX1_INV_DVLD+11] ; 
+
+assign  dva_snp_wy0_e[1:0]   = 
+lsu_cpx_pkt_ifill_type ? lsu_cpx_pkt_invwy[1:0] : 
+   stack_type_dcfill_vld_d1 ? lsu_dfq_byp_stack_wrway_d1[1:0] : dfq_inv_data_b7to2[3:2] ;   // else invalidate-vector bits 3:2
+   //stack_type_dcfill_vld_d1 ? lsu_dfq_byp_stack_wrway_d1[1:0] : cpx_cpu_inv_data[`CPX_AX0_INV_WY_HI:`CPX_AX0_INV_WY_LO] ;
+assign  dva_snp_wy1_e[1:0]   = 
+lsu_cpx_pkt_ifill_type ? lsu_cpx_pkt_invwy[1:0] : 
+   stack_type_dcfill_vld_d1 ? lsu_dfq_byp_stack_wrway_d1[1:0] : dfq_inv_data_b7to2[6:5] ;   // else invalidate-vector bits 6:5
+   //stack_type_dcfill_vld_d1 ? lsu_dfq_byp_stack_wrway_d1[1:0] : cpx_cpu_inv_data[`CPX_AX1_INV_WY_HI+4:`CPX_AX1_INV_WY_LO+4] ;
+assign  dva_snp_wy2_e[1:0]   = 
+lsu_cpx_pkt_ifill_type ? lsu_cpx_pkt_invwy[1:0] : 
+   stack_type_dcfill_vld_d1 ? lsu_dfq_byp_stack_wrway_d1[1:0] : dfq_inv_data_b13to9[10:9] ;   // else invalidate-vector bits 10:9
+   //stack_type_dcfill_vld_d1 ? lsu_dfq_byp_stack_wrway_d1[1:0] : cpx_cpu_inv_data[`CPX_AX0_INV_WY_HI+7:`CPX_AX0_INV_WY_LO+7] ;
+assign  dva_snp_wy3_e[1:0]   = 
+lsu_cpx_pkt_ifill_type ? lsu_cpx_pkt_invwy[1:0] : 
+   stack_type_dcfill_vld_d1 ? lsu_dfq_byp_stack_wrway_d1[1:0] : dfq_inv_data_b13to9[13:12] ;   // else invalidate-vector bits 13:12
+   //stack_type_dcfill_vld_d1 ? lsu_dfq_byp_stack_wrway_d1[1:0] : cpx_cpu_inv_data[`CPX_AX1_INV_WY_HI+11:`CPX_AX1_INV_WY_LO+11] ;
+
+
+
+//   wire [1:0] dva_snp_way_e;
+//assign dva_snp_way_e[1:0] =  
+//  dva_snp_set_vld_e[0] ?  dva_snp_wy0_e[1:0]:
+//  dva_snp_set_vld_e[1] ?  dva_snp_wy1_e[1:0]:
+//  dva_snp_set_vld_e[2] ?  dva_snp_wy2_e[1:0]:
+//  dva_snp_set_vld_e[3] ?  dva_snp_wy3_e[1:0]: 2'bxx;
+
+//bug 2333 fix
+//06/09/03: bug 3420 - add logic for dtag parity error invalidate - inv all 4 ways of the index that had error
+//bug 3608 - qualify perror_dinv w/ dfq_st_vld
+wire     derror_inv_vld ;   // parity-error d-side invalidate, qualified with dfq_st_vld (bug 3608)
+assign   derror_inv_vld  =  lsu_cpx_pkt_perror_dinv & dfq_st_vld ;
+// Each 4-bit slice belongs to one set-valid bit; inside a slice bit n is way n, and derror_inv_vld selects all four ways.
+assign dva_snp_bit_wr_en_e[15:12] = {4{dva_snp_set_vld_e[3]}} &
+   ( {4{derror_inv_vld}} |
+     {  dva_snp_wy3_e[1] &  dva_snp_wy3_e[0],  dva_snp_wy3_e[1] & ~dva_snp_wy3_e[0],
+       ~dva_snp_wy3_e[1] &  dva_snp_wy3_e[0], ~dva_snp_wy3_e[1] & ~dva_snp_wy3_e[0] } ) ;
+
+assign dva_snp_bit_wr_en_e[11:8]  = {4{dva_snp_set_vld_e[2]}} &
+   ( {4{derror_inv_vld}} |
+     {  dva_snp_wy2_e[1] &  dva_snp_wy2_e[0],  dva_snp_wy2_e[1] & ~dva_snp_wy2_e[0],
+       ~dva_snp_wy2_e[1] &  dva_snp_wy2_e[0], ~dva_snp_wy2_e[1] & ~dva_snp_wy2_e[0] } ) ;
+
+assign dva_snp_bit_wr_en_e[7:4]   = {4{dva_snp_set_vld_e[1]}} &
+   ( {4{derror_inv_vld}} |
+     {  dva_snp_wy1_e[1] &  dva_snp_wy1_e[0],  dva_snp_wy1_e[1] & ~dva_snp_wy1_e[0],
+       ~dva_snp_wy1_e[1] &  dva_snp_wy1_e[0], ~dva_snp_wy1_e[1] & ~dva_snp_wy1_e[0] } ) ;
+
+assign dva_snp_bit_wr_en_e[3:0]   = {4{dva_snp_set_vld_e[0]}} &
+   ( {4{derror_inv_vld}} |
+     {  dva_snp_wy0_e[1] &  dva_snp_wy0_e[0],  dva_snp_wy0_e[1] & ~dva_snp_wy0_e[0],
+       ~dva_snp_wy0_e[1] &  dva_snp_wy0_e[0], ~dva_snp_wy0_e[1] & ~dva_snp_wy0_e[0] } ) ;
+
+
+//=================================================================================================
+//  LOCAL ST ACK PROCESSING
+//=================================================================================================
+
+// st-ack at head of cfq may write to cache if not indicated as invalid 
+// L2.
+
+//wire	byp_tag_perror ;
+//assign	byp_tag_perror = lsu_dfq_byp_perror_dinv | lsu_dfq_byp_perror_iinv ;
+
+// one-shot rd-enable for stb for st data.
+// st-quad pkt2 will not rd stb
+//NOTE: restore cpx_inv qualification after adding cpx_inv part of dfq read - done
+/*
+assign  st_ack_rq_stb = 
+   (dfq_byp_st_vld & st_rd_advance & ~byp_tag_perror)   // local st ack from dfq
+  & lsu_dfq_byp_cpx_inv ;
+*/
+  //((cpx_local_st_ack_type & ~dfq_wr_en & ~(|cpx_spc_data_cx[`CPX_PERR_DINV+1:`CPX_PERR_DINV])) | // local st ack from cpx
+  //(dfq_byp_st_vld & dfq_rd_advance & ~byp_tag_perror))   // local st ack from dfq
+  //(dfq_byp_st_vld & dfq_rd_advance_d1)) // local st ack from dfq
+
+/*assign  st_ack_rq_stb = 
+  ((cpx_local_st_ack_type & ~dfq_wr_en & ~cpx_spc_data_cx[107]) | // local st ack from cpx
+  (dfq_byp_st_vld & dfq_rd_advance & ~lsu_dfq_byp_stquad_pkt2))   // local st ack from dfq
+  //(dfq_byp_st_vld & dfq_rd_advance_d1)) // local st ack from dfq
+  & cpx_inv ; */
+
+/*
+dff_s #(1)  stackr_d1 (
+        .din    (st_ack_rq_stb),
+        .q      (st_ack_rq_stb_d1),
+        .clk  (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+*/
+
+// Mux's control signal can be flipped - TIMING
+//assign  st_ack_tid[1:0] =
+//  (dfq_byp_st_vld & dfq_rd_advance) ?  
+//	lsu_dfq_byp_tid[1:0] : cpx_spc_data_cx[`CPX_TH_HI:`CPX_TH_LO] ;
+
+// This can be critical !!!
+//assign  lsu_st_ack_rq_stb[0] = ~st_ack_tid[1] & ~st_ack_tid[0] & st_ack_rq_stb ;
+//assign  lsu_st_ack_rq_stb[1] = ~st_ack_tid[1] &  st_ack_tid[0] & st_ack_rq_stb ;
+//assign  lsu_st_ack_rq_stb[2] =  st_ack_tid[1] & ~st_ack_tid[0] & st_ack_rq_stb ;
+//assign  lsu_st_ack_rq_stb[3] =  st_ack_tid[1] &  st_ack_tid[0] & st_ack_rq_stb ;
+
+// the ack decode can be combined with the above (grape)
+
+// Per-thread st ack dequeue strobe: for the thread picked by cpx_pkt_thrd_sel,
+// fires on dfq_st_cmplt unless the packet is marked perror_dinv, perror_iinv
+// or binit_st.
+// (The same qualification is applied to all four threads.)
+assign  lsu_st_ack_dq_stb[0] = dfq_st_cmplt & cpx_pkt_thrd_sel[0] &
+    ~lsu_cpx_pkt_perror_dinv & ~lsu_cpx_pkt_perror_iinv & ~lsu_cpx_pkt_binit_st ;
+assign  lsu_st_ack_dq_stb[1] = dfq_st_cmplt & cpx_pkt_thrd_sel[1] &
+    ~lsu_cpx_pkt_perror_dinv & ~lsu_cpx_pkt_perror_iinv & ~lsu_cpx_pkt_binit_st ;
+assign  lsu_st_ack_dq_stb[2] = dfq_st_cmplt & cpx_pkt_thrd_sel[2] &
+    ~lsu_cpx_pkt_perror_dinv & ~lsu_cpx_pkt_perror_iinv & ~lsu_cpx_pkt_binit_st ;
+assign  lsu_st_ack_dq_stb[3] = dfq_st_cmplt & cpx_pkt_thrd_sel[3] &
+    ~lsu_cpx_pkt_perror_dinv & ~lsu_cpx_pkt_perror_iinv & ~lsu_cpx_pkt_binit_st ;
+
+// RMO store ack completion, per thread (dfq_st_cmplt seen on a binit_st packet).
+assign  lsu_cpx_rmo_st_ack[0] =
+    lsu_cpx_pkt_binit_st & dfq_st_cmplt & cpx_pkt_thrd_sel[0] ;
+assign  lsu_cpx_rmo_st_ack[1] =
+    lsu_cpx_pkt_binit_st & dfq_st_cmplt & cpx_pkt_thrd_sel[1] ;
+assign  lsu_cpx_rmo_st_ack[2] =
+    lsu_cpx_pkt_binit_st & dfq_st_cmplt & cpx_pkt_thrd_sel[2] ;
+assign  lsu_cpx_rmo_st_ack[3] =
+    lsu_cpx_pkt_binit_st & dfq_st_cmplt & cpx_pkt_thrd_sel[3] ;
+
+assign  lsu_st_wr_dcache = stwr_active_e ;  // output simply follows stwr_active_e
+
+//assign  lsu_st_wr_sel_e = stwr_active_e |  lsu_diagnstc_wr_src_sel_e ;
+
+//=================================================================================================
+//  CPX PKT DECODE
+//=================================================================================================
+
+// The decode is meant to qualify writes into the dfq.
+// These values are also stored in the dfq to save on decode at the head of the queue.
+
+// ---- cpx packet request-type decode ----
+// Every *_type wire below is bit `CPX_VLD of the packet ANDed with a match on
+// the four request bits {`CPX_RQ_HI, `CPX_RQ_LO+2, `CPX_RQ_LO+1, `CPX_RQ_LO};
+// the bit pattern matched is given in the comment on each assign.
+// Bundle order, msb to lsb: ld, ifill, evict, st_ack, strm_st_ack, int.
+// (fp and error types are decoded further down and are not in the bundle.)
+assign lsu_cpxpkt_type_dcd_cx[5:0] = {
+    cpx_ld_type, cpx_ifill_type, cpx_evict_type,
+    cpx_st_ack_type, cpx_strm_st_ack_type, cpx_int_type };
+
+assign  cpx_ld_type = cpx_spc_data_cx_b144to140[`CPX_VLD] &                        // 0000
+    ~( cpx_spc_data_cx_b144to140[`CPX_RQ_HI]   | cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] |
+       cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] | cpx_spc_data_cx_b144to140[`CPX_RQ_LO] ) ;
+
+assign  cpx_ifill_type = cpx_spc_data_cx_b144to140[`CPX_VLD] &                     // 0001
+       cpx_spc_data_cx_b144to140[`CPX_RQ_LO] &
+    ~( cpx_spc_data_cx_b144to140[`CPX_RQ_HI] | cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] | cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] ) ;
+
+assign  cpx_evict_type = cpx_spc_data_cx_b144to140[`CPX_VLD] &                     // 0011
+       cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] & cpx_spc_data_cx_b144to140[`CPX_RQ_LO] &
+    ~( cpx_spc_data_cx_b144to140[`CPX_RQ_HI]   | cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] ) ;
+
+assign  cpx_st_ack_type = cpx_spc_data_cx_b144to140[`CPX_VLD] &                    // 0100
+       cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] &
+    ~( cpx_spc_data_cx_b144to140[`CPX_RQ_HI] | cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] | cpx_spc_data_cx_b144to140[`CPX_RQ_LO] ) ;
+         //~cpx_spc_data_cx[108] ;  // 1st stquad ack is rejected
+
+assign  cpx_strm_st_ack_type = cpx_spc_data_cx_b144to140[`CPX_VLD] &               // 0110
+       cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] & cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] &
+    ~( cpx_spc_data_cx_b144to140[`CPX_RQ_HI]   | cpx_spc_data_cx_b144to140[`CPX_RQ_LO] ) ;
+
+assign  cpx_int_type = cpx_spc_data_cx_b144to140[`CPX_VLD] &                       // 0111
+      ~cpx_spc_data_cx_b144to140[`CPX_RQ_HI]   & cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] &
+       cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] & cpx_spc_data_cx_b144to140[`CPX_RQ_LO] ;
+
+// ---- types not included in lsu_cpxpkt_type_dcd_cx ----
+//bug3657  - kill ifill vld in bypass path when cpxtype=fp/fwd_reply
+
+assign  cpx_fp_type = cpx_spc_data_cx_b144to140[`CPX_VLD] &                        // 1000
+       cpx_spc_data_cx_b144to140[`CPX_RQ_HI] &
+    ~( cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] | cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] | cpx_spc_data_cx_b144to140[`CPX_RQ_LO] ) ;
+
+//8/25/03: add error type to dfq_wr_en, dfq_rd_advance
+assign  cpx_error_type = cpx_spc_data_cx_b144to140[`CPX_VLD] &                     // 1100
+       cpx_spc_data_cx_b144to140[`CPX_RQ_HI]   & cpx_spc_data_cx_b144to140[`CPX_RQ_LO+2] &
+    ~( cpx_spc_data_cx_b144to140[`CPX_RQ_LO+1] | cpx_spc_data_cx_b144to140[`CPX_RQ_LO] ) ;
+
+// Miscellaneous cpu based decode
+// cpu_sel is a one-hot decode of const_cpuid[1:0]; const_cpuid[2] is not
+// part of it and is exported separately as lsu_cpu_uhlf_sel.
+assign  lsu_cpu_uhlf_sel      = const_cpuid[2] ;
+assign  lsu_cpu_dcd_sel[7:0]  = {2{cpu_sel[3:0]}} ;
+
+assign  cpu_sel[0] = ~(const_cpuid[1] | const_cpuid[0]) ;
+assign  cpu_sel[1] =  const_cpuid[0]  & ~const_cpuid[1] ;
+assign  cpu_sel[2] = ~const_cpuid[0]  &  const_cpuid[1] ;
+assign  cpu_sel[3] =  const_cpuid[0]  &  const_cpuid[1] ;
+
+
+// A st ack is "local" when the packet field `CPX_INV_CID_HI:`CPX_INV_CID_LO
+// equals this core's const_cpuid[2:0].
+// st ack to respective stb's will not be generated for blk init stores
+// as such stores have already been deallocated.
+assign  cpx_local_st_ack_type = cpx_st_ack_type &
+  (cpx_spc_data_cx_b120to118[`CPX_INV_CID_HI:`CPX_INV_CID_LO] == const_cpuid[2:0]) ;
+ // & ~(cpx_spc_data_cx[`CPX_BINIT_STACK] | (|cpx_spc_data_cx[`CPX_PERR_DINV+1:`CPX_PERR_DINV])) ;
+
+wire    squash_ack ;  // set by packet bit b125 or by either bit of `CPX_PERR_DINV+1:`CPX_PERR_DINV
+assign squash_ack = (|cpx_spc_data_cx_b124to123[`CPX_PERR_DINV+1:`CPX_PERR_DINV]) | cpx_spc_data_cx_b125 ;
+
+// Per-thread local st ack decode.
+// Common terms: cpx_local_st_ack_type set and squash_ack clear; the thread is
+// picked by packet bits {`CPX_TH_HI,`CPX_TH_LO}.
+// Terms that were left commented out on each of the four decodes:
+//   ~cpx_spc_data_cx[125]         ; rmo st will not ack
+//   ~cpx_spc_data_cx[`CPX_WY_LO]  ; stquad1 will not ack - just invalidate.
+//                                   b131 of cpx pkt used.
+
+// thread 0 : {TH_HI,TH_LO} = 00
+assign  cpx_st_ack_tid0 = cpx_local_st_ack_type &
+    ~(squash_ack | cpx_spc_data_cx_b135to134[`CPX_TH_HI] | cpx_spc_data_cx_b135to134[`CPX_TH_LO]) ;
+
+// thread 1 : {TH_HI,TH_LO} = 01
+assign  cpx_st_ack_tid1 = cpx_local_st_ack_type & cpx_spc_data_cx_b135to134[`CPX_TH_LO] &
+    ~(squash_ack | cpx_spc_data_cx_b135to134[`CPX_TH_HI]) ;
+
+// thread 2 : {TH_HI,TH_LO} = 10
+assign  cpx_st_ack_tid2 = cpx_local_st_ack_type & cpx_spc_data_cx_b135to134[`CPX_TH_HI] &
+    ~(squash_ack | cpx_spc_data_cx_b135to134[`CPX_TH_LO]) ;
+
+// thread 3 : {TH_HI,TH_LO} = 11
+assign  cpx_st_ack_tid3 = cpx_local_st_ack_type & ~squash_ack &
+    cpx_spc_data_cx_b135to134[`CPX_TH_LO] & cpx_spc_data_cx_b135to134[`CPX_TH_HI] ;
+
+// Performance Ctr Info: one bit per thread, dcfill_active_e & lsu_cpx_pkt_l2miss steered by dfq_threadN
+//assign lsu_tlu_l2_dmiss[0] =  dfill_dcd_thrd[0] & dcfill_active_e & lsu_cpx_pkt_l2miss ;
+assign lsu_tlu_l2_dmiss[0] =  dcfill_active_e & lsu_cpx_pkt_l2miss & dfq_thread0 ;
+assign lsu_tlu_l2_dmiss[1] =  dcfill_active_e & lsu_cpx_pkt_l2miss & dfq_thread1 ;
+assign lsu_tlu_l2_dmiss[2] =  dcfill_active_e & lsu_cpx_pkt_l2miss & dfq_thread2 ;
+assign lsu_tlu_l2_dmiss[3] =  dcfill_active_e & lsu_cpx_pkt_l2miss & dfq_thread3 ;
+
+//=================================================================================================
+//  GENERATE b[151] of DFQ WRITE DATA
+//=================================================================================================
+wire  [7:0]  cpx_inv_vld;   // bit n = OR of four packet bits for cpu n
+wire  [7:0]  cpu_sel_dcd;   // 8-way cpu select built from cpu_sel and lsu_cpu_uhlf_sel
+// Packet bits are listed lowest bit first within each group.
+assign  cpx_inv_vld[0] = cpx_spc_data_cx_b0  |
+                         cpx_spc_data_cx_b32 |
+                         cpx_spc_data_cx_b56 |
+                         cpx_spc_data_cx_b88 ;
+
+assign  cpx_inv_vld[1] = cpx_spc_data_cx_b4  |
+                         cpx_spc_data_cx_b35 |
+                         cpx_spc_data_cx_b60 |
+                         cpx_spc_data_cx_b91 ;
+
+assign  cpx_inv_vld[2] = cpx_spc_data_cx_b8  |
+                         cpx_spc_data_cx_b38 |
+                         cpx_spc_data_cx_b64 |
+                         cpx_spc_data_cx_b94 ;
+
+assign  cpx_inv_vld[3] = cpx_spc_data_cx_b12 |
+                         cpx_spc_data_cx_b41 |
+                         cpx_spc_data_cx_b68 |
+                         cpx_spc_data_cx_b97 ;
+
+assign  cpx_inv_vld[4] = cpx_spc_data_cx_b16  |
+                         cpx_spc_data_cx_b44  |
+                         cpx_spc_data_cx_b72  |
+                         cpx_spc_data_cx_b100 ;
+
+assign  cpx_inv_vld[5] = cpx_spc_data_cx_b20  |
+                         cpx_spc_data_cx_b47  |
+                         cpx_spc_data_cx_b76  |
+                         cpx_spc_data_cx_b103 ;
+
+assign  cpx_inv_vld[6] = cpx_spc_data_cx_b24  |
+                         cpx_spc_data_cx_b50  |
+                         cpx_spc_data_cx_b80  |
+                         cpx_spc_data_cx_b106 ;
+
+assign  cpx_inv_vld[7] = cpx_spc_data_cx_b28  |
+                         cpx_spc_data_cx_b53  |
+                         cpx_spc_data_cx_b84  |
+                         cpx_spc_data_cx_b109 ;
+
+// Upper nibble is live when lsu_cpu_uhlf_sel=1, lower nibble when it is 0.
+assign cpu_sel_dcd[7:0] = { {4{ lsu_cpu_uhlf_sel}} & cpu_sel[3:0],
+                            {4{~lsu_cpu_uhlf_sel}} & cpu_sel[3:0] } ;
+// Pick this core's entry out of cpx_inv_vld.
+assign lsu_cpx_spc_inv_vld  =  |(cpu_sel_dcd[7:0] & cpx_inv_vld[7:0]);
+
+//=================================================================================================
+//  GENERATE ICACHE INVALIDATE VALID (bug:2770)
+//=================================================================================================
+
+wire  [7:0]  cpx_iinv_vld;      // bit n = OR of two packet bits for cpu n
+wire         cpx_spc_iinv_vld;  // cpx_iinv_vld entry picked by cpu_sel_dcd, plus b[124]
+
+assign  cpx_iinv_vld[0] = cpx_spc_data_cx_b1  |
+                          cpx_spc_data_cx_b57 ;
+
+assign  cpx_iinv_vld[1] = cpx_spc_data_cx_b5  |
+                          cpx_spc_data_cx_b61 ;
+
+assign  cpx_iinv_vld[2] = cpx_spc_data_cx_b9  |
+                          cpx_spc_data_cx_b65 ;
+
+assign  cpx_iinv_vld[3] = cpx_spc_data_cx_b13 |
+                          cpx_spc_data_cx_b69 ;
+
+assign  cpx_iinv_vld[4] = cpx_spc_data_cx_b17 |
+                          cpx_spc_data_cx_b73 ;
+
+assign  cpx_iinv_vld[5] = cpx_spc_data_cx_b21 |
+                          cpx_spc_data_cx_b77 ;
+
+assign  cpx_iinv_vld[6] = cpx_spc_data_cx_b25 |
+                          cpx_spc_data_cx_b81 ;
+
+assign  cpx_iinv_vld[7] = cpx_spc_data_cx_b29 |
+                          cpx_spc_data_cx_b85 ;
+
+//bug3701 - include i$ parity error invalidate - b[124]
+assign cpx_spc_iinv_vld  =  |( cpu_sel_dcd[7:0] & ({8{cpx_spc_data_cx_b124to123[`CPX_PERR_DINV+1]}} | cpx_iinv_vld[7:0]) )  ;
+
+
+// dfq_rd_advance - local st ack not qualified w/ ifu_lsu_ibuf_busy
+// qualify ifu_busy w/ local_st_ack=1 and iinv=1
+
+assign lsu_cpx_stack_icfill_vld  =  
+                  (~cpx_local_st_ack_type & cpx_spc_data_cx_b128) |    //if local st_ack=0, b[128]=cpx_data[128]
+                  ( cpx_local_st_ack_type & cpx_spc_iinv_vld) ;        //if local st_ack=1, b[128]=iinv
+
+//=================================================================================================
+//  MISC QDP2 MUX SELECTS
+//=================================================================================================
+
+//assign  lsu_dcfill_mx_sel_e[0]  =  lsu_dc_iob_access_e;
+//assign  lsu_dcfill_mx_sel_e[1]  =  lsu_bist_wvld_e | lsu_bist_rvld_e;
+//assign  lsu_dcfill_mx_sel_e[2]  =  lsu_diagnstc_wr_src_sel_e;
+//assign  lsu_dcfill_mx_sel_e[3]  =  ~|lsu_dcfill_mx_sel_e[2:0];
+
+//assign  lsu_dcfill_addr_mx_sel_e  =  ~|lsu_dcfill_mx_sel_e[1:0];
+
+//assign  lsu_dcfill_data_mx_sel_e  =  lsu_dc_iob_access_e | lsu_bist_wvld_e;
+
+assign lsu_cpx_thrdid[0]  =  ~(cpx_spc_data_cx_b135to134[`CPX_TH_HI] | cpx_spc_data_cx_b135to134[`CPX_TH_LO]) ;   // {TH_HI,TH_LO} = 00
+assign lsu_cpx_thrdid[1]  =   cpx_spc_data_cx_b135to134[`CPX_TH_LO] & ~cpx_spc_data_cx_b135to134[`CPX_TH_HI] ;    // {TH_HI,TH_LO} = 01
+assign lsu_cpx_thrdid[2]  =   cpx_spc_data_cx_b135to134[`CPX_TH_HI] & ~cpx_spc_data_cx_b135to134[`CPX_TH_LO] ;    // {TH_HI,TH_LO} = 10
+assign lsu_cpx_thrdid[3]  =   cpx_spc_data_cx_b135to134[`CPX_TH_LO] &  cpx_spc_data_cx_b135to134[`CPX_TH_HI] ;    // {TH_HI,TH_LO} = 11
+
+// modify cpx packet only if dcache update from stb has to be made. 
+// lsu_cpx_spc_inv_vld = 1 => invalidate dcache for atomic- b[129] and bst- b[125]
+// 			      update dcache for other requests
+//
+// i.e. cpx_pkt==st_ack and local and dcfill_vld=1; if dcfill_vld==0, ifill info
+// has to be left as is. hence no pkt modification
+
+// dcfill_vld for a st ack: packet is a local st ack, neither b[129] (atomic)
+// nor b[125] (bst) is set, and lsu_cpx_spc_inv_vld is set.
+assign lsu_cpx_stack_dcfill_vld  =  lsu_cpx_spc_inv_vld & cpx_local_st_ack_type &
+                                    ~cpx_spc_data_cx_b129 & ~cpx_spc_data_cx_b125 ;
+
+//timing fix: 6/16/03 - fix for ifill_pkt_vld - use b130 if store_ack_dcfill_vld=1
+//bug3582 - b[130] for store ack is a dont-care i.e. capture b[130] only if packet type is not store ack
+// => b[130] is 1 when lsu_cpx_stack_dcfill_vld=1; when cpx_st_ack=0 it is cpx_data[130]
+assign lsu_cpx_stack_dcfill_vld_b130  =  (cpx_spc_data_cx_b130 & ~cpx_st_ack_type) |
+                                         lsu_cpx_stack_dcfill_vld ;
+endmodule
Index: /trunk/T1-CPU/lsu/lsu_dctl.v
===================================================================
--- /trunk/T1-CPU/lsu/lsu_dctl.v	(revision 6)
+++ /trunk/T1-CPU/lsu/lsu_dctl.v	(revision 6)
@@ -0,0 +1,7544 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: lsu_dctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+/////////////////////////////////////////////////////////////////
+/*
+//  Description:  LSU Data Cache Control and Minor Datapath
+//      - Tag Comparison - hit/miss.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include  "sys.h" // system level definition file which contains the 
+          // time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+`include  "lsu.h"
+
+module lsu_dctl ( /*AUTOARG*/
+   // Outputs
+   lsu_tlu_nucleus_ctxt_m, lsu_quad_word_access_g, so, dctl_rst_l, 
+   lsu_tlu_wsr_inst_e, lsu_l2fill_fpld_e, dva_vld_m_bf, 
+   lsu_no_spc_pref, ifu_tlu_flush_fd_w, ifu_tlu_flush_fd2_w, 
+   ifu_tlu_flush_fd3_w, ifu_lsu_flush_w, lsu_tlu_thrid_d, 
+   lsu_diagnstc_data_sel, lsu_diagnstc_va_sel, lsu_err_addr_sel, 
+   dva_bit_wr_en_e, dva_wr_adr_e, lsu_exu_ldst_miss_w2, 
+   lsu_exu_dfill_vld_w2, lsu_ffu_ld_vld, lsu_ld_miss_wb, 
+   lsu_dtlb_bypass_e, ld_pcx_pkt_g, tlb_ldst_cam_vld, ldxa_internal, 
+   lsu_ifu_ldsta_internal_e, lsu_ifu_ldst_cmplt, lsu_ifu_itlb_en, 
+   lsu_ifu_icache_en, lmq_byp_data_en_w2, lmq_byp_data_fmx_sel, 
+   lmq_byp_data_mxsel0, lmq_byp_data_mxsel1, lmq_byp_data_mxsel2, 
+   lmq_byp_data_mxsel3, lmq_byp_ldxa_mxsel0, lmq_byp_ldxa_mxsel1, 
+   lmq_byp_ldxa_mxsel2, lmq_byp_ldxa_mxsel3, lsu_ld_thrd_byp_sel_e, 
+   dcache_byte_wr_en_e, lsu_dcache_wr_vld_e, lsu_ldstub_g, 
+   lsu_swap_g, lsu_tlu_dtlb_done, lsu_exu_thr_m, merge7_sel_byte0_m, 
+   merge7_sel_byte7_m, merge6_sel_byte1_m, merge6_sel_byte6_m, 
+   merge5_sel_byte2_m, merge5_sel_byte5_m, merge4_sel_byte3_m, 
+   merge4_sel_byte4_m, merge3_sel_byte0_m, merge3_sel_byte3_m, 
+   merge3_sel_byte4_m, merge3_sel_byte7_default_m, merge3_sel_byte_m, 
+   merge2_sel_byte1_m, merge2_sel_byte2_m, merge2_sel_byte5_m, 
+   merge2_sel_byte6_default_m, merge2_sel_byte_m, merge0_sel_byte0_m, 
+   merge0_sel_byte1_m, merge0_sel_byte2_m, 
+   merge0_sel_byte3_default_m, merge0_sel_byte4_m, 
+   merge0_sel_byte5_m, merge0_sel_byte6_m, 
+   merge0_sel_byte7_default_m, merge1_sel_byte0_m, 
+   merge1_sel_byte1_m, merge1_sel_byte2_m, 
+   merge1_sel_byte3_default_m, merge1_sel_byte4_m, 
+   merge1_sel_byte5_m, merge1_sel_byte6_m, 
+   merge1_sel_byte7_default_m, merge0_sel_byte_1h_m, 
+   merge1_sel_byte_1h_m, merge1_sel_byte_2h_m, lsu_dtlb_cam_real_e, 
+   lsu_dtagv_wr_vld_e, lsu_dtag_wrreq_x_e, lsu_dtag_index_sel_x_e, 
+   lsu_dtlb_wr_vld_e, lsu_dtlb_tag_rd_e, lsu_dtlb_data_rd_e, 
+   lsu_dtlb_dmp_vld_e, lsu_dtlb_dmp_all_e, lsu_dtlb_rwindex_vld_e, 
+   lsu_dtlb_invalid_all_l_m, lsu_tlu_tlb_ld_inst_m, 
+   lsu_tlu_tlb_st_inst_m, lsu_tlu_tlb_access_tid_m, 
+   lsu_tlb_data_rd_vld_g, lsu_tlb_st_sel_m, lsu_va_wtchpt0_wr_en_l, 
+   lsu_va_wtchpt1_wr_en_l, lsu_va_wtchpt2_wr_en_l, 
+   lsu_va_wtchpt3_wr_en_l, thread0_m, thread1_m, thread2_m, 
+   thread3_m, lsu_dctldp_thread0_m, lsu_dctldp_thread1_m, 
+   lsu_dctldp_thread2_m, lsu_dctldp_thread3_m, thread0_g, thread1_g, 
+   thread2_g, thread3_g, lsu_tlu_nonalt_ldst_m, 
+   lsu_tlu_xslating_ldst_m, lsu_tlu_ctxt_sel_m, lsu_tlu_write_op_m, 
+   lsu_dtlb_addr_mask_l_e, dva_din_e, 
+   lsu_diagnstc_dtagv_prty_invrt_e, lsu_ifu_asi_load, 
+   lsu_ifu_asi_thrid, lsu_ifu_asi_vld, lsu_quad_asi_e, 
+   lsu_local_ldxa_sel_g, lsu_dtag_rsel_m, lsu_tlbop_force_swo, 
+   lsu_atomic_pkt2_bsel_g, lsu_dcache_tag_perror_g, 
+   lsu_dcache_data_perror_g, lsu_ifu_l2_unc_error, 
+   lsu_ifu_l2_corr_error, lsu_ifu_dcache_data_perror, 
+   lsu_ifu_dcache_tag_perror, lsu_ifu_error_tid, lsu_ifu_io_error, 
+   lsu_tlu_squash_va_oor_m, lsu_squash_va_oor_m, tlb_cam_hit_g, 
+   lsu_st_hw_le_g, lsu_st_w_or_dbl_le_g, lsu_st_x_le_g, 
+   lsu_swap_sel_default_g, lsu_swap_sel_default_byte_7_2_g, 
+   lsu_st_rmo_m, lsu_bst_in_pipe_m, lsu_snap_blk_st_m, lsu_blk_st_m, 
+   lsu_blkst_pgnum_m, lsu_ffu_blk_asi_e, lsu_blk_asi_m, 
+   lsu_nonalt_nucl_access_m, dcache_alt_mx_sel_e, 
+   dcache_alt_mx_sel_e_bf, dcache_rvld_e, lsu_dc_iob_access_e, 
+   lsu_ifu_ldst_miss_w, lsu_ifu_dc_parity_error_w2, 
+   lsu_ldst_inst_vld_e, lsu_local_ldxa_tlbrd_sel_g, 
+   lsu_local_diagnstc_tagrd_sel_g, lsu_va_wtchpt_sel_g, 
+   asi_state_wr_thrd, thread0_d, thread1_d, thread2_d, thread3_d, 
+   tlu_lsu_asi_update_g, pctxt_state_wr_thrd, sctxt_state_wr_thrd, 
+   thread_pctxt, thread_sctxt, thread_actxt, thread_default, 
+   thread0_ctxt, thread1_ctxt, thread2_ctxt, thread3_ctxt, 
+   pid_state_wr_en, thread0_e, thread1_e, thread2_e, thread3_e, 
+   dfture_tap_wr_mx_sel, lctl_rst, lsu_ctl_state_wr_en, 
+   lsuctl_ctlbits_wr_en, dfture_tap_rd_en, bist_tap_wr_en, 
+   bistctl_wr_en, bist_ctl_reg_wr_en, mrgn_tap_wr_en, ldiagctl_wr_en, 
+   misc_ctl_sel_din, lsu_asi_sel_fmx1, lsu_asi_sel_fmx2, 
+   tlb_access_en0_g, tlb_access_en1_g, tlb_access_en2_g, 
+   tlb_access_en3_g, tlb_access_sel_thrd0, tlb_access_sel_thrd1, 
+   tlb_access_sel_thrd2, tlb_access_sel_default, mrgnctl_wr_en, 
+   hpv_priv_m, hpstate_en_m, dcache_arry_data_sel_m, dtlb_bypass_m, 
+   lsu_alt_space_m, atomic_m, ldst_dbl_m, fp_ldst_m, lda_internal_m, 
+   sta_internal_m, cam_real_m, data_rd_vld_g, tag_rd_vld_g, 
+   ldst_sz_m, asi_internal_m, rd_only_ltlb_asi_e, wr_only_ltlb_asi_e, 
+   dfill_tlb_asi_e, ifill_tlb_asi_e, nofault_asi_m, as_if_user_asi_m, 
+   atomic_asi_m, phy_use_ec_asi_m, phy_byp_ec_asi_m, quad_asi_m, 
+   binit_quad_asi_m, blk_asi_m, recognized_asi_m, strm_asi_m, 
+   mmu_rd_only_asi_m, rd_only_asi_m, wr_only_asi_m, unimp_asi_m, 
+   va_wtchpt_cmp_en_m, lsu_tlu_async_ttype_vld_w2, 
+   lsu_tlu_async_ttype_w2, lsu_tlu_async_tid_w2, async_tlb_index, 
+   l2fill_vld_m, ld_thrd_byp_mxsel_m, morphed_addr_m, 
+   signed_ldst_byte_m, signed_ldst_hw_m, signed_ldst_w_m, 
+   lsu_tlb_asi_data_perr_g, lsu_tlb_asi_tag_perr_g, lsu_sscan_data, 
+   lsu_ld_inst_vld_g, lsu_dcache_rand, lsu_encd_way_hit, 
+   lsu_way_hit_or, lsu_memref_m, lsu_flsh_inst_m, 
+   lsu_ifu_asi_data_en_l, lsu_dcache_fill_addr_e, 
+   lsu_dcache_fill_addr_e_err, lsu_thread_g, lmq_ldd_vld, 
+   lsu_bist_rsel_way_e, lsu_dcache_fill_way_e, lmq_ld_addr_b3, 
+   lsu_outstanding_rmo_st_max, lsu_dcfill_data_mx_sel_e, 
+   // Inputs
+   si, se, sehold, rst_tri_en, rclk, grst_l, arst_l, 
+   lsu_diag_va_prty_invrt, dva_svld_e, dva_snp_bit_wr_en_e, 
+   dva_snp_addr_e, lsu_tte_data_cp_g, lsu_l2fill_vld, ld_inst_vld_e, 
+   st_inst_vld_e, ifu_lsu_ldst_fp_e, ldst_sz_e, 
+   lsu_ldst_va_b12_b11_m, lsu_ldst_va_b7_b0_m, ifu_lsu_rd_e, 
+   tlb_cam_hit, ifu_tlu_sraddr_d, ifu_tlu_wsr_inst_d, 
+   ifu_lsu_alt_space_d, tlu_lsu_int_ldxa_vld_w2, 
+   tlu_lsu_int_ld_ill_va_w2, tlu_lsu_ldxa_tid_w2, 
+   ifu_lsu_ldxa_data_vld_w2, ifu_lsu_ldxa_illgl_va_w2, 
+   ifu_lsu_ldxa_tid_w2, ifu_lsu_asi_rd_unc, tlu_lsu_tl_zero, 
+   ifu_lsu_thrid_s, ifu_lsu_ldst_dbl_e, ld_stb_full_raw_w2, 
+   ld_sec_active, ifu_tlu_inst_vld_m, lsu_l2fill_bendian_m, 
+   lmq0_l2fill_fpld, lmq1_l2fill_fpld, lmq2_l2fill_fpld, 
+   lmq3_l2fill_fpld, cache_way_hit_buf1, cache_hit, lmq0_byp_misc_sz, 
+   lmq1_byp_misc_sz, lmq2_byp_misc_sz, lmq3_byp_misc_sz, 
+   lsu_l2fill_sign_extend_m, lsu_l1hit_sign_extend_e, 
+   tlu_lsu_pstate_cle, tlu_lsu_pstate_am, tlb_pgnum, tlb_demap_nctxt, 
+   tlb_demap_pctxt, tlb_demap_sctxt, tlb_demap_actxt, 
+   tlb_demap_thrid, ifu_lsu_casa_e, ifu_lsu_ldstub_e, ifu_lsu_swap_e, 
+   lsu_atm_st_cmplt_e, lsu_cpx_pkt_atm_st_cmplt, 
+   spu_lsu_ldxa_data_vld_w2, spu_lsu_ldxa_illgl_va_w2, 
+   spu_lsu_ldxa_tid_w2, spu_lsu_stxa_ack_tid, spu_lsu_stxa_ack, 
+   spu_lsu_unc_error_w2, spu_lsu_int_w2, tlu_lsu_stxa_ack, 
+   tlu_lsu_stxa_ack_tid, lsu_tlb_invert_endian_g, lmq0_ncache_ld, 
+   lmq1_ncache_ld, lmq2_ncache_ld, lmq3_ncache_ld, ifu_tlu_mb_inst_e, 
+   ifu_tlu_flsh_inst_e, lsu_stb_empty, tlu_dtlb_tag_rd_g, 
+   tlu_dtlb_data_rd_g, tlu_dtlb_dmp_vld_g, tlu_dtlb_dmp_all_g, 
+   tlu_dtlb_rw_index_vld_g, tlu_dtlb_invalidate_all_g, 
+   lsu_st_wr_dcache, tlu_lsu_asi_update_m, tlu_lsu_tid_m, 
+   lsu_rd_dtag_parity_g, dcache_rparity_err_wb, 
+   lsu_diagnstc_wr_data_b0, lsu_byp_ldd_oddrd_m, tlu_lsu_redmode, 
+   tlu_lsu_redmode_rst_d1, dva_vld_m, lsu_dfill_tid_e, 
+   ifu_lsu_asi_ack, lsu_intrpt_cmplt, lsu_iobrdge_tap_rq_type_b8, 
+   lsu_iobrdge_tap_rq_type_b6_b3, lsu_iobrdge_tap_rq_type_b1_b0, 
+   lsu_iobrdge_fwd_pkt_vld, lsu_cpx_ld_dtag_perror_e, 
+   lsu_cpx_ld_dcache_perror_e, lsu_cpx_pkt_ld_err, ifu_lsu_nceen, 
+   tlu_lsu_ldxa_async_data_vld, tlu_lsu_hpv_priv, tlu_lsu_hpstate_en, 
+   ifu_lsu_memref_d, ifu_lsu_pref_inst_e, lsu_pref_pcx_req, 
+   lsu_cpx_pkt_prefetch2, lsu_ld_pcx_rq_sel_d2, 
+   lsu_pcx_req_squash_d1, lsu_bld_helper_cmplt_m, lsu_bld_cnt_m, 
+   lsu_bld_reset, ffu_lsu_blk_st_e, lsu_stb_rmo_st_issue, 
+   lsu_cpx_rmo_st_ack, lsu_dfq_flsh_cmplt, stb_cam_hit, 
+   ifu_tlu_flush_m, ctu_sscan_tid, tte_data_perror_unc, 
+   asi_tte_data_perror, asi_tte_tag_perror, tlu_dtlb_rw_index_g, 
+   lsu_local_early_flush_g, lsu_dfq_vld, gdbginit_l, dc_direct_map, 
+   asi_d, lsu_dctl_asi_state_m, lsu_ldst_va_g, lsu_ifu_err_addr_b39, 
+   lsu_dp_ctl_reg0, lsu_dp_ctl_reg1, lsu_dp_ctl_reg2, 
+   lsu_dp_ctl_reg3, ldd_in_dfq_out, dcache_iob_addr_e, 
+   mbist_dcache_index, mbist_dcache_word, lsu_diagnstc_wr_addr_e, 
+   st_dcfill_addr, lsu_dfq_ld_vld, lsu_dfq_st_vld, lmq0_ldd_vld, 
+   lmq1_ldd_vld, lmq2_ldd_vld, lmq3_ldd_vld, lsu_dfq_byp_tid, 
+   dfq_byp_ff_en, lsu_dcache_iob_way_e, mbist_dcache_way, 
+   lsu_diagnstc_wr_way_e, lsu_st_way_e, lmq0_pcx_pkt_way, 
+   lmq1_pcx_pkt_way, lmq2_pcx_pkt_way, lmq3_pcx_pkt_way, 
+   lmq0_ld_rq_type, lmq1_ld_rq_type, lmq2_ld_rq_type, 
+   lmq3_ld_rq_type, lmq0_pcx_pkt_addr, lmq1_pcx_pkt_addr, 
+   lmq2_pcx_pkt_addr, lmq3_pcx_pkt_addr, lsu_ttype_vld_m2, 
+   tlu_early_flush_pipe2_w, lsu_st_dcfill_size_e, mbist_dcache_write, 
+   mbist_dcache_read
+   ) ;  
+
+
+output                  lsu_tlu_nucleus_ctxt_m ;// access is nucleus context 
+output			lsu_quad_word_access_g ; // 128b ld request.
+
+input si;
+input se;
+input sehold ;
+input rst_tri_en ;
+output so;    
+
+input      rclk ;
+input                   grst_l;
+input                   arst_l;
+output     dctl_rst_l;
+
+input  lsu_diag_va_prty_invrt ;
+
+   input         dva_svld_e ;
+   input [15:0] dva_snp_bit_wr_en_e;
+   input [4:0]  dva_snp_addr_e;
+
+input	      lsu_tte_data_cp_g ; // cp bit from tlb    
+input         lsu_l2fill_vld ;    // fill from dfq to d$.
+input         ld_inst_vld_e ;     // load accesses d$.
+input         st_inst_vld_e ;     // store accesses d$ (presumably - TODO confirm).
+input         ifu_lsu_ldst_fp_e ; // fp load or store
+input [1:0]   ldst_sz_e ;         // sz of ld/st xsaction.
+
+
+input [12:11]  lsu_ldst_va_b12_b11_m;      
+input [7:0]    lsu_ldst_va_b7_b0_m;      
+
+input [4:0]   ifu_lsu_rd_e;           // primary rd of ld
+input         tlb_cam_hit ;           // xlation hits in tlb.     
+// Read/Write Privileged State Register Access.
+input [6:0]   ifu_tlu_sraddr_d ;      // addr of sr(st/pr)
+
+input         ifu_tlu_wsr_inst_d ;    // valid wr sr(st/pr)
+output        lsu_tlu_wsr_inst_e ;    // valid wr sr(st/pr)
+
+input         ifu_lsu_alt_space_d;        // alternate space ld/st
+
+input         tlu_lsu_int_ldxa_vld_w2 ;  // tlu ldxa data is valid (intrpt/scpd)
+input         tlu_lsu_int_ld_ill_va_w2 ;  // tlu ldxa'va is invalid (intrpt/scpd)
+
+input [1:0]   tlu_lsu_ldxa_tid_w2 ;       // thread id for tlu ldxa data. 
+
+input         ifu_lsu_ldxa_data_vld_w2 ;  // ifu ldxa data is valid
+input         ifu_lsu_ldxa_illgl_va_w2 ;  // ifu ldxa with illgl va
+input [1:0]   ifu_lsu_ldxa_tid_w2   ;     // thread id for ifu ldxa data. 
+input         ifu_lsu_asi_rd_unc ;        // unc error for tlb rd
+
+input [3:0]   tlu_lsu_tl_zero ;           // trap level is zero.
+input [1:0]   ifu_lsu_thrid_s ;           // thread id
+input         ifu_lsu_ldst_dbl_e ;        // ldd, atomic quad.
+
+input         ld_stb_full_raw_w2 ;     // full raw for load-thread0
+input         ld_sec_active ;          // secondary bypassing
+input         ifu_tlu_inst_vld_m ;     // inst vld in w stage
+
+input         lsu_l2fill_bendian_m ;
+
+//input         lsu_l2fill_fpld_e ;      // fp load
+output         lsu_l2fill_fpld_e ;      // fp load
+input         lmq0_l2fill_fpld ;      // fp load
+input         lmq1_l2fill_fpld ;      // fp load
+input         lmq2_l2fill_fpld ;      // fp load
+input         lmq3_l2fill_fpld ;      // fp load
+
+input [3:0]   cache_way_hit_buf1 ;          // hit in set of cache.
+   input      cache_hit;
+   
+//input [3:0]   lsu_byp_misc_addr_m ;   // lower 3bits of addr for ldxa/raw etc
+   
+input [1:0]   lmq0_byp_misc_sz ;     // size for ldxa/raw etc
+input [1:0]   lmq1_byp_misc_sz ;     // size for ldxa/raw etc
+input [1:0]   lmq2_byp_misc_sz ;     // size for ldxa/raw etc
+input [1:0]   lmq3_byp_misc_sz ;     // size for ldxa/raw etc
+
+input         lsu_l2fill_sign_extend_m ; // l2fill requires sign-extension
+input         lsu_l1hit_sign_extend_e ;  // l1hit requires sign-extension
+input [3:0]   tlu_lsu_pstate_cle ;       // current little endian
+input [3:0]   tlu_lsu_pstate_am ;        // address mask
+input [39:10] tlb_pgnum ;
+input         tlb_demap_nctxt;         // demap with nctxt
+input         tlb_demap_pctxt;         // demap with pctxt
+input         tlb_demap_sctxt;         // demap with sctxt
+input         tlb_demap_actxt;         // demap w autodemap ctxt
+input [1:0]   tlb_demap_thrid;         // demap thrid
+
+input         ifu_lsu_casa_e ;         // compare-swap instr
+input         ifu_lsu_ldstub_e ;       // ldstub
+input         ifu_lsu_swap_e ;         // swap
+
+
+input         lsu_atm_st_cmplt_e ;      // atm st ack will restart thread
+input	      lsu_cpx_pkt_atm_st_cmplt ; // applies to atomic ld also.
+
+input         spu_lsu_ldxa_data_vld_w2 ; // ldxa data from spu is valid
+input         spu_lsu_ldxa_illgl_va_w2 ; // ldxa data from spu with illgl va
+input [1:0]   spu_lsu_ldxa_tid_w2 ;      // thread id for spu ldxa data (presumably, cf. ifu/tlu ldxa tid)
+input [1:0]   spu_lsu_stxa_ack_tid ;     // thread id for spu stxa ack (presumably - TODO confirm)
+input         spu_lsu_stxa_ack ;         // write to sdata reg complete
+input	      spu_lsu_unc_error_w2 ;
+input	      spu_lsu_int_w2 ;		 // spu disrupting trap.
+
+input         tlu_lsu_stxa_ack ;         // for mmu reads/writes/demaps
+input [1:0]   tlu_lsu_stxa_ack_tid ;      // for mmu reads/writes/demaps - tid
+
+input         lsu_tlb_invert_endian_g ;
+//input         lsu_ncache_ld_e ;       // non-cacheable ld from dfq
+   input      lmq0_ncache_ld;
+   input      lmq1_ncache_ld;
+   input      lmq2_ncache_ld;
+   input      lmq3_ncache_ld;
+   
+
+input         ifu_tlu_mb_inst_e ;     // membar instruction
+input         ifu_tlu_flsh_inst_e ;   // flush  instruction
+
+input [3:0]   lsu_stb_empty ;         // thread's stb is empty
+
+//input         tlu_dtlb_wr_vld_g ;
+input         tlu_dtlb_tag_rd_g ;
+input         tlu_dtlb_data_rd_g ;
+input         tlu_dtlb_dmp_vld_g ;
+input         tlu_dtlb_dmp_all_g ;
+input         tlu_dtlb_rw_index_vld_g ;
+input         tlu_dtlb_invalidate_all_g ;
+
+input         lsu_st_wr_dcache ;
+
+input         tlu_lsu_asi_update_m ;  // update asi
+input  [1:0]  tlu_lsu_tid_m ;         // thread for asi update
+input [3:0]   lsu_rd_dtag_parity_g;     // calculated tag parity
+
+input         dcache_rparity_err_wb;     // presumably d$ read parity error, not tag parity - TODO confirm
+   
+input         lsu_diagnstc_wr_data_b0 ;
+input         lsu_byp_ldd_oddrd_m ;   // rd fill for non-alt ldd
+
+input [3:0]   tlu_lsu_redmode ;       // redmode
+input [3:0]   tlu_lsu_redmode_rst_d1 ;   // redmode
+//input [2:0]   const_cpuid ;           // cpu's id
+input [3:0]   dva_vld_m ;             // valid bits for cache.
+output [3:0]  dva_vld_m_bf;
+   
+input [1:0]   lsu_dfill_tid_e ;       // thread id
+input         ifu_lsu_asi_ack;        // asi ack from ifu
+
+input [3:0]   lsu_intrpt_cmplt ;          // intrpt can restart thread
+//input [8:0]   lsu_iobrdge_tap_rq_type ;
+input  [8:8]  lsu_iobrdge_tap_rq_type_b8 ;
+input  [6:3]  lsu_iobrdge_tap_rq_type_b6_b3 ;
+input  [1:0]  lsu_iobrdge_tap_rq_type_b1_b0 ;
+
+input         lsu_iobrdge_fwd_pkt_vld ;
+
+input         lsu_cpx_ld_dtag_perror_e ;  // dtag parity error on issue
+input         lsu_cpx_ld_dcache_perror_e ;// dcache parity error on issue
+//input [1:1]   lsu_cpx_atm_st_err ;        // atomic st error field
+input [1:0]   lsu_cpx_pkt_ld_err ;        // err field - cpx ld pkt
+input [3:0]   ifu_lsu_nceen ;             // uncorrectable error enable 
+input         tlu_lsu_ldxa_async_data_vld ;   // tlu_lsu_ldxa_data_vld is for async op.
+input [3:0]   tlu_lsu_hpv_priv ;	  // hypervisor privilege modified
+input [3:0]   tlu_lsu_hpstate_en ;	  // enable bit from hpstate
+
+input         ifu_lsu_memref_d;
+input         ifu_lsu_pref_inst_e ;       // prefetch inst
+input         lsu_pref_pcx_req ;      	  // pref sent to pcx
+
+input	      lsu_cpx_pkt_prefetch2 ;	  // ld is prefetch
+
+// pref counter   
+input [3:0]   lsu_ld_pcx_rq_sel_d2 ;
+input         lsu_pcx_req_squash_d1;
+
+input	      lsu_bld_helper_cmplt_m ;	  // bld helper completes.
+input [2:0]   lsu_bld_cnt_m ;	
+input	      lsu_bld_reset ;
+   
+output [3:0]  lsu_no_spc_pref;
+    
+input	      ffu_lsu_blk_st_e ;	// blk st helper signalled by ffu
+input	[3:0]	lsu_stb_rmo_st_issue ;	// thread's stb issues rmo st
+input	[3:0]	lsu_cpx_rmo_st_ack ;	// rmo ack clears
+
+input	[3:0]	lsu_dfq_flsh_cmplt ;
+
+input   	stb_cam_hit ;
+ 
+input   ifu_tlu_flush_m;
+
+output  ifu_tlu_flush_fd_w;
+output  ifu_tlu_flush_fd2_w;
+output  ifu_tlu_flush_fd3_w;
+output  ifu_lsu_flush_w;
+   
+input   [3:0]           ctu_sscan_tid ;   // bit N gates thread N's term in sscan_data_13 / sscan_data_14 (shadow scan section)
+
+//input		tte_data_perror_corr ;
+input		tte_data_perror_unc ;
+input		asi_tte_data_perror ;
+input		asi_tte_tag_perror ;
+// the three parity-error inputs above are declared again as wires in the net declarations further down
+input  	[5:0]	tlu_dtlb_rw_index_g ;
+
+input		lsu_local_early_flush_g ;
+
+//input		lsu_error_pa_b39_m ;
+
+input         lsu_dfq_vld;
+
+input		gdbginit_l ;
+input		dc_direct_map ;
+
+output 	[1:0]	lsu_tlu_thrid_d ;
+
+output	[3:0] lsu_diagnstc_data_sel ;
+output	[3:0] lsu_diagnstc_va_sel ;
+
+output	[2:0] lsu_err_addr_sel ;
+
+output [15:0] dva_bit_wr_en_e;
+output [10:6] dva_wr_adr_e;
+   
+output      lsu_exu_ldst_miss_w2 ;  // load misses in d$.
+//output  [3:0]   lsu_way_hit ;   // ld/st access hits in d$.  (not a port; still declared as an internal wire below)
+output      lsu_exu_dfill_vld_w2 ;  // data fill to irf(exu).
+output      lsu_ffu_ld_vld ;  // fp load writes to frf
+output      lsu_ld_miss_wb ;  // load misses in d$.
+//output      lsu_ld_hit_wb ;   // load hits in d$.  (not a port; still declared as an internal wire below)
+   
+output      lsu_dtlb_bypass_e ; // dtlb is bypassed
+
+output [`LMQ_WIDTH-1:40] ld_pcx_pkt_g ;    // ld miss pkt for thread. Only bits [`LMQ_WIDTH-1:40] are ported here.
+output      tlb_ldst_cam_vld ;
+   
+
+//output      stxa_internal ;   // internal stxa, stg g  (not a port; still declared as an internal wire below)
+output      ldxa_internal ;   // internal ldxa, stg g
+
+output      lsu_ifu_ldsta_internal_e ; // any internal asi
+output  [3:0]   lsu_ifu_ldst_cmplt ;
+output  [3:0]   lsu_ifu_itlb_en ;
+output  [3:0]   lsu_ifu_icache_en ;
+   
+   
+output  [3:0]           lmq_byp_data_en_w2 ;
+// NOTE(review): the 0..3 suffix on lmq_byp_data_mxselN presumably is the thread, as stated for lmq_byp_ldxa_mxselN - confirm
+output  [3:0]           lmq_byp_data_fmx_sel ;  // final data sel for lmq byp
+output  [3:0]           lmq_byp_data_mxsel0 ;     // ldxa vs stb bypass data sel.
+output  [3:0]           lmq_byp_data_mxsel1 ;     // ldxa vs stb bypass data sel.
+output  [3:0]           lmq_byp_data_mxsel2 ;     // ldxa vs stb bypass data sel.
+output  [3:0]           lmq_byp_data_mxsel3 ;     // ldxa vs stb bypass data sel.
+output  [2:0]           lmq_byp_ldxa_mxsel0 ;     // ldxa data sel - thread0
+output  [2:0]           lmq_byp_ldxa_mxsel1 ;     // ldxa data sel - thread1
+output  [2:0]           lmq_byp_ldxa_mxsel2 ;     // ldxa data sel - thread2
+output  [2:0]           lmq_byp_ldxa_mxsel3 ;     // ldxa data sel - thread3
+output  [2:0]   lsu_ld_thrd_byp_sel_e ;
+   
+output  [15:0]    dcache_byte_wr_en_e ; // 16-byte write enable mask.
+
+output      lsu_dcache_wr_vld_e ; // write to dcache.
+
+output      lsu_ldstub_g ;    // ldstub(a) instruction
+output      lsu_swap_g ;    // swap(a) instruction
+output                  lsu_tlu_dtlb_done;  // dtlb rd/dmp/wr cmplt
+output  [1:0]   lsu_exu_thr_m ;
+
+output                   merge7_sel_byte0_m;
+output                   merge7_sel_byte7_m;
+// Naming in this group: merge<N>_sel_byte<K>_m; a "_default" suffix marks the default leg of that select group.
+output                   merge6_sel_byte1_m;
+output                   merge6_sel_byte6_m;
+
+output                   merge5_sel_byte2_m;   
+output                   merge5_sel_byte5_m;
+
+output                   merge4_sel_byte3_m;
+output                   merge4_sel_byte4_m;
+
+output                   merge3_sel_byte0_m;
+output                   merge3_sel_byte3_m;
+output                   merge3_sel_byte4_m;
+output                   merge3_sel_byte7_default_m;
+output                   merge3_sel_byte_m ;
+
+output                   merge2_sel_byte1_m;
+output                   merge2_sel_byte2_m;
+output                   merge2_sel_byte5_m;
+output                   merge2_sel_byte6_default_m;
+output                   merge2_sel_byte_m ;
+
+output                   merge0_sel_byte0_m, merge0_sel_byte1_m;
+output                   merge0_sel_byte2_m, merge0_sel_byte3_default_m;
+   
+output                   merge0_sel_byte4_m, merge0_sel_byte5_m;
+output                   merge0_sel_byte6_m, merge0_sel_byte7_default_m;
+                                                               
+output                   merge1_sel_byte0_m, merge1_sel_byte1_m;
+output                   merge1_sel_byte2_m, merge1_sel_byte3_default_m;
+output                   merge1_sel_byte4_m, merge1_sel_byte5_m;
+output                   merge1_sel_byte6_m, merge1_sel_byte7_default_m; 
+
+output			             merge0_sel_byte_1h_m ;
+   
+output			             merge1_sel_byte_1h_m, merge1_sel_byte_2h_m ;
+   
+output		lsu_dtlb_cam_real_e ;
+output      lsu_dtagv_wr_vld_e ;
+
+output      lsu_dtag_wrreq_x_e ;
+output      lsu_dtag_index_sel_x_e ;
+   
+output      lsu_dtlb_wr_vld_e ;
+output      lsu_dtlb_tag_rd_e ;
+output      lsu_dtlb_data_rd_e ;
+output      lsu_dtlb_dmp_vld_e ;
+output      lsu_dtlb_dmp_all_e ;
+output      lsu_dtlb_rwindex_vld_e ;
+output      lsu_dtlb_invalid_all_l_m ;
+output      lsu_tlu_tlb_ld_inst_m ;
+output      lsu_tlu_tlb_st_inst_m ;
+output  [1:0]   lsu_tlu_tlb_access_tid_m ;
+output      lsu_tlb_data_rd_vld_g ;
+
+   
+output  [3:0]   lsu_tlb_st_sel_m ;
+// one watchpoint write-enable per index 0..3 (NOTE(review): "_l" suffix presumably means active-low - confirm)
+output         lsu_va_wtchpt0_wr_en_l;
+output         lsu_va_wtchpt1_wr_en_l;
+output         lsu_va_wtchpt2_wr_en_l;
+output         lsu_va_wtchpt3_wr_en_l;
+// thread<N>_m / thread<N>_g below are declared again as wires in the net declarations further down
+output         thread0_m;
+output         thread1_m;
+output         thread2_m;
+output         thread3_m;
+
+output         lsu_dctldp_thread0_m;
+output         lsu_dctldp_thread1_m;
+output         lsu_dctldp_thread2_m;
+output         lsu_dctldp_thread3_m;
+   
+output         thread0_g;
+output         thread1_g;
+output         thread2_g;
+output         thread3_g;
+   
+output                  lsu_tlu_nonalt_ldst_m ; // non-alternate load or store
+output                  lsu_tlu_xslating_ldst_m ;// xslating ldst,atomic etc
+
+output   [2:0]          lsu_tlu_ctxt_sel_m;           // context selected:0-p,1-s,2-n
+output                  lsu_tlu_write_op_m;           // fault occurs for data write operation
+
+output                  lsu_dtlb_addr_mask_l_e ;  // address mask applies
+
+
+output            dva_din_e;
+
+output            lsu_diagnstc_dtagv_prty_invrt_e ;
+   
+output                  lsu_ifu_asi_load;   // asi load to ifu
+output [1:0]            lsu_ifu_asi_thrid;    // asi event thrid to ifu
+output                  lsu_ifu_asi_vld;    // asi event vld - ld+st
+output      lsu_quad_asi_e ;
+//output      lsu_tlu_64kpg_hit_g ;   // 64k page accessed
+
+output            lsu_local_ldxa_sel_g;
+output  [3:0]     lsu_dtag_rsel_m ;  // dtag way sel
+
+output      lsu_tlbop_force_swo ;
+output  [2:0]     lsu_atomic_pkt2_bsel_g ;
+output      lsu_dcache_tag_perror_g ;       // dcache tag parity error
+output      lsu_dcache_data_perror_g ;      // dcache data parity error
+   
+output      lsu_ifu_l2_unc_error ;    // l2 uncorrectable error
+output      lsu_ifu_l2_corr_error ;   // l2 correctable error
+output      lsu_ifu_dcache_data_perror ;  // dcache data parity error
+output      lsu_ifu_dcache_tag_perror ; // dcache tag parity error
+output  [1:0]   lsu_ifu_error_tid ;   // thread id for error
+output      lsu_ifu_io_error ;    // error on io ld
+//output  [1:0]   lsu_tlu_derr_tid_g ;    // daccess error tid
+   
+output      lsu_tlu_squash_va_oor_m ;   // squash va_oor for mem-op.
+output      lsu_squash_va_oor_m ;   // squash va_oor for mem-op.
+
+output          tlb_cam_hit_g ;           // xlation hits in tlb.     
+
+   output        lsu_st_hw_le_g;
+   output        lsu_st_w_or_dbl_le_g;
+   output        lsu_st_x_le_g;
+   output        lsu_swap_sel_default_g;
+   output        lsu_swap_sel_default_byte_7_2_g;
+
+output		lsu_st_rmo_m ;		// rmo store in m stage
+output		lsu_bst_in_pipe_m ;	// 1st helper for bst.
+output  	lsu_snap_blk_st_m ;	// snap blk st state 
+output		lsu_blk_st_m ;		// blk st in m
+output	[39:10]	lsu_blkst_pgnum_m ;
+output		lsu_ffu_blk_asi_e ;	// blk asi indication to ffu (NOTE(review): expanded from the original "blk" - confirm)
+output		lsu_blk_asi_m ;
+
+output		lsu_nonalt_nucl_access_m ;
+
+//output	[3:0]	lsu_spu_stb_empty ;
+
+   output     dcache_alt_mx_sel_e;
+   output     dcache_alt_mx_sel_e_bf;
+   output     dcache_rvld_e;
+   
+output		lsu_dc_iob_access_e ;	// dcache iob access
+
+output		lsu_ifu_ldst_miss_w ;
+
+   output lsu_ifu_dc_parity_error_w2;
+   
+   output lsu_ldst_inst_vld_e;   // driven further down as ld_inst_vld_e | st_inst_vld_e
+
+output          lsu_local_ldxa_tlbrd_sel_g;
+output          lsu_local_diagnstc_tagrd_sel_g;
+output          lsu_va_wtchpt_sel_g;
+   
+
+   input [7:0]   asi_d;                  // declared again as wire [7:0] in the net declarations further down
+   input [7:0]   lsu_dctl_asi_state_m;
+   
+   output  [3:0] asi_state_wr_thrd;
+   output        thread0_d;
+   output        thread1_d;
+   output        thread2_d;
+   output        thread3_d;
+   output        tlu_lsu_asi_update_g;
+
+output  [3:0] pctxt_state_wr_thrd ;
+output  [3:0] sctxt_state_wr_thrd ;
+
+   output     thread_pctxt;
+   output     thread_sctxt;
+
+   output     thread_actxt;
+   output     thread_default;
+   
+   output     thread0_ctxt;  
+   output     thread1_ctxt;
+   output     thread2_ctxt;
+   output     thread3_ctxt;
+
+   output [3:0] pid_state_wr_en;
+   output       thread0_e;
+   output       thread1_e;
+   output       thread2_e;
+   output       thread3_e;
+
+   output       dfture_tap_wr_mx_sel;
+   output [3:0] lctl_rst;
+   output [3:0] lsu_ctl_state_wr_en;
+   output [3:0] lsuctl_ctlbits_wr_en;
+   output [3:0] dfture_tap_rd_en;
+
+   output      bist_tap_wr_en;
+   output      bistctl_wr_en;
+   output      bist_ctl_reg_wr_en;
+   output      mrgn_tap_wr_en;
+
+   output      ldiagctl_wr_en;
+
+   output [3:0]  misc_ctl_sel_din ;
+
+   output [2:0] lsu_asi_sel_fmx1;
+   output [2:0] lsu_asi_sel_fmx2;
+
+
+   output       tlb_access_en0_g;
+   output       tlb_access_en1_g;
+   output       tlb_access_en2_g;
+   output       tlb_access_en3_g;
+   // only thrd0..2 plus a default select are ported here; tlb_access_sel_thrd3 exists only as an internal wire below
+   output tlb_access_sel_thrd0;
+   output tlb_access_sel_thrd1;
+   output tlb_access_sel_thrd2;
+   output tlb_access_sel_default;
+
+   input [7:0] lsu_ldst_va_g;
+   
+   output mrgnctl_wr_en;
+
+   input  lsu_ifu_err_addr_b39;
+
+   input [5:0] lsu_dp_ctl_reg0;
+   input [5:0] lsu_dp_ctl_reg1;
+   input [5:0] lsu_dp_ctl_reg2;
+   input [5:0] lsu_dp_ctl_reg3;
+
+   input       ldd_in_dfq_out;     //from qctl2 
+   
+
+   output hpv_priv_m;
+   output hpstate_en_m;
+   
+   output                dcache_arry_data_sel_m;
+   
+   output                dtlb_bypass_m;
+   
+   output                lsu_alt_space_m;
+   output                atomic_m;
+
+   output                ldst_dbl_m;
+   output                fp_ldst_m;
+
+   output                lda_internal_m;
+   output                sta_internal_m;
+   output                cam_real_m;
+
+   output                data_rd_vld_g;
+   output                tag_rd_vld_g;
+   output [1:0]          ldst_sz_m;
+   output                asi_internal_m;
+
+//   output                ld_inst_vld_unflushed;   // not a port; still declared as an internal wire below
+//   output                st_inst_vld_unflushed;   // not a port; still declared as an internal wire below
+   
+   output                rd_only_ltlb_asi_e;
+   output                wr_only_ltlb_asi_e;
+   output                dfill_tlb_asi_e;
+   output                ifill_tlb_asi_e;
+
+   output                nofault_asi_m;
+   output                as_if_user_asi_m;
+
+   output                atomic_asi_m;
+   output                phy_use_ec_asi_m;
+   output                phy_byp_ec_asi_m;
+
+   output                quad_asi_m;
+   output                binit_quad_asi_m;
+   output                blk_asi_m;
+
+   output                recognized_asi_m;
+   output                strm_asi_m;
+   output                mmu_rd_only_asi_m;
+   output                rd_only_asi_m;
+   output                wr_only_asi_m;
+   output                unimp_asi_m;
+
+   output                va_wtchpt_cmp_en_m;
+
+   output		lsu_tlu_async_ttype_vld_w2 ;	// daccess error - asynchronous
+   output   [6:0]	lsu_tlu_async_ttype_w2 ;
+   output   [1:0] 	lsu_tlu_async_tid_w2 ;		// asynchronous trap - thread 
+
+   output   [5:0]	async_tlb_index ;
+   
+//=========================================
+//dc_fill CP
+//=========================================   
+   output                l2fill_vld_m;    //to qdp1
+   output  [3:0]   ld_thrd_byp_mxsel_m ;  //to qdp1
+   output [7:0]    morphed_addr_m;        //to dcdp
+ 
+   // the unsigned_* counterparts of the three signed_* outputs below are commented out (not ported)
+   output          signed_ldst_byte_m;    //to dcdp
+//   output          unsigned_ldst_byte_m;  //to dcdp 
+   output          signed_ldst_hw_m;      //to dcdp
+//   output          unsigned_ldst_hw_m;    //to dcdp
+   output          signed_ldst_w_m;       //to dcdp
+//   output          unsigned_ldst_w_m;     //to dcdp
+
+   output	lsu_tlb_asi_data_perr_g ;	
+   output	lsu_tlb_asi_tag_perr_g ;
+
+   output  [14:13]   lsu_sscan_data ;   // two bits wide; sscan_data_13 / sscan_data_14 wires are built in the shadow scan section
+
+   output  [3:0] 	lsu_ld_inst_vld_g ;
+   
+   output  [1:0]     lsu_dcache_rand;
+   output  [1:0]     lsu_encd_way_hit;
+   output            lsu_way_hit_or;
+//   output            lsu_quad_asi_g;
+
+   output	     lsu_memref_m ;
+   output	     lsu_flsh_inst_m ;
+
+   output	    	lsu_ifu_asi_data_en_l ;
+
+
+//dcfill_addr [10:0]
+   input [7:0]  dcache_iob_addr_e;
+   input [6:0]  mbist_dcache_index;
+   input        mbist_dcache_word;
+   input [10:0] lsu_diagnstc_wr_addr_e;
+   input [10:0] st_dcfill_addr;
+   output [10:3] lsu_dcache_fill_addr_e;
+   output [10:4] lsu_dcache_fill_addr_e_err;
+
+   input         lsu_dfq_ld_vld;
+   input         lsu_dfq_st_vld;
+
+   output [3:0]  lsu_thread_g;
+
+//=========================================
+//LMQ thread sel
+//=========================================
+   input         lmq0_ldd_vld;      //from qdp1
+   input         lmq1_ldd_vld;
+   input         lmq2_ldd_vld;
+   input         lmq3_ldd_vld;
+   output        lmq_ldd_vld;       //to  qctl2 
+      
+   input [1:0]   lsu_dfq_byp_tid;   //from qdp2
+   input         dfq_byp_ff_en;     //from qctl2 
+
+   input [1:0]   lsu_dcache_iob_way_e;   //from qdp2
+ 
+   input   [1:0]  mbist_dcache_way;   
+   output  [3:0]  lsu_bist_rsel_way_e;
+   
+   input   [1:0]   lsu_diagnstc_wr_way_e ;  //from dctldp
+
+   input [1:0]     lsu_st_way_e;    //from qdp2
+
+   input [1:0]     lmq0_pcx_pkt_way;  //from qctl1
+   input [1:0]     lmq1_pcx_pkt_way;
+   input [1:0]     lmq2_pcx_pkt_way;
+   input [1:0]     lmq3_pcx_pkt_way;
+   output [3:0]    lsu_dcache_fill_way_e;
+
+
+input  [2:0]             lmq0_ld_rq_type ;        // for identifying atomic ld.
+input  [2:0]             lmq1_ld_rq_type ;        // for identifying atomic ld.
+input  [2:0]             lmq2_ld_rq_type ;        // for identifying atomic ld.
+input  [2:0]             lmq3_ld_rq_type ;        // for identifying atomic ld.
+   
+input  [10:0]            lmq0_pcx_pkt_addr;
+input  [10:0]            lmq1_pcx_pkt_addr;
+input  [10:0]            lmq2_pcx_pkt_addr;
+input  [10:0]            lmq3_pcx_pkt_addr;
+
+output                   lmq_ld_addr_b3;
+
+output [3:0]             lsu_outstanding_rmo_st_max;
+
+input                 lsu_ttype_vld_m2;
+input                 tlu_early_flush_pipe2_w;
+input [1:0]           lsu_st_dcfill_size_e;
+
+   input              mbist_dcache_write;   // registered by mbist_stge into lsu_bist_wvld_e
+   input              mbist_dcache_read;    // registered by mbist_stge into lsu_bist_rvld_e
+
+   output             lsu_dcfill_data_mx_sel_e;
+   
+wire  [3:0]   ld_thrd_byp_sel_e ;
+wire	      ifu_asi_vld,ifu_asi_vld_d1 ;
+wire  [1:0]   dcache_wr_size_e ;   
+wire          lsu_ncache_ld_e;
+wire          lsu_diagnstc_wr_src_sel_e ; // dcache/dtag/v write - diag
+   
+wire         dctl_flush_pipe_w ;   // flush pipe due to error
+ wire        dctl_early_flush_w;
+   
+wire  [10:0] lmq_pcx_pkt_addr;
+wire  [2:0]  lmq_ld_rq_type_e;
+   
+wire [10:0]  dcache_fill_addr_e;
+wire [2:0]   dcache_wr_addr_e ;       
+wire	lsuctl_dtlb_byp_e ;
+// error flags: four numbered copies (0..3) followed by _e/_m/_g suffixed versions (NOTE(review): presumably per-thread and per-stage - confirm)
+wire	cam_perr_unc0,asi_data_perr0,asi_tag_perr0,ifu_unc_err0 ;
+wire	cam_perr_unc1,asi_data_perr1,asi_tag_perr1,ifu_unc_err1 ;
+wire	cam_perr_unc2,asi_data_perr2,asi_tag_perr2,ifu_unc_err2 ;
+wire	cam_perr_unc3,asi_data_perr3,asi_tag_perr3,ifu_unc_err3 ;
+wire	cam_perr_unc_e, asi_data_perr_e,asi_tag_perr_e,ifu_unc_err_e ;
+wire	cam_perr_unc_m, asi_data_perr_m,asi_tag_perr_m,ifu_unc_err_m ;
+wire	cam_perr_unc_g, asi_data_perr_g,asi_tag_perr_g,ifu_unc_err_g ;
+//wire	cam_real_err_e, cam_real_err_m ;
+wire	[3:0] squash_byp_cmplt,squash_byp_cmplt_m, squash_byp_cmplt_g ;
+wire      ld_inst_vld_m,ld_inst_vld_g ;
+wire      st_inst_vld_m,st_inst_vld_g ;
+wire      fp_ldst_m,fp_ldst_g,fp_ldst_w2 ;   // fp_ldst_m is also an output port
+wire      lsu_ld_hit_wb, lsu_ld_miss_wb ;    // lsu_ld_miss_wb is also an output port; lsu_ld_hit_wb is internal only
+wire  [3:0]   lsu_way_hit ;                  // internal only (its output declaration is commented out)
+wire  [1:0]   ldst_sz_m,ldst_sz_g ;          // ldst_sz_m is also an output port
+wire  [4:0]   ld_rd_m, ld_rd_g ;
+wire      lsu_dtlb_bypass_g,dtlb_bypass_e,dtlb_bypass_m ;   // dtlb_bypass_m is also an output port
+wire [6:0]  lsu_sraddr_e ;
+//wire    lsu_rsr_inst_e,lsu_rsr_inst_m, lsu_rsr_inst_w ;
+wire    lsu_wsr_inst_e;
+wire    pctxt_state_en, sctxt_state_en ;
+wire    asi_state_wr_en ;
+//wire  [3:0] pctxt_state_rd_en, sctxt_state_rd_en ;
+wire    lsu_alt_space_m,lsu_alt_space_g ;    // lsu_alt_space_m is also an output port
+wire    ldxa_internal, stxa_internal ;       // ldxa_internal is an output port; stxa_internal is internal only
+wire    lsu_ctl_state_en;
+//wire  [3:0] lsu_ctl_state_rd_en;
+wire  [3:0]   lsu_ctl_state_wr_en ;          // also an output port
+//wire  [7:0] imm_asi_e,imm_asi_m,imm_asi_g ;
+//wire    imm_asi_vld_e,imm_asi_vld_m,imm_asi_vld_g;
+//wire  [7:0]   asi_state0,asi_state1,asi_state2,asi_state3 ;
+
+wire    ldsta_internal_e,sta_internal_e,lda_internal_e;
+wire    sta_internal_m,lda_internal_m;       // both are also output ports
+wire  [7:0] asi_d ;                          // wire redeclaration of the asi_d input port
+wire    [1:0]   thrid_d,thrid_e,thrid_m, thrid_g, thrid_w2, thrid_w3, ldxa_thrid_w2 ;
+wire    stxa_internal_d1, stxa_internal_d2 ;
+wire    ld_pcx_pkt_vld_e ;
+wire    ld_pcx_pkt_vld_m ;
+wire    ld_pcx_pkt_vld_g ;
+wire    ldst_dbl_m, ldst_dbl_g;              // ldst_dbl_m is also an output port
+wire    ldd_force_l2access_w2, ldd_force_l2access_w3;
+   
+//wire    ld_stb_full_raw_w2 ;
+wire    ld_stb_full_raw_w3 ;
+
+wire    ldbyp0_vld_rst, ldbyp0_vld_en, ldbyp0_fpld ;
+wire    ldbyp1_vld_rst, ldbyp1_vld_en, ldbyp1_fpld ;
+wire    ldbyp2_vld_rst, ldbyp2_vld_en, ldbyp2_fpld ;
+wire    ldbyp3_vld_rst, ldbyp3_vld_en, ldbyp3_fpld ;
+//wire    ldbyp0_vld_en_d1,ldbyp1_vld_en_d1,ldbyp2_vld_en_d1,ldbyp3_vld_en_d1 ;
+// thread<N>_e/_d/_m/_g redeclare output ports seen above; the _w2/_w3 sets are not in the visible port list
+wire    thread0_e,thread1_e,thread2_e,thread3_e;
+wire    thread0_d,thread1_d,thread2_d,thread3_d;
+wire    thread0_m,thread1_m,thread2_m,thread3_m;
+wire    thread0_g,thread1_g,thread2_g,thread3_g;
+wire    thread0_w2,thread1_w2,thread2_w2,thread3_w2;
+wire    thread0_w3,thread1_w3,thread2_w3,thread3_w3;
+wire    tlu_stxa_thread0_w2,tlu_stxa_thread1_w2 ;
+wire    tlu_stxa_thread2_w2,tlu_stxa_thread3_w2 ;
+wire    tlu_ldxa_thread0_w2,tlu_ldxa_thread1_w2 ;
+wire    tlu_ldxa_thread2_w2,tlu_ldxa_thread3_w2 ;
+wire    spu_ldxa_thread0_w2,spu_ldxa_thread1_w2 ;
+wire    spu_ldxa_thread2_w2,spu_ldxa_thread3_w2 ;
+wire    spu_stxa_thread0,spu_stxa_thread1 ;
+wire    spu_stxa_thread2,spu_stxa_thread3 ;
+wire    ifu_ldxa_thread0_w2,ifu_ldxa_thread1_w2 ;
+wire    ifu_ldxa_thread2_w2,ifu_ldxa_thread3_w2 ;
+wire    ifu_stxa_thread0_w2,ifu_stxa_thread1_w2 ;
+wire    ifu_stxa_thread2_w2,ifu_stxa_thread3_w2 ;
+wire    ldbyp0_vld, ldbyp1_vld, ldbyp2_vld, ldbyp3_vld ;
+//wire    ld_any_byp_data_vld ;              
+wire  [3:0] asi_state_wr_thrd;               // also an output port
+wire  [3:0] pctxt_state_wr_thrd ;            // also an output port
+wire  [3:0] sctxt_state_wr_thrd ;            // also an output port
+wire    tlb_cam_hit_g ;                      // also an output port
+wire    ld_inst_vld_unflushed ;              // internal only (its output declaration is commented out)
+wire    st_inst_vld_unflushed ;              // internal only (its output declaration is commented out)
+
+wire  [7:0]  baddr_m ;
+wire  [15:0]  byte_wr_enable ;
+//wire  [1:0] st_size ;
+//wire    l2fill_bendian_g ;
+wire    ldst_byte,ldst_hword,ldst_word,ldst_dword;
+wire    byte_m,hword_m,word_m,dword_m;
+wire    tlb_invert_endian_g ;
+//wire  [7:0] l2fill_bytes_msb_m, l2fill_bytes_msb_g ;
+//wire    byte_g, hword_g, word_g ;
+
+   wire signed_ldst_m ;
+//wire  unsigned_ldst_m ;
+//wire    sign_bit_g  ;
+//wire  [7:0] align_bytes_msb ;
+
+wire    l2fill_vld_m, l2fill_vld_g ;         // l2fill_vld_m is also an output port
+wire    l2fill_fpld_e, l2fill_fpld_m, l2fill_fpld_g ;
+wire    pstate_cle_e, pstate_cle_m, pstate_cle_g ;
+wire    l1hit_lendian_g ;
+wire    l1hit_sign_extend_m, l1hit_sign_extend_g ;
+wire    demap_thread0, demap_thread1, demap_thread2, demap_thread3 ;
+
+wire    misc_byte_m,misc_hword_m,misc_word_m,misc_dword_m;
+wire    byp_word_g;
+//wire  [15:0]  byp_baddr_g ;
+//wire    ld_stb_hit_g ;
+wire    atomic_ld_squash_e ;
+wire    atomic_m,atomic_g,atomic_w2, atomic_w3 ;   // atomic_m is also an output port
+wire  [2:0] ld_rq_type ;
+wire    ncache_pcx_rq_g ;
+wire    lmq_pkt_vld_g ;
+wire    tlb_lng_ltncy_asi_d,tlb_lng_ltncy_asi_e, tlb_lng_ltncy_asi_m,tlb_lng_ltncy_asi_g ; 
+wire    recognized_asi_d,recognized_asi_e,recognized_asi_m,recognized_asi_g,recognized_asi_tmp ;
+wire    asi_internal_d, asi_internal_e ;  
+wire    asi_internal_m, asi_internal_g ;  
+wire    dcache_byp_asi_d, dcache_byp_asi_e ;
+wire    dcache_byp_asi_m, dcache_byp_asi_g ;
+wire	phy_use_ec_asi_d,phy_use_ec_asi_e,phy_use_ec_asi_m;
+wire	phy_byp_ec_asi_d,phy_byp_ec_asi_e,phy_byp_ec_asi_m;
+wire    lendian_asi_d, lendian_asi_e;
+wire    lendian_asi_m, lendian_asi_g;
+wire	intrpt_disp_asi_d,intrpt_disp_asi_e,intrpt_disp_asi_m,intrpt_disp_asi_g ;
+wire    nofault_asi_d, nofault_asi_e, nofault_asi_m ;
+wire    nucleus_asi_d, nucleus_asi_e ;
+wire    primary_asi_d, primary_asi_e ;
+wire    quad_asi_d,quad_asi_e,quad_asi_m,quad_asi_g;
+wire    binit_quad_asi_d,binit_quad_asi_e,binit_quad_asi_m,binit_quad_asi_g ;
+wire    secondary_asi_d, secondary_asi_e ;
+wire    tlb_byp_asi_d, tlb_byp_asi_e;
+wire    thread0_ctxt, thread1_ctxt ;         // also output ports
+wire    thread2_ctxt, thread3_ctxt ;         // also output ports
+
+
+wire    altspace_ldst_e, non_altspace_ldst_e ;
+wire    altspace_ldst_m, altspace_ldst_g ;
+wire    non_altspace_ldst_m, non_altspace_ldst_g ;
+wire    thread_pctxt, thread_sctxt, thread_nctxt, thread_actxt ;   // all but thread_nctxt are also output ports
+wire    ncache_asild_rq_g ;
+//SC wire    pstate_priv, pstate_priv_m ;
+//SC wire    priv_pg_usr_mode ;
+//SC wire    nonwr_pg_st_access ;
+//SC wire    nfo_pg_nonnfo_asi ;
+//wire    daccess_excptn ;
+wire    mbar_inst_m,flsh_inst_m ; 
+wire    mbar_inst_g,flsh_inst_g ; 
+wire    bsync0_reset,bsync1_reset;
+wire    bsync2_reset,bsync3_reset ;
+wire    bsync0_en,bsync1_en ;
+wire    bsync2_en,bsync3_en ;
+wire    flush_inst0_g,mbar_inst0_g ;
+wire    flush_inst1_g,mbar_inst1_g ;
+wire    flush_inst2_g,mbar_inst2_g ;
+wire    flush_inst3_g,mbar_inst3_g ;
+wire    dfill_thread0,dfill_thread1;
+wire    dfill_thread2,dfill_thread3;
+wire    mbar_vld0, flsh_vld0 ;
+wire    mbar_vld1, flsh_vld1 ;
+wire    mbar_vld2, flsh_vld2 ;
+wire    mbar_vld3, flsh_vld3 ;
+   wire [1:0] dfq_tid_m,dfq_tid_g;
+
+wire  [1:0]   ldbyp_tid_m ;
+wire    stxa_stall_asi_g ;
+wire    stxa_stall_wr_cmplt0, stxa_stall_wr_cmplt1 ;
+wire    stxa_stall_wr_cmplt2, stxa_stall_wr_cmplt3 ;
+wire    stxa_stall_wr_cmplt0_d1, stxa_stall_wr_cmplt1_d1 ;
+wire    stxa_stall_wr_cmplt2_d1, stxa_stall_wr_cmplt3_d1 ;
+wire    dtlb_done ;
+wire    tag_rd_vld_m, tag_rd_vld_g ;         // tag_rd_vld_g is also an output port
+wire    data_rd_vld_m, data_rd_vld_g ;       // data_rd_vld_g is also an output port
+wire    tlb_demap_vld ;
+wire    dtlb_done_d1 ;
+wire    dtlb_done_d2 ;
+
+
+wire    tlu_lsu_asi_update_g ;               // also an output port
+wire  [1:0] tlu_lsu_tid_g ;
+wire    tsa_update_asi0,tsa_update_asi1;
+wire    tsa_update_asi2,tsa_update_asi3;
+wire    tlb_ld_inst0,tlb_ld_inst1,tlb_ld_inst2,tlb_ld_inst3 ;   // ORed with tlb_st_inst<N> to form sscan_data_14
+wire    tlb_st_inst0,tlb_st_inst1,tlb_st_inst2,tlb_st_inst3 ;
+wire    tlb_access_en0_e,tlb_access_en1_e,tlb_access_en2_e,tlb_access_en3_e ;
+wire    tlb_access_en0_m,tlb_access_en1_m,tlb_access_en2_m,tlb_access_en3_m ;
+wire    tlb_access_en0_tmp,tlb_access_en1_tmp,tlb_access_en2_tmp,tlb_access_en3_tmp ;
+wire    tlb_access_en0_g,tlb_access_en1_g,tlb_access_en2_g,tlb_access_en3_g ;   // also output ports
+wire    tlb_access_en0_unflushed,tlb_access_en1_unflushed,tlb_access_en2_unflushed,tlb_access_en3_unflushed ;
+wire    tlb_access_rst0,tlb_access_rst1,tlb_access_rst2,tlb_access_rst3 ;
+wire    tlb_access_sel_thrd0,tlb_access_sel_thrd1;   // also output ports
+wire    tlb_access_sel_thrd2,tlb_access_sel_thrd3;   // thrd2 is an output port; thrd3 is not in the visible port list
+wire    tlb_access_blocked ;
+wire    tlb_access_pending ;
+wire    tlb_access_initiated ;
+//wire    tlb_pending_access_rst ;
+wire    vw_wtchpt_cmp_en_m,vr_wtchpt_cmp_en_m ;
+
+
+//wire    va_b12_3_match_m,va_b47_40_match_m ;
+//wire    va_b12_3_match_g,va_b47_40_match_g ;
+//wire    wtchpt_msk_match_m,wtchpt_msk_match_g ;
+
+wire    as_if_user_asi_d,as_if_user_asi_e,as_if_user_asi_m;   // as_if_user_asi_m is also an output port
+//SC wire    as_if_usr_priv_pg ;
+//SC wire    priv_action,priv_action_m ;
+//SC wire    stdf_maddr_not_align, lddf_maddr_not_align ;
+//wire  [8:0] early_ttype_m,early_ttype_g ; 
+//wire    early_trap_vld_m, early_trap_vld_g ;  
+//SC wire    atm_access_w_nc, atm_access_unsup_asi ;
+wire    atomic_asi_d,atomic_asi_e,atomic_asi_m ;  
+//wire    dflush_asi_d,dflush_asi_e,dflush_asi_m,dflush_asi_g;  
+wire    blk_asi_d,blk_asi_e,blk_asi_m, blk_asi_g ;
+
+wire    fpld_byp_data_vld ;
+//wire  [7:0] dcache_rd_parity ;
+wire    dcache_rd_parity_error ;
+//SC wire    tte_data_parity_error ;
+
+wire  [3:0]   dtag_parity_error;
+//wire    dtag_mtag_parity_error ;
+//wire    daccess_error ;
+//SC wire    dmmu_miss_g ;
+wire  [2:0]   ctxt_sel_e ;
+wire    dc_diagnstc_asi_d, dc_diagnstc_asi_e ;
+wire    dc_diagnstc_asi_m, dc_diagnstc_asi_g ;
+wire    dtagv_diagnstc_asi_d, dtagv_diagnstc_asi_e ;
+wire    dtagv_diagnstc_asi_m, dtagv_diagnstc_asi_g ;
+//wire    dc_diagnstc_wr_e,dtagv_diagnstc_wr_e ;
+//wire    dside_diagnstc_wr_e ;
+wire    dc_diagnstc_wr_en,dtagv_diagnstc_wr_en ;
+
+wire  dtagv_diagnstc_rd_g ;
+wire  dc0_diagnstc_asi,dtagv0_diagnstc_asi;
+wire  dc1_diagnstc_asi,dtagv1_diagnstc_asi;
+wire  dc2_diagnstc_asi,dtagv2_diagnstc_asi;
+wire  dc3_diagnstc_asi,dtagv3_diagnstc_asi;
+//wire [3:0] lngltncy_st_go ;
+wire  [3:0]   tlb_st_data_sel_m ;
+wire  dc0_diagnstc_wr_en, dc1_diagnstc_wr_en, dc2_diagnstc_wr_en, dc3_diagnstc_wr_en ;  
+wire  dtagv0_diagnstc_wr_en, dtagv1_diagnstc_wr_en, dtagv2_diagnstc_wr_en, dtagv3_diagnstc_wr_en ;  
+//wire  merge2_sel_byte7, merge3_sel_byte7 ; 
+//SC wire  hw_align_addr,wd_align_addr,dw_align_addr;
+wire   hw_size,wd_size,dw_size;
+//SC wire  mem_addr_not_align ;
+// for the ASI decode wires below, the _m member of wr_only/rd_only/mmu_rd_only/unimp is also an output port
+wire  wr_only_asi_d,wr_only_asi_e,wr_only_asi_m ;
+wire  rd_only_asi_d,rd_only_asi_e,rd_only_asi_m ;
+wire  mmu_rd_only_asi_d,mmu_rd_only_asi_e,mmu_rd_only_asi_m ;
+wire  unimp_asi_d,unimp_asi_e,unimp_asi_m;
+wire  dmmu_asi58_d,dmmu_asi58_e,dmmu_asi58_m;
+wire  immu_asi50_d,immu_asi50_e,immu_asi50_m;
+
+wire  ifu_asi_store ;
+wire  nontlb_asi0, nontlb_asi1, nontlb_asi2, nontlb_asi3 ;
+//wire  stxa_stall_reset ;
+wire  ifu_nontlb0_asi,ifu_nontlb1_asi,ifu_nontlb2_asi,ifu_nontlb3_asi;
+wire  ifu_nontlb_asi_d, ifu_nontlb_asi_e,ifu_nontlb_asi_m,ifu_nontlb_asi_g ;
+wire  [2:0] lsu_asi_sel_fmx1 ;               // also an output port
+wire  [2:0] lsu_asi_sel_fmx2;                // also an output port
+wire    lsu_asi_rd_en, lsu_asi_rd_en_w2 ;
+//wire  [12:0]  pctxt_state ;
+//wire  [12:0]  sctxt_state ;
+
+//wire  [1:0] dcache_rand,dcache_rand_new ;
+wire    dtlb_inv_all_e,dtlb_inv_all_m ;
+wire  dtlb_wr_vld_d1,dtlb_tag_rd_d1,dtlb_data_rd_d1,dtlb_dmp_vld_d1,dtlb_inv_all_d1 ;
+wire  ldst_in_pipe ;
+wire  tlbop_init, tlbop_init_d1, tlbop_init_d2 ;
+wire  tlbop_init_d3, tlbop_init_d4, tlbop_init_d5 ;
+wire  [3:0] ldxa_illgl_va_cmplt,ldxa_illgl_va_cmplt_d1 ;
+
+wire  lsuctl_va_vld ;
+wire  lsuctl_illgl_va ;
+wire  sctxt_va_vld;
+//wire  scxt_ldxa_illgl_va ;
+wire  pctxt_va_vld;
+
+wire  pscxt_ldxa_illgl_va ;
+wire  lsu_asi_illgl_va ;
+wire  [3:0] lsu_asi_illgl_va_cmplt,lsu_asi_illgl_va_cmplt_w2 ;
+wire  bistctl_va_vld,mrgnctl_va_vld,ldiagctl_va_vld ;
+wire  bistctl_state_en,mrgnctl_state_en,ldiagctl_state_en ;
+wire  mrgnctl_illgl_va ;
+wire  asi42_illgl_va ;
+wire    [3:0]   tap_thread ;
+wire    mrgn_tap_wr_en ;                     // also an output port
+wire    bist_tap_wr_en ;                     // also an output port
+
+wire [3:0] dfture_tap_rd_d1;
+wire [3:0] dfture_tap_wr_en;
+
+//wire  dfture_tap_rd_sel ;
+
+wire  misc_asi_rd_en ;
+
+wire [3:0]  lsuctl_ctlbits_wr_en ;           // also an output port
+wire  bistctl_wr_en;                         // also an output port
+wire  mrgnctl_wr_en;                         // also an output port
+//wire  ldiagctl_rd_en,ldiagctl_wr_en;
+wire  casa_m, casa_g ;
+wire  tte_data_perror_unc ;                  // wire redeclaration of an input port
+wire  asi_tte_data_perror,asi_tte_tag_perror ;   // wire redeclarations of input ports
+
+wire  [1:0] dfill_tid_m,dfill_tid_g ;
+wire  dtag_error_m,dcache_error_m;
+wire  dtag_error_g,dcache_error_g;
+wire  dtag_error_w2,dcache_error_w2;
+wire  l2_unc_error_e,l2_corr_error_e;
+wire  l2_unc_error_m,l2_corr_error_m;
+wire  l2_unc_error_g,l2_corr_error_g;
+wire  l2_unc_error_w2,l2_corr_error_w2;
+wire  unc_err_trap_e,unc_err_trap_m,unc_err_trap_g ;
+//wire  corr_err_trap_e, corr_err_trap_m, corr_err_trap_g ;
+wire  dtag_perror_g ;
+
+// the _e row below redeclares four output ports (ifill/dfill/rd_only_ltlb/wr_only_ltlb); the _d row is internal
+wire  ifill_tlb_asi_d,dfill_tlb_asi_d,rd_only_ltlb_asi_d,wr_only_ltlb_asi_d ;
+wire  ifill_tlb_asi_e,dfill_tlb_asi_e,rd_only_ltlb_asi_e,wr_only_ltlb_asi_e ;
+//SC wire  tlb_daccess_excptn_e,tlb_daccess_error_e  ;
+//SC wire  tlb_daccess_excptn_m,tlb_daccess_error_m  ;
+//SC wire  tlb_daccess_excptn_g,tlb_daccess_error_g  ;
+wire  thread_tl_zero ;
+wire	pid_va_vld, pid_state_en ;
+wire	[3:0]	pid_state_wr_en ;            // also an output port
+
+//wire	[3:0]	pid_state_rd_en ;
+//wire	[2:0]	pid_state ;
+wire    [3:0]   intld_byp_cmplt ;
+
+//wire	hpv_priv,hpstate_en ;	
+wire	hpv_priv_m,hpstate_en_m ;	
+wire	hpv_priv_e,hpstate_en_e ;	
+wire	blkst_m, blkst_g ;
+//wire	dc_direct_map ;		
+wire	spubyp_trap_active_e,spubyp_trap_active_m, spubyp_trap_active_g ;
+wire [6:0] spubyp_ttype ;
+wire	spu_trap ;
+wire	spu_trap0, spu_trap1, spu_trap2, spu_trap3 ;
+wire	[6:0]	spu_ttype ; 
+wire	spubyp0_trap,spubyp1_trap,spubyp2_trap,spubyp3_trap;
+wire [6:0]	spubyp0_ttype,spubyp1_ttype,spubyp2_ttype,spubyp3_ttype;
+wire	bendian_g ;
+//wire va_wtchpt_rd_en, pa_wtchpt_rd_en;   
+//wire lsu_bendian_access_g;
+wire      lsu_tlb_tag_rd_vld_g ;
+wire      lsu_dtlb_invalid_all_m ;
+
+wire  [3:0]   dva_vld_g;
+wire          lsu_diagnstc_asi_rd_en;
+wire  [3:0]   ld_thrd_byp_sel_g ;
+wire  [3:0]           lmq_byp_data_sel0 ;     // ldxa vs stb bypass data sel.
+wire  [3:0]           lmq_byp_data_sel1 ;     // ldxa vs stb bypass data sel.
+wire  [3:0]           lmq_byp_data_sel2 ;     // ldxa vs stb bypass data sel.
+wire  [3:0]           lmq_byp_data_sel3 ;     // ldxa vs stb bypass data sel.
+wire  [2:0]           lmq_byp_ldxa_sel0 ;     // ldxa data sel - thread0
+wire  [2:0]           lmq_byp_ldxa_sel1 ;     // ldxa data sel - thread1
+wire  [2:0]           lmq_byp_ldxa_sel2 ;     // ldxa data sel - thread2
+wire  [2:0]           lmq_byp_ldxa_sel3 ;     // ldxa data sel - thread3
+wire    endian_mispred_g ;
+
+   wire       ld_inst_vld_w2, ld_inst_vld_w3;
+
+   wire [3:0] lmq_byp_data_raw_sel_d1;
+   wire [3:0] lmq_byp_data_raw_sel_d2;
+
+wire	asi_st_vld_g ;
+wire  ignore_fill;
+
+wire  [3:0]  pend_atm_ld_ue ;
+
+wire [2:0]   lsu_byp_misc_addr_m ;   // lower 3bits of addr for ldxa/raw etc
+wire [1:0]   lsu_byp_misc_sz_m ;     // size for ldxa/raw etc
+
+//----------------------------------------------------------
+// Reset and clock
+//   clk is an alias of rclk. grst_l passes through the rstff cell
+//   (dffrl_async, rst_l pin tied to arst_l) to produce dbb_reset_l, which drives
+//   dctl_rst_l as-is and reset inverted. (reset was formerly ~rst_l.)
+//----------------------------------------------------------
+wire clk;
+wire dbb_reset_l;
+wire reset;
+
+assign clk        = rclk;
+assign dctl_rst_l = dbb_reset_l;
+assign reset      = ~dbb_reset_l;
+
+dffrl_async rstff (
+   .rst_l (arst_l), .clk (clk), .se (se), .si (), .so (),
+   .din   (grst_l), .q   (dbb_reset_l) );
+
+wire      lsu_bist_wvld_e ;           // bist writes to cache
+wire  		lsu_bist_rvld_e ;	          // bist reads dcache
+// mbist_stge: registers the mbist dcache write/read requests into lsu_bist_wvld_e / lsu_bist_rvld_e.
+dff_s #(2) mbist_stge (
+   .din ({mbist_dcache_write, mbist_dcache_read}),
+   .q   ({lsu_bist_wvld_e,    lsu_bist_rvld_e  }),
+   .clk (clk),
+   .se  (se),       .si (),          .so ()
+);   
+  
+//===========================================================
+//from lsu_excpctl
+//wire		lsu_flush_pipe_w ;	// flush - local to lsu
+// (retired) local alias of the pipe flush; dctl_flush_pipe_w is now used directly:
+//   assign lsu_flush_pipe_w = dctl_flush_pipe_w;
+   
+//===========================================================
+// lsu_ldst_inst_vld_e: a load or a store is valid in E.
+   assign     lsu_ldst_inst_vld_e = ld_inst_vld_e | st_inst_vld_e;
+
+//wire    lsu_l2fill_bendian_g;
+// memref_e: ifu_lsu_memref_d registered by stge_ad_e; used below wherever the TLB logic needs "ld/st in pipe".
+wire memref_e;
+   
+dff_s #(1) stge_ad_e (
+  .din (ifu_lsu_memref_d),
+  .q   (memref_e),
+  .clk (clk),
+  .se     (se),       .si (),          .so ()
+);   
+
+//=================================================================================================
+// SHADOW SCAN
+//=================================================================================================
+// Two status bits are built for the thread(s) flagged in ctu_sscan_tid and registered into lsu_sscan_data[14:13].
+wire	sscan_data_13, sscan_data_14 ;
+// stb status - this monitors the stb state
+assign sscan_data_13 =
+  ctu_sscan_tid[0] & lsu_stb_empty[0] |
+  ctu_sscan_tid[1] & lsu_stb_empty[1] |
+  ctu_sscan_tid[2] & lsu_stb_empty[2] |
+  ctu_sscan_tid[3] & lsu_stb_empty[3] ;
+   
+     
+// Monitors outstanding long-latency asi transactions - hangs thread. Doesn't cover all asi.
+assign  sscan_data_14 =
+                ctu_sscan_tid[0] & (tlb_ld_inst0 | tlb_st_inst0) |
+               	ctu_sscan_tid[1] & (tlb_ld_inst1 | tlb_st_inst1) |
+             		ctu_sscan_tid[2] & (tlb_ld_inst2 | tlb_st_inst2) | 
+               	ctu_sscan_tid[3] & (tlb_ld_inst3 | tlb_st_inst3) ;
+
+// stg_d1: bit 14 <- sscan_data_14, bit 13 <- sscan_data_13.
+dff_s #(2) stg_d1 (
+  .din ({sscan_data_14,sscan_data_13}),
+  .q   (lsu_sscan_data[14:13]),
+  .clk (clk),
+  .se     (se),       .si (),          .so ()
+);   
+
+//=========================================================================================
+//  INST_VLD_W GENERATION
+//=========================================================================================
+// The M-stage instruction valid is dropped when a pipe flush hits the same thread id (thrid_m == thrid_g).
+wire    flush_w_inst_vld_m ;
+wire    lsu_inst_vld_w ;       // flush_w_inst_vld_m registered by stgw_ivld
+assign  flush_w_inst_vld_m =
+        ifu_tlu_inst_vld_m &
+	~(dctl_flush_pipe_w & (thrid_m[1:0] == thrid_g[1:0])) ; // really lsu_flush_pipe_w
+
+dff_s  stgw_ivld (
+        .din    (flush_w_inst_vld_m),
+        .q      (lsu_inst_vld_w),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+
+// Specifically for qctl2. Does not include flush-pipe, but does include ifu's flush.
+wire	ld_vld ;
+
+   wire ifu_lsu_flush_w;   // local registered copy of ifu_tlu_flush_m
+
+   wire ifu_tlu_flush_fd_w_q, ifu_tlu_flush_fd2_w_q, ifu_tlu_flush_fd3_w_q;
+   // ifu_tlu_flush_m is registered four times in parallel: one local copy plus three copies that are buffered out below.
+dff_s #(4) ifu_tlu_flush_stgw (
+        .din    ({ifu_tlu_flush_m,ifu_tlu_flush_m,     ifu_tlu_flush_m,      ifu_tlu_flush_m}     ),
+        .q      ({ifu_lsu_flush_w,ifu_tlu_flush_fd_w_q,ifu_tlu_flush_fd2_w_q,ifu_tlu_flush_fd3_w_q}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// Each forwarded copy goes through its own bw_u1_buf_30x buffer cell.
+bw_u1_buf_30x UZfix_ifu_tlu_flush_fd_w  ( .a(ifu_tlu_flush_fd_w_q),  .z(ifu_tlu_flush_fd_w)  );
+bw_u1_buf_30x UZfix_ifu_tlu_flush_fd2_w ( .a(ifu_tlu_flush_fd2_w_q), .z(ifu_tlu_flush_fd2_w) );
+bw_u1_buf_30x UZfix_ifu_tlu_flush_fd3_w ( .a(ifu_tlu_flush_fd3_w_q), .z(ifu_tlu_flush_fd3_w) );
+   
+// ld_vld: unflushed load on a valid W-stage instruction that the IFU has not flushed (pipe flush not yet applied).
+assign	ld_vld = ~ifu_lsu_flush_w & lsu_inst_vld_w & ld_inst_vld_unflushed ;
+wire	ld_vld_w_flush ;	// ld_vld further killed by dctl_flush_pipe_w
+assign	ld_vld_w_flush = ~dctl_flush_pipe_w & ld_vld ;
+// Steer the fully qualified load valid to whichever threadN_g select is set.
+assign	lsu_ld_inst_vld_g[3:0] =
+	{4{ld_vld_w_flush}} & {thread3_g, thread2_g, thread1_g, thread0_g} ;
+
+//=========================================================================================
+//  TLB Control 
+//=========================================================================================
+// alt_space_e: ifu_lsu_alt_space_d registered by aspace_e.
+wire	alt_space_e ;
+dff_s #(1) aspace_e (
+        .din    (ifu_lsu_alt_space_d),
+        .q      (alt_space_e),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// tlb_ldst_cam_vld: memory reference in E with no dtlb bypass and not an internal-ASI alternate-space access.
+//Atomics require translation.
+assign tlb_ldst_cam_vld = 
+  memref_e & 
+    ~dtlb_bypass_e & ~(asi_internal_e & alt_space_e)  ;
+
+// in hyper-lite mode, assumption is that real translation is not supported -
+// a miss in tlb with real-translation enabled would result in real-address
+// translation miss. This would be purely accidental on software's part.
+//wire	dtlb_real_byp_e ;
+//assign	dtlb_real_byp_e = hpstate_en_e & ~hpv_priv_e ;
+// In hyper-lite mode, no concept of real xslation.
+assign	lsu_dtlb_cam_real_e =
+	// lsu-ctl based RA->PA 
+  ( lsuctl_dtlb_byp_e & ~hpv_priv_e & hpstate_en_e) |
+	// means RA->PA if used by hypervisor.
+  ( tlb_byp_asi_e & hpstate_en_e & altspace_ldst_e) ;  
+  //( tlb_byp_asi_e & dtlb_real_byp_e & altspace_ldst_e) ;  
+// 2-to-4 decode of tlb_demap_thrid[1:0] into one select per thread.
+assign  demap_thread0 = ~tlb_demap_thrid[1] & ~tlb_demap_thrid[0] ;
+assign  demap_thread1 = ~tlb_demap_thrid[1] &  tlb_demap_thrid[0] ;
+assign  demap_thread2 =  tlb_demap_thrid[1] & ~tlb_demap_thrid[0] ;
+assign  demap_thread3 =  tlb_demap_thrid[1] &  tlb_demap_thrid[0] ;
+
+// demap access and regular ldst access to tlb are assumed to
+// be mutex.
+assign thread0_ctxt =   ( demap_thread0 & tlb_demap_vld) | 
+      (~tlb_demap_vld & thread0_e) ;
+      //(thread0_e & memref_e) ;
+assign thread1_ctxt =   ( demap_thread1 & tlb_demap_vld) | 
+      (~tlb_demap_vld & thread1_e) ;
+      //(thread1_e & memref_e) ;
+assign thread2_ctxt =   ( demap_thread2 & tlb_demap_vld) | 
+      (~tlb_demap_vld & thread2_e) ;
+      //(thread2_e & memref_e) ;
+assign thread3_ctxt =   ( demap_thread3 & tlb_demap_vld) | 
+      (~tlb_demap_vld & thread3_e) ;
+      //(thread3_e & memref_e) ;
+// Split the E-stage memory reference into alternate-space and non-alternate-space accesses.
+assign  altspace_ldst_e   = memref_e &  alt_space_e ;
+assign  non_altspace_ldst_e = memref_e & ~alt_space_e ;
+// aspace_stgm: E -> M staging of both flags.
+dff_s #(2) aspace_stgm (
+        .din    ({altspace_ldst_e,non_altspace_ldst_e}),
+        .q      ({altspace_ldst_m,non_altspace_ldst_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// aspace_stgg: M -> G staging of both flags.
+dff_s #(2) aspace_stgg (
+        .din    ({altspace_ldst_m,non_altspace_ldst_m}),
+        .q      ({altspace_ldst_g,non_altspace_ldst_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+wire	[3:0]	tl_zero_d1 ;  // tlu_lsu_tl_zero registered by tlz_stgd1
+dff_s #(4) tlz_stgd1 (
+        .din    (tlu_lsu_tl_zero[3:0]),
+        .q      (tl_zero_d1[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// Pick the tl_zero bit of the thread selected by thread0_e..thread3_e.
+mux4ds  #(1) trap_level_zero_mux (
+        .in0    (tl_zero_d1[0]),
+        .in1    (tl_zero_d1[1]),
+        .in2    (tl_zero_d1[2]),
+        .in3    (tl_zero_d1[3]),
+        .sel0   (thread0_e),  
+        .sel1   (thread1_e),
+        .sel2   (thread2_e),  
+        .sel3   (thread3_e),
+        .dout   (thread_tl_zero)
+);
+
+wire	thread_tl_zero_m ;  // thread_tl_zero registered by ttlz_stgm
+dff_s #(1) ttlz_stgm (
+        .din    (thread_tl_zero),
+        .q      (thread_tl_zero_m),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Non-alternate-space ld/st in M while the thread's tl_zero bit is clear.
+assign	lsu_nonalt_nucl_access_m = non_altspace_ldst_m & ~thread_tl_zero_m ;
+
+// Note : autodemap will need to be or'ed into tlb_demap_vld !!!
+// use of tlu_lsu_tl_zero needs to be threaded.
+assign  thread_pctxt =  ( tlb_demap_pctxt     &  tlb_demap_vld)      |  // demap
+      ( non_altspace_ldst_e &  thread_tl_zero) |  // ldst. non-alt- space
+      ( altspace_ldst_e     &  primary_asi_e)      |  // ldst. alt_space
+      (~(memref_e | tlb_demap_vld)) ; // default for pipe
+      //(~(ld_inst_vld_e | st_inst_vld_e | tlb_demap_vld)) ; // default for pipe
+assign  thread_sctxt =  ( tlb_demap_sctxt     &  tlb_demap_vld)      |  // demap
+      ( altspace_ldst_e     &  secondary_asi_e) ; // ldst. alt_space
+assign  thread_nctxt =  ( tlb_demap_nctxt     &  tlb_demap_vld)      |  // demap
+      ( non_altspace_ldst_e & ~thread_tl_zero) |  // ldst. non-alt- space
+      ( altspace_ldst_e     &  nucleus_asi_e) ; // ldst. alt_space
+assign  thread_actxt =  tlb_demap_actxt & tlb_demap_vld ; 
+// thread_default is set when none of pctxt/sctxt/actxt is selected; thread_nctxt is not part of the term.
+//tmp  NOTE(review): presumably nctxt is the default leg of the consuming mux - confirm at the consumer.
+   wire thread_default;
+   assign thread_default = ~(thread_pctxt | thread_sctxt | thread_actxt);
+   
+wire	[3:0]	pstate_am ;  // tlu_lsu_pstate_am registered by psam_stgd1
+dff_s #(4) psam_stgd1 (
+        .din    (tlu_lsu_pstate_am[3:0]),
+        .q      (pstate_am[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// Earlier priority-mux form of the select below, kept for reference:
+//assign  lsu_dtlb_addr_mask_l_e = 
+//  thread0_e ? ~pstate_am[0] :
+//    thread1_e ? ~pstate_am[1] :
+//      thread2_e ? ~pstate_am[2] :
+//          ~pstate_am[3] ;
+// lsu_dtlb_addr_mask_l_e is the inverted pstate_am bit of the thread selected by thread0_e..thread3_e.
+mux4ds  #(1) pstate_am_mux (
+        .in0    (~pstate_am[0]),
+        .in1    (~pstate_am[1]),
+        .in2    (~pstate_am[2]),
+        .in3    (~pstate_am[3]),
+        .sel0   (thread0_e),  
+        .sel1   (thread1_e),
+        .sel2   (thread2_e),  
+        .sel3   (thread3_e),
+        .dout   (lsu_dtlb_addr_mask_l_e)
+);
+   
+//=========================================================================================
+//  TLB RD/WR/DMP HANDLING
+//=========================================================================================
+
+// To speed up the tlb miss handler, wr_vld will now be generated based on
+// admp occurrence. lsu_dtlb_wr_vld_g is to be ignored. The following paths
+// can be improved
+// admp->write initiation (+2)
+// write->completion initiation (+3)
+
+wire admp_write ;  // an accepted demap with tlb_demap_actxt set starts the write
+assign  admp_write = lsu_dtlb_dmp_vld_e & tlb_demap_actxt ;
+wire admp_rst ;    // pending write is cleared by reset or once the write is issued
+assign  admp_rst = reset | lsu_dtlb_wr_vld_e ;
+// twr_stgd1 is enabled only by admp_write (din == en), so it can only load a 1; admp_rst drives its rst pin.
+wire    local_dtlb_wr_vld_g ;
+dffre_s #(1) twr_stgd1 (
+        .din    (admp_write),
+        .q      (local_dtlb_wr_vld_g),
+        .clk    (clk),
+        .en     (admp_write),   .rst    (admp_rst),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Requests are accepted only with no ld/st in E (memref_e) and no op accepted/initiated in the previous two cycles.
+wire    dtlb_wr_init_d1,dtlb_wr_init_d2,dtlb_wr_init_d3 ;
+// Handshake between tlu and lsu needs to be fine-tuned !!!
+assign  lsu_dtlb_wr_vld_e =  local_dtlb_wr_vld_g & ~(memref_e | dtlb_wr_init_d1 | dtlb_wr_init_d2) ;
+//assign  lsu_dtlb_wr_vld_e =  tlu_dtlb_wr_vld_g & ~(memref_e | dtlb_done_d1 | dtlb_done_d2) ;
+assign  lsu_dtlb_tag_rd_e =  tlu_dtlb_tag_rd_g & ~(memref_e | dtlb_done_d1 | dtlb_done_d2) ;
+assign  lsu_dtlb_data_rd_e =  tlu_dtlb_data_rd_g & ~(memref_e | dtlb_done_d1 | dtlb_done_d2) ;
+assign  lsu_dtlb_dmp_vld_e =  tlu_dtlb_dmp_vld_g & ~(memref_e | dtlb_done_d1 | dtlb_done_d2) ;
+
+   wire lsu_dtlb_dmp_all_e_tmp;
+   // demap-all uses the same gating, then leaves through a bw_u1_buf_5x buffer cell.
+assign  lsu_dtlb_dmp_all_e_tmp =  tlu_dtlb_dmp_all_g & ~(memref_e | dtlb_done_d1 | dtlb_done_d2) ;
+ bw_u1_buf_5x UZsize_lsu_dtlb_dmp_all_e (.a(lsu_dtlb_dmp_all_e_tmp), .z(lsu_dtlb_dmp_all_e));
+   // rw-index valid is gated by the write-init history, not the done history.
+assign  lsu_dtlb_rwindex_vld_e =  tlu_dtlb_rw_index_vld_g & ~(memref_e | dtlb_wr_init_d1 | dtlb_wr_init_d2) ;
+//assign  lsu_dtlb_rwindex_vld_e =  tlu_dtlb_rw_index_vld_g & ~(memref_e | dtlb_done_d1 | dtlb_done_d2) ;
+// Can remove reset once invalidate asi in place !!!
+// assign lsu_dtlb_invalid_all_w2 = reset | tlu_dtlb_invalidate_all_g ;
+// tlb_demap_vld is the accepted demap request.
+assign  tlb_demap_vld = lsu_dtlb_dmp_vld_e ;
+
+// Switchout for threads. Force threads to swo if tlb operation does not occur for over 5 cycles.
+// tlbop_stgd1 keeps last cycle's value of each request so a rising edge can be detected below.
+dff_s #(5) tlbop_stgd1 (
+        //.din    ({tlu_dtlb_wr_vld_g,tlu_dtlb_tag_rd_g,tlu_dtlb_data_rd_g,tlu_dtlb_dmp_vld_g,
+        .din    ({local_dtlb_wr_vld_g,tlu_dtlb_tag_rd_g,tlu_dtlb_data_rd_g,tlu_dtlb_dmp_vld_g,
+    tlu_dtlb_invalidate_all_g}),
+        .q      ({dtlb_wr_vld_d1,dtlb_tag_rd_d1,dtlb_data_rd_d1,dtlb_dmp_vld_d1,
+    dtlb_inv_all_d1}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Detect event.
+//bug6193 / ECO bug6511   
+assign  ldst_in_pipe = memref_e ;
+assign tlbop_init = 
+  ((~dtlb_wr_vld_d1 & local_dtlb_wr_vld_g)  |
+  (~dtlb_tag_rd_d1  & tlu_dtlb_tag_rd_g)   |
+  (~dtlb_data_rd_d1 & tlu_dtlb_data_rd_g) |
+  (~dtlb_inv_all_d1 & tlu_dtlb_invalidate_all_g) |
+  (~dtlb_dmp_vld_d1 & tlu_dtlb_dmp_vld_g)) & ldst_in_pipe ;
+// Five-stage chain: past stage 1, each stage only passes its input on while ldst_in_pipe is set.
+dff_s #(1) tlbinit_stgd1 ( .din    (tlbop_init), .q      (tlbop_init_d1),
+        .clk    (clk), .se     (se),       .si (),          .so ());
+dff_s #(1) tlbinit_stgd2 ( .din    (tlbop_init_d1 &  ldst_in_pipe), .q      (tlbop_init_d2),
+        .clk    (clk), .se     (se),       .si (),          .so ());
+dff_s #(1) tlbinit_stgd3 ( .din    (tlbop_init_d2 &  ldst_in_pipe), .q      (tlbop_init_d3),
+        .clk    (clk), .se     (se),       .si (),          .so ());
+dff_s #(1) tlbinit_stgd4 ( .din    (tlbop_init_d3 &  ldst_in_pipe), .q      (tlbop_init_d4),
+        .clk    (clk), .se     (se),       .si (),          .so ());
+dff_s #(1) tlbinit_stgd5 ( .din    (tlbop_init_d4 &  ldst_in_pipe), .q      (tlbop_init_d5),
+        .clk    (clk), .se     (se),       .si (),          .so ());
+
+// The force-switchout fires when the event reaches the end of the chain with ldst_in_pipe still set.
+assign  lsu_tlbop_force_swo = tlbop_init_d5 & ldst_in_pipe ;
+
+//assign  dtlb_done =   lsu_dtlb_wr_vld_e  | lsu_dtlb_tag_rd_e | 
+assign  dtlb_done =   	lsu_dtlb_tag_rd_e | lsu_dtlb_data_rd_e | 
+			lsu_dtlb_dmp_vld_e | dtlb_inv_all_e ;
+// invalidate-all is accepted under the same gating as the tag/data read and demap requests.
+assign  dtlb_inv_all_e = tlu_dtlb_invalidate_all_g & ~(memref_e | dtlb_done_d1 | dtlb_done_d2) ;
+// dn_stgd1: first staging of done and of the tag/data read valids.
+dff_s #(3) dn_stgd1 (
+        .din    ({dtlb_done,lsu_dtlb_tag_rd_e,lsu_dtlb_data_rd_e}),
+        .q      ({dtlb_done_d1,tag_rd_vld_m,data_rd_vld_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// While sehold is set the flop input recirculates its own output, holding dtlb_inv_all_m.
+wire	dtlb_inv_all_din ;
+assign	dtlb_inv_all_din = sehold ? dtlb_inv_all_m : dtlb_inv_all_e ;
+
+dff_s #(1) dinv_stgd1 (
+        .din    (dtlb_inv_all_din),
+        .q      (dtlb_inv_all_m),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+assign  lsu_dtlb_invalid_all_m = dtlb_inv_all_m ;
+// added by sureshT
+assign  lsu_dtlb_invalid_all_l_m = ~lsu_dtlb_invalid_all_m;
+// dn_stgd2: second staging; dtlb_done_d2 feeds the completion handshake below.
+dff_s #(3) dn_stgd2 (
+        .din    ({dtlb_done_d1,tag_rd_vld_m,data_rd_vld_m}),
+        .q      ({dtlb_done_d2,tag_rd_vld_g,data_rd_vld_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+assign  lsu_tlb_data_rd_vld_g = data_rd_vld_g ;
+assign  lsu_tlb_tag_rd_vld_g  = tag_rd_vld_g ;
+//assign  lsu_tlb_st_vld_g = ~lsu_tlb_tag_rd_vld_g & ~lsu_tlb_data_rd_vld_g ;
+   
+// The handshake will have to change !!!
+assign  lsu_tlu_dtlb_done = 
+	dtlb_done_d2 |		// rest
+	dtlb_wr_init_d3 ;	// write
+
+// Note : if mx_sel bit is high, then it selects va instead of pa.
+
+
+   
+//=========================================================================================
+//  State/ASI Registers.
+//=========================================================================================
+// stctl_stg_e: registers the state-register address and the wsr-instruction flag from the IFU.
+dff_s #(8) stctl_stg_e (
+        .din    ({ifu_tlu_sraddr_d[6:0],ifu_tlu_wsr_inst_d}),
+        .q      ({lsu_sraddr_e[6:0],    lsu_wsr_inst_e}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+assign lsu_tlu_wsr_inst_e = lsu_wsr_inst_e;
+
+   wire asi_state_wr_en_e, asi_state_wr_en_m;
+   // asi_state_wr_en_e: wsr instruction whose lsu_sraddr_e[6:0] equals 7'b0000011.
+assign  asi_state_wr_en_e =   
+	      ~lsu_sraddr_e[6] &  // 1=hypervisor
+	      ~lsu_sraddr_e[5] &  // =0 for state reg. 
+        ~lsu_sraddr_e[4] & ~lsu_sraddr_e[3] & 
+        ~lsu_sraddr_e[2] &  lsu_sraddr_e[1] & 
+         lsu_sraddr_e[0] & 
+         lsu_wsr_inst_e ; // write
+   // E -> M staging of the write enable and the alternate-space flag.
+dff_s #(2) stctl_stg_m (
+        .din    ({asi_state_wr_en_e, alt_space_e}),
+        .q      ({asi_state_wr_en_m, lsu_alt_space_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// M -> W/G staging of the same pair.
+dff_s #(2) stctl_stg_w (
+        .din    ({asi_state_wr_en_m, lsu_alt_space_m}),
+        .q      ({asi_state_wr_en,   lsu_alt_space_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+//assign  asi_state_wr_en =   
+//	~lsu_sraddr_w[6] &  // 1=hypervisor
+//	~lsu_sraddr_w[5] &  // =0 for state reg. 
+//        ~lsu_sraddr_w[4] & ~lsu_sraddr_w[3] & 
+//        ~lsu_sraddr_w[2] &  lsu_sraddr_w[1] & 
+//         lsu_sraddr_w[0] &  
+//         lsu_wsr_inst_w ; // write
+// (The decode above is retired; asi_state_wr_en now comes out of stctl_stg_w.)
+// asi_stgw: registers the TLU's ASI-update request and its thread id.
+dff_s #(3) asi_stgw (
+        .din    ({tlu_lsu_asi_update_m,tlu_lsu_tid_m[1:0]}),
+        .q      ({tlu_lsu_asi_update_g,tlu_lsu_tid_g[1:0]}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+
+// Decode the registered TLU thread id (tlu_lsu_tid_g) into one ASI-update strobe per thread.
+assign  tsa_update_asi0 =  tlu_lsu_asi_update_g & ~tlu_lsu_tid_g[0] & ~tlu_lsu_tid_g[1] ;
+assign  tsa_update_asi1 =  tlu_lsu_asi_update_g &  tlu_lsu_tid_g[0] & ~tlu_lsu_tid_g[1] ;
+assign  tsa_update_asi2 =  tlu_lsu_asi_update_g & ~tlu_lsu_tid_g[0] &  tlu_lsu_tid_g[1] ;
+assign  tsa_update_asi3 =  tlu_lsu_asi_update_g &  tlu_lsu_tid_g[0] &  tlu_lsu_tid_g[1] ;
+// Per-thread write enable for the ASI state register: either the thread's own
+// state-register write or a TLU update for it, on a valid instruction that is not early-flushed.
+// (dctl_early_flush_w replaced the older lsu_flush_pipe_w qualifier here.)
+assign  asi_state_wr_thrd[3:0] =
+  {4{lsu_inst_vld_w & ~dctl_early_flush_w}} &
+  ( {tsa_update_asi3, tsa_update_asi2, tsa_update_asi1, tsa_update_asi0} |
+    ( {4{asi_state_wr_en}} &
+      {thread3_g, thread2_g, thread1_g, thread0_g} ) ) ;
+//
+
+// dc diagnstc will swo on write.							
+assign  sta_internal_e = asi_internal_e & st_inst_vld_e & alt_space_e ;
+// dc diagnstc will not swo on read.							
+assign  lda_internal_e = asi_internal_e & ~dc_diagnstc_asi_e & ld_inst_vld_e & alt_space_e ;
+
+assign  ldsta_internal_e = sta_internal_e | lda_internal_e ;
+
+// MMU_ASI
+// Do not switch out for lds. lds switched out thru ldst_miss.
+// qualification must be removed.
+assign  lsu_ifu_ldsta_internal_e = asi_internal_e ;
+//assign  lsu_ifu_ldsta_internal_e = asi_internal_e & ~ld_inst_vld_e  ;
+
+// stai_stgm: E -> M staging of the internal store/load flags.
+dff_s #(2)  stai_stgm (
+        .din    ({sta_internal_e,lda_internal_e}),
+        .q      ({sta_internal_m,lda_internal_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// stxa_internal_m drops internal stores that target the dtagv or dc diagnostic ASIs.
+   wire stxa_internal_m;
+   assign stxa_internal_m = sta_internal_m & ~(dtagv_diagnstc_asi_m | dc_diagnstc_asi_m);
+   // stai_stgg: M -> G staging; outputs are stxa_internal / ldxa_internal.
+dff_s #(2)  stai_stgg (
+        .din    ({stxa_internal_m, lda_internal_m}),
+        .q      ({stxa_internal,   ldxa_internal}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+   wire [7:0] ldst_va_g;
+   // Low byte of the ld/st virtual address, used for the register-select compares below.
+   assign ldst_va_g[7:0] = lsu_ldst_va_g[7:0];
+
+   wire	[7:0]	lsu_asi_state ;  // lsu_dctl_asi_state_m registered by asistate_stgg
+dff_s #(8)  asistate_stgg (
+        .din    (lsu_dctl_asi_state_m[7:0]),
+        .q      (lsu_asi_state[7:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   // pctxt register: ASI 0x21, VA 0x08, alternate-space access on a valid instruction.
+assign  pctxt_va_vld = (ldst_va_g[7:0] == 8'h08) ;
+assign  pctxt_state_en =  (lsu_asi_state[7:0] == 8'h21) & pctxt_va_vld &
+        lsu_alt_space_g & lsu_inst_vld_w ; 
+
+// Per-thread write strobe: register selected, ASI store valid, thread matches.
+//assign  pctxt_state_wr_thrd[0] = pctxt_state_en & st_inst_vld_g & thread0_g ;
+assign  pctxt_state_wr_thrd[0] = pctxt_state_en & asi_st_vld_g & thread0_g ;
+assign  pctxt_state_wr_thrd[1] = pctxt_state_en & asi_st_vld_g & thread1_g ;
+assign  pctxt_state_wr_thrd[2] = pctxt_state_en & asi_st_vld_g & thread2_g ;
+assign  pctxt_state_wr_thrd[3] = pctxt_state_en & asi_st_vld_g & thread3_g ;
+
+//assign  pctxt_state_rd_en[0] = pctxt_state_en & ld_inst_vld_g & thread0_g ;
+
+//assign  pctxt_state_rd_en[0] = pctxt_state_en & asi_ld_vld_g & thread0_g ;
+//assign  pctxt_state_rd_en[1] = pctxt_state_en & asi_ld_vld_g & thread1_g ;
+//assign  pctxt_state_rd_en[2] = pctxt_state_en & asi_ld_vld_g & thread2_g ;
+//assign  pctxt_state_rd_en[3] = pctxt_state_en & asi_ld_vld_g & thread3_g ;
+
+// sctxt register: same ASI 0x21, VA 0x10.
+assign  sctxt_va_vld = (ldst_va_g[7:0] == 8'h10) ;
+assign  sctxt_state_en =  (lsu_asi_state[7:0] == 8'h21) & sctxt_va_vld &
+        lsu_alt_space_g & lsu_inst_vld_w ; 
+// ASI 0x21 access whose VA is neither 0x08 nor 0x10.
+assign  pscxt_ldxa_illgl_va = 
+	(lsu_asi_state[7:0] == 8'h21) & ~(pctxt_va_vld | sctxt_va_vld) &
+        lsu_alt_space_g & lsu_inst_vld_w ; 
+
+//assign  sctxt_state_wr_thrd[0] = sctxt_state_en & st_inst_vld_g & thread0_g ;
+assign  sctxt_state_wr_thrd[0] = sctxt_state_en & asi_st_vld_g & thread0_g ;
+assign  sctxt_state_wr_thrd[1] = sctxt_state_en & asi_st_vld_g & thread1_g ;
+assign  sctxt_state_wr_thrd[2] = sctxt_state_en & asi_st_vld_g & thread2_g ;
+assign  sctxt_state_wr_thrd[3] = sctxt_state_en & asi_st_vld_g & thread3_g ;
+
+//assign  sctxt_state_rd_en[0]   = sctxt_state_en & ld_inst_vld_g & thread0_g ;
+
+//assign  sctxt_state_rd_en[0]   = sctxt_state_en & asi_ld_vld_g & thread0_g ;
+//assign  sctxt_state_rd_en[1]   = sctxt_state_en & asi_ld_vld_g & thread1_g ;
+//assign  sctxt_state_rd_en[2]   = sctxt_state_en & asi_ld_vld_g & thread2_g ;
+//assign  sctxt_state_rd_en[3]   = sctxt_state_en & asi_ld_vld_g & thread3_g ;
+   
+
+// LSU CONTROL REGISTER. ASI=0x45,VA=0x00.
+// b0 - i$ enable.
+// b1 - d$ enable. 
+// b2 - immu enable.
+// b3 - dmmu enable.
+// lsu_ctl_state_en selects the register; lsuctl_illgl_va flags ASI 0x45 with any other VA low byte.
+assign  lsuctl_va_vld = (ldst_va_g[7:0] == 8'h00);
+assign  lsu_ctl_state_en = (lsu_asi_state[7:0] == 8'h45) & lsuctl_va_vld &
+        lsu_alt_space_g & lsu_inst_vld_w ; 
+assign  lsuctl_illgl_va = (lsu_asi_state[7:0] == 8'h45) & ~lsuctl_va_vld &
+        lsu_alt_space_g & lsu_inst_vld_w ; 
+
+wire  [3:0] lctl_rst ;  // per-thread control-register reset, assigned further down
+   // Write enable per thread: an ASI store to the register by that thread, or that thread's reset.
+//assign  lsu_ctl_state_wr_en[0] = (lsu_ctl_state_en & st_inst_vld_g & thread0_g) | lctl_rst[0] ;
+assign  lsu_ctl_state_wr_en[0] = (lsu_ctl_state_en & asi_st_vld_g & thread0_g) | lctl_rst[0] ;
+assign  lsu_ctl_state_wr_en[1] = (lsu_ctl_state_en & asi_st_vld_g & thread1_g) | lctl_rst[1] ;
+assign  lsu_ctl_state_wr_en[2] = (lsu_ctl_state_en & asi_st_vld_g & thread2_g) | lctl_rst[2];
+assign  lsu_ctl_state_wr_en[3] = (lsu_ctl_state_en & asi_st_vld_g & thread3_g) | lctl_rst[3];
+
+//assign  lsu_ctl_state_rd_en[0] = lsu_ctl_state_en & ld_inst_vld_g & thread0_g ;
+//assign  lsu_ctl_state_rd_en[0] = lsu_ctl_state_en & asi_ld_vld_g & thread0_g ;
+//assign  lsu_ctl_state_rd_en[1] = lsu_ctl_state_en & asi_ld_vld_g & thread1_g ;
+//assign  lsu_ctl_state_rd_en[2] = lsu_ctl_state_en & asi_ld_vld_g & thread2_g ;
+//assign  lsu_ctl_state_rd_en[3] = lsu_ctl_state_en & asi_ld_vld_g & thread3_g ;
+
+   
+
+wire	[3:0]	redmode_rst ;	// per-thread red-mode reset; the local staging flop below is retired
+//dff #(4) rdmode_stgd1 (
+//        .din    ({tlu_lsu_redmode_rst[3:0]}),
+//        .q      ({redmode_rst[3:0]}),
+//        .clk    (clk),
+//        .se     (se),       .si (),          .so ()
+//        );  
+// redmode_rst is taken straight from tlu_lsu_redmode_rst_d1 instead.
+   assign   redmode_rst = tlu_lsu_redmode_rst_d1[3:0] ;
+// Control-register reset per thread: that thread's red-mode reset, or the
+// unit reset replicated to all four threads.
+assign  lctl_rst[3:0] =
+        {4{reset}} | redmode_rst[3:0] ;
+// Control-bit write enable per thread. lctl_rst is also folded into
+// lsu_ctl_state_wr_en, so the explicit term here is redundant but harmless.
+assign  lsuctl_ctlbits_wr_en[3:0] =
+        lctl_rst[3:0]            |   // reset
+        dfture_tap_wr_en[3:0]    |   // defeature write from the TAP
+        lsu_ctl_state_wr_en[3:0] ;   // ASI store (or reset)
+// Any thread's TAP defeature write raises the write-data mux select.
+   assign dfture_tap_wr_mx_sel = dfture_tap_wr_en[3] | dfture_tap_wr_en[2] | dfture_tap_wr_en[1] | dfture_tap_wr_en[0] ;
+   
+// Could enhance bypass/enable conditions by adding all asi conditions.  
+wire   [5:0] lsu_ctl_reg0;
+wire   [5:0] lsu_ctl_reg1;
+wire   [5:0] lsu_ctl_reg2;
+wire   [5:0] lsu_ctl_reg3;
+// Local aliases of the per-thread control registers supplied as lsu_dp_ctl_reg0..3.
+   assign lsu_ctl_reg0[5:0] = lsu_dp_ctl_reg0[5:0];
+   assign lsu_ctl_reg1[5:0] = lsu_dp_ctl_reg1[5:0];
+   assign lsu_ctl_reg2[5:0] = lsu_dp_ctl_reg2[5:0];
+   assign lsu_ctl_reg3[5:0] = lsu_dp_ctl_reg3[5:0];
+// lsu_dcache_enable: control-register bit 1 (d$ enable) of the thread in E.
+wire lsu_dcache_enable;
+assign lsu_dcache_enable = 
+  ((lsu_ctl_reg0[1] & thread0_e) | (lsu_ctl_reg1[1] & thread1_e)  | 
+   (lsu_ctl_reg2[1] & thread2_e) | (lsu_ctl_reg3[1] & thread3_e)) ;
+// lsuctl_dtlb_byp_e: control-register bit 3 (dmmu enable) is clear for the thread in E.
+assign	lsuctl_dtlb_byp_e =
+  (~lsu_ctl_reg0[3] & thread0_e) | (~lsu_ctl_reg1[3] & thread1_e) | 
+  (~lsu_ctl_reg2[3] & thread2_e) | (~lsu_ctl_reg3[3] & thread3_e) ;
+assign dtlb_bypass_e = 
+  (lsuctl_dtlb_byp_e & ~hpstate_en_e) | // hpv enabled - byp is RA->PA for supv.
+  ( tlb_byp_asi_e & ~hpstate_en_e & altspace_ldst_e) |  // altspace tlb bypass - non-hpv
+    ((hpv_priv_e & hpstate_en_e) & ~(alt_space_e & (as_if_user_asi_e | tlb_byp_asi_e)));
+	// hpv enabled VA->PA 
+
+assign  lsu_dtlb_bypass_e = dtlb_bypass_e ; 
+wire  dcache_enable_m,dcache_enable_g ;
+dff_s #(2) dbyp_stgm (
+        .din    ({dtlb_bypass_e,lsu_dcache_enable}),
+        .q      ({dtlb_bypass_m,dcache_enable_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// dbyp_stgg: M -> G staging of the bypass and dcache-enable flags.
+dff_s #(2) dbyp_stgg (
+        .din    ({dtlb_bypass_m,dcache_enable_m}),
+        .q      ({lsu_dtlb_bypass_g,dcache_enable_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// Buffered copies of control-register bits 0 (i$ enable) and 2 (immu enable), one pair per thread.
+   wire lsu_ctl_reg0_bf_b0, lsu_ctl_reg1_bf_b0, lsu_ctl_reg2_bf_b0, lsu_ctl_reg3_bf_b0;
+   wire lsu_ctl_reg0_bf_b2, lsu_ctl_reg1_bf_b2, lsu_ctl_reg2_bf_b2, lsu_ctl_reg3_bf_b2;
+   
+bw_u1_buf_1x UZsize_ctl_reg0_b0  ( .a(lsu_ctl_reg0[0]),  .z(lsu_ctl_reg0_bf_b0)  );
+bw_u1_buf_1x UZsize_ctl_reg0_b2  ( .a(lsu_ctl_reg0[2]),  .z(lsu_ctl_reg0_bf_b2)  );
+bw_u1_buf_1x UZsize_ctl_reg1_b0  ( .a(lsu_ctl_reg1[0]),  .z(lsu_ctl_reg1_bf_b0)  );
+bw_u1_buf_1x UZsize_ctl_reg1_b2  ( .a(lsu_ctl_reg1[2]),  .z(lsu_ctl_reg1_bf_b2)  );
+bw_u1_buf_1x UZsize_ctl_reg2_b0  ( .a(lsu_ctl_reg2[0]),  .z(lsu_ctl_reg2_bf_b0)  );
+bw_u1_buf_1x UZsize_ctl_reg2_b2  ( .a(lsu_ctl_reg2[2]),  .z(lsu_ctl_reg2_bf_b2)  );
+bw_u1_buf_1x UZsize_ctl_reg3_b0  ( .a(lsu_ctl_reg3[0]),  .z(lsu_ctl_reg3_bf_b0)  );
+bw_u1_buf_1x UZsize_ctl_reg3_b2  ( .a(lsu_ctl_reg3[2]),  .z(lsu_ctl_reg3_bf_b2)  );
+   // Both per-thread enables sent to the IFU are forced off while that thread's tlu_lsu_redmode bit is set.
+assign lsu_ifu_icache_en[3:0] = 
+  {lsu_ctl_reg3_bf_b0,lsu_ctl_reg2_bf_b0,lsu_ctl_reg1_bf_b0,lsu_ctl_reg0_bf_b0} & ~tlu_lsu_redmode[3:0] ;
+assign lsu_ifu_itlb_en[3:0] = 
+  {lsu_ctl_reg3_bf_b2,lsu_ctl_reg2_bf_b2,lsu_ctl_reg1_bf_b2,lsu_ctl_reg0_bf_b2} & ~tlu_lsu_redmode[3:0] ;
+
+//=========================================================================================
+//  DCACHE Access thru IOBrdge
+//=========================================================================================
+// iob_fwdpkt_vld: registered copy of the forward-packet valid, used by the TAP decodes further down.
+wire	iob_fwdpkt_vld ;
+dff_s  iobvld_stg (
+        .din    (lsu_iobrdge_fwd_pkt_vld),
+        .q      (iob_fwdpkt_vld),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// dcache access = rq_type bit 6 on a valid forward packet (unregistered valid); bit 8 picks read (1) or write (0).
+wire	dcache_iob_wr_e, dcache_iob_rd_e ;
+wire	dcache_iob_wr, dcache_iob_rd ;
+assign dcache_iob_wr =
+~lsu_iobrdge_tap_rq_type_b8[8] & lsu_iobrdge_tap_rq_type_b6_b3[6] & lsu_iobrdge_fwd_pkt_vld ;
+assign dcache_iob_rd =
+ lsu_iobrdge_tap_rq_type_b8[8] & lsu_iobrdge_tap_rq_type_b6_b3[6] & lsu_iobrdge_fwd_pkt_vld ;
+// dcrw_stge: registers both strobes into their _e versions.
+dff_s #(2) dcrw_stge (
+        .din    ({dcache_iob_wr,dcache_iob_rd}),
+        .q      ({dcache_iob_wr_e,dcache_iob_rd_e}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+assign	lsu_dc_iob_access_e = dcache_iob_wr_e | dcache_iob_rd_e ;
+
+//=========================================================================================
+//  Miscellaneous ASI
+//=========================================================================================
+
+// Defeature affects the asi lsu_ctl_reg.
+// Margin ASI
+// Diag  ASI - No TAP access
+// BIST ASI   
+// tap_thread: 2-to-4 decode of rq_type bits [1:0].
+assign  tap_thread[0] = ~lsu_iobrdge_tap_rq_type_b1_b0[1] & ~lsu_iobrdge_tap_rq_type_b1_b0[0] ;
+assign  tap_thread[1] = ~lsu_iobrdge_tap_rq_type_b1_b0[1] &  lsu_iobrdge_tap_rq_type_b1_b0[0] ;
+assign  tap_thread[2] =  lsu_iobrdge_tap_rq_type_b1_b0[1] & ~lsu_iobrdge_tap_rq_type_b1_b0[0] ;
+assign  tap_thread[3] =  lsu_iobrdge_tap_rq_type_b1_b0[1] &  lsu_iobrdge_tap_rq_type_b1_b0[0] ;
+// BIST TAP access = rq_type bit 5 on a registered forward packet; bit 8 picks read (1) or write (0).
+wire bist_tap_rd,bist_tap_wr ;
+assign  bist_tap_rd =  
+ lsu_iobrdge_tap_rq_type_b8[8] & lsu_iobrdge_tap_rq_type_b6_b3[5] & iob_fwdpkt_vld ;
+assign  bist_tap_wr = 
+~lsu_iobrdge_tap_rq_type_b8[8] & lsu_iobrdge_tap_rq_type_b6_b3[5] & iob_fwdpkt_vld ;
+
+/*   
+dff_s #(2) bstrw_stge (
+        .din    ({bist_tap_rd,bist_tap_wr}),
+        .q      ({bist_tap_rd_en,bist_tap_wr_en}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+*/
+dff_s #(1) bstrw_stge (
+        .din    ({bist_tap_wr}),
+        .q      ({bist_tap_wr_en}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+   // Margin TAP access = rq_type bit 4; only the write strobe is registered below (the 2-bit version is commented out).
+wire mrgn_tap_rd,mrgn_tap_wr ;
+assign  mrgn_tap_rd =  
+lsu_iobrdge_tap_rq_type_b8[8] & lsu_iobrdge_tap_rq_type_b6_b3[4] & iob_fwdpkt_vld ;
+assign  mrgn_tap_wr = 
+~lsu_iobrdge_tap_rq_type_b8[8] & lsu_iobrdge_tap_rq_type_b6_b3[4] & iob_fwdpkt_vld ;
+/*
+dff_s #(2) mrgnrw_stge (
+        .din    ({mrgn_tap_rd,mrgn_tap_wr}),
+        .q      ({mrgn_tap_rd_en,mrgn_tap_wr_en}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+*/
+dff_s #(1) mrgnrw_stge (
+        .din    ({mrgn_tap_wr}),
+        .q      ({mrgn_tap_wr_en}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+   
+// Defeature access from the TAP: rq_type bit 3 on a registered forward packet.
+wire  dfture_access_vld ;
+assign  dfture_access_vld = iob_fwdpkt_vld & lsu_iobrdge_tap_rq_type_b6_b3[3] ;
+//
+// rq_type bit 8 splits the access into read (1) and write (0); tap_thread[]
+// steers the resulting strobe to the addressed thread.
+wire	[3:0]	dfture_tap_rd,dfture_tap_wr ;
+//
+// Per-thread read strobes.
+assign  dfture_tap_rd[3:0] =
+  {4{lsu_iobrdge_tap_rq_type_b8[8] & dfture_access_vld}} & tap_thread[3:0] ;
+//
+// Per-thread write strobes.
+assign  dfture_tap_wr[3:0] =
+  {4{~lsu_iobrdge_tap_rq_type_b8[8] & dfture_access_vld}} & tap_thread[3:0] ;
+//
+// Default read select: set whenever none of threads 0..2 is being read. It
+// takes the place of dfture_tap_rd[3] at the staging flop that follows, so
+// the registered read select always has a bit set.
+   wire dfture_tap_rd_default;
+   assign dfture_tap_rd_default =
+          ~( dfture_tap_rd[2] |
+             dfture_tap_rd[1] |
+             dfture_tap_rd[0] ) ;
+
+dff_s #(8) dftrw_stge (  // registers the read selects (bit 3 = default select) and the four write strobes
+        .din    ({dfture_tap_rd_default, dfture_tap_rd[2:0],dfture_tap_wr[3:0]}),
+        .q    	({dfture_tap_rd_d1[3:0],                    dfture_tap_wr_en[3:0]}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+   // rst_tri_en forces the read select to 4'b1000: bits 2:0 are masked and bit 3 is forced on.
+   assign dfture_tap_rd_en [0] = dfture_tap_rd_d1[0] & ~rst_tri_en;
+   assign dfture_tap_rd_en [1] = dfture_tap_rd_d1[1] & ~rst_tri_en;
+   assign dfture_tap_rd_en [2] = dfture_tap_rd_d1[2] & ~rst_tri_en;
+   assign dfture_tap_rd_en [3] = dfture_tap_rd_d1[3] | rst_tri_en;
+   
+                                      
+// BIST_Controller ASI
+// Decoded in M (ASI 0x42, VA low byte 0x00, alternate space) and registered by bistdcd_stw.
+wire	bistctl_va_vld_m,bistctl_state_en_m;
+assign  bistctl_va_vld_m = (lsu_ldst_va_b7_b0_m[7:0] == 8'h00);
+assign  bistctl_state_en_m = (lsu_dctl_asi_state_m[7:0] == 8'h42) & bistctl_va_vld_m &
+        lsu_alt_space_m ;
+dff_s  #(2) bistdcd_stw (
+        .din    ({bistctl_va_vld_m,bistctl_state_en_m}),
+        .q    	({bistctl_va_vld,bistctl_state_en}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+// asi42 dealt with as a whole.
+/*assign  bistctl_illgl_va = (lsu_asi_state[7:0] == 8'h42) & ~bistctl_va_vld &
+        lsu_alt_space_g ;*/
+//assign  bistctl_rd_en = bistctl_state_en & asi_ld_vld_g ;
+assign  bistctl_wr_en = (bistctl_state_en & asi_st_vld_g) | bist_tap_wr_en ;  // ASI store or TAP write
+//assign  bistctl_rd_en = bistctl_state_en & ld_inst_vld_g ;
+//assign  bistctl_wr_en = (bistctl_state_en & st_inst_vld_g) | bist_tap_wr_en ;
+   
+//test_stub interface. bist_tap_wr_en should exclude?
+assign  bist_ctl_reg_wr_en = bistctl_wr_en;
+   
+
+// Self-Timed Margin Control ASI: selected by ASI 0x44 with VA[7:0] = 0x00.
+// The decode is computed from the _m inputs and registered once before use.
+wire	mrgnctl_va_vld_m,mrgnctl_state_en_m;
+assign  mrgnctl_va_vld_m = (lsu_ldst_va_b7_b0_m[7:0] == 8'h00);
+assign  mrgnctl_state_en_m = (lsu_dctl_asi_state_m[7:0] == 8'h44) & mrgnctl_va_vld_m &
+        lsu_alt_space_m ;
+dff_s  #(2) mrgndcd_stw (
+        .din    ({mrgnctl_va_vld_m,mrgnctl_state_en_m}),
+        .q    	({mrgnctl_va_vld,mrgnctl_state_en}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+// Illegal VA: ASI 0x44 in alternate space with a VA other than 0x00.
+assign  mrgnctl_illgl_va = (lsu_asi_state[7:0] == 8'h44) & ~mrgnctl_va_vld &
+        lsu_alt_space_g ;
+// Write on a qualified ASI store, a TAP write, or while dctl_rst_l is low; always blocked while sehold is set.
+assign  mrgnctl_wr_en = ((mrgnctl_state_en & asi_st_vld_g) | mrgn_tap_wr_en | ~dctl_rst_l) & ~sehold; //bug 4508
+
+// LSU Diag Reg ASI: selected by ASI 0x42 with VA[7:0] = 0x10.
+// No access from tap.
+wire	ldiagctl_va_vld_m,ldiagctl_state_en_m;
+assign  ldiagctl_va_vld_m = (lsu_ldst_va_b7_b0_m[7:0] == 8'h10);
+assign  ldiagctl_state_en_m = (lsu_dctl_asi_state_m[7:0] == 8'h42) & ldiagctl_va_vld_m &
+        lsu_alt_space_m ;
+dff_s  #(2) ldiagdcd_stw (
+        .din    ({ldiagctl_va_vld_m,ldiagctl_state_en_m}),
+        .q    	({ldiagctl_va_vld,ldiagctl_state_en}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+// asi42 dealt with as a whole: illegal-VA detection for ASI 0x42 is done once, by asi42_illgl_va below.
+/*assign  ldiagctl_illgl_va = (lsu_asi_state[7:0] == 8'h42) & ~ldiagctl_va_vld &
+        lsu_alt_space_g ;*/
+// Flush indication for ASI 0x42: raised for the two LSU-local registers and for an illegal VA.
+wire	asi42_g ;
+wire	ifu_asi42_flush_g ;
+assign	ifu_asi42_flush_g = 
+	bistctl_state_en | ldiagctl_state_en | // lsu's asi42 should not set asi queue.
+	(asi42_g & asi42_illgl_va) ; 		// illgl-va should not set asi queue.
+
+//assign  ldiagctl_rd_en = ldiagctl_state_en & asi_ld_vld_g ;
+assign  ldiagctl_wr_en = (ldiagctl_state_en & asi_st_vld_g) | reset; // qualified ASI store, or reset
+//assign  ldiagctl_rd_en = ldiagctl_state_en & ld_inst_vld_g ;
+//assign  ldiagctl_wr_en = (ldiagctl_state_en & st_inst_vld_g) | reset;
+// ASI 0x42 accepts three VA[7:0] values: 0x00 (BIST control), 0x10 (LSU diag) and 0x08 (instmsk_va_vld).
+wire  instmsk_va_vld ;
+assign  instmsk_va_vld = (ldst_va_g[7:0] == 8'h08);
+assign	asi42_g = (lsu_asi_state[7:0] == 8'h42) ; 
+assign  asi42_illgl_va = 
+	asi42_g &
+	~(ldiagctl_va_vld | bistctl_va_vld | instmsk_va_vld) &
+        lsu_alt_space_g ;
+
+
+
+//=========================================================================================
+//  Partition ID Register
+//=========================================================================================
+
+// ASI=58, VA=0x80, Per thread
+// The pid is to be used by tlb-cam, and writes to tlb. It is kept in the lsu
+// as it is used by the dtlb, plus changes to mmu_dp are to be kept to a minimum.
+
+// Trap if supervisor accesses hyperpriv asi - see supv_use_hyp. Could be incorrect.
+// Correct on merge to mainline.
+
+// The VA compares can probably be shortened.
+// Partition-ID register select: ASI 0x58 at VA[7:0] = 0x80, qualified by
+// lsu_alt_space_g and lsu_inst_vld_w.
+assign  pid_va_vld   = (ldst_va_g[7:0] == 8'h80);
+assign  pid_state_en = lsu_inst_vld_w & lsu_alt_space_g & pid_va_vld &
+                       (lsu_asi_state[7:0] == 8'h58) ;
+
+// Per-thread write enable: a qualified ASI store (asi_st_vld_g) to the PID
+// register by that thread, or reset (which enables all four threads at once).
+// remove reset ??
+assign  {pid_state_wr_en[3], pid_state_wr_en[2], pid_state_wr_en[1], pid_state_wr_en[0]} =
+  ({4{pid_state_en & asi_st_vld_g}} & {thread3_g, thread2_g, thread1_g, thread0_g}) |
+  {4{reset}} ;
+
+//assign  pid_state_rd_en[0] = pid_state_en & ld_inst_vld_g & thread0_g ;
+
+//assign  pid_state_rd_en[0] = pid_state_en & asi_ld_vld_g & thread0_g ;
+//assign  pid_state_rd_en[1] = pid_state_en & asi_ld_vld_g & thread1_g ;
+//assign  pid_state_rd_en[2] = pid_state_en & asi_ld_vld_g & thread2_g ;
+//assign  pid_state_rd_en[3] = pid_state_en & asi_ld_vld_g & thread3_g ;
+
+
+//=========================================================================================
+//  Local LDXA Read
+//=========================================================================================
+
+// Timing : rd_en changed to _en with inst_vld
+
+//wire  [3:0] misc_ctl_sel ;
+wire    misc_tap_rd_sel ;	// any TAP read request: margin, BIST or dfture
+/*
+assign  misc_tap_rd_sel = mrgn_tap_rd_en | bist_tap_rd_en |  dfture_tap_rd_sel ;
+assign  misc_ctl_sel[0] = bist_tap_rd_en | (~misc_tap_rd_sel &  bistctl_state_en & ld_inst_vld_unflushed) ;
+assign  misc_ctl_sel[1] = mrgn_tap_rd_en | (~misc_tap_rd_sel &  mrgnctl_state_en & ld_inst_vld_unflushed) ;
+assign  misc_ctl_sel[3] = dfture_tap_rd_sel ;
+
+//assign  misc_ctl_sel[2] = (~misc_tap_rd_sel & ldiagctl_state_en & ld_inst_vld_unflushed) ;
+assign  misc_ctl_sel[2] = ~(misc_ctl_sel[0] | misc_ctl_sel[1] | misc_ctl_sel[3] ); //force default
+*/
+// Select bits: [0] BIST control, [1] margin control, [3] dfture, [2] default when none of the others is set.
+//**** push misc_ctl_sel into the previous cycle: the select is generated from _m-stage signals ****
+   wire [3:0] misc_ctl_sel_din;
+
+//0-in bug, priority encode tap requests to prevent illegal type through one-hot mux   
+   wire       dfture_tap_rd_or ;
+   assign     dfture_tap_rd_or = | (dfture_tap_rd [3:0]);
+   assign     misc_tap_rd_sel = mrgn_tap_rd | bist_tap_rd |  dfture_tap_rd_or ;
+   assign     misc_ctl_sel_din[0] = bist_tap_rd |                           // TAP priority 1: BIST
+                                   (~misc_tap_rd_sel &  bistctl_state_en_m & ld_inst_vld_m) ;
+   assign     misc_ctl_sel_din[1] = (~bist_tap_rd & mrgn_tap_rd) |          // TAP priority 2: margin
+                                    (~misc_tap_rd_sel &  mrgnctl_state_en_m & ld_inst_vld_m) ;
+   assign     misc_ctl_sel_din[3] = ~bist_tap_rd & ~mrgn_tap_rd & dfture_tap_rd_or; // TAP priority 3: dfture
+   assign     misc_ctl_sel_din[2] = ~(misc_ctl_sel_din[0] | misc_ctl_sel_din[1] | misc_ctl_sel_din[3] ) ; // force default
+
+
+  
+// ASI accesses should be mutex except for non-access cases.
+assign  lsu_asi_sel_fmx1[0] = pctxt_state_en & ld_inst_vld_unflushed;  
+assign  lsu_asi_sel_fmx1[1] = sctxt_state_en & ld_inst_vld_unflushed & ~lsu_asi_sel_fmx1[0]; 
+assign  lsu_asi_sel_fmx1[2] = ~(|lsu_asi_sel_fmx1[1:0]);   //force default
+// Second select: [0] takes the first-level result or a PID read, [1] lsu_ctl read, [2] default.
+assign  lsu_asi_sel_fmx2[0] = |lsu_asi_sel_fmx1[1:0] | (pid_state_en & ld_inst_vld_unflushed) ;  
+assign  lsu_asi_sel_fmx2[1] = lsu_ctl_state_en & ld_inst_vld_unflushed & ~(lsu_asi_sel_fmx2[0]);  
+assign  lsu_asi_sel_fmx2[2] = ~(|lsu_asi_sel_fmx2[1:0]) ; //force default
+
+   wire va_wtchpt_en;
+// lsu_asi_rd_sel: some locally-held ASI register is being read by a valid instruction (no flush term).
+wire	lsu_asi_rd_sel ; 
+//assign  lsu_asi_rd_sel = ((|lsu_asi_sel_fmx1[1:0]) | 
+//                         ((pid_state_en | va_wtchpt_en) & ld_inst_vld_unflushed) |
+//		                   	 (|lsu_asi_sel_fmx2[1:0]) | 
+//                          misc_asi_rd_en) & 
+//                        lsu_inst_vld_w ;   
+
+assign  lsu_asi_rd_sel = ((|lsu_asi_sel_fmx1[1:0]) | 
+                         (pid_state_en  & ld_inst_vld_unflushed) |     //remove va_wtchpt_en
+		                   	 (|lsu_asi_sel_fmx2[1:0]) | 
+                          misc_asi_rd_en) & 
+                          lsu_inst_vld_w ;   
+
+// Read enable adds the VA-watchpoint read and is suppressed by dctl_early_flush_w.
+assign	lsu_asi_rd_en = (lsu_asi_rd_sel | lsu_va_wtchpt_sel_g) & ~dctl_early_flush_w ; //add va_wtchpt
+
+//assign	lsu_asi_rd_en = lsu_asi_rd_sel & ~lsu_flush_pipe_w ;
+
+assign  misc_asi_rd_en = (bistctl_state_en | mrgnctl_state_en | ldiagctl_state_en) & ld_inst_vld_unflushed ;
+// Data-source selects below: rst_tri_en clears the first three and forces the diagnostic tag-read default.
+assign        lsu_local_ldxa_sel_g =  lsu_asi_rd_sel  & ~rst_tri_en ; // w/o flush
+assign        lsu_local_ldxa_tlbrd_sel_g  =  (lsu_tlb_tag_rd_vld_g | lsu_tlb_data_rd_vld_g) & ~rst_tri_en;
+assign        lsu_va_wtchpt_sel_g =  (va_wtchpt_en & ld_inst_vld_unflushed) & ~rst_tri_en;
+
+assign        lsu_local_diagnstc_tagrd_sel_g  =  (~(lsu_local_ldxa_sel_g | lsu_local_ldxa_tlbrd_sel_g |
+                                                   lsu_va_wtchpt_sel_g)) | rst_tri_en; //add va_wtchpt
+
+// or diagnostic read w/ asi read enable
+assign  lsu_diagnstc_asi_rd_en  =  lsu_asi_rd_en | dtagv_diagnstc_rd_g  ; //Bug 3959
+//assign  lsu_diagnstc_asi_rd_en  =  lsu_asi_rd_en | dtagv_diagnstc_rd_g  | lsu_local_ldxa_tlbrd_sel_g;
+
+// Registered copy of the combined read enable (output named _w2).
+dff_s  #(1) lldxa_stw2 (
+        .din    (lsu_diagnstc_asi_rd_en),
+        .q      (lsu_asi_rd_en_w2),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+wire	ldxa_tlbrd0_w2,ldxa_tlbrd1_w2,ldxa_tlbrd2_w2,ldxa_tlbrd3_w2;	// per-thread TLB-read select
+wire	ldxa_tlbrd0_w3,ldxa_tlbrd1_w3,ldxa_tlbrd2_w3,ldxa_tlbrd3_w3;	// same, one cycle later
+
+// stg mismatched intentionally. stxa_tid decode can be used by ldxa.
+assign	ldxa_tlbrd3_w2 = tlu_stxa_thread3_w2 & lsu_local_ldxa_tlbrd_sel_g ;
+assign	ldxa_tlbrd2_w2 = tlu_stxa_thread2_w2 & lsu_local_ldxa_tlbrd_sel_g ;
+assign	ldxa_tlbrd1_w2 = tlu_stxa_thread1_w2 & lsu_local_ldxa_tlbrd_sel_g ;
+assign	ldxa_tlbrd0_w2 = tlu_stxa_thread0_w2 & lsu_local_ldxa_tlbrd_sel_g ;
+
+// Bug 3959
+dff_s  #(4) tlbrd_stw3 (
+        .din    ({ldxa_tlbrd3_w2,ldxa_tlbrd2_w2,
+        	ldxa_tlbrd1_w2,ldxa_tlbrd0_w2}),
+        .q    	({ldxa_tlbrd3_w3,ldxa_tlbrd2_w3,
+        	ldxa_tlbrd1_w3,ldxa_tlbrd0_w3}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+// pid and va-wtchpt va removed.
+// Combined illegal-VA indication: OR of the individual ASI illegal-VA detectors.
+assign  lsu_asi_illgl_va =
+  asi42_illgl_va | mrgnctl_illgl_va | pscxt_ldxa_illgl_va | lsuctl_illgl_va ;
+// Report it against the issuing thread, for valid loads only (ld_inst_vld_g).
+assign  lsu_asi_illgl_va_cmplt[3:0] =
+  {4{lsu_asi_illgl_va & ld_inst_vld_g}} & {thread3_g, thread2_g, thread1_g, thread0_g} ;
+// Registered copy, one cycle later.
+dff_s  #(4) lsuillgl_stgw2(
+        .clk    (clk),
+        .din    (lsu_asi_illgl_va_cmplt[3:0]),
+        .q      (lsu_asi_illgl_va_cmplt_w2[3:0]),
+        .se     (se),       .si (),          .so ()
+        );
+
+//=========================================================================================
+//  ASI_DCACHE_TAG way decode
+//=========================================================================================
+
+// Bug 4569. 
+// add sehold. adding in dctldp flop will cause critical path.
+
+wire	[3:0]	dtag_rsel_dcd,dtag_rsel_hold ;	// one-hot decode of VA[12:11], and its registered copy
+assign  dtag_rsel_dcd[3:0]  =  	{(lsu_ldst_va_b12_b11_m[12:11] == 2'b11),
+                               	(lsu_ldst_va_b12_b11_m[12:11] == 2'b10),
+                               	(lsu_ldst_va_b12_b11_m[12:11] == 2'b01),
+                                (lsu_ldst_va_b12_b11_m[12:11] == 2'b00)};
+//bug5994
+dffe_s #(4) dtag_hold (
+        .din    (dtag_rsel_dcd[3:0]),
+        .q      (dtag_rsel_hold[3:0]),
+        .en     (sehold),	// NOTE(review): enable is tied to sehold as written -- confirm intended polarity against bug5994
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// While sehold is set the registered select is driven out; otherwise the live decode is used.
+assign	lsu_dtag_rsel_m[3:0] = sehold ? dtag_rsel_hold[3:0] : dtag_rsel_dcd[3:0] ;
+
+
+//=========================================================================================
+//  Watchpoint Control
+//=========================================================================================
+   wire va_vld;   // VA[7:0] matches offset 0x38
+
+assign  va_vld = (ldst_va_g[7:0] == 8'h38);
+// VA watchpoint register select: ASI 0x58 at that offset, qualified by
+// lsu_alt_space_g and lsu_inst_vld_w.
+assign  va_wtchpt_en = lsu_inst_vld_w & lsu_alt_space_g & va_vld &
+      (lsu_asi_state[7:0] == 8'h58) ;
+
+// Illegal va checking for asi 58 done in MMU.
+// One VA watchpoint is supported per thread.
+//
+// Notes carried over from the original author:
+//   Need to read register !!!
+//   Switchout thread on read.
+//   qualify with inst_vld_w.
+//assign  va_wtchpt_rd_en = va_wtchpt_en & ld_inst_vld_g ;
+
+   wire va_wtchpt0_wr_en, va_wtchpt1_wr_en, va_wtchpt2_wr_en, va_wtchpt3_wr_en;
+
+// Per-thread write strobe: qualified ASI store (asi_st_vld_g) from that thread.
+assign  {va_wtchpt3_wr_en, va_wtchpt2_wr_en, va_wtchpt1_wr_en, va_wtchpt0_wr_en} =
+  {4{va_wtchpt_en & asi_st_vld_g}} & {thread3_g, thread2_g, thread1_g, thread0_g} ;
+
+// Active-low versions of the same strobes.
+assign  {lsu_va_wtchpt3_wr_en_l, lsu_va_wtchpt2_wr_en_l,
+         lsu_va_wtchpt1_wr_en_l, lsu_va_wtchpt0_wr_en_l} =
+  ~{va_wtchpt3_wr_en, va_wtchpt2_wr_en, va_wtchpt1_wr_en, va_wtchpt0_wr_en} ;
+
+assign  vw_wtchpt_cmp_en_m =  // VA Write Watchpoint Enable
+  (thread0_m & lsu_ctl_reg0[4]) | 
+  (thread1_m & lsu_ctl_reg1[4]) | 
+  (thread2_m & lsu_ctl_reg2[4]) | 
+  (thread3_m & lsu_ctl_reg3[4]) ; 
+// Bit 4 of each thread's lsu_ctl_reg enables write watchpoints; bit 5 enables read watchpoints.
+assign  vr_wtchpt_cmp_en_m =  // VA Read Watchpoint Enable
+  (thread0_m & lsu_ctl_reg0[5]) | 
+  (thread1_m & lsu_ctl_reg1[5]) | 
+  (thread2_m & lsu_ctl_reg2[5]) | 
+  (thread3_m & lsu_ctl_reg3[5]) ; 
+// Compare enable: write-enable applies to stores, read-enable applies to loads.
+   assign  va_wtchpt_cmp_en_m =
+(vw_wtchpt_cmp_en_m & st_inst_vld_m) | 
+(vr_wtchpt_cmp_en_m & ld_inst_vld_m) ; 
+
+//=========================================================================================
+//  Hit/Miss/Fill Control
+//=========================================================================================
+dff_s  #(10) stg_m (	// _e -> _m: load/store valid, size, destination register, FP flag
+        .din    ({ld_inst_vld_e, st_inst_vld_e,ldst_sz_e[1:0],
+    ifu_lsu_rd_e[4:0],ifu_lsu_ldst_fp_e}),
+        .q      ({ld_inst_vld_m, st_inst_vld_m,ldst_sz_m[1:0],
+    ld_rd_m[4:0],fp_ldst_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+wire    dcache_arry_data_sel_e;
+// Set for any of: BIST read, pipeline load, IOB read; registered into _m below.
+assign   dcache_arry_data_sel_e = lsu_bist_rvld_e | ld_inst_vld_e | dcache_iob_rd_e ;
+dff_s #(1) dcache_arry_data_sel_stgm (
+  .din (dcache_arry_data_sel_e),
+  .q   (dcache_arry_data_sel_m),
+  .clk    (clk),
+  .se     (se),       .si (),          .so ()
+); 
+
+// _m -> _g: same fields; the valid bits come out as *_unflushed and are qualified further below.
+dff_s  #(10) stg_g (
+        .din    ({ld_inst_vld_m, st_inst_vld_m,ldst_sz_m[1:0],
+    ld_rd_m[4:0],fp_ldst_m}),
+        .q      ({ld_inst_vld_unflushed, st_inst_vld_unflushed,ldst_sz_g[1:0],
+    ld_rd_g[4:0],fp_ldst_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+// asi_st_vld_g is gated by dctl_early_flush_w; ld/st_inst_vld_g are gated by dctl_flush_pipe_w.
+//assign  asi_ld_vld_g = ld_inst_vld_unflushed & lsu_inst_vld_w & ~dctl_early_flush_w ;
+assign  asi_st_vld_g = st_inst_vld_unflushed & lsu_inst_vld_w & ~dctl_early_flush_w ;
+assign  ld_inst_vld_g = ld_inst_vld_unflushed & lsu_inst_vld_w & ~dctl_flush_pipe_w ;
+assign  st_inst_vld_g = st_inst_vld_unflushed & lsu_inst_vld_w & ~dctl_flush_pipe_w ;
+
+// Per-way hit, masked to zero whenever dcache_enable_g is low.
+assign  lsu_way_hit[3:0] =
+  {cache_way_hit_buf1[3], cache_way_hit_buf1[2], cache_way_hit_buf1[1], cache_way_hit_buf1[0]} &
+  {4{dcache_enable_g}} ;
+   
+//assign  st_set_index_g[5:0] = ldst_va_g[9:4] ;
+//assign  st_set_way_g[3:1] = lsu_way_hit[3:1] ;
+
+// This should contain ld miss, MMU miss, exception. 
+// should tlb_cam_miss be factored in or can miss/hit be solely
+// based on way_hit.
+
+wire  tlb_cam_hit_mod ;	// tlb_cam_hit delayed by one cycle
+dff_s  stgcmiss_g (
+        .din    (tlb_cam_hit),
+        .q      (tlb_cam_hit_mod),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// NOTE !! qualification with tte_data_parity_error removed for timing.
+assign tlb_cam_hit_g = tlb_cam_hit_mod ;
+//assign tlb_cam_hit_g = tlb_cam_hit_mod & ~tte_data_parity_error ;
+
+/*assign  ld_stb_hit_g = 
+        ld_stb0_full_raw_g | ld_stb1_full_raw_g |
+        ld_stb2_full_raw_g | ld_stb3_full_raw_g |
+        ld_stb0_partial_raw_g | ld_stb1_partial_raw_g |
+        ld_stb2_partial_raw_g | ld_stb3_partial_raw_g ; */
+
+wire nceen_pipe_m, nceen_pipe_g ;	// nceen bit of the thread in M, and its registered copy
+
+   wire [3:0] lsu_nceen_d1;
+// Register the per-thread ifu_lsu_nceen bits before they are used.
+dff_s #(4) nceen_stg (
+   .din (ifu_lsu_nceen[3:0]),
+   .q   (lsu_nceen_d1[3:0]),
+   .clk (clk),
+   .se  (se),       .si (),          .so ()
+);
+                
+// Pick the registered bit belonging to the thread flagged by thread0_m..thread3_m.
+assign  nceen_pipe_m = 
+(thread0_m & lsu_nceen_d1[0]) | (thread1_m & lsu_nceen_d1[1]) |
+(thread2_m & lsu_nceen_d1[2]) | (thread3_m & lsu_nceen_d1[3]) ;
+
+dff_s #(1)  stgg_een (
+        .din    (nceen_pipe_m),
+        .q      (nceen_pipe_g),
+        .clk  	(clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+//wire	tte_data_perror_corr_en ;
+wire	tte_data_perror_unc_en ;
+// separate ld from st for error reporting.
+assign	tte_data_perror_unc_en = ld_inst_vld_unflushed & tte_data_perror_unc & nceen_pipe_g ; // loads only, and only when nceen is set for the thread
+//assign	tte_data_perror_unc_en = tte_data_perror_unc & nceen_pipe_g ;
+//assign	tte_data_perror_corr_en = tte_data_perror_corr ;
+//assign	tte_data_perror_corr_en = tte_data_perror_corr & ceen_pipe_g ;
+
+wire	dtlb_perror_en_w,dtlb_perror_en_w2,dtlb_perror_en_w3 ;
+assign	dtlb_perror_en_w = tte_data_perror_unc_en ;
+//assign	dtlb_perror_en_w = tte_data_perror_unc_en | tte_data_perror_corr_en ;
+// Two back-to-back flops delay the enable by one and two cycles (_w2, _w3).
+dff_s #(1)  stgw2_perr (
+        .din    (dtlb_perror_en_w),
+        .q      (dtlb_perror_en_w2),
+        .clk  	(clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dff_s #(1)  stgw3_perr (
+        .din    (dtlb_perror_en_w2),
+        .q      (dtlb_perror_en_w3),
+        .clk  	(clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// For now, "or" ld_inst_vld_g and ldst_dbl. Ultimately, ldst_dbl
+// needs to cause ld_inst_vld_g to be asserted.
+// st and ld ldst_dbl terms are redundant.
+// Diagnostic Dcache access will force a hit in cache. Whatever is read
+// out will be written back to irf regardless of whether hit or not. The
+// expectation is that cache has been set up to hit.
+// lsu_dcache_enable is redundant as factored in lsu_way_hit !!!
+// squash both ld_miss and ld_hit in case of dtlb unc data error. (NOTE(review): that term is commented out in both equations below.)
+   wire ldd_force_l2access_g;
+   
+   wire int_ldd_g, fp_ldd_g;
+   assign fp_ldd_g = fp_ldst_g & ~(blk_asi_g & lsu_alt_space_g); // FP access that is not a block-ASI access
+
+   //sas code need int_ldd_g
+   assign int_ldd_g = ldst_dbl_g  & ~fp_ldd_g;
+   assign ldd_force_l2access_g = int_ldd_g; // integer double accesses are always reported as a miss
+// Load miss: any miss cause, not a D-cache diagnostic ASI, on a valid load; ncache_asild_rq_g is OR'ed in unconditionally.
+assign  lsu_ld_miss_wb  = 
+(~(|lsu_way_hit[3:0]) | ~dcache_enable_g | ~(tlb_cam_hit_g | lsu_dtlb_bypass_g) |
+  ldxa_internal | ldd_force_l2access_g | atomic_g |  endian_mispred_g | // remove stb_cam_hit
+  dcache_rd_parity_error | dtag_perror_g) & 
+	~((dc_diagnstc_asi_g & lsu_alt_space_g)) & 
+	//~(tte_data_perror_unc_en | tte_data_perror_corr_en | (dc_diagnstc_asi_g & lsu_alt_space_g)) & 
+  (ld_vld & (~lsu_alt_space_g | (lsu_alt_space_g & recognized_asi_g))) |
+  //(ld_inst_vld_g & (~lsu_alt_space_g | (lsu_alt_space_g & recognized_asi_g))) |
+  //(ldst_dbl_g & st_inst_vld_g)  // signal ld-miss for stdbl.
+  ncache_asild_rq_g ;   // asi ld requires bypass
+// Load hit: the complement of every miss cause above, under the same load qualification.
+assign  lsu_ld_hit_wb   = 
+((|lsu_way_hit[3:0])  & dcache_enable_g & (tlb_cam_hit_g | lsu_dtlb_bypass_g) &  //bug3702
+  ~ldxa_internal & ~dcache_rd_parity_error & ~dtag_perror_g & ~endian_mispred_g &
+  ~ldd_force_l2access_g & ~atomic_g &  ~ncache_asild_rq_g) &  // remove stb_cam_hit
+~((dc_diagnstc_asi_g & lsu_alt_space_g)) &
+//~(tte_data_perror_unc_en | tte_data_perror_corr_en | (dc_diagnstc_asi_g & lsu_alt_space_g)) &
+  ld_vld & (~lsu_alt_space_g | (lsu_alt_space_g & recognized_asi_g)) ;
+//ld_inst_vld_g & (~lsu_alt_space_g | (lsu_alt_space_g & recognized_asi_g)) ;
+// force hit for diagnostic write. 
+
+// correctible dtlb data parity error on cam will cause dmmu miss.
+// prefetch will rely on the ld_inst_vld/st_inst_vld not being asserted
+// to prevent mmu_miss from being signalled if prefetch does not translate.
+// Timing Change : Remove data perror from dmmu_miss ; to be treated as disrupting trap.
+//SC assign dmmu_miss_g = 
+//SC   ~tlb_cam_hit_mod & ~lsu_dtlb_bypass_g & 
+//SC   //~(tlb_cam_hit_mod & ~tte_data_perror_corr) & ~lsu_dtlb_bypass_g & 
+//SC   ((ld_inst_vld_unflushed & lsu_inst_vld_w) | 
+//SC    (st_inst_vld_unflushed & lsu_inst_vld_w)) & 
+//SC     ~(ldxa_internal | stxa_internal | early_trap_vld_g) ;
+
+//SC    wire dmmu_miss_only_g ;
+   
+//SC assign dmmu_miss_only_g = 
+//SC  ~tlb_cam_hit_mod & ~lsu_dtlb_bypass_g & 
+//SC   //~(tlb_cam_hit_mod & ~tte_data_perror_corr) & ~lsu_dtlb_bypass_g & 
+//SC   ((ld_inst_vld_unflushed & lsu_inst_vld_w) | 
+//SC    (st_inst_vld_unflushed & lsu_inst_vld_w)) & 
+//SC     ~(ldxa_internal | stxa_internal);
+    
+// Atomic Handling :
+// Bypass to irf will occur. However, the loads will not write to cache/tag etc.
+
+// Exceptions, tlb miss will have to be included.  
+// diagnostic dcache/dtagv will read respective arrays in pipeline. (changed!)
+// They will not switch out thread with this assumption. 
+
+//dc_diagnstc will not switch out, dtagv will switch out
+ 
+//wire dc_diagnstc_rd_g;  
+//assign  dc_diagnstc_rd_g = dc_diagnstc_asi_g & ld_inst_vld_g & lsu_alt_space_g ; 
+
+//wire	dc0_diagnstc_rd_g,dc1_diagnstc_rd_g,dc2_diagnstc_rd_g,dc3_diagnstc_rd_g ;
+//wire	dc0_diagnstc_rd_w2,dc1_diagnstc_rd_w2,dc2_diagnstc_rd_w2,dc3_diagnstc_rd_w2 ;
+//assign  dc0_diagnstc_rd_g = dc_diagnstc_rd_g & thread0_g ;
+//assign  dc1_diagnstc_rd_g = dc_diagnstc_rd_g & thread1_g ;
+//assign  dc2_diagnstc_rd_g = dc_diagnstc_rd_g & thread2_g ;
+//assign  dc3_diagnstc_rd_g = dc_diagnstc_rd_g & thread3_g ;
+
+//dff #(4)  stgw2_dcdiag (
+//        .din  ({dc3_diagnstc_rd_g,dc2_diagnstc_rd_g,dc1_diagnstc_rd_g,dc0_diagnstc_rd_g}),
+//        .q    ({dc3_diagnstc_rd_w2,dc2_diagnstc_rd_w2,dc1_diagnstc_rd_w2,dc0_diagnstc_rd_w2}),
+//        .clk  (clk),
+//        .se     (se),       .si (),          .so ()
+//        );
+
+assign  dtagv_diagnstc_rd_g = dtagv_diagnstc_asi_g & ld_inst_vld_g & lsu_alt_space_g ; // valid alternate-space load with the dtagv diagnostic ASI
+
+// Prefetch will swo thread if it does not miss in tlb.
+dff_s  stgm_prf (
+        .din    (ifu_lsu_pref_inst_e),
+        .q      (pref_inst_m),
+        .clk  (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// Second stage of the prefetch indication: _m -> _g.
+dff_s  stgg_prf (
+        .din    (pref_inst_m),
+        .q      (pref_inst_g),
+        .clk  (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+
+
+//assign	lsu_ifu_data_error_w = 1'b0 ;
+// atomic_ld_squash_e is set when lmq_ld_rq_type_e[2:0] equals 3'b011.
+// is this redundant ? isn't lsu_ncache_ld_e sufficient ?
+assign  atomic_ld_squash_e = 
+  ~lmq_ld_rq_type_e[2] & lmq_ld_rq_type_e[1] & lmq_ld_rq_type_e[0] ;
+
+// bypass will occur with hit in d$ or data return from L2.
+// Fill for dcache diagnostic rd will happen regardless. dfill vld qualified with
+// flush_pipe and inst_vld !!!
+
+//timing fix. move logic to previous cycle M.   
+//assign  lsu_exu_dfill_vld_w2  =   
+//  (l2fill_vld_g & ~(unc_err_trap_g | l2fill_fpld_g))  	      | // fill
+//  (~fp_ldst_g & ld_inst_vld_unflushed & lsu_inst_vld_w)       | // in pipe
+//  intld_byp_data_vld ;	                                        // bypass
+
+   wire lsu_exu_dfill_vld_m;
+   wire	intld_byp_data_vld_e,intld_byp_data_vld_m ;
+   wire	intld_byp_data_vld ;
+   wire	ldxa_swo_annul ;
+// Three sources: a non-FP fill without an uncorrectable-error trap, a non-FP in-pipe load, or an integer bypass.
+assign lsu_exu_dfill_vld_m = 
+  (l2fill_vld_m & ~(unc_err_trap_m | l2fill_fpld_m))  	      | // fill
+  (~fp_ldst_m & ld_inst_vld_m & 
+	~(ldxa_swo_annul & lsu_alt_space_m) & flush_w_inst_vld_m) | // in pipe
+  intld_byp_data_vld_m ;	                                      // bypass
+// A single flop takes the _m term to the _w2-named output.
+dff_s #(1) dfill_vld_stgg (
+   .din (lsu_exu_dfill_vld_m),
+   .q   (lsu_exu_dfill_vld_w2),
+   .clk    (clk),
+   .se     (se),       .si (),          .so ()
+);       
+
+//------              
+// Bld errors : Bug 4315
+// Errors need to be accumulated across helpers. Once unc error detected 
+// in any helper, then all further writes to frf are squashed.
+// daccess_error trap taken at very end if *any* helper had an unc error.
+
+wire	bld_cnt_max_m,bld_cnt_max_g ;
+assign	bld_cnt_max_m = lsu_bld_cnt_m[2] & lsu_bld_cnt_m[1] & lsu_bld_cnt_m[0] ; // count == 3'b111
+
+wire	[1:0]	cpx_ld_err_m ;
+dff_s #(3) lderr_stgm (
+   .din ({lsu_cpx_pkt_ld_err[1:0],bld_cnt_max_m}),
+   .q   ({cpx_ld_err_m[1:0],bld_cnt_max_g}),
+   .clk    (clk),
+   .se     (se),       .si (),          .so ()
+);       
+
+wire [1:0] bld_err ;
+wire [1:0] bld_err_din ;
+wire 	   bld_rst ;
+// Accumulate errors: sticky OR of the incoming error bits with what is already held.
+assign	bld_err_din[1:0] = cpx_ld_err_m[1:0] | bld_err[1:0] ;
+assign	bld_rst = reset | lsu_bld_reset ;
+// Updated only when a helper completes (en); cleared by reset or lsu_bld_reset (rst).
+dffre_s #(2) blderr_ff (
+        .din    (bld_err_din[1:0]),
+        .q      (bld_err[1:0]),
+        .clk    (clk),
+        .en     (lsu_bld_helper_cmplt_m), .rst (bld_rst),
+        .se     (se),	.si (),	.so ()
+        );
+
+wire	bld_helper_cmplt_g ;
+dff_s  bldh_stgg (
+   .din (lsu_bld_helper_cmplt_m),
+   .q   (bld_helper_cmplt_g),
+   .clk    (clk),
+   .se     (se),       .si (),          .so ()
+);
+// bld_err[1] is treated as the uncorrectable error and bld_err[0] as the correctable one.
+wire	bld_unc_err_pend_g, bld_unc_err_pend_w2 ;
+assign	bld_unc_err_pend_g = bld_err[1] & bld_helper_cmplt_g ;
+wire	bld_corr_err_pend_g, bld_corr_err_pend_w2 ;
+// pended unc error gets priority.
+assign	bld_corr_err_pend_g = bld_err[0] & ~bld_err[1] & bld_helper_cmplt_g ;
+
+wire	bld_squash_err_g,bld_squash_err_w2 ;
+// bld cnt should be vld till g
+assign	bld_squash_err_g = bld_helper_cmplt_g & ~bld_cnt_max_g ; // helper completed, but the count was not at its maximum
+// Stage the three indications by one cycle (_g -> _w2).
+dff_s #(3)  bldsq_stgw2 (
+   .din ({bld_squash_err_g,bld_unc_err_pend_g,bld_corr_err_pend_g}),
+   .q   ({bld_squash_err_w2,bld_unc_err_pend_w2,bld_corr_err_pend_w2}),
+   .clk    (clk),
+   .se     (se),       .si (),          .so ()
+);
+
+//------              
+   
+wire	stb_cam_hit_w2 ;
+wire	fld_vld_sync_no_camhit,fld_vld_sync_no_camhit_w2 ;
+wire	fld_vld_async,fld_vld_async_w2 ;
+dff_s  #(3) stbchit_stg (
+        .din    ({stb_cam_hit,fld_vld_sync_no_camhit,fld_vld_async}),
+        .q      ({stb_cam_hit_w2,fld_vld_sync_no_camhit_w2,fld_vld_async_w2}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// In-pipe FP load hit, before the stb_cam_hit term is applied (that is done after the flop above).
+assign  fld_vld_sync_no_camhit =  
+	(lsu_ld_hit_wb & ~tte_data_perror_unc_en & fp_ldst_g &
+	~dctl_flush_pipe_w) ; // l1hit 
+// FP data arriving outside the pipe: an L2 fill without an error trap or pending bld unc error, or bypass data.
+assign	fld_vld_async =
+        (l2fill_vld_g & l2fill_fpld_g & ~(unc_err_trap_g | bld_unc_err_pend_g))  | 
+						// fill from l2, // bug 3705, 4315(err_trap)
+        fpld_byp_data_vld ;     // bypass data
+// Final FP load valid: the in-pipe term is dropped on stb_cam_hit_w2; the async term is not.
+assign	lsu_ffu_ld_vld = 
+	(fld_vld_sync_no_camhit_w2 & ~stb_cam_hit_w2) |
+	fld_vld_async_w2 ;
+
+
+/*dff  #(1) fldvld_stgw2 (
+        .din    (ffu_ld_vld),
+        .q      (lsu_ffu_ld_vld),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); */
+
+dff_s  #(2) dtid_stgm (	// fill thread id: _e -> _m
+        .din    (lsu_dfill_tid_e[1:0]),
+        .q      (dfq_tid_m[1:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dff_s  #(2) dtid_stgg (	// fill thread id: _m -> _g
+        .din    (dfq_tid_m[1:0]),
+        .q      (dfq_tid_g[1:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Timing Change -  shifting dfill-data sel gen. to m-stage
+//assign  ldbyp_tid[0] = ld_thrd_byp_sel_g[1] | ld_thrd_byp_sel_g[3] ;
+//assign  ldbyp_tid[1] = ld_thrd_byp_sel_g[2] | ld_thrd_byp_sel_g[3] ;
+wire 	[3:0]	ld_thrd_byp_sel_m ;
+assign  ldbyp_tid_m[0] = ld_thrd_byp_sel_m[1] | ld_thrd_byp_sel_m[3] ; // encode the bypass select into a 2-bit thread id
+assign  ldbyp_tid_m[1] = ld_thrd_byp_sel_m[2] | ld_thrd_byp_sel_m[3] ;
+
+// Thread id priority: in-pipe load first, then L2 fill, otherwise the bypass thread.
+/*assign  lsu_exu_thr_g[1:0] = ld_inst_vld_unflushed ? thrid_g[1:0] :
+          l2fill_vld_g ? dfq_tid_g[1:0] : ldbyp_tid[1:0] ; */
+assign  lsu_exu_thr_m[1:0] = ld_inst_vld_m ? thrid_m[1:0] :
+          l2fill_vld_m ? dfq_tid_m[1:0] : ldbyp_tid_m[1:0] ; 
+
+// What is the policy for load-double/atomics to update cache ?
+// cas will not update cache. similarly neither will ldstub nor cas.
+// BIST will effect dcache only, not tags and vld bits.
+// Removed dcache_enable from dc_diagnstc_wr_en !!!
+wire	l2fill_vld_e ;
+wire	dcache_alt_src_wr_e ;
+assign	l2fill_vld_e = lsu_l2fill_vld & ~lsu_cpx_pkt_prefetch2 ; // fill valid, excluding lsu_cpx_pkt_prefetch2 packets
+assign  lsu_dcache_wr_vld_e = 
+  (l2fill_vld_e & ~ignore_fill & ~atomic_ld_squash_e & ~ld_sec_active & ~lsu_ncache_ld_e) |
+  lsu_st_wr_dcache  | // st writes from stb
+  dcache_alt_src_wr_e ;
+// Writes from the alternate sources: diagnostic write, BIST, IOB.
+assign  dcache_alt_src_wr_e =
+  (lsu_diagnstc_wr_src_sel_e & dc_diagnstc_wr_en)
+  | lsu_bist_wvld_e     // bist engine writes to cache
+  | dcache_iob_wr_e ;  // iobridge request write to dcache
+
+//d$ valid bit 
+   wire dv_diagnstic_wr;  
+assign  dv_diagnstic_wr = (lsu_diagnstc_wr_src_sel_e & dtagv_diagnstc_wr_en & lsu_diagnstc_wr_data_b0) ; // diagnostic write whose data bit 0 is set
+
+   wire dva_din_e;
+   wire ld_fill_e;
+   
+   assign ld_fill_e= (l2fill_vld_e & ~atomic_ld_squash_e & ~ld_sec_active & ~lsu_ncache_ld_e) ;   //ld-fill
+   //######################################
+   //snp      => dva_din = 0
+   //ld fill  => dva_din = 1
+   //diag wrt => dva_din = wrt_value
+   //######################################
+   assign dva_din_e =  ld_fill_e  | //ld-fill
+                       dv_diagnstic_wr; // diagnostic write valid bit
+
+   
+// iob rd dominates
+   wire lsu_dc_alt_rd_vld_e;
+   
+assign	lsu_dc_alt_rd_vld_e = dcache_iob_rd_e | lsu_bist_rvld_e ; // read request from IOB or BIST
+
+   //?? default when no ld in pipe
+   assign dcache_alt_mx_sel_e = 
+		//lsu_dcache_wr_vld_e | : Timing
+		dcache_alt_src_wr_e | // rm st updates/fill - ~ld_inst_vld_e.
+		lsu_dcache_wr_vld_e | 
+		lsu_dc_alt_rd_vld_e  | ~ld_inst_vld_e;
+  
+   assign dcache_alt_mx_sel_e_bf = dcache_alt_mx_sel_e; // plain copy under a second name
+// Read valid: a pipeline load or an alternate-source read, passed through an explicit buffer cell.
+   wire   dcache_rvld_e_tmp, dcache_rvld_e_minbf;   
+   assign dcache_rvld_e_tmp =  ld_inst_vld_e | lsu_dc_alt_rd_vld_e ;
+   bw_u1_minbuf_5x  UZfix_dcache_rvld_e_minbf (.a(dcache_rvld_e_tmp), .z(dcache_rvld_e_minbf)); // NOTE(review): presumably a min-delay buffer -- confirm against the cell library
+   assign dcache_rvld_e = dcache_rvld_e_minbf;
+   
+   wire   lsu_dtag_wr_vld_e_tmp;
+   
+assign  lsu_dtag_wr_vld_e_tmp = 
+  ld_fill_e  & ~ignore_fill | //ld fill   //bug3601, 3676
+  (lsu_diagnstc_wr_src_sel_e & dtagv_diagnstc_wr_en) ; // dtag/vld diagnostic wr
+// The same tag-write term drives two separately buffered outputs.
+bw_u1_buf_30x UZsize_lsu_dtag_wrreq_x     ( .a(lsu_dtag_wr_vld_e_tmp), .z(lsu_dtag_wrreq_x_e)     );
+bw_u1_buf_30x UZsize_lsu_dtag_index_sel_x ( .a(lsu_dtag_wr_vld_e_tmp), .z(lsu_dtag_index_sel_x_e) );
+   
+assign  lsu_dtagv_wr_vld_e = 
+  lsu_dtag_wr_vld_e_tmp | 	// fill
+  dva_svld_e        |   // snp
+  lsu_bist_wvld_e ;     // bist clears dva by default
+
+// mem cell change for dva
+// Per-bit write enables for a fill. Bit index = {fill_addr[5:4], way}, i.e.
+// bit (4*k + w) is set when dcache_fill_addr_e[5:4] == k and
+// lsu_dcache_fill_way_e[w] is set. Each group of four assigns below covers
+// one value of addr[5:4] (3, 2, 1, 0 from top to bottom).
+   wire [15:0] dva_fill_bit_wr_en_e;
+
+   assign      dva_fill_bit_wr_en_e[15] = dcache_fill_addr_e[5] & dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[3];
+   assign      dva_fill_bit_wr_en_e[14] = dcache_fill_addr_e[5] & dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[2];
+   assign      dva_fill_bit_wr_en_e[13] = dcache_fill_addr_e[5] & dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[1];
+   assign      dva_fill_bit_wr_en_e[12] = dcache_fill_addr_e[5] & dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[0];
+
+   assign      dva_fill_bit_wr_en_e[11] = dcache_fill_addr_e[5] & ~dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[3];
+   assign      dva_fill_bit_wr_en_e[10] = dcache_fill_addr_e[5] & ~dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[2];
+   assign      dva_fill_bit_wr_en_e[09] = dcache_fill_addr_e[5] & ~dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[1];
+   assign      dva_fill_bit_wr_en_e[08] = dcache_fill_addr_e[5] & ~dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[0];
+  
+   assign      dva_fill_bit_wr_en_e[07] = ~dcache_fill_addr_e[5] & dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[3];
+   assign      dva_fill_bit_wr_en_e[06] = ~dcache_fill_addr_e[5] & dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[2];
+   assign      dva_fill_bit_wr_en_e[05] = ~dcache_fill_addr_e[5] & dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[1];
+   assign      dva_fill_bit_wr_en_e[04] = ~dcache_fill_addr_e[5] & dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[0];
+
+   assign      dva_fill_bit_wr_en_e[03] = ~dcache_fill_addr_e[5] & ~dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[3];
+   assign      dva_fill_bit_wr_en_e[02] = ~dcache_fill_addr_e[5] & ~dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[2];
+   assign      dva_fill_bit_wr_en_e[01] = ~dcache_fill_addr_e[5] & ~dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[1];
+   assign      dva_fill_bit_wr_en_e[00] = ~dcache_fill_addr_e[5] & ~dcache_fill_addr_e[4] & lsu_dcache_fill_way_e[0];
+
+   // Final dva bit enables: a snoop (dva_svld_e) takes its own enables,
+   // otherwise the fill decode above is used.
+   wire [15:0] dva_bit_wr_en_e;
+   assign      dva_bit_wr_en_e[15:0] = dva_svld_e ? dva_snp_bit_wr_en_e[15:0] : dva_fill_bit_wr_en_e;
+
+   // Bit-by-bit buffered copy of the 5-bit snoop address dva_snp_addr_e.
+   // NOTE(review): bw_u1_buf_5x is presumably a non-inverting library buffer - confirm.
+   wire [4:0]  dva_snp_addr_e_bf;
+   bw_u1_buf_5x UZsize_dva_snp_addr_e_bf_b4 (.a(dva_snp_addr_e[4]), .z(dva_snp_addr_e_bf[4]));
+   bw_u1_buf_5x UZsize_dva_snp_addr_e_bf_b3 (.a(dva_snp_addr_e[3]), .z(dva_snp_addr_e_bf[3]));
+   bw_u1_buf_5x UZsize_dva_snp_addr_e_bf_b2 (.a(dva_snp_addr_e[2]), .z(dva_snp_addr_e_bf[2]));
+   bw_u1_buf_5x UZsize_dva_snp_addr_e_bf_b1 (.a(dva_snp_addr_e[1]), .z(dva_snp_addr_e_bf[1]));
+   bw_u1_buf_5x UZsize_dva_snp_addr_e_bf_b0 (.a(dva_snp_addr_e[0]), .z(dva_snp_addr_e_bf[0]));
+
+   // dva write address: buffered snoop address when dva_svld_e is set,
+   // otherwise bits [10:6] of the fill address.
+   assign      dva_wr_adr_e[10:6] = dva_svld_e ? dva_snp_addr_e_bf[4:0] : dcache_fill_addr_e[10:6];
+
+// should ldxa_data_vld be included ?
+
+// One-hot decode of the 2-bit fill thread id lsu_dfill_tid_e[1:0]
+// (00 -> thread0, 01 -> thread1, 10 -> thread2, 11 -> thread3).
+assign  dfill_thread0 = ~lsu_dfill_tid_e[1] & ~lsu_dfill_tid_e[0] ;
+assign  dfill_thread1 = ~lsu_dfill_tid_e[1] &  lsu_dfill_tid_e[0] ;
+assign  dfill_thread2 =  lsu_dfill_tid_e[1] & ~lsu_dfill_tid_e[0] ;
+assign  dfill_thread3 =  lsu_dfill_tid_e[1] &  lsu_dfill_tid_e[0] ;
+
+// Local alias of lsu_l2fill_fpld_e; used to exclude such fills from the
+// lsu_ifu_ldst_cmplt fill term.
+assign  l2fill_fpld_e = lsu_l2fill_fpld_e ;
+
+//=========================================================================================
+//  LD/ST COMPLETE SIGNAL
+//=========================================================================================
+
+// Prefetch
+// A prefetch that misses the tlb cam, or hits with tlb_pgnum[39] set, is
+// flagged here provided the instruction is valid (lsu_inst_vld_w) and the
+// pipe is not being flushed. The flag is split per thread and delayed by two
+// flops; the delayed vector feeds the ldst_cmplt_late_N equations.
+
+wire	pref_tlbmiss_g ;
+assign	pref_tlbmiss_g = 
+pref_inst_g & 
+(~tlb_cam_hit_g | (tlb_cam_hit_g & tlb_pgnum[39])) // nop on tlbmiss or io access
+& lsu_inst_vld_w & ~dctl_flush_pipe_w ; // Bug 4318 bug6406/eco6619
+   
+//assign	pref_tlbmiss_g = pref_inst_g & lsu_inst_vld_w & ~tlb_cam_hit_g ;
+// Per-thread qualification using the one-hot threadN_g selects.
+wire	[3:0] pref_tlbmiss_cmplt,pref_tlbmiss_cmplt_d1,pref_tlbmiss_cmplt_d2 ;
+assign	pref_tlbmiss_cmplt[0] = pref_tlbmiss_g & thread0_g ;
+assign	pref_tlbmiss_cmplt[1] = pref_tlbmiss_g & thread1_g ;
+assign	pref_tlbmiss_cmplt[2] = pref_tlbmiss_g & thread2_g ;
+assign	pref_tlbmiss_cmplt[3] = pref_tlbmiss_g & thread3_g ;
+
+// First delay stage.
+// NOTE(review): dff_s is presumably a plain clk-edge flop with scan ports - confirm.
+dff_s  #(4) pfcmpl_stgd1 (
+        .din    (pref_tlbmiss_cmplt[3:0]),
+        .q      (pref_tlbmiss_cmplt_d1[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Second delay stage.
+dff_s  #(4) pfcmpl_stgd2 (
+        .din    (pref_tlbmiss_cmplt_d1[3:0]),
+        .q      (pref_tlbmiss_cmplt_d2[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// *** add diagnstc rd and prefetch(tlb-miss) signals. ***
+// *** add ifu asi ack.
+
+// This equation is critical and needs to be optimized.
+// Declarations for the per-thread completion terms used below.
+// ldst_cmplt_late_N is the combinational "late" completion for thread N and
+// ldst_cmplt_late_N_d1 is its registered copy (see flop ldstcmplt_d1).
+wire [3:0] 	lsu_pcx_pref_issue;
+wire	diag_wr_cmplt0,diag_wr_cmplt1,diag_wr_cmplt2,diag_wr_cmplt3;
+wire	ldst_cmplt_late_0, ldst_cmplt_late_1 ;
+wire	ldst_cmplt_late_2, ldst_cmplt_late_3 ;
+wire	ldst_cmplt_late_0_d1, ldst_cmplt_late_1_d1 ;
+wire	ldst_cmplt_late_2_d1, ldst_cmplt_late_3_d1 ;
+
+   // A fill is ignored while lmq_ldd_vld is set and ldd_in_dfq_out is not.
+   // NOTE(review): per the "1st fill for ldd" comments below, this appears to
+   // suppress completion on the first of two ldd fills - confirm.
+   assign ignore_fill = lmq_ldd_vld & ~ldd_in_dfq_out;
+   
+// Thread 0 ld/st completion to the ifu. It is the OR of:
+//  - an internal stxa (stxa_internal_d2 for thread0_w3) or the registered
+//    stxa stall write completion,
+//  - an l2 fill for thread 0 that is not ignored, not l2fill_fpld_e, not an
+//    atomic store completion packet, and not an error
+//    (lsu_cpx_pkt_ld_err[1]) with lsu_nceen_d1[0] set,
+//  - intld_byp_cmplt[0],
+//  - the registered late completion ldst_cmplt_late_0_d1.
+assign  lsu_ifu_ldst_cmplt[0] = 
+    // * can be early or
+    ((stxa_internal_d2 & thread0_w3) | stxa_stall_wr_cmplt0_d1) | 
+    // * late signal and critical.
+    // Can this be snapped earlier ?
+    //(((l2fill_vld_e & ~atomic_ld_squash_e & ~ignore_fill)) //Bug 3624
+    (((l2fill_vld_e & ~ignore_fill))  // 1st fill for ldd.
+      & ~l2fill_fpld_e & ~lsu_cpx_pkt_atm_st_cmplt & 
+	~(lsu_cpx_pkt_ld_err[1] & lsu_nceen_d1[0]) & dfill_thread0)  |
+    intld_byp_cmplt[0] |
+    // * early-or signals
+    ldst_cmplt_late_0_d1 ;
+
+// Atomic store completion qualified to thread 0 by the fill thread decode.
+wire	atm_st_cmplt0 ;
+assign	atm_st_cmplt0 = lsu_atm_st_cmplt_e & dfill_thread0 ;
+// Late completion sources for thread 0; registered once before being ORed
+// into lsu_ifu_ldst_cmplt[0].
+assign	ldst_cmplt_late_0 = 
+    (atm_st_cmplt0 & ~pend_atm_ld_ue[0]) |  // Bug 3624,4048
+    bsync0_reset    |
+    lsu_intrpt_cmplt[0]   |
+    diag_wr_cmplt0 |
+//    dc0_diagnstc_rd_w2 |
+    ldxa_illgl_va_cmplt_d1[0] |
+    pref_tlbmiss_cmplt_d2[0] |
+    lsu_pcx_pref_issue[0];
+
+
+// Thread 1 ld/st completion to the ifu: OR of the stxa completions, a
+// qualifying l2 fill for thread 1 (not ignored, not fpld, not atomic store
+// completion, not an error with lsu_nceen_d1[1] set), intld_byp_cmplt[1] and
+// the registered late completion ldst_cmplt_late_1_d1.
+assign  lsu_ifu_ldst_cmplt[1] = 
+    ((stxa_internal_d2 & thread1_w3) | stxa_stall_wr_cmplt1_d1) | 
+    (((l2fill_vld_e & ~ignore_fill)) // // 1st fill for ldd
+      & ~l2fill_fpld_e & ~lsu_cpx_pkt_atm_st_cmplt & 
+	~(lsu_cpx_pkt_ld_err[1] & lsu_nceen_d1[1]) & dfill_thread1)  |
+    intld_byp_cmplt[1] |
+    ldst_cmplt_late_1_d1 ;
+
+// Atomic store completion qualified to thread 1 by the fill thread decode.
+wire	atm_st_cmplt1 ;
+assign	atm_st_cmplt1 = lsu_atm_st_cmplt_e & dfill_thread1 ;
+// Late completion sources for thread 1; registered once before use.
+assign	ldst_cmplt_late_1 = 
+    (atm_st_cmplt1 & ~pend_atm_ld_ue[1]) |  // Bug 3624,4048
+    bsync1_reset    |
+    lsu_intrpt_cmplt[1]   |
+    diag_wr_cmplt1 |
+//    dc1_diagnstc_rd_w2 |
+    ldxa_illgl_va_cmplt_d1[1] |
+    pref_tlbmiss_cmplt_d2[1] |
+    lsu_pcx_pref_issue[1];
+
+// Thread 2 ld/st completion to the ifu: OR of the stxa completions, a
+// qualifying l2 fill for thread 2 (not ignored, not fpld, not atomic store
+// completion, not an error with lsu_nceen_d1[2] set), intld_byp_cmplt[2] and
+// the registered late completion ldst_cmplt_late_2_d1.
+assign  lsu_ifu_ldst_cmplt[2] = 
+    ((stxa_internal_d2 & thread2_w3) | stxa_stall_wr_cmplt2_d1) | 
+    (((l2fill_vld_e & ~ignore_fill)) // 1st fill for ldd.
+      & ~l2fill_fpld_e & ~lsu_cpx_pkt_atm_st_cmplt & 
+	~(lsu_cpx_pkt_ld_err[1] & lsu_nceen_d1[2]) & dfill_thread2)  |
+    intld_byp_cmplt[2] |
+    ldst_cmplt_late_2_d1 ;
+
+// Atomic store completion qualified to thread 2 by the fill thread decode.
+wire	atm_st_cmplt2 ;
+assign	atm_st_cmplt2 = lsu_atm_st_cmplt_e & dfill_thread2 ;
+// Late completion sources for thread 2; registered once before use.
+assign	ldst_cmplt_late_2 = 
+    (atm_st_cmplt2 & ~pend_atm_ld_ue[2]) |  // Bug 3624,4048
+    bsync2_reset    |
+    lsu_intrpt_cmplt[2]   |
+    diag_wr_cmplt2 |
+//    dc2_diagnstc_rd_w2 |
+    ldxa_illgl_va_cmplt_d1[2] |
+    pref_tlbmiss_cmplt_d2[2] |
+    lsu_pcx_pref_issue[2];
+
+// Thread 3 ld/st completion to the ifu: OR of the stxa completions, a
+// qualifying l2 fill for thread 3 (not ignored, not fpld, not atomic store
+// completion, not an error with lsu_nceen_d1[3] set), intld_byp_cmplt[3] and
+// the registered late completion ldst_cmplt_late_3_d1.
+assign  lsu_ifu_ldst_cmplt[3] = 
+    ((stxa_internal_d2 & thread3_w3) | stxa_stall_wr_cmplt3_d1) | 
+    //(((l2fill_vld_e & atomic_st_cmplt) | 
+    (((l2fill_vld_e & ~ignore_fill)) // 1st fill for ldd.
+      & ~l2fill_fpld_e & ~lsu_cpx_pkt_atm_st_cmplt & 
+	~(lsu_cpx_pkt_ld_err[1] & lsu_nceen_d1[3]) & dfill_thread3)  |
+    intld_byp_cmplt[3] |
+    ldst_cmplt_late_3_d1 ;
+
+// Atomic store completion qualified to thread 3 by the fill thread decode.
+wire	atm_st_cmplt3 ;
+assign	atm_st_cmplt3 = lsu_atm_st_cmplt_e & dfill_thread3 ;
+// Late completion sources for thread 3; registered once before use.
+assign	ldst_cmplt_late_3 = 
+    (atm_st_cmplt3 & ~pend_atm_ld_ue[3]) |  // Bug 3624,4048
+    bsync3_reset    |
+    lsu_intrpt_cmplt[3]   |
+    diag_wr_cmplt3 |
+//    dc3_diagnstc_rd_w2 |
+    ldxa_illgl_va_cmplt_d1[3] |
+    pref_tlbmiss_cmplt_d2[3] |
+    lsu_pcx_pref_issue[3];
+
+// Registers the four per-thread late completion terms; the outputs
+// ldst_cmplt_late_N_d1 are ORed into lsu_ifu_ldst_cmplt[N].
+dff_s #(4) ldstcmplt_d1 (
+        .din    ({ldst_cmplt_late_3,ldst_cmplt_late_2,ldst_cmplt_late_1,ldst_cmplt_late_0}),
+        .q      ({ldst_cmplt_late_3_d1,ldst_cmplt_late_2_d1,
+		ldst_cmplt_late_1_d1,ldst_cmplt_late_0_d1}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+//=========================================================================================
+//  LD/ST MISS SIGNAL - IFU
+//=========================================================================================
+
+// Switchout of internal asi ld
+// Do not switchout for tag-target,
+// ldxa_swo_annul is set when any of the following holds:
+//  - asi state[7:4] == 0x3,
+//  - asi state == 0x58 and the va low byte is neither 0x38 nor 0x80,
+//  - asi state == 0x50,
+//  - mmu_rd_only_asi_m.
+assign  ldxa_swo_annul = 
+	(lsu_dctl_asi_state_m[7:4] == 4'h3)   | 	// ldxa to 0x3X does not swo
+	(((lsu_dctl_asi_state_m[7:0] == 8'h58) &   	// tag-target,tag-access,sfsr,sfar
+		~((lsu_ldst_va_b7_b0_m[7:0] == 8'h38) | (lsu_ldst_va_b7_b0_m[7:0] == 8'h80))) | // wtcpt/pid
+	 (lsu_dctl_asi_state_m[7:0] == 8'h50)) |
+	mmu_rd_only_asi_m ;
+
+// Internal lda that is not annulled above.
+wire	ldxa_internal_swo_m,ldxa_internal_swo_g ;
+assign	ldxa_internal_swo_m = lda_internal_m & ~ldxa_swo_annul ;
+
+// This represents *all* ld asi.
+wire	asi_internal_ld_m,asi_internal_ld_g ;
+assign	asi_internal_ld_m =
+	asi_internal_m & ld_inst_vld_m & lsu_alt_space_m ;
+
+// Register both _m terms to produce their _g versions.
+dff_s #(2) ldaswo_stgg (
+        .din    ({ldxa_internal_swo_m,asi_internal_ld_m}),
+        .q      ({ldxa_internal_swo_g,asi_internal_ld_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+		   
+// Miss condition shared by the ifu and exu miss signals. Asserted, unless the
+// access is an internal asi load (asi_internal_ld_g), when any of:
+//  - there is no cache hit combined with a tlb cam hit or dtlb bypass,
+//  - the dcache is disabled,
+//  - ldd_force_l2access_g is set,
+//  - ncache_asild_rq_g is set.
+wire	common_ldst_miss_w ;
+assign	common_ldst_miss_w =
+(~(cache_hit & (tlb_cam_hit_g | lsu_dtlb_bypass_g)) |	// include miss in tlb;bypass
+   ~dcache_enable_g 	| 	// 
+    //endian_mispred_g    |	// endian mispredict
+    ldd_force_l2access_g 		| 	// ifu to incorporate directly
+    ncache_asild_rq_g   ) &	// bypass asi
+ 	~asi_internal_ld_g ;
+
+// Miss indication to the ifu: the common miss or an internal ldxa switchout,
+// qualified by lsu_inst_vld_w and ld_inst_vld_unflushed. The commented-out
+// terms below are retained from earlier versions of the equation.
+assign	lsu_ifu_ldst_miss_w =
+  (common_ldst_miss_w  |         // common between ifu and exu.
+    // MMU_ASI : ifu must switch out early only for stores.
+    ldxa_internal_swo_g)
+//  ldxa_internal	|	// ifu incorporates directly
+//  atomic_g 		| 	// ifu incorporates directly
+//  ld_stb_hit_g 	| 	// late 
+//    stb_cam_hit)		// ** rm once ifu uses late signal. ** 
+//  dcache_rd_parity_error | 	// late
+//  dtag_perror_g) & 	|	// late
+    & (lsu_inst_vld_w & ld_inst_vld_unflushed) ;	// flush uptil m accounted for.
+//  & ld_inst_vld_g ;		// assume flush=1 clears ldst_miss=1
+//  ~tte_data_perror_unc & 	// in flush 
+//  (ld_inst_vld_g & (~lsu_alt_space_g | (lsu_alt_space_g & recognized_asi_g))) |
+//  ncache_asild_rq_g ;   // asi ld requires bypass
+
+
+   //timing fix
+   // Despite its name, this term also carries the endian mispredict and the
+   // tte_data_perror_unc_en condition in addition to the dcache data/tag
+   // parity errors. It is registered below and qualified one cycle later.
+   wire lsu_ifu_dc_parity_error_w;
+   assign lsu_ifu_dc_parity_error_w = 
+	( 
+	lsu_dcache_data_perror_g | // bug 4267
+	lsu_dcache_tag_perror_g  |  
+  endian_mispred_g         |	// endian mispredict ; mv'ed from ldst_miss
+	tte_data_perror_unc_en) ;
+   
+/*
+   wire   lsu_ld_inst_vld_flush_w, lsu_ld_inst_vld_flush_w2;
+   assign lsu_ld_inst_vld_flush_w = lsu_inst_vld_w & ld_inst_vld_unflushed & ~dctl_flush_pipe_w ;
+
+   
+dff_s #(1) lsu_ld_inst_vld_flush_stgw2 (
+        .din    (lsu_ld_inst_vld_flush_w),
+        .q      (lsu_ld_inst_vld_flush_w2),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+*/
+   
+   wire   lsu_ifu_dc_parity_error_w2_q;
+  
+// One-cycle delay of the combined error term.
+dff_s #(1) lsu_ifu_dc_parity_error_stgw2 (
+        .din    (lsu_ifu_dc_parity_error_w),
+        .q      (lsu_ifu_dc_parity_error_w2_q),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+   // Output to the ifu: the delayed error term or a store-buffer cam hit
+   // (stb_cam_hit_w2), qualified by ld_inst_vld_w2.
+   assign lsu_ifu_dc_parity_error_w2 = (lsu_ifu_dc_parity_error_w2_q | stb_cam_hit_w2) & ld_inst_vld_w2;
+   
+//=========================================================================================
+//  LD/ST MISS SIGNAL - EXU
+//=========================================================================================
+
+// For a diagnostic access to the cache: if it misses in the cache, then
+// ldst_miss is asserted, preventing a write into the cache, but code is
+// allowed to continue executing.
+// Miss indication for the exu, excluding the store-buffer cam hit (which is
+// ORed in one cycle later, below). Any of the listed conditions asserts it,
+// qualified by ld_inst_vld_unflushed and lsu_inst_vld_w.
+wire	exu_ldst_miss_g_no_stb_cam_hit ;
+assign  exu_ldst_miss_g_no_stb_cam_hit =  
+  (common_ldst_miss_w 	  |
+   ldxa_internal_swo_g	  |
+   endian_mispred_g    	  |	
+   atomic_g 		  |
+   lsu_dcache_data_perror_g 	|
+   lsu_dcache_tag_perror_g 	|  
+   tte_data_perror_unc_en    	|
+   pref_inst_g) & ld_inst_vld_unflushed & lsu_inst_vld_w ; // flush qual done in exu
+
+
+   // Load-valid qualifier (without flush), delayed one cycle to line up with
+   // the registered miss term.
+   wire ld_inst_vld_no_flush_w, ld_inst_vld_no_flush_w2;
+   assign ld_inst_vld_no_flush_w = ld_inst_vld_unflushed & lsu_inst_vld_w;
+   
+dff_s #(1) ld_inst_vld_no_flush_stgw2 (
+        .din    (ld_inst_vld_no_flush_w),
+        .q      (ld_inst_vld_no_flush_w2),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   
+   wire lsu_exu_ldst_miss_w2_tmp;
+ 
+// One-cycle delay of the miss term.
+dff_s #(1) exuldstmiss_stgw2 (
+        .din    (exu_ldst_miss_g_no_stb_cam_hit),
+        .q      (lsu_exu_ldst_miss_w2_tmp),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+   // Final exu miss: delayed miss term or store-buffer cam hit, qualified by
+   // the delayed load-valid.
+   assign lsu_exu_ldst_miss_w2 =  (lsu_exu_ldst_miss_w2_tmp | stb_cam_hit_w2) & ld_inst_vld_no_flush_w2;
+   
+                                   
+// Local alias of the exu miss signal.
+wire	lsu_ldst_miss_w2;
+assign	lsu_ldst_miss_w2 = lsu_exu_ldst_miss_w2 ;
+
+//=========================================================================================
+//  RMO Store control data
+//=========================================================================================
+
+// lsu_st_rmo_m: a valid alternate-space store whose asi is binit_quad or blk,
+// or blkst_m.
+// lsu_bst_in_pipe_m: a valid alternate-space store with a blk asi.
+assign	lsu_st_rmo_m = (st_inst_vld_m & (binit_quad_asi_m | blk_asi_m) & lsu_alt_space_m) | blkst_m ;
+assign	lsu_bst_in_pipe_m = (st_inst_vld_m &  blk_asi_m & lsu_alt_space_m) ;
+
+//=========================================================================================
+//  ASI BUS 
+//=========================================================================================
+
+// *** This logic is now used by all long-latency asi operations on chip. ***
+
+// Start with SDATA Reg for Streaming
+// strm_asi_m is set when the asi state equals 0x40; it is registered once to
+// give strm_asi.
+wire	strm_asi, strm_asi_m ;
+assign	strm_asi_m = (lsu_dctl_asi_state_m[7:0]==8'h40) ;
+
+dff_s  strm_stgg (
+        .din    (strm_asi_m),
+        .q      (strm_asi),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Asserted only for asi 0x40 with va low byte 0x80; the wider set of
+// addresses in the block comment below is disabled.
+assign  stxa_stall_asi_g = 
+  strm_asi & ((ldst_va_g[7:0] == 8'h80)) ;  	// ma ctl
+  /*strm_asi & (	(ldst_va_g[7:0] == 8'h18) |  	// streaming stxa to sdata
+  		(ldst_va_g[7:0] == 8'h00) |  	// stream ctl
+  		(ldst_va_g[7:0] == 8'h08) ) ;  	// ma ctl */
+
+// Per-thread dtlb write completion: the dtlb write valid qualified by the
+// one-hot demap_threadN selects.
+wire    dtlb_wr_cmplt0, dtlb_wr_cmplt1;
+wire    dtlb_wr_cmplt2, dtlb_wr_cmplt3;
+assign  dtlb_wr_cmplt0 = demap_thread0 & lsu_dtlb_wr_vld_e ;
+assign  dtlb_wr_cmplt1 = demap_thread1 & lsu_dtlb_wr_vld_e ;
+assign  dtlb_wr_cmplt2 = demap_thread2 & lsu_dtlb_wr_vld_e ;
+assign  dtlb_wr_cmplt3 = demap_thread3 & lsu_dtlb_wr_vld_e ;
+
+// Four-stage delay chain of lsu_dtlb_wr_vld_e (d1..d4). The d4 tap masks the
+// tlu stxa ack term in the stxa_stall_wr_cmpltN equations.
+dff_s  dtlbw_stgd1 (
+        .din    (lsu_dtlb_wr_vld_e),
+        .q      (dtlb_wr_init_d1),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dff_s  dtlbw_stgd2 (
+        .din    (dtlb_wr_init_d1),
+        .q      (dtlb_wr_init_d2),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dff_s  dtlbw_stgd3 (
+        .din    (dtlb_wr_init_d2),
+        .q      (dtlb_wr_init_d3),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+wire    dtlb_wr_init_d4 ;
+dff_s  dtlbw_stgd4 (
+        .din    (dtlb_wr_init_d3),
+        .q      (dtlb_wr_init_d4),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+
+
+// Per-thread completion of a stalled stxa. For thread N it is the OR of:
+//  - spu stxa ack for that thread,
+//  - tlu stxa ack for that thread, unless dtlb_wr_init_d4 is set,
+//  - ifu_asi_store_cmplt_en_d1 with the delayed thread select,
+//  - the dtlb write completion for that thread.
+wire	tlb_access_sel_thrd3_d1,tlb_access_sel_thrd2_d1;
+wire	tlb_access_sel_thrd1_d1,tlb_access_sel_thrd0_d1 ;
+wire	ifu_asi_store_cmplt_en, ifu_asi_store_cmplt_en_d1 ;
+assign  stxa_stall_wr_cmplt0 =  (spu_lsu_stxa_ack & spu_stxa_thread0) |
+        (tlu_stxa_thread0_w2 & tlu_lsu_stxa_ack & ~dtlb_wr_init_d4) |
+	(ifu_asi_store_cmplt_en_d1 & tlb_access_sel_thrd0_d1) |
+	dtlb_wr_cmplt0 ;
+assign  stxa_stall_wr_cmplt1 =  (spu_lsu_stxa_ack & spu_stxa_thread1) |
+        (tlu_stxa_thread1_w2 & tlu_lsu_stxa_ack & ~dtlb_wr_init_d4) |
+	(ifu_asi_store_cmplt_en_d1 & tlb_access_sel_thrd1_d1) |
+	dtlb_wr_cmplt1 ;
+assign  stxa_stall_wr_cmplt2 =  (spu_lsu_stxa_ack & spu_stxa_thread2) |
+        (tlu_stxa_thread2_w2 & tlu_lsu_stxa_ack & ~dtlb_wr_init_d4) |
+	(ifu_asi_store_cmplt_en_d1 & tlb_access_sel_thrd2_d1) |
+	dtlb_wr_cmplt2 ;
+assign  stxa_stall_wr_cmplt3 =  (spu_lsu_stxa_ack & spu_stxa_thread3) |
+        (tlu_stxa_thread3_w2 & tlu_lsu_stxa_ack & ~dtlb_wr_init_d4) |
+	(ifu_asi_store_cmplt_en_d1 & tlb_access_sel_thrd3_d1) |
+	dtlb_wr_cmplt3 ;
+
+// Registered copies; the _d1 outputs feed lsu_ifu_ldst_cmplt[N].
+dff_s  #(4) stxastall_stgd1 (
+        .din    ({stxa_stall_wr_cmplt3,stxa_stall_wr_cmplt2,
+		stxa_stall_wr_cmplt1,stxa_stall_wr_cmplt0}),
+        .q    	({stxa_stall_wr_cmplt3_d1,stxa_stall_wr_cmplt2_d1,
+		stxa_stall_wr_cmplt1_d1,stxa_stall_wr_cmplt0_d1}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+
+// enable speculates on inst not being flushed
+// Only dside diagnostic writes will be logged for long-latency action. dside diagnostic
+// reads are aligned to pipe.
+// Diagnostic asi accesses count here only when they are stores.
+wire wr_dc_diag_asi_e, wr_dtagv_diag_asi_e ;
+
+assign	wr_dc_diag_asi_e = dc_diagnstc_asi_e & st_inst_vld_e ;
+assign	wr_dtagv_diag_asi_e =  dtagv_diagnstc_asi_e & st_inst_vld_e ;
+
+// Per-thread access enable: any of the four asi classes (long-latency tlb,
+// dcache diag write, dtagv diag write, ifu non-tlb), in alternate space, for
+// the thread selected by threadN_e.
+assign  tlb_access_en0_e = 
+  (tlb_lng_ltncy_asi_e | wr_dc_diag_asi_e | wr_dtagv_diag_asi_e | ifu_nontlb_asi_e)  
+    & thread0_e & alt_space_e ;
+assign  tlb_access_en1_e = 
+  (tlb_lng_ltncy_asi_e | wr_dc_diag_asi_e | wr_dtagv_diag_asi_e | ifu_nontlb_asi_e)  
+    & thread1_e & alt_space_e ;
+assign  tlb_access_en2_e = 
+  (tlb_lng_ltncy_asi_e | wr_dc_diag_asi_e | wr_dtagv_diag_asi_e | ifu_nontlb_asi_e)  
+    & thread2_e & alt_space_e ;
+assign  tlb_access_en3_e = 
+  (tlb_lng_ltncy_asi_e | wr_dc_diag_asi_e | wr_dtagv_diag_asi_e | ifu_nontlb_asi_e)  
+    & thread3_e & alt_space_e ;
+
+// First register stage (the _e enables become the _tmp signals).
+dff_s  #(4) tlbac_stgm (
+        .din    ({tlb_access_en0_e,tlb_access_en1_e,tlb_access_en2_e,tlb_access_en3_e}),
+        .q      ({tlb_access_en0_tmp,tlb_access_en1_tmp,tlb_access_en2_tmp,tlb_access_en3_tmp}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Qualify with a valid load or store in the _m stage.
+wire	ldst_vld_m = ld_inst_vld_m | st_inst_vld_m ;
+assign	tlb_access_en0_m = tlb_access_en0_tmp & ldst_vld_m ;
+assign	tlb_access_en1_m = tlb_access_en1_tmp & ldst_vld_m ;
+assign	tlb_access_en2_m = tlb_access_en2_tmp & ldst_vld_m ;
+assign	tlb_access_en3_m = tlb_access_en3_tmp & ldst_vld_m ;
+
+// Second register stage; outputs are not yet flush-qualified.
+dff_s  #(4) tlbac_stgw (
+        .din    ({tlb_access_en0_m,tlb_access_en1_m,tlb_access_en2_m,tlb_access_en3_m}),
+        .q      ({tlb_access_en0_unflushed,tlb_access_en1_unflushed,tlb_access_en2_unflushed,tlb_access_en3_unflushed}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Flush ld/st with as=42 belonging to lsu. bistctl and ldiag
+// Final per-thread enable: the unflushed enable qualified by lsu_inst_vld_w
+// and killed by either dctl_early_flush_w or ifu_asi42_flush_g.
+
+assign  tlb_access_en0_g = tlb_access_en0_unflushed & lsu_inst_vld_w & ~(dctl_early_flush_w | ifu_asi42_flush_g) ;
+//assign  tlb_access_en0_g = tlb_access_en0_unflushed & lsu_inst_vld_w & ~(dctl_flush_pipe_w | ifu_asi42_flush_g) ;
+assign  tlb_access_en1_g = tlb_access_en1_unflushed & lsu_inst_vld_w & ~(dctl_early_flush_w | ifu_asi42_flush_g) ;
+assign  tlb_access_en2_g = tlb_access_en2_unflushed & lsu_inst_vld_w & ~(dctl_early_flush_w | ifu_asi42_flush_g) ;
+assign  tlb_access_en3_g = tlb_access_en3_unflushed & lsu_inst_vld_w & ~(dctl_early_flush_w | ifu_asi42_flush_g) ;
+
+// Per-thread diagnostic write completion: the diagnostic write source select
+// combined with the delayed thread select.
+assign	diag_wr_cmplt0 = lsu_diagnstc_wr_src_sel_e & tlb_access_sel_thrd0_d1 ;
+assign	diag_wr_cmplt1 = lsu_diagnstc_wr_src_sel_e & tlb_access_sel_thrd1_d1 ;
+assign	diag_wr_cmplt2 = lsu_diagnstc_wr_src_sel_e & tlb_access_sel_thrd2_d1 ;
+assign	diag_wr_cmplt3 = lsu_diagnstc_wr_src_sel_e & tlb_access_sel_thrd3_d1 ;
+
+// Per-thread ifu ldxa read completion: ifu ldxa data valid for the thread,
+// excluding the case where that thread's latched asi is ifu non-tlb.
+wire	ifu_tlb_rd_cmplt0,ifu_tlb_rd_cmplt1,ifu_tlb_rd_cmplt2,ifu_tlb_rd_cmplt3 ;
+wire	st_sqsh_m, ifu_asi_ack_d1 ;
+assign	ifu_tlb_rd_cmplt0 =  (ifu_ldxa_thread0_w2 & ifu_lsu_ldxa_data_vld_w2 & ~ifu_nontlb0_asi) ;
+assign	ifu_tlb_rd_cmplt1 =  (ifu_ldxa_thread1_w2 & ifu_lsu_ldxa_data_vld_w2 & ~ifu_nontlb1_asi) ;
+assign	ifu_tlb_rd_cmplt2 =  (ifu_ldxa_thread2_w2 & ifu_lsu_ldxa_data_vld_w2 & ~ifu_nontlb2_asi) ;
+assign	ifu_tlb_rd_cmplt3 =  (ifu_ldxa_thread3_w2 & ifu_lsu_ldxa_data_vld_w2 & ~ifu_nontlb3_asi) ;
+  
+// stxa ack will share tid with ldxa
+// This should be qualified with inst_vld_w also !!!
+// ldxa_data_vld needs to be removed once full interface in !!!
+// Per-thread clear of the pending asi access. Asserted on reset, on a tlu
+// ldxa data valid or stxa ack for the thread, on the ifu read completion, on
+// an ifu asi ack for the thread, or on a diagnostic write completion.
+// These signals also drive fifo_shift for the asi queue.
+assign  tlb_access_rst0 =  reset | 
+  (tlu_ldxa_thread0_w2 & tlu_lsu_ldxa_async_data_vld) | 
+  (tlu_stxa_thread0_w2 & tlu_lsu_stxa_ack) | 
+  (ifu_tlb_rd_cmplt0) | 
+  (ifu_stxa_thread0_w2 & ifu_lsu_asi_ack) |
+  diag_wr_cmplt0 ;
+assign  tlb_access_rst1 =  reset | 
+  (tlu_ldxa_thread1_w2 & tlu_lsu_ldxa_async_data_vld) |
+  (tlu_stxa_thread1_w2 & tlu_lsu_stxa_ack) |
+  (ifu_tlb_rd_cmplt1) | 
+  (ifu_stxa_thread1_w2 & ifu_lsu_asi_ack) |
+  diag_wr_cmplt1 ;
+assign  tlb_access_rst2 =  reset | 
+  (tlu_ldxa_thread2_w2 & tlu_lsu_ldxa_async_data_vld) |
+  (tlu_stxa_thread2_w2 & tlu_lsu_stxa_ack) |
+  (ifu_tlb_rd_cmplt2) | 
+  (ifu_stxa_thread2_w2 & ifu_lsu_asi_ack) |
+  diag_wr_cmplt2 ;
+assign  tlb_access_rst3 =  reset | 
+  (tlu_ldxa_thread3_w2 & tlu_lsu_ldxa_async_data_vld) |
+  (tlu_stxa_thread3_w2 & tlu_lsu_stxa_ack) |
+  (ifu_tlb_rd_cmplt3) | 
+  (ifu_stxa_thread3_w2 & ifu_lsu_asi_ack) |
+  diag_wr_cmplt3 ;
+
+
+// tlb_ld_inst* and tlb_st_inst* are generically used to indicate a read or write. 
+// For each thread N there are two registers, both loaded when
+// tlb_access_enN_g is set:
+//  - asiv_thrdN latches {ld_inst_vld_g, st_inst_vld_g}; it also takes
+//    tlb_access_rstN on its rst port.
+//  - asiv_thrdN_sec latches the three asi class flags (dcache diag, dtagv
+//    diag, ifu non-tlb); it has no reset.
+// nontlb_asiN is the OR of the three latched class flags.
+// NOTE(review): dffre_s/dffe_s are presumably enable flops (with synchronous
+// reset for dffre_s); reset-vs-enable priority is not visible here - confirm.
+// Thread 0
+   
+dffre_s #(2)  asiv_thrd0 (
+        .din    ({ld_inst_vld_g,st_inst_vld_g}),
+        .q      ({tlb_ld_inst0,tlb_st_inst0}),
+        .rst    (tlb_access_rst0),        .en     (tlb_access_en0_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dffe_s #(3)  asiv_thrd0_sec (
+        .din    ({dc_diagnstc_asi_g,dtagv_diagnstc_asi_g,ifu_nontlb_asi_g}),
+        .q      ({dc0_diagnstc_asi,dtagv0_diagnstc_asi,ifu_nontlb0_asi}),
+        .en     (tlb_access_en0_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+assign  nontlb_asi0 = dc0_diagnstc_asi | dtagv0_diagnstc_asi | ifu_nontlb0_asi ;
+
+// Thread 1
+
+dffre_s #(2)  asiv_thrd1 (
+        .din    ({ld_inst_vld_g,st_inst_vld_g}),
+        .q      ({tlb_ld_inst1,tlb_st_inst1}),
+        .rst    (tlb_access_rst1),        .en     (tlb_access_en1_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dffe_s #(3)  asiv_thrd1_sec (
+        .din    ({dc_diagnstc_asi_g,dtagv_diagnstc_asi_g,ifu_nontlb_asi_g}),
+        .q      ({dc1_diagnstc_asi,dtagv1_diagnstc_asi,ifu_nontlb1_asi}),
+        .en     (tlb_access_en1_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+assign  nontlb_asi1 = dc1_diagnstc_asi | dtagv1_diagnstc_asi | ifu_nontlb1_asi ;
+
+// Thread 2
+
+dffre_s #(2)  asiv_thrd2 (
+        .din    ({ld_inst_vld_g,st_inst_vld_g}),
+        .q      ({tlb_ld_inst2,tlb_st_inst2}),
+        .rst    (tlb_access_rst2),        .en     (tlb_access_en2_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dffe_s #(3)  asiv_thrd2_sec (
+        .din    ({dc_diagnstc_asi_g,dtagv_diagnstc_asi_g,ifu_nontlb_asi_g}),
+        .q      ({dc2_diagnstc_asi,dtagv2_diagnstc_asi,ifu_nontlb2_asi}),
+        .en     (tlb_access_en2_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+assign  nontlb_asi2 = dc2_diagnstc_asi | dtagv2_diagnstc_asi | ifu_nontlb2_asi ;
+
+// Thread 3
+
+dffre_s #(2)  asiv_thrd3 (
+        .din    ({ld_inst_vld_g,st_inst_vld_g}),
+        .q      ({tlb_ld_inst3,tlb_st_inst3}),
+        .rst    (tlb_access_rst3),        .en     (tlb_access_en3_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dffe_s #(3)  asiv_thrd3_sec (
+        .din    ({dc_diagnstc_asi_g,dtagv_diagnstc_asi_g,ifu_nontlb_asi_g}),
+        .q      ({dc3_diagnstc_asi,dtagv3_diagnstc_asi,ifu_nontlb3_asi}),
+        .en     (tlb_access_en3_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+assign  nontlb_asi3 = dc3_diagnstc_asi | dtagv3_diagnstc_asi | ifu_nontlb3_asi ;
+
+//---
+//  Prioritization of threaded events from asi queue.
+//  - It is not expected that a significant bias will exist in selecting
+//  1 of 4 possible events from the asi queue because of the low frequency
+//  of such events. However, to bulletproof we will prioritize the events
+//  in a fifo manner.
+//---
+
+// Control :
+// The queue holds up to four 2-bit thread ids plus a valid bit per entry.
+// fifo_top[k] marks entry k as the first free slot: entry k is invalid and
+// all lower entries are valid.
+
+wire	[3:0]	fifo_top ;
+wire	asi_fifo0_vld,asi_fifo1_vld,asi_fifo2_vld,asi_fifo3_vld;
+
+assign	fifo_top[0] = ~asi_fifo0_vld ; 
+assign	fifo_top[1] = ~asi_fifo1_vld & asi_fifo0_vld ; 
+assign	fifo_top[2] = ~asi_fifo2_vld & asi_fifo1_vld & asi_fifo0_vld ; 
+assign	fifo_top[3] = ~asi_fifo3_vld & asi_fifo2_vld & asi_fifo1_vld & asi_fifo0_vld ; 
+
+// Check for timing on flush.
+// Do not confuse thread# with fifo entry#.
+// A write is any thread's access enable; a shift is any thread's access
+// reset. Note that tlb_access_rstN includes reset, so fifo_shift is also
+// asserted during reset.
+wire	fifo_wr, fifo_shift ;
+assign	fifo_wr = 
+tlb_access_en0_g | tlb_access_en1_g | tlb_access_en2_g | tlb_access_en3_g ;
+assign	fifo_shift =
+tlb_access_rst0 | tlb_access_rst1 | tlb_access_rst2 | tlb_access_rst3 ;
+
+// Write strobe for the current first free slot.
+wire	[3:0]	fifo_top_wr ;
+assign	fifo_top_wr[0] = fifo_top[0] & fifo_wr ;
+assign	fifo_top_wr[1] = fifo_top[1] & fifo_wr ;
+assign	fifo_top_wr[2] = fifo_top[2] & fifo_wr ;
+assign	fifo_top_wr[3] = fifo_top[3] & fifo_wr ;
+
+// Matrix for Data Selection.
+// shift | wr | din for entry
+// 0	   0	na
+// 0	   1	thrid_g
+// 1	   0	q
+// 1	   1	q if top is not 1 above
+// 1	   1	thrid_g if top is 1 above
+
+// shift writeable entry into correct position, if exists.
+// When shifting, entry k takes the new thread id only if the free slot was
+// entry k+1; without a shift it takes it only if the free slot is entry k.
+wire	asi_fifo0_sel,asi_fifo1_sel,asi_fifo2_sel ;
+assign	asi_fifo0_sel = fifo_shift ? fifo_top_wr[1] : fifo_top_wr[0] ;
+assign	asi_fifo1_sel = fifo_shift ? fifo_top_wr[2] : fifo_top_wr[1] ;
+assign	asi_fifo2_sel = fifo_shift ? fifo_top_wr[3] : fifo_top_wr[2] ;
+
+// Entry data input: the new thread id when selected, otherwise the contents
+// of the next-higher entry (shift down).
+wire	[1:0]	asi_fifo3_din,asi_fifo2_din,asi_fifo1_din,asi_fifo0_din ;
+wire	[1:0] 	asi_fifo3_q,asi_fifo2_q,asi_fifo1_q,asi_fifo0_q ;
+assign	asi_fifo0_din[1:0] = asi_fifo0_sel ? thrid_g[1:0] : asi_fifo1_q[1:0] ;
+assign	asi_fifo1_din[1:0] = asi_fifo1_sel ? thrid_g[1:0] : asi_fifo2_q[1:0] ;
+assign	asi_fifo2_din[1:0] = asi_fifo2_sel ? thrid_g[1:0] : asi_fifo3_q[1:0] ;
+assign	asi_fifo3_din[1:0] = thrid_g[1:0] ; // can never shift into.
+
+// Matrix for Enable 
+// shift | wr | Entry Written ?
+// 0	   0	0
+// 0	   1	if top
+// 1	   0	if entry+1 is vld
+// 1	   1	if entry itself is vld => as is.
+
+wire	wr_not_sh,sh_not_wr,wr_and_sh ;
+assign	wr_not_sh =  fifo_wr & ~fifo_shift ; // write not shift
+assign	sh_not_wr = ~fifo_wr &  fifo_shift ; // shift not write
+assign	wr_and_sh =  fifo_wr &  fifo_shift ; // shift and write
+
+// Next valid bit per entry, following the enable matrix above. Entry 3 has
+// no shift-only term because there is no higher entry to shift in.
+wire	asi_fifo0_vin,asi_fifo1_vin,asi_fifo2_vin,asi_fifo3_vin ;
+assign	asi_fifo0_vin =  
+	(wr_not_sh & fifo_top[0]) |
+	(sh_not_wr & asi_fifo1_vld) |
+	(wr_and_sh & asi_fifo0_vld) ;
+assign	asi_fifo1_vin =  
+	(wr_not_sh & fifo_top[1]) |
+	(sh_not_wr & asi_fifo2_vld) |
+	(wr_and_sh & asi_fifo1_vld) ;
+assign	asi_fifo2_vin =  
+	(wr_not_sh & fifo_top[2]) |
+	(sh_not_wr & asi_fifo3_vld) |
+	(wr_and_sh & asi_fifo2_vld) ;
+assign	asi_fifo3_vin =  
+	(wr_not_sh & fifo_top[3]) |
+	(wr_and_sh & asi_fifo3_vld) ;
+
+// An entry's flops are enabled when it is the write target or on any shift.
+wire	asi_fifo0_en,asi_fifo1_en,asi_fifo2_en,asi_fifo3_en ;
+assign	asi_fifo0_en = (fifo_wr & fifo_top[0]) | fifo_shift ; 
+assign	asi_fifo1_en = (fifo_wr & fifo_top[1]) | fifo_shift ; 
+assign	asi_fifo2_en = (fifo_wr & fifo_top[2]) | fifo_shift ; 
+assign	asi_fifo3_en = (fifo_wr & fifo_top[3]) | fifo_shift ; 
+
+// Valid bits are cleared only by reset.
+wire	asi_fifo3_rst,asi_fifo2_rst,asi_fifo1_rst,asi_fifo0_rst ;
+assign	asi_fifo0_rst = reset ;
+assign	asi_fifo1_rst = reset ;
+assign	asi_fifo2_rst = reset ;
+assign	asi_fifo3_rst = reset ;
+
+// Datapath :
+// fifo entry 0 is earliest. fifo entry 3 is latest.
+// Each entry is a 2-bit thread id register (asiq_fifo_k, no reset) plus a
+// 1-bit valid register (asiqv_fifo_k, cleared by asi_fifok_rst). Both share
+// the same enable asi_fifok_en.
+dffe_s #(2)  asiq_fifo_0 (
+        .din    (asi_fifo0_din[1:0]),
+        .q      (asi_fifo0_q[1:0]),
+        .en     (asi_fifo0_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+dffre_s   asiqv_fifo_0 (
+        .din    (asi_fifo0_vin),
+        .q      (asi_fifo0_vld),
+        .en     (asi_fifo0_en),	.rst (asi_fifo0_rst),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+// Thread selection from the head entry: decode the thread id in entry 0 and
+// require that thread to have a latched load or store. asi_fifo0_vld is not
+// part of this equation.
+wire	asi_sel_thrd3,asi_sel_thrd2,asi_sel_thrd1,asi_sel_thrd0;
+assign	asi_sel_thrd0 = ~asi_fifo0_q[1] & ~asi_fifo0_q[0] & (tlb_ld_inst0 | tlb_st_inst0) ;
+assign	asi_sel_thrd1 = ~asi_fifo0_q[1] &  asi_fifo0_q[0] & (tlb_ld_inst1 | tlb_st_inst1) ;
+assign	asi_sel_thrd2 =  asi_fifo0_q[1] & ~asi_fifo0_q[0] & (tlb_ld_inst2 | tlb_st_inst2) ;
+assign	asi_sel_thrd3 =  asi_fifo0_q[1] &  asi_fifo0_q[0] & (tlb_ld_inst3 | tlb_st_inst3) ;
+
+dffe_s #(2)  asiq_fifo_1 (
+        .din    (asi_fifo1_din[1:0]),
+        .q      (asi_fifo1_q[1:0]),
+        .en     (asi_fifo1_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+dffre_s  asiqv_fifo_1 (
+        .din    (asi_fifo1_vin),
+        .q      (asi_fifo1_vld),
+        .en     (asi_fifo1_en),	.rst	(asi_fifo1_rst),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+dffe_s #(2)  asiq_fifo_2 (
+        .din    (asi_fifo2_din[1:0]),
+        .q      (asi_fifo2_q[1:0]),
+        .en     (asi_fifo2_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+dffre_s   asiqv_fifo_2 (
+        .din    (asi_fifo2_vin),
+        .q      (asi_fifo2_vld),
+        .en     (asi_fifo2_en),	.rst	(asi_fifo2_rst),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+dffe_s #(2)  asiq_fifo_3 (
+        .din    (asi_fifo3_din[1:0]),
+        .q      (asi_fifo3_q[1:0]),
+        .en     (asi_fifo3_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+dffre_s  asiqv_fifo_3 (
+        .din    (asi_fifo3_vin),
+        .q      (asi_fifo3_vld),
+        .en     (asi_fifo3_en),	.rst	(asi_fifo3_rst),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+//---
+
+// An access is initiated when some thread is selected, that thread is not
+// being reset in the same cycle, and no access is already pending.
+assign  tlb_access_initiated =
+  ((tlb_access_sel_thrd0 & ~tlb_access_rst0) |
+   (tlb_access_sel_thrd1 & ~tlb_access_rst1) |
+   (tlb_access_sel_thrd2 & ~tlb_access_rst2) |
+   (tlb_access_sel_thrd3 & ~tlb_access_rst3)) & ~tlb_access_pending ;
+   
+
+// Clears the pending flag: reset, any tlu stxa ack or ldxa data valid, any
+// thread's ifu read completion, an ifu asi ack, or a diagnostic write.
+// Unlike tlb_access_rstN, these terms are not thread-qualified.
+wire  tlb_blocking_rst ;
+assign  tlb_blocking_rst = reset |
+  tlu_lsu_stxa_ack | tlu_lsu_ldxa_async_data_vld |
+  ifu_tlb_rd_cmplt0 | ifu_tlb_rd_cmplt1 | 
+  ifu_tlb_rd_cmplt2 | ifu_tlb_rd_cmplt3 | 
+  ifu_lsu_asi_ack |
+  lsu_diagnstc_wr_src_sel_e;
+
+
+// MMU/IFU/DIAG Action is pending
+// din and en are the same signal, so the flop can only be set through the
+// enable path and is cleared through rst.
+dffre_s #(1)  tlbpnd (
+        .din    (tlb_access_initiated),
+        .q      (tlb_access_pending),
+        .rst    (tlb_blocking_rst),        .en     (tlb_access_initiated),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+/*wire	asi_pend0,asi_pend1,asi_pend2,asi_pend3 ;
+dffre_s #(4)  asithrdpnd (
+      	.din	({tlb_access_sel_thrd3,tlb_access_sel_thrd2,
+		            tlb_access_sel_thrd1,tlb_access_sel_thrd0}),
+        .q    ({asi_pend3,asi_pend2,asi_pend1,asi_pend0}),
+        .rst	(tlb_blocking_rst), 	.en     (tlb_access_initiated),
+        .clk  (clk),
+        .se   (se),       .si (),          .so ()
+        );
+
+wire	asi_pend_non_thrd0 ;
+assign	asi_pend_non_thrd0 = asi_pend1 | asi_pend2 | asi_pend3 ;
+wire	asi_pend_non_thrd1 ;
+assign	asi_pend_non_thrd1 = asi_pend0 | asi_pend2 | asi_pend3 ;
+wire	asi_pend_non_thrd2 ;
+assign	asi_pend_non_thrd2 = asi_pend0 | asi_pend1 | asi_pend3 ;
+wire	asi_pend_non_thrd3 ;
+assign	asi_pend_non_thrd3 = asi_pend0 | asi_pend1 | asi_pend2 ; */
+
+// Would like to remove st_inst_vld_m. This is however required to
+// source rs3 data to tlu/mmu. Send rs3_data directly !!!
+
+// tlb_access_blocked prevents a new thread selection. It is asserted when
+// diag_wr_src_d1 is clear and either:
+//  - an access is pending and ifu_asi_vld_d1 is clear, or
+//  - st_sqsh_m is set, unless ifu_asi_vld_d1 is set without ifu_asi_ack_d1.
+wire	diag_wr_src, diag_wr_src_d1, diag_wr_src_d2 ;
+   
+assign  tlb_access_blocked = 
+  (tlb_access_pending & ~ifu_asi_vld_d1 & ~diag_wr_src_d1) |
+  (st_sqsh_m & ~(ifu_asi_vld_d1 & ~ifu_asi_ack_d1) & ~diag_wr_src_d1) ; // Bug 4875
+  //(st_inst_vld_m & ~lsu_ifu_asi_vld_d1 & ~diag_wr_src_d1) ;
+
+// fixed priority. (TODO: change priority to round-robin !!!)
+// tlb accesses are issued speculatively in the m-stage and are
+// flushed in the g-stage in the tlu if necessary.
+// diagnstc writes will block for cache/tag access.
+// This means that access can be blocked if a st is 
+// in the m-stage or a memref in the d stage. (!!!)
+// In this case, it is better to stage a different
+// bus for rs3 data.
+
+// Note : Selection Process.
+// 1. Priority Encoded selection if no access pending.
+// This may have to be changed to prevent bias towards a
+// single thread.
+// 2. Once thread is selected :
+//	a. generate single pulse - mmu. tlb_access_blocked
+//	used for this purpose.
+//	b. generate window - ifu/diag. To prevent spurious change
+// 	in selects, asi_pend_non_thrdx and tlb_access_pending
+//	qual. is required.
+
+
+// Per-thread access select: asi_sel_thrdN is passed through only when
+// neither rst_tri_en nor tlb_access_blocked is asserted.
+assign  tlb_access_sel_thrd0 = asi_sel_thrd0 & ~(rst_tri_en | tlb_access_blocked) ;
+assign  tlb_access_sel_thrd1 = asi_sel_thrd1 & ~(rst_tri_en | tlb_access_blocked) ;
+assign  tlb_access_sel_thrd2 = asi_sel_thrd2 & ~(rst_tri_en | tlb_access_blocked) ;
+assign  tlb_access_sel_thrd3 = asi_sel_thrd3 & ~(rst_tri_en | tlb_access_blocked) ;
+
+//assign  tlb_access_sel_thrd0 = ~rst_tri_en & ( 
+//  (tlb_ld_inst0 | tlb_st_inst0) & ~tlb_access_blocked & 
+//  ~asi_pend_non_thrd0 );
+//assign  tlb_access_sel_thrd1 = ~rst_tri_en & (
+//  (tlb_ld_inst1 | tlb_st_inst1) & 
+//  ~(((tlb_ld_inst0 | tlb_st_inst0) & ~tlb_access_pending) | tlb_access_blocked) & 
+//  ~asi_pend_non_thrd1 );
+//assign  tlb_access_sel_thrd2 = ~rst_tri_en & ( 
+//  (tlb_ld_inst2 | tlb_st_inst2) & 
+//  ~(((tlb_ld_inst0 | tlb_st_inst0 | tlb_ld_inst1 | tlb_st_inst1) & ~tlb_access_pending) 
+//		| tlb_access_blocked) &
+//  ~asi_pend_non_thrd2 );
+//assign  tlb_access_sel_thrd3 = ~rst_tri_en & ( 
+//  (tlb_ld_inst3 | tlb_st_inst3) & 
+//  ~(((tlb_ld_inst0 | tlb_st_inst0 | tlb_ld_inst1 | tlb_st_inst1 | 
+//    tlb_ld_inst2 | tlb_st_inst2) & ~tlb_access_pending) | tlb_access_blocked) &
+//  ~asi_pend_non_thrd3 );
+        
+// One-cycle delayed copy (_d1) of the four per-thread access selects.
+dff_s #(4) selt_stgd1 (
+        .clk    (clk),
+        .din    ({tlb_access_sel_thrd3,
+                  tlb_access_sel_thrd2,
+                  tlb_access_sel_thrd1,
+                  tlb_access_sel_thrd0}),
+        .q      ({tlb_access_sel_thrd3_d1,
+                  tlb_access_sel_thrd2_d1,
+                  tlb_access_sel_thrd1_d1,
+                  tlb_access_sel_thrd0_d1}),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+   // Default select (bit 3 of the registered diagnostic data/va selects):
+   // asserted under rst_tri_en, or when none of threads 0-2 is selected.
+   wire tlb_access_sel_default = rst_tri_en |
+        (~tlb_access_sel_thrd0 & ~tlb_access_sel_thrd1 & ~tlb_access_sel_thrd2);
+   
+// Two flops fed by the same {default, thrd2, thrd1, thrd0} vector:
+// one drives lsu_diagnstc_data_sel, the other lsu_diagnstc_va_sel.
+dff_s #(4) lsu_diagnstc_data_sel_ff (
+        .clk    (clk),
+        .din    ({tlb_access_sel_default,
+                  tlb_access_sel_thrd2,
+                  tlb_access_sel_thrd1,
+                  tlb_access_sel_thrd0}),
+        .q      (lsu_diagnstc_data_sel[3:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+dff_s #(4) lsu_diagnstc_va_sel_ff (
+        .clk    (clk),
+        .din    ({tlb_access_sel_default,
+                  tlb_access_sel_thrd2,
+                  tlb_access_sel_thrd1,
+                  tlb_access_sel_thrd0}),
+        .q      (lsu_diagnstc_va_sel[3:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+   
+// Begin - Bug 3487
+// Squash: a valid m-stage store to an internal ASI in alternate space needs
+// the bus for its stxa data.
+assign	st_sqsh_m = lsu_alt_space_m & asi_internal_m & st_inst_vld_m ;
+// While squashing, the m-stage thread chooses the store data; otherwise the
+// selected tlb-access thread does. Bit 3 is set when bits 2:0 are all clear.
+assign  tlb_st_data_sel_m[0] = (thread0_m & st_sqsh_m) | (~st_sqsh_m & tlb_access_sel_thrd0) ;
+assign  tlb_st_data_sel_m[1] = (thread1_m & st_sqsh_m) | (~st_sqsh_m & tlb_access_sel_thrd1) ;
+assign  tlb_st_data_sel_m[2] = (thread2_m & st_sqsh_m) | (~st_sqsh_m & tlb_access_sel_thrd2) ;
+assign  tlb_st_data_sel_m[3] =
+  ~(tlb_st_data_sel_m[2] | tlb_st_data_sel_m[1] | tlb_st_data_sel_m[0]) ;
+
+// Active-low enable: low only when an IFU ASI access is valid and a tlb
+// access is being initiated in the same cycle.
+assign	lsu_ifu_asi_data_en_l = ~ifu_asi_vld | ~tlb_access_initiated ;
+
+// End - Bug 3487
+
+/*assign  tlb_st_data_sel_m[0] = tlb_access_sel_thrd0 | ((st_inst_vld_m & thread0_m) & tlb_access_blocked) ;
+assign  tlb_st_data_sel_m[1] = tlb_access_sel_thrd1 | ((st_inst_vld_m & thread1_m) & tlb_access_blocked) ;
+assign  tlb_st_data_sel_m[2] = tlb_access_sel_thrd2 | ((st_inst_vld_m & thread2_m) & tlb_access_blocked) ;
+assign  tlb_st_data_sel_m[3] = ~|tlb_st_data_sel_m[2:0];*/
+
+//assign	lsu_tlb_st_sel_m[3:0] = tlb_st_data_sel_m[3:0] ;
+// rst_tri_en forces the select to 4'b1000: bits 2:0 are masked off and
+// bit 3 is forced on.
+assign	lsu_tlb_st_sel_m[2:0] = tlb_st_data_sel_m[2:0] & {3{~rst_tri_en}} ;
+assign	lsu_tlb_st_sel_m[3]   = rst_tri_en | tlb_st_data_sel_m[3] ;
+
+// Load indication to the tlu: the selected thread has a tlb load whose ASI
+// is not flagged as nontlb.
+assign  lsu_tlu_tlb_ld_inst_m =
+  (tlb_ld_inst0 & ~nontlb_asi0 & tlb_access_sel_thrd0) |
+  (tlb_ld_inst1 & ~nontlb_asi1 & tlb_access_sel_thrd1) |
+  (tlb_ld_inst2 & ~nontlb_asi2 & tlb_access_sel_thrd2) |
+  (tlb_ld_inst3 & ~nontlb_asi3 & tlb_access_sel_thrd3) ;
+
+// Store indication, same qualification.
+// diagnostic write for dside will not go thru tlu.
+assign  lsu_tlu_tlb_st_inst_m =
+  (tlb_st_inst0 & ~nontlb_asi0 & tlb_access_sel_thrd0) |
+  (tlb_st_inst1 & ~nontlb_asi1 & tlb_access_sel_thrd1) |
+  (tlb_st_inst2 & ~nontlb_asi2 & tlb_access_sel_thrd2) |
+  (tlb_st_inst3 & ~nontlb_asi3 & tlb_access_sel_thrd3) ;
+
+// Encode the per-thread selects into a 2-bit thread id.
+assign  lsu_tlu_tlb_access_tid_m[1:0] =
+  { tlb_access_sel_thrd3 | tlb_access_sel_thrd2,
+    tlb_access_sel_thrd3 | tlb_access_sel_thrd1 } ;
+
+// Diagnostic write to dcache
+// Per-thread dcache diagnostic write: selected thread, store, dcache
+// diagnostic ASI.
+assign  dc0_diagnstc_wr_en = dc0_diagnstc_asi & tlb_st_inst0 & tlb_access_sel_thrd0 ;
+assign  dc1_diagnstc_wr_en = dc1_diagnstc_asi & tlb_st_inst1 & tlb_access_sel_thrd1 ;
+assign  dc2_diagnstc_wr_en = dc2_diagnstc_asi & tlb_st_inst2 & tlb_access_sel_thrd2 ;
+assign  dc3_diagnstc_wr_en = dc3_diagnstc_asi & tlb_st_inst3 & tlb_access_sel_thrd3 ;
+assign  dc_diagnstc_wr_en =
+  |{dc3_diagnstc_wr_en, dc2_diagnstc_wr_en, dc1_diagnstc_wr_en, dc0_diagnstc_wr_en} ;
+
+// Diagnostic write to dtag/vld, same structure with the dtagv ASI decode.
+assign  dtagv0_diagnstc_wr_en = dtagv0_diagnstc_asi & tlb_st_inst0 & tlb_access_sel_thrd0 ;
+assign  dtagv1_diagnstc_wr_en = dtagv1_diagnstc_asi & tlb_st_inst1 & tlb_access_sel_thrd1 ;
+assign  dtagv2_diagnstc_wr_en = dtagv2_diagnstc_asi & tlb_st_inst2 & tlb_access_sel_thrd2 ;
+assign  dtagv3_diagnstc_wr_en = dtagv3_diagnstc_asi & tlb_st_inst3 & tlb_access_sel_thrd3 ;
+assign  dtagv_diagnstc_wr_en =
+  |{dtagv3_diagnstc_wr_en, dtagv2_diagnstc_wr_en, dtagv1_diagnstc_wr_en, dtagv0_diagnstc_wr_en} ;
+
+// If a diagnostic access is selected in a cycle, then the earliest the
+// e-stage can occur for the write is 2-cycles later.
+
+// Any diagnostic write (dcache or dtag/vld) selected this cycle.
+assign  diag_wr_src = dc_diagnstc_wr_en | dtagv_diagnstc_wr_en ;
+
+   // Both stage inputs are gated by ~lsu_diagnstc_wr_src_sel_e, so the
+   // staged request is dropped in the cycle the write source select fires.
+   wire diag_wr_src_with_rst = ~lsu_diagnstc_wr_src_sel_e & diag_wr_src;
+
+dff_s #(1) diagwr_d1 (
+        .clk    (clk),
+        .din    (diag_wr_src_with_rst),
+        .q      (diag_wr_src_d1),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+   wire diag_wr_src_d1_with_rst = ~lsu_diagnstc_wr_src_sel_e & diag_wr_src_d1;
+
+dff_s #(1) diagwr_d2 (
+        .clk    (clk),
+        .din    (diag_wr_src_d1_with_rst),
+        .q      (diag_wr_src_d2),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+   
+// If there is no memory reference, then the diag access is free to go.
+// tlb_access_blocked must be set appropriately.
+wire diag_wr_src_sel_d1, diag_wr_src_sel_din;
+
+//bug4057: kill diagnostic write if dfq has valid requests to l1d$
+//assign diag_wr_src_sel_din = diag_wr_src_d2 & ~memref_e;
+assign diag_wr_src_sel_din = diag_wr_src_d2 & ~memref_e & ~lsu_dfq_vld;
+
+// High only in the first cycle diag_wr_src_sel_din is high: the current
+// value is qualified by the inverse of its one-cycle delayed copy.
+assign  lsu_diagnstc_wr_src_sel_e = diag_wr_src_sel_din & ~diag_wr_src_sel_d1 ;
+
+dff_s #(1) diagwrsel_d1 (
+        .clk    (clk),
+        .din    (diag_wr_src_sel_din),
+        .q      (diag_wr_src_sel_d1),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// Decode for diagnostic cache/dtag/vld write 
+   //wire [13:11] lngltncy_ldst_va;
+   
+   //assign lngltncy_ldst_va[13:11]= lsu_lngltncy_ldst_va[13:11];
+
+//assign  lsu_diagnstc_wr_way_e[0] = ~lngltncy_ldst_va[12] & ~lngltncy_ldst_va[11] ;
+//assign  lsu_diagnstc_wr_way_e[1] = ~lngltncy_ldst_va[12] &  lngltncy_ldst_va[11] ;
+//assign  lsu_diagnstc_wr_way_e[2] =  lngltncy_ldst_va[12] & ~lngltncy_ldst_va[11] ;
+//assign  lsu_diagnstc_wr_way_e[3] =  lngltncy_ldst_va[12] &  lngltncy_ldst_va[11] ;
+
+// Parity inversion is requested only for a dtag/vld diagnostic write, in the
+// cycle the diagnostic write source is selected.
+assign  lsu_diagnstc_dtagv_prty_invrt_e =
+	lsu_diagnstc_wr_src_sel_e & dtagv_diagnstc_wr_en & lsu_diag_va_prty_invrt ;
+
+// ASI Interface to IFU
+
+// Load to an IFU (non-tlb) ASI from the selected thread.
+assign  lsu_ifu_asi_load =
+  (ifu_nontlb0_asi & tlb_ld_inst0 & tlb_access_sel_thrd0) |
+  (ifu_nontlb1_asi & tlb_ld_inst1 & tlb_access_sel_thrd1) |
+  (ifu_nontlb2_asi & tlb_ld_inst2 & tlb_access_sel_thrd2) |
+  (ifu_nontlb3_asi & tlb_ld_inst3 & tlb_access_sel_thrd3) ;
+
+// Store to an IFU (non-tlb) ASI from the selected thread.
+assign  ifu_asi_store =
+  (ifu_nontlb0_asi & tlb_st_inst0 & tlb_access_sel_thrd0) |
+  (ifu_nontlb1_asi & tlb_st_inst1 & tlb_access_sel_thrd1) |
+  (ifu_nontlb2_asi & tlb_st_inst2 & tlb_access_sel_thrd2) |
+  (ifu_nontlb3_asi & tlb_st_inst3 & tlb_access_sel_thrd3) ;
+
+// Either kind of IFU ASI access.
+assign  ifu_asi_vld = ifu_asi_store | lsu_ifu_asi_load ;
+
+// One-cycle delayed copies of the IFU ASI valid and of the IFU's ack.
+dff_s #(2) iasiv_d1 (
+        .clk    (clk),
+        .din    ({ifu_asi_vld,    ifu_lsu_asi_ack}),
+        .q      ({ifu_asi_vld_d1, ifu_asi_ack_d1}),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// Bug 3932 - delay asi_vld for ifu.
+// The delayed valid is dropped once the delayed ack is seen.
+assign	lsu_ifu_asi_vld = ~ifu_asi_ack_d1 & ifu_asi_vld_d1 ;
+
+// An IFU ASI store completes in the cycle it is acknowledged; the
+// completion enable is also staged by one cycle.
+assign	ifu_asi_store_cmplt_en = ifu_lsu_asi_ack & ifu_asi_store ;
+dff_s #(1) iasist_d1 (
+        .clk    (clk),
+        .din    (ifu_asi_store_cmplt_en),
+        .q      (ifu_asi_store_cmplt_en_d1),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// The IFU sees the same thread id that is sent to the tlu.
+assign  lsu_ifu_asi_thrid[1:0] = lsu_tlu_tlb_access_tid_m[1:0] ;
+
+
+//=========================================================================================
+//  MEMBAR/FLUSH HANDLING
+//=========================================================================================
+
+// Check for skids in this area - verification.
+
+wire [3:0] no_spc_rmo_st ;
+
+// Can membar/flush cause switch out from front end ??? Need to remove from
+// ldst_miss if case.
+// membar/flush will both swo thread and assert flush.
+// membar will signal completion once stb for thread empty
+// flush  will signal completion once flush pkt is visible at head of cfq and
+// i-side invalidates are complete
+// ** flush bit needs to be added to dfq **
+
+// Stage the membar / flush instruction indications from e to m.
+dff_s #(2) bsync_stgm (
+        .clk    (clk),
+        .din    ({ifu_tlu_mb_inst_e, ifu_tlu_flsh_inst_e}),
+        .q      ({mbar_inst_m,       flsh_inst_m}),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+assign	lsu_flsh_inst_m = flsh_inst_m ;
+
+// Next stage; "unflushed" because these are not yet qualified with
+// lsu_inst_vld_w.
+wire  mbar_inst_unflushed, flsh_inst_unflushed ;
+
+dff_s #(2) bsync_stgg (
+        .clk    (clk),
+        .din    ({mbar_inst_m,         flsh_inst_m}),
+        .q      ({mbar_inst_unflushed, flsh_inst_unflushed}),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+wire	[3:0]	flsh_cmplt_d1 ;
+/*dff  #(4) flshcmplt (
+        .din    (lsu_dfq_flsh_cmplt[3:0]),
+        .q      (flsh_cmplt_d1[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );*/
+
+// now flopped in dctl
+// Pass-through of the per-thread flush completion.
+assign	flsh_cmplt_d1[3:0] = lsu_dfq_flsh_cmplt[3:0] ;
+
+// Membar / flush qualified with the w-stage instruction valid.
+assign  mbar_inst_g = lsu_inst_vld_w & mbar_inst_unflushed ;
+assign  flsh_inst_g = lsu_inst_vld_w & flsh_inst_unflushed ;
+
+// THREAD0 MEMBAR/FLUSH
+
+// barrier sync
+// The thread-0 barrier state is cleared by reset, by a pending flush whose
+// completion arrives, or by a pending membar once the thread's store buffer
+// is empty and its rmo ack count is zero (no_spc_rmo_st).
+assign bsync0_reset =
+        reset
+      | (flsh_vld0 & flsh_cmplt_d1[0])
+      | (mbar_vld0 & no_spc_rmo_st[0] & lsu_stb_empty[0]) ;
+
+// Thread-0 membar / flush in the g-stage.
+assign  mbar_inst0_g  = thread0_g & mbar_inst_g ;
+assign  flush_inst0_g = thread0_g & flsh_inst_g ;
+
+// Capture only for a valid, unflushed instruction. lsu_inst_vld_w is also
+// already part of mbar_inst_g / flsh_inst_g.
+assign  bsync0_en = lsu_inst_vld_w & ~dctl_flush_pipe_w & (mbar_inst0_g | flush_inst0_g) ;
+
+// bsyncs are set in g-stage to allow earlier stores in pipe to drain to
+// thread's stb
+dffre_s #(2) bsync_vld0 (
+        .clk    (clk),
+        .rst    (bsync0_reset),
+        .en     (bsync0_en),
+        .din    ({mbar_inst0_g, flush_inst0_g}),
+        .q      ({mbar_vld0,    flsh_vld0}),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// THREAD1 MEMBAR/FLUSH
+
+// barrier sync
+// The thread-1 barrier state is cleared by reset, by a pending flush whose
+// completion arrives, or by a pending membar once the thread's store buffer
+// is empty and its rmo ack count is zero (no_spc_rmo_st).
+assign bsync1_reset =
+        reset
+      | (flsh_vld1 & flsh_cmplt_d1[1])
+      | (mbar_vld1 & no_spc_rmo_st[1] & lsu_stb_empty[1]) ;
+
+// Thread-1 membar / flush in the g-stage.
+assign  mbar_inst1_g  = thread1_g & mbar_inst_g ;
+assign  flush_inst1_g = thread1_g & flsh_inst_g ;
+
+// Capture only for a valid, unflushed instruction. lsu_inst_vld_w is also
+// already part of mbar_inst_g / flsh_inst_g.
+assign  bsync1_en = lsu_inst_vld_w & ~dctl_flush_pipe_w & (mbar_inst1_g | flush_inst1_g) ;
+
+// bsyncs are set in g-stage to allow earlier stores in pipe to drain to
+// thread's stb
+dffre_s #(2) bsync_vld1 (
+        .clk    (clk),
+        .rst    (bsync1_reset),
+        .en     (bsync1_en),
+        .din    ({mbar_inst1_g, flush_inst1_g}),
+        .q      ({mbar_vld1,    flsh_vld1}),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// THREAD2 MEMBAR/FLUSH
+
+// barrier sync
+// The thread-2 barrier state is cleared by reset, by a pending flush whose
+// completion arrives, or by a pending membar once the thread's store buffer
+// is empty and its rmo ack count is zero (no_spc_rmo_st).
+assign bsync2_reset =
+        reset
+      | (flsh_vld2 & flsh_cmplt_d1[2])
+      | (mbar_vld2 & no_spc_rmo_st[2] & lsu_stb_empty[2]) ;
+
+// Thread-2 membar / flush in the g-stage.
+assign  mbar_inst2_g  = thread2_g & mbar_inst_g ;
+assign  flush_inst2_g = thread2_g & flsh_inst_g ;
+
+// Capture only for a valid, unflushed instruction. lsu_inst_vld_w is also
+// already part of mbar_inst_g / flsh_inst_g.
+assign  bsync2_en = lsu_inst_vld_w & ~dctl_flush_pipe_w & (mbar_inst2_g | flush_inst2_g) ;
+
+// bsyncs are set in g-stage to allow earlier stores in pipe to drain to
+// thread's stb
+dffre_s #(2) bsync_vld2 (
+        .clk    (clk),
+        .rst    (bsync2_reset),
+        .en     (bsync2_en),
+        .din    ({mbar_inst2_g, flush_inst2_g}),
+        .q      ({mbar_vld2,    flsh_vld2}),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// THREAD3 MEMBAR/FLUSH
+
+// barrier sync
+// The thread-3 barrier state is cleared by reset, by a pending flush whose
+// completion arrives, or by a pending membar once the thread's store buffer
+// is empty and its rmo ack count is zero (no_spc_rmo_st).
+assign bsync3_reset =
+        reset
+      | (flsh_vld3 & flsh_cmplt_d1[3])
+      | (mbar_vld3 & no_spc_rmo_st[3] & lsu_stb_empty[3]) ;
+
+// Thread-3 membar / flush in the g-stage.
+assign  mbar_inst3_g  = thread3_g & mbar_inst_g ;
+assign  flush_inst3_g = thread3_g & flsh_inst_g ;
+
+// Capture only for a valid, unflushed instruction. lsu_inst_vld_w is also
+// already part of mbar_inst_g / flsh_inst_g.
+assign  bsync3_en = lsu_inst_vld_w & ~dctl_flush_pipe_w & (mbar_inst3_g | flush_inst3_g) ;
+
+// bsyncs are set in g-stage to allow earlier stores in pipe to drain to
+// thread's stb
+dffre_s #(2) bsync_vld3 (
+        .clk    (clk),
+        .rst    (bsync3_reset),
+        .en     (bsync3_en),
+        .din    ({mbar_inst3_g, flush_inst3_g}),
+        .q      ({mbar_vld3,    flsh_vld3}),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+//=========================================================================================
+//  RMO Store Ack Count
+//=========================================================================================
+
+// Each thread maintains an outstanding rmo ack count (originally 8b; the counters
+// below are 4b). To avoid overflow, it is the responsibility of software to insert
+// a membar after at most 256 rmo stores.
+// 03/08/2003 now change from 256 to 16
+// 8 outstanding instead of 16   
+
+// Current per-thread ack counts and their next-state values.
+wire	[3:0]	ackcnt0, ackcnt1, ackcnt2, ackcnt3 ;
+wire	[3:0]	ackcnt0_din, ackcnt1_din, ackcnt2_din, ackcnt3_din ;
+
+// st_rmo_issue/st_rmo_ack vectors are one hot.
+// Adders(2). Two are needed because one thread can be incremented while a
+// different thread is decremented in the same cycle.
+wire 	[3:0]	ackcnt_incr, ackcnt_decr ;
+wire 	[3:0]	ackcnt_mx_incr, ackcnt_mx_decr ;
+
+   // Mux selects: issuing thread feeds the incrementer, acked thread the
+   // decrementer.
+   wire [3:0] acknt_mx_incr_sel = lsu_stb_rmo_st_issue[3:0];
+   wire [3:0] acknt_mx_decr_sel = lsu_cpx_rmo_st_ack[3:0];
+
+// AND-OR mux of the count belonging to the issuing thread.
+assign ackcnt_mx_incr[3:0] =
+  ({4{acknt_mx_incr_sel[0]}} & ackcnt0[3:0]) |
+  ({4{acknt_mx_incr_sel[1]}} & ackcnt1[3:0]) |
+  ({4{acknt_mx_incr_sel[2]}} & ackcnt2[3:0]) |
+  ({4{acknt_mx_incr_sel[3]}} & ackcnt3[3:0]) ;
+
+// AND-OR mux of the count belonging to the acked thread.
+assign ackcnt_mx_decr[3:0] =
+  ({4{acknt_mx_decr_sel[0]}} & ackcnt0[3:0]) |
+  ({4{acknt_mx_decr_sel[1]}} & ackcnt1[3:0]) |
+  ({4{acknt_mx_decr_sel[2]}} & ackcnt2[3:0]) |
+  ({4{acknt_mx_decr_sel[3]}} & ackcnt3[3:0]) ;
+
+assign	ackcnt_incr[3:0] = ackcnt_mx_incr[3:0] + 4'd1 ;
+assign	ackcnt_decr[3:0] = ackcnt_mx_decr[3:0] - 4'd1 ;
+
+// A thread receiving an ack takes the decremented value, otherwise the
+// incremented one.
+assign	ackcnt0_din[3:0] = lsu_cpx_rmo_st_ack[0] ? ackcnt_decr[3:0] : ackcnt_incr[3:0] ;
+assign	ackcnt1_din[3:0] = lsu_cpx_rmo_st_ack[1] ? ackcnt_decr[3:0] : ackcnt_incr[3:0] ;
+assign	ackcnt2_din[3:0] = lsu_cpx_rmo_st_ack[2] ? ackcnt_decr[3:0] : ackcnt_incr[3:0] ;
+assign	ackcnt3_din[3:0] = lsu_cpx_rmo_st_ack[3] ? ackcnt_decr[3:0] : ackcnt_incr[3:0] ;
+
+wire	[3:0]	ackcnt_en ;
+// Update a thread's count when exactly one of issue / ack occurs for it;
+// if both occur in the same cycle then they cancel out.
+assign	ackcnt_en[3:0] = lsu_stb_rmo_st_issue[3:0] ^ lsu_cpx_rmo_st_ack[3:0] ;
+
+// Thread0
+// 4-bit ack count, cleared by reset and updated when ackcnt_en[0] is high.
+dffre_s #(4) ackcnt0_ff (
+        .clk    (clk),
+        .rst    (reset),
+        .en     (ackcnt_en[0]),
+        .din    (ackcnt0_din[3:0]),
+        .q      (ackcnt0[3:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// Thread1
+dffre_s #(4) ackcnt1_ff (
+        .clk    (clk),
+        .rst    (reset),
+        .en     (ackcnt_en[1]),
+        .din    (ackcnt1_din[3:0]),
+        .q      (ackcnt1[3:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// Thread2
+dffre_s #(4) ackcnt2_ff (
+        .clk    (clk),
+        .rst    (reset),
+        .en     (ackcnt_en[2]),
+        .din    (ackcnt2_din[3:0]),
+        .q      (ackcnt2[3:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// Thread3
+dffre_s #(4) ackcnt3_ff (
+        .clk    (clk),
+        .rst    (reset),
+        .en     (ackcnt_en[3]),
+        .din    (ackcnt3_din[3:0]),
+        .q      (ackcnt3[3:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// A thread has no outstanding rmo stores when its ack count is zero.
+assign	no_spc_rmo_st[3:0] = { ~|ackcnt3[3:0], ~|ackcnt2[3:0],
+                               ~|ackcnt1[3:0], ~|ackcnt0[3:0] } ;
+
+//8 outstanding rmo st will throttle the PCX issue st
+// (bit 3 of the count is the throttle indication)
+assign lsu_outstanding_rmo_st_max [0] = ackcnt0[3];
+assign lsu_outstanding_rmo_st_max [1] = ackcnt1[3];
+assign lsu_outstanding_rmo_st_max [2] = ackcnt2[3];
+assign lsu_outstanding_rmo_st_max [3] = ackcnt3[3];
+  
+// streaming unit does not have to care about outstanding rmo sparc-stores.
+// membar will take care of that. spu must insert appr. delay in sampling signal.
+
+/*dff #(4)  spustb_d1 ( // moved to stb_rwctl
+        .din    (lsu_stb_empty[3:0]),
+        .q      (lsu_spu_stb_empty[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); */              
+
+//assign		lsu_spu_stb_empty[3:0] = lsu_stb_empty[3:0] ;
+
+//=========================================================================================
+//  Thread Staging
+//=========================================================================================
+
+// Thread staging can be optimized. 
+
+// s -> d stage of the thread id.
+dff_s #(2) thrid_stgd (
+        .clk    (clk),
+        .din    (ifu_lsu_thrid_s[1:0]),
+        .q      (thrid_d[1:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// Second flop on the same input, driving lsu_tlu_thrid_d separately
+// (the commented-out assign below would have aliased it to thrid_d).
+dff_s #(2) lsu_tlu_thrid_stgd (
+        .clk    (clk),
+        .din    (ifu_lsu_thrid_s[1:0]),
+        .q      (lsu_tlu_thrid_d[1:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+//assign	lsu_tlu_thrid_d[1:0] = thrid_d[1:0] ;
+
+// 2-to-4 decode of the d-stage thread id.
+assign  thread0_d = ~(thrid_d[1] | thrid_d[0]) ;
+assign  thread1_d =  thrid_d[0] & ~thrid_d[1] ;
+assign  thread2_d =  thrid_d[1] & ~thrid_d[0] ;
+assign  thread3_d = &thrid_d[1:0] ;
+
+// d -> e stage of the thread id.
+dff_s #(2) thrid_stge (
+        .clk    (clk),
+        .din    (thrid_d[1:0]),
+        .q      (thrid_e[1:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// 2-to-4 decode of the e-stage thread id.
+assign  thread0_e = ~(thrid_e[1] | thrid_e[0]) ;
+assign  thread1_e =  thrid_e[0] & ~thrid_e[1] ;
+assign  thread2_e =  thrid_e[1] & ~thrid_e[0] ;
+assign  thread3_e = &thrid_e[1:0] ;
+
+// e -> m stage of the thread id.
+dff_s #(2) thrid_stgm (
+        .clk    (clk),
+        .din    (thrid_e[1:0]),
+        .q      (thrid_m[1:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// 2-to-4 decode of the m-stage thread id.
+assign  thread0_m = ~(thrid_m[1] | thrid_m[0]) ;
+assign  thread1_m =  thrid_m[0] & ~thrid_m[1] ;
+assign  thread2_m =  thrid_m[1] & ~thrid_m[0] ;
+assign  thread3_m = &thrid_m[1:0] ;
+
+// Buffered copies of the m-stage decodes, output as lsu_dctldp_threadN_m.
+bw_u1_buf_30x UZfix_thread0_m  ( .z(lsu_dctldp_thread0_m),  .a(thread0_m)  );
+bw_u1_buf_30x UZfix_thread1_m  ( .z(lsu_dctldp_thread1_m),  .a(thread1_m)  );
+bw_u1_buf_30x UZfix_thread2_m  ( .z(lsu_dctldp_thread2_m),  .a(thread2_m)  );
+bw_u1_buf_30x UZfix_thread3_m  ( .z(lsu_dctldp_thread3_m),  .a(thread3_m)  );
+   
+// m -> g stage of the thread id.
+dff_s #(2) thrid_stgg (
+        .clk    (clk),
+        .din    (thrid_m[1:0]),
+        .q      (thrid_g[1:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// 2-to-4 decode of the g-stage thread id.
+assign  thread0_g = ~(thrid_g[1] | thrid_g[0]) ;
+assign  thread1_g =  thrid_g[0] & ~thrid_g[1] ;
+assign  thread2_g =  thrid_g[1] & ~thrid_g[0] ;
+assign  thread3_g = &thrid_g[1:0] ;
+
+// g -> w2 stage of the thread id.
+dff_s #(2) thrid_stgw2 (
+        .clk    (clk),
+        .din    (thrid_g[1:0]),
+        .q      (thrid_w2[1:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// 2-to-4 decode of the w2-stage thread id.
+assign  thread0_w2 = ~(thrid_w2[1] | thrid_w2[0]) ;
+assign  thread1_w2 =  thrid_w2[0] & ~thrid_w2[1] ;
+assign  thread2_w2 =  thrid_w2[1] & ~thrid_w2[0] ;
+assign  thread3_w2 = &thrid_w2[1:0] ;
+
+// w2 -> w3 stage of the thread id.
+dff_s #(2) thrid_stgw3 (
+        .clk    (clk),
+        .din    (thrid_w2[1:0]),
+        .q      (thrid_w3[1:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// 2-to-4 decode of the w3-stage thread id.
+assign  thread0_w3 = ~(thrid_w3[1] | thrid_w3[0]) ;
+assign  thread1_w3 =  thrid_w3[0] & ~thrid_w3[1] ;
+assign  thread2_w3 =  thrid_w3[1] & ~thrid_w3[0] ;
+assign  thread3_w3 = &thrid_w3[1:0] ;
+   
+//dff  #(4) thrid_stgw3 (
+//        .din    ({thread0_w2,thread1_w2,thread2_w2,thread3_w2}),
+//        .q      ({thread0_w3,thread1_w3,thread2_w3,thread3_w3}),
+//        .clk    (clk),
+//        .se     (se),       .si (),          .so ()
+//        );
+
+// ldxa thread id
+
+// TLU ldxa thread id and its 2-to-4 decode.
+assign  ldxa_thrid_w2[1:0] = tlu_lsu_ldxa_tid_w2[1:0] ;
+
+assign  tlu_ldxa_thread0_w2 = ~(ldxa_thrid_w2[1] | ldxa_thrid_w2[0]) ;
+assign  tlu_ldxa_thread1_w2 =  ldxa_thrid_w2[0] & ~ldxa_thrid_w2[1] ;
+assign  tlu_ldxa_thread2_w2 =  ldxa_thrid_w2[1] & ~ldxa_thrid_w2[0] ;
+assign  tlu_ldxa_thread3_w2 =  ldxa_thrid_w2[0] &  ldxa_thrid_w2[1] ;
+
+// SPU stxa-ack thread decode.
+assign  spu_stxa_thread0 = ~(spu_lsu_stxa_ack_tid[1] | spu_lsu_stxa_ack_tid[0]) ;
+assign  spu_stxa_thread1 =  spu_lsu_stxa_ack_tid[0] & ~spu_lsu_stxa_ack_tid[1] ;
+assign  spu_stxa_thread2 =  spu_lsu_stxa_ack_tid[1] & ~spu_lsu_stxa_ack_tid[0] ;
+assign  spu_stxa_thread3 =  spu_lsu_stxa_ack_tid[0] &  spu_lsu_stxa_ack_tid[1] ;
+
+// SPU ldxa thread decode.
+assign  spu_ldxa_thread0_w2 = ~(spu_lsu_ldxa_tid_w2[1] | spu_lsu_ldxa_tid_w2[0]) ;
+assign  spu_ldxa_thread1_w2 =  spu_lsu_ldxa_tid_w2[0] & ~spu_lsu_ldxa_tid_w2[1] ;
+assign  spu_ldxa_thread2_w2 =  spu_lsu_ldxa_tid_w2[1] & ~spu_lsu_ldxa_tid_w2[0] ;
+assign  spu_ldxa_thread3_w2 =  spu_lsu_ldxa_tid_w2[0] &  spu_lsu_ldxa_tid_w2[1] ;
+
+// IFU ldxa thread decode.
+assign  ifu_ldxa_thread0_w2 = ~(ifu_lsu_ldxa_tid_w2[1] | ifu_lsu_ldxa_tid_w2[0]) ;
+assign  ifu_ldxa_thread1_w2 =  ifu_lsu_ldxa_tid_w2[0] & ~ifu_lsu_ldxa_tid_w2[1] ;
+assign  ifu_ldxa_thread2_w2 =  ifu_lsu_ldxa_tid_w2[1] & ~ifu_lsu_ldxa_tid_w2[0] ;
+assign  ifu_ldxa_thread3_w2 =  ifu_lsu_ldxa_tid_w2[0] &  ifu_lsu_ldxa_tid_w2[1] ;
+
+// One-cycle delayed copy of the thread id sent to the IFU with an ASI access.
+wire	[1:0]	ifu_nontlb_asi_tid ;
+dff_s #(2) iasi_tid (
+        .clk    (clk),
+        .din    (lsu_ifu_asi_thrid[1:0]),
+        .q      (ifu_nontlb_asi_tid[1:0]),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// IFU stxa thread decode, from the delayed thread id.
+assign  ifu_stxa_thread0_w2 = ~(ifu_nontlb_asi_tid[1] | ifu_nontlb_asi_tid[0]) ;
+assign  ifu_stxa_thread1_w2 =  ifu_nontlb_asi_tid[0] & ~ifu_nontlb_asi_tid[1] ;
+assign  ifu_stxa_thread2_w2 =  ifu_nontlb_asi_tid[1] & ~ifu_nontlb_asi_tid[0] ;
+assign  ifu_stxa_thread3_w2 =  ifu_nontlb_asi_tid[0] &  ifu_nontlb_asi_tid[1] ;
+
+// TLU stxa-ack thread decode.
+assign  tlu_stxa_thread0_w2 = ~(tlu_lsu_stxa_ack_tid[1] | tlu_lsu_stxa_ack_tid[0]) ;
+assign  tlu_stxa_thread1_w2 =  tlu_lsu_stxa_ack_tid[0] & ~tlu_lsu_stxa_ack_tid[1] ;
+assign  tlu_stxa_thread2_w2 =  tlu_lsu_stxa_ack_tid[1] & ~tlu_lsu_stxa_ack_tid[0] ;
+assign  tlu_stxa_thread3_w2 =  tlu_lsu_stxa_ack_tid[0] &  tlu_lsu_stxa_ack_tid[1] ;
+
+//=========================================================================================
+//  Exception Handling
+//=========================================================================================
+
+
+// tlb related exceptions/errors
+//SC assign  tlb_daccess_excptn_e  =
+//SC  ((rd_only_ltlb_asi_e &  st_inst_vld_e)  |
+//SC   (wr_only_ltlb_asi_e &  ld_inst_vld_e)) & alt_space_e   ;
+
+//SC assign  tlb_daccess_error_e =
+//SC   ((dfill_tlb_asi_e & ~lsu_tlb_writeable)     | 
+//SC   (ifill_tlb_asi_e & ~ifu_lsu_tlb_writeable)) & st_inst_vld_e & alt_space_e ; 
+
+//SC dff  #(2) tlbex_stgm (
+//SC         .din    ({tlb_daccess_excptn_e,tlb_daccess_error_e}),
+//SC         .q      ({tlb_daccess_excptn_m,tlb_daccess_error_m}),
+//SC         .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+//SC         );
+
+//SC dff  #(2) tlbex_stgg (
+//SC         .din    ({tlb_daccess_excptn_m,tlb_daccess_error_m}),
+//SC         .q      ({tlb_daccess_excptn_g,tlb_daccess_error_g}),
+//SC         .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+//SC         );
+
+//assign  pstate_priv_m = 
+//  thread0_m ? tlu_lsu_pstate_priv[0] :
+//    thread1_m ? tlu_lsu_pstate_priv[1] :
+//      thread2_m ? tlu_lsu_pstate_priv[2] :
+//          tlu_lsu_pstate_priv[3] ;
+
+//SC mux4ds  #(1) pstate_priv_m_mux (
+//SC         .in0    (tlu_lsu_pstate_priv[0]),
+//SC         .in1    (tlu_lsu_pstate_priv[1]),
+//SC         .in2    (tlu_lsu_pstate_priv[2]),
+//SC         .in3    (tlu_lsu_pstate_priv[3]),
+//SC         .sel0   (thread0_m),  
+//SC         .sel1   (thread1_m),
+//SC         .sel2   (thread2_m),  
+//SC         .sel3   (thread3_m),
+//SC         .dout   (pstate_priv_m)
+//SC );
+   
+//SC dff  priv_stgg (
+//SC         .din    (pstate_priv_m),
+//SC         .q      (pstate_priv),
+//SC         .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+//SC         );
+
+// privilege violation - priv page accessed in user mode
+//SC assign  priv_pg_usr_mode =  // data access exception; TT=h30
+//SC   (ld_inst_vld_unflushed | st_inst_vld_unflushed) & ~(pstate_priv | hpv_priv) & tlb_rd_tte_data[`STLB_DATA_P] ;
+
+// protection violation - store to a page that does not have write permission
+//SC assign  nonwr_pg_st_access =  // data access protection; TT=h33
+//SC   st_inst_vld_unflushed   & 
+//SC   ~tlb_rd_tte_data[`STLB_DATA_W] & ~lsu_dtlb_bypass_g & tlb_cam_hit_g ;
+   //lsu_dtlb_bypass_g) ; // W=1 in bypass mode - In bypass mode this trap will never happen !!!
+
+//SC wire  daccess_prot ;
+//SC assign  daccess_prot = nonwr_pg_st_access  ;
+    //((~lsu_dtlb_bypass_g & tlb_cam_hit_g) | (tlb_byp_asi_g & lsu_alt_space_g)) ;
+
+// access to a page marked with the nfo with an asi other than nfo asi.
+//SC assign  nfo_pg_nonnfo_asi  =  // data access exception; TT=h30
+//SC   (ld_inst_vld_unflushed | st_inst_vld_unflushed) &   // any access
+//SC   ((~nofault_asi_g & lsu_alt_space_g) | ~lsu_alt_space_g) // in alternate space or not
+//SC   & tlb_rd_tte_data[`STLB_DATA_NFO] ;
+
+// as_if_usr asi accesses priv page.
+//SC assign  as_if_usr_priv_pg  =  // data access exception; TT=h30
+//SC   (ld_inst_vld_unflushed | st_inst_vld_unflushed) & as_if_user_asi_g & lsu_alt_space_g & 
+//SC       tlb_rd_tte_data[`STLB_DATA_P] ;
+
+
+// non-cacheable address - iospace or cp=0 (???)
+// atomic access to non-cacheable space.
+//SC assign  atm_access_w_nc = atomic_g & tlb_pgnum[39] ; // io space 
+
+// atomic inst with unsupported asi.
+//SC assign  atm_access_unsup_asi = atomic_g & ~atomic_asi_g & lsu_alt_space_g ;
+
+//SC wire  tlb_tte_vld_g ;
+//SC assign  tlb_tte_vld_g = ~lsu_dtlb_bypass_g & tlb_cam_hit_g ;
+
+//SC wire  pg_with_ebit ;
+//SC assign	pg_with_ebit = 
+//SC 	(tlb_rd_tte_data[`STLB_DATA_E] & tlb_tte_vld_g)  | // tte
+//SC         (lsu_dtlb_bypass_g & ~(phy_use_ec_asi_g & lsu_alt_space_g)) | // regular bypass 
+//SC         (tlb_byp_asi_g & ~phy_use_ec_asi_g & lsu_alt_space_g) ; // phy_byp
+	
+//SC wire  spec_access_epage ;
+//SC assign  spec_access_epage = 
+//SC   ((ld_inst_vld_unflushed & nofault_asi_g & lsu_alt_space_g) |  // spec load
+//SC   flsh_inst_g) & // flush inst
+//SC   pg_with_ebit ; // page with side effects
+//  tlb_rd_tte_data[`STLB_DATA_E] ; // page with side effects
+
+//SC wire  quad_asi_non_ldstda ;
+// quad-asi used with non ldda/stda
+// remove st_inst_vld - stquad unused
+// the equation may be incorrect - needs to be for a non-ldda
+//SC assign  quad_asi_non_ldstda = quad_asi_g & lsu_alt_space_g & ~ldst_dbl_g & 
+//SC      (ld_inst_vld_unflushed | st_inst_vld_unflushed) ;
+// need to put in similar exception for binit st
+//SC wire  binit_asi_non_ldda ;
+//SC assign  binit_asi_non_ldda = binit_quad_asi_g & lsu_alt_space_g & ~ldst_dbl_g & 
+//SC      (ld_inst_vld_unflushed) ;
+//SC wire  blk_asi_non_ldstdfa ;
+//SC assign  blk_asi_non_ldstdfa = blk_asi_g & lsu_alt_space_g & 
+//SC      ~(ldst_dbl_g & fp_ldst_g) & (ld_inst_vld_unflushed | st_inst_vld_unflushed) ;
+
+// trap on illegal asi
+//SC wire  illegal_asi_trap_g ;
+//SC assign  illegal_asi_trap_g = 
+//SC (ld_inst_vld_unflushed | st_inst_vld_unflushed) &
+//SC lsu_alt_space_g & ~recognized_asi_g & lsu_inst_vld_w ;
+
+// This can be pushed back into previous cycle.
+//SC wire wr_to_strm_sync ;
+//SC assign	wr_to_strm_sync =  	
+//SC   strm_asi & ((ldst_va_g[7:0] == 8'hA0) | (ldst_va_g[7:0] == 8'h68)) &
+//SC   st_inst_vld_unflushed & lsu_alt_space_g ;
+
+// This should not be double-anded with tlb_tte_vld_g. Check !!!
+//SC assign  daccess_excptn =  
+//SC     ((priv_pg_usr_mode | as_if_usr_priv_pg | nfo_pg_nonnfo_asi | 
+//SC     atm_access_w_nc | atm_access_unsup_asi)) 
+//SC       & tlb_tte_vld_g | 
+//SC     spec_access_epage |
+//SC     asi_related_trap_g | quad_asi_non_ldstda | tlb_daccess_excptn_g |
+//SC     illegal_asi_trap_g | spv_use_hpv | binit_asi_non_ldda | wr_to_strm_sync | 
+//SC    blk_asi_non_ldstdfa ;
+
+// HPV Changes 
+// Push back into previous stage.
+// qualification with hpv_priv and hpstate_en required to ensure hypervisor
+// is not trying to access.
+
+//assign  hpv_priv_e = 
+//  thread0_e ? tlu_lsu_hpv_priv[0] :
+//    thread1_e ? tlu_lsu_hpv_priv[1] :
+//      thread2_e ? tlu_lsu_hpv_priv[2] :
+//          		tlu_lsu_hpv_priv[3] ;
+
+// Timing change :
+
+// Per-thread hpv_priv / hpstate_en from the tlu are flopped once before the
+// e-stage thread select.
+wire [3:0] hpv_priv_d1 ;
+wire [3:0] hpstate_en_d1 ;
+
+dff_s #(8) hpv_stgd1 (
+        .clk    (clk),
+        .din    ({tlu_lsu_hpv_priv[3:0], tlu_lsu_hpstate_en[3:0]}),
+        .q      ({hpv_priv_d1[3:0],      hpstate_en_d1[3:0]}),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );
+
+// Input N is paired with select threadN_e.
+mux4ds #(1) hpv_priv_e_mux (
+        .sel0   (thread0_e),
+        .sel1   (thread1_e),
+        .sel2   (thread2_e),
+        .sel3   (thread3_e),
+        .in0    (hpv_priv_d1[0]),
+        .in1    (hpv_priv_d1[1]),
+        .in2    (hpv_priv_d1[2]),
+        .in3    (hpv_priv_d1[3]),
+        .dout   (hpv_priv_e)
+);
+ 
+//assign  hpstate_en_e = 
+//  thread0_e ? tlu_lsu_hpstate_en[0] :
+//    thread1_e ? tlu_lsu_hpstate_en[1] :
+//      thread2_e ? tlu_lsu_hpstate_en[2] :
+//          		tlu_lsu_hpstate_en[3] ;
+
+mux4ds  #(1) hpstate_en_e_mux (       // same structure as hpv_priv_e_mux, fed by the hpstate_en_d1 bits
+        .in0    (hpstate_en_d1[0]),
+        .in1    (hpstate_en_d1[1]),
+        .in2    (hpstate_en_d1[2]),
+        .in3    (hpstate_en_d1[3]),
+        .sel0   (thread0_e),  
+        .sel1   (thread1_e),
+        .sel2   (thread2_e),  
+        .sel3   (thread3_e),
+        .dout   (hpstate_en_e)        // stands in for the commented-out hpstate_en_e ternary chain
+);
+   
+dff_s #(2) hpv_stgm (                 // carries the two E-side mux outputs to their _m versions
+        .din    ({hpv_priv_e, hpstate_en_e}),
+        .q    	({hpv_priv_m, hpstate_en_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()   // si/so are left unconnected at this instance
+        );
+
+//dff #(2) hpv_stgg (
+//        .din    ({hpv_priv_m, hpstate_en_m}),
+//        .q    	({hpv_priv,   hpstate_en}),
+//        .clk    (clk),
+//        .se     (se),       .si (),          .so ()
+//        );
+
+/*assign  priv_action = (ld_inst_vld_unflushed | st_inst_vld_unflushed) & ~lsu_asi_state[7] & 
+      ~pstate_priv & ~(hpv_priv & hpstate_en) & lsu_alt_space_g ;*/
+// Generate a stage earlier
+//SC assign  priv_action_m = (ld_inst_vld_m | st_inst_vld_m) & ~lsu_dctl_asi_state_m[7] & 
+//SC       ~pstate_priv_m & ~(hpv_priv_m & hpstate_en_m) & lsu_alt_space_m ;
+
+//SC dff  pact_stgg (
+//SC         .din    (priv_action_m),
+//SC         .q    	(priv_action),
+//SC         .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+//SC         );
+
+// Take data_access exception if supervisor uses hypervisor asi
+//SC wire    hpv_asi_range ;
+//SC assign  hpv_asi_range =
+//SC                     ~lsu_asi_state[7] & (
+//SC                          (~lsu_asi_state[6] & lsu_asi_state[5] & lsu_asi_state[4]) | // 0x3?
+//SC                          ( lsu_asi_state[6]));                                   // 0x4?,5?,6?,7?
+
+// Take data_access exception if supervisor uses hypervisor asi
+//SC `ifdef  SPARC_HPV_EN
+//SC assign  spv_use_hpv = (ld_inst_vld_unflushed | st_inst_vld_unflushed) &
+//SC                          hpv_asi_range &
+//SC                          //~lsu_asi_state[7] & lsu_asi_state[6] & lsu_asi_state[5] & // 0x30-0x7f
+//SC                          pstate_priv & ~hpv_priv & lsu_alt_space_g ;
+//SC `else
+//SC assign  spv_use_hpv = 1'b0 ;
+//SC `endif
+
+
+// EARLY TRAPS
+
+// memory address not aligned
+//SC wire  qw_align_addr,blk_align_addr ;
+//SC assign  hw_align_addr = ~ldst_va_m[0] ;         // half-word addr
+//SC assign  wd_align_addr = ~ldst_va_m[1] & ~ldst_va_m[0] ;     // word addr
+//SC assign  dw_align_addr = ~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] ; // dw addr
+//SC assign  qw_align_addr = ~ldst_va_m[3] & ~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] ; // qw addr
+//SC assign  blk_align_addr = 
+//SC ~ldst_va_m[5] & ~ldst_va_m[4] & ~ldst_va_m[3] & 
+//SC ~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] ; // 64B aligned addr for block ld/st
+
+//assign  byte_size = ~ldst_sz_m[1] &  ~ldst_sz_m[0] ; // byte size    
+//assign  hw_size = ~ldst_sz_m[1] &  ldst_sz_m[0] ; // half-word size 
+//assign  wd_size =  ldst_sz_m[1] & ~ldst_sz_m[0] ; // word size
+//assign  dw_size =  ldst_sz_m[1] &  ldst_sz_m[0] ; // double-word size
+
+//assign  byte_size = byte_m;
+assign  hw_size = hword_m;   // half-word size flag, now taken directly from hword_m instead of decoding ldst_sz_m
+assign  wd_size = word_m;    // word size flag, taken directly from word_m
+assign  dw_size = dword_m;   // double-word size flag, taken directly from dword_m
+   
+//SC assign  mem_addr_not_align
+//SC   = ((hw_size & ~hw_align_addr) | // half-word check
+//SC     (wd_size & ~wd_align_addr)  | // word check
+//SC     (dw_size & ~dw_align_addr)  | // double word check
+//SC    ((quad_asi_m | binit_quad_asi_m) & lsu_alt_space_m & ldst_dbl_m & ~qw_align_addr) | // quad word check
+//SC     (blk_asi_m & lsu_alt_space_m & fp_ldst_m & ldst_dbl_m & ~blk_align_addr)) & // 64B blk ld/st check
+//SC     //(blk_asi_m & lsu_alt_space_m & blk_asi_m & ~blk_align_addr)) & // 64B blk ld/st check
+//SC     (ld_inst_vld_m | st_inst_vld_m) ;
+
+//SC assign  stdf_maddr_not_align
+//SC     = st_inst_vld_m & fp_ldst_m & ldst_dbl_m & wd_align_addr & ~dw_align_addr ;
+
+//SC assign  lddf_maddr_not_align
+//SC     = ld_inst_vld_m & fp_ldst_m & ldst_dbl_m & wd_align_addr & ~dw_align_addr ;
+
+// internal asi access by ld/st other than ldxa/stxa/lddfa/stdfa.
+// qual with ldst_dbl_m needed. lda and stda should take trap if accessing internal asi.
+//SC assign  asi_internal_non_xdw 
+//SC     = (st_inst_vld_m | ld_inst_vld_m) & lsu_alt_space_m & asi_internal_m  & ~(dw_size & ~ldst_dbl_m) ;
+
+
+// asi related
+// rd-only mmu asi requiring va decode.
+//SC wire	mmu_rd_only_asi_wva_m ;
+//SC assign	mmu_rd_only_asi_wva_m =
+//SC 	((lsu_dctl_asi_state_m[7:0]==8'h58) & (
+//SC 		(ldst_va_m[8:0] == 9'h000) | 	// dtag_target
+//SC 		(ldst_va_m[8:0] == 9'h020))) | 	// dsync_far
+//SC 	((lsu_dctl_asi_state_m[7:0]==8'h50) & 
+//SC 		(ldst_va_m[8:0] == 9'h000)) ; 	// itag_target
+
+//SC assign  wr_to_rd_only_asi = 
+//SC 	(mmu_rd_only_asi_wva_m |// mmu with non-unique asi
+//SC 	mmu_rd_only_asi_m |	// mmu with unique asi
+//SC 	rd_only_asi_m)		// non mmu
+//SC 	 &  st_inst_vld_m & lsu_alt_space_m ;
+
+//SC assign  rd_of_wr_only_asi = wr_only_asi_m &  ld_inst_vld_m & lsu_alt_space_m ;
+//SC assign  unimp_asi_used = unimp_asi_m &  (ld_inst_vld_m | st_inst_vld_m) & lsu_alt_space_m ;
+//assign  asi_related_trap_m = wr_to_rd_only_asi | rd_of_wr_only_asi | unimp_asi_used | asi_internal_non_xdw ;
+
+//SC assign  early_trap_vld_m =  stdf_maddr_not_align | lddf_maddr_not_align | mem_addr_not_align ;
+      
+//SC assign  lsu_tlu_misalign_addr_ldst_atm_m = early_trap_vld_m ;
+
+// mux select order must be maintained
+//SC assign  early_ttype_m[8:0] = 
+//SC       stdf_maddr_not_align ? 9'h036 :
+//SC         lddf_maddr_not_align ? 9'h035 : 
+//SC           mem_addr_not_align ? 9'h034 : 9'hxxx ;
+
+//SC dff #(11)   etrp_stgg (
+//SC         .din    ({early_ttype_m[8:0],early_trap_vld_m,asi_related_trap_m}),
+//SC         .q      ({early_ttype_g[8:0],early_trap_vld_g,asi_related_trap_g}),
+//SC         .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+//SC         );
+
+//SC wire nceen_pipe_g ;
+//SC assign  nceen_pipe_g = 
+//SC   (thread0_g & ifu_lsu_nceen[0]) | (thread1_g & ifu_lsu_nceen[1]) |
+//SC   (thread2_g & ifu_lsu_nceen[2]) | (thread3_g & ifu_lsu_nceen[3]) ;
+//SC wire nceen_fill_e,nceen_fill_m,nceen_fill_g ;
+//SC assign  nceen_fill_e = 
+//SC   (dfill_thread0 & ifu_lsu_nceen[0]) | (dfill_thread1 & ifu_lsu_nceen[1]) |
+//SC   (dfill_thread2 & ifu_lsu_nceen[2]) | (dfill_thread3 & ifu_lsu_nceen[3]) ;
+
+//SC dff  #(1) nce_stgm (
+//SC         .din    (nceen_fill_e),
+//SC         .q      (nceen_fill_m),
+//SC         .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+//SC         );
+
+//SC dff  #(1) nce_stgg (
+//SC         .din    (nceen_fill_m),
+//SC         .q      (nceen_fill_g),
+//SC         .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+//SC         );
+
+//SC assign  daccess_error = 1'b0 ;
+  // Commented out currently for timing reasons. This needs to be
+  // rolled into the ttype_vld sent to the tlu, but can be left out
+  // of the flush sent to the remaining units.
+  /*((tte_data_perror_unc) & nceen_pipe_g & // on xslate 
+  ~(early_trap_vld_g | priv_action | va_wtchpt_match | dmmu_miss_g)) |
+  tlb_asi_unc_err_g |     // asi read
+  (unc_err_trap_g & nceen_fill_g) | // cache data
+  tlb_daccess_error_g ;     // tlb not writeable */
+
+//SC assign  lsu_tlu_async_dacc_err_g = unc_err_trap_g | tlb_asi_unc_err_g ;
+
+//SC assign  lsu_tlu_dmmu_miss_g = dmmu_miss_g ;
+
+ wire  cam_real_m ;                     // lsu_dtlb_cam_real_e at the output of real_stgm
+ dff_s   real_stgm (                    // no width parameter is given, so the dff_s default applies (presumably 1 bit)
+         .din    (lsu_dtlb_cam_real_e),
+         .q      (cam_real_m),
+         .clk    (clk),
+         .se     (se),       .si (),          .so ()
+         );
+ 
+// dff   real_stgg (
+//         .din    (cam_real_m),
+//         .q      (cam_real_g),
+//         .clk    (clk),
+//         .se     (se),       .si (),          .so ()
+//         );
+ 
+assign  lsu_tlu_nonalt_ldst_m =  (st_inst_vld_m | ld_inst_vld_m) & ~lsu_alt_space_m  ; // valid ld/st with lsu_alt_space_m clear
+assign  lsu_tlu_xslating_ldst_m = (st_inst_vld_m | ld_inst_vld_m) &   // valid ld/st that is either ...
+	(((~asi_internal_m  & recognized_asi_m) & lsu_alt_space_m)  | // Bug 4327: ... alt-space with a recognized, non-internal asi
+	~lsu_alt_space_m) ;                                            // ... or not alt-space at all
+
+assign  ctxt_sel_e[0] = thread_pctxt ;  // bit 0 follows thread_pctxt (presumably the primary-context select)
+assign  ctxt_sel_e[1] = thread_sctxt ;  // bit 1 follows thread_sctxt (presumably the secondary-context select)
+assign  ctxt_sel_e[2] = 
+	thread_nctxt | 
+	(~(thread_pctxt | thread_sctxt) &  // default to nucleus - translating asi
+	~(alt_space_e & (asi_internal_e | ~recognized_asi_e ))) ; //bug3660
+					   // nontranslating asi to select 11 in CT
+					   // field of dsfsr.
+// Stage the 3-bit select from E to M for the TLU.
+dff_s  #(3) ctxsel (
+        .din    (ctxt_sel_e[2:0]),
+        .q      (lsu_tlu_ctxt_sel_m[2:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+assign	lsu_tlu_nucleus_ctxt_m = lsu_tlu_ctxt_sel_m[2] ;   // bit 2 of the staged context select
+
+assign  lsu_tlu_write_op_m = st_inst_vld_m | atomic_m ;   // set for a valid store or for atomic_m
+
+// va_oor_m check needs to be in case of bypass, pstate.am=1, internal and illegal asi. 
+// pstate.am squashing is done locally in tlu.
+
+assign  lsu_tlu_squash_va_oor_m =        // squash the va-out-of-range check in any of the cases below
+  dtlb_bypass_m   |     // bypass
+  //sta_internal_m  | lda_internal_m |  // internal asi
+  (asi_internal_m & lsu_alt_space_m) |	// Bug 5156
+  (~recognized_asi_tmp & lsu_alt_space_m) ; // illegal asi // Timing change.
+// The same squash value is also driven out under a second name.
+   assign lsu_squash_va_oor_m =  lsu_tlu_squash_va_oor_m;
+  
+//=========================================================================================
+//  Generate Flush Pipe
+//=========================================================================================
+
+//SC wire	other_flush_pipe_w ;
+// lsu_tlu_ttype_vld needs to be optimized in terms of timing.
+//SC assign	other_flush_pipe_w = tlu_early_flush_pipe_w | (lsu_tlu_ttype_vld_m2 & lsu_inst_vld_w);
+//SC assign	lsu_ifu_flush_pipe_w = other_flush_pipe_w ;
+//SC assign	lsu_exu_flush_pipe_w = other_flush_pipe_w ;
+//SC assign	lsu_ffu_flush_pipe_w = other_flush_pipe_w ;
+
+//SC //assign	lsu_flush_pipe_w = other_flush_pipe_w | ifu_tlu_flush_w ;
+
+//=========================================================================================
+//  Early Traps to SPU
+//=========================================================================================
+
+// detect st to ma/strm sync - data-access exception.
+//SC wire	st_to_sync_dexcp_m ;
+// qual with alt_space not required - spu will do it.
+//SC assign	st_to_sync_dexcp_m = 
+//SC   strm_asi_m & ((ldst_va_m[7:0] == 8'ha0) | (ldst_va_m[7:0] == 8'h68)) & st_inst_vld_m ;  
+
+//SC wire	spu_early_flush_m ;
+
+//SC assign	spu_early_flush_m =
+//SC 	priv_action_m 		|
+//SC 	mem_addr_not_align 	|
+//SC 	st_to_sync_dexcp_m 	; 
+
+//SC dff  eflushspu_g (
+//SC         .din    (spu_early_flush_m),
+//SC         .q      (lsu_spu_early_flush_g),
+//SC         .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+//SC         );
+
+//SC dff  eflushtlu_g (
+//SC         .din    (spu_early_flush_m),
+//SC         .q      (lsu_tlu_early_flush_w),
+//SC        .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+ //SC        );
+
+//=========================================================================================
+//  Parity Error Checking
+//=========================================================================================
+
+// DCache Parity Error
+// - Parity Check is done for entire 64b. No attempt is made to match on size. A
+// parity error will force a miss and refetch a line to the same way of the cache.
+// - Logging of error is done in g-stage of issue.
+// - Trap taken on data return
+
+wire	dcache_perr_en ;   // common enable for dcache data parity error detection
+assign	dcache_perr_en  =
+  dcache_enable_g & ~(asi_internal_g & lsu_alt_space_g) & 
+  ~atomic_g  & 
+  // dcache_rd_parity_err qualified with cache_way_hit - could be x.
+  (lsu_dtlb_bypass_g | (~lsu_dtlb_bypass_g & tlb_cam_hit_g)) ;
+assign dcache_rd_parity_error = dcache_rparity_err_wb & dcache_perr_en ;
+ // Data parity error is reported only for a valid load in W with no dtag parity error.
+// dtag parity error gets priority over dcache parity error.
+assign  lsu_dcache_data_perror_g = 
+  dcache_rd_parity_error & ld_inst_vld_unflushed & lsu_inst_vld_w & ~dtag_perror_g & 
+  dcache_perr_en ;   // already folded into dcache_rd_parity_error; repeated here (redundant)
+//  dcache_enable_g & ~(asi_internal_g & lsu_alt_space_g) & 
+//  ~atomic_g ; 
+
+// DTLB Parity Errors. 
+// ASI read of Tag/Data :
+//  - uncorrectable error
+//  - logging occurs on read.
+//  - precise trap is taken when ldxa completes if nceen set.
+//  - if not set then ldxa is allowed to complete.
+// CAM Read of Tag/Data :
+//  - correctable if locked bit not set.
+//    - takes disrupting trap later.
+//  - uncorrectable if locked bit set.
+//  - both are treated as precise traps.
+//  - if errors not enabled, then load completes as if hit in L1.
+// ** TLB error will cause a trap which will preclude concurrent dcache,dtag  **
+// ** parity errors.                **
+
+//SC assign  tte_data_parity_error = 
+//SC   tlb_rd_tte_data_parity ^ lsu_rd_tte_data_parity ;
+//SC assign  tte_tag_parity_error  = 
+//SC   tlb_rd_tte_tag_parity ^ lsu_rd_tte_tag_parity ;
+
+// cam related tte data parity error - error assumed correctible if locked
+// bit is not set. Will cause a dmmu_miss for correction.
+// qualify with cam_hit ??
+//SC assign  tte_data_perror_corr = 
+//SC   tte_data_parity_error & ~tlb_rd_tte_data_locked & tlb_tte_vld_g & 
+//SC   (ld_inst_vld_unflushed | st_inst_vld_unflushed) & lsu_inst_vld_w ;
+// same as above except error is treated as uncorrectible. This is to be posted to 
+// error status register which will cause a disrupting trap later.
+//SC assign  tte_data_perror_unc  = 
+//SC   tte_data_parity_error &  tlb_rd_tte_data_locked & tlb_tte_vld_g & 
+//SC   (ld_inst_vld_unflushed | st_inst_vld_unflushed) & lsu_inst_vld_w ;
+// Asi rd parity error detection
+//SC assign  asi_tte_data_perror =
+//SC   tte_data_parity_error & data_rd_vld_g ;
+// For data tte read, both tag and data arrays are read.
+// Parity error on asi read of tag should not be reported.
+//SC assign  asi_tte_tag_perror =
+//SC   tte_tag_parity_error & tag_rd_vld_g & ~data_rd_vld_g ;
+//SC assign  lsu_tlu_asi_rd_unc = asi_tte_data_perror | asi_tte_tag_perror ;
+
+// asi rd parity errors need to be reported thru asi bus
+/*assign  lsu_ifu_tlb_data_ce = tte_data_perror_corr ;
+assign  lsu_ifu_tlb_data_ue = tte_data_perror_unc | asi_tte_data_perror ;
+assign  lsu_ifu_tlb_tag_ue  = asi_tte_tag_perror ; */
+
+
+//SC wire  tlb_data_ue_g ;
+//SC assign  tlb_data_ue_g = tte_data_perror_unc | asi_tte_data_perror ;
+
+//SC dff  #(3) terr_stgd1 (
+//SC         .din    ({tte_data_perror_corr,tlb_data_ue_g,asi_tte_tag_perror}),
+//SC         .q      ({lsu_ifu_tlb_data_ce,lsu_ifu_tlb_data_ue,lsu_ifu_tlb_tag_ue}),
+//SC         .clk    (clk),
+//SC         .se     (se),       .si (),          .so ()
+//SC         );
+
+// Dtag Parity Error
+// - corrected thru special mechanism
+// - correctable error
+// - Trap taken on data return
+
+// move parity error calculation to g stage
+
+dff_s  #(4) dva_vld_g_ff (            // stage the 4-bit dva_vld vector from M to G
+         .din    (dva_vld_m[3:0]),
+         .q      (dva_vld_g[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+// dva_vld_m_bf is a plain copy of dva_vld_m (presumably a buffered output - TODO confirm).
+   assign dva_vld_m_bf[3:0] = dva_vld_m[3:0];
+   
+wire	dtag_perr_en ; 
+assign	dtag_perr_en = dcache_enable_g &
+  ~(asi_internal_g & lsu_alt_space_g) &  // Bug 3541
+  ~(lsu_alt_space_g & blk_asi_g) &       // Bug 3926. 
+  ~atomic_g &                            // Bug 4274,4297 
+  ~pref_inst_g ;                         // Bug 5046
+
+// Per-way dtag parity error: the way's parity flag, gated by that way's valid bit
+// and by the common enable. Bitwise form of the four per-way equations.
+assign  dtag_parity_error[3:0] =
+      lsu_rd_dtag_parity_g[3:0] & dva_vld_g[3:0] & {4{dtag_perr_en}} ;
+
+// Set when any of the four ways flags a tag parity error.
+assign  dtag_perror_g = |dtag_parity_error[3:0] ;
+
+// Same reduction, further qualified with ld_inst_vld_unflushed and lsu_inst_vld_w.
+// Correction pkt should not be generated to io.
+assign  lsu_dcache_tag_perror_g =
+  (|dtag_parity_error[3:0]) & ld_inst_vld_unflushed & lsu_inst_vld_w &
+  ~(tlb_pgnum[39] & (lsu_dtlb_bypass_g | (~lsu_dtlb_bypass_g & tlb_cam_hit_g))) ;
+//  (|dtag_parity_error[3:0]) & ld_inst_vld_unflushed & lsu_inst_vld_w &
+//  ~(lsu_alt_space_g & blk_asi_g) &  // Bug 3926. 
+//  // Correction pkt should not be generated to io.
+//  ~(tlb_pgnum[39] & (lsu_dtlb_bypass_g | (~lsu_dtlb_bypass_g & tlb_cam_hit_g))) &
+//  ~atomic_g ; // Bug 4274,4297 
+//=========================================================================================
+//  Error Related Traps 
+//=========================================================================================
+
+//bug6382/eco6621   
+dff_s #(2)  derrtrp_stgm (            // E -> M: dtag / dcache parity error flags, dropped when ignore_fill is set
+        .din    ({lsu_cpx_ld_dtag_perror_e & ~ignore_fill, lsu_cpx_ld_dcache_perror_e & ~ignore_fill}),
+        .q      ({dtag_error_m,dcache_error_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+dff_s #(2)  derrtrp_stgg (            // M -> G
+        .din    ({dtag_error_m,dcache_error_m}),
+        .q      ({dtag_error_g,dcache_error_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+dff_s #(2)  derrtrp_stgw2 (           // G -> W2
+        .din    ({dtag_error_g,dcache_error_g}),
+        .q      ({dtag_error_w2,dcache_error_w2}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// Only the data error is additionally masked by bld_squash_err_w2; the tag error is passed through.
+assign  lsu_ifu_dcache_data_perror = dcache_error_w2 & ~bld_squash_err_w2;  //bug6382/eco6621
+assign  lsu_ifu_dcache_tag_perror  = dtag_error_w2  ;
+
+assign  l2_unc_error_e  = lsu_cpx_pkt_ld_err[1] & l2fill_vld_e & ~ignore_fill  ; // Bug 4998
+assign  l2_corr_error_e = lsu_cpx_pkt_ld_err[0] & l2fill_vld_e & ~ignore_fill  ; // err[1] feeds the unc flag, err[0] the corr flag
+
+dff_s #(2)  lerrtrp_stgm (            // E -> M
+        .din    ({l2_unc_error_e,l2_corr_error_e}),
+        .q      ({l2_unc_error_m,l2_corr_error_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+dff_s #(2)  lerrtrp_stgg (            // M -> G
+        .din    ({l2_unc_error_m,l2_corr_error_m}),
+        .q      ({l2_unc_error_g,l2_corr_error_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+dff_s #(2)  lerrtrp_stgw2 (           // G -> W2
+        .din    ({l2_unc_error_g,l2_corr_error_g}),
+        .q      ({l2_unc_error_w2,l2_corr_error_w2}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// Both outputs also include the bld_*_err_pend_w2 terms and are masked by bld_squash_err_w2.
+assign  lsu_ifu_l2_unc_error  = // Bug 4315
+(l2_unc_error_w2 | bld_unc_err_pend_w2) & ~lsu_ifu_err_addr_b39 & ~bld_squash_err_w2 ;
+assign  lsu_ifu_l2_corr_error = 
+(l2_corr_error_w2 | bld_corr_err_pend_w2) & ~bld_squash_err_w2 ;
+
+wire	fill_err_trap_e ;
+// NOTE(review): unlike l2_unc_error_e, this term is not qualified with ~ignore_fill - confirm that is intended.
+//assign  unc_err_trap_e = 
+assign  fill_err_trap_e = 
+  (lsu_cpx_pkt_ld_err[1] & l2fill_vld_e) ;
+   /*(lsu_cpx_atm_st_err[1] & lsu_atm_st_cmplt_e)) & 
+      ((dfill_thread0 & ifu_lsu_nceen[0]) |
+       (dfill_thread1 & ifu_lsu_nceen[1]) |
+       (dfill_thread2 & ifu_lsu_nceen[2]) |
+       (dfill_thread3 & ifu_lsu_nceen[3])) ; */ // Bug 3624
+// With the atomic-store and nceen terms commented out, unc_err_trap_e is just the fill error term.
+assign	unc_err_trap_e = fill_err_trap_e ;
+
+/*assign  corr_err_trap_e = 
+  ((lsu_cpx_pkt_ld_err[0] | lsu_cpx_ld_dtag_perror_e | lsu_cpx_ld_dcache_perror_e) & 
+   l2fill_vld_e) |
+   (lsu_cpx_atm_st_err[0] & lsu_atm_st_cmplt_e)) & 
+   & ~unc_err_trap_e &
+      ((dfill_thread0 & ifu_lsu_ceen[0]) |
+       (dfill_thread1 & ifu_lsu_ceen[1]) |
+       (dfill_thread2 & ifu_lsu_ceen[2]) |
+       (dfill_thread3 & ifu_lsu_ceen[3])) ; */
+
+
+dff_s #(1)  errtrp_stgm (             // E -> M stage for unc_err_trap
+        .din    ({unc_err_trap_e}),
+        .q      ({unc_err_trap_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+dff_s #(1)  errtrp_stgg (             // M -> G stage for unc_err_trap
+        .din    ({unc_err_trap_m}),
+        .q      ({unc_err_trap_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+// The tlu should source demap_thrid for all tlb operations !!!
+dff_s #(2)  filla_stgm (              // E -> M stage for the 2-bit lsu_dfill_tid_e
+        .din    ({lsu_dfill_tid_e[1:0]}),
+        .q      ({dfill_tid_m[1:0]}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+dff_s #(2)  filla_stgg (              // M -> G stage for the same 2-bit value
+        .din    ({dfill_tid_m[1:0]}),
+        .q      ({dfill_tid_g[1:0]}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+
+
+//=========================================================================================
+//  LSU to IRF Data Bypass Control
+//=========================================================================================
+
+assign	spu_trap  = spu_lsu_unc_error_w2 ;                        // common term, still driven under its own name
+assign	spu_trap0 = spu_ldxa_thread0_w2 & spu_lsu_unc_error_w2 ;  // per-thread copies: the common term
+assign	spu_trap1 = spu_ldxa_thread1_w2 & spu_lsu_unc_error_w2 ;  // gated by that thread's spu_ldxa_thread*_w2
+assign	spu_trap2 = spu_ldxa_thread2_w2 & spu_lsu_unc_error_w2 ;
+assign	spu_trap3 = spu_ldxa_thread3_w2 & spu_lsu_unc_error_w2 ;
+// 7-bit type value: 7'h70 when spu_lsu_int_w2 is set, 7'h32 otherwise.
+assign	spu_ttype[6:0]	= spu_lsu_int_w2 ? 7'h70 : 7'h32 ;
+
+dff_s #(2)   lfraw_stgw2 (            // G -> W2 stage for ld_inst_vld and fp_ldst
+        .din    ({ld_inst_vld_g,fp_ldst_g}),
+        .q      ({ld_inst_vld_w2,fp_ldst_w2}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dff_s #(2)   lfraw_stgw3 (            // W2 -> W3 stage for ld_stb_full_raw and ld_inst_vld
+        .din    ({ld_stb_full_raw_w2, ld_inst_vld_w2}),
+        .q      ({ld_stb_full_raw_w3, ld_inst_vld_w3}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Delay all ldbyp*vld_en by a cycle for write of unc error
+//dff #(4)  lbypen_stgd1 (
+//        .din    ({ldbyp0_vld_en,ldbyp1_vld_en,ldbyp2_vld_en,ldbyp3_vld_en}),
+//        .q      ({ldbyp0_vld_en_d1,ldbyp1_vld_en_d1,ldbyp2_vld_en_d1,ldbyp3_vld_en_d1}),
+//        .clk    (clk),
+//        .se     (se),       .si (),          .so ()
+//        ); 
+
+
+wire   fp_ldst_thrd0_w2,fp_ldst_thrd1_w2,fp_ldst_thrd2_w2,fp_ldst_thrd3_w2 ; // driven by the per-thread assigns further down
+wire   fp_ldst_thrd0_w3,fp_ldst_thrd1_w3,fp_ldst_thrd2_w3,fp_ldst_thrd3_w3 ;
+wire   fp_ldst_thrd0_w4,fp_ldst_thrd1_w4,fp_ldst_thrd2_w4,fp_ldst_thrd3_w4 ;
+wire   fp_ldst_thrd0_w5,fp_ldst_thrd1_w5,fp_ldst_thrd2_w5,fp_ldst_thrd3_w5 ; // the _w5 copies feed the ldbyp*_other_ff flops
+
+//RAW read STB at W3 (changed from W2)
+// Three back-to-back 4-bit stages carry the per-thread flags from w2 to w5.
+dff_s #(4) fp_ldst_stg_w3 (
+  .din ({fp_ldst_thrd0_w2,fp_ldst_thrd1_w2,fp_ldst_thrd2_w2,fp_ldst_thrd3_w2}),
+  .q   ({fp_ldst_thrd0_w3,fp_ldst_thrd1_w3,fp_ldst_thrd2_w3,fp_ldst_thrd3_w3}),
+  .clk    (clk),
+  .se     (se),       .si (),          .so ()
+  );
+
+dff_s #(4) fp_ldst_stg_w4 (
+  .din ({fp_ldst_thrd0_w3,fp_ldst_thrd1_w3,fp_ldst_thrd2_w3,fp_ldst_thrd3_w3}),
+  .q   ({fp_ldst_thrd0_w4,fp_ldst_thrd1_w4,fp_ldst_thrd2_w4,fp_ldst_thrd3_w4}),
+  .clk    (clk),
+  .se     (se),       .si (),          .so ()
+  );
+
+dff_s #(4) fp_ldst_stg_w5 (
+  .din ({fp_ldst_thrd0_w4,fp_ldst_thrd1_w4,fp_ldst_thrd2_w4,fp_ldst_thrd3_w4}),
+  .q   ({fp_ldst_thrd0_w5,fp_ldst_thrd1_w5,fp_ldst_thrd2_w5,fp_ldst_thrd3_w5}),
+  .clk    (clk),
+  .se     (se),       .si (),          .so ()
+  );
+   
+// THREAD 0
+
+wire	tte_data_perror_unc_w2,asi_tte_data_perror_w2,asi_tte_tag_perror_w2 ;
+// if nceen/ceen=0, then tte_data_perror* are not logged for trap generation. Earlier error-reporting
+// is however never screened off.
+// asi_tte* however has to be logged in order to report errors thru the asiQ. Traps must be squashed. 
+dff_s #(3) ltlbrd_w2 (   // one stage producing the three _w2 TLB error flags
+  .din ({tte_data_perror_unc_en,asi_tte_data_perror,asi_tte_tag_perror}),   // note: first input is the _en form
+  .q   ({tte_data_perror_unc_w2,asi_tte_data_perror_w2,asi_tte_tag_perror_w2}),
+  .clk    (clk),
+  .se     (se),       .si (),          .so ()
+  );
+
+
+// Error Table for Queue
+// ** In all cases; squash writes to irf.
+//				| Error Reporting	| Trap ?	| 
+// ifu_lsu_asi_rd_unc		| NA;done by ifu	| daccess-error	|
+// tte_data_perror_unc_w2	| sync;in pipe		| daccess-error	|
+// tte_data_perror_corr_w2	| sync;in pipe		| dmmu-miss	| --> NA !! all unc.
+// asi_tte_data_perror_w2	| async;out of Q	| daccess-error	|
+// asi_tte_tag_perror_w2	| async;out of Q	| daccess-error	|
+
+// Per-thread enables used for xslate errors - enable queues.
+// Bit N is tte_data_perror_unc_w2 qualified by threadN_w2; the earlier form that
+// also OR-ed in tte_data_perror_corr_w2 is no longer used.
+wire [3:0] tlb_err_en_w2 ;
+assign	tlb_err_en_w2[3:0] =
+	{4{tte_data_perror_unc_w2}} &
+	{thread3_w2, thread2_w2, thread1_w2, thread0_w2} ;
+
+assign ldbyp0_vld_rst =                  // rst input of ldbyp0_vld_ff and ldbyp0_err_ff
+        (reset | (ld_thrd_byp_sel_e[0])) | 
+	atm_st_cmplt0 ; // Bug 4048
+
+// thread qualification required.
+//assign ldbyp0_vld_en = (lmq_byp_data_en_w2[0] & 
+//        ~(|lmq_byp_data_sel0[2:1]))  // do not set vld for cas/stdbl
+//	| spu_trap0 ;
+
+wire 		atm_ld_w_uerr ;   // valid fill that is also an atomic-store completion and has ld_err[1] set
+assign		atm_ld_w_uerr = l2fill_vld_e & lsu_cpx_pkt_atm_st_cmplt & lsu_cpx_pkt_ld_err[1] ;
+
+//bug6525 notes
+// spu ldxa and spu trap can be asynchronous to the main pipe, and can cause more than one
+// ldbyp*_vld_en to be asserted in the same cycle
+assign ldbyp0_vld_en = lmq_byp_data_raw_sel_d2[0] |                  //ld hit stb RAW bypass
+                       lmq_byp_data_sel0[3]       |                  //ldxa (ifu, spu*, lsu)
+		       (atm_ld_w_uerr & lsu_nceen_d1[0] & dfill_thread0) |       //atomic
+                       lmq_byp_data_fmx_sel[0]    |                  //tlu ldxa
+		       tlb_err_en_w2[0]	  |                                      //tlb parity err
+                       spu_trap0 ;                                   //spu trap*
+// Thread-0 flag: fp_ldst_w2 qualified by thread0_w2 and ld_inst_vld_w2.
+assign   fp_ldst_thrd0_w2 = fp_ldst_w2 & thread0_w2 & ld_inst_vld_w2 ;
+   
+// ld valid
+wire	ldbyp0_vld_tmp ;
+dffre_s #(1)  ldbyp0_vld_ff (          // dffre_s is defined elsewhere; presumably a flop with enable and reset
+        .din    (ldbyp0_vld_en),        // din and en are the same net, so an enabled load always loads a 1
+        .q      (ldbyp0_vld_tmp),
+        .rst    (ldbyp0_vld_rst),        .en     (ldbyp0_vld_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+// Bug 5379 - make ld ue invisible in q until atm st ack resets.
+// The raw valid is masked while pend_atm_ld_ue[0] is set.
+assign	ldbyp0_vld = ldbyp0_vld_tmp & ~pend_atm_ld_ue[0] ;
+
+
+// assumes that rw_index is not reset at mmu.
+wire [6:0]	misc_data_in ;                                  // shared 7-bit payload written into each ldbyp*_other_ff
+wire [6:0]	misc_data0,misc_data1,misc_data2,misc_data3 ;   // per-thread captured copies of misc_data_in
+wire		misc_sel ;
+wire [5:0]	rw_index_d1 ;
+dff_s #(6)  rwind_d1 (                 // one stage on the 6-bit tlu_dtlb_rw_index_g
+        .din    (tlu_dtlb_rw_index_g[5:0]),
+        .q      (rw_index_d1[5:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+assign	misc_sel = asi_tte_data_perror_w2 | asi_tte_tag_perror_w2 ;   // either asi tte parity error flag
+assign	misc_data_in[6:0] = misc_sel ? {1'b0,rw_index_d1[5:0]} : spu_ttype[6:0] ; // zero-extended rw index on asi error, else spu_ttype
+
+dffe_s #(9)  ldbyp0_other_ff (         // enabled flop without reset: 1 + 1 + 7 bits captured under ldbyp0_vld_en
+        .din    ({fp_ldst_thrd0_w5,spu_trap0,misc_data_in[6:0]}),  //bug6525 fix2
+        .q      ({ldbyp0_fpld,spubyp0_trap,misc_data0[6:0]}),
+        .en     (ldbyp0_vld_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+
+dffre_s #(5)  ldbyp0_err_ff (          // thread-0 error flags; cleared by the same rst as ldbyp0_vld_ff
+  	.din   	({tte_data_perror_unc_w2,atm_ld_w_uerr,
+		asi_tte_data_perror_w2,asi_tte_tag_perror_w2,ifu_lsu_asi_rd_unc}),
+	.q	({cam_perr_unc0,pend_atm_ld_ue[0],asi_data_perr0,asi_tag_perr0,
+		ifu_unc_err0}),
+        .rst    (ldbyp0_vld_rst), .en     (ldbyp0_vld_en & ~spu_trap0 & ~lmq_byp_ldxa_sel0[1]), //bug6525 fix2: no capture on spu_trap0 or lmq_byp_ldxa_sel0[1]
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+
+//assign  ldbyp0_unc_err = ldbyp0_unc_err_q & ifu_lsu_nceen[0] ;
+
+// THREAD 1
+
+assign ldbyp1_vld_rst =                  // rst input of ldbyp1_vld_ff and ldbyp1_err_ff
+        (reset | (ld_thrd_byp_sel_e[1])) |
+	atm_st_cmplt1 ; // Bug 4048
+// Thread-1 flag: fp_ldst_w2 qualified by thread1_w2 and ld_inst_vld_w2.
+assign   fp_ldst_thrd1_w2 = fp_ldst_w2 & thread1_w2 & ld_inst_vld_w2 ;
+
+// thread qualification required.
+//assign ldbyp1_vld_en = (lmq_byp_data_en_w2[1] &
+//        ~(|lmq_byp_data_sel1[2:1])) | // do not set vld for cas/stdbl
+//	| spu_trap1 ;
+// Thread-1 copy of the ldbyp0_vld_en equation (same six terms, index 1).
+assign ldbyp1_vld_en = lmq_byp_data_raw_sel_d2[1] |
+                       lmq_byp_data_sel1[3]       |
+		       (atm_ld_w_uerr & lsu_nceen_d1[1] & dfill_thread1) |
+                       lmq_byp_data_fmx_sel[1]    |
+		       tlb_err_en_w2[1]	  |
+                       spu_trap1 ;
+   
+// ld valid
+wire	ldbyp1_vld_tmp ;
+dffre_s #(1)  ldbyp1_vld_ff (          // din and en are the same net, so an enabled load always loads a 1
+        .din    (ldbyp1_vld_en),
+        .q      (ldbyp1_vld_tmp),
+        .rst    (ldbyp1_vld_rst),        .en     (ldbyp1_vld_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+assign	ldbyp1_vld = ldbyp1_vld_tmp & ~pend_atm_ld_ue[1] ;   // raw valid masked while pend_atm_ld_ue[1] is set
+
+
+dffe_s #(9)  ldbyp1_other_ff (         // enabled flop without reset: 1 + 1 + 7 bits captured under ldbyp1_vld_en
+        .din    ({fp_ldst_thrd1_w5,spu_trap1,misc_data_in[6:0]}),  //bug6525 fix2
+        .q      ({ldbyp1_fpld,spubyp1_trap,misc_data1[6:0]}),
+        .en     (ldbyp1_vld_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+// The tlb rd unc errors are delayed a cycle with respect to ldxa_data.
+// NOTE(review): this used to read "No reset required", but the flop below is a dffre_s with rst tied to ldbyp1_vld_rst.
+dffre_s #(5)  ldbyp1_err_ff (
+  	.din   	({tte_data_perror_unc_w2,atm_ld_w_uerr,
+		asi_tte_data_perror_w2,asi_tte_tag_perror_w2,ifu_lsu_asi_rd_unc}),
+	.q	({cam_perr_unc1,pend_atm_ld_ue[1],asi_data_perr1,asi_tag_perr1,
+		ifu_unc_err1}),
+        .rst    (ldbyp1_vld_rst), .en     (ldbyp1_vld_en & ~spu_trap1 & ~lmq_byp_ldxa_sel1[1]), //bug6525 fix2: no capture on spu_trap1 or lmq_byp_ldxa_sel1[1]
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+//assign  ldbyp1_unc_err = ldbyp1_unc_err_q & ifu_lsu_nceen[1] ;
+
+// THREAD 2
+
+assign ldbyp2_vld_rst =                  // rst input of ldbyp2_vld_ff and ldbyp2_err_ff
+        (reset | (ld_thrd_byp_sel_e[2])) |
+	atm_st_cmplt2 ; // Bug 4048
+
+// thread qualification required.
+//assign ldbyp2_vld_en = (lmq_byp_data_en_w2[2] &
+//        ~(|lmq_byp_data_sel2[2:1])) | // do not set vld for cas/stdbl
+//	spu_trap2 ;
+// Thread-2 copy of the ldbyp0_vld_en equation (same six terms, index 2).
+assign ldbyp2_vld_en = lmq_byp_data_raw_sel_d2[2] |
+                       lmq_byp_data_sel2[3]       |
+		       (atm_ld_w_uerr & lsu_nceen_d1[2] & dfill_thread2) |
+                       lmq_byp_data_fmx_sel[2]    |
+		       tlb_err_en_w2[2]	  |
+                       spu_trap2 ;
+// Thread-2 flag: fp_ldst_w2 qualified by thread2_w2 and ld_inst_vld_w2.
+assign   fp_ldst_thrd2_w2 = fp_ldst_w2 & thread2_w2 & ld_inst_vld_w2 ;
+
+// ld valid
+wire	ldbyp2_vld_tmp ;
+dffre_s #(1)  ldbyp2_vld_ff (          // din and en are the same net, so an enabled load always loads a 1
+        .din    (ldbyp2_vld_en),
+        .q      (ldbyp2_vld_tmp),
+        .rst    (ldbyp2_vld_rst),        .en     (ldbyp2_vld_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+assign	ldbyp2_vld = ldbyp2_vld_tmp & ~pend_atm_ld_ue[2] ;   // raw valid masked while pend_atm_ld_ue[2] is set
+
+dffe_s #(9)  ldbyp2_other_ff (         // enabled flop without reset: 1 + 1 + 7 bits captured under ldbyp2_vld_en
+        .din    ({fp_ldst_thrd2_w5,spu_trap2,misc_data_in[6:0]}),  //bug6525 fix2
+        .q      ({ldbyp2_fpld,spubyp2_trap,misc_data2[6:0]}),
+        .en     (ldbyp2_vld_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+dffre_s #(5)  ldbyp2_err_ff (          // thread-2 error flags; cleared by the same rst as ldbyp2_vld_ff
+  	.din   	({tte_data_perror_unc_w2, atm_ld_w_uerr,
+		asi_tte_data_perror_w2,asi_tte_tag_perror_w2,ifu_lsu_asi_rd_unc}),
+	.q	({cam_perr_unc2,pend_atm_ld_ue[2],asi_data_perr2,asi_tag_perr2,
+		ifu_unc_err2}),
+        .rst    (ldbyp2_vld_rst), .en     (ldbyp2_vld_en & ~spu_trap2 & ~lmq_byp_ldxa_sel2[1]), //bug6525 fix2: no capture on spu_trap2 or lmq_byp_ldxa_sel2[1]
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+//assign  ldbyp2_unc_err = ldbyp2_unc_err_q & ifu_lsu_nceen[2] ;
+
+// THREAD 3
+
+assign ldbyp3_vld_rst =                  // rst input of ldbyp3_vld_ff and ldbyp3_err_ff
+        (reset | (ld_thrd_byp_sel_e[3])) |
+	atm_st_cmplt3 ; // Bug 4048
+
+// thread qualification required.
+//assign ldbyp3_vld_en = (lmq_byp_data_en_w2[3] &
+//        ~(|lmq_byp_data_sel3[2:1])) | // do not set vld for cas/stdbl
+//	| spu_trap3 ;
+// Thread-3 copy of the ldbyp0_vld_en equation (same six terms, index 3).
+assign ldbyp3_vld_en = lmq_byp_data_raw_sel_d2[3] |
+                       lmq_byp_data_sel3[3]       |
+		       (atm_ld_w_uerr & lsu_nceen_d1[3] & dfill_thread3) |
+                       lmq_byp_data_fmx_sel[3]    |
+		       tlb_err_en_w2[3]	  |
+                       spu_trap3 ;
+// Thread-3 flag: fp_ldst_w2 qualified by thread3_w2 and ld_inst_vld_w2.
+assign   fp_ldst_thrd3_w2 = fp_ldst_w2 & thread3_w2 & ld_inst_vld_w2 ;
+
+// ld valid
+wire	ldbyp3_vld_tmp ;
+dffre_s #(1)  ldbyp3_vld_ff (
+        .din    (ldbyp3_vld_en),
+        .q      (ldbyp3_vld_tmp),
+        .rst    (ldbyp3_vld_rst),        .en     (ldbyp3_vld_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+assign	ldbyp3_vld = ldbyp3_vld_tmp & ~pend_atm_ld_ue[3] ;
+
+
+dffe_s #(9)  ldbyp3_other_ff (
+        .din    ({fp_ldst_thrd3_w5,spu_trap3,misc_data_in[6:0]}),  //bug6525 fix2
+        .q      ({ldbyp3_fpld,spubyp3_trap,misc_data3[6:0]}),
+        .en     (ldbyp3_vld_en),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+dffre_s #(5)  ldbyp3_err_ff (
+  	.din   	({tte_data_perror_unc_w2,atm_ld_w_uerr,
+		asi_tte_data_perror_w2,asi_tte_tag_perror_w2,ifu_lsu_asi_rd_unc}),
+	.q	({cam_perr_unc3,pend_atm_ld_ue[3],asi_data_perr3,asi_tag_perr3,
+		ifu_unc_err3}),
+        .rst    (ldbyp3_vld_rst), .en     (ldbyp3_vld_en & ~spu_trap3 & ~lmq_byp_ldxa_sel3[1]), //bug6525 fix2
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );              
+
+//assign  ldbyp3_unc_err = ldbyp3_unc_err_q & ifu_lsu_nceen[3] ;
+
+//assign  ld_any_byp_data_vld = 
+//  ldbyp0_vld | ldbyp1_vld | ldbyp2_vld | ldbyp3_vld ;
+
+dff_s #(4)   stgm_sqshcmplt ( // first of two flops carrying squash_byp_cmplt to the G stage
+        .din    (squash_byp_cmplt[3:0]),
+        .q      (squash_byp_cmplt_m[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// Second flop: squash_byp_cmplt_m -> squash_byp_cmplt_g.
+dff_s #(4)  stgg_sqshcmplt (
+        .din    (squash_byp_cmplt_m[3:0]),
+        .q      (squash_byp_cmplt_g[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// FP load bypass data valid in G: the thread selected in G holds an FP load and is not squashed.
+assign  fpld_byp_data_vld = 
+  (ld_thrd_byp_sel_g[0] & ldbyp0_fpld & ~squash_byp_cmplt_g[0]) | // Bug 4998
+  (ld_thrd_byp_sel_g[1] & ldbyp1_fpld & ~squash_byp_cmplt_g[1]) |
+  (ld_thrd_byp_sel_g[2] & ldbyp2_fpld & ~squash_byp_cmplt_g[2]) |
+  (ld_thrd_byp_sel_g[3] & ldbyp3_fpld & ~squash_byp_cmplt_g[3]) ;
+
+//assign  intld_byp_data_vld = |intld_byp_cmplt[3:0] ;
+// squash for spu-trap situation.
+// Integer-load bypass valid, E stage: set when any thread's
+// intld_byp_cmplt bit is set.  The SPU-trap squash is no longer applied
+// here; it arrives through squash_byp_cmplt.
+assign  intld_byp_data_vld_e = |intld_byp_cmplt[3:0] ;
+
+// E -> M flop for the integer-load bypass valid.
+dff_s   stgm_ibvld (
+        .clk    (clk),
+        .din    (intld_byp_data_vld_e),
+        .q      (intld_byp_data_vld_m),
+        .se     (se),
+        .si     (),
+        .so     ()
+        );  
+
+// to be removed - intld_byp_data_vld in lsu_mon.v
+/*
+dff_s   stgg_ibvld (
+        .din    (intld_byp_data_vld_m),
+        .q      (intld_byp_data_vld),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+*/
+assign	spubyp_trap_active_e = // the thread granted the bypass in E has its SPU trap flag set
+	//(intld_byp_cmplt[0] & spubyp0_trap) | // Bug 4040
+	(ld_thrd_byp_sel_e[0] & spubyp0_trap) |
+	(ld_thrd_byp_sel_e[1] & spubyp1_trap) |
+	(ld_thrd_byp_sel_e[2] & spubyp2_trap) |
+	(ld_thrd_byp_sel_e[3] & spubyp3_trap) ;
+// Two flops carry the SPU trap indication from E through M to G.
+dff_s   stgm_strmtrp (
+        .din    (spubyp_trap_active_e),
+        .q      (spubyp_trap_active_m),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+dff_s   stgg_strmtrp (
+        .din    (spubyp_trap_active_m),
+        .q      (spubyp_trap_active_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// The SPU trap type is read from each thread's 7-bit misc_data register.
+assign	spubyp0_ttype[6:0] = misc_data0[6:0] ;
+assign	spubyp1_ttype[6:0] = misc_data1[6:0] ;
+assign	spubyp2_ttype[6:0] = misc_data2[6:0] ;
+assign	spubyp3_ttype[6:0] = misc_data3[6:0] ;
+// Pick the trap type of the thread selected in M (ld_thrd_byp_mxsel_m).
+mux4ds #(7) mux_spubyp_ttype (
+        .in0(spubyp0_ttype[6:0]),
+        .in1(spubyp1_ttype[6:0]),
+        .in2(spubyp2_ttype[6:0]),
+        .in3(spubyp3_ttype[6:0]),
+        .sel0(ld_thrd_byp_mxsel_m[0]),
+        .sel1(ld_thrd_byp_mxsel_m[1]),
+        .sel2(ld_thrd_byp_mxsel_m[2]),
+        .sel3(ld_thrd_byp_mxsel_m[3]),
+        .dout(spubyp_ttype[6:0])
+);               
+              
+assign  intld_byp_cmplt[0] = (ld_thrd_byp_sel_e[0] & ~(ldbyp0_fpld | squash_byp_cmplt[0])) ; // granted in E, not an FP load, not squashed
+assign  intld_byp_cmplt[1] = (ld_thrd_byp_sel_e[1] & ~(ldbyp1_fpld | squash_byp_cmplt[1])) ;
+assign  intld_byp_cmplt[2] = (ld_thrd_byp_sel_e[2] & ~(ldbyp2_fpld | squash_byp_cmplt[2])) ;
+assign  intld_byp_cmplt[3] = (ld_thrd_byp_sel_e[3] & ~(ldbyp3_fpld | squash_byp_cmplt[3])) ;
+// Two flops carry the L2 fill valid and its FP-load flag from E through M to G.
+dff_s #(2)  stgm_l2fv (
+        .din    ({l2fill_vld_e,lsu_l2fill_fpld_e}),
+        .q      ({l2fill_vld_m,l2fill_fpld_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+dff_s #(2) stgg_l2fv (
+        .din    ({l2fill_vld_m,l2fill_fpld_m}),
+        .q      ({l2fill_vld_g,l2fill_fpld_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+// write to irf will need to be postponed by a few cycles. 
+// may wish to find more bubbles by counting misses !!!
+//assign  lsu_irf_byp_data_src[0]  =      ld_inst_vld_unflushed ;
+//assign  lsu_irf_byp_data_src[1]  =    l2fill_vld_g ;
+//assign  lsu_irf_byp_data_src[2]  =    
+//  ~l2fill_vld_g    &      // no dfq fill
+//  ~ld_inst_vld_unflushed ;  // no ld/st in pipe.
+
+  //~(ld_inst_vld_unflushed | st_inst_vld_unflushed) ;  // no ld/st in pipe.
+   // Timing Change.
+   //ld_any_byp_data_vld ;      // full raw bypasses data
+
+
+// Store to load full raw bypassing. Plus ldxa data bypassing.
+// ldxa-data may be bypassed asap if port available.
+// ldxa/stb raw and atomics assumed to be mutually exclusive.
+
+// tlu_lsu_int_ldxa_vld_w2 counts only when the VA was not flagged illegal.
+wire int_ldxa_vld ;
+assign int_ldxa_vld = ~tlu_lsu_int_ld_ill_va_w2 & tlu_lsu_int_ldxa_vld_w2 ;
+
+// Per-thread select for that return, steered by the W2 thread decode.
+assign	lmq_byp_data_fmx_sel[3:0] =
+	{4{int_ldxa_vld}} & {thread3_w2, thread2_w2, thread1_w2, thread0_w2} ;
+
+// Per-thread lmq_byp_data_en_w2: any data select of the thread, or its
+// int-ldxa select.
+assign lmq_byp_data_en_w2[3:0] =
+	{(|lmq_byp_data_sel3[3:0]), (|lmq_byp_data_sel2[3:0]),
+	 (|lmq_byp_data_sel1[3:0]), (|lmq_byp_data_sel0[3:0])} |
+	lmq_byp_data_fmx_sel[3:0] ;
+
+/*
+assign  stq_pkt2_data_en[0] = 
+  st_inst_vld_g & ldst_dbl_g & quad_asi_g & thread0_g ;
+assign  stq_pkt2_data_en[1] = 
+  st_inst_vld_g & ldst_dbl_g & quad_asi_g & thread1_g ;
+assign  stq_pkt2_data_en[2] = 
+  st_inst_vld_g & ldst_dbl_g & quad_asi_g & thread2_g ;
+assign  stq_pkt2_data_en[3] = 
+  st_inst_vld_g & ldst_dbl_g & quad_asi_g & thread3_g ;
+*/
+   
+// casxa to be decoded as doubleword.
+// casa to be decoded as word.
+// ldstuba to be decoded as byte.
+// casa, casxa and ldstuba needed to be decoded as alternate space insts with optional
+// imm_asi use.
+// An atomic will switch out a thread.
+
+
+wire  ifu_ldxa_vld,  spu_ldxa_vld ; // ldxa data returns (IFU / SPU) whose VA was not flagged illegal
+assign  ifu_ldxa_vld = ifu_lsu_ldxa_data_vld_w2 & ~ifu_lsu_ldxa_illgl_va_w2 ;
+//assign  tlu_ldxa_vld = tlu_lsu_ldxa_data_vld_w2 & ~tlu_lsu_ldxa_illgl_va_w2 ;
+assign  spu_ldxa_vld = spu_lsu_ldxa_data_vld_w2 & ~spu_lsu_ldxa_illgl_va_w2 ; 
+// int_ldxa_ivld: tlu_lsu_int_ldxa_vld_w2 return WITH the illegal-VA flag set.
+wire int_ldxa_ivld ;
+assign int_ldxa_ivld = tlu_lsu_int_ldxa_vld_w2 & tlu_lsu_int_ld_ill_va_w2 ;
+// ldxa data returns need to cmplt thread without writing to register file
+assign  ldxa_illgl_va_cmplt[0] = // illegal-VA return from IFU, SPU or the int ldxa path, or lsu_asi_illgl_va_cmplt_w2
+  ((ifu_lsu_ldxa_data_vld_w2 & ifu_lsu_ldxa_illgl_va_w2) & ifu_ldxa_thread0_w2) |
+  //((tlu_lsu_ldxa_data_vld_w2 & tlu_lsu_ldxa_illgl_va_w2) & tlu_ldxa_thread0_w2) |
+  ((spu_lsu_ldxa_data_vld_w2 & spu_lsu_ldxa_illgl_va_w2) & spu_ldxa_thread0_w2) |
+  (int_ldxa_ivld & thread0_w2) |
+  lsu_asi_illgl_va_cmplt_w2[0] ; 
+assign  ldxa_illgl_va_cmplt[1] =
+  ((ifu_lsu_ldxa_data_vld_w2 & ifu_lsu_ldxa_illgl_va_w2) & ifu_ldxa_thread1_w2) |
+  //((tlu_lsu_ldxa_data_vld_w2 & tlu_lsu_ldxa_illgl_va_w2) & tlu_ldxa_thread1_w2) |
+  ((spu_lsu_ldxa_data_vld_w2 & spu_lsu_ldxa_illgl_va_w2) & spu_ldxa_thread1_w2) |
+  (int_ldxa_ivld & thread1_w2) |
+  lsu_asi_illgl_va_cmplt_w2[1] ; 
+assign  ldxa_illgl_va_cmplt[2] =
+  ((ifu_lsu_ldxa_data_vld_w2 & ifu_lsu_ldxa_illgl_va_w2) & ifu_ldxa_thread2_w2) |
+  //((tlu_lsu_ldxa_data_vld_w2 & tlu_lsu_ldxa_illgl_va_w2) & tlu_ldxa_thread2_w2) |
+  ((spu_lsu_ldxa_data_vld_w2 & spu_lsu_ldxa_illgl_va_w2) & spu_ldxa_thread2_w2) |
+  (int_ldxa_ivld & thread2_w2) |
+  lsu_asi_illgl_va_cmplt_w2[2] ; 
+assign  ldxa_illgl_va_cmplt[3] =
+  ((ifu_lsu_ldxa_data_vld_w2 & ifu_lsu_ldxa_illgl_va_w2) & ifu_ldxa_thread3_w2) |
+  //((tlu_lsu_ldxa_data_vld_w2 & tlu_lsu_ldxa_illgl_va_w2) & tlu_ldxa_thread3_w2) |
+  ((spu_lsu_ldxa_data_vld_w2 & spu_lsu_ldxa_illgl_va_w2) & spu_ldxa_thread3_w2) |
+  (int_ldxa_ivld & thread3_w2) |
+  lsu_asi_illgl_va_cmplt_w2[3] ; 
+// One-cycle delayed copy of the per-thread illegal-VA completion.
+dff_s #(4)  illglva_cmplt_d1 (
+        .din    (ldxa_illgl_va_cmplt[3:0]),
+        .q      (ldxa_illgl_va_cmplt_d1[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+// Thread0
+// Should be able to remove thread qualification for full-raw.
+// Could have an e stage store and a w2 stage stb rd in the same cycle !!! Qualify select3
+// with select0 to give the earlier event priority. 
+assign  lmq_byp_ldxa_sel0[0] = ifu_ldxa_vld & ifu_ldxa_thread0_w2 ; // IFU ldxa return for thread 0
+//assign  lmq_byp_ldxa_sel0[1] = tlu_ldxa_vld & tlu_ldxa_thread0_w2 ; 
+assign  lmq_byp_ldxa_sel0[1] = spu_ldxa_vld & spu_ldxa_thread0_w2 ; // SPU ldxa return for thread 0
+assign  lmq_byp_ldxa_sel0[2] = (lsu_asi_rd_en_w2 & thread0_w2) | ldxa_tlbrd0_w3 ; // lsu_asi_rd_en_w2 for thread 0, or ldxa_tlbrd0_w3
+// Per-thread full-raw annul and ld/st miss flags; _d1 copies are one cycle later.
+wire	fraw_annul0,fraw_annul1,fraw_annul2,fraw_annul3 ;
+wire	ldst_miss0,ldst_miss1,ldst_miss2,ldst_miss3 ;
+
+//RAW read STB at W3 (not W2)
+//   E M W        W2 W3                      w4
+//LD     cam_hit     RD STB, flop in byp FFs
+//inst+1 D        E  
+//inst+2          D  E                            <= squash (stxa) rs3_e to write into byp FFs
+//  
+assign	fraw_annul0 = ld_stb_full_raw_w3 & thread0_w3 & ld_inst_vld_w3; // valid W3 load of this thread with a full-raw STB hit
+assign	fraw_annul1 = ld_stb_full_raw_w3 & thread1_w3 & ld_inst_vld_w3;
+assign	fraw_annul2 = ld_stb_full_raw_w3 & thread2_w3 & ld_inst_vld_w3;
+assign	fraw_annul3 = ld_stb_full_raw_w3 & thread3_w3 & ld_inst_vld_w3;
+
+assign	ldst_miss0 = lsu_ldst_miss_w2 & thread0_w2 ; // lsu_ldst_miss_w2 steered to the W2 thread
+assign	ldst_miss1 = lsu_ldst_miss_w2 & thread1_w2 ;
+assign	ldst_miss2 = lsu_ldst_miss_w2 & thread2_w2 ;
+assign	ldst_miss3 = lsu_ldst_miss_w2 & thread3_w2 ;
+
+wire	fraw_annul0_d1,fraw_annul1_d1,fraw_annul2_d1,fraw_annul3_d1 ;
+wire	ldst_miss0_d1,ldst_miss1_d1,ldst_miss2_d1,ldst_miss3_d1 ;
+
+dff_s #(4)  fraw_d1 (
+        .din    ({fraw_annul3,fraw_annul2,fraw_annul1,fraw_annul0}),
+        .q      ({fraw_annul3_d1,fraw_annul2_d1,fraw_annul1_d1,fraw_annul0_d1}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+dff_s #(4)  ldstm_d1 (
+        .din    ({ldst_miss3,ldst_miss2,ldst_miss1,ldst_miss0}),
+        .q      ({ldst_miss3_d1,ldst_miss2_d1,ldst_miss1_d1,ldst_miss0_d1}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+//wire	memref_d ;
+//assign	memref_d = ifu_lsu_memref_d ;
+/*wire	mref_vld0,mref_vld1,mref_vld2,mref_vld3;
+wire	mref_vld0_d1,mref_vld1_d1,mref_vld2_d1,mref_vld3_d1;
+
+// Bug 3053 - prevent overwrite of ldxa data with subsequent st-data
+assign	mref_vld0 = (memref_d | memref_e) & ~(lsu_ldst_miss_w2 & thread0_w2) ;
+assign	mref_vld1 = (memref_d | memref_e) & ~(lsu_ldst_miss_w2 & thread1_w2) ;
+assign	mref_vld2 = (memref_d | memref_e) & ~(lsu_ldst_miss_w2 & thread2_w2) ;
+assign	mref_vld3 = (memref_d | memref_e) & ~(lsu_ldst_miss_w2 & thread3_w2) ;
+
+dff_s #(4)  mrefv_d1 (
+        .din    ({mref_vld3,mref_vld2,mref_vld1,mref_vld0}),
+        .q      ({mref_vld3_d1,mref_vld2_d1,mref_vld1_d1,mref_vld0_d1}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  */
+
+//RAW timing change   
+assign  lmq_byp_data_sel0[0] = ld_stb_full_raw_w3 & ~(ldd_force_l2access_w3 | atomic_w3 | dtlb_perror_en_w3)  & thread0_w3 & ld_inst_vld_w3 ;  
+//assign  lmq_byp_data_sel0[1] = st_inst_vld_e & thread0_e & ~ifu_lsu_casa_e & ~fraw_annul0 ;
+// Timing fix - at most ld will also update the bypass buffer also.
+//assign  lmq_byp_data_sel0[1] = memref_e & thread0_e & ~ifu_lsu_casa_e & ~fraw_annul0 ; //bug3009
+assign  lmq_byp_data_sel0[1] =  ~lmq_byp_data_sel0[0] & memref_e & thread0_e & ~ifu_lsu_casa_e & 
+			~(fraw_annul0 | fraw_annul0_d1 | ldst_miss0 | ldst_miss0_d1); // Bug 3053,3180
+//assign  lmq_byp_data_sel0[1] = mref_vld0_d1 & thread0_e & ~ifu_lsu_casa_e & ~(fraw_annul0 | fraw_annul0_d1); // Bug 3053
+//assign  lmq_byp_data_sel0[1] = memref_e & thread0_e & ~ifu_lsu_casa_e & ~(fraw_annul0 | fraw_annul0_d1);
+assign  lmq_byp_data_sel0[2] = ~(|lmq_byp_data_sel0[1:0]) & casa_g & thread0_g & lsu_inst_vld_w & ~fraw_annul0_d1 ; // casa in G; yields to sel[1:0]
+assign  lmq_byp_data_sel0[3] = |lmq_byp_ldxa_sel0[2:0]; // any ldxa source; not qualified by sel[2:0]
+//assign  lmq_byp_data_sel0[3] = |lmq_byp_ldxa_sel0[3:0];
+   
+// Thread1
+assign  lmq_byp_ldxa_sel1[0] = ifu_ldxa_vld & ifu_ldxa_thread1_w2 ; 
+//assign  lmq_byp_ldxa_sel1[1] = tlu_ldxa_vld & tlu_ldxa_thread1_w2 ; 
+assign  lmq_byp_ldxa_sel1[1] = spu_ldxa_vld & spu_ldxa_thread1_w2 ; 
+assign  lmq_byp_ldxa_sel1[2] = (lsu_asi_rd_en_w2 & thread1_w2) | ldxa_tlbrd1_w3 ;
+// sel[0]: full-raw STB hit in W3, excluding ldd_force_l2access_w3, atomic_w3 and dtlb_perror_en_w3.
+assign  lmq_byp_data_sel1[0] = ld_stb_full_raw_w3 & ~(ldd_force_l2access_w3 | atomic_w3 | dtlb_perror_en_w3) & ld_inst_vld_w3 & thread1_w3 ;   
+assign  lmq_byp_data_sel1[1] = ~lmq_byp_data_sel1[0] & memref_e & thread1_e & ~ifu_lsu_casa_e & 
+			~(fraw_annul1 | fraw_annul1_d1 | ldst_miss1 | ldst_miss1_d1); // Bug 3053,3180
+//assign  lmq_byp_data_sel1[1] = memref_e & thread1_e & ~ifu_lsu_casa_e & ~fraw_annul1; // bug3009
+//assign  lmq_byp_data_sel1[1] = mref_vld1_d1 & thread1_e & ~ifu_lsu_casa_e & ~(fraw_annul1 | fraw_annul1_d1);
+//assign  lmq_byp_data_sel1[1] = memref_e & thread1_e & ~ifu_lsu_casa_e & ~(fraw_annul1 | fraw_annul1_d1); // Bug 3053
+assign  lmq_byp_data_sel1[2] =  ~(|lmq_byp_data_sel1[1:0]) & casa_g & thread1_g & lsu_inst_vld_w & ~fraw_annul1_d1 ;
+assign  lmq_byp_data_sel1[3] = |lmq_byp_ldxa_sel1[2:0];
+
+// Thread2
+assign  lmq_byp_ldxa_sel2[0] = ifu_ldxa_vld & ifu_ldxa_thread2_w2 ; 
+//assign  lmq_byp_ldxa_sel2[1] = tlu_ldxa_vld & tlu_ldxa_thread2_w2 ; 
+assign  lmq_byp_ldxa_sel2[1] = spu_ldxa_vld & spu_ldxa_thread2_w2 ; 
+assign  lmq_byp_ldxa_sel2[2] = (lsu_asi_rd_en_w2 & thread2_w2) | ldxa_tlbrd2_w3 ;
+
+assign  lmq_byp_data_sel2[0] = ld_stb_full_raw_w3 & ~(ldd_force_l2access_w3 | atomic_w3 | dtlb_perror_en_w3) & ld_inst_vld_w3 & thread2_w3 ;   
+//assign  lmq_byp_data_sel2[1] = memref_e & thread2_e & ~ifu_lsu_casa_e & ~fraw_annul2; // bug3009
+assign  lmq_byp_data_sel2[1] = ~lmq_byp_data_sel2[0] & memref_e & thread2_e & ~ifu_lsu_casa_e & 
+			~(fraw_annul2 | fraw_annul2_d1 | ldst_miss2 | ldst_miss2_d1); // Bug 3053,3180
+//assign  lmq_byp_data_sel2[1] = memref_e & thread2_e & ~ifu_lsu_casa_e & ~(fraw_annul2 | fraw_annul2_d1); // Bug 3053
+assign  lmq_byp_data_sel2[2] =  ~(|lmq_byp_data_sel2[1:0]) & casa_g & thread2_g & lsu_inst_vld_w & ~fraw_annul2_d1 ;
+assign  lmq_byp_data_sel2[3] = |lmq_byp_ldxa_sel2[2:0];
+
+// Thread3
+assign  lmq_byp_ldxa_sel3[0] = ifu_ldxa_vld & ifu_ldxa_thread3_w2 ; 
+//assign  lmq_byp_ldxa_sel3[1] = tlu_ldxa_vld & tlu_ldxa_thread3_w2 ; 
+assign  lmq_byp_ldxa_sel3[1] = spu_ldxa_vld & spu_ldxa_thread3_w2 ; 
+assign  lmq_byp_ldxa_sel3[2] =  (lsu_asi_rd_en_w2 & thread3_w2) | ldxa_tlbrd3_w3 ;
+
+assign  lmq_byp_data_sel3[0] = ld_stb_full_raw_w3 & ~(ldd_force_l2access_w3 | atomic_w3 | dtlb_perror_en_w3) & ld_inst_vld_w3 & thread3_w3 ;   
+assign  lmq_byp_data_sel3[1] = ~lmq_byp_data_sel3[0] & memref_e & thread3_e & ~ifu_lsu_casa_e & 
+			~(fraw_annul3 | fraw_annul3_d1 | ldst_miss3 | ldst_miss3_d1); // Bug 3053,3180
+//assign  lmq_byp_data_sel3[1] = memref_e & thread3_e & ~ifu_lsu_casa_e & ~(fraw_annul3 | fraw_annul3_d1); // Bug 3053
+assign  lmq_byp_data_sel3[2] = ~(|lmq_byp_data_sel3[1:0]) & casa_g & thread3_g & lsu_inst_vld_w & ~fraw_annul3_d1 ;
+assign  lmq_byp_data_sel3[3] = |lmq_byp_ldxa_sel3[2:0];
+
+
+dff_s #(4)  ff_lmq_byp_data_raw_sel_d1 ( // per-thread full-raw select (sel[0]), delayed one cycle
+        .din    ({lmq_byp_data_sel3[0], lmq_byp_data_sel2[0],
+                  lmq_byp_data_sel1[0], lmq_byp_data_sel0[0]}),
+        .q      (lmq_byp_data_raw_sel_d1[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// Second delay stage; the _d2 bits feed the ldbypN_vld_en terms.
+dff_s #(4)  ff_lmq_byp_data_raw_sel_d2 (
+        .din    (lmq_byp_data_raw_sel_d1[3:0]),
+        .q      (lmq_byp_data_raw_sel_d2[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+   
+wire 		lsu_irf_raw_byp_e;   
+// High in E when there is neither a dfq fill (l2fill_vld_e) nor a ld/st
+// in the pipe (memref_e).  Gates both ldxa and raw bypass.
+assign  lsu_irf_raw_byp_e = ~(l2fill_vld_e | memref_e) ;
+
+// bug 5379 plus misc: randomize selection to prevent deadlock.
+// Two fixed-priority grant vectors are built; lsu_dcache_rand[0] picks
+// thread-0-first when set and thread-3-first when clear.
+wire [3:0] byp_grant_t0_first ;
+wire [3:0] byp_grant_t3_first ;
+
+assign	byp_grant_t0_first[0] = ldbyp0_vld ;
+assign	byp_grant_t0_first[1] = ldbyp1_vld & ~ldbyp0_vld ;
+assign	byp_grant_t0_first[2] = ldbyp2_vld & ~(ldbyp0_vld | ldbyp1_vld) ;
+assign	byp_grant_t0_first[3] = ldbyp3_vld & ~(ldbyp0_vld | ldbyp1_vld | ldbyp2_vld) ;
+
+assign	byp_grant_t3_first[3] = ldbyp3_vld ;
+assign	byp_grant_t3_first[2] = ldbyp2_vld & ~ldbyp3_vld ;
+assign	byp_grant_t3_first[1] = ldbyp1_vld & ~(ldbyp3_vld | ldbyp2_vld) ;
+assign	byp_grant_t3_first[0] = ldbyp0_vld & ~(ldbyp3_vld | ldbyp2_vld | ldbyp1_vld) ;
+
+wire [3:0] bypass_sel ;
+assign	bypass_sel[3:0] = lsu_dcache_rand[0] ?
+	byp_grant_t0_first[3:0] : byp_grant_t3_first[3:0] ;
+
+// A grant is issued only when lsu_irf_raw_byp_e is high.
+assign ld_thrd_byp_sel_e[3:0] = bypass_sel[3:0] & {4{lsu_irf_raw_byp_e}} ;
+
+/*assign ld_thrd_byp_sel_e[0] = ldbyp0_vld & lsu_irf_raw_byp_e ;
+assign ld_thrd_byp_sel_e[1] = ldbyp1_vld & lsu_irf_raw_byp_e &
+      ~ldbyp0_vld ;                                     
+assign ld_thrd_byp_sel_e[2] = ldbyp2_vld & lsu_irf_raw_byp_e &
+      ~(ldbyp0_vld | ldbyp1_vld);                       
+assign ld_thrd_byp_sel_e[3] = ldbyp3_vld & lsu_irf_raw_byp_e &
+      ~(ldbyp0_vld | ldbyp1_vld | ldbyp2_vld) ; */
+
+   
+   //assign lsu_ld_thrd_byp_sel_e[2:0] = ld_thrd_byp_sel_e[2:0];
+    bw_u1_buf_30x UZsize_lsu_ld_thrd_byp_sel_e_b2 (.a(ld_thrd_byp_sel_e[2]), .z(lsu_ld_thrd_byp_sel_e[2]));  // NOTE(review): only bits [2:0] are buffered here; bit 3 is not driven in this span
+    bw_u1_buf_30x UZsize_lsu_ld_thrd_byp_sel_e_b1 (.a(ld_thrd_byp_sel_e[1]), .z(lsu_ld_thrd_byp_sel_e[1]));  
+    bw_u1_buf_30x UZsize_lsu_ld_thrd_byp_sel_e_b0 (.a(ld_thrd_byp_sel_e[0]), .z(lsu_ld_thrd_byp_sel_e[0]));  
+   // E -> M flop for the per-thread bypass select.
+dff_s #(4)  tbyp_stgd1 (
+        .din    (ld_thrd_byp_sel_e[3:0]),
+        .q      (ld_thrd_byp_sel_m[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+//assign ld_thrd_byp_mxsel_m[2:0]  =    ld_thrd_byp_sel_m[2:0];
+//assign ld_thrd_byp_mxsel_m[3]    =  ~|ld_thrd_byp_sel_m[2:0];
+// M-stage mux selects: bit 3 is the default (none of [2:0] set) and is forced while rst_tri_en is high.
+assign ld_thrd_byp_mxsel_m[0]  =    ld_thrd_byp_sel_m[0] & ~rst_tri_en;
+assign ld_thrd_byp_mxsel_m[1]  =    ld_thrd_byp_sel_m[1] & ~rst_tri_en;
+assign ld_thrd_byp_mxsel_m[2]  =    ld_thrd_byp_sel_m[2] & ~rst_tri_en;
+assign ld_thrd_byp_mxsel_m[3]  =    (~|ld_thrd_byp_sel_m[2:0]) |  rst_tri_en;
+   // M -> G flop for the per-thread bypass select.
+dff_s #(4)  tbyp_stgd2 (
+        .din    (ld_thrd_byp_sel_m[3:0]),
+        .q      (ld_thrd_byp_sel_g[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+  //should move to M stage 
+   
+//assign ld_thrd_byp_mxsel_g[2:0]  =    ld_thrd_byp_sel_g[2:0];
+//assign ld_thrd_byp_mxsel_g[3]    =  ~|ld_thrd_byp_sel_g[2:0];
+
+assign  lmq_byp_ldxa_mxsel0[1:0] =   lmq_byp_ldxa_sel0[1:0]; // ldxa source mux: [0]=IFU, [1]=SPU, [2]=default when neither is set
+assign  lmq_byp_ldxa_mxsel0[2]   = ~|lmq_byp_ldxa_sel0[1:0];
+assign  lmq_byp_ldxa_mxsel1[1:0] =   lmq_byp_ldxa_sel1[1:0];
+assign  lmq_byp_ldxa_mxsel1[2]   = ~|lmq_byp_ldxa_sel1[1:0];
+assign  lmq_byp_ldxa_mxsel2[1:0] =   lmq_byp_ldxa_sel2[1:0];
+assign  lmq_byp_ldxa_mxsel2[2]   = ~|lmq_byp_ldxa_sel2[1:0];
+assign  lmq_byp_ldxa_mxsel3[1:0] =   lmq_byp_ldxa_sel3[1:0];
+assign  lmq_byp_ldxa_mxsel3[2]   = ~|lmq_byp_ldxa_sel3[1:0];
+// Bypass data mux selects: sehold forces select 0 alone; otherwise rst_tri_en forces the default select 3.
+assign  lmq_byp_data_mxsel0[0] =   lmq_byp_data_sel0[0] & ~rst_tri_en |  sehold;
+assign  lmq_byp_data_mxsel0[1] =   lmq_byp_data_sel0[1] & ~rst_tri_en & ~sehold;
+assign  lmq_byp_data_mxsel0[2] =   lmq_byp_data_sel0[2] & ~rst_tri_en & ~sehold;
+assign  lmq_byp_data_mxsel0[3]   = (~|lmq_byp_data_sel0[2:0] | rst_tri_en) & ~sehold;
+
+assign  lmq_byp_data_mxsel1[0] =   lmq_byp_data_sel1[0] & ~rst_tri_en |  sehold;
+assign  lmq_byp_data_mxsel1[1] =   lmq_byp_data_sel1[1] & ~rst_tri_en & ~sehold;
+assign  lmq_byp_data_mxsel1[2] =   lmq_byp_data_sel1[2] & ~rst_tri_en & ~sehold;
+assign  lmq_byp_data_mxsel1[3]   = (~|lmq_byp_data_sel1[2:0] | rst_tri_en) & ~sehold;
+
+assign  lmq_byp_data_mxsel2[0] =   lmq_byp_data_sel2[0] & ~rst_tri_en |  sehold;
+assign  lmq_byp_data_mxsel2[1] =   lmq_byp_data_sel2[1] & ~rst_tri_en & ~sehold;
+assign  lmq_byp_data_mxsel2[2] =   lmq_byp_data_sel2[2] & ~rst_tri_en & ~sehold;
+assign  lmq_byp_data_mxsel2[3]   = (~|lmq_byp_data_sel2[2:0] | rst_tri_en) & ~sehold;
+
+assign  lmq_byp_data_mxsel3[0] =   lmq_byp_data_sel3[0] & ~rst_tri_en |  sehold;
+assign  lmq_byp_data_mxsel3[1] =   lmq_byp_data_sel3[1] & ~rst_tri_en & ~sehold;
+assign  lmq_byp_data_mxsel3[2] =   lmq_byp_data_sel3[2] & ~rst_tri_en & ~sehold;
+assign  lmq_byp_data_mxsel3[3]   = (~|lmq_byp_data_sel3[2:0] | rst_tri_en) & ~sehold;
+
+//=========================================================================================
+//	Error Based Traps/Reporting
+//
+//=========================================================================================
+
+// !!! ORIGINAL ABOVE !!!
+// Error Table for Queue
+// ** In all cases; squash writes to irf.
+//				| Error Reporting	| Trap ?	| 
+// ifu_lsu_asi_rd_unc		| NA;done by ifu	| daccess-error	|
+// tte_data_perror_unc_w2	| sync;in pipe		| daccess-error	|
+// tte_data_perror_corr_w2	| sync;in pipe		| dmmu-miss	|
+// asi_tte_data_perror_w2	| async;out of Q	| daccess-error	|
+// asi_tte_tag_perror_w2	| async;out of Q	| daccess-error	|
+
+assign	squash_byp_cmplt[0] = // squash: any of the four captured errors while nceen is set, a pending atomic-load error, or an SPU trap
+	((cam_perr_unc0  |  		
+	asi_data_perr0 |  		
+	asi_tag_perr0  |  		
+	ifu_unc_err0   ) & lsu_nceen_d1[0]) |
+	pend_atm_ld_ue[0] |
+	spubyp0_trap ; // Bug 3873. add spu trap squash. (change reverted).
+assign	squash_byp_cmplt[1] = // same terms for thread 1
+	((cam_perr_unc1 | asi_data_perr1 | asi_tag_perr1 | ifu_unc_err1) & lsu_nceen_d1[1]) | 
+	pend_atm_ld_ue[1] | spubyp1_trap ;	
+assign	squash_byp_cmplt[2] = // same terms for thread 2
+	((cam_perr_unc2 | asi_data_perr2 | asi_tag_perr2 | ifu_unc_err2) & lsu_nceen_d1[2]) | 
+	pend_atm_ld_ue[2] | spubyp2_trap ;	
+assign	squash_byp_cmplt[3] = // same terms for thread 3
+	((cam_perr_unc3 | asi_data_perr3 | asi_tag_perr3 | ifu_unc_err3) & lsu_nceen_d1[3]) | 
+	pend_atm_ld_ue[3] | spubyp3_trap ;	
+
+assign  cam_perr_unc_e = // each *_e error below is the captured flag of the thread granted the bypass in E
+  (ld_thrd_byp_sel_e[0] & cam_perr_unc0) |
+  (ld_thrd_byp_sel_e[1] & cam_perr_unc1) |
+  (ld_thrd_byp_sel_e[2] & cam_perr_unc2) |
+  (ld_thrd_byp_sel_e[3] & cam_perr_unc3) ;
+assign  asi_data_perr_e = 
+  (ld_thrd_byp_sel_e[0] & asi_data_perr0) |
+  (ld_thrd_byp_sel_e[1] & asi_data_perr1) |
+  (ld_thrd_byp_sel_e[2] & asi_data_perr2) |
+  (ld_thrd_byp_sel_e[3] & asi_data_perr3) ;
+assign  asi_tag_perr_e = 
+  (ld_thrd_byp_sel_e[0] & asi_tag_perr0) |
+  (ld_thrd_byp_sel_e[1] & asi_tag_perr1) |
+  (ld_thrd_byp_sel_e[2] & asi_tag_perr2) |
+  (ld_thrd_byp_sel_e[3] & asi_tag_perr3) ;
+assign  ifu_unc_err_e = 
+  (ld_thrd_byp_sel_e[0] & ifu_unc_err0) |
+  (ld_thrd_byp_sel_e[1] & ifu_unc_err1) |
+  (ld_thrd_byp_sel_e[2] & ifu_unc_err2) |
+  (ld_thrd_byp_sel_e[3] & ifu_unc_err3) ;
+wire atm_st_unc_err_e,atm_st_unc_err_m,atm_st_unc_err_g ;
+assign	atm_st_unc_err_e = // atm_st_cmplt of a thread while its pend_atm_ld_ue is still set
+(atm_st_cmplt0 & pend_atm_ld_ue[0]) | 
+(atm_st_cmplt1 & pend_atm_ld_ue[1]) | 
+(atm_st_cmplt2 & pend_atm_ld_ue[2]) | 
+(atm_st_cmplt3 & pend_atm_ld_ue[3]) ; 
+// Two flops carry the five error indications from E through M to G.
+dff_s #(5)  stgm_tlberr (
+        .din    ({cam_perr_unc_e,asi_data_perr_e,
+		asi_tag_perr_e,ifu_unc_err_e,atm_st_unc_err_e}),
+        .q      ({cam_perr_unc_m,asi_data_perr_m,
+		asi_tag_perr_m,ifu_unc_err_m,atm_st_unc_err_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+
+dff_s #(5)  stgg_tlberr (
+        .din    ({cam_perr_unc_m,asi_data_perr_m,
+		asi_tag_perr_m,ifu_unc_err_m,atm_st_unc_err_m}),
+        .q      ({cam_perr_unc_g,asi_data_perr_g,
+		asi_tag_perr_g,ifu_unc_err_g,atm_st_unc_err_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+assign	lsu_tlb_asi_data_perr_g = asi_data_perr_g ; // G-stage ASI data/tag parity error outputs
+assign	lsu_tlb_asi_tag_perr_g = asi_tag_perr_g ;
+
+// Asynchronous Trap Reporting to TLU (Traps are still precise).
+// This version of nceen is meant specifically for trap reporting
+// out of the asi queue.
+wire nceen_m, nceen_g ;
+assign nceen_m = // lsu_nceen_d1 of the thread selected for bypass in M
+	(ld_thrd_byp_sel_m[0] & lsu_nceen_d1[0]) |
+	(ld_thrd_byp_sel_m[1] & lsu_nceen_d1[1]) |
+	(ld_thrd_byp_sel_m[2] & lsu_nceen_d1[2]) |
+	(ld_thrd_byp_sel_m[3] & lsu_nceen_d1[3]) ;
+
+wire nceen_dfq_m,nceen_dfq_g ;
+
+// This version is meant specifically for lds reporting traps
+// from the dfq.
+assign	nceen_dfq_m = // lsu_nceen_d1 indexed by the decoded 2-bit dfq_tid_m
+	((~dfq_tid_m[1] & ~dfq_tid_m[0]) & lsu_nceen_d1[0]) |
+	((~dfq_tid_m[1] &  dfq_tid_m[0]) & lsu_nceen_d1[1]) |
+	(( dfq_tid_m[1] & ~dfq_tid_m[0]) & lsu_nceen_d1[2]) |
+	(( dfq_tid_m[1] &  dfq_tid_m[0]) & lsu_nceen_d1[3]) ;
+// M -> G flop for both enables.
+dff_s #(2)  trpen_stg (
+        .din    ({nceen_m,nceen_dfq_m}),
+        .q    	({nceen_g,nceen_dfq_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+
+// l2c/dram
+wire	atm_ld_w_uerr_m ; // atm_ld_w_uerr delayed one cycle
+dff_s #(1)  atmldu_stm (
+        .din    (atm_ld_w_uerr),
+        .q    	(atm_ld_w_uerr_m),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+// L2 uncorrectable error in M, suppressed when it belongs to an atomic load with error.
+wire	pmem_unc_error_m,pmem_unc_error_g ;
+assign	pmem_unc_error_m = 
+	l2_unc_error_m &  // bug3666
+	~atm_ld_w_uerr_m ; //bug4048 - squash for atm ld with error.
+
+wire	pmem_unc_error_tmp ;
+dff_s #(1)  pmem_stg (
+        .din    (pmem_unc_error_m),
+        .q    	(pmem_unc_error_tmp),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+// G-stage value also includes bld_unc_err_pend_g; bld_squash_err_g suppresses both sources.
+assign	pmem_unc_error_g = 
+	(pmem_unc_error_tmp | bld_unc_err_pend_g) & ~bld_squash_err_g ;
+
+wire	async_ttype_vld_g ; // G-stage asynchronous trap: valid, 7-bit type, 2-bit thread id
+wire [6:0] async_ttype_g ;
+wire [1:0] async_tid_g ;
+
+//wire	st_dtlb_perr_en ;
+//assign	st_dtlb_perr_en = st_inst_vld_unflushed & tte_data_perror_unc & nceen_pipe_g ;
+
+// traps are not to be taken if enables are not set. The asi rds of the tlb must
+// thus complete as usual.
+assign	async_ttype_vld_g =
+	(((cam_perr_unc_g | asi_data_perr_g | asi_tag_perr_g | ifu_unc_err_g) & nceen_g) | 
+		(pmem_unc_error_g & nceen_dfq_g)) | // Bug 3335,3518
+	atm_st_unc_err_g |	// Bug 4048
+	//lsu_defr_trp_taken_g |
+	//st_dtlb_perr_en |
+	//cam_perr_corr_g |
+	spubyp_trap_active_g ;
+// Trap type: the SPU-supplied type when an SPU trap is active in M, else 7'h32.
+wire [6:0]	async_ttype_m ; // NOTE(review): 7'h32 is presumably the data-access-error trap type - confirm against the trap table
+assign	async_ttype_m[6:0] =
+	spubyp_trap_active_m ? spubyp_ttype[6:0] : 7'h32 ;
+
+dff_s #(7)  attype_stg (
+        .din    (async_ttype_m[6:0]),
+        .q      (async_ttype_g[6:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+// Encode the E-stage bypass select into a 2-bit thread id, then flop it through M to G.
+wire [1:0]	async_err_tid_e,async_err_tid_m,async_err_tid_g ;
+assign	async_err_tid_e[0] = ld_thrd_byp_sel_e[1] | ld_thrd_byp_sel_e[3] ;
+assign	async_err_tid_e[1] = ld_thrd_byp_sel_e[3] | ld_thrd_byp_sel_e[2] ;
+
+dff_s #(2)  ldbyperr_stgm (
+        .din    (async_err_tid_e[1:0]),
+        .q      (async_err_tid_m[1:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+dff_s #(2)  ldbyperr_stgg (
+        .din    (async_err_tid_m[1:0]),
+        .q      (async_err_tid_g[1:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+// dfq_tid_g supplies the thread id for pmem and atomic-store errors; otherwise the bypass-derived id is used.
+wire	sel_dfq_tid ;
+assign	sel_dfq_tid = pmem_unc_error_g | atm_st_unc_err_g ;
+assign	async_tid_g[1:0] = 
+	//lsu_defr_trp_taken_g ? thrid_g[1:0] : // Bug 4660 - remove.
+	sel_dfq_tid ? // Bug 3335,4048
+	dfq_tid_g[1:0] : async_err_tid_g[1:0] ;
+
+// Delay async_trp interface to TLU by a cycle.
+
+dff_s #(10)  asynctrp_stgw2 ( // 10 bits = valid (1) + tid (2) + ttype (7)
+        .din    ({async_ttype_vld_g,async_tid_g[1:0],async_ttype_g[6:0]}),
+        .q      ({lsu_tlu_async_ttype_vld_w2,lsu_tlu_async_tid_w2[1:0],
+		lsu_tlu_async_ttype_w2[6:0]}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+// Asynchronous Error Reporting to IFU 
+// Partial.
+
+wire  sync_error_sel ;
+wire	memref_m ,memref_g; // memref_m is driven outside this span; memref_g is its one-cycle delayed copy
+   
+dff_s #(1) memref_stgg (
+        .din    (memref_m),
+        .q    	(memref_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   
+//assign  sync_error_sel = tte_data_perror_unc | tte_data_perror_corr ;
+
+//for in1 or in2 to be selected, memref_g must be 0.
+//in1 is reported thru the bypass/asi queues, in2 thru the dfq.
+//So err_addr_sel[0] can be memref_g.
+   assign sync_error_sel = memref_g;
+   
+wire	async_error_sel ;
+assign	async_error_sel = asi_data_perr_g | asi_tag_perr_g ; // an ASI data or tag parity error reached G
+// Error address/tid select: [0]=sync, [1]=async, [2]=default; rst_tri_en forces the default alone.
+assign	lsu_err_addr_sel[0] =  sync_error_sel & ~rst_tri_en;
+assign	lsu_err_addr_sel[1] =  async_error_sel & ~rst_tri_en;
+assign	lsu_err_addr_sel[2] = ~(sync_error_sel | async_error_sel) | rst_tri_en;
+
+//mux4ds  #(6) async_tlb_index_mx(
+//  .in0  (misc_data0[5:0]),
+//  .in1  (misc_data1[5:0]),
+//  .in2  (misc_data2[5:0]),
+//  .in3  (misc_data3[5:0]),
+//  .sel0 (ld_thrd_byp_sel_g[0]),
+//  .sel1 (ld_thrd_byp_sel_g[1]),
+//  .sel2 (ld_thrd_byp_sel_g[2]),
+//  .sel3 (ld_thrd_byp_sel_g[3]),
+//  .dout (async_tlb_index[5:0])
+//   );
+   
+assign async_tlb_index[5:0] =  // AND-OR mux: low 6 bits of misc_data for the thread selected in G (zero if none)
+  (ld_thrd_byp_sel_g[0] ? misc_data0[5:0] : 6'b0) |
+  (ld_thrd_byp_sel_g[1] ? misc_data1[5:0] : 6'b0) |
+  (ld_thrd_byp_sel_g[2] ? misc_data2[5:0] : 6'b0) |
+  (ld_thrd_byp_sel_g[3] ? misc_data3[5:0] : 6'b0) ;
+        
+wire	[1:0] err_tid_g ;
+//assign  err_tid_g[1:0] =
+//  sync_error_sel ? thrid_g[1:0] :
+//  	async_error_sel ? async_err_tid_g[1:0] : dfill_tid_g[1:0] ;
+// Thread id of the reported error: pipe thread (sync), bypass thread (async) or dfill thread (default).
+mux3ds #(2) err_tid_mx (
+  .in0 (thrid_g[1:0]),
+  .in1 (async_err_tid_g[1:0]),
+  .in2 (dfill_tid_g[1:0]),
+  .sel0(lsu_err_addr_sel[0]),
+  .sel1(lsu_err_addr_sel[1]),
+  .sel2(lsu_err_addr_sel[2]),
+  .dout(err_tid_g[1:0])
+                   );
+                
+// Can shift to m.
+//assign  lsu_tlu_derr_tid_g[1:0] = err_tid_g[1:0] ;
+// The error thread id is registered once more before driving lsu_ifu_error_tid.
+dff_s #(2)  errad_stgg (
+        .din    (err_tid_g[1:0]),
+        .q      (lsu_ifu_error_tid[1:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        ); 
+
+assign  lsu_ifu_io_error = //l2_unc_error_w2 & lsu_ifu_err_addr_b39 ;
+// extend for bld to io space.
+(l2_unc_error_w2 | bld_unc_err_pend_w2) & lsu_ifu_err_addr_b39 & ~bld_squash_err_w2 ; // L2 or pending bld error with lsu_ifu_err_addr_b39 set, unless bld_squash_err_w2
+
+ 
+//=========================================================================================
+
+
+wire stxa_internal_cmplt ; // internal stxa completing in W: valid, not early-flushed, and none of the excluded ASI classes below
+assign	stxa_internal_cmplt = 
+stxa_internal & 
+~(intrpt_disp_asi_g | stxa_stall_asi_g | (ifu_nontlb_asi_g & ~ifu_asi42_flush_g) | tlb_lng_ltncy_asi_g) & 
+					lsu_inst_vld_w & ~dctl_early_flush_w ;
+					//lsu_inst_vld_w & ~dctl_flush_pipe_w ;
+
+// Need to add stxa's related to ifu non-tlb asi.
+dff_s  stxa_int_d1 (
+        .din    (stxa_internal_cmplt),
+        //.din    (stxa_internal & ~(stxa_stall_asi_g | tlb_lng_ltncy_asi_g) & lsu_inst_vld_w),
+        .q      (stxa_internal_d1),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// Second delay stage: stxa_internal_d1 -> stxa_internal_d2.
+dff_s  stxa_int_d2 (
+        .din    (stxa_internal_d1),
+        .q      (stxa_internal_d2),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+
+//=========================================================================================
+//  Replacement Algorithm for Cache
+//=========================================================================================
+
+
+
+// Advance condition for the replacement LFSR (staged one cycle before it reaches the LFSR).
+wire	lfsr_incr, lfsr_incr_d1 ;
+assign	lfsr_incr = 
+	ld_inst_vld_g & 
+	~(lsu_way_hit_or | ldxa_internal | ncache_pcx_rq_g) ; // no way hit, not internal ldxa, must be cacheable
+
+dff_s  lfsrd1_ff (
+        .clk    (clk),
+        .din    (lfsr_incr),
+        .q      (lfsr_incr_d1),
+        .se     (se),       .si (),          .so ()
+        );  
+
+wire	lfsr_rst ;
+assign	lfsr_rst = 
+		dc_direct_map 	| // direct map mode will reset.
+		~gdbginit_l 	| // debug init.
+		reset 		;
+
+// Bug 4027
+lsu_dcache_lfsr lfsr(.clk  (clk),
+                                           .reset (lfsr_rst),
+                                           .advance (lfsr_incr_d1),
+                                           .out (lsu_dcache_rand[1:0]),
+                                           .se (se),
+                                           .si (),
+                                           .so ());
+
+//assign  lsu_dcache_rand[1:0]  =  dcache_rand[1:0]; 
+
+
+/*assign  dcache_rand_new[1:0] = dcache_rand[1:0] + {1'b0, lsu_ld_miss_wb} ;
+dffre_s #(2) drand (
+        .din    (dcache_rand_new[1:0]),
+        .q      (dcache_rand[1:0]),
+        .rst  (reset), .en    (lsu_ld_miss_wb),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+assign  lsu_dcache_rand[1:0]  =  dcache_rand[1:0]; */
+
+//=========================================================================================
+//  Packet Assembly
+//=========================================================================================
+
+// 2-bit encode of the way-hit vector: ways 1/3 set bit 0, ways 2/3 set bit 1.
+assign lsu_encd_way_hit[1:0] = {cache_way_hit_buf1[3] | cache_way_hit_buf1[2],
+                                cache_way_hit_buf1[3] | cache_way_hit_buf1[1]} ;
+//assign lsu_way_hit_or  =  |lsu_way_hit[3:0];
+assign lsu_way_hit_or  =  cache_way_hit_buf1[3] | cache_way_hit_buf1[2] |
+                          cache_way_hit_buf1[1] | cache_way_hit_buf1[0] ; // Bug 3940
+//assign  stb_byp_pkt_vld_e = st_inst_vld_e & ~(ldsta_internal_e & alt_space_e);
+assign  ld_pcx_pkt_vld_e = ld_inst_vld_e & (~ldsta_internal_e | ~alt_space_e);
+
+
+dff_s #(5)  pktctl_stgm ( // e -> m staging of the packet-control bits
+        .din    ({ifu_lsu_ldst_dbl_e, ld_pcx_pkt_vld_e,
+    ifu_lsu_casa_e,ifu_lsu_ldstub_e,ifu_lsu_swap_e}),
+        .q      ({ldst_dbl_m, ld_pcx_pkt_vld_m,
+    casa_m,ldstub_m,swap_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// Any of casa / ldstub / swap counts as an atomic.
+assign  atomic_m = casa_m | ldstub_m | swap_m ;
+// m -> g staging; atomic_m joins the staged bundle here (hence width 6).
+dff_s #(6) pktctl_stgg (
+        .din    ({ldst_dbl_m, ld_pcx_pkt_vld_m,
+    casa_m,ldstub_m,swap_m,atomic_m}),
+        .q      ({ldst_dbl_g, ld_pcx_pkt_vld_g,
+    casa_g,ldstub_g,swap_g,atomic_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// g -> w2 staging of ldd_force_l2access and atomic.
+dff_s #(2) pktctl_stgw2 (
+        .din    ({ldd_force_l2access_g, atomic_g}),
+        .q      ({ldd_force_l2access_w2,atomic_w2}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// w2 -> w3 staging of the same two bits.
+dff_s #(2) pktctl_stgw3 (
+        .din    ({ldd_force_l2access_w2, atomic_w2}),
+        .q      ({ldd_force_l2access_w3, atomic_w3}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+// lsu_-prefixed copies of the g-stage ldstub / swap indications.
+assign  lsu_ldstub_g = ldstub_g ;
+assign  lsu_swap_g = swap_g ;
+
+// Choose way for load. If load hits in dcache but sent out to xbar because
+// of partial raw then need to use hit way else use random. Similarly, dcache
+// parity error will force a miss and fill to same way.
+
+// Moved to qctl1
+// For direct-map mode, assume that the additional set-index bits 12:11 are
+// used to select the line (way) within the set.
+//assign  ld_way[1:0] = 
+//    (|lsu_way_hit[3:0]) ? 
+//        {lsu_encd_way_hit[1],lsu_encd_way_hit[0]} : 
+//          	lsu_ld_sec_hit_l2access_g ? lsu_ld_sec_hit_wy_g[1:0] :
+//	   		(dc_direct_map ? ldst_va_g[12:11] : dcache_rand[1:0]) ;
+
+// Load request type: set to 011 for atomic - only cas encoding used for pcx pkt; 000 otherwise.
+assign  ld_rq_type[2:0] =
+    atomic_g ? 3'b011 :       // cas pkt 2/ldstub/swap 
+//        (ldst_dbl_g & st_inst_vld_g & quad_asi_g) ? 3'b001 : // stquad - label as store.
+    3'b000 ;      // normal load
+
+// Packet valid: a valid load pcx packet or a prefetch (the st-quad term of the old equation below was dropped).
+//assign  lmq_pkt_vld_g = ld_pcx_pkt_vld_g | (ldst_dbl_g & st_inst_vld_unflushed) | pref_inst_g ; 
+assign  lmq_pkt_vld_g = ld_pcx_pkt_vld_g | pref_inst_g ; 
+
+// Moved to qctl1
+// 2'b01 encodes ld as st-quad pkt2. 2'b00 needed for cas-pkt2
+//assign  lmq_pkt_way_g[1:0] = 
+//(ldst_dbl_g & st_inst_vld_unflushed & quad_asi_g) ? 2'b01 :
+//        casa_g ? 2'b00 : ld_way[1:0] ;
+
+// ld is 128b request: unflushed load in alternate space using a quad or block ASI.
+wire	qword_access_g;
+assign	qword_access_g = 
+(quad_asi_g | blk_asi_g ) & lsu_alt_space_g & ld_inst_vld_unflushed ;
+// lsu_-prefixed copy of qword_access_g.
+assign	lsu_quad_word_access_g = qword_access_g ;
+// Floating-point load: fp ld/st qualified with a valid load.
+wire  fp_ld_inst_g ;
+assign  fp_ld_inst_g  = fp_ldst_g & ld_inst_vld_g ;  
+// Size bit 0 for the packet: cleared for an integer ldst-dbl unless it is an alternate-space quad-ASI access.
+wire  ldst_sz_b0_g ;
+assign  ldst_sz_b0_g =  
+  ldst_sz_g[0] & 
+  ~(ldst_dbl_g & ~fp_ldst_g & 
+    (~lsu_alt_space_g | (lsu_alt_space_g & ~quad_asi_g))) ; 
+                // word for ld-dbl
+
+wire	asi_real_iomem_m,asi_real_iomem_g ; // alt-space access with dtlb bypass using a phy_use_ec / phy_byp_ec ASI
+assign	asi_real_iomem_m = 
+(dtlb_bypass_m & (phy_use_ec_asi_m | phy_byp_ec_asi_m) & lsu_alt_space_m) ;
+// m -> g staging of asi_real_iomem.
+dff_s #(1) stgg_asir (
+        .din    (asi_real_iomem_m),
+        .q    	(asi_real_iomem_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// Non-cacheable request indication: OR of every condition listed below (also placed in ld_pcx_pkt_g as the NC bit).
+assign  ncache_pcx_rq_g   = 
+  atomic_g    |   // cas,ldstub,swap  
+  asi_real_iomem_g | // real_mem, real_io
+  ~dcache_enable_g | // dcache disabled : Bug 5174 (accidental removal)
+  ((tlb_pgnum[39] & ~lsu_dtlb_bypass_g & tlb_cam_hit_g) | // IO - tlb not in bypass
+   (tlb_pgnum[39] &  lsu_dtlb_bypass_g)) |    // IO - tlb bypass
+  (~lsu_tte_data_cp_g & tlb_cam_hit_g) |      // cp bit is clear
+  ((quad_asi_g | binit_quad_asi_g | blk_asi_g)  & lsu_alt_space_g & ldst_dbl_g & ld_inst_vld_unflushed) |  // quad-ld
+  pref_inst_g ; // pref will not alloc. in L2 dir
+
+//wire	dflush_ld_g ;
+//assign  dflush_ld_g = dflush_asi_g & lsu_alt_space_g ;
+
+// st-quad pkt1 and pkt2 need different addresses !!
+// ** should be able to reduce the width, rd2,stquad,lmq_pkt_way ** 
+//assign  ld_pcx_pkt_g[`LMQ_WIDTH-1:0] =
+
+//bug3601
+//dbl_data_return will become lmq_ldd
+//it includes quad ld, int ldd, block ld, all these cases need return data twice.    
+   wire dbl_data_return; // ldst-dbl that is not an fp access, or is an alternate-space block-ASI access
+   assign dbl_data_return = ldst_dbl_g & ~ (fp_ldst_g & ~ (blk_asi_g & lsu_alt_space_g));
+   // Upper field of the load packet: the concatenation below is 25 bits wide, MSB first.
+assign  ld_pcx_pkt_g[`LMQ_WIDTH-1:40] =
+  {lmq_pkt_vld_g,        // packet valid
+  1'b0,                  //dflush_ld_g, bug 4580 
+  pref_inst_g,           // prefetch
+  fp_ld_inst_g,          // floating-point load
+  l1hit_sign_extend_g,   // sign-extend indication staged to g
+  //lsu_bendian_access_g,
+  bendian_g,	// l2fill_bendian removed.
+  ld_rd_g[4:0], // use rd1 only for now.
+  dbl_data_return,  //bug 3601
+  //ldst_dbl_g & ~fp_ldst_g,  // rd2 used by ld double.
+  {ld_rd_g[4:1],~ld_rd_g[0]}, // rd2 to be used with atomics.
+  ld_rq_type[2:0],  // request type (011 atomic, 000 normal load)
+  ncache_pcx_rq_g,  // NC.
+  //lmq_pkt_way_g[1:0], // replacement way
+  2'b00,            // placeholder where the replacement way used to be
+  ldst_sz_g[1],ldst_sz_b0_g}; // size: bit 1 as is, bit 0 after the ld-dbl adjustment
+  //{tlb_pgnum[39:10], ldst_va_g[9:0]}};
+
+//=========================================================================================
+//  Byte Masking for writes
+//=========================================================================================
+
+// Byte-enables will be generated in cycle prior to fill (E-stage)
+// Reads and writes are mutex as array is single-ported.
+// byte-enables are handled thru read-modify-writes.
+
+// Create 16b Write Mask based on size and va ;
+// This is to be put in the DFQ once the DFQ is on-line.
+
+
+wire [2:0] dc_waddr_m ; // m-stage copy of dcache_wr_addr_e[2:0]
+dff_s #(4) stgm_addr ( // e -> m staging of memref and the low 3 write-address bits
+        .din    ({memref_e, dcache_wr_addr_e[2:0]}),
+        .q    	({memref_m, dc_waddr_m[2:0]}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+// lsu_-prefixed copy of memref_m.
+assign	lsu_memref_m = memref_m ;
+// Old 4-bit version kept for reference; the live code below uses only 3 address bits.
+//wire [3:0] rwaddr_enc ;
+//assign  rwaddr_enc[3:0] = memref_m ? 
+//        lsu_ldst_va_b7_b0_m[3:0] : dc_waddr_m[3:0];
+// Encoded byte offset: the ld/st VA bits for a memory reference, otherwise the staged cache write address.
+wire [2:0] rwaddr_enc ;
+assign  rwaddr_enc[2:0] = memref_m ? 
+        lsu_ldst_va_b7_b0_m[2:0] : dc_waddr_m[2:0];
+   
+
+   wire [1:0] wr_size;
+   wire       wr_hword, wr_word, wr_dword;
+
+   // wr_size is a local alias of the 2-bit encoded cache write size.
+   assign wr_size[1:0] = dcache_wr_size_e[1:0];
+
+//assign  wr_byte    = (wr_size[1:0] == 2'b00) ; // byte decode is not needed
+assign  wr_hword   = (wr_size[1:0] == 2'b01) ;
+assign  wr_word    = (wr_size[1:0] == 2'b10) ;
+assign  wr_dword   = (wr_size[1:0] == 2'b11) ;
+
+assign  ldst_byte    = (ldst_sz_e[1:0] == 2'b00) ;
+assign  ldst_hword   = (ldst_sz_e[1:0] == 2'b01) ;
+assign  ldst_word    = (ldst_sz_e[1:0] == 2'b10) ;
+assign  ldst_dword   = (ldst_sz_e[1:0] == 2'b11) ;
+
+// In Bypass mode, endianness is determined by asi.
+// Need to complete this equation.
+// The TLB invert-endian indication is honoured only on a TLB CAM hit with the DTLB not in bypass.
+// Note : add MMU disable bypass conditions !!!
+assign  tlb_invert_endian_g = lsu_tlb_invert_endian_g & ~lsu_dtlb_bypass_g & tlb_cam_hit_g ; 
+
+// Is qualification with reset needed ?
+//assign  l2fill_bendian_g = lsu_l2fill_bendian_g & ~reset;
+
+//assign  pstate_cle_m = 
+//  thread0_m ? tlu_lsu_pstate_cle[0] :
+//    thread1_m ? tlu_lsu_pstate_cle[1] :
+//      thread2_m ? tlu_lsu_pstate_cle[2] :
+//          tlu_lsu_pstate_cle[3] ;
+
+mux4ds  #(1) pstate_cle_e_mux ( // pick the pstate.cle bit of the e-stage thread
+        .in0    (tlu_lsu_pstate_cle[0]),
+        .in1    (tlu_lsu_pstate_cle[1]),
+        .in2    (tlu_lsu_pstate_cle[2]),
+        .in3    (tlu_lsu_pstate_cle[3]),
+        .sel0   (thread0_e),  
+        .sel1   (thread1_e),
+        .sel2   (thread2_e),  
+        .sel3   (thread3_e),
+        .dout   (pstate_cle_e)
+);
+// e -> m staging of the selected pstate.cle bit.
+dff_s #(1) stgm_pstatecle (
+        .din    (pstate_cle_e),
+        .q      (pstate_cle_m),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+   // m -> g staging.
+dff_s #(1) stgg_pstatecle (
+        .din    (pstate_cle_m),
+        .q      (pstate_cle_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+//SPARC V9 page 52. pstate.cle should only affect implicit ASI; explicit (alt-space) accesses use lendian_asi instead.
+assign  l1hit_lendian_g = 
+    ((non_altspace_ldst_g & (pstate_cle_g ^ tlb_invert_endian_g)) |       // non altspace ldst
+     (altspace_ldst_g     & (lendian_asi_g ^ tlb_invert_endian_g)))       // altspace ldst
+    & ~(asi_internal_g & lsu_alt_space_g);                                // internal asi is big-endian
+// m-stage version of the equation above, without the tlb_invert_endian term (not used in the m-stage prediction).
+wire    l1hit_lendian_predict_m ;
+// Predict endian-ness in m-stage. Assume tte.IE=0
+assign  l1hit_lendian_predict_m =
+    ((non_altspace_ldst_m & pstate_cle_m) |        // non altspace ldst
+     (altspace_ldst_m     & lendian_asi_m))        // altspace ldst
+    & ~asi_internal_m ;                            // internal asi is big-endian
+   
+// Further, decode of ASI is not factored into endian calculation. 
+//assign  lsu_bendian_access_g = (ld_inst_vld_unflushed | st_inst_vld_unflushed) ?
+//    ~l1hit_lendian_g : l2fill_bendian_g ;
+
+// m stage endian signal is predicted for in-pipe lds only; otherwise lsu_l2fill_bendian_m is used.
+wire    bendian_pred_m, bendian_pred_g ;
+assign  bendian_pred_m = (ld_inst_vld_m | st_inst_vld_m) ?
+    ~l1hit_lendian_predict_m : lsu_l2fill_bendian_m ;
+// m -> g staging of the prediction so it can be compared with the g-stage result.
+dff_s #(1) stgg_bendpr(
+        .din    (bendian_pred_m),
+        .q      (bendian_pred_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// mispredict applies to only in-pipe lds: set when the staged prediction differs from ~l1hit_lendian_g.
+assign  endian_mispred_g =  bendian_pred_g ^ ~l1hit_lendian_g ;
+
+// Staging for alignment on read from l1 or fill to l2 (e -> m copy of the four decoded sizes).
+dff_s #(4) stgm_sz (
+        .din    ({ldst_byte,  ldst_hword,  ldst_word,  ldst_dword}),
+        .q      ({byte_m,hword_m,word_m,dword_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  
+
+wire	[7:0]	rwaddr_dcd_part ;
+// 3-to-8 decode of rwaddr_enc: bit n is set when rwaddr_enc[2:0] equals n.
+assign  rwaddr_dcd_part[0]  = (rwaddr_enc[2:0] == 3'b000) ;
+assign  rwaddr_dcd_part[1]  = (rwaddr_enc[2:0] == 3'b001) ;
+assign  rwaddr_dcd_part[2]  = (rwaddr_enc[2:0] == 3'b010) ;
+assign  rwaddr_dcd_part[3]  = (rwaddr_enc[2:0] == 3'b011) ;
+assign  rwaddr_dcd_part[4]  = (rwaddr_enc[2:0] == 3'b100) ;
+assign  rwaddr_dcd_part[5]  = (rwaddr_enc[2:0] == 3'b101) ;
+assign  rwaddr_dcd_part[6]  = (rwaddr_enc[2:0] == 3'b110) ;
+assign  rwaddr_dcd_part[7]  = (rwaddr_enc[2:0] == 3'b111) ;
+
+   assign baddr_m[7:0] = rwaddr_dcd_part[7:0];
+/*    
+assign baddr_m[0]  = ~rwaddr_enc[3] & rwaddr_dcd_part[0] ;
+assign baddr_m[1]  = ~rwaddr_enc[3] & rwaddr_dcd_part[1] ;
+assign baddr_m[2]  = ~rwaddr_enc[3] & rwaddr_dcd_part[2] ;
+assign baddr_m[3]  = ~rwaddr_enc[3] & rwaddr_dcd_part[3] ;
+assign baddr_m[4]  = ~rwaddr_enc[3] & rwaddr_dcd_part[4] ; 
+assign baddr_m[5]  = ~rwaddr_enc[3] & rwaddr_dcd_part[5] ;
+assign baddr_m[6]  = ~rwaddr_enc[3] & rwaddr_dcd_part[6] ;
+assign baddr_m[7]  = ~rwaddr_enc[3] & rwaddr_dcd_part[7] ;
+assign baddr_m[8]  =  rwaddr_enc[3] & rwaddr_dcd_part[0] ;
+assign baddr_m[9]  =  rwaddr_enc[3] & rwaddr_dcd_part[1] ;
+assign baddr_m[10] =  rwaddr_enc[3] & rwaddr_dcd_part[2] ;
+assign baddr_m[11] =  rwaddr_enc[3] & rwaddr_dcd_part[3] ;
+assign baddr_m[12] =  rwaddr_enc[3] & rwaddr_dcd_part[4] ;
+assign baddr_m[13] =  rwaddr_enc[3] & rwaddr_dcd_part[5] ;
+assign baddr_m[14] =  rwaddr_enc[3] & rwaddr_dcd_part[6] ;
+assign baddr_m[15] =  rwaddr_enc[3] & rwaddr_dcd_part[7] ;
+*/
+// Byte Address to start write from. Quantity can be byte/hword/word/dword.
+// E-stage decoding for write to cache.
+
+wire	[3:0]	waddr_enc ;
+wire	[7:0]	waddr_dcd_part ;
+wire	[15:0]	waddr_dcd ;
+// waddr_dcd_part : 3-to-8 decode of waddr_enc[2:0].
+assign  waddr_dcd_part[0]  = (waddr_enc[2:0] == 3'b000) ;
+assign  waddr_dcd_part[1]  = (waddr_enc[2:0] == 3'b001) ;
+assign  waddr_dcd_part[2]  = (waddr_enc[2:0] == 3'b010) ;
+assign  waddr_dcd_part[3]  = (waddr_enc[2:0] == 3'b011) ;
+assign  waddr_dcd_part[4]  = (waddr_enc[2:0] == 3'b100) ;
+assign  waddr_dcd_part[5]  = (waddr_enc[2:0] == 3'b101) ;
+assign  waddr_dcd_part[6]  = (waddr_enc[2:0] == 3'b110) ;
+assign  waddr_dcd_part[7]  = (waddr_enc[2:0] == 3'b111) ;
+// waddr_dcd : 4-to-16 decode of waddr_enc[3:0]; bits 7:0 have waddr_enc[3]=0, bits 15:8 have waddr_enc[3]=1.
+assign  waddr_dcd[0]  = (waddr_enc[3:0] == 4'b0000) ;
+assign  waddr_dcd[1]  = (waddr_enc[3:0] == 4'b0001) ;
+assign  waddr_dcd[2]  = (waddr_enc[3:0] == 4'b0010) ;
+assign  waddr_dcd[3]  = (waddr_enc[3:0] == 4'b0011) ;
+assign  waddr_dcd[4]  = (waddr_enc[3:0] == 4'b0100) ;
+assign  waddr_dcd[5]  = (waddr_enc[3:0] == 4'b0101) ;
+assign  waddr_dcd[6]  = (waddr_enc[3:0] == 4'b0110) ;
+assign  waddr_dcd[7]  = (waddr_enc[3:0] == 4'b0111) ;
+assign  waddr_dcd[8]  = (waddr_enc[3:0] == 4'b1000) ;
+assign  waddr_dcd[9]  = (waddr_enc[3:0] == 4'b1001) ;
+assign  waddr_dcd[10] = (waddr_enc[3:0] == 4'b1010) ;
+assign  waddr_dcd[11] = (waddr_enc[3:0] == 4'b1011) ;
+assign  waddr_dcd[12] = (waddr_enc[3:0] == 4'b1100) ;
+assign  waddr_dcd[13] = (waddr_enc[3:0] == 4'b1101) ;
+assign  waddr_dcd[14] = (waddr_enc[3:0] == 4'b1110) ;
+assign  waddr_dcd[15] = (waddr_enc[3:0] == 4'b1111) ;
+
+// Byte enables for 16 bytes. byte_wr_enable[15-n] covers byte offset n (waddr_dcd[n]); all 16 are set on a 16-byte write.
+   //bug6216/eco6624
+   wire write_16byte_e;
+   assign write_16byte_e = l2fill_vld_e | lsu_bist_wvld_e;
+    // Each enable below ORs: 16-byte write, its own decoded offset, and any hword/word/dword write whose aligned start offset covers it.
+assign byte_wr_enable[15] = 
+    write_16byte_e  |   waddr_dcd[0] ;    
+assign byte_wr_enable[14] = 
+    write_16byte_e  |   waddr_dcd[1]    |   
+    (wr_hword & waddr_dcd[0])  |   (wr_word & waddr_dcd[0]) |
+    (wr_dword & waddr_dcd[0])  ;     
+assign byte_wr_enable[13] = 
+    write_16byte_e  |   waddr_dcd[2]    |
+    (wr_word & waddr_dcd[0]) |     (wr_dword & waddr_dcd[0])  ;   
+assign byte_wr_enable[12] = 
+    write_16byte_e  |   waddr_dcd[3]    |
+    (wr_hword & waddr_dcd[2])  |   (wr_word & waddr_dcd[0]) |
+    (wr_dword & waddr_dcd[0])  ;   
+assign byte_wr_enable[11] = 
+    write_16byte_e  |   waddr_dcd[4]    |     
+    (wr_dword & waddr_dcd[0])  ;   
+assign byte_wr_enable[10] = 
+    write_16byte_e  |   waddr_dcd[5]    |
+    (wr_hword & waddr_dcd[4])  |   (wr_word & waddr_dcd[4]) |
+    (wr_dword & waddr_dcd[0])  ;   
+assign byte_wr_enable[9] = 
+    write_16byte_e  |   waddr_dcd[6]    |
+    (wr_word & waddr_dcd[4]) |     (wr_dword & waddr_dcd[0])  ;   
+assign byte_wr_enable[8] = 
+    write_16byte_e  |   waddr_dcd[7]    |
+    (wr_hword & waddr_dcd[6])  |   (wr_word & waddr_dcd[4]) |
+    (wr_dword & waddr_dcd[0])  ;   
+assign byte_wr_enable[7] = // second 8-byte half: same pattern, offsets 8..15
+    write_16byte_e  |   waddr_dcd[8] ;    
+assign byte_wr_enable[6] = 
+    write_16byte_e  |   waddr_dcd[9]    |   
+    (wr_hword & waddr_dcd[8])  |   (wr_word & waddr_dcd[8]) |
+    (wr_dword & waddr_dcd[8])  ;     
+assign byte_wr_enable[5] = 
+    write_16byte_e  |   waddr_dcd[10]   |
+    (wr_word & waddr_dcd[8]) |     (wr_dword & waddr_dcd[8])  ;   
+assign byte_wr_enable[4] = 
+    write_16byte_e  |   waddr_dcd[11]   |
+    (wr_hword & waddr_dcd[10]) |   (wr_word & waddr_dcd[8]) |
+    (wr_dword & waddr_dcd[8])  ;   
+assign byte_wr_enable[3] = 
+    write_16byte_e  |   waddr_dcd[12]   |     
+    (wr_dword & waddr_dcd[8])  ;   
+assign byte_wr_enable[2] = 
+    write_16byte_e  |   waddr_dcd[13]   |
+    (wr_hword & waddr_dcd[12]) |   (wr_word & waddr_dcd[12])  |
+    (wr_dword & waddr_dcd[8])  ;   
+assign byte_wr_enable[1] = 
+    write_16byte_e  |   waddr_dcd[14]   |
+    (wr_word & waddr_dcd[12])  |   (wr_dword & waddr_dcd[8])  ;   
+assign byte_wr_enable[0] = 
+    write_16byte_e  |   waddr_dcd[15]   |
+    (wr_hword & waddr_dcd[14]) |   (wr_word & waddr_dcd[12])  |
+    (wr_dword & waddr_dcd[8])  ; 
+// Only naturally aligned start offsets appear in the size terms above; presumably misaligned writes never reach this logic (TODO confirm).
+assign  dcache_byte_wr_en_e[15:0] = byte_wr_enable[15:0] ;
+//assign  lsu_st_byte_addr_g[15:0]  = byp_baddr_g[15:0] ;
+
+//=========================================================================================
+//  Sign/Zero-Extension
+//=========================================================================================
+
+dff_s #(1) stgm_msb (                      // e -> m staging of the l1-hit sign-extend bit
+       .clk    (clk),
+       .din    (lsu_l1hit_sign_extend_e),
+       .q      (l1hit_sign_extend_m),
+       .se     (se),       .si (),          .so ()
+       );  
+
+dff_s #(1) stgg_msb (                      // m -> g staging of the same bit
+       .clk    (clk),
+       .din    (l1hit_sign_extend_m),
+       .q      (l1hit_sign_extend_g),
+       .se     (se),       .si (),          .so ()
+       ); 
+
+
+//wire [1:0] lsu_byp_misc_sz_g ;   
+
+/*dff #(2) ff_lsu_byp_misc_sz_g (
+        .din   (lsu_byp_misc_sz_m[1:0]),
+        .q     (lsu_byp_misc_sz_g[1:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );  */
+
+assign  misc_byte_m   = (lsu_byp_misc_sz_m[1:0] == 2'b00) ; // byte
+assign  misc_hword_m  = (lsu_byp_misc_sz_m[1:0] == 2'b01) ; // half word
+assign  misc_word_m   = (lsu_byp_misc_sz_m[1:0] == 2'b10) ; // word
+assign  misc_dword_m  = (lsu_byp_misc_sz_m[1:0] == 2'b11) ; // double word
+
+wire    byp_byte_m,byp_hword_m,byp_word_m,byp_dword_m; // access size: from the in-pipe load when ld_inst_vld_m, else from the misc size decode
+assign  byp_byte_m =  (ld_inst_vld_m) ?  byte_m :  misc_byte_m ;
+assign  byp_hword_m = (ld_inst_vld_m) ? hword_m :  misc_hword_m ;
+assign  byp_word_m =  (ld_inst_vld_m) ?  word_m :  misc_word_m ;
+assign  byp_dword_m = (ld_inst_vld_m) ? dword_m :  misc_dword_m ;
+// Disabled g-stage variant, kept for reference only:
+/*assign  byp_byte_g =  (|lsu_irf_byp_data_src[2:1]) ? misc_byte_g : byte_g ;
+assign  byp_hword_g = (|lsu_irf_byp_data_src[2:1]) ? misc_hword_g : hword_g ;
+assign  byp_word_g =  (|lsu_irf_byp_data_src[2:1]) ? misc_word_g : word_g ;*/
+// Only the word indication is staged m -> g here.
+dff_s #(1) bypsz_stgg(
+        .din   ({byp_word_m}),
+        .q     ({byp_word_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+//wire [3:0]	misc_waddr_m ; 
+//assign  misc_waddr_m[3:0] = {lsu_byp_misc_addr_m[3],lsu_byp_misc_addr_m[2]^lsu_byp_ldd_oddrd_m,lsu_byp_misc_addr_m[1:0]} ;
+
+wire [2:0]	misc_waddr_m ; // misc byte offset; bit 2 is flipped when lsu_byp_ldd_oddrd_m is set
+assign  misc_waddr_m[2:0] = {lsu_byp_misc_addr_m[2]^lsu_byp_ldd_oddrd_m,lsu_byp_misc_addr_m[1:0]} ;
+   // misc_baddr_m is the 8-bit decode of misc_waddr_m (the 16-bit form below is retired).
+//wire    [15:0] misc_baddr_m ;
+wire    [7:0] misc_baddr_m ;
+
+// m-stage decoding
+// Might be better to stage encoded waddr, mux and then decode.
+/*
+assign  misc_baddr_m[0] = ~misc_waddr_m[3] & ~misc_waddr_m[2] & ~misc_waddr_m[1] & ~misc_waddr_m[0] ; 
+assign  misc_baddr_m[1] = ~misc_waddr_m[3] & ~misc_waddr_m[2] & ~misc_waddr_m[1] &  misc_waddr_m[0] ; 
+assign  misc_baddr_m[2] = ~misc_waddr_m[3] & ~misc_waddr_m[2] &  misc_waddr_m[1] & ~misc_waddr_m[0] ; 
+assign  misc_baddr_m[3] = ~misc_waddr_m[3] & ~misc_waddr_m[2] &  misc_waddr_m[1] &  misc_waddr_m[0] ; 
+assign  misc_baddr_m[4] = ~misc_waddr_m[3] &  misc_waddr_m[2] & ~misc_waddr_m[1] & ~misc_waddr_m[0] ; 
+assign  misc_baddr_m[5] = ~misc_waddr_m[3] &  misc_waddr_m[2] & ~misc_waddr_m[1] &  misc_waddr_m[0] ; 
+assign  misc_baddr_m[6] = ~misc_waddr_m[3] &  misc_waddr_m[2] &  misc_waddr_m[1] & ~misc_waddr_m[0] ; 
+assign  misc_baddr_m[7] = ~misc_waddr_m[3] &  misc_waddr_m[2] &  misc_waddr_m[1] &  misc_waddr_m[0] ; 
+assign  misc_baddr_m[8] =  misc_waddr_m[3] & ~misc_waddr_m[2] & ~misc_waddr_m[1] & ~misc_waddr_m[0] ; 
+assign  misc_baddr_m[9] =  misc_waddr_m[3] & ~misc_waddr_m[2] & ~misc_waddr_m[1] &  misc_waddr_m[0] ; 
+assign  misc_baddr_m[10] =  misc_waddr_m[3] & ~misc_waddr_m[2] &  misc_waddr_m[1] & ~misc_waddr_m[0] ; 
+assign  misc_baddr_m[11] =  misc_waddr_m[3] & ~misc_waddr_m[2] &  misc_waddr_m[1] &  misc_waddr_m[0] ; 
+assign  misc_baddr_m[12] =  misc_waddr_m[3] &  misc_waddr_m[2] & ~misc_waddr_m[1] & ~misc_waddr_m[0] ; 
+assign  misc_baddr_m[13] =  misc_waddr_m[3] &  misc_waddr_m[2] & ~misc_waddr_m[1] &  misc_waddr_m[0] ; 
+assign  misc_baddr_m[14] =  misc_waddr_m[3] &  misc_waddr_m[2] &  misc_waddr_m[1] & ~misc_waddr_m[0] ; 
+assign  misc_baddr_m[15] =  misc_waddr_m[3] &  misc_waddr_m[2] &  misc_waddr_m[1] &  misc_waddr_m[0] ; 
+*/
+assign  misc_baddr_m[0] = (misc_waddr_m[2:0] == 3'b000) ; // 3-to-8 decode of misc_waddr_m
+assign  misc_baddr_m[1] = (misc_waddr_m[2:0] == 3'b001) ;
+assign  misc_baddr_m[2] = (misc_waddr_m[2:0] == 3'b010) ;
+assign  misc_baddr_m[3] = (misc_waddr_m[2:0] == 3'b011) ;
+assign  misc_baddr_m[4] = (misc_waddr_m[2:0] == 3'b100) ;
+assign  misc_baddr_m[5] = (misc_waddr_m[2:0] == 3'b101) ;
+assign  misc_baddr_m[6] = (misc_waddr_m[2:0] == 3'b110) ;
+assign  misc_baddr_m[7] = (misc_waddr_m[2:0] == 3'b111) ;
+   
+//wire [15:0] byp_baddr_m ;
+//assign  byp_baddr_m[15:0] = (~(ld_inst_vld_m | st_inst_vld_m)) ? misc_baddr_m[15:0] : baddr_m[15:0] ;
+wire [7:0] byp_baddr_m ; // decoded byte offset: baddr_m for an in-pipe ld/st, misc_baddr_m otherwise
+assign  byp_baddr_m[7:0] = (~(ld_inst_vld_m | st_inst_vld_m)) ? misc_baddr_m[7:0] : baddr_m[7:0] ;
+
+   wire l2fill_sign_extend_m; // local alias of lsu_l2fill_sign_extend_m
+   
+assign  l2fill_sign_extend_m = lsu_l2fill_sign_extend_m ;
+//?? why need st ?? (open question from the original author: stores are included in the in-pipe select below)
+assign  signed_ldst_m = (ld_inst_vld_m | st_inst_vld_m) ?
+                         l1hit_sign_extend_m : l2fill_sign_extend_m ; 
+// The three signals below are cumulative: byte, byte-or-hword, byte-or-hword-or-word, each gated by signed_ldst_m.
+//assign  unsigned_ldst_m = ~signed_ldst_m ;
+
+   assign signed_ldst_byte_m = signed_ldst_m & byp_byte_m;
+//   assign unsigned_ldst_byte_m = unsigned_ldst_m & byp_byte_m;
+
+   assign signed_ldst_hw_m = signed_ldst_m & ( byp_byte_m | byp_hword_m );
+//   assign unsigned_ldst_hw_m = unsigned_ldst_m & ( byp_byte_m | byp_hword_m );
+ 
+   assign signed_ldst_w_m = signed_ldst_m & ( byp_byte_m | byp_hword_m | byp_word_m );
+//   assign unsigned_ldst_w_m = unsigned_ldst_m & ( byp_byte_m | byp_hword_m | byp_word_m );
+   
+//C assign  align_bytes_msb[7:0] = (ld_inst_vld_unflushed | st_inst_vld_unflushed) ? lsu_l1hit_bytes_msb_g[7:0] :
+//C	(l2fill_vld_g ? l2fill_bytes_msb_g[7:0] : lsu_misc_bytes_msb_g[7:0])  ;
+
+//assign  align_bytes_msb[7:0] = (ld_inst_vld_unflushed | st_inst_vld_unflushed) ? lsu_l1hit_bytes_msb_g[7:0] :
+//    (lsu_irf_byp_data_src[2] ? lsu_misc_bytes_msb_g[7:0] : l2fill_bytes_msb_g[7:0])  ;
+
+
+// For little-endian accesses, the following morphing must occur to the byte addr.
+//
+// Byte Addr(lower 3b)  
+//  000(0)  ->  001(1) (hw)
+//    ->  011(3) (w)
+//    ->  111(7) (dw)
+//  001(1)  ->  not morphed
+//  010(2)  ->  011(3) (hw)
+//  011(3)  ->  not morphed
+//  100(4)  ->  101(5) (hw)
+//    ->  111(7) (w)
+//  101(5)  ->  not morphed
+//  110(6)  ->  111(7) (hw)
+//  111(7)  ->  not morphed
+
+wire  [7:0] merged_addr_m ;   // decoded byte offset before little-endian morphing
+wire  [7:0] morphed_addr_m ;    // decoded byte offset after morphing; unchanged when bendian_pred_m is set
+
+//wire  bendian ;
+// With only 8 decoded offsets left, the merge of the two 8-byte halves is a plain copy.
+//assign  merged_addr_m[7:0] = byp_baddr_m[15:8] | byp_baddr_m[7:0] ;
+assign  merged_addr_m[7:0] = byp_baddr_m[7:0] ;
+// Little-endian morph: offset 0 moves to 1/3/7 (hw/w/dw), 2 to 3 (hw), 4 to 5/7 (hw/w), 6 to 7 (hw); odd offsets stay put.
+assign  morphed_addr_m[0] 
+  =  merged_addr_m[0] & ~(~bendian_pred_m & ~byp_byte_m) ;
+assign  morphed_addr_m[1] 
+  =  merged_addr_m[1] | (merged_addr_m[0] & ~bendian_pred_m & byp_hword_m) ;
+assign  morphed_addr_m[2] 
+  =  merged_addr_m[2] & ~(~bendian_pred_m & byp_hword_m) ;
+assign  morphed_addr_m[3] 
+  =  merged_addr_m[3] | (merged_addr_m[0] & ~bendian_pred_m & byp_word_m) |
+  (merged_addr_m[2] & ~bendian_pred_m & byp_hword_m) ;
+assign  morphed_addr_m[4] 
+  =  merged_addr_m[4] & ~(~bendian_pred_m & (byp_hword_m | byp_word_m)) ;
+assign  morphed_addr_m[5] 
+  =  merged_addr_m[5] | (merged_addr_m[4] & ~bendian_pred_m & byp_hword_m) ;
+assign  morphed_addr_m[6] 
+  =  merged_addr_m[6] & ~(~bendian_pred_m & byp_hword_m) ;
+assign  morphed_addr_m[7] 
+  =  merged_addr_m[7] | (merged_addr_m[0] & ~bendian_pred_m & ~(byp_byte_m | byp_hword_m | byp_word_m))  |
+  (merged_addr_m[4] & ~bendian_pred_m & byp_word_m) | (merged_addr_m[6] & ~bendian_pred_m & byp_hword_m) ;
+
+
+   
+   
+//=========================================================================================
+//  ALIGNMENT CONTROL FOR DCDP 
+//=========================================================================================
+
+// First generate control for swapping related to endianness.
+// byte7-byte0 is source data from cache etc.
+// swap7-swap0 is result of endianness swapping.
+
+// First logical level - Swapping of bytes. 
+// Swap byte 0 
+
+wire  swap0_sel_byte0, swap0_sel_byte1, swap0_sel_byte3  ;
+wire  swap1_sel_byte0, swap1_sel_byte1, swap1_sel_byte2, swap1_sel_byte6 ;
+wire  swap2_sel_byte1, swap2_sel_byte2, swap2_sel_byte3, swap2_sel_byte5 ;
+wire  swap3_sel_byte0, swap3_sel_byte2, swap3_sel_byte3, swap3_sel_byte4 ;
+wire  swap4_sel_byte3, swap4_sel_byte4, swap4_sel_byte5 ;
+wire  swap5_sel_byte2, swap5_sel_byte4, swap5_sel_byte5, swap5_sel_byte6 ;
+wire  swap6_sel_byte1, swap6_sel_byte5, swap6_sel_byte6 ;
+wire  swap7_sel_byte0, swap7_sel_byte4, swap7_sel_byte6, swap7_sel_byte7 ;
+// swapN_sel_byteM: source byte M feeds swapped byte N. Big-endian or byte-sized: N takes N; little-endian hword/word/dword: reversed within the 2/4/8-byte group.
+//assign  bendian = bendian_pred_m ;
+//assign  bendian = lsu_bendian_access_g ;
+// Swap byte 0 (its dword leg, byte 7, has no select wire; presumably the mux default, TODO confirm)
+assign  swap0_sel_byte0   = bendian_pred_m | (~bendian_pred_m & byp_byte_m) ;
+assign  swap0_sel_byte1   = ~bendian_pred_m & byp_hword_m ;
+assign  swap0_sel_byte3   = ~bendian_pred_m & byp_word_m ;
+// could be substituted with dword encoding.
+//assign  swap0_sel_byte7   = ~bendian_pred_m & ~(byp_word_m | byp_hword_m | byp_byte_m) ;
+
+// Swap byte 1 
+assign  swap1_sel_byte0   = ~bendian_pred_m & byp_hword_m ;
+assign  swap1_sel_byte1   = bendian_pred_m | (~bendian_pred_m & byp_byte_m) ;
+assign  swap1_sel_byte2   = ~bendian_pred_m & byp_word_m ;
+assign  swap1_sel_byte6   = ~bendian_pred_m & ~(byp_word_m | byp_hword_m | byp_byte_m) ;
+
+// Swap byte 2 
+assign  swap2_sel_byte1   = ~bendian_pred_m & byp_word_m ;
+assign  swap2_sel_byte2   = bendian_pred_m | (~bendian_pred_m & byp_byte_m) ;
+assign  swap2_sel_byte3   = ~bendian_pred_m & byp_hword_m ;
+assign  swap2_sel_byte5   = ~bendian_pred_m & ~(byp_word_m | byp_hword_m | byp_byte_m) ;
+
+// Swap byte 3 
+assign  swap3_sel_byte0   = ~bendian_pred_m & byp_word_m ;
+assign  swap3_sel_byte2   = ~bendian_pred_m & byp_hword_m ;
+assign  swap3_sel_byte3   = bendian_pred_m | (~bendian_pred_m & byp_byte_m) ;
+assign  swap3_sel_byte4   = ~bendian_pred_m & ~(byp_word_m | byp_hword_m | byp_byte_m) ;
+
+// Swap byte 4 (its word leg, byte 7, has no select wire)
+assign  swap4_sel_byte3   = ~bendian_pred_m & ~(byp_word_m | byp_hword_m | byp_byte_m) ;
+assign  swap4_sel_byte4   = bendian_pred_m | (~bendian_pred_m & byp_byte_m) ;
+assign  swap4_sel_byte5   = ~bendian_pred_m & byp_hword_m ;
+//assign  swap4_sel_byte7   = ~bendian_pred_m & byp_word_m ;
+
+// Swap byte 5 
+assign  swap5_sel_byte2   = ~bendian_pred_m & ~(byp_word_m | byp_hword_m | byp_byte_m) ;
+assign  swap5_sel_byte4   = ~bendian_pred_m & byp_hword_m ;
+assign  swap5_sel_byte5   = bendian_pred_m | (~bendian_pred_m & byp_byte_m) ;
+assign  swap5_sel_byte6   = ~bendian_pred_m & byp_word_m ;
+
+// Swap byte 6 (its hword leg, byte 7, has no select wire)
+assign  swap6_sel_byte1   = ~bendian_pred_m & ~(byp_word_m | byp_hword_m | byp_byte_m) ;
+assign  swap6_sel_byte5   = ~bendian_pred_m & byp_word_m ;
+assign  swap6_sel_byte6   = bendian_pred_m | (~bendian_pred_m & byp_byte_m) ;
+//assign  swap6_sel_byte7   = ~bendian_pred_m & byp_hword_m ;
+
+// Swap byte 7 
+assign  swap7_sel_byte0   = ~bendian_pred_m & ~(byp_word_m | byp_hword_m | byp_byte_m) ;
+assign  swap7_sel_byte4   = ~bendian_pred_m & byp_word_m ;
+assign  swap7_sel_byte6   = ~bendian_pred_m & byp_hword_m ;
+assign  swap7_sel_byte7   = bendian_pred_m | (~bendian_pred_m & byp_byte_m) ;
+
+// 2nd logical level - Alignment. 
+// rjust7-rjust0 is result of alignment operation.
+// sbyte7-sbyte0 is the result of the endian swapping from the 1st logic level.
+
+wire  rjust0_sel_sbyte0, rjust0_sel_sbyte1, rjust0_sel_sbyte2, rjust0_sel_sbyte3 ;
+wire  rjust0_sel_sbyte4, rjust0_sel_sbyte5, rjust0_sel_sbyte6, rjust0_sel_sbyte7 ;
+wire  rjust1_sel_sbyte1, rjust1_sel_sbyte3, rjust1_sel_sbyte5, rjust1_sel_sbyte7 ;
+wire  rjust2_sel_sbyte2, rjust2_sel_sbyte6 ;
+wire  rjust3_sel_sbyte3, rjust3_sel_sbyte7 ;
+// rjustN_sel_sbyteM: swapped byte M is steered to right-justified byte N. Decoded offset k (byp_baddr_m[k]) maps to sbyte 7-k.
+// Aligned Byte 0 (sbyte0 is the default leg: selected when no other leg is)
+assign  rjust0_sel_sbyte0   = 
+  ~(rjust0_sel_sbyte1 | rjust0_sel_sbyte2 | rjust0_sel_sbyte3 |
+    rjust0_sel_sbyte4 | rjust0_sel_sbyte5 | rjust0_sel_sbyte6 |
+    rjust0_sel_sbyte7) ;
+assign  rjust0_sel_sbyte1   = 
+//  ((byp_baddr_m[14] | byp_baddr_m[6]) & byp_byte_m) ;
+  ((byp_baddr_m[6]) & byp_byte_m) ;
+
+assign  rjust0_sel_sbyte2   = 
+//  ((byp_baddr_m[12] | byp_baddr_m[4]) & byp_hword_m) | 
+  ((byp_baddr_m[4]) & byp_hword_m) | 
+//  ((byp_baddr_m[13] | byp_baddr_m[5]) & byp_byte_m) ;
+  ((byp_baddr_m[5]) & byp_byte_m) ;
+assign  rjust0_sel_sbyte3 = 
+//  (byp_baddr_m[12] | byp_baddr_m[4]) & byp_byte_m ; 
+  (byp_baddr_m[4]) & byp_byte_m ; 
+assign  rjust0_sel_sbyte4 = 
+//  ((byp_baddr_m[10] | byp_baddr_m[2]) & byp_hword_m) | 
+//  ((byp_baddr_m[11] | byp_baddr_m[3]) & byp_byte_m) |
+//  ((byp_baddr_m[8] | byp_baddr_m[0]) & byp_word_m) ;
+  ((byp_baddr_m[2]) & byp_hword_m) | 
+  ((byp_baddr_m[3]) & byp_byte_m) |
+  ((byp_baddr_m[0]) & byp_word_m) ;
+assign  rjust0_sel_sbyte5 = 
+//  ((byp_baddr_m[10] | byp_baddr_m[2]) & byp_byte_m) ; 
+  ((byp_baddr_m[2]) & byp_byte_m) ; 
+assign  rjust0_sel_sbyte6 = 
+//  ((byp_baddr_m[8] | byp_baddr_m[0]) & byp_hword_m) | 
+//  ((byp_baddr_m[9] | byp_baddr_m[1]) & byp_byte_m) ;
+  ((byp_baddr_m[0]) & byp_hword_m) | 
+  ((byp_baddr_m[1]) & byp_byte_m) ;
+assign  rjust0_sel_sbyte7 = 
+//  (byp_baddr_m[8] | byp_baddr_m[0]) & byp_byte_m ;
+  (byp_baddr_m[0]) & byp_byte_m ;
+
+// Aligned Byte 1 (sbyte1 is the default leg)
+assign  rjust1_sel_sbyte1   = 
+  ~(rjust1_sel_sbyte3 | rjust1_sel_sbyte5 | rjust1_sel_sbyte7) ;
+assign  rjust1_sel_sbyte3   = 
+//  (byp_baddr_m[12] | byp_baddr_m[4]) & byp_hword_m ;
+  (byp_baddr_m[4]) & byp_hword_m ;
+assign  rjust1_sel_sbyte5   = 
+//  ((byp_baddr_m[10] | byp_baddr_m[2]) & byp_hword_m) | 
+//  ((byp_baddr_m[8] | byp_baddr_m[0]) & byp_word_m) ;
+  ((byp_baddr_m[2]) & byp_hword_m) | 
+  ((byp_baddr_m[0]) & byp_word_m) ;
+assign  rjust1_sel_sbyte7   = 
+//  (byp_baddr_m[8] | byp_baddr_m[0]) & byp_hword_m ;
+  (byp_baddr_m[0]) & byp_hword_m ;
+
+// Aligned Byte 2 (sbyte6 only for a word at offset 0, else sbyte2)
+assign  rjust2_sel_sbyte2   = ~rjust2_sel_sbyte6 ;
+//assign  rjust2_sel_sbyte6   = (byp_baddr_m[8] | byp_baddr_m[0]) & byp_word_m ;
+assign  rjust2_sel_sbyte6   = (byp_baddr_m[0]) & byp_word_m ;
+
+// Aligned Byte 3 (sbyte7 only for a word at offset 0, else sbyte3)
+assign  rjust3_sel_sbyte3   = ~rjust3_sel_sbyte7 ;
+//assign  rjust3_sel_sbyte7   = (byp_baddr_m[8] | byp_baddr_m[0]) & byp_word_m ;
+assign  rjust3_sel_sbyte7   = (byp_baddr_m[0]) & byp_word_m ;
+
+// 3rd logical level - Complete alignment. Sign-Extension/Zero-Extension.
+// merge7-merge0 corresponds to cumulative swapping and alignment result.
+// byte[7]-byte[0] refers to the original pre-swap/alignment data.
+
+// Select nets for the final merge stage, grouped by merged (output) byte.
+// mergeN_sel_byteK_m routes original byte K to merged byte N.
+wire merge0_sel_byte0_m, merge0_sel_byte1_m, merge0_sel_byte2_m, merge0_sel_byte3_m,
+     merge0_sel_byte4_m, merge0_sel_byte5_m, merge0_sel_byte6_m;
+wire merge1_sel_byte0_m, merge1_sel_byte1_m, merge1_sel_byte2_m, merge1_sel_byte3_m,
+     merge1_sel_byte4_m, merge1_sel_byte5_m, merge1_sel_byte6_m, merge1_sel_byte7_m;
+wire merge2_sel_byte1_m, merge2_sel_byte2_m, merge2_sel_byte5_m, merge2_sel_byte6_m;
+wire merge3_sel_byte0_m, merge3_sel_byte3_m, merge3_sel_byte4_m, merge3_sel_byte7_m;
+wire merge4_sel_byte3_m, merge4_sel_byte4_m;
+wire merge5_sel_byte2_m, merge5_sel_byte5_m;
+wire merge6_sel_byte1_m, merge6_sel_byte6_m;
+wire merge7_sel_byte0_m, merge7_sel_byte7_m;
+// Group qualifiers derived from the selects above.
+wire merge0_sel_byte_1h_m;
+wire merge1_sel_byte_1h_m, merge1_sel_byte_2h_m;
+wire merge2_sel_byte_m;
+wire merge3_sel_byte_m;
+
+// Final Merged Byte 0
+// merge0_sel_byteJ_m is the OR, over each swapped byte K that can carry
+// original byte J, of (swapK selects byte J) & (aligned byte 0 selects sbyte K).
+assign  merge0_sel_byte0_m  =
+  (swap7_sel_byte0 & rjust0_sel_sbyte7) |
+  (swap3_sel_byte0 & rjust0_sel_sbyte3) |
+  (swap1_sel_byte0 & rjust0_sel_sbyte1) |
+  (swap0_sel_byte0 & rjust0_sel_sbyte0) ;
+
+assign  merge0_sel_byte1_m  =
+  (swap6_sel_byte1 & rjust0_sel_sbyte6) |
+  (swap2_sel_byte1 & rjust0_sel_sbyte2) |
+  (swap1_sel_byte1 & rjust0_sel_sbyte1) |
+  (swap0_sel_byte1 & rjust0_sel_sbyte0) ;
+
+assign  merge0_sel_byte2_m  =
+  (swap5_sel_byte2 & rjust0_sel_sbyte5) |
+  (swap3_sel_byte2 & rjust0_sel_sbyte3) |
+  (swap2_sel_byte2 & rjust0_sel_sbyte2) |
+  (swap1_sel_byte2 & rjust0_sel_sbyte1) ;
+
+assign  merge0_sel_byte3_m  =
+  (swap4_sel_byte3 & rjust0_sel_sbyte4) |
+  (swap3_sel_byte3 & rjust0_sel_sbyte3) |
+  (swap2_sel_byte3 & rjust0_sel_sbyte2) |
+  (swap0_sel_byte3 & rjust0_sel_sbyte0) ;
+
+// Default for the low group: high whenever none of bytes 0..2 is selected.
+assign  merge0_sel_byte3_default_m =
+  ~merge0_sel_byte0_m & ~merge0_sel_byte1_m & ~merge0_sel_byte2_m ;
+
+assign  merge0_sel_byte4_m  =
+  (swap7_sel_byte4 & rjust0_sel_sbyte7) |
+  (swap5_sel_byte4 & rjust0_sel_sbyte5) |
+  (swap4_sel_byte4 & rjust0_sel_sbyte4) |
+  (swap3_sel_byte4 & rjust0_sel_sbyte3) ;
+
+assign  merge0_sel_byte5_m  =
+  (swap6_sel_byte5 & rjust0_sel_sbyte6) |
+  (swap5_sel_byte5 & rjust0_sel_sbyte5) |
+  (swap4_sel_byte5 & rjust0_sel_sbyte4) |
+  (swap2_sel_byte5 & rjust0_sel_sbyte2) ;
+
+assign  merge0_sel_byte6_m  =
+  (swap7_sel_byte6 & rjust0_sel_sbyte7) |
+  (swap6_sel_byte6 & rjust0_sel_sbyte6) |
+  (swap5_sel_byte6 & rjust0_sel_sbyte5) |
+  (swap1_sel_byte6 & rjust0_sel_sbyte1) ;
+
+// The explicit byte-7 select is disabled; its former equation was:
+//assign  merge0_sel_byte7_m  =
+//  (rjust0_sel_sbyte0 & swap0_sel_byte7) |
+//  (rjust0_sel_sbyte4 & swap4_sel_byte7) |
+//  (rjust0_sel_sbyte6 & swap6_sel_byte7) |
+//  (rjust0_sel_sbyte7 & swap7_sel_byte7) ;
+
+// Default for the high group: high whenever none of bytes 4..6 is selected.
+assign  merge0_sel_byte7_default_m =
+  ~merge0_sel_byte4_m & ~merge0_sel_byte5_m & ~merge0_sel_byte6_m ;
+
+// Set when the source for merged byte 0 is one of original bytes 0..3.
+assign  merge0_sel_byte_1h_m =
+  merge0_sel_byte3_m | merge0_sel_byte2_m | merge0_sel_byte1_m | merge0_sel_byte0_m ;
+
+// Final Merged Byte 1
+// Same product-of-selects scheme as merged byte 0, but aligned byte 1 can
+// only take swapped bytes 1, 3, 5 and 7.
+assign  merge1_sel_byte0_m  =
+  (swap7_sel_byte0 & rjust1_sel_sbyte7) |
+  (swap3_sel_byte0 & rjust1_sel_sbyte3) |
+  (swap1_sel_byte0 & rjust1_sel_sbyte1) ;
+
+assign  merge1_sel_byte1_m  = swap1_sel_byte1 & rjust1_sel_sbyte1 ;
+
+assign  merge1_sel_byte2_m  =
+  (swap5_sel_byte2 & rjust1_sel_sbyte5) |
+  (swap3_sel_byte2 & rjust1_sel_sbyte3) |
+  (swap1_sel_byte2 & rjust1_sel_sbyte1) ;
+
+assign  merge1_sel_byte3_m  = swap3_sel_byte3 & rjust1_sel_sbyte3 ;
+
+// Default for the low group: high whenever none of bytes 0..2 is selected.
+assign  merge1_sel_byte3_default_m =
+  ~merge1_sel_byte0_m & ~merge1_sel_byte1_m & ~merge1_sel_byte2_m ;
+
+assign  merge1_sel_byte4_m  =
+  (swap7_sel_byte4 & rjust1_sel_sbyte7) |
+  (swap5_sel_byte4 & rjust1_sel_sbyte5) |
+  (swap3_sel_byte4 & rjust1_sel_sbyte3) ;
+
+assign  merge1_sel_byte5_m  = swap5_sel_byte5 & rjust1_sel_sbyte5 ;
+
+assign  merge1_sel_byte6_m  =
+  (swap7_sel_byte6 & rjust1_sel_sbyte7) |
+  (swap5_sel_byte6 & rjust1_sel_sbyte5) |
+  (swap1_sel_byte6 & rjust1_sel_sbyte1) ;
+
+assign  merge1_sel_byte7_m  = swap7_sel_byte7 & rjust1_sel_sbyte7 ;
+
+// Default for the high group: high whenever none of bytes 4..6 is selected.
+assign  merge1_sel_byte7_default_m =
+  ~merge1_sel_byte4_m & ~merge1_sel_byte5_m & ~merge1_sel_byte6_m ;
+
+// Group qualifiers; both are forced low for byte-sized accesses.
+assign  merge1_sel_byte_1h_m =
+  (merge1_sel_byte3_m | merge1_sel_byte2_m | merge1_sel_byte1_m | merge1_sel_byte0_m) &
+  ~byp_byte_m ;
+
+assign  merge1_sel_byte_2h_m =
+  (merge1_sel_byte7_m | merge1_sel_byte6_m | merge1_sel_byte5_m | merge1_sel_byte4_m) &
+  ~byp_byte_m ;
+
+
+// Final Merged Byte 2
+// Aligned byte 2 comes from swapped byte 2 or 6, so the possible original
+// sources are bytes 1, 2, 5 and 6.
+assign  merge2_sel_byte1_m  =
+  (swap6_sel_byte1 & rjust2_sel_sbyte6) |
+  (swap2_sel_byte1 & rjust2_sel_sbyte2) ;
+
+assign  merge2_sel_byte2_m  = swap2_sel_byte2 & rjust2_sel_sbyte2 ;
+
+assign  merge2_sel_byte5_m  =
+  (swap6_sel_byte5 & rjust2_sel_sbyte6) |
+  (swap2_sel_byte5 & rjust2_sel_sbyte2) ;
+
+assign  merge2_sel_byte6_m  = swap6_sel_byte6 & rjust2_sel_sbyte6 ;
+
+// Default: high whenever none of bytes 1, 2, 5 is selected.
+assign  merge2_sel_byte6_default_m =
+  ~merge2_sel_byte1_m & ~merge2_sel_byte2_m & ~merge2_sel_byte5_m ;
+
+// Merged byte 2 takes real data only when the access is neither byte- nor
+// halfword-sized and one of the four selects is active.
+assign  merge2_sel_byte_m =
+  (merge2_sel_byte6_m | merge2_sel_byte5_m | merge2_sel_byte2_m | merge2_sel_byte1_m) &
+  ~(byp_byte_m | byp_hword_m) ;
+
+// Final Merged Byte 3
+// Aligned byte 3 comes from swapped byte 3 or 7, so the possible original
+// sources are bytes 0, 3, 4 and 7.
+assign  merge3_sel_byte0_m  =
+  (swap7_sel_byte0 & rjust3_sel_sbyte7) |
+  (swap3_sel_byte0 & rjust3_sel_sbyte3) ;
+
+assign  merge3_sel_byte3_m  = swap3_sel_byte3 & rjust3_sel_sbyte3 ;
+
+assign  merge3_sel_byte4_m  =
+  (swap7_sel_byte4 & rjust3_sel_sbyte7) |
+  (swap3_sel_byte4 & rjust3_sel_sbyte3) ;
+
+assign  merge3_sel_byte7_m  = swap7_sel_byte7 & rjust3_sel_sbyte7 ;
+
+// Default: high whenever none of bytes 0, 3, 4 is selected.
+assign  merge3_sel_byte7_default_m =
+  ~merge3_sel_byte0_m & ~merge3_sel_byte3_m & ~merge3_sel_byte4_m ;
+
+// Merged byte 3 takes real data only when the access is neither byte- nor
+// halfword-sized and one of the four selects is active.
+assign  merge3_sel_byte_m =
+  (merge3_sel_byte7_m | merge3_sel_byte4_m | merge3_sel_byte3_m | merge3_sel_byte0_m) &
+  ~(byp_byte_m | byp_hword_m) ;
+   
+// Final Merged Bytes 4..7
+// The upper four merged bytes are only steered for doubleword accesses
+// (byp_dword_m); no alignment select is involved, just the endian-swap select.
+
+// Byte 4
+assign  merge4_sel_byte3_m = swap4_sel_byte3 & byp_dword_m ;
+assign  merge4_sel_byte4_m = swap4_sel_byte4 & byp_dword_m ;
+
+// Byte 5
+assign  merge5_sel_byte2_m = swap5_sel_byte2 & byp_dword_m ;
+assign  merge5_sel_byte5_m = swap5_sel_byte5 & byp_dword_m ;
+
+// Byte 6
+assign  merge6_sel_byte1_m = swap6_sel_byte1 & byp_dword_m ;
+assign  merge6_sel_byte6_m = swap6_sel_byte6 & byp_dword_m ;
+
+// Byte 7
+assign  merge7_sel_byte0_m = swap7_sel_byte0 & byp_dword_m ;
+assign  merge7_sel_byte7_m = swap7_sel_byte7 & byp_dword_m ;
+
+
+
+//=========================================================================================
+//  STQ/CAS 2ND PKT FORMATTING 
+//=========================================================================================
+
+// stq and cas write to an extra buffer. stq always uses a full 64bits.
+// cas may use either 64b or 32b. stq requires at most endian alignment.
+// cas may require both address and endian alignment.
+
+// Byte Alignment. Assume 8 bytes, 7-0
+//  Case 1 : 7,6,5,4,3,2,1,0 
+//  Case 2 : 3,2,1,0,0,1,2,3 
+//  Case 3 : 0,1,2,3,4,5,6,7  
+
+// casa split by size: word-sized (byp_word_g set) versus everything else.
+wire casa_wd_g, casa_dwd_g ;
+assign  casa_wd_g  = byp_word_g & casa_g ;
+assign  casa_dwd_g = ~byp_word_g & casa_g ;
+
+// Change bendian to bendian_g - should not be dependent on fill.
+
+// Byte-select for the 2nd atomic packet. rst_tri_en forces the select to
+// Case 1 only (bit 2 high, bits 1:0 low).
+
+// Case 3 : little-endian doubleword, or little-endian word to the lower half
+assign  lsu_atomic_pkt2_bsel_g[0] = ~rst_tri_en &
+  ((~bendian_g & casa_dwd_g) |                     // lendian stq and dw cas
+   (~bendian_g & ~ldst_va_g[2] & casa_wd_g)) ;     // lendian wd cas addr to lhalf
+
+// Case 2 : word casa whose endianness and ldst_va_g[2] call for a half swap
+assign  lsu_atomic_pkt2_bsel_g[1] = ~rst_tri_en &
+  (( bendian_g & ~ldst_va_g[2] & casa_wd_g) |      // bendian_g wd casa addr to lhalf
+   (~bendian_g &  ldst_va_g[2] & casa_wd_g)) ;     // lendian wd casa addr to uhalf
+
+// Case 1 : one-hot default, taken when neither Case 2 nor Case 3 is active.
+// Former explicit equation:
+//assign  lsu_atomic_pkt2_bsel_g[2] =   // Case 1
+//  (casa_dwd_g &  bendian_g)   |  // bendian stq and dw cas
+//  (casa_wd_g &  bendian_g &  ldst_va_g[2]) ;  // bendian_g wd casa addr to uhalf
+assign  lsu_atomic_pkt2_bsel_g[2] =
+  rst_tri_en | ~(lsu_atomic_pkt2_bsel_g[1] | lsu_atomic_pkt2_bsel_g[0]) ;
+
+// Alignment done in qdp1
+
+//=========================================================================================
+//  ASI DECODE
+//=========================================================================================
+
+// Note : tlb_byp_asi same as phy_use/phy_byp asi.
+
+
+// ASI decoder instance: takes the 8-bit asi_d[7:0] and drives the one-bit
+// *_asi_d / *_asi58_d / *_asi50_d classification flags listed below. These
+// flags feed the asidcd_stge pipeline register.
+// NOTE(review): the /*AUTOINST*/ marker suggests this port list is generated
+// (Emacs verilog-mode); presumably it should be regenerated rather than
+// edited by hand - confirm with the project flow.
+lsu_asi_decode asi_decode (/*AUTOINST*/
+                           // Outputs
+                           .asi_internal_d(asi_internal_d),
+                           .nucleus_asi_d(nucleus_asi_d),
+                           .primary_asi_d(primary_asi_d),
+                           .secondary_asi_d(secondary_asi_d),
+                           .lendian_asi_d(lendian_asi_d),
+                           .nofault_asi_d(nofault_asi_d),
+                           .quad_asi_d  (quad_asi_d),
+                           .binit_quad_asi_d(binit_quad_asi_d),
+                           .dcache_byp_asi_d(dcache_byp_asi_d),
+                           .tlb_lng_ltncy_asi_d(tlb_lng_ltncy_asi_d),
+                           .tlb_byp_asi_d(tlb_byp_asi_d),
+                           .as_if_user_asi_d(as_if_user_asi_d),
+                           .atomic_asi_d(atomic_asi_d),
+                           .blk_asi_d   (blk_asi_d),
+                           .dc_diagnstc_asi_d(dc_diagnstc_asi_d),
+                           .dtagv_diagnstc_asi_d(dtagv_diagnstc_asi_d),
+                           .wr_only_asi_d(wr_only_asi_d),
+                           .rd_only_asi_d(rd_only_asi_d),
+                           .unimp_asi_d (unimp_asi_d),
+                           .ifu_nontlb_asi_d(ifu_nontlb_asi_d),
+                           .recognized_asi_d(recognized_asi_d),
+                           .ifill_tlb_asi_d(ifill_tlb_asi_d),
+                           .dfill_tlb_asi_d(dfill_tlb_asi_d),
+                           .rd_only_ltlb_asi_d(rd_only_ltlb_asi_d),
+                           .wr_only_ltlb_asi_d(wr_only_ltlb_asi_d),
+                           .phy_use_ec_asi_d(phy_use_ec_asi_d),
+                           .phy_byp_ec_asi_d(phy_byp_ec_asi_d),
+                           .mmu_rd_only_asi_d(mmu_rd_only_asi_d),
+                           .intrpt_disp_asi_d(intrpt_disp_asi_d),
+                           .dmmu_asi58_d(dmmu_asi58_d),
+                           .immu_asi50_d(immu_asi50_d),
+                           // Inputs
+                           .asi_d       (asi_d[7:0]));
+
+// First ASI-decode pipeline register: captures all 31 decoded *_d flags and
+// presents them as the matching *_e signals one clock later.
+// The din and q concatenations are paired purely by position, so any signal
+// added, removed or moved in one list must be mirrored in the other and the
+// #(31) width kept in step.
+// The scan ports si/so are left unconnected here.
+dff_s #(31)  asidcd_stge (
+        .din    ({asi_internal_d,primary_asi_d,secondary_asi_d,nucleus_asi_d,
+    lendian_asi_d, tlb_byp_asi_d, dcache_byp_asi_d,nofault_asi_d,
+    tlb_lng_ltncy_asi_d,as_if_user_asi_d,atomic_asi_d, blk_asi_d,
+    dc_diagnstc_asi_d,dtagv_diagnstc_asi_d,
+    wr_only_asi_d, rd_only_asi_d,mmu_rd_only_asi_d,unimp_asi_d,dmmu_asi58_d, immu_asi50_d, quad_asi_d, binit_quad_asi_d,
+    ifu_nontlb_asi_d,recognized_asi_d, ifill_tlb_asi_d,
+    dfill_tlb_asi_d, rd_only_ltlb_asi_d,wr_only_ltlb_asi_d,phy_use_ec_asi_d, phy_byp_ec_asi_d, intrpt_disp_asi_d}),
+        .q      ({asi_internal_e,primary_asi_e,secondary_asi_e,nucleus_asi_e,
+    lendian_asi_e, tlb_byp_asi_e, dcache_byp_asi_e,nofault_asi_e,
+    tlb_lng_ltncy_asi_e,as_if_user_asi_e,atomic_asi_e, blk_asi_e,
+    dc_diagnstc_asi_e,dtagv_diagnstc_asi_e,
+    wr_only_asi_e, rd_only_asi_e,mmu_rd_only_asi_e,unimp_asi_e,dmmu_asi58_e, immu_asi50_e, quad_asi_e, binit_quad_asi_e,
+    ifu_nontlb_asi_e,recognized_asi_e,ifill_tlb_asi_e,
+    dfill_tlb_asi_e,rd_only_ltlb_asi_e,wr_only_ltlb_asi_e,phy_use_ec_asi_e, phy_byp_ec_asi_e, intrpt_disp_asi_e}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Block-ASI indication to the FFU is qualified by alt_space_e; the quad-ASI
+// indication is exported unqualified.
+assign	lsu_ffu_blk_asi_e = blk_asi_e & alt_space_e;
+assign  lsu_quad_asi_e = quad_asi_e ;
+
+// Second ASI-decode pipeline register: carries 23 of the *_e flags forward as
+// *_m. din and q are paired by position; keep both lists and the #(23) width
+// in step.
+// Two outputs are deliberately named *_tmp instead of *_m: unimp_asi_tmp and
+// recognized_asi_tmp are combined with the VA-based unimp_m term further
+// down to form unimp_asi_m and recognized_asi_m.
+wire	unimp_asi_tmp ;
+dff_s #(23)  asidcd_stgm (
+        .din    ({asi_internal_e,dcache_byp_asi_e,nofault_asi_e,lendian_asi_e,tlb_lng_ltncy_asi_e,
+    as_if_user_asi_e,atomic_asi_e, blk_asi_e,dc_diagnstc_asi_e,dtagv_diagnstc_asi_e,
+    wr_only_asi_e, rd_only_asi_e,mmu_rd_only_asi_e,unimp_asi_e,dmmu_asi58_e, immu_asi50_e, quad_asi_e,binit_quad_asi_e,recognized_asi_e,
+    ifu_nontlb_asi_e,phy_use_ec_asi_e, phy_byp_ec_asi_e, intrpt_disp_asi_e}),
+        .q      ({asi_internal_m,dcache_byp_asi_m,nofault_asi_m,lendian_asi_m,tlb_lng_ltncy_asi_m,
+    as_if_user_asi_m,atomic_asi_m, blk_asi_m,dc_diagnstc_asi_m,dtagv_diagnstc_asi_m,
+    wr_only_asi_m, rd_only_asi_m,mmu_rd_only_asi_m,unimp_asi_tmp,dmmu_asi58_m, immu_asi50_m, quad_asi_m,binit_quad_asi_m,recognized_asi_tmp,
+    ifu_nontlb_asi_m,phy_use_ec_asi_m, phy_byp_ec_asi_m, intrpt_disp_asi_m}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Export the staged block-ASI flag unchanged.
+assign	lsu_blk_asi_m = blk_asi_m ;
+
+   // VA-specific "unimplemented" decode (pa_wtchpt term added for Bug 3408).
+   // Each term matches one value of lsu_ldst_va_b7_b0_m[7:0] under ASI 0x58
+   // (dmmu_asi58_m) or ASI 0x50 (immu_asi50_m).
+   wire pa_wtchpt_unimp_m ;
+   wire d_tsb_unimp_m, i_tsb_unimp_m ;
+   wire pctxt_unimp_m, sctxt_unimp_m ;
+   wire unimp_m ;
+
+assign  pctxt_unimp_m      = (lsu_ldst_va_b7_b0_m[7:0] == 8'h08) & dmmu_asi58_m ;
+assign  sctxt_unimp_m      = (lsu_ldst_va_b7_b0_m[7:0] == 8'h10) & dmmu_asi58_m ;
+assign  d_tsb_unimp_m      = (lsu_ldst_va_b7_b0_m[7:0] == 8'h28) & dmmu_asi58_m ;
+assign  pa_wtchpt_unimp_m  = (lsu_ldst_va_b7_b0_m[7:0] == 8'h40) & dmmu_asi58_m ;
+assign  i_tsb_unimp_m      = (lsu_ldst_va_b7_b0_m[7:0] == 8'h28) & immu_asi50_m ;
+
+assign  unimp_m = sctxt_unimp_m | pctxt_unimp_m |
+                  i_tsb_unimp_m | d_tsb_unimp_m |
+                  pa_wtchpt_unimp_m ;
+
+// A VA hit marks the access as both unimplemented and recognized, on top of
+// whatever the staged ASI-only decode reported.
+assign  unimp_asi_m      = unimp_m | unimp_asi_tmp ;
+assign  recognized_asi_m = unimp_m | recognized_asi_tmp ;
+
+// Third ASI-decode pipeline register: carries 12 of the *_m flags forward as
+// *_g. Note that recognized_asi_m here is the value after the VA-based
+// override, not the raw staged flag.
+// din and q are paired by position; keep both lists and the #(12) width in
+// step.
+dff_s #(12)  asidcd_stgg (
+        .din    ({asi_internal_m,dcache_byp_asi_m, lendian_asi_m,tlb_lng_ltncy_asi_m,
+  blk_asi_m,dc_diagnstc_asi_m,dtagv_diagnstc_asi_m,quad_asi_m,
+  binit_quad_asi_m,recognized_asi_m,ifu_nontlb_asi_m,  intrpt_disp_asi_m}),
+        .q      ({asi_internal_g,dcache_byp_asi_g, lendian_asi_g,tlb_lng_ltncy_asi_g,
+  blk_asi_g,dc_diagnstc_asi_g,dtagv_diagnstc_asi_g,quad_asi_g,
+  binit_quad_asi_g,recognized_asi_g,ifu_nontlb_asi_g,  intrpt_disp_asi_g}),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// (lsu_quad_asi_g export is disabled.)
+//assign lsu_quad_asi_g = quad_asi_g;
+// Request flag raised when the ASI is a dcache-bypass ASI and
+// altspace_ldst_g is set.
+assign  ncache_asild_rq_g   = dcache_byp_asi_g & altspace_ldst_g ;
+
+// Store-data alignment controls: register the size decode and the
+// store-double flag into the *_g qualifiers used by the swap selects.
+wire stdbl_m ;
+wire st_sz_hw_g, st_sz_w_g, st_sz_dw_g, stdbl_g ;
+
+// stdbl_m is now simply ldst_dbl_m. Earlier form, kept for reference:
+//   ldst_dbl_m & (~lsu_alt_space_m | (lsu_alt_space_m & ~blk_asi_m))
+assign stdbl_m = ldst_dbl_m ;
+
+dff_s #(4) ff_st_sz_m (
+  .clk (clk),
+  .din ({hw_size, wd_size, dw_size, stdbl_m}),
+  .q   ({st_sz_hw_g, st_sz_w_g, st_sz_dw_g, stdbl_g}),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+   
+//assign	bendian = lsu_bendian_access_g ;	// bendian store
+
+wire swap_sel_default_g, swap_sel_default_byte_7_2_g ;
+wire st_hw_le_g, st_w_or_dbl_le_g, st_x_le_g ;
+
+// Big-endian is simply "not little-endian" as reported by l1hit_lendian_g.
+assign bendian_g = ~l1hit_lendian_g ;
+
+// Little-endian store swap selects, one per store size. All are gated by
+// st_inst_vld_unflushed (0-in bug).
+// bug 3169 : std(a) on floating point is the same as stx(a), so an integer
+// store-double (stdbl_g & ~fp_ldst_g) is handled by the word select and is
+// excluded from the halfword and doubleword selects.
+assign st_hw_le_g =
+  st_inst_vld_unflushed & ~bendian_g & st_sz_hw_g & ~(stdbl_g & ~fp_ldst_g) ;
+
+assign st_w_or_dbl_le_g =
+  st_inst_vld_unflushed & ~bendian_g & (st_sz_w_g | (stdbl_g & ~fp_ldst_g)) ;
+
+assign st_x_le_g =
+  st_inst_vld_unflushed & ~bendian_g & st_sz_dw_g & ~(stdbl_g & ~fp_ldst_g) ;
+
+// Defaults: no swap needed when none of the little-endian selects fires.
+// Earlier form of the full default, kept for reference:
+//   (bendian_g | (~bendian_g & st_sz_b_g))
+assign swap_sel_default_g          = ~st_hw_le_g & ~st_w_or_dbl_le_g & ~st_x_le_g ;
+assign swap_sel_default_byte_7_2_g = ~st_w_or_dbl_le_g & ~st_x_le_g ;
+
+// Stage the FFU block-store indication from E to M.
+wire blkst_m_tmp ;
+dff_s  stgm_bst (
+  .clk (clk),
+  .din (ffu_lsu_blk_st_e),
+  .q   (blkst_m_tmp),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+// Bug 3444 : the staged flag is dropped whenever a store, flush or load
+// instruction is valid in M.
+assign blkst_m = blkst_m_tmp & ~st_inst_vld_m & ~flsh_inst_m & ~ld_inst_vld_m ;
+
+assign lsu_blk_st_m = blkst_m ;
+
+// Stage the qualified flag from M to G.
+dff_s  stgg_bst (
+  .clk (clk),
+  .din (blkst_m),
+  .q   (blkst_g),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+// Swap selects sent out of this block. While blkst_g is high the snapped
+// bst_* copies are used instead of the live per-instruction selects.
+// rst_tri_en forces the two default selects high and the three
+// little-endian selects low.
+wire bst_swap_sel_default_g, bst_swap_sel_default_byte_7_2_g ;
+wire bst_st_hw_le_g, bst_st_w_or_dbl_le_g, bst_st_x_le_g ;
+
+assign lsu_swap_sel_default_g =
+  rst_tri_en | (blkst_g ? bst_swap_sel_default_g : swap_sel_default_g) ;
+assign lsu_swap_sel_default_byte_7_2_g =
+  rst_tri_en | (blkst_g ? bst_swap_sel_default_byte_7_2_g : swap_sel_default_byte_7_2_g) ;
+
+assign lsu_st_hw_le_g =
+  ~rst_tri_en & (blkst_g ? bst_st_hw_le_g : st_hw_le_g) ;
+assign lsu_st_w_or_dbl_le_g =
+  ~rst_tri_en & (blkst_g ? bst_st_w_or_dbl_le_g : st_w_or_dbl_le_g) ;
+assign lsu_st_x_le_g =
+  ~rst_tri_en & (blkst_g ? bst_st_x_le_g : st_x_le_g) ;
+
+
+//=========================================================================================
+//	BLK STORE
+//=========================================================================================
+
+// Blk-St Handling : Snap state in g-stage of issue from IFU.
+wire snap_blk_st_m, snap_blk_st_g ;
+wire snap_blk_st_local_m ;
+
+// Snap condition in M: valid store, FP ld/st, alternate space, block ASI.
+assign snap_blk_st_m = fp_ldst_m & lsu_alt_space_m & blk_asi_m & st_inst_vld_m ;
+
+assign lsu_snap_blk_st_m = snap_blk_st_m ;
+
+// The local copy is additionally qualified by ifu_tlu_inst_vld_m.
+assign snap_blk_st_local_m = ifu_tlu_inst_vld_m & snap_blk_st_m ;
+
+dff_s  stgg_snap (
+  .clk (clk),
+  .din (snap_blk_st_local_m),
+  .q   (snap_blk_st_g),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+// output to be used in g-stage.
+// Captures the five outgoing lsu_* swap selects while snap_blk_st_g is high
+// and holds them as bst_* otherwise.
+dffe_s #(5) bst_state_g (
+  .clk (clk),
+  .en  (snap_blk_st_g),
+  .din ({lsu_swap_sel_default_g, lsu_swap_sel_default_byte_7_2_g,
+         lsu_st_hw_le_g, lsu_st_w_or_dbl_le_g, lsu_st_x_le_g}),
+  .q   ({bst_swap_sel_default_g, bst_swap_sel_default_byte_7_2_g,
+         bst_st_hw_le_g, bst_st_w_or_dbl_le_g, bst_st_x_le_g}),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+
+// snapped in g, used in m
+// Block-store page number: tlb_pgnum[39:10] is captured into blkst_pgnum_m
+// while snap_blk_st_g is high and held otherwise.
+
+   wire [39:10] blkst_pgnum_m;
+   
+dffe_s #(30) bst_pg_g (
+        .din    (tlb_pgnum[39:10]),
+        .q      (blkst_pgnum_m[39:10]),
+        .en     (snap_blk_st_g),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+
+// Each bit of blkst_pgnum_m is re-driven onto lsu_blkst_pgnum_m through its
+// own bw_u1_minbuf_5x cell (.a in, .z out); there is no logic change.
+// NOTE(review): the per-bit UZfix_* instance names look like hand-inserted
+// timing-fix buffers - presumably back-end scripts refer to these names, so
+// keep them stable (confirm).
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b10 (.a(blkst_pgnum_m[10]), .z(lsu_blkst_pgnum_m[10]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b11 (.a(blkst_pgnum_m[11]), .z(lsu_blkst_pgnum_m[11]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b12 (.a(blkst_pgnum_m[12]), .z(lsu_blkst_pgnum_m[12]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b13 (.a(blkst_pgnum_m[13]), .z(lsu_blkst_pgnum_m[13]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b14 (.a(blkst_pgnum_m[14]), .z(lsu_blkst_pgnum_m[14]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b15 (.a(blkst_pgnum_m[15]), .z(lsu_blkst_pgnum_m[15]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b16 (.a(blkst_pgnum_m[16]), .z(lsu_blkst_pgnum_m[16]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b17 (.a(blkst_pgnum_m[17]), .z(lsu_blkst_pgnum_m[17]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b18 (.a(blkst_pgnum_m[18]), .z(lsu_blkst_pgnum_m[18]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b19 (.a(blkst_pgnum_m[19]), .z(lsu_blkst_pgnum_m[19]));
+
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b20 (.a(blkst_pgnum_m[20]), .z(lsu_blkst_pgnum_m[20]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b21 (.a(blkst_pgnum_m[21]), .z(lsu_blkst_pgnum_m[21]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b22 (.a(blkst_pgnum_m[22]), .z(lsu_blkst_pgnum_m[22]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b23 (.a(blkst_pgnum_m[23]), .z(lsu_blkst_pgnum_m[23]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b24 (.a(blkst_pgnum_m[24]), .z(lsu_blkst_pgnum_m[24]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b25 (.a(blkst_pgnum_m[25]), .z(lsu_blkst_pgnum_m[25]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b26 (.a(blkst_pgnum_m[26]), .z(lsu_blkst_pgnum_m[26]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b27 (.a(blkst_pgnum_m[27]), .z(lsu_blkst_pgnum_m[27]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b28 (.a(blkst_pgnum_m[28]), .z(lsu_blkst_pgnum_m[28]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b29 (.a(blkst_pgnum_m[29]), .z(lsu_blkst_pgnum_m[29]));
+   
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b30 (.a(blkst_pgnum_m[30]), .z(lsu_blkst_pgnum_m[30]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b31 (.a(blkst_pgnum_m[31]), .z(lsu_blkst_pgnum_m[31]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b32 (.a(blkst_pgnum_m[32]), .z(lsu_blkst_pgnum_m[32]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b33 (.a(blkst_pgnum_m[33]), .z(lsu_blkst_pgnum_m[33]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b34 (.a(blkst_pgnum_m[34]), .z(lsu_blkst_pgnum_m[34]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b35 (.a(blkst_pgnum_m[35]), .z(lsu_blkst_pgnum_m[35]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b36 (.a(blkst_pgnum_m[36]), .z(lsu_blkst_pgnum_m[36]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b37 (.a(blkst_pgnum_m[37]), .z(lsu_blkst_pgnum_m[37]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b38 (.a(blkst_pgnum_m[38]), .z(lsu_blkst_pgnum_m[38]));
+bw_u1_minbuf_5x UZfix_lsu_blkst_pgnum_m_b39 (.a(blkst_pgnum_m[39]), .z(lsu_blkst_pgnum_m[39]));
+
+//=========================================================================================
+//  Prefetch Count
+//=========================================================================================
+
+// Nets for the per-thread outstanding-prefetch counters.
+wire [3:0] lsu_cpx_pref_ack ;
+wire [3:0] no_spc_pref ;
+wire [3:0] pref_ackcnt0, pref_ackcnt1, pref_ackcnt2, pref_ackcnt3 ;
+wire [3:0] pref_ackcnt0_din, pref_ackcnt1_din, pref_ackcnt2_din, pref_ackcnt3_din ;
+wire [3:0] pref_ackcnt_incr, pref_ackcnt_decr ;
+wire [3:0] pref_ackcnt_mx_incr, pref_ackcnt_mx_decr ;
+wire [3:0] pref_acknt_mx_incr_sel ;
+wire       lsu_pref_pcx_req_d1 ;
+
+// Delay the prefetch pcx request by one cycle.
+dff_s #(1) pref_pcx_req_stg (
+  .clk (clk),
+  .din (lsu_pref_pcx_req),
+  .q   (lsu_pref_pcx_req_d1),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+// A prefetch is issued for thread i when the delayed request coincides with
+// that thread's load pcx select and the request was not squashed.
+assign lsu_pcx_pref_issue[3:0] =
+  lsu_ld_pcx_rq_sel_d2[3:0] & {4{lsu_pref_pcx_req_d1 & ~lsu_pcx_req_squash_d1}} ;
+
+assign pref_acknt_mx_incr_sel[3:0] = lsu_pcx_pref_issue[3:0] ;
+
+// Pick the counter of the issuing thread as the operand for the increment.
+// The four legs are ORed, so an inactive select contributes zero.
+assign pref_ackcnt_mx_incr[3:0] =
+  (pref_acknt_mx_incr_sel[3] ? pref_ackcnt3[3:0] : 4'b0) |
+  (pref_acknt_mx_incr_sel[2] ? pref_ackcnt2[3:0] : 4'b0) |
+  (pref_acknt_mx_incr_sel[1] ? pref_ackcnt1[3:0] : 4'b0) |
+  (pref_acknt_mx_incr_sel[0] ? pref_ackcnt0[3:0] : 4'b0) ;
+   
+  
+//====================================================================================
+// prefetch ack back from CPX
+   // Prefetch acknowledge coming back from the CPX.
+   wire       dcfill_active_e ;
+   wire       dfq_thread0, dfq_thread1, dfq_thread2, dfq_thread3 ;
+   wire [3:0] pref_acknt_mx_decr_sel ;
+
+   // A DFQ load is valid and no memory reference occupies E.
+   assign dcfill_active_e = ~memref_e & lsu_dfq_ld_vld ;
+
+   // Local aliases of the fill thread decode.
+   assign dfq_thread3 = dfill_thread3 ;
+   assign dfq_thread2 = dfill_thread2 ;
+   assign dfq_thread1 = dfill_thread1 ;
+   assign dfq_thread0 = dfill_thread0 ;
+
+   // Ack for thread i: fill active, packet flagged as prefetch, thread i owns it.
+   assign lsu_cpx_pref_ack[3:0] =
+     {dfq_thread3, dfq_thread2, dfq_thread1, dfq_thread0} &
+     {4{dcfill_active_e & lsu_cpx_pkt_prefetch2}} ;
+
+   assign pref_acknt_mx_decr_sel[3:0] = lsu_cpx_pref_ack[3:0] ;
+
+// Pick the counter of the acknowledged thread as the operand for the decrement.
+assign pref_ackcnt_mx_decr[3:0] =
+  (pref_acknt_mx_decr_sel[3] ? pref_ackcnt3[3:0] : 4'b0) |
+  (pref_acknt_mx_decr_sel[2] ? pref_ackcnt2[3:0] : 4'b0) |
+  (pref_acknt_mx_decr_sel[1] ? pref_ackcnt1[3:0] : 4'b0) |
+  (pref_acknt_mx_decr_sel[0] ? pref_ackcnt0[3:0] : 4'b0) ;
+
+// One shared incrementer and one shared decrementer serve all four threads.
+assign pref_ackcnt_incr[3:0] = 4'b0001 + pref_ackcnt_mx_incr[3:0] ;
+assign pref_ackcnt_decr[3:0] = pref_ackcnt_mx_decr[3:0] - 4'b0001 ;
+
+// Next-state value per thread: the decremented count when that thread is
+// being acknowledged, otherwise the incremented count.
+assign pref_ackcnt3_din[3:0] = lsu_cpx_pref_ack[3] ? pref_ackcnt_decr[3:0] : pref_ackcnt_incr[3:0] ;
+assign pref_ackcnt2_din[3:0] = lsu_cpx_pref_ack[2] ? pref_ackcnt_decr[3:0] : pref_ackcnt_incr[3:0] ;
+assign pref_ackcnt1_din[3:0] = lsu_cpx_pref_ack[1] ? pref_ackcnt_decr[3:0] : pref_ackcnt_incr[3:0] ;
+assign pref_ackcnt0_din[3:0] = lsu_cpx_pref_ack[0] ? pref_ackcnt_decr[3:0] : pref_ackcnt_incr[3:0] ;
+
+// Update a counter on an issue or an ack, but not both:
+// if both occur in the same cycle then they cancel out.
+wire [3:0] pref_ackcnt_en ;
+assign pref_ackcnt_en[3:0] = lsu_pcx_pref_issue[3:0] ^ lsu_cpx_pref_ack[3:0] ;
+
+// Thread0 counter
+dffre_s #(4)  pref_ackcnt0_ff (
+  .clk (clk),
+  .rst (reset),
+  .en  (pref_ackcnt_en[0]),
+  .din (pref_ackcnt0_din[3:0]),
+  .q   (pref_ackcnt0[3:0]),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+// Thread1 counter
+dffre_s #(4)  pref_ackcnt1_ff (
+  .clk (clk),
+  .rst (reset),
+  .en  (pref_ackcnt_en[1]),
+  .din (pref_ackcnt1_din[3:0]),
+  .q   (pref_ackcnt1[3:0]),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+// Thread2 counter
+dffre_s #(4)  pref_ackcnt2_ff (
+  .clk (clk),
+  .rst (reset),
+  .en  (pref_ackcnt_en[2]),
+  .din (pref_ackcnt2_din[3:0]),
+  .q   (pref_ackcnt2[3:0]),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+// Thread3 counter
+dffre_s #(4)  pref_ackcnt3_ff (
+  .clk (clk),
+  .rst (reset),
+  .en  (pref_ackcnt_en[3]),
+  .din (pref_ackcnt3_din[3:0]),
+  .q   (pref_ackcnt3[3:0]),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+// "No space" flag per thread is the MSB of that thread's 4-bit counter,
+// i.e. it is set once the count has bit 3 set.
+assign no_spc_pref[3:0] =
+  {pref_ackcnt3[3], pref_ackcnt2[3], pref_ackcnt1[3], pref_ackcnt0[3]} ;
+
+assign lsu_no_spc_pref[3:0] = no_spc_pref[3:0] ;
+
+//====================================================================
+   // BIST access in E: either a BIST write or a BIST read is valid.
+   wire        lsu_bist_e ;
+   wire [10:0] lmq_pcx_pkt_addr_din ;
+   wire [3:0]  dfq_byp_thrd_sel ;
+
+   assign lsu_bist_e = lsu_bist_rvld_e | lsu_bist_wvld_e ;
+
+// Choose bits [10:0] of the pcx packet address from the LMQ entry picked by
+// dfq_byp_thrd_sel.
+mux4ds #(11) lmq_pcx_pkt_addr_mux (
+  .dout (lmq_pcx_pkt_addr_din[10:0]),
+  .in0  (lmq0_pcx_pkt_addr[10:0]),
+  .in1  (lmq1_pcx_pkt_addr[10:0]),
+  .in2  (lmq2_pcx_pkt_addr[10:0]),
+  .in3  (lmq3_pcx_pkt_addr[10:0]),
+  .sel0 (dfq_byp_thrd_sel[0]),
+  .sel1 (dfq_byp_thrd_sel[1]),
+  .sel2 (dfq_byp_thrd_sel[2]),
+  .sel3 (dfq_byp_thrd_sel[3])
+);
+
+// Hold the selected address; it is refreshed only while dfq_byp_ff_en is high.
+dffe_s #(11)  lmq_pcx_pkt_addr_ff (
+  .clk (clk),
+  .en  (dfq_byp_ff_en),
+  .din (lmq_pcx_pkt_addr_din[10:0]),
+  .q   (lmq_pcx_pkt_addr[10:0]),
+  .se  (se),
+  .si  (),
+  .so  ()
+);
+
+
+   // Buffered copy of lmq_pcx_pkt_addr[10:4]: one bw_u1_minbuf_5x cell per bit
+   // (.a in, .z out), no logic change. The buffered bits are what the dcache
+   // fill address mux consumes for the load-fill leg.
+   // NOTE(review): UZfix_* names look like hand-inserted timing-fix cells;
+   // presumably back-end scripts depend on them - keep names stable (confirm).
+   wire [10:4] lmq_pcx_pkt_addr_minbf;
+   bw_u1_minbuf_5x UZfix_lmq_pcx_pkt_addr_minbf_b10 (.a(lmq_pcx_pkt_addr[10]), .z(lmq_pcx_pkt_addr_minbf[10]));
+   bw_u1_minbuf_5x UZfix_lmq_pcx_pkt_addr_minbf_b9 (.a(lmq_pcx_pkt_addr[9]), .z(lmq_pcx_pkt_addr_minbf[9]));
+   bw_u1_minbuf_5x UZfix_lmq_pcx_pkt_addr_minbf_b8 (.a(lmq_pcx_pkt_addr[8]), .z(lmq_pcx_pkt_addr_minbf[8]));
+   bw_u1_minbuf_5x UZfix_lmq_pcx_pkt_addr_minbf_b7 (.a(lmq_pcx_pkt_addr[7]), .z(lmq_pcx_pkt_addr_minbf[7]));
+   bw_u1_minbuf_5x UZfix_lmq_pcx_pkt_addr_minbf_b6 (.a(lmq_pcx_pkt_addr[6]), .z(lmq_pcx_pkt_addr_minbf[6]));
+   bw_u1_minbuf_5x UZfix_lmq_pcx_pkt_addr_minbf_b5 (.a(lmq_pcx_pkt_addr[5]), .z(lmq_pcx_pkt_addr_minbf[5]));
+   bw_u1_minbuf_5x UZfix_lmq_pcx_pkt_addr_minbf_b4 (.a(lmq_pcx_pkt_addr[4]), .z(lmq_pcx_pkt_addr_minbf[4]));
+   
+   
+// Bit 3 of the held LMQ address is exported unbuffered.
+assign           lmq_ld_addr_b3 = lmq_pcx_pkt_addr[3];
+   
+   
+// dcache fill/write address in E, built as an AND-OR mux over five sources.
+// There is no priority encoding: each source is masked by its own enable and
+// the results are ORed, so the enables are expected to be mutually exclusive.
+assign  dcache_fill_addr_e[10:0] =
+  ({11{lsu_dfq_ld_vld}}                    & {lmq_pcx_pkt_addr_minbf[10:4], lmq_pcx_pkt_addr[3:0]}) |
+  ({11{lsu_dfq_st_vld}}                    & st_dcfill_addr[10:0]) |
+  ({11{lsu_diagnstc_wr_src_sel_e}}         & lsu_diagnstc_wr_addr_e[10:0]) |
+  ({11{lsu_bist_wvld_e | lsu_bist_rvld_e}} & {mbist_dcache_index[6:0], mbist_dcache_word, 3'b000}) |
+  ({11{lsu_dc_iob_access_e}}               & {dcache_iob_addr_e[7:0], 3'b000}) ;
+
+// Bits [10:3] of the address are exported.
+assign  lsu_dcache_fill_addr_e[10:3] = dcache_fill_addr_e[10:3] ;
+
+   // Separately buffered copy of fill address bits [10:4], driven onto
+   // lsu_dcache_fill_addr_e_err through one bw_u1_buf_30x cell per bit
+   // (.a in, .z out); no logic change.
+   // NOTE(review): UZfix_* names look like hand-inserted buffers; presumably
+   // back-end scripts depend on them - keep names stable (confirm).
+   wire [10:4] dcache_fill_addr_e_tmp;
+assign dcache_fill_addr_e_tmp[10:4]    = dcache_fill_addr_e[10:4];  
+bw_u1_buf_30x UZfix_lsu_dcache_fill_addr_e_err_b10 ( .a(dcache_fill_addr_e_tmp[10]),  .z(lsu_dcache_fill_addr_e_err[10]));
+bw_u1_buf_30x UZfix_lsu_dcache_fill_addr_e_err_b9  ( .a(dcache_fill_addr_e_tmp[9]),  .z(lsu_dcache_fill_addr_e_err[9] ));
+bw_u1_buf_30x UZfix_lsu_dcache_fill_addr_e_err_b8  ( .a(dcache_fill_addr_e_tmp[8]),  .z(lsu_dcache_fill_addr_e_err[8]));
+bw_u1_buf_30x UZfix_lsu_dcache_fill_addr_e_err_b7  ( .a(dcache_fill_addr_e_tmp[7]),  .z(lsu_dcache_fill_addr_e_err[7]));
+bw_u1_buf_30x UZfix_lsu_dcache_fill_addr_e_err_b6  ( .a(dcache_fill_addr_e_tmp[6]),  .z(lsu_dcache_fill_addr_e_err[6]));
+bw_u1_buf_30x UZfix_lsu_dcache_fill_addr_e_err_b5  ( .a(dcache_fill_addr_e_tmp[5]),  .z(lsu_dcache_fill_addr_e_err[5]));
+bw_u1_buf_30x UZfix_lsu_dcache_fill_addr_e_err_b4  ( .a(dcache_fill_addr_e_tmp[4]),  .z(lsu_dcache_fill_addr_e_err[4]));
+
+// used as ld bypass 
+// Low three bits of the fill address.
+assign dcache_wr_addr_e[2:0] = dcache_fill_addr_e[2:0];
+
+// Encoded write address [3:0] used for the write byte mask.
+// ldfill doesn't need to create wrt byte msk, always fill one line, so the
+// load-fill source has no leg in this mux.
+assign waddr_enc[3:0] =
+  ({4{lsu_dfq_st_vld}}            & st_dcfill_addr[3:0]) |
+  ({4{lsu_diagnstc_wr_src_sel_e}} & lsu_diagnstc_wr_addr_e[3:0]) |
+  ({4{lsu_bist_e}}                & {mbist_dcache_word, 3'b000}) |
+  ({4{lsu_dc_iob_access_e}}       & {dcache_iob_addr_e[0], 3'b000}) ;
+
+//==============================================================
+/*
+dff_s  #(4) lsu_thread_stgg (
+        .din    ({thread3_m, thread2_m, thread1_m,thread0_m}),
+        .q      (lsu_thread_g[3:0]),
+        .clk    (clk),
+        .se     (se),       .si (),          .so ()
+        );
+*/
+   assign lsu_thread_g[3] = thread3_g;   // lsu_thread_g is wired straight from threadN_g; the dff_s stage above is commented out
+   assign lsu_thread_g[2] = thread2_g;
+   assign lsu_thread_g[1] = thread1_g;
+   assign lsu_thread_g[0] = thread0_g;
+   
+//===============================================================
+//LMQ thread sel
+//===============================================================
+//lmq_ldd_vld: the 2-bit lsu_dfq_byp_tid is first decoded into the one-hot dfq_byp_thrd_sel[3:0]
+   assign     dfq_byp_thrd_sel[0] = ~lsu_dfq_byp_tid[1] & ~lsu_dfq_byp_tid[0];
+   assign     dfq_byp_thrd_sel[1] = ~lsu_dfq_byp_tid[1] &  lsu_dfq_byp_tid[0];
+   assign     dfq_byp_thrd_sel[2] =  lsu_dfq_byp_tid[1] & ~lsu_dfq_byp_tid[0];
+   assign     dfq_byp_thrd_sel[3] =  lsu_dfq_byp_tid[1] &  lsu_dfq_byp_tid[0];
+// The decoded select drives sel0..sel3 of the mux that picks one of lmq0..lmq3_ldd_vld.
+   wire       lmq_ldd_vld_din;
+   
+mux4ds #(1) lmq_ldd_vld_mux (
+       .in0 ({lmq0_ldd_vld}),
+       .in1 ({lmq1_ldd_vld}),
+       .in2 ({lmq2_ldd_vld}),
+       .in3 ({lmq3_ldd_vld}),
+       .sel0(dfq_byp_thrd_sel[0]),
+       .sel1(dfq_byp_thrd_sel[1]),
+       .sel2(dfq_byp_thrd_sel[2]),
+       .sel3(dfq_byp_thrd_sel[3]),
+       .dout({lmq_ldd_vld_din})
+);
+// The muxed bit is registered as lmq_ldd_vld; dfq_byp_ff_en drives the .en port (presumably a load enable - confirm in dffe_s).
+dffe_s #(1)  lmq_ldd_vld_ff (
+           .din    ({lmq_ldd_vld_din}),
+           .q      ({lmq_ldd_vld}),
+           .en     (dfq_byp_ff_en),
+           .clk    (clk),
+           .se     (se),       .si (),          .so ()
+           );
+                       
+//bist: way selection shared by the iob access and mbist paths
+wire [1:0] bist_way_enc_e;
+wire [3:0] bist_way_e;
+
+// Encoded way: lsu_dcache_iob_way_e when lsu_dc_iob_access_e is set, otherwise mbist_dcache_way.
+assign bist_way_enc_e[1:0] =  lsu_dc_iob_access_e ?  
+       lsu_dcache_iob_way_e[1:0] : mbist_dcache_way[1:0] ;
+   // 2-to-4 decode of the encoded way into a one-hot vector
+assign  bist_way_e[0] = ~bist_way_enc_e[1] & ~bist_way_enc_e[0] ;
+assign  bist_way_e[1] = ~bist_way_enc_e[1] &  bist_way_enc_e[0] ;
+assign  bist_way_e[2] =  bist_way_enc_e[1] & ~bist_way_enc_e[0] ;
+assign  bist_way_e[3] =  bist_way_enc_e[1] &  bist_way_enc_e[0] ;
+
+assign lsu_bist_rsel_way_e[3:0] = bist_way_e[3:0];
+
+   wire lmq_l2fill_fp_din;   // lmqN_l2fill_fpld of the thread selected by dfq_byp_thrd_sel
+assign    lmq_l2fill_fp_din =
+       dfq_byp_thrd_sel[0] & lmq0_l2fill_fpld | 
+       dfq_byp_thrd_sel[1] & lmq1_l2fill_fpld | 
+       dfq_byp_thrd_sel[2] & lmq2_l2fill_fpld | 
+       dfq_byp_thrd_sel[3] & lmq3_l2fill_fpld ;
+ // registered as lsu_l2fill_fpld_e; dfq_byp_ff_en drives the .en port
+dffe_s #(1) lmq_l2fill_fp_ff (
+           .din (lmq_l2fill_fp_din),
+           .q   (lsu_l2fill_fpld_e),
+           .en  (dfq_byp_ff_en),
+           .clk (clk),
+           .se  (se),       .si (),          .so ()
+           );   
+// Same structure for the per-thread lmqN_ncache_ld bits.
+   wire lmq_ncache_ld_din;
+assign    lmq_ncache_ld_din =
+       dfq_byp_thrd_sel[0] & lmq0_ncache_ld | 
+       dfq_byp_thrd_sel[1] & lmq1_ncache_ld | 
+       dfq_byp_thrd_sel[2] & lmq2_ncache_ld | 
+       dfq_byp_thrd_sel[3] & lmq3_ncache_ld ;
+ // registered as lsu_ncache_ld_e; dfq_byp_ff_en drives the .en port
+dffe_s #(1) lmq_ncache_ld_ff (
+           .din (lmq_ncache_ld_din),
+           .q   (lsu_ncache_ld_e),
+           .en  (dfq_byp_ff_en),
+           .clk (clk),
+           .se  (se),       .si (),          .so ()
+           );   
+                         
+//lmq: 2-bit fill way of the thread selected by dfq_byp_thrd_sel, taken from lmqN_pcx_pkt_way
+   wire [1:0]      lmq_ldfill_way_din;
+   
+mux4ds #(2) lmq_ldfill_way_mux (
+       .in0 ({lmq0_pcx_pkt_way[1:0]}),
+       .in1 ({lmq1_pcx_pkt_way[1:0]}),
+       .in2 ({lmq2_pcx_pkt_way[1:0]}),
+       .in3 ({lmq3_pcx_pkt_way[1:0]}),
+       .sel0(dfq_byp_thrd_sel[0]),
+       .sel1(dfq_byp_thrd_sel[1]),
+       .sel2(dfq_byp_thrd_sel[2]),
+       .sel3(dfq_byp_thrd_sel[3]),
+       .dout({lmq_ldfill_way_din[1:0]})
+);
+   wire [1:0]      lmq_ldfill_way;
+// The muxed way is registered as lmq_ldfill_way; dfq_byp_ff_en drives the .en port.
+dffe_s #(2)  lmq_ldfill_way_ff (
+           .din    ({lmq_ldfill_way_din[1:0]}),
+           .q      ({lmq_ldfill_way[1:0]}),
+           .en     (dfq_byp_ff_en),
+           .clk    (clk),
+           .se     (se),       .si (),          .so ()
+           );
+
+wire [1:0] dcache_fill_way_enc_e;   // encoded fill way, AND-OR selected from five sources
+   
+assign dcache_fill_way_enc_e[1:0] = 
+{2{lsu_dc_iob_access_e}}               & lsu_dcache_iob_way_e[1:0] |
+{2{lsu_bist_e}}                        & bist_way_enc_e[1:0]       | 
+{2{lsu_diagnstc_wr_src_sel_e}}         & lsu_diagnstc_wr_way_e[1:0]|
+{2{lsu_dfq_st_vld}}                    & lsu_st_way_e[1:0]         |
+{2{lsu_dfq_ld_vld}}                    & lmq_ldfill_way[1:0]; 
+// 2-to-4 decode; with no select active the encoded way is 2'b00, so bit 0 of the one-hot output is high.
+   assign lsu_dcache_fill_way_e[0] =   ~dcache_fill_way_enc_e[1] & ~dcache_fill_way_enc_e[0];
+   assign lsu_dcache_fill_way_e[1] =   ~dcache_fill_way_enc_e[1] &  dcache_fill_way_enc_e[0];
+   assign lsu_dcache_fill_way_e[2] =    dcache_fill_way_enc_e[1] & ~dcache_fill_way_enc_e[0];
+   assign lsu_dcache_fill_way_e[3] =    dcache_fill_way_enc_e[1] &  dcache_fill_way_enc_e[0];
+
+//ld_rq_type: 3-bit request type of the thread selected by dfq_byp_thrd_sel
+
+   wire [2:0]      lmq_ld_rq_type_din;
+   
+mux4ds #(3) lmq_ld_rq_type_mux (
+       .in0 ({lmq0_ld_rq_type[2:0]}),
+       .in1 ({lmq1_ld_rq_type[2:0]}),
+       .in2 ({lmq2_ld_rq_type[2:0]}),
+       .in3 ({lmq3_ld_rq_type[2:0]}),
+       .sel0(dfq_byp_thrd_sel[0]),
+       .sel1(dfq_byp_thrd_sel[1]),
+       .sel2(dfq_byp_thrd_sel[2]),
+       .sel3(dfq_byp_thrd_sel[3]),
+       .dout({lmq_ld_rq_type_din[2:0]})
+);
+// The muxed type is registered as lmq_ld_rq_type_e; dfq_byp_ff_en drives the .en port.
+dffe_s #(3)  lmq_ld_rq_type_e_ff (
+           .din    ({lmq_ld_rq_type_din[2:0]}),
+           .q      ({lmq_ld_rq_type_e[2:0]}),
+           .en     (dfq_byp_ff_en),
+           .clk    (clk),
+           .se     (se),       .si (),          .so ()
+           );
+
+//================================================================
+wire	other_flush_pipe_w ;
+// other_flush_pipe_w: tlu early flush, or lsu_ttype_vld_m2 qualified by lsu_inst_vld_w
+assign	other_flush_pipe_w = tlu_early_flush_pipe2_w | (lsu_ttype_vld_m2 & lsu_inst_vld_w);     
+assign	dctl_flush_pipe_w = other_flush_pipe_w | ifu_lsu_flush_w ;   // adds the ifu flush to the term above
+// Staged ifu_tlu_flush_m should be used !!
+assign  dctl_early_flush_w = (lsu_local_early_flush_g | tlu_early_flush_pipe2_w | ifu_lsu_flush_w) ;   // uses lsu_local_early_flush_g instead of the lsu_ttype_vld_m2 term
+
+//================================================================
+// dcfill size: dcache_wr_size_e is either the constant 2'b11 or lsu_st_dcfill_size_e
+   wire dcfill_size_mx_sel_e;
+//bug6216/eco6624 
+assign  dcfill_size_mx_sel_e  =  lsu_dc_iob_access_e | lsu_diagnstc_wr_src_sel_e;    
+// sel0/sel1 are complementary; presumably sel0 picks in0 (2'b11) for iob access or diagnostic write - confirm in mux2ds.
+mux2ds  #(2)  dcache_wr_size_e_mux (
+              .in0(2'b11),
+              .in1(lsu_st_dcfill_size_e[1:0]),
+              .sel0(dcfill_size_mx_sel_e),
+              .sel1(~dcfill_size_mx_sel_e),
+              .dout(dcache_wr_size_e[1:0])
+);
+
+
+//assign  lsu_dcfill_data_mx_sel_e  =   (dcache_iob_wr_e | dcache_iob_rd_e | lsu_bist_wvld_e);   
+   wire dcfill_data_mx_sel_e_l;
+   // The commented-out assign above is built from explicit cells: a bw_u1_nor3_8x followed by a bw_u1_inv_30x.
+bw_u1_nor3_8x  UZsize_dcfill_data_mx_sel_e_l (.a (dcache_iob_wr_e),
+                                              .b (dcache_iob_rd_e), 
+                                              .c (lsu_bist_wvld_e),
+                                              .z (dcfill_data_mx_sel_e_l));
+// dcfill_data_mx_sel_e_l feeds the second cell, whose output is lsu_dcfill_data_mx_sel_e.
+bw_u1_inv_30x  UZsize_dcfill_data_mx_sel_e   ( .a(dcfill_data_mx_sel_e_l), .z (lsu_dcfill_data_mx_sel_e));
+   
+//================================================================
+   wire [3:0] dfq_thread_e;   // one-hot decode of the 2-bit lsu_dfill_tid_e
+   assign     dfq_thread_e[0] = ~lsu_dfill_tid_e[1] & ~lsu_dfill_tid_e[0];
+   assign     dfq_thread_e[1] = ~lsu_dfill_tid_e[1] &  lsu_dfill_tid_e[0];
+   assign     dfq_thread_e[2] =  lsu_dfill_tid_e[1] & ~lsu_dfill_tid_e[0];
+   assign     dfq_thread_e[3] =  lsu_dfill_tid_e[1] &  lsu_dfill_tid_e[0];
+// dfq bypass select per thread: decoded thread, qualified by dcfill_active_e and blocked by lsu_cpx_pkt_prefetch2.
+   wire [3:0] dfq_byp_sel_e;
+   assign     dfq_byp_sel_e[0] = dfq_thread_e[0] & dcfill_active_e & ~lsu_cpx_pkt_prefetch2;
+   assign     dfq_byp_sel_e[1] = dfq_thread_e[1] & dcfill_active_e & ~lsu_cpx_pkt_prefetch2;
+   assign     dfq_byp_sel_e[2] = dfq_thread_e[2] & dcfill_active_e & ~lsu_cpx_pkt_prefetch2;
+   assign     dfq_byp_sel_e[3] = dfq_thread_e[3] & dcfill_active_e & ~lsu_cpx_pkt_prefetch2;
+   
+wire	[3:0] lmq_byp_misc_sel_e ;
+// Per-thread select: OR of ld_thrd_byp_sel_e and dfq_byp_sel_e.
+assign  lmq_byp_misc_sel_e[0] = ld_thrd_byp_sel_e[0]  |        // select for ldxa/raw.
+                                dfq_byp_sel_e[0]  ;              // select for dfq.
+assign  lmq_byp_misc_sel_e[1] = ld_thrd_byp_sel_e[1]  |        // select for ldxa/raw.
+                                dfq_byp_sel_e[1] ;               // select for dfq.
+assign  lmq_byp_misc_sel_e[2] = ld_thrd_byp_sel_e[2]  |        // select for ldxa/raw.
+                                dfq_byp_sel_e[2] ;               // select for dfq.
+assign  lmq_byp_misc_sel_e[3] = ld_thrd_byp_sel_e[3]  |        // select for ldxa/raw.
+                                dfq_byp_sel_e[3] ;               // select for dfq.
+// AND-OR selection: a deselected thread contributes zero; values are ORed if more than one select is high.
+   wire [2:0] byp_misc_addr_e;
+assign byp_misc_addr_e[2:0] = (lmq_byp_misc_sel_e[0] ? lmq0_pcx_pkt_addr[2:0] : 3'b0) |
+                              (lmq_byp_misc_sel_e[1] ? lmq1_pcx_pkt_addr[2:0] : 3'b0) |
+                              (lmq_byp_misc_sel_e[2] ? lmq2_pcx_pkt_addr[2:0] : 3'b0) |
+                              (lmq_byp_misc_sel_e[3] ? lmq3_pcx_pkt_addr[2:0] : 3'b0) ;
+   
+   wire [1:0] byp_misc_sz_e;
+assign byp_misc_sz_e[1:0] = (lmq_byp_misc_sel_e[0] ? lmq0_byp_misc_sz[1:0] : 2'b0) |
+                            (lmq_byp_misc_sel_e[1] ? lmq1_byp_misc_sz[1:0] : 2'b0) |
+                            (lmq_byp_misc_sel_e[2] ? lmq2_byp_misc_sz[1:0] : 2'b0) |
+                            (lmq_byp_misc_sel_e[3] ? lmq3_byp_misc_sz[1:0] : 2'b0) ;
+   
+// 5-bit flop with no enable port: {addr[2:0], sz[1:0]} in, lsu_byp_misc_addr_m / lsu_byp_misc_sz_m out.
+dff_s #(5)  lmq_byp_misc_stgm (
+           .din    ({byp_misc_addr_e[2:0], byp_misc_sz_e[1:0]}),
+           .q      ({lsu_byp_misc_addr_m[2:0], lsu_byp_misc_sz_m[1:0]}),
+           .clk    (clk),
+           .se     (se),       .si (),          .so ()
+           );
+  
+endmodule
+
+
Index: /trunk/T1-CPU/tlu/tlu_tdp.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_tdp.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_tdp.v	(revision 6)
@@ -0,0 +1,4937 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_tdp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//	Description:	Trap Datapath 
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include "sys.h" // system level definition file which contains the 
+			     // time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+`include "tlu.h"
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module tlu_tdp (/*AUTOARG*/
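+   // Outputs (note: tlu_pib_rsr_data_e, tlu_por_rstint_g and tlu_trap_hpstate_enb in this group are declared as inputs)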
+   tlu_pib_rsr_data_e, tlu_restore_pc_w1, tlu_restore_npc_w1, tlu_partial_trap_pc_w1,
+   tsa_wdata, tlu_int_pstate_ie, local_pstate_ie, tlu_ifu_pstate_pef, 
+   tlu_lsu_pstate_cle, tlu_lsu_pstate_priv, tlu_int_redmode, tlu_lsu_redmode, 
+   tlu_sscan_test_data, 
+   // modified for bug 1767
+   tlu_pstate_am, tlu_sftint_id, 
+   // added for timing
+   // modified for hypervisor support
+   tlu_dnrtry_global_g, tlu_tick_incr_din, tlu_exu_rsr_data_m, 
+   tlu_hpstate_priv, local_hpstate_priv, local_hpstate_enb, local_pstate_priv, 
+   tlu_hpstate_enb, tlu_hintp, tlu_por_rstint_g, tcl_hpstate_priv, tcl_hpstate_enb, 
+   tlu_trap_hpstate_enb, tlu_hpstate_tlz, tlu_asi_state_e, tlu_hpstate_ibe, 
+   so, 
+   // Inputs
+   tsa_rdata, tlu_wsr_data_w, lsu_tlu_rsr_data_e, tlu_ibrkpt_trap_w2, 
+   // reset was modified to abide to the Niagara reset methodology
+   rclk, tlu_rst, tlu_thrd_wsel_w2, ifu_lsu_imm_asi_d, // tm_l, 
+   tlu_final_ttype_w2, tlu_pstate_din_sel0, tlu_pstate_din_sel1, 
+   tlu_pstate_din_sel2, tlu_pstate_din_sel3, ifu_lsu_imm_asi_vld_d,  
+   lsu_asi_reg0, lsu_asi_reg1, lsu_asi_reg2, lsu_asi_reg3, 
+   exu_tlu_ccr0_w, exu_tlu_ccr1_w, exu_tlu_ccr2_w, exu_tlu_ccr3_w, 
+   exu_tlu_cwp0, exu_tlu_cwp1, exu_tlu_cwp2, exu_tlu_cwp3, tlu_trap_cwp_en, 
+   tlu_pc_new_w, tlu_npc_new_w, tlu_sftint_en_l_g, tlu_sftint_mx_sel, 
+   tlu_set_sftint_l_g, tlu_wr_tsa_inst_w2,  tlu_clr_sftint_l_g, 
+   tlu_wr_sftint_l_g, tlu_sftint_penc_sel, tlu_tba_en_l, tlu_tick_en_l, 
+   tlu_tickcmp_sel, tlu_tickcmp_en_l, // tlu_retry_inst_m, tlu_done_inst_m, 
+   tlu_update_pc_l_w, tlu_tl_gt_0_w2, pib_pich_wrap, // tlu_dnrtry_inst_m_l, 
+   tlu_select_tba_w2, tlu_select_redmode, tlu_update_pstate_l_w2, tlu_pil, 
+   tlu_trp_lvl, tlu_tick_npt, tlu_thrd_rsel_e, tlu_tick_incr_dout, 
+   tlu_rdpr_mx1_sel, tlu_rdpr_mx2_sel, tlu_rdpr_mx3_sel, tlu_rdpr_mx4_sel, 
+   tlu_hpstate_din_sel0, tlu_hpstate_din_sel1, tlu_pc_mxsel_w2,  
+   tlu_hpstate_din_sel2, tlu_hpstate_din_sel3, tlu_update_hpstate_l_w2, 
+   tlu_htba_en_l, tlu_rdpr_mx5_sel, tlu_rdpr_mx6_sel, pib_picl_wrap, 
+   tlu_rdpr_mx7_sel, tlu_htickcmp_intdis, tlu_stickcmp_en_l, tlu_htickcmp_en_l, 
+   tlu_gl_lvl0, tlu_gl_lvl1, tlu_gl_lvl2, tlu_gl_lvl3, tlu_wr_hintp_g, 
+   tlu_set_hintp_sel_g, ctu_sscan_tid, si, se
+   );	
+
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+// End of automatics
+input	[`TLU_TDP_TSA_WIDTH-1:0] tsa_rdata;		   // rd data for tsa.
+input   [`TLU_THRD_NUM-1:0] tlu_por_rstint_g;
+//
+// modified for timing
+input   [`TLU_ASR_DATA_WIDTH-1:0] tlu_wsr_data_w; // pr/st data from irf.
+
+input	[7:0]	lsu_tlu_rsr_data_e;	// lsu sr/pr read data
+
+input		rclk;			// clock
+//
+// reset was removed to abide to the Niagara reset methodology 
+input tlu_rst;			           // unit-reset
+input [`TLU_THRD_NUM-1:0] tlu_thrd_wsel_w2;// thread requiring tsa write.
+input [`TSA_TTYPE_WIDTH-1:0]	tlu_final_ttype_w2;	   // selected ttype - g
+input tlu_ibrkpt_trap_w2;	// instruction brkpt trap 
+input tlu_trap_hpstate_enb;	// mode indicator for the trapped thrd 
+input tlu_wr_tsa_inst_w2;	// write state inst
+input [1:0]  tlu_pstate_din_sel0; // sel source of tsa wdata
+input [1:0]  tlu_pstate_din_sel1; // sel source of tsa wdata
+input [1:0]  tlu_pstate_din_sel2; // sel source of tsa wdata
+input [1:0]  tlu_pstate_din_sel3; // sel source of tsa wdata
+input [`TLU_ASI_STATE_WIDTH-1:0] lsu_asi_reg0; // asi state - thread0
+input [`TLU_ASI_STATE_WIDTH-1:0] lsu_asi_reg1; // asi state - thread1
+input [`TLU_ASI_STATE_WIDTH-1:0] lsu_asi_reg2; // asi state - thread2
+input [`TLU_ASI_STATE_WIDTH-1:0] lsu_asi_reg3; // asi state - thread3
+input [`TLU_ASI_STATE_WIDTH-1:0] ifu_lsu_imm_asi_d; // asi state value from imm 
+input ifu_lsu_imm_asi_vld_d; // valid asi state value from imm
+
+input [3:0]	 tlu_tickcmp_sel;  // select src for tickcmp
+input [3:0]	 tlu_tickcmp_en_l; // tick cmp reg write enable
+input        tlu_tick_en_l;	   // tick reg write enable
+
+// overflow for the pic registers - lvl15 int 
+// input  [`TLU_THRD_NUM-1:0] pib_pic_wrap; 
+input  [`TLU_THRD_NUM-1:0] pib_pich_wrap; 
+input  [`TLU_THRD_NUM-1:0] pib_picl_wrap; 
+
+input [7:0]  exu_tlu_ccr0_w;  // ccr - thread0
+input [7:0]  exu_tlu_ccr1_w;  // ccr - thread1
+input [7:0]  exu_tlu_ccr2_w;  // ccr - thread2
+input [7:0]  exu_tlu_ccr3_w;  // ccr - thread3
+// input [2:0]  exu_tlu_cwp0_w;  // cwp - thread0
+// input [2:0]  exu_tlu_cwp1_w;  // cwp - thread1
+// input [2:0]  exu_tlu_cwp2_w;  // cwp - thread2
+// input [2:0]  exu_tlu_cwp3_w;  // cwp - thread3
+input [2:0]  exu_tlu_cwp0;  // cwp - thread0
+input [2:0]  exu_tlu_cwp1;  // cwp - thread1
+input [2:0]  exu_tlu_cwp2;  // cwp - thread2
+input [2:0]  exu_tlu_cwp3;  // cwp - thread3
+// added for bug3499
+input [`TLU_THRD_NUM-1:0] tlu_trap_cwp_en;
+// modified due to bug 3017
+// input [47:0] ifu_tlu_pc_m;	  // pc
+// input [47:0] ifu_tlu_npc_m;   // npc
+// modified due to redistribution of logic
+// input [48:0] ifu_tlu_pc_m;	  // pc
+// input [48:0] ifu_tlu_npc_m;   // npc
+input [48:0] tlu_pc_new_w;	  // pc
+input [48:0] tlu_npc_new_w;   // npc
+
+input [3:0]	 tlu_sftint_en_l_g; // wr enable for sftint regs.
+input [3:0]	 tlu_sftint_mx_sel; // mux select for sftint regs 
+input        tlu_set_sftint_l_g;       // set sftint
+input        tlu_clr_sftint_l_g;       // clr sftint
+input        tlu_wr_sftint_l_g;        // wr to sftint (asr 16)
+//
+// removed due to sftint recode
+// input [3:0]	 tlu_sftint_lvl14_int;  // sftint lvl 14 plus tick int
+input [3:0]	 tlu_sftint_penc_sel;
+input [3:0]	 tlu_tba_en_l;		   // tba reg write enable
+// logic moved to tlu_misctl
+// input		 tlu_retry_inst_m;	   // valid retry inst
+// input		 tlu_done_inst_m;	   // valid done inst
+// input		 tlu_dnrtry_inst_m;	   // valid done/retry inst - g
+// input		 tlu_dnrtry_inst_m_l;	   // valid done/retry inst - g
+// input [3:0]	 tlu_update_pc_l_m;	   // update pc or npc for a thread
+input [3:0]	 tlu_update_pc_l_w;	   // update pc or npc for a thread
+// modified due to timing
+// input  		 tlu_self_boot_rst_g;
+// input		 tlu_tl_gt_0_g;		   // trp lvl gt then 0
+// input  		 tlu_select_tba_g;
+// input tlu_select_htba_g;   // choosing htba for forming trappc/trapnpc 
+// input tlu_self_boot_rst_w2;
+// added for one-hot mux problem
+input [2:0] tlu_pc_mxsel_w2; 
+input tlu_tl_gt_0_w2;	  // trap level greater than 0
+input tlu_select_tba_w2;
+input [`TLU_THRD_NUM-1:0] tlu_update_pstate_l_w2; // pstate write enable
+input [`TLU_THRD_NUM-1:0] tlu_thrd_rsel_e; // read select for threaded regs
+input [3:0] tlu_pil;     // mx'ed pil
+input [2:0] tlu_trp_lvl; // mx'ed trp lvl
+
+input tlu_select_redmode;
+input tlu_tick_npt;       // npt bit of tick
+
+input [`TLU_ASR_DATA_WIDTH-4:0] tlu_tick_incr_dout;
+//
+// added and/or modified for hypervisor support
+input [1:0] tlu_hpstate_din_sel0; // sel source of tsa wdata
+input [1:0] tlu_hpstate_din_sel1; // sel source of tsa wdata
+input [1:0] tlu_hpstate_din_sel2; // sel source of tsa wdata
+input [1:0] tlu_hpstate_din_sel3; // sel source of tsa wdata
+input [`TLU_THRD_NUM-1:0] tlu_stickcmp_en_l; // stick cmp reg write enable
+input [`TLU_THRD_NUM-1:0] tlu_htickcmp_en_l; // htick cmp reg write enable
+input [`TLU_THRD_NUM-1:0] tlu_wr_hintp_g;    // wr control for hintp regs.
+input [`TLU_THRD_NUM-1:0] tlu_set_hintp_sel_g; // set control for hintp regs.
+input [`TLU_THRD_NUM-1:0] tlu_htba_en_l;     // htba reg write enable
+input [`TLU_THRD_NUM-1:0] tlu_update_hpstate_l_w2; // hpstate write enable
+input tlu_htickcmp_intdis; // int. disable bit of htick-cmp
+input [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl0; // global register value t0 
+input [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl1; // global register value t1 
+input [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl2; // global register value t2 
+input [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl3; // global register value t3 
+// mux select to read the new ASR registers
+input [3:1] tlu_rdpr_mx1_sel;
+input [3:1] tlu_rdpr_mx2_sel;
+input [2:1] tlu_rdpr_mx3_sel;
+input [2:1] tlu_rdpr_mx4_sel;
+input [3:1] tlu_rdpr_mx5_sel;
+input [2:0] tlu_rdpr_mx6_sel;
+input [3:0] tlu_rdpr_mx7_sel;
+//
+input [`TLU_THRD_NUM-1:0] ctu_sscan_tid;
+input [`TLU_ASR_DATA_WIDTH-1:0] tlu_pib_rsr_data_e; // rsr data from pib 
+
+input si; // scan-in
+input se; // scan-en
+
+/*AUTOOUTPUT*/
+// Beginning of automatic outputs (from unused autoinst outputs)
+// End of automatics
+//
+// modified due to bug 3017
+output [48:0] tlu_restore_pc_w1;  // trap pc or pc on retry.
+output [48:0] tlu_restore_npc_w1; // trap pc or pc on retry.
+output [33:0] tlu_partial_trap_pc_w1;
+// the tlu_exu_rsr_data_e will become obsolete, to be removed
+// added for timing
+// output [`TLU_ASR_DATA_WIDTH-1:0] tlu_exu_rsr_data_e; // rsr data to exu 
+output [`TLU_ASR_DATA_WIDTH-1:0] tlu_exu_rsr_data_m; // rsr data to exu 
+// modified due to timing violations
+// output [`TLU_ASR_DATA_WIDTH-1:0] tlu_pib_rsr_data_e; // trap pc or pc on retry.
+//
+// modified for hypervisor support
+output [`TLU_TSA_WIDTH-1:0] tsa_wdata; // wr data for tsa.
+//
+output [`TLU_THRD_NUM-1:0] tlu_int_pstate_ie;   // interrupt enable
+output [`TLU_THRD_NUM-1:0] local_pstate_ie;   // interrupt enable
+output [`TLU_THRD_NUM-1:0] tlu_ifu_pstate_pef;  // fp enable
+output [`TLU_THRD_NUM-1:0] tlu_lsu_pstate_cle;  // current little endian
+output [`TLU_THRD_NUM-1:0] tlu_lsu_pstate_priv; // privilege mode
+output [`TLU_THRD_NUM-1:0] tlu_int_redmode;	  // redmode
+output [`TLU_THRD_NUM-1:0] tlu_lsu_redmode;	  // redmode
+// modified for bug 1767
+// output   [1:0] tlu_pstate0_mmodel; // mem. model - thread0
+// output   [1:0] tlu_pstate1_mmodel; // mem. model - thread1
+// output   [1:0] tlu_pstate2_mmodel; // mem. model - thread2
+// output   [1:0] tlu_pstate3_mmodel; // mem. model - thread3
+// output   [3:0] tlu_pstate_tle;	  // trap little endian
+// output [`TLU_THRD_NUM-1:0] tlu_pstate_cle;  // current little endian
+// output [`TLU_THRD_NUM-1:0] tlu_pstate_priv; // privilege mode
+output [`TLU_THRD_NUM-1:0] tlu_pstate_am;   // address mask
+//
+// removed for bug 2187
+// output [`TLU_THRD_NUM-1:0] tlu_sftint_lvl14;
+output [`TLU_THRD_NUM-1:0] tlu_hpstate_priv; // hypervisor privilege	
+output [`TLU_THRD_NUM-1:0] tlu_hpstate_enb;  // hypervisor lite enb	
+output [`TLU_THRD_NUM-1:0] tlu_hpstate_tlz;  // hypervisor tlz 
+output [`TLU_THRD_NUM-1:0] tlu_hpstate_ibe;  // hypervisor instruction brkpt	
+output [`TLU_THRD_NUM-1:0] local_hpstate_priv; // hypervisor privilege	
+output [`TLU_THRD_NUM-1:0] tcl_hpstate_priv; // hypervisor privilege	
+output [`TLU_THRD_NUM-1:0] local_pstate_priv;  // pstate privilege	
+output [`TLU_THRD_NUM-1:0] local_hpstate_enb;  // hypervisor lite enb	
+output [`TLU_THRD_NUM-1:0] tcl_hpstate_enb;  // hypervisor lite enb	
+output [3:0] tlu_sftint_id;	
+// output       tlu_tick_match;	// tick to tick cmp match
+// output       tlu_stick_match;	// stick to tick cmp match
+// output       tlu_htick_match;	// htick to tick cmp match
+// output [`TLU_ASR_DATA_WIDTH-1:0] tlu_tick_incr_din;
+output [`TLU_ASR_DATA_WIDTH-3:0] tlu_tick_incr_din;
+//
+// modified for hypervisor support
+// output	[2:0]	tlu_restore_globals; // restored global regs
+//
+output [`TSA_GLOBAL_WIDTH-1:0] tlu_dnrtry_global_g; // restored globals 
+output [`TLU_THRD_NUM-1:0]     tlu_hintp;
+// 
+// current asi state 
+output [`TLU_ASI_STATE_WIDTH-1:0] tlu_asi_state_e;
+//
+// modified due to race key word limitation
+// output [62:0] tlu_sscan_test_data;
+output [`TDP_SSCAN_WIDTH-1:0] tlu_sscan_test_data;
+output		  so; // scan-out;
+
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+//
+// local reset was added to abide by the Niagara reset methodology 
+wire        local_rst; // local reset
+wire        se_l; // testmode_l replacement 
+//
+// rdpr mux outputs
+wire [`TLU_ASR_DATA_WIDTH-1:0] tlu_rdpr_mx1_out;
+wire [3:0]  tlu_rdpr_mx2_out;
+wire [`SFTINT_WIDTH-1:0]  tlu_rdpr_mx3_out;
+wire [`RDSR_TSTATE_WIDTH-1:0]  tlu_rdpr_mx4_out;
+// 
+// constructing one-hot selects
+wire rdpr_mx1_onehot_sel, rdpr_mx2_onehot_sel; 
+wire rdpr_mx3_onehot_sel, rdpr_mx4_onehot_sel; 
+wire rdpr_mx5_onehot_sel, rdpr_mx6_onehot_sel; 
+//
+wire  [32:0] true_tba0,true_tba1,true_tba2,true_tba3;
+wire  [60:0] true_tick;
+// modified due to bug 3017
+wire  [48:0] true_pc0,true_pc1,true_pc2,true_pc3;
+// wire  [47:0] sscan_pc; 
+wire  [`TDP_SSCAN_WIDTH-1:0] sscan_data_test0;
+wire  [`TDP_SSCAN_WIDTH-1:0] sscan_data_test1;
+wire  [`TDP_SSCAN_WIDTH-1:0] sscan_data_test2;
+wire  [`TDP_SSCAN_WIDTH-1:0] sscan_data_test3;
+wire  [`TDP_SSCAN_WIDTH-1:0] tdp_sscan_test_data;
+wire  [`TLU_THRD_NUM-1:0] sscan_tid_sel; 
+wire  [48:0] true_npc0,true_npc1,true_npc2,true_npc3;
+// wire  [47:0] true_npc0,true_npc1,true_npc2,true_npc3;
+// wire  [47:0] true_pc0,true_pc1,true_pc2,true_pc3;
+// wire  [47:0] sscan_pc; 
+// wire  [47:0] normal_trap_pc, normal_trap_npc;
+//
+// modified for hypervisor support
+wire [`TLU_TSA_WIDTH-1:0] trap_tsa_wdata;
+wire [`TLU_TSA_WIDTH-1:0] trap0_tsa_wdata,trap1_tsa_wdata;
+wire [`TLU_TSA_WIDTH-1:0] trap2_tsa_wdata,trap3_tsa_wdata;
+wire [`TLU_TSA_WIDTH-1:0] wrpr_tsa_wdata;
+wire [`TLU_TSA_WIDTH-1:0] tsa_wdata;
+wire [`RDSR_TSTATE_WIDTH-1:0]  tstate_rdata;
+wire [1:0]  tstate_dummy_zero;
+wire [`WSR_TSTATE_WIDTH-1:0]   compose_tstate;
+wire [`TSA_HTSTATE_WIDTH-1:0]  compose_htstate;
+wire [`TSA_GLOBAL_WIDTH-1:0]   global_rdata;	
+// wire [`TLU_ASR_DATA_WIDTH-1:0] wsr_data_w;	
+wire [`SFTINT_WIDTH-1:0] wsr_data_w;	
+// reduced width to 48 due to lint violations
+wire [47:0] wsr_data_w2;	
+//
+// modified for bug 3017
+// wire  [47:2] trap_pc0,trap_pc1,trap_pc2,trap_pc3;
+// wire  [47:2] trap_npc0,trap_npc1,trap_npc2,trap_npc3;
+wire  [48:2] trap_pc0,trap_pc1,trap_pc2,trap_pc3;
+wire  [48:2] trap_npc0,trap_npc1,trap_npc2,trap_npc3;
+wire   [7:0] trap_ccr0,trap_ccr1,trap_ccr2,trap_ccr3;
+wire   [7:0] trap_asi0,trap_asi1,trap_asi2,trap_asi3;
+wire   [2:0] trap_cwp0,trap_cwp1,trap_cwp2,trap_cwp3;
+wire   [2:0] tlu_cwp0,tlu_cwp1,tlu_cwp2,tlu_cwp3;
+wire   [`TLU_ASI_STATE_WIDTH-1:0] imm_asi_e; 
+wire   [`TLU_ASI_STATE_WIDTH-1:0] asi_state_reg_e; 
+wire   [`TLU_ASI_STATE_WIDTH-1:0] asi_state_final_e; 
+wire   imm_asi_vld_e;
+//
+// modified due to tickcmp, stickcmp and sftint cleanup
+// wire  [15:0] sftint0, sftint1, sftint2, sftint3;
+// wire  [15:1] sftint_set_din, sftint_clr_din, sftint_wr_din;
+wire  [`SFTINT_WIDTH-1:0] sftint0, sftint1, sftint2, sftint3;
+wire  [`SFTINT_WIDTH-1:0] sftint_set_din, sftint_clr_din, sftint_wr_din;
+wire [`SFTINT_WIDTH-1:0] sftint_din;
+wire [`SFTINT_WIDTH-1:0] sftint;
+wire [`TLU_THRD_NUM-1:0] sftint_b0_din; 
+wire [`TLU_THRD_NUM-1:0] sftint_b0_en;
+wire [`TLU_THRD_NUM-1:0] sftint_b15_din; 
+wire [`TLU_THRD_NUM-1:0] sftint_b15_en;
+wire [`TLU_THRD_NUM-1:0] sftint_b16_din; 
+wire [`TLU_THRD_NUM-1:0] sftint_b16_en; 
+wire [`TLU_THRD_NUM-1:0] sftint_lvl14;
+wire [3:0] sftin_din_mxsel;
+// recoded for one-hot problem during reset
+// wire sftint_sel_onehot_g;
+//
+// added for PIB support
+wire	     tcmp0_clk, tcmp1_clk; 
+wire	     tcmp2_clk, tcmp3_clk;
+wire [14:0]  sftint_penc_din;
+wire	     sftint0_clk,sftint1_clk;
+wire	     sftint2_clk,sftint3_clk;
+// 
+wire [32:0] tba_data;
+wire [32:0] tba_rdata;
+wire [33:0] tlu_rstvaddr_base;
+wire [`TLU_HTBA_WIDTH-1:0] htba_data;
+wire        tba0_clk,tba1_clk,tba2_clk,tba3_clk;
+// modified for bug 3017
+// wire [46:0] tsa_pc_m,tsa_npc_m;
+// wire [48:0] dnrtry_pc,dnrtry_npc;
+wire [48:0] restore_pc_w2;
+wire [48:0] restore_npc_w2;
+// wire [48:0]	pc_new, npc_new;
+// wire [48:0]	pc_new_w, npc_new_w;
+wire [33:0] partial_trap_pc_w2;
+wire        pc0_clk,pc1_clk,pc2_clk,pc3_clk;
+// wire [`TLU_TSA_WIDTH-1:0] tsa_data_m;
+wire [`TLU_ASR_DATA_WIDTH-1:0] true_tickcmp0, true_tickcmp1;
+wire [`TLU_ASR_DATA_WIDTH-1:0] true_tickcmp2, true_tickcmp3;
+wire [`TLU_ASR_DATA_WIDTH-1:0] tickcmp_rdata;
+wire [`TLU_THRD_NUM-1:0] tickcmp_intdis_din;
+wire [`TLU_THRD_NUM-1:0] tickcmp_intdis_en;
+wire [`TLU_THRD_NUM-1:0] tickcmp_int;
+wire [`TLU_THRD_NUM-1:0] tlu_set_hintp_g;
+wire [`TLU_THRD_NUM-1:0] tlu_hintp_en_l_g;
+wire tlu_htick_match;	// htick to tick cmp match
+wire tick_match;
+wire [`TLU_ASR_DATA_WIDTH-4:0] tickcmp_data;
+wire [`TLU_ASR_DATA_WIDTH-2:2] tick_din;
+// reg	 [`TLU_ASR_DATA_WIDTH-1:0] tlu_rsr_data_e;
+wire [`PSTATE_TRUE_WIDTH-1:0] true_pstate0,true_pstate1;
+wire [`PSTATE_TRUE_WIDTH-1:0] true_pstate2,true_pstate3;
+// wire [`TLU_THRD_NUM-1:0] tlu_pstate_priv; // privilege mode
+// added for hypervisor support 
+wire [`TSA_PSTATE_WIDTH-1:0] trap_pstate0,trap_pstate1;
+wire [`TSA_PSTATE_WIDTH-1:0] trap_pstate2,trap_pstate3;
+//
+// wire [`PSTATE_TRUE_WIDTH-1:0] dnrtry_pstate;
+// wire [`PSTATE_TRUE_WIDTH-1:0] dnrtry_pstate_m;	
+// wire [`PSTATE_TRUE_WIDTH-1:0] wsr_data_pstate_g;	
+wire [`WSR_PSTATE_VR_WIDTH-1:0] dnrtry_pstate_m;	
+wire [`WSR_PSTATE_VR_WIDTH-1:0] dnrtry_pstate_g;	
+wire [`WSR_PSTATE_VR_WIDTH-1:0] dnrtry_pstate_w2;
+// removed for timing
+// wire [`WSR_PSTATE_VR_WIDTH-1:0] wsr_data_pstate_g;
+wire [`WSR_PSTATE_VR_WIDTH-1:0] wsr_data_pstate_w2;	
+//
+// modified for bug 1767
+//wire [`PSTATE_TRUE_WIDTH-1:0] ntrap_pstate;
+// wire [`PSTATE_TRUE_WIDTH-1:0] ntrap_pstate0;
+// wire [`PSTATE_TRUE_WIDTH-1:0] ntrap_pstate1;
+// wire [`PSTATE_TRUE_WIDTH-1:0] ntrap_pstate2;
+// wire [`PSTATE_TRUE_WIDTH-1:0] ntrap_pstate3;
+wire [`WSR_PSTATE_VR_WIDTH-1:0] ntrap_pstate0;
+wire [`WSR_PSTATE_VR_WIDTH-1:0] ntrap_pstate1;
+wire [`WSR_PSTATE_VR_WIDTH-1:0] ntrap_pstate2;
+wire [`WSR_PSTATE_VR_WIDTH-1:0] ntrap_pstate3;
+// modified for bug 2161 and 2584
+wire pstate_priv_set, hpstate_priv_set; 
+wire [`TLU_THRD_NUM-1:0] pstate_priv_thrd_set;
+// wire [`TLU_THRD_NUM-1:0] pstate_priv_update_g;
+wire [`TLU_THRD_NUM-1:0] pstate_priv_update_w2;
+// wire [`TLU_THRD_NUM-1:0] hpstate_dnrtry_priv_g;
+wire [`TLU_THRD_NUM-1:0] hpstate_dnrtry_priv_w2;
+wire [`TLU_THRD_NUM-1:0] hpstate_enb_set;
+wire [`TLU_THRD_NUM-1:0] hpstate_ibe_set;
+wire [`TLU_THRD_NUM-1:0] hpstate_tlz_set;
+// wire [`TLU_THRD_NUM-1:0] hpstate_priv_update_g;
+wire [`TLU_THRD_NUM-1:0] hpstate_priv_update_w2;
+// (net declarations only; the hpstate_* vectors above are `TLU_THRD_NUM bits wide)
+// removed for bug 2588
+// wire [1:0] tlu_select_mmodel0;
+// wire [1:0] tlu_select_mmodel1;
+// wire [1:0] tlu_select_mmodel2;
+// wire [1:0] tlu_select_mmodel3;
+wire [`TLU_THRD_NUM-1:0] tlu_select_tle;
+wire [`TLU_THRD_NUM-1:0] tlu_select_cle;
+// wire [1:0] tlu_pstate0_mmodel;	// mem. model - thread0
+// wire [1:0] tlu_pstate1_mmodel;	// mem. model - thread1
+// wire [1:0] tlu_pstate2_mmodel;	// mem. model - thread2
+// wire [1:0] tlu_pstate3_mmodel;	// mem. model - thread3
+wire [`TLU_THRD_NUM-1:0] tlu_pstate_tle; // trap little endian
+// restore_pstate* nets are `WSR_PSTATE_VR_WIDTH wide; the `PSTATE_TRUE_WIDTH forms stay commented out
+// modified for bug 1575
+// wire	[`PSTATE_TRUE_WIDTH-1:0]	restore_pstate;
+// wire [`PSTATE_TRUE_WIDTH-1:0]	restore_pstate0;
+// wire [`PSTATE_TRUE_WIDTH-1:0]	restore_pstate1;
+// wire [`PSTATE_TRUE_WIDTH-1:0]	restore_pstate2; 
+// wire [`PSTATE_TRUE_WIDTH-1:0]	restore_pstate3;
+wire [`WSR_PSTATE_VR_WIDTH-1:0]	restore_pstate0;
+wire [`WSR_PSTATE_VR_WIDTH-1:0]	restore_pstate1;
+wire [`WSR_PSTATE_VR_WIDTH-1:0]	restore_pstate2; 
+wire [`WSR_PSTATE_VR_WIDTH-1:0]	restore_pstate3;
+wire [`WSR_PSTATE_VR_WIDTH-1:0]	restore_pstate0_w3;
+wire [`WSR_PSTATE_VR_WIDTH-1:0]	restore_pstate1_w3;
+wire [`WSR_PSTATE_VR_WIDTH-1:0]	restore_pstate2_w3; 
+wire [`WSR_PSTATE_VR_WIDTH-1:0]	restore_pstate3_w3;
+wire tlu_pstate_nt_sel0, tlu_pstate_nt_sel1;
+wire tlu_pstate_nt_sel2, tlu_pstate_nt_sel3;
+wire tlu_pstate_wsr_sel0, tlu_pstate_wsr_sel1;
+wire tlu_pstate_wsr_sel2, tlu_pstate_wsr_sel3;
+wire hpstate_redmode;
+wire pstate0_clk,pstate1_clk,pstate2_clk,pstate3_clk;
+
+//
+// added or modified for hypervisor support
+// wire	[2:0]   global_sel;	
+wire stcmp0_clk, stcmp1_clk, stcmp2_clk, stcmp3_clk;
+wire htcmp0_clk, htcmp1_clk, htcmp2_clk, htcmp3_clk;
+wire tlu_hpstate_hnt_sel0, tlu_hpstate_hnt_sel1;
+wire tlu_hpstate_hnt_sel2, tlu_hpstate_hnt_sel3;
+wire tlu_hpstate_wsr_sel0, tlu_hpstate_wsr_sel1;
+wire tlu_hpstate_wsr_sel2, tlu_hpstate_wsr_sel3;
+wire pc_bit15_sel;
+wire htba0_clk,htba1_clk,htba2_clk,htba3_clk;
+wire hpstate0_clk,hpstate1_clk,hpstate2_clk,hpstate3_clk;
+wire hintp0_clk,hintp1_clk,hintp2_clk,hintp3_clk;
+wire hintp_rdata;
+wire [`TLU_THRD_NUM-1:0]       hintp_din;
+// added or modified due to stickcmp clean-up
+// wire [`TLU_ASR_DATA_WIDTH-2:0] stickcmp_rdata;
+// wire [`TLU_ASR_DATA_WIDTH-2:0] true_stickcmp0, true_stickcmp1;
+// wire [`TLU_ASR_DATA_WIDTH-2:0] true_stickcmp2, true_stickcmp3;
+wire [`TLU_ASR_DATA_WIDTH-1:0] stickcmp_rdata;
+wire [`TLU_ASR_DATA_WIDTH-1:0] true_stickcmp0, true_stickcmp1;
+wire [`TLU_ASR_DATA_WIDTH-1:0] true_stickcmp2, true_stickcmp3;
+wire [`TLU_THRD_NUM-1:0] stickcmp_intdis_din;
+wire [`TLU_THRD_NUM-1:0] stickcmp_intdis_en; 
+wire [`TLU_THRD_NUM-1:0] stickcmp_int; 
+wire stick_match;
+wire [`TLU_ASR_DATA_WIDTH-4:0] stickcmp_data;
+// htickcmp_rdata / true_htickcmp* are declared [`TLU_ASR_DATA_WIDTH-2:0], one bit narrower than the stickcmp nets above
+wire [`TLU_ASR_DATA_WIDTH-2:0] htickcmp_rdata;
+wire [`TLU_ASR_DATA_WIDTH-4:0] htickcmp_data;
+wire [`TLU_ASR_DATA_WIDTH-2:0] true_htickcmp0, true_htickcmp1;
+wire [`TLU_ASR_DATA_WIDTH-2:0] true_htickcmp2, true_htickcmp3;
+wire [`TLU_HPSTATE_WIDTH-1:0]  true_hpstate0,true_hpstate1;
+wire [`TLU_HPSTATE_WIDTH-1:0]  true_hpstate2,true_hpstate3;
+wire [`TLU_HPSTATE_WIDTH-1:0]  true_hpstate;
+wire [`TSA_HTSTATE_WIDTH-1:0]  tsa_dnrtry_hpstate_m; 
+wire [`TSA_HTSTATE_WIDTH-1:0]  tsa_dnrtry_hpstate_g; 
+wire [`TSA_HTSTATE_WIDTH-1:0]  tsa_dnrtry_hpstate_w2; 
+// wire [`TLU_HPSTATE_WIDTH-1:0]  dnrtry_hpstate0_g, dnrtry_hpstate1_g; 
+wire [`TLU_HPSTATE_WIDTH-1:0]  dnrtry_hpstate0_w2, dnrtry_hpstate1_w2; 
+// wire [`TLU_HPSTATE_WIDTH-1:0]  dnrtry_hpstate2_g, dnrtry_hpstate3_g; 
+wire [`TLU_HPSTATE_WIDTH-1:0]  dnrtry_hpstate2_w2, dnrtry_hpstate3_w2; 
+// wire [`TLU_HPSTATE_WIDTH-1:0]  hntrap_hpstate0_g, hntrap_hpstate1_g; 
+wire [`TLU_HPSTATE_WIDTH-1:0]  hntrap_hpstate0_w2, hntrap_hpstate1_w2; 
+// wire [`TLU_HPSTATE_WIDTH-1:0]  hntrap_hpstate2_g, hntrap_hpstate3_g; 
+wire [`TLU_HPSTATE_WIDTH-1:0]  hntrap_hpstate2_w2, hntrap_hpstate3_w2; 
+wire [`TLU_HPSTATE_WIDTH-1:0]  wsr_data_hpstate_w2; 
+wire [`TLU_HPSTATE_WIDTH-1:0]  restore_hpstate0, restore_hpstate1; 
+wire [`TLU_HPSTATE_WIDTH-1:0]  restore_hpstate2, restore_hpstate3; 
+wire [`TLU_HTBA_WIDTH-1:0]	   true_htba0, true_htba1;
+wire [`TLU_HTBA_WIDTH-1:0]	   true_htba2, true_htba3;
+wire [`TSA_GLOBAL_WIDTH-1:0]   dnrtry_global_m;	
+wire [`TLU_ASR_DATA_WIDTH-1:0] tlu_rdpr_mx5_out;
+wire [`SFTINT_WIDTH-1:0]       tlu_rdpr_mx6_out;
+wire [`TLU_ASR_DATA_WIDTH-1:0] tlu_rdpr_mx7_out;
+wire [`TLU_ASR_DATA_WIDTH-1:0] tlu_exu_rsr_data_e;
+wire clk; // local clock name; driven from rclk by the "clock rename" assign that follows
+//
+//=========================================================================================
+// create local reset
+// local_rst is what the dffr_s / dffre_s instances in this module use on their .rst pin
+assign local_rst = tlu_rst;
+assign se_l = ~se;   // inverted scan enable; wired to .tmb_l of the clken_buf cells
+
+// clock rename
+assign clk = rclk;   // every flop and clken_buf in this section is clocked from clk
+
+//=========================================================================================
+// Design Notes :
+// HTSTATE-	       4 (ENB from HPSTATE is not saved)	
+// TPC-		      47 (48-2)VA+(1)VA_HOLE
+// TNPC-		  47 (48-2)VA+(1)VA_HOLE
+// TSTATE.GL-	   2 (Only two significant bits are saved)
+// TSTATE.CCR-     8
+// TSTATE.ASI-	   8
+// TSTATE.PSTATE-  8 (RED, IG, MG and AG bits are not used)
+// TSTATE.CWP-	   3
+// TRAPTYPE-	   9
+//========================================================
+// Total         136
+
+//=========================================================================================
+//	Timing Diagram	
+//=========================================================================================
+
+
+// WRITE TO TSA and other trap related registers.
+//	|	|	|		|		|
+//	|E	|M	|	W	|  	W2	| Integer
+//	|	|	| exceptions	| push tsa	|
+//	|	|	| reported	| xmit pc	|
+//	|	|	|		|		|
+//	|E	|M	|	G 	|	W2	| Long-Latency
+//	|	|	| exceptions	|		|
+//	|	|	| reported	| push tsa	|
+//	|	|	|		| xmit pc	|
+
+//=========================================================================================
+//	Generate TSA Control and Data
+//=========================================================================================
+
+// modified for bug 3017
+assign trap_pc0[48:2]  = true_pc0[48:2];         // thread 0
+assign trap_npc0[48:2] = true_npc0[48:2];
+assign trap_ccr0[7:0]  = exu_tlu_ccr0_w[7:0];
+
+assign trap_pc1[48:2]  = true_pc1[48:2];         // thread 1
+assign trap_npc1[48:2] = true_npc1[48:2];
+assign trap_ccr1[7:0]  = exu_tlu_ccr1_w[7:0];
+
+assign trap_pc2[48:2]  = true_pc2[48:2];         // thread 2
+assign trap_npc2[48:2] = true_npc2[48:2];
+assign trap_ccr2[7:0]  = exu_tlu_ccr2_w[7:0];
+
+assign trap_pc3[48:2]  = true_pc3[48:2];         // thread 3
+assign trap_npc3[48:2] = true_npc3[48:2];
+assign trap_ccr3[7:0]  = exu_tlu_ccr3_w[7:0];
+// assign	trap_cwp0[2:0] = exu_tlu_cwp0_w[2:0];
+// assign	trap_cwp1[2:0] = exu_tlu_cwp1_w[2:0];
+// assign	trap_cwp2[2:0] = exu_tlu_cwp2_w[2:0];
+// assign	trap_cwp3[2:0] = exu_tlu_cwp3_w[2:0];
+// Each thread's exu_tlu_cwpN[2:0] is registered into tlu_cwpN[2:0] by a 3-bit dff_s on clk.
+// added for bug 3695
+dff_s #(3) dff_tlu_cwp0 (
+    .din (exu_tlu_cwp0[2:0]),
+    .q   (tlu_cwp0[2:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          // scan-in / scan-out are left unconnected on these instances
+    .so  ()
+);
+
+dff_s #(3) dff_tlu_cwp1 (
+    .din (exu_tlu_cwp1[2:0]),
+    .q   (tlu_cwp1[2:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s #(3) dff_tlu_cwp2 (
+    .din (exu_tlu_cwp2[2:0]),
+    .q   (tlu_cwp2[2:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s #(3) dff_tlu_cwp3 (
+    .din (exu_tlu_cwp3[2:0]),
+    .q   (tlu_cwp3[2:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// trap_cwpN[2:0] takes tlu_cwpN[2:0] through a dffe_s whose .en is tlu_trap_cwp_en[N] (presumably active-high).
+// modified for bug 3499 and 3695
+dffe_s #(3) dffe_trap_cwp0 (
+    // .din (exu_tlu_cwp0[2:0]),
+    .din (tlu_cwp0[2:0]),
+    .q   (trap_cwp0[2:0]),
+    .en  (tlu_trap_cwp_en[0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffe_s #(3) dffe_trap_cwp1 (
+    // .din (exu_tlu_cwp1[2:0]),
+    .din (tlu_cwp1[2:0]),
+    .q   (trap_cwp1[2:0]),
+    .en  (tlu_trap_cwp_en[1]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffe_s #(3) dffe_trap_cwp2 (
+    // .din (exu_tlu_cwp2[2:0]),
+    .din (tlu_cwp2[2:0]),
+    .q   (trap_cwp2[2:0]),
+    .en  (tlu_trap_cwp_en[2]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffe_s #(3) dffe_trap_cwp3 (
+    // .din (exu_tlu_cwp3[2:0]),
+    .din (tlu_cwp3[2:0]),
+    .q   (trap_cwp3[2:0]),
+    .en  (tlu_trap_cwp_en[3]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// ASI saved on a trap: each thread's trap_asiN is a direct copy of lsu_asi_regN[7:0].
+assign	trap_asi0[7:0] = lsu_asi_reg0[7:0];
+assign	trap_asi1[7:0] = lsu_asi_reg1[7:0];
+assign	trap_asi2[7:0] = lsu_asi_reg2[7:0];
+assign	trap_asi3[7:0] = lsu_asi_reg3[7:0];
+// 
+// staging the immediate asi
+// ifu_lsu_imm_asi_d and its valid bit are registered into imm_asi_e / imm_asi_vld_e
+dff_s #(`TLU_ASI_STATE_WIDTH) dff_imm_asi_e (
+    .din (ifu_lsu_imm_asi_d[`TLU_ASI_STATE_WIDTH-1:0]),
+    .q   (imm_asi_e[`TLU_ASI_STATE_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffr_s dffr_imm_asi_vld_e (
+     .din (ifu_lsu_imm_asi_vld_d),
+     .q   (imm_asi_vld_e),
+     .clk (clk),
+	 .rst (local_rst),          // only the valid bit has a reset; the data flop above has none
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+// asi_state_reg_e: the lsu_asi_regN value of the thread selected by tlu_thrd_rsel_e[3:0]
+// generating the current asi state
+mux4ds  #(`TLU_ASI_STATE_WIDTH) mx_tlu_asi_state_e (
+        .in0    (lsu_asi_reg0[`TLU_ASI_STATE_WIDTH-1:0]),
+        .in1    (lsu_asi_reg1[`TLU_ASI_STATE_WIDTH-1:0]),
+        .in2    (lsu_asi_reg2[`TLU_ASI_STATE_WIDTH-1:0]),
+        .in3    (lsu_asi_reg3[`TLU_ASI_STATE_WIDTH-1:0]),
+        .sel0   (tlu_thrd_rsel_e[0]),
+        .sel1   (tlu_thrd_rsel_e[1]),
+        .sel2   (tlu_thrd_rsel_e[2]),
+        .sel3   (tlu_thrd_rsel_e[3]),
+        // modified due to bug 2442
+        // .dout   (tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0])
+        .dout   (asi_state_reg_e[`TLU_ASI_STATE_WIDTH-1:0])
+); 
+// final ASI: imm_asi_e (in0) when imm_asi_vld_e is set, otherwise asi_state_reg_e (in1)
+// added for bug 2442
+// generating the current asi state
+mux2ds #(`TLU_ASI_STATE_WIDTH) mx_asi_state_final_e (
+       .in0  (imm_asi_e[`TLU_ASI_STATE_WIDTH-1:0]),
+	   .in1  (asi_state_reg_e[`TLU_ASI_STATE_WIDTH-1:0]),
+       .sel0 (imm_asi_vld_e),  	
+	   .sel1 (~imm_asi_vld_e),
+       .dout (asi_state_final_e[`TLU_ASI_STATE_WIDTH-1:0])
+); 
+
+assign tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0] =
+           asi_state_final_e[`TLU_ASI_STATE_WIDTH-1:0];
+//
+// thread 0
+// PSTATE image for the TSA: VRANGE2 slice, two constant-zero bits, VRANGE1 slice
+assign trap_pstate0 =
+       {true_pstate0[`PSTATE_VRANGE2_HI:`PSTATE_VRANGE2_LO], 2'b0,
+        true_pstate0[`PSTATE_VRANGE1_HI:`PSTATE_VRANGE1_LO]};
+//
+// modified due to hpstate.ibe addition
+assign trap0_tsa_wdata[`TLU_HTSTATE_HI:`TLU_HTSTATE_LO] =
+       {true_hpstate0[`TLU_HPSTATE_WIDTH-1], true_hpstate0[`TSA_HTSTATE_WIDTH-2:0]};
+//
+// modified for bug 3017
+//
+// PC field
+assign trap0_tsa_wdata[`TLU_PC_HI:`TLU_PC_LO] = trap_pc0[48:2];
+//
+// NPC field
+assign trap0_tsa_wdata[`TLU_NPC_HI:`TLU_NPC_LO] = trap_npc0[48:2];
+//
+// GL field
+assign trap0_tsa_wdata[`TLU_GL_HI:`TLU_GL_LO] = tlu_gl_lvl0[`TSA_GLOBAL_WIDTH-1:0];
+//
+// CCR field
+assign trap0_tsa_wdata[`TLU_CCR_HI:`TLU_CCR_LO] = trap_ccr0[`TSA_CCR_WIDTH-1:0];
+//
+// ASI field
+assign trap0_tsa_wdata[`TLU_ASI_HI:`TLU_ASI_LO] = trap_asi0[`TSA_ASI_WIDTH-1:0];
+//
+// PSTATE field
+assign trap0_tsa_wdata[`TLU_PSTATE_HI:`TLU_PSTATE_LO] = trap_pstate0[`TSA_PSTATE_WIDTH-1:0];
+//
+// CWP field
+assign trap0_tsa_wdata[`TLU_CWP_HI:`TLU_CWP_LO] = trap_cwp0[`TSA_CWP_WIDTH-1:0];
+//
+// trap type field (tlu_final_ttype_w2 is not per-thread)
+assign trap0_tsa_wdata[`TLU_TT_HI:`TLU_TT_LO] = tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0];
+//
+// thread 1
+// PSTATE image for the TSA: VRANGE2 slice, two constant-zero bits, VRANGE1 slice
+assign trap_pstate1 =
+       {true_pstate1[`PSTATE_VRANGE2_HI:`PSTATE_VRANGE2_LO], 2'b0,
+        true_pstate1[`PSTATE_VRANGE1_HI:`PSTATE_VRANGE1_LO]};
+//
+// modified due to hpstate.ibe addition
+assign trap1_tsa_wdata[`TLU_HTSTATE_HI:`TLU_HTSTATE_LO] =
+       {true_hpstate1[`TLU_HPSTATE_WIDTH-1], true_hpstate1[`TSA_HTSTATE_WIDTH-2:0]};
+//
+// PC field
+assign trap1_tsa_wdata[`TLU_PC_HI:`TLU_PC_LO] = trap_pc1[48:2];
+//
+// NPC field
+assign trap1_tsa_wdata[`TLU_NPC_HI:`TLU_NPC_LO] = trap_npc1[48:2];
+//
+// GL field
+assign trap1_tsa_wdata[`TLU_GL_HI:`TLU_GL_LO] = tlu_gl_lvl1[`TSA_GLOBAL_WIDTH-1:0];
+//
+// CCR field
+assign trap1_tsa_wdata[`TLU_CCR_HI:`TLU_CCR_LO] = trap_ccr1[`TSA_CCR_WIDTH-1:0];
+//
+// ASI field
+assign trap1_tsa_wdata[`TLU_ASI_HI:`TLU_ASI_LO] = trap_asi1[`TSA_ASI_WIDTH-1:0];
+//
+// PSTATE field
+assign trap1_tsa_wdata[`TLU_PSTATE_HI:`TLU_PSTATE_LO] = trap_pstate1[`TSA_PSTATE_WIDTH-1:0];
+//
+// CWP field
+assign trap1_tsa_wdata[`TLU_CWP_HI:`TLU_CWP_LO] = trap_cwp1[`TSA_CWP_WIDTH-1:0];
+//
+// trap type field (tlu_final_ttype_w2 is not per-thread)
+assign trap1_tsa_wdata[`TLU_TT_HI:`TLU_TT_LO] = tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0];
+//
+// thread 2
+// PSTATE image for the TSA: VRANGE2 slice, two constant-zero bits, VRANGE1 slice
+assign trap_pstate2 =
+       {true_pstate2[`PSTATE_VRANGE2_HI:`PSTATE_VRANGE2_LO], 2'b0,
+        true_pstate2[`PSTATE_VRANGE1_HI:`PSTATE_VRANGE1_LO]};
+//
+// modified due to hpstate.ibe addition
+assign trap2_tsa_wdata[`TLU_HTSTATE_HI:`TLU_HTSTATE_LO] =
+       {true_hpstate2[`TLU_HPSTATE_WIDTH-1], true_hpstate2[`TSA_HTSTATE_WIDTH-2:0]};
+//
+// PC field
+assign trap2_tsa_wdata[`TLU_PC_HI:`TLU_PC_LO] = trap_pc2[48:2];
+//
+// NPC field
+assign trap2_tsa_wdata[`TLU_NPC_HI:`TLU_NPC_LO] = trap_npc2[48:2];
+//
+// GL field
+assign trap2_tsa_wdata[`TLU_GL_HI:`TLU_GL_LO] = tlu_gl_lvl2[`TSA_GLOBAL_WIDTH-1:0];
+//
+// CCR field
+assign trap2_tsa_wdata[`TLU_CCR_HI:`TLU_CCR_LO] = trap_ccr2[`TSA_CCR_WIDTH-1:0];
+//
+// ASI field
+assign trap2_tsa_wdata[`TLU_ASI_HI:`TLU_ASI_LO] = trap_asi2[`TSA_ASI_WIDTH-1:0];
+//
+// PSTATE field
+assign trap2_tsa_wdata[`TLU_PSTATE_HI:`TLU_PSTATE_LO] = trap_pstate2[`TSA_PSTATE_WIDTH-1:0];
+//
+// CWP field
+assign trap2_tsa_wdata[`TLU_CWP_HI:`TLU_CWP_LO] = trap_cwp2[`TSA_CWP_WIDTH-1:0];
+//
+// trap type field (tlu_final_ttype_w2 is not per-thread)
+assign trap2_tsa_wdata[`TLU_TT_HI:`TLU_TT_LO] = tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0];
+//
+// thread 3
+// PSTATE image for the TSA: VRANGE2 slice, two constant-zero bits, VRANGE1 slice
+assign trap_pstate3 =
+       {true_pstate3[`PSTATE_VRANGE2_HI:`PSTATE_VRANGE2_LO], 2'b0,
+        true_pstate3[`PSTATE_VRANGE1_HI:`PSTATE_VRANGE1_LO]};
+//
+// modified due to hpstate.ibe addition
+assign trap3_tsa_wdata[`TLU_HTSTATE_HI:`TLU_HTSTATE_LO] =
+       {true_hpstate3[`TLU_HPSTATE_WIDTH-1], true_hpstate3[`TSA_HTSTATE_WIDTH-2:0]};
+//
+// PC field
+assign trap3_tsa_wdata[`TLU_PC_HI:`TLU_PC_LO] = trap_pc3[48:2];
+//
+// NPC field
+assign trap3_tsa_wdata[`TLU_NPC_HI:`TLU_NPC_LO] = trap_npc3[48:2];
+//
+// GL field
+assign trap3_tsa_wdata[`TLU_GL_HI:`TLU_GL_LO] = tlu_gl_lvl3[`TSA_GLOBAL_WIDTH-1:0];
+//
+// CCR field
+assign trap3_tsa_wdata[`TLU_CCR_HI:`TLU_CCR_LO] = trap_ccr3[`TSA_CCR_WIDTH-1:0];
+//
+// ASI field
+assign trap3_tsa_wdata[`TLU_ASI_HI:`TLU_ASI_LO] = trap_asi3[`TSA_ASI_WIDTH-1:0];
+//
+// PSTATE field
+assign trap3_tsa_wdata[`TLU_PSTATE_HI:`TLU_PSTATE_LO] = trap_pstate3[`TSA_PSTATE_WIDTH-1:0];
+//
+// CWP field
+assign trap3_tsa_wdata[`TLU_CWP_HI:`TLU_CWP_LO] = trap_cwp3[`TSA_CWP_WIDTH-1:0];
+//
+// trap type field (tlu_final_ttype_w2 is not per-thread)
+assign trap3_tsa_wdata[`TLU_TT_HI:`TLU_TT_LO] = tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0];
+// trap_tsa_wdata: the trapN_tsa_wdata vector of the thread selected by tlu_thrd_wsel_w2[3:0]
+// modified for timing: tlu_thrd_wsel_g -> tlu_thrd_wsel_w2
+`ifdef FPGA_SYN_1THREAD
+   assign trap_tsa_wdata[`TLU_TSA_WIDTH-1:0] = trap0_tsa_wdata[`TLU_TSA_WIDTH-1:0]; // single-thread build: thread 0 data used directly, no mux
+`else
+   
+mux4ds  #(`TLU_TSA_WIDTH) tsawdsel (
+        .in0    (trap0_tsa_wdata[`TLU_TSA_WIDTH-1:0]),
+        .in1    (trap1_tsa_wdata[`TLU_TSA_WIDTH-1:0]),
+        .in2    (trap2_tsa_wdata[`TLU_TSA_WIDTH-1:0]),
+        .in3    (trap3_tsa_wdata[`TLU_TSA_WIDTH-1:0]),
+        .sel0   (tlu_thrd_wsel_w2[0]),
+        .sel1   (tlu_thrd_wsel_w2[1]),
+        .sel2   (tlu_thrd_wsel_w2[2]),
+        .sel3   (tlu_thrd_wsel_w2[3]),
+        .dout   (trap_tsa_wdata[`TLU_TSA_WIDTH-1:0])
+); 
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+// wsr_data_w carries only the low `SFTINT_WIDTH bits of tlu_wsr_data_w
+// modified for timing and lint violations
+// assign wsr_data_w[`TLU_ASR_DATA_WIDTH-1:0] = 
+//            tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-1:0];
+assign wsr_data_w[`SFTINT_WIDTH-1:0] = 
+           tlu_wsr_data_w[`SFTINT_WIDTH-1:0];
+// wsr_data_w2[47:0] is tlu_wsr_data_w[47:0] after one dff_s stage on clk
+// added for timing
+// reduced width to 48 due to lint violations
+dff_s #(48) dff_wsr_data_w2 (
+    .din (tlu_wsr_data_w[47:0]),
+    .q   (wsr_data_w2[47:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// extracting the relevant data for tstate from the WSR to be written
+// modified due to timing changes
+// concatenation order, MSB first: GL, CCR, ASI, PS2, 2'b0, PS1, CWP
+assign compose_tstate[`WSR_TSTATE_WIDTH-1:0] = {
+       wsr_data_w2[`WSR_TSTATE_GL_HI:`WSR_TSTATE_GL_LO],
+       wsr_data_w2[`WSR_TSTATE_CCR_HI:`WSR_TSTATE_CCR_LO],
+       wsr_data_w2[`WSR_TSTATE_ASI_HI:`WSR_TSTATE_ASI_LO],
+       wsr_data_w2[`WSR_TSTATE_PS2_HI:`WSR_TSTATE_PS2_LO], 2'b0,
+       wsr_data_w2[`WSR_TSTATE_PS1_HI:`WSR_TSTATE_PS1_LO],
+       wsr_data_w2[`WSR_TSTATE_CWP_HI:`WSR_TSTATE_CWP_LO]
+};
+//
+// extracting the relevant data for htstate from the WSR to be written
+// concatenation order, MSB first: IBE, RED, PRIV, TLZ
+assign compose_htstate[`TSA_HTSTATE_WIDTH-1:0] = {
+       wsr_data_w2[`WSR_HPSTATE_IBE], wsr_data_w2[`WSR_HPSTATE_RED],
+       wsr_data_w2[`WSR_HPSTATE_PRIV], wsr_data_w2[`WSR_HPSTATE_TLZ]
+};
+
+// htstate
+assign wrpr_tsa_wdata[`TLU_HTSTATE_HI:`TLU_HTSTATE_LO] =
+       compose_htstate[`TSA_HTSTATE_WIDTH-1:0];
+//
+// modified for bug 3017
+// pc
+assign wrpr_tsa_wdata[`TLU_PC_HI:`TLU_PC_LO] = {1'b0, wsr_data_w2[47:2]};
+// npc
+assign wrpr_tsa_wdata[`TLU_NPC_HI:`TLU_NPC_LO] = {1'b0, wsr_data_w2[47:2]};
+//
+// tstate data: one assignment fills the slice [`TLU_GL_HI:`TLU_CWP_LO]
+assign wrpr_tsa_wdata[`TLU_GL_HI:`TLU_CWP_LO] =
+       compose_tstate[`WSR_TSTATE_WIDTH-1:0];
+//
+// ttype data
+assign wrpr_tsa_wdata[`TLU_TT_HI:`TLU_TT_LO] = wsr_data_w2[`TSA_TTYPE_WIDTH-1:0];
+// tsa_wdata source: trap_tsa_wdata (in0) or wrpr_tsa_wdata (in1), steered by tlu_wr_tsa_inst_w2
+mux2ds #(`TLU_TSA_WIDTH) tsawdata_sel (
+       .in0    ({trap_tsa_wdata[`TLU_TSA_WIDTH-1:0]}),
+	   .in1    ({wrpr_tsa_wdata[`TLU_TSA_WIDTH-1:0]}),
+       .sel0   (~tlu_wr_tsa_inst_w2),   // complementary selects: exactly one of sel0/sel1 is high
+       .sel1    (tlu_wr_tsa_inst_w2),
+       .dout   ({tsa_wdata[`TLU_TSA_WIDTH-1:0]})
+); 
+
+//=========================================================================================
+//	SOFT INTERRUPT for Threads
+//=========================================================================================
+
+// Assumption is that softint state is unknown after reset.
+// TICK_INT will be maintained separately. What is the relative order of
+// setting and clearing this bit? What takes precedence?
+//
+// modified for bug 2204
+// recoded due to one-hot problem during reset
+// sftint: the sftintN register value picked by tlu_sftint_mx_sel[3:0] (thread 0 only in the 1-thread build)
+`ifdef FPGA_SYN_1THREAD
+   assign sftint[`SFTINT_WIDTH-1:0] = sftint0[`SFTINT_WIDTH-1:0];
+`else
+   
+mux4ds #(`SFTINT_WIDTH) mx_sftint (
+        .in0  (sftint0[`SFTINT_WIDTH-1:0]),
+        .in1  (sftint1[`SFTINT_WIDTH-1:0]),
+        .in2  (sftint2[`SFTINT_WIDTH-1:0]),
+        .in3  (sftint3[`SFTINT_WIDTH-1:0]),
+        .sel0 (tlu_sftint_mx_sel[0]),
+        .sel1 (tlu_sftint_mx_sel[1]),
+        .sel2 (tlu_sftint_mx_sel[2]),
+        .sel3 (tlu_sftint_mx_sel[3]),
+        .dout (sftint[`SFTINT_WIDTH-1:0])
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+/*
+assign sftint_sel_onehot_g = 
+           ~tlu_sftint_en_l_g[0] | (&tlu_sftint_en_l_g[3:1]); 
+
+mux4ds #(`SFTINT_WIDTH) mx_sftint (
+        .in0  (sftint0[`SFTINT_WIDTH-1:0]),
+        .in1  (sftint1[`SFTINT_WIDTH-1:0]),
+        .in2  (sftint2[`SFTINT_WIDTH-1:0]),
+        .in3  (sftint3[`SFTINT_WIDTH-1:0]),
+        .sel0 (sftint_sel_onehot_g),
+        .sel1 (~tlu_sftint_en_l_g[1]),
+        .sel2 (~tlu_sftint_en_l_g[2]),
+        .sel3 (~tlu_sftint_en_l_g[3]),
+        .dout (sftint[`SFTINT_WIDTH-1:0])
+); 
+*/
+
+// Candidate next values derived from the selected softint value (sftint):
+//   set: OR in the write data;  clr: clear the bits that are 1 in the write data;
+//   wr:  take the write data unchanged.
+assign sftint_set_din[`SFTINT_WIDTH-1:0] =
+            sftint[`SFTINT_WIDTH-1:0] | wsr_data_w[`SFTINT_WIDTH-1:0];
+assign sftint_clr_din[`SFTINT_WIDTH-1:0] =
+            sftint[`SFTINT_WIDTH-1:0] & ~wsr_data_w[`SFTINT_WIDTH-1:0];
+assign sftint_wr_din[`SFTINT_WIDTH-1:0] = wsr_data_w[`SFTINT_WIDTH-1:0];
+
+// constructing the mux selects for the sftint_din mux
+// [2:0] = inverted active-low set/clr/wr strobes; [3] is high only when all three strobes are high (inactive)
+assign sftin_din_mxsel[2:0] =
+           ~{tlu_wr_sftint_l_g, tlu_clr_sftint_l_g, tlu_set_sftint_l_g};
+assign sftin_din_mxsel[3] = tlu_set_sftint_l_g & tlu_clr_sftint_l_g & tlu_wr_sftint_l_g;
+// sftint_din: set / clr / write candidate, or the unchanged sftint value on in3
+mux4ds #(`SFTINT_WIDTH) mx_sftint_din (
+        .in0  (sftint_set_din[`SFTINT_WIDTH-1:0]),
+        .in1  (sftint_clr_din[`SFTINT_WIDTH-1:0]),
+        .in2  (sftint_wr_din[`SFTINT_WIDTH-1:0]),
+        .in3  (sftint[`SFTINT_WIDTH-1:0]),
+        .sel0 (sftin_din_mxsel[0]),
+        .sel1 (sftin_din_mxsel[1]),
+        .sel2 (sftin_din_mxsel[2]),
+        .sel3 (sftin_din_mxsel[3]),
+        .dout (sftint_din[`SFTINT_WIDTH-1:0])
+); 
+// sftintN_clk: clken_buf output built from clk and tlu_sftint_en_l_g[N]; omitted when FPGA_SYN_CLK_EN is defined
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_st0 (
+		.rclk	(clk),
+		.enb_l	(tlu_sftint_en_l_g[0]),
+		.tmb_l	(se_l),
+		.clk	(sftint0_clk)
+		);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_st1 (
+		.rclk	(clk),
+		.enb_l	(tlu_sftint_en_l_g[1]),
+		.tmb_l	(se_l),
+		.clk	(sftint1_clk)
+		);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_st2 (
+		.rclk	(clk),
+		.enb_l	(tlu_sftint_en_l_g[2]),
+		.tmb_l	(se_l),
+		.clk	(sftint2_clk)
+		);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_st3 (
+		.rclk	(clk),
+		.enb_l	(tlu_sftint_en_l_g[3]),
+		.tmb_l	(se_l),
+		.clk	(sftint3_clk)
+		);	
+`endif
+//
+// added for PIB support - modified to make inst count precise
+// added due to sftint spec change
+// Softint bits 0, 15 and 16 of every thread get their own data and enable:
+//   bit 15 - pib_picl_wrap / pib_pich_wrap of the thread
+//   bit 0  - tickcmp interrupts
+//   bit 16 - stickcmp interrupts
+// The event is ORed into both the data (with the matching sftint_din bit) and
+// the enable (with the inverted tlu_sftint_en_l_g bit of the thread).
+// Assignments are grouped by thread.
+//
+// thread 0
+assign sftint_b15_din[0] = sftint_din[15] | pib_picl_wrap[0] | pib_pich_wrap[0];
+assign sftint_b15_en[0]  = ~tlu_sftint_en_l_g[0] | pib_picl_wrap[0] | pib_pich_wrap[0];
+assign sftint_b0_din[0]  = sftint_din[0] | tickcmp_int[0];
+assign sftint_b0_en[0]   = ~tlu_sftint_en_l_g[0] | tickcmp_int[0];
+assign sftint_b16_din[0] = sftint_din[16] | stickcmp_int[0];
+assign sftint_b16_en[0]  = ~tlu_sftint_en_l_g[0] | stickcmp_int[0];
+
+// thread 1
+assign sftint_b15_din[1] = sftint_din[15] | pib_picl_wrap[1] | pib_pich_wrap[1];
+assign sftint_b15_en[1]  = ~tlu_sftint_en_l_g[1] | pib_picl_wrap[1] | pib_pich_wrap[1];
+assign sftint_b0_din[1]  = sftint_din[0] | tickcmp_int[1];
+assign sftint_b0_en[1]   = ~tlu_sftint_en_l_g[1] | tickcmp_int[1];
+assign sftint_b16_din[1] = sftint_din[16] | stickcmp_int[1];
+assign sftint_b16_en[1]  = ~tlu_sftint_en_l_g[1] | stickcmp_int[1];
+
+// thread 2
+assign sftint_b15_din[2] = sftint_din[15] | pib_picl_wrap[2] | pib_pich_wrap[2];
+assign sftint_b15_en[2]  = ~tlu_sftint_en_l_g[2] | pib_picl_wrap[2] | pib_pich_wrap[2];
+assign sftint_b0_din[2]  = sftint_din[0] | tickcmp_int[2];
+assign sftint_b0_en[2]   = ~tlu_sftint_en_l_g[2] | tickcmp_int[2];
+assign sftint_b16_din[2] = sftint_din[16] | stickcmp_int[2];
+assign sftint_b16_en[2]  = ~tlu_sftint_en_l_g[2] | stickcmp_int[2];
+
+// thread 3
+assign sftint_b15_din[3] = sftint_din[15] | pib_picl_wrap[3] | pib_pich_wrap[3];
+assign sftint_b15_en[3]  = ~tlu_sftint_en_l_g[3] | pib_picl_wrap[3] | pib_pich_wrap[3];
+assign sftint_b0_din[3]  = sftint_din[0] | tickcmp_int[3];
+assign sftint_b0_en[3]   = ~tlu_sftint_en_l_g[3] | tickcmp_int[3];
+assign sftint_b16_din[3] = sftint_din[16] | stickcmp_int[3];
+assign sftint_b16_en[3]  = ~tlu_sftint_en_l_g[3] | stickcmp_int[3];
+
+// modified for sftint spec change - special treatments for bit 0, 15 and 16 
+// Bits [14:1] share one 14-bit bank; the TICK_CMP, PIB_WRAP and STICK_CMP bits each use a dffre_s with its own enable.
+// thread 0
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s #(14) dffr_sftint0 (
+    .din (sftint_din[14:1]), 
+    .q   (sftint0[14:1]),
+    .en (~(tlu_sftint_en_l_g[0])), .clk(clk), // explicit enable on clk instead of the sftint0_clk used in the `else branch
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dffr_s #(14) dffr_sftint0 (
+    .din (sftint_din[14:1]), 
+    .q   (sftint0[14:1]),
+    .clk (sftint0_clk), // output of clken_buf clkbf_st0
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+dffre_s dffre_sftint0_b0 (
+    .din (sftint_b0_din[0]), 
+    .q   (sftint0[`SFTINT_TICK_CMP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b0_en[0]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_sftint0_b15 (
+    .din (sftint_b15_din[0]), 
+    .q   (sftint0[`SFTINT_PIB_WRAP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b15_en[0]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_sftint0_b16 (
+    .din (sftint_b16_din[0]), 
+    .q   (sftint0[`SFTINT_STICK_CMP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b16_en[0]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+// softint register: bits [14:1] in one 14-bit bank, TICK_CMP / PIB_WRAP / STICK_CMP bits in single dffre_s flops
+// thread 1
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s #(14) sftint1ff (
+    .din (sftint_din[14:1]), 
+    .q   (sftint1[14:1]),
+    .en (~(tlu_sftint_en_l_g[1])), .clk(clk), // explicit enable on clk instead of the sftint1_clk used in the `else branch
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dffr_s #(14) sftint1ff (
+    .din (sftint_din[14:1]), 
+    .q   (sftint1[14:1]),
+    .clk (sftint1_clk), // output of clken_buf clkbf_st1
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+dffre_s dffre_sftint1_b0 (
+    .din (sftint_b0_din[1]), 
+    .q   (sftint1[`SFTINT_TICK_CMP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b0_en[1]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_sftint1_b15 (
+    .din (sftint_b15_din[1]), 
+    .q   (sftint1[`SFTINT_PIB_WRAP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b15_en[1]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_sftint1_b16 (
+    .din (sftint_b16_din[1]), 
+    .q   (sftint1[`SFTINT_STICK_CMP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b16_en[1]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+// softint register: bits [14:1] in one 14-bit bank, TICK_CMP / PIB_WRAP / STICK_CMP bits in single dffre_s flops
+// thread 2
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s #(14) sftint2ff (
+    .din (sftint_din[14:1]), 
+    .q   (sftint2[14:1]),
+    .en (~(tlu_sftint_en_l_g[2])), .clk(clk), // explicit enable on clk instead of the sftint2_clk used in the `else branch
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dffr_s #(14) sftint2ff (
+    .din (sftint_din[14:1]), 
+    .q   (sftint2[14:1]),
+    .clk (sftint2_clk), // output of clken_buf clkbf_st2
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+dffre_s dffre_sftint2_b0 (
+    .din (sftint_b0_din[2]), 
+    .q   (sftint2[`SFTINT_TICK_CMP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b0_en[2]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_sftint2_b15 (
+    .din (sftint_b15_din[2]), 
+    .q   (sftint2[`SFTINT_PIB_WRAP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b15_en[2]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_sftint2_b16 (
+    .din (sftint_b16_din[2]), 
+    .q   (sftint2[`SFTINT_STICK_CMP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b16_en[2]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+// softint register: bits [14:1] in one 14-bit bank, TICK_CMP / PIB_WRAP / STICK_CMP bits in single dffre_s flops
+// thread 3
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s #(14) sftint3ff (
+    .din (sftint_din[14:1]), 
+    .q   (sftint3[14:1]),
+    .en (~(tlu_sftint_en_l_g[3])), .clk(clk), // explicit enable on clk instead of the sftint3_clk used in the `else branch
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dffr_s #(14) sftint3ff (
+    .din (sftint_din[14:1]), 
+    .q   (sftint3[14:1]),
+    .clk (sftint3_clk), // output of clken_buf clkbf_st3
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+dffre_s dffre_sftint3_b0 (
+    .din (sftint_b0_din[3]), 
+    .q   (sftint3[`SFTINT_TICK_CMP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b0_en[3]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_sftint3_b15 (
+    .din (sftint_b15_din[3]), 
+    .q   (sftint3[`SFTINT_PIB_WRAP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b15_en[3]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_sftint3_b16 (
+    .din (sftint_b16_din[3]), 
+    .q   (sftint3[`SFTINT_STICK_CMP]),
+    .clk (clk),
+    .rst (local_rst),
+    .en  (sftint_b16_en[3]),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+//
+// Datapath priority encoder.
+// sftint_lvl14[n]: OR of thread n's softint bit 14 with its TICK_CMP and STICK_CMP bits
+assign sftint_lvl14[0] = sftint0[14] |
+           sftint0[`SFTINT_STICK_CMP] | sftint0[`SFTINT_TICK_CMP];
+
+assign sftint_lvl14[1] = sftint1[14] |
+           sftint1[`SFTINT_STICK_CMP] | sftint1[`SFTINT_TICK_CMP];
+
+assign sftint_lvl14[2] = sftint2[14] |
+           sftint2[`SFTINT_STICK_CMP] | sftint2[`SFTINT_TICK_CMP];
+
+assign sftint_lvl14[3] = sftint3[14] |
+           sftint3[`SFTINT_STICK_CMP] | sftint3[`SFTINT_TICK_CMP];
+//
+// modified to ensure one-hot mux check
+// encoder input for one thread = {sftintN[15], sftint_lvl14[N], sftintN[13:1]} (15 bits), picked by tlu_sftint_penc_sel
+`ifdef FPGA_SYN_1THREAD
+   assign sftint_penc_din[14:0] = ({sftint0[15],sftint_lvl14[0],sftint0[13:1]});
+`else
+   
+mux4ds #(`SFTINT_WIDTH-2) mx_sftint_penc_din (
+    .in0  ({sftint0[15],sftint_lvl14[0],sftint0[13:1]}),
+    .in1  ({sftint1[15],sftint_lvl14[1],sftint1[13:1]}),
+    .in2  ({sftint2[15],sftint_lvl14[2],sftint2[13:1]}),
+    .in3  ({sftint3[15],sftint_lvl14[3],sftint3[13:1]}),
+    .sel0 (tlu_sftint_penc_sel[0]),
+    .sel1 (tlu_sftint_penc_sel[1]),
+    .sel2 (tlu_sftint_penc_sel[2]),
+    .sel3 (tlu_sftint_penc_sel[3]),
+    .dout (sftint_penc_din[14:0])
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+tlu_prencoder16	prencoder16 (
+			.din	(sftint_penc_din[14:0]),   // 15-bit request vector from the mux / assign above
+			.dout	(tlu_sftint_id[3:0])       // 4-bit result; encoding is defined by tlu_prencoder16 (not in this section)
+		);
+
+//wire	[15:0]	sftint_rdata;
+// Read path: sftint_rdata is the full sftintN value of the thread selected by tlu_thrd_rsel_e[3:0].
+// modified for hypervisor support
+// adding the SM bit
+wire [`SFTINT_WIDTH-1:0]	sftint_rdata;
+// modified due to spec change
+/*
+mux4ds #(`SFTINT_WIDTH) sftint_mx_rsel (
+    .in0  ({tlu_stick_int[0],sftint0[15:1],tlu_tick_int[0]}),
+    .in1  ({tlu_stick_int[1],sftint1[15:1],tlu_tick_int[1]}),
+    .in2  ({tlu_stick_int[2],sftint2[15:1],tlu_tick_int[2]}),
+    .in3  ({tlu_stick_int[3],sftint3[15:1],tlu_tick_int[3]}),
+    .sel0 (tlu_thrd_rsel_e[0]),
+    .sel1 (tlu_thrd_rsel_e[1]),
+    .sel2 (tlu_thrd_rsel_e[2]),
+    .sel3 (tlu_thrd_rsel_e[3]),
+    .dout (sftint_rdata[16:0])
+);
+*/
+`ifdef FPGA_SYN_1THREAD
+   assign      sftint_rdata[16:0] = sftint0[`SFTINT_WIDTH-1:0]; // single-thread build: thread 0 value, no mux
+`else
+   
+mux4ds #(`SFTINT_WIDTH) sftint_mx_rsel (
+    .in0  (sftint0[`SFTINT_WIDTH-1:0]),
+    .in1  (sftint1[`SFTINT_WIDTH-1:0]),
+    .in2  (sftint2[`SFTINT_WIDTH-1:0]),
+    .in3  (sftint3[`SFTINT_WIDTH-1:0]),
+    .sel0 (tlu_thrd_rsel_e[0]),
+    .sel1 (tlu_thrd_rsel_e[1]),
+    .sel2 (tlu_thrd_rsel_e[2]),
+    .sel3 (tlu_thrd_rsel_e[3]),
+    .dout (sftint_rdata[16:0])
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+//=========================================================================================
+//	TBA for Threads
+//=========================================================================================
+
+// Lower 15 bits are read as zero and ignored when written.
+// tbaN_clk: clken_buf output built from clk and tlu_tba_en_l[N]; omitted when FPGA_SYN_CLK_EN is defined
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_tba0 (
+		.rclk	(clk),
+		.enb_l	(tlu_tba_en_l[0]),
+		.tmb_l	(se_l),
+		.clk	(tba0_clk)
+		);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_tba1 (
+		.rclk	(clk),
+		.enb_l	(tlu_tba_en_l[1]),
+		.tmb_l	(se_l),
+		.clk	(tba1_clk)
+		);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_tba2 (
+		.rclk	(clk),
+		.enb_l	(tlu_tba_en_l[2]),
+		.tmb_l	(se_l),
+		.clk	(tba2_clk)
+		);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_tba3 (
+		.rclk	(clk),
+		.enb_l	(tlu_tba_en_l[3]),
+		.tmb_l	(se_l),
+		.clk	(tba3_clk)
+		);	
+`endif
+
+// THREAD0
+// true_tba0[32:0] stores tlu_wsr_data_w[47:15]; the update is qualified by tlu_tba_en_l[0] (enable pin or gated clock)
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(33) tba0 (
+    .din (tlu_wsr_data_w[47:15]), 
+    .q   (true_tba0[32:0]),
+    .en (~(tlu_tba_en_l[0])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(33) tba0 (
+    .din (tlu_wsr_data_w[47:15]), 
+    .q   (true_tba0[32:0]),
+    .clk (tba0_clk), // output of clken_buf clkbf_tba0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// THREAD1
+// true_tba1[32:0] stores tlu_wsr_data_w[47:15]; the update is qualified by tlu_tba_en_l[1] (enable pin or gated clock)
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(33) tba1 (
+    .din (tlu_wsr_data_w[47:15]), 
+    .q  (true_tba1[32:0]),
+    .en (~(tlu_tba_en_l[1])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(33) tba1 (
+    .din (tlu_wsr_data_w[47:15]), 
+    .q  (true_tba1[32:0]),
+    .clk (tba1_clk), // output of clken_buf clkbf_tba1
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// THREAD2
+// true_tba2[32:0] stores tlu_wsr_data_w[47:15]; the update is qualified by tlu_tba_en_l[2] (enable pin or gated clock)
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(33) tba2 (
+    .din (tlu_wsr_data_w[47:15]), 
+    .q   (true_tba2[32:0]),
+    .en (~(tlu_tba_en_l[2])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(33) tba2 (
+    .din (tlu_wsr_data_w[47:15]), 
+    .q   (true_tba2[32:0]),
+    .clk (tba2_clk), // output of clken_buf clkbf_tba2
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// THREAD3
+// true_tba3[32:0] stores tlu_wsr_data_w[47:15]; the update is qualified by tlu_tba_en_l[3] (enable pin or gated clock)
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(33) tba3 (
+    .din (tlu_wsr_data_w[47:15]), 
+    .q  (true_tba3[32:0]),
+    .en (~(tlu_tba_en_l[3])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(33) tba3 (
+    .din (tlu_wsr_data_w[47:15]), 
+    .q  (true_tba3[32:0]),
+    .clk (tba3_clk), // output of clken_buf clkbf_tba3
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// tba_data is for traps specifically; it is selected with tlu_thrd_wsel_w2
+// modified for timing 
+// With FPGA_SYN_1THREAD both tba_data and tba_rdata are wired to true_tba0.
+`ifdef FPGA_SYN_1THREAD
+   assign      tba_data[32:0] = true_tba0[32:0];
+   assign      tba_rdata[32:0] = true_tba0[32:0];
+   
+`else
+   
+mux4ds #(33) mux_tba_data (	// picks one of the four per-thread tba values
+       .in0  (true_tba0[32:0]),
+       .in1  (true_tba1[32:0]),
+       .in2  (true_tba2[32:0]),
+       .in3  (true_tba3[32:0]),
+       .sel0 (tlu_thrd_wsel_w2[0]),	// one select line per input; presumably one-hot - TODO confirm
+       .sel1 (tlu_thrd_wsel_w2[1]),
+       .sel2 (tlu_thrd_wsel_w2[2]),
+       .sel3 (tlu_thrd_wsel_w2[3]),
+       .dout (tba_data[32:0])
+);
+   
+/*
+mux4ds #(33) tba_mx (
+       .in0  (true_tba0[32:0]),
+       .in1  (true_tba1[32:0]),
+       .in2  (true_tba2[32:0]),
+       .in3  (true_tba3[32:0]),
+       .sel0 (tlu_thrd_rsel_g[0]),
+       .sel1 (tlu_thrd_rsel_g[1]),
+       .sel2 (tlu_thrd_rsel_g[2]),
+       .sel3 (tlu_thrd_rsel_g[3]),
+       .dout (tba_data[32:0])
+);
+*/
+// tba_rdata is for read of tba regs specifically; it is selected with tlu_thrd_rsel_e.
+mux4ds #(33) tba_mx_rsel (
+       .in0  (true_tba0[32:0]),
+       .in1  (true_tba1[32:0]),
+       .in2  (true_tba2[32:0]),
+       .in3  (true_tba3[32:0]),
+       .sel0 (tlu_thrd_rsel_e[0]),
+       .sel1 (tlu_thrd_rsel_e[1]),
+       .sel2 (tlu_thrd_rsel_e[2]),
+       .sel3 (tlu_thrd_rsel_e[3]),
+       .dout (tba_rdata[32:0])
+); 
+`endif // !`ifdef FPGA_SYN_1THREAD
+
+// added for hypervisor support
+// The four buffers below are omitted when FPGA_SYN_CLK_EN is defined.
+// HTBA write - constructing the per-thread clocks htba0_clk .. htba3_clk
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_htba0 (
+    .rclk  (clk),
+	.enb_l (tlu_htba_en_l[0]),	// _l suffix suggests active-low enable - TODO confirm in clken_buf
+	.tmb_l (se_l),
+	.clk   (htba0_clk)	// clocks dff_true_htba0 in the dff_s branch
+);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_htba1 (
+    .rclk  (clk),
+	.enb_l (tlu_htba_en_l[1]),
+	.tmb_l (se_l),
+	.clk   (htba1_clk)	// clocks dff_true_htba1 in the dff_s branch
+);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_htba2 (
+    .rclk  (clk),
+	.enb_l (tlu_htba_en_l[2]),
+	.tmb_l (se_l),
+	.clk   (htba2_clk)	// clocks dff_true_htba2 in the dff_s branch
+);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_htba3 (
+    .rclk  (clk),
+	.enb_l (tlu_htba_en_l[3]),
+	.tmb_l (se_l),
+	.clk   (htba3_clk)	// clocks dff_true_htba3 in the dff_s branch
+);	
+`endif
+//
+// HTBA write - writing the registers
+// lower 14 bits of HTBA are reserved, therefore, not stored
+// Each register keeps tlu_wsr_data_w[TLU_HTBA_HI:TLU_HTBA_LO], TLU_HTBA_WIDTH bits wide.
+// Thread 0
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_HTBA_WIDTH) dff_true_htba0 (
+    .din (tlu_wsr_data_w[`TLU_HTBA_HI:`TLU_HTBA_LO]), 
+    .q   (true_htba0[`TLU_HTBA_WIDTH-1:0]),
+    .en (~(tlu_htba_en_l[0])), .clk(clk),	// enable is high while tlu_htba_en_l[0] is 0
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`else
+dff_s #(`TLU_HTBA_WIDTH) dff_true_htba0 (
+    .din (tlu_wsr_data_w[`TLU_HTBA_HI:`TLU_HTBA_LO]), 
+    .q   (true_htba0[`TLU_HTBA_WIDTH-1:0]),
+    .clk (htba0_clk),	// output of clkbf_htba0
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`endif
+// HTBA register, same structure as thread 0.
+// Thread 1
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_HTBA_WIDTH) dff_true_htba1 (
+    .din (tlu_wsr_data_w[`TLU_HTBA_HI:`TLU_HTBA_LO]), 
+    .q   (true_htba1[`TLU_HTBA_WIDTH-1:0]),
+    .en (~(tlu_htba_en_l[1])), .clk(clk),	// enable is high while tlu_htba_en_l[1] is 0
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`else
+dff_s #(`TLU_HTBA_WIDTH) dff_true_htba1 (
+    .din (tlu_wsr_data_w[`TLU_HTBA_HI:`TLU_HTBA_LO]), 
+    .q   (true_htba1[`TLU_HTBA_WIDTH-1:0]),
+    .clk (htba1_clk),	// output of clkbf_htba1
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`endif
+// HTBA register, same structure as thread 0.
+// Thread 2
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_HTBA_WIDTH) dff_true_htba2 (
+    .din (tlu_wsr_data_w[`TLU_HTBA_HI:`TLU_HTBA_LO]), 
+    .q   (true_htba2[`TLU_HTBA_WIDTH-1:0]),
+    .en (~(tlu_htba_en_l[2])), .clk(clk),	// enable is high while tlu_htba_en_l[2] is 0
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`else
+dff_s #(`TLU_HTBA_WIDTH) dff_true_htba2 (
+    .din (tlu_wsr_data_w[`TLU_HTBA_HI:`TLU_HTBA_LO]), 
+    .q   (true_htba2[`TLU_HTBA_WIDTH-1:0]),
+    .clk (htba2_clk),	// output of clkbf_htba2
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`endif
+// HTBA register, same structure as thread 0.
+// Thread 3
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_HTBA_WIDTH) dff_true_htba3 (
+    .din (tlu_wsr_data_w[`TLU_HTBA_HI:`TLU_HTBA_LO]), 
+    .q   (true_htba3[`TLU_HTBA_WIDTH-1:0]),
+    .en (~(tlu_htba_en_l[3])), .clk(clk),	// enable is high while tlu_htba_en_l[3] is 0
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`else
+dff_s #(`TLU_HTBA_WIDTH) dff_true_htba3 (
+    .din (tlu_wsr_data_w[`TLU_HTBA_HI:`TLU_HTBA_LO]), 
+    .q   (true_htba3[`TLU_HTBA_WIDTH-1:0]),
+    .clk (htba3_clk),	// output of clkbf_htba3
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`endif
+// HTBA selection: htba_rdata (select tlu_thrd_rsel_e) and htba_data (select tlu_thrd_wsel_w2).
+// constructing the rdata for HTBA
+wire [`TLU_HTBA_WIDTH-1:0] htba_rdata;
+// With FPGA_SYN_1THREAD both outputs are wired to true_htba0.
+`ifdef FPGA_SYN_1THREAD
+   assign 		   htba_rdata[`TLU_HTBA_WIDTH-1:0] = true_htba0[`TLU_HTBA_WIDTH-1:0];
+   assign 		   htba_data[`TLU_HTBA_WIDTH-1:0] = true_htba0[`TLU_HTBA_WIDTH-1:0];
+`else
+   
+mux4ds #(`TLU_HTBA_WIDTH) mux_htba_rdata (
+       .in0  (true_htba0[`TLU_HTBA_WIDTH-1:0]),
+       .in1  (true_htba1[`TLU_HTBA_WIDTH-1:0]),
+       .in2  (true_htba2[`TLU_HTBA_WIDTH-1:0]),
+       .in3  (true_htba3[`TLU_HTBA_WIDTH-1:0]),
+       .sel0 (tlu_thrd_rsel_e[0]),	// same select bus as the tba read mux (tba_mx_rsel)
+       .sel1 (tlu_thrd_rsel_e[1]),
+       .sel2 (tlu_thrd_rsel_e[2]),
+       .sel3 (tlu_thrd_rsel_e[3]),
+       .dout (htba_rdata[`TLU_HTBA_WIDTH-1:0])
+);
+//
+// selecting the htba base address to use 
+// modified for timing
+mux4ds #(`TLU_HTBA_WIDTH) mux_htba_data (
+       .in0  (true_htba0[`TLU_HTBA_WIDTH-1:0]),
+       .in1  (true_htba1[`TLU_HTBA_WIDTH-1:0]),
+       .in2  (true_htba2[`TLU_HTBA_WIDTH-1:0]),
+       .in3  (true_htba3[`TLU_HTBA_WIDTH-1:0]),
+       .sel0 (tlu_thrd_wsel_w2[0]),	// same select bus as mux_tba_data
+       .sel1 (tlu_thrd_wsel_w2[1]),
+       .sel2 (tlu_thrd_wsel_w2[2]),
+       .sel3 (tlu_thrd_wsel_w2[3]),
+       .dout (htba_data[`TLU_HTBA_WIDTH-1:0])
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+/*
+mux4ds #(`TLU_HTBA_WIDTH) mux_htba_data (
+       .in0  (true_htba0[`TLU_HTBA_WIDTH-1:0]),
+       .in1  (true_htba1[`TLU_HTBA_WIDTH-1:0]),
+       .in2  (true_htba2[`TLU_HTBA_WIDTH-1:0]),
+       .in3  (true_htba3[`TLU_HTBA_WIDTH-1:0]),
+       .sel0 (tlu_thrd_rsel_g[0]),
+       .sel1 (tlu_thrd_rsel_g[1]),
+       .sel2 (tlu_thrd_rsel_g[2]),
+       .sel3 (tlu_thrd_rsel_g[3]),
+       .dout (htba_data[`TLU_HTBA_WIDTH-1:0])
+);
+*/
+//=========================================================================================
+//	TICKS for Threads
+//=========================================================================================
+
+// NOTE: npt needs to be muxed into the read path!
+
+
+// THREAD0,1,2,3 - a single tick register is instantiated here for all four threads.
+// tick_sel picks the next tick value: written data or the incrementer output.
+mux2ds #(61) tick_sel (
+       .in0  (tlu_wsr_data_w[62:2]), 	// write data, taken when tlu_tick_en_l is 0
+	   .in1  (tlu_tick_incr_dout[60:0]),	// incrementer output, taken when tlu_tick_en_l is 1
+       .sel0 (~tlu_tick_en_l),  	
+	   .sel1 ( tlu_tick_en_l),
+       .dout (tick_din[62:2])
+); 
+// Incrementer input: true_tick[60:0] with one zero bit prepended.
+// modified due to the switch to the soft macro
+// assign	tlu_tick_incr_din[`TLU_ASR_DATA_WIDTH-1:0] = 
+//         {3'b000,true_tick[60:0]};
+assign	tlu_tick_incr_din[`TLU_ASR_DATA_WIDTH-3:0] = 
+         {1'b0,true_tick[60:0]};	// the old form above padded with three zero bits instead
+
+// Does not need enable as either in increment or update state
+dff_s #(61) tick0123 (	// 61-bit tick register, clocked directly by clk
+    .din (tick_din[62:2]), 	// output of tick_sel
+    .q  (true_tick[60:0]),	// feeds the incrementer input and the three compare-match equations
+    .clk (clk),
+    .se  (se),       
+    .si (),          	// left unconnected
+    .so ()
+);
+
+//=========================================================================================
+//	TICK COMPARE  for Threads
+//=========================================================================================
+
+`ifdef FPGA_SYN_CLK_EN
+`else // tick-compare clock buffers exist only when FPGA_SYN_CLK_EN is NOT defined
+clken_buf clkbf_tcmp0 (
+    .rclk  (clk),
+	.enb_l (tlu_tickcmp_en_l[0]),	// _l suffix suggests active-low enable - TODO confirm in clken_buf
+	.tmb_l (se_l),
+	.clk   (tcmp0_clk)	// clocks tickcmp0 in the dff_s branch
+);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_tcmp1 (
+    .rclk  (clk),
+	.enb_l (tlu_tickcmp_en_l[1]),
+	.tmb_l (se_l),
+	.clk   (tcmp1_clk)	// clocks tickcmp1 in the dff_s branch
+);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_tcmp2 (
+    .rclk  (clk),
+	.enb_l (tlu_tickcmp_en_l[2]),
+	.tmb_l (se_l),
+	.clk   (tcmp2_clk)	// clocks tickcmp2 in the dff_s branch
+);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_tcmp3 (
+    .rclk  (clk),
+	.enb_l (tlu_tickcmp_en_l[3]),
+	.tmb_l (se_l),
+	.clk   (tcmp3_clk)	// clocks tickcmp3 in the dff_s branch
+);	
+`endif
+
+// thread 0 tick compare: INTDIS bit kept in its own flop, remaining bits in tickcmp0
+// added or modified due to tickcmp clean-up
+assign tickcmp_intdis_din[0] = 
+           tlu_wsr_data_w[`TICKCMP_INTDIS] | local_rst | 
+           tlu_por_rstint_g[0];	// local_rst or tlu_por_rstint_g[0] forces the data to 1
+// added and modified for bug 4763
+assign tickcmp_intdis_en[0] = 
+           ~tlu_tickcmp_en_l[0] | local_rst | tlu_por_rstint_g[0];  
+// The same two terms also force the enable, so INTDIS loads 1 on either of them.
+dffe_s dffe_tickcmp_intdis0 (
+    .din (tickcmp_intdis_din[0]),
+	.q   (true_tickcmp0[`TICKCMP_INTDIS]),
+    .en  (tickcmp_intdis_en[0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+// Remaining TLU_ASR_DATA_WIDTH-1 bits, written from tlu_wsr_data_w.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) tickcmp0 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_tickcmp0[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_tickcmp_en_l[0])), .clk(clk),	// enable is high while tlu_tickcmp_en_l[0] is 0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) tickcmp0 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_tickcmp0[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (tcmp0_clk),	// output of clkbf_tcmp0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// thread 1 tick compare: INTDIS bit kept in its own flop, remaining bits in tickcmp1
+// added or modified due to tickcmp clean-up
+assign tickcmp_intdis_din[1] = 
+           tlu_wsr_data_w[`TICKCMP_INTDIS] | local_rst | 
+           tlu_por_rstint_g[1];	// local_rst or tlu_por_rstint_g[1] forces the data to 1
+// added and modified for bug 4763
+assign tickcmp_intdis_en[1] = 
+           ~tlu_tickcmp_en_l[1] | local_rst | tlu_por_rstint_g[1];  
+// The same two terms also force the enable, so INTDIS loads 1 on either of them.
+dffe_s dffe_tickcmp_intdis1 (
+    .din (tickcmp_intdis_din[1]),
+	.q   (true_tickcmp1[`TICKCMP_INTDIS]),
+    .en  (tickcmp_intdis_en[1]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+// Remaining TLU_ASR_DATA_WIDTH-1 bits, written from tlu_wsr_data_w.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) tickcmp1 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_tickcmp1[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_tickcmp_en_l[1])), .clk(clk),	// enable is high while tlu_tickcmp_en_l[1] is 0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) tickcmp1 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_tickcmp1[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (tcmp1_clk),	// output of clkbf_tcmp1
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// thread 2 tick compare: INTDIS bit kept in its own flop, remaining bits in tickcmp2
+// added or modified due to tickcmp clean-up
+assign tickcmp_intdis_din[2] = 
+           tlu_wsr_data_w[`TICKCMP_INTDIS] | local_rst | 
+           tlu_por_rstint_g[2];	// local_rst or tlu_por_rstint_g[2] forces the data to 1
+// added and modified for bug 4763
+assign tickcmp_intdis_en[2] = 
+           ~tlu_tickcmp_en_l[2] | local_rst | tlu_por_rstint_g[2];  
+// The same two terms also force the enable, so INTDIS loads 1 on either of them.
+dffe_s dffe_tickcmp_intdis2 (
+    .din (tickcmp_intdis_din[2]),
+	.q   (true_tickcmp2[`TICKCMP_INTDIS]),
+    .en  (tickcmp_intdis_en[2]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+// Remaining TLU_ASR_DATA_WIDTH-1 bits, written from tlu_wsr_data_w.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) tickcmp2 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_tickcmp2[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_tickcmp_en_l[2])), .clk(clk),	// enable is high while tlu_tickcmp_en_l[2] is 0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) tickcmp2 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_tickcmp2[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (tcmp2_clk),	// output of clkbf_tcmp2
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// thread 3 tick compare: INTDIS bit kept in its own flop, remaining bits in tickcmp3
+// added or modified due to tickcmp clean-up
+assign tickcmp_intdis_din[3] = 
+           tlu_wsr_data_w[`TICKCMP_INTDIS] | local_rst | 
+           tlu_por_rstint_g[3];	// local_rst or tlu_por_rstint_g[3] forces the data to 1
+// added and modified for bug 4763
+assign tickcmp_intdis_en[3] = 
+           ~tlu_tickcmp_en_l[3] | local_rst | tlu_por_rstint_g[3];  
+// The same two terms also force the enable, so INTDIS loads 1 on either of them.
+dffe_s dffe_tickcmp_intdis3 (
+    .din (tickcmp_intdis_din[3]),
+	.q   (true_tickcmp3[`TICKCMP_INTDIS]),
+    .en  (tickcmp_intdis_en[3]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+// Remaining TLU_ASR_DATA_WIDTH-1 bits, written from tlu_wsr_data_w.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) tickcmp3 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_tickcmp3[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_tickcmp_en_l[3])), .clk(clk),	// enable is high while tlu_tickcmp_en_l[3] is 0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) tickcmp3 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_tickcmp3[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (tcmp3_clk),	// output of clkbf_tcmp3
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// Select 1/4 sources. Assume compare is independent of read
+// and thus needs separate mux (compare uses tlu_tickcmp_sel, read uses tlu_thrd_rsel_e)
+`ifdef FPGA_SYN_1THREAD
+   assign tickcmp_data[`TLU_ASR_DATA_WIDTH-4:0] = true_tickcmp0[`TLU_ASR_DATA_WIDTH-2:2];
+   assign tickcmp_rdata[`TLU_ASR_DATA_WIDTH-1:0] = true_tickcmp0[`TLU_ASR_DATA_WIDTH-1:0];
+`else
+   
+mux4ds #(`TLU_ASR_DATA_WIDTH-3) tcmp_mx_sel (	// compare path: bits 0, 1 and the top bit are dropped
+       .in0  (true_tickcmp0[`TLU_ASR_DATA_WIDTH-2:2]),
+       .in1  (true_tickcmp1[`TLU_ASR_DATA_WIDTH-2:2]),
+       .in2  (true_tickcmp2[`TLU_ASR_DATA_WIDTH-2:2]),
+       .in3  (true_tickcmp3[`TLU_ASR_DATA_WIDTH-2:2]),
+       .sel0 (tlu_tickcmp_sel[0]),
+       .sel1 (tlu_tickcmp_sel[1]),
+       .sel2 (tlu_tickcmp_sel[2]),
+       .sel3 (tlu_tickcmp_sel[3]),
+       .dout (tickcmp_data[`TLU_ASR_DATA_WIDTH-4:0])
+);
+
+// mux for read (full TLU_ASR_DATA_WIDTH bits, including the separately stored INTDIS bit)
+mux4ds #(`TLU_ASR_DATA_WIDTH) tcmp_mx_rsel (
+       .in0  (true_tickcmp0[`TLU_ASR_DATA_WIDTH-1:0]),
+       .in1  (true_tickcmp1[`TLU_ASR_DATA_WIDTH-1:0]),
+       .in2  (true_tickcmp2[`TLU_ASR_DATA_WIDTH-1:0]),
+       .in3  (true_tickcmp3[`TLU_ASR_DATA_WIDTH-1:0]),
+       .sel0 (tlu_thrd_rsel_e[0]),
+       .sel1 (tlu_thrd_rsel_e[1]),
+       .sel2 (tlu_thrd_rsel_e[2]),
+       .sel3 (tlu_thrd_rsel_e[3]),
+       .dout (tickcmp_rdata[`TLU_ASR_DATA_WIDTH-1:0])
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+// tick_match is 1 when the selected tickcmp value equals the current tick count.
+// evaluate for tickcmp match
+assign tick_match = 
+           (tickcmp_data[`TICKCMP_RANGE_HI:`TICKCMP_RANGE_LO] == 
+            true_tick[60:0]);	// 61-bit tick register output (tick0123)
+// Per-thread tick-compare interrupt: match, INTDIS bit clear, and that thread selected.
+// moved from tlu_tcl
+assign	tickcmp_int[0] = 
+            tick_match & ~true_tickcmp0[`TICKCMP_INTDIS] & tlu_tickcmp_sel[0];  
+assign	tickcmp_int[1] = 
+            tick_match & ~true_tickcmp1[`TICKCMP_INTDIS] & tlu_tickcmp_sel[1];
+assign	tickcmp_int[2] = 
+            tick_match & ~true_tickcmp2[`TICKCMP_INTDIS] & tlu_tickcmp_sel[2];
+assign	tickcmp_int[3] = 
+            tick_match & ~true_tickcmp3[`TICKCMP_INTDIS] & tlu_tickcmp_sel[3];
+
+//=========================================================================================
+//	STICK COMPARE  for Threads
+//=========================================================================================
+// added for hypervisor support
+
+`ifdef FPGA_SYN_CLK_EN
+`else // stick-compare clock buffers exist only when FPGA_SYN_CLK_EN is NOT defined
+clken_buf clkbf_stcmp0 (
+    .rclk  (clk),
+	.enb_l (tlu_stickcmp_en_l[0]),	// _l suffix suggests active-low enable - TODO confirm in clken_buf
+	.tmb_l (se_l),
+	.clk   (stcmp0_clk)	// clocks stickcmp0 in the dff_s branch
+);
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_stcmp1 (
+    .rclk  (clk),
+	.enb_l (tlu_stickcmp_en_l[1]),
+	.tmb_l (se_l),
+	.clk   (stcmp1_clk)	// clocks stickcmp1 in the dff_s branch
+);
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_stcmp2 (
+    .rclk  (clk),
+	.enb_l (tlu_stickcmp_en_l[2]),
+	.tmb_l (se_l),
+	.clk   (stcmp2_clk)	// clocks stickcmp2 in the dff_s branch
+);
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_stcmp3 (
+    .rclk  (clk),
+	.enb_l (tlu_stickcmp_en_l[3]),
+	.tmb_l (se_l),
+	.clk   (stcmp3_clk)	// clocks stickcmp3 in the dff_s branch
+);
+`endif
+
+// thread 0 stick compare: INTDIS bit kept in its own flop, remaining bits in stickcmp0
+// added or modified due to stickcmp clean-up
+assign stickcmp_intdis_din[0] = tickcmp_intdis_din[0]; 	// reuses the tickcmp INTDIS data term
+// added and modified for bug 4763
+assign stickcmp_intdis_en[0] = 
+           ~tlu_stickcmp_en_l[0] | local_rst | tlu_por_rstint_g[0];  
+// local_rst or tlu_por_rstint_g[0] forces both data and enable, so INTDIS loads 1.
+dffe_s dffe_stickcmp_intdis0 (
+    .din (stickcmp_intdis_din[0]),
+	.q   (true_stickcmp0[`TICKCMP_INTDIS]),	// same bit position macro as tickcmp
+    .en  (stickcmp_intdis_en[0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+// Remaining TLU_ASR_DATA_WIDTH-1 bits, written from tlu_wsr_data_w.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) stickcmp0 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_stickcmp0[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_stickcmp_en_l[0])), .clk(clk),	// enable is high while tlu_stickcmp_en_l[0] is 0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) stickcmp0 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_stickcmp0[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (stcmp0_clk),	// output of clkbf_stcmp0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// thread 1 stick compare: INTDIS bit kept in its own flop, remaining bits in stickcmp1
+// added or modified due to stickcmp clean-up
+assign stickcmp_intdis_din[1] = tickcmp_intdis_din[1]; 	// reuses the tickcmp INTDIS data term
+// added and modified for bug 4763
+assign stickcmp_intdis_en[1] = 
+           ~tlu_stickcmp_en_l[1] | local_rst | tlu_por_rstint_g[1];  
+// local_rst or tlu_por_rstint_g[1] forces both data and enable, so INTDIS loads 1.
+dffe_s dffe_stickcmp_intdis1 (
+    .din (stickcmp_intdis_din[1]),
+	.q   (true_stickcmp1[`TICKCMP_INTDIS]),	// same bit position macro as tickcmp
+    .en  (stickcmp_intdis_en[1]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+// Remaining TLU_ASR_DATA_WIDTH-1 bits, written from tlu_wsr_data_w.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) stickcmp1 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_stickcmp1[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_stickcmp_en_l[1])), .clk(clk),	// enable is high while tlu_stickcmp_en_l[1] is 0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) stickcmp1 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_stickcmp1[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (stcmp1_clk),	// output of clkbf_stcmp1
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// thread 2 stick compare: INTDIS bit kept in its own flop, remaining bits in stickcmp2
+// added or modified due to stickcmp clean-up
+assign stickcmp_intdis_din[2] = tickcmp_intdis_din[2]; 	// reuses the tickcmp INTDIS data term
+// added and modified for bug 4763
+assign stickcmp_intdis_en[2] = 
+           ~tlu_stickcmp_en_l[2] | local_rst | tlu_por_rstint_g[2];  
+// local_rst or tlu_por_rstint_g[2] forces both data and enable, so INTDIS loads 1.
+dffe_s dffe_stickcmp_intdis2 (
+    .din (stickcmp_intdis_din[2]),
+	.q   (true_stickcmp2[`TICKCMP_INTDIS]),	// same bit position macro as tickcmp
+    .en  (stickcmp_intdis_en[2]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+// Remaining TLU_ASR_DATA_WIDTH-1 bits, written from tlu_wsr_data_w.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) stickcmp2 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_stickcmp2[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_stickcmp_en_l[2])), .clk(clk),	// enable is high while tlu_stickcmp_en_l[2] is 0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) stickcmp2 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_stickcmp2[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (stcmp2_clk),	// output of clkbf_stcmp2
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+
+// thread 3 stick compare: INTDIS bit kept in its own flop, remaining bits in stickcmp3
+// added or modified due to stickcmp clean-up
+assign stickcmp_intdis_din[3] = tickcmp_intdis_din[3]; 	// reuses the tickcmp INTDIS data term
+// added and modified for bug 4763
+assign stickcmp_intdis_en[3] = 
+           ~tlu_stickcmp_en_l[3] | local_rst | tlu_por_rstint_g[3];  
+// local_rst or tlu_por_rstint_g[3] forces both data and enable, so INTDIS loads 1.
+dffe_s dffe_stickcmp_intdis3 (
+    .din (stickcmp_intdis_din[3]),
+	.q   (true_stickcmp3[`TICKCMP_INTDIS]),	// same bit position macro as tickcmp
+    .en  (stickcmp_intdis_en[3]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+// Remaining TLU_ASR_DATA_WIDTH-1 bits, written from tlu_wsr_data_w.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) stickcmp3 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_stickcmp3[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_stickcmp_en_l[3])), .clk(clk),	// enable is high while tlu_stickcmp_en_l[3] is 0
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) stickcmp3 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_stickcmp3[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (stcmp3_clk),	// output of clkbf_stcmp3
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif // !`ifdef FPGA_SYN_CLK_DFF
+   
+// Select 1/4 sources. Assume compare is independent of read
+// and thus needs separate mux
+// With FPGA_SYN_1THREAD both outputs are wired to true_stickcmp0.
+`ifdef FPGA_SYN_1THREAD
+   assign stickcmp_data[`TLU_ASR_DATA_WIDTH-4:0] = true_stickcmp0[`TLU_ASR_DATA_WIDTH-2:2];
+   assign stickcmp_rdata[`TLU_ASR_DATA_WIDTH-1:0] = true_stickcmp0[`TLU_ASR_DATA_WIDTH-1:0];
+`else
+   
+mux4ds #(`TLU_ASR_DATA_WIDTH-3) mux_stickcmp_data (	// compare path
+       .in0  (true_stickcmp0[`TLU_ASR_DATA_WIDTH-2:2]),
+       .in1  (true_stickcmp1[`TLU_ASR_DATA_WIDTH-2:2]),
+       .in2  (true_stickcmp2[`TLU_ASR_DATA_WIDTH-2:2]),
+       .in3  (true_stickcmp3[`TLU_ASR_DATA_WIDTH-2:2]),
+       .sel0 (tlu_tickcmp_sel[0]),	// the tickcmp select bus is shared with the tick compare mux
+       .sel1 (tlu_tickcmp_sel[1]),
+       .sel2 (tlu_tickcmp_sel[2]),
+       .sel3 (tlu_tickcmp_sel[3]),
+       .dout (stickcmp_data[`TLU_ASR_DATA_WIDTH-4:0])
+);
+//
+// mux for read (full TLU_ASR_DATA_WIDTH bits, selected by tlu_thrd_rsel_e)
+mux4ds #(`TLU_ASR_DATA_WIDTH) mux_stickcmp_rdata (
+       .in0  (true_stickcmp0[`TLU_ASR_DATA_WIDTH-1:0]),
+       .in1  (true_stickcmp1[`TLU_ASR_DATA_WIDTH-1:0]),
+       .in2  (true_stickcmp2[`TLU_ASR_DATA_WIDTH-1:0]),
+       .in3  (true_stickcmp3[`TLU_ASR_DATA_WIDTH-1:0]),
+       .sel0 (tlu_thrd_rsel_e[0]),
+       .sel1 (tlu_thrd_rsel_e[1]),
+       .sel2 (tlu_thrd_rsel_e[2]),
+       .sel3 (tlu_thrd_rsel_e[3]),
+       .dout (stickcmp_rdata[`TLU_ASR_DATA_WIDTH-1:0])
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+// stick_match is 1 when the selected stickcmp value equals the current tick count.
+// evaluate for stickcmp match
+assign stick_match = 
+           (stickcmp_data[`TICKCMP_RANGE_HI:`TICKCMP_RANGE_LO] == 
+            true_tick[60:0]);	// compared against the same true_tick as tick_match
+// Per-thread stick-compare interrupt: match, INTDIS bit clear, and that thread selected.
+// moved from tlu_tcl
+assign	stickcmp_int[0] = 
+            stick_match & ~true_stickcmp0[`TICKCMP_INTDIS] & tlu_tickcmp_sel[0];  
+assign	stickcmp_int[1] = 
+            stick_match & ~true_stickcmp1[`TICKCMP_INTDIS] & tlu_tickcmp_sel[1];
+assign	stickcmp_int[2] = 
+            stick_match & ~true_stickcmp2[`TICKCMP_INTDIS] & tlu_tickcmp_sel[2];
+assign	stickcmp_int[3] = 
+            stick_match & ~true_stickcmp3[`TICKCMP_INTDIS] & tlu_tickcmp_sel[3];
+
+//=========================================================================================
+//	HTICK COMPARE  for Threads
+//=========================================================================================
+// added for hypervisor support
+
+`ifdef FPGA_SYN_CLK_EN
+`else // htick-compare clock buffers exist only when FPGA_SYN_CLK_EN is NOT defined
+clken_buf clkbf_htcmp0 (
+    .rclk  (clk),
+	.enb_l (tlu_htickcmp_en_l[0]),	// _l suffix suggests active-low enable - TODO confirm in clken_buf
+	.tmb_l (se_l),
+	.clk   (htcmp0_clk)	// clocks htickcmp0 in the dff_s branch
+);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_htcmp1 (
+    .rclk  (clk),
+	.enb_l (tlu_htickcmp_en_l[1]),
+	.tmb_l (se_l),
+	.clk   (htcmp1_clk)	// clocks htickcmp1 in the dff_s branch
+);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_htcmp2 (
+    .rclk  (clk),
+	.enb_l (tlu_htickcmp_en_l[2]),
+	.tmb_l (se_l),
+	.clk   (htcmp2_clk)	// clocks htickcmp2 in the dff_s branch
+);	
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_htcmp3 (
+    .rclk  (clk),
+	.enb_l (tlu_htickcmp_en_l[3]),
+	.tmb_l (se_l),
+	.clk   (htcmp3_clk)	// clocks htickcmp3 in the dff_s branch
+);	
+`endif
+   
+// THREAD0 htick compare register (TLU_ASR_DATA_WIDTH-1 bits; no separate INTDIS flop here)
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) htickcmp0 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_htickcmp0[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_htickcmp_en_l[0])), .clk(clk),	// enable is high while tlu_htickcmp_en_l[0] is 0
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) htickcmp0 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_htickcmp0[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (htcmp0_clk),	// output of clkbf_htcmp0
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`endif
+
+// THREAD1 htick compare register (TLU_ASR_DATA_WIDTH-1 bits; no separate INTDIS flop here)
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) htickcmp1 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_htickcmp1[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_htickcmp_en_l[1])), .clk(clk),	// enable is high while tlu_htickcmp_en_l[1] is 0
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) htickcmp1 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_htickcmp1[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (htcmp1_clk),	// output of clkbf_htcmp1
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`endif
+
+// THREAD2 htick compare register (TLU_ASR_DATA_WIDTH-1 bits; no separate INTDIS flop here)
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) htickcmp2 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_htickcmp2[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_htickcmp_en_l[2])), .clk(clk),	// enable is high while tlu_htickcmp_en_l[2] is 0
+    .se  (se),
+    .si  (),	// left unconnected
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) htickcmp2 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_htickcmp2[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (htcmp2_clk),	// output of clkbf_htcmp2
+    .se  (se),
+    .si  (),	// left unconnected
+    .so  ()
+);
+`endif
+
+// THREAD3 htick compare register (TLU_ASR_DATA_WIDTH-1 bits; no separate INTDIS flop here)
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`TLU_ASR_DATA_WIDTH-1) htickcmp3 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_htickcmp3[`TLU_ASR_DATA_WIDTH-2:0]),
+    .en (~(tlu_htickcmp_en_l[3])), .clk(clk),	// enable is high while tlu_htickcmp_en_l[3] is 0
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`else
+dff_s #(`TLU_ASR_DATA_WIDTH-1) htickcmp3 (
+    .din (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-2:0]),
+	.q   (true_htickcmp3[`TLU_ASR_DATA_WIDTH-2:0]),
+    .clk (htcmp3_clk),	// output of clkbf_htcmp3
+    .se  (se),       
+    .si  (),          	// left unconnected
+    .so  ()
+);
+`endif
+
+
+// Select 1/4 sources. Assume compare is independent of read
+// and thus needs separate mux (1THREAD build wires both outputs to true_htickcmp0)
+`ifdef FPGA_SYN_1THREAD
+   assign htickcmp_data[`TLU_ASR_DATA_WIDTH-4:0] = true_htickcmp0[`TLU_ASR_DATA_WIDTH-2:2];
+   assign htickcmp_rdata[`TLU_ASR_DATA_WIDTH-2:0] = true_htickcmp0[`TLU_ASR_DATA_WIDTH-2:0];
+`else
+   
+mux4ds #(`TLU_ASR_DATA_WIDTH-3) mux_htickcmp_data (	// compare path
+       .in0  (true_htickcmp0[`TLU_ASR_DATA_WIDTH-2:2]),
+       .in1  (true_htickcmp1[`TLU_ASR_DATA_WIDTH-2:2]),
+       .in2  (true_htickcmp2[`TLU_ASR_DATA_WIDTH-2:2]),
+       .in3  (true_htickcmp3[`TLU_ASR_DATA_WIDTH-2:2]),
+       .sel0 (tlu_tickcmp_sel[0]),	// the tickcmp select bus is shared with the tick and stick compare muxes
+       .sel1 (tlu_tickcmp_sel[1]),
+       .sel2 (tlu_tickcmp_sel[2]),
+       .sel3 (tlu_tickcmp_sel[3]),
+       .dout (htickcmp_data[`TLU_ASR_DATA_WIDTH-4:0])
+);
+//
+// mux for read (one bit narrower than the tickcmp/stickcmp read muxes)
+mux4ds #(`TLU_ASR_DATA_WIDTH-1) mux_htickcmp_rdata (
+       .in0  (true_htickcmp0[`TLU_ASR_DATA_WIDTH-2:0]),
+       .in1  (true_htickcmp1[`TLU_ASR_DATA_WIDTH-2:0]),
+       .in2  (true_htickcmp2[`TLU_ASR_DATA_WIDTH-2:0]),
+       .in3  (true_htickcmp3[`TLU_ASR_DATA_WIDTH-2:0]),
+       .sel0 (tlu_thrd_rsel_e[0]),
+       .sel1 (tlu_thrd_rsel_e[1]),
+       .sel2 (tlu_thrd_rsel_e[2]),
+       .sel3 (tlu_thrd_rsel_e[3]),
+       .dout (htickcmp_rdata[`TLU_ASR_DATA_WIDTH-2:0])
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+// tlu_htick_match is 1 when the selected htickcmp value equals the current tick count.
+// evaluate for htickcmp match
+assign tlu_htick_match = 
+           (htickcmp_data[`TICKCMP_RANGE_HI:`TICKCMP_RANGE_LO] == 
+            true_tick[60:0]);	// feeds the per-thread tlu_set_hintp_g terms
+//
+//=========================================================================================
+// HINTP REG for Threads
+//=========================================================================================
+// added for hypervisor support
+// modified for timing
+// creating clocks for accessing the hintp regs: enable is low on a set or a write
+assign tlu_hintp_en_l_g[0] = 
+           ~(tlu_set_hintp_g[0] | tlu_wr_hintp_g[0]); 
+assign tlu_hintp_en_l_g[1] = 
+           ~(tlu_set_hintp_g[1] | tlu_wr_hintp_g[1]); 
+assign tlu_hintp_en_l_g[2] = 
+           ~(tlu_set_hintp_g[2] | tlu_wr_hintp_g[2]); 
+assign tlu_hintp_en_l_g[3] = 
+           ~(tlu_set_hintp_g[3] | tlu_wr_hintp_g[3]); 
+
+`ifdef FPGA_SYN_CLK_EN
+`else // hintp clock buffers exist only when FPGA_SYN_CLK_EN is NOT defined
+clken_buf clkbf_hintp0 (
+		.rclk	(clk),
+		.enb_l	(tlu_hintp_en_l_g[0]),	// low when thread 0 hintp is being set or written
+		.tmb_l	(se_l),
+		.clk	(hintp0_clk)	// clocks dffr_hintp0 in the dffr_s branch
+);
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_hintp1 (
+		.rclk	(clk),
+		.enb_l	(tlu_hintp_en_l_g[1]),
+		.tmb_l	(se_l),
+		.clk	(hintp1_clk)	// clocks dffr_hintp1 in the dffr_s branch
+);
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_hintp2 (
+		.rclk	(clk),
+		.enb_l	(tlu_hintp_en_l_g[2]),
+		.tmb_l	(se_l),
+		.clk	(hintp2_clk)	// clocks dffr_hintp2 in the dffr_s branch
+);
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_hintp3 (
+		.rclk	(clk),
+		.enb_l	(tlu_hintp_en_l_g[3]),
+		.tmb_l	(se_l),
+		.clk	(hintp3_clk)	// clocks dffr_hintp3 in the dffr_s branch
+);
+`endif
+// 
+// setting the value of hintp registers
+// (one bit per thread: set on an htick match, otherwise loaded from wsr_data_w[0])
+// Thread 0
+// added for timing
+assign tlu_set_hintp_g[0] = 
+           tlu_set_hintp_sel_g[0] & tlu_htick_match;
+
+// modified to reflect the physical implementation
+// assign hintp_din[0] = 
+//            (tlu_set_hintp_g[0])? tlu_set_hintp_g[0]: wsr_data_w[0]; 
+// The mux below is the structural form of the ternary above.
+mux2ds mx_hintp_din_0 (
+       .in0  (tlu_set_hintp_g[0]),	// taken when set is 1, so this input supplies a 1
+	   .in1  (wsr_data_w[0]),	// taken when set is 0
+       .sel0 (tlu_set_hintp_g[0]),  	
+	   .sel1 (~tlu_set_hintp_g[0]),
+       .dout (hintp_din[0])
+); 
+
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s dffr_hintp0 (
+     .din (hintp_din[0]), 
+     .q   (tlu_hintp[0]),
+     .en (~(tlu_hintp_en_l_g[0])), .clk(clk),	// enable is high on a set or a write
+	 .rst (local_rst),
+     .se  (se),
+     .si  (),	// left unconnected
+     .so  ()
+);
+`else
+dffr_s dffr_hintp0 (
+     .din (hintp_din[0]), 
+     .q   (tlu_hintp[0]),
+     .clk (hintp0_clk),	// output of clkbf_hintp0
+	 .rst (local_rst),
+     .se  (se),
+     .si  (),	// left unconnected
+     .so  ()
+);
+`endif
+
+// Thread 1 hintp bit: set on an htick match, otherwise loaded from wsr_data_w[0]
+// added for timing
+assign tlu_set_hintp_g[1] = 
+           tlu_set_hintp_sel_g[1] & tlu_htick_match;
+
+// modified to reflect the physical implementation
+// assign hintp_din[1] = 
+//            (tlu_set_hintp_g[1])? tlu_set_hintp_g[1]: wsr_data_w[0]; 
+// The mux below is the structural form of the ternary above.
+mux2ds mx_hintp_din_1 (
+       .in0  (tlu_set_hintp_g[1]),	// taken when set is 1, so this input supplies a 1
+	   .in1  (wsr_data_w[0]),	// taken when set is 0
+       .sel0 (tlu_set_hintp_g[1]),  	
+	   .sel1 (~tlu_set_hintp_g[1]),
+       .dout (hintp_din[1])
+); 
+
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s dffr_hintp1 (
+     .din (hintp_din[1]), 
+     .q   (tlu_hintp[1]),
+     .en (~(tlu_hintp_en_l_g[1])), .clk(clk),	// enable is high on a set or a write
+	 .rst (local_rst),
+     .se  (se),
+     .si  (),	// left unconnected
+     .so  ()
+);
+`else
+dffr_s dffr_hintp1 (
+     .din (hintp_din[1]), 
+     .q   (tlu_hintp[1]),
+     .clk (hintp1_clk),	// output of clkbf_hintp1
+	 .rst (local_rst),
+     .se  (se),
+     .si  (),	// left unconnected
+     .so  ()
+);
+`endif
+
+// Thread 2 hintp bit: set on an htick match, otherwise loaded from wsr_data_w[0]
+// added for timing
+assign tlu_set_hintp_g[2] = 
+           tlu_set_hintp_sel_g[2] & tlu_htick_match;
+
+// modified to reflect the physical implementation
+// assign hintp_din[2] = 
+//            (tlu_set_hintp_g[2])? tlu_set_hintp_g[2]: wsr_data_w[0]; 
+// The mux below is the structural form of the ternary above.
+mux2ds mx_hintp_din_2 (
+       .in0  (tlu_set_hintp_g[2]),	// taken when set is 1, so this input supplies a 1
+	   .in1  (wsr_data_w[0]),	// taken when set is 0
+       .sel0 (tlu_set_hintp_g[2]),  	
+	   .sel1 (~tlu_set_hintp_g[2]),
+       .dout (hintp_din[2])
+); 
+
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s dffr_hintp2 (
+     .din (hintp_din[2]), 
+     .q   (tlu_hintp[2]),
+     .en (~(tlu_hintp_en_l_g[2])), .clk(clk),	// enable is high on a set or a write
+	 .rst (local_rst),
+     .se  (se),
+     .si  (),	// left unconnected
+     .so  ()
+);
+`else
+dffr_s dffr_hintp2 (
+     .din (hintp_din[2]), 
+     .q   (tlu_hintp[2]),
+     .clk (hintp2_clk),	// output of clkbf_hintp2
+	 .rst (local_rst),
+     .se  (se),
+     .si  (),	// left unconnected
+     .so  ()
+);
+`endif
+
+// Thread 3 hintp bit: set on an htick match, otherwise loaded from wsr_data_w[0]
+// added for timing
+assign tlu_set_hintp_g[3] = 
+           tlu_set_hintp_sel_g[3] & tlu_htick_match;
+
+// modified to reflect the physical implementation
+// assign hintp_din[3] = 
+//            (tlu_set_hintp_g[3])? tlu_set_hintp_g[3]: wsr_data_w[0]; 
+// The mux below is the structural form of the ternary above.
+mux2ds mx_hintp_din_3 (
+       .in0  (tlu_set_hintp_g[3]),	// taken when set is 1, so this input supplies a 1
+	   .in1  (wsr_data_w[0]),	// taken when set is 0
+       .sel0 (tlu_set_hintp_g[3]),  	
+	   .sel1 (~tlu_set_hintp_g[3]),
+       .dout (hintp_din[3])
+); 
+
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s dffr_hintp3 (
+     .din (hintp_din[3]), 
+     .q   (tlu_hintp[3]),
+     .en (~(tlu_hintp_en_l_g[3])), .clk(clk),	// enable is high on a set or a write
+	 .rst (local_rst),
+     .se  (se),
+     .si  (),	// left unconnected
+     .so  ()
+);
+`else
+dffr_s dffr_hintp3 (
+     .din (hintp_din[3]), 
+     .q   (tlu_hintp[3]),
+     .clk (hintp3_clk),	// output of clkbf_hintp3
+	 .rst (local_rst),
+     .se  (se),
+     .si  (),	// left unconnected
+     .so  ()
+);
+`endif
+
+//=========================================================================================
+//	DONE/RETRY 
+//=========================================================================================
+
+// PC/nPC will be updated by pc/npc from IFU,
+// OR, Done/Retry which reads TSA in E stage. Execution of Done/Retry will
+// put pc/npc temporarily in bypass flop which will then update actual pc/npc
+// in g. Update of pc/npc by inst_in_w or done/retry thus becomes aligned.
+// recoded due to lint violations - individualized the components
+/*
+dff_s #(`TLU_TSA_WIDTH) poptsa_m (
+    .din (tsa_rdata[`TLU_TSA_WIDTH-1:0]), 
+	.q   (tsa_data_m[`TLU_TSA_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// added, modified for hypervisor and timing 
+assign dnrtry_pstate_m = 
+       {2'b0,  // old IG, MG - replaced by global register
+        tsa_data_m[`TSA_PSTATE_VRANGE2_HI:`TSA_PSTATE_VRANGE2_LO],
+        2'b0,  // memory model has been changed to TSO only - bug 2588
+        1'b0,  // old RED - replaced by hpstate.red
+        tsa_data_m[`TSA_PSTATE_VRANGE1_HI:`TSA_PSTATE_VRANGE1_LO],
+        1'b0}; // old AG - replaced by global register 
+        
+dff_s #(12) dff_pstate_g (
+    .din (dnrtry_pstate_m[`PSTATE_TRUE_WIDTH-1:0]),
+	.q   (dnrtry_pstate[`PSTATE_TRUE_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+    );
+*/
+// Done/Retry PSTATE staging (recoded due to lint violation): the two variable PSTATE
+// ranges of tsa_rdata pass through three dff_s ranks on clk: _m, then _g, then _w2.
+dff_s #(`WSR_PSTATE_VR_WIDTH) dff_dnrtry_pstate_m (
+    .clk (clk),
+    .din ({tsa_rdata[`TSA_PSTATE_VRANGE2_HI:`TSA_PSTATE_VRANGE2_LO],
+           tsa_rdata[`TSA_PSTATE_VRANGE1_HI:`TSA_PSTATE_VRANGE1_LO]}),
+    .q   (dnrtry_pstate_m[`WSR_PSTATE_VR_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// second rank: dnrtry_pstate_m -> dnrtry_pstate_g
+dff_s #(`WSR_PSTATE_VR_WIDTH) dff_pstate_g (
+    .clk (clk),
+    .din (dnrtry_pstate_m[`WSR_PSTATE_VR_WIDTH-1:0]),
+    .q   (dnrtry_pstate_g[`WSR_PSTATE_VR_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// third rank: dnrtry_pstate_g -> dnrtry_pstate_w2
+dff_s #(`WSR_PSTATE_VR_WIDTH) dff_pstate_w2 (
+    .clk (clk),
+    .din (dnrtry_pstate_g[`WSR_PSTATE_VR_WIDTH-1:0]),
+    .q   (dnrtry_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// assign dnrtry_pstate_m[`WSR_PSTATE_VR_WIDTH-1:0] = 
+//        {tsa_data_m[`TSA_PSTATE_VRANGE2_HI:`TSA_PSTATE_VRANGE2_LO],
+//         tsa_data_m[`TSA_PSTATE_VRANGE1_HI:`TSA_PSTATE_VRANGE1_LO]}; 
+// 
+// HPSTATE image stored in the TSA, read back for Done/Retry recovery
+// (recoded due to lint violations).
+// Staged through three dff_s ranks on clk: tsa_rdata -> _m -> _g -> _w2.
+dff_s #(`TSA_HTSTATE_WIDTH) dff_tsa_dnrtry_hpstate_m (
+    .clk (clk),
+    // field is now TLU_RD_HTSTATE_HI:TLU_RD_HTSTATE_LO (was TLU_HTSTATE_HI:TLU_HTSTATE_LO)
+    .din (tsa_rdata[`TLU_RD_HTSTATE_HI:`TLU_RD_HTSTATE_LO]),
+    .q   (tsa_dnrtry_hpstate_m[`TSA_HTSTATE_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s #(`TSA_HTSTATE_WIDTH) dff_tsa_dnrtry_hpstate_g (
+    .clk (clk),
+    // fed from the _m rank above (was a slice of tsa_data_m)
+    .din (tsa_dnrtry_hpstate_m[`TSA_HTSTATE_WIDTH-1:0]),
+    .q   (tsa_dnrtry_hpstate_g[`TSA_HTSTATE_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// added for timing
+dff_s #(`TSA_HTSTATE_WIDTH) dff_tsa_dnrtry_hpstate_w2 (
+    .clk (clk),
+    .din (tsa_dnrtry_hpstate_g[`TSA_HTSTATE_WIDTH-1:0]),
+    .q   (tsa_dnrtry_hpstate_w2[`TSA_HTSTATE_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// Value of the original global registers, read from the TSA for recovery.
+// recoded due to lint cleanup: this used to be a direct assign of
+// dnrtry_global_m from the TLU_GL_HI:TLU_GL_LO slice of tsa_data_m.
+// Now two dff_s ranks on clk: tsa_rdata -> dnrtry_global_m -> tlu_dnrtry_global_g.
+dff_s #(`TSA_GLOBAL_WIDTH) dff_dnrtry_global_m (
+    .clk (clk),
+    .din (tsa_rdata[`TLU_GL_HI:`TLU_GL_LO]),
+    .q   (dnrtry_global_m[`TSA_GLOBAL_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// second rank
+dff_s #(`TSA_GLOBAL_WIDTH) dff_global_g (
+    .clk (clk),
+    .din (dnrtry_global_m[`TSA_GLOBAL_WIDTH-1:0]),
+    .q   (tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+/* logic moved to tlu_misctl
+// added due to lint violations
+dff_s #(47) dff_tsa_pc_m (
+    .din (tsa_rdata[`TLU_PC_HI:`TLU_PC_LO]),
+	.q   (tsa_pc_m[46:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s #(47) dff_tsa_npc_m (
+    .din (tsa_rdata[`TLU_NPC_HI:`TLU_NPC_LO]),
+	.q   (tsa_npc_m[46:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+// pstate may have to be staged by an additional cycle.
+assign dnrtry_pc[48:0]  = {tsa_pc_m[46:0],2'b00};
+assign dnrtry_npc[48:0] = {tsa_npc_m[46:0],2'b00};
+*/
+
+//=========================================================================================
+//	PC/nPC
+//=========================================================================================
+
+// TRUE PC/NPC. AN INSTRUCTION'S PC/NPC IS VISIBLE IN W2.
+// F:S:D:E:M:G:W2
+// Modified by Done/Retry and inst
+
+/* logic moved to tlu_misctl
+// On done, npc will become pc. 
+// modified due to bug 3017 
+// pc width increase from 48 -> 49 bits
+mux3ds #(49) finalpc_sel_m (
+       .in0  (dnrtry_pc[48:0]), 	
+	   .in1  (dnrtry_npc[48:0]),
+	   .in2  (ifu_tlu_pc_m[48:0]),
+       .sel0 (tlu_retry_inst_m),  	
+	   .sel1 (tlu_done_inst_m),
+	   .sel2 (tlu_dnrtry_inst_m_l),
+       .dout (pc_new[48:0])
+); 
+// On done, npc will stay npc. The valid to the IFU will
+// not be signaled along with npc for a done. 
+// modified due to bug 3017 
+// pc width increase from 48 -> 49 bits
+mux2ds #(49) finalnpc_sel_m (
+       .in0  (dnrtry_npc[48:0]), 	
+       .in1  (ifu_tlu_npc_m[48:0]),
+       .sel0 (~tlu_dnrtry_inst_m_l),  	
+       .sel1 (tlu_dnrtry_inst_m_l),
+       .dout (npc_new[48:0])
+); 
+
+dff_s #(49) dff_pc_new_w (
+    .din (pc_new[48:0]), 	
+    .q   (pc_new_w[48:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s #(49) dff_npc_new_w (
+    .din (npc_new[48:0]), 	
+    .q   (npc_new_w[48:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+//
+// Gated clocks for the per-thread true PC/nPC flops.
+// These buffers are instantiated only when FPGA_SYN_CLK_EN is not defined.
+// Each clken_buf takes clk on rclk, tlu_update_pc_l_w[N] on enb_l and
+// se_l on tmb_l, and drives pcN_clk.
+`ifndef FPGA_SYN_CLK_EN
+
+// thread 0
+clken_buf clkbf_pc0 (
+    .clk   (pc0_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_pc_l_w[0]),
+    .tmb_l (se_l)
+);
+
+// thread 1
+clken_buf clkbf_pc1 (
+    .clk   (pc1_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_pc_l_w[1]),
+    .tmb_l (se_l)
+);
+
+// thread 2
+clken_buf clkbf_pc2 (
+    .clk   (pc2_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_pc_l_w[2]),
+    .tmb_l (se_l)
+);
+
+// thread 3
+clken_buf clkbf_pc3 (
+    .clk   (pc3_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_pc_l_w[3]),
+    .tmb_l (se_l)
+);
+
+`endif
+// Thread 0 true PC register.
+// modified for bug 3017: all pc widths have been increased from 48 to 49 bits.
+// FPGA_SYN_CLK_DFF picks an enable flop on clk; otherwise a plain flop on pc0_clk.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(49) pc0_true (
+    .clk (clk),
+    .en  (~(tlu_update_pc_l_w[0])),
+    .din (tlu_pc_new_w[48:0]),
+    .q   (true_pc0[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`else
+dff_s #(49) pc0_true (
+    .clk (pc0_clk),
+    .din (tlu_pc_new_w[48:0]),
+    .q   (true_pc0[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`endif
+
+// Thread 0 true nPC register. update_pc will be used for both pc and npc -
+// in this case npc will contain gibberish but it's okay.
+// modified to avert area growth
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(49) npc0_true (
+    .clk (clk),
+    .en  (~(tlu_update_pc_l_w[0])),
+    .din (tlu_npc_new_w[48:0]),
+    .q   (true_npc0[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`else
+dff_s #(49) npc0_true (
+    .clk (pc0_clk),
+    .din (tlu_npc_new_w[48:0]),
+    .q   (true_npc0[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`endif
+// THREAD1 true PC register: enable flop on clk under FPGA_SYN_CLK_DFF, else plain flop on pc1_clk.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(49) pc1_true (
+    .clk (clk),
+    .en  (~(tlu_update_pc_l_w[1])),
+    .din (tlu_pc_new_w[48:0]),
+    .q   (true_pc1[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`else
+dff_s #(49) pc1_true (
+    .clk (pc1_clk),
+    .din (tlu_pc_new_w[48:0]),
+    .q   (true_pc1[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`endif
+
+// THREAD1 true nPC register. update_pc will be used for both pc and npc -
+// in this case npc will contain gibberish but it's okay.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(49) npc1_true (
+    .clk (clk),
+    .en  (~(tlu_update_pc_l_w[1])),
+    .din (tlu_npc_new_w[48:0]),
+    .q   (true_npc1[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`else
+dff_s #(49) npc1_true (
+    .clk (pc1_clk),
+    .din (tlu_npc_new_w[48:0]),
+    .q   (true_npc1[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`endif
+// THREAD2 true PC register: enable flop on clk under FPGA_SYN_CLK_DFF, else plain flop on pc2_clk.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(49) pc2_true (
+    .clk (clk),
+    .en  (~(tlu_update_pc_l_w[2])),
+    .din (tlu_pc_new_w[48:0]),
+    .q   (true_pc2[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`else
+dff_s #(49) pc2_true (
+    .clk (pc2_clk),
+    .din (tlu_pc_new_w[48:0]),
+    .q   (true_pc2[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`endif
+
+// THREAD2 true nPC register. update_pc will be used for both pc and npc -
+// in this case npc will contain gibberish but it's okay.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(49) npc2_true (
+    .clk (clk),
+    .en  (~(tlu_update_pc_l_w[2])),
+    .din (tlu_npc_new_w[48:0]),
+    .q   (true_npc2[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`else
+dff_s #(49) npc2_true (
+    .clk (pc2_clk),
+    .din (tlu_npc_new_w[48:0]),
+    .q   (true_npc2[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`endif
+// THREAD3 true PC register: enable flop on clk under FPGA_SYN_CLK_DFF, else plain flop on pc3_clk.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(49) pc3_true (
+    .clk (clk),
+    .en  (~(tlu_update_pc_l_w[3])),
+    .din (tlu_pc_new_w[48:0]),
+    .q   (true_pc3[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`else
+dff_s #(49) pc3_true (
+    .clk (pc3_clk),
+    .din (tlu_pc_new_w[48:0]),
+    .q   (true_pc3[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`endif
+
+// THREAD3 true nPC register. update_pc will be used for both pc and npc -
+// in this case npc will contain gibberish but it's okay.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(49) npc3_true (
+    .clk (clk),
+    .en  (~(tlu_update_pc_l_w[3])),
+    .din (tlu_npc_new_w[48:0]),
+    .q   (true_npc3[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`else
+dff_s #(49) npc3_true (
+    .clk (pc3_clk),
+    .din (tlu_npc_new_w[48:0]),
+    .q   (true_npc3[48:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+`endif
+
+//=========================================================================================
+//	Generating Trap Vector
+//=========================================================================================
+// pc_bit15_sel: tlu_tl_gt_0_w2 when tlu_trap_hpstate_enb is 0, htba_data[0] when it is 1.
+// Normal Trap Processing.
+mux2ds mux_pc_bit15_sel (
+    .in0  (tlu_tl_gt_0_w2),
+    .in1  (htba_data[0]),
+    .sel0  (~tlu_trap_hpstate_enb),
+    .sel1  (tlu_trap_hpstate_enb),
+    .dout (pc_bit15_sel)            // lowest bit of the htba-based entry below; presumably PC bit 15 - confirm
+);
+//
+// modified to help speed up simulation time
+// The reset vector base is a constant macro; each mux input below is 34 bits wide.
+assign tlu_rstvaddr_base[33:0] = `RSTVADDR_BASE;
+mux3ds #(34) nrmlpc_sel_w2 (
+       .in0  (tlu_rstvaddr_base[33:0]),                 // reset vector base
+	   .in1  ({tba_data[32:0], tlu_tl_gt_0_w2}),    // tba with tlu_tl_gt_0_w2 appended as LSB
+	   .in2  ({htba_data[33:1], pc_bit15_sel}),     // htba with its bit 0 replaced by pc_bit15_sel
+       .sel0 (tlu_pc_mxsel_w2[0]),                      // selects come from tlu_pc_mxsel_w2[2:0];
+	   .sel1 (tlu_pc_mxsel_w2[1]),                  // assumed one-hot - TODO confirm at the driver
+	   .sel2 (tlu_pc_mxsel_w2[2]),
+       .dout (partial_trap_pc_w2[33:0])
+);
+// The _w1 output is a direct, unregistered copy of the _w2 net.
+assign tlu_partial_trap_pc_w1[33:0] = partial_trap_pc_w2[33:0]; 
+
+// restore pc/npc select
+// true pc muxed into restore pc; previously restore_pcx was muxed in.
+// modified due to bug 3017
+`ifdef FPGA_SYN_1THREAD
+   assign restore_pc_w2[48:0] = true_pc0[48:0];     // single-thread build: thread 0 values used directly
+   assign restore_npc_w2[48:0] = true_npc0[48:0];
+`else
+// 98 bits = 49-bit true pc concatenated with 49-bit true npc; selects are tlu_thrd_wsel_w2[3:0].
+mux4ds  #(98) trprsel (
+        .in0    ({true_pc0[48:0],true_npc0[48:0]}),
+        .in1    ({true_pc1[48:0],true_npc1[48:0]}),
+        .in2    ({true_pc2[48:0],true_npc2[48:0]}),
+        .in3    ({true_pc3[48:0],true_npc3[48:0]}),
+        .sel0   (tlu_thrd_wsel_w2[0]),
+        .sel1   (tlu_thrd_wsel_w2[1]),
+        .sel2   (tlu_thrd_wsel_w2[2]),
+        .sel3   (tlu_thrd_wsel_w2[3]),
+        .dout   ({restore_pc_w2[48:0],restore_npc_w2[48:0]})
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+// The _w1 outputs below are direct, unregistered copies of the _w2 nets.
+// the matching of the w1 and w2 is intentional
+assign tlu_restore_pc_w1[48:0]  = restore_pc_w2[48:0];
+assign tlu_restore_npc_w1[48:0] = restore_npc_w2[48:0];
+
+//=========================================================================================
+//	TAP PC OBSERVABILITY
+//=========================================================================================
+// Per-thread shadow-scan word, MSB first: hpstate[2:0], pstate.priv, pstate.ie, pc[47:2].
+// modified due to spec change
+// shadow scan 
+// thread 0 data
+assign sscan_data_test0[`TDP_SSCAN_WIDTH-1:0] = 
+           {true_hpstate0[2:0], 
+            true_pstate0[`PSTATE_PRIV],
+            true_pstate0[`PSTATE_IE],
+            true_pc0[47:2]};
+//
+// thread 1 data
+assign sscan_data_test1[`TDP_SSCAN_WIDTH-1:0] = 
+           {true_hpstate1[2:0], 
+            true_pstate1[`PSTATE_PRIV],
+            true_pstate1[`PSTATE_IE],
+            true_pc1[47:2]};
+//
+// thread 2 data
+assign sscan_data_test2[`TDP_SSCAN_WIDTH-1:0] = 
+           {true_hpstate2[2:0], 
+            true_pstate2[`PSTATE_PRIV],
+            true_pstate2[`PSTATE_IE],
+            true_pc2[47:2]};
+//
+// thread 3 data
+assign sscan_data_test3[`TDP_SSCAN_WIDTH-1:0] = 
+           {true_hpstate3[2:0], 
+            true_pstate3[`PSTATE_PRIV],
+            true_pstate3[`PSTATE_IE],
+            true_pc3[47:2]};
+// Thread select for the scan word; a single-thread build always exposes thread 0.
+`ifdef FPGA_SYN_1THREAD
+   assign tdp_sscan_test_data[`TDP_SSCAN_WIDTH-1:0] = sscan_data_test0[`TDP_SSCAN_WIDTH-1:0];
+`else
+   
+mux4ds #(`TDP_SSCAN_WIDTH) mx_sscan_test_data (
+       .in0  (sscan_data_test0[`TDP_SSCAN_WIDTH-1:0]),
+       .in1  (sscan_data_test1[`TDP_SSCAN_WIDTH-1:0]),
+       .in2  (sscan_data_test2[`TDP_SSCAN_WIDTH-1:0]),
+       .in3  (sscan_data_test3[`TDP_SSCAN_WIDTH-1:0]),
+       .sel0 (sscan_tid_sel[0]),
+       .sel1 (sscan_tid_sel[1]),
+       .sel2 (sscan_tid_sel[2]),
+       .sel3 (sscan_tid_sel[3]),
+       .dout (tdp_sscan_test_data[`TDP_SSCAN_WIDTH-1:0])
+); 
+`endif // !`ifdef FPGA_SYN_1THREAD
+// The select is ctu_sscan_tid passed through unchanged.
+assign sscan_tid_sel[`TLU_THRD_NUM-1:0] = ctu_sscan_tid[`TLU_THRD_NUM-1:0]; 
+// The tlu_ output is a direct copy of the muxed scan word.
+assign tlu_sscan_test_data[`TDP_SSCAN_WIDTH-1:0] =
+          tdp_sscan_test_data[`TDP_SSCAN_WIDTH-1:0]; 
+
+//=========================================================================================
+//	PSTATE for Threads
+//=========================================================================================
+
+// pstate needs to be updated on a trap. Assume for now that only non-RED state instruction
+// related traps are handled.
+// pstate_priv_set: term shared by all threads that forces the trap-time pstate priv bit to 1.
+// Normal traps, non-red mode.
+assign pstate_priv_set = tlu_select_tba_w2 | local_rst | tlu_select_redmode;
+// Per thread, the priv bit is also forced to 1 while that thread's hpstate.enb is 0.
+assign pstate_priv_thrd_set[0] = pstate_priv_set | ~true_hpstate0[`HPSTATE_ENB];
+assign pstate_priv_thrd_set[1] = pstate_priv_set | ~true_hpstate1[`HPSTATE_ENB];
+assign pstate_priv_thrd_set[2] = pstate_priv_set | ~true_hpstate2[`HPSTATE_ENB];
+assign pstate_priv_thrd_set[3] = pstate_priv_set | ~true_hpstate3[`HPSTATE_ENB];
+// pstate_priv_update_w2[n] is 1 when any of these holds for thread n:
+//   - tlu_update_pstate_l_w2[n] is 0;
+//   - tlu_update_hpstate_l_w2[n] is 0 while hpstate.enb is 0;
+//   - tlu_hpstate_din_selN[1] is 1 and the WSR_HPSTATE_ENB bit of wsr_data_w2 is 0.
+// modified for bug 3349
+assign pstate_priv_update_w2[0] = 
+       ~(tlu_update_pstate_l_w2[0] & 
+        (true_hpstate0[`HPSTATE_ENB] | tlu_update_hpstate_l_w2[0])) |
+        (~wsr_data_w2[`WSR_HPSTATE_ENB] & tlu_hpstate_din_sel0[1]);
+assign pstate_priv_update_w2[1] = 
+       ~(tlu_update_pstate_l_w2[1] & 
+        (true_hpstate1[`HPSTATE_ENB] | tlu_update_hpstate_l_w2[1])) |
+        (~wsr_data_w2[`WSR_HPSTATE_ENB] & tlu_hpstate_din_sel1[1]);
+assign pstate_priv_update_w2[2] = 
+       ~(tlu_update_pstate_l_w2[2] & 
+        (true_hpstate2[`HPSTATE_ENB] | tlu_update_hpstate_l_w2[2])) |
+        (~wsr_data_w2[`WSR_HPSTATE_ENB] & tlu_hpstate_din_sel2[1]);
+assign pstate_priv_update_w2[3] = 
+       ~(tlu_update_pstate_l_w2[3] & 
+        (true_hpstate3[`HPSTATE_ENB] | tlu_update_hpstate_l_w2[3])) |
+        (~wsr_data_w2[`WSR_HPSTATE_ENB] & tlu_hpstate_din_sel3[1]);
+// hpstate_priv_update_w2[n] is 1 when tlu_update_hpstate_l_w2[n] is 0, or when
+// tlu_update_pstate_l_w2[n] is 0 while hpstate.enb is 0 (its consumer is outside this excerpt).
+assign hpstate_priv_update_w2[0] = 
+       ~(tlu_update_hpstate_l_w2[0] & 
+        (true_hpstate0[`HPSTATE_ENB] | tlu_update_pstate_l_w2[0]));
+assign hpstate_priv_update_w2[1] = 
+       ~(tlu_update_hpstate_l_w2[1] & 
+        (true_hpstate1[`HPSTATE_ENB] | tlu_update_pstate_l_w2[1]));
+assign hpstate_priv_update_w2[2] = 
+       ~(tlu_update_hpstate_l_w2[2] & 
+        (true_hpstate2[`HPSTATE_ENB] | tlu_update_pstate_l_w2[2]));
+assign hpstate_priv_update_w2[3] = 
+       ~(tlu_update_hpstate_l_w2[3] & 
+        (true_hpstate3[`HPSTATE_ENB] | tlu_update_pstate_l_w2[3]));
+// hpstate.enb for the trap-time hpstate: current value, cleared by local_rst or tlu_select_redmode.
+// added for bug 2161 and modified for bug 2161
+assign hpstate_enb_set[0] = true_hpstate0[`HPSTATE_ENB] & ~(local_rst | tlu_select_redmode); 
+assign hpstate_enb_set[1] = true_hpstate1[`HPSTATE_ENB] & ~(local_rst | tlu_select_redmode); 
+assign hpstate_enb_set[2] = true_hpstate2[`HPSTATE_ENB] & ~(local_rst | tlu_select_redmode);
+assign hpstate_enb_set[3] = true_hpstate3[`HPSTATE_ENB] & ~(local_rst | tlu_select_redmode);
+// hpstate.ibe: current value, additionally cleared by tlu_ibrkpt_trap_w2.
+// added for hpstate.ibe ECO 
+// modified due to timing - tlu_ibrkpt_trap_g has been delayed one stage to tlu_ibrkpt_trap_w2
+assign hpstate_ibe_set[0] = 
+           true_hpstate0[`HPSTATE_IBE] & ~(local_rst | tlu_select_redmode | tlu_ibrkpt_trap_w2);
+assign hpstate_ibe_set[1] = 
+           true_hpstate1[`HPSTATE_IBE] & ~(local_rst | tlu_select_redmode | tlu_ibrkpt_trap_w2);
+assign hpstate_ibe_set[2] = 
+           true_hpstate2[`HPSTATE_IBE] & ~(local_rst | tlu_select_redmode | tlu_ibrkpt_trap_w2);
+assign hpstate_ibe_set[3] = 
+           true_hpstate3[`HPSTATE_IBE] & ~(local_rst | tlu_select_redmode | tlu_ibrkpt_trap_w2);
+// hpstate.tlz: current value, cleared by local_rst or tlu_select_redmode.
+// added due to TLZ spec change 
+// modified for bug 3505
+assign hpstate_tlz_set[0] = true_hpstate0[`HPSTATE_TLZ] & ~(local_rst | tlu_select_redmode);
+assign hpstate_tlz_set[1] = true_hpstate1[`HPSTATE_TLZ] & ~(local_rst | tlu_select_redmode);
+assign hpstate_tlz_set[2] = true_hpstate2[`HPSTATE_TLZ] & ~(local_rst | tlu_select_redmode);
+assign hpstate_tlz_set[3] = true_hpstate3[`HPSTATE_TLZ] & ~(local_rst | tlu_select_redmode);
+// Trap-time pstate image for thread 0.
+// thread 0
+assign tlu_select_tle[0] =                       // tle is kept unless tlu_select_redmode is 1
+           tlu_pstate_tle[0] & ~(tlu_select_redmode);
+// modified for timing and bug 3417 
+assign tlu_select_cle[0] =                       // cle takes tle when tba is selected or hpstate.enb is 0
+           tlu_select_tle[0] & 
+          (tlu_select_tba_w2 | ~true_hpstate0[`HPSTATE_ENB]); 
+//         tlu_select_tle[0] & tlu_select_tba_w2; 
+//
+// modified for timing and width cleanup
+/*
+assign	ntrap_pstate0[`PSTATE_TRUE_WIDTH-1:0] = 
+    {2'b0,  // tlu_select_int_global - replaced by gl register	
+	        // tlu_select_mmu_global - replaced by gl register
+	 tlu_select_cle[0], // cle<-tle, or 0	
+	 tlu_select_tle[0], // keep old tle, or 0
+     2'b0,
+     1'b0,  // tlu_select_redmode - replaced by hpstate.red
+	 1'b1,  // fp turned on
+	 1'b0,  // address masking turned off
+	 pstate_priv_thrd_set[0], // enter priv mode for priv traps
+	 1'b0,  // interrupts disabled
+	 1'b0}; // tlu_select_alt_global - replaced by gl register 
+*/
+assign	ntrap_pstate0[`WSR_PSTATE_VR_WIDTH-1:0] = 
+    {tlu_select_cle[0], // cle<-tle, or 0	
+	 tlu_select_tle[0], // keep old tle, or 0
+	 1'b1,  // fp turned on
+	 1'b0,  // address masking turned off
+	 pstate_priv_thrd_set[0], // enter priv mode for priv traps
+	 1'b0}; // interrupts disabled
+// Trap-time pstate image for thread 1.
+// thread 1
+assign tlu_select_tle[1] =                       // tle is kept unless tlu_select_redmode is 1
+           tlu_pstate_tle[1] & ~(tlu_select_redmode);
+// modified for timing and bug 3417 
+assign tlu_select_cle[1] =                       // cle takes tle when tba is selected or hpstate.enb is 0
+           tlu_select_tle[1] & 
+          (tlu_select_tba_w2 | ~true_hpstate1[`HPSTATE_ENB]); 
+//           tlu_select_tle[1] & tlu_select_tba_w2;
+//
+// modified due to timing
+/*
+assign	ntrap_pstate1[`PSTATE_TRUE_WIDTH-1:0] = 
+    {2'b0,  // tlu_select_int_global - replaced by gl register	
+	        // tlu_select_mmu_global - replaced by gl register
+	 tlu_select_cle[1], // cle<-tle, or 0	
+	 tlu_select_tle[1], // keep old tle, or 0
+     2'b0,
+     1'b0,  // tlu_select_redmode - replaced by hpstate.red
+	 1'b1,  // fp turned on
+	 1'b0,  // address masking turned off
+	 pstate_priv_thrd_set[1], // enter priv mode for priv traps
+	 1'b0,  // interrupts disabled
+	 1'b0}; // tlu_select_alt_global - replaced by gl register 
+*/
+assign	ntrap_pstate1[`WSR_PSTATE_VR_WIDTH-1:0] = 
+    {tlu_select_cle[1], // cle<-tle, or 0	
+	 tlu_select_tle[1], // keep old tle, or 0
+	 1'b1,  // fp turned on
+	 1'b0,  // address masking turned off
+	 pstate_priv_thrd_set[1], // enter priv mode for priv traps
+	 1'b0}; // interrupts disabled
+// Trap-time pstate image for thread 2.
+// thread 2
+assign tlu_select_tle[2] =                       // tle is kept unless tlu_select_redmode is 1
+           tlu_pstate_tle[2] & ~(tlu_select_redmode);
+// modified for timing and bug 3417 
+assign tlu_select_cle[2] =                       // cle takes tle when tba is selected or hpstate.enb is 0
+           tlu_select_tle[2] & 
+          (tlu_select_tba_w2 | ~true_hpstate2[`HPSTATE_ENB]); 
+//           tlu_select_tle[2] & tlu_select_tba_w2; 
+//
+// modified for timing and width cleanup
+/*
+assign	ntrap_pstate2[`PSTATE_TRUE_WIDTH-1:0] = 
+    {2'b0,  // tlu_select_int_global - replaced by gl register	
+	        // tlu_select_mmu_global - replaced by gl register
+	 tlu_select_cle[2], // cle<-tle, or 0	
+	 tlu_select_tle[2], // keep old tle, or 0
+     2'b0,
+     1'b0,  // tlu_select_redmode - replaced by hpstate.red
+	 1'b1,  // fp turned on
+	 1'b0,  // address masking turned off
+	 pstate_priv_thrd_set[2], // enter priv mode for priv traps
+	 1'b0,  // interrupts disabled
+	 1'b0}; // tlu_select_alt_global - replaced by gl register 
+*/
+assign	ntrap_pstate2[`WSR_PSTATE_VR_WIDTH-1:0] = 
+    {tlu_select_cle[2], // cle<-tle, or 0	
+	 tlu_select_tle[2], // keep old tle, or 0
+	 1'b1,  // fp turned on
+	 1'b0,  // address masking turned off
+	 pstate_priv_thrd_set[2], // enter priv mode for priv traps
+	 1'b0}; // interrupts disabled
+// Trap-time pstate image for thread 3.
+// thread 3
+assign tlu_select_tle[3] =                       // tle is kept unless tlu_select_redmode is 1
+           tlu_pstate_tle[3] & ~(tlu_select_redmode);
+// modified for timing and bug 3417 
+assign tlu_select_cle[3] =                       // cle takes tle when tba is selected or hpstate.enb is 0
+           tlu_select_tle[3] & 
+          (tlu_select_tba_w2 | ~true_hpstate3[`HPSTATE_ENB]); 
+//           tlu_select_tle[3] & tlu_select_tba_w2;
+//
+// modified for timing
+/*
+assign	ntrap_pstate3[`PSTATE_TRUE_WIDTH-1:0] = 
+    {2'b0,  // tlu_select_int_global - replaced by gl register	
+	        // tlu_select_mmu_global - replaced by gl register
+	 tlu_select_cle[3], // cle<-tle, or 0	
+	 tlu_select_tle[3], // keep old tle, or 0
+     2'b0,
+     1'b0,  // tlu_select_redmode - replaced by hpstate.red
+	 1'b1,  // fp turned on
+	 1'b0,  // address masking turned off
+	 pstate_priv_thrd_set[3], // enter priv mode for priv traps
+	 1'b0,  // interrupts disabled
+	 1'b0}; // tlu_select_alt_global - replaced by gl register 
+*/
+assign	ntrap_pstate3[`WSR_PSTATE_VR_WIDTH-1:0] = 
+    {tlu_select_cle[3], // cle<-tle, or 0	
+	 tlu_select_tle[3], // keep old tle, or 0
+	 1'b1,  // fp turned on
+	 1'b0,  // address masking turned off
+	 pstate_priv_thrd_set[3], // enter priv mode for priv traps
+	 1'b0}; // interrupts disabled
+
+// Clock Enable Buffers
+//
+// Gated clocks for the per-thread pstate flops.
+// These buffers are instantiated only when FPGA_SYN_CLK_EN is not defined.
+// Each clken_buf takes clk on rclk, tlu_update_pstate_l_w2[N] on enb_l
+// and se_l on tmb_l, and drives pstateN_clk.
+`ifndef FPGA_SYN_CLK_EN
+
+// thread 0
+clken_buf clkbf_ps0 (
+    .clk   (pstate0_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_pstate_l_w2[0]),
+    .tmb_l (se_l)
+);
+
+// thread 1
+clken_buf clkbf_ps1 (
+    .clk   (pstate1_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_pstate_l_w2[1]),
+    .tmb_l (se_l)
+);
+
+// thread 2
+clken_buf clkbf_ps2 (
+    .clk   (pstate2_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_pstate_l_w2[2]),
+    .tmb_l (se_l)
+);
+
+// thread 3
+clken_buf clkbf_ps3 (
+    .clk   (pstate3_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_pstate_l_w2[3]),
+    .tmb_l (se_l)
+);
+
+`endif
+//
+// added for hypervisor support
+// clock enable buffers for updating the hpstate registers
+//
+// These buffers are instantiated only when FPGA_SYN_CLK_EN is not defined.
+// Each clken_buf takes clk on rclk, tlu_update_hpstate_l_w2[N] on
+// enb_l and se_l on tmb_l, and drives hpstateN_clk.
+//
+`ifndef FPGA_SYN_CLK_EN
+
+// thread 0
+clken_buf clkbf_hps0 (
+    .clk   (hpstate0_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_hpstate_l_w2[0]),
+    .tmb_l (se_l)
+);
+
+// thread 1
+clken_buf clkbf_hps1 (
+    .clk   (hpstate1_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_hpstate_l_w2[1]),
+    .tmb_l (se_l)
+);
+
+// thread 2
+clken_buf clkbf_hps2 (
+    .clk   (hpstate2_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_hpstate_l_w2[2]),
+    .tmb_l (se_l)
+);
+
+// thread 3
+clken_buf clkbf_hps3 (
+    .clk   (hpstate3_clk),
+    .rclk  (clk),
+    .enb_l (tlu_update_hpstate_l_w2[3]),
+    .tmb_l (se_l)
+);
+
+`endif
+// assign the initial value of hpstate.red mode
+// hpstate_redmode is 1 under local_rst; otherwise it follows tlu_select_redmode.
+// modified for bug 1893
+// assign hpstate_redmode = 
+//            (local_rst)? 1'b1: tlu_select_redmode;
+assign hpstate_redmode = 
+           local_rst | (~local_rst & tlu_select_redmode); 
+// 
+// extracting hpstate from wsr_data
+// Packed MSB first as {IBE, ENB, RED, PRIV, TLZ}.
+// modified for timing tlu_wsr_data_w -> wsr_data_w2
+assign wsr_data_hpstate_w2[`TLU_HPSTATE_WIDTH-1:0] = 
+     {wsr_data_w2[`WSR_HPSTATE_IBE],
+      wsr_data_w2[`WSR_HPSTATE_ENB],
+      wsr_data_w2[`WSR_HPSTATE_RED],
+      wsr_data_w2[`WSR_HPSTATE_PRIV],
+      wsr_data_w2[`WSR_HPSTATE_TLZ]
+     };
+//
+// added or modified for hypervisor support
+// modified due to timing
+/*
+assign wsr_data_pstate_g[`PSTATE_TRUE_WIDTH-1:0] = 
+    {2'b0,  // old IG, MG - replaced by global register
+     tlu_wsr_data_w[`PSTATE_VRANGE2_HI:`PSTATE_VRANGE2_LO], 
+     2'b0,  // memory model has been changed to TSO only - bug 2588
+     1'b0,  // old red, - replaced by hpstate.red 
+     tlu_wsr_data_w[`PSTATE_VRANGE1_HI:`PSTATE_VRANGE1_LO], 
+     1'b0};  // old AG - replaced by global register 
+
+assign wsr_data_pstate_g[`WSR_PSTATE_VR_WIDTH-1:0] = 
+       {tlu_wsr_data_w[`PSTATE_VRANGE2_HI:`PSTATE_VRANGE2_LO],
+        tlu_wsr_data_w[`PSTATE_VRANGE1_HI:`PSTATE_VRANGE1_LO]};
+*/
+assign wsr_data_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0] =       // only the two variable PSTATE ranges are kept
+       {wsr_data_w2[`PSTATE_VRANGE2_HI:`PSTATE_VRANGE2_LO],
+        wsr_data_w2[`PSTATE_VRANGE1_HI:`PSTATE_VRANGE1_LO]};
+// Thread 0 pstate: source select, next-value mux, state flops and full-width view.
+// THREAD0
+// added for bug 1575
+// modified for bug 2584
+// assign tlu_pstate_nt_sel0 = ~|(tlu_pstate_din_sel0[1:0]);
+assign tlu_pstate_nt_sel0 =                      // trap image is chosen when neither other select is 1
+          ~(tlu_pstate_din_sel0[0] | tlu_pstate_wsr_sel0);
+// wsr data is also chosen on tlu_hpstate_din_sel0[1] unless hpstate.enb is 1 both now and in wsr_data_w2.
+// modified for bug 3349
+assign tlu_pstate_wsr_sel0 = 
+           tlu_pstate_din_sel0[1] | 
+           (~(true_hpstate0[`HPSTATE_ENB] & wsr_data_w2[`WSR_HPSTATE_ENB]) &
+              tlu_hpstate_din_sel0[1]);
+//            (~true_hpstate0[`HPSTATE_ENB] & tlu_hpstate_din_sel0[1]);
+// Next pstate: done/retry value (in0), wsr write data (in1) or trap image (in2).
+mux3ds #(`WSR_PSTATE_VR_WIDTH) mux_restore_pstate0(
+       .in0  (dnrtry_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0]), 	
+	   .in1  (wsr_data_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0]),
+	   .in2  (ntrap_pstate0[`WSR_PSTATE_VR_WIDTH-1:0]),
+       .sel0 (tlu_pstate_din_sel0[0]),  		
+	   .sel1 (tlu_pstate_wsr_sel0),
+	   .sel2 (tlu_pstate_nt_sel0),
+       .dout (restore_pstate0[`WSR_PSTATE_VR_WIDTH-1:0])
+);
+// State flop for every bit except one; presumably the omitted bit is WSR_PSTATE_VR_PRIV - confirm against the defines.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`WSR_PSTATE_VR_WIDTH-1) dff_restore_pstate0_w3 (
+    .din ({restore_pstate0[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1], 
+           restore_pstate0[`WSR_PSTATE_VRANGE1_LO]}), 
+    .q   ({restore_pstate0_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1],
+           restore_pstate0_w3[`WSR_PSTATE_VRANGE1_LO]}),
+    .en (~(tlu_update_pstate_l_w2[0])), .clk(clk),    // enable = inverted tlu_update_pstate_l_w2[0]
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`WSR_PSTATE_VR_WIDTH-1) dff_restore_pstate0_w3 (
+    .din ({restore_pstate0[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1], 
+           restore_pstate0[`WSR_PSTATE_VRANGE1_LO]}), 
+    .q   ({restore_pstate0_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1],
+           restore_pstate0_w3[`WSR_PSTATE_VRANGE1_LO]}),
+    .clk (pstate0_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// The priv bit has its own enable flop on clk, with pstate_priv_update_w2[0] as enable.
+dffe_s dffe_pstate0_priv (
+    .din (restore_pstate0[`WSR_PSTATE_VR_PRIV]),
+    .q   (restore_pstate0_w3[`WSR_PSTATE_VR_PRIV]),
+    .en  (pstate_priv_update_w2[0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// Expand the stored variable ranges to PSTATE_TRUE_WIDTH; every other bit is constant 0.
+// true_pstate0 assignments
+assign true_pstate0[`PSTATE_TRUE_WIDTH-1:0] = 
+           {2'b0, // tlu_select_int_global - replaced by gl register
+                  // tlu_select_mmu_global - replaced by gl register 
+            restore_pstate0_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE2_LO],
+            2'b0, // fixed mmodel - TSO
+            1'b0, // redmode - replaced by hpstate.red
+            restore_pstate0_w3[`WSR_PSTATE_VRANGE1_HI:`WSR_PSTATE_VRANGE1_LO],
+            1'b0}; // tlu_select_alt_global - replaced by gl register 
+//
+// modified for timing
+/*
+mux3ds #(9) mux_restore_pstate0(
+       .in0  (dnrtry_pstate[`PSTATE_TRUE_WIDTH-3:1]), 	
+	   .in1  (wsr_data_pstate_g[`PSTATE_TRUE_WIDTH-3:1]),
+	   .in2  (ntrap_pstate0[`PSTATE_TRUE_WIDTH-3:1]),
+       .sel0 (tlu_pstate_din_sel0[0]),  		
+       // modified for bug 2584
+	   // .sel1 (tlu_pstate_din_sel0[1]),
+	   .sel1 (tlu_pstate_wsr_sel0),
+	   .sel2 (tlu_pstate_nt_sel0),
+       .dout (restore_pstate0[`PSTATE_TRUE_WIDTH-3:1])
+);
+
+dff_s #(`PSTATE_TRUE_WIDTH) pstate0_1 (
+    .din (restore_pstate0[`PSTATE_TRUE_WIDTH-1:0]), 
+	.q   (true_pstate0[`PSTATE_TRUE_WIDTH-1:0]),
+    .clk (pstate0_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+ );
+//
+dff_s #(`PSTATE_TRUE_WIDTH-1) dff_true_pstate0 (
+    .din ({restore_pstate0[`PSTATE_TRUE_WIDTH-1:3], 
+           restore_pstate0[1:0]}), 
+    .q   ({true_pstate0[`PSTATE_TRUE_WIDTH-1:3], 
+           true_pstate0[1:0]}), 
+    .clk (pstate0_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+dffe_s dffe_pstate0_priv (
+    .din (restore_pstate0[`PSTATE_PRIV]),
+    .q   (true_pstate0[`PSTATE_PRIV]),
+    .en  (pstate_priv_update_g[0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// modified for hypervisor support
+assign restore_pstate0[11:10] = 2'b0;
+assign restore_pstate0[0]     = 1'b0;
+//
+// modified to reflect the physical implementation
+// assign hpstate_dnrtry_priv_w2[0] = 
+           (true_hpstate0[`HPSTATE_ENB])? 
+            tsa_dnrtry_hpstate_w2[`HPSTATE_PRIV] :
+            dnrtry_pstate_w2[`WSR_PSTATE_VR_PRIV];
+*/
+mux2ds mx_hpstate_dnrtry_priv_w2_0 (             // done/retry priv: TSA hpstate.priv if hpstate.enb is 1, else TSA pstate.priv
+       .in0  (tsa_dnrtry_hpstate_w2[`HPSTATE_PRIV]),
+	   .in1  (dnrtry_pstate_w2[`WSR_PSTATE_VR_PRIV]),
+       .sel0 (true_hpstate0[`HPSTATE_ENB]),  	
+	   .sel1 (~true_hpstate0[`HPSTATE_ENB]),
+       .dout (hpstate_dnrtry_priv_w2[0])
+); 
+// Thread 0 hpstate restored by done/retry; the enb position keeps its current value.
+assign dnrtry_hpstate0_w2[`TLU_HPSTATE_WIDTH-1:0] = 
+       {tsa_dnrtry_hpstate_w2[`TSA_HTSTATE_WIDTH-1],   // top bit of the TSA image
+        true_hpstate0[`HPSTATE_ENB],  
+        tsa_dnrtry_hpstate_w2[`HPSTATE_RED],
+        hpstate_dnrtry_priv_w2[0],
+        tsa_dnrtry_hpstate_w2[`HPSTATE_TLZ]};
+// hpstate priv bit on a trap: 1 unless tba is selected and tlu_select_redmode is 0.
+// added for bug 3747
+assign hpstate_priv_set = ~(tlu_select_tba_w2) | tlu_select_redmode; 
+//
+// constructing the hpstate for hyper-privileged traps 
+//
+assign hntrap_hpstate0_w2[`TLU_HPSTATE_WIDTH-1:0] = 
+       {hpstate_ibe_set[0],  
+        hpstate_enb_set[0],  
+        hpstate_redmode, // Redmode bit
+        // modified for bug 3747
+        hpstate_priv_set, // hyper-privileged bit
+        hpstate_tlz_set[0]}; // TLZ interrupt bit 
+// The trap image is chosen when neither the done/retry nor the wsr select is 1.
+assign tlu_hpstate_hnt_sel0 = 
+       ~(tlu_hpstate_din_sel0[0] | tlu_hpstate_wsr_sel0);
+// wsr data is also chosen on tlu_pstate_din_sel0[1] while hpstate.enb is 0.
+assign tlu_hpstate_wsr_sel0 = 
+           tlu_hpstate_din_sel0[1] | 
+           (~true_hpstate0[`HPSTATE_ENB] & tlu_pstate_din_sel0[1]);
+// Next hpstate: done/retry value (in0), wsr write data (in1) or trap image (in2).
+mux3ds #(`TLU_HPSTATE_WIDTH) mux_restore_hpstate0(
+       .in0  (dnrtry_hpstate0_w2[`TLU_HPSTATE_WIDTH-1:0]), 	
+	   .in1  (wsr_data_hpstate_w2[`TLU_HPSTATE_WIDTH-1:0]),
+	   .in2  (hntrap_hpstate0_w2[`TLU_HPSTATE_WIDTH-1:0]),
+       .sel0 (tlu_hpstate_din_sel0[0]),  		
+       .sel1 (tlu_hpstate_wsr_sel0),
+	   .sel2 (tlu_hpstate_hnt_sel0),
+       .dout (restore_hpstate0[`TLU_HPSTATE_WIDTH-1:0])
+);
+// Thread 0 hpstate storage: restore_hpstate0 is registered into true_hpstate0 by the three flops below.
+// need to initialize hpstate.enb = 0
+// need to initialize hpstate.ibe = 0
+// modified due to the addition of hpstate.ibe
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s #(2) dffr_true_hpst0_enb_ibe ( // FPGA_SYN_CLK_DFF build: clocked by clk with an enable instead of hpstate0_clk
+    .din (restore_hpstate0[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+	.q   (true_hpstate0[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+    .rst (local_rst),
+    .en (~(tlu_update_hpstate_l_w2[0])), .clk(clk), // enable is the inverse of tlu_update_hpstate_l_w2[0]
+    .se  (se),       
+    .si  (),          
+    .so  ()
+ );
+`else
+dffr_s #(2) dffr_true_hpst0_enb_ibe ( // default build: no enable pin, clocked by hpstate0_clk (generated outside this view)
+    .din (restore_hpstate0[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+	.q   (true_hpstate0[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+    .rst (local_rst),
+    .clk (hpstate0_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+ );
+`endif
+// red and tlz bits: same two clocking variants as above, but the flop has no reset pin
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(2) dff_true_hpstate0 (
+    .din ({restore_hpstate0[`HPSTATE_RED], 
+           restore_hpstate0[`HPSTATE_TLZ]}),
+    .q   ({true_hpstate0[`HPSTATE_RED], 
+           true_hpstate0[`HPSTATE_TLZ]}),
+    .en (~(tlu_update_hpstate_l_w2[0])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(2) dff_true_hpstate0 (
+    .din ({restore_hpstate0[`HPSTATE_RED], 
+           restore_hpstate0[`HPSTATE_TLZ]}),
+    .q   ({true_hpstate0[`HPSTATE_RED], 
+           true_hpstate0[`HPSTATE_TLZ]}),
+    .clk (hpstate0_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// priv bit: always clocked by clk and enabled by its own strobe, hpstate_priv_update_w2[0], in both builds
+dffe_s dffe_hpstate0_priv (
+    .din (restore_hpstate0[`HPSTATE_PRIV]), 
+    .q   (true_hpstate0[`HPSTATE_PRIV]), 
+    .en  (hpstate_priv_update_w2[0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+assign tlu_ifu_pstate_pef[0]   = true_pstate0[`PSTATE_PEF]; // bit 0 of each per-thread status vector is driven from thread 0's true_pstate0 / true_hpstate0
+assign tlu_lsu_pstate_cle[0]   = true_pstate0[`PSTATE_CLE];
+assign tlu_lsu_pstate_priv[0]  = true_pstate0[`PSTATE_PRIV];
+assign tlu_int_pstate_ie[0]    = true_pstate0[`PSTATE_IE];
+assign local_pstate_ie[0]      = true_pstate0[`PSTATE_IE];
+// assign tlu_pstate_cle[0] 	   = true_pstate0[`PSTATE_CLE];
+assign tlu_pstate_tle[0] 	   = true_pstate0[`PSTATE_TLE];
+// assign tlu_pstate_priv[0] 	   = true_pstate0[`PSTATE_PRIV];
+assign local_pstate_priv[0]    = true_pstate0[`PSTATE_PRIV];
+assign tlu_pstate_am[0] 	   = true_pstate0[`PSTATE_AM];
+assign tlu_int_redmode[0] = true_hpstate0[`HPSTATE_RED];
+assign tlu_lsu_redmode[0] = true_hpstate0[`HPSTATE_RED];
+// the redmode outputs above are taken from hpstate.red, not from pstate
+// hypervisor privilege indicator
+assign tlu_hpstate_priv[0]   = true_hpstate0[`HPSTATE_PRIV];
+assign local_hpstate_priv[0] = true_hpstate0[`HPSTATE_PRIV];
+assign tcl_hpstate_priv[0]   = true_hpstate0[`HPSTATE_PRIV];
+// the tlu_/local_/tcl_ names in each group are copies of the same bit
+// hypervisor lite mode selector
+assign tlu_hpstate_enb[0]   = true_hpstate0[`HPSTATE_ENB];
+assign local_hpstate_enb[0] = true_hpstate0[`HPSTATE_ENB];
+assign tcl_hpstate_enb[0]   = true_hpstate0[`HPSTATE_ENB];
+
+// hypervisor tlz indicator
+assign tlu_hpstate_tlz[0] = true_hpstate0[`HPSTATE_TLZ];
+
+// hypervisor instruction breakpt enable 
+assign tlu_hpstate_ibe[0] = true_hpstate0[`HPSTATE_IBE];
+
+`ifdef FPGA_SYN_1THREAD
+   assign tlu_ifu_pstate_pef[3:1] = 3'b000; // FPGA_SYN_1THREAD build: bits 3..1 of every per-thread status vector are tied to 0
+   assign tlu_lsu_pstate_cle[3:1] = 3'b000;
+   assign tlu_lsu_pstate_priv[3:1] = 3'b000;
+   assign tlu_int_pstate_ie[3:1] = 3'b000;
+   assign local_pstate_ie[3:1] = 3'b000;
+   assign tlu_pstate_tle[3:1] = 3'b000;
+   assign local_pstate_priv[3:1] = 3'b000;
+   assign tlu_pstate_am[3:1] = 3'b000;
+   assign tlu_int_redmode[3:1] = 3'b000;
+   assign tlu_lsu_redmode[3:1] = 3'b000;
+   assign tlu_hpstate_priv[3:1] = 3'b000;
+   assign local_hpstate_priv[3:1] = 3'b000;
+   assign tcl_hpstate_priv[3:1] = 3'b000;
+   assign tlu_hpstate_enb[3:1] = 3'b000;
+   assign local_hpstate_enb[3:1] = 3'b000;
+   assign tcl_hpstate_enb[3:1] = 3'b000;
+   assign tlu_hpstate_tlz[3:1] = 3'b000;
+   assign tlu_hpstate_ibe[3:1] = 3'b000; // the per-thread logic for threads 1..3 lives in the `else arm that follows
+   
+`else
+   
+// THREAD 1 - pstate input selection (only compiled when FPGA_SYN_1THREAD is not defined)
+assign tlu_pstate_nt_sel1 = 
+          ~(tlu_pstate_din_sel1[0] | tlu_pstate_wsr_sel1); // high only when neither of the other two mux selects is high
+// wsr data is selected by tlu_pstate_din_sel1[1], or by tlu_hpstate_din_sel1[1] unless hpstate.enb is set both now and in wsr_data_w2
+// modified for bug 3349
+assign tlu_pstate_wsr_sel1 = 
+              tlu_pstate_din_sel1[1] | 
+           (~(true_hpstate1[`HPSTATE_ENB] & wsr_data_w2[`WSR_HPSTATE_ENB]) &
+              tlu_hpstate_din_sel1[1]);
+//            (~true_hpstate1[`HPSTATE_ENB] & tlu_hpstate_din_sel1[1]);
+
+mux3ds #(`WSR_PSTATE_VR_WIDTH) mux_restore_pstate1( // next pstate value for thread 1; presumably selN steers inN to dout
+       .in0  (dnrtry_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0]), 	
+	   .in1  (wsr_data_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0]),
+	   .in2  (ntrap_pstate1[`WSR_PSTATE_VR_WIDTH-1:0]),
+       .sel0 (tlu_pstate_din_sel1[0]),  		
+	   .sel1 (tlu_pstate_wsr_sel1),
+	   .sel2 (tlu_pstate_nt_sel1),
+       .dout (restore_pstate1[`WSR_PSTATE_VR_WIDTH-1:0])
+);
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`WSR_PSTATE_VR_WIDTH-1) dff_restore_pstate1_w3 ( // FPGA_SYN_CLK_DFF build: clocked by clk with an enable instead of pstate1_clk
+    .din ({restore_pstate1[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1], 
+           restore_pstate1[`WSR_PSTATE_VRANGE1_LO]}), 
+    .q   ({restore_pstate1_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1],
+           restore_pstate1_w3[`WSR_PSTATE_VRANGE1_LO]}),
+    .en (~(tlu_update_pstate_l_w2[1])), .clk(clk), // enable is the inverse of tlu_update_pstate_l_w2[1]
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`WSR_PSTATE_VR_WIDTH-1) dff_restore_pstate1_w3 ( // default build: no enable pin, clocked by pstate1_clk (generated outside this view)
+    .din ({restore_pstate1[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1], 
+           restore_pstate1[`WSR_PSTATE_VRANGE1_LO]}), 
+    .q   ({restore_pstate1_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1],
+           restore_pstate1_w3[`WSR_PSTATE_VRANGE1_LO]}),
+    .clk (pstate1_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// the flop above is one bit narrower than the mux output; presumably the omitted bit is the priv bit stored below with its own enable
+dffe_s dffe_pstate1_priv (
+    .din (restore_pstate1[`WSR_PSTATE_VR_PRIV]),
+    .q   (restore_pstate1_w3[`WSR_PSTATE_VR_PRIV]),
+    .en  (pstate_priv_update_w2[1]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// Thread 1: privilege bit restored by done/retry, chosen by the current hpstate.enb
+// modified to reflect the physical implementation (the ternary below was replaced by the mux2ds instance)
+/*
+assign hpstate_dnrtry_priv_w2[1] = 
+           (true_hpstate1[`HPSTATE_ENB])? 
+            tsa_dnrtry_hpstate_w2[`HPSTATE_PRIV] :
+            dnrtry_pstate_w2[`WSR_PSTATE_VR_PRIV];
+*/
+mux2ds mx_hpstate_dnrtry_priv_w2_1 (
+       .in0  (tsa_dnrtry_hpstate_w2[`HPSTATE_PRIV]),
+	   .in1  (dnrtry_pstate_w2[`WSR_PSTATE_VR_PRIV]),
+       .sel0 (true_hpstate1[`HPSTATE_ENB]),  	
+	   .sel1 (~true_hpstate1[`HPSTATE_ENB]),
+       .dout (hpstate_dnrtry_priv_w2[1])
+); 
+// hpstate image restored by done/retry for thread 1: enb keeps its current value, priv comes from the mux above
+assign dnrtry_hpstate1_w2[`TLU_HPSTATE_WIDTH-1:0] = 
+       {tsa_dnrtry_hpstate_w2[`TSA_HTSTATE_WIDTH-1],
+        true_hpstate1[`HPSTATE_ENB],  
+        tsa_dnrtry_hpstate_w2[`HPSTATE_RED],
+        hpstate_dnrtry_priv_w2[1],
+        tsa_dnrtry_hpstate_w2[`HPSTATE_TLZ]};
+// the full-width pstate is rebuilt from the registered fields, with constant zeros in the positions noted below
+// true_pstate1 assignments
+assign true_pstate1[`PSTATE_TRUE_WIDTH-1:0] = 
+           {2'b0, // tlu_select_int_global - replaced by gl register
+                  // tlu_select_mmu_global - replaced by gl register 
+            restore_pstate1_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE2_LO],
+            2'b0, // fixed mmodel - TSO
+            1'b0, // redmode - replaced by hpstate.red
+            restore_pstate1_w3[`WSR_PSTATE_VRANGE1_HI:`WSR_PSTATE_VRANGE1_LO],
+            1'b0}; // tlu_select_alt_global - replaced by gl register 
+//
+// modified for timing
+/*
+mux3ds #(9) mux_restore_pstate1(
+       .in0  (dnrtry_pstate[`PSTATE_TRUE_WIDTH-3:1]),
+	   .in1  (wsr_data_pstate_g[`PSTATE_TRUE_WIDTH-3:1]),
+	   .in2  (ntrap_pstate1[`PSTATE_TRUE_WIDTH-3:1]),
+       .sel0 (tlu_pstate_din_sel1[0]),  		
+       // modified for bug 2584
+	   // .sel1 (tlu_pstate_din_sel1[1]),
+	   .sel1 (tlu_pstate_wsr_sel1),
+	   .sel2 (tlu_pstate_nt_sel1),
+       .dout (restore_pstate1[`PSTATE_TRUE_WIDTH-3:1])
+);
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`PSTATE_TRUE_WIDTH) pstate1_1 (
+    .din (restore_pstate1[`PSTATE_TRUE_WIDTH-1:0]), 
+	.q   (true_pstate1[`PSTATE_TRUE_WIDTH-1:0]),
+    .en (~(tlu_update_pstate_l_w2[1])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+    );
+`else
+dff_s #(`PSTATE_TRUE_WIDTH) pstate1_1 (
+    .din (restore_pstate1[`PSTATE_TRUE_WIDTH-1:0]), 
+	.q   (true_pstate1[`PSTATE_TRUE_WIDTH-1:0]),
+    .clk (pstate1_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+    );
+`endif
+//
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`PSTATE_TRUE_WIDTH-1) dff_true_pstate1 (
+    .din ({restore_pstate1[`PSTATE_TRUE_WIDTH-1:3], 
+           restore_pstate1[1:0]}), 
+    .q   ({true_pstate1[`PSTATE_TRUE_WIDTH-1:3], 
+           true_pstate1[1:0]}), 
+    .en (~(tlu_update_pstate_l_w2[1])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`PSTATE_TRUE_WIDTH-1) dff_true_pstate1 (
+    .din ({restore_pstate1[`PSTATE_TRUE_WIDTH-1:3], 
+           restore_pstate1[1:0]}), 
+    .q   ({true_pstate1[`PSTATE_TRUE_WIDTH-1:3], 
+           true_pstate1[1:0]}), 
+    .clk (pstate1_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+//
+dffe_s dffe_pstate1_priv (
+    .din (restore_pstate1[`PSTATE_PRIV]),
+    .q   (true_pstate1[`PSTATE_PRIV]), 
+    .en  (pstate_priv_update_g[1]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// modified for hypervisor support
+assign restore_pstate1[11:10] = 2'b0;
+assign restore_pstate1[0]     = 1'b0;
+*/
+// Thread 1 hpstate input selection
+// constructing the hpstate for hyper-privileged traps 
+// (concatenation is MSB first: ibe, enb, red, priv, tlz)
+assign hntrap_hpstate1_w2[`TLU_HPSTATE_WIDTH-1:0] = 
+       {hpstate_ibe_set[1],  
+        hpstate_enb_set[1],  
+        hpstate_redmode,  // Redmode bit
+        hpstate_priv_set, // hyper-privileged bit
+        hpstate_tlz_set[1]}; // TLZ interrupt bit 
+// trap image is selected only when neither of the other two mux selects is high
+assign tlu_hpstate_hnt_sel1 = 
+       ~(tlu_hpstate_din_sel1[0] | tlu_hpstate_wsr_sel1);
+// wsr data is selected by tlu_hpstate_din_sel1[1], or by tlu_pstate_din_sel1[1] while hpstate.enb is clear
+assign tlu_hpstate_wsr_sel1 = 
+           tlu_hpstate_din_sel1[1] | 
+           (~true_hpstate1[`HPSTATE_ENB] & tlu_pstate_din_sel1[1]);
+
+mux3ds #(`TLU_HPSTATE_WIDTH) mux_restore_hpstate1 ( // next hpstate value for thread 1; presumably selN steers inN to dout
+       .in0  (dnrtry_hpstate1_w2[`TLU_HPSTATE_WIDTH-1:0]), 	
+	   .in1  (wsr_data_hpstate_w2[`TLU_HPSTATE_WIDTH-1:0]),
+	   .in2  (hntrap_hpstate1_w2[`TLU_HPSTATE_WIDTH-1:0]),
+       .sel0 (tlu_hpstate_din_sel1[0]),  		
+       .sel1 (tlu_hpstate_wsr_sel1),
+	   .sel2 (tlu_hpstate_hnt_sel1),
+       .dout (restore_hpstate1[`TLU_HPSTATE_WIDTH-1:0])
+);
+
+// need to initialize hpstate.enb = 0
+// need to initialize hpstate.ibe = 0
+// modified due to the addition of hpstate.ibe (top two bits of thread 1 hpstate use a flop with rst = local_rst)
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s #(2) dffr_true_hpst1_enb_ibe ( // FPGA_SYN_CLK_DFF build: clocked by clk with an enable instead of hpstate1_clk
+    .din (restore_hpstate1[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+	.q   (true_hpstate1[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+    .rst (local_rst),
+    .en (~(tlu_update_hpstate_l_w2[1])), .clk(clk), // enable is the inverse of tlu_update_hpstate_l_w2[1]
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dffr_s #(2) dffr_true_hpst1_enb_ibe ( // default build: no enable pin, clocked by hpstate1_clk (generated outside this view)
+    .din (restore_hpstate1[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+	.q   (true_hpstate1[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+    .rst (local_rst),
+    .clk (hpstate1_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// red and tlz bits: same two clocking variants as above, but the flop has no reset pin
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(2) dff_true_hpstate1 (
+    .din ({restore_hpstate1[`HPSTATE_RED], 
+           restore_hpstate1[`HPSTATE_TLZ]}),
+    .q   ({true_hpstate1[`HPSTATE_RED], 
+           true_hpstate1[`HPSTATE_TLZ]}),
+    .en (~(tlu_update_hpstate_l_w2[1])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(2) dff_true_hpstate1 (
+    .din ({restore_hpstate1[`HPSTATE_RED], 
+           restore_hpstate1[`HPSTATE_TLZ]}),
+    .q   ({true_hpstate1[`HPSTATE_RED], 
+           true_hpstate1[`HPSTATE_TLZ]}),
+    .clk (hpstate1_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// priv bit: always clocked by clk and enabled by its own strobe, hpstate_priv_update_w2[1], in both builds
+dffe_s dffe_hpstate1_priv (
+    .din (restore_hpstate1[`HPSTATE_PRIV]), 
+    .q   (true_hpstate1[`HPSTATE_PRIV]), 
+    .en  (hpstate_priv_update_w2[1]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+assign tlu_ifu_pstate_pef[1]   = true_pstate1[`PSTATE_PEF]; // bit 1 of each per-thread status vector is driven from thread 1's true_pstate1 / true_hpstate1
+assign tlu_lsu_pstate_cle[1]   = true_pstate1[`PSTATE_CLE];
+assign tlu_lsu_pstate_priv[1]  = true_pstate1[`PSTATE_PRIV];
+assign tlu_int_pstate_ie[1]    = true_pstate1[`PSTATE_IE];
+assign local_pstate_ie[1]      = true_pstate1[`PSTATE_IE];
+// assign tlu_pstate_cle[1] 	   = true_pstate1[`PSTATE_CLE];
+assign tlu_pstate_tle[1] 	   = true_pstate1[`PSTATE_TLE];
+// assign tlu_pstate_priv[1] 	   = true_pstate1[`PSTATE_PRIV];
+assign local_pstate_priv[1]    = true_pstate1[`PSTATE_PRIV];
+assign tlu_pstate_am[1] 	   = true_pstate1[`PSTATE_AM];
+// assign tlu_pstate1_mmodel[1:0] = true_pstate1[`PSTATE_MM_HI:`PSTATE_MM_LO];
+// redmode outputs are taken from hpstate.red, not from pstate
+assign tlu_int_redmode[1] = true_hpstate1[`HPSTATE_RED];
+assign tlu_lsu_redmode[1] = true_hpstate1[`HPSTATE_RED];
+// the tlu_/local_/tcl_ names in each group below are copies of the same bit
+// hypervisor privilege indicator
+assign tlu_hpstate_priv[1]   = true_hpstate1[`HPSTATE_PRIV];
+assign local_hpstate_priv[1] = true_hpstate1[`HPSTATE_PRIV];
+assign tcl_hpstate_priv[1]   = true_hpstate1[`HPSTATE_PRIV];
+//
+// hypervisor lite mode selector
+assign tlu_hpstate_enb[1]   = true_hpstate1[`HPSTATE_ENB];
+assign local_hpstate_enb[1] = true_hpstate1[`HPSTATE_ENB];
+assign tcl_hpstate_enb[1]   = true_hpstate1[`HPSTATE_ENB];
+
+// hypervisor tlz indicator
+assign tlu_hpstate_tlz[1] = true_hpstate1[`HPSTATE_TLZ];
+
+// hypervisor instruction breakpt enable 
+assign tlu_hpstate_ibe[1] = true_hpstate1[`HPSTATE_IBE];
+
+// THREAD2 - pstate input selection
+// added for bug 1575
+// modified for bug 2584 (the select now uses tlu_pstate_wsr_sel2 instead of tlu_pstate_din_sel2[1])
+// assign tlu_pstate_nt_sel2 = ~|(tlu_pstate_din_sel2[1:0]);
+assign tlu_pstate_nt_sel2 = 
+          ~(tlu_pstate_din_sel2[0] | tlu_pstate_wsr_sel2); // high only when neither of the other two mux selects is high
+// wsr data is selected by tlu_pstate_din_sel2[1], or by tlu_hpstate_din_sel2[1] unless hpstate.enb is set both now and in wsr_data_w2
+// modified for bug 3349
+assign tlu_pstate_wsr_sel2 = 
+           tlu_pstate_din_sel2[1] | 
+           (~(true_hpstate2[`HPSTATE_ENB] & wsr_data_w2[`WSR_HPSTATE_ENB]) &
+              tlu_hpstate_din_sel2[1]);
+//            (~true_hpstate2[`HPSTATE_ENB] & tlu_hpstate_din_sel2[1]);
+
+mux3ds #(`WSR_PSTATE_VR_WIDTH) mux_restore_pstate2( // next pstate value for thread 2; presumably selN steers inN to dout
+       .in0  (dnrtry_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0]), 	
+	   .in1  (wsr_data_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0]),
+	   .in2  (ntrap_pstate2[`WSR_PSTATE_VR_WIDTH-1:0]),
+       .sel0 (tlu_pstate_din_sel2[0]),  		
+	   .sel1 (tlu_pstate_wsr_sel2),
+	   .sel2 (tlu_pstate_nt_sel2),
+       .dout (restore_pstate2[`WSR_PSTATE_VR_WIDTH-1:0])
+);
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`WSR_PSTATE_VR_WIDTH-1) dff_restore_pstate2_w3 ( // FPGA_SYN_CLK_DFF build: clocked by clk with an enable instead of pstate2_clk
+    .din ({restore_pstate2[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1], 
+           restore_pstate2[`WSR_PSTATE_VRANGE1_LO]}), 
+    .q   ({restore_pstate2_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1],
+           restore_pstate2_w3[`WSR_PSTATE_VRANGE1_LO]}),
+    .en (~(tlu_update_pstate_l_w2[2])), .clk(clk), // enable is the inverse of tlu_update_pstate_l_w2[2]
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`WSR_PSTATE_VR_WIDTH-1) dff_restore_pstate2_w3 ( // default build: no enable pin, clocked by pstate2_clk (generated outside this view)
+    .din ({restore_pstate2[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1], 
+           restore_pstate2[`WSR_PSTATE_VRANGE1_LO]}), 
+    .q   ({restore_pstate2_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1],
+           restore_pstate2_w3[`WSR_PSTATE_VRANGE1_LO]}),
+    .clk (pstate2_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// the flop above is one bit narrower than the mux output; presumably the omitted bit is the priv bit stored below with its own enable
+dffe_s dffe_pstate2_priv (
+    .din (restore_pstate2[`WSR_PSTATE_VR_PRIV]),
+    .q   (restore_pstate2_w3[`WSR_PSTATE_VR_PRIV]),
+    .en  (pstate_priv_update_w2[2]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// the full-width pstate is rebuilt from the registered fields, with constant zeros in the positions noted below
+// true_pstate2 assignments
+assign true_pstate2[`PSTATE_TRUE_WIDTH-1:0] = 
+           {2'b0, // tlu_select_int_global - replaced by gl register
+                  // tlu_select_mmu_global - replaced by gl register 
+            restore_pstate2_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE2_LO],
+            2'b0, // fixed mmodel - TSO
+            1'b0, // redmode - replaced by hpstate.red
+            restore_pstate2_w3[`WSR_PSTATE_VRANGE1_HI:`WSR_PSTATE_VRANGE1_LO],
+            1'b0}; // tlu_select_alt_global - replaced by gl register 
+//
+// modified for timing
+/*
+mux3ds #(9) mux_restore_pstate2( 
+       .in0  (dnrtry_pstate[`PSTATE_TRUE_WIDTH-3:1]), 	
+	   .in1  (wsr_data_pstate_g[`PSTATE_TRUE_WIDTH-3:1]),
+	   .in2  (ntrap_pstate2[`PSTATE_TRUE_WIDTH-3:1]),
+       .sel0 (tlu_pstate_din_sel2[0]),  		
+       // modified for bug 2584
+	   // .sel1 (tlu_pstate_din_sel2[1]),
+	   .sel1 (tlu_pstate_wsr_sel2),
+	   .sel2 (tlu_pstate_nt_sel2),
+       .dout (restore_pstate2[`PSTATE_TRUE_WIDTH-3:1])
+);
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`PSTATE_TRUE_WIDTH) pstate2_1 (
+    .din (restore_pstate2[`PSTATE_TRUE_WIDTH-1:0]), 
+	.q   (true_pstate2[`PSTATE_TRUE_WIDTH-1:0]),
+    .en (~(tlu_update_pstate_l_w2[2])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`PSTATE_TRUE_WIDTH) pstate2_1 (
+    .din (restore_pstate2[`PSTATE_TRUE_WIDTH-1:0]), 
+	.q   (true_pstate2[`PSTATE_TRUE_WIDTH-1:0]),
+    .clk (pstate2_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+//
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`PSTATE_TRUE_WIDTH-1) dff_true_pstate2 (
+    .din ({restore_pstate2[`PSTATE_TRUE_WIDTH-1:3], 
+           restore_pstate2[1:0]}), 
+    .q   ({true_pstate2[`PSTATE_TRUE_WIDTH-1:3], 
+           true_pstate2[1:0]}), 
+    .en (~(tlu_update_pstate_l_w2[2])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`PSTATE_TRUE_WIDTH-1) dff_true_pstate2 (
+    .din ({restore_pstate2[`PSTATE_TRUE_WIDTH-1:3], 
+           restore_pstate2[1:0]}), 
+    .q   ({true_pstate2[`PSTATE_TRUE_WIDTH-1:3], 
+           true_pstate2[1:0]}), 
+    .clk (pstate2_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+//
+dffe_s dffe_pstate2_priv (
+    .din (restore_pstate2[`PSTATE_PRIV]),
+    .q   (true_pstate2[`PSTATE_PRIV]), 
+    .en  (pstate_priv_update_g[2]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// modified for hypervisor support
+assign restore_pstate2[11:10] = 2'b0;
+assign restore_pstate2[0]     = 1'b0;
+// modified to reflect the physical implementation
+// reconstructing the hpstate for done/retry instructions
+//
+assign hpstate_dnrtry_priv_w2[2] = 
+           (true_hpstate2[`HPSTATE_ENB])? 
+            tsa_dnrtry_hpstate_w2[`HPSTATE_PRIV] :
+            dnrtry_pstate_w2[`WSR_PSTATE_VR_PRIV];
+*/
+mux2ds mx_hpstate_dnrtry_priv_w2_2 ( // thread 2: structural form of the commented-out ternary for hpstate_dnrtry_priv_w2[2]
+       .in0  (tsa_dnrtry_hpstate_w2[`HPSTATE_PRIV]),
+	   .in1  (dnrtry_pstate_w2[`WSR_PSTATE_VR_PRIV]),
+       .sel0 (true_hpstate2[`HPSTATE_ENB]),  	// hpstate.enb set: presumably steers in0 to dout
+	   .sel1 (~true_hpstate2[`HPSTATE_ENB]), // hpstate.enb clear: presumably steers in1 to dout
+       .dout (hpstate_dnrtry_priv_w2[2])
+); 
+// hpstate image restored by done/retry for thread 2: enb keeps its current value, priv comes from the mux above
+assign dnrtry_hpstate2_w2[`TLU_HPSTATE_WIDTH-1:0] = 
+       {tsa_dnrtry_hpstate_w2[`TSA_HTSTATE_WIDTH-1],
+        true_hpstate2[`HPSTATE_ENB],  
+        tsa_dnrtry_hpstate_w2[`HPSTATE_RED],
+        hpstate_dnrtry_priv_w2[2],
+        tsa_dnrtry_hpstate_w2[`HPSTATE_TLZ]};
+//
+// constructing the hpstate for hyper-privileged traps 
+// (concatenation is MSB first: ibe, enb, red, priv, tlz)
+assign hntrap_hpstate2_w2[`TLU_HPSTATE_WIDTH-1:0] = 
+       {hpstate_ibe_set[2],  
+        hpstate_enb_set[2],  
+        hpstate_redmode,  // Redmode bit
+        hpstate_priv_set, // hyper-privileged bit
+        hpstate_tlz_set[2]}; // TLZ interrupt bit 
+// trap image is selected only when neither of the other two mux selects is high
+assign tlu_hpstate_hnt_sel2 = 
+       ~(tlu_hpstate_din_sel2[0] | tlu_hpstate_wsr_sel2);
+// wsr data is selected by tlu_hpstate_din_sel2[1], or by tlu_pstate_din_sel2[1] while hpstate.enb is clear
+assign tlu_hpstate_wsr_sel2 = 
+           tlu_hpstate_din_sel2[1] | 
+           (~true_hpstate2[`HPSTATE_ENB] & tlu_pstate_din_sel2[1]);
+
+mux3ds #(`TLU_HPSTATE_WIDTH) mux_restore_hpstate2 ( // next hpstate value for thread 2; presumably selN steers inN to dout
+       .in0  (dnrtry_hpstate2_w2[`TLU_HPSTATE_WIDTH-1:0]), 	
+	   .in1  (wsr_data_hpstate_w2[`TLU_HPSTATE_WIDTH-1:0]),
+	   .in2  (hntrap_hpstate2_w2[`TLU_HPSTATE_WIDTH-1:0]),
+       .sel0 (tlu_hpstate_din_sel2[0]),  		
+	   .sel1 (tlu_hpstate_wsr_sel2),
+	   .sel2 (tlu_hpstate_hnt_sel2),
+       .dout (restore_hpstate2[`TLU_HPSTATE_WIDTH-1:0])
+);
+// Thread 2 hpstate storage: restore_hpstate2 is registered into true_hpstate2 by the three flops below.
+// need to initialize hpstate.enb = 0
+// need to initialize hpstate.ibe = 0
+// modified due to the addition of hpstate.ibe
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s #(2) dffr_true_hpst2_enb_ibe ( // FPGA_SYN_CLK_DFF build: clocked by clk with an enable instead of hpstate2_clk
+    .din (restore_hpstate2[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+	.q   (true_hpstate2[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+    .rst (local_rst),
+    .en (~(tlu_update_hpstate_l_w2[2])), .clk(clk), // enable is the inverse of tlu_update_hpstate_l_w2[2]
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dffr_s #(2) dffr_true_hpst2_enb_ibe ( // default build: no enable pin, clocked by hpstate2_clk (generated outside this view)
+    .din (restore_hpstate2[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+	.q   (true_hpstate2[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+    .rst (local_rst),
+    .clk (hpstate2_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// red and tlz bits: same two clocking variants as above, but the flop has no reset pin
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(2) dff_true_hpstate2 (
+    .din ({restore_hpstate2[`HPSTATE_RED], 
+           restore_hpstate2[`HPSTATE_TLZ]}),
+    .q   ({true_hpstate2[`HPSTATE_RED], 
+           true_hpstate2[`HPSTATE_TLZ]}),
+    .en (~(tlu_update_hpstate_l_w2[2])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(2) dff_true_hpstate2 (
+    .din ({restore_hpstate2[`HPSTATE_RED], 
+           restore_hpstate2[`HPSTATE_TLZ]}),
+    .q   ({true_hpstate2[`HPSTATE_RED], 
+           true_hpstate2[`HPSTATE_TLZ]}),
+    .clk (hpstate2_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// priv bit: always clocked by clk and enabled by its own strobe, hpstate_priv_update_w2[2], in both builds
+dffe_s dffe_hpstate2_priv (
+    .din (restore_hpstate2[`HPSTATE_PRIV]), 
+    .q   (true_hpstate2[`HPSTATE_PRIV]), 
+    .en  (hpstate_priv_update_w2[2]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+assign tlu_ifu_pstate_pef[2]   = true_pstate2[`PSTATE_PEF]; // bit 2 of each per-thread status vector is driven from thread 2's true_pstate2 / true_hpstate2
+assign tlu_lsu_pstate_cle[2]   = true_pstate2[`PSTATE_CLE];
+assign tlu_lsu_pstate_priv[2]  = true_pstate2[`PSTATE_PRIV];
+assign tlu_int_pstate_ie[2]    = true_pstate2[`PSTATE_IE];
+assign local_pstate_ie[2]      = true_pstate2[`PSTATE_IE];
+// assign tlu_pstate_cle[2] 	   = true_pstate2[`PSTATE_CLE];
+assign tlu_pstate_tle[2] 	   = true_pstate2[`PSTATE_TLE];
+// assign tlu_pstate_priv[2] 	   = true_pstate2[`PSTATE_PRIV];
+assign local_pstate_priv[2]    = true_pstate2[`PSTATE_PRIV];
+assign tlu_pstate_am[2] 	   = true_pstate2[`PSTATE_AM];
+// assign tlu_pstate2_mmodel[1:0] = true_pstate2[`PSTATE_MM_HI:`PSTATE_MM_LO];
+//
+// modified for hypervisor support: redmode is now taken from hpstate.red instead of pstate
+// assign	tlu_int_redmode[2]	= true_pstate2[`PSTATE_RED];
+assign tlu_int_redmode[2] = true_hpstate2[`HPSTATE_RED];
+assign tlu_lsu_redmode[2] = true_hpstate2[`HPSTATE_RED];
+// the tlu_/local_/tcl_ names in each group below are copies of the same bit
+// hypervisor privilege indicator
+assign tlu_hpstate_priv[2]   = true_hpstate2[`HPSTATE_PRIV];
+assign local_hpstate_priv[2] = true_hpstate2[`HPSTATE_PRIV];
+assign tcl_hpstate_priv[2]   = true_hpstate2[`HPSTATE_PRIV];
+//
+// hypervisor lite mode selector
+assign tlu_hpstate_enb[2]   = true_hpstate2[`HPSTATE_ENB];
+assign local_hpstate_enb[2] = true_hpstate2[`HPSTATE_ENB];
+assign tcl_hpstate_enb[2]   = true_hpstate2[`HPSTATE_ENB];
+
+// hypervisor tlz indicator
+assign tlu_hpstate_tlz[2] = true_hpstate2[`HPSTATE_TLZ];
+
+// hypervisor instruction breakpt enable 
+assign tlu_hpstate_ibe[2] = true_hpstate2[`HPSTATE_IBE];
+
+// THREAD3 - pstate input selection
+// added for bug 1575
+// modified for bug 2584 (the select now uses tlu_pstate_wsr_sel3 instead of tlu_pstate_din_sel3[1])
+// assign tlu_pstate_nt_sel3 = ~|(tlu_pstate_din_sel3[1:0]);
+assign tlu_pstate_nt_sel3 = 
+          ~(tlu_pstate_din_sel3[0] | tlu_pstate_wsr_sel3); // high only when neither of the other two mux selects is high
+// wsr data is selected by tlu_pstate_din_sel3[1], or by tlu_hpstate_din_sel3[1] unless hpstate.enb is set both now and in wsr_data_w2
+// modified for bug 3349
+assign tlu_pstate_wsr_sel3 = 
+           tlu_pstate_din_sel3[1] | 
+           (~(true_hpstate3[`HPSTATE_ENB] & wsr_data_w2[`WSR_HPSTATE_ENB]) &
+              tlu_hpstate_din_sel3[1]);
+//            (~true_hpstate3[`HPSTATE_ENB] & tlu_hpstate_din_sel3[1]);
+//
+mux3ds #(`WSR_PSTATE_VR_WIDTH) mux_restore_pstate3( // next pstate value for thread 3; presumably selN steers inN to dout
+       .in0  (dnrtry_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0]), 	
+	   .in1  (wsr_data_pstate_w2[`WSR_PSTATE_VR_WIDTH-1:0]),
+	   .in2  (ntrap_pstate3[`WSR_PSTATE_VR_WIDTH-1:0]),
+       .sel0 (tlu_pstate_din_sel3[0]),  		
+	   .sel1 (tlu_pstate_wsr_sel3),
+	   .sel2 (tlu_pstate_nt_sel3),
+       .dout (restore_pstate3[`WSR_PSTATE_VR_WIDTH-1:0])
+);
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`WSR_PSTATE_VR_WIDTH-1) dff_restore_pstate3_w3 ( // FPGA_SYN_CLK_DFF build: clocked by clk with an enable instead of pstate3_clk
+    .din ({restore_pstate3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1], 
+           restore_pstate3[`WSR_PSTATE_VRANGE1_LO]}), 
+    .q   ({restore_pstate3_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1],
+           restore_pstate3_w3[`WSR_PSTATE_VRANGE1_LO]}),
+    .en (~(tlu_update_pstate_l_w2[3])), .clk(clk), // enable is the inverse of tlu_update_pstate_l_w2[3]
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`WSR_PSTATE_VR_WIDTH-1) dff_restore_pstate3_w3 ( // default build: no enable pin, clocked by pstate3_clk (generated outside this view)
+    .din ({restore_pstate3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1], 
+           restore_pstate3[`WSR_PSTATE_VRANGE1_LO]}), 
+    .q   ({restore_pstate3_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE1_HI-1],
+           restore_pstate3_w3[`WSR_PSTATE_VRANGE1_LO]}),
+    .clk (pstate3_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// the flop above is one bit narrower than the mux output; presumably the omitted bit is the priv bit stored below with its own enable
+dffe_s dffe_pstate3_priv (
+    .din (restore_pstate3[`WSR_PSTATE_VR_PRIV]),
+    .q   (restore_pstate3_w3[`WSR_PSTATE_VR_PRIV]),
+    .en  (pstate_priv_update_w2[3]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// the full-width pstate is rebuilt from the registered fields, with constant zeros in the positions noted below
+// true_pstate3 assignments
+assign true_pstate3[`PSTATE_TRUE_WIDTH-1:0] = 
+           {2'b0, // tlu_select_int_global - replaced by gl register
+                  // tlu_select_mmu_global - replaced by gl register 
+            restore_pstate3_w3[`WSR_PSTATE_VRANGE2_HI:`WSR_PSTATE_VRANGE2_LO],
+            2'b0, // fixed mmodel - TSO
+            1'b0, // redmode - replaced by hpstate.red
+            restore_pstate3_w3[`WSR_PSTATE_VRANGE1_HI:`WSR_PSTATE_VRANGE1_LO],
+            1'b0}; // tlu_select_alt_global - replaced by gl register 
+//
+// modified for timing
+/*
+mux3ds #(9) mux_restore_pstate3(
+       .in0  (dnrtry_pstate[`PSTATE_TRUE_WIDTH-3:1]), 	
+	   .in1  (wsr_data_pstate_g[`PSTATE_TRUE_WIDTH-3:1]),
+	   .in2  (ntrap_pstate3[`PSTATE_TRUE_WIDTH-3:1]),
+       .sel0 (tlu_pstate_din_sel3[0]),  		
+       // modified for bug 2584
+	   // .sel1 (tlu_pstate_din_sel3[1]),
+	   .sel1 (tlu_pstate_wsr_sel3),
+	   .sel2 (tlu_pstate_nt_sel3),
+       .dout (restore_pstate3[`PSTATE_TRUE_WIDTH-3:1])
+);
+//
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`PSTATE_TRUE_WIDTH) pstate3_1 (
+    .din (restore_pstate3[`PSTATE_TRUE_WIDTH-1:0]), 
+	.q   (true_pstate3[`PSTATE_TRUE_WIDTH-1:0]),
+    .en (~(tlu_update_pstate_l_w2[3])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`PSTATE_TRUE_WIDTH) pstate3_1 (
+    .din (restore_pstate3[`PSTATE_TRUE_WIDTH-1:0]), 
+	.q   (true_pstate3[`PSTATE_TRUE_WIDTH-1:0]),
+    .clk (pstate3_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+//
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(`PSTATE_TRUE_WIDTH-1) pstate3_1 (
+    .din ({restore_pstate3[`PSTATE_TRUE_WIDTH-1:3], 
+           restore_pstate3[1:0]}), 
+    .q   ({true_pstate3[`PSTATE_TRUE_WIDTH-1:3], 
+           true_pstate3[1:0]}), 
+    .en (~(tlu_update_pstate_l_w2[3])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(`PSTATE_TRUE_WIDTH-1) pstate3_1 (
+    .din ({restore_pstate3[`PSTATE_TRUE_WIDTH-1:3], 
+           restore_pstate3[1:0]}), 
+    .q   ({true_pstate3[`PSTATE_TRUE_WIDTH-1:3], 
+           true_pstate3[1:0]}), 
+    .clk (pstate3_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+//
+dffe_s dffe_pstate3_priv (
+    .din (restore_pstate3[`PSTATE_PRIV]),
+    .q   (true_pstate3[`PSTATE_PRIV]),
+    .en  (pstate_priv_update_g[3]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// modified for hypervisor support
+assign restore_pstate3[11:10] = 2'b0;
+assign restore_pstate3[0]     = 1'b0;
+//
+// modified to reflect the physical implementation
+assign hpstate_dnrtry_priv_w2[3] = 
+           (true_hpstate3[`HPSTATE_ENB])? 
+            tsa_dnrtry_hpstate_w2[`HPSTATE_PRIV] :
+            dnrtry_pstate_w2[`WSR_PSTATE_VR_PRIV];
+*/
+mux2ds mx_hpstate_dnrtry_priv_w2_3 ( // thread 3: structural form of the commented-out ternary for hpstate_dnrtry_priv_w2[3]
+       .in0  (tsa_dnrtry_hpstate_w2[`HPSTATE_PRIV]),
+	   .in1  (dnrtry_pstate_w2[`WSR_PSTATE_VR_PRIV]),
+       .sel0 (true_hpstate3[`HPSTATE_ENB]),  	// hpstate.enb set: presumably steers in0 to dout
+	   .sel1 (~true_hpstate3[`HPSTATE_ENB]), // hpstate.enb clear: presumably steers in1 to dout
+       .dout (hpstate_dnrtry_priv_w2[3])
+); 
+// hpstate image restored by done/retry for thread 3: enb keeps its current value, priv comes from the mux above
+assign dnrtry_hpstate3_w2[`TLU_HPSTATE_WIDTH-1:0] = 
+       {tsa_dnrtry_hpstate_w2[`TSA_HTSTATE_WIDTH-1],
+        true_hpstate3[`HPSTATE_ENB],  
+        tsa_dnrtry_hpstate_w2[`HPSTATE_RED],
+        hpstate_dnrtry_priv_w2[3],
+        tsa_dnrtry_hpstate_w2[`HPSTATE_TLZ]};
+//
+// constructing the hpstate for hyper-privileged traps 
+// (concatenation is MSB first: ibe, enb, red, priv, tlz)
+assign hntrap_hpstate3_w2[`TLU_HPSTATE_WIDTH-1:0] = 
+       {hpstate_ibe_set[3],  
+        hpstate_enb_set[3],  
+        hpstate_redmode,  // Redmode bit
+        hpstate_priv_set, // hyper-privileged bit
+        hpstate_tlz_set[3]}; // TLZ interrupt bit 
+
+assign tlu_hpstate_hnt_sel3 = 
+       ~(tlu_hpstate_din_sel3[0] | tlu_hpstate_wsr_sel3); // high only when neither of the other two mux selects is high
+// wsr data is selected by tlu_hpstate_din_sel3[1], or by tlu_pstate_din_sel3[1] while hpstate.enb is clear
+assign tlu_hpstate_wsr_sel3 = 
+           tlu_hpstate_din_sel3[1] | 
+           (~true_hpstate3[`HPSTATE_ENB] & tlu_pstate_din_sel3[1]);
+
+mux3ds #(`TLU_HPSTATE_WIDTH) mux_restore_hpstate3 ( // next hpstate value for thread 3; presumably selN steers inN to dout
+       .in0  (dnrtry_hpstate3_w2[`TLU_HPSTATE_WIDTH-1:0]),
+	   .in1  (wsr_data_hpstate_w2[`TLU_HPSTATE_WIDTH-1:0]),
+	   .in2  (hntrap_hpstate3_w2[`TLU_HPSTATE_WIDTH-1:0]),
+       .sel0 (tlu_hpstate_din_sel3[0]),  		
+	   .sel1 (tlu_hpstate_wsr_sel3),
+	   .sel2 (tlu_hpstate_hnt_sel3),
+       .dout (restore_hpstate3[`TLU_HPSTATE_WIDTH-1:0])
+);
+// Thread 3 hpstate storage: restore_hpstate3 is registered into true_hpstate3 by the three flops below.
+// need to initialize hpstate.enb = 0
+// need to initialize hpstate.ibe = 0
+// modified due to the addition of hpstate.ibe
+`ifdef FPGA_SYN_CLK_DFF
+dffre_s #(2) dffr_true_hpst3_enb_ibe ( // FPGA_SYN_CLK_DFF build: clocked by clk with an enable instead of hpstate3_clk
+    .din (restore_hpstate3[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]), 
+	.q   (true_hpstate3[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+    .rst (local_rst),
+    .en (~(tlu_update_hpstate_l_w2[3])), .clk(clk), // enable is the inverse of tlu_update_hpstate_l_w2[3]
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dffr_s #(2) dffr_true_hpst3_enb_ibe ( // default build: no enable pin, clocked by hpstate3_clk (generated outside this view)
+    .din (restore_hpstate3[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]), 
+	.q   (true_hpstate3[`TLU_HPSTATE_WIDTH-1:`TLU_HPSTATE_WIDTH-2]),
+    .rst (local_rst),
+    .clk (hpstate3_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+//
+// red and tlz bits: same two clocking variants as above, but the flop has no reset pin
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(2) dff_true_hpstate3 (
+    .din ({restore_hpstate3[`HPSTATE_RED], 
+           restore_hpstate3[`HPSTATE_TLZ]}),
+    .q   ({true_hpstate3[`HPSTATE_RED], 
+           true_hpstate3[`HPSTATE_TLZ]}),
+    .en (~(tlu_update_hpstate_l_w2[3])), .clk(clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`else
+dff_s #(2) dff_true_hpstate3 (
+    .din ({restore_hpstate3[`HPSTATE_RED], 
+           restore_hpstate3[`HPSTATE_TLZ]}),
+    .q   ({true_hpstate3[`HPSTATE_RED], 
+           true_hpstate3[`HPSTATE_TLZ]}),
+    .clk (hpstate3_clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+`endif
+// priv bit: always clocked by clk and enabled by its own strobe, hpstate_priv_update_w2[3], in both builds
+dffe_s dffe_hpstate3_priv (
+    .din (restore_hpstate3[`HPSTATE_PRIV]), 
+    .q   (true_hpstate3[`HPSTATE_PRIV]), 
+    .en  (hpstate_priv_update_w2[3]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// Thread 3 (index [3]) outputs: individual bits of true_pstate3 / true_hpstate3 fanned out by name
+assign tlu_ifu_pstate_pef[3]   = true_pstate3[`PSTATE_PEF];
+assign tlu_lsu_pstate_cle[3]   = true_pstate3[`PSTATE_CLE];
+assign tlu_lsu_pstate_priv[3]  = true_pstate3[`PSTATE_PRIV];
+assign tlu_int_pstate_ie[3]    = true_pstate3[`PSTATE_IE];
+assign local_pstate_ie[3]      = true_pstate3[`PSTATE_IE];
+// assign tlu_pstate_cle[3] 	   = true_pstate3[`PSTATE_CLE];
+assign tlu_pstate_tle[3] 	   = true_pstate3[`PSTATE_TLE];
+// assign tlu_pstate_priv[3] 	   = true_pstate3[`PSTATE_PRIV];
+assign local_pstate_priv[3]    = true_pstate3[`PSTATE_PRIV];
+assign tlu_pstate_am[3] 	   = true_pstate3[`PSTATE_AM];
+// assign tlu_pstate3_mmodel[1:0] = true_pstate3[`PSTATE_MM_HI:`PSTATE_MM_LO];
+// redmode is taken from hpstate.red; the former pstate.red source is left commented out
+// modified for hypervisor support
+// assign	tlu_int_redmode[3]	= true_pstate3[`PSTATE_RED];
+assign tlu_int_redmode[3] = true_hpstate3[`HPSTATE_RED];
+assign tlu_lsu_redmode[3] = true_hpstate3[`HPSTATE_RED];
+// 
+// hypervisor privilege indicator
+assign tlu_hpstate_priv[3]   = true_hpstate3[`HPSTATE_PRIV];
+assign local_hpstate_priv[3] = true_hpstate3[`HPSTATE_PRIV];
+assign tcl_hpstate_priv[3]   = true_hpstate3[`HPSTATE_PRIV];
+//
+// hypervisor lite mode selector
+assign tlu_hpstate_enb[3]   = true_hpstate3[`HPSTATE_ENB];
+assign local_hpstate_enb[3] = true_hpstate3[`HPSTATE_ENB];
+assign tcl_hpstate_enb[3]   = true_hpstate3[`HPSTATE_ENB];
+
+// hypervisor tlz indicator
+assign tlu_hpstate_tlz[3] = true_hpstate3[`HPSTATE_TLZ];
+
+// hypervisor instruction breakpt enable 
+assign tlu_hpstate_ibe[3] = true_hpstate3[`HPSTATE_IBE];
+
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+// Per-thread read selection of pstate and hpstate; tlu_thrd_rsel_e carries one select bit per thread
+wire [`PSTATE_TRUE_WIDTH-1:0] pstate_rdata;
+wire [`RDSR_HPSTATE_WIDTH-1:0] hpstate_rdata;
+
+`ifdef FPGA_SYN_1THREAD
+   assign       pstate_rdata[`PSTATE_TRUE_WIDTH-1:0] = true_pstate0[`PSTATE_TRUE_WIDTH-1:0];
+   assign 	true_hpstate[`TLU_HPSTATE_WIDTH-1:0] = true_hpstate0[`TLU_HPSTATE_WIDTH-1:0];
+`else
+// pstate read mux: width follows `PSTATE_TRUE_WIDTH like every port slice (was a literal 12)
+mux4ds #(`PSTATE_TRUE_WIDTH) pstate_mx_sel (
+       .in0  (true_pstate0[`PSTATE_TRUE_WIDTH-1:0]),
+       .in1  (true_pstate1[`PSTATE_TRUE_WIDTH-1:0]),
+       .in2  (true_pstate2[`PSTATE_TRUE_WIDTH-1:0]),
+       .in3  (true_pstate3[`PSTATE_TRUE_WIDTH-1:0]),
+       .sel0 (tlu_thrd_rsel_e[0]),
+       .sel1 (tlu_thrd_rsel_e[1]),
+       .sel2 (tlu_thrd_rsel_e[2]),
+       .sel3 (tlu_thrd_rsel_e[3]),
+       .dout (pstate_rdata[`PSTATE_TRUE_WIDTH-1:0])
+); 
+//
+// added for hypervisor support 
+// mux to choose the hpstate register to read based on thread
+
+mux4ds #(`TLU_HPSTATE_WIDTH) hpstate_mx_sel (
+       .in0  (true_hpstate0[`TLU_HPSTATE_WIDTH-1:0]),
+       .in1  (true_hpstate1[`TLU_HPSTATE_WIDTH-1:0]),
+       .in2  (true_hpstate2[`TLU_HPSTATE_WIDTH-1:0]),
+       .in3  (true_hpstate3[`TLU_HPSTATE_WIDTH-1:0]),
+       .sel0 (tlu_thrd_rsel_e[0]),
+       .sel1 (tlu_thrd_rsel_e[1]),
+       .sel2 (tlu_thrd_rsel_e[2]),
+       .sel3 (tlu_thrd_rsel_e[3]),
+       .dout (true_hpstate[`TLU_HPSTATE_WIDTH-1:0])
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+// Build the hpstate read value (hpstate_rdata, `RDSR_HPSTATE_WIDTH bits wide):
+// assigned the stored hpstate bits to the ASR positions
+//
+assign hpstate_rdata[`WSR_HPSTATE_ENB]  = true_hpstate[`HPSTATE_ENB]; 
+assign hpstate_rdata[`WSR_HPSTATE_IBE]  = true_hpstate[`HPSTATE_IBE]; 
+assign hpstate_rdata[`WSR_HPSTATE_RED]  = true_hpstate[`HPSTATE_RED]; 
+assign hpstate_rdata[`WSR_HPSTATE_PRIV] = true_hpstate[`HPSTATE_PRIV]; 
+assign hpstate_rdata[`WSR_HPSTATE_TLZ]  = true_hpstate[`HPSTATE_TLZ]; 
+//
+// grounding the reserved bits
+// modified due to the addition of hpstate.ibe 
+// assign hpstate_rdata[`WSR_HPSTATE_ENB-1 :`WSR_HPSTATE_RED+1]  = 5'h00; 
+assign hpstate_rdata[`WSR_HPSTATE_IBE-1 :`WSR_HPSTATE_RED+1]  = 4'h0; 
+assign hpstate_rdata[`WSR_HPSTATE_RED-1 :`WSR_HPSTATE_PRIV+1] = 2'b00; 
+assign hpstate_rdata[`WSR_HPSTATE_PRIV-1:`WSR_HPSTATE_TLZ+1]  = 1'b0; 
+//
+// constructing data for htstate
+// (same WSR bit positions as hpstate_rdata, but sourced from tsa_rdata; the top bit is tied to 0)
+wire [`RDSR_HPSTATE_WIDTH-1:0] htstate_rdata;
+
+// assign htstate_rdata[`WSR_HPSTATE_RED]  = tsa_rdata[`TLU_HTSTATE_HI]; 
+// assign htstate_rdata[`WSR_HPSTATE_PRIV] = tsa_rdata[`TLU_HTSTATE_HI-1]; 
+/* modified due to logic redistribution
+assign htstate_rdata[`WSR_HPSTATE_IBE]  = tsa_rdata[`TLU_HTSTATE_HI]; 
+assign htstate_rdata[`WSR_HPSTATE_RED]  = tsa_rdata[`TLU_HTSTATE_HI-1]; 
+assign htstate_rdata[`WSR_HPSTATE_PRIV] = tsa_rdata[`TLU_HTSTATE_HI-2]; 
+assign htstate_rdata[`WSR_HPSTATE_TLZ]  = tsa_rdata[`TLU_HTSTATE_LO]; 
+*/
+assign htstate_rdata[`WSR_HPSTATE_IBE]  = tsa_rdata[`TLU_RD_HTSTATE_HI]; 
+assign htstate_rdata[`WSR_HPSTATE_RED]  = tsa_rdata[`TLU_RD_HTSTATE_HI-1]; 
+assign htstate_rdata[`WSR_HPSTATE_PRIV] = tsa_rdata[`TLU_RD_HTSTATE_HI-2]; 
+assign htstate_rdata[`WSR_HPSTATE_TLZ]  = tsa_rdata[`TLU_RD_HTSTATE_LO]; 
+//
+// grounding the reserved bits
+// modified due to addition of hpstate.ibe
+// assign htstate_rdata[`RDSR_HPSTATE_WIDTH-1 :`WSR_HPSTATE_RED+1] = 6'h00; 
+assign htstate_rdata[`RDSR_HPSTATE_WIDTH-1] = 1'b0; 
+assign htstate_rdata[`WSR_HPSTATE_IBE-1 :`WSR_HPSTATE_RED+1]  = 4'h0; 
+assign htstate_rdata[`WSR_HPSTATE_RED-1 :`WSR_HPSTATE_PRIV+1] = 2'b00; 
+assign htstate_rdata[`WSR_HPSTATE_PRIV-1:`WSR_HPSTATE_TLZ+1]  = 1'b0; 
+
+//=========================================================================================
+//	RDPR - This section has been recoded due to timing
+//=========================================================================================
+// Per-thread read selection of the global level (tlu_gl_lvl0..3) and of the tlu_hintp bit.
+// mux data width - 2b
+`ifdef FPGA_SYN_1THREAD
+   assign global_rdata[`TSA_GLOBAL_WIDTH-1:0] = tlu_gl_lvl0[`TSA_GLOBAL_WIDTH-1:0];
+   assign hintp_rdata = tlu_hintp[0];
+`else
+// multi-thread build: tlu_thrd_rsel_e provides one select bit per thread
+
+mux4ds #(`TSA_GLOBAL_WIDTH) mux_global_rdata (
+       .in0  (tlu_gl_lvl0[`TSA_GLOBAL_WIDTH-1:0]),
+       .in1  (tlu_gl_lvl1[`TSA_GLOBAL_WIDTH-1:0]),
+       .in2  (tlu_gl_lvl2[`TSA_GLOBAL_WIDTH-1:0]),
+       .in3  (tlu_gl_lvl3[`TSA_GLOBAL_WIDTH-1:0]),
+       .sel0 (tlu_thrd_rsel_e[0]),
+       .sel1 (tlu_thrd_rsel_e[1]),
+       .sel2 (tlu_thrd_rsel_e[2]),
+       .sel3 (tlu_thrd_rsel_e[3]),
+       .dout (global_rdata[`TSA_GLOBAL_WIDTH-1:0])
+);
+// 
+// htickcmp interrupt enable
+// (1-bit mux over tlu_hintp[3:0], same thread select as above)
+mux4ds #(1) mux_hintp_rdata (
+        .in0    (tlu_hintp[0]),
+        .in1    (tlu_hintp[1]),
+        .in2    (tlu_hintp[2]),
+        .in3    (tlu_hintp[3]),
+        .sel0   (tlu_thrd_rsel_e[0]),
+        .sel1   (tlu_thrd_rsel_e[1]),
+        .sel2   (tlu_thrd_rsel_e[2]),
+        .sel3   (tlu_thrd_rsel_e[3]),
+        .dout   (hintp_rdata)
+);
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+// Assemble the tstate read value (tstate_rdata) field by field from tsa_rdata.
+// tstate.gl - 2b
+assign tstate_rdata[`WSR_TSTATE_GL_HI:`WSR_TSTATE_GL_LO] = 
+       tsa_rdata[`TLU_GL_HI:`TLU_GL_LO];
+//
+// tstate.ccr - 8b
+assign tstate_rdata[`WSR_TSTATE_CCR_HI:`WSR_TSTATE_CCR_LO] = 
+       tsa_rdata[`TLU_CCR_HI:`TLU_CCR_LO];
+//
+// tstate.asi - 8b
+assign tstate_rdata[`WSR_TSTATE_ASI_HI:`WSR_TSTATE_ASI_LO] = 
+       tsa_rdata[`TLU_ASI_HI:`TLU_ASI_LO];
+//
+// tstate.pstate(valid range 2) - 2b
+assign tstate_rdata[`WSR_TSTATE_PS2_HI:`WSR_TSTATE_PS2_LO] = 
+       tsa_rdata[`TSA_PSTATE_VRANGE2_HI:`TSA_PSTATE_VRANGE2_LO];
+// added to please lint: the tsa_rdata bits between the two pstate ranges are
+// referenced here, but ANDing them with 2'b0 always yields zero
+assign tstate_dummy_zero[1:0] = 
+       tsa_rdata[`TSA_PSTATE_VRANGE2_LO-1:`TSA_PSTATE_VRANGE1_HI+1] & 2'b0; 
+//
+// tstate.pstate(valid range 1) - 4b
+assign tstate_rdata[`WSR_TSTATE_PS1_HI:`WSR_TSTATE_PS1_LO] = 
+       tsa_rdata[`TSA_PSTATE_VRANGE1_HI:`TSA_PSTATE_VRANGE1_LO];
+//
+// tstate.cwp - 3b
+assign tstate_rdata[`WSR_TSTATE_CWP_HI:`WSR_TSTATE_CWP_LO] = 
+       tsa_rdata[`TLU_CWP_HI:`TLU_CWP_LO];
+//
+// reserved bits with ASR - assign to  1'b0
+assign tstate_rdata[`RDSR_TSTATE_WIDTH-1:`WSR_TSTATE_GL_HI+1] = 
+       6'h00; 
+assign tstate_rdata[`WSR_TSTATE_ASI_LO-1:`WSR_TSTATE_PS2_HI+1] = 
+       6'h00; 
+assign tstate_rdata[`WSR_TSTATE_PS2_LO-1:`WSR_TSTATE_PS1_HI+1] = 
+       {1'b0, tstate_dummy_zero[1:0]}; // 3 reserved bits; the low two are the always-zero lint net
+assign tstate_rdata[`WSR_TSTATE_PS1_LO-1:`WSR_TSTATE_CWP_HI+1] = 
+       6'h00; 
+//
+//============================================================================
+// new rdpr mux coding due to timing changes 
+//============================================================================
+// For mux1..mux5, sel0 is the NOR of the other select bits, so in0 is chosen when none is set.
+// added for bug 2332
+assign rdpr_mx1_onehot_sel = 
+           ~(|tlu_rdpr_mx1_sel[3:1]);
+// mux1- 64b
+mux4ds #(`TLU_ASR_DATA_WIDTH) rdpr_mx1(
+	.in0({tlu_tick_npt,true_tick[60:0], 2'b0}),
+	.in1(tickcmp_rdata[`TLU_ASR_DATA_WIDTH-1:0]),
+	.in2(stickcmp_rdata[`TLU_ASR_DATA_WIDTH-1:0]),
+	.in3({tlu_htickcmp_intdis,htickcmp_rdata[`TLU_ASR_DATA_WIDTH-2:0]}),
+	.sel0(rdpr_mx1_onehot_sel),
+	.sel1(tlu_rdpr_mx1_sel[1]),
+	.sel2(tlu_rdpr_mx1_sel[2]),
+	.sel3(tlu_rdpr_mx1_sel[3]),
+	.dout(tlu_rdpr_mx1_out[`TLU_ASR_DATA_WIDTH-1:0])
+);
+// 
+// mux2 default select (in0 = global_rdata)
+// added for bug 2332
+assign rdpr_mx2_onehot_sel = 
+           ~(|tlu_rdpr_mx2_sel[3:1]); 
+// narrower sources are zero-extended to 4 bits
+// mux2 - 4b 
+mux4ds #(4) rdpr_mx2(
+	.in0({2'b0,global_rdata[`TSA_GLOBAL_WIDTH-1:0]}),
+	.in1({3'b0,hintp_rdata}),
+	.in2({1'b0,tlu_trp_lvl[2:0]}),
+	.in3(tlu_pil[3:0]),
+	.sel0(rdpr_mx2_onehot_sel),
+	.sel1(tlu_rdpr_mx2_sel[1]),
+	.sel2(tlu_rdpr_mx2_sel[2]),
+	.sel3(tlu_rdpr_mx2_sel[3]),
+	.dout(tlu_rdpr_mx2_out[3:0])
+);
+// mux3 default select (in0 = sftint_rdata)
+// added for bug 2332
+assign rdpr_mx3_onehot_sel = 
+           ~(|tlu_rdpr_mx3_sel[2:1]);
+// pstate_rdata and hpstate_rdata are zero-extended by 5 bits to `SFTINT_WIDTH
+// mux3 - 17b
+mux3ds #(`SFTINT_WIDTH) rdpr_mx3(
+	.in0(sftint_rdata[`SFTINT_WIDTH-1:0]),
+	.in1({5'b0,pstate_rdata[`PSTATE_TRUE_WIDTH-1:0]}),
+	.in2({5'b0,hpstate_rdata[`RDSR_HPSTATE_WIDTH-1:0]}),
+	.sel0(rdpr_mx3_onehot_sel),
+	.sel1(tlu_rdpr_mx3_sel[1]),
+	.sel2(tlu_rdpr_mx3_sel[2]),
+	.dout(tlu_rdpr_mx3_out[`SFTINT_WIDTH-1:0])
+);
+// mux4 default select (in0 = pc slice of tsa_rdata)
+// added for bug 2332
+assign rdpr_mx4_onehot_sel = 
+           ~(|tlu_rdpr_mx4_sel[2:1]);
+// pc and npc slices are padded with two low zero bits
+// mux4 - 48b 
+mux3ds #(`RDSR_TSTATE_WIDTH) rdpr_mx4(
+	.in0({tsa_rdata[`TLU_RD_PC_HI:`TLU_RD_PC_LO],2'b00}),
+	.in1({tsa_rdata[`TLU_RD_NPC_HI:`TLU_NPC_LO],2'b00}), // NOTE(review): low bound is `TLU_NPC_LO, not a TLU_RD_* macro as in in0 - confirm intended
+	// .in0({tsa_rdata[`TLU_PC_HI-1:`TLU_PC_LO],2'b00}),
+	// .in1({tsa_rdata[`TLU_NPC_HI-1:`TLU_NPC_LO],2'b00}),
+    .in2(tstate_rdata[`RDSR_TSTATE_WIDTH-1:0]),
+	.sel0(rdpr_mx4_onehot_sel),
+	.sel1(tlu_rdpr_mx4_sel[1]), 
+	.sel2(tlu_rdpr_mx4_sel[2]), 
+	.dout(tlu_rdpr_mx4_out[`RDSR_TSTATE_WIDTH-1:0])
+);
+// mux5 default select (in0 = tba read data)
+// added for bug 2332
+assign rdpr_mx5_onehot_sel = 
+           ~(|tlu_rdpr_mx5_sel[3:1]);
+// tba/htba: top bit replicated 16 times above the value, low bits (15 / 14) filled with zeros
+// mux5 - 64b 
+mux4ds #(`TLU_ASR_DATA_WIDTH) rdpr_mx5(
+	.in0({{16{tba_rdata[`TLU_TBA_WIDTH-1]}},
+           tba_rdata[`TLU_TBA_WIDTH-1:0],15'h0000}),
+	.in1({{16{htba_rdata[`TLU_HTBA_WIDTH-1]}},
+           htba_rdata[`TLU_HTBA_WIDTH-1:0],14'h0000}),
+	.in2(tlu_rdpr_mx1_out[`TLU_ASR_DATA_WIDTH-1:0]),
+	.in3(tlu_pib_rsr_data_e[`TLU_ASR_DATA_WIDTH-1:0]),
+	.sel0(rdpr_mx5_onehot_sel),
+	.sel1(tlu_rdpr_mx5_sel[1]),
+	.sel2(tlu_rdpr_mx5_sel[2]),
+	.sel3(tlu_rdpr_mx5_sel[3]),
+	.dout(tlu_rdpr_mx5_out[`TLU_ASR_DATA_WIDTH-1:0])
+);
+// mux6 uses select bits [2:0] for in1..in3 (not [3:1]); in0 is chosen when all three are clear
+// added for bug 2332
+assign rdpr_mx6_onehot_sel = 
+           ~(|tlu_rdpr_mx6_sel[2:0]);
+//
+// mux6 - `SFTINT_WIDTH wide (17b, as mux3); narrower inputs are zero-extended
+mux4ds #(`SFTINT_WIDTH) rdpr_mx6(
+	.in0({8'b0,tsa_rdata[8:0]}),  // ttype
+	.in1({5'b0,htstate_rdata[`RDSR_HPSTATE_WIDTH-1:0]}),
+	.in2({13'b0,tlu_rdpr_mx2_out[3:0]}),
+	.in3({tlu_rdpr_mx3_out[`SFTINT_WIDTH-1:0]}),
+	.sel0(rdpr_mx6_onehot_sel),
+	.sel1(tlu_rdpr_mx6_sel[0]),
+	.sel2(tlu_rdpr_mx6_sel[1]),
+	.sel3(tlu_rdpr_mx6_sel[2]),
+	.dout(tlu_rdpr_mx6_out[`SFTINT_WIDTH-1:0])
+);
+// final stage: all four selects come straight from tlu_rdpr_mx7_sel (no derived default)
+// mux7- 64b
+mux4ds #(`TLU_ASR_DATA_WIDTH) rdpr_mx7(
+	.in0({{16{tlu_rdpr_mx4_out[`RDSR_TSTATE_WIDTH-1]}}, 
+           tlu_rdpr_mx4_out[`RDSR_TSTATE_WIDTH-1:0]}),
+	.in1(tlu_rdpr_mx5_out[`TLU_ASR_DATA_WIDTH-1:0]),
+	.in2({47'b0,tlu_rdpr_mx6_out[`SFTINT_WIDTH-1:0]}),
+	.in3({56'b0,lsu_tlu_rsr_data_e[7:0]}),
+	.sel0(tlu_rdpr_mx7_sel[0]),
+	.sel1(tlu_rdpr_mx7_sel[1]),
+	.sel2(tlu_rdpr_mx7_sel[2]),
+	.sel3(tlu_rdpr_mx7_sel[3]),
+	.dout(tlu_rdpr_mx7_out[`TLU_ASR_DATA_WIDTH-1:0])
+);
+/*
+mux4ds #(`TLU_ASR_DATA_WIDTH) rdpr_mx7(
+	.in0({{16{tlu_rdpr_mx4_out[`RDSR_TSTATE_WIDTH-1]}}, 
+           tlu_rdpr_mx4_out[`RDSR_TSTATE_WIDTH-1:0]}),
+	.in1(tlu_rdpr_mx5_out[`TLU_ASR_DATA_WIDTH-1:0]),
+	.in2({47'b0,tlu_rdpr_mx6_out[`SFTINT_WIDTH-1:0]}),
+	.in3({56'b0,lsu_tlu_rsr_data_e[7:0]}),
+	.sel0(tlu_rdpr_mx7_sel[0]),
+	.sel1(tlu_rdpr_mx7_sel[1]),
+	.sel2(tlu_rdpr_mx7_sel[2]),
+	.sel3(tlu_rdpr_mx7_sel[3]),
+	.dout(tlu_rdpr_mx7_out[`TLU_ASR_DATA_WIDTH-1:0])
+);
+*/
+//
+// drive rsr data to exu
+assign tlu_exu_rsr_data_e[`TLU_ASR_DATA_WIDTH-1:0] = 
+           tlu_rdpr_mx7_out[`TLU_ASR_DATA_WIDTH-1:0];
+// tlu_exu_rsr_data_m is the same data after one dff_s stage on clk
+// added for timing
+dff_s #(`TLU_ASR_DATA_WIDTH) dff_tlu_exu_rsr_data_m (
+    .din (tlu_exu_rsr_data_e[`TLU_ASR_DATA_WIDTH-1:0]),
+    .q   (tlu_exu_rsr_data_m[`TLU_ASR_DATA_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+endmodule
Index: /trunk/T1-CPU/tlu/sparc_tlu_penc64.v
===================================================================
--- /trunk/T1-CPU/tlu/sparc_tlu_penc64.v	(revision 6)
+++ /trunk/T1-CPU/tlu/sparc_tlu_penc64.v	(revision 6)
@@ -0,0 +1,60 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_tlu_penc64.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_tlu_penc64
+//  Description:    
+//    64 -> 6 priority encoder
+//    Bit 63 has the highest priority
+//
+*/
+
+module sparc_tlu_penc64 (/*AUTOARG*/
+   // Outputs
+   out,
+   // Inputs
+   in
+   );
+
+   input  [63:0] in;    // request vector; the highest set bit wins
+   output [5:0]  out;   // index of the highest set bit of in (0 when in is all zero)
+
+   reg    [5:0]  out;
+   integer       bit_idx;
+
+   // Ascending scan with blocking assignments: each set bit overwrites out,
+   // so the value left after the loop belongs to the highest set bit.
+   // out is driven unconditionally before the scan (the form adopted for
+   // verplex so that no latch is inferred); an all-zero input therefore
+   // reads back as 6'b0, the same code that in[0] alone produces.
+   always @ (in)
+   begin
+      out = 6'b0;
+      for (bit_idx = 0; bit_idx < 64; bit_idx = bit_idx + 1)
+      begin
+         if (in[bit_idx])
+            out[5:0] = bit_idx[5:0];
+      end
+   end
+
+
+endmodule // sparc_tlu_penc64
+
Index: /trunk/T1-CPU/tlu/tlu_prencoder16.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_prencoder16.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_prencoder16.v	(revision 6)
@@ -0,0 +1,70 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_prencoder16.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    Datapath Priority Encoder 16b
+//				- 15b multihot vector as input
+//				- 4b encoded output: (index of highest set input bit) + 1, 0 if no bit is set
+//				- Can use some std length such as 16b
+//				- msb is given highest priority
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                        // time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module	tlu_prencoder16 (din, dout);
+// dout = (index of the highest set bit of din) + 1; dout = 0 when din is all zero
+input	[14:0]	din  ;		// multi-hot, din[14] has the highest priority
+output	[3:0]	dout ;
+
+wire	[14:0]	first_hot ;	// din with every bit below its highest set bit cleared
+
+assign	first_hot[14] = din[14] ;
+assign	first_hot[13] = ~din[14]         & din[13] ;
+assign	first_hot[12] = (~|din[14:13])   & din[12] ;
+assign	first_hot[11] = (~|din[14:12])   & din[11] ;
+assign	first_hot[10] = (~|din[14:11])   & din[10] ;
+assign	first_hot[9]  = (~|din[14:10])   & din[9] ;
+assign	first_hot[8]  = (~|din[14:9])    & din[8] ;
+assign	first_hot[7]  = (~|din[14:8])    & din[7] ;
+assign	first_hot[6]  = (~|din[14:7])    & din[6] ;
+assign	first_hot[5]  = (~|din[14:6])    & din[5] ;
+assign	first_hot[4]  = (~|din[14:5])    & din[4] ;
+assign	first_hot[3]  = (~|din[14:4])    & din[3] ;
+assign	first_hot[2]  = (~|din[14:3])    & din[2] ;
+assign	first_hot[1]  = (~|din[14:2])    & din[1] ;
+assign	first_hot[0]  = (~|din[14:1])    & din[0] ;
+
+// each output bit ORs the first_hot positions whose (index + 1) has that bit set
+assign	dout[3]  = |(first_hot & 15'h7f80) ;	// positions 14:7
+assign	dout[2]  = |(first_hot & 15'h7878) ;	// positions 14:11, 6:3
+assign	dout[1]  = |(first_hot & 15'h6666) ;	// positions 14:13, 10:9, 6:5, 2:1
+assign	dout[0]  = |(first_hot & 15'h5555) ;	// even positions 14..0
+
+
+
+endmodule
Index: /trunk/T1-CPU/tlu/tlu_rrobin_picker.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_rrobin_picker.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_rrobin_picker.v	(revision 6)
@@ -0,0 +1,104 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_rrobin_picker.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    Round-Robin Picker for 4 events.
+//			Differs from the lsu's rrobin picker by the
+//			fact that there is no default 1-hot event.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                                        // time scale definition
+
+module tlu_rrobin_picker (/*AUTOARG*/
+   // Outputs
+   pick_one_hot, 
+   // Inputs
+   events, tlu_rst_l, clk
+   );
+
+input 	[3:0]	events ;		// multi-hot; events that could be chosen
+// this signal was modified to abide by the Niagara reset methodology
+input		tlu_rst_l ;			// reset - active low
+input		clk ;
+
+output	[3:0]	pick_one_hot ;  // one-hot; events that must be chosen
+//
+// this signal was added to abide by the Niagara reset methodology
+wire	tlu_rst ;	
+
+// This section was modified to abide by the Niagara synthesis methodology
+//
+// reg	[3:0]	pick_status ;	
+wire	pick_status_reset ;	
+wire	[3:0]	pick_status_in ;	
+wire	[3:0]	pick_status ;	
+
+wire	events_unpicked ;
+wire	[3:0]	pe_mask ;
+
+//
+// this signal was added to abide by the Niagara reset methodology
+assign tlu_rst = ~tlu_rst_l;
+
+assign	events_unpicked = |(events[3:0] & ~pick_status[3:0]) ;
+			// term replicated.
+
+// priority encode mask
+assign	pe_mask[3:0] =
+		events_unpicked ? 
+		(events[3:0] & ~pick_status[3:0]) : 	// choose from events that have not been picked.
+		events[3:0] ;				// else all events on equal terms
+// fixed priority within the mask: bit 0 first, bit 3 last; all zero when no event is pending
+assign	pick_one_hot[0] = 
+		pe_mask[0] ;
+		//pe_mask[0] | ~(|pe_mask[3:0]);		// none requesting then 0 is forced hot
+assign	pick_one_hot[1] = 
+		pe_mask[1] & ~pe_mask[0] ;
+assign	pick_one_hot[2] = 
+		pe_mask[2] & ~(|pe_mask[1:0]) ;
+assign	pick_one_hot[3] = 
+		pe_mask[3] & ~(|pe_mask[2:0]) ;
+
+// This section was modified to abide by the Niagara synthesis methodology
+//
+// Define Pick Status
+//always	@ (posedge clk)
+//	begin
+//		if ((&(pick_status[3:0] | pick_one_hot[3:0])) | tlu_rst) 
+//			pick_status[3:0] <= 4'b0000 ;	// clear pick_status
+//		else
+//			pick_status[3:0] <= pick_status[3:0] | pick_one_hot[3:0] ;
+//					// term replicated
+//	end
+// pick_status accumulates picked events; the flop's rst is driven by tlu_rst or when all four bits would be set
+assign pick_status_reset = (&(pick_status[3:0] | pick_one_hot[3:0])) | tlu_rst;
+assign pick_status_in    = pick_status[3:0] | pick_one_hot[3:0]; 
+
+dffr_s #(4)  dffre_pick_status  (
+        .din (pick_status_in[3:0]), .q (pick_status[3:0]),
+        .rst (pick_status_reset), .clk (clk),
+        .se  (1'b0),  .si  (),       .so ()
+        );
+
+endmodule
Index: /trunk/T1-CPU/tlu/tlu_misctl.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_misctl.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_misctl.v	(revision 6)
@@ -0,0 +1,637 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_misctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    Block that contain most of miscellaneous 
+//                      control and datapath components 
+//                      to alleviate tdp and tcp congestions 
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                        // time scale definition
+
+`include "tlu.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module	tlu_misctl (/*AUTOARG*/
+    // outputs
+    tlu_exu_cwp_m, tlu_exu_ccr_m, tlu_lsu_asi_m, tlu_cwp_no_change_m, 
+    tlu_sscan_misctl_data, tlu_ifu_trappc_w2, tlu_ifu_trapnpc_w2, 
+    tlu_pc_new_w, tlu_npc_new_w, so, 
+    // PIC experiment
+    tlu_exu_pic_onebelow_m, tlu_exu_pic_twobelow_m, 
+    // inputs
+    ctu_sscan_tid, ifu_tlu_pc_m, exu_tlu_cwp0, exu_tlu_cwp1, exu_tlu_cwp2, 
+    exu_tlu_cwp3, tlu_final_ttype_w2, tsa_wr_tid, tlu_true_pc_sel_w, 
+    tsa1_wr_vld, tsa_ttype_en, tsa_rd_vld_e, tsa0_rdata_cwp, tsa0_rdata_pstate,
+    tsa0_rdata_asi, tsa0_rdata_ccr, tsa0_rdata_gl, tsa0_rdata_pc, tsa1_rdata_ttype, 
+    tsa1_rdata_npc, tsa1_rdata_htstate, tlu_thrd_rsel_e, tlu_final_offset_w1, 
+    tlu_partial_trap_pc_w1,  tlu_restore_pc_w1, tlu_restore_npc_w1, 
+    ifu_npc_w, tlu_restore_pc_sel_w1, tlu_pic_cnt_en_m, tlu_pic_onebelow_e,
+    tlu_pic_twobelow_e, tlu_rst, si, se, rclk);
+    // pich_threebelow_flg, pich_twobelow_flg, pich_onebelow_flg, 
+
+//=================================================
+// output
+//=================================================
+output [`TSA_CCR_WIDTH-1:0] tlu_exu_ccr_m; // restored ccr
+output [`TSA_CWP_WIDTH-1:0] tlu_exu_cwp_m; // restored cwp
+output [`TLU_ASI_STATE_WIDTH-1:0] tlu_lsu_asi_m; // restored asi
+output tlu_cwp_no_change_m; // cwp change indicator
+//
+// sscan output
+output [`MISCTL_SSCAN_WIDTH-1:0] tlu_sscan_misctl_data;
+//
+// trap pc and npc
+output [48:0] tlu_ifu_trappc_w2, tlu_ifu_trapnpc_w2; 
+output [48:0] tlu_pc_new_w, tlu_npc_new_w; 
+// global nets
+output so;
+// PIC experiment
+output       tlu_exu_pic_onebelow_m; // local traps send to exu 
+output       tlu_exu_pic_twobelow_m; // local traps send to exu 
+
+//=================================================
+// input
+//=================================================
+// sscan related inputs
+input [`TLU_THRD_NUM-1:0] ctu_sscan_tid;
+input [`TSA_TTYPE_WIDTH-1:0] tlu_final_ttype_w2;
+input [1:0] tsa_wr_tid;	
+input tsa1_wr_vld, tsa_rd_vld_e; 
+input tsa_ttype_en; 
+// 
+// current cwp value from exu
+input [2:0]  exu_tlu_cwp0;  // cwp - thread0
+input [2:0]  exu_tlu_cwp1;  // cwp - thread1
+input [2:0]  exu_tlu_cwp2;  // cwp - thread2
+input [2:0]  exu_tlu_cwp3;  // cwp - thread3
+// 
+// componets from trap stack arrays (tsas)
+input [`TSA_CWP_WIDTH-1:0] tsa0_rdata_cwp;
+input [`TSA_PSTATE_WIDTH-1:0] tsa0_rdata_pstate;
+input [`TSA_CCR_WIDTH-1:0] tsa0_rdata_ccr;
+input [`TLU_ASI_STATE_WIDTH-1:0] tsa0_rdata_asi;
+input [`TSA_GLOBAL_WIDTH-1:0] tsa0_rdata_gl;
+input [46:0] tsa0_rdata_pc;
+input [`TSA_TTYPE_WIDTH-1:0] tsa1_rdata_ttype;
+input [46:0] tsa1_rdata_npc;
+input [`TSA_HTSTATE_WIDTH-1:0] tsa1_rdata_htstate;
+//
+// trap pc calculations signals
+input [48:0] ifu_tlu_pc_m;	  // pc
+// input [48:0] ifu_tlu_npc_m;   // npc
+input [`TSA_TTYPE_WIDTH-1:0] tlu_final_offset_w1;
+input [33:0] tlu_partial_trap_pc_w1;
+input [48:0] tlu_restore_pc_w1;
+input [48:0] tlu_restore_npc_w1;
+// input [48:0] ifu_pc_w;
+input [48:0] ifu_npc_w;
+input tlu_restore_pc_sel_w1; 
+//
+// modified due to timing fix
+input [2:0] tlu_true_pc_sel_w;
+// input tlu_retry_inst_m;
+// input tlu_done_inst_m;
+// input tlu_dnrtry_inst_m_l;
+//
+input [`TLU_THRD_NUM-1:0] tlu_thrd_rsel_e;
+// global nets
+input si, se;
+//
+//clk
+input rclk;
+//
+// PIC trap experiment 
+// input [`TLU_THRD_NUM-1:0] tlu_thread_inst_vld_w2; // valid inst for a thread
+// input [`TLU_THRD_NUM-1:0] pich_threebelow_flg;
+// input [`TLU_THRD_NUM-1:0] pich_twobelow_flg;
+// input [`TLU_THRD_NUM-1:0] pich_onebelow_flg;
+input tlu_pic_onebelow_e;
+input tlu_pic_twobelow_e;
+input tlu_pic_cnt_en_m;
+input tlu_rst;
+
+//=================================================
+// local wires
+//=================================================
+// local clock
+wire clk;
+//
+// staged thread id
+wire [`TLU_THRD_NUM-1:0] thrd_sel_m;
+wire [`TLU_THRD_NUM-1:0] tsa_wsel_thrd_w2;
+// 
+// staged tsa_controls
+wire tsa_rd_vld_m; // tsa_rd_vld_e,  
+// 
+// components from tsas
+// tsa0
+wire [`TLU_ASI_STATE_WIDTH-1:0] tsa0_asi_m;
+wire [`TSA_CWP_WIDTH-1:0] tsa0_cwp_m;
+wire [`TSA_CCR_WIDTH-1:0] tsa0_ccr_m;
+wire [`TSA_PSTATE_WIDTH-1:0] tsa0_pstate_m;
+wire [`TSA_GLOBAL_WIDTH-1:0] tsa0_gl_m;
+wire [46:0] tsa0_pc_m;
+// tsa1
+wire [`TSA_TTYPE_WIDTH-1:0]   tsa1_ttype_m;
+wire [`TSA_HTSTATE_WIDTH-1:0] tsa1_htstate_m;
+wire [46:0] tsa1_npc_m;
+//
+// modified for timing
+// wire [48:0] pc_new_m, npc_new_m;
+wire [48:0] pc_new_w, npc_new_w, ifu_pc_w;
+wire [46:0] tsa0_pc_w, tsa1_npc_w;
+// 
+// sscan related signals 
+wire [`TLU_THRD_NUM-1:0] sscan_tid_sel; 
+wire [`TLU_THRD_NUM-1:0] sscan_ttype_en;
+wire [`TLU_THRD_NUM-1:0] sscan_tt_rd_sel;
+wire [`TLU_THRD_NUM-1:0] sscan_tt_wr_sel;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt0_data;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt1_data;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt2_data;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt3_data;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt0_din;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt1_din;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt2_din;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt3_din;
+wire [`MISCTL_SSCAN_WIDTH-1:0] misctl_sscan_test_data;
+//
+// cwp logic 
+wire cwp_no_change_m;
+wire [`TSA_CWP_WIDTH-1:0] cwp_xor_m, trap_old_cwp_m; 
+wire [48:0] normal_trap_pc_w1, normal_trap_npc_w1; 
+wire [48:0] trap_pc_w1, trap_npc_w1; 
+wire [48:0] trap_pc_w2, trap_npc_w2; 
+//
+// PIC experiment
+wire tlu_pic_onebelow_m, tlu_pic_twobelow_m; 
+// wire [`TLU_THRD_NUM-1:0] pic_onebelow_e, pic_twobelow_e; 
+wire local_rst;
+// 
+//=========================================================================================
+// local clock
+//=========================================================================================
+// clk is a plain alias of the rclk input
+assign clk = rclk; 
+
+//=========================================================================================
+//	TSA data capture
+//=========================================================================================
+// Each dff_s registers one tsa0/tsa1 read-data field on clk; se is connected, si/so are left open.
+dff_s #(`TSA_CCR_WIDTH) dff_tsa0_ccr_m (
+    .din (tsa0_rdata_ccr[`TSA_CCR_WIDTH-1:0]),
+    .q   (tsa0_ccr_m[`TSA_CCR_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s #(`TSA_CWP_WIDTH) dff_tsa0_cwp_m (
+    .din (tsa0_rdata_cwp[`TSA_CWP_WIDTH-1:0]),
+    .q   (tsa0_cwp_m[`TSA_CWP_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// asi field of tsa0 (instance is named dff_lsu_asi_m; it drives tsa0_asi_m)
+dff_s #(`TLU_ASI_STATE_WIDTH) dff_lsu_asi_m (
+    .din (tsa0_rdata_asi[`TLU_ASI_STATE_WIDTH-1:0]),
+    .q   (tsa0_asi_m[`TLU_ASI_STATE_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+    );
+
+dff_s #(`TSA_PSTATE_WIDTH) dff_tsa0_pstate_m (  // din = tsa0_rdata_pstate, q = tsa0_pstate_m
+    .din (tsa0_rdata_pstate[`TSA_PSTATE_WIDTH-1:0]),  // slice now sized by the PSTATE width, matching the flop parameter and q (was the CCR width macro)
+    .q   (tsa0_pstate_m[`TSA_PSTATE_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),                                     // scan-in left unconnected, as on the sibling flops
+    .so  ()                                      // scan-out left unconnected
+);
+
+dff_s #(`TSA_GLOBAL_WIDTH) dff_tsa0_gl_m (          // din = tsa0_rdata_gl, q = tsa0_gl_m
+    .clk (clk),
+    .se  (se),
+    .si  (),                                         // scan ports left unconnected
+    .so  (),
+    .din (tsa0_rdata_gl[`TSA_GLOBAL_WIDTH-1:0]),
+    .q   (tsa0_gl_m[`TSA_GLOBAL_WIDTH-1:0])
+);
+
+dff_s #(47) dff_tsa0_pc_m (                         // din = tsa0_rdata_pc, q = tsa0_pc_m (47 bits)
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (tsa0_rdata_pc[46:0]),
+    .q   (tsa0_pc_m[46:0])
+);
+
+dff_s #(`TSA_TTYPE_WIDTH) dff_tsa1_ttype_m (        // din = tsa1_rdata_ttype, q = tsa1_ttype_m
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (tsa1_rdata_ttype[`TSA_TTYPE_WIDTH-1:0]),
+    .q   (tsa1_ttype_m[`TSA_TTYPE_WIDTH-1:0])
+);
+
+dff_s #(`TSA_HTSTATE_WIDTH) dff_tsa1_htstate_m (    // din = tsa1_rdata_htstate, q = tsa1_htstate_m
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (tsa1_rdata_htstate[`TSA_HTSTATE_WIDTH-1:0]),
+    .q   (tsa1_htstate_m[`TSA_HTSTATE_WIDTH-1:0])
+);
+
+dff_s #(47) dff_tsa1_npc_m (                        // din = tsa1_rdata_npc, q = tsa1_npc_m (47 bits)
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (tsa1_rdata_npc[46:0]),
+    .q   (tsa1_npc_m[46:0])
+);
+//
+//=========================================================================================
+//	CWP/CCR restoration
+//=========================================================================================
+
+assign tlu_exu_ccr_m[`TSA_CCR_WIDTH-1:0]               // output follows tsa0_ccr_m
+     = tsa0_ccr_m[`TSA_CCR_WIDTH-1:0];
+assign tlu_exu_cwp_m[`TSA_CWP_WIDTH-1:0]               // output follows tsa0_cwp_m
+     = tsa0_cwp_m[`TSA_CWP_WIDTH-1:0];
+assign tlu_lsu_asi_m[`TLU_ASI_STATE_WIDTH-1:0]         // output follows tsa0_asi_m
+     = tsa0_asi_m[`TLU_ASI_STATE_WIDTH-1:0];
+
+// modified/added for timing violations
+// moved the logic from exu to tlu due to timing violations
+
+dff_s #(`TLU_THRD_NUM) dff_thrd_sel_m (                // din = tlu_thrd_rsel_e, q = thrd_sel_m
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (tlu_thrd_rsel_e[`TLU_THRD_NUM-1:0]),
+    .q   (thrd_sel_m[`TLU_THRD_NUM-1:0])
+);
+
+mux4ds #(`TSA_CWP_WIDTH) mux_trap_old_cwp_m (          // exu_tlu_cwpN is paired with thrd_sel_m[N]
+    .dout (trap_old_cwp_m[`TSA_CWP_WIDTH-1:0]),
+    .in0  (exu_tlu_cwp0[`TSA_CWP_WIDTH-1:0]),
+    .sel0 (thrd_sel_m[0]),
+    .in1  (exu_tlu_cwp1[`TSA_CWP_WIDTH-1:0]),
+    .sel1 (thrd_sel_m[1]),
+    .in2  (exu_tlu_cwp2[`TSA_CWP_WIDTH-1:0]),
+    .sel2 (thrd_sel_m[2]),
+    .in3  (exu_tlu_cwp3[`TSA_CWP_WIDTH-1:0]),
+    .sel3 (thrd_sel_m[3])
+);
+
+assign cwp_xor_m[`TSA_CWP_WIDTH-1:0] =                 // non-zero where tlu_exu_cwp_m and trap_old_cwp_m differ
+           tlu_exu_cwp_m[`TSA_CWP_WIDTH-1:0] ^ trap_old_cwp_m[`TSA_CWP_WIDTH-1:0];
+
+assign cwp_no_change_m = ~(|cwp_xor_m[`TSA_CWP_WIDTH-1:0]);   // 1 when every xor bit is 0
+
+assign tlu_cwp_no_change_m = cwp_no_change_m;          // exported copy of the local flag
+
+//=========================================================================================
+//	Generate TTYPE SSCAN data 
+//=========================================================================================
+//
+// staging the tsa_rd_vld signal
+// moved to tlu_tcl for timing 
+/* 
+dff_s dff_tsa_rd_vld_e ( 
+    .din (tsa_rd_vld),
+	.q   (tsa_rd_vld_e),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+*/
+
+dff_s dff_tsa_rd_vld_m (                               // din = tsa_rd_vld_e, q = tsa_rd_vld_m
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (tsa_rd_vld_e),
+    .q   (tsa_rd_vld_m)
+);
+
+assign tsa_wsel_thrd_w2[0] = ~tsa_wr_tid[0] & ~tsa_wr_tid[1];   // tsa_wr_tid == 2'b00
+assign tsa_wsel_thrd_w2[1] =  tsa_wr_tid[0] & ~tsa_wr_tid[1];   // tsa_wr_tid == 2'b01
+assign tsa_wsel_thrd_w2[2] = ~tsa_wr_tid[0] &  tsa_wr_tid[1];   // tsa_wr_tid == 2'b10
+assign tsa_wsel_thrd_w2[3] =  tsa_wr_tid[0] &  tsa_wr_tid[1];   // tsa_wr_tid == 2'b11
+
+// ttype write indicator per thread: decoded write tid, tsa1_wr_vld and tsa_ttype_en all set
+assign sscan_tt_wr_sel[0] =
+           tsa_wsel_thrd_w2[0] & tsa1_wr_vld & tsa_ttype_en;
+assign sscan_tt_wr_sel[1] =
+           tsa_wsel_thrd_w2[1] & tsa1_wr_vld & tsa_ttype_en;
+assign sscan_tt_wr_sel[2] =
+           tsa_wsel_thrd_w2[2] & tsa1_wr_vld & tsa_ttype_en;
+assign sscan_tt_wr_sel[3] =
+           tsa_wsel_thrd_w2[3] & tsa1_wr_vld & tsa_ttype_en;
+//
+// ttype read indicator per thread: thrd_sel_m bit qualified by tsa_rd_vld_m
+assign sscan_tt_rd_sel[0] =
+           thrd_sel_m[0] & tsa_rd_vld_m;
+assign sscan_tt_rd_sel[1] =
+           thrd_sel_m[1] & tsa_rd_vld_m;
+assign sscan_tt_rd_sel[2] =
+           thrd_sel_m[2] & tsa_rd_vld_m;
+assign sscan_tt_rd_sel[3] =
+           thrd_sel_m[3] & tsa_rd_vld_m;
+
+assign sscan_ttype_en[0] =                             // enable is set on either a write or a read indicator
+           sscan_tt_wr_sel[0] | sscan_tt_rd_sel[0];
+assign sscan_ttype_en[1] =
+           sscan_tt_wr_sel[1] | sscan_tt_rd_sel[1];
+assign sscan_ttype_en[2] =
+           sscan_tt_wr_sel[2] | sscan_tt_rd_sel[2];
+assign sscan_ttype_en[3] =
+           sscan_tt_wr_sel[3] | sscan_tt_rd_sel[3];
+//
+assign sscan_tt0_din[`TSA_TTYPE_WIDTH-1:0] =
+           sscan_tt_wr_sel[0]
+               ? tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]    // write indicator set: take tlu_final_ttype_w2
+               : tsa1_ttype_m[`TSA_TTYPE_WIDTH-1:0];         // otherwise: take tsa1_ttype_m
+assign sscan_tt1_din[`TSA_TTYPE_WIDTH-1:0] =
+           sscan_tt_wr_sel[1]
+               ? tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]
+               : tsa1_ttype_m[`TSA_TTYPE_WIDTH-1:0];
+assign sscan_tt2_din[`TSA_TTYPE_WIDTH-1:0] =
+           sscan_tt_wr_sel[2]
+               ? tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]
+               : tsa1_ttype_m[`TSA_TTYPE_WIDTH-1:0];
+assign sscan_tt3_din[`TSA_TTYPE_WIDTH-1:0] =
+           sscan_tt_wr_sel[3]
+               ? tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]
+               : tsa1_ttype_m[`TSA_TTYPE_WIDTH-1:0];
+// one dffe_s per thread: din = sscan_ttN_din, en = sscan_ttype_en[N], q = sscan_ttN_data
+dffe_s #(`TSA_TTYPE_WIDTH) dffe_sscan_tt0_data (
+    .clk (clk),
+    .en  (sscan_ttype_en[0]),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (sscan_tt0_din[`TSA_TTYPE_WIDTH-1:0]),
+    .q   (sscan_tt0_data[`TSA_TTYPE_WIDTH-1:0])
+);
+
+dffe_s #(`TSA_TTYPE_WIDTH) dffe_sscan_tt1_data (
+    .clk (clk),
+    .en  (sscan_ttype_en[1]),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (sscan_tt1_din[`TSA_TTYPE_WIDTH-1:0]),
+    .q   (sscan_tt1_data[`TSA_TTYPE_WIDTH-1:0])
+);
+
+dffe_s #(`TSA_TTYPE_WIDTH) dffe_sscan_tt2_data (
+    .clk (clk),
+    .en  (sscan_ttype_en[2]),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (sscan_tt2_din[`TSA_TTYPE_WIDTH-1:0]),
+    .q   (sscan_tt2_data[`TSA_TTYPE_WIDTH-1:0])
+);
+
+dffe_s #(`TSA_TTYPE_WIDTH) dffe_sscan_tt3_data (
+    .clk (clk),
+    .en  (sscan_ttype_en[3]),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (sscan_tt3_din[`TSA_TTYPE_WIDTH-1:0]),
+    .q   (sscan_tt3_data[`TSA_TTYPE_WIDTH-1:0])
+);
+
+assign sscan_tid_sel[`TLU_THRD_NUM-1:0]                // select taken directly from ctu_sscan_tid
+         = ctu_sscan_tid[`TLU_THRD_NUM-1:0];
+
+mux4ds #(`MISCTL_SSCAN_WIDTH) mx_sscan_test_data (     // NOTE(review): mux sized by MISCTL_SSCAN_WIDTH, inputs sliced by TSA_TTYPE_WIDTH - confirm the two are equal
+       .dout (misctl_sscan_test_data[`MISCTL_SSCAN_WIDTH-1:0]),
+       .in0  (sscan_tt0_data[`TSA_TTYPE_WIDTH-1:0]),
+       .sel0 (sscan_tid_sel[0]),
+       .in1  (sscan_tt1_data[`TSA_TTYPE_WIDTH-1:0]),
+       .sel1 (sscan_tid_sel[1]),
+       .in2  (sscan_tt2_data[`TSA_TTYPE_WIDTH-1:0]),
+       .sel2 (sscan_tid_sel[2]),
+       .in3  (sscan_tt3_data[`TSA_TTYPE_WIDTH-1:0]),
+       .sel3 (sscan_tid_sel[3])
+);
+
+assign tlu_sscan_misctl_data[`MISCTL_SSCAN_WIDTH-1:0]  // output follows the mux result
+         = misctl_sscan_test_data[`MISCTL_SSCAN_WIDTH-1:0];
+//
+// code moved from tlu_tcl - trap pc delivery logic
+// 
+assign normal_trap_pc_w1[48:0] =                       // {1'b0, partial pc[33:0], final offset, 5'd0}
+            {1'b0, tlu_partial_trap_pc_w1[33:0],
+             tlu_final_offset_w1[`TSA_TTYPE_WIDTH-1:0], 5'd0};
+assign normal_trap_npc_w1[48:0] =                      // same fields, low five bits = 5'd4
+            {1'b0, tlu_partial_trap_pc_w1[33:0],
+             tlu_final_offset_w1[`TSA_TTYPE_WIDTH-1:0], 5'd4};
+//
+// code moved from tlu_tdp
+mux2ds #(49) mx_trap_pc_w1 (                           // in0/sel0 = normal path, in1/sel1 = restore path
+       .dout (trap_pc_w1[48:0]),
+       .in0  (normal_trap_pc_w1[48:0]),
+       .sel0 (~tlu_restore_pc_sel_w1),
+       .in1  (tlu_restore_pc_w1[48:0]),
+       .sel1 (tlu_restore_pc_sel_w1)
+);
+//
+dff_s #(49) dff_trap_pc_w2 (                           // din = trap_pc_w1, q = trap_pc_w2
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (trap_pc_w1[48:0]),
+    .q   (trap_pc_w2[48:0])
+);
+
+assign tlu_ifu_trappc_w2[48:0] = trap_pc_w2[48:0];     // output follows trap_pc_w2
+
+mux2ds #(49) mx_trap_npc_w1 (                          // same select pair as mx_trap_pc_w1, npc operands
+       .dout (trap_npc_w1[48:0]),
+       .in0  (normal_trap_npc_w1[48:0]),
+       .sel0 (~tlu_restore_pc_sel_w1),
+       .in1  (tlu_restore_npc_w1[48:0]),
+       .sel1 (tlu_restore_pc_sel_w1)
+);
+//
+dff_s #(49) dff_trap_npc_w2 (                          // din = trap_npc_w1, q = trap_npc_w2
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (trap_npc_w1[48:0]),
+    .q   (trap_npc_w2[48:0])
+);
+
+assign tlu_ifu_trapnpc_w2[48:0] = trap_npc_w2[48:0];   // output follows trap_npc_w2
+
+//--------------------------------------------------------------------------------
+// Recovery PC and NPC selection 
+//--------------------------------------------------------------------------------
+// On done, npc will become pc. 
+// modified for timing
+//
+dff_s #(47) dff_tsa0_pc_w (                            // din = tsa0_pc_m, q = tsa0_pc_w
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (tsa0_pc_m[46:0]),
+    .q   (tsa0_pc_w[46:0])
+);
+
+dff_s #(49) dff_ifu_pc_w (                             // din = ifu_tlu_pc_m, q = ifu_pc_w
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (ifu_tlu_pc_m[48:0]),
+    .q   (ifu_pc_w[48:0])
+);
+
+mux3ds #(49) mux_pc_new_w (                            // 47-bit operands are padded with 2'b00 to 49 bits
+       .dout (pc_new_w[48:0]),
+       .in0  ({tsa0_pc_w[46:0], 2'b00}),
+       .sel0 (tlu_true_pc_sel_w[0]),
+       .in1  ({tsa1_npc_w[46:0], 2'b00}),
+       .sel1 (tlu_true_pc_sel_w[1]),
+       .in2  (ifu_pc_w[48:0]),
+       .sel2 (tlu_true_pc_sel_w[2])
+);
+
+assign tlu_pc_new_w[48:0] = pc_new_w[48:0];            // output follows pc_new_w
+
+//
+// Original note: on done, npc becomes pc and npc stays npc; the valid to
+// the IFU is not signaled along with npc for a done.
+// NOTE(review): intent is taken from the original comment, not visible here.
+// modified for timing
+dff_s #(47) dff_tsa1_npc_w (                           // din = tsa1_npc_m, q = tsa1_npc_w
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (tsa1_npc_m[46:0]),
+    .q   (tsa1_npc_w[46:0])
+);
+
+mux2ds #(49) mux_npc_new_w (                           // in1 (ifu_npc_w) is paired with tlu_true_pc_sel_w[2], in0 with its inverse
+       .dout (npc_new_w[48:0]),
+       .in0  ({tsa1_npc_w[46:0],2'b00}),
+       .sel0 (~tlu_true_pc_sel_w[2]),
+       .in1  (ifu_npc_w[48:0]),
+       .sel1 (tlu_true_pc_sel_w[2])
+);
+
+assign tlu_npc_new_w[48:0] = npc_new_w[48:0];          // output follows npc_new_w
+
+//--------------------------------------------------------------------------------
+// PIC trap experiment 
+//--------------------------------------------------------------------------------
+
+// added for bug 4785
+assign local_rst = tlu_rst;                            // local alias used as rst on both flops below
+
+dffr_s dffr_tlu_exu_pic_onebelow_m (                   // din = tlu_pic_onebelow_e, q = tlu_pic_onebelow_m
+   .clk (clk),
+   .rst (local_rst),
+   .se  (se),
+   .si  (),
+   .so  (),
+   .din (tlu_pic_onebelow_e),
+   .q   (tlu_pic_onebelow_m)
+);
+
+dffr_s dffr_tlu_exu_pic_twobelow_m (                   // din = tlu_pic_twobelow_e, q = tlu_pic_twobelow_m
+   .clk (clk),
+   .rst (local_rst),
+   .se  (se),
+   .si  (),
+   .so  (),
+   .din (tlu_pic_twobelow_e),
+   .q   (tlu_pic_twobelow_m)
+);
+
+assign tlu_exu_pic_onebelow_m =                        // flop output gated by tlu_pic_cnt_en_m
+           tlu_pic_cnt_en_m & tlu_pic_onebelow_m;
+
+assign tlu_exu_pic_twobelow_m =                        // flop output gated by tlu_pic_cnt_en_m
+           tlu_pic_cnt_en_m & tlu_pic_twobelow_m;
+
+/*
+assign pic_onebelow_e[0] = 
+       tlu_thread_inst_vld_w2[0]? pich_twobelow_flg[0]: pich_onebelow_flg[0];
+assign pic_onebelow_e[1] = 
+       tlu_thread_inst_vld_w2[1]? pich_twobelow_flg[1]: pich_onebelow_flg[1];
+assign pic_onebelow_e[2] = 
+       tlu_thread_inst_vld_w2[2]? pich_twobelow_flg[2]: pich_onebelow_flg[2];
+assign pic_onebelow_e[3] = 
+       tlu_thread_inst_vld_w2[3]? pich_twobelow_flg[3]: pich_onebelow_flg[3];
+
+assign tlu_pic_onebelow_e = 
+           (tlu_thrd_rsel_e[0]) ? pic_onebelow_e[0]:
+           (tlu_thrd_rsel_e[1]) ? pic_onebelow_e[1]:
+           (tlu_thrd_rsel_e[2]) ? pic_onebelow_e[2]:
+            pic_onebelow_e[3];
+
+assign pic_twobelow_e[0] = 
+       tlu_thread_inst_vld_w2[0]? pich_threebelow_flg[0]: pich_twobelow_flg[0];
+assign pic_twobelow_e[1] = 
+       tlu_thread_inst_vld_w2[1]? pich_threebelow_flg[1]: pich_twobelow_flg[1];
+assign pic_twobelow_e[2] = 
+       tlu_thread_inst_vld_w2[2]? pich_threebelow_flg[2]: pich_twobelow_flg[2];
+assign pic_twobelow_e[3] = 
+       tlu_thread_inst_vld_w2[3]? pich_threebelow_flg[3]: pich_twobelow_flg[3];
+
+assign tlu_pic_twobelow_e = 
+           (tlu_thrd_rsel_e[0]) ? pic_twobelow_e[0]:
+           (tlu_thrd_rsel_e[1]) ? pic_twobelow_e[1]:
+           (tlu_thrd_rsel_e[2]) ? pic_twobelow_e[2]:
+            pic_twobelow_e[3];
+*/
+
+endmodule
Index: /trunk/T1-CPU/tlu/sparc_tlu_dec64.v
===================================================================
--- /trunk/T1-CPU/tlu/sparc_tlu_dec64.v	(revision 6)
+++ /trunk/T1-CPU/tlu/sparc_tlu_dec64.v	(revision 6)
@@ -0,0 +1,58 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_tlu_dec64.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_tlu_dec64
+//  Description:    
+//    6 -> 64 decoder
+*/
+
+module sparc_tlu_dec64(/*AUTOARG*/
+   // Outputs
+   out,
+   // Inputs
+   in
+   );
+
+   // 6-bit binary select.
+   input  [5:0]  in;
+   // Decoded vector: out[k] is 1 only when in equals k.
+   output [63:0] out;
+
+   wire [5:0]    in;
+   reg  [63:0]   out;
+
+   integer       idx;   // loop index over the 64 output bits
+
+   // Combinational decode. The vector is cleared first and only the bit
+   // whose index compares equal to in is set, so a compare that is not
+   // true (including an unknown in) leaves every bit at 0.
+   always @ (in)
+     begin
+        out = 64'b0;
+        for (idx = 0; idx < 64; idx = idx + 1)
+          begin
+             if (in[5:0] == idx[5:0]) out[idx] = 1'b1;
+          end
+     end
+endmodule // sparc_tlu_dec64
+
+	
Index: /trunk/T1-CPU/tlu/tlu_mmu_ctl.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_mmu_ctl.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_mmu_ctl.v	(revision 6)
@@ -0,0 +1,2367 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_mmu_ctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//	Description:	MMU Control - I & D.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which contains the 
+					// time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module tlu_mmu_ctl ( /*AUTOARG*/
+   // Outputs
+   dmmu_any_sfsr_wr, dmmu_sfsr_wr_en_l, dmmu_sfar_wr_en_l, 
+   immu_any_sfsr_wr, immu_sfsr_wr_en_l, immu_tsb_rd_en, tlu_tte_tag_g, 
+   tlu_dtlb_rw_index_vld_g,  tlu_dtlb_rw_index_g, 
+   tlu_dtlb_data_rd_g, tlu_dtlb_tag_rd_g, tlu_itlb_rw_index_vld_g, 
+   tlu_itlb_wr_vld_g, itlb_wr_vld_g, tlu_itlb_rw_index_g, 
+   tlu_itlb_data_rd_g, tlu_itlb_tag_rd_g, tlu_idtsb_8k_ptr, 
+   tlu_dtlb_invalidate_all_g, tlu_itlb_invalidate_all_g, tlu_slxa_thrd_sel, 
+   tlu_lsu_ldxa_tid_w2, tlu_itlb_dmp_vld_g, 
+   tlu_itlb_dmp_all_g, tlu_itlb_dmp_pctxt_g, tlu_itlb_dmp_actxt_g, 
+   tlu_itlb_dmp_nctxt_g, tlu_dtlb_dmp_vld_g, tlu_dtlb_dmp_all_g, 
+   tlu_dtlb_dmp_pctxt_g, tlu_dtlb_dmp_sctxt_g, tlu_dtlb_dmp_nctxt_g, 
+   tlu_dtlb_dmp_actxt_g, tlu_idtlb_dmp_thrid_g, tlu_dmp_key_vld_g, 
+   tlu_int_asi_load, tlu_int_asi_store, tlu_int_asi_thrid, 
+   tlu_int_asi_vld, tlb_access_rst_l, 
+   tlu_lsu_stxa_ack, tlu_lsu_stxa_ack_tid, mra_wr_ptr, mra_rd_ptr, 
+   mra_wr_vld, mra_rd_vld, tag_access_wdata_sel, 
+   tlu_admp_key_sel, mra_byte_wen,
+   tlu_tte_wr_pid_g, tlu_lsu_ldxa_async_data_vld, tlu_tte_real_g, 
+   tlu_ldxa_l1mx1_sel, tlu_ldxa_l1mx2_sel, tlu_ldxa_l2mx1_sel, 
+   lsu_ifu_inj_ack, tlu_tlb_tag_invrt_parity,  tlu_tlb_data_invrt_parity, 
+   tlu_sun4r_tte_g, so, lsu_exu_ldxa_m, tlu_lng_ltncy_en_l, 
+   tlu_tag_access_ctxt_sel_m, tlu_tsb_rd_ps0_sel, tlu_tlb_access_en_l_d1,
+   // Inputs
+   ifu_lsu_ld_inst_e, ifu_lsu_st_inst_e, spu_tlu_rsrv_illgl_m,
+   lsu_tlu_dmmu_miss_g, 
+   tlu_dtsb_split_w2, tlu_dtsb_size_w2, tlu_dtag_access_w2, tlu_itsb_split_w2, 
+   tlu_itsb_size_w2, tlu_ctxt_cfg_w2, lsu_tlu_st_rs3_data_g, 
+   lsu_tlu_st_rs3_data_b48_g, lsu_tlu_st_rs3_data_b12t0_g, 
+   ifu_tlu_immu_miss_m, ifu_lsu_thrid_s, 
+   ifu_lsu_alt_space_e, lsu_tlu_dtlb_done, 
+   ifu_tlu_itlb_done, lsu_tlu_tlb_asi_state_m, lsu_tlu_tlb_ldst_va_m, 
+   lsu_tlu_tlb_ld_inst_m, lsu_tlu_tlb_st_inst_m, 
+   lsu_tlu_tlb_access_tid_m, dmmu_sfsr_trp_wr, 
+   immu_sfsr_trp_wr, lsu_tlu_daccess_excptn_g, 
+   lsu_tlu_daccess_prot_g, 
+   lsu_pid_state0, lsu_pid_state1, lsu_pid_state2, lsu_pid_state3, 
+   lsu_tlu_nucleus_ctxt_m, lsu_tlu_tte_pg_sz_g, ifu_lsu_error_inj, 
+   ifu_tlu_alt_space_d, ifu_lsu_imm_asi_d, 
+   ifu_lsu_memref_d, lsu_asi_reg0, lsu_asi_reg1, lsu_asi_reg2, 
+   lsu_asi_reg3, exu_mmu_early_va_e, rclk, arst_l, grst_l,
+   si,se,ifu_tlu_flush_m,tlu_mmu_early_flush_pipe_w,lsu_mmu_early_flush_w,
+   tlu_tag_access_ctxt_g, tlu_lsu_tl_zero, 
+   exu_tlu_va_oor_jl_ret_m, exu_tlu_va_oor_m, tlu_lsu_pstate_am, tlu_tsb_base_w2_d1,
+   lsu_mmu_flush_pipe_w, ifu_tlu_inst_vld_m, ifu_mmu_trap_m, ffu_tlu_ill_inst_m,
+   exu_lsu_priority_trap_m, sehold, rst_tri_en, tlu_itag_acc_sel_g, lsu_mmu_defr_trp_taken_g,
+   ifu_tlu_priv_violtn_m 
+   ) ;	
+
+
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+// End of automatics
+
+input                   ifu_lsu_ld_inst_e;      // inst_is_load (src-decode)
+input                   ifu_lsu_st_inst_e;      // inst is store (src-decode)
+input                   lsu_tlu_dmmu_miss_g ;   // ld/st misses in dtlb.
+input			spu_tlu_rsrv_illgl_m ; 
+
+input			tlu_itag_acc_sel_g ;
+input			lsu_mmu_defr_trp_taken_g ;
+
+// The timing on these signals can be changed to any earlier stage.
+// For both SPARC_HPV_EN and non-SPARC_HPV_EN - tsb,tag-access 
+// dtsb maps to ps0. itsb maps to ps1.
+input  [47:13]         	tlu_tsb_base_w2_d1 ;
+//input  [47:13]         	tlu_dtsb_base_w2 ;
+input                  	tlu_dtsb_split_w2 ;
+input  [3:0]           	tlu_dtsb_size_w2 ;       
+input  [47:13]         	tlu_dtag_access_w2 ;  	// used to represent both i/d.
+//input  [47:13]         	tlu_itsb_base_w2 ;
+input                  	tlu_itsb_split_w2 ;
+input  [3:0]           	tlu_itsb_size_w2 ;       
+
+// For SPARC_HPV_EN - BEGIN
+input  [5:0] 		tlu_ctxt_cfg_w2 ;	// i/d context zero/non-zero config.
+//input  			tlu_tag_access_nctxt_g ;// tag-access contains nucleus context.
+// For SPARC_HPV_EN - END
+
+input   [62:61]         lsu_tlu_st_rs3_data_g ;	    // Page Size (1,0) bits of TTE
+input            	lsu_tlu_st_rs3_data_b48_g ; // Page Size (2)   bits of TTE
+//input   [2:0]           lsu_tlu_st_rs3_data_b10t8_g ; // ps1 of ctxt-cfg
+input   [12:0]          lsu_tlu_st_rs3_data_b12t0_g ; 
+//input   [2:0]           lsu_tlu_st_rs3_data_b2t0_g ; // sun4v tte size
+input			ifu_tlu_immu_miss_m ;
+input   [1:0]           ifu_lsu_thrid_s ;   	// Thread id.
+input			ifu_lsu_alt_space_e ;	// alt-space access
+input                   lsu_tlu_dtlb_done ; // dtlb rd/wr/dmp complete
+input            	ifu_tlu_itlb_done ; // itlb rd/wr/dmp complete
+//input			int_tlu_asi_data_vld ;	// asi return vld for int blk
+//input			int_tlu_ldxa_illgl_va ;	// int asi has illgl va
+input  [7:0]           	lsu_tlu_tlb_asi_state_m ;
+input  [10:0]           lsu_tlu_tlb_ldst_va_m ;
+input                  	lsu_tlu_tlb_ld_inst_m ;
+input                  	lsu_tlu_tlb_st_inst_m ;
+input  [1:0]           	lsu_tlu_tlb_access_tid_m ;
+input		   	ifu_tlu_flush_m ;
+input		   	tlu_mmu_early_flush_pipe_w ;
+input		   	lsu_mmu_early_flush_w ;
+input  	[3:0]   	dmmu_sfsr_trp_wr ;
+input  	[3:0]   	immu_sfsr_trp_wr ;  
+//input          		tlu_inst_vld_m ;        // qualified inst vld
+input			lsu_tlu_daccess_excptn_g ; // data access exception 
+input			lsu_tlu_daccess_prot_g ;// data access protection
+						// obsolete with SPARC_HPV_EN !!!
+//input           	lsu_tlu_asi_rd_unc ;    // uncorrectable error for tlb rd
+input  	[2:0]   	lsu_pid_state0 ;        // pid thread0 ; global use
+input  	[2:0]   	lsu_pid_state1 ;        // pid thread1 ; global use
+input  	[2:0]   	lsu_pid_state2 ;        // pid thread2 ; global use
+input  	[2:0]   	lsu_pid_state3 ;        // pid thread3 ; global use
+
+input			lsu_tlu_nucleus_ctxt_m ;// access is nucleus context
+input 	[2:0] 		lsu_tlu_tte_pg_sz_g ;	// page-size of tte 
+
+input   [3:0]           ifu_lsu_error_inj ;     // inject parity error into tlb
+
+// BEGIN - MMU_ASI_RD_CHANGE
+// !! early va required.
+input			ifu_tlu_alt_space_d ;	// alt space access - new;_e exists
+//input			ifu_lsu_imm_asi_vld_d ; // imm asi is vld - current
+input	[8:0]		ifu_lsu_imm_asi_d ;	// imm asi - current
+input                   ifu_lsu_memref_d;	// ld/st - prefer ld_inst_e;
+input   [7:0]   	lsu_asi_reg0 ;          // asi state - thread0
+input   [7:0]   	lsu_asi_reg1 ;          // asi state - thread1
+input   [7:0]   	lsu_asi_reg2 ;          // asi state - thread2
+input   [7:0]   	lsu_asi_reg3 ;          // asi state - thread3
+//input	[1:0]		ifu_tlu_thrid_d ;       // thread id
+input 	[7:0]  		exu_mmu_early_va_e;	// early va from exu
+// END - MMU_ASI_RD_CHANGE
+
+input [12:0]          tlu_tag_access_ctxt_g ;
+
+input [3:0] tlu_lsu_tl_zero;   // trap level is zero.
+//input           exu_tlu_ttype_vld_m;    // exu src ttype vld
+input           exu_tlu_va_oor_jl_ret_m;
+input           exu_tlu_va_oor_m;
+input [3:0] tlu_lsu_pstate_am; 
+
+input		lsu_mmu_flush_pipe_w ;
+input		ifu_tlu_inst_vld_m ;
+input		ifu_mmu_trap_m ; 
+input		ffu_tlu_ill_inst_m ;
+input		exu_lsu_priority_trap_m ; // fill/ue
+input		ifu_tlu_priv_violtn_m ;
+
+input		rclk ;
+input		arst_l, grst_l;
+input		si,se;
+input		sehold ;
+input		rst_tri_en ;
+
+/*AUTOOUTPUT*/
+// Beginning of automatic outputs (from unused autoinst outputs)
+// End of automatics
+
+output			dmmu_any_sfsr_wr ;
+output	[3:0]		dmmu_sfsr_wr_en_l ; 
+output	[3:0]		dmmu_sfar_wr_en_l ;
+//output	[3:0]		dmmu_tsb_wr_en ;
+//output	[3:0]		dmmu_tsb_rd_en ;
+//output	[3:0]		dmmu_tag_access_wr_en ;
+//output	[3:0]		dmmu_tag_access_rd_en ;
+//output			dmmu_tag_read_en ; 
+
+output			immu_any_sfsr_wr ; 
+output	[3:0]		immu_sfsr_wr_en_l ; 
+//output	[3:0]		immu_tsb_wr_en ;
+output	[3:0]		immu_tsb_rd_en ;
+//output	[3:0]		immu_tag_access_wr_en ;
+//output	[3:0]		immu_tag_access_rd_en ;
+//output			immu_tag_read_en ; 
+
+// tlb/itlb related control can potentially be
+// made g-stage.
+output  [2:0]          	tlu_tte_tag_g ;
+output			tlu_dtlb_rw_index_vld_g ;
+output	[5:0]		tlu_dtlb_rw_index_g ;
+output			tlu_dtlb_data_rd_g ;
+output			tlu_dtlb_tag_rd_g ;
+output			tlu_itlb_rw_index_vld_g ;
+output			tlu_itlb_wr_vld_g ;
+output			itlb_wr_vld_g ;
+output	[5:0]		tlu_itlb_rw_index_g ;
+output			tlu_itlb_data_rd_g ;
+output			tlu_itlb_tag_rd_g ;
+output	[47:0]		tlu_idtsb_8k_ptr ;	// maps to ps0/ps1 ptr. require only 1.
+
+output			tlu_dtlb_invalidate_all_g ;
+output			tlu_itlb_invalidate_all_g ;
+
+output  [3:0]           tlu_slxa_thrd_sel ;
+
+output	[1:0]		tlu_lsu_ldxa_tid_w2 ;
+
+output			tlu_itlb_dmp_vld_g ;
+output			tlu_itlb_dmp_all_g ;
+output			tlu_itlb_dmp_pctxt_g ;
+output			tlu_itlb_dmp_actxt_g ;
+output			tlu_itlb_dmp_nctxt_g ;
+output			tlu_dtlb_dmp_vld_g ;
+output			tlu_dtlb_dmp_all_g ;
+output			tlu_dtlb_dmp_pctxt_g ;
+output			tlu_dtlb_dmp_sctxt_g ;
+output			tlu_dtlb_dmp_nctxt_g ;
+output			tlu_dtlb_dmp_actxt_g ;
+output	[1:0]		tlu_idtlb_dmp_thrid_g ;
+output  [4:0]           tlu_dmp_key_vld_g ;
+output                	tlu_int_asi_load; 
+output                	tlu_int_asi_store; 
+output 	[1:0]          	tlu_int_asi_thrid;
+output                	tlu_int_asi_vld; 
+//output			tlb_access_en_l ;
+output			tlb_access_rst_l ;
+output			tlu_lsu_stxa_ack ;	   // write to tlb is complete.
+output	 [1:0]		tlu_lsu_stxa_ack_tid ;
+output   [3:0]          mra_wr_ptr ;    // wr ptr for mra
+output   [3:0]          mra_rd_ptr ;    // thrd id for rd.
+output                  mra_wr_vld ;    // write pointer vld
+output                  mra_rd_vld ;    // read vld
+output	 [19:0]		mra_byte_wen ;
+output	 [2:0]		tag_access_wdata_sel ;
+output			tlu_admp_key_sel ;
+//output			tlu_mmu_sync_data_excp_g ;	// sync asi related data excp
+//output			tlu_lsu_dtlb_rd_unc ;		// unc error for tlb rd
+
+//output   [3:0]          tlu_dldxa_mx2_sel ;		// obsolete for SPARC_HPV_EN
+//output   [2:0]          tlu_dldxa_mx3_sel ;		// obsolete for SPARC_HPV_EN
+//output   [2:0]          tlu_dldxa_fmx_sel ;		// obsolete for SPARC_HPV_EN
+//output   [3:0]          tlu_ildxa_mx1_sel ;		// obsolete for SPARC_HPV_EN
+//output   [2:0]          tlu_ildxa_fmx_sel ;		// obsolete for SPARC_HPV_EN
+
+output	 [2:0]		tlu_tte_wr_pid_g ;	// thread selected pid
+output                  tlu_lsu_ldxa_async_data_vld ;   // tlu_lsu_ldxa_data_vld is for async op.
+
+output   		tlu_tte_real_g ;                // tte is real
+
+output  [3:0]   	tlu_ldxa_l1mx1_sel ;    // mmu ldxa level1 mx1 sel
+output  [3:0]   	tlu_ldxa_l1mx2_sel ;    // mmu ldxa level1 mx2 sel
+output  [2:0]   	tlu_ldxa_l2mx1_sel ;    // mmu ldxa level2 mx1 sel
+
+output  [3:0]           lsu_ifu_inj_ack ;       // ack for tlb error injection.
+output			tlu_tlb_tag_invrt_parity ;	// invert parity on write tag.
+output			tlu_tlb_data_invrt_parity ;	// invert parity on write data.
+
+output			tlu_sun4r_tte_g ;	// sun4r vs. sun4v tte.
+
+output			lsu_exu_ldxa_m ;
+
+output			tlu_lng_ltncy_en_l ;
+
+output	[2:0]		tlu_tag_access_ctxt_sel_m ;
+
+output			tlu_tsb_rd_ps0_sel ;
+
+output			tlu_tlb_access_en_l_d1 ;
+
+output			so ;
+
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+
+reg			dmmu_invalidate_all_en_m ;
+reg			immu_invalidate_all_en_m ;
+reg 	dmmu_decode_asi58_e ;
+reg 	immu_decode_asi50_e ;
+reg	dmmu_8k_ptr_e,dmmu_64k_ptr_e,dmmu_direct_ptr_e ;
+reg	immu_8k_ptr_e,immu_64k_ptr_e;
+reg	dmmu_zctxt_ps0_tsb_e, dmmu_zctxt_ps1_tsb_e ;
+reg	dmmu_nzctxt_ps0_tsb_e, dmmu_nzctxt_ps1_tsb_e ;
+reg	dmmu_zctxt_cfg_e, dmmu_nzctxt_cfg_e ;
+reg	immu_zctxt_ps0_tsb_e, immu_zctxt_ps1_tsb_e ;
+reg	immu_nzctxt_ps0_tsb_e, immu_nzctxt_ps1_tsb_e ;
+reg	immu_zctxt_cfg_e, immu_nzctxt_cfg_e ;
+
+reg			dmmu_data_in_en_m,dmmu_data_access_en_m;
+reg			dmmu_tag_read_en_m,dmmu_demap_en_m;
+
+wire			sehold_d1 ;
+wire			tlb_access_en_l ;
+wire			dmmu_sync_illgl_va_g ;
+wire			dmmu_async_supported_asi,dmmu_async_illgl_va_g ; 	
+wire			immu_sync_illgl_va_g ;
+wire			immu_async_supported_asi,immu_async_illgl_va_g ;
+wire			ld_inst_m,st_inst_m ;
+wire			ld_inst_g,st_inst_g ;
+wire [3:0] 		tsb_size ;
+wire			tsb_split ;
+//wire [47:13]		tsb_base ;
+wire [47:13]		tag_access ;
+/*wire 	tsb_sz_8k_b0_mx1_out,tsb_sz_8k_b1_mx1_out,tsb_sz_8k_b2_mx1_out,tsb_sz_8k_b3_mx1_out;
+wire 	tsb_sz_8k_b4_mx1_out,tsb_sz_8k_b5_mx1_out,tsb_sz_8k_b6_mx1_out,tsb_sz_8k_b7_mx1_out;
+wire 	tsb_sz_8k_b0_mx2_out,tsb_sz_8k_b1_mx2_out,tsb_sz_8k_b2_mx2_out,tsb_sz_8k_b3_mx2_out;
+wire 	tsb_sz_8k_b4_mx2_out,tsb_sz_8k_b5_mx2_out,tsb_sz_8k_b6_mx2_out,tsb_sz_8k_b7_mx2_out;
+wire 	tsb_sz_8k_b0_mx3_out,tsb_sz_8k_b1_mx3_out,tsb_sz_8k_b2_mx3_out,tsb_sz_8k_b3_mx3_out;
+wire 	tsb_sz_8k_b4_mx3_out,tsb_sz_8k_b5_mx3_out,tsb_sz_8k_b6_mx3_out,tsb_sz_8k_b7_mx3_out;
+wire 	tsb_sz_64k_b0_mx1_out,tsb_sz_64k_b1_mx1_out,tsb_sz_64k_b2_mx1_out,tsb_sz_64k_b3_mx1_out;
+wire 	tsb_sz_64k_b4_mx1_out,tsb_sz_64k_b5_mx1_out,tsb_sz_64k_b6_mx1_out,tsb_sz_64k_b7_mx1_out;
+wire 	tsb_sz_64k_b0_mx2_out,tsb_sz_64k_b1_mx2_out,tsb_sz_64k_b2_mx2_out,tsb_sz_64k_b3_mx2_out;
+wire 	tsb_sz_64k_b4_mx2_out,tsb_sz_64k_b5_mx2_out,tsb_sz_64k_b6_mx2_out ;
+wire 	tsb_sz_64k_b0_mx3_out,tsb_sz_64k_b1_mx3_out,tsb_sz_64k_b2_mx3_out,tsb_sz_64k_b3_mx3_out;
+wire 	tsb_sz_64k_b4_mx3_out ;*/
+wire	dtlb_rw_index_vld_g,dtlb_wr_vld_g ;
+wire		dmmu_data_in_wr_en, dmmu_data_access_wr_en ; 
+wire		dmmu_tag_read_rd_en, dmmu_data_access_rd_en ;
+wire		immu_data_in_wr_en, immu_data_access_wr_en ;
+wire		immu_data_access_rd_en, immu_tag_read_rd_en ;
+wire		itlb_rw_index_vld_g,itlb_wr_vld_g;
+wire		tlu_ldxa_data_vld ;
+wire	tlu_dldxa_data_vld ;
+wire	[1:0]	thrid_d,thrid_e,thrid_m,thrid_g ;
+wire		thread0_sel_g, thread1_sel_g ;
+wire		thread2_sel_g, thread3_sel_g ;
+wire		alt_space_m, alt_space_g ;
+wire		immu_miss_g;
+wire		ddemap_by_page,ddemap_by_ctxt,ddemap_all;
+wire		idemap_by_page,idemap_by_ctxt,idemap_all;
+wire		demap_pctxt,demap_sctxt,demap_nctxt ;
+//wire		lsu_tlu_page_ebit_g ;
+wire		ddemap_vld, idemap_vld ;
+wire	[2:0]   tlu_tte_tag_g ;
+wire		demap_resrv ;
+wire	itlb_wr_pend,itlb_data_rd_pend,itlb_tag_rd_pend ;
+wire	dtlb_wr_pend,dtlb_data_rd_pend,dtlb_tag_rd_pend ;
+wire	tlb_access_en ;
+wire	tlb_access_rst ;
+wire	dmra_wr_g, imra_wr_g ;
+wire			dmmu_data_in_en, dmmu_data_access_en, dmmu_tag_read_en, dmmu_demap_en ; 
+wire			immu_data_in_en, immu_data_access_en, immu_tag_read_en, immu_demap_en ; 
+wire	immu_invalidate_all_en,dmmu_invalidate_all_en ;
+wire	tlb_wr_vld_g ;
+wire	tlb_admp_en, tlb_admp_rst, tlb_wr_rst ;
+wire	tlb_admp_mode,tlb_write_mode ;
+wire	tlb_ldst_inst_m ;
+wire 	tlb_admp_mode_d1 ;
+wire	itlb_wr_vld_unmsked,dtlb_wr_vld_unmsked;
+wire	idemap_pend, ddemap_pend ;
+wire	itlb_tag_rd_en, dtlb_tag_rd_en ;
+wire	[3:0]	dsfsr_asi_wr_en ;
+wire	[3:0]	isfsr_asi_wr_en ;
+wire	[10:3]	tlb_ldst_va_g ;
+wire		tlb_ld_inst_g,tlb_st_inst_g ;
+wire		tlb_ld_inst_unflushed,tlb_st_inst_unflushed ;
+wire	[1:0]	tlb_access_tid_g ;
+wire		inst_vld_g ;
+wire	st_inst_unflushed, ld_inst_unflushed ;
+wire	imra_lng_lat_rd,dmra_lng_lat_rd ;
+wire	iside_mra_access_rd, iside_mra_access_wr ;
+wire	[1:0]	mra_raccess_tid ;
+//wire	dmmu_sync_rd_only_asi_g ;
+//wire	immu_sync_rd_only_asi_g ;
+wire	dptr0_pg64k_en,dptr1_pg64k_en,dptr2_pg64k_en,dptr3_pg64k_en;
+wire	dptr0_pg64k_vld,dptr1_pg64k_vld,dptr2_pg64k_vld,dptr3_pg64k_vld;
+//wire 	dmmu_direct_ptr_rd_en ;
+wire    tlu_dtlb_rd_done ;
+wire	dmmu_ctxt_cfg_en, immu_ctxt_cfg_en ;
+//wire	dmmu_ctxt_cfg_rd_en ;
+wire	dacc_prot_ps1_match ;
+wire	tacc_nctxt, itacc_nctxt, dtacc_nctxt ;	// for in-pipe access
+wire	tacc_anctxt, itacc_anctxt, dtacc_anctxt ;// for async access
+wire	thread0_async_g,thread1_async_g,thread2_async_g ;
+wire	sun4r_tte_g ;
+wire	dmmu_decode_asi58_m, immu_decode_asi50_m ;
+wire	dmmu_zctxt_ps0_tsb_m, dmmu_zctxt_ps1_tsb_m,
+	dmmu_nzctxt_ps0_tsb_m, dmmu_nzctxt_ps1_tsb_m,
+	dmmu_zctxt_cfg_m, dmmu_nzctxt_cfg_m,
+	immu_zctxt_ps0_tsb_m, immu_zctxt_ps1_tsb_m,
+	immu_nzctxt_ps0_tsb_m, immu_nzctxt_ps1_tsb_m,
+	immu_zctxt_cfg_m, immu_nzctxt_cfg_m ;
+wire	dmmu_sync_fsr_en, dmmu_sync_far_en,
+	dmmu_zctxt_ps0_tsb_en, dmmu_zctxt_ps1_tsb_en,
+	dmmu_nzctxt_ps0_tsb_en, dmmu_nzctxt_ps1_tsb_en,
+	dmmu_zctxt_cfg_en, dmmu_nzctxt_cfg_en,
+	immu_sync_fsr_en,
+	immu_zctxt_ps0_tsb_en, immu_zctxt_ps1_tsb_en,
+	immu_nzctxt_ps0_tsb_en, immu_nzctxt_ps1_tsb_en,
+	immu_zctxt_cfg_en, immu_nzctxt_cfg_en ;
+wire	dmmu_tag_target_en_m,dmmu_tag_access_en_m;
+wire	immu_tag_target_en_m,immu_tag_access_en_m;
+wire	dmmu_tag_access_en;
+wire	immu_tag_access_en;
+wire	dmmu_8k_ptr_en_m,dmmu_64k_ptr_en_m,dmmu_direct_ptr_en_m ;
+wire	immu_8k_ptr_en_m,immu_64k_ptr_en_m ;
+wire	dmmu_sync_fsr_en_m, dmmu_sync_far_en_m,
+	dmmu_zctxt_ps0_tsb_en_m, dmmu_zctxt_ps1_tsb_en_m,
+	dmmu_nzctxt_ps0_tsb_en_m, dmmu_nzctxt_ps1_tsb_en_m,
+	dmmu_zctxt_cfg_en_m, dmmu_nzctxt_cfg_en_m,
+	immu_sync_fsr_en_m,
+	immu_zctxt_ps0_tsb_en_m, immu_zctxt_ps1_tsb_en_m,
+	immu_nzctxt_ps0_tsb_en_m, immu_nzctxt_ps1_tsb_en_m,
+	immu_zctxt_cfg_en_m, immu_nzctxt_cfg_en_m ;
+wire	thread0_d,thread1_d,thread2_d,thread3_d;
+wire 	thread0_e, thread1_e, thread2_e, thread3_e ;
+wire [7:0]	asi_state_d, asi_state_e ;
+wire	memref_e,memref_m ;
+wire [7:0] early_va_m ;
+wire	idmra_rd_d ;
+wire idmra_nzctxt_rd_d ;
+wire idmra_fault_rd_d ;
+wire	dmmu_tsb_en_m, dmmu_ctxt_cfg_en_m ; 
+wire	immu_tsb_en_m, immu_ctxt_cfg_en_m ; 
+wire	tlu_ildxa_data_vld ;
+wire	dmmu_direct_8kptr_sel_g ;	// direct ptr should select 8k ptr
+
+	wire	dmmu_tsb_en ;
+	wire	immu_tsb_en ;
+
+wire	mra_field1_en, mra_field2_en ; 
+wire	mra_field3_en, mra_field4_en ; 
+
+//=========================================================================================
+//      RESET/CLK
+//=========================================================================================
+ 
+    // Local clock alias: everything in this section is clocked by clk, which is
+    // driven directly from rclk.
+    wire       clk = rclk;
+
+    // rst_l is grst_l registered on clk by rstff; arst_l is connected to the
+    // cell's rst_l port.
+    wire       rst_l;
+
+    dffrl_async rstff (
+        .clk   (clk),
+        .rst_l (arst_l),
+        .din   (grst_l),
+        .q     (rst_l),
+        .se    (se),
+        .si    (),
+        .so    ()
+    );
+
+
+//=========================================================================================
+//	Early Flush Generation
+//=========================================================================================
+
+
+
+
+// ifu_tlu_flush_w is ifu_tlu_flush_m registered on clk.
+wire	ifu_tlu_flush_w ;
+dff_s #(1) stg_w (
+        .clk    (clk),
+        .din    (ifu_tlu_flush_m),
+        .q      (ifu_tlu_flush_w),
+        .se     (1'b0),
+        .si     (),
+        .so     ()
+) ;
+
+// local_flush_w: OR of the four flush sources listed below.
+wire	local_flush_w ;
+assign	local_flush_w =
+	lsu_mmu_early_flush_w		|	// lsu early flush
+	tlu_mmu_early_flush_pipe_w	|	// tlu flush
+	lsu_mmu_defr_trp_taken_g	|	// defr trp
+	ifu_tlu_flush_w			;	// ifu flush
+
+// flush_w_inst_vld_m: ifu_tlu_inst_vld_m, forced low when lsu_mmu_flush_pipe_w is
+// asserted while thrid_m equals thrid_g.
+wire    flush_w_inst_vld_m ;
+assign  flush_w_inst_vld_m =
+        ifu_tlu_inst_vld_m &
+        ~(lsu_mmu_flush_pipe_w & (thrid_m[1:0] == thrid_g[1:0])) ; // really lsu_flush_pipe_w
+
+// inst_vld_g is flush_w_inst_vld_m registered on clk.
+dff_s  stgw_ivld (
+        .din    (flush_w_inst_vld_m),
+        .q      (inst_vld_g),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Bug 4183
+// priority_squash_m: OR of the four trap / illegal-instruction inputs below.
+wire	priority_squash_m, priority_squash_g ;
+assign	priority_squash_m = 
+ifu_mmu_trap_m | ffu_tlu_ill_inst_m | exu_lsu_priority_trap_m |  spu_tlu_rsrv_illgl_m ; 
+
+// trp_vld_m: flush_w_inst_vld_m with the priority squash removed.
+wire	trp_vld_m,trp_vld_g ;
+assign	trp_vld_m = flush_w_inst_vld_m & ~priority_squash_m ;
+
+// Register {priority_squash_m, trp_vld_m} into their _g counterparts.
+dff_s  #(2) sqshstgw (
+        .din    ({priority_squash_m,trp_vld_m}),
+        .q      ({priority_squash_g,trp_vld_g}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+) ;
+
+//=========================================================================================
+//	Staging
+//=========================================================================================
+
+// Chain of registers on clk that carries the thread id and the load/store
+// qualifiers from the _s inputs through the _d, _e, _m and _g versions.
+
+// thrid_d: ifu_lsu_thrid_s registered.
+dff_s  #(2) stg_d (
+        .din    (ifu_lsu_thrid_s[1:0]),
+        .q      (thrid_d[1:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );  
+
+// thrid_e: thrid_d registered.
+dff_s  #(2) stg_e (
+        .din    (thrid_d[1:0]),
+        .q      (thrid_e[1:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );  
+
+// 5 bits: load, store, 2-bit thread id, alt-space -> _m versions.
+dff_s  #(5) stg_m (
+        .din    ({ifu_lsu_ld_inst_e,ifu_lsu_st_inst_e,
+		thrid_e[1:0],ifu_lsu_alt_space_e}),
+        .q      ({ld_inst_m,st_inst_m,thrid_m[1:0],alt_space_m}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );  
+
+// 6 bits: the five _m signals above plus ifu_tlu_immu_miss_m. The load/store
+// outputs are the *_unflushed versions; flush qualification is applied separately.
+dff_s  #(6) stg_g (
+        .din    ({ld_inst_m,st_inst_m,thrid_m[1:0],alt_space_m,ifu_tlu_immu_miss_m}),
+        .q      ({ld_inst_unflushed,st_inst_unflushed,thrid_g[1:0],alt_space_g,immu_miss_g}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );  
+
+// reads are terminated for illegal va case.
+// NOTE(review): the active ld_inst_g expression has no illgl_va term (only the
+// commented-out variant below mentions one) - confirm where reads are terminated.
+// ld_inst_g: unflushed load, valid instruction, no local flush.
+assign	ld_inst_g = ld_inst_unflushed & inst_vld_g & ~local_flush_w ;
+//assign	ld_inst_g = ld_inst_unflushed & inst_vld_g & ~(dmmu_sync_illgl_va_g | immu_sync_illgl_va_g) & ;
+// writes are terminated for illegal va case.
+// st_inst_g: as ld_inst_g for stores, additionally blocked when either
+// dmmu_sync_illgl_va_g or immu_sync_illgl_va_g is set.
+assign	st_inst_g = st_inst_unflushed & inst_vld_g & ~local_flush_w & 
+			~(dmmu_sync_illgl_va_g | immu_sync_illgl_va_g) ;
+//assign	st_inst_g = st_inst_unflushed & inst_vld_g & ~(dmmu_sync_illgl_va_g | immu_sync_illgl_va_g);
+
+// One-hot decode of thrid_g into the four thread*_sel_g selects.
+assign	{thread3_sel_g, thread2_sel_g, thread1_sel_g, thread0_sel_g} = {
+	 thrid_g[1] &  thrid_g[0],
+	 thrid_g[1] & ~thrid_g[0],
+	~thrid_g[1] &  thrid_g[0],
+	~thrid_g[1] & ~thrid_g[0]
+	} ;
+
+// Same decode applied to thrid_m, driving tlu_slxa_thrd_sel[3:0].
+assign tlu_slxa_thrd_sel[3:0] = {
+	 thrid_m[1] &  thrid_m[0],
+	 thrid_m[1] & ~thrid_m[0],
+	~thrid_m[1] &  thrid_m[0],
+	~thrid_m[1] & ~thrid_m[0]
+	} ;
+
+/*dff stgivld_g (
+        .din    (tlu_inst_vld_m),
+        .q      (inst_vld_g),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); */
+
+//=========================================================================================
+//	ASI RD DP MUX SELECT
+//=========================================================================================
+
+// qualification with vld not required as this dp is used by synchronous ops only
+// Need to be made non zero-hot in functional mode
+
+// Decode of bits va[5:4] to distinguish reads.  
+// One decode per value of early_va_m[5:4] (0..3).
+wire va_54_eq_0,va_54_eq_1,va_54_eq_2,va_54_eq_3 ;
+wire	[2:0]	ldxa_l1mx1_sel_d1 ;
+assign va_54_eq_0 = (~early_va_m[5] & ~early_va_m[4]) ;
+assign va_54_eq_1 = (~early_va_m[5] &  early_va_m[4]) ;
+assign va_54_eq_2 = ( early_va_m[5] & ~early_va_m[4]) ;
+assign va_54_eq_3 = ( early_va_m[5] &  early_va_m[4]) ;
+
+// i/d tag-target
+// Extend for MacroTest Control.
+// Each select takes its decoded value while sehold_d1 is low and the held copy
+// (ldxa_l1mx1_sel_d1) while sehold_d1 is high. rst_tri_en forces select 0 high
+// and masks the decode term of selects 1 and 2.
+// NOTE(review): select 0 is built from the _m decodes while selects 1 and 2 use
+// the _e decodes - confirm this is the intended timing.
+assign	tlu_ldxa_l1mx1_sel[0] = 
+((((dmmu_decode_asi58_m | immu_decode_asi50_m) & va_54_eq_0) & ~sehold_d1) | rst_tri_en) |  
+(ldxa_l1mx1_sel_d1[0] & sehold_d1) ;
+// Select 1: any of the four ps0 tsb decodes.
+assign	tlu_ldxa_l1mx1_sel[1] = 
+((dmmu_zctxt_ps0_tsb_e | dmmu_nzctxt_ps0_tsb_e | 
+immu_zctxt_ps0_tsb_e | immu_nzctxt_ps0_tsb_e) & ~sehold_d1 & ~rst_tri_en) | 
+(ldxa_l1mx1_sel_d1[1] & sehold_d1) ;
+// Select 2: any of the four ps1 tsb decodes.
+assign	tlu_ldxa_l1mx1_sel[2] = 
+((dmmu_zctxt_ps1_tsb_e | dmmu_nzctxt_ps1_tsb_e |
+immu_zctxt_ps1_tsb_e | immu_nzctxt_ps1_tsb_e) & ~sehold_d1 & ~rst_tri_en) |
+(ldxa_l1mx1_sel_d1[2] & sehold_d1) ;
+
+
+
+// Extend flops to hold selects for MacroTest of MRA.
+// ldxa_l1mx1_sel_out is tlu_ldxa_l1mx1_sel[2:0] registered on clk.
+wire [2:0] ldxa_l1mx1_sel_out ;
+dff_s #(3)   l1mx1s_stgd1(
+        .din    (tlu_ldxa_l1mx1_sel[2:0]), 
+	.q  	(ldxa_l1mx1_sel_out[2:0]),
+        .clk 	(clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+// scan protection.
+// Bits 1 and 2 of the held selects are masked by rst_tri_en; bit 0 is not.
+assign	ldxa_l1mx1_sel_d1[0] = ldxa_l1mx1_sel_out[0] ;
+assign	ldxa_l1mx1_sel_d1[1] = ldxa_l1mx1_sel_out[1] & ~rst_tri_en ;
+assign	ldxa_l1mx1_sel_d1[2] = ldxa_l1mx1_sel_out[2] & ~rst_tri_en ;
+
+// sehold registered on clk.
+wire	sehold_out ;
+dff_s #(1)   seh_d1 (
+        .din    (sehold), 
+	.q  	(sehold_out),
+        .clk 	(clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+// sehold_d1 is the registered sehold, forced low while rst_tri_en is high.
+assign	sehold_d1 = sehold_out & ~rst_tri_en ;
+
+// i/d tag-access
+// Select 3 is the NOR of selects 2 and 1 (select 0 does not take part).
+assign	tlu_ldxa_l1mx1_sel[3] =  ~|tlu_ldxa_l1mx1_sel[2:1];
+wire    ldxa_l1mx1_sel3;
+// * read timing change.
+// Explicit decode of asi 58/50 with va[5:4] == 3, masked by rst_tri_en; used
+// only in the second-level select below.
+assign	ldxa_l1mx1_sel3 = 
+(dmmu_decode_asi58_m | immu_decode_asi50_m) & va_54_eq_3 & ~rst_tri_en ;
+
+// d sync-fsr
+// * read timing change.
+// rst_tri_en forces the first of these selects high and the other two low.
+wire	dmmu_sync_fsr_m_sel,dmmu_sync_far_m_sel,immu_sync_fsr_m_sel;
+assign	dmmu_sync_fsr_m_sel	= (dmmu_decode_asi58_m & va_54_eq_1) | rst_tri_en ;
+assign	dmmu_sync_far_m_sel	= (dmmu_decode_asi58_m & va_54_eq_2) & ~rst_tri_en ;
+assign	immu_sync_fsr_m_sel	= (immu_decode_asi50_m & va_54_eq_1) & ~rst_tri_en ;
+assign	tlu_ldxa_l1mx2_sel[0] = dmmu_sync_fsr_m_sel ;
+// d sync-far
+// * read timing change.
+assign	tlu_ldxa_l1mx2_sel[1] = dmmu_sync_far_m_sel ;
+// i sync-fsr
+assign	tlu_ldxa_l1mx2_sel[2] = immu_sync_fsr_m_sel ;
+// Select 3 is the default: high when none of selects 2..0 is high.
+assign	tlu_ldxa_l1mx2_sel[3] = ~|tlu_ldxa_l1mx2_sel[2:0];
+wire    ldxa_l1mx2_sel3;
+// Explicit decode of the four ctxt-cfg asis, masked by rst_tri_en.
+assign	ldxa_l1mx2_sel3 = (dmmu_zctxt_cfg_m | dmmu_nzctxt_cfg_m |
+			  immu_zctxt_cfg_m | immu_nzctxt_cfg_m) & ~rst_tri_en ;
+
+// Second-level select: [0] when any first-mux source is decoded, [1] when any
+// second-mux source is decoded, [2] when neither [1] nor [0] is high.
+assign	tlu_ldxa_l2mx1_sel[0] = 
+|{ldxa_l1mx1_sel3,ldxa_l1mx1_sel_d1[2:1],(tlu_ldxa_l1mx1_sel[0] & ~rst_tri_en)} ;
+assign	tlu_ldxa_l2mx1_sel[1] = |{ldxa_l1mx2_sel3,tlu_ldxa_l1mx2_sel[2:0]} ;
+assign	tlu_ldxa_l2mx1_sel[2] = ~|tlu_ldxa_l2mx1_sel[1:0];
+
+//=========================================================================================
+//	MRA RD/WRITE
+//=========================================================================================
+
+// flush_mmuasi_wr: blocks MMU ASI / trap writes on an ifu flush or a deferred
+// trap being taken. Bug 5196
+wire	flush_mmuasi_wr ;
+assign	flush_mmuasi_wr = lsu_mmu_defr_trp_taken_g | ifu_tlu_flush_w ;
+
+// isfsr_trp_wr: immu_sfsr_trp_wr[3:0] with every bit gated by ~flush_mmuasi_wr.
+wire	[3:0]	isfsr_trp_wr ;
+assign	isfsr_trp_wr[3:0] = immu_sfsr_trp_wr[3:0] & {4{~flush_mmuasi_wr}} ;
+
+wire  tag_access_nctxt_g ;
+
+// immu_miss_vld_g: immu miss qualified by a valid instruction.
+wire immu_miss_vld_g ;
+assign immu_miss_vld_g = inst_vld_g & immu_miss_g ;
+
+// fast-asi read takes precedence over long-latency rd. Can long-latency read get
+// starved out ?? Assume memref_d is never x.
+// A long-latency read is raised for a data-in or data-access enable with
+// tlb_st_inst_g set, and only while ifu_lsu_memref_d is low.
+assign	dmra_lng_lat_rd = ((dmmu_data_in_en | dmmu_data_access_en) & tlb_st_inst_g & ~ifu_lsu_memref_d) ;
+assign	imra_lng_lat_rd = ((immu_data_in_en | immu_data_access_en) & tlb_st_inst_g & ~ifu_lsu_memref_d) ;
+//assign	dmra_lng_lat_rd = ((dmmu_data_in_en | dmmu_data_access_en) & tlb_st_inst_g) ;
+//assign	imra_lng_lat_rd = ((immu_data_in_en | immu_data_access_en) & tlb_st_inst_g) ;
+
+// dmra_ldst / imra_ldst: any of the tag-access, tsb or ctxt-cfg enables for the
+// respective side.
+wire  dmra_ldst,imra_ldst ;
+assign        dmra_ldst = dmmu_tag_access_en | dmmu_tsb_en | dmmu_ctxt_cfg_en ; 
+assign        imra_ldst = immu_tag_access_en | immu_tsb_en | immu_ctxt_cfg_en ; 
+
+// sync_far_en no longer written/read
+// dmra_wr_g: either a store to one of the dmra_ldst registers, or a dmmu miss /
+// daccess exception / daccess protection event on a valid trap that is not
+// flushed (Bug 4183). Parentheses make the '&'-before-'|' grouping explicit.
+assign	dmra_wr_g =
+	(dmra_ldst & st_inst_g) |
+	((lsu_tlu_dmmu_miss_g | lsu_tlu_daccess_excptn_g | lsu_tlu_daccess_prot_g) &
+	 trp_vld_g & ~flush_mmuasi_wr) ;
+
+// isfsr_trap: any bit of isfsr_trp_wr is set.
+wire	isfsr_trap ;
+assign	isfsr_trap = |isfsr_trp_wr[3:0] ;
+
+// imra_wr_g: store to one of the imra_ldst registers, an unflushed valid immu
+// miss, or an isfsr trap write.
+assign	imra_wr_g =
+	isfsr_trap |
+	(imra_ldst & st_inst_g) |
+	(immu_miss_vld_g & ~flush_mmuasi_wr) ;
+
+wire	dmra_rw_d ;
+// Read side selects "i-side" when an immu long-latency read is active, or when
+// no long-latency read is active and dmra_rw_d is low.
+assign	iside_mra_access_rd = ((~dmra_rw_d) & ~(imra_lng_lat_rd | dmra_lng_lat_rd))  | imra_lng_lat_rd ;
+// Write side selects "i-side" whenever imra_wr_g is set.
+assign	iside_mra_access_wr = imra_wr_g ;
+
+// Read thread id: tlb_access_tid_g during a long-latency read, thrid_d otherwise.
+assign	mra_raccess_tid[1:0] = (dmra_lng_lat_rd | imra_lng_lat_rd) ? tlb_access_tid_g[1:0] : thrid_d[1:0] ;
+
+// Early (_m) indication of a store to any of the nzctxt cfg / ps0 tsb / ps1 tsb
+// enables, then registered on clk into the _g version.
+wire idside_nzctxt_accwr_early_m,idside_nzctxt_accwr_early_g  ;
+assign	idside_nzctxt_accwr_early_m =
+	((dmmu_nzctxt_cfg_en_m 	 | immu_nzctxt_cfg_en_m     |
+	dmmu_nzctxt_ps0_tsb_en_m | immu_nzctxt_ps0_tsb_en_m |
+	dmmu_nzctxt_ps1_tsb_en_m | immu_nzctxt_ps1_tsb_en_m) & st_inst_m) ; // tsb/cfg asi wr
+
+dff_s ctacc_stgg (
+        .din    (idside_nzctxt_accwr_early_m),
+        .q      (idside_nzctxt_accwr_early_g),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+	
+//wire	idside_nzctxt_access ;
+wire	idside_nzctxt_access_rd,idside_nzctxt_access_wr ;
+wire	st_wr_g ;
+
+// Write side: set for (a) a tag-access asi store whose context is not flagged by
+// tag_access_nctxt_g, (b) a miss/exception/isfsr trap write on a valid
+// instruction with the same context condition, or (c) the registered early
+// tsb/cfg nzctxt store qualified by st_wr_g.
+assign	idside_nzctxt_access_wr = 
+	((dmmu_tag_access_en	| immu_tag_access_en) 	// tag-access asi write
+		& st_inst_unflushed & ~tag_access_nctxt_g)	 |
+	((lsu_tlu_daccess_excptn_g | lsu_tlu_daccess_prot_g | lsu_tlu_dmmu_miss_g |
+	immu_miss_g | (isfsr_trap))		// tag-access exception write 
+		& inst_vld_g & ~tag_access_nctxt_g)	 |
+	(idside_nzctxt_accwr_early_g & st_wr_g) ; // Bug 4828
+	//((dmmu_nzctxt_cfg_en 	| immu_nzctxt_cfg_en 	 |
+	//dmmu_nzctxt_ps0_tsb_en 	| immu_nzctxt_ps0_tsb_en |
+	//dmmu_nzctxt_ps1_tsb_en 	| immu_nzctxt_ps1_tsb_en) & st_inst_unflushed) ; // tsb/cfg asi wr
+// Read side: decoded nzctxt read, fault-based read with tacc_nctxt low, or a
+// long-latency read with tacc_anctxt low.
+assign	idside_nzctxt_access_rd = 
+	(idmra_nzctxt_rd_d) 		 |  // => nzctxt rd with decode
+	(idmra_fault_rd_d & ~tacc_nctxt) |  // => fault-based rd
+	((dmra_lng_lat_rd | imra_lng_lat_rd) & ~tacc_anctxt) ;
+// access non zero context levels
+		
+// Pointer layout, msb to lsb: {thread id[1:0], nzctxt access, i-side access}.
+assign	mra_wr_ptr[3:0]	= {thrid_g[1:0],idside_nzctxt_access_wr,iside_mra_access_wr};	
+assign	mra_rd_ptr[3:0]	= {mra_raccess_tid[1:0],idside_nzctxt_access_rd,iside_mra_access_rd};	
+
+// Valids: a write for either side; a read for a decoded in-pipe read or either
+// long-latency read.
+assign	mra_wr_vld = dmra_wr_g | imra_wr_g ;
+assign	mra_rd_vld = idmra_rd_d | dmra_lng_lat_rd | imra_lng_lat_rd ;
+
+// Per-side ctxt-cfg enable: zero-context or non-zero-context cfg enable.
+assign	dmmu_ctxt_cfg_en = dmmu_zctxt_cfg_en | dmmu_nzctxt_cfg_en ;
+assign	immu_ctxt_cfg_en = immu_zctxt_cfg_en | immu_nzctxt_cfg_en ;
+//assign	dmmu_ctxt_cfg_rd_en = (dmmu_zctxt_cfg_en | dmmu_nzctxt_cfg_en) & ld_inst_g ;
+//assign	immu_ctxt_cfg_rd_en = (immu_zctxt_cfg_en | immu_nzctxt_cfg_en) & ld_inst_g ;
+
+// Change - with 8 tsbs per thread, tsb can be in any of the 3 fields
+// of a line in the mra.
+wire	mra_itag_acc_en,mra_dtag_acc_en ;
+// Be careful about loading on trap conditions.
+// st_wr_g: unflushed store with no local flush (not qualified by inst_vld_g).
+assign st_wr_g = st_inst_unflushed & ~local_flush_w ;
+// Tag-access enables: an asi store to tag-access, or the miss/exception terms,
+// which are not qualified by st_wr_g here.
+assign	mra_itag_acc_en = 
+	(immu_tag_access_en & st_wr_g) | immu_miss_g | (isfsr_trap) ;
+assign	mra_dtag_acc_en = 
+	(dmmu_tag_access_en & st_wr_g) | lsu_tlu_dmmu_miss_g | lsu_tlu_daccess_excptn_g | 
+	lsu_tlu_daccess_prot_g ;
+// field1: store to any ps0 tsb enable.
+assign	mra_field1_en 	= (dmmu_zctxt_ps0_tsb_en  | immu_zctxt_ps0_tsb_en |
+			  dmmu_nzctxt_ps0_tsb_en | immu_nzctxt_ps0_tsb_en) & st_wr_g ;
+			  // dmmu_nzctxt_ps0_tsb_en | immu_nzctxt_ps0_tsb_en) & st_inst_unflushed ; Bug 3378
+// field2: store to any ps1 tsb enable.
+assign	mra_field2_en 	= (dmmu_zctxt_ps1_tsb_en  | immu_zctxt_ps1_tsb_en |
+			  dmmu_nzctxt_ps1_tsb_en | immu_nzctxt_ps1_tsb_en) & st_wr_g ;
+// field3: either tag-access enable.
+assign	mra_field3_en	= mra_itag_acc_en | mra_dtag_acc_en ;
+// field4: store to either side's ctxt-cfg enable.
+assign	mra_field4_en 	= (dmmu_ctxt_cfg_en | immu_ctxt_cfg_en) & st_wr_g ;
+			  
+// for use of rf16x160
+// mra_byte_wen[19:0]: field enables 1..3 are each replicated over six bits and
+// field enable 4 over two bits.
+assign	mra_byte_wen[19:0] = {
+	{6{mra_field1_en}},	// [19:14]
+	{6{mra_field2_en}},	// [13:8]
+	{6{mra_field3_en}},	// [7:2]
+	{2{mra_field4_en}}	// [1:0]
+	} ;
+
+// active-low selects
+// Need to add inst_access_excp to the sel !!!
+// Prioritized between the two sels.
+// sel[0] is the default: high when neither sel[1] nor sel[2] is high, and forced
+// high by rst_tri_en (which also forces sel[1] and sel[2] low).
+assign        tag_access_wdata_sel[0] = 
+      ~(tag_access_wdata_sel[1] | tag_access_wdata_sel[2]) | rst_tri_en ;
+//assign        tag_access_wdata_sel[1] = (immu_miss_g | isfsr_trap) & ~rst_tri_en ; // Timing
+assign        tag_access_wdata_sel[1] = tlu_itag_acc_sel_g & ~rst_tri_en ;
+// sel[2]: store (st_wr_g) to any dmra/imra ld/st register.
+assign        tag_access_wdata_sel[2] = (dmra_ldst | imra_ldst) & st_wr_g & ~rst_tri_en ; 
+					// Bug 4728
+
+// Context written with the tag access: store data bits [12:0] when sel[2] is
+// high, otherwise tlu_tag_access_ctxt_g.
+wire  [12:0]  tag_access_wdata_ctxt ;
+assign        tag_access_wdata_ctxt[12:0] = 
+       tag_access_wdata_sel[2] ? lsu_tlu_st_rs3_data_b12t0_g[12:0] : tlu_tag_access_ctxt_g[12:0] ;
+
+// tag_access_nctxt_g: the selected 13-bit context is all zeros.
+assign  tag_access_nctxt_g = (tag_access_wdata_ctxt[12:0] == 13'd0) ;
+ 
+//=========================================================================================
+//	Tag-Access Context Per thread
+//=========================================================================================
+
+// Mark ctxt field in tag-access register as being nucleus or non-nucleus.
+// State is not reset by rst_l, as use is expected to be preceded by a write.
+
+wire	[3:0]	itacc_ctxt_en, dtacc_ctxt_en ;
+wire		itacc_nctxt0,itacc_nctxt1,itacc_nctxt2,itacc_nctxt3;
+wire		dtacc_nctxt0,dtacc_nctxt1,dtacc_nctxt2,dtacc_nctxt3;
+
+// Bit n of each enable vector is thread<n>_sel_g gated by the corresponding
+// tag-access enable and by mra_wr_vld.
+assign	itacc_ctxt_en[3:0] =
+	{thread3_sel_g,thread2_sel_g,thread1_sel_g,thread0_sel_g} &
+	{4{mra_itag_acc_en & mra_wr_vld}} ;
+assign	dtacc_ctxt_en[3:0] =
+	{thread3_sel_g,thread2_sel_g,thread1_sel_g,thread0_sel_g} &
+	{4{mra_dtag_acc_en & mra_wr_vld}} ;
+
+// Per-thread state bits: each enabled flop captures tag_access_nctxt_g when its
+// own bit of itacc_ctxt_en / dtacc_ctxt_en is set. No reset port is connected.
+
+// Thread0
+dffe_s   itacc_ctxt0 (
+        .clk (clk), .en (itacc_ctxt_en[0]),
+        .din (tag_access_nctxt_g), .q (itacc_nctxt0),
+        .se  (1'b0), .si (), .so ()
+        );
+
+dffe_s   dtacc_ctxt0 (
+        .clk (clk), .en (dtacc_ctxt_en[0]),
+        .din (tag_access_nctxt_g), .q (dtacc_nctxt0),
+        .se  (1'b0), .si (), .so ()
+        );
+
+// Thread1
+dffe_s   itacc_ctxt1 (
+        .clk (clk), .en (itacc_ctxt_en[1]),
+        .din (tag_access_nctxt_g), .q (itacc_nctxt1),
+        .se  (1'b0), .si (), .so ()
+        );
+
+dffe_s   dtacc_ctxt1 (
+        .clk (clk), .en (dtacc_ctxt_en[1]),
+        .din (tag_access_nctxt_g), .q (dtacc_nctxt1),
+        .se  (1'b0), .si (), .so ()
+        );
+
+// Thread2
+dffe_s   itacc_ctxt2 (
+        .clk (clk), .en (itacc_ctxt_en[2]),
+        .din (tag_access_nctxt_g), .q (itacc_nctxt2),
+        .se  (1'b0), .si (), .so ()
+        );
+
+dffe_s   dtacc_ctxt2 (
+        .clk (clk), .en (dtacc_ctxt_en[2]),
+        .din (tag_access_nctxt_g), .q (dtacc_nctxt2),
+        .se  (1'b0), .si (), .so ()
+        );
+
+// Thread3
+dffe_s   itacc_ctxt3 (
+        .clk (clk), .en (itacc_ctxt_en[3]),
+        .din (tag_access_nctxt_g), .q (itacc_nctxt3),
+        .se  (1'b0), .si (), .so ()
+        );
+
+dffe_s   dtacc_ctxt3 (
+        .clk (clk), .en (dtacc_ctxt_en[3]),
+        .din (tag_access_nctxt_g), .q (dtacc_nctxt3),
+        .se  (1'b0), .si (), .so ()
+        );
+
+// In-pipe Access
+// Pick the per-thread i-side / d-side nctxt bit using the thread*_d decodes
+// (thread 3 is the default leg), then choose the side with iside_mra_access_rd.
+assign	itacc_nctxt =
+	thread0_d ? itacc_nctxt0 :
+		thread1_d ? itacc_nctxt1 :
+			thread2_d ? itacc_nctxt2 : itacc_nctxt3 ;
+assign	dtacc_nctxt =
+	thread0_d ? dtacc_nctxt0 :
+		thread1_d ? dtacc_nctxt1 :
+			thread2_d ? dtacc_nctxt2 : dtacc_nctxt3 ;
+assign	tacc_nctxt =
+	iside_mra_access_rd ? itacc_nctxt : dtacc_nctxt ;
+
+// Asynchronous Access
+// Same selection driven by the thread*_async_g decodes; the side is chosen by
+// imra_lng_lat_rd.
+assign	itacc_anctxt =
+	thread0_async_g ? itacc_nctxt0 :
+		thread1_async_g ? itacc_nctxt1 :
+			thread2_async_g ? itacc_nctxt2 : itacc_nctxt3 ;
+assign	dtacc_anctxt =
+	thread0_async_g ? dtacc_nctxt0 :
+		thread1_async_g ? dtacc_nctxt1 :
+			thread2_async_g ? dtacc_nctxt2 : dtacc_nctxt3 ;
+
+assign	tacc_anctxt =
+	imra_lng_lat_rd ? itacc_anctxt : dtacc_anctxt ;
+
+//=========================================================================================
+//	Interrupt Control
+//=========================================================================================
+
+// Interrupt ASI interface, driven from the _g signals: valid follows
+// alt_space_g alone; load/store are the flush-qualified ld/st with alt-space.
+assign	tlu_int_asi_vld        = alt_space_g ;
+assign	tlu_int_asi_thrid[1:0] = thrid_g[1:0] ;
+assign	tlu_int_asi_load       = alt_space_g & ld_inst_g ;
+assign	tlu_int_asi_store      = alt_space_g & st_inst_g ;
+
+//=========================================================================================
+//	ASI Error Condition
+//=========================================================================================
+
+// Supported asi but illegal_va. ldxa must signal this occurrence when returning data
+// to LSU.
+// The decode can be shared with the statement below (grape)
+// SPARC_HPV_EN - Needs to change once asi assignments are available !!!
+// Bug 2201 : pid and va_wtchpt decoded in lsu (asi 58)
+/*wire lsu_asi58_g ;
+assign lsu_asi58_g = 
+	((tlu_ldst_va_g[8:0] == 9'h080) |	// pid
+	(tlu_ldst_va_g[8:0] == 9'h038)) ;	// va-wtchpt
+assign	dmmu_sync_supported_asi = 
+	(((lsu_asi_state[7:0] == 8'h58) & ~lsu_asi58_g) |
+	(lsu_asi_state[7:0] == 8'h59) |
+	(lsu_asi_state[7:0] == 8'h5A) |
+	(lsu_asi_state[7:0] == 8'h5B)) & alt_space_g  ;*/
+
+
+// asi 0x60 with va[7:0] == 0x08.
+// NOTE(review): within the visible code this wire is referenced only by the
+// commented-out term below - confirm whether it is used elsewhere.
+wire    dmmu_inv_all_asi ;
+assign dmmu_inv_all_asi = 
+({lsu_tlu_tlb_asi_state_m[7:0],lsu_tlu_tlb_ldst_va_m[7:0]} == {8'h60,8'h08}) ;
+
+// asi is one of 0x5C..0x5F or 0x60, on a tlb ld/st instruction.
+wire	dmmu_async_supported_asi_m ;
+assign	dmmu_async_supported_asi_m = 
+	((lsu_tlu_tlb_asi_state_m[7:0] == 8'h5C) |
+	//dmmu_inv_all_asi |
+	(lsu_tlu_tlb_asi_state_m[7:0] == 8'h60) | // Bug 4901
+	(lsu_tlu_tlb_asi_state_m[7:0] == 8'h5D) |
+	(lsu_tlu_tlb_asi_state_m[7:0] == 8'h5E) |
+	(lsu_tlu_tlb_asi_state_m[7:0] == 8'h5F)) & tlb_ldst_inst_m ;
+
+// Registered on clk into dmmu_async_supported_asi.
+dff_s stgg_dasi (
+        .din    (dmmu_async_supported_asi_m),
+        .q      (dmmu_async_supported_asi),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Illegal va: a supported asi was seen but none of the enables listed below is
+// set.
+assign	dmmu_async_illgl_va_g =
+	dmmu_async_supported_asi & 
+	~(dmmu_data_in_en | 
+	dmmu_invalidate_all_en | immu_invalidate_all_en | // Bug 4901
+	dmmu_data_access_en | 
+	dmmu_tag_read_en | dmmu_demap_en) ;
+
+/*assign	immu_sync_supported_asi = 
+	((lsu_asi_state[7:0] == 8'h50) |
+	(lsu_asi_state[7:0] == 8'h51) |
+	(lsu_asi_state[7:0] == 8'h52)) & alt_space_g ;
+
+assign	immu_sync_illgl_va_g =
+	immu_sync_supported_asi & ~(immu_tag_target_en | immu_sync_fsr_en | immu_tsb_en | 
+	immu_tag_access_en | immu_8k_ptr_en | immu_64k_ptr_en | immu_ctxt_cfg_en) ;*/
+
+// asi 0x60 with va[7:0] == 0x00.
+// NOTE(review): within the visible code this wire is referenced only by the
+// commented-out term below - confirm whether it is used elsewhere.
+wire    immu_inv_all_asi ;
+assign immu_inv_all_asi = 
+({lsu_tlu_tlb_asi_state_m[7:0],lsu_tlu_tlb_ldst_va_m[7:0]} == {8'h60,8'h00}) ;
+
+// asi is one of 0x54..0x57 or 0x60, on a tlb ld/st instruction.
+wire	immu_async_supported_asi_m ;
+assign	immu_async_supported_asi_m = 
+	((lsu_tlu_tlb_asi_state_m[7:0] == 8'h54) |
+	//immu_inv_all_asi |
+	(lsu_tlu_tlb_asi_state_m[7:0] == 8'h60) | // Bug 4901
+	(lsu_tlu_tlb_asi_state_m[7:0] == 8'h55) |
+	(lsu_tlu_tlb_asi_state_m[7:0] == 8'h56) |
+	(lsu_tlu_tlb_asi_state_m[7:0] == 8'h57)) & tlb_ldst_inst_m  ;
+
+// Registered on clk into immu_async_supported_asi.
+dff_s stgg_iasi (
+        .din    (immu_async_supported_asi_m),
+        .q      (immu_async_supported_asi),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Illegal va: a supported asi was seen but none of the enables listed below is
+// set.
+assign	immu_async_illgl_va_g =
+	immu_async_supported_asi & 
+	~(immu_data_in_en | 
+	immu_data_access_en | immu_tag_read_en | immu_demap_en | 
+	immu_invalidate_all_en | dmmu_invalidate_all_en) ; // Bug 4901
+
+//=========================================================================================
+//	IN-PIPE ASI RD SUPPORT
+//=========================================================================================
+
+
+// One-hot decode of thrid_d into thread0_d..thread3_d.
+assign  {thread3_d, thread2_d, thread1_d, thread0_d} = {
+         thrid_d[1] &  thrid_d[0],
+         thrid_d[1] & ~thrid_d[0],
+        ~thrid_d[1] &  thrid_d[0],
+        ~thrid_d[1] & ~thrid_d[0]
+        } ;
+
+// Each of the four lsu_asi_reg<n> inputs is registered on clk into
+// asi_reg<n>_d1.
+wire    [7:0]   asi_reg0_d1, asi_reg1_d1, asi_reg2_d1, asi_reg3_d1 ;
+
+dff_s #(8) stgd1_asi0 (
+        .clk    (clk),
+        .din    (lsu_asi_reg0[7:0]),
+        .q      (asi_reg0_d1[7:0]),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s #(8) stgd1_asi1 (
+        .clk    (clk),
+        .din    (lsu_asi_reg1[7:0]),
+        .q      (asi_reg1_d1[7:0]),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s #(8) stgd1_asi2 (
+        .clk    (clk),
+        .din    (lsu_asi_reg2[7:0]),
+        .q      (asi_reg2_d1[7:0]),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s #(8) stgd1_asi3 (
+        .clk    (clk),
+        .din    (lsu_asi_reg3[7:0]),
+        .q      (asi_reg3_d1[7:0]),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Registered asi of the thread selected by thread*_d (thread 3 is the default
+// leg of the mux).
+wire    [7:0]   asi_reg_state ;
+assign  asi_reg_state[7:0] =
+        (thread0_d ? asi_reg0_d1[7:0] : 
+          (thread1_d ? asi_reg1_d1[7:0] : 
+            (thread2_d ? asi_reg2_d1[7:0] : 
+              asi_reg3_d1[7:0]))) ;
+
+// The immediate asi is taken as valid when bit 8 of ifu_lsu_imm_asi_d is low.
+wire    imm_asi_vld_d ;
+assign  imm_asi_vld_d = ~ifu_lsu_imm_asi_d[8] ;
+
+// Use of asi delayed by a cycle.
+// asi_state_d: immediate asi when valid, otherwise the registered per-thread asi.
+assign  asi_state_d[7:0] = imm_asi_vld_d ? 
+      ifu_lsu_imm_asi_d[7:0] : asi_reg_state[7:0] ;
+
+// asi_state_e: asi_state_d registered on clk.
+dff_s #(8) stgd1_asi (
+        .din    (asi_state_d[7:0]),
+        .q      (asi_state_e[7:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// bit8 is unused.
+// early_va_m: low 8 bits of exu_mmu_early_va_e registered on clk.
+dff_s #(8) stgd1_eva (
+        .din    (exu_mmu_early_va_e[7:0]),
+        .q      (early_va_m[7:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// 6 bits: memref, the four thread decodes and alt-space -> _e versions.
+dff_s #(6) stgd1_mref (
+        .din    ({ifu_lsu_memref_d,thread0_d,thread1_d,thread2_d,thread3_d,ifu_tlu_alt_space_d}),
+        .q      ({memref_e,thread0_e, thread1_e, thread2_e, thread3_e,alt_space_e}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// memref_m: memref_e registered on clk.
+dff_s #(1) stgm_mref (
+        .din    (memref_e),
+        .q      (memref_m),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+
+// qualification with memref_d to cut down on number of speculative reads
+// decode can be shared with corresponding enables
+// gates can be shared.
+
+// Establish that mra *could* be read by sync events. full decode would
+// cause critical path.
+// Partial decode: only asi_state_d[6:4] is compared (3'h5 or 3'h3); bit 7 and
+// the low nibble are not examined here.
+assign	idmra_rd_d =
+	//((asi_state_d[6:4] == 3'h6)  | // specifically tag-access.
+	((asi_state_d[6:4] == 3'h5)  |
+	 (asi_state_d[6:4] == 3'h3)) & ifu_tlu_alt_space_d & ifu_lsu_memref_d ;
+
+// need to decode 58,59,5a,5B,31,32,39,3A,33,3B
+// use lower hex. need to distinguish 1 & 2 between both accesses.
+// Low-nibble decode only. Nibbles 1 and 2 are additionally qualified by
+// asi_state_d[5], which is set for 0x31/0x32 and clear for 0x51/0x52. The last
+// term compares three bits, so it matches low nibbles 3 and B.
+assign	dmra_rw_d =
+	(asi_state_d[3:0] == 4'b1000)   | // 8	
+	(((asi_state_d[3:0] == 4'b0001)  | // 1	
+	(asi_state_d[3:0] == 4'b0010)) & asi_state_d[5])  | // 2 ;1 & 2 need distinction between I&D	
+	(asi_state_d[3:0] == 4'b1001)   | // 9	
+	(asi_state_d[3:0] == 4'b1010)   | // A	
+	(asi_state_d[2:0] == 3'b011)   ; // partial B	
+	
+
+// Read requires that ctxt of access be chosen.
+// ctxt_cfg,ps0_tsb,ps1_tsb require decode for ctxt.
+// tag_access,ps0-ptr,ps1-ptr,direct-ptr,tag-target require lookup of logged ctxt. 
+// ** Solution here is to exclude zctxt asi rds from equation.
+
+// Full 8-bit decode of asi 0x39/0x3A/0x3B/0x3D/0x3E/0x3F, qualified by alt-space
+// and memref.
+assign idmra_nzctxt_rd_d =
+	(asi_state_d[7:4] == 4'h3) &	// common
+		((asi_state_d[3:0] == 4'h9) |	// dmmu_nzctxt_ps0_tsb
+		(asi_state_d[3:0] == 4'hA) |	// dmmu_nzctxt_ps1_tsb
+		(asi_state_d[3:0] == 4'hB) |	// dmmu_nzctxt_cfg
+		(asi_state_d[3:0] == 4'hD) |	// immu_nzctxt_ps0_tsb
+		(asi_state_d[3:0] == 4'hE) |	// immu_nzctxt_ps1_tsb
+		(asi_state_d[3:0] == 4'hF)) & 	// immu_nzctxt_cfg
+		ifu_tlu_alt_space_d & ifu_lsu_memref_d ;
+
+// Fault based reads
+// Full 8-bit decode of asi 0x58..0x5B and 0x50..0x52, qualified by alt-space and
+// memref.
+assign	idmra_fault_rd_d =
+	(asi_state_d[7:4] == 4'h5) &	// common
+		((asi_state_d[3:0] == 4'h8) |	// dmmu_tag_access/target; va ignored
+		(asi_state_d[3:0] == 4'h9) |	// dmmu_ps0_ptr
+		(asi_state_d[3:0] == 4'hA) |	// dmmu_ps1_ptr
+		(asi_state_d[3:0] == 4'hB) |	// direct_ptr
+		(asi_state_d[3:0] == 4'h0) |	// immu_tag_access/target ; va ignored
+		(asi_state_d[3:0] == 4'h1) |	// immu_ps0_ptr
+		(asi_state_d[3:0] == 4'h2)) & 	// immu_ps1_ptr
+		ifu_tlu_alt_space_d & ifu_lsu_memref_d ;
+
+
+// Note - tag_access needs to be included.
+// Combinational decode of asi_state_e: each output is an exact 8-bit match of
+// one asi value, qualified by alt_space_e and memref_e.
+always	@ (/*AUTOSENSE*/alt_space_e or asi_state_e or memref_e)
+	begin
+		// DMMU
+		dmmu_decode_asi58_e   = (asi_state_e[7:0] == 8'h58) & alt_space_e & memref_e ;
+		dmmu_8k_ptr_e         = (asi_state_e[7:0] == 8'h59) & alt_space_e & memref_e ;
+		dmmu_64k_ptr_e        = (asi_state_e[7:0] == 8'h5A) & alt_space_e & memref_e ;
+		dmmu_direct_ptr_e     = (asi_state_e[7:0] == 8'h5B) & alt_space_e & memref_e ;
+		dmmu_zctxt_ps0_tsb_e  = (asi_state_e[7:0] == 8'h31) & alt_space_e & memref_e ;
+		dmmu_zctxt_ps1_tsb_e  = (asi_state_e[7:0] == 8'h32) & alt_space_e & memref_e ;
+		dmmu_nzctxt_ps0_tsb_e = (asi_state_e[7:0] == 8'h39) & alt_space_e & memref_e ;
+		dmmu_nzctxt_ps1_tsb_e = (asi_state_e[7:0] == 8'h3A) & alt_space_e & memref_e ;
+		dmmu_zctxt_cfg_e      = (asi_state_e[7:0] == 8'h33) & alt_space_e & memref_e ;
+		dmmu_nzctxt_cfg_e     = (asi_state_e[7:0] == 8'h3B) & alt_space_e & memref_e ;
+		// IMMU
+		immu_decode_asi50_e   = (asi_state_e[7:0] == 8'h50) & alt_space_e & memref_e ;
+		immu_8k_ptr_e         = (asi_state_e[7:0] == 8'h51) & alt_space_e & memref_e ;
+		immu_64k_ptr_e        = (asi_state_e[7:0] == 8'h52) & alt_space_e & memref_e ;
+		immu_zctxt_ps0_tsb_e  = (asi_state_e[7:0] == 8'h35) & alt_space_e & memref_e ;
+		immu_zctxt_ps1_tsb_e  = (asi_state_e[7:0] == 8'h36) & alt_space_e & memref_e ;
+		immu_nzctxt_ps0_tsb_e = (asi_state_e[7:0] == 8'h3D) & alt_space_e & memref_e ;
+		immu_nzctxt_ps1_tsb_e = (asi_state_e[7:0] == 8'h3E) & alt_space_e & memref_e ;
+		immu_zctxt_cfg_e      = (asi_state_e[7:0] == 8'h37) & alt_space_e & memref_e ;
+		immu_nzctxt_cfg_e     = (asi_state_e[7:0] == 8'h3F) & alt_space_e & memref_e ;
+	end
+
+// Registers the 19 _e asi decodes into their _m counterparts; .din and .q list
+// the signals in the same order.
+wire immu_64k_ptr_m,immu_8k_ptr_m,dmmu_direct_ptr_m,dmmu_64k_ptr_m,
+dmmu_8k_ptr_m ;
+dff_s  #(19) fastasi_m (
+        .din    ({dmmu_8k_ptr_e,dmmu_64k_ptr_e,dmmu_direct_ptr_e,
+		dmmu_decode_asi58_e, immu_decode_asi50_e,
+		dmmu_zctxt_ps0_tsb_e, dmmu_zctxt_ps1_tsb_e,
+		dmmu_nzctxt_ps0_tsb_e, dmmu_nzctxt_ps1_tsb_e,
+		dmmu_zctxt_cfg_e, dmmu_nzctxt_cfg_e,
+		immu_zctxt_ps0_tsb_e, immu_zctxt_ps1_tsb_e,
+		immu_nzctxt_ps0_tsb_e, immu_nzctxt_ps1_tsb_e,
+		immu_zctxt_cfg_e, immu_nzctxt_cfg_e,
+		immu_8k_ptr_e,immu_64k_ptr_e}),
+        .q      ({dmmu_8k_ptr_m,dmmu_64k_ptr_m,dmmu_direct_ptr_m,
+		dmmu_decode_asi58_m, immu_decode_asi50_m,
+		dmmu_zctxt_ps0_tsb_m, dmmu_zctxt_ps1_tsb_m,
+		dmmu_nzctxt_ps0_tsb_m, dmmu_nzctxt_ps1_tsb_m,
+		dmmu_zctxt_cfg_m, dmmu_nzctxt_cfg_m,
+		immu_zctxt_ps0_tsb_m, immu_zctxt_ps1_tsb_m,
+		immu_nzctxt_ps0_tsb_m, immu_nzctxt_ps1_tsb_m,
+		immu_zctxt_cfg_m, immu_nzctxt_cfg_m,
+		immu_8k_ptr_m,immu_64k_ptr_m}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );  
+
+// DMMU register enables: each asi decode is combined with an exact match on
+// early_va_m[7:0]. Under asi 58 the va picks the register (0x00 tag-target,
+// 0x30 tag-access, 0x18 sync-fsr, 0x20 sync-far); every other asi here requires
+// va == 0x00.
+assign	dmmu_tag_target_en_m = dmmu_decode_asi58_m & (early_va_m[7:0] == 8'h00) ;
+assign	dmmu_tag_access_en_m = dmmu_decode_asi58_m & (early_va_m[7:0] == 8'h30) ;
+assign	dmmu_sync_fsr_en_m = dmmu_decode_asi58_m & (early_va_m[7:0] == 8'h18) ;
+assign	dmmu_sync_far_en_m = dmmu_decode_asi58_m & (early_va_m[7:0] == 8'h20) ;
+assign	dmmu_zctxt_ps0_tsb_en_m = dmmu_zctxt_ps0_tsb_m & (early_va_m[7:0] == 8'h00) ;
+assign	dmmu_zctxt_ps1_tsb_en_m = dmmu_zctxt_ps1_tsb_m & (early_va_m[7:0] == 8'h00) ;
+assign	dmmu_nzctxt_ps0_tsb_en_m = dmmu_nzctxt_ps0_tsb_m & (early_va_m[7:0] == 8'h00) ;
+assign	dmmu_nzctxt_ps1_tsb_en_m = dmmu_nzctxt_ps1_tsb_m & (early_va_m[7:0] == 8'h00) ;
+assign	dmmu_zctxt_cfg_en_m = dmmu_zctxt_cfg_m & (early_va_m[7:0] == 8'h00) ;
+assign	dmmu_nzctxt_cfg_en_m = dmmu_nzctxt_cfg_m & (early_va_m[7:0] == 8'h00) ;
+assign	dmmu_8k_ptr_en_m = dmmu_8k_ptr_m & (early_va_m[7:0] == 8'h00) ;
+assign	dmmu_64k_ptr_en_m = dmmu_64k_ptr_m & (early_va_m[7:0] == 8'h00) ;
+assign	dmmu_direct_ptr_en_m = dmmu_direct_ptr_m & (early_va_m[7:0] == 8'h00) ;
+
+// Calculation of dmmu illgl-va
+//
+// The access hit one of the D-side fast-ASI decodes (collected in E and
+// staged to M) but its VA offset enabled none of the D-side registers.
+
+// Grouped enables used below and by the ldxa data-valid logic.
+assign	dmmu_tsb_en_m =
+	dmmu_nzctxt_ps1_tsb_en_m | dmmu_nzctxt_ps0_tsb_en_m |
+	dmmu_zctxt_ps1_tsb_en_m  | dmmu_zctxt_ps0_tsb_en_m ;
+assign	dmmu_ctxt_cfg_en_m = dmmu_nzctxt_cfg_en_m | dmmu_zctxt_cfg_en_m ;
+
+wire	dmmu_sync_supported_asi_e ;
+wire	dmmu_sync_supported_asi_m ;
+wire	dmmu_sync_illgl_va_m ;
+
+assign	dmmu_sync_supported_asi_e =
+	dmmu_decode_asi58_e   |
+	dmmu_zctxt_ps0_tsb_e  | dmmu_zctxt_ps1_tsb_e  |
+	dmmu_nzctxt_ps0_tsb_e | dmmu_nzctxt_ps1_tsb_e |
+	dmmu_zctxt_cfg_e      | dmmu_nzctxt_cfg_e     |
+	dmmu_8k_ptr_e         | dmmu_64k_ptr_e        | dmmu_direct_ptr_e ;
+
+dff_s stgm_dsynca (
+	.clk	(clk),
+	.din	(dmmu_sync_supported_asi_e),
+	.q	(dmmu_sync_supported_asi_m),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+assign	dmmu_sync_illgl_va_m =
+	dmmu_sync_supported_asi_m &
+	~(dmmu_tag_target_en_m | dmmu_tag_access_en_m |
+	  dmmu_sync_fsr_en_m   | dmmu_sync_far_en_m   |
+	  dmmu_tsb_en_m        | dmmu_ctxt_cfg_en_m   |
+	  dmmu_8k_ptr_en_m     | dmmu_64k_ptr_en_m    | dmmu_direct_ptr_en_m) ;
+
+// I-side fast-ASI register enables (M stage). Each enable is a staged
+// ASI decode qualified by a match on the low byte of early_va_m.
+
+// Registers sharing the immu_decode_asi50_m decode; the VA offset picks one.
+assign	immu_tag_target_en_m     = (early_va_m[7:0] == 8'h00) & immu_decode_asi50_m ;
+assign	immu_sync_fsr_en_m       = (early_va_m[7:0] == 8'h18) & immu_decode_asi50_m ;
+assign	immu_tag_access_en_m     = (early_va_m[7:0] == 8'h30) & immu_decode_asi50_m ;
+
+// Registers with a dedicated decode; only VA offset 8'h00 is accepted.
+assign	immu_zctxt_ps0_tsb_en_m  = (early_va_m[7:0] == 8'h00) & immu_zctxt_ps0_tsb_m ;
+assign	immu_zctxt_ps1_tsb_en_m  = (early_va_m[7:0] == 8'h00) & immu_zctxt_ps1_tsb_m ;
+assign	immu_nzctxt_ps0_tsb_en_m = (early_va_m[7:0] == 8'h00) & immu_nzctxt_ps0_tsb_m ;
+assign	immu_nzctxt_ps1_tsb_en_m = (early_va_m[7:0] == 8'h00) & immu_nzctxt_ps1_tsb_m ;
+assign	immu_zctxt_cfg_en_m      = (early_va_m[7:0] == 8'h00) & immu_zctxt_cfg_m ;
+assign	immu_nzctxt_cfg_en_m     = (early_va_m[7:0] == 8'h00) & immu_nzctxt_cfg_m ;
+assign	immu_8k_ptr_en_m         = (early_va_m[7:0] == 8'h00) & immu_8k_ptr_m ;
+assign	immu_64k_ptr_en_m        = (early_va_m[7:0] == 8'h00) & immu_64k_ptr_m ;
+
+// Grouped enables.
+assign	immu_tsb_en_m =
+	immu_nzctxt_ps1_tsb_en_m | immu_nzctxt_ps0_tsb_en_m |
+	immu_zctxt_ps1_tsb_en_m  | immu_zctxt_ps0_tsb_en_m ;
+assign	immu_ctxt_cfg_en_m = immu_nzctxt_cfg_en_m | immu_zctxt_cfg_en_m ;
+
+
+// Calculation of immu illgl-va
+//
+// Same scheme as the D-side: an I-side fast-ASI decode matched, but the
+// VA offset enabled none of the I-side registers.
+
+wire	immu_sync_supported_asi_e ;
+wire	immu_sync_supported_asi_m ;
+wire	immu_sync_illgl_va_m ;
+
+assign	immu_sync_supported_asi_e =
+	immu_decode_asi50_e   |
+	immu_zctxt_ps0_tsb_e  | immu_zctxt_ps1_tsb_e  |
+	immu_nzctxt_ps0_tsb_e | immu_nzctxt_ps1_tsb_e |
+	immu_zctxt_cfg_e      | immu_nzctxt_cfg_e     |
+	immu_8k_ptr_e         | immu_64k_ptr_e ;
+
+dff_s stgm_isynca (
+	.clk	(clk),
+	.din	(immu_sync_supported_asi_e),
+	.q	(immu_sync_supported_asi_m),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+assign	immu_sync_illgl_va_m =
+	immu_sync_supported_asi_m &
+	~(immu_tag_target_en_m | immu_tag_access_en_m |
+	  immu_sync_fsr_en_m   | immu_tsb_en_m        |
+	  immu_ctxt_cfg_en_m   | immu_8k_ptr_en_m     | immu_64k_ptr_en_m) ;
+
+// Stage both illegal-va indications from _m to _g.
+dff_s #(2) stgg_illgl (
+	.clk	(clk),
+	.din	({immu_sync_illgl_va_m, dmmu_sync_illgl_va_m}),
+	.q	({immu_sync_illgl_va_g, dmmu_sync_illgl_va_g}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// Staged to g for writes
+// 17 register enables move from their _m names to the unsuffixed names
+// used by the write-enable logic; din and q list them in the same order.
+dff_s #(17) fastasi_g (
+	.clk	(clk),
+	.din	({dmmu_tag_access_en_m,
+		  dmmu_sync_fsr_en_m,
+		  dmmu_sync_far_en_m,
+		  dmmu_zctxt_ps0_tsb_en_m,
+		  dmmu_zctxt_ps1_tsb_en_m,
+		  dmmu_nzctxt_ps0_tsb_en_m,
+		  dmmu_nzctxt_ps1_tsb_en_m,
+		  dmmu_zctxt_cfg_en_m,
+		  dmmu_nzctxt_cfg_en_m,
+		  immu_tag_access_en_m,
+		  immu_sync_fsr_en_m,
+		  immu_zctxt_ps0_tsb_en_m,
+		  immu_zctxt_ps1_tsb_en_m,
+		  immu_nzctxt_ps0_tsb_en_m,
+		  immu_nzctxt_ps1_tsb_en_m,
+		  immu_zctxt_cfg_en_m,
+		  immu_nzctxt_cfg_en_m}),
+	.q	({dmmu_tag_access_en,
+		  dmmu_sync_fsr_en,
+		  dmmu_sync_far_en,
+		  dmmu_zctxt_ps0_tsb_en,
+		  dmmu_zctxt_ps1_tsb_en,
+		  dmmu_nzctxt_ps0_tsb_en,
+		  dmmu_nzctxt_ps1_tsb_en,
+		  dmmu_zctxt_cfg_en,
+		  dmmu_nzctxt_cfg_en,
+		  immu_tag_access_en,
+		  immu_sync_fsr_en,
+		  immu_zctxt_ps0_tsb_en,
+		  immu_zctxt_ps1_tsb_en,
+		  immu_nzctxt_ps0_tsb_en,
+		  immu_nzctxt_ps1_tsb_en,
+		  immu_zctxt_cfg_en,
+		  immu_nzctxt_cfg_en}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+//=========================================================================================
+//	MMU ASI Decode - D-Side
+//=========================================================================================
+
+
+// Assumption is that only 9 bits of VA are required.
+// Comparison for asi-state and va is to be done uniformly in w2.
+
+// This will have to change because of tsb mapping to mra.
+	// Any of the four staged D-side TSB register enables.
+	assign	dmmu_tsb_en =
+			dmmu_nzctxt_ps1_tsb_en | dmmu_nzctxt_ps0_tsb_en |
+			dmmu_zctxt_ps1_tsb_en  | dmmu_zctxt_ps0_tsb_en ;
+
+// A long-latency tlb access in M is either a load or a store.
+assign	tlb_ldst_inst_m = lsu_tlu_tlb_st_inst_m | lsu_tlu_tlb_ld_inst_m ;
+
+// M-stage decoding for long-latency tlb accesses
+// Every decode is qualified with tlb_ldst_inst_m.
+always	@ (/*AUTOSENSE*/dmmu_inv_all_asi or lsu_tlu_tlb_asi_state_m
+           or lsu_tlu_tlb_ldst_va_m[7:0] or tlb_ldst_inst_m)
+	begin
+		// ASI state 8'h5C, and VA[7:0] must be 8'h00.
+		dmmu_data_in_en_m =
+			tlb_ldst_inst_m &
+			(lsu_tlu_tlb_asi_state_m[7:0] == 8'h5C) &
+			(lsu_tlu_tlb_ldst_va_m[7:0]   == 8'h00) ;
+
+		// Invalidate-all is pre-decoded into dmmu_inv_all_asi.
+		// Formerly decoded here as:
+		 //({lsu_tlu_tlb_asi_state_m[7:0],lsu_tlu_tlb_ldst_va_m[7:0]} == {8'h60,8'h08}) & tlb_ldst_inst_m ;
+		dmmu_invalidate_all_en_m =
+			tlb_ldst_inst_m & dmmu_inv_all_asi ;
+
+		// Address specifies tlb entry.
+		dmmu_data_access_en_m =
+			tlb_ldst_inst_m & (lsu_tlu_tlb_asi_state_m[7:0] == 8'h5D) ;
+
+		// Address specifies tlb entry.
+		dmmu_tag_read_en_m =
+			tlb_ldst_inst_m & (lsu_tlu_tlb_asi_state_m[7:0] == 8'h5E) ;
+
+		dmmu_demap_en_m =
+			tlb_ldst_inst_m & (lsu_tlu_tlb_asi_state_m[7:0] == 8'h5F) ;
+	end
+
+// Stage to g.
+// The g-stage flops use dffre (enable + reset) instead of plain dff. This is
+// required to avoid a conflict between fast-asi and lng-latency rds of mra.
+// Specifically, data-in/data_access need to be staged, along with
+// support information.
+
+wire	lng_ltncy_en ;
+wire	lng_ltncy_en_d1 ;
+wire	lng_ltncy_rst ;
+
+// Enable: a tlb load or store is present in M.
+assign	lng_ltncy_en = (lsu_tlu_tlb_ld_inst_m | lsu_tlu_tlb_st_inst_m) ;
+
+dff_s stgd1_lltncyen (
+	.clk	(clk),
+	.din	(lng_ltncy_en),
+	.q	(lng_ltncy_en_d1),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// Active-low copy of the delayed enable; forced high while sehold is set.
+assign	tlu_lng_ltncy_en_l = sehold | ~lng_ltncy_en_d1 ;
+
+assign	lng_ltncy_rst =
+	~rst_l |
+	// all reads processed immediately
+	tlb_ld_inst_unflushed |
+	// all writes not requiring mra processed immediately
+	(tlb_st_inst_unflushed &
+		~dmmu_data_in_en & ~dmmu_data_access_en &
+		~immu_data_in_en & ~immu_data_access_en) |
+	// lng-ltncy rds - delay until bubble available.
+	dmra_lng_lat_rd | imra_lng_lat_rd |
+	// rst w/o use if illgl-va
+	((tlb_st_inst_unflushed | tlb_ld_inst_unflushed) &
+		(immu_async_illgl_va_g | dmmu_async_illgl_va_g)) ;
+
+// Capture VA[10:3] and the thread id of the long-latency access while
+// lng_ltncy_en is asserted.
+dffe_s #(10) dtlbacc_stgg (
+	.clk	(clk),
+	.en	(lng_ltncy_en),
+	.din	({lsu_tlu_tlb_ldst_va_m[10:3],
+		  lsu_tlu_tlb_access_tid_m[1:0]}),
+	.q	({tlb_ldst_va_g[10:3],
+		  tlb_access_tid_g[1:0]}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// D-side access decodes plus the raw (unflushed) load/store indications;
+// loaded under lng_ltncy_en, reset by lng_ltncy_rst.
+dffre_s #(7) dtlbaccr_stgg (
+	.clk	(clk),
+	.rst	(lng_ltncy_rst),
+	.en	(lng_ltncy_en),
+	.din	({dmmu_data_in_en_m,
+		  dmmu_data_access_en_m,
+		  dmmu_tag_read_en_m,
+		  dmmu_demap_en_m,
+		  dmmu_invalidate_all_en_m,
+		  lsu_tlu_tlb_ld_inst_m,
+		  lsu_tlu_tlb_st_inst_m}),
+	.q	({dmmu_data_in_en,
+		  dmmu_data_access_en,
+		  dmmu_tag_read_en,
+		  dmmu_demap_en,
+		  dmmu_invalidate_all_en,
+		  tlb_ld_inst_unflushed,
+		  tlb_st_inst_unflushed}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+
+// The staged long-latency load/store is dropped when either MMU reports
+// an async illegal va.
+assign	tlb_ld_inst_g = tlb_ld_inst_unflushed & ~immu_async_illgl_va_g & ~dmmu_async_illgl_va_g ;
+assign	tlb_st_inst_g = tlb_st_inst_unflushed & ~immu_async_illgl_va_g & ~dmmu_async_illgl_va_g ;
+
+// Store to the D-side sync fsr: common term first, then one bit per thread.
+assign	dmmu_any_sfsr_wr = st_inst_g & dmmu_sync_fsr_en ; //|(dsfsr_asi_wr_en[3:0]);
+
+assign	dsfsr_asi_wr_en[3:0] =
+	{4{dmmu_sync_fsr_en & st_inst_g}} &
+	{thread3_sel_g, thread2_sel_g, thread1_sel_g, thread0_sel_g} ;
+
+// Active-low sfsr write enable: asi store, or trap write unless squashed.
+assign	dmmu_sfsr_wr_en_l[3:0] =
+	~((dmmu_sfsr_trp_wr[3:0] & {4{~priority_squash_g}}) | dsfsr_asi_wr_en[3:0]) ; // Bug 4183
+
+// Active-low sfar write enable per thread: asi store to the sync far, or
+// the same squash-qualified trap write as the sfsr.
+assign	dmmu_sfar_wr_en_l[0] =
+	~((~priority_squash_g & dmmu_sfsr_trp_wr[0]) |
+	  (thread0_sel_g & st_inst_g & dmmu_sync_far_en)) ; // Bug 4183
+assign	dmmu_sfar_wr_en_l[1] =
+	~((~priority_squash_g & dmmu_sfsr_trp_wr[1]) |
+	  (thread1_sel_g & st_inst_g & dmmu_sync_far_en)) ;
+assign	dmmu_sfar_wr_en_l[2] =
+	~((~priority_squash_g & dmmu_sfsr_trp_wr[2]) |
+	  (thread2_sel_g & st_inst_g & dmmu_sync_far_en)) ;
+assign	dmmu_sfar_wr_en_l[3] =
+	~((~priority_squash_g & dmmu_sfsr_trp_wr[3]) |
+	  (thread3_sel_g & st_inst_g & dmmu_sync_far_en)) ;
+
+
+// D-side long-latency operations, split by load/store.
+assign	dmmu_data_in_wr_en     = tlb_st_inst_g & dmmu_data_in_en ;	// Write-Only.
+assign	dmmu_data_access_wr_en = tlb_st_inst_g & dmmu_data_access_en ;
+// non-threaded as shared resource
+assign	dmmu_data_access_rd_en = tlb_ld_inst_g & dmmu_data_access_en ;
+
+// take exception for write case.
+assign	dmmu_tag_read_rd_en    = tlb_ld_inst_g & dmmu_tag_read_en ;
+
+// Operations that address the tlb by index.
+assign	dtlb_rw_index_vld_g =
+	dmmu_tag_read_rd_en | dmmu_data_access_wr_en | dmmu_data_access_rd_en ;
+
+// terminate write if tlb full and signal exception.
+// The write is held off while ifu_lsu_memref_d is asserted.
+assign	dtlb_wr_vld_g =
+	~ifu_lsu_memref_d & (dmmu_data_access_wr_en | dmmu_data_in_wr_en) ;
+
+// Pending copies of the indexed-access valid and of the index itself,
+// loaded on tlb_access_en and reset by tlb_access_rst.
+wire		dtlb_rw_index_vld_pend ;
+wire [5:0]	dtlb_rw_index_pend ;
+
+dffre_s #(1) stgw2_dtlbctl (
+	.clk	(clk),
+	.rst	(tlb_access_rst),
+	.en	(tlb_access_en),
+	.din	(dtlb_rw_index_vld_g),
+	.q	(dtlb_rw_index_vld_pend),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+dffre_s #(6) stgw2_dtlbidx (
+	.clk	(clk),
+	.rst	(tlb_access_rst),
+	.en	(tlb_access_en),
+	.din	(tlb_ldst_va_g[8:3]),
+	.q	(dtlb_rw_index_pend[5:0]),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// Any tlb tag or data read in flight, on either side.
+wire	tlb_rd_mode ;
+wire	tlb_rd_mode_d1 ;
+assign	tlb_rd_mode =
+		tlu_dtlb_data_rd_g | tlu_dtlb_tag_rd_g |	// d-side read
+		tlu_itlb_data_rd_g | tlu_itlb_tag_rd_g ;	// i-side read
+
+dff_s stgd1_rmode (
+	.clk	(clk),
+	.din	(tlb_rd_mode),
+	.q	(tlb_rd_mode_d1),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// One-cycle-delayed copies of the done indications from LSU and IFU.
+wire	dtlb_done_d1 ;
+dff_s stgd1_ddone (
+	.clk	(clk),
+	.din	(lsu_tlu_dtlb_done),
+	.q	(dtlb_done_d1),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+wire	itlb_done_d1 ;
+dff_s stgd1_idone (
+	.clk	(clk),
+	.din	(ifu_tlu_itlb_done),
+	.q	(itlb_done_d1),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// Advanced by a cycle.
+assign	tlu_dtlb_rw_index_vld_g  = dtlb_rw_index_vld_pend | dtlb_rw_index_vld_g ;
+// Earlier forms, kept for reference:
+//assign	tlu_dtlb_rw_index_vld_g  = dtlb_rw_index_vld_g | (dtlb_rw_index_vld_pend & ~dtlb_done_d1) ; //Bug3974
+//assign	tlu_dtlb_rw_index_vld_g  = dtlb_rw_index_vld_g | (dtlb_rw_index_vld_pend & ~lsu_tlu_dtlb_done) ;
+
+// The live VA index is masked off in autodemap, write and delayed-read
+// modes, leaving only the pending index on the bus.
+assign	tlu_dtlb_rw_index_g[5:0] =
+	dtlb_rw_index_pend[5:0] |
+	({6{~(tlb_admp_mode | tlb_write_mode | tlb_rd_mode_d1)}} & tlb_ldst_va_g[8:3]) ;
+
+// Exception on reserved field.
+// Context select comes from va[5:4].
+assign	demap_pctxt = (tlb_ldst_va_g[5:4] == 2'b00) ;
+assign	demap_sctxt = (tlb_ldst_va_g[5:4] == 2'b01) ;
+assign	demap_nctxt = (tlb_ldst_va_g[5:4] == 2'b10) ;
+// reserved ctxt causes demap to be ignored.
+// reserved dmp type causes demap to be ignored.
+assign	demap_resrv =	(tlb_ldst_va_g[5:4] == 2'b11)		// ctxt
+			| (tlb_ldst_va_g[7:6] == 2'b11) ;	// type
+
+// Demap type comes from va[7:6].
+assign	ddemap_by_page  = dmmu_demap_en & (tlb_ldst_va_g[7:6] == 2'b00) ;
+assign	ddemap_by_ctxt  = dmmu_demap_en & (tlb_ldst_va_g[7:6] == 2'b01) ;
+assign	ddemap_all      = dmmu_demap_en & (tlb_ldst_va_g[7:6] == 2'b10) ;
+
+// assumption is that demap_all is unaffected by presence of reserved ctxt as it
+// does not use ctxt.
+assign	ddemap_vld  	= ddemap_all |
+				(~demap_resrv & (ddemap_by_ctxt | ddemap_by_page)) ;
+
+// Pending state for D-side demap / invalidate-all.
+//wire		dtlb_dmp_by_ctxt_pend ;
+wire		dtlb_dmp_all_pend ;
+wire		dtlb_dmp_pctxt_pend ;
+wire		dtlb_dmp_sctxt_pend ;
+wire		dtlb_dmp_nctxt_pend ;
+wire		dmmu_inv_all_g ;
+wire		dmmu_inv_all_pend ;
+wire	[1:0]	idtlb_dmp_thrid_pend ;
+wire	[1:0]	ldst_asi_tid ;
+
+// Invalidate-all takes effect only for an unflushed tlb store.
+assign	dmmu_inv_all_g = tlb_st_inst_g & dmmu_invalidate_all_en ;
+
+// Demap/Invalidate
+dffre_s #(5) stgw2_dtlbdmp (
+	.clk	(clk),
+	.rst	(tlb_access_rst),
+	.en	(tlb_access_en),
+	.din	({ddemap_all,
+		  demap_pctxt,
+		  demap_sctxt,
+		  demap_nctxt,
+		  dmmu_inv_all_g}),
+	.q	({dtlb_dmp_all_pend,
+		  dtlb_dmp_pctxt_pend,
+		  dtlb_dmp_sctxt_pend,
+		  dtlb_dmp_nctxt_pend,
+		  dmmu_inv_all_pend}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// Bug 3905 - rm from above flop.
+// The pending demap thread id is simply the staged access thread id.
+assign	idtlb_dmp_thrid_pend[1:0] = tlb_access_tid_g[1:0] ;
+
+// Use the pending thread id when the D-side reports done or an async
+// illegal va is flagged; otherwise use thrid_g.
+assign	ldst_asi_tid[1:0] =
+	(immu_async_illgl_va_g | dmmu_async_illgl_va_g | lsu_tlu_dtlb_done)
+		? idtlb_dmp_thrid_pend[1:0]
+		: thrid_g[1:0] ;
+
+// Thread for tlb
+dff_s #(4) stg_w2 (
+	.clk	(clk),
+	.din	({ldst_asi_tid[1:0],
+		  idtlb_dmp_thrid_pend[1:0]}),
+	.q	({tlu_lsu_ldxa_tid_w2[1:0],
+		  tlu_lsu_stxa_ack_tid[1:0]}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// Invalidate-all request: live, or pending until the delayed done arrives.
+assign	tlu_dtlb_invalidate_all_g = (~dtlb_done_d1 & dmmu_inv_all_pend) | dmmu_inv_all_g ;
+//assign	tlu_dtlb_invalidate_all_g = dmmu_inv_all_g | (dmmu_inv_all_pend & ~lsu_tlu_dtlb_done) ;
+
+// Timing Change : Delay by a cycle to match vlds.
+// All demap selects except actxt are suppressed while tlu_admp_key_sel is
+// set; actxt is tlu_admp_key_sel itself.
+wire	pre_dtlb_dmp_all ;
+wire	pre_dtlb_dmp_pctxt ;
+wire	pre_dtlb_dmp_sctxt ;
+wire	pre_dtlb_dmp_nctxt ;
+wire	pre_dtlb_dmp_actxt ;
+//assign	pre_dtlb_dmp_by_ctxt = (ddemap_by_ctxt | dtlb_dmp_by_ctxt_pend) & ~tlu_admp_key_sel  ;
+assign	pre_dtlb_dmp_all   = ~tlu_admp_key_sel & (dtlb_dmp_all_pend | ddemap_all) ;
+assign	pre_dtlb_dmp_pctxt = ~tlu_admp_key_sel & dtlb_dmp_pctxt_pend ;
+assign	pre_dtlb_dmp_sctxt = ~tlu_admp_key_sel & dtlb_dmp_sctxt_pend ;
+assign	pre_dtlb_dmp_nctxt = ~tlu_admp_key_sel & dtlb_dmp_nctxt_pend ;
+assign	pre_dtlb_dmp_actxt =  tlu_admp_key_sel ;
+
+dff_s #(5) dmp_stgd1 (
+	.clk	(clk),
+	.din	({pre_dtlb_dmp_all,
+		  pre_dtlb_dmp_pctxt,
+		  pre_dtlb_dmp_sctxt,
+		  pre_dtlb_dmp_nctxt,
+		  pre_dtlb_dmp_actxt}),
+	.q	({tlu_dtlb_dmp_all_g,
+		  tlu_dtlb_dmp_pctxt_g,
+		  tlu_dtlb_dmp_sctxt_g,
+		  tlu_dtlb_dmp_nctxt_g,
+		  tlu_dtlb_dmp_actxt_g}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+assign	tlu_idtlb_dmp_thrid_g = idtlb_dmp_thrid_pend[1:0] | tlb_access_tid_g[1:0] ;
+
+
+//=========================================================================================
+//	MMU ASI Decode - I-Side
+//=========================================================================================
+
+// Assumption is that only 9 bits of VA are required.
+// Comparison for asi-state and va is to be done uniformly in w2.
+
+	// Any of the four staged I-side TSB register enables.
+	assign	immu_tsb_en =
+			immu_nzctxt_ps1_tsb_en | immu_nzctxt_ps0_tsb_en |
+			immu_zctxt_ps1_tsb_en  | immu_zctxt_ps0_tsb_en ;
+
+reg	immu_data_in_en_m ;
+reg	immu_data_access_en_m ;
+reg	immu_tag_read_en_m ;
+reg	immu_demap_en_m ;
+
+// M-stage decoding for long-latency tlb accesses
+// Every decode is qualified with tlb_ldst_inst_m.
+always	@ (/*AUTOSENSE*/immu_inv_all_asi or lsu_tlu_tlb_asi_state_m
+           or lsu_tlu_tlb_ldst_va_m[7:0] or tlb_ldst_inst_m)
+	begin
+		// ASI state 8'h54, and VA[7:0] must be 8'h00.
+		immu_data_in_en_m =
+			tlb_ldst_inst_m &
+			(lsu_tlu_tlb_asi_state_m[7:0] == 8'h54) &
+			(lsu_tlu_tlb_ldst_va_m[7:0]   == 8'h00) ;
+
+		// Invalidate-all is pre-decoded into immu_inv_all_asi.
+		// Formerly decoded here as:
+		 //({lsu_tlu_tlb_asi_state_m[7:0],lsu_tlu_tlb_ldst_va_m[7:0]} == {8'h60,8'h00}) & tlb_ldst_inst_m ;
+		immu_invalidate_all_en_m =
+			tlb_ldst_inst_m & immu_inv_all_asi ;
+
+		// Address specifies tlb entry.
+		immu_data_access_en_m =
+			tlb_ldst_inst_m & (lsu_tlu_tlb_asi_state_m[7:0] == 8'h55) ;
+
+		// Address specifies tlb entry.
+		immu_tag_read_en_m =
+			tlb_ldst_inst_m & (lsu_tlu_tlb_asi_state_m[7:0] == 8'h56) ;
+
+		immu_demap_en_m =
+			tlb_ldst_inst_m & (lsu_tlu_tlb_asi_state_m[7:0] == 8'h57) ;
+	end
+
+// Stage to g.
+// Uses dffre to resolve conflict between fast-asi and lng-ltncy reads;
+// shares the enable and reset of the D-side g-stage flop.
+dffre_s #(5) itlbacc_stgg (
+	.clk	(clk),
+	.rst	(lng_ltncy_rst),
+	.en	(lng_ltncy_en),
+	.din	({immu_data_in_en_m,
+		  immu_data_access_en_m,
+		  immu_tag_read_en_m,
+		  immu_demap_en_m,
+		  immu_invalidate_all_en_m}),
+	.q	({immu_data_in_en,
+		  immu_data_access_en,
+		  immu_tag_read_en,
+		  immu_demap_en,
+		  immu_invalidate_all_en}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+
+// Store to the I-side sync fsr: common term first, then one bit per thread.
+assign	immu_any_sfsr_wr = st_inst_g & immu_sync_fsr_en ; //|(isfsr_asi_wr_en[3:0]);
+
+assign	isfsr_asi_wr_en[3:0] =
+	{4{immu_sync_fsr_en & st_inst_g}} &
+	{thread3_sel_g, thread2_sel_g, thread1_sel_g, thread0_sel_g} ;
+
+// Active-low sfsr write enable: asi store or trap write.
+assign	immu_sfsr_wr_en_l[3:0] = ~(isfsr_asi_wr_en[3:0] | isfsr_trp_wr[3:0]) ;
+
+// Per-thread load of an I-side TSB register.
+assign	immu_tsb_rd_en[0] = thread0_sel_g & ld_inst_g & immu_tsb_en ;
+assign	immu_tsb_rd_en[1] = thread1_sel_g & ld_inst_g & immu_tsb_en ;
+assign	immu_tsb_rd_en[2] = thread2_sel_g & ld_inst_g & immu_tsb_en ;
+assign	immu_tsb_rd_en[3] = thread3_sel_g & ld_inst_g & immu_tsb_en ;
+
+// I-side long-latency operations, split by load/store.
+assign	immu_data_in_wr_en     = tlb_st_inst_g & immu_data_in_en ;	// Write-Only.
+assign	immu_data_access_wr_en = tlb_st_inst_g & immu_data_access_en ;
+assign	immu_data_access_rd_en = tlb_ld_inst_g & immu_data_access_en ;
+
+assign	immu_tag_read_rd_en    = tlb_ld_inst_g & immu_tag_read_en ;
+
+// Operations that address the tlb by index.
+assign	itlb_rw_index_vld_g =
+	immu_tag_read_rd_en | immu_data_access_wr_en | immu_data_access_rd_en ;
+
+// terminate write if tlb full and signal exception.
+// The write is held off while ifu_lsu_memref_d is asserted.
+assign	itlb_wr_vld_g =
+	~ifu_lsu_memref_d & (immu_data_access_wr_en | immu_data_in_wr_en) ;
+
+// Pending copy of the I-side indexed-access valid.
+wire	itlb_rw_index_vld_pend ;
+
+dffre_s #(1) stgw2_itlbctl (
+	.clk	(clk),
+	.rst	(tlb_access_rst),
+	.en	(tlb_access_en),
+	.din	(itlb_rw_index_vld_g),
+	.q	(itlb_rw_index_vld_pend),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// Live request, or the pending one until the delayed itlb done arrives.
+assign	tlu_itlb_rw_index_vld_g  = (~itlb_done_d1 & itlb_rw_index_vld_pend) | itlb_rw_index_vld_g ;
+// The I-side index is the same bus as the D-side index.
+assign	tlu_itlb_rw_index_g[5:0] = tlu_dtlb_rw_index_g[5:0] ;
+
+// Demap type comes from va[7:6].
+assign	idemap_by_page  = immu_demap_en & (tlb_ldst_va_g[7:6] == 2'b00) ;
+assign	idemap_by_ctxt  = immu_demap_en & (tlb_ldst_va_g[7:6] == 2'b01) ;
+assign	idemap_all      = immu_demap_en & (tlb_ldst_va_g[7:6] == 2'b10) ;
+
+// assumption is that demap_all is unaffected by presence of reserved ctxt as it
+// does not use ctxt.
+// Unlike the D-side, a secondary-context demap is also rejected here.
+assign	idemap_vld  	= idemap_all |
+				(~demap_resrv & ~demap_sctxt & (idemap_by_ctxt | idemap_by_page)) ;
+
+wire	itlb_dmp_by_ctxt_pend ;
+wire	itlb_dmp_all_pend ;
+wire	immu_inv_all_g ;
+wire	immu_inv_all_pend ;
+
+// Invalidate-all takes effect only for an unflushed tlb store.
+assign	immu_inv_all_g = tlb_st_inst_g & immu_invalidate_all_en ;
+
+// Demap
+dffre_s #(3) stgw2_itlbdmp (
+	.clk	(clk),
+	.rst	(tlb_access_rst),
+	.en	(tlb_access_en),
+	.din	({idemap_by_ctxt,
+		  idemap_all,
+		  immu_inv_all_g}),
+	.q	({itlb_dmp_by_ctxt_pend,
+		  itlb_dmp_all_pend,
+		  immu_inv_all_pend}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// I-side demap-all: live or pending, suppressed while tlu_admp_key_sel is set.
+wire	tlu_itlb_dmp_all_g ;
+assign	tlu_itlb_dmp_all_g = ~tlu_admp_key_sel & (itlb_dmp_all_pend | idemap_all) ;
+
+// Invalidate-all request: live, or pending until the delayed done arrives.
+assign	tlu_itlb_invalidate_all_g = (~itlb_done_d1 & immu_inv_all_pend) | immu_inv_all_g ;
+
+// The pctxt/nctxt selects are shared with the D-side.
+assign	tlu_itlb_dmp_pctxt_g = tlu_dtlb_dmp_pctxt_g ;
+assign	tlu_itlb_dmp_nctxt_g = tlu_dtlb_dmp_nctxt_g ;
+
+// Timing Change - delay by 1-cycle to match vld.
+wire	pre_itlb_dmp_actxt ;
+assign	pre_itlb_dmp_actxt = tlu_admp_key_sel ;
+
+dff_s #(1) preidmp_d1 (
+	.clk	(clk),
+	.din	(pre_itlb_dmp_actxt),
+	.q	(tlu_itlb_dmp_actxt_g),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+
+// Adapt key vlds to autodemap.
+// Note that sense of global bit has changed. Otherwise vlds remain same.
+// Priority: demap-by-ctxt, then demap-all, then page demap (va[9] picks
+// between the two page-demap encodings).
+assign	tlu_dmp_key_vld_g[4:0] =
+	(idemap_by_ctxt | ddemap_by_ctxt) ? 5'b00000 :	// demap-ctxt - include only ctxt
+	(idemap_all     | ddemap_all)     ? 5'b00001 :	// demap-all - do not include va or ctxt
+	// Bug 3129 : the final case used to be a fixed 5'b11110
+	tlb_ldst_va_g[9]                  ? 5'b11111 :	// include va and NO ctxt;dmp-pg-real
+	                                    5'b11110 ;	// include both va and ctxt; dmp-pg
+
+// real tte for demap and write. both are indicated in bit 9 of va.
+// demap_by_ctxt will not effect real translations.
+assign	tlu_tte_real_g = ~ddemap_by_ctxt & ~idemap_by_ctxt & tlb_ldst_va_g[9] ;
+
+//=========================================================================================
+//	EXCEPTIONS
+//=========================================================================================
+
+// Now generated in LSU.
+
+// These are all related to asi use.
+/*assign	tlu_mmu_sync_data_excp_g = 
+	(immu_sync_rd_only_asi_g | dmmu_sync_rd_only_asi_g) & st_inst_unflushed & inst_vld_g  ;*/
+
+//=========================================================================================
+//	TAG/DATA RD/WR/DMP HANDSHAKE
+//=========================================================================================
+
+// RD/WR HANDSHAKE
+// Need to add autodemap capability.
+
+// Assume mutually exclusive by construction.
+// Any new tlb write, read, demap or invalidate-all opens the handshake.
+assign	tlb_access_en =
+	immu_inv_all_g         | dmmu_inv_all_g         |
+	idemap_vld             | ddemap_vld             |
+	itlb_wr_vld_g          | dtlb_wr_vld_g          |
+	immu_data_access_rd_en | dmmu_data_access_rd_en |
+	immu_tag_read_rd_en    | dmmu_tag_read_rd_en ;
+assign	tlb_access_en_l = ~tlb_access_en ;
+
+// A done from either side closes the handshake, except while in (or one
+// cycle after) autodemap mode. Reset also closes it.
+assign	tlb_access_rst =
+	((ifu_tlu_itlb_done | lsu_tlu_dtlb_done) & ~tlb_admp_mode & ~tlb_admp_mode_d1) |
+	~rst_l ;
+assign 	tlb_access_rst_l = ~tlb_access_rst ;
+
+wire	tlb_access_en_l_d1 ;
+dff_s #(1) stgd1_tlbacc (
+	.clk	(clk),
+	.din	(tlb_access_en_l),
+	.q	(tlb_access_en_l_d1),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// Delayed active-low enable; forced high while sehold is set.
+assign	tlu_tlb_access_en_l_d1 = sehold | tlb_access_en_l_d1 ;
+
+// A data-access read also reads the tag.
+assign	itlb_tag_rd_en = immu_data_access_rd_en | immu_tag_read_rd_en ;
+assign	dtlb_tag_rd_en = dmmu_data_access_rd_en | dmmu_tag_read_rd_en ;
+
+// Pending flags for each kind of tlb operation; loaded on tlb_access_en
+// and reset by tlb_access_rst.
+dffre_s #(8) tlb_access (
+	.clk	(clk),
+	.rst	(tlb_access_rst),
+	.en	(tlb_access_en),
+	.din	({itlb_wr_vld_g,
+		  immu_data_access_rd_en,
+		  itlb_tag_rd_en,
+		  dtlb_wr_vld_g,
+		  dmmu_data_access_rd_en,
+		  dtlb_tag_rd_en,
+		  idemap_vld,
+		  ddemap_vld}),
+	.q	({itlb_wr_pend,
+		  itlb_data_rd_pend,
+		  itlb_tag_rd_pend,
+		  dtlb_wr_pend,
+		  dtlb_data_rd_pend,
+		  dtlb_tag_rd_pend,
+		  idemap_pend,
+		  ddemap_pend}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// D-side read completes when done arrives with a read pending.
+assign  tlu_dtlb_rd_done  = (dtlb_tag_rd_pend | dtlb_data_rd_pend) & lsu_tlu_dtlb_done ;
+//assign  itlb_rd_done  = ifu_tlu_itlb_done & (itlb_data_rd_pend | itlb_tag_rd_pend) ;
+
+
+// TLB write / read valid generation for both sides.
+// Pattern used throughout: "live request | (pending request & ~done_d1)",
+// where done_d1 is the one-cycle-delayed done from the owning unit.
+//
+// w2 should be renamed to g at some time !!!
+// Write may take one extra cycle to get initiated !!!
+// Unmasked I-side write valid: live request or still-pending request.
+assign	itlb_wr_vld_unmsked = (itlb_wr_vld_g | (itlb_wr_pend & ~itlb_done_d1)) ;
+wire    pre_itlb_wr_vld_g ;
+// The actual write is issued only from the pending flag and only in write mode.
+assign pre_itlb_wr_vld_g = (itlb_wr_pend & ~itlb_done_d1) & tlb_write_mode ;
+//assign pre_itlb_wr_vld_g = itlb_wr_vld_unmsked & tlb_write_mode ;
+// name kept as _g for now to avoid interface change.
+
+assign	tlu_itlb_wr_vld_g = pre_itlb_wr_vld_g ;
+/*dff  #(1) iwvld_d1 (
+        .din    (pre_itlb_wr_vld_g),
+        .q    	(tlu_itlb_wr_vld_g),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); */
+// I-side reads; a data-access read also raises the tag read.
+assign	tlu_itlb_data_rd_g = immu_data_access_rd_en | (itlb_data_rd_pend & ~itlb_done_d1) ;
+assign	tlu_itlb_tag_rd_g = (immu_tag_read_rd_en | immu_data_access_rd_en) | (itlb_tag_rd_pend & ~itlb_done_d1) ;
+
+// D-side equivalents of the above.
+assign	dtlb_wr_vld_unmsked = (dtlb_wr_vld_g | (dtlb_wr_pend & ~dtlb_done_d1)) ;
+wire    pre_dtlb_wr_vld_g ;
+assign pre_dtlb_wr_vld_g = (dtlb_wr_pend & ~dtlb_done_d1) & tlb_write_mode ;
+// name kept as _g for now to avoid interface change.
+
+// NOTE(review): unlike the I-side, the assignment of tlu_dtlb_wr_vld_g is
+// commented out here, so pre_dtlb_wr_vld_g has no consumer in this section.
+// Confirm tlu_dtlb_wr_vld_g is driven elsewhere in the file.
+//assign	tlu_dtlb_wr_vld_g = pre_dtlb_wr_vld_g ;
+assign	tlu_dtlb_data_rd_g = dmmu_data_access_rd_en | (dtlb_data_rd_pend & ~dtlb_done_d1) ;
+assign	tlu_dtlb_tag_rd_g = (dmmu_tag_read_rd_en | dmmu_data_access_rd_en) | (dtlb_tag_rd_pend & ~dtlb_done_d1) ;
+
+// Delay by a cycle - rd for long-latency matches fast-asi.
+// Both occur on a posedge.
+
+// A demap is requested by a pending explicit demap, or by a tlb write
+// while in autodemap mode.
+wire	dtlb_dmp_vld_g ;
+wire	itlb_dmp_vld_g ;
+assign	dtlb_dmp_vld_g =
+		(tlb_admp_mode & dtlb_wr_vld_unmsked) |
+		// qual with dtlb-done may not be needed. Taken into account in ddemap_pend.
+		(~dtlb_done_d1 & ddemap_pend) ;
+assign	itlb_dmp_vld_g =
+		(tlb_admp_mode & itlb_wr_vld_unmsked) |
+		(~itlb_done_d1 & idemap_pend) ;
+
+// dmp_vld should be w2. kept as _g for now to avoid
+// interface change.
+wire	dtlb_dmp_vld_d1 ;
+wire	itlb_dmp_vld_d1 ;
+dff_s #(2) dmpvld_d1 (
+	.clk	(clk),
+	.din	({dtlb_dmp_vld_g, itlb_dmp_vld_g}),
+	.q	({dtlb_dmp_vld_d1, itlb_dmp_vld_d1}),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+// The delayed valid is dropped once the delayed done has arrived.
+assign	tlu_dtlb_dmp_vld_g = ~dtlb_done_d1 & dtlb_dmp_vld_d1 ;
+assign	tlu_itlb_dmp_vld_g = ~itlb_done_d1 & itlb_dmp_vld_d1 ;
+
+// Store-ASI acknowledge, staged one cycle into tlu_lsu_stxa_ack.
+// The expression relies on '&' binding tighter than '|': it is an OR of
+// four terms.
+//   1. a done from either side while something is pending: a tlb write
+//      (outside autodemap mode and the cycle after it), a demap, or an
+//      invalidate-all;
+//   2. a demap store with a reserved field that is not a demap-all;
+//   3. an I-side demap store that selects the secondary context and is
+//      not a demap-all;
+//   4. a long-latency store flagged with an async illegal va.
+wire	stxa_ack ;
+
+// Assume mutually exclusive.
+// Third term is meant to complete demap with reserved ctxt.
+assign	stxa_ack = 
+	(((itlb_wr_pend | dtlb_wr_pend) & ~(tlb_admp_mode | tlb_admp_mode_d1)) 	| 
+	idemap_pend | ddemap_pend | immu_inv_all_pend | dmmu_inv_all_pend) & (lsu_tlu_dtlb_done | ifu_tlu_itlb_done) 	|
+	(demap_resrv & tlb_st_inst_g & 
+		((immu_demap_en & ~idemap_all)  | (dmmu_demap_en & ~ddemap_all))) | //5053
+	(demap_sctxt & tlb_st_inst_g & (immu_demap_en & ~idemap_all)) | // Bug5053				  
+						// iside should not use sctxt
+	// lng-latency store needs to signal cmplt to lsu even with illegal va
+	(tlb_st_inst_unflushed & (dmmu_async_illgl_va_g | immu_async_illgl_va_g)) ;
+
+// Registered acknowledge.
+dff_s  #(1) stack_d1 (
+        .din    (stxa_ack),
+        .q    	(tlu_lsu_stxa_ack),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//=========================================================================================
+//	AUTODEMAP
+//=========================================================================================
+
+
+// A tlb write is handled in two phases tracked by two mode flops:
+// tlb_admp_mode (phase 1, autodemap) and tlb_write_mode (phase 2, write).
+
+// A write request on either side, live or pending.
+assign	tlb_wr_vld_g = itlb_wr_vld_unmsked | dtlb_wr_vld_unmsked ;
+
+// Enter autodemap mode on a write request when neither mode is active.
+assign	tlb_admp_en   = tlb_wr_vld_g & ~tlb_admp_mode & ~tlb_write_mode ;
+// Leave autodemap mode on reset, or on a done with a write pending while
+// in autodemap mode.
+assign	tlb_admp_rst  = ~rst_l | 
+	(((itlb_wr_pend | dtlb_wr_pend) & (lsu_tlu_dtlb_done | ifu_tlu_itlb_done)) & tlb_admp_mode) ;
+// Leave write mode on reset, or on a done with a write pending while in
+// write mode, ignoring the cycle right after autodemap mode.
+assign	tlb_wr_rst  = ~rst_l | 
+	(((itlb_wr_pend | dtlb_wr_pend) & (lsu_tlu_dtlb_done | ifu_tlu_itlb_done)) 
+			& tlb_write_mode & ~tlb_admp_mode_d1) ;
+
+// Asserted for a new write request and throughout autodemap mode.
+assign	tlu_admp_key_sel = (dtlb_wr_vld_g | itlb_wr_vld_g) | tlb_admp_mode ;
+
+// 1st Phase - Autodemap
+dffre_s  #(1) dmp1_ff (
+        .din    (tlb_wr_vld_g),
+        .q    	(tlb_admp_mode),
+	.rst	(tlb_admp_rst),	.en	(tlb_admp_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+
+// this is temporary - IFU is spuriously sourcing extra done signal.
+// One-cycle-delayed autodemap mode, used to ignore that extra done.
+dff_s  #(1) admp_d1 (
+        .din    (tlb_admp_mode),
+        .q    	(tlb_admp_mode_d1),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// 2nd Phase - Follow-up with Write
+// The event that ends phase 1 (tlb_admp_rst) is both the data and the
+// enable of this flop, so it also starts phase 2.
+// NOTE(review): tlb_admp_rst and tlb_wr_rst both include ~rst_l, so both
+// .en and .rst are asserted during reset; the result depends on the
+// rst/en priority inside dffre_s - confirm reset wins.
+dffre_s  #(1) dmp2_ff (
+        .din    (tlb_admp_rst),
+        .q    	(tlb_write_mode),
+	.rst	(tlb_wr_rst),	.en	(tlb_admp_rst),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//=========================================================================================
+
+// Async (long-latency) ldxa data valid: a completed D-side tlb read, or a
+// long-latency load flagged with an async illegal va.
+wire    tlu_ldxa_async_data_vld ;
+assign  tlu_ldxa_async_data_vld =
+        (tlb_ld_inst_unflushed & (immu_async_illgl_va_g | dmmu_async_illgl_va_g)) |
+        tlu_dtlb_rd_done ;
+
+// D-side fast-ASI load data valid: a load in M that enabled any D-side register.
+// ** need to qualify with inst_vld in LSU
+assign	tlu_dldxa_data_vld =
+	ld_inst_m &
+	(dmmu_tag_target_en_m	|
+	 dmmu_tag_access_en_m	|
+	 dmmu_sync_fsr_en_m	|
+	 dmmu_sync_far_en_m	|
+	 dmmu_tsb_en_m		|
+	 dmmu_ctxt_cfg_en_m	|
+	 dmmu_8k_ptr_en_m	|
+	 dmmu_64k_ptr_en_m	|
+	 dmmu_direct_ptr_en_m) ;
+	// Terms removed from the expression above, kept for reference:
+	//tlu_dtlb_rd_done		| // complete thru lsu
+	// for sync/async lng-latency ldxa with illegal va
+	// MMU_ASI
+	//(ld_inst_g & dmmu_sync_illgl_va_g) |
+	//(tlb_ld_inst_unflushed & dmmu_async_illgl_va_g) ;
+
+// I-side fast-ASI load data valid: a load in M that enabled any I-side register.
+// ** need to qualify with inst_vld in LSU
+assign	tlu_ildxa_data_vld =
+	ld_inst_m &
+	(immu_tag_target_en_m	|
+	 immu_tag_access_en_m	|
+	 immu_sync_fsr_en_m	|
+	 immu_tsb_en_m		|
+	 immu_ctxt_cfg_en_m	|
+	 immu_8k_ptr_en_m	|
+	 immu_64k_ptr_en_m) ;
+	// Terms removed from the expression above, kept for reference:
+	// for sync/async lng-latency ldxa with illegal va
+ 	// MMU_ASI
+	//(ld_inst_g & immu_sync_illgl_va_g) |
+	//(tlb_ld_inst_unflushed & immu_async_illgl_va_g) ;
+
+assign  tlu_ldxa_data_vld = tlu_dldxa_data_vld | tlu_ildxa_data_vld ;
+
+ 	// Flush needs to be removed.
+	// Suppressed when either side flags a sync illegal va in M.
+	assign	lsu_exu_ldxa_m = ~immu_sync_illgl_va_m & ~dmmu_sync_illgl_va_m & tlu_ldxa_data_vld ;
+
+dff_s #(1) stg_asyncdvld (
+	.clk	(clk),
+	.din	(tlu_ldxa_async_data_vld),
+	.q	(tlu_lsu_ldxa_async_data_vld),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	);
+
+//=========================================================================================
+//	SFSR/SFAR Control
+//=========================================================================================
+
+// In tcl
+
+//=========================================================================================
+//	PS0 and PS1 Ptr Registers (NEW !!!!)
+//=========================================================================================
+
+// If N=TSB_Size, P=Page_Size, then
+// Ptr = TSB_Base<63:13+N> | VA<21+N+3xP:13+3xP> | 0000	if TSB not split
+// Ptr = TSB_Base<63:14+N> | 0 | VA<21+N+3xP:13+3xP> | 0000 if TSB split
+// Assume P=0(8K),1(64K),3(4M),5(256M).
+// Note that Nmax=11 even though N=0..15, for 256M page. This is because VA cannot exceed 47 for ms bit.
+// Otherwise entire range of N can be covered by all 3 remaining page-size.
+
+// Timing :
+//
+//	|   D-stage  |	E-stage	| M-stage | W-stage    |	
+//	| Read setup | Read +	| Logic + | Latched in |
+//	| to mra     | Logic	| xmit	  | LSU. Select|	
+//	|	     |		|	  | for wr-back|	
+//
+
+// TSB Size Logic - Form 8 bits for 8k and 64k Ptr regs respectively.
+
+// Macrotest support for logic in shadow of mra scan collar.
+// Scan only. Scan value valid in 2nd cycle of macrotest.
+// Functionally the flop input is tied to 1'b0.
+wire	mtest_rdps0_sel ;
+dff_s #(1) rps0d_d1 (
+	.clk	(clk),
+	.din	(1'b0),
+	.q	(mtest_rdps0_sel),
+	.se	(1'b0),
+	.si	(),
+	.so	()
+	) ;
+
+// ps0 is selected by an 8k-ptr read on either side or by the direct-ptr
+// 8k select when sehold_d1 is clear; under sehold_d1 the macrotest flop
+// supplies the select.
+wire	tsb_rd_ps0_sel ;
+assign	tsb_rd_ps0_sel =
+			(sehold_d1 & mtest_rdps0_sel) |
+			(~sehold_d1 &
+				(immu_8k_ptr_e | dmmu_8k_ptr_e |
+				// really _m stage.
+				dmmu_direct_8kptr_sel_g)) ;	// direct-ptr selects ps0
+assign	tlu_tsb_rd_ps0_sel = tsb_rd_ps0_sel ;
+
+// Choose between zero and non-zero context
+// NOTE(review): the select below is tsb_rd_ps0_sel and the two sources are
+// the dtsb/itsb _w2 buses; confirm how these relate to zero/non-zero context.
+assign	tsb_size[3:0]	=
+	tsb_rd_ps0_sel ? tlu_dtsb_size_w2[3:0]
+	               : tlu_itsb_size_w2[3:0] ;
+assign	tsb_split	=
+	tsb_rd_ps0_sel ? tlu_dtsb_split_w2
+	               : tlu_itsb_split_w2 ;
+
+// Mux'ed and staged in mmu_dp.
+assign	tag_access[47:13] = tlu_dtag_access_w2[47:13] ;
+
+wire	[2:0]	page_size ;
+wire	[2:0]	tsb_page_size_g ;
+assign	page_size[2:0] = tsb_page_size_g[2:0] ;
+
+// Currently, all the logic is done in one stage. This will have to
+// be rearranged once the read of the mra is advanced. 
+
+wire	pg8k,pg64k,pg4M;
+assign	pg8k  	= ~page_size[2] & ~page_size[1] & ~page_size[0] ; // 000
+assign	pg64k 	= ~page_size[2] & ~page_size[1] &  page_size[0] ; // 001
+assign	pg4M    = ~page_size[2] &  page_size[1] &  page_size[0] ; // 011
+//assign	pg256M  =  page_size[2] & ~page_size[1] &  page_size[0] ; // 101
+
+// Mux tag-access <36:13>,<39:16>,<45:22>,<51:28> based on page-size.
+// Notebook contains greater detail of mapping of base,tag-access to ptr.
+wire	[23:0]	va ; 
+assign	va[23:0] = pg8k ? tag_access[36:13] : 
+			pg64k ? tag_access[39:16] :	
+				pg4M ? tag_access[45:22] :	
+					 	{{5{tag_access[47]}},tag_access[46:28]} ;// 256M	
+					 	//{4'b0000,tag_access[47:28]} ;	// 256M	// Bug3727
+
+// The ptr address is broken up into 3 regions :
+// ptr<3:0>=4'b0000,		     : constant
+// ptr<12:4>=va<8:0>		     : va from tag-access only 	
+// ptr<27:13>=va<23:9>/base<27:13>/0/1 : va from tag-access OR tsb base address OR '0/1' (split).
+// ptr<28>=base<28>/0/1		     : tsb base address OR '0/1' (split).
+// ptr<47:29>=base<47:29>	     : tsb base address. 
+
+// Assuming N=0..15. Could be reduced to N=11.
+// Need to take exception for unused page size and value of N not compatible with selected page-size.
+
+wire [28:13] ptr ;
+wire	ps1;
+assign ps1 = ~tsb_rd_ps0_sel ;
+ 
+// This is an obvious flop boundary break. 
+
+wire	[3:0] tsb_size_d1 ;
+wire	tsb_split_d1 ;
+wire	[47:13] tsb_base_d1 ;
+wire	ps1_d1 ;
+wire	[23:0] 	va_d1 ;
+
+dff_s  #(4) tsbsize_stgd1 (
+        .din    (tsb_size[3:0]),
+        .q      (tsb_size_d1[3:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+) ;
+
+dff_s  #(1) tsbsplit_stgd1 (
+        .din    (tsb_split),
+        .q      (tsb_split_d1),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+) ;
+
+assign	tsb_base_d1[47:13] = tlu_tsb_base_w2_d1[47:13] ;
+
+dff_s  #(1) ps1_stgd1 (
+        .din    (ps1),
+        .q      (ps1_d1),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+) ;
+
+dff_s  #(24) va_stgd1 (
+        .din    (va[23:0]),
+        .q      (va_d1[23:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+) ;
+
+// These equations have to be optimized.
+assign	ptr[28] = ((tsb_size_d1==4'd15) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[28] ;
+assign	ptr[27] = (tsb_size_d1==4'd15) ? va_d1[23] : ((tsb_size_d1==4'd14) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[27] ;
+assign	ptr[26] = (tsb_size_d1>=4'd14) ? va_d1[22] : ((tsb_size_d1==4'd13) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[26] ;
+assign	ptr[25] = (tsb_size_d1>=4'd13) ? va_d1[21] : ((tsb_size_d1==4'd12) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[25] ;
+assign	ptr[24] = (tsb_size_d1>=4'd12) ? va_d1[20] : ((tsb_size_d1==4'd11) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[24] ;
+assign	ptr[23] = (tsb_size_d1>=4'd11) ? va_d1[19] : ((tsb_size_d1==4'd10) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[23] ;
+assign	ptr[22] = (tsb_size_d1>=4'd10) ? va_d1[18] : ((tsb_size_d1==4'd9) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[22] ;
+assign	ptr[21] = (tsb_size_d1>=4'd9) ? va_d1[17] : ((tsb_size_d1==4'd8) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[21] ;
+assign	ptr[20] = (tsb_size_d1>=4'd8) ? va_d1[16] : ((tsb_size_d1==4'd7) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[20] ;
+assign	ptr[19] = (tsb_size_d1>=4'd7) ? va_d1[15] : ((tsb_size_d1==4'd6) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[19] ;
+assign	ptr[18] = (tsb_size_d1>=4'd6) ? va_d1[14] : ((tsb_size_d1==4'd5) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[18] ;
+assign	ptr[17] = (tsb_size_d1>=4'd5) ? va_d1[13] : ((tsb_size_d1==4'd4) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[17] ;
+assign	ptr[16] = (tsb_size_d1>=4'd4) ? va_d1[12] : ((tsb_size_d1==4'd3) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[16] ;
+assign	ptr[15] = (tsb_size_d1>=4'd3) ? va_d1[11] : ((tsb_size_d1==4'd2) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[15] ;
+assign	ptr[14] = (tsb_size_d1>=4'd2) ? va_d1[10] : ((tsb_size_d1==4'd1) & tsb_split_d1) ? ps1_d1 : tsb_base_d1[14] ;
+assign	ptr[13] = (tsb_size_d1>=4'd1) ? va_d1[9] :  tsb_split_d1 ? ps1_d1 : tsb_base_d1[13] ;
+
+// TSB 8K Ptr. This maps to tsb ps0 ptr !!!
+// This is mapped to either PS0 or PS1 ptr. Do not need to send
+// 8k and 64K ptrs to mmu_dp.
+// Direct ptr needs to be accounted for.
+assign	tlu_idtsb_8k_ptr[47:0] = 
+	{tsb_base_d1[47:29],
+	ptr[28:13],
+	va_d1[8:0],
+	4'b0000};
+
+//=========================================================================================
+//	Establishing Context for Ptr Read
+//=========================================================================================
+
+// Context of Ptr Read determined by context within d/i tag-access register. 
+// Markers per thread will be maintained to determine whether any subsequent 
+// ptr access is made in nucleus or non-nucleus context.
+// Note i and d tag-access can be merged within tlu_mmu_dp.v
+
+// write of tag-access ctxt needs to be setup in M for subsequent read of MRA in M.
+
+assign	tsb_page_size_g[2:0] = tsb_rd_ps0_sel ? tlu_ctxt_cfg_w2[2:0] : tlu_ctxt_cfg_w2[5:3] ; 
+
+// Listening Flops for Macrotest of mra.
+dff_s #(6) ctxtcfg_listen (
+        .din    (tlu_ctxt_cfg_w2[5:0]),
+        .q      (),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+
+
+//=========================================================================================
+//	Direct Ptr State
+//=========================================================================================
+
+// For new ptr support, if page-size of tte matches that of ps1 then
+// direct-ptr maps to ps1-ptr else ps0-ptr.
+
+wire	daccess_prot_qual ;
+assign	daccess_prot_qual = 
+lsu_tlu_daccess_prot_g & ~lsu_tlu_daccess_excptn_g & 
+inst_vld_g & ~(priority_squash_g | flush_mmuasi_wr) ;
+
+// For SPARC_HPV_EN, 64k represents ps1 ptr.
+assign	dptr0_pg64k_en = daccess_prot_qual & thread0_sel_g ;
+assign	dptr1_pg64k_en = daccess_prot_qual & thread1_sel_g ;
+assign	dptr2_pg64k_en = daccess_prot_qual & thread2_sel_g ;
+assign	dptr3_pg64k_en = daccess_prot_qual & thread3_sel_g ;
+
+// For SPARC_HPV_EN this means ps0 sel. This should be an internal
+// wire with SPARC_HPV_EN
+assign	dmmu_direct_8kptr_sel_g  = 
+	dmmu_direct_ptr_e & ((thread0_e & ~dptr0_pg64k_vld) |
+				(thread1_e & ~dptr1_pg64k_vld) |
+				(thread2_e & ~dptr2_pg64k_vld) |
+				(thread3_e & ~dptr3_pg64k_vld));
+wire	dptr_state_din ;
+	assign dptr_state_din = dacc_prot_ps1_match ;
+
+dffre_s  #(1) dptrstate_0 (
+        .din    (dptr_state_din),
+        .q    	(dptr0_pg64k_vld),
+	.rst	(~rst_l),	.en	(dptr0_pg64k_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );  
+
+dffre_s  #(1) dptrstate_1 (
+        .din    (dptr_state_din),
+        .q    	(dptr1_pg64k_vld),
+	.rst	(~rst_l),	.en	(dptr1_pg64k_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );  
+
+dffre_s  #(1) dptrstate_2 (
+        .din    (dptr_state_din),
+        .q    	(dptr2_pg64k_vld),
+	.rst	(~rst_l),	.en	(dptr2_pg64k_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );  
+
+dffre_s  #(1) dptrstate_3 (
+        .din    (dptr_state_din),
+        .q    	(dptr3_pg64k_vld),
+	.rst	(~rst_l),	.en	(dptr3_pg64k_en),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );  
+
+//=========================================================================================
+//	PS1 PAGE SIZE FOR DMMU
+//=========================================================================================
+
+// Maintain ps1 page-size for dmmu zero/non-zero ctxt. This is required to compare
+// against the page-size of the tte on a data-access-protection to set-up the
+// direct-pointer. Note that the real copy is in the mra.
+
+wire [2:0] zctxt_cfg0_ps1,zctxt_cfg1_ps1,zctxt_cfg2_ps1,zctxt_cfg3_ps1;
+wire [2:0] nzctxt_cfg0_ps1,nzctxt_cfg1_ps1,nzctxt_cfg2_ps1,nzctxt_cfg3_ps1;
+wire [3:0] dzctxt_cfg_wr_en ;
+wire [3:0] dnzctxt_cfg_wr_en ;
+
+assign	dzctxt_cfg_wr_en[3] = dmmu_zctxt_cfg_en & st_inst_g & thread3_sel_g ;
+assign	dzctxt_cfg_wr_en[2] = dmmu_zctxt_cfg_en & st_inst_g & thread2_sel_g ;
+assign	dzctxt_cfg_wr_en[1] = dmmu_zctxt_cfg_en & st_inst_g & thread1_sel_g ;
+assign	dzctxt_cfg_wr_en[0] = dmmu_zctxt_cfg_en & st_inst_g & thread0_sel_g ;
+
+assign	dnzctxt_cfg_wr_en[3] = dmmu_nzctxt_cfg_en & st_inst_g & thread3_sel_g ;
+assign	dnzctxt_cfg_wr_en[2] = dmmu_nzctxt_cfg_en & st_inst_g & thread2_sel_g ;
+assign	dnzctxt_cfg_wr_en[1] = dmmu_nzctxt_cfg_en & st_inst_g & thread1_sel_g ;
+assign	dnzctxt_cfg_wr_en[0] = dmmu_nzctxt_cfg_en & st_inst_g & thread0_sel_g ;
+
+// Thread0
+// Zero-Ctxt Cfg PS1
+dffe_s #(3)   zctxtps1_0 (
+        .din    (lsu_tlu_st_rs3_data_b12t0_g[10:8]), 
+	.q  	(zctxt_cfg0_ps1[2:0]),
+        .en 	(dzctxt_cfg_wr_en[0]), 	.clk (clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+// Non-Zero-Ctxt Cfg PS1
+dffe_s #(3)   nzctxtps1_0 (
+        .din    (lsu_tlu_st_rs3_data_b12t0_g[10:8]), 
+	.q  	(nzctxt_cfg0_ps1[2:0]),
+        .en 	(dnzctxt_cfg_wr_en[0]), 	.clk (clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+// Thread1
+// Zero-Ctxt Cfg PS1
+dffe_s #(3)   zctxtps1_1 (
+        .din    (lsu_tlu_st_rs3_data_b12t0_g[10:8]), 
+	.q  	(zctxt_cfg1_ps1[2:0]),
+        .en 	(dzctxt_cfg_wr_en[1]), 	.clk (clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+// Non-Zero-Ctxt Cfg PS1
+dffe_s #(3)   nzctxtps1_1 (
+        .din    (lsu_tlu_st_rs3_data_b12t0_g[10:8]), 
+	.q  	(nzctxt_cfg1_ps1[2:0]),
+        .en 	(dnzctxt_cfg_wr_en[1]), 	.clk (clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+// Thread2
+// Zero-Ctxt Cfg PS1
+dffe_s #(3)   zctxtps1_2 (
+        .din    (lsu_tlu_st_rs3_data_b12t0_g[10:8]), 
+	.q  	(zctxt_cfg2_ps1[2:0]),
+        .en 	(dzctxt_cfg_wr_en[2]), 	.clk (clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+// Non-Zero-Ctxt Cfg PS1
+dffe_s #(3)   nzctxtps1_2 (
+        .din    (lsu_tlu_st_rs3_data_b12t0_g[10:8]), 
+	.q  	(nzctxt_cfg2_ps1[2:0]),
+        .en 	(dnzctxt_cfg_wr_en[2]), 	.clk (clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+// Thread3
+// Zero-Ctxt Cfg PS1
+dffe_s #(3)   zctxtps1_3 (
+        .din    (lsu_tlu_st_rs3_data_b12t0_g[10:8]), 
+	.q  	(zctxt_cfg3_ps1[2:0]),
+        .en 	(dzctxt_cfg_wr_en[3]), 	.clk (clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+// Non-Zero-Ctxt Cfg PS1
+dffe_s #(3)   nzctxtps1_3 (
+        .din    (lsu_tlu_st_rs3_data_b12t0_g[10:8]), 
+	.q  	(nzctxt_cfg3_ps1[2:0]),
+        .en 	(dnzctxt_cfg_wr_en[3]), 	.clk (clk),
+        .se     (1'b0),       	.si (),          .so ()
+        );
+
+
+wire [2:0] zctxt_cfg_ps1,nzctxt_cfg_ps1 ;
+
+assign	zctxt_cfg_ps1[2:0] =
+	thread0_sel_g ? zctxt_cfg0_ps1[2:0] :
+		thread1_sel_g ? zctxt_cfg1_ps1[2:0] :
+			thread2_sel_g ? zctxt_cfg2_ps1[2:0] :
+						zctxt_cfg3_ps1[2:0] ;
+
+assign	nzctxt_cfg_ps1[2:0] =
+	thread0_sel_g ? nzctxt_cfg0_ps1[2:0] :
+		thread1_sel_g ? nzctxt_cfg1_ps1[2:0] :
+			thread2_sel_g ? nzctxt_cfg2_ps1[2:0] :
+						nzctxt_cfg3_ps1[2:0] ;
+wire	nucleus_ctxt_g ;
+dff_s nctxt_stgg(
+        .din    (lsu_tlu_nucleus_ctxt_m),
+        .q      (nucleus_ctxt_g),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+wire	[2:0]	ctxt_cfg_ps1 ;
+assign	ctxt_cfg_ps1[2:0] = 
+	nucleus_ctxt_g ? zctxt_cfg_ps1[2:0] : nzctxt_cfg_ps1[2:0] ;
+
+assign	dacc_prot_ps1_match
+	= (lsu_tlu_tte_pg_sz_g[2:0] == ctxt_cfg_ps1[2:0]) ;
+
+//=========================================================================================
+//	CTXT SEL
+//=========================================================================================
+
+wire	thread_tl_zero_e,thread_tl_zero_m ;
+assign thread_tl_zero_e =
+        thread0_e ? tlu_lsu_tl_zero[0] :
+                thread1_e ? tlu_lsu_tl_zero[1] :
+                        thread2_e ? tlu_lsu_tl_zero[2] : tlu_lsu_tl_zero[3];
+
+dff_s tlz_stgm(
+        .din    (thread_tl_zero_e),
+        .q      (thread_tl_zero_m),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+// Generate selects for ctxt to be written to tag_access
+// iside trap meant to cover immu_miss and inst_access_excp
+// modified for hypervisor support
+// assign       iside_trap = exu_tlu_ttype_vld_m | immu_va_oor_brnchetc_m | exu_tlu_va_oor_jl_ret_m;
+
+wire	pstate_am_e,pstate_am_m;
+assign  pstate_am_e =
+        (thread0_e & tlu_lsu_pstate_am[0]) |
+        (thread1_e & tlu_lsu_pstate_am[1]) |
+        (thread2_e & tlu_lsu_pstate_am[2]) |
+        (thread3_e & tlu_lsu_pstate_am[3]);
+
+dff_s pam_stgm(
+        .din    (pstate_am_e),
+        .q      (pstate_am_m),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+wire	immu_va_oor_brnchetc_m ;
+assign  immu_va_oor_brnchetc_m
+        = exu_tlu_va_oor_m & ~pstate_am_m & ~memref_m;
+
+wire iside_trap ;
+assign  iside_trap =
+            ifu_tlu_immu_miss_m | // exu_tlu_ttype_vld_m : Rm along with Bug 5346
+            immu_va_oor_brnchetc_m | exu_tlu_va_oor_jl_ret_m |
+	    ifu_tlu_priv_violtn_m ; // Bug 5346.
+assign  tlu_tag_access_ctxt_sel_m[0] = iside_trap &  thread_tl_zero_m;
+assign  tlu_tag_access_ctxt_sel_m[1] = iside_trap & ~thread_tl_zero_m;
+assign  tlu_tag_access_ctxt_sel_m[2] = ~iside_trap;
+
+
+//=========================================================================================
+//	TLB Write Data
+//=========================================================================================
+
+wire	[2:0]	pg_size ;
+wire		page_8k, page_64k, page_4m ;
+wire		va_15_13_vld, va_21_16_vld, va_27_22_vld ;
+
+assign sun4r_tte_g = ~tlb_ldst_va_g[10] ;
+
+assign tlu_sun4r_tte_g = sun4r_tte_g ;
+
+assign 	pg_size[2:0] 	=  
+	sun4r_tte_g ? {lsu_tlu_st_rs3_data_b48_g,lsu_tlu_st_rs3_data_g[62:61]} :
+			{lsu_tlu_st_rs3_data_b12t0_g[2:0]} ;
+
+assign	page_8k		= ~pg_size[2] & ~pg_size[1] & ~pg_size[0] ;	
+assign	page_64k	= ~pg_size[2] & ~pg_size[1] &  pg_size[0] ;	
+assign	page_4m		= ~pg_size[2] &  pg_size[1] &  pg_size[0] ;	
+//assign	page_256m	=  pg_size[2] & ~pg_size[1] &  pg_size[0] ;	
+
+assign	va_15_13_vld 	= page_8k ; 
+assign	va_21_16_vld 	= page_8k | page_64k  ; 
+assign	va_27_22_vld 	= page_8k | page_64k | page_4m ; 
+
+assign	tlu_tte_tag_g[2:0] = {va_27_22_vld,va_21_16_vld,va_15_13_vld} ;
+	
+assign	thread0_async_g = ~tlb_access_tid_g[1] & ~tlb_access_tid_g[0] ;
+assign	thread1_async_g = ~tlb_access_tid_g[1] &  tlb_access_tid_g[0] ;
+assign	thread2_async_g =  tlb_access_tid_g[1] & ~tlb_access_tid_g[0] ;
+//assign	thread3_async_g =  tlb_access_tid_g[1] &  tlb_access_tid_g[0] ; // to be used in instanced mux
+
+assign	tlu_tte_wr_pid_g[2:0] =
+	thread0_async_g ? lsu_pid_state0[2:0] : 
+		thread1_async_g ? lsu_pid_state1[2:0] : 
+			thread2_async_g ? lsu_pid_state2[2:0] : lsu_pid_state3[2:0] ;
+
+// Error Injection :
+// Error injection is one-shot. It will occur for either dmmu or immu. The ifu
+// is informed once the error injection is accomplished.
+
+wire	i_tag_invrt_par,d_tag_invrt_par ;
+wire	i_data_invrt_par,d_data_invrt_par ;
+assign tlu_tlb_tag_invrt_parity = i_tag_invrt_par | d_tag_invrt_par ;
+assign i_tag_invrt_par = (ifu_lsu_error_inj[2] & (immu_data_in_en | immu_data_access_en)) ;
+assign d_tag_invrt_par = (ifu_lsu_error_inj[0] & (dmmu_data_in_en | dmmu_data_access_en)) ;
+assign tlu_tlb_data_invrt_parity = i_data_invrt_par | d_data_invrt_par ;
+assign i_data_invrt_par = (ifu_lsu_error_inj[3] & (immu_data_in_en | immu_data_access_en)) ;
+assign d_data_invrt_par = (ifu_lsu_error_inj[1] & (dmmu_data_in_en | dmmu_data_access_en)) ;
+
+wire tlb_wr_vld ;
+assign tlb_wr_vld = dtlb_wr_vld_g | itlb_wr_vld_g ;
+wire [3:0] err_inj_ack ;
+assign	err_inj_ack[0] = tlb_wr_vld & d_tag_invrt_par ;
+assign	err_inj_ack[1] = tlb_wr_vld & d_data_invrt_par ;
+assign	err_inj_ack[2] = tlb_wr_vld & i_tag_invrt_par ;
+assign	err_inj_ack[3] = tlb_wr_vld & i_data_invrt_par ;
+
+dff_s #(4) err_inj (
+        .din    (err_inj_ack[3:0]),
+        .q      (lsu_ifu_inj_ack[3:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+endmodule
Index: /trunk/T1-CPU/tlu/tlu_addern_32.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_addern_32.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_addern_32.v	(revision 6)
@@ -0,0 +1,53 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_addern_32.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    parameterized adder macro
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                        // time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module	tlu_addern_32 (din, incr, sum);
+// synopsys template
+
+parameter ADDER_DATA_WIDTH = 33;
+parameter INCR_DATA_WIDTH  =  1;
+parameter UPPER_DATA_WIDTH =  ADDER_DATA_WIDTH - INCR_DATA_WIDTH;
+
+input	[ADDER_DATA_WIDTH-1:0]	din;	// base operand
+input	[INCR_DATA_WIDTH-1:0]   incr;	// increment; zero-extended before the add
+output	[ADDER_DATA_WIDTH-1:0]	sum;	// din + incr; carry out of the msb is dropped
+//
+////////////////////////////////////////////////////////////////////////
+// local signal declaration
+////////////////////////////////////////////////////////////////////////
+wire	[ADDER_DATA_WIDTH-1:0]	incr_ext = {{UPPER_DATA_WIDTH{1'b0}}, incr[INCR_DATA_WIDTH-1:0]};
+
+assign	sum[ADDER_DATA_WIDTH-1:0] = din[ADDER_DATA_WIDTH-1:0] + incr_ext[ADDER_DATA_WIDTH-1:0];
+
+endmodule // tlu_addern_32
Index: /trunk/T1-CPU/tlu/tlu_incr64.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_incr64.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_incr64.v	(revision 6)
@@ -0,0 +1,43 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_incr64.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    64b incr macro
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                        // time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module tlu_incr64 ( in, out );
+
+  input  [63:0] in;    // value to be incremented
+
+  output [63:0] out;   // in + 1; wraps to zero when in is all ones
+
+  assign out[63:0] = in[63:0] + 64'd1;
+
+endmodule // tlu_incr64
Index: /trunk/T1-CPU/tlu/sparc_tlu_intdp.v
===================================================================
--- /trunk/T1-CPU/tlu/sparc_tlu_intdp.v	(revision 6)
+++ /trunk/T1-CPU/tlu/sparc_tlu_intdp.v	(revision 6)
@@ -0,0 +1,649 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_tlu_intdp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_tlu_intdp
+//  Description:        
+//    Contains the code for receiving interrupts from the crossbar,
+//    and sending interrupts out to other processors through the crossbar.
+//    The interrupt receive register (INRR, asi=0x49/VA=0),  incoming
+//    vector register (INVR, asi=0x7f/VA=0x40), and interrupt vector
+//    dispatch register (INDR, asi=0x77/VA=0) are implemented in this
+//    block.  This block also initiates thread reset/wake up when a
+//    reset packet is received.  
+//
+*/
+
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include "iop.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+`include "tlu.h"
+//
+// moved defines to tlu.h
+/*
+`define INT_VEC_HI 5
+`define INT_VEC_LO 0
+`define INT_THR_HI  12
+`define INT_THR_LO   8
+`define INT_TYPE_HI 17
+`define INT_TYPE_LO 16
+*/
+
+module sparc_tlu_intdp (/*AUTOARG*/
+   // Outputs
+   int_pending_i2_l, ind_inc_thrid_i1, // indr_inc_rst_pkt, 
+   ind_inc_type_i1, tlu_lsu_int_ldxa_data_w2, int_tlu_rstid_m, 
+   tlu_lsu_pcxpkt, so, 
+   // Inputs
+   //
+   // modified to abide to the Niagara reset methodology 
+   // clk, se, si, reset, lsu_tlu_intpkt, lsu_tlu_st_rs3_data_g, 
+   rclk, se, si, tlu_rst_l, lsu_tlu_st_rs3_data_g, // lsu_tlu_intpkt, 
+   inc_ind_ld_int_i1, inc_ind_rstthr_i1, inc_ind_asi_thr, 
+   inc_ind_asi_wr_indr, inc_ind_indr_grant, // inc_ind_asi_inrr, 
+   inc_ind_thr_m, inc_ind_asi_wr_inrr, inc_ind_asi_rd_invr, 
+   inc_indr_req_valid, inc_indr_req_thrid, tlu_asi_rdata_mxsel_g,
+   tlu_asi_queue_rdata_g, tlu_scpd_asi_rdata_g, lsu_ind_intpkt_id,
+   lsu_ind_intpkt_type, lsu_ind_intpkt_thr 
+   );
+
+   //
+   // modified to abide to the Niagara reset methodology 
+   // input      clk, se, si, reset;
+   input      rclk, se, si, tlu_rst_l;
+
+   // from lsu
+   // input [17:0]  lsu_tlu_intpkt;   // int pkt from cpx
+   input [63:0]  lsu_tlu_st_rs3_data_g;     // write data for int regs
+
+   // select lines from int_ctl
+   input [3:0] 	 inc_ind_ld_int_i1;            // ld ext interrupt to inrr
+   input [3:0] 	 inc_ind_rstthr_i1;
+   
+   // changing the select from inverting to non-inverting for grape
+   // input [3:0] 	 inc_ind_asi_thr_l;            // thread issuing asi command
+   input [3:0] 	 inc_ind_asi_thr;            // thread issuing asi command
+   input [3:0] 	 inc_ind_asi_wr_indr;          // write INDR
+   // convert the signal to non-inverting version for grape
+   // input [3:0] 	 inc_ind_indr_grant_l;         // transmit INDR to PCX
+   input [3:0] 	 inc_ind_indr_grant;         // transmit INDR to PCX
+   // obsolete input
+   // input 	 inc_ind_asi_inrr;             // read INRR
+   // convert the signal to non-inverting version for grape
+   // input [3:0]	 inc_ind_thr_m_l;
+   input [3:0]	 inc_ind_thr_m;
+   
+   // other controls
+   input [3:0] 	 inc_ind_asi_wr_inrr;  // write INRR
+   input [3:0] 	 inc_ind_asi_rd_invr;  // read INVR (reset corr bit in INRR)
+   
+   // indr request
+   input 	 inc_indr_req_valid;   // valid value in INDR, i.e make req
+   input [1:0] 	 inc_indr_req_thrid;   // thread making request
+   //
+   // asi rdata mux select
+   input [3:0] tlu_asi_rdata_mxsel_g;
+   // asi data from other blocks
+   input [`TLU_SCPD_DATA_WIDTH-1:0] tlu_scpd_asi_rdata_g;
+   input [`TLU_ASI_QUE_WIDTH-1:0]   tlu_asi_queue_rdata_g;
+   input [4:0]  lsu_ind_intpkt_thr;
+   input [1:0]  lsu_ind_intpkt_type;
+   input [5:0]  lsu_ind_intpkt_id;
+
+   // to int ctl
+   output [3:0]  int_pending_i2_l;     // interrupt still pending
+   // output 	 indr_inc_rst_pkt;
+
+   output [4:0]  ind_inc_thrid_i1;
+   output [1:0]  ind_inc_type_i1;
+   
+   // to outside world
+   output [63:0] tlu_lsu_int_ldxa_data_w2; // read data from asi regs 
+   output [5:0]  int_tlu_rstid_m;
+   
+   output [25:0] tlu_lsu_pcxpkt;       // pcxpkt for inter processor int
+   
+   output 	 so;
+
+   // local signals
+   //
+   // added to abide to the Niagara reset methodology 
+   wire local_rst; // local reset signal 
+   //
+   wire [63:0] int_tlu_asi_data;     // read data from int regs
+   // interrupt and reset id
+   wire [5:0] 	 int_id_i1;
+   wire [5:0] 	 t0_rstid_i2,
+		 t1_rstid_i2,
+		 t2_rstid_i2,
+		 t3_rstid_i2,
+		 next_t0_rstid_i1,
+		 next_t1_rstid_i1,
+		 next_t2_rstid_i1,
+		 next_t3_rstid_i1;
+
+   // Interrupt receive register
+   wire [63:0]	 inrr_dec_i1,
+		 inrr_rd_data_i2;
+
+   wire [63:0] 	 t0_inrr_i2,
+ 		 t1_inrr_i2,
+		 t2_inrr_i2,
+		 t3_inrr_i2,
+		 t0_inrr_aw_i2,
+		 t1_inrr_aw_i2,
+		 t2_inrr_aw_i2,
+		 t3_inrr_aw_i2,
+		 t0_inrr_arw_i1,
+		 t1_inrr_arw_i1,
+		 t2_inrr_arw_i1,
+		 t3_inrr_arw_i1,
+		 next_t0_inrr_i1,
+		 next_t1_inrr_i1,
+		 next_t2_inrr_i1,
+		 next_t3_inrr_i1;
+   
+   wire [63:0] 	 new_t0_inrr_i1,
+		 new_t1_inrr_i1,
+		 new_t2_inrr_i1,
+		 new_t3_inrr_i1;
+
+   // clear interrupt through asi
+   wire [63:0]   t0_asi_wr_data,
+		 t1_asi_wr_data,
+		 t2_asi_wr_data,
+		 t3_asi_wr_data;
+
+   // interrupt vector
+   wire [5:0] 	 t0_invr_i3,
+ 		 t1_invr_i3,
+ 		 t2_invr_i3,
+ 		 t3_invr_i3,
+    		 t0_invr_i2,
+ 		 t1_invr_i2,
+ 		 t2_invr_i2,
+ 		 t3_invr_i2;
+   wire [5:0] 	 invr_rd_data_i3;
+
+   // highest priority interrupt
+   wire [63:0] 	 pe_ivec_i3,
+		 t0_pe_ivec_i3,
+		 t1_pe_ivec_i3,
+		 t2_pe_ivec_i3,
+		 t3_pe_ivec_i3;
+
+   // interrupt dispatch
+   // removed the obsolete bits 
+   // wire [12:0] 	 indr_wr_pkt;
+   wire [10:0] 	 indr_wr_pkt;
+
+   // removed the obsolete bits 
+   // wire [12:0] 	 indr_pcxpkt,
+   wire [10:0] 	 indr_pcxpkt,
+		 t0_indr,
+		 t1_indr,
+		 t2_indr,
+		 t3_indr,
+		 t0_indr_next,
+		 t1_indr_next,
+		 t2_indr_next,
+		 t3_indr_next;
+   // 
+   // local clock
+   wire clk;
+
+   //
+   // Code Starts Here
+   //
+   //----------------------------------------------------------------------
+   // creating local clock
+   //----------------------------------------------------------------------
+   assign clk = rclk;
+
+   //----------------------------------------------------------------------
+   // Interrupt Receive
+   //----------------------------------------------------------------------
+   //
+   // create local reset signal
+   assign local_rst = ~tlu_rst_l;
+   
+   // I1 Stage
+   // decode interrupt vector
+   // modified due to interface clean-up
+   /*
+   assign  int_id_i1 = lsu_tlu_intpkt[`INT_VEC_HI:`INT_VEC_LO];
+   assign  ind_inc_type_i1 = lsu_tlu_intpkt[`INT_TYPE_HI:`INT_TYPE_LO];
+   assign  ind_inc_thrid_i1 = lsu_tlu_intpkt[`INT_THR_HI:`INT_THR_LO];
+   */
+   assign  int_id_i1[5:0]        = lsu_ind_intpkt_id[5:0];
+   assign  ind_inc_type_i1[1:0]  = lsu_ind_intpkt_type[1:0];
+   assign  ind_inc_thrid_i1[4:0] = lsu_ind_intpkt_thr[4:0];
+
+   // rstid enable mux
+   dp_mux2es #6 rid_mux0(.dout  (next_t0_rstid_i1[5:0]),
+			 .in0   (t0_rstid_i2[5:0]),
+			 .in1   (int_id_i1[5:0]),
+			 .sel   (inc_ind_rstthr_i1[0]));
+
+`ifdef FPGA_SYN_1THREAD
+     dff_s #6 rid0_reg(.din (next_t0_rstid_i1[5:0]),
+		   .q   (t0_rstid_i2[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   assign  int_tlu_rstid_m[5:0] = t0_rstid_i2[5:0];
+   
+`else
+ 
+   dp_mux2es #6 rid_mux1(.dout  (next_t1_rstid_i1[5:0]),
+			 .in0   (t1_rstid_i2[5:0]),
+			 .in1   (int_id_i1[5:0]),
+			 .sel   (inc_ind_rstthr_i1[1]));
+   
+   dp_mux2es #6 rid_mux2(.dout  (next_t2_rstid_i1[5:0]),
+			 .in0   (t2_rstid_i2[5:0]),
+			 .in1   (int_id_i1[5:0]),
+			 .sel   (inc_ind_rstthr_i1[2]));
+   
+   dp_mux2es #6 rid_mux3(.dout  (next_t3_rstid_i1[5:0]),
+			 .in0   (t3_rstid_i2[5:0]),
+			 .in1   (int_id_i1[5:0]),
+			 .sel   (inc_ind_rstthr_i1[3]));
+
+   // rst id flops
+   dff_s #6 rid0_reg(.din (next_t0_rstid_i1[5:0]),
+		   .q   (t0_rstid_i2[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   dff_s #6 rid1_reg(.din (next_t1_rstid_i1[5:0]),
+		   .q   (t1_rstid_i2[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   dff_s #6 rid2_reg(.din (next_t2_rstid_i1[5:0]),
+		   .q   (t2_rstid_i2[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   dff_s #6 rid3_reg(.din (next_t3_rstid_i1[5:0]),
+		   .q   (t3_rstid_i2[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   
+   // rstid to tlu in M stage
+   // changing the select from inverting to non-inverting for grape
+   /*
+   dp_mux4ds #6 tlurid_mux(.dout (int_tlu_rstid_m[5:0]),
+			   .in0  (t0_rstid_i2[5:0]),
+			   .in1  (t1_rstid_i2[5:0]),
+			   .in2  (t2_rstid_i2[5:0]),
+			   .in3  (t3_rstid_i2[5:0]),
+			   .sel0_l (inc_ind_thr_m_l[0]),
+			   .sel1_l (inc_ind_thr_m_l[1]),
+			   .sel2_l (inc_ind_thr_m_l[2]),
+			   .sel3_l (inc_ind_thr_m_l[3]));
+   */
+   dp_mux4ds #6 tlurid_mux(.dout (int_tlu_rstid_m[5:0]),
+			   .in0  (t0_rstid_i2[5:0]),
+			   .in1  (t1_rstid_i2[5:0]),
+			   .in2  (t2_rstid_i2[5:0]),
+			   .in3  (t3_rstid_i2[5:0]),
+			   .sel0_l (~inc_ind_thr_m[0]),
+			   .sel1_l (~inc_ind_thr_m[1]),
+			   .sel2_l (~inc_ind_thr_m[2]),
+			   .sel3_l (~inc_ind_thr_m[3]));
+
+`endif // !`ifdef FPGA_SYN_1THREAD
+
+   sparc_tlu_dec64 iv_dec(.in  (int_id_i1[5:0]),
+			  .out (inrr_dec_i1[63:0]));
+
+   // merge decoded interrupt vector with inrr
+   assign  new_t0_inrr_i1 = inrr_dec_i1 | t0_inrr_arw_i1;
+   assign  new_t1_inrr_i1 = inrr_dec_i1 | t1_inrr_arw_i1;
+   assign  new_t2_inrr_i1 = inrr_dec_i1 | t2_inrr_arw_i1;
+   assign  new_t3_inrr_i1 = inrr_dec_i1 | t3_inrr_arw_i1;
+
+   // enable mux to load new interrupt to INRR
+   dp_mux2es #64 inrr_en_mux0(.dout  (next_t0_inrr_i1[63:0]),
+			  .in0   (t0_inrr_arw_i1[63:0]),
+			  .in1   (new_t0_inrr_i1[63:0]),
+			  .sel   (inc_ind_ld_int_i1[0]));
+`ifdef FPGA_SYN_1THREAD
+   // interrupt receive register (INRR)
+   // change to dff -- software will reset before IE turns on
+   dffr_s #64 t0_inrr (.din (next_t0_inrr_i1[63:0]),
+		     .q   (t0_inrr_i2[63:0]),
+		     .clk (clk),
+   //
+   // modified to abide to the Niagara reset methodology 
+   //		     .rst (reset),
+   		     .rst (local_rst),
+		     .se  (se), .si(), .so());
+   assign   inrr_rd_data_i2[63:0] = t0_inrr_i2[63:0];
+
+`else
+   
+   dp_mux2es #64 inrr_en_mux1(.dout  (next_t1_inrr_i1[63:0]),
+			  .in0   (t1_inrr_arw_i1[63:0]),
+			  .in1   (new_t1_inrr_i1[63:0]),
+			  .sel   (inc_ind_ld_int_i1[1]));
+   dp_mux2es #64 inrr_en_mux2(.dout  (next_t2_inrr_i1[63:0]),
+			  .in0   (t2_inrr_arw_i1[63:0]),
+			  .in1   (new_t2_inrr_i1[63:0]),
+			  .sel   (inc_ind_ld_int_i1[2]));
+   dp_mux2es #64 inrr_en_mux3(.dout  (next_t3_inrr_i1[63:0]),
+			  .in0   (t3_inrr_arw_i1[63:0]),
+			  .in1   (new_t3_inrr_i1[63:0]),
+			  .sel   (inc_ind_ld_int_i1[3]));
+
+   // interrupt receive register (INRR)
+   // change to dff -- software will reset before IE turns on
+   dffr_s #64 t0_inrr (.din (next_t0_inrr_i1[63:0]),
+		     .q   (t0_inrr_i2[63:0]),
+		     .clk (clk),
+   //
+   // modified to abide to the Niagara reset methodology 
+   //		     .rst (reset),
+   		     .rst (local_rst),
+		     .se  (se), .si(), .so());
+   dffr_s #64 t1_inrr (.din (next_t1_inrr_i1[63:0]),
+		     .q   (t1_inrr_i2[63:0]),
+		     .clk (clk),
+   //
+   // modified to abide to the Niagara reset methodology 
+   //		     .rst (reset),
+   		     .rst (local_rst),
+		     .se  (se), .si(), .so());
+   dffr_s #64 t2_inrr (.din (next_t2_inrr_i1[63:0]),
+		     .q   (t2_inrr_i2[63:0]),
+		     .clk (clk),
+   //
+   // modified to abide to the Niagara reset methodology 
+   //		     .rst (reset),
+   		     .rst (local_rst),
+		     .se  (se), .si(), .so());
+   dffr_s #64 t3_inrr (.din (next_t3_inrr_i1[63:0]),
+		     .q   (t3_inrr_i2[63:0]),
+		     .clk (clk),
+   //
+   // modified to abide to the Niagara reset methodology 
+   //		     .rst (reset),
+   		     .rst (local_rst),
+		     .se  (se), .si(), .so());
+
+   // I2 Stage
+   // read out INRR to asi
+   // changing the select from inverting to non-inverting for grape
+   /*
+   dp_mux4ds #64 inrr_rd_mux(.dout (inrr_rd_data_i2[63:0]),
+			 .in0  (t0_inrr_i2[63:0]),
+			 .in1  (t1_inrr_i2[63:0]),
+			 .in2  (t2_inrr_i2[63:0]),
+			 .in3  (t3_inrr_i2[63:0]),
+			 .sel0_l (inc_ind_asi_thr_l[0]),
+			 .sel1_l (inc_ind_asi_thr_l[1]),
+			 .sel2_l (inc_ind_asi_thr_l[2]),
+			 .sel3_l (inc_ind_asi_thr_l[3]));
+    */
+   dp_mux4ds #64 inrr_rd_mux(.dout (inrr_rd_data_i2[63:0]),
+			 .in0  (t0_inrr_i2[63:0]),
+			 .in1  (t1_inrr_i2[63:0]),
+			 .in2  (t2_inrr_i2[63:0]),
+			 .in3  (t3_inrr_i2[63:0]),
+			 .sel0_l (~inc_ind_asi_thr[0]),
+			 .sel1_l (~inc_ind_asi_thr[1]),
+			 .sel2_l (~inc_ind_asi_thr[2]),
+			 .sel3_l (~inc_ind_asi_thr[3]));
+
+`endif // !`ifdef FPGA_SYN_1THREAD
+		     
+   // signal interrupt pending
+   sparc_tlu_zcmp64  zcmp0(.in  (t0_inrr_i2[63:0]),
+			   .zero (int_pending_i2_l[0]));
+
+`ifdef FPGA_SYN_1THREAD
+   assign t0_asi_wr_data = ~(~lsu_tlu_st_rs3_data_g & 
+			      {64{inc_ind_asi_wr_inrr[0]}});
+   assign t0_inrr_aw_i2 = t0_inrr_i2 & t0_asi_wr_data;
+   sparc_tlu_penc64 t0_invr_penc(.in  (t0_inrr_i2[63:0]),
+				 .out (t0_invr_i2[5:0]));
+   dff_s #6 t0_invr (.din (t0_invr_i2[5:0]),
+		   .q   (t0_invr_i3[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   assign invr_rd_data_i3[5:0] = t0_invr_i3[5:0];
+   
+`else
+   
+   sparc_tlu_zcmp64  zcmp1(.in  (t1_inrr_i2[63:0]),
+			   .zero (int_pending_i2_l[1]));
+   sparc_tlu_zcmp64  zcmp2(.in  (t2_inrr_i2[63:0]),
+			   .zero (int_pending_i2_l[2]));
+   sparc_tlu_zcmp64  zcmp3(.in  (t3_inrr_i2[63:0]),
+			   .zero (int_pending_i2_l[3]));
+
+   // write data -- only zeros may be written to the INRR.  An attempt
+   // to write 1 is ignored.
+   // Force to all 1 if no write
+   assign  t0_asi_wr_data = ~(~lsu_tlu_st_rs3_data_g & 
+			      {64{inc_ind_asi_wr_inrr[0]}});
+   assign  t1_asi_wr_data = ~(~lsu_tlu_st_rs3_data_g & 
+			      {64{inc_ind_asi_wr_inrr[1]}});
+   assign  t2_asi_wr_data = ~(~lsu_tlu_st_rs3_data_g & 
+			      {64{inc_ind_asi_wr_inrr[2]}});
+   assign  t3_asi_wr_data = ~(~lsu_tlu_st_rs3_data_g & 
+			      {64{inc_ind_asi_wr_inrr[3]}});
+   
+   assign  t0_inrr_aw_i2 = t0_inrr_i2 & t0_asi_wr_data;
+   assign  t1_inrr_aw_i2 = t1_inrr_i2 & t1_asi_wr_data;
+   assign  t2_inrr_aw_i2 = t2_inrr_i2 & t2_asi_wr_data;
+   assign  t3_inrr_aw_i2 = t3_inrr_i2 & t3_asi_wr_data;
+
+   // priority encode INRR to 6 bits to get INVR
+   // b63 has the highest priority
+   sparc_tlu_penc64 t0_invr_penc(.in  (t0_inrr_i2[63:0]),
+				 .out (t0_invr_i2[5:0]));
+   sparc_tlu_penc64 t1_invr_penc(.in  (t1_inrr_i2[63:0]),
+				 .out (t1_invr_i2[5:0]));
+   sparc_tlu_penc64 t2_invr_penc(.in  (t2_inrr_i2[63:0]),
+				 .out (t2_invr_i2[5:0]));
+   sparc_tlu_penc64 t3_invr_penc(.in  (t3_inrr_i2[63:0]),
+				 .out (t3_invr_i2[5:0]));
+
+   // Interrupt Vector Register (INVR)
+   // Cannot write to INVR
+   dff_s #6 t0_invr (.din (t0_invr_i2[5:0]),
+		   .q   (t0_invr_i3[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   dff_s #6 t1_invr (.din (t1_invr_i2[5:0]),
+		   .q   (t1_invr_i3[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   dff_s #6 t2_invr (.din (t2_invr_i2[5:0]),
+		   .q   (t2_invr_i3[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   dff_s #6 t3_invr (.din (t3_invr_i2[5:0]),
+		   .q   (t3_invr_i3[5:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+
+   // I3 stage
+   // read out to asi data 
+   // changing the select from inverting to non-inverting for grape
+   /*
+   dp_mux4ds #6 invr_rd_mux(.dout (invr_rd_data_i3[5:0]),
+			    .in0  (t0_invr_i3[5:0]),
+			    .in1  (t1_invr_i3[5:0]),
+			    .in2  (t2_invr_i3[5:0]),
+			    .in3  (t3_invr_i3[5:0]),
+			    .sel0_l (inc_ind_asi_thr_l[0]),
+			    .sel1_l (inc_ind_asi_thr_l[1]),
+			    .sel2_l (inc_ind_asi_thr_l[2]),
+			    .sel3_l (inc_ind_asi_thr_l[3]));
+   */
+   dp_mux4ds #6 invr_rd_mux(.dout (invr_rd_data_i3[5:0]),
+			    .in0  (t0_invr_i3[5:0]),
+			    .in1  (t1_invr_i3[5:0]),
+			    .in2  (t2_invr_i3[5:0]),
+			    .in3  (t3_invr_i3[5:0]),
+			    .sel0_l (~inc_ind_asi_thr[0]),
+			    .sel1_l (~inc_ind_asi_thr[1]),
+			    .sel2_l (~inc_ind_asi_thr[2]),
+			    .sel3_l (~inc_ind_asi_thr[3]));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+   //
+   // modified for bug 2109
+   // asi rd data mux
+   dp_mux4ds #(64) asi_rd_mux(
+			   .in0  ({58'b0, invr_rd_data_i3[5:0]}),
+			   .in1  (inrr_rd_data_i2[63:0]),
+			   .in2  (tlu_scpd_asi_rdata_g[`TLU_SCPD_DATA_WIDTH-1:0]),
+			   .in3  ({50'b0, tlu_asi_queue_rdata_g[`TLU_ASI_QUE_WIDTH-1:0],6'b0}),
+			   .sel0_l (~tlu_asi_rdata_mxsel_g[0]),
+			   .sel1_l (~tlu_asi_rdata_mxsel_g[1]),
+			   .sel2_l (~tlu_asi_rdata_mxsel_g[2]),
+			   .sel3_l (~tlu_asi_rdata_mxsel_g[3]),
+               .dout (int_tlu_asi_data[63:0]));
+
+   dff_s #(64) dff_tlu_lsu_int_ldxa_data_w2 (
+        .din (int_tlu_asi_data[63:0]),
+		.q   (tlu_lsu_int_ldxa_data_w2[63:0]),
+		.clk (clk),
+		.se  (se), 
+        .si(), 
+        .so());
+   
+   sparc_tlu_dec64 inrr_pe_dec(.in  (invr_rd_data_i3[5:0]),
+			       .out (pe_ivec_i3[63:0]));
+
+   // when INVR is read, zero out the corresponding bit in INRR
+   assign  t0_pe_ivec_i3 = pe_ivec_i3 & {64{inc_ind_asi_rd_invr[0]}};
+   assign  t1_pe_ivec_i3 = pe_ivec_i3 & {64{inc_ind_asi_rd_invr[1]}};
+   assign  t2_pe_ivec_i3 = pe_ivec_i3 & {64{inc_ind_asi_rd_invr[2]}};
+   assign  t3_pe_ivec_i3 = pe_ivec_i3 & {64{inc_ind_asi_rd_invr[3]}};
+
+   assign  t0_inrr_arw_i1 = t0_inrr_aw_i2 & ~t0_pe_ivec_i3;
+   assign  t1_inrr_arw_i1 = t1_inrr_aw_i2 & ~t1_pe_ivec_i3;
+   assign  t2_inrr_arw_i1 = t2_inrr_aw_i2 & ~t2_pe_ivec_i3;
+   assign  t3_inrr_arw_i1 = t3_inrr_aw_i2 & ~t3_pe_ivec_i3;
+
+   //----------------------------------------------------------------------
+   // Interrupt Dispatch
+   //----------------------------------------------------------------------
+   // modified to remove the unused bits
+   //
+   // assign  indr_wr_pkt = {lsu_tlu_st_rs3_data_g[`INT_TYPE_HI:`INT_TYPE_LO], 
+   assign  indr_wr_pkt = {lsu_tlu_st_rs3_data_g[`INT_THR_HI:`INT_THR_LO], 
+			  lsu_tlu_st_rs3_data_g[`INT_VEC_HI:`INT_VEC_LO]};
+   // 
+   // removed for timing 
+   // assign  indr_inc_rst_pkt = lsu_tlu_st_rs3_data_g[`INT_TYPE_HI] |
+   // 	                      lsu_tlu_st_rs3_data_g[`INT_TYPE_LO];
+   
+   dp_mux2es  #11   t0_indr_mux(.dout (t0_indr_next[10:0]),
+				.in0  (t0_indr[10:0]),
+				.in1  (indr_wr_pkt[10:0]),
+				.sel  (inc_ind_asi_wr_indr[0]));
+`ifdef FPGA_SYN_1THREAD
+   dff_s #11 t0_indr_reg(.din (t0_indr_next[10:0]),
+		   .q   (t0_indr[10:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   assign  indr_pcxpkt[10:0] = t0_indr[10:0];
+   
+`else
+   
+   dp_mux2es  #11   t1_indr_mux(.dout (t1_indr_next[10:0]),
+				.in0  (t1_indr[10:0]),
+				.in1  (indr_wr_pkt[10:0]),
+				.sel  (inc_ind_asi_wr_indr[1]));
+   dp_mux2es  #11   t2_indr_mux(.dout (t2_indr_next[10:0]),
+				.in0  (t2_indr[10:0]),
+				.in1  (indr_wr_pkt[10:0]),
+				.sel  (inc_ind_asi_wr_indr[2]));
+   dp_mux2es  #11   t3_indr_mux(.dout (t3_indr_next[10:0]),
+				.in0  (t3_indr[10:0]),
+				.in1  (indr_wr_pkt[10:0]),
+				.sel  (inc_ind_asi_wr_indr[3]));
+   
+   dff_s #11 t0_indr_reg(.din (t0_indr_next[10:0]),
+		   .q   (t0_indr[10:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   dff_s #11 t1_indr_reg(.din (t1_indr_next[10:0]),
+		   .q   (t1_indr[10:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   dff_s #11 t2_indr_reg(.din (t2_indr_next[10:0]),
+		   .q   (t2_indr[10:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+   dff_s #11 t3_indr_reg(.din (t3_indr_next[10:0]),
+		   .q   (t3_indr[10:0]),
+		   .clk (clk),
+		   .se  (se), .si(), .so());
+
+   // changing the select from inverting to non-inverting for grape
+   /*
+   dp_mux4ds #13 int_dsp_mux(.dout (indr_pcxpkt[12:0]),
+			     .in0  (t0_indr[12:0]),
+			     .in1  (t1_indr[12:0]),
+			     .in2  (t2_indr[12:0]),
+			     .in3  (t3_indr[12:0]),
+			     .sel0_l (inc_ind_indr_grant_l[0]),
+			     .sel1_l (inc_ind_indr_grant_l[1]),
+			     .sel2_l (inc_ind_indr_grant_l[2]),
+			     .sel3_l (inc_ind_indr_grant_l[3]));
+    */
+   dp_mux4ds #11 int_dsp_mux(.dout (indr_pcxpkt[10:0]),
+			     .in0  (t0_indr[10:0]),
+			     .in1  (t1_indr[10:0]),
+			     .in2  (t2_indr[10:0]),
+			     .in3  (t3_indr[10:0]),
+			     .sel0_l (~inc_ind_indr_grant[0]),
+			     .sel1_l (~inc_ind_indr_grant[1]),
+			     .sel2_l (~inc_ind_indr_grant[2]),
+			     .sel3_l (~inc_ind_indr_grant[3]));
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+   
+   assign  tlu_lsu_pcxpkt[25:0] = {inc_indr_req_valid,  // 25
+				   {`INT_RQ},               // 24:20
+				   inc_indr_req_thrid[1:0], // 19:18
+//				   indr_pcxpkt[12:11],  -- cannot send rst
+				   {2'b00},             // 17:16
+				   3'b0,                // 15:13 rsvd
+				   indr_pcxpkt[10:6],   // 12:8
+				   2'b0,                // 7:6   rsvd
+				   indr_pcxpkt[5:0]};   // 5:0
+   
+   // TBD:
+   // 1. disable sending of reset/nuke/resume packets from indr -- DONE 1/6
+   
+endmodule
+   
+   
Index: /trunk/T1-CPU/tlu/tlu_pib.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_pib.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_pib.v	(revision 6)
@@ -0,0 +1,1932 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_pib.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    Performance Instrumentation Block 
+//                      Performance monitoring: 2 of the 9 possible events
+//                      can be tracked per thread
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                        // time scale definition
+
+`include "tlu.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module	tlu_pib (/*AUTOARG*/
+                 // input
+                 ifu_tlu_imiss_e, ifu_tlu_immu_miss_m, ifu_tlu_thrid_d,
+                 ifu_tlu_sraddr_d, ifu_tlu_rsr_inst_d, // ifu_tlu_wsr_inst_d, 
+                 ifu_tlu_l2imiss, tlu_tcc_inst_w, lsu_tlu_wsr_inst_e, 
+                 ffu_tlu_fpu_tid, ffu_tlu_fpu_cmplt, 
+                 lsu_tlu_dmmu_miss_g, lsu_tlu_dcache_miss_w2, lsu_tlu_l2_dmiss,
+                 lsu_tlu_stb_full_w2, exu_tlu_wsr_data_m, // tlu_tickcmp_sel, 
+                 tlu_hpstate_priv, tlu_thread_inst_vld_g, tlu_wsr_inst_nq_g, 
+                 tlu_full_flush_pipe_w2,  tlu_pstate_priv, tlu_thread_wsel_g, 
+                 tlu_pib_rsr_data_e, tlu_hpstate_enb, ifu_tlu_flush_fd_w, 
+//
+// reset was modified to abide to the Niagara reset methodology 
+                 rclk, arst_l, grst_l, si, se, // tlu_rst_l, rst_tri_en, 
+                 // output
+
+// tlu_pcr_ut_e, tlu_pcr_st_e,
+                 pib_picl_wrap, pich_wrap_flg, pich_onebelow_flg, pich_twobelow_flg, 
+                 tlu_pic_onebelow_e, tlu_pic_twobelow_e, pib_priv_act_trap_m, 
+                 tlu_wsr_data_w, tlu_pcr_ut, tlu_pcr_st, tlu_pic_wrap_e, so);
+
+// Input section
+// Events generated by IFU
+input	     ifu_tlu_imiss_e;	   // icache misses -- New interface  
+input		 ifu_tlu_immu_miss_m;  // itlb misses 
+input [1:0]	 ifu_tlu_thrid_d;	   //  thread id For instruction complete
+input [`TLU_THRD_NUM-1:0] tlu_thread_inst_vld_g; // For instruction complete
+input [`TLU_THRD_NUM-1:0] tlu_thread_wsel_g;  // thread of instruction fetched 
+input [`TLU_THRD_NUM-1:0] ifu_tlu_l2imiss; // l2 imiss -- new interface 
+
+// ASR register read/write requests
+input [`TLU_ASR_ADDR_WIDTH-1:0] ifu_tlu_sraddr_d;      
+input ifu_tlu_rsr_inst_d; // valid rd sr(st/pr)
+// input ifu_tlu_wsr_inst_d; // valid wr sr(st/pr)
+input lsu_tlu_wsr_inst_e; // valid wr sr(st/pr)
+// input tlu_wsr_inst_g; // valid wr sr(st/pr)
+// modified for timing
+input tlu_wsr_inst_nq_g; // valid wr sr(st/pr)
+input [`TLU_ASR_DATA_WIDTH-1:0] exu_tlu_wsr_data_m; // pr/st data to irf.
+// modified due to timing
+// input [`TLU_ASR_DATA_WIDTH-1:0] tlu_pib_rsr_data_e; // this was the tlu_exu_rsr_data_e 
+
+// LSU generated events - also include L2 miss
+input [`TLU_THRD_NUM-1:0] lsu_tlu_dcache_miss_w2; // dcache miss -- new interface 
+input [`TLU_THRD_NUM-1:0] lsu_tlu_l2_dmiss;	     // l2 dmisses -- new interface 
+input [`TLU_THRD_NUM-1:0] lsu_tlu_stb_full_w2;	 // store buffer full -- new interface 
+input lsu_tlu_dmmu_miss_g;	 // dtlb misses 
+// FFU generated events (FPU instruction completion)
+input [1:0] ffu_tlu_fpu_tid;   // ThrdID for the FF instr_cmplt -- new 
+input       ffu_tlu_fpu_cmplt; // FF instru complete -- new 
+// TLU information for event filtering
+//
+input [`TLU_THRD_NUM-1:0] tlu_pstate_priv; // supervisor privilege information 
+input [`TLU_THRD_NUM-1:0] tlu_hpstate_priv;// hypervisor privilege information
+input [`TLU_THRD_NUM-1:0] tlu_hpstate_enb; // hyperlite enabling 
+input tlu_tcc_inst_w; // For instruction complete 
+input tlu_full_flush_pipe_w2; // For instruction complete 
+input ifu_tlu_flush_fd_w; // For instruction complete 
+// Global signals
+input rclk;			
+//
+// reset was modified to abide to the Niagara reset methodology 
+// input			reset;		
+// input tlu_rst_l;		
+input		grst_l;				// global reset - active low
+input		arst_l;				// async reset (drives rst_l of dffrl_async) - active low
+input		si;				    // global scan-in 
+input		se;				    // global scan enable
+// input		rst_tri_en;			// global reset - active log
+
+// output section
+// modified to make inst vld overflow trap precise
+// output [`TLU_THRD_NUM-1:0] pib_pic_wrap;     // pic register wrap transition 
+// output pib_rst_l;				// local unit reset - active low
+output [`TLU_THRD_NUM-1:0] pib_picl_wrap;       // pic register wrap transition 
+output [`TLU_THRD_NUM-1:0] pich_wrap_flg;       // pic register wrap transition 
+output [`TLU_THRD_NUM-1:0] pich_onebelow_flg;   // pic register wrap transition 
+output [`TLU_THRD_NUM-1:0] pich_twobelow_flg;   // pic register wrap transition 
+// output [`TLU_THRD_NUM-1:0] pich_threebelow_flg; // pic register wrap transition 
+// modified due to timing fixes
+output [`TLU_ASR_DATA_WIDTH-1:0] tlu_pib_rsr_data_e; // rsr data register data 
+output tlu_pic_onebelow_e, tlu_pic_twobelow_e, tlu_pic_wrap_e; 
+//
+// modified for bug 5436 - Niagara 2.0
+output [`TLU_THRD_NUM-1:0] tlu_pcr_ut;   
+output [`TLU_THRD_NUM-1:0] tlu_pcr_st;   
+wire tlu_pcr_ut_e, tlu_pcr_st_e; 
+
+
+// 
+// output [`TLU_THRD_NUM-1:0] pib_priv_act_trap;  // access privilege violation for pics 
+output [`TLU_THRD_NUM-1:0] pib_priv_act_trap_m;  // access privilege violation for pics 
+// output [`TLU_ASR_DATA_WIDTH-1:0] tlu_exu_rsr_data_e; // Add in the final muxing of pib asr data 
+output [`TLU_ASR_DATA_WIDTH-1:0] tlu_wsr_data_w;     // flopped version of exu_tlu_wsr_data_m 
+// output [47:0] tlu_ifu_trappc_w2;  // temporary for timing 
+// output [47:0] tlu_ifu_trapnpc_w2; // temporary for timing 
+output   so;				    // global scan-out 
+
+//==============================================================================
+// Local signal defines 
+//==============================================================================
+// decoded address for pcr and pic
+wire pcr_rw_e, pcr_rw_m, pcr_rw_g; // pcr_rw_d, 
+wire pic_priv_rw_e, pic_priv_rw_m, pic_priv_rw_g; // pic_priv_rw_d,  
+wire pic_npriv_rw_e, pic_npriv_rw_m, pic_npriv_rw_g;// pic_npriv_rw_d, 
+//
+// read/write to pcr, evq and pic 
+wire [`TLU_THRD_NUM-1:0] wsr_thread_inst_g; 
+wire [`TLU_THRD_NUM-1:0] update_picl_sel, update_picl_wrap_en;
+wire [`TLU_THRD_NUM-1:0] picl_cnt_wrap_datain;
+wire [`TLU_THRD_NUM-1:0] update_pich_sel, update_pich_wrap_en;
+wire [`TLU_THRD_NUM-1:0] pich_cnt_wrap_datain;
+wire [`TLU_THRD_NUM-1:0] update_evq_sel;
+wire [`TLU_THRD_NUM-1:0] wsr_pcr_sel; 
+wire [`TLU_THRD_NUM-1:0] wsr_pic_sel; 
+wire [`TLU_THRD_NUM-1:0] update_pich_ovf; 
+wire [`TLU_THRD_NUM-1:0] update_picl_ovf; 
+wire [`TLU_THRD_NUM-1:0] inst_vld_w2; 
+wire tcc_inst_w2;
+// 
+// added for bug 2919
+wire [`TLU_THRD_NUM-1:0] pic_update_ctl; 
+wire [1:0] pic_update_sel_ctr; 
+wire [1:0] pic_update_sel_incr; 
+//
+// modified for timing
+// wire [`TLU_ASR_ADDR_WIDTH-1:0] pib_sraddr_d;      
+wire [`TLU_ASR_ADDR_WIDTH-1:0] pib_sraddr_e;      
+wire tlu_rsr_inst_e, tlu_wsr_inst_e;      
+//
+// picl masks
+wire [`PICL_MASK_WIDTH-1:0] picl_mask0, picl_mask1, picl_mask2, picl_mask3;
+wire [`PICL_MASK_WIDTH-1:0] picl_event0, picl_event1, picl_event2, picl_event3;
+// added for bug2332
+// wire incr_pich_onehot;
+// pic counters
+wire [`TLU_THRD_NUM-1:0] incr_pich; 
+wire [`TLU_THRD_NUM-1:0] pich_mux_sel; 
+wire [`TLU_THRD_NUM-1:0] pich_cnt_wrap; 
+wire [`TLU_THRD_NUM-1:0] picl_cnt_wrap; 
+wire [`TLU_THRD_NUM-2:0] thread_rsel_d; 
+wire [`TLU_THRD_NUM-2:0] thread_rsel_e;
+wire [`TLU_THRD_NUM-1:0] pic_onebelow_e, pic_twobelow_e, pic_wrap_e; 
+wire [`PIB_PIC_CNT_WIDTH-1:0] picl_cnt0, picl_cnt1, picl_cnt2, picl_cnt3; 
+wire [`PIB_PIC_CNT_WIDTH-1:0] picl_cnt_din, picl_cnt_sum;
+wire [`PIB_PIC_CNT_WIDTH-1:0] picl_wsr_data; 
+wire [`PIB_PIC_CNT_WIDTH-1:0] update_picl0_data, update_picl1_data; 
+wire [`PIB_PIC_CNT_WIDTH-1:0] update_picl2_data, update_picl3_data; 
+wire [`PIB_PIC_CNT_WIDTH-1:0] pich_cnt0, pich_cnt1, pich_cnt2, pich_cnt3; 
+wire [`PIB_PIC_CNT_WIDTH-1:0] pich_cnt_din, pich_cnt_sum; 
+wire [`PIB_PIC_CNT_WIDTH-1:0] pich_wsr_data;
+wire [`PIB_PIC_CNT_WIDTH-1:0] update_pich0_data, update_pich1_data; 
+wire [`PIB_PIC_CNT_WIDTH-1:0] update_pich2_data, update_pich3_data; 
+wire [`TLU_ASR_DATA_WIDTH-1:0] pic_rdata_e;
+wire [`TLU_ASR_DATA_WIDTH-1:0] pcr_rdata_e;
+wire [`PIB_PCR_WIDTH-1:0] pcr_reg_rdata_e;
+wire [`PIB_PCR_WIDTH-1:0] pcr_wdata_in;
+wire [`TLU_THRD_NUM-1:0] picl_ovf_wdata_in;
+wire [`TLU_THRD_NUM-1:0] pich_ovf_wdata_in;
+// experiment
+wire [`TLU_THRD_NUM-1:0] pich_fourbelow_din;
+wire [`TLU_THRD_NUM-1:0] pich_fourbelow_flg;
+// wire [`TLU_THRD_NUM-1:0] pich_threebelow_flg;
+// modified due to timing
+// wire [2:0] rsr_data_sel_e;
+wire [1:0] rsr_data_sel_e;
+// picl evqs 
+wire [`PIB_EVQ_CNT_WIDTH-1:0] picl_evq0, picl_evq1, picl_evq2, picl_evq3;
+wire [`PIB_EVQ_CNT_WIDTH-1:0] picl_evq0_sum, picl_evq1_sum; 
+wire [`PIB_EVQ_CNT_WIDTH-1:0] picl_evq2_sum, picl_evq3_sum; 
+wire [`PIB_EVQ_CNT_WIDTH-1:0] update_evq0_data, update_evq1_data; 
+wire [`PIB_EVQ_CNT_WIDTH-1:0] update_evq2_data, update_evq3_data; 
+wire [`PIB_EVQ_CNT_WIDTH-1:0] picl_evq_din; 
+wire [`PIB_EVQ_CNT_WIDTH-1:0] picl_evq0_din, picl_evq1_din; 
+wire [`PIB_EVQ_CNT_WIDTH-1:0] picl_evq2_din, picl_evq3_din; 
+wire [`TLU_THRD_NUM-1:0] incr_evq_din, incr_evq;
+// pcr registers
+wire [`PIB_PCR_WIDTH-1:0] pcr0, pcr1, pcr2, pcr3; 
+// 
+wire local_rst; // local active high reset
+wire local_rst_l; // local active low reset
+// counting enable indicator 
+wire [`TLU_THRD_NUM-1:0] pic_cnt_en, pic_cnt_en_w2;
+//
+// staged icache and itlb misses
+wire imiss_m, imiss_g;
+wire immu_miss_g;
+//
+// threaded icache, itlb, and dtlb misses
+wire [`TLU_THRD_NUM-1:0] imiss_thread_g;
+wire [`TLU_THRD_NUM-1:0] immu_miss_thread_g;
+wire [`TLU_THRD_NUM-1:0] dmmu_miss_thread_g;
+wire [`TLU_THRD_NUM-1:0] fpu_cmplt_thread;
+//
+// clock rename
+wire clk; 
+
+//==============================================================================
+// Code starts here
+//==============================================================================
+//	reset
+
+dffrl_async dffrl_local_rst_l(
+    .din  (grst_l),
+    .clk  (clk),
+    .rst_l(arst_l),
+    .q    (local_rst_l),
+    .se   (se),
+    .si   (),
+    .so   ()
+); 
+
+assign local_rst = ~local_rst_l;
+// assign pib_rst_l = local_rst_l;
+// assign local_rst = ~tlu_rst_l;
+//
+// rename clock 
+assign clk = rclk;
+
+//
+// privilege action trap due to user access of pic register when
+// PRIV bit is set in pcr
+// modified for timing fixes
+/*
+assign pib_priv_act_trap = (pic_npriv_rw_g ) & 
+           ((pcr0[`PIB_PCR_PRIV]  & tlu_thread_inst_vld_g[0]) & 
+             ~tlu_pstate_priv[0]) |
+           ((pcr1[`PIB_PCR_PRIV]  & tlu_thread_inst_vld_g[1]) & 
+             ~tlu_pstate_priv[1]) |
+           ((pcr2[`PIB_PCR_PRIV]  & tlu_thread_inst_vld_g[2]) & 
+             ~tlu_pstate_priv[2]) |
+           ((pcr3[`PIB_PCR_PRIV]  & tlu_thread_inst_vld_g[3]) & 
+             ~tlu_pstate_priv[3]);
+*/
+assign pib_priv_act_trap_m[0] = pic_npriv_rw_m & pcr0[`PIB_PCR_PRIV]; 
+assign pib_priv_act_trap_m[1] = pic_npriv_rw_m & pcr1[`PIB_PCR_PRIV]; 
+assign pib_priv_act_trap_m[2] = pic_npriv_rw_m & pcr2[`PIB_PCR_PRIV]; 
+assign pib_priv_act_trap_m[3] = pic_npriv_rw_m & pcr3[`PIB_PCR_PRIV]; 
+             
+//
+// staging exu_tlu_wsr_data_m into tlu_wsr_data_w for timing
+//
+dff_s #(`TLU_ASR_DATA_WIDTH) dff_tlu_wsr_data_w (
+    .din (exu_tlu_wsr_data_m[`TLU_ASR_DATA_WIDTH-1:0]), 
+    .q   (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+//================================
+// address decode for PCR and PICs 
+//================================
+// added and modified for timing
+// assign pib_sraddr_d[`TLU_ASR_ADDR_WIDTH-1:0] =
+//            ifu_tlu_sraddr_d[`TLU_ASR_ADDR_WIDTH-1:0]; 
+
+dff_s #(`TLU_ASR_ADDR_WIDTH) dff_pib_sraddr_e (
+    .din (ifu_tlu_sraddr_d[`TLU_ASR_ADDR_WIDTH-1:0]),
+    .q   (pib_sraddr_e[`TLU_ASR_ADDR_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffr_s dffr_tlu_rsr_inst_e (
+    .din (ifu_tlu_rsr_inst_d),
+    .q   (tlu_rsr_inst_e),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// modified for timing
+/*
+dffr_s dffr_tlu_wsr_inst_e (
+    .din (ifu_tlu_wsr_inst_d),
+    .q   (tlu_wsr_inst_e),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+assign tlu_wsr_inst_e = lsu_tlu_wsr_inst_e;
+//
+assign pcr_rw_e = 
+           (pib_sraddr_e[`TLU_ASR_ADDR_WIDTH-1:0] == `PCR_ASR_ADDR); 
+assign pic_priv_rw_e = 
+           (pib_sraddr_e[`TLU_ASR_ADDR_WIDTH-1:0] == `PIC_ASR_PRIV_ADDR);
+assign pic_npriv_rw_e = 
+           (pib_sraddr_e[`TLU_ASR_ADDR_WIDTH-1:0] == `PIC_ASR_NPRIV_ADDR) &
+           (tlu_rsr_inst_e | tlu_wsr_inst_e);
+//
+// staging of the ASR decoded controls
+//
+// staging from d to e stage
+// deleted for timing
+/*
+dff_s dff_pcr_rw_d_e (
+    .din (pcr_rw_d),
+    .q   (pcr_rw_e),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s dff_pic_priv_rw_d_e (
+    .din (pic_priv_rw_d),
+    .q   (pic_priv_rw_e),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s dff_pic_npriv_rw_d_e (
+    .din (pic_npriv_rw_d),
+    .q   (pic_npriv_rw_e),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+//
+// staging from e to m stage
+dff_s dff_pcr_rw_e_m (
+    .din (pcr_rw_e),
+    .q   (pcr_rw_m),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s dff_pic_priv_rw_e_m (
+    .din (pic_priv_rw_e),
+    .q   (pic_priv_rw_m),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s dff_pic_npriv_rw_e_m (
+    .din (pic_npriv_rw_e),
+    .q   (pic_npriv_rw_m),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s dff_imiss_e_m (
+    .din (ifu_tlu_imiss_e),
+    .q   (imiss_m),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// staging from m to g stage
+dff_s dff_pcr_rw_m_g (
+    .din (pcr_rw_m),
+    .q   (pcr_rw_g),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s dff_pic_priv_rw_m_g (
+    .din (pic_priv_rw_m),
+    .q   (pic_priv_rw_g),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s dff_pic_npriv_rw_m_g (
+    .din (pic_npriv_rw_m),
+    .q   (pic_npriv_rw_g),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s dff_imiss_m_g (
+    .din (imiss_m),
+    .q   (imiss_g),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s dff_immu_miss_m_g (
+    .din (ifu_tlu_immu_miss_m),
+    .q   (immu_miss_g),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+//
+//=========================
+// update for PCR registers   
+//=========================
+//
+assign wsr_thread_inst_g[0] = 
+           tlu_wsr_inst_nq_g & ~ifu_tlu_flush_fd_w & tlu_thread_wsel_g[0];
+assign wsr_thread_inst_g[1] = 
+           tlu_wsr_inst_nq_g & ~ifu_tlu_flush_fd_w & tlu_thread_wsel_g[1];
+assign wsr_thread_inst_g[2] = 
+           tlu_wsr_inst_nq_g & ~ifu_tlu_flush_fd_w & tlu_thread_wsel_g[2];
+assign wsr_thread_inst_g[3] = 
+           tlu_wsr_inst_nq_g & ~ifu_tlu_flush_fd_w & tlu_thread_wsel_g[3];
+// 
+// extracting the relevant bits from the wsr data bus
+assign pcr_wdata_in = 
+    {tlu_wsr_data_w[`WSR_PCR_CH_OVF:`WSR_PCR_CL_OVF],
+     tlu_wsr_data_w[`WSR_PCR_SL_HI:`WSR_PCR_SL_LO],
+     tlu_wsr_data_w[`WSR_PCR_UT:`WSR_PCR_PRIV]};
+//
+// thread 0
+// PCR write strobe for thread 0
+assign wsr_pcr_sel[0] = pcr_rw_g & wsr_thread_inst_g[0];
+
+// The OVF flops load on a PCR write from this thread, or whenever the
+// stored wrap bit and the counter MSB disagree.
+assign update_picl_ovf[0] =
+           wsr_pcr_sel[0] |
+           (picl_cnt0[`PIB_PIC_CNT_WIDTH-1] ^ picl_cnt_wrap[0]);
+
+assign update_pich_ovf[0] =
+           wsr_pcr_sel[0] |
+           (pich_cnt0[`PIB_PIC_CNT_WIDTH-1] ^ pich_cnt_wrap[0]);
+//
+// modified for bug 2291
+// Low `PIB_PCR_WIDTH-2 bits of pcr0; loaded from pcr_wdata_in rather than
+// straight from tlu_wsr_data_w[`PIB_PCR_WIDTH-1:0].
+dffre_s #(`PIB_PCR_WIDTH-2) dffre_pcr0 (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (wsr_pcr_sel[0]),
+    .din (pcr_wdata_in[`PIB_PCR_WIDTH-3:0]),
+    .q   (pcr0[`PIB_PCR_WIDTH-3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// CL_OVF next value: WSR data when wsr_pcr_sel[0] is set, otherwise the
+// wrap/MSB mismatch.
+mux2ds mux_pcr0_picl_ovf (
+    .dout (picl_ovf_wdata_in[0]),
+    .in0  (pcr_wdata_in[`PIB_PCR_CL_OVF]),
+    .sel0 (wsr_pcr_sel[0]),
+    .in1  (picl_cnt_wrap[0] ^ picl_cnt0[`PIB_PIC_CNT_WIDTH-1]),
+    .sel1 (~wsr_pcr_sel[0])
+);
+
+// added for the new bug 2588
+dffre_s dffre_pcr0_picl_ovf (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (update_picl_ovf[0]),
+    .din (picl_ovf_wdata_in[0]),
+    .q   (pcr0[`PIB_PCR_CL_OVF]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// CH_OVF next value, same scheme as CL_OVF.
+mux2ds mux_pcr0_pich_ovf (
+    .dout (pich_ovf_wdata_in[0]),
+    .in0  (pcr_wdata_in[`PIB_PCR_CH_OVF]),
+    .sel0 (wsr_pcr_sel[0]),
+    .in1  (pich_cnt_wrap[0] ^ pich_cnt0[`PIB_PIC_CNT_WIDTH-1]),
+    .sel1 (~wsr_pcr_sel[0])
+);
+
+dffre_s dffre_pcr0_pich_ovf (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (update_pich_ovf[0]),
+    .din (pich_ovf_wdata_in[0]),
+    .q   (pcr0[`PIB_PCR_CH_OVF]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// 
+// thread 1
+
+// PCR write strobe for thread 1
+assign wsr_pcr_sel[1] = pcr_rw_g & wsr_thread_inst_g[1];
+
+// The OVF flops load on a PCR write from this thread, or whenever the
+// stored wrap bit and the counter MSB disagree.
+assign update_picl_ovf[1] =
+           wsr_pcr_sel[1] |
+           (picl_cnt1[`PIB_PIC_CNT_WIDTH-1] ^ picl_cnt_wrap[1]);
+
+assign update_pich_ovf[1] =
+           wsr_pcr_sel[1] |
+           (pich_cnt1[`PIB_PIC_CNT_WIDTH-1] ^ pich_cnt_wrap[1]);
+
+// Low `PIB_PCR_WIDTH-2 bits of pcr1; loaded from pcr_wdata_in rather than
+// straight from tlu_wsr_data_w[`PIB_PCR_WIDTH-1:0].
+dffre_s #(`PIB_PCR_WIDTH-2) dffre_pcr1 (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (wsr_pcr_sel[1]),
+    .din (pcr_wdata_in[`PIB_PCR_WIDTH-3:0]),
+    .q   (pcr1[`PIB_PCR_WIDTH-3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// CL_OVF next value: WSR data when wsr_pcr_sel[1] is set, otherwise the
+// wrap/MSB mismatch.
+mux2ds mux_pcr1_picl_ovf (
+    .dout (picl_ovf_wdata_in[1]),
+    .in0  (pcr_wdata_in[`PIB_PCR_CL_OVF]),
+    .sel0 (wsr_pcr_sel[1]),
+    .in1  (picl_cnt_wrap[1] ^ picl_cnt1[`PIB_PIC_CNT_WIDTH-1]),
+    .sel1 (~wsr_pcr_sel[1])
+);
+
+// added for the new bug 2588
+dffre_s dffre_pcr1_picl_ovf (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (update_picl_ovf[1]),
+    .din (picl_ovf_wdata_in[1]),
+    .q   (pcr1[`PIB_PCR_CL_OVF]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// CH_OVF next value, same scheme as CL_OVF.
+mux2ds mux_pcr1_pich_ovf (
+    .dout (pich_ovf_wdata_in[1]),
+    .in0  (pcr_wdata_in[`PIB_PCR_CH_OVF]),
+    .sel0 (wsr_pcr_sel[1]),
+    .in1  (pich_cnt_wrap[1] ^ pich_cnt1[`PIB_PIC_CNT_WIDTH-1]),
+    .sel1 (~wsr_pcr_sel[1])
+);
+
+dffre_s dffre_pcr1_pich_ovf (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (update_pich_ovf[1]),
+    .din (pich_ovf_wdata_in[1]),
+    .q   (pcr1[`PIB_PCR_CH_OVF]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// 
+// thread 2
+
+// PCR write strobe for thread 2
+assign wsr_pcr_sel[2] = pcr_rw_g & wsr_thread_inst_g[2];
+
+// The OVF flops load on a PCR write from this thread, or whenever the
+// stored wrap bit and the counter MSB disagree.
+assign update_picl_ovf[2] =
+           wsr_pcr_sel[2] |
+           (picl_cnt2[`PIB_PIC_CNT_WIDTH-1] ^ picl_cnt_wrap[2]);
+
+assign update_pich_ovf[2] =
+           wsr_pcr_sel[2] |
+           (pich_cnt2[`PIB_PIC_CNT_WIDTH-1] ^ pich_cnt_wrap[2]);
+
+// Low `PIB_PCR_WIDTH-2 bits of pcr2; loaded from pcr_wdata_in rather than
+// straight from tlu_wsr_data_w[`PIB_PCR_WIDTH-1:0].
+dffre_s #(`PIB_PCR_WIDTH-2) dffre_pcr2 (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (wsr_pcr_sel[2]),
+    .din (pcr_wdata_in[`PIB_PCR_WIDTH-3:0]),
+    .q   (pcr2[`PIB_PCR_WIDTH-3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// CL_OVF next value: WSR data when wsr_pcr_sel[2] is set, otherwise the
+// wrap/MSB mismatch.
+mux2ds mux_pcr2_picl_ovf (
+    .dout (picl_ovf_wdata_in[2]),
+    .in0  (pcr_wdata_in[`PIB_PCR_CL_OVF]),
+    .sel0 (wsr_pcr_sel[2]),
+    .in1  (picl_cnt_wrap[2] ^ picl_cnt2[`PIB_PIC_CNT_WIDTH-1]),
+    .sel1 (~wsr_pcr_sel[2])
+);
+
+// added for the new bug 2588
+dffre_s dffre_pcr2_picl_ovf (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (update_picl_ovf[2]),
+    .din (picl_ovf_wdata_in[2]),
+    .q   (pcr2[`PIB_PCR_CL_OVF]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// CH_OVF next value, same scheme as CL_OVF.
+mux2ds mux_pcr2_pich_ovf (
+    .dout (pich_ovf_wdata_in[2]),
+    .in0  (pcr_wdata_in[`PIB_PCR_CH_OVF]),
+    .sel0 (wsr_pcr_sel[2]),
+    .in1  (pich_cnt_wrap[2] ^ pich_cnt2[`PIB_PIC_CNT_WIDTH-1]),
+    .sel1 (~wsr_pcr_sel[2])
+);
+
+dffre_s dffre_pcr2_pich_ovf (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (update_pich_ovf[2]),
+    .din (pich_ovf_wdata_in[2]),
+    .q   (pcr2[`PIB_PCR_CH_OVF]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// 
+// thread 3
+
+// PCR write strobe for thread 3
+assign wsr_pcr_sel[3] = pcr_rw_g & wsr_thread_inst_g[3];
+
+// The OVF flops load on a PCR write from this thread, or whenever the
+// stored wrap bit and the counter MSB disagree.
+assign update_picl_ovf[3] =
+           wsr_pcr_sel[3] |
+           (picl_cnt3[`PIB_PIC_CNT_WIDTH-1] ^ picl_cnt_wrap[3]);
+
+assign update_pich_ovf[3] =
+           wsr_pcr_sel[3] |
+           (pich_cnt3[`PIB_PIC_CNT_WIDTH-1] ^ pich_cnt_wrap[3]);
+
+// Low `PIB_PCR_WIDTH-2 bits of pcr3; loaded from pcr_wdata_in rather than
+// straight from tlu_wsr_data_w[`PIB_PCR_WIDTH-1:0].
+dffre_s #(`PIB_PCR_WIDTH-2) dffre_pcr3 (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (wsr_pcr_sel[3]),
+    .din (pcr_wdata_in[`PIB_PCR_WIDTH-3:0]),
+    .q   (pcr3[`PIB_PCR_WIDTH-3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// CL_OVF next value: WSR data when wsr_pcr_sel[3] is set, otherwise the
+// wrap/MSB mismatch.
+mux2ds mux_pcr3_picl_ovf (
+    .dout (picl_ovf_wdata_in[3]),
+    .in0  (pcr_wdata_in[`PIB_PCR_CL_OVF]),
+    .sel0 (wsr_pcr_sel[3]),
+    .in1  (picl_cnt_wrap[3] ^ picl_cnt3[`PIB_PIC_CNT_WIDTH-1]),
+    .sel1 (~wsr_pcr_sel[3])
+);
+
+// added for the new bug 2588
+dffre_s dffre_pcr3_picl_ovf (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (update_picl_ovf[3]),
+    .din (picl_ovf_wdata_in[3]),
+    .q   (pcr3[`PIB_PCR_CL_OVF]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// CH_OVF next value, same scheme as CL_OVF.
+mux2ds mux_pcr3_pich_ovf (
+    .dout (pich_ovf_wdata_in[3]),
+    .in0  (pcr_wdata_in[`PIB_PCR_CH_OVF]),
+    .sel0 (wsr_pcr_sel[3]),
+    .in1  (pich_cnt_wrap[3] ^ pich_cnt3[`PIB_PIC_CNT_WIDTH-1]),
+    .sel1 (~wsr_pcr_sel[3])
+);
+
+dffre_s dffre_pcr3_pich_ovf (
+    .clk (clk),
+    .rst (local_rst),
+    .en  (update_pich_ovf[3]),
+    .din (pich_ovf_wdata_in[3]),
+    .q   (pcr3[`PIB_PCR_CH_OVF]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+//
+//====================
+// threading of events 
+//====================
+//
+// icache misses
+assign imiss_thread_g[3:0]     = {4{imiss_g}}             & tlu_thread_wsel_g[3:0];
+//
+// itlb misses
+assign immu_miss_thread_g[3:0] = {4{immu_miss_g}}         & tlu_thread_wsel_g[3:0];
+//
+// dtlb misses
+assign dmmu_miss_thread_g[3:0] = {4{lsu_tlu_dmmu_miss_g}} & tlu_thread_wsel_g[3:0];
+// NOTE(review): the vector forms above assume these nets are declared
+// [3:0] (descending) - confirm against the wire declarations.
+//
+// fpu completions - decoded per thread from ffu_tlu_fpu_tid
+// ffu_tlu_fpu_cmplt is steered to the thread whose number matches
+// {ffu_tlu_fpu_tid[1], ffu_tlu_fpu_tid[0]}.
+assign fpu_cmplt_thread[0] =
+           ~ffu_tlu_fpu_tid[1] & ~ffu_tlu_fpu_tid[0] & ffu_tlu_fpu_cmplt;
+assign fpu_cmplt_thread[1] =
+           ~ffu_tlu_fpu_tid[1] &  ffu_tlu_fpu_tid[0] & ffu_tlu_fpu_cmplt;
+assign fpu_cmplt_thread[2] =
+            ffu_tlu_fpu_tid[1] & ~ffu_tlu_fpu_tid[0] & ffu_tlu_fpu_cmplt;
+assign fpu_cmplt_thread[3] =
+            ffu_tlu_fpu_tid[1] &  ffu_tlu_fpu_tid[0] & ffu_tlu_fpu_cmplt;
+
+//====================
+// assigning of events 
+//====================
+//
+// thread 0
+// Each picl_eventN vector collects the raw event indications for thread N.
+// Bit positions come from the `PICL_MASK_* macros, so picl_eventN lines up
+// bit-for-bit with picl_maskN where the two are ANDed together.
+// IC/ITLB/DTLB misses and FP completions use the per-thread signals derived
+// above; the remaining sources are indexed directly by thread number.
+assign picl_event0[`PICL_MASK_SB_FULL]   = lsu_tlu_stb_full_w2[0]; 
+assign picl_event0[`PICL_MASK_FP_INST]   = fpu_cmplt_thread[0]; 
+assign picl_event0[`PICL_MASK_IC_MISS]   = imiss_thread_g[0]; 
+assign picl_event0[`PICL_MASK_DC_MISS]   = lsu_tlu_dcache_miss_w2[0]; 
+assign picl_event0[`PICL_MASK_ITLB_MISS] = immu_miss_thread_g[0]; 
+assign picl_event0[`PICL_MASK_DTLB_MISS] = dmmu_miss_thread_g[0]; 
+assign picl_event0[`PICL_MASK_L2_IMISS]  = ifu_tlu_l2imiss[0]; 
+assign picl_event0[`PICL_MASK_L2_DMISS]  = lsu_tlu_l2_dmiss[0]; 
+//
+// thread 1
+assign picl_event1[`PICL_MASK_SB_FULL]   = lsu_tlu_stb_full_w2[1]; 
+assign picl_event1[`PICL_MASK_FP_INST]   = fpu_cmplt_thread[1]; 
+assign picl_event1[`PICL_MASK_IC_MISS]   = imiss_thread_g[1]; 
+assign picl_event1[`PICL_MASK_DC_MISS]   = lsu_tlu_dcache_miss_w2[1]; 
+assign picl_event1[`PICL_MASK_ITLB_MISS] = immu_miss_thread_g[1]; 
+assign picl_event1[`PICL_MASK_DTLB_MISS] = dmmu_miss_thread_g[1]; 
+assign picl_event1[`PICL_MASK_L2_IMISS]  = ifu_tlu_l2imiss[1]; 
+assign picl_event1[`PICL_MASK_L2_DMISS]  = lsu_tlu_l2_dmiss[1]; 
+//
+// thread 2
+assign picl_event2[`PICL_MASK_SB_FULL]   = lsu_tlu_stb_full_w2[2]; 
+assign picl_event2[`PICL_MASK_FP_INST]   = fpu_cmplt_thread[2]; 
+assign picl_event2[`PICL_MASK_IC_MISS]   = imiss_thread_g[2]; 
+assign picl_event2[`PICL_MASK_DC_MISS]   = lsu_tlu_dcache_miss_w2[2]; 
+assign picl_event2[`PICL_MASK_ITLB_MISS] = immu_miss_thread_g[2]; 
+assign picl_event2[`PICL_MASK_DTLB_MISS] = dmmu_miss_thread_g[2]; 
+assign picl_event2[`PICL_MASK_L2_IMISS]  = ifu_tlu_l2imiss[2]; 
+assign picl_event2[`PICL_MASK_L2_DMISS]  = lsu_tlu_l2_dmiss[2]; 
+//
+// thread 3
+assign picl_event3[`PICL_MASK_SB_FULL]   = lsu_tlu_stb_full_w2[3]; 
+assign picl_event3[`PICL_MASK_FP_INST]   = fpu_cmplt_thread[3]; 
+assign picl_event3[`PICL_MASK_IC_MISS]   = imiss_thread_g[3]; 
+assign picl_event3[`PICL_MASK_DC_MISS]   = lsu_tlu_dcache_miss_w2[3]; 
+assign picl_event3[`PICL_MASK_ITLB_MISS] = immu_miss_thread_g[3]; 
+assign picl_event3[`PICL_MASK_DTLB_MISS] = dmmu_miss_thread_g[3]; 
+assign picl_event3[`PICL_MASK_L2_IMISS]  = ifu_tlu_l2imiss[3]; 
+assign picl_event3[`PICL_MASK_L2_DMISS]  = lsu_tlu_l2_dmiss[3]; 
+
+//======================
+// decode for PIC events   
+//======================
+// 
+// thread 0
+
+// Thread 0 counting is enabled when either
+//   - pcr0 UT is set with tlu_hpstate_priv and tlu_pstate_priv both low, or
+//   - pcr0 ST is set and either
+//       tlu_hpstate_priv is high with tlu_hpstate_enb low, or
+//       tlu_hpstate_enb and tlu_pstate_priv are high with tlu_hpstate_priv low.
+assign pic_cnt_en[0] =
+            (pcr0[`PIB_PCR_UT] & ~tlu_hpstate_priv[0] & ~tlu_pstate_priv[0]) |
+            (pcr0[`PIB_PCR_ST] &
+             ((tlu_hpstate_priv[0] & ~tlu_hpstate_enb[0]) |
+              (tlu_hpstate_enb[0] & tlu_pstate_priv[0] & ~tlu_hpstate_priv[0])));
+//
+// picl mask decodes: a mask bit is set when the pcr0 SL field equals the
+// matching event code and pic_cnt_en[0] is set
+assign picl_mask0[`PICL_MASK_SB_FULL] =
+           pic_cnt_en[0] &
+           (pcr0[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_SB_FULL_CNT);
+assign picl_mask0[`PICL_MASK_FP_INST] =
+           pic_cnt_en[0] &
+           (pcr0[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_FP_INST_CNT);
+assign picl_mask0[`PICL_MASK_IC_MISS] =
+           pic_cnt_en[0] &
+           (pcr0[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_IC_MISS_CNT);
+assign picl_mask0[`PICL_MASK_DC_MISS] =
+           pic_cnt_en[0] &
+           (pcr0[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_DC_MISS_CNT);
+assign picl_mask0[`PICL_MASK_ITLB_MISS] =
+           pic_cnt_en[0] &
+           (pcr0[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_ITLB_MISS_CNT);
+assign picl_mask0[`PICL_MASK_DTLB_MISS] =
+           pic_cnt_en[0] &
+           (pcr0[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_DTLB_MISS_CNT);
+assign picl_mask0[`PICL_MASK_L2_IMISS] =
+           pic_cnt_en[0] &
+           (pcr0[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_L2_IMISS_CNT);
+assign picl_mask0[`PICL_MASK_L2_DMISS] =
+           pic_cnt_en[0] &
+           (pcr0[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_L2_DMISS_CNT);
+// 
+// thread 1
+
+// Thread 1 counting is enabled when either
+//   - pcr1 UT is set with tlu_hpstate_priv and tlu_pstate_priv both low, or
+//   - pcr1 ST is set and either
+//       tlu_hpstate_priv is high with tlu_hpstate_enb low, or
+//       tlu_hpstate_enb and tlu_pstate_priv are high with tlu_hpstate_priv low.
+assign pic_cnt_en[1] =
+            (pcr1[`PIB_PCR_UT] & ~tlu_hpstate_priv[1] & ~tlu_pstate_priv[1]) |
+            (pcr1[`PIB_PCR_ST] &
+             ((tlu_hpstate_priv[1] & ~tlu_hpstate_enb[1]) |
+              (tlu_hpstate_enb[1] & tlu_pstate_priv[1] & ~tlu_hpstate_priv[1])));
+//
+// picl mask decodes: a mask bit is set when the pcr1 SL field equals the
+// matching event code and pic_cnt_en[1] is set
+assign picl_mask1[`PICL_MASK_SB_FULL] =
+           pic_cnt_en[1] &
+           (pcr1[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_SB_FULL_CNT);
+assign picl_mask1[`PICL_MASK_FP_INST] =
+           pic_cnt_en[1] &
+           (pcr1[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_FP_INST_CNT);
+assign picl_mask1[`PICL_MASK_IC_MISS] =
+           pic_cnt_en[1] &
+           (pcr1[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_IC_MISS_CNT);
+assign picl_mask1[`PICL_MASK_DC_MISS] =
+           pic_cnt_en[1] &
+           (pcr1[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_DC_MISS_CNT);
+assign picl_mask1[`PICL_MASK_ITLB_MISS] =
+           pic_cnt_en[1] &
+           (pcr1[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_ITLB_MISS_CNT);
+assign picl_mask1[`PICL_MASK_DTLB_MISS] =
+           pic_cnt_en[1] &
+           (pcr1[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_DTLB_MISS_CNT);
+assign picl_mask1[`PICL_MASK_L2_IMISS] =
+           pic_cnt_en[1] &
+           (pcr1[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_L2_IMISS_CNT);
+assign picl_mask1[`PICL_MASK_L2_DMISS] =
+           pic_cnt_en[1] &
+           (pcr1[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_L2_DMISS_CNT);
+// 
+// thread 2
+
+// Thread 2 counting is enabled when either
+//   - pcr2 UT is set with tlu_hpstate_priv and tlu_pstate_priv both low, or
+//   - pcr2 ST is set and either
+//       tlu_hpstate_priv is high with tlu_hpstate_enb low, or
+//       tlu_hpstate_enb and tlu_pstate_priv are high with tlu_hpstate_priv low.
+assign pic_cnt_en[2] =
+            (pcr2[`PIB_PCR_UT] & ~tlu_hpstate_priv[2] & ~tlu_pstate_priv[2]) |
+            (pcr2[`PIB_PCR_ST] &
+             ((tlu_hpstate_priv[2] & ~tlu_hpstate_enb[2]) |
+              (tlu_hpstate_enb[2] & tlu_pstate_priv[2] & ~tlu_hpstate_priv[2])));
+//
+// picl mask decodes: a mask bit is set when the pcr2 SL field equals the
+// matching event code and pic_cnt_en[2] is set
+assign picl_mask2[`PICL_MASK_SB_FULL] =
+           pic_cnt_en[2] &
+           (pcr2[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_SB_FULL_CNT);
+assign picl_mask2[`PICL_MASK_FP_INST] =
+           pic_cnt_en[2] &
+           (pcr2[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_FP_INST_CNT);
+assign picl_mask2[`PICL_MASK_IC_MISS] =
+           pic_cnt_en[2] &
+           (pcr2[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_IC_MISS_CNT);
+assign picl_mask2[`PICL_MASK_DC_MISS] =
+           pic_cnt_en[2] &
+           (pcr2[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_DC_MISS_CNT);
+assign picl_mask2[`PICL_MASK_ITLB_MISS] =
+           pic_cnt_en[2] &
+           (pcr2[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_ITLB_MISS_CNT);
+assign picl_mask2[`PICL_MASK_DTLB_MISS] =
+           pic_cnt_en[2] &
+           (pcr2[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_DTLB_MISS_CNT);
+assign picl_mask2[`PICL_MASK_L2_IMISS] =
+           pic_cnt_en[2] &
+           (pcr2[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_L2_IMISS_CNT);
+assign picl_mask2[`PICL_MASK_L2_DMISS] =
+           pic_cnt_en[2] &
+           (pcr2[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_L2_DMISS_CNT);
+// 
+// thread 3
+
+// Thread 3 counting is enabled when either
+//   - pcr3 UT is set with tlu_hpstate_priv and tlu_pstate_priv both low, or
+//   - pcr3 ST is set and either
+//       tlu_hpstate_priv is high with tlu_hpstate_enb low, or
+//       tlu_hpstate_enb and tlu_pstate_priv are high with tlu_hpstate_priv low.
+assign pic_cnt_en[3] =
+            (pcr3[`PIB_PCR_UT] & ~tlu_hpstate_priv[3] & ~tlu_pstate_priv[3]) |
+            (pcr3[`PIB_PCR_ST] &
+             ((tlu_hpstate_priv[3] & ~tlu_hpstate_enb[3]) |
+              (tlu_hpstate_enb[3] & tlu_pstate_priv[3] & ~tlu_hpstate_priv[3])));
+//
+// added for timing
+// pic_cnt_en for all threads, delayed through a dff_s bank into
+// pic_cnt_en_w2 (consumed by the pich logic further down).
+dff_s #(`TLU_THRD_NUM) dff_pic_cnt_en_w2 (
+    .clk (clk),
+    .din (pic_cnt_en[`TLU_THRD_NUM-1:0]),
+    .q   (pic_cnt_en_w2[`TLU_THRD_NUM-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+//
+// picl mask decodes
+// a mask bit is set when the pcr3 SL field equals the matching event code
+// and pic_cnt_en[3] is set
+assign picl_mask3[`PICL_MASK_SB_FULL] =
+           pic_cnt_en[3] &
+           (pcr3[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_SB_FULL_CNT);
+assign picl_mask3[`PICL_MASK_FP_INST] =
+           pic_cnt_en[3] &
+           (pcr3[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_FP_INST_CNT);
+assign picl_mask3[`PICL_MASK_IC_MISS] =
+           pic_cnt_en[3] &
+           (pcr3[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_IC_MISS_CNT);
+assign picl_mask3[`PICL_MASK_DC_MISS] =
+           pic_cnt_en[3] &
+           (pcr3[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_DC_MISS_CNT);
+assign picl_mask3[`PICL_MASK_ITLB_MISS] =
+           pic_cnt_en[3] &
+           (pcr3[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_ITLB_MISS_CNT);
+assign picl_mask3[`PICL_MASK_DTLB_MISS] =
+           pic_cnt_en[3] &
+           (pcr3[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_DTLB_MISS_CNT);
+assign picl_mask3[`PICL_MASK_L2_IMISS] =
+           pic_cnt_en[3] &
+           (pcr3[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_L2_IMISS_CNT);
+assign picl_mask3[`PICL_MASK_L2_DMISS] =
+           pic_cnt_en[3] &
+           (pcr3[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO] == `PIB_L2_DMISS_CNT);
+
+//==================================================================
+// update the picls - could be separated into a dp block if needed 
+//==================================================================
+// added for bug 2919
+// rrobin scheduler to choose thread to update
+// 2-bit selector counter, cleared by local_rst.
+dffr_s #(2) dffr_pic_update_sel_ctr (
+    .clk (clk),
+    .rst (local_rst),
+    .din (pic_update_sel_incr[1:0]),
+    .q   (pic_update_sel_ctr[1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// next counter value: current value plus one, wrapping at 2 bits
+assign pic_update_sel_incr[1:0] =
+           2'b01 + pic_update_sel_ctr[1:0];
+
+// Thread select decoded from the *next* counter value
+// (pic_update_sel_incr), not from the registered one.
+assign pic_update_ctl[0] =
+           ~pic_update_sel_incr[1] & ~pic_update_sel_incr[0];
+assign pic_update_ctl[1] =
+            pic_update_sel_incr[0] & ~pic_update_sel_incr[1];
+assign pic_update_ctl[2] =
+           ~pic_update_sel_incr[0] &  pic_update_sel_incr[1];
+assign pic_update_ctl[3] =
+            pic_update_sel_incr[1] &  pic_update_sel_incr[0];
+// 
+// EVQs for PICL
+//
+// masking events for increment for picl evq update
+// An increment is requested for a thread when any event bit is raised in a
+// position that is also set in that thread's mask.
+assign incr_evq_din[0] =
+           |(picl_event0[`PICL_MASK_WIDTH-1:0] &
+             picl_mask0[`PICL_MASK_WIDTH-1:0]);
+assign incr_evq_din[1] =
+           |(picl_event1[`PICL_MASK_WIDTH-1:0] &
+             picl_mask1[`PICL_MASK_WIDTH-1:0]);
+assign incr_evq_din[2] =
+           |(picl_event2[`PICL_MASK_WIDTH-1:0] &
+             picl_mask2[`PICL_MASK_WIDTH-1:0]);
+assign incr_evq_din[3] =
+           |(picl_event3[`PICL_MASK_WIDTH-1:0] &
+             picl_mask3[`PICL_MASK_WIDTH-1:0]);
+//
+// added due to timing
+// incr_evq is incr_evq_din passed through a dff_s bank
+dff_s #(`TLU_THRD_NUM) dff_incr_evq (
+    .clk (clk),
+    .din (incr_evq_din[`TLU_THRD_NUM-1:0]),
+    .q   (incr_evq[`TLU_THRD_NUM-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// constructing controls to update the picl_evq
+// evq flop enable per thread: reset, this thread's update slot, or a
+// pending event increment.
+// NOTE(review): the vector form assumes update_evq_sel and pic_update_ctl
+// are declared [3:0] (descending) - confirm against the wire declarations.
+assign update_evq_sel[3:0] =
+           {4{local_rst}} | pic_update_ctl[3:0] | incr_evq[3:0];
+//
+// increment evq count for each thread
+// thread 0
+// picl_evq0_sum: adder output for picl_evq0 with a constant increment of 1
+tlu_addern_32 #(`PIB_EVQ_CNT_WIDTH,1) picl_evq0_adder (
+    .sum  (picl_evq0_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .din  (picl_evq0[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .incr (1'b1)
+) ;
+
+// next evq value: zeros when local_rst or pic_update_ctl[0] is set,
+// otherwise the incremented count
+mux2ds #(`PIB_EVQ_CNT_WIDTH) mux_update_evq0_data (
+       .dout (update_evq0_data[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .in0  ({`PIB_EVQ_CNT_WIDTH{1'b0}}),
+       .sel0 (local_rst | pic_update_ctl[0]),
+       .in1  (picl_evq0_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel1 (~local_rst & ~pic_update_ctl[0])
+);
+
+// evq count register, loaded only while update_evq_sel[0] is set
+dffe_s #(`PIB_EVQ_CNT_WIDTH) dff_picl_evq0 (
+    .clk (clk),
+    .en  (update_evq_sel[0]),
+    .din (update_evq0_data[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .q   (picl_evq0[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// thread 1
+// picl_evq1_sum: adder output for picl_evq1 with a constant increment of 1
+tlu_addern_32 #(`PIB_EVQ_CNT_WIDTH,1) picl_evq1_adder (
+    .sum  (picl_evq1_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .din  (picl_evq1[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .incr (1'b1)
+) ;
+
+// next evq value: zeros when local_rst or pic_update_ctl[1] is set,
+// otherwise the incremented count
+mux2ds #(`PIB_EVQ_CNT_WIDTH) mux_update_evq1_data (
+       .dout (update_evq1_data[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .in0  ({`PIB_EVQ_CNT_WIDTH{1'b0}}),
+       .sel0 (local_rst | pic_update_ctl[1]),
+       .in1  (picl_evq1_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel1 (~local_rst & ~pic_update_ctl[1])
+);
+
+// evq count register, loaded only while update_evq_sel[1] is set
+dffe_s #(`PIB_EVQ_CNT_WIDTH) dff_picl_evq1 (
+    .clk (clk),
+    .en  (update_evq_sel[1]),
+    .din (update_evq1_data[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .q   (picl_evq1[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// thread 2
+// picl_evq2_sum: adder output for picl_evq2 with a constant increment of 1
+tlu_addern_32 #(`PIB_EVQ_CNT_WIDTH,1) picl_evq2_adder (
+    .sum  (picl_evq2_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .din  (picl_evq2[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .incr (1'b1)
+) ;
+
+// next evq value: zeros when local_rst or pic_update_ctl[2] is set,
+// otherwise the incremented count
+mux2ds #(`PIB_EVQ_CNT_WIDTH) mux_update_evq2_data (
+       .dout (update_evq2_data[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .in0  ({`PIB_EVQ_CNT_WIDTH{1'b0}}),
+       .sel0 (local_rst | pic_update_ctl[2]),
+       .in1  (picl_evq2_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel1 (~local_rst & ~pic_update_ctl[2])
+);
+
+// evq count register, loaded only while update_evq_sel[2] is set
+dffe_s #(`PIB_EVQ_CNT_WIDTH) dff_picl_evq2 (
+    .clk (clk),
+    .en  (update_evq_sel[2]),
+    .din (update_evq2_data[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .q   (picl_evq2[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// thread 3
+// picl_evq3_sum: adder output for picl_evq3 with a constant increment of 1
+tlu_addern_32 #(`PIB_EVQ_CNT_WIDTH,1) picl_evq3_adder (
+    .sum  (picl_evq3_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .din  (picl_evq3[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .incr (1'b1)
+) ;
+
+// next evq value: zeros when local_rst or pic_update_ctl[3] is set,
+// otherwise the incremented count
+mux2ds #(`PIB_EVQ_CNT_WIDTH) mux_update_evq3_data (
+       .dout (update_evq3_data[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .in0  ({`PIB_EVQ_CNT_WIDTH{1'b0}}),
+       .sel0 (local_rst | pic_update_ctl[3]),
+       .in1  (picl_evq3_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel1 (~local_rst & ~pic_update_ctl[3])
+);
+
+// evq count register, loaded only while update_evq_sel[3] is set
+dffe_s #(`PIB_EVQ_CNT_WIDTH) dff_picl_evq3 (
+    .clk (clk),
+    .en  (update_evq_sel[3]),
+    .din (update_evq3_data[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .q   (picl_evq3[`PIB_EVQ_CNT_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// selecting the thread whose picl is incremented this cycle
+//
+// Current picl count of the thread chosen by pic_update_ctl; feeds the
+// shared picl adder.
+mux4ds #(`PIB_PIC_CNT_WIDTH) mux_picl_cnt_din (
+       .dout (picl_cnt_din[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in0  (picl_cnt0[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel0 (pic_update_ctl[0]),
+       .in1  (picl_cnt1[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel1 (pic_update_ctl[1]),
+       .in2  (picl_cnt2[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel2 (pic_update_ctl[2]),
+       .in3  (picl_cnt3[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel3 (pic_update_ctl[3])
+);
+//
+// selecting the correct input for incrementing the picl
+// thread0
+// picl_evq0_din is picl_evq0_sum while incr_evq[0] is set, else picl_evq0
+mux2ds #(`PIB_EVQ_CNT_WIDTH) mux_picl_evq0_din (
+       .dout (picl_evq0_din[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .in0  (picl_evq0_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel0 (incr_evq[0]),
+       .in1  (picl_evq0[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel1 (~incr_evq[0])
+);
+//
+// thread1
+mux2ds #(`PIB_EVQ_CNT_WIDTH) mux_picl_evq1_din (
+       .dout (picl_evq1_din[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .in0  (picl_evq1_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel0 (incr_evq[1]),
+       .in1  (picl_evq1[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel1 (~incr_evq[1])
+);
+//
+// thread2
+mux2ds #(`PIB_EVQ_CNT_WIDTH) mux_picl_evq2_din (
+       .dout (picl_evq2_din[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .in0  (picl_evq2_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel0 (incr_evq[2]),
+       .in1  (picl_evq2[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel1 (~incr_evq[2])
+);
+//
+// thread3
+mux2ds #(`PIB_EVQ_CNT_WIDTH) mux_picl_evq3_din (
+       .dout (picl_evq3_din[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .in0  (picl_evq3_sum[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel0 (incr_evq[3]),
+       .in1  (picl_evq3[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel1 (~incr_evq[3])
+);
+
+//
+// evq amount of the thread chosen by pic_update_ctl; this is the increment
+// operand of the shared picl adder.
+mux4ds #(`PIB_EVQ_CNT_WIDTH) mux_picl_evq_din (
+       .dout (picl_evq_din[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .in0  (picl_evq0_din[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel0 (pic_update_ctl[0]),
+       .in1  (picl_evq1_din[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel1 (pic_update_ctl[1]),
+       .in2  (picl_evq2_din[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel2 (pic_update_ctl[2]),
+       .in3  (picl_evq3_din[`PIB_EVQ_CNT_WIDTH-1:0]),
+       .sel3 (pic_update_ctl[3])
+);
+//
+// picl incrementor  - shared between four threads
+//
+// One adder instance for all threads: operands are the muxed count
+// (picl_cnt_din) and the muxed evq amount (picl_evq_din).
+tlu_addern_32 #(`PIB_PIC_CNT_WIDTH,`PIB_EVQ_CNT_WIDTH) picl_adder (
+    .sum  (picl_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0]),
+    .din  (picl_cnt_din[`PIB_PIC_CNT_WIDTH-1:0]),
+    .incr (picl_evq_din[`PIB_EVQ_CNT_WIDTH-1:0])
+) ;
+//
+// construction mux selects for picl update
+
+// A WSR targets the PIC when either pic_npriv_rw_g or pic_priv_rw_g is set.
+// NOTE(review): the vector forms assume these nets are declared [3:0]
+// (descending) - confirm against the wire declarations.
+assign wsr_pic_sel[3:0] =
+           wsr_thread_inst_g[3:0] & {4{pic_npriv_rw_g | pic_priv_rw_g}};
+
+// picl flop enable per thread: reset, this thread's update slot, or a WSR
+// to the PIC.
+assign update_picl_sel[3:0] =
+           {4{local_rst}} | pic_update_ctl[3:0] | wsr_pic_sel[3:0];
+
+// constructing the selects to choose to update the picl wrap - added for bug 2588 
+// Enabled on any picl update and also on a PCR write from the same thread.
+// NOTE(review): the vector form assumes these nets are declared [3:0]
+// (descending) - confirm against the wire declarations.
+assign update_picl_wrap_en[3:0] =
+           wsr_pcr_sel[3:0] | update_picl_sel[3:0];
+//
+// extracting the wsr_data information to update the picls
+//
+// WSR write data for picl: the PICL count field of tlu_wsr_data_w with a
+// zero bit prepended as the MSB.
+assign picl_wsr_data = {1'b0, tlu_wsr_data_w[`PIB_PICL_CNT_HI:`PIB_PICL_CNT_LO]}; 
+//
+// selecting the data for picl update
+// thread 0 
+// picl0 next value, in priority order: zeros under local_rst, WSR data on
+// a PIC write from thread 0, otherwise the shared adder result.
+mux3ds #(`PIB_PIC_CNT_WIDTH) mux_update_picl0_data (
+       .dout (update_picl0_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in0  ({`PIB_PIC_CNT_WIDTH{1'b0}}),
+       .sel0 (local_rst),
+       .in1  (picl_wsr_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel1 (~local_rst & wsr_pic_sel[0]),
+       .in2  (picl_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel2 (~local_rst & ~wsr_pic_sel[0])
+);
+
+// picl0 register, loaded only while update_picl_sel[0] is set
+dffe_s #(`PIB_PIC_CNT_WIDTH) dff_picl_cnt0 (
+    .clk (clk),
+    .en  (update_picl_sel[0]),
+    .din (update_picl0_data[`PIB_PIC_CNT_WIDTH-1:0]),
+    .q   (picl_cnt0[`PIB_PIC_CNT_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// thread 1
+// picl1 next value, in priority order: zeros under local_rst, WSR data on
+// a PIC write from thread 1, otherwise the shared adder result.
+mux3ds #(`PIB_PIC_CNT_WIDTH) mux_update_picl1_data (
+       .dout (update_picl1_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in0  ({`PIB_PIC_CNT_WIDTH{1'b0}}),
+       .sel0 (local_rst),
+       .in1  (picl_wsr_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel1 (~local_rst & wsr_pic_sel[1]),
+       .in2  (picl_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel2 (~local_rst & ~wsr_pic_sel[1])
+);
+
+// picl1 register, loaded only while update_picl_sel[1] is set
+dffe_s #(`PIB_PIC_CNT_WIDTH) dff_picl_cnt1 (
+    .clk (clk),
+    .en  (update_picl_sel[1]),
+    .din (update_picl1_data[`PIB_PIC_CNT_WIDTH-1:0]),
+    .q   (picl_cnt1[`PIB_PIC_CNT_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// thread 2
+// picl2 next value, in priority order: zeros under local_rst, WSR data on
+// a PIC write from thread 2, otherwise the shared adder result.
+mux3ds #(`PIB_PIC_CNT_WIDTH) mux_update_picl2_data (
+       .dout (update_picl2_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in0  ({`PIB_PIC_CNT_WIDTH{1'b0}}),
+       .sel0 (local_rst),
+       .in1  (picl_wsr_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel1 (~local_rst & wsr_pic_sel[2]),
+       .in2  (picl_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel2 (~local_rst & ~wsr_pic_sel[2])
+);
+
+// picl2 register, loaded only while update_picl_sel[2] is set
+dffe_s #(`PIB_PIC_CNT_WIDTH) dff_picl_cnt2 (
+    .clk (clk),
+    .en  (update_picl_sel[2]),
+    .din (update_picl2_data[`PIB_PIC_CNT_WIDTH-1:0]),
+    .q   (picl_cnt2[`PIB_PIC_CNT_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// thread 3
+// picl3 next value, in priority order: zeros under local_rst, WSR data on
+// a PIC write from thread 3, otherwise the shared adder result.
+mux3ds #(`PIB_PIC_CNT_WIDTH) mux_update_picl3_data (
+       .dout (update_picl3_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in0  ({`PIB_PIC_CNT_WIDTH{1'b0}}),
+       .sel0 (local_rst),
+       .in1  (picl_wsr_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel1 (~local_rst & wsr_pic_sel[3]),
+       .in2  (picl_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel2 (~local_rst & ~wsr_pic_sel[3])
+);
+
+// picl3 register, loaded only while update_picl_sel[3] is set
+dffe_s #(`PIB_PIC_CNT_WIDTH) dff_picl_cnt3 (
+    .clk (clk),
+    .en  (update_picl_sel[3]),
+    .din (update_picl3_data[`PIB_PIC_CNT_WIDTH-1:0]),
+    .q   (picl_cnt3[`PIB_PIC_CNT_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+//==================================================================
+// update the pichs - could be separated into a dp block if needed 
+//==================================================================
+//
+// Per-thread instruction-valid bits, staged from tlu_thread_inst_vld_g into
+// inst_vld_w2; cleared by local_rst.
+dffr_s #(`TLU_THRD_NUM) dffr_inst_vld_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (tlu_thread_inst_vld_g[`TLU_THRD_NUM-1:0]),
+    .q   (inst_vld_w2[`TLU_THRD_NUM-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// added for bug 4395
+// tlu_tcc_inst_w staged into tcc_inst_w2; cleared by local_rst. Used below
+// to override the flush qualifier in incr_pich.
+dffr_s dffr_tcc_inst_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (tlu_tcc_inst_w),
+    .q   (tcc_inst_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// modified for bug 4478
+// pich increments for a thread when counting is enabled and a valid
+// instruction is present in W2, unless the pipe is being fully flushed; a
+// tcc instruction still counts even when flushed.
+// NOTE(review): the vector forms assume incr_pich and pich_mux_sel are
+// declared [3:0] (descending) - confirm against the wire declarations.
+assign incr_pich[3:0] =
+           pic_cnt_en_w2[3:0] & inst_vld_w2[3:0] &
+           {4{~tlu_full_flush_pipe_w2 | tcc_inst_w2}};
+
+// same qualifier without the flush term
+assign pich_mux_sel[3:0] = pic_cnt_en_w2[3:0] & inst_vld_w2[3:0];
+
+// added to make the inst count overflow trap precise.
+// added for bug 4314
+// Set while counting is enabled and the stored wrap bit differs from the
+// pich counter MSB.
+assign pich_wrap_flg[0] =
+           pic_cnt_en_w2[0] & (pich_cnt0[`PIB_PIC_CNT_WIDTH-1] ^ pich_cnt_wrap[0]);
+assign pich_wrap_flg[1] =
+           pic_cnt_en_w2[1] & (pich_cnt1[`PIB_PIC_CNT_WIDTH-1] ^ pich_cnt_wrap[1]);
+assign pich_wrap_flg[2] =
+           pic_cnt_en_w2[2] & (pich_cnt2[`PIB_PIC_CNT_WIDTH-1] ^ pich_cnt_wrap[2]);
+assign pich_wrap_flg[3] =
+           pic_cnt_en_w2[3] & (pich_cnt3[`PIB_PIC_CNT_WIDTH-1] ^ pich_cnt_wrap[3]);
+
+// modified for bug 4270
+// pic experiment
+// Set when counting is enabled and pich bits [`PIB_PIC_CNT_WIDTH-2:2] are
+// all ones; the two low bits are examined separately further down.
+assign pich_fourbelow_din[0] =
+           pic_cnt_en_w2[0] & (&pich_cnt0[`PIB_PIC_CNT_WIDTH-2:2]);
+assign pich_fourbelow_din[1] =
+           pic_cnt_en_w2[1] & (&pich_cnt1[`PIB_PIC_CNT_WIDTH-2:2]);
+assign pich_fourbelow_din[2] =
+           pic_cnt_en_w2[2] & (&pich_cnt2[`PIB_PIC_CNT_WIDTH-2:2]);
+assign pich_fourbelow_din[3] =
+           pic_cnt_en_w2[3] & (&pich_cnt3[`PIB_PIC_CNT_WIDTH-2:2]);
+//
+// pich_fourbelow_flg is pich_fourbelow_din passed through a dff_s bank
+dff_s #(`TLU_THRD_NUM) dff_pich_fourbelow_flg (
+    .clk (clk),
+    .din (pich_fourbelow_din[`TLU_THRD_NUM-1:0]),
+    .q   (pich_fourbelow_flg[`TLU_THRD_NUM-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// modified for bug 4270
+// onebelow: fourbelow flag set and low two count bits == 11
+assign pich_onebelow_flg[0] =
+       pic_cnt_en_w2[0] & pich_fourbelow_flg[0] & pich_cnt0[0] & pich_cnt0[1];
+assign pich_onebelow_flg[1] =
+       pic_cnt_en_w2[1] & pich_fourbelow_flg[1] & pich_cnt1[0] & pich_cnt1[1];
+assign pich_onebelow_flg[2] =
+       pic_cnt_en_w2[2] & pich_fourbelow_flg[2] & pich_cnt2[0] & pich_cnt2[1];
+assign pich_onebelow_flg[3] =
+       pic_cnt_en_w2[3] & pich_fourbelow_flg[3] & pich_cnt3[0] & pich_cnt3[1];
+//
+// twobelow: fourbelow flag set and low two count bits == 10
+assign pich_twobelow_flg[0] =
+       pic_cnt_en_w2[0] & pich_fourbelow_flg[0] & ~pich_cnt0[0] & pich_cnt0[1];
+assign pich_twobelow_flg[1] =
+       pic_cnt_en_w2[1] & pich_fourbelow_flg[1] & ~pich_cnt1[0] & pich_cnt1[1];
+assign pich_twobelow_flg[2] =
+       pic_cnt_en_w2[2] & pich_fourbelow_flg[2] & ~pich_cnt2[0] & pich_cnt2[1];
+assign pich_twobelow_flg[3] =
+       pic_cnt_en_w2[3] & pich_fourbelow_flg[3] & ~pich_cnt3[0] & pich_cnt3[1];
+//
+/*
+assign pich_threebelow_flg[0] = 
+       (pich_fourbelow_flg[0] & ~pich_cnt0[1] & pich_cnt0[0]) & pic_cnt_en_w2[0]; 
+assign pich_threebelow_flg[1] = 
+       (pich_fourbelow_flg[1] & ~pich_cnt1[1] & pich_cnt1[0]) & pic_cnt_en_w2[1]; 
+assign pich_threebelow_flg[2] = 
+       (pich_fourbelow_flg[2] & ~pich_cnt2[1] & pich_cnt2[0]) & pic_cnt_en_w2[2]; 
+assign pich_threebelow_flg[3] = 
+       (pich_fourbelow_flg[3] & ~pich_cnt3[1] & pich_cnt3[0]) & pic_cnt_en_w2[3]; 
+*/
+//
+// added for bug 4836 
+assign pic_twobelow_e[0] = 
+       pich_mux_sel[0]? (pich_fourbelow_flg[0] & ~pich_cnt0[1] & pich_cnt0[0]):
+       (pich_fourbelow_flg[0] & pich_cnt0[1] & ~pich_cnt0[0]);
+assign pic_twobelow_e[1] = 
+       pich_mux_sel[1]? (pich_fourbelow_flg[1] & ~pich_cnt1[1] & pich_cnt1[0]):
+       (pich_fourbelow_flg[1] & pich_cnt1[1] & ~pich_cnt1[0]);
+assign pic_twobelow_e[2] = 
+       pich_mux_sel[2]? (pich_fourbelow_flg[2] & ~pich_cnt2[1] & pich_cnt2[0]):
+       (pich_fourbelow_flg[2] & pich_cnt2[1] & ~pich_cnt2[0]);
+assign pic_twobelow_e[3] = 
+       pich_mux_sel[3]? (pich_fourbelow_flg[3] & ~pich_cnt3[1] & pich_cnt3[0]):
+       (pich_fourbelow_flg[3] & pich_cnt3[1] & ~pich_cnt3[0]);
+
+assign tlu_pic_twobelow_e = 
+           (thread_rsel_e[0]) ? pic_twobelow_e[0]:
+           (thread_rsel_e[1]) ? pic_twobelow_e[1]:
+           (thread_rsel_e[2]) ? pic_twobelow_e[2]:
+            pic_twobelow_e[3];
+//
+assign pic_onebelow_e[0] = 
+       pich_mux_sel[0]? (pich_fourbelow_flg[0] & pich_cnt0[1] & ~pich_cnt0[0]):
+       (pich_fourbelow_flg[0] & pich_cnt0[1] & pich_cnt0[0]);
+assign pic_onebelow_e[1] = 
+       pich_mux_sel[1]? (pich_fourbelow_flg[1] & pich_cnt1[1] & ~pich_cnt1[0]):
+       (pich_fourbelow_flg[1] & pich_cnt1[1] & pich_cnt1[0]);
+assign pic_onebelow_e[2] = 
+       pich_mux_sel[2]? (pich_fourbelow_flg[2] & pich_cnt2[1] & ~pich_cnt2[0]):
+       (pich_fourbelow_flg[2] & pich_cnt2[1] & pich_cnt2[0]);
+assign pic_onebelow_e[3] = 
+       pich_mux_sel[3]? (pich_fourbelow_flg[3] & pich_cnt3[1] & ~pich_cnt3[0]):
+       (pich_fourbelow_flg[3] & pich_cnt3[1] & pich_cnt3[0]);
+
+assign tlu_pic_onebelow_e = 
+           (thread_rsel_e[0]) ? pic_onebelow_e[0]:
+           (thread_rsel_e[1]) ? pic_onebelow_e[1]:
+           (thread_rsel_e[2]) ? pic_onebelow_e[2]:
+            pic_onebelow_e[3];
+//
+assign pic_wrap_e[0] = 
+       pich_mux_sel[0]? (pich_fourbelow_flg[0] & pich_cnt0[1] & pich_cnt0[0]):
+       (pich_cnt_wrap[0] ^ pich_cnt0[`PIB_PIC_CNT_WIDTH-1]);
+assign pic_wrap_e[1] = 
+       pich_mux_sel[1]? (pich_fourbelow_flg[1] & pich_cnt1[1] & pich_cnt1[0]):
+       (pich_cnt_wrap[1] ^ pich_cnt1[`PIB_PIC_CNT_WIDTH-1]);
+assign pic_wrap_e[2] = 
+       pich_mux_sel[2]? (pich_fourbelow_flg[2] & pich_cnt2[1] & pich_cnt2[0]):
+       (pich_cnt_wrap[2] ^ pich_cnt2[`PIB_PIC_CNT_WIDTH-1]);
+assign pic_wrap_e[3] = 
+       pich_mux_sel[3]? (pich_fourbelow_flg[3] & pich_cnt3[1] & pich_cnt3[0]):
+       (pich_cnt_wrap[3] ^ pich_cnt3[`PIB_PIC_CNT_WIDTH-1]);
+
+assign tlu_pic_wrap_e = 
+           (thread_rsel_e[0]) ? pic_wrap_e[0]:
+           (thread_rsel_e[1]) ? pic_wrap_e[1]:
+           (thread_rsel_e[2]) ? pic_wrap_e[2]:
+            pic_wrap_e[3];
+//
+//
+// modified for bug 5436: Niagara 2.0
+assign tlu_pcr_ut[0] = pcr0[`PIB_PCR_UT];
+assign tlu_pcr_ut[1] = pcr1[`PIB_PCR_UT];
+assign tlu_pcr_ut[2] = pcr2[`PIB_PCR_UT];
+assign tlu_pcr_ut[3] = pcr3[`PIB_PCR_UT];
+//
+assign tlu_pcr_st[0] = pcr0[`PIB_PCR_ST];
+assign tlu_pcr_st[1] = pcr1[`PIB_PCR_ST];
+assign tlu_pcr_st[2] = pcr2[`PIB_PCR_ST];
+assign tlu_pcr_st[3] = pcr3[`PIB_PCR_ST];
+
+assign tlu_pcr_ut_e = 
+           (thread_rsel_e[0]) ? pcr0[`PIB_PCR_UT]: 
+           (thread_rsel_e[1]) ? pcr1[`PIB_PCR_UT]: 
+           (thread_rsel_e[2]) ? pcr2[`PIB_PCR_UT]:
+            pcr3[`PIB_PCR_UT]; 
+
+assign tlu_pcr_st_e = 
+           (thread_rsel_e[0]) ? pcr0[`PIB_PCR_ST]:
+           (thread_rsel_e[1]) ? pcr1[`PIB_PCR_ST]:
+           (thread_rsel_e[2]) ? pcr2[`PIB_PCR_ST]:
+            pcr3[`PIB_PCR_ST];
+
+       
+// reporting over-flow trap - needed to be precise, therefore
+// bypassing tlb-miss traps 
+// 
+// selecting the thread for incrementing for pich
+// added for bug2332
+//
+// one-hot mux change
+assign pich_cnt_din[`PIB_PIC_CNT_WIDTH-1:0] =
+       (pich_mux_sel[1])? pich_cnt1[`PIB_PIC_CNT_WIDTH-1:0]:
+       (pich_mux_sel[2])? pich_cnt2[`PIB_PIC_CNT_WIDTH-1:0]:
+       (pich_mux_sel[3])? pich_cnt3[`PIB_PIC_CNT_WIDTH-1:0]:
+       pich_cnt0[`PIB_PIC_CNT_WIDTH-1:0];
+/*
+assign incr_pich_onehot = ~(|incr_pich[3:1]) | rst_tri_en;
+mux4ds #(`PIB_PIC_CNT_WIDTH) mux_pich_cnt_din (
+       .in0  (pich_cnt0[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in1  (pich_cnt1[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in2  (pich_cnt2[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in3  (pich_cnt3[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel0 (incr_pich_onehot),
+       .sel1 (incr_pich[1] & ~rst_tri_en),
+       .sel2 (incr_pich[2] & ~rst_tri_en),
+       .sel3 (incr_pich[3] & ~rst_tri_en),
+       .dout (pich_cnt_din[`PIB_PIC_CNT_WIDTH-1:0])
+);
+*/
+//
+// pich incrementor  - shared between four threads
+//
+tlu_addern_32 #(`PIB_PIC_CNT_WIDTH,1) pich_adder (
+    .din  (pich_cnt_din[`PIB_PIC_CNT_WIDTH-1:0]),
+    .incr (1'b1),
+    .sum  (pich_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0])
+) ;
+//
+// extracting the wsr_data information to update the pichs
+//
+assign pich_wsr_data = {1'b0, tlu_wsr_data_w[`PIB_PICH_CNT_HI:`PIB_PICH_CNT_LO]}; 
+
+// constructing the selects to choose to update the pich 
+assign update_pich_sel[0] = (local_rst | incr_pich[0] | wsr_pic_sel[0]); 
+assign update_pich_sel[1] = (local_rst | incr_pich[1] | wsr_pic_sel[1]); 
+assign update_pich_sel[2] = (local_rst | incr_pich[2] | wsr_pic_sel[2]); 
+assign update_pich_sel[3] = (local_rst | incr_pich[3] | wsr_pic_sel[3]); 
+
+// constructing the selects to choose to update the pich wrap 
+assign update_pich_wrap_en[0] = 
+           update_pich_sel[0] | wsr_pcr_sel[0]; 
+assign update_pich_wrap_en[1] = 
+           update_pich_sel[1] | wsr_pcr_sel[1]; 
+assign update_pich_wrap_en[2] = 
+           update_pich_sel[2] | wsr_pcr_sel[2]; 
+assign update_pich_wrap_en[3] = 
+           update_pich_sel[3] | wsr_pcr_sel[3]; 
+//
+// selecting the data for pich update
+// thread 0 
+mux3ds #(`PIB_PIC_CNT_WIDTH) mux_update_pich0_data (
+       .in0  ({`PIB_PIC_CNT_WIDTH{1'b0}}),
+       .in1  (pich_wsr_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in2  (pich_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel0 (local_rst),
+       .sel1 (wsr_pic_sel[0] & ~local_rst),
+       .sel2 (~(wsr_pic_sel[0] | local_rst)),
+       .dout (update_pich0_data[`PIB_PIC_CNT_WIDTH-1:0])
+);
+
+dffe_s #(`PIB_PIC_CNT_WIDTH) dff_pich_cnt0 (
+    .din (update_pich0_data[`PIB_PIC_CNT_WIDTH-1:0]),
+    .q   (pich_cnt0[`PIB_PIC_CNT_WIDTH-1:0]),
+    .clk (clk),
+    .en  (update_pich_sel[0]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// thread 1 
+mux3ds #(`PIB_PIC_CNT_WIDTH) mux_update_pich1_data (
+       .in0  ({`PIB_PIC_CNT_WIDTH{1'b0}}),
+       .in1  (pich_wsr_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in2  (pich_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel0 (local_rst),
+       .sel1 (wsr_pic_sel[1] & ~local_rst),
+       .sel2 (~(wsr_pic_sel[1] | local_rst)), 
+       .dout (update_pich1_data[`PIB_PIC_CNT_WIDTH-1:0])
+);
+
+dffe_s #(`PIB_PIC_CNT_WIDTH) dff_pich_cnt1 (
+    .din (update_pich1_data[`PIB_PIC_CNT_WIDTH-1:0]),
+    .q   (pich_cnt1[`PIB_PIC_CNT_WIDTH-1:0]),
+    .clk (clk),
+    .en  (update_pich_sel[1]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// thread 2 
+mux3ds #(`PIB_PIC_CNT_WIDTH) mux_update_pich2_data (
+       .in0  ({`PIB_PIC_CNT_WIDTH{1'b0}}),
+       .in1  (pich_wsr_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in2  (pich_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel0 (local_rst),
+       .sel1 (wsr_pic_sel[2] & ~local_rst),
+       .sel2 (~(wsr_pic_sel[2] | local_rst)),
+       .dout (update_pich2_data[`PIB_PIC_CNT_WIDTH-1:0])
+);
+
+dffe_s #(`PIB_PIC_CNT_WIDTH) dff_pich_cnt2 (
+    .din (update_pich2_data[`PIB_PIC_CNT_WIDTH-1:0]),
+    .q   (pich_cnt2[`PIB_PIC_CNT_WIDTH-1:0]),
+    .clk (clk),
+    .en  (update_pich_sel[2]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// thread 3
+mux3ds #(`PIB_PIC_CNT_WIDTH) mux_update_pich3_data (
+       .in0  ({`PIB_PIC_CNT_WIDTH{1'b0}}),
+       .in1  (pich_wsr_data[`PIB_PIC_CNT_WIDTH-1:0]),
+       .in2  (pich_cnt_sum[`PIB_PIC_CNT_WIDTH-1:0]),
+       .sel0 (local_rst),
+       .sel1 (wsr_pic_sel[3] & ~local_rst),
+       .sel2 (~(wsr_pic_sel[3] | local_rst)),
+       .dout (update_pich3_data[`PIB_PIC_CNT_WIDTH-1:0])
+);
+
+dffe_s #(`PIB_PIC_CNT_WIDTH) dff_pich_cnt3 (
+    .din (update_pich3_data[`PIB_PIC_CNT_WIDTH-1:0]),
+    .q   (pich_cnt3[`PIB_PIC_CNT_WIDTH-1:0]),
+    .clk (clk),
+    .en  (update_pich_sel[3]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+//==========================
+// reading the PCRs and PICs 
+//==========================
+// decoding the thread information for rsr instruction from IFU
+// modified due to timing
+/*
+assign thread_rsel_e[0] = ~(|ifu_tlu_thrid_e[1:0]);
+assign thread_rsel_e[1] = ~ifu_tlu_thrid_e[1] &  ifu_tlu_thrid_e[0];
+assign thread_rsel_e[2] =  ifu_tlu_thrid_e[1] & ~ifu_tlu_thrid_e[0];
+assign thread_rsel_e[3] =  (&ifu_tlu_thrid_e[1:0]);
+*/
+assign thread_rsel_d[0] = ~(|ifu_tlu_thrid_d[1:0]);
+assign thread_rsel_d[1] = ~ifu_tlu_thrid_d[1] &  ifu_tlu_thrid_d[0];
+assign thread_rsel_d[2] =  ifu_tlu_thrid_d[1] & ~ifu_tlu_thrid_d[0];
+// assign thread_rsel_d[3] =  (&ifu_tlu_thrid_d[1:0]);
+//
+dff_s #(`TLU_THRD_NUM-1) dff_thread_rsel_e (
+    .din (thread_rsel_d[`TLU_THRD_NUM-2:0]),
+    .q   (thread_rsel_e[`TLU_THRD_NUM-2:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// selecting the correct pic for rdpr
+// modified to avoid rte failure
+assign pic_rdata_e[`TLU_ASR_DATA_WIDTH-1:0] = 
+       (thread_rsel_e[0])?
+       {pich_cnt0[`PIB_PIC_CNT_WIDTH-2:0], picl_cnt0[`PIB_PIC_CNT_WIDTH-2:0]}:
+       (thread_rsel_e[1])?
+       {pich_cnt1[`PIB_PIC_CNT_WIDTH-2:0], picl_cnt1[`PIB_PIC_CNT_WIDTH-2:0]}:
+       (thread_rsel_e[2])?
+       {pich_cnt2[`PIB_PIC_CNT_WIDTH-2:0], picl_cnt2[`PIB_PIC_CNT_WIDTH-2:0]}:
+       {pich_cnt3[`PIB_PIC_CNT_WIDTH-2:0], picl_cnt3[`PIB_PIC_CNT_WIDTH-2:0]};
+/*
+mux4ds #(`TLU_ASR_DATA_WIDTH) mux_pic_rdata (
+        .in0    ({pich_cnt0[`PIB_PIC_CNT_WIDTH-2:0], picl_cnt0[`PIB_PIC_CNT_WIDTH-2:0]}),
+        .in1    ({pich_cnt1[`PIB_PIC_CNT_WIDTH-2:0], picl_cnt1[`PIB_PIC_CNT_WIDTH-2:0]}),
+        .in2    ({pich_cnt2[`PIB_PIC_CNT_WIDTH-2:0], picl_cnt2[`PIB_PIC_CNT_WIDTH-2:0]}),
+        .in3    ({pich_cnt3[`PIB_PIC_CNT_WIDTH-2:0], picl_cnt3[`PIB_PIC_CNT_WIDTH-2:0]}),
+        .sel0   (thread_rsel_e[0]),
+        .sel1   (thread_rsel_e[1]),
+        .sel2   (thread_rsel_e[2]),
+        .sel3   (thread_rsel_e[3]),
+        .dout   (pic_rdata_e[`TLU_ASR_DATA_WIDTH-1:0])
+);
+
+// selecting the correct pcr for rdpr
+// modified for bug 2391
+mux4ds #(`TLU_ASR_DATA_WIDTH) mux_pcr_rdata (
+        .in0    ({58'b0,pcr0[`PIB_PCR_WIDTH-1:0]}), 
+        .in1    ({58'b0,pcr1[`PIB_PCR_WIDTH-1:0]}),
+        .in2    ({58'b0,pcr2[`PIB_PCR_WIDTH-1:0]}),
+        .in3    ({58'b0,pcr3[`PIB_PCR_WIDTH-1:0]}),
+        .sel0   (thread_rsel_e[0]),
+        .sel1   (thread_rsel_e[1]),
+        .sel2   (thread_rsel_e[2]),
+        .sel3   (thread_rsel_e[3]),
+        .dout   (pcr_rdata_e[`TLU_ASR_DATA_WIDTH-1:0])
+);
+
+mux4ds #(`PIB_PCR_WIDTH) mux_pcr_rdata (
+        .in0    (pcr0[`PIB_PCR_WIDTH-1:0]), 
+        .in1    (pcr1[`PIB_PCR_WIDTH-1:0]),
+        .in2    (pcr2[`PIB_PCR_WIDTH-1:0]),
+        .in3    (pcr3[`PIB_PCR_WIDTH-1:0]),
+        .sel0   (thread_rsel_e[0]),
+        .sel1   (thread_rsel_e[1]),
+        .sel2   (thread_rsel_e[2]),
+        .sel3   (thread_rsel_e[3]),
+        .dout   (pcr_reg_rdata_e[`PIB_PCR_WIDTH-1:0])
+);
+*/
+
+assign pcr_reg_rdata_e[`PIB_PCR_WIDTH-1:0] =
+       (thread_rsel_e[0])? pcr0[`PIB_PCR_WIDTH-1:0]:
+       (thread_rsel_e[1])? pcr1[`PIB_PCR_WIDTH-1:0]:
+       (thread_rsel_e[2])? pcr2[`PIB_PCR_WIDTH-1:0]:
+       pcr3[`PIB_PCR_WIDTH-1:0];
+
+assign pcr_rdata_e[`TLU_ASR_DATA_WIDTH-1:0] =
+           {54'b0, // rsvd bits 
+            pcr_reg_rdata_e[`PIB_PCR_CH_OVF:`PIB_PCR_CL_OVF], 
+            1'b0,  // rsvd bit
+            pcr_reg_rdata_e[`PIB_PCR_SL_HI:`PIB_PCR_SL_LO], 
+            1'b0,  // rsvd bit
+            pcr_reg_rdata_e[`PIB_PCR_UT:`PIB_PCR_PRIV]}; 
+
+// constructing the mux select for the output mux for rsr inst
+assign rsr_data_sel_e[0] = pcr_rw_e;
+assign rsr_data_sel_e[1] = ~pcr_rw_e; 
+
+// modified due to timing 
+// assign rsr_data_sel_e[1] = ~pcr_rw_e & (pic_npriv_rw_e | pic_priv_rw_e);
+// assign rsr_data_sel_e[2] = ~(|rsr_data_sel_e[1:0]);
+/*
+mux3ds #(`TLU_ASR_DATA_WIDTH) mux_exu_rsr_data_e (
+	.in0(pcr_rdata_e[`TLU_ASR_DATA_WIDTH-1:0]),  
+	.in1(pic_rdata_e[`TLU_ASR_DATA_WIDTH-1:0]),
+	.in2(tlu_pib_rsr_data_e[`TLU_ASR_DATA_WIDTH-1:0]),
+	.sel0(rsr_data_sel_e[0]),
+	.sel1(rsr_data_sel_e[1]),
+	.sel2(rsr_data_sel_e[2]),
+	.dout(tlu_exu_rsr_data_e[`TLU_ASR_DATA_WIDTH-1:0])
+);
+*/
+mux2ds #(`TLU_ASR_DATA_WIDTH) mux_tlu_pib_rsr_data_e (
+	.in0(pcr_rdata_e[`TLU_ASR_DATA_WIDTH-1:0]),  
+	.in1(pic_rdata_e[`TLU_ASR_DATA_WIDTH-1:0]),
+	.sel0(rsr_data_sel_e[0]),
+	.sel1(rsr_data_sel_e[1]),
+	.dout(tlu_pib_rsr_data_e[`TLU_ASR_DATA_WIDTH-1:0])
+);
+//==========================
+// over_flow trap 
+//==========================
+// staged the wrap bit for comparison
+//
+// thread 0 - modified for bug 3937
+mux2ds mux_picl_cnt_wrap_datain_0 (
+	.in0(picl_cnt0[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CL_OVF]),
+	.in1(picl_cnt0[`PIB_PIC_CNT_WIDTH-1]),
+	.sel0(wsr_pcr_sel[0]),
+	.sel1(~wsr_pcr_sel[0]),
+	.dout(picl_cnt_wrap_datain[0])
+);
+
+mux2ds mux_pich_cnt_wrap_datain_0 (
+	.in0(pich_cnt0[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CH_OVF]),
+	.in1(pich_cnt0[`PIB_PIC_CNT_WIDTH-1]),
+	.sel0(wsr_pcr_sel[0]),
+	.sel1(~wsr_pcr_sel[0]),
+	.dout(pich_cnt_wrap_datain[0])
+);
+/*
+assign picl_cnt_wrap_datain[0] = 
+           (picl_cnt0[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CL_OVF]);
+
+assign pich_cnt_wrap_datain[0] = 
+           (pich_cnt0[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CH_OVF]);
+*/
+
+dffre_s dffre_picl0_wrap (
+    .din (picl_cnt_wrap_datain[0]),
+    .q   (picl_cnt_wrap[0]),
+    .clk (clk),
+    .en  (update_picl_wrap_en[0]),
+    .rst (local_rst | wsr_pic_sel[0]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_pich0_wrap (
+    .din (pich_cnt_wrap_datain[0]),
+    .q   (pich_cnt_wrap[0]),
+    .clk (clk),
+    .en  (update_pich_wrap_en[0]),
+    .rst (local_rst | wsr_pic_sel[0]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// thread 1 - modified for bug 3937
+mux2ds mux_picl_cnt_wrap_datain_1 (
+	.in0(picl_cnt1[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CL_OVF]),
+	.in1(picl_cnt1[`PIB_PIC_CNT_WIDTH-1]),
+	.sel0(wsr_pcr_sel[1]),
+	.sel1(~wsr_pcr_sel[1]),
+	.dout(picl_cnt_wrap_datain[1])
+);
+
+mux2ds mux_pich_cnt_wrap_datain_1 (
+	.in0(pich_cnt1[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CH_OVF]),
+	.in1(pich_cnt1[`PIB_PIC_CNT_WIDTH-1]),
+	.sel0(wsr_pcr_sel[1]),
+	.sel1(~wsr_pcr_sel[1]),
+	.dout(pich_cnt_wrap_datain[1])
+);
+/*
+assign picl_cnt_wrap_datain[1] = 
+           (picl_cnt1[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CL_OVF]); 
+
+assign pich_cnt_wrap_datain[1] = 
+           (pich_cnt1[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CH_OVF]); 
+*/
+
+dffre_s dffre_picl1_wrap (
+    .din (picl_cnt_wrap_datain[1]),
+    .q   (picl_cnt_wrap[1]),
+    .clk (clk),
+    .en  (update_picl_wrap_en[1]),
+    .rst (local_rst | wsr_pic_sel[1]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_pich1_wrap (
+    .din (pich_cnt_wrap_datain[1]),
+    .q   (pich_cnt_wrap[1]),
+    .clk (clk),
+    .en  (update_pich_wrap_en[1]),
+    .rst (local_rst | wsr_pic_sel[1]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// thread 2 - modified for bug 3937
+mux2ds mux_picl_cnt_wrap_datain_2 (
+	.in0(picl_cnt2[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CL_OVF]),
+	.in1(picl_cnt2[`PIB_PIC_CNT_WIDTH-1]),
+	.sel0(wsr_pcr_sel[2]),
+	.sel1(~wsr_pcr_sel[2]),
+	.dout(picl_cnt_wrap_datain[2])
+);
+
+mux2ds mux_pich_cnt_wrap_datain_2 (
+	.in0(pich_cnt2[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CH_OVF]),
+	.in1(pich_cnt2[`PIB_PIC_CNT_WIDTH-1]),
+	.sel0(wsr_pcr_sel[2]),
+	.sel1(~wsr_pcr_sel[2]),
+	.dout(pich_cnt_wrap_datain[2])
+);
+/*
+assign picl_cnt_wrap_datain[2] = 
+           (picl_cnt2[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CL_OVF]); 
+
+assign pich_cnt_wrap_datain[2] = 
+           (pich_cnt2[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CH_OVF]); 
+*/
+
+dffre_s dffre_picl2_wrap (
+    .din (picl_cnt_wrap_datain[2]),
+    .q   (picl_cnt_wrap[2]),
+    .clk (clk),
+    .en  (update_picl_wrap_en[2]),
+    .rst (local_rst | wsr_pic_sel[2]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_pich2_wrap (
+    .din (pich_cnt_wrap_datain[2]),
+    .q   (pich_cnt_wrap[2]),
+    .clk (clk),
+    .en  (update_pich_wrap_en[2]),
+    .rst (local_rst | wsr_pic_sel[2]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// thread 3 - modified for bug 3937
+mux2ds mux_picl_cnt_wrap_datain_3 (
+	.in0(picl_cnt3[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CL_OVF]),
+	.in1(picl_cnt3[`PIB_PIC_CNT_WIDTH-1]),
+	.sel0(wsr_pcr_sel[3]),
+	.sel1(~wsr_pcr_sel[3]),
+	.dout(picl_cnt_wrap_datain[3])
+);
+
+mux2ds mux_pich_cnt_wrap_datain_3 (
+	.in0(pich_cnt3[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CH_OVF]),
+	.in1(pich_cnt3[`PIB_PIC_CNT_WIDTH-1]),
+	.sel0(wsr_pcr_sel[3]),
+	.sel1(~wsr_pcr_sel[3]),
+	.dout(pich_cnt_wrap_datain[3])
+);
+/*
+assign picl_cnt_wrap_datain[3] = 
+           (picl_cnt3[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CL_OVF]);
+
+assign pich_cnt_wrap_datain[3] = 
+           (pich_cnt3[`PIB_PIC_CNT_WIDTH-1] ^ pcr_wdata_in[`PIB_PCR_CH_OVF]); 
+*/
+
+dffre_s dffre_picl3_wrap (
+    .din (picl_cnt_wrap_datain[3]),
+    .q   (picl_cnt_wrap[3]),
+    .clk (clk),
+    .en  (update_picl_wrap_en[3]),
+    .rst (local_rst | wsr_pic_sel[3]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffre_s dffre_pich3_wrap (
+    .din (pich_cnt_wrap_datain[3]),
+    .q   (pich_cnt_wrap[3]),
+    .clk (clk),
+    .en  (update_pich_wrap_en[3]),
+    .rst (local_rst | wsr_pic_sel[3]),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// generating the over-flow (0->1) to be set in sftint[15]
+assign pib_picl_wrap[0] = 
+         ((picl_cnt_wrap[0] ^ picl_cnt0[`PIB_PIC_CNT_WIDTH-1]) & incr_evq[0]);  
+assign pib_picl_wrap[1] = 
+         ((picl_cnt_wrap[1] ^ picl_cnt1[`PIB_PIC_CNT_WIDTH-1]) & incr_evq[1]);  
+assign pib_picl_wrap[2] = 
+         ((picl_cnt_wrap[2] ^ picl_cnt2[`PIB_PIC_CNT_WIDTH-1]) & incr_evq[2]);  
+assign pib_picl_wrap[3] = 
+         ((picl_cnt_wrap[3] ^ picl_cnt3[`PIB_PIC_CNT_WIDTH-1]) & incr_evq[3]);  
+//
+endmodule
Index: /trunk/T1-CPU/tlu/sparc_tlu_zcmp64.v
===================================================================
--- /trunk/T1-CPU/tlu/sparc_tlu_zcmp64.v	(revision 6)
+++ /trunk/T1-CPU/tlu/sparc_tlu_zcmp64.v	(revision 6)
@@ -0,0 +1,52 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_tlu_zcmp64.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_tlu_zcmp64
+//  Description:    
+//    64b zero compare
+*/
+
+module sparc_tlu_zcmp64(/*AUTOARG*/
+   // Outputs
+   zero, 
+   // Inputs
+   in
+   );
+   // 64-bit zero detector: 'zero' reports whether 'in' compares equal to 0.
+   input [63:0] in;     // value to be tested
+   // driven to 1 when 'in' == 64'b0, otherwise driven to 0
+   output      zero;
+   // reg only because it is assigned procedurally; both branches assign it
+   reg 	       zero;
+   // no clock involved: the block is re-evaluated on every change of 'in'
+   always @ (in)
+     begin
+	if (in == 64'b0)
+	  zero = 1'b1;
+	else
+	  zero = 1'b0;   // also taken when the compare result is unknown (x/z in 'in')
+     end
+   
+
+endmodule // sparc_tlu_zcmp64
+
+	
Index: /trunk/T1-CPU/tlu/tlu_hyperv.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_hyperv.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_hyperv.v	(revision 6)
@@ -0,0 +1,3345 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_hyperv.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    Block that contain most of the Hypervisor support
+//                      additions 
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include        "sys.h" // system level definition file which contains the
+                        // time scale definition
+
+`include "tlu.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module	tlu_hyperv (/*AUTOARG*/
+    // outputs
+    tlu_htickcmp_rw_e, tlu_gl_rw_m, tlu_hpstate_din_sel0, tlu_hpstate_din_sel1,
+    tlu_hpstate_din_sel2, tlu_hpstate_din_sel3, tlu_update_hpstate_l_w2,
+    tlu_htickcmp_intdis, tlu_gl_lvl0, tlu_gl_lvl1, tlu_gl_lvl2, tlu_gl_lvl3, 
+    tlu_htstate_rw_d, tlu_wr_hintp_g, tlu_htstate_rw_g, tlu_set_hintp_sel_g, 
+    tlu_htba_en_l, tlu_scpd_wr_addr_g, tlu_scpd_wr_vld_g, tlu_scpd_rd_vld_m, 
+    tlu_scpd_rd_addr_m, tlu_hscpd_dacc_excpt_m, tlu_hyperv_rdpr_sel, 
+    tlu_exu_agp_swap, tlu_exu_agp, tlu_cpu_mondo_cmp, tlu_dev_mondo_cmp, 
+    tlu_resum_err_cmp, tlu_asi_queue_rd_vld_g, tlu_asi_queue_data_g, tlu_ld_data_vld_g, 
+    tlu_va_ill_g, tlu_asi_queue_rdata_g, tlu_qtail_dacc_excpt_m, tlu_asi_write_g, so, 
+    inc_ind_asi_wr_indr, inc_ind_asi_wr_inrr, inc_ind_asi_rd_invr, tlu_local_thrid_g, 
+    // inputs
+    ifu_tlu_thrid_d, ifu_tlu_sraddr_d, tlu_wsr_data_w_global, tlu_tickcmp_sel, 
+    tlu_thrd_traps_w2, tlu_wsr_inst_nq_g, tlu_dnrtry0_inst_g, tlu_dnrtry1_inst_g, 
+    tlu_dnrtry2_inst_g, tlu_dnrtry3_inst_g, tlu_dnrtry_global_g, tlu_tick_ctl_din, 
+    tlu_pstate_priv, tlu_select_tba_w2, tlu_hpstate_priv, tlu_hpstate_enb, 
+    tlu_asi_state_e, ifu_lsu_alt_space_e, ifu_lsu_ld_inst_e, ifu_lsu_st_inst_e, 
+    lsu_tlu_early_flush_w, tlu_por_rstint_g, tlu_agp_tid_w2, // exu_lsu_ldst_va_e, 
+    tlu_local_flush_w, ifu_tlu_flush_fd_w, tlu_inst_vld_m, tlu_lsu_int_ldxa_vld_w2,
+    tlu_asi_data_nf_vld_w2, lsu_tlu_ldst_va_m, arst_l, grst_l, rst_tri_en, 
+    si, se, rclk);
+
+//=================================================
+// output
+//=================================================
+// mux select to tdp to access the hyper-privileged ASR registers
+output tlu_htickcmp_rw_e;
+output tlu_gl_rw_m;
+// output tlu_gl_rw_g;
+
+// global switch indicator - used to be in tlu_tcl
+output [1:0] tlu_exu_agp; 
+// output [1:0] tlu_exu_agp_tid;  
+output tlu_exu_agp_swap; 
+//
+// global register outputs
+output [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl0; // global register value t0 
+output [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl1; // global register value t1 
+output [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl2; // global register value t2 
+output [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl3; // global register value t3 
+
+// mux selects to choose source of data to store in the hpstate regs 
+output [1:0] tlu_hpstate_din_sel0;
+output [1:0] tlu_hpstate_din_sel1;
+output [1:0] tlu_hpstate_din_sel2;
+output [1:0] tlu_hpstate_din_sel3;
+//
+// mux selects to read out the ASR registers
+// output [3:0] tlu_rdpr_mx5_sel; 
+output [4:0] tlu_hyperv_rdpr_sel; 
+// signal indicating mx5 is used
+// output       tlu_rdpr_mx5_active; 
+//
+// hpstate write enable
+// modified for timing
+// output [`TLU_THRD_NUM-1:0] tlu_update_hpstate_l_g;
+output [`TLU_THRD_NUM-1:0] tlu_update_hpstate_l_w2;
+//
+// htick compare reg write enable 
+// output [`TLU_THRD_NUM-1:0] htickcmp_intdis_en;
+// htick compare interrupt disable
+output tlu_htickcmp_intdis;
+
+//  clock enable for hintp regs.
+// output [`TLU_THRD_NUM-1:0] tlu_hintp_en_l_g;
+// wr control for hintp regs.
+output [`TLU_THRD_NUM-1:0] tlu_wr_hintp_g;         
+// set control for hintp regs.
+// output [`TLU_THRD_NUM-1:0] tlu_set_hintp_g;
+output [`TLU_THRD_NUM-1:0] tlu_set_hintp_sel_g;
+//
+// update enable for the htba registers 
+output [`TLU_THRD_NUM-1:0] tlu_htba_en_l;
+// 
+// hypervisor lite indicator
+// output [`TLU_THRD_NUM-1:0] tlu_hyper_lite;
+// 
+// hyper-privileged scratch-pad data access exception 
+output tlu_hscpd_dacc_excpt_m;
+//
+// store instruction to alt space
+output tlu_asi_write_g;
+output [`TLU_THRD_NUM-1:0] inc_ind_asi_wr_indr;
+output [`TLU_THRD_NUM-1:0] inc_ind_asi_wr_inrr;
+output [`TLU_THRD_NUM-1:0] inc_ind_asi_rd_invr;
+output [`TLU_THRD_NUM-1:0] tlu_local_thrid_g;
+
+// read and write valids for the scratch-pad 
+output tlu_scpd_rd_vld_m, tlu_scpd_wr_vld_g;
+output [`SCPD_RW_ADDR_WIDTH-1:0] tlu_scpd_wr_addr_g;
+output [`SCPD_RW_ADDR_WIDTH-1:0] tlu_scpd_rd_addr_m;
+//
+// decode of the htstate register write/read
+output tlu_htstate_rw_d; 
+output tlu_htstate_rw_g; 
+// 
+// select for rdpr read in tlu_tdp
+// output tlu_htba_rsr_sel; 
+// 
+// 
+output [`TLU_THRD_NUM-1:0] tlu_cpu_mondo_cmp; 
+output [`TLU_THRD_NUM-1:0] tlu_dev_mondo_cmp; 
+output [`TLU_THRD_NUM-1:0] tlu_resum_err_cmp; 
+output tlu_qtail_dacc_excpt_m; 
+//
+// asi queue rdata output
+output [`TLU_ASI_QUE_WIDTH-1:0] tlu_asi_queue_rdata_g;
+output tlu_asi_queue_rd_vld_g;
+output tlu_ld_data_vld_g;
+// output tlu_scpd_rd_vld_g;
+output tlu_va_ill_g;
+// output tlu_va_all_zero_g;
+output tlu_lsu_int_ldxa_vld_w2;
+//
+// global nets
+output so;
+
+//=================================================
+// input
+//=================================================
+// non-thread specific por reset indicator
+// modified for timing
+// input [1:0] ifu_tlu_thrid_e; 
+input [1:0] ifu_tlu_thrid_d; 
+// 
+// staged write asr instruction
+// modified for timing 
+// input tlu_wsr_inst_g;
+input tlu_wsr_inst_nq_g;
+
+// non-threaded por instruction 
+input [`TLU_THRD_NUM-1:0] tlu_por_rstint_g;
+// 
+// addr of sr(st/pr)
+input [`TLU_ASR_ADDR_WIDTH-1:0] ifu_tlu_sraddr_d;      
+//
+// staged pr/st data from irf.
+input [`TLU_GLOBAL_WIDTH-1:0] tlu_wsr_data_w_global; 
+
+// restored global value for done/retry instructions 
+input [`TSA_GLOBAL_WIDTH-1:0] tlu_dnrtry_global_g;
+
+// thread specific done and retry signals
+input tlu_dnrtry0_inst_g, tlu_dnrtry1_inst_g;
+input tlu_dnrtry2_inst_g, tlu_dnrtry3_inst_g;
+// 
+// thread specific trap assertion signals
+// modified due to timing
+// input tlu_thrd0_traps, tlu_thrd1_traps;
+// input tlu_thrd2_traps, tlu_thrd3_traps;
+input [`TLU_THRD_NUM-1:0] tlu_thrd_traps_w2; 
+// 
+// indicating whether the trap is supervisor or hypervisor 
+// added for bug 2889
+// modified due to timing
+// input tlu_select_tba_g; 
+input tlu_select_tba_w2; 
+//
+input tlu_tick_ctl_din;
+// htick compare match
+// input tlu_htick_match;	
+//
+// select src for tickcmp 
+input [`TLU_THRD_NUM-1:0] tlu_tickcmp_sel;   
+// 
+// pstate - supervisor privilege  
+input [`TLU_THRD_NUM-1:0] tlu_pstate_priv;   
+// hpstate - hypervisor privilege  
+input [`TLU_THRD_NUM-1:0] tlu_hpstate_priv;   
+// hpstate - hypervisor lite enb  
+input [`TLU_THRD_NUM-1:0] tlu_hpstate_enb;   
+// thread id for the agp that needs swap
+// modified for timing
+// input [1:0] tlu_agp_tid_g;
+input [1:0] tlu_agp_tid_w2;
+// ASI addresses and valid bits for decode to 
+// access the ASI registers
+input 	    ifu_lsu_alt_space_e; // valid bit for the ASI data
+input 	    ifu_lsu_ld_inst_e;  // read enable
+input 	    ifu_lsu_st_inst_e;  // write enable
+// replaced due to timing violations
+// input 	    tlu_nlsu_flush_w;   // trap flush 
+input 	    tlu_local_flush_w;   // trap flush 
+input 	    ifu_tlu_flush_fd_w;   // trap flush 
+input 	    lsu_tlu_early_flush_w;   // trap flush 
+input 	    tlu_asi_data_nf_vld_w2;   // trap flush 
+input 	    tlu_inst_vld_m;     // instruciton valid 
+//
+// asi to be read/written
+input [`TLU_ASI_STATE_WIDTH-1:0] tlu_asi_state_e;  
+// input [`TLU_ASI_VA_WIDTH-1:0] exu_lsu_ldst_va_e;  
+input [`TLU_ASI_VA_WIDTH-1:0] lsu_tlu_ldst_va_m; 
+// head and tail pointers
+input [`TLU_ASI_QUE_WIDTH-1:0] tlu_asi_queue_data_g;
+
+//reset
+// input tlu_rst_l;  // unit reset
+input grst_l ;  // unit reset
+input arst_l ;  // unit reset
+input rst_tri_en ;  // unit reset
+//clk
+input rclk;
+//
+// global nets
+input si, se;
+
+/*AUTOOUTPUT*/
+//
+// staged thread id
+wire [1:0] thrid_e, thrid_m, thrid_g;
+// decoded thread id
+wire [`TLU_THRD_NUM-1:0] thread_sel_id_e; 
+wire [`TLU_THRD_NUM-1:0] thread_sel_id_m; 
+wire [`TLU_THRD_NUM-1:0] thread_sel_id_g; 
+wire [`TLU_THRD_NUM-1:0] thread_sel_id_w2; 
+// 
+// flush due to "early traps"
+wire 	    local_flush_all_w;   // trap flush 
+wire 	    local_flush_all_w2;   // trap flush 
+//
+// por indicators generations 
+wire por_rstint0_g, por_rstint1_g;
+wire por_rstint2_g, por_rstint3_g;
+//
+// GL register definitions - one GL register/thread
+wire [`TSA_GLOBAL_WIDTH-1:0] gl_lvl0, gl_lvl1, gl_lvl2, gl_lvl3;  
+// updated value of the GL registers 
+wire [`TSA_GLOBAL_WIDTH-1:0] gl_lvl0_new, gl_lvl1_new;  
+wire [`TSA_GLOBAL_WIDTH-1:0] gl_lvl2_new, gl_lvl3_new;  
+wire [`TSA_GLOBAL_WIDTH-1:0] gl_lvl0_update_g, gl_lvl1_update_g;  
+wire [`TSA_GLOBAL_WIDTH-1:0] gl_lvl2_update_g, gl_lvl3_update_g;  
+wire [`TSA_GLOBAL_WIDTH-1:0] gl_lvl0_update_w2, gl_lvl1_update_w2;  
+wire [`TSA_GLOBAL_WIDTH-1:0] gl_lvl2_update_w2, gl_lvl3_update_w2;  
+wire [`TSA_GLOBAL_WIDTH-1:0] wsr_gl_lvl0_data, wsr_gl_lvl1_data;  
+wire [`TSA_GLOBAL_WIDTH-1:0] wsr_gl_lvl2_data, wsr_gl_lvl3_data;  
+wire [`TSA_GLOBAL_WIDTH-1:0] dnrtry_gl_lvl0_data, dnrtry_gl_lvl1_data;  
+wire [`TSA_GLOBAL_WIDTH-1:0] dnrtry_gl_lvl2_data, dnrtry_gl_lvl3_data;  
+// value to be incremented for the GL registers
+// modified due to timing
+// wire gl0_incr_sel, gl1_incr_sel, gl2_incr_sel, gl3_incr_sel; 
+// wire[1:0] gl_lvl0_incr, gl_lvl1_incr, gl_lvl2_incr, gl_lvl3_incr; 
+// indicators to signal gl is at certain pre-defined values 
+// added for timing
+wire [`TLU_THRD_NUM-1:0] gl_incr_sel_w2; 
+wire [`TLU_THRD_NUM-1:0] gl_update_sel_g; 
+wire [`TLU_THRD_NUM-1:0] gl_update_sel_w2; 
+wire [`TLU_THRD_NUM-1:0] gl_priv_max_sel_w2; 
+wire gl_lvl0_at_maxgl, gl_lvl1_at_maxgl; 
+wire gl_lvl2_at_maxgl, gl_lvl3_at_maxgl;
+wire gl_lvl0_at_maxstl, gl_lvl1_at_maxstl;
+wire gl_lvl2_at_maxstl, gl_lvl3_at_maxstl;
+// gl write enables  
+wire gl0_en, gl1_en, gl2_en, gl3_en; 
+wire [`TLU_THRD_NUM-1:0] dnrtry_inst_w2;
+// maxgl control
+wire [`TLU_THRD_NUM-1:0] maxstl_gl_dnrtry_sel;
+wire [`TLU_THRD_NUM-1:0] maxstl_gl_wr_sel;
+wire [`TLU_THRD_NUM-1:0] maxgl_gl_wr_sel;
+// decoded thread info for agp swap
+// wire [`TLU_THRD_NUM-1:0] agp_thrid;
+wire [`TLU_THRD_NUM-1:0] agp_thrid_w2;
+// modified for timing fix
+// wire agp_swap; 
+// wire [1:0] agp_new; 
+wire wsr_inst_g, wsr_inst_w2; 
+wire agp_swap_w2, agp_swap_w3; 
+wire [1:0] agp_new_w2, agp_new_w3; 
+//
+// hyper-privileged ASR registers
+wire [`TLU_THRD_NUM-1:0] tlu_wr_hintp_g;         
+wire htba_rw_d, hpstate_rw_d, htstate_rw_d, hintp_rw_d; 
+wire htickcmp_rw_d, gl_rw_d; 
+wire htba_rw_e, hpstate_rw_e, htstate_rw_e, hintp_rw_e; 
+wire htickcmp_rw_e, gl_rw_e;
+wire htba_rw_m, hpstate_rw_m, htstate_rw_m, hintp_rw_m; 
+wire htickcmp_rw_m, gl_rw_m; 
+wire htba_rw_g, hpstate_rw_g, htstate_rw_g, hintp_rw_g; 
+wire htickcmp_rw_g, gl_rw_g;
+wire [`TLU_THRD_NUM-1:0] htickcmp_intdis_en;
+wire hpstate_rw_w2;
+//
+// hyper-lite mode indicator
+wire [`TLU_THRD_NUM-1:0] tlu_hyper_lite;
+// 
+// controls to update the hpstate registers
+// wire update_hpstate0_g, update_hpstate1_g;
+// wire update_hpstate2_g, update_hpstate3_g;
+wire [`TLU_THRD_NUM-1:0] update_hpstate_g; 
+wire [`TLU_THRD_NUM-1:0] update_hpstate_w2; 
+//
+// htick interrupt disable control
+wire htick_intdis0, htick_intdis1;
+wire htick_intdis2, htick_intdis3;
+//
+// local addr of sr(st/pr) 
+wire [`TLU_ASR_ADDR_WIDTH-3:0] sraddr;
+
+// ASI_QUEUE for hypervisor
+// thread 0
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo0_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo0_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo0_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo0_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err0_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err0_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err0_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err0_tail;
+// thread 1
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo1_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo1_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo1_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo1_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err1_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err1_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err1_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err1_tail;
+// thread 2
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo2_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo2_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo2_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo2_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err2_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err2_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err2_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err2_tail;
+// thread 3
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo3_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo3_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo3_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo3_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err3_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err3_tail;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err3_head;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err3_tail;
+//
+// read asi data from the asi queues
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo_hd_rdata;
+wire [`TLU_ASI_QUE_WIDTH-1:0] cpu_mondo_ta_rdata;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo_hd_rdata;
+wire [`TLU_ASI_QUE_WIDTH-1:0] dev_mondo_ta_rdata;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err_hd_rdata;
+wire [`TLU_ASI_QUE_WIDTH-1:0] resum_err_ta_rdata;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err_hd_rdata;
+wire [`TLU_ASI_QUE_WIDTH-1:0] nresum_err_ta_rdata;
+wire [`TLU_ASI_QUE_WIDTH-1:0] asi_queue_rdata1_g;
+wire [`TLU_ASI_QUE_WIDTH-1:0] asi_queue_rdata2_g;
+wire asi_qrdata_mx_sel2;
+//
+// head and tail comparison results
+// modified due to timing violations
+// wire cpu_mondo_head_rw_e,  cpu_mondo_tail_rw_e;
+// wire dev_mondo_head_rw_e,  dev_mondo_tail_rw_e;
+// wire resum_err_head_rw_e,  resum_err_tail_rw_e;
+// wire nresum_err_head_rw_e, nresum_err_tail_rw_e;
+//
+wire cpu_mondo_head_rw_m,  cpu_mondo_tail_rw_m;
+wire dev_mondo_head_rw_m,  dev_mondo_tail_rw_m;
+wire resum_err_head_rw_m,  resum_err_tail_rw_m;
+wire nresum_err_head_rw_m, nresum_err_tail_rw_m;
+//
+wire cpu_mondo_head_rw_g,  cpu_mondo_tail_rw_g;
+wire dev_mondo_head_rw_g,  dev_mondo_tail_rw_g;
+wire resum_err_head_rw_g,  resum_err_tail_rw_g;
+wire nresum_err_head_rw_g, nresum_err_tail_rw_g;
+// creating one-hot selects
+// wire cpu_mondo_hd_onehot_g, cpu_mondo_ta_onehot_g;
+// wire dev_mondo_hd_onehot_g, dev_mondo_ta_onehot_g;
+// wire resum_err_hd_onehot_g, resum_err_ta_onehot_g;
+// wire nresum_err_hd_onehot_g, nresum_err_ta_onehot_g;
+// read 
+wire [`TLU_THRD_NUM-1:0] cpu_mondo_head_rd_g;
+wire cpu_mondo_hd_rd_g;
+wire [`TLU_THRD_NUM-1:0] cpu_mondo_tail_rd_g;
+wire cpu_mondo_ta_rd_g;
+wire [`TLU_THRD_NUM-1:0] dev_mondo_head_rd_g;
+wire dev_mondo_hd_rd_g;
+wire [`TLU_THRD_NUM-1:0] dev_mondo_tail_rd_g;
+wire dev_mondo_ta_rd_g;
+wire [`TLU_THRD_NUM-1:0] resum_err_head_rd_g;
+wire resum_err_hd_rd_g;
+wire [`TLU_THRD_NUM-1:0] resum_err_tail_rd_g;
+wire resum_err_ta_rd_g;
+wire [`TLU_THRD_NUM-1:0] nresum_err_head_rd_g; 
+wire nresum_err_hd_rd_g; 
+wire [`TLU_THRD_NUM-1:0] nresum_err_tail_rd_g;
+wire nresum_err_ta_rd_g;
+wire [3:0] asi_qrdata_mx1_sel;
+wire [3:0] asi_qrdata_mx2_sel;
+// write 
+wire [`TLU_THRD_NUM-1:0] cpu_mondo_head_wr_g;
+wire [`TLU_THRD_NUM-1:0] cpu_mondo_tail_wr_g;
+wire [`TLU_THRD_NUM-1:0] dev_mondo_head_wr_g;
+wire [`TLU_THRD_NUM-1:0] dev_mondo_tail_wr_g;
+wire [`TLU_THRD_NUM-1:0] resum_err_head_wr_g;
+wire [`TLU_THRD_NUM-1:0] resum_err_tail_wr_g;
+wire [`TLU_THRD_NUM-1:0] nresum_err_head_wr_g; 
+wire [`TLU_THRD_NUM-1:0] nresum_err_tail_wr_g;
+// 
+// ASI va address
+// modified due to timing fix
+// wire [`TLU_ASI_VA_WIDTH-1:0] tlu_ldst_va_e;
+wire [`TLU_ASI_VA_WIDTH-1:0] tlu_ldst_va_m;
+//
+// ASI read or write enables
+wire asi_queue_write_e;
+wire asi_queue_read_e;
+wire asi_queue_write_pq_m; 
+wire asi_queue_read_pq_m;
+wire asi_queue_write_m; 
+wire asi_queue_read_m;
+wire asi_queue_write_uf_g; 
+// wire asi_queue_read_uf_g;
+wire asi_queue_write_g; 
+wire asi_queue_read_g;
+//
+// decoded ASI addresses 
+// ASI QUE
+wire asi_queue_rw_e, asi_queue_rw_m, asi_queue_rw_g;
+wire asi_inrr_rw_e, asi_inrr_rw_m, asi_inrr_rw_g; 
+wire asi_indr_rw_e, asi_indr_rw_m, asi_indr_rw_g; 
+wire asi_invr_rw_e, asi_invr_rw_m, asi_invr_rw_g; 
+// supervisor scratch-pad asi state
+wire asi_scpd_rw_e, asi_scpd_rw_m; 
+// hypervisor scratch-pad asi state
+wire asi_hscpd_rw_e, asi_hscpd_rw_m; 
+wire asi_scpd_rw_vld_m, asi_scpd_rw_vld_g; 
+// legal va addresses
+// modified due to timing
+// wire que_legal_va_e;
+wire que_legal_va_m;
+// modified due to timing
+// illegal va range indicator
+// wire que_ill_va_e, 
+wire que_ill_va_m, que_ill_va_g;
+// modified due to timing
+// wire scpd_ill_va_e, 
+wire scpd_ill_va_m, scpd_ill_va_g;
+// alignment checking
+wire va_not_baligned_m; 
+// no longer necessary
+// wire va_all_zero_e, va_all_zero_m, va_all_zero_g;
+// write operation to queue tails 
+wire qtail_write_m;
+//
+// data_access_exception for hyper-privileged scratch-pad
+wire hscpd_data_acc_excpt_m;
+wire hscpd_data_acc_excpt_pq_m;
+// write to hypervisor scratch-pad using 0x20 ASI state
+wire hscpd_priv_asi_acc_m;
+// access hypervisor scratchpad va addresses
+wire hscpd_va_rw_m; 
+//
+// relevant portion of the va address for the scratch-pad 
+// modified due to timing fix
+// wire [`TLU_ASI_SCPD_VA_HI:0]       scpd_addr_va_e;
+wire [`SCPD_ASI_VA_ADDR_WIDTH-1:0] scpd_addr_va_m;
+wire [`SCPD_ASI_VA_ADDR_WIDTH-1:0] scpd_addr_va_g;
+//
+// load instruction data valid
+wire asi_ld_addr_vld_m, asi_ld_addr_vld_g;
+
+// privilege or hyper-privileged address indicators
+wire asr_hyperp, asr_priv;
+//
+// local reset
+wire local_rst_l; 
+wire local_rst; 
+// local clk
+wire clk; 
+
+////////////////////////////////////////////////////////////////////////
+// local reset
+////////////////////////////////////////////////////////////////////////
+
+// grst_l is registered by clk into local_rst_l; arst_l drives the flop's
+// rst_l pin (presumably an asynchronous, active-low reset, going by the cell
+// and port names -- confirm against the dffrl_async definition).
+// The scan-in / scan-out pins are left unconnected.
+dffrl_async dffrl_local_rst_l (
+    .clk   (clk),
+    .rst_l (arst_l),
+    .din   (grst_l),
+    .q     (local_rst_l),
+    .se    (se), .si (), .so ()
+);
+
+// inverted copy, wired to the .rst pin of the dffr_s flops below
+assign local_rst = ~local_rst_l;
+
+////////////////////////////////////////////////////////////////////////
+// local clock
+////////////////////////////////////////////////////////////////////////
+
+// clk is a plain alias of rclk; the flops below are clocked from clk
+assign clk = rclk;
+
+////////////////////////////////////////////////////////////////////////
+// Hyper-lite mode indicator
+////////////////////////////////////////////////////////////////////////
+
+// Per-thread hyper-lite indicator, all four threads in one vector expression:
+// set when hpstate.priv is set, or when hpstate.enb is clear and pstate.priv is set.
+assign tlu_hyper_lite[3:0] =
+           tlu_hpstate_priv[3:0] |
+           (~tlu_hpstate_enb[3:0] & tlu_pstate_priv[3:0]);
+
+////////////////////////////////////////////////////////////////////////
+// Thread ID staging and decoding 
+////////////////////////////////////////////////////////////////////////
+//
+// one-hot decode of the e-stage thread id: bit N is set when thrid_e == N
+assign thread_sel_id_e[3:0] = {   &thrid_e[1:0],               // thread 3
+                                   thrid_e[1] & ~thrid_e[0],   // thread 2
+                                  ~thrid_e[1] &  thrid_e[0],   // thread 1
+                                 ~(|thrid_e[1:0]) };           // thread 0
+
+// encoded thread id pipeline: d -> e -> m -> g
+dff_s #(2) dff_thrid_e (
+    .clk (clk), .se (se), .si (), .so (),
+    .din (ifu_tlu_thrid_d[1:0]),
+    .q   (thrid_e[1:0])
+);
+
+dff_s #(2) dff_thrid_m (
+    .clk (clk), .se (se), .si (), .so (),
+    .din (thrid_e[1:0]),
+    .q   (thrid_m[1:0])
+);
+
+dff_s #(2) dff_thrid_g (
+    .clk (clk), .se (se), .si (), .so (),
+    .din (thrid_m[1:0]),
+    .q   (thrid_g[1:0])
+);
+
+// decoded (one-hot) thread select pipeline: e -> m -> g -> w2
+dff_s #(`TLU_THRD_NUM) dff_thread_sel_id_m (
+    .clk (clk), .se (se), .si (), .so (),
+    .din (thread_sel_id_e[`TLU_THRD_NUM-1:0]),
+    .q   (thread_sel_id_m[`TLU_THRD_NUM-1:0])
+);
+
+dff_s #(`TLU_THRD_NUM) dff_thread_id_sel_g (
+    .clk (clk), .se (se), .si (), .so (),
+    .din (thread_sel_id_m[`TLU_THRD_NUM-1:0]),
+    .q   (thread_sel_id_g[`TLU_THRD_NUM-1:0])
+);
+
+dff_s #(`TLU_THRD_NUM) dff_thread_id_sel_w2 (
+    .clk (clk), .se (se), .si (), .so (),
+    .din (thread_sel_id_g[`TLU_THRD_NUM-1:0]),
+    .q   (thread_sel_id_w2[`TLU_THRD_NUM-1:0])
+);
+//
+// modified due to rte failure
+// one-hot decode of the g-stage thread id, taken directly from thrid_g
+// rather than from the staged thread_sel_id_g vector
+assign tlu_local_thrid_g[3:0] = {  thrid_g[1] &  thrid_g[0],    // thread 3
+                                   thrid_g[1] & ~thrid_g[0],    // thread 2
+                                  ~thrid_g[1] &  thrid_g[0],    // thread 1
+                                  ~thrid_g[1] & ~thrid_g[0] };  // thread 0
+
+/*
+assign tlu_local_thrid_g[`TLU_THRD_NUM-1:0] = 
+           thread_sel_id_g[`TLU_THRD_NUM-1:0];
+*/
+
+////////////////////////////////////////////////////////////////////////
+// POR indicator generation 
+////////////////////////////////////////////////////////////////////////
+//
+// modified for bug 1945
+/*
+assign	por_rstint0_g = tlu_por_rstint_g & thread_sel_id_g[0];
+assign	por_rstint1_g = tlu_por_rstint_g & thread_sel_id_g[1];
+assign	por_rstint2_g = tlu_por_rstint_g & thread_sel_id_g[2];
+assign	por_rstint3_g = tlu_por_rstint_g & thread_sel_id_g[3];
+*/
+// tlu_por_rstint_g is already per-thread; split it into the four scalars
+assign {por_rstint3_g, por_rstint2_g, por_rstint1_g, por_rstint0_g} =
+           tlu_por_rstint_g[3:0];
+
+////////////////////////////////////////////////////////////////////////
+// Hyper-privileged ASR decodes 
+////////////////////////////////////////////////////////////////////////
+//
+// flush signal - modified for timing
+// assign  local_flush_all_w = tlu_nlsu_flush_w | lsu_tlu_early_flush_w;
+// OR of the three w-stage flush inputs
+assign  local_flush_all_w =
+            ifu_tlu_flush_fd_w | lsu_tlu_early_flush_w | tlu_local_flush_w;
+
+// w2-stage copy of the combined flush (.rst tied to local_rst)
+dffr_s dffr_local_flush_all_w2 (
+    .clk (clk), .rst (local_rst), .se (se), .si (), .so (),
+    .din (local_flush_all_w),
+    .q   (local_flush_all_w2)
+);
+
+// added for timing
+// the asi data valid is passed on only when the w2 flush is not asserted
+assign tlu_lsu_int_ldxa_vld_w2 =
+           ~local_flush_all_w2 & tlu_asi_data_nf_vld_w2;
+
+// ifu_tlu_sraddr_d: the top bit marks a hyper-privileged register, the next
+// bit a privileged register, the remaining bits form the local register index
+assign  asr_hyperp = ifu_tlu_sraddr_d[`TLU_ASR_ADDR_WIDTH-1];
+assign  asr_priv   = ifu_tlu_sraddr_d[`TLU_ASR_ADDR_WIDTH-2];
+
+assign  sraddr[`TLU_ASR_ADDR_WIDTH-3:0] =
+            ifu_tlu_sraddr_d[`TLU_ASR_ADDR_WIDTH-3:0];
+
+// hypervisor tick compare - 0x1f - hyper-privileged
+assign htickcmp_rw_d = asr_hyperp & (&sraddr[4:0]);
+//
+// hypervisor processor state - 0x00 - hyper-privileged
+assign hpstate_rw_d  = asr_hyperp & ~(|sraddr[4:0]);
+//
+// hypervisor trap state - 0x01 - hyper-privileged
+assign htstate_rw_d  = asr_hyperp & ~(|sraddr[4:1]) & sraddr[0];
+
+assign tlu_htstate_rw_d = htstate_rw_d;
+//
+// hypervisor interrupt pending - 0x03 - hyper-privileged
+assign hintp_rw_d    = asr_hyperp & ~(|sraddr[4:2]) & (&sraddr[1:0]);
+//
+// hypervisor trap base address - 0x05 - hyper-privileged
+assign htba_rw_d     = asr_hyperp & ~(|{sraddr[4:3], sraddr[1]}) &
+                       sraddr[2] & sraddr[0];
+
+// global register - 0x10 - privileged
+assign gl_rw_d       = asr_priv & sraddr[4] & ~(|sraddr[3:0]);
+//
+// staging of the ASR decoded controls
+// staging from d to e stage
+// one flop per decoded ASR select, d-stage value in, e-stage value out
+dff_s dff_htba_rw_d_e     (.din (htba_rw_d),     .q (htba_rw_e),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_hpstate_rw_d_e  (.din (hpstate_rw_d),  .q (hpstate_rw_e),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_htstate_rw_d_e  (.din (htstate_rw_d),  .q (htstate_rw_e),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_hintp_rw_e      (.din (hintp_rw_d),    .q (hintp_rw_e),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_htickcmp_rw_d_e (.din (htickcmp_rw_d), .q (htickcmp_rw_e),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_gl_rw_d_e       (.din (gl_rw_d),       .q (gl_rw_e),
+                           .clk (clk), .se (se), .si (), .so ());
+
+// staging from e to m stage
+// one flop per decoded ASR select, e-stage value in, m-stage value out
+dff_s dff_htba_rw_m_m     (.din (htba_rw_e),     .q (htba_rw_m),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_hpstate_rw_m_m  (.din (hpstate_rw_e),  .q (hpstate_rw_m),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_htstate_rw_m_m  (.din (htstate_rw_e),  .q (htstate_rw_m),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_hintp_rw_m_m    (.din (hintp_rw_e),    .q (hintp_rw_m),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_htickcmp_rw_m_m (.din (htickcmp_rw_e), .q (htickcmp_rw_m),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_gl_rw_m_m       (.din (gl_rw_e),       .q (gl_rw_m),
+                           .clk (clk), .se (se), .si (), .so ());
+
+// staging from m to g stage
+// one flop per decoded ASR select, m-stage value in, g-stage value out
+dff_s dff_htba_rw_m_g     (.din (htba_rw_m),     .q (htba_rw_g),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_hpstate_rw_m_g  (.din (hpstate_rw_m),  .q (hpstate_rw_g),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_htstate_rw_m_g  (.din (htstate_rw_m),  .q (htstate_rw_g),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_hintp_rw_g      (.din (hintp_rw_m),    .q (hintp_rw_g),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_htickcmp_rw_m_g (.din (htickcmp_rw_m), .q (htickcmp_rw_g),
+                           .clk (clk), .se (se), .si (), .so ());
+
+dff_s dff_gl_rw_m_g       (.din (gl_rw_m),       .q (gl_rw_g),
+                           .clk (clk), .se (se), .si (), .so ());
+//
+// stage to w2
+// hpstate select, g-stage value in, w2-stage value out
+dff_s dff_hpstate_rw_m_w2 (.din (hpstate_rw_g), .q (hpstate_rw_w2),
+                           .clk (clk), .se (se), .si (), .so ());
+
+// constructing the mux select to access the 
+// hyper-privileged ASR registers
+//
+// tlu_-prefixed copies of the staged decodes
+assign tlu_htickcmp_rw_e = htickcmp_rw_e;
+assign tlu_htstate_rw_g  = htstate_rw_g;
+//
+// using htba_rw_e as one of the selects for rdpr read in tlu_tdp
+// assign tlu_htba_rsr_sel = htba_rw_e;
+//
+// construction of the mux selects for tlu_rdpr_mx5 in tlu_tdp to read out
+// the ASR information
+//   [4] htba   [3] htstate   [2] hpstate   [1] hintp   [0] gl
+assign tlu_hyperv_rdpr_sel[4:0] =
+           {htba_rw_e, htstate_rw_e, hpstate_rw_e, hintp_rw_e, gl_rw_e};
+// assign  tlu_rdpr_mx5_active = |(tlu_rdpr_mx5_sel[3:0]);
+//
+// buffer the gl decode for agp_tid_sel in tlu_tcl
+// modified for timing: the m-stage decode is driven out, not the g-stage one
+// assign tlu_gl_rw_g = gl_rw_g;
+assign tlu_gl_rw_m = gl_rw_m;
+
+////////////////////////////////////////////////////////////////////////
+// GL register controls 
+////////////////////////////////////////////////////////////////////////
+
+// w2-stage copy of the per-thread GL update requests (.rst tied to local_rst)
+dffr_s #(`TLU_THRD_NUM) dffr_gl_update_sel_w2 (
+    .clk (clk), .rst (local_rst), .se (se), .si (), .so (),
+    .din (gl_update_sel_g[`TLU_THRD_NUM-1:0]),
+    .q   (gl_update_sel_w2[`TLU_THRD_NUM-1:0])
+);
+
+// w2-stage copy of the per-thread done/retry indications, thread 3 in the msb
+dffr_s #(`TLU_THRD_NUM) dffr_dnrtry_inst_w2 (
+    .clk (clk), .rst (local_rst), .se (se), .si (), .so (),
+    .din ({tlu_dnrtry3_inst_g, tlu_dnrtry2_inst_g,
+           tlu_dnrtry1_inst_g, tlu_dnrtry0_inst_g}),
+    .q   (dnrtry_inst_w2[`TLU_THRD_NUM-1:0])
+);
+//
+// added for timing
+// the unqualified g-stage wsr indication is masked by ifu_tlu_flush_fd_w
+assign wsr_inst_g = ~ifu_tlu_flush_fd_w & tlu_wsr_inst_nq_g;
+
+dffr_s dffr_wsr_inst_w2 (
+    .clk (clk), .rst (local_rst), .se (se), .si (), .so (),
+    .din (wsr_inst_g),
+    .q   (wsr_inst_w2)
+);
+
+// THREAD0
+
+// thread 0 GL compared against the two ceilings
+assign gl_lvl0_at_maxstl = (`MAXSTL_GL == gl_lvl0[`TSA_GLOBAL_WIDTH-1:0]);
+assign gl_lvl0_at_maxgl  = (`MAXGL_GL  == gl_lvl0[`TSA_GLOBAL_WIDTH-1:0]);
+//
+// generate the controls to prevent writing beyond maxstl or maxgl
+//
+// maxstl cap on a GL write: the thread is not in hyper-lite mode and the
+// write data is above MAXSTL (the same comparison is shared by all threads)
+assign maxstl_gl_wr_sel[3:0] =
+           ~tlu_hyper_lite[3:0] &
+           {4{(tlu_wsr_data_w_global[`TLU_GLOBAL_WIDTH-1:0] > {1'b0,`MAXSTL})}};
+//
+// added for bug 79252
+// maxstl cap on a done/retry restore: the thread is not in hyper-lite mode
+// and the restored GL value is all ones
+assign maxstl_gl_dnrtry_sel[3:0] =
+           ~tlu_hyper_lite[3:0] &
+           {4{(&tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0])}};
+//
+// added for bug 79252
+// per-thread done/retry restore data with the maxstl cap applied
+assign dnrtry_gl_lvl0_data[`TSA_GLOBAL_WIDTH-1:0] =
+           maxstl_gl_dnrtry_sel[0] ? `MAXSTL_GL :
+                                     tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0];
+assign dnrtry_gl_lvl1_data[`TSA_GLOBAL_WIDTH-1:0] =
+           maxstl_gl_dnrtry_sel[1] ? `MAXSTL_GL :
+                                     tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0];
+assign dnrtry_gl_lvl2_data[`TSA_GLOBAL_WIDTH-1:0] =
+           maxstl_gl_dnrtry_sel[2] ? `MAXSTL_GL :
+                                     tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0];
+assign dnrtry_gl_lvl3_data[`TSA_GLOBAL_WIDTH-1:0] =
+           maxstl_gl_dnrtry_sel[3] ? `MAXSTL_GL :
+                                     tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0];
+
+//
+// maxgl cap on a GL write: the thread is in hyper-lite mode and the write
+// data is above MAXGL
+assign maxgl_gl_wr_sel[3:0] =
+           tlu_hyper_lite[3:0] &
+           {4{(tlu_wsr_data_w_global[`TLU_GLOBAL_WIDTH-1:0] > `MAXGL)}};
+//
+// trap level to be incremented if thread not at MAXGL and not in redmode
+// modified for bug 2889
+// modified due to timing
+/*
+assign gl0_incr_sel = 
+           tlu_thrd0_traps & (~(gl_lvl0_at_maxgl | tlu_select_tba_g) |  
+           (~gl_lvl0_at_maxstl & tlu_select_tba_g));
+           
+assign gl_lvl0_incr[1:0] = {1'b0, gl0_incr_sel};
+
+assign gl0_en = (gl_rw_g & wsr_inst_g & thread_sel_id_g[0]) | 
+			     gl0_incr_sel | local_rst |	por_rstint0_g   | 
+                 tlu_dnrtry0_inst_g;  
+
+assign gl_lvl0_new[`TSA_GLOBAL_WIDTH-1:0] = 
+	        (gl_rw_g & wsr_inst_g & thread_sel_id_g[0]) ? 
+			 wsr_gl_lvl0_data[`TSA_GLOBAL_WIDTH-1:0] : 
+	       ((local_rst | por_rstint0_g) ? `MAXGL_GL :
+		   ((tlu_dnrtry0_inst_g) ? 
+			 tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0] :     // done/retry restore
+			 gl_lvl0[`TSA_GLOBAL_WIDTH-1:0] + gl_lvl0_incr[1:0])); // trap increments
+*/
+// modified for bug 79252 
+// GL write data for thread 0: the maxstl cap is checked first, then the
+// maxgl cap, otherwise the raw write data is used
+assign wsr_gl_lvl0_data[`TSA_GLOBAL_WIDTH-1:0] =
+           maxstl_gl_wr_sel[0] ? `MAXSTL_GL :
+           maxgl_gl_wr_sel[0]  ? `MAXGL_GL  :
+                                 tlu_wsr_data_w_global[`TSA_GLOBAL_WIDTH-1:0];
+
+// g-stage update request: GL write by this thread, reset, POR, or done/retry
+assign gl_update_sel_g[0] =
+           local_rst | por_rstint0_g | tlu_dnrtry0_inst_g |
+           (thread_sel_id_g[0] & gl_rw_g & wsr_inst_g);
+
+// update value priority: GL write data, then MAXGL_GL on reset/POR,
+// otherwise the done/retry restore data
+assign gl_lvl0_update_g[`TSA_GLOBAL_WIDTH-1:0] =
+           (thread_sel_id_g[0] & gl_rw_g & wsr_inst_g) ?
+               wsr_gl_lvl0_data[`TSA_GLOBAL_WIDTH-1:0] :
+           (por_rstint0_g | local_rst) ?
+               `MAXGL_GL :
+               dnrtry_gl_lvl0_data[`TSA_GLOBAL_WIDTH-1:0];     // done/retry restore
+               // tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0]);     // done/retry restore
+
+// the update value is captured only when an update is requested
+dffe_s #(`TSA_GLOBAL_WIDTH) dffe_gl_lvl0_update_w2 (
+     .clk (clk), .en (gl_update_sel_g[0]), .se (se), .si (), .so (),
+     .din (gl_lvl0_update_g[`TSA_GLOBAL_WIDTH-1:0]),
+     .q   (gl_lvl0_update_w2[`TSA_GLOBAL_WIDTH-1:0])
+);
+//
+// modified for bug3468 and bug3505
+// a thread 0 trap increments GL unless GL is at maxgl, or GL is at maxstl
+// while tlu_select_tba_w2 is set
+assign gl_incr_sel_w2[0] =
+           tlu_thrd_traps_w2[0] & ~gl_lvl0_at_maxgl &
+           ~(gl_lvl0_at_maxstl & tlu_select_tba_w2);
+//            tlu_thrd_traps_w2[0] & (~(gl_lvl0_at_maxgl | tlu_select_tba_w2) |  
+//
+// modified for bug3468
+// a thread 0 trap at maxgl with tlu_select_tba_w2 set loads MAXSTL_GL instead
+assign gl_priv_max_sel_w2[0] =
+           tlu_thrd_traps_w2[0] & tlu_select_tba_w2 & gl_lvl0_at_maxgl;
+
+assign gl0_en =
+           gl_update_sel_w2[0] | gl_incr_sel_w2[0] | gl_priv_max_sel_w2[0];
+// assign gl0_en = gl_incr_sel_w2[0] | gl_update_sel_w2[0]; 
+
+// next GL value: registered update value first, then MAXSTL_GL for the
+// maxgl trap case, otherwise the current value plus one
+assign gl_lvl0_new[`TSA_GLOBAL_WIDTH-1:0] =
+           gl_update_sel_w2[0] ?
+               gl_lvl0_update_w2 :
+           (~gl_update_sel_w2[0] & gl_priv_max_sel_w2[0]) ?
+               `MAXSTL_GL :
+               gl_lvl0[`TSA_GLOBAL_WIDTH-1:0] + 2'b01;
+
+// Reset required as processor will start out at gl=1 after reset.
+/****************************
+   to fix bug 6028 manually in the gate netlist, the following needs
+   to be a mux_flop with recirculating path from q to d0 input. But
+   to make it resettable, need to break this recirculating path and
+   insert an AND gate such that local_rst_l is ANDed with the q output.
+
+dffe_s #(`TSA_GLOBAL_WIDTH) dffe_gl0 (
+     .din (gl_lvl0_new[`TSA_GLOBAL_WIDTH-1:0]), 
+     .q   (gl_lvl0[`TSA_GLOBAL_WIDTH-1:0]),
+     .en  (gl0_en), 
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+******************************/
+
+// Resettable GL register for thread 0, replacing the commented-out dffe_s above.
+// The width follows `TSA_GLOBAL_WIDTH, as do gl_lvl0, gl_lvl0_new and the
+// dffe_s this replaces, instead of a hard-coded 2 bits; a wider GL would
+// otherwise leave the upper bits of gl_lvl0 undriven.
+wire [`TSA_GLOBAL_WIDTH-1:0] gl_lvl0_or_rst, gl_lvl0_or_rst_or_new;
+
+// recirculated value, forced to zero while local_rst is asserted
+assign gl_lvl0_or_rst[`TSA_GLOBAL_WIDTH-1:0] =
+           gl_lvl0[`TSA_GLOBAL_WIDTH-1:0] & {`TSA_GLOBAL_WIDTH{~local_rst}};
+
+// load gl_lvl0_new when enabled and not in reset; otherwise take the
+// recirculated (or cleared) value
+assign gl_lvl0_or_rst_or_new[`TSA_GLOBAL_WIDTH-1:0] =
+           (gl0_en & ~local_rst) ? gl_lvl0_new[`TSA_GLOBAL_WIDTH-1:0] :
+                                   gl_lvl0_or_rst[`TSA_GLOBAL_WIDTH-1:0];
+
+dff_s    #(`TSA_GLOBAL_WIDTH) dffe_gl0 (
+        .din (gl_lvl0_or_rst_or_new[`TSA_GLOBAL_WIDTH-1:0]),
+        .q   (gl_lvl0[`TSA_GLOBAL_WIDTH-1:0]),
+        .clk (clk), .se (se), .si (), .so ());
+
+/******************************/
+
+
+// thread 0 GL value driven out of the module
+assign tlu_gl_lvl0[`TSA_GLOBAL_WIDTH-1:0] = gl_lvl0[`TSA_GLOBAL_WIDTH-1:0];
+
+`ifdef FPGA_SYN_1THREAD
+`else
+   
+// THREAD1
+
+// thread 1 GL compared against the two ceilings
+assign gl_lvl1_at_maxstl = (`MAXSTL_GL == gl_lvl1[`TSA_GLOBAL_WIDTH-1:0]);
+assign gl_lvl1_at_maxgl  = (`MAXGL_GL  == gl_lvl1[`TSA_GLOBAL_WIDTH-1:0]);
+//
+// trap level to be incremented if thread not at MAXGL and not in redmode
+// modified for bug 2889
+// modified due to timing 
+/*
+assign gl1_incr_sel = 
+           tlu_thrd1_traps & (~(gl_lvl1_at_maxgl | tlu_select_tba_g) |  
+           (~gl_lvl1_at_maxstl & tlu_select_tba_g));
+
+assign gl_lvl1_incr[1:0] = {1'b0, gl1_incr_sel};
+
+assign gl1_en = (gl_rw_g & wsr_inst_g & thread_sel_id_g[1]) | 
+			     gl1_incr_sel | local_rst |	por_rstint1_g   | 
+                 tlu_dnrtry1_inst_g;  
+
+assign gl_lvl1_new[`TSA_GLOBAL_WIDTH-1:0] = 
+	        (gl_rw_g & wsr_inst_g & thread_sel_id_g[1])? 
+			 wsr_gl_lvl1_data[`TSA_GLOBAL_WIDTH-1:0] : 
+		   ((local_rst | por_rstint1_g) ? `MAXGL_GL :
+		   ((tlu_dnrtry1_inst_g) ? 
+			 dnrtry_gl_lvl1_data[`TSA_GLOBAL_WIDTH-1:0] :     // done/retry restore
+			 // tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0] :     // done/retry restore
+			 gl_lvl1[`TSA_GLOBAL_WIDTH-1:0] + gl_lvl1_incr[1:0])); // trap increments
+
+*/
+// GL write data for thread 1: the maxstl cap is checked first, then the
+// maxgl cap, otherwise the raw write data is used
+assign wsr_gl_lvl1_data[`TSA_GLOBAL_WIDTH-1:0] =
+           maxstl_gl_wr_sel[1] ? `MAXSTL_GL :
+           maxgl_gl_wr_sel[1]  ? `MAXGL_GL  :
+                                 tlu_wsr_data_w_global[`TSA_GLOBAL_WIDTH-1:0];
+
+// g-stage update request: GL write by this thread, reset, POR, or done/retry
+assign gl_update_sel_g[1] =
+           local_rst | por_rstint1_g | tlu_dnrtry1_inst_g |
+           (thread_sel_id_g[1] & gl_rw_g & wsr_inst_g);
+
+// update value priority: GL write data, then MAXGL_GL on reset/POR,
+// otherwise the done/retry restore data
+assign gl_lvl1_update_g[`TSA_GLOBAL_WIDTH-1:0] =
+           (thread_sel_id_g[1] & gl_rw_g & wsr_inst_g) ?
+               wsr_gl_lvl1_data[`TSA_GLOBAL_WIDTH-1:0] :
+           (por_rstint1_g | local_rst) ?
+               `MAXGL_GL :
+               dnrtry_gl_lvl1_data[`TSA_GLOBAL_WIDTH-1:0];     // done/retry restore
+               // tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0]);     // done/retry restore
+
+// the update value is captured only when an update is requested
+dffe_s #(`TSA_GLOBAL_WIDTH) dffe_gl_lvl1_update_w2 (
+     .clk (clk), .en (gl_update_sel_g[1]), .se (se), .si (), .so (),
+     .din (gl_lvl1_update_g[`TSA_GLOBAL_WIDTH-1:0]),
+     .q   (gl_lvl1_update_w2[`TSA_GLOBAL_WIDTH-1:0])
+);
+//
+// modified for bug3468 and bug3505
+// a thread 1 trap increments GL unless GL is at maxgl, or GL is at maxstl
+// while tlu_select_tba_w2 is set
+assign gl_incr_sel_w2[1] =
+           tlu_thrd_traps_w2[1] & ~gl_lvl1_at_maxgl &
+           ~(gl_lvl1_at_maxstl & tlu_select_tba_w2);
+//            tlu_thrd_traps_w2[1] & (~(gl_lvl1_at_maxgl | tlu_select_tba_w2) |  
+//
+// modified for bug3468
+// a thread 1 trap at maxgl with tlu_select_tba_w2 set loads MAXSTL_GL instead
+assign gl_priv_max_sel_w2[1] =
+           tlu_thrd_traps_w2[1] & tlu_select_tba_w2 & gl_lvl1_at_maxgl;
+//
+assign gl1_en =
+           gl_update_sel_w2[1] | gl_incr_sel_w2[1] | gl_priv_max_sel_w2[1];
+// assign gl1_en = gl_incr_sel_w2[1] | gl_update_sel_w2[1]; 
+
+// next GL value: registered update value first, then MAXSTL_GL for the
+// maxgl trap case, otherwise the current value plus one
+assign gl_lvl1_new[`TSA_GLOBAL_WIDTH-1:0] =
+           gl_update_sel_w2[1] ?
+               gl_lvl1_update_w2 :
+           (~gl_update_sel_w2[1] & gl_priv_max_sel_w2[1]) ?
+               `MAXSTL_GL :
+               gl_lvl1[`TSA_GLOBAL_WIDTH-1:0] + 2'b01;
+
+// Reset required as processor will start out at gl=1 after reset.
+/****************************
+   to fix bug 6028 manually in the gate netlist, the following needs
+   to be a mux_flop with recirculating path from q to d0 input. But
+   to make it resettable, need to break this recirculating path and
+   insert an AND gate such that local_rst_l is ANDed with the q output.
+dffe_s #(`TSA_GLOBAL_WIDTH) dffe_gl1 (
+     .din (gl_lvl1_new[`TSA_GLOBAL_WIDTH-1:0]), 
+     .q   (gl_lvl1[`TSA_GLOBAL_WIDTH-1:0]),
+     .en  (gl1_en), 
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+******************************/
+
+// Resettable GL register for thread 1, replacing the commented-out dffe_s above.
+// The width follows `TSA_GLOBAL_WIDTH, as do gl_lvl1, gl_lvl1_new and the
+// dffe_s this replaces, instead of a hard-coded 2 bits; a wider GL would
+// otherwise leave the upper bits of gl_lvl1 undriven.
+wire [`TSA_GLOBAL_WIDTH-1:0] gl_lvl1_or_rst, gl_lvl1_or_rst_or_new;
+
+// recirculated value, forced to zero while local_rst is asserted
+assign gl_lvl1_or_rst[`TSA_GLOBAL_WIDTH-1:0] =
+           gl_lvl1[`TSA_GLOBAL_WIDTH-1:0] & {`TSA_GLOBAL_WIDTH{~local_rst}};
+
+// load gl_lvl1_new when enabled and not in reset; otherwise take the
+// recirculated (or cleared) value
+assign gl_lvl1_or_rst_or_new[`TSA_GLOBAL_WIDTH-1:0] =
+           (gl1_en & ~local_rst) ? gl_lvl1_new[`TSA_GLOBAL_WIDTH-1:0] :
+                                   gl_lvl1_or_rst[`TSA_GLOBAL_WIDTH-1:0];
+
+dff_s    #(`TSA_GLOBAL_WIDTH) dffe_gl1 (
+        .din (gl_lvl1_or_rst_or_new[`TSA_GLOBAL_WIDTH-1:0]),
+        .q   (gl_lvl1[`TSA_GLOBAL_WIDTH-1:0]),
+        .clk (clk), .se (se), .si (), .so ());
+
+/******************************/
+
+
+// thread 1 GL value driven out of the module
+assign tlu_gl_lvl1[`TSA_GLOBAL_WIDTH-1:0] = gl_lvl1[`TSA_GLOBAL_WIDTH-1:0];
+
+// THREAD2
+
+// thread 2 GL compared against the two ceilings
+assign gl_lvl2_at_maxstl = (`MAXSTL_GL == gl_lvl2[`TSA_GLOBAL_WIDTH-1:0]);
+assign gl_lvl2_at_maxgl  = (`MAXGL_GL  == gl_lvl2[`TSA_GLOBAL_WIDTH-1:0]);
+//
+// trap level to be incremented if thread not at MAXGL and not in redmode
+// modified for bug 2889
+// modified due to timing
+/*
+assign gl2_incr_sel = 
+           tlu_thrd2_traps & (~(gl_lvl2_at_maxgl | tlu_select_tba_g) |  
+           (~gl_lvl2_at_maxstl & tlu_select_tba_g));
+
+assign gl_lvl2_incr[1:0] = {1'b0, gl2_incr_sel};
+
+assign gl2_en = (gl_rw_g & wsr_inst_g & thread_sel_id_g[2]) | 
+			     gl2_incr_sel | local_rst |	por_rstint2_g   | 
+                 tlu_dnrtry2_inst_g;  
+
+assign gl_lvl2_new[`TSA_GLOBAL_WIDTH-1:0] = 
+	        (gl_rw_g & wsr_inst_g & thread_sel_id_g[2])? 
+			 wsr_gl_lvl2_data[`TSA_GLOBAL_WIDTH-1:0] : 
+		   ((local_rst | por_rstint2_g) ? `MAXGL_GL :
+		   ((tlu_dnrtry2_inst_g) ? 
+			 tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0] :     // done/retry restore
+			 gl_lvl2[`TSA_GLOBAL_WIDTH-1:0] + gl_lvl2_incr[1:0])); // trap increments
+*/
+// WSR write data for thread 2 GL: the maxstl / maxgl write selects
+// override the raw WSR data with MAXSTL_GL / MAXGL_GL respectively.
+assign wsr_gl_lvl2_data[`TSA_GLOBAL_WIDTH-1:0] =
+            maxstl_gl_wr_sel[2] ? `MAXSTL_GL :
+           (maxgl_gl_wr_sel[2]  ? `MAXGL_GL  :
+            tlu_wsr_data_w_global[`TSA_GLOBAL_WIDTH-1:0]);
+
+// G-stage update value, in priority order:
+//   1. WSR write to GL by thread 2
+//   2. local_rst / por_rstint2_g  -> MAXGL_GL
+//   3. done/retry restore value (dnrtry_gl_lvl2_data; this replaced the
+//      earlier direct use of tlu_dnrtry_global_g)
+assign gl_lvl2_update_g[`TSA_GLOBAL_WIDTH-1:0] =
+           (thread_sel_id_g[2] & wsr_inst_g & gl_rw_g) ?
+                wsr_gl_lvl2_data[`TSA_GLOBAL_WIDTH-1:0] :
+          ((por_rstint2_g | local_rst) ?
+                `MAXGL_GL :
+                dnrtry_gl_lvl2_data[`TSA_GLOBAL_WIDTH-1:0]);
+
+// Stage the update value into W2; captured only when gl_update_sel_g[2]
+// is asserted.
+dffe_s #(`TSA_GLOBAL_WIDTH) dffe_gl_lvl2_update_w2 (
+     .clk (clk),
+     .en  (gl_update_sel_g[2]),
+     .din (gl_lvl2_update_g[`TSA_GLOBAL_WIDTH-1:0]),
+     .q   (gl_lvl2_update_w2[`TSA_GLOBAL_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+//
+// modified for bug3468 and bug3505
+// A thread 2 trap increments GL unless the level is already at MAXGL_GL,
+// or at MAXSTL_GL while tlu_select_tba_w2 is set.
+assign gl_incr_sel_w2[2] =
+           ~(gl_lvl2_at_maxgl | (tlu_select_tba_w2 & gl_lvl2_at_maxstl)) &
+            tlu_thrd_traps_w2[2];
+// earlier form, kept for reference:
+//            tlu_thrd_traps_w2[2] & (~(gl_lvl2_at_maxgl | tlu_select_tba_w2) |  
+
+// G-stage request to overwrite GL: done/retry, reset, or a WSR write
+// to GL from thread 2.
+assign gl_update_sel_g[2] =
+            tlu_dnrtry2_inst_g | por_rstint2_g | local_rst |
+           (thread_sel_id_g[2] & wsr_inst_g & gl_rw_g);
+//
+// modified for bug3468
+// Trap taken while at MAXGL_GL with tlu_select_tba_w2 set: GL is forced
+// to MAXSTL_GL (see gl_lvl2_new below).
+assign gl_priv_max_sel_w2[2] =
+            tlu_thrd_traps_w2[2] & (tlu_select_tba_w2 & gl_lvl2_at_maxgl);
+//
+// Load enable for the thread 2 GL register.
+assign gl2_en =
+            gl_priv_max_sel_w2[2] | gl_update_sel_w2[2] | gl_incr_sel_w2[2];
+// earlier form: assign gl2_en = gl_incr_sel_w2[2] | gl_update_sel_w2[2]; 
+
+// Next GL value: the staged update has priority, then the forced
+// MAXSTL_GL, otherwise the current level plus one.
+assign gl_lvl2_new[`TSA_GLOBAL_WIDTH-1:0] =
+            gl_update_sel_w2[2] ? gl_lvl2_update_w2 :
+          ((~gl_update_sel_w2[2] & gl_priv_max_sel_w2[2]) ?
+                `MAXSTL_GL :
+                gl_lvl2[`TSA_GLOBAL_WIDTH-1:0] + 2'b01);
+
+// Reset required as processor will start out at gl=1 after reset.
+/****************************
+   to fix bug 6028 manually in the gate netlist, the following needs
+   to be a mux_flop with recirculating path from q to d0 input. But
+   to make it resettable, need to break this recirculating path and
+   insert an AND gate such that local_rst_l is ANDed with the q output.
+dffe_s #(`TSA_GLOBAL_WIDTH) dffe_gl2 (
+     .din (gl_lvl2_new[`TSA_GLOBAL_WIDTH-1:0]), 
+     .q   (gl_lvl2[`TSA_GLOBAL_WIDTH-1:0]),
+     .en  (gl2_en), 
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+******************************/
+
+// Resettable replacement for the enabled flop described in the comment
+// above (thread 2 GL register):
+//   gl_lvl2_or_rst        - recirculated GL value, forced to 0 by local_rst
+//   gl_lvl2_or_rst_or_new - flop input: gl_lvl2_new when gl2_en is set and
+//                           local_rst is not, otherwise the recirculated
+//                           (or reset) value
+wire [1:0] gl_lvl2_or_rst, gl_lvl2_or_rst_or_new;
+
+// hold path ANDed with ~local_rst so reset clears the register
+assign gl_lvl2_or_rst[1:0] = gl_lvl2[1:0] & {2{~local_rst}};
+
+// load is blocked while local_rst is asserted
+assign gl_lvl2_or_rst_or_new[1:0] = (gl2_en&~local_rst) ? gl_lvl2_new[1:0] : gl_lvl2_or_rst[1:0];
+
+// NOTE: instance keeps the name dffe_gl2 although it is now a plain dff_s;
+// the enable is implemented by the mux above.
+dff_s    #(2) dffe_gl2 (
+        .din(gl_lvl2_or_rst_or_new[1:0]) ,
+        .q(gl_lvl2[1:0]),
+        .clk (clk), .se(se), .si(), .so());
+
+/******************************/
+
+// export the current thread 2 GL value
+assign tlu_gl_lvl2[`TSA_GLOBAL_WIDTH-1:0] = gl_lvl2[`TSA_GLOBAL_WIDTH-1:0];
+
+// THREAD3
+
+// Thread 3 GL boundary flags: set when gl_lvl3 equals MAXGL_GL / MAXSTL_GL.
+assign gl_lvl3_at_maxgl  = (`MAXGL_GL  == gl_lvl3[`TSA_GLOBAL_WIDTH-1:0]);
+assign gl_lvl3_at_maxstl = (`MAXSTL_GL == gl_lvl3[`TSA_GLOBAL_WIDTH-1:0]);
+//
+// trap level to be incremented if thread not at MAXGL and not in redmode
+// modified for bug 2889
+// modified due to timing
+/*
+assign gl3_incr_sel = 
+           tlu_thrd3_traps & (~(gl_lvl3_at_maxgl | tlu_select_tba_g) |  
+           (~gl_lvl3_at_maxstl & tlu_select_tba_g));
+
+assign gl_lvl3_incr = {1'b0, gl3_incr_sel};
+
+assign gl3_en = (gl_rw_g & wsr_inst_g & thread_sel_id_g[3]) |
+			     gl3_incr_sel | local_rst |	por_rstint3_g   | 
+                 tlu_dnrtry3_inst_g;  
+
+assign gl_lvl3_new[`TSA_GLOBAL_WIDTH-1:0] = 
+	        (gl_rw_g & wsr_inst_g & thread_sel_id_g[3])? 
+			 wsr_gl_lvl3_data[`TSA_GLOBAL_WIDTH-1:0] : 
+	       ((local_rst | por_rstint3_g) ? `MAXGL_GL :
+		   ((tlu_dnrtry3_inst_g) ? 
+			 tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0] :     // done/retry restore
+		     gl_lvl3[`TSA_GLOBAL_WIDTH-1:0] + gl_lvl3_incr[1:0])); // trap increments
+*/
+// WSR write data for thread 3 GL: the maxstl / maxgl write selects
+// override the raw WSR data with MAXSTL_GL / MAXGL_GL respectively.
+assign wsr_gl_lvl3_data[`TSA_GLOBAL_WIDTH-1:0] =
+            maxstl_gl_wr_sel[3] ? `MAXSTL_GL :
+           (maxgl_gl_wr_sel[3]  ? `MAXGL_GL  :
+            tlu_wsr_data_w_global[`TSA_GLOBAL_WIDTH-1:0]);
+
+// G-stage update value, in priority order:
+//   1. WSR write to GL by thread 3
+//   2. local_rst / por_rstint3_g  -> MAXGL_GL
+//   3. done/retry restore value (dnrtry_gl_lvl3_data; this replaced the
+//      earlier direct use of tlu_dnrtry_global_g)
+assign gl_lvl3_update_g[`TSA_GLOBAL_WIDTH-1:0] =
+           (thread_sel_id_g[3] & wsr_inst_g & gl_rw_g) ?
+                wsr_gl_lvl3_data[`TSA_GLOBAL_WIDTH-1:0] :
+          ((por_rstint3_g | local_rst) ?
+                `MAXGL_GL :
+                dnrtry_gl_lvl3_data[`TSA_GLOBAL_WIDTH-1:0]);
+
+// Stage the update value into W2; captured only when gl_update_sel_g[3]
+// is asserted.
+dffe_s #(`TSA_GLOBAL_WIDTH) dffe_gl_lvl3_update_w2 (
+     .clk (clk),
+     .en  (gl_update_sel_g[3]),
+     .din (gl_lvl3_update_g[`TSA_GLOBAL_WIDTH-1:0]),
+     .q   (gl_lvl3_update_w2[`TSA_GLOBAL_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+//
+// modified for bug3468
+// A thread 3 trap increments GL unless the level is already at MAXGL_GL,
+// or at MAXSTL_GL while tlu_select_tba_w2 is set.
+assign gl_incr_sel_w2[3] =
+           ~(gl_lvl3_at_maxgl | (tlu_select_tba_w2 & gl_lvl3_at_maxstl)) &
+            tlu_thrd_traps_w2[3];
+// earlier form, kept for reference:
+//            tlu_thrd_traps_w2[3] & (~(gl_lvl3_at_maxgl | tlu_select_tba_w2) |  
+
+// G-stage request to overwrite GL: done/retry, reset, or a WSR write
+// to GL from thread 3.
+assign gl_update_sel_g[3] =
+            tlu_dnrtry3_inst_g | por_rstint3_g | local_rst |
+           (thread_sel_id_g[3] & wsr_inst_g & gl_rw_g);
+//
+// modified for bug3468
+// Trap taken while at MAXGL_GL with tlu_select_tba_w2 set: GL is forced
+// to MAXSTL_GL (see gl_lvl3_new below).
+assign gl_priv_max_sel_w2[3] =
+            tlu_thrd_traps_w2[3] & (tlu_select_tba_w2 & gl_lvl3_at_maxgl);
+//
+// Load enable for the thread 3 GL register.
+assign gl3_en =
+            gl_priv_max_sel_w2[3] | gl_update_sel_w2[3] | gl_incr_sel_w2[3];
+// earlier form: assign gl3_en = gl_incr_sel_w2[3] | gl_update_sel_w2[3]; 
+
+// Next GL value: the staged update has priority, then the forced
+// MAXSTL_GL, otherwise the current level plus one.
+assign gl_lvl3_new[`TSA_GLOBAL_WIDTH-1:0] =
+            gl_update_sel_w2[3] ? gl_lvl3_update_w2 :
+          ((~gl_update_sel_w2[3] & gl_priv_max_sel_w2[3]) ?
+                `MAXSTL_GL :
+                gl_lvl3[`TSA_GLOBAL_WIDTH-1:0] + 2'b01);
+
+// Reset required as processor will start out at gl=1 after reset.
+/****************************
+   to fix bug 6028 manually in the gate netlist, the following needs
+   to be a mux_flop with recirculating path from q to d0 input. But
+   to make it resettable, need to break this recirculating path and
+   insert an AND gate such that local_rst_l is ANDed with the q output.
+dffe_s #(`TSA_GLOBAL_WIDTH) dffe_gl3 (
+     .din (gl_lvl3_new[`TSA_GLOBAL_WIDTH-1:0]), 
+     .q   (gl_lvl3[`TSA_GLOBAL_WIDTH-1:0]),
+     .en  (gl3_en), 
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+******************************/
+
+// Resettable replacement for the enabled flop described in the comment
+// above (thread 3 GL register):
+//   gl_lvl3_or_rst        - recirculated GL value, forced to 0 by local_rst
+//   gl_lvl3_or_rst_or_new - flop input: gl_lvl3_new when gl3_en is set and
+//                           local_rst is not, otherwise the recirculated
+//                           (or reset) value
+wire [1:0] gl_lvl3_or_rst, gl_lvl3_or_rst_or_new;
+
+// hold path ANDed with ~local_rst so reset clears the register
+assign gl_lvl3_or_rst[1:0] = gl_lvl3[1:0] & {2{~local_rst}};
+
+// load is blocked while local_rst is asserted
+assign gl_lvl3_or_rst_or_new[1:0] = (gl3_en&~local_rst) ? gl_lvl3_new[1:0] : gl_lvl3_or_rst[1:0];
+
+// NOTE: instance keeps the name dffe_gl3 although it is now a plain dff_s;
+// the enable is implemented by the mux above.
+dff_s    #(2) dffe_gl3 (
+        .din(gl_lvl3_or_rst_or_new[1:0]) ,
+        .q(gl_lvl3[1:0]),
+        .clk (clk), .se(se), .si(), .so());
+
+/******************************/
+
+// export the current thread 3 GL value
+assign tlu_gl_lvl3[`TSA_GLOBAL_WIDTH-1:0] = gl_lvl3[`TSA_GLOBAL_WIDTH-1:0];
+
+`endif // !`ifdef FPGA_SYN_1THREAD
+   
+////////////////////////////////////////////////////////////////////////
+// Global swap 
+////////////////////////////////////////////////////////////////////////
+// modified due to timing
+/*
+assign agp_thrid[0] = ~tlu_agp_tid_g[0] & ~tlu_agp_tid_g[1] & gl0_en; 
+assign agp_thrid[1] =  tlu_agp_tid_g[0] & ~tlu_agp_tid_g[1] & gl1_en; 
+assign agp_thrid[2] = ~tlu_agp_tid_g[0] &  tlu_agp_tid_g[1] & gl2_en; 
+assign agp_thrid[3] =  tlu_agp_tid_g[0] &  tlu_agp_tid_g[1] & gl3_en; 
+
+assign agp_swap = 
+           (agp_thrid[0])? 
+               |(gl_lvl0_new[1:0] ^ gl_lvl0[1:0]): 
+               ((agp_thrid[1])? 
+                     |(gl_lvl1_new[1:0] ^ gl_lvl1[1:0]): 
+                     ((agp_thrid[2])? 
+                         |(gl_lvl2_new[1:0] ^ gl_lvl2[1:0]): 
+                         ((agp_thrid[3])? 
+                            |(gl_lvl3_new[1:0] ^ gl_lvl3[1:0]):1'b0)));
+
+assign agp_new[1:0] = 
+           (agp_thrid[1])? gl_lvl1_new[1:0] :
+               ((agp_thrid[2])? gl_lvl2_new[1:0] :
+                   ((agp_thrid[3])? gl_lvl3_new[1:0] : 
+                        gl_lvl0_new[1:0])); 
+
+dffr_s dffr_tlu_exu_agp_swap (
+    .din (agp_swap),
+    .q   (tlu_exu_agp_swap),
+    .clk (clk),
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s #(2) dff_tlu_exu_agp (
+    .din (agp_new[1:0]),
+    .q   (tlu_exu_agp[1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dff_s #(2) dff_tlu_exu_agp_tid (
+    .din (tlu_agp_tid_g[1:0]),
+    .q   (tlu_exu_agp_tid[1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+
+// Global swap generation (W2 -> W3).
+// agp_thrid_w2[n] is set when tlu_agp_tid_w2 decodes to thread n AND that
+// thread's GL register is being loaded this cycle (glN_en).
+assign agp_thrid_w2[0] = ~tlu_agp_tid_w2[0] & ~tlu_agp_tid_w2[1] & gl0_en; 
+
+`ifdef FPGA_SYN_1THREAD
+   // single-thread build: threads 1-3 never request a swap
+   assign agp_thrid_w2[1] = 1'b0; 
+   assign agp_thrid_w2[2] = 1'b0; 
+   assign agp_thrid_w2[3] = 1'b0; 
+   // swap is requested only when the new GL value differs from the current one
+   assign agp_swap_w2 = (agp_thrid_w2[0])? 
+			  |(gl_lvl0_new[1:0] ^ gl_lvl0[1:0]):1'b0;
+   // stage the swap request into W3; cleared by local_rst
+   dffr_s dffr_agp_swap_w3 (
+    .din (agp_swap_w2),
+    .q   (agp_swap_w3),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so ()
+);
+
+   // only thread 0 exists, so its new GL value is always the one forwarded
+   assign agp_new_w2[1:0] = gl_lvl0_new[1:0];
+   
+`else
+   
+// thread id decode for threads 1-3, qualified by the matching GL load enable
+assign agp_thrid_w2[1] =  tlu_agp_tid_w2[0] & ~tlu_agp_tid_w2[1] & gl1_en; 
+assign agp_thrid_w2[2] = ~tlu_agp_tid_w2[0] &  tlu_agp_tid_w2[1] & gl2_en; 
+assign agp_thrid_w2[3] =  tlu_agp_tid_w2[0] &  tlu_agp_tid_w2[1] & gl3_en; 
+   
+// swap is requested when the selected thread's new GL value differs from
+// its current one; no swap when no thread is selected
+assign agp_swap_w2 = 
+           (agp_thrid_w2[0])? 
+               |(gl_lvl0_new[1:0] ^ gl_lvl0[1:0]): 
+               ((agp_thrid_w2[1])? 
+                     |(gl_lvl1_new[1:0] ^ gl_lvl1[1:0]): 
+                     ((agp_thrid_w2[2])? 
+                         |(gl_lvl2_new[1:0] ^ gl_lvl2[1:0]): 
+                         ((agp_thrid_w2[3])? 
+                            |(gl_lvl3_new[1:0] ^ gl_lvl3[1:0]):1'b0)));
+
+// stage the swap request into W3; cleared by local_rst
+dffr_s dffr_agp_swap_w3 (
+    .din (agp_swap_w2),
+    .q   (agp_swap_w3),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so ()
+);
+
+// new GL value of the selected thread; falls back to thread 0's value
+// when none of threads 1-3 is selected
+assign agp_new_w2[1:0] = 
+           (agp_thrid_w2[1])? gl_lvl1_new[1:0] :
+               ((agp_thrid_w2[2])? gl_lvl2_new[1:0] :
+                   ((agp_thrid_w2[3])? gl_lvl3_new[1:0] : 
+                        gl_lvl0_new[1:0])); 
+`endif // !`ifdef FPGA_SYN_1THREAD
+
+//
+// added for timing fixes
+// stage the new GL value into W3 (no reset on this flop)
+dff_s #(2) dff_agp_new_w3 (
+    .din (agp_new_w2[1:0]),
+    .q   (agp_new_w3[1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// W3-staged swap request and GL value driven out as tlu_exu_agp_swap / tlu_exu_agp
+assign tlu_exu_agp_swap = agp_swap_w3;
+assign tlu_exu_agp[1:0] = agp_new_w3[1:0];
+
+////////////////////////////////////////////////////////////////////////
+// HPSTATE register controls 
+////////////////////////////////////////////////////////////////////////
+//
+// added for timing
+// Stage the per-thread HPSTATE update requests from G into W2;
+// cleared by local_rst.
+dffr_s #(`TLU_THRD_NUM) dffr_update_hpstate_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (update_hpstate_g[`TLU_THRD_NUM-1:0]),
+    .q   (update_hpstate_w2[`TLU_THRD_NUM-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// thread 0
+// HPSTATE data-in selects for thread 0 (both forced low by rst_tri_en):
+//   [0] done/retry in W2
+//   [1] WSR write to HPSTATE from this thread, only when [0] is not set
+assign tlu_hpstate_din_sel0[0] = ~rst_tri_en & dnrtry_inst_w2[0];
+assign tlu_hpstate_din_sel0[1] =  thread_sel_id_w2[0]     &
+                                 ~tlu_hpstate_din_sel0[0] &
+                                 ~rst_tri_en              &
+                                 (wsr_inst_w2 & hpstate_rw_w2);
+//
+// generating write enables to update the hpstate registers
+// modified for timing 
+/*
+assign tlu_hpstate_din_sel0[0] = tlu_dnrtry0_inst_g; 
+assign tlu_hpstate_din_sel0[1] = (hpstate_rw_g & wsr_inst_g) & 
+                                 ~tlu_hpstate_din_sel0[0]        &  
+                                  thread_sel_id_g[0];
+assign update_hpstate0_g =  tlu_thrd0_traps | tlu_dnrtry0_inst_g | 
+		                  ((hpstate_rw_g & wsr_inst_g) & 
+                            thread_sel_id_g[0]);
+
+assign tlu_update_hpstate_l_g[0] = ~(update_hpstate0_g | local_rst);
+*/
+// G-stage HPSTATE update request for thread 0: done/retry, or a WSR
+// write to HPSTATE from this thread.
+assign update_hpstate_g[0] =
+           (thread_sel_id_g[0] & (wsr_inst_g & hpstate_rw_g)) |
+            tlu_dnrtry0_inst_g;
+
+// Active-low HPSTATE update enable in W2: driven low by a staged update
+// request, a trap on thread 0, or local_rst.
+assign tlu_update_hpstate_l_w2[0] =
+           ~(tlu_thrd_traps_w2[0] | local_rst | update_hpstate_w2[0]);
+//
+// thread 1
+// HPSTATE data-in selects for thread 1 (both forced low by rst_tri_en):
+//   [0] done/retry in W2
+//   [1] WSR write to HPSTATE from this thread, only when [0] is not set
+assign tlu_hpstate_din_sel1[0] = ~rst_tri_en & dnrtry_inst_w2[1];
+assign tlu_hpstate_din_sel1[1] =  thread_sel_id_w2[1]     &
+                                 ~tlu_hpstate_din_sel1[0] &
+                                 ~rst_tri_en              &
+                                 (wsr_inst_w2 & hpstate_rw_w2);
+//
+// generating write enables to update the hpstate registers
+// modified for timing 
+/*
+assign tlu_hpstate_din_sel1[0] = tlu_dnrtry1_inst_g; 
+assign tlu_hpstate_din_sel1[1] = (hpstate_rw_g & wsr_inst_g) & 
+                                 ~tlu_hpstate_din_sel1[0]        &  
+                                  thread_sel_id_g[1];
+assign update_hpstate1_g =  tlu_thrd1_traps | tlu_dnrtry1_inst_g |
+		                  ((hpstate_rw_g & wsr_inst_g) & 
+                            thread_sel_id_g[1]);
+
+assign tlu_update_hpstate_l_g[1] = ~(update_hpstate1_g | local_rst);
+*/
+// G-stage HPSTATE update request for thread 1: done/retry, or a WSR
+// write to HPSTATE from this thread.
+assign update_hpstate_g[1] =
+           (thread_sel_id_g[1] & (wsr_inst_g & hpstate_rw_g)) |
+            tlu_dnrtry1_inst_g;
+
+// Active-low HPSTATE update enable in W2: driven low by a staged update
+// request, a trap on thread 1, or local_rst.
+assign tlu_update_hpstate_l_w2[1] =
+           ~(tlu_thrd_traps_w2[1] | local_rst | update_hpstate_w2[1]);
+//
+// thread 2
+// HPSTATE data-in selects for thread 2 (both forced low by rst_tri_en):
+//   [0] done/retry in W2
+//   [1] WSR write to HPSTATE from this thread, only when [0] is not set
+assign tlu_hpstate_din_sel2[0] = ~rst_tri_en & dnrtry_inst_w2[2];
+assign tlu_hpstate_din_sel2[1] =  thread_sel_id_w2[2]     &
+                                 ~tlu_hpstate_din_sel2[0] &
+                                 ~rst_tri_en              &
+                                 (wsr_inst_w2 & hpstate_rw_w2);
+//
+// generating write enables to update the hpstate registers
+// modified for timing
+/*
+assign tlu_hpstate_din_sel2[0] = tlu_dnrtry2_inst_g; 
+assign tlu_hpstate_din_sel2[1] = (hpstate_rw_g & wsr_inst_g) & 
+                                 ~tlu_hpstate_din_sel2[0]        &  
+                                  thread_sel_id_g[2];
+assign update_hpstate2_g =  tlu_thrd2_traps | tlu_dnrtry2_inst_g | 
+		                  ((hpstate_rw_g & wsr_inst_g) & 
+                            thread_sel_id_g[2]);
+
+assign tlu_update_hpstate_l_g[2] = ~(update_hpstate2_g | local_rst);
+*/
+// G-stage HPSTATE update request for thread 2: done/retry, or a WSR
+// write to HPSTATE from this thread.
+assign update_hpstate_g[2] =
+           (thread_sel_id_g[2] & (wsr_inst_g & hpstate_rw_g)) |
+            tlu_dnrtry2_inst_g;
+
+// Active-low HPSTATE update enable in W2: driven low by a staged update
+// request, a trap on thread 2, or local_rst.
+assign tlu_update_hpstate_l_w2[2] =
+           ~(tlu_thrd_traps_w2[2] | local_rst | update_hpstate_w2[2]);
+//
+// thread 3
+// HPSTATE data-in selects for thread 3 (both forced low by rst_tri_en):
+//   [0] done/retry in W2
+//   [1] WSR write to HPSTATE from this thread, only when [0] is not set
+assign tlu_hpstate_din_sel3[0] = ~rst_tri_en & dnrtry_inst_w2[3];
+assign tlu_hpstate_din_sel3[1] =  thread_sel_id_w2[3]     &
+                                 ~tlu_hpstate_din_sel3[0] &
+                                 ~rst_tri_en              &
+                                 (wsr_inst_w2 & hpstate_rw_w2);
+//
+// generating write enables to update the hpstate registers
+// modified for timing
+/*
+assign tlu_hpstate_din_sel3[0] = tlu_dnrtry3_inst_g; 
+assign tlu_hpstate_din_sel3[1] = (hpstate_rw_g & wsr_inst_g) & 
+                                 ~tlu_hpstate_din_sel3[0]        &  
+                                  thread_sel_id_g[3];
+assign update_hpstate3_g =  tlu_thrd3_traps | tlu_dnrtry3_inst_g | 
+		                  ((hpstate_rw_g & wsr_inst_g) & 
+                            thread_sel_id_g[3]);
+
+assign tlu_update_hpstate_l_g[3] = ~(update_hpstate3_g | local_rst);
+*/
+// G-stage HPSTATE update request for thread 3: done/retry, or a WSR
+// write to HPSTATE from this thread.
+assign update_hpstate_g[3] =
+           (thread_sel_id_g[3] & (wsr_inst_g & hpstate_rw_g)) |
+            tlu_dnrtry3_inst_g;
+
+// Active-low HPSTATE update enable in W2: driven low by a staged update
+// request, a trap on thread 3, or local_rst.
+assign tlu_update_hpstate_l_w2[3] =
+           ~(tlu_thrd_traps_w2[3] | local_rst | update_hpstate_w2[3]);
+
+////////////////////////////////////////////////////////////////////////
+// HTICKCMP register controls 
+////////////////////////////////////////////////////////////////////////
+// thread 0
+// Load enable for thread 0's HTICK_CMP.INT_DIS bit: a WSR write to
+// HTICKCMP from this thread, local_rst, or por_rstint0_g.
+assign htickcmp_intdis_en[0] =
+            por_rstint0_g | local_rst |
+           (thread_sel_id_g[0] & wsr_inst_g & htickcmp_rw_g);
+//
+// HTICK_CMP.INT_DIS
+dffe_s dffe_hintdis0 (
+     .clk (clk),
+     .en  (htickcmp_intdis_en[0]),
+     .din (tlu_tick_ctl_din),
+     .q   (htick_intdis0),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+// thread 1
+// Load enable for thread 1's HTICK_CMP.INT_DIS bit: a WSR write to
+// HTICKCMP from this thread, local_rst, or por_rstint1_g.
+assign htickcmp_intdis_en[1] =
+            por_rstint1_g | local_rst |
+           (thread_sel_id_g[1] & wsr_inst_g & htickcmp_rw_g);
+//
+// HTICK_CMP.INT_DIS
+dffe_s dffe_hintdis1 (
+     .clk (clk),
+     .en  (htickcmp_intdis_en[1]),
+     .din (tlu_tick_ctl_din),
+     .q   (htick_intdis1),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+// thread 2
+// Load enable for thread 2's HTICK_CMP.INT_DIS bit: a WSR write to
+// HTICKCMP from this thread, local_rst, or por_rstint2_g.
+assign htickcmp_intdis_en[2] =
+            por_rstint2_g | local_rst |
+           (thread_sel_id_g[2] & wsr_inst_g & htickcmp_rw_g);
+//
+// HTICK_CMP.INT_DIS
+dffe_s dffe_hintdis2 (
+     .clk (clk),
+     .en  (htickcmp_intdis_en[2]),
+     .din (tlu_tick_ctl_din),
+     .q   (htick_intdis2),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+// thread 3
+// Load enable for thread 3's HTICK_CMP.INT_DIS bit: a WSR write to
+// HTICKCMP from this thread, local_rst, or por_rstint3_g.
+assign htickcmp_intdis_en[3] =
+            por_rstint3_g | local_rst |
+           (thread_sel_id_g[3] & wsr_inst_g & htickcmp_rw_g);
+//
+// HTICK_CMP.INT_DIS
+dffe_s dffe_hintdis3 (
+     .clk (clk),
+     .en  (htickcmp_intdis_en[3]),
+     .din (tlu_tick_ctl_din),
+     .q   (htick_intdis3),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+//
+// generating for the non-thread specific htick_cmp
+// interrupt disable
+// AND-OR select of the per-thread INT_DIS bits using thread_sel_id_e.
+assign tlu_htickcmp_intdis =
+           (htick_intdis0 & thread_sel_id_e[0]) |
+           (htick_intdis1 & thread_sel_id_e[1]) |
+           (htick_intdis2 & thread_sel_id_e[2]) |
+           (htick_intdis3 & thread_sel_id_e[3]);
+
+////////////////////////////////////////////////////////////////////////
+// HINTP register controls 
+////////////////////////////////////////////////////////////////////////
+// thread 0 
+// 
+// modified for timing
+// assign tlu_set_hintp_g[0] = 
+//            (~htick_intdis0 & tlu_tickcmp_sel[0])? tlu_htick_match: 1'b0;
+// HINTP set select for thread 0: tick-compare select gated off by INT_DIS.
+assign tlu_set_hintp_sel_g[0] = tlu_tickcmp_sel[0] & ~htick_intdis0;
+// modified for bug 4886
+// HINTP write strobe: WSR write to HINTP from thread 0, or local_rst.
+assign tlu_wr_hintp_g[0] =
+            local_rst | (thread_sel_id_g[0] & wsr_inst_g & hintp_rw_g);
+//
+// modified for timing - moved to tlu_tdp
+// assign tlu_hintp_en_l_g[0] = 
+// 	      ~(tlu_set_hintp_g[0] | tlu_wr_hintp_g[0]);
+//
+// thread 1 
+//
+// modified for timing
+// assign tlu_set_hintp_g[1] = 
+//            (~htick_intdis1 & tlu_tickcmp_sel[1])? tlu_htick_match: 1'b0;
+// HINTP set select for thread 1: tick-compare select gated off by INT_DIS.
+assign tlu_set_hintp_sel_g[1] = tlu_tickcmp_sel[1] & ~htick_intdis1;
+// HINTP write strobe: WSR write to HINTP from thread 1, or local_rst.
+assign tlu_wr_hintp_g[1] =
+            local_rst | (thread_sel_id_g[1] & wsr_inst_g & hintp_rw_g);
+//
+// modified for timing - moved to tlu_tdp
+// assign tlu_hintp_en_l_g[1] = 
+//	      ~(tlu_set_hintp_g[1] | tlu_wr_hintp_g[1]);
+//
+// thread 2 
+// 
+// modified for timing 
+// assign tlu_set_hintp_g[2] = 
+//            (~htick_intdis2 & tlu_tickcmp_sel[2])? tlu_htick_match: 1'b0;
+// HINTP set select for thread 2: tick-compare select gated off by INT_DIS.
+assign tlu_set_hintp_sel_g[2] = tlu_tickcmp_sel[2] & ~htick_intdis2;
+// HINTP write strobe: WSR write to HINTP from thread 2, or local_rst.
+assign tlu_wr_hintp_g[2] =
+            local_rst | (thread_sel_id_g[2] & wsr_inst_g & hintp_rw_g);
+//
+// modified for timing - moved to tlu_tdp
+// assign tlu_hintp_en_l_g[2] = 
+// 	      ~(tlu_set_hintp_g[2] | tlu_wr_hintp_g[2]);
+//
+// thread 3 
+//
+// modified for timing
+// assign tlu_set_hintp_g[3] = 
+//            (~htick_intdis3 & tlu_tickcmp_sel[3])? tlu_htick_match: 1'b0;
+// HINTP set select for thread 3: tick-compare select gated off by INT_DIS.
+assign tlu_set_hintp_sel_g[3] = tlu_tickcmp_sel[3] & ~htick_intdis3;
+// HINTP write strobe: WSR write to HINTP from thread 3, or local_rst.
+assign tlu_wr_hintp_g[3] =
+            local_rst | (thread_sel_id_g[3] & wsr_inst_g & hintp_rw_g);
+//
+// modified for timing - moved to tlu_tdp
+// assign tlu_hintp_en_l_g[3] = 
+// 	      ~(tlu_set_hintp_g[3] | tlu_wr_hintp_g[3]);
+
+////////////////////////////////////////////////////////////////////////
+// HTBA register controls 
+////////////////////////////////////////////////////////////////////////
+
+// Active-low per-thread HTBA write enables: low only for a WSR write to
+// HTBA issued by the matching thread.
+assign tlu_htba_en_l[0] = ~(thread_sel_id_g[0] & wsr_inst_g & htba_rw_g);
+assign tlu_htba_en_l[1] = ~(thread_sel_id_g[1] & wsr_inst_g & htba_rw_g);
+assign tlu_htba_en_l[2] = ~(thread_sel_id_g[2] & wsr_inst_g & htba_rw_g);
+assign tlu_htba_en_l[3] = ~(thread_sel_id_g[3] & wsr_inst_g & htba_rw_g);
+
+////////////////////////////////////////////////////////////////////////
+// ASI QUEUE register controls and data 
+////////////////////////////////////////////////////////////////////////
+// ASI read or write op
+//
+// E stage: an alternate-space store is an ASI write, an alternate-space
+// load is an ASI read.
+assign asi_queue_write_e = ifu_lsu_st_inst_e & ifu_lsu_alt_space_e;
+assign asi_queue_read_e  = ifu_lsu_ld_inst_e & ifu_lsu_alt_space_e;
+//
+// M stage: qualify the staged (pre-qualified) controls with instruction valid
+assign asi_queue_write_m = asi_queue_write_pq_m & tlu_inst_vld_m;
+assign asi_queue_read_m  = asi_queue_read_pq_m  & tlu_inst_vld_m;
+//
+// modified due to timing violations
+// assign asi_queue_write_g = 
+//      ~(tlu_flush_pipe_w | ifu_tlu_flush_w) & asi_queue_write_uf_g;
+// G stage: the unflushed write is dropped when local_flush_all_w is set
+assign asi_queue_write_g =
+            asi_queue_write_uf_g & ~local_flush_all_w;
+assign tlu_asi_write_g = asi_queue_write_g;
+
+// assign asi_queue_read_g  = 
+//     ~(tlu_flush_pipe_w | ifu_tlu_flush_w) & asi_queue_read_uf_g;
+//
+// staging the asi controls
+// E -> M staging of the ASI write/read controls (cleared by local_rst)
+dffr_s #(2) dffr_asi_ctl_m (
+    .clk (clk),
+    .rst (local_rst),
+    .din ({asi_queue_write_e, asi_queue_read_e}),
+    .q   ({asi_queue_write_pq_m, asi_queue_read_pq_m}),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// M -> G staging; the write output is still unflushed (_uf_g) here
+dffr_s #(2) dffr_asi_ctl_g (
+    .clk (clk),
+    .rst (local_rst),
+    .din ({asi_queue_write_m, asi_queue_read_m}),
+    .q   ({asi_queue_write_uf_g, asi_queue_read_g}),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// 
+// ASI address decode 
+// decoding the ASI state 0x25 for the ASI_QUEUES
+// Alternate-space access whose ASI state equals TLU_ASI_QUE_ASI
+assign asi_queue_rw_e =
+           ifu_lsu_alt_space_e ?
+              (`TLU_ASI_QUE_ASI == tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0]) :
+               1'b0;
+//
+// stage the decode E -> M -> G (both flops cleared by local_rst)
+dffr_s dffr_asi_queue_rw_m (
+    .clk (clk),
+    .rst (local_rst),
+    .din (asi_queue_rw_e),
+    .q   (asi_queue_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffr_s dffr_asi_queue_rw_g (
+    .clk (clk),
+    .rst (local_rst),
+    .din (asi_queue_rw_m),
+    .q   (asi_queue_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// modified due to timing violations
+/*
+// assign tlu_ldst_va_e[`TLU_ASI_VA_WIDTH-1:0] = 
+//          exu_lsu_ldst_va_e[`TLU_ASI_VA_WIDTH-1:0];  
+dff_s #(`TLU_ASI_VA_WIDTH) dff_tlu_ldst_va_m (
+    .din (exu_lsu_ldst_va_e[`TLU_ASI_VA_WIDTH-1:0]),
+    .q   (tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+// modified for timing
+assign que_legal_va_e = 
+           cpu_mondo_head_rw_e  | cpu_mondo_tail_rw_e |
+           dev_mondo_head_rw_e  | dev_mondo_tail_rw_e |
+           resum_err_head_rw_e  | resum_err_tail_rw_e |
+           nresum_err_head_rw_e | nresum_err_tail_rw_e; 
+
+assign que_ill_va_e = 
+           (ifu_lsu_alt_space_e)?
+           (((|exu_lsu_ldst_va_e[`ASI_VA_WIDTH-1:`TLU_ASI_QUE_VA_HI+1]) |
+           (|exu_lsu_ldst_va_e[`TLU_ASI_QUE_VA_LO-1:0]) | ~que_legal_va_e) & 
+            asi_queue_pq_rw_e): 1'b0; 
+*/
+// M-stage load/store VA bits used for the queue register decodes come
+// straight from lsu_tlu_ldst_va_m.
+assign tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0] =
+           lsu_tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0];
+
+// VA is legal when it matches any of the eight queue head/tail decodes
+assign que_legal_va_m =
+           cpu_mondo_head_rw_m  | dev_mondo_head_rw_m  |
+           resum_err_head_rw_m  | nresum_err_head_rw_m |
+           cpu_mondo_tail_rw_m  | dev_mondo_tail_rw_m  |
+           resum_err_tail_rw_m  | nresum_err_tail_rw_m;
+
+//
+// illegal range check for queue va: only flagged for queue-ASI accesses
+assign que_ill_va_m =
+           asi_queue_rw_m ? ~que_legal_va_m : 1'b0;
+//
+// staged illegal va range (M -> G, cleared by local_rst)
+//
+dffr_s dffr_que_ill_va_g (
+    .clk (clk),
+    .rst (local_rst),
+    .din (que_ill_va_m),
+    .q   (que_ill_va_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// 
+// added for timing - interrupt register decodes
+// interrupt receiver registers
+// Alternate-space access whose ASI state equals TLU_INRR_ASI
+assign asi_inrr_rw_e =
+           ifu_lsu_alt_space_e ?
+              (`TLU_INRR_ASI == tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0]) :
+               1'b0;
+
+// stage the decode E -> M -> G (both flops cleared by local_rst)
+dffr_s dffr_asi_inrr_rw_m (
+    .clk (clk),
+    .rst (local_rst),
+    .din (asi_inrr_rw_e),
+    .q   (asi_inrr_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffr_s dffr_asi_inrr_rw_g (
+    .clk (clk),
+    .rst (local_rst),
+    .din (asi_inrr_rw_m),
+    .q   (asi_inrr_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// per-thread write strobes: unflushed ASI write to the INRR ASI that is
+// not killed by local_flush_all_w, steered by thread_sel_id_g
+assign inc_ind_asi_wr_inrr[0] =
+           (thread_sel_id_g[0] & ~local_flush_all_w &
+            asi_queue_write_uf_g & asi_inrr_rw_g);
+assign inc_ind_asi_wr_inrr[1] =
+           (thread_sel_id_g[1] & ~local_flush_all_w &
+            asi_queue_write_uf_g & asi_inrr_rw_g);
+assign inc_ind_asi_wr_inrr[2] =
+           (thread_sel_id_g[2] & ~local_flush_all_w &
+            asi_queue_write_uf_g & asi_inrr_rw_g);
+assign inc_ind_asi_wr_inrr[3] =
+           (thread_sel_id_g[3] & ~local_flush_all_w &
+            asi_queue_write_uf_g & asi_inrr_rw_g);
+//
+// interrupt dispatch registers
+// Alternate-space access whose ASI state equals TLU_INDR_ASI
+assign asi_indr_rw_e =
+           ifu_lsu_alt_space_e ?
+              (`TLU_INDR_ASI == tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0]) :
+               1'b0;
+
+// stage the decode E -> M -> G (both flops cleared by local_rst)
+dffr_s dffr_asi_indr_rw_m (
+    .clk (clk),
+    .rst (local_rst),
+    .din (asi_indr_rw_e),
+    .q   (asi_indr_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffr_s dffr_asi_indr_rw_g (
+    .clk (clk),
+    .rst (local_rst),
+    .din (asi_indr_rw_m),
+    .q   (asi_indr_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// per-thread write strobes: unflushed ASI write to the INDR ASI that is
+// not killed by local_flush_all_w, steered by thread_sel_id_g
+assign inc_ind_asi_wr_indr[0] =
+           thread_sel_id_g[0] & asi_queue_write_uf_g &
+           ~local_flush_all_w & asi_indr_rw_g;
+assign inc_ind_asi_wr_indr[1] =
+           thread_sel_id_g[1] & asi_queue_write_uf_g &
+           ~local_flush_all_w & asi_indr_rw_g;
+assign inc_ind_asi_wr_indr[2] =
+           thread_sel_id_g[2] & asi_queue_write_uf_g &
+           ~local_flush_all_w & asi_indr_rw_g;
+assign inc_ind_asi_wr_indr[3] =
+           thread_sel_id_g[3] & asi_queue_write_uf_g &
+           ~local_flush_all_w & asi_indr_rw_g;
+
+//
+// interrupt vector registers
+// Alternate-space access whose ASI state equals TLU_INVR_ASI
+assign asi_invr_rw_e =
+           ifu_lsu_alt_space_e ?
+              (`TLU_INVR_ASI == tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0]) :
+               1'b0;
+
+// stage the decode E -> M -> G (both flops cleared by local_rst)
+dffr_s dffr_asi_invr_rw_m (
+    .clk (clk),
+    .rst (local_rst),
+    .din (asi_invr_rw_e),
+    .q   (asi_invr_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffr_s dffr_asi_invr_rw_g (
+    .clk (clk),
+    .rst (local_rst),
+    .din (asi_invr_rw_m),
+    .q   (asi_invr_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// per-thread read strobes: ASI read of the INVR ASI that is not killed
+// by local_flush_all_w, steered by thread_sel_id_g
+assign inc_ind_asi_rd_invr[0] =
+           (thread_sel_id_g[0] & ~local_flush_all_w &
+            asi_queue_read_g & asi_invr_rw_g);
+assign inc_ind_asi_rd_invr[1] =
+           (thread_sel_id_g[1] & ~local_flush_all_w &
+            asi_queue_read_g & asi_invr_rw_g);
+assign inc_ind_asi_rd_invr[2] =
+           (thread_sel_id_g[2] & ~local_flush_all_w &
+            asi_queue_read_g & asi_invr_rw_g);
+assign inc_ind_asi_rd_invr[3] =
+           (thread_sel_id_g[3] & ~local_flush_all_w &
+            asi_queue_read_g & asi_invr_rw_g);
+//
+// timing changes: all va e stage signals have been moved to m-stage
+// decoding the VA portion of the ASI address
+// cpu_mondo_head: 0x3c0
+// M-stage VA decodes for the eight queue head/tail registers; each is a
+// full-width compare of tlu_ldst_va_m against the register's VA constant.
+//
+// cpu_mondo_head: 0x3c0
+assign cpu_mondo_head_rw_m =
+          (`CPU_MONDO_HEAD == tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]);
+//
+// cpu_mondo_tail: 0x3c8
+assign cpu_mondo_tail_rw_m =
+          (`CPU_MONDO_TAIL == tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]);
+//
+// dev_mondo_head: 0x3d0
+assign dev_mondo_head_rw_m =
+          (`DEV_MONDO_HEAD == tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]);
+//
+// dev_mondo_tail: 0x3d8
+assign dev_mondo_tail_rw_m =
+          (`DEV_MONDO_TAIL == tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]);
+//
+// resum_err_head: 0x3e0
+assign resum_err_head_rw_m =
+          (`RESUM_ERR_HEAD == tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]);
+//
+// resum_err_tail: 0x3e8
+assign resum_err_tail_rw_m =
+          (`RESUM_ERR_TAIL == tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]);
+//
+// nresum_err_head: 0x3f0
+assign nresum_err_head_rw_m =
+          (`NRESUM_ERR_HEAD == tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]);
+//
+// nresum_err_tail: 0x3f8
+assign nresum_err_tail_rw_m =
+          (`NRESUM_ERR_TAIL == tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]);
+//
+// timing change: replaced by flopped tlu_ldst_va_m decodes
+// staging the ASI queue rw decodes
+// cpu_mondo_head
+/*
+dff_s dff_cpu_mondo_head_rw_m (
+     .din (cpu_mondo_head_rw_e), 
+     .q   (cpu_mondo_head_rw_m),
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+
+dff_s dff_dev_mondo_head_rw_m (
+     .din (dev_mondo_head_rw_e), 
+     .q   (dev_mondo_head_rw_m),
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+
+dff_s dff_resum_err_head_rw_m (
+     .din (resum_err_head_rw_e), 
+     .q   (resum_err_head_rw_m),
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+
+dff_s dff_nresum_err_head_rw_m (
+     .din (nresum_err_head_rw_e), 
+     .q   (nresum_err_head_rw_m),
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+
+dff_s dff_cpu_mondo_tail_rw_m (
+     .din (cpu_mondo_tail_rw_e), 
+     .q   (cpu_mondo_tail_rw_m),
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+
+dff_s dff_dev_mondo_tail_rw_m (
+     .din (dev_mondo_tail_rw_e), 
+     .q   (dev_mondo_tail_rw_m),
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+
+dff_s dff_resum_err_tail_rw_m (
+     .din (resum_err_tail_rw_e), 
+     .q   (resum_err_tail_rw_m),
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+
+dff_s dff_nresum_err_tail_rw_m (
+     .din (nresum_err_tail_rw_e), 
+     .q   (nresum_err_tail_rw_m),
+     .clk (clk),
+     .se  (se),       
+     .si  (),          
+     .so  ()
+);
+//
+// illegal range check
+dffr_s dffr_que_ill_va_m (
+    .din (que_ill_va_e),
+    .q   (que_ill_va_m),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+
+// staging the ASI queue rw decodes
+// cpu_mondo_head
+// M -> G staging of the eight queue head/tail VA decodes.
+// These are plain dff_s flops (no reset input).
+//
+// head pointers
+dff_s dff_cpu_mondo_head_rw_g (
+     .clk (clk),
+     .din (cpu_mondo_head_rw_m),
+     .q   (cpu_mondo_head_rw_g),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+dff_s dff_dev_mondo_head_rw_g (
+     .clk (clk),
+     .din (dev_mondo_head_rw_m),
+     .q   (dev_mondo_head_rw_g),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+dff_s dff_resum_err_head_rw_g (
+     .clk (clk),
+     .din (resum_err_head_rw_m),
+     .q   (resum_err_head_rw_g),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+dff_s dff_nresum_err_head_rw_g (
+     .clk (clk),
+     .din (nresum_err_head_rw_m),
+     .q   (nresum_err_head_rw_g),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+//
+// tail pointers
+dff_s dff_cpu_mondo_tail_rw_g (
+     .clk (clk),
+     .din (cpu_mondo_tail_rw_m),
+     .q   (cpu_mondo_tail_rw_g),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+dff_s dff_dev_mondo_tail_rw_g (
+     .clk (clk),
+     .din (dev_mondo_tail_rw_m),
+     .q   (dev_mondo_tail_rw_g),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+dff_s dff_resum_err_tail_rw_g (
+     .clk (clk),
+     .din (resum_err_tail_rw_m),
+     .q   (resum_err_tail_rw_g),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+dff_s dff_nresum_err_tail_rw_g (
+     .clk (clk),
+     .din (nresum_err_tail_rw_m),
+     .q   (nresum_err_tail_rw_g),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+//
+// cpu_mondo_head: thread-specific read selects and write enables
+//
+// Read selects.  Bits 3:1 are the AND of asi_queue_read_g, cpu_mondo_head_rw_g,
+// asi_queue_rw_g and the matching thread_sel_id_g bit.  Bit 0 is not gated by
+// thread_sel_id_g[0]: it is the NOR of bits 3:1, so it is set whenever none
+// of bits 3:1 is set (including when no cpu_mondo_head read is in progress).
+assign cpu_mondo_head_rd_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           cpu_mondo_head_rw_g & asi_queue_read_g;
+assign cpu_mondo_head_rd_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           cpu_mondo_head_rw_g & asi_queue_read_g;
+assign cpu_mondo_head_rd_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           cpu_mondo_head_rw_g & asi_queue_read_g;
+assign cpu_mondo_head_rd_g[0] = ~cpu_mondo_head_rd_g[3] &
+           ~cpu_mondo_head_rd_g[2] & ~cpu_mondo_head_rd_g[1];
+//
+// read of cpu_mondo_head by any thread (no thread_sel_id_g term)
+assign cpu_mondo_hd_rd_g =
+           asi_queue_rw_g & cpu_mondo_head_rw_g &
+           asi_queue_read_g;
+//
+// Write enables, one per thread; all are blocked while local_flush_all_w is set
+//
+assign cpu_mondo_head_wr_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g & cpu_mondo_head_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign cpu_mondo_head_wr_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g & cpu_mondo_head_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign cpu_mondo_head_wr_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g & cpu_mondo_head_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign cpu_mondo_head_wr_g[0] =
+           thread_sel_id_g[0] & asi_queue_rw_g & cpu_mondo_head_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+//
+// cpu_mondo_tail read selects: bits 3:1 AND asi_queue_read_g, cpu_mondo_tail_rw_g,
+// asi_queue_rw_g and the matching thread_sel_id_g bit.  Bit 0 is not gated by
+// thread_sel_id_g[0]: it is the NOR of bits 3:1, so it is set whenever none
+// of bits 3:1 is set.
+assign cpu_mondo_tail_rd_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           cpu_mondo_tail_rw_g & asi_queue_read_g;
+assign cpu_mondo_tail_rd_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           cpu_mondo_tail_rw_g & asi_queue_read_g;
+assign cpu_mondo_tail_rd_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           cpu_mondo_tail_rw_g & asi_queue_read_g;
+assign cpu_mondo_tail_rd_g[0] = ~cpu_mondo_tail_rd_g[3] &
+           ~cpu_mondo_tail_rd_g[2] & ~cpu_mondo_tail_rd_g[1];
+//
+// read of cpu_mondo_tail by any thread (no thread_sel_id_g term)
+assign cpu_mondo_ta_rd_g =
+           asi_queue_rw_g & cpu_mondo_tail_rw_g &
+           asi_queue_read_g;
+//
+// Write enables, one per thread; all are blocked while local_flush_all_w is set
+//
+assign cpu_mondo_tail_wr_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g & cpu_mondo_tail_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign cpu_mondo_tail_wr_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g & cpu_mondo_tail_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign cpu_mondo_tail_wr_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g & cpu_mondo_tail_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign cpu_mondo_tail_wr_g[0] =
+           thread_sel_id_g[0] & asi_queue_rw_g & cpu_mondo_tail_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+//
+// dev_mondo_head read selects: bits 3:1 AND asi_queue_read_g, dev_mondo_head_rw_g,
+// asi_queue_rw_g and the matching thread_sel_id_g bit.  Bit 0 is not gated by
+// thread_sel_id_g[0]: it is the NOR of bits 3:1, so it is set whenever none
+// of bits 3:1 is set.
+assign dev_mondo_head_rd_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           dev_mondo_head_rw_g & asi_queue_read_g;
+assign dev_mondo_head_rd_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           dev_mondo_head_rw_g & asi_queue_read_g;
+assign dev_mondo_head_rd_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           dev_mondo_head_rw_g & asi_queue_read_g;
+assign dev_mondo_head_rd_g[0] = ~dev_mondo_head_rd_g[3] &
+           ~dev_mondo_head_rd_g[2] & ~dev_mondo_head_rd_g[1];
+//
+// read of dev_mondo_head by any thread (no thread_sel_id_g term)
+assign dev_mondo_hd_rd_g =
+           asi_queue_rw_g & dev_mondo_head_rw_g &
+           asi_queue_read_g;
+//
+// Write enables, one per thread; all are blocked while local_flush_all_w is set
+//
+assign dev_mondo_head_wr_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g & dev_mondo_head_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign dev_mondo_head_wr_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g & dev_mondo_head_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign dev_mondo_head_wr_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g & dev_mondo_head_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign dev_mondo_head_wr_g[0] =
+           thread_sel_id_g[0] & asi_queue_rw_g & dev_mondo_head_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+//
+// dev_mondo_tail read selects: bits 3:1 AND asi_queue_read_g, dev_mondo_tail_rw_g,
+// asi_queue_rw_g and the matching thread_sel_id_g bit.  Bit 0 is not gated by
+// thread_sel_id_g[0]: it is the NOR of bits 3:1, so it is set whenever none
+// of bits 3:1 is set.
+assign dev_mondo_tail_rd_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           dev_mondo_tail_rw_g & asi_queue_read_g;
+assign dev_mondo_tail_rd_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           dev_mondo_tail_rw_g & asi_queue_read_g;
+assign dev_mondo_tail_rd_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           dev_mondo_tail_rw_g & asi_queue_read_g;
+assign dev_mondo_tail_rd_g[0] = ~dev_mondo_tail_rd_g[3] &
+           ~dev_mondo_tail_rd_g[2] & ~dev_mondo_tail_rd_g[1];
+//
+// read of dev_mondo_tail by any thread (no thread_sel_id_g term)
+assign dev_mondo_ta_rd_g =
+           asi_queue_rw_g & dev_mondo_tail_rw_g &
+           asi_queue_read_g;
+//
+// Write enables, one per thread; all are blocked while local_flush_all_w is set
+//
+assign dev_mondo_tail_wr_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g & dev_mondo_tail_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign dev_mondo_tail_wr_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g & dev_mondo_tail_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign dev_mondo_tail_wr_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g & dev_mondo_tail_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+assign dev_mondo_tail_wr_g[0] =
+           thread_sel_id_g[0] & asi_queue_rw_g & dev_mondo_tail_rw_g &
+           asi_queue_write_uf_g & ~local_flush_all_w;
+//
+// resum_err_head read selects: bits 3:1 AND asi_queue_read_g, resum_err_head_rw_g,
+// asi_queue_rw_g and the matching thread_sel_id_g bit.  Bit 0 is not gated by
+// thread_sel_id_g[0]: it is the NOR of bits 3:1, so it is set whenever none
+// of bits 3:1 is set.
+assign resum_err_head_rd_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           resum_err_head_rw_g & asi_queue_read_g;
+assign resum_err_head_rd_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           resum_err_head_rw_g & asi_queue_read_g;
+assign resum_err_head_rd_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           resum_err_head_rw_g & asi_queue_read_g;
+assign resum_err_head_rd_g[0] = ~resum_err_head_rd_g[3] &
+           ~resum_err_head_rd_g[2] & ~resum_err_head_rd_g[1];
+//
+// read of resum_err_head by any thread (no thread_sel_id_g term)
+assign resum_err_hd_rd_g =
+           asi_queue_rw_g & resum_err_head_rw_g &
+           asi_queue_read_g;
+//
+// Write enables, one per thread, qualified by asi_queue_write_g
+//
+assign resum_err_head_wr_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           resum_err_head_rw_g & asi_queue_write_g;
+assign resum_err_head_wr_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           resum_err_head_rw_g & asi_queue_write_g;
+assign resum_err_head_wr_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           resum_err_head_rw_g & asi_queue_write_g;
+assign resum_err_head_wr_g[0] =
+           thread_sel_id_g[0] & asi_queue_rw_g &
+           resum_err_head_rw_g & asi_queue_write_g;
+//
+// resum_err_tail read selects: bits 3:1 AND asi_queue_read_g, resum_err_tail_rw_g,
+// asi_queue_rw_g and the matching thread_sel_id_g bit.  Bit 0 is not gated by
+// thread_sel_id_g[0]: it is the NOR of bits 3:1, so it is set whenever none
+// of bits 3:1 is set.
+assign resum_err_tail_rd_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           resum_err_tail_rw_g & asi_queue_read_g;
+assign resum_err_tail_rd_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           resum_err_tail_rw_g & asi_queue_read_g;
+assign resum_err_tail_rd_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           resum_err_tail_rw_g & asi_queue_read_g;
+assign resum_err_tail_rd_g[0] = ~resum_err_tail_rd_g[3] &
+           ~resum_err_tail_rd_g[2] & ~resum_err_tail_rd_g[1];
+//
+// read of resum_err_tail by any thread (no thread_sel_id_g term)
+assign resum_err_ta_rd_g =
+           asi_queue_rw_g & resum_err_tail_rw_g &
+           asi_queue_read_g;
+//
+// Write enables, one per thread, qualified by asi_queue_write_g
+//
+assign resum_err_tail_wr_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           resum_err_tail_rw_g & asi_queue_write_g;
+assign resum_err_tail_wr_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           resum_err_tail_rw_g & asi_queue_write_g;
+assign resum_err_tail_wr_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           resum_err_tail_rw_g & asi_queue_write_g;
+assign resum_err_tail_wr_g[0] =
+           thread_sel_id_g[0] & asi_queue_rw_g &
+           resum_err_tail_rw_g & asi_queue_write_g;
+//
+// nresum_err_head read selects: bits 3:1 AND asi_queue_read_g, nresum_err_head_rw_g,
+// asi_queue_rw_g and the matching thread_sel_id_g bit.  Bit 0 is not gated by
+// thread_sel_id_g[0]: it is the NOR of bits 3:1, so it is set whenever none
+// of bits 3:1 is set.
+assign nresum_err_head_rd_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           nresum_err_head_rw_g & asi_queue_read_g;
+assign nresum_err_head_rd_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           nresum_err_head_rw_g & asi_queue_read_g;
+assign nresum_err_head_rd_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           nresum_err_head_rw_g & asi_queue_read_g;
+assign nresum_err_head_rd_g[0] = ~nresum_err_head_rd_g[3] &
+           ~nresum_err_head_rd_g[2] & ~nresum_err_head_rd_g[1];
+//
+// read of nresum_err_head by any thread (no thread_sel_id_g term)
+assign nresum_err_hd_rd_g =
+           asi_queue_rw_g & nresum_err_head_rw_g &
+           asi_queue_read_g;
+//
+// Write enables, one per thread, qualified by asi_queue_write_g
+//
+assign nresum_err_head_wr_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           nresum_err_head_rw_g & asi_queue_write_g;
+assign nresum_err_head_wr_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           nresum_err_head_rw_g & asi_queue_write_g;
+assign nresum_err_head_wr_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           nresum_err_head_rw_g & asi_queue_write_g;
+assign nresum_err_head_wr_g[0] =
+           thread_sel_id_g[0] & asi_queue_rw_g &
+           nresum_err_head_rw_g & asi_queue_write_g;
+//
+// nresum_err_tail read selects: bits 3:1 AND asi_queue_read_g, nresum_err_tail_rw_g,
+// asi_queue_rw_g and the matching thread_sel_id_g bit.  Bit 0 is not gated by
+// thread_sel_id_g[0]: it is the NOR of bits 3:1, so it is set whenever none
+// of bits 3:1 is set.
+assign nresum_err_tail_rd_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           nresum_err_tail_rw_g & asi_queue_read_g;
+assign nresum_err_tail_rd_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           nresum_err_tail_rw_g & asi_queue_read_g;
+assign nresum_err_tail_rd_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           nresum_err_tail_rw_g & asi_queue_read_g;
+assign nresum_err_tail_rd_g[0] = ~nresum_err_tail_rd_g[3] &
+           ~nresum_err_tail_rd_g[2] & ~nresum_err_tail_rd_g[1];
+//
+// read of nresum_err_tail by any thread (no thread_sel_id_g term)
+assign nresum_err_ta_rd_g =
+           asi_queue_rw_g & nresum_err_tail_rw_g &
+           asi_queue_read_g;
+//
+// Write enables, one per thread, qualified by asi_queue_write_g
+//
+assign nresum_err_tail_wr_g[3] =
+           thread_sel_id_g[3] & asi_queue_rw_g &
+           nresum_err_tail_rw_g & asi_queue_write_g;
+assign nresum_err_tail_wr_g[2] =
+           thread_sel_id_g[2] & asi_queue_rw_g &
+           nresum_err_tail_rw_g & asi_queue_write_g;
+assign nresum_err_tail_wr_g[1] =
+           thread_sel_id_g[1] & asi_queue_rw_g &
+           nresum_err_tail_rw_g & asi_queue_write_g;
+assign nresum_err_tail_wr_g[0] =
+           thread_sel_id_g[0] & asi_queue_rw_g &
+           nresum_err_tail_rw_g & asi_queue_write_g;
+//
+// storing the head and tail pointers for the queues
+// thread 0: every dffe_s takes tlu_asi_queue_data_g on din and its own *_wr_g[0] bit on en
+//
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_cpu_mondo0_head (
+     .clk (clk),
+     .en  (cpu_mondo_head_wr_g[0]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (cpu_mondo0_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_cpu_mondo0_tail (
+     .clk (clk),
+     .en  (cpu_mondo_tail_wr_g[0]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (cpu_mondo0_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_dev_mondo0_head (
+     .clk (clk),
+     .en  (dev_mondo_head_wr_g[0]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (dev_mondo0_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_dev_mondo0_tail (
+     .clk (clk),
+     .en  (dev_mondo_tail_wr_g[0]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (dev_mondo0_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_resum_err0_head (
+     .clk (clk),
+     .en  (resum_err_head_wr_g[0]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (resum_err0_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_resum_err0_tail (
+     .clk (clk),
+     .en  (resum_err_tail_wr_g[0]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (resum_err0_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_nresum_err0_head (
+     .clk (clk),
+     .en  (nresum_err_head_wr_g[0]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (nresum_err0_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_nresum_err0_tail (
+     .clk (clk),
+     .en  (nresum_err_tail_wr_g[0]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (nresum_err0_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+//
+// thread 1 head/tail registers: din is tlu_asi_queue_data_g, en is the matching *_wr_g[1]
+//
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_cpu_mondo1_head (
+     .clk (clk),
+     .en  (cpu_mondo_head_wr_g[1]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (cpu_mondo1_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_cpu_mondo1_tail (
+     .clk (clk),
+     .en  (cpu_mondo_tail_wr_g[1]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (cpu_mondo1_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_dev_mondo1_head (
+     .clk (clk),
+     .en  (dev_mondo_head_wr_g[1]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (dev_mondo1_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_dev_mondo1_tail (
+     .clk (clk),
+     .en  (dev_mondo_tail_wr_g[1]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (dev_mondo1_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_resum_err1_head (
+     .clk (clk),
+     .en  (resum_err_head_wr_g[1]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (resum_err1_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_resum_err1_tail (
+     .clk (clk),
+     .en  (resum_err_tail_wr_g[1]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (resum_err1_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_nresum_err1_head (
+     .clk (clk),
+     .en  (nresum_err_head_wr_g[1]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (nresum_err1_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_nresum_err1_tail (
+     .clk (clk),
+     .en  (nresum_err_tail_wr_g[1]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (nresum_err1_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+//
+// thread 2 head/tail registers: din is tlu_asi_queue_data_g, en is the matching *_wr_g[2]
+//
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_cpu_mondo2_head (
+     .clk (clk),
+     .en  (cpu_mondo_head_wr_g[2]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (cpu_mondo2_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_cpu_mondo2_tail (
+     .clk (clk),
+     .en  (cpu_mondo_tail_wr_g[2]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (cpu_mondo2_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_dev_mondo2_head (
+     .clk (clk),
+     .en  (dev_mondo_head_wr_g[2]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (dev_mondo2_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_dev_mondo2_tail (
+     .clk (clk),
+     .en  (dev_mondo_tail_wr_g[2]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (dev_mondo2_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_resum_err2_head (
+     .clk (clk),
+     .en  (resum_err_head_wr_g[2]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (resum_err2_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_resum_err2_tail (
+     .clk (clk),
+     .en  (resum_err_tail_wr_g[2]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (resum_err2_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_nresum_err2_head (
+     .clk (clk),
+     .en  (nresum_err_head_wr_g[2]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (nresum_err2_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_nresum_err2_tail (
+     .clk (clk),
+     .en  (nresum_err_tail_wr_g[2]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (nresum_err2_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+//
+// thread 3 head/tail registers: din is tlu_asi_queue_data_g, en is the matching *_wr_g[3]
+//
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_cpu_mondo3_head (
+     .clk (clk),
+     .en  (cpu_mondo_head_wr_g[3]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (cpu_mondo3_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_cpu_mondo3_tail (
+     .clk (clk),
+     .en  (cpu_mondo_tail_wr_g[3]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (cpu_mondo3_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_dev_mondo3_head (
+     .clk (clk),
+     .en  (dev_mondo_head_wr_g[3]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (dev_mondo3_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_dev_mondo3_tail (
+     .clk (clk),
+     .en  (dev_mondo_tail_wr_g[3]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (dev_mondo3_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_resum_err3_head (
+     .clk (clk),
+     .en  (resum_err_head_wr_g[3]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (resum_err3_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_resum_err3_tail (
+     .clk (clk),
+     .en  (resum_err_tail_wr_g[3]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (resum_err3_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_nresum_err3_head (
+     .clk (clk),
+     .en  (nresum_err_head_wr_g[3]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (nresum_err3_head[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+
+dffe_s #(`TLU_ASI_QUE_WIDTH) dffe_nresum_err3_tail (
+     .clk (clk),
+     .en  (nresum_err_tail_wr_g[3]),
+     .din (tlu_asi_queue_data_g[`TLU_ASI_QUE_WIDTH-1:0]),
+     .q   (nresum_err3_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+     .se  (se),
+     .si  (),
+     .so  ()
+);
+//
+// Reading out the asi queues
+//
+// cpu_mondo_head: the four per-thread head registers are muxed with
+// cpu_mondo_head_rd_g.  Select bit 0 is generated as the NOR of bits 3:1
+// (bug2332 change), so a select is asserted even when no thread-specific
+// read is active.
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx_cpu_mondo_head (
+    .dout (cpu_mondo_hd_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (cpu_mondo_head_rd_g[0]),
+    .in0  (cpu_mondo0_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (cpu_mondo_head_rd_g[1]),
+    .in1  (cpu_mondo1_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (cpu_mondo_head_rd_g[2]),
+    .in2  (cpu_mondo2_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (cpu_mondo_head_rd_g[3]),
+    .in3  (cpu_mondo3_head[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// cpu_mondo_tail: the four per-thread tail registers are muxed with
+// cpu_mondo_tail_rd_g.  Select bit 0 is the NOR of bits 3:1 (bug2332
+// change), so a select is asserted even when no thread-specific read is
+// active.
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx_cpu_mondo_tail (
+    .dout (cpu_mondo_ta_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (cpu_mondo_tail_rd_g[0]),
+    .in0  (cpu_mondo0_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (cpu_mondo_tail_rd_g[1]),
+    .in1  (cpu_mondo1_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (cpu_mondo_tail_rd_g[2]),
+    .in2  (cpu_mondo2_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (cpu_mondo_tail_rd_g[3]),
+    .in3  (cpu_mondo3_tail[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// dev_mondo_head: the four per-thread head registers are muxed with
+// dev_mondo_head_rd_g.  Select bit 0 is the NOR of bits 3:1 (bug2332
+// change), so a select is asserted even when no thread-specific read is
+// active.
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx_dev_mondo_head (
+    .dout (dev_mondo_hd_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (dev_mondo_head_rd_g[0]),
+    .in0  (dev_mondo0_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (dev_mondo_head_rd_g[1]),
+    .in1  (dev_mondo1_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (dev_mondo_head_rd_g[2]),
+    .in2  (dev_mondo2_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (dev_mondo_head_rd_g[3]),
+    .in3  (dev_mondo3_head[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// dev_mondo_tail: the four per-thread tail registers are muxed with
+// dev_mondo_tail_rd_g.  Select bit 0 is the NOR of bits 3:1 (bug2332
+// change), so a select is asserted even when no thread-specific read is
+// active.
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx_dev_mondo_tail (
+    .dout (dev_mondo_ta_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (dev_mondo_tail_rd_g[0]),
+    .in0  (dev_mondo0_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (dev_mondo_tail_rd_g[1]),
+    .in1  (dev_mondo1_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (dev_mondo_tail_rd_g[2]),
+    .in2  (dev_mondo2_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (dev_mondo_tail_rd_g[3]),
+    .in3  (dev_mondo3_tail[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// resum_err_head: the four per-thread head registers are muxed with
+// resum_err_head_rd_g.  Select bit 0 is the NOR of bits 3:1 (bug2332
+// change), so a select is asserted even when no thread-specific read is
+// active.
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx_resum_err_head (
+    .dout (resum_err_hd_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (resum_err_head_rd_g[0]),
+    .in0  (resum_err0_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (resum_err_head_rd_g[1]),
+    .in1  (resum_err1_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (resum_err_head_rd_g[2]),
+    .in2  (resum_err2_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (resum_err_head_rd_g[3]),
+    .in3  (resum_err3_head[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// resum_err_tail: the four per-thread tail registers are muxed with
+// resum_err_tail_rd_g.  Select bit 0 is the NOR of bits 3:1 (bug2332
+// change), so a select is asserted even when no thread-specific read is
+// active.
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx_resum_err_tail (
+    .dout (resum_err_ta_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (resum_err_tail_rd_g[0]),
+    .in0  (resum_err0_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (resum_err_tail_rd_g[1]),
+    .in1  (resum_err1_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (resum_err_tail_rd_g[2]),
+    .in2  (resum_err2_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (resum_err_tail_rd_g[3]),
+    .in3  (resum_err3_tail[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// nresum_err_head: the four per-thread head registers are muxed with
+// nresum_err_head_rd_g.  Select bit 0 is the NOR of bits 3:1 (bug2332
+// change), so a select is asserted even when no thread-specific read is
+// active.
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx_nresum_err_head (
+    .dout (nresum_err_hd_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (nresum_err_head_rd_g[0]),
+    .in0  (nresum_err0_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (nresum_err_head_rd_g[1]),
+    .in1  (nresum_err1_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (nresum_err_head_rd_g[2]),
+    .in2  (nresum_err2_head[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (nresum_err_head_rd_g[3]),
+    .in3  (nresum_err3_head[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// nresum_err_tail: the four per-thread tail registers are muxed with
+// nresum_err_tail_rd_g.  Select bit 0 is the NOR of bits 3:1 (bug2332
+// change), so a select is asserted even when no thread-specific read is
+// active.
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx_nresum_err_tail (
+    .dout (nresum_err_ta_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (nresum_err_tail_rd_g[0]),
+    .in0  (nresum_err0_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (nresum_err_tail_rd_g[1]),
+    .in1  (nresum_err1_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (nresum_err_tail_rd_g[2]),
+    .in2  (nresum_err2_tail[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (nresum_err_tail_rd_g[3]),
+    .in3  (nresum_err3_tail[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// mx1 selects (bug 2332): bit 0 is also set whenever none of bits 3:1 is set
+assign asi_qrdata_mx1_sel[3] = dev_mondo_ta_rd_g;
+assign asi_qrdata_mx1_sel[2] = dev_mondo_hd_rd_g;
+assign asi_qrdata_mx1_sel[1] = cpu_mondo_ta_rd_g;
+assign asi_qrdata_mx1_sel[0] = cpu_mondo_hd_rd_g | (~asi_qrdata_mx1_sel[3] &
+                               ~asi_qrdata_mx1_sel[2] & ~asi_qrdata_mx1_sel[1]);
+//
+// selecting between the four cpu_mondo/dev_mondo head and tail read values
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx1_asi_queue_rdata (
+    .dout (asi_queue_rdata1_g[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (asi_qrdata_mx1_sel[0]),
+    .in0  (cpu_mondo_hd_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (asi_qrdata_mx1_sel[1]),
+    .in1  (cpu_mondo_ta_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (asi_qrdata_mx1_sel[2]),
+    .in2  (dev_mondo_hd_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (asi_qrdata_mx1_sel[3]),
+    .in3  (dev_mondo_ta_rdata[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// mx2 selects (bug 2332): bit 0 is also set whenever none of bits 3:1 is set
+assign asi_qrdata_mx2_sel[3] = nresum_err_ta_rd_g;
+assign asi_qrdata_mx2_sel[2] = nresum_err_hd_rd_g;
+assign asi_qrdata_mx2_sel[1] = resum_err_ta_rd_g;
+assign asi_qrdata_mx2_sel[0] = resum_err_hd_rd_g | (~asi_qrdata_mx2_sel[3] &
+                               ~asi_qrdata_mx2_sel[2] & ~asi_qrdata_mx2_sel[1]);
+// selecting between the four resum_err/nresum_err head and tail read values
+mux4ds #(`TLU_ASI_QUE_WIDTH) mx2_asi_queue_rdata (
+    .dout (asi_queue_rdata2_g[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (asi_qrdata_mx2_sel[0]),
+    .in0  (resum_err_hd_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (asi_qrdata_mx2_sel[1]),
+    .in1  (resum_err_ta_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel2 (asi_qrdata_mx2_sel[2]),
+    .in2  (nresum_err_hd_rdata[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel3 (asi_qrdata_mx2_sel[3]),
+    .in3  (nresum_err_ta_rdata[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// asi_qrdata_mx_sel2: set for a read of any resum_err/nresum_err head or tail
+assign asi_qrdata_mx_sel2 =
+           nresum_err_ta_rd_g | nresum_err_hd_rd_g |
+           resum_err_ta_rd_g  | resum_err_hd_rd_g;
+// final rdata mux: in0 (rdata2) pairs with asi_qrdata_mx_sel2, in1 (rdata1) with its complement
+mux2ds #(`TLU_ASI_QUE_WIDTH) mx_tlu_asi_queue_rdata (
+    .dout (tlu_asi_queue_rdata_g[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel0 (asi_qrdata_mx_sel2),
+    .in0  (asi_queue_rdata2_g[`TLU_ASI_QUE_WIDTH-1:0]),
+    .sel1 (~asi_qrdata_mx_sel2),
+    .in1  (asi_queue_rdata1_g[`TLU_ASI_QUE_WIDTH-1:0])
+);
+//
+// rdata valid: OR of the four mondo head/tail read hits and asi_qrdata_mx_sel2
+assign tlu_asi_queue_rd_vld_g =
+       cpu_mondo_hd_rd_g | cpu_mondo_ta_rd_g | dev_mondo_hd_rd_g |
+       dev_mondo_ta_rd_g | asi_qrdata_mx_sel2;
+// tlu_ld_data_vld_g is a direct copy of asi_ld_addr_vld_g
+assign tlu_ld_data_vld_g = asi_ld_addr_vld_g;
+
+////////////////////////////////////////////////////////////////////////
+// SCPD and HSCPD control logic
+////////////////////////////////////////////////////////////////////////
+//
+// privileged scratch pad access: alt-space op whose ASI state equals PRI_SCPD_ASI_STATE
+assign asi_scpd_rw_e =
+          (tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0]==`PRI_SCPD_ASI_STATE) &
+           ifu_lsu_alt_space_e;
+
+// hyperprivileged scratch pad access: alt-space op whose ASI state equals HPRI_SCPD_ASI_STATE
+assign asi_hscpd_rw_e =
+          (tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0]==`HPRI_SCPD_ASI_STATE) &
+           ifu_lsu_alt_space_e;
+//
+// stage the scpd/hscpd rw controls: _e pair on din, _m pair on q, rst tied to local_rst
+dffr_s #(2) dffr_asi_scpd_rw_ctls (
+    .clk (clk),
+    .rst (local_rst),
+    .din ({asi_hscpd_rw_e, asi_scpd_rw_e}),
+    .q   ({asi_hscpd_rw_m, asi_scpd_rw_m}),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// address generation
+//
+// modified due to timing fix  
+/*
+ assign scpd_addr_va_e[`SCPD_ASI_VA_ADDR_WIDTH-1:0] = 
+            tlu_ldst_va_e[`TLU_ASI_SCPD_VA_HI:`TLU_ASI_SCPD_VA_LO];
+
+ staged the shifted va address
+ modified due to timing fix
+
+dff_s #(`SCPD_ASI_VA_ADDR_WIDTH) dff_scpd_addr_va_m (
+    .din (scpd_addr_va_e[`SCPD_ASI_VA_ADDR_WIDTH-1:0]),
+    .q   (scpd_addr_va_m[`SCPD_ASI_VA_ADDR_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+
+assign scpd_addr_va_m[`SCPD_ASI_VA_ADDR_WIDTH-1:0] = 
+            tlu_ldst_va_m[`TLU_ASI_SCPD_VA_HI:`TLU_ASI_SCPD_VA_LO];
+// stage the m-stage scratch-pad index to g (plain flop, no reset)
+dff_s #(`SCPD_ASI_VA_ADDR_WIDTH) dff_scpd_addr_va_g (
+    .din (scpd_addr_va_m[`SCPD_ASI_VA_ADDR_WIDTH-1:0]),
+    .q   (scpd_addr_va_g[`SCPD_ASI_VA_ADDR_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// read address = {m-stage thread id, m-stage index}; write address = {g-stage thread id, g-stage index}
+assign tlu_scpd_rd_addr_m[`SCPD_RW_ADDR_WIDTH-1:0] = 
+           {thrid_m[1:0], scpd_addr_va_m[`SCPD_ASI_VA_ADDR_WIDTH-1:0]}; 
+
+assign tlu_scpd_wr_addr_g[`SCPD_RW_ADDR_WIDTH-1:0] = 
+           {thrid_g[1:0], scpd_addr_va_g[`SCPD_ASI_VA_ADDR_WIDTH-1:0]}; 
+//
+// illegal range check for scratch-pd va 
+// modified due to timing
+/*
+assign scpd_ill_va_e = 
+           (ifu_lsu_alt_space_e)?
+           ((asi_scpd_rw_e | asi_hscpd_rw_e) & 
+           ((|exu_lsu_ldst_va_e[`ASI_VA_WIDTH-1:`TLU_ASI_SCPD_VA_HI+1]) |
+            (|exu_lsu_ldst_va_e[`TLU_ASI_SCPD_VA_LO-1:0]))) :
+            1'b0;
+// staged illegal va range 
+dffr_s dffr_scpd_ill_va_m (
+    .din (scpd_ill_va_e),
+    .q   (scpd_ill_va_m),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+// m-stage check: va is misaligned when any bit below TLU_ASI_SCPD_VA_LO is set (bits above VA_HI are not tested here)
+assign va_not_baligned_m = (|tlu_ldst_va_m[`TLU_ASI_SCPD_VA_LO-1:0]); 
+assign scpd_ill_va_m = 
+           (asi_scpd_rw_m | asi_hscpd_rw_m) & 
+            va_not_baligned_m;
+       
+// stage the scratch-pad illegal-va flag from m to g
+dffr_s dffr_scpd_ill_va_g (
+    .din (scpd_ill_va_m),
+    .q   (scpd_ill_va_g),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// scratch-pad access is valid unless it raises the hscpd data access exception or the va is misaligned
+assign asi_scpd_rw_vld_m = 
+            (asi_scpd_rw_m | asi_hscpd_rw_m) & 
+           ~(hscpd_data_acc_excpt_m | va_not_baligned_m);
+// stage the access valid from m to g
+dffr_s dffr_asi_scpd_rw_vld_g (
+    .din (asi_scpd_rw_vld_m),
+    .q   (asi_scpd_rw_vld_g),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// scratch-pad write valid: g-stage asi write qualified by the staged access valid
+assign tlu_scpd_wr_vld_g = 
+           asi_queue_write_g & asi_scpd_rw_vld_g; 
+//
+// scratch-pad read valid: control to the memory macro for the read (m stage)
+// modified due to timing - uses asi_queue_read_pq_m in place of asi_queue_read_m
+assign tlu_scpd_rd_vld_m = 
+           // asi_queue_read_m & asi_scpd_rw_vld_m; 
+           asi_queue_read_pq_m & asi_scpd_rw_vld_m; 
+//
+// control to the int block for the selection of read data 
+// replaced by queue vld
+// assign tlu_scpd_rd_vld_g = 
+//            asi_queue_read_g & asi_scpd_rw_vld_g; 
+
+// hypervisor-only scratch-pad decode: m-stage index equals HSCPD_ASI_VA_ADDR_LO or HSCPD_ASI_VA_ADDR_HI
+assign hscpd_va_rw_m = 
+          ((scpd_addr_va_m[`SCPD_ASI_VA_ADDR_WIDTH-1:0] == 
+            `HSCPD_ASI_VA_ADDR_LO) | 
+           (scpd_addr_va_m[`SCPD_ASI_VA_ADDR_WIDTH-1:0] == 
+            `HSCPD_ASI_VA_ADDR_HI));  
+
+////////////////////////////////////////////////////////////////////////
+// Potential trap indicators 
+////////////////////////////////////////////////////////////////////////
+// possible traps are: 
+// 1) head ptr <> tail ptr (with the exception of nresum_err_queue)
+// 2) write to tail by supervisor - data_access_exception 
+
+//
+// access to a hypervisor-only scratch-pad va through the privileged scratch-pad asi (0x20 asi state per the original note)
+
+assign hscpd_priv_asi_acc_m = 
+       hscpd_va_rw_m & asi_scpd_rw_m; 
+
+// modified due to timing - moved the inst_vld qualification to tlu_tcl; raised for a read or a write
+assign hscpd_data_acc_excpt_pq_m = 
+           hscpd_priv_asi_acc_m & 
+           (asi_queue_write_pq_m | asi_queue_read_pq_m);
+/*
+           hscpd_priv_asi_acc_m & 
+           // (asi_queue_write_m | asi_queue_read_m)  &
+           (asi_queue_write_pq_m | asi_queue_read_pq_m) &
+           ((thread_sel_id_m[0] & ~tlu_hyper_lite[0]) |
+            (thread_sel_id_m[1] & ~tlu_hyper_lite[1]) |
+            (thread_sel_id_m[2] & ~tlu_hyper_lite[2]) |
+            (thread_sel_id_m[3] & ~tlu_hyper_lite[3])); 
+*/
+//
+// data_access_exception for access to the hyper-privileged scratch-pad - exported in its pre-qualified (pq) form
+
+assign tlu_hscpd_dacc_excpt_m = hscpd_data_acc_excpt_pq_m;
+//
+// revised for bug 3586 - local copy, qualified by the selected thread having its tlu_hyper_lite bit clear
+
+assign hscpd_data_acc_excpt_m = 
+           hscpd_data_acc_excpt_pq_m & 
+           ((thread_sel_id_m[0] & ~tlu_hyper_lite[0]) |
+            (thread_sel_id_m[1] & ~tlu_hyper_lite[1]) |
+            (thread_sel_id_m[2] & ~tlu_hyper_lite[2]) |
+            (thread_sel_id_m[3] & ~tlu_hyper_lite[3])); 
+
+// illegal va indicator: queue or scratch-pad illegal va, reported only for a g-stage asi read
+assign tlu_va_ill_g = 
+           (que_ill_va_g | scpd_ill_va_g) & asi_queue_read_g; 
+
+// load address valid: asi read that targets a queue, or a scratch-pad without the hscpd exception
+assign asi_ld_addr_vld_m  = 
+             asi_queue_read_m & (asi_queue_rw_m | 
+           ((asi_hscpd_rw_m | asi_scpd_rw_m) & 
+            ~hscpd_data_acc_excpt_m));
+
+// stage the load address valid from m to g
+dffr_s dffr_asi_ld_addr_vld_g (
+    .din (asi_ld_addr_vld_m),
+    .q   (asi_ld_addr_vld_g),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// redefined va illegal checking - the following code is 
+// no longer necessary
+// zero va range detector
+/* 
+assign va_all_zero_e = 
+            (~(|exu_lsu_ldst_va_e[`ASI_VA_WIDTH-1:0])) &
+             asi_queue_read_g;
+//
+// staged illegal va range 
+dffr_s dffr_va_all_zero_m (
+    .din (va_all_zero_e),
+    .q   (va_all_zero_m),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// 
+dffr_s dffr_va_all_zero_g (
+    .din (va_all_zero_m),
+    .q   (va_all_zero_g),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+assign tlu_va_all_zero_g = va_all_zero_g; 
+*/
+
+////////////////////////////////////////////////////////////////////////
+// queue traps - head ptr <> tail ptr
+////////////////////////////////////////////////////////////////////////
+// note: these traps are level-sensitive
+// each cmp bit is set while any bit of that thread's queue head and tail pointers differ
+// thread 0
+
+assign tlu_cpu_mondo_cmp[0] = 
+           (|(cpu_mondo0_head[`TLU_ASI_QUE_WIDTH-1:0] ^
+              cpu_mondo0_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+assign tlu_dev_mondo_cmp[0]= 
+           (|(dev_mondo0_head[`TLU_ASI_QUE_WIDTH-1:0] ^
+              dev_mondo0_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+assign tlu_resum_err_cmp[0] = 
+           (|(resum_err0_head[`TLU_ASI_QUE_WIDTH-1:0] ^ 
+              resum_err0_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+// thread 1
+
+assign tlu_cpu_mondo_cmp[1] = 
+           (|(cpu_mondo1_head[`TLU_ASI_QUE_WIDTH-1:0] ^
+              cpu_mondo1_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+assign tlu_dev_mondo_cmp[1]= 
+           (|(dev_mondo1_head[`TLU_ASI_QUE_WIDTH-1:0] ^
+              dev_mondo1_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+assign tlu_resum_err_cmp[1] = 
+           (|(resum_err1_head[`TLU_ASI_QUE_WIDTH-1:0] ^ 
+              resum_err1_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+//
+// thread 2
+
+assign tlu_cpu_mondo_cmp[2] = 
+           (|(cpu_mondo2_head[`TLU_ASI_QUE_WIDTH-1:0] ^
+              cpu_mondo2_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+assign tlu_dev_mondo_cmp[2]= 
+           (|(dev_mondo2_head[`TLU_ASI_QUE_WIDTH-1:0] ^
+              dev_mondo2_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+assign tlu_resum_err_cmp[2] = 
+           (|(resum_err2_head[`TLU_ASI_QUE_WIDTH-1:0] ^ 
+              resum_err2_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+// thread 3
+
+assign tlu_cpu_mondo_cmp[3] = 
+           (|(cpu_mondo3_head[`TLU_ASI_QUE_WIDTH-1:0] ^
+              cpu_mondo3_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+assign tlu_dev_mondo_cmp[3]= 
+           (|(dev_mondo3_head[`TLU_ASI_QUE_WIDTH-1:0] ^
+              dev_mondo3_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+assign tlu_resum_err_cmp[3] = 
+           (|(resum_err3_head[`TLU_ASI_QUE_WIDTH-1:0] ^ 
+              resum_err3_tail[`TLU_ASI_QUE_WIDTH-1:0]));
+
+// asi write to any queue tail register (nresum_err, resum_err, cpu_mondo or dev_mondo)
+// modified due to timing - moved the inst_vld qualification to tlu_tcl
+assign qtail_write_m = 
+           (nresum_err_tail_rw_m | resum_err_tail_rw_m  | 
+            cpu_mondo_tail_rw_m  | dev_mondo_tail_rw_m) &
+            asi_queue_write_pq_m & asi_queue_rw_m;
+            // asi_queue_write_m & asi_queue_rw_m;
+
+//
+// modified for timing - the per-thread trap-enable/privilege qualification moved to tcl, so the raw tail write is exported
+assign tlu_qtail_dacc_excpt_m = qtail_write_m;
+/*
+           ((thread_sel_id_m[0] & que_trap_en[0] & tlu_pstate_priv[0])  |
+            (thread_sel_id_m[1] & que_trap_en[1] & tlu_pstate_priv[1])  |
+            (thread_sel_id_m[2] & que_trap_en[2] & tlu_pstate_priv[2])  |
+            (thread_sel_id_m[3] & que_trap_en[3] & tlu_pstate_priv[3])) & 
+             qtail_write_m;
+*/
+              
+endmodule
Index: /trunk/T1-CPU/tlu/tlu.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu.v	(revision 6)
@@ -0,0 +1,1833 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:	tlu
+//	Description:	Trap Logic and Memory Management Unit (TLU) :
+//			- Contains :
+//				- Trap Stack Array (TSA)
+//				- Trap Control Logic (TCL)
+//				- Mmu internal Register Array (MRA)
+//				- Mmu Control Logic (MCL)
+//
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which contains the 
+			// time scale definition
+
+`include        "tlu.h"
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module tlu (/*AUTOARG*/
+            short_si0,short_si1,short_so0,short_so1,si0,si1,so0,so1,
+   tlu_sftint_vld, tlu_hintp_vld, tlu_rerr_vld, tlu_lsu_tl_zero, 
+   tlu_lsu_stxa_ack, tlu_lsu_redmode_rst_d1, // tlu_lsu_async_ack_w2, 
+   tlu_lsu_pstate_priv, tlu_lsu_pstate_cle, tlu_lsu_pstate_am, tlu_lsu_tid_m,
+   tlu_lsu_pcxpkt, tlu_lsu_ldxa_tid_w2, tlu_lsu_stxa_ack_tid, tlu_lsu_redmode, 
+   tlu_lsu_asi_update_m, tlu_lsu_asi_m, 
+   tlu_itlb_wr_vld_g, tlu_itlb_tte_tag_w2, tlu_itlb_tte_data_w2, 
+   tlu_itlb_tag_rd_g, tlu_itlb_rw_index_vld_g, tlu_itlb_rw_index_g, 
+   tlu_itlb_dmp_actxt_g, tlu_itlb_invalidate_all_g, tlu_itlb_dmp_vld_g, 
+   tlu_itlb_dmp_all_g, tlu_itlb_dmp_nctxt_g, tlu_ifu_trapnpc_w2, 
+   tlu_sscan_test_data, // tlu_sscan_pc, 
+   tlu_itlb_data_rd_g, tlu_ifu_trappc_vld_w1, tlu_ifu_trappc_w2, 
+   tlu_ifu_trapnpc_vld_w1, tlu_ifu_trap_tid_w1, tlu_ifu_rstthr_i2, 
+   tlu_ifu_rstint_i2, tlu_ifu_resumint_i2, tlu_ifu_pstate_pef, 
+   tlu_ifu_pstate_ie, tlu_ifu_nukeint_i2, // tlu_ifu_int_activate_i3, 
+   tlu_ifu_hwint_i3, tlu_idtlb_dmp_thrid_g, tlu_idtlb_dmp_key_g, 
+   tlu_exu_cwpccr_update_m, tlu_exu_cwp_retry_m, 
+   tlu_exu_cwp_m, tlu_exu_ccr_m, tlu_exu_agp_swap, tlu_exu_agp, 
+   tlu_dtlb_tte_tag_w2, tlu_dtlb_tte_data_w2, 
+   tlu_dtlb_tag_rd_g, tlu_dtlb_rw_index_vld_g, tlu_dtlb_rw_index_g, 
+   tlu_dtlb_invalidate_all_g, tlu_dtlb_dmp_vld_g, tlu_exu_rsr_data_m, 
+   tlu_dtlb_dmp_sctxt_g, tlu_dtlb_dmp_pctxt_g, tlu_dtlb_dmp_nctxt_g, 
+   tlu_dtlb_dmp_all_g, tlu_dtlb_dmp_actxt_g, tlu_dtlb_data_rd_g, 
+   tlu_lsu_int_ldxa_vld_w2, tlu_lsu_ldxa_async_data_vld, // tlu_ifu_flush_pipe_w, 
+   ifu_lsu_error_inj, tlu_exu_agp_tid, tlu_hpstate_priv, tlu_hpstate_ibe,
+   tlu_hpstate_enb, tlu_early_flush_pipe_w, tlu_exu_early_flush_pipe_w,
+   tlu_early_flush_pipe2_w, tlu_lsu_int_ldxa_data_w2, tlu_lsu_int_ld_ill_va_w2, 
+   tlu_exu_priv_trap_m, tlu_exu_pic_onebelow_m, tlu_exu_pic_twobelow_m, 
+   lsu_exu_ldxa_m, lsu_exu_ldxa_data_g, tlu_dsfsr_flt_vld, tlu_lsu_priv_trap_m, 
+   // tlu_lsu_priv_trap_w,
+   // Inputs
+   se, arst_l, grst_l, sehold, mem_write_disable, // rst_tri_en, 
+   mux_drive_disable, lsu_tlu_wtchpt_trp_g, ifu_tlu_flush_fd3_w, 
+   lsu_tlu_ttype_vld_m2, ifu_tlu_flush_fd_w, ifu_tlu_flush_fd2_w,
+   lsu_tlu_ttype_m2, lsu_tlu_tlb_st_inst_m, // lsu_tlu_tte_ebit_g, 
+   lsu_tlu_tlb_ldst_va_m, lsu_tlu_tlb_ld_inst_m, lsu_tlu_tlb_dmp_va_m, 
+   lsu_tlu_tlb_asi_state_m, lsu_tlu_tlb_access_tid_m, ifu_tlu_flush_m, 
+   lsu_tlu_st_rs3_data_g, lsu_tlu_early_flush_w, lsu_tlu_early_flush2_w, 
+   lsu_tlu_priv_action_g, lsu_tlu_pcxpkt_ack, // lsu_tlu_priv_violtn_g, 
+   lsu_tlu_pctxt_m, lsu_tlu_async_ttype_vld_g, // lsu_tlu_nonalt_ldst_m, 
+   lsu_tlu_misalign_addr_ldst_atm_m, ctu_sscan_tid, lsu_tlu_intpkt, 
+   lsu_tlu_async_ttype_g, lsu_tlu_rs3_data_g, lsu_tlu_defr_trp_taken_g, 
+   lsu_tlu_dtlb_done, lsu_tlu_dside_ctxt_m, // lsu_tlu_flt_ld_nfo_pg_g, 
+   lsu_tlu_dmmu_miss_g, lsu_tlu_daccess_prot_g, // lsu_tlu_derr_tid_g,  
+   lsu_tlu_daccess_excptn_g, lsu_tlu_cpx_vld, ifu_tlu_pc_oor_e, 
+   lsu_tlu_cpx_req, ifu_tlu_inst_vld_m_bf1, ifu_mmu_trap_m, ifu_tlu_trap_m,
+   lsu_asi_state, lsu_asi_reg3, lsu_asi_reg2, lsu_tlu_async_tid_g, 
+   lsu_asi_reg1, lsu_asi_reg0, ifu_tlu_ttype_vld_m, ifu_tlu_ttype_m, 
+   ifu_tlu_thrid_d, ifu_tlu_swint_m, ifu_tlu_sir_inst_m, ifu_tlu_l2imiss, 
+   ifu_tlu_rstint_m, ifu_tlu_retry_inst_d, ifu_tlu_priv_violtn_m, ifu_tlu_pc_m, 
+   ifu_tlu_npc_m, ifu_tlu_immu_miss_m, ifu_tlu_itlb_done,  ifu_tlu_inst_vld_m, 
+   ifu_tlu_hwint_m, ifu_lsu_imm_asi_d, ifu_lsu_imm_asi_vld_d, ifu_tlu_done_inst_d, 
+   ifu_lsu_st_inst_e, ifu_lsu_memref_d, ifu_lsu_ld_inst_e, ffu_tlu_trap_ue, 
+   ffu_tlu_trap_other, ffu_tlu_trap_ieee754, ffu_tlu_ill_inst_m, ffu_ifu_tid_w2, 
+   exu_tlu_va_oor_jl_ret_m, exu_tlu_ttype_vld_m, exu_tlu_ttype_m, exu_tlu_va_oor_m, 
+   exu_tlu_spill_tid, exu_tlu_spill, exu_tlu_spill_other, exu_tlu_spill_wtype, 
+   exu_tlu_misalign_addr_jmpl_rtn_m, exu_tlu_cwp_retry, exu_mmu_early_va_e, 
+   exu_tlu_cwp_cmplt_tid, // exu_tlu_spill_ttype,  exu_tlu_cwp_fastcmplt_w,
+   exu_tlu_cwp_cmplt, exu_tlu_cwp3, exu_tlu_cwp2, exu_tlu_cwp1, 
+   exu_tlu_cwp0, exu_tlu_ccr3_w, exu_tlu_ccr2_w, lsu_tlu_ldst_va_m, 
+   exu_tlu_ccr1_w, exu_tlu_ccr0_w, exu_lsu_ldst_va_e, const_cpuid, 
+   rclk, ifu_tlu_sraddr_d, ifu_tlu_rsr_inst_d, // ifu_tlu_wsr_inst_d, 
+   exu_tlu_wsr_data_m,  lsu_tlu_rsr_data_e, ifu_tlu_sraddr_d_v2, 
+   ifu_lsu_alt_space_e, lsu_tlu_squash_va_oor_m, ifu_tlu_imiss_e,
+   lsu_tlu_dcache_miss_w2, lsu_tlu_l2_dmiss, lsu_tlu_stb_full_w2, 
+   ffu_tlu_fpu_tid, ffu_tlu_fpu_cmplt, spu_tlu_rsrv_illgl_m, 
+   lsu_pid_state0, lsu_pid_state1, lsu_pid_state2, lsu_pid_state3, 
+   lsu_tlu_nucleus_ctxt_m,lsu_tlu_tte_pg_sz_g, exu_tlu_ue_trap_m,
+   lsu_ifu_inj_ack, ifu_tlu_alt_space_d, // lsu_tlu_ill_inst_m, 
+   ifu_lsu_thrid_s,lsu_dsfsr_din_g,lsu_dmmu_sfsr_trp_wr,lsu_mmu_flush_pipe_w,
+   exu_lsu_priority_trap_m, lsu_tlu_wsr_inst_e, lsu_mmu_defr_trp_taken_g);
+
+
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+//
+// input			clk;			// To tlu_int of sparc_tlu_int.v, ...
+input			rclk;			// To tlu_int of sparc_tlu_int.v, ...
+input [3:0]		const_cpuid;		// To tlu_int of sparc_tlu_int.v, ...
+input [`ASI_VA_WIDTH-1:0] exu_lsu_ldst_va_e;	// To mmu_dp of tlu_mmu_dp.v
+input [`TLU_ASI_VA_WIDTH-1:0] lsu_tlu_ldst_va_m;	// To mmu_dp of tlu_mmu_dp.v
+input [7:0]     exu_mmu_early_va_e;     // From exu of sparc_exu.v
+input [7:0]		exu_tlu_ccr0_w;		// To tdp of tlu_tdp.v
+input [7:0]		exu_tlu_ccr1_w;		// To tdp of tlu_tdp.v
+input [7:0]		exu_tlu_ccr2_w;		// To tdp of tlu_tdp.v
+input [7:0]		exu_tlu_ccr3_w;		// To tdp of tlu_tdp.v
+// modified due to timing
+// input [2:0]		exu_tlu_cwp0_w;		// To tdp of tlu_tdp.v
+// input [2:0]		exu_tlu_cwp1_w;		// To tdp of tlu_tdp.v
+// input [2:0]		exu_tlu_cwp2_w;		// To tdp of tlu_tdp.v
+// input [2:0]		exu_tlu_cwp3_w;		// To tdp of tlu_tdp.v
+input [2:0]		exu_tlu_cwp0;		// To tdp of tlu_tdp.v
+input [2:0]		exu_tlu_cwp1;		// To tdp of tlu_tdp.v
+input [2:0]		exu_tlu_cwp2;		// To tdp of tlu_tdp.v
+input [2:0]		exu_tlu_cwp3;		// To tdp of tlu_tdp.v
+input			exu_tlu_cwp_cmplt;	// To tcl of tlu_tcl.v
+input [1:0]		exu_tlu_cwp_cmplt_tid;	// To tcl of tlu_tcl.v
+// input			exu_tlu_cwp_fastcmplt_w;// To tcl of tlu_tcl.v
+input			exu_tlu_cwp_retry;	// To tcl of tlu_tcl.v
+input			exu_tlu_misalign_addr_jmpl_rtn_m;// To tcl of tlu_tcl.v
+input			exu_tlu_spill;		// To tcl of tlu_tcl.v
+input [1:0]		exu_tlu_spill_tid;	// To tcl of tlu_tcl.v
+// derive the spill_ttype from spill_other and spill_wtype
+// input [8:0]		exu_tlu_spill_ttype;	// To tcl of tlu_tcl.v
+input           exu_tlu_spill_other;    // From exu of sparc_exu.v
+input [2:0]     exu_tlu_spill_wtype;    // From exu of sparc_exu.v
+input [8:0]		exu_tlu_ttype_m;	// To tcl of tlu_tcl.v
+input			exu_tlu_ttype_vld_m;	// To tcl of tlu_tcl.v
+input			exu_tlu_ue_trap_m;// To tcl of tlu_tcl.v
+input			exu_tlu_va_oor_jl_ret_m;// To tcl of tlu_tcl.v
+input			exu_tlu_va_oor_m;	// To tcl of tlu_tcl.v
+input			ffu_tlu_ill_inst_m;	// new trap from ffu 
+input [1:0]		ffu_ifu_tid_w2;		// To tcl of tlu_tcl.v
+input			ffu_tlu_trap_ieee754;	// To tcl of tlu_tcl.v
+input			ffu_tlu_trap_other;	// To tcl of tlu_tcl.v
+input			ffu_tlu_trap_ue;	// To tcl of tlu_tcl.v
+input			ifu_lsu_ld_inst_e;	// To mmu_ctl of tlu_mmu_ctl.v
+input			ifu_lsu_memref_d;	// To tcl of tlu_tcl.v
+input			ifu_lsu_st_inst_e;	// To mmu_ctl of tlu_mmu_ctl.v
+input			ifu_tlu_done_inst_d;	// To tcl of tlu_tcl.v
+// input			ifu_tlu_flsh_inst_e;	// To tcl of tlu_tcl.v
+input			ifu_tlu_flush_m;	// To tcl of tlu_tcl.v
+input			ifu_tlu_flush_fd_w;	// To tcl of tlu_tcl.v
+input			ifu_tlu_flush_fd2_w;	// To tcl of tlu_tcl.v
+input			ifu_tlu_flush_fd3_w;	// To tcl of tlu_tcl.v
+input			lsu_tlu_early_flush_w;	// To tcl of tlu_tcl.v
+input			lsu_tlu_early_flush2_w;	// To tcl of tlu_tcl.v
+input			ifu_tlu_hwint_m;	// To tcl of tlu_tcl.v
+input			ifu_tlu_immu_miss_m;	// To tcl of tlu_tcl.v, ...
+input			ifu_tlu_pc_oor_e;	// To tcl of tlu_tcl.v
+input [`TLU_THRD_NUM-1:0] ifu_tlu_l2imiss;	// To tcl of tlu_tcl.v, ...
+input			ifu_tlu_inst_vld_m;	// To tcl of tlu_tcl.v
+input			ifu_tlu_inst_vld_m_bf1;	// To tcl of tlu_tcl.v
+input			ifu_tlu_itlb_done;	// To mmu_ctl of tlu_mmu_ctl.v
+// input [1:0]		ifu_tlu_ldst_size_e;	// To mmu_ctl of tlu_mmu_ctl.v
+// modified for bug 3017
+input [48:0]		ifu_tlu_npc_m;		// To tdp of tlu_tdp.v
+input [48:0]		ifu_tlu_pc_m;		// To tdp of tlu_tdp.v
+// input [47:0]		ifu_tlu_npc_m;		// To tdp of tlu_tdp.v
+// input [47:0]		ifu_tlu_pc_m;		// To tdp of tlu_tdp.v
+input			ifu_tlu_priv_violtn_m;	// To tcl of tlu_tcl.v
+input			ifu_tlu_retry_inst_d;	// To tcl of tlu_tcl.v
+input			ifu_tlu_rstint_m;	// To tcl of tlu_tcl.v
+input			ifu_tlu_sir_inst_m;	// To tcl of tlu_tcl.v
+input			ifu_tlu_swint_m;	// To tcl of tlu_tcl.v
+input [1:0]		ifu_tlu_thrid_d;	// To tcl of tlu_tcl.v
+input [1:0]		ifu_lsu_thrid_s;	// To tcl of tlu_tcl.v
+input [8:0]		ifu_tlu_ttype_m;	// To tcl of tlu_tcl.v
+input			ifu_tlu_ttype_vld_m;	// To tcl of tlu_tcl.v
+input			ifu_mmu_trap_m;	    // To tcl of tlu_tcl.v
+input			ifu_tlu_trap_m;	    // To tcl of tlu_tcl.v
+input [7:0]		lsu_asi_reg0;		// To tdp of tlu_tdp.v
+input [7:0]		lsu_asi_reg1;		// To tdp of tlu_tdp.v
+input [7:0]		lsu_asi_reg2;		// To tdp of tlu_tdp.v
+input [7:0]		lsu_asi_reg3;		// To tdp of tlu_tdp.v
+input [`TLU_ASI_STATE_WIDTH-1:0] lsu_asi_state;		// To tcl of tlu_tcl.v, ...
+// added asynchronous trap to handle correctable dmmu parity error
+input           lsu_tlu_async_ttype_vld_g; // lsu asynchronous trap valid
+input           lsu_tlu_defr_trp_taken_g; // NOTE(review): presumably "deferred trap taken" (per name) - confirm
+input           lsu_mmu_defr_trp_taken_g; // NOTE(review): presumably "deferred trap taken" (per name) - confirm
+input [6:0]     lsu_tlu_async_ttype_g;  // lsu asynchronous trap type 
+input [1:0]     lsu_tlu_async_tid_g; // asynchronous trap - thread
+input [3:0]		lsu_tlu_cpx_req;	// To tlu_int of sparc_tlu_int.v
+input			lsu_tlu_cpx_vld;	// To tlu_int of sparc_tlu_int.v
+// input [2:0]		lsu_tlu_ctxt_sel_m;	// To tcl of tlu_tcl.v
+input			lsu_tlu_daccess_excptn_g;// To tcl of tlu_tcl.v, ...
+input			lsu_tlu_daccess_prot_g;	// To tcl of tlu_tcl.v, ...
+// input [1:0]		lsu_tlu_derr_tid_g;	// To tcl of tlu_tcl.v
+input			lsu_tlu_dmmu_miss_g;	// To tcl of tlu_tcl.v, ...
+input [12:0]	lsu_tlu_dside_ctxt_m;	// To mmu_dp of tlu_mmu_dp.v
+input			lsu_tlu_dtlb_done;	// To mmu_ctl of tlu_mmu_ctl.v
+// input			lsu_tlu_flt_ld_nfo_pg_g;// To tcl of tlu_tcl.v
+// input			lsu_tlu_illegal_asi_action_g;// To tcl of tlu_tcl.v
+input [17:0]	lsu_tlu_intpkt;		// To tlu_int of sparc_tlu_int.v
+// modified for shadow scan
+// input [3:0] lsu_tlu_iobrdge_pc_sel;
+input [`TLU_THRD_NUM-1:0] ctu_sscan_tid;
+input			lsu_tlu_misalign_addr_ldst_atm_m;// To tcl of tlu_tcl.v
+// input			lsu_tlu_nonalt_ldst_m;	// To tcl of tlu_tcl.v
+input [12:0]	lsu_tlu_pctxt_m;	// To mmu_dp of tlu_mmu_dp.v
+input			lsu_tlu_pcxpkt_ack;	// To tlu_int of sparc_tlu_int.v
+input			lsu_tlu_priv_action_g;	// To tcl of tlu_tcl.v
+// input			lsu_tlu_priv_violtn_g;	// To tcl of tlu_tcl.v
+// input			lsu_tlu_spec_access_epage_g;// To tcl of tlu_tcl.v
+input [63:0]	lsu_tlu_st_rs3_data_g;	// To tlu_int of sparc_tlu_int.v, ...
+input [63:0]	lsu_tlu_rs3_data_g;	// To tlu_int of sparc_tlu_int.v, ...
+// added for timing
+input [1:0]		lsu_tlu_tlb_access_tid_m;// To mmu_ctl of tlu_mmu_ctl.v
+input [7:0]		lsu_tlu_tlb_asi_state_m;// To mmu_ctl of tlu_mmu_ctl.v
+input [47:13]	lsu_tlu_tlb_dmp_va_m;	// To mmu_dp of tlu_mmu_dp.v
+input			lsu_tlu_tlb_ld_inst_m;	// To mmu_ctl of tlu_mmu_ctl.v
+input [10:0]	lsu_tlu_tlb_ldst_va_m;	// To mmu_ctl of tlu_mmu_ctl.v
+input			lsu_tlu_tlb_st_inst_m;	// To mmu_ctl of tlu_mmu_ctl.v
+// input			lsu_tlu_tte_ebit_g;	// To tcl of tlu_tcl.v
+input [8:0]		lsu_tlu_ttype_m2;	// To tcl of tlu_tcl.v
+// removed unused bits
+// input [1:0]	lsu_tlu_ttype_tid_m2;	// To tcl of tlu_tcl.v
+input			lsu_tlu_ttype_vld_m2;	// To tcl of tlu_tcl.v
+// input			lsu_tlu_uncache_atomic_g;// To tcl of tlu_tcl.v
+// input			lsu_tlu_write_op_m;	// To tcl of tlu_tcl.v
+input			lsu_tlu_wtchpt_trp_g;	// To tcl of tlu_tcl.v
+// input		lsu_tlu_xslating_ldst_m;// To tcl of tlu_tcl.v
+// input		reset;			// To mmu_ctl of tlu_mmu_ctl.v
+// input		rst_l;			// To tcl of tlu_tcl.v, ...
+input			grst_l;			// To tcl of tlu_tcl.v, ...
+input			arst_l;			// To tcl of tlu_tcl.v, ...
+// input			rst_tri_en;	// To tcl of tlu_tcl.v, ...
+input			mem_write_disable;	// To tcl of tlu_tcl.v, ...
+input			mux_drive_disable;	// To tcl of tlu_tcl.v, ...
+// input			testmode_l;	// To tcl of tlu_tcl.v, ...
+input			sehold;			// To tlu_int of sparc_tlu_int.v, ...
+input			se;			// To tlu_int of sparc_tlu_int.v, ...
+input			si0,si1,short_si0,short_si1;			// To tlu_int of sparc_tlu_int.v, ...
+// End of automatics
+// Read/Write Privileged State Register Access.
+input  	[`TLU_ASR_ADDR_WIDTH-1:0] ifu_tlu_sraddr_d;      // addr of sr(st/pr)
+input  	[`TLU_ASR_ADDR_WIDTH-1:0] ifu_tlu_sraddr_d_v2;   // addr of sr(st/pr)
+input           ifu_tlu_rsr_inst_d ;	// valid rd sr(st/pr)
+// modified for timing
+// input           ifu_tlu_wsr_inst_d ;	// valid wr sr(st/pr)
+input           lsu_tlu_wsr_inst_e ;	// valid wr sr(st/pr)
+input  	[63:0]  exu_tlu_wsr_data_m ;    // pr/st data to irf.
+
+// input   [1:0]   ifu_tlu_thrid_e ;   	// Thread id.
+input	[7:0]	lsu_tlu_rsr_data_e ;	// sr/pr rd data from lsu.
+
+input			ifu_lsu_alt_space_e;	// alt-space access
+input		    ifu_tlu_alt_space_d;	// alt-space access - d stage
+input           lsu_tlu_squash_va_oor_m;// squash va_oor for mem-op.
+// input           lsu_tlu_ill_inst_m;     // new illegal instru from spu via lsu
+//
+// new interfaces to the pib 
+input			ifu_tlu_imiss_e;	    // icache misses -- New interface  
+input [3:0]		lsu_tlu_dcache_miss_w2;	// dcache miss -- new interface 
+input [3:0]		lsu_tlu_l2_dmiss;	    // l2 misses -- new interface 
+input [3:0]		lsu_tlu_stb_full_w2;	// store buffer full -- new interface 
+input [1:0]		ffu_tlu_fpu_tid;	    // ThrdID for the FF instr_cmplt -- new 
+input       	ffu_tlu_fpu_cmplt;	    // FF instru complete -- new 
+// 
+// New trap from SPU
+// removed for timing fix - NOTE(review): remark looks stale, this input is still declared and in the port list
+input       	spu_tlu_rsrv_illgl_m;	// illegal instruction from SPU 
+input  [2:0]   	lsu_pid_state0 ;        // pid thread0 ; global use
+input  [2:0]   	lsu_pid_state1 ;        // pid thread1 ; global use
+input  [2:0]   	lsu_pid_state2 ;        // pid thread2 ; global use
+input  [2:0]   	lsu_pid_state3 ;        // pid thread3 ; global use
+// input [48:0] ifu_tlu_pc_w;
+
+input           lsu_tlu_nucleus_ctxt_m ;// access is nucleus context
+input  [2:0]    lsu_tlu_tte_pg_sz_g ;   // page-size of tte
+input	[3:0]	ifu_lsu_error_inj ;	// inject parity error into tlb
+input   [8:0]   ifu_lsu_imm_asi_d;      // asi state value from imm 
+input           ifu_lsu_imm_asi_vld_d;  // valid asi state value from imm
+
+input	[23:0]	lsu_dsfsr_din_g ;	// now from lsu instead of tlu_tcl
+input	[3:0]	lsu_dmmu_sfsr_trp_wr ;	// now from lsu instead of tlu_tcl
+
+input		lsu_mmu_flush_pipe_w ; // full trap
+
+input		exu_lsu_priority_trap_m ;//fill,ue
+ 
+// output [5:0] int_tlu_rstid_i2;	// From tlu_int of sparc_tlu_int.v
+/*AUTOOUTPUT*/
+// Beginning of automatic outputs (from unused autoinst outputs)
+output [63:0]	tlu_lsu_int_ldxa_data_w2; // From tlu_int of sparc_tlu_int.v
+output			tlu_lsu_int_ld_ill_va_w2; // From tlu_int of sparc_tlu_int.v
+output			tlu_lsu_int_ldxa_vld_w2; // From tlu_int of sparc_tlu_int.v
+output			so0,so1,short_so0,short_so1;			// From tlu_int of sparc_tlu_int.v, ...
+output			tlu_dtlb_data_rd_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_dtlb_dmp_actxt_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_dtlb_dmp_all_g;	// From mmu_ctl of tlu_mmu_ctl.v
+//output		tlu_dtlb_dmp_by_ctxt_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_dtlb_dmp_nctxt_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_dtlb_dmp_pctxt_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_dtlb_dmp_sctxt_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_dtlb_dmp_vld_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_dtlb_invalidate_all_g;// From mmu_ctl of tlu_mmu_ctl.v
+output [5:0]	tlu_dtlb_rw_index_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_dtlb_rw_index_vld_g;// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_dtlb_tag_rd_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output [42:0]	tlu_dtlb_tte_data_w2;	// From mmu_dp of tlu_mmu_dp.v
+output [58:0]	tlu_dtlb_tte_tag_w2;	// From mmu_dp of tlu_mmu_dp.v
+output[3:0]		lsu_ifu_inj_ack ;	// ack for tlb error injection.
+//
+// width modified for hypervisor support
+// output [2:0]	tlu_exu_agp;		// From tcl of tlu_tcl.v
+output [`TSA_GLOBAL_WIDTH-1:0] tlu_exu_agp;		// From tcl of tlu_tcl.v
+output			tlu_exu_agp_swap;	// From tcl of tlu_tcl.v
+output [1:0]    tlu_exu_agp_tid;        // From tcl of tlu_tcl.v
+output [7:0]	tlu_exu_ccr_m;		// From tcl of tlu_tcl.v
+output [2:0]	tlu_exu_cwp_m;		// From tcl of tlu_tcl.v
+output			tlu_exu_cwp_retry_m;	// From tcl of tlu_tcl.v
+output			tlu_exu_cwpccr_update_m;// From tcl of tlu_tcl.v
+// tlu_exu_rsr_data_e being replaced by tlu_exu_rsr_data_m
+// the bus will become obsolete
+// output [`TLU_ASR_DATA_WIDTH-1:0] tlu_exu_rsr_data_e;     // From tdp of tlu_tdp.v
+output [`TLU_ASR_DATA_WIDTH-1:0] tlu_exu_rsr_data_m;     // From tdp of tlu_tdp.v
+output [40:0]	tlu_idtlb_dmp_key_g;	// From mmu_dp of tlu_mmu_dp.v
+output [1:0]	tlu_idtlb_dmp_thrid_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output [3:0]	tlu_ifu_hwint_i3;	// From tlu_int of sparc_tlu_int.v
+// removed - ifu will derive the signal internally
+// output [3:0]	tlu_ifu_int_activate_i3;// From tlu_int of sparc_tlu_int.v
+output			tlu_ifu_nukeint_i2;	// From tlu_int of sparc_tlu_int.v
+output [3:0]	tlu_ifu_pstate_ie;	// From tlu_int of sparc_tlu_int.v
+output [3:0]	tlu_ifu_pstate_pef;	// From tdp of tlu_tdp.v
+output			tlu_ifu_resumint_i2;	// From tlu_int of sparc_tlu_int.v
+output			tlu_ifu_rstint_i2;	// From tlu_int of sparc_tlu_int.v
+output [3:0]	tlu_ifu_rstthr_i2;	// From tlu_int of sparc_tlu_int.v
+output [1:0]	tlu_ifu_trap_tid_w1;	// From tcl of tlu_tcl.v
+output			tlu_ifu_trapnpc_vld_w1;	// From tdp of tlu_tdp.v 
+output [48:0]	tlu_ifu_trapnpc_w2;	// From tdp of tlu_tdp.v
+output [48:0]	tlu_ifu_trappc_w2;	// From tdp of tlu_tdp.v
+// output [47:0]	tlu_ifu_trapnpc_w2;	// From tdp of tlu_tdp.v
+// output [47:0]	tlu_ifu_trappc_w2;	// From tdp of tlu_tdp.v
+output			tlu_ifu_trappc_vld_w1;	// From tcl of tlu_tcl.v
+output			tlu_itlb_data_rd_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_itlb_dmp_actxt_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_itlb_dmp_all_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_itlb_dmp_nctxt_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_itlb_dmp_vld_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_itlb_invalidate_all_g;// From mmu_ctl of tlu_mmu_ctl.v
+output [5:0]	tlu_itlb_rw_index_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_itlb_rw_index_vld_g;// From mmu_ctl of tlu_mmu_ctl.v
+output			tlu_itlb_tag_rd_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output [42:0]	tlu_itlb_tte_data_w2;	// From mmu_dp of tlu_mmu_dp.v
+output [58:0]	tlu_itlb_tte_tag_w2;	// From mmu_dp of tlu_mmu_dp.v
+output			tlu_itlb_wr_vld_g;	// From mmu_ctl of tlu_mmu_ctl.v
+output [7:0]	tlu_lsu_asi_m;		// From tcl of tlu_tcl.v
+output			tlu_lsu_asi_update_m;	// From tcl of tlu_tcl.v
+// replaced by shadow scan signals
+output [62:0]	tlu_sscan_test_data;// From tdp of tlu_tdp.v
+// output [47:0]	tlu_sscan_pc;// From tdp of tlu_tdp.v
+// output [63:0]	tlu_lsu_ldxa_data_w2;	// From mmu_dp of tlu_mmu_dp.v
+output [1:0]	tlu_lsu_ldxa_tid_w2;	// From mmu_ctl of tlu_mmu_ctl.v
+output [25:0]	tlu_lsu_pcxpkt;		// From tlu_int of sparc_tlu_int.v
+output [3:0]	tlu_lsu_pstate_am;	// From tcl of tlu_tcl.v
+output [3:0]	tlu_lsu_pstate_cle;	// From tdp of tlu_tdp.v
+output [3:0]	tlu_lsu_pstate_priv;	// From tdp of tlu_tdp.v
+output [3:0]	tlu_lsu_redmode;	// From tcl of tlu_tcl.v
+// output [3:0]	tlu_lsu_redmode_rst;	// From tcl of tlu_tcl.v
+output [3:0]	tlu_lsu_redmode_rst_d1;	// From tcl of tlu_tcl.v
+// output [`TLU_THRD_NUM-1:0] tlu_lsu_async_ack_w2;	// From tcl of tlu_tcl.v
+output			tlu_lsu_stxa_ack;	// From mmu_ctl of tlu_mmu_ctl.v
+output [1:0]	tlu_lsu_stxa_ack_tid;	// From mmu_ctl of tlu_mmu_ctl.v
+output [1:0]	tlu_lsu_tid_m;		// From tcl of tlu_tcl.v
+output [`TLU_THRD_NUM-1:0] tlu_lsu_tl_zero;	// From tcl of tlu_tcl.v
+output [`TLU_THRD_NUM-1:0] tlu_sftint_vld; // From tcl of tlu_tcl.v
+output [`TLU_THRD_NUM-1:0] tlu_hintp_vld;  // From tcl of tlu_tcl.v
+output [`TLU_THRD_NUM-1:0] tlu_rerr_vld;  // From tcl of tlu_tcl.v
+// End of automatics
+   // Outputs
+// End of automatics
+// output tlu_ifu_flush_pipe_w;	// From tcl of tlu_tcl.v
+output tlu_early_flush_pipe_w;	// From tcl of tlu_tcl.v
+output tlu_early_flush_pipe2_w;	// From tcl of tlu_tcl.v
+output tlu_exu_early_flush_pipe_w;	// From tcl of tlu_tcl.v
+output tlu_lsu_ldxa_async_data_vld ; // tlu_lsu_ldxa_data_vld is for async op.
+output [`TLU_THRD_NUM-1:0] tlu_hpstate_priv;
+output [`TLU_THRD_NUM-1:0] tlu_hpstate_enb;
+// added for hpstate.ibe ECO
+output [`TLU_THRD_NUM-1:0] tlu_hpstate_ibe;
+output tlu_exu_priv_trap_m; // local trap flag sent to exu; connected in the output section of tcl
+output tlu_lsu_priv_trap_m; // local trap flag; connected in the output section of tcl. NOTE(review): name suggests lsu is the consumer - confirm
+// output tlu_lsu_priv_trap_w; // local traps send to exu 
+output tlu_exu_pic_onebelow_m; // connected in the output section of misctl; presumably consumed by exu - confirm
+output tlu_exu_pic_twobelow_m; // connected in the output section of misctl; presumably consumed by exu - confirm
+// 
+// added for MMU performance enhancement
+output		lsu_exu_ldxa_m ;
+output	[63:0]	lsu_exu_ldxa_data_g ;
+// Added to shift dsfsr logic from tlu to lsu.
+output	[3:0]	tlu_dsfsr_flt_vld;	// From mmu_dp of tlu_mmu_dp.v
+
+ 
+//
+// added to abide by the Niagara reset methodology
+wire            tlu_rst;   // local active high reset - from tlu_tcl
+wire            rclk;      // temporary clock name
+wire [63:0]		tlu_pib_rsr_data_e;	// From tdp of tlu_tdp.v
+// wire            tlu_rst_l; // local active high reset - from tlu_tcl                 
+wire            int_rst_l; // reset wired from intctl (.int_rst_l) to intdp (.tlu_rst_l); NOTE(review): _l suffix suggests active low - confirm
+// wire            pib_rst_l; // local active high reset - from tlu_tcl                 
+wire [1:0]		tlu_incr_tick;		// From tcl of tlu_tcl.v
+wire [1:0]		tlu_tckctr_in;		// From tcl of tlu_tcl.v
+// wire [60:0]		tlu_tick_incr_dout;	// To tdp of tlu_tdp.v
+wire [61:0]		tlu_incr64_dout;	// To tdp of tlu_tdp.v
+wire [61:0]		tlu_tick_incr_din;	// From tdp of tlu_tdp.v
+wire			tlu_tick_ctl_din;		// Connected in the output section of tcl (sparc_tlu_int is no longer instantiated)
+// modified for bug 3017
+wire [48:0]		tlu_restore_pc_w1;
+wire [48:0]		tlu_restore_npc_w1;
+wire [48:0]		tlu_pc_new_w;
+wire [48:0]		tlu_npc_new_w;
+wire [33:0]		tlu_partial_trap_pc_w1;	
+wire [1:0]		tlu_int_tid_m;		// From tcl of tlu_tcl.v; to intctl of sparc_tlu_intctl.v
+wire [3:0]		tlu_sftint_vld;		// From tcl of tlu_tcl.v
+wire			tlu_asi_write_g;	// From hyperv of tlu_hyperv.v
+wire			tlu_tte_real_g ;	// tte is real
+wire [`TLU_THRD_NUM-1:0] tlu_hpstate_tlz;
+wire [`TLU_ASI_STATE_WIDTH-1:0] tlu_asi_state_e;
+// modified due to memory macro swap
+// 
+// wire [`TSA_MEM_WIDTH-1:0]		tsa_dout;
+wire [`TSA_MEM_WIDTH-1:0] tsa0_dout;
+wire [`TSA_MEM_WIDTH-1:0] tsa1_dout;
+   
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+wire [`TLU_ASR_DATA_WIDTH-1:0]		tlu_wsr_data_w;	// From tdp of tlu_tdp.v
+wire			dmmu_any_sfsr_wr;	// From mmu_ctl of tlu_mmu_ctl.v
+wire [3:0]		dmmu_sfar_wr_en_l;	// From mmu_ctl of tlu_mmu_ctl.v
+// wire [3:0]		dmmu_sfsr_trp_wr;	// From tcl of tlu_tcl.v
+wire [3:0]		dmmu_sfsr_wr_en_l;	// From mmu_ctl of tlu_mmu_ctl.v
+wire			immu_any_sfsr_wr;	// From mmu_ctl of tlu_mmu_ctl.v
+wire [3:0]		immu_sfsr_trp_wr;	// From tcl of tlu_tcl.v
+wire [3:0]		immu_sfsr_wr_en_l;	// From mmu_ctl of tlu_mmu_ctl.v
+wire [5:0]		int_tlu_rstid_m;	// From intdp of sparc_tlu_intdp.v
+wire			itlb_wr_vld_g;		// From mmu_ctl of tlu_mmu_ctl.v
+wire [7:0]		lsu_tlu_rsr_data_mod_e;	// From tcl of tlu_tcl.v
+wire [19:0]		mra_byte_wen;		// From mmu_ctl of tlu_mmu_ctl.v
+wire [3:0]		mra_rd_ptr;		// From mmu_ctl of tlu_mmu_ctl.v
+wire			mra_rd_vld;		// From mmu_ctl of tlu_mmu_ctl.v
+wire [159:10]		mra_rdata;		// From mra of tlu_mra.v
+wire [155:0]		mra_wdata;		// From mmu_dp of tlu_mmu_dp.v
+wire [3:0]		mra_wr_ptr;		// From mmu_ctl of tlu_mmu_ctl.v
+wire			mra_wr_vld;		// From mmu_ctl of tlu_mmu_ctl.v
+wire [2:0]		tag_access_wdata_sel;	// From mmu_ctl of tlu_mmu_ctl.v
+wire			tlb_access_rst_l;	// From mmu_ctl of tlu_mmu_ctl.v
+wire			tlu_addr_msk_g;		// From tcl of tlu_tcl.v
+wire			tlu_admp_key_sel;	// From mmu_ctl of tlu_mmu_ctl.v
+wire			tlu_clr_sftint_l_g;	// From tcl of tlu_tcl.v
+wire [4:0]		tlu_dmp_key_vld_g;	// From mmu_ctl of tlu_mmu_ctl.v
+wire [2:0]      tlu_true_pc_sel_w;
+wire [48:0]     ifu_npc_w;
+wire [3:0]		tlu_dsfsr_flt_vld;	// From mmu_dp of tlu_mmu_dp.v
+wire [47:13]	tlu_dtag_access_w2;	// From mmu_dp of tlu_mmu_dp.v
+wire [3:0]		tlu_dtsb_size_w2;	// From mmu_dp of tlu_mmu_dp.v
+wire			tlu_dtsb_split_w2;	// From mmu_dp of tlu_mmu_dp.v
+wire [1:0]		tlu_agp_tid_w2;	    // From tcl of tlu_tcl.v
+wire [`TSA_TTYPE_WIDTH-1:0] tlu_final_offset_w1;	// From tcl of tlu_tcl.v
+wire [`TSA_TTYPE_WIDTH-1:0] tlu_final_ttype_w2;	// From tcl of tlu_tcl.v
+wire			tlu_full_flush_pipe_w2;	// From tcl of tlu_tcl.v
+wire            tlu_tcc_inst_w;	// From tcl of tlu_tcl.v
+wire			tlu_local_flush_w;	// From tcl of tlu_tcl.v
+wire [47:0]		tlu_idtsb_8k_ptr;	// From mmu_ctl of tlu_mmu_ctl.v
+wire			tlu_asi_data_nf_vld_w2;	// From mmu_ctl of tlu_mmu_ctl.v
+wire			tlu_inst_vld_nq_m;		// From tcl of tlu_tcl.v
+wire			tlu_int_asi_load;	// From mmu_ctl of tlu_mmu_ctl.v
+wire [1:0]		tlu_int_asi_thrid;	// From mmu_ctl of tlu_mmu_ctl.v
+wire			tlu_int_asi_vld;	// From mmu_ctl of tlu_mmu_ctl.v
+wire [3:0]		tlu_int_pstate_ie;	// From tdp of tlu_tdp.v
+wire [3:0]		tlu_int_redmode;	// From tdp of tlu_tdp.v
+wire [23:0]		tlu_isfsr_din_g;	// From tcl of tlu_tcl.v
+wire [3:0]		tlu_isfsr_flt_vld;	// From mmu_dp of tlu_mmu_dp.v
+//wire [47:13]		tlu_itsb_base_w2;	// From mmu_dp of tlu_mmu_dp.v
+wire [3:0]		tlu_itsb_size_w2;	// From mmu_dp of tlu_mmu_dp.v
+wire			tlu_itsb_split_w2;	// From mmu_dp of tlu_mmu_dp.v
+wire [3:0]		tlu_ldxa_l1mx1_sel;	// From mmu_ctl of tlu_mmu_ctl.v
+wire [3:0]		tlu_ldxa_l1mx2_sel;	// From mmu_ctl of tlu_mmu_ctl.v
+wire [2:0]		tlu_ldxa_l2mx1_sel;	// From mmu_ctl of tlu_mmu_ctl.v
+// wire			tlu_mmu_sync_data_excp_g;// From mmu_ctl of tlu_mmu_ctl.v
+wire [3:0]		tlu_pil;		// From tcl of tlu_tcl.v
+wire			tlu_tlb_tag_invrt_parity ;  
+wire			tlu_tlb_data_invrt_parity ;  
+wire                  tlu_sun4r_tte_g ;       // sun4r vs. sun4v tte
+//
+// modified for bug 1767
+/*
+wire [1:0]		tlu_pstate0_mmodel;	// From tdp of tlu_tdp.v
+wire [1:0]		tlu_pstate1_mmodel;	// From tdp of tlu_tdp.v
+wire [1:0]		tlu_pstate2_mmodel;	// From tdp of tlu_tdp.v
+wire [1:0]		tlu_pstate3_mmodel;	// From tdp of tlu_tdp.v
+wire [`TLU_THRD_NUM-1:0] tlu_pstate_tle;		// From tdp of tlu_tdp.v
+wire [`TLU_THRD_NUM-1:0] tlu_pstate_cle;		// From tdp of tlu_tdp.v
+*/
+wire [`TLU_THRD_NUM-1:0] tlu_pstate_am;		// From tdp of tlu_tdp.v
+wire [1:0]		tlu_pstate_din_sel0;	// From tcl of tlu_tcl.v
+wire [1:0]		tlu_pstate_din_sel1;	// From tcl of tlu_tcl.v
+wire [1:0]		tlu_pstate_din_sel2;	// From tcl of tlu_tcl.v
+wire [1:0]		tlu_pstate_din_sel3;	// From tcl of tlu_tcl.v
+// wire [`TLU_THRD_NUM-1:0] tlu_pstate_priv;	// From tdp of tlu_tdp.v
+// wire			tlu_retry_inst_m;	// From tcl of tlu_tcl.v
+//
+// modified for hypervisor support and bug 1767
+/*
+wire			tlu_select_alt_global;	// From tcl of tlu_tcl.v
+wire			tlu_select_int_global;	// From tcl of tlu_tcl.v
+wire			tlu_select_mmu_global;	// From tcl of tlu_tcl.v
+wire [1:0]		tlu_select_mmodel;	// From tcl of tlu_tcl.v
+wire			tlu_select_tle;		// From tcl of tlu_tcl.v
+*/
+wire			tlu_select_redmode;	// From tcl of tlu_tcl.v
+// wire			tlu_select_tba_g;	// From tcl of tlu_tcl.v
+wire			tlu_select_tba_w2;	// From tcl of tlu_tcl.v
+wire			tdp_select_tba_w2;	// From tcl of tlu_tcl.v
+// wire			tlu_self_boot_rst_g;	// From tcl of tlu_tcl.v
+// wire			tlu_self_boot_rst_w2;	// From tcl of tlu_tcl.v
+wire			tlu_set_sftint_l_g;	// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_sftint_en_l_g;	// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_sftint_mx_sel;	// From tcl of tlu_tcl.v
+wire [3:0]		tlu_sftint_id;		// From tdp of tlu_tdp.v
+// wire [3:0]		tlu_sftint_lvl14_int;	// From tcl of tlu_tcl.v
+wire [3:0]		tlu_sftint_penc_sel;	// From tcl of tlu_tcl.v
+wire [3:0]		tlu_slxa_thrd_sel;	// From mmu_ctl of tlu_mmu_ctl.v
+wire [2:0]		tlu_tag_access_ctxt_sel_m;// From tcl of tlu_tcl.v
+//wire			tlu_tag_access_nctxt_g;	// From mmu_dp of tlu_mmu_dp.v
+wire [`TLU_THRD_NUM-1:0] tlu_tba_en_l;		// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_thrd_rsel_e;	// From tcl of tlu_tcl.v
+// wire [`TLU_THRD_NUM-1:0] tlu_thrd_rsel_g;	// From tcl of tlu_tcl.v
+// wire [`TLU_THRD_NUM-1:0] tlu_thrd_wsel_g;	// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_thrd_wsel_w2;	// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_thread_wsel_g;	// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_thread_inst_vld_g;	// From tcl of tlu_tcl.v
+// wire [`TLU_THRD_NUM-1:0] tlu_thread_inst_vld_w2;	// From tcl of tlu_tcl.v
+wire			tlu_tick_en_l;		// From tcl of tlu_tcl.v
+// wire [`TLU_THRD_NUM-1:0] tlu_tick_int;		// From tcl of tlu_tcl.v
+// wire [`TLU_THRD_NUM-1:0] tlu_stick_int;		// From tcl of tlu_tcl.v
+// wire			tlu_tick_match;		// From tdp of tlu_tdp.v
+wire			tlu_tick_npt;		// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_tickcmp_en_l;	// From tcl of tlu_tcl.v
+// wire			tlu_tickcmp_intdis;	// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_tickcmp_sel;	// From tcl of tlu_tcl.v
+// wire			tlu_tl_gt_0_g;		// From tcl of tlu_tcl.v
+wire			tlu_tl_gt_0_w2;		// From tcl of tlu_tcl.v
+wire [2:0]		tlu_trp_lvl;		// From tcl of tlu_tcl.v
+wire [2:0]		tlu_tte_tag_g;		// From mmu_ctl of tlu_mmu_ctl.v
+wire [2:0]		tlu_tte_wr_pid_g;	// From mmu_ctl of tlu_mmu_ctl.v
+// wire [`TLU_THRD_NUM-1:0] tlu_update_pc_l_m;	// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_update_pc_l_w;	// From tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_trap_cwp_en;	// From tcl of tlu_tcl.v
+wire tlu_cwp_no_change_m;	// From tcl of tlu_tcl.v
+// modified due to timing
+// wire [3:0]		tlu_update_pstate_l_g;	// From tcl of tlu_tcl.v
+wire [3:0]		tlu_update_pstate_l_w2;	// From tcl of tlu_tcl.v
+wire			tlu_wr_sftint_l_g;	// From tcl of tlu_tcl.v
+// wire			tlu_wsr_inst_g;		// From tcl of tlu_tcl.v
+wire			tlu_wsr_inst_nq_g;		// From tcl of tlu_tcl.v
+// wire			tlu_wr_tsa_inst_g;	// From tcl of tlu_tcl.v
+wire			tlu_wr_tsa_inst_w2;	// From tcl of tlu_tcl.v
+wire			tsa_npc_en;		// From tcl of tlu_tcl.v
+wire			tsa_pc_en;		// From tcl of tlu_tcl.v
+wire [1:0]		tsa_rd_tid;		// From tcl of tlu_tcl.v
+wire [2:0]		tsa_rd_tpl;		// From tcl of tlu_tcl.v
+wire			tsa_rd_vld_e;   // From tcl of tlu_tcl.v
+wire			tsa_rd_en;		// From tcl of tlu_tcl.v
+// wire [`TLU_TSA_WIDTH-1:0]		tsa_rdata;		// From tsa of tlu_tsa.v
+wire			tsa_tstate_en;		// From tcl of tlu_tcl.v
+wire			tsa_htstate_en;		// From tlu_hyperv of tlu_hyperv.v
+wire			tsa_ttype_en;		// From tcl of tlu_tcl.v
+wire [`TLU_TSA_WIDTH-1:0]		tsa_wdata;		// From tdp of tlu_tdp.v
+wire [1:0]		tsa_wr_tid;		// From tcl of tlu_tcl.v
+wire [2:0]		tsa_wr_tpl;		// From tcl of tlu_tcl.v
+// modified due to tsa memory swap
+wire [1:0]		tsa_wr_vld;		// From tcl of tlu_tcl.v
+wire			tlu_htstate_rw_d;  // From tlu_hyperv of tlu_hyperv.v
+wire			tlu_htstate_rw_g;  // From tlu_hyperv of tlu_hyperv.v
+// modified due to rsr mux recode
+// wire			tlu_htba_mx2_sel;  // From tlu_hyperv of tlu_hyperv.v
+wire			tlu_htickcmp_rw_e; // From tlu_hyperv of tlu_hyperv.v
+// End of automatics
+wire [`TLU_ASI_QUE_WIDTH-1:0] tlu_asi_queue_rdata_g;	
+wire tlu_asi_queue_rd_vld_g;	
+wire tlu_ld_data_vld_g;	
+wire tlu_va_ill_g;	
+// wire tlu_va_all_zero_g;	
+//
+// modified for timing fixes
+// wire [3:0]		pib_priv_act_trap ;
+wire [3:0]		pib_priv_act_trap_m ;
+// wire [`QUE_TRAP_SEL_WIDTH-1:0] tlu_que_trap_sel_m;
+wire [5:0] tlu_ctxt_cfg_w2;	// To mmu_ctl of tlu_mmu_ctl.v
+wire [`TLU_THRD_NUM-1:0] pib_picl_wrap;		// To tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] pib_pich_wrap;		// To tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] pich_wrap_flg;		// To tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] pich_onebelow_flg;		// To tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] pich_twobelow_flg;		// To tcl of tlu_tcl.v
+wire tlu_pic_onebelow_e;		// To tcl of tlu_tcl.v
+wire tlu_pic_twobelow_e;		// To tcl of tlu_tcl.v
+wire tlu_pic_wrap_e;		// To tcl of tlu_tcl.v
+//
+// modified for bug 5436: Niagara 2.0
+wire [`TLU_THRD_NUM-1:0] tlu_pcr_ut;		// To tcl of tlu_tcl.v
+wire [`TLU_THRD_NUM-1:0] tlu_pcr_st;		// To tcl of tlu_tcl.v
+// wire tlu_pcr_ut_e;		// To tcl of tlu_tcl.v
+// wire tlu_pcr_st_e;		// To tcl of tlu_tcl.v
+wire tlu_pic_cnt_en_m;		// To tcl of tlu_tcl.v
+// wire [`TLU_THRD_NUM-1:0] pich_threebelow_flg;	// To tcl of tlu_tcl.v
+//
+// added for hypervisor support
+wire [`TSA_GLOBAL_WIDTH-1:0] tlu_dnrtry_global_g;
+// wire tlu_htick_match;
+// wire tlu_stick_match;
+wire tlu_trap_hpstate_enb;
+wire [`TLU_THRD_NUM-1:0] local_hpstate_priv;
+wire [`TLU_THRD_NUM-1:0] tcl_hpstate_priv;
+wire [`TLU_THRD_NUM-1:0] local_hpstate_enb;
+wire [`TLU_THRD_NUM-1:0] tcl_hpstate_enb;
+wire [`TLU_THRD_NUM-1:0] local_pstate_priv;
+wire [`TLU_THRD_NUM-1:0] local_pstate_ie;
+
+wire [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl0;
+wire [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl1;
+wire [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl2;
+wire [`TSA_GLOBAL_WIDTH-1:0] tlu_gl_lvl3;
+// wire [`TLU_THRD_NUM-1:0] tlu_hintp_en_l_g;
+wire [`TLU_THRD_NUM-1:0] tlu_htba_en_l;
+wire [`TLU_THRD_NUM-1:0] tlu_htickcmp_en_l;
+// wire [`TLU_THRD_NUM-1:0] tlu_set_hintp_g;
+wire [`TLU_THRD_NUM-1:0] tlu_set_hintp_sel_g;
+wire [`TLU_THRD_NUM-1:0] tlu_stickcmp_en_l;
+// modified for timing 
+// wire [`TLU_THRD_NUM-1:0] tlu_update_hpstate_l_g;
+wire [`TLU_THRD_NUM-1:0] tlu_update_hpstate_l_w2;
+wire [`TLU_THRD_NUM-1:0] tlu_wr_hintp_g;
+wire [`TLU_THRD_NUM-1:0] tlu_cpu_mondo_cmp; 
+wire [`TLU_THRD_NUM-1:0] tlu_dev_mondo_cmp; 
+wire [`TLU_THRD_NUM-1:0] tlu_resum_err_cmp; 
+wire [`TLU_THRD_NUM-1:0] tlu_hintp;
+wire [1:0] tlu_hpstate_din_sel0;
+wire [1:0] tlu_hpstate_din_sel1;
+wire [1:0] tlu_hpstate_din_sel2;
+wire [1:0] tlu_hpstate_din_sel3;
+wire [4:0] tlu_hyperv_rdpr_sel;
+wire [2:0] tlu_rdpr_mx1_sel;
+wire [2:0] tlu_rdpr_mx2_sel;
+wire [1:0] tlu_rdpr_mx3_sel;
+wire [1:0] tlu_rdpr_mx4_sel;
+wire [2:0] tlu_rdpr_mx5_sel;
+wire [2:0] tlu_rdpr_mx6_sel;
+wire [3:0] tlu_rdpr_mx7_sel;
+// modified for timing
+// wire tlu_ibrkpt_trap_g;
+wire tlu_ibrkpt_trap_w2;
+// wire tlu_select_htba_g;
+// wire tlu_select_htba_w2;
+wire [2:0] tlu_pc_mxsel_w2;
+// wire       tlu_stickcmp_intdis;
+wire       tlu_htickcmp_intdis;
+// wire       tlu_gl_rw_g; 	
+wire       tlu_gl_rw_m; 	
+wire [`TLU_THRD_NUM-1:0] tlu_por_rstint_g; 	
+// modified due to timing
+// wire tlu_thrd0_traps, tlu_thrd1_traps;
+// wire tlu_thrd2_traps, tlu_thrd3_traps;
+wire [`TLU_THRD_NUM-1:0] tlu_thrd_traps_w2; 
+wire tlu_dnrtry0_inst_g, tlu_dnrtry1_inst_g;
+wire tlu_dnrtry2_inst_g, tlu_dnrtry3_inst_g;
+wire tlu_scpd_rd_vld_m; // tlu_scpd_rd_vld_g; 
+wire tlu_scpd_wr_vld_g;
+wire tlu_hscpd_dacc_excpt_m;
+wire tlu_qtail_dacc_excpt_m;
+wire [`SCPD_RW_ADDR_WIDTH-1:0]  tlu_scpd_rd_addr_m;
+wire [`SCPD_RW_ADDR_WIDTH-1:0]  tlu_scpd_wr_addr_g;
+wire [79:0] tlu_scpd_asi_rdata_g;
+//
+// Added when sparc_tlu_intdp and sparc_tlu_intctl were promoted to the
+// tlu level (replacing the sparc_tlu_int wrapper).
+// wire			inc_ind_asi_inrr;	// From intctl of sparc_tlu_intctl.v
+wire [3:0]		tlu_asi_rdata_mxsel_g;	// From intctl of sparc_tlu_intctl.v
+wire [3:0]		inc_ind_asi_rd_invr;	// To intdp of sparc_tlu_intdp.v; not connected at intctl - driver not visible in this section
+// wire [3:0]		inc_ind_asi_thr;	// From intctl of sparc_tlu_intctl.v
+wire [3:0]		tlu_local_thrid_g;	// To intdp (.inc_ind_asi_thr); not connected at intctl - driver not visible in this section
+wire [3:0]		inc_ind_asi_wr_indr;	// To intdp of sparc_tlu_intdp.v; not connected at intctl - driver not visible in this section
+wire [3:0]		inc_ind_asi_wr_inrr;	// To intdp of sparc_tlu_intdp.v; not connected at intctl - driver not visible in this section
+wire [3:0]		inc_ind_indr_grant;	// From intctl of sparc_tlu_intctl.v
+wire [3:0]		inc_ind_ld_int_i1;	// From intctl of sparc_tlu_intctl.v
+wire [3:0]		inc_ind_rstthr_i1;	// From intctl of sparc_tlu_intctl.v
+wire [3:0]		inc_ind_thr_m;	// From intctl of sparc_tlu_intctl.v
+wire [1:0]		inc_indr_req_thrid;	// From intctl of sparc_tlu_intctl.v
+wire			inc_indr_req_valid;	// From intctl of sparc_tlu_intctl.v
+wire [4:0]		ind_inc_thrid_i1;	// From intdp of sparc_tlu_intdp.v
+wire [1:0]		ind_inc_type_i1;	// From intdp of sparc_tlu_intdp.v
+// wire			indr_inc_rst_pkt;	// From intdp of sparc_tlu_intdp.v
+wire [3:0]		int_pending_i2_l;	// From intdp of sparc_tlu_intdp.v
+// hypervisor lite indicator
+// wire [`TLU_THRD_NUM-1:0] tlu_hyper_lite;
+
+wire [12:0]		tlu_tag_access_ctxt_g ;
+wire			tlu_lng_ltncy_en_l ;
+wire			tlu_tsb_rd_ps0_sel ;
+wire [47:13]		tlu_tsb_base_w2_d1 ;
+
+   // scan chain wires
+   wire         scan1_1;
+   wire         scan1_2;
+   wire         scan1_3;
+   wire         scan0_1;
+   wire         short_scan0_1;
+   wire         short_scan0_2;
+   wire         short_scan0_3;
+   wire         short_scan0_4;
+   wire         short_scan0_5;
+   wire         short_scan0_6;
+//=====================================================================================
+// DUMMY WIRES FOR VLINT. TO BE FILTERED OUT.
+wire [9:0]		dummy_mra_rdata;
+
+//=====================================================================================
+
+
+/*
+sparc_tlu_int  AUTO_TEMPLATE (
+		       .tlu_int_asi_state(lsu_asi_state[`TLU_ASI_STATE_WIDTH-1:0]),	
+		       .lsu_tlu_pmode	(1'b1),
+		       
+		       .int_tlu_longop_done());
+*/
+//
+// modified the hierarchy to bring sparc_tlu_intdp and sparc_tlu_intctl 
+// to the tlu level - eliminating sparc_tlu_int
+/*
+sparc_tlu_int tlu_int (
+		       .tlu_int_asi_state(lsu_asi_state[`TLU_ASI_STATE_WIDTH-1:0]),
+		       .lsu_tlu_pmode	(1'b1),	// enable partition mode
+		       
+		       .int_tlu_longop_done(),  // use to switch in thread
+		       // .tlu_int_asi_store(tlu_int_asi_store),
+		       // Outputs
+		       .int_scpd_asi_data(int_scpd_asi_data[63:0]),
+		       .int_scpd_asi_data_vld(int_scpd_asi_data_vld),
+		       .int_tlu_rstid_m	(int_tlu_rstid_m[5:0]),
+		       .so		(so),
+		       .tlu_ifu_hwint_i3(tlu_ifu_hwint_i3[3:0]),
+		       // .tlu_ifu_int_activate_i3(tlu_ifu_int_activate_i3[3:0]),
+		       .tlu_ifu_nukeint_i2(tlu_ifu_nukeint_i2),
+		       .tlu_ifu_pstate_ie(tlu_ifu_pstate_ie[3:0]),
+		       .tlu_ifu_resumint_i2(tlu_ifu_resumint_i2),
+		       .tlu_ifu_rstint_i2(tlu_ifu_rstint_i2),
+		       .tlu_ifu_rstthr_i2(tlu_ifu_rstthr_i2[3:0]),
+		       .tlu_lsu_pcxpkt	(tlu_lsu_pcxpkt[25:0]),
+		       // Inputs
+		       .lsu_tlu_st_rs3_data_g(lsu_tlu_rs3_data_g[63:0]),
+		       .clk		(rclk),
+		       .const_cpuid	(const_cpuid[3:0]),
+		       .lsu_tlu_cpx_req	(lsu_tlu_cpx_req[3:0]),
+		       .lsu_tlu_cpx_vld	(lsu_tlu_cpx_vld),
+		       .lsu_tlu_intpkt	(lsu_tlu_intpkt[17:0]),
+		       .lsu_tlu_pcxpkt_ack(lsu_tlu_pcxpkt_ack),
+		       .tlu_rst_l	(tlu_rst_l),
+		       .se		(se),
+		       .si		(si),
+		       .tlu_int_asi_load(tlu_int_asi_load),
+		       .tlu_int_asi_thrid(tlu_int_asi_thrid[1:0]),
+		       .tlu_int_asi_vld	(tlu_int_asi_vld),
+		       .tlu_int_pstate_ie(tlu_int_pstate_ie[3:0]),
+		       .tlu_int_redmode	(tlu_int_redmode[3:0]),
+		       .tlu_int_sftint_pend(tlu_int_sftint_pend[3:0]),
+		       .tlu_int_tid_m	(tlu_int_tid_m[1:0]));
+*/
+// Interrupt datapath instance.  All connections are by name, so the
+// grouping below is for readability only.
+sparc_tlu_intdp intdp (
+    // ---- hand-written connections (kept ahead of AUTOINST) ----
+    // scan chain
+    .si                       (si1),
+    .so                       (scan1_1),
+    // fields sliced out of the lsu_tlu_intpkt bus
+    .lsu_ind_intpkt_thr       (lsu_tlu_intpkt[`INT_THR_HI:`INT_THR_LO]),
+    .lsu_ind_intpkt_type      (lsu_tlu_intpkt[`INT_TYPE_HI:`INT_TYPE_LO]),
+    .lsu_ind_intpkt_id        (lsu_tlu_intpkt[`INT_VEC_HI:`INT_VEC_LO]),
+    /*AUTOINST*/
+    // ---- Inputs ----
+    // clock, scan enable and reset (reset net is int_rst_l, which is
+    // also tied to the int_rst_l port of the intctl instance)
+    .rclk                     (rclk),
+    .se                       (se),
+    .tlu_rst_l                (int_rst_l),
+    // store data and ASI read-data sources
+    .lsu_tlu_st_rs3_data_g    (lsu_tlu_rs3_data_g[63:0]),
+    .tlu_scpd_asi_rdata_g     (tlu_scpd_asi_rdata_g[`TLU_SCPD_DATA_WIDTH-1:0]),
+    .tlu_asi_queue_rdata_g    (tlu_asi_queue_rdata_g[`TLU_ASI_QUE_WIDTH-1:0]),
+    .tlu_asi_rdata_mxsel_g    (tlu_asi_rdata_mxsel_g[3:0]),
+    // inc_ind_* / inc_indr_* control nets
+    .inc_ind_asi_thr          (tlu_local_thrid_g[`TLU_THRD_NUM-1:0]),
+    .inc_ind_asi_rd_invr      (inc_ind_asi_rd_invr[3:0]),
+    .inc_ind_asi_wr_indr      (inc_ind_asi_wr_indr[3:0]),
+    .inc_ind_asi_wr_inrr      (inc_ind_asi_wr_inrr[3:0]),
+    .inc_ind_indr_grant       (inc_ind_indr_grant[3:0]),
+    .inc_ind_ld_int_i1        (inc_ind_ld_int_i1[3:0]),
+    .inc_ind_rstthr_i1        (inc_ind_rstthr_i1[3:0]),
+    .inc_ind_thr_m            (inc_ind_thr_m[3:0]),
+    .inc_indr_req_thrid       (inc_indr_req_thrid[1:0]),
+    .inc_indr_req_valid       (inc_indr_req_valid),
+    // ---- Outputs ----
+    .ind_inc_thrid_i1         (ind_inc_thrid_i1[4:0]),
+    .ind_inc_type_i1          (ind_inc_type_i1[1:0]),
+    .int_pending_i2_l         (int_pending_i2_l[3:0]),
+    .int_tlu_rstid_m          (int_tlu_rstid_m[5:0]),
+    .tlu_lsu_int_ldxa_data_w2 (tlu_lsu_int_ldxa_data_w2[63:0]),
+    .tlu_lsu_pcxpkt           (tlu_lsu_pcxpkt[25:0])
+);
+
+// Interrupt control instance.  All connections are by name, so the
+// grouping below is for readability only.
+sparc_tlu_intctl intctl (
+    // ---- hand-written connections (kept ahead of AUTOINST) ----
+    .si                       (si0),
+    .so                       (scan0_1),
+    .tlu_int_asi_store        (tlu_asi_write_g),
+    /*AUTOINST*/
+    // ---- Inputs ----
+    // clock, scan and reset controls
+    .rclk                     (rclk),
+    .se                       (se),
+    .sehold                   (sehold),
+    .arst_l                   (arst_l),
+    .grst_l                   (grst_l),
+    .rst_tri_en               (mux_drive_disable),
+    // core id and lsu handshakes
+    .const_cpuid              (const_cpuid[3:0]),
+    .lsu_tlu_cpx_req          (lsu_tlu_cpx_req[3:0]),
+    .lsu_tlu_cpx_vld          (lsu_tlu_cpx_vld),
+    .lsu_tlu_pcxpkt_ack       (lsu_tlu_pcxpkt_ack),
+    // nets connected to the intdp instance
+    .ind_inc_thrid_i1         (ind_inc_thrid_i1[4:0]),
+    .ind_inc_type_i1          (ind_inc_type_i1[1:0]),
+    .int_pending_i2_l         (int_pending_i2_l[3:0]),
+    // ASI access and per-thread state
+    .tlu_int_asi_state        (lsu_asi_state[7:0]),
+    .tlu_int_asi_load         (tlu_int_asi_load),
+    .tlu_int_asi_thrid        (tlu_int_asi_thrid[1:0]),
+    .tlu_int_asi_vld          (tlu_int_asi_vld),
+    .tlu_int_pstate_ie        (tlu_int_pstate_ie[3:0]),
+    .tlu_int_tid_m            (tlu_int_tid_m[1:0]),
+    .tlu_asi_queue_rd_vld_g   (tlu_asi_queue_rd_vld_g),
+    .tlu_ld_data_vld_g        (tlu_ld_data_vld_g),
+    .tlu_va_ill_g             (tlu_va_ill_g),
+    // ---- Outputs ----
+    // interrupt indications toward the ifu
+    .tlu_ifu_hwint_i3         (tlu_ifu_hwint_i3[3:0]),
+    .tlu_ifu_nukeint_i2       (tlu_ifu_nukeint_i2),
+    .tlu_ifu_pstate_ie        (tlu_ifu_pstate_ie[3:0]),
+    .tlu_ifu_resumint_i2      (tlu_ifu_resumint_i2),
+    .tlu_ifu_rstint_i2        (tlu_ifu_rstint_i2),
+    .tlu_ifu_rstthr_i2        (tlu_ifu_rstthr_i2[3:0]),
+    // deliberately left unconnected
+    .int_tlu_longop_done      (),
+    // controls connected to the intdp instance
+    .inc_ind_indr_grant       (inc_ind_indr_grant[3:0]),
+    .inc_ind_ld_int_i1        (inc_ind_ld_int_i1[3:0]),
+    .inc_ind_rstthr_i1        (inc_ind_rstthr_i1[3:0]),
+    .inc_ind_thr_m            (inc_ind_thr_m[3:0]),
+    .inc_indr_req_thrid       (inc_indr_req_thrid[1:0]),
+    .inc_indr_req_valid       (inc_indr_req_valid),
+    .tlu_asi_rdata_mxsel_g    (tlu_asi_rdata_mxsel_g[3:0]),
+    // ASI load status
+    .tlu_asi_data_nf_vld_w2   (tlu_asi_data_nf_vld_w2),
+    .tlu_lsu_int_ld_ill_va_w2 (tlu_lsu_int_ld_ill_va_w2),
+    // reset net that also feeds the tlu_rst_l port of intdp
+    .int_rst_l                (int_rst_l)
+);
+
+// tlu_misctl instance.  All connections are by name, so the grouping
+// below is for readability only.
+//
+// Ports that were already commented out in the original list and remain
+// disconnected: ifu_tlu_npc_m, tlu_retry_inst_m, tlu_done_inst_m,
+// tlu_dnrtry_inst_m_l, tsa_rd_vld, pich_onebelow_flg, pich_twobelow_flg,
+// pich_threebelow_flg, tlu_thread_inst_vld_w2.
+tlu_misctl misctl (
+    // ---- Outputs ----
+    .tlu_exu_cwp_m          (tlu_exu_cwp_m[`TSA_CWP_WIDTH-1:0]),
+    .tlu_exu_ccr_m          (tlu_exu_ccr_m[`TSA_CCR_WIDTH-1:0]),
+    .tlu_lsu_asi_m          (tlu_lsu_asi_m[`TLU_ASI_STATE_WIDTH-1:0]),
+    .tlu_cwp_no_change_m    (tlu_cwp_no_change_m),
+    .tlu_exu_pic_onebelow_m (tlu_exu_pic_onebelow_m),
+    .tlu_exu_pic_twobelow_m (tlu_exu_pic_twobelow_m),
+    .tlu_ifu_trappc_w2      (tlu_ifu_trappc_w2[48:0]),
+    .tlu_ifu_trapnpc_w2     (tlu_ifu_trapnpc_w2[48:0]),
+    .tlu_pc_new_w           (tlu_pc_new_w[48:0]),
+    .tlu_npc_new_w          (tlu_npc_new_w[48:0]),
+    // this instance's slice of the shadow-scan bus
+    .tlu_sscan_misctl_data  (tlu_sscan_test_data[`MISCTL_SSCAN_HI:`MISCTL_SSCAN_LO]),
+    .so                     (short_so0),
+    // ---- Inputs ----
+    // clock, reset and scan
+    .rclk                   (rclk),
+    .tlu_rst                (tlu_rst),
+    .se                     (se),
+    .si                     (short_scan0_6),
+    .ctu_sscan_tid          (ctu_sscan_tid[`TLU_THRD_NUM-1:0]),
+    // program counters
+    .ifu_tlu_pc_m           (ifu_tlu_pc_m[48:0]),
+    .ifu_npc_w              (ifu_npc_w[48:0]),
+    .tlu_partial_trap_pc_w1 (tlu_partial_trap_pc_w1[33:0]),
+    .tlu_restore_pc_w1      (tlu_restore_pc_w1[48:0]),
+    .tlu_restore_npc_w1     (tlu_restore_npc_w1[48:0]),
+    .tlu_restore_pc_sel_w1  (tlu_restore_pc_sel_w1),
+    .tlu_true_pc_sel_w      (tlu_true_pc_sel_w[2:0]),
+    // per-thread cwp values from exu
+    .exu_tlu_cwp0           (exu_tlu_cwp0[`TSA_CWP_WIDTH-1:0]),
+    .exu_tlu_cwp1           (exu_tlu_cwp1[`TSA_CWP_WIDTH-1:0]),
+    .exu_tlu_cwp2           (exu_tlu_cwp2[`TSA_CWP_WIDTH-1:0]),
+    .exu_tlu_cwp3           (exu_tlu_cwp3[`TSA_CWP_WIDTH-1:0]),
+    // trap type / offset
+    .tlu_final_ttype_w2     (tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .tlu_final_offset_w1    (tlu_final_offset_w1[`TSA_TTYPE_WIDTH-1:0]),
+    // tsa controls; only bit 1 of tsa_wr_vld is used here
+    .tsa_wr_tid             (tsa_wr_tid[1:0]),
+    .tsa1_wr_vld            (tsa_wr_vld[1]),
+    .tsa_ttype_en           (tsa_ttype_en),
+    .tsa_rd_vld_e           (tsa_rd_vld_e),
+    .tlu_thrd_rsel_e        (tlu_thrd_rsel_e[`TLU_THRD_NUM-1:0]),
+    // fields sliced out of tsa0_dout
+    .tsa0_rdata_pc          (tsa0_dout[`TSA0_TPC_HI:`TSA0_TPC_LO]),
+    .tsa0_rdata_gl          (tsa0_dout[`TSA0_MEM_GL_HI:`TSA0_MEM_GL_LO]),
+    .tsa0_rdata_ccr         (tsa0_dout[`TSA0_MEM_CCR_HI:`TSA0_MEM_CCR_LO]),
+    .tsa0_rdata_asi         (tsa0_dout[`TSA0_MEM_ASI_HI:`TSA0_MEM_ASI_LO]),
+    .tsa0_rdata_pstate      (tsa0_dout[`TSA0_MEM_PSTATE_HI:`TSA0_MEM_PSTATE_LO]),
+    .tsa0_rdata_cwp         (tsa0_dout[`TSA0_MEM_CWP_HI:`TSA0_MEM_CWP_LO]),
+    // fields sliced out of tsa1_dout
+    .tsa1_rdata_npc         (tsa1_dout[`TSA1_TNPC_HI:`TSA1_TNPC_LO]),
+    .tsa1_rdata_ttype       (tsa1_dout[`TSA1_TTYPE_HI:`TSA1_TTYPE_LO]),
+    .tsa1_rdata_htstate     (tsa1_dout[`TSA1_HTSTATE_HI:`TSA1_HTSTATE_LO]),
+    // PIC-related controls (marked as an experiment in the original)
+    .tlu_pic_onebelow_e     (tlu_pic_onebelow_e),
+    .tlu_pic_twobelow_e     (tlu_pic_twobelow_e),
+    .tlu_pic_cnt_en_m       (tlu_pic_cnt_en_m)
+);
+
+tlu_tcl tcl (
+         .so (short_so1),
+         .si (short_si1),
+	     .tlu_wsr_data_b63_w	(tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-1]),
+	 .tlu_itag_acc_sel_g (tlu_itag_acc_sel_g),
+         .pib_priv_act_trap_m   (pib_priv_act_trap_m[3:0]), 
+         .spu_tlu_rsrv_illgl_m  (spu_tlu_rsrv_illgl_m), 
+         .tlu_cpu_mondo_cmp (tlu_cpu_mondo_cmp[`TLU_THRD_NUM-1:0]),
+         .tlu_dev_mondo_cmp (tlu_dev_mondo_cmp[`TLU_THRD_NUM-1:0]),
+         .tlu_resum_err_cmp (tlu_resum_err_cmp[`TLU_THRD_NUM-1:0]),
+         .tlu_hintp         (tlu_hintp),
+         .pich_wrap_flg     (pich_wrap_flg[`TLU_THRD_NUM-1:0]), 
+         .pich_onebelow_flg (pich_onebelow_flg[`TLU_THRD_NUM-1:0]),
+         .pich_twobelow_flg (pich_twobelow_flg[`TLU_THRD_NUM-1:0]),
+          // modified for bug 5436: Niagara 2.0
+	     .tlu_pcr_ut	    (tlu_pcr_ut[`TLU_THRD_NUM-1:0]),
+	     .tlu_pcr_st	    (tlu_pcr_st[`TLU_THRD_NUM-1:0]),
+	     // .tlu_pcr_ut_e	    (tlu_pcr_ut_e),
+	     // .tlu_pcr_st_e	    (tlu_pcr_st_e),
+         .tlu_pic_cnt_en_m  (tlu_pic_cnt_en_m),
+         .tlu_pic_wrap_e    (tlu_pic_wrap_e),
+         // .pich_threebelow_flg (pich_threebelow_flg[`TLU_THRD_NUM-1:0]),
+         .pib_picl_wrap (pib_picl_wrap[`TLU_THRD_NUM-1:0]),
+	     .tlu_local_flush_w	(tlu_local_flush_w),
+	     .tlu_restore_pc_sel_w1	(tlu_restore_pc_sel_w1),
+	     .tlu_final_offset_w1 (tlu_final_offset_w1[`TSA_TTYPE_WIDTH-1:0]),
+	     // Outputs
+         .pib_pich_wrap (pib_pich_wrap[`TLU_THRD_NUM-1:0]),
+         .tlu_ibrkpt_trap_w2     (tlu_ibrkpt_trap_w2),
+	     .tlu_early_flush_pipe_w	(tlu_early_flush_pipe_w),
+	     .tlu_early_flush_pipe2_w	(tlu_early_flush_pipe2_w),
+	     .tlu_exu_early_flush_pipe_w	(tlu_exu_early_flush_pipe_w),
+	     .tlu_ifu_trappc_vld_w1	(tlu_ifu_trappc_vld_w1),
+	     .tlu_ifu_trapnpc_vld_w1	(tlu_ifu_trapnpc_vld_w1),
+	     .tlu_ifu_trap_tid_w1	(tlu_ifu_trap_tid_w1[1:0]),
+         .tlu_trap_hpstate_enb  (tlu_trap_hpstate_enb),
+         .tlu_exu_priv_trap_m  (tlu_exu_priv_trap_m),
+         .tlu_lsu_priv_trap_m  (tlu_lsu_priv_trap_m),
+         // .tlu_lsu_priv_trap_w  (tlu_lsu_priv_trap_w),
+         // .tlu_exu_pic_onebelow_m  (tlu_exu_pic_onebelow_m),
+         // .tlu_exu_pic_twobelow_m  (tlu_exu_pic_twobelow_m),
+	     .tsa_wr_tpl		(tsa_wr_tpl[2:0]),
+	     .tsa_rd_tid		(tsa_rd_tid[1:0]),
+	     .tsa_rd_tpl		(tsa_rd_tpl[2:0]),
+	     .tsa_wr_tid		(tsa_wr_tid[1:0]),
+	     .tsa_wr_vld		(tsa_wr_vld[1:0]),
+	     .tsa_rd_vld_e		(tsa_rd_vld_e),
+	     .tsa_rd_en		    (tsa_rd_en),
+	     .tlu_lsu_tl_zero		(tlu_lsu_tl_zero[3:0]),
+	     .tlu_full_flush_pipe_w2 (tlu_full_flush_pipe_w2),
+		 .tlu_exu_agp_tid	(tlu_exu_agp_tid[1:0]),
+	     .tlu_agp_tid_w2	(tlu_agp_tid_w2[1:0]),
+	     .tlu_tcc_inst_w    (tlu_tcc_inst_w),
+	     .tsa_pc_en			(tsa_pc_en),
+	     .tsa_npc_en		(tsa_npc_en),
+	     .tsa_tstate_en		(tsa_tstate_en),
+	     .tsa_ttype_en		(tsa_ttype_en),
+	     .tsa_htstate_en	(tsa_htstate_en),
+	     .tlu_tl_gt_0_w2		(tlu_tl_gt_0_w2),
+	     // .tlu_retry_inst_m		(tlu_retry_inst_m),
+	     // .tlu_done_inst_m		(tlu_done_inst_m),
+	     // .tlu_dnrtry_inst_m_l		(tlu_dnrtry_inst_m_l),
+	     .tlu_true_pc_sel_w   (tlu_true_pc_sel_w[2:0]),
+	     .tlu_tick_en_l		(tlu_tick_en_l),
+	     .tlu_tickcmp_en_l		(tlu_tickcmp_en_l[`TLU_THRD_NUM-1:0]),
+	     .tlu_tba_en_l		(tlu_tba_en_l[`TLU_THRD_NUM-1:0]),
+	     .tlu_thrd_wsel_w2		(tlu_thrd_wsel_w2[`TLU_THRD_NUM-1:0]),
+	     .tlu_thread_wsel_g		(tlu_thread_wsel_g[`TLU_THRD_NUM-1:0]),
+	     .tlu_final_ttype_w2		(tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+	     .tlu_thread_inst_vld_g	(tlu_thread_inst_vld_g[`TLU_THRD_NUM-1:0]),
+	     // .tlu_thread_inst_vld_w2	(tlu_thread_inst_vld_w2[`TLU_THRD_NUM-1:0]),
+	     .tlu_update_pc_l_w		(tlu_update_pc_l_w[`TLU_THRD_NUM-1:0]),
+	     .tlu_select_redmode	(tlu_select_redmode),
+	     .tlu_pstate_din_sel0	(tlu_pstate_din_sel0[1:0]),
+	     .tlu_pstate_din_sel1	(tlu_pstate_din_sel1[1:0]),
+	     .tlu_pstate_din_sel2	(tlu_pstate_din_sel2[1:0]),
+	     .tlu_pstate_din_sel3	(tlu_pstate_din_sel3[1:0]),
+	     .tlu_update_pstate_l_w2	(tlu_update_pstate_l_w2[3:0]),
+	     .tlu_trp_lvl		(tlu_trp_lvl[2:0]),
+	     .tlu_pil			(tlu_pil[3:0]),
+	     .tlu_wsr_inst_nq_g		(tlu_wsr_inst_nq_g),
+	     .tlu_wr_tsa_inst_w2		(tlu_wr_tsa_inst_w2),
+	     .tlu_exu_cwp_retry_m	(tlu_exu_cwp_retry_m),
+	     .tlu_exu_cwpccr_update_m	(tlu_exu_cwpccr_update_m),
+	     .tlu_lsu_asi_update_m	(tlu_lsu_asi_update_m),
+	     .tlu_lsu_tid_m		(tlu_lsu_tid_m[1:0]),
+	     .tlu_select_tba_w2		(tlu_select_tba_w2),
+	     .tdp_select_tba_w2		(tdp_select_tba_w2),
+	     .tlu_set_sftint_l_g	(tlu_set_sftint_l_g),
+	     .tlu_clr_sftint_l_g	(tlu_clr_sftint_l_g),
+	     .tlu_wr_sftint_l_g		(tlu_wr_sftint_l_g),
+	     .tlu_sftint_en_l_g		(tlu_sftint_en_l_g[`TLU_THRD_NUM-1:0]),
+	     .tlu_sftint_mx_sel		(tlu_sftint_mx_sel[`TLU_THRD_NUM-1:0]),
+	     .tlu_sftint_penc_sel	(tlu_sftint_penc_sel[`TLU_THRD_NUM-1:0]),
+	     .tlu_sftint_vld		(tlu_sftint_vld[`TLU_THRD_NUM-1:0]),
+	     .tlu_hintp_vld		    (tlu_hintp_vld[`TLU_THRD_NUM-1:0]),
+	     .tlu_rerr_vld		    (tlu_rerr_vld[`TLU_THRD_NUM-1:0]),
+	     .tlu_int_tid_m		    (tlu_int_tid_m[1:0]),
+	     .tlu_incr_tick		    (tlu_incr_tick[1:0]),
+	     .tlu_tckctr_in		    (tlu_tckctr_in[1:0]),
+	     .tlu_tickcmp_sel		(tlu_tickcmp_sel[`TLU_THRD_NUM-1:0]),
+	     .immu_sfsr_trp_wr		(immu_sfsr_trp_wr[3:0]),
+	     .tlu_isfsr_din_g		(tlu_isfsr_din_g[23:0]),
+	     .tlu_tick_npt		    (tlu_tick_npt),
+	     .tlu_thrd_rsel_e		(tlu_thrd_rsel_e[3:0]),
+	     .tlu_inst_vld_nq_m		(tlu_inst_vld_nq_m),
+	     .tlu_lsu_pstate_am		(tlu_lsu_pstate_am[3:0]),
+         .tlu_hyperv_rdpr_sel   (tlu_hyperv_rdpr_sel[4:0]),
+	     .tlu_rdpr_mx1_sel		(tlu_rdpr_mx1_sel[2:0]),
+	     .tlu_rdpr_mx2_sel		(tlu_rdpr_mx2_sel[2:0]),
+	     .tlu_rdpr_mx3_sel		(tlu_rdpr_mx3_sel[1:0]),
+	     .tlu_rdpr_mx4_sel		(tlu_rdpr_mx4_sel[1:0]),
+	     .tlu_rdpr_mx5_sel		(tlu_rdpr_mx5_sel[2:0]),
+	     .tlu_rdpr_mx6_sel		(tlu_rdpr_mx6_sel[2:0]),
+	     .tlu_rdpr_mx7_sel		(tlu_rdpr_mx7_sel[3:0]),
+	     .tlu_lsu_redmode_rst_d1 (tlu_lsu_redmode_rst_d1[3:0]),
+	     .lsu_tlu_rsr_data_mod_e	(lsu_tlu_rsr_data_mod_e[7:0]),
+	     .tlu_addr_msk_g		(tlu_addr_msk_g),
+         .tlu_stickcmp_en_l (tlu_stickcmp_en_l[`TLU_THRD_NUM-1:0]),
+         .tlu_htickcmp_en_l (tlu_htickcmp_en_l[`TLU_THRD_NUM-1:0]),
+         .tlu_htstate_rw_d (tlu_htstate_rw_d),
+         .tlu_htstate_rw_g (tlu_htstate_rw_g),
+         .tlu_dnrtry0_inst_g (tlu_dnrtry0_inst_g),
+         .tlu_dnrtry1_inst_g (tlu_dnrtry1_inst_g),
+         .tlu_dnrtry2_inst_g (tlu_dnrtry2_inst_g),
+         .tlu_dnrtry3_inst_g (tlu_dnrtry3_inst_g),
+         .tlu_thrd_traps_w2 (tlu_thrd_traps_w2[`TLU_THRD_NUM-1:0]),
+         .tlu_tick_ctl_din (tlu_tick_ctl_din),
+         .tlu_por_rstint_g (tlu_por_rstint_g[`TLU_THRD_NUM-1:0]),
+         .tlu_pc_mxsel_w2 (tlu_pc_mxsel_w2),
+	     .ifu_npc_w		    (ifu_npc_w[48:0]),
+	     .tlu_rst			(tlu_rst),
+	     // .tlu_rst_l			(tlu_rst_l),
+         .tlu_sscan_tcl_data (tlu_sscan_test_data[`TCL_SSCAN_HI:`TCL_SSCAN_LO]),
+	     // Inputs
+	     .ifu_tlu_npc_m		    (ifu_tlu_npc_m[48:0]),
+	     .ifu_tlu_pc_oor_e		(ifu_tlu_pc_oor_e),
+         .lsu_tlu_early_flush_w (lsu_tlu_early_flush_w),
+         .ifu_tlu_flush_fd_w    (ifu_tlu_flush_fd2_w),
+	     .ifu_tlu_sraddr_d		(ifu_tlu_sraddr_d[`TLU_ASR_ADDR_WIDTH-1:0]),
+	     .ifu_tlu_rsr_inst_d	(ifu_tlu_rsr_inst_d),
+	     .lsu_tlu_wsr_inst_e	(lsu_tlu_wsr_inst_e),
+	     .tlu_wsr_data_w	    (tlu_wsr_data_w[3:0]),
+	     .lsu_tlu_ttype_m2		(lsu_tlu_ttype_m2[8:0]),
+	     .lsu_tlu_ttype_vld_m2	(lsu_tlu_ttype_vld_m2),
+	     .ifu_tlu_done_inst_d	(ifu_tlu_done_inst_d),
+	     .ifu_tlu_retry_inst_d	(ifu_tlu_retry_inst_d),
+	     .ifu_tlu_ttype_m		(ifu_tlu_ttype_m[8:0]),
+	     .ifu_tlu_ttype_vld_m	(ifu_tlu_ttype_vld_m),
+	     .ifu_tlu_trap_m	    (ifu_tlu_trap_m),
+	     .exu_tlu_ttype_m		(exu_tlu_ttype_m[8:0]),
+	     .exu_tlu_ttype_vld_m	(exu_tlu_ttype_vld_m),
+	     .exu_tlu_ue_trap_m	(exu_tlu_ue_trap_m),
+	     .exu_tlu_spill		(exu_tlu_spill),
+	     .exu_tlu_spill_tid		(exu_tlu_spill_tid[1:0]),
+         .exu_tlu_spill_other   (exu_tlu_spill_other),  
+         .exu_tlu_spill_wtype   (exu_tlu_spill_wtype),  
+	     .exu_tlu_va_oor_m		(exu_tlu_va_oor_m),
+	     .exu_tlu_va_oor_jl_ret_m	(exu_tlu_va_oor_jl_ret_m),
+         .tlu_cwp_no_change_m   (tlu_cwp_no_change_m),
+         .tlu_trap_cwp_en   (tlu_trap_cwp_en[`TLU_THRD_NUM-1:0]),
+	     .ifu_tlu_sir_inst_m	(ifu_tlu_sir_inst_m),
+	     .ifu_tlu_inst_vld_m	(ifu_tlu_inst_vld_m),
+	     .ifu_tlu_thrid_d		(ifu_tlu_thrid_d[1:0]),
+         .lsu_tlu_async_ttype_vld_g (lsu_tlu_async_ttype_vld_g),
+         .lsu_tlu_defr_trp_taken_g (lsu_tlu_defr_trp_taken_g),
+         .lsu_tlu_async_ttype_g (lsu_tlu_async_ttype_g),
+         .lsu_tlu_async_tid_g (lsu_tlu_async_tid_g[1:0]),
+	     .ifu_tlu_immu_miss_m	(ifu_tlu_immu_miss_m),
+	     .exu_tlu_cwp_cmplt		(exu_tlu_cwp_cmplt),
+	     .exu_tlu_cwp_retry		(exu_tlu_cwp_retry),
+	     .exu_tlu_cwp_cmplt_tid	(exu_tlu_cwp_cmplt_tid[1:0]),
+	     .ifu_tlu_rstint_m		(ifu_tlu_rstint_m),
+	     .ifu_tlu_hwint_m		(ifu_tlu_hwint_m),
+	     .ifu_tlu_swint_m		(ifu_tlu_swint_m),
+	     .int_tlu_rstid_m		(int_tlu_rstid_m[5:0]),
+	     .tlu_int_pstate_ie		(local_pstate_ie[3:0]),
+	     .tlu_int_redmode		(tlu_int_redmode[3:0]),
+	     .tlu_sftint_id		(tlu_sftint_id[3:0]),
+	     .lsu_tlu_misalign_addr_ldst_atm_m(lsu_tlu_misalign_addr_ldst_atm_m),
+	     .exu_tlu_misalign_addr_jmpl_rtn_m(exu_tlu_misalign_addr_jmpl_rtn_m),
+	     .lsu_tlu_priv_action_g	(lsu_tlu_priv_action_g),
+	     .lsu_tlu_wtchpt_trp_g	(lsu_tlu_wtchpt_trp_g),
+	     .ifu_tlu_priv_violtn_m	(ifu_tlu_priv_violtn_m),
+	     .ifu_lsu_memref_d		(ifu_lsu_memref_d),
+	     .tlu_pstate_priv		(local_pstate_priv[`TLU_THRD_NUM-1:0]),
+	     .tlu_pstate_am		(tlu_pstate_am[3:0]),
+	     .tlu_isfsr_flt_vld		(tlu_isfsr_flt_vld[3:0]),
+	     .ffu_tlu_trap_ieee754	(ffu_tlu_trap_ieee754),
+	     .ffu_tlu_trap_other	(ffu_tlu_trap_other),
+	     .ffu_tlu_trap_ue	    (ffu_tlu_trap_ue),
+	     .ffu_ifu_tid_w2		(ffu_ifu_tid_w2[1:0]),
+	     .ffu_tlu_ill_inst_m		(ffu_tlu_ill_inst_m), // new trap from ffu
+	     .lsu_tlu_rsr_data_e	(lsu_tlu_rsr_data_e[7:0]),
+	     .lsu_tlu_squash_va_oor_m	(lsu_tlu_squash_va_oor_m),
+	     .tlu_hpstate_priv (tcl_hpstate_priv[`TLU_THRD_NUM-1:0]),
+         .tlu_hscpd_dacc_excpt_m(tlu_hscpd_dacc_excpt_m),
+         .tlu_qtail_dacc_excpt_m(tlu_qtail_dacc_excpt_m),
+         .tlu_htickcmp_rw_e (tlu_htickcmp_rw_e),
+         // .tlu_gl_rw_g (tlu_gl_rw_g),
+         .tlu_gl_rw_m (tlu_gl_rw_m),
+         .tlu_hpstate_enb  (tcl_hpstate_enb[`TLU_THRD_NUM-1:0]),
+         .tlu_hpstate_tlz  (tlu_hpstate_tlz[`TLU_THRD_NUM-1:0]),
+	     .ctu_sscan_tid    (ctu_sscan_tid[`TLU_THRD_NUM-1:0]),
+         .se         (se),
+	     .rclk		 (rclk),
+	     .grst_l	 (grst_l),
+	     .arst_l	 (arst_l),
+	     .rst_tri_en (mux_drive_disable));
+
+tlu_tdp tdp (  // tlu_tdp instance; sits on scan chain 1 between scan1_1 (in) and scan1_2 (out)
+	     .so			(scan1_2),  // scan-out; the same net is the scan-in of the tlu_hyperv instance
+	     .si			(scan1_1),
+	     .tsa_rdata	    ({tsa1_dout[`TSA1_HTSTATE_HI:`TSA1_HTSTATE_LO],  // read bus assembled from slices of both tsa0_dout and tsa1_dout
+                         // tsa0_dout[`TSA0_TPC_HI:`TSA0_TPC_LO],
+                         // tsa1_dout[`TSA1_TNPC_HI:`TSA1_TNPC_LO],
+                         tsa0_dout[`TSA0_TPC_HI-1:`TSA0_TPC_LO],  // HI-1: top bit of the TPC field is left out (full-width slice kept commented out above)
+                         tsa1_dout[`TSA1_TNPC_HI-1:`TSA1_TNPC_LO],  // HI-1: same treatment for the TNPC field
+                         tsa0_dout[`TSA0_TSTATE_HI:`TSA0_TSTATE_LO],
+                         tsa1_dout[`TSA1_TTYPE_HI:`TSA1_TTYPE_LO]}),
+	     .lsu_tlu_rsr_data_e (lsu_tlu_rsr_data_mod_e[7:0]),  // fed by the *_mod_e net (also wired to the tlu_tcl instance), not by lsu_tlu_rsr_data_e
+         .ifu_lsu_imm_asi_d           (ifu_lsu_imm_asi_d[7:0]),  // NOTE(review): 8 bits here; the tlu_mmu_ctl instance connects [8:0] of the same net
+         .ifu_lsu_imm_asi_vld_d       (ifu_lsu_imm_asi_vld_d),
+	     .tlu_lsu_redmode		(tlu_lsu_redmode[3:0]),
+         .tlu_exu_rsr_data_m    (tlu_exu_rsr_data_m[`TLU_ASR_DATA_WIDTH-1:0]),  
+         /*AUTOINST*/
+	     // Outputs
+         // modified for bug 3017
+         .tlu_restore_pc_w1 (tlu_restore_pc_w1[48:0]), 
+         .tlu_restore_npc_w1 (tlu_restore_npc_w1[48:0]), 
+	     .tlu_partial_trap_pc_w1 (tlu_partial_trap_pc_w1[33:0]),
+	     .tlu_pib_rsr_data_e	(tlu_pib_rsr_data_e[63:0]),
+         .tlu_asi_state_e       (tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0]),
+	     .tsa_wdata			    (tsa_wdata[`TLU_TSA_WIDTH-1:0]),  // write bus whose slices form the din of tsa0 and tsa1
+	     .tlu_int_pstate_ie		(tlu_int_pstate_ie[3:0]),
+	     .local_pstate_ie	    (local_pstate_ie[3:0]),  // this local_* net is what the tlu_tcl instance receives on its tlu_int_pstate_ie port
+	     .tlu_ifu_pstate_pef	(tlu_ifu_pstate_pef[3:0]),
+	     .tlu_lsu_pstate_cle	(tlu_lsu_pstate_cle[3:0]),
+	     .tlu_lsu_pstate_priv	(tlu_lsu_pstate_priv[3:0]),
+	     .tlu_int_redmode		(tlu_int_redmode[3:0]),
+	     .local_pstate_priv		(local_pstate_priv[`TLU_THRD_NUM-1:0]),
+	     .tlu_pstate_am		(tlu_pstate_am[3:0]),
+	     .tlu_sftint_id		(tlu_sftint_id[3:0]),
+	     .tlu_tick_incr_din		(tlu_tick_incr_din[61:0]),  // upper 62 bits of the tick_incr64 input
+	     .tlu_sscan_test_data  (tlu_sscan_test_data[`TDP_SSCAN_WIDTH-1:0]),
+         .tlu_dnrtry_global_g  (tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0]),
+         .tlu_hpstate_enb  (tlu_hpstate_enb[`TLU_THRD_NUM-1:0]),
+         .local_hpstate_enb  (local_hpstate_enb[`TLU_THRD_NUM-1:0]),  // local_* copies go to the tlu_hyperv instance
+         .tcl_hpstate_enb  (tcl_hpstate_enb[`TLU_THRD_NUM-1:0]),  // tcl_* copies go to the tlu_tcl instance
+         .tlu_hpstate_tlz  (tlu_hpstate_tlz[`TLU_THRD_NUM-1:0]),
+         .tlu_hpstate_priv  (tlu_hpstate_priv[`TLU_THRD_NUM-1:0]),
+         .local_hpstate_priv  (local_hpstate_priv[`TLU_THRD_NUM-1:0]),
+         .tcl_hpstate_priv  (tcl_hpstate_priv[`TLU_THRD_NUM-1:0]),
+         .tlu_hpstate_ibe  (tlu_hpstate_ibe[`TLU_THRD_NUM-1:0]),
+         .tlu_hintp        (tlu_hintp),
+	     // Inputs
+         .tlu_ibrkpt_trap_w2 (tlu_ibrkpt_trap_w2),
+         .pib_picl_wrap      (pib_picl_wrap[`TLU_THRD_NUM-1:0]), 
+         .pib_pich_wrap      (pib_pich_wrap[`TLU_THRD_NUM-1:0]), 
+         .tlu_por_rstint_g  (tlu_por_rstint_g[`TLU_THRD_NUM-1:0]),
+	     .rclk			    (rclk),
+	     .tlu_rst			(tlu_rst),
+         .tlu_trap_hpstate_enb  (tlu_trap_hpstate_enb),
+	     .tlu_thrd_wsel_w2		(tlu_thrd_wsel_w2[`TLU_THRD_NUM-1:0]),
+	     .tlu_final_ttype_w2		(tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+	     .tlu_pstate_din_sel0	(tlu_pstate_din_sel0[1:0]),
+	     .tlu_pstate_din_sel1	(tlu_pstate_din_sel1[1:0]),
+	     .tlu_pstate_din_sel2	(tlu_pstate_din_sel2[1:0]),
+	     .tlu_pstate_din_sel3	(tlu_pstate_din_sel3[1:0]),
+	     .tlu_wr_tsa_inst_w2		(tlu_wr_tsa_inst_w2),
+	     .lsu_asi_reg0		(lsu_asi_reg0[7:0]),
+	     .lsu_asi_reg1		(lsu_asi_reg1[7:0]),
+	     .lsu_asi_reg2		(lsu_asi_reg2[7:0]),
+	     .lsu_asi_reg3		(lsu_asi_reg3[7:0]),
+	     .tlu_tickcmp_sel		(tlu_tickcmp_sel[`TLU_THRD_NUM-1:0]),
+	     .exu_tlu_ccr0_w		(exu_tlu_ccr0_w[7:0]),
+	     .exu_tlu_ccr1_w		(exu_tlu_ccr1_w[7:0]),
+	     .exu_tlu_ccr2_w		(exu_tlu_ccr2_w[7:0]),
+	     .exu_tlu_ccr3_w		(exu_tlu_ccr3_w[7:0]),
+	     .exu_tlu_cwp0		(exu_tlu_cwp0[2:0]),
+	     .exu_tlu_cwp1		(exu_tlu_cwp1[2:0]),
+	     .exu_tlu_cwp2		(exu_tlu_cwp2[2:0]),
+	     .exu_tlu_cwp3		(exu_tlu_cwp3[2:0]),
+         .tlu_trap_cwp_en   (tlu_trap_cwp_en[`TLU_THRD_NUM-1:0]),
+         // modified for bug 3017
+	     // .ifu_tlu_pc_m		(ifu_tlu_pc_m[48:0]),
+	     // .ifu_tlu_npc_m		(ifu_tlu_npc_m[48:0]),
+         .tlu_pc_new_w          (tlu_pc_new_w[48:0]), 
+         .tlu_npc_new_w         (tlu_npc_new_w[48:0]), 
+	     .tlu_sftint_en_l_g		(tlu_sftint_en_l_g[`TLU_THRD_NUM-1:0]),
+	     .tlu_sftint_mx_sel		(tlu_sftint_mx_sel[`TLU_THRD_NUM-1:0]),
+	     .tlu_set_sftint_l_g	(tlu_set_sftint_l_g),
+	     .tlu_clr_sftint_l_g	(tlu_clr_sftint_l_g),
+	     .tlu_wr_sftint_l_g		(tlu_wr_sftint_l_g),
+	     .tlu_sftint_penc_sel	(tlu_sftint_penc_sel[3:0]),  // NOTE(review): literal [3:0]; the tlu_tcl connection uses [`TLU_THRD_NUM-1:0] — identical only if TLU_THRD_NUM is 4
+	     .tlu_tba_en_l		(tlu_tba_en_l[3:0]),
+	     .tlu_tick_en_l		(tlu_tick_en_l),
+	     .tlu_tickcmp_en_l		(tlu_tickcmp_en_l[3:0]),
+	     // .tlu_done_inst_m		(tlu_done_inst_m),
+	     // .tlu_dnrtry_inst_m		(tlu_dnrtry_inst_m),
+	     // .tlu_dnrtry_inst_m_l		(tlu_dnrtry_inst_m_l),
+	     .tlu_update_pc_l_w		(tlu_update_pc_l_w[3:0]),
+	     .tlu_tl_gt_0_w2		(tlu_tl_gt_0_w2),
+	     .tlu_select_tba_w2		(tdp_select_tba_w2),  // wired to tdp_select_tba_w2; the separate tlu_select_tba_w2 net goes to tlu_hyperv instead
+	     .tlu_select_redmode	(tlu_select_redmode),
+	     .tlu_update_pstate_l_w2	(tlu_update_pstate_l_w2[3:0]),
+	     .tlu_pil			(tlu_pil[3:0]),
+	     .tlu_trp_lvl		(tlu_trp_lvl[2:0]),
+	     .tlu_tick_npt		    (tlu_tick_npt),
+	     .tlu_thrd_rsel_e		(tlu_thrd_rsel_e[3:0]),
+	     .tlu_tick_incr_dout	(tlu_incr64_dout[60:0]),  // NOTE(review): tick_incr64 drives tlu_incr64_dout[61:0]; bit 61 is not connected here — confirm intended
+	     .tlu_rdpr_mx1_sel		(tlu_rdpr_mx1_sel[2:0]),
+	     .tlu_rdpr_mx2_sel		(tlu_rdpr_mx2_sel[2:0]),
+	     .tlu_rdpr_mx3_sel		(tlu_rdpr_mx3_sel[1:0]),
+	     .tlu_rdpr_mx4_sel		(tlu_rdpr_mx4_sel[1:0]),
+	     .tlu_rdpr_mx5_sel		(tlu_rdpr_mx5_sel[2:0]),
+	     .tlu_rdpr_mx6_sel		(tlu_rdpr_mx6_sel[2:0]),
+	     .tlu_rdpr_mx7_sel		(tlu_rdpr_mx7_sel[3:0]),
+	     .ctu_sscan_tid          (ctu_sscan_tid[`TLU_THRD_NUM-1:0]),
+         .tlu_gl_lvl0  (tlu_gl_lvl0[`TSA_GLOBAL_WIDTH-1:0]),  // tlu_gl_lvl* and tlu_hpstate_din_sel* are also connected to the tlu_hyperv instance
+         .tlu_gl_lvl1  (tlu_gl_lvl1[`TSA_GLOBAL_WIDTH-1:0]),
+         .tlu_gl_lvl2  (tlu_gl_lvl2[`TSA_GLOBAL_WIDTH-1:0]),
+         .tlu_gl_lvl3  (tlu_gl_lvl3[`TSA_GLOBAL_WIDTH-1:0]),
+         .tlu_hpstate_din_sel0  (tlu_hpstate_din_sel0[1:0]),
+         .tlu_hpstate_din_sel1  (tlu_hpstate_din_sel1[1:0]),
+         .tlu_hpstate_din_sel2  (tlu_hpstate_din_sel2[1:0]),
+         .tlu_hpstate_din_sel3  (tlu_hpstate_din_sel3[1:0]),
+         .tlu_htba_en_l  (tlu_htba_en_l[`TLU_THRD_NUM-1:0]),
+         .tlu_htickcmp_en_l  (tlu_htickcmp_en_l[`TLU_THRD_NUM-1:0]),
+         .tlu_htickcmp_intdis  (tlu_htickcmp_intdis),
+         .tlu_pc_mxsel_w2 (tlu_pc_mxsel_w2),
+         .tlu_set_hintp_sel_g  (tlu_set_hintp_sel_g[`TLU_THRD_NUM-1:0]),
+         .tlu_stickcmp_en_l  (tlu_stickcmp_en_l[`TLU_THRD_NUM-1:0]),
+         .tlu_update_hpstate_l_w2  (tlu_update_hpstate_l_w2[`TLU_THRD_NUM-1:0]),
+         .tlu_wr_hintp_g  (tlu_wr_hintp_g[`TLU_THRD_NUM-1:0]),
+         .tlu_wsr_data_w        (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-1:0]),  // full-width bus here; the tlu_tcl instance connects only bits [3:0]
+	     .se			(se));
+
+// modified for Niagara SRAMs methodology
+
+bw_r_rf32x80 tsa0 (  // first of two bw_r_rf32x80 arrays written from tsa_wdata; stores its PC and GL..CWP slices
+	     // Outputs
+	     .dout 	     (tsa0_dout[`TSA_MEM_WIDTH-1:0]),  // slices of this bus feed tsa_rdata of the tlu_tdp instance
+         .so         (short_scan0_1),   // short scan chain order: short_si0 -> tsa0 -> tsa1 -> mmu_dp -> mra
+	     // Inputs
+	     .wr_adr     ({tsa_wr_tid[1:0],tsa_wr_tpl[2:0]}),  // 5-bit write address: 2-bit tid in the upper bits, 3-bit tpl in the lower bits
+	     .wr_en		 (tsa_wr_vld[0]),  // bit 0 of tsa_wr_vld enables tsa0; bit 1 enables tsa1
+	     .nib_wr_en	 ({{12{tsa_pc_en}},  // 20 enable bits in total: upper 12 replicate tsa_pc_en ...
+                      { 8{tsa_tstate_en}}}),   // ... lower 8 replicate tsa_tstate_en
+	     .rd_adr	 ({tsa_rd_tid[1:0],tsa_rd_tpl[2:0]}),  // read address built the same way; tsa0 and tsa1 share it
+	     .rd_en		 (tsa_rd_en),
+	     .din		 ({1'b0, tsa_wdata[`TLU_PC_HI:`TLU_PC_LO],  // upper part: one zero pad bit + PC slice of tsa_wdata
+                       3'b0, tsa_wdata[`TLU_GL_HI:`TLU_CWP_LO]}),  // lower part: three zero pad bits + the GL_HI..CWP_LO slice
+         .reset_l    (arst_l),
+         .rst_tri_en (mem_write_disable),  // the arrays take mem_write_disable here, whereas the logic blocks take mux_drive_disable
+         .sehold     (sehold),
+         .se         (se),
+         .si         (short_si0),  // head of the short scan chain
+         .rclk	     (rclk));
+
+bw_r_rf32x80 tsa1 (  // second bw_r_rf32x80 array written from tsa_wdata; stores its HTSTATE, NPC and TT slices
+	     // Outputs
+	     .dout 	     (tsa1_dout[`TSA_MEM_WIDTH-1:0]),  // slices of this bus feed tsa_rdata of the tlu_tdp instance
+         .so         (short_scan0_2),   // continues the short scan chain into the tlu_mmu_dp instance
+	     // Inputs
+	     .wr_adr     ({tsa_wr_tid[1:0],tsa_wr_tpl[2:0]}),  // same 5-bit write address as tsa0
+	     .wr_en		 (tsa_wr_vld[1]),  // bit 1 of tsa_wr_vld enables tsa1; bit 0 enables tsa0
+	     .nib_wr_en	 ({ 4'h0, // unused 
+                      { 1{tsa_htstate_en}},  // 20 enable bits in total: 4 tied off, 1 htstate, 12 npc, 3 ttype
+                      {12{tsa_npc_en}},
+                      { 3{tsa_ttype_en}}}),
+	     .rd_adr	 ({tsa_rd_tid[1:0],tsa_rd_tpl[2:0]}),  // same read address and enable as tsa0
+	     .rd_en		 (tsa_rd_en),
+	     .din		 ({16'h0000, // unused bits
+                       tsa_wdata[`TLU_HTSTATE_HI:`TLU_HTSTATE_LO],
+                       1'b0, tsa_wdata[`TLU_NPC_HI:`TLU_NPC_LO],  // one zero pad bit + NPC slice, mirroring the PC slice in tsa0
+                       3'b0, tsa_wdata[`TLU_TT_HI:`TLU_TT_LO]}),  // three zero pad bits + TT slice
+         .reset_l    (arst_l),
+         .rst_tri_en (mem_write_disable),  // the arrays take mem_write_disable here, whereas the logic blocks take mux_drive_disable
+         .sehold     (sehold),
+         .se         (se),
+         .si         (short_scan0_1),  // scan-in comes from tsa0.so
+         .rclk	     (rclk));
+
+// replaced with softmacro from the library
+tlu_incr64	tick_incr64 (  // tlu_incr64 instance with 64-bit in/out buses (presumably a +1 incrementer, judging by the name — confirm in tlu_incr64)
+                  .out  ({tlu_incr64_dout[61:0], tlu_tckctr_in[1:0]}),  // result split 62 + 2: upper bits to tlu_incr64_dout, low two to tlu_tckctr_in
+                  .in   ({tlu_tick_incr_din[61:0], tlu_incr_tick[1:0]})  // operand joined 62 + 2: upper bits from the tlu_tdp instance, low two from tlu_incr_tick
+);
+/*
+zzinc64 tick_incr64 (
+                  .out  ({tlu_incr64_dout[61:0], tlu_tckctr_in[1:0]}),
+                  .in   ({tlu_tick_incr_din[61:0], tlu_incr_tick[1:0]})
+);
+*/
+
+tlu_mmu_ctl mmu_ctl (  // tlu_mmu_ctl instance; scan path runs from scan0_1 (in) to so0 (out)
+		                 .so		(so0),
+                     .si(scan0_1),
+		     .lsu_tlu_st_rs3_data_b12t0_g(lsu_tlu_st_rs3_data_g[12:0]),  // this instance sees only bits [12:0], [48] and [62:61] of lsu_tlu_st_rs3_data_g
+		     .lsu_tlu_st_rs3_data_b48_g(lsu_tlu_st_rs3_data_g[48]),
+		     //.lsu_tlu_st_rs3_data_b10t8_g(lsu_tlu_st_rs3_data_g[10:8]),
+	     	     .tlu_sun4r_tte_g	(tlu_sun4r_tte_g),
+		     .ifu_tlu_flush_m	(ifu_tlu_flush_m),
+		     .tlu_mmu_early_flush_pipe_w (tlu_exu_early_flush_pipe_w),  // port name differs from the net: fed by tlu_exu_early_flush_pipe_w
+		     .lsu_mmu_early_flush_w (lsu_tlu_early_flush_w),  // fed by lsu_tlu_early_flush_w, the same net the tlu_tcl instance receives
+		     .lsu_mmu_flush_pipe_w (lsu_mmu_flush_pipe_w),
+		     .dmmu_sfsr_trp_wr	(lsu_dmmu_sfsr_trp_wr[3:0]),
+		     .rst_tri_en	(mux_drive_disable),
+	     	     .ifu_tlu_priv_violtn_m	(ifu_tlu_priv_violtn_m),
+		     // MMU_ASI_RD_CHANGE
+		     .lsu_exu_ldxa_m	(lsu_exu_ldxa_m),
+	     	     .ifu_lsu_memref_d	(ifu_lsu_memref_d),
+		     .ifu_lsu_imm_asi_d	(ifu_lsu_imm_asi_d[8:0]),  // NOTE(review): 9 bits [8:0] here, while the tlu_tdp instance connects only [7:0] of the same net — confirm port widths
+		     .ifu_lsu_thrid_s	(ifu_lsu_thrid_s[1:0]),
+	     	     .lsu_asi_reg0	(lsu_asi_reg0[7:0]),
+	             .lsu_asi_reg1	(lsu_asi_reg1[7:0]),
+	             .lsu_asi_reg2	(lsu_asi_reg2[7:0]),
+	             .lsu_asi_reg3	(lsu_asi_reg3[7:0]),
+		     .tlu_lng_ltncy_en_l(tlu_lng_ltncy_en_l),  // these three nets (through tlu_tsb_base_w2_d1) are shared with the tlu_mmu_dp instance
+		     .tlu_tsb_rd_ps0_sel (tlu_tsb_rd_ps0_sel),
+		     .tlu_tsb_base_w2_d1 (tlu_tsb_base_w2_d1[47:13]),
+	     	     .tlu_lsu_pstate_am	(tlu_lsu_pstate_am[3:0]),
+	     	     .exu_tlu_va_oor_m	(exu_tlu_va_oor_m),
+	     	     .exu_tlu_va_oor_jl_ret_m	(exu_tlu_va_oor_jl_ret_m),
+	     	     .tlu_lsu_tl_zero	(tlu_lsu_tl_zero[3:0]),
+         	     .lsu_mmu_defr_trp_taken_g (lsu_mmu_defr_trp_taken_g),
+		     /*AUTOINST*/
+		     // Outputs
+		     .tlu_tlb_access_en_l_d1 (tlu_tlb_access_en_l_d1),
+		     .mra_byte_wen	(mra_byte_wen[19:0]),  // drives byte_wen of the mra register file
+	     	     .tlu_tag_access_ctxt_sel_m	(tlu_tag_access_ctxt_sel_m[2:0]),
+  		     .tlu_tlb_tag_invrt_parity(tlu_tlb_tag_invrt_parity),
+  		     .tlu_tlb_data_invrt_parity(tlu_tlb_data_invrt_parity),
+  		     .lsu_ifu_inj_ack	(lsu_ifu_inj_ack[3:0]),
+		     .dmmu_any_sfsr_wr	(dmmu_any_sfsr_wr),
+		     .dmmu_sfsr_wr_en_l	(dmmu_sfsr_wr_en_l[3:0]),
+		     .dmmu_sfar_wr_en_l	(dmmu_sfar_wr_en_l[3:0]),
+		     .immu_any_sfsr_wr	(immu_any_sfsr_wr),
+		     .immu_sfsr_wr_en_l	(immu_sfsr_wr_en_l[3:0]),
+		     .tlu_tte_tag_g	(tlu_tte_tag_g[2:0]),
+		     .tlu_dtlb_rw_index_vld_g(tlu_dtlb_rw_index_vld_g),
+		     .tlu_dtlb_rw_index_g(tlu_dtlb_rw_index_g[5:0]),
+		     .tlu_dtlb_data_rd_g(tlu_dtlb_data_rd_g),
+		     .tlu_dtlb_tag_rd_g	(tlu_dtlb_tag_rd_g),
+		     .tlu_itlb_rw_index_vld_g(tlu_itlb_rw_index_vld_g),
+		     .tlu_itlb_wr_vld_g	(tlu_itlb_wr_vld_g),
+		     .itlb_wr_vld_g	(itlb_wr_vld_g),
+		     .tlu_itlb_rw_index_g(tlu_itlb_rw_index_g[5:0]),
+		     .tlu_itlb_data_rd_g(tlu_itlb_data_rd_g),
+		     .tlu_itlb_tag_rd_g	(tlu_itlb_tag_rd_g),
+		     .tlu_idtsb_8k_ptr	(tlu_idtsb_8k_ptr[47:0]),
+		     .tlu_dtlb_invalidate_all_g(tlu_dtlb_invalidate_all_g),
+		     .tlu_itlb_invalidate_all_g(tlu_itlb_invalidate_all_g),
+		     .tlu_slxa_thrd_sel	(tlu_slxa_thrd_sel[3:0]),
+		     .tlu_lsu_ldxa_tid_w2(tlu_lsu_ldxa_tid_w2[1:0]),
+		     .tlu_itlb_dmp_vld_g(tlu_itlb_dmp_vld_g),
+		     .tlu_itlb_dmp_all_g(tlu_itlb_dmp_all_g),
+		     .tlu_itlb_dmp_actxt_g(tlu_itlb_dmp_actxt_g),
+		     .tlu_itlb_dmp_nctxt_g(tlu_itlb_dmp_nctxt_g),
+		     .tlu_dtlb_dmp_vld_g(tlu_dtlb_dmp_vld_g),
+		     //.tlu_dtlb_dmp_by_ctxt_g(tlu_dtlb_dmp_by_ctxt_g),
+		     .tlu_dtlb_dmp_all_g(tlu_dtlb_dmp_all_g),
+		     .tlu_dtlb_dmp_pctxt_g(tlu_dtlb_dmp_pctxt_g),
+		     .tlu_dtlb_dmp_sctxt_g(tlu_dtlb_dmp_sctxt_g),
+		     .tlu_dtlb_dmp_nctxt_g(tlu_dtlb_dmp_nctxt_g),
+		     .tlu_dtlb_dmp_actxt_g(tlu_dtlb_dmp_actxt_g),
+		     .tlu_idtlb_dmp_thrid_g(tlu_idtlb_dmp_thrid_g[1:0]),
+		     .tlu_dmp_key_vld_g	(tlu_dmp_key_vld_g[4:0]),
+		     .tlu_int_asi_load	(tlu_int_asi_load),
+		     // .tlu_int_asi_store	(tlu_int_asi_store),
+		     .tlu_int_asi_thrid	(tlu_int_asi_thrid[1:0]),
+		     .tlu_int_asi_vld	(tlu_int_asi_vld),
+		     .tlb_access_rst_l	(tlb_access_rst_l),
+		     .tlu_lsu_stxa_ack	(tlu_lsu_stxa_ack),
+		     .tlu_lsu_stxa_ack_tid(tlu_lsu_stxa_ack_tid[1:0]),
+		     .mra_wr_ptr	(mra_wr_ptr[3:0]),  // write/read address and valid nets for the mra register file
+		     .mra_rd_ptr	(mra_rd_ptr[3:0]),
+		     .mra_wr_vld	(mra_wr_vld),
+		     .mra_rd_vld	(mra_rd_vld),
+		     .tag_access_wdata_sel(tag_access_wdata_sel[2:0]),
+		     .tlu_admp_key_sel	(tlu_admp_key_sel),
+		     // .tlu_mmu_sync_data_excp_g(tlu_mmu_sync_data_excp_g),
+		     .tlu_tte_wr_pid_g	(tlu_tte_wr_pid_g[2:0]),
+		     .tlu_lsu_ldxa_async_data_vld(tlu_lsu_ldxa_async_data_vld),
+		     .tlu_tte_real_g	(tlu_tte_real_g),
+		     .tlu_ldxa_l1mx1_sel(tlu_ldxa_l1mx1_sel[3:0]),
+		     .tlu_ldxa_l1mx2_sel(tlu_ldxa_l1mx2_sel[3:0]),
+		     .tlu_ldxa_l2mx1_sel(tlu_ldxa_l2mx1_sel[2:0]),
+		     // Inputs
+		     .tlu_itag_acc_sel_g (tlu_itag_acc_sel_g),
+         	     .sehold     	(sehold),
+         	     .spu_tlu_rsrv_illgl_m  (1'b0),   // tied to constant 0
+		     .ifu_mmu_trap_m	(ifu_mmu_trap_m),
+		     .ffu_tlu_ill_inst_m(ffu_tlu_ill_inst_m),
+	             .ifu_tlu_inst_vld_m (ifu_tlu_inst_vld_m_bf1),  // uses the _bf1 net; the tlu_tcl instance connects ifu_tlu_inst_vld_m itself
+		     .exu_lsu_priority_trap_m(exu_lsu_priority_trap_m),
+		     .exu_mmu_early_va_e (exu_mmu_early_va_e[7:0]),
+		     .tlu_tag_access_ctxt_g (tlu_tag_access_ctxt_g[12:0]),
+  		     .ifu_lsu_error_inj	(ifu_lsu_error_inj[3:0]), 
+		     .lsu_tlu_nucleus_ctxt_m (lsu_tlu_nucleus_ctxt_m),
+		     .lsu_tlu_tte_pg_sz_g (lsu_tlu_tte_pg_sz_g[2:0]),
+		     .ifu_lsu_ld_inst_e	(ifu_lsu_ld_inst_e),
+		     .ifu_lsu_st_inst_e	(ifu_lsu_st_inst_e),
+		     .lsu_tlu_dmmu_miss_g(lsu_tlu_dmmu_miss_g),
+		     .tlu_dtsb_split_w2	(tlu_dtsb_split_w2),
+		     .tlu_dtsb_size_w2	(tlu_dtsb_size_w2[3:0]),
+		     .tlu_dtag_access_w2(tlu_dtag_access_w2[47:13]),
+		     .tlu_itsb_split_w2	(tlu_itsb_split_w2),
+		     .tlu_itsb_size_w2	(tlu_itsb_size_w2[3:0]),
+		     .tlu_ctxt_cfg_w2	(tlu_ctxt_cfg_w2[5:0]),
+		     //.tlu_tag_access_nctxt_g(tlu_tag_access_nctxt_g),
+		     .lsu_tlu_st_rs3_data_g(lsu_tlu_st_rs3_data_g[62:61]),  // only bits [62:61] despite the unsuffixed port name
+		     .ifu_tlu_immu_miss_m(ifu_tlu_immu_miss_m),
+		     // .ifu_tlu_thrid_e	(ifu_tlu_thrid_e[1:0]),
+		     .ifu_lsu_alt_space_e(ifu_lsu_alt_space_e),
+		     .ifu_tlu_alt_space_d(ifu_tlu_alt_space_d),
+		     .lsu_tlu_dtlb_done	(lsu_tlu_dtlb_done),
+		     .ifu_tlu_itlb_done	(ifu_tlu_itlb_done),
+		     .lsu_tlu_tlb_asi_state_m(lsu_tlu_tlb_asi_state_m[7:0]),
+		     .lsu_tlu_tlb_ldst_va_m(lsu_tlu_tlb_ldst_va_m[10:0]),
+		     .lsu_tlu_tlb_ld_inst_m(lsu_tlu_tlb_ld_inst_m),
+		     .lsu_tlu_tlb_st_inst_m(lsu_tlu_tlb_st_inst_m),
+		     .lsu_tlu_tlb_access_tid_m(lsu_tlu_tlb_access_tid_m[1:0]),
+		     .immu_sfsr_trp_wr	(immu_sfsr_trp_wr[3:0]),
+		     .lsu_tlu_daccess_excptn_g(lsu_tlu_daccess_excptn_g),
+		     .lsu_tlu_daccess_prot_g(lsu_tlu_daccess_prot_g),
+		     .lsu_pid_state0	(lsu_pid_state0[2:0]),
+		     .lsu_pid_state1	(lsu_pid_state1[2:0]),
+		     .lsu_pid_state2	(lsu_pid_state2[2:0]),
+		     .lsu_pid_state3	(lsu_pid_state3[2:0]),
+		     .rclk		(rclk),
+		     .se		(se),
+		     .grst_l		(grst_l),
+		     .arst_l		(arst_l));
+
+/*
+tlu_mmu_dp AUTO_TEMPLATE ();
+*/
+
+
+tlu_mmu_dp mmu_dp (  // tlu_mmu_dp instance; on the short scan chain between tsa1 (short_scan0_2) and mra (short_scan0_3)
+           .so (short_scan0_3),
+           .si (short_scan0_2),
+		   .ifu_tlu_pc_m	(ifu_tlu_pc_m[47:13]),  // only bits [47:13] of the PC net are connected
+		   .lsu_exu_ldxa_data_g	(lsu_exu_ldxa_data_g[63:0]),
+		   .tlu_dsfsr_din_g	(lsu_dsfsr_din_g[23:0]),  // port name differs from the net: fed by lsu_dsfsr_din_g
+		   // MMU_ASI_RD_CHANGE
+		   .tlu_lng_ltncy_en_l(tlu_lng_ltncy_en_l),  // these three nets are shared with the tlu_mmu_ctl instance
+		   .tlu_tsb_rd_ps0_sel (tlu_tsb_rd_ps0_sel),
+		   .tlu_tsb_base_w2_d1 (tlu_tsb_base_w2_d1[47:13]),
+		   /*AUTOINST*/
+		   // Outputs
+		   .tlu_tag_access_ctxt_g (tlu_tag_access_ctxt_g[12:0]),
+		   .tlu_ctxt_cfg_w2	(tlu_ctxt_cfg_w2[5:0]),
+		   .tlu_dtsb_split_w2	(tlu_dtsb_split_w2),
+		   .tlu_dtsb_size_w2	(tlu_dtsb_size_w2[3:0]),
+		   .tlu_dtag_access_w2	(tlu_dtag_access_w2[47:13]),
+		   .tlu_itsb_split_w2	(tlu_itsb_split_w2),
+		   .tlu_itsb_size_w2	(tlu_itsb_size_w2[3:0]),
+		   .tlu_itlb_tte_tag_w2	(tlu_itlb_tte_tag_w2[58:0]),
+		   .tlu_itlb_tte_data_w2(tlu_itlb_tte_data_w2[42:0]),
+		   .tlu_dtlb_tte_tag_w2	(tlu_dtlb_tte_tag_w2[58:0]),
+		   .tlu_dtlb_tte_data_w2(tlu_dtlb_tte_data_w2[42:0]),
+		   // .tlu_lsu_ldxa_data_w2(tlu_lsu_ldxa_data_w2[63:0]),
+		   .tlu_idtlb_dmp_key_g	(tlu_idtlb_dmp_key_g[40:0]),
+		   .tlu_dsfsr_flt_vld	(tlu_dsfsr_flt_vld[3:0]),
+		   .tlu_isfsr_flt_vld	(tlu_isfsr_flt_vld[3:0]),
+		   //.tlu_tag_access_nctxt_g(tlu_tag_access_nctxt_g),
+		   .mra_wdata		(mra_wdata[155:0]),  // written into the mra array as din = {mra_wdata[155:0], 4'b0000}
+		   // Inputs
+		   .tlu_tlb_access_en_l_d1 (tlu_tlb_access_en_l_d1),
+	           .tlu_sun4r_tte_g	(tlu_sun4r_tte_g),
+  		   .tlu_tlb_tag_invrt_parity(tlu_tlb_tag_invrt_parity),
+  		   .tlu_tlb_data_invrt_parity(tlu_tlb_data_invrt_parity),
+		   .tlu_addr_msk_g	(tlu_addr_msk_g),
+		   .dmmu_any_sfsr_wr	(dmmu_any_sfsr_wr),
+		   .dmmu_sfsr_wr_en_l	(dmmu_sfsr_wr_en_l[3:0]),
+		   .immu_any_sfsr_wr	(immu_any_sfsr_wr),
+		   .immu_sfsr_wr_en_l	(immu_sfsr_wr_en_l[3:0]),
+		   .lsu_tlu_dside_ctxt_m(lsu_tlu_dside_ctxt_m[12:0]),
+		   .lsu_tlu_pctxt_m	(lsu_tlu_pctxt_m[12:0]),
+		   .tlu_tag_access_ctxt_sel_m(tlu_tag_access_ctxt_sel_m[2:0]),
+		   .lsu_tlu_st_rs3_data_b63t59_g(lsu_tlu_st_rs3_data_g[63:59]),  // this instance sees bits [63:59] and [47:0] of lsu_tlu_st_rs3_data_g
+		   .lsu_tlu_st_rs3_data_b47t0_g(lsu_tlu_st_rs3_data_g[47:0]),
+		   .exu_lsu_ldst_va_e	(exu_lsu_ldst_va_e[`ASI_VA_WIDTH-1:0]),
+		   .tlu_idtsb_8k_ptr	(tlu_idtsb_8k_ptr[47:0]),
+		   .lsu_tlu_tlb_dmp_va_m(lsu_tlu_tlb_dmp_va_m[47:13]),
+		   .tlu_slxa_thrd_sel	(tlu_slxa_thrd_sel[3:0]),
+		   .tlu_tte_tag_g	(tlu_tte_tag_g[2:0]),
+		   .tlu_dmp_key_vld_g	(tlu_dmp_key_vld_g[4:0]),
+		   .tlb_access_rst_l	(tlb_access_rst_l),
+		   .tag_access_wdata_sel(tag_access_wdata_sel[2:0]),
+		   .mra_rdata		(mra_rdata[159:10]),  // NOTE(review): read slice is [159:10] while the write data sits at din[159:4]; if dout mirrors din, din[9:4] is never read back here — confirm intended
+		   .tlu_admp_key_sel	(tlu_admp_key_sel),
+		   .tlu_isfsr_din_g	(tlu_isfsr_din_g[23:0]),
+		   .tlu_tte_wr_pid_g	(tlu_tte_wr_pid_g[2:0]),
+		   .tlu_tte_real_g	(tlu_tte_real_g),
+		   .tlu_ldxa_l1mx1_sel	(tlu_ldxa_l1mx1_sel[3:0]),
+		   .tlu_ldxa_l1mx2_sel	(tlu_ldxa_l1mx2_sel[3:0]),
+		   .tlu_ldxa_l2mx1_sel	(tlu_ldxa_l2mx1_sel[2:0]),
+		   .rclk		(rclk),
+		   .arst_l		(arst_l),
+		   .grst_l		(grst_l),
+            .se (se),
+		   .dmmu_sfar_wr_en_l	(dmmu_sfar_wr_en_l[3:0]));  // last live connection; the rst_l port below is commented out
+        //   .rst_l		(tlu_rst_l));
+
+tlu_hyperv tlu_hyperv (/*AUTOINST*/  // tlu_hyperv instance; on scan chain 1 between tdp (scan1_2) and tlu_pib (scan1_3)
+         .so(scan1_3),
+         .si(scan1_2),
+	     .grst_l	 (grst_l),
+	     .arst_l	 (arst_l),
+	     .rst_tri_en (mux_drive_disable),
+         // output
+         // modified for timing
+         // .tlu_gl_rw_g (tlu_gl_rw_g),
+         .tlu_gl_rw_m (tlu_gl_rw_m),
+         .tlu_gl_lvl0 (tlu_gl_lvl0[`TSA_GLOBAL_WIDTH-1:0]),  // tlu_gl_lvl* and tlu_hpstate_din_sel* are also connected to the tlu_tdp instance
+         .tlu_gl_lvl1 (tlu_gl_lvl1[`TSA_GLOBAL_WIDTH-1:0]),
+         .tlu_gl_lvl2 (tlu_gl_lvl2[`TSA_GLOBAL_WIDTH-1:0]),
+         .tlu_gl_lvl3 (tlu_gl_lvl3[`TSA_GLOBAL_WIDTH-1:0]),
+         .tlu_hpstate_din_sel0 (tlu_hpstate_din_sel0[1:0]),
+         .tlu_hpstate_din_sel1 (tlu_hpstate_din_sel1[1:0]),
+         .tlu_hpstate_din_sel2 (tlu_hpstate_din_sel2[1:0]),
+         .tlu_hpstate_din_sel3 (tlu_hpstate_din_sel3[1:0]),
+         .tlu_htickcmp_rw_e (tlu_htickcmp_rw_e),
+         // .tlu_update_hpstate_l_g (tlu_update_hpstate_l_g[`TLU_THRD_NUM-1:0]),
+         .tlu_update_hpstate_l_w2 (tlu_update_hpstate_l_w2[`TLU_THRD_NUM-1:0]),
+         // .tlu_htickcmp_en_l (tlu_htickcmp_en_l[`TLU_THRD_NUM-1:0]),
+         .tlu_htickcmp_intdis (tlu_htickcmp_intdis),
+         // .tlu_hintp_en_l_g (tlu_hintp_en_l_g[`TLU_THRD_NUM-1:0]),
+         .tlu_wr_hintp_g (tlu_wr_hintp_g[`TLU_THRD_NUM-1:0]),
+         // .tlu_set_hintp_g (tlu_set_hintp_g[`TLU_THRD_NUM-1:0]),
+         .tlu_set_hintp_sel_g (tlu_set_hintp_sel_g[`TLU_THRD_NUM-1:0]),
+         .tlu_htba_en_l (tlu_htba_en_l[`TLU_THRD_NUM-1:0]),
+         // .tlu_hyper_lite (tlu_hyper_lite[`TLU_THRD_NUM-1:0]),
+         .tlu_hscpd_dacc_excpt_m (tlu_hscpd_dacc_excpt_m),
+         .tlu_qtail_dacc_excpt_m (tlu_qtail_dacc_excpt_m),
+         .tlu_scpd_rd_vld_m (tlu_scpd_rd_vld_m),
+         // .tlu_scpd_rd_vld_g (tlu_scpd_rd_vld_g),
+         .tlu_scpd_wr_vld_g (tlu_scpd_wr_vld_g),
+         .tlu_scpd_rd_addr_m (tlu_scpd_rd_addr_m[`SCPD_RW_ADDR_WIDTH-1:0]),
+         .tlu_scpd_wr_addr_g (tlu_scpd_wr_addr_g[`SCPD_RW_ADDR_WIDTH-1:0]),
+         .tlu_asi_queue_rdata_g(tlu_asi_queue_rdata_g[`TLU_ASI_QUE_WIDTH-1:0]),
+         .tlu_ld_data_vld_g(tlu_ld_data_vld_g),
+         .tlu_asi_queue_rd_vld_g(tlu_asi_queue_rd_vld_g),
+		 // .tlu_va_all_zero_g(tlu_va_all_zero_g),
+		 .tlu_va_ill_g(tlu_va_ill_g),
+         .tlu_htstate_rw_d (tlu_htstate_rw_d),
+         .tlu_htstate_rw_g (tlu_htstate_rw_g),
+	     // .tlu_htba_mx2_sel (tlu_htba_mx2_sel),
+         // .tlu_rdpr_mx5_sel (tlu_rdpr_mx5_sel[3:0]),
+         .tlu_hyperv_rdpr_sel (tlu_hyperv_rdpr_sel[4:0]),
+         // .tlu_rdpr_mx5_active (tlu_rdpr_mx5_active),
+	     .tlu_exu_agp (tlu_exu_agp[`TSA_GLOBAL_WIDTH-1:0]),
+	     .tlu_exu_agp_swap (tlu_exu_agp_swap),
+         .tlu_cpu_mondo_cmp (tlu_cpu_mondo_cmp[`TLU_THRD_NUM-1:0]),
+         .tlu_dev_mondo_cmp (tlu_dev_mondo_cmp[`TLU_THRD_NUM-1:0]),
+         .tlu_resum_err_cmp (tlu_resum_err_cmp[`TLU_THRD_NUM-1:0]),
+	     // .tlu_exu_agp_tid	(tlu_exu_agp_tid[1:0]),
+		 .tlu_asi_write_g	(tlu_asi_write_g),
+		.inc_ind_asi_wr_inrr(inc_ind_asi_wr_inrr[`TLU_THRD_NUM-1:0]),
+		.inc_ind_asi_wr_indr(inc_ind_asi_wr_indr[`TLU_THRD_NUM-1:0]),
+		.inc_ind_asi_rd_invr(inc_ind_asi_rd_invr[`TLU_THRD_NUM-1:0]),
+		.tlu_local_thrid_g(tlu_local_thrid_g[`TLU_THRD_NUM-1:0]),
+         // input
+         .tlu_por_rstint_g (tlu_por_rstint_g[`TLU_THRD_NUM-1:0]),
+         // .tlu_wsr_inst_g (tlu_wsr_inst_g),
+         .tlu_wsr_inst_nq_g (tlu_wsr_inst_nq_g),
+         // .ifu_tlu_thrid_e (ifu_tlu_thrid_e[1:0]),
+         .ifu_tlu_thrid_d (ifu_tlu_thrid_d[1:0]),
+         .ifu_tlu_sraddr_d (ifu_tlu_sraddr_d_v2[`TLU_ASR_ADDR_WIDTH-1:0]),  // uses the _v2 net; the tlu_tcl instance connects ifu_tlu_sraddr_d
+         .tlu_wsr_data_w_global (tlu_wsr_data_w[`TLU_GLOBAL_WIDTH-1:0]),  // NOTE(review): sliced with TLU_GLOBAL_WIDTH while neighbouring ports use TSA_GLOBAL_WIDTH — confirm both macros agree
+         .tlu_dnrtry_global_g (tlu_dnrtry_global_g[`TSA_GLOBAL_WIDTH-1:0]),
+         .tlu_dnrtry0_inst_g (tlu_dnrtry0_inst_g),
+         .tlu_dnrtry1_inst_g (tlu_dnrtry1_inst_g),
+         .tlu_dnrtry2_inst_g (tlu_dnrtry2_inst_g),
+         .tlu_dnrtry3_inst_g (tlu_dnrtry3_inst_g),
+         // modified due to timing
+         // .tlu_thrd0_traps (tlu_thrd0_traps),
+         // .tlu_thrd1_traps (tlu_thrd1_traps),
+         // .tlu_thrd2_traps (tlu_thrd2_traps),
+         // .tlu_thrd3_traps (tlu_thrd3_traps),
+         // .tlu_select_tba_g (tlu_select_tba_g),
+         .tlu_thrd_traps_w2 (tlu_thrd_traps_w2[`TLU_THRD_NUM-1:0]),
+         .tlu_select_tba_w2 (tlu_select_tba_w2),  // the tlu_tdp instance gets tdp_select_tba_w2 on its port of the same name
+         .tlu_tick_ctl_din (tlu_tick_ctl_din),
+         // .tlu_htick_match (tlu_htick_match),
+         .tlu_tickcmp_sel (tlu_tickcmp_sel[`TLU_THRD_NUM-1:0]),
+         .tlu_pstate_priv (local_pstate_priv[`TLU_THRD_NUM-1:0]),  // the three ports from here take the local_* nets driven by the tlu_tdp instance
+         .tlu_hpstate_priv (local_hpstate_priv[`TLU_THRD_NUM-1:0]),
+         .tlu_hpstate_enb (local_hpstate_enb[`TLU_THRD_NUM-1:0]),
+         .ifu_lsu_alt_space_e (ifu_lsu_alt_space_e),
+         .ifu_lsu_ld_inst_e (ifu_lsu_ld_inst_e),
+         .ifu_lsu_st_inst_e (ifu_lsu_st_inst_e),
+         .tlu_asi_state_e (tlu_asi_state_e[`TLU_ASI_STATE_WIDTH-1:0]),
+         // new signal to replace ifu_tlu_flush_w
+         // .ifu_tlu_flush_w (ifu_tlu_flush_w),
+		 // .tlu_flush_pipe_w (tlu_flush_pipe_w),
+		 // .tlu_flush_all_w (tlu_flush_all_w),
+         .lsu_tlu_early_flush_w (lsu_tlu_early_flush2_w),  // uses the flush2 net; tlu_tcl and tlu_mmu_ctl connect lsu_tlu_early_flush_w
+	     .tlu_local_flush_w	(tlu_local_flush_w),
+		 .tlu_lsu_int_ldxa_vld_w2(tlu_lsu_int_ldxa_vld_w2),
+		 .tlu_asi_data_nf_vld_w2 (tlu_asi_data_nf_vld_w2),
+         .ifu_tlu_flush_fd_w      (ifu_tlu_flush_fd_w),  // the tlu_tcl instance connects ifu_tlu_flush_fd2_w on its port of the same name
+		 .tlu_inst_vld_m  (tlu_inst_vld_nq_m),  // port name differs from the net: fed by tlu_inst_vld_nq_m
+         // .exu_lsu_ldst_va_e (exu_lsu_ldst_va_e[`TLU_ASI_VA_WIDTH-1:0]),
+         .lsu_tlu_ldst_va_m (lsu_tlu_ldst_va_m[`TLU_ASI_VA_WIDTH-1:0]),
+         .tlu_asi_queue_data_g (lsu_tlu_rs3_data_g[`TLU_ASI_QUE_HI:`TLU_ASI_QUE_LO]),  // NOTE(review): net is lsu_tlu_rs3_data_g, not the lsu_tlu_st_rs3_data_g used by the mmu instances — confirm it is a distinct declared net
+	     // .tlu_exu_agp_tid	   (tlu_exu_agp_tid[1:0]),
+	     // .tlu_agp_tid_g	   (tlu_agp_tid_g[1:0]),
+	     .tlu_agp_tid_w2	   (tlu_agp_tid_w2[1:0]),
+         .se(se),
+         // .tlu_rst_l (tlu_rst_l),
+         // .clk (clk));
+         .rclk (rclk));
+
+/*
+bw_r_rf16x160 AUTO_TEMPLATE (
+		.word_wen	(4'b1111),
+	        .rd_clk		(rclk),
+	        .wr_clk		(rclk),
+		.reset_l	(arst_l),
+		.din		({mra_wdata[155:0],4'b0000}),
+		//.si		(),
+		.si_r		(),
+		.si_w		(),
+		.se		(se),
+		.sehold		(sehold),
+		//.so		());
+		.so_r		(),
+		.so_w		());
+*/
+
+bw_r_rf16x160 mra (/*AUTOINST*/  // bw_r_rf16x160 array: addresses, valids and byte enables come from mmu_ctl; data goes to and from mmu_dp
+		// Outputs
+		.dout		({mra_rdata[159:10],dummy_mra_rdata[9:0]}),  // low 10 output bits land on dummy_mra_rdata; only [159:10] reach mmu_dp
+		//.so		(),
+		.so_r		(short_scan0_4),  // read-side scan-out loops straight into the write-side scan-in (si_w)
+		.so_w		(short_scan0_5),  // end of the short scan chain within this chunk
+		// Inputs
+	        .rd_clk		(rclk),  // read and write clocks are both tied to rclk
+	        .wr_clk		(rclk),
+		.din		({mra_wdata[155:0],4'b0000}),  // 156 data bits placed at [159:4]; the low 4 bits are written as zero
+		.rst_tri_en	(mem_write_disable),
+		.word_wen	(4'b1111),  // all four word enables held active; write masking is left to byte_wen
+		.byte_wen	(mra_byte_wen[19:0]),
+	     	.wr_adr 	(mra_wr_ptr[3:0]),  // 4-bit write and read addresses
+	     	.rd_adr		(mra_rd_ptr[3:0]),
+	     	.wr_en		(mra_wr_vld),
+	     	.read_en	(mra_rd_vld),
+		.reset_l	(arst_l),
+		.sehold		(sehold),
+		//.si		(),
+		.si_w		(short_scan0_4),
+		.si_r		(short_scan0_3),  // scan-in comes from mmu_dp.so
+		.se		(se));
+		
+		
+
+//tlu_mra mra (/*AUTOINST*/
+//	     // Outputs
+//	     .mra_rdata			(mra_rdata[155:0]),
+//	     // Inputs
+//	     .mra_wr_ptr		(mra_wr_ptr[3:0]),
+//	     .mra_rd_ptr		(mra_rd_ptr[3:0]),
+//	     .mra_wr_vld		(mra_wr_vld),
+//	     .mra_rd_vld		(mra_rd_vld),
+//	     .mra_field1_en		(mra_field1_en),
+//	     .mra_field2_en		(mra_field2_en),
+//	     .mra_field3_en		(mra_field3_en),
+//	     .mra_field4_en		(mra_field4_en),
+//	     .mra_wdata			(mra_wdata[155:0]),
+//	     .clk			(rclk));
+
+/*
+tlu_pib AUTO_TEMPLATE (
+		.ifu_tlu_imiss_m	    (ifu_tlu_imiss_m),
+		.lsu_tlu_dcache_miss_w2	(lsu_tlu_dcache_miss_w2[3:0]),
+		.lsu_tlu_l2_dmiss	    (lsu_tlu_l2_dmiss[3:0]),
+		.lsu_tlu_stb_full_w2	(lsu_tlu_stb_full_w2[3:0]));
+        .ffu_tlu_fpu_tid        (ffu_tlu_fpu_tid[1:0]), 
+        .ffu_tlu_fpu_cmplt      (ffu_tlu_fpu_cmplt), 
+        // .pib_pic_wrap           (pib_pic_wrap[3:0]), 
+        .pib_picl_wrap           (pib_picl_wrap[`TLU_THRD_NUM-1:0]), 
+        .pich_wrap_flg           (pich_wrap_flg[`TLU_THRD_NUM-1:0]), 
+        .pich_onebelow_flg       (pich_onebelow_flg[`TLU_THRD_NUM-1:0]), 
+        .pich_twobelow_flg       (pich_twobelow_flg[`TLU_THRD_NUM-1:0]), 
+        .tlu_pic_onebelow_e      (tlu_pic_onebelow_e), 
+        .tlu_pic_twobelow_e      (tlu_pic_twobelow_e), 
+        .tlu_pic_wrap_e          (tlu_pic_wrap_e),
+        // modified for bug 5436: Niagara 2.0
+	    .tlu_pcr_ut	    (tlu_pcr_ut[`TLU_THRD_NUM-1:0]),
+	    .tlu_pcr_st	    (tlu_pcr_st[`TLU_THRD_NUM-1:0]),
+        //.tlu_pcr_ut_e            (tlu_pcr_ut_e), 
+        //.tlu_pcr_st_e            (tlu_pcr_st_e), 
+        // .pich_threebelow_flg     (pich_threebelow_flg[`TLU_THRD_NUM-1:0]), 
+        // modified for timing fixes
+        // .pib_priv_act_trap      (pib_priv_act_trap[3:0]), 
+        .pib_priv_act_trap_m    (pib_priv_act_trap_m[3:0]), 
+*/
+tlu_pib tlu_pib (/*AUTOINST*/
+                 .so (so1),
+                 .si (scan1_3),
+	             .grst_l	(grst_l),
+	             .arst_l	(arst_l),
+                 .ifu_tlu_imiss_e       (ifu_tlu_imiss_e), 
+                 .ifu_tlu_immu_miss_m   (ifu_tlu_immu_miss_m), 
+                 .tlu_hpstate_enb  (local_hpstate_enb[`TLU_THRD_NUM-1:0]),
+                 .ifu_tlu_l2imiss       (ifu_tlu_l2imiss[`TLU_THRD_NUM-1:0]), 
+	             .tlu_thread_inst_vld_g	(tlu_thread_inst_vld_g[`TLU_THRD_NUM-1:0]),
+                 .ifu_tlu_thrid_d       (ifu_tlu_thrid_d[1:0]), 
+                 .exu_tlu_wsr_data_m    (exu_tlu_wsr_data_m[`TLU_ASR_DATA_WIDTH-1:0]), 
+                 .tlu_full_flush_pipe_w2 (tlu_full_flush_pipe_w2), 
+                 .tlu_tcc_inst_w         (tlu_tcc_inst_w),
+                 .ifu_tlu_flush_fd_w      (ifu_tlu_flush_fd3_w),
+                 .ifu_tlu_sraddr_d      (ifu_tlu_sraddr_d_v2[`TLU_ASR_ADDR_WIDTH-1:0]), 
+                 .ifu_tlu_rsr_inst_d    (ifu_tlu_rsr_inst_d), 
+                 // .ifu_tlu_wsr_inst_d    (ifu_tlu_wsr_inst_d), 
+                 .lsu_tlu_wsr_inst_e    (lsu_tlu_wsr_inst_e), 
+	             .tlu_wsr_inst_nq_g		(tlu_wsr_inst_nq_g),
+                 .tlu_pib_rsr_data_e    (tlu_pib_rsr_data_e[`TLU_ASR_DATA_WIDTH-1:0]), 
+                 .tlu_pstate_priv       (local_pstate_priv[`TLU_THRD_NUM-1:0]),  
+	             .tlu_hpstate_priv      (local_hpstate_priv[`TLU_THRD_NUM-1:0]),
+	             .tlu_thread_wsel_g		(tlu_thread_wsel_g[`TLU_THRD_NUM-1:0]),
+                 .ffu_tlu_fpu_tid       (ffu_tlu_fpu_tid[1:0]), 
+                 .ffu_tlu_fpu_cmplt     (ffu_tlu_fpu_cmplt), 
+                 .lsu_tlu_dmmu_miss_g   (lsu_tlu_dmmu_miss_g), 
+                 .lsu_tlu_dcache_miss_w2(lsu_tlu_dcache_miss_w2[`TLU_THRD_NUM-1:0]),
+                 .lsu_tlu_l2_dmiss       (lsu_tlu_l2_dmiss[`TLU_THRD_NUM-1:0]),
+                 .lsu_tlu_stb_full_w2   (lsu_tlu_stb_full_w2[`TLU_THRD_NUM-1:0]),
+                 .tlu_wsr_data_w        (tlu_wsr_data_w[`TLU_ASR_DATA_WIDTH-1:0]), 
+                 // modified for timing fixes
+        	     // .pib_priv_act_trap     (pib_priv_act_trap[`TLU_THRD_NUM-1:0]), 
+        	     .pib_priv_act_trap_m   (pib_priv_act_trap_m[`TLU_THRD_NUM-1:0]), 
+                 // .pib_pic_wrap        (pib_pic_wrap[`TLU_THRD_NUM-1:0]), 
+                 .pib_picl_wrap          (pib_picl_wrap[`TLU_THRD_NUM-1:0]), 
+                 .pich_wrap_flg          (pich_wrap_flg[`TLU_THRD_NUM-1:0]), 
+                 .pich_onebelow_flg      (pich_onebelow_flg[`TLU_THRD_NUM-1:0]), 
+                 .pich_twobelow_flg      (pich_twobelow_flg[`TLU_THRD_NUM-1:0]), 
+                 .tlu_pic_onebelow_e     (tlu_pic_onebelow_e), 
+                 .tlu_pic_twobelow_e     (tlu_pic_twobelow_e), 
+                 // modified for bug 5436: Niagara 2.0
+	             .tlu_pcr_ut	    (tlu_pcr_ut[`TLU_THRD_NUM-1:0]),
+	             .tlu_pcr_st	    (tlu_pcr_st[`TLU_THRD_NUM-1:0]),
+                 //.tlu_pcr_ut_e            (tlu_pcr_ut_e), 
+                 //.tlu_pcr_st_e            (tlu_pcr_st_e), 
+                 .tlu_pic_wrap_e         (tlu_pic_wrap_e),
+                 // .pich_threebelow_flg      (pich_threebelow_flg[`TLU_THRD_NUM-1:0]), 
+                 // .tlu_que_trap_sel_m  (tlu_que_trap_sel_m[`QUE_TRAP_SEL_WIDTH-1:0]), 
+                 // .tlu_exu_rsr_data_e  (tlu_exu_rsr_data_e[`TLU_ASR_DATA_WIDTH-1:0]),  
+//
+// reset was removed to abide by the Niagara reset methodology
+//               .reset (reset),			
+                 .se (se),
+                 // .tlu_rst_l  			(tlu_rst_l),
+                 // .rst_tri_en  			(mux_drive_disable),
+                 .rclk					(rclk) );
+                 
+// modified due to Niagara SRAM methodology
+/*
+tlu_scpd tlu_scpd (
+                 .lsu_tlu_st_rs3_data_g (lsu_tlu_rs3_data_g[`TLU_SCPD_DATA_WIDTH-1:0]),
+                 .tlu_scpd_rd_vld_m     (tlu_scpd_rd_vld_m),
+                 .tlu_scpd_wr_vld_g     (tlu_scpd_wr_vld_g),
+                 .tlu_scpd_rd_addr_m    (tlu_scpd_rd_addr_m[`SCPD_RW_ADDR_WIDTH-1:0]),
+                 .tlu_scpd_wr_addr_g    (tlu_scpd_wr_addr_g[`SCPD_RW_ADDR_WIDTH-1:0]),
+//
+// reset was removed to abide by the Niagara reset methodology
+//               .reset (reset),			
+                 .tlu_rst  				(tlu_rst),
+                 .clk					(rclk),
+                 .tlu_scpd_asi_rdata_g  (tlu_scpd_asi_rdata_g[`TLU_SCPD_DATA_WIDTH-1:0]));
+//
+*/
+bw_r_rf32x80 tlu_scpd (
+                 // output
+                 .dout    (tlu_scpd_asi_rdata_g[79:0]),
+                 .so      (short_scan0_6),
+                 // input
+                 .din     ({{16{1'b0}}, // unused inputs
+                            lsu_tlu_rs3_data_g[`TLU_SCPD_DATA_WIDTH-1:0]}),
+                 .rd_en   (tlu_scpd_rd_vld_m),
+                 .wr_en   (tlu_scpd_wr_vld_g),
+                 .rd_adr  (tlu_scpd_rd_addr_m[`SCPD_RW_ADDR_WIDTH-1:0]),
+                 .wr_adr  (tlu_scpd_wr_addr_g[`SCPD_RW_ADDR_WIDTH-1:0]),
+                 .nib_wr_en (20'hfffff),
+                 .reset_l (arst_l),
+                 .rst_tri_en (mem_write_disable),
+                 .sehold  (sehold),
+                 .se      (se),
+                 .si      (short_scan0_5),
+                 //.clk	  (clk));
+                 .rclk	  (rclk));
+endmodule
+// Local Variables:
+// verilog-library-directories:("." "../../../srams/rtl")
+// End:
Index: /trunk/T1-CPU/tlu/tlu_mmu_dp.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_mmu_dp.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_mmu_dp.v	(revision 6)
@@ -0,0 +1,1744 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_mmu_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//	Description:	MMU Datapath - I & D.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which contains the 
+					// time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+`include "tlu.h"
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module tlu_mmu_dp ( /*AUTOARG*/
+   // Outputs
+   tlu_dtsb_split_w2, tlu_dtsb_size_w2, tlu_dtag_access_w2,
+   tlu_itsb_split_w2, tlu_itsb_size_w2,
+   tlu_itlb_tte_tag_w2, tlu_itlb_tte_data_w2, tlu_dtlb_tte_tag_w2, 
+   tlu_dtlb_tte_data_w2, tlu_idtlb_dmp_key_g, tlu_dsfsr_flt_vld, 
+   tlu_isfsr_flt_vld, mra_wdata, tlu_ctxt_cfg_w2, tlu_tag_access_ctxt_g, 
+   lsu_exu_ldxa_data_g, so, tlu_tsb_base_w2_d1,
+   // Inputs
+   tlu_addr_msk_g, dmmu_any_sfsr_wr, dmmu_sfsr_wr_en_l, dmmu_sfar_wr_en_l, 
+   immu_any_sfsr_wr, immu_sfsr_wr_en_l, tlu_lng_ltncy_en_l,
+   lsu_tlu_dside_ctxt_m, lsu_tlu_pctxt_m, tlu_tag_access_ctxt_sel_m, 
+   lsu_tlu_st_rs3_data_b63t59_g, lsu_tlu_st_rs3_data_b47t0_g,
+   exu_lsu_ldst_va_e, tlu_idtsb_8k_ptr,lsu_tlu_tlb_dmp_va_m, ifu_tlu_pc_m, 
+   tlu_slxa_thrd_sel, 
+   tlu_tte_tag_g, tlu_dmp_key_vld_g, tlb_access_rst_l, 
+   tag_access_wdata_sel, mra_rdata, tlu_admp_key_sel, 
+   tlu_isfsr_din_g, tlu_dsfsr_din_g, 
+   tlu_tte_wr_pid_g, tlu_tte_real_g, tlu_ldxa_l1mx1_sel, 
+   tlu_ldxa_l1mx2_sel, tlu_ldxa_l2mx1_sel, rclk, grst_l, arst_l,
+   tlu_tlb_tag_invrt_parity, tlu_tlb_data_invrt_parity, tlu_sun4r_tte_g,
+   tlu_tsb_rd_ps0_sel, si, se, tlu_tlb_access_en_l_d1
+   ) ;	
+
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+// End of automatics
+
+input			tlu_addr_msk_g ;	// address masking active for thread in pipe.
+input			dmmu_any_sfsr_wr ; 
+input	[3:0]		dmmu_sfsr_wr_en_l ; 
+input	[3:0]		dmmu_sfar_wr_en_l ; 
+input                   immu_any_sfsr_wr ;
+input   [3:0]           immu_sfsr_wr_en_l ;
+input  	[12:0]          lsu_tlu_dside_ctxt_m ;
+input  	[12:0]          lsu_tlu_pctxt_m ;
+input	[2:0]		tlu_tag_access_ctxt_sel_m ;	
+// rs3_data split for vlint purposes.
+input	[63:59]		lsu_tlu_st_rs3_data_b63t59_g ;
+input	[47:0]		lsu_tlu_st_rs3_data_b47t0_g ;
+input	[47:0]		exu_lsu_ldst_va_e ;
+input   [47:0]          tlu_idtsb_8k_ptr ;
+input   [47:13]         lsu_tlu_tlb_dmp_va_m ;
+input	[47:13]		ifu_tlu_pc_m ;
+input	[3:0]		tlu_slxa_thrd_sel ;
+//input 	[63:0]        	int_tlu_asi_data;
+//input			int_tlu_asi_data_vld;
+input	[2:0]		tlu_tte_tag_g ;
+input	[4:0]		tlu_dmp_key_vld_g ;
+//input			tlb_access_en_l ;
+input			tlu_tlb_access_en_l_d1 ;
+input			tlb_access_rst_l ;
+
+input	[2:0]		tag_access_wdata_sel ;
+input	[155:6]		mra_rdata ;
+
+input			tlu_admp_key_sel ;
+
+input 	[23:0]   	tlu_isfsr_din_g ;
+input 	[23:0]   	tlu_dsfsr_din_g ;
+
+input   [2:0]          	tlu_tte_wr_pid_g ;      // thread selected pid
+input			tlu_tte_real_g ;	// tte is real		
+
+input	[3:0]		tlu_ldxa_l1mx1_sel ;	// mmu ldxa level1 mx1 sel
+input	[3:0]		tlu_ldxa_l1mx2_sel ;	// mmu ldxa level1 mx2 sel
+input	[2:0]		tlu_ldxa_l2mx1_sel ;	// mmu ldxa level2 mx1 sel
+input			tlu_tlb_tag_invrt_parity ;	// invert parity for tag write
+input			tlu_tlb_data_invrt_parity ;	// invert parity for data write
+input                  	tlu_sun4r_tte_g ;       // sun4r vs. sun4v tte.
+
+input                  tlu_lng_ltncy_en_l ;
+
+input                  tlu_tsb_rd_ps0_sel ;
+
+input                 rclk ;
+input                 arst_l ;
+input                 grst_l ;
+input                 si ;
+input                 se ;
+
+output                        so ;
+
+//output  [47:13]         tlu_dtsb_base_w2 ;	// represents ps0
+output                  tlu_dtsb_split_w2 ;
+output  [3:0]           tlu_dtsb_size_w2 ;
+output  [47:13]        	tlu_dtag_access_w2 ;	// used to represent both i/d.
+//output  [47:13]         tlu_itsb_base_w2 ;	// represents ps1
+output                  tlu_itsb_split_w2 ;
+output  [3:0]           tlu_itsb_size_w2 ;
+//output  [32:13]        	tlu_itag_access_w2 ;	// to be obsoleted.
+output	[58:0]		tlu_itlb_tte_tag_w2 ;
+output	[42:0]		tlu_itlb_tte_data_w2 ;
+output	[58:0]		tlu_dtlb_tte_tag_w2 ;
+output	[42:0]		tlu_dtlb_tte_data_w2 ;
+//output	[63:0]		tlu_lsu_ldxa_data_w2 ;
+output  [5:0]           tlu_ctxt_cfg_w2 ;       // i/d context zero/non-zero config.
+output	[40:0]		tlu_idtlb_dmp_key_g ;
+
+
+output	[3:0]		tlu_dsfsr_flt_vld ;
+output	[3:0]		tlu_isfsr_flt_vld ;
+
+output	[12:0]		tlu_tag_access_ctxt_g ;
+output	[63:0]		lsu_exu_ldxa_data_g ;
+
+output	[47:13]		tlu_tsb_base_w2_d1 ;
+
+///output	tlu_tag_access_nctxt_g ;		// tag-access contains nucleus context.
+
+output	[155:0]		mra_wdata ;
+
+wire    [47:0] 		ldst_va_m,ldst_va_g ;
+// st_rs3_data partitioned for vlint.
+//wire	[63:0]		st_rs3_data_g ;
+wire	[63:59]		st_rs3_data_b63t59_g ;
+wire	[39:8]		st_rs3_data_b39t8_g ;
+wire	[6:1]		st_rs3_data_b6t1_g ;
+wire	[63:0]		tag_target ;
+wire   	[47:13]        	dtag_access_w2 ;
+wire	[23:0]		dsfsr,isfsr ;
+wire	[23:0]		dsfsr0,isfsr0 ;
+wire	[23:0]		dsfsr1,isfsr1 ;
+wire	[23:0]		dsfsr2,isfsr2 ;
+wire	[23:0]		dsfsr3,isfsr3 ;
+wire	[47:0]		dsfar ;
+wire	[47:0]		dsfar0,dsfar1 ;
+wire	[47:0]		dsfar2,dsfar3 ;
+wire	[23:0]		dsfsr_din ;
+wire	[23:0]		isfsr_din ;
+//wire	[39:22] 	tte_relocated_pa ;
+wire	[40:0]		dmp_key ;
+wire	[47:0] 		tag_access_w2 ;
+wire	[41:0]		idtte_data_w2 ;	
+wire			tlb_access0_clk, tlb_access1_clk ;
+wire	[40:0]		idtlb_dmp_key_pend ; 
+wire	[47:0]		tag_access_wdata ;
+wire	[12:0]		tag_access_ctxt_m,tag_access_ctxt_g ;
+// buses split for vlint purposes.
+wire 	[58:55] 	idtte_tag_b58t55_g ;
+wire 	[53:0] 		idtte_tag_b53t0_g ;
+wire 	[58:55] 	idtte_tag_b58t55_w2 ;
+wire 	[53:0] 		idtte_tag_b53t0_w2 ;
+wire 	[41:0] 		idtte_data_g ;
+wire    [47:13]         tlb_dmp_va_g ;
+wire	[47:0]	ldxa_l1mx1_dout_e ;
+wire	[47:0]	ldxa_l1mx1_dout_m ;
+
+ //=========================================================================================
+ //    RESET/CLK
+ //=========================================================================================
+ 
+    wire       clk;                 // local alias of the rclk input port
+    assign     clk = rclk;
+    // rst_l is the q output of rstff below, i.e. a flopped copy of grst_l.
+    wire       rst_l;
+    // rstff: din=grst_l, q=rst_l, rst_l pin tied to arst_l; scan in/out left unconnected.
+    dffrl_async rstff(.din (grst_l),
+                      .q   (rst_l),
+                      .clk (clk), .se(se), .si(), .so(),
+                      .rst_l (arst_l));
+
+
+//=========================================================================================
+//	Staging
+//=========================================================================================
+
+// Stage
+wire [47:13] pc_g ;	
+dff_s  #(35) stg_w (
+        .din    (ifu_tlu_pc_m[47:13]),
+        .q      (pc_g[47:13]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//assign	pc_g[47:13] = ifu_tlu_pc_w[47:13] ;
+
+// Stage va
+dff_s  #(48) stg_m (
+        .din    (exu_lsu_ldst_va_e[47:0]),
+        .q      (ldst_va_m[47:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s  #(48) stg_g (
+        .din    (ldst_va_m[47:0]),
+        .q      (ldst_va_g[47:0]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+dff_s  #(35) dstg_g (
+        .din    (lsu_tlu_tlb_dmp_va_m[47:13]),
+        .q      (tlb_dmp_va_g[47:13]),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+
+//=========================================================================================
+
+wire [4:0] tlu_dmp_key_vld_d1 ;	// tlu_dmp_key_vld_g after the dstg_d1 flop
+wire [47:13] tlb_dmp_va_d1 ;	// tlb_dmp_va_g after the dstg_d1 flop
+dff_s  #(40) dstg_d1 (		// 40b = 35b demap va + 5b key-valid
+        .din    ({tlb_dmp_va_g[47:13],tlu_dmp_key_vld_g[4:0]}),
+        .q      ({tlb_dmp_va_d1[47:13],tlu_dmp_key_vld_d1[4:0]}),
+        .clk    (clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+// The four *_d1 TTE nets declared next are the q outputs of the stgd1_idttetg flop later in this module.
+wire	[2:0]	tlu_tte_tag_d1,tlu_tte_wr_pid_d1 ;
+wire		tlu_tte_real_d1,tlu_tlb_tag_invrt_parity_d1 ;
+wire	[47:13] dmp_va_d1 ;	// va half of the dmp_key_sel mux output
+wire	[5:0]	dmp_key_vld_d1 ;	// 6b control half of the dmp_key_sel mux output
+dp_mux2es #(41)	dmp_key_sel (	// 41b 2:1 mux steered by tlu_admp_key_sel; presumably in1 when sel=1 - TODO confirm against dp_mux2es
+     		.in0	({tlb_dmp_va_d1[47:13],tlu_dmp_key_vld_d1[4:0],tlu_tte_real_d1}),	// 35b staged demap va + 5b key-valid + real
+    		.in1	({tag_access_w2[47:13],1'b1,tlu_tte_tag_d1[2:0],tlu_tte_real_d1,tlu_tte_real_d1}),	// 35b tag-access va + 1'b1 + 3b tte tag + real (twice)
+    		//.in1	({tag_access_w2[47:13],1'b1,tlu_tte_tag_d1[2:0],1'b0,tlu_tte_real_d1}), // Bug 3754
+		.sel	(tlu_admp_key_sel),
+	      	.dout	({dmp_va_d1[47:13],dmp_key_vld_d1[5:0]})
+	);
+// Pack the 41b key: va slices interleaved with the six dmp_key_vld_d1 bits (20+1+6+1+6+1+3+1+1+1).
+assign  dmp_key[40:0] =
+        {
+        dmp_va_d1[47:28],        // (20b)
+        dmp_key_vld_d1[5],       // (1b)
+        dmp_va_d1[27:22],        // (6b)
+        dmp_key_vld_d1[4],       // (1b)
+        dmp_va_d1[21:16],        // (6b)
+        dmp_key_vld_d1[3],       // (1b)
+        dmp_va_d1[15:13],        // (3b)
+        dmp_key_vld_d1[2],       // (1b)
+        dmp_key_vld_d1[1],       // (1b)
+        dmp_key_vld_d1[0]        // (1b)
+        } ;
+
+
+//wire	tlb_access_en_l_d1 ;
+wire    tlb_access2_clk ;	// clock output of clkbf_dmpky; clocks stg_w2 only when FPGA_SYN_CLK_DFF is undefined
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_dmpky (
+                .rclk   (clk),
+                .enb_l  (tlu_tlb_access_en_l_d1),
+                .tmb_l  (~se),
+                .clk    (tlb_access2_clk)
+                ) ;
+`endif
+`endif
+// NOTE(review): each ifdef/else pair in this section is nested twice on the same macro; the inner pair is redundant.
+// Advance by a cycle. Do not have to reset state. (NOTE(review): every stg_w2 variant still ties rst_l to tlb_access_rst_l.)
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(41) stg_w2 (
+        .din    (dmp_key[40:0]),
+        .q      (idtlb_dmp_key_pend[40:0]),
+        .rst_l  (tlb_access_rst_l),
+        .en (~(tlu_tlb_access_en_l_d1)), .clk(clk),	// enable is the inverted active-low access enable; runs on the ungated clk
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(41) stg_w2 (
+        .din    (dmp_key[40:0]),
+        .q      (idtlb_dmp_key_pend[40:0]),
+        .rst_l  (tlb_access_rst_l),
+        .en (~(tlu_tlb_access_en_l_d1)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dffrl_s  #(41) stg_w2 (
+        .din    (dmp_key[40:0]),
+        .q      (idtlb_dmp_key_pend[40:0]),
+        .rst_l  (tlb_access_rst_l),
+        .clk    (tlb_access2_clk),	// no enable pin here; uses the clkbf_dmpky clock output instead
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+`endif
+// Drive the output port straight from the staged key.
+assign  tlu_idtlb_dmp_key_g[40:0] = idtlb_dmp_key_pend[40:0] ;
+
+
+//=========================================================================================
+//	WR DATA FOR MRA
+//=========================================================================================
+
+// Format for each entry of MRA on a per thread basis.
+// Current :
+//	| 	dtsb(48b)	|	dtag_access(48b)	|	dsfar(48b)	|	
+//	| 	itsb(48b)	|	itag_access(48b)	|			|	
+// New(Hyp,Legacy) : 8 tsb per thread instead of 2. dsfar removed.
+// -This allows tag-access to be lined up with simultaneous reads of tsb
+// -zero-ctxt and non-zero-ctxt tag-access will have to be distinguished either
+// by doing a zero-detect on the lower 13b of the write-data or using a distinct asi.
+//	| 	zcps0_dtsb(48b)	|	zcps1_dtsb(48b)	| 	zctxt_dtag_acc(48b) | dzctxt_cfg(6b) |
+//	| 	zcps0_itsb(48b)	|	zcps1_itsb(48b)	| 	zctxt_itag_acc(48b) | izctxt_cfg(6b) |
+//	|       nzcps0_dtsb(48b)|	nzcps1_dtsb(48b)| 	nzctxt_dtag_acc(48b)| dnzctxt_cfg(6b)|
+//	| 	nzcps0_itsb(48b)|	nzcps1_itsb(48b)| 	nzctxt_itag_acc(48b)| inzctxt_cfg(6b)|
+
+mux3ds #(13)	tag_acc_ctxtmx(
+    		.in0	(lsu_tlu_pctxt_m[12:0]), // iside selects primary ctxt
+    		.in1	(13'd0),		 // iside selects nucleus ctxt
+     		.in2	(lsu_tlu_dside_ctxt_m[12:0]), // otherwise select dside ctxt
+		.sel0	(tlu_tag_access_ctxt_sel_m[0]),
+		.sel1	(tlu_tag_access_ctxt_sel_m[1]),
+		.sel2	(tlu_tag_access_ctxt_sel_m[2]),
+	      	.dout	(tag_access_ctxt_m[12:0])
+	);
+
+/*assign	tag_access_ctxt_m[12:0] =
+	tlu_tag_access_ctxt_sel_m[0] ?	lsu_tlu_pctxt_m[12:0] :		// iside selects primary ctxt
+		tlu_tag_access_ctxt_sel_m[1] ?	13'd0  : 		// iside selects nucleus ctxt
+			tlu_tag_access_ctxt_sel_m[2] ? lsu_tlu_dside_ctxt_m[12:0] : 13'bx_xxxx_xxxx_xxxx ; 			// otherwise select dside ctxt
+*/
+
+dff_s  #(13) ctxt_stgg (
+        .din    (tag_access_ctxt_m[12:0]),
+        .q    	(tag_access_ctxt_g[12:0]),
+        .clk 	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+// pstate.am masking
+wire	[15:0]	ldst_va_masked_g ;
+assign	ldst_va_masked_g[15:0] = ldst_va_g[47:32] & {16{~tlu_addr_msk_g}} ;
+
+mux3ds #(48)	dtag_access_dsel(
+    		.in0	({ldst_va_masked_g[15:0],ldst_va_g[31:13],tag_access_ctxt_g[12:0]}), // dside hardware
+    		.in1	({pc_g[47:13],tag_access_ctxt_g[12:0]}), // iside hardware
+     		.in2	(lsu_tlu_st_rs3_data_b47t0_g[47:0]),	// stxa,tsb write as an example.
+		.sel0	(tag_access_wdata_sel[0]),
+		.sel1	(tag_access_wdata_sel[1]),
+		.sel2	(tag_access_wdata_sel[2]),
+	      	.dout	(tag_access_wdata[47:0])
+	);
+
+// Determine whether context is nucleus or not.
+//assign tlu_tag_access_nctxt_g = (tag_access_wdata[12:0] == 13'd0) ;
+assign        tlu_tag_access_ctxt_g[12:0] = tag_access_ctxt_g[12:0] ;
+
+wire	[47:0]	dsfar_wdata ;
+dp_mux2es #(48)	dsfar_dsel(
+    		.in0	({ldst_va_masked_g[15:0],ldst_va_g[31:0]}), // dsfar;trap
+    		.in1	(lsu_tlu_st_rs3_data_b47t0_g[47:0]), // asi write
+		.sel	(dmmu_any_sfsr_wr),
+	      	.dout	(dsfar_wdata[47:0])
+	);
+
+// Warning for Grape Mapper - the number of bits may have to be changed to
+// map implementation.
+assign	mra_wdata[155:0] = 
+	// Bug 4676 - tsb rsrved field
+	{lsu_tlu_st_rs3_data_b47t0_g[47:12],8'd0,	
+		lsu_tlu_st_rs3_data_b47t0_g[3:0],	//ps0 zctxt,nzctxt tsb
+	 lsu_tlu_st_rs3_data_b47t0_g[47:12],8'd0, 	
+	 	lsu_tlu_st_rs3_data_b47t0_g[3:0], 	//ps1 zctxt,nzctxt tsb
+	 tag_access_wdata[47:0],		//i/d tag-access
+	 lsu_tlu_st_rs3_data_b47t0_g[10:8], 	//ps1 page size
+	 lsu_tlu_st_rs3_data_b47t0_g[2:0], 	//ps0 page size
+	 6'd0};
+
+
+//=========================================================================================
+//	D-TAG ACCESS
+//=========================================================================================
+
+// 4 registers for the 4 threads.
+// 35b of VA || 13b Ctxt.
+// ** Ctxt is to be read as zero if there is no context associated with the access **
+// VA will be sign-extended based on bit 47.
+
+// Update in w2.
+assign	dtag_access_w2[47:13] = mra_rdata[`MRA_TACCESS_HI:`MRA_TACCESS_LO+13] ;
+
+// Can this be shared with the i-side ?
+assign	tlu_dtag_access_w2[47:13] = dtag_access_w2[47:13] ;
+
+
+//=========================================================================================
+//	I-TAG ACCESS
+//=========================================================================================
+
+// 4 registers for the 4 threads.
+// 35b of VA || 13b Ctxt.
+// ** Ctxt is to be read as zero if there is no context associated with the access **
+// VA will be sign-extended based on bit 47.
+
+// Update in w2.
+// SPARC_HPV_EN - This needs to be obsoleted. Common tag-access will be superimposed
+// on dta_access bus.
+
+//assign	itag_access_w2[32:13] = mra_rdata[`MRA_TACCESS_HI-15:`MRA_TACCESS_LO+13] ;
+//assign	itag_access_w2[47:0] = mra_rdata[`MRA_TACCESS_HI:`MRA_TACCESS_LO] ;
+
+//assign	tlu_itag_access_w2[32:13] = itag_access_w2[32:13] ;
+
+
+//=========================================================================================
+//	D-TAG TARGET
+//=========================================================================================
+
+// Tag Target is based on currently selected thread.
+
+// Thread0,1,2,3
+assign tag_target[63:0] =
+	{3'b000,
+	ldxa_l1mx1_dout_m[12:0],	// Context
+	//tag_access_w2[12:0],		// Context
+	6'b000000,
+	{16{ldxa_l1mx1_dout_m[47]}},	// Sign-extend VA[47]
+	//{16{tag_access_w2[47]}},	// Sign-extend VA[47]
+	ldxa_l1mx1_dout_m[47:22]};	// VA // Bug 3975.
+	//tag_access_w2[47:22]};	// VA
+
+//=========================================================================================
+//	D-TSB
+//=========================================================================================
+
+// Note : on interface, dtsb represents ps0 tsbs, itsb represents ps1 tsbs. 
+
+wire 	[47:0] 	tsb_ps0, tsb_ps1 ;
+assign	tsb_ps0[47:0] = mra_rdata[`MRA_TSB_PS0_HI:`MRA_TSB_PS0_LO] ;
+assign	tsb_ps1[47:0] = mra_rdata[`MRA_TSB_PS1_HI:`MRA_TSB_PS1_LO] ;
+
+assign	tlu_dtsb_split_w2 = tsb_ps0[12] ;
+// SPARC_HPV_EN - extend tsb_size by 1b.
+assign	tlu_dtsb_size_w2[3:0] = tsb_ps0[3:0] ;
+
+//=========================================================================================
+//	CTXT CONFIG
+//=========================================================================================
+
+wire	[5:0]	ptr_ctxt_cfg ;
+assign	tlu_ctxt_cfg_w2[5:0] =	mra_rdata[`MRA_CTXTCFG_HI:`MRA_CTXTCFG_LO] ;
+
+dff_s  #(6) pctxt_stgm (
+        .din    (mra_rdata[`MRA_CTXTCFG_HI:`MRA_CTXTCFG_LO]),
+        .q    	(ptr_ctxt_cfg[5:0]),
+        .clk 	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+//=========================================================================================
+//	I-TSB
+//=========================================================================================
+
+assign	tlu_itsb_split_w2 = tsb_ps1[12] ;
+assign	tlu_itsb_size_w2[3:0] = tsb_ps1[3:0] ;
+
+//=========================================================================================
+//	STAGE TSB BASE FOR USE IN PTR CALCULATION
+//=========================================================================================
+
+wire	[47:13]	tsb_base ;
+assign  tsb_base[47:13] =
+        tlu_tsb_rd_ps0_sel ? tsb_ps0[47:13] : tsb_ps1[47:13] ;
+        //tlu_tsb_rd_ps0_sel ? dtsb[47:13] : itsb[47:13] ;
+
+dff_s  #(35) tsbbase_stgm (
+        .din    (tsb_base[47:13]),
+        .q    	(tlu_tsb_base_w2_d1[47:13]),
+        .clk 	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+//=========================================================================================
+//	8K and 64K Ptr
+//=========================================================================================
+
+// In MMU Control.
+
+//=========================================================================================
+//	Direct Ptr
+//=========================================================================================
+
+//=========================================================================================
+//	I-/D TLB Fill : TTE Tag and Data.
+//=========================================================================================
+
+
+// TTE Tag is formed from Tag Access.
+// TTE Data is formed from rs3_data for store.
+
+// Timing needs to be fixed !!! Partition mode will add one more cycle
+// to path. tlb write will occur in w3.
+
+// partitioned for vlint purposes.
+//assign	st_rs3_data_g[63:0] = lsu_tlu_st_rs3_data_g[63:0] ; 
+assign	st_rs3_data_b63t59_g[63:59] = lsu_tlu_st_rs3_data_b63t59_g[63:59] ; 
+assign	st_rs3_data_b39t8_g[39:8] = lsu_tlu_st_rs3_data_b47t0_g[39:8] ; 
+assign	st_rs3_data_b6t1_g[6:1] = lsu_tlu_st_rs3_data_b47t0_g[6:1] ; 
+
+assign	tag_access_w2[47:0] = mra_rdata[`MRA_TACCESS_HI:`MRA_TACCESS_LO] ;
+
+wire idtte_tag_vld_g,idtte_tag_vld_d1 ;
+assign	idtte_tag_vld_g =
+	st_rs3_data_b63t59_g[63] ;
+wire idtte_tag_lock_g,idtte_tag_lock_d1 ;
+assign	idtte_tag_lock_g =
+	tlu_sun4r_tte_g ? st_rs3_data_b6t1_g[6] : st_rs3_data_b63t59_g[61] ;
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_idttetg (
+                .rclk   (clk),
+                .enb_l  (tlu_lng_ltncy_en_l),
+                .tmb_l  (~se),
+                .clk    (tlb_access3_clk)
+                ) ;    
+`endif
+`endif
+
+// Stage some bits to match posedge rd for lng-lat reads of mra.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(10) stgd1_idttetg (
+        .din    ({idtte_tag_vld_g,idtte_tag_lock_g,tlu_tte_tag_g[2:0],
+		tlu_tte_wr_pid_g[2:0],tlu_tte_real_g,tlu_tlb_tag_invrt_parity}),
+        .q      ({idtte_tag_vld_d1,idtte_tag_lock_d1,tlu_tte_tag_d1[2:0],
+		tlu_tte_wr_pid_d1[2:0],tlu_tte_real_d1,tlu_tlb_tag_invrt_parity_d1}),
+        .en (~(tlu_lng_ltncy_en_l)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(10) stgd1_idttetg (
+        .din    ({idtte_tag_vld_g,idtte_tag_lock_g,tlu_tte_tag_g[2:0],
+		tlu_tte_wr_pid_g[2:0],tlu_tte_real_g,tlu_tlb_tag_invrt_parity}),
+        .q      ({idtte_tag_vld_d1,idtte_tag_lock_d1,tlu_tte_tag_d1[2:0],
+		tlu_tte_wr_pid_d1[2:0],tlu_tte_real_d1,tlu_tlb_tag_invrt_parity_d1}),
+        .en (~(tlu_lng_ltncy_en_l)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(10) stgd1_idttetg (
+        .din    ({idtte_tag_vld_g,idtte_tag_lock_g,tlu_tte_tag_g[2:0],
+		tlu_tte_wr_pid_g[2:0],tlu_tte_real_g,tlu_tlb_tag_invrt_parity}),
+        .q      ({idtte_tag_vld_d1,idtte_tag_lock_d1,tlu_tte_tag_d1[2:0],
+		tlu_tte_wr_pid_d1[2:0],tlu_tte_real_d1,tlu_tlb_tag_invrt_parity_d1}),
+        .clk 	(tlb_access3_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+// assumption is that tag_access_w2 gets delayed by a cycle because
+// the rd is now posedge.
+assign idtte_tag_b53t0_g[53:0] =
+	{tag_access_w2[47:22],		// VA_tag	(26b)
+	tlu_tte_tag_d1[2],		// 27:22 are valid (1b)
+	idtte_tag_vld_d1,		// V 		(1b) can be 0 or 1
+	idtte_tag_lock_d1,		// L 		(1b) 
+	1'b1,				// U		(1b) : must be set on write
+	tag_access_w2[21:16],		// VA_tag	(6b)
+	tlu_tte_tag_d1[1],		// 21:16 are valid (1b)
+	tag_access_w2[15:13],		// VA_tag	(3b)
+	tlu_tte_tag_d1[0],		// 15:13 are valid (1b)
+	tag_access_w2[12:0] 		// Ctxt b12:0 	(13b)
+			};		
+
+assign	idtte_tag_b58t55_g[58:55] = {tlu_tte_wr_pid_d1[2:0],tlu_tte_real_d1};
+// V and U bit omitted from tag as it can change once in tlb
+// assign	idtte_tag_g[54] = 
+// tlu_tlb_tag_invrt_parity_d1^(^{idtte_tag_g[58:55],idtte_tag_g[53:27],idtte_tag_g[25],idtte_tag_g[23:0]}) ;
+
+// Additional page size bit does not have to be included. EP ? 
+// SUN4R TTE data (42b): built from the st_rs3_data_* fields and the inverted tlu_tte_tag_g bits.
+wire	[41:0]	idtte_data_sun4r_g ;
+assign idtte_data_sun4r_g[41:0] =
+	{st_rs3_data_b39t8_g[39:22],	// PA		(18b)
+	~tlu_tte_tag_g[2],		// 27:22 - mx sel (1b) : active-low
+	st_rs3_data_b39t8_g[21:16],	// PA 		(6b)
+	~tlu_tte_tag_g[1],		// 21:16 - mx sel (1b) : active-low
+	st_rs3_data_b39t8_g[15:13],	// PA 		(3b)
+	~tlu_tte_tag_g[0],		// 15:13 - mx sel (1b) : active-low
+	st_rs3_data_b63t59_g[63],	// V		(1b)
+	st_rs3_data_b63t59_g[60],	// NFO 		(1b)
+	st_rs3_data_b63t59_g[59],	// IE 		(1b)
+	st_rs3_data_b6t1_g[6],		// L		(1b)
+	st_rs3_data_b6t1_g[5:4],	// CP/CV	(2b)
+	st_rs3_data_b6t1_g[3],		// E		(1b)
+	st_rs3_data_b6t1_g[2],		// P		(1b)
+	st_rs3_data_b6t1_g[1],		// W		(1b)
+	3'b000};			// Spare	(3b)
+// SUN4V TTE data (42b): same field order as the SUN4R form, with attribute bits taken from the sun4v positions.
+wire	[41:0]	idtte_data_sun4v_g ;
+assign idtte_data_sun4v_g[41:0] =
+	{st_rs3_data_b39t8_g[39:22],	// PA		(18b)
+	~tlu_tte_tag_g[2],		// 27:22 - mx sel (1b) : active-low
+	st_rs3_data_b39t8_g[21:16],	// PA 		(6b)
+	~tlu_tte_tag_g[1],		// 21:16 - mx sel (1b) : active-low
+	st_rs3_data_b39t8_g[15:13],	// PA 		(3b)
+	~tlu_tte_tag_g[0],		// 15:13 - mx sel (1b) : active-low
+	st_rs3_data_b63t59_g[63],	// V		(1b) // 4->63. Bug 2977
+	st_rs3_data_b63t59_g[62],	// NFO 		(1b) // 10->62
+	st_rs3_data_b39t8_g[12],	// IE 		(1b)
+	st_rs3_data_b63t59_g[61],	// L 		(1b)
+	//1'b0,				//// L(none)	(1b)
+	st_rs3_data_b39t8_g[10:9],	// CP/CV	(2b) // 9:8 -> 10:9
+	st_rs3_data_b39t8_g[11],	// E		(1b)
+	st_rs3_data_b39t8_g[8],		// P		(1b) // 7->8
+	st_rs3_data_b6t1_g[6],		// W		(1b) // 5->6
+	3'b000};			// Spare	(3b)
+assign	idtte_data_g[41:0] =
+	tlu_sun4r_tte_g ? idtte_data_sun4r_g[41:0] : idtte_data_sun4v_g[41:0];
+
+// Generate Parity for tte data. Match to DP Macro.
+//assign idtte_data_g[42] = tlu_tlb_data_invrt_parity^(^idtte_data_g[41:0]) ;
+
+/*dff  #(1) stgd1_tlbacc (
+        .din    (tlb_access_en_l),
+        .q    	(tlb_access_en_l_d1),
+        .clk 	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );*/
+
+// Flopping of the tte-tag is delayed by a cycle; the tte-data
+// is not. wr-vld will match the tte-tag.
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_ttetg (
+                .rclk   (clk),
+                .enb_l  (tlu_tlb_access_en_l_d1),
+                .tmb_l  (~se),
+                .clk    (tlb_access0_clk)
+                ) ;    
+`endif
+`endif
+
+// Ship for write to TLB. Doesn't have to be resettable.
+// Shorten by a bit, as parity will be generated based on output.
+// Instead of removing the bit, use it for parity-invrt bit
+// in section below.
+/*dff  #(59) stgw2_ttetg (
+        .din    (idtte_tag_g[58:0]),
+        .q      (idtte_tag_w2[58:0]),
+        .clk 	(tlb_access0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); */
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(58) stgw2_ttetg (
+        .din    ({idtte_tag_b58t55_g[58:55],idtte_tag_b53t0_g[53:0]}),
+        .q      ({idtte_tag_b58t55_w2[58:55],idtte_tag_b53t0_w2[53:0]}),
+        .en (~(tlu_tlb_access_en_l_d1)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(58) stgw2_ttetg (
+        .din    ({idtte_tag_b58t55_g[58:55],idtte_tag_b53t0_g[53:0]}),
+        .q      ({idtte_tag_b58t55_w2[58:55],idtte_tag_b53t0_w2[53:0]}),
+        .en (~(tlu_tlb_access_en_l_d1)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(58) stgw2_ttetg (
+        .din    ({idtte_tag_b58t55_g[58:55],idtte_tag_b53t0_g[53:0]}),
+        .q      ({idtte_tag_b58t55_w2[58:55],idtte_tag_b53t0_w2[53:0]}),
+        .clk 	(tlb_access0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_ttedt (
+                .rclk   (clk),
+                .enb_l  (tlu_lng_ltncy_en_l),
+                //.enb_l  (tlb_access_en_l),
+                .tmb_l  (~se),
+                .clk    (tlb_access1_clk)
+                ) ;    
+`endif
+`endif
+
+// Shorten by a bit, as parity will be generated based on output.
+// Instead of removing the bit, use it for parity-invrt bit
+// in section below.
+/*dff  #(43) stgw2_ttedt (
+        .din    (idtte_data_g[42:0]),
+        .q    	(idtte_data_w2[42:0]),
+        .clk 	(tlb_access1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );*/ 
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(42) stgw2_ttedt (
+        .din    (idtte_data_g[41:0]),
+        .q    	(idtte_data_w2[41:0]),
+        .en (~(tlu_lng_ltncy_en_l)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(42) stgw2_ttedt (
+        .din    (idtte_data_g[41:0]),
+        .q    	(idtte_data_w2[41:0]),
+        .en (~(tlu_lng_ltncy_en_l)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`else
+dff_s  #(42) stgw2_ttedt (
+        .din    (idtte_data_g[41:0]),
+        .q    	(idtte_data_w2[41:0]),
+        .clk 	(tlb_access1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        );
+`endif
+`endif
+
+wire	parity_tag,parity_data ;
+wire	parity_tag_d1,parity_data_d1 ;
+assign tlu_dtlb_tte_tag_w2[58:0] = {idtte_tag_b58t55_w2[58:55],parity_tag_d1,idtte_tag_b53t0_w2[53:0]} ;
+assign tlu_itlb_tte_tag_w2[58:0] = {idtte_tag_b58t55_w2[58:55],parity_tag_d1,idtte_tag_b53t0_w2[53:0]} ;
+assign tlu_dtlb_tte_data_w2[42:0] = {parity_data_d1,idtte_data_w2[41:0]} ;
+assign tlu_itlb_tte_data_w2[42:0] = {parity_data_d1,idtte_data_w2[41:0]} ;
+
+//=========================================================================================
+//	PARITY GEN FOR TTE TAG & DATA
+//=========================================================================================
+
+// Timing Change : Since parity is not required until the write, and the write
+// is preceded by an auto-demap, the parity generation can be hidden in the
+// auto-demap cycle.
+
+wire	tlu_tlb_tag_invrt_parity_d2,tlu_tlb_data_invrt_parity_d1 ;
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(1) stgw2_ttetgpar (
+        .din    (tlu_tlb_tag_invrt_parity_d1),
+        .q      (tlu_tlb_tag_invrt_parity_d2),
+        .en (~(tlu_tlb_access_en_l_d1)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(1) stgw2_ttetgpar (
+        .din    (tlu_tlb_tag_invrt_parity_d1),
+        .q      (tlu_tlb_tag_invrt_parity_d2),
+        .en (~(tlu_tlb_access_en_l_d1)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(1) stgw2_ttetgpar (
+        .din    (tlu_tlb_tag_invrt_parity_d1),
+        .q      (tlu_tlb_tag_invrt_parity_d2),
+        .clk 	(tlb_access0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(1) stgw2_ttedtpar (
+        .din    (tlu_tlb_data_invrt_parity),
+        .q    	(tlu_tlb_data_invrt_parity_d1),
+        .en (~(tlu_lng_ltncy_en_l)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(1) stgw2_ttedtpar (
+        .din    (tlu_tlb_data_invrt_parity),
+        .q    	(tlu_tlb_data_invrt_parity_d1),
+        .en (~(tlu_lng_ltncy_en_l)), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(1) stgw2_ttedtpar (
+        .din    (tlu_tlb_data_invrt_parity),
+        .q    	(tlu_tlb_data_invrt_parity_d1),
+        .clk 	(tlb_access1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+assign	parity_tag =	// XOR-reduction of the flopped tag bits, XORed with tlu_tlb_tag_invrt_parity_d2
+tlu_tlb_tag_invrt_parity_d2^(^{idtte_tag_b58t55_w2[58:55],
+	idtte_tag_b53t0_w2[53:27],idtte_tag_b53t0_w2[25],idtte_tag_b53t0_w2[23:0]}) ; // bits 26 (V) and 24 (U) are left out
+assign parity_data = tlu_tlb_data_invrt_parity_d1^(^idtte_data_w2[41:0]) ; // XOR-reduction of data[41:0], XORed with the invert bit
+//assign	idtte_tag_w2[54] = 
+//tlu_tlb_tag_invrt_parity_d2^(^{idtte_tag_w2[58:55],idtte_tag_w2[53:27],idtte_tag_w2[25],idtte_tag_w2[23:0]}) ;
+//assign idtte_data_w2[42] = tlu_tlb_data_invrt_parity_d1^(^idtte_data_w2[41:0]) ;
+
+dff_s  #(2) stg_partd (
+        .din    ({parity_tag,parity_data}),
+        .q      ({parity_tag_d1,parity_data_d1}),
+        .clk 	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+//=========================================================================================
+//	D-SFAR
+//=========================================================================================
+
+// Before the SPARC_HPV_EN changes, dsfar was written into the MRA. With SPARC_HPV_EN it is
+// written into the per-thread flops below.
+
+wire	[47:0]		dsfar_din ;
+    
+assign	dsfar_din[47:0] = dsfar_wdata[47:0] ;
+
+wire	dsfar0_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_dsfar0 (
+                .rclk   (clk),
+                .enb_l  (dmmu_sfar_wr_en_l[0]),
+                .tmb_l  (~se),
+                .clk    (dsfar0_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread0 D-SFAR register (48b), written from dsfar_din.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) dsfar0_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar0[47:0]),
+        .en (~(dmmu_sfar_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) dsfar0_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar0[47:0]),
+        .en (~(dmmu_sfar_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(48) dsfar0_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar0[47:0]),
+        .clk 	(dsfar0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+
+wire	dsfar1_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_dsfar1 (
+                .rclk   (clk),
+                .enb_l  (dmmu_sfar_wr_en_l[1]),
+                .tmb_l  (~se),
+                .clk    (dsfar1_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread1 D-SFAR register (48b), written from dsfar_din.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) dsfar1_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar1[47:0]),
+        .en (~(dmmu_sfar_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) dsfar1_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar1[47:0]),
+        .en (~(dmmu_sfar_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(48) dsfar1_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar1[47:0]),
+        .clk 	(dsfar1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+wire	dsfar2_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_dsfar2 (
+                .rclk   (clk),
+                .enb_l  (dmmu_sfar_wr_en_l[2]),
+                .tmb_l  (~se),
+                .clk    (dsfar2_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread2 D-SFAR register (48b), written from dsfar_din.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) dsfar2_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar2[47:0]),
+        .en (~(dmmu_sfar_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) dsfar2_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar2[47:0]),
+        .en (~(dmmu_sfar_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(48) dsfar2_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar2[47:0]),
+        .clk 	(dsfar2_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+
+wire	dsfar3_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_dsfar3 (
+                .rclk   (clk),
+                .enb_l  (dmmu_sfar_wr_en_l[3]),
+                .tmb_l  (~se),
+                .clk    (dsfar3_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread3 D-SFAR register (48b), written from dsfar_din.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) dsfar3_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar3[47:0]),
+        .en (~(dmmu_sfar_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) dsfar3_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar3[47:0]),
+        .en (~(dmmu_sfar_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(48) dsfar3_ff (
+        .din    (dsfar_din[47:0]),
+        .q      (dsfar3[47:0]),
+        .clk 	(dsfar3_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+mux4ds #(48) dsfar_mx(	// picks one thread's D-SFAR using tlu_slxa_thrd_sel[3:0]
+        .in0(dsfar0[47:0]),	// Thread0
+        .in1(dsfar1[47:0]),	// Thread1
+        .in2(dsfar2[47:0]),	// Thread2
+        .in3(dsfar3[47:0]),	// Thread3
+	.sel0 (tlu_slxa_thrd_sel[0]),
+	.sel1 (tlu_slxa_thrd_sel[1]),
+	.sel2 (tlu_slxa_thrd_sel[2]),
+	.sel3 (tlu_slxa_thrd_sel[3]),
+        .dout(dsfar[47:0])
+);
+
+
+//=========================================================================================
+//	D-SFSR
+//=========================================================================================
+
+
+dp_mux2es #(24)	dsfsr_wdsel(	// chooses the D-SFSR write data: tlu_dsfsr_din_g or stxa store data
+        	.in0    (tlu_dsfsr_din_g[23:0]),
+     		.in1	({lsu_tlu_st_rs3_data_b47t0_g[23:16],	// stxa
+     			 2'b00,lsu_tlu_st_rs3_data_b47t0_g[13:0]}), // bits 15:14 of the stxa data are forced to zero
+     		// Pre-Bug-4283 form, kept for reference: .in1	(lsu_tlu_st_rs3_data_b47t0_g[23:0]),
+		.sel	(dmmu_any_sfsr_wr),
+	      	.dout	(dsfsr_din[23:0])
+	);
+
+wire	dsfsr0_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_dsfsr0 (
+                .rclk   (clk),
+                .enb_l  (dmmu_sfsr_wr_en_l[0]),
+                .tmb_l  (~se),
+                .clk    (dsfsr0_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread0 D-SFSR bits [23:1]; bit 0 is held in the separate flop with rst_l that follows.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) dsfsr0_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr0[23:1]),
+        .en (~(dmmu_sfsr_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) dsfsr0_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr0[23:1]),
+        .en (~(dmmu_sfsr_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(23) dsfsr0_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr0[23:1]),
+        .clk 	(dsfsr0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) dsfsr0vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr0[0]),
+        .rst_l	(rst_l),
+	.en (~(dmmu_sfsr_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) dsfsr0vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr0[0]),
+        .rst_l	(rst_l),
+	.en (~(dmmu_sfsr_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dffrl_s  #(1) dsfsr0vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr0[0]),
+        .rst_l	(rst_l),
+	.clk 	(dsfsr0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+assign	tlu_dsfsr_flt_vld[0] = dsfsr0[0] ;
+
+wire	dsfsr1_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_dsfsr1 (
+                .rclk   (clk),
+                .enb_l  (dmmu_sfsr_wr_en_l[1]),
+                .tmb_l  (~se),
+                .clk    (dsfsr1_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread1 D-SFSR bits [23:1]; bit 0 is held in the separate flop with rst_l that follows.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) dsfsr1_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr1[23:1]),
+        .en (~(dmmu_sfsr_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) dsfsr1_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr1[23:1]),
+        .en (~(dmmu_sfsr_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(23) dsfsr1_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr1[23:1]),
+        .clk 	(dsfsr1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) dsfsr1vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr1[0]),
+        .rst_l	(rst_l),
+	.en (~(dmmu_sfsr_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) dsfsr1vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr1[0]),
+        .rst_l	(rst_l),
+	.en (~(dmmu_sfsr_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dffrl_s  #(1) dsfsr1vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr1[0]),
+        .rst_l	(rst_l),
+	.clk 	(dsfsr1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+assign	tlu_dsfsr_flt_vld[1] = dsfsr1[0] ;
+
+wire	dsfsr2_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_dsfsr2 (
+                .rclk   (clk),
+                .enb_l  (dmmu_sfsr_wr_en_l[2]),
+                .tmb_l  (~se),
+                .clk    (dsfsr2_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread2 D-SFSR bits [23:1]; bit 0 is held in the separate flop with rst_l that follows.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) dsfsr2_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr2[23:1]),
+        .en (~(dmmu_sfsr_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) dsfsr2_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr2[23:1]),
+        .en (~(dmmu_sfsr_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(23) dsfsr2_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr2[23:1]),
+        .clk 	(dsfsr2_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) dsfsr2vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr2[0]),
+        .rst_l	(rst_l),
+	.en (~(dmmu_sfsr_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) dsfsr2vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr2[0]),
+        .rst_l	(rst_l),
+	.en (~(dmmu_sfsr_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dffrl_s  #(1) dsfsr2vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr2[0]),
+        .rst_l	(rst_l),
+	.clk 	(dsfsr2_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+assign	tlu_dsfsr_flt_vld[2] = dsfsr2[0] ;
+
+wire	dsfsr3_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_dsfsr3 (
+                .rclk   (clk),
+                .enb_l  (dmmu_sfsr_wr_en_l[3]),
+                .tmb_l  (~se),
+                .clk    (dsfsr3_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread3 D-SFSR bits [23:1]; bit 0 is held in the separate flop with rst_l that follows.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) dsfsr3_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr3[23:1]),
+        .en (~(dmmu_sfsr_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) dsfsr3_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr3[23:1]),
+        .en (~(dmmu_sfsr_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(23) dsfsr3_ff (
+        .din    (dsfsr_din[23:1]),
+        .q      (dsfsr3[23:1]),
+        .clk 	(dsfsr3_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) dsfsr3vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr3[0]),
+        .rst_l	(rst_l),
+	.en (~(dmmu_sfsr_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) dsfsr3vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr3[0]),
+        .rst_l	(rst_l),
+	.en (~(dmmu_sfsr_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dffrl_s  #(1) dsfsr3vld_ff (
+        .din    (dsfsr_din[0]),
+        .q      (dsfsr3[0]),
+        .rst_l	(rst_l),
+	.clk 	(dsfsr3_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+assign	tlu_dsfsr_flt_vld[3] = dsfsr3[0] ;
+
+dp_mux4ds #(24)	dsfsr_msel(
+     		.in0	(dsfsr0[23:0]),
+     		.in1	(dsfsr1[23:0]),
+     		.in2	(dsfsr2[23:0]),
+     		.in3	(dsfsr3[23:0]),
+		.sel0_l	(~tlu_slxa_thrd_sel[0]),
+		.sel1_l	(~tlu_slxa_thrd_sel[1]),
+		.sel2_l	(~tlu_slxa_thrd_sel[2]),
+		.sel3_l	(~tlu_slxa_thrd_sel[3]),
+	      	.dout	(dsfsr[23:0])
+	);
+
+//=========================================================================================
+//	I-SFSR
+//=========================================================================================
+
+// Should be able to reduce the width of these regs !!!
+
+
+dp_mux2es #(24)	isfsr_wdsel(	// chooses the I-SFSR write data: tlu_isfsr_din_g or stxa store data
+        	.in0    (tlu_isfsr_din_g[23:0]),
+     		.in1	({lsu_tlu_st_rs3_data_b47t0_g[23:16],	// stxa
+     			 2'b00,lsu_tlu_st_rs3_data_b47t0_g[13:0]}),	// bits 15:14 of the stxa data are forced to zero
+     		// Pre-Bug-4283 form, kept for reference: .in1	(lsu_tlu_st_rs3_data_b47t0_g[23:0]),
+		.sel	(immu_any_sfsr_wr),
+	      	.dout	(isfsr_din[23:0])
+	);
+
+wire	isfsr0_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_isfsr0 (
+                .rclk   (clk),
+                .enb_l  (immu_sfsr_wr_en_l[0]),
+                .tmb_l  (~se),
+                .clk    (isfsr0_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread0 I-SFSR bits [23:1]; bit 0 is held in the separate flop with rst_l that follows.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) isfsr0_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr0[23:1]),
+        .en (~(immu_sfsr_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) isfsr0_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr0[23:1]),
+        .en (~(immu_sfsr_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(23) isfsr0_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr0[23:1]),
+        .clk 	(isfsr0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+// Chandra - This has changed.
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) isfsrvld0_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr0[0]),
+        .rst_l	(rst_l),      .en (~(immu_sfsr_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) isfsrvld0_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr0[0]),
+        .rst_l	(rst_l),      .en (~(immu_sfsr_wr_en_l[0])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dffrl_s  #(1) isfsrvld0_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr0[0]),
+        .rst_l	(rst_l),      .clk 	(isfsr0_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+assign	tlu_isfsr_flt_vld[0] = isfsr0[0] ;
+
+wire	isfsr1_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_isfsr1 (
+                .rclk   (clk),
+                .enb_l  (immu_sfsr_wr_en_l[1]),
+                .tmb_l  (~se),
+                .clk    (isfsr1_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread1 I-SFSR bits [23:1]; bit 0 is held in the separate flop with rst_l that follows.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) isfsr1_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr1[23:1]),
+        .en (~(immu_sfsr_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) isfsr1_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr1[23:1]),
+        .en (~(immu_sfsr_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(23) isfsr1_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr1[23:1]),
+        .clk 	(isfsr1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+// Chandra - This has changed.
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) isfsrvld1_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr1[0]),
+        .rst_l	(rst_l),		.en (~(immu_sfsr_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) isfsrvld1_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr1[0]),
+        .rst_l	(rst_l),		.en (~(immu_sfsr_wr_en_l[1])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dffrl_s  #(1) isfsrvld1_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr1[0]),
+        .rst_l	(rst_l),		.clk 	(isfsr1_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+assign	tlu_isfsr_flt_vld[1] = isfsr1[0] ;
+
+wire	isfsr2_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_isfsr2 (
+                .rclk   (clk),
+                .enb_l  (immu_sfsr_wr_en_l[2]),
+                .tmb_l  (~se),
+                .clk    (isfsr2_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread2 I-SFSR bits [23:1]; bit 0 is held in the separate flop with rst_l that follows.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) isfsr2_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr2[23:1]),
+        .en (~(immu_sfsr_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) isfsr2_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr2[23:1]),
+        .en (~(immu_sfsr_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(23) isfsr2_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr2[23:1]),
+        .clk 	(isfsr2_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+// Chandra - This has changed.
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) isfsrvld2_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr2[0]),
+        .rst_l	(rst_l),	.en (~(immu_sfsr_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) isfsrvld2_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr2[0]),
+        .rst_l	(rst_l),	.en (~(immu_sfsr_wr_en_l[2])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dffrl_s  #(1) isfsrvld2_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr2[0]),
+        .rst_l	(rst_l),	.clk 	(isfsr2_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+assign	tlu_isfsr_flt_vld[2] = isfsr2[0] ;
+
+wire	isfsr3_clk ;
+`ifdef FPGA_SYN_CLK_EN
+`else
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf clkbf_isfsr3 (
+                .rclk   (clk),
+                .enb_l  (immu_sfsr_wr_en_l[3]),
+                .tmb_l  (~se),
+                .clk    (isfsr3_clk)
+                ) ;    
+`endif
+`endif
+
+// Thread3 I-SFSR bits [23:1]; bit 0 is held in the separate flop with rst_l that follows.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) isfsr3_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr3[23:1]),
+        .en (~(immu_sfsr_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(23) isfsr3_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr3[23:1]),
+        .en (~(immu_sfsr_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dff_s  #(23) isfsr3_ff (
+        .din    (isfsr_din[23:1]),
+        .q      (isfsr3[23:1]),
+        .clk 	(isfsr3_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+// Chandra - This has changed.
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) isfsrvld3_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr3[0]),
+        .rst_l	(rst_l),	.en (~(immu_sfsr_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+`ifdef FPGA_SYN_CLK_DFF
+dffrle_s  #(1) isfsrvld3_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr3[0]),
+        .rst_l	(rst_l),	.en (~(immu_sfsr_wr_en_l[3])), .clk(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`else
+dffrl_s  #(1) isfsrvld3_ff (
+        .din    (isfsr_din[0]),
+        .q      (isfsr3[0]),
+        .rst_l	(rst_l),	.clk 	(isfsr3_clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+`endif
+`endif
+
+assign	tlu_isfsr_flt_vld[3] = isfsr3[0] ;
+
+dp_mux4ds #(24)	isfsr_msel(
+     		.in0	(isfsr0[23:0]),
+     		.in1	(isfsr1[23:0]),
+     		.in2	(isfsr2[23:0]),
+     		.in3	(isfsr3[23:0]),
+		.sel0_l	(~tlu_slxa_thrd_sel[0]),
+		.sel1_l	(~tlu_slxa_thrd_sel[1]),
+		.sel2_l	(~tlu_slxa_thrd_sel[2]),
+		.sel3_l	(~tlu_slxa_thrd_sel[3]),
+	      	.dout	(isfsr[23:0])
+	);
+
+//=========================================================================================
+//	D-SFAR (legacy read from mra_rdata, commented out)
+//=========================================================================================
+/*
+`ifdef SPARC_HPV_EN
+`else
+assign	dsfar[47:0] = mra_rdata[`MRA_DSFAR_HI:`MRA_DSFAR_LO];
+`endif
+*/
+
+//=========================================================================================
+//	Muxing for ldxa read
+//=========================================================================================
+
+// Note - collapse dtsb/itsb into one leg of the mux. Similar for
+// dtag_access/itag_access.
+// read of zcps1_itsb,zcps1_dtsb collapsed into read of dtsb.
+// read of nzcps0_dtsb,nzcps0_itsb collapsed into read of dtag_access.
+// read of nzcps1_dtsb,nzcps1_itsb collapsed into read of dsfar.
+
+// use rs3 to return data.
+
+//*****************************************************************
+//	SPARC_HPV_EN 
+//*****************************************************************
+
+// Warning for Grape Mapper : Be careful about loading on replicated
+// msb.
+
+// First Level, Mux 1
+// This is done in the E stage to save on flops.
+// NOTE: the selects other than bit 0 are also E-stage signals; select bit 0 is delayed by a cycle.
+mux3ds #(48) ldxa_l1mx1_e(
+        	.in0(tsb_ps0[47:0]), // becomes ps0 tsb with SPARC_HPV_EN
+        	.in1(tsb_ps1[47:0]), // becomes ps1 tsb with SPARC_HPV_EN
+        	.in2(tag_access_w2[47:0]),
+		.sel0(tlu_ldxa_l1mx1_sel[1]),
+		.sel1(tlu_ldxa_l1mx1_sel[2]),
+		.sel2(tlu_ldxa_l1mx1_sel[3]),
+		.dout(ldxa_l1mx1_dout_e[47:0])
+);
+
+// New
+dff_s  #(48) l1mx1_ff (
+        .din    (ldxa_l1mx1_dout_e[47:0]),
+        .q      (ldxa_l1mx1_dout_m[47:0]),
+        .clk 	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+wire [63:0] ldxa_l1mx1_dout_final ;
+
+// New
+assign	ldxa_l1mx1_dout_final[63:0] =
+		// Note : this bit of the mx sel is stage delayed relative to the others.
+		tlu_ldxa_l1mx1_sel[0] ? 
+		tag_target[63:0] : // tag_target.
+		{{16{ldxa_l1mx1_dout_m[47]}},ldxa_l1mx1_dout_m[47:0]} ; // tsb_ps0/ps1,tag_access
+
+/*mux4ds #(64) ldxa_l1mx1(
+     		.in0(tag_target[63:0]),
+        	.in1({{16{tsb_ps0[47]}},tsb_ps0[47:0]}), // becomes ps0 tsb with SPARC_HPV_EN
+        	.in2({{16{tsb_ps1[47]}},tsb_ps1[47:0]}), // becomes ps1 tsb with SPARC_HPV_EN
+        	.in3({{16{tag_access_w2[47]}},tag_access_w2[47:0]}),
+		.sel0(tlu_ldxa_l1mx1_sel[0]),
+		.sel1(tlu_ldxa_l1mx1_sel[1]),
+		.sel2(tlu_ldxa_l1mx1_sel[2]),
+		.sel3(tlu_ldxa_l1mx1_sel[3]),
+		.dout(ldxa_l1mx1_dout[63:0])
+);*/
+
+wire	[47:0]	ldxa_l1mx2_dout ;
+// First Level, Mux 2 - This is done in the M stage. All legs are 48b wide.
+mux4ds #(48) ldxa_l1mx2(
+        	.in0({24'd0,dsfsr[23:0]}),	// D-SFSR, zero-extended to 48b
+        	.in1(dsfar[47:0]),	// D-SFAR
+        	.in2({24'd0,isfsr[23:0]}),	// I-SFSR, zero-extended to 48b
+     		.in3({37'd0,ptr_ctxt_cfg[5:3],5'd0,ptr_ctxt_cfg[2:0]}),	// cfg[5:3] -> bits 10:8, cfg[2:0] -> bits 2:0, rest zero
+		.sel0(tlu_ldxa_l1mx2_sel[0]),
+		.sel1(tlu_ldxa_l1mx2_sel[1]),
+		.sel2(tlu_ldxa_l1mx2_sel[2]),
+		.sel3(tlu_ldxa_l1mx2_sel[3]),
+		.dout(ldxa_l1mx2_dout[47:0])
+);
+
+wire	[63:0]	tlu_ldxa_data_m ;
+mux3ds #(64)	ldxa_fmx (	// final-level mux of the ldxa read data (64b)
+    		.in0	(ldxa_l1mx1_dout_final[63:0]),
+    		// superseded connection: .in0	(ldxa_l1mx1_dout[63:0]),
+    		.in1	({{16{ldxa_l1mx2_dout[47]}},ldxa_l1mx2_dout[47:0]}),	// bit 47 replicated into [63:48]
+     		.in2	({{16{tlu_idtsb_8k_ptr[47]}},tlu_idtsb_8k_ptr[47:0]}),	// bit 47 replicated into [63:48]
+		.sel0	(tlu_ldxa_l2mx1_sel[0]),
+		.sel1	(tlu_ldxa_l2mx1_sel[1]),
+		.sel2	(tlu_ldxa_l2mx1_sel[2]),
+	      	.dout	(tlu_ldxa_data_m[63:0])
+	      	// superseded connection: .dout	(tlu_ldxa_data_e[63:0])
+	);
+
+dff_s  #(64) stgg_eldxa (
+        .din    (tlu_ldxa_data_m[63:0]),
+        .q    	(lsu_exu_ldxa_data_g[63:0]),
+        .clk 	(clk),
+        .se     (1'b0),       .si (),          .so ()
+        ); 
+
+endmodule
+
+
Index: /trunk/T1-CPU/tlu/tlu_tcl.v
===================================================================
--- /trunk/T1-CPU/tlu/tlu_tcl.v	(revision 6)
+++ /trunk/T1-CPU/tlu/tlu_tcl.v	(revision 6)
@@ -0,0 +1,6610 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: tlu_tcl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////
+/*
+//	Description:	Trap Control Logic
+*/
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+`include "tlu.h"
+
+module tlu_tcl (/*AUTOARG*/
+   // Outputs
+   tlu_ifu_trappc_vld_w1, tlu_ifu_trapnpc_vld_w1, tlu_ifu_trap_tid_w1, 
+   tlu_trap_hpstate_enb, tsa_wr_tpl, tsa_rd_tid, tsa_rd_tpl, tsa_rd_en, 
+   tsa_wr_tid, tsa_wr_vld, tsa_rd_vld_e, tlu_lsu_tl_zero, tlu_restore_pc_sel_w1, 
+   tlu_early_flush_pipe_w, tlu_early_flush_pipe2_w, tlu_exu_early_flush_pipe_w, 
+   tlu_agp_tid_w2, tsa_tstate_en, tsa_ttype_en, tlu_tl_gt_0_w2, 
+   tlu_exu_agp_tid, tlu_true_pc_sel_w, // tlu_retry_inst_m, tlu_done_inst_m, 
+   tlu_tick_en_l, tlu_tickcmp_en_l, tlu_stickcmp_en_l, tlu_local_flush_w, 
+   tlu_tba_en_l, tlu_thrd_wsel_w2, tlu_thread_wsel_g, tlu_final_ttype_w2,
+   tlu_thread_inst_vld_g, tlu_update_pc_l_w, tlu_htickcmp_en_l,
+   tsa_pc_en, tsa_npc_en, tlu_hyperv_rdpr_sel, tlu_wsr_inst_nq_g,
+   tlu_exu_priv_trap_m, tlu_ibrkpt_trap_w2, tlu_full_flush_pipe_w2,  
+   tlu_pstate_din_sel0, tlu_pstate_din_sel1, tlu_pstate_din_sel2, 
+   tlu_pstate_din_sel3, tlu_update_pstate_l_w2, tlu_trp_lvl, 
+   tlu_pil, tlu_wr_tsa_inst_w2, tlu_trap_cwp_en, // tlu_lsu_priv_trap_w,
+   tlu_exu_cwp_retry_m, tlu_exu_cwpccr_update_m, tlu_lsu_priv_trap_m,
+   tlu_lsu_asi_update_m, tlu_lsu_tid_m, tlu_pc_mxsel_w2, // tlu_lsu_asi_m,
+   tlu_select_tba_w2, tdp_select_tba_w2, tlu_set_sftint_l_g, 
+   tlu_clr_sftint_l_g, tlu_wr_sftint_l_g, tlu_sftint_mx_sel, tlu_itag_acc_sel_g, 
+   tlu_sftint_en_l_g, tlu_sftint_penc_sel, tlu_sftint_vld, tlu_int_tid_m, 
+   tlu_tickcmp_sel, tlu_incr_tick, immu_sfsr_trp_wr, tlu_select_redmode,
+   tlu_isfsr_din_g, // tlu_dsfsr_din_g, tlu_tag_access_ctxt_sel_m, 
+   tlu_tick_npt, tlu_thrd_rsel_e, tlu_inst_vld_nq_m, tlu_pic_cnt_en_m,  
+   tlu_rdpr_mx1_sel, tlu_rdpr_mx2_sel, tlu_rdpr_mx3_sel, tlu_rdpr_mx4_sel, 
+   tlu_rdpr_mx5_sel, tlu_rdpr_mx6_sel, tlu_rdpr_mx7_sel, tlu_lsu_pstate_am,
+   tlu_lsu_redmode_rst_d1, lsu_tlu_rsr_data_mod_e, tlu_addr_msk_g, 
+   // added for hypervisor support
+   tlu_dnrtry0_inst_g, tlu_dnrtry1_inst_g, tlu_dnrtry2_inst_g, tlu_dnrtry3_inst_g,
+   tlu_thrd_traps_w2, tlu_tick_ctl_din, tsa_htstate_en, tlu_por_rstint_g,
+   tlu_hintp_vld, tlu_rerr_vld, tlu_final_offset_w1, // tlu_ifu_trapnpc_w2, 
+   so, tlu_sscan_tcl_data, tlu_rst, // tlu_ifu_trappc_w2, tlu_rst_l, 
+   // Inputs
+   ifu_tlu_sraddr_d, ifu_tlu_rsr_inst_d, lsu_tlu_early_flush_w, ifu_tlu_pc_oor_e,
+   tlu_wsr_data_b63_w, tlu_wsr_data_w, lsu_tlu_ttype_m2, ifu_tlu_flush_fd_w, 
+   lsu_tlu_ttype_vld_m2, ifu_tlu_done_inst_d, ifu_tlu_retry_inst_d, ifu_tlu_ttype_m, 
+   ifu_tlu_ttype_vld_m, exu_tlu_ttype_m, exu_tlu_ttype_vld_m, exu_tlu_spill, 
+   exu_tlu_spill_other, exu_tlu_spill_wtype, exu_tlu_va_oor_m, exu_tlu_spill_tid,
+   ifu_tlu_sir_inst_m, ifu_tlu_inst_vld_m, ifu_tlu_thrid_d, tlu_tckctr_in, 
+   ifu_tlu_immu_miss_m, exu_tlu_va_oor_jl_ret_m, ifu_tlu_trap_m, lsu_tlu_wsr_inst_e,  
+   exu_tlu_cwp_cmplt, exu_tlu_cwp_retry, exu_tlu_cwp_cmplt_tid, exu_tlu_ue_trap_m, 
+   ifu_tlu_rstint_m, ifu_tlu_hwint_m, ifu_tlu_swint_m, pich_wrap_flg, tlu_pic_wrap_e, 
+   pich_onebelow_flg, pich_twobelow_flg, pib_picl_wrap, pib_pich_wrap, tlu_tcc_inst_w, 
+   int_tlu_rstid_m, tlu_int_pstate_ie, tlu_int_redmode, ifu_npc_w, tlu_pcr_ut, 
+   tlu_sftint_id, lsu_tlu_async_ttype_vld_g, lsu_tlu_defr_trp_taken_g, tlu_pcr_st,
+   lsu_tlu_misalign_addr_ldst_atm_m, exu_tlu_misalign_addr_jmpl_rtn_m, 
+   lsu_tlu_async_tid_g, lsu_tlu_priv_action_g, lsu_tlu_async_ttype_g, lsu_tlu_wtchpt_trp_g, 
+   ifu_tlu_priv_violtn_m, ifu_lsu_memref_d, tlu_pstate_priv, tlu_isfsr_flt_vld, 
+   tlu_pstate_am, ffu_tlu_trap_ieee754, ffu_tlu_trap_other, ffu_tlu_trap_ue,
+   ffu_ifu_tid_w2, ffu_tlu_ill_inst_m, ifu_tlu_npc_m,  // ifu_tlu_pc_m, 
+   lsu_tlu_rsr_data_e, lsu_tlu_squash_va_oor_m, // tlu_restore_npc_w1,
+   spu_tlu_rsrv_illgl_m, // exu_tlu_cwp0, exu_tlu_cwp1, exu_tlu_cwp2, exu_tlu_cwp3, 
+   //
+   // added for hypervisor support
+   tlu_hpstate_priv, tlu_htstate_rw_d, tlu_htstate_rw_g, tlu_cwp_no_change_m, 
+   tlu_hscpd_dacc_excpt_m, tlu_htickcmp_rw_e, tlu_gl_rw_m, // tlu_gl_rw_g, 
+   tlu_hpstate_enb, tlu_cpu_mondo_cmp, tlu_dev_mondo_cmp,
+   tlu_resum_err_cmp, tlu_hintp, tlu_hpstate_tlz, tlu_qtail_dacc_excpt_m, 
+   pib_priv_act_trap_m, rclk, arst_l, grst_l, si, se, rst_tri_en, ctu_sscan_tid
+   );	
+
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+// End of automatics
+input [`TLU_ASR_ADDR_WIDTH-1:0] ifu_tlu_sraddr_d;      // addr of sr(st/pr)
+input       ifu_tlu_rsr_inst_d;    // valid rd sr(st/pr)
+// input       ifu_tlu_wsr_inst_d;    // valid wr sr(st/pr)
+input       lsu_tlu_wsr_inst_e;    // valid wr sr(st/pr)
+input   	tlu_wsr_data_b63_w;    // b63 of wsr data
+// input   	tlu_wsr_data_b16_w;    // b16 of wsr data
+input [3:0] tlu_wsr_data_w;    // pr/st data to irf.
+input [8:0]	lsu_tlu_ttype_m2;	// trap type in m2.
+input		lsu_tlu_ttype_vld_m2;	// trap is signaled.
+// added asynchronous trap to handle correctable dmmu parity errors
+input       lsu_tlu_defr_trp_taken_g; // lsu asynchronous trap valid
+input       lsu_tlu_async_ttype_vld_g; // lsu asynchronous trap valid
+input [6:0] lsu_tlu_async_ttype_g;  // lsu asynchronous trap type 
+input [1:0] lsu_tlu_async_tid_g; // asynchronous trap - thread
+// Removed unused bits 
+// input	[1:0]	lsu_tlu_ttype_tid_m2;	// trapping thread
+input		ifu_tlu_done_inst_d;  	// done is valid
+input		ifu_tlu_retry_inst_d;  // retry is valid
+
+input [8:0]	ifu_tlu_ttype_m;	// ifu trap type. NOTE(review): comment used to say "m2", but the signal carries the _m suffix - confirm stage
+input		ifu_tlu_ttype_vld_m;	// trap is signaled (qualifies ifu_tlu_ttype_m, presumably - confirm).
+input		ifu_tlu_trap_m;	    // trap is signaled.
+// modified for timing
+input	    ifu_tlu_flush_fd_w;	    // instruction flush signal 
+// input		ifu_tlu_flush_m;	    // instruction flush signal 
+input		lsu_tlu_early_flush_w;	// early flush with tlb from LSU 
+
+input [8:0]	exu_tlu_ttype_m;	// exu src ttype
+input		exu_tlu_ttype_vld_m;	// exu src ttype vld
+input		exu_tlu_ue_trap_m;	// exu ue ecc trap indicator 
+//
+// added for timing
+/*
+input [2:0]  exu_tlu_cwp0;  // cwp - thread0
+input [2:0]  exu_tlu_cwp1;  // cwp - thread1
+input [2:0]  exu_tlu_cwp2;  // cwp - thread2
+input [2:0]  exu_tlu_cwp3;  // cwp - thread3
+*/
+//
+input		exu_tlu_spill;		// spill trap
+input [1:0]	exu_tlu_spill_tid;	// spill trap - thrid
+input       exu_tlu_spill_other;    // From exu of sparc_exu.v
+input [2:0] exu_tlu_spill_wtype;    // From exu of sparc_exu.v
+input		exu_tlu_va_oor_m;	// ??? - to be used in sfsr
+input		exu_tlu_va_oor_jl_ret_m; // ??? - to be used in sfsr
+input		ifu_tlu_sir_inst_m; 	// sir instruction executed
+
+input       ifu_tlu_inst_vld_m;    // inst valid. NOTE(review): comment used to say "w-stage", but the signal carries the _m suffix - confirm
+input       ifu_tlu_pc_oor_e;    // NOTE(review): old comment was a copy of the line above; by name, pc out-of-range in e-stage - confirm
+input [1:0] ifu_tlu_thrid_d;   	// Thread id.
+// input       lsu_tlu_dmmu_miss_g;   // ld/st misses in dtlb.
+//
+// modified the stage for timing 
+//input     ifu_tlu_immu_miss_e;   // i-side page fault
+input       ifu_tlu_immu_miss_m;   // i-side page fault
+
+input       exu_tlu_cwp_cmplt;
+input       exu_tlu_cwp_retry;
+input [1:0] exu_tlu_cwp_cmplt_tid;
+input       tlu_cwp_no_change_m;
+// input       exu_tlu_cwp_fastcmplt_w;
+// input moved to tlu_misctl
+// input [2:0]	tsa_rdata_cwp;
+// input [`TSA_TTYPE_WIDTH-1:0]	tsa_rdata_ttype;
+// input [7:0]	tsa_rdata_ccr;
+// input [7:0]	tsa_rdata_asi;
+
+input		ifu_tlu_rstint_m;	// reset interrupt
+input		ifu_tlu_hwint_m;	// hw interrupt
+input		ifu_tlu_swint_m;	// sw interrupt
+input [5:0]	int_tlu_rstid_m;	// reset type
+input [`TLU_THRD_NUM-1:0] tlu_int_pstate_ie;      // interrupt enable
+input [`TLU_THRD_NUM-1:0] tlu_int_redmode;        // redmode
+// input  [`TLU_THRD_NUM-1:0]	const_cpuid;
+
+input [`TLU_THRD_NUM-1:0] tlu_sftint_id;
+input [`TLU_THRD_NUM-1:0] pich_wrap_flg;
+input [`TLU_THRD_NUM-1:0] pich_onebelow_flg;
+input [`TLU_THRD_NUM-1:0] pich_twobelow_flg;
+input [`TLU_THRD_NUM-1:0] pib_picl_wrap;
+// modified for bug 5436: Niagara 2.0
+input [`TLU_THRD_NUM-1:0] tlu_pcr_ut;
+input [`TLU_THRD_NUM-1:0] tlu_pcr_st;
+// input tlu_pic_wrap_e, tlu_pcr_ut_e, tlu_pcr_st_e;
+input tlu_pic_wrap_e;
+
+// input		tlu_tick_match;	// match between tick and tick-cmp 
+// input		tlu_stick_match;	// match between tick and stick-cmp 
+// input  [`TLU_THRD_NUM-1:0] pib_pic_wrap; // overflow for the pic registers - lvl15 int 
+// modified for timing support
+// input  [`TLU_THRD_NUM-1:0] pib_priv_act_trap; // access priv violation of the pics 
+input  [`TLU_THRD_NUM-1:0] pib_priv_act_trap_m; // access priv violation of the pics 
+
+input		lsu_tlu_misalign_addr_ldst_atm_m;// misaligned addr - ld,st,atomic 
+input		exu_tlu_misalign_addr_jmpl_rtn_m;// misaligned addr - jmpl or return addr
+// input		lsu_tlu_priv_violtn_g;		// privileged violation trap
+input		lsu_tlu_priv_action_g;		// privileged action trap
+input		lsu_tlu_wtchpt_trp_g;		// watchpt trap has occurred.
+
+input		ifu_tlu_priv_violtn_m;
+input		ifu_lsu_memref_d;
+input [3:0] tlu_pstate_priv;
+input [3:0] tlu_pstate_am;
+input [3:0]	tlu_isfsr_flt_vld;
+input		ffu_tlu_trap_ieee754;
+input		ffu_tlu_trap_other;
+input		ffu_tlu_trap_ue;
+input		ffu_tlu_ill_inst_m; // illegal instruction trap from ffu		
+input [1:0]	ffu_ifu_tid_w2;
+input [7:0]	lsu_tlu_rsr_data_e;
+input       lsu_tlu_squash_va_oor_m;  // squash va_oor for mem-op.
+input       spu_tlu_rsrv_illgl_m;    // illegal instruction trap from spu 
+input       tlu_htstate_rw_d;
+input       tlu_htstate_rw_g;
+input       tlu_htickcmp_rw_e;
+// input       tlu_gl_rw_g;
+input       tlu_gl_rw_m;
+input [`TLU_THRD_NUM-1:0] tlu_hpstate_priv;
+input [`TLU_THRD_NUM-1:0] tlu_hpstate_enb;
+input [`TLU_THRD_NUM-1:0] tlu_hpstate_tlz;
+input [`TLU_THRD_NUM-1:0] tlu_cpu_mondo_cmp;
+input [`TLU_THRD_NUM-1:0] tlu_dev_mondo_cmp;
+input [`TLU_THRD_NUM-1:0] tlu_resum_err_cmp;
+input [`TLU_THRD_NUM-1:0] tlu_hintp;
+// input [48:0] ifu_tlu_pc_m;
+input [48:0] ifu_tlu_npc_m;
+// input [33:0] tlu_partial_trap_pc_w1;
+// modified for bug 3017
+// logic moved to tlu_misctl
+input tlu_hscpd_dacc_excpt_m;
+input tlu_qtail_dacc_excpt_m;
+// added for timing
+input [4:0] tlu_hyperv_rdpr_sel;
+input [1:0]	tlu_tckctr_in;
+input		rclk; // clock
+// sscan tid
+input [`TLU_THRD_NUM-1:0] ctu_sscan_tid;
+//
+// modified to abide by the Niagara reset methodology
+input		grst_l;				// global reset - active low
+input		arst_l;				// reset - active low (presumably the asynchronous reset - confirm)
+input		rst_tri_en;			// NOTE(review): old comment was a copy of the reset one; by name a reset tri-state enable - confirm
+input		si;				    // global scan-in 
+input		se;				    // presumably scan enable (old comment said scan-out, but `so` is the scan-out port) - confirm
+
+/*autooutput*/
+// beginning of automatic outputs (from unused autoinst outputs)
+// end of automatics
+output	  	 tlu_ifu_trappc_vld_w1;	// trap pc or pc on retry.
+output	  	 tlu_ifu_trapnpc_vld_w1;// NOTE(review): old comment repeated the pc one; by name this qualifies the trap npc - confirm
+output [1:0] tlu_ifu_trap_tid_w1;	// thread id.
+output	     tlu_trap_hpstate_enb;	
+output	     tlu_restore_pc_sel_w1;	
+output [`TLU_THRD_NUM-1:0] pib_pich_wrap;
+output tlu_tcc_inst_w;
+
+output [2:0] tsa_wr_tpl;		// trap level for write.
+output [1:0] tsa_rd_tid;		// thread id for read. NOTE(review): old comment said "wr" - looks swapped with tsa_wr_tid
+output [2:0] tsa_rd_tpl;		// trap level for read.
+output [1:0] tsa_wr_tid;		// thread id for write. NOTE(review): old comment said "rd" - looks swapped with tsa_rd_tid
+output [1:0] tsa_wr_vld;		// write pointer vld
+// modified for timing
+output 		 tsa_rd_vld_e;		// read pointer valid (e-stage by suffix - confirm)
+output 		 tsa_rd_en;		    // read enable, going by the name (old comment: "read pointer") - confirm
+output [3:0] tlu_lsu_tl_zero;	// trap level is zero.
+// output		 tlu_ifu_flush_pipe_w;  // exception related flush
+// output		 tlu_flush_pipe_w;  	// exception related flush - local copy
+// added for timing 
+// output		 tlu_flush_all_w2;  	// exception related flush - local copy
+// output		 tlu_flush_all_w;  	// exception related flush - local copy
+output		 tlu_local_flush_w;  	// exception related flush - local copy
+output		 tlu_early_flush_pipe_w;  	// exception related flush - local copy
+output		 tlu_early_flush_pipe2_w;  	// exception related flush - local copy
+output		 tlu_exu_early_flush_pipe_w;  	// exception related flush - to exu
+output		 tlu_full_flush_pipe_w2;  	    // exception related flush - to exu
+// output  [2:0]   tlu_exu_agp;            // alternate global pointer
+// output          tlu_exu_agp_swap;       // switch globals
+// modified due to timing
+// output [1:0] tlu_agp_tid_g;        // thread that agp refers to
+output [1:0] tlu_agp_tid_w2;        // thread that agp refers to
+output [1:0] tlu_exu_agp_tid;        // thread that agp refers to
+output		 tsa_pc_en;		// enable write of pc in tsa.
+output	     tsa_npc_en;		// enable write of npc in tsa.
+output	     tsa_tstate_en;		// enable write of tstate in tsa.
+output	     tsa_htstate_en;	// enable write of htstate in tsa.
+output	     tsa_ttype_en;		// enable write of ttype in tsa.
+// modified due to timing
+// output       tlu_tl_gt_0_g;     // trap level greater than 0
+output       tlu_tl_gt_0_w2;     // trap level greater than 0
+// modified for timing
+output [2:0] tlu_true_pc_sel_w;
+// output       tlu_retry_inst_m;  // valid retry inst
+// output       tlu_done_inst_m;   // valid done inst
+// output       tlu_dnrtry_inst_m_l; // valid done/retry inst - g
+output       tlu_tick_en_l;     // tick reg write enable
+output [`TLU_THRD_NUM-1:0]  tlu_tickcmp_en_l;  // tick compare reg write enable
+output [`TLU_THRD_NUM-1:0]  tlu_stickcmp_en_l; // stick compare reg write enable
+output [`TLU_THRD_NUM-1:0]  tlu_htickcmp_en_l; // update htickcmp register 
+output [`TLU_THRD_NUM-1:0]  tlu_tba_en_l;      // tba reg write enable
+output [`TLU_THRD_NUM-1:0]  tlu_thrd_wsel_w2;   // thread requiring tsa write.
+output [`TLU_THRD_NUM-1:0]  tlu_thread_wsel_g; // thread for instruction fetched 
+output [`TSA_TTYPE_WIDTH-1:0] tlu_final_ttype_w2;  // selected ttype - w2
+// output   tlu_async_trap_taken_g; // async trap taken
+output [`TLU_THRD_NUM-1:0]  tlu_thread_inst_vld_g; // valid inst for a thread
+// output [`TLU_THRD_NUM-1:0]  tlu_thread_inst_vld_w2; // valid inst for a thread
+// output [`TLU_THRD_NUM-1:0]  tlu_update_pc_l_m; // update pc or npc for a thread
+output [`TLU_THRD_NUM-1:0]  tlu_update_pc_l_w; // update pc or npc for a thread
+// output [`TLU_THRD_NUM-1:0]  tlu_thrd_rsel_g; // thread requiring tsa read
+// modified for bug 1767
+// output         	tlu_select_tle;	// tle/cle value on trap 
+// output [1:0]   tlu_select_mmodel;	// mem. model on trap
+output 	  	tlu_select_redmode;	// redmode setting on trap
+// Modified for bug 1575
+//
+// output   [2:0]  tlu_pstate_din_sel;    // sel source of tsa wdata
+output [1:0] tlu_pstate_din_sel0;    // sel source of tsa wdata
+output [1:0] tlu_pstate_din_sel1;    // sel source of tsa wdata
+output [1:0] tlu_pstate_din_sel2;    // sel source of tsa wdata
+output [1:0] tlu_pstate_din_sel3;    // sel source of tsa wdata
+//
+// modified due to timing
+// output [3:0] tlu_update_pstate_l_g; // pstate write enable
+output [3:0] tlu_update_pstate_l_w2; // pstate write enable
+output [2:0] tlu_trp_lvl;    // trp lvl - mx'ed
+output [3:0] tlu_pil;		  // pil - mx'ed
+// output       tlu_wsr_inst_g; // write state inst
+// 
+// added for timing
+output       tlu_wsr_inst_nq_g; // write state inst
+// output       tlu_wr_tsa_inst_g; // write state inst
+output       tlu_wr_tsa_inst_w2; // write state inst
+output       tlu_exu_priv_trap_m; // local traps send to exu 
+output       tlu_lsu_priv_trap_m; // local traps send to lsu 
+// output       tlu_lsu_priv_trap_w; // local traps send to lsu 
+// experiment
+output       tlu_pic_cnt_en_m; // local traps send to exu 
+// output       tlu_exu_pic_onebelow_m; // local traps send to exu 
+// output       tlu_exu_pic_twobelow_m; // local traps send to exu 
+output       tlu_exu_cwp_retry_m;
+output       tlu_exu_cwpccr_update_m;
+// output moved to tlu_misctl
+// output [2:0] tlu_exu_cwp_m;
+// output [7:0] tlu_exu_ccr_m;
+// output [7:0] tlu_lsu_asi_m;		// asi from stack
+// added for bug3499
+output [`TLU_THRD_NUM-1:0] tlu_trap_cwp_en;
+
+output       tlu_lsu_asi_update_m; // update asi
+output [1:0] tlu_lsu_tid_m;		// thread for asi update
+
+// output	 tlu_assist_boot_rst_g; // use rstvaddr all zeroes
+// modified due to timing
+// output		 tlu_self_boot_rst_g;	// use rstvaddr all ones
+// output		 tlu_select_tba_g;	// use tba
+// output		 tlu_select_htba_g;	// use htba
+// modified for one-hot mux problem
+// output		 tlu_self_boot_rst_w2;	// use rstvaddr all ones
+// output		 tlu_select_htba_w2;	// use htba
+output [2:0] tlu_pc_mxsel_w2;
+output		 tlu_select_tba_w2;	// use tba
+output		 tdp_select_tba_w2;	// use tba
+//
+output		 tlu_set_sftint_l_g;	// set sftint
+output		 tlu_clr_sftint_l_g;	// clr sftint
+output		 tlu_wr_sftint_l_g;	// wr to sftin (asr 16)
+output [`TLU_THRD_NUM-1:0] tlu_sftint_en_l_g; // wr en sftint regs.
+output [`TLU_THRD_NUM-1:0] tlu_sftint_mx_sel; // mux sel sftint regs.
+//
+// removed due to sftint recode
+// output	[3:0]	tlu_sftint_lvl14_int;	// level 14 sft interrupt
+
+output [3:0] tlu_sftint_penc_sel;	// select appr. thread for pr. encd.
+output [3:0] tlu_sftint_vld;	// a sftint is valid for a thread	
+output [1:0] tlu_int_tid_m;		// thread id
+output [1:0] tlu_incr_tick;         // increment tick reg
+output [3:0] tlu_tickcmp_sel;       // select src for tickcmp
+
+output [3:0]  immu_sfsr_trp_wr;
+output tlu_itag_acc_sel_g;
+
+output [23:0] tlu_isfsr_din_g;
+//
+// removed due to sftint code cleanup
+output		 tlu_tick_npt;		// npt bit of tick
+output [3:0] tlu_thrd_rsel_e;	// read select for threaded regs
+
+output		 tlu_inst_vld_nq_m;	// not qualified inst vld
+
+output [3:0] tlu_lsu_pstate_am;	// ship to lsu
+
+output [2:0] tlu_rdpr_mx1_sel;
+output [2:0] tlu_rdpr_mx2_sel;
+output [1:0] tlu_rdpr_mx3_sel;
+output [1:0] tlu_rdpr_mx4_sel; 
+output [2:0] tlu_rdpr_mx5_sel; 
+output [2:0] tlu_rdpr_mx6_sel;
+output [3:0] tlu_rdpr_mx7_sel;
+//
+output [`TSA_TTYPE_WIDTH-1:0] tlu_final_offset_w1;
+// output   [3:0]   tlu_lsu_redmode;       	// redmode
+// output [3:0] tlu_lsu_redmode_rst;
+// output [`TLU_THRD_NUM-1:0] tlu_lsu_async_ack_w2;
+output [3:0] tlu_lsu_redmode_rst_d1;
+output [7:0] lsu_tlu_rsr_data_mod_e;
+output       tlu_addr_msk_g;        // address masking active for thread in pipe.
+//
+// added for hypervisor support
+// modified for timing
+// output tlu_thrd0_traps, tlu_thrd1_traps;
+// output tlu_thrd2_traps, tlu_thrd3_traps;
+output [`TLU_THRD_NUM-1:0] tlu_thrd_traps_w2; 
+output tlu_dnrtry0_inst_g, tlu_dnrtry1_inst_g;
+output tlu_dnrtry2_inst_g, tlu_dnrtry3_inst_g;
+// output tlu_ibrkpt_trap_g; 
+output tlu_ibrkpt_trap_w2; 
+output tlu_tick_ctl_din;
+output [`TLU_THRD_NUM-1:0] tlu_por_rstint_g;
+output [`TLU_THRD_NUM-1:0] tlu_hintp_vld;  // From tcl of tlu_tcl.v
+output [`TLU_THRD_NUM-1:0] tlu_rerr_vld;  // From tcl of tlu_tcl.v
+// modified for bug 3017
+// moved to tlu_misctl
+output [48:0] ifu_npc_w; //ifu_pc_w, 
+// 
+// shadow scan data from tcl tl and ttype
+output [`TCL_SSCAN_WIDTH-1:0] tlu_sscan_tcl_data;
+
+//
+// added to abide by the Niagara reset methodology
+output tlu_rst;     // local unit reset - active high
+// output tlu_rst_l;	// local unit reset - active low
+output so;			// global scan-out 
+
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+
+// these signals were added to abide by the Niagara reset methodology
+wire local_rst;
+wire local_rst_l;
+wire tlu_rst_l;	// local unit reset - active low
+
+wire [1:0]  tlu_exu_tid_m;
+wire [3:0]	pstate_rmode;
+
+// wire select_tba_g;	// use tba
+wire local_select_tba_w2;	// use tba
+wire [1:0] select_tba_element_w2;	// use tba
+// wire select_htba_g;	// use htba
+//
+// added for early flush timing fix
+// wire tlu_early_flush_pipe_m; 
+wire local_early_flush_pipe_w; 
+wire local_early_flush_pipe2_w; 
+wire local_early_flush_pipe3_w; 
+wire local_early_flush_pipe4_w; 
+wire lsu_ttype_vld_w, lsu_ttype_vld_w2; 
+wire tlu_flush_all_w; 
+wire tlu_ifu_flush_pipe_w;  // exception related flush
+wire tlu_flush_pipe_w;  // exception related flush
+wire tlu_flush_all_w2; 
+// wire tlu_wr_tsa_inst_g; // write state inst
+wire tlu_self_boot_rst_g, tlu_self_boot_rst_w2;	
+wire dnrtry_inst_g; 
+wire dnrtry0_inst_g, dnrtry1_inst_g;
+wire dnrtry2_inst_g, dnrtry3_inst_g;
+wire [`TLU_THRD_NUM-1:0] dnrtry_inst_w2; 
+wire thrd0_traps,thrd1_traps;
+wire thrd2_traps,thrd3_traps;
+// wire [`TLU_THRD_NUM-1:0] async_trap_ack_g;
+// wire [`TLU_THRD_NUM-1:0] async_trap_ack_w2;
+wire [2:0]	trp_lvl0,trp_lvl0_new;
+wire [2:0]	trp_lvl1,trp_lvl1_new;
+wire [2:0]	trp_lvl2,trp_lvl2_new;
+wire [2:0]	trp_lvl3,trp_lvl3_new;
+wire tl0_en, tl0_gt_0;
+wire tl1_en, tl1_gt_0;
+wire tl2_en, tl2_gt_0;
+wire tl3_en, tl3_gt_0;
+wire [1:0] agp_tid_g, agp_tid_w2, agp_tid_w3; // thread that agp refers to
+// wire tlu_pic_onebelow_e, tlu_pic_twobelow_e; 
+// experiment
+wire pich_wrap_flg_m, tlu_pich_wrap_flg_m; // pich_wrap_flg_e, 
+wire tlu_picl_wrap_flg_m; // pich_wrap_flg_e, 
+// modified for bug 5436 - Niagara 2.0
+wire [`TLU_THRD_NUM-1:0] pic_cnt_en;
+wire pic_cnt_en_e, pic_cnt_en_m, pic_cnt_en_w, pic_cnt_en_w2; 
+// wire pic_trap_en_e; 
+//wire pcr_ut_e, pcr_st_e; 
+// wire [`TLU_THRD_NUM-1:0] pich_exu_wrap_e;
+// wire pic_hpstate_enb_e, pic_hpstate_priv_e, pic_pstate_priv_e; 
+//
+wire [`TLU_THRD_NUM-1:0] tlz_thread_set, tlz_thread_data;
+wire [`TLU_THRD_NUM-1:0] tlz_thread;
+wire [`TLU_THRD_NUM-1:0] tlz_trap_m, tlz_exu_trap_m;
+wire [`TLU_THRD_NUM-1:0] tlz_trap_nq_g, tlz_trap_g; 
+wire [`TLU_THRD_NUM-1:0] ifu_thrd_flush_w; 
+wire [`TLU_THRD_NUM-1:0] tlu_none_priv; 
+wire cpu_mondo_trap_g, dev_mondo_trap_g; 
+wire cpu_mondo_trap_w2, dev_mondo_trap_w2; 
+wire [`TLU_THRD_NUM-1:0] tlu_cpu_mondo_trap; 
+wire [`TLU_THRD_NUM-1:0] tlu_dev_mondo_trap; 
+wire [`TLU_THRD_NUM-1:0] tlu_resum_err_trap; 
+wire [`TLU_THRD_NUM-1:0] tlu_hyper_lite;
+wire [3:0] local_rdpr_mx6_sel; 
+wire [3:0] local_rdpr_mx5_sel; 
+wire [2:0] local_rdpr_mx4_sel; 
+wire [2:0] local_rdpr_mx3_sel;
+wire [3:0] local_rdpr_mx2_sel;
+wire [3:0] local_rdpr_mx1_sel;
+wire tlu_none_priv_m;
+wire ibrkpt_trap_m, ibrkpt_trap_g, ibrkpt_trap_w2;
+wire va_oor_jl_ret_g;
+wire done_inst_m_tmp;
+wire retry_inst_m_tmp;
+wire done_inst_w2;
+wire retry_inst_w2;
+wire [2:0] true_pc_sel_m, true_pc_sel_w; 
+// wire dsfsr_flt_vld_g;
+wire done_inst_e, retry_inst_e;
+wire done_inst_m, retry_inst_m;
+wire exu_done_inst_m, exu_retry_inst_m;
+// logic moved to misctl
+// wire cwp_no_change_m;
+// wire [2:0] cwp_xor_m, trap_old_cwp_m; 
+wire done_inst_g, retry_inst_g;
+wire [1:0] thrid_d, thrid_e, thrid_m, thrid_g;
+wire [1:0] thrid_w2; 
+//
+// added for tsa_wr_tid bug
+//
+// wire thread0_wtrp_g, thread1_wtrp_g, thread2_wtrp_g, thread3_wtrp_g;
+wire thread0_wtrp_w2, thread1_wtrp_w2, thread2_wtrp_w2, thread3_wtrp_w2;
+wire thread0_wsel_g, thread1_wsel_g, thread2_wsel_g, thread3_wsel_g;
+wire thread0_wsel_w2, thread1_wsel_w2, thread2_wsel_w2, thread3_wsel_w2;
+wire thread0_rsel_dec_g,thread1_rsel_dec_g;
+wire thread2_rsel_dec_g,thread3_rsel_dec_g;
+wire thread0_rsel_d, thread1_rsel_d, thread2_rsel_d, thread3_rsel_d;
+wire thread0_rsel_m, thread1_rsel_m, thread2_rsel_m, thread3_rsel_m;
+wire thread0_stg_m, thread1_stg_m, thread2_stg_m, thread3_stg_m; 
+wire thread0_stg_m_buf, thread1_stg_m_buf, thread2_stg_m_buf, thread3_stg_m_buf; 
+wire thread0_rsel_g, thread1_rsel_g, thread2_rsel_g, thread3_rsel_g;
+wire thread0_rsel_e, thread1_rsel_e, thread2_rsel_e, thread3_rsel_e;
+wire inst_vld_w2, inst_vld_g, inst_vld_m, inst_vld_nf_g;
+wire [`TLU_THRD_NUM-1:0] thread_inst_vld_g; 
+wire [`TLU_THRD_NUM-1:0] thread_inst_vld_w2; 
+// wire tlu_inst_vld_m;	// qualified inst vld
+wire exu_ttype_vld_g, ifu_ttype_vld_g, exu_ue_trap_g;
+wire [8:0]	exu_ttype_g, ifu_ttype_tmp_g, ifu_ttype_g;
+wire [8:0]	exu_spill_ttype; 
+// added for timing fix
+wire    spu_ill_inst_m ;      // illegal instruction trap from spu 
+wire    spu_ill_inst_uf_g ;      // illegal instruction trap from spu 
+wire    spu_ill_inst_g ;      // illegal instruction trap from spu 
+wire    pib_priv_act_trap_g ;   // privilege action trap from pib 
+wire    pib_priv_act_trap_uf_g ;   // privilege action trap from pib 
+wire    pib_priv_act_early_trap_m ; // privilege action trap from pib 
+wire    ffu_ill_inst_uf_g ;   // illegal instruction trap from ffu - unflushed 
+wire    ffu_ill_inst_g ;      // illegal instruction trap from ffu 
+wire    ffu_higher_pri_g ;      // illegal instruction trap from ffu 
+wire    exu_higher_pri_g ;      // UE ECC trap from exu 
+// wire    lsu_ill_inst_uf_g ;   // illegal instruction trap from lsu - unflushed
+// wire    lsu_ill_inst_g ;      // illegal instruction trap from lsu 
+// wire [`TLU_THRD_NUM-1:0] lsu_defr_thrd_g; 
+wire    lsu_defr_trap_g, lsu_defr_trap_w2 ;     // deferred trap from lsu 
+wire    local_lsu_async_ttype_vld_w; // deferred trap from lsu 
+// wire    local_lsu_defr_trp_taken_g; // deferred trap from lsu 
+wire [`TLU_THRD_NUM-1:0] lsu_defr_trp_taken_w2; 
+// wire    lsu_tlu_defr_trp_taken_w2 ;  // deferred trap from lsu - signled in g for w2
+                                     // trap need to sync up with lsu_tlu_async_ttype_vld_g  
+wire    htrap_ill_inst_m ;      // illegal instruction trap from htrap 
+wire    htrap_ill_inst_uf_g ;   // illegal instruction trap from htrap - unflushed
+wire    htrap_ill_inst_g ;      // illegal instruction trap from htrap 
+
+wire	[`TLU_ASR_ADDR_WIDTH-1:0] sraddr;
+wire	[`TLU_ASR_ADDR_WIDTH-1:0] sraddr2;
+// modified due to timing
+// wire		wsr_inst_d;
+wire        asr_hyperp, asr_priv;
+wire		tpc_rw_d, tnpc_rw_d, tstate_rw_d, ttype_rw_d;
+wire		tick_rw_d, tickcmp_rw_d, tick_npriv_r_d;
+wire	    pcr_rsr_d, pic_rsr_d;	
+wire	    pcr_rsr_e, pic_rsr_e;	
+wire        tlu_gl_rw_g;
+//
+// added for hypervisor support
+wire maxtl_wr_sel;
+wire [3:0] maxstl_wr_sel;
+wire [2:0] wsr_trp_lvl0_data_w, wsr_trp_lvl1_data_w;
+wire [2:0] wsr_trp_lvl2_data_w, wsr_trp_lvl3_data_w;
+wire [2:0] wsr_trp_lvl0_data_w2, wsr_trp_lvl1_data_w2;
+wire [2:0] wsr_trp_lvl2_data_w2, wsr_trp_lvl3_data_w2;
+wire	   stick_rw_d, stickcmp_rw_d, stickcmp_rw_e; 
+wire	   stickcmp_rw_m, stickcmp_rw_g; 
+// wire [3:0] stickcmp_int;       // interrupt caused by stick_ticktmp 
+// wire [3:0] stick_intclr;  // use to clear the stick_int bit
+
+wire tba_rw_d, pstate_rw_d, pil_rw_d, tl_rw_d; 
+wire tsa_wr_tid_sel_g, tsa_wr_tid_sel_tim_g, tsa_wr_tid_sel_w2;
+wire immu_miss_g;
+wire trap_taken_g, trap_taken_w2;
+wire [1:0] trap_tid_g;
+// wire [1:0] tsa_wr_tid_g;
+wire [1:0] pend_trap_tid_g, pend_trap_tid_w2;
+wire [`TSA_TTYPE_WIDTH-1:0] final_ttype_w2; 
+wire [`TSA_TTYPE_WIDTH-1:0] tba_ttype_w1;
+wire [`TSA_TTYPE_WIDTH-1:0] final_offset_w1; 
+wire tsa_rd_vld;
+// modified for bug 3017
+// logic moved to tlu_misctl
+// wire [48:0] normal_trap_pc_w1, normal_trap_npc_w1; 
+// wire [48:0] trap_pc_w1, trap_npc_w1; 
+// wire [48:0] trap_pc_w2, trap_npc_w2; 
+// wire tsa_rd_vld_e, tsa_rd_vld_m;
+wire [`TLU_THRD_NUM-1:0] sscan_tid_sel; 
+// logic moved to tlu_misctl
+/*
+wire [`TLU_THRD_NUM-1:0] sscan_ttype_en;
+wire [`TLU_THRD_NUM-1:0] sscan_tt_rd_sel;
+wire [`TLU_THRD_NUM-1:0] sscan_tt_wr_sel;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt0_data;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt1_data;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt2_data;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt3_data;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt0_din;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt1_din;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt2_din;
+wire [`TSA_TTYPE_WIDTH-1:0] sscan_tt3_din;
+wire [`TSA_TTYPE_WIDTH-1:0] tsa_rdata_ttype_m;
+*/
+wire [`TCL_SSCAN_WIDTH-1:0] tcl_sscan_test_data;
+wire tba_ttype_sel_w2;
+wire [3:0] final_ttype_sel_g, final_ttype_sel_w2;
+// modified due to one-hot mux bug
+wire [1:0] final_offset_en_g, final_offset_en_w1; 
+wire [2:0] final_offset_sel_w1; 
+wire restore_pc_sel_g, restore_pc_sel_w1;
+// removed for timing
+// wire [`TSA_TTYPE_WIDTH-1:0] sync_ttype_g;
+// added to support lsu deferred traps
+wire		priority_trap_sel0, priority_trap_sel1, priority_trap_sel2;
+wire		sync_trap_taken_g, sync_trap_taken_w2;
+// added for timing fix
+wire		sync_trap_taken_m ;
+wire		ifu_ttype_early_vld_m ;
+// wire  [3:0]   tickcmp_int;       // interrupt caused by tick_ticktmp 
+wire	   fp_trap_thrd0,fp_trap_thrd1,fp_trap_thrd2,fp_trap_thrd3;
+wire [`TSA_TTYPE_WIDTH-1:0] ffu_async_ttype;
+wire	   spill_thrd0,spill_thrd1,spill_thrd2,spill_thrd3;
+wire [`TLU_THRD_NUM-1:0] trap_cwp_enb; 
+wire [`TLU_THRD_NUM-1:0] lsu_async_vld_en_g, lsu_async_vld_en_w2; 
+wire	   dmmu_async_thrd0, dmmu_async_thrd1; 
+wire       dmmu_async_thrd2, dmmu_async_thrd3;
+wire [`TSA_TTYPE_WIDTH-1:0] dmmu_async_ttype;
+wire	   pend_to_thrd0_en, pend_to_thrd1_en;
+wire	   pend_to_thrd2_en, pend_to_thrd3_en;
+wire	   pend_to_thrd0_reset, pend_to_thrd1_reset;
+wire	   pend_to_thrd2_reset, pend_to_thrd3_reset;
+wire tlu_pich_cnt_hld;
+wire [`TLU_THRD_NUM-1:0] pich_cnt_hld_rst_g;
+wire [`TLU_THRD_NUM-1:0] pich_cnt_hld_rst_w2;
+wire [`TLU_THRD_NUM-1:0] pend_pich_cnt_hld;
+wire [`TLU_THRD_NUM-1:0] pend_pich_cnt_hld_q;
+wire [`TLU_THRD_NUM-1:0] pend_pich_cnt_hld_noqual;
+wire [`TLU_THRD_NUM-1:0] pend_pich_cnt_hld_early;
+wire [`TLU_THRD_NUM-1:0] pend_pich_cnt_adj;
+wire [`TLU_THRD_NUM-1:0] cwp_en_thrd_reset; 
+// wire	   pend_to_thrd0_taken, pend_to_thrd1_taken;
+// wire	   pend_to_thrd2_taken, pend_to_thrd3_taken;
+wire [`TSA_TTYPE_WIDTH-1:0] pend_ttype0,pend_ttype1,pend_ttype2,pend_ttype3;
+wire	   pending_trap0,pending_trap1,pending_trap2,pending_trap3;
+wire [`TSA_TTYPE_WIDTH-1:0] pending_ttype0,pending_ttype1,pending_ttype2,pending_ttype3;
+wire [`TSA_TTYPE_WIDTH-1:0] pending_ttype, pending_ttype_w2;
+// Net declarations only; name suffixes (_m, _g, _w2) follow the flop chains used later in this module.
+// Added for bug 1575
+wire agp_tid_sel;	
+// modified due to timing
+// wire update_pstate0_g,update_pstate1_g;
+// wire update_pstate2_g,update_pstate3_g;
+// wire [`TLU_THRD_NUM-1:0] update_pstate_g;,
+wire [`TLU_THRD_NUM-1:0] update_pstate_w2; // bit N: thrdN_traps_w2 | dnrtry_inst_w2[N] | pstate wsr write for thread N
+wire thrd0_traps_w2, thrd1_traps_w2;
+wire thrd2_traps_w2, thrd3_traps_w2;
+wire ifu_ttype_vld_tmp_g;
+//
+// added for timing, move qualification from ifu to tlu
+wire	   ifu_ttype_vld_m;
+wire	   cwp_cmplt0,cwp_cmplt1,cwp_cmplt2,cwp_cmplt3;
+wire	   cwp_cmplt_w2, cwp_cmplt_g;
+wire	   cwp_cmplt_rtry_w2, cwp_cmplt_rtry_g;
+wire	   cwp_fastcmplt_w2;
+wire	   cwp_cmplt0_pending, cwp_cmplt1_pending;
+wire	   cwp_cmplt2_pending, cwp_cmplt3_pending;
+wire	   cwp_retry0,cwp_retry1,cwp_retry2,cwp_retry3;
+wire	   pending_thrd0_event_taken, pending_thrd1_event_taken; // OR-ed into pending_thrd_event_taken
+wire	   pending_thrd2_event_taken, pending_thrd3_event_taken;
+// wire	   pending_thrd0_event_taken_w2, pending_thrd1_event_taken_w2;
+// wire	   pending_thrd2_event_taken_w2, pending_thrd3_event_taken_w2;
+wire	   cwp_fastcmplt_m, cwp_fastcmplt_uq_g, cwp_fastcmplt_g; // _uq_g is the registered copy of _m
+wire	   pending_dntry0_taken, pending_dntry1_taken; 
+wire	   pending_dntry2_taken, pending_dntry3_taken;
+wire	   rstint_g,hwint_g,swint_g;
+wire [2:0] early_ttype_sel;
+// wire [2:0] rst_ttype_sel;
+wire [1:0] rst_ttype_sel;
+wire rst_hwint_sel_w2;
+// modified for timing
+// wire [3:0] rst_hwdr_ttype_sel;
+wire rst_hwdr_ttype_sel_w2;
+wire	   onehot_pending_ttype_sel;
+wire	   early_priv_traps_g, exu_hyper_traps_g; 
+wire	   exu_pib_priv_act_trap_m; 
+wire [`TLU_THRD_NUM-1:0] pib_wrap_m; // one bit per thread
+wire [`TLU_THRD_NUM-1:0] pib_pich_wrap_m; // one bit per thread
+wire pib_wrap_trap_nq_g, pib_wrap_trap_g, pib_wrap_trap_m; // the _m form gates true_pc_sel_m[1:0]
+wire [`TLU_THRD_NUM-1:0] pib_trap_en;
+wire [`TLU_THRD_NUM-1:0] picl_wrap_pend;
+// Interrupt, reset-interrupt and softint net declarations.
+// added for timing; moved qualification from IFU to TLU
+wire	   ifu_rstint_m,ifu_hwint_m,ifu_swint_m; // swint_nq_g;
+wire	   sftint_penc_update; 
+wire	   sftint_user_update_g, sftint_user_update_w2; 
+wire	   penc_sel_user_update; 
+wire [5:0] rstid_g;
+wire	   trp_lvl0_incr_w2, trp_lvl1_incr_w2;
+wire	   trp_lvl2_incr_w2, trp_lvl3_incr_w2;
+wire	rstint_taken,hwint_taken,swint_taken; // rstint/hwint feed intrpt_taken; swint_taken is not used in that OR
+// wire	swint_thrd0_taken, swint_thrd1_taken;
+// wire	swint_thrd2_taken, swint_thrd3_taken;
+wire    sirint_taken; // used with rstint_taken in trap_to_redmode and redmode_insertion
+// wire [`TLU_THRD_NUM-2:0] swint_thrd_g;
+wire [`TLU_THRD_NUM-2:0] sftint_penc_thrd; // one bit fewer than the thread count
+wire	por_rstint_g, xir_rstint_g; 
+wire	por_rstint0_g, por_rstint1_g; // por_rstintN_g is OR-ed into tick_en[N]
+wire	por_rstint2_g, por_rstint3_g;
+wire    por_rstint_w2; 
+wire	por_rstint0_w2, por_rstint1_w2;
+wire	por_rstint2_w2, por_rstint3_w2;
+wire	trp_lvl0_at_maxtl,trp_lvl1_at_maxtl;
+wire	trp_lvl2_at_maxtl,trp_lvl3_at_maxtl;
+wire	internal_wdr; // one of the redmode_insertion terms
+wire [`TLU_THRD_NUM-1:0] internal_wdr_trap; // bit N is OR-ed into tlu_lsu_redmode_rst[N]
+// added for hypervisor support
+wire [`TLU_THRD_NUM-1:0] pil_cmp_en;
+wire [`TLU_THRD_NUM-1:0] sftint_only_vld;
+wire [`TLU_THRD_NUM-1:0] tlu_int_sftint_pend;
+wire [`TLU_THRD_NUM-1:0] sftint_pend_wait;
+wire [`TLU_THRD_NUM-1:0] sftint_wait_rst;
+// PIL, softint-id and I-side fault-status net declarations.
+wire [3:0] true_pil0, true_pil1; // true_pilN is muxed onto tlu_pil[3:0] by threadN_rsel_e
+wire [3:0] true_pil2, true_pil3;
+wire pil0_en,pil1_en,pil2_en,pil3_en;
+wire set_sftint_d, clr_sftint_d, sftint_rg_rw_d;
+// modified for timing and bug 5117 
+wire [6:0] final_swint_id_w2;
+// wire [6:0] final_swint_id;
+// wire [6:0] final_swint0_id, final_swint1_id;
+// wire [6:0] final_swint2_id, final_swint3_id;
+// modified for bug 3705
+// wire [6:0] tlz_swint_ttype;
+// wire [6:0] hwint_swint_ttype;
+wire [6:0] wrap_tlz_ttype;
+wire [3:0] sftint0_id,sftint1_id,sftint2_id,sftint3_id;
+wire [3:0] sftint_id_w2;
+// wire [6:0] sftint_ttype; 
+wire done_inst_g_tmp, retry_inst_g_tmp; // outputs of dff_stgdntry_g, gated by inst_vld_g afterwards
+wire immu_va_oor_brnchetc_m;
+wire pstate_am;// pstate_priv pstate_priv_g;
+wire memref_e, memref_m;
+wire [2:0] isfsr_ftype_sel;
+wire [6:0] isfsr_ftype_m,isfsr_ftype_g;
+wire	   isfsr_flt_vld_m,isfsr_flt_vld_g;
+wire	   isfsr_trp_wr_m,isfsr_trp_wr_g;
+wire	   itag_acc_sel_g;
+// wire	   flsh_inst_m, flsh_inst_g;
+// wire	   pstate_cle;
+// wire [2:0] dsfsr_asi_sel_m, dsfsr_asi_sel_g;
+// wire [1:0] dsfsr_asi_sel_m, // dsfsr_asi_sel_g;
+wire dmmu_va_oor_m, dmmu_va_oor_g; // the _g form qualifies va_oor_data_acc_excp_g
+// wire ldst_xslate_g;
+// wire [2:0]	dsfsr_ctxt_sel;
+// wire dsfsr_wr_op_g;
+// wire dsfsr_flt_vld_m;
+//
+// logic moved to lsu_expctl due to timing
+/*
+wire dsfsr_ftype_zero;
+wire [1:0]	dsfsr_ctxt_g, 
+wire [7:0]	dsfsr_asi_g;
+// wire [6:0]	dsfsr_ftype_g, dsfsr_pe_ftype_g;
+wire dsfsr_side_effect_g;
+wire dsfsr_trp_wr_g;
+*/
+wire [1:0] isfsr_ctxt_g;
+wire [`TLU_THRD_NUM-1:0] tick_en; // bit N: tick wsr write for thread N | local_rst | por_rstintN_g
+wire local_sync_trap_m, local_sync_trap_g; // the _g form blocks va_oor_inst_acc_excp_g
+wire dside_sync_trap_g, early_dside_trap_g;
+wire true_hscpd_dacc_excpt_m;
+wire true_qtail_dacc_excpt_m;
+// wire lsu_higher_priority;
+// wire dside_higher_priority;
+wire [`TSA_TTYPE_WIDTH-1:0] local_sync_ttype_g;
+wire local_higher_ttype_flg;
+// wire [`TSA_TTYPE_WIDTH-1:0]	dside_sync_ttype_pre_g;
+// wire [`TSA_TTYPE_WIDTH-1:0]	dside_sync_ttype_g;
+wire [`TSA_TTYPE_WIDTH-1:0]	early_sync_ttype_g, early_sync_ttype_w2;
+wire [`TSA_TTYPE_WIDTH-1:0]	adj_lsu_ttype_w2;
+wire [`TSA_TTYPE_WIDTH-1:0]	lsu_tlu_ttype_w2;
+// wire [`TSA_TTYPE_WIDTH-3:0]	lsu_tlu_async_ttype_w2;
+// wire [`TSA_TTYPE_WIDTH-3:0]	rst_ttype_g; 
+wire [`TSA_TTYPE_WIDTH-3:0]	rst_hwint_ttype_g, rst_hwint_ttype_w2; // two bits narrower than a full ttype
+wire [`TSA_TTYPE_WIDTH-3:0]	rst_ttype_w2, rst_hwdr_ttype_w2; // two bits narrower than a full ttype
+wire [`TSA_TTYPE_WIDTH-1:0]	early_ttype_g;
+wire trp_lvl0_at_maxtlless1,trp_lvl1_at_maxtlless1; // per-thread inputs to tpl_maxless1[N]
+wire trp_lvl2_at_maxtlless1,trp_lvl3_at_maxtlless1;
+wire trp_lvl_at_maxtlless1; // OR of tpl_maxless1[3:0]
+wire [`TLU_THRD_NUM-1:0] tpl_maxless1; // bit N: (trp_lvlN_at_maxtlless1 | pstate_rmode[N]) & thrdN_traps
+wire redmode_insertion, redmode_insertion_w2; // _w2 is the flopped copy, exported as tlu_select_redmode
+wire [`TLU_THRD_NUM-1:0] tlu_lsu_redmode_rst; // flopped into tlu_lsu_redmode_rst_d1
+wire trap_to_redmode; // trp_lvl_at_maxtlless1 & ~(rstint_taken | sirint_taken)
+wire pending_thrd_event_taken; // OR of the four pending_thrdN_event_taken flags
+// added or modified for timing
+wire [`TLU_THRD_NUM-2:0]  thrd_rsel_g; // the last thread has no bit of its own
+wire [`TLU_THRD_NUM-2:0]  thrd_rsel_w2; // flopped thrd_rsel_g; selects tlu_tl_gt_0_w2
+wire va_oor_inst_acc_excp_g; // qualified va_oor_jl_ret trap 
+wire va_oor_data_acc_excp_g, va_oor_data_acc_excp_w2; // qualified exu_tlu_va_oor_m trap 
+wire sir_inst_g; // ifu_tlu_sir_inst_m after one flop (dff_stgg_sir_g)
+wire [`TLU_THRD_NUM-1:0]	pending_trap_sel;
+// Reset-select, tick, trap-thread-id and ttype-init net declarations.
+// modified to support lsu_deferred traps; modified for timing
+wire reset_sel_g, reset_sel_w2;
+wire [2:0] reset_id_g;
+wire tick_npt0,tick_npt1,tick_npt2,tick_npt3; // one flag per thread 0-3
+wire tick_ctl_din;
+// modified due to early_flush_pipe timing fix
+// wire tlu_tick_npt_priv_act;
+wire	tick_npt_priv_act_g;
+wire	tick_npt_priv_act_m;
+wire	exu_tick_npt_priv_act_m;
+//
+// moved the tick_intdis and stick_intdis logic to tlu_tdp
+// wire tick_intdis0,tick_intdis1,tick_intdis2,tick_intdis3;
+// wire stick_intdis0,stick_intdis1,stick_intdis2,stick_intdis3;
+// wire [`TLU_THRD_NUM-1:0]	tick_intrpt;
+// wire [`TLU_THRD_NUM-1:0]	tick_intclr;  // use to clear the tick_int bit
+// wire wsr_tick_intclr_g;  // clear the tick_int through asr write
+// wire wsr_tick_intset_g;  // set the tick_int through asr write 
+// add and/or modified for hypervisor support
+// wire [1:0] cwp_cmplt_tid_w2, cwp_cmplt_tid_g;
+// wire	wsr_illeg_globals_g;  // mutual exclusiveness of the pstate globals 
+// wire wsr_stick_intclr_g;  // clear the stick_int through asr write
+// wire wsr_stick_intset_g;  // set the stick_int through asr write 
+// wire [`TLU_THRD_NUM-1:0] stick_intrpt;
+// wire [`TLU_THRD_NUM-1:0] stick_int_en, stick_int_din;
+// wire [`TLU_THRD_NUM-1:0] tick_int_en, tick_int_din;
+// 
+// wire [1:0] cwp_cmplt_tid_g;
+wire [1:0] true_trap_tid_g; // 2-bit thread id
+wire [1:0] early_trap_tid_g; // 2-bit thread id
+wire [1:0] true_trap_tid_w2; // 2-bit thread id
+wire trp_lvl_zero;
+wire misalign_addr_jmpl_rtn_g,misalign_addr_ldst_atm_g; // ldst_atm blocks va_oor_data_acc_excp_g
+wire tt_init_en;
+wire [`TLU_THRD_NUM-1:0] tt_init_rst;
+wire [`TLU_THRD_NUM-1:0] tt_unwritten; // bit N is inverted and gated by threadN_rsel_e to form ttype_written
+wire ttype_written;
+wire ttype_unwritten_sel; // ttype_rw_e & ~ttype_written
+wire reset_d1;
+wire thread_tl_zero;
+// wire iside_trap;
+wire [7:0] isfsr_asi_g;
+wire thread_tl_zero_m,thread_tl_zero_g;
+wire tlu_trap_to_hyper_g, tlu_trap_to_hyper_w2;
+// wire hyper_wdr_trap;
+wire hyper_wdr_early_trap_g, hyper_wdr_early_trap_w2, hyper_wdr_trap_w2;
+wire tlu_priv_traps_w2;
+wire [2:0] tlu_early_priv_element_g;
+wire [2:0] tlu_early_priv_element_w2; 
+wire [`TLU_THRD_NUM-1:0] trp_lvl_gte_maxstl; // one bit per thread
+wire [`TLU_THRD_NUM-1:0] trp_lvl_at_maxstl; // one bit per thread
+
+// This section was modified to abide by the Niagara synthesis methodology
+// The former reg declarations are kept below, commented out, for reference.
+//reg	tpc_rw_e, tpc_rw_m, tpc_rw_g;
+//reg	tnpc_rw_e, tnpc_rw_m, tnpc_rw_g;
+//reg	tstate_rw_e, tstate_rw_m, tstate_rw_g, tstate_rw_w2;
+//reg	ttype_rw_e, ttype_rw_m, ttype_rw_g, ttype_rw_w2;
+//reg	tick_rw_e, tick_rw_m, tick_rw_g;
+//reg	tick_npriv_r_e, tick_npriv_r_m, tick_npriv_r_g;
+//reg	tickcmp_rw_e, tickcmp_rw_m, tickcmp_rw_g;
+//reg	tba_rw_e, tba_rw_m, tba_rw_g;
+//reg	pstate_rw_e, pstate_rw_m, pstate_rw_g;
+//reg	pil_rw_e, pil_rw_m, pil_rw_g;
+//reg	tl_rw_e, tl_rw_m, tl_rw_g;
+//reg	wsr_inst_e, wsr_inst_m, wsr_inst_g_unflushed; 
+//reg	set_sftint_e, clr_sftint_e, sftint_rg_rw_e;
+//reg	set_sftint_m, clr_sftint_m, sftint_rg_rw_m;
+//reg	set_sftint_g, clr_sftint_g, sftint_rg_rw_g;
+// Current form: the same names declared as nets, with extra _w2 stages and the htstate/htickcmp strobes added.
+wire	tpc_rw_e, tpc_rw_m, tpc_rw_g, tpc_rw_w2;
+wire	tnpc_rw_e, tnpc_rw_m, tnpc_rw_g, tnpc_rw_w2;
+wire	tstate_rw_e, tstate_rw_m, tstate_rw_g, tstate_rw_w2;
+wire	ttype_rw_e, ttype_rw_m, ttype_rw_g, ttype_rw_w2;
+wire	htstate_rw_w2;  
+wire	tick_rw_e, tick_rw_m, tick_rw_g;
+wire	tick_npriv_r_e, tick_npriv_r_m, tick_npriv_r_g;
+wire	tickcmp_rw_e, tickcmp_rw_m, tickcmp_rw_g;
+wire	tba_rw_e, tba_rw_m, tba_rw_g;
+wire	pstate_rw_e, pstate_rw_m, pstate_rw_g, pstate_rw_w2;
+wire	pil_rw_e, pil_rw_m, pil_rw_g;
+wire	tl_rw_e, tl_rw_m, tl_rw_g, tl_rw_w2;
+wire	htickcmp_rw_m, htickcmp_rw_g;
+wire	wsr_inst_e, wsr_inst_m, wsr_inst_g_unflushed; 
+wire	set_sftint_e, clr_sftint_e, sftint_rg_rw_e;
+wire	set_sftint_m, clr_sftint_m, sftint_rg_rw_m;
+wire	set_sftint_g, clr_sftint_g, sftint_rg_rw_g;
+// wsr_inst_g / wsr_inst_w2 qualify the *_rw_g / *_rw_w2 strobes in the write enables below.
+wire wsr_inst_g, wsr_inst_w2; 
+wire inst_ifu_flush_w; // cancels dnrtry_inst_g
+wire inst_ifu_flush2_w; // cancels the per-thread dnrtryN_inst_g
+wire clk; // assigned from rclk below
+
+//=========================================================================================
+//=========================================================================================
+//=========================================================================================
+
+// Buffered copy of tlu_pstate_priv[3:0]; the declaration and the
+// continuous assignment are folded into one net declaration.
+wire [3:0] tlu_pstate_priv_buf = tlu_pstate_priv[3:0];
+
+//=========================================================================================
+//=========================================================================================
+//=========================================================================================
+//	reset
+//=========================================================================================
+
+// grst_l is registered on clk (reset pin tied to arst_l) to form local_rst_l.
+dffrl_async dffrl_local_rst_l (
+    .clk   (clk),
+    .rst_l (arst_l),
+    .din   (grst_l),
+    .q     (local_rst_l),
+    .se    (se),
+    .si    (),
+    .so    ()
+);
+assign tlu_rst_l = local_rst_l;   // low-true reset, straight from the flop
+assign tlu_rst   = ~tlu_rst_l;    // inverted form of the same reset
+assign local_rst = ~tlu_rst_l;    // drives the .rst pin of the dffr_s flops below
+
+//=========================================================================================
+//	Rename
+//=========================================================================================
+
+// assign	tlu_lsu_redmode[3:0] = tlu_int_redmode[3:0];
+assign clk = rclk; // local alias: every flop in this section is clocked by clk
+
+//=========================================================================================
+//	Misc. TDP Control
+//=========================================================================================
+//
+// modified for bug 5436: Niagara 2.0
+/*
+assign pcr_ut_e = 
+           (tlu_thrd_rsel_e[0]) ? tlu_pcr_ut[0]: 
+           (tlu_thrd_rsel_e[1]) ? tlu_pcr_ut[1]: 
+           (tlu_thrd_rsel_e[2]) ? tlu_pcr_ut[2]:
+            tlu_pcr_ut[3]; 
+
+assign pcr_st_e = 
+           (tlu_thrd_rsel_e[0]) ? tlu_pcr_st[0]:
+           (tlu_thrd_rsel_e[1]) ? tlu_pcr_st[1]:
+           (tlu_thrd_rsel_e[2]) ? tlu_pcr_st[2]:
+            tlu_pcr_st[3];
+*/
+
+// Per-thread instruction-valid vectors; bit N is qualified by thread N's
+// select, with thread 3 in the MSB of every concatenation.
+// tlu_thread_inst_vld_g is additionally masked by pend_pich_cnt_hld[N];
+// thread_inst_vld_g carries the same term without that mask.
+assign tlu_thread_inst_vld_g[3:0] =
+            {4{inst_vld_g}} &
+            {thread3_rsel_g, thread2_rsel_g, thread1_rsel_g, thread0_rsel_g} &
+            ~pend_pich_cnt_hld[3:0];
+
+// w2 stage: inst_vld_w2 gated by the threadN_wsel_w2 selects.
+assign thread_inst_vld_w2[3:0] =
+            {4{inst_vld_w2}} &
+            {thread3_wsel_w2, thread2_wsel_w2, thread1_wsel_w2, thread0_wsel_w2};
+
+// g stage: inst_vld_g gated by the threadN_rsel_g selects only.
+assign thread_inst_vld_g[3:0] =
+            {4{inst_vld_g}} &
+            {thread3_rsel_g, thread2_rsel_g, thread1_rsel_g, thread0_rsel_g};
+
+// added for timing
+//
+// Execute-stage per-thread selection of the trap level and PIL values.
+// Selects are tested in thread order 0..3; if none is set the result is X.
+assign tlu_trp_lvl[2:0] = thread0_rsel_e ? trp_lvl0[2:0] :
+                          thread1_rsel_e ? trp_lvl1[2:0] :
+                          thread2_rsel_e ? trp_lvl2[2:0] :
+                          thread3_rsel_e ? trp_lvl3[2:0] : 3'bxxx;
+
+assign tlu_pil[3:0] = thread0_rsel_e ? true_pil0[3:0] :
+                      thread1_rsel_e ? true_pil1[3:0] :
+                      thread2_rsel_e ? true_pil2[3:0] :
+                      thread3_rsel_e ? true_pil3[3:0] : 4'bxxxx;
+
+// Low-true TBA enables, one per thread: bit N is low only when tba_rw_g,
+// wsr_inst_g and threadN_wsel_g are all set.
+assign tlu_tba_en_l[3:0] = ~({4{tba_rw_g & wsr_inst_g}} &
+           {thread3_wsel_g, thread2_wsel_g, thread1_wsel_g, thread0_wsel_g});
+
+
+assign tlu_tick_en_l = ~(wsr_inst_g & tick_rw_g);
+// tick_en[N] = (tick_rw_g & wsr_inst_g & threadN_wsel_g) | local_rst | por_rstintN_g.
+// Original note kept: reset may not have to be factored in !!!
+assign tick_en[3:0] =
+           ({4{tick_rw_g & wsr_inst_g}} &
+            {thread3_wsel_g, thread2_wsel_g, thread1_wsel_g, thread0_wsel_g}) |
+           {4{local_rst}} | {por_rstint3_g, por_rstint2_g, por_rstint1_g, por_rstint0_g};
+
+// modified for bug 4763
+// Low-true enables for the tickcmp, stickcmp and htickcmp registers,
+// one bit per thread with thread 3 in the MSB.
+// Bit N goes low only when the matching *_rw_g strobe, wsr_inst_g and
+// threadN_wsel_g are all set. All three groups carry the
+// "modified for bug 4763" history note.
+assign tlu_tickcmp_en_l[3:0] =
+           ~({4{tickcmp_rw_g & wsr_inst_g}} &
+             {thread3_wsel_g, thread2_wsel_g, thread1_wsel_g, thread0_wsel_g});
+// stickcmp
+assign tlu_stickcmp_en_l[3:0] =
+           ~({4{stickcmp_rw_g & wsr_inst_g}} &
+             {thread3_wsel_g, thread2_wsel_g, thread1_wsel_g, thread0_wsel_g});
+// htickcmp
+assign tlu_htickcmp_en_l[3:0] =
+           ~({4{htickcmp_rw_g & wsr_inst_g}} &
+             {thread3_wsel_g, thread2_wsel_g, thread1_wsel_g, thread0_wsel_g});
+
+// modified for bug 1266 and 1264
+dff_s dff_stgg_va_oor_jl_ret_g (
+    .clk (clk),
+    .din (exu_tlu_va_oor_jl_ret_m),   // m-stage input from the EXU
+    .q   (va_oor_jl_ret_g),           // flop output, used at the g stage
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// This may have to be changed as all lsu traps may not use mmu globals 
+// ffu traps may have to be factored in once round-robin selection in place.
+// factor in ldst related mem-address exceptions. 
+//
+// modified for bug 1264 and 1266
+// prioritize the exu_tlu_va_oor_jl_ret_m trap; if no higher traps are happening initiate the trap
+//
+assign va_oor_inst_acc_excp_g =
+       inst_vld_g & va_oor_jl_ret_g &
+       ~(local_sync_trap_g | lsu_tlu_priv_action_g |
+         ifu_ttype_vld_g | exu_ttype_vld_g);
+// Data-side va_oor trap (bugs 1316, 3464, 4873): raised for a valid
+// instruction only when none of the listed competing trap sources is set.
+assign va_oor_data_acc_excp_g =
+       inst_vld_g & dmmu_va_oor_g &
+       ~(lsu_tlu_wtchpt_trp_g | misalign_addr_ldst_atm_g |
+         lsu_tlu_priv_action_g | ifu_ttype_vld_g | exu_ttype_vld_g);
+//
+// added for timing 
+dffr_s dffr_va_oor_data_acc_excp_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (va_oor_data_acc_excp_g),    // g-stage qualifier
+    .q   (va_oor_data_acc_excp_w2),   // flop output for the w2 stage
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// exu should qualify with priv bit. Assume ttype vld is asserted.
+dff_s #(1) dff_stgg_sir_g (
+    .clk (clk),
+    .din (ifu_tlu_sir_inst_m),   // m-stage SIR indication from the IFU
+    .q   (sir_inst_g),           // flop output, g stage
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// pstate_rmode mirrors tlu_int_redmode bit for bit.
+assign pstate_rmode[3:0] = tlu_int_redmode[3:0];
+
+// Recoded for bug 2644: sirint_taken takes the place of swint_taken.
+// Previous form: rstint_taken | hwint_taken | swint_taken
+wire   intrpt_taken;
+assign intrpt_taken = sirint_taken | hwint_taken | rstint_taken;
+//
+// modified for bug 4906
+// tpl_maxless1[N]: thrdN_traps is set while either trp_lvlN_at_maxtlless1
+// or pstate_rmode[N] is set.
+assign tpl_maxless1[3:0] =
+           ({trp_lvl3_at_maxtlless1, trp_lvl2_at_maxtlless1,
+             trp_lvl1_at_maxtlless1, trp_lvl0_at_maxtlless1} |
+            pstate_rmode[3:0]) &
+           {thrd3_traps, thrd2_traps, thrd1_traps, thrd0_traps};
+
+// Any-thread summary of the vector above.
+assign trp_lvl_at_maxtlless1 = |tpl_maxless1[3:0];
+
+// thread enters redstate
+// modified for bug 3919
+// assign	trap_to_redmode = trp_lvl_at_maxtlless1 & ~intrpt_taken; 
+// trap_to_redmode: trp_lvl_at_maxtlless1 with neither rstint_taken nor
+// sirint_taken set (bug 3919 replaced the earlier ~intrpt_taken qualifier).
+assign trap_to_redmode = trp_lvl_at_maxtlless1 & ~rstint_taken & ~sirint_taken;
+
+// tlu_lsu_redmode_rst[N] is raised for thread N by any of:
+//   - rstint_taken or sirint_taken while threadN_rsel_g is set,
+//   - tpl_maxless1[N] or internal_wdr_trap[N],
+//   - local_rst, which sets all four bits.
+assign tlu_lsu_redmode_rst[3:0] =
+           ({4{rstint_taken | sirint_taken}} &
+            {thread3_rsel_g, thread2_rsel_g, thread1_rsel_g, thread0_rsel_g}) |
+           tpl_maxless1[3:0] |
+           internal_wdr_trap[3:0] |
+           {4{local_rst}};
+
+dff_s #(`TLU_THRD_NUM) dff_tlu_lsu_redmode_rst_d1 (
+    .clk (clk),
+    .din (tlu_lsu_redmode_rst[`TLU_THRD_NUM-1:0]),      // per-thread vector
+    .q   (tlu_lsu_redmode_rst_d1[`TLU_THRD_NUM-1:0]),   // same vector after one flop
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// redmode_insertion is the OR of local_rst, rstint_taken, sirint_taken,
+// trap_to_redmode and internal_wdr.
+// (A disabled sir_inst_g term appears to have been part of this OR once.)
+assign redmode_insertion =
+           sirint_taken | internal_wdr | trap_to_redmode | rstint_taken | local_rst;
+// Staged by one flop for timing; the w2 copy is exported as tlu_select_redmode.
+dff_s dff_redmode_insertion_w2 (
+    .clk (clk),
+    .din (redmode_insertion),
+    .q   (redmode_insertion_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+assign tlu_select_redmode = redmode_insertion_w2;
+
+// added for bug 2808
+// m stage: the IFU trap type equals 9'h076 and its valid bit is set.
+assign ibrkpt_trap_m =
+           ifu_tlu_ttype_vld_m & (ifu_tlu_ttype_m[8:0] == 9'h076);
+
+// m -> g
+dffr_s dffr_ibrkpt_trap_g (
+   .clk (clk),
+   .rst (local_rst),
+   .din (ibrkpt_trap_m),
+   .q   (ibrkpt_trap_g),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+// g -> w2; the w2 copy is the one exported (the g-stage export is disabled).
+dffr_s dffr_ibrkpt_trap_w2 (
+   .clk (clk),
+   .rst (local_rst),
+   .din (ibrkpt_trap_g),
+   .q   (ibrkpt_trap_w2),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+assign tlu_ibrkpt_trap_w2 = ibrkpt_trap_w2;
+
+// modified for bug 1575
+// assign	tlu_pstate_din_sel[2]	= ~(tlu_pstate_din_sel[0] | tlu_pstate_din_sel[1]);
+
+// the selection pstate by thread
+// modified for the hypervisor support
+
+// Per-thread pstate data-in selects. Bit 0 follows dnrtry_inst_w2[N]; bit 1
+// is the pstate wsr write for thread N and is suppressed when bit 0 is set.
+// Both bits are forced low while rst_tri_en is set.
+assign tlu_pstate_din_sel0[0] = ~rst_tri_en & dnrtry_inst_w2[0];
+assign tlu_pstate_din_sel0[1] = ~rst_tri_en & ~tlu_pstate_din_sel0[0] &
+                                thread0_wsel_w2 & (wsr_inst_w2 & pstate_rw_w2);
+assign tlu_pstate_din_sel1[0] = ~rst_tri_en & dnrtry_inst_w2[1];
+assign tlu_pstate_din_sel1[1] = ~rst_tri_en & ~tlu_pstate_din_sel1[0] &
+                                thread1_wsel_w2 & (wsr_inst_w2 & pstate_rw_w2);
+assign tlu_pstate_din_sel2[0] = ~rst_tri_en & dnrtry_inst_w2[2];
+assign tlu_pstate_din_sel2[1] = ~rst_tri_en & ~tlu_pstate_din_sel2[0] &
+                                thread2_wsel_w2 & (wsr_inst_w2 & pstate_rw_w2);
+assign tlu_pstate_din_sel3[0] = ~rst_tri_en & dnrtry_inst_w2[3];
+assign tlu_pstate_din_sel3[1] = ~rst_tri_en & ~tlu_pstate_din_sel3[0] &
+                                thread3_wsel_w2 & (wsr_inst_w2 & pstate_rw_w2);
+
+// PC restore select: dnrtry_inst_g together with cwp_fastcmplt_g, or cwp_cmplt_g.
+assign restore_pc_sel_g = cwp_cmplt_g | (cwp_fastcmplt_g & dnrtry_inst_g);
+// Registered once and exported as tlu_restore_pc_sel_w1.
+dffr_s dffr_restore_pc_sel_w1 (
+   .clk (clk),
+   .rst (local_rst),
+   .din (restore_pc_sel_g),
+   .q   (restore_pc_sel_w1),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+assign tlu_restore_pc_sel_w1 = restore_pc_sel_w1;
+//
+// modified for the hypervisor support and timing
+
+// update_pstate_w2[N] is set by any of:
+//   - thrdN_traps_w2,
+//   - dnrtry_inst_w2[N],
+//   - a pstate wsr write aimed at thread N
+//     (pstate_rw_w2 & wsr_inst_w2 & threadN_wsel_w2).
+// Written as one vector expression, thread 3 in the MSB of each
+// concatenation.
+assign update_pstate_w2[3:0] =
+           {thrd3_traps_w2, thrd2_traps_w2, thrd1_traps_w2, thrd0_traps_w2} |
+           dnrtry_inst_w2[3:0] |
+           ({4{pstate_rw_w2 & wsr_inst_w2}} &
+            {thread3_wsel_w2, thread2_wsel_w2, thread1_wsel_w2, thread0_wsel_w2});
+
+// recoded for timing
+// modified for bug 4284
+// Low-true PC update strobes: bit N is low when inst_vld_g and
+// threadN_rsel_g are both set.
+assign tlu_update_pc_l_w[3:0] =
+           ~({4{inst_vld_g}} &
+             {thread3_rsel_g, thread2_rsel_g, thread1_rsel_g, thread0_rsel_g});
+// Bundle the threadN_wtrp_w2 selects into a vector, thread 3 in the MSB.
+assign tlu_thrd_wsel_w2[`TLU_THRD_NUM-1:0] =
+           {thread3_wtrp_w2, thread2_wtrp_w2, thread1_wtrp_w2, thread0_wtrp_w2};
+
+//wire	pending_thrd_event_taken_w2;
+assign pending_thrd_event_taken =
+           pending_thrd3_event_taken | pending_thrd2_event_taken |
+           pending_thrd1_event_taken | pending_thrd0_event_taken;
+// thrd_rsel_g[N]: pending_thrdN_event_taken sets it outright; threadN_rsel_g
+// sets it only while no pending event is taken. Thread 3 has no bit.
+assign thrd_rsel_g[2:0] =
+           ({thread2_rsel_g, thread1_rsel_g, thread0_rsel_g} &
+            {3{~pending_thrd_event_taken}}) |
+           {pending_thrd2_event_taken, pending_thrd1_event_taken,
+            pending_thrd0_event_taken};
+dff_s #(`TLU_THRD_NUM-1) dff_thrd_rsel_w2 (
+    .clk (clk),
+    .din (thrd_rsel_g[`TLU_THRD_NUM-2:0]),
+    .q   (thrd_rsel_w2[`TLU_THRD_NUM-2:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// tlN_gt_0 of the thread picked by thrd_rsel_w2 (priority 0, 1, 2, else thread 3).
+assign tlu_tl_gt_0_w2 = thrd_rsel_w2[0] ? tl0_gt_0 :
+                        thrd_rsel_w2[1] ? tl1_gt_0 :
+                        thrd_rsel_w2[2] ? tl2_gt_0 : tl3_gt_0;
+//
+// modified for the tsa_wdata bug (tlu_tdp)
+//
+// assign tlu_wr_tsa_inst_g = tsa_wr_tid_sel_g;
+//
+// added for timing
+assign tlu_wr_tsa_inst_w2 = ~sync_trap_taken_w2 & wsr_inst_w2 &   // wsr at w2, sync_trap_taken_w2 clear
+           (htstate_rw_w2 | ttype_rw_w2 | tnpc_rw_w2 |
+            tpc_rw_w2 | tstate_rw_w2);                            // any of the listed *_rw_w2 strobes
+
+// assign tlu_wsr_inst_g = wsr_inst_g;
+// 
+// modified for timing
+/*
+assign tlu_update_pstate_l_g[0] = ~(update_pstate_g[0] | local_rst);
+assign tlu_update_pstate_l_g[1] = ~(update_pstate_g[1] | local_rst);
+assign tlu_update_pstate_l_g[2] = ~(update_pstate_g[2] | local_rst);
+assign tlu_update_pstate_l_g[3] = ~(update_pstate_g[3] | local_rst);
+*/
+// Low-true pstate update strobes, one per thread; local_rst drives all
+// four bits low regardless of update_pstate_w2.
+assign tlu_update_pstate_l_w2[3:0] =
+           ~(update_pstate_w2[3:0] | {4{local_rst}});
+
+//=========================================================================================
+// rdpr mux selects - recoded due to timing
+//=========================================================================================
+// modified for bug 1352 - added the non-privileged term in the read select
+//
+// modified for bug 1859
+// assign	tlu_rdpr_mx1_sel[0] = tpc_rw_e; 
+// assign	tlu_rdpr_mx1_sel[1] = tnpc_rw_e;
+// assign	tlu_rdpr_mx1_sel[2] = tick_rw_e | tick_npriv_r_e;
+// assign	tlu_rdpr_mx1_sel[3] = tickcmp_rw_e;
+// assign	tlu_rdpr_mx2_sel[0] = tstate_rw_e; 
+// assign	tlu_rdpr_mx2_sel[1] = tba_rw_e;
+// assign	tlu_rdpr_mx2_sel[2] = sftint_rg_rw_e;
+// assign	tlu_rdpr_mx3_sel[0] = ttype_rw_e; 
+// assign	tlu_rdpr_mx3_sel[2] = tl_rw_e;
+// assign	tlu_rdpr_mx3_sel[3] = pil_rw_e;
+// assign	tlu_rdpr_mx4_sel[0] = (|tlu_rdpr_mx2_sel[`RDPR_MX2_SEL_WIDTH-1:0]) | tlu_htba_mx2_sel; 
+// assign	tlu_rdpr_mx4_sel[1] = (ttype_rw_e & ttype_written) | pstate_rw_e | tl_rw_e | pil_rw_e;
+// assign	tlu_rdpr_mx4_sel[2] =  tlu_rdpr_mx5_active;  
+// assign	tlu_rdpr_mx6_sel[0] = (|tlu_rdpr_mx1_sel[3:0]); 
+// assign	tlu_rdpr_mx6_sel[1] =  stickcmp_rw_e;  
+// assign	tlu_rdpr_mx6_sel[2] =  tlu_htickcmp_rw_e; 
+// assign	tlu_rdpr_mx7_sel[0] = |(tlu_rdpr_mx4_sel[2:0]);
+// assign	tlu_rdpr_mx7_sel[1] = |(tlu_rdpr_mx6_sel[2:0]); 
+// assign	tlu_rdpr_mx7_sel[2] = ttype_unwritten_sel;
+
+assign	local_rdpr_mx1_sel[0] = tick_rw_e | tick_npriv_r_e;
+assign	local_rdpr_mx1_sel[1] = tickcmp_rw_e;
+assign	local_rdpr_mx1_sel[2] = stickcmp_rw_e;  
+assign	local_rdpr_mx1_sel[3] = tlu_htickcmp_rw_e; 
+// Exported selects drop local bit 0 (presumably the mux default leg -- TODO confirm) and are gated by ~rst_tri_en.
+assign	tlu_rdpr_mx1_sel[0] = local_rdpr_mx1_sel[1] & ~rst_tri_en; 
+assign	tlu_rdpr_mx1_sel[1] = local_rdpr_mx1_sel[2] & ~rst_tri_en;
+assign	tlu_rdpr_mx1_sel[2] = local_rdpr_mx1_sel[3] & ~rst_tri_en; 
+// mux 2: two hypervisor read selects, then tl and pil
+assign	local_rdpr_mx2_sel[0] = tlu_hyperv_rdpr_sel[0];
+assign	local_rdpr_mx2_sel[1] = tlu_hyperv_rdpr_sel[1];
+assign	local_rdpr_mx2_sel[2] = tl_rw_e;
+assign	local_rdpr_mx2_sel[3] = pil_rw_e;
+// same export pattern as mux 1: local bits [3:1] become exported bits [2:0]
+assign	tlu_rdpr_mx2_sel[0] = local_rdpr_mx2_sel[1] & ~rst_tri_en;
+assign	tlu_rdpr_mx2_sel[1] = local_rdpr_mx2_sel[2] & ~rst_tri_en;
+assign	tlu_rdpr_mx2_sel[2] = local_rdpr_mx2_sel[3] & ~rst_tri_en; 
+// mux 3: softint register, pstate, and hypervisor read select 2
+assign	local_rdpr_mx3_sel[0] = sftint_rg_rw_e;
+assign	local_rdpr_mx3_sel[1] = pstate_rw_e;
+assign	local_rdpr_mx3_sel[2] = tlu_hyperv_rdpr_sel[2];
+// local bits [2:1] become exported bits [1:0]
+assign	tlu_rdpr_mx3_sel[0] = local_rdpr_mx3_sel[1] & ~rst_tri_en; 
+assign	tlu_rdpr_mx3_sel[1] = local_rdpr_mx3_sel[2] & ~rst_tri_en; 
+// mux 4: tpc, tnpc, tstate
+assign	local_rdpr_mx4_sel[0] = tpc_rw_e; 
+assign	local_rdpr_mx4_sel[1] = tnpc_rw_e;
+assign	local_rdpr_mx4_sel[2] = tstate_rw_e; 
+// local bits [2:1] become exported bits [1:0]
+assign	tlu_rdpr_mx4_sel[0] = local_rdpr_mx4_sel[1] & ~rst_tri_en;
+assign	tlu_rdpr_mx4_sel[1] = local_rdpr_mx4_sel[2] & ~rst_tri_en;
+// mux 5: tba, hypervisor read select 4, "any mux 1 select", and pcr/pic reads
+// modified for rte failures
+assign  local_rdpr_mx5_sel[0] = tba_rw_e;
+assign	local_rdpr_mx5_sel[1] = tlu_hyperv_rdpr_sel[4] & ~rst_tri_en;
+assign	local_rdpr_mx5_sel[2] = (|local_rdpr_mx1_sel[3:0]) & ~rst_tri_en;
+assign	local_rdpr_mx5_sel[3] = (pcr_rsr_e | pic_rsr_e) & ~rst_tri_en;
+// for mux 5 and mux 6 the ~rst_tri_en gating is already in the local bits, so the export is a plain copy
+assign  tlu_rdpr_mx5_sel[0] = local_rdpr_mx5_sel[1];
+assign	tlu_rdpr_mx5_sel[1] = local_rdpr_mx5_sel[2];
+assign	tlu_rdpr_mx5_sel[2] = local_rdpr_mx5_sel[3]; 
+// mux 6 export; local_rdpr_mx6_sel is assigned just below
+assign	tlu_rdpr_mx6_sel[0] = local_rdpr_mx6_sel[1]; 
+assign	tlu_rdpr_mx6_sel[1] = local_rdpr_mx6_sel[2];
+assign	tlu_rdpr_mx6_sel[2] = local_rdpr_mx6_sel[3];
+// mux 6: ttype, hypervisor read select 3, "any mux 2 select", "any mux 3 select"
+assign	local_rdpr_mx6_sel[0] = ttype_rw_e; 
+assign	local_rdpr_mx6_sel[1] = tlu_hyperv_rdpr_sel[3] & ~rst_tri_en;
+assign	local_rdpr_mx6_sel[2] = (|local_rdpr_mx2_sel[3:0]) & ~rst_tri_en;
+assign	local_rdpr_mx6_sel[3] = (|local_rdpr_mx3_sel[2:0]) & ~rst_tri_en;
+// mux 7: bits 0-2 flag any select of mux 4, 5 or 6; bit 3 is set when none of those three is
+assign	tlu_rdpr_mx7_sel[0] = (|local_rdpr_mx4_sel[2:0]) & ~rst_tri_en; 
+assign	tlu_rdpr_mx7_sel[1] = (|local_rdpr_mx5_sel[3:0]) & ~rst_tri_en; 
+assign	tlu_rdpr_mx7_sel[2] = (|local_rdpr_mx6_sel[3:0]) & ~rst_tri_en; 
+assign	tlu_rdpr_mx7_sel[3] = ~(|tlu_rdpr_mx7_sel[2:0]); 
+
+//=========================================================================================
+
+// ttype_written: some threadN_rsel_e is set while tt_unwritten[N] is clear.
+assign ttype_written =
+           |({thread3_rsel_e, thread2_rsel_e, thread1_rsel_e, thread0_rsel_e} &
+             ~tt_unwritten[3:0]);
+// ttype_rw_e is set but ttype_written is not.
+assign ttype_unwritten_sel = ~ttype_written & ttype_rw_e;
+
+//
+// constructing the mux select for rdpr 7 in tdp
+//
+
+//=========================================================================================
+
+dff_s #(2) dff_stgdntry_m (
+    .clk (clk),
+    .din ({done_inst_e, retry_inst_e}),           // e-stage done/retry pair
+    .q   ({done_inst_m_tmp, retry_inst_m_tmp}),   // m-stage pair, qualified further below
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// trap_on_dnrtry_m: priv opcode / illegal inst trap on the done/retry;
+// it is simply ifu_ttype_vld_m.
+wire   trap_on_dnrtry_m;
+assign trap_on_dnrtry_m = ifu_ttype_vld_m;
+// done_inst_m is cleared when trap_on_dnrtry_m is set, or when
+// tlu_ifu_flush_pipe_w is set while thrid_g equals tlu_exu_tid_m.
+// Modified for bug 4074 and 4561.
+assign done_inst_m =
+           done_inst_m_tmp & ~trap_on_dnrtry_m &
+           ~(tlu_ifu_flush_pipe_w & (thrid_g[1:0] == tlu_exu_tid_m[1:0]));
+/*
+            done_inst_m_tmp  & ~(((thrid_g[1:0] == tlu_exu_tid_m[1:0]) & 
+            tlu_ifu_flush_pipe_w) | ((thrid_w2[1:0] == tlu_exu_tid_m[1:0]) &
+            lsu_defr_trap_g) | trap_on_dnrtry_m);
+*/
+assign retry_inst_m =                                  // same qualification as done_inst_m
+           retry_inst_m_tmp & ~trap_on_dnrtry_m &
+           ~(tlu_ifu_flush_pipe_w & (thrid_g[1:0] == tlu_exu_tid_m[1:0]));
+/*
+            retry_inst_m_tmp & ~(((thrid_g[1:0] == tlu_exu_tid_m[1:0]) & 
+            tlu_ifu_flush_pipe_w) | ((thrid_w2[1:0] == tlu_exu_tid_m[1:0]) &
+            lsu_defr_trap_g) | trap_on_dnrtry_m);
+*/
+/*
+// logic moved to tlu_misctl
+// modified/added for timing violations
+// moved the logic from exu to tlu due to timing violations
+
+mux4ds #(3) mux_trap_old_cwp_m(
+    .in0(exu_tlu_cwp0[2:0]),
+    .in1(exu_tlu_cwp1[2:0]), 
+    .in2(exu_tlu_cwp2[2:0]),
+    .in3(exu_tlu_cwp3[2:0]),
+    .sel0(thread0_rsel_m),
+    .sel1(thread1_rsel_m),
+    .sel2(thread2_rsel_m),
+    .sel3(thread3_rsel_m),
+    .dout(trap_old_cwp_m[2:0])
+);
+
+assign cwp_xor_m[2:0] = trap_old_cwp_m[2:0] ^ tlu_exu_cwp_m[2:0];
+
+assign cwp_no_change_m = ~|(cwp_xor_m[2:0]); 
+*/
+// The exu_* copies are the unqualified m-stage done/retry
+// (done_inst_m_tmp / retry_inst_m_tmp); the ifu ttype/inst-valid
+// qualification that once followed them is disabled in the source.
+assign exu_done_inst_m  = done_inst_m_tmp;
+assign exu_retry_inst_m = retry_inst_m_tmp;
+
+// CWP/CCR update request: either exu_* copy is set.
+// (Earlier form used the qualified done_inst_m | retry_inst_m.)
+assign tlu_exu_cwpccr_update_m = exu_retry_inst_m | exu_done_inst_m;
+
+// Retry indication; the earlier form used the qualified retry_inst_m
+// and was changed because of timing problems.
+assign tlu_exu_cwp_retry_m = exu_retry_inst_m;
+
+// cwp_fastcmplt_m: an update request while tlu_cwp_no_change_m is set.
+assign cwp_fastcmplt_m = tlu_cwp_no_change_m & tlu_exu_cwpccr_update_m;
+dffr_s dffr_cwp_fastcmplt_uq_g (
+    .clk (clk),
+    .rst (local_rst),
+    .din (cwp_fastcmplt_m),
+    .q   (cwp_fastcmplt_uq_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// qualify with flush ?
+// modified for timing and bug4658 
+// modified for timing and added the omitted tlz trap qualification
+
+// m-stage PC select, registered below and exported as tlu_true_pc_sel_w:
+//   [0] retry_inst_m_tmp, [1] done_inst_m_tmp, [2] neither of the two.
+// Bits 0/1 also need ifu_tlu_inst_vld_m set, ifu_tlu_trap_m clear, and
+// neither pib_wrap_trap_m nor any tlz_trap_m bit set.
+assign true_pc_sel_m[0] =
+           ifu_tlu_inst_vld_m & retry_inst_m_tmp & ~ifu_tlu_trap_m &
+           ~((|tlz_trap_m[`TLU_THRD_NUM-1:0]) | pib_wrap_trap_m);
+assign true_pc_sel_m[1] =
+           ifu_tlu_inst_vld_m & done_inst_m_tmp & ~ifu_tlu_trap_m &
+           ~((|tlz_trap_m[`TLU_THRD_NUM-1:0]) | pib_wrap_trap_m);
+assign true_pc_sel_m[2] = ~(|true_pc_sel_m[1:0]);
+dffr_s #(3) dff_true_pc_sel_w (
+    .clk (clk),
+    .rst (local_rst),
+    .din (true_pc_sel_m[2:0]),
+    .q   (true_pc_sel_w[2:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+assign tlu_true_pc_sel_w[2:0] = true_pc_sel_w[2:0];
+dff_s #(49) dff_ifu_npc_w (
+    .clk (clk),
+    .din (ifu_tlu_npc_m[48:0]),   // 49-bit m-stage NPC from the IFU
+    .q   (ifu_npc_w[48:0]),       // same value after one flop
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// m -> g staging of the qualified done/retry pair, then gated by inst_vld_g.
+dff_s #(2) dff_stgdntry_g (
+    .clk (clk),
+    .din ({done_inst_m, retry_inst_m}),
+    .q   ({done_inst_g_tmp, retry_inst_g_tmp}),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+assign done_inst_g  = inst_vld_g & done_inst_g_tmp;
+assign retry_inst_g = inst_vld_g & retry_inst_g_tmp;
+
+//assign	tlu_retry_inst_g = retry_inst_g;
+//assign	tlu_done_inst_g  = done_inst_g;
+// 
+// threaded dnrtry_inst_g signal
+// modified for timing 
+//
+assign	dnrtry0_inst_g = (done_inst_g | retry_inst_g) & 
+                        ~(inst_ifu_flush2_w | local_early_flush_pipe_w) & 
+                          thread0_rsel_g;
+
+assign	dnrtry1_inst_g = (done_inst_g | retry_inst_g) & 
+                        ~(inst_ifu_flush2_w | local_early_flush_pipe_w) & 
+                          thread1_rsel_g;
+
+assign	dnrtry2_inst_g = (done_inst_g | retry_inst_g) & 
+                        ~(inst_ifu_flush2_w | local_early_flush_pipe_w) & 
+                          thread2_rsel_g;
+
+assign	dnrtry3_inst_g = (done_inst_g | retry_inst_g) & 
+                        ~(inst_ifu_flush2_w | local_early_flush_pipe_w) & 
+                          thread3_rsel_g;
+//
+// added for timing
+dffr_s #(`TLU_THRD_NUM) dffr_dnrtry_inst_w2 (
+    .din ({dnrtry3_inst_g,dnrtry2_inst_g,dnrtry1_inst_g,dnrtry0_inst_g}),
+    .q   (dnrtry_inst_w2[`TLU_THRD_NUM-1:0]),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+assign  tlu_dnrtry0_inst_g = dnrtry0_inst_g;
+assign  tlu_dnrtry1_inst_g = dnrtry1_inst_g;
+assign  tlu_dnrtry2_inst_g = dnrtry2_inst_g;
+assign  tlu_dnrtry3_inst_g = dnrtry3_inst_g;
+
+// flush needed for done/retry with tl=0
+// modified for timing
+// assign	dnrtry_inst_g = (done_inst_g | retry_inst_g) & ~tlu_flush_pipe_w;
+// 
+assign	dnrtry_inst_g = (done_inst_g | retry_inst_g) & 
+                       ~(inst_ifu_flush_w | local_early_flush_pipe_w);
+dff_s #(2) dff_stgdntry_e (
+    .din ({ifu_tlu_done_inst_d,ifu_tlu_retry_inst_d}), 
+	.q   ({done_inst_e,retry_inst_e}),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// Thread id supplied by the IFU in D.
+assign thrid_d[1:0] = ifu_tlu_thrid_d[1:0];
+
+// One-hot decode of the D-stage thread id.
+assign thread0_rsel_d = ~(|thrid_d[1:0]);
+assign thread1_rsel_d = ~thrid_d[1] &  thrid_d[0];
+assign thread2_rsel_d =  thrid_d[1] & ~thrid_d[0];
+assign thread3_rsel_d =  (&thrid_d[1:0]);
+
+// Thread id pipeline: D -> E -> M -> G, one 2-bit flop per stage.
+// (The D->E stage was modified due to an rte failure.)
+dff_s #(2) dff_thrid_e (
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (thrid_d[1:0]),
+    .q   (thrid_e[1:0])
+);
+
+dff_s #(2) dff_thrid_m (
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (thrid_e[1:0]),
+    .q   (thrid_m[1:0])
+);
+
+dff_s #(2) dff_thrid_g (
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (thrid_m[1:0]),
+    .q   (thrid_g[1:0])
+);
+
+// One-hot decode of the E-stage thread id, also exported as a vector.
+assign thread0_rsel_e = ~thrid_e[1] & ~thrid_e[0];
+assign thread1_rsel_e = ~thrid_e[1] &  thrid_e[0];
+assign thread2_rsel_e =  thrid_e[1] & ~thrid_e[0];
+assign thread3_rsel_e =  thrid_e[1] &  thrid_e[0];
+
+assign tlu_thrd_rsel_e[3:0] =
+           {thread3_rsel_e, thread2_rsel_e, thread1_rsel_e, thread0_rsel_e};
+
+// Registered copy of the E-stage one-hot select, available in M
+// (added for timing).
+dff_s #(`TLU_THRD_NUM) dff_thread_stg_m (
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din ({thread3_rsel_e, thread2_rsel_e, thread1_rsel_e, thread0_rsel_e}),
+    .q   ({thread3_stg_m, thread2_stg_m, thread1_stg_m, thread0_stg_m})
+);
+
+// Separately named copies of the registered M-stage selects.
+assign {thread3_stg_m_buf, thread2_stg_m_buf,
+        thread1_stg_m_buf, thread0_stg_m_buf} =
+           {thread3_stg_m, thread2_stg_m, thread1_stg_m, thread0_stg_m};
+
+// One-hot decode of the M-stage thread id.
+assign thread0_rsel_m = ~thrid_m[1] & ~thrid_m[0];
+assign thread1_rsel_m = ~thrid_m[1] &  thrid_m[0];
+assign thread2_rsel_m =  thrid_m[1] & ~thrid_m[0];
+assign thread3_rsel_m =  thrid_m[1] &  thrid_m[0];
+
+// One-hot decode taken directly from the G-stage thread id.
+assign thread0_rsel_dec_g = ~thrid_g[1] & ~thrid_g[0];
+assign thread1_rsel_dec_g = ~thrid_g[1] &  thrid_g[0];
+assign thread2_rsel_dec_g =  thrid_g[1] & ~thrid_g[0];
+assign thread3_rsel_dec_g =  thrid_g[1] &  thrid_g[0];
+
+// Two registered copies of the M-stage one-hot select for use in G:
+// threadN_rsel_g and threadN_wsel_g. Both flops take the same din.
+// Timing fix (load redistribution): threadN_rsel_g used to be assigned from
+// threadN_rsel_dec_g; it is now a flop output instead.
+dff_s #(`TLU_THRD_NUM) dff_thread_rsel_g (
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din ({thread3_rsel_m, thread2_rsel_m, thread1_rsel_m, thread0_rsel_m}),
+    .q   ({thread3_rsel_g, thread2_rsel_g, thread1_rsel_g, thread0_rsel_g})
+);
+
+dff_s #(`TLU_THRD_NUM) dff_thread_wsel_g (
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din ({thread3_rsel_m, thread2_rsel_m, thread1_rsel_m, thread0_rsel_m}),
+    .q   ({thread3_wsel_g, thread2_wsel_g, thread1_wsel_g, thread0_wsel_g})
+);
+// 
+
+// Registers the valid-qualified done/retry indications (done_inst_g,
+// retry_inst_g) into their W2 copies.
+dff_s #(2) dff_stgdntry_w2 (
+    .din ({done_inst_g,retry_inst_g}),
+	.q   ({done_inst_w2,retry_inst_w2}),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// modified for bug 4561
+// M-stage instruction valid: the IFU-supplied valid is suppressed when
+// tlu_exu_tid_m equals thrid_g while tlu_flush_pipe_w or inst_ifu_flush_w
+// is asserted.
+assign inst_vld_m = 
+           ifu_tlu_inst_vld_m & ~((thrid_g[1:0] == tlu_exu_tid_m[1:0]) & 
+           (tlu_flush_pipe_w | inst_ifu_flush_w)); 
+/*
+assign inst_vld_m = 
+           ifu_tlu_inst_vld_m & ~((thrid_g[1:0] == tlu_exu_tid_m[1:0]) & 
+           (tlu_flush_pipe_w | inst_ifu_flush_w)) & ~((thrid_w2[1:0] == tlu_exu_tid_m[1:0]) & 
+            lsu_defr_trap_g);
+*/
+//
+
+// Variant of the M-stage valid that uses tlu_flush_all_w as the flush term
+// (same thread-id match as inst_vld_m).
+assign tlu_inst_vld_nq_m = 
+           ifu_tlu_inst_vld_m & ~((thrid_g[1:0] == tlu_exu_tid_m[1:0]) & 
+           tlu_flush_all_w);
+
+// Registers inst_vld_m into inst_vld_nf_g; the G-stage flush qualification
+// is applied afterwards in inst_vld_g.
+dff_s dff_stgivld_g (
+    .din (inst_vld_m),
+    .q   (inst_vld_nf_g),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// modified for timing
+// The locally registered flush (disabled below) was replaced by the
+// ifu_tlu_flush_fd_w input; both inst_ifu_flush_w and inst_ifu_flush2_w are
+// copies of that input.
+/*
+dffr_s dffr_inst_ifu_flush_w (
+    .din (ifu_tlu_flush_m),
+	.q   (inst_ifu_flush_w),
+    .clk (clk),
+    .rst (local_rst), 
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+assign inst_ifu_flush_w  = ifu_tlu_flush_fd_w;
+assign inst_ifu_flush2_w = ifu_tlu_flush_fd_w;
+
+// added for bug 2133
+// G-stage instruction valid: the staged valid with neither an IFU flush nor
+// lsu_tlu_defr_trp_taken_g asserted.
+assign inst_vld_g = 
+       inst_vld_nf_g & ~(inst_ifu_flush_w | lsu_tlu_defr_trp_taken_g);
+       // modified for bug 4561
+       // inst_vld_nf_g & ~(inst_ifu_flush_w | lsu_tlu_defr_trp_taken_g | 
+       // ((thrid_w2[1:0] == thrid_g[1:0]) & lsu_defr_trap_g));
+
+// Registers inst_vld_g into inst_vld_w2.
+dff_s dff_stgivld_w2 (
+    .din (inst_vld_g),
+	.q   (inst_vld_w2),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// modified due to timing
+// assign	cwp_fastcmplt_g = cwp_fastcmplt_w & inst_vld_g;
+// CWP fast-completion in G: the unqualified flop output gated by inst_vld_g.
+assign	cwp_fastcmplt_g = cwp_fastcmplt_uq_g & inst_vld_g;
+
+// Registers cwp_fastcmplt_g into cwp_fastcmplt_w2.
+dff_s dff_stgfcmplt_w2 (
+    .din (cwp_fastcmplt_g),
+	.q   (cwp_fastcmplt_w2),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// The stage name assignment may have to be changed !!
+// done/retry qualified with inst_vld as it could be flushed.
+// Trap-PC valid to the IFU: a valid done or retry that fast-completed, any
+// of the four per-thread W2 trap indications, or cwp_cmplt_w2.
+assign	tlu_ifu_trappc_vld_w1 	=	((retry_inst_w2 | done_inst_w2) & inst_vld_w2 & cwp_fastcmplt_w2) | 
+					thrd0_traps_w2 | thrd1_traps_w2 |
+					thrd2_traps_w2 | thrd3_traps_w2 | cwp_cmplt_w2;	   
+					//thrd2_traps_w2 | thrd3_traps_w2) & inst_vld_w2 | cwp_cmplt_w2;	   
+// Trap-NPC valid to the IFU: same structure as the trap-PC valid, except
+// that only retry (not done) contributes to the first term and the
+// completion term is cwp_cmplt_rtry_w2.
+assign	tlu_ifu_trapnpc_vld_w1 	= 	(retry_inst_w2 & inst_vld_w2 & cwp_fastcmplt_w2) |
+					thrd0_traps_w2 | thrd1_traps_w2 |
+					thrd2_traps_w2 | thrd3_traps_w2 | cwp_cmplt_rtry_w2;
+					//) & inst_vld_w2 | cwp_cmplt_w2;	   
+//
+// modified for hypervisor support
+// assign	tlu_ifu_trap_tid_w1[1:0]=	cwp_cmplt_w2 ? cwp_cmplt_tid_w2[1:0] : trap_tid_w2[1:0];
+//
+// recoded for timing
+// assign	true_trap_tid_g[1:0] = cwp_cmplt_g ? cwp_cmplt_tid_g[1:0] : trap_tid_g[1:0];
+// modified for bug 4091 and 4491
+/*
+assign early_trap_tid_g[1:0] = 
+	       (((hwint_g | pib_wrap_trap_g| local_early_flush_pipe_w) & 
+              ~(ifu_tlu_flush_fd_w | local_lsu_defr_trp_taken_g)) | 
+           (dnrtry_inst_g & cwp_fastcmplt_g) | rstint_g) ? thrid_g[1:0] : pend_trap_tid_g[1:0]; 
+*/
+// Early trap thread id: the G-stage thread id (thrid_g) is selected for an
+// unflushed hardware interrupt / pib wrap trap / local early flush, for a
+// fast-completing done/retry, or for a reset interrupt; otherwise the
+// pending-trap thread id is used.
+assign early_trap_tid_g[1:0] = 
+	       (((hwint_g | pib_wrap_trap_g| local_early_flush_pipe_w) & ~ifu_tlu_flush_fd_w) | 
+           (dnrtry_inst_g & cwp_fastcmplt_g) | rstint_g) ? thrid_g[1:0] : pend_trap_tid_g[1:0]; 
+//
+// modified for bug 4561
+// Final trap thread id: dside_sync_trap_g or lsu_defr_trap_g force thrid_g;
+// otherwise the early selection above is passed through.
+assign	true_trap_tid_g[1:0] = 
+             // (lsu_defr_trap_g) ? thrid_w2[1:0]  : 
+             (dside_sync_trap_g | lsu_defr_trap_g) ? thrid_g[1:0] :
+              early_trap_tid_g[1:0];
+
+// Registers the final trap thread id into true_trap_tid_w2.
+dff_s #(2) dff_true_trap_tid_w2 (
+    .din (true_trap_tid_g[1:0]),
+    .q   (true_trap_tid_w2[1:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// The thread id sent to the IFU is the registered copy.
+assign tlu_ifu_trap_tid_w1[1:0] = true_trap_tid_w2[1:0];
+
+// determine the mode of operation for the trapped thread
+// modified for timing 
+/*
+assign tlu_trap_hpstate_enb = 
+           (~(|true_trap_tid_g[1:0]))? tlu_hpstate_enb[0]:
+               ((~true_trap_tid_g[1] & true_trap_tid_g[0])? tlu_hpstate_enb[1]:
+                   ((true_trap_tid_g[1] & ~true_trap_tid_g[0])? tlu_hpstate_enb[2]: 
+                       tlu_hpstate_enb[3]));
+*/
+// 4:1 select of tlu_hpstate_enb by the registered trap thread id (the
+// disabled version above indexed with the unregistered G-stage id).
+assign tlu_trap_hpstate_enb = 
+           (~(|true_trap_tid_w2[1:0]))? tlu_hpstate_enb[0]:
+               ((~true_trap_tid_w2[1] & true_trap_tid_w2[0])? tlu_hpstate_enb[1]:
+                   ((true_trap_tid_w2[1] & ~true_trap_tid_w2[0])? tlu_hpstate_enb[2]: 
+                       tlu_hpstate_enb[3]));
+
+//=========================================================================================
+//	Local Exceptions within TLU/MMU
+//=========================================================================================
+
+// Exceptions detected locally; per the original note these are to be merged
+// with the exceptions reported by the LSU.
+// History: moved to M for the early_flush timing fix (this used to be
+// local_sync_trap_g = tlu_mmu_sync_data_excp_g), then modified for
+// hypervisor support and for timing.
+
+// hscpd data-access exception (added for timing, modified for bug 3618):
+// the raw M-stage exception counts only when the thread selected in M is
+// not flagged in tlu_hyper_lite.
+assign true_hscpd_dacc_excpt_m =
+           tlu_hscpd_dacc_excpt_m &
+           (|({thread3_stg_m_buf, thread2_stg_m_buf,
+               thread1_stg_m_buf, thread0_stg_m_buf} &
+              ~tlu_hyper_lite[3:0]));
+
+// qtail data-access exception: the raw M-stage exception counts only when,
+// for the thread selected in M, hpstate_enb is set, hpstate_priv is clear
+// and pstate_priv is set.
+assign true_qtail_dacc_excpt_m =
+           tlu_qtail_dacc_excpt_m &
+           (|({thread3_stg_m_buf, thread2_stg_m_buf,
+               thread1_stg_m_buf, thread0_stg_m_buf} &
+               tlu_hpstate_enb[3:0] & ~tlu_hpstate_priv[3:0] &
+               tlu_pstate_priv_buf[3:0]));
+
+// Combined indication to the LSU (added for a dsfsr bug); not qualified with
+// instruction valid. A registered W-stage copy of this signal
+// (dffr_tlu_lsu_priv_trap_w) once existed and remains disabled.
+assign tlu_lsu_priv_trap_m =
+           true_qtail_dacc_excpt_m | true_hscpd_dacc_excpt_m;
+
+// Local synchronous trap in M: either exception, for a valid instruction.
+assign local_sync_trap_m =
+           inst_vld_m & (true_qtail_dacc_excpt_m | true_hscpd_dacc_excpt_m);
+
+// Registers the local synchronous trap into G; reset pin on local_rst.
+dffr_s dffr_local_sync_trap_g (
+    .clk (clk),
+    .rst (local_rst),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (local_sync_trap_m),
+    .q   (local_sync_trap_g)
+);
+
+//=========================================================================================
+//	Queuing traps
+//=========================================================================================
+
+// For the current instruction, traps are prioritised across the pipe. There
+// are three synchronous sources: ifu, exu and lsu. IFU traps are assumed to
+// have the highest priority, so only exu and lsu traps need to be compared.
+
+// Reset / hardware / software interrupt requests from the IFU, qualified
+// here rather than in the IFU (moved for timing). A request is dropped when
+// tlu_exu_tid_m equals thrid_g while tlu_flush_all_w is asserted, or when
+// inst_vld_m is low.
+assign {ifu_rstint_m, ifu_hwint_m, ifu_swint_m} =
+           {ifu_tlu_rstint_m, ifu_tlu_hwint_m, ifu_tlu_swint_m} &
+           {3{~((thrid_g[1:0] == tlu_exu_tid_m[1:0]) & tlu_flush_all_w) &
+              inst_vld_m}};
+
+// Thread-specific IFU flush (added for bug 2133): the flush steered by the
+// one-hot decode of the G-stage thread id.
+assign ifu_thrd_flush_w[3:0] =
+           {thread3_rsel_dec_g, thread2_rsel_dec_g,
+            thread1_rsel_dec_g, thread0_rsel_dec_g} &
+           {4{inst_ifu_flush2_w}};
+
+// INTERRUPT
+// Registers the three qualified interrupt requests and the 6-bit
+// int_tlu_rstid_m into their G-stage copies (3 + 6 = 9 bits).
+dff_s #(9) dff_stgint_g (
+   .clk (clk),
+   .se  (se),
+   .si  (),
+   .so  (),
+   .din ({ifu_rstint_m, ifu_hwint_m, ifu_swint_m, int_tlu_rstid_m[5:0]}),
+   .q   ({rstint_g, hwint_g, swint_g, rstid_g[5:0]})
+);
+
+// Classify the reset interrupt; the type of reset determines what state gets
+// updated. Open question from the original author: this is the same as a
+// wrm reset, so could writes to TNPC/TPC be turned off?
+//   rstid 6'h01 -> por_rstint_g
+//   rstid 6'h03 -> xir_rstint_g (added for bug 4749)
+assign por_rstint_g = rstint_g & (rstid_g[5:0] == 6'h01);
+assign xir_rstint_g = rstint_g & (rstid_g[5:0] == 6'h03);
+
+// Per-thread por reset in G, steered by the G-stage thread selects.
+assign {por_rstint3_g, por_rstint2_g, por_rstint1_g, por_rstint0_g} =
+           {4{por_rstint_g}} &
+           {thread3_rsel_g, thread2_rsel_g, thread1_rsel_g, thread0_rsel_g};
+
+// Exported vector form of the per-thread G-stage por reset.
+assign tlu_por_rstint_g[3:0] =
+           {por_rstint3_g, por_rstint2_g, por_rstint1_g, por_rstint0_g};
+
+// Registers por_rstint_g into por_rstint_w2.
+dff_s dff_por_rstint_w2 (
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (por_rstint_g),
+    .q   (por_rstint_w2)
+);
+
+// Per-thread por reset in W2, steered by the W2 thread write selects.
+assign {por_rstint3_w2, por_rstint2_w2, por_rstint1_w2, por_rstint0_w2} =
+           {4{por_rstint_w2}} &
+           {thread3_wsel_w2, thread2_wsel_w2, thread1_wsel_w2, thread0_wsel_w2};
+
+// "Taken" indications: each request gated by the G-stage instruction valid.
+// Original note on hwint: it needs to be requalified with pstate.ie; the IFU
+// will replay a dropped hwint and must source the thread id as
+// ifu_tlu_thrid_d.
+assign rstint_taken = inst_vld_g & rstint_g;
+assign hwint_taken  = inst_vld_g & hwint_g;
+assign swint_taken  = inst_vld_g & swint_g;
+
+// SIR taken (modified for bug 5127; it was simply sir_inst_g & inst_vld_g):
+// additionally blocked by a pib wrap trap, by lsu_tlu_defr_trp_taken_g, or
+// by a tlz trap on any thread.
+assign sirint_taken =
+           sir_inst_g & inst_vld_g &
+           ~pib_wrap_trap_nq_g &
+           ~lsu_tlu_defr_trp_taken_g &
+           ~(|tlz_trap_g[`TLU_THRD_NUM-1:0]);
+
+// Disabled: earlier per-thread software-interrupt "taken" decode and id mux.
+/*
+assign	swint_thrd0_taken = swint_taken & thread0_rsel_g & tlu_int_pstate_ie[0];
+assign	swint_thrd1_taken = swint_taken & thread1_rsel_g & tlu_int_pstate_ie[1];
+assign	swint_thrd2_taken = swint_taken & thread2_rsel_g & tlu_int_pstate_ie[2];
+assign	swint_thrd3_taken = swint_taken & thread3_rsel_g & tlu_int_pstate_ie[3];
+//
+//modified for hypervisor support
+assign	swint_id[3:0]	=
+	swint_thrd0_taken ? sftint0_id[3:0] :
+		swint_thrd1_taken ? sftint1_id[3:0] :
+			swint_thrd2_taken ? sftint2_id[3:0] :
+				swint_thrd3_taken ? sftint3_id[3:0] :
+					4'bxxxx;
+*/
+//
+//added for timing 
+// Registered copies of the per-thread cpu-mondo, dev-mondo and
+// resumable-error compare vectors; each flop has its reset pin on local_rst.
+dffr_s #(`TLU_THRD_NUM) dffr_tlu_cpu_mondo_trap (
+    .din (tlu_cpu_mondo_cmp[`TLU_THRD_NUM-1:0]),
+    .q   (tlu_cpu_mondo_trap[`TLU_THRD_NUM-1:0]),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+dffr_s #(`TLU_THRD_NUM) dffr_tlu_dev_mondo_trap (
+    .din (tlu_dev_mondo_cmp[`TLU_THRD_NUM-1:0]),
+    .q   (tlu_dev_mondo_trap[`TLU_THRD_NUM-1:0]),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+dffr_s #(`TLU_THRD_NUM) dffr_tlu_resum_err_trap (
+    .din (tlu_resum_err_cmp[`TLU_THRD_NUM-1:0]),
+    .q   (tlu_resum_err_trap[`TLU_THRD_NUM-1:0]),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// Select the cpu-mondo trap bit of the thread picked by the G-stage write
+// selects (priority chain; thread 3 is the default leg).
+assign cpu_mondo_trap_g =
+       (thread0_wsel_g)? tlu_cpu_mondo_trap[0]:
+       (thread1_wsel_g)? tlu_cpu_mondo_trap[1]:
+       (thread2_wsel_g)? tlu_cpu_mondo_trap[2]:
+       tlu_cpu_mondo_trap[3];
+
+// Same selection for the dev-mondo trap bit.
+assign dev_mondo_trap_g =
+       (thread0_wsel_g)? tlu_dev_mondo_trap[0]:
+       (thread1_wsel_g)? tlu_dev_mondo_trap[1]:
+       (thread2_wsel_g)? tlu_dev_mondo_trap[2]:
+       tlu_dev_mondo_trap[3];
+
+// Select the 4-bit software-interrupt id of the thread picked by the W2
+// write selects (thread 3 is the default leg).
+assign sftint_id_w2[3:0] =
+       (thread0_wsel_w2)? sftint0_id[3:0]:
+       (thread1_wsel_w2)? sftint1_id[3:0]: 
+       (thread2_wsel_w2)? sftint2_id[3:0]:
+        sftint3_id[3:0];
+
+// Registers the selected cpu-mondo trap bit into W2; reset pin on local_rst.
+dffr_s dffr_cpu_mondo_trap_w2 (
+    .din (cpu_mondo_trap_g),
+    .q   (cpu_mondo_trap_w2),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+// Registers the selected dev-mondo trap bit into W2; reset pin on local_rst.
+dffr_s dffr_dev_mondo_trap_w2 (
+    .din (dev_mondo_trap_g),
+    .q   (dev_mondo_trap_w2),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+// Final 7-bit software-interrupt trap id in W2. Priority: cpu mondo, then
+// dev mondo, otherwise {3'b100, software-interrupt id}.
+assign final_swint_id_w2[6:0] =
+       (cpu_mondo_trap_w2)? `CPU_MONDO_TRAP :
+       (dev_mondo_trap_w2)? `DEV_MONDO_TRAP :
+        {3'b100, sftint_id_w2[3:0]};
+
+// recoded for timing for bug 5117
+// Disabled: the earlier per-thread formulation of the same priority scheme.
+/*
+assign final_swint0_id[6:0] =
+	tlu_cpu_mondo_trap[0] ? `CPU_MONDO_TRAP : 
+		((tlu_dev_mondo_trap[0] & ~tlu_cpu_mondo_trap[0]) ? `DEV_MONDO_TRAP :
+		      {3'b100, sftint0_id[3:0]});	
+
+assign final_swint1_id[6:0] =
+	tlu_cpu_mondo_trap[1] ? `CPU_MONDO_TRAP : 
+		((tlu_dev_mondo_trap[1] & ~tlu_cpu_mondo_trap[1]) ? `DEV_MONDO_TRAP :
+		      {3'b100, sftint1_id[3:0]});	
+
+assign final_swint2_id[6:0] =
+	tlu_cpu_mondo_trap[2] ? `CPU_MONDO_TRAP : 
+		((tlu_dev_mondo_trap[2] & ~tlu_cpu_mondo_trap[2]) ? `DEV_MONDO_TRAP :
+		      {3'b100, sftint2_id[3:0]});	
+
+assign final_swint3_id[6:0] =
+	tlu_cpu_mondo_trap[3] ? `CPU_MONDO_TRAP : 
+		((tlu_dev_mondo_trap[3] & ~tlu_cpu_mondo_trap[3]) ? `DEV_MONDO_TRAP :
+		      {3'b100, sftint3_id[3:0]});	
+
+assign final_swint_id[6:0]	=
+	swint_thrd0_taken ? final_swint0_id[6:0] :
+		swint_thrd1_taken ? final_swint1_id[6:0] :
+			swint_thrd2_taken ? final_swint2_id[6:0] :
+				swint_thrd3_taken ? final_swint3_id[6:0] :
+					7'bxxxxxxx;
+*/
+
+// Original author's notes:
+//  - rstid (the interrupt/reset vector) is assumed to equal the trap type;
+//    this still needs to be confirmed.
+//  - the software SIR is generated by the IFU decode.
+//  - the ttype for an internal wdr is the tt of the trap itself.
+//  - a sir instruction at maxtl can result in entry to error state, and
+//    thus in a wdr.
+
+// Per-thread internal watchdog reset (modified for bugs 4749 and 4906): a
+// trap on a thread whose trap level is at maxtl, unless that same thread is
+// selected in G while a por or xir reset interrupt is present. The earlier
+// version masked all four threads with ~(por_rstint_g | xir_rstint_g)
+// regardless of the thread select.
+assign internal_wdr_trap[0] =
+           thrd0_traps & trp_lvl0_at_maxtl &
+           ~(thread0_rsel_g & (por_rstint_g | xir_rstint_g));
+assign internal_wdr_trap[1] =
+           thrd1_traps & trp_lvl1_at_maxtl &
+           ~(thread1_rsel_g & (por_rstint_g | xir_rstint_g));
+assign internal_wdr_trap[2] =
+           thrd2_traps & trp_lvl2_at_maxtl &
+           ~(thread2_rsel_g & (por_rstint_g | xir_rstint_g));
+assign internal_wdr_trap[3] =
+           thrd3_traps & trp_lvl3_at_maxtl &
+           ~(thread3_rsel_g & (por_rstint_g | xir_rstint_g));
+
+// Any thread raising an internal watchdog reset.
+assign internal_wdr = |internal_wdr_trap[3:0];
+
+// Self-boot reset indication in G (modified for bug 4640 and bug 5127):
+// a reset interrupt, an internal wdr, a trap to RED mode, or a sir
+// instruction that is not overridden by an lsu deferred trap, a pib wrap
+// trap, or a tlz trap on any thread.
+assign tlu_self_boot_rst_g =
+           rstint_g | internal_wdr | trap_to_redmode |
+           (sir_inst_g & ~lsu_defr_trap_g & ~pib_wrap_trap_g &
+            ~(|tlz_trap_g[`TLU_THRD_NUM-1:0]));
+               
+//
+// added for timing; moved qualification from IFU to TLU
+// modified for bug 4561
+// IFU trap-type valid in M: suppressed when tlu_exu_tid_m equals thrid_g
+// while tlu_flush_pipe_w is asserted.
+assign	ifu_ttype_vld_m = 
+            ifu_tlu_ttype_vld_m & 
+            ~((thrid_g[1:0] == tlu_exu_tid_m[1:0]) & tlu_flush_pipe_w);
+/*
+assign	ifu_ttype_vld_m = 
+            ifu_tlu_ttype_vld_m & 
+            ~((thrid_g[1:0] == tlu_exu_tid_m[1:0]) & tlu_flush_pipe_w) & 
+            ~((thrid_w2[1:0] == tlu_exu_tid_m[1:0]) & lsu_defr_trap_g);
+*/
+
+// REGULAR TRAP
+// Registers the EXU and IFU trap types and their valids from M into G:
+// 9 + 1 + 9 + 1 = 20 bits. The IFU outputs carry a _tmp suffix; the final
+// ifu_ttype_g is formed elsewhere from ifu_ttype_tmp_g.
+dff_s #(20) dff_stgeftt_g (
+    .din ({exu_tlu_ttype_m[8:0],exu_tlu_ttype_vld_m,ifu_tlu_ttype_m[8:0],ifu_ttype_vld_m}), 
+    .q   ({exu_ttype_g[8:0],exu_ttype_vld_g,ifu_ttype_tmp_g[8:0],ifu_ttype_vld_tmp_g}), 
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// added for bug 1293
+// added spu_tlu_rsrv_illgl_m2 to account for the new illeg_instr from spu
+//
+// modified for the hypervisor support - wsr_illeg_globals_g is no longer necessary
+// modified for pib support and timing fixes 
+/*
+assign ifu_ttype_g[8:0] = //((wsr_illeg_globals_g | spu_tlu_rsrv_illgl_m2) & 
+                          (spu_tlu_rsrv_illgl_m2 & 
+                         ~(ifu_ttype_tmp_g & (ifu_ttype_tmp_g < 9'h012)) &
+                         ~immu_miss_g) ? 
+                           9'h010 :
+                          ((tlu_tick_npt_priv_act) ? 9'h037 : 
+                          ifu_ttype_tmp_g); 
+*/
+//
+// Per-thread "user mode" flag: neither hpstate.priv nor pstate.priv is set.
+assign tlu_none_priv[3:0] =
+           ~(tlu_hpstate_priv[3:0] | tlu_pstate_priv_buf[3:0]);
+
+// Per-thread tlu_hyper_lite flag: hpstate.priv is set, or hpstate_enb is
+// clear while pstate.priv is set.
+assign tlu_hyper_lite[3:0] =
+           tlu_hpstate_priv[3:0] |
+           (~tlu_hpstate_enb[3:0] & tlu_pstate_priv_buf[3:0]);
+
+// htrap instruction illegal-instruction trap.
+// Per the original note, this trap is taken only in hypervisor mode and not
+// in hyper-lite mode.
+//
+// User-mode flag of the thread selected in M, counted only when that
+// thread's hpstate_enb is set.
+assign tlu_none_priv_m =
+           |(tlu_none_priv[3:0] & tlu_hpstate_enb[3:0] &
+             {thread3_rsel_m, thread2_rsel_m, thread1_rsel_m, thread0_rsel_m});
+
+// A valid EXU trap type with bits [8] and [7] both set, raised while
+// tlu_none_priv_m is asserted.
+assign htrap_ill_inst_m =
+           tlu_none_priv_m & exu_tlu_ttype_vld_m &
+           exu_tlu_ttype_m[8] & exu_tlu_ttype_m[7];
+
+// Registers htrap_ill_inst_m into its unflushed G-stage copy; reset pin on
+// local_rst.
+dffr_s dffr_htrap_ill_inst_uf_g (
+   .din (htrap_ill_inst_m),
+   .q   (htrap_ill_inst_uf_g),
+   .rst (local_rst),
+   .clk (clk),
+   .se  (se),       
+   .si  (),          
+   .so  ()
+);
+
+// The G-stage htrap illegal-instruction indication is dropped on an IFU
+// flush.
+assign htrap_ill_inst_g = htrap_ill_inst_uf_g & ~inst_ifu_flush_w;
+// 
+// added for timing fix
+// SPU reserved/illegal indication in M, gated by the M-stage valid.
+assign spu_ill_inst_m = spu_tlu_rsrv_illgl_m & inst_vld_m;
+
+// Registers spu_ill_inst_m into its unflushed G-stage copy; reset pin on
+// local_rst.
+dffr_s dffr_spu_ill_inst_uf_g (
+   .din (spu_ill_inst_m),
+   // modified for bug 2133
+   // .q   (spu_ill_inst_g),
+   .q   (spu_ill_inst_uf_g),
+   .rst (local_rst),
+   .clk (clk),
+   .se  (se),       
+   .si  (),          
+   .so  ()
+);
+
+
+// 
+// added for bug 2133
+// modified at Farnad's request for bug 3599
+// modified back to the old behavior (pre bug 3599) due to bug 4698
+// The G-stage SPU illegal-instruction indication is dropped on either an
+// IFU flush or lsu_tlu_early_flush_w.
+assign spu_ill_inst_g = 
+//             spu_ill_inst_uf_g & ~(inst_ifu_flush_w); 
+// fix for bug 5863. Only a stxa to asi=40 with opcode-rsvd should cause an illgl_va
+           spu_ill_inst_uf_g & ~(inst_ifu_flush_w | lsu_tlu_early_flush_w);
+
+// FFU illegal instruction coinciding with a valid IFU trap type of 9'h020;
+// in ifu_ttype_g below this forces 9'h010 regardless of the other terms.
+assign ffu_higher_pri_g = 
+           ffu_ill_inst_g & (ifu_ttype_vld_tmp_g & (ifu_ttype_tmp_g == 9'h020));
+
+//
+// Final IFU trap type in G, in priority order:
+//   1. 9'h010 when any of the SPU / FFU / htrap illegal-instruction
+//      indications is set, there is no valid IFU trap type below 9'h012 and
+//      no immu_miss_g -- or when ffu_higher_pri_g is set.
+//   2. 9'h037 when tick_npt_priv_act_g is set, or pib_priv_act_trap_g is set
+//      with no valid IFU trap type.
+//   3. otherwise the staged IFU trap type.
+assign ifu_ttype_g[8:0] = (((spu_ill_inst_g  | ffu_ill_inst_g | htrap_ill_inst_g) & 
+                         ~(ifu_ttype_vld_tmp_g   & (ifu_ttype_tmp_g < 9'h012)) &
+                         ~immu_miss_g) | ffu_higher_pri_g) ? 
+                           9'h010 :
+                          ((tick_npt_priv_act_g | 
+                           (pib_priv_act_trap_g & ~ifu_ttype_vld_tmp_g)) ? 9'h037 : 
+                           ifu_ttype_tmp_g); 
+//
+// added for timing fix
+// Early pib privileged-action trap in M: for the thread selected in M, its
+// pib_priv_act_trap_m bit with a valid instruction, while neither
+// pstate.priv nor hpstate.priv is set for that thread.
+assign pib_priv_act_early_trap_m = 
+           ((pib_priv_act_trap_m[0] & inst_vld_m & thread0_rsel_m) & 
+             ~(tlu_pstate_priv_buf[0] | tlu_hpstate_priv[0])) |
+           ((pib_priv_act_trap_m[1] & inst_vld_m & thread1_rsel_m) & 
+             ~(tlu_pstate_priv_buf[1] | tlu_hpstate_priv[1])) |
+           ((pib_priv_act_trap_m[2] & inst_vld_m & thread2_rsel_m) & 
+             ~(tlu_pstate_priv_buf[2] | tlu_hpstate_priv[2])) |
+           ((pib_priv_act_trap_m[3] & inst_vld_m & thread3_rsel_m) & 
+             ~(tlu_pstate_priv_buf[3] | tlu_hpstate_priv[3]));
+//
+
+
+// recoded the following for timing:
+/*
+assign exu_pib_priv_act_trap_m = 
+           ((pib_priv_act_trap_m[0] & thread0_rsel_m) & 
+             ~(tlu_pstate_priv[0] | tlu_hpstate_priv[0])) |
+           ((pib_priv_act_trap_m[1] & thread1_rsel_m) & 
+             ~(tlu_pstate_priv[1] | tlu_hpstate_priv[1])) |
+           ((pib_priv_act_trap_m[2] & thread2_rsel_m) & 
+             ~(tlu_pstate_priv[2] | tlu_hpstate_priv[2])) |
+           ((pib_priv_act_trap_m[3] & thread3_rsel_m) & 
+             ~(tlu_pstate_priv[3] | tlu_hpstate_priv[3]));
+*/
+
+// The EXU variant computes the same function as the disabled expression
+// above, but is built in three explicit AND stages so the privilege bits
+// enter last. Unlike the early trap it has no inst_vld_m term.
+// NOTE(review): this path reads tlu_pstate_priv while the early trap reads
+// tlu_pstate_priv_buf -- presumably a buffered copy of the same signal;
+// confirm against the declarations.
+wire [3:0] pib_priv_act_trap_thrd_qual_m;
+wire [3:0] pib_priv_act_trap_thrd_hpstatepriv_qual_m;
+wire [3:0] pib_priv_act_trap_thrd_hpstatepriv_pstatepriv_m;
+
+// Stage 1: per-thread trap bit steered by the M-stage thread select.
+assign  pib_priv_act_trap_thrd_qual_m[0] = pib_priv_act_trap_m[0] & thread0_rsel_m ;
+assign  pib_priv_act_trap_thrd_qual_m[1] = pib_priv_act_trap_m[1] & thread1_rsel_m ;
+assign  pib_priv_act_trap_thrd_qual_m[2] = pib_priv_act_trap_m[2] & thread2_rsel_m ;
+assign  pib_priv_act_trap_thrd_qual_m[3] = pib_priv_act_trap_m[3] & thread3_rsel_m ;
+
+// Stage 2: masked when the thread's hpstate.priv is set.
+assign pib_priv_act_trap_thrd_hpstatepriv_qual_m[0] = pib_priv_act_trap_thrd_qual_m[0] & ~tlu_hpstate_priv[0];
+assign pib_priv_act_trap_thrd_hpstatepriv_qual_m[1] = pib_priv_act_trap_thrd_qual_m[1] & ~tlu_hpstate_priv[1];
+assign pib_priv_act_trap_thrd_hpstatepriv_qual_m[2] = pib_priv_act_trap_thrd_qual_m[2] & ~tlu_hpstate_priv[2];
+assign pib_priv_act_trap_thrd_hpstatepriv_qual_m[3] = pib_priv_act_trap_thrd_qual_m[3] & ~tlu_hpstate_priv[3];
+
+// Stage 3: masked when the thread's pstate.priv is set.
+assign pib_priv_act_trap_thrd_hpstatepriv_pstatepriv_m[0] = pib_priv_act_trap_thrd_hpstatepriv_qual_m[0] &
+								~tlu_pstate_priv[0];
+assign pib_priv_act_trap_thrd_hpstatepriv_pstatepriv_m[1] = pib_priv_act_trap_thrd_hpstatepriv_qual_m[1] &
+								~tlu_pstate_priv[1];
+assign pib_priv_act_trap_thrd_hpstatepriv_pstatepriv_m[2] = pib_priv_act_trap_thrd_hpstatepriv_qual_m[2] &
+								~tlu_pstate_priv[2];
+assign pib_priv_act_trap_thrd_hpstatepriv_pstatepriv_m[3] = pib_priv_act_trap_thrd_hpstatepriv_qual_m[3] &
+								~tlu_pstate_priv[3];
+
+// OR across the four threads.
+assign exu_pib_priv_act_trap_m = (|pib_priv_act_trap_thrd_hpstatepriv_pstatepriv_m[3:0]);
+
+
+// 
+// Added to make the pib overflow trap precise.
+// Per-thread trap enable: pstate.ie set and the thread's true PIL below
+// 4'hf.
+assign pib_trap_en[0] = (true_pil0[3:0] < 4'hf) & tlu_int_pstate_ie[0];
+assign pib_trap_en[1] = (true_pil1[3:0] < 4'hf) & tlu_int_pstate_ie[1];
+assign pib_trap_en[2] = (true_pil2[3:0] < 4'hf) & tlu_int_pstate_ie[2];
+assign pib_trap_en[3] = (true_pil3[3:0] < 4'hf) & tlu_int_pstate_ie[3];
+
+// Added for bug 5017: one pending flag per thread for a picl wrap.
+// Each flop samples pib_picl_wrap[n]; its reset pin is driven by local_rst,
+// or by a valid W2 instruction on that thread while the wrap input is low
+// and no full pipe flush is in W2.
+// NOTE(review): the non-local_rst reset term requires pib_picl_wrap[n] to
+// be low, i.e. din is already 0 in that cycle -- confirm against the
+// dffr_s definition whether that term has any effect.
+dffr_s dffr_picl_wrap_pend_0 (
+    .clk (clk),
+    .rst (local_rst |
+          (thread_inst_vld_w2[0] & ~(pib_picl_wrap[0] | tlu_full_flush_pipe_w2))),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (pib_picl_wrap[0]),
+    .q   (picl_wrap_pend[0])
+);
+dffr_s dffr_picl_wrap_pend_1 (
+    .clk (clk),
+    .rst (local_rst |
+          (thread_inst_vld_w2[1] & ~(pib_picl_wrap[1] | tlu_full_flush_pipe_w2))),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (pib_picl_wrap[1]),
+    .q   (picl_wrap_pend[1])
+);
+dffr_s dffr_picl_wrap_pend_2 (
+    .clk (clk),
+    .rst (local_rst |
+          (thread_inst_vld_w2[2] & ~(pib_picl_wrap[2] | tlu_full_flush_pipe_w2))),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (pib_picl_wrap[2]),
+    .q   (picl_wrap_pend[2])
+);
+dffr_s dffr_picl_wrap_pend_3 (
+    .clk (clk),
+    .rst (local_rst |
+          (thread_inst_vld_w2[3] & ~(pib_picl_wrap[3] | tlu_full_flush_pipe_w2))),
+    .se  (se),
+    .si  (),
+    .so  (),
+    .din (pib_picl_wrap[3]),
+    .q   (picl_wrap_pend[3])
+);
+
+// Pending picl-wrap flag of the thread selected in M.
+assign tlu_picl_wrap_flg_m =
+           |(picl_wrap_pend[3:0] &
+             {thread3_stg_m_buf, thread2_stg_m_buf,
+              thread1_stg_m_buf, thread0_stg_m_buf});
+
+// modified for bug 4086, 4206, 4246 and 4314
+// modified for bug 5033, 5083 and 5017
+// modified for bug 5436 - Niagara 2.0
+
+// changed pend_pich_cnt_hld to pend_pich_cnt_hld_noqual as per bug5436(reopened 9/17/04).
+//
+// Per-thread pib wrap indication in M. For thread n the wrap condition is
+// the OR of:
+//   - a pending picl wrap, with thread n selected in M;
+//   - pich_wrap_flg[n], with a valid thread-n instruction in M;
+//   - pich_onebelow_flg[n], with a valid thread-n instruction in M and
+//     another in G or in W2;
+//   - pich_twobelow_flg[n], with valid thread-n instructions in M, G and W2.
+// The result is gated by pib_trap_en[n], suppressed when tlu_flush_pipe_w is
+// asserted with thread n selected in G, and suppressed while
+// pend_pich_cnt_hld_noqual[n] is set.
+// NOTE(review): the wrap condition and the flush term are written out again,
+// term for term, in pib_pich_wrap_m; a change to one must be mirrored in the
+// other.
+assign pib_wrap_m[0] = 
+           // (pib_picl_wrap[0] |
+           ((picl_wrap_pend[0] & thread0_rsel_m) |
+           (pich_wrap_flg[0] & inst_vld_m & thread0_rsel_m) |
+           (pich_onebelow_flg[0] & (inst_vld_m & thread0_rsel_m) & 
+           ((inst_vld_g & thread0_rsel_g) | (inst_vld_w2 & thread0_wsel_w2))) |
+           (pich_twobelow_flg[0] & (inst_vld_m & thread0_rsel_m) & 
+           (inst_vld_g & thread0_rsel_g) & (inst_vld_w2 & thread0_wsel_w2))) &
+            pib_trap_en[0] & ~(tlu_flush_pipe_w & thread0_rsel_g) & ~pend_pich_cnt_hld_noqual[0];
+assign pib_wrap_m[1] = 
+           // (pib_picl_wrap[1] |
+           ((picl_wrap_pend[1] & thread1_rsel_m) |
+           (pich_wrap_flg[1] & inst_vld_m & thread1_rsel_m) |
+           (pich_onebelow_flg[1] & (inst_vld_m & thread1_rsel_m) & 
+           ((inst_vld_g & thread1_rsel_g) | (inst_vld_w2 & thread1_wsel_w2))) |
+           (pich_twobelow_flg[1] & (inst_vld_m & thread1_rsel_m) & 
+           (inst_vld_g & thread1_rsel_g) & (inst_vld_w2 & thread1_wsel_w2))) &   
+            pib_trap_en[1] & ~(tlu_flush_pipe_w & thread1_rsel_g) & ~pend_pich_cnt_hld_noqual[1];
+assign pib_wrap_m[2] = 
+           // (pib_picl_wrap[2] |
+           ((picl_wrap_pend[2] & thread2_rsel_m) |
+           (pich_wrap_flg[2] & inst_vld_m & thread2_rsel_m) |
+           (pich_onebelow_flg[2] & (inst_vld_m & thread2_rsel_m) & 
+           ((inst_vld_g & thread2_rsel_g) | (inst_vld_w2 & thread2_wsel_w2))) |
+           (pich_twobelow_flg[2] & (inst_vld_m & thread2_rsel_m) & 
+           (inst_vld_g & thread2_rsel_g) & (inst_vld_w2 & thread2_wsel_w2))) &   
+            pib_trap_en[2] & ~(tlu_flush_pipe_w & thread2_rsel_g) & ~pend_pich_cnt_hld_noqual[2];
+assign pib_wrap_m[3] = 
+           // (pib_picl_wrap[3] |
+           ((picl_wrap_pend[3] & thread3_rsel_m) |
+           (pich_wrap_flg[3] & inst_vld_m & thread3_rsel_m) |
+           (pich_onebelow_flg[3] & (inst_vld_m & thread3_rsel_m) & 
+           ((inst_vld_g & thread3_rsel_g) | (inst_vld_w2 & thread3_wsel_w2))) |
+           (pich_twobelow_flg[3] & (inst_vld_m & thread3_rsel_m) & 
+           (inst_vld_g & thread3_rsel_g) & (inst_vld_w2 & thread3_wsel_w2))) &   
+            pib_trap_en[3] & ~(tlu_flush_pipe_w & thread3_rsel_g) & ~pend_pich_cnt_hld_noqual[3];
+
+// modified for timing and bug 4314 and 5017
+// added for bug 5436 - Niagara 2.0
+
+// removed qualification with ~pend_pich_cnt_hld from the following logics and pushed
+// the qulaification to G stage only for software interupt bit15 setting. The above 
+// logic stay the same and no precise trap will be taken in the case of b2b valid
+// instruction as indicated in bug5436(reopened 9/16/04) 
+// pib_pich_wrap_m[n]: per-thread wrap indication. Each bit ORs four terms:
+//   - picl_wrap_pend[n] while threadN_rsel_m is set
+//   - pich_wrap_flg[n] with inst_vld_m on thread n
+//   - pich_onebelow_flg[n] with inst_vld_m on thread n and a second valid
+//     thread-n instruction flagged by either the _g or the _w2 qualifiers
+//   - pich_twobelow_flg[n] with valid thread-n instructions flagged by the
+//     _m, _g and _w2 qualifiers at the same time
+// The only mask applied here is the thread-n flush
+// (tlu_flush_pipe_w & threadN_rsel_g); there is no pib_trap_en or hold term
+// in these equations.
+assign pib_pich_wrap_m[0] = 
+           // (pib_picl_wrap[0] |
+           ((picl_wrap_pend[0] & thread0_rsel_m) |
+           (pich_wrap_flg[0] & inst_vld_m & thread0_rsel_m) |
+           (pich_onebelow_flg[0] & (inst_vld_m & thread0_rsel_m) & 
+           ((inst_vld_g & thread0_rsel_g) | (inst_vld_w2 & thread0_wsel_w2))) |
+           (pich_twobelow_flg[0] & (inst_vld_m & thread0_rsel_m) & 
+           (inst_vld_g & thread0_rsel_g) & (inst_vld_w2 & thread0_wsel_w2))) &
+           ~(tlu_flush_pipe_w & thread0_rsel_g) ;
+// thread 1
+assign pib_pich_wrap_m[1] = 
+           // (pib_picl_wrap[1] |
+           ((picl_wrap_pend[1] & thread1_rsel_m) |
+           (pich_wrap_flg[1] & inst_vld_m & thread1_rsel_m) |
+           (pich_onebelow_flg[1] & (inst_vld_m & thread1_rsel_m) & 
+           ((inst_vld_g & thread1_rsel_g) | (inst_vld_w2 & thread1_wsel_w2))) |
+           (pich_twobelow_flg[1] & (inst_vld_m & thread1_rsel_m) & 
+           (inst_vld_g & thread1_rsel_g) & (inst_vld_w2 & thread1_wsel_w2))) &   
+           ~(tlu_flush_pipe_w & thread1_rsel_g) ;
+// thread 2
+assign pib_pich_wrap_m[2] = 
+           // (pib_picl_wrap[2] |
+           ((picl_wrap_pend[2] & thread2_rsel_m) |
+           (pich_wrap_flg[2] & inst_vld_m & thread2_rsel_m) |
+           (pich_onebelow_flg[2] & (inst_vld_m & thread2_rsel_m) & 
+           ((inst_vld_g & thread2_rsel_g) | (inst_vld_w2 & thread2_wsel_w2))) |
+           (pich_twobelow_flg[2] & (inst_vld_m & thread2_rsel_m) & 
+           (inst_vld_g & thread2_rsel_g) & (inst_vld_w2 & thread2_wsel_w2))) &   
+           ~(tlu_flush_pipe_w & thread2_rsel_g) ;
+// thread 3
+assign pib_pich_wrap_m[3] = 
+           // (pib_picl_wrap[3] |
+           ((picl_wrap_pend[3] & thread3_rsel_m) |
+           (pich_wrap_flg[3] & inst_vld_m & thread3_rsel_m) |
+           (pich_onebelow_flg[3] & (inst_vld_m & thread3_rsel_m) & 
+           ((inst_vld_g & thread3_rsel_g) | (inst_vld_w2 & thread3_wsel_w2))) |
+           (pich_twobelow_flg[3] & (inst_vld_m & thread3_rsel_m) & 
+           (inst_vld_g & thread3_rsel_g) & (inst_vld_w2 & thread3_wsel_w2))) &   
+           ~(tlu_flush_pipe_w & thread3_rsel_g) ;
+/*
+assign pib_wrap_m[0] = 
+           (pib_picl_wrap[0] |
+           (pich_wrap_flg[0] & inst_vld_m & thread0_rsel_m) |
+           (pich_onebelow_flg[0] & inst_vld_m & (inst_vld_g | inst_vld_w2) & 
+           thread0_rsel_m & (thread0_wsel_w2 | thread0_rsel_g)) | 
+           (pich_twobelow_flg[0] & inst_vld_m & inst_vld_g  & inst_vld_w2 & 
+            thread0_rsel_g & thread0_rsel_m & thread0_wsel_w2)) & pib_trap_en[0] & 
+            ~(tlu_flush_pipe_w & thread0_rsel_g);
+assign pib_wrap_m[1] = 
+           (pib_picl_wrap[1] |
+           (pich_wrap_flg[1] & inst_vld_m & thread1_rsel_m) |
+           (pich_onebelow_flg[1] & inst_vld_m & (inst_vld_g | inst_vld_w2) & 
+           thread1_rsel_m & (thread1_wsel_w2 | thread1_rsel_g)) | 
+           (pich_twobelow_flg[1] & inst_vld_m & inst_vld_g  & inst_vld_w2 & 
+            thread1_rsel_g & thread1_rsel_m & thread1_wsel_w2)) & pib_trap_en[1] &
+            ~(tlu_flush_pipe_w & thread1_rsel_g);
+assign pib_wrap_m[2] = 
+           (pib_picl_wrap[2] |
+           (pich_wrap_flg[2] & inst_vld_m & thread2_rsel_m) |
+           (pich_onebelow_flg[2] & inst_vld_m & (inst_vld_g | inst_vld_w2) & 
+           thread2_rsel_m & (thread2_wsel_w2 | thread2_rsel_g)) | 
+           (pich_twobelow_flg[2] & inst_vld_m & inst_vld_g  & inst_vld_w2 & 
+            thread2_rsel_g & thread2_rsel_m & thread2_wsel_w2)) & pib_trap_en[2] &
+            ~(tlu_flush_pipe_w & thread2_rsel_g);
+assign pib_wrap_m[3] = 
+           (pib_picl_wrap[3] |
+           (pich_wrap_flg[3] & inst_vld_m & thread3_rsel_m) |
+           (pich_onebelow_flg[3] & inst_vld_m & (inst_vld_g | inst_vld_w2) & 
+           thread3_rsel_m & (thread3_wsel_w2 | thread3_rsel_g)) | 
+           (pich_twobelow_flg[3] & inst_vld_m & inst_vld_g  & inst_vld_w2 & 
+            thread3_rsel_g & thread3_rsel_m & thread3_wsel_w2)) & pib_trap_en[3] &
+            ~(tlu_flush_pipe_w & thread3_rsel_g);
+
+// modified for timing and bug 4314 
+assign pib_pich_wrap_m[0] = 
+           (pib_picl_wrap[0] |
+           (pich_wrap_flg[0] & inst_vld_m & thread0_rsel_m) |
+           (pich_onebelow_flg[0] & inst_vld_m & (inst_vld_g | inst_vld_w2) & 
+           thread0_rsel_m & (thread0_wsel_w2 | thread0_rsel_g)) | 
+           (pich_twobelow_flg[0] & inst_vld_m & inst_vld_g  & inst_vld_w2 & 
+            thread0_rsel_g & thread0_rsel_m & thread0_wsel_w2)) & 
+            ~(tlu_flush_pipe_w & thread0_rsel_g);
+assign pib_pich_wrap_m[1] = 
+           (pib_picl_wrap[1] |
+           (pich_wrap_flg[1] & inst_vld_m & thread1_rsel_m) |
+           (pich_onebelow_flg[1] & inst_vld_m & (inst_vld_g | inst_vld_w2) & 
+           thread1_rsel_m & (thread1_wsel_w2 | thread1_rsel_g)) | 
+           (pich_twobelow_flg[1] & inst_vld_m & inst_vld_g  & inst_vld_w2 & 
+            thread1_rsel_g & thread1_rsel_m & thread1_wsel_w2)) & 
+            ~(tlu_flush_pipe_w & thread1_rsel_g);
+assign pib_pich_wrap_m[2] = 
+           (pib_picl_wrap[2] |
+           (pich_wrap_flg[2] & inst_vld_m & thread2_rsel_m) |
+           (pich_onebelow_flg[2] & inst_vld_m & (inst_vld_g | inst_vld_w2) & 
+           thread2_rsel_m & (thread2_wsel_w2 | thread2_rsel_g)) | 
+           (pich_twobelow_flg[2] & inst_vld_m & inst_vld_g  & inst_vld_w2 & 
+            thread2_rsel_g & thread2_rsel_m & thread2_wsel_w2)) & 
+            ~(tlu_flush_pipe_w & thread2_rsel_g);
+assign pib_pich_wrap_m[3] = 
+           (pib_picl_wrap[3] |
+           (pich_wrap_flg[3] & inst_vld_m & thread3_rsel_m) |
+           (pich_onebelow_flg[3] & inst_vld_m & (inst_vld_g | inst_vld_w2) & 
+           thread3_rsel_m & (thread3_wsel_w2 | thread3_rsel_g)) | 
+           (pich_twobelow_flg[3] & inst_vld_m & inst_vld_g  & inst_vld_w2 & 
+            thread3_rsel_g & thread3_rsel_m & thread3_wsel_w2)) & 
+            ~(tlu_flush_pipe_w & thread3_rsel_g);
+//
+*/
+
+
+// Output of the dffr_pib_pich_wrap register stage below.
+wire [3:0] pib_pich_wrap_q;
+
+// Stage pib_pich_wrap_m through a `TLU_THRD_NUM-wide dffr_s instance whose
+// rst port is driven by local_rst. The si/so ports are left open.
+dffr_s #(`TLU_THRD_NUM) dffr_pib_pich_wrap (
+   .clk (clk),
+   .rst (local_rst),
+   .din (pib_pich_wrap_m[`TLU_THRD_NUM-1:0]),
+   .q   (pib_pich_wrap_q[`TLU_THRD_NUM-1:0]),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+
+// Added for bug 5436, reopened on 9/16/2004 by Samy. The following pushes the
+// qualification by the hold signal to the G stage, so softint bit 15 is set to
+// signal to software that an overflow has occurred, but the precise trap will
+// not be taken.
+// The registered wrap bits are masked with the inverted pend_pich_cnt_hld.
+assign pib_pich_wrap[3:0] = pib_pich_wrap_q[3:0] & {4{~pend_pich_cnt_hld}};
+
+
+// 
+// experiment
+/*
+assign pich_exu_wrap_e[0] = 
+       tlu_thread_inst_vld_w2[0]? pich_onebelow_flg[0]: pich_wrap_flg[0]; 
+assign pich_exu_wrap_e[1] = 
+       tlu_thread_inst_vld_w2[1]? pich_onebelow_flg[1]: pich_wrap_flg[1];
+assign pich_exu_wrap_e[2] = 
+       tlu_thread_inst_vld_w2[2]? pich_onebelow_flg[2]: pich_wrap_flg[2];
+assign pich_exu_wrap_e[3] = 
+       tlu_thread_inst_vld_w2[3]? pich_onebelow_flg[3]: pich_wrap_flg[3]; 
+
+assign pich_wrap_flg_e = 
+           (tlu_thrd_rsel_e[0]) ? pich_exu_wrap_e[0]:
+           (tlu_thrd_rsel_e[1]) ? pich_exu_wrap_e[1]:
+           (tlu_thrd_rsel_e[2]) ? pich_exu_wrap_e[2]:
+            pich_exu_wrap_e[3];
+*/
+
+// Stage tlu_pic_wrap_e through a dffr_s instance (rst = local_rst) to obtain
+// pich_wrap_flg_m.
+dffr_s dffr_pich_wrap_flg_m (
+   .clk (clk),
+   .rst (local_rst),
+   .din (tlu_pic_wrap_e),
+   .q   (pich_wrap_flg_m),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+//
+// modified for bug 5436 - Niagara 2.0
+// The staged wrap flag is only passed on while tlu_pic_cnt_en_m is set.
+assign tlu_pich_wrap_flg_m = tlu_pic_cnt_en_m & pich_wrap_flg_m;
+/*
+assign pic_hpstate_priv_e = 
+           (tlu_thrd_rsel_e[0]) ? tlu_hpstate_priv[0]:
+           (tlu_thrd_rsel_e[1]) ? tlu_hpstate_priv[1]:
+           (tlu_thrd_rsel_e[2]) ? tlu_hpstate_priv[2]:
+            tlu_hpstate_priv[3];
+
+assign pic_pstate_priv_e = 
+           (tlu_thrd_rsel_e[0]) ? tlu_pstate_priv_buf[0]:
+           (tlu_thrd_rsel_e[1]) ? tlu_pstate_priv_buf[1]:
+           (tlu_thrd_rsel_e[2]) ? tlu_pstate_priv_buf[2]:
+            tlu_pstate_priv_buf[3];
+
+assign pic_hpstate_enb_e = 
+           (tlu_thrd_rsel_e[0]) ? tlu_hpstate_enb[0]:
+           (tlu_thrd_rsel_e[1]) ? tlu_hpstate_enb[1]:
+           (tlu_thrd_rsel_e[2]) ? tlu_hpstate_enb[2]:
+            tlu_hpstate_enb[3];
+
+assign pic_trap_en_e = 
+           (tlu_thrd_rsel_e[0]) ? pib_trap_en[0]:
+           (tlu_thrd_rsel_e[1]) ? pib_trap_en[1]:
+           (tlu_thrd_rsel_e[2]) ? pib_trap_en[2]:
+            pib_trap_en[3];
+*/
+
+// modified for bug 5436 - Niagara 2.0
+
+// pic_cnt_en[n]: per-thread count enable. It is set when one of these
+// combinations holds for thread n:
+//   - ~tlu_hpstate_priv & ~tlu_pstate_priv_buf & tlu_pcr_ut
+//   - ~tlu_hpstate_enb  &  tlu_hpstate_priv    & tlu_pcr_st
+//   -  tlu_hpstate_enb  &  tlu_pstate_priv_buf & ~tlu_hpstate_priv & tlu_pcr_st
+// The commented-out line inside each equation is the earlier form that also
+// ANDed the result with pib_trap_en[n]; that term is no longer applied here.
+assign pic_cnt_en[0] = 
+            ((~tlu_hpstate_priv[0] & ~tlu_pstate_priv_buf[0] & tlu_pcr_ut[0]) | 
+             (~tlu_hpstate_enb[0]  & tlu_hpstate_priv[0] & tlu_pcr_st[0]) |
+             (tlu_hpstate_enb[0]   & tlu_pstate_priv_buf[0]  & ~tlu_hpstate_priv[0] & 
+ //tlu_pcr_st[0])) & pib_trap_en[0]; 
+              tlu_pcr_st[0])) ; 
+assign pic_cnt_en[1] = 
+            ((~tlu_hpstate_priv[1] & ~tlu_pstate_priv_buf[1] & tlu_pcr_ut[1]) | 
+             (~tlu_hpstate_enb[1]  & tlu_hpstate_priv[1] & tlu_pcr_st[1]) |
+             (tlu_hpstate_enb[1]   & tlu_pstate_priv_buf[1]  & ~tlu_hpstate_priv[1] & 
+ //tlu_pcr_st[1])) & pib_trap_en[1]; 
+              tlu_pcr_st[1])) ; 
+assign pic_cnt_en[2] = 
+            ((~tlu_hpstate_priv[2] & ~tlu_pstate_priv_buf[2] & tlu_pcr_ut[2]) | 
+             (~tlu_hpstate_enb[2]  & tlu_hpstate_priv[2] & tlu_pcr_st[2]) |
+             (tlu_hpstate_enb[2]   & tlu_pstate_priv_buf[2]  & ~tlu_hpstate_priv[2] & 
+ //tlu_pcr_st[2])) & pib_trap_en[2]; 
+              tlu_pcr_st[2])) ; 
+assign pic_cnt_en[3] = 
+            ((~tlu_hpstate_priv[3] & ~tlu_pstate_priv_buf[3] & tlu_pcr_ut[3]) | 
+             (~tlu_hpstate_enb[3]  & tlu_hpstate_priv[3] & tlu_pcr_st[3]) |
+             (tlu_hpstate_enb[3]   & tlu_pstate_priv_buf[3]  & ~tlu_hpstate_priv[3] & 
+ //tlu_pcr_st[3])) & pib_trap_en[3]; 
+              tlu_pcr_st[3])) ; 
+
+// Pick the count enable of the thread selected by tlu_thrd_rsel_e. The chain is
+// a priority select: bit 0 is tested first, and thread 3 is the default when
+// none of bits 0..2 is set.
+assign pic_cnt_en_e = 
+           (tlu_thrd_rsel_e[0]) ? pic_cnt_en[0]:
+           (tlu_thrd_rsel_e[1]) ? pic_cnt_en[1]:
+           (tlu_thrd_rsel_e[2]) ? pic_cnt_en[2]:
+            pic_cnt_en[3];
+
+
+/*
+assign pic_cnt_en_e = 
+            ((~pic_hpstate_priv_e & ~pic_pstate_priv_e & pcr_ut_e) | 
+             (~pic_hpstate_enb_e  & pic_hpstate_priv_e & pcr_st_e) |
+             (pic_hpstate_enb_e   & pic_pstate_priv_e  & ~pic_hpstate_priv_e & 
+              pcr_st_e)) & pic_trap_en_e; 
+*/
+
+// Stage pic_cnt_en_e through a dffr_s instance (rst = local_rst) to obtain
+// pic_cnt_en_m.
+dffr_s dffr_tlu_pic_cnt_en_m (
+   .clk (clk),
+   .rst (local_rst),
+   .din (pic_cnt_en_e),
+   .q   (pic_cnt_en_m),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+
+/**** replaced the following with and-or for better synthesis interms of timing 
+assign tlu_pich_cnt_hld =
+       (thread0_rsel_m) ? pend_pich_cnt_hld[0]: 
+       (thread1_rsel_m) ? pend_pich_cnt_hld[1]:
+       (thread2_rsel_m) ? pend_pich_cnt_hld[2]:
+        pend_pich_cnt_hld[3]; 
+************/
+
+
+// AND-OR select of pend_pich_cnt_hld_early[n] by the threadN_stg_m qualifiers.
+// Unlike a ?: chain, the result is 0 when none of the four qualifiers is set.
+assign tlu_pich_cnt_hld = (thread0_stg_m & pend_pich_cnt_hld_early[0]) |
+			  (thread1_stg_m & pend_pich_cnt_hld_early[1]) |
+			  (thread2_stg_m & pend_pich_cnt_hld_early[2]) |
+			  (thread3_stg_m & pend_pich_cnt_hld_early[3]) ;
+
+
+// Added the following since we still want to qualify with pib_trap_en for the trap
+// signal going to exu, i.e. tlu_pic_cnt_en_m is used to generate
+// tlu_exu_pic_onebelow_m in tlu_misctl.v.
+// Priority select of pib_trap_en[n] by tlu_thrd_rsel_e (thread 3 is the default).
+wire pic_trap_en_e =
+           (tlu_thrd_rsel_e[0]) ? pib_trap_en[0]:
+           (tlu_thrd_rsel_e[1]) ? pib_trap_en[1]:
+           (tlu_thrd_rsel_e[2]) ? pib_trap_en[2]:
+            pib_trap_en[3];
+
+wire pic_trap_en_m;
+
+// Stage pic_trap_en_e through a dffr_s instance (rst = local_rst).
+dffr_s dffr_pic_trap_en_m (
+   .din (pic_trap_en_e),
+   .q   (pic_trap_en_m),
+   .rst (local_rst), .clk (clk), .se  (se), .si  (), .so  ());
+
+// Count enable qualified by the trap enable, before the hold is applied.
+wire tlu_pic_cnt_en_m_prequal = pic_cnt_en_m & pic_trap_en_m;
+
+// Final enable: the pre-qualified enable is suppressed while tlu_pich_cnt_hld is set.
+assign tlu_pic_cnt_en_m = tlu_pic_cnt_en_m_prequal & ~tlu_pich_cnt_hld;
+
+//
+// added for bug 5436 - Niagara 2.0
+// Two back-to-back dffr_s stages (rst = local_rst):
+//   pic_cnt_en_m -> pic_cnt_en_w -> pic_cnt_en_w2
+dffr_s dffr_pic_cnt_en_w (
+   .clk (clk),
+   .rst (local_rst),
+   .din (pic_cnt_en_m),
+   .q   (pic_cnt_en_w),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+
+dffr_s dffr_pic_cnt_en_w2 (
+   .clk (clk),
+   .rst (local_rst),
+   .din (pic_cnt_en_w),
+   .q   (pic_cnt_en_w2),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+
+/*
+// added for bug 4785
+assign tlu_pic_onebelow_e = 
+           (thread0_rsel_e) ? pich_onebelow_flg[0] :
+           (thread1_rsel_e) ? pich_onebelow_flg[1] :
+           (thread2_rsel_e) ? pich_onebelow_flg[2] :
+           pich_onebelow_flg[3];
+
+dffr_s dffr_tlu_exu_pic_onebelow_m (
+   .din (tlu_pic_onebelow_e),
+   .q   (tlu_exu_pic_onebelow_m),
+   .rst (local_rst),
+   .clk (clk),
+   .se  (se),       
+   .si  (),          
+   .so  ()
+);
+
+assign tlu_pic_twobelow_e = 
+           (thread0_rsel_e) ? pich_twobelow_flg[0] :
+           (thread1_rsel_e) ? pich_twobelow_flg[1] :
+           (thread2_rsel_e) ? pich_twobelow_flg[2] :
+           pich_twobelow_flg[3];
+
+dffr_s dffr_tlu_exu_pic_twobelow_m (
+   .din (tlu_pic_twobelow_e),
+   .q   (tlu_exu_pic_twobelow_m),
+   .rst (local_rst),
+   .clk (clk),
+   .se  (se),       
+   .si  (),          
+   .so  ()
+);
+*/
+//
+// added for bug 4395
+// Stage bit 8 of exu_tlu_ttype_m through a dffr_s instance (rst = local_rst)
+// to obtain tlu_tcc_inst_w.
+dffr_s dffr_tlu_tcc_inst_w (
+    .clk (clk),
+    .rst (local_rst),
+    .din (exu_tlu_ttype_m[8]),
+    .q   (tlu_tcc_inst_w),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// Any thread requesting a pib wrap trap.
+assign pib_wrap_trap_m = |pib_wrap_m[`TLU_THRD_NUM-1:0];
+//
+// modified for bug 4342
+// pib wrap precise trap: staged through a dffr_s instance (rst = local_rst)
+dffr_s dffr_pib_wrap_trap_nq_g (
+   .clk (clk),
+   .rst (local_rst),
+   .din (pib_wrap_trap_m),
+   .q   (pib_wrap_trap_nq_g),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+
+// modified for bug 4916
+// The staged request is dropped while lsu_tlu_defr_trp_taken_g is set.
+assign pib_wrap_trap_g = ~lsu_tlu_defr_trp_taken_g & pib_wrap_trap_nq_g;
+//
+// modified for bug 2955
+// OR of the trap sources reported through tlu_exu_priv_trap_m:
+// exu_pib_priv_act_trap_m, exu_tick_npt_priv_act_m, any bit of tlz_exu_trap_m,
+// and the pich/picl wrap flags.
+assign tlu_exu_priv_trap_m = 
+           exu_pib_priv_act_trap_m | exu_tick_npt_priv_act_m |
+           (|tlz_exu_trap_m[`TLU_THRD_NUM-1:0]) | tlu_pich_wrap_flg_m |
+           tlu_picl_wrap_flg_m;
+// 
+// illegal instruction from ffu
+// ffu_tlu_ill_inst_m is staged through a dffr_s instance (rst = local_rst);
+// the staged value is still unqualified by the flush.
+dffr_s dffr_ffu_ill_inst_uf_g (
+   .clk (clk),
+   .rst (local_rst),
+   .din (ffu_tlu_ill_inst_m),
+   .q   (ffu_ill_inst_uf_g),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+
+// Drop the staged indication while inst_ifu_flush_w is set.
+assign ffu_ill_inst_g = ~inst_ifu_flush_w & ffu_ill_inst_uf_g;
+/*
+dffr_s dffr_lsu_ill_inst_uf_g (
+   .din (lsu_tlu_ill_inst_m),
+   .q   (lsu_ill_inst_uf_g),
+   .rst (local_rst),
+   .clk (clk),
+   .se  (se),       
+   .si  (),          
+   .so  ()
+);
+
+assign lsu_ill_inst_g = lsu_ill_inst_uf_g & ~inst_ifu_flush_w;
+//
+*/
+// added for bug 4074 and modified for bug 4715
+/*
+dffr_s dffr_lsu_tlu_defr_trp_taken_w2 (
+   .din (lsu_tlu_defr_trp_taken_g),
+   .q   (lsu_tlu_defr_trp_taken_w2),
+   .rst (local_rst),
+   .clk (clk),
+   .se  (se),       
+   .si  (),          
+   .so  ()
+);
+*/
+
+// Per-thread copy of lsu_defr_trap_w2, steered by the threadN_wsel_w2 selects.
+assign lsu_defr_trp_taken_w2[0] = lsu_defr_trap_w2 & thread0_wsel_w2; 
+assign lsu_defr_trp_taken_w2[1] = lsu_defr_trap_w2 & thread1_wsel_w2; 
+assign lsu_defr_trp_taken_w2[2] = lsu_defr_trap_w2 & thread2_wsel_w2; 
+assign lsu_defr_trp_taken_w2[3] = lsu_defr_trap_w2 & thread3_wsel_w2; 
+
+
+// added for the lsu deferred trap - bug 3060
+// modified for bug 4074, 4561 and 4916
+// The deferred trap is suppressed while ifu_tlu_flush_fd_w is set.
+assign lsu_defr_trap_g = lsu_tlu_defr_trp_taken_g & ~ifu_tlu_flush_fd_w; 
+// assign lsu_defr_trap_g = lsu_tlu_defr_trp_taken_g; 
+
+// Local alias of lsu_tlu_async_ttype_vld_g.
+assign local_lsu_async_ttype_vld_w = lsu_tlu_async_ttype_vld_g; 
+//
+// modified for bug 4443 and 4561
+// added for timing
+// Stage lsu_defr_trap_g through a dffr_s instance (rst = local_rst) to obtain
+// lsu_defr_trap_w2.
+dffr_s dffr_lsu_defr_trap_w2 (
+   .clk (clk),
+   .rst (local_rst),
+   .din (lsu_defr_trap_g),
+   .q   (lsu_defr_trap_w2),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+//
+// privilege action trap of the PIB registers
+// pib_priv_act_early_trap_m is staged through a dffr_s instance (rst = local_rst);
+// the staged value is still unqualified by the flush.
+dffr_s dffr_pib_priv_act_trap_g (
+   .clk (clk),
+   .rst (local_rst),
+   .din (pib_priv_act_early_trap_m),
+   .q   (pib_priv_act_trap_uf_g),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+//
+// added for bug 2133
+// Drop the staged trap while inst_ifu_flush_w is set.
+assign pib_priv_act_trap_g = ~inst_ifu_flush_w & pib_priv_act_trap_uf_g;
+//
+// OR of every source that is reported through the ifu trap type valid:
+// the staged ifu valid plus the spu/ffu/htrap illegal-instruction indications,
+// immu_miss_g, tick_npt_priv_act_g and pib_priv_act_trap_g.
+assign	ifu_ttype_vld_g = ifu_ttype_vld_tmp_g | spu_ill_inst_g | immu_miss_g    | 
+                          tick_npt_priv_act_g | ffu_ill_inst_g | pib_priv_act_trap_g | 
+                          htrap_ill_inst_g;
+// 
+// added and modified for timing fix
+// Early (_m) version of the OR above. Note that it has no immu-miss term.
+assign ifu_ttype_early_vld_m = 
+             (ifu_ttype_vld_m  | pib_priv_act_early_trap_m |
+              spu_ill_inst_m   | tick_npt_priv_act_m | ffu_tlu_ill_inst_m |
+              htrap_ill_inst_m); 
+
+// local_sync_trap_g counts only when inst_ifu_flush2_w is clear; the two
+// va_oor_*_acc_excp_g terms are not gated by that flush here.
+assign	early_dside_trap_g = 
+            (local_sync_trap_g & ~inst_ifu_flush2_w) | 
+             va_oor_inst_acc_excp_g | va_oor_data_acc_excp_g; 
+//
+// Either the lsu trap type valid or one of the early dside traps above.
+assign	dside_sync_trap_g = 
+            lsu_ttype_vld_w | early_dside_trap_g; 
+//
+// The sync ttype is being recoded for timing
+// Merge with lsu traps.
+//
+// Two-way select driven by va_oor_inst_acc_excp_g: `INST_ACC_EXC when it is
+// set, `DATA_ACC_EXC otherwise (sel0 and sel1 are complements of each other).
+mux2ds #(`TSA_TTYPE_WIDTH) mx_local_sync_ttype (
+    .in0  (`INST_ACC_EXC),
+	.in1  (`DATA_ACC_EXC),
+    .sel0 (va_oor_inst_acc_excp_g),
+    .sel1 (~va_oor_inst_acc_excp_g),
+    .dout (local_sync_ttype_g[`TSA_TTYPE_WIDTH-1:0])
+);
+
+// Need 9b comparator.
+// assign dside_higher_priority = (dside_sync_ttype_g[8:0] > exu_ttype_g[8:0]);
+// Set when the local sync ttype is numerically greater than the exu ttype.
+assign local_higher_ttype_flg = 
+           (local_sync_ttype_g[`TSA_TTYPE_WIDTH-1:0] > 
+            exu_ttype_g[`TSA_TTYPE_WIDTH-1:0]);
+
+// added for bug 3977
+// Stage exu_tlu_ue_trap_m through a dffr_s instance (rst = local_rst) to obtain
+// exu_ue_trap_g.
+dffr_s dffr_exu_ue_trap_g (
+   .clk (clk),
+   .rst (local_rst),
+   .din (exu_tlu_ue_trap_m),
+   .q   (exu_ue_trap_g),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+// The exu trap takes the higher priority when it is a valid ue trap and
+// immu_miss_g is not set.
+assign exu_higher_pri_g = ~immu_miss_g & exu_ttype_vld_g & exu_ue_trap_g;
+
+// Is the prioritization needed or is this handled among the units themselves ?
+// modified for bug 3977
+// sel0 (ifu ttype): ifu trap valid, and none of a tlz trap, an lsu deferred
+// trap or exu_higher_pri_g.
+assign	priority_trap_sel0 = 
+            ifu_ttype_vld_g & ~((|tlz_trap_g[`TLU_THRD_NUM-1:0]) | 
+            lsu_defr_trap_g | exu_higher_pri_g);
+//
+// modified for support to lsu deferred traps
+// modified for bug 3977
+// sel1 (exu ttype): no tlz or lsu deferred trap, the ifu trap does not win
+// (it is absent, or exu_higher_pri_g is set), and the exu trap is valid with
+// either no early dside trap or a local sync ttype that is not numerically
+// greater than the exu ttype.
+assign	priority_trap_sel1 = 
+            ~((|tlz_trap_g[`TLU_THRD_NUM-1:0]) | lsu_defr_trap_g) & 
+            ~(ifu_ttype_vld_g & ~exu_higher_pri_g) &
+             ((exu_ttype_vld_g & ~early_dside_trap_g) | 
+			 ((exu_ttype_vld_g &  early_dside_trap_g) & ~local_higher_ttype_flg));
+// 
+// modified for bug 3634
+// sel2 (local sync ttype): default when neither sel0 nor sel1 is set, so
+// exactly one of the three selects is always active.
+assign	priority_trap_sel2 = ~(priority_trap_sel0 | priority_trap_sel1);
+//
+// recoded for timing
+// Prioritized ttype for thread available.
+/*
+mux4ds #(9) finaltt_sel (
+    .in0    ({2'b00,`TLZ_TRAP}),
+    .in1    (ifu_ttype_g[8:0]), 	
+	.in2    (exu_ttype_g[8:0]),
+	.in3    (dside_sync_ttype_g[8:0]),
+    .sel0   (|tlz_trap_g[`TLU_THRD_NUM-1:0]),
+    .sel1   (priority_trap_sel0),  	
+	.sel2   (priority_trap_sel1),
+	.sel3   (priority_trap_sel2),
+    .dout   (sync_ttype_g[8:0])
+); 
+*/
+//
+// modified for bug 3634
+// modified for bug 3977
+// Three-way select of the early sync trap type, steered by priority_trap_sel0/1/2:
+//   sel0 -> ifu_ttype_g, sel1 -> exu_ttype_g, sel2 -> local_sync_ttype_g
+// All data ports are sliced with `TSA_TTYPE_WIDTH so they stay consistent with
+// the mux width parameter and with the dout slice.
+// NOTE(review): assumes ifu_ttype_g is declared at least `TSA_TTYPE_WIDTH bits
+// wide, as exu_ttype_g and local_sync_ttype_g are -- confirm against its declaration.
+mux3ds #(`TSA_TTYPE_WIDTH) mx_early_sync_ttype (
+//     .in0    ({2'b00,`TLZ_TRAP}),
+    .in0    (ifu_ttype_g[`TSA_TTYPE_WIDTH-1:0]),
+    .in1    (exu_ttype_g[`TSA_TTYPE_WIDTH-1:0]),
+    .in2    (local_sync_ttype_g[`TSA_TTYPE_WIDTH-1:0]),
+    .sel0   (priority_trap_sel0),
+    .sel1   (priority_trap_sel1),
+    .sel2   (priority_trap_sel2),
+    .dout   (early_sync_ttype_g[`TSA_TTYPE_WIDTH-1:0])
+); 
+//
+// added for timing 
+// Stage early_sync_ttype_g through a dff_s instance (no rst port) to obtain
+// early_sync_ttype_w2.
+dff_s #(`TSA_TTYPE_WIDTH) dff_early_sync_ttype_w2 (
+    .clk (clk),
+    .din (early_sync_ttype_g[`TSA_TTYPE_WIDTH-1:0]),
+    .q   (early_sync_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// Now pend Div and Spill/Fill traps if necessary. These traps are always pended
+// even if there is no concurrent synchronous trap. They are pended by thread.
+// Include fp traps
+// modified for bug 4857
+// The ifu/exu/lsu/dside/tlz/pib-wrap sources only count together with
+// inst_vld_g; intrpt_taken, swint_taken and lsu_defr_trap_g are not gated by it.
+assign	sync_trap_taken_g = 
+	((ifu_ttype_vld_g | exu_ttype_vld_g | lsu_tlu_ttype_vld_m2 | early_dside_trap_g | 
+     (|tlz_trap_g[`TLU_THRD_NUM-1:0]) | pib_wrap_trap_g) & inst_vld_g) | 
+      intrpt_taken | swint_taken | lsu_defr_trap_g;
+     // (|tlz_trap_g[`TLU_THRD_NUM-1:0])) & inst_vld_g) | intrpt_taken | swint_taken |
+     //  lsu_defr_trap_g | pib_wrap_trap_g; 
+// 
+// added for timing
+// Stage sync_trap_taken_g through a dff_s instance (no rst port).
+dff_s dff_sync_trap_taken_w2 (
+    .din (sync_trap_taken_g), 
+	.q   (sync_trap_taken_w2),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// added for timing fix 
+// modified for bug 3653, bug 4758 and bug 5169
+// Early (_m) trap-taken indication: OR of the exu, ifu, tlz, dmmu, pib-wrap and
+// interrupt sources listed below, all gated by inst_vld_m.
+// Each source appears exactly once in the OR; exu_tlu_va_oor_jl_ret_m needs
+// no second mention, since a | a == a.
+assign	sync_trap_taken_m = 
+	 (exu_tlu_va_oor_jl_ret_m | exu_tlu_ttype_vld_m | 
+      ifu_ttype_early_vld_m | (|tlz_trap_m[`TLU_THRD_NUM-1:0]) | true_hscpd_dacc_excpt_m | 
+      true_qtail_dacc_excpt_m | dmmu_va_oor_m |
+      pib_wrap_trap_m | ifu_swint_m | ifu_hwint_m | ifu_rstint_m) & inst_vld_m; 
+/*
+assign	sync_trap_taken_m = 
+	((exu_tlu_va_oor_jl_ret_m | exu_tlu_ttype_vld_m | 
+      ifu_ttype_early_vld_m | (|tlz_trap_m[`TLU_THRD_NUM-1:0]) | true_hscpd_dacc_excpt_m | 
+      true_qtail_dacc_excpt_m | dmmu_va_oor_m | exu_tlu_va_oor_jl_ret_m) & 
+      inst_vld_m) | pib_wrap_trap_m | ifu_swint_m | ifu_hwint_m | ifu_rstint_m; 
+*/
+
+// One-hot decode of the 2-bit ffu_ifu_tid_w2 thread id.
+assign fp_trap_thrd0 = ~ffu_ifu_tid_w2[1] & ~ffu_ifu_tid_w2[0];
+assign fp_trap_thrd1 = ~ffu_ifu_tid_w2[1] &  ffu_ifu_tid_w2[0];
+assign fp_trap_thrd2 =  ffu_ifu_tid_w2[1] & ~ffu_ifu_tid_w2[0];
+assign fp_trap_thrd3 =  ffu_ifu_tid_w2[1] &  ffu_ifu_tid_w2[0];
+
+// assign div_zero_thrd0 = ~exu_tlu_div_tid[1] & ~exu_tlu_div_tid[0];
+// assign div_zero_thrd1 = ~exu_tlu_div_tid[1] &  exu_tlu_div_tid[0];
+// assign div_zero_thrd2 =  exu_tlu_div_tid[1] & ~exu_tlu_div_tid[0];
+// assign div_zero_thrd3 =  exu_tlu_div_tid[1] &  exu_tlu_div_tid[0];
+
+// One-hot decode of the 2-bit exu_tlu_spill_tid thread id.
+assign spill_thrd0 = ~exu_tlu_spill_tid[1] & ~exu_tlu_spill_tid[0];
+assign spill_thrd1 = ~exu_tlu_spill_tid[1] &  exu_tlu_spill_tid[0];
+assign spill_thrd2 =  exu_tlu_spill_tid[1] & ~exu_tlu_spill_tid[0];
+assign spill_thrd3 =  exu_tlu_spill_tid[1] &  exu_tlu_spill_tid[0];
+// 
+// added for bug 3499
+// Stage the four pend_to_thrdN_reset signals (thread 3 in the MSB, thread 0 in
+// the LSB) through a dff_s instance to obtain cwp_en_thrd_reset.
+dff_s #(`TLU_THRD_NUM) dff_cwp_en_thrd_reset (
+    .clk (clk),
+    .din ({pend_to_thrd3_reset,
+           pend_to_thrd2_reset,
+           pend_to_thrd1_reset,
+           pend_to_thrd0_reset}),
+    .q   (cwp_en_thrd_reset[`TLU_THRD_NUM-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// Per-thread trap_cwp_enb bits. Each dffre_s instance is enabled when a spill
+// is reported for its thread (exu_tlu_spill & spill_thrdN), loads spill_thrdN
+// and has its rst port driven by cwp_en_thrd_reset[N].
+
+// thread 0
+dffre_s dffre_trap_cwp0_enb (
+    .clk (clk),
+    .rst (cwp_en_thrd_reset[0]),
+    .en  (exu_tlu_spill & spill_thrd0),
+    .din (spill_thrd0),
+    .q   (trap_cwp_enb[0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// thread 1
+dffre_s dffre_trap_cwp1_enb (
+    .clk (clk),
+    .rst (cwp_en_thrd_reset[1]),
+    .en  (exu_tlu_spill & spill_thrd1),
+    .din (spill_thrd1),
+    .q   (trap_cwp_enb[1]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// thread 2
+dffre_s dffre_trap_cwp2_enb (
+    .clk (clk),
+    .rst (cwp_en_thrd_reset[2]),
+    .en  (exu_tlu_spill & spill_thrd2),
+    .din (spill_thrd2),
+    .q   (trap_cwp_enb[2]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// thread 3
+dffre_s dffre_trap_cwp3_enb (
+    .clk (clk),
+    .rst (cwp_en_thrd_reset[3]),
+    .en  (exu_tlu_spill & spill_thrd3),
+    .din (spill_thrd3),
+    .q   (trap_cwp_enb[3]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// tlu_trap_cwp_en is the bitwise inverse of the registered trap_cwp_enb bits.
+assign tlu_trap_cwp_en[`TLU_THRD_NUM-1:0] = ~trap_cwp_enb[`TLU_THRD_NUM-1:0];
+
+//
+// added for asynchronous dmmu traps (correctable parity error)
+// One-hot decode of the 2-bit lsu_tlu_async_tid_g thread id.
+assign dmmu_async_thrd0 = ~lsu_tlu_async_tid_g[1] & ~lsu_tlu_async_tid_g[0];
+assign dmmu_async_thrd1 = ~lsu_tlu_async_tid_g[1] &  lsu_tlu_async_tid_g[0];
+assign dmmu_async_thrd2 =  lsu_tlu_async_tid_g[1] & ~lsu_tlu_async_tid_g[0];
+assign dmmu_async_thrd3 =  lsu_tlu_async_tid_g[1] &  lsu_tlu_async_tid_g[0];
+//
+// modified for bug 4074
+// Per-thread async valid: the lsu async ttype valid, steered to the decoded
+// thread, and blocked while that thread's lsu_defr_trp_taken_w2 bit is set.
+// The commented-out line in each equation is the earlier form, which masked
+// with lsu_tlu_defr_trp_taken_g instead.
+assign lsu_async_vld_en_g[0] = 
+//            local_lsu_async_ttype_vld_w & dmmu_async_thrd0 & ~lsu_tlu_defr_trp_taken_g;
+            local_lsu_async_ttype_vld_w & dmmu_async_thrd0 & ~lsu_defr_trp_taken_w2[0];
+assign lsu_async_vld_en_g[1] = 
+//            local_lsu_async_ttype_vld_w & dmmu_async_thrd1 & ~lsu_tlu_defr_trp_taken_g;
+           local_lsu_async_ttype_vld_w & dmmu_async_thrd1 & ~lsu_defr_trp_taken_w2[1];
+assign lsu_async_vld_en_g[2] = 
+//            local_lsu_async_ttype_vld_w & dmmu_async_thrd2 & ~lsu_tlu_defr_trp_taken_g;
+           local_lsu_async_ttype_vld_w & dmmu_async_thrd2 & ~lsu_defr_trp_taken_w2[2];
+assign lsu_async_vld_en_g[3] = 
+//           local_lsu_async_ttype_vld_w & dmmu_async_thrd3 & ~lsu_tlu_defr_trp_taken_g;
+           local_lsu_async_ttype_vld_w & dmmu_async_thrd3 & ~lsu_defr_trp_taken_w2[3];
+//
+// Per-thread lsu_async_vld_en_w2 bits. Each dffre_s instance uses
+// lsu_async_vld_en_g[N] as both data and enable, and has its rst port driven
+// by pend_to_thrdN_reset.
+
+// thread 0
+dffre_s dffre_lsu_async_vld_en_w2_0 (
+    .clk (clk),
+    .rst (pend_to_thrd0_reset),
+    .en  (lsu_async_vld_en_g[0]),
+    .din (lsu_async_vld_en_g[0]),
+    .q   (lsu_async_vld_en_w2[0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// thread 1
+dffre_s dffre_lsu_async_vld_en_w2_1 (
+    .clk (clk),
+    .rst (pend_to_thrd1_reset),
+    .en  (lsu_async_vld_en_g[1]),
+    .din (lsu_async_vld_en_g[1]),
+    .q   (lsu_async_vld_en_w2[1]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// thread 2
+dffre_s dffre_lsu_async_vld_en_w2_2 (
+    .clk (clk),
+    .rst (pend_to_thrd2_reset),
+    .en  (lsu_async_vld_en_g[2]),
+    .din (lsu_async_vld_en_g[2]),
+    .q   (lsu_async_vld_en_w2[2]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// thread 3
+dffre_s dffre_lsu_async_vld_en_w2_3 (
+    .clk (clk),
+    .rst (pend_to_thrd3_reset),
+    .en  (lsu_async_vld_en_g[3]),
+    .din (lsu_async_vld_en_g[3]),
+    .q   (lsu_async_vld_en_w2[3]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// assign trap type based on the information sent
+// The 7-bit lsu async ttype is zero-extended by two bits.
+assign dmmu_async_ttype[`TSA_TTYPE_WIDTH-1:0] = 
+           {2'b0, lsu_tlu_async_ttype_g[6:0]};
+// 
+// derived the spill ttype
+// Bit layout, MSB first: 3'b010, exu_tlu_spill_other, exu_tlu_spill_wtype[2:0], 2'b00.
+assign exu_spill_ttype[`TSA_TTYPE_WIDTH-1:0] =
+           {3'b010,exu_tlu_spill_other,exu_tlu_spill_wtype[2:0], 2'b00}; 
+//
+// derived ffu_asynchronous ttype
+// modified for bug 4084 - new ffu asynchronous trap type: 0x29
+// ffu_tlu_trap_ue selects 0x029; otherwise the value is 0x020 with bit 1 taken
+// from ffu_tlu_trap_other and bit 0 from ffu_tlu_trap_ieee754.
+assign ffu_async_ttype[`TSA_TTYPE_WIDTH-1:0] = 
+           (ffu_tlu_trap_ue) ? 9'h029:
+	       ({7'b0001000, ffu_tlu_trap_other, ffu_tlu_trap_ieee754});
+//
+//
+// modified for bug 4084 - new ffu_tlu_trap_ue 
+// Trap type to pend for thread 0, in priority order: the spill ttype when a
+// spill targets thread 0, else the ffu async ttype when an ffu trap targets
+// thread 0, else the dmmu async ttype.
+assign pend_ttype0[`TSA_TTYPE_WIDTH-1:0] = 
+		(exu_tlu_spill & spill_thrd0) ? 
+         exu_spill_ttype[`TSA_TTYPE_WIDTH-1:0] :
+		(((ffu_tlu_trap_other | ffu_tlu_trap_ieee754 | ffu_tlu_trap_ue) & fp_trap_thrd0) ? 
+           ffu_async_ttype[`TSA_TTYPE_WIDTH-1:0] :
+           dmmu_async_ttype[`TSA_TTYPE_WIDTH-1:0]);
+
+// always flop if selected for thread.
+// THREAD0
+// added support for dmmu_async_traps
+// modified for bug 4084 - new ffu_tlu_trap_ue 
+// Pend enable for thread 0: a spill, an ffu trap, a newly raised lsu async
+// valid (set now, not yet registered in lsu_async_vld_en_w2[0]) or cwp_cmplt0.
+assign pend_to_thrd0_en =
+	       (exu_tlu_spill & spill_thrd0) |
+		  ((ffu_tlu_trap_ieee754 | ffu_tlu_trap_other | ffu_tlu_trap_ue) & fp_trap_thrd0) |
+           (lsu_async_vld_en_g[0] & ~lsu_async_vld_en_w2[0]) |
+			cwp_cmplt0;				// cwp completion always pended.
+//
+// added for bug 5436 - Niagara 2.0
+// Same sources as pend_to_thrd0_en except cwp_cmplt0, qualified by pic_cnt_en[0].
+assign pend_pich_cnt_adj[0] =
+	      ((exu_tlu_spill & spill_thrd0) |
+		  ((ffu_tlu_trap_ieee754 | ffu_tlu_trap_other | ffu_tlu_trap_ue) & fp_trap_thrd0) |
+           (lsu_async_vld_en_g[0] & ~lsu_async_vld_en_w2[0])) & pic_cnt_en[0];
+
+// If there is no sync trap in a cycle, then the pending trap is taken.
+// The pending state is cleared by local_rst or once the pending event is taken.
+assign	pend_to_thrd0_reset =
+	local_rst | pending_thrd0_event_taken;
+
+// Choose pending traps in round-robin order.
+// The four pending_trapN flags go in as events (thread 3 in the MSB) and the
+// picker drives pending_trap_sel[3:0] from its pick_one_hot port.
+tlu_rrobin_picker       ptrap_rrobin      (
+                .events         ({pending_trap3,pending_trap2,pending_trap1,pending_trap0}),
+                .pick_one_hot   (pending_trap_sel[3:0]),
+        // 
+        // this signal was modified to abide by the Niagara reset methodology
+		        .tlu_rst_l		(tlu_rst_l),
+                .clk            (clk)
+        );           
+
+// modified to arbitrate between wsr instruction and asynchronous events
+// due to there is only one write port to tsa
+// also modified for bug 1672
+// modified for bug 3827
+// Thread 0's pending event is taken when the picker selects it and none of the
+// blocking conditions is present: a sync trap, dnrtry_inst_g, tsa_wr_tid_sel_g,
+// a thread-0 flush, or (tlu_gl_rw_g & wsr_inst_g).
+assign pending_thrd0_event_taken = 
+            pending_trap_sel[0] & ~(sync_trap_taken_g | dnrtry_inst_g | 
+            tsa_wr_tid_sel_g | ifu_thrd_flush_w[0] | (tlu_gl_rw_g & wsr_inst_g)); 
+
+// Pending-trap state for thread 0 (12 bits): the pending flag, the 9-bit trap
+// type, the cwp completion flag and the cwp retry flag. Enabled by
+// pend_to_thrd0_en, with rst driven by pend_to_thrd0_reset.
+dffre_s #(12) dffre_pendthrd0 (
+    .din ({pend_to_thrd0_en,pend_ttype0[8:0],cwp_cmplt0,exu_tlu_cwp_retry}), 
+	.q   ({pending_trap0,pending_ttype0[8:0],cwp_cmplt0_pending,cwp_retry0}),
+    .rst (pend_to_thrd0_reset), 
+    .en  (pend_to_thrd0_en), 
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// THREAD1
+// added support for dmmu_async_traps
+// modified for bug 4084 - new ffu_tlu_trap_ue 
+// Pend enable for thread 1: a spill, an ffu trap, a newly raised lsu async
+// valid (set now, not yet registered in lsu_async_vld_en_w2[1]) or cwp_cmplt1.
+assign pend_to_thrd1_en = 
+	       (exu_tlu_spill & spill_thrd1) |
+		  ((ffu_tlu_trap_ieee754 | ffu_tlu_trap_other | ffu_tlu_trap_ue) & fp_trap_thrd1) |
+           (lsu_async_vld_en_g[1] & ~lsu_async_vld_en_w2[1]) |
+			cwp_cmplt1;				// cwp completion always pended.
+//
+// added for bug 5436 - Niagara 2.0
+// Same sources as pend_to_thrd1_en except cwp_cmplt1, qualified by pic_cnt_en[1].
+assign pend_pich_cnt_adj[1] =
+	      ((exu_tlu_spill & spill_thrd1) |
+		  ((ffu_tlu_trap_ieee754 | ffu_tlu_trap_other | ffu_tlu_trap_ue) & fp_trap_thrd1) |
+           (lsu_async_vld_en_g[1] & ~lsu_async_vld_en_w2[1])) & pic_cnt_en[1];
+
+// The pending state is cleared by local_rst or once the pending event is taken.
+assign	pend_to_thrd1_reset =
+	local_rst | pending_thrd1_event_taken;
+
+// modified to arbitrate between wsr instruction and asynchronous events
+// due to there is only one write port to tsa
+//
+// modified for bug 3827
+// Taken when the picker selects thread 1 and there is no sync trap,
+// dnrtry_inst_g, tsa_wr_tid_sel_g, thread-1 flush or (tlu_gl_rw_g & wsr_inst_g).
+assign pending_thrd1_event_taken = 
+            pending_trap_sel[1] & ~(sync_trap_taken_g | dnrtry_inst_g | 
+            tsa_wr_tid_sel_g | ifu_thrd_flush_w[1] | (tlu_gl_rw_g & wsr_inst_g)); 
+
+//
+// modified for bug 4084 - new ffu_tlu_trap_ue 
+// Trap type to pend for thread 1, in priority order: spill, ffu async, dmmu async.
+assign pend_ttype1[`TSA_TTYPE_WIDTH-1:0] = 
+		(exu_tlu_spill & spill_thrd1) ? 
+         exu_spill_ttype[`TSA_TTYPE_WIDTH-1:0] :
+		(((ffu_tlu_trap_other | ffu_tlu_trap_ieee754 | ffu_tlu_trap_ue) & fp_trap_thrd1) ? 
+           ffu_async_ttype[`TSA_TTYPE_WIDTH-1:0] :
+           dmmu_async_ttype[`TSA_TTYPE_WIDTH-1:0]);
+
+// Pending-trap state for thread 1 (12 bits): the pending flag, the 9-bit trap
+// type, the cwp completion flag and the cwp retry flag.
+dffre_s #(12) dffre_pendthrd1 (
+    .din ({pend_to_thrd1_en,pend_ttype1[8:0],cwp_cmplt1,exu_tlu_cwp_retry}), 
+	.q   ({pending_trap1,pending_ttype1[8:0],cwp_cmplt1_pending,cwp_retry1}),
+    .rst (pend_to_thrd1_reset), 
+    .en  (pend_to_thrd1_en), 
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// THREAD2
+// added support for dmmu_async_traps
+// modified for bug 4084 - new ffu_tlu_trap_ue 
+assign pend_to_thrd2_en = 
+	       (exu_tlu_spill & spill_thrd2) |
+		  ((ffu_tlu_trap_ieee754 | ffu_tlu_trap_other | ffu_tlu_trap_ue) & fp_trap_thrd2) |
+           (lsu_async_vld_en_g[2] & ~lsu_async_vld_en_w2[2]) |
+			cwp_cmplt2;				// cwp completion always pended.
+//
+// added for bug 5436 - Niagara 2.0
+assign pend_pich_cnt_adj[2] =
+	      ((exu_tlu_spill & spill_thrd2) |
+		  ((ffu_tlu_trap_ieee754 | ffu_tlu_trap_other | ffu_tlu_trap_ue) & fp_trap_thrd2) |
+           (lsu_async_vld_en_g[2] & ~lsu_async_vld_en_w2[2])) & pic_cnt_en[2];
+
+assign	pend_to_thrd2_reset =
+	local_rst | pending_thrd2_event_taken;
+
+// modified to arbitrate between wsr instruction and asynchronous events
+// due to there is only one write port to tsa
+//
+// modified for bug 3827
+assign pending_thrd2_event_taken = 
+            pending_trap_sel[2] & ~(sync_trap_taken_g | dnrtry_inst_g | 
+            tsa_wr_tid_sel_g | ifu_thrd_flush_w[2] | (tlu_gl_rw_g & wsr_inst_g)); 
+
+//
+// modified for bug 4084 - new ffu_tlu_trap_ue
+// Trap type captured for thread 2's pending asynchronous event.
+// Priority: EXU spill for this thread, then an FFU trap (other / ieee754 / ue)
+// for this thread, otherwise the DMMU async trap type.
+assign pend_ttype2[`TSA_TTYPE_WIDTH-1:0] =
+    (spill_thrd2 & exu_tlu_spill) ?
+        exu_spill_ttype[`TSA_TTYPE_WIDTH-1:0] :
+    (fp_trap_thrd2 & (ffu_tlu_trap_ue | ffu_tlu_trap_ieee754 | ffu_tlu_trap_other)) ?
+        ffu_async_ttype[`TSA_TTYPE_WIDTH-1:0] :
+        dmmu_async_ttype[`TSA_TTYPE_WIDTH-1:0];
+
+// Thread 2 pending-event register. While pend_to_thrd2_en is high it loads
+// {event-valid, trap type, cwp_cmplt2, exu_tlu_cwp_retry}; pend_to_thrd2_reset
+// drives its reset input.
+// The register width and the trap-type slices are expressed through
+// TSA_TTYPE_WIDTH (trap type plus three flag bits) rather than the literals
+// 12 and [8:0], so they stay in step with the macro-based ttype slices used
+// by the logic that produces pend_ttype2 and consumes pending_ttype2.
+dffre_s #(`TSA_TTYPE_WIDTH+3) dffre_pendthrd2 (
+    .din ({pend_to_thrd2_en,pend_ttype2[`TSA_TTYPE_WIDTH-1:0],cwp_cmplt2,exu_tlu_cwp_retry}),
+    .q   ({pending_trap2,pending_ttype2[`TSA_TTYPE_WIDTH-1:0],cwp_cmplt2_pending,cwp_retry2}),
+    .rst (pend_to_thrd2_reset),
+    .en  (pend_to_thrd2_en),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// THREAD3
+// added support for dmmu_async_traps
+// modified for bug 4084 - new ffu_tlu_trap_ue 
+// Capture enable for thread 3's pending-event register. Asserted by:
+//   - a CWP completion for this thread,
+//   - lsu_async_vld_en_g[3] high while lsu_async_vld_en_w2[3] is still low,
+//   - an FFU trap (ue / other / ieee754) flagged for this thread,
+//   - an EXU spill flagged for this thread.
+assign pend_to_thrd3_en =
+    cwp_cmplt3 |                                   // cwp completion always pended.
+    (lsu_async_vld_en_g[3] & ~lsu_async_vld_en_w2[3]) |
+    (fp_trap_thrd3 & (ffu_tlu_trap_ue | ffu_tlu_trap_other | ffu_tlu_trap_ieee754)) |
+    (spill_thrd3 & exu_tlu_spill);
+//
+// added for bug 5436 - Niagara 2.0
+// Same event terms as the enable above, without the CWP completion, and
+// qualified by pic_cnt_en[3].
+assign pend_pich_cnt_adj[3] =
+    pic_cnt_en[3] &
+    ((lsu_async_vld_en_g[3] & ~lsu_async_vld_en_w2[3]) |
+     (fp_trap_thrd3 & (ffu_tlu_trap_ue | ffu_tlu_trap_other | ffu_tlu_trap_ieee754)) |
+     (spill_thrd3 & exu_tlu_spill));
+
+// Thread 3's pending register is cleared by local reset or once its event
+// has been taken.
+assign pend_to_thrd3_reset =
+    pending_thrd3_event_taken | local_rst;
+
+// modified to arbitrate between wsr instruction and asynchronous events
+// due to there is only one write port to tsa
+// modified for bug 3827
+// The pending event is taken only when thread 3 is the selected pending
+// thread and none of the competing conditions below is active.
+assign pending_thrd3_event_taken =
+    pending_trap_sel[3] &
+    ~sync_trap_taken_g &
+    ~dnrtry_inst_g &
+    ~tsa_wr_tid_sel_g &
+    ~ifu_thrd_flush_w[3] &
+    ~(tlu_gl_rw_g & wsr_inst_g);
+
+//
+// Trap type captured for thread 3's pending asynchronous event.
+// Priority: EXU spill for this thread, then an FFU trap (other / ieee754 / ue)
+// for this thread, otherwise the DMMU async trap type.
+assign pend_ttype3[`TSA_TTYPE_WIDTH-1:0] =
+    (spill_thrd3 & exu_tlu_spill) ?
+        exu_spill_ttype[`TSA_TTYPE_WIDTH-1:0] :
+    (fp_trap_thrd3 & (ffu_tlu_trap_ue | ffu_tlu_trap_ieee754 | ffu_tlu_trap_other)) ?
+        ffu_async_ttype[`TSA_TTYPE_WIDTH-1:0] :
+        dmmu_async_ttype[`TSA_TTYPE_WIDTH-1:0];
+//
+// Thread 3 pending-event register. While pend_to_thrd3_en is high it loads
+// {event-valid, trap type, cwp_cmplt3, exu_tlu_cwp_retry}; pend_to_thrd3_reset
+// drives its reset input.
+// The register width and the trap-type slices are expressed through
+// TSA_TTYPE_WIDTH (trap type plus three flag bits) rather than the literals
+// 12 and [8:0], so they stay in step with the macro-based ttype slices used
+// by the logic that produces pend_ttype3 and consumes pending_ttype3.
+dffre_s #(`TSA_TTYPE_WIDTH+3) dffre_pendthrd3 (
+    .din ({pend_to_thrd3_en,pend_ttype3[`TSA_TTYPE_WIDTH-1:0],cwp_cmplt3,exu_tlu_cwp_retry}),
+    .q   ({pending_trap3,pending_ttype3[`TSA_TTYPE_WIDTH-1:0],cwp_cmplt3_pending,cwp_retry3}),
+    .rst (pend_to_thrd3_reset),
+    .en  (pend_to_thrd3_en),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// added for bug 5436 - Niagara 2.0
+//assign pich_cnt_hld_rst[`TLU_THRD_NUM-1:0] = 
+//           (thread_inst_vld_w2[`TLU_THRD_NUM-1:0] & {4{pic_cnt_en_w2}} |
+//           {4{local_rst}});  
+
+// fix for 5436 for reopened bugs (9/8/2004) related to a flushed inst resetting the hold
+// and back-to-back valid instructions: the 1st one reset the hold, but the 2nd did not
+// increment since the hold was not reset early enough to allow the 2nd inst to incr_pich.
+
+// Per-thread reset request for the pich count hold bits: set for a thread
+// whose instruction is valid in g while pic_cnt_en_w is high, as long as
+// neither lsu_ttype_vld_w nor tlu_flush_all_w is asserted.
+assign pich_cnt_hld_rst_g[3:0] =
+    thread_inst_vld_g[3:0] &
+    {4{pic_cnt_en_w & ~lsu_ttype_vld_w & ~tlu_flush_all_w}};
+
+
+// Stage the hold-reset request from g into w2.
+dff_s #(4) dff_pich_cnt_hld_rst_g (
+    .clk (clk),
+    .din (pich_cnt_hld_rst_g[3:0]),
+    .q   (pich_cnt_hld_rst_w2[3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+
+// Hold bits with the w2-stage reset request already masked in.
+assign pend_pich_cnt_hld_early[3:0] =
+    ~pich_cnt_hld_rst_w2[3:0] & pend_pich_cnt_hld_q[3:0];
+
+// Exported hold value is the early (masked) view.
+assign pend_pich_cnt_hld[3:0] =
+    pend_pich_cnt_hld_early[3:0];
+
+// following is used in pib_wrap_m logic as per bug5436(reopened 9/17/04).
+// Raw flop outputs, not qualified by the w2 reset request.
+assign pend_pich_cnt_hld_noqual[3:0] =
+    pend_pich_cnt_hld_q[3:0];
+
+// One hold flop per thread. Each bit loads pend_pich_cnt_adj[n] when that
+// same signal is high, and its reset input is driven by local_rst or the
+// thread's w2-stage hold-reset request. The flops stay separate instances
+// because every bit has its own enable and reset.
+dffre_s dffre_pend_pich_cnt_adj_0 (
+    .clk (clk),
+    .rst (local_rst | pich_cnt_hld_rst_w2[0]),
+    .en  (pend_pich_cnt_adj[0]),
+    .din (pend_pich_cnt_adj[0]),
+    .q   (pend_pich_cnt_hld_q[0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffre_s dffre_pend_pich_cnt_adj_1 (
+    .clk (clk),
+    .rst (local_rst | pich_cnt_hld_rst_w2[1]),
+    .en  (pend_pich_cnt_adj[1]),
+    .din (pend_pich_cnt_adj[1]),
+    .q   (pend_pich_cnt_hld_q[1]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffre_s dffre_pend_pich_cnt_adj_2 (
+    .clk (clk),
+    .rst (local_rst | pich_cnt_hld_rst_w2[2]),
+    .en  (pend_pich_cnt_adj[2]),
+    .din (pend_pich_cnt_adj[2]),
+    .q   (pend_pich_cnt_hld_q[2]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffre_s dffre_pend_pich_cnt_adj_3 (
+    .clk (clk),
+    .rst (local_rst | pich_cnt_hld_rst_w2[3]),
+    .en  (pend_pich_cnt_adj[3]),
+    .din (pend_pich_cnt_adj[3]),
+    .q   (pend_pich_cnt_hld_q[3]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+
+// A trap is taken in g when any of the four threads traps.
+assign trap_taken_g =
+    thrd3_traps | thrd2_traps | thrd1_traps | thrd0_traps;
+//
+// added for timing
+// w2-stage copy of trap_taken_g.
+dff_s dff_trap_taken_w2 (
+    .clk (clk),
+    .din (trap_taken_g),
+    .q   (trap_taken_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// Selection of traps should be round-robin.
+// Thread id of the trap being processed in g.
+// A synchronous trap, or a done/retry with fast CWP completion, uses the
+// thread id of the instruction in g. Otherwise the id comes from the
+// pending-trap select, which is a fixed-priority encode (thread 0 first),
+// the same encode that produces pend_trap_tid_g.
+assign trap_tid_g[1:0] =
+    // lsu_defr_trap_g ? thrid_w2[1:0] :
+    (sync_trap_taken_g | (cwp_fastcmplt_g & dnrtry_inst_g)) ?
+        thrid_g[1:0] :
+        pend_trap_tid_g[1:0];
+
+// Fixed-priority encode of the pending-trap select into a 2-bit thread id:
+// thread 0 wins, then 1, then 2; thread 3 is the default.
+assign pend_trap_tid_g[1:0] =
+    pending_trap_sel[0] ? 2'b00 :
+    pending_trap_sel[1] ? 2'b01 :
+    pending_trap_sel[2] ? 2'b10 :
+                          2'b11;
+
+// w2-stage copy of the pending-trap thread id.
+dff_s #(2) dff_pend_trap_tid_w2 (
+    .clk (clk),
+    .din (pend_trap_tid_g[1:0]),
+    .q   (pend_trap_tid_w2[1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+
+// Assume fixed priority for now. Should change to round-robin selection !!!
+
+// modified for bug 1806
+// modified to support lsu deferred traps - modified for timing
+// modified for bug 4640 and 5127
+//
+// Reset-type trap select in g: a reset interrupt, rst_tri_en, or a SIR
+// instruction that is not overridden by an LSU deferred trap, a PIB wrap
+// trap, or a TLZ trap on any thread.
+assign reset_sel_g =
+    rst_tri_en |
+    rstint_g |
+    (sir_inst_g &
+     ~lsu_defr_trap_g &
+     ~pib_wrap_trap_g &
+     ~(|tlz_trap_g[`TLU_THRD_NUM-1:0]));
+            // rstint_g | (sir_inst_g & ~lsu_defr_trap_g) | rst_tri_en;
+//
+// added for timing
+// w2-stage copy of reset_sel_g, cleared by local_rst.
+dffr_s dffr_reset_sel_w2 (
+   .clk (clk),
+   .rst (local_rst),
+   .din (reset_sel_g),
+   .q   (reset_sel_w2),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+// 
+// modified for timing
+// assign	reset_defr_id_g[6:0] =
+// 3-bit reset id, chosen by priority:
+//   local_rst  -> 3'b001
+//   rstint_g   -> rstid_g[2:0]
+//   sir_inst_g -> 3'b100
+// When none of the three is active the value is a don't-care (3'bxxx);
+// the consumer is expected to use it only under reset_sel_g.
+// NOTE(review): confirm no path samples reset_id_g outside those cases.
+assign	reset_id_g[2:0] =
+local_rst ? 3'b001 :	
+	rstint_g ? rstid_g[2:0] :
+		sir_inst_g ? 3'b100 :  
+                3'bxxx;
+
+// recoded for timing
+//
+// construct the tba_ttype to determine the tba 
+// the trap is hypervisor or supervisor traps
+// modified for bug 3634 and timing
+
+// Choose the trap type used to form the trap-base offset. The reset /
+// hyper-WDR type (zero-extended by two bits) is used when final select 0
+// is active or a hypervisor watchdog trap occurs without an LSU deferred
+// trap; otherwise the final trap type is used.
+assign tba_ttype_sel_w2 =
+    (~lsu_defr_trap_w2 & hyper_wdr_trap_w2) | final_ttype_sel_w2[0];
+
+mux2ds #(`TSA_TTYPE_WIDTH) mx_tba_ttype_w2 (
+    .dout (tba_ttype_w1[`TSA_TTYPE_WIDTH-1:0]),
+    .in0  ({2'b0,rst_hwdr_ttype_w2[`TSA_TTYPE_WIDTH-3:0]}),
+    .sel0 (tba_ttype_sel_w2),
+    .in1  (final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .sel1 (~tba_ttype_sel_w2)
+);
+/*
+assign tba_ttype_sel_g[0] = 
+               (rstint_g | rst_tri_en | ((hwint_g | swint_g | hyper_wdr_trap | 
+               (|tlz_trap_g[`TLU_THRD_NUM-1:0] | sir_inst_g) | pib_wrap_trap_g) & 
+                inst_vld_g & ~lsu_defr_trap_g); 
+assign tba_ttype_sel_g[1] = 
+           (((ifu_ttype_vld_g |  exu_ttype_vld_g | va_oor_inst_acc_excp_g) |  
+            (local_sync_trap_g & ~(lsu_tlu_priv_action_g | misalign_addr_ldst_atm_g))) & 
+           ~(reset_sel_g | hwint_g | swint_g | hyper_wdr_trap | (|tlz_trap_g[`TLU_THRD_NUM-1:0])) & 
+             inst_vld_g) & ~lsu_defr_trap_g & ~pib_wrap_trap_g;
+assign tba_ttype_sel_g[2] = 
+           (((lsu_tlu_ttype_vld_m2 & inst_vld_g) | va_oor_data_acc_excp_g) & 
+           ~(|tba_ttype_sel_g[1:0])) | (lsu_defr_trap_g & ~(rstint_g | rst_tri_en)); 
+assign tba_ttype_sel_g[3] = 
+           ~(|tba_ttype_sel_g[2:0]); 
+
+// added for timing
+dffr_s #(4) dffr_tba_ttype_sel_w2 (
+    .din (tba_ttype_sel_g[3:0]),
+    .q   (tba_ttype_sel_w2[3:0]),
+    .rst (local_rst),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+mux4ds #(`TSA_TTYPE_WIDTH) mx_tba_ttype_w2 (
+    .sel0 (tba_ttype_sel_w2[0]),
+    .sel1 (tba_ttype_sel_w2[1]),
+    .sel2 (tba_ttype_sel_w2[2]),
+    .sel3 (tba_ttype_sel_w2[3]),
+    .in0  ({2'b0,rst_hwdr_ttype_w2[`TSA_TTYPE_WIDTH-3:0]}),
+    .in1  (early_sync_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .in2  (adj_lsu_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .in3  (pending_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .dout (tba_ttype_w1[`TSA_TTYPE_WIDTH-1:0])
+); 
+
+dff_s #(`TSA_TTYPE_WIDTH) dff_tba_ttype_w1 (
+    .din (tba_ttype_g[`TSA_TTYPE_WIDTH-1:0]), 
+    .q   (tba_ttype_w1[`TSA_TTYPE_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),  
+    .si  (), 
+    .so  ()
+);
+*/
+//
+// construct the final_ttype to be written into the trap stack 
+// modified for bug 3634, 4640 and timing  
+// Select lines for the final trap-type mux (mx_final_ttype_w2, after being
+// staged to w2):
+//   [0] reset / interrupt class  -> rst_ttype_w2
+//   [1] early synchronous traps  -> early_sync_ttype_w2
+//   [2] LSU traps                -> adj_lsu_ttype_w2
+//   [3] none of the above        -> pending_ttype_w2
+//
+// [0]: rstint_g or rst_tri_en unconditionally; hwint / swint / sir / tlz /
+//      pib-wrap only for a valid instruction with no LSU deferred trap.
+assign final_ttype_sel_g[0] = 
+            (rstint_g | rst_tri_en) | ((hwint_g | swint_g | sir_inst_g |
+            (|tlz_trap_g[`TLU_THRD_NUM-1:0]) | pib_wrap_trap_g) & inst_vld_g &
+            ~lsu_defr_trap_g);
+//             reset_sel_g | ((hwint_g | swint_g |
+//             (|tlz_trap_g[`TLU_THRD_NUM-1:0]) | pib_wrap_trap_g) & inst_vld_g &
+//             ~lsu_defr_trap_g);
+
+// [1]: IFU/EXU/va-oor-instruction traps, or a local sync trap that is not a
+//      privileged-action or misaligned ld/st/atomic case. It is blocked by
+//      every source that can raise [0], and by LSU deferred and pib-wrap traps.
+assign final_ttype_sel_g[1] = 
+           (((ifu_ttype_vld_g | exu_ttype_vld_g | va_oor_inst_acc_excp_g) |  
+            (local_sync_trap_g & ~(lsu_tlu_priv_action_g | misalign_addr_ldst_atm_g))) & 
+           ~(rstint_g | sir_inst_g  | hwint_g | swint_g | rst_tri_en | (|tlz_trap_g[`TLU_THRD_NUM-1:0])) & 
+             inst_vld_g) & ~lsu_defr_trap_g & ~pib_wrap_trap_g; 
+// [2]: '&' binds tighter than '|', so this reads as
+//        ((lsu ttype valid | va-oor data) & ~([1] | [0]))
+//      | (lsu_defr_trap_g & ~(rst_tri_en | rstint_g)).
+//      The deferred-trap term is NOT masked by [1:0]; it can only coincide
+//      with neither, because [1] and the non-reset part of [0] both require
+//      ~lsu_defr_trap_g and the reset part of [0] is excluded explicitly.
+assign final_ttype_sel_g[2] = 
+           ((lsu_tlu_ttype_vld_m2 & inst_vld_g) | va_oor_data_acc_excp_g)  & 
+           ~(|final_ttype_sel_g[1:0]) | (lsu_defr_trap_g & ~(rst_tri_en | rstint_g)); 
+// [3]: default leg, keeps the select one-hot.
+assign final_ttype_sel_g[3] = 
+           ~(|final_ttype_sel_g[2:0]);
+//
+// added for timing
+// w2-stage copy of the final trap-type select, cleared by local_rst.
+dffr_s #(4) dffr_final_ttype_sel_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (final_ttype_sel_g[3:0]),
+    .q   (final_ttype_sel_w2[3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// 
+// modified for timing
+/*
+mux3ds #(`TSA_TTYPE_WIDTH) mx_adj_lsu_ttype_m2 (
+    .sel0 (lsu_defr_trap_g),
+    .sel1 (va_oor_data_acc_excp_g & ~lsu_defr_trap_g),
+    .sel2 (~(va_oor_data_acc_excp_g | lsu_defr_trap_g)),
+    .in0  ({2'b0, lsu_tlu_async_ttype_g[6:0]}),
+    .in1  (9'h030),
+    .in2  (lsu_tlu_ttype_m2),
+    .dout (adj_lsu_ttype_m2[`TSA_TTYPE_WIDTH-1:0])
+); 
+*/
+// added for timing 
+// Stage the LSU trap type from m2 into w2.
+dff_s #(`TSA_TTYPE_WIDTH) dff_lsu_tlu_ttype_w2 (
+    .clk (clk),
+    .din (lsu_tlu_ttype_m2[`TSA_TTYPE_WIDTH-1:0]),
+    .q   (lsu_tlu_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+/*
+dff_s #(`TSA_TTYPE_WIDTH-2) dff_lsu_tlu_async_ttype_w2 (
+    .din (lsu_tlu_async_ttype_g[`TSA_TTYPE_WIDTH-3:0]), 
+    .q   (lsu_tlu_async_ttype_w2[`TSA_TTYPE_WIDTH-3:0]),
+    .clk (clk),
+    .se  (se),  
+    .si  (), 
+    .so  ()
+);
+*/ 
+// Adjusted LSU trap type in w2. The three selects are mutually exclusive:
+//   sel0: LSU deferred trap                     -> constant 9'h032
+//   sel1: va-oor data access, no deferred trap  -> constant 9'h030
+//   sel2: neither                               -> staged LSU trap type
+// NOTE(review): 9'h032 / 9'h030 are presumably the SPARC data_access_error /
+// data_access_exception trap numbers - confirm against the trap table.
+mux3ds #(`TSA_TTYPE_WIDTH) mx_adj_lsu_ttype_w2 (
+    .sel0 (lsu_defr_trap_w2),
+    .sel1 (va_oor_data_acc_excp_w2 & ~lsu_defr_trap_w2),
+    .sel2 (~(va_oor_data_acc_excp_w2 | lsu_defr_trap_w2)),
+    // modified for bug 4561
+    // .in0  ({2'b0, lsu_tlu_async_ttype_w2[6:0]}),
+    .in0  (9'h032),
+    .in1  (9'h030),
+    .in2  (lsu_tlu_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .dout (adj_lsu_ttype_w2[`TSA_TTYPE_WIDTH-1:0])
+); 
+//
+// modified for timing 
+// Final trap type in w2, picked by the staged one-hot select:
+// reset/interrupt type (zero-extended by two bits), early sync type,
+// adjusted LSU type, or the pending trap type.
+mux4ds #(`TSA_TTYPE_WIDTH) mx_final_ttype_w2 (
+    .dout (final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .in0  ({2'b0,rst_ttype_w2[`TSA_TTYPE_WIDTH-3:0]}),
+    .sel0 (final_ttype_sel_w2[0]),
+    .in1  (early_sync_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .sel1 (final_ttype_sel_w2[1]),
+    .in2  (adj_lsu_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .sel2 (final_ttype_sel_w2[2]),
+    .in3  (pending_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .sel3 (final_ttype_sel_w2[3])
+);
+//
+// modified for timing
+/*
+dff_s #(`TSA_TTYPE_WIDTH) dff_tlu_final_ttype_w2 (
+    .din (final_ttype_g[`TSA_TTYPE_WIDTH-1:0]),
+    .q   (final_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),          
+    .so  ()
+);
+*/
+
+// Output copy of the final trap type.
+assign tlu_final_ttype_w2[`TSA_TTYPE_WIDTH-1:0] = final_ttype_w2[`TSA_TTYPE_WIDTH-1:0];
+//
+// added for timing
+// pending trap type 
+// Default leg of the pending-type mux: active when none of threads 0..2 is
+// selected, so exactly one mux select is always high.
+assign onehot_pending_ttype_sel = ~|pending_trap_sel[2:0];
+//
+// Trap type of the selected pending thread (thread 3 on the default leg).
+mux4ds #(`TSA_TTYPE_WIDTH) mx_pending_ttype (
+    .dout (pending_ttype[`TSA_TTYPE_WIDTH-1:0]),
+    .in0  (pending_ttype0[`TSA_TTYPE_WIDTH-1:0]),
+    .sel0 (pending_trap_sel[0]),
+    .in1  (pending_ttype1[`TSA_TTYPE_WIDTH-1:0]),
+    .sel1 (pending_trap_sel[1]),
+    .in2  (pending_ttype2[`TSA_TTYPE_WIDTH-1:0]),
+    .sel2 (pending_trap_sel[2]),
+    .in3  (pending_ttype3[`TSA_TTYPE_WIDTH-1:0]),
+    .sel3 (onehot_pending_ttype_sel)
+);
+//
+// added for timing 
+// w2-stage copy of the selected pending trap type.
+dff_s #(`TSA_TTYPE_WIDTH) dff_pending_ttype_w2 (
+    .clk (clk),
+    .din (pending_ttype[`TSA_TTYPE_WIDTH-1:0]),
+    .q   (pending_ttype_w2[`TSA_TTYPE_WIDTH-1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// modified for timing and bug 5117
+// Priority selects for the reset / hw-interrupt trap type:
+//   [0] reset-class trap (reset_sel_g)
+//   [1] TLZ trap on any thread, or PIB wrap trap
+// [1] is not masked by [0]; priority is applied by the nested ternary below.
+assign rst_ttype_sel[0] = reset_sel_g; 
+// modified for bug 5127
+assign rst_ttype_sel[1] = 
+           ((|tlz_trap_g[`TLU_THRD_NUM-1:0]) | pib_wrap_trap_g); 
+           // ~(rstint_g | rst_tri_en); 
+           // ((|tlz_trap_g[`TLU_THRD_NUM-1:0]) | pib_wrap_trap_g) & ~reset_sel_g; 
+// assign rst_ttype_sel[2] = ~(|rst_ttype_sel[1:0]); 
+
+// reset ttype  
+// modified for bug 3634 and bug 3705
+// modified for timing and bug 5117
+// 7-bit trap type: the reset id (4'b00 is a 4-bit zero pad, giving 4+3 bits)
+// when [0] is set, else the TLZ / PIB-overflow type when [1] is set, else
+// the hardware-interrupt type.
+assign rst_hwint_ttype_g[`TSA_TTYPE_WIDTH-3:0] = 
+           (rst_ttype_sel[0])? {4'b00,reset_id_g[2:0]}:
+          ((rst_ttype_sel[1])? wrap_tlz_ttype[6:0]:
+            `HWINT_INT);  
+
+// w2-stage copy of the reset / hw-interrupt trap type.
+dff_s #(`TSA_TTYPE_WIDTH-2) dff_rst_hwint_ttype_w2 (
+    .clk (clk),
+    .din (rst_hwint_ttype_g[`TSA_TTYPE_WIDTH-3:0]),
+    .q   (rst_hwint_ttype_w2[`TSA_TTYPE_WIDTH-3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// Staged flag: the g-stage type came from a reset-class trap, a TLZ / PIB
+// wrap trap, or a hardware interrupt (i.e. not a software interrupt).
+dffr_s dffr_rst_hwint_sel_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (hwint_g | rst_ttype_sel[1] | rst_ttype_sel[0]),
+    .q   (rst_hwint_sel_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// Reset-class trap type in w2: the staged type above when the flag is set,
+// otherwise the software-interrupt id.
+assign rst_ttype_w2[`TSA_TTYPE_WIDTH-3:0] =
+    rst_hwint_sel_w2 ? rst_hwint_ttype_w2[`TSA_TTYPE_WIDTH-3:0]
+                     : final_swint_id_w2[`TSA_TTYPE_WIDTH-3:0];
+       
+/*
+mux3ds #(`TSA_TTYPE_WIDTH-2) mx_rst_ttype_g (
+    .sel0  (rst_ttype_sel[0]),
+    .sel1  (rst_ttype_sel[1]),
+	.sel2  (rst_ttype_sel[2]),
+    .in0 ({4'b00,reset_id_g[2:0]}),
+    .in1 (wrap_tlz_ttype[6:0]),
+    .in2 (hwint_swint_ttype[6:0]),
+    .dout (rst_ttype_g[`TSA_TTYPE_WIDTH-3:0])
+); 
+//
+// added for timing 
+dff_s #(`TSA_TTYPE_WIDTH-2) dff_rst_ttype_w2 (
+    .din (rst_ttype_g[`TSA_TTYPE_WIDTH-3:0]),
+    .q   (rst_ttype_w2[`TSA_TTYPE_WIDTH-3:0]),
+    .clk (clk),
+    .se  (se),  
+    .si  (), 
+    .so  ()
+);
+// modified for timing
+
+assign rst_hwdr_ttype_sel[0] = reset_sel_g; 
+assign rst_hwdr_ttype_sel[1] = hyper_wdr_trap & ~reset_sel_g;
+assign rst_hwdr_ttype_sel[2] = 
+           ((|tlz_trap_g[`TLU_THRD_NUM-1:0]) | pib_wrap_trap_g) & 
+           ~(|rst_hwdr_ttype_sel[1:0]); 
+assign rst_hwdr_ttype_sel[3] = ~(|rst_hwdr_ttype_sel[2:0]);
+
+mux2ds #(`TSA_TTYPE_WIDTH-2) mx_hwint_swint_ttype (
+    .sel0 (hwint_g),
+    .sel1 (~hwint_g),
+    .in0  (`HWINT_INT),
+    .in1  (final_swint_id[6:0]),
+    .dout (hwint_swint_ttype[6:0])
+); 
+*/
+
+// TLZ trap type when any thread has a TLZ trap, otherwise the PIB overflow
+// trap type.
+mux2ds #(`TSA_TTYPE_WIDTH-2) mx_wrap_tlz_ttype (
+    .dout (wrap_tlz_ttype[6:0]),
+    .in0  (`TLZ_TRAP),
+    .sel0 (|tlz_trap_g[`TLU_THRD_NUM-1:0]),
+    .in1  (`PIB_OVERFLOW_TTYPE),
+    .sel1 (~|tlz_trap_g[`TLU_THRD_NUM-1:0])
+);
+//
+// modified for timing
+// A hypervisor watchdog trap overrides the reset-class type unless a
+// reset-class trap was selected.
+assign rst_hwdr_ttype_sel_w2 = ~reset_sel_w2 & hyper_wdr_trap_w2;
+
+mux2ds #(`TSA_TTYPE_WIDTH-2) mx_rst_hwdr_ttype_w2 (
+    .dout (rst_hwdr_ttype_w2[`TSA_TTYPE_WIDTH-3:0]),
+    .in0  ({7'b0000010}),
+    .sel0 (rst_hwdr_ttype_sel_w2),
+    .in1  (rst_ttype_w2[`TSA_TTYPE_WIDTH-3:0]),
+    .sel1 (~rst_hwdr_ttype_sel_w2)
+);
+//
+/*
+mux4ds #(`TSA_TTYPE_WIDTH-2) mx_rst_hwdr_ttype (
+    .sel0  (rst_hwdr_ttype_sel[0]),
+    .sel1  (rst_hwdr_ttype_sel[1]),
+	.sel2  (rst_hwdr_ttype_sel[2]),
+	.sel3  (rst_hwdr_ttype_sel[3]),
+    .in0 ({4'b00,reset_id_g[2:0]}),
+    .in1 ({7'b0000010}),
+    .in2 (wrap_tlz_ttype[6:0]),
+    .in3 (hwint_swint_ttype[6:0]),
+    .dout (rst_hwdr_ttype_g[`TSA_TTYPE_WIDTH-3:0])
+); 
+//
+// added for timing
+dff_s #(`TSA_TTYPE_WIDTH-2) dff_rst_hwdr_ttype_w2 (
+    .din (rst_hwdr_ttype_g[`TSA_TTYPE_WIDTH-3:0]), 
+    .q   (rst_hwdr_ttype_w2[`TSA_TTYPE_WIDTH-3:0]),
+    .clk (clk),
+    .se  (se),  
+    .si  (), 
+    .so  ()
+);
+*/
+//
+// construct the early_ttype_g for timing to determine whether
+// the trap is hypervisor or supervisor traps
+// modified for bug 3646, 5117 and timing
+// Selects for the early trap type. They are NOT one-hot; priority is applied
+// by the ternary chain below ([2] first, then [0], then the sync type).
+//   [0] reset-class trap, hardware interrupt, or TLZ trap on any thread
+//   [1] local early pipe flush
+//   [2] no valid instruction (inst_vld_nf_g low), an IFU flush, or neither
+//       [0] nor [1] active
+assign early_ttype_sel[0] = 
+             reset_sel_g | hwint_g | (|tlz_trap_g[`TLU_THRD_NUM-1:0]);
+             // reset_sel_g | hwint_g | swint_g | (|tlz_trap_g[`TLU_THRD_NUM-1:0]);
+assign early_ttype_sel[1] = 
+            local_early_flush_pipe_w;
+            // local_early_flush_pipe_w & ~(reset_sel_g | hwint_g | swint_g | 
+            // (|tlz_trap_g[`TLU_THRD_NUM-1:0])); 
+assign early_ttype_sel[2] = 
+            ~inst_vld_nf_g | inst_ifu_flush_w | ~(|early_ttype_sel[1:0]);
+
+// Early trap type: the pending type when [2] is set, else the zero-extended
+// reset / hw-interrupt type when [0] is set, else the early sync type.
+// early_ttype_sel[1] does not appear in the chain; it only feeds [2].
+assign early_ttype_g[`TSA_TTYPE_WIDTH-1:0] =
+           (early_ttype_sel[2])? pending_ttype[`TSA_TTYPE_WIDTH-1:0]:
+           (early_ttype_sel[0])? {2'b0,rst_hwint_ttype_g[`TSA_TTYPE_WIDTH-3:0]}:
+           // (early_ttype_sel[0])? {2'b0,rst_ttype_g[`TSA_TTYPE_WIDTH-3:0]}:
+            early_sync_ttype_g[`TSA_TTYPE_WIDTH-1:0]; 
+/*
+assign early_ttype_sel[0] = 
+            reset_sel_g | ((hwint_g | swint_g | (|tlz_trap_g[`TLU_THRD_NUM-1:0])) & 
+            inst_vld_g) ; 
+assign early_ttype_sel[1] = 
+            (local_early_flush_pipe_w & ~ifu_tlu_flush_fd_w) & ~rst_tri_en & 
+           ~((reset_sel_g | hwint_g | swint_g | (|tlz_trap_g[`TLU_THRD_NUM-1:0])) & inst_vld_g);
+assign early_ttype_sel[2] = 
+            ~(|early_ttype_sel[1:0]);
+//
+mux3ds #(`TSA_TTYPE_WIDTH) mx_early_ttype (
+    .sel0 (early_ttype_sel[0]),
+    .sel1 (early_ttype_sel[1]),
+    .sel2 (early_ttype_sel[2]),
+    .in0  ({2'b0,rst_ttype_g[`TSA_TTYPE_WIDTH-3:0]}),
+    .in1  (early_sync_ttype_g[`TSA_TTYPE_WIDTH-1:0]),
+    .in2  (pending_ttype[`TSA_TTYPE_WIDTH-1:0]),
+    .dout (early_ttype_g[`TSA_TTYPE_WIDTH-1:0])
+); 
+*/
+//
+// recoded for timing
+// Offset override enables in g:
+//   [0] trap to RED mode that is neither a SIR nor an internal watchdog reset
+//   [1] internal watchdog reset (masked by [0], which already excludes it)
+assign final_offset_en_g[0] = trap_to_redmode & ~sir_inst_g & ~internal_wdr;
+assign final_offset_en_g[1] = ~final_offset_en_g[0] & internal_wdr;
+// modified due to one-hot mux bug
+// assign final_offset_en_g[2] = ~(|final_offset_en_g[1:0]);
+
+dffr_s #(2) dffr_final_offset_en_w1 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (final_offset_en_g[1:0]),
+    .q   (final_offset_en_w1[1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// Mux selects: the staged enables are forced off by rst_tri_en, and the
+// default leg [2] is derived from the gated selects so the mux stays one-hot.
+assign final_offset_sel_w1[0] = ~rst_tri_en & final_offset_en_w1[0];
+assign final_offset_sel_w1[1] = ~rst_tri_en & final_offset_en_w1[1];
+assign final_offset_sel_w1[2] = ~(final_offset_sel_w1[1] | final_offset_sel_w1[0]);
+
+// Final trap offset: constant 5 for leg 0, constant 2 for leg 1, otherwise
+// the trap type selected for the trap base address.
+mux3ds #(`TSA_TTYPE_WIDTH) mx_final_offset_w1 (
+    .dout (final_offset_w1[`TSA_TTYPE_WIDTH-1:0]),
+    .in0  (9'b000000101),
+    .sel0 (final_offset_sel_w1[0]),
+    .in1  (9'b000000010),
+    .sel1 (final_offset_sel_w1[1]),
+    .in2  (tba_ttype_w1[`TSA_TTYPE_WIDTH-1:0]),
+    .sel2 (final_offset_sel_w1[2])
+);
+
+assign tlu_final_offset_w1[`TSA_TTYPE_WIDTH-1:0] = final_offset_w1[`TSA_TTYPE_WIDTH-1:0];
+// 
+// generating the trap pc and trap npc
+// This section has been modified due to bug 3017 
+// pc and npc has been changed from 48 -> 49 bits
+// added for one-hot mux problem
+// One-hot trap-PC mux select:
+//   [0] self-boot reset or rst_tri_en
+//   [1] TBA-based trap, only when [0] is not active
+//   [2] neither of the above
+assign tlu_pc_mxsel_w2[0] = rst_tri_en | tlu_self_boot_rst_w2;
+// modified for bug 3710
+assign tlu_pc_mxsel_w2[1] = ~tlu_pc_mxsel_w2[0] & local_select_tba_w2;
+assign tlu_pc_mxsel_w2[2] = ~(tlu_pc_mxsel_w2[1] | tlu_pc_mxsel_w2[0]);
+//
+/* logic moved to tlu_misctl
+assign	normal_trap_pc_w1 [48:0] = 
+            {1'b0, tlu_partial_trap_pc_w1[33:0],final_offset_w1[`TSA_TTYPE_WIDTH-1:0],
+             5'b00000};
+assign	normal_trap_npc_w1[48:0] = 
+            {1'b0, tlu_partial_trap_pc_w1[33:0],final_offset_w1[`TSA_TTYPE_WIDTH-1:0],
+             5'b00100};
+//
+// code moved from tlu_tdp
+mux2ds #(49) mx_trap_pc_w1 (
+       .in0  (normal_trap_pc_w1[48:0]), 
+       .in1  (tlu_restore_pc_w1[48:0]),
+       .sel0 (~restore_pc_sel_w1),  
+       .sel1 (restore_pc_sel_w1),
+       .dout (trap_pc_w1[48:0])
+);           
+//
+dff_s #(49) dff_trap_pc_w2 (
+    .din (trap_pc_w1[48:0]), 	
+    .q   (trap_pc_w2[48:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+assign tlu_ifu_trappc_w2[48:0] = trap_pc_w2[48:0];
+
+mux2ds #(49) mx_trap_npc_w1 (
+       .in0  (normal_trap_npc_w1[48:0]), 
+       .in1  (tlu_restore_npc_w1[48:0]),
+       .sel0 (~restore_pc_sel_w1),  
+       .sel1 (restore_pc_sel_w1),
+       .dout (trap_npc_w1[48:0])
+);           
+//
+dff_s #(49) dff_trap_npc_w2 (
+    .din (trap_npc_w1[48:0]), 	
+    .q   (trap_npc_w2[48:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+assign tlu_ifu_trapnpc_w2[48:0] = trap_npc_w2[48:0];
+*/
+
+// determine whether to generate a watch-dog reset using htba as the 
+// trap base address instead of the watch-dog reset vector
+// added for bug 1894 and modified for bug 1964
+// modified for timing 
+// Early hypervisor watchdog-reset qualifier for the trapping thread.
+// For the thread selected by true_trap_tid_g[1:0] (thread 3 is the default
+// leg) it is the AND of: hpstate.enb set, hpstate.priv clear, and the
+// thread's trap level flagged at maxstl.
+assign hyper_wdr_early_trap_g = ((true_trap_tid_g[1:0] == 2'b00) ? 
+               (tlu_hpstate_enb[0] & ~tlu_hpstate_priv[0] & trp_lvl_at_maxstl[0]):
+              ((true_trap_tid_g[1:0] == 2'b01) ? 
+                   (tlu_hpstate_enb[1] & ~tlu_hpstate_priv[1] & trp_lvl_at_maxstl[1]):
+                  ((true_trap_tid_g[1:0] == 2'b10) ? 
+                       (tlu_hpstate_enb[2] & ~tlu_hpstate_priv[2] & trp_lvl_at_maxstl[2]):
+                       (tlu_hpstate_enb[3] & ~tlu_hpstate_priv[3] & trp_lvl_at_maxstl[3]))));
+
+// w2-stage copy of the early qualifier, cleared by local_rst.
+dffr_s dffr_hyper_wdr_early_trap_w2 (
+   .clk (clk),
+   .rst (local_rst),
+   .din (hyper_wdr_early_trap_g),
+   .q   (hyper_wdr_early_trap_w2),
+   .se  (se),
+   .si  (),
+   .so  ()
+);
+
+// Hypervisor watchdog trap in w2: the staged qualifier, for a privileged
+// trap that is not an LSU deferred trap.
+assign hyper_wdr_trap_w2 =
+    ~lsu_defr_trap_w2 & tlu_priv_traps_w2 & hyper_wdr_early_trap_w2;
+//
+// determine whether the trapping thread is in hyperlite mode or is at
+// maxstl
+// modified for timing and bug 4779
+/*
+assign tlu_trap_to_hyper_g =  
+           (true_trap_tid_g[1:0] == 2'b00) ? 
+               (~tlu_hpstate_enb[0] | tlu_hpstate_priv[0] | trp_lvl_gte_maxstl[0]):
+              ((true_trap_tid_g[1:0] == 2'b01) ? 
+                   (~tlu_hpstate_enb[1] | tlu_hpstate_priv[1] | trp_lvl_gte_maxstl[1]):
+                  ((true_trap_tid_g[1:0] == 2'b10) ? 
+                       (~tlu_hpstate_enb[2] | tlu_hpstate_priv[2] | trp_lvl_gte_maxstl[2]):
+                       (~tlu_hpstate_enb[3] | tlu_hpstate_priv[3] | trp_lvl_gte_maxstl[3])));
+*/
+//
+// Trap-to-hypervisor decision for the trapping thread, selected by
+// true_trap_tid_g[1:0] (thread 3 is the default leg). A thread traps to the
+// hypervisor when hpstate.enb is clear, hpstate.priv is set, its trap level
+// is at or above maxstl, or it has a TLZ trap with a valid instruction.
+assign tlu_trap_to_hyper_g =  
+           (true_trap_tid_g[1:0] == 2'b00) ? 
+               (~tlu_hpstate_enb[0] | tlu_hpstate_priv[0] | 
+                 trp_lvl_gte_maxstl[0] | (tlz_trap_g[0] & inst_vld_g)):
+              ((true_trap_tid_g[1:0] == 2'b01) ? 
+                   (~tlu_hpstate_enb[1] | tlu_hpstate_priv[1] | 
+                     trp_lvl_gte_maxstl[1] | (tlz_trap_g[1] & inst_vld_g)):
+                  ((true_trap_tid_g[1:0] == 2'b10) ? 
+                       (~tlu_hpstate_enb[2] | tlu_hpstate_priv[2] | 
+                         trp_lvl_gte_maxstl[2] | (tlz_trap_g[2] & inst_vld_g)):
+                       (~tlu_hpstate_enb[3] | tlu_hpstate_priv[3] | 
+                         trp_lvl_gte_maxstl[3] | (tlz_trap_g[3] & inst_vld_g))));
+// added for timing
+// w2-stage copy of the trap-to-hypervisor decision, cleared by local_rst.
+dffr_s dffr_tlu_tlu_trap_to_hyper_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (tlu_trap_to_hyper_g),
+    .q   (tlu_trap_to_hyper_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+                          
+// recoded for timing
+// TBA (rather than hypervisor base) selection in w2. Every term requires
+// that the trap does not go to the hypervisor and is not an LSU deferred trap.
+//   element[0]: early privileged element 0
+//   element[1]: early privileged element 1 or 2
+assign select_tba_element_w2[0] =
+    tlu_early_priv_element_w2[0] & ~tlu_trap_to_hyper_w2 & ~lsu_defr_trap_w2;
+assign select_tba_element_w2[1] =
+    (tlu_early_priv_element_w2[2] | tlu_early_priv_element_w2[1]) &
+    ~tlu_trap_to_hyper_w2 & ~lsu_defr_trap_w2;
+// Local / tdp view uses the fully resolved privileged-trap indication.
+assign local_select_tba_w2 =
+    tlu_priv_traps_w2 & ~lsu_defr_trap_w2 & ~tlu_trap_to_hyper_w2;
+assign tdp_select_tba_w2 = local_select_tba_w2;
+// Exported view: element 0 counts only when no LSU trap type is valid in w2.
+assign tlu_select_tba_w2 =
+    (~lsu_ttype_vld_w2 & select_tba_element_w2[0]) | select_tba_element_w2[1];
+           
+/*
+dffr_s dffr_tlu_select_tba_w2 (
+    .din (select_tba_g),
+    .q   (tlu_select_tba_w2),
+    .clk (clk),
+    .rst (local_rst),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+//
+// added for bug 2064 and modified for bug 2165
+// modified for bug3719
+// Early decode of trap types handled as privileged traps.
+// Ranges matched on early_ttype_g, read directly from the compares below:
+//   9'h011-9'h01F
+//   9'h041-9'h04F
+//   9'h020-9'h02F except values with bits 3 and 0 both set
+//                 (9'h029, 9'h02B, 9'h02D, 9'h02F)
+//   9'h061, 9'h062
+//   9'h07C-9'h07F
+//   9'h080-9'h17F (bits 8 and 7 differ)
+// plus, independent of the type value, a PIB wrap trap or a software
+// interrupt on a valid instruction when no thread has a TLZ trap.
+assign early_priv_traps_g = 
+           ((early_ttype_g[8:4] == 5'b00001)    & (|early_ttype_g[3:0])) | 
+           ((early_ttype_g[8:4] == 5'b00100)    & (|early_ttype_g[3:0])) | 
+           ((early_ttype_g[8:4] == 5'b00010)    & ~(early_ttype_g[3] & early_ttype_g[0]))|  
+           ((early_ttype_g[8:2] == 7'b0011000)  & (early_ttype_g[1] ^ early_ttype_g[0])) | 
+           ((early_ttype_g[8:4] == 5'b00111)    & (early_ttype_g[3:2]== 2'b11))       | 
+           (early_ttype_g[8] & ~early_ttype_g[7]) | (early_ttype_g[7] & ~early_ttype_g[8]) |
+           (pib_wrap_trap_g & ~(|tlz_trap_g[`TLU_THRD_NUM-1:0]) & inst_vld_g) | 
+           (swint_g & ~(|tlz_trap_g[`TLU_THRD_NUM-1:0]) & inst_vld_g); 
+
+// EXU trap whose early type is 9'h029 or 9'h034.
+assign exu_hyper_traps_g =
+    ((early_ttype_g[8:0] == 9'h034) | (early_ttype_g[8:0] == 9'h029)) &
+    exu_ttype_vld_g;
+
+//
+// modified for timing
+
+// Privileged-trap indication split into three elements so the final
+// qualification can be done after the w2 flop:
+//   [0] privileged type decoded while the pending trap type was selected
+//       (early_ttype_sel[2]); later gated by ~lsu_ttype_vld_w2
+//   [1] privileged type decoded from a non-pending source
+//   [2] LSU watchpoint trap not overridden by a misaligned jmpl/rtn or
+//       ld/st/atomic address, an IFU trap, an EXU hyper trap, or an LSU
+//       privileged-action trap
+assign tlu_early_priv_element_g[0] = 
+          early_priv_traps_g & early_ttype_sel[2]; 
+assign tlu_early_priv_element_g[1] = 
+           early_priv_traps_g & ~early_ttype_sel[2];
+// modified for bug 4431, 4443
+assign tlu_early_priv_element_g[2] = 
+          lsu_tlu_wtchpt_trp_g & ~(misalign_addr_jmpl_rtn_g | misalign_addr_ldst_atm_g | 
+          ifu_ttype_vld_g | exu_hyper_traps_g | lsu_tlu_priv_action_g);
+//          lsu_tlu_wtchpt_trp_g & ~(lsu_tlu_priv_violtn_g | misalign_addr_jmpl_rtn_g |
+//
+// modified / added for timing
+// w2-stage copy of the three early privileged-trap elements, cleared by
+// local_rst.
+dffr_s #(3) dffr_tlu_early_priv_element_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (tlu_early_priv_element_g[2:0]),
+    .q   (tlu_early_priv_element_w2[2:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// Resolved privileged-trap indication in w2: elements 1 and 2 count as is,
+// element 0 only when no LSU trap type is valid in w2.
+assign tlu_priv_traps_w2 =
+    tlu_early_priv_element_w2[2] |
+    tlu_early_priv_element_w2[1] |
+    (~lsu_ttype_vld_w2 & tlu_early_priv_element_w2[0]);
+
+// w2-stage copy of the self-boot reset indication, cleared by local_rst.
+dffr_s dffr_tlu_self_boot_rst_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (tlu_self_boot_rst_g),
+    .q   (tlu_self_boot_rst_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+//=========================================================================================
+//	Generate TSA Control and Data
+//=========================================================================================
+
+// MODIFY : keep 2b tid
+// added for tsa_wr_tid bug
+// modified for hypervisor support  and logic loop 
+// modified for timing
+//
+// A valid (unflushed-qualified) WSR in g that targets one of the
+// trap-stack registers (tstate, tpc, tnpc, ttype, htstate).
+assign tsa_wr_tid_sel_g =
+    (tlu_htstate_rw_g | ttype_rw_g | tnpc_rw_g | tpc_rw_g | tstate_rw_g) &
+    inst_vld_g & wsr_inst_g_unflushed;
+
+// added for timing
+
+// Timing version staged to w2: a valid trap-stack WSR, a valid done/retry
+// with fast CWP completion, or a synchronous trap being taken.
+assign tsa_wr_tid_sel_tim_g =
+    sync_trap_taken_g |
+    (inst_vld_g &
+     ((cwp_fastcmplt_g & (done_inst_g | retry_inst_g)) |
+      (wsr_inst_g &
+       (tlu_htstate_rw_g | ttype_rw_g | tnpc_rw_g | tpc_rw_g | tstate_rw_g))));
+
+dffr_s dffr_tsa_wr_tid_sel_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (tsa_wr_tid_sel_tim_g),
+    .q   (tsa_wr_tid_sel_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+/*
+assign tsa_wr_tid_sel_w2 = 
+           (((wsr_inst_w2 & (tstate_rw_w2 | tpc_rw_w2 | 
+            tnpc_rw_w2  | ttype_rw_w2 | htstate_rw_w2)) |
+           ((retry_inst_w2 | done_inst_w2) & cwp_fastcmplt_w2)) &
+            inst_vld_w2) | sync_trap_taken_w2 ; 
+*/
+//
+// added for timing
+// Binary thread id in w2, encoded from the per-thread write selects.
+assign thrid_w2[0] = thread3_wsel_w2 | thread1_wsel_w2;
+assign thrid_w2[1] = thread3_wsel_w2 | thread2_wsel_w2;
+//
+// 
+// modified for bug 4403
+/*
+mux2ds #(2) mx_tsa_wr_tid (
+    .in0  (pend_trap_tid_w2[1:0]),
+	.in1  (thrid_w2[1:0]),
+    .sel0 (~tsa_wr_tid_sel_w2),  		
+    .sel1 (tsa_wr_tid_sel_w2),
+    .dout (tsa_wr_tid[1:0])
+);
+*/
+// modified for bug 4403 and 4443
+// Thread id used for the trap-stack write:
+//   - select set with an LSU deferred trap -> true_trap_tid_w2
+//   - select set without a deferred trap   -> thrid_w2 (instruction thread)
+//   - select clear                         -> pend_trap_tid_w2 (pending thread)
+assign tsa_wr_tid[1:0] = 
+           (tsa_wr_tid_sel_w2 & lsu_defr_trap_w2) ? true_trap_tid_w2[1:0]:
+           ((tsa_wr_tid_sel_w2 & ~lsu_defr_trap_w2)? thrid_w2[1:0] :
+             pend_trap_tid_w2[1:0]);
+
+// tsa should not be written by certain resets. May have to extend to wrm etc. !!!
+// modified due to the swap of memory from tlu_tsa -> bw_r_rf32x144 -> 2x bw_r_rf32x80
+// modified for bug 3384
+// Trap-stack write valids. Both fire on a taken trap or local reset; each
+// also fires for a WSR to the registers it covers:
+//   [0] tpc / tstate
+//   [1] tnpc / ttype / htstate
+// NOTE(review): the two bits presumably map to the two bw_r_rf32x80 halves
+// mentioned in the comment preceding these assigns - confirm at the
+// array instantiation.
+assign tsa_wr_vld[0] =
+    local_rst | trap_taken_w2 |                    // a thread traps
+    (wsr_inst_w2 & (tstate_rw_w2 | tpc_rw_w2));    // wrpr-tsa
+
+assign tsa_wr_vld[1] =
+    local_rst | trap_taken_w2 |                    // a thread traps
+    (wsr_inst_w2 &
+     (htstate_rw_w2 | ttype_rw_w2 | tnpc_rw_w2));  // wrpr-tsa
+//
+// modified due to timing all w stage signals have been moved to w2
+// Per-field write enables: the field's own rw strobe or a taken trap.
+// The ttype field is additionally written on local reset.
+assign tsa_pc_en      = trap_taken_w2 | tpc_rw_w2;
+assign tsa_npc_en     = trap_taken_w2 | tnpc_rw_w2;
+assign tsa_tstate_en  = trap_taken_w2 | tstate_rw_w2;
+assign tsa_ttype_en   = local_rst | trap_taken_w2 | ttype_rw_w2;
+//
+// added for hypervisor support
+assign tsa_htstate_en = trap_taken_w2 | htstate_rw_w2;
+
+// Should all these regs enable a read of the tsa ?
+// Trap-stack read valid in d: a done or retry instruction, or an RSR that
+// targets tpc / tnpc / tstate / ttype / htstate. The RSR qualifier applies
+// only to the register-select group, not to done/retry.
+assign tsa_rd_vld =
+    ifu_tlu_retry_inst_d | ifu_tlu_done_inst_d |             // done/retry
+    (ifu_tlu_rsr_inst_d &
+     // tick_rw_d | tba_rw_d | pstate_rw_d | tl_rw_d    |
+     (tlu_htstate_rw_d | ttype_rw_d | tstate_rw_d | tnpc_rw_d | tpc_rw_d)); // rdpr-tsa
+//
+// added for timing
+// e-stage copy of the read valid.
+dff_s dff_tsa_rd_vld_e (
+    .clk (clk),
+    .din (tsa_rd_vld),
+    .q   (tsa_rd_vld_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// added for timing
+// Read enable: done/retry, or an RSR whose sraddr2[4:2] bits are all zero.
+assign tsa_rd_en =
+    ifu_tlu_retry_inst_d | ifu_tlu_done_inst_d |             // done/retry
+    (ifu_tlu_rsr_inst_d & ~|sraddr2[4:2]);                   // rdpr-tsa
+// 
+// w2-stage copies of the per-thread write selects.
+dff_s #(`TLU_THRD_NUM) dff_thread_wsel_w2 (
+    .clk (clk),
+    .din ({thread3_wsel_g, thread2_wsel_g, thread1_wsel_g, thread0_wsel_g}),
+    .q   ({thread3_wsel_w2, thread2_wsel_w2, thread1_wsel_w2, thread0_wsel_w2}),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// Exported g-stage thread write select, taken from the decoded read selects.
+assign tlu_thread_wsel_g[3] = thread3_rsel_dec_g;
+assign tlu_thread_wsel_g[2] = thread2_rsel_dec_g;
+assign tlu_thread_wsel_g[1] = thread1_rsel_dec_g;
+assign tlu_thread_wsel_g[0] = thread0_rsel_dec_g;
+//
+// Added for tsa_wr_tid bug
+//
+// One-hot decode of the trap-stack write thread id.
+assign thread0_wtrp_w2 = ~(tsa_wr_tid[1] | tsa_wr_tid[0]);
+assign thread1_wtrp_w2 =  tsa_wr_tid[0] & ~tsa_wr_tid[1];
+assign thread2_wtrp_w2 = ~tsa_wr_tid[0] &  tsa_wr_tid[1];
+assign thread3_wtrp_w2 =  tsa_wr_tid[0] &  tsa_wr_tid[1];
+
+// write uses trp-lvl after increment.
+mux4ds #(3) tsawthrd (
+    .dout (tsa_wr_tpl[2:0]),
+    .in0  (trp_lvl0_new[2:0]),
+    .sel0 (thread0_wtrp_w2),
+    .in1  (trp_lvl1_new[2:0]),
+    .sel1 (thread1_wtrp_w2),
+    .in2  (trp_lvl2_new[2:0]),
+    .sel2 (thread2_wtrp_w2),
+    .in3  (trp_lvl3_new[2:0]),
+    .sel3 (thread3_wtrp_w2)
+);
+
+// TSA read trap level: uses trp-lvl prior to decrement, selected per thread in D.
+mux4ds #(3) tsarthrd (
+    .dout (tsa_rd_tpl[2:0]),
+    .in0  (trp_lvl0[2:0]),
+    .sel0 (thread0_rsel_d),
+    .in1  (trp_lvl1[2:0]),
+    .sel1 (thread1_rsel_d),
+    .in2  (trp_lvl2[2:0]),
+    .sel2 (thread2_rsel_d),
+    .in3  (trp_lvl3[2:0]),
+    .sel3 (thread3_rsel_d)
+);
+
+assign tsa_rd_tid[1:0] = thrid_d[1:0];   // TSA read thread id is the D-stage thread id
+
+//=========================================================================================
+//      TT initial state
+//=========================================================================================
+
+// The initial state of TT should be 1 on POR. Since this is required for all 4 threads,
+// it would be difficult to do this through a write to the TSA while reset is occurring.
+// Instead, a bit is used to mark whether the TT for a thread has been written to.
+// If it hasn't, then a '1' has to be inserted into the read data (see lsu_tlu_rsr_data_mod_e).
+dff_s dff_rst_d1 (
+    .clk (clk),
+    .din (local_rst),
+    .q   (reset_d1),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// reset_d1 is local_rst delayed by one flop; tt_init_en also requires tlu_rst_l high.
+assign tt_init_en = tlu_rst_l & reset_d1;
+//
+// modified for timing: all g stage signals have been moved to w2
+assign tt_init_rst[0] =
+           (thread0_wtrp_w2 & tsa_ttype_en & (|tsa_wr_vld[1:0])) | local_rst;
+assign tt_init_rst[1] =
+           (thread1_wtrp_w2 & tsa_ttype_en & (|tsa_wr_vld[1:0])) | local_rst;
+assign tt_init_rst[2] =
+           (thread2_wtrp_w2 & tsa_ttype_en & (|tsa_wr_vld[1:0])) | local_rst;
+assign tt_init_rst[3] =
+           (thread3_wtrp_w2 & tsa_ttype_en & (|tsa_wr_vld[1:0])) | local_rst;
+// Low byte of the rsr read data is forced to 8'h01 while ttype_unwritten_sel is set.
+assign lsu_tlu_rsr_data_mod_e[7:0] = ttype_unwritten_sel ? 8'h01 : lsu_tlu_rsr_data_e[7:0];
+// tt_unwritten[n]: loads tt_init_en when enabled by tt_init_en; reset input is tt_init_rst[n].
+dffre_s dffre_tt_init0 (
+    .clk (clk),
+    .rst (tt_init_rst[0]),
+    .en  (tt_init_en),
+    .din (tt_init_en),
+    .q   (tt_unwritten[0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffre_s dffre_tt_init1 (
+    .clk (clk),
+    .rst (tt_init_rst[1]),
+    .en  (tt_init_en),
+    .din (tt_init_en),
+    .q   (tt_unwritten[1]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffre_s dffre_tt_init2 (
+    .clk (clk),
+    .rst (tt_init_rst[2]),
+    .en  (tt_init_en),
+    .din (tt_init_en),
+    .q   (tt_unwritten[2]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffre_s dffre_tt_init3 (
+    .clk (clk),
+    .rst (tt_init_rst[3]),
+    .en  (tt_init_en),
+    .din (tt_init_en),
+    .q   (tt_unwritten[3]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+//=========================================================================================
+//	Decode SR Addr
+//=========================================================================================
+
+// **Exceptions for Write/Reads of Privileged/State Register**
+// WRPR:
+// - Access to reserved rd fields will cause exception. Done by IFU.
+// - A write to TPC, TNPC, TT or TSTATE when the trap level is zero
+// (TL=0) causes an illegal_instruction exception.
+// - privileged opcode. Use privilege bit in sraddr for exception.
+// WRSR :
+// - privileged opcode. wrasr only - implementation dependent.
+// - illegal inst - done by IFU.
+// RDPR :
+// - A read from TPC, TNPC, TT or TSTATE when the trap level is zero
+// (TL=0) causes an illegal_instruction exception.
+// - Access to reserved rs1 fields causes an illegal_inst exception.
+// - privileged opcode.
+// RDSR :
+// - privileged opcode. rdasr only - implementation dependent.
+// - Access to reserved rs1 fields causes an illegal_inst exception.
+assign sraddr[`TLU_ASR_ADDR_WIDTH-1:0] =
+           ifu_tlu_sraddr_d[`TLU_ASR_ADDR_WIDTH-1:0];   // D-stage SR address from the IFU
+assign sraddr2[`TLU_ASR_ADDR_WIDTH-1:0] =
+           sraddr[`TLU_ASR_ADDR_WIDTH-1:0];             // second copy of the same address
+//
+// added for hypervisor support
+assign asr_hyperp = sraddr2[6];
+assign asr_priv   = sraddr2[5];
+
+assign stickcmp_rw_d = ~asr_priv &                      // low bits == 5'b11001
+           sraddr2[4] & sraddr2[3] & sraddr2[0] & ~(sraddr2[2] | sraddr2[1]);
+assign stick_rw_d = sraddr2[4] & sraddr2[3] & ~(sraddr2[2] | sraddr2[1] | sraddr2[0]);
+
+assign tpc_rw_d = asr_priv &                            // =1 ; privileged. 5'b00000
+           ~(sraddr[4] | sraddr[3] | sraddr[2] | sraddr[1] | sraddr[0]);
+assign tnpc_rw_d = asr_priv &                           // =1 ; privileged. 5'b00001
+           sraddr[0] & ~(sraddr[4] | sraddr[3] | sraddr[2] | sraddr[1]);
+assign tstate_rw_d = asr_priv &                         // =1 ; privileged. 5'b00010
+           sraddr[1] & ~(sraddr[4] | sraddr[3] | sraddr[2] | sraddr[0]);
+assign ttype_rw_d = asr_priv &                          // =1 ; privileged. 5'b00011
+           sraddr[1] & sraddr[0] & ~(sraddr[4] | sraddr[3] | sraddr[2]);
+
+// stick and tick are referring to the same register.
+// - privileged action - rdtick only.
+assign tick_rw_d = ~asr_hyperp &                        // 5'b00100, or the stick decode
+           (stick_rw_d | (sraddr2[2] & ~(sraddr2[4] | sraddr2[3] | sraddr2[1] | sraddr2[0])));
+//
+// modified for bug 1293
+// qualified with the rsr read
+assign tick_npriv_r_d = ifu_tlu_rsr_inst_d & ~asr_priv &   // =0; non-privileged.
+           (stick_rw_d | (sraddr2[2] & ~(sraddr2[4] | sraddr2[3] | sraddr2[1] | sraddr2[0])));
+
+assign tickcmp_rw_d = ~asr_priv &                       // low bits == 5'b10111
+           sraddr2[4] & sraddr2[2] & sraddr2[1] & sraddr2[0] & ~sraddr2[3];
+assign tba_rw_d = asr_priv &                            // =1 ; privileged. 5'b00101
+           sraddr[2] & sraddr[0] & ~(sraddr[4] | sraddr[3] | sraddr[1]);
+assign pstate_rw_d = asr_priv &                         // =1 ; privileged. 5'b00110
+           sraddr[2] & sraddr[1] & ~(sraddr[4] | sraddr[3] | sraddr[0]);
+assign tl_rw_d = asr_priv &                             // =1 ; privileged. 5'b00111
+           sraddr[2] & sraddr[1] & sraddr[0] & ~(sraddr[4] | sraddr[3]);
+assign pil_rw_d = asr_priv &                            // =1 ; privileged. 5'b01000
+           sraddr2[3] & ~(sraddr2[4] | sraddr2[2] | sraddr2[1] | sraddr2[0]);
+assign set_sftint_d = ~asr_priv & ~asr_hyperp &         // low bits == 5'b10100
+           sraddr2[4] & sraddr2[2] & ~(sraddr2[3] | sraddr2[1] | sraddr2[0]);
+assign clr_sftint_d = ~asr_priv & ~asr_hyperp &         // low bits == 5'b10101
+           sraddr2[4] & sraddr2[2] & sraddr2[0] & ~(sraddr2[3] | sraddr2[1]);
+assign sftint_rg_rw_d = ~asr_priv & ~asr_hyperp &       // low bits == 5'b10110
+           sraddr2[4] & sraddr2[2] & sraddr2[1] & ~(sraddr2[3] | sraddr2[0]);
+//
+// pib register decodes
+assign pcr_rsr_d =
+           (`PCR_ASR_ADDR == sraddr[`TLU_ASR_ADDR_WIDTH-1:0]);
+assign pic_rsr_d =
+           ((`PIC_ASR_NPRIV_ADDR == sraddr[`TLU_ASR_ADDR_WIDTH-1:0]) |
+            (`PIC_ASR_PRIV_ADDR  == sraddr[`TLU_ASR_ADDR_WIDTH-1:0]));
+
+// Bug 818 fix: The qualification with sraddr[5] is removed because the sftint and tick_cmp registers
+// are privileged-write state registers and not privileged registers; therefore, sraddr[5] is
+// not asserted for these.
+// modified due to timing
+// assign	wsr_inst_d	= ifu_tlu_wsr_inst_d; 
+//
+// added for bug 1293
+          
+// Stage to E1.
+
+dff_s dff_tpc_rw_e (
+    .clk (clk),
+    .din (tpc_rw_d),
+    .q   (tpc_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tnpc_rw_e (
+    .clk (clk),
+    .din (tnpc_rw_d),
+    .q   (tnpc_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tstate_rw_e (
+    .clk (clk),
+    .din (tstate_rw_d),
+    .q   (tstate_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_ttype_rw_e (
+    .clk (clk),
+    .din (ttype_rw_d),
+    .q   (ttype_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tick_rw_e (
+    .clk (clk),
+    .din (tick_rw_d),
+    .q   (tick_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tick_npriv_r_e (
+    .clk (clk),
+    .din (tick_npriv_r_d),
+    .q   (tick_npriv_r_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tickcmp_rw_e (
+    .clk (clk),
+    .din (tickcmp_rw_d),
+    .q   (tickcmp_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tba_rw_e (
+    .clk (clk),
+    .din (tba_rw_d),
+    .q   (tba_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_pstate_rw_e (
+    .clk (clk),
+    .din (pstate_rw_d),
+    .q   (pstate_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tl_rw_d_e (
+    .clk (clk),
+    .din (tl_rw_d),
+    .q   (tl_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_pil_rw_d_e (
+    .clk (clk),
+    .din (pil_rw_d),
+    .q   (pil_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_set_sftint_e (
+    .clk (clk),
+    .din (set_sftint_d),
+    .q   (set_sftint_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_clr_sftint_e (
+    .clk (clk),
+    .din (clr_sftint_d),
+    .q   (clr_sftint_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_sftint_rg_rw_e (
+    .clk (clk),
+    .din (sftint_rg_rw_d),
+    .q   (sftint_rg_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_pcr_rsr_e (
+    .clk (clk),
+    .din (pcr_rsr_d),
+    .q   (pcr_rsr_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_pic_rsr_e (
+    .clk (clk),
+    .din (pic_rsr_d),
+    .q   (pic_rsr_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// modified due to timing: the D->E flop below is disabled; wsr_inst_e is taken from the LSU
+/*
+dff_s dff_wsr_inst_d_e (
+    .din (wsr_inst_d),
+    .q   (wsr_inst_e),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+*/
+assign wsr_inst_e = lsu_tlu_wsr_inst_e;
+
+dff_s dff_stickcmp_rw_e (
+    .clk (clk),
+    .din (stickcmp_rw_d),
+    .q   (stickcmp_rw_e),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+						
+// Stage to E2.
+
+dff_s dff_tpc_rw_m (
+    .clk (clk),
+    .din (tpc_rw_e),
+    .q   (tpc_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tnpc_rw_m (
+    .clk (clk),
+    .din (tnpc_rw_e),
+    .q   (tnpc_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tstate_rw_m (
+    .clk (clk),
+    .din (tstate_rw_e),
+    .q   (tstate_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_ttype_rw_m (
+    .clk (clk),
+    .din (ttype_rw_e),
+    .q   (ttype_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tick_rw_m (
+    .clk (clk),
+    .din (tick_rw_e),
+    .q   (tick_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tick_npriv_r_m (
+    .clk (clk),
+    .din (tick_npriv_r_e),
+    .q   (tick_npriv_r_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tickcmp_rw_m (
+    .clk (clk),
+    .din (tickcmp_rw_e),
+    .q   (tickcmp_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// added for timing - moved from hypervisor
+dff_s dff_htickcmp_rw_m_m (
+    .clk (clk),
+    .din (tlu_htickcmp_rw_e),
+    .q   (htickcmp_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tba_rw_m (
+    .clk (clk),
+    .din (tba_rw_e),
+    .q   (tba_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_pstate_rw_m (
+    .clk (clk),
+    .din (pstate_rw_e),
+    .q   (pstate_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tl_rw_m (
+    .clk (clk),
+    .din (tl_rw_e),
+    .q   (tl_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_pil_rw_m (
+    .clk (clk),
+    .din (pil_rw_e),
+    .q   (pil_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_set_sftint_m (
+    .clk (clk),
+    .din (set_sftint_e),
+    .q   (set_sftint_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_clr_sftint_m (
+    .clk (clk),
+    .din (clr_sftint_e),
+    .q   (clr_sftint_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_sftint_rg_rw_m (
+    .clk (clk),
+    .din (sftint_rg_rw_e),
+    .q   (sftint_rg_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_wsr_inst_m (
+    .clk (clk),
+    .din (wsr_inst_e),
+    .q   (wsr_inst_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// added for hypervisor support
+dff_s dff_stickcmp_rw_m (
+    .clk (clk),
+    .din (stickcmp_rw_e),
+    .q   (stickcmp_rw_m),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tpc_rw_g (
+    .clk (clk),
+    .din (tpc_rw_m),
+    .q   (tpc_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tnpc_rw_g (
+    .clk (clk),
+    .din (tnpc_rw_m),
+    .q   (tnpc_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tstate_rw_g (
+    .clk (clk),
+    .din (tstate_rw_m),
+    .q   (tstate_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_ttype_rw_g (
+    .clk (clk),
+    .din (ttype_rw_m),
+    .q   (ttype_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tick_rw_g (
+    .clk (clk),
+    .din (tick_rw_m),
+    .q   (tick_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tick_npriv_r_g (
+    .clk (clk),
+    .din (tick_npriv_r_m),
+    .q   (tick_npriv_r_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tickcmp_rw_g (
+    .clk (clk),
+    .din (tickcmp_rw_m),
+    .q   (tickcmp_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// added for timing - moved from hyperv
+dff_s dff_htickcmp_rw_m_g (
+    .clk (clk),
+    .din (htickcmp_rw_m),
+    .q   (htickcmp_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tba_rw_g (
+    .clk (clk),
+    .din (tba_rw_m),
+    .q   (tba_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_pstate_rw_g (
+    .clk (clk),
+    .din (pstate_rw_m),
+    .q   (pstate_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_pstate_rw_w2 (
+    .clk (clk),
+    .din (pstate_rw_g),
+    .q   (pstate_rw_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tl_rw_g (
+    .clk (clk),
+    .din (tl_rw_m),
+    .q   (tl_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tl_rw_w2 (
+    .clk (clk),
+    .din (tl_rw_g),
+    .q   (tl_rw_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_pil_rw_g (
+    .clk (clk),
+    .din (pil_rw_m),
+    .q   (pil_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tpc_rw_w2 (
+    .clk (clk),
+    .din (tpc_rw_g),
+    .q   (tpc_rw_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tnpc_rw_w2 (
+    .clk (clk),
+    .din (tnpc_rw_g),
+    .q   (tnpc_rw_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tstate_rw_w2 (
+    .clk (clk),
+    .din (tstate_rw_g),
+    .q   (tstate_rw_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_ttype_rw_w2 (
+    .clk (clk),
+    .din (ttype_rw_g),
+    .q   (ttype_rw_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_htstate_rw_w2 (
+    .clk (clk),
+    .din (tlu_htstate_rw_g),
+    .q   (htstate_rw_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_set_sftint_g (
+    .clk (clk),
+    .din (set_sftint_m),
+    .q   (set_sftint_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_clr_sftint_g (
+    .clk (clk),
+    .din (clr_sftint_m),
+    .q   (clr_sftint_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_sftint_rg_rw_g (
+    .clk (clk),
+    .din (sftint_rg_rw_m),
+    .q   (sftint_rg_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_wsr_inst_g (
+    .clk (clk),
+    .din (wsr_inst_m),
+    .q   (wsr_inst_g_unflushed),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_wsr_inst_w2 (
+    .clk (clk),
+    .din (wsr_inst_g),
+    .q   (wsr_inst_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tlu_gl_rw_g (
+    .clk (clk),
+    .din (tlu_gl_rw_m),
+    .q   (tlu_gl_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+//
+// added for hypervisor support
+dff_s dff_stickcmp_rw_g (
+    .clk (clk),
+    .din (stickcmp_rw_m),
+    .q   (stickcmp_rw_g),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// modified due to timing violations						
+// assign	wsr_inst_g = wsr_inst_g_unflushed & ~tlu_ifu_flush_pipe_w & inst_vld_g;
+assign wsr_inst_g = inst_vld_g & wsr_inst_g_unflushed & ~local_early_flush_pipe_w;
+assign tlu_wsr_inst_nq_g =      // same qualification, using inst_vld_nf_g instead of inst_vld_g
+           inst_vld_nf_g & wsr_inst_g_unflushed & ~local_early_flush_pipe_w;
+
+
+//=========================================================================================
+//	TICK/TICK-CMP RELATED
+//=========================================================================================
+
+wire [1:0] tckctr_in;
+wire [1:0] tckctr;
+
+// modified due to swapping in the incr64 soft macro
+// assign tckctr_incr = tckctr + 1;
+assign tlu_incr_tick[1:0] = tckctr[1:0];
+assign tckctr_in[1:0]     = tlu_tckctr_in[1:0];
+
+dffr_s #(2) dffr_tckctr_cnt (
+    .clk (clk),
+    .rst (local_rst | ~tlu_tick_en_l),
+    .din (tckctr_in[1:0]),
+    .q   (tckctr[1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// 3rd cycle, increment tick reg.
+// assign	tlu_incr_tick = tckctr[1] & tckctr[0];
+// tlu_tickcmp_sel is the one-hot decode of tckctr[1:0].
+assign tlu_tickcmp_sel[0] = ~(tckctr[1] | tckctr[0]);
+assign tlu_tickcmp_sel[1] = tckctr[0] & ~tckctr[1];
+assign tlu_tickcmp_sel[2] = tckctr[1] & ~tckctr[0];
+assign tlu_tickcmp_sel[3] = tckctr[1] & tckctr[0];
+
+// TICK.NPT
+
+// reset should not be needed in this equation !!!
+assign tick_ctl_din     = por_rstint_g | local_rst | tlu_wsr_data_b63_w;
+assign tlu_tick_ctl_din = tick_ctl_din;
+// Per-thread NPT bits: each flop loads tick_ctl_din when its tick_en[n] is set.
+dffe_s dffe_npt0 (
+    .clk (clk),
+    .en  (tick_en[0]),
+    .din (tick_ctl_din),
+    .q   (tick_npt0),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffe_s dffe_npt1 (
+    .clk (clk),
+    .en  (tick_en[1]),
+    .din (tick_ctl_din),
+    .q   (tick_npt1),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffe_s dffe_npt2 (
+    .clk (clk),
+    .en  (tick_en[2]),
+    .din (tick_ctl_din),
+    .q   (tick_npt2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffe_s dffe_npt3 (
+    .clk (clk),
+    .en  (tick_en[3]),
+    .din (tick_ctl_din),
+    .q   (tick_npt3),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+assign tlu_tick_npt =                                  // NPT bit of the E-stage thread
+           (tick_npt0 & thread0_rsel_e) |
+           (tick_npt1 & thread1_rsel_e) |
+           (tick_npt2 & thread2_rsel_e) |
+           (tick_npt3 & thread3_rsel_e);
+
+assign tick_npt_priv_act_g =
+           ((tlu_none_priv[0] & thread0_rsel_g & tick_npt0) |
+            (tlu_none_priv[1] & thread1_rsel_g & tick_npt1) |
+            (tlu_none_priv[2] & thread2_rsel_g & tick_npt2) |
+            (tlu_none_priv[3] & thread3_rsel_g & tick_npt3)) &
+           ~ifu_ttype_vld_tmp_g & tick_npriv_r_g;
+//
+// added for timing fix
+assign tick_npt_priv_act_m =
+           ((tlu_none_priv[0] & thread0_rsel_m & tick_npt0) |
+            (tlu_none_priv[1] & thread1_rsel_m & tick_npt1) |
+            (tlu_none_priv[2] & thread2_rsel_m & tick_npt2) |
+            (tlu_none_priv[3] & thread3_rsel_m & tick_npt3)) &
+           ~ifu_ttype_vld_m & tick_npriv_r_m;
+
+assign exu_tick_npt_priv_act_m =
+           ((tlu_none_priv[0] & thread0_stg_m_buf & tick_npt0) |
+            (tlu_none_priv[1] & thread1_stg_m_buf & tick_npt1) |
+            (tlu_none_priv[2] & thread2_stg_m_buf & tick_npt2) |
+            (tlu_none_priv[3] & thread3_stg_m_buf & tick_npt3)) &
+           tick_npriv_r_m;
+
+//=========================================================================================
+//	Soft Interrupt Control
+//=========================================================================================
+
+wire [1:0] sftintctr_incr;
+wire [1:0] sftintctr;
+// Free-running 2-bit counter: next value is the current value plus one.
+assign sftintctr_incr[1:0] = 2'b01 + sftintctr[1:0];
+
+dffr_s #(2) dffr_sftint_cnt (
+    .clk (clk),
+    .rst (local_rst),
+    .din (sftintctr_incr[1:0]),
+    .q   (sftintctr[1:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+// 
+// modified for bug 4626 and 5117 
+/*
+assign swint_nq_g = swint_g; 
+assign swint_thrd_g[0]= swint_nq_g & thread0_rsel_g & tlu_int_pstate_ie[0];
+assign swint_thrd_g[1]= swint_nq_g & thread1_rsel_g & tlu_int_pstate_ie[1];
+assign swint_thrd_g[2]= swint_nq_g & thread2_rsel_g & tlu_int_pstate_ie[2];
+// assign swint_thrd_g[3]= swint_nq_g & thread3_rsel_g & tlu_int_pstate_ie[3];
+*/
+
+assign sftint_user_update_g =
+           sftint_rg_rw_g | clr_sftint_g;
+
+dffr_s dffr_sftint_user_update_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (sftint_user_update_g),
+    .q   (sftint_user_update_w2),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+assign penc_sel_user_update = ~swint_g & sftint_user_update_w2;   // swint_g takes precedence
+
+assign sftint_penc_update = swint_g | sftint_user_update_w2;
+
+assign sftint_penc_thrd[0] =
+           (thread0_wsel_w2 & penc_sel_user_update) | (thread0_rsel_g & swint_g);
+assign sftint_penc_thrd[1] =
+           (thread1_wsel_w2 & penc_sel_user_update) | (thread1_rsel_g & swint_g);
+assign sftint_penc_thrd[2] =
+           (thread2_wsel_w2 & penc_sel_user_update) | (thread2_rsel_g & swint_g);
+// Select follows sftintctr unless sftint_penc_update is set; gated off by rst_tri_en.
+assign tlu_sftint_penc_sel[0] =
+           ~rst_tri_en & (sftint_penc_thrd[0] |
+                          (~sftint_penc_update & ~(sftintctr[1] | sftintctr[0])));
+assign tlu_sftint_penc_sel[1] =
+           ~rst_tri_en & (sftint_penc_thrd[1] |
+                          (~sftint_penc_update & sftintctr[0] & ~sftintctr[1]));
+assign tlu_sftint_penc_sel[2] =
+           ~rst_tri_en & (sftint_penc_thrd[2] |
+                          (~sftint_penc_update & sftintctr[1] & ~sftintctr[0]));
+//
+// added for bug 5117
+
+assign sftint_wait_rst[0] =
+           tlu_sftint_penc_sel[0] & sftint_pend_wait[0];
+assign sftint_wait_rst[1] =
+           tlu_sftint_penc_sel[1] & sftint_pend_wait[1];
+assign sftint_wait_rst[2] =
+           tlu_sftint_penc_sel[2] & sftint_pend_wait[2];
+assign sftint_wait_rst[3] =
+           tlu_sftint_penc_sel[3] & sftint_pend_wait[3];
+// sftint_pend_wait[n]: set from a G-stage user update for thread n; reset term is sftint_wait_rst[n].
+dffr_s dffr_sftint_pend_wait_0 (
+    .clk (clk),
+    .rst (sftint_wait_rst[0] | local_rst),
+    .din (thread0_rsel_dec_g & sftint_user_update_g),
+    .q   (sftint_pend_wait[0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+dffr_s dffr_sftint_pend_wait_1 (
+    .clk (clk),
+    .rst (sftint_wait_rst[1] | local_rst),
+    .din (thread1_rsel_dec_g & sftint_user_update_g),
+    .q   (sftint_pend_wait[1]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffr_s dffr_sftint_pend_wait_2 (
+    .clk (clk),
+    .rst (sftint_wait_rst[2] | local_rst),
+    .din (thread2_rsel_dec_g & sftint_user_update_g),
+    .q   (sftint_pend_wait[2]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffr_s dffr_sftint_pend_wait_3 (
+    .clk (clk),
+    .rst (sftint_wait_rst[3] | local_rst),
+    .din (thread3_rsel_dec_g & sftint_user_update_g),
+    .q   (sftint_pend_wait[3]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+/*
+assign	tlu_sftint_penc_sel[0] = 
+            (~sftintctr[1] & ~sftintctr[0] & ~swint_nq_g) | swint_thrd_g[0];
+assign	tlu_sftint_penc_sel[1] = 
+            (~sftintctr[1] &  sftintctr[0] & ~swint_nq_g) | swint_thrd_g[1];
+assign	tlu_sftint_penc_sel[2] = 
+            ( sftintctr[1] & ~sftintctr[0] & ~swint_nq_g) | swint_thrd_g[2];
+//
+*/
+// 
+// modified for one-hot problem
+assign tlu_sftint_penc_sel[3] =   // default select: set when none of selects 2..0 is set
+           ~(tlu_sftint_penc_sel[2] | tlu_sftint_penc_sel[1] | tlu_sftint_penc_sel[0]);
+// assign	tlu_sftint_penc_sel[3] =
+//             ( sftintctr[1] &  sftintctr[0] & ~swint_nq_g) | swint_thrd_g[3];
+/*
+assign	tlu_sftint_penc_sel[0] = ~sftintctr[1] & ~sftintctr[0];
+assign	tlu_sftint_penc_sel[1] = ~sftintctr[1] &  sftintctr[0];
+assign	tlu_sftint_penc_sel[2] =  sftintctr[1] & ~sftintctr[0];
+assign	tlu_sftint_penc_sel[3] =  sftintctr[1] &  sftintctr[0];
+*/
+
+//  Flop sftint values on a per thread basis.
+dffe_s #(4) dffe_sftint_id0 (
+    .clk (clk),
+    .en  (tlu_sftint_penc_sel[0]),
+    .din (tlu_sftint_id[3:0]),
+    .q   (sftint0_id[3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffe_s #(4) dffe_sftint_id1 (
+    .clk (clk),
+    .en  (tlu_sftint_penc_sel[1]),
+    .din (tlu_sftint_id[3:0]),
+    .q   (sftint1_id[3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffe_s #(4) dffe_sftint_id2 (
+    .clk (clk),
+    .en  (tlu_sftint_penc_sel[2]),
+    .din (tlu_sftint_id[3:0]),
+    .q   (sftint2_id[3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffe_s #(4) dffe_sftint_id3 (
+    .clk (clk),
+    .en  (tlu_sftint_penc_sel[3]),
+    .din (tlu_sftint_id[3:0]),
+    .q   (sftint3_id[3:0]),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+// Soft Int Control
+// modified to fix one-hot problem
+assign tlu_set_sftint_l_g = rst_tri_en | ~set_sftint_g;     // active low; forced high by rst_tri_en
+assign tlu_clr_sftint_l_g = rst_tri_en | ~clr_sftint_g;
+assign tlu_wr_sftint_l_g  = rst_tri_en | ~sftint_rg_rw_g;
+// modified for timing
+/*
+assign	tlu_set_sftint_l_g   =  ~(set_sftint_g & wsr_inst_g);
+assign	tlu_clr_sftint_l_g   =  ~(clr_sftint_g & wsr_inst_g);
+assign	tlu_wr_sftint_l_g    =  ~(sftint_rg_rw_g & wsr_inst_g);
+*/
+
+// modified for pib support
+//
+assign tlu_sftint_en_l_g[0] =
+           tlu_rst_l & ~(thread0_rsel_dec_g & wsr_inst_g &
+                         (sftint_rg_rw_g | clr_sftint_g | set_sftint_g));
+assign tlu_sftint_en_l_g[1] =
+           tlu_rst_l & ~(thread1_rsel_dec_g & wsr_inst_g &
+                         (sftint_rg_rw_g | clr_sftint_g | set_sftint_g));
+assign tlu_sftint_en_l_g[2] =
+           tlu_rst_l & ~(thread2_rsel_dec_g & wsr_inst_g &
+                         (sftint_rg_rw_g | clr_sftint_g | set_sftint_g));
+assign tlu_sftint_en_l_g[3] =
+           tlu_rst_l & ~(thread3_rsel_dec_g & wsr_inst_g &
+                         (sftint_rg_rw_g | clr_sftint_g | set_sftint_g));
+
+// added for one-hot mux bug: select 0 is the default when selects 3..1 are all clear
+// modified for timing
+assign tlu_sftint_mx_sel[0] =
+           ~(tlu_sftint_mx_sel[3] | tlu_sftint_mx_sel[2] | tlu_sftint_mx_sel[1]);
+assign tlu_sftint_mx_sel[1] =
+           ~rst_tri_en & thread1_rsel_dec_g &
+           (sftint_rg_rw_g | clr_sftint_g | set_sftint_g);
+assign tlu_sftint_mx_sel[2] =
+           ~rst_tri_en & thread2_rsel_dec_g &
+           (sftint_rg_rw_g | clr_sftint_g | set_sftint_g);
+assign tlu_sftint_mx_sel[3] =
+           ~rst_tri_en & thread3_rsel_dec_g &
+           (sftint_rg_rw_g | clr_sftint_g | set_sftint_g);
+//
+// determine whether there is a pending sftint interrupt for each thread:
+// the flopped id is non-zero and the thread's pend-wait flag is clear
+assign tlu_int_sftint_pend[0] = ~sftint_pend_wait[0] & (|sftint0_id[3:0]);
+assign tlu_int_sftint_pend[1] = ~sftint_pend_wait[1] & (|sftint1_id[3:0]);
+assign tlu_int_sftint_pend[2] = ~sftint_pend_wait[2] & (|sftint2_id[3:0]);
+assign tlu_int_sftint_pend[3] = ~sftint_pend_wait[3] & (|sftint3_id[3:0]);
+
+// if there is no existing sft interrupt, then sftint_id = 0, and vld would never be asserted. 
+// this is why a 15b vector has been encoded as a 16b vector.
+// modified for hypervisor support
+
+// fix for bug 7027
+/*
+assign sftint_only_vld[0] = (tlu_int_sftint_pend[0]) ? 
+                            (sftint0_id[3:0] > true_pil0[3:0]) & pil_cmp_en[0] : 1'b0;
+assign sftint_only_vld[1] = (tlu_int_sftint_pend[1]) ?
+                            (sftint1_id[3:0] > true_pil1[3:0]) & pil_cmp_en[1] : 1'b0;
+assign sftint_only_vld[2] = (tlu_int_sftint_pend[2]) ?
+                            (sftint2_id[3:0] > true_pil2[3:0]) & pil_cmp_en[2] : 1'b0;
+assign sftint_only_vld[3] = (tlu_int_sftint_pend[3]) ?
+                            (sftint3_id[3:0] > true_pil3[3:0]) & pil_cmp_en[3] : 1'b0;
+*/
+
+assign sftint_only_vld[0] =
+           tlu_int_sftint_pend[0] & (sftint0_id[3:0] > true_pil0[3:0]);
+assign sftint_only_vld[1] =
+           tlu_int_sftint_pend[1] & (sftint1_id[3:0] > true_pil1[3:0]);
+assign sftint_only_vld[2] =
+           tlu_int_sftint_pend[2] & (sftint2_id[3:0] > true_pil2[3:0]);
+assign sftint_only_vld[3] =
+           tlu_int_sftint_pend[3] & (sftint3_id[3:0] > true_pil3[3:0]);
+
+
+// swint
+// removed the qualification of the tlu_int_pstate_ie - otherwise, IFU might never wake up
+// after the thread has been suspended.
+//
+// modified for timing
+assign tlu_sftint_vld[0] =
+           (sftint_only_vld[0] | tlu_dev_mondo_trap[0] | tlu_cpu_mondo_trap[0]);
+assign tlu_sftint_vld[1] =
+           (sftint_only_vld[1] | tlu_dev_mondo_trap[1] | tlu_cpu_mondo_trap[1]);
+assign tlu_sftint_vld[2] =
+           (sftint_only_vld[2] | tlu_dev_mondo_trap[2] | tlu_cpu_mondo_trap[2]);
+assign tlu_sftint_vld[3] =
+           (sftint_only_vld[3] | tlu_dev_mondo_trap[3] | tlu_cpu_mondo_trap[3]);
+//
+// added for hypervisor support
+// htick_match traps 
+
+// fix for bug 7027
+/*
+assign tlu_hintp_vld[0] = 
+           tlu_hintp[0] & (~tlu_hpstate_priv[0] | 
+          (tlu_hpstate_priv[0] & tlu_int_pstate_ie[0]));
+assign tlu_hintp_vld[1] = 
+           tlu_hintp[1] & (~tlu_hpstate_priv[1] | 
+          (tlu_hpstate_priv[1] & tlu_int_pstate_ie[1]));
+assign tlu_hintp_vld[2] = 
+           tlu_hintp[2] & (~tlu_hpstate_priv[2] | 
+          (tlu_hpstate_priv[2] & tlu_int_pstate_ie[2]));
+assign tlu_hintp_vld[3] = 
+           tlu_hintp[3] & (~tlu_hpstate_priv[3] | 
+          (tlu_hpstate_priv[3] & tlu_int_pstate_ie[3]));
+*/
+
+assign tlu_hintp_vld[3] = tlu_hintp[3];   // passed through unqualified (fix for bug 7027)
+assign tlu_hintp_vld[2] = tlu_hintp[2];
+assign tlu_hintp_vld[1] = tlu_hintp[1];
+assign tlu_hintp_vld[0] = tlu_hintp[0];
+
+
+//
+// resum_err traps
+// modified for timing
+
+// fix for bug 7027
+/*
+assign tlu_rerr_vld[0] = tlu_resum_err_trap[0] & tlu_int_pstate_ie[0]; 
+assign tlu_rerr_vld[1] = tlu_resum_err_trap[1] & tlu_int_pstate_ie[1]; 
+assign tlu_rerr_vld[2] = tlu_resum_err_trap[2] & tlu_int_pstate_ie[2]; 
+assign tlu_rerr_vld[3] = tlu_resum_err_trap[3] & tlu_int_pstate_ie[3]; 
+*/
+assign tlu_rerr_vld[3] = tlu_resum_err_trap[3];   // passed through unqualified
+assign tlu_rerr_vld[2] = tlu_resum_err_trap[2];
+assign tlu_rerr_vld[1] = tlu_resum_err_trap[1];
+assign tlu_rerr_vld[0] = tlu_resum_err_trap[0];
+
+// pil_cmp_en[n] is low only when both tlu_hpstate_priv[n] and tlu_hpstate_enb[n] are set.
+assign pil_cmp_en[0] =
+           ~tlu_hpstate_enb[0] | ~tlu_hpstate_priv[0];
+assign pil_cmp_en[1] =
+           ~tlu_hpstate_enb[1] | ~tlu_hpstate_priv[1];
+assign pil_cmp_en[2] =
+           ~tlu_hpstate_enb[2] | ~tlu_hpstate_priv[2];
+assign pil_cmp_en[3] =
+           ~tlu_hpstate_enb[3] | ~tlu_hpstate_priv[3];
+
+// TLU.TICK_INT - The tick and stick interrupt logic has been moved to tlu_tdp;
+// the interrupt will be reported back to tlu_tcl via the softint settings.
+/*
+assign wsr_tick_intclr_g =  (tlu_clr_sftint_l_g | ~tlu_wsr_data_w[0]) & (tlu_wr_sftint_l_g | tlu_wsr_data_w[0]);
+assign wsr_tick_intset_g = ~(tlu_set_sftint_l_g & tlu_wr_sftint_l_g) & tlu_wsr_data_w[0];
+//
+// added for hypervisor support for tlu_stick_int
+assign wsr_stick_intclr_g =  (tlu_clr_sftint_l_g | ~tlu_wsr_data_b16_w) & (tlu_wr_sftint_l_g | tlu_wsr_data_b16_w);
+assign wsr_stick_intset_g = ~(tlu_set_sftint_l_g & tlu_wr_sftint_l_g) & tlu_wsr_data_b16_w;
+*/
+// The following code has been moved to tlu_tdp
+/*
+assign	tick_intclr[0] = tlu_tick_int[0] & wsr_tick_intclr_g;
+assign	tick_intclr[1] = tlu_tick_int[1] & wsr_tick_intclr_g;
+assign	tick_intclr[2] = tlu_tick_int[2] & wsr_tick_intclr_g;
+assign	tick_intclr[3] = tlu_tick_int[3] & wsr_tick_intclr_g; 
+//
+assign	tickcmp_int[0] = tlu_tick_match & ~tick_intdis0 & tlu_tickcmp_sel[0];  
+assign	tickcmp_int[1] = tlu_tick_match & ~tick_intdis1 & tlu_tickcmp_sel[1];   
+assign	tickcmp_int[2] = tlu_tick_match & ~tick_intdis2 & tlu_tickcmp_sel[2]; 
+assign	tickcmp_int[3] = tlu_tick_match & ~tick_intdis3 & tlu_tickcmp_sel[3]; 
+
+assign	tick_intrpt[0] = tickcmp_int[0] | tick_intclr[0];
+assign	tick_intrpt[1] = tickcmp_int[1] | tick_intclr[1];
+assign	tick_intrpt[2] = tickcmp_int[2] | tick_intclr[2];
+assign	tick_intrpt[3] = tickcmp_int[3] | tick_intclr[3];
+
+// modified for bug 1022
+// qualified tlu_set_sftint with wsr_data_w[0]
+//
+assign	tick_int_en[0] = ~tlu_sftint_en_l_g[0] | tick_intrpt[0];
+assign	tick_int_din[0] = (tick_intrpt[0] | wsr_tick_intset_g) ? 1'b1 : 1'b0;
+
+assign	tick_int_en[1] = ~tlu_sftint_en_l_g[1] | tick_intrpt[1];
+assign	tick_int_din[1] = (tick_intrpt[1] | wsr_tick_intset_g) ? 1'b1 : 1'b0;
+
+assign	tick_int_en[2] = ~tlu_sftint_en_l_g[2] | tick_intrpt[2];
+assign	tick_int_din[2] = (tick_intrpt[2] | wsr_tick_intset_g) ? 1'b1 : 1'b0;
+
+assign	tick_int_en[3] = ~tlu_sftint_en_l_g[3] | tick_intrpt[3];
+assign	tick_int_din[3] = (tick_intrpt[3] | wsr_tick_intset_g) ? 1'b1 : 1'b0;
+//
+// recoded tlu_tick_int for bug 818
+dffre_s dffre_tick_int0 (
+    .din (tick_int_din[0]), 
+    .q   (tlu_tick_int[0]),
+    .rst (local_rst), 
+    .en  (tick_int_en[0]), 
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// recoded tlu_tick_int for bug 818
+dffre_s dffre_tick_int1 (
+    .din (tick_int_din[1]), 
+    .q   (tlu_tick_int[1]),
+    .rst (local_rst), 
+    .en  (tick_int_en[1]), 
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// recoded tlu_tick_int for bug 818
+//
+dffre_s dffre_tick_int2 (
+    .din (tick_int_din[2]), 
+    .q   (tlu_tick_int[2]),
+    .rst (local_rst), 
+    .en  (tick_int_en[2]), 
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// recoded tlu_tick_int for bug 818
+dffre_s dffre_tick_int3 (
+    .din (tick_int_din[3]), 
+    .q   (tlu_tick_int[3]),
+    .rst (local_rst), 
+    .en  (tick_int_en[3]), 
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+*/
+//
+// added and/or modified for hypervisor support
+// the following logic has been moved to tlu_tdp
+/*
+assign	stick_intclr[0] = tlu_stick_int[0] & wsr_stick_intclr_g;
+assign	stick_intclr[1] = tlu_stick_int[1] & wsr_stick_intclr_g;
+assign	stick_intclr[2] = tlu_stick_int[2] & wsr_stick_intclr_g;
+assign	stick_intclr[3] = tlu_stick_int[3] & wsr_stick_intclr_g; 
+//
+assign	stickcmp_int[0] = tlu_stick_match & ~stick_intdis0 & tlu_tickcmp_sel[0];  
+assign	stickcmp_int[1] = tlu_stick_match & ~stick_intdis1 & tlu_tickcmp_sel[1];   
+assign	stickcmp_int[2] = tlu_stick_match & ~stick_intdis2 & tlu_tickcmp_sel[2]; 
+assign	stickcmp_int[3] = tlu_stick_match & ~stick_intdis3 & tlu_tickcmp_sel[3]; 
+//
+assign	stick_intrpt[0] = stickcmp_int[0] | stick_intclr[0];
+assign	stick_intrpt[1] = stickcmp_int[1] | stick_intclr[1];
+assign	stick_intrpt[2] = stickcmp_int[2] | stick_intclr[2];
+assign	stick_intrpt[3] = stickcmp_int[3] | stick_intclr[3];
+//
+// modified for bug 1022
+// qualified tlu_set_sftint with wsr_data_w[16]
+//
+assign	stick_int_en[0] = ~tlu_sftint_en_l_g[0] | stick_intrpt[0];
+assign	stick_int_din[0] = (stick_intrpt[0] | wsr_stick_intset_g) ? 1'b1 : 1'b0;
+
+assign	stick_int_en[1] = ~tlu_sftint_en_l_g[1] | stick_intrpt[1];
+assign	stick_int_din[1] = (stick_intrpt[1] | wsr_stick_intset_g) ? 1'b1 : 1'b0;
+
+assign	stick_int_en[2] = ~tlu_sftint_en_l_g[2] | stick_intrpt[2];
+assign	stick_int_din[2] = (stick_intrpt[2] | wsr_stick_intset_g) ? 1'b1 : 1'b0;
+
+assign	stick_int_en[3] = ~tlu_sftint_en_l_g[3] | stick_intrpt[3];
+assign	stick_int_din[3] = (stick_intrpt[3] | wsr_stick_intset_g) ? 1'b1 : 1'b0;
+
+// recoded tlu_tick_int for bug 818
+//
+dffre_s dffre_stick_int0 (
+    .din (stick_int_din[0]), 
+    .q   (tlu_stick_int[0]),
+    .rst (local_rst), 
+    .en  (stick_int_en[0]), 
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+dffre_s dffre_stick_int1 (
+    .din (stick_int_din[1]), 
+    .q   (tlu_stick_int[1]),
+    .rst (local_rst), 
+    .en  (stick_int_en[1]), 
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+dffre_s dffre_stick_int2  (
+    .din (stick_int_din[2]), 
+    .q   (tlu_stick_int[2]),
+    .rst (local_rst), 
+    .en  (stick_int_en[2]), 
+    .clk (clk),
+    .se  (se),
+    .si (),
+    .so ()
+);
+//
+dffre_s dffre_stick_int3 (
+    .din (stick_int_din[3]), 
+    .q   (tlu_stick_int[3]),
+    .rst (local_rst), 
+    .en  (stick_int_en[3]), 
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+// modified for hypervisor support
+//
+assign	tlu_sftint_lvl14_all[0] = 
+            tlu_sftint_lvl14[0] | tlu_tick_int[0] | tlu_stick_int[0];
+assign	tlu_sftint_lvl14_all[1] = 
+            tlu_sftint_lvl14[1] | tlu_tick_int[1] | tlu_stick_int[1];
+assign	tlu_sftint_lvl14_all[2] = 
+            tlu_sftint_lvl14[2] | tlu_tick_int[2] | tlu_stick_int[2];
+assign	tlu_sftint_lvl14_all[3] = 
+            tlu_sftint_lvl14[3] | tlu_tick_int[3] | tlu_stick_int[3];
+//
+assign	tlu_sftint_lvl14_int[0] = tickcmp_int[0] | stickcmp_int[0];
+assign	tlu_sftint_lvl14_int[1] = tickcmp_int[1] | stickcmp_int[1];
+assign	tlu_sftint_lvl14_int[2] = tickcmp_int[2] | stickcmp_int[2];
+assign	tlu_sftint_lvl14_int[3] = tickcmp_int[3] | stickcmp_int[3];
+*/
+
+//=========================================================================================
+//	PIL for Threads
+//=========================================================================================
+
+// PIL register write enables: asserted when wsr_inst_g and pil_rw_g are both
+// set and the corresponding thread write-select is active.
+assign pil0_en = wsr_inst_g & pil_rw_g & thread0_wsel_g,
+       pil1_en = wsr_inst_g & pil_rw_g & thread1_wsel_g,
+       pil2_en = wsr_inst_g & pil_rw_g & thread2_wsel_g,
+       pil3_en = wsr_inst_g & pil_rw_g & thread3_wsel_g;
+
+// Per-thread PIL registers. Each is a 4-bit dffe_s fed from
+// tlu_wsr_data_w[3:0] and enabled by the matching pilN_en.
+// No reset port is connected on these instances.
+
+// THREAD 0
+dffe_s #(4) dffe_pil0 (
+    .clk (clk),
+    .en  (pil0_en),
+    .din (tlu_wsr_data_w[3:0]),
+    .q   (true_pil0[3:0]),
+    .se  (se), .si (), .so ()
+);
+
+// THREAD 1
+dffe_s #(4) dffe_pil1 (
+    .clk (clk),
+    .en  (pil1_en),
+    .din (tlu_wsr_data_w[3:0]),
+    .q   (true_pil1[3:0]),
+    .se  (se), .si (), .so ()
+);
+
+// THREAD 2
+dffe_s #(4) dffe_pil2 (
+    .clk (clk),
+    .en  (pil2_en),
+    .din (tlu_wsr_data_w[3:0]),
+    .q   (true_pil2[3:0]),
+    .se  (se), .si (), .so ()
+);
+
+// THREAD 3
+dffe_s #(4) dffe_pil3 (
+    .clk (clk),
+    .en  (pil3_en),
+    .din (tlu_wsr_data_w[3:0]),
+    .q   (true_pil3[3:0]),
+    .se  (se), .si (), .so ()
+);
+
+//=========================================================================================
+//	TL for Threads
+//=========================================================================================
+//
+// Stage ifu_tlu_immu_miss_m through a dff_s to produce immu_miss_g.
+dff_s dff_stgim_g (
+    .clk (clk),
+    .din (ifu_tlu_immu_miss_m),
+    .q   (immu_miss_g),
+    .se  (se), .si (), .so ()
+);
+
+// wrpr supplies new value else increment on trap.
+// wrpr %tl when tl=0 will cause a trap.
+// trap in MAXTL-1 enters RED_MODE. 
+// added for hypervisor support
+// capped the tl value by supervisor write at MAXSTL 
+//
+// Per-thread select for capping a TL write at the supervisor limit: the
+// write data exceeds `MAXSTL and tlu_hyper_lite is clear for that thread.
+assign maxstl_wr_sel[0] = (tlu_wsr_data_w[2:0] > `MAXSTL) & ~tlu_hyper_lite[0],
+       maxstl_wr_sel[1] = (tlu_wsr_data_w[2:0] > `MAXSTL) & ~tlu_hyper_lite[1],
+       maxstl_wr_sel[2] = (tlu_wsr_data_w[2:0] > `MAXSTL) & ~tlu_hyper_lite[2],
+       maxstl_wr_sel[3] = (tlu_wsr_data_w[2:0] > `MAXSTL) & ~tlu_hyper_lite[3];
+
+// Shared by all threads: the write data is all ones.
+assign maxtl_wr_sel = (tlu_wsr_data_w[2:0] == 3'b111);
+
+// THREAD0
+// Use to signal page fault for now.
+// sync_trap_taken_g already qualified with inst_vld_g.
+// long-latency sparc traps have to be killed in own pipeline
+// hwint interrupts are qualified elsewhere
+// modified due to timing
+// modified for bug 4561
+// Thread 0 trap indication: a synchronous trap with thread 0 selected, or
+// pending_trap_sel[0] unless masked by any of dnrtry_inst_g,
+// tsa_wr_tid_sel_g, ifu_thrd_flush_w[0], cwp_cmplt0_pending,
+// sync_trap_taken_g or (tlu_gl_rw_g & wsr_inst_g).
+assign thrd0_traps =
+    (thread0_rsel_g & sync_trap_taken_g) |
+    (pending_trap_sel[0] &
+     ~(sync_trap_taken_g | dnrtry_inst_g | tsa_wr_tid_sel_g |
+       ifu_thrd_flush_w[0] | cwp_cmplt0_pending |
+       (wsr_inst_g & tlu_gl_rw_g)));
+
+// trap level will get updated next cycle: stage the indication.
+dff_s #(1) dff_stgw2_0 (
+    .clk (clk),
+    .din (thrd0_traps),
+    .q   (thrd0_traps_w2),
+    .se  (se), .si (), .so ()
+);
+
+assign tlu_thrd_traps_w2[0] = thrd0_traps_w2;
+
+// Comparisons of the thread 0 trap level against `MAXTL, `MAXTL_LESSONE
+// and (added for hypervisor support) `MAXSTL.
+assign trp_lvl0_at_maxtl      = (trp_lvl0[2:0] == `MAXTL);
+assign trp_lvl0_at_maxtlless1 = (trp_lvl0[2:0] == `MAXTL_LESSONE);
+assign trp_lvl_at_maxstl[0]   = (trp_lvl0[2:0] == `MAXSTL);
+assign trp_lvl_gte_maxstl[0]  = trp_lvl_at_maxstl[0] | (trp_lvl0[2:0] > `MAXSTL);
+
+// TL write data: `MAXSTL_TL when maxstl_wr_sel[0] is set, else `MAXTL when
+// maxtl_wr_sel is set, else the unmodified write data.
+assign wsr_trp_lvl0_data_w[2:0] =
+    maxstl_wr_sel[0] ? `MAXSTL_TL :
+    maxtl_wr_sel     ? `MAXTL :
+                       tlu_wsr_data_w[2:0];
+
+// added for timing: stage the write data.
+dff_s #(3) dff_wsr_trp_lvl0_data_w2 (
+    .clk (clk),
+    .din (wsr_trp_lvl0_data_w[2:0]),
+    .q   (wsr_trp_lvl0_data_w2[2:0]),
+    .se  (se), .si (), .so ()
+);
+
+//=========================================================================================
+// The following section has been recoded due to timing
+//=========================================================================================
+// trap level to be incremented if thread not at MAXTL and not in redmode
+// trap level is incremented only if the thread is not already at MAXTL
+assign trp_lvl0_incr_w2 = ~trp_lvl0_at_maxtl & thrd0_traps_w2;
+
+// Next TL for thread 0, in priority order:
+//   1. TL write selected for this thread -> staged write data
+//   2. local_rst or por_rstint0_w2       -> `MAXTL
+//   3. done/retry                        -> TL - 1
+//   4. otherwise                         -> TL + trp_lvl0_incr_w2
+assign trp_lvl0_new[2:0] =
+    (thread0_wsel_w2 & wsr_inst_w2 & tl_rw_w2) ? wsr_trp_lvl0_data_w2[2:0] :
+    (por_rstint0_w2 | local_rst)               ? `MAXTL :
+    dnrtry_inst_w2[0]                          ? trp_lvl0[2:0] - 3'b001 :
+                                                 trp_lvl0[2:0] + {2'b00, trp_lvl0_incr_w2};
+
+// The TL register is enabled whenever one of the update cases applies.
+assign tl0_en =
+    local_rst | por_rstint0_w2 | dnrtry_inst_w2[0] | trp_lvl0_incr_w2 |
+    (thread0_wsel_w2 & wsr_inst_w2 & tl_rw_w2);
+
+// No reset port on this register: reset is folded into the data/enable
+// above. tl has to be correctly defined for all conditions !!!
+dffe_s #(3) dffe_tl0 (
+    .clk (clk),
+    .en  (tl0_en),
+    .din (trp_lvl0_new[2:0]),
+    .q   (trp_lvl0[2:0]),
+    .se  (se), .si (), .so ()
+);
+
+// TL == 0 and TL != 0 decodes for thread 0
+assign tlu_lsu_tl_zero[0] = ~(trp_lvl0[2] | trp_lvl0[1] | trp_lvl0[0]);
+assign tl0_gt_0           = |trp_lvl0[2:0];
+//
+// THREAD1
+// Use to signal page fault for now.
+// sync_trap_taken_g already qualified with inst_vld_g.
+// long-latency sparc traps have to be killed in own pipeline
+// hwint interrupts are qualified elsewhere
+// modified due to timing
+// Thread 1 trap indication: a synchronous trap with thread 1 selected, or
+// pending_trap_sel[1] unless masked by any of dnrtry_inst_g,
+// tsa_wr_tid_sel_g, ifu_thrd_flush_w[1], cwp_cmplt1_pending,
+// sync_trap_taken_g or (tlu_gl_rw_g & wsr_inst_g).
+assign thrd1_traps =
+    (thread1_rsel_g & sync_trap_taken_g) |
+    (pending_trap_sel[1] &
+     ~(sync_trap_taken_g | dnrtry_inst_g | tsa_wr_tid_sel_g |
+       ifu_thrd_flush_w[1] | cwp_cmplt1_pending |
+       (wsr_inst_g & tlu_gl_rw_g)));
+
+// trap level will get updated next cycle: stage the indication.
+dff_s #(1) dff_stgw2_1 (
+    .clk (clk),
+    .din (thrd1_traps),
+    .q   (thrd1_traps_w2),
+    .se  (se), .si (), .so ()
+);
+
+assign tlu_thrd_traps_w2[1] = thrd1_traps_w2;
+
+// Comparisons of the thread 1 trap level against `MAXTL, `MAXTL_LESSONE
+// and (added for hypervisor support) `MAXSTL.
+assign trp_lvl1_at_maxtl      = (trp_lvl1[2:0] == `MAXTL);
+assign trp_lvl1_at_maxtlless1 = (trp_lvl1[2:0] == `MAXTL_LESSONE);
+assign trp_lvl_at_maxstl[1]   = (trp_lvl1[2:0] == `MAXSTL);
+assign trp_lvl_gte_maxstl[1]  = trp_lvl_at_maxstl[1] | (trp_lvl1[2:0] > `MAXSTL);
+
+// TL write data: `MAXSTL_TL when maxstl_wr_sel[1] is set, else `MAXTL when
+// maxtl_wr_sel is set, else the unmodified write data.
+assign wsr_trp_lvl1_data_w[2:0] =
+    maxstl_wr_sel[1] ? `MAXSTL_TL :
+    maxtl_wr_sel     ? `MAXTL :
+                       tlu_wsr_data_w[2:0];
+
+// added for timing: stage the write data.
+dff_s #(3) dff_wsr_trp_lvl1_data_w2 (
+    .clk (clk),
+    .din (wsr_trp_lvl1_data_w[2:0]),
+    .q   (wsr_trp_lvl1_data_w2[2:0]),
+    .se  (se), .si (), .so ()
+);
+
+//=========================================================================================
+// The following section has been recoded due to timing
+//=========================================================================================
+// trap level to be incremented if thread not at MAXTL and not in redmode
+// trap level is incremented only if the thread is not already at MAXTL
+assign trp_lvl1_incr_w2 = ~trp_lvl1_at_maxtl & thrd1_traps_w2;
+
+// Next TL for thread 1, in priority order:
+//   1. TL write selected for this thread -> staged write data
+//   2. local_rst or por_rstint1_w2       -> `MAXTL
+//   3. done/retry                        -> TL - 1
+//   4. otherwise                         -> TL + trp_lvl1_incr_w2
+assign trp_lvl1_new[2:0] =
+    (thread1_wsel_w2 & wsr_inst_w2 & tl_rw_w2) ? wsr_trp_lvl1_data_w2[2:0] :
+    (por_rstint1_w2 | local_rst)               ? `MAXTL :
+    dnrtry_inst_w2[1]                          ? trp_lvl1[2:0] - 3'b001 :
+                                                 trp_lvl1[2:0] + {2'b00, trp_lvl1_incr_w2};
+
+// The TL register is enabled whenever one of the update cases applies.
+assign tl1_en =
+    local_rst | por_rstint1_w2 | dnrtry_inst_w2[1] | trp_lvl1_incr_w2 |
+    (thread1_wsel_w2 & wsr_inst_w2 & tl_rw_w2);
+
+// No reset port on this register: reset is folded into the data/enable
+// above. tl has to be correctly defined for all conditions !!!
+dffe_s #(3) dffe_tl1 (
+    .clk (clk),
+    .en  (tl1_en),
+    .din (trp_lvl1_new[2:0]),
+    .q   (trp_lvl1[2:0]),
+    .se  (se), .si (), .so ()
+);
+
+// TL == 0 and TL != 0 decodes for thread 1
+assign tlu_lsu_tl_zero[1] = ~(trp_lvl1[2] | trp_lvl1[1] | trp_lvl1[0]);
+assign tl1_gt_0           = |trp_lvl1[2:0];
+//
+// THREAD2
+// Use to signal page fault for now.
+// sync_trap_taken_g already qualified with inst_vld_g.
+// long-latency sparc traps have to be killed in own pipeline
+// hwint interrupts are qualified elsewhere
+// modified due to timing
+// modified for bug 3827
+// Thread 2 trap indication: a synchronous trap with thread 2 selected, or
+// pending_trap_sel[2] unless masked by any of dnrtry_inst_g,
+// tsa_wr_tid_sel_g, ifu_thrd_flush_w[2], cwp_cmplt2_pending,
+// sync_trap_taken_g or (tlu_gl_rw_g & wsr_inst_g).
+assign thrd2_traps =
+    (thread2_rsel_g & sync_trap_taken_g) |
+    (pending_trap_sel[2] &
+     ~(sync_trap_taken_g | dnrtry_inst_g | tsa_wr_tid_sel_g |
+       ifu_thrd_flush_w[2] | cwp_cmplt2_pending |
+       (wsr_inst_g & tlu_gl_rw_g)));
+
+// trap level will get updated next cycle: stage the indication.
+dff_s #(1) dff_stgw2_2 (
+    .clk (clk),
+    .din (thrd2_traps),
+    .q   (thrd2_traps_w2),
+    .se  (se), .si (), .so ()
+);
+
+assign tlu_thrd_traps_w2[2] = thrd2_traps_w2;
+
+// Comparisons of the thread 2 trap level against `MAXTL, `MAXTL_LESSONE
+// and (added for hypervisor support) `MAXSTL.
+assign trp_lvl2_at_maxtl      = (trp_lvl2[2:0] == `MAXTL);
+assign trp_lvl2_at_maxtlless1 = (trp_lvl2[2:0] == `MAXTL_LESSONE);
+assign trp_lvl_at_maxstl[2]   = (trp_lvl2[2:0] == `MAXSTL);
+assign trp_lvl_gte_maxstl[2]  = trp_lvl_at_maxstl[2] | (trp_lvl2[2:0] > `MAXSTL);
+
+// TL write data: `MAXSTL_TL when maxstl_wr_sel[2] is set, else `MAXTL when
+// maxtl_wr_sel is set, else the unmodified write data.
+assign wsr_trp_lvl2_data_w[2:0] =
+    maxstl_wr_sel[2] ? `MAXSTL_TL :
+    maxtl_wr_sel     ? `MAXTL :
+                       tlu_wsr_data_w[2:0];
+
+// added for timing: stage the write data.
+dff_s #(3) dff_wsr_trp_lvl2_data_w2 (
+    .clk (clk),
+    .din (wsr_trp_lvl2_data_w[2:0]),
+    .q   (wsr_trp_lvl2_data_w2[2:0]),
+    .se  (se), .si (), .so ()
+);
+
+//=========================================================================================
+// The following section has been recoded due to timing
+//=========================================================================================
+// trap level to be incremented if thread not at MAXTL and not in redmode
+// trap level is incremented only if the thread is not already at MAXTL
+assign trp_lvl2_incr_w2 = ~trp_lvl2_at_maxtl & thrd2_traps_w2;
+
+// Next TL for thread 2, in priority order:
+//   1. TL write selected for this thread -> staged write data
+//   2. local_rst or por_rstint2_w2       -> `MAXTL
+//   3. done/retry                        -> TL - 1
+//   4. otherwise                         -> TL + trp_lvl2_incr_w2
+assign trp_lvl2_new[2:0] =
+    (thread2_wsel_w2 & wsr_inst_w2 & tl_rw_w2) ? wsr_trp_lvl2_data_w2[2:0] :
+    (por_rstint2_w2 | local_rst)               ? `MAXTL :
+    dnrtry_inst_w2[2]                          ? trp_lvl2[2:0] - 3'b001 :
+                                                 trp_lvl2[2:0] + {2'b00, trp_lvl2_incr_w2};
+
+// The TL register is enabled whenever one of the update cases applies.
+assign tl2_en =
+    local_rst | por_rstint2_w2 | dnrtry_inst_w2[2] | trp_lvl2_incr_w2 |
+    (thread2_wsel_w2 & wsr_inst_w2 & tl_rw_w2);
+
+// No reset port on this register: reset is folded into the data/enable
+// above. tl has to be correctly defined for all conditions !!!
+dffe_s #(3) dffe_tl2 (
+    .clk (clk),
+    .en  (tl2_en),
+    .din (trp_lvl2_new[2:0]),
+    .q   (trp_lvl2[2:0]),
+    .se  (se), .si (), .so ()
+);
+
+// TL == 0 and TL != 0 decodes for thread 2
+assign tlu_lsu_tl_zero[2] = ~(trp_lvl2[2] | trp_lvl2[1] | trp_lvl2[0]);
+assign tl2_gt_0           = |trp_lvl2[2:0];
+//
+// THREAD3
+// Use to signal page fault for now.
+// sync_trap_taken_g already qualified with inst_vld_g.
+// long-latency sparc traps have to be killed in own pipeline
+// hwint interrupts are qualified elsewhere
+// modified due to timing
+// Thread 3 trap indication: a synchronous trap with thread 3 selected, or
+// pending_trap_sel[3] unless masked by any of dnrtry_inst_g,
+// tsa_wr_tid_sel_g, ifu_thrd_flush_w[3], cwp_cmplt3_pending,
+// sync_trap_taken_g or (tlu_gl_rw_g & wsr_inst_g).
+assign thrd3_traps =
+    (thread3_rsel_g & sync_trap_taken_g) |
+    (pending_trap_sel[3] &
+     ~(sync_trap_taken_g | dnrtry_inst_g | tsa_wr_tid_sel_g |
+       ifu_thrd_flush_w[3] | cwp_cmplt3_pending |
+       (wsr_inst_g & tlu_gl_rw_g)));
+
+// trap level will get updated next cycle: stage the indication.
+dff_s #(1) dff_stgw2_3 (
+    .clk (clk),
+    .din (thrd3_traps),
+    .q   (thrd3_traps_w2),
+    .se  (se), .si (), .so ()
+);
+
+assign tlu_thrd_traps_w2[3] = thrd3_traps_w2;
+
+// Comparisons of the thread 3 trap level against `MAXTL, `MAXTL_LESSONE
+// and (added for hypervisor support) `MAXSTL.
+assign trp_lvl3_at_maxtl      = (trp_lvl3[2:0] == `MAXTL);
+assign trp_lvl3_at_maxtlless1 = (trp_lvl3[2:0] == `MAXTL_LESSONE);
+assign trp_lvl_at_maxstl[3]   = (trp_lvl3[2:0] == `MAXSTL);
+assign trp_lvl_gte_maxstl[3]  = trp_lvl_at_maxstl[3] | (trp_lvl3[2:0] > `MAXSTL);
+
+// TL write data: `MAXSTL_TL when maxstl_wr_sel[3] is set, else `MAXTL when
+// maxtl_wr_sel is set, else the unmodified write data.
+assign wsr_trp_lvl3_data_w[2:0] =
+    maxstl_wr_sel[3] ? `MAXSTL_TL :
+    maxtl_wr_sel     ? `MAXTL :
+                       tlu_wsr_data_w[2:0];
+
+// added for timing: stage the write data.
+dff_s #(3) dff_wsr_trp_lvl3_data_w2 (
+    .clk (clk),
+    .din (wsr_trp_lvl3_data_w[2:0]),
+    .q   (wsr_trp_lvl3_data_w2[2:0]),
+    .se  (se), .si (), .so ()
+);
+
+//=========================================================================================
+// The following section has been recoded due to timing
+//=========================================================================================
+// trap level to be incremented if thread not at MAXTL and not in redmode
+// trap level is incremented only if the thread is not already at MAXTL
+assign trp_lvl3_incr_w2 = ~trp_lvl3_at_maxtl & thrd3_traps_w2;
+
+// Next TL for thread 3, in priority order:
+//   1. TL write selected for this thread -> staged write data
+//   2. local_rst or por_rstint3_w2       -> `MAXTL
+//   3. done/retry                        -> TL - 1
+//   4. otherwise                         -> TL + trp_lvl3_incr_w2
+assign trp_lvl3_new[2:0] =
+    (thread3_wsel_w2 & wsr_inst_w2 & tl_rw_w2) ? wsr_trp_lvl3_data_w2[2:0] :
+    (por_rstint3_w2 | local_rst)               ? `MAXTL :
+    dnrtry_inst_w2[3]                          ? trp_lvl3[2:0] - 3'b001 :
+                                                 trp_lvl3[2:0] + {2'b00, trp_lvl3_incr_w2};
+
+// The TL register is enabled whenever one of the update cases applies.
+assign tl3_en =
+    local_rst | por_rstint3_w2 | dnrtry_inst_w2[3] | trp_lvl3_incr_w2 |
+    (thread3_wsel_w2 & wsr_inst_w2 & tl_rw_w2);
+
+// No reset port on this register: reset is folded into the data/enable above.
+dffe_s #(3) dffe_tl3 (
+    .clk (clk),
+    .en  (tl3_en),
+    .din (trp_lvl3_new[2:0]),
+    .q   (trp_lvl3[2:0]),
+    .se  (se), .si (), .so ()
+);
+
+// TL == 0 and TL != 0 decodes for thread 3
+assign tlu_lsu_tl_zero[3] = ~(trp_lvl3[2] | trp_lvl3[1] | trp_lvl3[0]);
+assign tl3_gt_0           = |trp_lvl3[2:0];
+//
+// added for hypervisor support - TLZ trap
+// detection of transition of trap-level from <> 0 to 0
+// modified for bug 3192
+
+// Set when the thread's TL register is being updated (tlN_en), the current
+// TL is non-zero and the value being loaded is zero.
+assign tlz_thread_set[0] = tl0_en & ~tlu_lsu_tl_zero[0] & ~(|trp_lvl0_new[2:0]),
+       tlz_thread_set[1] = tl1_en & ~tlu_lsu_tl_zero[1] & ~(|trp_lvl1_new[2:0]),
+       tlz_thread_set[2] = tl2_en & ~tlu_lsu_tl_zero[2] & ~(|trp_lvl2_new[2:0]),
+       tlz_thread_set[3] = tl3_en & ~tlu_lsu_tl_zero[3] & ~(|trp_lvl3_new[2:0]);
+
+// Stage the set indications for all threads.
+dff_s #(`TLU_THRD_NUM) dff_tlz_thread_data (
+    .clk (clk),
+    .din (tlz_thread_set[`TLU_THRD_NUM-1:0]),
+    .q   (tlz_thread_data[`TLU_THRD_NUM-1:0]),
+    .se  (se), .si (), .so ()
+);
+
+//
+// storing the state of the tlz trap to take the trap on the next valid
+// instruction 
+// modified for bug 3646
+// Per-thread TLZ pending state (dffre_s). Loaded from tlz_thread_data[n]
+// when that bit and tlu_hpstate_tlz[n] are both set; the rst port is
+// driven by local_rst, tlz_trap_g[n] or thread_inst_vld_g[n].
+dffre_s dffr_tlz_thread_0 (
+    .clk (clk),
+    .rst (local_rst | tlz_trap_g[0] | thread_inst_vld_g[0]),
+    .en  (tlz_thread_data[0] & tlu_hpstate_tlz[0]),
+    .din (tlz_thread_data[0]),
+    .q   (tlz_thread[0]),
+    .se  (se), .si (), .so ()
+);
+
+dffre_s dffr_tlz_thread_1 (
+    .clk (clk),
+    .rst (local_rst | tlz_trap_g[1] | thread_inst_vld_g[1]),
+    .en  (tlz_thread_data[1] & tlu_hpstate_tlz[1]),
+    .din (tlz_thread_data[1]),
+    .q   (tlz_thread[1]),
+    .se  (se), .si (), .so ()
+);
+
+dffre_s dffr_tlz_thread_2 (
+    .clk (clk),
+    .rst (local_rst | tlz_trap_g[2] | thread_inst_vld_g[2]),
+    .en  (tlz_thread_data[2] & tlu_hpstate_tlz[2]),
+    .din (tlz_thread_data[2]),
+    .q   (tlz_thread[2]),
+    .se  (se), .si (), .so ()
+);
+
+dffre_s dffr_tlz_thread_3 (
+    .clk (clk),
+    .rst (local_rst | tlz_trap_g[3] | thread_inst_vld_g[3]),
+    .en  (tlz_thread_data[3] & tlu_hpstate_tlz[3]),
+    .din (tlz_thread_data[3]),
+    .q   (tlz_thread[3]),
+    .se  (se), .si (), .so ()
+);
+//
+// initiate the trap for the appropriate thread
+// modified for bug 4434 & 4758
+// TLZ trap request per thread: a valid instruction of the selected thread,
+// no reset interrupt, TL is zero, the TLZ pending bit is set, and
+// tlu_hpstate_tlz is set while tlu_hpstate_priv is clear.
+// An earlier form of the first term, kept here for reference, was
+//   ~(ifu_rstint_m | (ifu_hwint_m & tlu_int_pstate_ie[n]))
+assign tlz_trap_m[0] =
+    inst_vld_m & ~ifu_rstint_m & thread0_rsel_m &
+    tlz_thread[0] & tlu_lsu_tl_zero[0] &
+    tlu_hpstate_tlz[0] & ~tlu_hpstate_priv[0];
+assign tlz_trap_m[1] =
+    inst_vld_m & ~ifu_rstint_m & thread1_rsel_m &
+    tlz_thread[1] & tlu_lsu_tl_zero[1] &
+    tlu_hpstate_tlz[1] & ~tlu_hpstate_priv[1];
+assign tlz_trap_m[2] =
+    inst_vld_m & ~ifu_rstint_m & thread2_rsel_m &
+    tlz_thread[2] & tlu_lsu_tl_zero[2] &
+    tlu_hpstate_tlz[2] & ~tlu_hpstate_priv[2];
+assign tlz_trap_m[3] =
+    inst_vld_m & ~ifu_rstint_m & thread3_rsel_m &
+    tlz_thread[3] & tlu_lsu_tl_zero[3] &
+    tlu_hpstate_tlz[3] & ~tlu_hpstate_priv[3];
+//
+// added for timing - modified to remove the qualification of the interrupts
+// from IFU: same terms as tlz_trap_m but without ifu_rstint_m and inst_vld_m.
+assign tlz_exu_trap_m[0] =
+    thread0_rsel_m & tlz_thread[0] & tlu_lsu_tl_zero[0] &
+    tlu_hpstate_tlz[0] & ~tlu_hpstate_priv[0];
+assign tlz_exu_trap_m[1] =
+    thread1_rsel_m & tlz_thread[1] & tlu_lsu_tl_zero[1] &
+    tlu_hpstate_tlz[1] & ~tlu_hpstate_priv[1];
+assign tlz_exu_trap_m[2] =
+    thread2_rsel_m & tlz_thread[2] & tlu_lsu_tl_zero[2] &
+    tlu_hpstate_tlz[2] & ~tlu_hpstate_priv[2];
+assign tlz_exu_trap_m[3] =
+    thread3_rsel_m & tlz_thread[3] & tlu_lsu_tl_zero[3] &
+    tlu_hpstate_tlz[3] & ~tlu_hpstate_priv[3];
+//
+// modified for bug 4862
+// indicate that a TLZ trap needs to be taken
+// Stage the per-thread TLZ trap requests; rst is driven by local_rst.
+dffr_s #(`TLU_THRD_NUM) dffr_tlz_trap_g (
+    .clk (clk),
+    .rst (local_rst),
+    .din (tlz_trap_m[`TLU_THRD_NUM-1:0]),
+    .q   (tlz_trap_nq_g[`TLU_THRD_NUM-1:0]),
+    .se  (se), .si (), .so ()
+);
+
+// Qualified form: the staged request is dropped when inst_ifu_flush2_w is set.
+assign tlz_trap_g[0] = ~inst_ifu_flush2_w & tlz_trap_nq_g[0],
+       tlz_trap_g[1] = ~inst_ifu_flush2_w & tlz_trap_nq_g[1],
+       tlz_trap_g[2] = ~inst_ifu_flush2_w & tlz_trap_nq_g[2],
+       tlz_trap_g[3] = ~inst_ifu_flush2_w & tlz_trap_nq_g[3];
+
+//=========================================================================================
+//	EXCEPTION HANDLING
+//=========================================================================================
+// modified to test out timing - 
+/*
+assign	tlu_ifu_flush_pipe_w = 
+ 	(thrd0_traps_flush | thrd1_traps_flush | thrd2_traps_flush | thrd3_traps_flush) & 
+     inst_vld_g; 
+//
+assign	tlu_ifu_flush_pipe_w = 
+            (dside_sync_trap_g & inst_vld_g) | local_early_flush_pipe_w;  
+*/
+// tlu_ifu_flush_pipe_w: OR of the local early flush, the LSU trap-type valid,
+// the LSU deferred-trap-taken indication and a valid early d-side trap.
+// modified for bug 4561; the following term is commented out in the original:
+//   (lsu_defr_trap_g & (thrid_g[1:0] == thrid_w2[1:0]))  |
+assign tlu_ifu_flush_pipe_w =
+    local_early_flush_pipe_w | lsu_ttype_vld_w | lsu_tlu_defr_trp_taken_g |
+    (inst_vld_g & early_dside_trap_g);
+
+// modified for timing fix
+assign tlu_flush_all_w =
+    local_early_flush_pipe_w | inst_ifu_flush_w |
+    (inst_vld_nf_g & lsu_tlu_early_flush_w);
+
+// staging the all flush signal
+dffr_s dffr_tlu_flush_all_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (tlu_flush_all_w),
+    .q   (tlu_flush_all_w2),
+    .se  (se), .si (), .so ()
+);
+//
+// added for timing
+// added for timing: LSU trap-type valid qualified by inst_vld_g
+assign lsu_ttype_vld_w = inst_vld_g & lsu_tlu_ttype_vld_m2;
+
+// staging the flush-pipe signal
+dffr_s dffr_lsu_ttype_vld_w2 (
+    .clk (clk),
+    .rst (local_rst),
+    .din (lsu_ttype_vld_w),
+    .q   (lsu_ttype_vld_w2),
+    .se  (se), .si (), .so ()
+);
+
+// tlu_flush_pipe_w is a copy of tlu_ifu_flush_pipe_w
+assign tlu_flush_pipe_w = tlu_ifu_flush_pipe_w;
+
+// added for timing: OR of the two staged flush signals
+assign tlu_full_flush_pipe_w2 = tlu_flush_all_w2 | lsu_ttype_vld_w2;
+
+// added for early flush pipe timing fix
+// assign tlu_early_flush_pipe_m = sync_trap_taken_m;  
+
+// Four separate staged copies of sync_trap_taken_m (added for the early
+// flush pipe timing fix). Each copy drives one output.
+assign tlu_local_flush_w          = local_early_flush_pipe_w,
+       tlu_early_flush_pipe2_w    = local_early_flush_pipe2_w,
+       tlu_exu_early_flush_pipe_w = local_early_flush_pipe3_w,
+       tlu_early_flush_pipe_w     = local_early_flush_pipe4_w;
+
+dffr_s dffr_local_early_flush_pipe_w (
+    .clk (clk),
+    .rst (local_rst),
+    .din (sync_trap_taken_m),
+    .q   (local_early_flush_pipe_w),
+    .se  (se), .si (), .so ()
+);
+
+dffr_s dffr_local_early_flush_pipe2_w (
+    .clk (clk),
+    .rst (local_rst),
+    .din (sync_trap_taken_m),
+    .q   (local_early_flush_pipe2_w),
+    .se  (se), .si (), .so ()
+);
+
+dffr_s dffr_local_early_flush_pipe3_w (
+    .clk (clk),
+    .rst (local_rst),
+    .din (sync_trap_taken_m),
+    .q   (local_early_flush_pipe3_w),
+    .se  (se), .si (), .so ()
+);
+
+dffr_s dffr_local_early_flush_pipe4_w (
+    .clk (clk),
+    .rst (local_rst),
+    .din (sync_trap_taken_m),
+    .q   (local_early_flush_pipe4_w),
+    .se  (se), .si (), .so ()
+);
+
+//=========================================================================================
+//	SFSR/SFAR HANDLING
+//=========================================================================================
+
+// TL-is-zero flag of the thread selected by threadN_rsel_e. Priority
+// select: thread 3 is the default when none of threads 0-2 is selected.
+assign thread_tl_zero =
+    thread0_rsel_e ? tlu_lsu_tl_zero[0] :
+    thread1_rsel_e ? tlu_lsu_tl_zero[1] :
+    thread2_rsel_e ? tlu_lsu_tl_zero[2] :
+                     tlu_lsu_tl_zero[3];
+
+// Generate selects for ctxt to be written to tag_access
+// iside trap meant to cover immu_miss and inst_access_excp
+// modified for hypervisor support
+// assign	iside_trap = exu_tlu_ttype_vld_m | immu_va_oor_brnchetc_m | exu_tlu_va_oor_jl_ret_m;
+// removed for timing 
+/*
+assign	iside_trap =
+            ifu_tlu_immu_miss_m | exu_tlu_ttype_vld_m | 
+            immu_va_oor_brnchetc_m | exu_tlu_va_oor_jl_ret_m ;
+  
+assign	tlu_tag_access_ctxt_sel_m[0] = iside_trap &  thread_tl_zero_m;
+assign	tlu_tag_access_ctxt_sel_m[1] = iside_trap & ~thread_tl_zero_m;
+assign	tlu_tag_access_ctxt_sel_m[2] = ~iside_trap;
+*/
+
+// ISFSR
+
+// voor reported for both ifetch and memref - need to distinguish.
+// va-out-of-range for ldst,branch,call,sequential 
+// modified for bug 4763
+// assign	immu_va_oor_brnchetc_m
+// 	= exu_tlu_va_oor_m & ~pstate_am & ~memref_m;
+
+// immu_va_oor_brnchetc_m is the staged ifu_tlu_pc_oor_e; rst is local_rst.
+dffr_s dffr_immu_va_oor_brnchetc_m (
+    .clk (clk),
+    .rst (local_rst),
+    .din (ifu_tlu_pc_oor_e),
+    .q   (immu_va_oor_brnchetc_m),
+    .se  (se), .si (), .so ()
+);
+
+// memref indication staged twice: ifu_lsu_memref_d -> memref_e -> memref_m
+dff_s dff_memref_e (
+    .clk (clk),
+    .din (ifu_lsu_memref_d),
+    .q   (memref_e),
+    .se  (se), .si (), .so ()
+);
+
+// original annotations on this stage: din "ifu_tlu_flsh_inst_e",
+// q "flsh_inst_m"
+dff_s dff_memref_m (
+    .clk (clk),
+    .din (memref_e),
+    .q   (memref_m),
+    .se  (se), .si (), .so ()
+);
+
+// isfsr fault-valid bit of the thread selected by threadN_rsel_m
+assign isfsr_flt_vld_m =
+    (tlu_isfsr_flt_vld[0] & thread0_rsel_m) |
+    (tlu_isfsr_flt_vld[1] & thread1_rsel_m) |
+    (tlu_isfsr_flt_vld[2] & thread2_rsel_m) |
+    (tlu_isfsr_flt_vld[3] & thread3_rsel_m);
+
+// pstate.am for all threads is passed through to the LSU output
+assign tlu_lsu_pstate_am[3:0] = tlu_pstate_am[3:0];
+
+// pstate.am of the thread selected by threadN_rsel_m
+assign pstate_am =
+    (tlu_pstate_am[0] & thread0_rsel_m) |
+    (tlu_pstate_am[1] & thread1_rsel_m) |
+    (tlu_pstate_am[2] & thread2_rsel_m) |
+    (tlu_pstate_am[3] & thread3_rsel_m);
+
+// staged copy of pstate_am drives tlu_addr_msk_g
+dff_s #(1) dff_am_stgg (
+    .clk (clk),
+    .din (pstate_am),
+    .q   (tlu_addr_msk_g),
+    .se  (se), .si (), .so ()
+);
+//
+// logic moved to lsu_expctl due to timing
+/*
+assign	pstate_priv = 
+	(thread0_rsel_m & tlu_pstate_priv[0]) |
+	(thread1_rsel_m & tlu_pstate_priv[1]) |
+	(thread2_rsel_m & tlu_pstate_priv[2]) |
+	(thread3_rsel_m & tlu_pstate_priv[3]);
+*/
+
+
+// TL-is-zero flag of the thread selected by threadN_rsel_g
+assign trp_lvl_zero =
+    (tlu_lsu_tl_zero[0] & thread0_rsel_g) |
+    (tlu_lsu_tl_zero[1] & thread1_rsel_g) |
+    (tlu_lsu_tl_zero[2] & thread2_rsel_g) |
+    (tlu_lsu_tl_zero[3] & thread3_rsel_g);
+
+// ISFSR fault-type selects. Bit 0 (privilege violation) takes priority:
+// bits 1 and 2 are both gated by ~isfsr_ftype_sel[0].
+assign isfsr_ftype_sel[0] = ifu_tlu_priv_violtn_m;
+
+// The 2 out of range exceptions are mutex as they are based on inst type.
+assign isfsr_ftype_sel[1] = immu_va_oor_brnchetc_m & ~isfsr_ftype_sel[0];
+
+// modified for bug 4452: also requires that neither exu_tlu_ttype_vld_m nor
+// ifu_tlu_ttype_vld_m is set, and that pstate_am is clear.
+assign isfsr_ftype_sel[2] =
+    exu_tlu_va_oor_jl_ret_m & ~isfsr_ftype_sel[0] &
+    ~exu_tlu_ttype_vld_m & ~ifu_tlu_ttype_vld_m & ~pstate_am;
+
+// ISFSR trap write request: any of the three selects is set
+assign isfsr_trp_wr_m = |isfsr_ftype_sel[2:0];
+
+dff_s #(1) dff_isfsrw_stgg (
+    .clk (clk),
+    .din (isfsr_trp_wr_m),
+    .q   (isfsr_trp_wr_g),
+    .se  (se), .si (), .so ()
+);
+
+// itag access select: ISFSR trap write or an immu miss, staged
+dff_s #(1) dff_itag_acc_sel_g (
+    .clk (clk),
+    .din (isfsr_trp_wr_m | ifu_tlu_immu_miss_m),
+    .q   (itag_acc_sel_g),
+    .se  (se), .si (), .so ()
+);
+
+assign tlu_itag_acc_sel_g = itag_acc_sel_g;
+
+// terms below can be made common. (grape)
+// recoded for timing - flush qualification moved to mmu_ctl
+// Per-thread immu sfsr trap write: the staged ISFSR trap write qualified
+// by inst_vld_nf_g and the thread select.
+assign immu_sfsr_trp_wr[0] = thread0_rsel_g & inst_vld_nf_g & isfsr_trp_wr_g,
+       immu_sfsr_trp_wr[1] = thread1_rsel_g & inst_vld_nf_g & isfsr_trp_wr_g,
+       immu_sfsr_trp_wr[2] = thread2_rsel_g & inst_vld_nf_g & isfsr_trp_wr_g,
+       immu_sfsr_trp_wr[3] = thread3_rsel_g & inst_vld_nf_g & isfsr_trp_wr_g;
+
+// Fault-type field: select 2 -> bit 6, select 1 -> bit 5, select 0 -> bit 0;
+// bits 4:1 are tied to zero.
+assign isfsr_ftype_m[6:0] =
+    {isfsr_ftype_sel[2], isfsr_ftype_sel[1], 4'b0000, isfsr_ftype_sel[0]};
+
+// modified due to timing: stage the fault type and fault valid together
+// (the original annotates this register with "pstate_priv" / "pstate_priv_g").
+dff_s #(8) dff_isfsr_stgg (
+    .clk (clk),
+    .din ({isfsr_ftype_m[6:0], isfsr_flt_vld_m}),
+    .q   ({isfsr_ftype_g[6:0], isfsr_flt_vld_g}),
+    .se  (se), .si (), .so ()
+);
+
+// Can we remove the excessive bits in isfsr ?
+// Do jmpl/rtrn define the asi in i or dsfsr ? seems only jmpl_rtrn mem_addr_not_aligned
+// traps set the asi and that too in the dsfsr
+// Need to add ctxt !!!
+
+assign isfsr_ctxt_g[1:0] =
+	       trp_lvl_zero ? 2'b00 : 2'b10;
+
+dff_s #(1) dff_thread_tl_zero_m (
+    .din (thread_tl_zero),
+    .q   (thread_tl_zero_m),
+    .clk (clk),
+    .se  (se),
+    .si (),
+    .so ()
+);
+
+dff_s #(1) dff_thread_tl_zero_g (
+    .din (thread_tl_zero_m),
+    .q   (thread_tl_zero_g),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+assign isfsr_asi_g[7:0] = 
+           thread_tl_zero_g ? 8'h80 : 8'h04; 
+//
+// modified for bug 3323
+assign	tlu_isfsr_din_g[23:0] =
+	{isfsr_asi_g[7:0],2'b0,isfsr_ftype_g[6:0],1'b0,isfsr_ctxt_g[1:0],2'b0,isfsr_flt_vld_g,1'b1};
+
+assign	dmmu_va_oor_m = exu_tlu_va_oor_m & ~pstate_am & memref_m & ~lsu_tlu_squash_va_oor_m;
+
+dff_s #(3) dff_dsfsr_stgg (
+    .din ({dmmu_va_oor_m,// memref_m,
+           exu_tlu_misalign_addr_jmpl_rtn_m,
+	       lsu_tlu_misalign_addr_ldst_atm_m}),
+    .q   ({dmmu_va_oor_g,
+	       misalign_addr_jmpl_rtn_g,
+           misalign_addr_ldst_atm_g}),
+    .clk (clk),
+    .se  (se),
+    .si (),
+    .so ()
+);
+
+//=========================================================================================
+//	GLOBAL REGISTER SWITCHING
+//=========================================================================================
+// modified for bug 3827
+//
+assign agp_tid_sel = 
+           (dnrtry_inst_g) | (tlu_gl_rw_g & wsr_inst_g); 
+assign	agp_tid_g[1:0] = 
+            agp_tid_sel ? thrid_g[1:0] : trap_tid_g[1:0]; 
+
+dff_s #(2) dff_tlu_agp_tid_w2 (
+    .din (agp_tid_g[1:0]),
+    .q   (agp_tid_w2[1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+//
+// added for timing
+dff_s #(2) dff_agp_tid_w3 (
+    .din (agp_tid_w2[1:0]),
+    .q   (agp_tid_w3[1:0]),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+assign tlu_agp_tid_w2[1:0]  = agp_tid_w2[1:0];
+assign tlu_exu_agp_tid[1:0] = agp_tid_w3[1:0];
+
+//=========================================================================================
+//	CWP/CCR restoration
+//=========================================================================================
+// code moved to tlu_misctl
+/*
+dff_s #(8) dff_ccr_stgm (
+    .din (tsa_rdata_ccr[7:0]),
+    .q   (tlu_exu_ccr_m[7:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s #(3) dff_cwp_stgm (
+    .din (tsa_rdata_cwp[2:0]),
+    .q   (tlu_exu_cwp_m[2:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s #(8) dff_lsu_asi_m (
+    .din (tsa_rdata_asi[7:0]),
+    .q   (tlu_lsu_asi_m[7:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+    );
+*/
+//
+
+assign	tlu_exu_tid_m[1:0] = thrid_m[1:0];
+
+assign	tlu_int_tid_m[1:0] = tlu_exu_tid_m[1:0];
+assign	tlu_lsu_tid_m[1:0] = tlu_exu_tid_m[1:0];
+
+// modified due to timing violations
+assign	tlu_lsu_asi_update_m = tlu_exu_cwpccr_update_m;
+
+// One-hot decode of exu_tlu_cwp_cmplt by exu_tlu_cwp_cmplt_tid[1:0]. Assumption
+// is that this will be transmitted in the equivalent of the w-stage from the exu.
+assign  cwp_cmplt0 = exu_tlu_cwp_cmplt &
+			~exu_tlu_cwp_cmplt_tid[1] & ~exu_tlu_cwp_cmplt_tid[0];
+assign  cwp_cmplt1 = exu_tlu_cwp_cmplt &
+			~exu_tlu_cwp_cmplt_tid[1] &  exu_tlu_cwp_cmplt_tid[0];
+assign  cwp_cmplt2 = exu_tlu_cwp_cmplt &
+			 exu_tlu_cwp_cmplt_tid[1] & ~exu_tlu_cwp_cmplt_tid[0];
+assign  cwp_cmplt3 = exu_tlu_cwp_cmplt &
+			 exu_tlu_cwp_cmplt_tid[1] &  exu_tlu_cwp_cmplt_tid[0];
+
+
+assign  pending_dntry0_taken = pending_thrd0_event_taken & cwp_cmplt0_pending;
+assign  pending_dntry1_taken = pending_thrd1_event_taken & cwp_cmplt1_pending;
+assign  pending_dntry2_taken = pending_thrd2_event_taken & cwp_cmplt2_pending;
+assign  pending_dntry3_taken = pending_thrd3_event_taken & cwp_cmplt3_pending;
+
+// Any pending cwp change completes: OR of the four per-thread terms above.
+// ** This equation can be optimized in terms of gate count **
+assign  cwp_cmplt_g =
+		(pending_dntry3_taken | pending_dntry2_taken) |
+		(pending_dntry1_taken | pending_dntry0_taken);
+
+// A cwp change related to retry completes: per-thread term & cwp_retryN.
+assign  cwp_cmplt_rtry_g =
+		(pending_dntry0_taken & cwp_retry0) |
+		(pending_dntry1_taken & cwp_retry1) |
+		(pending_dntry2_taken & cwp_retry2) |
+		(pending_dntry3_taken & cwp_retry3);
+// 
+
+dff_s #(2) dff_ccmplt_stgw2 (
+    .din ({cwp_cmplt_g,cwp_cmplt_rtry_g}),
+    .q   ({cwp_cmplt_w2,cwp_cmplt_rtry_w2}),
+    .clk (clk),
+    .se  (se),       
+    .si  (),          
+    .so  ()
+);
+
+//=========================================================================================
+//	Generate SSCAN data 
+//=========================================================================================
+// 
+assign sscan_tid_sel[`TLU_THRD_NUM-1:0] = ctu_sscan_tid[`TLU_THRD_NUM-1:0]; 
+/* 
+// logic moved to tlu_misctl
+// generating write indicators of ttype to the tsa
+assign sscan_tt_wr_sel[0] = 
+           tsa_ttype_en & tsa_wr_vld[1] & thread0_wtrp_w2; 
+assign sscan_tt_wr_sel[1] = 
+           tsa_ttype_en & tsa_wr_vld[1] & thread1_wtrp_w2; 
+assign sscan_tt_wr_sel[2] = 
+           tsa_ttype_en & tsa_wr_vld[1] & thread2_wtrp_w2; 
+assign sscan_tt_wr_sel[3] = 
+           tsa_ttype_en & tsa_wr_vld[1] & thread3_wtrp_w2;
+//
+// generating read indicators of ttype from the tsa
+assign sscan_tt_rd_sel[0] = 
+           tsa_rd_vld_m & thread0_rsel_m; 
+assign sscan_tt_rd_sel[1] = 
+           tsa_rd_vld_m & thread1_rsel_m; 
+assign sscan_tt_rd_sel[2] = 
+           tsa_rd_vld_m & thread2_rsel_m; 
+assign sscan_tt_rd_sel[3] = 
+           tsa_rd_vld_m & thread3_rsel_m; 
+
+assign sscan_ttype_en[0] = 
+           sscan_tt_rd_sel[0] | sscan_tt_wr_sel[0]; 
+assign sscan_ttype_en[1] = 
+           sscan_tt_rd_sel[1] | sscan_tt_wr_sel[1]; 
+assign sscan_ttype_en[2] = 
+           sscan_tt_rd_sel[2] | sscan_tt_wr_sel[2]; 
+assign sscan_ttype_en[3] = 
+           sscan_tt_rd_sel[3] | sscan_tt_wr_sel[3]; 
+//
+assign sscan_tt0_din[`TSA_TTYPE_WIDTH-1:0] = 
+           (sscan_tt_wr_sel[0]) ? 
+            final_ttype_w2[`TSA_TTYPE_WIDTH-1:0] : tsa_rdata_ttype_m[`TSA_TTYPE_WIDTH-1:0];  
+assign sscan_tt1_din[`TSA_TTYPE_WIDTH-1:0] = 
+           (sscan_tt_wr_sel[1]) ? 
+            final_ttype_w2[`TSA_TTYPE_WIDTH-1:0] : tsa_rdata_ttype_m[`TSA_TTYPE_WIDTH-1:0];  
+assign sscan_tt2_din[`TSA_TTYPE_WIDTH-1:0] = 
+           (sscan_tt_wr_sel[2]) ? 
+            final_ttype_w2[`TSA_TTYPE_WIDTH-1:0] : tsa_rdata_ttype_m[`TSA_TTYPE_WIDTH-1:0];  
+assign sscan_tt3_din[`TSA_TTYPE_WIDTH-1:0] = 
+           (sscan_tt_wr_sel[3]) ? 
+            final_ttype_w2[`TSA_TTYPE_WIDTH-1:0] : tsa_rdata_ttype_m[`TSA_TTYPE_WIDTH-1:0];  
+//
+dffe_s #(`TSA_TTYPE_WIDTH) dffe_sscan_tt0_data (
+    .din (sscan_tt0_din[`TSA_TTYPE_WIDTH-1:0]), 
+    .q   (sscan_tt0_data[`TSA_TTYPE_WIDTH-1:0]),
+    .en  (sscan_ttype_en[0]), 
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffe_s #(`TSA_TTYPE_WIDTH) dffe_sscan_tt1_data (
+    .din (sscan_tt1_din[`TSA_TTYPE_WIDTH-1:0]), 
+    .q   (sscan_tt1_data[`TSA_TTYPE_WIDTH-1:0]),
+    .en  (sscan_ttype_en[1]), 
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffe_s #(`TSA_TTYPE_WIDTH) dffe_sscan_tt2_data (
+    .din (sscan_tt2_din[`TSA_TTYPE_WIDTH-1:0]), 
+    .q   (sscan_tt2_data[`TSA_TTYPE_WIDTH-1:0]),
+    .en  (sscan_ttype_en[2]), 
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dffe_s #(`TSA_TTYPE_WIDTH) dffe_sscan_tt3_data (
+    .din (sscan_tt3_din[`TSA_TTYPE_WIDTH-1:0]), 
+    .q   (sscan_tt3_data[`TSA_TTYPE_WIDTH-1:0]),
+    .en  (sscan_ttype_en[3]), 
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s #(`TSA_TTYPE_WIDTH) dff_tsa_rdata_ttype_m (
+    .din (tsa_rdata_ttype[`TSA_TTYPE_WIDTH-1:0]), 
+	.q   (tsa_rdata_ttype_m[`TSA_TTYPE_WIDTH-1:0]),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tsa_rd_vld_e ( 
+    .din (tsa_rd_vld),
+	.q   (tsa_rd_vld_e),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+dff_s dff_tsa_rd_vld_m (
+    .din (tsa_rd_vld_e),
+	.q   (tsa_rd_vld_m),
+    .clk (clk),
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// modified - due to sscan_tt[0-3]_data moved to tlu_misctl
+mux4ds #(`TCL_SSCAN_WIDTH) mx_sscan_test_data (
+       .in0  ({trp_lvl0[2:0],sscan_tt0_data[`TSA_TTYPE_WIDTH-1:0]}),
+       .in1  ({trp_lvl1[2:0],sscan_tt1_data[`TSA_TTYPE_WIDTH-1:0]}),
+       .in2  ({trp_lvl2[2:0],sscan_tt2_data[`TSA_TTYPE_WIDTH-1:0]}),
+       .in3  ({trp_lvl3[2:0],sscan_tt3_data[`TSA_TTYPE_WIDTH-1:0]}),
+       .sel0 (sscan_tid_sel[0]),
+       .sel1 (sscan_tid_sel[1]),
+       .sel2 (sscan_tid_sel[2]),
+       .sel3 (sscan_tid_sel[3]),
+       .dout (tcl_sscan_test_data[`TCL_SSCAN_WIDTH-1:0])
+); 
+*/
+
+mux4ds #(`TCL_SSCAN_WIDTH) mx_sscan_test_data (
+       .in0  (trp_lvl0[2:0]),
+       .in1  (trp_lvl1[2:0]),
+       .in2  (trp_lvl2[2:0]),
+       .in3  (trp_lvl3[2:0]),
+       .sel0 (sscan_tid_sel[0]),
+       .sel1 (sscan_tid_sel[1]),
+       .sel2 (sscan_tid_sel[2]),
+       .sel3 (sscan_tid_sel[3]),
+       .dout (tcl_sscan_test_data[`TCL_SSCAN_WIDTH-1:0])
+); 
+
+assign tlu_sscan_tcl_data[`TCL_SSCAN_WIDTH-1:0] = 
+           tcl_sscan_test_data[`TCL_SSCAN_WIDTH-1:0]; 
+
+//=========================================================================================
+//	Instrumentation signals created for sas 
+//=========================================================================================
+// 
+// synopsys translate_off
+wire [`TSA_TTYPE_WIDTH-1:0] sas_final_ttype_g;
+wire [`TSA_TTYPE_WIDTH-1:0] sas_adj_lsu_ttype_m2;
+wire [6:0] sas_hwint_swint_ttype;
+wire [`TSA_TTYPE_WIDTH-3:0] sas_rst_ttype_g;
+
+mux4ds #(`TSA_TTYPE_WIDTH) mx_sas_final_ttype_g (
+    .sel0 (final_ttype_sel_g[0]),
+    .sel1 (final_ttype_sel_g[1]),
+    .sel2 (final_ttype_sel_g[2]),
+    .sel3 (final_ttype_sel_g[3]),
+    .in0  ({2'b0,sas_rst_ttype_g[`TSA_TTYPE_WIDTH-3:0]}),
+    .in1  (early_sync_ttype_g[`TSA_TTYPE_WIDTH-1:0]),
+    .in2  (sas_adj_lsu_ttype_m2[`TSA_TTYPE_WIDTH-1:0]),
+    .in3  (pending_ttype[`TSA_TTYPE_WIDTH-1:0]),
+    .dout (sas_final_ttype_g[`TSA_TTYPE_WIDTH-1:0])
+); 
+
+mux3ds #(`TSA_TTYPE_WIDTH) mx_sas_adj_lsu_ttype_m2 (
+    .sel0 (lsu_defr_trap_g),
+    .sel1 (va_oor_data_acc_excp_g & ~lsu_defr_trap_g),
+    .sel2 (~(va_oor_data_acc_excp_g | lsu_defr_trap_g)),
+    .in0  (9'h032),
+    .in1  (9'h030),
+    .in2  (lsu_tlu_ttype_m2),
+    .dout (sas_adj_lsu_ttype_m2[`TSA_TTYPE_WIDTH-1:0])
+); 
+
+assign sas_hwint_swint_ttype[6:0] =
+       (hwint_g)? `HWINT_INT:
+       (cpu_mondo_trap_g)? `CPU_MONDO_TRAP:
+       (dev_mondo_trap_g)? `DEV_MONDO_TRAP:
+        {3'b100, tlu_sftint_id[3:0]};
+
+assign sas_rst_ttype_g[`TSA_TTYPE_WIDTH-3:0] =
+       (rst_ttype_sel[0])? {4'b00,reset_id_g[2:0]}:
+       (rst_ttype_sel[1])? wrap_tlz_ttype[6:0]: 
+        sas_hwint_swint_ttype[6:0];
+
+// synopsys translate_on
+endmodule
Index: /trunk/T1-CPU/tlu/sparc_tlu_intctl.v
===================================================================
--- /trunk/T1-CPU/tlu/sparc_tlu_intctl.v	(revision 6)
+++ /trunk/T1-CPU/tlu/sparc_tlu_intctl.v	(revision 6)
@@ -0,0 +1,588 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_tlu_intctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: sparc_tlu_intctl
+//  Description:        
+//    Contains the code for receiving interrupts from the crossbar,
+//    and sending interrupts out to other processors through the crossbar.
+//    The interrupt receive register (INRR, asi=0x72/VA=0),  incoming
+//    vector register (INVR, asi=0x74/VA=0x40), and interrupt vector
+//    dispatch register (INDR, asi=0x73/VA=0) are implemented in this
+//    block.  This block also initiates thread reset/wake up when a
+//    reset packet is received.  
+//
+*/
+
+`include "iop.h"
+
+// from intdp.v for now
+`define INT_THR_HI  12
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+`include "tlu.h"
+
+module sparc_tlu_intctl(/*AUTOARG*/
+   // Outputs
+   so, int_rst_l, tlu_ifu_hwint_i3, tlu_ifu_rstthr_i2, tlu_ifu_rstint_i2, 
+   tlu_ifu_nukeint_i2, tlu_ifu_resumint_i2, tlu_ifu_pstate_ie, 
+   int_tlu_longop_done,  inc_ind_ld_int_i1, inc_indr_req_valid, 
+   inc_ind_rstthr_i1, // inc_ind_asi_thr, inc_ind_asi_wr_inrr, 
+   // inc_ind_asi_rd_invr, inc_ind_asi_inrr, inc_ind_asi_wr_indr, 
+   inc_ind_indr_grant, inc_ind_thr_m, tlu_lsu_int_ld_ill_va_w2,
+   inc_indr_req_thrid, tlu_asi_data_nf_vld_w2, tlu_asi_rdata_mxsel_g, 
+   // Inputs
+   // modified to abide to the Niagara reset methodology 
+   // clk, se, si, reset, const_cpuid, lsu_tlu_cpx_vld, lsu_tlu_cpx_req, 
+   rclk, se, sehold, si, rst_tri_en, arst_l, grst_l, const_cpuid,  
+   lsu_tlu_cpx_vld, lsu_tlu_cpx_req, lsu_tlu_pcxpkt_ack, tlu_ld_data_vld_g, 
+   ind_inc_thrid_i1, ind_inc_type_i1, tlu_int_asi_vld, tlu_int_asi_load, 
+   tlu_int_asi_store, tlu_int_asi_thrid, tlu_int_asi_state, tlu_int_tid_m, 
+   tlu_int_pstate_ie, int_pending_i2_l, // indr_inc_rst_pkt, tlu_int_redmode,  
+   tlu_asi_queue_rd_vld_g, tlu_va_ill_g); // tlu_flush_all_w2
+
+//
+// modified to abide to the Niagara reset methodology 
+//   input            clk, se, si, reset;
+   input            rclk, se, si;
+   input            arst_l, grst_l;  
+   input            sehold; 
+   input            rst_tri_en; 
+   input [3:0] 	    const_cpuid;
+
+   input 	    lsu_tlu_cpx_vld;    // cpx from lsu
+   input [3:0] 	    lsu_tlu_cpx_req;    // cpx req type
+   // the flush bit is included in lsu_tlu_cpx_vld
+   // input 	    lsu_tlu_cpx_nc;
+   input 	    lsu_tlu_pcxpkt_ack;
+   
+// removed unused pins
+// input [`INT_THR_HI:0] lsu_tlu_st_rs3_data_g;
+// input 	    lsu_tlu_pmode;
+// input [3:0] 	tlu_int_sftint_pend;
+   
+   input [4:0] 	ind_inc_thrid_i1; // connect to lsu_tlu_intpkt[12:8]
+   input [1:0]	ind_inc_type_i1;  // connect to lsu_tlu_intpkt[16]
+
+   input 	    tlu_int_asi_vld;
+   input 	    tlu_int_asi_load;  // read enable
+   input 	    tlu_int_asi_store; // write enable
+   input [1:0] 	tlu_int_asi_thrid; // thread making asi request
+   input [7:0] 	tlu_int_asi_state; // asi to be read/written
+   // input        tlu_scpd_rd_vld_g; // rdata vld from scratchpad
+   // removed no longer necessary 
+   // input        tlu_va_all_zero_g; // va address - all zero 
+   input        tlu_va_ill_g;      // illegal va range 
+   input        tlu_asi_queue_rd_vld_g; // rdata vld from asi queues
+   input        tlu_ld_data_vld_g; // rdata vld from asi queues
+   // input        tlu_flush_all_w2;  // flush pipe from tcl 
+
+   input [1:0] 	tlu_int_tid_m;
+   
+   input [3:0] 	tlu_int_pstate_ie;
+   // input [3:0] 	tlu_int_redmode;
+   
+   // from int_dp
+   input [3:0] 	    int_pending_i2_l;   // uncleared interrupt
+   // input 	    indr_inc_rst_pkt;
+   // added for timing
+   // input [1:0]	lsu_tlu_rst_pkt;
+
+   output 	    int_rst_l, so;
+
+   // to ifu
+   output [3:0]     tlu_ifu_hwint_i3;   // interrupt
+   output [3:0]     tlu_ifu_rstthr_i2;  // reset, nuke or resume
+   output 	    tlu_ifu_rstint_i2;  // reset msg
+   output           tlu_ifu_nukeint_i2; // idle/suspend message
+   output 	    tlu_ifu_resumint_i2;// resume message
+   output [3:0]	    tlu_ifu_pstate_ie;
+   
+   output [3:0]     int_tlu_longop_done;
+// 
+// removed - IFU will derive the signal locally
+//   output [3:0]     tlu_ifu_int_activate_i3;// wake up signal for thread
+   
+   // to int_dp
+   output [3:0]     inc_ind_ld_int_i1;          // ld new interrupt
+   output [3:0]     inc_ind_rstthr_i1;          // ld new rst vector
+   
+   // convert the signal back to non-inverting version for grape
+   // output [3:0]     inc_ind_asi_thr_l;          // choose asi op thread
+   // output [3:0]     inc_ind_asi_thr;          // choose asi op thread
+   // output [3:0]     inc_ind_asi_wr_inrr;        // write to INRR (per thread)
+   // output [3:0]     inc_ind_asi_wr_indr;        // write to INDR
+   // output [3:0]     inc_ind_asi_rd_invr;        // read INVR and 
+                                                // reset corr. bit in INRR
+   // obsolete output
+   // output 	    inc_ind_asi_inrr;           // choose which reg to read
+   // convert the signal back to non-inverting version for grape
+   // output [3:0]     inc_ind_indr_grant_l;       // move on to next pcx pkt
+   output [3:0]     inc_ind_indr_grant;       // move on to next pcx pkt
+   // convert the signal back to non-inverting version for grape
+   // output [3:0]     inc_ind_thr_m_l;            // M stage thread
+   output [3:0]     inc_ind_thr_m;            // M stage thread
+   
+   // pcx pkt fields
+   output 	    inc_indr_req_valid;     // valid bit for PCX int pkt
+   output [1:0] inc_indr_req_thrid;     // thread sending pcx int pkt
+
+   // to tlu
+   // output tlu_lsu_int_ldxa_vld_w2;  // valid asi data from int or scpd 
+   output tlu_asi_data_nf_vld_w2;  // valid asi data from int or scpd 
+   output tlu_lsu_int_ld_ill_va_w2; // illegal va range - load  
+   // to intdp
+   output [3:0] tlu_asi_rdata_mxsel_g; // mux selects to the asi rdata
+
+   // local signals
+   // wire indr_inc_rst_pkt;
+   wire inc_ind_asi_inrr;           // choose which reg to read
+   wire	int_tlu_asi_data_vld_g, int_tlu_asi_data_vld_w2;
+   wire	int_ld_ill_va_g, int_ld_ill_va_w2;
+   wire hw_int_i1,
+		rst_int_i1,
+		nuke_int_i1,
+		resum_int_i1;
+
+   wire [3:0] 	    int_thr_i1,
+		    rstthr_i1,
+		    asi_thr;
+
+   wire [3:0] 	    int_pending_i2;
+// 		    int_activate_i2;
+   
+   wire 	    asi_write, 
+		    asi_read,
+		    asi_invr,
+		    asi_indr;
+   
+   wire [3:0] 	    indr_vld,
+		    indr_rst,
+		    indr_vld_next,
+		    indr_grant;
+
+   // added for bug 3945
+   wire [3:0] indr_req_vec;
+   wire indr_req_valid_disable;
+
+   // wire [3:0] 	    int_or_redrst;
+   wire [3:0] 	    intd_done;
+
+   // wire 	    red_thread, valid_dest;
+   wire 	    local_rst;  // local reset 
+   wire 	    local_rst_l;  // local reset 
+   wire 	    clk;        // local clk 
+   
+
+   //
+   // Code Starts Here
+   //
+   //=========================================================================================
+   //	reset
+   //=========================================================================================
+
+   dffrl_async dffrl_local_rst_l(
+       .din  (grst_l),
+       .clk  (clk),
+       .rst_l(arst_l),
+       .q    (local_rst_l),
+       .se   (se),
+       .si   (),
+       .so   ()
+   ); 
+   assign local_rst = ~local_rst_l;
+   assign int_rst_l = local_rst_l;
+
+   // create local clk
+   assign clk = rclk; 
+
+   //-------------------------------------
+   // Basic Operation
+   //-------------------------------------
+   sink s1(const_cpuid[3]);
+
+   assign  tlu_ifu_pstate_ie = tlu_int_pstate_ie;
+
+   // process cpx interrupt type
+   // the flush bit from cpx packet is now included in the
+   // lsu_tlu_cpx_vld qualification, so the former ~lsu_tlu_cpx_nc term
+   // is not needed in these decodes
+   //
+   // Qualifier shared by all four decodes below: a valid cpx packet whose
+   // request type is `INT_RET and whose ind_inc_thrid_i1[4:2] field equals
+   // const_cpuid[2:0].  While it is low all four outputs are 0.
+   wire    cpx_int_sel_i1;
+   assign  cpx_int_sel_i1 =
+               lsu_tlu_cpx_vld &
+               (lsu_tlu_cpx_req == `INT_RET) &
+               (ind_inc_thrid_i1[4:2] == const_cpuid[2:0]);
+
+   // ind_inc_type_i1[1:0] then selects exactly one message kind:
+   //   00 -> hw_int_i1      01 -> rst_int_i1
+   //   10 -> nuke_int_i1    11 -> resum_int_i1
+
+   // int = 00
+   assign  hw_int_i1 =
+               cpx_int_sel_i1 &
+               ~ind_inc_type_i1[1] & ~ind_inc_type_i1[0];
+
+   // reset = 01
+   assign  rst_int_i1 =
+               cpx_int_sel_i1 &
+               ~ind_inc_type_i1[1] &  ind_inc_type_i1[0];
+
+   // idle/nuke = 10
+   assign  nuke_int_i1 =
+               cpx_int_sel_i1 &
+                ind_inc_type_i1[1] & ~ind_inc_type_i1[0];
+
+   // resume = 11
+   assign  resum_int_i1 =
+               cpx_int_sel_i1 &
+                ind_inc_type_i1[1] &  ind_inc_type_i1[0];
+
+   dffr_s #1  rstint_ff(.din  (rst_int_i1),
+		      .q    (tlu_ifu_rstint_i2),
+		      .clk  (clk),
+//
+// modified to abide to the Niagara reset methodology 
+//		      .rst  (reset),
+		      .rst  (local_rst),
+		      .se   (se), .si(), .so());
+   
+   dffr_s #1  nukint_ff(.din  (nuke_int_i1),
+		      .q    (tlu_ifu_nukeint_i2),
+		      .clk  (clk),
+//
+// modified to abide to the Niagara reset methodology 
+//		      .rst  (reset),
+		      .rst  (local_rst),
+		      .se   (se), .si(), .so());
+   
+   dffr_s #1  resint_ff(.din  (resum_int_i1),
+		      .q    (tlu_ifu_resumint_i2),
+		      .clk  (clk),
+//
+// modified to abide to the Niagara reset methodology 
+//		      .rst  (reset),
+		      .rst  (local_rst),
+		      .se   (se), .si(), .so());
+   
+   // decode int thread id: int_thr_i1 is one-hot in ind_inc_thrid_i1[1:0]
+   assign  int_thr_i1[3:0] = {( ind_inc_thrid_i1[1] &  ind_inc_thrid_i1[0]),
+                              ( ind_inc_thrid_i1[1] & ~ind_inc_thrid_i1[0]),
+                              (~ind_inc_thrid_i1[1] &  ind_inc_thrid_i1[0]),
+                              (~ind_inc_thrid_i1[1] & ~ind_inc_thrid_i1[0])};
+
+   assign  inc_ind_ld_int_i1[3:0] = int_thr_i1[3:0] & {4{hw_int_i1}};
+   assign  inc_ind_rstthr_i1[3:0] = int_thr_i1[3:0] & {4{rst_int_i1}};
+   assign  rstthr_i1[3:0] = int_thr_i1[3:0] &
+                            {4{rst_int_i1 | nuke_int_i1 | resum_int_i1}};
+
+   // decode thr_m
+   // convert the signal back to non-inverting version for grape
+   /*
+   assign  inc_ind_thr_m_l[0] = ~(~tlu_int_tid_m[1] & ~tlu_int_tid_m[0]);
+   assign  inc_ind_thr_m_l[1] = ~(~tlu_int_tid_m[1] &  tlu_int_tid_m[0]);
+   assign  inc_ind_thr_m_l[2] = ~( tlu_int_tid_m[1] & ~tlu_int_tid_m[0]);
+   assign  inc_ind_thr_m_l[3] = ~( tlu_int_tid_m[1] &  tlu_int_tid_m[0]);
+   */
+
+   assign  inc_ind_thr_m[0] = ~tlu_int_tid_m[1] & ~tlu_int_tid_m[0];
+   assign  inc_ind_thr_m[1] = ~tlu_int_tid_m[1] &  tlu_int_tid_m[0];
+   assign  inc_ind_thr_m[2] =  tlu_int_tid_m[1] & ~tlu_int_tid_m[0];
+   assign  inc_ind_thr_m[3] =  tlu_int_tid_m[1] &  tlu_int_tid_m[0];
+   
+
+   // Interrupt continues to be signalled even 1 cycle after read is
+   // done.  This should not be a problem, since the lsu will probably
+   // burn one cycle to complete the read by forwarding it to the reg
+   // file.  Otherwise, just burn another cycle in the IFU before
+   // starting the thread (this is also done right now).
+
+   assign  int_pending_i2 = ~int_pending_i2_l;
+
+   // removed - IFU will derive the signal locally
+   /*
+   assign  int_activate_i2 = ~int_pending_i2_l | tlu_int_sftint_pend;
+   // send message to SWL to wake up thread if it is halted
+   dff_s #4 act_signal_reg(.din (int_activate_i2[3:0]),
+			 .q   (tlu_ifu_int_activate_i3[3:0]),
+			 .clk (clk),
+			 .se  (se), .si(), .so());
+   */
+   
+   // ask IFU to schedule interrupt
+   dff_s #4 int_signal_reg(.din (int_pending_i2[3:0]),
+			 .q   (tlu_ifu_hwint_i3[3:0]),
+			 .clk (clk),
+			 .se  (se), .si(), .so());
+
+   dff_s #4 rst_signal_reg(.din (rstthr_i1[3:0]),
+			 .q   (tlu_ifu_rstthr_i2[3:0]),
+			 .clk (clk),
+			 .se  (se), .si(), .so());
+
+
+   //----------------------------------
+   // ASI Registers
+   //----------------------------------
+   //ASI_INTR_RECEIVE: 0x72
+   //ASI_UDB_INTR_W: 0x73
+   //ASI_UDB_INTR_R: 0x74
+   //ASI_MESSAGE_MASK: 0x7D
+
+   // decode asi thread: asi_thr is one-hot in tlu_int_asi_thrid[1:0]
+   assign  asi_thr[3:0] = {( tlu_int_asi_thrid[1] &  tlu_int_asi_thrid[0]),
+                           ( tlu_int_asi_thrid[1] & ~tlu_int_asi_thrid[0]),
+                           (~tlu_int_asi_thrid[1] &  tlu_int_asi_thrid[0]),
+                           (~tlu_int_asi_thrid[1] & ~tlu_int_asi_thrid[0])};
+
+   // convert the signal back to non-inverting version for grape
+   // assign  inc_ind_asi_thr_l = ~asi_thr;
+   // assign  inc_ind_asi_thr = asi_thr;
+   
+   // read or write op
+   assign  asi_write = tlu_int_asi_vld & tlu_int_asi_store;
+   assign  asi_read = tlu_int_asi_vld & tlu_int_asi_load;
+
+   // decode asi target
+   // ASI_INTR_RECEIVE
+   assign inc_ind_asi_inrr = ~tlu_int_asi_state[7] &
+	      tlu_int_asi_state[6]  &
+	      tlu_int_asi_state[5] &
+	      tlu_int_asi_state[4] &
+	      ~tlu_int_asi_state[3]  &
+	      ~tlu_int_asi_state[2] &
+	      tlu_int_asi_state[1] &
+	      ~tlu_int_asi_state[0];      // 0x72
+
+   // need to also check if VA=0x40
+   // what else is mapped to this asi?
+   // ASI_UDB_INTR_R
+   assign asi_invr = ~tlu_int_asi_state[7] &
+	      tlu_int_asi_state[6]  &
+	      tlu_int_asi_state[5]  &
+	      tlu_int_asi_state[4]  &
+	      ~tlu_int_asi_state[3]  &
+	      tlu_int_asi_state[2]  &
+	      ~tlu_int_asi_state[1]  &
+	      ~tlu_int_asi_state[0];      // 0x74
+
+   // VA<63:19>=0 is not checked
+   // ASI_UDB_INTR_W
+   assign asi_indr = ~tlu_int_asi_state[7] &
+	      tlu_int_asi_state[6]  &
+	      tlu_int_asi_state[5]  &
+	      tlu_int_asi_state[4]  &
+	      ~tlu_int_asi_state[3] &
+	      ~tlu_int_asi_state[2]  &
+	      tlu_int_asi_state[1]  &
+	      tlu_int_asi_state[0];      // 0x73
+   /*	       
+   // ASI_MESSAGE_MASK_REG
+   // not implemented any more
+   assign  inc_ind_asi_wr_inrr = asi_thr & {4{inc_ind_asi_inrr & asi_write}};
+   assign  inc_ind_asi_wr_indr = asi_thr & {4{asi_indr & asi_write}};
+   assign  inc_ind_asi_rd_invr = asi_thr & {4{asi_invr & asi_read}};
+
+   assign  red_thread = (tlu_int_redmode[0] & asi_thr[0] |
+			 tlu_int_redmode[1] & asi_thr[1] |
+			 tlu_int_redmode[2] & asi_thr[2] |
+			 tlu_int_redmode[3] & asi_thr[3]);
+   */
+   // modified for bug 2109 
+   // modified for one-hot mux problem and support of macro test
+   // Selects are one-hot: priority is [0] > [1] > [3], with [2] as the default.
+   assign tlu_asi_rdata_mxsel_g[0] = // asi_invr (asi 0x74 decode), unless rst_tri_en/sehold
+              asi_invr & ~(rst_tri_en | sehold); 
+   assign tlu_asi_rdata_mxsel_g[1] = // inc_ind_asi_inrr (asi 0x72 decode), only when [0] is not selected
+              inc_ind_asi_inrr & ~(rst_tri_en | asi_invr | sehold);
+   assign tlu_asi_rdata_mxsel_g[2] = // default: high when [0], [1], [3] are all low (always so under rst_tri_en/sehold)
+              ~((|tlu_asi_rdata_mxsel_g[1:0]) | tlu_asi_rdata_mxsel_g[3]);
+   assign tlu_asi_rdata_mxsel_g[3] = // asi queue read valid, only when neither asi decode above is active
+              tlu_asi_queue_rd_vld_g & ~(rst_tri_en | asi_invr | sehold |
+              inc_ind_asi_inrr); 
+   // 
+   assign int_tlu_asi_data_vld_g = 
+          ((asi_invr | inc_ind_asi_inrr) & asi_read) | tlu_ld_data_vld_g; 
+            
+
+   dffr_s dffr_int_tlu_asi_data_vld_w2 (
+    .din (int_tlu_asi_data_vld_g),
+    .q   (int_tlu_asi_data_vld_w2),
+    .clk (clk),
+    .rst (local_rst),
+    .se  (1'b0),       
+    .si  (),          
+    .so  ()
+);
+
+// modified for timing
+// assign tlu_lsu_int_ldxa_vld_w2 = 
+//            int_tlu_asi_data_vld_w2 & ~tlu_flush_all_w2;
+
+assign tlu_asi_data_nf_vld_w2 = 
+            int_tlu_asi_data_vld_w2; 
+   // 
+   // illegal va range
+   //
+   /*
+   assign int_ld_ill_va_g = 
+          ((asi_invr | inc_ind_asi_inrr) & asi_read &
+            ~tlu_va_all_zero_g) | tlu_va_ill_g; 
+   */
+   assign int_ld_ill_va_g = tlu_va_ill_g; 
+
+   dffr_s dffr_tlu_lsu_int_ld_ill_va_w2 (
+    .din (int_ld_ill_va_g),
+    // .q   (tlu_lsu_int_ld_ill_va_w2),
+    .q   (int_ld_ill_va_w2),
+    .clk (clk),
+    .rst (local_rst),
+    .se  (1'b0),       
+    .si  (),          
+    .so  ()
+);
+
+assign tlu_lsu_int_ld_ill_va_w2 = int_ld_ill_va_w2;
+   // Write to INDR
+   // Can send reset pkt's only in red mode
+   // modified for timing
+   // modified for bug3170
+   /*
+   assign int_or_redrst[3:0] = 
+              ({4{~indr_inc_rst_pkt}} | tlu_int_redmode[3:0]) & 
+			    asi_thr[3:0];
+
+   assign indr_vld_next[3:0] = 
+              inc_ind_asi_wr_indr[3:0] & int_or_redrst[3:0] |  // set
+	          indr_vld[3:0] & ~indr_rst[3:0];         // reset
+   // 
+   // original code
+   assign indr_vld_next[3] = 
+              (asi_indr & asi_write & asi_thr[3] & 
+              (~(|lsu_tlu_rst_pkt[1:0]) | tlu_int_redmode[3])) |
+              (indr_vld[3] & ~indr_rst[3]); 
+
+   assign indr_vld_next[2] = 
+              (asi_indr & asi_write & asi_thr[2] & 
+              (~(|lsu_tlu_rst_pkt[1:0]) | tlu_int_redmode[2])) |
+              (indr_vld[2] & ~indr_rst[2]); 
+
+   assign indr_vld_next[1] = 
+              (asi_indr & asi_write & asi_thr[1] & 
+              (~(|lsu_tlu_rst_pkt[1:0]) | tlu_int_redmode[1])) |
+              (indr_vld[1] & ~indr_rst[1]); 
+
+   assign indr_vld_next[0] = 
+              (asi_indr & asi_write & asi_thr[0] & 
+              (~(|lsu_tlu_rst_pkt[1:0]) | tlu_int_redmode[0])) |
+              (indr_vld[0] & ~indr_rst[0]); 
+   */
+   // Next state of the per-thread INDR valid bits.
+   //   set  : asi_indr & asi_write, steered to the issuing thread by the
+   //          one-hot asi_thr[3:0]
+   //   hold : indr_vld[n] stays set until indr_rst[n] is asserted
+   // A set in the same cycle as indr_rst[n] wins, exactly as in the
+   // bit-by-bit equations this vector form replaces.
+   // The reset-packet / redmode qualification of the set term seen in the
+   // commented-out original code is not applied.
+   wire [3:0] indr_set;
+   wire [3:0] indr_hold;
+
+   assign indr_set[3:0]  = {4{asi_indr & asi_write}} & asi_thr[3:0];
+   assign indr_hold[3:0] = indr_vld[3:0] & ~indr_rst[3:0];
+
+   assign indr_vld_next[3:0] = indr_set[3:0] | indr_hold[3:0];
+
+   dff_s #4 indr_vld_reg(.din (indr_vld_next[3:0]),
+		       .q   (indr_vld[3:0]),
+		       .clk (clk),
+		       .se  (se), .si(), .so());
+   // 
+   // modified for bug 3945
+   dffr_s dffr_indr_req_valid_disable(
+       .din (|indr_vld[3:0]),
+	   .q   (indr_req_valid_disable),
+	   .clk (clk),
+	   .rst  (local_rst | lsu_tlu_pcxpkt_ack), 
+	   .se  (se), 
+       .si(), 
+       .so());
+
+   dffe_s #(4) dffe_indr_req_vec(
+       .din (indr_vld_next[3:0]),
+	   .q   (indr_req_vec[3:0]),
+       .en  (~indr_req_valid_disable),
+	   .clk (clk),
+	   .se  (se), 
+       .si(), 
+       .so());
+   
+   // Round robin scheduler for indr request to pcx
+   sparc_ifu_rndrob  indr_sched(
+       // .req_vec (indr_vld[3:0]),
+       .req_vec (indr_req_vec[3:0]),
+	   .advance (lsu_tlu_pcxpkt_ack),
+	   .rst_tri_enable (rst_tri_en),
+	   .clk (clk),
+	   .reset  (local_rst),
+	   .se  (se),
+	   .si (si),
+	   .grant_vec (indr_grant[3:0]),
+	   .so ());
+
+// convert the signal back to non-inverting version for grape
+// modified to keep the grant strictly one-hot: priority [1] > [2] > [3], with [0] as the default
+     assign  inc_ind_indr_grant[0] = // set when none of [3:1] is set; indr_grant[0] itself is not consulted here
+                 ~(|inc_ind_indr_grant[3:1]);
+     assign  inc_ind_indr_grant[1] = // passes the scheduler grant for thread 1 unchanged
+                 indr_grant[1]; 
+     assign  inc_ind_indr_grant[2] = // thread 2 grant, suppressed if thread 1 is also granted
+                 indr_grant[2] & ~indr_grant[1];
+     assign  inc_ind_indr_grant[3] = // thread 3 grant, suppressed if output [2] or [1] is set
+                 indr_grant[3] & ~(|inc_ind_indr_grant[2:1]);
+//
+   assign  indr_rst[3:0] = 
+               {4{local_rst}} | (indr_grant[3:0] & {4{lsu_tlu_pcxpkt_ack}});
+   assign  intd_done[3:0] = 
+               (indr_grant[3:0] & indr_vld[3:0] & {4{lsu_tlu_pcxpkt_ack}});
+
+   dffr_s #(4) intd_reg(
+       .din (intd_done[3:0]),
+	   .q   (int_tlu_longop_done[3:0]),
+	   .clk (clk),
+	   .rst  (local_rst),
+	   .se  (se), 
+       .si(), 
+       .so());
+
+   // INDR pcx request control signals
+   // modified for bug 3945
+   // assign  inc_indr_req_valid = (|indr_vld[3:0]) & ~lsu_tlu_pcxpkt_ack;
+   assign  inc_indr_req_valid = indr_req_valid_disable;
+   assign  inc_indr_req_thrid[1] = indr_grant[3] | indr_grant[2];
+   assign  inc_indr_req_thrid[0] = indr_grant[3] | indr_grant[1];
+   
+endmodule // sparc_tlu_intctl
Index: /trunk/T1-CPU/spu/spu_mast.v
===================================================================
--- /trunk/T1-CPU/spu/spu_mast.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_mast.v	(revision 6)
@@ -0,0 +1,335 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_mast.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:   state machine to do stores to L2. 
+*/
+////////////////////////////////////////////////////////////////////////
+
+module spu_mast (
+
+/*outputs*/
+spu_mast_maaddr_addrinc,
+spu_mast_memren,
+spu_mast_stbuf_wen,
+spu_mast_mpa_addrinc,
+spu_mast_streq,
+
+spu_mast_done_set,
+/*inputs*/
+
+spu_mactl_iss_pulse_dly,
+mactl_stop,
+streq_ack,
+len_neqz,
+
+spu_wen_allma_stacks_ok,
+
+spu_mactl_perr_set,
+
+spu_mactl_stxa_force_abort,
+
+se,
+reset,
+rclk);
+// Store state machine with one flop per state: idle, rdmem, wait4stdrain and
+// laststreq. The sections below derive each next-state term and the outputs.
+input reset;   // feeds state_reset, spu_mast_done_rst and (via state_reset) rd_cntr_rst
+input rclk;    // clock pin of every flop in this module
+input se;      // wired to the .se pin of every flop in this module
+
+input spu_mactl_iss_pulse_dly;
+input mactl_stop;
+input streq_ack;
+input len_neqz;
+
+input spu_wen_allma_stacks_ok;
+
+input spu_mactl_perr_set;
+
+input spu_mactl_stxa_force_abort;
+// -----------------------------------------------------------------
+
+output spu_mast_maaddr_addrinc;   // equals cur_rdmem_state
+output spu_mast_memren;           // cur_rdmem_state masked by local_kill_abort
+output spu_mast_stbuf_wen;        // cur_rdmem_state delayed by one flop
+output spu_mast_mpa_addrinc;      // wait4stdrain->rdmem transition delayed by two flops
+output spu_mast_streq;            // store request; blocked in idle, on perr and on forced abort
+output spu_mast_done_set;         // done flag qualified by ok_to_signal_cmplt
+// -----------------------------------------------------------------
+// Internal nets (several state nets below are implicit 1-bit wires).
+wire spu_mast_st_done,tr2rdmem_frm_wait4stdrain;
+
+wire ok_to_signal_cmplt;
+
+wire start_set;
+wire spu_mast_allow_rdmem;
+
+wire [1:0] rd_cntr_add,rd_cntr_q;
+
+wire tr2laststreq_frm_wait4stdrain;
+
+wire local_kill_abort;
+// streq_ack registered once; used by the request, counter and start_set logic.
+wire streq_ack_dly;
+dff_s    #(1) streq_ack_ff (
+        .din(streq_ack) ,
+        .q(streq_ack_dly),
+        .clk (rclk), .se(se), .si(), .so());
+
+// -----------------------------------------------------------------
+// Done flag: loaded with 1 when (st_done | local_kill_abort) & mactl_stop;
+// its reset pin is driven by reset or spu_mactl_iss_pulse_dly.
+// we need a state set to indicate st is done, and when an
+// masync gets issued later, then the load asi is returned.
+wire spu_mast_done_wen = (spu_mast_st_done | local_kill_abort) & mactl_stop;
+wire spu_mast_done_rst = reset | spu_mactl_iss_pulse_dly;
+
+wire spu_mast_done_set_q;
+
+dffre_s    #(1) spu_mast_done_ff (
+        .din(1'b1) , 
+        .q(spu_mast_done_set_q),
+        .en(spu_mast_done_wen),
+        .rst(spu_mast_done_rst), .clk (rclk), .se(se), .si(), .so()); 
+// The done flag is only reported while ok_to_signal_cmplt is high.
+assign spu_mast_done_set = spu_mast_done_set_q & ok_to_signal_cmplt;
+
+
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+
+// added the following dly to fix bug5212. I had added a flop to lsu_spu_ldst_ack to
+// the logic to increment the store req in spu_wen ack_cmplt counter to prevent
+// introducing a timing path. Now in the case if an ma_store has a length 1, then
+// the done_set gets asserted a cycle before the store ack incrementer increments.
+// So now I have to delay the done_set by a cycle so that the incrementer has
+// seen a store request by that time and the counter is no longer zero.
+wire spu_mast_done_wen_dly;
+dff_s    #(1) spu_mast_done_wen_ff (
+        .din(spu_mast_done_wen) ,
+        .q(spu_mast_done_wen_dly),
+        .clk (rclk), .se(se), .si(), .so());
+// Second flag, loaded from the delayed enable; shares spu_mast_done_rst.
+dffre_s    #(1) spu_mast_done_stack_ff (
+        .din(1'b1) ,
+        .q(spu_mast_done_set_stack),
+        .en(spu_mast_done_wen_dly),
+        .rst(spu_mast_done_rst), .clk (rclk), .se(se), .si(), .so());
+// Completion needs both the delayed flag and spu_wen_allma_stacks_ok.
+assign ok_to_signal_cmplt = spu_wen_allma_stacks_ok & spu_mast_done_set_stack;
+
+
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// State flops. state_reset (reset or local_kill_abort) drives the rst pin of
+// the rdmem, wait4stdrain and laststreq flops.
+wire  state_reset = reset | local_kill_abort;
+// idle_state_ff has no rst pin; state_reset is ORed into nxt_idle_state instead.
+dff_s    #(1) idle_state_ff (
+        .din(nxt_idle_state) ,
+        .q(cur_idle_state),
+        .clk (rclk), .se(se), .si(), .so());
+
+dffr_s  #(1) rdmem_state_ff (
+        .din(nxt_rdmem_state) ,
+        .q(cur_rdmem_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+dffr_s  #(1) wait4stdrain_state_ff (
+        .din(nxt_wait4stdrain_state) , 
+        .q(cur_wait4stdrain_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) laststreq_state_ff (
+        .din(nxt_laststreq_state) ,
+        .q(cur_laststreq_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------
+// start_stop: issue pulse qualified by mactl_stop; it takes the machine out of idle.
+wire start_stop = spu_mactl_iss_pulse_dly & mactl_stop;
+
+// -------------------------------------------------------------------------
+//  transition to idle state.
+
+/*
+assign spu_mast_st_done = cur_wait4stdrain_state & streq_ack & 
+				(~len_neqz | spu_mactl_stxa_force_abort);
+*/
+
+// Store done: streq_ack in laststreq, or in wait4stdrain with ~len_neqz and rd_cntr_q[0].
+assign spu_mast_st_done = 
+			//((cur_wait4stdrain_state & ~len_neqz & start_set) |
+			((cur_wait4stdrain_state & ~len_neqz & rd_cntr_q[0]) |
+			  cur_laststreq_state) & streq_ack ; 
+
+// Idle is entered on state_reset or st_done and held until start_stop.
+assign  nxt_idle_state = (
+                         state_reset | spu_mast_st_done |
+                         (cur_idle_state & ~start_stop));
+
+
+wire tr2rdmem_frm_idle = cur_idle_state & start_stop;
+
+// this delay is because spu_mast_memren is based on nxt_rdmem_state
+// and it happens before cur_idle_state goes to zero.
+wire dly_tr2rdmem_frm_idle;
+dff_s    #(1) dly_tr2rdmem_frm_idle_ff (
+        .din(tr2rdmem_frm_idle) ,
+        .q(dly_tr2rdmem_frm_idle),
+        .clk (rclk), .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------
+//  transition to rdmem state.
+
+// From wait4stdrain: requires len_neqz plus either streq_ack or spu_mast_allow_rdmem.
+assign tr2rdmem_frm_wait4stdrain = cur_wait4stdrain_state & 
+					(streq_ack | spu_mast_allow_rdmem) & 
+					len_neqz ;
+
+assign  nxt_rdmem_state = (
+			 (dly_tr2rdmem_frm_idle) |
+			 (tr2rdmem_frm_wait4stdrain));
+
+// -------------------------------------------------------------------------
+//  transition to wait4stdrain state.
+// Entered from rdmem; held until streq_ack, or spu_mast_allow_rdmem with len_neqz.
+assign nxt_wait4stdrain_state = (
+                         cur_rdmem_state |
+			 (cur_wait4stdrain_state & ~(streq_ack | (spu_mast_allow_rdmem & 
+									len_neqz)) ));
+
+// -------------------------------------------------------------------------
+//  transition to laststreq state.
+// Taken on streq_ack with ~len_neqz and rd_cntr_q[0] clear; held until the next streq_ack.
+assign tr2laststreq_frm_wait4stdrain = cur_wait4stdrain_state & streq_ack & ~len_neqz &
+						//~start_set;
+						~rd_cntr_q[0];
+
+assign nxt_laststreq_state = (
+			 tr2laststreq_frm_wait4stdrain |
+			 (cur_laststreq_state & ~streq_ack) );
+
+wire tr2laststreq_frm_wait4stdrain_dly;
+dff_s    #(1) tr2laststreq_frm_wait4stdrain_ff (
+        .din(tr2laststreq_frm_wait4stdrain) ,
+        .q(tr2laststreq_frm_wait4stdrain_dly),
+        .clk (rclk), .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// Output generation.
+
+assign spu_mast_maaddr_addrinc = cur_rdmem_state;
+
+//assign spu_mast_memren = nxt_rdmem_state;
+assign spu_mast_memren = cur_rdmem_state & ~local_kill_abort;
+
+wire cur_rdmem_state_dly;
+dff_s    #(1) cur_rdmem_state_ff (
+        .din(cur_rdmem_state) ,
+        .q(cur_rdmem_state_dly),
+        .clk (rclk), .se(se), .si(), .so());
+// Two more stages: _dly2 and _dly3 trail cur_rdmem_state by two and three flops.
+wire cur_rdmem_state_dly2,cur_rdmem_state_dly3;
+dff_s    #(2) cur_rdmem_state_dly_ff (
+        .din({cur_rdmem_state_dly,cur_rdmem_state_dly2}) ,
+        .q({cur_rdmem_state_dly2,cur_rdmem_state_dly3}),
+        .clk (rclk), .se(se), .si(), .so());
+// Store-buffer write enable follows cur_rdmem_state by one flop.
+assign spu_mast_stbuf_wen = cur_rdmem_state_dly;
+
+
+// cannot use cur_rdmem_state to start the request since the data will
+// not be in the store buffer till the next cycle after mem rd.
+//assign spu_mast_streq = cur_wait4stdrain_state | cur_rdmem_state;
+
+
+//assign spu_mast_streq = cur_wait4stdrain_state & ~spu_mactl_dly_streq &
+assign spu_mast_streq = ((cur_rdmem_state_dly3 & start_set & ~rd_cntr_q[1]) |
+				(streq_ack_dly & len_neqz) |
+				(tr2laststreq_frm_wait4stdrain_dly) )&
+				~cur_idle_state & ~spu_mactl_perr_set &
+				~spu_mactl_stxa_force_abort;
+			// when perr is asserted
+			// the state machine is to go to idle, but due to the above eq, len is not zero and
+			// with streq_ack it would continue doing streq and hence the st_ack counter keeps incr.
+
+// Abort: the same three request terms as spu_mast_streq, taken when perr or force_abort is set.
+assign local_kill_abort = ((cur_rdmem_state_dly3 & start_set & ~rd_cntr_q[1]) |
+                                (streq_ack_dly & len_neqz) |
+                                (tr2laststreq_frm_wait4stdrain_dly) )&
+				(spu_mactl_perr_set | spu_mactl_stxa_force_abort);
+
+
+wire tr2rdmem_frm_wait4stdrain_dly;
+dff_s    #(1) tr2rdmem_frm_wait4stdrain_ff (
+        .din(tr2rdmem_frm_wait4stdrain) ,
+        .q(tr2rdmem_frm_wait4stdrain_dly),
+        .clk (rclk), .se(se), .si(), .so());
+
+wire tr2rdmem_frm_wait4stdrain_dly2;
+dff_s    #(1) tr2rdmem_frm_wait4stdrain_dly_ff (
+        .din(tr2rdmem_frm_wait4stdrain_dly) ,
+        .q(tr2rdmem_frm_wait4stdrain_dly2),
+        .clk (rclk), .se(se), .si(), .so());
+// mpa address increment is the wait4stdrain->rdmem transition after two flops.
+assign spu_mast_mpa_addrinc = tr2rdmem_frm_wait4stdrain_dly2;
+
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// cntr to do an extra st req.
+// 2-bit counter: enabled while cur_rdmem_state is high, loads rd_cntr_q + 1.
+wire rd_cntr_en = cur_rdmem_state;
+// Counter reset: state_reset, a delayed store ack, or a new start.
+wire rd_cntr_rst = state_reset | streq_ack_dly | start_stop;
+
+assign rd_cntr_add[1:0] = rd_cntr_q[1:0] + 2'b01;
+
+dffre_s  #(2) rd_cntr_ff (
+        .din(rd_cntr_add[1:0]) ,
+        .q(rd_cntr_q[1:0]),
+        .en(rd_cntr_en),
+        .rst(rd_cntr_rst), .clk (rclk), .se(se), .si(), .so());
+
+// start_set: loaded with 1 on start_stop; rst pin driven by state_reset or streq_ack_dly.
+dffre_s  #(1) start_stop_ff (
+        .din(1'b1) ,
+        .q(start_set),
+        .en(start_stop),
+        .rst(state_reset | streq_ack_dly), .clk (rclk), .se(se), .si(), .so());
+
+
+/*
+assign  spu_mast_allow_rdmem = (start_set & ~rd_cntr_q[1] & cur_rdmem_state_dly3) | 
+				(~start_set & rd_cntr_q[0]) ;
+*/
+
+// Same term as the first product in spu_mast_streq and local_kill_abort.
+assign  spu_mast_allow_rdmem = (start_set & ~rd_cntr_q[1] & cur_rdmem_state_dly3) ; 
+//------------------------------------------------------------------
+
+endmodule
Index: /trunk/T1-CPU/spu/spu_maaeqb.v
===================================================================
--- /trunk/T1-CPU/spu/spu_maaeqb.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_maaeqb.v	(revision 6)
@@ -0,0 +1,725 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_maaeqb.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:   state machine to do MA mul/acc/shf when
+//				A = B. 
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+module spu_maaeqb (
+
+
+/*outputs*/
+spu_maaeqb_memren,
+spu_maaeqb_memwen,
+
+spu_maaeqb_rst_iptr,
+spu_maaeqb_rst_jptr,
+spu_maaeqb_incr_iptr,
+spu_maaeqb_incr_jptr,
+
+spu_maaeqb_a_rd_oprnd_sel,
+spu_maaeqb_ax_rd_oprnd_sel,
+spu_maaeqb_m_rd_oprnd_sel,
+spu_maaeqb_me_rd_oprnd_sel,
+spu_maaeqb_n_rd_oprnd_sel,
+spu_maaeqb_m_wr_oprnd_sel,
+spu_maaeqb_me_wr_oprnd_sel,
+
+spu_maaeqb_iminus1_ptr_sel,
+spu_maaeqb_j_ptr_sel,
+spu_maaeqb_iminusj_ptr_sel,
+spu_maaeqb_iminuslenminus1_sel,
+spu_maaeqb_irshft_sel,
+spu_maaeqb_jjptr_wen,
+
+spu_maaeqb_oprnd2_wen,
+spu_maaeqb_oprnd2_bypass,
+spu_maaeqb_a_leftshft,
+spu_maaeqb_oprnd1_mxsel,
+spu_maaeqb_oprnd1_wen,
+
+spu_maaeqb_mul_req_vld,
+spu_maaeqb_mul_areg_shf,
+spu_maaeqb_mul_acc,
+spu_maaeqb_mul_areg_rst,
+spu_maaeqb_mul_done,
+
+spu_maaeqb_jjptr_sel,
+
+/*inputs*/
+spu_mactl_mulop,
+
+spu_maaddr_iequtwolenplus2,
+spu_maaddr_iequtwolenplus1,
+spu_maaddr_jequiminus1,
+spu_maaddr_jequlen,
+spu_maaddr_halfpnt_set,
+spu_mactl_iss_pulse_dly,
+
+
+mul_spu_ack,
+mul_spu_shf_ack,
+
+spu_maexp_start_mulred_aequb,
+
+spu_mactl_expop,
+
+
+spu_maaddr_jequiminus1rshft,
+spu_maaddr_iequtwolen,
+spu_maaddr_ieven,
+spu_maaddr_ieq0,
+
+spu_maaddr_aequb,
+
+spu_mactl_kill_op,
+spu_mactl_stxa_force_abort,
+
+se,
+reset,
+rclk);
+
+// ---------------------------------------------------------------
+input reset;
+input rclk;
+input se;
+
+input spu_maaddr_iequtwolenplus2;
+input spu_maaddr_iequtwolenplus1;
+input spu_maaddr_jequiminus1;
+input spu_maaddr_jequlen;
+input spu_maaddr_halfpnt_set;
+
+input mul_spu_ack;
+input mul_spu_shf_ack;
+input spu_mactl_mulop;
+input spu_mactl_iss_pulse_dly;
+
+
+input spu_maexp_start_mulred_aequb;
+
+input spu_mactl_expop;
+
+
+
+input spu_maaddr_jequiminus1rshft;
+input spu_maaddr_iequtwolen;
+input spu_maaddr_ieven;
+input spu_maaddr_ieq0;
+
+
+input spu_maaddr_aequb;
+
+input spu_mactl_kill_op;
+input spu_mactl_stxa_force_abort;
+
+// ---------------------------------------------------------------
+
+output spu_maaeqb_memwen;
+output spu_maaeqb_memren;
+output spu_maaeqb_rst_iptr;
+output spu_maaeqb_rst_jptr;
+output spu_maaeqb_incr_iptr;
+output spu_maaeqb_incr_jptr;
+
+output spu_maaeqb_a_rd_oprnd_sel;
+output spu_maaeqb_ax_rd_oprnd_sel;
+output spu_maaeqb_m_rd_oprnd_sel;
+output spu_maaeqb_me_rd_oprnd_sel;
+output spu_maaeqb_n_rd_oprnd_sel;
+output spu_maaeqb_m_wr_oprnd_sel;
+output spu_maaeqb_me_wr_oprnd_sel;
+
+output spu_maaeqb_iminus1_ptr_sel;
+output spu_maaeqb_j_ptr_sel;
+output spu_maaeqb_iminusj_ptr_sel;
+output spu_maaeqb_iminuslenminus1_sel;
+output spu_maaeqb_irshft_sel;
+output spu_maaeqb_jjptr_wen;
+
+output spu_maaeqb_oprnd2_wen;
+output spu_maaeqb_oprnd2_bypass;
+output spu_maaeqb_a_leftshft;
+output [1:0] spu_maaeqb_oprnd1_mxsel;
+output spu_maaeqb_oprnd1_wen;
+
+output spu_maaeqb_mul_req_vld;
+output spu_maaeqb_mul_areg_shf;
+output spu_maaeqb_mul_acc;
+output spu_maaeqb_mul_areg_rst;
+output spu_maaeqb_mul_done;
+
+output spu_maaeqb_jjptr_sel;
+// ---------------------------------------------------------------
+wire tr2mwrite_frm_accumshft_pre;
+wire tr2mwrite_frm_accumshft;
+wire spu_maaeqb_rd_aj,spu_maaeqb_rd_mj,
+	spu_maaeqb_rd_niminusj,spu_maaeqb_rd_ai,
+	spu_maaeqb_wr_mi,spu_maaeqb_wr_miminuslenminus1,
+	spu_maaeqb_rd_n0;
+
+wire spu_maaeqb_rd_aiminusj;
+
+wire tr2accumshft_frm_mwrite;
+wire tr2accumshft_frm_iloopn;
+wire nxt_mwrite_state;
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+
+wire local_stxa_abort = nxt_mwrite_state & spu_mactl_stxa_force_abort;
+
+wire state_reset = reset | spu_mactl_kill_op | local_stxa_abort;
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+dff_s    #(1) idle_state_ff (
+        .din(nxt_idle_state) , 
+        .q(cur_idle_state),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) jloopa_state_ff (
+        .din(nxt_jloopa_state) , 
+        .q(cur_jloopa_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) ijloopa_state_ff (
+        .din(nxt_ijloopa_state) , 
+        .q(cur_ijloopa_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) jloopn_state_ff (
+        .din(nxt_jloopn_state) , 
+        .q(cur_jloopn_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) jloopm_state_ff (
+        .din(nxt_jloopm_state) , 
+        .q(cur_jloopm_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) iloopa1_state_ff (
+        .din(nxt_iloopa1_state) ,
+        .q(cur_iloopa1_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+dffr_s  #(1) iloopa_state_ff (
+        .din(nxt_iloopa_state) , 
+        .q(cur_iloopa_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+
+dffr_s  #(1) nprime_state_ff (
+        .din(nxt_nprime_state) , 
+        .q(cur_nprime_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) mwrite_state_ff (
+        .din(nxt_mwrite_state) , 
+        .q(cur_mwrite_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) iloopn_state_ff (
+        .din(nxt_iloopn_state) , 
+        .q(cur_iloopn_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) accumshft_state_ff (
+        .din(nxt_accumshft_state) , 
+        .q(cur_accumshft_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+
+
+wire spu_maaddr_aequb_q;
+dff_s  #(1) spu_maaddr_aequb_ff (
+        .din(spu_maaddr_aequb) ,
+        .q(spu_maaddr_aequb_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+// ---------------------------------------------------------------
+// 5 cycle delay for mul result coming back.
+// ---------------------------------------------------------------
+
+wire tr2mwrite_frm_jloopn = cur_jloopn_state & mul_spu_ack & spu_maaddr_halfpnt_set &
+                                spu_maaddr_jequlen;
+
+
+wire mul_result_c0,mul_result_c1,mul_result_c2,mul_result_c3,mul_result_c4,mul_result_c5;
+
+//assign mul_result_c0 = (cur_nprime_state & mul_spu_ack & ~spu_maaddr_halfpnt_set) |
+assign mul_result_c0 = (cur_nprime_state & mul_spu_ack) |
+                        ( tr2mwrite_frm_jloopn );
+
+
+dffr_s  #(5) mul_res_ff (
+        .din({mul_result_c0,mul_result_c1,mul_result_c2,mul_result_c3,mul_result_c4}) ,
+        .q({mul_result_c1,mul_result_c2,mul_result_c3,mul_result_c4,mul_result_c5}),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+
+// ----------------------------------------------------------------
+// ----------------------------------------------------------------
+// ---------------------------------------------------------------
+wire tr2idle_frm_accumshft = cur_accumshft_state & spu_maaddr_iequtwolenplus2 &
+                                mul_spu_shf_ack;
+
+
+wire spu_maaeqb_mul_done_pre = tr2idle_frm_accumshft;
+
+wire spu_maaeqb_mul_done_q;
+dff_s #(1) muldone_dly_ff (
+        .din(spu_maaeqb_mul_done_pre) , 
+        .q(spu_maaeqb_mul_done_q),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+assign spu_maaeqb_mul_done = spu_maaeqb_mul_done_q | local_stxa_abort;
+
+
+assign spu_maaeqb_rst_iptr = tr2idle_frm_accumshft;
+
+
+// ----------------------------------------------------------------
+// transition to idle state 
+
+wire mulop_start = (spu_mactl_iss_pulse_dly & spu_mactl_mulop & spu_maaddr_aequb_q) | 
+				spu_maexp_start_mulred_aequb;
+
+assign spu_maaeqb_mul_areg_rst = mulop_start;
+
+
+assign  nxt_idle_state = (
+                         state_reset |
+                         tr2idle_frm_accumshft |
+                         (cur_idle_state & ~mulop_start));
+
+
+// ----------------------------------------------------------------
+// transition to jloopa state(rdA[j])
+
+wire tr2jloopa_frm_ijloopa = cur_ijloopa_state & mul_spu_ack & ~spu_maaddr_jequiminus1rshft;
+
+wire tr2jloopa_frm_accumshft = cur_accumshft_state & ~spu_maaddr_iequtwolenplus2 &
+                                ~spu_maaddr_iequtwolenplus1 & ~spu_maaddr_iequtwolen &
+				mul_spu_shf_ack;
+
+wire tr2jloopa_frm_accumshft_dly;
+dffr_s #(1) tr2jloopa_frm_accumshft_dly_ff (
+        .din(tr2jloopa_frm_accumshft) ,
+        .q(tr2jloopa_frm_accumshft_dly),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+
+assign nxt_jloopa_state = (
+                          tr2jloopa_frm_ijloopa |
+                          tr2jloopa_frm_accumshft_dly );
+
+//assign spu_maaeqb_rd_aj = nxt_jloopa_state;
+assign spu_maaeqb_rd_aj = (cur_ijloopa_state & ~spu_maaddr_jequiminus1rshft) |
+				tr2jloopa_frm_accumshft_dly;
+
+// ----------------------------------------------------------------
+// transition to ijloopa state(rdA[i-j])
+
+
+assign nxt_ijloopa_state = (
+                          cur_jloopa_state |
+                          (cur_ijloopa_state & ~mul_spu_ack));
+
+assign spu_maaeqb_a_leftshft = cur_ijloopa_state;
+
+//assign spu_maaeqb_rd_aiminusj = nxt_ijloopa_state | cur_ijloopa_state;
+assign spu_maaeqb_rd_aiminusj = cur_jloopa_state;
+
+
+// ----------------------------------------------------------------
+// transition to iloopa state(rdA[i/2])
+
+wire tr2iloopa1_frm_ijloopa = cur_ijloopa_state & mul_spu_ack & spu_maaddr_ieven &
+				spu_maaddr_jequiminus1rshft;
+
+wire tr2iloopa1_frm_accumshft = spu_maaddr_ieven & cur_accumshft_state & mul_spu_shf_ack &
+			//(spu_maaddr_iequtwolenplus1 | spu_maaddr_iequtwolenplus2 |
+			(spu_maaddr_iequtwolenplus1 | 
+			 spu_maaddr_iequtwolen); 
+
+wire tr2iloopa1_frm_accumshft_dly;
+dffr_s #(1) tr2iloopa1_frm_accumshft_dly_ff (
+        .din(tr2iloopa1_frm_accumshft) ,
+        .q(tr2iloopa1_frm_accumshft_dly),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+
+wire tr2iloopa1_frm_idle = cur_idle_state & mulop_start;
+
+wire tr2iloopa1_frm_idle_dly;
+dffr_s #(1) tr2iloopa1_frm_idle_ff (
+        .din(tr2iloopa1_frm_idle) ,
+        .q(tr2iloopa1_frm_idle_dly),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+
+
+assign nxt_iloopa1_state = (
+                          tr2iloopa1_frm_accumshft_dly |
+                          tr2iloopa1_frm_ijloopa |
+                          tr2iloopa1_frm_idle_dly) ;
+
+wire cur_iloopa1_state_dly;
+dffr_s #(1) cur_iloopa1_state_dly_ff (
+        .din(cur_iloopa1_state) ,
+        .q(cur_iloopa1_state_dly),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+
+assign nxt_iloopa_state = (
+			  cur_iloopa1_state_dly |
+                          (cur_iloopa_state & ~mul_spu_ack));
+
+
+
+//assign spu_maaeqb_rd_ai = cur_iloopa1_state | nxt_iloopa_state | cur_iloopa_state; 
+
+assign spu_maaeqb_rd_ai = (cur_ijloopa_state & spu_maaddr_ieven & spu_maaddr_jequiminus1rshft) |
+				tr2iloopa1_frm_idle_dly |
+	//(cur_accumshft_state & spu_maaddr_ieven & (spu_maaddr_iequtwolenplus1 | spu_maaddr_iequtwolen))	|
+		tr2iloopa1_frm_accumshft_dly |
+// above are for iloopa1 and below are for iloopa.
+	(cur_iloopa1_state_dly);
+
+// ----------------------------------------------------------------
+// transition to jloopm state(rdM[j])
+
+wire tr2jloopm_frm_ijloopa = cur_ijloopa_state & mul_spu_ack & ~spu_maaddr_ieven &
+				spu_maaddr_jequiminus1rshft;
+
+// the following is needed to reset jptr on the transition
+// from ijloopa to jloopm.
+wire tr2jloopm_frm_ijloopa_dly;
+dffr_s #(1) tr2jloopm_frm_ijloopa_dly_ff (
+        .din(tr2jloopm_frm_ijloopa) ,
+        .q(tr2jloopm_frm_ijloopa_dly),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+
+wire tr2jloopm_frm_iloopa = cur_iloopa_state & mul_spu_ack & ~spu_maaddr_ieq0 ;
+wire tr2jloopm_frm_iloopa_dly;
+dffr_s #(1) tr2jloopm_frm_iloopa_dly_ff (
+        .din(tr2jloopm_frm_iloopa) ,
+        .q(tr2jloopm_frm_iloopa_dly),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+
+wire tr2jloopm_frm_jloopn = cur_jloopn_state & mul_spu_ack &
+                ((~spu_maaddr_jequiminus1 & ~spu_maaddr_halfpnt_set) |
+                (~spu_maaddr_jequlen & spu_maaddr_halfpnt_set)) ;
+
+assign nxt_jloopm_state = (
+			  tr2jloopm_frm_jloopn |
+			  tr2jloopm_frm_ijloopa_dly |
+			  tr2jloopm_frm_iloopa_dly);
+
+
+//assign spu_maaeqb_rd_mj = nxt_jloopm_state;
+assign spu_maaeqb_rd_mj = tr2jloopm_frm_ijloopa_dly | tr2jloopm_frm_iloopa_dly |
+			cur_jloopn_state & 
+                ((~spu_maaddr_jequiminus1 & ~spu_maaddr_halfpnt_set) |
+                (~spu_maaddr_jequlen & spu_maaddr_halfpnt_set))	;
+
+
+
+// ----------------------------------------------------------------
+// transition to jloopn state(rdN[j])
+
+assign nxt_jloopn_state = (
+                          cur_jloopm_state |
+                          (cur_jloopn_state & ~mul_spu_ack));
+
+assign spu_maaeqb_jjptr_wen = cur_jloopa_state | cur_jloopm_state;
+assign spu_maaeqb_incr_jptr = tr2jloopa_frm_ijloopa | tr2jloopm_frm_jloopn;
+
+assign spu_maaeqb_jjptr_sel = cur_ijloopa_state | cur_jloopn_state;
+
+
+//assign spu_maaeqb_rd_niminusj = nxt_jloopn_state;
+assign spu_maaeqb_rd_niminusj = cur_jloopm_state;
+
+
+// ----------------------------------------------------------------
+// transition to nprime state
+
+wire tr2nprime_frm_jloopn = cur_jloopn_state & mul_spu_ack &
+                spu_maaddr_jequiminus1 & ~spu_maaddr_halfpnt_set;
+
+wire tr2nprime_frm_iloopa = cur_iloopa_state & mul_spu_ack & spu_maaddr_ieq0;
+
+assign nxt_nprime_state = (
+			  tr2nprime_frm_jloopn |
+			  tr2nprime_frm_iloopa |
+                          (cur_nprime_state & ~mul_spu_ack));
+
+
+
+
+// the following is to reset jptr on the 1st half.
+wire tr2nprime_frm_jloopn_dly;
+dffr_s #(1) tr2nprime_frm_jloopn_dly_ff (
+        .din(tr2nprime_frm_jloopn) ,
+        .q(tr2nprime_frm_jloopn_dly),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+// ----------------------------------------------------------------
+// transition to mwrite state
+
+
+assign tr2mwrite_frm_accumshft_pre = cur_accumshft_state & mul_spu_shf_ack &
+                                spu_maaddr_iequtwolenplus1;
+
+// delaying for one cycle to allow time to do i ptr increment
+// and calculate i-len-1(M[i-len-1]).This is due to skipping jloop on last
+// i iteration, not enough time to do both.
+dffr_s #(1) tr2mwrite_frm_accumshft_ff (
+        .din(tr2mwrite_frm_accumshft_pre) ,
+        .q(tr2mwrite_frm_accumshft),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+
+assign nxt_mwrite_state = (
+                          tr2mwrite_frm_accumshft |
+                          (mul_result_c5));
+
+
+//assign spu_maaeqb_memwen = nxt_mwrite_state;
+
+wire spu_maaeqb_wr_mi_oprnd2_wenbyp = nxt_mwrite_state & ~spu_maaddr_halfpnt_set;
+wire spu_maaeqb_wr_miminuslenminus1_oprnd2_wenbyp = nxt_mwrite_state & spu_maaddr_halfpnt_set;
+
+
+// ---------------------------------------------------------------
+// transition to iloopn state
+
+assign nxt_iloopn_state = (
+                          (cur_mwrite_state & ~spu_maaddr_halfpnt_set) |
+                          (cur_iloopn_state & ~mul_spu_ack));
+
+
+//assign spu_maaeqb_rd_n0 =  nxt_iloopn_state | cur_iloopn_state; 
+assign spu_maaeqb_rd_n0 =  cur_mwrite_state; 
+
+// ---------------------------------------------------------------
+// transition to accumshft state
+
+assign tr2accumshft_frm_mwrite = cur_mwrite_state & spu_maaddr_halfpnt_set;
+assign tr2accumshft_frm_iloopn = cur_iloopn_state & mul_spu_ack;
+
+assign nxt_accumshft_state = (
+                          tr2accumshft_frm_mwrite |
+                          tr2accumshft_frm_iloopn |
+                          (cur_accumshft_state & ~mul_spu_shf_ack));
+
+assign spu_maaeqb_incr_iptr = tr2accumshft_frm_mwrite | tr2accumshft_frm_iloopn;
+
+dff_s  #(1) memwen_dly_ff (
+        .din(spu_maaeqb_incr_iptr) ,
+        .q(spu_maaeqb_memwen),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign spu_maaeqb_wr_mi = spu_maaeqb_memwen & ~spu_maaddr_halfpnt_set;
+assign spu_maaeqb_wr_miminuslenminus1 = spu_maaeqb_memwen & spu_maaddr_halfpnt_set;
+
+// ---------------------------------------------------------------
+
+wire cur_accumshft_pulse,cur_accumshft_q;
+
+dff_s  #(1) cur_accumshft_pulse_ff (
+        .din(cur_accumshft_state) ,
+        .q(cur_accumshft_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign cur_accumshft_pulse = ~cur_accumshft_q & cur_accumshft_state;
+
+
+
+
+assign spu_maaeqb_rst_jptr = mulop_start | tr2nprime_frm_jloopn_dly |  
+				tr2jloopm_frm_ijloopa | tr2iloopa1_frm_ijloopa |
+			(cur_accumshft_pulse &
+                        spu_maaddr_halfpnt_set & ~spu_maaddr_iequtwolenplus2 &
+                        ~spu_maaddr_iequtwolenplus1);
+
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// send selects to spu_maaddr.v
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+assign spu_maaeqb_memren = spu_maaeqb_rd_aj | 
+                spu_maaeqb_rd_aiminusj |
+                spu_maaeqb_rd_mj | 
+                spu_maaeqb_rd_niminusj |
+                spu_maaeqb_rd_ai | spu_maaeqb_rd_n0;    
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+assign spu_maaeqb_a_rd_oprnd_sel = (spu_maaeqb_rd_aj | spu_maaeqb_rd_ai | 
+					spu_maaeqb_rd_aiminusj) & ~spu_mactl_expop ;
+assign spu_maaeqb_ax_rd_oprnd_sel = (spu_maaeqb_rd_aj | spu_maaeqb_rd_ai | 
+					spu_maaeqb_rd_aiminusj) & spu_mactl_expop ;
+
+assign spu_maaeqb_m_rd_oprnd_sel = spu_maaeqb_rd_mj & ~spu_mactl_expop;
+assign spu_maaeqb_me_rd_oprnd_sel = spu_maaeqb_rd_mj & spu_mactl_expop  ;
+
+
+
+assign spu_maaeqb_n_rd_oprnd_sel = (spu_maaeqb_rd_niminusj & ~spu_maaeqb_rd_mj) | 
+					spu_maaeqb_rd_n0;
+
+
+// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+assign spu_maaeqb_m_wr_oprnd_sel = (spu_maaeqb_wr_mi | spu_maaeqb_wr_miminuslenminus1) &
+                                                ~spu_mactl_expop;
+assign spu_maaeqb_me_wr_oprnd_sel = (spu_maaeqb_wr_mi | spu_maaeqb_wr_miminuslenminus1) &
+                                                spu_mactl_expop;
+
+
+
+wire spu_maaeqb_m_wr_oprnd2_wen = (spu_maaeqb_wr_mi_oprnd2_wenbyp | 
+					spu_maaeqb_wr_miminuslenminus1_oprnd2_wenbyp) &
+                                                ~spu_mactl_expop;
+wire spu_maaeqb_me_wr_oprnd2_wen = (spu_maaeqb_wr_mi_oprnd2_wenbyp | 
+					spu_maaeqb_wr_miminuslenminus1_oprnd2_wenbyp) &
+                                                spu_mactl_expop;
+
+// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+assign spu_maaeqb_iminus1_ptr_sel = spu_maaeqb_wr_mi;
+
+assign spu_maaeqb_j_ptr_sel = spu_maaeqb_rd_aj | spu_maaeqb_rd_mj;
+assign spu_maaeqb_iminusj_ptr_sel = 
+                (spu_maaeqb_rd_aiminusj | spu_maaeqb_rd_niminusj) &
+                ~(spu_maaeqb_rd_aj | spu_maaeqb_rd_mj);
+
+assign spu_maaeqb_iminuslenminus1_sel = spu_maaeqb_wr_miminuslenminus1;
+
+
+assign spu_maaeqb_irshft_sel = spu_maaeqb_rd_ai;
+
+// ---------------------------------------------------------------
+// request to mul unit when asserted
+
+wire spu_maaeqb_mul_req_vld_pre = nxt_ijloopa_state | nxt_jloopn_state |
+                                nxt_nprime_state | nxt_iloopn_state |
+                                        nxt_iloopa_state;
+dffr_s #(1) spu_maaeqb_mul_req_vld_ff (
+        .din(spu_maaeqb_mul_req_vld_pre) , 
+        .q(spu_maaeqb_mul_req_vld),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+
+/*
+assign spu_maaeqb_mul_req_vld = cur_ijloopa_state | cur_jloopn_state | 
+                                cur_nprime_state | cur_iloopn_state |
+					cur_iloopa_state;
+*/
+        
+// ---------------------------------------------------------------
+
+assign spu_maaeqb_mul_areg_shf = cur_accumshft_state;
+// ---------------------------------------------------------------
+
+/*
+wire oprnd2_sel = (spu_maaeqb_rd_aj | spu_maaeqb_rd_ai |
+                  spu_maaeqb_m_rd_oprnd_sel | spu_maaeqb_me_rd_oprnd_sel) ;
+*/
+
+//wire oprnd2_sel = nxt_jloopa_state | cur_iloopa1_state | nxt_iloopa_state | nxt_jloopm_state ;
+wire oprnd2_sel = nxt_jloopa_state | nxt_iloopa1_state | nxt_jloopm_state ;
+
+
+wire oprnd2_sel_q;
+dff_s #(1) oprnd2_wen_ff (
+        .din(oprnd2_sel) , 
+        .q(oprnd2_sel_q),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+assign spu_maaeqb_oprnd2_wen = oprnd2_sel_q | spu_maaeqb_m_wr_oprnd2_wen |
+                                spu_maaeqb_me_wr_oprnd2_wen ;
+
+
+
+assign spu_maaeqb_oprnd2_bypass = spu_maaeqb_m_wr_oprnd2_wen | spu_maaeqb_me_wr_oprnd2_wen ;
+
+
+//assign spu_maaeqb_oprnd1_sel = cur_nprime_state; // only select nprime if set
+
+// ---------------------------------------------------------------
+assign spu_maaeqb_mul_acc = spu_maaeqb_mul_req_vld & ~cur_nprime_state;
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+
+wire spu_maaeqb_memrd4op1 = spu_maaeqb_rd_aiminusj | 
+				//spu_maaeqb_rd_ai |
+				cur_iloopa1_state_dly |
+				spu_maaeqb_rd_niminusj | spu_maaeqb_rd_n0;
+// spu_maaeqb_memrd4op1_q is the OR above passed through a dff_s on rclk.
+wire spu_maaeqb_memrd4op1_q;
+dff_s #(1) spu_maaeqb_memrd4op1_ff (
+        .din(spu_maaeqb_memrd4op1) ,
+        .q(spu_maaeqb_memrd4op1_q),
+        .clk (rclk), .se(se), .si(), .so());
+// Select bits: both are forced low while cur_nprime_state is high;
+// otherwise [1] follows spu_maaeqb_memrd4op1_q and [0] is its complement.
+wire [1:0] spu_maaeqb_oprnd1_mxsel;
+assign spu_maaeqb_oprnd1_mxsel[0] = ~cur_nprime_state & ~spu_maaeqb_memrd4op1_q;
+assign spu_maaeqb_oprnd1_mxsel[1] = ~cur_nprime_state & spu_maaeqb_memrd4op1_q;
+//assign spu_maaeqb_oprnd1_mxsel[2] = cur_nprime_state;
+
+// The oprnd1 write enable is the same registered read indication.
+assign spu_maaeqb_oprnd1_wen = spu_maaeqb_memrd4op1_q;
+
+
+
+endmodule
+
Index: /trunk/T1-CPU/spu/spu_maaddr.v
===================================================================
--- /trunk/T1-CPU/spu/spu_maaddr.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_maaddr.v	(revision 6)
@@ -0,0 +1,1153 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_maaddr.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    address generator for MA.
+// 			MPA,MA_ADDR,LENGTH,I_PTR,J_PTR...
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+module spu_maaddr (
+
+/*outputs*/
+spu_maaddr_len_neqz,
+spu_maaddr_mpa1maddr0,
+spu_maaddr_memindx,
+spu_maaddr_mamem_eveodd_sel_l,
+spu_maaddr_iequtwolenplus2,
+spu_maaddr_iequtwolenplus1,
+spu_maaddr_jequiminus1,
+spu_maaddr_jequlen,
+
+spu_maaddr_halfpnt_set,
+
+spu_maaddr_len_eqmax,
+
+spu_maaddr_esmax,
+spu_maaddr_esmod64,
+
+spu_maaddr_nooddwr_on_leneq1,
+
+spu_maaddr_mpa_addrinc,
+spu_maaddr_mpa_wen,
+spu_maaddr_mpa_incr_val,
+
+spu_maaddr_len_cntr,
+
+spu_maaddr_wrindx_0,
+spu_maaddr_jptr_eqz_mared,
+
+/*inputs*/
+
+spu_mald_force_mpa_add16,
+
+spu_mactl_iss_pulse_pre,
+spu_mactl_iss_pulse,
+
+
+//spu_mactl_mulop,
+spu_mactl_ldop,
+spu_madp_maaddr_reg_in,
+spu_madp_mactl_reg,
+
+spu_mald_maaddr_addrinc,
+spu_mald_mpa_addrinc,
+spu_mast_maaddr_addrinc,
+spu_mast_mpa_addrinc,
+
+spu_mamul_memwen,
+
+spu_mamul_rst_iptr,
+spu_mamul_rst_jptr,
+spu_mamul_incr_iptr,
+spu_mamul_incr_jptr,
+
+spu_mamul_a_rd_oprnd_sel,
+spu_mamul_ax_rd_oprnd_sel,
+spu_mamul_b_rd_oprnd_sel,
+spu_mamul_ba_rd_oprnd_sel,
+spu_mamul_m_rd_oprnd_sel,
+spu_mamul_n_rd_oprnd_sel,
+spu_mamul_m_wr_oprnd_sel,
+
+spu_mared_me_rd_oprnd_sel,
+spu_mared_xe_wr_oprnd_sel,
+spu_mamul_me_rd_oprnd_sel,
+spu_mamul_me_wr_oprnd_sel,
+
+spu_mamul_i_ptr_sel,
+spu_mamul_iminus1_ptr_sel,
+spu_mamul_j_ptr_sel,
+spu_mamul_iminusj_ptr_sel,
+spu_mamul_iminuslenminus1_sel,
+
+spu_mamul_jjptr_wen,
+
+spu_mared_m_rd_oprnd_sel,
+spu_mared_nm_rd_oprnd_sel,
+spu_mared_x_wr_oprnd_sel,
+spu_mared_a_rd_oprnd_sel,
+spu_mared_nr_rd_oprnd_sel,
+spu_mared_r_wr_oprnd_sel,
+spu_mared_memwen,
+spu_mared_j_ptr_sel,
+spu_mared_update_jptr,
+spu_mared_rst_jptr,
+spu_mared_maxlen_wen,
+spu_mared_cin_set_4sub,
+
+spu_mast_memren,
+spu_mared_start_wen,
+spu_mared_start_sel,
+
+spu_maexp_incr_es_ptr,
+spu_maexp_e_rd_oprnd_sel,
+spu_maexp_es_max_init,
+spu_maexp_es_e_ptr_rst,
+
+spu_madp_mpa_addr_3,
+
+
+
+spu_maaddr_jequiminus1rshft,
+spu_maaddr_iequtwolen,
+spu_maaddr_ieven,
+spu_maaddr_ieq0,
+
+spu_maaddr_aequb,
+
+spu_maaeqb_memwen,
+spu_maaeqb_irshft_sel,
+
+spu_mared_update_redwr_jptr,
+spu_mared_jjptr_wen,
+
+spu_mamul_jjptr_sel,
+
+mem_bypass,
+sehold,
+
+spu_mamul_rst,
+
+se,
+reset,
+rclk);
+
+// ---------------------------------------------------------------
+input reset;
+input rclk;
+input se;
+
+
+input spu_mald_force_mpa_add16;
+
+input spu_mactl_ldop;
+//input spu_mactl_mulop;
+input [47:0] spu_madp_maaddr_reg_in;
+input [5:0] spu_madp_mactl_reg;
+
+input spu_mald_maaddr_addrinc;
+input spu_mald_mpa_addrinc;
+input spu_mast_maaddr_addrinc;
+input spu_mast_mpa_addrinc;
+
+input spu_mamul_memwen;
+input spu_mamul_rst_iptr;
+input spu_mamul_rst_jptr;
+input spu_mamul_incr_iptr;
+input spu_mamul_incr_jptr;
+
+input spu_mamul_a_rd_oprnd_sel;
+input spu_mamul_ax_rd_oprnd_sel;
+input spu_mamul_b_rd_oprnd_sel;
+input spu_mamul_ba_rd_oprnd_sel;
+input spu_mamul_m_rd_oprnd_sel;
+input spu_mamul_n_rd_oprnd_sel;
+input spu_mamul_m_wr_oprnd_sel;
+
+input spu_mared_me_rd_oprnd_sel;
+input spu_mared_xe_wr_oprnd_sel;
+input spu_mamul_me_rd_oprnd_sel;
+input spu_mamul_me_wr_oprnd_sel;
+
+input spu_mamul_i_ptr_sel;
+input spu_mamul_iminus1_ptr_sel;
+input spu_mamul_j_ptr_sel;
+input spu_mamul_iminusj_ptr_sel;
+input spu_mamul_iminuslenminus1_sel;
+
+input spu_mamul_jjptr_wen;
+
+input spu_mactl_iss_pulse_pre;
+input spu_mactl_iss_pulse;
+
+input spu_mared_m_rd_oprnd_sel;
+input spu_mared_nm_rd_oprnd_sel;
+input spu_mared_x_wr_oprnd_sel;
+input spu_mared_a_rd_oprnd_sel;
+input spu_mared_nr_rd_oprnd_sel;
+input spu_mared_r_wr_oprnd_sel;
+input spu_mared_memwen;
+input spu_mared_j_ptr_sel;
+input spu_mared_update_jptr;
+input spu_mared_rst_jptr;
+input spu_mared_maxlen_wen;
+input spu_mared_cin_set_4sub;
+
+input spu_mast_memren;
+input spu_mared_start_wen;
+input spu_mared_start_sel;
+
+input spu_maexp_incr_es_ptr;
+input spu_maexp_e_rd_oprnd_sel;
+input spu_maexp_es_max_init;
+input spu_maexp_es_e_ptr_rst;
+
+
+input spu_madp_mpa_addr_3;
+input spu_maaeqb_memwen;
+input spu_maaeqb_irshft_sel;
+
+input spu_mared_update_redwr_jptr;
+input spu_mared_jjptr_wen;
+
+input spu_mamul_jjptr_sel;
+
+input mem_bypass;
+input sehold;
+
+input spu_mamul_rst;
+// ---------------------------------------------------------------
+output spu_maaddr_len_neqz;
+
+output spu_maaddr_mpa1maddr0;
+
+output [7:1] spu_maaddr_memindx;
+output [3:0] spu_maaddr_mamem_eveodd_sel_l;
+
+output spu_maaddr_iequtwolenplus2;
+output spu_maaddr_iequtwolenplus1;
+output spu_maaddr_jequiminus1;
+output spu_maaddr_jequlen;
+
+output spu_maaddr_halfpnt_set;
+
+output spu_maaddr_len_eqmax;
+
+output spu_maaddr_esmod64;
+output spu_maaddr_esmax;
+
+output spu_maaddr_nooddwr_on_leneq1;
+
+output spu_maaddr_mpa_addrinc;
+output spu_maaddr_mpa_wen;
+output [4:0] spu_maaddr_mpa_incr_val;
+
+
+
+
+output spu_maaddr_jequiminus1rshft;
+output spu_maaddr_iequtwolen;
+output spu_maaddr_ieven;
+output spu_maaddr_ieq0;
+
+output spu_maaddr_aequb;
+
+output [5:0] spu_maaddr_len_cntr;
+
+output spu_maaddr_wrindx_0;
+output spu_maaddr_jptr_eqz_mared;
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+
+wire reset_local = reset | spu_mactl_iss_pulse_pre;
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+
+wire [47:0] spu_madp_maaddr_reg;
+wire [6:0] len_decr_val,len_ptr_sub_q,len_ptr_mx,len_ptr_sub;
+wire incr_2wd;
+wire [1:0] len_1or2_dec;
+
+wire [7:0] maaddr_mx,maaddr_incr_val,spu_maaddr_maaddr_add,spu_maaddr_maaddr_q;
+wire maaddr_wen;
+
+wire [6:0] j_ptr_data;
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+
+dffe_s  #(48) maaddr_reg_ff  (
+        .din(spu_madp_maaddr_reg_in[47:0]) ,
+        .q(spu_madp_maaddr_reg[47:0]),
+        .en(spu_mactl_iss_pulse_pre),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+// ---------------------------------------------------------------
+wire spu_maaddr_maaddr_addrinc = spu_mald_maaddr_addrinc | spu_mast_maaddr_addrinc;
+assign spu_maaddr_mpa_addrinc = spu_mald_mpa_addrinc | spu_mast_mpa_addrinc;
+// ---------------------------------------------------------------
+// ***************** LEN *******************
+// --------------------------------------------------------------
+
+wire len_wen = spu_maaddr_maaddr_addrinc | spu_mactl_iss_pulse;
+
+mux2ds  #(7) len_issorex_mx (
+        .in0    (len_ptr_sub_q[6:0]), 
+        .in1    ({1'b0,spu_madp_mactl_reg[5:0]}), 
+        .sel0   (spu_maaddr_maaddr_addrinc),  
+        .sel1   (~spu_maaddr_maaddr_addrinc),
+        .dout   (len_ptr_mx[6:0])
+);
+
+// for memload decr by 16bytes(2words) and for memstore & mulop decr by
+// 8bytes(1word) or when transferring one word rather than two from 
+// ld buf to mamem.
+
+
+mux2ds  #(2) len_1or2_mx (
+        .in0    (2'b01), // already inverted
+        .in1    (2'b10), 
+        .sel0   (incr_2wd),
+        .sel1   (~incr_2wd),
+        .dout   (len_1or2_dec[1:0])
+);
+// NOTE(review): the comments in this block assume mux2ds passes in0 when sel0
+// is high and in1 when sel1 is high - confirm against the cell library.
+mux2ds  #(7) len_ptr_iss_mx (
+        .in0    (7'b0000000), 
+        .in1    ({5'b11111,len_1or2_dec[1:0]}), 
+        .sel0   (spu_mactl_iss_pulse),  
+        .sel1   (~spu_mactl_iss_pulse),
+        .dout   (len_decr_val[6:0])
+);
+// With the +1 below, 7'b1111101 acts as -2 and 7'b1111110 as -1 (mod 128).
+assign len_ptr_sub[6:0] = len_ptr_mx[6:0] + len_decr_val[6:0] + 
+						7'b0000001; 
+// Length register: en is tied to len_wen, rst to reset_local.
+dffre_s  #(7) len_ptr_ff  (
+        .din(len_ptr_sub[6:0]) ,
+        .q(len_ptr_sub_q[6:0]),
+        .en(len_wen),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+// Reduction OR: high while any bit of the registered length is set.
+assign spu_maaddr_len_neqz = |len_ptr_sub_q[6:0];
+
+// This is for the case when the len has been decremented to 1 and there
+// is a possibility the next decr will be -2, which would make the len a
+// negative number.
+
+wire allow_incr_2wd = (|len_ptr_sub_q[6:1]) & spu_mactl_ldop;
+
+
+
+// when mpa0maddr0 make sure the last write does not happen to 
+// odd side when len has reached one. mpa=0 and maddr=0 are checked in
+// spu_mactl.v.
+
+assign spu_maaddr_nooddwr_on_leneq1 = ~(~allow_incr_2wd & len_ptr_sub_q[0]); 
+
+
+wire [5:0] spu_maaddr_len_cntr_l;
+assign spu_maaddr_len_cntr_l[5:0] = ~len_ptr_sub_q[5:0];
+assign spu_maaddr_len_cntr[5:0] = ~spu_maaddr_len_cntr_l[5:0];
+
+// --------------------------------------------------------------
+// ***************** MPA *******************
+// --------------------------------------------------------------
+
+wire mpa_incr_2wd = incr_2wd | spu_mald_force_mpa_add16;
+
+
+mux2ds  #(5) mpa_iss_mx (
+        .in0    (5'b10000),
+        .in1    (5'b01000),
+        .sel0   (mpa_incr_2wd),  
+        .sel1   (~mpa_incr_2wd),
+        .dout   (spu_maaddr_mpa_incr_val[4:0])
+);
+
+
+assign spu_maaddr_mpa_wen = spu_maaddr_mpa_addrinc | spu_mactl_iss_pulse;
+
+// --------------------------------------------------------------
+// ***************** MA_ADDR *******************
+// --------------------------------------------------------------
+
+mux2ds  #(8) maaddr_issorex_mx (
+        .in0    (spu_maaddr_maaddr_add[7:0]), 
+        .in1    (spu_madp_maaddr_reg[7:0]), 
+        .sel0   (spu_maaddr_maaddr_addrinc),  
+        .sel1   (~spu_maaddr_maaddr_addrinc),
+        .dout   (maaddr_mx[7:0])
+);
+
+
+mux2ds  #(8) maaddr_iss_mx (
+        .in0    (8'b00000010),
+        .in1    (8'b00000001), 
+        .sel0   (incr_2wd),  
+        .sel1   (~incr_2wd),
+        .dout   (maaddr_incr_val[7:0])
+);
+
+assign spu_maaddr_maaddr_add[7:0] = spu_maaddr_maaddr_q[7:0] + maaddr_incr_val[7:0];
+
+assign maaddr_wen = spu_maaddr_maaddr_addrinc | spu_mactl_iss_pulse;
+
+dffre_s  #(8) maaadr_pa_mx  (
+        .din(maaddr_mx[7:0]) ,
+        .q(spu_maaddr_maaddr_q[7:0]),
+	.en(maaddr_wen),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+
+
+// --------------------------------------------------------------
+// ***************** INCR VALUE DECODE *******************
+// --------------------------------------------------------------
+
+// 16-byte increments for load from L2, and 8-byte increments for
+// store to L2 or if it is an 8-byte transfer from ld buff to mamem.
+
+
+assign incr_2wd = ~spu_madp_mpa_addr_3 & ~spu_maaddr_maaddr_q[0] & spu_mactl_ldop &
+				allow_incr_2wd;
+
+
+
+assign spu_maaddr_mpa1maddr0 = spu_madp_mpa_addr_3 & ~spu_maaddr_maaddr_q[0] & 
+					spu_mactl_ldop;
+
+
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+// ******************* A == B ******************
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+
+assign spu_maaddr_aequb = (spu_madp_maaddr_reg[7:0] == spu_madp_maaddr_reg[15:8]);
+
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+// ***************** MEMINDEX *******************
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+
+wire [7:0] memld_wroffset,x_mul_wroffset,m_mul_wroffset,r_red_wroffset,
+		x_exp_wroffset,memst_rdoffset,mmx_mulredexp_rdoffset,nrn_mulredexp_rdoffset,
+		bnm_mulredexp_rdoffset,aaa_mulredexp_rdoffset,e_exp_rdoffset;
+
+
+assign memld_wroffset = spu_maaddr_maaddr_q[7:0];
+
+assign x_mul_wroffset = spu_madp_maaddr_reg[39:32];
+assign m_mul_wroffset = spu_madp_maaddr_reg[31:24];
+assign r_red_wroffset = spu_madp_maaddr_reg[23:16];
+assign x_exp_wroffset = spu_madp_maaddr_reg[31:24];
+
+assign memst_rdoffset = spu_maaddr_maaddr_q[7:0];
+
+assign mmx_mulredexp_rdoffset = spu_madp_maaddr_reg[31:24];
+assign nrn_mulredexp_rdoffset = spu_madp_maaddr_reg[23:16];
+assign bnm_mulredexp_rdoffset = spu_madp_maaddr_reg[15:8];
+assign aaa_mulredexp_rdoffset = spu_madp_maaddr_reg[7:0];
+assign e_exp_rdoffset = spu_madp_maaddr_reg[39:32];
+
+// --------------- WR INDEX -------------------------------------
+
+wire [7:0] wroffset_a,wroffset_b,wr_offset;
+wire [7:0] rdoffset_a,rdoffset_b,rd_offset;
+
+wire wr_a_sel = spu_mactl_ldop | spu_mamul_m_wr_oprnd_sel |
+		spu_mared_x_wr_oprnd_sel;
+
+/******
+// converting muxes to AO logic to fix 0in problems.
+mux3ds  #(8) wroffset_a_mx (
+        .in0    (memld_wroffset[7:0]), 
+        .in1    (x_mul_wroffset[7:0]), 
+        .in2    (m_mul_wroffset[7:0]), 
+        .sel0   (spu_mactl_ldop),  
+        .sel1   (spu_mared_x_wr_oprnd_sel),
+        .sel2   (spu_mamul_m_wr_oprnd_sel),
+        .dout   (wroffset_a[7:0])
+);
+
+mux3ds  #(8) wroffset_b_mx (
+        .in0    (r_red_wroffset[7:0]),
+        .in1    (x_exp_wroffset[7:0]), 
+        .in2    (spu_madp_maaddr_reg[15:8]), 
+        .sel0   (spu_mared_r_wr_oprnd_sel),  
+        .sel1   (spu_mared_xe_wr_oprnd_sel),
+        .sel2   (spu_mamul_me_wr_oprnd_sel),
+        .dout   (wroffset_b[7:0])
+);
+
+******/
+
+assign wroffset_a[7:0] = 
+			(memld_wroffset[7:0] & {8{spu_mactl_ldop}}) |
+			(x_mul_wroffset[7:0] & {8{spu_mared_x_wr_oprnd_sel}}) |
+			(m_mul_wroffset[7:0] & {8{spu_mamul_m_wr_oprnd_sel}}) ;
+
+assign wroffset_b[7:0] =
+			(r_red_wroffset[7:0] & {8{spu_mared_r_wr_oprnd_sel}}) |
+			(x_exp_wroffset[7:0] & {8{spu_mared_xe_wr_oprnd_sel}}) |
+			(spu_madp_maaddr_reg[15:8] & {8{spu_mamul_me_wr_oprnd_sel}}) ;
+
+
+
+
+mux2ds  #(8) wroffset_mx (
+        .in0    (wroffset_a[7:0]),
+        .in1    (wroffset_b[7:0]), 
+        .sel0   (wr_a_sel),  
+        .sel1   (~wr_a_sel),
+        .dout   (wr_offset[7:0])
+);
+
+// --------------- RD INDEX -------------------------------------
+wire [2:0] rd_a_sel;
+wire [2:0] rd_b_sel;
+
+// sel for store needs to be added (NOTE(review): spu_mast_memren is already ORed in below; this note may be stale)
+wire rd_a_select = spu_mamul_m_rd_oprnd_sel | spu_mamul_n_rd_oprnd_sel |
+		spu_mared_m_rd_oprnd_sel | spu_mared_nm_rd_oprnd_sel |
+		spu_mamul_ax_rd_oprnd_sel | 
+			spu_mast_memren; 
+
+assign rd_a_sel[0] = spu_mast_memren;
+assign rd_a_sel[1] = spu_mamul_m_rd_oprnd_sel | spu_mared_m_rd_oprnd_sel |
+			spu_mamul_ax_rd_oprnd_sel ;
+assign rd_a_sel[2] = spu_mamul_n_rd_oprnd_sel | spu_mared_nm_rd_oprnd_sel;
+
+assign rd_b_sel[0] = spu_mamul_b_rd_oprnd_sel | spu_mared_nr_rd_oprnd_sel |
+			spu_mared_me_rd_oprnd_sel | spu_mamul_me_rd_oprnd_sel;
+assign rd_b_sel[1] = spu_mamul_a_rd_oprnd_sel | spu_mared_a_rd_oprnd_sel |
+			spu_mamul_ba_rd_oprnd_sel;
+assign rd_b_sel[2] = spu_maexp_e_rd_oprnd_sel;
+
+
+/***************
+mux3ds  #(8) rdoffset_a_mx (
+        .in0    (memst_rdoffset[7:0]),
+        .in1    (mmx_mulredexp_rdoffset[7:0]), 
+        .in2    (nrn_mulredexp_rdoffset[7:0]), 
+        .sel0   (rd_a_sel[0]),  
+        .sel1   (rd_a_sel[1]),
+        .sel2   (rd_a_sel[2]),
+        .dout   (rdoffset_a[7:0])
+);
+
+mux3ds  #(8) rdoffset_b_mx (
+        .in0    (bnm_mulredexp_rdoffset[7:0]),
+        .in1    (aaa_mulredexp_rdoffset[7:0]), 
+        .in2    (e_exp_rdoffset[7:0]), 
+        .sel0   (rd_b_sel[0]),  
+        .sel1   (rd_b_sel[1]),
+        .sel2   (rd_b_sel[2]),
+        .dout   (rdoffset_b[7:0])
+);
+****************/
+
+assign rdoffset_a[7:0] =
+			(memst_rdoffset[7:0] & {8{rd_a_sel[0]}}) |
+			(mmx_mulredexp_rdoffset[7:0] & {8{rd_a_sel[1]}}) |
+			(nrn_mulredexp_rdoffset[7:0] & {8{rd_a_sel[2]}}) ;
+
+assign rdoffset_b[7:0] =
+                        (bnm_mulredexp_rdoffset[7:0] & {8{rd_b_sel[0]}}) |
+                        (aaa_mulredexp_rdoffset[7:0] & {8{rd_b_sel[1]}}) |
+                        (e_exp_rdoffset[7:0] & {8{rd_b_sel[2]}}) ;
+
+
+
+
+mux2ds  #(8) rdoffset_mx (
+        .in0    (rdoffset_a[7:0]),
+        .in1    (rdoffset_b[7:0]), 
+        .sel0   (rd_a_select),  
+        .sel1   (~rd_a_select),
+        .dout   (rd_offset[7:0])
+);
+
+
+// --------------------------------------------------------------
+
+// spu_mamul_memwen is true when m[i] operand is updated.
+wire mem_wr_op = spu_mactl_ldop | spu_mamul_memwen | spu_mared_memwen |
+				spu_maaeqb_memwen;
+
+wire [7:0] memoffset;
+
+mux2ds  #(8) memoffset_rd_wr (
+        .in0    (wr_offset[7:0]),
+        .in1    (rd_offset[7:0]), 
+        .sel0   (mem_wr_op),  
+        .sel1   (~mem_wr_op),
+        .dout   (memoffset[7:0])
+);
+
+
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+
+wire [6:0] j_ptr,j_ptr_add;
+wire [5:0] len;
+wire [6:0] twolen,lenplus1,twolenplus2,iminus1_rshft,i_rshft,
+		twolenplus1,iminus1_ptr,iminus_len,i_ptr_add,i_ptr;
+
+wire i_equ_lenplus1,j_equ_iminus1rshft,
+		j_equ_iminus1,
+		i_ptr_rst;
+
+//wire i_equ_zero;
+		
+
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+// ------------ FINAL ADDER & ADDVAL MUX FOR MEMINDEX -----------
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+// --------------------------------------------------------------
+wire [6:0] jj_ptr;
+wire [6:0] e_ptr;
+wire [6:0] j_ptr_redwr_val;
+
+wire [6:0] add_val,addval_a,addval_b;
+wire [6:0] iminusj_ptr,iminus1_lenminus1;
+
+wire add_a_sel = spu_mamul_iminuslenminus1_sel |
+			spu_mamul_iminus1_ptr_sel | 
+			spu_mamul_i_ptr_sel | spu_maaeqb_irshft_sel; 
+
+wire mulred_j_ptr_sel = spu_mamul_j_ptr_sel | spu_mared_j_ptr_sel;
+wire add_b_sel = mulred_j_ptr_sel | spu_mamul_iminusj_ptr_sel |
+				spu_maexp_e_rd_oprnd_sel | spu_mared_memwen;
+
+//wire add_c_sel = ~(add_a_sel | add_b_sel);
+
+/*****
+mux4ds  #(7) addval_a_mux (
+        .in0    (iminus1_lenminus1[6:0]),
+        .in1    (iminus1_ptr[6:0]), 
+        .in2    (i_rshft[6:0]), 
+        .in3    (i_ptr[6:0]), 
+        .sel0   (spu_mamul_iminuslenminus1_sel),  
+        .sel1   (spu_mamul_iminus1_ptr_sel),
+        .sel2   (spu_maaeqb_irshft_sel),
+        .sel3   (spu_mamul_i_ptr_sel),
+        .dout   (addval_a[6:0])
+);
+
+*******/
+
+assign addval_a[6:0] = 
+			(iminus1_lenminus1[6:0] & {7{spu_mamul_iminuslenminus1_sel}}) |
+			(iminus1_ptr[6:0] & {7{spu_mamul_iminus1_ptr_sel}}) |
+			(i_rshft[6:0] & {7{spu_maaeqb_irshft_sel}}) |
+			(i_ptr[6:0] & {7{spu_mamul_i_ptr_sel}}) ;
+
+/****** 
+mux4ds  #(7) addval_b_mux (
+        .in0    (jj_ptr[6:0]),
+        .in1    (iminusj_ptr[6:0]),
+        .in2    (e_ptr[6:0]),
+        .in3    (j_ptr_redwr_val[6:0]),
+        .sel0   (mulred_j_ptr_sel),
+        .sel1   (spu_mamul_iminusj_ptr_sel),
+        .sel2   (spu_maexp_e_rd_oprnd_sel),
+        .sel3   (spu_mared_memwen),
+        .dout   (addval_b[6:0])
+);
+**********/
+
+assign addval_b[6:0] = 
+			(jj_ptr[6:0] & {7{mulred_j_ptr_sel}}) |
+			(iminusj_ptr[6:0] & {7{spu_mamul_iminusj_ptr_sel}}) |
+			(e_ptr[6:0] & {7{spu_maexp_e_rd_oprnd_sel}}) |
+			(j_ptr_redwr_val[6:0] & {7{spu_mared_memwen}}) ;
+			
+
+/*******
+mux3ds  #(7) addval_mux (
+        .in0    (addval_a[6:0]), 
+        .in1    (addval_b[6:0]), 
+        .in2    (7'b0000000),
+        .sel0   (add_a_sel),  
+        .sel1   (add_b_sel),
+        .sel2   (add_c_sel),
+        .dout   (add_val[6:0])
+);
+
+********/
+
+assign add_val[6:0] =
+			(addval_a[6:0] & {7{add_a_sel}}) |
+			(addval_b[6:0] & {7{add_b_sel}}) ;
+
+wire spu_maaddr_memindx_0;
+assign {spu_maaddr_memindx[7:1],spu_maaddr_memindx_0} = {1'b0,add_val[6:0]} + memoffset[7:0];
+
+
+assign spu_maaddr_wrindx_0 = wr_offset[0] + add_val[0];
+
+
+// ----------------------
+// check for out of range
+// ----------------------
+wire spu_maaddr_memindx_outofrange_q;
+// Set when index bit 7 is high together with bit 6 or bit 5 (8-bit sum >= 160).
+wire spu_maaddr_memindx_outofrange = (spu_maaddr_memindx[7] & spu_maaddr_memindx[6]) | 
+					(spu_maaddr_memindx[7] & spu_maaddr_memindx[5]) ;
+// While sehold is high the flop below is fed its own output instead of the
+// freshly computed flag.
+wire spu_maaddr_memindx_outofrange_sehold = sehold ? spu_maaddr_memindx_outofrange_q :
+							spu_maaddr_memindx_outofrange;
+// The se pin of this flop is tied to 1'b0.
+dff_s #(1) spu_maaddr_memindx_outofrange_ff (
+        .din(spu_maaddr_memindx_outofrange_sehold) ,
+        .q(spu_maaddr_memindx_outofrange_q),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+// ----------------------
+wire spu_maaddr_memindx_0_q;
+// While sehold is high the flop is fed its own output instead of the new bit 0.
+wire spu_maaddr_memindx_0_sehold = sehold ? spu_maaddr_memindx_0_q :
+						spu_maaddr_memindx_0;
+// Registered bit 0 of the index sum; the se pin of this flop is tied to 1'b0.
+dff_s #(1) mem_indx_0_ff (
+        .din(spu_maaddr_memindx_0_sehold) ,
+        .q(spu_maaddr_memindx_0_q),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+// One-hot: [3] mem_bypass; else [2] out of range; else [1]/[0] by registered index bit 0.
+wire [3:0] spu_maaddr_mamem_eveodd_sel;
+assign spu_maaddr_mamem_eveodd_sel[0] = ~(mem_bypass | spu_maaddr_memindx_outofrange_q) & ~spu_maaddr_memindx_0_q;
+assign spu_maaddr_mamem_eveodd_sel[1] = ~(mem_bypass | spu_maaddr_memindx_outofrange_q) & spu_maaddr_memindx_0_q;
+assign spu_maaddr_mamem_eveodd_sel[2] = ~mem_bypass & spu_maaddr_memindx_outofrange_q;
+assign spu_maaddr_mamem_eveodd_sel[3] =  mem_bypass;
+// The module output is the bitwise inverse of the select vector.
+assign spu_maaddr_mamem_eveodd_sel_l = ~spu_maaddr_mamem_eveodd_sel;
+
+// --------------------------------------------------------
+// --------------------------------------------------------
+// --------------------------------------------------------
+// --------------------------------------------------------
+// --------------------------------------------------------
+// ---------------- POINTERS ------------------------------
+// --------------------------------------------------------
+// --------------------------------------------------------
+// --------------------------------------------------------
+// --------------------------------------------------------
+
+
+// ---------------------------------------------------------------
+
+assign len[5:0] = spu_madp_mactl_reg[5:0];
+assign twolen[6:0] = {spu_madp_mactl_reg[5],spu_madp_mactl_reg[4],
+		    spu_madp_mactl_reg[3],spu_madp_mactl_reg[2],
+		    spu_madp_mactl_reg[1],spu_madp_mactl_reg[0],1'b0};
+assign lenplus1[6:0] = {1'b0,spu_madp_mactl_reg[5:0]} + 7'b0000001;
+//assign lenminus1[5:0] = spu_madp_mactl_reg[5:0] - 1'b1;
+assign twolenplus1[6:0] = twolen[6:0] + 7'b0000001;
+assign twolenplus2[6:0] = twolen[6:0] + 7'b0000010;
+
+
+
+assign iminus1_rshft[6:0] = {1'b0,iminus1_ptr[6],iminus1_ptr[5],
+				iminus1_ptr[4],iminus1_ptr[3],
+				iminus1_ptr[2],iminus1_ptr[1]};
+
+assign i_rshft[6:0] = {1'b0,i_ptr[6],i_ptr[5],i_ptr[4],i_ptr[3],
+				i_ptr[2],i_ptr[1]};
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// start pointer comparisons.
+// ---------------------------------------------------------------
+
+//assign i_equ_len = (i_ptr_add[6:0] == {1'b0,len[5:0]}); 
+assign i_equ_lenplus1 = (i_ptr[6:0] == lenplus1[6:0]); 
+
+wire i_equ_twolenplus2_pre = (i_ptr_add[6:0] == twolenplus2[6:0]); 
+wire i_equ_twolenplus1_pre = (i_ptr_add[6:0] == twolenplus1[6:0]); 
+
+wire i_equ_twolen_pre = (i_ptr_add[6:0] == twolen[6:0]); 
+wire i_equ_zero_pre = (i_ptr[6:0] == 7'b0000000); 
+
+assign j_equ_iminus1rshft = (j_ptr[6:0] == iminus1_rshft[6:0]); 
+//assign j_equ_iminuslen = (j_ptr[6:0] == iminus_len[6:0]); 
+assign j_equ_iminus1 = (j_ptr[6:0] == iminus1_ptr[6:0]); 
+//wire j_equ_iminus1_pre = (j_ptr_data[6:0] == i_ptr[6:0]); 
+wire j_equ_len_pre = (j_ptr_data[6:0] == {1'b0,len[5:0]}); 
+
+//assign spu_maaddr_iequtwolenplus2 = i_equ_twolenplus2;
+//assign spu_maaddr_iequtwolenplus1 = i_equ_twolenplus1;
+//assign spu_maaddr_iequtwolen = i_equ_twolen_pre;
+
+//assign spu_maaddr_jequiminus1 = j_equ_iminus1;
+//assign spu_maaddr_jequlen = j_equ_len;
+//assign spu_maaddr_jequiminus1rshft = j_equ_iminus1rshft;
+
+assign spu_maaddr_ieq0 = i_equ_zero_pre;
+assign spu_maaddr_ieven = ~i_ptr[0];
+
+// ***************************************************************
+// start of pointers
+// ***************************************************************
+// ---------------------------------------------------------------
+
+wire rst_halfpnt,halfpnt_set;
+// Earlier form of the reset term, kept commented out for reference:
+//assign rst_halfpnt = reset_local | spu_mamul_rst_iptr | ~spu_mactl_mulop;
+assign rst_halfpnt = reset_local | spu_mamul_rst_iptr | spu_mamul_rst ;
+// din is constant 1; en is i_equ_lenplus1 (i_ptr == len + 1); rst is rst_halfpnt.
+dffre_s  #(1) halfpnt_ff (
+        .din(1'b1) ,
+        .q(halfpnt_set),
+        .en(i_equ_lenplus1),
+        .rst(rst_halfpnt), .clk (rclk), .se(se), .si(), .so());
+// The flag is exported unchanged as a module output.
+assign spu_maaddr_halfpnt_set = halfpnt_set ;
+
+
+// --------------------
+  
+wire halfpnt_set_pulse,halfpnt_set_q;
+
+dff_s  #(1) halfpnt_set_pulse_ff (
+        .din(halfpnt_set) ,
+        .q(halfpnt_set_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign halfpnt_set_pulse = ~halfpnt_set_q & halfpnt_set;
+  
+// ---------------------------------------------------------------
+// J pointer (used for mamul and mared)
+// ---------------------------------------------------------------
+
+wire [6:0] jj_ptr_val;
+wire [2:0] jptr_rst_sel;
+
+wire j_ptr_wen = (spu_mamul_incr_jptr | spu_mamul_rst_jptr | halfpnt_set_pulse) |
+		 (spu_mared_update_jptr | spu_mared_rst_jptr) |
+			 spu_mared_start_wen;
+
+
+// in the case of mamul we need to incr and for mared we
+// need to start from msw and decr, and if we find that M is
+// greater than N then we need to start from 0 and incr.
+
+wire carryin = spu_mared_cin_set_4sub; 
+
+wire [6:0] incr_decr_val;
+mux2ds  #(7) incr_decr_val_mx2 (
+        .in0    (7'b1111110),
+        .in1    (7'b0000001), 
+        .sel0   (carryin),  
+        .sel1   (~carryin),
+        .dout   (incr_decr_val[6:0])
+);
+
+
+assign j_ptr_add[6:0] = j_ptr[6:0] + incr_decr_val[6:0] + {6'b000000,carryin};
+
+wire jptr_eqz_pre = ~(|j_ptr_data[6:0]);
+
+wire jptr_eqz_q;
+wire spu_maaddr_jequlen_q;
+
+dffre_s  #(7) j_ptr_ff (
+        .din(j_ptr_data[6:0]) ,
+        .q(j_ptr[6:0]),
+        .en(j_ptr_wen),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+
+dffre_s  #(2) j_ptr_b_ff (
+        .din({jptr_eqz_pre, j_equ_len_pre}) ,
+        .q({jptr_eqz_q, spu_maaddr_jequlen_q}),
+        .en(j_ptr_wen),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+
+assign spu_maaddr_jequlen = spu_maaddr_jequlen_q;
+
+assign spu_maaddr_jptr_eqz_mared = jptr_eqz_q;
+
+//assign spu_maaddr_jptr_eqz = ~(|j_ptr[6:0]);
+
+/*
+assign jptr_rst_sel[0] = (spu_mamul_rst_jptr & spu_maaddr_halfpnt_set) | halfpnt_set_pulse ;
+assign jptr_rst_sel[1] = (spu_mamul_rst_jptr & ~spu_maaddr_halfpnt_set) | spu_mared_rst_jptr;
+assign jptr_rst_sel[2] = ~(halfpnt_set_pulse | spu_mamul_rst_jptr | spu_mared_rst_jptr |
+				spu_mared_start_sel);
+assign jptr_rst_sel[3] =  spu_mared_start_sel;
+*/
+
+assign jptr_rst_sel[0] = (spu_mamul_rst_jptr & spu_maaddr_halfpnt_set) | halfpnt_set_pulse ;
+assign jptr_rst_sel[1] = ~(halfpnt_set_pulse | spu_mamul_rst_jptr | spu_mared_rst_jptr |
+				spu_mared_start_sel);
+assign jptr_rst_sel[2] =  spu_mared_start_sel;
+
+/*
+mux3ds  #(7) jptr_rst_mux (
+        .in0    (iminus_len[6:0]), 
+        .in1    (7'b0000000), 
+        .in2    (j_ptr_add[6:0]), 
+        .sel0   (jptr_rst_sel[0]),
+        .sel1   (jptr_rst_sel[1]),
+        .sel2   (jptr_rst_sel[2]),
+        .dout   (j_ptr_data[6:0])
+);*/
+
+/*********
+mux4ds  #(7) jptr_rst_mux (
+        .in0    (iminus_len[6:0]),
+        .in1    (7'b0000000),
+        .in2    (j_ptr_add[6:0]),
+        .in3    ({1'b0,spu_madp_mactl_reg[5:0]}),//when mared op starts,load the LEN into jptr.
+        .sel0   (jptr_rst_sel[0]),
+        .sel1   (jptr_rst_sel[1]),
+        .sel2   (jptr_rst_sel[2]),
+        .sel3   (jptr_rst_sel[3]),
+        .dout   (j_ptr_data[6:0])
+);
+************/
+
+assign j_ptr_data[6:0] =
+			(iminus_len[6:0] & {7{jptr_rst_sel[0]}}) |
+			(j_ptr_add[6:0] & {7{jptr_rst_sel[1]}}) |
+			({1'b0,spu_madp_mactl_reg[5:0]} & {7{jptr_rst_sel[2]}}) ;
+ 
+wire jj_ptr_update = spu_mamul_jjptr_wen | spu_mared_jjptr_wen;
+dffre_s  #(7) jj_ptr_ff (
+        .din(j_ptr_add[6:0]) ,
+        .q(jj_ptr_val[6:0]),
+        .en(jj_ptr_update),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+
+//wire jptr_oldnew_sel = spu_mamul_incr_jptr | spu_mared_update_jptr;
+wire jptr_oldnew_sel = spu_mamul_jjptr_sel | spu_mared_update_jptr;
+mux2ds  #(7) jptr_oldnew_mx2 (
+        .in0    (jj_ptr_val[6:0]), 
+        .in1    (j_ptr[6:0]),
+        .sel0   (jptr_oldnew_sel),  
+        .sel1   (~jptr_oldnew_sel),
+        .dout   (jj_ptr[6:0])
+);
+
+
+dffre_s  #(7) j_ptr_redwr_ff (
+        .din(j_ptr[6:0]) ,
+        .q(j_ptr_redwr_val[6:0]),
+        .en(spu_mared_update_redwr_jptr),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+
+// ----------------------
+//saving the jptr this will be the maxlen
+// for the mared subtract count.
+wire [6:0] maxlen;
+dffre_s  #(7) maxlen_ff (
+        .din(j_ptr[6:0]) ,
+        .q(maxlen[6:0]),
+        .en(spu_mared_maxlen_wen),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+
+//assign spu_maaddr_len_eqmax = (j_ptr[6:0] == maxlen[6:0]);
+wire spu_maaddr_len_eqmax_pre = (j_ptr_data[6:0] == maxlen[6:0]);
+
+
+dffre_s  #(1) len_eqmax_ff (
+        .din(spu_maaddr_len_eqmax_pre) ,
+        .q(spu_maaddr_len_eqmax),
+        .en(j_ptr_wen),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+
+
+// ---------------------------------------------------------------
+// I pointer 
+// ---------------------------------------------------------------
+
+assign i_ptr_rst = reset_local | spu_mamul_rst_iptr;
+
+assign i_ptr_add[6:0] = i_ptr[6:0] + 7'b0000001;
+
+dffre_s  #(7) i_ptr_ff (
+        .din(i_ptr_add[6:0]) ,
+        .q(i_ptr[6:0]),
+        .en(spu_mamul_incr_iptr),
+        .rst(i_ptr_rst), .clk (rclk), .se(se), .si(), .so());
+
+dffre_s  #(7) iminus1_ptr_ff (
+        .din(i_ptr[6:0]) ,
+        .q(iminus1_ptr[6:0]),
+        .en(spu_mamul_incr_iptr),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+
+
+wire spu_maaddr_jequiminus1_q;
+wire spu_maaddr_jequiminus1rshft_q;
+dffre_s  #(2) j_equ_iminus1_ff (
+        .din({j_equ_iminus1, j_equ_iminus1rshft}) ,
+        .q({spu_maaddr_jequiminus1_q, spu_maaddr_jequiminus1rshft_q}),
+        .en(1'b1),
+        .rst(reset_local), .clk (rclk), .se(se), .si(), .so());
+
+assign spu_maaddr_jequiminus1 = spu_maaddr_jequiminus1_q;
+assign spu_maaddr_jequiminus1rshft = spu_maaddr_jequiminus1rshft_q;
+
+
+	// added this to fix timing path.
+dffre_s  #(3) i_equ_stuff_pre_ff (
+        .din({i_equ_twolenplus2_pre,i_equ_twolenplus1_pre,i_equ_twolen_pre}) ,
+        .q({spu_maaddr_iequtwolenplus2,spu_maaddr_iequtwolenplus1,spu_maaddr_iequtwolen}),
+        .en(spu_mamul_incr_iptr),
+        .rst(i_ptr_rst), .clk (rclk), .se(se), .si(), .so());
+
+// ---------------------------------------------------------------
+// I-J pointer 
+// ---------------------------------------------------------------
+
+wire [6:0] iminusj_ptr_pre;
+assign iminusj_ptr_pre[6:0] = i_ptr[6:0] + ~jj_ptr[6:0] + 7'b0000001;
+
+dff_s  #(7) iminusj_ff (
+        .din(iminusj_ptr_pre[6:0]) ,
+        .q(iminusj_ptr[6:0]),
+        .clk (rclk), .se(se), .si(), .so());
+
+// assign iminusj_ptr = iminusj_ptr_pre;
+
+// ---------------------------------------------------------------
+// I-LEN-1 pointer 
+// ---------------------------------------------------------------
+
+// assign iminus_lenminus1 = i_ptr[6:0] + {1'b1,~lenminus1[5:0]} + 1'b1;
+//assign iminus_lenminus1[6:0] = (i_ptr[6:0] + {1'b1,~len[5:0]} + 1'b1) - 1'b1;
+assign iminus1_lenminus1[6:0] = (iminus1_ptr[6:0] + {1'b1,~len[5:0]} + 7'b0000001) - 
+							7'b0000001;
+
+// ---------------------------------------------------------------
+// I-LEN pointer
+// ---------------------------------------------------------------
+
+assign iminus_len[6:0] = i_ptr[6:0] + {1'b1,~len[5:0]} + 7'b0000001;
+
+// ---------------------------------------------------------------
+
+/*
+always @(posedge clk) begin
+
+$display ("%t iminus1_ptr",$time);
+end
+*/
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// --------------------- MA EXP ----------------------------------
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+
+// ---------------------------------------------------------------
+// ^^^^^^^^^^^^^^ ES COUNTER ^^^^^^^^^^^
+// ---------------------------------------------------------------
+
+wire [10:0] es_add,es_ptr;
+
+assign es_add = es_ptr  + 11'b00000000001;
+
+dffre_s  #(11) es_ptr_ff (
+        .din(es_add[10:0]) ,
+        .q(es_ptr[10:0]),
+        .en(spu_maexp_incr_es_ptr),
+        .rst(spu_maexp_es_e_ptr_rst), .clk (rclk), .se(se), .si(), .so());
+
+// ---------------------------------------------------------------
+// ^^^^^^^^^ ES_MAX_MINUS1 & COMPARE ^^^^^^^^^
+// ---------------------------------------------------------------
+wire [10:0] es_max_minus1_q,es_max_minus1;
+//wire [8:0] es_max_plus1;
+wire [7:0] es_max_plus1;
+
+//assign es_max_plus1[8:0] = {1'b0,spu_madp_maaddr_reg[47:40]} + 9'b000000001;
+assign es_max_plus1[7:0] = spu_madp_maaddr_reg[47:40] + 8'b00000001;
+
+// vlint error:left size: 11,  right size: 12
+//assign es_max_minus1[10:0] = {es_max_plus1[8:0],3'b000} - 11'b00000000001;
+assign es_max_minus1[10:0] = {es_max_plus1[7:0],3'b000} - 11'b00000000001;
+
+dffre_s  #(11) esmax_minus1_ptr_ff (
+        .din(es_max_minus1[10:0]) ,
+        .q(es_max_minus1_q[10:0]),
+        .en(spu_maexp_es_max_init),//save es_max at start of maexp_op
+        .rst(spu_maexp_es_e_ptr_rst), .clk (rclk), .se(se), .si(), .so());
+
+assign spu_maaddr_esmax = (es_max_minus1_q[10:0] == es_ptr[10:0]);
+
+
+// ---------------------------------------------------------------
+// ^^^^^^^^^^^^^^ MOD64 CHECK ^^^^^^^^^^^
+// ---------------------------------------------------------------
+
+wire max_mod64_reached;
+wire max_mod64_reached_q;
+wire max_mod64_reached_pulse;
+
+
+assign max_mod64_reached = &es_ptr[5:0];
+
+assign spu_maaddr_esmod64 = max_mod64_reached;
+
+dff_s  #(1) mod64_pulse_ff (
+        .din(max_mod64_reached) ,
+        .q(max_mod64_reached_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign max_mod64_reached_pulse = ~max_mod64_reached_q & max_mod64_reached;
+
+// ---------------------------------------------------------------
+// ^^^^^^^^^^^^^^ E PTR ^^^^^^^^^^^
+// ---------------------------------------------------------------
+// E ptr 
+wire [6:0] e_add;
+
+assign e_add = e_ptr + 7'b0000001;
+
+dffre_s  #(7) e_ptr_ff (
+        .din(e_add[6:0]) ,
+        .q(e_ptr[6:0]),
+        .en(max_mod64_reached_pulse),
+        .rst(spu_maexp_es_e_ptr_rst), .clk (rclk), .se(se), .si(), .so());
+
+
+endmodule
+
Index: /trunk/T1-CPU/spu/spu_mared.v
===================================================================
--- /trunk/T1-CPU/spu/spu_mared.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_mared.v	(revision 6)
@@ -0,0 +1,628 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_mared.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    state machine to do MA reduction. 
+*/
+////////////////////////////////////////////////////////////////////////
+
+module spu_mared (
+
+/*outputs*/
+spu_mared_data_sel_l,
+spu_mared_j_ptr_sel,
+spu_mared_nm_rd_oprnd_sel,
+spu_mared_m_rd_oprnd_sel,
+spu_mared_me_rd_oprnd_sel,
+spu_mared_x_wr_oprnd_sel,
+spu_mared_xe_wr_oprnd_sel,
+spu_mared_nr_rd_oprnd_sel,
+spu_mared_a_rd_oprnd_sel,
+spu_mared_r_wr_oprnd_sel,
+spu_mared_update_jptr,
+spu_mared_rst_jptr,
+spu_mared_maxlen_wen,
+spu_mared_rdn_wen,
+spu_mared_oprnd2_wen,
+
+spu_mared_memren,
+spu_mared_memwen,
+
+spu_mared_cin_set_4sub,
+spu_mared_cin_oprnd_sub_mod,
+
+spu_mared_done_set,
+spu_mared_start_wen,
+spu_mared_start_sel,
+
+spu_mared_red_done,
+
+spu_mared_update_redwr_jptr,
+spu_mared_jjptr_wen,
+
+spu_mared_not_idle,
+
+/*inputs*/
+mul_data_out_0,
+spu_madp_m_eq_n,
+spu_madp_m_lt_n,
+
+spu_mactl_expop,
+spu_mactl_mulop,
+spu_mactl_redop,
+spu_mamul_mul_done,
+spu_mactl_iss_pulse_dly,
+
+spu_maaddr_jptr_eqz,
+spu_maaddr_len_eqmax,
+
+spu_mast_stbuf_wen,
+spu_madp_cout_oprnd_sub_mod,
+
+spu_mactl_kill_op,
+
+spu_mactl_stxa_force_abort,
+
+se,
+reset,
+rclk);
+
+// -------------------------------------------------------------------------
+input reset;
+input rclk;
+input se;
+
+input mul_data_out_0;
+
+input spu_madp_m_eq_n;
+input spu_madp_m_lt_n;
+
+input spu_mactl_expop;
+input spu_mactl_mulop;
+input spu_mactl_redop;
+input spu_mamul_mul_done;
+input spu_mactl_iss_pulse_dly;
+
+input spu_maaddr_jptr_eqz;
+input spu_maaddr_len_eqmax;
+
+input spu_mast_stbuf_wen;
+input spu_madp_cout_oprnd_sub_mod;
+
+input spu_mactl_kill_op;
+
+input spu_mactl_stxa_force_abort;
+
+// -------------------------------------------------------------------------
+
+output [3:0] spu_mared_data_sel_l;
+output spu_mared_j_ptr_sel;
+output spu_mared_nm_rd_oprnd_sel;
+output spu_mared_m_rd_oprnd_sel;
+output spu_mared_me_rd_oprnd_sel;
+output spu_mared_x_wr_oprnd_sel;
+output spu_mared_xe_wr_oprnd_sel;
+output spu_mared_nr_rd_oprnd_sel;
+output spu_mared_a_rd_oprnd_sel;
+output spu_mared_r_wr_oprnd_sel;
+output spu_mared_update_jptr;
+output spu_mared_rst_jptr;
+output spu_mared_maxlen_wen;
+output spu_mared_rdn_wen;
+output spu_mared_oprnd2_wen;
+
+output spu_mared_memren;
+output spu_mared_memwen;
+
+output spu_mared_cin_set_4sub;
+output spu_mared_cin_oprnd_sub_mod;
+
+output spu_mared_done_set;
+output spu_mared_start_wen;
+output spu_mared_start_sel;
+output spu_mared_red_done;
+output spu_mared_update_redwr_jptr;
+output spu_mared_jjptr_wen;
+output spu_mared_not_idle;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+wire m_gt_n_rst;
+wire spu_mared_red_done;
+wire m_gt_n_set,m_lt_n_rst,m_lt_n_set;
+wire start_op;
+wire tr2idle_frm_wr0tox,tr2idle_frm_wrmtox,tr2idle_frm_wrstox;
+wire tr2rdm_frm_wr0tox,tr2rdm_frm_saveptrs,dly_saveptrs_state,
+     tr2rdm_frm_wrstox,tr2rdm_frm_wrmtox;
+
+wire start_mtox_from_msw;
+
+wire local_stxa_abort;
+wire cur_rdm_state;
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+wire  state_reset = reset | spu_mared_red_done | spu_mactl_kill_op |
+					local_stxa_abort;
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// A sticky flag is needed to indicate that mulred/red is done, so that when
+// an masync gets issued later, the load asi can be returned.
+// ********* ONLY FOR mul_op & red_op NOT exp_op.
+wire spu_mared_done_wen = (spu_mared_red_done | spu_mactl_kill_op | local_stxa_abort) & 
+				(spu_mactl_mulop | spu_mactl_redop);
+wire spu_mared_done_rst = reset | spu_mactl_iss_pulse_dly; // cleared by reset or by spu_mactl_iss_pulse_dly
+
+dffre_s    #(1) spu_mared_done_ff (
+        .din(1'b1) , // data input is a constant 1: the flop can only be set via .en
+        .q(spu_mared_done_set),
+        .en(spu_mared_done_wen),
+        .rst(spu_mared_done_rst), .clk (rclk)
+        , .se(se), .si(), .so()); 
+
+// FSM state flops, one per state.  cur_*/nxt_* nets are declared explicitly
+// here so the module no longer depends on implicit 1-bit net creation.
+wire nxt_idle_state,     cur_idle_state,     nxt_rdm_state;  // cur_rdm_state is declared further up
+wire nxt_rdmdly_state,   cur_rdmdly_state,   nxt_rdn_state,    cur_rdn_state,    nxt_cmpsub_state, cur_cmpsub_state;
+wire nxt_wr0tox_state,   cur_wr0tox_state,   nxt_wrmtox_state, cur_wrmtox_state, nxt_wrstox_state, cur_wrstox_state;
+wire nxt_saveptrs_state, cur_saveptrs_state, nxt_submn_state,  cur_submn_state;
+dff_s    #(1) idle_state_ff (
+        .din(nxt_idle_state) ,
+        .q(cur_idle_state),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+
+dffr_s  #(1) rdm_state_ff (
+        .din(nxt_rdm_state) ,
+        .q(cur_rdm_state),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+
+assign local_stxa_abort = cur_rdm_state & spu_mactl_stxa_force_abort;
+
+// rdmdly is an extra delay state in the rdm -> wrmtox loop, inserted to
+// match the cycle count of the other read/write loops.
+dffr_s  #(1) rdmdly_state_ff (
+        .din(nxt_rdmdly_state) ,
+        .q(cur_rdmdly_state),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+
+dffr_s  #(1) rdn_state_ff (
+        .din(nxt_rdn_state) ,
+        .q(cur_rdn_state),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+
+dffr_s  #(1) cmpsub_state_ff (
+        .din(nxt_cmpsub_state) ,
+        .q(cur_cmpsub_state),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+
+dffr_s  #(1) wr0tox_state_ff (
+        .din(nxt_wr0tox_state) ,
+        .q(cur_wr0tox_state),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+
+dffr_s  #(1) wrmtox_state_ff (
+        .din(nxt_wrmtox_state) ,
+        .q(cur_wrmtox_state),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+
+// s = m-n
+dffr_s  #(1) wrstox_state_ff (
+        .din(nxt_wrstox_state) ,
+        .q(cur_wrstox_state),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+
+dffr_s  #(1) saveptrs_state_ff (
+        .din(nxt_saveptrs_state) ,
+        .q(cur_saveptrs_state),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+
+dffr_s  #(1) submn_state_ff (
+        .din(nxt_submn_state) ,
+        .q(cur_submn_state),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+wire m_gt_n_q,m_lt_n_q;
+
+wire spu_mared_m_eq_n = spu_madp_m_eq_n & ~(m_lt_n_q | m_gt_n_q);
+//assign spu_mared_m_gt_n = ~(spu_madp_m_eq_n | spu_madp_m_lt_n | m_lt_n_q);
+wire spu_mared_m_lt_n = ~(spu_madp_m_eq_n | m_gt_n_q) & spu_madp_m_lt_n;
+
+// Sticky flag set when a mul_op or exp_op multiply completes without being killed.
+wire mamulred_op_q;	// declared explicitly (previously an implicit 1-bit net)
+wire mamulred_op_rst = state_reset;
+
+wire spu_mamul_mul_done_qual = spu_mamul_mul_done & ~spu_mactl_kill_op;
+
+wire mamulred_op_set = (spu_mactl_mulop | spu_mactl_expop) & spu_mamul_mul_done_qual;
+wire mulred_start = mamulred_op_set;
+
+dffre_s #(1) mamulred_op_ff (
+        .din(1'b1) ,
+        .q(mamulred_op_q),
+        .en(mamulred_op_set),
+        .rst(mamulred_op_rst), .clk (rclk)
+        , .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+assign m_gt_n_rst = state_reset;
+
+assign m_gt_n_set = ((spu_mactl_mulop | spu_mactl_expop) & mul_data_out_0 & spu_mamul_mul_done_qual) |
+				(cur_saveptrs_state & ~m_lt_n_q);
+
+dffre_s #(1) m_gt_n_ff (
+        .din(1'b1) ,
+        .q(m_gt_n_q),
+        .en(m_gt_n_set),
+        .rst(m_gt_n_rst), .clk (rclk)
+        , .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------
+assign m_lt_n_rst = state_reset;
+assign m_lt_n_set = cur_cmpsub_state & spu_mared_m_lt_n;
+
+dffre_s #(1) m_lt_n_ff (
+        .din(1'b1) ,
+        .q(m_lt_n_q),
+        .en(m_lt_n_set),
+        .rst(m_lt_n_rst), .clk (rclk)
+        , .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// transition to idle state
+
+// this delay is so that m_gt_n_q is updated by the time we start, as
+// it is one of the conditions to come out of idle state.
+wire mulred_start_q;
+dff_s #(1) dly_start_mulred_ff (
+        .din(mulred_start) ,
+        .q(mulred_start_q),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+
+// Stand-alone reduction start: redop qualified by spu_mactl_iss_pulse_dly.
+// delaying mared_start so we can save len ptr to jptr before
+// starting.
+wire mared_start_p1 = spu_mactl_redop & spu_mactl_iss_pulse_dly;
+
+wire mared_start_p1_q,mared_start_q;
+dff_s #(2) dly_start_red_ff (	// 2-stage shift: mared_start_p1 -> mared_start_p1_q -> mared_start_q
+        .din({mared_start_p1,mared_start_p1_q}) ,
+        .q({mared_start_p1_q,mared_start_q}),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+// NOTE(review): start_wen uses the kill-qualified mul_done, start_sel the raw one -- presumably intentional; confirm.
+assign spu_mared_start_wen = mared_start_p1_q | start_mtox_from_msw | spu_mamul_mul_done_qual;
+assign spu_mared_start_sel = mared_start_p1_q | start_mtox_from_msw | spu_mamul_mul_done;
+// start_op: delayed start of either a mul/exp-triggered or a stand-alone reduction.
+assign start_op = mulred_start_q | mared_start_q;
+
+wire spu_mared_red_done_dly1, spu_mared_red_done_dly2; // declared explicitly (previously implicit 1-bit nets)
+assign tr2idle_frm_wr0tox = cur_wr0tox_state & spu_maaddr_jptr_eqz;
+assign tr2idle_frm_wrmtox = cur_wrmtox_state & spu_maaddr_jptr_eqz;
+assign tr2idle_frm_wrstox = cur_wrstox_state & spu_maaddr_len_eqmax;
+// any of the three write states reaching its terminal pointer condition ends the reduction
+wire spu_mared_red_done_pre = tr2idle_frm_wr0tox | tr2idle_frm_wrmtox |
+				tr2idle_frm_wrstox;
+// done is reported two flops after spu_mared_red_done_pre (dly1 -> dly2)
+dffr_s #(2) spu_mared_red_done_ff (
+        .din({spu_mared_red_done_pre,spu_mared_red_done_dly1}) ,
+        .q({spu_mared_red_done_dly1,spu_mared_red_done_dly2}),
+        .rst(state_reset), .clk (rclk)
+        , .se(se), .si(), .so());
+
+assign spu_mared_red_done = spu_mared_red_done_dly2 | local_stxa_abort;
+
+// idle state: entered on state_reset or red_done, held until start_op.
+
+assign spu_mared_not_idle = ~cur_idle_state;
+
+// idle_state_ff has no reset pin, so state_reset is folded into this equation.
+assign  nxt_idle_state = (
+                         state_reset | spu_mared_red_done | 
+                         (cur_idle_state & ~start_op));
+
+
+// -------------------------------------------------------------------------
+// transition to rdm state
+// (re-entered from each write state while its terminal pointer condition is not met)
+wire twodly_saveptrs_state;
+
+assign tr2rdm_frm_wr0tox = cur_wr0tox_state & ~spu_maaddr_jptr_eqz;
+assign tr2rdm_frm_saveptrs = twodly_saveptrs_state & ~cur_idle_state;
+assign tr2rdm_frm_wrstox = cur_wrstox_state & ~spu_maaddr_len_eqmax;
+assign tr2rdm_frm_wrmtox = cur_wrmtox_state & m_lt_n_q & ~spu_maaddr_jptr_eqz;
+// from idle, rdm is entered directly only when neither compare flag is set yet
+assign  nxt_rdm_state = ( 
+			 tr2rdm_frm_wrmtox |
+			 tr2rdm_frm_wr0tox | tr2rdm_frm_saveptrs |
+			 tr2rdm_frm_wrstox |
+                         (cur_idle_state & start_op & ~(m_lt_n_q|m_gt_n_q)));
+                         //(cur_idle_state & start_op & ~m_lt_n_q));
+
+// this goes to spu_mamul to get ored with the logic there before
+// sending to spu_madp.
+assign spu_mared_oprnd2_wen = cur_rdm_state;
+
+
+// -------------------------------------------------------------------------
+// transition to rdmdly state
+// taken from rdm once m_lt_n_q is set; rdmdly then leads to wrmtox
+assign  nxt_rdmdly_state = (
+                         (cur_rdm_state & m_lt_n_q) );
+
+// -------------------------------------------------------------------------
+// transition to rdn state
+// taken from rdm whenever m_lt_n_q is not set (the complement of the
+// rdmdly condition above)
+assign  nxt_rdn_state = (
+                         (cur_rdm_state & ~m_lt_n_q));
+
+// the following is for capturing the N data into flop
+// used for subtract & compare.
+assign spu_mared_rdn_wen = cur_rdn_state | spu_mast_stbuf_wen;
+
+// -------------------------------------------------------------------------
+// transition to cmpsub state
+// taken from rdn while neither compare flag (m_lt_n_q / m_gt_n_q) is set
+assign  nxt_cmpsub_state = (
+			 (cur_rdn_state & ~(m_lt_n_q | m_gt_n_q)));
+
+// -------------------------------------------------------------------------
+// transition to wr0tox state
+// taken from cmpsub when the current compare reports equal
+assign  nxt_wr0tox_state = (
+			 (cur_cmpsub_state & spu_mared_m_eq_n));
+
+// -------------------------------------------------------------------------
+// transition to wrmtox state
+// unconditional successor of rdmdly
+assign  nxt_wrmtox_state = (
+                         (cur_rdmdly_state) );
+
+// -------------------------------------------------------------------------
+// transition to wrstox state
+// unconditional successor of submn
+assign  nxt_wrstox_state = (
+                         (cur_submn_state));
+
+// -------------------------------------------------------------------------
+// transition to saveptrs state
+// entered from idle when m_gt_n_q is already set, or from cmpsub on a non-equal compare
+assign  nxt_saveptrs_state = (
+                         (cur_idle_state & start_op & m_gt_n_q) |
+			 (cur_cmpsub_state & ~spu_mared_m_eq_n));
+/*
+                         (cur_cmpsub_state & spu_mared_m_gt_n) |
+			 (cur_cmpsub_state & spu_mared_m_lt_n));
+*/
+
+// one-cycle delayed copy of the saveptrs state
+dffr_s    #(1) dly_saveptrs_ff (
+        .din(cur_saveptrs_state) ,
+        .q(dly_saveptrs_state),
+        .clk (rclk),
+        .rst(state_reset), .se(se), .si(), .so());
+
+// the delay is needed so we can save the pointer before
+// resetting it.
+assign spu_mared_maxlen_wen = cur_saveptrs_state & ~m_lt_n_q;
+assign spu_mared_rst_jptr = dly_saveptrs_state & ~m_lt_n_q;
+// when m_lt_n_q is set, saveptrs instead raises start_mtox_from_msw (feeds start_wen/start_sel)
+assign start_mtox_from_msw = cur_saveptrs_state & m_lt_n_q;
+
+// need to delay this an extra cycle to trigger nxt_rdm_state, so 
+// the len_eqmax has correct value by then.
+dffr_s    #(1) twodly_saveptrs_ff (
+        .din(dly_saveptrs_state) ,
+        .q(twodly_saveptrs_state),
+        .clk (rclk),
+        .rst(state_reset), .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------
+// transition to submn state
+// taken from rdn once m_gt_n_q is set; submn then leads to wrstox
+assign  nxt_submn_state = (
+                         (cur_rdn_state & m_gt_n_q));
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+/*
+assign spu_mared_incr_jptr = nxt_wr0tox_state | nxt_wrmtox_state |
+				nxt_wstox_state;
+*/
+
+// the following is to mux the updated jjptr from a temp
+// flop for the transition to rdm state and then the mux selects
+// the jptr updated value for rdn and wr.
+assign spu_mared_update_jptr = tr2rdm_frm_wr0tox | tr2rdm_frm_wrmtox  |
+				tr2rdm_frm_wrstox;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+
+// added spu_mactl_stxa_force_abort to the following since ren causes perr_set with x's.
+assign spu_mared_memren = (nxt_rdm_state | nxt_rdn_state) & ~spu_mactl_stxa_force_abort;
+// write-enable pipeline: delay nets are declared explicitly (previously implicit 1-bit nets)
+wire nxt_wr0tox_state_dly1, nxt_wr0tox_state_dly2, nxt_wr0tox_state_dly3;
+assign spu_mared_jjptr_wen = nxt_wr0tox_state | nxt_wrmtox_state |
+                                nxt_wrstox_state;
+wire nxt_wrstox_state_dly1, nxt_wrstox_state_dly2, nxt_wrstox_state_dly3;
+dff_s #(3) nxt_wr0tox_state_ff(
+        .din({nxt_wr0tox_state,nxt_wr0tox_state_dly1,nxt_wr0tox_state_dly2}) ,
+        .q({nxt_wr0tox_state_dly1,nxt_wr0tox_state_dly2,nxt_wr0tox_state_dly3}),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+
+dff_s #(3) nxt_wrstox_state_ff(
+        .din({nxt_wrstox_state,nxt_wrstox_state_dly1,nxt_wrstox_state_dly2}) ,
+        .q({nxt_wrstox_state_dly1,nxt_wrstox_state_dly2,nxt_wrstox_state_dly3}),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+// wrmtox uses only two delay stages (wr0tox and wrstox use three)
+wire nxt_wrmtox_state_dly1, nxt_wrmtox_state_dly2;
+dff_s #(2) nxt_wrmtox_state_ff(
+        .din({nxt_wrmtox_state,nxt_wrmtox_state_dly1}) ,
+        .q({nxt_wrmtox_state_dly1,nxt_wrmtox_state_dly2}),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+
+assign spu_mared_memwen = nxt_wr0tox_state_dly3 | nxt_wrmtox_state_dly2 |
+				nxt_wrstox_state_dly3;
+wire spu_mared_start_wen_dly, spu_mared_start_wen_dly2, spu_mared_rst_jptr_dly, spu_mared_rst_jptr_dly2;
+wire spu_mared_memwen_dly;
+dff_s #(2) spu_mared_start_wen_ff(
+        .din({spu_mared_start_wen,spu_mared_start_wen_dly}) ,
+        .q({spu_mared_start_wen_dly,spu_mared_start_wen_dly2}),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+
+dff_s #(2) spu_mared_rst_jptr_ff(
+        .din({spu_mared_rst_jptr,spu_mared_rst_jptr_dly}) ,
+        .q({spu_mared_rst_jptr_dly,spu_mared_rst_jptr_dly2}),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+
+dff_s    #(1) spu_mared_memwen_ff (
+        .din(spu_mared_memwen) ,
+        .q(spu_mared_memwen_dly),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+
+assign spu_mared_update_redwr_jptr  = spu_mared_rst_jptr_dly2 | spu_mared_start_wen_dly2 | 
+						spu_mared_memwen_dly;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+/*
+assign spu_mared_m_rd_oprnd_sel = nxt_rdm_state & (mamulred_op_q | mamulred_op_set);
+assign spu_mared_nm_rd_oprnd_sel = nxt_rdn_state & (mamulred_op_q | mamulred_op_set);
+assign spu_mared_x_wr_oprnd_sel = spu_mared_memwen & mamulred_op_q; 
+*/
+
+assign spu_mared_m_rd_oprnd_sel = nxt_rdm_state & spu_mactl_mulop;
+assign spu_mared_nm_rd_oprnd_sel = nxt_rdn_state & (spu_mactl_mulop | spu_mactl_expop);
+assign spu_mared_x_wr_oprnd_sel = spu_mared_memwen & spu_mactl_mulop; 
+
+assign spu_mared_me_rd_oprnd_sel = nxt_rdm_state & spu_mactl_expop;
+assign spu_mared_xe_wr_oprnd_sel = spu_mared_memwen & spu_mactl_expop; 
+
+assign spu_mared_a_rd_oprnd_sel = nxt_rdm_state & spu_mactl_redop;
+assign spu_mared_nr_rd_oprnd_sel = nxt_rdn_state & spu_mactl_redop;
+assign spu_mared_r_wr_oprnd_sel = spu_mared_memwen & spu_mactl_redop; 
+
+//assign spu_mared_j_ptr_sel = spu_mared_memren | spu_mared_memwen;
+assign spu_mared_j_ptr_sel = spu_mared_memren ;
+
+// -------------------------------------------------------------------------
+// the following selects go to spu_madp.  Exactly one of the four bits is high.
+wire [3:0] spu_mared_data_sel;
+assign spu_mared_data_sel[0] = ~(mamulred_op_q | spu_mactl_redop); // no reduction in progress
+//assign spu_mared_data_sel[1] = (mamulred_op_q | spu_mactl_redop) & spu_mared_m_eq_n;
+assign spu_mared_data_sel[1] = (mamulred_op_q | spu_mactl_redop) & ~m_lt_n_q & ~m_gt_n_q; // neither flag set
+assign spu_mared_data_sel[2] = (mamulred_op_q | spu_mactl_redop) & m_lt_n_q & ~m_gt_n_q; // only m_lt_n_q set
+assign spu_mared_data_sel[3] = (mamulred_op_q | spu_mactl_redop) & m_gt_n_q; // m_gt_n_q set (takes priority over m_lt_n_q)
+
+assign spu_mared_data_sel_l[3:0] = ~spu_mared_data_sel[3:0]; // active-low copy driven on the output port
+// -------------------------------------------------------------------------
+// cin_set_4sub is high in the two reduction cases where m_gt_n_q is not set.
+assign spu_mared_cin_set_4sub = spu_mared_data_sel[2] | spu_mared_data_sel[1];
+
+// -------------------------------------------------------------------------
+// carry-in generation for the operand subtract
+// except for the first word subtract(starting at jptr=0), use borrow from the
+// previous stage as cin for the next stage.
+wire sel_cout_frm_prev_stage = (~spu_maaddr_jptr_eqz & m_gt_n_q) & ~start_op;
+
+wire spu_mared_cin_oprnd_sub_mod_pre;
+mux3ds  #(1) cin_sel_mux (	// in0/sel0 are tied to 0, so only in1 and in2 are ever selected
+        .in0    (1'b0),
+        .in1    (1'b1),
+        .in2    (spu_madp_cout_oprnd_sub_mod),
+        .sel0   (1'b0),
+        .sel1   (~sel_cout_frm_prev_stage),
+        .sel2   (sel_cout_frm_prev_stage),
+        .dout   (spu_mared_cin_oprnd_sub_mod_pre)
+);
+
+// the carry register is loaded at start_op and one cycle after each wrstox state
+wire dly_cur_wrstox_state;
+
+wire cin_cout_wen = start_op | dly_cur_wrstox_state;
+
+wire spu_mared_cin_oprnd_sub_mod_q;
+dffre_s    #(1) cin_cout_ff (
+        .din(spu_mared_cin_oprnd_sub_mod_pre) ,
+        .q(spu_mared_cin_oprnd_sub_mod_q),
+        .en(cin_cout_wen),
+        .rst(reset), 
+        .clk (rclk)
+        , .se(se), .si(), .so());
+
+// NOTE: despite its name, force_cin_to_zero is HIGH for exp/mul/red ops, i.e. when cin is passed through.
+// for ld and store ops force cin to zero, since the adder is used for MPA calculations.
+wire force_cin_to_zero = spu_mactl_expop | spu_mactl_mulop | spu_mactl_redop;
+
+wire force_cin_to_zero_q;
+dff_s    #(1) force_cin_to_zero_ff (
+        .din(force_cin_to_zero) ,
+        .q(force_cin_to_zero_q),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+// cin output is the registered carry, gated to 0 when none of exp/mul/red was active in the previous cycle
+assign spu_mared_cin_oprnd_sub_mod = spu_mared_cin_oprnd_sub_mod_q & force_cin_to_zero_q;
+
+// -------------------------
+// delaying cur_wrstox_state to write the cout to cin reg. this delay
+// is needed because once the j-ptr is no longer zero we must capture
+// the next cout into cin.
+
+dff_s    #(1) dly_cur_wrstox_state_ff (
+        .din(cur_wrstox_state) ,
+        .q(dly_cur_wrstox_state),
+        .clk (rclk)
+        , .se(se), .si(), .so());
+
+
+
+endmodule
Index: /trunk/T1-CPU/spu/spu_lsurpt.v
===================================================================
--- /trunk/T1-CPU/spu/spu_lsurpt.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_lsurpt.v	(revision 6)
@@ -0,0 +1,70 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_lsurpt.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+
+
+module spu_lsurpt (
+
+
+/*outputs*/
+
+spu_lsurpt_ldxa_data_out,
+spu_lsurpt_ldst_pckt_out,
+spu_lsurpt_cpx_data_out,
+
+/*inputs*/
+spu_lsurpt_ldxa_data_in,
+spu_lsurpt_ldst_pckt_in,
+spu_lsurpt_cpx_data_in);
+
+// ---------------------------------------------------------------------
+// Feed-through block: each *_out bus is a purely combinational copy of
+// the matching *_in bus.  There is no clock, no state and no logic.
+// ---------------------------------------------------------------------
+
+// ldxa_data bus, 64 bits wide
+input  [63:0]  spu_lsurpt_ldxa_data_in;
+output [63:0]  spu_lsurpt_ldxa_data_out;
+
+// ldst_pckt bus, 123 bits wide
+input  [122:0] spu_lsurpt_ldst_pckt_in;
+output [122:0] spu_lsurpt_ldst_pckt_out;
+
+// cpx_data bus, 135 bits wide
+input  [134:0] spu_lsurpt_cpx_data_in;
+output [134:0] spu_lsurpt_cpx_data_out;
+
+// ---------------------------------------------------------------------
+// Whole-vector copies (no part-selects needed: widths match exactly).
+//
+// Port position notes for physical design:
+//   ldxa_data : input on the TOP,    output on the BOTTOM
+//   ldst_pckt : input on the TOP,    output on the BOTTOM
+//   cpx_data  : input on the BOTTOM, output on the TOP
+// ---------------------------------------------------------------------
+
+assign spu_lsurpt_ldxa_data_out = spu_lsurpt_ldxa_data_in;
+
+assign spu_lsurpt_ldst_pckt_out = spu_lsurpt_ldst_pckt_in;
+
+assign spu_lsurpt_cpx_data_out  = spu_lsurpt_cpx_data_in;
+
+
+endmodule
Index: /trunk/T1-CPU/spu/spu_wen.v
===================================================================
--- /trunk/T1-CPU/spu/spu_wen.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_wen.v	(revision 6)
@@ -0,0 +1,359 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_wen.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    this block generates the write enables for
+//			various registers in the spu.
+//			Storage for the valid bits is also here.
+*/
+////////////////////////////////////////////////////////////////////////
+
+module spu_wen (
+
+/*outputs*/
+
+spu_wen_maln_wen,
+spu_wen_vld_maln,
+
+spu_wen_mast_ack,
+
+spu_wen_mald_ack,
+
+spu_wen_ldst_pcx_vld,
+
+spu_wen_allma_stacks_ok,
+
+spu_wen_ma_unc_err_pulse,
+spu_wen_ma_unc_err,
+spu_wen_ma_cor_err,
+
+spu_wen_pckt_req,
+
+/*inputs*/
+
+spu_mald_ldreq,
+spu_mactl_streq,
+
+
+lsu_spu_vload_rtntyp,
+lsu_spu_vload_vld,
+
+lsu_spu_st_ack_tid,
+lsu_spu_st_asop,
+lsu_spu_st_ackvld,
+lsu_spu_ld_ack_tid,
+lsu_spu_ld_asop,
+lsu_spu_ld_ackvld,
+
+spu_mald_done,
+spu_mald_rstln,
+
+
+
+lsu_spu_strm_ack_cmplt,
+
+l2_err,
+
+
+spu_mactl_uncerr_rst,
+
+cpuid,
+
+se,
+reset,
+rclk);
+
+input reset;
+input rclk;
+input se;
+
+input [1:0] l2_err;
+
+input spu_mald_ldreq;
+input spu_mactl_streq;
+
+input [3:0]	lsu_spu_vload_rtntyp;
+input 		lsu_spu_vload_vld;
+
+input [1:0] 	lsu_spu_st_ack_tid;
+input 		lsu_spu_st_asop;
+input 		lsu_spu_st_ackvld;
+
+input [1:0] 	lsu_spu_ld_ack_tid;
+input 		lsu_spu_ld_asop;
+input 		lsu_spu_ld_ackvld;
+
+input spu_mald_done;
+input spu_mald_rstln;
+
+
+
+input [1:0] lsu_spu_strm_ack_cmplt;
+
+
+input [2:0] cpuid;
+
+input spu_mactl_uncerr_rst;
+
+// -----------------------------------------------------
+
+output spu_wen_mast_ack;
+
+output spu_wen_maln_wen;
+output spu_wen_mald_ack;
+output spu_wen_vld_maln;
+
+output spu_wen_ldst_pcx_vld;
+
+output spu_wen_allma_stacks_ok;
+
+output spu_wen_ma_unc_err_pulse;
+output spu_wen_ma_unc_err;
+output spu_wen_ma_cor_err;
+
+output [122:104] spu_wen_pckt_req;
+
+// -----------------------------------------------------
+// Forward declaration: this net feeds l2_err_ff just below and is assigned
+// further down, where it simply mirrors spu_wen_maln_wen.
+wire spu_wen_maln_wen_local;
+// -----------------------------------------------------
+// ^^^^^^^^^^ L2 ERROR CAPTURE ^^^^^^^^^^^^^^
+// l2_err[1:0] and the load-return write enable pass through the same dff_s
+// stage and are decoded together from the registered copies:
+//   l2_err_q[1]         -> spu_wen_ma_unc_err_pulse / spu_wen_ma_unc_err
+//   l2_err_q == 2'b01   -> spu_wen_ma_cor_err
+
+wire [1:0] l2_err_q;
+wire spu_wen_maln_wen_local_q;
+
+dff_s  #(3) l2_err_ff (
+        .din({l2_err[1:0], spu_wen_maln_wen_local}) , 
+        .q({l2_err_q[1:0], spu_wen_maln_wen_local_q}),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+wire spu_wen_ma_unc_err_decode = l2_err_q[1] & spu_wen_maln_wen_local_q;
+
+assign spu_wen_ma_unc_err_pulse = spu_wen_ma_unc_err_decode;
+
+wire spu_wen_ma_cor_err = ~l2_err_q[1] & l2_err_q[0] & spu_wen_maln_wen_local_q;
+// Flag loads 1'b1 on the decode pulse; rst is reset | spu_mactl_uncerr_rst.
+dffre_s  #(1) ma_unc_err_ff (
+        .din(1'b1) ,
+        .q(spu_wen_ma_unc_err),
+        .en(spu_wen_ma_unc_err_decode),
+        .rst(reset | spu_mactl_uncerr_rst), .clk (rclk), .se(se), .si(), .so());
+
+ 
+// -----------------------------------------------------
+// Request valid = load request OR store request.
+assign spu_wen_ldst_pcx_vld = spu_mald_ldreq | spu_mactl_streq;
+
+wire spu_lsu_load_req = spu_mald_ldreq;
+wire spu_lsu_store_req = spu_mactl_streq;
+// -----------------------------------------------------
+// Register the ack qualifiers (tid, asop) for store and load acks.  The
+// matching ackvld inputs are used unregistered in the ack equations below.
+wire [1:0] lsu_spu_st_ack_tid_q;
+dff_s #(2) lsu_spu_st_ack_tid_ff (
+        .din(lsu_spu_st_ack_tid[1:0]) , 
+        .q(lsu_spu_st_ack_tid_q[1:0]),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+wire lsu_spu_st_asop_q;
+dff_s #(1) lsu_spu_st_asop_ff (
+        .din(lsu_spu_st_asop) , 
+        .q(lsu_spu_st_asop_q),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+
+wire [1:0] lsu_spu_ld_ack_tid_q;
+dff_s #(2) lsu_spu_ld_ack_tid_ff (
+        .din(lsu_spu_ld_ack_tid[1:0]) , 
+        .q(lsu_spu_ld_ack_tid_q[1:0]),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+wire lsu_spu_ld_asop_q;
+dff_s #(1) lsu_spu_ld_asop_ff (
+        .din(lsu_spu_ld_asop) ,
+        .q(lsu_spu_ld_asop_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+
+
+// -----------------------------------------------------
+// -----------------------------------------------------
+// Registered copy of the store request; a second stage (_qq) is added in
+// the STORE RETURN section and used for the store-ack counter increment.
+//wire spu_wen_tid_bit0 = spu_rrstr_streq_mx2sel[0] | spu_rrld_ldreq_mx2sel; 
+//wire spu_wen_tid_bit0 = 1'b0; 
+
+wire spu_wen_ma_st_req_q;
+dff_s #(1) spu_wen_ma_st_req_ff (
+        .din(spu_lsu_store_req) ,
+        .q(spu_wen_ma_st_req_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+// -----------------------------------------------------
+// -----------------------------------------------------
+// -----------------------------------------------------
+// ^^^^^^^^^^ LOAD RETURN FROM L2 ^^^^^^^^^^^^^^
+// -----------------------------------------------------
+// Return type 4'b0010 is the only one that asserts spu_wen_maln_wen.
+wire load_rtntyp = (lsu_spu_vload_rtntyp[3:0] == 4'b0010);
+
+
+/*
+wire spu_wen_maln_wen =   lsu_spu_vload_vld & ~lsu_spu_vload_bid & 
+			   lsu_spu_vload_asop & load_rtntyp & 
+			   (2'b00 == lsu_spu_vload_data_tid[1:0]);
+*/
+
+// wire spu_wen_maln_wen_prequal =   lsu_spu_vload_vld & ~lsu_spu_vload_data_tid[0];
+
+wire spu_wen_maln_wen =  lsu_spu_vload_vld & load_rtntyp ; 
+
+assign spu_wen_maln_wen_local = spu_wen_maln_wen;
+
+
+// ------------------------------------------------------------
+// load/store acks from lsu captured in spu.
+// ------------------------------------------------------------
+// An ack is accepted only with registered tid == 2'b00, registered asop set
+// and the matching request still asserted; a store request masks the load ack.
+wire spu_wen_mast_ack_prequal = (lsu_spu_st_ack_tid_q[1:0] == 2'b00) & lsu_spu_st_asop_q &
+				spu_lsu_store_req;
+
+wire spu_wen_mast_ack = lsu_spu_st_ackvld & spu_wen_mast_ack_prequal;
+
+
+wire spu_wen_mald_ack_prequal = (lsu_spu_ld_ack_tid_q[1:0] == 2'b00) & lsu_spu_ld_asop_q &
+				spu_lsu_load_req & ~spu_lsu_store_req;
+
+wire spu_wen_mald_ack = lsu_spu_ld_ackvld & spu_wen_mald_ack_prequal;
+
+
+// =================================================================
+// ----------------------------------------------------------------- 
+// ----------------------------------------------------------------- 
+// ***************** MA STUFF **************************************
+
+wire spu_wen_ma_reset = reset;
+
+// ----------------------------------------------------------------- 
+
+wire reset_vld_maln = spu_wen_ma_reset | spu_mald_rstln | spu_mald_done ;
+
+// ----------------------------------------------------------------- 
+// Valid bit: loads 1'b1 on spu_wen_maln_wen; rst is reset | rstln | done.
+dffre_s  #(1) maln_vld_bit_ff (
+        .din(1'b1) , 
+        .q(spu_wen_vld_maln),
+        .en(spu_wen_maln_wen_local), 
+        .rst(reset_vld_maln), .clk (rclk), .se(se), .si(), .so()); 
+
+// =================================================================
+// ----------------------------------------------------------------- 
+
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// ^^^^^^^^^^ STORE RETURN FROM L2 ^^^^^^^^^^^^^^
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+
+wire [1:0] lsu_spu_strm_ack_cmplt_q;
+
+dff_s  #(2) lsu_spu_strm_ff (
+        .din(lsu_spu_strm_ack_cmplt[1:0]) ,
+        .q(lsu_spu_strm_ack_cmplt_q[1:0]),
+        .clk (rclk), .se(se), .si(), .so());
+
+wire lsu_spu_st_ackvld_q;
+dff_s  #(1) lsu_spu_st_ackvld_ff (
+        .din(lsu_spu_st_ackvld) ,
+        .q(lsu_spu_st_ackvld_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+wire spu_wen_ma_st_req_qq;
+dff_s  #(1) spu_wen_ma_st_req_q_ff (
+        .din(spu_wen_ma_st_req_q) ,
+        .q(spu_wen_ma_st_req_qq),
+        .clk (rclk), .se(se), .si(), .so());
+
+// Decrement select: either registered stream-ack-complete bit is set.
+
+wire ma_stack_decr_sel = lsu_spu_strm_ack_cmplt_q[0] | lsu_spu_strm_ack_cmplt_q[1];
+// Increment select: store request after two dff_s stages (_qq) together
+// with store ackvld after one dff_s stage (_q).
+wire ma_stack_incr_sel = spu_wen_ma_st_req_qq & lsu_spu_st_ackvld_q;
+
+
+wire ma_stack_cntr_wen  = ma_stack_incr_sel | ma_stack_decr_sel ;
+
+
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// ^^^^^^^^^^ STORE ACK COUNTERS ^^^^^^^^^^^^
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+wire [5:0] ma_stack_cntr_q, ma_stack_incrdecr_val;
+
+
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// ^^^^^^ MA ST_ACK ^^^^^^^^
+// -----------------------------------------------------------------
+/*
+assign ma_stack_incr_val[5:0] = ma_stack_cntr_q[5:0] + 6'b000001;
+assign ma_stack_decr_val[5:0] = ma_stack_cntr_q[5:0] - 6'b000001;
+
+assign ma_stack_incrdecr_val[5:0] = ma_stack_incr_sel ?    ma_stack_incr_val[5:0] :
+								ma_stack_decr_val[5:0];
+*/
+// NOTE(review): ack_cmplt_q is subtracted as a 2-bit number (0..3) - confirm its encoding.
+assign ma_stack_incrdecr_val[5:0] =  ma_stack_cntr_q[5:0] + {5'b00000,ma_stack_incr_sel} -
+					{4'b0000,lsu_spu_strm_ack_cmplt_q[1:0]};
+
+dffre_s  #(6) ma_stack_cntr_ff (
+        .din(ma_stack_incrdecr_val[5:0]) ,
+        .q(ma_stack_cntr_q[5:0]),
+        .en(ma_stack_cntr_wen),
+        .rst(reset), .clk (rclk), .se(se), .si(), .so());
+
+// OK is asserted only while the 6-bit counter reads zero.
+assign spu_wen_allma_stacks_ok = ~(|ma_stack_cntr_q[5:0]) ;
+
+
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// Packet bits [122:104]: two constant patterns (with cpuid inserted), selected by the store request.
+dp_mux2es #(19) ldstreq_misc_mx (
+        .in0    ({6'b001001,cpuid[2:0],1'b0,1'b0,8'b00000100}),
+        .in1    ({6'b001011,cpuid[2:0],1'b0,1'b0,8'b00010000}),
+        .sel    (spu_lsu_store_req),
+        .dout   (spu_wen_pckt_req[122:104]));
+
+
+endmodule
+
Index: /trunk/T1-CPU/spu/spu_lsurpt1.v
===================================================================
--- /trunk/T1-CPU/spu/spu_lsurpt1.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_lsurpt1.v	(revision 6)
@@ -0,0 +1,275 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_lsurpt1.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+
+
+module spu_lsurpt1 (
+// Flop and feed-through stage for SPU signals; each section below is commented.
+
+/*outputs*/
+so,
+
+spu_lsu_ldxa_data_w2,
+spu_lsu_ldxa_data_vld_w2,
+spu_lsu_ldxa_tid_w2,
+
+spu_lsu_ldst_pckt,
+
+
+spu_lsurpt1_rs3_data_g2,
+
+
+spu_lsu_ldxa_illgl_va_w2,
+
+spu_lsurpt1_stb_empty,
+
+spu_lsurpt_cpx_data_out,
+
+spu_ifu_ttype_tid_w2,
+
+spu_lsu_unc_error_w2,
+
+spu_ifu_err_addr_w2,
+
+spu_lsu_stxa_ack_tid,
+
+/*inputs*/
+
+spu_ctl_ldxa_illgl_va_w,
+
+spu_madp_ldxa_data,
+
+spu_ldstreq_pcx,
+
+spu_ctl_ldxa_data_vld_w2,
+spu_ctl_ldxa_tid_w2,
+
+
+exu_lsu_rs3_data_e,
+
+lsu_spu_stb_empty,
+
+spu_lsurpt_cpx_data_in,
+
+spu_wen_pcx_wen,
+spu_wen_pcx_7170_sel,
+
+spu_ifu_ttype_tid_w,
+
+spu_lsu_unc_error_w,
+
+spu_lsu_stxa_ack_tid_ctl,
+
+
+
+si,se,
+//tmb_l,
+
+reset_l,
+rclk);
+
+// ---------------------------------------------------------------------
+input rclk;
+input reset_l;
+input se;
+input si;
+//input tmb_l;
+
+input [63:0] spu_madp_ldxa_data;
+
+input [122:0] spu_ldstreq_pcx;
+
+input spu_ctl_ldxa_data_vld_w2;
+input [1:0] spu_ctl_ldxa_tid_w2;
+
+
+input [63:0] exu_lsu_rs3_data_e;
+
+input spu_ctl_ldxa_illgl_va_w;
+
+input [3:0] lsu_spu_stb_empty;
+
+input [134:0] spu_lsurpt_cpx_data_in;
+
+input spu_wen_pcx_wen;
+input spu_wen_pcx_7170_sel;
+
+input [1:0] spu_ifu_ttype_tid_w;
+
+input spu_lsu_unc_error_w;
+
+input [1:0] spu_lsu_stxa_ack_tid_ctl;
+// ---------------------------------------------------------------------
+output [122:0] spu_lsu_ldst_pckt;
+
+output [63:0] spu_lsu_ldxa_data_w2;
+output spu_lsu_ldxa_data_vld_w2;
+output [1:0] spu_lsu_ldxa_tid_w2;
+
+
+output [63:0] spu_lsurpt1_rs3_data_g2;
+
+
+output  spu_lsu_ldxa_illgl_va_w2;
+
+output [3:0] spu_lsurpt1_stb_empty;
+
+output [134:0] spu_lsurpt_cpx_data_out;
+
+output [1:0] spu_ifu_ttype_tid_w2;
+
+output spu_lsu_unc_error_w2;
+
+output [39:4] spu_ifu_err_addr_w2;
+
+output [1:0] spu_lsu_stxa_ack_tid;
+
+// NOTE(review): so is never driven and si is never read inside this module.
+output so;
+// ---------------------------------------------------------------------
+// Packet register: bits [122:72] and [69:0] (121 bits) sit in one enabled
+// flop; bits [71:70] get their own flop plus a mux further down.
+dffe_s #(121) pcx_ff (
+        .din({spu_ldstreq_pcx[122:72],spu_ldstreq_pcx[69:0]}) ,
+        .q({spu_lsu_ldst_pckt[122:72],spu_lsu_ldst_pckt[69:0]}),
+        .en(spu_wen_pcx_wen), .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+
+// bank select needs to be fast.
+//assign spu_lsu_ldst_pckt[71:70] = spu_ldstreq_pcx[71:70];
+
+wire [71:70] spu_ldstreq_pcx_q;
+dffe_s #(2) pcx_7170_ff (
+        .din(spu_ldstreq_pcx[71:70]) ,
+        .q(spu_ldstreq_pcx_q[71:70]),
+        .en(spu_wen_pcx_wen), .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+// Mux between the registered (in0) and unregistered (in1) copy of bits [71:70].
+dp_mux2es #(2) pcx_7170_mx (
+        .in0    (spu_ldstreq_pcx_q[71:70]),
+        .in1    (spu_ldstreq_pcx[71:70]),
+        .sel    (spu_wen_pcx_7170_sel),
+        .dout   (spu_lsu_ldst_pckt[71:70]));
+
+// Error address [39:4] is wired straight from unregistered packet bits [103:68].
+assign spu_ifu_err_addr_w2[39:8] = spu_ldstreq_pcx[103:72]; // buf_10x
+assign spu_ifu_err_addr_w2[7:6] = spu_ldstreq_pcx[71:70]; // very critical to not overload double 
+							  // buffer(buf_2x+buf10x)
+assign spu_ifu_err_addr_w2[5:4] = spu_ldstreq_pcx[69:68]; // buf_10x
+
+// ---------------------------------------------------------------------
+
+// ldxa data and tid go through plain (non-reset) flops.
+dff_s #(64) ldxa_data_ff (
+        .din(spu_madp_ldxa_data[63:0]) ,
+        .q(spu_lsu_ldxa_data_w2[63:0]),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+dff_s #(2) ldxa_tid_ff (
+        .din(spu_ctl_ldxa_tid_w2[1:0]) ,
+        .q(spu_lsu_ldxa_tid_w2[1:0]),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+// Active-high reset derived from the active-low reset_l input.
+wire reset = ~reset_l;
+
+dffr_s #(1) ldxa_vld_ff (
+        .din(spu_ctl_ldxa_data_vld_w2) ,
+        .q(spu_lsu_ldxa_data_vld_w2),
+	.rst(reset),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+	);
+
+dffr_s #(1) illgl_va_ff (
+        .din(spu_ctl_ldxa_illgl_va_w) ,
+        .q(spu_lsu_ldxa_illgl_va_w2),
+	.rst(reset),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+	);
+
+//---------------------------------------------
+// exu_lsu_rs3_data_e passes through three flop stages: _m, _g, then _g2.
+wire [63:0] spu_lsurpt1_rs3_data_m, spu_lsurpt1_rs3_data_g;
+
+dff_s #(64) exu_rs3_data_e_ff (
+        .din(exu_lsu_rs3_data_e[63:0]) ,
+        .q(spu_lsurpt1_rs3_data_m[63:0]),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+dff_s #(64) spu_rs3_data_m_ff (
+        .din(spu_lsurpt1_rs3_data_m[63:0]) ,
+        .q(spu_lsurpt1_rs3_data_g[63:0]),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+dff_s #(64) spu_rs3_data_g_ff (
+        .din(spu_lsurpt1_rs3_data_g[63:0]) ,
+        .q(spu_lsurpt1_rs3_data_g2[63:0]),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+
+//---------------------------------------------
+//---------------------------------------------
+
+// port position should be: input on the BOTTOM and output on TOP.
+
+dff_s #(4) lsu_spu_stb_empty_ff (
+        .din(lsu_spu_stb_empty[3:0]) ,
+        .q(spu_lsurpt1_stb_empty[3:0]),
+        .clk (rclk), .se(1'b0), .si(), .so());
+
+
+//---------------------------------------------
+//---------------------------------------------
+
+// port position should be: input on the BOTTOM and output on TOP.
+// (combinational feed-through, no flop)
+assign spu_lsurpt_cpx_data_out[134:0] = spu_lsurpt_cpx_data_in[134:0];
+
+
+//---------------------------------------------
+//---------------------------------------------
+
+// place all the following flops on the right hand side. inputs located on the top
+// and outputs located on the bottom.
+// NOTE(review): these three flops connect the se port; every flop above ties se to 1'b0.
+
+dff_s  #(2) spu_ifu_ttype_tid_w2_ff (
+        .din(spu_ifu_ttype_tid_w[1:0]) ,
+        .q(spu_ifu_ttype_tid_w2[1:0]),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+dff_s  #(1) spu_lsu_unc_error_w2_ff (
+        .din(spu_lsu_unc_error_w) ,
+        .q(spu_lsu_unc_error_w2),
+        .clk (rclk), .se(se), .si(), .so());
+
+dff_s  #(2) spu_lsu_stxa_ack_tid_ff (
+        .din(spu_lsu_stxa_ack_tid_ctl[1:0]) ,
+        .q(spu_lsu_stxa_ack_tid[1:0]),
+        .clk (rclk), .se(se), .si(), .so());
+
+endmodule
Index: /trunk/T1-CPU/spu/spu_maexp.v
===================================================================
--- /trunk/T1-CPU/spu/spu_maexp.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_maexp.v	(revision 6)
@@ -0,0 +1,294 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_maexp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+
+module spu_maexp (
+// Control FSM for the "exp" operation (started by iss_pulse_dly & expop).
+/*outputs*/
+spu_maexp_e_rd_oprnd_sel,
+spu_maexp_shift_e,
+spu_maexp_e_data_wen,
+spu_maexp_incr_es_ptr,
+
+spu_maexp_es_max_init,
+spu_maexp_es_e_ptr_rst,
+
+spu_maexp_done_set,
+spu_maexp_memren,
+
+spu_maexp_start_mulred_aequb,
+spu_maexp_start_mulred_anoteqb,
+// NOTE: the next port is an INPUT; it stays here only to preserve port order.
+spu_mactl_stxa_force_abort,
+
+/*inputs*/
+spu_maaddr_esmax,
+spu_maaddr_esmod64,
+spu_madp_e_eq_one,
+spu_mared_red_done,
+
+spu_mactl_iss_pulse_dly,
+spu_mactl_expop,
+
+spu_mactl_kill_op,
+
+se,
+reset,
+rclk);
+
+input reset;
+input rclk;
+input se;
+
+input spu_maaddr_esmax;
+input spu_maaddr_esmod64;
+input spu_madp_e_eq_one;
+input spu_mared_red_done;
+
+input spu_mactl_iss_pulse_dly;
+input spu_mactl_expop;
+input spu_mactl_kill_op;
+input spu_mactl_stxa_force_abort;
+// --------------------------------------------------------------------------------
+
+output spu_maexp_e_rd_oprnd_sel;
+output spu_maexp_shift_e;
+output spu_maexp_e_data_wen;
+output spu_maexp_incr_es_ptr;
+output spu_maexp_es_max_init;
+output spu_maexp_es_e_ptr_rst;
+
+output spu_maexp_done_set;
+output spu_maexp_memren;
+
+output spu_maexp_start_mulred_aequb;
+output spu_maexp_start_mulred_anoteqb;
+
+// All FSM nets are declared explicitly, so a misspelled name cannot silently
+// become an implicit 1-bit net and the module builds under default_nettype none.
+
+wire spu_maexp_exp_done,tr2idle_frm_esmax,tr2rde_frm_idle,tr2rde_frm_esmax,
+     tr2gotomulred1_frm_rde,tr2gotomulred1_frm_esmax,tr2echk_frm_gotomulred1,
+     tr2gotomulred2_frm_echk,tr2esmax_frm_gotomulred2,tr2esmax_frm_echk;
+wire nxt_idle_state, nxt_rde_state, nxt_gotomulred1_state,
+     nxt_echk_state, nxt_gotomulred2_state, nxt_esmax_state;
+wire cur_rde_state;
+wire local_stxa_abort;
+wire cur_idle_state, cur_gotomulred1_state, cur_echk_state,
+     cur_gotomulred2_state, cur_esmax_state;
+// -------------------------------------------------------------------------
+// we need a state set to indicate exp is done, and when an
+// masync gets issued later, then the load asi is returned.
+wire spu_maexp_done_wen = (spu_maexp_exp_done | spu_mactl_kill_op |
+					local_stxa_abort) & spu_mactl_expop ;
+wire spu_maexp_done_rst = reset | spu_mactl_iss_pulse_dly;
+
+dffre_s    #(1) spu_maexp_done_ff (
+        .din(1'b1) , 
+        .q(spu_maexp_done_set),
+        .en(spu_maexp_done_wen),
+        .rst(spu_maexp_done_rst), .clk (rclk), .se(se), .si(), .so()); 
+
+// --------------------------------------------------------------------------------
+
+
+// this was causing rd and wr contention in idct when running random diags. cur_rde_state
+//cause perr which caused expop to go to idle, but maaeqb state machine was in progress
+//and then a ldop was started which caused a rd of mem for ldop and a write during
+//maaeqb op in progress.
+
+//assign local_stxa_abort = (cur_rde_state | spu_mared_red_done)  & spu_mactl_stxa_force_abort;
+assign local_stxa_abort =  spu_mared_red_done & spu_mactl_stxa_force_abort;
+
+wire state_reset = reset | spu_maexp_exp_done | spu_mactl_kill_op |
+					local_stxa_abort;
+
+// -------------------------------------------------------------------------------
+// The FSM leaves idle on the delayed issue pulse while expop is set.
+
+wire expop_start = spu_mactl_iss_pulse_dly & spu_mactl_expop;
+
+// -------------------------------------------------------------------------------
+// State flops, one per state.  The idle flop has no rst pin: state_reset is
+// OR-ed into nxt_idle_state instead.  The other five flops take state_reset
+// on their rst input.
+dff_s    #(1) idle_state_ff (
+        .din(nxt_idle_state) , 
+        .q(cur_idle_state),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) rde_state_ff (
+        .din(nxt_rde_state) ,
+        .q(cur_rde_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+dffr_s  #(1) gotomulred1_state_ff (
+        .din(nxt_gotomulred1_state) , 
+        .q(cur_gotomulred1_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) echk_state_ff (
+        .din(nxt_echk_state) ,
+        .q(cur_echk_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+dffr_s  #(1) gotomulred2_state_ff (
+        .din(nxt_gotomulred2_state) ,
+        .q(cur_gotomulred2_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+dffr_s  #(1) esmax_state_ff (
+        .din(nxt_esmax_state) ,
+        .q(cur_esmax_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------------
+// State sequence as wired below:
+//   idle -> rde -> gotomulred1 -> echk -> [gotomulred2 if e_eq_one ->] esmax
+//   esmax -> idle (esmax) | rde (~esmax & esmod64) | gotomulred1 (~esmax & ~esmod64)
+// -------------------------------------------------------------------------------
+// transition to idle state
+
+assign tr2idle_frm_esmax = spu_maaddr_esmax & cur_esmax_state;
+
+assign spu_maexp_exp_done = tr2idle_frm_esmax;
+
+assign  nxt_idle_state = (
+                         state_reset | 
+                         tr2idle_frm_esmax |
+                         (cur_idle_state & ~expop_start));
+
+// -------------------------------------------------------------------------------
+// transition to rde state
+
+assign tr2rde_frm_idle = cur_idle_state & expop_start;
+
+/*
+wire dly_tr2rde_frm_idle;
+dff_s #(1) dly_tr2rde_frm_idle_ff (
+        .din(tr2rde_frm_idle) ,
+        .q(dly_tr2rde_frm_idle),
+        .clk (rclk), 
+        .clk (rclk)
+        , .se(se), .si(), .so());
+*/
+
+
+assign tr2rde_frm_esmax = cur_esmax_state & ~spu_maaddr_esmax & spu_maaddr_esmod64;
+
+assign nxt_rde_state = (
+                          tr2rde_frm_idle |
+                          tr2rde_frm_esmax );
+
+// -------------------------------------------------------------------------------
+// transition to gotomulred1 state
+
+assign tr2gotomulred1_frm_rde = cur_rde_state; 
+assign tr2gotomulred1_frm_esmax = cur_esmax_state & ~spu_maaddr_esmax & 
+						~spu_maaddr_esmod64; 
+
+assign nxt_gotomulred1_state = (
+                          tr2gotomulred1_frm_rde |
+                          tr2gotomulred1_frm_esmax |
+			  (cur_gotomulred1_state & ~spu_mared_red_done) );
+
+// -------------------------------------------------------------------------------
+// transition to echk state
+
+assign tr2echk_frm_gotomulred1 = cur_gotomulred1_state & spu_mared_red_done; 
+
+assign nxt_echk_state = (
+			  tr2echk_frm_gotomulred1);
+
+// -------------------------------------------------------------------------------
+// transition to gotomulred2 state
+
+assign tr2gotomulred2_frm_echk = cur_echk_state & spu_madp_e_eq_one; 
+
+assign nxt_gotomulred2_state = (
+			  tr2gotomulred2_frm_echk |
+			  (cur_gotomulred2_state & ~spu_mared_red_done) );
+
+// -------------------------------------------------------------------------------
+// transition to esmax state
+
+
+assign tr2esmax_frm_gotomulred2 = cur_gotomulred2_state & spu_mared_red_done; 
+assign tr2esmax_frm_echk = cur_echk_state & ~spu_madp_e_eq_one; 
+
+assign nxt_esmax_state = (
+			  tr2esmax_frm_gotomulred2 |
+			  tr2esmax_frm_echk);
+
+// -------------------------------------------------------------------------------
+// -------------------------------------------------------------------------------
+// -------------------------------------------------------------------------------
+// -------------------------------------------------------------------------------
+// SEL XXNM OR XANM
+
+/*
+wire spu_maexp_xxnm_sel_set = tr2rde_frm_idle | tr2esmax_frm_echk | 
+					tr2esmax_frm_gotomulred2;
+
+wire spu_maexp_xxnm_sel_rst = state_reset | tr2echk_frm_gotomulred1; 
+
+dffre_s #(1) xxnm_set_ff (
+        .din(1'b1) , 
+        .q(spu_maexp_xxnm_sel_q),
+        .en(spu_maexp_xxnm_sel_set), 
+        .rst(spu_maexp_xxnm_sel_rst), .clk (rclk), 
+        .rst(spu_maexp_xxnm_sel_rst), .clk (rclk)
+        , .se(se), .si(), .so()); 
+
+assign spu_maexp_b_to_x_sel = spu_maexp_xxnm_sel_q;
+assign spu_maexp_b_to_a_sel = ~spu_maexp_xxnm_sel_q;
+*/
+
+// -------------------------------------------------------------------------------
+// Outputs below are decoded from the state bits and transition terms above.
+assign spu_maexp_e_rd_oprnd_sel = tr2rde_frm_idle | tr2rde_frm_esmax;
+assign spu_maexp_memren = spu_maexp_e_rd_oprnd_sel;
+
+assign spu_maexp_shift_e = nxt_esmax_state; // muxsel in madp
+// write enable when data is from mamem or a shift write
+assign spu_maexp_e_data_wen = cur_rde_state | nxt_esmax_state;
+
+//assign spu_maexp_incr_es_ptr = tr2echk_frm_gotomulred1;
+assign spu_maexp_incr_es_ptr = tr2rde_frm_esmax | tr2gotomulred1_frm_esmax;
+
+assign spu_maexp_es_max_init = tr2rde_frm_idle;
+
+assign spu_maexp_es_e_ptr_rst = state_reset;
+// -------------------------------------------------------------------------------
+
+
+//assign spu_maexp_start_mulred = tr2gotomulred1_frm_rde | tr2gotomulred1_frm_esmax |
+//			  		tr2gotomulred2_frm_echk ;
+
+// aequb start: entering gotomulred1; anoteqb start: entering gotomulred2.
+
+assign spu_maexp_start_mulred_aequb = tr2gotomulred1_frm_rde | tr2gotomulred1_frm_esmax;
+assign spu_maexp_start_mulred_anoteqb = tr2gotomulred2_frm_echk;
+
+
+
+
+// -------------------------------------------------------------------------------
+
+endmodule
Index: /trunk/T1-CPU/spu/spu_mamul.v
===================================================================
--- /trunk/T1-CPU/spu/spu_mamul.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_mamul.v	(revision 6)
@@ -0,0 +1,727 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_mamul.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:   state machine to do MA mul/acc/shf. 
+*/
+////////////////////////////////////////////////////////////////////////
+
+module spu_mamul (
+// MA multiply control state machine: sequences operand reads, result writes,
+// pointer updates and multiplier requests, and merges in the spu_maaeqb_* controls.
+/*outputs*/
+spu_mamul_memren,
+spu_mamul_memwen,
+
+spu_mamul_rst_iptr,
+spu_mamul_rst_jptr,
+spu_mamul_incr_iptr,
+spu_mamul_incr_jptr,
+
+spu_mamul_a_rd_oprnd_sel,
+spu_mamul_ax_rd_oprnd_sel,
+spu_mamul_b_rd_oprnd_sel,
+spu_mamul_ba_rd_oprnd_sel,
+spu_mamul_m_rd_oprnd_sel,
+spu_mamul_me_rd_oprnd_sel,
+spu_mamul_n_rd_oprnd_sel,
+spu_mamul_m_wr_oprnd_sel,
+spu_mamul_me_wr_oprnd_sel,
+
+spu_mamul_i_ptr_sel,
+spu_mamul_iminus1_ptr_sel,
+spu_mamul_j_ptr_sel,
+spu_mamul_iminusj_ptr_sel,
+spu_mamul_iminuslenminus1_sel,
+spu_mamul_jjptr_wen,
+
+spu_mamul_oprnd2_wen,
+spu_mamul_oprnd2_bypass,
+spu_mamul_oprnd1_mxsel_l,
+spu_mamul_oprnd1_wen,
+
+spu_mul_req_vld,
+spu_mul_areg_shf,
+spu_mul_acc,
+spu_mul_areg_rst,
+spu_mamul_mul_done,
+
+spu_mamul_jjptr_sel,
+
+spu_mamul_rst,
+
+/*inputs*/
+spu_maaeqb_jjptr_sel,
+
+spu_mactl_mulop,
+
+spu_maaddr_iequtwolenplus2,
+spu_maaddr_iequtwolenplus1,
+spu_maaddr_jequiminus1,
+spu_maaddr_jequlen,
+spu_maaddr_halfpnt_set,
+spu_mactl_iss_pulse_dly,
+
+spu_mared_oprnd2_wen,
+
+mul_spu_ack,
+mul_spu_shf_ack,
+
+spu_maexp_start_mulred_anoteqb,
+
+spu_mactl_expop,
+
+spu_maaddr_aequb,
+
+
+spu_maaeqb_rst_iptr,
+spu_maaeqb_rst_jptr,
+spu_maaeqb_incr_iptr,
+spu_maaeqb_incr_jptr,
+
+spu_maaeqb_a_rd_oprnd_sel,
+spu_maaeqb_ax_rd_oprnd_sel,
+spu_maaeqb_m_rd_oprnd_sel,
+spu_maaeqb_me_rd_oprnd_sel,
+spu_maaeqb_n_rd_oprnd_sel,
+spu_maaeqb_m_wr_oprnd_sel,
+spu_maaeqb_me_wr_oprnd_sel,
+
+spu_maaeqb_iminus1_ptr_sel,
+spu_maaeqb_j_ptr_sel,
+spu_maaeqb_iminusj_ptr_sel,
+spu_maaeqb_iminuslenminus1_sel,
+spu_maaeqb_jjptr_wen,
+
+spu_maaeqb_oprnd2_wen,
+spu_maaeqb_oprnd2_bypass,
+
+spu_maaeqb_mul_req_vld,
+spu_maaeqb_mul_areg_shf,
+spu_maaeqb_mul_acc,
+spu_maaeqb_mul_areg_rst,
+spu_maaeqb_mul_done,
+
+spu_maaeqb_oprnd1_mxsel,
+spu_maaeqb_oprnd1_wen,
+
+spu_mactl_kill_op,
+spu_mactl_stxa_force_abort,
+
+se,
+reset,
+rclk);
+
+// --------------------------- inputs ----------------------------
+input reset;
+input rclk;
+input se; // wired to the .se pin of every flop in this module
+
+input spu_maaddr_iequtwolenplus2;
+input spu_maaddr_iequtwolenplus1;
+input spu_maaddr_jequiminus1;
+input spu_maaddr_jequlen;
+input spu_maaddr_halfpnt_set;
+
+input mul_spu_ack;
+input mul_spu_shf_ack;
+input spu_mactl_mulop;
+input spu_mactl_iss_pulse_dly;
+
+input spu_mared_oprnd2_wen;
+
+input spu_maexp_start_mulred_anoteqb;
+
+input spu_mactl_expop;
+
+input spu_maaddr_aequb;
+
+
+
+input spu_maaeqb_rst_iptr;
+input spu_maaeqb_rst_jptr;
+input spu_maaeqb_incr_iptr;
+input spu_maaeqb_incr_jptr;
+
+input spu_maaeqb_a_rd_oprnd_sel;
+input spu_maaeqb_ax_rd_oprnd_sel;
+input spu_maaeqb_m_rd_oprnd_sel;
+input spu_maaeqb_me_rd_oprnd_sel;
+input spu_maaeqb_n_rd_oprnd_sel;
+input spu_maaeqb_m_wr_oprnd_sel;
+input spu_maaeqb_me_wr_oprnd_sel;
+
+input spu_maaeqb_iminus1_ptr_sel;
+input spu_maaeqb_j_ptr_sel;
+input spu_maaeqb_iminusj_ptr_sel;
+input spu_maaeqb_iminuslenminus1_sel;
+input spu_maaeqb_jjptr_wen;
+
+input spu_maaeqb_oprnd2_wen;
+input spu_maaeqb_oprnd2_bypass;
+
+input spu_maaeqb_mul_req_vld;
+input spu_maaeqb_mul_areg_shf;
+input spu_maaeqb_mul_acc;
+input spu_maaeqb_mul_areg_rst;
+input spu_maaeqb_mul_done;
+
+input [1:0] spu_maaeqb_oprnd1_mxsel;
+input spu_maaeqb_oprnd1_wen;
+input spu_maaeqb_jjptr_sel;
+
+input spu_mactl_kill_op;
+input spu_mactl_stxa_force_abort;
+
+// --------------------------- outputs ---------------------------
+
+output spu_mamul_memwen;
+output spu_mamul_memren;
+output spu_mamul_rst_iptr;
+output spu_mamul_rst_jptr;
+output spu_mamul_incr_iptr;
+output spu_mamul_incr_jptr;
+
+output spu_mamul_a_rd_oprnd_sel;
+output spu_mamul_ax_rd_oprnd_sel;
+output spu_mamul_b_rd_oprnd_sel;
+output spu_mamul_ba_rd_oprnd_sel;
+output spu_mamul_m_rd_oprnd_sel;
+output spu_mamul_me_rd_oprnd_sel;
+output spu_mamul_n_rd_oprnd_sel;
+output spu_mamul_m_wr_oprnd_sel;
+output spu_mamul_me_wr_oprnd_sel;
+
+output spu_mamul_i_ptr_sel;
+output spu_mamul_iminus1_ptr_sel;
+output spu_mamul_j_ptr_sel;
+output spu_mamul_iminusj_ptr_sel;
+output spu_mamul_iminuslenminus1_sel;
+output spu_mamul_jjptr_wen;
+
+output spu_mamul_oprnd2_wen;
+output spu_mamul_oprnd2_bypass;
+output [2:0] spu_mamul_oprnd1_mxsel_l;
+output spu_mamul_oprnd1_wen;
+
+output spu_mul_req_vld;
+output spu_mul_areg_shf;
+output spu_mul_acc;
+output spu_mul_areg_rst;
+output spu_mamul_mul_done;
+output spu_mamul_jjptr_sel;
+output spu_mamul_rst;
+
+// nets declared ahead of the assign / flop that drives them further down
+wire tr2mwrite_frm_accumshft_pre;
+wire tr2mwrite_frm_accumshft,tr2iloopa_frm_jloopn;
+wire spu_mamul_rd_aj,spu_mamul_rd_biminusj,spu_mamul_rd_mj,
+	spu_mamul_rd_niminusj,spu_mamul_rd_ai,spu_mamul_rd_b0,
+	spu_mamul_wr_mi,spu_mamul_wr_miminuslenminus1,
+	spu_mamul_rd_n0;
+
+wire tr2accumshft_frm_mwrite;
+wire tr2accumshft_frm_iloopn;
+wire nxt_mwrite_state;
+// ---------------------------------------------------------------
+// state_reset sources: reset, spu_mactl_kill_op, or a forced stxa
+// abort (spu_mactl_stxa_force_abort) seen while nxt_mwrite_state is
+// high. local_stxa_abort is also ORed into spu_mamul_mul_done.
+// ---------------------------------------------------------------
+
+//wire local_stxa_abort = cur_mwrite_state & spu_mactl_stxa_force_abort;// this causes x to in perr_set
+wire local_stxa_abort = nxt_mwrite_state & spu_mactl_stxa_force_abort;
+
+wire state_reset = reset | spu_mactl_kill_op | local_stxa_abort;
+
+// ---------------------------------------------------------------
+// State flops, one bit per state. idle_state_ff has no rst pin:
+// state_reset is ORed into nxt_idle_state instead, so a reset
+// sets idle while clearing every other state flop below.
+// nxt_*/cur_* nets with no wire declaration are implicit 1-bit nets.
+// ---------------------------------------------------------------
+dff_s    #(1) idle_state_ff (
+        .din(nxt_idle_state) , 
+        .q(cur_idle_state),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) jloopa_state_ff (
+        .din(nxt_jloopa_state) , 
+        .q(cur_jloopa_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) jloopb_state_ff (
+        .din(nxt_jloopb_state) , 
+        .q(cur_jloopb_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) jloopn_state_ff (
+        .din(nxt_jloopn_state) , 
+        .q(cur_jloopn_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) jloopm_state_ff (
+        .din(nxt_jloopm_state) , 
+        .q(cur_jloopm_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) iloopa_state_ff (
+        .din(nxt_iloopa_state) , 
+        .q(cur_iloopa_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) iloopb_state_ff (
+        .din(nxt_iloopb_state) , 
+        .q(cur_iloopb_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) nprime_state_ff (
+        .din(nxt_nprime_state) , 
+        .q(cur_nprime_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) mwrite_state_ff (
+        .din(nxt_mwrite_state) , 
+        .q(cur_mwrite_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) iloopn_state_ff (
+        .din(nxt_iloopn_state) , 
+        .q(cur_iloopn_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) accumshft_state_ff (
+        .din(nxt_accumshft_state) , 
+        .q(cur_accumshft_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+// spu_maaddr_aequb registered one cycle; its inverse gates mulop_start below
+
+wire spu_maaddr_aequb_q;
+dff_s  #(1) spu_maaddr_aequb_ff (
+        .din(spu_maaddr_aequb) , 
+        .q(spu_maaddr_aequb_q),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+
+// ---------------------------------------------------------------
+// Delay line for the mul result coming back: mul_res_ff shifts
+// mul_result_c0 through c1..c5 (five flop stages), and c5 drives
+// nxt_mwrite_state. NOTE(review): the original note here said
+// "4 cycle delay" -- confirm the intended latency.
+
+wire tr2mwrite_frm_jloopn = cur_jloopn_state & mul_spu_ack & spu_maaddr_halfpnt_set &
+                                spu_maaddr_jequlen;
+
+wire mul_result_c0,mul_result_c1,mul_result_c2,mul_result_c3,mul_result_c4,mul_result_c5;
+
+//assign mul_result_c0 = (cur_nprime_state & mul_spu_ack & ~spu_maaddr_halfpnt_set) |
+assign mul_result_c0 = (cur_nprime_state & mul_spu_ack) |
+			( tr2mwrite_frm_jloopn );
+
+
+dffr_s  #(5) mul_res_ff (
+        .din({mul_result_c0,mul_result_c1,mul_result_c2,mul_result_c3,mul_result_c4}) , 
+        .q({mul_result_c1,mul_result_c2,mul_result_c3,mul_result_c4,mul_result_c5}),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+// ---------------------------------------------------------------
+// Return to idle / done: accumshft state with
+// spu_maaddr_iequtwolenplus2 and a shift ack. spu_mamul_mul_done
+// is that event delayed one flop, ORed with the maaeqb done and
+// local_stxa_abort. The same event (undelayed) resets the i pointer.
+wire tr2idle_frm_accumshft = cur_accumshft_state & spu_maaddr_iequtwolenplus2 &
+				mul_spu_shf_ack;
+
+wire spu_mamul_mul_done_pre = tr2idle_frm_accumshft;
+wire spu_mamul_mul_done_q;
+dff_s #(1) muldone_dly_ff (
+        .din(spu_mamul_mul_done_pre) , 
+        .q(spu_mamul_mul_done_q),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+assign spu_mamul_mul_done = spu_mamul_mul_done_q | spu_maaeqb_mul_done | local_stxa_abort;
+
+assign spu_mamul_rst_iptr = tr2idle_frm_accumshft | spu_maaeqb_rst_iptr;
+
+
+// the following is to reset jptr on the 1st half.
+wire tr2iloopa_frm_jloopn_dly;
+dff_s #(1) tr2iloopa_frm_jloopn_dly_ff (
+        .din(tr2iloopa_frm_jloopn) , 
+        .q(tr2iloopa_frm_jloopn_dly),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+// mulop_start: issue pulse & mulop & ~aequb_q, or the maexp "anoteqb" start.
+
+wire mulop_start = (spu_mactl_iss_pulse_dly & spu_mactl_mulop & ~spu_maaddr_aequb_q) | 
+			spu_maexp_start_mulred_anoteqb;
+
+assign spu_mul_areg_rst = mulop_start | spu_maaeqb_mul_areg_rst;
+assign spu_mamul_rst = spu_mul_areg_rst;
+
+assign  nxt_idle_state = (
+                         state_reset | 
+			 tr2idle_frm_accumshft |
+                         (cur_idle_state & ~mulop_start));
+
+// jloopa: entered from jloopn on an ack (see tr2jloopa_frm_jloopn) or one flop after accumshft
+wire tr2jloopa_frm_accumshft = cur_accumshft_state & ~spu_maaddr_iequtwolenplus2 &
+				~spu_maaddr_iequtwolenplus1 & mul_spu_shf_ack;
+
+wire tr2jloopa_frm_accumshft_dly;
+dffr_s #(1) tr2jloopa_frm_accumshft_dly_ff (
+        .din(tr2jloopa_frm_accumshft) ,
+        .q(tr2jloopa_frm_accumshft_dly),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+wire tr2jloopa_frm_jloopn = cur_jloopn_state & mul_spu_ack &
+ 		((~spu_maaddr_jequiminus1 & ~spu_maaddr_halfpnt_set) |
+		(~spu_maaddr_jequlen & spu_maaddr_halfpnt_set)) ;
+
+assign nxt_jloopa_state = (
+			  tr2jloopa_frm_jloopn |
+                          tr2jloopa_frm_accumshft_dly );
+
+
+assign spu_mamul_jjptr_wen = cur_jloopm_state | spu_maaeqb_jjptr_wen;
+
+assign spu_mamul_incr_jptr = tr2jloopa_frm_jloopn | spu_maaeqb_incr_jptr;
+
+assign spu_mamul_jjptr_sel = cur_jloopn_state | spu_maaeqb_jjptr_sel;
+
+//assign spu_mamul_rd_aj = nxt_jloopa_state;
+assign spu_mamul_rd_aj = 
+	(cur_jloopn_state & ((~spu_maaddr_jequiminus1 & ~spu_maaddr_halfpnt_set) |
+				(~spu_maaddr_jequlen & spu_maaddr_halfpnt_set))) | 
+	tr2jloopa_frm_accumshft_dly;
+
+// jloopb: entered from jloopa; holds until mul_spu_ack
+assign nxt_jloopb_state = (
+                          cur_jloopa_state |
+			  (cur_jloopb_state & ~mul_spu_ack));
+
+//assign spu_mamul_rd_biminusj = nxt_jloopb_state | cur_jloopb_state;
+assign spu_mamul_rd_biminusj = cur_jloopa_state;
+
+// jloopm: set by the ack that ends jloopb; no hold term
+assign nxt_jloopm_state = (
+			  (cur_jloopb_state & mul_spu_ack));
+
+//assign spu_mamul_rd_mj = nxt_jloopm_state;
+assign spu_mamul_rd_mj = cur_jloopb_state;
+
+// jloopn: entered from jloopm; holds until mul_spu_ack
+
+assign nxt_jloopn_state = (
+                          cur_jloopm_state |
+			  (cur_jloopn_state & ~mul_spu_ack));
+
+//assign spu_mamul_rd_niminusj = nxt_jloopn_state;
+assign spu_mamul_rd_niminusj = cur_jloopm_state;
+
+// iloopa: entered from jloopn (ack, jequiminus1, ~halfpnt_set) or one flop after leaving idle
+assign tr2iloopa_frm_jloopn = cur_jloopn_state & mul_spu_ack &
+                spu_maaddr_jequiminus1 & ~spu_maaddr_halfpnt_set;
+
+wire tr2iloopa_frm_idle = cur_idle_state & mulop_start;
+
+wire tr2iloopa_frm_idle_dly;
+dff_s #(1) tr2iloopa_frm_idle_ff (
+        .din(tr2iloopa_frm_idle) ,
+        .q(tr2iloopa_frm_idle_dly),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+assign nxt_iloopa_state = (
+                          (tr2iloopa_frm_idle_dly) |
+			  (tr2iloopa_frm_jloopn));
+
+// iloop reads are done in the cur_* state, whereas the jloop reads
+// are done in the nxt_* and cur_* states (this is to hold the rd index during
+// requests). Because the iloop read happens in the cur_* state, spu_mul_req_vld
+// is delayed by a cycle.
+//assign spu_mamul_rd_ai = nxt_iloopa_state;
+assign spu_mamul_rd_ai = 
+	(cur_jloopn_state & (spu_maaddr_jequiminus1 & ~spu_maaddr_halfpnt_set)) | tr2iloopa_frm_idle_dly;
+
+// iloopb: entered from iloopa; holds until mul_spu_ack
+assign nxt_iloopb_state = (
+                          (cur_iloopa_state) |
+			  (cur_iloopb_state & ~mul_spu_ack));
+
+//assign spu_mamul_rd_b0 = nxt_iloopb_state;
+assign spu_mamul_rd_b0 = cur_iloopa_state;
+
+// nprime: entered on the ack that ends iloopb; holds until mul_spu_ack
+assign nxt_nprime_state = (
+                          (cur_iloopb_state & mul_spu_ack) |
+			  (cur_nprime_state & ~mul_spu_ack));
+
+
+// mwrite: entered from the delayed accumshft transition below or from mul_result_c5
+// assign tr2mwrite_frm_accumshft = cur_accumshft_state & mul_spu_shf_ack & 
+//                                 spu_maaddr_iequtwolenplus1; 
+assign tr2mwrite_frm_accumshft_pre = cur_accumshft_state & mul_spu_shf_ack & 
+                                spu_maaddr_iequtwolenplus1; 
+// delaying for one cycle to allow time to do i ptr increment
+// and calculate i-len-1(M[i-len-1]).This is due to skipping jloop on last
+// i iteration, not enough time to do both.
+dffr_s #(1) tr2mwrite_frm_accumshft_ff (
+        .din(tr2mwrite_frm_accumshft_pre) , 
+        .q(tr2mwrite_frm_accumshft),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+assign nxt_mwrite_state = (
+			  tr2mwrite_frm_accumshft |
+			  (mul_result_c5));
+
+// assign spu_mamul_memwen = nxt_mwrite_state;
+//need the following to capture mul data into flop.
+wire spu_mamul_wr_mi_oprnd2_wenbyp = nxt_mwrite_state & ~spu_maaddr_halfpnt_set;
+wire spu_mamul_wr_miminuslenminus1_oprnd2_wenbyp = nxt_mwrite_state & spu_maaddr_halfpnt_set;
+
+// iloopn: entered from mwrite when halfpnt_set is low; holds until mul_spu_ack
+assign nxt_iloopn_state = (
+			  (cur_mwrite_state & ~spu_maaddr_halfpnt_set) |
+			  (cur_iloopn_state & ~mul_spu_ack));
+
+//assign spu_mamul_rd_n0 =  nxt_iloopn_state | cur_iloopn_state;
+assign spu_mamul_rd_n0 =  cur_mwrite_state;
+
+// accumshft: entered from mwrite (halfpnt_set high) or on the iloopn ack; holds until mul_spu_shf_ack
+assign tr2accumshft_frm_mwrite = cur_mwrite_state & spu_maaddr_halfpnt_set;
+assign tr2accumshft_frm_iloopn = cur_iloopn_state & mul_spu_ack;
+
+assign nxt_accumshft_state = (
+			  tr2accumshft_frm_mwrite |
+			  tr2accumshft_frm_iloopn |
+			  (cur_accumshft_state & ~mul_spu_shf_ack));
+
+wire mamul_incr_iptr = tr2accumshft_frm_mwrite | tr2accumshft_frm_iloopn;
+
+assign spu_mamul_incr_iptr = mamul_incr_iptr | spu_maaeqb_incr_iptr;
+
+
+dff_s  #(1) memwen_dly_ff (
+        .din(mamul_incr_iptr) ,
+        .q(spu_mamul_memwen),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign spu_mamul_wr_mi = spu_mamul_memwen & ~spu_maaddr_halfpnt_set;
+assign spu_mamul_wr_miminuslenminus1 = spu_mamul_memwen & spu_maaddr_halfpnt_set;
+
+// cur_accumshft_pulse: high only while accumshft is set and its one-flop-delayed copy is not
+
+wire cur_accumshft_pulse,cur_accumshft_q;
+
+dff_s  #(1) cur_accumshft_pulse_ff (
+        .din(cur_accumshft_state) ,
+        .q(cur_accumshft_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign cur_accumshft_pulse = ~cur_accumshft_q & cur_accumshft_state;
+
+wire mamul_rst_jptr = mulop_start | tr2iloopa_frm_jloopn_dly |  (cur_accumshft_pulse &
+			spu_maaddr_halfpnt_set & ~spu_maaddr_iequtwolenplus2 &
+			~spu_maaddr_iequtwolenplus1);
+
+assign spu_mamul_rst_jptr = mamul_rst_jptr | spu_maaeqb_rst_jptr;
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// send selects to spu_maaddr.v
+// spu_mamul_memren is the OR of every local read strobe
+// ---------------------------------------------------------------
+assign spu_mamul_memren = spu_mamul_rd_aj | 
+		spu_mamul_rd_biminusj |
+		spu_mamul_rd_mj | 
+		spu_mamul_rd_niminusj |
+		spu_mamul_rd_ai | spu_mamul_rd_b0 | spu_mamul_rd_n0;	
+
+// ---------------------------------------------------------------
+// spu_mactl_expop steers each local read/write strobe to one of a
+// pair of selects (a/ax, b/ba, m/me); maaeqb selects are ORed in.
+wire mamul_a_rd_oprnd_sel = (spu_mamul_rd_aj | spu_mamul_rd_ai) & ~spu_mactl_expop;
+assign spu_mamul_a_rd_oprnd_sel = mamul_a_rd_oprnd_sel | spu_maaeqb_a_rd_oprnd_sel;
+
+wire mamul_ax_rd_oprnd_sel = (spu_mamul_rd_aj | spu_mamul_rd_ai) & spu_mactl_expop;
+assign spu_mamul_ax_rd_oprnd_sel = mamul_ax_rd_oprnd_sel | spu_maaeqb_ax_rd_oprnd_sel;
+
+// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+//assign spu_mamul_b_rd_oprnd_sel = ((spu_mamul_rd_biminusj & ~spu_mamul_rd_aj & ~spu_mamul_rd_mj) | 
+assign spu_mamul_b_rd_oprnd_sel = (spu_mamul_rd_biminusj | 
+					spu_mamul_rd_b0) & ~spu_mactl_expop;
+
+// bx should be removed, since xxnm does not start mamul, instead it starts maaeqb.
+// assign spu_mamul_bx_rd_oprnd_sel = ((spu_mamul_rd_biminusj & ~spu_mamul_rd_aj & ~spu_mamul_rd_mj) | 
+// 					spu_mamul_rd_b0) & spu_maexp_b_to_x_sel & spu_mactl_expop;
+
+//assign spu_mamul_ba_rd_oprnd_sel = ((spu_mamul_rd_biminusj & ~spu_mamul_rd_aj & ~spu_mamul_rd_mj) | 
+assign spu_mamul_ba_rd_oprnd_sel = (spu_mamul_rd_biminusj | 
+					spu_mamul_rd_b0) & spu_mactl_expop;
+
+
+// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+wire mamul_m_rd_oprnd_sel = spu_mamul_rd_mj & ~spu_mactl_expop ;
+assign spu_mamul_m_rd_oprnd_sel = mamul_m_rd_oprnd_sel | spu_maaeqb_m_rd_oprnd_sel  ;
+
+wire mamul_me_rd_oprnd_sel = spu_mamul_rd_mj & spu_mactl_expop ;
+assign spu_mamul_me_rd_oprnd_sel = mamul_me_rd_oprnd_sel | spu_maaeqb_me_rd_oprnd_sel  ;
+
+// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+//wire mamul_n_rd_oprnd_sel = (spu_mamul_rd_niminusj & ~spu_mamul_rd_aj & ~spu_mamul_rd_mj) | spu_mamul_rd_n0;
+wire mamul_n_rd_oprnd_sel = spu_mamul_rd_niminusj | spu_mamul_rd_n0;
+assign spu_mamul_n_rd_oprnd_sel = mamul_n_rd_oprnd_sel | spu_maaeqb_n_rd_oprnd_sel;
+
+// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+wire mamul_m_wr_oprnd_sel = (spu_mamul_wr_mi | spu_mamul_wr_miminuslenminus1) & 
+						~spu_mactl_expop;
+assign spu_mamul_m_wr_oprnd_sel = mamul_m_wr_oprnd_sel | spu_maaeqb_m_wr_oprnd_sel;
+
+wire mamul_me_wr_oprnd_sel = (spu_mamul_wr_mi | spu_mamul_wr_miminuslenminus1) & 
+						spu_mactl_expop;
+assign spu_mamul_me_wr_oprnd_sel = mamul_me_wr_oprnd_sel | spu_maaeqb_me_wr_oprnd_sel;
+
+
+// oprnd2 write-enable/bypass terms derived from nxt_mwrite_state (one flop ahead of cur_mwrite_state)
+wire mamul_m_wr_oprnd2_wen = (spu_mamul_wr_mi_oprnd2_wenbyp | 
+				spu_mamul_wr_miminuslenminus1_oprnd2_wenbyp) &
+                                                ~spu_mactl_expop;
+wire mamul_me_wr_oprnd2_wen = (spu_mamul_wr_mi_oprnd2_wenbyp | 
+				spu_mamul_wr_miminuslenminus1_oprnd2_wenbyp) &
+                                                spu_mactl_expop;
+
+// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+//assign spu_mamul_i_ptr_sel = (spu_mamul_rd_ai | spu_mamul_wr_mi) | spu_maaeqb_i_ptr_sel;
+assign spu_mamul_i_ptr_sel = spu_mamul_rd_ai ;
+assign spu_mamul_iminus1_ptr_sel = spu_mamul_wr_mi | spu_maaeqb_iminus1_ptr_sel ;
+
+assign spu_mamul_j_ptr_sel = (spu_mamul_rd_aj | spu_mamul_rd_mj) | spu_maaeqb_j_ptr_sel;
+
+wire mamul_iminusj_ptr_sel = 
+		//(spu_mamul_rd_biminusj | spu_mamul_rd_niminusj) & ~(spu_mamul_rd_aj | spu_mamul_rd_mj);
+		(spu_mamul_rd_biminusj | spu_mamul_rd_niminusj) ;
+assign spu_mamul_iminusj_ptr_sel = mamul_iminusj_ptr_sel | spu_maaeqb_iminusj_ptr_sel;
+
+
+assign spu_mamul_iminuslenminus1_sel = spu_mamul_wr_miminuslenminus1 | spu_maaeqb_iminuslenminus1_sel;
+
+// ---------------------------------------------------------------
+// ---------------------------------------------------------------
+// request to mul unit when asserted
+/*
+wire iloop_or_req_d;
+wire iloop_or_req = (cur_iloopb_state | cur_nprime_state | cur_iloopn_state)&
+			  ~mul_spu_ack; 
+dff_s #(1) iloop_dly_req_ff (
+        .din(iloop_or_req) , 
+        .q(iloop_or_req_d),
+        .clk (rclk), .se(se), .si(), .so()); 
+assign spu_mul_req_vld = (cur_jloopb_state | cur_jloopn_state | iloop_or_req_d) ;
+*/
+
+// live version: the request is the registered OR of the five nxt_* states that wait on mul_spu_ack
+wire mamul_mul_req_vld_pre = nxt_jloopb_state | nxt_jloopn_state | nxt_iloopb_state |
+                                nxt_nprime_state | nxt_iloopn_state ;
+
+dffr_s #(1) mamul_mul_req_vld_ff (
+        .din(mamul_mul_req_vld_pre) , 
+        .q(mamul_mul_req_vld),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+/*
+wire mamul_mul_req_vld = cur_jloopb_state | cur_jloopn_state | cur_iloopb_state |
+				cur_nprime_state | cur_iloopn_state ;
+*/
+
+assign spu_mul_req_vld = mamul_mul_req_vld | spu_maaeqb_mul_req_vld;
+	
+// ---------------------------------------------------------------
+
+assign spu_mul_areg_shf = cur_accumshft_state | spu_maaeqb_mul_areg_shf;
+// ---------------------------------------------------------------
+
+/*
+wire oprnd2_sel = mamul_a_rd_oprnd_sel | mamul_ax_rd_oprnd_sel | 
+	          mamul_m_rd_oprnd_sel | mamul_me_rd_oprnd_sel) & 
+*/
+
+wire oprnd2_sel = nxt_jloopa_state | nxt_iloopa_state | nxt_jloopm_state ;
+
+wire oprnd2_sel_q;
+dff_s #(1) oprnd2_wen_ff (
+        .din(oprnd2_sel) , 
+        .q(oprnd2_sel_q),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+assign spu_mamul_oprnd2_wen = oprnd2_sel_q | mamul_m_wr_oprnd2_wen | mamul_me_wr_oprnd2_wen | 
+				spu_mared_oprnd2_wen |
+				spu_maaeqb_oprnd2_wen;
+
+assign spu_mamul_oprnd2_bypass = mamul_m_wr_oprnd2_wen | mamul_me_wr_oprnd2_wen |
+					spu_maaeqb_oprnd2_bypass;
+
+
+//assign spu_mamul_oprnd1_sel = cur_nprime_state | spu_maaeqb_oprnd1_sel; // only select nprime if set
+
+// spu_mul_acc: local request qualified by "not in nprime", ORed with the maaeqb accumulate
+assign spu_mul_acc = (mamul_mul_req_vld & ~cur_nprime_state) | spu_maaeqb_mul_acc;
+
+// ---------------------------------------------------------------
+// oprnd1 mux select: while this machine is not idle (select_mamul) the
+// local terms drive the 2-bit select; when idle, spu_maaeqb_oprnd1_mxsel passes through.
+
+
+wire select_mamul = ~cur_idle_state; 
+
+
+wire spu_mamul_memrd4op1 = spu_mamul_rd_biminusj | spu_mamul_rd_b0 | spu_mamul_rd_n0 |
+					spu_mamul_rd_niminusj;
+
+wire spu_mamul_memrd4op1_q;
+dff_s #(1) spu_mamul_memrd4op1_ff (
+        .din(spu_mamul_memrd4op1) ,
+        .q(spu_mamul_memrd4op1_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+wire [1:0] spu_mamul_oprnd1_mxsel;
+assign spu_mamul_oprnd1_mxsel[0] = (select_mamul & (~cur_nprime_state & ~spu_mamul_memrd4op1_q)) |
+				   (~select_mamul & spu_maaeqb_oprnd1_mxsel[0]) ;
+assign spu_mamul_oprnd1_mxsel[1] = (select_mamul & (~cur_nprime_state & spu_mamul_memrd4op1_q)) |
+				   (~select_mamul & spu_maaeqb_oprnd1_mxsel[1]);
+//assign spu_mamul_oprnd1_mxsel[2] = (select_mamul & cur_nprime_state) | (~select_mamul & spu_maaeqb_oprnd1_mxsel[2]);
+
+// expand to three selects with exactly one bit high: [0] wins over [1]; [2] is set when neither is
+wire [2:0] spu_mamul_oprnd1_mxsel_ps;
+assign spu_mamul_oprnd1_mxsel_ps[0] = spu_mamul_oprnd1_mxsel[0];
+assign spu_mamul_oprnd1_mxsel_ps[1] = ~spu_mamul_oprnd1_mxsel[0] & spu_mamul_oprnd1_mxsel[1];
+assign spu_mamul_oprnd1_mxsel_ps[2] = ~spu_mamul_oprnd1_mxsel[0] & ~spu_mamul_oprnd1_mxsel[1];
+
+// the port carries the bitwise inverse of the three selects above
+assign spu_mamul_oprnd1_mxsel_l = ~spu_mamul_oprnd1_mxsel_ps;
+
+assign spu_mamul_oprnd1_wen = spu_mamul_memrd4op1_q | spu_maaeqb_oprnd1_wen;
+
+endmodule
Index: /trunk/T1-CPU/spu/spu_mald.v
===================================================================
--- /trunk/T1-CPU/spu/spu_mald.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_mald.v	(revision 6)
@@ -0,0 +1,233 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_mald.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    state machine for load requests to L2.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+module spu_mald (
+// Load-request state machine: raises spu_mald_ldreq, waits for the line, then strobes the memory write.
+/*outputs*/
+spu_mald_rstln,
+spu_mald_maaddr_addrinc,
+spu_mald_memwen,
+spu_mald_mpa_addrinc,
+spu_mald_ldreq,
+spu_mald_done,
+spu_mald_force_mpa_add16,
+
+spu_mald_done_set,
+
+/*inputs*/
+ld_inprog,
+ldreq_ack,
+ln_received,
+len_neqz,
+mactl_ldop,
+spu_maaddr_mpa1maddr0,
+spu_mactl_iss_pulse_dly,
+
+spu_wen_ma_unc_err_pulse,
+
+spu_mactl_stxa_force_abort,
+
+se,
+reset,
+rclk);
+
+// ------------------------ inputs -------------------------
+input reset;
+input rclk;
+input se; // wired to the .se pin of every flop in this module
+
+input ld_inprog;
+input ldreq_ack;
+input ln_received;
+input len_neqz;
+input mactl_ldop;
+input spu_maaddr_mpa1maddr0;
+input spu_mactl_iss_pulse_dly;
+
+input spu_wen_ma_unc_err_pulse;
+
+input spu_mactl_stxa_force_abort;
+// ------------------------ outputs ------------------------
+output spu_mald_rstln;
+output spu_mald_maaddr_addrinc;
+output spu_mald_memwen;
+output spu_mald_mpa_addrinc;
+
+output spu_mald_ldreq;
+output spu_mald_done;
+output spu_mald_force_mpa_add16;
+
+output spu_mald_done_set;
+
+// ---------------------------------------------------------
+
+wire tr2wait4ln_frm_ldreq;
+
+// ---------------------------------------------------------
+/*******************************
+
+State is held in five one-bit flops, one per state:
+
+idle            set by state_reset or spu_mald_done; held while ~start_ldop
+ldreq           entered from idle on start_ldop, or from chk4mpa1maddr0; held until ldreq_ack
+wait4ln         entered on ldreq_ack; held until ln_received
+mamemwr         set one flop after mald_memwen (see wen_dly_ff)
+chk4mpa1maddr0  follows mamemwr; next is done (len==0), ldreq or mamemwr
+(an older 6-entry list here -- ld1_req/ld2_req/wait_4ln1/wait_4ln2 -- did not match the flops)
+
+********************************/
+wire local_stxa_abort;
+// ------------------------------------------------------
+// we need a state set to indicate ld is done, and when an
+// masync gets issued later, then the load asi is returned.
+wire spu_mald_done_wen = (spu_mald_done | spu_wen_ma_unc_err_pulse | local_stxa_abort) & 
+					mactl_ldop;
+wire spu_mald_done_rst = reset | spu_mactl_iss_pulse_dly;
+
+dffre_s    #(1) spu_mald_done_ff (
+        .din(1'b1) , 
+        .q(spu_mald_done_set),
+        .en(spu_mald_done_wen),
+        .rst(spu_mald_done_rst), .clk (rclk), .se(se), .si(), .so()); 
+
+// ------------------------------------------------------
+// state_reset clears every state flop below except idle.
+// idle_state_ff has no rst pin; state_reset is ORed into
+// nxt_idle_state instead, so the same event sets idle.
+// ------------------------------------------------------
+
+wire state_reset = reset | spu_mald_done | spu_wen_ma_unc_err_pulse |
+					local_stxa_abort; 
+
+// ------------------------------------------------------
+dff_s    #(1) idle_state_ff (
+        .din(nxt_idle_state) , 
+        .q(cur_idle_state),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) ldreq_state_ff (
+        .din(nxt_ldreq_state) , 
+        .q(cur_ldreq_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) wait4ln_state_ff (
+        .din(nxt_wait4ln_state) , 
+        .q(cur_wait4ln_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) mamemwr_state_ff (
+        .din(nxt_mamemwr_state) , 
+        .q(cur_mamemwr_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+dffr_s  #(1) chk4mpa1maddr0_state_ff (
+        .din(nxt_chk4mpa1maddr0_state) , 
+        .q(cur_chk4mpa1maddr0_state),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so()); 
+
+// ------------------------------------------------------
+// start_ldop: the delayed issue pulse qualified by mactl_ldop
+
+wire start_ldop = spu_mactl_iss_pulse_dly & mactl_ldop;
+
+// --------------------------------------------------------------
+//  transition to idle state.
+
+assign spu_mald_done = cur_chk4mpa1maddr0_state & ~len_neqz;
+
+assign  nxt_idle_state = (
+                         state_reset |
+			 (spu_mald_done) |
+                         (cur_idle_state & ~start_ldop));
+
+// --------------------------------------------------------------
+//  transition to ldreq state.
+
+
+assign  nxt_ldreq_state = (
+			(cur_chk4mpa1maddr0_state & ~spu_maaddr_mpa1maddr0 & len_neqz) |
+                        (cur_idle_state & start_ldop) |
+                        (cur_ldreq_state & ~ldreq_ack));
+
+assign spu_mald_rstln = (cur_mamemwr_state & ld_inprog & len_neqz) | local_stxa_abort |
+						spu_wen_ma_unc_err_pulse;
+
+// --------------------------------------------------------------
+//  transition to wait4ln state.
+
+//assign tr2wait4ln_frm_ldreq = cur_ldreq_state & ldreq_ack & ln_received;
+assign tr2wait4ln_frm_ldreq = cur_ldreq_state & ldreq_ack ;
+
+assign nxt_wait4ln_state = (
+                        (tr2wait4ln_frm_ldreq) |
+                        (cur_wait4ln_state & ~ln_received));
+
+// --------------------------------------------------------------
+//  transition to mamemwr state.
+
+wire tr2mamemwr_frm_wait4ln = cur_wait4ln_state & ln_received;
+wire tr2mamemwr_frm_chk4mpa1maddr0 = cur_chk4mpa1maddr0_state & spu_maaddr_mpa1maddr0 & len_neqz;
+
+wire mald_memwen = ( tr2mamemwr_frm_wait4ln |
+		     tr2mamemwr_frm_chk4mpa1maddr0) & len_neqz;
+
+// added this delay for the Parity Gen. added extra cycle.
+wire mald_memwen_dly;
+dffr_s    #(1) wen_dly_ff (
+        .din(mald_memwen) ,
+        .q(mald_memwen_dly),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+
+assign nxt_mamemwr_state = ( mald_memwen_dly );
+
+assign local_stxa_abort = mald_memwen_dly & spu_mactl_stxa_force_abort; // feeds state_reset, done_wen and rstln
+
+// --------------------------------------------------------------
+//  transition to chk4mpa1maddr0 state.
+
+assign nxt_chk4mpa1maddr0_state = (
+                        (cur_mamemwr_state) );
+
+
+// --------------------------------------------------------------
+// Output strobes
+// --------------------------------------------------------------
+assign spu_mald_memwen = nxt_mamemwr_state; // one flop ahead of cur_mamemwr_state
+
+assign spu_mald_maaddr_addrinc = cur_mamemwr_state;
+
+assign spu_mald_mpa_addrinc = cur_mamemwr_state ;
+
+assign spu_mald_force_mpa_add16 = 1'b0 ; // tied low
+
+assign spu_mald_ldreq = cur_ldreq_state ;
+
+
+
+endmodule
Index: /trunk/T1-CPU/spu/spu.v
===================================================================
--- /trunk/T1-CPU/spu/spu.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu.v	(revision 6)
@@ -0,0 +1,604 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    Stream Processing Unit for Sparc Core   
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+
+module spu (///*AUTOARG*/ 
+short_si0,short_so0,short_si1,short_so1,si1,so1,
+// spu: top level of the Stream Processing Unit. This module contains only declarations
+// and the instances spu_lsurpt2, spu_lsurpt1, spu_mamem (bw_r_idct), spu_madp and spu_ctl.
+/*outputs*/
+
+spu_ifu_err_addr_w2,
+spu_ifu_mamem_err_w1,
+spu_ifu_int_w2,
+spu_lsu_ldxa_illgl_va_w2,
+
+spu_ifu_ttype_w2,
+spu_ifu_ttype_vld_w2,
+spu_ifu_ttype_tid_w2,
+
+spu_lsu_ldst_pckt,
+
+spu_mul_req_vld,
+spu_mul_areg_shf,
+spu_mul_areg_rst,
+spu_mul_acc,
+spu_mul_op1_data,
+spu_mul_op2_data,
+
+spu_lsu_ldxa_data_w2,
+spu_lsu_ldxa_data_vld_w2,
+spu_lsu_ldxa_tid_w2,
+
+spu_lsu_stxa_ack,
+spu_lsu_stxa_ack_tid,
+
+spu_mul_mulres_lshft,
+
+spu_tlu_rsrv_illgl_m,
+
+spu_ifu_corr_err_w2,
+spu_ifu_unc_err_w1,
+spu_lsu_unc_error_w2,
+
+
+/*inputs*/
+const_cpuid,
+cpx_spu_data_cx,
+lsu_spu_ldst_ack,
+
+mul_spu_ack,
+mul_spu_shf_ack,
+mul_data_out,
+
+lsu_spu_asi_state_e,
+ifu_spu_inst_vld_w,
+ifu_lsu_ld_inst_e,
+ifu_lsu_st_inst_e,
+ifu_lsu_alt_space_e,
+ifu_tlu_thrid_e,
+exu_lsu_ldst_va_e,
+exu_lsu_rs3_data_e,
+
+ifu_spu_trap_ack,
+
+lsu_spu_stb_empty,
+lsu_spu_strm_ack_cmplt,
+
+lsu_spu_early_flush_g,
+tlu_spu_flush_w,
+ifu_spu_flush_w,
+
+exu_spu_rsrv_data_e,
+ifu_spu_nceen,
+
+lsu_mamem_mrgn,
+mem_write_disable,
+mux_drive_disable,
+mem_bypass,
+
+se,
+sehold,
+grst_l,
+arst_l,
+rclk) ;  
+
+
+// ------------------------------------------------------------------
+///*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+input si1,short_si0,short_si1,se;
+input                   rclk ;
+input                   grst_l ;
+input                   arst_l ;
+input                   mem_write_disable ;
+input                   mux_drive_disable ;
+input                   sehold ;
+// The two *_flush_w inputs below are connected only to spu_ctl.
+input tlu_spu_flush_w;
+input ifu_spu_flush_w;
+
+input [2:0]     const_cpuid;
+// CPX data enters through spu_lsurpt2, then spu_lsurpt1, before reaching spu_madp and spu_ctl.
+input [134:0] cpx_spu_data_cx;
+input  lsu_spu_ldst_ack;
+
+input mul_spu_ack;
+input mul_spu_shf_ack;
+input [63:0] mul_data_out;
+
+input [7:0] lsu_spu_asi_state_e;
+input ifu_spu_inst_vld_w;
+input ifu_lsu_ld_inst_e;
+input ifu_lsu_st_inst_e;
+input ifu_lsu_alt_space_e;
+input [1:0] ifu_tlu_thrid_e;
+input [7:0] exu_lsu_ldst_va_e;
+input [63:0] exu_lsu_rs3_data_e;
+
+input  ifu_spu_trap_ack;
+// lsu_spu_stb_empty passes through spu_lsurpt1; spu_ctl is fed spu_lsurpt1_stb_empty.
+input [3:0] lsu_spu_stb_empty;
+input [1:0] lsu_spu_strm_ack_cmplt;
+
+input lsu_spu_early_flush_g;
+
+input [2:0] exu_spu_rsrv_data_e;
+
+input [3:0] ifu_spu_nceen;
+input [3:0] lsu_mamem_mrgn;
+
+input mem_bypass;
+
+// End of automatics
+// ------------------------------------------------------------------
+///*AUTOOUTPUT*/
+// Beginning of automatic outputs (from unused autoinst outputs)
+// Scan outputs: so1 comes from spu_ctl, short_so1 from spu_madp, short_so0 from spu_mamem.
+output so1,short_so1,short_so0;
+output spu_ifu_ttype_w2;
+output spu_ifu_ttype_vld_w2;
+output [1:0] spu_ifu_ttype_tid_w2;
+
+// ------------------------------
+// Bit 123 is driven by spu_ctl (spu_wen_ldst_pcx_vld); bits 122:0 are driven by spu_lsurpt2.
+output [123:0] spu_lsu_ldst_pckt;
+
+output spu_mul_req_vld;
+output spu_mul_areg_shf;
+output spu_mul_areg_rst;
+output spu_mul_acc;
+output [63:0] spu_mul_op1_data;
+output [63:0] spu_mul_op2_data;
+output [63:0] spu_lsu_ldxa_data_w2;
+output spu_lsu_ldxa_data_vld_w2;
+output [1:0] spu_lsu_ldxa_tid_w2;
+
+output spu_lsu_stxa_ack;
+output [1:0] spu_lsu_stxa_ack_tid;
+
+output spu_mul_mulres_lshft;
+
+output spu_tlu_rsrv_illgl_m;
+
+output spu_ifu_corr_err_w2;
+output spu_ifu_unc_err_w1;
+output spu_lsu_unc_error_w2;
+
+output [39:4] spu_ifu_err_addr_w2;
+output spu_ifu_mamem_err_w1;
+output spu_ifu_int_w2;
+output spu_lsu_ldxa_illgl_va_w2;
+
+
+// End of automatics
+// ------------------------------------------------------------------
+
+
+// ------------------------------------------------------------------
+// ------------------------------------------------------------------
+// Nets below connect the instances in this module to each other and to the ports.
+wire [123:0] spu_lsu_ldst_pckt;
+wire [65:0] spu_madp_evedata;
+wire [65:0] spu_madp_odddata;
+wire [7:1] spu_maaddr_memindx;
+wire [3:0] spu_maaddr_mamem_eveodd_sel_l;
+wire [2:0] spu_mactl_memmxsel_l;
+wire [38:3] spu_madp_mpa_addr_out;
+
+wire [63:0] spu_mul_op1_data;
+wire [63:0] spu_mul_op2_data;
+
+wire [3:0] spu_mared_data_sel_l;
+wire [63:0] spu_madp_store_data;
+
+
+// ------------------------------------
+
+wire [4:0] spu_maaddr_mpa_incr_val;
+
+// --------------------------------------------------------------
+
+wire spu_madp_perr;
+
+wire [2:0]			spu_mamul_oprnd1_mxsel_l;	// From spu_ctl of spu_ctl.v
+
+wire [1:0]		spu_ctl_ldxa_tid_w2;
+
+
+wire [3:0] spu_lsurpt1_stb_empty;
+
+
+   wire spu_ctl_ldxa_data_vld_w2,
+        spu_mactl_madp_parflop_wen,
+        spu_mactl_force_perr,
+        spu_mactl_memeve_wen,
+        spu_mactl_memodd_wen,
+        spu_mactl_mamem_ren,
+        spu_mactl_mamem_wen,
+        spu_mamul_oprnd1_wen,
+        spu_mactl_mem_reset_l,
+        spu_madp_m_eq_n,
+        spu_madp_m_lt_n,
+        spu_madp_cout_oprnd_sub_mod,
+        spu_madp_e_eq_one,
+        spu_mamul_oprnd2_wen,
+        spu_mamul_oprnd2_bypass,
+        spu_mared_rdn_wen,
+        spu_mared_cin_oprnd_sub_mod,
+        spu_maexp_e_data_wen,
+        spu_maexp_shift_e,
+        spu_maaddr_mpa_addrinc,
+        spu_maaddr_mpa_wen,
+        spu_mactl_mpa_sel,
+        spu_mactl_ldop,
+        spu_ctl_ldxa_illgl_va_w;
+
+wire [63:0] spu_lsurpt1_rs3_data_g2;
+
+wire [134:0] spu_lsurpt1_cpx_data;
+wire [134:0] spu_lsurpt2_cpx_data;
+
+wire [122:0] spu_lsurpt1_ldst_pckt;
+
+wire [63:0] spu_lsurpt1_ldxa_data;
+
+wire spu_wen_pcx_wen, spu_wen_pcx_7170_sel;
+
+
+wire [1:0] spu_ifu_ttype_tid_w;
+wire spu_lsu_unc_error_w;
+
+wire [65:0] spu_mamem_rd_eve_data;
+wire [65:0] spu_mamem_rd_odd_data;
+wire [122:104] spu_wen_pckt_req;
+
+wire [63:0] spu_madp_ldxa_data;
+wire [1:0] spu_lsu_stxa_ack_tid_ctl;
+
+wire [3:0] spu_mactl_ldxa_data_w_sel_l;
+wire  spu_mactl_ldxa_data_w_select;
+wire  spu_mactl_mpa_wen;
+wire  spu_mactl_maaddr_wen;
+wire  spu_mactl_manp_wen;
+wire  spu_wen_maln_wen;
+wire  [13:0] spu_mactl_mactl_reg;
+wire  [47:0] spu_madp_maaddr_reg;
+// scan1_1 links the scan chain from spu_lsurpt1 (.so) to spu_ctl (.si).
+   wire    scan1_1;
+// End of automatics
+
+// spu_lsurpt2 sits between the module ports and spu_lsurpt1 for ldxa data, ldst packet and CPX data.
+/****************************************************************************/
+spu_lsurpt spu_lsurpt2 (///*AUTOINST*/
+			
+             // Outputs
+		.spu_lsurpt_ldxa_data_out	(spu_lsu_ldxa_data_w2[63:0]),
+		.spu_lsurpt_ldst_pckt_out	(spu_lsu_ldst_pckt[122:0]),
+                .spu_lsurpt_cpx_data_out	(spu_lsurpt2_cpx_data[134:0]),
+             // Inputs
+		.spu_lsurpt_ldxa_data_in	(spu_lsurpt1_ldxa_data[63:0]),
+		.spu_lsurpt_ldst_pckt_in	(spu_lsurpt1_ldst_pckt[122:0]),
+                .spu_lsurpt_cpx_data_in  	(cpx_spu_data_cx[134:0]));
+
+
+/****************************************************************************/
+
+// spu_lsurpt1: scan input si1, scan output scan1_1; its reset_l is spu_mactl_mem_reset_l from spu_ctl.
+spu_lsurpt1 spu_lsurpt1 (///*AUTOINST*/
+			
+             // Outputs
+                .so                          	(scan1_1),
+		.spu_lsu_ldst_pckt		(spu_lsurpt1_ldst_pckt[122:0]),
+   
+		.spu_lsu_ldxa_data_w2		(spu_lsurpt1_ldxa_data[63:0]),
+		.spu_lsu_ldxa_data_vld_w2	(spu_lsu_ldxa_data_vld_w2),
+		.spu_lsu_ldxa_tid_w2		(spu_lsu_ldxa_tid_w2[1:0]),
+
+
+		.spu_lsu_ldxa_illgl_va_w2	(spu_lsu_ldxa_illgl_va_w2),
+
+		.spu_lsurpt1_rs3_data_g2		(spu_lsurpt1_rs3_data_g2[63:0]),
+
+		.spu_lsurpt1_stb_empty 		(spu_lsurpt1_stb_empty[3:0]),
+
+		.spu_lsurpt_cpx_data_out 	(spu_lsurpt1_cpx_data[134:0]),
+
+		.spu_ifu_ttype_tid_w2		(spu_ifu_ttype_tid_w2[1:0]),
+
+		.spu_ifu_err_addr_w2 		(spu_ifu_err_addr_w2[39:4]),
+
+		 .spu_lsu_unc_error_w2		(spu_lsu_unc_error_w2),
+
+		 .spu_lsu_stxa_ack_tid		(spu_lsu_stxa_ack_tid[1:0]),
+             // Inputs
+		.spu_ifu_ttype_tid_w		(spu_ifu_ttype_tid_w[1:0]),
+		 .spu_lsu_unc_error_w		(spu_lsu_unc_error_w),
+
+		.exu_lsu_rs3_data_e		(exu_lsu_rs3_data_e[63:0]),
+   
+		.spu_ctl_ldxa_illgl_va_w	(spu_ctl_ldxa_illgl_va_w),
+		// 123-bit request: {pckt_req[122:104], 1'b0, mpa addr[38:3], 3'b000, store data[63:0]}
+		.spu_ldstreq_pcx		({spu_wen_pckt_req[122:104],1'b0,spu_madp_mpa_addr_out[38:3],
+							3'b000,spu_madp_store_data[63:0]}),
+   
+		.spu_madp_ldxa_data		(spu_madp_ldxa_data[63:0]),
+
+		.spu_ctl_ldxa_data_vld_w2	(spu_ctl_ldxa_data_vld_w2),
+		.spu_ctl_ldxa_tid_w2		(spu_ctl_ldxa_tid_w2[1:0]),
+
+		.lsu_spu_stb_empty 		(lsu_spu_stb_empty[3:0]),
+
+		.spu_lsurpt_cpx_data_in 	(spu_lsurpt2_cpx_data[134:0]),
+
+		.spu_wen_pcx_wen		(spu_wen_pcx_wen),
+		.spu_wen_pcx_7170_sel		(spu_wen_pcx_7170_sel),
+
+		 .spu_lsu_stxa_ack_tid_ctl		(spu_lsu_stxa_ack_tid_ctl[1:0]),
+
+                //.tmb_l  			(testmode_l),
+                .se                          	(se),
+                .si                          	(si1),
+		.reset_l			(spu_mactl_mem_reset_l),
+                .rclk                       	(rclk));
+
+/****************************************************************************/
+// spu_mamem: a bw_r_idct instance; each 66-bit even/odd word is split across two 33-bit tag ports.
+// -------------------------------------------------------------------------
+// ------------------------ MA STUFF ---------------------------------------
+bw_r_idct spu_mamem (
+                 .rdtag_w3_y    ({spu_mamem_rd_eve_data[65],spu_mamem_rd_eve_data[63:32]}),
+                 .rdtag_w2_y    ({spu_mamem_rd_eve_data[64],spu_mamem_rd_eve_data[31:0]}),
+                 .rdtag_w1_y    ({spu_mamem_rd_odd_data[65],spu_mamem_rd_odd_data[63:32]}),
+                 .rdtag_w0_y    ({spu_mamem_rd_odd_data[64],spu_mamem_rd_odd_data[31:0]}),
+                 // write data uses the same split as the read data: {bit 65, [63:32]} and {bit 64, [31:0]}
+                 .wrtag_w3_y       ({spu_madp_evedata[65],spu_madp_evedata[63:32]}),
+                 .wrtag_w2_y       ({spu_madp_evedata[64],spu_madp_evedata[31:0]}),
+                 .wrtag_w1_y       ({spu_madp_odddata[65],spu_madp_odddata[63:32]}),
+                 .wrtag_w0_y       ({spu_madp_odddata[64],spu_madp_odddata[31:0]}),
+                 // index1_x and index_sel_x are tied to 0 below; presumably this selects index0_x (confirm in bw_r_idct)
+                 /*AUTOINST*/
+                 // Outputs
+                 .so                    (short_so0),
+                 // Inputs
+		 .rst_tri_en			(mem_write_disable),
+                 .rclk                  (rclk),
+                 .se                    (se),
+                 .si                    (short_si0),
+                 .reset_l               (arst_l),       
+                 .sehold                (sehold),
+                 .index0_x              (spu_maaddr_memindx[7:1]), 
+                 .index1_x              (7'b0000000), 
+                 .index_sel_x           (1'b0), 
+                 .dec_wrway_x               ({spu_mactl_memeve_wen,spu_mactl_memeve_wen,
+                                                spu_mactl_memodd_wen,spu_mactl_memodd_wen}), 
+                 .rdreq_x               (spu_mactl_mamem_ren),      
+                 .wrreq_x               (spu_mactl_mamem_wen),      
+                 .adj                   (lsu_mamem_mrgn[3:0]));   
+   
+// spu_madp instance; its scan chain runs from short_si1 to short_so1.
+// -------------------------------------------------------------------------
+spu_madp spu_madp (///*AUTOINST*/
+             // Outputs
+		.spu_madp_evedata		(spu_madp_evedata[65:0]),
+		.spu_madp_odddata		(spu_madp_odddata[65:0]),
+
+
+		.spu_mul_op2_data		(spu_mul_op2_data[63:0]),
+
+		.spu_madp_m_eq_n		(spu_madp_m_eq_n),
+		.spu_madp_m_lt_n		(spu_madp_m_lt_n),
+
+		.spu_madp_store_data		(spu_madp_store_data[63:0]),
+
+		.spu_madp_cout_oprnd_sub_mod	(spu_madp_cout_oprnd_sub_mod),
+
+		.spu_madp_e_eq_one		(spu_madp_e_eq_one),
+
+		.spu_madp_mpa_addr_out		(spu_madp_mpa_addr_out[38:3]),
+
+		.spu_madp_perr			(spu_madp_perr),
+
+                .spu_mul_op1_data               (spu_mul_op1_data[63:0]),
+
+		.spu_madp_ldxa_data		(spu_madp_ldxa_data[63:0]),
+
+		.spu_madp_maaddr_reg		(spu_madp_maaddr_reg[47:0]),
+
+		.so				(short_so1),
+
+             // Inputs
+                .spu_mamul_oprnd1_mxsel_l       (spu_mamul_oprnd1_mxsel_l[2:0]),
+                .spu_mamul_oprnd1_wen           (spu_mamul_oprnd1_wen),
+                .spu_maaddr_mamem_eveodd_sel_l                   (spu_maaddr_mamem_eveodd_sel_l[3:0]),
+
+                .spu_mamem_rd_eve_data                  (spu_mamem_rd_eve_data[65:0]),
+                .spu_mamem_rd_odd_data                  (spu_mamem_rd_odd_data[65:0]),
+
+
+		.mul_data_out			(mul_data_out[63:0]),
+
+
+
+		.spu_mamul_oprnd2_wen		(spu_mamul_oprnd2_wen),
+		.spu_mamul_oprnd2_bypass	(spu_mamul_oprnd2_bypass),
+
+		.spu_mared_data_sel_l		(spu_mared_data_sel_l[3:0]),
+		.spu_mared_rdn_wen		(spu_mared_rdn_wen),
+		.spu_mared_cin_oprnd_sub_mod	(spu_mared_cin_oprnd_sub_mod),
+ 
+		.spu_maexp_e_data_wen		(spu_maexp_e_data_wen),
+		.spu_maexp_shift_e		(spu_maexp_shift_e),
+
+		.spu_maaddr_mpa_addrinc		(spu_maaddr_mpa_addrinc),
+		.spu_maaddr_mpa_incr_val	(spu_maaddr_mpa_incr_val[4:0]),
+		.spu_mactl_mpa_sel		(spu_mactl_mpa_sel),
+
+		.spu_mactl_ldop			(spu_mactl_ldop),
+
+		.spu_mactl_madp_parflop_wen	(spu_mactl_madp_parflop_wen),
+
+		.spu_mactl_memmxsel_l		(spu_mactl_memmxsel_l[2:0]),
+
+                .spu_mactl_force_perr     	(spu_mactl_force_perr),
+
+		.spu_maaddr_mpa_wen		(spu_maaddr_mpa_wen),
+
+                .spu_mactl_mactl_reg            (spu_mactl_mactl_reg[13:0]),
+
+                .spu_mactl_ldxa_data_w_sel_l    (spu_mactl_ldxa_data_w_sel_l[3:0]),
+                .spu_mactl_ldxa_data_w_select   (spu_mactl_ldxa_data_w_select),
+
+                .spu_mactl_mpa_wen              (spu_mactl_mpa_wen),
+                .spu_mactl_maaddr_wen           (spu_mactl_maaddr_wen),
+                .spu_mactl_manp_wen             (spu_mactl_manp_wen),
+                .exu_spu_st_rs3_data_g2          (spu_lsurpt1_rs3_data_g2[63:0]),
+                .spu_wen_maln_wen          (spu_wen_maln_wen),
+                .lsu_spu_vload_data          (spu_lsurpt1_cpx_data[127:0]),
+
+
+                .se     			(se),
+                .si     			(short_si1),
+                .sehold     			(sehold),
+
+
+                .rclk                       (rclk));
+// spu_ctl is the last element of the si1 scan chain: si = scan1_1 (from spu_lsurpt1), so = so1.
+//---------------------------------------------------
+//--------------SPU CONTROL BLOCK--------------------
+spu_ctl spu_ctl (		     
+		     /*AUTOINST*/
+		 // Outputs
+
+		 .spu_wen_ldst_pcx_vld		(spu_lsu_ldst_pckt[123]),
+		 .spu_mul_mulres_lshft		(spu_mul_mulres_lshft),
+		.spu_maaddr_mpa_wen		(spu_maaddr_mpa_wen),
+		 .spu_mamul_oprnd2_bypass	(spu_mamul_oprnd2_bypass),
+		 .spu_mactl_ldop		(spu_mactl_ldop),
+		 .so				(so1),
+		 .spu_ifu_ttype_tid_w		(spu_ifu_ttype_tid_w[1:0]),
+		 .spu_ifu_ttype_vld_w2		(spu_ifu_ttype_vld_w2),
+		 .spu_ifu_ttype_w2		(spu_ifu_ttype_w2),
+		 .spu_lsu_ldxa_data_vld_w2	(spu_ctl_ldxa_data_vld_w2),
+		 .spu_lsu_ldxa_tid_w2		(spu_ctl_ldxa_tid_w2[1:0]),
+		 .spu_lsu_stxa_ack		(spu_lsu_stxa_ack),
+		 .spu_lsu_stxa_ack_tid		(spu_lsu_stxa_ack_tid_ctl[1:0]),
+		 .spu_maaddr_memindx		(spu_maaddr_memindx[7:1]),
+                .spu_maaddr_mamem_eveodd_sel_l                   (spu_maaddr_mamem_eveodd_sel_l[3:0]),
+
+		 .spu_maaddr_mpa_addrinc	(spu_maaddr_mpa_addrinc),
+		 .spu_maaddr_mpa_incr_val	(spu_maaddr_mpa_incr_val[4:0]),
+		 .spu_mactl_force_perr		(spu_mactl_force_perr),
+		 .spu_mactl_madp_parflop_wen	(spu_mactl_madp_parflop_wen),
+		 .spu_mactl_mamem_ren		(spu_mactl_mamem_ren),
+		 .spu_mactl_mamem_wen		(spu_mactl_mamem_wen),
+		 .spu_mactl_memeve_wen		(spu_mactl_memeve_wen),
+		 .spu_mactl_memmxsel_l		(spu_mactl_memmxsel_l[2:0]),
+		 .spu_mactl_memodd_wen		(spu_mactl_memodd_wen),
+		 .spu_mactl_mpa_sel		(spu_mactl_mpa_sel),
+		 .spu_maexp_e_data_wen		(spu_maexp_e_data_wen),
+		 .spu_maexp_shift_e		(spu_maexp_shift_e),
+		.spu_mamul_oprnd1_mxsel_l	(spu_mamul_oprnd1_mxsel_l[2:0]),
+		.spu_mamul_oprnd1_wen		(spu_mamul_oprnd1_wen),
+		 .spu_mamul_oprnd2_wen		(spu_mamul_oprnd2_wen),
+		 .spu_mared_cin_oprnd_sub_mod	(spu_mared_cin_oprnd_sub_mod),
+		 .spu_mared_data_sel_l		(spu_mared_data_sel_l[3:0]),
+		 .spu_mared_rdn_wen		(spu_mared_rdn_wen),
+		 .spu_mul_acc			(spu_mul_acc),
+		 .spu_mul_areg_rst		(spu_mul_areg_rst),
+		 .spu_mul_areg_shf		(spu_mul_areg_shf),
+		 .spu_mul_req_vld		(spu_mul_req_vld),
+		 .spu_tlu_rsrv_illgl_m		(spu_tlu_rsrv_illgl_m),
+
+		 .spu_ifu_corr_err_w2		(spu_ifu_corr_err_w2),
+		 .spu_ifu_unc_err_w		(spu_ifu_unc_err_w1),
+		 .spu_lsu_unc_error_w		(spu_lsu_unc_error_w),
+
+		 .spu_ifu_mamem_err_w		(spu_ifu_mamem_err_w1),
+		 .spu_ifu_int_w2		(spu_ifu_int_w2),
+		 .spu_lsu_ldxa_illgl_va_w2	(spu_ctl_ldxa_illgl_va_w),
+
+		 .spu_mactl_mem_reset_l		(spu_mactl_mem_reset_l),
+
+		 .spu_mactl_ldxa_data_w_sel_l		(spu_mactl_ldxa_data_w_sel_l[3:0]),
+		 .spu_mactl_ldxa_data_w_select		(spu_mactl_ldxa_data_w_select),
+		 .spu_mactl_mpa_wen		(spu_mactl_mpa_wen),
+		 .spu_mactl_maaddr_wen		(spu_mactl_maaddr_wen),
+		 .spu_mactl_manp_wen		(spu_mactl_manp_wen),
+		 .spu_wen_maln_wen		(spu_wen_maln_wen),
+		 .spu_mactl_mactl_reg		(spu_mactl_mactl_reg[13:0]),
+		 // Inputs
+		.spu_madp_maaddr_reg		(spu_madp_maaddr_reg[47:0]),
+
+		 // spu_ctl is given only slices of some buses below: rs3 data [13:0], mpa addr [3], mul_data_out [0]
+		 .sehold			(sehold),
+
+		 .mem_bypass			(mem_bypass),
+		 .mux_drive_disable		(mux_drive_disable),
+		 .tlu_spu_flush_w		(tlu_spu_flush_w),
+		 .ifu_spu_flush_w		(ifu_spu_flush_w),
+
+		.lsu_spu_stb_empty		(spu_lsurpt1_stb_empty[3:0]),
+
+		 .lsu_spu_strm_ack_cmplt	(lsu_spu_strm_ack_cmplt[1:0]),
+		 // bits 134:128 of the CPX data go to spu_ctl; bits 127:0 go to spu_madp as lsu_spu_vload_data
+		 .cpx_spu_data_cx		(spu_lsurpt1_cpx_data[134:128]),
+		 .spu_wen_pckt_req		(spu_wen_pckt_req[122:104]),
+		 .lsu_spu_ldst_ack		(lsu_spu_ldst_ack),
+		 .ifu_spu_trap_ack		(ifu_spu_trap_ack),
+		 .lsu_tlu_st_rs3_data_g		(spu_lsurpt1_rs3_data_g2[13:0]),
+		 .spu_lsurpt1_rsrv_data_e	(exu_spu_rsrv_data_e[2:0]),
+		 .spu_madp_mpa_addr		(spu_madp_mpa_addr_out[3:3]),
+		 .mul_data_out			(mul_data_out[0:0]),
+		 .rclk				(rclk),
+		 .exu_lsu_ldst_va_e		(exu_lsu_ldst_va_e[7:0]),
+		 .ifu_lsu_alt_space_e		(ifu_lsu_alt_space_e),
+		 .ifu_lsu_ld_inst_e		(ifu_lsu_ld_inst_e),
+		 .ifu_lsu_st_inst_e		(ifu_lsu_st_inst_e),
+		 .ifu_spu_inst_vld_w		(ifu_spu_inst_vld_w),
+		 .ifu_tlu_thrid_e		(ifu_tlu_thrid_e[1:0]),
+		 .lsu_spu_asi_state_e		(lsu_spu_asi_state_e[7:0]),
+		 .mul_spu_ack			(mul_spu_ack),
+		 .mul_spu_shf_ack		(mul_spu_shf_ack),
+		 .grst_l			(grst_l),
+		 .arst_l			(arst_l),
+		 .se				(se),
+		 .si				(scan1_1),
+		.spu_wen_pcx_wen		(spu_wen_pcx_wen),
+		.spu_wen_pcx_7170_sel		(spu_wen_pcx_7170_sel),
+		.cpuid				(const_cpuid[2:0]),
+		.ifu_spu_nceen			(ifu_spu_nceen[3:0]),
+
+		 .spu_madp_cout_oprnd_sub_mod	(spu_madp_cout_oprnd_sub_mod),
+		 .spu_madp_e_eq_one		(spu_madp_e_eq_one),
+		 .spu_madp_m_eq_n		(spu_madp_m_eq_n),
+		 .spu_madp_m_lt_n		(spu_madp_m_lt_n),
+		 .spu_madp_perr		(spu_madp_perr),
+		 .lsu_spu_early_flush_g		(lsu_spu_early_flush_g));
+
+endmodule
Index: /trunk/T1-CPU/spu/spu_mactl.v
===================================================================
--- /trunk/T1-CPU/spu/spu_mactl.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_mactl.v	(revision 6)
@@ -0,0 +1,1624 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_mactl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    control for MA. 
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+
+module spu_mactl (
+
+/*outputs*/
+spu_mactl_iss_pulse,
+spu_mactl_mpa_wen,
+spu_mactl_maaddr_wen,
+spu_mactl_manp_wen,
+
+spu_mactl_ldop,
+spu_mactl_stop,
+spu_mactl_mulop,
+spu_mactl_redop,
+spu_mactl_expop,
+
+spu_mactl_memmxsel_l,
+
+spu_mactl_memeve_wen,
+spu_mactl_memodd_wen,
+
+spu_mactl_mamem_ren,
+spu_mactl_mamem_wen,
+
+spu_mactl_iss_pulse_dly,
+
+spu_mactl_ldxa_data_w_sel_l,
+spu_mactl_ldxa_data_w_select,
+
+spu_mactl_mpa_sel,
+
+spu_mactl_madp_parflop_wen,
+
+spu_lsu_ldxa_data_vld_w2,
+spu_lsu_ldxa_tid_w2,
+spu_lsu_stxa_ack,
+
+spu_lsu_stxa_ack_tid,
+
+spu_mactl_ldxa_mactl_reg,
+
+spu_mactl_force_perr,
+
+spu_ifu_ttype_w2,
+spu_ifu_ttype_vld_w2,
+spu_ifu_ttype_tid_w,
+
+spu_tlu_rsrv_illgl_m,
+
+
+spu_ifu_corr_err_w2,
+spu_ifu_unc_err_w,
+spu_lsu_unc_error_w,
+
+spu_ifu_mamem_err_w,
+spu_ifu_int_w2,
+spu_lsu_ldxa_illgl_va_w2,
+
+
+spu_mactl_streq,
+
+spu_mactl_ctl_reset,
+spu_mactl_mem_reset_l,
+
+
+spu_mactl_ma_kill_op,
+
+spu_mactl_stxa_force_abort,
+
+spu_mactl_mactl_len,
+
+spu_mactl_uncerr_rst,
+
+spu_mactl_iss_pulse_pre,
+
+spu_mactl_pcx_wen,
+spu_mactl_pcx_7170_sel,
+
+spu_mactl_perr_set,
+
+/*inputs*/
+
+spu_mast_streq,
+
+
+lsu_spu_stb_empty,
+
+spu_maaddr_maaddr_0,
+spu_maaddr_mpa_3,
+
+spu_mald_memwen,
+spu_mamul_memwen,
+spu_mamul_memren,
+spu_maaeqb_memwen,
+spu_maaeqb_memren,
+
+spu_mared_memren,
+spu_mared_memwen,
+
+spu_mast_memren,
+
+lsu_spu_early_flush_g,
+tlu_spu_flush_w,
+ifu_spu_flush_w,
+ifu_spu_inst_vld_w,
+lsu_spu_asi_state_e,
+ifu_lsu_ld_inst_e,
+ifu_lsu_st_inst_e,
+ifu_lsu_alt_space_e,
+ifu_tlu_thrid_e,
+exu_lsu_ldst_va_e,
+
+spu_mald_done_set,
+spu_mast_done_set,
+spu_mared_done_set,
+spu_maexp_done_set,
+
+spu_maexp_memren,
+
+spu_maaddr_nooddwr_on_leneq1,
+
+spu_mared_not_idle,
+spu_mamul_oprnd2_bypass,
+
+spu_madp_perr,
+
+ma_ctl_reg_data,
+
+ifu_spu_trap_ack,
+
+
+spu_mactl_rsrv_data_e,
+
+
+
+spu_maaddr_len_cntr,
+
+
+spu_wen_ma_unc_err,
+spu_wen_ma_cor_err,
+
+ifu_spu_nceen,
+
+lsu_spu_ldst_ack,
+mux_drive_disable,
+
+
+se,
+grst_l,
+arst_l,
+rclk);
+
+// ---------------------------------------------
+input grst_l;
+input arst_l;
+input rclk;
+input se;
+
+
+input spu_mald_memwen;
+input spu_mamul_memwen;
+input spu_mamul_memren;
+input spu_maaeqb_memwen;
+input spu_maaeqb_memren;
+
+input spu_maaddr_maaddr_0;
+input  spu_maaddr_mpa_3;
+
+input  spu_mared_memren;
+input  spu_mared_memwen;
+
+input  spu_mast_memren;
+
+
+input lsu_spu_early_flush_g;
+input tlu_spu_flush_w;
+input ifu_spu_flush_w;
+input ifu_spu_inst_vld_w;
+input [7:0] lsu_spu_asi_state_e;
+input ifu_lsu_ld_inst_e;
+input ifu_lsu_st_inst_e;
+input ifu_lsu_alt_space_e;
+input [1:0] ifu_tlu_thrid_e;
+input [7:0] exu_lsu_ldst_va_e;
+
+input spu_mald_done_set;
+input spu_mast_done_set;
+input spu_mared_done_set;
+input spu_maexp_done_set;
+
+input spu_maexp_memren;
+
+input spu_maaddr_nooddwr_on_leneq1;
+
+input spu_mared_not_idle;
+input spu_mamul_oprnd2_bypass;
+
+input spu_madp_perr;
+
+input [13:0] ma_ctl_reg_data;
+
+input ifu_spu_trap_ack;
+
+
+input [2:0] spu_mactl_rsrv_data_e;
+
+
+
+input [3:0] lsu_spu_stb_empty;
+
+input spu_mast_streq;
+
+input [5:0] spu_maaddr_len_cntr;
+
+
+
+input spu_wen_ma_unc_err;
+input spu_wen_ma_cor_err;
+
+
+input [3:0] ifu_spu_nceen;
+
+input lsu_spu_ldst_ack;
+input mux_drive_disable;
+
+// ---------------------------------------------
+
+output spu_mactl_iss_pulse;
+
+output spu_mactl_mpa_wen;
+output spu_mactl_maaddr_wen;
+output spu_mactl_manp_wen;
+
+output spu_mactl_ldop;
+output spu_mactl_stop;
+output spu_mactl_mulop;
+output spu_mactl_redop;
+output spu_mactl_expop;
+
+output [2:0] spu_mactl_memmxsel_l;
+
+output spu_mactl_memeve_wen;
+output spu_mactl_memodd_wen;
+
+output spu_mactl_mamem_ren;
+output spu_mactl_mamem_wen;
+
+output spu_mactl_iss_pulse_dly;
+
+output [3:0] spu_mactl_ldxa_data_w_sel_l;
+output spu_mactl_ldxa_data_w_select;
+
+output spu_mactl_mpa_sel;
+
+output spu_mactl_madp_parflop_wen;
+
+output spu_lsu_ldxa_data_vld_w2;
+output [1:0] spu_lsu_ldxa_tid_w2;
+output spu_lsu_stxa_ack;
+output [1:0] spu_lsu_stxa_ack_tid;
+
+
+output [13:0] spu_mactl_ldxa_mactl_reg;
+
+output spu_mactl_force_perr;
+
+output spu_ifu_ttype_w2;
+output spu_ifu_ttype_vld_w2;
+output [1:0] spu_ifu_ttype_tid_w;
+
+output spu_tlu_rsrv_illgl_m;
+
+
+output spu_ifu_corr_err_w2;
+output spu_ifu_unc_err_w;
+output spu_lsu_unc_error_w;
+
+output spu_ifu_mamem_err_w;
+output spu_ifu_int_w2;
+output spu_lsu_ldxa_illgl_va_w2;
+
+output spu_mactl_streq;
+
+output spu_mactl_ctl_reset;
+output spu_mactl_mem_reset_l;
+
+
+output spu_mactl_ma_kill_op;
+
+output spu_mactl_stxa_force_abort;
+
+output [5:0] spu_mactl_mactl_len;
+
+output spu_mactl_uncerr_rst;
+
+output spu_mactl_iss_pulse_pre;
+
+output spu_mactl_pcx_wen;
+output spu_mactl_pcx_7170_sel;
+
+output spu_mactl_perr_set;
+
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+wire [1:0] spu_mactl_ldxa_tid_g2;
+wire [1:0] spu_mactl_ldxa_tid_w;
+wire spu_mactl_clr_busy_bit;
+wire spu_mactl_busy_bit_wen,spu_mactl_busy_bit_wrdata;
+
+wire clr_busy_bit_when_masync;
+
+wire spu_mactl_mactl_wen;
+wire ma_op_done;
+wire spu_mactl_rsrv_done_set;
+wire spu_mactl_rsrv_wrdetect_m;
+wire spu_mactl_perr_set_int;
+wire spu_mactl_disable_stxa_ack_g2;
+wire wait_4stb_tobecome_empty;
+wire waiting_4stb_tobecome_empty;
+wire spu_lsu_stxa_ack_q_buf;
+
+wire wait4_trap_ack_set;
+
+wire spu_mactl_nceen_4int;
+
+wire spu_ifu_mamem_unc_w2;
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+/*
+dffrl_async ma_mem_rst (.din (grst_l),
+                        .q   (spu_mactl_mem_reset_l),
+                        .clk (rclk),
+                        .rst_l (arst_l), .se(se), .si(), .so());
+*/
+
+wire spu_mactl_ctl_rst_l;
+dffrl_async ma_enc_rst (.din (grst_l),
+                        .q   (spu_mactl_ctl_rst_l),
+                        .clk (rclk),
+                        .rst_l (arst_l), .se(se), .si(), .so());
+
+
+
+wire spu_mactl_ctl_rst_local = ~spu_mactl_ctl_rst_l;
+
+assign spu_mactl_ctl_reset = ~spu_mactl_ctl_rst_l;
+   
+assign spu_mactl_mem_reset_l = ~spu_mactl_ctl_rst_local;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+wire spu_mactl_stxa_2ctl_reg;
+wire stxa_2ctl_reg = spu_mactl_stxa_2ctl_reg;
+wire ma_op_complete, spu_mactl_done;
+wire ma_op_complete_mask;
+wire [1:0] spu_thrid_m,spu_thrid_g;
+wire [1:0] spu_thrid_g2;
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+
+wire wait4_trap_ack_stxa2ctl = wait4_trap_ack_set & stxa_2ctl_reg;
+
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+// -----------------------------------------------------------------
+wire  state_reset = spu_mactl_ctl_rst_local | spu_mactl_done ;
+// -------------------------------------------------------------------------
+
+/*************
+state
+-----
+00	idle
+01	operation inprogress
+10	wait state
+11	abort
+**************/
+dff_s    #(1) state_00_ff (
+        .din(nxt_state_00) ,
+        .q(cur_state_00),
+        .clk (rclk), .se(se), .si(), .so());
+
+dffr_s  #(1) state_01_ff (
+        .din(nxt_state_01) ,
+        .q(cur_state_01),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+dffr_s  #(1) state_10_ff (
+        .din(nxt_state_10) ,
+        .q(cur_state_10),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+
+dffr_s  #(1) state_11_ff (
+        .din(nxt_state_11) ,
+        .q(cur_state_11),
+        .rst(state_reset), .clk (rclk), .se(se), .si(), .so());
+// -------------------------------------------------------------------------
+// transition to 00 state.
+
+assign spu_mactl_done = cur_state_01 & ~stxa_2ctl_reg & ma_op_complete_mask;
+
+assign  nxt_state_00 = (
+                         state_reset | spu_mactl_done |
+                         (cur_state_00 & ~stxa_2ctl_reg ));
+
+// -------------------------------------------------------------------------
+// transition to 01 state.
+
+wire tr2state01_frm_state00 = cur_state_00 & stxa_2ctl_reg & ~wait_4stb_tobecome_empty &
+								~wait4_trap_ack_stxa2ctl; 
+wire tr2state01_frm_state10 = cur_state_10 & ~(stxa_2ctl_reg | waiting_4stb_tobecome_empty |
+								wait4_trap_ack_set); 
+
+assign  nxt_state_01 = (
+			 tr2state01_frm_state00 |
+			 tr2state01_frm_state10 |
+                         (cur_state_01 & ~stxa_2ctl_reg & ~ma_op_complete_mask));
+
+// -------------------------------------------------------------------------
+// transition to 10 state.
+
+wire tr2state10_frm_state11 = cur_state_11 & ~stxa_2ctl_reg & ma_op_complete_mask; 
+wire tr2state10_frm_state10 = cur_state_10 & (stxa_2ctl_reg | waiting_4stb_tobecome_empty |
+								wait4_trap_ack_set); 
+wire tr2state10_frm_state00 = cur_state_00 & (wait_4stb_tobecome_empty | wait4_trap_ack_stxa2ctl); 
+
+assign  nxt_state_10 = (
+                         tr2state10_frm_state00 | tr2state10_frm_state11 | tr2state10_frm_state10 );
+
+// -------------------------------------------------------------------------
+// transition to 11 state.
+
+wire tr2state11_frm_state01 = cur_state_01 & stxa_2ctl_reg; 
+wire tr2state11_frm_state11 = cur_state_11 & (stxa_2ctl_reg | ~ma_op_complete_mask); 
+
+assign  nxt_state_11 = (
+                         tr2state11_frm_state11 | tr2state11_frm_state01 );
+
+
+// -------------------------------------------------------------------------
+
+assign spu_mactl_mactl_wen = tr2state01_frm_state00 | tr2state01_frm_state10;
+// The write enable above fires on either transition into state 01.
+wire spu_mactl_mactl_iss_mxsel = tr2state01_frm_state00; // selects ma_ctl_reg_data in the ctlreg data mux
+wire spu_mactl_mactl_wen_a_reg = stxa_2ctl_reg; // enable for the young ctl reg flop
+
+// _q / _qq are the tr2state01_* pulses staged through a 2-bit flop (one and two stages of delay).
+wire tr2state01_frm_state00_q,tr2state01_frm_state00_qq;
+dff_s  #(2) tr2state01_frm_state00_dly_ff (
+        .din({tr2state01_frm_state00,tr2state01_frm_state00_q}) ,
+        .q({tr2state01_frm_state00_q,tr2state01_frm_state00_qq}),
+        .clk (rclk), .se(se), .si(), .so());
+
+wire tr2state01_frm_state10_q,tr2state01_frm_state10_qq;
+dff_s  #(2) tr2state01_frm_state10_dly_ff (
+        .din({tr2state01_frm_state10,tr2state01_frm_state10_q}) ,
+        .q({tr2state01_frm_state10_q,tr2state01_frm_state10_qq}),
+        .clk (rclk), .se(se), .si(), .so());
+
+// There is a 2 cycle delay between the stxa to the ctl reg and the issue signal being
+// asserted and all *_done_set signals being cleared, so ma_op_complete must be masked
+// out for those 2 cycles (using the _q and _qq delayed transition pulses).
+assign ma_op_complete_mask = ma_op_complete & ~(tr2state01_frm_state00_q |
+				tr2state01_frm_state00_qq) & ~(tr2state01_frm_state10_q |
+                                tr2state01_frm_state10_qq);
+
+assign spu_mactl_stxa_force_abort = cur_state_11; // asserted for the whole time the FSM is in state 11
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// 				CTL REG
+// -------------------------------------------------------------------------
+wire [13:0] spu_mactl_mactl_reg;
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+wire [13:0] spu_mactl_young_ctlreg_data;
+wire [13:0] spu_mactl_ctlreg_data_mxout;
+// wire [1:0] old_ctlreg_issue_tid;
+wire [1:0] young_ctlreg_issue_tid;
+wire [1:0] ctlreg_tid_4cmp_2ldxa_tid;
+wire ldxa_tid_match_ctlreg_tid_g2;
+// "Young" copy: captures the g2 thread id and the 14-bit ctl reg store data whenever spu_mactl_mactl_wen_a_reg is set.
+dffre_s  #(16) young_ctl_reg_ff (
+        .din({spu_thrid_g2[1:0], ma_ctl_reg_data[13:0]}) ,
+        .q({young_ctlreg_issue_tid[1:0],spu_mactl_young_ctlreg_data[13:0]}),
+        .en(spu_mactl_mactl_wen_a_reg),
+        .rst(spu_mactl_ctl_rst_local), .clk (rclk), .se(se), .si(), .so());
+
+// Data written to the ctl reg: the incoming store data when spu_mactl_mactl_iss_mxsel is set, else the held young copy.
+assign spu_mactl_ctlreg_data_mxout[13:0] = spu_mactl_mactl_iss_mxsel ? 
+						 ma_ctl_reg_data[13:0] :
+						 spu_mactl_young_ctlreg_data[13:0];
+// Both muxes below use the ctl reg in states 00/01 and the young copy otherwise.
+assign spu_mactl_ldxa_mactl_reg[13:0] = (cur_state_01 | cur_state_00) ? spu_mactl_mactl_reg[13:0] :
+							spu_mactl_young_ctlreg_data[13:0];
+// Bits [12:11] are the field compared against the ldxa thread id.
+assign ctlreg_tid_4cmp_2ldxa_tid[1:0] = (cur_state_01 | cur_state_00) ? spu_mactl_mactl_reg[12:11] :
+							spu_mactl_young_ctlreg_data[12:11];
+
+/*
+replaced this with following since an ldxatosync followed and stxa2ctl was getting the incorrect timing
+for compare of tids
+
+assign ldxa_tid_match_ctlreg_tid_g = (ctlreg_tid_4cmp_2ldxa_tid[1:0] == spu_thrid_g[1:0]);
+
+dff_s #(1) ldxa_tid_match_ctl_reg_tid_ff (
+        .din(ldxa_tid_match_ctlreg_tid_g) ,
+        .q(ldxa_tid_match_ctlreg_tid_g2),
+        .clk (rclk), .se(se), .si(), .so()); 
+*/
+// The tid compare is done directly against the g2-stage thread id (replaces the flopped g-stage compare above).
+assign ldxa_tid_match_ctlreg_tid_g2 = (ctlreg_tid_4cmp_2ldxa_tid[1:0] == spu_thrid_g2[1:0]);
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+wire spu_mactl_busy_bit;
+mux2ds #(1) busy_bit_mx (
+        .in0    (spu_mactl_busy_bit_wrdata),
+        .in1    (spu_mactl_ctlreg_data_mxout[10]),
+        .sel0    (spu_mactl_busy_bit_wen),
+        .sel1    (~spu_mactl_busy_bit_wen),
+        .dout   (spu_mactl_busy_bit));
+// NOTE(review): the flop below is enabled by the same signal that drives sel0, so in1 looks never loaded - confirm against mux2ds/dffre_s.
+dffre_s  #(1) ctl_reg_busybit_ff (
+        .din(spu_mactl_busy_bit ) ,
+        .q(spu_mactl_mactl_reg[10]),
+        .en(spu_mactl_busy_bit_wen),
+        .rst(spu_mactl_ctl_rst_local|spu_mactl_clr_busy_bit), .clk (rclk), .se(se), .si(), .so());
+
+///wire ma_is_busy = spu_mactl_mactl_reg[10];
+
+
+/*
+dffre_s  #(9) mactl_reg_ff (
+        .din({spu_mactl_ctlreg_data_mxout[15:11],spu_mactl_ctlreg_data_mxout[9:6]}) ,
+        .q({old_ctlreg_issue_tid[1:0], spu_mactl_mactl_reg[13:11],spu_mactl_mactl_reg[9:6]}),
+        .en(spu_mactl_mactl_wen),
+        .rst(spu_mactl_ctl_rst_local), .clk (rclk), .se(se), .si(), .so());
+*/
+// Ctl reg bits [13:11] and [9:6]; bit 10 (busy) and bits [5:0] (length) have their own flops.
+dffre_s  #(7) mactl_reg_ff (
+        .din({spu_mactl_ctlreg_data_mxout[13:11],spu_mactl_ctlreg_data_mxout[9:6]}) ,
+        .q({spu_mactl_mactl_reg[13:11],spu_mactl_mactl_reg[9:6]}),
+        .en(spu_mactl_mactl_wen),
+        .rst(spu_mactl_ctl_rst_local), .clk (rclk), .se(se), .si(), .so());
+
+
+wire spu_mactl_int_set = spu_mactl_mactl_reg[9]; // ctl reg bit 9; used below to gate completion-interrupt logic
+
+
+
+/////////   NEED TO DO LEN UPDATE AT THE END also
+
+wire [5:0] spu_mactl_len_cntr_mx;
+dffre_s  #(6) mactl_reg_len_ff (
+        .din(spu_mactl_len_cntr_mx[5:0]) ,
+        .q(spu_mactl_mactl_reg[5:0]),
+        .en(spu_mactl_mactl_wen | spu_mast_streq),
+        .rst(spu_mactl_ctl_rst_local), .clk (rclk), .se(se), .si(), .so());
+
+
+assign spu_mactl_mactl_len = spu_mactl_mactl_reg[5:0];
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// LEN field update
+// AND-OR mux: ctl reg write data on spu_mactl_mactl_wen, otherwise spu_maaddr_len_cntr
+// (the flop above also loads on spu_mast_streq, which takes the counter value).
+assign spu_mactl_len_cntr_mx[5:0] = (spu_maaddr_len_cntr[5:0] & {6{~spu_mactl_mactl_wen}}) |
+					(spu_mactl_ctlreg_data_mxout[5:0] & {6{spu_mactl_mactl_wen}}) ;
+
+
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// stb empty logic
+
+wire allow_stxaack_waiting4trapack_pulse;
+// One-hot decode of the g2-stage thread id.
+wire spu_thrid_decode_tid0 = ~spu_thrid_g2[1] & ~spu_thrid_g2[0];
+wire spu_thrid_decode_tid1 = ~spu_thrid_g2[1] & spu_thrid_g2[0];
+wire spu_thrid_decode_tid2 = spu_thrid_g2[1] & ~spu_thrid_g2[0];
+wire spu_thrid_decode_tid3 = spu_thrid_g2[1] & spu_thrid_g2[0];
+
+// lsu_spu_stb_empty bit of the thread currently in g2.
+wire stb_isempty =
+                                (lsu_spu_stb_empty[0] & spu_thrid_decode_tid0) |
+                                (lsu_spu_stb_empty[1] & spu_thrid_decode_tid1) |
+                                (lsu_spu_stb_empty[2] & spu_thrid_decode_tid2) |
+                                (lsu_spu_stb_empty[3] & spu_thrid_decode_tid3) ;
+
+// Set the wait flag when a ctl-reg stxa arrives while that thread's stb is not empty.
+wire wait_4stb_empty_wen = stxa_2ctl_reg & ~stb_isempty;
+wire wait_4stb_empty_rst = (stxa_2ctl_reg & stb_isempty) | 
+				tr2state01_frm_state10 |
+				(allow_stxaack_waiting4trapack_pulse & ~wait_4stb_empty_wen) ;
+
+assign spu_mactl_disable_stxa_ack_g2 = wait_4stb_empty_wen; // withhold the immediate stxa ack while the stb is not empty
+assign wait_4stb_tobecome_empty = wait_4stb_empty_wen;
+
+wire waiting_4stb_empty_set;
+dffre_s #(1) wait_4stb_empty_ff (
+        .din(1'b1) ,
+        .q(waiting_4stb_empty_set),
+        .en(wait_4stb_empty_wen),
+        .rst(spu_mactl_ctl_rst_local | wait_4stb_empty_rst), .clk (rclk), .se(se), .si(), .so()); 
+// One-hot decode of the thread id captured in the young ctl reg.
+wire young_thrid_decode_tid0 = ~young_ctlreg_issue_tid[1] & ~young_ctlreg_issue_tid[0];
+wire young_thrid_decode_tid1 = ~young_ctlreg_issue_tid[1] & young_ctlreg_issue_tid[0];
+wire young_thrid_decode_tid2 = young_ctlreg_issue_tid[1] & ~young_ctlreg_issue_tid[0];
+wire young_thrid_decode_tid3 = young_ctlreg_issue_tid[1] & young_ctlreg_issue_tid[0];
+// lsu_spu_stb_empty bit of the thread recorded in the young ctl reg.
+wire stb_hasbecome_empty_while_instate10 =
+                                (lsu_spu_stb_empty[0] & young_thrid_decode_tid0) |
+                                (lsu_spu_stb_empty[1] & young_thrid_decode_tid1) |
+                                (lsu_spu_stb_empty[2] & young_thrid_decode_tid2) |
+                                (lsu_spu_stb_empty[3] & young_thrid_decode_tid3) ;
+// Still waiting: the flag is set and the young thread's stb is not yet empty, or a new wait is being set this cycle.
+assign waiting_4stb_tobecome_empty  = (~stb_hasbecome_empty_while_instate10 & waiting_4stb_empty_set) | 
+										wait_4stb_empty_wen;
+
+wire allow_stxa_ack_while_instate10 = 
+	//(waiting_4stb_empty_set & wait_4stb_empty_wen & cur_state_10) | // when in state10 & waiting 
+	//(waiting_4stb_empty_set & wait_4stb_empty_wen ) | // when in state10 & waiting 
+	(waiting_4stb_empty_set & stxa_2ctl_reg ) | // when in state10 & waiting 
+									// 4stb_empty & there is stxa_2ctlreg.
+	(waiting_4stb_empty_set & tr2state01_frm_state10 & stb_hasbecome_empty_while_instate10) | // when in state10 & 
+									// waiting 4stb_empty and stb becomes empty. 
+	allow_stxaack_waiting4trapack_pulse;
+
+
+// Delayed ack path: the _pre term is qualified below and then flopped once.
+wire delayed_allow_stxa_ack;
+wire delayed_allow_stxa_ack_pre = (stxa_2ctl_reg & stb_isempty & waiting_4stb_empty_set) |
+					(spu_lsu_stxa_ack_q_buf & stxa_2ctl_reg & stb_isempty);//this is to resolve
+									//b-b-b stxa's
+
+wire waiting_4stb_empty_set_dly;
+dff_s #(1) waiting_4stb_empty_set_dly_ff (
+        .din(waiting_4stb_empty_set) ,
+        .q(waiting_4stb_empty_set_dly),
+        .clk (rclk), .se(se), .si(), .so());
+
+wire waiting_4stb_empty_set_dly2;
+dff_s #(1) waiting_4stb_empty_set_dly2_ff (
+        .din(waiting_4stb_empty_set_dly) ,
+        .q(waiting_4stb_empty_set_dly2),
+        .clk (rclk), .se(se), .si(), .so());
+
+// Only allow the delayed ack if the wait flag is set now or was set in either of the two flopped copies.
+wire delayed_allow_stxa_ack_pre_qual = delayed_allow_stxa_ack_pre & (waiting_4stb_empty_set |
+						waiting_4stb_empty_set_dly | waiting_4stb_empty_set_dly2);
+
+dffr_s #(1) delayed_allow_stxa_ack_pre_ff (
+        .din(delayed_allow_stxa_ack_pre_qual) ,
+        .q(delayed_allow_stxa_ack),
+        .rst(spu_mactl_ctl_rst_local), .clk (rclk), .se(se), .si(), .so());
+
+// ################################################################################ 
+// ECO bug5326.
+// An stxa was not acked when, in state 10, the stb became empty while wait4_trap_ack was
+// asserted: because of this tr2state01_frm_state10 does not occur, so no ack is sent out,
+// and when the stb becomes empty (wait4_trap_ack still waiting for its ack) the stxa hangs.
+// Fix: generate a one-cycle pulse on the rising edge of the level condition below.
+wire  allow_stxaack_waiting4trapack = waiting_4stb_empty_set & stb_hasbecome_empty_while_instate10 & 
+						cur_state_10 & wait4_trap_ack_set;
+
+wire allow_stxaack_waiting4trapack_q;
+dff_s  #(1) allow_stxaack_waiting4trapack_ff (
+        .din(allow_stxaack_waiting4trapack) ,
+        .q(allow_stxaack_waiting4trapack_q),
+        .clk (rclk), .se(se), .si(), .so()); 
+// High only when the condition is set now and its flopped copy is not.
+assign allow_stxaack_waiting4trapack_pulse = ~allow_stxaack_waiting4trapack_q & allow_stxaack_waiting4trapack;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+/*
+dffr_s  #(1) spu_mactl_streq_ff (
+        .din(spu_mast_streq) ,
+        .q(spu_mactl_streq),
+        .rst(lsu_spu_ldst_ack),
+        .clk (rclk), .se(se), .si(), .so()); 
+*/
+// Store request latch: loads 1 on spu_mast_streq; reset by spu_mactl_ctl_rst_local or lsu_spu_ldst_ack.
+wire spu_mactl_streq_set;
+dffre_s  #(1) spu_mactl_streq_ff (
+        .din(1'b1) ,
+        .q(spu_mactl_streq_set),
+        .en(spu_mast_streq),
+        .rst(spu_mactl_ctl_rst_local | lsu_spu_ldst_ack), .clk (rclk), .se(se), .si(), .so());
+
+//assign spu_mactl_streq = spu_mast_streq | spu_mactl_streq_set;
+assign spu_mactl_streq = spu_mactl_streq_set ;
+
+// Both flops below register the same _pre term, so the 7170 select and the pcx write enable carry the same value.
+wire spu_mactl_pcx_7170_sel_pre = spu_mast_streq | spu_mactl_ldop;
+
+dff_s  #(1) spu_mactl_pcx_7170_sel_ff (
+        .din(spu_mactl_pcx_7170_sel_pre) ,
+        .q(spu_mactl_pcx_7170_sel),
+        .clk (rclk), .se(se), .si(), .so());
+
+dff_s  #(1) spu_mactl_pcx_wen_ff (
+        .din(spu_mactl_pcx_7170_sel_pre) ,
+        .q(spu_mactl_pcx_wen),
+        .clk (rclk), .se(se), .si(), .so());
+
+/*
+assign spu_mactl_pcx_7170_sel = spu_mast_streq_q | spu_mactl_ldop;
+assign spu_mactl_pcx_wen = spu_mactl_pcx_7170_sel;
+*/
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// SETTING THE BUSY BIT.
+// -------------------------------------------------------------------------
+// when spu_mactl_iss_pulse_dly is set busy_bit is set and when operation
+// is done it is cleared.
+
+// The busy-bit flop is enabled on spu_mactl_iss_pulse_dly or on a ctl reg write.
+assign spu_mactl_busy_bit_wen = spu_mactl_iss_pulse_dly |  
+                                        spu_mactl_mactl_wen;
+
+assign spu_mactl_busy_bit_wrdata = spu_mactl_iss_pulse_dly; // the value written is 1 only on spu_mactl_iss_pulse_dly
+
+
+// Clear on trap ack when ctl reg bit 9 (spu_mactl_int_set) is set.
+wire clr_busy_bit_when_maint = spu_mactl_int_set & ifu_spu_trap_ack;
+//wire clr_busy_bit_when_maint = spu_mactl_int_set & 1'b0;
+
+//wire spu_mactl_clr_all_done_set = clr_busy_bit_when_maint;
+
+assign spu_mactl_clr_busy_bit = clr_busy_bit_when_masync | clr_busy_bit_when_maint;
+
+
+/******************************************************************************/
+// completion by interrupt.
+// ----------------------------------------
+// done_set_pulse_q is a flopped copy of ma_op_done, used for edge detection below.
+wire done_set_pulse_q;
+dff_s  #(1) doneset_pulse_ff (
+        .din(ma_op_done) ,
+        .q(done_set_pulse_q),
+        .clk (rclk), .se(se), .si(), .so()); 
+
+// Rising edge of ma_op_done with spu_mactl_int_set, only in state 00 or 01 and with no stxa_2ctl_reg.
+wire spu_mactl_ttype_vld = (~done_set_pulse_q & ma_op_done & spu_mactl_int_set) &
+					(cur_state_00 | cur_state_01) & ~stxa_2ctl_reg ; 
+									// prevent from sending completion
+									// interrupt when in abort mode.
+
+wire spu_mactl_ttype_vld_dly;
+dff_s    #(1) ma_ttype_vld_ff (
+        .din(spu_mactl_ttype_vld) ,
+        .q(spu_mactl_ttype_vld_dly),
+        .clk (rclk), .se(se), .si(), .so());
+
+// ----------------------------------------
+// ----ttype logic, 1 => 74(ma), 0 => 70(enc); enc has higher priority.
+wire spu_mactl_ttype;
+assign spu_mactl_ttype = 1'b1; // tied to 1 here, i.e. always the "ma" encoding per the comment above
+
+// ----------------------------------------
+// ----tid logic
+assign spu_ifu_ttype_tid_w = spu_mactl_int_set ? spu_mactl_mactl_reg[12:11] :
+						spu_mactl_ldxa_tid_g2[1:0];// changed to _g2 since
+										//ifu_errctl flops it as well.
+
+// ----------------------------------------
+dff_s    #(2) spu_ifu_ttype_ff (
+        .din({spu_mactl_ttype,spu_mactl_ttype_vld_dly}) ,
+        .q({spu_ifu_ttype_w2,spu_ifu_ttype_vld_w2}),
+        .clk (rclk), .se(se), .si(), .so());
+
+// ----------------------------------------
+// One-hot decode of ctl reg bits [12:11], used to pick the matching ifu_spu_nceen bit.
+wire ctl_tid_decode0 = ~spu_mactl_mactl_reg[12] & ~spu_mactl_mactl_reg[11];
+wire ctl_tid_decode1 = ~spu_mactl_mactl_reg[12] & spu_mactl_mactl_reg[11];
+wire ctl_tid_decode2 = spu_mactl_mactl_reg[12] & ~spu_mactl_mactl_reg[11];
+wire ctl_tid_decode3 = spu_mactl_mactl_reg[12] & spu_mactl_mactl_reg[11];
+
+//wire [3:0] ifu_spu_nceen_q;
+assign spu_mactl_nceen_4int =
+                                (ifu_spu_nceen[0] & ctl_tid_decode0) |
+                                (ifu_spu_nceen[1] & ctl_tid_decode1) |
+                                (ifu_spu_nceen[2] & ctl_tid_decode2) |
+                                (ifu_spu_nceen[3] & ctl_tid_decode3) ;
+
+// ----------------------------------------
+// wait4_trap_ack_set: loads 1 on spu_mactl_ttype_vld; reset by local reset, ifu_spu_trap_ack, or the term below.
+wire wait4_trap_ack_rst = spu_ifu_mamem_unc_w2 & spu_mactl_nceen_4int ;
+
+dffre_s  #(1) wait4_trap_ack_ff (
+        .din(1'b1) ,
+        .q(wait4_trap_ack_set),
+        .en(spu_mactl_ttype_vld),
+        .rst(spu_mactl_ctl_rst_local | ifu_spu_trap_ack | wait4_trap_ack_rst), .clk (rclk), .se(se), .si(), .so());
+
+
+// -------------------------------------------------------------------------
+// ***************** VA COMPARES ********************
+// -------------------------------------------------------------------------
+wire [7:0] spu_ldst_va_m,spu_ldst_va_m_buf,spu_ldst_va_g;
+wire spu_mactl_mactl_va_vld_g;
+// Low 8 VA bits select the register: 0x80 mactl, 0x88 mpa, 0x90 maaddr, 0x98 manp, 0xa0 masync.
+wire spu_mactl_masync_va_vld_g 	= (spu_ldst_va_g[7:0] == 8'ha0);
+wire spu_mactl_mpa_va_vld_g 	= (spu_ldst_va_g[7:0] == 8'h88);
+wire spu_mactl_maaddr_va_vld_g 	= (spu_ldst_va_g[7:0] == 8'h90);
+wire spu_mactl_manp_va_vld_g 	= (spu_ldst_va_g[7:0] == 8'h98);
+// Unlike the four compares above, the mactl compare is done on the m-stage VA and then flopped into g.
+wire spu_mactl_mactl_va_vld_m 	= (spu_ldst_va_m[7:0] == 8'h80);
+
+dff_s  #(1) spu_mactl_mactl_va_vld_m_ff (
+        .din(spu_mactl_mactl_va_vld_m) ,
+        .q(spu_mactl_mactl_va_vld_g),
+        .clk (rclk), .se(se), .si(), .so());
+
+// Any VA that matches none of the five registers is flagged illegal.
+wire spu_mactl_ma_va_vld_g = spu_mactl_masync_va_vld_g | spu_mactl_mpa_va_vld_g |
+					spu_mactl_maaddr_va_vld_g | spu_mactl_manp_va_vld_g |
+					spu_mactl_mactl_va_vld_g;
+
+wire spu_mactl_illgl_va_g = ~spu_mactl_ma_va_vld_g ;
+
+// Stage all six decode results from g into g2.
+wire spu_mactl_masync_va_vld_g2, spu_mactl_mpa_va_vld_g2, spu_mactl_maaddr_va_vld_g2,
+                        spu_mactl_manp_va_vld_g2, spu_mactl_mactl_va_vld_g2, spu_mactl_illgl_va_g2;
+
+dff_s  #(6) mactl_va_vld_g2_ff (
+        .din({spu_mactl_masync_va_vld_g, spu_mactl_mpa_va_vld_g, spu_mactl_maaddr_va_vld_g,
+			spu_mactl_manp_va_vld_g, spu_mactl_mactl_va_vld_g, spu_mactl_illgl_va_g}) ,
+        .q({spu_mactl_masync_va_vld_g2, spu_mactl_mpa_va_vld_g2, spu_mactl_maaddr_va_vld_g2,
+                        spu_mactl_manp_va_vld_g2, spu_mactl_mactl_va_vld_g2, spu_mactl_illgl_va_g2}),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+wire spu_ld_inst_m,spu_st_inst_m,spu_alt_space_m;
+wire spu_mactl_asi_is_40_m;
+wire spu_mactl_asi_is_40_m_buf;
+wire spu_mactl_asi_is_40_e;
+// e -> m staging of ld/st/alt-space flags, thread id and the low 8 VA bits.
+dff_s  #(13) ifu_spu_pipe_ff (
+        .din({ifu_lsu_ld_inst_e,ifu_lsu_st_inst_e,ifu_lsu_alt_space_e,
+              ifu_tlu_thrid_e[1:0],exu_lsu_ldst_va_e[7:0]}) ,
+        .q({spu_ld_inst_m,spu_st_inst_m,spu_alt_space_m,
+              spu_thrid_m[1:0],spu_ldst_va_m[7:0]}),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign spu_ldst_va_m_buf = spu_ldst_va_m;
+
+// asi addr = 80
+// Ctl reg store decode in m: ASI 0x40 alternate-space store (prequal) to VA 0x80.
+wire spu_mactl_mactl_wen_m_prequal = spu_mactl_asi_is_40_m_buf & spu_alt_space_m & spu_st_inst_m ;
+/*
+wire spu_mactl_mactl_wen_m = spu_mactl_mactl_va_vld_m & spu_mactl_asi_is_40_m &
+				spu_alt_space_m & spu_st_inst_m ;
+*/
+                                //ifu_tlu_inst_vld_m;
+
+wire spu_mactl_mactl_wen_m = spu_mactl_mactl_va_vld_m & spu_mactl_mactl_wen_m_prequal ;
+
+wire spu_mactl_mactl_wen_g;
+dff_s  #(1) spu_mactl_mactl_ack_ff (
+        .din(spu_mactl_mactl_wen_m) ,
+        .q(spu_mactl_mactl_wen_g),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+///wire spu_mactl_mactl_wen_busy_m = spu_mactl_mactl_wen_m & ~ma_is_busy;
+// wire spu_mactl_mactl_wen_vld_m = spu_mactl_mactl_wen_busy_m & ifu_tlu_inst_vld_m;
+
+/*
+wire spu_mactl_mactl_wen_busy_g;
+dff_s  #(1) spu_mactl_mactl_wen_busy_m_ff (
+        .din(spu_mactl_mactl_wen_busy_m) ,
+        .q(spu_mactl_mactl_wen_busy_g),
+        .clk (rclk), .se(se), .si(), .so());
+
+wire spu_mactl_mactl_wen_vld_g = spu_mactl_mactl_wen_busy_g & ifu_spu_inst_vld_w;
+
+*/
+
+// Qualify with the instruction-valid signal in g, then stage into g2.
+wire spu_mactl_mactl_wen_vld_g = spu_mactl_mactl_wen_g & ifu_spu_inst_vld_w;
+
+wire spu_mactl_mactl_wen_vld_g2;
+dff_s  #(1) spu_mactl_mactl_wen_vld_g_ff (
+        .din(spu_mactl_mactl_wen_vld_g) ,
+        .q(spu_mactl_mactl_wen_vld_g2),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+// m -> g staging of thread id and VA.
+dff_s  #(10) spu_pipe_ff (
+        .din({spu_thrid_m[1:0],spu_ldst_va_m_buf[7:0]}),
+        .q({spu_thrid_g[1:0],spu_ldst_va_g[7:0]}),
+        .clk (rclk), .se(se), .si(), .so());
+
+// g -> g2 staging of thread id.
+dff_s  #(2) tid_g2_ff (
+        .din(spu_thrid_g[1:0]) ,
+        .q(spu_thrid_g2[1:0]),
+        .clk (rclk), .se(se), .si(), .so());
+
+/*
+wire [7:0] spu_mactl_asi_state_m;
+dff_s  #(8) spu_mactl_asi_state_ff (
+        .din(lsu_spu_asi_state_e[7:0]) ,
+        .q(spu_mactl_asi_state_m[7:0]),
+        .clk (rclk), .se(se), .si(), .so());
+*/
+// The ASI compare is done in e and flopped into m (replaces flopping the full 8-bit ASI state above).
+assign spu_mactl_asi_is_40_e = (lsu_spu_asi_state_e[7:0] == 8'h40);
+// Two flops carry the same value: _m and a duplicate _m_buf.
+dff_s  #(2) spu_mactl_asi_is_40_e_ff (
+        .din({spu_mactl_asi_is_40_e, spu_mactl_asi_is_40_e}) ,
+        .q({spu_mactl_asi_is_40_m, spu_mactl_asi_is_40_m_buf}),
+        .clk (rclk), .se(se), .si(), .so());
+
+wire spu_mactl_asi40_alt_st_m = spu_mactl_asi_is_40_m & spu_alt_space_m & spu_st_inst_m ; 
+wire spu_mactl_asi40_alt_ld_m = spu_mactl_asi_is_40_m & spu_alt_space_m & spu_ld_inst_m ; 
+
+// wire spu_mactl_asi40_alt_st_vld_m = spu_mactl_asi40_alt_st_m & ifu_tlu_inst_vld_m;
+// wire spu_mactl_asi40_alt_ld_vld_m = spu_mactl_asi40_alt_ld_m & ifu_tlu_inst_vld_m;
+
+// m -> g staging of the ASI 0x40 store/load decodes.
+wire spu_mactl_asi40_alt_st_g,spu_mactl_asi40_alt_ld_g;
+dff_s  #(2) vld_ldst_alt_asi40_ff (
+        .din({spu_mactl_asi40_alt_st_m,spu_mactl_asi40_alt_ld_m}) ,
+        .q({spu_mactl_asi40_alt_st_g,spu_mactl_asi40_alt_ld_g}),
+        .clk (rclk), .se(se), .si(), .so());
+
+// Qualify with the instruction-valid signal in g, then stage into g2.
+wire spu_mactl_asi40_alt_st_vld_g = spu_mactl_asi40_alt_st_g & ifu_spu_inst_vld_w;
+wire spu_mactl_asi40_alt_ld_vld_g = spu_mactl_asi40_alt_ld_g & ifu_spu_inst_vld_w;
+
+
+wire spu_mactl_asi40_alt_st_vld_g2,spu_mactl_asi40_alt_ld_vld_g2;
+dff_s  #(2) vld_ldst_alt_asi40_g2_ff (
+        .din({spu_mactl_asi40_alt_st_vld_g,spu_mactl_asi40_alt_ld_vld_g}) ,
+        .q({spu_mactl_asi40_alt_st_vld_g2,spu_mactl_asi40_alt_ld_vld_g2}),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+// Any of the three flush sources, flopped once, kills the g2-stage valids below.
+wire all_flush = lsu_spu_early_flush_g | tlu_spu_flush_w | ifu_spu_flush_w;
+
+wire spu_mactl_flush_g2;
+dff_s  #(1) flush_ff (
+        .din(all_flush) ,
+        .q(spu_mactl_flush_g2),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+wire spu_mactl_st_asi_vld = spu_mactl_asi40_alt_st_vld_g2 & ~spu_mactl_flush_g2;
+wire spu_mactl_ld_asi_vld = spu_mactl_asi40_alt_ld_vld_g2 & ~spu_mactl_flush_g2;
+
+
+wire spu_mactl_st_asi_vld_local = spu_mactl_st_asi_vld;
+wire spu_mactl_ld_asi_vld_local = spu_mactl_ld_asi_vld;
+// Illegal-VA is only qualified with the load valid (not the store valid).
+wire spu_mactl_illgl_va_vld_g2 = spu_mactl_illgl_va_g2 & spu_mactl_ld_asi_vld_local;
+
+
+
+// -------------------------------------------------------------------------
+// ******************* ST ASI ***********************
+// -------------------------------------------------------------------------
+// ST ASI decode and generate write enables.
+// Each write enable is the g2-stage VA decode ANDed with the flush-qualified store valid.
+// asi addr = 88
+assign spu_mactl_mpa_wen = spu_mactl_mpa_va_vld_g2 & spu_mactl_st_asi_vld_local ; 
+
+// asi addr = 90
+assign spu_mactl_maaddr_wen = spu_mactl_maaddr_va_vld_g2 & spu_mactl_st_asi_vld_local ; 
+
+// asi addr = 98
+assign spu_mactl_manp_wen = spu_mactl_manp_va_vld_g2 & spu_mactl_st_asi_vld_local ; 
+
+// asi addr = 80 (uses the separately pipelined ctl reg store decode, flush-qualified here)
+assign spu_mactl_stxa_2ctl_reg = spu_mactl_mactl_wen_vld_g2 & ~spu_mactl_flush_g2 ;
+
+
+
+// -------------------------------------------------------------------------
+// STXA ACK 
+// -------------------------------------------------------------------------
+wire spu_lsu_stxa_ack_g2;
+wire disable_stxa_ack_4b2b_same_tid;
+// Three ack sources: an immediate ack for a ctl-reg stxa that is not disabled, the state-10 ack, and the delayed ack.
+wire spu_lsu_stxa_ack_g2_prequal = ( spu_mactl_stxa_2ctl_reg & ~spu_mactl_disable_stxa_ack_g2) |
+				allow_stxa_ack_while_instate10 |
+				delayed_allow_stxa_ack;
+
+assign spu_lsu_stxa_ack_g2 = spu_lsu_stxa_ack_g2_prequal & ~disable_stxa_ack_4b2b_same_tid;
+
+dff_s  #(1) stxa_ack_g2_ff (
+        .din(spu_lsu_stxa_ack_g2) ,
+        .q(spu_lsu_stxa_ack),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign spu_lsu_stxa_ack_q_buf = spu_lsu_stxa_ack;
+// The state-10 and delayed acks report the young ctl reg's thread id; the immediate ack reports the g2 thread id.
+assign spu_lsu_stxa_ack_tid[1:0] =  (allow_stxa_ack_while_instate10 |
+				     delayed_allow_stxa_ack) ? young_ctlreg_issue_tid[1:0] :
+									spu_thrid_g2[1:0]; 
+
+
+// following is to avoid the case when the logic generates a b2b stxa ack for the same tid.
+wire [1:0] spu_lsu_stxa_ack_tid_g3;
+dff_s  #(2) stxa_ack_tid_g2_ff (
+        .din(spu_lsu_stxa_ack_tid[1:0]) ,
+        .q(spu_lsu_stxa_ack_tid_g3[1:0]),
+        .clk (rclk), .se(se), .si(), .so());
+
+wire g2_g3_tids_match = (spu_lsu_stxa_ack_tid[1:0] == spu_lsu_stxa_ack_tid_g3[1:0]);
+// Suppress a new ack when the flopped ack is set and its flopped tid equals the current candidate tid.
+assign disable_stxa_ack_4b2b_same_tid = spu_lsu_stxa_ack & g2_g3_tids_match;
+
+// -------------------------------------------------------------------------
+// ******************* LD ASI ***********************
+// -------------------------------------------------------------------------
+// LD ASI decode and generate write enables.
+wire masync_set;
+wire spu_mactl_masync_ldxa;
+wire spu_mactl_masync_ldxa_sel;
+wire spu_mactl_masync_ldxa_sel_pre;
+// asi addr = A0
+
+assign spu_mactl_masync_ldxa = spu_mactl_masync_va_vld_g2 & spu_mactl_ld_asi_vld_local;
+
+//wire spu_mactl_masync_ldxa_qual = spu_mactl_masync_ldxa & ~masync_set;
+wire spu_mactl_masync_ldxa_qual = spu_mactl_masync_ldxa & ldxa_tid_match_ctlreg_tid_g2;
+
+wire spu_mactl_masync_ldxa_rst = spu_mactl_ctl_rst_local | spu_mactl_masync_ldxa_sel_pre;
+
+wire [1:0] masync_tid;
+// Records a pending masync ldxa (flag + thread id) when its tid matches the ctl reg tid; cleared when the response is selected.
+dffre_s  #(3) masync_ff (
+        .din({1'b1,spu_thrid_g2[1:0]}) ,
+        .q({masync_set,masync_tid[1:0]}),
+        .en(spu_mactl_masync_ldxa_qual),
+        .rst(spu_mactl_masync_ldxa_rst), .clk (rclk), .se(se), .si(), .so());
+
+//wire multi_masync_ldxa_sel = masync_set & spu_mactl_masync_ldxa;
+//&&&&&&&&&&&&&&&&&&&&&
+// following should signal to lsu spu_lsu_ldxa_data_vld and also spu_lsu_illgl_va.
+//&&&&&&&&&&&&&&&&&&&&&
+
+wire ldxa_2masync_tid_notmatch_ctlreg_tid = ~ldxa_tid_match_ctlreg_tid_g2 & spu_mactl_masync_ldxa;
+
+// Set by the local ctl reset and cleared by spu_mactl_iss_pulse_dly; contributes to ma_op_done only.
+wire spu_mactl_rst_done_set;
+dffre_s #(1) spu_mactl_rst_done_ff (
+        .din(1'b1) ,
+        .q(spu_mactl_rst_done_set),
+        .en(spu_mactl_ctl_rst_local),
+        .rst(spu_mactl_iss_pulse_dly), .clk (rclk), .se(se), .si(), .so()); 
+
+// ma_op_done includes the rsrv and rst done flags; ma_op_complete covers only ld/red/st/exp.
+assign ma_op_done = spu_mald_done_set | spu_mared_done_set | spu_mast_done_set |
+			spu_maexp_done_set | spu_mactl_rsrv_done_set | spu_mactl_rst_done_set;
+
+assign ma_op_complete = spu_mald_done_set | spu_mared_done_set | spu_mast_done_set |
+				spu_maexp_done_set;
+
+
+//&&&&&&&&&&&&&&&&&&&&&
+// mask(or delay) ma_op_done if ldxa to mpa_maaddr_manp_mactl register's is in progress to avoid conflicts
+// with masync response when operation completes/aborts.
+//&&&&&&&&&&&&&&&&&&&&&
+
+
+wire spu_mactl_mask_done_set_4abort = cur_state_11 | cur_state_10 | tr2state01_frm_state10_q |
+									tr2state01_frm_state10_qq;
+
+
+
+wire spu_mactl_ldxa_mpa_maaddr_manp_mactl;
+
+wire spu_mactl_masync_ldxa_set = ma_op_done & masync_set & ~spu_mactl_ldxa_mpa_maaddr_manp_mactl &
+							~spu_mactl_masync_ldxa & // this is when ldxa to masync and
+										// completion occur at same time
+							~spu_mactl_mask_done_set_4abort; 
+
+wire masync_pulse_q;
+
+dff_s  #(1) masync_pulse_ff (
+	.din(spu_mactl_masync_ldxa_set) ,
+	.q(masync_pulse_q),
+	.clk (rclk), .se(se), .si(), .so());
+
+
+//&&&&&&&&&&&&&&&&&&&&&
+// following _pre signal is to signal an operation completed normally or was aborted.
+//&&&&&&&&&&&&&&&&&&&&&
+// First term: rising edge of spu_mactl_masync_ldxa_set. Second term: abort by a new stxa while a masync ldxa is pending.
+assign spu_mactl_masync_ldxa_sel_pre = (~masync_pulse_q & spu_mactl_masync_ldxa_set) |
+					(masync_set & stxa_2ctl_reg); //this unblocks ldxa to sync reg since
+									// this is an abort
+
+assign spu_mactl_masync_ldxa_sel = spu_mactl_masync_ldxa_sel_pre | ldxa_2masync_tid_notmatch_ctlreg_tid;
+
+//&&&&&&&&&&&&&&&&&&&&&
+// mask out clearing the busy bit when another stxa is aborting the older operation. Since the timing
+// of setting the busy bit is earlier than clearing it. So in this case the busy bit set for younger
+// operation will be cleared when ldxa to sync is responded as a result of the abort. And we want to avoid that.
+//&&&&&&&&&&&&&&&&&&&&&
+
+assign clr_busy_bit_when_masync = spu_mactl_masync_ldxa_sel_pre & ~(masync_set & stxa_2ctl_reg);
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// asi addr = 88
+wire spu_mactl_mpa_ldxa_g2 = spu_mactl_mpa_va_vld_g2 & spu_mactl_ld_asi_vld_local ; 
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// asi addr = 90
+wire spu_mactl_maaddr_ldxa_g2 = spu_mactl_maaddr_va_vld_g2 & spu_mactl_ld_asi_vld_local ; 
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// asi addr = 98
+wire spu_mactl_manp_ldxa_g2 = spu_mactl_manp_va_vld_g2 & spu_mactl_ld_asi_vld_local ; 
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// asi addr = 80
+wire spu_mactl_mactl_ldxa_g2 = spu_mactl_mactl_va_vld_g2 & spu_mactl_ld_asi_vld_local ; 
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+/*
+assign spu_mactl_ldxa_tid_g2[1:0] = (({2{spu_mactl_mpa_ldxa_g2}} & spu_thrid_g2[1:0]) |
+                                    ({2{spu_mactl_maaddr_ldxa_g2}} & spu_thrid_g2[1:0]) |
+                                    ({2{spu_mactl_manp_ldxa_g2}} & spu_thrid_g2[1:0]) |
+                                    ({2{spu_mactl_mactl_ldxa_g2}} & spu_thrid_g2[1:0]) |
+                                    ({2{spu_mactl_masync_ldxa_sel}} & masync_tid[1:0]) );
+*/
+
+// A masync response (sel_pre) returns the recorded masync thread id; everything else returns the g2 thread id.
+assign spu_mactl_ldxa_tid_g2[1:0] = spu_mactl_masync_ldxa_sel_pre ? masync_tid[1:0] :
+									spu_thrid_g2[1:0];
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+assign spu_mactl_ldxa_mpa_maaddr_manp_mactl = spu_mactl_mpa_ldxa_g2 | spu_mactl_maaddr_ldxa_g2 |
+						spu_mactl_manp_ldxa_g2 | spu_mactl_mactl_ldxa_g2;
+// Data valid for any register ldxa, any illegal-VA load, or a masync response/drop.
+wire spu_mactl_ldxa_data_vld_g2 = 	spu_mactl_ldxa_mpa_maaddr_manp_mactl |
+					spu_mactl_illgl_va_vld_g2 |
+					spu_mactl_masync_ldxa_sel;
+
+// any ldxa to masync reg that its tid does not match tid of ctlreg should generate an
+// illgl_va to lsu, so lsu silently drops it(no writes to the reg file).
+
+wire illgl_va_vld_or_drop_ldxa2masync = spu_mactl_illgl_va_vld_g2 | ldxa_2masync_tid_notmatch_ctlreg_tid |
+						(masync_set & stxa_2ctl_reg);
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// fix for bug 4918, when ldxa to tid3 which causes illgl_va and masync completion to another tid 
+// that causes unc_err. both of these happen at the same time, need to delay unc_err.
+
+wire spu_mactl_ldxa_data_vld_qual = spu_mactl_masync_ldxa_sel | spu_mactl_ldxa_mpa_maaddr_manp_mactl; 
+
+wire spu_mactl_illgl_va_vld_w;
+wire spu_mactl_ldxa_data_vld_qual_w;
+dff_s #(1) spu_mactl_ldxa_data_vld_qual_ff (
+        .din(spu_mactl_ldxa_data_vld_qual) ,
+        .q(spu_mactl_ldxa_data_vld_qual_w),
+        .clk (rclk), .se(se), .si(), .so());
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+wire spu_mactl_ldxa_data_vld_w;
+dff_s  #(4) ldxa_tid_vld_ff (
+        //.din({spu_mactl_ldxa_tid_g2[1:0],spu_mactl_ldxa_data_vld_g2, spu_mactl_illgl_va_vld_g2}) ,
+
+        .din({spu_mactl_ldxa_tid_g2[1:0],spu_mactl_ldxa_data_vld_g2, illgl_va_vld_or_drop_ldxa2masync}) ,
+        .q({spu_mactl_ldxa_tid_w[1:0],spu_mactl_ldxa_data_vld_w, spu_mactl_illgl_va_vld_w}),
+        .clk (rclk), .se(se), .si(), .so());
+
+// The *_w2 outputs are direct aliases of the flopped *_w values.
+assign spu_lsu_ldxa_data_vld_w2 = spu_mactl_ldxa_data_vld_w ;
+assign spu_lsu_ldxa_tid_w2[1:0] = spu_mactl_ldxa_tid_w[1:0] ;
+
+wire spu_mactl_perr_rst;
+assign spu_lsu_ldxa_illgl_va_w2 = spu_mactl_illgl_va_vld_w & ~spu_lsu_unc_error_w; // illegal-VA is suppressed when an uncorrectable error is reported
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+wire [3:0] spu_mactl_ldxa_data_g2_sel;
+wire spu_mactl_ldxa_data_g2_select;
+
+/******				*************
+assign spu_mactl_ldxa_data_g2_sel[3:0] = {spu_mactl_mactl_ldxa_g2,
+					 spu_mactl_manp_ldxa_g2,
+					 spu_mactl_maaddr_ldxa_g2,
+					 spu_mactl_mpa_ldxa_g2};
+*******				*************/
+
+// Priority-encoded, one-hot ldxa data select: mpa (bit 0) wins over maaddr (bit 1),
+// which wins over manp (bit 2); bit 3 is the default when none of those three is set.
+assign spu_mactl_ldxa_data_g2_sel[3] = ~(spu_mactl_mpa_ldxa_g2 | spu_mactl_maaddr_ldxa_g2 |
+                                                spu_mactl_manp_ldxa_g2);
+assign spu_mactl_ldxa_data_g2_sel[2] = spu_mactl_manp_ldxa_g2 &
+                                                ~(spu_mactl_mpa_ldxa_g2 | spu_mactl_maaddr_ldxa_g2);
+assign spu_mactl_ldxa_data_g2_sel[1] = spu_mactl_maaddr_ldxa_g2 & ~spu_mactl_mpa_ldxa_g2;
+assign spu_mactl_ldxa_data_g2_sel[0] = spu_mactl_mpa_ldxa_g2;
+
+// Set when no ldxa to mpa, maaddr, manp or mactl is present in g2.
+assign spu_mactl_ldxa_data_g2_select = ~spu_mactl_mpa_ldxa_g2 &
+                                       ~spu_mactl_maaddr_ldxa_g2 &
+                                       ~spu_mactl_manp_ldxa_g2 &
+                                       ~spu_mactl_mactl_ldxa_g2;
+
+
+wire [3:0] spu_mactl_ldxa_data_w_sel;
+dff_s  #(5) ldxa_data_sel_ff (
+        .din({spu_mactl_ldxa_data_g2_sel[3:0],spu_mactl_ldxa_data_g2_select}) ,
+        .q({spu_mactl_ldxa_data_w_sel[3:0],spu_mactl_ldxa_data_w_select}),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+wire [3:0] spu_mactl_ldxa_data_w_sel_s;
+assign spu_mactl_ldxa_data_w_sel_s[0] = spu_mactl_ldxa_data_w_sel[0] | mux_drive_disable;
+assign spu_mactl_ldxa_data_w_sel_s[1] = spu_mactl_ldxa_data_w_sel[1] & ~mux_drive_disable;
+assign spu_mactl_ldxa_data_w_sel_s[2] = spu_mactl_ldxa_data_w_sel[2] & ~mux_drive_disable;
+assign spu_mactl_ldxa_data_w_sel_s[3] = spu_mactl_ldxa_data_w_sel[3] & ~mux_drive_disable;
+
+
+assign spu_mactl_ldxa_data_w_sel_l = ~spu_mactl_ldxa_data_w_sel_s;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// reserved bit write detect. Kill the start signal (spu_encctl_iss_pulse_dly) and
+// raise a completion signal.
+// ---------------------------------------------------
+
+wire [2:0] spu_mactl_op_field_e;
+//wire [2:0] spu_mactl_op_field_m;
+
+assign spu_mactl_op_field_e[2:0] = spu_mactl_rsrv_data_e[2:0];
+
+wire spu_mactl_rsrv_wrdetect_e = spu_mactl_op_field_e[2] & ~(~(spu_mactl_op_field_e[1]|spu_mactl_op_field_e[0]));
+
+wire spu_mactl_rsrv_wrdetect_noqual_m;
+dff_s #(1) spu_mactl_rsrv_wrdetect_e_ff (
+        .din(spu_mactl_rsrv_wrdetect_e) ,
+        .q(spu_mactl_rsrv_wrdetect_noqual_m),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign spu_mactl_rsrv_wrdetect_m = spu_mactl_rsrv_wrdetect_noqual_m & spu_mactl_mactl_wen_m;
+
+/*
+dffr_s #(3) rsrv_data_ff (
+        .din(spu_mactl_rsrv_data_e[2:0]) ,
+        .q(spu_mactl_op_field_m[2:0]),
+        .rst(spu_mactl_ctl_rst_local), .clk (rclk), .se(se), .si(), .so());
+
+
+assign spu_mactl_rsrv_wrdetect_m =
+		spu_mactl_op_field_m[2] & ~(~(spu_mactl_op_field_m[1]|spu_mactl_op_field_m[0])) &
+		spu_mactl_mactl_wen_m;
+
+*/
+ 
+wire spu_mactl_rsrv_done_wen; 
+wire spu_mactl_rsrv_done_rst = spu_mactl_ctl_rst_local | spu_mactl_iss_pulse_dly;
+
+dffre_s #(1) spu_mactl_rsrv_done_ff (
+        .din(1'b1) ,
+        .q(spu_mactl_rsrv_done_set),
+        .en(spu_mactl_rsrv_done_wen),
+        .rst(spu_mactl_rsrv_done_rst), .clk (rclk), .se(se), .si(), .so());
+
+wire spu_mactl_rsrv_wrdetect_m2;
+
+dff_s #(1) spu_mactl_rsrv_wrdetect_m_ff (
+        .din(spu_mactl_rsrv_wrdetect_m) ,
+        .q(spu_mactl_rsrv_wrdetect_m2),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+assign spu_tlu_rsrv_illgl_m = spu_mactl_rsrv_wrdetect_m ;
+
+// ----------------------------------------------
+
+wire spu_mactl_rsrv_wrdetect_m3;
+dff_s #(1) spu_mactl_rsrv_wrdetect_m2_ff (
+        .din(spu_mactl_rsrv_wrdetect_m2) ,
+        .q(spu_mactl_rsrv_wrdetect_m3),
+        .clk (rclk), .se(se), .si(), .so());
+
+wire ifu_spu_inst_vld_w2;
+dff_s #(1) ifu_spu_inst_vld_w_dff (
+        .din(ifu_spu_inst_vld_w) ,
+        .q(ifu_spu_inst_vld_w2),
+        .clk (rclk), .se(se), .si(), .so());
+
+assign spu_mactl_rsrv_done_wen = ~spu_mactl_flush_g2 & ifu_spu_inst_vld_w2 & spu_mactl_rsrv_wrdetect_m3;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// ^^^^^^^^^^^^^^ ISSUE VALID SIGNAL ^^^^^^^^^^^^^^
+// -------------------------------------------------------------------------
+wire maop_vld_set;
+
+wire maop_vld_rst = spu_mactl_ctl_rst_local | spu_mactl_iss_pulse_dly;
+
+wire maop_vld_wen = spu_mactl_mactl_wen & ~spu_mactl_rsrv_wrdetect_m3;
+
+assign spu_mactl_iss_pulse_pre = maop_vld_wen;
+
+dffre_s  #(1) maop_vld_ff (
+        .din(1'b1) ,
+        .q(maop_vld_set),
+        .en(maop_vld_wen),
+        .rst(maop_vld_rst), .clk (rclk), .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+wire maop_vld_set_q;
+
+dff_s  #(1) iss_pulse_ff (
+        .din(maop_vld_set) ,
+        .q(maop_vld_set_q),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+assign spu_mactl_iss_pulse = ~maop_vld_set_q & maop_vld_set;
+
+dff_s  #(1) iss_pulse_dly_ff (
+        .din(spu_mactl_iss_pulse) ,
+        .q(spu_mactl_iss_pulse_dly),
+        .clk (rclk), .se(se), .si(), .so());
+
+// -------------------------------------------------------------------------
+// ^^^^^^^^^^^^^^^ OPCODE DECODE ^^^^^^^^^^^^^^^^
+// -------------------------------------------------------------------------
+// OPCODE decode.
+wire [2:0] spu_madp_mactl_op;
+
+assign spu_madp_mactl_op = spu_mactl_mactl_reg[8:6];
+
+wire spu_mactl_ldop_pre = ~(|spu_madp_mactl_op[2:0]) ;
+
+wire spu_mactl_stop_pre = ~spu_madp_mactl_op[2] & ~spu_madp_mactl_op[1] & 
+				spu_madp_mactl_op[0];
+
+wire spu_mactl_mulop_pre = ~spu_madp_mactl_op[2] & spu_madp_mactl_op[1] & 
+				~spu_madp_mactl_op[0];
+
+wire spu_mactl_redop_pre = ~spu_madp_mactl_op[2] & spu_madp_mactl_op[1] & 
+				spu_madp_mactl_op[0] ;
+
+wire spu_mactl_expop_pre = spu_madp_mactl_op[2] & ~spu_madp_mactl_op[1] & 
+				~spu_madp_mactl_op[0];
+
+dff_s #(1) ldop_ff (
+        .din(spu_mactl_ldop_pre) ,
+        .q(spu_mactl_ldop),
+        .clk (rclk), .se(se), .si(), .so());
+
+dff_s #(1) stop_ff (
+        .din(spu_mactl_stop_pre) ,
+        .q(spu_mactl_stop),
+        .clk (rclk), .se(se), .si(), .so());
+
+dff_s #(1) mulop_ff (
+        .din(spu_mactl_mulop_pre) ,
+        .q(spu_mactl_mulop),
+        .clk (rclk), .se(se), .si(), .so());
+
+dff_s #(1) redop_ff (
+        .din(spu_mactl_redop_pre) ,
+        .q(spu_mactl_redop),
+        .clk (rclk), .se(se), .si(), .so());
+
+dff_s #(1) expop_ff (
+        .din(spu_mactl_expop_pre) ,
+        .q(spu_mactl_expop),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+// -------------------------------------------------------------------------
+
+assign spu_mactl_mpa_sel = spu_mactl_ldop | spu_mactl_stop;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// ^^^^^^^^^^^^^^^ MEM WR/RD ENABLE ^^^^^^^^^^^^^^^
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// the following are for MAMEM (load) write and mux selection
+/*******************************
+
+MPA[3] MAADDR[0] | 
+------------------------------
+  0      0 	 | 128b	| eve+odd | mxsel[0]
+  0      1 	 | 64b	| odd	  | mxsel[1]
+  1      0 	 | 64b	| eve	  | mxsel[1]
+  1      1 	 | 64b	| odd	  | mxsel[0]
+
+********************************/
+
+wire spu_mactl_memwen = spu_mamul_memwen | spu_maaeqb_memwen | spu_mald_memwen | spu_mared_memwen;
+wire spu_mactl_memren = spu_mamul_memren | spu_maaeqb_memren |spu_mared_memren | spu_mast_memren |
+				spu_maexp_memren;
+
+assign spu_mactl_memeve_wen = ~spu_maaddr_maaddr_0 & spu_mactl_memwen;
+assign spu_mactl_memodd_wen = (spu_maaddr_maaddr_0 & spu_mactl_memwen) | 
+			(spu_mald_memwen & ~spu_maaddr_maaddr_0 & ~spu_maaddr_mpa_3 &
+				spu_maaddr_nooddwr_on_leneq1);
+
+wire mem_swap = (~spu_maaddr_mpa_3 & spu_maaddr_maaddr_0) |
+	   	  (spu_maaddr_mpa_3 & ~spu_maaddr_maaddr_0);
+
+wire [2:0] spu_mactl_memmxsel;
+assign spu_mactl_memmxsel[2] = ~spu_mactl_ldop_pre; // set for any op that is not a load (original note: it's a mulop)
+assign spu_mactl_memmxsel[1] = spu_mactl_ldop_pre & mem_swap ; // load op with mem_swap set
+assign spu_mactl_memmxsel[0] = spu_mactl_ldop_pre & ~mem_swap; // load op with mem_swap clear
+
+assign spu_mactl_memmxsel_l[2:0] = ~spu_mactl_memmxsel[2:0]; // bitwise-inverted copy of the select
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+assign spu_mactl_mamem_ren = spu_mactl_memren;
+assign spu_mactl_mamem_wen = spu_mactl_memwen;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// ^^^^^^^^^^^^^^ FLOP ENABLE FOR MA MUL/RED OPERATIONS ^^^^^^^^^^
+// -------------------------------------------------------------------------
+
+assign spu_mactl_madp_parflop_wen = spu_mared_not_idle | spu_mamul_oprnd2_bypass;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// ^^^^^^^^^^^^ PERR ^^^^^^^^^^^^^^
+// -------------------------------------------------------------------------
+
+wire spu_mactl_mamem_ren_dly1,spu_mactl_mamem_ren_dly2;
+dff_s    #(2) dly_ren_ff (
+        .din({spu_mactl_mamem_ren,spu_mactl_mamem_ren_dly1}) ,
+        .q({spu_mactl_mamem_ren_dly1,spu_mactl_mamem_ren_dly2}),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+wire spu_mactl_perr_wen = spu_madp_perr & spu_mactl_mamem_ren_dly2;
+
+dffre_s  #(1) perr_ff (
+        .din(1'b1) ,
+        .q(spu_mactl_perr_set_int),
+        .en(spu_mactl_perr_wen),
+        //.rst(spu_mactl_ctl_rst_local | spu_mactl_iss_pulse_dly), .clk (rclk), .se(se), .si(), .so());
+        .rst(spu_mactl_ctl_rst_local | spu_mactl_perr_rst), .clk (rclk), .se(se), .si(), .so());
+
+assign spu_mactl_perr_set = spu_mactl_perr_set_int;
+
+wire spu_mactl_perr_kill_op;
+dff_s  #(1) perr_dly_ff (
+        .din(spu_mactl_perr_wen) ,
+        .q(spu_mactl_perr_kill_op),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+
+assign spu_mactl_ma_kill_op = spu_mactl_perr_kill_op;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+// force perr during ma_ldop for testing only.
+/*
+wire [3:0] dumy_cntr_add,dumy_cntr_q;
+
+wire dummy_count_eq = (dumy_cntr_q[3:0] == 4'b1001);
+assign dumy_cntr_add[3:0] = dumy_cntr_q[3:0] + 4'b0001;
+
+dffre_s  #(4) dumy_cntr_ff (
+        .din(dumy_cntr_add[3:0]) ,
+        .q(dumy_cntr_q[3:0]),
+        .en(spu_mactl_ldop & lsu_spu_ldst_ack),
+        .rst(spu_mactl_ctl_rst_local | (dummy_count_eq&lsu_spu_ldst_ack)  | spu_mactl_iss_pulse), .clk (rclk), .se(se), .si(), .so());
+
+*/
+
+//assign spu_mactl_force_perr = spu_mactl_mactl_reg[13] | (dummy_count_eq & spu_mactl_ldop);
+assign spu_mactl_force_perr = spu_mactl_mactl_reg[13] ;
+
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+
+wire ldxa_tid_decode0 = ~spu_mactl_ldxa_tid_w[1] & ~spu_mactl_ldxa_tid_w[0];
+wire ldxa_tid_decode1 = ~spu_mactl_ldxa_tid_w[1] & spu_mactl_ldxa_tid_w[0];
+wire ldxa_tid_decode2 = spu_mactl_ldxa_tid_w[1] & ~spu_mactl_ldxa_tid_w[0];
+wire ldxa_tid_decode3 = spu_mactl_ldxa_tid_w[1] & spu_mactl_ldxa_tid_w[0];
+
+/*
+dff_s  #(4) ifu_spu_nceen_ff (
+        .din(ifu_spu_nceen[3:0]) ,
+        .q(ifu_spu_nceen_q[3:0]),
+        .clk (rclk), .se(se), .si(), .so()); 
+*/
+
+wire spu_mactl_nceen_w = 
+                                (ifu_spu_nceen[0] & ldxa_tid_decode0) |
+                                (ifu_spu_nceen[1] & ldxa_tid_decode1) |
+                                (ifu_spu_nceen[2] & ldxa_tid_decode2) |
+                                (ifu_spu_nceen[3] & ldxa_tid_decode3) ;
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+
+
+wire spu_ifu_corr_err_w =  spu_wen_ma_cor_err; // this is a pulse. 
+
+dff_s  #(1) spu_ifu_corr_err_w2_ff (
+        .din(spu_ifu_corr_err_w) ,
+        .q(spu_ifu_corr_err_w2),
+        .clk (rclk), .se(se), .si(), .so());
+
+// ------------------------------------------------
+
+
+wire spu_ifu_unc_err_w =  (spu_mactl_ttype_vld_dly | spu_mactl_ldxa_data_vld_qual_w) & 
+						spu_wen_ma_unc_err;
+
+assign spu_mactl_uncerr_rst = spu_ifu_unc_err_w;
+// ------------------------------------------------
+
+wire spu_lsu_unc_error_w = spu_mactl_ldxa_data_vld_qual_w &  spu_mactl_nceen_w & ~spu_mactl_int_set &
+				(spu_mactl_perr_set_int | spu_wen_ma_unc_err) ;
+
+// ------------------------------------------------
+
+wire spu_ifu_mamem_err_w = (spu_mactl_ttype_vld_dly | spu_mactl_ldxa_data_vld_qual_w) & 
+						spu_mactl_perr_set_int;
+
+assign spu_mactl_perr_rst = spu_ifu_mamem_err_w;
+// ------------------------------------------------
+
+wire spu_ifu_int_set = spu_mactl_int_set;
+
+
+dff_s  #(1) spu_ifu_int_w2_ff (
+        .din(spu_ifu_int_set ) ,
+        .q(spu_ifu_int_w2),
+        .clk (rclk), .se(se), .si(), .so());
+
+// ------------------------------------------------
+
+wire spu_ifu_mamem_unc_w = spu_ifu_unc_err_w | spu_ifu_mamem_err_w;
+
+
+dff_s  #(1) spu_ifu_mamem_unc_ff (
+        .din(spu_ifu_mamem_unc_w ) ,
+        .q(spu_ifu_mamem_unc_w2),
+        .clk (rclk), .se(se), .si(), .so());
+
+
+endmodule
Index: /trunk/T1-CPU/spu/spu_madp.v
===================================================================
--- /trunk/T1-CPU/spu/spu_madp.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_madp.v	(revision 6)
@@ -0,0 +1,861 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_madp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    MA datapath .
+*/
+////////////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module spu_madp (
+
+/*outputs*/
+spu_madp_evedata,
+spu_madp_odddata,
+
+spu_mul_op2_data,
+
+spu_madp_m_lt_n,
+spu_madp_m_eq_n,
+
+spu_madp_store_data,
+
+spu_madp_cout_oprnd_sub_mod,
+
+spu_madp_e_eq_one,
+
+spu_madp_mpa_addr_out,
+
+spu_madp_perr,
+
+so,
+
+spu_mul_op1_data,
+
+spu_madp_maaddr_reg,
+
+spu_madp_ldxa_data,
+
+/*inputs*/
+
+spu_mamul_oprnd2_wen,
+spu_mamul_oprnd2_bypass,
+
+mul_data_out,
+
+spu_mared_data_sel_l,
+spu_mared_rdn_wen,
+spu_mared_cin_oprnd_sub_mod,
+
+spu_maexp_e_data_wen,
+spu_maexp_shift_e,
+
+spu_maaddr_mpa_incr_val,
+spu_maaddr_mpa_wen,
+spu_maaddr_mpa_addrinc,
+spu_mactl_mpa_sel,
+
+spu_mactl_ldop,
+spu_mactl_madp_parflop_wen,
+
+spu_mactl_memmxsel_l,
+
+spu_mactl_force_perr,
+
+spu_mamem_rd_eve_data,
+spu_mamem_rd_odd_data,
+
+spu_mamul_oprnd1_mxsel_l,
+spu_maaddr_mamem_eveodd_sel_l,
+spu_mamul_oprnd1_wen,
+
+exu_spu_st_rs3_data_g2,
+
+lsu_spu_vload_data,
+spu_mactl_mactl_reg,
+spu_wen_maln_wen,
+spu_mactl_mpa_wen,
+spu_mactl_maaddr_wen,
+spu_mactl_manp_wen,
+
+spu_mactl_ldxa_data_w_sel_l,
+spu_mactl_ldxa_data_w_select,
+
+se,
+si,
+sehold,
+
+rclk);
+
+// ---------------------------------------------------------
+
+input rclk;
+
+
+input spu_mamul_oprnd2_wen;
+input spu_mamul_oprnd2_bypass;
+
+input [63:0] mul_data_out;
+
+input [3:0] spu_mared_data_sel_l;
+input spu_mared_rdn_wen;
+input spu_mared_cin_oprnd_sub_mod;
+
+input spu_maexp_e_data_wen;
+input spu_maexp_shift_e;
+
+input [4:0] spu_maaddr_mpa_incr_val;
+input spu_maaddr_mpa_wen;
+input spu_maaddr_mpa_addrinc;
+input spu_mactl_mpa_sel;
+
+input spu_mactl_ldop;
+input spu_mactl_madp_parflop_wen;
+
+input [2:0] spu_mactl_memmxsel_l;
+
+
+input spu_mactl_force_perr;
+input spu_wen_maln_wen;
+input spu_mactl_mpa_wen;
+input spu_mactl_maaddr_wen;
+input spu_mactl_manp_wen;
+
+input [3:0] spu_mactl_ldxa_data_w_sel_l;
+input spu_mactl_ldxa_data_w_select;
+
+input se;
+input si;
+input sehold;
+
+input [65:0] spu_mamem_rd_eve_data;
+input [65:0] spu_mamem_rd_odd_data;
+
+input [2:0] spu_mamul_oprnd1_mxsel_l;
+input [3:0] spu_maaddr_mamem_eveodd_sel_l;
+input spu_mamul_oprnd1_wen;
+
+
+input [63:0] exu_spu_st_rs3_data_g2;
+
+input [13:0] spu_mactl_mactl_reg;
+input [127:0] lsu_spu_vload_data;
+// ---------------------------------------------------------
+
+output [65:0] spu_madp_evedata;
+output [65:0] spu_madp_odddata;
+
+output [63:0] spu_mul_op2_data;
+
+output spu_madp_m_lt_n;
+output spu_madp_m_eq_n;
+
+output spu_madp_cout_oprnd_sub_mod;
+
+output [63:0] spu_madp_store_data;
+
+
+output spu_madp_e_eq_one;
+
+output [38:3] spu_madp_mpa_addr_out;
+
+output spu_madp_perr;
+
+output so;
+
+output [63:0] spu_mul_op1_data;
+
+output [47:0] spu_madp_maaddr_reg;
+
+output [63:0] spu_madp_ldxa_data;
+
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+
+wire [47:0] spu_madp_maaddr_reg_int;
+wire [39:0] spu_madp_mpa_addr;
+
+wire [63:0] spu_madp_oprnd2_data;
+wire [64:0] oprand_minus_modulus_or_mpa;
+wire [63:0] spu_madp_modulus;
+//wire [63:0] spu_madp_modulus_b;
+wire [63:0] spu_madp_mpa_or_m,spu_madp_mpa_or_n_b;
+wire [39:0] spu_madp_mpa_mx;
+wire carry_out;
+wire [63:0] mulorred_data;
+wire [63:0] spu_madp_exp_e_data,spu_madp_exp_e_data_q;
+wire [63:0] mulorred_data_q;
+wire [63:0] mul_ldlower_data_mx;
+wire [1:0] mul_ldlower_data_par;
+wire [1:0] mem_ldupper_data_par;
+wire [1:0] mul_ldlower_data_par_pre;
+wire [1:0] mem_ldupper_data_par_pre;
+wire [63:0] mem_ldupper_data_mx;
+wire [65:0] spu_madp_odddata_mx;
+wire [63:0] spu_madp_mem_rd_data;
+wire [65:0] spu_madp_evedata_mx;
+
+
+wire [65:0] spu_mamem_rd_data_unbuf;
+wire [63:0] spu_mamem_rd_data;
+
+wire [1:0] spu_madp_rdmem_pargen;
+
+wire [38:3] spu_madp_mpa_reg;
+wire [63:0] spu_madp_manp_reg;
+wire [63:0] spu_madp_lnupper_data;
+wire [63:0] spu_madp_lnlower_data;
+
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+
+wire testmode_l = ~se;
+
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+// Arrange the parity bits accordingly 
+wire [63:0] read_data_q;
+wire [1:0] spu_madp_par_data;
+
+
+dp_mux4ds #(66) mamem_rd_data_mx (
+        .in0    (spu_mamem_rd_eve_data[65:0]),
+        .in1    (spu_mamem_rd_odd_data[65:0]),
+        .in2    ({2'b11,64'h0000000000000000}),
+        .in3    ({spu_madp_par_data[1:0],read_data_q[63:0]}),// added for dft to test downstream logic.
+        .sel0_l (spu_maaddr_mamem_eveodd_sel_l[0]),
+        .sel1_l (spu_maaddr_mamem_eveodd_sel_l[1]),
+        .sel2_l (spu_maaddr_mamem_eveodd_sel_l[2]),
+        .sel3_l (spu_maaddr_mamem_eveodd_sel_l[3]),
+        .dout   (spu_mamem_rd_data_unbuf[65:0]));
+
+assign spu_mamem_rd_data[63:0] = spu_mamem_rd_data_unbuf[63:0];// used internally, to decouple from the critical path;
+                                                                // make sure it's buffered (buf_x10), not critical
+
+
+wire mem_rddata_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf mem_rddata_lcd (.clk(mem_rddata_clk), .rclk(rclk), 
+                         .enb_l(~spu_mamul_oprnd1_wen), .tmb_l(testmode_l));
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s #(64) mem_rddata_ff (
+        .din(spu_mamem_rd_data_unbuf[63:0]) ,
+        .q(read_data_q[63:0]),
+        .en (~(~spu_mamul_oprnd1_wen)), .clk(rclk), .se(1'b0),.si (),.so ()
+        );
+`else
+dff_s #(64) mem_rddata_ff (
+        .din(spu_mamem_rd_data_unbuf[63:0]) ,
+        .q(read_data_q[63:0]),
+        .clk (mem_rddata_clk), .se(1'b0),.si (),.so ()
+        );
+`endif
+
+wire [63:0] spu_mul_op1_data_unbuf;
+dp_mux3ds #(64) oprnd1_mx (
+        .in0    (read_data_q[63:0]),
+        .in1    (spu_mamem_rd_data_unbuf[63:0]),
+        .in2    (spu_madp_manp_reg[63:0]),
+        .sel0_l (spu_mamul_oprnd1_mxsel_l[0]),
+        .sel1_l (spu_mamul_oprnd1_mxsel_l[1]),
+        .sel2_l (spu_mamul_oprnd1_mxsel_l[2]),
+        .dout   (spu_mul_op1_data_unbuf[63:0]));
+
+assign spu_mul_op1_data[63:0] = spu_mul_op1_data_unbuf[63:0]; // this is critical going to mul unit.
+
+// --------- parity logic
+wire [1:0] spu_madp_mamem_rddata_par;
+assign spu_madp_mamem_rddata_par[1:0] = spu_mamem_rd_data_unbuf[65:64]; // its going out buffer.
+
+// place the following flop on the left-hand side.
+dff_s    #(2) par_ff (
+        .din(spu_madp_mamem_rddata_par[1:0]) ,
+        .q(spu_madp_par_data[1:0]),
+        .clk (rclk), .se(se), .si(), .so());
+
+// upper=parity for [63:32], lower= parity for [31:0]
+wire spu_madp_upper_perr = spu_madp_par_data[1] ^ ~spu_madp_rdmem_pargen[1];
+wire spu_madp_lower_perr = spu_madp_par_data[0] ^ ~spu_madp_rdmem_pargen[0];
+
+assign spu_madp_perr = spu_madp_upper_perr | spu_madp_lower_perr;
+
+// ############################################################
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+
+wire spu_mactl_ldop_q;
+dff_s  #(1) ldop_ff (
+        .din(spu_mactl_ldop) ,
+        .q(spu_mactl_ldop_q),
+        .clk (rclk), .se(se),.si (),.so ()
+        );
+
+wire spu_madp_ldop_q_buf1 = spu_mactl_ldop_q;
+wire spu_madp_ldop_q_buf2 = spu_mactl_ldop_q;
+
+// ############################################################
+
+// ############################################################
+
+// ############################################################
+// ---------------------------------------------------------
+// ------------ OPERAND1 and OPERAND2 to mul unit ----------
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+// operand2 = A,M,ACCUM
+// operand1 = B,N,NP
+// ---------------------------------------------------------
+
+dp_mux2es #(64) oprnd2_mx2 (
+        .in0    (spu_mamem_rd_data[63:0]),
+        .in1    (mul_data_out[63:0]),
+        .sel    (spu_mamul_oprnd2_bypass),
+        .dout   (spu_madp_oprnd2_data[63:0]));
+
+
+wire oprnd2_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf oprnd2_lcd (
+                        .clk(oprnd2_clk), .rclk(rclk),
+                        .enb_l(~spu_mamul_oprnd2_wen), .tmb_l(testmode_l));
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) oprnd2_ff (
+        .din(spu_madp_oprnd2_data[63:0]) ,
+        .q(spu_mul_op2_data[63:0]),
+        .en (~(~spu_mamul_oprnd2_wen)), .clk(rclk), .se(1'b0),.si (),.so ()
+        );
+`else
+dff_s  #(64) oprnd2_ff (
+        .din(spu_madp_oprnd2_data[63:0]) ,
+        .q(spu_mul_op2_data[63:0]),
+        .clk (oprnd2_clk), .se(1'b0),.si (),.so ()
+        );
+`endif
+
+// ----------------------
+// operand1 mux was moved to spu_mamem.v
+
+
+// ############################################################
+// ---------------------------------------------------------
+// ------------ muxing of MPA with reduction operands ------
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+
+//assign spu_madp_modulus_b = ~spu_madp_modulus;
+
+
+wire spu_mactl_mpa_sel_q_upper,spu_mactl_mpa_sel_q_lower;
+
+dff_s  #(1) spu_mactl_mpa_sel_upper_ff (
+        .din(spu_mactl_mpa_sel) ,
+        .q(spu_mactl_mpa_sel_q_upper),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+dff_s  #(1) spu_mactl_mpa_sel_lower_ff (
+        .din(spu_mactl_mpa_sel) ,
+        .q(spu_mactl_mpa_sel_q_lower),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+wire spu_madp_mpa_sel_q_buf1_upper = spu_mactl_mpa_sel_q_upper;
+wire spu_madp_mpa_sel_q_buf1_lower = spu_mactl_mpa_sel_q_lower;
+wire spu_madp_mpa_sel_q_buf2_upper = spu_mactl_mpa_sel_q_upper;
+wire spu_madp_mpa_sel_q_buf2_lower = spu_mactl_mpa_sel_q_lower;
+
+dp_mux2es #(32) mpa_or_m_mx_upper (
+        .in0    (spu_mul_op2_data[63:32]),
+        .in1    ({24'h000000,spu_madp_mpa_addr[39:32]}),
+        .sel    (spu_madp_mpa_sel_q_buf1_upper),
+        .dout   (spu_madp_mpa_or_m[63:32]));
+
+dp_mux2es #(32) mpa_or_m_mx_lower (
+        .in0    (spu_mul_op2_data[31:0]),
+        .in1    (spu_madp_mpa_addr[31:0]),
+        .sel    (spu_madp_mpa_sel_q_buf1_lower),
+        .dout   (spu_madp_mpa_or_m[31:0]));
+
+
+/*
+dp_mux2es #(64) mpa_or_n_mx (
+        .in0    (spu_madp_modulus_b[63:0]),
+        .in1    ({56'h00000000000000,3'b000,spu_maaddr_mpa_incr_val[4:0]}),
+        .sel    (spu_mactl_mpa_sel),
+        .dout   (spu_madp_mpa_or_n_b[63:0]));
+*/
+
+
+wire [4:0] spu_maaddr_mpa_incr_val_q;
+dff_s  #(5) spu_maaddr_mpa_incr_val_ff (
+        .din(spu_maaddr_mpa_incr_val[4:0]) ,
+        .q(spu_maaddr_mpa_incr_val_q[4:0]),
+        .clk (rclk), .se(1'b0), .si(), .so());
+
+wire [4:0] spu_maaddr_mpa_incr_val_q_l = ~spu_maaddr_mpa_incr_val_q;
+
+
+wire [63:0] spu_madp_mpa_or_n;
+
+dp_mux2es #(32) mpa_or_n_mx_upper (
+        .in0    (spu_madp_modulus[63:32]),
+        .in1    (32'hffffffff),
+        .sel    (spu_madp_mpa_sel_q_buf2_upper),
+        .dout   (spu_madp_mpa_or_n[63:32]));
+
+dp_mux2es #(32) mpa_or_n_mx_lower (
+        .in0    (spu_madp_modulus[31:0]),
+        .in1    ({24'hffffff,3'b111,spu_maaddr_mpa_incr_val_q_l[4:0]}),
+        .sel    (spu_madp_mpa_sel_q_buf2_lower),
+        .dout   (spu_madp_mpa_or_n[31:0]));
+
+assign spu_madp_mpa_or_n_b = ~spu_madp_mpa_or_n;
+
+
+
+// ------------------------
+dp_mux2es  #(40) mpa_pa_iss_mx (
+        .in0    ({1'b0,spu_madp_mpa_reg[38:3],3'b000}),
+        .in1    (oprand_minus_modulus_or_mpa[39:0]),
+        .sel    (spu_maaddr_mpa_addrinc),
+        .dout   (spu_madp_mpa_mx[39:0]));
+
+
+wire mpa_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf mpa_lcd (
+                        .clk(mpa_clk), .rclk(rclk),
+                        .enb_l(~spu_maaddr_mpa_wen), .tmb_l(testmode_l));
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(40) mpa_ff  (
+        .din({spu_madp_mpa_mx[39:3],3'b000}) ,
+        .q(spu_madp_mpa_addr[39:0]),
+        .en (~(~spu_maaddr_mpa_wen)), .clk(rclk), .se(1'b0),.si (),.so ()
+        );
+`else
+dff_s  #(40) mpa_ff  (
+        .din({spu_madp_mpa_mx[39:3],3'b000}) ,
+        .q(spu_madp_mpa_addr[39:0]),
+        .clk (mpa_clk), .se(1'b0),.si (),.so ()
+        );
+`endif
+
+assign spu_madp_mpa_addr_out[38:3] = spu_madp_mpa_addr[38:3];
+
+// ############################################################
+// ---------------------------------------------------------
+// ------------ MOD REDUCTION DATAPATH ---------------------
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+
+wire modulus_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf modulus_lcd (
+                        .clk(modulus_clk), .rclk(rclk),
+                        .enb_l(~spu_mared_rdn_wen), .tmb_l(testmode_l));
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) modulus_ff (
+        .din(spu_mamem_rd_data[63:0]) ,
+        .q(spu_madp_modulus[63:0]),
+        .en (~(~spu_mared_rdn_wen)), .clk(rclk), .se(1'b0),.si (),.so ()
+        );
+`else
+dff_s  #(64) modulus_ff (
+        .din(spu_mamem_rd_data[63:0]) ,
+        .q(spu_madp_modulus[63:0]),
+        .clk (modulus_clk), .se(1'b0),.si (),.so ()
+        );
+`endif
+
+
+// USE 64BIT ADDER FROM LIB.
+assign oprand_minus_modulus_or_mpa[64:0] = {1'b0,spu_madp_mpa_or_m[63:0]} +
+				    {1'b0,spu_madp_mpa_or_n_b[63:0]} +
+				    {64'h0000000000000000,spu_mared_cin_oprnd_sub_mod};
+
+assign carry_out = oprand_minus_modulus_or_mpa[64];
+
+assign spu_madp_cout_oprnd_sub_mod = carry_out;
+
+assign spu_madp_m_lt_n = ~carry_out;
+
+// USE 64BIT COMPARATOR FROM LIB.
+assign spu_madp_m_eq_n = (spu_mul_op2_data[63:0] == spu_madp_modulus[63:0]);
+
+dp_mux4ds #(64) modred_data_mx3  (
+        .in0    (mul_data_out[63:0]),
+        .in1    (64'h0000000000000000), 		// wr0tox
+        .in2    (spu_mul_op2_data[63:0]), 		// wrmtox
+        .in3    (oprand_minus_modulus_or_mpa[63:0]), 	// wrstox
+        .sel0_l   (spu_mared_data_sel_l[0]),
+        .sel1_l   (spu_mared_data_sel_l[1]),
+        .sel2_l   (spu_mared_data_sel_l[2]),
+        .sel3_l   (spu_mared_data_sel_l[3]),
+        .dout   (mulorred_data[63:0]));
+
+
+assign spu_madp_store_data[63:0] = spu_madp_modulus[63:0];
+
+
+// ############################################################
+// ---------------------------------------------------------
+// ------------ MOD EXPONENTIATION DATAPATH ----------------
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+
+dp_mux2es #(64) exp_e_data_mx (                             // next-value select for the exponent register
+        .sel    (spu_maexp_shift_e),
+        .in0    (spu_mamem_rd_data[63:0]),                  // MA memory read data
+        .in1    ({spu_madp_exp_e_data_q[62:0], 1'b0}),      // held value shifted left by one, zero filled
+        .dout   (spu_madp_exp_e_data[63:0]));
+
+// Exponent register. Without FPGA_SYN_CLK_EN a clken_buf derives the gated clock; with FPGA_SYN_CLK_DFF the flop runs on rclk with an explicit enable.
+wire exp_e_data_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf exp_e_data_lcd (
+        .rclk  (rclk),                   .clk   (exp_e_data_clk),
+        .enb_l (~spu_maexp_e_data_wen),  .tmb_l (testmode_l));
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) exp_e_data_ff (
+        .clk (rclk), .en (spu_maexp_e_data_wen),
+        .din (spu_madp_exp_e_data[63:0]),
+        .q   (spu_madp_exp_e_data_q[63:0]), .se (1'b0), .si (), .so ()
+        );
+`else
+dff_s  #(64) exp_e_data_ff (
+        .clk (exp_e_data_clk),
+        .din (spu_madp_exp_e_data[63:0]),
+        .q   (spu_madp_exp_e_data_q[63:0]), .se (1'b0), .si (), .so ()
+        );
+`endif
+// Bit 63 of the exponent register is exported as spu_madp_e_eq_one.
+assign spu_madp_e_eq_one = spu_madp_exp_e_data_q[63];
+
+
+// ############################################################
+// ---------------------------------------------------------
+// ------------ MA PARITY DATAPATH ----------------
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+wire [63:0] mem_ldupper_data_mx_l;   // declared early; assigned further down in this section
+// Register capturing mulorred_data[63:0] under spu_mactl_madp_parflop_wen.
+wire mulorred_data_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf mulorred_data_lcd (
+        .rclk  (rclk),                         .clk   (mulorred_data_clk),
+        .enb_l (~spu_mactl_madp_parflop_wen),  .tmb_l (testmode_l));
+`endif
+
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) mulorred_data_ff (
+        .clk (rclk), .en (spu_mactl_madp_parflop_wen),
+        .din (mulorred_data[63:0]),
+        .q   (mulorred_data_q[63:0]), .se (1'b0), .si (), .so ()
+        );
+`else
+dff_s  #(64) mulorred_data_ff (
+        .clk (mulorred_data_clk),
+        .din (mulorred_data[63:0]),
+        .q   (mulorred_data_q[63:0]), .se (1'b0), .si (), .so ()
+        );
+`endif
+
+dp_mux2es #(64) mul_ldlower_mx (	// selects between the mulorred register and the lower load-buffer word
+        .in0    (mulorred_data_q[63:0]),
+        .in1    (spu_madp_lnlower_data[63:0]),
+        .sel    (spu_madp_ldop_q_buf1),
+        .dout   (mul_ldlower_data_mx[63:0]));
+
+wire [63:0] mul_ldlower_data_mx_l;
+assign mul_ldlower_data_mx_l = ~mul_ldlower_data_mx;	//to match gatemap.
+
+// USE THE LIBRARY MACRO FOR THE FOLLOWING PARITY GENERATORS.
+assign mul_ldlower_data_par_pre[1] = (^mul_ldlower_data_mx_l[63:32]);	//to match gatemap.
+assign mul_ldlower_data_par_pre[0] = (^mul_ldlower_data_mx_l[31:0]);	//to match gatemap.
+// Each half is 32 bits (an even count), so XOR-reducing the inverted bits equals XOR-reducing the true bits; force_perr then flips both parity bits.
+assign mul_ldlower_data_par[1] = mul_ldlower_data_par_pre[1] ^ spu_mactl_force_perr;
+assign mul_ldlower_data_par[0] = mul_ldlower_data_par_pre[0] ^ spu_mactl_force_perr;
+
+
+wire [65:0] spu_madp_odddata_mx_l;
+// Each 66-bit input is {2 parity bits, 64 inverted data bits}; in0 and in2 carry the same source.
+dp_mux3ds #(66) odd_data_mx (
+        .in0    ({mul_ldlower_data_par[1:0],mul_ldlower_data_mx_l[63:0]}),
+        .in1    ({mem_ldupper_data_par[1:0],mem_ldupper_data_mx_l[63:0]}),
+        .in2    ({mul_ldlower_data_par[1:0],mul_ldlower_data_mx_l[63:0]}),
+        .sel0_l    (spu_mactl_memmxsel_l[0]),
+        .sel1_l    (spu_mactl_memmxsel_l[1]),
+        .sel2_l    (spu_mactl_memmxsel_l[2]),
+        .dout   (spu_madp_odddata_mx_l[65:0]));
+
+// NOTE(review): complement of the mux output; restores true data polarity only if dp_mux3ds is non-inverting -- confirm. The parity bits are complemented too.
+assign spu_madp_odddata_mx = ~spu_madp_odddata_mx_l;	//to match gatemap.
+
+// Feedback mux: in1 is the flop's own output, so sehold chooses between new data and the held value.
+wire [65:0] spu_madp_odddata_mx_sehold;
+dp_mux2es #(66) odddata_mx_sehold (
+        .in0    (spu_madp_odddata_mx[65:0]),
+        .in1    (spu_madp_odddata[65:0]),
+        .sel    (sehold),
+        .dout   (spu_madp_odddata_mx_sehold[65:0]));
+
+dff_s  #(66) mul_ldlower_ff (
+        .din(spu_madp_odddata_mx_sehold[65:0]) ,
+        .q(spu_madp_odddata[65:0]),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+
+// ---------------------------------------------------------
+
+dff_s  #(64) mem_data_ff (		// registers the MA memory read data; clocked directly by rclk with no enable
+        .din(spu_mamem_rd_data[63:0]) ,
+        .q(spu_madp_mem_rd_data[63:0]),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+// ---------------------------------------------------------
+
+dp_mux2es #(64) mem_ldupper_mx (	// selects between the registered memory read data and the upper load-buffer word
+        .in0    (spu_madp_mem_rd_data[63:0]),
+        .in1    (spu_madp_lnupper_data[63:0]),
+        .sel    (spu_madp_ldop_q_buf2),
+        .dout   (mem_ldupper_data_mx[63:0]));
+
+assign mem_ldupper_data_mx_l = ~mem_ldupper_data_mx;
+
+// USE THE LIBRARY MACRO FOR THE FOLLOWING PARITY GENERATORS.
+assign mem_ldupper_data_par_pre[1] = (^mem_ldupper_data_mx_l[63:32]);
+assign mem_ldupper_data_par_pre[0] = (^mem_ldupper_data_mx_l[31:0]);
+// Both parity bits are flipped when spu_mactl_force_perr is set.
+assign mem_ldupper_data_par[1] = mem_ldupper_data_par_pre[1] ^ spu_mactl_force_perr;
+assign mem_ldupper_data_par[0] = mem_ldupper_data_par_pre[0] ^ spu_mactl_force_perr;
+
+wire [65:0] spu_madp_evedata_mx_l;
+dp_mux3ds #(66) eve_data_mx (		// same select lines as odd_data_mx, with the in0/in1 sources swapped; in1 and in2 carry the same source
+        .in0    ({mem_ldupper_data_par[1:0],mem_ldupper_data_mx_l[63:0]}),
+        .in1    ({mul_ldlower_data_par[1:0],mul_ldlower_data_mx_l[63:0]}),
+        .in2    ({mul_ldlower_data_par[1:0],mul_ldlower_data_mx_l[63:0]}),
+        .sel0_l    (spu_mactl_memmxsel_l[0]),
+        .sel1_l    (spu_mactl_memmxsel_l[1]),
+        .sel2_l    (spu_mactl_memmxsel_l[2]),
+        .dout   (spu_madp_evedata_mx_l[65:0]));
+
+assign spu_madp_evedata_mx = ~spu_madp_evedata_mx_l;
+
+// Feedback mux: in1 is the flop's own output, so sehold chooses between new data and the held value.
+wire [65:0] spu_madp_evedata_mx_sehold;
+dp_mux2es #(66) evedata_mx_sehold (
+        .in0    (spu_madp_evedata_mx[65:0]),
+        .in1    (spu_madp_evedata[65:0]),
+        .sel    (sehold),
+        .dout   (spu_madp_evedata_mx_sehold[65:0]));
+
+dff_s  #(66) mem_ldupper_ff (
+        .din(spu_madp_evedata_mx_sehold[65:0]) ,
+        .q(spu_madp_evedata[65:0]),
+        .clk (rclk), .se(1'b0),.si (),.so ()
+        );
+
+
+
+// ---------------------------------------------------------
+// Parity as generated before force_perr is applied; exported for the read-memory parity check, which is performed in spu_mactl.v.
+assign spu_madp_rdmem_pargen[1:0] = mem_ldupper_data_par_pre[1:0];
+
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+
+// ############################################################
+// ---------------------------------------------------------
+// ---------------------------------------------------------
+// ------- MA ASI REGISTERS
+
+wire mampa_reg_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf mampa_reg_lcd (
+        .rclk  (rclk),                .clk   (mampa_reg_clk),
+        .enb_l (~spu_mactl_mpa_wen),  .tmb_l (testmode_l));
+`endif
+// MPA register: captures bits [38:3] of exu_spu_st_rs3_data_g2 under spu_mactl_mpa_wen.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(36) mampa_reg_ff (
+        .clk (rclk), .en (spu_mactl_mpa_wen),
+        .din (exu_spu_st_rs3_data_g2[38:3]),
+        .q   (spu_madp_mpa_reg[38:3]), .se (1'b0), .si (), .so ()
+        );
+`else
+dff_s  #(36) mampa_reg_ff (
+        .clk (mampa_reg_clk),
+        .din (exu_spu_st_rs3_data_g2[38:3]),
+        .q   (spu_madp_mpa_reg[38:3]), .se (1'b0), .si (), .so ()
+        );
+`endif
+
+wire maaddr_reg_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf maaddr_reg_lcd (
+        .rclk  (rclk),                   .clk   (maaddr_reg_clk),
+        .enb_l (~spu_mactl_maaddr_wen),  .tmb_l (testmode_l));
+`endif
+// MA address register: captures bits [47:0] of exu_spu_st_rs3_data_g2 under spu_mactl_maaddr_wen.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(48) maaddr_reg_ff (
+        .clk (rclk), .en (spu_mactl_maaddr_wen),
+        .din (exu_spu_st_rs3_data_g2[47:0]),
+        .q   (spu_madp_maaddr_reg_int[47:0]), .se (1'b0), .si (), .so ()
+        );
+`else
+dff_s  #(48) maaddr_reg_ff (
+        .clk (maaddr_reg_clk),
+        .din (exu_spu_st_rs3_data_g2[47:0]),
+        .q   (spu_madp_maaddr_reg_int[47:0]), .se (1'b0), .si (), .so ()
+        );
+`endif
+
+// The register value is also driven out unchanged as spu_madp_maaddr_reg.
+assign spu_madp_maaddr_reg[47:0] = spu_madp_maaddr_reg_int[47:0];
+
+
+wire manp_reg_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf manp_reg_lcd (
+        .rclk  (rclk),                 .clk   (manp_reg_clk),
+        .enb_l (~spu_mactl_manp_wen),  .tmb_l (testmode_l));
+`endif
+// NP register: captures all 64 bits of exu_spu_st_rs3_data_g2 under spu_mactl_manp_wen.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) manp_reg_ff (
+        .clk (rclk), .en (spu_mactl_manp_wen),
+        .din (exu_spu_st_rs3_data_g2[63:0]),
+        .q   (spu_madp_manp_reg[63:0]), .se (1'b0), .si (), .so ()
+        );
+`else
+dff_s  #(64) manp_reg_ff (
+        .clk (manp_reg_clk),
+        .din (exu_spu_st_rs3_data_g2[63:0]),
+        .q   (spu_madp_manp_reg[63:0]), .se (1'b0), .si (), .so ()
+        );
+`endif
+
+wire [63:0] spu_madp_ldxa_data_a;
+dp_mux4ds #(64) ldxa_data_a_mx  (
+        .dout   (spu_madp_ldxa_data_a[63:0]),
+        .in0    ({25'h0, spu_madp_mpa_reg[38:3], 3'b000}),      // MPA register placed in bits [38:3]
+        .in1    ({16'h0, spu_madp_maaddr_reg_int[47:0]}),       // MA address register, zero extended
+        .in2    (spu_madp_manp_reg[63:0]),
+        .in3    ({50'h0, spu_mactl_mactl_reg[13:0]}),           // 14-bit control register, zero extended
+        .sel0_l   (spu_mactl_ldxa_data_w_sel_l[0]),
+        .sel1_l   (spu_mactl_ldxa_data_w_sel_l[1]),
+        .sel2_l   (spu_mactl_ldxa_data_w_sel_l[2]),
+        .sel3_l   (spu_mactl_ldxa_data_w_sel_l[3]));
+// Second stage: spu_mactl_ldxa_data_w_select chooses between the register read-back and all zeros.
+dp_mux2es #(64) ldxa_data_mx (
+        .dout   (spu_madp_ldxa_data[63:0]),
+        .sel    (spu_mactl_ldxa_data_w_select),
+        .in0    (spu_madp_ldxa_data_a[63:0]),
+        .in1    (64'h0));
+
+
+
+// ############################################################
+// ------------------------------------------------------------
+// load buffer.
+// ------------------------------------------------------------
+// ------------------------------------------------------------
+
+wire lnupper_data_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lnupper_data_lcd (
+        .rclk  (rclk),               .clk   (lnupper_data_clk),
+        .enb_l (~spu_wen_maln_wen),  .tmb_l (testmode_l));
+`endif
+// Upper load-buffer word: lsu_spu_vload_data[127:64], captured under spu_wen_maln_wen.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) lnupper_data_ff (
+        .clk (rclk), .en (spu_wen_maln_wen),
+        .din (lsu_spu_vload_data[127:64]),
+        .q   (spu_madp_lnupper_data[63:0]), .se (1'b0), .si (), .so ()
+        );
+`else
+dff_s  #(64) lnupper_data_ff (
+        .clk (lnupper_data_clk),
+        .din (lsu_spu_vload_data[127:64]),
+        .q   (spu_madp_lnupper_data[63:0]), .se (1'b0), .si (), .so ()
+        );
+`endif
+
+wire lnlower_data_clk;
+`ifdef FPGA_SYN_CLK_EN
+`else
+clken_buf lnlower_data_lcd (
+        .rclk  (rclk),               .clk   (lnlower_data_clk),
+        .enb_l (~spu_wen_maln_wen),  .tmb_l (testmode_l));
+`endif
+// Lower load-buffer word: lsu_spu_vload_data[63:0], captured under the same spu_wen_maln_wen.
+`ifdef FPGA_SYN_CLK_DFF
+dffe_s  #(64) lnlower_data_ff (
+        .clk (rclk), .en (spu_wen_maln_wen),
+        .din (lsu_spu_vload_data[63:0]),
+        .q   (spu_madp_lnlower_data[63:0]), .se (1'b0), .si (), .so ()
+        );
+`else
+dff_s  #(64) lnlower_data_ff (
+        .clk (lnlower_data_clk),
+        .din (lsu_spu_vload_data[63:0]),
+        .q   (spu_madp_lnlower_data[63:0]), .se (1'b0), .si (), .so ()
+        );
+`endif
+
+
+// ############################################################
+
+
+endmodule
Index: /trunk/T1-CPU/spu/spu_ctl.v
===================================================================
--- /trunk/T1-CPU/spu/spu_ctl.v	(revision 6)
+++ /trunk/T1-CPU/spu/spu_ctl.v	(revision 6)
@@ -0,0 +1,821 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: spu_ctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    Stream Processing Unit for Sparc Core   
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+
+
+module spu_ctl (
+  //Inputs (except spu_wen_pckt_req and spu_mul_mulres_lshft, which are declared as outputs)
+   cpx_spu_data_cx,spu_wen_pckt_req,lsu_spu_ldst_ack,lsu_tlu_st_rs3_data_g,
+   spu_lsurpt1_rsrv_data_e,  spu_madp_mpa_addr, 
+   ifu_spu_trap_ack,mul_data_out,spu_mul_mulres_lshft,
+   //output
+   spu_mamul_oprnd2_bypass, 
+   spu_mactl_ldop, 
+/*AUTOARG*/
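+   // Outputs (except cpuid, ifu_spu_nceen, mux_drive_disable, mem_bypass, sehold and spu_madp_maaddr_reg, which are declared as inputs)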
+
+   spu_wen_ldst_pcx_vld, 
+
+   spu_wen_pcx_wen, 
+   spu_wen_pcx_7170_sel, 
+
+   spu_ifu_corr_err_w2, 
+   spu_ifu_unc_err_w, 
+   spu_lsu_unc_error_w, 
+
+   spu_ifu_mamem_err_w, 
+   spu_ifu_int_w2, 
+   spu_lsu_ldxa_illgl_va_w2, 
+   cpuid, 
+   ifu_spu_nceen, 
+
+   spu_tlu_rsrv_illgl_m, 
+   spu_mul_req_vld, 
+   spu_mul_areg_shf, spu_mul_areg_rst, spu_mul_acc, 
+   spu_mared_rdn_wen, spu_mared_data_sel_l, 
+   spu_mared_cin_oprnd_sub_mod, spu_mamul_oprnd2_wen, 
+   spu_mamul_oprnd1_mxsel_l,spu_mamul_oprnd1_wen, spu_maexp_shift_e, 
+   spu_maexp_e_data_wen, spu_mactl_mpa_sel, 
+   spu_mactl_memodd_wen, spu_mactl_memmxsel_l, spu_mactl_memeve_wen, 
+   spu_mactl_mamem_ren, spu_mactl_mamem_wen,
+   spu_mactl_madp_parflop_wen, spu_mactl_force_perr, 
+   spu_maaddr_mpa_wen, spu_maaddr_mpa_incr_val, 
+   spu_maaddr_mpa_addrinc, spu_maaddr_memindx,  spu_maaddr_mamem_eveodd_sel_l,
+   spu_lsu_stxa_ack_tid, spu_lsu_stxa_ack, spu_lsu_ldxa_tid_w2, 
+   spu_lsu_ldxa_data_vld_w2, spu_ifu_ttype_w2, spu_ifu_ttype_vld_w2, 
+   spu_ifu_ttype_tid_w,   
+   so, 
+
+   spu_mactl_mem_reset_l,
+   mux_drive_disable,
+   mem_bypass,
+   sehold,
+
+spu_mactl_ldxa_data_w_sel_l,
+spu_mactl_ldxa_data_w_select,
+spu_mactl_mpa_wen,
+spu_mactl_maaddr_wen,
+spu_mactl_manp_wen,
+spu_wen_maln_wen,
+spu_mactl_mactl_reg,
+spu_madp_maaddr_reg,
+
+   // Inputs
+
+
+   lsu_spu_stb_empty, 
+
+   lsu_spu_strm_ack_cmplt, 
+   lsu_spu_early_flush_g, 
+   tlu_spu_flush_w, 
+   ifu_spu_flush_w, 
+   spu_madp_perr, spu_madp_m_lt_n, spu_madp_m_eq_n, spu_madp_e_eq_one, 
+   spu_madp_cout_oprnd_sub_mod, si, se, grst_l, arst_l, mul_spu_shf_ack, 
+   mul_spu_ack, lsu_spu_asi_state_e, ifu_tlu_thrid_e, 
+   ifu_spu_inst_vld_w, ifu_lsu_st_inst_e, ifu_lsu_ld_inst_e, 
+   ifu_lsu_alt_space_e, exu_lsu_ldst_va_e, rclk
+   ) ;  
+
+input [3:0] lsu_spu_stb_empty;
+
+input [134:128]         cpx_spu_data_cx;
+input 		        lsu_spu_ldst_ack;
+input              ifu_spu_trap_ack;
+input [13:0]            lsu_tlu_st_rs3_data_g;
+input [2:0]            spu_lsurpt1_rsrv_data_e;
+input [3:3]             spu_madp_mpa_addr;
+input [0:0]             mul_data_out;
+output                  spu_mul_mulres_lshft;
+output			spu_mamul_oprnd2_bypass;// From spu_mamul of spu_mamul.v
+output			spu_mactl_ldop;		// From spu_mactl of spu_mactl.v
+
+output 	spu_ifu_corr_err_w2;
+output 	spu_ifu_unc_err_w;
+output 	spu_lsu_unc_error_w;
+
+output spu_ifu_mamem_err_w;
+output spu_ifu_int_w2;
+output spu_lsu_ldxa_illgl_va_w2;
+
+
+output spu_mactl_mem_reset_l;
+
+output spu_wen_pcx_7170_sel;
+output spu_wen_pcx_wen;
+
+output [122:104]         spu_wen_pckt_req;
+// ------------------------------------------------------------------
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+
+
+input			rclk;			// To spu_wen of spu_wen.v, ...
+input [7:0]		exu_lsu_ldst_va_e;	// To spu_mactl of spu_mactl.v
+input			ifu_lsu_alt_space_e;	// To spu_mactl of spu_mactl.v
+input			ifu_lsu_ld_inst_e;	// To spu_mactl of spu_mactl.v
+input			ifu_lsu_st_inst_e;	// To spu_mactl of spu_mactl.v
+input			ifu_spu_inst_vld_w;	// To spu_mactl of spu_mactl.v
+input [1:0]		ifu_tlu_thrid_e;	// To spu_mactl of spu_mactl.v
+input [7:0]		lsu_spu_asi_state_e;	// To spu_mactl of spu_mactl.v
+input			mul_spu_ack;		// To spu_mamul of spu_mamul.v, ...
+input			mul_spu_shf_ack;	// To spu_mamul of spu_mamul.v, ...
+input			grst_l;			// To spu_wen of spu_wen.v, ...
+input			arst_l;			// To spu_wen of spu_wen.v, ...
+input			se;			// To spu_shactl of spu_shactl.v, ...
+input			si;			// To spu_shactl of spu_shactl.v, ...
+input			spu_madp_cout_oprnd_sub_mod;// To spu_mared of spu_mared.v
+input			spu_madp_e_eq_one;	// To spu_maexp of spu_maexp.v
+input			spu_madp_m_eq_n;	// To spu_mared of spu_mared.v
+input			spu_madp_m_lt_n;	// To spu_mared of spu_mared.v
+input      		spu_madp_perr;	// To spu_mactl of spu_mactl.v
+input			lsu_spu_early_flush_g;	
+input			tlu_spu_flush_w;	
+input			ifu_spu_flush_w;	
+input [2:0] 		cpuid;	
+input [3:0] 		ifu_spu_nceen;	
+
+
+input [1:0] lsu_spu_strm_ack_cmplt;
+
+input mux_drive_disable;
+input mem_bypass;
+input sehold;
+
+
+// End of automatics
+
+// ------------------------------------------------------------------
+/*AUTOOUTPUT*/
+// Beginning of automatic outputs (from unused autoinst outputs)
+output			so;			// From spu_shactl of spu_shactl.v, ...
+output [1:0]		spu_ifu_ttype_tid_w;	// From spu_mactl of spu_mactl.v
+output			spu_ifu_ttype_vld_w2;	// From spu_mactl of spu_mactl.v
+output			spu_ifu_ttype_w2;	// From spu_mactl of spu_mactl.v
+output			spu_lsu_ldxa_data_vld_w2;// From spu_mactl of spu_mactl.v
+output [1:0]		spu_lsu_ldxa_tid_w2;	// From spu_mactl of spu_mactl.v
+output			spu_lsu_stxa_ack;	// From spu_mactl of spu_mactl.v
+output [1:0]		spu_lsu_stxa_ack_tid;	// From spu_mactl of spu_mactl.v
+output [7:1]		spu_maaddr_memindx;	// From spu_maaddr of spu_maaddr.v
+output [3:0]		spu_maaddr_mamem_eveodd_sel_l;	// From spu_maaddr of spu_maaddr.v
+output			spu_maaddr_mpa_addrinc;	// From spu_maaddr of spu_maaddr.v
+output [4:0]		spu_maaddr_mpa_incr_val;// From spu_maaddr of spu_maaddr.v
+output			spu_maaddr_mpa_wen;	// From spu_maaddr of spu_maaddr.v
+output			spu_mactl_force_perr;	// From spu_mactl of spu_mactl.v
+output			spu_mactl_madp_parflop_wen;// From spu_mactl of spu_mactl.v
+output			spu_mactl_mamem_ren;	// From spu_mactl of spu_mactl.v
+output			spu_mactl_mamem_wen;	// From spu_mactl of spu_mactl.v
+output			spu_mactl_memeve_wen;	// From spu_mactl of spu_mactl.v
+output [2:0]		spu_mactl_memmxsel_l;	// From spu_mactl of spu_mactl.v
+output			spu_mactl_memodd_wen;	// From spu_mactl of spu_mactl.v
+output			spu_mactl_mpa_sel;	// From spu_mactl of spu_mactl.v
+output			spu_maexp_e_data_wen;	// From spu_maexp of spu_maexp.v
+output			spu_maexp_shift_e;	// From spu_maexp of spu_maexp.v
+output [2:0]			spu_mamul_oprnd1_mxsel_l;	// From spu_mamul of spu_mamul.v
+output			spu_mamul_oprnd1_wen;	// From spu_mamul of spu_mamul.v
+output			spu_mamul_oprnd2_wen;	// From spu_mamul of spu_mamul.v
+output			spu_mared_cin_oprnd_sub_mod;// From spu_mared of spu_mared.v
+output [3:0]		spu_mared_data_sel_l;	// From spu_mared of spu_mared.v
+output			spu_mared_rdn_wen;	// From spu_mared of spu_mared.v
+output			spu_mul_acc;		// From spu_mamul of spu_mamul.v
+output			spu_mul_areg_rst;	// From spu_mamul of spu_mamul.v
+output			spu_mul_areg_shf;	// From spu_mamul of spu_mamul.v
+output			spu_mul_req_vld;	// From spu_mamul of spu_mamul.v
+output			spu_tlu_rsrv_illgl_m;	// From spu_mactl of spu_mactl.v
+
+output spu_wen_ldst_pcx_vld;
+
+output [3:0] spu_mactl_ldxa_data_w_sel_l;
+output spu_mactl_ldxa_data_w_select;
+output spu_mactl_mpa_wen;
+output spu_mactl_maaddr_wen;
+output spu_mactl_manp_wen;
+output spu_wen_maln_wen;
+output [13:0] spu_mactl_mactl_reg;
+input [47:0] spu_madp_maaddr_reg;	// an input, although it sits in the automatic-outputs section
+
+// End of automatics
+// ------------------------------------------------------------------
+
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+wire [1:0]			spu_maaeqb_oprnd1_mxsel;	// From spu_maaeqb of spu_maaeqb.v
+wire [5:0] spu_maaddr_len_cntr;
+wire [5:0]		spu_mactl_mactl_len;	// From spu_mactl of spu_mactl.v
+
+// End of automatics
+
+// ------------------------------------------------------------------
+
+
+// ------------------------------------------------------------------
+
+
+/****************************************************************************/
+
+// -------------------------------------------------------------------------
+spu_wen spu_wen (//in
+		.spu_mald_done	        	(spu_mald_ld_done),
+		// Active connections below slice the cpx_spu_data_cx[134:128] input; the commented-out connections are left untouched.
+		//.lsu_spu_vload_rtntyp		(cpx_spu_data_cx[138:135]),	//cpx_spc_data_cx[143:140]
+		.lsu_spu_vload_rtntyp		(cpx_spu_data_cx[133:130]),	//cpx_spc_data_cx[143:140]
+		//.lsu_spu_vload_asop		(cpx_spu_data_cx[129]),		//cpx_spc_data_cx[130]
+		//.lsu_spu_vload_vld		(cpx_spu_data_cx[139]),		//cpx_spc_data_cx[144]
+		.lsu_spu_vload_vld		(cpx_spu_data_cx[134]),		//cpx_spc_data_cx[144]
+		//.lsu_spu_vload_data_tid	(cpx_spu_data_cx[131:130]),	//cpx_spc_data_cx[135:134]
+		//.lsu_spu_vload_bid		(cpx_spu_data_cx[128]),		//cpx_spc_data_cx[129]
+		//.l2_miss 			(cpx_spu_data_cx[134]),		//cpx_spc_data_cx[139]
+		//.l2_err 			(cpx_spu_data_cx[133:132]),	//cpx_spc_data_cx[138:137]
+		.l2_err 			(cpx_spu_data_cx[129:128]),	//cpx_spc_data_cx[138:137]
+		// Store and load acks share lsu_spu_ldst_ack and bits [113:112] of spu_wen_pckt_req (an output of this same instance); only the asop bit differs (108 vs 106).
+		.lsu_spu_st_ack_tid		(spu_wen_pckt_req[113:112]),
+		.lsu_spu_st_asop		(spu_wen_pckt_req[108]),
+		.lsu_spu_st_ackvld		(lsu_spu_ldst_ack),
+		.lsu_spu_ld_ack_tid		(spu_wen_pckt_req[113:112]),
+		.lsu_spu_ld_asop		(spu_wen_pckt_req[106]),
+		.lsu_spu_ld_ackvld		(lsu_spu_ldst_ack),
+
+		.cpuid		(cpuid[2:0]),
+
+		 /*AUTOINST*/
+		 // Outputs
+		 .spu_wen_pckt_req	(spu_wen_pckt_req[122:104]),
+
+		 .spu_wen_mast_ack	(spu_wen_mast_ack),
+		 .spu_wen_maln_wen	(spu_wen_maln_wen),
+		 .spu_wen_mald_ack	(spu_wen_mald_ack),
+		 .spu_wen_vld_maln	(spu_wen_vld_maln),
+
+		 .spu_wen_ldst_pcx_vld	(spu_wen_ldst_pcx_vld),
+		.spu_mactl_streq		(spu_mactl_streq),	// NOTE(review): also listed under Outputs of the spu_mactl instance; presumably an input here
+		.spu_mald_ldreq		(spu_mald_ldreq),	// NOTE(review): also listed under Outputs of the spu_mald instance; presumably an input here
+
+		.spu_wen_allma_stacks_ok		(spu_wen_allma_stacks_ok),
+
+		// NOTE(review): most nets on this instance are not declared in the visible wire section, so they presumably rely on implicit 1-bit nets -- confirm.
+                .spu_wen_ma_unc_err_pulse             (spu_wen_ma_unc_err_pulse),
+		.spu_wen_ma_unc_err		(spu_wen_ma_unc_err),
+		.spu_wen_ma_cor_err		(spu_wen_ma_cor_err),
+
+		 // Inputs
+		.spu_mactl_uncerr_rst 			(spu_mactl_uncerr_rst),
+
+
+		 .lsu_spu_strm_ack_cmplt		(lsu_spu_strm_ack_cmplt[1:0]),
+		 .reset			(spu_mactl_ctl_reset),	// reset net is spu_mactl_ctl_reset, not grst_l/arst_l
+		 .rclk			(rclk),
+		 .se			(se),
+		 .spu_mald_rstln	(spu_mald_rstln));
+
+
+// -------------------------------------------------------------------------
+
+// -------------------------------------------------------------------------
+
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// ------------------------ MA STUFF ---------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+// -------------------------------------------------------------------------
+spu_mast spu_mast (//in
+		.mactl_stop			(spu_mactl_stop),	// net also connected as .spu_mactl_stop on the spu_mactl instance
+		.streq_ack			(spu_wen_mast_ack),	// net also connected as .spu_wen_mast_ack on the spu_wen instance
+		.len_neqz			(spu_maaddr_len_neqz),	// net also connected as .spu_maaddr_len_neqz on the spu_maaddr instance
+		   
+		   /*AUTOINST*/
+		   // Outputs
+		   .spu_mast_maaddr_addrinc(spu_mast_maaddr_addrinc),
+		   .spu_mast_memren	(spu_mast_memren),
+		   .spu_mast_stbuf_wen	(spu_mast_stbuf_wen),
+		   .spu_mast_mpa_addrinc(spu_mast_mpa_addrinc),
+		   .spu_mast_streq	(spu_mast_streq),
+		   .spu_mast_done_set	(spu_mast_done_set),
+		// NOTE(review): the next net is also listed under Outputs of the spu_wen instance; presumably an input here.
+			.spu_wen_allma_stacks_ok		(spu_wen_allma_stacks_ok),
+		   // Inputs
+
+		.spu_mactl_stxa_force_abort		(spu_mactl_stxa_force_abort),
+			.spu_mactl_perr_set		(spu_mactl_perr_set),
+
+		   .reset		(spu_mactl_ctl_reset),
+		   .rclk			(rclk),
+		 .se			(se),
+		   .spu_mactl_iss_pulse_dly(spu_mactl_iss_pulse_dly));
+// -------------------------------------------------------------------------
+spu_mald spu_mald (//in
+		.ld_inprog			(spu_mactl_ldop),	// same net as .mactl_ldop below
+		.ldreq_ack			(spu_wen_mald_ack),
+		.ln_received			(spu_wen_vld_maln),
+		.len_neqz			(spu_maaddr_len_neqz),
+		.mactl_ldop			(spu_mactl_ldop),
+		   //out
+		.spu_mald_done			(spu_mald_ld_done),		   // net name differs from the port; it feeds .spu_mald_done of the spu_wen instance
+		   /*AUTOINST*/
+		   // Outputs
+		   .spu_mald_rstln	(spu_mald_rstln),
+		   .spu_mald_maaddr_addrinc(spu_mald_maaddr_addrinc),
+		   .spu_mald_memwen	(spu_mald_memwen),
+		   .spu_mald_mpa_addrinc(spu_mald_mpa_addrinc),
+		   .spu_mald_ldreq	(spu_mald_ldreq),
+		   .spu_mald_force_mpa_add16(spu_mald_force_mpa_add16),
+		   .spu_mald_done_set	(spu_mald_done_set),
+		   // Inputs
+		.spu_mactl_stxa_force_abort		(spu_mactl_stxa_force_abort),
+                .spu_wen_ma_unc_err_pulse             (spu_wen_ma_unc_err_pulse),
+
+		   .reset		(spu_mactl_ctl_reset),
+		   .rclk			(rclk),
+		 .se			(se),
+		   .spu_maaddr_mpa1maddr0(spu_maaddr_mpa1maddr0),
+		   .spu_mactl_iss_pulse_dly(spu_mactl_iss_pulse_dly));
+
+// -------------------------------------------------------------------------
+spu_mactl spu_mactl (//in
+		.spu_maaddr_maaddr_0		(spu_maaddr_wrindx_0),
+		.spu_maaddr_mpa_3		(spu_madp_mpa_addr[3]),
+                .ma_ctl_reg_data            	(lsu_tlu_st_rs3_data_g[13:0]),
+
+		.ifu_spu_trap_ack 			(ifu_spu_trap_ack),
+                .spu_mactl_rsrv_data_e		(spu_lsurpt1_rsrv_data_e[2:0]),
+		// The six nets below are same-named output ports of spu_ctl.
+		.spu_ifu_corr_err_w2 			(spu_ifu_corr_err_w2),
+		.spu_ifu_unc_err_w 			(spu_ifu_unc_err_w),
+		.spu_lsu_unc_error_w 			(spu_lsu_unc_error_w),
+
+		.spu_ifu_mamem_err_w 			(spu_ifu_mamem_err_w),
+		.spu_ifu_int_w2 			(spu_ifu_int_w2),
+		.spu_lsu_ldxa_illgl_va_w2 			(spu_lsu_ldxa_illgl_va_w2),
+
+		.spu_mactl_uncerr_rst 			(spu_mactl_uncerr_rst),
+		// The two PCX controls leave spu_ctl under spu_wen_* names although the ports here are spu_mactl_pcx_*.
+		.spu_mactl_pcx_wen		(spu_wen_pcx_wen),
+		.spu_mactl_pcx_7170_sel		(spu_wen_pcx_7170_sel),
+			.spu_mactl_perr_set		(spu_mactl_perr_set),
+		     /*AUTOINST*/
+		     // Outputs
+		     //.so		(so),
+		// NOTE(review): .so is commented out, so this instance does not drive the scan-out net "so".
+		     .spu_mactl_iss_pulse_pre(spu_mactl_iss_pulse_pre),
+		     .spu_mactl_iss_pulse(spu_mactl_iss_pulse),
+		     .spu_mactl_mpa_wen	(spu_mactl_mpa_wen),
+		     .spu_mactl_maaddr_wen(spu_mactl_maaddr_wen),
+		     .spu_mactl_manp_wen(spu_mactl_manp_wen),
+		     .spu_mactl_ldop	(spu_mactl_ldop),
+		     .spu_mactl_stop	(spu_mactl_stop),
+		     .spu_mactl_mulop	(spu_mactl_mulop),
+		     .spu_mactl_redop	(spu_mactl_redop),
+		     .spu_mactl_expop	(spu_mactl_expop),
+		     .spu_mactl_memmxsel_l(spu_mactl_memmxsel_l[2:0]),
+		     .spu_mactl_memeve_wen(spu_mactl_memeve_wen),
+		     .spu_mactl_memodd_wen(spu_mactl_memodd_wen),
+		     .spu_mactl_mamem_ren(spu_mactl_mamem_ren),
+		     .spu_mactl_mamem_wen(spu_mactl_mamem_wen),
+		     .spu_mactl_iss_pulse_dly(spu_mactl_iss_pulse_dly),
+		     .spu_mactl_ldxa_data_w_sel_l(spu_mactl_ldxa_data_w_sel_l[3:0]),
+		     .spu_mactl_ldxa_data_w_select(spu_mactl_ldxa_data_w_select),
+		     .spu_mactl_mpa_sel	(spu_mactl_mpa_sel),
+		     .spu_mactl_madp_parflop_wen(spu_mactl_madp_parflop_wen),
+		     .spu_lsu_ldxa_data_vld_w2(spu_lsu_ldxa_data_vld_w2),
+		     .spu_lsu_ldxa_tid_w2(spu_lsu_ldxa_tid_w2[1:0]),
+		     .spu_lsu_stxa_ack	(spu_lsu_stxa_ack),
+		     .spu_lsu_stxa_ack_tid(spu_lsu_stxa_ack_tid[1:0]),
+		     .spu_mactl_ldxa_mactl_reg(spu_mactl_mactl_reg[13:0]),	// leaves spu_ctl as spu_mactl_mactl_reg
+		     .spu_mactl_mactl_len(spu_mactl_mactl_len[5:0]),	// also connected to .spu_madp_mactl_reg of the spu_maaddr instance
+		     .spu_mactl_force_perr(spu_mactl_force_perr),
+		     .spu_ifu_ttype_w2	(spu_ifu_ttype_w2),
+		     .spu_ifu_ttype_vld_w2(spu_ifu_ttype_vld_w2),
+		     .spu_ifu_ttype_tid_w(spu_ifu_ttype_tid_w[1:0]),
+		     .spu_tlu_rsrv_illgl_m(spu_tlu_rsrv_illgl_m),
+
+		   .spu_mactl_streq	(spu_mactl_streq),
+		// spu_mactl_ctl_reset is the net tied to .reset on the spu_wen, spu_mast, spu_mald and spu_maaddr instances.
+		   .spu_mactl_ctl_reset	(spu_mactl_ctl_reset),
+		   .spu_mactl_mem_reset_l	(spu_mactl_mem_reset_l),
+
+			.spu_mactl_ma_kill_op		(spu_mactl_ma_kill_op),
+		     // Inputs
+		.mux_drive_disable		(mux_drive_disable),
+		.spu_mactl_stxa_force_abort		(spu_mactl_stxa_force_abort),	// NOTE(review): spu_mactl_-prefixed net under Inputs; its driver is not identifiable from this view
+		.lsu_spu_ldst_ack		(lsu_spu_ldst_ack),
+
+		.spu_wen_ma_unc_err		(spu_wen_ma_unc_err),
+		.spu_wen_ma_cor_err		(spu_wen_ma_cor_err),
+
+
+		// NOTE(review): spu_maaddr_len_cntr is listed under Inputs both here and on the spu_maaddr instance; confirm which module drives it.
+		     .spu_maaddr_len_cntr		(spu_maaddr_len_cntr[5:0]),
+		     .ifu_spu_nceen		(ifu_spu_nceen[3:0]),
+
+		   .spu_mast_streq	(spu_mast_streq),
+
+			.lsu_spu_stb_empty		(lsu_spu_stb_empty[3:0]),
+
+		     .grst_l		(grst_l),
+		     .arst_l		(arst_l),
+		     .rclk		(rclk),
+		 .se			(se),
+		     //.si		(si),
+		     //.se		(se),
+		// .se is connected above; the commented-out .si/.se lines are left as found.
+
+		     .spu_mald_memwen	(spu_mald_memwen),
+		     .spu_mamul_memwen	(spu_mamul_memwen),
+		     .spu_mamul_memren	(spu_mamul_memren),
+		     .spu_maaeqb_memwen	(spu_maaeqb_memwen),
+		     .spu_maaeqb_memren	(spu_maaeqb_memren),
+		     .spu_mared_memren	(spu_mared_memren),
+		     .spu_mared_memwen	(spu_mared_memwen),
+		     .spu_mast_memren	(spu_mast_memren),
+		     .lsu_spu_early_flush_g(lsu_spu_early_flush_g),
+		     .tlu_spu_flush_w(tlu_spu_flush_w),
+		     .ifu_spu_flush_w(ifu_spu_flush_w),
+		     .ifu_spu_inst_vld_w(ifu_spu_inst_vld_w),
+		     .lsu_spu_asi_state_e(lsu_spu_asi_state_e[7:0]),
+		     .ifu_lsu_ld_inst_e	(ifu_lsu_ld_inst_e),
+		     .ifu_lsu_st_inst_e	(ifu_lsu_st_inst_e),
+		     .ifu_lsu_alt_space_e(ifu_lsu_alt_space_e),
+		     .ifu_tlu_thrid_e	(ifu_tlu_thrid_e[1:0]),
+		     .exu_lsu_ldst_va_e	(exu_lsu_ldst_va_e[7:0]),
+		     .spu_mald_done_set	(spu_mald_done_set),
+		     .spu_mast_done_set	(spu_mast_done_set),
+		     .spu_mared_done_set(spu_mared_done_set),
+		     .spu_maexp_done_set(spu_maexp_done_set),
+		     .spu_maexp_memren	(spu_maexp_memren),
+		     .spu_maaddr_nooddwr_on_leneq1(spu_maaddr_nooddwr_on_leneq1),
+		     .spu_mared_not_idle(spu_mared_not_idle),
+		     .spu_mamul_oprnd2_bypass(spu_mamul_oprnd2_bypass),
+		     .spu_madp_perr(spu_madp_perr));
+
+// -------------------------------------------------------------------------
+spu_maaddr spu_maaddr (//in
+		//.lsu_spu_stxa_data		(lsu_tlu_st_rs3_data_g[5:0]),
+		.spu_madp_mactl_reg		(spu_mactl_mactl_len[5:0]),
+		.spu_madp_mpa_addr_3		(spu_madp_mpa_addr[3]),
+
+		       
+		       /*AUTOINST*/
+		       // Outputs
+		       .spu_maaddr_len_neqz(spu_maaddr_len_neqz),
+		       .spu_maaddr_mpa1maddr0(spu_maaddr_mpa1maddr0),
+		       .spu_maaddr_memindx(spu_maaddr_memindx[7:1]),
+		       .spu_maaddr_mamem_eveodd_sel_l(spu_maaddr_mamem_eveodd_sel_l[3:0]),
+		       .spu_maaddr_iequtwolenplus2(spu_maaddr_iequtwolenplus2),
+		       .spu_maaddr_iequtwolenplus1(spu_maaddr_iequtwolenplus1),
+		       .spu_maaddr_jequiminus1(spu_maaddr_jequiminus1),
+		       .spu_maaddr_jequlen(spu_maaddr_jequlen),
+		       .spu_maaddr_halfpnt_set(spu_maaddr_halfpnt_set),
+		       .spu_maaddr_len_eqmax(spu_maaddr_len_eqmax),
+		       .spu_maaddr_esmod64(spu_maaddr_esmod64),
+		       .spu_maaddr_esmax(spu_maaddr_esmax),
+		       .spu_maaddr_nooddwr_on_leneq1(spu_maaddr_nooddwr_on_leneq1),
+		       .spu_maaddr_mpa_addrinc(spu_maaddr_mpa_addrinc),
+		       .spu_maaddr_mpa_wen(spu_maaddr_mpa_wen),
+		       .spu_maaddr_mpa_incr_val(spu_maaddr_mpa_incr_val[4:0]),
+		       .spu_maaddr_jequiminus1rshft(spu_maaddr_jequiminus1rshft),
+		       .spu_maaddr_iequtwolen(spu_maaddr_iequtwolen),
+		       .spu_maaddr_ieven(spu_maaddr_ieven),
+		       .spu_maaddr_ieq0	(spu_maaddr_ieq0),
+		       .spu_maaddr_aequb(spu_maaddr_aequb),
+			.spu_maaddr_jptr_eqz_mared		(spu_maaddr_jptr_eqz_mared),
+		       // Inputs
+
+		     .spu_mamul_rst	(spu_mamul_rst),
+		     .mem_bypass(mem_bypass),
+		     .sehold(sehold),
+
+		     .spu_mamul_jjptr_sel(spu_mamul_jjptr_sel),
+
+		     .spu_maaddr_len_cntr(spu_maaddr_len_cntr[5:0]),
+
+		.spu_maaddr_wrindx_0		(spu_maaddr_wrindx_0),
+
+		       .reset		(spu_mactl_ctl_reset),
+		       .rclk		(rclk),
+		 .se			(se),
+		       .spu_mald_force_mpa_add16(spu_mald_force_mpa_add16),
+		       .spu_mactl_ldop	(spu_mactl_ldop),
+		       .spu_madp_maaddr_reg_in(spu_madp_maaddr_reg[47:0]),
+		       .spu_mald_maaddr_addrinc(spu_mald_maaddr_addrinc),
+		       .spu_mald_mpa_addrinc(spu_mald_mpa_addrinc),
+		       .spu_mast_maaddr_addrinc(spu_mast_maaddr_addrinc),
+		       .spu_mast_mpa_addrinc(spu_mast_mpa_addrinc),
+		       .spu_mamul_memwen(spu_mamul_memwen),
+		       .spu_mamul_rst_iptr(spu_mamul_rst_iptr),
+		       .spu_mamul_rst_jptr(spu_mamul_rst_jptr),
+		       .spu_mamul_incr_iptr(spu_mamul_incr_iptr),
+		       .spu_mamul_incr_jptr(spu_mamul_incr_jptr),
+		       .spu_mamul_a_rd_oprnd_sel(spu_mamul_a_rd_oprnd_sel),
+		       .spu_mamul_ax_rd_oprnd_sel(spu_mamul_ax_rd_oprnd_sel),
+		       .spu_mamul_b_rd_oprnd_sel(spu_mamul_b_rd_oprnd_sel),
+		       .spu_mamul_ba_rd_oprnd_sel(spu_mamul_ba_rd_oprnd_sel),
+		       .spu_mamul_m_rd_oprnd_sel(spu_mamul_m_rd_oprnd_sel),
+		       .spu_mamul_n_rd_oprnd_sel(spu_mamul_n_rd_oprnd_sel),
+		       .spu_mamul_m_wr_oprnd_sel(spu_mamul_m_wr_oprnd_sel),
+		       .spu_mared_me_rd_oprnd_sel(spu_mared_me_rd_oprnd_sel),
+		       .spu_mared_xe_wr_oprnd_sel(spu_mared_xe_wr_oprnd_sel),
+		       .spu_mamul_me_rd_oprnd_sel(spu_mamul_me_rd_oprnd_sel),
+		       .spu_mamul_me_wr_oprnd_sel(spu_mamul_me_wr_oprnd_sel),
+		       .spu_mamul_i_ptr_sel(spu_mamul_i_ptr_sel),
+		       .spu_mamul_iminus1_ptr_sel(spu_mamul_iminus1_ptr_sel),
+		       .spu_mamul_j_ptr_sel(spu_mamul_j_ptr_sel),
+		       .spu_mamul_iminusj_ptr_sel(spu_mamul_iminusj_ptr_sel),
+		       .spu_mamul_iminuslenminus1_sel(spu_mamul_iminuslenminus1_sel),
+		       .spu_mamul_jjptr_wen(spu_mamul_jjptr_wen),
+		       .spu_mactl_iss_pulse(spu_mactl_iss_pulse),
+		     .spu_mactl_iss_pulse_pre(spu_mactl_iss_pulse_pre),
+		       .spu_mared_m_rd_oprnd_sel(spu_mared_m_rd_oprnd_sel),
+		       .spu_mared_nm_rd_oprnd_sel(spu_mared_nm_rd_oprnd_sel),
+		       .spu_mared_x_wr_oprnd_sel(spu_mared_x_wr_oprnd_sel),
+		       .spu_mared_a_rd_oprnd_sel(spu_mared_a_rd_oprnd_sel),
+		       .spu_mared_nr_rd_oprnd_sel(spu_mared_nr_rd_oprnd_sel),
+		       .spu_mared_r_wr_oprnd_sel(spu_mared_r_wr_oprnd_sel),
+		       .spu_mared_memwen(spu_mared_memwen),
+		       .spu_mared_j_ptr_sel(spu_mared_j_ptr_sel),
+		       .spu_mared_update_jptr(spu_mared_update_jptr),
+		       .spu_mared_rst_jptr(spu_mared_rst_jptr),
+		       .spu_mared_maxlen_wen(spu_mared_maxlen_wen),
+		       .spu_mared_cin_set_4sub(spu_mared_cin_set_4sub),
+		       .spu_mast_memren	(spu_mast_memren),
+		       .spu_mared_start_wen(spu_mared_start_wen),
+		       .spu_mared_start_sel(spu_mared_start_sel),
+		       .spu_maexp_incr_es_ptr(spu_maexp_incr_es_ptr),
+		       .spu_maexp_e_rd_oprnd_sel(spu_maexp_e_rd_oprnd_sel),
+		       .spu_maexp_es_max_init(spu_maexp_es_max_init),
+		       .spu_maexp_es_e_ptr_rst(spu_maexp_es_e_ptr_rst),
+		       .spu_maaeqb_memwen(spu_maaeqb_memwen),
+		       .spu_maaeqb_irshft_sel(spu_maaeqb_irshft_sel),
+		       .spu_mared_update_redwr_jptr(spu_mared_update_redwr_jptr),
+		       .spu_mared_jjptr_wen(spu_mared_jjptr_wen));
+
+// -------------------------------------------------------------------------
+//spu_mamem spu_mamem (/*AUTOINST*/);
+
+// -------------------------------------------------------------------------
+//spu_madp spu_madp (/*AUTOINST*/);
+
+// -------------------------------------------------------------------------
+
+spu_mamul spu_mamul (//in
+		     
+		     /*AUTOINST*/
+		     // Outputs
+		     //.so		(so),
+		     .spu_mamul_memwen	(spu_mamul_memwen),
+		     .spu_mamul_memren	(spu_mamul_memren),
+		     .spu_mamul_rst_iptr(spu_mamul_rst_iptr),
+		     .spu_mamul_rst_jptr(spu_mamul_rst_jptr),
+		     .spu_mamul_incr_iptr(spu_mamul_incr_iptr),
+		     .spu_mamul_incr_jptr(spu_mamul_incr_jptr),
+		     .spu_mamul_a_rd_oprnd_sel(spu_mamul_a_rd_oprnd_sel),
+		     .spu_mamul_ax_rd_oprnd_sel(spu_mamul_ax_rd_oprnd_sel),
+		     .spu_mamul_b_rd_oprnd_sel(spu_mamul_b_rd_oprnd_sel),
+		     .spu_mamul_ba_rd_oprnd_sel(spu_mamul_ba_rd_oprnd_sel),
+		     .spu_mamul_m_rd_oprnd_sel(spu_mamul_m_rd_oprnd_sel),
+		     .spu_mamul_me_rd_oprnd_sel(spu_mamul_me_rd_oprnd_sel),
+		     .spu_mamul_n_rd_oprnd_sel(spu_mamul_n_rd_oprnd_sel),
+		     .spu_mamul_m_wr_oprnd_sel(spu_mamul_m_wr_oprnd_sel),
+		     .spu_mamul_me_wr_oprnd_sel(spu_mamul_me_wr_oprnd_sel),
+		     .spu_mamul_i_ptr_sel(spu_mamul_i_ptr_sel),
+		     .spu_mamul_iminus1_ptr_sel(spu_mamul_iminus1_ptr_sel),
+		     .spu_mamul_j_ptr_sel(spu_mamul_j_ptr_sel),
+		     .spu_mamul_iminusj_ptr_sel(spu_mamul_iminusj_ptr_sel),
+		     .spu_mamul_iminuslenminus1_sel(spu_mamul_iminuslenminus1_sel),
+		     .spu_mamul_jjptr_wen(spu_mamul_jjptr_wen),
+		     .spu_mamul_oprnd2_wen(spu_mamul_oprnd2_wen),
+		     .spu_mamul_oprnd2_bypass(spu_mamul_oprnd2_bypass),
+		     .spu_mamul_oprnd1_mxsel_l(spu_mamul_oprnd1_mxsel_l[2:0]),
+		     .spu_mamul_oprnd1_wen	(spu_mamul_oprnd1_wen),
+		     .spu_mul_req_vld	(spu_mul_req_vld),
+		     .spu_mul_areg_shf	(spu_mul_areg_shf),
+		     .spu_mul_acc	(spu_mul_acc),
+		     .spu_mul_areg_rst	(spu_mul_areg_rst),
+		     .spu_mamul_rst	(spu_mamul_rst),
+		     .spu_mamul_mul_done(spu_mamul_mul_done),
+
+		     .spu_mamul_jjptr_sel(spu_mamul_jjptr_sel),
+		     // Inputs
+
+		.spu_mactl_stxa_force_abort		(spu_mactl_stxa_force_abort),
+			.spu_mactl_kill_op		(spu_mactl_ma_kill_op),
+
+		     .spu_maaeqb_jjptr_sel(spu_maaeqb_jjptr_sel),
+
+		     .reset		(spu_mactl_ctl_reset),
+		     .rclk		(rclk),
+		 .se			(se),
+		     //.si		(si),
+		     //.se		(se),
+		     .spu_maaddr_iequtwolenplus2(spu_maaddr_iequtwolenplus2),
+		     .spu_maaddr_iequtwolenplus1(spu_maaddr_iequtwolenplus1),
+		     .spu_maaddr_jequiminus1(spu_maaddr_jequiminus1),
+		     .spu_maaddr_jequlen(spu_maaddr_jequlen),
+		     .spu_maaddr_halfpnt_set(spu_maaddr_halfpnt_set),
+		     .mul_spu_ack	(mul_spu_ack),
+		     .mul_spu_shf_ack	(mul_spu_shf_ack),
+		     .spu_mactl_mulop	(spu_mactl_mulop),
+		     .spu_mactl_iss_pulse_dly(spu_mactl_iss_pulse_dly),
+		     .spu_mared_oprnd2_wen(spu_mared_oprnd2_wen),
+		     .spu_maexp_start_mulred_anoteqb(spu_maexp_start_mulred_anoteqb),
+		     .spu_mactl_expop	(spu_mactl_expop),
+		     .spu_maaddr_aequb	(spu_maaddr_aequb),
+		     .spu_maaeqb_rst_iptr(spu_maaeqb_rst_iptr),
+		     .spu_maaeqb_rst_jptr(spu_maaeqb_rst_jptr),
+		     .spu_maaeqb_incr_iptr(spu_maaeqb_incr_iptr),
+		     .spu_maaeqb_incr_jptr(spu_maaeqb_incr_jptr),
+		     .spu_maaeqb_a_rd_oprnd_sel(spu_maaeqb_a_rd_oprnd_sel),
+		     .spu_maaeqb_ax_rd_oprnd_sel(spu_maaeqb_ax_rd_oprnd_sel),
+		     .spu_maaeqb_m_rd_oprnd_sel(spu_maaeqb_m_rd_oprnd_sel),
+		     .spu_maaeqb_me_rd_oprnd_sel(spu_maaeqb_me_rd_oprnd_sel),
+		     .spu_maaeqb_n_rd_oprnd_sel(spu_maaeqb_n_rd_oprnd_sel),
+		     .spu_maaeqb_m_wr_oprnd_sel(spu_maaeqb_m_wr_oprnd_sel),
+		     .spu_maaeqb_me_wr_oprnd_sel(spu_maaeqb_me_wr_oprnd_sel),
+		     .spu_maaeqb_iminus1_ptr_sel(spu_maaeqb_iminus1_ptr_sel),
+		     .spu_maaeqb_j_ptr_sel(spu_maaeqb_j_ptr_sel),
+		     .spu_maaeqb_iminusj_ptr_sel(spu_maaeqb_iminusj_ptr_sel),
+		     .spu_maaeqb_iminuslenminus1_sel(spu_maaeqb_iminuslenminus1_sel),
+		     .spu_maaeqb_jjptr_wen(spu_maaeqb_jjptr_wen),
+		     .spu_maaeqb_oprnd2_wen(spu_maaeqb_oprnd2_wen),
+		     .spu_maaeqb_oprnd2_bypass(spu_maaeqb_oprnd2_bypass),
+		     .spu_maaeqb_oprnd1_mxsel(spu_maaeqb_oprnd1_mxsel[1:0]),
+		     .spu_maaeqb_oprnd1_wen(spu_maaeqb_oprnd1_wen),
+		     .spu_maaeqb_mul_req_vld(spu_maaeqb_mul_req_vld),
+		     .spu_maaeqb_mul_areg_shf(spu_maaeqb_mul_areg_shf),
+		     .spu_maaeqb_mul_acc(spu_maaeqb_mul_acc),
+		     .spu_maaeqb_mul_areg_rst(spu_maaeqb_mul_areg_rst),
+		     .spu_maaeqb_mul_done(spu_maaeqb_mul_done));
+
+// -------------------------------------------------------------------------
+
+spu_mared spu_mared (//in
+		.mul_data_out_0			(mul_data_out[0]),		      
+		     
+		     /*AUTOINST*/
+		     // Outputs
+		     .spu_mared_data_sel_l(spu_mared_data_sel_l[3:0]),
+		     .spu_mared_j_ptr_sel(spu_mared_j_ptr_sel),
+		     .spu_mared_nm_rd_oprnd_sel(spu_mared_nm_rd_oprnd_sel),
+		     .spu_mared_m_rd_oprnd_sel(spu_mared_m_rd_oprnd_sel),
+		     .spu_mared_me_rd_oprnd_sel(spu_mared_me_rd_oprnd_sel),
+		     .spu_mared_x_wr_oprnd_sel(spu_mared_x_wr_oprnd_sel),
+		     .spu_mared_xe_wr_oprnd_sel(spu_mared_xe_wr_oprnd_sel),
+		     .spu_mared_nr_rd_oprnd_sel(spu_mared_nr_rd_oprnd_sel),
+		     .spu_mared_a_rd_oprnd_sel(spu_mared_a_rd_oprnd_sel),
+		     .spu_mared_r_wr_oprnd_sel(spu_mared_r_wr_oprnd_sel),
+		     .spu_mared_update_jptr(spu_mared_update_jptr),
+		     .spu_mared_rst_jptr(spu_mared_rst_jptr),
+		     .spu_mared_maxlen_wen(spu_mared_maxlen_wen),
+		     .spu_mared_rdn_wen	(spu_mared_rdn_wen),
+		     .spu_mared_oprnd2_wen(spu_mared_oprnd2_wen),
+		     .spu_mared_memren	(spu_mared_memren),
+		     .spu_mared_memwen	(spu_mared_memwen),
+		     .spu_mared_cin_set_4sub(spu_mared_cin_set_4sub),
+		     .spu_mared_cin_oprnd_sub_mod(spu_mared_cin_oprnd_sub_mod),
+		     .spu_mared_done_set(spu_mared_done_set),
+		     .spu_mared_start_wen(spu_mared_start_wen),
+		     .spu_mared_start_sel(spu_mared_start_sel),
+		     .spu_mared_red_done(spu_mared_red_done),
+		     .spu_mared_update_redwr_jptr(spu_mared_update_redwr_jptr),
+		     .spu_mared_jjptr_wen(spu_mared_jjptr_wen),
+		     .spu_mared_not_idle(spu_mared_not_idle),
+		     // Inputs
+
+		.spu_mactl_stxa_force_abort		(spu_mactl_stxa_force_abort),
+			.spu_mactl_kill_op		(spu_mactl_ma_kill_op),
+
+		     .reset		(spu_mactl_ctl_reset),
+		     .rclk		(rclk),
+		 .se			(se),
+		     .spu_madp_m_eq_n	(spu_madp_m_eq_n),
+		     .spu_madp_m_lt_n	(spu_madp_m_lt_n),
+		     .spu_mactl_expop	(spu_mactl_expop),
+		     .spu_mactl_mulop	(spu_mactl_mulop),
+		     .spu_mactl_redop	(spu_mactl_redop),
+		     .spu_mamul_mul_done(spu_mamul_mul_done),
+		     .spu_mactl_iss_pulse_dly(spu_mactl_iss_pulse_dly),
+		     .spu_maaddr_jptr_eqz(spu_maaddr_jptr_eqz_mared),
+		     .spu_maaddr_len_eqmax(spu_maaddr_len_eqmax),
+		     .spu_mast_stbuf_wen(spu_mast_stbuf_wen),
+		     .spu_madp_cout_oprnd_sub_mod(spu_madp_cout_oprnd_sub_mod));
+// -------------------------------------------------------------------------
+
+spu_maexp spu_maexp (//in
+		     
+		     /*AUTOINST*/
+		     // Outputs
+		     .spu_maexp_e_rd_oprnd_sel(spu_maexp_e_rd_oprnd_sel),
+		     .spu_maexp_shift_e	(spu_maexp_shift_e),
+		     .spu_maexp_e_data_wen(spu_maexp_e_data_wen),
+		     .spu_maexp_incr_es_ptr(spu_maexp_incr_es_ptr),
+		     .spu_maexp_es_max_init(spu_maexp_es_max_init),
+		     .spu_maexp_es_e_ptr_rst(spu_maexp_es_e_ptr_rst),
+		     .spu_maexp_done_set(spu_maexp_done_set),
+		     .spu_maexp_memren	(spu_maexp_memren),
+		     .spu_maexp_start_mulred_aequb(spu_maexp_start_mulred_aequb),
+		     .spu_maexp_start_mulred_anoteqb(spu_maexp_start_mulred_anoteqb),
+		     // Inputs
+
+		.spu_mactl_stxa_force_abort		(spu_mactl_stxa_force_abort),
+			.spu_mactl_kill_op		(spu_mactl_ma_kill_op),
+
+		     .reset		(spu_mactl_ctl_reset),
+		     .rclk		(rclk),
+		 .se			(se),
+		     .spu_maaddr_esmax	(spu_maaddr_esmax),
+		     .spu_maaddr_esmod64(spu_maaddr_esmod64),
+		     .spu_madp_e_eq_one	(spu_madp_e_eq_one),
+		     .spu_mared_red_done(spu_mared_red_done),
+		     .spu_mactl_iss_pulse_dly(spu_mactl_iss_pulse_dly),
+		     .spu_mactl_expop	(spu_mactl_expop));
+
+// -------------------------------------------------------------------------
+
+
+spu_maaeqb spu_maaeqb (//out
+                .spu_maaeqb_a_leftshft        	(spu_mul_mulres_lshft),
+		       
+		       /*AUTOINST*/
+		       // Outputs
+		       .spu_maaeqb_memwen(spu_maaeqb_memwen),
+		       .spu_maaeqb_memren(spu_maaeqb_memren),
+		       .spu_maaeqb_rst_iptr(spu_maaeqb_rst_iptr),
+		       .spu_maaeqb_rst_jptr(spu_maaeqb_rst_jptr),
+		       .spu_maaeqb_incr_iptr(spu_maaeqb_incr_iptr),
+		       .spu_maaeqb_incr_jptr(spu_maaeqb_incr_jptr),
+		       .spu_maaeqb_a_rd_oprnd_sel(spu_maaeqb_a_rd_oprnd_sel),
+		       .spu_maaeqb_ax_rd_oprnd_sel(spu_maaeqb_ax_rd_oprnd_sel),
+		       .spu_maaeqb_m_rd_oprnd_sel(spu_maaeqb_m_rd_oprnd_sel),
+		       .spu_maaeqb_me_rd_oprnd_sel(spu_maaeqb_me_rd_oprnd_sel),
+		       .spu_maaeqb_n_rd_oprnd_sel(spu_maaeqb_n_rd_oprnd_sel),
+		       .spu_maaeqb_m_wr_oprnd_sel(spu_maaeqb_m_wr_oprnd_sel),
+		       .spu_maaeqb_me_wr_oprnd_sel(spu_maaeqb_me_wr_oprnd_sel),
+		       .spu_maaeqb_iminus1_ptr_sel(spu_maaeqb_iminus1_ptr_sel),
+		       .spu_maaeqb_j_ptr_sel(spu_maaeqb_j_ptr_sel),
+		       .spu_maaeqb_iminusj_ptr_sel(spu_maaeqb_iminusj_ptr_sel),
+		       .spu_maaeqb_iminuslenminus1_sel(spu_maaeqb_iminuslenminus1_sel),
+		       .spu_maaeqb_irshft_sel(spu_maaeqb_irshft_sel),
+		       .spu_maaeqb_jjptr_wen(spu_maaeqb_jjptr_wen),
+		       .spu_maaeqb_oprnd2_wen(spu_maaeqb_oprnd2_wen),
+		       .spu_maaeqb_oprnd2_bypass(spu_maaeqb_oprnd2_bypass),
+		     .spu_maaeqb_oprnd1_mxsel(spu_maaeqb_oprnd1_mxsel[1:0]),
+		     .spu_maaeqb_oprnd1_wen(spu_maaeqb_oprnd1_wen),
+		       .spu_maaeqb_mul_req_vld(spu_maaeqb_mul_req_vld),
+		       .spu_maaeqb_mul_areg_shf(spu_maaeqb_mul_areg_shf),
+		       .spu_maaeqb_mul_acc(spu_maaeqb_mul_acc),
+		       .spu_maaeqb_mul_areg_rst(spu_maaeqb_mul_areg_rst),
+		       .spu_maaeqb_mul_done(spu_maaeqb_mul_done),
+
+		     .spu_maaeqb_jjptr_sel(spu_maaeqb_jjptr_sel),
+
+		       // Inputs
+
+		.spu_mactl_stxa_force_abort		(spu_mactl_stxa_force_abort),
+			.spu_mactl_kill_op		(spu_mactl_ma_kill_op),
+
+		       .reset		(spu_mactl_ctl_reset),
+		       .rclk		(rclk),
+		 .se			(se),
+		       .spu_maaddr_iequtwolenplus2(spu_maaddr_iequtwolenplus2),
+		       .spu_maaddr_iequtwolenplus1(spu_maaddr_iequtwolenplus1),
+		       .spu_maaddr_jequiminus1(spu_maaddr_jequiminus1),
+		       .spu_maaddr_jequlen(spu_maaddr_jequlen),
+		       .spu_maaddr_halfpnt_set(spu_maaddr_halfpnt_set),
+		       .mul_spu_ack	(mul_spu_ack),
+		       .mul_spu_shf_ack	(mul_spu_shf_ack),
+		       .spu_mactl_mulop	(spu_mactl_mulop),
+		       .spu_mactl_iss_pulse_dly(spu_mactl_iss_pulse_dly),
+		       .spu_maexp_start_mulred_aequb(spu_maexp_start_mulred_aequb),
+		       .spu_mactl_expop	(spu_mactl_expop),
+		       .spu_maaddr_jequiminus1rshft(spu_maaddr_jequiminus1rshft),
+		       .spu_maaddr_iequtwolen(spu_maaddr_iequtwolen),
+		       .spu_maaddr_ieven(spu_maaddr_ieven),
+		       .spu_maaddr_ieq0	(spu_maaddr_ieq0),
+		       .spu_maaddr_aequb(spu_maaddr_aequb));
+
+
+
+endmodule
Index: /trunk/T1-CPU/mul/sparc_mul_cntl.v
===================================================================
--- /trunk/T1-CPU/mul/sparc_mul_cntl.v	(revision 6)
+++ /trunk/T1-CPU/mul/sparc_mul_cntl.v	(revision 6)
@@ -0,0 +1,162 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_mul_cntl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module sparc_mul_cntl(	// Request picker and ACCUM-register control for multiply requests from EXU and SPU
+  ecl_mul_req_vld,
+  spu_mul_req_vld,
+  spu_mul_acc,
+  spu_mul_areg_shf,
+  spu_mul_areg_rst,
+  spu_mul_mulres_lshft,
+  c0_act,
+  spick,
+  byp_sel,
+  byp_imm,
+  acc_imm,
+  acc_actc2,
+  acc_actc3,
+  acc_actc5,
+  acc_reg_enb,
+  acc_reg_rst,
+  acc_reg_shf,
+  x2,
+  mul_ecl_ack,
+  mul_spu_ack,
+  mul_spu_shf_ack,
+  rst_l,
+  rclk
+  );
+
+input		rclk;			// Clock; aliased to the internal net clk
+input		rst_l;			// System reset, active low (ANDed into flop inputs, except acc_reg_rst)
+input		ecl_mul_req_vld; 	// Input request from EXU to MUL
+input		spu_mul_req_vld;	// Input request from SPU to MUL
+input		spu_mul_acc;		// 1: SPU mul op req will accumulate the ACCUM register 
+input 		spu_mul_areg_shf;	// ACCUM shift right 64-bit
+input 		spu_mul_areg_rst;	// ACCUM reset; initialization of modular multiplication
+input		spu_mul_mulres_lshft;	// For x2 of op1*op2*2 left shift
+output		c0_act;			// cycle-0 of multiplier operation (epick | spick)
+output		spick;			// SPU request is picked in this cycle
+output		byp_sel;		// Bypass mux control
+output		byp_imm;		// Driven directly by acc_actc5
+output		acc_imm;		// Combination of acc_actc2..acc_actc5 (see assign near the end)
+output		acc_actc2, acc_actc3;	// accumulate enable for LSB-32 and All-96
+output		acc_actc5;		// accumulate enable for LSB-32 and All-96
+output		acc_reg_enb;		// ACCUM register enable
+output		acc_reg_rst;		// ACCUM register reset
+output		acc_reg_shf;		// ACCUM register shift select
+output		x2;			// spick qualified by spu_mul_mulres_lshft
+output		mul_ecl_ack;		// Ack EXU multiplier operation is accepted.
+output		mul_spu_ack;		// Ack SPU multiplier operation is accepted.
+output		mul_spu_shf_ack;	// Ack SPU shift operation is accepted.
+
+reg 		mul_ecl_ack_d;		// epick registered one cycle later (used for favor_e)
+reg		mul_spu_ack_d;		// spick registered one cycle later (used for favor_e)
+reg		c1_act;			// Squash all mul requests from EXU and SPU if c1_act = 1
+reg		c2_act;			// Squash bypass ACCUM mul request from SPU if c2_act = 1; NOTE(review): here c2_act only feeds c3_act
+reg		c3_act;			// Enable >>32 results back to CSA2 if c3_act = 1; NOTE(review): here c3_act is only used in noshft
+reg		favor_e;		// Flag for alternate picker, favor EXU if favor_e = 1
+reg 		acc_actc1, acc_actc2, acc_actc3, acc_actc4, acc_actc5;
+reg		acc_reg_shf, acc_reg_rst; 
+
+wire		exu_req_vld, spu_req_vld;
+wire		epick;			// Internal pick signals of exu, spu multiplier
+wire		nobyps;			// Squash SPU bypass mul requests when nobyps = 1
+wire		noshft;			// Squash SPU ACCUM shift requests when noshft = 1
+wire		acc_reg_shf_in;
+wire 		spu_mul_byp = ~spu_mul_acc ; 
+wire		clk;
+
+
+/////////////////////////////////////////
+// Requests picker and general control //
+/////////////////////////////////////////
+
+assign clk = rclk ;
+
+assign	c0_act = epick | spick ;				// Cycle0 of multiplier operation
+//assign	c1_act = mul_ecl_ack_d | mul_spu_ack_d ;		// Cycle1 of multiplier operation
+assign  nobyps = c1_act | acc_actc2 | acc_actc3 | acc_actc4 ; 	// Cycles prevent the SPU bypass 
+
+assign  x2 = spick & spu_mul_mulres_lshft;
+
+assign	exu_req_vld = ecl_mul_req_vld & ~c1_act ;
+assign	spu_req_vld = spu_mul_req_vld & ~c1_act & ~(nobyps & spu_mul_byp); 
+
+assign	epick = exu_req_vld & ( favor_e | ~spu_req_vld) ; 
+assign  spick = spu_req_vld & (~favor_e | ~exu_req_vld) ;
+
+// moved this one cycle earlier   
+assign    mul_spu_ack = rst_l & spick ;
+assign    mul_ecl_ack = rst_l & epick ;
+   
+always @(posedge clk)
+  begin
+	mul_ecl_ack_d <= rst_l & epick ;
+	mul_spu_ack_d <= rst_l & spick ;
+	c1_act <= rst_l & c0_act ;
+	c2_act <= rst_l & c1_act ; 
+	c3_act <= rst_l & c2_act ; 
+
+	favor_e <= rst_l & (mul_spu_ack_d & ~mul_ecl_ack_d);		// set when the registered acks show SPU picked and EXU not
+  end
+
+/////////////////////////////////////////////////
+// SPU accumulate and bypass and shift control //
+/////////////////////////////////////////////////
+
+assign 	byp_sel = spick & spu_mul_byp ;	// SPU bypass operand is picked 
+
+//////////////////////////////////////////////////////////////////////////
+//	No ACCUM >>= 64 is allowed if there is 				//
+//	1) accumulate mul before cycle4 which needs to update ACCUM	//
+//	2) Any mul at cyc3 which will use the same output mux at cyc-5	//
+//////////////////////////////////////////////////////////////////////////
+assign  noshft = acc_actc1 | acc_actc2 | c3_act | acc_actc4 ;
+
+						// Squash shift if:
+assign  acc_reg_shf_in =   spu_mul_areg_shf &	// No shift request
+			  ~noshft	    &	// SPU accum mul in cycle1~4 or EXU mul in cycle3
+			  ~acc_reg_shf ;	// reset SPU shift request for 1-cycle for signal update
+
+always @(posedge clk)
+  begin
+	acc_reg_shf <= rst_l & acc_reg_shf_in ;		// latch ACCUM reg shift control
+
+	acc_reg_rst <=  spu_mul_areg_rst ;		// latch input control of ACCUM reg reset (not gated by rst_l)
+
+        acc_actc1 <= rst_l & (spick & spu_mul_acc) ;	// SPU MAC in cycle 1
+        acc_actc2 <= rst_l & acc_actc1 ;			// SPU MAC in cycle 2 
+        acc_actc3 <= rst_l & acc_actc2 ;			// SPU MAC in cycle 3 
+        acc_actc4 <= rst_l & acc_actc3 ;			// SPU MAC in cycle 4 
+        acc_actc5 <= rst_l & acc_actc4 ;			// SPU MAC in cycle 5 
+  end
+
+assign  mul_spu_shf_ack = acc_reg_shf;
+
+assign 	byp_imm = acc_actc5 ;
+
+assign 	acc_imm = (acc_actc2 & acc_actc4) | ((acc_actc2 | acc_actc3) & acc_actc5)  ; 
+
+assign 	acc_reg_enb = acc_actc5 | acc_reg_shf;		// enable of ACCUM registers 
+
+
+endmodule // sparc_mul_cntl
+
Index: /trunk/T1-CPU/mul/mul64.v
===================================================================
--- /trunk/T1-CPU/mul/mul64.v	(revision 6)
+++ /trunk/T1-CPU/mul/mul64.v	(revision 6)
@@ -0,0 +1,2443 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: mul64.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+/*//////////////////////////////////////////////////////////////////////
+//
+//  Module Name: mul64
+//  Description:        *This block implements the multiplier used in the modular multiplier
+//                       unit (MUL) and is shared by the sparc EXU and the streaming unit (SPU).
+//                       It is also used as the 54x54 multiplier in the FPU.
+//                      *It takes two 64-bit unsigned operands and an accumulated operand and
+//                       performs the 64x64 MAC operation at two-cycle throughput and 5-cycle latency.
+//                      *The mul_valid signal indicates the beginning of a new operation.
+//                       It MUST be deasserted in the next cycle to get the proper 2-cycle
+//                       latency operation in the csa array. If two operations are issued in
+//                       back-to-back cycles, the result of the first operation will be incorrect.
+//                      *Results are available on the 5th cycle after mul_valid, as shown below.
+//
+//			*The following inputs should be tied to "0" when used as a 64x64 multiplier
+//			 - areg 
+//			 - accreg 
+//			 - x2
+//
+//                         Cycle-0  | Cycle-1 | Cycle-2 | Cycle-3 | Cycle-4 | Cycle-5
+//                       1st        *         |         |         |         |
+//                       rs1, rs2   ^         |         |         |         | 1st results
+//                       valid=1    | valid=0 |         *         |         | available
+//                                1st         | 2nd OP  ^         |         |
+//                                setup       | valid=1 |         |         |
+//                                            |        2nd        |         |
+//                                            |       setup       |         |
+//
+*/
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN
+`define FPGA_SYN_MUL
+`endif
+
+`ifdef FPGA_SYN_MUL
+module mul64(rs1_l, rs2, valid, areg, accreg, x2, out, rclk, si, so, se, 
+	mul_rst_l, mul_step);
+// FPGA_SYN_MUL variant: multiplier built from 32-bit-slice products; si, se and mul_rst_l are not used here.
+	input	[63:0]		rs1_l;		// operand 1, inverted (captured as ~rs1_l)
+	input	[63:0]		rs2;		// operand 2
+	input			valid;		// loads new operands when mul_step is also high
+	input	[96:0]		areg;		// added into the running sum myout_a1
+	input	[135:129]	accreg;		// added to myout_a1[103:97] to form out[135:129]
+	input			x2;		// 1: operand 2 is captured shifted left by one bit
+	input			rclk;
+	input			si;		// unused in this variant
+	input			se;		// unused in this variant
+	input			mul_rst_l;	// unused in this variant
+	input			mul_step;	// enable for all datapath registers below
+	output			so;		// tied to 1'b0
+	output	[135:0]		out;
+
+reg [135:0] myout, myout_a1, myout_a2, myout_a3;	// myout_a2 and myout_a3 are never assigned or read
+
+reg [63:0] rs1_ff;
+reg [64:0] rs2_ff;	// one bit wider than rs2 to hold the x2 shift
+
+reg [63:0] par1, par2;	// par2 is never assigned or read
+reg [64:0] par3, par4;	// par4 is never assigned or read
+
+reg [5:0] state;	// shift register of valid; only state[3] is read
+
+always @(posedge rclk)
+  state <= {valid,state[5:1]};	// note: advances every clock, not gated by mul_step
+
+
+always @(posedge rclk) begin
+  if(mul_step) begin
+    if(valid) begin
+      rs1_ff <= ~rs1_l;
+      rs2_ff <= x2 ? {rs2,1'b0} : {1'b0,rs2};
+    end else begin
+      rs1_ff <= {32'b0, rs1_ff[63:32]};	// bring the upper 32 bits of operand 1 into the low slice
+    end
+    par1 <= (rs1_ff[31:0] * rs2_ff[31:0]);	// low slice of operand 1 times low 32 bits of operand 2
+    par3 <= rs1_ff[31:0] * rs2_ff[64:32];	// low slice of operand 1 times upper 33 bits of operand 2
+    myout_a1 <= ({32'b0, myout_a1[135:32]} & {136{state[3]}}) + par1 + {par3, 32'b0} + areg;	// previous sum >> 32 is included only when state[3] is set
+    myout <= {(myout_a1[103:97]+accreg),myout_a1[96:0],myout[63:32]};	// old myout[63:32] moves down to myout[31:0]
+  end 
+end
+
+assign out = myout;
+assign so = 1'b0;
+
+endmodule
+
+`else
+
+
+module mul64 (rs1_l, rs2, valid, areg, accreg, x2, out, rclk, si, so, se, mul_rst_l, mul_step);
+// Structural variant (FPGA_SYN_MUL not defined): mul_booth, two mul_array1 instances, mul_array2 and a final adder.
+input  [63:0]  	rs1_l;			// op1
+input  [63:0]  	rs2;			// op2
+input	       	valid;			// begin of the MUL operation
+input  [96:0]  	areg;			// accumulated input for ACCUM
+input  [135:129] accreg;		// direct input from ACCUM [135:129]
+input	       	x2;			// for op1*op2*2
+input	       	rclk, si, se, mul_rst_l, mul_step;
+output  	so;
+output [135:0] 	out;
+
+wire	       	cyc1, cyc2, cyc3;	// cycle stage of MUL
+wire [2:0]	b0, b1, b2,  b3,  b4,  b5,  b6,  b7;
+wire [2:0]	b8, b9, b10, b11, b12, b13, b14, b15;
+wire	    	b16;
+wire [63:0]	op1_l, op1;
+wire [81:0]	a0sum, a1sum, a0s, a1s; 
+wire [81:4]	a0cout, a1cout, a0c, a1c;
+wire		pcoutx2, psumx2;
+wire 		x2_c1, x2_c2, x2_c3, x2_c2c3;
+
+wire [98:0]	psum, pcout;
+wire [98:30]	pcout_in, pc;
+wire [98:31]	psum_in, ps;
+wire [96:0]	ary2_cout, addin_cout;
+wire [97:0]	ary2_sum,  addin_sum ;
+wire		add_cin, addin_cin, add_co31, add_co96;
+wire [103:0]	addout;
+wire		clk_enb0, clk_enb1;
+wire 		rst;
+wire		clk;
+wire		tm_l;
+
+  assign clk = rclk;
+  assign rst = ~mul_rst_l; 
+  assign tm_l = ~se;
+
+  clken_buf	ckbuf_0(.clk(clk_enb0), .rclk(clk), .enb_l(~mul_step), .tmb_l(tm_l));
+
+  /////////////////////////////////////////////////////////////////////
+  // 	States count: valid and x2 delayed by 1, 2 and 3 clk_enb0 cycles
+  /////////////////////////////////////////////////////////////////////
+  dffr_s  cyc1_dff(.din(valid), .clk(clk_enb0), .q(cyc1), .rst(rst), .se(se), .si(), .so());
+  dffr_s  cyc2_dff(.din(cyc1),  .clk(clk_enb0), .q(cyc2), .rst(rst), .se(se), .si(), .so());
+  dffr_s  cyc3_dff(.din(cyc2),  .clk(clk_enb0), .q(cyc3), .rst(rst), .se(se), .si(), .so());
+  dffr_s  x2c1_dff(.din(x2),    .clk(clk_enb0), .q(x2_c1), .rst(rst), .se(se), .si(), .so());
+  dffr_s  x2c2_dff(.din(x2_c1), .clk(clk_enb0), .q(x2_c2), .rst(rst), .se(se), .si(), .so());
+  dffr_s  x2c3_dff(.din(x2_c2), .clk(clk_enb0), .q(x2_c3), .rst(rst), .se(se), .si(), .so());
+
+  assign x2_c2c3 =  x2_c2 | x2_c3 ;
+	
+  /////////////////////////////////////////////////////////////////////
+  // 	Enable flops for op1 (clk_enb1 is enabled by valid & mul_step)
+  /////////////////////////////////////////////////////////////////////
+  clken_buf	ckbuf_1(.clk(clk_enb1), .rclk(clk), .enb_l(~(valid & mul_step)), .tmb_l(tm_l));
+  dff_s #(64)  	ffrs1  (.din(rs1_l[63:0]), .clk(clk_enb1), .q(op1_l[63:0]),
+			.se(se), .si(), .so());
+
+
+
+
+  assign op1[63:0] = ~op1_l[63:0];
+
+  mul_booth	 booth (.head (valid),
+			.b_in (rs2),
+			.b0   (b0),
+			.b1   (b1),
+			.b2   (b2),
+			.b3   (b3),
+			.b4   (b4),
+			.b5   (b5),
+			.b6   (b6),
+			.b7   (b7),
+			.b8   (b8),
+			.b9   (b9),
+			.b10  (b10),
+			.b11  (b11),
+			.b12  (b12),
+			.b13  (b13),
+			.b14  (b14),
+			.b15  (b15),
+			.b16  (b16),
+			.clk  (clk), .se(se), .si(), .so(), .mul_step(mul_step), .tm_l(tm_l));
+			
+  /////////////////////////////////////////////////////////////////////
+  // 	Two Array1 inst ary1_a0 & ary1_a1 with the output flops 
+  /////////////////////////////////////////////////////////////////////
+  mul_array1	ary1_a0(.cout (a0cout[81:4]),
+			.sum  (a0sum[81:0]),
+			.a    (op1),
+			.b0   (b0),
+			.b1   (b1),
+			.b2   (b2),
+			.b3   (b3),
+			.b4   (b4),
+			.b5   (b5),
+			.b6   (b6),
+			.b7   (b7),
+			.b8   (3'b000),
+			.head (cyc1),
+			.bot  (1'b0)); //array a is never at the bottom of 33-pp rows
+ 
+  dff_s #(78)  a0cot_dff (.din(a0cout[81:4]), .clk(clk_enb0), .q(a0c[81:4]),
+			.se(se), .si(), .so());
+  dff_s #(82)  a0sum_dff (.din(a0sum[81:0]), .clk(clk_enb0), .q(a0s[81:0]),
+			.se(se), .si(), .so());
+
+  mul_array1	ary1_a1(.cout (a1cout[81:4]),
+			.sum  (a1sum[81:0]),
+			.a    (op1),
+			.b0   (b8),
+			.b1   (b9),
+			.b2   (b10),
+			.b3   (b11),
+			.b4   (b12),
+			.b5   (b13),
+			.b6   (b14),
+			.b7   (b15),
+			.b8   ({1'b0,b16,1'b0}),
+			.head (1'b0),	//array b is never at the head of 33-pp rows
+			.bot  (cyc2)); 
+
+  dff_s #(78)  a1cot_dff (.din(a1cout[81:4]), .clk(clk_enb0), .q(a1c[81:4]),
+			.se(se), .si(), .so());
+  dff_s #(82)  a1sum_dff (.din(a1sum[81:0]), .clk(clk_enb0), .q(a1s[81:0]),
+			.se(se), .si(), .so());
+
+  /////////////////////////////////////////////////////////////////////
+  // 	Array2 with the reorder output mux-flops
+  /////////////////////////////////////////////////////////////////////
+  mul_array2 	 array2(.pcoutx2 (pcoutx2),
+			.psumx2  (psumx2),
+			.pcout 	 (pcout[98:0]),
+			.psum    (psum[98:0]), 
+			.a0c     (a0c[81:4]),
+			.a0s     (a0s[81:0]),
+			.a1c     (a1c[81:4]),
+			.a1s     (a1s[81:0]),
+			.pc	 (pc[98:30]),
+			.ps	 (ps[98:31]),
+			.areg    (areg[96:0]),
+			.bot     (cyc3),
+			.x2      (x2_c2c3));
+ 
+  //// Output re-order muxes and flops (in1, shifted up one bit with the *x2 bit appended, is selected when x2_c2c3 = 1) ////
+  dp_mux2es #(97)  ary2_cmux (.dout(ary2_cout[96:0]),
+                              .in0(pcout[96:0]),
+                              .in1({pcout[95:0],pcoutx2}),
+                              .sel(x2_c2c3));
+  dff_s #(97)  a2cot_dff (.din(ary2_cout[96:0]), .clk(clk_enb0), .q(addin_cout[96:0]), 
+              		.se(se), .si(), .so());
+
+  dp_mux2es #(98) ary2_smux (.dout(ary2_sum[97:0]),
+                             .in0(psum[97:0]),
+                             .in1({psum[96:0],psumx2}),
+                             .sel(x2_c2c3));
+  dff_s #(98)  a2sum_dff (.din(ary2_sum[97:0]), .clk(clk_enb0), .q(addin_sum[97:0]), 
+			.se(se), .si(), .so());
+
+  //// Pseudo sum & cout logic and flops (upper bits gated by cyc2, lowest bit by x2_c2) ////
+  assign psum_in[98:32]  = psum[98:32] & {67{cyc2}} ;
+  assign psum_in[31]     = psum[31] & x2_c2 ;
+
+  assign pcout_in[98:31] = pcout[98:31] & {68{cyc2}} ;
+  assign pcout_in[30]    = pcout[30] & x2_c2 ;
+  
+  dff_s #(68)  psum_dff  (.din(psum_in[98:31]), .clk(clk_enb0), .q(ps[98:31]),
+                	.se(se), .si(), .so());
+  dff_s #(69)  pcout_dff (.din(pcout_in[98:30]), .clk(clk_enb0), .q(pc[98:30]),
+            		.se(se), .si(), .so());
+
+  /////////////////////////////////////////////////////////////////////
+  // 	Adder (104-bit) 
+  /////////////////////////////////////////////////////////////////////
+
+  assign 	add_cin = add_co31 & cyc3 ;	// low-word carry is kept (registered as addin_cin) only when cyc3 = 1
+
+  assign {add_co31,addout[31:0]} =   {{1'b0},addin_sum[31:0]} 
+		     		   + {{1'b0},addin_cout[30:0],addin_cin} ;
+
+
+  assign {add_co96,addout[96:32]} =  addin_sum[97:32]	
+				  + addin_cout[96:31]
+				  + {{65'b0},add_co31} ;
+
+  assign 	addout[103:97] =  accreg[135:129] + {{6'b0},add_co96} ;
+
+  /////////////////////////////////////////////////////////////////////
+  // 	Pipe adder outputs (out[31:0] is loaded from the previous out[63:32])
+  /////////////////////////////////////////////////////////////////////
+
+  dff_s  	      co31_dff (.din(add_cin), .clk(clk_enb0), .q(addin_cin),
+       			.se(se), .si(), .so());
+
+  dff_s #(104)   out_dff (.din(addout[103:0]), .clk(clk_enb0), .q(out[135:32]),
+              		.se(se), .si(), .so());
+
+  dff_s #(32)    pip_dff (.din(out[63:32]), .clk(clk_enb0), .q(out[31:0]),
+               		.se(se), .si(), .so());
+
+endmodule // mul64
+
+
+
+
+////////////////////////////////////////////////////////////////////////
+//	Sub-module for mul64
+////////////////////////////////////////////////////////////////////////
+
+module mul_array1 ( cout, sum, a, b0, b1, b2, b3, b4, b5, b6, b7, b8,
+     bot, head );
+// Structural netlist: three mul_ppgenrow3 rows (I0, I1, I2) are merged column by column into one sum/cout vector pair.
+input  bot, head;  // bot is wired to row I2 only; head is wired to row I0 only
+output [81:4]  cout;  // carry outputs of the last cell in each column (no bits 3:0)
+output [81:0]  sum;  // sum outputs of the last cell in each column; sum[1:0] come straight from row I0
+input [2:0]  b6;  // b6/b7/b8 -> row I2 ports b0/b1/b2
+input [2:0]  b3;  // b3/b4/b5 -> row I1 ports b0/b1/b2
+input [2:0]  b8;
+input [2:0]  b2;  // b0/b1/b2 -> row I0 ports b0/b1/b2; b2 also drives mul_negen p0n
+input [2:0]  b1;
+input [2:0]  b7;
+input [63:0]  a;  // 64-bit operand shared by all three rows
+input [2:0]  b0;
+input [2:0]  b4;
+input [2:0]  b5;  // also drives mul_negen p1n
+
+// Buses in the design (sN/cN: sum/cout of row IN; s_N/c_N: outputs of the sc2_N_* level)
+
+wire  [1:0]  b5n;  // n1/n0 outputs of p1n; enter the tree at columns 11/10
+wire  [1:0]  b2n;  // n1/n0 outputs of p0n; enter the tree at columns 5/4
+wire  [68:1]  c0;  // row I0 cout
+wire  [69:0]  s1;  // row I1 sum; bit k is used in column k+6
+wire  [68:1]  c1;  // row I1 cout; bit k is used in column k+7
+wire  [69:0]  s2;  // row I2 sum; bit k is used in column k+12
+wire  [68:1]  c2;  // row I2 cout; bit k is used in column k+13
+wire  [70:4]  s_1;  // sum outputs of the sc2_1_* level
+wire  [69:2]  s0;  // row I0 sum bits 69:2 (bits 1:0 are wired to sum[1:0])
+wire  [76:10]  s_2;  // sum outputs of the sc2_2_* level
+wire  [70:2]  c_1;  // carry outputs of the sc2_1_* level; bit N is consumed in column N+1
+wire  [76:10]  c_2;  // carry outputs of the sc2_2_* level; bit N is consumed in column N+1
+wire  [75:11]  co;  // cout(column N) -> cin(column N+1) chain between the sc3_* mul_csa42 cells
+// NOTE(review): b5n/b2n are presumably Booth negate-correction bits for b5/b2 -- confirm against mul_negen.
+mul_negen p1n ( .b(b5[2:0]), .n1(b5n[1]), .n0(b5n[0]));
+mul_negen p0n ( .b(b2[2:0]), .n1(b2n[1]), .n0(b2n[0]));
+mul_csa42  sc3_71_ ( .c(s_2[71]), .cin(co[70]), .a(c_1[70]),
+     .b(c_2[70]), .cout(co[71]), .sum(sum[71]), .d(s1[65]),
+     .carry(cout[71]));  // col 71: d takes s1[65] directly (s_1 stops at bit 70)
+mul_csa42  sc3_75_ ( .c(s_2[75]), .cin(co[74]), .a(1'b0),
+     .b(c_2[74]), .cout(co[75]), .sum(sum[75]), .d(s1[69]),
+     .carry(cout[75]));  // cols 72-75: a tied to 0; d takes s1[N-6] directly
+mul_csa42  sc3_74_ ( .c(s_2[74]), .cin(co[73]), .a(1'b0),
+     .b(c_2[73]), .cout(co[74]), .sum(sum[74]), .d(s1[68]),
+     .carry(cout[74]));
+mul_csa42  sc3_73_ ( .c(s_2[73]), .cin(co[72]), .a(1'b0),
+     .b(c_2[72]), .cout(co[73]), .sum(sum[73]), .d(s1[67]),
+     .carry(cout[73]));
+mul_csa42  sc3_72_ ( .c(s_2[72]), .cin(co[71]), .a(1'b0),
+     .b(c_2[71]), .cout(co[72]), .sum(sum[72]), .d(s1[66]),
+     .carry(cout[72]));
+mul_csa42  sc3_76_ ( .c(s_2[76]), .cin(co[75]), .a(1'b0),
+     .b(c_2[75]), .cout(), .sum(sum[76]), .d(1'b0),
+     .carry(cout[76]));  // col 76: end of the co chain, cout left unconnected; a and d tied to 0
+mul_csa42  sc3_70_ ( .c(s_2[70]), .cin(co[69]), .a(c_1[69]),
+     .b(c_2[69]), .cout(co[70]), .sum(sum[70]), .d(s_1[70]),
+     .carry(cout[70]));  // cols 12-70 are regular: a=c_1[N-1], b=c_2[N-1], c=s_2[N], d=s_1[N], cin=co[N-1]
+mul_csa42  sc3_69_ ( .c(s_2[69]), .cin(co[68]), .a(c_1[68]),
+     .b(c_2[68]), .cout(co[69]), .sum(sum[69]), .d(s_1[69]),
+     .carry(cout[69]));
+mul_csa42  sc3_68_ ( .c(s_2[68]), .cin(co[67]), .a(c_1[67]),
+     .b(c_2[67]), .cout(co[68]), .sum(sum[68]), .d(s_1[68]),
+     .carry(cout[68]));
+mul_csa42  sc3_67_ ( .c(s_2[67]), .cin(co[66]), .a(c_1[66]),
+     .b(c_2[66]), .cout(co[67]), .sum(sum[67]), .d(s_1[67]),
+     .carry(cout[67]));
+mul_csa42  sc3_66_ ( .c(s_2[66]), .cin(co[65]), .a(c_1[65]),
+     .b(c_2[65]), .cout(co[66]), .sum(sum[66]), .d(s_1[66]),
+     .carry(cout[66]));
+mul_csa42  sc3_65_ ( .c(s_2[65]), .cin(co[64]), .a(c_1[64]),
+     .b(c_2[64]), .cout(co[65]), .sum(sum[65]), .d(s_1[65]),
+     .carry(cout[65]));
+mul_csa42  sc3_64_ ( .c(s_2[64]), .cin(co[63]), .a(c_1[63]),
+     .b(c_2[63]), .cout(co[64]), .sum(sum[64]), .d(s_1[64]),
+     .carry(cout[64]));
+mul_csa42  sc3_63_ ( .c(s_2[63]), .cin(co[62]), .a(c_1[62]),
+     .b(c_2[62]), .cout(co[63]), .sum(sum[63]), .d(s_1[63]),
+     .carry(cout[63]));
+mul_csa42  sc3_62_ ( .c(s_2[62]), .cin(co[61]), .a(c_1[61]),
+     .b(c_2[61]), .cout(co[62]), .sum(sum[62]), .d(s_1[62]),
+     .carry(cout[62]));
+mul_csa42  sc3_61_ ( .c(s_2[61]), .cin(co[60]), .a(c_1[60]),
+     .b(c_2[60]), .cout(co[61]), .sum(sum[61]), .d(s_1[61]),
+     .carry(cout[61]));
+mul_csa42  sc3_60_ ( .c(s_2[60]), .cin(co[59]), .a(c_1[59]),
+     .b(c_2[59]), .cout(co[60]), .sum(sum[60]), .d(s_1[60]),
+     .carry(cout[60]));
+mul_csa42  sc3_59_ ( .c(s_2[59]), .cin(co[58]), .a(c_1[58]),
+     .b(c_2[58]), .cout(co[59]), .sum(sum[59]), .d(s_1[59]),
+     .carry(cout[59]));
+mul_csa42  sc3_58_ ( .c(s_2[58]), .cin(co[57]), .a(c_1[57]),
+     .b(c_2[57]), .cout(co[58]), .sum(sum[58]), .d(s_1[58]),
+     .carry(cout[58]));
+mul_csa42  sc3_57_ ( .c(s_2[57]), .cin(co[56]), .a(c_1[56]),
+     .b(c_2[56]), .cout(co[57]), .sum(sum[57]), .d(s_1[57]),
+     .carry(cout[57]));
+mul_csa42  sc3_56_ ( .c(s_2[56]), .cin(co[55]), .a(c_1[55]),
+     .b(c_2[55]), .cout(co[56]), .sum(sum[56]), .d(s_1[56]),
+     .carry(cout[56]));
+mul_csa42  sc3_55_ ( .c(s_2[55]), .cin(co[54]), .a(c_1[54]),
+     .b(c_2[54]), .cout(co[55]), .sum(sum[55]), .d(s_1[55]),
+     .carry(cout[55]));
+mul_csa42  sc3_54_ ( .c(s_2[54]), .cin(co[53]), .a(c_1[53]),
+     .b(c_2[53]), .cout(co[54]), .sum(sum[54]), .d(s_1[54]),
+     .carry(cout[54]));
+mul_csa42  sc3_53_ ( .c(s_2[53]), .cin(co[52]), .a(c_1[52]),
+     .b(c_2[52]), .cout(co[53]), .sum(sum[53]), .d(s_1[53]),
+     .carry(cout[53]));
+mul_csa42  sc3_52_ ( .c(s_2[52]), .cin(co[51]), .a(c_1[51]),
+     .b(c_2[51]), .cout(co[52]), .sum(sum[52]), .d(s_1[52]),
+     .carry(cout[52]));
+mul_csa42  sc3_51_ ( .c(s_2[51]), .cin(co[50]), .a(c_1[50]),
+     .b(c_2[50]), .cout(co[51]), .sum(sum[51]), .d(s_1[51]),
+     .carry(cout[51]));
+mul_csa42  sc3_50_ ( .c(s_2[50]), .cin(co[49]), .a(c_1[49]),
+     .b(c_2[49]), .cout(co[50]), .sum(sum[50]), .d(s_1[50]),
+     .carry(cout[50]));
+mul_csa42  sc3_49_ ( .c(s_2[49]), .cin(co[48]), .a(c_1[48]),
+     .b(c_2[48]), .cout(co[49]), .sum(sum[49]), .d(s_1[49]),
+     .carry(cout[49]));
+mul_csa42  sc3_48_ ( .c(s_2[48]), .cin(co[47]), .a(c_1[47]),
+     .b(c_2[47]), .cout(co[48]), .sum(sum[48]), .d(s_1[48]),
+     .carry(cout[48]));
+mul_csa42  sc3_47_ ( .c(s_2[47]), .cin(co[46]), .a(c_1[46]),
+     .b(c_2[46]), .cout(co[47]), .sum(sum[47]), .d(s_1[47]),
+     .carry(cout[47]));
+mul_csa42  sc3_46_ ( .c(s_2[46]), .cin(co[45]), .a(c_1[45]),
+     .b(c_2[45]), .cout(co[46]), .sum(sum[46]), .d(s_1[46]),
+     .carry(cout[46]));
+mul_csa42  sc3_45_ ( .c(s_2[45]), .cin(co[44]), .a(c_1[44]),
+     .b(c_2[44]), .cout(co[45]), .sum(sum[45]), .d(s_1[45]),
+     .carry(cout[45]));
+mul_csa42  sc3_44_ ( .c(s_2[44]), .cin(co[43]), .a(c_1[43]),
+     .b(c_2[43]), .cout(co[44]), .sum(sum[44]), .d(s_1[44]),
+     .carry(cout[44]));
+mul_csa42  sc3_43_ ( .c(s_2[43]), .cin(co[42]), .a(c_1[42]),
+     .b(c_2[42]), .cout(co[43]), .sum(sum[43]), .d(s_1[43]),
+     .carry(cout[43]));
+mul_csa42  sc3_42_ ( .c(s_2[42]), .cin(co[41]), .a(c_1[41]),
+     .b(c_2[41]), .cout(co[42]), .sum(sum[42]), .d(s_1[42]),
+     .carry(cout[42]));
+mul_csa42  sc3_41_ ( .c(s_2[41]), .cin(co[40]), .a(c_1[40]),
+     .b(c_2[40]), .cout(co[41]), .sum(sum[41]), .d(s_1[41]),
+     .carry(cout[41]));
+mul_csa42  sc3_40_ ( .c(s_2[40]), .cin(co[39]), .a(c_1[39]),
+     .b(c_2[39]), .cout(co[40]), .sum(sum[40]), .d(s_1[40]),
+     .carry(cout[40]));
+mul_csa42  sc3_39_ ( .c(s_2[39]), .cin(co[38]), .a(c_1[38]),
+     .b(c_2[38]), .cout(co[39]), .sum(sum[39]), .d(s_1[39]),
+     .carry(cout[39]));
+mul_csa42  sc3_38_ ( .c(s_2[38]), .cin(co[37]), .a(c_1[37]),
+     .b(c_2[37]), .cout(co[38]), .sum(sum[38]), .d(s_1[38]),
+     .carry(cout[38]));
+mul_csa42  sc3_37_ ( .c(s_2[37]), .cin(co[36]), .a(c_1[36]),
+     .b(c_2[36]), .cout(co[37]), .sum(sum[37]), .d(s_1[37]),
+     .carry(cout[37]));
+mul_csa42  sc3_36_ ( .c(s_2[36]), .cin(co[35]), .a(c_1[35]),
+     .b(c_2[35]), .cout(co[36]), .sum(sum[36]), .d(s_1[36]),
+     .carry(cout[36]));
+mul_csa42  sc3_35_ ( .c(s_2[35]), .cin(co[34]), .a(c_1[34]),
+     .b(c_2[34]), .cout(co[35]), .sum(sum[35]), .d(s_1[35]),
+     .carry(cout[35]));
+mul_csa42  sc3_34_ ( .c(s_2[34]), .cin(co[33]), .a(c_1[33]),
+     .b(c_2[33]), .cout(co[34]), .sum(sum[34]), .d(s_1[34]),
+     .carry(cout[34]));
+mul_csa42  sc3_33_ ( .c(s_2[33]), .cin(co[32]), .a(c_1[32]),
+     .b(c_2[32]), .cout(co[33]), .sum(sum[33]), .d(s_1[33]),
+     .carry(cout[33]));
+mul_csa42  sc3_32_ ( .c(s_2[32]), .cin(co[31]), .a(c_1[31]),
+     .b(c_2[31]), .cout(co[32]), .sum(sum[32]), .d(s_1[32]),
+     .carry(cout[32]));
+mul_csa42  sc3_31_ ( .c(s_2[31]), .cin(co[30]), .a(c_1[30]),
+     .b(c_2[30]), .cout(co[31]), .sum(sum[31]), .d(s_1[31]),
+     .carry(cout[31]));
+mul_csa42  sc3_30_ ( .c(s_2[30]), .cin(co[29]), .a(c_1[29]),
+     .b(c_2[29]), .cout(co[30]), .sum(sum[30]), .d(s_1[30]),
+     .carry(cout[30]));
+mul_csa42  sc3_29_ ( .c(s_2[29]), .cin(co[28]), .a(c_1[28]),
+     .b(c_2[28]), .cout(co[29]), .sum(sum[29]), .d(s_1[29]),
+     .carry(cout[29]));
+mul_csa42  sc3_28_ ( .c(s_2[28]), .cin(co[27]), .a(c_1[27]),
+     .b(c_2[27]), .cout(co[28]), .sum(sum[28]), .d(s_1[28]),
+     .carry(cout[28]));
+mul_csa42  sc3_27_ ( .c(s_2[27]), .cin(co[26]), .a(c_1[26]),
+     .b(c_2[26]), .cout(co[27]), .sum(sum[27]), .d(s_1[27]),
+     .carry(cout[27]));
+mul_csa42  sc3_26_ ( .c(s_2[26]), .cin(co[25]), .a(c_1[25]),
+     .b(c_2[25]), .cout(co[26]), .sum(sum[26]), .d(s_1[26]),
+     .carry(cout[26]));
+mul_csa42  sc3_25_ ( .c(s_2[25]), .cin(co[24]), .a(c_1[24]),
+     .b(c_2[24]), .cout(co[25]), .sum(sum[25]), .d(s_1[25]),
+     .carry(cout[25]));
+mul_csa42  sc3_24_ ( .c(s_2[24]), .cin(co[23]), .a(c_1[23]),
+     .b(c_2[23]), .cout(co[24]), .sum(sum[24]), .d(s_1[24]),
+     .carry(cout[24]));
+mul_csa42  sc3_23_ ( .c(s_2[23]), .cin(co[22]), .a(c_1[22]),
+     .b(c_2[22]), .cout(co[23]), .sum(sum[23]), .d(s_1[23]),
+     .carry(cout[23]));
+mul_csa42  sc3_22_ ( .c(s_2[22]), .cin(co[21]), .a(c_1[21]),
+     .b(c_2[21]), .cout(co[22]), .sum(sum[22]), .d(s_1[22]),
+     .carry(cout[22]));
+mul_csa42  sc3_21_ ( .c(s_2[21]), .cin(co[20]), .a(c_1[20]),
+     .b(c_2[20]), .cout(co[21]), .sum(sum[21]), .d(s_1[21]),
+     .carry(cout[21]));
+mul_csa42  sc3_20_ ( .c(s_2[20]), .cin(co[19]), .a(c_1[19]),
+     .b(c_2[19]), .cout(co[20]), .sum(sum[20]), .d(s_1[20]),
+     .carry(cout[20]));
+mul_csa42  sc3_19_ ( .c(s_2[19]), .cin(co[18]), .a(c_1[18]),
+     .b(c_2[18]), .cout(co[19]), .sum(sum[19]), .d(s_1[19]),
+     .carry(cout[19]));
+mul_csa42  sc3_18_ ( .c(s_2[18]), .cin(co[17]), .a(c_1[17]),
+     .b(c_2[17]), .cout(co[18]), .sum(sum[18]), .d(s_1[18]),
+     .carry(cout[18]));
+mul_csa42  sc3_17_ ( .c(s_2[17]), .cin(co[16]), .a(c_1[16]),
+     .b(c_2[16]), .cout(co[17]), .sum(sum[17]), .d(s_1[17]),
+     .carry(cout[17]));
+mul_csa42  sc3_16_ ( .c(s_2[16]), .cin(co[15]), .a(c_1[15]),
+     .b(c_2[15]), .cout(co[16]), .sum(sum[16]), .d(s_1[16]),
+     .carry(cout[16]));
+mul_csa42  sc3_15_ ( .c(s_2[15]), .cin(co[14]), .a(c_1[14]),
+     .b(c_2[14]), .cout(co[15]), .sum(sum[15]), .d(s_1[15]),
+     .carry(cout[15]));
+mul_csa42  sc3_14_ ( .c(s_2[14]), .cin(co[13]), .a(c_1[13]),
+     .b(c_2[13]), .cout(co[14]), .sum(sum[14]), .d(s_1[14]),
+     .carry(cout[14]));
+mul_csa42  sc3_13_ ( .c(s_2[13]), .cin(co[12]), .a(c_1[12]),
+     .b(c_2[12]), .cout(co[13]), .sum(sum[13]), .d(s_1[13]),
+     .carry(cout[13]));
+mul_csa42  sc3_12_ ( .c(s_2[12]), .cin(co[11]), .a(c_1[11]),
+     .b(c_2[11]), .cout(co[12]), .sum(sum[12]), .d(s_1[12]),
+     .carry(cout[12]));
+mul_csa42  sc3_11_ ( .c(s_2[11]), .cin(1'b0),
+     .a(c_1[10]), .b(c_2[10]), .cout(co[11]), .sum(sum[11]),
+     .d(s_1[11]), .carry(cout[11]));  // col 11: start of the co chain, cin tied to 0
+mul_csa32  sc2_2_70_ ( .c(c1[63]), .b(c2[57]), .a(s2[58]),
+     .cout(c_2[70]), .sum(s_2[70]));  // sc2_2_* cols 14-75 are regular: a=s2[N-12], b=c2[N-13], c=c1[N-7]
+mul_csa32  sc2_2_69_ ( .c(c1[62]), .b(c2[56]), .a(s2[57]),
+     .cout(c_2[69]), .sum(s_2[69]));
+mul_csa32  sc2_2_68_ ( .c(c1[61]), .b(c2[55]), .a(s2[56]),
+     .cout(c_2[68]), .sum(s_2[68]));
+mul_csa32  sc2_2_67_ ( .c(c1[60]), .b(c2[54]), .a(s2[55]),
+     .cout(c_2[67]), .sum(s_2[67]));
+mul_csa32  sc2_2_66_ ( .c(c1[59]), .b(c2[53]), .a(s2[54]),
+     .cout(c_2[66]), .sum(s_2[66]));
+mul_csa32  sc2_2_65_ ( .c(c1[58]), .b(c2[52]), .a(s2[53]),
+     .cout(c_2[65]), .sum(s_2[65]));
+mul_csa32  sc2_2_64_ ( .c(c1[57]), .b(c2[51]), .a(s2[52]),
+     .cout(c_2[64]), .sum(s_2[64]));
+mul_csa32  sc2_2_63_ ( .c(c1[56]), .b(c2[50]), .a(s2[51]),
+     .cout(c_2[63]), .sum(s_2[63]));
+mul_csa32  sc2_2_62_ ( .c(c1[55]), .b(c2[49]), .a(s2[50]),
+     .cout(c_2[62]), .sum(s_2[62]));
+mul_csa32  sc2_2_61_ ( .c(c1[54]), .b(c2[48]), .a(s2[49]),
+     .cout(c_2[61]), .sum(s_2[61]));
+mul_csa32  sc2_2_60_ ( .c(c1[53]), .b(c2[47]), .a(s2[48]),
+     .cout(c_2[60]), .sum(s_2[60]));
+mul_csa32  sc2_2_59_ ( .c(c1[52]), .b(c2[46]), .a(s2[47]),
+     .cout(c_2[59]), .sum(s_2[59]));
+mul_csa32  sc2_2_58_ ( .c(c1[51]), .b(c2[45]), .a(s2[46]),
+     .cout(c_2[58]), .sum(s_2[58]));
+mul_csa32  sc2_2_57_ ( .c(c1[50]), .b(c2[44]), .a(s2[45]),
+     .cout(c_2[57]), .sum(s_2[57]));
+mul_csa32  sc2_2_56_ ( .c(c1[49]), .b(c2[43]), .a(s2[44]),
+     .cout(c_2[56]), .sum(s_2[56]));
+mul_csa32  sc2_2_55_ ( .c(c1[48]), .b(c2[42]), .a(s2[43]),
+     .cout(c_2[55]), .sum(s_2[55]));
+mul_csa32  sc2_2_54_ ( .c(c1[47]), .b(c2[41]), .a(s2[42]),
+     .cout(c_2[54]), .sum(s_2[54]));
+mul_csa32  sc2_2_53_ ( .c(c1[46]), .b(c2[40]), .a(s2[41]),
+     .cout(c_2[53]), .sum(s_2[53]));
+mul_csa32  sc2_2_52_ ( .c(c1[45]), .b(c2[39]), .a(s2[40]),
+     .cout(c_2[52]), .sum(s_2[52]));
+mul_csa32  sc2_2_51_ ( .c(c1[44]), .b(c2[38]), .a(s2[39]),
+     .cout(c_2[51]), .sum(s_2[51]));
+mul_csa32  sc2_2_50_ ( .c(c1[43]), .b(c2[37]), .a(s2[38]),
+     .cout(c_2[50]), .sum(s_2[50]));
+mul_csa32  sc2_2_49_ ( .c(c1[42]), .b(c2[36]), .a(s2[37]),
+     .cout(c_2[49]), .sum(s_2[49]));
+mul_csa32  sc2_2_48_ ( .c(c1[41]), .b(c2[35]), .a(s2[36]),
+     .cout(c_2[48]), .sum(s_2[48]));
+mul_csa32  sc2_2_47_ ( .c(c1[40]), .b(c2[34]), .a(s2[35]),
+     .cout(c_2[47]), .sum(s_2[47]));
+mul_csa32  sc2_2_46_ ( .c(c1[39]), .b(c2[33]), .a(s2[34]),
+     .cout(c_2[46]), .sum(s_2[46]));
+mul_csa32  sc2_2_45_ ( .c(c1[38]), .b(c2[32]), .a(s2[33]),
+     .cout(c_2[45]), .sum(s_2[45]));
+mul_csa32  sc2_2_44_ ( .c(c1[37]), .b(c2[31]), .a(s2[32]),
+     .cout(c_2[44]), .sum(s_2[44]));
+mul_csa32  sc2_2_43_ ( .c(c1[36]), .b(c2[30]), .a(s2[31]),
+     .cout(c_2[43]), .sum(s_2[43]));
+mul_csa32  sc2_2_42_ ( .c(c1[35]), .b(c2[29]), .a(s2[30]),
+     .cout(c_2[42]), .sum(s_2[42]));
+mul_csa32  sc2_2_41_ ( .c(c1[34]), .b(c2[28]), .a(s2[29]),
+     .cout(c_2[41]), .sum(s_2[41]));
+mul_csa32  sc2_2_40_ ( .c(c1[33]), .b(c2[27]), .a(s2[28]),
+     .cout(c_2[40]), .sum(s_2[40]));
+mul_csa32  sc2_2_39_ ( .c(c1[32]), .b(c2[26]), .a(s2[27]),
+     .cout(c_2[39]), .sum(s_2[39]));
+mul_csa32  sc2_2_38_ ( .c(c1[31]), .b(c2[25]), .a(s2[26]),
+     .cout(c_2[38]), .sum(s_2[38]));
+mul_csa32  sc2_2_37_ ( .c(c1[30]), .b(c2[24]), .a(s2[25]),
+     .cout(c_2[37]), .sum(s_2[37]));
+mul_csa32  sc2_2_36_ ( .c(c1[29]), .b(c2[23]), .a(s2[24]),
+     .cout(c_2[36]), .sum(s_2[36]));
+mul_csa32  sc2_2_35_ ( .c(c1[28]), .b(c2[22]), .a(s2[23]),
+     .cout(c_2[35]), .sum(s_2[35]));
+mul_csa32  sc2_2_34_ ( .c(c1[27]), .b(c2[21]), .a(s2[22]),
+     .cout(c_2[34]), .sum(s_2[34]));
+mul_csa32  sc2_2_33_ ( .c(c1[26]), .b(c2[20]), .a(s2[21]),
+     .cout(c_2[33]), .sum(s_2[33]));
+mul_csa32  sc2_2_32_ ( .c(c1[25]), .b(c2[19]), .a(s2[20]),
+     .cout(c_2[32]), .sum(s_2[32]));
+mul_csa32  sc2_2_31_ ( .c(c1[24]), .b(c2[18]), .a(s2[19]),
+     .cout(c_2[31]), .sum(s_2[31]));
+mul_csa32  sc2_2_30_ ( .c(c1[23]), .b(c2[17]), .a(s2[18]),
+     .cout(c_2[30]), .sum(s_2[30]));
+mul_csa32  sc2_2_29_ ( .c(c1[22]), .b(c2[16]), .a(s2[17]),
+     .cout(c_2[29]), .sum(s_2[29]));
+mul_csa32  sc2_2_28_ ( .c(c1[21]), .b(c2[15]), .a(s2[16]),
+     .cout(c_2[28]), .sum(s_2[28]));
+mul_csa32  sc2_2_27_ ( .c(c1[20]), .b(c2[14]), .a(s2[15]),
+     .cout(c_2[27]), .sum(s_2[27]));
+mul_csa32  sc2_2_26_ ( .c(c1[19]), .b(c2[13]), .a(s2[14]),
+     .cout(c_2[26]), .sum(s_2[26]));
+mul_csa32  sc2_2_25_ ( .c(c1[18]), .b(c2[12]), .a(s2[13]),
+     .cout(c_2[25]), .sum(s_2[25]));
+mul_csa32  sc2_2_24_ ( .c(c1[17]), .b(c2[11]), .a(s2[12]),
+     .cout(c_2[24]), .sum(s_2[24]));
+mul_csa32  sc2_2_23_ ( .c(c1[16]), .b(c2[10]), .a(s2[11]),
+     .cout(c_2[23]), .sum(s_2[23]));
+mul_csa32  sc2_2_22_ ( .c(c1[15]), .b(c2[9]), .a(s2[10]),
+     .cout(c_2[22]), .sum(s_2[22]));
+mul_csa32  sc2_2_21_ ( .c(c1[14]), .b(c2[8]), .a(s2[9]),
+     .cout(c_2[21]), .sum(s_2[21]));
+mul_csa32  sc2_2_20_ ( .c(c1[13]), .b(c2[7]), .a(s2[8]),
+     .cout(c_2[20]), .sum(s_2[20]));
+mul_csa32  sc2_2_19_ ( .c(c1[12]), .b(c2[6]), .a(s2[7]),
+     .cout(c_2[19]), .sum(s_2[19]));
+mul_csa32  sc2_2_18_ ( .c(c1[11]), .b(c2[5]), .a(s2[6]),
+     .cout(c_2[18]), .sum(s_2[18]));
+mul_csa32  sc2_2_17_ ( .c(c1[10]), .b(c2[4]), .a(s2[5]),
+     .cout(c_2[17]), .sum(s_2[17]));
+mul_csa32  sc2_2_16_ ( .c(c1[9]), .b(c2[3]), .a(s2[4]),
+     .cout(c_2[16]), .sum(s_2[16]));
+mul_csa32  sc2_2_15_ ( .c(c1[8]), .b(c2[2]), .a(s2[3]),
+     .cout(c_2[15]), .sum(s_2[15]));
+mul_csa32  sc2_2_14_ ( .c(c1[7]), .b(c2[1]), .a(s2[2]),
+     .cout(c_2[14]), .sum(s_2[14]));
+mul_csa32  sc2_2_13_ ( .c(c1[6]), .b(s1[7]), .a(s2[1]),
+     .cout(c_2[13]), .sum(s_2[13]));  // cols 10-13: c2 has no bit for these columns, so b takes s1[N-6] instead
+mul_csa32  sc2_2_12_ ( .c(c1[5]), .b(s1[6]), .a(s2[0]),
+     .cout(c_2[12]), .sum(s_2[12]));
+mul_csa32  sc2_2_11_ ( .c(c1[4]), .b(s1[5]), .a(b5n[1]),
+     .cout(c_2[11]), .sum(s_2[11]));  // cols 10-11: a takes b5n[1:0] (below the lowest s2 bit)
+mul_csa32  sc2_2_10_ ( .c(c1[3]), .b(s1[4]), .a(b5n[0]),
+     .cout(c_2[10]), .sum(s_2[10]));
+mul_csa32  sc2_2_76_ ( .c(1'b1), .b(c2[63]), .a(s2[64]),
+     .cout(c_2[76]), .sum(s_2[76]));  // constant 1 enters at column 76. NOTE(review): presumably a sign-extension correction -- confirm
+mul_csa32  sc2_2_77_ ( .c(c_2[76]), .b(c2[64]), .a(s2[65]),
+     .cout(cout[77]), .sum(sum[77]));  // col 77: absorbs c_2[76] and drives the module outputs directly
+mul_csa32  sc2_1_9_ ( .c(s1[3]), .b(c0[8]), .a(s0[9]), .cout(c_1[9]),
+     .sum(s_1[9]));  // sc2_1_* cols 6-9 and 14-69 are regular: a=s0[N], b=c0[N-1], c=s1[N-6]
+mul_csa32  sc2_1_8_ ( .c(s1[2]), .b(c0[7]), .a(s0[8]), .cout(c_1[8]),
+     .sum(s_1[8]));
+mul_csa32  sc2_1_3_ ( .c(c_1[2]), .b(c0[2]), .a(s0[3]),
+     .cout(c_1[3]), .sum(sum[3]));  // col 3: takes the carry of the column-2 cell and drives sum[3] directly
+mul_csa32  sc3_10_ ( .c(s_2[10]), .b(s_1[10]), .a(c_1[9]),
+     .cout(cout[10]), .sum(sum[10]));  // col 10 of the final level: three inputs, not part of the co chain
+mul_csa32  sc3_9_ ( .c(c1[2]), .sum(sum[9]), .cout(cout[9]),
+     .a(c_1[8]), .b(s_1[9]));  // cols 8-9: c1[N-7] enters the final level directly
+mul_csa32  sc3_8_ ( .c(c1[1]), .sum(sum[8]), .cout(cout[8]),
+     .a(c_1[7]), .b(s_1[8]));
+mul_csa32  sc2_2_71_ ( .c(c1[64]), .b(c2[58]), .a(s2[59]),
+     .cout(c_2[71]), .sum(s_2[71]));
+mul_csa32  sc2_2_75_ ( .c(c1[68]), .b(c2[62]), .a(s2[63]),
+     .cout(c_2[75]), .sum(s_2[75]));
+mul_csa32  sc2_2_74_ ( .c(c1[67]), .b(c2[61]), .a(s2[62]),
+     .cout(c_2[74]), .sum(s_2[74]));
+mul_csa32  sc2_2_73_ ( .c(c1[66]), .b(c2[60]), .a(s2[61]),
+     .cout(c_2[73]), .sum(s_2[73]));
+mul_csa32  sc2_2_72_ ( .c(c1[65]), .b(c2[59]), .a(s2[60]),
+     .cout(c_2[72]), .sum(s_2[72]));
+mul_csa32  sc2_1_69_ ( .c(s1[63]), .sum(s_1[69]), .cout(c_1[69]),
+     .a(s0[69]), .b(c0[68]));
+mul_csa32  sc2_1_68_ ( .c(s1[62]), .sum(s_1[68]), .cout(c_1[68]),
+     .a(s0[68]), .b(c0[67]));
+mul_csa32  sc2_1_67_ ( .c(s1[61]), .sum(s_1[67]), .cout(c_1[67]),
+     .a(s0[67]), .b(c0[66]));
+mul_csa32  sc2_1_66_ ( .c(s1[60]), .sum(s_1[66]), .cout(c_1[66]),
+     .a(s0[66]), .b(c0[65]));
+mul_csa32  sc2_1_65_ ( .c(s1[59]), .sum(s_1[65]), .cout(c_1[65]),
+     .a(s0[65]), .b(c0[64]));
+mul_csa32  sc2_1_64_ ( .c(s1[58]), .sum(s_1[64]), .cout(c_1[64]),
+     .a(s0[64]), .b(c0[63]));
+mul_csa32  sc2_1_63_ ( .c(s1[57]), .sum(s_1[63]), .cout(c_1[63]),
+     .a(s0[63]), .b(c0[62]));
+mul_csa32  sc2_1_62_ ( .c(s1[56]), .sum(s_1[62]), .cout(c_1[62]),
+     .a(s0[62]), .b(c0[61]));
+mul_csa32  sc2_1_61_ ( .c(s1[55]), .sum(s_1[61]), .cout(c_1[61]),
+     .a(s0[61]), .b(c0[60]));
+mul_csa32  sc2_1_60_ ( .c(s1[54]), .sum(s_1[60]), .cout(c_1[60]),
+     .a(s0[60]), .b(c0[59]));
+mul_csa32  sc2_1_59_ ( .c(s1[53]), .sum(s_1[59]), .cout(c_1[59]),
+     .a(s0[59]), .b(c0[58]));
+mul_csa32  sc2_1_58_ ( .c(s1[52]), .sum(s_1[58]), .cout(c_1[58]),
+     .a(s0[58]), .b(c0[57]));
+mul_csa32  sc2_1_57_ ( .c(s1[51]), .sum(s_1[57]), .cout(c_1[57]),
+     .a(s0[57]), .b(c0[56]));
+mul_csa32  sc2_1_56_ ( .c(s1[50]), .sum(s_1[56]), .cout(c_1[56]),
+     .a(s0[56]), .b(c0[55]));
+mul_csa32  sc2_1_55_ ( .c(s1[49]), .sum(s_1[55]), .cout(c_1[55]),
+     .a(s0[55]), .b(c0[54]));
+mul_csa32  sc2_1_54_ ( .c(s1[48]), .sum(s_1[54]), .cout(c_1[54]),
+     .a(s0[54]), .b(c0[53]));
+mul_csa32  sc2_1_53_ ( .c(s1[47]), .sum(s_1[53]), .cout(c_1[53]),
+     .a(s0[53]), .b(c0[52]));
+mul_csa32  sc2_1_52_ ( .c(s1[46]), .sum(s_1[52]), .cout(c_1[52]),
+     .a(s0[52]), .b(c0[51]));
+mul_csa32  sc2_1_51_ ( .c(s1[45]), .sum(s_1[51]), .cout(c_1[51]),
+     .a(s0[51]), .b(c0[50]));
+mul_csa32  sc2_1_50_ ( .c(s1[44]), .sum(s_1[50]), .cout(c_1[50]),
+     .a(s0[50]), .b(c0[49]));
+mul_csa32  sc2_1_49_ ( .c(s1[43]), .sum(s_1[49]), .cout(c_1[49]),
+     .a(s0[49]), .b(c0[48]));
+mul_csa32  sc2_1_48_ ( .c(s1[42]), .sum(s_1[48]), .cout(c_1[48]),
+     .a(s0[48]), .b(c0[47]));
+mul_csa32  sc2_1_47_ ( .c(s1[41]), .sum(s_1[47]), .cout(c_1[47]),
+     .a(s0[47]), .b(c0[46]));
+mul_csa32  sc2_1_46_ ( .c(s1[40]), .sum(s_1[46]), .cout(c_1[46]),
+     .a(s0[46]), .b(c0[45]));
+mul_csa32  sc2_1_45_ ( .c(s1[39]), .sum(s_1[45]), .cout(c_1[45]),
+     .a(s0[45]), .b(c0[44]));
+mul_csa32  sc2_1_44_ ( .c(s1[38]), .sum(s_1[44]), .cout(c_1[44]),
+     .a(s0[44]), .b(c0[43]));
+mul_csa32  sc2_1_43_ ( .c(s1[37]), .sum(s_1[43]), .cout(c_1[43]),
+     .a(s0[43]), .b(c0[42]));
+mul_csa32  sc2_1_42_ ( .c(s1[36]), .sum(s_1[42]), .cout(c_1[42]),
+     .a(s0[42]), .b(c0[41]));
+mul_csa32  sc2_1_41_ ( .c(s1[35]), .sum(s_1[41]), .cout(c_1[41]),
+     .a(s0[41]), .b(c0[40]));
+mul_csa32  sc2_1_40_ ( .c(s1[34]), .sum(s_1[40]), .cout(c_1[40]),
+     .a(s0[40]), .b(c0[39]));
+mul_csa32  sc2_1_39_ ( .c(s1[33]), .sum(s_1[39]), .cout(c_1[39]),
+     .a(s0[39]), .b(c0[38]));
+mul_csa32  sc2_1_38_ ( .c(s1[32]), .sum(s_1[38]), .cout(c_1[38]),
+     .a(s0[38]), .b(c0[37]));
+mul_csa32  sc2_1_37_ ( .c(s1[31]), .sum(s_1[37]), .cout(c_1[37]),
+     .a(s0[37]), .b(c0[36]));
+mul_csa32  sc2_1_36_ ( .c(s1[30]), .sum(s_1[36]), .cout(c_1[36]),
+     .a(s0[36]), .b(c0[35]));
+mul_csa32  sc2_1_35_ ( .c(s1[29]), .sum(s_1[35]), .cout(c_1[35]),
+     .a(s0[35]), .b(c0[34]));
+mul_csa32  sc2_1_34_ ( .c(s1[28]), .sum(s_1[34]), .cout(c_1[34]),
+     .a(s0[34]), .b(c0[33]));
+mul_csa32  sc2_1_33_ ( .c(s1[27]), .sum(s_1[33]), .cout(c_1[33]),
+     .a(s0[33]), .b(c0[32]));
+mul_csa32  sc2_1_32_ ( .c(s1[26]), .sum(s_1[32]), .cout(c_1[32]),
+     .a(s0[32]), .b(c0[31]));
+mul_csa32  sc2_1_31_ ( .c(s1[25]), .sum(s_1[31]), .cout(c_1[31]),
+     .a(s0[31]), .b(c0[30]));
+mul_csa32  sc2_1_30_ ( .c(s1[24]), .sum(s_1[30]), .cout(c_1[30]),
+     .a(s0[30]), .b(c0[29]));
+mul_csa32  sc2_1_29_ ( .c(s1[23]), .sum(s_1[29]), .cout(c_1[29]),
+     .a(s0[29]), .b(c0[28]));
+mul_csa32  sc2_1_28_ ( .c(s1[22]), .sum(s_1[28]), .cout(c_1[28]),
+     .a(s0[28]), .b(c0[27]));
+mul_csa32  sc2_1_27_ ( .c(s1[21]), .sum(s_1[27]), .cout(c_1[27]),
+     .a(s0[27]), .b(c0[26]));
+mul_csa32  sc2_1_26_ ( .c(s1[20]), .sum(s_1[26]), .cout(c_1[26]),
+     .a(s0[26]), .b(c0[25]));
+mul_csa32  sc2_1_25_ ( .c(s1[19]), .sum(s_1[25]), .cout(c_1[25]),
+     .a(s0[25]), .b(c0[24]));
+mul_csa32  sc2_1_24_ ( .c(s1[18]), .sum(s_1[24]), .cout(c_1[24]),
+     .a(s0[24]), .b(c0[23]));
+mul_csa32  sc2_1_23_ ( .c(s1[17]), .sum(s_1[23]), .cout(c_1[23]),
+     .a(s0[23]), .b(c0[22]));
+mul_csa32  sc2_1_22_ ( .c(s1[16]), .sum(s_1[22]), .cout(c_1[22]),
+     .a(s0[22]), .b(c0[21]));
+mul_csa32  sc2_1_21_ ( .c(s1[15]), .sum(s_1[21]), .cout(c_1[21]),
+     .a(s0[21]), .b(c0[20]));
+mul_csa32  sc2_1_20_ ( .c(s1[14]), .sum(s_1[20]), .cout(c_1[20]),
+     .a(s0[20]), .b(c0[19]));
+mul_csa32  sc2_1_19_ ( .c(s1[13]), .sum(s_1[19]), .cout(c_1[19]),
+     .a(s0[19]), .b(c0[18]));
+mul_csa32  sc2_1_18_ ( .c(s1[12]), .sum(s_1[18]), .cout(c_1[18]),
+     .a(s0[18]), .b(c0[17]));
+mul_csa32  sc2_1_17_ ( .c(s1[11]), .sum(s_1[17]), .cout(c_1[17]),
+     .a(s0[17]), .b(c0[16]));
+mul_csa32  sc2_1_16_ ( .c(s1[10]), .sum(s_1[16]), .cout(c_1[16]),
+     .a(s0[16]), .b(c0[15]));
+mul_csa32  sc2_1_15_ ( .c(s1[9]), .sum(s_1[15]), .cout(c_1[15]),
+     .a(s0[15]), .b(c0[14]));
+mul_csa32  sc2_1_14_ ( .c(s1[8]), .sum(s_1[14]), .cout(c_1[14]),
+     .a(s0[14]), .b(c0[13]));
+mul_csa32  sc2_1_7_ ( .c(s1[1]), .b(c0[6]), .a(s0[7]), .cout(c_1[7]),
+     .sum(s_1[7]));
+mul_csa32  sc2_1_6_ ( .c(s1[0]), .b(c0[5]), .a(s0[6]), .cout(c_1[6]),
+     .sum(s_1[6]));
+mul_csa32  sc2_1_5_ ( .c(b2n[1]), .b(c0[4]), .a(s0[5]),
+     .cout(c_1[5]), .sum(s_1[5]));  // cols 4-5: c takes b2n[1:0] (below the lowest s1 bit)
+mul_csa32  sc2_1_4_ ( .c(b2n[0]), .b(c0[3]), .a(s0[4]),
+     .cout(c_1[4]), .sum(s_1[4]));
+mul_ha sc2_1_10_ ( .sum(s_1[10]), .cout(c_1[10]), .a(s0[10]),
+     .b(c0[9]));  // cols 10-13: s1[4..7] are consumed by sc2_2_10..13, so only s0 and c0 are combined here
+mul_ha sc3_7_ ( .sum(sum[7]), .cout(cout[7]), .a(c_1[6]),
+     .b(s_1[7]));  // cols 4-7 of the final level: only c_1[N-1] and s_1[N] are present
+mul_ha sc3_6_ ( .sum(sum[6]), .cout(cout[6]), .a(c_1[5]),
+     .b(s_1[6]));
+mul_ha sc3_5_ ( .sum(sum[5]), .cout(cout[5]), .a(c_1[4]),
+     .b(s_1[5]));
+mul_ha sc3_4_ ( .sum(sum[4]), .cout(cout[4]), .a(c_1[3]),
+     .b(s_1[4]));
+mul_ha sc2_2_81_ ( .sum(sum[81]), .cout(cout[81]), .a(s2[69]),
+     .b(c2[68]));  // cols 78-81: only row I2 bits (s2[N-12], c2[N-13]) are present; outputs drive sum/cout directly
+mul_ha sc2_2_80_ ( .sum(sum[80]), .cout(cout[80]), .a(s2[68]),
+     .b(c2[67]));
+mul_ha sc2_2_79_ ( .sum(sum[79]), .cout(cout[79]), .a(s2[67]),
+     .b(c2[66]));
+mul_ha sc2_2_78_ ( .sum(sum[78]), .cout(cout[78]), .a(s2[66]),
+     .b(c2[65]));
+mul_ha sc2_1_70_ ( .sum(s_1[70]), .cout(c_1[70]),
+     .a(1'b1), .b(s1[64]));  // constant 1 enters at column 70 with s1[64]. NOTE(review): presumably a sign-extension correction -- confirm
+mul_ha sc2_1_2_ ( .sum(sum[2]), .cout(c_1[2]), .a(s0[2]), .b(c0[1]));  // col 2: lowest c0 bit; result is sum[2]
+mul_ha sc2_1_13_ ( .sum(s_1[13]), .cout(c_1[13]), .a(s0[13]),
+     .b(c0[12]));
+mul_ha sc2_1_12_ ( .sum(s_1[12]), .cout(c_1[12]), .a(s0[12]),
+     .b(c0[11]));
+mul_ha sc2_1_11_ ( .sum(s_1[11]), .cout(c_1[11]), .a(s0[11]),
+     .b(c0[10]));
+mul_ppgenrow3 I2 ( .head(1'b0), .bot(bot), .b2(b8[2:0]),
+     .b1(b7[2:0]), .b0(b6[2:0]), .a(a[63:0]), .sum(s2[69:0]),
+     .cout(c2[68:1]));  // row I2: b6..b8, head tied to 0, bot from the module port
+mul_ppgenrow3 I1 ( .head(1'b0), .bot(1'b1),
+     .b2(b5[2:0]), .b1(b4[2:0]), .b0(b3[2:0]), .a(a[63:0]),
+     .sum(s1[69:0]), .cout(c1[68:1]));  // row I1: b3..b5, head tied to 0, bot tied to 1
+mul_ppgenrow3 I0 ( .head(head), .bot(1'b1), .b2(b2[2:0]),
+     .b1(b1[2:0]), .b0(b0[2:0]), .a(a[63:0]), .sum({s0[69:2],
+     sum[1:0]}), .cout(c0[68:1]));  // row I0: b0..b2, head from the module port, bot tied to 1; low two sum bits bypass the tree
+
+endmodule // mul_array1
+
+module mul_array2 ( pcout, pcoutx2, psum, psumx2, a0c, a0s, a1c, a1s,
+     areg, bot, pc, ps, x2 );
+
+output  pcoutx2, psumx2;
+input  bot, x2;
+output [98:0]  psum;
+output [98:0]  pcout;
+input [81:4]  a1c;
+input [98:30]  pc;
+input [98:31]  ps;
+input [81:0]  a0s;
+input [96:0]  areg;
+input [81:0]  a1s;
+input [81:4]  a0c;
+
+// Buses in the design
+wire  [81:15]  s3;
+wire  [81:15]  c3;
+wire  [96:0]  ain;
+wire  [67:20]  co;
+wire  [82:0]  s1;
+wire  [96:0]  c2;
+wire  [82:0]  c1;
+wire  [96:0]  s2;
+wire	      ainx2, s1x2, c1x2;
+
+mul_mux2 sh_82_ ( .d1(areg[83]), .z(ain[82]), .d0(areg[82]), .s(x2));
+mul_mux2 sh_68_ ( .d1(areg[69]), .z(ain[68]), .d0(areg[68]), .s(x2));
+mul_mux2 sh_67_ ( .d1(areg[68]), .z(ain[67]), .d0(areg[67]), .s(x2));
+mul_mux2 sh_66_ ( .d1(areg[67]), .z(ain[66]), .d0(areg[66]), .s(x2));
+mul_mux2 sh_65_ ( .d1(areg[66]), .z(ain[65]), .d0(areg[65]), .s(x2));
+mul_mux2 sh_64_ ( .d1(areg[65]), .z(ain[64]), .d0(areg[64]), .s(x2));
+mul_mux2 sh_63_ ( .d1(areg[64]), .z(ain[63]), .d0(areg[63]), .s(x2));
+mul_mux2 sh_62_ ( .d1(areg[63]), .z(ain[62]), .d0(areg[62]), .s(x2));
+mul_mux2 sh_61_ ( .d1(areg[62]), .z(ain[61]), .d0(areg[61]), .s(x2));
+mul_mux2 sh_60_ ( .d1(areg[61]), .z(ain[60]), .d0(areg[60]), .s(x2));
+mul_mux2 sh_59_ ( .d1(areg[60]), .z(ain[59]), .d0(areg[59]), .s(x2));
+mul_mux2 sh_58_ ( .d1(areg[59]), .z(ain[58]), .d0(areg[58]), .s(x2));
+mul_mux2 sh_57_ ( .d1(areg[58]), .z(ain[57]), .d0(areg[57]), .s(x2));
+mul_mux2 sh_56_ ( .d1(areg[57]), .z(ain[56]), .d0(areg[56]), .s(x2));
+mul_mux2 sh_55_ ( .d1(areg[56]), .z(ain[55]), .d0(areg[55]), .s(x2));
+mul_mux2 sh_54_ ( .d1(areg[55]), .z(ain[54]), .d0(areg[54]), .s(x2));
+mul_mux2 sh_53_ ( .d1(areg[54]), .z(ain[53]), .d0(areg[53]), .s(x2));
+mul_mux2 sh_52_ ( .d1(areg[53]), .z(ain[52]), .d0(areg[52]), .s(x2));
+mul_mux2 sh_51_ ( .d1(areg[52]), .z(ain[51]), .d0(areg[51]), .s(x2));
+mul_mux2 sh_50_ ( .d1(areg[51]), .z(ain[50]), .d0(areg[50]), .s(x2));
+mul_mux2 sh_49_ ( .d1(areg[50]), .z(ain[49]), .d0(areg[49]), .s(x2));
+mul_mux2 sh_48_ ( .d1(areg[49]), .z(ain[48]), .d0(areg[48]), .s(x2));
+mul_mux2 sh_47_ ( .d1(areg[48]), .z(ain[47]), .d0(areg[47]), .s(x2));
+mul_mux2 sh_46_ ( .d1(areg[47]), .z(ain[46]), .d0(areg[46]), .s(x2));
+mul_mux2 sh_45_ ( .d1(areg[46]), .z(ain[45]), .d0(areg[45]), .s(x2));
+mul_mux2 sh_44_ ( .d1(areg[45]), .z(ain[44]), .d0(areg[44]), .s(x2));
+mul_mux2 sh_43_ ( .d1(areg[44]), .z(ain[43]), .d0(areg[43]), .s(x2));
+mul_mux2 sh_42_ ( .d1(areg[43]), .z(ain[42]), .d0(areg[42]), .s(x2));
+mul_mux2 sh_41_ ( .d1(areg[42]), .z(ain[41]), .d0(areg[41]), .s(x2));
+mul_mux2 sh_40_ ( .d1(areg[41]), .z(ain[40]), .d0(areg[40]), .s(x2));
+mul_mux2 sh_39_ ( .d1(areg[40]), .z(ain[39]), .d0(areg[39]), .s(x2));
+mul_mux2 sh_38_ ( .d1(areg[39]), .z(ain[38]), .d0(areg[38]), .s(x2));
+mul_mux2 sh_37_ ( .d1(areg[38]), .z(ain[37]), .d0(areg[37]), .s(x2));
+mul_mux2 sh_36_ ( .d1(areg[37]), .z(ain[36]), .d0(areg[36]), .s(x2));
+mul_mux2 sh_35_ ( .d1(areg[36]), .z(ain[35]), .d0(areg[35]), .s(x2));
+mul_mux2 sh_34_ ( .d1(areg[35]), .z(ain[34]), .d0(areg[34]), .s(x2));
+mul_mux2 sh_33_ ( .d1(areg[34]), .z(ain[33]), .d0(areg[33]), .s(x2));
+mul_mux2 sh_32_ ( .d1(areg[33]), .z(ain[32]), .d0(areg[32]), .s(x2));
+mul_mux2 sh_31_ ( .d1(areg[32]), .z(ain[31]), .d0(areg[31]), .s(x2));
+mul_mux2 sh_30_ ( .d1(areg[31]), .z(ain[30]), .d0(areg[30]), .s(x2));
+mul_mux2 sh_29_ ( .d1(areg[30]), .z(ain[29]), .d0(areg[29]), .s(x2));
+mul_mux2 sh_28_ ( .d1(areg[29]), .z(ain[28]), .d0(areg[28]), .s(x2));
+mul_mux2 sh_27_ ( .d1(areg[28]), .z(ain[27]), .d0(areg[27]), .s(x2));
+mul_mux2 sh_26_ ( .d1(areg[27]), .z(ain[26]), .d0(areg[26]), .s(x2));
+mul_mux2 sh_25_ ( .d1(areg[26]), .z(ain[25]), .d0(areg[25]), .s(x2));
+mul_mux2 sh_24_ ( .d1(areg[25]), .z(ain[24]), .d0(areg[24]), .s(x2));
+mul_mux2 sh_23_ ( .d1(areg[24]), .z(ain[23]), .d0(areg[23]), .s(x2));
+mul_mux2 sh_22_ ( .d1(areg[23]), .z(ain[22]), .d0(areg[22]), .s(x2));
+mul_mux2 sh_21_ ( .d1(areg[22]), .z(ain[21]), .d0(areg[21]), .s(x2));
+mul_mux2 sh_20_ ( .d1(areg[21]), .z(ain[20]), .d0(areg[20]), .s(x2));
+mul_mux2 sh_96_ ( .d1(1'b0), .z(ain[96]), .d0(areg[96]),
+     .s(x2));
+mul_mux2 sh_95_ ( .d1(areg[96]), .z(ain[95]), .d0(areg[95]), .s(x2));
+mul_mux2 sh_94_ ( .d1(areg[95]), .z(ain[94]), .d0(areg[94]), .s(x2));
+mul_mux2 sh_93_ ( .d1(areg[94]), .z(ain[93]), .d0(areg[93]), .s(x2));
+mul_mux2 sh_92_ ( .d1(areg[93]), .z(ain[92]), .d0(areg[92]), .s(x2));
+mul_mux2 sh_91_ ( .d1(areg[92]), .z(ain[91]), .d0(areg[91]), .s(x2));
+mul_mux2 sh_90_ ( .d1(areg[91]), .z(ain[90]), .d0(areg[90]), .s(x2));
+mul_mux2 sh_89_ ( .d1(areg[90]), .z(ain[89]), .d0(areg[89]), .s(x2));
+mul_mux2 sh_88_ ( .d1(areg[89]), .z(ain[88]), .d0(areg[88]), .s(x2));
+mul_mux2 sh_87_ ( .d1(areg[88]), .z(ain[87]), .d0(areg[87]), .s(x2));
+mul_mux2 sh_86_ ( .d1(areg[87]), .z(ain[86]), .d0(areg[86]), .s(x2));
+mul_mux2 sh_85_ ( .d1(areg[86]), .z(ain[85]), .d0(areg[85]), .s(x2));
+mul_mux2 sh_84_ ( .d1(areg[85]), .z(ain[84]), .d0(areg[84]), .s(x2));
+mul_mux2 sh_0_ ( .d1(areg[1]), .z(ain[0]), .d0(areg[0]), .s(x2)); // bit 0 listed out of numeric order; same areg[i] / areg[i+1] wiring as the other sh_<i> muxes
+mul_mux2 sh_81_ ( .d1(areg[82]), .z(ain[81]), .d0(areg[81]), .s(x2)); // sh_<i>, i=81..69: ain[i] muxes areg[i] (d0) / areg[i+1] (d1), select x2
+mul_mux2 sh_80_ ( .d1(areg[81]), .z(ain[80]), .d0(areg[80]), .s(x2));
+mul_mux2 sh_79_ ( .d1(areg[80]), .z(ain[79]), .d0(areg[79]), .s(x2));
+mul_mux2 sh_78_ ( .d1(areg[79]), .z(ain[78]), .d0(areg[78]), .s(x2));
+mul_mux2 sh_77_ ( .d1(areg[78]), .z(ain[77]), .d0(areg[77]), .s(x2));
+mul_mux2 sh_76_ ( .d1(areg[77]), .z(ain[76]), .d0(areg[76]), .s(x2));
+mul_mux2 sh_75_ ( .d1(areg[76]), .z(ain[75]), .d0(areg[75]), .s(x2));
+mul_mux2 sh_74_ ( .d1(areg[75]), .z(ain[74]), .d0(areg[74]), .s(x2));
+mul_mux2 sh_73_ ( .d1(areg[74]), .z(ain[73]), .d0(areg[73]), .s(x2));
+mul_mux2 sh_72_ ( .d1(areg[73]), .z(ain[72]), .d0(areg[72]), .s(x2));
+mul_mux2 sh_71_ ( .d1(areg[72]), .z(ain[71]), .d0(areg[71]), .s(x2));
+mul_mux2 sh_70_ ( .d1(areg[71]), .z(ain[70]), .d0(areg[70]), .s(x2));
+mul_mux2 sh_69_ ( .d1(areg[70]), .z(ain[69]), .d0(areg[69]), .s(x2));
+mul_mux2 sh_19_ ( .d1(areg[20]), .z(ain[19]), .d0(areg[19]), .s(x2)); // sh_<i>, i=19..15: ain[i] muxes areg[i] (d0) / areg[i+1] (d1), select x2
+mul_mux2 sh_18_ ( .d1(areg[19]), .z(ain[18]), .d0(areg[18]), .s(x2));
+mul_mux2 sh_17_ ( .d1(areg[18]), .z(ain[17]), .d0(areg[17]), .s(x2));
+mul_mux2 sh_16_ ( .d1(areg[17]), .z(ain[16]), .d0(areg[16]), .s(x2));
+mul_mux2 sh_15_ ( .d1(areg[16]), .z(ain[15]), .d0(areg[15]), .s(x2));
+mul_mux2 sh_4_ ( .d1(areg[5]), .z(ain[4]), .d0(areg[4]), .s(x2)); // sh_<i>, i=4..1: same wiring pattern
+mul_mux2 sh_3_ ( .d1(areg[4]), .z(ain[3]), .d0(areg[3]), .s(x2));
+mul_mux2 sh_2_ ( .d1(areg[3]), .z(ain[2]), .d0(areg[2]), .s(x2));
+mul_mux2 sh_1_ ( .d1(areg[2]), .z(ain[1]), .d0(areg[1]), .s(x2));
+mul_mux2 shx2 ( .d1(areg[0]), .z(ainx2), .d0(1'b0), // extra mux below bit 0: d0 is constant 0, d1 is areg[0], output is the scalar net ainx2
+     .s(x2));
+mul_mux2 sh_83_ ( .d1(areg[84]), .z(ain[83]), .d0(areg[83]), .s(x2)); // bit 83 listed out of numeric order; same wiring pattern
+mul_mux2 sh_14_ ( .d1(areg[15]), .z(ain[14]), .d0(areg[14]), .s(x2)); // sh_<i>, i=14..5: same wiring pattern
+mul_mux2 sh_13_ ( .d1(areg[14]), .z(ain[13]), .d0(areg[13]), .s(x2));
+mul_mux2 sh_12_ ( .d1(areg[13]), .z(ain[12]), .d0(areg[12]), .s(x2));
+mul_mux2 sh_11_ ( .d1(areg[12]), .z(ain[11]), .d0(areg[11]), .s(x2));
+mul_mux2 sh_10_ ( .d1(areg[11]), .z(ain[10]), .d0(areg[10]), .s(x2));
+mul_mux2 sh_9_ ( .d1(areg[10]), .z(ain[9]), .d0(areg[9]), .s(x2));
+mul_mux2 sh_8_ ( .d1(areg[9]), .z(ain[8]), .d0(areg[8]), .s(x2));
+mul_mux2 sh_7_ ( .d1(areg[8]), .z(ain[7]), .d0(areg[7]), .s(x2));
+mul_mux2 sh_6_ ( .d1(areg[7]), .z(ain[6]), .d0(areg[6]), .s(x2));
+mul_mux2 sh_5_ ( .d1(areg[6]), .z(ain[5]), .d0(areg[5]), .s(x2));
+mul_csa42  sc3_68_ ( .cin(co[67]), .d(1'b0), // end of the co[] chain: a and d are constant 0 and cout is left unconnected
+     .carry(c3[68]), .c(c2[67]), .b(s2[68]), .a(1'b0),
+     .cout(), .sum(s3[68]));
+mul_csa42  sc3_67_ ( .cin(co[66]), .d(1'b0), // d is constant 0 here; the remaining ports follow the regular sc3_<i> wiring
+     .carry(c3[67]), .c(c2[66]), .b(s2[67]), .a(s1[67]), .cout(co[67]),
+     .sum(s3[67]));
+mul_csa42  sc3_66_ ( .cin(co[65]), .d(c1[65]), .carry(c3[66]), // sc3_<i>, i=66..21: inputs s1[i], s2[i], c2[i-1], c1[i-1], cin=co[i-1]; outputs s3[i], c3[i], cout=co[i] (feeds sc3_<i+1>). NOTE(review): presumably a 4:2 compressor - confirm in mul_csa42
+     .c(c2[65]), .b(s2[66]), .a(s1[66]), .cout(co[66]), .sum(s3[66]));
+mul_csa42  sc3_65_ ( .cin(co[64]), .d(c1[64]), .carry(c3[65]),
+     .c(c2[64]), .b(s2[65]), .a(s1[65]), .cout(co[65]), .sum(s3[65]));
+mul_csa42  sc3_64_ ( .cin(co[63]), .d(c1[63]), .carry(c3[64]),
+     .c(c2[63]), .b(s2[64]), .a(s1[64]), .cout(co[64]), .sum(s3[64]));
+mul_csa42  sc3_63_ ( .cin(co[62]), .d(c1[62]), .carry(c3[63]),
+     .c(c2[62]), .b(s2[63]), .a(s1[63]), .cout(co[63]), .sum(s3[63]));
+mul_csa42  sc3_62_ ( .cin(co[61]), .d(c1[61]), .carry(c3[62]),
+     .c(c2[61]), .b(s2[62]), .a(s1[62]), .cout(co[62]), .sum(s3[62]));
+mul_csa42  sc3_61_ ( .cin(co[60]), .d(c1[60]), .carry(c3[61]),
+     .c(c2[60]), .b(s2[61]), .a(s1[61]), .cout(co[61]), .sum(s3[61]));
+mul_csa42  sc3_60_ ( .cin(co[59]), .d(c1[59]), .carry(c3[60]),
+     .c(c2[59]), .b(s2[60]), .a(s1[60]), .cout(co[60]), .sum(s3[60]));
+mul_csa42  sc3_59_ ( .cin(co[58]), .d(c1[58]), .carry(c3[59]),
+     .c(c2[58]), .b(s2[59]), .a(s1[59]), .cout(co[59]), .sum(s3[59]));
+mul_csa42  sc3_58_ ( .cin(co[57]), .d(c1[57]), .carry(c3[58]),
+     .c(c2[57]), .b(s2[58]), .a(s1[58]), .cout(co[58]), .sum(s3[58]));
+mul_csa42  sc3_57_ ( .cin(co[56]), .d(c1[56]), .carry(c3[57]),
+     .c(c2[56]), .b(s2[57]), .a(s1[57]), .cout(co[57]), .sum(s3[57]));
+mul_csa42  sc3_56_ ( .cin(co[55]), .d(c1[55]), .carry(c3[56]),
+     .c(c2[55]), .b(s2[56]), .a(s1[56]), .cout(co[56]), .sum(s3[56]));
+mul_csa42  sc3_55_ ( .cin(co[54]), .d(c1[54]), .carry(c3[55]),
+     .c(c2[54]), .b(s2[55]), .a(s1[55]), .cout(co[55]), .sum(s3[55]));
+mul_csa42  sc3_54_ ( .cin(co[53]), .d(c1[53]), .carry(c3[54]),
+     .c(c2[53]), .b(s2[54]), .a(s1[54]), .cout(co[54]), .sum(s3[54]));
+mul_csa42  sc3_53_ ( .cin(co[52]), .d(c1[52]), .carry(c3[53]),
+     .c(c2[52]), .b(s2[53]), .a(s1[53]), .cout(co[53]), .sum(s3[53]));
+mul_csa42  sc3_52_ ( .cin(co[51]), .d(c1[51]), .carry(c3[52]),
+     .c(c2[51]), .b(s2[52]), .a(s1[52]), .cout(co[52]), .sum(s3[52]));
+mul_csa42  sc3_51_ ( .cin(co[50]), .d(c1[50]), .carry(c3[51]),
+     .c(c2[50]), .b(s2[51]), .a(s1[51]), .cout(co[51]), .sum(s3[51]));
+mul_csa42  sc3_50_ ( .cin(co[49]), .d(c1[49]), .carry(c3[50]),
+     .c(c2[49]), .b(s2[50]), .a(s1[50]), .cout(co[50]), .sum(s3[50]));
+mul_csa42  sc3_49_ ( .cin(co[48]), .d(c1[48]), .carry(c3[49]),
+     .c(c2[48]), .b(s2[49]), .a(s1[49]), .cout(co[49]), .sum(s3[49]));
+mul_csa42  sc3_48_ ( .cin(co[47]), .d(c1[47]), .carry(c3[48]),
+     .c(c2[47]), .b(s2[48]), .a(s1[48]), .cout(co[48]), .sum(s3[48]));
+mul_csa42  sc3_47_ ( .cin(co[46]), .d(c1[46]), .carry(c3[47]),
+     .c(c2[46]), .b(s2[47]), .a(s1[47]), .cout(co[47]), .sum(s3[47]));
+mul_csa42  sc3_46_ ( .cin(co[45]), .d(c1[45]), .carry(c3[46]),
+     .c(c2[45]), .b(s2[46]), .a(s1[46]), .cout(co[46]), .sum(s3[46]));
+mul_csa42  sc3_45_ ( .cin(co[44]), .d(c1[44]), .carry(c3[45]),
+     .c(c2[44]), .b(s2[45]), .a(s1[45]), .cout(co[45]), .sum(s3[45]));
+mul_csa42  sc3_44_ ( .cin(co[43]), .d(c1[43]), .carry(c3[44]),
+     .c(c2[43]), .b(s2[44]), .a(s1[44]), .cout(co[44]), .sum(s3[44]));
+mul_csa42  sc3_43_ ( .cin(co[42]), .d(c1[42]), .carry(c3[43]),
+     .c(c2[42]), .b(s2[43]), .a(s1[43]), .cout(co[43]), .sum(s3[43]));
+mul_csa42  sc3_42_ ( .cin(co[41]), .d(c1[41]), .carry(c3[42]),
+     .c(c2[41]), .b(s2[42]), .a(s1[42]), .cout(co[42]), .sum(s3[42]));
+mul_csa42  sc3_41_ ( .cin(co[40]), .d(c1[40]), .carry(c3[41]),
+     .c(c2[40]), .b(s2[41]), .a(s1[41]), .cout(co[41]), .sum(s3[41]));
+mul_csa42  sc3_40_ ( .cin(co[39]), .d(c1[39]), .carry(c3[40]),
+     .c(c2[39]), .b(s2[40]), .a(s1[40]), .cout(co[40]), .sum(s3[40]));
+mul_csa42  sc3_39_ ( .cin(co[38]), .d(c1[38]), .carry(c3[39]),
+     .c(c2[38]), .b(s2[39]), .a(s1[39]), .cout(co[39]), .sum(s3[39]));
+mul_csa42  sc3_38_ ( .cin(co[37]), .d(c1[37]), .carry(c3[38]),
+     .c(c2[37]), .b(s2[38]), .a(s1[38]), .cout(co[38]), .sum(s3[38]));
+mul_csa42  sc3_37_ ( .cin(co[36]), .d(c1[36]), .carry(c3[37]),
+     .c(c2[36]), .b(s2[37]), .a(s1[37]), .cout(co[37]), .sum(s3[37]));
+mul_csa42  sc3_36_ ( .cin(co[35]), .d(c1[35]), .carry(c3[36]),
+     .c(c2[35]), .b(s2[36]), .a(s1[36]), .cout(co[36]), .sum(s3[36]));
+mul_csa42  sc3_35_ ( .cin(co[34]), .d(c1[34]), .carry(c3[35]),
+     .c(c2[34]), .b(s2[35]), .a(s1[35]), .cout(co[35]), .sum(s3[35]));
+mul_csa42  sc3_34_ ( .cin(co[33]), .d(c1[33]), .carry(c3[34]),
+     .c(c2[33]), .b(s2[34]), .a(s1[34]), .cout(co[34]), .sum(s3[34]));
+mul_csa42  sc3_33_ ( .cin(co[32]), .d(c1[32]), .carry(c3[33]),
+     .c(c2[32]), .b(s2[33]), .a(s1[33]), .cout(co[33]), .sum(s3[33]));
+mul_csa42  sc3_32_ ( .cin(co[31]), .d(c1[31]), .carry(c3[32]),
+     .c(c2[31]), .b(s2[32]), .a(s1[32]), .cout(co[32]), .sum(s3[32]));
+mul_csa42  sc3_31_ ( .cin(co[30]), .d(c1[30]), .carry(c3[31]),
+     .c(c2[30]), .b(s2[31]), .a(s1[31]), .cout(co[31]), .sum(s3[31]));
+mul_csa42  sc3_30_ ( .cin(co[29]), .d(c1[29]), .carry(c3[30]),
+     .c(c2[29]), .b(s2[30]), .a(s1[30]), .cout(co[30]), .sum(s3[30]));
+mul_csa42  sc3_29_ ( .cin(co[28]), .d(c1[28]), .carry(c3[29]),
+     .c(c2[28]), .b(s2[29]), .a(s1[29]), .cout(co[29]), .sum(s3[29]));
+mul_csa42  sc3_28_ ( .cin(co[27]), .d(c1[27]), .carry(c3[28]),
+     .c(c2[27]), .b(s2[28]), .a(s1[28]), .cout(co[28]), .sum(s3[28]));
+mul_csa42  sc3_27_ ( .cin(co[26]), .d(c1[26]), .carry(c3[27]),
+     .c(c2[26]), .b(s2[27]), .a(s1[27]), .cout(co[27]), .sum(s3[27]));
+mul_csa42  sc3_26_ ( .cin(co[25]), .d(c1[25]), .carry(c3[26]),
+     .c(c2[25]), .b(s2[26]), .a(s1[26]), .cout(co[26]), .sum(s3[26]));
+mul_csa42  sc3_25_ ( .cin(co[24]), .d(c1[24]), .carry(c3[25]),
+     .c(c2[24]), .b(s2[25]), .a(s1[25]), .cout(co[25]), .sum(s3[25]));
+mul_csa42  sc3_24_ ( .cin(co[23]), .d(c1[23]), .carry(c3[24]),
+     .c(c2[23]), .b(s2[24]), .a(s1[24]), .cout(co[24]), .sum(s3[24]));
+mul_csa42  sc3_23_ ( .cin(co[22]), .d(c1[22]), .carry(c3[23]),
+     .c(c2[22]), .b(s2[23]), .a(s1[23]), .cout(co[23]), .sum(s3[23]));
+mul_csa42  sc3_22_ ( .cin(co[21]), .d(c1[21]), .carry(c3[22]),
+     .c(c2[21]), .b(s2[22]), .a(s1[22]), .cout(co[22]), .sum(s3[22]));
+mul_csa42  sc3_21_ ( .cin(co[20]), .d(c1[20]), .carry(c3[21]),
+     .c(c2[20]), .b(s2[21]), .a(s1[21]), .cout(co[21]), .sum(s3[21]));
+mul_csa42  sc3_20_ ( .cin(1'b0), .d(c1[19]), // start of the co[] chain: cin is constant 0
+     .carry(c3[20]), .c(c2[19]), .b(s2[20]), .a(s1[20]), .cout(co[20]),
+     .sum(s3[20]));
+mul_csa32  sc4_82_ ( .c(c3[81]), .b(s2[82]), .a(ain[82]), // index 82 takes s2[82] on b (the regular sc4_<i> cells below take s3[i]); c is still c3[81]
+     .cout(pcout[82]), .sum(psum[82]));
+mul_csa32  sc4_68_ ( .c(c3[67]), .b(s3[68]), .a(ain[68]), // sc4_<i>, i=68..20: inputs c3[i-1], s3[i], ain[i]; outputs psum[i] / pcout[i]. NOTE(review): presumably a 3:2 carry-save adder - confirm in mul_csa32
+     .cout(pcout[68]), .sum(psum[68]));
+mul_csa32  sc4_67_ ( .c(c3[66]), .b(s3[67]), .a(ain[67]),
+     .cout(pcout[67]), .sum(psum[67]));
+mul_csa32  sc4_66_ ( .c(c3[65]), .b(s3[66]), .a(ain[66]),
+     .cout(pcout[66]), .sum(psum[66]));
+mul_csa32  sc4_65_ ( .c(c3[64]), .b(s3[65]), .a(ain[65]),
+     .cout(pcout[65]), .sum(psum[65]));
+mul_csa32  sc4_64_ ( .c(c3[63]), .b(s3[64]), .a(ain[64]),
+     .cout(pcout[64]), .sum(psum[64]));
+mul_csa32  sc4_63_ ( .c(c3[62]), .b(s3[63]), .a(ain[63]),
+     .cout(pcout[63]), .sum(psum[63]));
+mul_csa32  sc4_62_ ( .c(c3[61]), .b(s3[62]), .a(ain[62]),
+     .cout(pcout[62]), .sum(psum[62]));
+mul_csa32  sc4_61_ ( .c(c3[60]), .b(s3[61]), .a(ain[61]),
+     .cout(pcout[61]), .sum(psum[61]));
+mul_csa32  sc4_60_ ( .c(c3[59]), .b(s3[60]), .a(ain[60]),
+     .cout(pcout[60]), .sum(psum[60]));
+mul_csa32  sc4_59_ ( .c(c3[58]), .b(s3[59]), .a(ain[59]),
+     .cout(pcout[59]), .sum(psum[59]));
+mul_csa32  sc4_58_ ( .c(c3[57]), .b(s3[58]), .a(ain[58]),
+     .cout(pcout[58]), .sum(psum[58]));
+mul_csa32  sc4_57_ ( .c(c3[56]), .b(s3[57]), .a(ain[57]),
+     .cout(pcout[57]), .sum(psum[57]));
+mul_csa32  sc4_56_ ( .c(c3[55]), .b(s3[56]), .a(ain[56]),
+     .cout(pcout[56]), .sum(psum[56]));
+mul_csa32  sc4_55_ ( .c(c3[54]), .b(s3[55]), .a(ain[55]),
+     .cout(pcout[55]), .sum(psum[55]));
+mul_csa32  sc4_54_ ( .c(c3[53]), .b(s3[54]), .a(ain[54]),
+     .cout(pcout[54]), .sum(psum[54]));
+mul_csa32  sc4_53_ ( .c(c3[52]), .b(s3[53]), .a(ain[53]),
+     .cout(pcout[53]), .sum(psum[53]));
+mul_csa32  sc4_52_ ( .c(c3[51]), .b(s3[52]), .a(ain[52]),
+     .cout(pcout[52]), .sum(psum[52]));
+mul_csa32  sc4_51_ ( .c(c3[50]), .b(s3[51]), .a(ain[51]),
+     .cout(pcout[51]), .sum(psum[51]));
+mul_csa32  sc4_50_ ( .c(c3[49]), .b(s3[50]), .a(ain[50]),
+     .cout(pcout[50]), .sum(psum[50]));
+mul_csa32  sc4_49_ ( .c(c3[48]), .b(s3[49]), .a(ain[49]),
+     .cout(pcout[49]), .sum(psum[49]));
+mul_csa32  sc4_48_ ( .c(c3[47]), .b(s3[48]), .a(ain[48]),
+     .cout(pcout[48]), .sum(psum[48]));
+mul_csa32  sc4_47_ ( .c(c3[46]), .b(s3[47]), .a(ain[47]),
+     .cout(pcout[47]), .sum(psum[47]));
+mul_csa32  sc4_46_ ( .c(c3[45]), .b(s3[46]), .a(ain[46]),
+     .cout(pcout[46]), .sum(psum[46]));
+mul_csa32  sc4_45_ ( .c(c3[44]), .b(s3[45]), .a(ain[45]),
+     .cout(pcout[45]), .sum(psum[45]));
+mul_csa32  sc4_44_ ( .c(c3[43]), .b(s3[44]), .a(ain[44]),
+     .cout(pcout[44]), .sum(psum[44]));
+mul_csa32  sc4_43_ ( .c(c3[42]), .b(s3[43]), .a(ain[43]),
+     .cout(pcout[43]), .sum(psum[43]));
+mul_csa32  sc4_42_ ( .c(c3[41]), .b(s3[42]), .a(ain[42]),
+     .cout(pcout[42]), .sum(psum[42]));
+mul_csa32  sc4_41_ ( .c(c3[40]), .b(s3[41]), .a(ain[41]),
+     .cout(pcout[41]), .sum(psum[41]));
+mul_csa32  sc4_40_ ( .c(c3[39]), .b(s3[40]), .a(ain[40]),
+     .cout(pcout[40]), .sum(psum[40]));
+mul_csa32  sc4_39_ ( .c(c3[38]), .b(s3[39]), .a(ain[39]),
+     .cout(pcout[39]), .sum(psum[39]));
+mul_csa32  sc4_38_ ( .c(c3[37]), .b(s3[38]), .a(ain[38]),
+     .cout(pcout[38]), .sum(psum[38]));
+mul_csa32  sc4_37_ ( .c(c3[36]), .b(s3[37]), .a(ain[37]),
+     .cout(pcout[37]), .sum(psum[37]));
+mul_csa32  sc4_36_ ( .c(c3[35]), .b(s3[36]), .a(ain[36]),
+     .cout(pcout[36]), .sum(psum[36]));
+mul_csa32  sc4_35_ ( .c(c3[34]), .b(s3[35]), .a(ain[35]),
+     .cout(pcout[35]), .sum(psum[35]));
+mul_csa32  sc4_34_ ( .c(c3[33]), .b(s3[34]), .a(ain[34]),
+     .cout(pcout[34]), .sum(psum[34]));
+mul_csa32  sc4_33_ ( .c(c3[32]), .b(s3[33]), .a(ain[33]),
+     .cout(pcout[33]), .sum(psum[33]));
+mul_csa32  sc4_32_ ( .c(c3[31]), .b(s3[32]), .a(ain[32]),
+     .cout(pcout[32]), .sum(psum[32]));
+mul_csa32  sc4_31_ ( .c(c3[30]), .b(s3[31]), .a(ain[31]),
+     .cout(pcout[31]), .sum(psum[31]));
+mul_csa32  sc4_30_ ( .c(c3[29]), .b(s3[30]), .a(ain[30]),
+     .cout(pcout[30]), .sum(psum[30]));
+mul_csa32  sc4_29_ ( .c(c3[28]), .b(s3[29]), .a(ain[29]),
+     .cout(pcout[29]), .sum(psum[29]));
+mul_csa32  sc4_28_ ( .c(c3[27]), .b(s3[28]), .a(ain[28]),
+     .cout(pcout[28]), .sum(psum[28]));
+mul_csa32  sc4_27_ ( .c(c3[26]), .b(s3[27]), .a(ain[27]),
+     .cout(pcout[27]), .sum(psum[27]));
+mul_csa32  sc4_26_ ( .c(c3[25]), .b(s3[26]), .a(ain[26]),
+     .cout(pcout[26]), .sum(psum[26]));
+mul_csa32  sc4_25_ ( .c(c3[24]), .b(s3[25]), .a(ain[25]),
+     .cout(pcout[25]), .sum(psum[25]));
+mul_csa32  sc4_24_ ( .c(c3[23]), .b(s3[24]), .a(ain[24]),
+     .cout(pcout[24]), .sum(psum[24]));
+mul_csa32  sc4_23_ ( .c(c3[22]), .b(s3[23]), .a(ain[23]),
+     .cout(pcout[23]), .sum(psum[23]));
+mul_csa32  sc4_22_ ( .c(c3[21]), .b(s3[22]), .a(ain[22]),
+     .cout(pcout[22]), .sum(psum[22]));
+mul_csa32  sc4_21_ ( .c(c3[20]), .b(s3[21]), .a(ain[21]),
+     .cout(pcout[21]), .sum(psum[21]));
+mul_csa32  sc4_20_ ( .c(c3[19]), .b(s3[20]), .a(ain[20]),
+     .cout(pcout[20]), .sum(psum[20]));
+mul_csa32  sc4_96_ ( .c(c2[95]), .b(s2[96]), .a(ain[96]), // sc4_<i>, i=96..84: inputs come from the s2/c2 nets (c2[i-1], s2[i]) plus ain[i]; outputs psum[i] / pcout[i]
+     .cout(pcout[96]), .sum(psum[96]));
+mul_csa32  sc4_95_ ( .c(c2[94]), .b(s2[95]), .a(ain[95]),
+     .cout(pcout[95]), .sum(psum[95]));
+mul_csa32  sc4_94_ ( .c(c2[93]), .b(s2[94]), .a(ain[94]),
+     .cout(pcout[94]), .sum(psum[94]));
+mul_csa32  sc4_93_ ( .c(c2[92]), .b(s2[93]), .a(ain[93]),
+     .cout(pcout[93]), .sum(psum[93]));
+mul_csa32  sc4_92_ ( .c(c2[91]), .b(s2[92]), .a(ain[92]),
+     .cout(pcout[92]), .sum(psum[92]));
+mul_csa32  sc4_91_ ( .c(c2[90]), .b(s2[91]), .a(ain[91]),
+     .cout(pcout[91]), .sum(psum[91]));
+mul_csa32  sc4_90_ ( .c(c2[89]), .b(s2[90]), .a(ain[90]),
+     .cout(pcout[90]), .sum(psum[90]));
+mul_csa32  sc4_89_ ( .c(c2[88]), .b(s2[89]), .a(ain[89]),
+     .cout(pcout[89]), .sum(psum[89]));
+mul_csa32  sc4_88_ ( .c(c2[87]), .b(s2[88]), .a(ain[88]),
+     .cout(pcout[88]), .sum(psum[88]));
+mul_csa32  sc4_87_ ( .c(c2[86]), .b(s2[87]), .a(ain[87]),
+     .cout(pcout[87]), .sum(psum[87]));
+mul_csa32  sc4_86_ ( .c(c2[85]), .b(s2[86]), .a(ain[86]),
+     .cout(pcout[86]), .sum(psum[86]));
+mul_csa32  sc4_85_ ( .c(c2[84]), .b(s2[85]), .a(ain[85]),
+     .cout(pcout[85]), .sum(psum[85]));
+mul_csa32  sc4_84_ ( .c(c2[83]), .b(s2[84]), .a(ain[84]),
+     .cout(pcout[84]), .sum(psum[84]));
+mul_csa32  sc4_81_ ( .c(c3[80]), .b(s3[81]), .a(ain[81]), // sc4_<i>, i=81..69: inputs come from the s3/c3 nets (c3[i-1], s3[i]) plus ain[i]; outputs psum[i] / pcout[i]
+     .cout(pcout[81]), .sum(psum[81]));
+mul_csa32  sc4_80_ ( .c(c3[79]), .b(s3[80]), .a(ain[80]),
+     .cout(pcout[80]), .sum(psum[80]));
+mul_csa32  sc4_79_ ( .c(c3[78]), .b(s3[79]), .a(ain[79]),
+     .cout(pcout[79]), .sum(psum[79]));
+mul_csa32  sc4_78_ ( .c(c3[77]), .b(s3[78]), .a(ain[78]),
+     .cout(pcout[78]), .sum(psum[78]));
+mul_csa32  sc4_77_ ( .c(c3[76]), .b(s3[77]), .a(ain[77]),
+     .cout(pcout[77]), .sum(psum[77]));
+mul_csa32  sc4_76_ ( .c(c3[75]), .b(s3[76]), .a(ain[76]),
+     .cout(pcout[76]), .sum(psum[76]));
+mul_csa32  sc4_75_ ( .c(c3[74]), .b(s3[75]), .a(ain[75]),
+     .cout(pcout[75]), .sum(psum[75]));
+mul_csa32  sc4_74_ ( .c(c3[73]), .b(s3[74]), .a(ain[74]),
+     .cout(pcout[74]), .sum(psum[74]));
+mul_csa32  sc4_73_ ( .c(c3[72]), .b(s3[73]), .a(ain[73]),
+     .cout(pcout[73]), .sum(psum[73]));
+mul_csa32  sc4_72_ ( .c(c3[71]), .b(s3[72]), .a(ain[72]),
+     .cout(pcout[72]), .sum(psum[72]));
+mul_csa32  sc4_71_ ( .c(c3[70]), .b(s3[71]), .a(ain[71]),
+     .cout(pcout[71]), .sum(psum[71]));
+mul_csa32  sc4_70_ ( .c(c3[69]), .b(s3[70]), .a(ain[70]),
+     .cout(pcout[70]), .sum(psum[70]));
+mul_csa32  sc4_69_ ( .c(c3[68]), .b(s3[69]), .a(ain[69]),
+     .cout(pcout[69]), .sum(psum[69]));
+mul_csa32  acc_4_ ( .c(c2[3]), .sum(psum[4]), .cout(pcout[4]), // acc_<i>, i=4..1: inputs c2[i-1], s2[i], ain[i]; outputs psum[i] / pcout[i]
+     .a(ain[4]), .b(s2[4]));
+mul_csa32  acc_3_ ( .c(c2[2]), .sum(psum[3]), .cout(pcout[3]),
+     .a(ain[3]), .b(s2[3]));
+mul_csa32  acc_2_ ( .c(c2[1]), .sum(psum[2]), .cout(pcout[2]),
+     .a(ain[2]), .b(s2[2]));
+mul_csa32  acc_1_ ( .c(c2[0]), .sum(psum[1]), .cout(pcout[1]),
+     .a(ain[1]), .b(s2[1]));
+mul_csa32  sc3_97_ ( .c(c2[96]), .sum(psum[97]), .cout(pcout[97]), // index 97: drives psum[97] / pcout[97] directly from c2[96], a1s[81] and a1c[80]; no ain input on this cell
+     .a(a1s[81]), .b(a1c[80]));
+mul_csa32  sc1_19_ ( .c(a1s[3]), .b(pc[50]), .a(ps[51]), // sc1_<i>, i=19..16: inputs a1s[i-16], pc[i+31], ps[i+32]; outputs s1[i] / c1[i]
+     .cout(c1[19]), .sum(s1[19]));
+mul_csa32  sc1_18_ ( .c(a1s[2]), .b(pc[49]), .a(ps[50]),
+     .cout(c1[18]), .sum(s1[18]));
+mul_csa32  sc1_17_ ( .c(a1s[1]), .b(pc[48]), .a(ps[49]),
+     .cout(c1[17]), .sum(s1[17]));
+mul_csa32  sc1_16_ ( .c(a1s[0]), .b(pc[47]), .a(ps[48]),
+     .cout(c1[16]), .sum(s1[16]));
+mul_csa32  sc1_15_ ( .c(1'b0), .b(pc[46]), .a(ps[47]), // c is constant 0: the a1s[i-16] pattern would need index -1 here
+     .cout(c1[15]), .sum(s1[15]));
+mul_csa32  sc4_83_ ( .c(c2[82]), .b(s2[83]), .a(ain[83]), // index 83 output cell: c2[82], s2[83], ain[83] -> psum[83] / pcout[83]
+     .cout(pcout[83]), .sum(psum[83]));
+mul_csa32  sc2_83_ ( .c(c1[82]), .b(a1c[66]), .a(a1s[67]), // index 83: c1[82], a1c[66], a1s[67] -> s2[83] / c2[83]
+     .cout(c2[83]), .sum(s2[83]));
+mul_csa32  sc2_19_ ( .c(a0c[18]), .b(a0s[19]), .a(s1[19]), // sc2_<i>, i=19..15: inputs a0c[i-1], a0s[i], s1[i]; outputs s2[i] / c2[i]
+     .cout(c2[19]), .sum(s2[19]));
+mul_csa32  sc2_18_ ( .c(a0c[17]), .b(a0s[18]), .a(s1[18]),
+     .cout(c2[18]), .sum(s2[18]));
+mul_csa32  sc2_17_ ( .c(a0c[16]), .b(a0s[17]), .a(s1[17]),
+     .cout(c2[17]), .sum(s2[17]));
+mul_csa32  sc2_16_ ( .c(a0c[15]), .b(a0s[16]), .a(s1[16]),
+     .cout(c2[16]), .sum(s2[16]));
+mul_csa32  sc2_15_ ( .c(a0c[14]), .b(a0s[15]), .a(s1[15]),
+     .cout(c2[15]), .sum(s2[15]));
+mul_csa32  sc1_81_ ( .c(a0s[81]), .b(a1c[64]), .a(a1s[65]), // sc1_<i>, i=81..68: inputs a0s[i], a1c[i-17], a1s[i-16]; outputs s1[i] / c1[i]
+     .cout(c1[81]), .sum(s1[81]));
+mul_csa32  sc1_80_ ( .c(a0s[80]), .b(a1c[63]), .a(a1s[64]),
+     .cout(c1[80]), .sum(s1[80]));
+mul_csa32  sc1_79_ ( .c(a0s[79]), .b(a1c[62]), .a(a1s[63]),
+     .cout(c1[79]), .sum(s1[79]));
+mul_csa32  sc1_78_ ( .c(a0s[78]), .b(a1c[61]), .a(a1s[62]),
+     .cout(c1[78]), .sum(s1[78]));
+mul_csa32  sc1_77_ ( .c(a0s[77]), .b(a1c[60]), .a(a1s[61]),
+     .cout(c1[77]), .sum(s1[77]));
+mul_csa32  sc1_76_ ( .c(a0s[76]), .b(a1c[59]), .a(a1s[60]),
+     .cout(c1[76]), .sum(s1[76]));
+mul_csa32  sc1_75_ ( .c(a0s[75]), .b(a1c[58]), .a(a1s[59]),
+     .cout(c1[75]), .sum(s1[75]));
+mul_csa32  sc1_74_ ( .c(a0s[74]), .b(a1c[57]), .a(a1s[58]),
+     .cout(c1[74]), .sum(s1[74]));
+mul_csa32  sc1_73_ ( .c(a0s[73]), .b(a1c[56]), .a(a1s[57]),
+     .cout(c1[73]), .sum(s1[73]));
+mul_csa32  sc1_72_ ( .c(a0s[72]), .b(a1c[55]), .a(a1s[56]),
+     .cout(c1[72]), .sum(s1[72]));
+mul_csa32  sc1_71_ ( .c(a0s[71]), .b(a1c[54]), .a(a1s[55]),
+     .cout(c1[71]), .sum(s1[71]));
+mul_csa32  sc1_70_ ( .c(a0s[70]), .b(a1c[53]), .a(a1s[54]),
+     .cout(c1[70]), .sum(s1[70]));
+mul_csa32  sc1_69_ ( .c(a0s[69]), .b(a1c[52]), .a(a1s[53]),
+     .cout(c1[69]), .sum(s1[69]));
+mul_csa32  sc1_68_ ( .c(a0s[68]), .b(a1c[51]), .a(a1s[52]),
+     .cout(c1[68]), .sum(s1[68]));
+mul_csa32  sc3_19_ ( .c(c2[18]), .b(c1[18]), .a(s2[19]), // sc3_<i>, i=19..15: three-input mul_csa32 cells on c2[i-1], c1[i-1], s2[i]; outputs s3[i] / c3[i]
+     .cout(c3[19]), .sum(s3[19]));
+mul_csa32  sc3_18_ ( .c(c2[17]), .b(c1[17]), .a(s2[18]),
+     .cout(c3[18]), .sum(s3[18]));
+mul_csa32  sc3_17_ ( .c(c2[16]), .b(c1[16]), .a(s2[17]),
+     .cout(c3[17]), .sum(s3[17]));
+mul_csa32  sc3_16_ ( .c(c2[15]), .b(c1[15]), .a(s2[16]),
+     .cout(c3[16]), .sum(s3[16]));
+mul_csa32  sc3_15_ ( .c(c2[14]), .b(c1[14]), .a(s2[15]),
+     .cout(c3[15]), .sum(s3[15]));
+mul_csa32  sc1_82_ ( .c(a0c[81]), .b(a1c[65]), .a(a1s[66]), // index 82: a0c[81], a1c[65], a1s[66] -> s1[82] / c1[82]
+     .cout(c1[82]), .sum(s1[82]));
+mul_csa32  acc_14_ ( .c(c2[13]), .sum(psum[14]), .cout(pcout[14]), // acc_<i>, i=14..5: inputs c2[i-1], s2[i], ain[i]; outputs psum[i] / pcout[i]
+     .a(ain[14]), .b(s2[14]));
+mul_csa32  acc_13_ ( .c(c2[12]), .sum(psum[13]), .cout(pcout[13]),
+     .a(ain[13]), .b(s2[13]));
+mul_csa32  acc_12_ ( .c(c2[11]), .sum(psum[12]), .cout(pcout[12]),
+     .a(ain[12]), .b(s2[12]));
+mul_csa32  acc_11_ ( .c(c2[10]), .sum(psum[11]), .cout(pcout[11]),
+     .a(ain[11]), .b(s2[11]));
+mul_csa32  acc_10_ ( .c(c2[9]), .sum(psum[10]), .cout(pcout[10]),
+     .a(ain[10]), .b(s2[10]));
+mul_csa32  acc_9_ ( .c(c2[8]), .sum(psum[9]), .cout(pcout[9]),
+     .a(ain[9]), .b(s2[9]));
+mul_csa32  acc_8_ ( .c(c2[7]), .sum(psum[8]), .cout(pcout[8]),
+     .a(ain[8]), .b(s2[8]));
+mul_csa32  acc_7_ ( .c(c2[6]), .sum(psum[7]), .cout(pcout[7]),
+     .a(ain[7]), .b(s2[7]));
+mul_csa32  acc_6_ ( .c(c2[5]), .sum(psum[6]), .cout(pcout[6]),
+     .a(ain[6]), .b(s2[6]));
+mul_csa32  acc_5_ ( .c(c2[4]), .sum(psum[5]), .cout(pcout[5]),
+     .a(ain[5]), .b(s2[5]));
+mul_csa32  sc2_67_ ( .c(a0c[66]), .b(c1[66]), .a(a0s[67]), // index 67: a0c[66], c1[66], a0s[67] -> s2[67] / c2[67]
+     .cout(c2[67]), .sum(s2[67]));
+mul_csa32  sc1_14_ ( .c(a0s[14]), .b(pc[45]), .a(ps[46]), // sc1_<i>, i=14..5: inputs a0s[i], pc[i+31], ps[i+32]; outputs s1[i] / c1[i]
+     .cout(c1[14]), .sum(s1[14]));
+mul_csa32  sc1_13_ ( .c(a0s[13]), .b(pc[44]), .a(ps[45]),
+     .cout(c1[13]), .sum(s1[13]));
+mul_csa32  sc1_12_ ( .c(a0s[12]), .b(pc[43]), .a(ps[44]),
+     .cout(c1[12]), .sum(s1[12]));
+mul_csa32  sc1_11_ ( .c(a0s[11]), .b(pc[42]), .a(ps[43]),
+     .cout(c1[11]), .sum(s1[11]));
+mul_csa32  sc1_10_ ( .c(a0s[10]), .b(pc[41]), .a(ps[42]),
+     .cout(c1[10]), .sum(s1[10]));
+mul_csa32  sc1_9_ ( .c(a0s[9]), .b(pc[40]), .a(ps[41]), .cout(c1[9]),
+     .sum(s1[9]));
+mul_csa32  sc1_8_ ( .c(a0s[8]), .b(pc[39]), .a(ps[40]), .cout(c1[8]),
+     .sum(s1[8]));
+mul_csa32  sc1_7_ ( .c(a0s[7]), .b(pc[38]), .a(ps[39]), .cout(c1[7]),
+     .sum(s1[7]));
+mul_csa32  sc1_6_ ( .c(a0s[6]), .b(pc[37]), .a(ps[38]), .cout(c1[6]),
+     .sum(s1[6]));
+mul_csa32  sc1_5_ ( .c(a0s[5]), .b(pc[36]), .a(ps[37]), .cout(c1[5]),
+     .sum(s1[5]));
+mul_csa32  sc2_14_ ( .c(a0c[13]), .b(c1[13]), .a(s1[14]), // sc2_<i>, i=14..5: inputs a0c[i-1], c1[i-1], s1[i]; outputs s2[i] / c2[i]
+     .cout(c2[14]), .sum(s2[14]));
+mul_csa32  sc2_13_ ( .c(a0c[12]), .b(c1[12]), .a(s1[13]),
+     .cout(c2[13]), .sum(s2[13]));
+mul_csa32  sc2_12_ ( .c(a0c[11]), .b(c1[11]), .a(s1[12]),
+     .cout(c2[12]), .sum(s2[12]));
+mul_csa32  sc2_11_ ( .c(a0c[10]), .b(c1[10]), .a(s1[11]),
+     .cout(c2[11]), .sum(s2[11]));
+mul_csa32  sc2_10_ ( .c(a0c[9]), .b(c1[9]), .a(s1[10]),
+     .cout(c2[10]), .sum(s2[10]));
+mul_csa32  sc2_9_ ( .c(a0c[8]), .b(c1[8]), .a(s1[9]), .cout(c2[9]),
+     .sum(s2[9]));
+mul_csa32  sc2_8_ ( .c(a0c[7]), .b(c1[7]), .a(s1[8]), .cout(c2[8]),
+     .sum(s2[8]));
+mul_csa32  sc2_7_ ( .c(a0c[6]), .b(c1[6]), .a(s1[7]), .cout(c2[7]),
+     .sum(s2[7]));
+mul_csa32  sc2_6_ ( .c(a0c[5]), .b(c1[5]), .a(s1[6]), .cout(c2[6]),
+     .sum(s2[6]));
+mul_csa32  sc2_5_ ( .c(a0c[4]), .b(c1[4]), .a(s1[5]), .cout(c2[5]),
+     .sum(s2[5]));
+mul_csa32  sc2_82_ ( .c(c2[81]), .b(c1[81]), .a(s1[82]), // index 82: the c input is c2[81] (a c2 net, not an a0c bit), so this cell consumes and produces c2 nets
+     .cout(c2[82]), .sum(s2[82]));
+mul_csa32  sc1_4_ ( .c(a0s[4]), .b(pc[35]), .a(ps[36]), .cout(c1[4]), // sc1_<i>, i=4..1: inputs a0s[i], pc[i+31], ps[i+32]; outputs s1[i] / c1[i]
+     .sum(s1[4]));
+mul_csa32  sc1_3_ ( .c(a0s[3]), .b(pc[34]), .a(ps[35]), .cout(c1[3]),
+     .sum(s1[3]));
+mul_csa32  sc1_2_ ( .c(a0s[2]), .b(pc[33]), .a(ps[34]), .cout(c1[2]),
+     .sum(s1[2]));
+mul_csa32  sc1_1_ ( .c(a0s[1]), .b(pc[32]), .a(ps[33]), .cout(c1[1]),
+     .sum(s1[1]));
+mul_csa32  sc2_66_ ( .c(a0c[65]), .b(a0s[66]), .a(a1c[49]), // sc2_<i>, i=66..21: inputs a0c[i-1], a0s[i], a1c[i-17]; outputs s2[i] / c2[i]
+     .cout(c2[66]), .sum(s2[66]));
+mul_csa32  sc2_65_ ( .c(a0c[64]), .b(a0s[65]), .a(a1c[48]),
+     .cout(c2[65]), .sum(s2[65]));
+mul_csa32  sc2_64_ ( .c(a0c[63]), .b(a0s[64]), .a(a1c[47]),
+     .cout(c2[64]), .sum(s2[64]));
+mul_csa32  sc2_63_ ( .c(a0c[62]), .b(a0s[63]), .a(a1c[46]),
+     .cout(c2[63]), .sum(s2[63]));
+mul_csa32  sc2_62_ ( .c(a0c[61]), .b(a0s[62]), .a(a1c[45]),
+     .cout(c2[62]), .sum(s2[62]));
+mul_csa32  sc2_61_ ( .c(a0c[60]), .b(a0s[61]), .a(a1c[44]),
+     .cout(c2[61]), .sum(s2[61]));
+mul_csa32  sc2_60_ ( .c(a0c[59]), .b(a0s[60]), .a(a1c[43]),
+     .cout(c2[60]), .sum(s2[60]));
+mul_csa32  sc2_59_ ( .c(a0c[58]), .b(a0s[59]), .a(a1c[42]),
+     .cout(c2[59]), .sum(s2[59]));
+mul_csa32  sc2_58_ ( .c(a0c[57]), .b(a0s[58]), .a(a1c[41]),
+     .cout(c2[58]), .sum(s2[58]));
+mul_csa32  sc2_57_ ( .c(a0c[56]), .b(a0s[57]), .a(a1c[40]),
+     .cout(c2[57]), .sum(s2[57]));
+mul_csa32  sc2_56_ ( .c(a0c[55]), .b(a0s[56]), .a(a1c[39]),
+     .cout(c2[56]), .sum(s2[56]));
+mul_csa32  sc2_55_ ( .c(a0c[54]), .b(a0s[55]), .a(a1c[38]),
+     .cout(c2[55]), .sum(s2[55]));
+mul_csa32  sc2_54_ ( .c(a0c[53]), .b(a0s[54]), .a(a1c[37]),
+     .cout(c2[54]), .sum(s2[54]));
+mul_csa32  sc2_53_ ( .c(a0c[52]), .b(a0s[53]), .a(a1c[36]),
+     .cout(c2[53]), .sum(s2[53]));
+mul_csa32  sc2_52_ ( .c(a0c[51]), .b(a0s[52]), .a(a1c[35]),
+     .cout(c2[52]), .sum(s2[52]));
+mul_csa32  sc2_51_ ( .c(a0c[50]), .b(a0s[51]), .a(a1c[34]),
+     .cout(c2[51]), .sum(s2[51]));
+mul_csa32  sc2_50_ ( .c(a0c[49]), .b(a0s[50]), .a(a1c[33]),
+     .cout(c2[50]), .sum(s2[50]));
+mul_csa32  sc2_49_ ( .c(a0c[48]), .b(a0s[49]), .a(a1c[32]),
+     .cout(c2[49]), .sum(s2[49]));
+mul_csa32  sc2_48_ ( .c(a0c[47]), .b(a0s[48]), .a(a1c[31]),
+     .cout(c2[48]), .sum(s2[48]));
+mul_csa32  sc2_47_ ( .c(a0c[46]), .b(a0s[47]), .a(a1c[30]),
+     .cout(c2[47]), .sum(s2[47]));
+mul_csa32  sc2_46_ ( .c(a0c[45]), .b(a0s[46]), .a(a1c[29]),
+     .cout(c2[46]), .sum(s2[46]));
+mul_csa32  sc2_45_ ( .c(a0c[44]), .b(a0s[45]), .a(a1c[28]),
+     .cout(c2[45]), .sum(s2[45]));
+mul_csa32  sc2_44_ ( .c(a0c[43]), .b(a0s[44]), .a(a1c[27]),
+     .cout(c2[44]), .sum(s2[44]));
+mul_csa32  sc2_43_ ( .c(a0c[42]), .b(a0s[43]), .a(a1c[26]),
+     .cout(c2[43]), .sum(s2[43]));
+mul_csa32  sc2_42_ ( .c(a0c[41]), .b(a0s[42]), .a(a1c[25]),
+     .cout(c2[42]), .sum(s2[42]));
+mul_csa32  sc2_41_ ( .c(a0c[40]), .b(a0s[41]), .a(a1c[24]),
+     .cout(c2[41]), .sum(s2[41]));
+mul_csa32  sc2_40_ ( .c(a0c[39]), .b(a0s[40]), .a(a1c[23]),
+     .cout(c2[40]), .sum(s2[40]));
+mul_csa32  sc2_39_ ( .c(a0c[38]), .b(a0s[39]), .a(a1c[22]),
+     .cout(c2[39]), .sum(s2[39]));
+mul_csa32  sc2_38_ ( .c(a0c[37]), .b(a0s[38]), .a(a1c[21]),
+     .cout(c2[38]), .sum(s2[38]));
+mul_csa32  sc2_37_ ( .c(a0c[36]), .b(a0s[37]), .a(a1c[20]),
+     .cout(c2[37]), .sum(s2[37]));
+mul_csa32  sc2_36_ ( .c(a0c[35]), .b(a0s[36]), .a(a1c[19]),
+     .cout(c2[36]), .sum(s2[36]));
+mul_csa32  sc2_35_ ( .c(a0c[34]), .b(a0s[35]), .a(a1c[18]),
+     .cout(c2[35]), .sum(s2[35]));
+mul_csa32  sc2_34_ ( .c(a0c[33]), .b(a0s[34]), .a(a1c[17]),
+     .cout(c2[34]), .sum(s2[34]));
+mul_csa32  sc2_33_ ( .c(a0c[32]), .b(a0s[33]), .a(a1c[16]),
+     .cout(c2[33]), .sum(s2[33]));
+mul_csa32  sc2_32_ ( .c(a0c[31]), .b(a0s[32]), .a(a1c[15]),
+     .cout(c2[32]), .sum(s2[32]));
+mul_csa32  sc2_31_ ( .c(a0c[30]), .b(a0s[31]), .a(a1c[14]),
+     .cout(c2[31]), .sum(s2[31]));
+mul_csa32  sc2_30_ ( .c(a0c[29]), .b(a0s[30]), .a(a1c[13]),
+     .cout(c2[30]), .sum(s2[30]));
+mul_csa32  sc2_29_ ( .c(a0c[28]), .b(a0s[29]), .a(a1c[12]),
+     .cout(c2[29]), .sum(s2[29]));
+mul_csa32  sc2_28_ ( .c(a0c[27]), .b(a0s[28]), .a(a1c[11]),
+     .cout(c2[28]), .sum(s2[28]));
+mul_csa32  sc2_27_ ( .c(a0c[26]), .b(a0s[27]), .a(a1c[10]),
+     .cout(c2[27]), .sum(s2[27]));
+mul_csa32  sc2_26_ ( .c(a0c[25]), .b(a0s[26]), .a(a1c[9]),
+     .cout(c2[26]), .sum(s2[26]));
+mul_csa32  sc2_25_ ( .c(a0c[24]), .b(a0s[25]), .a(a1c[8]),
+     .cout(c2[25]), .sum(s2[25]));
+mul_csa32  sc2_24_ ( .c(a0c[23]), .b(a0s[24]), .a(a1c[7]),
+     .cout(c2[24]), .sum(s2[24]));
+mul_csa32  sc2_23_ ( .c(a0c[22]), .b(a0s[23]), .a(a1c[6]),
+     .cout(c2[23]), .sum(s2[23]));
+mul_csa32  sc2_22_ ( .c(a0c[21]), .b(a0s[22]), .a(a1c[5]),
+     .cout(c2[22]), .sum(s2[22]));
+mul_csa32  sc2_21_ ( .c(a0c[20]), .b(a0s[21]), .a(a1c[4]),
+     .cout(c2[21]), .sum(s2[21]));
+mul_csa32  sc2_20_ ( .c(a0c[19]), .b(a0s[20]), .a(1'b0), // a is constant 0 where the a1c[i-17] pattern would give a1c[3]
+     .cout(c2[20]), .sum(s2[20]));
+mul_csa32  sc1_66_ ( .c(a1s[50]), .b(pc[97]), .a(ps[98]), // sc1_<i>, i=66..20: inputs a1s[i-16], pc[i+31], ps[i+32]; outputs s1[i] / c1[i]
+     .cout(c1[66]), .sum(s1[66]));
+mul_csa32  sc1_65_ ( .c(a1s[49]), .b(pc[96]), .a(ps[97]),
+     .cout(c1[65]), .sum(s1[65]));
+mul_csa32  sc1_64_ ( .c(a1s[48]), .b(pc[95]), .a(ps[96]),
+     .cout(c1[64]), .sum(s1[64]));
+mul_csa32  sc1_63_ ( .c(a1s[47]), .b(pc[94]), .a(ps[95]),
+     .cout(c1[63]), .sum(s1[63]));
+mul_csa32  sc1_62_ ( .c(a1s[46]), .b(pc[93]), .a(ps[94]),
+     .cout(c1[62]), .sum(s1[62]));
+mul_csa32  sc1_61_ ( .c(a1s[45]), .b(pc[92]), .a(ps[93]),
+     .cout(c1[61]), .sum(s1[61]));
+mul_csa32  sc1_60_ ( .c(a1s[44]), .b(pc[91]), .a(ps[92]),
+     .cout(c1[60]), .sum(s1[60]));
+mul_csa32  sc1_59_ ( .c(a1s[43]), .b(pc[90]), .a(ps[91]),
+     .cout(c1[59]), .sum(s1[59]));
+mul_csa32  sc1_58_ ( .c(a1s[42]), .b(pc[89]), .a(ps[90]),
+     .cout(c1[58]), .sum(s1[58]));
+mul_csa32  sc1_57_ ( .c(a1s[41]), .b(pc[88]), .a(ps[89]),
+     .cout(c1[57]), .sum(s1[57]));
+mul_csa32  sc1_56_ ( .c(a1s[40]), .b(pc[87]), .a(ps[88]),
+     .cout(c1[56]), .sum(s1[56]));
+mul_csa32  sc1_55_ ( .c(a1s[39]), .b(pc[86]), .a(ps[87]),
+     .cout(c1[55]), .sum(s1[55]));
+mul_csa32  sc1_54_ ( .c(a1s[38]), .b(pc[85]), .a(ps[86]),
+     .cout(c1[54]), .sum(s1[54]));
+mul_csa32  sc1_53_ ( .c(a1s[37]), .b(pc[84]), .a(ps[85]),
+     .cout(c1[53]), .sum(s1[53]));
+mul_csa32  sc1_52_ ( .c(a1s[36]), .b(pc[83]), .a(ps[84]),
+     .cout(c1[52]), .sum(s1[52]));
+mul_csa32  sc1_51_ ( .c(a1s[35]), .b(pc[82]), .a(ps[83]),
+     .cout(c1[51]), .sum(s1[51]));
+mul_csa32  sc1_50_ ( .c(a1s[34]), .b(pc[81]), .a(ps[82]),
+     .cout(c1[50]), .sum(s1[50]));
+mul_csa32  sc1_49_ ( .c(a1s[33]), .b(pc[80]), .a(ps[81]),
+     .cout(c1[49]), .sum(s1[49]));
+mul_csa32  sc1_48_ ( .c(a1s[32]), .b(pc[79]), .a(ps[80]),
+     .cout(c1[48]), .sum(s1[48]));
+mul_csa32  sc1_47_ ( .c(a1s[31]), .b(pc[78]), .a(ps[79]),
+     .cout(c1[47]), .sum(s1[47]));
+mul_csa32  sc1_46_ ( .c(a1s[30]), .b(pc[77]), .a(ps[78]),
+     .cout(c1[46]), .sum(s1[46]));
+mul_csa32  sc1_45_ ( .c(a1s[29]), .b(pc[76]), .a(ps[77]),
+     .cout(c1[45]), .sum(s1[45]));
+mul_csa32  sc1_44_ ( .c(a1s[28]), .b(pc[75]), .a(ps[76]),
+     .cout(c1[44]), .sum(s1[44]));
+mul_csa32  sc1_43_ ( .c(a1s[27]), .b(pc[74]), .a(ps[75]),
+     .cout(c1[43]), .sum(s1[43]));
+mul_csa32  sc1_42_ ( .c(a1s[26]), .b(pc[73]), .a(ps[74]),
+     .cout(c1[42]), .sum(s1[42]));
+mul_csa32  sc1_41_ ( .c(a1s[25]), .b(pc[72]), .a(ps[73]),
+     .cout(c1[41]), .sum(s1[41]));
+mul_csa32  sc1_40_ ( .c(a1s[24]), .b(pc[71]), .a(ps[72]),
+     .cout(c1[40]), .sum(s1[40]));
+mul_csa32  sc1_39_ ( .c(a1s[23]), .b(pc[70]), .a(ps[71]),
+     .cout(c1[39]), .sum(s1[39]));
+mul_csa32  sc1_38_ ( .c(a1s[22]), .b(pc[69]), .a(ps[70]),
+     .cout(c1[38]), .sum(s1[38]));
+mul_csa32  sc1_37_ ( .c(a1s[21]), .b(pc[68]), .a(ps[69]),
+     .cout(c1[37]), .sum(s1[37]));
+mul_csa32  sc1_36_ ( .c(a1s[20]), .b(pc[67]), .a(ps[68]),
+     .cout(c1[36]), .sum(s1[36]));
+mul_csa32  sc1_35_ ( .c(a1s[19]), .b(pc[66]), .a(ps[67]),
+     .cout(c1[35]), .sum(s1[35]));
+mul_csa32  sc1_34_ ( .c(a1s[18]), .b(pc[65]), .a(ps[66]),
+     .cout(c1[34]), .sum(s1[34]));
+mul_csa32  sc1_33_ ( .c(a1s[17]), .b(pc[64]), .a(ps[65]),
+     .cout(c1[33]), .sum(s1[33]));
+mul_csa32  sc1_32_ ( .c(a1s[16]), .b(pc[63]), .a(ps[64]),
+     .cout(c1[32]), .sum(s1[32]));
+mul_csa32  sc1_31_ ( .c(a1s[15]), .b(pc[62]), .a(ps[63]),
+     .cout(c1[31]), .sum(s1[31]));
+mul_csa32  sc1_30_ ( .c(a1s[14]), .b(pc[61]), .a(ps[62]),
+     .cout(c1[30]), .sum(s1[30]));
+mul_csa32  sc1_29_ ( .c(a1s[13]), .b(pc[60]), .a(ps[61]),
+     .cout(c1[29]), .sum(s1[29]));
+mul_csa32  sc1_28_ ( .c(a1s[12]), .b(pc[59]), .a(ps[60]),
+     .cout(c1[28]), .sum(s1[28]));
+mul_csa32  sc1_27_ ( .c(a1s[11]), .b(pc[58]), .a(ps[59]),
+     .cout(c1[27]), .sum(s1[27]));
+mul_csa32  sc1_26_ ( .c(a1s[10]), .b(pc[57]), .a(ps[58]),
+     .cout(c1[26]), .sum(s1[26]));
+mul_csa32  sc1_25_ ( .c(a1s[9]), .b(pc[56]), .a(ps[57]),
+     .cout(c1[25]), .sum(s1[25]));
+mul_csa32  sc1_24_ ( .c(a1s[8]), .b(pc[55]), .a(ps[56]),
+     .cout(c1[24]), .sum(s1[24]));
+mul_csa32  sc1_23_ ( .c(a1s[7]), .b(pc[54]), .a(ps[55]),
+     .cout(c1[23]), .sum(s1[23]));
+mul_csa32  sc1_22_ ( .c(a1s[6]), .b(pc[53]), .a(ps[54]),
+     .cout(c1[22]), .sum(s1[22]));
+mul_csa32  sc1_21_ ( .c(a1s[5]), .b(pc[52]), .a(ps[53]),
+     .cout(c1[21]), .sum(s1[21]));
+mul_csa32  sc1_20_ ( .c(a1s[4]), .b(pc[51]), .a(ps[52]),
+     .cout(c1[20]), .sum(s1[20]));
+mul_csa32  sc2_81_ ( .c(a0c[80]), .b(c1[80]), .a(s1[81]),
+     .cout(c2[81]), .sum(s2[81]));
+mul_csa32  sc2_80_ ( .c(a0c[79]), .b(c1[79]), .a(s1[80]),
+     .cout(c2[80]), .sum(s2[80]));
+mul_csa32  sc2_79_ ( .c(a0c[78]), .b(c1[78]), .a(s1[79]),
+     .cout(c2[79]), .sum(s2[79]));
+mul_csa32  sc2_78_ ( .c(a0c[77]), .b(c1[77]), .a(s1[78]),
+     .cout(c2[78]), .sum(s2[78]));
+mul_csa32  sc2_77_ ( .c(a0c[76]), .b(c1[76]), .a(s1[77]),
+     .cout(c2[77]), .sum(s2[77]));
+mul_csa32  sc2_76_ ( .c(a0c[75]), .b(c1[75]), .a(s1[76]),
+     .cout(c2[76]), .sum(s2[76]));
+mul_csa32  sc2_75_ ( .c(a0c[74]), .b(c1[74]), .a(s1[75]),
+     .cout(c2[75]), .sum(s2[75]));
+mul_csa32  sc2_74_ ( .c(a0c[73]), .b(c1[73]), .a(s1[74]),
+     .cout(c2[74]), .sum(s2[74]));
+mul_csa32  sc2_73_ ( .c(a0c[72]), .b(c1[72]), .a(s1[73]),
+     .cout(c2[73]), .sum(s2[73]));
+mul_csa32  sc2_72_ ( .c(a0c[71]), .b(c1[71]), .a(s1[72]),
+     .cout(c2[72]), .sum(s2[72]));
+mul_csa32  sc2_71_ ( .c(a0c[70]), .b(c1[70]), .a(s1[71]),
+     .cout(c2[71]), .sum(s2[71]));
+mul_csa32  sc2_70_ ( .c(a0c[69]), .b(c1[69]), .a(s1[70]),
+     .cout(c2[70]), .sum(s2[70]));
+mul_csa32  sc2_69_ ( .c(a0c[68]), .b(c1[68]), .a(s1[69]),
+     .cout(c2[69]), .sum(s2[69]));
+mul_csa32  sc2_68_ ( .c(a0c[67]), .b(c1[67]), .a(s1[68]),
+     .cout(c2[68]), .sum(s2[68]));
+mul_csa32  acc_19_ ( .c(c3[18]), .b(s3[19]), .a(ain[19]),
+     .cout(pcout[19]), .sum(psum[19]));
+mul_csa32  acc_18_ ( .c(c3[17]), .b(s3[18]), .a(ain[18]),
+     .cout(pcout[18]), .sum(psum[18]));
+mul_csa32  acc_17_ ( .c(c3[16]), .b(s3[17]), .a(ain[17]),
+     .cout(pcout[17]), .sum(psum[17]));
+mul_csa32  acc_16_ ( .c(c3[15]), .b(s3[16]), .a(ain[16]),
+     .cout(pcout[16]), .sum(psum[16]));
+mul_csa32  acc_15_ ( .c(1'b0), .b(s3[15]), .a(ain[15]),
+     .cout(pcout[15]), .sum(psum[15]));
+mul_csa32  sc1_0_ ( .c(a0s[0]), .sum(s1[0]), .cout(c1[0]),
+     .a(ps[32]), .b(pc[31]));
+mul_csa32  sc1_67_ ( .c(a1c[50]), .b(pc[98]), .a(a1s[51]),
+     .cout(c1[67]), .sum(s1[67]));
+mul_ha acc_0_ ( .sum(psum[0]), .cout(pcout[0]), .a(ain[0]),
+     .b(s2[0]));
+mul_ha sc3_98_ ( .sum(psum[98]), .cout(pcout[98]), .a(bot),
+     .b(a1c[81]));
+mul_ha sc2_96_ ( .b(a1c[79]), .a(a1s[80]), .cout(c2[96]),
+     .sum(s2[96]));
+mul_ha sc2_95_ ( .b(a1c[78]), .a(a1s[79]), .cout(c2[95]),
+     .sum(s2[95]));
+mul_ha sc2_94_ ( .b(a1c[77]), .a(a1s[78]), .cout(c2[94]),
+     .sum(s2[94]));
+mul_ha sc2_93_ ( .b(a1c[76]), .a(a1s[77]), .cout(c2[93]),
+     .sum(s2[93]));
+mul_ha sc2_92_ ( .b(a1c[75]), .a(a1s[76]), .cout(c2[92]),
+     .sum(s2[92]));
+mul_ha sc2_91_ ( .b(a1c[74]), .a(a1s[75]), .cout(c2[91]),
+     .sum(s2[91]));
+mul_ha sc2_90_ ( .b(a1c[73]), .a(a1s[74]), .cout(c2[90]),
+     .sum(s2[90]));
+mul_ha sc2_89_ ( .b(a1c[72]), .a(a1s[73]), .cout(c2[89]),
+     .sum(s2[89]));
+mul_ha sc2_88_ ( .b(a1c[71]), .a(a1s[72]), .cout(c2[88]),
+     .sum(s2[88]));
+mul_ha sc2_87_ ( .b(a1c[70]), .a(a1s[71]), .cout(c2[87]),
+     .sum(s2[87]));
+mul_ha sc2_86_ ( .b(a1c[69]), .a(a1s[70]), .cout(c2[86]),
+     .sum(s2[86]));
+mul_ha sc2_85_ ( .b(a1c[68]), .a(a1s[69]), .cout(c2[85]),
+     .sum(s2[85]));
+mul_ha sc2_84_ ( .b(a1c[67]), .a(a1s[68]), .cout(c2[84]),
+     .sum(s2[84]));
+mul_ha sc3_81_ ( .b(c2[80]), .a(s2[81]), .cout(c3[81]),
+     .sum(s3[81]));
+mul_ha sc3_80_ ( .b(c2[79]), .a(s2[80]), .cout(c3[80]),
+     .sum(s3[80]));
+mul_ha sc3_79_ ( .b(c2[78]), .a(s2[79]), .cout(c3[79]),
+     .sum(s3[79]));
+mul_ha sc3_78_ ( .b(c2[77]), .a(s2[78]), .cout(c3[78]),
+     .sum(s3[78]));
+mul_ha sc3_77_ ( .b(c2[76]), .a(s2[77]), .cout(c3[77]),
+     .sum(s3[77]));
+mul_ha sc3_76_ ( .b(c2[75]), .a(s2[76]), .cout(c3[76]),
+     .sum(s3[76]));
+mul_ha sc3_75_ ( .b(c2[74]), .a(s2[75]), .cout(c3[75]),
+     .sum(s3[75]));
+mul_ha sc3_74_ ( .b(c2[73]), .a(s2[74]), .cout(c3[74]),
+     .sum(s3[74]));
+mul_ha sc3_73_ ( .b(c2[72]), .a(s2[73]), .cout(c3[73]),
+     .sum(s3[73]));
+mul_ha sc3_72_ ( .b(c2[71]), .a(s2[72]), .cout(c3[72]),
+     .sum(s3[72]));
+mul_ha sc3_71_ ( .b(c2[70]), .a(s2[71]), .cout(c3[71]),
+     .sum(s3[71]));
+mul_ha sc3_70_ ( .b(c2[69]), .a(s2[70]), .cout(c3[70]),
+     .sum(s3[70]));
+mul_ha sc3_69_ ( .b(c2[68]), .a(s2[69]), .cout(c3[69]),
+     .sum(s3[69]));
+mul_ha accx2 ( .sum(psumx2), .cout(pcoutx2), .a(ainx2), .b(s1x2));
+mul_ha sc2_4_ ( .sum(s2[4]), .cout(c2[4]), .a(s1[4]), .b(c1[3]));
+mul_ha sc2_3_ ( .sum(s2[3]), .cout(c2[3]), .a(s1[3]), .b(c1[2]));
+mul_ha sc2_2_ ( .sum(s2[2]), .cout(c2[2]), .a(s1[2]), .b(c1[1]));
+mul_ha sc2_1_ ( .sum(s2[1]), .cout(c2[1]), .a(s1[1]), .b(c1[0]));
+mul_ha sc2_0_ ( .sum(s2[0]), .cout(c2[0]), .a(s1[0]), .b(c1x2));
+mul_ha sc1x2 ( .sum(s1x2), .cout(c1x2), .a(ps[31]), .b(pc[30]));
+
+endmodule //mul_array2
+
+// 3:2 carry-save adder cell (full adder): a + b + c == {cout, sum}.
+// sum is the XOR of the three inputs; cout is set when at least two are set.
+module mul_csa32 (sum, cout, a, b, c);
+
+output sum, cout;
+input a, b, c;
+
+// Pairwise products of the addends; any one of them raises the carry.
+wire both_ab = a & b;
+wire both_ac = a & c;
+wire both_bc = b & c;
+
+assign cout = both_ab | both_ac | both_bc;
+
+assign sum = (a ^ b) ^ c;
+
+endmodule //mul_csa32
+
+// 4:2 compressor cell: a + b + c + d + cin == sum + 2*carry + 2*cout.
+// cout depends only on a, c and d (not on b or cin).
+module mul_csa42 (sum, carry, cout, a, b, c, d, cin);
+
+output sum, carry, cout;
+input a, b, c, d, cin;
+
+wire par_cd   = c ^ d;              // parity of c, d
+wire par_abcd = (a ^ b) ^ par_cd;   // parity of a, b, c, d
+
+assign sum = par_abcd ^ cin;
+
+// AND-OR selects: carry takes cin when par_abcd is 1, otherwise b;
+// cout takes a when par_cd is 1, otherwise d.
+assign carry = (cin & par_abcd) | (b & ~par_abcd);
+assign cout  = (a & par_cd) | (d & ~par_cd);
+
+endmodule // mul_csa42
+
+// Half adder cell: a + b == {cout, sum}.
+module mul_ha (
+     output cout,     // carry: set when both addends are set
+     output sum,      // sum bit: set when exactly one addend is set
+     input  a, b );
+assign cout = b & a;
+assign sum  = b ^ a;
+endmodule //mul_ha
+
+// Flag decoder: n0 = b[2] & b[1] & ~b[0] and n1 = b[2] & b[1] & b[0].
+module mul_negen ( n0, n1, b );
+output  n0, n1;
+input [2:0]  b;
+wire top_two = b[2] & b[1];       // term shared by both flags
+assign n1 = top_two &  b[0];
+assign n0 = top_two & ~b[0];
+endmodule //mul_negen
+
+// Columns 0..3 of a three-row partial-product slice. Row b0 covers a[3:0] in
+// columns 0..3, row b1 covers a[1:0] in columns 2..3, and the mul_negen flags of
+// b0/b1 are merged in. p0_l / p1_l are the p_l outputs of each row's column-3 cell.
+// NOTE(review): b0/b1 look like Booth-recoded digits -- confirm against mul_bodec.
+module mul_ppgen3lsb4 (cout, p0_l, p1_l, sum, a, b0, b1 );
+
+output [3:0]  sum;
+output [3:1]  cout;
+output  p0_l, p1_l;
+input [3:0]  a;
+input [2:0]  b0, b1;
+
+wire [3:0] row0_z;      // row b0 z outputs, columns 0..3
+wire [3:2] row1_z;      // row b1 z outputs, columns 2..3
+wire [2:0] row0_l;      // row b0 p_l chain, columns 0..2
+wire       row1_l2;     // row b1 p_l at column 2
+wire [1:0] neg0, neg1;  // {n1, n0} flags decoded from b0 / b1
+
+mul_negen p0n ( .n0(neg0[0]), .n1(neg0[1]), .b(b0[2:0]));
+mul_negen p1n ( .n0(neg1[0]), .n1(neg1[1]), .b(b1[2:0]));
+
+// Row b0; the column-0 cell has no lower neighbour, so its pm1_l is tied high.
+mul_ppgen p0_0_ ( .a(a[0]), .b(b0[2:0]), .pm1_l(1'b1), .p_l(row0_l[0]), .z(row0_z[0]));
+mul_ppgen p0_1_ ( .a(a[1]), .b(b0[2:0]), .pm1_l(row0_l[0]), .p_l(row0_l[1]), .z(row0_z[1]));
+mul_ppgen p0_2_ ( .a(a[2]), .b(b0[2:0]), .pm1_l(row0_l[1]), .p_l(row0_l[2]), .z(row0_z[2]));
+mul_ppgen p0_3_ ( .a(a[3]), .b(b0[2:0]), .pm1_l(row0_l[2]), .p_l(p0_l), .z(row0_z[3]));
+// Row b1 starts at column 2, again with pm1_l tied high on its first cell.
+mul_ppgen p1_2_ ( .a(a[0]), .b(b1[2:0]), .pm1_l(1'b1), .p_l(row1_l2), .z(row1_z[2]));
+mul_ppgen p1_3_ ( .a(a[1]), .b(b1[2:0]), .pm1_l(row1_l2), .p_l(p1_l), .z(row1_z[3]));
+
+// Column 0 adds neg0[0] to row0_z[0]; neg0_in is what is added into column 1.
+assign sum[0] = neg0[0] ^ row0_z[0];
+wire neg0_in = neg0[1] | (neg0[0] & row0_z[0]);
+mul_ha sc1_1_ ( .a(row0_z[1]), .b(neg0_in), .sum(sum[1]), .cout(cout[1]));
+mul_csa32  sc1_2_ ( .a(row0_z[2]), .b(row1_z[2]), .c(neg1[0]), .sum(sum[2]), .cout(cout[2]));
+mul_csa32  sc1_3_ ( .a(row0_z[3]), .b(row1_z[3]), .c(neg1[1]), .sum(sum[3]), .cout(cout[3]));
+
+endmodule // mul_ppgen3lsb4
+
+// Columns 64..69 of a three-row partial-product slice. Each row ends in a
+// mul_ppgensign cell (row b0 at column 64, b1 at 66, b2 at 68); rows b1 and b2
+// first run through mul_ppgen cells fed by am1..am4. head feeds the column
+// 65..67 adders and bot gates sum[5].
+module mul_ppgen3sign ( cout, sum, am1, am2, am3, am4, b0, b1, b2,
+     bot, head, p0m1_l, p1m1_l, p2m1_l );
+
+output [5:0]  sum;
+output [4:0]  cout;
+input  am1, am2, am3, am4;
+input [2:0]  b0, b1, b2;
+input  bot, head;
+input  p0m1_l, p1m1_l, p2m1_l;   // per-row p_l from the cell below column 64
+
+wire [65:64] r1_l, r1_z;         // row b1 p_l chain / z outputs
+wire [67:64] r2_l, r2_z;         // row b2 p_l chain / z outputs
+wire s0_z, s1_z, s2_z;           // z of the sign cell of rows b0, b1, b2
+wire s0_p_l, s1_p_l, s2_p_l;     // p_l of the sign cell of rows b0, b1, b2
+
+// head combined with the p_l of the row b0 sign cell.
+wire head_and  = head & s0_p_l;
+wire head_nand = ~head_and;
+wire head_xor  = head ^ s0_p_l;
+
+// Row b0: sign cell only.
+mul_ppgensign p0_64_ ( .b(b0[2:0]), .pm1_l(p0m1_l), .p_l(s0_p_l), .z(s0_z));
+
+// Row b1: columns 64..65, sign cell at column 66.
+mul_ppgen p1_64_ ( .a(am2), .b(b1[2:0]), .pm1_l(p1m1_l), .p_l(r1_l[64]), .z(r1_z[64]));
+mul_ppgen p1_65_ ( .a(am1), .b(b1[2:0]), .pm1_l(r1_l[64]), .p_l(r1_l[65]), .z(r1_z[65]));
+mul_ppgensign p1_66_ ( .b(b1[2:0]), .pm1_l(r1_l[65]), .p_l(s1_p_l), .z(s1_z));
+
+// Row b2: columns 64..67, sign cell at column 68.
+mul_ppgen p2_64_ ( .a(am4), .b(b2[2:0]), .pm1_l(p2m1_l), .p_l(r2_l[64]), .z(r2_z[64]));
+mul_ppgen p2_65_ ( .a(am3), .b(b2[2:0]), .pm1_l(r2_l[64]), .p_l(r2_l[65]), .z(r2_z[65]));
+mul_ppgen p2_66_ ( .a(am2), .b(b2[2:0]), .pm1_l(r2_l[65]), .p_l(r2_l[66]), .z(r2_z[66]));
+mul_ppgen p2_67_ ( .a(am1), .b(b2[2:0]), .pm1_l(r2_l[66]), .p_l(r2_l[67]), .z(r2_z[67]));
+mul_ppgensign p2_68_ ( .b(b2[2:0]), .pm1_l(r2_l[67]), .p_l(s2_p_l), .z(s2_z));
+
+// Per-column compression, lowest column first.
+mul_csa32  sc1_64_ ( .a(r1_z[64]), .b(s0_z), .c(r2_z[64]), .sum(sum[0]), .cout(cout[0]));
+mul_csa32  sc1_65_ ( .a(r1_z[65]), .b(head_xor), .c(r2_z[65]), .sum(sum[1]), .cout(cout[1]));
+mul_csa32  sc1_66_ ( .a(s1_z), .b(head_nand), .c(r2_z[66]), .sum(sum[2]), .cout(cout[2]));
+mul_csa32  sc1_67_ ( .a(s1_p_l), .b(head_and), .c(r2_z[67]), .sum(sum[3]), .cout(cout[3]));
+mul_ha sc1_68_ ( .a(1'b1), .b(s2_z), .sum(sum[4]), .cout(cout[4]));
+
+// The top bit has no adder: sum[5] is the row b2 sign-cell p_l gated by bot.
+assign sum[5] = bot & s2_p_l;
+
+endmodule //mul_ppgen3sign
+
+// One column of a three-row partial-product slice: one mul_ppgen cell per row
+// (b0, b1, b2 fed by a, am2, am4) and a mul_csa32 compressing their z outputs.
+module mul_ppgen3 ( cout, p0_l, p1_l, p2_l, sum, am2, am4,
+     a, b0, b1, b2, p0m1_l, p1m1_l, p2m1_l );
+output  sum, cout;               // carry-save result of the three row bits
+output  p0_l, p1_l, p2_l;        // p_l of each row's cell in this column
+input  a, am2, am4;              // data bits for rows b0, b1, b2
+input  p0m1_l, p1m1_l, p2m1_l;   // pm1_l inputs of the row b0, b1, b2 cells
+input [2:0]  b0, b1, b2;         // 3-bit control code of each row
+
+wire z_row0, z_row1, z_row2;     // z outputs of the row b0, b1, b2 cells
+
+mul_ppgen p0 ( .a(a), .b(b0[2:0]), .pm1_l(p0m1_l), .p_l(p0_l), .z(z_row0));
+mul_ppgen p1 ( .a(am2), .b(b1[2:0]), .pm1_l(p1m1_l), .p_l(p1_l), .z(z_row1));
+mul_ppgen p2 ( .a(am4), .b(b2[2:0]), .pm1_l(p2m1_l), .p_l(p2_l), .z(z_row2));
+
+// Port mapping kept as in the netlist: a <- row b1, b <- row b0, c <- row b2.
+mul_csa32  sc1 ( .a(z_row1), .b(z_row0), .c(z_row2),
+     .sum(sum), .cout(cout));
+
+endmodule // mul_ppgen3
+
+module mul_ppgenrow3 ( cout, sum, a, b0, b1, b2, bot, head );
+// Three-row partial-product slice: I0 = columns 0..3, I1_<k>_ = column k (4..63), I2 = columns 64..69.
+output [68:1]  cout;       // carry outputs of the column adders
+output [69:0]  sum;        // sum outputs of the column adders
+input [63:0]  a;           // column k uses a[k], a[k-2], a[k-4] for rows b0, b1, b2
+input [2:0]  b2;           // b0, b1, b2: 3-bit control code of each of the three rows
+input [2:0]  b0;
+input [2:0]  b1;
+input  bot, head;          // used only by the top block I2
+// p*_l[k] is a row's p_l output at column k; it drives that row's pm1_l input at column k+1.
+// Buses in the design
+wire  [63:4]  p2_l;        // row b2 chain starts at column 4 (I1_4_ ties p2m1_l high)
+wire  [63:3]  p1_l;        // rows b1/b0 chains start inside I0, which drives bit 3
+wire  [63:3]  p0_l;
+// I2: top block; takes the column-63 p_l of each row and a[63:60].
+mul_ppgen3sign I2 ( .am4(a[60]), .am3(a[61]), .am2(a[62]),
+     .am1(a[63]), .p2m1_l(p2_l[63]), .p1m1_l(p1_l[63]),
+     .p0m1_l(p0_l[63]), .b2(b2[2:0]), .head(head), .bot(bot),
+     .sum(sum[69:64]), .cout(cout[68:64]), .b1(b1[2:0]), .b0(b0[2:0]));
+mul_ppgen3 I1_63_ ( .p2_l(p2_l[63]), .b2(b2[2:0]),
+     .am2(a[61]), .a(a[63]), .p2m1_l(p2_l[62]),
+     .p1m1_l(p1_l[62]), .p0m1_l(p0_l[62]), .am4(a[59]), .sum(sum[63]),
+     .cout(cout[63]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[63]),
+     .p0_l(p0_l[63]));
+mul_ppgen3 I1_62_ ( .p2_l(p2_l[62]), .b2(b2[2:0]), 
+     .am2(a[60]), .a(a[62]), .p2m1_l(p2_l[61]),
+     .p1m1_l(p1_l[61]), .p0m1_l(p0_l[61]), .am4(a[58]), .sum(sum[62]),
+     .cout(cout[62]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[62]),
+     .p0_l(p0_l[62]));
+mul_ppgen3 I1_61_ ( .p2_l(p2_l[61]), .b2(b2[2:0]), 
+     .am2(a[59]), .a(a[61]), .p2m1_l(p2_l[60]),
+     .p1m1_l(p1_l[60]), .p0m1_l(p0_l[60]), .am4(a[57]), .sum(sum[61]),
+     .cout(cout[61]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[61]),
+     .p0_l(p0_l[61]));
+mul_ppgen3 I1_60_ ( .p2_l(p2_l[60]), .b2(b2[2:0]), 
+     .am2(a[58]), .a(a[60]), .p2m1_l(p2_l[59]),
+     .p1m1_l(p1_l[59]), .p0m1_l(p0_l[59]), .am4(a[56]), .sum(sum[60]),
+     .cout(cout[60]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[60]),
+     .p0_l(p0_l[60]));
+mul_ppgen3 I1_59_ ( .p2_l(p2_l[59]), .b2(b2[2:0]), 
+     .am2(a[57]), .a(a[59]), .p2m1_l(p2_l[58]),
+     .p1m1_l(p1_l[58]), .p0m1_l(p0_l[58]), .am4(a[55]), .sum(sum[59]),
+     .cout(cout[59]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[59]),
+     .p0_l(p0_l[59]));
+mul_ppgen3 I1_58_ ( .p2_l(p2_l[58]), .b2(b2[2:0]), 
+     .am2(a[56]), .a(a[58]), .p2m1_l(p2_l[57]),
+     .p1m1_l(p1_l[57]), .p0m1_l(p0_l[57]), .am4(a[54]), .sum(sum[58]),
+     .cout(cout[58]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[58]),
+     .p0_l(p0_l[58]));
+mul_ppgen3 I1_57_ ( .p2_l(p2_l[57]), .b2(b2[2:0]), 
+     .am2(a[55]), .a(a[57]), .p2m1_l(p2_l[56]),
+     .p1m1_l(p1_l[56]), .p0m1_l(p0_l[56]), .am4(a[53]), .sum(sum[57]),
+     .cout(cout[57]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[57]),
+     .p0_l(p0_l[57]));
+mul_ppgen3 I1_56_ ( .p2_l(p2_l[56]), .b2(b2[2:0]), 
+     .am2(a[54]), .a(a[56]), .p2m1_l(p2_l[55]),
+     .p1m1_l(p1_l[55]), .p0m1_l(p0_l[55]), .am4(a[52]), .sum(sum[56]),
+     .cout(cout[56]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[56]),
+     .p0_l(p0_l[56]));
+mul_ppgen3 I1_55_ ( .p2_l(p2_l[55]), .b2(b2[2:0]), 
+     .am2(a[53]), .a(a[55]), .p2m1_l(p2_l[54]),
+     .p1m1_l(p1_l[54]), .p0m1_l(p0_l[54]), .am4(a[51]), .sum(sum[55]),
+     .cout(cout[55]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[55]),
+     .p0_l(p0_l[55]));
+mul_ppgen3 I1_54_ ( .p2_l(p2_l[54]), .b2(b2[2:0]), 
+     .am2(a[52]), .a(a[54]), .p2m1_l(p2_l[53]),
+     .p1m1_l(p1_l[53]), .p0m1_l(p0_l[53]), .am4(a[50]), .sum(sum[54]),
+     .cout(cout[54]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[54]),
+     .p0_l(p0_l[54]));
+mul_ppgen3 I1_53_ ( .p2_l(p2_l[53]), .b2(b2[2:0]), 
+     .am2(a[51]), .a(a[53]), .p2m1_l(p2_l[52]),
+     .p1m1_l(p1_l[52]), .p0m1_l(p0_l[52]), .am4(a[49]), .sum(sum[53]),
+     .cout(cout[53]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[53]),
+     .p0_l(p0_l[53]));
+mul_ppgen3 I1_52_ ( .p2_l(p2_l[52]), .b2(b2[2:0]), 
+     .am2(a[50]), .a(a[52]), .p2m1_l(p2_l[51]),
+     .p1m1_l(p1_l[51]), .p0m1_l(p0_l[51]), .am4(a[48]), .sum(sum[52]),
+     .cout(cout[52]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[52]),
+     .p0_l(p0_l[52]));
+mul_ppgen3 I1_51_ ( .p2_l(p2_l[51]), .b2(b2[2:0]), 
+     .am2(a[49]), .a(a[51]), .p2m1_l(p2_l[50]),
+     .p1m1_l(p1_l[50]), .p0m1_l(p0_l[50]), .am4(a[47]), .sum(sum[51]),
+     .cout(cout[51]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[51]),
+     .p0_l(p0_l[51]));
+mul_ppgen3 I1_50_ ( .p2_l(p2_l[50]), .b2(b2[2:0]), 
+     .am2(a[48]), .a(a[50]), .p2m1_l(p2_l[49]),
+     .p1m1_l(p1_l[49]), .p0m1_l(p0_l[49]), .am4(a[46]), .sum(sum[50]),
+     .cout(cout[50]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[50]),
+     .p0_l(p0_l[50]));
+mul_ppgen3 I1_49_ ( .p2_l(p2_l[49]), .b2(b2[2:0]), 
+     .am2(a[47]), .a(a[49]), .p2m1_l(p2_l[48]),
+     .p1m1_l(p1_l[48]), .p0m1_l(p0_l[48]), .am4(a[45]), .sum(sum[49]),
+     .cout(cout[49]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[49]),
+     .p0_l(p0_l[49]));
+mul_ppgen3 I1_48_ ( .p2_l(p2_l[48]), .b2(b2[2:0]), 
+     .am2(a[46]), .a(a[48]), .p2m1_l(p2_l[47]),
+     .p1m1_l(p1_l[47]), .p0m1_l(p0_l[47]), .am4(a[44]), .sum(sum[48]),
+     .cout(cout[48]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[48]),
+     .p0_l(p0_l[48]));
+mul_ppgen3 I1_47_ ( .p2_l(p2_l[47]), .b2(b2[2:0]), 
+     .am2(a[45]), .a(a[47]), .p2m1_l(p2_l[46]),
+     .p1m1_l(p1_l[46]), .p0m1_l(p0_l[46]), .am4(a[43]), .sum(sum[47]),
+     .cout(cout[47]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[47]),
+     .p0_l(p0_l[47]));
+mul_ppgen3 I1_46_ ( .p2_l(p2_l[46]), .b2(b2[2:0]), 
+     .am2(a[44]), .a(a[46]), .p2m1_l(p2_l[45]),
+     .p1m1_l(p1_l[45]), .p0m1_l(p0_l[45]), .am4(a[42]), .sum(sum[46]),
+     .cout(cout[46]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[46]),
+     .p0_l(p0_l[46]));
+mul_ppgen3 I1_45_ ( .p2_l(p2_l[45]), .b2(b2[2:0]), 
+     .am2(a[43]), .a(a[45]), .p2m1_l(p2_l[44]),
+     .p1m1_l(p1_l[44]), .p0m1_l(p0_l[44]), .am4(a[41]), .sum(sum[45]),
+     .cout(cout[45]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[45]),
+     .p0_l(p0_l[45]));
+mul_ppgen3 I1_44_ ( .p2_l(p2_l[44]), .b2(b2[2:0]), 
+     .am2(a[42]), .a(a[44]), .p2m1_l(p2_l[43]),
+     .p1m1_l(p1_l[43]), .p0m1_l(p0_l[43]), .am4(a[40]), .sum(sum[44]),
+     .cout(cout[44]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[44]),
+     .p0_l(p0_l[44]));
+mul_ppgen3 I1_43_ ( .p2_l(p2_l[43]), .b2(b2[2:0]), 
+     .am2(a[41]), .a(a[43]), .p2m1_l(p2_l[42]),
+     .p1m1_l(p1_l[42]), .p0m1_l(p0_l[42]), .am4(a[39]), .sum(sum[43]),
+     .cout(cout[43]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[43]),
+     .p0_l(p0_l[43]));
+mul_ppgen3 I1_42_ ( .p2_l(p2_l[42]), .b2(b2[2:0]), 
+     .am2(a[40]), .a(a[42]), .p2m1_l(p2_l[41]),
+     .p1m1_l(p1_l[41]), .p0m1_l(p0_l[41]), .am4(a[38]), .sum(sum[42]),
+     .cout(cout[42]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[42]),
+     .p0_l(p0_l[42]));
+mul_ppgen3 I1_41_ ( .p2_l(p2_l[41]), .b2(b2[2:0]), 
+     .am2(a[39]), .a(a[41]), .p2m1_l(p2_l[40]),
+     .p1m1_l(p1_l[40]), .p0m1_l(p0_l[40]), .am4(a[37]), .sum(sum[41]),
+     .cout(cout[41]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[41]),
+     .p0_l(p0_l[41]));
+mul_ppgen3 I1_40_ ( .p2_l(p2_l[40]), .b2(b2[2:0]), 
+     .am2(a[38]), .a(a[40]), .p2m1_l(p2_l[39]),
+     .p1m1_l(p1_l[39]), .p0m1_l(p0_l[39]), .am4(a[36]), .sum(sum[40]),
+     .cout(cout[40]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[40]),
+     .p0_l(p0_l[40]));
+mul_ppgen3 I1_39_ ( .p2_l(p2_l[39]), .b2(b2[2:0]), 
+     .am2(a[37]), .a(a[39]), .p2m1_l(p2_l[38]),
+     .p1m1_l(p1_l[38]), .p0m1_l(p0_l[38]), .am4(a[35]), .sum(sum[39]),
+     .cout(cout[39]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[39]),
+     .p0_l(p0_l[39]));
+mul_ppgen3 I1_38_ ( .p2_l(p2_l[38]), .b2(b2[2:0]), 
+     .am2(a[36]), .a(a[38]), .p2m1_l(p2_l[37]),
+     .p1m1_l(p1_l[37]), .p0m1_l(p0_l[37]), .am4(a[34]), .sum(sum[38]),
+     .cout(cout[38]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[38]),
+     .p0_l(p0_l[38]));
+mul_ppgen3 I1_37_ ( .p2_l(p2_l[37]), .b2(b2[2:0]), 
+     .am2(a[35]), .a(a[37]), .p2m1_l(p2_l[36]),
+     .p1m1_l(p1_l[36]), .p0m1_l(p0_l[36]), .am4(a[33]), .sum(sum[37]),
+     .cout(cout[37]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[37]),
+     .p0_l(p0_l[37]));
+mul_ppgen3 I1_36_ ( .p2_l(p2_l[36]), .b2(b2[2:0]), 
+     .am2(a[34]), .a(a[36]), .p2m1_l(p2_l[35]),
+     .p1m1_l(p1_l[35]), .p0m1_l(p0_l[35]), .am4(a[32]), .sum(sum[36]),
+     .cout(cout[36]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[36]),
+     .p0_l(p0_l[36]));
+mul_ppgen3 I1_35_ ( .p2_l(p2_l[35]), .b2(b2[2:0]), 
+     .am2(a[33]), .a(a[35]), .p2m1_l(p2_l[34]),
+     .p1m1_l(p1_l[34]), .p0m1_l(p0_l[34]), .am4(a[31]), .sum(sum[35]),
+     .cout(cout[35]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[35]),
+     .p0_l(p0_l[35]));
+mul_ppgen3 I1_34_ ( .p2_l(p2_l[34]), .b2(b2[2:0]), 
+     .am2(a[32]), .a(a[34]), .p2m1_l(p2_l[33]),
+     .p1m1_l(p1_l[33]), .p0m1_l(p0_l[33]), .am4(a[30]), .sum(sum[34]),
+     .cout(cout[34]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[34]),
+     .p0_l(p0_l[34]));
+mul_ppgen3 I1_33_ ( .p2_l(p2_l[33]), .b2(b2[2:0]), 
+     .am2(a[31]), .a(a[33]), .p2m1_l(p2_l[32]),
+     .p1m1_l(p1_l[32]), .p0m1_l(p0_l[32]), .am4(a[29]), .sum(sum[33]),
+     .cout(cout[33]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[33]),
+     .p0_l(p0_l[33]));
+mul_ppgen3 I1_32_ ( .p2_l(p2_l[32]), .b2(b2[2:0]), 
+     .am2(a[30]), .a(a[32]), .p2m1_l(p2_l[31]),
+     .p1m1_l(p1_l[31]), .p0m1_l(p0_l[31]), .am4(a[28]), .sum(sum[32]),
+     .cout(cout[32]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[32]),
+     .p0_l(p0_l[32]));
+mul_ppgen3 I1_31_ ( .p2_l(p2_l[31]), .b2(b2[2:0]), 
+     .am2(a[29]), .a(a[31]), .p2m1_l(p2_l[30]),
+     .p1m1_l(p1_l[30]), .p0m1_l(p0_l[30]), .am4(a[27]), .sum(sum[31]),
+     .cout(cout[31]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[31]),
+     .p0_l(p0_l[31]));
+mul_ppgen3 I1_30_ ( .p2_l(p2_l[30]), .b2(b2[2:0]), 
+     .am2(a[28]), .a(a[30]), .p2m1_l(p2_l[29]),
+     .p1m1_l(p1_l[29]), .p0m1_l(p0_l[29]), .am4(a[26]), .sum(sum[30]),
+     .cout(cout[30]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[30]),
+     .p0_l(p0_l[30]));
+mul_ppgen3 I1_29_ ( .p2_l(p2_l[29]), .b2(b2[2:0]), 
+     .am2(a[27]), .a(a[29]), .p2m1_l(p2_l[28]),
+     .p1m1_l(p1_l[28]), .p0m1_l(p0_l[28]), .am4(a[25]), .sum(sum[29]),
+     .cout(cout[29]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[29]),
+     .p0_l(p0_l[29]));
+mul_ppgen3 I1_28_ ( .p2_l(p2_l[28]), .b2(b2[2:0]), 
+     .am2(a[26]), .a(a[28]), .p2m1_l(p2_l[27]),
+     .p1m1_l(p1_l[27]), .p0m1_l(p0_l[27]), .am4(a[24]), .sum(sum[28]),
+     .cout(cout[28]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[28]),
+     .p0_l(p0_l[28]));
+mul_ppgen3 I1_27_ ( .p2_l(p2_l[27]), .b2(b2[2:0]), 
+     .am2(a[25]), .a(a[27]), .p2m1_l(p2_l[26]),
+     .p1m1_l(p1_l[26]), .p0m1_l(p0_l[26]), .am4(a[23]), .sum(sum[27]),
+     .cout(cout[27]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[27]),
+     .p0_l(p0_l[27]));
+mul_ppgen3 I1_26_ ( .p2_l(p2_l[26]), .b2(b2[2:0]), 
+     .am2(a[24]), .a(a[26]), .p2m1_l(p2_l[25]),
+     .p1m1_l(p1_l[25]), .p0m1_l(p0_l[25]), .am4(a[22]), .sum(sum[26]),
+     .cout(cout[26]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[26]),
+     .p0_l(p0_l[26]));
+mul_ppgen3 I1_25_ ( .p2_l(p2_l[25]), .b2(b2[2:0]), 
+     .am2(a[23]), .a(a[25]), .p2m1_l(p2_l[24]),
+     .p1m1_l(p1_l[24]), .p0m1_l(p0_l[24]), .am4(a[21]), .sum(sum[25]),
+     .cout(cout[25]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[25]),
+     .p0_l(p0_l[25]));
+mul_ppgen3 I1_24_ ( .p2_l(p2_l[24]), .b2(b2[2:0]), 
+     .am2(a[22]), .a(a[24]), .p2m1_l(p2_l[23]),
+     .p1m1_l(p1_l[23]), .p0m1_l(p0_l[23]), .am4(a[20]), .sum(sum[24]),
+     .cout(cout[24]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[24]),
+     .p0_l(p0_l[24]));
+mul_ppgen3 I1_23_ ( .p2_l(p2_l[23]), .b2(b2[2:0]), 
+     .am2(a[21]), .a(a[23]), .p2m1_l(p2_l[22]),
+     .p1m1_l(p1_l[22]), .p0m1_l(p0_l[22]), .am4(a[19]), .sum(sum[23]),
+     .cout(cout[23]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[23]),
+     .p0_l(p0_l[23]));
+mul_ppgen3 I1_22_ ( .p2_l(p2_l[22]), .b2(b2[2:0]), 
+     .am2(a[20]), .a(a[22]), .p2m1_l(p2_l[21]),
+     .p1m1_l(p1_l[21]), .p0m1_l(p0_l[21]), .am4(a[18]), .sum(sum[22]),
+     .cout(cout[22]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[22]),
+     .p0_l(p0_l[22]));
+mul_ppgen3 I1_21_ ( .p2_l(p2_l[21]), .b2(b2[2:0]), 
+     .am2(a[19]), .a(a[21]), .p2m1_l(p2_l[20]),
+     .p1m1_l(p1_l[20]), .p0m1_l(p0_l[20]), .am4(a[17]), .sum(sum[21]),
+     .cout(cout[21]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[21]),
+     .p0_l(p0_l[21]));
+mul_ppgen3 I1_20_ ( .p2_l(p2_l[20]), .b2(b2[2:0]), 
+     .am2(a[18]), .a(a[20]), .p2m1_l(p2_l[19]),
+     .p1m1_l(p1_l[19]), .p0m1_l(p0_l[19]), .am4(a[16]), .sum(sum[20]),
+     .cout(cout[20]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[20]),
+     .p0_l(p0_l[20]));
+mul_ppgen3 I1_19_ ( .p2_l(p2_l[19]), .b2(b2[2:0]), 
+     .am2(a[17]), .a(a[19]), .p2m1_l(p2_l[18]),
+     .p1m1_l(p1_l[18]), .p0m1_l(p0_l[18]), .am4(a[15]), .sum(sum[19]),
+     .cout(cout[19]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[19]),
+     .p0_l(p0_l[19]));
+mul_ppgen3 I1_18_ ( .p2_l(p2_l[18]), .b2(b2[2:0]), 
+     .am2(a[16]), .a(a[18]), .p2m1_l(p2_l[17]),
+     .p1m1_l(p1_l[17]), .p0m1_l(p0_l[17]), .am4(a[14]), .sum(sum[18]),
+     .cout(cout[18]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[18]),
+     .p0_l(p0_l[18]));
+mul_ppgen3 I1_17_ ( .p2_l(p2_l[17]), .b2(b2[2:0]), 
+     .am2(a[15]), .a(a[17]), .p2m1_l(p2_l[16]),
+     .p1m1_l(p1_l[16]), .p0m1_l(p0_l[16]), .am4(a[13]), .sum(sum[17]),
+     .cout(cout[17]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[17]),
+     .p0_l(p0_l[17]));
+mul_ppgen3 I1_16_ ( .p2_l(p2_l[16]), .b2(b2[2:0]), 
+     .am2(a[14]), .a(a[16]), .p2m1_l(p2_l[15]),
+     .p1m1_l(p1_l[15]), .p0m1_l(p0_l[15]), .am4(a[12]), .sum(sum[16]),
+     .cout(cout[16]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[16]),
+     .p0_l(p0_l[16]));
+mul_ppgen3 I1_15_ ( .p2_l(p2_l[15]), .b2(b2[2:0]), 
+     .am2(a[13]), .a(a[15]), .p2m1_l(p2_l[14]),
+     .p1m1_l(p1_l[14]), .p0m1_l(p0_l[14]), .am4(a[11]), .sum(sum[15]),
+     .cout(cout[15]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[15]),
+     .p0_l(p0_l[15]));
+mul_ppgen3 I1_14_ ( .p2_l(p2_l[14]), .b2(b2[2:0]), 
+     .am2(a[12]), .a(a[14]), .p2m1_l(p2_l[13]),
+     .p1m1_l(p1_l[13]), .p0m1_l(p0_l[13]), .am4(a[10]), .sum(sum[14]),
+     .cout(cout[14]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[14]),
+     .p0_l(p0_l[14]));
+mul_ppgen3 I1_13_ ( .p2_l(p2_l[13]), .b2(b2[2:0]), 
+     .am2(a[11]), .a(a[13]), .p2m1_l(p2_l[12]),
+     .p1m1_l(p1_l[12]), .p0m1_l(p0_l[12]), .am4(a[9]), .sum(sum[13]),
+     .cout(cout[13]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[13]),
+     .p0_l(p0_l[13]));
+mul_ppgen3 I1_12_ ( .p2_l(p2_l[12]), .b2(b2[2:0]), 
+     .am2(a[10]), .a(a[12]), .p2m1_l(p2_l[11]),
+     .p1m1_l(p1_l[11]), .p0m1_l(p0_l[11]), .am4(a[8]), .sum(sum[12]),
+     .cout(cout[12]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[12]),
+     .p0_l(p0_l[12]));
+mul_ppgen3 I1_11_ ( .p2_l(p2_l[11]), .b2(b2[2:0]), 
+     .am2(a[9]), .a(a[11]), .p2m1_l(p2_l[10]),
+     .p1m1_l(p1_l[10]), .p0m1_l(p0_l[10]), .am4(a[7]), .sum(sum[11]),
+     .cout(cout[11]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[11]),
+     .p0_l(p0_l[11]));
+mul_ppgen3 I1_10_ ( .p2_l(p2_l[10]), .b2(b2[2:0]), 
+     .am2(a[8]), .a(a[10]), .p2m1_l(p2_l[9]),
+     .p1m1_l(p1_l[9]), .p0m1_l(p0_l[9]), .am4(a[6]), .sum(sum[10]),
+     .cout(cout[10]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[10]),
+     .p0_l(p0_l[10]));
+mul_ppgen3 I1_9_ ( .p2_l(p2_l[9]), .b2(b2[2:0]), 
+     .am2(a[7]), .a(a[9]), .p2m1_l(p2_l[8]),
+     .p1m1_l(p1_l[8]), .p0m1_l(p0_l[8]), .am4(a[5]), .sum(sum[9]),
+     .cout(cout[9]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[9]),
+     .p0_l(p0_l[9]));
+mul_ppgen3 I1_8_ ( .p2_l(p2_l[8]), .b2(b2[2:0]), 
+     .am2(a[6]), .a(a[8]), .p2m1_l(p2_l[7]),
+     .p1m1_l(p1_l[7]), .p0m1_l(p0_l[7]), .am4(a[4]), .sum(sum[8]),
+     .cout(cout[8]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[8]),
+     .p0_l(p0_l[8]));
+mul_ppgen3 I1_7_ ( .p2_l(p2_l[7]), .b2(b2[2:0]), 
+     .am2(a[5]), .a(a[7]), .p2m1_l(p2_l[6]),
+     .p1m1_l(p1_l[6]), .p0m1_l(p0_l[6]), .am4(a[3]), .sum(sum[7]),
+     .cout(cout[7]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[7]),
+     .p0_l(p0_l[7]));
+mul_ppgen3 I1_6_ ( .p2_l(p2_l[6]), .b2(b2[2:0]), 
+     .am2(a[4]), .a(a[6]), .p2m1_l(p2_l[5]),
+     .p1m1_l(p1_l[5]), .p0m1_l(p0_l[5]), .am4(a[2]), .sum(sum[6]),
+     .cout(cout[6]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[6]),
+     .p0_l(p0_l[6]));
+mul_ppgen3 I1_5_ ( .p2_l(p2_l[5]), .b2(b2[2:0]), 
+     .am2(a[3]), .a(a[5]), .p2m1_l(p2_l[4]),
+     .p1m1_l(p1_l[4]), .p0m1_l(p0_l[4]), .am4(a[1]), .sum(sum[5]),
+     .cout(cout[5]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[5]),
+     .p0_l(p0_l[5]));
+mul_ppgen3 I1_4_ ( .p2_l(p2_l[4]), .b2(b2[2:0]), 
+     .am2(a[2]), .a(a[4]), .p2m1_l(1'b1),   // row b2 has no cell below column 4
+     .p1m1_l(p1_l[3]), .p0m1_l(p0_l[3]), .am4(a[0]), .sum(sum[4]),
+     .cout(cout[4]), .b1(b1[2:0]), .b0(b0[2:0]), .p1_l(p1_l[4]),
+     .p0_l(p0_l[4]));
+mul_ppgen3lsb4 I0 ( .cout(cout[3:1]), .a(a[3:0]), .sum(sum[3:0]),
+     .p1_l(p1_l[3]), .p0_l(p0_l[3]), .b1(b1[2:0]), .b0(b0[2:0]));
+
+endmodule //mul_ppgenrow3
+
+// Partial-product cell without a data input: p_l is low only when b[2] and b[1]
+// are both set; z is the inverse of pm1_l when b[0] is set, else the inverse of p_l.
+module mul_ppgensign ( p_l, z, b, pm1_l );
+output  p_l, z;
+input [2:0]  b;
+input  pm1_l;
+assign p_l = ~(b[2] & b[1]);
+assign z   = ~(b[0] ? pm1_l : p_l);
+endmodule //mul_ppgensign
+
+// Partial-product cell: p_l is low only when b[1] is set and a differs from b[2];
+// z is the inverse of pm1_l when b[0] is set, otherwise the inverse of p_l.
+module mul_ppgen ( p_l, z, a, b, pm1_l );
+output  p_l, z;
+input  a, pm1_l;
+input [2:0]  b;   // presumably a Booth-recoded digit -- TODO confirm against mul_bodec
+assign p_l = ~(b[1] & (b[2] ^ a));
+assign z   = ~(b[0] ? pm1_l : p_l);
+endmodule //mul_ppgen
+
+// 2:1 multiplexer: z follows d1 when s is 1 and d0 when s is 0.
+module mul_mux2 (
+     output z,
+     input  d0, d1,   // data selected by s = 0 / s = 1 respectively
+     input  s );
+assign z = s ? d1 : d0;
+endmodule // mul_mux2
+
+module mul_booth(
+	head,
+        b_in,
+        b0, b1, b2, b3, b4, b5, b6, b7,
+	b8, b9, b10, b11, b12, b13, b14, b15, b16,
+	clk, se, si, so, mul_step, tm_l
+	);
+input		head;		// begin of the MUL operation
+input   [63:0] 	b_in;
+input		clk, se, si, mul_step, tm_l;
+output  [2:0]  	b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15;
+output 		b16;
+output 		so;
+
+wire  [63:31] 	b;
+wire [2:0] 	b0_in0, b1_in0,  b2_in0,  b3_in0,  b4_in0,  b5_in0,  b6_in0,  b7_in0 ;
+wire [2:0] 	b8_in0, b9_in0, b10_in0, b11_in0, b12_in0, b13_in0, b14_in0, b15_in0 ;
+wire [2:0] 	b0_in1, b1_in1,  b2_in1,  b3_in1,  b4_in1,  b5_in1,  b6_in1,  b7_in1 ;
+wire [2:0] 	b8_in1, b9_in1, b10_in1, b11_in1, b12_in1, b13_in1, b14_in1, b15_in1 ;
+wire 	   	b16_in1;
+
+wire [2:0] 	b0_outmx, b1_outmx, b2_outmx, b3_outmx, b4_outmx, b5_outmx, b6_outmx;
+wire [2:0] 	b7_outmx, b8_outmx, b9_outmx, b10_outmx, b11_outmx, b12_outmx, b13_outmx;
+wire [2:0] 	b14_outmx, b15_outmx;
+wire 	   	b16_outmx;
+wire		clk_enb0, clk_enb1;
+
+
+  mul_bodec 		encode0_a(
+				.x  (1'b0),
+				.b  (b_in[15:0]),
+				.b0 (b0_in0),
+				.b1 (b1_in0),
+				.b2 (b2_in0),
+				.b3 (b3_in0),
+				.b4 (b4_in0),
+				.b5 (b5_in0),
+				.b6 (b6_in0),
+				.b7 (b7_in0)
+				);
+				//remove 16th row since it's never the last row
+				//b8_in0 = 3'b010; 
+  mul_bodec		encode0_b(
+				.x  (b_in[15]),
+				.b  (b_in[31:16]),
+				.b0 (b8_in0),
+				.b1 (b9_in0),
+				.b2 (b10_in0),
+				.b3 (b11_in0),
+				.b4 (b12_in0),
+				.b5 (b13_in0),
+				.b6 (b14_in0),
+				.b7 (b15_in0)
+				);
+				// remove 32nd row since it's never the last row 
+				// b16_in0 = 3'b010 ;
+
+  // Pipe picked address [63:31] and hold flop
+
+  clken_buf     ckbuf_0(.clk(clk_enb0), .rclk(clk), .enb_l(~mul_step), .tmb_l(tm_l));
+  clken_buf     ckbuf_1(.clk(clk_enb1), .rclk(clk), .enb_l(~(head & mul_step)), .tmb_l(tm_l));
+
+  dff_s 			hld_dff0(.din(b_in[31]), .clk(clk_enb1), .q(b[31]),
+                        	.se(se), .si(), .so());
+  dff_s #(32) 		hld_dff(.din(b_in[63:32]), .clk(clk_enb1), .q(b[63:32]),
+				.se(se), .si(), .so());
+
+  mul_bodec     	encode1_a(
+                        	.x  (b[31]),
+                        	.b  (b[47:32]),
+                        	.b0 (b0_in1),
+                        	.b1 (b1_in1),
+                        	.b2 (b2_in1),
+                        	.b3 (b3_in1),
+                        	.b4 (b4_in1),
+                        	.b5 (b5_in1),
+                        	.b6 (b6_in1),
+                        	.b7 (b7_in1)
+                        	);
+                        	//remove 16th row since it's never the last row
+                        	//b8_in1 = 3'b010;
+  mul_bodec     	encode1_b(
+                        	.x  (b[47]),
+                        	.b  (b[63:48]),
+                        	.b0 (b8_in1),
+                        	.b1 (b9_in1),
+                        	.b2 (b10_in1),
+                        	.b3 (b11_in1),
+                        	.b4 (b12_in1),
+                        	.b5 (b13_in1),
+                        	.b6 (b14_in1),
+                        	.b7 (b15_in1)
+                        	);
+				assign b16_in1 = b[63] ;
+
+// Select booth encoded b outputs and flop based on the cycle0 and cycle1 
+
+  dp_mux2es #(3)    out_mux0(.dout(b0_outmx[2:0]),
+                        .in0(b0_in0[2:0]),
+                        .in1(b0_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux1(.dout(b1_outmx[2:0]),
+                        .in0(b1_in0[2:0]),
+                        .in1(b1_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux2(.dout(b2_outmx[2:0]),
+                        .in0(b2_in0[2:0]),
+                        .in1(b2_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux3(.dout(b3_outmx[2:0]),
+                        .in0(b3_in0[2:0]),
+                        .in1(b3_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux4(.dout(b4_outmx[2:0]),
+                        .in0(b4_in0[2:0]),
+                        .in1(b4_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux5(.dout(b5_outmx[2:0]),
+                        .in0(b5_in0[2:0]),
+                        .in1(b5_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux6(.dout(b6_outmx[2:0]),
+                        .in0(b6_in0[2:0]),
+                        .in1(b6_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux7(.dout(b7_outmx[2:0]),
+                        .in0(b7_in0[2:0]),
+                        .in1(b7_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux8(.dout(b8_outmx[2:0]),
+                        .in0(b8_in0[2:0]),
+                        .in1(b8_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux9(.dout(b9_outmx[2:0]),
+                        .in0(b9_in0[2:0]),
+                        .in1(b9_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux10(.dout(b10_outmx[2:0]),
+                        .in0(b10_in0[2:0]),
+                        .in1(b10_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux11(.dout(b11_outmx[2:0]),
+                        .in0(b11_in0[2:0]),
+                        .in1(b11_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux12(.dout(b12_outmx[2:0]),
+                        .in0(b12_in0[2:0]),
+                        .in1(b12_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux13(.dout(b13_outmx[2:0]),
+                        .in0(b13_in0[2:0]),
+                        .in1(b13_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux14(.dout(b14_outmx[2:0]),
+                        .in0(b14_in0[2:0]),
+                        .in1(b14_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es #(3)    out_mux15(.dout(b15_outmx[2:0]),
+                        .in0(b15_in0[2:0]),
+                        .in1(b15_in1[2:0]),
+                        .sel(~head));
+  dp_mux2es         out_mux16(.dout(b16_outmx),
+                        .in0(1'b0),
+                        .in1(b16_in1),
+                        .sel(~head));
+
+  dff_s #(3)    out_dff0 (.din(b0_outmx[2:0]), .clk(clk_enb0), .q(b0[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff1 (.din(b1_outmx[2:0]), .clk(clk_enb0), .q(b1[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff2 (.din(b2_outmx[2:0]), .clk(clk_enb0), .q(b2[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff3 (.din(b3_outmx[2:0]), .clk(clk_enb0), .q(b3[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff4 (.din(b4_outmx[2:0]), .clk(clk_enb0), .q(b4[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff5 (.din(b5_outmx[2:0]), .clk(clk_enb0), .q(b5[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff6 (.din(b6_outmx[2:0]), .clk(clk_enb0), .q(b6[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff7 (.din(b7_outmx[2:0]), .clk(clk_enb0), .q(b7[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff8 (.din(b8_outmx[2:0]), .clk(clk_enb0), .q(b8[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff9 (.din(b9_outmx[2:0]), .clk(clk_enb0), .q(b9[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff10 (.din(b10_outmx[2:0]), .clk(clk_enb0), .q(b10[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff11 (.din(b11_outmx[2:0]), .clk(clk_enb0), .q(b11[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff12 (.din(b12_outmx[2:0]), .clk(clk_enb0), .q(b12[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff13 (.din(b13_outmx[2:0]), .clk(clk_enb0), .q(b13[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff14 (.din(b14_outmx[2:0]), .clk(clk_enb0), .q(b14[2:0]),
+			.se(se), .si(), .so());
+  dff_s #(3)    out_dff15 (.din(b15_outmx[2:0]), .clk(clk_enb0), .q(b15[2:0]),
+			.se(se), .si(), .so());
+  dff_s 	      out_dff16 (.din(b16_outmx), .clk(clk_enb0), .q(b16),
+			.se(se), .si(), .so());
+endmodule //mul_booth
+
+module mul_bodec (x, b,  
+        b0, b1, b2, b3, b4, b5, b6, b7);
+
+// Recodes a 16-bit multiplier slice b (plus x, the bit just below b[0])
+// into eight 3-bit codes.  Code k looks at the overlapping bit triple
+// {hi, mid, lo} = {b[2k+1], b[2k], b[2k-1]}, with x standing in for b[-1]:
+//   [2] = hi
+//   [1] = 0 only when hi, mid and lo are all equal (triples 000 and 111)
+//   [0] = 1 only for the triples 011 and 100
+// NOTE(review): this looks like a radix-4 Booth sign / non-zero / times-two
+// encoding -- confirm against the consumers of these codes in mul_booth.
+
+input           x;
+input   [15:0]  b;
+output  [2:0]   b0, b1, b2, b3, b4, b5, b6, b7;
+
+// b extended downwards by x so that every code is indexed the same way.
+wire    [16:0]  ext = {b, x};
+
+// One recoded digit from a three-bit window (hi = most significant bit).
+function [2:0] recode;
+  input hi, mid, lo;
+  begin
+    recode[2] = hi;
+    recode[1] = ~((hi & mid & lo) | (~hi & ~mid & ~lo));
+    recode[0] = (~hi & mid & lo) | (hi & ~mid & ~lo);
+  end
+endfunction
+
+// Window k is ext[2k+2:2k]; neighbouring windows share one bit.
+assign b0 = recode(ext[2],  ext[1],  ext[0]);
+assign b1 = recode(ext[4],  ext[3],  ext[2]);
+assign b2 = recode(ext[6],  ext[5],  ext[4]);
+assign b3 = recode(ext[8],  ext[7],  ext[6]);
+assign b4 = recode(ext[10], ext[9],  ext[8]);
+assign b5 = recode(ext[12], ext[11], ext[10]);
+assign b6 = recode(ext[14], ext[13], ext[12]);
+assign b7 = recode(ext[16], ext[15], ext[14]);
+
+endmodule // mul_bodec
+`endif
Index: /trunk/T1-CPU/mul/sparc_mul_top.v
===================================================================
--- /trunk/T1-CPU/mul/sparc_mul_top.v	(revision 6)
+++ /trunk/T1-CPU/mul/sparc_mul_top.v	(revision 6)
@@ -0,0 +1,122 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_mul_top.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module sparc_mul_top(/*AUTOARG*/
+   // Outputs
+   mul_exu_ack, mul_spu_ack, mul_spu_shf_ack, mul_data_out, so, 
+   // Inputs
+   rclk, grst_l, arst_l, exu_mul_input_vld, exu_mul_rs1_data, exu_mul_rs2_data, 
+   spu_mul_req_vld, spu_mul_acc, spu_mul_areg_shf, spu_mul_areg_rst, 
+   spu_mul_op1_data, spu_mul_op2_data, spu_mul_mulres_lshft, si, se
+   );
+// Top of the shared multiplier: reset flop + sparc_mul_cntl + sparc_mul_dp.
+// ---- clock, reset, scan ----
+input           rclk;
+input           grst_l;                 // system reset
+input           arst_l;                 // async reset
+input           se;                     // scan enable
+input           si;                     // scan in  (unused: every .si() below is open)
+output          so;                     // scan out (undriven: every .so() below is open)
+
+// ---- EXU side ----
+input           exu_mul_input_vld;      // EXU multiplier op request
+input  [63:0]   exu_mul_rs1_data;       // EXU multiplier Op1
+input  [63:0]   exu_mul_rs2_data;       // EXU multiplier Op2
+output          mul_exu_ack;            // ack signal for EXU mul operation
+
+// ---- SPU side ----
+input           spu_mul_req_vld;        // SPU multiplier op request
+input           spu_mul_acc;            // 1: MAC op,    ACCUM += op1 * op2
+                                        // 0: Bypass op, Out = ACCUM * op1
+input           spu_mul_areg_shf;       // Shift >> 64 ACCUM register
+input           spu_mul_areg_rst;       // Reset of ACCUM register (136-bit)
+input  [63:0]   spu_mul_op1_data;       // SPU multiplier Op1
+input  [63:0]   spu_mul_op2_data;       // SPU multiplier Op2
+input           spu_mul_mulres_lshft;
+output          mul_spu_ack;            // ack signal for SPU MAC and Bypass mul operation
+output          mul_spu_shf_ack;        // ack signal for ACCUM >> 64 operation
+
+output [63:0]   mul_data_out;           // Shared output data for both EXU and SPU
+
+// ---- control -> datapath nets ----
+wire            c0_act, spick, x2, byp_sel, byp_imm;
+wire            acc_imm, acc_actc2, acc_actc3, acc_actc5;
+wire            acc_reg_enb, acc_reg_rst, acc_reg_shf;
+
+wire            rst_l;                  // grst_l as seen at the q output of rstff
+wire            clk = rclk;
+
+// grst_l is passed through rstff on clk; arst_l feeds that cell's rst_l pin.
+dffrl_async rstff (
+    .clk   (clk),    .rst_l (arst_l),
+    .din   (grst_l), .q     (rst_l),
+    .se    (se),     .si    (),       .so    ());
+
+sparc_mul_cntl control (
+    .rclk                 (clk),
+    .rst_l                (rst_l),
+    // requests in, acks out
+    .ecl_mul_req_vld      (exu_mul_input_vld),
+    .spu_mul_req_vld      (spu_mul_req_vld),
+    .spu_mul_acc          (spu_mul_acc),
+    .spu_mul_areg_shf     (spu_mul_areg_shf),
+    .spu_mul_areg_rst     (spu_mul_areg_rst),
+    .spu_mul_mulres_lshft (spu_mul_mulres_lshft),
+    .mul_ecl_ack          (mul_exu_ack),
+    .mul_spu_ack          (mul_spu_ack),
+    .mul_spu_shf_ack      (mul_spu_shf_ack),
+    // strobes handed to the datapath
+    .c0_act               (c0_act),
+    .spick                (spick),
+    .byp_sel              (byp_sel),
+    .byp_imm              (byp_imm),
+    .acc_imm              (acc_imm),
+    .acc_actc2            (acc_actc2),
+    .acc_actc3            (acc_actc3),
+    .acc_actc5            (acc_actc5),
+    .acc_reg_enb          (acc_reg_enb),
+    .acc_reg_rst          (acc_reg_rst),
+    .acc_reg_shf          (acc_reg_shf),
+    .x2                   (x2));
+
+sparc_mul_dp dpath (
+    .rclk             (clk),
+    .rst_l            (rst_l),
+    .se               (se), .si (), .so (),
+    .ecl_mul_rs1_data (exu_mul_rs1_data),
+    .ecl_mul_rs2_data (exu_mul_rs2_data),
+    .spu_mul_op1_data (spu_mul_op1_data),
+    .spu_mul_op2_data (spu_mul_op2_data),
+    .mul_data_out     (mul_data_out),
+    // strobes from the control block (c0_act arrives on port valid)
+    .valid            (c0_act),
+    .spick            (spick),
+    .byp_sel          (byp_sel),
+    .byp_imm          (byp_imm),
+    .acc_imm          (acc_imm),
+    .acc_actc2        (acc_actc2),
+    .acc_actc3        (acc_actc3),
+    .acc_actc5        (acc_actc5),
+    .acc_reg_enb      (acc_reg_enb),
+    .acc_reg_rst      (acc_reg_rst),
+    .acc_reg_shf      (acc_reg_shf),
+    .x2               (x2));
+
+endmodule // sparc_mul_top
Index: /trunk/T1-CPU/mul/sparc_mul_dp.v
===================================================================
--- /trunk/T1-CPU/mul/sparc_mul_dp.v	(revision 6)
+++ /trunk/T1-CPU/mul/sparc_mul_dp.v	(revision 6)
@@ -0,0 +1,203 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sparc_mul_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK_EN
+`define FPGA_SYN_CLK_DFF
+`endif
+
+module sparc_mul_dp(
+  ecl_mul_rs1_data,
+  ecl_mul_rs2_data,
+  spu_mul_op1_data,
+  spu_mul_op2_data,
+  valid,
+  spick,
+  byp_sel,
+  byp_imm,
+  acc_imm,
+  acc_actc2,
+  acc_actc3,
+  acc_actc5,
+  acc_reg_enb,
+  acc_reg_rst,
+  acc_reg_shf,
+  x2,
+  mul_data_out,
+  rst_l,
+  si,
+  so,
+  se,
+  rclk
+  );
+// Multiplier datapath: operand muxes, the mul64 core, the 136-bit ACCUM register and the result/bypass muxes.
+input [63:0]	ecl_mul_rs1_data;	// EXU mul operand 1
+input [63:0]	ecl_mul_rs2_data;	// EXU mul operand 2
+input [63:0]	spu_mul_op1_data;	// SPU mul operand 1	
+input [63:0]	spu_mul_op2_data;	// SPU mul operand 2	
+input		valid;			// begin cyc0 of MUL operation
+input 		spick;			// Internal pick: 1 selects the SPU operands, 0 the EXU operands
+input		byp_sel;		// SPU op2 is taken from bypreg instead of spu_mul_op2_data
+input		byp_imm;		// SPU bypass action from mout immediately (instead of acc_reg)
+input		acc_imm;		// SPU accumulate from mout immediately (instead of acc_reg)
+input		acc_actc2, acc_actc3;	// accumulate enables: actc2 = areg[31:0] only, actc3 = all of areg[96:0]
+input		acc_actc5;		// with acc_actc2: mux1_mou takes mout[31:0] instead of mout[63:32]
+input		acc_reg_enb;		// ACCUM register enable
+input		acc_reg_rst;		// ACCUM register reset
+input 		acc_reg_shf;		// ACCUM shift right 64-bit
+input		x2;			// for op1*op2*2
+input		rst_l;			// system  reset
+input		si;			// scan in (unused here: every .si() below is left open)
+input		se;			// scan_enable
+input		rclk;
+output		so;			// scan out (never driven inside this module)
+output [63:0]	mul_data_out;		// Multiplier outputs
+
+wire  [63:0]	mul_op1_d, mul_op2_d, bypreg;
+wire  [63:32]	mux1_reg, mux1_mou;
+wire  [96:0]    mux2_reg, areg;
+wire  [135:0]	mout, acc_reg_in, acc_reg;
+wire 		op2_s0, op2_s1, op2_s2;
+wire 		acc_reg_shf2, clk_enb1;	// clk_enb1 is only driven/used in the non-FPGA_SYN branches below
+wire 		clk;
+
+assign clk = rclk ;
+
+///////////////////////////////////////////////////////////////////////////////
+////// 	op1 inputs mux between EXU and SPU 
+///////////////////////////////////////////////////////////////////////////////
+// AND-OR form: an X on spick only yields X where either operand bit is 1.
+  assign mul_op1_d = ({64{spick}}  & spu_mul_op1_data) |
+                     ({64{~spick}} & ecl_mul_rs1_data );
+
+///////////////////////////////////////////////////////////////////////////////
+////// 	op2 inputs mux between EXU, SPU and bypass from ACCUM register
+///////////////////////////////////////////////////////////////////////////////
+  // For known spick/byp_sel exactly one select is high; overlap or no select yields X.
+  assign op2_s0 = ~spick;
+  assign op2_s1 = spick & byp_sel ; 
+  assign op2_s2 = spick & ~byp_sel ; 
+  assign mul_op2_d = (op2_s0 & op2_s1)|(op2_s0 & op2_s2)|(op2_s1 & op2_s2) ? 64'hxx :
+                     (op2_s0 ? ecl_mul_rs2_data : 
+		     (op2_s1 ? bypreg : 
+		     (op2_s2 ? spu_mul_op2_data : 64'hxx)
+		      ));
+ 
+///////////////////////////////////////////////////////////////////////////////
+//////	Accumulate input muxes 
+///////////////////////////////////////////////////////////////////////////////
+
+// MUX1: Pass acc_reg[31:0] at cyc2 of SPU accumulate, otherwise acc_reg[63:32] 
+  assign mux1_reg[63:32] = acc_actc2 ? acc_reg[31:0] 
+				     : acc_reg[63:32] ;
+
+// Bypass mout[31:0] (mul core output) of MAC1 at cyc5 when the lower 32-bit 
+// 	  are ready but not latched into acc_reg yet.
+//
+//  MAC1: cyc1	|cyc2	|cyc3 	|	cyc4	| 	cyc5	| 	cyc6
+//		|	|	|		|  mout[31:0] 	|  acc_reg[128:0]	
+//		|	|	|		|  bypass	|  latched out
+//	
+//  MAC2: 			|	cyc1	|	cyc2	|	
+//				|		|  ACCUM from 	|	
+//				|		|  mout[31:0]	|
+//						
+  assign mux1_mou[63:32] = (acc_actc2 & acc_actc5) ? mout[31:0]
+				     		   : mout[63:32] ;
+
+// MUX2: Immediate bypass from mout (output of mul core)
+  assign mux2_reg[96:0]  = acc_imm   ? {mout[128:64],mux1_mou[63:32]}
+                                     : {acc_reg[128:64],mux1_reg[63:32]};
+
+// Enable of accumulate reg input to multiplier core
+  assign areg[96:32] = mux2_reg[96:32] & {65{acc_actc3}} ;
+  assign areg[31:0] =  mux2_reg[31:0]  & {32{(acc_actc3 | acc_actc2)}};
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//////	Multiplier core connection
+///////////////////////////////////////////////////////////////////////////////
+// op1 is handed over inverted (rs1_l); mul_step is tied high; accreg gets acc_reg[135:129].
+  mul64		mulcore(.rs1_l	(~mul_op1_d),
+			.rs2	(mul_op2_d),
+			.valid	(valid),
+			.areg	(areg),
+			.accreg	(acc_reg[135:129]),
+			.x2	(x2),
+			.out	(mout),
+			.rclk	(clk),
+			.si	(),
+			.so	(),
+			.se	(se),
+			.mul_rst_l (rst_l),
+			.mul_step  (1'b1)
+			);
+			
+///////////////////////////////////////////////////////////////////////////////
+/////	ACCUM register and right shift muxes
+///////////////////////////////////////////////////////////////////////////////
+// acc_reg_shf2 is acc_reg_shf passed through the dff_s cell below; it steers mul_data_out.
+  dff_s        	dffshf (.din    (acc_reg_shf),
+                        .clk    (clk),
+                        .q      (acc_reg_shf2),
+                        .se     (se),
+                        .si     (),
+                        .so     ()
+                        );
+// Shift: ACCUM >> 64 with zero fill; otherwise the register input is the core output.
+  assign acc_reg_in  =	acc_reg_shf  ?	{64'b0,acc_reg[135:64]}
+				     :	mout ;
+
+  assign mul_data_out =	acc_reg_shf2 ?	acc_reg[63:0]
+				     :	mout[63:0]	;
+// FPGA build: dffre_s with an explicit enable; otherwise dffr_s on the gated clock clk_enb1.
+`ifdef FPGA_SYN_CLK_DFF
+  dffre_s  #(136)  accum  (.din    (acc_reg_in),
+                        .rst    (acc_reg_rst),
+                        .en (acc_reg_enb | acc_reg_rst), .clk(clk), //manually fixed
+                        .q      (acc_reg),
+                        .se     (se),
+                        .si     (),
+                        .so     ()
+                        );
+`else
+  dffr_s  #(136)  accum  (.din    (acc_reg_in),
+                        .rst    (acc_reg_rst),
+                        .clk    (clk_enb1),
+                        .q      (acc_reg),
+                        .se     (se),
+                        .si     (),
+                        .so     ()
+                        );
+`endif
+// clk_enb1 comes from clken_buf with enb_l = ~(acc_reg_enb | acc_reg_rst); omitted under FPGA_SYN_CLK_EN.
+`ifdef FPGA_SYN_CLK_EN
+`else
+  clken_buf     ckbuf_1(.clk(clk_enb1), .rclk(clk), .enb_l(~(acc_reg_enb | acc_reg_rst)), .tmb_l(~se));
+`endif
+
+// op2 bypass source: the core output when byp_imm is set, else the stored ACCUM low word.
+  assign bypreg =  byp_imm ? mout[63:0]
+			   : acc_reg[63:0] ;
+
+endmodule 
+
Index: /trunk/OC-UART/uart_regs.v
===================================================================
--- /trunk/OC-UART/uart_regs.v	(revision 6)
+++ /trunk/OC-UART/uart_regs.v	(revision 6)
@@ -0,0 +1,893 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_regs.v                                                 ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  Registers of the uart 16550 core                            ////
+////                                                              ////
+////  Known problems (limits):                                    ////
+////  Inserts 1 wait state in all WISHBONE transfers              ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Nothing or verification.                                    ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////      - Igor Mohor (igorm@opencores.org)                      ////
+////                                                              ////
+////  Created:        2001/05/12                                  ////
+////  Last Updated:   (See log for the revision history           ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.41  2004/05/21 11:44:41  tadejm
+// Added synchronizer flops for RX input.
+//
+// Revision 1.40  2003/06/11 16:37:47  gorban
+// This fixes errors in some cases when data is being read and put to the FIFO at the same time. Patch is submitted by Scott Furman. Update is very recommended.
+//
+// Revision 1.39  2002/07/29 21:16:18  gorban
+// The uart_defines.v file is included again in sources.
+//
+// Revision 1.38  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+// Revision 1.37  2001/12/27 13:24:09  mohor
+// lsr[7] was not showing overrun errors.
+//
+// Revision 1.36  2001/12/20 13:25:46  mohor
+// rx push changed to be only one cycle wide.
+//
+// Revision 1.35  2001/12/19 08:03:34  mohor
+// Warnings cleared.
+//
+// Revision 1.34  2001/12/19 07:33:54  mohor
+// Synplicity was having troubles with the comment.
+//
+// Revision 1.33  2001/12/17 10:14:43  mohor
+// Things related to msr register changed. After THRE IRQ occurs, and one
+// character is written to the transmit fifo, the detection of the THRE bit in the
+// LSR is delayed for one character time.
+//
+// Revision 1.32  2001/12/14 13:19:24  mohor
+// MSR register fixed.
+//
+// Revision 1.31  2001/12/14 10:06:58  mohor
+// After reset modem status register MSR should be reset.
+//
+// Revision 1.30  2001/12/13 10:09:13  mohor
+// thre irq should be cleared only when being source of interrupt.
+//
+// Revision 1.29  2001/12/12 09:05:46  mohor
+// LSR status bit 0 was not cleared correctly in case of reseting the FCR (rx fifo).
+//
+// Revision 1.28  2001/12/10 19:52:41  gorban
+// Scratch register added
+//
+// Revision 1.27  2001/12/06 14:51:04  gorban
+// Bug in LSR[0] is fixed.
+// All WISHBONE signals are now sampled, so another wait-state is introduced on all transfers.
+//
+// Revision 1.26  2001/12/03 21:44:29  gorban
+// Updated specification documentation.
+// Added full 32-bit data bus interface, now as default.
+// Address is 5-bit wide in 32-bit data bus mode.
+// Added wb_sel_i input to the core. It's used in the 32-bit mode.
+// Added debug interface with two 32-bit read-only registers in 32-bit mode.
+// Bits 5 and 6 of LSR are now only cleared on TX FIFO write.
+// My small test bench is modified to work with 32-bit mode.
+//
+// Revision 1.25  2001/11/28 19:36:39  gorban
+// Fixed: timeout and break didn't pay attention to current data format when counting time
+//
+// Revision 1.24  2001/11/26 21:38:54  gorban
+// Lots of fixes:
+// Break condition wasn't handled correctly at all.
+// LSR bits could lose their values.
+// LSR value after reset was wrong.
+// Timing of THRE interrupt signal corrected.
+// LSR bit 0 timing corrected.
+//
+// Revision 1.23  2001/11/12 21:57:29  gorban
+// fixed more typo bugs
+//
+// Revision 1.22  2001/11/12 15:02:28  mohor
+// lsr1r error fixed.
+//
+// Revision 1.21  2001/11/12 14:57:27  mohor
+// ti_int_pnd error fixed.
+//
+// Revision 1.20  2001/11/12 14:50:27  mohor
+// ti_int_d error fixed.
+//
+// Revision 1.19  2001/11/10 12:43:21  gorban
+// Logic Synthesis bugs fixed. Some other minor changes
+//
+// Revision 1.18  2001/11/08 14:54:23  mohor
+// Comments in Slovene language deleted, few small fixes for better work of
+// old tools. IRQs need to be fix.
+//
+// Revision 1.17  2001/11/07 17:51:52  gorban
+// Heavily rewritten interrupt and LSR subsystems.
+// Many bugs hopefully squashed.
+//
+// Revision 1.16  2001/11/02 09:55:16  mohor
+// no message
+//
+// Revision 1.15  2001/10/31 15:19:22  gorban
+// Fixes to break and timeout conditions
+//
+// Revision 1.14  2001/10/29 17:00:46  gorban
+// fixed parity sending and tx_fifo resets over- and underrun
+//
+// Revision 1.13  2001/10/20 09:58:40  gorban
+// Small synopsis fixes
+//
+// Revision 1.12  2001/10/19 16:21:40  gorban
+// Changes data_out to be synchronous again as it should have been.
+//
+// Revision 1.11  2001/10/18 20:35:45  gorban
+// small fix
+//
+// Revision 1.10  2001/08/24 21:01:12  mohor
+// Things connected to parity changed.
+// Clock devider changed.
+//
+// Revision 1.9  2001/08/23 16:05:05  mohor
+// Stop bit bug fixed.
+// Parity bug fixed.
+// WISHBONE read cycle bug fixed,
+// OE indicator (Overrun Error) bug fixed.
+// PE indicator (Parity Error) bug fixed.
+// Register read bug fixed.
+//
+// Revision 1.10  2001/06/23 11:21:48  gorban
+// DL made 16-bit long. Fixed transmission/reception bugs.
+//
+// Revision 1.9  2001/05/31 20:08:01  gorban
+// FIFO changes and other corrections.
+//
+// Revision 1.8  2001/05/29 20:05:04  gorban
+// Fixed some bugs and synthesis problems.
+//
+// Revision 1.7  2001/05/27 17:37:49  gorban
+// Fixed many bugs. Updated spec. Changed FIFO files structure. See CHANGES.txt file.
+//
+// Revision 1.6  2001/05/21 19:12:02  gorban
+// Corrected some Linter messages.
+//
+// Revision 1.5  2001/05/17 18:34:18  gorban
+// First 'stable' release. Should be sythesizable now. Also added new header.
+//
+// Revision 1.0  2001-05-17 21:27:11+02  jacob
+// Initial revision
+//
+//
+
+// synopsys translate_off
+`include "timescale.v"
+// synopsys translate_on
+
+`include "uart_defines.v"
+
+`define UART_DL1 7:0
+`define UART_DL2 15:8
+
+module uart_regs (clk,
+	wb_rst_i, wb_addr_i, wb_dat_i, wb_dat_o, wb_we_i, wb_re_i, 
+
+// additional signals
+	modem_inputs,
+	stx_pad_o, srx_pad_i,
+
+`ifdef DATA_BUS_WIDTH_8
+`else
+// debug interface signals	enabled
+ier, iir, fcr, mcr, lcr, msr, lsr, rf_count, tf_count, tstate, rstate,
+`endif				
+	rts_pad_o, dtr_pad_o, int_o
+`ifdef UART_HAS_BAUDRATE_OUTPUT
+	, baud_o
+`endif
+
+	);
+
+input 									clk;
+input 									wb_rst_i;
+input [`UART_ADDR_WIDTH-1:0] 		wb_addr_i;
+input [7:0] 							wb_dat_i;
+output [7:0] 							wb_dat_o;
+input 									wb_we_i;
+input 									wb_re_i;
+
+output 									stx_pad_o;
+input 									srx_pad_i;
+
+input [3:0] 							modem_inputs;
+output 									rts_pad_o;
+output 									dtr_pad_o;
+output 									int_o;
+`ifdef UART_HAS_BAUDRATE_OUTPUT
+output	baud_o;
+`endif
+
+`ifdef DATA_BUS_WIDTH_8
+`else
+// if 32-bit databus and debug interface are enabled
+output [3:0]							ier;
+output [3:0]							iir;
+output [1:0]							fcr;  /// bits 7 and 6 of fcr. Other bits are ignored
+output [4:0]							mcr;
+output [7:0]							lcr;
+output [7:0]							msr;
+output [7:0] 							lsr;
+output [`UART_FIFO_COUNTER_W-1:0] 	rf_count;
+output [`UART_FIFO_COUNTER_W-1:0] 	tf_count;
+output [2:0] 							tstate;
+output [3:0] 							rstate;
+
+`endif
+
+wire [3:0] 								modem_inputs;
+reg 										enable;
+`ifdef UART_HAS_BAUDRATE_OUTPUT
+assign baud_o = enable; // baud_o is actually the enable signal
+`endif
+
+
+wire 										stx_pad_o;		// received from transmitter module
+wire 										srx_pad_i;
+wire 										srx_pad;
+
+reg [7:0] 								wb_dat_o;
+
+wire [`UART_ADDR_WIDTH-1:0] 		wb_addr_i;
+wire [7:0] 								wb_dat_i;
+
+
+reg [3:0] 								ier;
+reg [3:0] 								iir;
+reg [1:0] 								fcr;  /// bits 7 and 6 of fcr. Other bits are ignored
+reg [4:0] 								mcr;
+reg [7:0] 								lcr;
+reg [7:0] 								msr;
+reg [15:0] 								dl;  // 32-bit divisor latch
+reg [7:0] 								scratch; // UART scratch register
+reg 										start_dlc; // activate dlc on writing to UART_DL1
+reg 										lsr_mask_d; // delay for lsr_mask condition
+reg 										msi_reset; // reset MSR 4 lower bits indicator
+//reg 										threi_clear; // THRE interrupt clear flag
+reg [15:0] 								dlc;  // 32-bit divisor latch counter
+reg 										int_o;
+
+reg [3:0] 								trigger_level; // trigger level of the receiver FIFO
+reg 										rx_reset;
+reg 										tx_reset;
+
+wire 										dlab;			   // divisor latch access bit
+wire 										cts_pad_i, dsr_pad_i, ri_pad_i, dcd_pad_i; // modem status bits
+wire 										loopback;		   // loopback bit (MCR bit 4)
+wire 										cts, dsr, ri, dcd;	   // effective signals
+wire                    cts_c, dsr_c, ri_c, dcd_c; // Complement effective signals (considering loopback)
+wire 										rts_pad_o, dtr_pad_o;		   // modem control outputs
+
+// LSR bits wires and regs
+wire [7:0] 								lsr;
+wire 										lsr0, lsr1, lsr2, lsr3, lsr4, lsr5, lsr6, lsr7;
+reg										lsr0r, lsr1r, lsr2r, lsr3r, lsr4r, lsr5r, lsr6r, lsr7r;
+wire 										lsr_mask; // lsr_mask
+
+//
+// ASSIGNS
+//
+
+// LSR image: the eight individually registered status bits, bit 7 first
+assign lsr = { lsr7r, lsr6r, lsr5r, lsr4r, lsr3r, lsr2r, lsr1r, lsr0r };
+assign {cts_pad_i, dsr_pad_i, ri_pad_i, dcd_pad_i} = modem_inputs;	// unpack the modem input bus
+assign {cts, dsr, ri, dcd} = ~modem_inputs;	// inverted copies of the four pad inputs
+// values shown in the MSR line-state bits: MCR bits in loopback, the pad inputs otherwise
+assign cts_c = loopback ? mcr[`UART_MC_RTS]  : cts_pad_i;
+assign dsr_c = loopback ? mcr[`UART_MC_DTR]  : dsr_pad_i;
+assign ri_c  = loopback ? mcr[`UART_MC_OUT1] : ri_pad_i;
+assign dcd_c = loopback ? mcr[`UART_MC_OUT2] : dcd_pad_i;
+assign dlab     = lcr[`UART_LC_DL];
+assign loopback = mcr[4];
+// modem control outputs come straight from the MCR bits
+assign rts_pad_o = mcr[`UART_MC_RTS];
+assign dtr_pad_o = mcr[`UART_MC_DTR];
+
+// Interrupt signals
+wire 										rls_int;  // receiver line status interrupt
+wire 										rda_int;  // receiver data available interrupt
+wire 										ti_int;   // timeout indicator interrupt
+wire										thre_int; // transmitter holding register empty interrupt
+wire 										ms_int;   // modem status interrupt
+
+// FIFO signals
+reg 										tf_push;	// write strobe handed to the transmitter instance
+reg 										rf_pop;	// read strobe handed to the receiver instance
+wire [`UART_FIFO_REC_WIDTH-1:0] 	rf_data_out;	// [10:3] are read back as the data byte, [2:0] feed lsr4/lsr2/lsr3
+wire 										rf_error_bit; // an error (parity or framing) is inside the fifo
+wire [`UART_FIFO_COUNTER_W-1:0] 	rf_count;
+wire [`UART_FIFO_COUNTER_W-1:0] 	tf_count;
+wire [2:0] 								tstate;
+wire [3:0] 								rstate;
+wire [9:0] 								counter_t;
+
+wire                      thre_set_en; // THRE status is delayed one character time when a character is written to fifo.
+reg  [7:0]                block_cnt;   // While counter counts, THRE status is blocked (delayed one character cycle)
+reg  [7:0]                block_value; // One character length minus stop bit
+
+// Transmitter Instance
+wire serial_out;	// transmitter serial output before the loopback mux
+
+uart_transmitter transmitter(clk, wb_rst_i, lcr, tf_push, wb_dat_i, enable, serial_out, tstate, tf_count, tx_reset, lsr_mask);
+
+  // Synchronizing and sampling serial RX input
+  uart_sync_flops    i_uart_sync_flops
+  (
+    .rst_i           (wb_rst_i),
+    .clk_i           (clk),
+    .stage1_rst_i    (1'b0),
+    .stage1_clk_en_i (1'b1),
+    .async_dat_i     (srx_pad_i),
+    .sync_dat_o      (srx_pad)
+  );
+  defparam i_uart_sync_flops.width      = 1;
+  defparam i_uart_sync_flops.init_value = 1'b1;
+
+// handle loopback: the receiver is fed from serial_out and the TX pad is held at 1
+wire serial_in = loopback ? serial_out : srx_pad;
+assign stx_pad_o = loopback ? 1'b1 : serial_out;
+
+// Receiver Instance. NOTE(review): rf_overrun and rf_push_pulse are not declared in this part of the file - confirm they are declared earlier (otherwise they are implicit 1-bit nets)
+uart_receiver receiver(clk, wb_rst_i, lcr, rf_pop, serial_in, enable, 
+	counter_t, rf_count, rf_data_out, rf_error_bit, rf_overrun, rx_reset, lsr_mask, rstate, rf_push_pulse);
+
+
+// Asynchronous (combinational) register read mux; the outputs are sampled in uart_wb.v. MCR reads back with bits 7:5 as 0.
+always @(dl or dlab or ier or iir or scratch or mcr
+			or lcr or lsr or msr or rf_data_out or wb_addr_i or wb_re_i)   // asynchronous reading
+begin
+	case (wb_addr_i)
+		`UART_REG_RB   : wb_dat_o = dlab ? dl[`UART_DL1] : rf_data_out[10:3];	// receiver data, or divisor byte DL1 when DLAB=1
+		`UART_REG_IE	: wb_dat_o = dlab ? dl[`UART_DL2] : ier;	// IER (zero-extended), or divisor byte DL2 when DLAB=1
+		`UART_REG_II	: wb_dat_o = {4'b1100,iir};
+		`UART_REG_LC	: wb_dat_o = lcr;
+		`UART_REG_MC	: wb_dat_o = {3'b000,mcr};	// MCR read-back; without this entry the register read as 0
+		`UART_REG_LS	: wb_dat_o = lsr;
+		`UART_REG_MS	: wb_dat_o = msr;
+		`UART_REG_SR	: wb_dat_o = scratch;
+		default:  wb_dat_o = 8'b0; // addresses without a case entry read as 0
+	endcase // case(wb_addr_i)
+end // always @ (dl or dlab or ier or iir or scratch...
+
+// rf_pop signal handling:
+// a receiver-buffer read with DLAB clear raises rf_pop for one clock,
+// after which the flag clears itself again.
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+		rf_pop <= #1 1'b0;
+	else if (rf_pop)
+		rf_pop <= #1 1'b0;	// restore the signal to 0 after one clock cycle
+	else if (wb_re_i && !dlab && (wb_addr_i == `UART_REG_RB))
+		rf_pop <= #1 1'b1;	// advance read pointer
+end
+
+// Register access strobes; every one of them requires DLAB to be clear.
+// read of the Line Status Register
+wire	lsr_mask_condition = wb_re_i && !dlab && (wb_addr_i == `UART_REG_LS);
+// read of the Interrupt Identification Register
+wire	iir_read   = wb_re_i && !dlab && (wb_addr_i == `UART_REG_II);
+// read of the Modem Status Register
+wire	msr_read   = wb_re_i && !dlab && (wb_addr_i == `UART_REG_MS);
+// read of the receiver buffer
+wire	fifo_read  = wb_re_i && !dlab && (wb_addr_i == `UART_REG_RB);
+// write to the transmitter FIFO
+wire	fifo_write = wb_we_i && !dlab && (wb_addr_i == `UART_REG_TR);
+
+// lsr_mask_d: lsr_mask_condition delayed by one clock
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i)
+		lsr_mask_d <= #1 1'b0;
+	else
+		lsr_mask_d <= #1 lsr_mask_condition;
+
+// lsr_mask: high only in the clock where lsr_mask_condition is set while
+// its delayed copy is still clear (rise detection). It clears the latched
+// LSR error bits and is also passed to the transmitter and receiver.
+assign lsr_mask = lsr_mask_condition & ~lsr_mask_d;
+
+// msi_reset signal handling:
+// set to 1 by reset and in the clock after an MSR read; any time it is 1
+// it drops back to 0 on the next clock, so it acts as a one-clock pulse.
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+		msi_reset <= #1 1'b1;
+	else if (msi_reset)
+		msi_reset <= #1 1'b0;
+	else if (msr_read)
+		msi_reset <= #1 1'b1;	// reset bits in Modem Status Register
+end
+
+
+//
+//   WRITES AND RESETS   //
+//
+// Line Control Register
+// (the reset value 8'h03 is the 8n1 setting)
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i)
+		lcr <= #1 8'h03;
+	else if (wb_we_i && (wb_addr_i == `UART_REG_LC))
+		lcr <= #1 wb_dat_i;
+
+// Interrupt Enable Register, or divisor latch byte UART_DL2 when DLAB is set.
+// Both are written through the UART_REG_IE address.
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+	begin
+		dl[`UART_DL2] <= #1 8'b0;
+		ier <= #1 4'b0000;	// no interrupts after reset
+	end
+	else if (wb_we_i && (wb_addr_i == `UART_REG_IE))
+	begin
+		if (dlab) dl[`UART_DL2] <= #1 wb_dat_i;
+		else      ier <= #1 wb_dat_i[3:0];	// ier uses only 4 lsb
+	end
+end
+
+
+// FIFO Control Register (bits 7:6 are stored) and the rx_reset / tx_reset strobes
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) begin
+		fcr      <= #1 2'b11;
+		rx_reset <= #1 1'b0;
+		tx_reset <= #1 1'b0;
+	end else begin
+		rx_reset <= #1 1'b0;	// both strobes default to 0 ...
+		tx_reset <= #1 1'b0;	// ... unless the FCR write below overrides them
+		if (wb_we_i && (wb_addr_i == `UART_REG_FC)) begin
+			fcr      <= #1 wb_dat_i[7:6];
+			rx_reset <= #1 wb_dat_i[1];
+			tx_reset <= #1 wb_dat_i[2];
+		end
+	end
+
+// Modem Control Register: keeps wb_dat_i[4:0], cleared by reset
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i)
+		mcr <= #1 5'b00000;
+	else if (wb_we_i && (wb_addr_i == `UART_REG_MC))
+		mcr <= #1 wb_dat_i[4:0];
+// (bit 4 of this register is the loopback control)
+
+// Scratch register: 8-bit storage written through UART_REG_SR, cleared by reset
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+		scratch <= #1 8'b0;
+	else if (wb_we_i && (wb_addr_i == `UART_REG_SR))
+		scratch <= #1 wb_dat_i;
+end
+
+// Writes to UART_REG_TR: transmitter FIFO push, or divisor latch byte
+// UART_DL1 when DLAB is set.
+//   tf_push   - 1 for the clock after a write with DLAB clear
+//   start_dlc - 1 for the clock after a UART_DL1 write (reloads dlc)
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i)
+	begin
+		dl[`UART_DL1] <= #1 8'b0;
+		tf_push       <= #1 1'b0;
+		start_dlc     <= #1 1'b0;
+	end
+	else
+	begin
+		tf_push   <= #1 1'b0;	// both strobes default to 0 ...
+		start_dlc <= #1 1'b0;	// ... and are overridden by a write below
+		if (wb_we_i && (wb_addr_i == `UART_REG_TR))
+		begin
+			if (dlab)
+			begin
+				dl[`UART_DL1] <= #1 wb_dat_i;
+				start_dlc     <= #1 1'b1;	// enable DL counter
+			end
+			else
+				tf_push <= #1 1'b1;
+		end
+	end
+
+// Receiver FIFO trigger level, decoded combinationally from the stored FCR bits
+always @(fcr)
+	case (fcr[`UART_FC_TL])
+		2'b11 : trigger_level = 4'd14;
+		2'b10 : trigger_level = 4'd8;
+		2'b01 : trigger_level = 4'd4;
+		2'b00 : trigger_level = 4'd1;
+	endcase // case(fcr[`UART_FC_TL])
+	
+//
+//  STATUS REGISTERS  //
+//
+
+// Modem Status Register: DDCD..DCTS are sticky change flags, CDCD..CCTS follow the loopback-aware *_c signals
+reg [3:0] delayed_modem_signals;	// {dcd, ri, dsr, cts} as seen on the previous clock
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+	  begin
+  		msr <= #1 0;
+	  	delayed_modem_signals[3:0] <= #1 0;
+	  end
+	else begin
+		msr[`UART_MS_DDCD:`UART_MS_DCTS] <= #1 msi_reset ? 4'b0 :	// cleared while msi_reset is 1
+			msr[`UART_MS_DDCD:`UART_MS_DCTS] | ({dcd, ri, dsr, cts} ^ delayed_modem_signals[3:0]);	// otherwise accumulate any change since the previous clock
+		msr[`UART_MS_CDCD:`UART_MS_CCTS] <= #1 {dcd_c, ri_c, dsr_c, cts_c};
+		delayed_modem_signals[3:0] <= #1 {dcd, ri, dsr, cts};
+	end
+end
+
+
+// Line Status Register
+
+// activation conditions (combinational; the registered lsr0r..lsr7r bits latch their rising edges)
+assign lsr0 = (rf_count==0 && rf_push_pulse);  // data in receiver fifo available set condition
+assign lsr1 = rf_overrun;     // Receiver overrun error
+assign lsr2 = rf_data_out[1]; // parity error bit
+assign lsr3 = rf_data_out[0]; // framing error bit
+assign lsr4 = rf_data_out[2]; // break error in the character
+assign lsr5 = (tf_count==5'b0 && thre_set_en);  // transmitter fifo is empty
+assign lsr6 = (tf_count==5'b0 && thre_set_en && (tstate == /*`S_IDLE */ 0)); // transmitter empty
+assign lsr7 = rf_error_bit | rf_overrun;	// error inside the receiver fifo, or overrun
+
+// lsr bit0 (receiver data available): set on a rise of lsr0, cleared when the only entry is popped or on rx_reset
+reg 	 lsr0_d;	// lsr0 delayed by one clock, for rise detection
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr0_d <= #1 0;
+	else lsr0_d <= #1 lsr0;
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr0r <= #1 0;
+	else lsr0r <= #1 (rf_count==1 && rf_pop && !rf_push_pulse || rx_reset) ? 0 : // deassert condition
+					  lsr0r || (lsr0 && ~lsr0_d); // set on rise of lsr0 and keep asserted until deasserted 
+
+// lsr bit 1 (receiver overrun): set on a rise of lsr1, cleared by lsr_mask
+reg lsr1_d; // delayed
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr1_d <= #1 0;
+	else lsr1_d <= #1 lsr1;
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr1r <= #1 0;
+	else	lsr1r <= #1	lsr_mask ? 0 : lsr1r || (lsr1 && ~lsr1_d); // set on rise
+
+// lsr bit 2 (parity error): set on a rise of lsr2, cleared by lsr_mask
+reg lsr2_d; // delayed
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr2_d <= #1 0;
+	else lsr2_d <= #1 lsr2;
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr2r <= #1 0;
+	else lsr2r <= #1 lsr_mask ? 0 : lsr2r || (lsr2 && ~lsr2_d); // set on rise
+
+// lsr bit 3 (framing error): set on a rise of lsr3, cleared by lsr_mask
+reg lsr3_d; // delayed
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr3_d <= #1 0;
+	else lsr3_d <= #1 lsr3;
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr3r <= #1 0;
+	else lsr3r <= #1 lsr_mask ? 0 : lsr3r || (lsr3 && ~lsr3_d); // set on rise
+
+// lsr bit 4 (break indicator): set on a rise of lsr4, cleared by lsr_mask
+reg lsr4_d; // delayed
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr4_d <= #1 0;
+	else lsr4_d <= #1 lsr4;
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr4r <= #1 0;
+	else lsr4r <= #1 lsr_mask ? 0 : lsr4r || (lsr4 && ~lsr4_d);	// set on rise
+
+// lsr bit 5 (transmitter fifo is empty): 1 after reset, cleared by fifo_write, set again on a rise of lsr5
+reg lsr5_d;	// delayed
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr5_d <= #1 1;
+	else lsr5_d <= #1 lsr5;
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr5r <= #1 1;
+	else lsr5r <= #1 (fifo_write) ? 0 :  lsr5r || (lsr5 && ~lsr5_d);
+
+// lsr bit 6 (transmitter empty indicator): 1 after reset, cleared by fifo_write, set again on a rise of lsr6
+reg lsr6_d;	// delayed
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr6_d <= #1 1;
+	else lsr6_d <= #1 lsr6;
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr6r <= #1 1;
+	else lsr6r <= #1 (fifo_write) ? 0 : lsr6r || (lsr6 && ~lsr6_d);
+
+// lsr bit 7 (error in fifo): set on a rise of lsr7, cleared by lsr_mask
+reg lsr7_d;	// delayed
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr7_d <= #1 0;
+	else lsr7_d <= #1 lsr7;
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) lsr7r <= #1 0;
+	else lsr7r <= #1 lsr_mask ? 0 : lsr7r || (lsr7 && ~lsr7_d);
+
+// Frequency divider and enable generation.
+//   dlc    - down-counter, reloaded with dl-1 when it is zero or when
+//            start_dlc is set
+//   enable - registered (dl != 0 && dlc == 0)
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+	begin
+		dlc    <= #1 16'b0;
+		enable <= #1 1'b0;
+	end
+	else
+	begin
+		if (start_dlc | ~(|dlc))
+			dlc <= #1 dl - 1;		// preset counter
+		else
+			dlc <= #1 dlc - 1;		// decrement counter
+		if (|dl & ~(|dlc))			// dl>0 & dlc==0
+			enable <= #1 1'b1;
+		else
+			enable <= #1 1'b0;
+	end
+end
+
+// THRE hold-off length selected by lcr[3:0]; each value is 16 * (listed bit count) - 1
+always @(lcr)
+  case (lcr[3:0])
+    4'b1111                             : block_value = 8'd175; // 11 bits
+    4'b0111, 4'b1011, 4'b1110           : block_value = 8'd159; // 10 bits
+    4'b0011, 4'b0110, 4'b1010, 4'b1101  : block_value = 8'd143; // 9 bits
+    4'b0010, 4'b0101, 4'b1001           : block_value = 8'd127; // 8 bits
+    4'b1100                             : block_value = 8'd119; // 7.5 bits
+    4'b0001, 4'b1000                    : block_value = 8'd111; // 7 bits
+    4'b0100                             : block_value = 8'd103; // 6.5 bits
+    4'b0000                             : block_value = 8'd95;  // 6 bits
+  endcase // case(lcr[3:0])
+
+// block_cnt: THRE hold-off counter.
+// Loaded with block_value when the transmitter FIFO is written while lsr5r
+// is set, then decremented on enable until it reaches zero.
+always @(posedge clk or posedge wb_rst_i)
+begin
+  if (wb_rst_i)
+    block_cnt <= #1 8'd0;
+  else if (lsr5r & fifo_write)              // THRE bit set & write to fifo occurred
+    block_cnt <= #1 block_value;
+  else if (enable & (block_cnt != 8'b0))    // only work on enable times
+    block_cnt <= #1 block_cnt - 1;
+end
+
+// THRE status may only be set while the hold-off counter is zero
+assign thre_set_en = (block_cnt == 8'd0);
+
+
+//
+//	INTERRUPT LOGIC
+//
+
+assign rls_int  = ier[`UART_IE_RLS] && (lsr[`UART_LS_OE] || lsr[`UART_LS_PE] || lsr[`UART_LS_FE] || lsr[`UART_LS_BI]);	// enabled and any of OE/PE/FE/BI set
+assign rda_int  = ier[`UART_IE_RDA] && (rf_count >= {1'b0,trigger_level});	// enabled and receiver fifo count at or above the trigger level
+assign thre_int = ier[`UART_IE_THRE] && lsr[`UART_LS_TFE];	// enabled and the LSR TFE bit set
+assign ms_int   = ier[`UART_IE_MS] && (| msr[3:0]);	// enabled and any of msr[3:0] set
+assign ti_int   = ier[`UART_IE_RDA] && (counter_t == 10'b0) && (|rf_count);	// uses the RDA enable; counter_t is zero and the receiver fifo is not empty
+
+// One-clock-delayed copies of the interrupt conditions (used for rise detection)
+reg 	 rls_int_d;
+reg 	 rda_int_d;
+reg 	 thre_int_d;
+reg 	 ms_int_d;
+reg 	 ti_int_d;
+
+always  @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+	begin
+		rls_int_d  <= #1 1'b0;
+		rda_int_d  <= #1 1'b0;
+		thre_int_d <= #1 1'b0;
+		ms_int_d   <= #1 1'b0;
+		ti_int_d   <= #1 1'b0;
+	end
+	else
+	begin
+		rls_int_d  <= #1 rls_int;
+		rda_int_d  <= #1 rda_int;
+		thre_int_d <= #1 thre_int;
+		ms_int_d   <= #1 ms_int;
+		ti_int_d   <= #1 ti_int;
+	end
+end
+
+// rise detection signals: each *_rise is 1 in the clock where the
+// condition is set while its one-clock-delayed copy is still clear
+wire	rls_int_rise,
+		rda_int_rise,
+		ti_int_rise,
+		thre_int_rise,
+		ms_int_rise;
+
+assign rls_int_rise  = rls_int  && !rls_int_d;
+assign rda_int_rise  = rda_int  && !rda_int_d;
+assign ti_int_rise   = ti_int   && !ti_int_d;
+assign thre_int_rise = thre_int && !thre_int_d;
+assign ms_int_rise   = ms_int   && !ms_int_d;
+
+// interrupt pending flags: each is latched on the rise of its condition and held until its reset condition
+reg 	rls_int_pnd;
+reg	rda_int_pnd;
+reg 	thre_int_pnd;
+reg 	ms_int_pnd;
+reg 	ti_int_pnd;
+
+// interrupt pending flags assignments (in every block the reset condition takes priority over the latch condition)
+always  @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) rls_int_pnd <= #1 0; 
+	else 
+		rls_int_pnd <= #1 lsr_mask ? 0 :  						// reset condition
+							rls_int_rise ? 1 :						// latch condition
+							rls_int_pnd && ier[`UART_IE_RLS];	// default operation: remove if masked
+
+always  @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) rda_int_pnd <= #1 0; 
+	else 
+		rda_int_pnd <= #1 ((rf_count == {1'b0,trigger_level}) && fifo_read) ? 0 :  	// reset condition
+							rda_int_rise ? 1 :						// latch condition
+							rda_int_pnd && ier[`UART_IE_RDA];	// default operation: remove if masked
+
+always  @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) thre_int_pnd <= #1 0; 
+	else 
+		thre_int_pnd <= #1 fifo_write || (iir_read & ~iir[`UART_II_IP] & iir[`UART_II_II] == `UART_II_THRE)? 0 : 
+							thre_int_rise ? 1 :
+							thre_int_pnd && ier[`UART_IE_THRE];	// note: == binds tighter than &, so the clear term is iir_read & ~IP & (II == THRE)
+
+always  @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) ms_int_pnd <= #1 0; 
+	else 
+		ms_int_pnd <= #1 msr_read ? 0 : 
+							ms_int_rise ? 1 :
+							ms_int_pnd && ier[`UART_IE_MS];	// default operation: remove if masked
+
+always  @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) ti_int_pnd <= #1 0; 
+	else 
+		ti_int_pnd <= #1 fifo_read ? 0 : 
+							ti_int_rise ? 1 :
+							ti_int_pnd && ier[`UART_IE_RDA];	// the timeout flag is masked by the RDA enable
+// end of pending flags
+
+// INT_O logic: registered interrupt output; the first pending flag in the chain below decides the value
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)	
+		int_o <= #1 1'b0;
+	else
+		int_o <= #1 
+					rls_int_pnd		?	~lsr_mask					:
+					rda_int_pnd		? 1								:
+					ti_int_pnd		? ~fifo_read					:
+					thre_int_pnd	? !(fifo_write & iir_read) :	// NOTE(review): fifo_write needs wb_we_i and iir_read needs wb_re_i at different addresses, so this term looks constant 1 - confirm intent
+					ms_int_pnd		? ~msr_read						:
+					0;	// if no interrupt are pending
+end
+
+
+// Interrupt Identification register: reset value 1; encodes the highest-priority pending source
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+		iir <= #1 1;
+	else
+	if (rls_int_pnd)  // interrupt is pending
+	begin
+		iir[`UART_II_II] <= #1 `UART_II_RLS;	// set identification register to correct value
+		iir[`UART_II_IP] <= #1 1'b0;		// and clear the IIR bit 0 (interrupt pending)
+	end else // the sequence of conditions determines priority of interrupt identification
+	if (rda_int)	// NOTE(review): tests rda_int, not rda_int_pnd as the other branches and int_o do - confirm this is intended
+	begin
+		iir[`UART_II_II] <= #1 `UART_II_RDA;
+		iir[`UART_II_IP] <= #1 1'b0;
+	end
+	else if (ti_int_pnd)
+	begin
+		iir[`UART_II_II] <= #1 `UART_II_TI;
+		iir[`UART_II_IP] <= #1 1'b0;
+	end
+	else if (thre_int_pnd)
+	begin
+		iir[`UART_II_II] <= #1 `UART_II_THRE;
+		iir[`UART_II_IP] <= #1 1'b0;
+	end
+	else if (ms_int_pnd)
+	begin
+		iir[`UART_II_II] <= #1 `UART_II_MS;
+		iir[`UART_II_IP] <= #1 1'b0;
+	end else	// no interrupt is pending
+	begin
+		iir[`UART_II_II] <= #1 0;
+		iir[`UART_II_IP] <= #1 1'b1;
+	end
+end
+
+endmodule
Index: /trunk/OC-UART/uart_top.v
===================================================================
--- /trunk/OC-UART/uart_top.v	(revision 6)
+++ /trunk/OC-UART/uart_top.v	(revision 6)
@@ -0,0 +1,340 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_top.v                                                  ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  UART core top level.                                        ////
+////                                                              ////
+////  Known problems (limits):                                    ////
+////  Note that transmitter and receiver instances are inside     ////
+////  the uart_regs.v file.                                       ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Nothing so far.                                             ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////      - Igor Mohor (igorm@opencores.org)                      ////
+////                                                              ////
+////  Created:        2001/05/12                                  ////
+////  Last Updated:   2001/05/17                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.18  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+// Revision 1.17  2001/12/19 08:40:03  mohor
+// Warnings fixed (unused signals removed).
+//
+// Revision 1.16  2001/12/06 14:51:04  gorban
+// Bug in LSR[0] is fixed.
+// All WISHBONE signals are now sampled, so another wait-state is introduced on all transfers.
+//
+// Revision 1.15  2001/12/03 21:44:29  gorban
+// Updated specification documentation.
+// Added full 32-bit data bus interface, now as default.
+// Address is 5-bit wide in 32-bit data bus mode.
+// Added wb_sel_i input to the core. It's used in the 32-bit mode.
+// Added debug interface with two 32-bit read-only registers in 32-bit mode.
+// Bits 5 and 6 of LSR are now only cleared on TX FIFO write.
+// My small test bench is modified to work with 32-bit mode.
+//
+// Revision 1.14  2001/11/07 17:51:52  gorban
+// Heavily rewritten interrupt and LSR subsystems.
+// Many bugs hopefully squashed.
+//
+// Revision 1.13  2001/10/20 09:58:40  gorban
+// Small synopsis fixes
+//
+// Revision 1.12  2001/08/25 15:46:19  gorban
+// Modified port names again
+//
+// Revision 1.11  2001/08/24 21:01:12  mohor
+// Things connected to parity changed.
+// Clock devider changed.
+//
+// Revision 1.10  2001/08/23 16:05:05  mohor
+// Stop bit bug fixed.
+// Parity bug fixed.
+// WISHBONE read cycle bug fixed,
+// OE indicator (Overrun Error) bug fixed.
+// PE indicator (Parity Error) bug fixed.
+// Register read bug fixed.
+//
+// Revision 1.4  2001/05/31 20:08:01  gorban
+// FIFO changes and other corrections.
+//
+// Revision 1.3  2001/05/21 19:12:02  gorban
+// Corrected some Linter messages.
+//
+// Revision 1.2  2001/05/17 18:34:18  gorban
+// First 'stable' release. Should be sythesizable now. Also added new header.
+//
+// Revision 1.0  2001-05-17 21:27:12+02  jacob
+// Initial revision
+//
+//
+// synopsys translate_off
+`include "timescale.v"
+// synopsys translate_on
+
+`include "uart_defines.v"
+
+//
+// uart_top
+//
+// Top level of the UART core. It connects three sub-modules:
+//   uart_wb       - WISHBONE bus interface; produces the 8-bit internal
+//                   register bus (wb_adr_int, wb_dat8_i/o, we_o, re_o)
+//   uart_regs     - register file; also carries the serial and modem pins
+//   uart_debug_if - debug read-back, only when DATA_BUS_WIDTH_8 is undefined
+//
+// Compile-time options:
+//   DATA_BUS_WIDTH_8          - 8-bit data bus, no debug interface
+//   UART_HAS_BAUDRATE_OUTPUT  - adds the baud_o output port
+//
+
+module uart_top	(
+	// clock
+	wb_clk_i,
+	// WISHBONE slave interface
+	wb_rst_i, wb_adr_i, wb_dat_i, wb_dat_o,
+	wb_we_i, wb_stb_i, wb_cyc_i, wb_ack_o, wb_sel_i,
+	// interrupt request
+	int_o,
+	// serial output / input
+	stx_pad_o, srx_pad_i,
+	// modem control outputs and modem status inputs
+	rts_pad_o, cts_pad_i, dtr_pad_o, dsr_pad_i, ri_pad_i, dcd_pad_i
+`ifdef UART_HAS_BAUDRATE_OUTPUT
+	, baud_o
+`endif
+	);
+
+// bus widths, defaulting to the values from uart_defines.v
+parameter uart_data_width = `UART_DATA_WIDTH;
+parameter uart_addr_width = `UART_ADDR_WIDTH;
+
+// ---- clock and WISHBONE interface ----
+input                          wb_clk_i;
+input                          wb_rst_i;
+input  [uart_addr_width-1:0]   wb_adr_i;
+input  [uart_data_width-1:0]   wb_dat_i;
+output [uart_data_width-1:0]   wb_dat_o;
+input                          wb_we_i;
+input                          wb_stb_i;
+input                          wb_cyc_i;
+input  [3:0]                   wb_sel_i;
+output                         wb_ack_o;
+output                         int_o;
+
+// ---- UART signals ----
+input                          srx_pad_i;
+output                         stx_pad_o;
+output                         rts_pad_o;
+input                          cts_pad_i;
+output                         dtr_pad_o;
+input                          dsr_pad_i;
+input                          ri_pad_i;
+input                          dcd_pad_i;
+
+// ---- optional baudrate output ----
+`ifdef UART_HAS_BAUDRATE_OUTPUT
+output                         baud_o;
+`endif
+
+// ---- net declarations for ports ----
+wire                           stx_pad_o;
+wire                           rts_pad_o;
+wire                           dtr_pad_o;
+wire [uart_addr_width-1:0]     wb_adr_i;
+wire [uart_data_width-1:0]     wb_dat_i;
+wire [uart_data_width-1:0]     wb_dat_o;
+wire [3:0]                     wb_sel_i;   // WISHBONE select signal
+
+// ---- internal nets between uart_wb, uart_regs and uart_debug_if ----
+wire [7:0]                     wb_dat8_i;  // 8-bit internal data input
+wire [7:0]                     wb_dat8_o;  // 8-bit internal data output
+wire [31:0]                    wb_dat32_o; // debug interface 32-bit output
+wire [uart_addr_width-1:0]     wb_adr_int;
+wire                           we_o;       // Write enable for registers
+wire                           re_o;       // Read enable for registers
+
+`ifdef DATA_BUS_WIDTH_8
+`else
+// debug interface wires (declared only when DATA_BUS_WIDTH_8 is not defined)
+wire [3:0]                     ier;
+wire [3:0]                     iir;
+wire [1:0]                     fcr;
+wire [4:0]                     mcr;
+wire [7:0]                     lcr;
+wire [7:0]                     msr;
+wire [7:0]                     lsr;
+wire [`UART_FIFO_COUNTER_W-1:0] rf_count;
+wire [`UART_FIFO_COUNTER_W-1:0] tf_count;
+wire [2:0]                     tstate;
+wire [3:0]                     rstate;
+`endif
+
+//
+// MODULE INSTANCES
+//
+
+// WISHBONE interface module. With DATA_BUS_WIDTH_8 defined, the 32-bit debug
+// data and the byte selects are tied to zero; otherwise they are wired through.
+uart_wb		wb_interface(
+	.clk        (wb_clk_i),
+	.wb_rst_i   (wb_rst_i),
+	.wb_dat_i   (wb_dat_i),
+	.wb_dat_o   (wb_dat_o),
+	.wb_dat8_i  (wb_dat8_i),
+	.wb_dat8_o  (wb_dat8_o),
+`ifdef DATA_BUS_WIDTH_8
+	.wb_dat32_o (32'b0),
+	.wb_sel_i   (4'b0),
+`else
+	.wb_dat32_o (wb_dat32_o),
+	.wb_sel_i   (wb_sel_i),
+`endif
+	.wb_we_i    (wb_we_i),
+	.wb_stb_i   (wb_stb_i),
+	.wb_cyc_i   (wb_cyc_i),
+	.wb_ack_o   (wb_ack_o),
+	.wb_adr_i   (wb_adr_i),
+	.wb_adr_int (wb_adr_int),
+	.we_o       (we_o),
+	.re_o       (re_o)
+	);
+
+// Registers (the transmitter and receiver instances are inside uart_regs.v)
+uart_regs	regs(
+	.clk          (wb_clk_i),
+	.wb_rst_i     (wb_rst_i),
+	.wb_addr_i    (wb_adr_int),
+	.wb_dat_i     (wb_dat8_i),
+	.wb_dat_o     (wb_dat8_o),
+	.wb_we_i      (we_o),
+	.wb_re_i      (re_o),
+	.modem_inputs ({cts_pad_i, dsr_pad_i, ri_pad_i, dcd_pad_i}),
+	.stx_pad_o    (stx_pad_o),
+	.srx_pad_i    (srx_pad_i),
+`ifdef DATA_BUS_WIDTH_8
+`else
+	// debug interface signals enabled
+	.ier          (ier),
+	.iir          (iir),
+	.fcr          (fcr),
+	.mcr          (mcr),
+	.lcr          (lcr),
+	.msr          (msr),
+	.lsr          (lsr),
+	.rf_count     (rf_count),
+	.tf_count     (tf_count),
+	.tstate       (tstate),
+	.rstate       (rstate),
+`endif
+	.rts_pad_o    (rts_pad_o),
+	.dtr_pad_o    (dtr_pad_o),
+	.int_o        (int_o)
+`ifdef UART_HAS_BAUDRATE_OUTPUT
+	, .baud_o     (baud_o)
+`endif
+	);
+
+`ifdef DATA_BUS_WIDTH_8
+`else
+// Debug interface, instantiated only when DATA_BUS_WIDTH_8 is not defined
+uart_debug_if dbg(/*AUTOINST*/
+	// Outputs
+	.wb_dat32_o (wb_dat32_o[31:0]),
+	// Inputs
+	.wb_adr_i   (wb_adr_int[`UART_ADDR_WIDTH-1:0]),
+	.ier        (ier[3:0]),
+	.iir        (iir[3:0]),
+	.fcr        (fcr[1:0]),
+	.mcr        (mcr[4:0]),
+	.lcr        (lcr[7:0]),
+	.msr        (msr[7:0]),
+	.lsr        (lsr[7:0]),
+	.rf_count   (rf_count[`UART_FIFO_COUNTER_W-1:0]),
+	.tf_count   (tf_count[`UART_FIFO_COUNTER_W-1:0]),
+	.tstate     (tstate[2:0]),
+	.rstate     (rstate[3:0])
+	);
+`endif
+
+// Print the compile-time configuration when simulation starts
+initial
+begin
+`ifdef DATA_BUS_WIDTH_8
+	$display("(%m) UART INFO: Data bus width is 8. No Debug interface.\n");
+`else
+	$display("(%m) UART INFO: Data bus width is 32. Debug Interface present.\n");
+`endif
+`ifdef UART_HAS_BAUDRATE_OUTPUT
+	$display("(%m) UART INFO: Has baudrate output\n");
+`else
+	$display("(%m) UART INFO: Doesn't have baudrate output\n");
+`endif
+end
+
+endmodule
+
+
Index: /trunk/OC-UART/raminfr.v
===================================================================
--- /trunk/OC-UART/raminfr.v	(revision 6)
+++ /trunk/OC-UART/raminfr.v	(revision 6)
@@ -0,0 +1,111 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  raminfr.v                                                   ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  Inferrable Distributed RAM for FIFOs                        ////
+////                                                              ////
+////  Known problems (limits):                                    ////
+////  None                .                                       ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Nothing so far.                                             ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////                                                              ////
+////  Created:        2002/07/22                                  ////
+////  Last Updated:   2002/07/22                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.1  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+
+// Dual-port RAM written for memory inference: one synchronous write port
+// (a, di, we) and one asynchronous read port (dpra, dpo).
+module raminfr (clk, we, a, dpra, di, dpo);
+
+	// Geometry. NOTE: depth is not derived from addr_width, so the two
+	// must be overridden together or addresses can fall outside the array.
+	parameter addr_width = 4;
+	parameter data_width = 8;
+	parameter depth      = 16;
+
+	input                   clk;   // write clock
+	input                   we;    // write enable, sampled on posedge clk
+	input  [addr_width-1:0] a;     // write address
+	input  [addr_width-1:0] dpra;  // read address
+	input  [data_width-1:0] di;    // write data
+	output [data_width-1:0] dpo;   // read data, combinational
+
+	// Storage array; no reset or initial value is applied in this module.
+	reg [data_width-1:0] ram [depth-1:0];
+
+	// Read port: follows dpra without waiting for a clock edge.
+	assign dpo = ram[dpra];
+
+	// Write port: store di at address a on the rising clock edge.
+	always @(posedge clk)
+		if (we)
+			ram[a] <= di;
+
+endmodule
+
Index: /trunk/OC-UART/uart_debug_if.v
===================================================================
--- /trunk/OC-UART/uart_debug_if.v	(revision 6)
+++ /trunk/OC-UART/uart_debug_if.v	(revision 6)
@@ -0,0 +1,126 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_debug_if.v                                             ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  UART core debug interface.                                  ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////                                                              ////
+////  Created:        2001/12/02                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.4  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+// Revision 1.3  2001/12/19 08:40:03  mohor
+// Warnings fixed (unused signals removed).
+//
+// Revision 1.2  2001/12/12 22:17:30  gorban
+// some synthesis bugs fixed
+//
+// Revision 1.1  2001/12/04 21:14:16  gorban
+// committed the debug interface file
+//
+
+// synopsys translate_off
+`include "timescale.v"
+// synopsys translate_on
+
+`include "uart_defines.v"
+
+// Debug read-back mux: packs UART core status into one of two 32-bit
+// words selected by wb_adr_i. Purely combinational and read-only.
+module uart_debug_if (
+	wb_dat32_o,
+	wb_adr_i, ier, iir, fcr, mcr, lcr, msr,
+	lsr, rf_count, tf_count, tstate, rstate
+);
+
+	output [31:0]                     wb_dat32_o;
+	input  [`UART_ADDR_WIDTH-1:0]     wb_adr_i;
+	input  [3:0]                      ier;
+	input  [3:0]                      iir;
+	input  [1:0]                      fcr;      // bits 7 and 6 of fcr only
+	input  [4:0]                      mcr;
+	input  [7:0]                      lcr;
+	input  [7:0]                      msr;
+	input  [7:0]                      lsr;
+	input  [`UART_FIFO_COUNTER_W-1:0] rf_count;
+	input  [`UART_FIFO_COUNTER_W-1:0] tf_count;
+	input  [2:0]                      tstate;
+	input  [3:0]                      rstate;
+
+	reg    [31:0]                     wb_dat32_o;
+
+	// Word addresses decoded below; any other address reads as zero.
+	localparam DBG_WORD0_ADDR = 5'b01000;
+	localparam DBG_WORD1_ADDR = 5'b01100;
+
+	always @*
+		case (wb_adr_i)
+			// [31:24] msr, [23:16] lcr, [15:12] iir, [11:8] ier, [7:0] lsr
+			DBG_WORD0_ADDR: wb_dat32_o = {msr, lcr, iir, ier, lsr};
+			// 8 zero bits, fcr(2), mcr(5), rf_count, rstate(4), tf_count, tstate(3)
+			DBG_WORD1_ADDR: wb_dat32_o = {8'b0, fcr, mcr, rf_count, rstate, tf_count, tstate};
+			default:        wb_dat32_o = 0;
+		endcase
+endmodule // uart_debug_if
+
Index: /trunk/OC-UART/uart_receiver.v
===================================================================
--- /trunk/OC-UART/uart_receiver.v	(revision 6)
+++ /trunk/OC-UART/uart_receiver.v	(revision 6)
@@ -0,0 +1,482 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_receiver.v                                             ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  UART core receiver logic                                    ////
+////                                                              ////
+////  Known problems (limits):                                    ////
+////  None known                                                  ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Thorough testing.                                           ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////      - Igor Mohor (igorm@opencores.org)                      ////
+////                                                              ////
+////  Created:        2001/05/12                                  ////
+////  Last Updated:   2001/05/17                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.29  2002/07/29 21:16:18  gorban
+// The uart_defines.v file is included again in sources.
+//
+// Revision 1.28  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+// Revision 1.27  2001/12/30 20:39:13  mohor
+// More than one character was stored in case of break. End of the break
+// was not detected correctly.
+//
+// Revision 1.26  2001/12/20 13:28:27  mohor
+// Missing declaration of rf_push_q fixed.
+//
+// Revision 1.25  2001/12/20 13:25:46  mohor
+// rx push changed to be only one cycle wide.
+//
+// Revision 1.24  2001/12/19 08:03:34  mohor
+// Warnings cleared.
+//
+// Revision 1.23  2001/12/19 07:33:54  mohor
+// Synplicity was having troubles with the comment.
+//
+// Revision 1.22  2001/12/17 14:46:48  mohor
+// overrun signal was moved to separate block because many sequential lsr
+// reads were preventing data from being written to rx fifo.
+// underrun signal was not used and was removed from the project.
+//
+// Revision 1.21  2001/12/13 10:31:16  mohor
+// timeout irq must be set regardless of the rda irq (rda irq does not reset the
+// timeout counter).
+//
+// Revision 1.20  2001/12/10 19:52:05  gorban
+// Igor fixed break condition bugs
+//
+// Revision 1.19  2001/12/06 14:51:04  gorban
+// Bug in LSR[0] is fixed.
+// All WISHBONE signals are now sampled, so another wait-state is introduced on all transfers.
+//
+// Revision 1.18  2001/12/03 21:44:29  gorban
+// Updated specification documentation.
+// Added full 32-bit data bus interface, now as default.
+// Address is 5-bit wide in 32-bit data bus mode.
+// Added wb_sel_i input to the core. It's used in the 32-bit mode.
+// Added debug interface with two 32-bit read-only registers in 32-bit mode.
+// Bits 5 and 6 of LSR are now only cleared on TX FIFO write.
+// My small test bench is modified to work with 32-bit mode.
+//
+// Revision 1.17  2001/11/28 19:36:39  gorban
+// Fixed: timeout and break didn't pay attention to current data format when counting time
+//
+// Revision 1.16  2001/11/27 22:17:09  gorban
+// Fixed bug that prevented synthesis in uart_receiver.v
+//
+// Revision 1.15  2001/11/26 21:38:54  gorban
+// Lots of fixes:
+// Break condition wasn't handled correctly at all.
+// LSR bits could lose their values.
+// LSR value after reset was wrong.
+// Timing of THRE interrupt signal corrected.
+// LSR bit 0 timing corrected.
+//
+// Revision 1.14  2001/11/10 12:43:21  gorban
+// Logic Synthesis bugs fixed. Some other minor changes
+//
+// Revision 1.13  2001/11/08 14:54:23  mohor
+// Comments in Slovene language deleted, few small fixes for better work of
+// old tools. IRQs need to be fix.
+//
+// Revision 1.12  2001/11/07 17:51:52  gorban
+// Heavily rewritten interrupt and LSR subsystems.
+// Many bugs hopefully squashed.
+//
+// Revision 1.11  2001/10/31 15:19:22  gorban
+// Fixes to break and timeout conditions
+//
+// Revision 1.10  2001/10/20 09:58:40  gorban
+// Small synopsis fixes
+//
+// Revision 1.9  2001/08/24 21:01:12  mohor
+// Things connected to parity changed.
+// Clock devider changed.
+//
+// Revision 1.8  2001/08/23 16:05:05  mohor
+// Stop bit bug fixed.
+// Parity bug fixed.
+// WISHBONE read cycle bug fixed,
+// OE indicator (Overrun Error) bug fixed.
+// PE indicator (Parity Error) bug fixed.
+// Register read bug fixed.
+//
+// Revision 1.6  2001/06/23 11:21:48  gorban
+// DL made 16-bit long. Fixed transmission/reception bugs.
+//
+// Revision 1.5  2001/06/02 14:28:14  gorban
+// Fixed receiver and transmitter. Major bug fixed.
+//
+// Revision 1.4  2001/05/31 20:08:01  gorban
+// FIFO changes and other corrections.
+//
+// Revision 1.3  2001/05/27 17:37:49  gorban
+// Fixed many bugs. Updated spec. Changed FIFO files structure. See CHANGES.txt file.
+//
+// Revision 1.2  2001/05/21 19:12:02  gorban
+// Corrected some Linter messages.
+//
+// Revision 1.1  2001/05/17 18:34:18  gorban
+// First 'stable' release. Should be synthesizable now. Also added new header.
+//
+// Revision 1.0  2001-05-17 21:27:11+02  jacob
+// Initial revision
+//
+//
+
+// synopsys translate_off
+`include "timescale.v"
+// synopsys translate_on
+
+`include "uart_defines.v"
+// UART receiver: start-bit detection, bit sampling, parity/framing checks, RX FIFO push, plus break and timeout counters.
+module uart_receiver (clk, wb_rst_i, lcr, rf_pop, srx_pad_i, enable, 
+	counter_t, rf_count, rf_data_out, rf_error_bit, rf_overrun, rx_reset, lsr_mask, rstate, rf_push_pulse);
+// The state machine only advances on clocks where 'enable' is high; rcounter16 counts those ticks and the line is sampled when it reads 7.
+input				clk;	// clock; all registers update on its rising edge
+input				wb_rst_i;	// asynchronous, active-high reset
+input	[7:0]	lcr;	// line control: [1:0] word length, parity bits via `UART_LC_*, [3:0] select the timeout length
+input				rf_pop;	// pops the RX FIFO; also reloads the timeout counter
+input				srx_pad_i;	// serial data input
+input				enable;	// gates the state machine and both down-counters (presumably the 16x baud tick -- TODO confirm in the instantiating module)
+input				rx_reset;	// forwarded to the FIFO's fifo_reset
+input       lsr_mask;	// forwarded to the FIFO's reset_status
+
+output	[9:0]			counter_t;	// timeout down-counter
+output	[`UART_FIFO_COUNTER_W-1:0]	rf_count;	// FIFO fill count (from uart_rfifo)
+output	[`UART_FIFO_REC_WIDTH-1:0]	rf_data_out;	// FIFO read data (from uart_rfifo)
+output				rf_overrun;	// FIFO overrun flag (from uart_rfifo)
+output				rf_error_bit;	// FIFO error flag (from uart_rfifo)
+output [3:0] 		rstate;	// current receiver state (sr_* encodings below)
+output 				rf_push_pulse;	// one-clk pulse on the rising edge of rf_push
+
+reg	[3:0]	rstate;
+reg	[3:0]	rcounter16;	// tick counter within a bit; wraps 0 -> 15 through rcounter16_minus_1
+reg	[2:0]	rbit_counter;	// data bits still to receive after the current one
+reg	[7:0]	rshift;			// receiver shift register
+reg		rparity;		// received parity
+reg		rparity_error;
+reg		rframing_error;		// framing error flag
+reg		rbit_in;	// only assigned in reset; not otherwise used in this module
+reg		rparity_xor;	// XOR reduction of the data bits and the received parity bit
+reg	[7:0]	counter_b;	// break counter: counts down while the line is low
+reg   rf_push_q;	// rf_push delayed by one clk, used for edge detection
+
+// RX FIFO signals
+reg	[`UART_FIFO_REC_WIDTH-1:0]	rf_data_in;	// {data[7:0], break, parity error, framing error} as assembled in sr_push
+wire	[`UART_FIFO_REC_WIDTH-1:0]	rf_data_out;
+wire      rf_push_pulse;
+reg				rf_push;
+wire				rf_pop;
+wire				rf_overrun;
+wire	[`UART_FIFO_COUNTER_W-1:0]	rf_count;
+wire				rf_error_bit; // an error (parity or framing) is inside the fifo
+wire 				break_error = (counter_b == 0);	// the break counter has run down to zero
+
+// RX FIFO instance
+uart_rfifo #(`UART_FIFO_REC_WIDTH) fifo_rx(
+	.clk(		clk		), 
+	.wb_rst_i(	wb_rst_i	),
+	.data_in(	rf_data_in	),
+	.data_out(	rf_data_out	),
+	.push(		rf_push_pulse		),
+	.pop(		rf_pop		),
+	.overrun(	rf_overrun	),
+	.count(		rf_count	),
+	.error_bit(	rf_error_bit	),
+	.fifo_reset(	rx_reset	),
+	.reset_status(lsr_mask)
+);
+
+wire 		rcounter16_eq_7 = (rcounter16 == 4'd7);	// sampling point used by the start, data, parity and stop states
+wire		rcounter16_eq_0 = (rcounter16 == 4'd0);
+wire		rcounter16_eq_1 = (rcounter16 == 4'd1);	// not referenced anywhere else in this module
+
+wire [3:0] rcounter16_minus_1 = rcounter16 - 1'b1;	// 4-bit result, so 0 - 1 wraps to 15
+
+parameter  sr_idle 					= 4'd0;
+parameter  sr_rec_start 			= 4'd1;
+parameter  sr_rec_bit 				= 4'd2;
+parameter  sr_rec_parity			= 4'd3;
+parameter  sr_rec_stop 				= 4'd4;
+parameter  sr_check_parity 		= 4'd5;
+parameter  sr_rec_prepare 			= 4'd6;
+parameter  sr_end_bit				= 4'd7;
+parameter  sr_ca_lc_parity	      = 4'd8;
+parameter  sr_wait1 					= 4'd9;
+parameter  sr_push 					= 4'd10;
+
+// Receiver state machine. Statement order matters: where a register is assigned twice in one branch, the later non-blocking assignment wins.
+always @(posedge clk or posedge wb_rst_i)
+begin
+  if (wb_rst_i)
+  begin
+     rstate 			<= #1 sr_idle;
+	  rbit_in 				<= #1 1'b0;
+	  rcounter16 			<= #1 0;
+	  rbit_counter 		<= #1 0;
+	  rparity_xor 		<= #1 1'b0;
+	  rframing_error 	<= #1 1'b0;
+	  rparity_error 		<= #1 1'b0;
+	  rparity 				<= #1 1'b0;
+	  rshift 				<= #1 0;
+	  rf_push 				<= #1 1'b0;
+	  rf_data_in 			<= #1 0;
+  end
+  else
+  if (enable)
+  begin
+	case (rstate)
+	sr_idle : begin
+			rf_push 			  <= #1 1'b0;
+			rf_data_in 	  <= #1 0;
+			rcounter16 	  <= #1 4'b1110;
+			if (srx_pad_i==1'b0 & ~break_error)   // detected a pulse (start bit?)
+			begin
+				rstate 		  <= #1 sr_rec_start;
+			end
+		end
+	sr_rec_start :	begin
+  			rf_push 			  <= #1 1'b0;
+				if (rcounter16_eq_7)    // check the pulse
+					if (srx_pad_i==1'b1)   // no start bit
+						rstate <= #1 sr_idle;
+					else            // start bit detected
+						rstate <= #1 sr_rec_prepare;
+				rcounter16 <= #1 rcounter16_minus_1;
+			end
+	sr_rec_prepare:begin
+				case (lcr[/*`UART_LC_BITS*/1:0])  // number of bits in a word
+				2'b00 : rbit_counter <= #1 3'b100;
+				2'b01 : rbit_counter <= #1 3'b101;
+				2'b10 : rbit_counter <= #1 3'b110;
+				2'b11 : rbit_counter <= #1 3'b111;
+				endcase
+				if (rcounter16_eq_0)
+				begin
+					rstate		<= #1 sr_rec_bit;
+					rcounter16	<= #1 4'b1110;	// NOTE: overridden by the unconditional decrement below (last non-blocking assignment wins)
+					rshift		<= #1 0;
+				end
+				else
+					rstate <= #1 sr_rec_prepare;
+				rcounter16 <= #1 rcounter16_minus_1;	// also applies on the eq_0 cycle, so sr_rec_bit is entered with the counter at 15
+			end
+	sr_rec_bit :	begin
+				if (rcounter16_eq_0)
+					rstate <= #1 sr_end_bit;
+				if (rcounter16_eq_7) // read the bit
+					case (lcr[/*`UART_LC_BITS*/1:0])  // number of bits in a word
+					2'b00 : rshift[4:0]  <= #1 {srx_pad_i, rshift[4:1]};
+					2'b01 : rshift[5:0]  <= #1 {srx_pad_i, rshift[5:1]};
+					2'b10 : rshift[6:0]  <= #1 {srx_pad_i, rshift[6:1]};
+					2'b11 : rshift[7:0]  <= #1 {srx_pad_i, rshift[7:1]};
+					endcase
+				rcounter16 <= #1 rcounter16_minus_1;
+			end
+	sr_end_bit :   begin
+				if (rbit_counter==3'b0) // no more bits in word
+					if (lcr[`UART_LC_PE]) // choose state based on parity
+						rstate <= #1 sr_rec_parity;
+					else
+					begin
+						rstate <= #1 sr_rec_stop;
+						rparity_error <= #1 1'b0;  // no parity - no error :)
+					end
+				else		// else we have more bits to read
+				begin
+					rstate <= #1 sr_rec_bit;
+					rbit_counter <= #1 rbit_counter - 1'b1;
+				end
+				rcounter16 <= #1 4'b1110;
+			end
+	sr_rec_parity: begin
+				if (rcounter16_eq_7)	// read the parity
+				begin
+					rparity <= #1 srx_pad_i;
+					rstate <= #1 sr_ca_lc_parity;
+				end
+				rcounter16 <= #1 rcounter16_minus_1;
+			end
+	sr_ca_lc_parity : begin    // rcounter equals 6
+				rcounter16  <= #1 rcounter16_minus_1;
+				rparity_xor <= #1 ^{rshift,rparity}; // calculate parity on all incoming data
+				rstate      <= #1 sr_check_parity;
+			  end
+	sr_check_parity: begin	  // rcounter equals 5
+				case ({lcr[`UART_LC_EP],lcr[`UART_LC_SP]})
+					2'b00: rparity_error <= #1  rparity_xor == 0;  // error if the XOR over data and parity is 0
+					2'b01: rparity_error <= #1 ~rparity;      // parity bit should be stuck at 1
+					2'b10: rparity_error <= #1  rparity_xor == 1;   // error if the XOR over data and parity is 1
+					2'b11: rparity_error <= #1  rparity;	  // parity bit should be stuck at 0
+				endcase
+				rcounter16 <= #1 rcounter16_minus_1;
+				rstate <= #1 sr_wait1;
+			  end
+	sr_wait1 :	if (rcounter16_eq_0)	// run out the rest of the parity bit period
+			begin
+				rstate <= #1 sr_rec_stop;
+				rcounter16 <= #1 4'b1110;
+			end
+			else
+				rcounter16 <= #1 rcounter16_minus_1;
+	sr_rec_stop :	begin
+				if (rcounter16_eq_7)	// sample the stop bit
+				begin
+					rframing_error <= #1 !srx_pad_i; // no framing error if input is 1 (stop bit)
+					rstate <= #1 sr_push;
+				end
+				rcounter16 <= #1 rcounter16_minus_1;
+			end
+	sr_push :	begin
+///////////////////////////////////////
+//				$display($time, ": received: %b", rf_data_in);
+        if(srx_pad_i | break_error)	// line is high again, or a break has been detected: push and return to idle
+          begin
+            if(break_error)
+        		  rf_data_in 	<= #1 {8'b0, 3'b100}; // break input (empty character) to receiver FIFO
+            else
+        			rf_data_in  <= #1 {rshift, 1'b0, rparity_error, rframing_error};
+      		  rf_push 		  <= #1 1'b1;
+    				rstate        <= #1 sr_idle;
+          end
+        else if(~rframing_error)  // There's always a framing before break_error -> wait for break or srx_pad_i
+          begin	// line is low after a good stop bit: push and go straight to start-bit checking
+       			rf_data_in  <= #1 {rshift, 1'b0, rparity_error, rframing_error};
+      		  rf_push 		  <= #1 1'b1;
+      			rcounter16 	  <= #1 4'b1110;
+    				rstate 		  <= #1 sr_rec_start;
+          end
+                      
+			end
+	default : rstate <= #1 sr_idle;
+	endcase
+  end  // if (enable)
+end // always of receiver
+// rf_push only changes on enable ticks, so it is edge-detected to give the FIFO a single-clk push.
+always @ (posedge clk or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    rf_push_q <= 0;
+  else
+    rf_push_q <= #1 rf_push;
+end
+
+assign rf_push_pulse = rf_push & ~rf_push_q;	// high for one clk when rf_push rises
+
+  
+//
+// Break condition detection.
+// Works in conjunction with the receiver state machine
+
+reg 	[9:0]	toc_value; // value to be set to timeout counter
+// Each value is (bits per character) * 64 - 1; all 16 encodings of lcr[3:0] are listed, so there is no default.
+always @(lcr)
+	case (lcr[3:0])
+		4'b0000										: toc_value = 447; // 7 bits
+		4'b0100										: toc_value = 479; // 7.5 bits
+		4'b0001,	4'b1000							: toc_value = 511; // 8 bits
+		4'b1100										: toc_value = 543; // 8.5 bits
+		4'b0010, 4'b0101, 4'b1001				: toc_value = 575; // 9 bits
+		4'b0011, 4'b0110, 4'b1010, 4'b1101	: toc_value = 639; // 10 bits
+		4'b0111, 4'b1011, 4'b1110				: toc_value = 703; // 11 bits
+		4'b1111										: toc_value = 767; // 12 bits
+	endcase // case(lcr[3:0])
+
+wire [7:0] 	brc_value; // value to be set to break counter
+assign 		brc_value = toc_value[9:2]; // the same as timeout but 1 instead of 4 character times
+
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+		counter_b <= #1 8'd159;	// 10 * 16 - 1
+	else
+	if (srx_pad_i)
+		counter_b <= #1 brc_value; // character time length - 1
+	else
+	if(enable & counter_b != 8'b0)            // count only on enable ticks, and stop once zero (break) is reached
+		counter_b <= #1 counter_b - 1;  // decrement break counter
+end // always of break condition detection
+
+///
+/// Timeout condition detection
+reg	[9:0]	counter_t;	// counts the timeout condition clocks
+
+always @(posedge clk or posedge wb_rst_i)
+begin
+	if (wb_rst_i)
+		counter_t <= #1 10'd639; // 10 bits for the default 8N1
+	else
+		if(rf_push_pulse || rf_pop || rf_count == 0) // counter is reloaded whenever the RX FIFO is pushed, popped, or empty
+			counter_t <= #1 toc_value;
+		else
+		if (enable && counter_t != 10'b0)  // we don't want to underflow
+			counter_t <= #1 counter_t - 1;		
+end
+	
+endmodule
Index: /trunk/OC-UART/uart_rfifo.v
===================================================================
--- /trunk/OC-UART/uart_rfifo.v	(revision 6)
+++ /trunk/OC-UART/uart_rfifo.v	(revision 6)
@@ -0,0 +1,325 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_rfifo.v (Modified from uart_fifo.v)                    ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  UART core receiver FIFO                                     ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Nothing.                                                    ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////      - Igor Mohor (igorm@opencores.org)                      ////
+////                                                              ////
+////  Created:        2001/05/12                                  ////
+////  Last Updated:   2002/07/22                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.3  2003/06/11 16:37:47  gorban
+// This fixes errors in some cases when data is being read and put to the FIFO at the same time. Patch is submitted by Scott Furman. Update is very recommended.
+//
+// Revision 1.2  2002/07/29 21:16:18  gorban
+// The uart_defines.v file is included again in sources.
+//
+// Revision 1.1  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+// Revision 1.16  2001/12/20 13:25:46  mohor
+// rx push changed to be only one cycle wide.
+//
+// Revision 1.15  2001/12/18 09:01:07  mohor
+// Bug that was entered in the last update fixed (rx state machine).
+//
+// Revision 1.14  2001/12/17 14:46:48  mohor
+// overrun signal was moved to separate block because many sequential lsr
+// reads were preventing data from being written to rx fifo.
+// underrun signal was not used and was removed from the project.
+//
+// Revision 1.13  2001/11/26 21:38:54  gorban
+// Lots of fixes:
+// Break condition wasn't handled correctly at all.
+// LSR bits could lose their values.
+// LSR value after reset was wrong.
+// Timing of THRE interrupt signal corrected.
+// LSR bit 0 timing corrected.
+//
+// Revision 1.12  2001/11/08 14:54:23  mohor
+// Comments in Slovene language deleted, few small fixes for better work of
+// old tools. IRQs need to be fix.
+//
+// Revision 1.11  2001/11/07 17:51:52  gorban
+// Heavily rewritten interrupt and LSR subsystems.
+// Many bugs hopefully squashed.
+//
+// Revision 1.10  2001/10/20 09:58:40  gorban
+// Small synopsis fixes
+//
+// Revision 1.9  2001/08/24 21:01:12  mohor
+// Things connected to parity changed.
+// Clock devider changed.
+//
+// Revision 1.8  2001/08/24 08:48:10  mohor
+// FIFO was not cleared after the data was read bug fixed.
+//
+// Revision 1.7  2001/08/23 16:05:05  mohor
+// Stop bit bug fixed.
+// Parity bug fixed.
+// WISHBONE read cycle bug fixed,
+// OE indicator (Overrun Error) bug fixed.
+// PE indicator (Parity Error) bug fixed.
+// Register read bug fixed.
+//
+// Revision 1.3  2001/05/31 20:08:01  gorban
+// FIFO changes and other corrections.
+//
+// Revision 1.3  2001/05/27 17:37:48  gorban
+// Fixed many bugs. Updated spec. Changed FIFO files structure. See CHANGES.txt file.
+//
+// Revision 1.2  2001/05/17 18:34:18  gorban
+// First 'stable' release. Should be sythesizable now. Also added new header.
+//
+// Revision 1.0  2001-05-17 21:27:12+02  jacob
+// Initial revision
+//
+//
+
+// synopsys translate_off
+`include "timescale.v"
+// synopsys translate_on
+
+`include "uart_defines.v"
+
+module uart_rfifo (clk, 
+	wb_rst_i, data_in, data_out,
+// Control signals
+	push, // push strobe, active high
+	pop,   // pop strobe, active high
+// status signals
+	overrun,
+	count,
+	error_bit,
+	fifo_reset,
+	reset_status
+	);
+
+// UART receiver FIFO.
+//
+// The upper 8 bits of each entry live in the raminfr instance "rfifo"; the
+// lower 3 bits (status flags) live in the register array "fifo" so that all
+// of them can be OR-ed together into error_bit.
+//
+//   push         store data_in at "top"; dropped when full and nothing is popped
+//   pop          discard the entry at "bottom"; ignored when the FIFO is empty
+//   data_out     {data byte, flags} of the entry at "bottom"
+//   count        number of entries currently held
+//   overrun      sticky; set when a push is dropped because the FIFO is full,
+//                cleared by fifo_reset or reset_status
+//   error_bit    1 when any flag bit of any entry is set
+//   fifo_reset   synchronous clear of the pointers, count and all flags
+//   wb_rst_i     asynchronous reset, clears the same state as fifo_reset
+// NOTE: the word0..word15 taps used for error_bit assume fifo_depth == 16.
+
+// FIFO parameters
+parameter fifo_width = `UART_FIFO_WIDTH;
+parameter fifo_depth = `UART_FIFO_DEPTH;
+parameter fifo_pointer_w = `UART_FIFO_POINTER_W;
+parameter fifo_counter_w = `UART_FIFO_COUNTER_W;
+
+input				clk;
+input				wb_rst_i;
+input				push;
+input				pop;
+input	[fifo_width-1:0]	data_in;
+input				fifo_reset;
+input       reset_status;
+
+output	[fifo_width-1:0]	data_out;
+output				overrun;
+output	[fifo_counter_w-1:0]	count;
+output				error_bit;
+
+wire	[fifo_width-1:0]	data_out;
+wire [7:0] data8_out;
+// flags FIFO
+reg	[2:0]	fifo[fifo_depth-1:0];
+
+// FIFO pointers
+reg	[fifo_pointer_w-1:0]	top;
+reg	[fifo_pointer_w-1:0]	bottom;
+
+reg	[fifo_counter_w-1:0]	count;
+reg				overrun;
+
+wire [fifo_pointer_w-1:0] top_plus_1 = top + 1'b1;
+
+// A push is accepted when there is room, or when a simultaneous pop frees a
+// slot. Gating the RAM write with it keeps a dropped (overrun) push from
+// overwriting the oldest unread byte (top == bottom when the FIFO is full).
+wire push_ok = push & (pop | (count < fifo_depth));
+
+raminfr #(fifo_pointer_w,8,fifo_depth) rfifo  
+        (.clk(clk), 
+			.we(push_ok), 
+			.a(top), 
+			.dpra(bottom), 
+			.di(data_in[fifo_width-1:fifo_width-8]), 
+			.dpo(data8_out)
+		); 
+
+integer i;
+
+always @(posedge clk or posedge wb_rst_i) // synchronous FIFO
+begin
+	if (wb_rst_i)
+	begin
+		top		<= #1 0;
+		bottom		<= #1 1'b0;
+		count		<= #1 0;
+		for(i=0;i<fifo_depth;i=i+1)
+		   fifo[i] <= #1 0;
+	end
+	else
+	if (fifo_reset) begin
+		top		<= #1 0;
+		bottom		<= #1 1'b0;
+		count		<= #1 0;
+		for(i=0;i<fifo_depth;i=i+1)
+		   fifo[i] <= #1 0;
+	end
+  else
+	begin
+		case ({push, pop})
+		2'b10 : if (count<fifo_depth)  // overrun condition
+			begin
+				top       <= #1 top_plus_1;
+				fifo[top] <= #1 data_in[2:0];
+				count     <= #1 count + 1'b1;
+			end
+		2'b01 : if(count>0)
+			begin
+        fifo[bottom] <= #1 0;
+				bottom   <= #1 bottom + 1'b1;
+				count	 <= #1 count - 1'b1;
+			end
+		2'b11 : if (count>0)
+			begin
+				// drop the popped entry's flags; must precede the fifo[top]
+				// write, which has to win when the FIFO is full (top == bottom)
+				fifo[bottom] <= #1 0;
+				bottom   <= #1 bottom + 1'b1;
+				top       <= #1 top_plus_1;
+				fifo[top] <= #1 data_in[2:0];
+			end
+			else
+			begin // nothing to pop: behave as a plain push, keep the entry
+				top       <= #1 top_plus_1;
+				fifo[top] <= #1 data_in[2:0];
+				count     <= #1 count + 1'b1;
+			end
+    default: ;
+		endcase
+	end
+end   // always
+
+always @(posedge clk or posedge wb_rst_i) // synchronous FIFO
+begin
+  if (wb_rst_i)
+    overrun   <= #1 1'b0;
+  else
+  if(fifo_reset | reset_status) 
+    overrun   <= #1 1'b0;
+  else
+  if(push & ~pop & (count==fifo_depth))
+    overrun   <= #1 1'b1;
+end   // always
+
+
+// please note though that data_out is only valid one clock after pop signal
+assign data_out = {data8_out,fifo[bottom]};
+
+// Additional logic for detection of error conditions (parity and framing) inside the FIFO
+// for the Line Status Register bit 7
+
+wire	[2:0]	word0 = fifo[0];
+wire	[2:0]	word1 = fifo[1];
+wire	[2:0]	word2 = fifo[2];
+wire	[2:0]	word3 = fifo[3];
+wire	[2:0]	word4 = fifo[4];
+wire	[2:0]	word5 = fifo[5];
+wire	[2:0]	word6 = fifo[6];
+wire	[2:0]	word7 = fifo[7];
+
+wire	[2:0]	word8 = fifo[8];
+wire	[2:0]	word9 = fifo[9];
+wire	[2:0]	word10 = fifo[10];
+wire	[2:0]	word11 = fifo[11];
+wire	[2:0]	word12 = fifo[12];
+wire	[2:0]	word13 = fifo[13];
+wire	[2:0]	word14 = fifo[14];
+wire	[2:0]	word15 = fifo[15];
+
+// a 1 is returned if any of the error bits in the fifo is 1
+assign	error_bit = |(word0[2:0]  | word1[2:0]  | word2[2:0]  | word3[2:0]  |
+            		      word4[2:0]  | word5[2:0]  | word6[2:0]  | word7[2:0]  |
+            		      word8[2:0]  | word9[2:0]  | word10[2:0] | word11[2:0] |
+            		      word12[2:0] | word13[2:0] | word14[2:0] | word15[2:0] );
+								
+endmodule
Index: /trunk/OC-UART/uart_tfifo.v
===================================================================
--- /trunk/OC-UART/uart_tfifo.v	(revision 6)
+++ /trunk/OC-UART/uart_tfifo.v	(revision 6)
@@ -0,0 +1,243 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_tfifo.v                                                ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  UART core transmitter FIFO                                  ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Nothing.                                                    ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////      - Igor Mohor (igorm@opencores.org)                      ////
+////                                                              ////
+////  Created:        2001/05/12                                  ////
+////  Last Updated:   2002/07/22                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.1  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+// Revision 1.16  2001/12/20 13:25:46  mohor
+// rx push changed to be only one cycle wide.
+//
+// Revision 1.15  2001/12/18 09:01:07  mohor
+// Bug that was entered in the last update fixed (rx state machine).
+//
+// Revision 1.14  2001/12/17 14:46:48  mohor
+// overrun signal was moved to separate block because many sequential lsr
+// reads were preventing data from being written to rx fifo.
+// underrun signal was not used and was removed from the project.
+//
+// Revision 1.13  2001/11/26 21:38:54  gorban
+// Lots of fixes:
+// Break condition wasn't handled correctly at all.
+// LSR bits could lose their values.
+// LSR value after reset was wrong.
+// Timing of THRE interrupt signal corrected.
+// LSR bit 0 timing corrected.
+//
+// Revision 1.12  2001/11/08 14:54:23  mohor
+// Comments in Slovene language deleted, few small fixes for better work of
+// old tools. IRQs need to be fix.
+//
+// Revision 1.11  2001/11/07 17:51:52  gorban
+// Heavily rewritten interrupt and LSR subsystems.
+// Many bugs hopefully squashed.
+//
+// Revision 1.10  2001/10/20 09:58:40  gorban
+// Small synopsis fixes
+//
+// Revision 1.9  2001/08/24 21:01:12  mohor
+// Things connected to parity changed.
+// Clock devider changed.
+//
+// Revision 1.8  2001/08/24 08:48:10  mohor
+// FIFO was not cleared after the data was read bug fixed.
+//
+// Revision 1.7  2001/08/23 16:05:05  mohor
+// Stop bit bug fixed.
+// Parity bug fixed.
+// WISHBONE read cycle bug fixed,
+// OE indicator (Overrun Error) bug fixed.
+// PE indicator (Parity Error) bug fixed.
+// Register read bug fixed.
+//
+// Revision 1.3  2001/05/31 20:08:01  gorban
+// FIFO changes and other corrections.
+//
+// Revision 1.3  2001/05/27 17:37:48  gorban
+// Fixed many bugs. Updated spec. Changed FIFO files structure. See CHANGES.txt file.
+//
+// Revision 1.2  2001/05/17 18:34:18  gorban
+// First 'stable' release. Should be sythesizable now. Also added new header.
+//
+// Revision 1.0  2001-05-17 21:27:12+02  jacob
+// Initial revision
+//
+//
+
+// synopsys translate_off
+`include "timescale.v"
+// synopsys translate_on
+
+`include "uart_defines.v"
+
+module uart_tfifo (clk, 
+	wb_rst_i, data_in, data_out,
+// Control signals
+	push, // push strobe, active high
+	pop,   // pop strobe, active high
+// status signals
+	overrun,
+	count,
+	fifo_reset,
+	reset_status
+	);
+
+// UART transmitter FIFO. Entries are held in the raminfr instance "tfifo";
+// "top" is the write pointer, "bottom" the read pointer that drives data_out.
+//   push       store data_in at "top"; dropped when full and nothing is popped
+//   pop        discard the entry at "bottom"; ignored when the FIFO is empty
+//   count      number of entries currently held
+//   overrun    sticky; set when a push is dropped, cleared by fifo_reset or
+//              reset_status (fifo_reset also clears pointers and count)
+
+// FIFO parameters
+parameter fifo_width = `UART_FIFO_WIDTH;
+parameter fifo_depth = `UART_FIFO_DEPTH;
+parameter fifo_pointer_w = `UART_FIFO_POINTER_W;
+parameter fifo_counter_w = `UART_FIFO_COUNTER_W;
+
+input				clk;
+input				wb_rst_i;
+input				push;
+input				pop;
+input	[fifo_width-1:0]	data_in;
+input				fifo_reset;
+input       reset_status;
+
+output	[fifo_width-1:0]	data_out;
+output				overrun;
+output	[fifo_counter_w-1:0]	count;
+
+wire	[fifo_width-1:0]	data_out;
+
+// FIFO pointers
+reg	[fifo_pointer_w-1:0]	top;
+reg	[fifo_pointer_w-1:0]	bottom;
+
+reg	[fifo_counter_w-1:0]	count;
+reg				overrun;
+wire [fifo_pointer_w-1:0] top_plus_1 = top + 1'b1;
+
+// A push is accepted when there is room or a simultaneous pop frees a slot;
+// a dropped push must not overwrite the oldest entry (top == bottom if full).
+wire push_ok = push & (pop | (count < fifo_depth));
+
+raminfr #(fifo_pointer_w,fifo_width,fifo_depth) tfifo  
+        (.clk(clk), .we(push_ok), .a(top), .dpra(bottom), 
+			.di(data_in), .dpo(data_out)
+		); 
+
+always @(posedge clk or posedge wb_rst_i) // synchronous FIFO
+begin
+	if (wb_rst_i) begin
+		top		<= #1 0;
+		bottom		<= #1 1'b0;
+		count		<= #1 0;
+	end
+	else if (fifo_reset) begin
+		top		<= #1 0;
+		bottom		<= #1 1'b0;
+		count		<= #1 0;
+	end
+	else begin
+		case ({push, pop})
+		2'b10 : if (count<fifo_depth) begin // push only; dropped when full
+				top       <= #1 top_plus_1;
+				count     <= #1 count + 1'b1;
+			end
+		2'b01 : if (count>0) begin          // pop only; ignored when empty
+				bottom   <= #1 bottom + 1'b1;
+				count	 <= #1 count - 1'b1;
+			end
+		2'b11 : begin
+				top       <= #1 top_plus_1;
+				if (count>0) bottom <= #1 bottom + 1'b1; // pop only if an entry exists
+				else         count  <= #1 count + 1'b1;  // empty: plain push
+		        end
+    default: ;
+		endcase
+	end
+end   // always
+
+always @(posedge clk or posedge wb_rst_i) // sticky overrun flag
+begin
+  if (wb_rst_i)
+    overrun   <= #1 1'b0;
+  else if (fifo_reset | reset_status) 
+    overrun   <= #1 1'b0;
+  else if (push & ~pop & (count==fifo_depth)) // a push with a pop is not lost
+    overrun   <= #1 1'b1;
+end   // always
+
+endmodule
Index: /trunk/OC-UART/uart_wb.v
===================================================================
--- /trunk/OC-UART/uart_wb.v	(revision 6)
+++ /trunk/OC-UART/uart_wb.v	(revision 6)
@@ -0,0 +1,317 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_wb.v                                                   ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  UART core WISHBONE interface.                               ////
+////                                                              ////
+////  Known problems (limits):                                    ////
+////  Inserts one wait state on all transfers.                    ////
+////  Note affected signals and the way they are affected.        ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Nothing.                                                    ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////      - Igor Mohor (igorm@opencores.org)                      ////
+////                                                              ////
+////  Created:        2001/05/12                                  ////
+////  Last Updated:   2001/05/17                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.16  2002/07/29 21:16:18  gorban
+// The uart_defines.v file is included again in sources.
+//
+// Revision 1.15  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+// Revision 1.12  2001/12/19 08:03:34  mohor
+// Warnings cleared.
+//
+// Revision 1.11  2001/12/06 14:51:04  gorban
+// Bug in LSR[0] is fixed.
+// All WISHBONE signals are now sampled, so another wait-state is introduced on all transfers.
+//
+// Revision 1.10  2001/12/03 21:44:29  gorban
+// Updated specification documentation.
+// Added full 32-bit data bus interface, now as default.
+// Address is 5-bit wide in 32-bit data bus mode.
+// Added wb_sel_i input to the core. It's used in the 32-bit mode.
+// Added debug interface with two 32-bit read-only registers in 32-bit mode.
+// Bits 5 and 6 of LSR are now only cleared on TX FIFO write.
+// My small test bench is modified to work with 32-bit mode.
+//
+// Revision 1.9  2001/10/20 09:58:40  gorban
+// Small synopsis fixes
+//
+// Revision 1.8  2001/08/24 21:01:12  mohor
+// Things connected to parity changed.
+// Clock devider changed.
+//
+// Revision 1.7  2001/08/23 16:05:05  mohor
+// Stop bit bug fixed.
+// Parity bug fixed.
+// WISHBONE read cycle bug fixed,
+// OE indicator (Overrun Error) bug fixed.
+// PE indicator (Parity Error) bug fixed.
+// Register read bug fixed.
+//
+// Revision 1.4  2001/05/31 20:08:01  gorban
+// FIFO changes and other corrections.
+//
+// Revision 1.3  2001/05/21 19:12:01  gorban
+// Corrected some Linter messages.
+//
+// Revision 1.2  2001/05/17 18:34:18  gorban
+// First 'stable' release. Should be sythesizable now. Also added new header.
+//
+// Revision 1.0  2001-05-17 21:27:13+02  jacob
+// Initial revision
+//
+//
+
+// UART core WISHBONE interface 
+//
+// Author: Jacob Gorban   (jacob.gorban@flextronicssemi.com)
+// Company: Flextronics Semiconductor
+//
+
+// synopsys translate_off
+`include "timescale.v"
+// synopsys translate_on
+`include "uart_defines.v"
+ 
+module uart_wb (clk, wb_rst_i, 
+	wb_we_i, wb_stb_i, wb_cyc_i, wb_ack_o, wb_adr_i,
+	wb_adr_int, wb_dat_i, wb_dat_o, wb_dat8_i, wb_dat8_o, wb_dat32_o, wb_sel_i,
+	we_o, re_o // Write and read enable output for the core
+);
+
+// WISHBONE slave front end of the UART core.
+// Every bus input is registered once (the *_is copies) and only the
+// registered copies are used. we_o / re_o are qualified by "wre", which is
+// dropped when a request is accepted and raised again only while the
+// acknowledge state machine is idle with no request pending.
+
+input 		  clk;
+input 		  wb_rst_i;
+// Bus side
+input 		  wb_we_i;
+input 		  wb_stb_i;
+input 		  wb_cyc_i;
+input [3:0]   wb_sel_i;
+input [`UART_ADDR_WIDTH-1:0] 	wb_adr_i; // WISHBONE address
+output 		  wb_ack_o;
+
+`ifdef DATA_BUS_WIDTH_8
+input [7:0]  wb_dat_i;  // write data from the bus
+output [7:0] wb_dat_o;  // read data to the bus
+wire [7:0] 	 wb_dat_i;
+reg [7:0] 	 wb_dat_o;
+reg [7:0] 	 wb_dat_is; // registered wb_dat_i
+`else // 32-bit data bus
+input [31:0]  wb_dat_i;  // write data from the bus
+output [31:0] wb_dat_o;  // read data to the bus
+wire [31:0]   wb_dat_i;
+reg [31:0] 	  wb_dat_o;
+reg [31:0] 	  wb_dat_is; // registered wb_dat_i
+`endif // !`ifdef DATA_BUS_WIDTH_8
+
+// Core side
+output [`UART_ADDR_WIDTH-1:0]	wb_adr_int; // address passed on to the core
+output [7:0]  wb_dat8_i;  // byte written to the core
+input [7:0]   wb_dat8_o;  // byte read from the core, goes into wb_dat_o
+input [31:0]  wb_dat32_o; // 32-bit read data (debug interface)
+output 		  we_o;
+output 		  re_o;
+
+wire [3:0]   wb_sel_i;
+wire [7:0] 	  wb_dat8_o;
+wire [`UART_ADDR_WIDTH-1:0]	wb_adr_int;
+wire 			  we_o;
+reg 			  wb_ack_o;
+reg [7:0] 	  wb_dat8_i;
+
+// registered copies of the bus inputs
+reg [`UART_ADDR_WIDTH-1:0]	wb_adr_is;
+reg [3:0] 						wb_sel_is;
+reg 								wb_we_is;
+reg 								wb_cyc_is;
+reg 								wb_stb_is;
+reg 			 wre;      // enables we_o / re_o
+reg [1:0] 	 wbstate;  // acknowledge state machine
+wire 			 wb_req = wb_stb_is & wb_cyc_is; // a registered request is present
+
+// Register the bus inputs
+always  @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) begin
+		wb_cyc_is <= #1 0;
+		wb_stb_is <= #1 0;
+		wb_we_is <= #1 0;
+		wb_sel_is <= #1 0;
+		wb_adr_is <= #1 0;
+		wb_dat_is <= #1 0;
+	end else begin
+		wb_cyc_is <= #1 wb_cyc_i;
+		wb_stb_is <= #1 wb_stb_i;
+		wb_we_is <= #1 wb_we_i;
+		wb_sel_is <= #1 wb_sel_i;
+		wb_adr_is <= #1 wb_adr_i;
+		wb_dat_is <= #1 wb_dat_i;
+	end
+
+// Acknowledge state machine: drives wb_ack_o and wre
+always  @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i) begin
+		wbstate <= #1 0;
+		wb_ack_o <= #1 1'b0;
+		wre <= #1 1'b1;
+	end else
+		case (wbstate)
+			2'd0: if (wb_req) begin // idle: accept a pending request
+					wbstate <= #1 2'd1;
+					wb_ack_o <= #1 1'b1;
+					wre <= #1 1'b0;
+				end else begin
+					wb_ack_o <= #1 1'b0;
+					wre <= #1 1'b1;
+				end
+			2'd1: begin // acknowledge is removed again
+				wbstate <= #1 2'd2;
+				wb_ack_o <= #1 1'b0;
+				wre <= #1 1'b0;
+			end
+			2'd2, 2'd3: begin // back to idle, wre still low
+				wbstate <= #1 2'd0;
+				wb_ack_o <= #1 1'b0;
+				wre <= #1 1'b0;
+			end
+		endcase
+
+assign we_o = wb_req &  wb_we_is & wre; // write enable for the registers
+assign re_o = wb_req & ~wb_we_is & wre; // read enable for the registers
+
+`ifdef DATA_BUS_WIDTH_8 // 8-bit data bus
+assign wb_adr_int = wb_adr_is;
+always @(wb_dat_is)
+	wb_dat8_i = wb_dat_is;
+
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i)
+		wb_dat_o <= #1 0;
+	else
+		wb_dat_o <= #1 wb_dat8_o;
+
+`else // 32-bit bus
+reg [1:0] wb_adr_int_lsb;
+assign wb_adr_int = {wb_adr_is[`UART_ADDR_WIDTH-1:2], wb_adr_int_lsb};
+
+// read data: put the core byte on the lane chosen by the select lines
+always @(posedge clk or posedge wb_rst_i)
+	if (wb_rst_i)
+		wb_dat_o <= #1 0;
+	else if (re_o)
+		case (wb_sel_is)
+			4'b1111: wb_dat_o <= #1 wb_dat32_o; // debug interface output
+			4'b1000: wb_dat_o <= #1 {wb_dat8_o, 24'b0};
+			4'b0100: wb_dat_o <= #1 {8'b0, wb_dat8_o, 16'b0};
+			4'b0010: wb_dat_o <= #1 {16'b0, wb_dat8_o, 8'b0};
+			4'b0001: wb_dat_o <= #1 {24'b0, wb_dat8_o};
+ 			default: wb_dat_o <= #1 0;
+		endcase // case(wb_sel_is)
+
+// write data: take the byte from the selected lane (lane 0 by default)
+always @(wb_sel_is or wb_dat_is)
+	case (wb_sel_is)
+		4'b1000 : wb_dat8_i = wb_dat_is[31:24];
+		4'b0100 : wb_dat8_i = wb_dat_is[23:16];
+		4'b0010 : wb_dat8_i = wb_dat_is[15:8];
+		default : wb_dat8_i = wb_dat_is[7:0];
+	endcase // case(wb_sel_is)
+
+// two low address bits for the core, derived from the selected lane
+always @(wb_sel_is or wb_dat_is)
+	case (wb_sel_is)
+  `ifdef LITLE_ENDIAN
+		4'b0010 : wb_adr_int_lsb = 2'h1;
+		4'b0100 : wb_adr_int_lsb = 2'h2;
+		4'b1000 : wb_adr_int_lsb = 2'h3;
+  `else
+		4'b0001 : wb_adr_int_lsb = 2'h3;
+		4'b0010 : wb_adr_int_lsb = 2'h2;
+		4'b0100 : wb_adr_int_lsb = 2'h1;
+  `endif
+		default : wb_adr_int_lsb = 2'h0;
+	endcase // case(wb_sel_is)
+
+`endif // !`ifdef DATA_BUS_WIDTH_8
+
+endmodule
+
+
+
+
+
+
+
+
+
+
Index: /trunk/OC-UART/timescale.v
===================================================================
--- /trunk/OC-UART/timescale.v	(revision 6)
+++ /trunk/OC-UART/timescale.v	(revision 6)
@@ -0,0 +1,64 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  timescale.v                                                 ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  Timescale define for the Core                               ////
+////                                                              ////
+////  Known problems (limits):                                    ////
+////  None                                                        ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Nothing.                                                    ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////      - Igor Mohor (igorm@opencores.org)                      ////
+////                                                              ////
+////  Created:        2001/05/12                                  ////
+////  Last Updated:   2001/05/17                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+// Timescale define
+
+`timescale 1ns/10ps
Index: /trunk/OC-UART/uart_transmitter.v
===================================================================
--- /trunk/OC-UART/uart_transmitter.v	(revision 6)
+++ /trunk/OC-UART/uart_transmitter.v	(revision 6)
@@ -0,0 +1,351 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_transmitter.v                                          ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  UART core transmitter logic                                 ////
+////                                                              ////
+////  Known problems (limits):                                    ////
+////  None known                                                  ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Thorough testing.                                           ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////      - Igor Mohor (igorm@opencores.org)                      ////
+////                                                              ////
+////  Created:        2001/05/12                                  ////
+////  Last Updated:   2001/05/17                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.18  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+// Revision 1.16  2002/01/08 11:29:40  mohor
+// tf_pop was too wide. Now it is only 1 clk cycle width.
+//
+// Revision 1.15  2001/12/17 14:46:48  mohor
+// overrun signal was moved to separate block because many sequential lsr
+// reads were preventing data from being written to rx fifo.
+// underrun signal was not used and was removed from the project.
+//
+// Revision 1.14  2001/12/03 21:44:29  gorban
+// Updated specification documentation.
+// Added full 32-bit data bus interface, now as default.
+// Address is 5-bit wide in 32-bit data bus mode.
+// Added wb_sel_i input to the core. It's used in the 32-bit mode.
+// Added debug interface with two 32-bit read-only registers in 32-bit mode.
+// Bits 5 and 6 of LSR are now only cleared on TX FIFO write.
+// My small test bench is modified to work with 32-bit mode.
+//
+// Revision 1.13  2001/11/08 14:54:23  mohor
+// Comments in Slovene language deleted, few small fixes for better work of
+// old tools. IRQs need to be fix.
+//
+// Revision 1.12  2001/11/07 17:51:52  gorban
+// Heavily rewritten interrupt and LSR subsystems.
+// Many bugs hopefully squashed.
+//
+// Revision 1.11  2001/10/29 17:00:46  gorban
+// fixed parity sending and tx_fifo resets over- and underrun
+//
+// Revision 1.10  2001/10/20 09:58:40  gorban
+// Small synopsis fixes
+//
+// Revision 1.9  2001/08/24 21:01:12  mohor
+// Things connected to parity changed.
+// Clock devider changed.
+//
+// Revision 1.8  2001/08/23 16:05:05  mohor
+// Stop bit bug fixed.
+// Parity bug fixed.
+// WISHBONE read cycle bug fixed,
+// OE indicator (Overrun Error) bug fixed.
+// PE indicator (Parity Error) bug fixed.
+// Register read bug fixed.
+//
+// Revision 1.6  2001/06/23 11:21:48  gorban
+// DL made 16-bit long. Fixed transmission/reception bugs.
+//
+// Revision 1.5  2001/06/02 14:28:14  gorban
+// Fixed receiver and transmitter. Major bug fixed.
+//
+// Revision 1.4  2001/05/31 20:08:01  gorban
+// FIFO changes and other corrections.
+//
+// Revision 1.3  2001/05/27 17:37:49  gorban
+// Fixed many bugs. Updated spec. Changed FIFO files structure. See CHANGES.txt file.
+//
+// Revision 1.2  2001/05/21 19:12:02  gorban
+// Corrected some Linter messages.
+//
+// Revision 1.1  2001/05/17 18:34:18  gorban
+// First 'stable' release. Should be sythesizable now. Also added new header.
+//
+// Revision 1.0  2001-05-17 21:27:12+02  jacob
+// Initial revision
+//
+//
+
+// synopsys translate_off
+`include "timescale.v"
+// synopsys translate_on
+
+`include "uart_defines.v"
+
+module uart_transmitter (clk, wb_rst_i, lcr, tf_push, wb_dat_i, enable,	stx_pad_o, tstate, tf_count, tx_reset, lsr_mask);
+// UART transmitter: a TX FIFO feeding the serializer state machine that drives stx_pad_o.
+input 										clk;        // system clock
+input 										wb_rst_i;   // asynchronous reset, active high
+input [7:0] 								lcr;        // line control register (word length, stop bits, parity, break)
+input 										tf_push;    // push wb_dat_i into the TX FIFO
+input [7:0] 								wb_dat_i;   // byte to queue for transmission
+input 										enable;     // state machine advances only while high (16x baud tick per uart_defines.v)
+input 										tx_reset;   // drives the FIFO's fifo_reset input
+input 										lsr_mask;   // drives the FIFO's reset_status input
+output 										stx_pad_o;  // serial data output (forced low while the break bit is set)
+output [2:0] 								tstate;     // current transmitter state, one of the s_* values below
+output [`UART_FIFO_COUNTER_W-1:0] 	tf_count;   // count output of the TX FIFO
+
+reg [2:0] 									tstate;
+reg [4:0] 									counter;       // enable-tick countdown inside the current bit
+reg [2:0] 									bit_counter;   // counts the bits to be sent
+reg [6:0] 									shift_out;     // output shift register (bits still waiting to be sent)
+reg 											stx_o_tmp;     // registered line value before the break override
+reg 											parity_xor;    // parity (XOR) of the data bits of the word
+reg 											tf_pop;        // pop strobe to the TX FIFO
+reg 											bit_out;       // bit currently being presented on the line
+
+// TX FIFO instance
+//
+// Transmitter FIFO signals
+wire [`UART_FIFO_WIDTH-1:0] 			tf_data_in;
+wire [`UART_FIFO_WIDTH-1:0] 			tf_data_out;
+wire 											tf_push;
+wire 											tf_overrun;   // not used by the state machine
+wire [`UART_FIFO_COUNTER_W-1:0] 		tf_count;
+
+assign 										tf_data_in = wb_dat_i;
+
+uart_tfifo fifo_tx(	// error bit signal is not used in transmitter FIFO
+	.clk(		clk		), 
+	.wb_rst_i(	wb_rst_i	),
+	.data_in(	tf_data_in	),
+	.data_out(	tf_data_out	),
+	.push(		tf_push		),
+	.pop(		tf_pop		),
+	.overrun(	tf_overrun	),
+	.count(		tf_count	),
+	.fifo_reset(	tx_reset	),
+	.reset_status(lsr_mask)
+);
+
+// TRANSMITTER FINITE STATE MACHINE
+// Each start, data and parity bit lasts 16 enable ticks: one tick to load counter with 15, then 15 ticks counting down to 1.
+parameter s_idle        = 3'd0;   // line high, waiting for data in the FIFO
+parameter s_send_start  = 3'd1;   // start bit (line low)
+parameter s_send_byte   = 3'd2;   // data bits, least significant bit first
+parameter s_send_parity = 3'd3;   // optional parity bit
+parameter s_send_stop   = 3'd4;   // stop bit(s) (line high)
+parameter s_pop_byte    = 3'd5;   // fetch the next byte from the FIFO
+
+always @(posedge clk or posedge wb_rst_i)
+begin
+  if (wb_rst_i)
+  begin
+	tstate       <= #1 s_idle;
+	stx_o_tmp       <= #1 1'b1;   // idle line level is high
+	counter   <= #1 5'b0;
+	shift_out   <= #1 7'b0;
+	bit_out     <= #1 1'b0;
+	parity_xor  <= #1 1'b0;
+	tf_pop      <= #1 1'b0;
+	bit_counter <= #1 3'b0;
+  end
+  else
+  if (enable)
+  begin
+	case (tstate)
+	s_idle	 :	if (~|tf_count) // if tf_count==0
+			begin
+				tstate <= #1 s_idle;
+				stx_o_tmp <= #1 1'b1;
+			end
+			else
+			begin
+				tf_pop <= #1 1'b0;
+				stx_o_tmp  <= #1 1'b1;
+				tstate  <= #1 s_pop_byte;
+			end
+	s_pop_byte :	begin
+				tf_pop <= #1 1'b1;   // cleared again in s_send_start, or as soon as enable drops
+				case (lcr[/*`UART_LC_BITS*/1:0])  // number of bits in a word
+				2'b00 : begin   // 5 data bits
+					bit_counter <= #1 3'b100;
+					parity_xor  <= #1 ^tf_data_out[4:0];
+				     end
+				2'b01 : begin   // 6 data bits
+					bit_counter <= #1 3'b101;
+					parity_xor  <= #1 ^tf_data_out[5:0];
+				     end
+				2'b10 : begin   // 7 data bits
+					bit_counter <= #1 3'b110;
+					parity_xor  <= #1 ^tf_data_out[6:0];
+				     end
+				2'b11 : begin   // 8 data bits
+					bit_counter <= #1 3'b111;
+					parity_xor  <= #1 ^tf_data_out[7:0];
+				     end
+				endcase
+				{shift_out[6:0], bit_out} <= #1 tf_data_out;   // bit 0 is sent first
+				tstate <= #1 s_send_start;
+			end
+	s_send_start :	begin
+				tf_pop <= #1 1'b0;
+				if (~|counter)
+					counter <= #1 5'b01111;
+				else
+				if (counter == 5'b00001)
+				begin
+					counter <= #1 0;
+					tstate <= #1 s_send_byte;
+				end
+				else
+					counter <= #1 counter - 1'b1;
+				stx_o_tmp <= #1 1'b0;   // start bit
+			end
+	s_send_byte :	begin
+				if (~|counter)
+					counter <= #1 5'b01111;
+				else
+				if (counter == 5'b00001)
+				begin
+					if (bit_counter > 3'b0)
+					begin
+						bit_counter <= #1 bit_counter - 1'b1;
+						{shift_out[5:0],bit_out  } <= #1 {shift_out[6:1], shift_out[0]};   // shift right by one
+						tstate <= #1 s_send_byte;
+					end
+					else   // end of byte
+					if (~lcr[`UART_LC_PE])   // parity disabled: go straight to the stop bit
+					begin
+						tstate <= #1 s_send_stop;
+					end
+					else
+					begin
+						case ({lcr[`UART_LC_EP],lcr[`UART_LC_SP]})
+						2'b00:	bit_out <= #1 ~parity_xor;   // EP=0, SP=0: odd parity
+						2'b01:	bit_out <= #1 1'b1;          // EP=0, SP=1: stick parity, bit forced to 1
+						2'b10:	bit_out <= #1 parity_xor;    // EP=1, SP=0: even parity
+						2'b11:	bit_out <= #1 1'b0;          // EP=1, SP=1: stick parity, bit forced to 0
+						endcase
+						tstate <= #1 s_send_parity;
+					end
+					counter <= #1 0;
+				end
+				else
+					counter <= #1 counter - 1'b1;
+				stx_o_tmp <= #1 bit_out; // set output pin
+			end
+	s_send_parity :	begin
+				if (~|counter)
+					counter <= #1 5'b01111;
+				else
+				if (counter == 5'b00001)
+				begin
+					counter <= #1 5'b0;   // literal sized to the 5-bit counter (was 4'b0)
+					tstate <= #1 s_send_stop;
+				end
+				else
+					counter <= #1 counter - 1'b1;
+				stx_o_tmp <= #1 bit_out;
+			end
+	s_send_stop :  begin
+				if (~|counter)
+				  begin   // load values give 14/22/30 ticks here; presumably s_idle + s_pop_byte supply the last two
+						casez ({lcr[`UART_LC_SB],lcr[`UART_LC_BITS]})   // casez: an X in lcr is not treated as a wildcard
+  						3'b0??:	  counter <= #1 5'b01101;     // 1 stop bit ok igor
+  						3'b100:	  counter <= #1 5'b10101;     // 1.5 stop bit
+  						default:	  counter <= #1 5'b11101;     // 2 stop bits
+						endcase
+					end
+				else
+				if (counter == 5'b00001)
+				begin
+					counter <= #1 0;
+					tstate <= #1 s_idle;
+				end
+				else
+					counter <= #1 counter - 1'b1;
+				stx_o_tmp <= #1 1'b1;
+			end
+
+		default : // should never get here
+			tstate <= #1 s_idle;
+	endcase
+  end // end if enable
+  else
+    tf_pop <= #1 1'b0;  // tf_pop must be 1 cycle width
+end // transmitter logic
+
+assign stx_pad_o = lcr[`UART_LC_BC] ? 1'b0 : stx_o_tmp;    // Break condition
+	
+endmodule
Index: /trunk/OC-UART/uart_defines.v
===================================================================
--- /trunk/OC-UART/uart_defines.v	(revision 6)
+++ /trunk/OC-UART/uart_defines.v	(revision 6)
@@ -0,0 +1,248 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_defines.v                                              ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  Defines of the Core                                         ////
+////                                                              ////
+////  Known problems (limits):                                    ////
+////  None                                                        ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Nothing.                                                    ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - gorban@opencores.org                                  ////
+////      - Jacob Gorban                                          ////
+////      - Igor Mohor (igorm@opencores.org)                      ////
+////                                                              ////
+////  Created:        2001/05/12                                  ////
+////  Last Updated:   2001/05/17                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.13  2003/06/11 16:37:47  gorban
+// This fixes errors in some cases when data is being read and put to the FIFO at the same time. Patch is submitted by Scott Furman. Update is very recommended.
+//
+// Revision 1.12  2002/07/22 23:02:23  gorban
+// Bug Fixes:
+//  * Possible loss of sync and bad reception of stop bit on slow baud rates fixed.
+//   Problem reported by Kenny.Tung.
+//  * Bad (or lack of ) loopback handling fixed. Reported by Cherry Withers.
+//
+// Improvements:
+//  * Made FIFO's as general inferrable memory where possible.
+//  So on FPGA they should be inferred as RAM (Distributed RAM on Xilinx).
+//  This saves about 1/3 of the Slice count and reduces P&R and synthesis times.
+//
+//  * Added optional baudrate output (baud_o).
+//  This is identical to BAUDOUT* signal on 16550 chip.
+//  It outputs 16xbit_clock_rate - the divided clock.
+//  It's disabled by default. Define UART_HAS_BAUDRATE_OUTPUT to use.
+//
+// Revision 1.10  2001/12/11 08:55:40  mohor
+// Scratch register define added.
+//
+// Revision 1.9  2001/12/03 21:44:29  gorban
+// Updated specification documentation.
+// Added full 32-bit data bus interface, now as default.
+// Address is 5-bit wide in 32-bit data bus mode.
+// Added wb_sel_i input to the core. It's used in the 32-bit mode.
+// Added debug interface with two 32-bit read-only registers in 32-bit mode.
+// Bits 5 and 6 of LSR are now only cleared on TX FIFO write.
+// My small test bench is modified to work with 32-bit mode.
+//
+// Revision 1.8  2001/11/26 21:38:54  gorban
+// Lots of fixes:
+// Break condition wasn't handled correctly at all.
+// LSR bits could lose their values.
+// LSR value after reset was wrong.
+// Timing of THRE interrupt signal corrected.
+// LSR bit 0 timing corrected.
+//
+// Revision 1.7  2001/08/24 21:01:12  mohor
+// Things connected to parity changed.
+// Clock devider changed.
+//
+// Revision 1.6  2001/08/23 16:05:05  mohor
+// Stop bit bug fixed.
+// Parity bug fixed.
+// WISHBONE read cycle bug fixed,
+// OE indicator (Overrun Error) bug fixed.
+// PE indicator (Parity Error) bug fixed.
+// Register read bug fixed.
+//
+// Revision 1.5  2001/05/31 20:08:01  gorban
+// FIFO changes and other corrections.
+//
+// Revision 1.4  2001/05/21 19:12:02  gorban
+// Corrected some Linter messages.
+//
+// Revision 1.3  2001/05/17 18:34:18  gorban
+// First 'stable' release. Should be sythesizable now. Also added new header.
+//
+// Revision 1.0  2001-05-17 21:27:11+02  jacob
+// Initial revision
+//
+//
+
+// Uncomment the DATA_BUS_WIDTH_8 define below to build the version with an 8-bit data interface.
+// In 32-bit bus mode (the default) the wb_sel_i signal is used to put data in the correct place.
+// The 8-bit version does not include the debugging features.
+// CAUTION: doesn't work with current version of OR1200
+//`define DATA_BUS_WIDTH_8
+
+`ifdef DATA_BUS_WIDTH_8
+ `define UART_ADDR_WIDTH 3
+ `define UART_DATA_WIDTH 8
+`else
+ `define UART_ADDR_WIDTH 5
+ `define UART_DATA_WIDTH 32
+`endif
+
+// UART_HAS_BAUDRATE_OUTPUT gives the UART a
+// 16xBaudrate output port.
+// If defined, the enable signal will be used to drive baudrate_o signal
+// Its frequency is 16xbaudrate
+// It is defined (enabled) here; comment the line out if the port is not wanted.
+`define UART_HAS_BAUDRATE_OUTPUT
+
+// Register addresses (several registers share an address; NOTE(review): presumably told apart by access direction and the UART_LC_DL bit - confirm in the register file)
+`define UART_REG_RB	`UART_ADDR_WIDTH'd0	// receiver buffer (same address as TR and DL1)
+`define UART_REG_TR  `UART_ADDR_WIDTH'd0	// transmitter
+`define UART_REG_IE	`UART_ADDR_WIDTH'd1	// Interrupt enable (same address as DL2)
+`define UART_REG_II  `UART_ADDR_WIDTH'd2	// Interrupt identification (same address as FC)
+`define UART_REG_FC  `UART_ADDR_WIDTH'd2	// FIFO control
+`define UART_REG_LC	`UART_ADDR_WIDTH'd3	// Line Control
+`define UART_REG_MC	`UART_ADDR_WIDTH'd4	// Modem control
+`define UART_REG_LS  `UART_ADDR_WIDTH'd5	// Line status
+`define UART_REG_MS  `UART_ADDR_WIDTH'd6	// Modem status
+`define UART_REG_SR  `UART_ADDR_WIDTH'd7	// Scratch register
+`define UART_REG_DL1	`UART_ADDR_WIDTH'd0	// Divisor latch byte 1
+`define UART_REG_DL2	`UART_ADDR_WIDTH'd1	// Divisor latch byte 2
+
+// Interrupt Enable register bits
+`define UART_IE_RDA	0	// Received Data available interrupt
+`define UART_IE_THRE	1	// Transmitter Holding Register empty interrupt
+`define UART_IE_RLS	2	// Receiver Line Status Interrupt
+`define UART_IE_MS	3	// Modem Status Interrupt
+
+// Interrupt Identification register bits
+`define UART_II_IP	0	// Interrupt pending when 0
+`define UART_II_II	3:1	// Interrupt identification
+
+// Interrupt identification values for bits 3:1
+`define UART_II_RLS	3'b011	// Receiver Line Status
+`define UART_II_RDA	3'b010	// Receiver Data available
+`define UART_II_TI	3'b110	// Timeout Indication
+`define UART_II_THRE	3'b001	// Transmitter Holding Register empty
+`define UART_II_MS	3'b000	// Modem Status
+
+// FIFO Control Register bits
+`define UART_FC_TL	1:0	// Trigger level
+
+// FIFO trigger level values (encodings for the UART_FC_TL field; NOTE(review): byte counts taken from the macro names)
+`define UART_FC_1		2'b00	// 1 byte
+`define UART_FC_4		2'b01	// 4 bytes
+`define UART_FC_8		2'b10	// 8 bytes
+`define UART_FC_14	2'b11	// 14 bytes
+
+// Line Control register bits
+`define UART_LC_BITS	1:0	// bits in character (uart_transmitter: 00=5, 01=6, 10=7, 11=8)
+`define UART_LC_SB	2	// stop bits
+`define UART_LC_PE	3	// parity enable
+`define UART_LC_EP	4	// even parity
+`define UART_LC_SP	5	// stick parity
+`define UART_LC_BC	6	// Break control
+`define UART_LC_DL	7	// Divisor Latch access bit
+
+// Modem Control register bits (NOTE(review): expansions below follow the usual 16550 MCR naming - confirm)
+`define UART_MC_DTR	0	// Data Terminal Ready
+`define UART_MC_RTS	1	// Request To Send
+`define UART_MC_OUT1	2	// Output 1
+`define UART_MC_OUT2	3	// Output 2
+`define UART_MC_LB	4	// Loopback mode
+
+// Line Status Register bits
+`define UART_LS_DR	0	// Data ready
+`define UART_LS_OE	1	// Overrun Error
+`define UART_LS_PE	2	// Parity Error
+`define UART_LS_FE	3	// Framing Error
+`define UART_LS_BI	4	// Break interrupt
+`define UART_LS_TFE	5	// Transmit FIFO is empty
+`define UART_LS_TE	6	// Transmitter Empty indicator
+`define UART_LS_EI	7	// Error indicator
+
+// Modem Status Register bits (NOTE(review): expansions below follow the usual 16550 MSR naming - confirm)
+`define UART_MS_DCTS	0	// Delta signals: CTS
+`define UART_MS_DDSR	1	// DSR
+`define UART_MS_TERI	2	// RI (trailing edge)
+`define UART_MS_DDCD	3	// DCD
+`define UART_MS_CCTS	4	// Complement signals: CTS
+`define UART_MS_CDSR	5	// DSR
+`define UART_MS_CRI	6	// RI
+`define UART_MS_CDCD	7	// DCD
+
+// FIFO parameter defines
+// NOTE(review): DEPTH is 15 while a 4-bit pointer can address 16 entries - confirm that 15 is intended.
+`define UART_FIFO_WIDTH	8
+`define UART_FIFO_DEPTH	15
+`define UART_FIFO_POINTER_W	4
+`define UART_FIFO_COUNTER_W	5
+// receiver fifo has width 11 because it has break, parity and framing error bits
+`define UART_FIFO_REC_WIDTH  11
+
+// Verbosity / test-length switches (NOTE(review): presumably read only by the testbench - confirm)
+`define VERBOSE_WB  0           // All activity on the WISHBONE is recorded
+`define VERBOSE_LINE_STATUS 0   // Details about the lsr (line status register)
+`define FAST_TEST   1           // 64/1024 packets are sent
+// Selects the byte-select to address-LSB mapping in the 32-bit bus interface; the spelling (one T) must match the ifdef there.
+`define LITLE_ENDIAN
+
+
+
+
+
+
Index: /trunk/OC-UART/uart_sync_flops.v
===================================================================
--- /trunk/OC-UART/uart_sync_flops.v	(revision 6)
+++ /trunk/OC-UART/uart_sync_flops.v	(revision 6)
@@ -0,0 +1,122 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  uart_sync_flops.v                                             ////
+////                                                              ////
+////                                                              ////
+////  This file is part of the "UART 16550 compatible" project    ////
+////  http://www.opencores.org/cores/uart16550/                   ////
+////                                                              ////
+////  Documentation related to this project:                      ////
+////  - http://www.opencores.org/cores/uart16550/                 ////
+////                                                              ////
+////  Projects compatibility:                                     ////
+////  - WISHBONE                                                  ////
+////  RS232 Protocol                                              ////
+////  16550D uart (mostly supported)                              ////
+////                                                              ////
+////  Overview (main Features):                                   ////
+////  UART input synchronizer (two flip-flop stages)              ////
+////                                                              ////
+////  Known problems (limits):                                    ////
+////  None known                                                  ////
+////                                                              ////
+////  To Do:                                                      ////
+////  Thorough testing.                                           ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Andrej Erzen (andreje@flextronics.si)                 ////
+////      - Tadej Markovic (tadejm@flextronics.si)                ////
+////                                                              ////
+////  Created:        2004/05/20                                  ////
+////  Last Updated:   2004/05/20                                  ////
+////                  (See log for the revision history)          ////
+////                                                              ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2000, 2001 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+//
+
+
+`include "timescale.v"
+
+
+module uart_sync_flops
+(
+  // async_dat_i -> flop_0 -> sync_dat_o, both registers clocked by clk_i
+  rst_i,
+  clk_i,
+  stage1_rst_i,
+  stage1_clk_en_i,
+  async_dat_i,
+  sync_dat_o
+);
+
+parameter Tp            = 1;      // #delay applied to every register assignment
+parameter width         = 1;      // number of bits carried through the two registers
+parameter init_value    = 1'b0;   // value replicated across all bits on reset
+
+// Port directions. stage1_rst_i and stage1_clk_en_i affect only the output
+// register sync_dat_o; the capture register flop_0 ignores both.
+input                           rst_i;                  // asynchronous reset, active high
+input                           clk_i;                  // clock, rising edge active
+input                           stage1_rst_i;           // synchronous reset of sync_dat_o
+input                           stage1_clk_en_i;        // synchronous load enable of sync_dat_o
+input   [width-1:0]             async_dat_i;            // asynchronous data input
+output  [width-1:0]             sync_dat_o;             // synchronous data output
+
+// Registers
+reg     [width-1:0]             flop_0;                 // capture register
+reg     [width-1:0]             sync_dat_o;             // output register
+
+// Single clocked process drives both registers: rst_i loads init_value into
+// each of them asynchronously, otherwise they update on the rising clk_i edge.
+always @ (posedge clk_i or posedge rst_i)
+begin
+    if (rst_i)
+    begin
+        flop_0     <= #Tp {width{init_value}};
+        sync_dat_o <= #Tp {width{init_value}};
+    end
+    else
+    begin
+        // capture register: samples the asynchronous input on every clock edge
+        flop_0 <= #Tp async_dat_i;
+
+        // output register: stage1_rst_i has priority over stage1_clk_en_i;
+        // with neither asserted the previous value is held
+        if (stage1_rst_i)
+            sync_dat_o <= #Tp {width{init_value}};
+        else if (stage1_clk_en_i)
+            sync_dat_o <= #Tp flop_0;
+    end
+end
+
+endmodule
Index: /trunk/T1-common/include/xst_defines.h
===================================================================
--- /trunk/T1-common/include/xst_defines.h	(revision 6)
+++ /trunk/T1-common/include/xst_defines.h	(revision 6)
@@ -0,0 +1,4 @@
+`define FPGA_SYN              // value-less flag: only whether it is defined can matter
+`define FPGA_SYN_1THREAD      // value-less flag; NOTE(review): presumably selects a single-thread build - confirm
+`define FPGA_SYN_NO_SPU       // value-less flag; NOTE(review): presumably omits the SPU - confirm
+`define FPGA_SYN_8TLB         // value-less flag; NOTE(review): presumably selects 8-entry TLBs - confirm
Index: /trunk/T1-common/include/sys_paths.h
===================================================================
--- /trunk/T1-common/include/sys_paths.h	(revision 6)
+++ /trunk/T1-common/include/sys_paths.h	(revision 6)
@@ -0,0 +1,50 @@
+/*
+* ========== Copyright Header Begin ==========================================
+* 
+* OpenSPARC T1 Processor File: sys_paths.h
+* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+* 
+* The above named program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public
+* License version 2 as published by the Free Software Foundation.
+* 
+* The above named program is distributed in the hope that it will be 
+* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* General Public License for more details.
+* 
+* You should have received a copy of the GNU General Public
+* License along with this work; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+* 
+* ========== Copyright Header End ============================================
+*/
+// -*- verilog -*-
+////////////////////////////////////////////////////////////////////////
+/*
+//
+// Description:		Global header file that contain definitions that 
+//                      are common/shared at the system level
+*/
+////////////////////////////////////////////////////////////////////////
+
+// Here we add defines for all the blocks referenced in monitors.
+// The absolute path would vary based on the verif model being built.
+
+// Global CIOP defines (hierarchical names rooted at `RBC, which expands to sys_top)
+`define      RBC           sys_top
+`define      RBC_CLK      `RBC.jbus_gclk
+`define      CPU_CLK      `RBC.cmp_gclk
+`define      CHIP_RST_L   `RBC.jbus_grst_l
+`define      CHIP_RST     ~`RBC.jbus_grst_l
+// (CHIP_RST is the bitwise inverse of the same signal that CHIP_RST_L names)
+// BSC defines (all paths live below sys_top.iop.bsc)
+`define      BSC          `RBC.iop.bsc
+`define      BSC_CTL      `BSC.bsc_ctl
+`define      BSC_SFS      `BSC_CTL.bsc_sfs
+`define      BSC_IOP      `BSC.bsc_iop
+`define      BSC_EEPU     `BSC_IOP.eepu_ioq
+// (FAIL_FLAG below spells out sys_top directly instead of going through `RBC)
+// Environment defines
+`define     FAIL_FLAG     sys_top.fail_flag
Index: /trunk/T1-common/include/sys.h
===================================================================
--- /trunk/T1-common/include/sys.h	(revision 6)
+++ /trunk/T1-common/include/sys.h	(revision 6)
@@ -0,0 +1,278 @@
+/*
+* ========== Copyright Header Begin ==========================================
+* 
+* OpenSPARC T1 Processor File: sys.h
+* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+* 
+* The above named program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public
+* License version 2 as published by the Free Software Foundation.
+* 
+* The above named program is distributed in the hope that it will be 
+* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* General Public License for more details.
+* 
+* You should have received a copy of the GNU General Public
+* License along with this work; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+* 
+* ========== Copyright Header End ============================================
+*/
+// -*- verilog -*-
+////////////////////////////////////////////////////////////////////////
+/*
+//
+// Description:		Global header file that contain definitions that 
+//                      are common/shared at the system level
+*/
+////////////////////////////////////////////////////////////////////////
+//
+// Setting the time scale
+// If the timescale changes, JP_TIMESCALE may also have to change.
+`timescale	1ps/1ps
+// (JP_TIMESCALE further down in this file repeats the same 1 ps / 1 ps setting)
+//
+// JBUS clock
+// =========
+// 5000 units of the 1 ps timescale above = 5 ns, i.e. 200 MHz if used as the clock period
+`define SYSCLK_PERIOD   5000
+
+
+// Afara Link Defines
+// ==================
+// (the *_HI/*_LO values in the "Object Format" sections are bit indices; the highest is 111 = AL_OBJ_SZ - 1)
+// Reliable Link
+`define AL_RB_CNT       16
+`define AL_RB_IDX        4
+`define AL_RB_WINDOW    `AL_RB_IDX'd8
+// (AL_RB_WINDOW expands to the sized literal 4'd8; 4 bits are enough to index AL_RB_CNT = 16 entries)
+// Afara Link Objects
+`define AL_OBJ_SZ      112
+
+// Afara Link Object Format - Reliable Link
+`define AL_RL_HI       111
+`define AL_RL_LO       103
+`define AL_RL_SZ         9
+// ESN (4 bits), SSN (4 bits) and ED (1 bit) subdivide the 9-bit RL field 111:103
+`define AL_ESN_HI      111
+`define AL_ESN_LO      108
+`define AL_SSN_HI      107
+`define AL_SSN_LO      104
+`define AL_ED          103
+
+// Afara Link Object Format - Congestion
+`define AL_CNG_HI      102
+`define AL_CNG_LO       94
+`define AL_CNG_SZ        9
+// REQ_CNG (1), BSCT (6), EGR_P_CNG (1) and MARK (1) subdivide the 9-bit field 102:94
+`define AL_REQ_CNG     102
+`define AL_BSCT_HI     101
+`define AL_BSCT_LO      96
+`define AL_EGR_P_CNG    95
+`define AL_MARK         94
+
+// The acknowledge fields below occupy bits 93:73, which is AL_ACK_SZ = 21 bits
+// Afara Link Object Format - Acknowledge
+`define AL_ACK_SZ       21
+`define AL_A_COS        93
+`define AL_A_TYP_HI     92
+`define AL_A_TYP_LO     91
+`define AL_A_NACK       90
+`define AL_A_TAG_HI     89
+`define AL_A_TAG_LO     80
+`define AL_A_PORT_HI    79
+`define AL_A_PORT_LO    73
+
+
+// Afara Link Object Format - Request (fields occupy bits 72:0, which is AL_REQ_SZ = 73 bits)
+`define AL_REQ_SZ       73
+`define AL_R_COS        72
+`define AL_R_TYP_HI     71
+`define AL_R_TYP_LO     70
+`define AL_R_SCR_HI     69
+`define AL_R_SCR_LO     67
+`define AL_R_TCR_HI     66
+`define AL_R_TCR_LO     64
+`define AL_R_TAG_HI     63
+`define AL_R_TAG_LO     54
+`define AL_R_PORT_HI    53
+`define AL_R_PORT_LO    47
+`define AL_R_LEN_HI     46
+`define AL_R_LEN_LO     40
+`define AL_R_ADD_HI     39
+`define AL_R_ADD_LO      0
+
+// Afara Link Object Format - Message (3-bit field 2:0)
+`define AL_M_MQID_HI     2
+`define AL_M_MQID_LO     0
+
+// Acknowledge Types (2-bit codes; 2'b11 is not named here)
+`define AL_ACK_NONE   2'b00
+`define AL_ACK_NPAY   2'b01
+`define AL_ACK_WPAY   2'b10
+
+// Request Types (2-bit codes)
+`define AL_REQ_NONE   2'b00
+`define AL_REQ_NPAY   2'b01
+`define AL_REQ_WPAY   2'b10
+`define AL_REQ_MSG    2'b11
+
+// Afara Link Frame
+`define AL_FRAME_SZ    144
+
+
+//
+// UCB Packet Type
+// ===============
+// 4-bit codes (same width as the UCB_PKT_HI:UCB_PKT_LO field defined further down)
+`define UCB_READ_NACK        4'b0000    // ack/nack types
+`define UCB_READ_ACK         4'b0001
+`define UCB_WRITE_ACK        4'b0010
+`define UCB_IFILL_ACK        4'b0011
+`define UCB_IFILL_NACK       4'b0111
+
+`define UCB_READ_REQ         4'b0100    // req types
+`define UCB_WRITE_REQ        4'b0101
+`define UCB_IFILL_REQ        4'b0110
+
+`define UCB_INT              4'b1000    // plain interrupt
+`define UCB_INT_VEC          4'b1100    // interrupt with vector
+`define UCB_RESET_VEC        4'b1101    // reset with vector
+`define UCB_IDLE_VEC         4'b1110    // idle with vector
+`define UCB_RESUME_VEC       4'b1111    // resume with vector
+
+
+//
+// UCB Data Packet Format
+// ======================
+// Bit layout, MSB first; the number in parentheses is the field width in bits
+`define UCB_NOPAY_PKT_WIDTH   64        // packet without payload
+`define UCB_64PAY_PKT_WIDTH  128        // packet with 64 bit payload
+`define UCB_128PAY_PKT_WIDTH 192        // packet with 128 bit payload
+
+`define UCB_DATA_EXT_HI      191        // (64) extended data
+`define UCB_DATA_EXT_LO      128
+`define UCB_DATA_HI          127        // (64) data
+`define UCB_DATA_LO           64
+`define UCB_RSV_HI            63        // (9) reserved bits
+`define UCB_RSV_LO            55
+`define UCB_ADDR_HI           54        // (40) bit address
+`define UCB_ADDR_LO           15
+`define UCB_SIZE_HI           14        // (3) request size
+`define UCB_SIZE_LO           12
+`define UCB_BUF_HI            11        // (2) buffer ID
+`define UCB_BUF_LO            10
+`define UCB_THR_HI             9        // (6) cpu/thread ID
+`define UCB_THR_LO             4
+`define UCB_PKT_HI             3        // (4) packet type
+`define UCB_PKT_LO             0
+// Field widths; each equals HI - LO + 1 of the matching field above
+`define UCB_DATA_EXT_WIDTH    64
+`define UCB_DATA_WIDTH        64
+`define UCB_RSV_WIDTH          9
+`define UCB_ADDR_WIDTH        40 
+`define UCB_SIZE_WIDTH         3
+`define UCB_BUFID_WIDTH        2
+`define UCB_THR_WIDTH          6
+`define UCB_PKT_WIDTH          4
+
+// Size encoding for the UCB_SIZE_HI/LO field
+// 000 - byte
+// 001 - half-word
+// 010 - word
+// 011 - double-word
+// 111 - quad-word
+`define UCB_SIZE_1B          3'b000
+`define UCB_SIZE_2B          3'b001
+`define UCB_SIZE_4B          3'b010
+`define UCB_SIZE_8B          3'b011
+`define UCB_SIZE_16B         3'b111
+
+
+//
+// UCB Interrupt Packet Format
+// ===========================
+// Bits 9:0 (thread ID and packet type) reuse UCB_THR_* / UCB_PKT_* from the data packet format
+`define UCB_INT_PKT_WIDTH     64
+
+`define UCB_INT_RSV_HI        63        // (7) reserved bits
+`define UCB_INT_RSV_LO        57
+`define UCB_INT_VEC_HI        56        // (6) interrupt vector
+`define UCB_INT_VEC_LO        51
+`define UCB_INT_STAT_HI       50        // (32) interrupt status
+`define UCB_INT_STAT_LO       19
+`define UCB_INT_DEV_HI        18        // (9) device ID
+`define UCB_INT_DEV_LO        10
+//`define UCB_THR_HI             9      // (6) cpu/thread ID shared with
+//`define UCB_THR_LO             4      //     data packet format
+//`define UCB_PKT_HI             3      // (4) packet type shared with
+//`define UCB_PKT_LO             0      //     data packet format
+
+`define UCB_INT_RSV_WIDTH      7
+`define UCB_INT_VEC_WIDTH      6
+`define UCB_INT_STAT_WIDTH    32
+`define UCB_INT_DEV_WIDTH      9
+
+
+//
+// FCRAM Bus Widths
+// ================
+// (number of signals in each bus)
+`define FCRAM_DQ_WIDTH                16
+`define FCRAM_DQS_WIDTH                2
+`define FCRAM_ADDR_WIDTH              15
+`define FCRAM_BA_WIDTH                 2
+
+
+//
+// ENET clock periods
+// ==================
+// In units of the 1 ps timescale: 6400 ps = 156.25 MHz, 8000 ps = 125 MHz
+`define AXGRMII_CLK_PERIOD          6400 // 312.5MHz/2
+`define ENET_GMAC_CLK_PERIOD        8000 // 125MHz
+
+
+//
+// JBus Bridge defines
+// =================
+// NOTE(review): `SYS is not defined in this file; it must be defined before any SYS_*_CLK macro is expanded
+`define      SYS_UPA_CLK        `SYS.upa_clk
+`define      SYS_J_CLK          `SYS.j_clk
+`define      SYS_P_CLK          `SYS.p_clk
+`define      SYS_G_CLK          `SYS.g_clk
+`define      JP_TIMESCALE       `timescale 1 ps / 1 ps
+`define      PCI_CLK_PERIOD     15152                  //  66 MHz
+`define      UPA_RD_CLK_PERIOD  6666                   // 150 MHz
+`define      UPA_REF_CLK_PERIOD 7576                   // 132 MHz
+`define      ICHIP_CLK_PERIOD   30304                  //  33 MHz
+
+
+//
+// PCI Device Address Configuration
+// ================================
+// 64-bit constants; each group gives _L/_H bounds for MEM0 and IO0 plus a JBUS base
+`define PRIM_SLAVE1_MEM0_L      64'h0000000000000000
+`define PRIM_SLAVE1_MEM0_H      64'h000000003fff0000
+`define PRIM_SLAVE1_IO0_L       64'h0000000000000000
+`define PRIM_SLAVE1_IO0_H       64'h00000000002f0000
+`define PRIM_SLAVE1_JBUS_BASE   64'h000007ff00000000
+// NOTE(review): PRIM_SLAVE2_MEM0_H (...Dfffffff) differs from PCIB_SLAVE2_MEM0_H (...7fffffff) - confirm it is intended
+`define PRIM_SLAVE2_MEM0_L      64'h0000000040000000
+`define PRIM_SLAVE2_MEM0_H      64'h00000000Dfffffff
+`define PRIM_SLAVE2_IO0_L       64'h0000000000300000
+`define PRIM_SLAVE2_IO0_H       64'h00000000005fffff
+`define PRIM_SLAVE2_JBUS_BASE   64'h000007ff40000000
+
+`define PCIB_SLAVE1_MEM0_L      64'h0000000000000000
+`define PCIB_SLAVE1_MEM0_H      64'h000000003fff0000
+`define PCIB_SLAVE1_IO0_L       64'h0000000000000000
+`define PCIB_SLAVE1_IO0_H       64'h00000000002fffff
+`define PCIB_SLAVE1_JBUS_BASE   64'h000007f780000000
+
+`define PCIB_SLAVE2_MEM0_L      64'h0000000040000000
+`define PCIB_SLAVE2_MEM0_H      64'h000000007fffffff
+`define PCIB_SLAVE2_IO0_L       64'h0000000000300000
+`define PCIB_SLAVE2_IO0_H       64'h00000000007fffff
+`define PCIB_SLAVE2_JBUS_BASE   64'h000007f7c0000000
Index: /trunk/T1-common/include/ifu.h
===================================================================
--- /trunk/T1-common/include/ifu.h	(revision 6)
+++ /trunk/T1-common/include/ifu.h	(revision 6)
@@ -0,0 +1,180 @@
+/*
+* ========== Copyright Header Begin ==========================================
+* 
+* OpenSPARC T1 Processor File: ifu.h
+* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+* 
+* The above named program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public
+* License version 2 as published by the Free Software Foundation.
+* 
+* The above named program is distributed in the hope that it will be 
+* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* General Public License for more details.
+* 
+* You should have received a copy of the GNU General Public
+* License along with this work; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+* 
+* ========== Copyright Header End ============================================
+*/
+////////////////////////////////////////////////////////////////////////
+/*
+//
+//  Module Name: ifu.h
+//  Description:	
+//  All ifu defines
+*/
+
+//--------------------------------------------
+// Icache Values in IFU::ICD/ICV/ICT/FDP/IFQDP
+//--------------------------------------------
+// Set Values
+`define IC_SZ      16384
+// IC_IDX_HI = log2(icache_size/4ways) - 1 = log2(4096) - 1
+`define IC_IDX_HI  11
+
+// !!IMPORTANT!! a change to IC_LINE_SZ will mean a change to the code as
+//   well.  Unfortunately this has not been properly parametrized.
+//   Changing the IC_LINE_SZ param alone is *not* enough.
+`define IC_LINE_SZ  32
+
+// !!IMPORTANT!! a change to IC_TAG_HI will mean a change to the code as
+//   well.  Changing the IC_TAG_HI param alone is *not* enough to
+//   change the PA range. 
+// highest bit of PA
+`define IC_TAG_HI    39
+
+
+// Derived Values
+// 4095 = 16384/4 - 1
+`define IC_ARR_HI (`IC_SZ/4 - 1)
+
+// number of entries - 1 = 511
+`define IC_ENTRY_HI  ((`IC_SZ/`IC_LINE_SZ) - 1)
+
+// 12 = IC_IDX_HI + 1
+`define IC_TAG_LO    (`IC_IDX_HI + 1)
+
+// 28 = 39 - 11
+`define IC_TAG_SZ    (`IC_TAG_HI - `IC_IDX_HI)
+
+// 7 = 11 - 4
+`define IC_IDX_SZ  (`IC_IDX_HI - 4)
+
+// tags for all 4 ways + parity
+// 116 = 28*4 + 4
+`define IC_TAG_ALL   ((`IC_TAG_SZ * 4) + 4)
+
+// 115 = IC_TAG_ALL - 1
+`define IC_TAG_ALL_HI   ((`IC_TAG_SZ * 4) + 3)
+
+
+//----------------------------------------------------------------------
+// For thread scheduler in IFU::DTU::SWL
+//----------------------------------------------------------------------
+// thread states:  (thr_state[4:0])
+`define THRFSM_DEAD     5'b00000
+`define THRFSM_IDLE     5'b00000
+`define THRFSM_HALT     5'b00010
+`define THRFSM_RDY      5'b11001
+`define THRFSM_SPEC_RDY 5'b10011
+`define THRFSM_RUN      5'b00101
+`define THRFSM_SPEC_RUN 5'b00111
+`define THRFSM_WAIT     5'b00001
+// NOTE(review): THRFSM_DEAD and THRFSM_IDLE share the encoding 5'b00000, so the two cannot be told apart by value
+// thread configuration register bit fields (single bit indices 4..0)
+`define TCR_READY   4
+`define TCR_URDY    3
+`define TCR_RUNNING 2
+`define TCR_SPEC    1
+`define TCR_ACTIVE  0
+
+
+//----------------------------------------------------------------------
+// For MIL fsm in IFU::IFQ
+//----------------------------------------------------------------------
+`define MILFSM_NULL   4'b0000
+`define MILFSM_WAIT   4'b1000
+`define MILFSM_REQ    4'b1100
+`define MILFSM_FILL0  4'b1001
+`define MILFSM_FILL1  4'b1011
+// bit indices; presumably into the 4-bit MILFSM_* codes above (bit 3 is set in every state except NULL) - confirm
+`define MIL_V  3
+`define MIL_R  2
+`define MIL_A  1
+`define MIL_F  0
+
+//---------------------------------------------------
+// Interrupt Block
+//---------------------------------------------------
+`define INT_VEC_HI  5    // VEC field 5:0 (6 bits)
+`define INT_VEC_LO  0
+`define INT_THR_HI  12   // THR field 12:8 (5 bits)
+`define INT_THR_LO  8
+`define INT_TYPE_HI 17   // TYPE field 17:16 (2 bits)
+`define INT_TYPE_LO 16
+
+//-------------------------------------
+// IFQ
+//-------------------------------------
+// valid bit (1'b1) concatenated with a type code; the type-code and CPX_*_HI/LO macros used here are not defined in this file
+`define CPX_IFILLPKT {1'b1, `IFILL_RET}
+`define CPX_INVPKT {1'b1, `INV_RET}
+`define CPX_STRPKT {1'b1, `ST_ACK}
+`define CPX_STRMACK {1'b1, `STRST_ACK}
+`define CPX_EVPKT {1'b1, `EVICT_REQ}
+`define CPX_LDPKT {1'b1, `LOAD_RET}
+`define CPX_ERRPKT {1'b1, `ERR_RET}
+`define CPX_FREQPKT {1'b1, `FWD_RQ_RET}
+
+`define CPX_REQFIELD `CPX_RQ_HI:`CPX_RQ_LO
+`define CPX_THRFIELD `CPX_TH_HI:`CPX_TH_LO
+`define CPX_RQ_SIZE  (`CPX_RQ_HI - `CPX_RQ_LO + 1)
+
+//`ifdef SPARC_L2_64B
+`define BANK_ID_HI 7
+`define BANK_ID_LO 6
+//`else
+//`define BANK_ID_HI 8
+//`define BANK_ID_LO 7
+//`endif
+
+//`define CPX_INV_PA_HI  116
+//`define CPX_INV_PA_LO  112
+
+`define IFU_ASI_VA_HI   17
+`define IFU_ASI_DATA_HI 47
+// with the icache values defined earlier in this file: ICT_FILL_BITS = 32 - 28 = 4, ICV_IDX_SZ = 11 - 5 = 6
+`define ICT_FILL_BITS  (32 - `IC_TAG_SZ)
+`define ICV_IDX_SZ  (`IC_IDX_HI - 5)
+
+//----------------------------------------
+// IFU Traps
+//----------------------------------------
+// precise (9-bit codes)
+`define INST_ACC_EXC    9'h008
+`define INST_ACC_ERR    9'h00a
+`define CORR_ECC_ERR    9'h063
+`define DATA_ACC_ERR    9'h032
+`define DATA_ERR        9'h078
+`define ASYN_DATA_ERR   9'h040
+`define INST_ACC_MMU_MS 9'h009
+`define FAST_MMU_MS     9'h064
+`define PRIV_OPC        9'h011
+`define ILL_INST        9'h010
+`define SIR             9'h004
+`define FP_DISABLED     9'h020
+`define REAL_TRANS_MS   9'h03e
+`define INST_BRK_PT     9'h076
+
+// disrupting (9-bit codes)
+`define SPU_MAINT        9'h074
+`define SPU_ENCINT       9'h070
+`define HSTICK_CMP       9'h05e
+`define RESUMABLE_ERR    9'h07e
+
+
+
Index: /trunk/T1-common/include/lsu.h
===================================================================
--- /trunk/T1-common/include/lsu.h	(revision 6)
+++ /trunk/T1-common/include/lsu.h	(revision 6)
@@ -0,0 +1,302 @@
+/*
+* ========== Copyright Header Begin ==========================================
+* 
+* OpenSPARC T1 Processor File: lsu.h
+* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+* 
+* The above named program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public
+* License version 2 as published by the Free Software Foundation.
+* 
+* The above named program is distributed in the hope that it will be 
+* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* General Public License for more details.
+* 
+* You should have received a copy of the GNU General Public
+* License along with this work; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+* 
+* ========== Copyright Header End ============================================
+*/
+`define STB_PCX_WIDTH   115	// bits 114:0; the entries below are bit indices into that vector
+`define STB_PCX_VLD     114
+`define STB_PCX_RQ_HI   113	// RQ field 113:111 (3 bits)
+`define STB_PCX_RQ_LO   111
+`define STB_PCX_NC      110
+`define STB_PCX_TH_HI   109	// TH field 109:108 (2 bits)
+`define STB_PCX_TH_LO   108
+`define STB_PCX_FLSH   	107
+//`define STB_PCX_WY_HI   107
+//`define STB_PCX_WY_LO   106
+`define STB_PCX_SZ_HI   105	// SZ field 105:104 (2 bits); bit 106 has no active define
+`define STB_PCX_SZ_LO   104
+`define STB_PCX_AD_HI   103	// AD field 103:64 (40 bits)
+`define STB_PCX_AD_LO   64
+`define STB_PCX_DA_HI   63	// DA field 63:0 (64 bits)
+`define STB_PCX_DA_LO   0     
+`define LMQ_WIDTH       65	// bits 64:0; the entries below are bit indices into that vector
+`define LMQ_VLD 	64
+`define LMQ_DFLUSH 	63
+`define LMQ_PREF 	62
+`define LMQ_FPLD 	61
+`define LMQ_SIGNEXT 	60
+`define LMQ_BIGEND 	59
+`define LMQ_RD1_HI      58	// RD1 field 58:54 (5 bits)
+`define LMQ_RD1_LO      54
+`define LMQ_RD2_VLD 	53
+`define LMQ_RD2_HI      52	// RD2 field 52:48 (5 bits, same width as RD1, ends right above RQ)
+`define LMQ_RD2_LO      48
+`define LMQ_RQ_HI       47	// RQ field 47:45 (3 bits)
+`define LMQ_RQ_LO       45
+`define LMQ_NC  	44
+`define LMQ_WY_HI       43	// WY field 43:42 (2 bits)
+`define LMQ_WY_LO       42
+`define LMQ_SZ_HI       41	// SZ field 41:40 (2 bits)
+`define LMQ_SZ_LO       40
+`define LMQ_AD_HI       39	// AD field 39:0 (40 bits)
+`define LMQ_AD_LO       0
+`define DATA_PA_HI      32	// 27-bit range 32:6
+`define DATA_PA_LO      6
+`define	STB_DFQ_WIDTH	83	// bits 82:0; the entries below are bit indices into that vector
+`define	STB_DFQ_VLD	82
+`define	STB_DFQ_ATM	81
+`define	STB_DFQ_WY_HI	80	// WY field 80:79 (2 bits)
+`define	STB_DFQ_WY_LO	79
+`define STB_DFQ_BF_ID_HI 78	// BF_ID field 78:76 (3 bits)
+`define STB_DFQ_BF_ID_LO 76
+`define	STB_DFQ_SZ_HI	75	// SZ field 75:74 (2 bits)
+`define	STB_DFQ_SZ_LO	74
+`define	STB_DFQ_AD_HI	73	// AD field 73:64 (10 bits)
+`define STB_DFQ_AD_LO	64
+`define	STB_DFQ_DA_HI	63	// DA field 63:0 (64 bits)
+`define	STB_DFQ_DA_LO	0
+
+`define DFQ_WIDTH	151	// bits 150:0; the entries below are bit indices into that vector
+`define DFQ_TH_HI	150	// TH field 150:149 (2 bits)
+`define DFQ_TH_LO	149
+`define DFQ_ST_CMPLT    148
+`define DFQ_LD_TYPE	147
+`define DFQ_INV_TYPE	146
+`define DFQ_WY_HI	145	// four 2-bit WY fields: 145:144, 143:142, 141:140, 139:138
+`define DFQ_WY_LO	144
+`define DFQ_WY1_HI	143
+`define DFQ_WY1_LO	142
+`define DFQ_WY2_HI	141
+`define DFQ_WY2_LO	140
+`define DFQ_WY3_HI	139
+`define DFQ_WY3_LO	138
+`define DFQ_SI_HI	137	// SI field 137:132 (6 bits)
+`define DFQ_SI_LO	132
+`define DFQ_SI_DCD_HI	131	// SI_DCD field 131:128 (4 bits)
+`define DFQ_SI_DCD_LO	128
+`define DFQ_DA_HI	127	// DA field 127:0 (128 bits)
+`define DFQ_DA_LO	0
+
+`define DCFILL_WIDTH	183	// bits 182:0; the entries below are bit indices into that vector
+`define DCFILL_TH_HI	182	// TH field 182:181 (2 bits)
+`define DCFILL_TH_LO	181
+`define DCFILL_ST	180
+`define DCFILL_ST	180	// repeats the previous line with the same value
+`define DCFILL_LD	179
+`define DCFILL_INV	178
+`define DCFILL_DC_WR	177
+`define DCFILL_RD_HI	176	// RD field 176:172 (5 bits)
+`define DCFILL_RD_LO	172
+`define DCFILL_WY_HI	171	// WY field 171:170 (2 bits)
+`define DCFILL_WY_LO	170
+`define DCFILL_SZ_HI	169	// SZ field 169:168 (2 bits)
+`define DCFILL_SZ_LO	168
+`define DCFILL_AD_HI	167	// AD field 167:128 (40 bits)
+`define DCFILL_AD_LO	128
+`define DCFILL_DA_HI	127	// DA field 127:0 (128 bits)
+`define DCFILL_DA_LO 	0
+
+// TLB Tag and Data Format (bit indices; the STLB tag entries span bits 58:0)
+	`define       STLB_TAG_PID_HI         58
+	`define       STLB_TAG_PID_LO         56
+	`define       STLB_TAG_R              55
+	`define       STLB_TAG_PARITY         54
+	`define       STLB_TAG_VA_47_28_HI    53
+	`define       STLB_TAG_VA_47_28_LO    34
+	`define       STLB_TAG_VA_27_22_HI    33
+	`define       STLB_TAG_VA_27_22_LO    28
+	`define       STLB_TAG_VA_27_22_V     27
+	`define       STLB_TAG_V              26
+	`define       STLB_TAG_L              25
+	`define       STLB_TAG_U              24
+	`define       STLB_TAG_VA_21_16_HI    23
+	`define       STLB_TAG_VA_21_16_LO    18
+	`define       STLB_TAG_VA_21_16_V     17
+	`define       STLB_TAG_VA_15_13_HI    16
+	`define       STLB_TAG_VA_15_13_LO    14
+	`define       STLB_TAG_VA_15_13_V     13
+	`define       STLB_TAG_CTXT_12_0_HI   12
+	`define       STLB_TAG_CTXT_12_0_LO   0
+// STLB data entries span bits 42:0
+	`define       STLB_DATA_PARITY        42
+	`define       STLB_DATA_PA_39_28_HI   41 
+	`define       STLB_DATA_PA_39_28_LO   30
+	`define       STLB_DATA_PA_27_22_HI   29
+	`define       STLB_DATA_PA_27_22_LO   24
+	`define       STLB_DATA_27_22_SEL     23
+	`define       STLB_DATA_PA_21_16_HI   22
+	`define       STLB_DATA_PA_21_16_LO   17
+	`define       STLB_DATA_21_16_SEL     16
+	`define       STLB_DATA_PA_15_13_HI   15
+	`define       STLB_DATA_PA_15_13_LO   13
+	`define       STLB_DATA_15_13_SEL     12
+	`define       STLB_DATA_V             11
+	`define       STLB_DATA_NFO           10
+	`define       STLB_DATA_IE            9
+	`define       STLB_DATA_L             8
+	`define       STLB_DATA_CP            7
+	`define       STLB_DATA_CV            6
+	`define       STLB_DATA_E             5
+	`define       STLB_DATA_P             4
+	`define       STLB_DATA_W             3
+	`define       STLB_DATA_SPARE_HI      2
+	`define       STLB_DATA_SPARE_LO      0
+// CAM entries span bits 40:0; each VA slice is followed by its own _V bit
+	`define CAM_VA_47_28_HI         40
+	`define CAM_VA_47_28_LO         21
+	`define CAM_VA_47_28_V          20
+	`define CAM_VA_27_22_HI         19
+	`define CAM_VA_27_22_LO         14
+	`define CAM_VA_27_22_V          13
+	`define CAM_VA_21_16_HI         12
+	`define CAM_VA_21_16_LO         7
+	`define CAM_VA_21_16_V          6
+	`define CAM_VA_15_13_HI         5
+	`define CAM_VA_15_13_LO         3
+	`define CAM_VA_15_13_V          2
+	`define CAM_CTXT_GK             1
+	`define CAM_REAL_V              0
+
+
+// I-TLB version - lsu_tlb only.
+// Bit indices: tag entries span bits 52:0, data entries span bits 37:0
+`define TLB_TAG_G	52
+`define TLB_TAG_CTXT_HI	51
+`define TLB_TAG_CTXT_LO	39
+`define TLB_TAG_VA_HI	38
+`define TLB_TAG_VA_LO	4
+`define	TLB_TAG_L	3
+`define	TLB_TAG_VA_21_19_V  2
+`define	TLB_TAG_VA_18_16_V  1
+`define	TLB_TAG_VA_15_13_V  0
+`define TLB_DATA_PARITY 37 
+`define TLB_DATA_SZ_HI	36
+`define TLB_DATA_SZ_LO	35
+`define TLB_DATA_NFO  	34
+`define TLB_DATA_IE   	33
+`define TLB_DATA_PA_HI 	32	
+`define TLB_DATA_PA_LO 	6
+`define TLB_DATA_CP 	5 
+`define TLB_DATA_CV 	4 
+`define TLB_DATA_E  	3 
+`define TLB_DATA_P  	2 
+`define TLB_DATA_W  	1 
+`define TLB_DATA_G  	0 
+
+// Invalidate Format (bit indices; eight fields C0..C7 per addr<5:4> value)
+//addr<5:4>=00 : 4 bits per field, bits 31:0
+`define CPX_A00_C0_LO	0
+`define CPX_A00_C0_HI	3
+`define CPX_A00_C1_LO	4
+`define CPX_A00_C1_HI	7
+`define CPX_A00_C2_LO	8
+`define CPX_A00_C2_HI	11
+`define CPX_A00_C3_LO	12
+`define CPX_A00_C3_HI	15
+`define CPX_A00_C4_LO	16
+`define CPX_A00_C4_HI	19
+`define CPX_A00_C5_LO	20
+`define CPX_A00_C5_HI	23
+`define CPX_A00_C6_LO	24
+`define CPX_A00_C6_HI	27
+`define CPX_A00_C7_LO	28
+`define CPX_A00_C7_HI	31
+
+//addr<5:4>=01 : 3 bits per field, bits 55:32
+`define CPX_A01_C0_LO	32
+`define CPX_A01_C0_HI	34
+`define CPX_A01_C1_LO	35
+`define CPX_A01_C1_HI	37
+`define CPX_A01_C2_LO	38
+`define CPX_A01_C2_HI	40
+`define CPX_A01_C3_LO	41
+`define CPX_A01_C3_HI	43
+`define CPX_A01_C4_LO	44
+`define CPX_A01_C4_HI	46
+`define CPX_A01_C5_LO	47
+`define CPX_A01_C5_HI	49
+`define CPX_A01_C6_LO	50
+`define CPX_A01_C6_HI	52
+`define CPX_A01_C7_LO	53
+`define CPX_A01_C7_HI	55
+
+//addr<5:4>=10 : 4 bits per field, bits 87:56
+`define CPX_A10_C0_LO	56
+`define CPX_A10_C0_HI	59
+`define CPX_A10_C1_LO	60
+`define CPX_A10_C1_HI	63
+`define CPX_A10_C2_LO	64
+`define CPX_A10_C2_HI	67
+`define CPX_A10_C3_LO	68
+`define CPX_A10_C3_HI	71
+`define CPX_A10_C4_LO	72
+`define CPX_A10_C4_HI	75
+`define CPX_A10_C5_LO	76
+`define CPX_A10_C5_HI	79
+`define CPX_A10_C6_LO	80
+`define CPX_A10_C6_HI	83
+`define CPX_A10_C7_LO	84
+`define CPX_A10_C7_HI	87
+
+//addr<5:4>=11 : 3 bits per field, bits 111:88
+`define CPX_A11_C0_LO	88
+`define CPX_A11_C0_HI	90
+`define CPX_A11_C1_LO	91
+`define CPX_A11_C1_HI	93
+`define CPX_A11_C2_LO	94
+`define CPX_A11_C2_HI	96
+`define CPX_A11_C3_LO	97
+`define CPX_A11_C3_HI	99
+`define CPX_A11_C4_LO	100
+`define CPX_A11_C4_HI	102
+`define CPX_A11_C5_LO	103
+`define CPX_A11_C5_HI	105
+`define CPX_A11_C6_LO	106
+`define CPX_A11_C6_HI	108
+`define CPX_A11_C7_LO	109
+`define CPX_A11_C7_HI	111
+
+// cpuid - bits 120:118 (3 bits)
+`define CPX_INV_CID_LO 118
+`define CPX_INV_CID_HI 120
+
+// CPUany, addr<5:4>=00,10 (bit offsets inside one 4-bit field)
+`define CPX_AX0_INV_DVLD 0
+`define CPX_AX0_INV_IVLD 1
+`define CPX_AX0_INV_WY_LO 2
+`define CPX_AX0_INV_WY_HI 3
+
+// CPUany, addr<5:4>=01,11 (bit offsets inside one 3-bit field)
+`define CPX_AX1_INV_DVLD 0
+`define CPX_AX1_INV_WY_LO 1
+`define CPX_AX1_INV_WY_HI 2
+
+// CPUany, addr<5:4>=01,11 (repeats the three defines just above with the same values)
+`define CPX_AX1_INV_DVLD 0
+`define CPX_AX1_INV_WY_LO 1
+`define CPX_AX1_INV_WY_HI 2
+
+// DTAG parity error Invalidate
+`define CPX_PERR_DINV 123	// dcache inv
+`define CPX_PERR_DINV_AD5 122	// addr bit 5
+`define CPX_PERR_DINV_AD4 121	// addr bit 4
+
+// CPX BINIT STORE
+`define CPX_BINIT_STACK 125	// dcache inv -- NOTE(review): same remark as CPX_PERR_DINV, possibly copied; confirm what bit 125 signals
Index: /trunk/T1-common/include/tlu.h
===================================================================
--- /trunk/T1-common/include/tlu.h	(revision 6)
+++ /trunk/T1-common/include/tlu.h	(revision 6)
@@ -0,0 +1,399 @@
+/*
+* ========== Copyright Header Begin ==========================================
+* 
+* OpenSPARC T1 Processor File: tlu.h
+* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+* 
+* The above named program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public
+* License version 2 as published by the Free Software Foundation.
+* 
+* The above named program is distributed in the hope that it will be 
+* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* General Public License for more details.
+* 
+* You should have received a copy of the GNU General Public
+* License along with this work; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+* 
+* ========== Copyright Header End ============================================
+*/
+// ifu trap types
+`define INST_ACC_EXC    9'h008
+`define INST_ACC_MMU_MS 9'h009
+`define INST_ACC_ERR    9'h00a
+`define ILL_INST        9'h010
+`define PRIV_OPC        9'h011
+`define FP_DISABLED     9'h020
+`define DATA_ACC_EXC    9'h030
+
+`define         MRA_TSB_PS0_HI      155
+`define         MRA_TSB_PS0_LO      108
+`define         MRA_TSB_PS1_HI      107
+`define         MRA_TSB_PS1_LO      60
+`define         MRA_TACCESS_HI  59
+`define         MRA_TACCESS_LO  12
+`define         MRA_CTXTCFG_HI    11
+`define         MRA_CTXTCFG_LO    6
+//
+// modified for hypervisor support
+//
+`define TLU_THRD_NUM     4
+`define	TLU_TT_LO	     0
+`define	TLU_TT_HI	     8
+`define	TLU_CWP_LO	     9
+`define	TLU_CWP_HI	    11
+`define	TLU_PSTATE_LO	12
+`define	TLU_PSTATE_HI	19
+`define	TLU_ASI_LO	    20
+`define	TLU_ASI_HI	    27
+`define	TLU_CCR_LO	    28
+`define	TLU_CCR_HI	    35
+`define	TLU_GL_LO	    36 
+`define	TLU_GL_HI	    37 
+`define	TLU_NPC_LO	    38
+`define	TLU_NPC_HI	    84
+`define	TLU_PC_LO	    85
+`define	TLU_PC_HI	   131
+`define	TLU_HTSTATE_LO 132
+`define	TLU_HTSTATE_HI 135
+`define	TLU_RD_NPC_HI	83
+`define	TLU_RD_PC_LO	84
+`define	TLU_RD_PC_HI   129
+`define	TLU_RD_HTSTATE_LO 130
+`define	TLU_RD_HTSTATE_HI 133
+//
+`define	TSA_PSTATE_VRANGE1_LO 12
+`define	TSA_PSTATE_VRANGE1_HI 15
+// modified due to bug 2588
+// `define	TSA_PSTATE_VRANGE2_LO 16 
+`define	TSA_PSTATE_VRANGE2_LO 18 
+`define	TSA_PSTATE_VRANGE2_HI 19
+//
+`define	TLU_TSA_WIDTH     136 
+`define	TLU_TDP_TSA_WIDTH 134 
+`define	TSA_HTSTATE_WIDTH   4 
+`define	TSA_GLOBAL_WIDTH    2 
+`define	TSA_CCR_WIDTH       8 
+`define	TSA_ASI_WIDTH       8 
+`define	TSA_PSTATE_WIDTH    8 
+`define	TSA_CWP_WIDTH       3 
+`define	TSA_TTYPE_WIDTH     9 
+`define	TLU_GLOBAL_WIDTH    4 
+`define	TLU_HPSTATE_WIDTH   5 
+//
+// added due to Niagara SRAMs methodology
+// The following defines have been replaced due to
+// the memory macro replacement from:
+// bw_r_rf32x144 -> 2x bw_r_rf32x80
+/*
+`define	TSA_MEM_WIDTH     144 
+`define	TSA_HTSTATE_HI    142 //  3 bits 
+`define	TSA_HTSTATE_LO    140 
+`define	TSA_TPC_HI        138 // 47 bits 
+`define	TSA_TPC_LO         92
+`define	TSA_TNPC_HI        90 // 47 bits
+`define	TSA_TNPC_LO        44 
+`define	TSA_TSTATE_HI      40 // 29 bits 
+`define	TSA_TSTATE_LO      12 
+`define	TSA_TTYPE_HI        8 //  9 bits
+`define	TSA_TTYPE_LO        0
+`define	TSA_MEM_CWP_LO	   12
+`define	TSA_MEM_CWP_HI	   14
+`define	TSA_MEM_PSTATE_LO  15
+`define	TSA_MEM_PSTATE_HI  22
+`define	TSA_MEM_ASI_LO	   23
+`define	TSA_MEM_ASI_HI	   30
+`define	TSA_MEM_CCR_LO	   31
+`define	TSA_MEM_CCR_HI	   38
+`define	TSA_MEM_GL_LO	   39 
+`define	TSA_MEM_GL_HI	   40 
+*/
+`define	TSA_MEM_WIDTH   80 
+`define	TSA1_HTSTATE_HI 63 //  4 bits 
+`define	TSA1_HTSTATE_LO 60 
+`define	TSA1_TNPC_HI    58 // 47 bits
+`define	TSA1_TNPC_LO    12
+`define	TSA1_TTYPE_HI    8 //  9 bits
+`define	TSA1_TTYPE_LO    0
+`define	TSA0_TPC_HI     78 // 47 bits 
+`define	TSA0_TPC_LO     32
+`define	TSA0_TSTATE_HI  28 // 29 bits 
+`define	TSA0_TSTATE_LO   0 
+//
+`define	TSA0_MEM_CWP_LO	    0 
+`define	TSA0_MEM_CWP_HI	    2
+`define	TSA0_MEM_PSTATE_LO  3 
+`define	TSA0_MEM_PSTATE_HI 10
+`define	TSA0_MEM_ASI_LO	   11
+`define	TSA0_MEM_ASI_HI	   18
+`define	TSA0_MEM_CCR_LO	   19
+`define	TSA0_MEM_CCR_HI	   26
+`define	TSA0_MEM_GL_LO	   27
+`define	TSA0_MEM_GL_HI	   28
+
+// HPSTATE position definitions within wsr
+`define	WSR_HPSTATE_ENB  11 
+`define	WSR_HPSTATE_IBE  10 
+`define	WSR_HPSTATE_RED   5 
+`define	WSR_HPSTATE_PRIV  2 
+`define	WSR_HPSTATE_TLZ   0 
+
+// TSTATE position definitions within wsr
+`define	WSR_TSTATE_GL_HI  41 // 2b
+`define	WSR_TSTATE_GL_LO  40 
+`define	WSR_TSTATE_CCR_HI 39 // 8b
+`define	WSR_TSTATE_CCR_LO 32 
+`define	WSR_TSTATE_ASI_HI 31 // 8b
+`define	WSR_TSTATE_ASI_LO 24 
+`define	WSR_TSTATE_PS2_HI 17 // 2b (bits 17:16)
+// modified due to bug 2588
+`define	WSR_TSTATE_PS2_LO  16 
+`define	WSR_TSTATE_PS1_HI  12 // 4b
+// added for bug 2584 
+`define	WSR_TSTATE_PS_PRIV 10 // 1b (single bit inside the PS1 range 12:9)
+`define	WSR_TSTATE_PS1_LO   9
+`define	WSR_TSTATE_CWP_HI   2 // 3b
+`define	WSR_TSTATE_CWP_LO   0 
+//
+`define	WSR_TSTATE_WIDTH   29 
+`define	RDSR_TSTATE_WIDTH  48 
+`define	RDSR_HPSTATE_WIDTH 12 
+`define	TLU_ASR_DATA_WIDTH 64 
+`define	TLU_ASR_ADDR_WIDTH  7 
+
+`define SFTINT_WIDTH     17 
+//
+// tick_cmp and stick_cmp definitions
+`define TICKCMP_RANGE_HI 60
+`define TICKCMP_RANGE_LO  0
+`define TICKCMP_INTDIS   63 
+`define SFTINT_TICK_CMP   0 
+`define SFTINT_STICK_CMP 16 
+//
+// PIB WRAP
+`define SFTINT_PIB_WRAP 15 
+`define PIB_OVERFLOW_TTYPE 7'h4f 
+
+// HPSTATE position definitions
+`define	HPSTATE_IBE  4 
+`define	HPSTATE_ENB  3 
+`define	HPSTATE_RED  2 
+`define	HPSTATE_PRIV 1 
+`define	HPSTATE_TLZ  0 
+
+// HTBA definitions
+`define	TLU_HTBA_WIDTH  34 // supported physical width 
+`define	TLU_HTBA_HI     47 
+`define	TLU_HTBA_LO     14 
+
+// TBA definitions
+`define	TLU_TBA_WIDTH  33 // supported physical width 
+`define	TLU_TBA_HI     47 
+`define	TLU_TBA_LO     15 
+
+`define	TPC		 5'h0 	
+`define	TNPC	 5'h1 	
+`define	TSTATE	 5'h2 	
+`define	TT		 5'h3 	
+`define	TICK	 5'h4 	
+`define	TBA		 5'h5 	
+`define	PSTATE	 5'h6 	
+`define	TL		 5'h7 	
+`define	PIL		 5'h8 
+`define	HPSTATE	 5'h0 
+`define	HTSTATE	 5'h1 
+`define	HINTP	 5'h3 
+`define	HTBA	 5'h5 
+`define	HTICKCMP 5'h1f 
+`define	STICKCMP 5'h19 
+`define	TICKCMP  5'h17 
+//
+// added for the hypervisor support
+`define	PSTATE_VRANGE1_LO	1
+`define	PSTATE_VRANGE1_HI   4	
+// modified due to bug 2588
+`define	PSTATE_VRANGE2_LO	8
+`define	PSTATE_VRANGE2_HI   9
+`define	PSTATE_TRUE_WIDTH  12 
+
+`define PSTATE_AG 	 0
+`define PSTATE_IE 	 1
+`define PSTATE_PRIV  2
+`define PSTATE_AM 	 3
+`define PSTATE_PEF 	 4
+`define PSTATE_RED 	 5
+`define PSTATE_MM_LO 6
+`define PSTATE_MM_HI 7
+`define PSTATE_TLE 	 8
+`define PSTATE_CLE 	 9
+`define PSTATE_MG 	10
+`define PSTATE_IG 	11
+//
+// compressed PSTATE WSR definitions
+`define	WSR_PSTATE_VRANGE1_LO	0
+`define	WSR_PSTATE_VR_PRIV      1
+`define	WSR_PSTATE_VRANGE1_HI   3	
+`define	WSR_PSTATE_VRANGE2_LO	4
+`define	WSR_PSTATE_VRANGE2_HI   5
+`define	WSR_PSTATE_VR_WIDTH     6
+
+`define MAXTL  3'b110
+`define MAXTL_LESSONE 3'b101
+`define MAXSTL 3'b010		 
+`define MAXSTL_TL 3'b010 // Saturation point for GL and TL (supervisor) 
+`define MAXSTL_GL 2'b10  // Saturation point for GL and TL (supervisor) 
+`define MAXGL  4'b0011	 // Saturation point for GL (hypervisor)	
+`define MAXGL_GL  2'b11	 // Saturation point for GL (hypervisor)	
+//
+// ASI_QUEUE for hypervisor
+// Queues are: CPU_MONDO
+//             DEV_MONDO
+//             RESUMABLE_ERROR
+//             NON_RESUMABLE_ERROR
+//
+`define        ASI_VA_WIDTH       48 
+`define        TLU_ASI_QUE_HI     13
+`define        TLU_ASI_QUE_LO      6
+`define        TLU_ASI_QUE_WIDTH   8
+`define        TLU_ASI_VA_WIDTH   10 
+`define        TLU_ASI_STATE_WIDTH 8 
+
+// for address range checking
+`define        TLU_ASI_QUE_VA_HI   9
+`define        TLU_ASI_QUE_VA_LO   3 
+
+`define        TLU_ASI_QUE_ASI   8'h25 
+`define        CPU_MONDO_HEAD   10'h3c0
+`define        CPU_MONDO_TAIL   10'h3c8
+`define        DEV_MONDO_HEAD   10'h3d0
+`define        DEV_MONDO_TAIL   10'h3d8
+`define        RESUM_ERR_HEAD   10'h3e0
+`define        RESUM_ERR_TAIL   10'h3e8
+`define        NRESUM_ERR_HEAD  10'h3f0
+`define        NRESUM_ERR_TAIL  10'h3f8
+`define        CPU_MONDO_TRAP   7'h7c // only 7 bits are defined; upper two are 2'b00
+`define        DEV_MONDO_TRAP   7'h7d // only 7 bits are defined; upper two are 2'b00 
+`define        TLZ_TRAP         7'h5f // only 7 bits are defined; upper two are 2'b00 
+`define        HWINT_INT        7'h60 // only 7 bits are defined; upper two are 2'b00 
+//
+// Niagara scratch-pads
+// VA address of 0x20 and 0x28 are exclusive to hypervisor
+// 
+`define        TLU_SCPD_DATA_WIDTH  64
+`define        SCPD_RW_ADDR_WIDTH     5 
+`define        SCPD_ASI_VA_ADDR_WIDTH 3  
+
+`define        PRI_SCPD_ASI_STATE     8'h20
+`define        SCPD_ASI_VA_ADDR_LO   10'h000
+`define        SCPD_ASI_VA_ADDR_HI   10'h038
+//
+// range checking 
+`define        TLU_ASI_SCPD_VA_HI  5   
+`define        TLU_ASI_SCPD_VA_LO  3   
+
+`define        HPRI_SCPD_ASI_STATE    8'h4f
+`define        HSCPD_ASI_VA_ADDR_LO   3'h4
+`define        HSCPD_ASI_VA_ADDR_HI   3'h5
+
+// PIB related definitions
+// Bit definition for events
+`define        PIB_INSTR_COUNT   3'bxxx
+`define        PIB_SB_FULL_CNT   3'b000
+`define        PIB_FP_INST_CNT   3'b001
+`define        PIB_IC_MISS_CNT   3'b010
+`define        PIB_DC_MISS_CNT   3'b011
+`define        PIB_ITLB_MISS_CNT 3'b100
+`define        PIB_DTLB_MISS_CNT 3'b101
+`define        PIB_L2_IMISS_CNT  3'b110
+`define        PIB_L2_DMISS_CNT  3'b111
+// 
+// PIB related definitions
+// PCR and PIC address definitions
+`define        PCR_ASR_ADDR		  7'b0010000
+`define        PIC_ASR_PRIV_ADDR  7'b0110001
+`define        PIC_ASR_NPRIV_ADDR 7'b0010001
+// 
+// PCR bit definitions (bit positions 3 and 7 have no define here)
+`define        WSR_PCR_PRIV   0 // PIC privilege 
+`define        WSR_PCR_ST     1 // supervisor trace 
+`define        WSR_PCR_UT     2 // user trace 
+`define        WSR_PCR_SL_LO  4 // PICL event mask 
+`define        WSR_PCR_SL_HI  6 // upper bit of the SL field (6:4)
+`define        WSR_PCR_CL_OVF 8 // NOTE(review): presumably PICL counter overflow flag - confirm
+`define        WSR_PCR_CH_OVF 9 // NOTE(review): presumably PICH counter overflow flag - confirm
+//
+`define        PIB_PCR_WIDTH  8 
+`define        PIB_PCR_PRIV   0 // PIC privilege 
+`define        PIB_PCR_ST     1 // privilege event trace 
+`define        PIB_PCR_UT     2 // user event trace 
+`define        PIB_PCR_SL_LO  3 // PICL event encode 
+`define        PIB_PCR_SL_HI  5 // 
+`define        PIB_PCR_CL_OVF 6 // 
+`define        PIB_PCR_CH_OVF 7 // 
+
+// PIC definitions
+`define        PIB_PIC_FULL_WIDTH 64
+`define        PIB_PIC_CNT_WIDTH  33
+`define        PIB_PIC_CNT_WRAP   32
+`define        PIB_PICH_CNT_HI    63
+`define        PIB_PICH_CNT_LO    32
+`define        PIB_PICL_CNT_HI    31
+`define        PIB_PICL_CNT_LO     0
+`define        PIB_EVQ_CNT_WIDTH   3
+// PIC  mask bit position definitions
+`define        PICL_MASK_WIDTH     8
+`define        PICL_MASK_SB_FULL   0 
+`define        PICL_MASK_FP_INST   1 
+`define        PICL_MASK_IC_MISS   2
+`define        PICL_MASK_DC_MISS   3
+`define        PICL_MASK_ITLB_MISS 4
+`define        PICL_MASK_DTLB_MISS 5
+`define        PICL_MASK_L2_IMISS  6
+`define        PICL_MASK_L2_DMISS  7
+
+// added define from sparc_tlu_int.v 
+// INT_* bit positions: VEC [5:0], THR [12:8], TYPE [17:16] (INT_THR_HI used to be defined twice with the same value)
+`define INT_VEC_HI 5
+`define INT_VEC_LO 0
+`define INT_THR_HI  12
+`define INT_THR_LO   8
+`define INT_TYPE_HI 17
+`define INT_TYPE_LO 16
+`define TLU_INRR_ASI 8'h72
+`define TLU_INDR_ASI 8'h73
+`define TLU_INVR_ASI 8'h74
+//
+// shadow scan related definitions 
+`define TLU_SSCAN_WIDTH 63 
+// modified due to logic redistribution
+// `define TCL_SSCAN_WIDTH 12 
+`define TCL_SSCAN_WIDTH 3 
+`define MISCTL_SSCAN_WIDTH 9 
+`define TDP_SSCAN_WIDTH 51 
+`define TDP_SSCAN_LO  0 
+`define TDP_SSCAN_HI 50 
+// `define TCL_SSCAN_LO 51 
+`define MISCTL_SSCAN_LO 51 
+`define MISCTL_SSCAN_HI 59 
+`define TCL_SSCAN_LO 60 
+`define TCL_SSCAN_HI 62 
+// 
+// position definitions - TDP
+`define TDP_SSCAN_PC_LO    0 
+`define TDP_SSCAN_PC_HI   45 
+`define TDP_SSCAN_PS_IE   46 
+`define TDP_SSCAN_PS_PRIV 47 
+`define TDP_SSCAN_HPS_LO  48
+`define TDP_SSCAN_HPS_HI  50  
+// 
+// position definitions - TCL
+`define TCL_SSCAN_TT_LO    0 
+`define TCL_SSCAN_TT_HI    8 
+`define TCL_SSCAN_TL_LO    9
+`define TCL_SSCAN_TL_HI   11
+// 
+// To speedup POR for verification purposes
+`define RSTVADDR_BASE 34'h3_ffff_c000 
Index: /trunk/T1-common/include/iop.h
===================================================================
--- /trunk/T1-common/include/iop.h	(revision 6)
+++ /trunk/T1-common/include/iop.h	(revision 6)
@@ -0,0 +1,839 @@
+/*
+* ========== Copyright Header Begin ==========================================
+* 
+* OpenSPARC T1 Processor File: iop.h
+* Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+* 
+* The above named program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public
+* License version 2 as published by the Free Software Foundation.
+* 
+* The above named program is distributed in the hope that it will be 
+* useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* General Public License for more details.
+* 
+* You should have received a copy of the GNU General Public
+* License along with this work; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+* 
+* ========== Copyright Header End ============================================
+*/
+//-*- verilog -*-
+////////////////////////////////////////////////////////////////////////
+/*
+//
+//  Description:	Global header file that contains definitions that 
+//                      are common/shared at the IOP chip level
+*/
+////////////////////////////////////////////////////////////////////////
+
+
+// Address Map Defines
+// ===================
+`define ADDR_MAP_HI      39
+`define ADDR_MAP_LO      32
+`define IO_ADDR_BIT      39
+
+// CMP space
+`define DRAM_DATA_LO     8'h00
+`define DRAM_DATA_HI     8'h7f
+
+// IOP space
+`define JBUS1            8'h80
+`define RESERVED_5	 8'h81 //`define HASH_TBL_NRAM_CSR 8'h81
+`define RESERVED_1       8'h82
+`define RESERVED_6_LO    8'h83 //`define ENET_MAC_CSR     8'h83
+                               //`define ENET_ING_CSR     8'h84
+                               //`define ENET_EGR_CMD_CSR 8'h85
+`define RESERVED_6_HI    8'h86 //`define ENET_EGR_DP_CSR  8'h86
+`define RESERVED_2_LO    8'h87
+`define RESERVED_2_HI    8'h92
+`define RESERVED_7       8'h93 //`define BSC_CSR          8'h93
+`define RESERVED_3       8'h94
+`define RESERVED_8       8'h95 //`define RAND_GEN_CSR     8'h95
+`define CLOCK_UNIT_CSR   8'h96
+`define DRAM_CSR         8'h97
+`define IOB_MAN_CSR      8'h98
+`define TAP_CSR          8'h99
+`define RESERVED_4_L0    8'h9a // NOTE(review): suffix is "L0" (digit zero), not "_LO" as in the other ranges; kept as-is because renaming would break existing references
+`define RESERVED_4_HI    8'h9d
+`define CPU_ASI          8'h9e
+`define IOB_INT_CSR      8'h9f
+
+// L2 space
+`define L2C_CSR_LO       8'ha0
+`define L2C_CSR_HI       8'hbf
+
+// More IOP space
+`define JBUS2_LO         8'hc0
+`define JBUS2_HI         8'hfe
+`define SPI_CSR          8'hff
+
+
+//Cache Crossbar Width and Field Defines
+//======================================
+`define	PCX_WIDTH	124  //PCX payload packet width
+`define	CPX_WIDTH	145  //CPX payload packet width
+
+`define PCX_VLD         123  //PCX packet valid 
+`define PCX_RQ_HI       122  //PCX request type field 
+`define PCX_RQ_LO       118
+`define PCX_NC          117  //PCX non-cacheable bit
+`define PCX_R           117  //PCX read/!write bit 
+`define PCX_CP_HI       116  //PCX cpu_id field
+`define PCX_CP_LO       114
+`define PCX_TH_HI       113  //PCX Thread field
+`define PCX_TH_LO       112
+`define PCX_BF_HI       111  //PCX buffer id field
+`define PCX_INVALL      111
+`define PCX_BF_LO       109
+`define PCX_WY_HI       108  //PCX replaced L1 way field
+`define PCX_WY_LO       107
+`define PCX_P_HI        108  //PCX packet ID, 1st STQ - 10, 2nd - 01
+`define PCX_P_LO        107
+`define PCX_SZ_HI       106  //PCX load/store size field
+`define PCX_SZ_LO       104
+`define PCX_ERR_HI      106  //PCX error field
+`define PCX_ERR_LO      104
+`define PCX_AD_HI       103  //PCX address field
+`define PCX_AD_LO        64
+`define PCX_DA_HI        63  //PCX Store data
+`define PCX_DA_LO         0  
+
+`define PCX_SZ_1B    3'b000  // encoding for 1B access
+`define PCX_SZ_2B    3'b001  // encoding for 2B access
+`define PCX_SZ_4B    3'b010  // encoding for 4B access
+`define PCX_SZ_8B    3'b011  // encoding for 8B access
+`define PCX_SZ_16B   3'b111  // encoding for 16B access
+
+`define CPX_VLD         144  //CPX payload packet valid
+
+`define CPX_RQ_HI       143  //CPX Request type
+`define CPX_RQ_LO       140
+`define CPX_ERR_HI      139  //CPX error field
+`define CPX_ERR_LO      137
+`define CPX_NC          136  //CPX non-cacheable
+`define CPX_R           136  //CPX read/!write bit
+`define CPX_TH_HI       135  //CPX thread ID field 
+`define CPX_TH_LO       134
+
+//bits 133:128 are shared by different fields
+//for different packet types.
+
+`define CPX_IN_HI       133  //CPX Interrupt source 
+`define CPX_IN_LO       128  
+
+`define CPX_WYVLD       133  //CPX replaced way valid
+`define CPX_WY_HI       132  //CPX replaced I$/D$ way
+`define CPX_WY_LO       131
+`define CPX_BF_HI       130  //CPX buffer ID field - 3 bits
+`define CPX_BF_LO       128
+
+`define CPX_SI_HI       132  //L1 set ID - PA[10:6]- 5 bits
+`define CPX_SI_LO       128  //used for invalidates
+
+`define CPX_P_HI        131  //CPX packet ID, 1st STQ - 10, 2nd - 01 
+`define CPX_P_LO        130
+
+`define CPX_ASI         130  //CPX forward request to ASI
+`define CPX_IF4B        130
+`define CPX_IINV        124
+`define CPX_DINV        123
+`define CPX_INVPA5      122
+`define CPX_INVPA4      121
+`define CPX_CPUID_HI    120
+`define CPX_CPUID_LO    118
+`define CPX_INV_PA_HI   116
+`define CPX_INV_PA_LO   112
+`define CPX_INV_IDX_HI   117
+`define CPX_INV_IDX_LO   112
+
+`define CPX_DA_HI       127  //CPX data payload
+`define CPX_DA_LO         0
+
+`define	LOAD_RQ		5'b00000
+`define	IMISS_RQ	5'b10000
+`define	STORE_RQ	5'b00001
+`define	CAS1_RQ		5'b00010
+`define	CAS2_RQ		5'b00011
+`define	SWAP_RQ		5'b00110
+`define	STRLOAD_RQ	5'b00100
+`define	STRST_RQ	5'b00101
+`define STQ_RQ          5'b00111
+`define INT_RQ          5'b01001
+`define FWD_RQ          5'b01101
+`define FWD_RPY         5'b01110
+`define RSVD_RQ         5'b11111
+
+`define LOAD_RET        4'b0000
+`define INV_RET         4'b0011  // same encoding as AT_ACK and EVICT_REQ
+`define ST_ACK          4'b0100
+`define AT_ACK          4'b0011  // same encoding as INV_RET and EVICT_REQ
+`define INT_RET         4'b0111
+`define TEST_RET        4'b0101
+`define FP_RET          4'b1000
+`define IFILL_RET       4'b0001
+`define	EVICT_REQ	4'b0011  // same encoding as INV_RET and AT_ACK
+`define	ERR_RET		4'b1100
+`define STRLOAD_RET     4'b0010
+`define STRST_ACK       4'b0110
+`define FWD_RQ_RET      4'b1010
+`define FWD_RPY_RET     4'b1011
+`define RSVD_RET        4'b1111
+
+//End cache crossbar defines
+
+
+// Number of COS supported by EECU 
+`define EECU_COS_NUM  	  2
+
+
+// 
+// BSC bus sizes
+// =============
+//
+
+// General
+`define BSC_ADDRESS      40
+`define MAX_XFER_LEN     7'b0
+`define XFER_LEN_WIDTH   6
+
+// CTags
+`define BSC_CTAG_SZ      12
+`define EICU_CTAG_PRE    5'b11101
+`define EICU_CTAG_REM    7
+`define EIPU_CTAG_PRE    3'b011
+`define EIPU_CTAG_REM    9
+`define EECU_CTAG_PRE    8'b11010000
+`define EECU_CTAG_REM    4
+`define EEPU_CTAG_PRE    6'b010000
+`define EEPU_CTAG_REM    6
+`define L2C_CTAG_PRE     2'b00
+`define L2C_CTAG_REM     10
+`define JBI_CTAG_PRE     2'b10
+`define JBI_CTAG_REM     10
+// reinstated temporarily
+`define PCI_CTAG_PRE     7'b1101100
+`define PCI_CTAG_REM     5
+
+
+// CoS
+`define EICU_COS         1'b0
+`define EIPU_COS         1'b1
+`define EECU_COS         1'b0
+`define EEPU_COS         1'b1
+`define PCI_COS          1'b0
+
+// L2$ Bank
+`define BSC_L2_BNK_HI    8
+`define BSC_L2_BNK_LO    6
+
+// L2$ Req
+`define BSC_L2_REQ_SZ   62
+`define BSC_L2_REQ	`BSC_L2_REQ_SZ	// used by rams in L2 code
+`define BSC_L2_BUS      64
+`define BSC_L2_CTAG_HI  61
+`define BSC_L2_CTAG_LO  50
+`define BSC_L2_ADD_HI   49
+`define BSC_L2_ADD_LO   10
+`define BSC_L2_LEN_HI    9
+`define BSC_L2_LEN_LO    3
+`define BSC_L2_ALLOC     2
+`define BSC_L2_COS       1
+`define BSC_L2_READ      0   
+
+// L2$ Ack
+`define L2_BSC_ACK_SZ   16
+`define L2_BSC_BUS      64
+`define L2_BSC_CBA_HI   14    // CBA - Critical Byte Address
+`define L2_BSC_CBA_LO   13
+`define L2_BSC_READ     12
+`define L2_BSC_CTAG_HI  11
+`define L2_BSC_CTAG_LO   0
+
+// Enet Egress Command Unit
+`define EECU_REQ_BUS    44
+`define EECU_REQ_SZ     44
+`define EECU_R_QID_HI   43
+`define EECU_R_QID_LO   40
+`define EECU_R_ADD_HI   39
+`define EECU_R_ADD_LO    0
+
+`define EECU_ACK_BUS    64
+`define EECU_ACK_SZ      5
+`define EECU_A_NACK      4
+`define EECU_A_QID_HI    3
+`define EECU_A_QID_LO    0
+
+
+// Enet Egress Packet Unit
+`define EEPU_REQ_BUS    55
+`define EEPU_REQ_SZ     55
+`define EEPU_R_TLEN_HI  54
+`define EEPU_R_TLEN_LO  48
+`define EEPU_R_SOF      47
+`define EEPU_R_EOF      46
+`define EEPU_R_PORT_HI  45
+`define EEPU_R_PORT_LO  44
+`define EEPU_R_QID_HI   43
+`define EEPU_R_QID_LO   40
+`define EEPU_R_ADD_HI   39
+`define EEPU_R_ADD_LO    0
+
+// This is cleaved in between Egress Datapath Ack's
+`define EEPU_ACK_BUS     6
+`define EEPU_ACK_SZ      6
+`define EEPU_A_EOF       5
+`define EEPU_A_NACK      4
+`define EEPU_A_QID_HI    3
+`define EEPU_A_QID_LO    0
+
+
+// Enet Egress Datapath
+`define EEDP_ACK_BUS   128
+`define EEDP_ACK_SZ     28
+`define EEDP_A_NACK     27
+`define EEDP_A_QID_HI   26
+`define EEDP_A_QID_LO   21
+`define EEDP_A_SOF      20
+`define EEDP_A_EOF      19
+`define EEDP_A_LEN_HI   18
+`define EEDP_A_LEN_LO   12
+`define EEDP_A_TAG_HI   11
+`define EEDP_A_TAG_LO    0
+`define EEDP_A_PORT_HI   5
+`define EEDP_A_PORT_LO   4
+`define EEDP_A_PORT_WIDTH 2
+
+
+// In-Order / Ordered Queue: EEPU
+// Tag is: TLEN, SOF, EOF, QID = 15
+`define EEPU_TAG_ARY     (7+1+1+6)
+`define EEPU_ENTRIES     16
+`define EEPU_E_IDX        4
+`define EEPU_PORTS        4
+`define EEPU_P_IDX        2
+
+// Nack + Tag Info + CTag
+`define IOQ_TAG_ARY      (1+`EEPU_TAG_ARY+12)
+`define EEPU_TAG_LOC     (`EEPU_P_IDX+`EEPU_E_IDX)
+
+
+// ENET Ingress Queue Management Req
+`define EICU_REQ_BUS     64 
+`define EICU_REQ_SZ      62
+`define EICU_R_CTAG_HI   61
+`define EICU_R_CTAG_LO   50
+`define EICU_R_ADD_HI    49
+`define EICU_R_ADD_LO    10
+`define EICU_R_LEN_HI     9
+`define EICU_R_LEN_LO     3
+`define EICU_R_COS        1
+`define EICU_R_READ       0   
+
+
+// ENET Ingress Queue Management Ack
+`define EICU_ACK_BUS     64
+`define EICU_ACK_SZ      14
+`define EICU_A_NACK      13
+`define EICU_A_READ      12
+`define EICU_A_CTAG_HI   11
+`define EICU_A_CTAG_LO    0
+
+
+// Enet Ingress Packet Unit
+`define EIPU_REQ_BUS    128 
+`define EIPU_REQ_SZ      59
+`define EIPU_R_CTAG_HI   58
+`define EIPU_R_CTAG_LO   50
+`define EIPU_R_ADD_HI    49
+`define EIPU_R_ADD_LO    10
+`define EIPU_R_LEN_HI     9
+`define EIPU_R_LEN_LO     3
+`define EIPU_R_COS        1
+`define EIPU_R_READ       0   
+
+
+// ENET Ingress Packet Unit Ack
+`define EIPU_ACK_BUS      10
+`define EIPU_ACK_SZ       10
+`define EIPU_A_NACK       9
+`define EIPU_A_CTAG_HI    8
+`define EIPU_A_CTAG_LO    0
+
+
+// In-Order / Ordered Queue: PCI
+// Tag is: CTAG
+`define PCI_TAG_ARY     12
+`define PCI_ENTRIES     16
+`define PCI_E_IDX        4
+`define PCI_PORTS        2
+
+// PCI-X Request
+`define PCI_REQ_BUS      64
+`define PCI_REQ_SZ       62
+`define PCI_R_CTAG_HI    61
+`define PCI_R_CTAG_LO    50
+`define PCI_R_ADD_HI     49
+`define PCI_R_ADD_LO     10
+`define PCI_R_LEN_HI      9
+`define PCI_R_LEN_LO      3
+`define PCI_R_COS         1
+`define PCI_R_READ        0
+
+// PCI_X Acknowledge
+`define PCI_ACK_BUS      64
+`define PCI_ACK_SZ       14
+`define PCI_A_NACK       13
+`define PCI_A_READ       12 
+`define PCI_A_CTAG_HI    11
+`define PCI_A_CTAG_LO     0
+
+
+`define BSC_MAX_REQ_SZ   62
+
+
+//
+// BSC array sizes
+//================
+//
+`define BSC_REQ_ARY_INDEX        6
+`define BSC_REQ_ARY_DEPTH       64
+`define BSC_REQ_ARY_WIDTH       62
+`define BSC_REQ_NXT_WIDTH       12
+`define BSC_ACK_ARY_INDEX        6
+`define BSC_ACK_ARY_DEPTH       64
+`define BSC_ACK_ARY_WIDTH       14
+`define BSC_ACK_NXT_WIDTH       12
+`define BSC_PAY_ARY_INDEX        6
+`define BSC_PAY_ARY_DEPTH       64
+`define BSC_PAY_ARY_WIDTH      256
+
+// ECC syndrome bits per memory element
+`define BSC_PAY_ECC             10
+`define BSC_PAY_MEM_WIDTH       (`BSC_PAY_ECC+`BSC_PAY_ARY_WIDTH)
+
+
+//
+// BSC Port Definitions
+// ====================
+//
+// Bits 7 to 4 of curr_port_id
+`define BSC_PORT_NULL       4'h0
+`define BSC_PORT_SC         4'h1
+`define BSC_PORT_EICU       4'h2
+`define BSC_PORT_EIPU       4'h3
+`define BSC_PORT_EECU       4'h4
+`define BSC_PORT_EEPU       4'h8
+`define BSC_PORT_PCI        4'h9
+
+// Number of ports of each type
+`define BSC_PORT_SC_CNT     8
+
+// Bits needed to represent above
+`define BSC_PORT_SC_IDX     3
+
+// How wide the linked list pointers are
+// 60b for no payload (2CoS)
+// 80b for payload (2CoS)
+
+//`define BSC_OBJ_PTR   80
+//`define BSC_HD1_HI    69
+//`define BSC_HD1_LO    60
+//`define BSC_TL1_HI    59
+//`define BSC_TL1_LO    50
+//`define BSC_CT1_HI    49
+//`define BSC_CT1_LO    40
+//`define BSC_HD0_HI    29
+//`define BSC_HD0_LO    20
+//`define BSC_TL0_HI    19
+//`define BSC_TL0_LO    10
+//`define BSC_CT0_HI     9
+//`define BSC_CT0_LO     0
+
+`define BSC_OBJP_PTR  48
+`define BSC_PYP1_HI   47
+`define BSC_PYP1_LO   42
+`define BSC_HDP1_HI   41
+`define BSC_HDP1_LO   36
+`define BSC_TLP1_HI   35
+`define BSC_TLP1_LO   30
+`define BSC_CTP1_HI   29
+`define BSC_CTP1_LO   24
+`define BSC_PYP0_HI   23
+`define BSC_PYP0_LO   18
+`define BSC_HDP0_HI   17
+`define BSC_HDP0_LO   12
+`define BSC_TLP0_HI   11
+`define BSC_TLP0_LO    6
+`define BSC_CTP0_HI    5
+`define BSC_CTP0_LO    0
+
+`define BSC_PTR_WIDTH     192
+`define BSC_PTR_REQ_HI    191
+`define BSC_PTR_REQ_LO    144
+`define BSC_PTR_REQP_HI   143
+`define BSC_PTR_REQP_LO    96
+`define BSC_PTR_ACK_HI     95
+`define BSC_PTR_ACK_LO     48
+`define BSC_PTR_ACKP_HI    47
+`define BSC_PTR_ACKP_LO     0
+
+`define BSC_PORT_SC_PTR    96       // R, R+P
+`define BSC_PORT_EECU_PTR  48       // A+P
+`define BSC_PORT_EICU_PTR  96       // A, A+P
+`define BSC_PORT_EIPU_PTR  48       // A
+
+// I2C STATES in DRAMctl
+`define I2C_CMD_NOP   4'b0000
+`define I2C_CMD_START 4'b0001
+`define I2C_CMD_STOP  4'b0010
+`define I2C_CMD_WRITE 4'b0100
+`define I2C_CMD_READ  4'b1000
+
+
+//
+// IOB defines
+// ===========
+//
+`define IOB_ADDR_WIDTH       40
+`define IOB_LOCAL_ADDR_WIDTH 32
+
+`define IOB_CPU_INDEX         3
+`define IOB_CPU_WIDTH         8
+`define IOB_THR_INDEX         2
+`define IOB_THR_WIDTH         4
+`define IOB_CPUTHR_INDEX      5
+`define IOB_CPUTHR_WIDTH     32
+
+`define IOB_MONDO_DATA_INDEX  5
+`define IOB_MONDO_DATA_DEPTH 32
+`define IOB_MONDO_DATA_WIDTH 64
+`define IOB_MONDO_SRC_WIDTH   5
+`define IOB_MONDO_BUSY        5
+
+`define IOB_INT_TAB_INDEX     2
+`define IOB_INT_TAB_DEPTH     4 
+
+//`define IOB_INT_STAT_WIDTH   32
+//`define IOB_INT_STAT_HI      31
+//`define IOB_INT_STAT_LO       0
+
+`define IOB_INT_VEC_WIDTH     6
+`define IOB_INT_VEC_HI        5
+`define IOB_INT_VEC_LO        0
+
+`define IOB_INT_CPU_WIDTH     5
+`define IOB_INT_CPU_HI       12 
+`define IOB_INT_CPU_LO        8
+
+`define IOB_INT_MASK          2
+`define IOB_INT_CLEAR         1
+`define IOB_INT_PEND          0
+
+`define IOB_DISP_TYPE_HI     17
+`define IOB_DISP_TYPE_LO     16
+`define IOB_DISP_THR_HI      12
+`define IOB_DISP_THR_LO       8
+`define IOB_DISP_VEC_HI       5
+`define IOB_DISP_VEC_LO       0
+
+`define IOB_JBI_RESET         1
+`define IOB_ENET_RESET        0
+
+`define IOB_RESET_STAT_WIDTH  3
+`define IOB_RESET_STAT_HI     3
+`define IOB_RESET_STAT_LO     1
+
+`define IOB_SERNUM_WIDTH     64
+
+`define IOB_FUSE_WIDTH       22
+
+`define IOB_TMSTAT_THERM     63
+
+`define IOB_POR_TT            6'b01  // power-on-reset trap type
+
+`define IOB_CPU_BUF_INDEX     4
+
+`define IOB_INT_BUF_INDEX     4  
+`define IOB_INT_BUF_WIDTH   153  // interrupt table read result buffer width
+
+`define IOB_IO_BUF_INDEX      4
+`define IOB_IO_BUF_WIDTH    153  // io-2-cpu return buffer width
+
+`define IOB_L2_VIS_BUF_INDEX  5
+`define IOB_L2_VIS_BUF_WIDTH 48  // l2 visibility buffer width
+
+`define IOB_INT_AVEC_WIDTH    9  // availability vector width
+`define IOB_ACK_AVEC_WIDTH    9  // availability vector width 
+
+// fixme - double check address mapping
+// CREG in `IOB_INT_CSR space
+`define IOB_DEV_ADDR_MASK    32'hffffffe7  // all ones except bits [4:3]
+`define IOB_CREG_INTSTAT     32'h00000000
+`define IOB_CREG_MDATA0      32'h00000400
+`define IOB_CREG_MDATA1      32'h00000500
+`define IOB_CREG_MBUSY       32'h00000900
+`define IOB_THR_ADDR_MASK    32'hffffff07  // all ones except bits [7:3]
+`define IOB_CREG_MDATA0_ALIAS 32'h00000600
+`define IOB_CREG_MDATA1_ALIAS 32'h00000700
+`define IOB_CREG_MBUSY_ALIAS  32'h00000b00
+// Each *_ALIAS offset above equals its base offset + 32'h200 (0x400->0x600, 0x500->0x700, 0x900->0xb00).
+// CREG in `IOB_MAN_CSR space
+`define IOB_CREG_INTMAN      32'h00000000
+`define IOB_CREG_INTCTL      32'h00000400
+`define IOB_CREG_INTVECDISP  32'h00000800
+`define IOB_CREG_RESETSTAT   32'h00000810
+`define IOB_CREG_SERNUM      32'h00000820
+`define IOB_CREG_TMSTATCTRL  32'h00000828
+`define IOB_CREG_COREAVAIL   32'h00000830
+`define IOB_CREG_SSYSRESET   32'h00000838
+`define IOB_CREG_FUSESTAT    32'h00000840
+`define IOB_CREG_MARGIN      32'h00000850
+`define IOB_CREG_JINTV       32'h00000a00
+// Debug CREG offsets. For each JBUS LO/HI slot n: MASKn, CMPn, CNTn sit at +0x0, +0x8, +0x10 (listed by kind, not by address).
+`define IOB_CREG_DBG_L2VIS_CTRL    32'h00001800 
+`define IOB_CREG_DBG_L2VIS_MASKA   32'h00001820 
+`define IOB_CREG_DBG_L2VIS_MASKB   32'h00001828 
+`define IOB_CREG_DBG_L2VIS_CMPA    32'h00001830
+`define IOB_CREG_DBG_L2VIS_CMPB    32'h00001838
+`define IOB_CREG_DBG_L2VIS_TRIG    32'h00001840
+`define IOB_CREG_DBG_IOBVIS_CTRL   32'h00001000
+`define IOB_CREG_DBG_ENET_CTRL     32'h00002000
+`define IOB_CREG_DBG_ENET_IDLEVAL  32'h00002008
+`define IOB_CREG_DBG_JBUS_CTRL     32'h00002100
+`define IOB_CREG_DBG_JBUS_LO_MASK0 32'h00002140
+`define IOB_CREG_DBG_JBUS_LO_MASK1 32'h00002160
+`define IOB_CREG_DBG_JBUS_LO_CMP0  32'h00002148
+`define IOB_CREG_DBG_JBUS_LO_CMP1  32'h00002168
+`define IOB_CREG_DBG_JBUS_LO_CNT0  32'h00002150
+`define IOB_CREG_DBG_JBUS_LO_CNT1  32'h00002170
+`define IOB_CREG_DBG_JBUS_HI_MASK0 32'h00002180
+`define IOB_CREG_DBG_JBUS_HI_MASK1 32'h000021a0
+`define IOB_CREG_DBG_JBUS_HI_CMP0  32'h00002188
+`define IOB_CREG_DBG_JBUS_HI_CMP1  32'h000021a8
+`define IOB_CREG_DBG_JBUS_HI_CNT0  32'h00002190
+`define IOB_CREG_DBG_JBUS_HI_CNT1  32'h000021b0
+
+`define IOB_CREG_TESTSTUB    32'h80000000
+
+// Address map for TAP access of SPARC ASI (4-bit codes, 0 through 6)
+`define IOB_ASI_PC            4'b0000
+`define IOB_ASI_BIST          4'b0001
+`define IOB_ASI_MARGIN        4'b0010
+`define IOB_ASI_DEFEATURE     4'b0011
+`define IOB_ASI_L1DD          4'b0100
+`define IOB_ASI_L1ID          4'b0101
+`define IOB_ASI_L1DT          4'b0110
+// 2-bit codes; the names suggest interrupt/command types issued by the IOB -- TODO confirm against the users of these macros.
+`define IOB_INT               2'b00
+`define IOB_RESET             2'b01
+`define IOB_IDLE              2'b10
+`define IOB_RESUME            2'b11
+
+//
+// CIOP UCB Bus Width
+// ==================
+// Widths come in IOB_<unit>_WIDTH / <unit>_IOB_WIDTH pairs; presumably one per link direction (TODO confirm). Commented-out pairs stay undefined here.
+//`define IOB_EECU_WIDTH       16  // ethernet egress command
+//`define EECU_IOB_WIDTH       16
+
+//`define IOB_NRAM_WIDTH       16  // NRAM (RLDRAM previously)
+//`define NRAM_IOB_WIDTH        4
+
+`define IOB_JBI_WIDTH        64  // JBI
+`define JBI_IOB_WIDTH        16 
+
+//`define IOB_ENET_ING_WIDTH   32  // ethernet ingress
+//`define ENET_ING_IOB_WIDTH    8
+
+//`define IOB_ENET_EGR_WIDTH    4  // ethernet egress
+//`define ENET_EGR_IOB_WIDTH    4
+
+//`define IOB_ENET_MAC_WIDTH    4  // ethernet MAC
+//`define ENET_MAC_IOB_WIDTH    4
+
+`define IOB_DRAM_WIDTH        4  // DRAM controller
+`define DRAM_IOB_WIDTH        4
+
+//`define IOB_BSC_WIDTH         4  // BSC
+//`define BSC_IOB_WIDTH         4
+
+`define IOB_SPI_WIDTH         4  // SPI (Boot ROM)
+`define SPI_IOB_WIDTH         4
+
+`define IOB_CLK_WIDTH         4  // clk unit
+`define CLK_IOB_WIDTH         4
+
+//`define IOB_CLSP_WIDTH        4  // clk spine unit
+//`define CLSP_IOB_WIDTH        4
+
+`define IOB_TAP_WIDTH         8  // TAP
+`define TAP_IOB_WIDTH         8
+
+
+//
+// CIOP UCB Buf ID Type
+// ====================
+// 2-bit buffer IDs: 00 = CMP, 01 = TAP.
+`define UCB_BID_CMP          2'b00
+`define UCB_BID_TAP          2'b01
+
+//
+// Interrupt Device ID
+// ===================
+//
+// Caution: DUMMY_DEV_ID has to be 9 bit wide
+//          for fields to line up properly in the IOB.
+`define DUMMY_DEV_ID         9'h00   // 0
+`define UNCOR_ECC_DEV_ID     7'd1    // 1
+// NOTE(review): DUMMY_DEV_ID is a 9-bit literal while UNCOR_ECC_DEV_ID is a 7-bit literal -- confirm the width difference is intended.
+//
+// Soft Error related definitions 
+// ==============================
+// Judging by the name, the bit width of a correctable-ECC error counter -- TODO confirm at the point of use.
+`define COR_ECC_CNT_WIDTH   16
+
+
+//
+// CMP clock
+// =========
+//
+// Unit is not stated in this file; presumably picoseconds (1333 ps would be ~750 MHz) -- TODO confirm.
+`define CMP_CLK_PERIOD   1333
+
+
+//
+// NRAM/IO Interface
+// =================
+//
+// NOTE: DRAM_CLK_PERIOD is defined under this NRAM/IO heading; unit presumably matches CMP_CLK_PERIOD -- TODO confirm.
+`define DRAM_CLK_PERIOD  6000
+
+`define NRAM_IO_DQ_WIDTH   32
+`define IO_NRAM_DQ_WIDTH   32
+
+`define NRAM_IO_ADDR_WIDTH 15
+`define NRAM_IO_BA_WIDTH    2
+
+
+//
+// NRAM/ENET Interface
+// ===================
+//
+
+`define NRAM_ENET_DATA_WIDTH 64
+`define ENET_NRAM_ADDR_WIDTH 20
+
+`define NRAM_DBG_DATA_WIDTH  40
+
+
+//
+// IO/FCRAM Interface
+// ==================
+//
+// Bit positions of two 32-bit halves of a 64-bit word: DATA1 = [63:32], DATA0 = [31:0].
+`define FCRAM_DATA1_HI       63
+`define FCRAM_DATA1_LO       32
+`define FCRAM_DATA0_HI       31
+`define FCRAM_DATA0_LO        0
+
+//
+// PCI Interface
+// ==================
+// Load/store size encodings
+// -------------------------
+// Size encoding
+// 000 - byte
+// 001 - half-word
+// 010 - word
+// 011 - double-word
+// 100 - quad
+`define LDST_SZ_BYTE        3'b000
+`define LDST_SZ_HALF_WORD   3'b001
+`define LDST_SZ_WORD        3'b010
+`define LDST_SZ_DOUBLE_WORD 3'b011
+`define LDST_SZ_QUAD        3'b100
+
+//
+// JBI<->SCTAG Interface
+// =======================
+// Outbound Header Format (bit positions; the fields below tile [127:0] with no gaps)
+`define JBI_BTU_OUT_ADDR_LO      0
+`define JBI_BTU_OUT_ADDR_HI     42
+`define JBI_BTU_OUT_RSV0_LO     43
+`define JBI_BTU_OUT_RSV0_HI     43
+`define JBI_BTU_OUT_TYPE_LO     44
+`define JBI_BTU_OUT_TYPE_HI     48
+`define JBI_BTU_OUT_RSV1_LO     49
+`define JBI_BTU_OUT_RSV1_HI     51
+`define JBI_BTU_OUT_REPLACE_LO  52
+`define JBI_BTU_OUT_REPLACE_HI  56
+`define JBI_BTU_OUT_RSV2_LO     57
+`define JBI_BTU_OUT_RSV2_HI     59
+`define JBI_BTU_OUT_BTU_ID_LO   60
+`define JBI_BTU_OUT_BTU_ID_HI   71
+`define JBI_BTU_OUT_DATA_RTN    72
+`define JBI_BTU_OUT_RSV3_LO     73
+`define JBI_BTU_OUT_RSV3_HI     75
+`define JBI_BTU_OUT_CE          76
+`define JBI_BTU_OUT_RSV4_LO     77
+`define JBI_BTU_OUT_RSV4_HI     79
+`define JBI_BTU_OUT_UE          80
+`define JBI_BTU_OUT_RSV5_LO     81
+`define JBI_BTU_OUT_RSV5_HI     83
+`define JBI_BTU_OUT_DRAM        84
+`define JBI_BTU_OUT_RSV6_LO     85
+`define JBI_BTU_OUT_RSV6_HI    127
+
+// Inbound Header Format (bit positions; the fields below tile [63:0] with no gaps)
+`define JBI_SCTAG_IN_ADDR_LO   0
+`define JBI_SCTAG_IN_ADDR_HI  39
+`define JBI_SCTAG_IN_SZ_LO    40
+`define JBI_SCTAG_IN_SZ_HI    42
+`define JBI_SCTAG_IN_RSV0     43
+`define JBI_SCTAG_IN_TAG_LO   44
+`define JBI_SCTAG_IN_TAG_HI   55
+`define JBI_SCTAG_IN_REQ_LO   56
+`define JBI_SCTAG_IN_REQ_HI   58
+`define JBI_SCTAG_IN_POISON   59
+`define JBI_SCTAG_IN_RSV1_LO  60
+`define JBI_SCTAG_IN_RSV1_HI  63
+// One-hot 3-bit request codes; each *_BIT macro is the index of the bit set in the matching code.
+`define JBI_SCTAG_REQ_WRI   3'b100
+`define JBI_SCTAG_REQ_WR8   3'b010
+`define JBI_SCTAG_REQ_RDD   3'b001
+`define JBI_SCTAG_REQ_WRI_BIT 2
+`define JBI_SCTAG_REQ_WR8_BIT 1
+`define JBI_SCTAG_REQ_RDD_BIT 0
+
+//
+// JBI->IOB Mondo Header Format
+// ============================
+// Bit positions within the 16-bit header; the four fields tile [15:0] with no gaps.
+`define JBI_IOB_MONDO_RSV1_HI       15 // reserved 1
+`define JBI_IOB_MONDO_RSV1_LO       13
+`define JBI_IOB_MONDO_TRG_HI        12 // interrupt target
+`define JBI_IOB_MONDO_TRG_LO         8 
+`define JBI_IOB_MONDO_RSV0_HI        7 // reserved 0
+`define JBI_IOB_MONDO_RSV0_LO        5
+`define JBI_IOB_MONDO_SRC_HI         4 // interrupt source
+`define JBI_IOB_MONDO_SRC_LO         0
+// Field widths; each equals HI - LO + 1 of the matching range above.
+`define JBI_IOB_MONDO_RSV1_WIDTH     3 
+`define JBI_IOB_MONDO_TRG_WIDTH      5
+`define JBI_IOB_MONDO_RSV0_WIDTH     3 
+`define JBI_IOB_MONDO_SRC_WIDTH      5
+
+// JBI->IOB Mondo Bus Width/Cycle
+// ==============================
+// Cycle  1 Header[15:8]
+// Cycle  2 Header[ 7:0]
+// Cycle  3 J_AD[127:120]
+// Cycle  4 J_AD[119:112]
+// .....
+// Cycle 18 J_AD[  7:  0]
+`define JBI_IOB_MONDO_BUS_WIDTH      8 // bits moved per cycle: (16 header + 128 data) / 8 = 18 cycles
+`define JBI_IOB_MONDO_BUS_CYCLE     18 // 2 header + 16 data
Index: /trunk/T1-common/srams/bw_r_cm16x40.v
===================================================================
--- /trunk/T1-common/srams/bw_r_cm16x40.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_cm16x40.v	(revision 6)
@@ -0,0 +1,374 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_cm16x40.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module bw_r_cm16x40( /*AUTOARG*/
+   // Outputs
+   dout, match, match_idx, so, 
+   // Inputs
+   adr_w, din, write_en, rst_tri_en, adr_r, read_en, lookup_en, key, 
+   rclk, sehold, se, si, rst_l
+   );
+// Behavioral model of a 16-entry x 40-bit CAM: one-hot write/read wordlines (adr_w/adr_r) plus a lookup of key[39:8] against every entry.
+input   [15:0]  adr_w ; // set up to +ve edge
+input   [39:0]  din;    // set up to +ve edge
+input           write_en;       // +ve edge clk; write enable
+input		rst_tri_en; // used to gate off writes during SCAN.
+input   [15:0]  adr_r;  // set up to +ve edge
+input           read_en;
+output  [39:0]  dout;
+input           lookup_en;      // set up to -ve edge
+input   [39:8]  key;    // set up to -ve edge
+output  [15:0]  match ;
+output  [15:0]  match_idx ;
+input   rclk ;
+input   sehold, se, si, rst_l;
+output  so ;
+// NOTE: se and si are never read and so is never assigned anywhere in this module; only sehold is used.
+reg     [39:0]  mb_cam_data[15:0] ; // CAM storage: 16 entries of 40 bits
+
+reg     [39:0]  dout;
+reg     [39:8]  key_d1;
+reg     lookup_en_d1 ;
+// tmp_addr0..tmp_addr15 hold a copy of each entry during lookup; the un-numbered tmp_addr is declared but not used.
+reg     [39:0]  tmp_addr ;
+reg     [39:0]  tmp_addr0 ;
+reg     [39:0]  tmp_addr1 ;
+reg     [39:0]  tmp_addr2 ;
+reg     [39:0]  tmp_addr3 ;
+reg     [39:0]  tmp_addr4 ;
+reg     [39:0]  tmp_addr5 ;
+reg     [39:0]  tmp_addr6 ;
+reg     [39:0]  tmp_addr7 ;
+reg     [39:0]  tmp_addr8 ;
+reg     [39:0]  tmp_addr9 ;
+reg     [39:0]  tmp_addr10 ;
+reg     [39:0]  tmp_addr11 ;
+reg     [39:0]  tmp_addr12 ;
+reg     [39:0]  tmp_addr13 ;
+reg     [39:0]  tmp_addr14 ;
+reg     [39:0]  tmp_addr15 ;
+
+reg     [15:0]  adr_w_d1 ;
+reg     [15:0]  adr_r_d1 ;
+reg             mb_wen_d1 ;     // registered write enable
+reg             mb_ren_d1 ;     // registered read enable
+
+reg     [39:0]  din_d1;
+
+wire    [15:0]  match ;
+wire    [15:0]  match_idx ;
+reg     [15:0]  match_p ;
+reg     [15:0]  match_idx_p ;
+
+reg             so ;
+reg		rst_l_d1;
+reg		rst_tri_en_d1;
+
+integer	i; // not used anywhere in this module
+// Input register stage: every input is sampled on posedge rclk; sehold = 1 makes each *_d1 register keep its current value.
+always  @(posedge rclk) begin
+        adr_w_d1 <= (sehold)? adr_w_d1: adr_w ;
+        adr_r_d1 <= (sehold)? adr_r_d1: adr_r;
+        din_d1 <= ( sehold)? din_d1: din ;
+        mb_wen_d1 <= ( sehold)? mb_wen_d1: write_en ;
+        mb_ren_d1 <= ( sehold)? mb_ren_d1 : read_en  ;
+        lookup_en_d1 <= ( sehold)? lookup_en_d1 :lookup_en ;
+        key_d1 <= ( sehold)? key_d1 : key;
+        // The two copies below are not gated by sehold.
+	rst_l_d1 <= rst_l ; // this is not a real flop
+	rst_tri_en_d1 <= rst_tri_en ; // this is not a real flop
+	
+end
+// match / match_idx are plain wire copies of the match_p / match_idx_p regs.
+assign	match = match_p ;
+assign	match_idx = match_idx_p ;
+
+// CAM OPERATION
+// Lookup priority: ~rst_l forces both vectors to 0; else lookup_en_d1 compares all 16 entries; else both vectors are 0.
+always  @( /*AUTOSENSE*/ /*memory or*/ adr_w_d1 or key_d1
+          or lookup_en_d1 or mb_wen_d1 or rst_l ) begin
+
+  	// NOTE: mb_cam_data itself is not in the sensitivity list (see the commented-out "memory or" above).
+        
+  	if(~rst_l)	begin
+		match_p = 16'b0 ;
+		match_idx_p = 16'b0;
+	end
+
+
+        else if( lookup_en_d1 ) begin
+		// Per entry n: match_p[n] = (entry[39:8] == key_d1[39:8]); match_idx_p[n] = (entry[17:8] == key_d1[17:8]).
+		// Both bits are driven to x when entry n is being written in the same cycle (mb_wen_d1 & adr_w_d1[n]).
+		tmp_addr0 = mb_cam_data[0];
+                match_p[0] =  ( mb_wen_d1 & adr_w_d1[0] ) ? 1'bx :
+                               ( tmp_addr0[39:8] == key_d1[39:8] ) ;
+                match_idx_p[0] = ( mb_wen_d1 & adr_w_d1[0] ) ? 1'bx :
+                                 ( tmp_addr0[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr1 = mb_cam_data[1];
+                match_p[1] =  ( mb_wen_d1 & adr_w_d1[1] ) ? 1'bx :
+                               ( tmp_addr1[39:8] == key_d1[39:8] ) ;
+                match_idx_p[1] = ( mb_wen_d1 & adr_w_d1[1] ) ? 1'bx :
+                                 ( tmp_addr1[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr2 = mb_cam_data[2];
+                match_p[2] =  ( mb_wen_d1 & adr_w_d1[2] ) ? 1'bx :
+                               ( tmp_addr2[39:8] == key_d1[39:8] ) ;
+                match_idx_p[2] = ( mb_wen_d1 & adr_w_d1[2] ) ? 1'bx :
+                                 ( tmp_addr2[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr3 = mb_cam_data[3];
+                match_p[3] =  ( mb_wen_d1 & adr_w_d1[3] ) ? 1'bx :
+                               ( tmp_addr3[39:8] == key_d1[39:8] ) ;
+                match_idx_p[3] = ( mb_wen_d1 & adr_w_d1[3] ) ? 1'bx :
+                                 ( tmp_addr3[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr4 = mb_cam_data[4];
+                match_p[4] =  ( mb_wen_d1 & adr_w_d1[4] ) ? 1'bx :
+                               ( tmp_addr4[39:8] == key_d1[39:8] ) ;
+                match_idx_p[4] = ( mb_wen_d1 & adr_w_d1[4] ) ? 1'bx :
+                                 ( tmp_addr4[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr5 = mb_cam_data[5];
+                match_p[5] =  ( mb_wen_d1 & adr_w_d1[5] ) ? 1'bx :
+                               ( tmp_addr5[39:8] == key_d1[39:8] ) ;
+                match_idx_p[5] = ( mb_wen_d1 & adr_w_d1[5] ) ? 1'bx :
+                                 ( tmp_addr5[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr6 = mb_cam_data[6];
+                match_p[6] =  ( mb_wen_d1 & adr_w_d1[6] ) ? 1'bx :
+                               ( tmp_addr6[39:8] == key_d1[39:8] ) ;
+                match_idx_p[6] = ( mb_wen_d1 & adr_w_d1[6] ) ? 1'bx :
+                                 ( tmp_addr6[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr7 = mb_cam_data[7];
+                match_p[7] =  ( mb_wen_d1 & adr_w_d1[7] ) ? 1'bx :
+                               ( tmp_addr7[39:8] == key_d1[39:8] ) ;
+                match_idx_p[7] = ( mb_wen_d1 & adr_w_d1[7] ) ? 1'bx :
+                                 ( tmp_addr7[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr8 = mb_cam_data[8];
+                match_p[8] =  ( mb_wen_d1 & adr_w_d1[8] ) ? 1'bx :
+                               ( tmp_addr8[39:8] == key_d1[39:8] ) ;
+                match_idx_p[8] = ( mb_wen_d1 & adr_w_d1[8] ) ? 1'bx :
+                                 ( tmp_addr8[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr9 = mb_cam_data[9];
+                match_p[9] =  ( mb_wen_d1 & adr_w_d1[9] ) ? 1'bx :
+                               ( tmp_addr9[39:8] == key_d1[39:8] ) ;
+                match_idx_p[9] = ( mb_wen_d1 & adr_w_d1[9] ) ? 1'bx :
+                                 ( tmp_addr9[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr10 = mb_cam_data[10];
+                match_p[10] =  ( mb_wen_d1 & adr_w_d1[10] ) ? 1'bx :
+                               ( tmp_addr10[39:8] == key_d1[39:8] ) ;
+                match_idx_p[10] = ( mb_wen_d1 & adr_w_d1[10] ) ? 1'bx :
+                                 ( tmp_addr10[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr11 = mb_cam_data[11];
+                match_p[11] =  ( mb_wen_d1 & adr_w_d1[11] ) ? 1'bx :
+                               ( tmp_addr11[39:8] == key_d1[39:8] ) ;
+                match_idx_p[11] = ( mb_wen_d1 & adr_w_d1[11] ) ? 1'bx :
+                                 ( tmp_addr11[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr12 = mb_cam_data[12];
+                match_p[12] =  ( mb_wen_d1 & adr_w_d1[12] ) ? 1'bx :
+                               ( tmp_addr12[39:8] == key_d1[39:8] ) ;
+                match_idx_p[12] = ( mb_wen_d1 & adr_w_d1[12] ) ? 1'bx :
+                                 ( tmp_addr12[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr13 = mb_cam_data[13];
+                match_p[13] =  ( mb_wen_d1 & adr_w_d1[13] ) ? 1'bx :
+                               ( tmp_addr13[39:8] == key_d1[39:8] ) ;
+                match_idx_p[13] = ( mb_wen_d1 & adr_w_d1[13] ) ? 1'bx :
+                                 ( tmp_addr13[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr14 = mb_cam_data[14];
+                match_p[14] =  ( mb_wen_d1 & adr_w_d1[14] ) ? 1'bx :
+                               ( tmp_addr14[39:8] == key_d1[39:8] ) ;
+                match_idx_p[14] = ( mb_wen_d1 & adr_w_d1[14] ) ? 1'bx :
+                                 ( tmp_addr14[17:8] == key_d1[17:8] ) ;
+  
+		tmp_addr15 = mb_cam_data[15];
+                match_p[15] =  ( mb_wen_d1 & adr_w_d1[15] ) ? 1'bx :
+                               ( tmp_addr15[39:8] == key_d1[39:8] ) ;
+                match_idx_p[15] = ( mb_wen_d1 & adr_w_d1[15] ) ? 1'bx :
+                                 ( tmp_addr15[17:8] == key_d1[17:8] ) ;
+	end
+	// Lookup not enabled: report no match on any entry.
+	else begin
+                match_p = 16'b0;
+                match_idx_p = 16'b0;
+        end
+
+end
+
+
+// READ AND WRITE HAPPEN in Phase 1.
+// Write port: stores din_d1 into the entry selected by the one-hot adr_w_d1.
+// rst_tri_en_d1 & rst_l_d1 are part of the 
+// list because we want to enter the following
+// always block under the following condition:
+// - adr_w_d1 , din_d1 , mb_wen_d1 remain the same across the
+// rising edge of the clock
+// - rst_tri_en or rst_l change across the rising edge of the
+// clock from high to low.
+// Note the write condition tests the live rst_tri_en / rst_l inputs, not their *_d1 copies; a write needs mb_wen_d1 = 1, rst_tri_en = 0, rst_l = 1.
+always  @(adr_w_d1 or din_d1 or mb_wen_d1  or rst_tri_en_d1 or rst_l_d1 ) begin
+  begin
+    if (mb_wen_d1  & ~rst_tri_en & rst_l ) begin
+        case(adr_w_d1 )
+          16'b0000_0000_0000_0000: ;  // do nothing
+          16'b0000_0000_0000_0001: mb_cam_data[0] = din_d1 ;
+          16'b0000_0000_0000_0010: mb_cam_data[1] = din_d1 ;
+          16'b0000_0000_0000_0100: mb_cam_data[2] = din_d1 ;
+          16'b0000_0000_0000_1000: mb_cam_data[3] = din_d1 ;
+          16'b0000_0000_0001_0000: mb_cam_data[4] = din_d1;
+          16'b0000_0000_0010_0000: mb_cam_data[5] = din_d1 ;
+          16'b0000_0000_0100_0000: mb_cam_data[6] = din_d1 ;
+          16'b0000_0000_1000_0000: mb_cam_data[7] = din_d1 ;
+          16'b0000_0001_0000_0000: mb_cam_data[8] = din_d1 ;
+          16'b0000_0010_0000_0000: mb_cam_data[9] = din_d1 ;
+          16'b0000_0100_0000_0000: mb_cam_data[10] = din_d1 ;
+          16'b0000_1000_0000_0000: mb_cam_data[11] = din_d1 ;
+          16'b0001_0000_0000_0000: mb_cam_data[12] = din_d1 ;
+          16'b0010_0000_0000_0000: mb_cam_data[13] = din_d1 ;
+          16'b0100_0000_0000_0000: mb_cam_data[14] = din_d1 ;
+          16'b1000_0000_0000_0000: mb_cam_data[15] = din_d1 ;
+	  // Disabled case: an all-ones wordline that would write every entry.
+          //16'b1111_1111_1111_1111:
+           //     begin
+           //             mb_cam_data[15] = din_d1 ;
+           //             mb_cam_data[14] = din_d1 ;
+           //             mb_cam_data[13] = din_d1 ;
+           //             mb_cam_data[12] = din_d1 ;
+           //             mb_cam_data[11] = din_d1 ;
+           //             mb_cam_data[10] = din_d1 ;
+           //             mb_cam_data[9] = din_d1 ;
+           //             mb_cam_data[8] = din_d1 ;
+           //             mb_cam_data[7] = din_d1 ;
+           //             mb_cam_data[6] = din_d1 ;
+           //             mb_cam_data[5] = din_d1 ;
+           //             mb_cam_data[4] = din_d1 ;
+           //             mb_cam_data[3] = din_d1 ;
+           //             mb_cam_data[2] = din_d1 ;
+           //             mb_cam_data[1] = din_d1 ;
+           //             mb_cam_data[0] = din_d1 ;
+           //     end
+          default: 
+             // 0in <fire -message "FATAL ERROR: incorrect write wordline"
+`ifdef DEFINE_0IN
+             ;
+`else
+`ifdef  INNO_MUXEX
+             ;
+`else
+		`ifdef MODELSIM
+            $display("PH1_CAM2_ERROR"," incorrect write wordline %h ", adr_w_d1);
+		`else
+            $error("PH1_CAM2_ERROR"," incorrect write wordline %h ", adr_w_d1);
+		`endif	
+`endif
+`endif
+	// default = any adr_w_d1 that is neither zero nor one-hot: nothing is stored; a message is printed unless DEFINE_0IN or INNO_MUXEX is defined.
+	endcase
+      end
+  end
+
+end
+
+
+// rst_l_d1 has purely been added so that we enter the always 
+// block when the wordline/wr_en does not change across clk cycles
+// but the rst_l does.  
+// Notice rst_l_d1 is not used in any of the "if" statements.
+// Notice that the renable is qualified with rclk to take 
+// care that we do not read from the array if rst_l goes high
+// during the negative phase of rclk. 
+// 
+// Read priority: ~rst_l -> 0; read with rst_tri_en high -> all ones; same-wordline read/write -> x; else the selected entry. Otherwise dout keeps its value.
+always  @( /*memory or*/ adr_r_d1 or adr_w_d1
+          or mb_ren_d1 or mb_wen_d1 or rst_l_d1 or rst_l or rst_tri_en_d1) begin
+  if(~rst_l ) begin
+	dout = 40'b0 ;
+  end
+  else if (mb_ren_d1 & rclk & rst_tri_en ) begin
+		dout = 40'hff_ffff_ffff ;
+  end
+  else if (mb_ren_d1 & rclk & ~rst_tri_en ) begin
+    if ((mb_wen_d1) && (adr_r_d1 == adr_w_d1) && (adr_r_d1) ) // conflict only when a non-zero read wordline equals the write wordline
+      begin
+	     dout = 40'bx ;	
+
+`ifdef DEFINE_0IN
+`else
+`ifdef  INNO_MUXEX
+`else
+		`ifdef MODELSIM
+             $display("PH1_CAM2_ERROR"," read write conflict %h ", adr_r_d1);
+		`else
+             $error("PH1_CAM2_ERROR"," read write conflict %h ", adr_r_d1);
+		`endif
+`endif
+`endif
+      end
+    else
+      begin
+        case(adr_r_d1)
+          // match sense amp ckt behavior when no read wl is selected
+          16'b0000_0000_0000_0000: dout = 40'hff_ffff_ffff ;
+          16'b0000_0000_0000_0001: dout = mb_cam_data[0] ;
+          16'b0000_0000_0000_0010: dout = mb_cam_data[1] ;
+          16'b0000_0000_0000_0100: dout = mb_cam_data[2] ;
+          16'b0000_0000_0000_1000: dout = mb_cam_data[3] ;
+          16'b0000_0000_0001_0000: dout = mb_cam_data[4] ;
+          16'b0000_0000_0010_0000: dout = mb_cam_data[5] ;
+          16'b0000_0000_0100_0000: dout = mb_cam_data[6] ;
+          16'b0000_0000_1000_0000: dout = mb_cam_data[7] ;
+          16'b0000_0001_0000_0000: dout = mb_cam_data[8] ;
+          16'b0000_0010_0000_0000: dout = mb_cam_data[9] ;
+          16'b0000_0100_0000_0000: dout = mb_cam_data[10] ;
+          16'b0000_1000_0000_0000: dout = mb_cam_data[11] ;
+          16'b0001_0000_0000_0000: dout = mb_cam_data[12] ;
+          16'b0010_0000_0000_0000: dout = mb_cam_data[13] ;
+          16'b0100_0000_0000_0000: dout = mb_cam_data[14] ;
+          16'b1000_0000_0000_0000: dout = mb_cam_data[15] ;
+          default: 
+             // 0in <fire -message "FATAL ERROR: incorrect read wordline"
+`ifdef DEFINE_0IN
+             ;
+`else
+`ifdef  INNO_MUXEX
+             ;
+`else
+		`ifdef MODELSIM	
+             $display("PH1_CAM2_ERROR"," incorrect read wordline %h ", adr_r_d1);
+		`else
+             $error("PH1_CAM2_ERROR"," incorrect read wordline %h ", adr_r_d1);
+		`endif	 
+`endif
+`endif
+        // default = a read wordline that is neither zero nor one-hot: dout is not assigned here, only a message may be printed.
+        endcase
+      end
+
+	end // of else if
+end
+endmodule
Index: /trunk/T1-common/srams/bw_r_dcm.v
===================================================================
--- /trunk/T1-common/srams/bw_r_dcm.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_dcm.v	(revision 6)
@@ -0,0 +1,841 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_dcm.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+// The Four panels correspond to addr<10:9> decoded.
+
+module bw_r_dcm(  /*AUTOARG*/
+   // Outputs
+   row_hit, rd_data0, rd_data1, rd_data2, rd_data3, so_0, so_1, 
+   // Inputs
+   cam_en, inv_mask0, inv_mask1, inv_mask2, inv_mask3, si_0, se_0, 
+   si_1, se_1, sehold_0, sehold_1, rclk,  rd_en, rw_addr0, 
+   rw_addr1, rw_addr2, rw_addr3, rst_l_0, rst_l_1, rst_warm_0, 
+   rst_warm_1, wr_en, rst_tri_en_0, rst_tri_en_1, wr_data0, wr_data1, 
+   wr_data2, wr_data3
+   );
+// Structural wrapper around two dcm_panel_pair instances: panel_pair0 takes the panel 0/1 signals and *_0 controls, panel_pair1 the panel 2/3 signals and *_1 controls.
+output	[31:0]	row_hit;
+
+output [31:0]         rd_data0;               // From panel0 of dcm_panel.v
+output [31:0]         rd_data1;               // From panel1 of dcm_panel.v
+output [31:0]         rd_data2;               // From panel2 of dcm_panel.v
+output [31:0]         rd_data3;               // From panel3 of dcm_panel.v
+
+input   [3:0]         cam_en;
+
+input [7:0]           inv_mask0;              // To panel0 of dcm_panel.v
+input [7:0]           inv_mask1;              // To panel1 of dcm_panel.v
+input [7:0]           inv_mask2;              // To panel2 of dcm_panel.v
+input [7:0]           inv_mask3;              // To panel3 of dcm_panel.v
+// NOTE(review): si_0/si_1 are never read and so_0/so_1 are never driven here (both instances leave .si/.so open) -- confirm this is intended.
+input		      si_0, se_0;
+output		      so_0;
+input		      si_1, se_1;
+output		      so_1;
+input		      sehold_0;
+input		      sehold_1;
+
+input                 rclk;                   // To panel0 of dcm_panel.v, ...
+
+input  [3:0]          rd_en ;           // To panel0 of dcm_panel.v
+
+input [5:0]           rw_addr0;      // To panel0 of dcm_panel.v
+input [5:0]           rw_addr1;      // To panel1 of dcm_panel.v
+input [5:0]           rw_addr2;      // To panel2 of dcm_panel.v
+input [5:0]           rw_addr3;      // To panel3 of dcm_panel.v
+
+input                 rst_l_0;                  // To panel0 of dcm_panel.v, ...
+input                 rst_l_1;                  // To panel0 of dcm_panel.v, ...
+input		      rst_warm_0;
+input		      rst_warm_1;
+
+input   [3:0]         wr_en;            // To panel0 of dcm_panel.v
+input		      rst_tri_en_0; // used to disable writes during SCAN.
+input		      rst_tri_en_1; // used to disable writes during SCAN.
+// Note: write data is 33 bits per panel while read data is 32 bits per panel.
+input [32:0]          wr_data0;         // To panel0 of dcm_panel.v
+input [32:0]          wr_data1;         // To panel1 of dcm_panel.v
+input [32:0]          wr_data2;         // To panel2 of dcm_panel.v
+input [32:0]          wr_data3;         // To panel3 of dcm_panel.v
+
+
+wire	[31:0]	bank1_hit;
+wire	[31:0]	bank0_hit;
+
+/*	dcm_panel_pair	AUTO_TEMPLATE (
+
+		   		  .bank_hit(bank0_hit[31:0]),
+                                  .rd_data0(rd_data0[31:0]),
+                                  .rd_data1(rd_data1[31:0]),
+                                  // Inputs
+                                  .cam_en(cam_en[1:0]),
+                                  .inv_mask0(inv_mask0[7:0]),
+                                  .inv_mask1(inv_mask1[7:0]),
+                                  .rclk (rclk),
+                                  .rd_en(rd_en[1:0]),
+                                  .rst_l(rst_l_0),
+                                  .rst_tri_en(rst_tri_en_0),
+                                  .rst_warm(rst_warm_0),
+                                  .rw_addr0(rw_addr0[5:0]),
+                                  .rw_addr1(rw_addr1[5:0]),
+                                  .sehold(sehold_0),
+                                  .wr_data0(wr_data0[32:0]),
+                                  .wr_data1(wr_data1[32:0]),
+                                  .wr_en(wr_en[1:0]));
+
+*/
+// Pair 0: panels 0 and 1, using bits [1:0] of cam_en / rd_en / wr_en and the *_0 reset, scan and hold controls.
+      dcm_panel_pair	panel_pair0(
+                                  .so   (),
+                                  .si   (),
+                                  .se   (se_0),
+					/*AUTOINST*/
+                                  // Outputs
+                                  .bank_hit(bank0_hit[31:0]),    // Templated
+                                  .rd_data0(rd_data0[31:0]),     // Templated
+                                  .rd_data1(rd_data1[31:0]),     // Templated
+                                  // Inputs
+                                  .cam_en(cam_en[1:0]),          // Templated
+                                  .inv_mask0(inv_mask0[7:0]),    // Templated
+                                  .inv_mask1(inv_mask1[7:0]),    // Templated
+                                  .rclk (rclk),                  // Templated
+                                  .rd_en(rd_en[1:0]),            // Templated
+                                  .rst_l(rst_l_0),               // Templated
+                                  .rst_tri_en(rst_tri_en_0),     // Templated
+                                  .rst_warm(rst_warm_0),         // Templated
+                                  .rw_addr0(rw_addr0[5:0]),      // Templated
+                                  .rw_addr1(rw_addr1[5:0]),      // Templated
+                                  .sehold(sehold_0),             // Templated
+                                  .wr_data0(wr_data0[32:0]),     // Templated
+                                  .wr_data1(wr_data1[32:0]),     // Templated
+                                  .wr_en(wr_en[1:0]));            // Templated
+				
+	assign	 row_hit =  bank1_hit | bank0_hit ; // bitwise OR of the two pairs' 32-bit hit vectors
+
+/*      dcm_panel_pair  AUTO_TEMPLATE (
+
+                                  .bank_hit(bank1_hit[31:0]),
+                                  .rd_data0(rd_data2[31:0]),
+                                  .rd_data1(rd_data3[31:0]),
+                                  // Inputs
+                                  .cam_en(cam_en[3:2]),
+                                  .inv_mask0(inv_mask2[7:0]),
+                                  .inv_mask1(inv_mask3[7:0]),
+                                  .rclk (rclk),
+                                  .rd_en(rd_en[3:2]),
+                                  .rst_l(rst_l_1),
+                                  .rst_tri_en(rst_tri_en_1),
+                                  .rst_warm(rst_warm_1),
+                                  .rw_addr0(rw_addr2[5:0]),
+                                  .rw_addr1(rw_addr3[5:0]),
+                                  .sehold(sehold_1),
+                                  .wr_data0(wr_data2[32:0]),
+                                  .wr_data1(wr_data3[32:0]),
+                                  .wr_en(wr_en[3:2]));
+
+*/
+// Pair 1: panels 2 and 3, using bits [3:2] of cam_en / rd_en / wr_en and the *_1 reset, scan and hold controls.
+      dcm_panel_pair    panel_pair1(
+                                  .so   (),
+                                  .si   (),
+                                  .se   (se_1),
+                                        /*AUTOINST*/
+                                    // Outputs
+                                    .bank_hit(bank1_hit[31:0]),  // Templated
+                                    .rd_data0(rd_data2[31:0]),   // Templated
+                                    .rd_data1(rd_data3[31:0]),   // Templated
+                                    // Inputs
+                                    .cam_en(cam_en[3:2]),        // Templated
+                                    .inv_mask0(inv_mask2[7:0]),  // Templated
+                                    .inv_mask1(inv_mask3[7:0]),  // Templated
+                                    .rclk(rclk),                 // Templated
+                                    .rd_en(rd_en[3:2]),          // Templated
+                                    .rst_l(rst_l_1),             // Templated
+                                    .rst_tri_en(rst_tri_en_1),   // Templated
+                                    .rst_warm(rst_warm_1),       // Templated
+                                    .rw_addr0(rw_addr2[5:0]),    // Templated
+                                    .rw_addr1(rw_addr3[5:0]),    // Templated
+                                    .sehold(sehold_1),           // Templated
+                                    .wr_data0(wr_data2[32:0]),   // Templated
+                                    .wr_data1(wr_data3[32:0]),   // Templated
+                                    .wr_en(wr_en[3:2]));          // Templated
+
+
+endmodule
+
+
+
+// dcm_panel_pair
+// Wraps two dcm_panel instances (panel0 / panel1).  Each panel gets its own
+// enables, address, write data and invalidate mask (bit/suffix 0 -> panel0,
+// bit/suffix 1 -> panel1); clock, resets and sehold are shared.  The two
+// per-panel lookup hit vectors are OR-ed into bank_hit.
+//
+// Scan ports: si is not used and so is not driven anywhere in this module,
+// and the panels' si/so pins are left unconnected; only se is passed down.
+module dcm_panel_pair(
+   // Outputs
+   output        so,
+   output [31:0] bank_hit,
+   output [31:0] rd_data0,
+   output [31:0] rd_data1,
+   // Inputs
+   input  [1:0]  cam_en,
+   input  [7:0]  inv_mask0,
+   input  [7:0]  inv_mask1,
+   input         rclk,
+   input  [1:0]  rd_en,
+   input         rst_l,
+   input         rst_tri_en,
+   input         rst_warm,
+   input  [5:0]  rw_addr0,
+   input  [5:0]  rw_addr1,
+   input         sehold,
+   input  [32:0] wr_data0,
+   input  [32:0] wr_data1,
+   input  [1:0]  wr_en,
+   input         si,
+   input         se
+   );
+
+   // Per-panel lookup results, merged below.
+   wire [31:0] lkup_hit0;
+   wire [31:0] lkup_hit1;
+
+   // Warm reset is registered once here and the registered copy is what
+   // both panels see.  The flop holds its value while sehold is asserted.
+   reg         rst_warm_d;
+
+   always @(posedge rclk)
+      rst_warm_d <= (sehold) ? rst_warm_d : rst_warm;
+
+   // A hit in either panel is a hit for the pair.
+   assign bank_hit = lkup_hit0 | lkup_hit1;
+
+   dcm_panel panel0 (
+      // scan (si/so intentionally left open)
+      .si         (),
+      .so         (),
+      .se         (se),
+      // shared controls
+      .rclk       (rclk),
+      .rst_l      (rst_l),
+      .rst_warm   (rst_warm_d),
+      .rst_tri_en (rst_tri_en),
+      .sehold     (sehold),
+      // panel 0 request
+      .rd_en      (rd_en[0]),
+      .wr_en      (wr_en[0]),
+      .cam_en     (cam_en[0]),
+      .rw_addr    (rw_addr0),
+      .wr_data    (wr_data0),
+      .inv_mask   (inv_mask0),
+      // panel 0 results
+      .lkup_hit   (lkup_hit0),
+      .rd_data    (rd_data0)
+   );
+
+   dcm_panel panel1 (
+      // scan (si/so intentionally left open)
+      .si         (),
+      .so         (),
+      .se         (se),
+      // shared controls
+      .rclk       (rclk),
+      .rst_l      (rst_l),
+      .rst_warm   (rst_warm_d),
+      .rst_tri_en (rst_tri_en),
+      .sehold     (sehold),
+      // panel 1 request
+      .rd_en      (rd_en[1]),
+      .wr_en      (wr_en[1]),
+      .cam_en     (cam_en[1]),
+      .rw_addr    (rw_addr1),
+      .wr_data    (wr_data1),
+      .inv_mask   (inv_mask1),
+      // panel 1 results
+      .lkup_hit   (lkup_hit1),
+      .rd_data    (rd_data1)
+   );
+
+endmodule
+
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+// A directory panel is 32 bits wide and 64 entries deep.
+// The lkup_hit combines the match lines for an even and odd entry pair
+// and hence is only 32 bits wide.
+////////////////////////////////////////////////////////////////////////
+
+
+// dcm_panel
+// Behavioural model of one directory CAM panel: 64 entries, each holding a
+// 30-bit address tag, a parity bit and a valid bit.
+//
+//  * Lookup (cam_en): wr_data[32:3] is compared against every entry.  Even
+//    entries can only hit when wr_data[2] == 0, odd entries only when
+//    wr_data[2] == 1.  lkup_hit[i] is the OR of the match lines of entries
+//    2*i and 2*i+1, hence 32 bits for 64 entries.
+//  * Invalidate: a hitting entry e has its valid bit cleared when
+//    inv_mask[e/8] is set (one mask bit per group of 8 entries).
+//  * Read / write (rd_en / wr_en): performed on the falling edge of rclk
+//    using the registered enables and address.  Write data is taken from
+//    the wr_data input as it is at that falling edge (it is not registered).
+//  * Reset: !rst_l, or rst_warm while neither rst_tri_en nor its registered
+//    copy is set, clears all valid bits.
+//
+// The scan output so is declared but never driven in this model; si and se
+// are unused.
+module dcm_panel(
+   // Outputs
+   output reg [31:0] lkup_hit,   // per even/odd entry pair lookup hit
+   output reg [31:0] rd_data,    // { addr<39:10>, parity, valid }
+   output            so,         // scan out (undriven in this model)
+   // Inputs
+   input             rd_en,
+   input             wr_en,
+   input             cam_en,
+   input      [32:0] wr_data,    // { addr<39:10>, addr<8>, parity, valid }
+   input      [5:0]  rw_addr,    // entry index for read/write.
+                                 // NOTE(review): the previous comment said
+                                 // "even entries will have wr_data<0> == 0",
+                                 // but wr_data[0] is the valid bit; the CAM
+                                 // below selects even/odd on wr_data[2].
+                                 // Presumably a stale bit number - confirm.
+   input      [7:0]  inv_mask,   // one invalidate-enable per 8 entries
+   input             rst_l,
+   input             rclk,
+   input             rst_warm,
+   input             si,
+   input             se,
+   input             rst_tri_en,
+   input             sehold
+   );
+
+
+// Storage
+reg  [29:0]  addr_array[63:0];   // address tags
+reg  [63:0]  valid;              // valid bits (latched / written below)
+reg  [63:0]  parity;             // parity bits
+reg  [63:0]  valid_bit;          // valid, re-sampled on posedge rclk
+
+// Lookup intermediates
+reg  [63:0]  cam_hit;            // per-entry match line
+reg  [63:0]  reset_valid;        // per-entry "clear valid" request
+reg          cam_out;            // qualified lookup enable
+
+// Registered request
+reg          rd_en_d, wr_en_d;
+reg          cam_en_d;
+reg  [7:0]   inval_mask_d;
+reg  [5:0]   rw_addr_d;
+reg          rst_tri_en_d1;
+
+integer      i;                  // entry-pair index for the CAM loop
+
+
+//-----------------------------------------------------------------------
+// Input flops.  Every request flop holds its value while sehold is set.
+//-----------------------------------------------------------------------
+always @(posedge rclk)
+begin
+        rd_en_d      <= (sehold) ? rd_en_d      : rd_en;
+        wr_en_d      <= (sehold) ? wr_en_d      : wr_en;
+        rw_addr_d    <= (sehold) ? rw_addr_d    : rw_addr;
+        cam_en_d     <= (sehold) ? cam_en_d     : cam_en;
+        inval_mask_d <= (sehold) ? inval_mask_d : inv_mask;
+
+        rst_tri_en_d1 <= rst_tri_en; // this is a dummy flop only used as a trigger
+end
+
+
+//-----------------------------------------------------------------------
+// VALID flop: the lookup and the read port see the valid bits as they were
+// at the last rising edge.
+//-----------------------------------------------------------------------
+always @(posedge rclk) begin
+        valid_bit <= valid;
+end
+
+
+//-----------------------------------------------------------------------
+// CAM operation and reset_valid generation.
+// Level-sensitive on the registered controls so that lkup_hit is a ph1
+// signal.  addr_array is deliberately not in the sensitivity list (it was
+// commented out as "memory" in the original list); do not "complete" the
+// list without re-validating the model.
+//-----------------------------------------------------------------------
+always @( cam_en_d or inval_mask_d or rst_tri_en or
+          rst_tri_en_d1 or valid_bit or wr_data or rst_warm or rst_l )
+begin
+
+        cam_out = cam_en_d & ~(rst_tri_en | rst_tri_en_d1);
+
+        // Pair i covers entries 2*i (even, needs wr_data[2] == 0) and
+        // 2*i+1 (odd, needs wr_data[2] == 1).  Four pairs (8 entries)
+        // share one invalidate mask bit, hence inval_mask_d[i/4].
+        for (i = 0; i < 32; i = i + 1) begin
+                cam_hit[2*i]       = ( wr_data[32:3] == addr_array[2*i] ) &
+                                     cam_out & ~wr_data[2] & valid_bit[2*i];
+                reset_valid[2*i]   = (cam_hit[2*i] & inval_mask_d[i/4]);
+
+                cam_hit[2*i+1]     = ( wr_data[32:3] == addr_array[2*i+1] ) &
+                                     cam_out & wr_data[2] & valid_bit[2*i+1];
+                reset_valid[2*i+1] = (cam_hit[2*i+1] & inval_mask_d[i/4]);
+
+                lkup_hit[i]        = ( cam_hit[2*i] | cam_hit[2*i+1] );
+        end
+
+        if( !rst_l | (rst_warm & ~(rst_tri_en | rst_tri_en_d1)) )  begin
+                valid = 64'b0;
+        end
+
+        else if(cam_out) begin
+                valid = valid_bit & ~reset_valid;
+        end
+
+        // else valid = valid ( implicit latch )
+
+end
+
+
+////////////////////////////////////////////////////////////
+// READ/WRITE  OPERATION
+// Evaluated on the falling edge of rclk; both are blocked while
+// rst_tri_en is set.
+////////////////////////////////////////////////////////////
+
+always @(negedge rclk) begin
+
+        // RD
+        if(rd_en_d & ~rst_tri_en) begin
+                rd_data = {     addr_array[rw_addr_d],
+                                parity[rw_addr_d] ,
+                                valid_bit[rw_addr_d]
+                         };
+`ifndef INNO_MUXEX
+`ifndef DEFINE_0IN
+                // Simulation-only check: read and write in the same cycle.
+                if(wr_en_d) begin
+                `ifdef MODELSIM
+                   $display("L2_DIR_ERR"," rd/wr conflict");
+                `else
+                   $error("L2_DIR_ERR"," rd/wr conflict");
+                `endif
+                end
+`endif
+`endif
+
+        end // of if rd_en_d
+
+        // WR (under DEFINE_0IN the write is modelled by the posedge block
+        // at the end of the module instead)
+`ifndef DEFINE_0IN
+        if(wr_en_d & ~rst_tri_en ) begin
+                // ---- \/ modelling write through behaviour \/-------
+                // A write also drives the written word onto rd_data,
+                // overriding any read result from above.
+                rd_data = {     wr_data[32:3],
+                                wr_data[1] ,
+                                wr_data[0]
+                         };
+
+                parity[rw_addr_d]     =  wr_data[1] ;
+                valid[rw_addr_d]      =  wr_data[0] ;
+                addr_array[rw_addr_d] =  wr_data[32:3] ;
+
+`ifndef INNO_MUXEX
+                // Simulation-only check: lookup and write in the same cycle.
+                if(cam_en_d) begin
+                `ifdef MODELSIM
+                   $display("L2_DIR_ERR"," cam/wr conflict");
+                `else
+                   $error("L2_DIR_ERR"," cam/wr conflict");
+                `endif
+                end
+`endif
+
+        end
+`endif
+
+end
+
+
+// Alternative write/reset model, compiled only when DEFINE_0IN is defined.
+`ifdef DEFINE_0IN
+always  @(posedge rclk)
+begin
+        if(!rst_l) begin        // rst_l all valid bits
+                valid_bit = 64'b0 ;
+        end else if(~rd_en_d & wr_en_d) begin
+                addr_array[rw_addr_d] =  wr_data[32:3] ;
+                parity[rw_addr_d]  =  wr_data[1] ;
+                valid_bit[rw_addr_d]  =  wr_data[0] ;
+        end
+end
+`endif
+
+
+endmodule
Index: /trunk/T1-common/srams/bw_r_cm16x40b.v
===================================================================
--- /trunk/T1-common/srams/bw_r_cm16x40b.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_cm16x40b.v	(revision 6)
@@ -0,0 +1,354 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_cm16x40b.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+// DUAL PORTED CAM RTL
+// 16 entries X 40 bits/entry
+// Read/Write ports can be accessed in PH1 only.
+// CAM port can be accessed in PH2 only.
+////////////////////////////////////////////////////////////////////////
+
+module bw_r_cm16x40b( /*AUTOARG*/
+   // Outputs
+   dout, match, match_idx, so, 
+   // Inputs
+   adr_w, din, write_en, rst_tri_en, adr_r, read_en, lookup_en, key, 
+   rclk, sehold, se, si, rst_l
+   );
+
+input   [15:0]  adr_w ; // set up to +ve edge
+input   [39:0]  din;    // set up to +ve edge
+input           write_en;       // +ve edge clk; write enable
+input		rst_tri_en;
+input   [15:0]  adr_r;  // set up to +ve edge
+input           read_en;
+output  [39:0]  dout;
+input           lookup_en;      // set up to -ve edge
+input   [39:8]  key;    // set up to -ve edge
+output  [15:0]  match ;
+output  [15:0]  match_idx ;
+input   rclk ;
+input   sehold, se, si, rst_l;
+output  so ;
+
+reg     [39:0]  mb_cam_data[15:0] ;
+
+reg     [39:0]  dout;
+reg     [39:8]  key_d1;
+reg     lookup_en_d1 ;
+
+reg     [39:0]  tmp_addr ;
+reg     [39:0]  tmp_addr0 ;
+reg     [39:0]  tmp_addr1 ;
+reg     [39:0]  tmp_addr2 ;
+reg     [39:0]  tmp_addr3 ;
+reg     [39:0]  tmp_addr4 ;
+reg     [39:0]  tmp_addr5 ;
+reg     [39:0]  tmp_addr6 ;
+reg     [39:0]  tmp_addr7 ;
+reg     [39:0]  tmp_addr8 ;
+reg     [39:0]  tmp_addr9 ;
+reg     [39:0]  tmp_addr10 ;
+reg     [39:0]  tmp_addr11 ;
+reg     [39:0]  tmp_addr12 ;
+reg     [39:0]  tmp_addr13 ;
+reg     [39:0]  tmp_addr14 ;
+reg     [39:0]  tmp_addr15 ;
+
+reg     [15:0]  adr_w_d1 ;
+reg     [15:0]  adr_r_d1 ;
+reg             mb_wen_d1 ;     // registered write enable
+reg             mb_ren_d1 ;     // registered read enable
+
+reg     [39:0]  din_d1;
+
+reg     [15:0]  match ;
+reg     [15:0]  match_idx ;
+reg     [15:0]  match_p ;
+reg     [15:0]  match_idx_p ;
+
+reg             so ;
+
+reg             rst_l_d1;
+reg             rst_tri_en_d1;
+
+
+always  @(posedge rclk ) begin
+
+	match <= match_p ;
+        match_idx <= match_idx_p ;
+	adr_w_d1 <= (sehold)? adr_w_d1: adr_w ;
+        adr_r_d1 <= (sehold)? adr_r_d1: adr_r;
+        din_d1 <= ( sehold)? din_d1: din ;
+        mb_wen_d1 <= ( sehold)? mb_wen_d1: write_en ;
+        mb_ren_d1 <= ( sehold)? mb_ren_d1 : read_en  ;
+
+	rst_l_d1 <= rst_l ; // this is not a real flop
+        rst_tri_en_d1 <= rst_tri_en ; // this is not a real flop
+
+
+end
+
+
+// CAM OPERATION
+
+`ifdef DEFINE_0IN
+always  @( negedge rclk         ) begin
+`else
+always  @( negedge rclk or rst_l) begin
+`endif
+        lookup_en_d1 = lookup_en ;
+        key_d1 = key;
+	
+	if(~rst_l) begin
+		match_idx_p = 16'b0;
+		match_p = 16'b0;
+	end
+
+        else if  (lookup_en_d1 ) begin
+
+		tmp_addr0 = mb_cam_data[0];
+                match_p[0] =  ( tmp_addr0[39:8] == key_d1[39:8] ) ;
+                match_idx_p[0] = ( tmp_addr0[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr1 = mb_cam_data[1];
+                match_p[1] =  ( tmp_addr1[39:8] == key_d1[39:8] ) ;
+                match_idx_p[1] = ( tmp_addr1[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr2 = mb_cam_data[2];
+                match_p[2] =  ( tmp_addr2[39:8] == key_d1[39:8] ) ;
+                match_idx_p[2] = ( tmp_addr2[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr3 = mb_cam_data[3];
+                match_p[3] =  ( tmp_addr3[39:8] == key_d1[39:8] ) ;
+                match_idx_p[3] = ( tmp_addr3[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr4 = mb_cam_data[4];
+                match_p[4] =  ( tmp_addr4[39:8] == key_d1[39:8] ) ;
+                match_idx_p[4] = ( tmp_addr4[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr5 = mb_cam_data[5];
+                match_p[5] =  ( tmp_addr5[39:8] == key_d1[39:8] ) ;
+                match_idx_p[5] = ( tmp_addr5[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr6 = mb_cam_data[6];
+                match_p[6] =  ( tmp_addr6[39:8] == key_d1[39:8] ) ;
+                match_idx_p[6] = ( tmp_addr6[17:8] == key_d1[17:8] ) ;
+
+                 tmp_addr7 = mb_cam_data[7];
+                match_p[7] =  ( tmp_addr7[39:8] == key_d1[39:8] ) ;
+                match_idx_p[7] = ( tmp_addr7[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr8 = mb_cam_data[8];
+                match_p[8] =  ( tmp_addr8[39:8] == key_d1[39:8] ) ;
+                match_idx_p[8] = ( tmp_addr8[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr9 = mb_cam_data[9];
+                match_p[9] =  ( tmp_addr9[39:8] == key_d1[39:8] ) ;
+                match_idx_p[9] = ( tmp_addr9[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr10 = mb_cam_data[10];
+                match_p[10] =  ( tmp_addr10[39:8] == key_d1[39:8] ) ;
+                match_idx_p[10] = ( tmp_addr10[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr11 = mb_cam_data[11];
+                match_p[11] =  ( tmp_addr11[39:8] == key_d1[39:8] ) ;
+                match_idx_p[11] = ( tmp_addr11[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr12 = mb_cam_data[12];
+                match_p[12] =  ( tmp_addr12[39:8] == key_d1[39:8] ) ;
+                match_idx_p[12] = ( tmp_addr12[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr13 = mb_cam_data[13];
+                match_p[13] =  ( tmp_addr13[39:8] == key_d1[39:8] ) ;
+                match_idx_p[13] = ( tmp_addr13[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr14 = mb_cam_data[14];
+                match_p[14] =  ( tmp_addr14[39:8] == key_d1[39:8] ) ;
+                match_idx_p[14] = ( tmp_addr14[17:8] == key_d1[17:8] ) ;
+
+                tmp_addr15 = mb_cam_data[15];
+                match_p[15] =  ( tmp_addr15[39:8] == key_d1[39:8] ) ;
+                match_idx_p[15] = ( tmp_addr15[17:8] == key_d1[17:8] ) ;
+
+  
+	end
+
+        else begin
+                match_p = 16'b0;
+                match_idx_p = 16'b0;
+        end
+
+end
+
+// CAM write port. READ AND WRITE HAPPEN in Phase 1.
+//
+// The "if" below tests rst_tri_en and rst_l directly, while the
+// sensitivity list names rst_tri_en_d1 and rst_l_d1. Those two are in
+// the list because we want to enter the always block when:
+// - adr_w_d1 , din_d1 , mb_wen_d1 remain the same across the
+//   rising edge of the clock, and
+// - rst_tri_en or rst_l change across the rising edge of the
+//   clock from high to low.
+// adr_w_d1 is a decoded wordline: one set bit selects one CAM entry.
+always  @(adr_w_d1 or din_d1 or mb_wen_d1  or rst_tri_en_d1 or rst_l_d1 ) begin
+  begin
+    if (mb_wen_d1  & ~rst_tri_en & rst_l ) begin
+        case(adr_w_d1 )
+          16'b0000_0000_0000_0000: ;  // do nothing
+          16'b0000_0000_0000_0001: mb_cam_data[0] = din_d1 ;
+          16'b0000_0000_0000_0010: mb_cam_data[1] = din_d1 ;
+          16'b0000_0000_0000_0100: mb_cam_data[2] = din_d1 ;
+          16'b0000_0000_0000_1000: mb_cam_data[3] = din_d1 ;
+          16'b0000_0000_0001_0000: mb_cam_data[4] = din_d1;
+          16'b0000_0000_0010_0000: mb_cam_data[5] = din_d1 ;
+          16'b0000_0000_0100_0000: mb_cam_data[6] = din_d1 ;
+          16'b0000_0000_1000_0000: mb_cam_data[7] = din_d1 ;
+          16'b0000_0001_0000_0000: mb_cam_data[8] = din_d1 ;
+          16'b0000_0010_0000_0000: mb_cam_data[9] = din_d1 ;
+          16'b0000_0100_0000_0000: mb_cam_data[10] = din_d1 ;
+          16'b0000_1000_0000_0000: mb_cam_data[11] = din_d1 ;
+          16'b0001_0000_0000_0000: mb_cam_data[12] = din_d1 ;
+          16'b0010_0000_0000_0000: mb_cam_data[13] = din_d1 ;
+          16'b0100_0000_0000_0000: mb_cam_data[14] = din_d1 ;
+          16'b1000_0000_0000_0000: mb_cam_data[15] = din_d1 ;
+          //16'b1111_1111_1111_1111:
+            //    begin
+             //           mb_cam_data[15] = din_d1 ;
+              //          mb_cam_data[14] = din_d1 ;
+               //         mb_cam_data[13] = din_d1 ;
+                //        mb_cam_data[12] = din_d1 ;
+                 //       mb_cam_data[11] = din_d1 ;
+                  //      mb_cam_data[10] = din_d1 ;
+                   //     mb_cam_data[9] = din_d1 ;
+                    //    mb_cam_data[8] = din_d1 ;
+                    //    mb_cam_data[7] = din_d1 ;
+                    //    mb_cam_data[6] = din_d1 ;
+                    //    mb_cam_data[5] = din_d1 ;
+                    //    mb_cam_data[4] = din_d1 ;
+                    //    mb_cam_data[3] = din_d1 ;
+                    //    mb_cam_data[2] = din_d1 ;
+                    //    mb_cam_data[1] = din_d1 ;
+                    //    mb_cam_data[0] = din_d1 ;
+               // end
+          default:
+     		// 0in <fire -message "FATAL ERROR: incorrect write wordline"
+`ifdef DEFINE_0IN
+             ;
+`else
+`ifdef  INNO_MUXEX
+             ;
+`else
+		`ifdef MODELSIM
+            $display("PH2_CAM2_ERROR"," incorrect write wordline %h ", adr_w_d1);
+		`else
+            $error("PH2_CAM2_ERROR"," incorrect write wordline %h ", adr_w_d1);
+		`endif
+`endif
+`endif
+
+        endcase
+      end
+  end
+
+end
+
+
+
+// CAM read port: drives dout from the entry selected by adr_r_d1.
+//   rst_l low -> 40'b0; read with rst_tri_en high -> all ones;
+//   read and write on the same non-zero wordline -> X; no wordline -> all ones.
+// rst_l_d1 has purely been added so that we enter the always
+// block when the wordline/wr_en does not change across clk cycles
+// but rst_l does; it is not used in any of the "if" statements.
+// The read enable is qualified with rclk so that we do not read from
+// the array if rst_l goes high during the negative phase of rclk.
+// In every other case dout is not assigned and keeps its previous value.
+always  @( /*memory or*/ adr_r_d1 or adr_w_d1
+          or mb_ren_d1 or mb_wen_d1 or rst_l_d1 or rst_l or rst_tri_en_d1) begin
+  if(~rst_l ) begin
+        dout = 40'b0 ;
+   end
+  else if (mb_ren_d1 & rclk & rst_tri_en ) begin
+                dout = 40'hff_ffff_ffff ;
+  end
+  else if (mb_ren_d1 & rclk & ~rst_tri_en) begin
+    if ((mb_wen_d1) && (adr_r_d1 == adr_w_d1) && (adr_r_d1) )
+      begin
+             dout = 40'bx ;
+
+`ifdef DEFINE_0IN
+`else
+`ifdef  INNO_MUXEX
+`else
+		`ifdef MODELSIM
+	         $display("PH1_CAM2_ERROR"," read write conflict %h ", adr_r_d1);	
+		`else
+             $error("PH1_CAM2_ERROR"," read write conflict %h ", adr_r_d1);
+   		`endif
+`endif
+`endif
+      end
+    else
+      begin
+        case(adr_r_d1)
+          // match sense amp ckt behavior when no read wl is selected
+          16'b0000_0000_0000_0000: dout = 40'hff_ffff_ffff ;
+          16'b0000_0000_0000_0001: dout = mb_cam_data[0] ;
+          16'b0000_0000_0000_0010: dout = mb_cam_data[1] ;
+          16'b0000_0000_0000_0100: dout = mb_cam_data[2] ;
+          16'b0000_0000_0000_1000: dout = mb_cam_data[3] ;
+          16'b0000_0000_0001_0000: dout = mb_cam_data[4] ;
+          16'b0000_0000_0010_0000: dout = mb_cam_data[5] ;
+          16'b0000_0000_0100_0000: dout = mb_cam_data[6] ;
+          16'b0000_0000_1000_0000: dout = mb_cam_data[7] ;
+          16'b0000_0001_0000_0000: dout = mb_cam_data[8] ;
+          16'b0000_0010_0000_0000: dout = mb_cam_data[9] ;
+          16'b0000_0100_0000_0000: dout = mb_cam_data[10] ;
+          16'b0000_1000_0000_0000: dout = mb_cam_data[11] ;
+          16'b0001_0000_0000_0000: dout = mb_cam_data[12] ;
+          16'b0010_0000_0000_0000: dout = mb_cam_data[13] ;
+          16'b0100_0000_0000_0000: dout = mb_cam_data[14] ;
+          16'b1000_0000_0000_0000: dout = mb_cam_data[15] ;
+          default:
+             // 0in <fire -message "FATAL ERROR: incorrect read wordline"
+`ifdef DEFINE_0IN
+             ;
+`else
+`ifdef  INNO_MUXEX
+             ;
+`else
+		`ifdef MODELSIM
+             $display("PH1_CAM2_ERROR"," incorrect read wordline %h ", adr_r_d1);
+		`else
+             $error("PH1_CAM2_ERROR"," incorrect read wordline %h ", adr_r_d1);
+		`endif
+`endif
+`endif
+
+        endcase
+      end
+
+        end // of else if
+end
+endmodule
+
+
+
Index: /trunk/T1-common/srams/bw_r_rf16x128d.v
===================================================================
--- /trunk/T1-common/srams/bw_r_rf16x128d.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_rf16x128d.v	(revision 6)
@@ -0,0 +1,226 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_rf16x128d.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+// 16 X 128 R1 W1 RF macro with decoded (one-hot) wordlines.
+// Read/Write ports can be accessed in PH1 only.
+////////////////////////////////////////////////////////////////////////
+
+module bw_r_rf16x128d(/*AUTOARG*/
+   // Outputs
+   dout, so, 
+   // Inputs
+   din, rd_wl, wr_wl, read_en, wr_en, rst_tri_en, rclk, se, si, 
+   reset_l, sehold
+   );
+
+   input [127:0]  din;         // write data
+   input [15:0]   rd_wl;       // decoded read wordline
+   input [15:0]   wr_wl;       // decoded write wordline
+   input          read_en;     // read enable
+   input          wr_en;       // write enable; a write also needs
+                               // reset_l high and rst_tri_en low
+   input          rst_tri_en;  // gates off writes during SCAN.
+   input          rclk;
+   input          se, si;
+   input          reset_l;
+   input          sehold;      // hold scan in data.
+
+   output [127:0] dout;
+   output         so;
+   // NOTE(review): so is never driven and se/si are never read in this
+   // behavioral model.
+
+
+   reg [127:0] dout;
+
+   // memory array
+   reg [127:0]  inq_ary [15:0];
+
+   // internal variable
+   integer      i;
+   reg [127:0]  temp, data_in;
+   reg [3:0]    rdptr_d1, wrptr_d1;
+   wire [160:0] scan_out;
+   // NOTE(review): i, temp, data_in and scan_out are not referenced below.
+   reg [127:0]  wrdata_d1 ;
+   reg          ren_d1;
+   reg          wr_en_d1;
+   reg [15:0]   rd_wl_d1, wr_wl_d1;
+   reg          rst_tri_en_d1;
+
+always	@(posedge rclk ) begin
+  // Capture the port inputs; sehold high keeps the previously captured values.
+  wrdata_d1 <= ( sehold)? wrdata_d1 : din;
+  wr_en_d1 <= ( sehold)? wr_en_d1 : wr_en ;
+  wr_wl_d1 <= (sehold) ? wr_wl_d1 : wr_wl ;
+  ren_d1 <= (sehold)? ren_d1 : read_en;
+  rd_wl_d1 <= (sehold) ? rd_wl_d1 : rd_wl ;
+
+  rst_tri_en_d1 <= rst_tri_en ; // not a real flop ( only used as a trigger ). Works only for accesses made in PH1
+end 
+  
+//////////////////////////////////////////////////////////////////////
+// Read Operation
+//////////////////////////////////////////////////////////////////////
+
+   always @(/*AUTOSENSE*/ /*memory or*/ rd_wl_d1 or ren_d1 or reset_l
+            or rst_tri_en_d1 or wr_en_d1 or wr_wl_d1)
+     begin
+         if (reset_l)
+
+               begin
+		  // ---- \/ added the rst_tri_en qual on 11/11 \/------
+                  if (ren_d1)
+                    begin
+			// one-hot wordline -> 4-bit pointer; all-zero keeps the old pointer
+
+			case(rd_wl_d1 & {16{~rst_tri_en}})
+	  			16'b0000_0000_0000_0000: ; // do nothing.
+          			16'b0000_0000_0000_0001: rdptr_d1     = 4'b0000;
+          			16'b0000_0000_0000_0010: rdptr_d1     = 4'b0001;
+          			16'b0000_0000_0000_0100: rdptr_d1     = 4'b0010;
+          			16'b0000_0000_0000_1000: rdptr_d1     = 4'b0011;
+          			16'b0000_0000_0001_0000: rdptr_d1     = 4'b0100;
+          			16'b0000_0000_0010_0000: rdptr_d1     = 4'b0101;
+          			16'b0000_0000_0100_0000: rdptr_d1     = 4'b0110;
+          			16'b0000_0000_1000_0000: rdptr_d1     = 4'b0111;
+          			16'b0000_0001_0000_0000: rdptr_d1     = 4'b1000;
+          			16'b0000_0010_0000_0000: rdptr_d1     = 4'b1001;
+          			16'b0000_0100_0000_0000: rdptr_d1     = 4'b1010;
+          			16'b0000_1000_0000_0000: rdptr_d1     = 4'b1011;
+          			16'b0001_0000_0000_0000: rdptr_d1     = 4'b1100;
+          			16'b0010_0000_0000_0000: rdptr_d1     = 4'b1101;
+          			16'b0100_0000_0000_0000: rdptr_d1     = 4'b1110;
+          			16'b1000_0000_0000_0000: rdptr_d1     = 4'b1111;
+          			default: rdptr_d1 = 4'bx ; 
+        		endcase
+
+`ifdef  INNO_MUXEX
+`else
+                      // Report an invalid (multi-hot or X) read wordline.  Case equality is
+                      // required: "== 4'bx" evaluates to X, which an "if" treats as false.
+                       if((rdptr_d1 === 4'bx) && ((rd_wl_d1 & {16{~rst_tri_en}}) !== 16'b0)) begin
+					`ifdef MODELSIM
+                                $display("rf_error"," read pointer error %h ", rdptr_d1[3:0]);
+					`else
+                                $error("rf_error"," read pointer error %h ", rdptr_d1[3:0]);
+					`endif		
+                       end
+`endif
+
+		       if(rst_tri_en_d1) begin // special case
+				dout[127:0] = 128'hFFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF ;
+		       end
+
+			// RW -conflict case and the case where all wlines are zero
+
+                       else if ((( wr_en_d1 & ~rst_tri_en ) && (rd_wl_d1 == wr_wl_d1))||
+				((rd_wl_d1 & {16{~rst_tri_en}}) == 16'b0 )) begin
+			 	dout[127:0] = 128'bx ;
+                       end
+
+                       else dout = inq_ary[rdptr_d1];
+
+                    end // of if rd_en
+
+          end // if reset_l
+	  else dout  = 128'b0 ;
+     end // always @ (...
+
+
+//////////////////////////////////////////////////////////////////////
+// Write Operation
+//////////////////////////////////////////////////////////////////////
+   always @ (/*AUTOSENSE*/reset_l or rst_tri_en_d1 or wr_en_d1
+             or wr_wl_d1 or wrdata_d1)
+     begin
+        if ( reset_l) begin
+
+`ifdef  INNO_MUXEX
+		if(wr_en_d1===1'bx) begin
+			// do nothing
+		end
+`else
+		// Case equality so that an X write enable is actually detected.
+	 	if(wr_en_d1===1'bx) begin
+		`ifdef MODELSIM
+			$display("rf_error"," write enable error %b ", wr_en_d1);
+		`else
+			$error("rf_error"," write enable error %b ", wr_en_d1);
+		`endif	
+         	end
+`endif
+
+	 	else if(wr_en_d1 & ~rst_tri_en )  begin
+
+			case(wr_wl_d1)
+	  			16'b0000_0000_0000_0000: ; // do nothing.
+          			16'b0000_0000_0000_0001: wrptr_d1     = 4'b0000;
+          			16'b0000_0000_0000_0010: wrptr_d1     = 4'b0001;
+          			16'b0000_0000_0000_0100: wrptr_d1     = 4'b0010;
+          			16'b0000_0000_0000_1000: wrptr_d1     = 4'b0011;
+          			16'b0000_0000_0001_0000: wrptr_d1     = 4'b0100;
+          			16'b0000_0000_0010_0000: wrptr_d1     = 4'b0101;
+          			16'b0000_0000_0100_0000: wrptr_d1     = 4'b0110;
+          			16'b0000_0000_1000_0000: wrptr_d1     = 4'b0111;
+          			16'b0000_0001_0000_0000: wrptr_d1     = 4'b1000;
+          			16'b0000_0010_0000_0000: wrptr_d1     = 4'b1001;
+          			16'b0000_0100_0000_0000: wrptr_d1     = 4'b1010;
+          			16'b0000_1000_0000_0000: wrptr_d1     = 4'b1011;
+          			16'b0001_0000_0000_0000: wrptr_d1     = 4'b1100;
+          			16'b0010_0000_0000_0000: wrptr_d1     = 4'b1101;
+          			16'b0100_0000_0000_0000: wrptr_d1     = 4'b1110;
+          			16'b1000_0000_0000_0000: wrptr_d1     = 4'b1111;
+          			default:  wrptr_d1= 4'bx ; 
+			endcase
+
+`ifdef  INNO_MUXEX
+			      if(wr_wl_d1!=16'b0)
+             			inq_ary[wrptr_d1] = wrdata_d1 ;
+`else
+			// Invalid (multi-hot or X) write wordline: report it and skip the write.
+	 		if((wrptr_d1 === 4'bx) && (wr_wl_d1 !== 16'b0)) begin
+			`ifdef MODELSIM
+               			$display("rf_error"," write pointer error %h ", wrptr_d1[3:0]);
+			`else
+               			$error("rf_error"," write pointer error %h ", wrptr_d1[3:0]);
+			`endif
+         		end
+	 		else  begin
+			      if(wr_wl_d1!=16'b0)
+             			inq_ary[wrptr_d1] = wrdata_d1 ;
+         		end
+`endif
+	 	end
+
+	 	else  begin
+				// do nothing
+	 	end
+
+	end // of if reset_l
+	
+     end // always @ (...
+
+
+endmodule // rf_16x128d
+
+
+
Index: /trunk/T1-common/srams/bw_r_l2d_rep_top.v
===================================================================
--- /trunk/T1-common/srams/bw_r_l2d_rep_top.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_l2d_rep_top.v	(revision 6)
@@ -0,0 +1,67 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_l2d_rep_top.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module bw_r_l2d_rep_top (/*AUTOARG*/
+   // Outputs
+   word_en_buf, col_offset_buf, set_buf, wr_en_buf, way_sel_buf, 
+   decc_in_buf, fbdt_l, fbdb_l, scdata_scbuf_decc_top_buf, 
+   scdata_scbuf_decc_bot_buf, 
+   // Inputs
+   word_en_l, col_offset_l, set_l, wr_en_l, way_sel_l, decc_in_l, 
+   scbuf_scdata_fbdecc_top, scbuf_scdata_fbdecc_bot, sbdt_l, sbdb_l
+   );
+   // Purely combinational: every output is the bitwise inverse of one input.
+   output [3:0]   word_en_buf;
+   output         col_offset_buf;
+   output [9:0]   set_buf;
+   output         wr_en_buf;
+   output [11:0]  way_sel_buf;
+   output [155:0] decc_in_buf;
+   output [155:0] fbdt_l;
+   output [155:0] fbdb_l;
+   output [155:0] scdata_scbuf_decc_top_buf;
+   output [155:0] scdata_scbuf_decc_bot_buf;
+
+   input  [3:0]   word_en_l;
+   input          col_offset_l;
+   input  [9:0]   set_l;
+   input          wr_en_l;
+   input  [11:0]  way_sel_l;
+   input  [155:0] decc_in_l;
+   input  [155:0] scbuf_scdata_fbdecc_top;
+   input  [155:0] scbuf_scdata_fbdecc_bot;
+   input  [155:0] sbdt_l;
+   input  [155:0] sbdb_l;
+
+   // ------------------------------------------------------------------
+   // Inverting buffers (full-width, so no part-selects are needed)
+   // ------------------------------------------------------------------
+   assign word_en_buf               = ~word_en_l;
+   assign col_offset_buf            = ~col_offset_l;
+   assign set_buf                   = ~set_l;
+   assign wr_en_buf                 = ~wr_en_l;
+   assign way_sel_buf               = ~way_sel_l;
+   assign decc_in_buf               = ~decc_in_l;
+   assign fbdt_l                    = ~scbuf_scdata_fbdecc_top;
+   assign fbdb_l                    = ~scbuf_scdata_fbdecc_bot;
+   assign scdata_scbuf_decc_top_buf = ~sbdt_l;
+   assign scdata_scbuf_decc_bot_buf = ~sbdb_l;
+
+endmodule // bw_r_l2d_rep_top
Index: /trunk/T1-common/srams/bw_r_rf16x32.v
===================================================================
--- /trunk/T1-common/srams/bw_r_rf16x32.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_rf16x32.v	(revision 6)
@@ -0,0 +1,460 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_rf16x32.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+ //  Module Name:  bw_r_rf16x32
+ //  Description:	
+ //   1r1w array for icache and dcache valid bits.  
+ //   Modified to conform to naming convention 
+ //   Added 16 bit wr en 
+ //   Made bit_wen and din flopped inputs 
+ //   So all inputs are setup to flops in the stage before memory
+ //   access.  The data output is available one cycle later (same
+ //   stage as mem access) 
+ // 
+ //  IMPORTANT NOTE: This block has to work even in the case where
+ //  there is contention between a read and write operation for the
+ //  same address.  Based on ease of implementation, the behavior
+ //  during contention is defined as follows.
+ //    -- write always succeeds
+ //    -- read data is (array_data & write_data)
+ //       (i.e. old_data & new_data)
+ //
+ //   So read 0 always succeeds.  read 1 succeeds if the data being
+ //   written is also a 1.  Otherwise it fails.
+ //
+ // new_data = 1, old_data = 0, does not give the expected or
+ // predictable result in post layout, so the code has been modified
+ // to be  
+ // old new rd_data
+ // --- --- -------
+ // 0    0     0
+ // 0    1     X
+ // 1    0     0
+ // 1    1     1
+ //
+ // **The write still succeeds in ALL cases**
+ */
+
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+//`include "sys.h" // system level definition file which contains the 
+// time scale definition
+
+//`include "iop.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_IDCT
+`endif
+
+
+
+module bw_r_rf16x32 (/*AUTOARG*/
+   // Outputs
+   dout, so, 
+   // Inputs
+   rclk, se, si, reset_l, sehold, rst_tri_en, rd_adr1, rd_adr2, 
+   rd_adr1_sel, rd_en, wr_adr, wr_en, bit_wen, din
+   );
+
+	
+   input        rclk;
+   input        se;
+   input        si;
+   input        reset_l;
+   input        sehold;	      // scan enable hold
+   input        rst_tri_en;
+   
+   // 11:5(I);10:4(D)
+   input [6:0] 	rd_adr1 ;     // rd address-1
+   input [6:0] 	rd_adr2 ;     // rd address-2
+
+   input        rd_adr1_sel ;	// sel rd addr 1 
+   input        rd_en ;		    // rd enable 
+
+   // 11:7(I);10:6(D)
+   input [6:2] 	wr_adr ;  // wr address 
+
+   input        wr_en ;		// wr enable
+   input [15:0] bit_wen ;	// write enable with bit select
+   input        din ;		  // write data
+
+   output [3:0]	dout ;    // valid bits for tag compare
+
+   output       so;       // NOTE(review): never driven in this model
+
+   wire         clk;
+   assign       clk = rclk;
+
+   //----------------------------------------------------------------------
+   // Declarations
+   //----------------------------------------------------------------------
+   // local signals
+   wire [6:0]  	rd_index ;
+  
+   // 512 bit array  (FPGA build: 16 slices of 32 bits, named idcv_ary_<line><way>)
+`ifdef FPGA_SYN_IDCT
+   reg [31:0]	idcv_ary_0000;
+   reg [31:0]	idcv_ary_0001;
+   reg [31:0]	idcv_ary_0010;
+   reg [31:0]	idcv_ary_0011;
+   reg [31:0]	idcv_ary_0100;
+   reg [31:0]	idcv_ary_0101;
+   reg [31:0]	idcv_ary_0110;
+   reg [31:0]	idcv_ary_0111;
+   reg [31:0]	idcv_ary_1000;
+   reg [31:0]	idcv_ary_1001;
+   reg [31:0]	idcv_ary_1010;
+   reg [31:0]	idcv_ary_1011;
+   reg [31:0]	idcv_ary_1100;
+   reg [31:0]	idcv_ary_1101;
+   reg [31:0]	idcv_ary_1110;
+   reg [31:0]	idcv_ary_1111;
+`else
+   reg [511:0] 	idcv_ary;
+`endif
+   
+   reg [3:0]   	vbit,
+               	vbit_sa;
+
+   reg [6:2]   	wr_index_d1;
+   reg [6:0]   	rd_index_d1;
+
+   reg         	rdreq_d1,
+		            wrreq_d1;
+
+   reg [15:0]   bit_wen_d1;
+   reg          din_d1;
+
+   
+   wire         rst_all;
+
+   //----------------------------------------------------------------------
+   // Code Begins Here
+   //----------------------------------------------------------------------
+   assign       rst_all = rst_tri_en | ~reset_l;
+   
+   // mux merged with flop on index
+   assign rd_index = rd_adr1_sel ? rd_adr1:rd_adr2 ;
+
+   // input flops (held while sehold is high)
+   always @ (posedge clk)
+     begin
+        if (~sehold)
+          begin
+	           rdreq_d1 <= rd_en ;
+	           wrreq_d1 <= wr_en ;
+	           rd_index_d1 <= rd_index;
+	           wr_index_d1 <= wr_adr;
+             bit_wen_d1 <= bit_wen;
+             din_d1 <= din;
+          end
+     end
+   
+
+   //----------------------------------------------------------------------
+   // Read Operation
+   //----------------------------------------------------------------------
+`ifdef FPGA_SYN_IDCT
+   always @(/*AUTOSENSE*/
+	    idcv_ary_0000 or idcv_ary_0001 or idcv_ary_0010 or idcv_ary_0011 or
+	    idcv_ary_0100 or idcv_ary_1001 or idcv_ary_1010 or idcv_ary_0111 or
+	    idcv_ary_1000 or idcv_ary_0101 or idcv_ary_0110 or idcv_ary_1011 or
+	    idcv_ary_1100 or idcv_ary_1101 or idcv_ary_1110 or idcv_ary_1111 or rd_index_d1 or rdreq_d1) 
+`else
+   always @(/*AUTOSENSE*/idcv_ary or rd_index_d1 or rdreq_d1) 
+`endif
+     begin
+	      if (rdreq_d1)  // should work even if there is read
+		                   // write conflict.  Data can be latest
+	                     // or previous but should not be x
+	        begin
+`ifdef FPGA_SYN_IDCT
+ 	    case(rd_index_d1[1:0])
+              2'b00: begin
+              vbit[0] = idcv_ary_0000[{rd_index_d1[6:2]}];
+              vbit[1] = idcv_ary_0001[{rd_index_d1[6:2]}];
+              vbit[2] = idcv_ary_0010[{rd_index_d1[6:2]}];
+              vbit[3] = idcv_ary_0011[{rd_index_d1[6:2]}];
+              end
+              2'b01: begin
+              vbit[0] = idcv_ary_0100[{rd_index_d1[6:2]}];
+              vbit[1] = idcv_ary_0101[{rd_index_d1[6:2]}];
+              vbit[2] = idcv_ary_0110[{rd_index_d1[6:2]}];
+              vbit[3] = idcv_ary_0111[{rd_index_d1[6:2]}];
+              end
+              2'b10: begin
+              vbit[0] = idcv_ary_1000[{rd_index_d1[6:2]}];
+              vbit[1] = idcv_ary_1001[{rd_index_d1[6:2]}];
+              vbit[2] = idcv_ary_1010[{rd_index_d1[6:2]}];
+              vbit[3] = idcv_ary_1011[{rd_index_d1[6:2]}];
+              end
+              2'b11: begin
+              vbit[0] = idcv_ary_1100[{rd_index_d1[6:2]}];
+              vbit[1] = idcv_ary_1101[{rd_index_d1[6:2]}];
+              vbit[2] = idcv_ary_1110[{rd_index_d1[6:2]}];
+              vbit[3] = idcv_ary_1111[{rd_index_d1[6:2]}];
+              end
+            endcase
+`else
+	           vbit[0] = idcv_ary[{rd_index_d1, 2'b00}]; // way 0
+	           vbit[1] = idcv_ary[{rd_index_d1, 2'b01}]; // way 1
+	           vbit[2] = idcv_ary[{rd_index_d1, 2'b10}]; // way 2
+	           vbit[3] = idcv_ary[{rd_index_d1, 2'b11}]; // way 3
+`endif
+	        end     // if (rdreq_d1)
+
+        else      // i/dcache disabled or rd disabled
+          begin
+             vbit[3:0] = 4'bx;
+          end // else: !if(rdreq_d1)
+     end // always @ (...
+
+   // r-w conflict case, returns old_data & new_data
+   // 12/06 modified to be
+   // 0  0  0
+   // 0  1  X
+   // 1  0  0
+   // 1  1  1
+`ifdef FPGA_SYN_IDCT
+    initial
+    begin
+        // Power-up state: every valid bit is 0.  Whole-vector assignments
+        // clear all 32 bits of each slice, including bit 31.
+        idcv_ary_0000 = 32'b0;
+        idcv_ary_0001 = 32'b0;
+        idcv_ary_0010 = 32'b0;
+        idcv_ary_0011 = 32'b0;
+        idcv_ary_0100 = 32'b0;
+        idcv_ary_0101 = 32'b0;
+        idcv_ary_0110 = 32'b0;
+        idcv_ary_0111 = 32'b0;
+        idcv_ary_1000 = 32'b0;
+        idcv_ary_1001 = 32'b0;
+        idcv_ary_1010 = 32'b0;
+        idcv_ary_1011 = 32'b0;
+        idcv_ary_1100 = 32'b0;
+        idcv_ary_1101 = 32'b0;
+        idcv_ary_1110 = 32'b0;
+        idcv_ary_1111 = 32'b0;
+        // (A 5-bit index bounded by "< 5'h1f" stops before bit 31, and "<= 5'h1f" never ends.)
+    end
+`endif
+   reg [3:0] wr_data;
+   always @ (/*AUTOSENSE*/bit_wen_d1 or rd_index_d1 or rst_all
+             or wr_index_d1 or wrreq_d1)
+     begin
+        if (rd_index_d1[6:2] == wr_index_d1[6:2])
+          case (rd_index_d1[1:0])
+            2'b00:  wr_data = bit_wen_d1[3:0] & {4{wrreq_d1 & ~rst_all}};
+            2'b01:  wr_data = bit_wen_d1[7:4] & {4{wrreq_d1 & ~rst_all}};
+            2'b10:  wr_data = bit_wen_d1[11:8] & {4{wrreq_d1 & ~rst_all}};
+            default:  wr_data = bit_wen_d1[15:12] & {4{wrreq_d1 & ~rst_all}};
+          endcase // case(rd_index_d1[1:0])
+        else
+          wr_data = 4'b0;
+     end
+
+`ifdef FPGA_SYN_IDCT
+  assign dout[3:0] = (~reset_l | ~rdreq_d1) ? 4'b0000 : 
+		     (~wr_data & vbit | wr_data & {4{din_d1}} & vbit);
+`else
+   
+   // SA latch -- to make 0in happy
+   always @ (/*AUTOSENSE*/clk or din_d1 or vbit or wr_data)
+     begin
+        if (clk)
+          begin
+             vbit_sa <= (~wr_data & vbit | 
+                         wr_data & {4{din_d1}} & (vbit | 4'bxxxx));
+          end
+     end
+
+   
+// bug:2776 - remove holding the last read value
+// reset_l  rdreq_d1  dout
+//  0       -         0
+//  1       0         0
+//  1       1         vbit_sa
+
+   assign dout[3:0] = (~reset_l | ~rdreq_d1) ? 4'b0000 : vbit_sa[3:0] ;
+
+`endif
+   
+
+   //----------------------------------------------------------------------
+   // Write Operation
+   //----------------------------------------------------------------------
+   // Invalidate/Write occurs on 16B boundary.
+   // For this purpose, 4x4 write-enables are required.
+   // Index thus corresponds to 11:7,6:5,w[1:0], where w=way (ICache)
+   // Index thus corresponds to 10:6,5:4,w[1:0], where w=way (DCache)
+   // Thru data-in, vld bit can be set or cleared.
+   always @ (negedge clk)
+     begin
+	      if (wrreq_d1 & ~rst_all)  // should work even if rd-wr conflict
+	        begin
+             // line 0 (5:4=00)
+`ifdef FPGA_SYN_IDCT
+	           if (bit_wen_d1[0]) idcv_ary_0000[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[1]) idcv_ary_0001[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[2]) idcv_ary_0010[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[3]) idcv_ary_0011[{wr_index_d1[6:2]}] = din_d1;
+`else
+	           if (bit_wen_d1[0])
+	             idcv_ary[{wr_index_d1[6:2],2'b00,2'b00}] = din_d1;
+	           if (bit_wen_d1[1])
+	             idcv_ary[{wr_index_d1[6:2],2'b00,2'b01}] = din_d1;
+	           if (bit_wen_d1[2])
+	             idcv_ary[{wr_index_d1[6:2],2'b00,2'b10}] = din_d1;
+	           if (bit_wen_d1[3])
+	             idcv_ary[{wr_index_d1[6:2],2'b00,2'b11}] = din_d1;
+`endif
+
+             // line 1 (5:4=01)
+`ifdef FPGA_SYN_IDCT
+	           if (bit_wen_d1[4]) idcv_ary_0100[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[5]) idcv_ary_0101[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[6]) idcv_ary_0110[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[7]) idcv_ary_0111[{wr_index_d1[6:2]}] = din_d1;
+`else
+	           if (bit_wen_d1[4])
+	             idcv_ary[{wr_index_d1[6:2],2'b01,2'b00}] = din_d1;
+	           if (bit_wen_d1[5])
+	             idcv_ary[{wr_index_d1[6:2],2'b01,2'b01}] = din_d1;
+	           if (bit_wen_d1[6])
+	             idcv_ary[{wr_index_d1[6:2],2'b01,2'b10}] = din_d1;
+	           if (bit_wen_d1[7])
+	             idcv_ary[{wr_index_d1[6:2],2'b01,2'b11}] = din_d1;
+`endif
+
+             // line 2 (5:4=10)
+`ifdef FPGA_SYN_IDCT
+	           if (bit_wen_d1[8]) idcv_ary_1000[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[9]) idcv_ary_1001[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[10]) idcv_ary_1010[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[11]) idcv_ary_1011[{wr_index_d1[6:2]}] = din_d1;
+`else
+	           if (bit_wen_d1[8])
+	             idcv_ary[{wr_index_d1[6:2],2'b10,2'b00}] = din_d1;
+	           if (bit_wen_d1[9])
+	             idcv_ary[{wr_index_d1[6:2],2'b10,2'b01}] = din_d1;
+	           if (bit_wen_d1[10])
+	             idcv_ary[{wr_index_d1[6:2],2'b10,2'b10}] = din_d1;
+	           if (bit_wen_d1[11])
+	             idcv_ary[{wr_index_d1[6:2],2'b10,2'b11}] = din_d1;
+`endif
+
+             // line 3 (5:4=11)
+`ifdef FPGA_SYN_IDCT
+	           if (bit_wen_d1[12]) idcv_ary_1100[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[13]) idcv_ary_1101[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[14]) idcv_ary_1110[{wr_index_d1[6:2]}] = din_d1;
+	           if (bit_wen_d1[15]) idcv_ary_1111[{wr_index_d1[6:2]}] = din_d1;
+`else
+	           if (bit_wen_d1[12])
+	             idcv_ary[{wr_index_d1[6:2],2'b11,2'b00}] = din_d1;
+	           if (bit_wen_d1[13])
+	             idcv_ary[{wr_index_d1[6:2],2'b11,2'b01}] = din_d1;
+	           if (bit_wen_d1[14])
+	             idcv_ary[{wr_index_d1[6:2],2'b11,2'b10}] = din_d1;
+	           if (bit_wen_d1[15])
+	             idcv_ary[{wr_index_d1[6:2],2'b11,2'b11}] = din_d1;
+`endif
+
+	        end
+     end // always @ (...
+
+
+// synopsys translate_off
+//----------------------------------------------------------------
+// Monitors, shadow logic and other stuff not directly related to
+// memory functionality
+//----------------------------------------------------------------
+`ifdef INNO_MUXEX
+`else
+   // Address monitor (both branches are currently empty: the $error calls are commented out)
+   always @ (/*AUTOSENSE*/rd_index_d1 or rdreq_d1 or wr_index_d1
+             or wrreq_d1)
+     begin
+        if (rdreq_d1 && (rd_index_d1 == 7'bX))
+          begin
+             // 0in <fire -message "FATAL ERROR: bw_r_rf16x32 read address X"
+`ifdef DEFINE_0IN
+`else
+          //$error("RFRDADDR", "Error: bw_r_rf16x32 read address is %b\n", rd_index_d1);
+`endif
+          end
+        else if (wrreq_d1 && (wr_index_d1 == 5'bX))
+          begin
+             // 0in <fire -message "FATAL ERROR: bw_r_rf16x32 write address X"
+`ifdef DEFINE_0IN 
+`else              
+          //$error("RFWRADDR", "Error: bw_r_rf16x32 write address is %b\n", wr_index_d1);
+`endif
+          end
+     end // always @ (...
+
+
+`endif // !`ifdef INNO_MUXEX
+   
+   
+//reg [127:0] w0;
+//reg [127:0] w1;
+//reg [127:0] w2;
+//reg [127:0] w3;
+//integer  i;
+//   
+//    always @(idcv_ary) begin
+//       for (i=0;i<128; i=i+1) begin
+//          w0[i] = idcv_ary[4*i];
+//          w1[i] = idcv_ary[4*i+1];
+//          w2[i] = idcv_ary[4*i+2];
+//          w3[i] = idcv_ary[4*i+3];
+//       end
+//   end
+//
+//   reg [511:0] icv_ary;
+//
+//   always @ (idcv_ary)
+//     icv_ary = idcv_ary;
+
+// synopsys translate_on 
+
+endmodule // bw_r_rf16x32
+
+
+
+
+
+
+
+
+
+
+
+
Index: /trunk/T1-common/srams/bw_r_rf32x80.v
===================================================================
--- /trunk/T1-common/srams/bw_r_rf32x80.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_rf32x80.v	(revision 6)
@@ -0,0 +1,550 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_rf32x80.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//	Description:	Trap Stack Array
+//			- Dual-Ported.
+//			- Port1 - Write Port; Used by wrpr, trap insertion.
+//			Write occurs in W Stage. (M1:M2:W).
+//			- Port2 - Read Port; Used by rdpr, done/retry.
+//			Read occurs in E Stage. 
+//			- Arranged as 6(trap-levels/thread) x 4 threads = 24 entries (the array itself is declared 32 deep, 8 slots per thread).
+//			Trap-level and thread id used to index array.
+*/
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN
+`define FPGA_SYN_32x80
+`endif
+
+`ifdef FPGA_SYN_32x80
+module bw_r_rf32x80 (/*AUTOARG*/
+   // Outputs
+   dout, so, 
+   // Inputs
+   rd_en, rd_adr, wr_en, nib_wr_en, wr_adr, din, 
+   si, se, sehold, rclk, rst_tri_en, reset_l);	
+
+parameter NUM_TPL = 6 ;			// 6 supported trap levels.
+parameter NUM_ENTRIES = 32 ;	// 8 entries per thread
+
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+// End of automatics
+input [4:0]   rd_adr;	  // read adr. 
+input	      rd_en;	  // read pointer
+input         wr_en;	  // write pointer vld
+input [19:0]  nib_wr_en;  // enable write of a byte in tsa. 
+input [4:0]   wr_adr;	  // write adr.
+input [79:0] din;	      // wr data for tsa.
+input		  rclk;	      // clock
+input         reset_l;    // active low reset
+input         rst_tri_en; // reset and scan  
+input         sehold;     // scan hold 
+input		  si;		  // scan in 
+input		  se;		  // scan enable 
+
+/*AUTOOUTPUT*/
+// Beginning of automatic outputs (from unused autoinst outputs)
+// End of automatics
+output	[79:0] dout ; // rd data for tsa.
+output			so ;   // scan out write 
+
+wire [79:0]    dout;
+wire clk; 
+wire wr_vld, wr_vld_d1; 
+
+reg	[79:0]		tsa_rdata;
+reg [79:0]     local_dout;
+// reg		        so; 
+
+integer i,j;
+
+wire	[79:0]	write_mask;
+wire	[79:0]	write_mask_d1;
+//
+// added for atpg support
+wire [4:0]   sehold_rd_adr;	   // output of sehold mux - read adr. 
+wire	     sehold_rd_en;	   // output of sehold mux - read pointer
+wire         sehold_wr_en;	   // output of sehold mux - write pointer vld
+wire [19:0]  sehold_nib_wr_en; // output of sehold mux - enable write of a byte in tsa. 
+wire  [4:0]   sehold_wr_adr;	   // output of sehold mux - write adr.
+wire [79:0]  sehold_din;	   // wr data for tsa.
+
+reg [4:0]   rd_adr_d1;	   // flopped read adr. 
+wire	     rd_en_d1;	   // flopped read pointer
+wire         wr_en_d1;	   // flopped write pointer vld
+wire [19:0]  nib_wr_en_d1; // flopped enable write of a byte in tsa. 
+reg [4:0]   wr_adr_d1;	   // flopped write adr.
+wire [79:0]  din_d1;	   // flopped wr data for tsa.
+// wire [5:0]   local_scan1;
+// wire [25:0]  local_scan2;
+// wire [78:0]  local_scan3;
+
+//
+// creating local clock
+assign clk=rclk;
+// 
+//=========================================================================================
+//	support for atpg pattern generation
+//=========================================================================================
+//
+// read controls
+dp_mux2es #(1) mux_sehold_rd_ctrl (
+    .in0  ({rd_en}),
+    .in1  ({rd_en_d1}),
+    .sel  (sehold),
+    .dout ({sehold_rd_en})
+);
+//
+// modified to match circuit implementation
+dff_s #(1) dff_rd_ctrl_d1(
+    .din ({sehold_rd_en}),
+    .q   ({rd_en_d1}),
+    .clk (clk), 
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// write controls
+// modified to match circuit implementation
+dp_mux2es #(21) mux_sehold_wr_ctrl (
+        .in0    ({nib_wr_en[19:0], wr_en}),
+        .in1    ({nib_wr_en_d1[19:0], wr_en_d1}),
+        .sel    (sehold),
+        .dout   ({sehold_nib_wr_en[19:0], sehold_wr_en})
+);
+
+// modified to match circuit implementation
+dff_s #(21) dff_wr_ctrl_d1(
+    .din ({sehold_nib_wr_en[19:0], sehold_wr_en}),
+    .q   ({nib_wr_en_d1[19:0], wr_en_d1}),
+    .clk (clk), 
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// write data
+dp_mux2es #(80) mux_sehold_din (
+        .in0    (din[79:0]),
+        .in1    (din_d1[79:0]),
+        .sel    (sehold),
+        .dout   (sehold_din[79:0])
+);
+
+dff_s #(80) dff_din_d1(
+    .din (sehold_din[79:0]),
+    .q   (din_d1[79:0]),
+    .clk (clk), 
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+//
+// disable write to register file during reset or scan
+// assign wr_vld = sehold_wr_en & ~rst_tri_en & reset_l; 
+assign wr_vld = sehold_wr_en & ~rst_tri_en;
+assign wr_vld_d1 = wr_en_d1 & ~rst_tri_en;
+
+//    always @ (posedge clk)
+//      begin
+//         so <= 1'bx;
+//      end
+
+//=========================================================================================
+//	generate wordlines
+//=========================================================================================
+
+// Word-Line Generation skipped. Implicit in read and write.
+
+//=========================================================================================
+//	write or read to/from memory
+//=========================================================================================
+// creating the write mask from the nibble enable controls
+
+assign	write_mask[79:0] = 
+	{{4{sehold_nib_wr_en[19]}},
+	 {4{sehold_nib_wr_en[18]}},
+     {4{sehold_nib_wr_en[17]}},
+	 {4{sehold_nib_wr_en[16]}},
+	 {4{sehold_nib_wr_en[15]}},
+	 {4{sehold_nib_wr_en[14]}},
+	 {4{sehold_nib_wr_en[13]}},
+	 {4{sehold_nib_wr_en[12]}},
+	 {4{sehold_nib_wr_en[11]}},
+	 {4{sehold_nib_wr_en[10]}},
+	 {4{sehold_nib_wr_en[9]}},
+	 {4{sehold_nib_wr_en[8]}},
+	 {4{sehold_nib_wr_en[7]}},
+	 {4{sehold_nib_wr_en[6]}},
+	 {4{sehold_nib_wr_en[5]}},
+	 {4{sehold_nib_wr_en[4]}},
+	 {4{sehold_nib_wr_en[3]}},
+	 {4{sehold_nib_wr_en[2]}},
+	 {4{sehold_nib_wr_en[1]}},
+	 {4{sehold_nib_wr_en[0]}}
+	};
+
+assign	write_mask_d1[79:0] = 
+	{{4{nib_wr_en_d1[19]}},
+	 {4{nib_wr_en_d1[18]}},
+     {4{nib_wr_en_d1[17]}},
+	 {4{nib_wr_en_d1[16]}},
+	 {4{nib_wr_en_d1[15]}},
+	 {4{nib_wr_en_d1[14]}},
+	 {4{nib_wr_en_d1[13]}},
+	 {4{nib_wr_en_d1[12]}},
+	 {4{nib_wr_en_d1[11]}},
+	 {4{nib_wr_en_d1[10]}},
+	 {4{nib_wr_en_d1[9]}},
+	 {4{nib_wr_en_d1[8]}},
+	 {4{nib_wr_en_d1[7]}},
+	 {4{nib_wr_en_d1[6]}},
+	 {4{nib_wr_en_d1[5]}},
+	 {4{nib_wr_en_d1[4]}},
+	 {4{nib_wr_en_d1[3]}},
+	 {4{nib_wr_en_d1[2]}},
+	 {4{nib_wr_en_d1[1]}},
+	 {4{nib_wr_en_d1[0]}}
+	};
+
+reg	[79:0]	tsa_mem [NUM_ENTRIES-1:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+
+reg	[79:0]		temp_tlvl;
+wire	[79:0]		temp_tlvl2;
+
+
+
+// Address hold path: while sehold is high each flopped address recirculates into its own flop.
+assign sehold_rd_adr = sehold ? rd_adr_d1 : rd_adr;
+assign sehold_wr_adr = sehold ? wr_adr_d1 : wr_adr;
+always @(posedge clk) begin
+  wr_adr_d1 <= sehold_wr_adr;
+  rd_adr_d1 <= sehold_rd_adr;
+end
+
+// Read port: the output register is cleared while reset_l is low and loaded only on an enabled read.
+assign temp_tlvl2 = tsa_mem[sehold_rd_adr[4:0]];
+always @(posedge clk) begin
+  if (~reset_l)
+    local_dout[79:0] <= 80'b0;
+  else if (sehold_rd_en)
+    local_dout[79:0] <= temp_tlvl2;
+end
+
+always @ ( posedge clk) begin
+	// Nibble-masked write port. The store is non-blocking so that the read port,
+	// which samples tsa_mem on this same clock edge, always sees the pre-write word.
+	if (wr_vld & reset_l)	// writes are blocked by rst_tri_en (via wr_vld) and while reset_l is low
+		tsa_mem[sehold_wr_adr] <= (tsa_mem[sehold_wr_adr] & ~write_mask[79:0]) | (sehold_din[79:0] &  write_mask[79:0]) ;
+end
+
+
+assign dout[79:0] = local_dout;
+
+
+
+endmodule
+
+`else
+
+module bw_r_rf32x80 (/*AUTOARG*/
+   // Outputs
+   dout, so, 
+   // Inputs
+   rd_en, rd_adr, wr_en, nib_wr_en, wr_adr, din, 
+   si, se, sehold, rclk, rst_tri_en, reset_l);	
+
+parameter NUM_TPL = 6 ;			// 6 supported trap levels.
+parameter NUM_ENTRIES = 32 ;	// 8 entries per thread
+
+/*AUTOINPUT*/
+// Beginning of automatic inputs (from unused autoinst inputs)
+// End of automatics
+input [4:0]   rd_adr;	  // read adr. 
+input	      rd_en;	  // read pointer
+input         wr_en;	  // write pointer vld
+input [19:0]  nib_wr_en;  // enable write of a byte in tsa. 
+input [4:0]   wr_adr;	  // write adr.
+input [79:0] din;	      // wr data for tsa.
+input		  rclk;	      // clock
+input         reset_l;    // active low reset
+input         rst_tri_en; // reset and scan  
+input         sehold;     // scan hold 
+input		  si;		  // scan in 
+input		  se;		  // scan enable 
+
+/*AUTOOUTPUT*/
+// Beginning of automatic outputs (from unused autoinst outputs)
+// End of automatics
+output	[79:0] dout ; // rd data for tsa.
+output			so ;   // scan out write 
+
+wire [79:0]    dout;
+wire clk; 
+wire wr_vld, wr_vld_d1; 
+
+reg	[79:0]		tsa_mem [NUM_ENTRIES-1:0];
+reg	[79:0]		tsa_rdata;
+reg [79:0]     local_dout;
+reg	[79:0]		temp_tlvl;
+// reg		        so; 
+
+integer i,j;
+
+wire	[79:0]	write_mask;
+wire	[79:0]	write_mask_d1;
+//
+// added for atpg support
+wire [4:0]   sehold_rd_adr;	   // output of sehold mux - read adr. 
+wire	     sehold_rd_en;	   // output of sehold mux - read pointer
+wire         sehold_wr_en;	   // output of sehold mux - write pointer vld
+wire [19:0]  sehold_nib_wr_en; // output of sehold mux - enable write of a byte in tsa. 
+wire [4:0]   sehold_wr_adr;	   // output of sehold mux - write adr.
+wire [79:0]  sehold_din;	   // wr data for tsa.
+
+wire [4:0]   rd_adr_d1;	   // flopped read adr. 
+wire	     rd_en_d1;	   // flopped read pointer
+wire         wr_en_d1;	   // flopped write pointer vld
+wire [19:0]  nib_wr_en_d1; // flopped enable write of a byte in tsa. 
+wire [4:0]   wr_adr_d1;	   // flopped write adr.
+wire [79:0]  din_d1;	   // flopped wr data for tsa.
+// wire [5:0]   local_scan1;
+// wire [25:0]  local_scan2;
+// wire [78:0]  local_scan3;
+
+//
+// creating local clock
+assign clk=rclk;
+// 
+//=========================================================================================
+//	support for atpg pattern generation
+//=========================================================================================
+//
+// read controls
+dp_mux2es #(6) mux_sehold_rd_ctrl (
+    .in0  ({rd_adr[4:0], rd_en}),
+    .in1  ({rd_adr_d1[4:0], rd_en_d1}),
+    .sel  (sehold),
+    .dout ({sehold_rd_adr[4:0],sehold_rd_en})
+);
+//
+// modified to match circuit implementation
+dff_s #(6) dff_rd_ctrl_d1(
+    .din ({sehold_rd_adr[4:0], sehold_rd_en}),
+    .q   ({rd_adr_d1[4:0], rd_en_d1}),
+    .clk (clk), 
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// write controls
+// modified to match circuit implementation
+dp_mux2es #(26) mux_sehold_wr_ctrl (
+        .in0    ({nib_wr_en[19:0], wr_adr[4:0], wr_en}),
+        .in1    ({nib_wr_en_d1[19:0], wr_adr_d1[4:0], wr_en_d1}),
+        .sel    (sehold),
+        .dout   ({sehold_nib_wr_en[19:0], sehold_wr_adr[4:0],sehold_wr_en})
+);
+
+// modified to match circuit implementation
+dff_s #(26) dff_wr_ctrl_d1(
+    .din ({sehold_nib_wr_en[19:0], sehold_wr_adr[4:0], sehold_wr_en}),
+    .q   ({nib_wr_en_d1[19:0], wr_adr_d1[4:0], wr_en_d1}),
+    .clk (clk), 
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+//
+// write data
+dp_mux2es #(80) mux_sehold_din (
+        .in0    (din[79:0]),
+        .in1    (din_d1[79:0]),
+        .sel    (sehold),
+        .dout   (sehold_din[79:0])
+);
+
+dff_s #(80) dff_din_d1(
+    .din (sehold_din[79:0]),
+    .q   (din_d1[79:0]),
+    .clk (clk), 
+    .se  (se),
+    .si  (),
+    .so  ()
+);
+
+//
+// disable write to register file during reset or scan
+// assign wr_vld = sehold_wr_en & ~rst_tri_en & reset_l; 
+assign wr_vld = sehold_wr_en & ~rst_tri_en;
+assign wr_vld_d1 = wr_en_d1 & ~rst_tri_en;
+
+//    always @ (posedge clk)
+//      begin
+//         so <= 1'bx;
+//      end
+
+//=========================================================================================
+//	generate wordlines
+//=========================================================================================
+
+// Word-Line Generation skipped. Implicit in read and write.
+
+//=========================================================================================
+//	write or read to/from memory
+//=========================================================================================
+// creating the write mask from the nibble enable controls
+
+assign	write_mask[79:0] = 
+	{{4{sehold_nib_wr_en[19]}},
+	 {4{sehold_nib_wr_en[18]}},
+     {4{sehold_nib_wr_en[17]}},
+	 {4{sehold_nib_wr_en[16]}},
+	 {4{sehold_nib_wr_en[15]}},
+	 {4{sehold_nib_wr_en[14]}},
+	 {4{sehold_nib_wr_en[13]}},
+	 {4{sehold_nib_wr_en[12]}},
+	 {4{sehold_nib_wr_en[11]}},
+	 {4{sehold_nib_wr_en[10]}},
+	 {4{sehold_nib_wr_en[9]}},
+	 {4{sehold_nib_wr_en[8]}},
+	 {4{sehold_nib_wr_en[7]}},
+	 {4{sehold_nib_wr_en[6]}},
+	 {4{sehold_nib_wr_en[5]}},
+	 {4{sehold_nib_wr_en[4]}},
+	 {4{sehold_nib_wr_en[3]}},
+	 {4{sehold_nib_wr_en[2]}},
+	 {4{sehold_nib_wr_en[1]}},
+	 {4{sehold_nib_wr_en[0]}}
+	};
+
+assign	write_mask_d1[79:0] = 
+	{{4{nib_wr_en_d1[19]}},
+	 {4{nib_wr_en_d1[18]}},
+     {4{nib_wr_en_d1[17]}},
+	 {4{nib_wr_en_d1[16]}},
+	 {4{nib_wr_en_d1[15]}},
+	 {4{nib_wr_en_d1[14]}},
+	 {4{nib_wr_en_d1[13]}},
+	 {4{nib_wr_en_d1[12]}},
+	 {4{nib_wr_en_d1[11]}},
+	 {4{nib_wr_en_d1[10]}},
+	 {4{nib_wr_en_d1[9]}},
+	 {4{nib_wr_en_d1[8]}},
+	 {4{nib_wr_en_d1[7]}},
+	 {4{nib_wr_en_d1[6]}},
+	 {4{nib_wr_en_d1[5]}},
+	 {4{nib_wr_en_d1[4]}},
+	 {4{nib_wr_en_d1[3]}},
+	 {4{nib_wr_en_d1[2]}},
+	 {4{nib_wr_en_d1[1]}},
+	 {4{nib_wr_en_d1[0]}}
+	};
+
+always @ ( negedge reset_l) // reset assertion: clears the read-data register on the falling edge of reset_l (not clocked)
+	begin
+        local_dout[79:0] <= 80'h0;
+    end
+
+always @ ( posedge reset_l) // reset release: redo the read held in the _d1 flops, but only if clk is high at that moment
+	begin
+	    if (rd_en_d1 & clk)
+		      begin 
+			    if (wr_vld_d1 & (wr_adr_d1[4:0] == rd_adr_d1[4:0]) ) // flopped write targets the entry being read
+				    local_dout[79:0] <= 80'hx; // collision: read data is modelled as unknown
+				else	
+				    for (j=0;j<NUM_ENTRIES;j=j+1) // linear search; nothing is loaded if the address contains X
+					    begin
+						    if (rd_adr_d1[4:0] == j)
+							local_dout[79:0] <= tsa_mem[j] ;
+					    end
+		      end	
+	end
+
+
+
+
+always @ ( posedge reset_l) // reset release: redo the write held in the _d1 flops, but only if clk is high at that moment
+	begin
+		if (wr_vld_d1 & clk) 
+			for (i=0;i<NUM_ENTRIES;i=i+1) // linear search; no entry is written if the address contains X
+				begin
+				if (wr_adr_d1[4:0] == i)
+					begin
+					// read the current 80-bit entry
+					temp_tlvl[79:0] = tsa_mem[i]; 
+					// modify & write: replace only the nibbles whose flopped enable is set
+					tsa_mem[i] = 
+					(temp_tlvl[79:0] & ~write_mask_d1[79:0]) |
+					(din_d1[79:0] &  write_mask_d1[79:0]) ;
+					end
+				end
+	end
+
+	
+always @ ( posedge clk) // normal write port, driven from the sehold-muxed (unflopped) controls and data
+	begin
+		if (wr_vld & reset_l) // wr_vld already excludes rst_tri_en; a low reset_l also blocks the write
+			for (i=0;i<NUM_ENTRIES;i=i+1) // linear search; no entry is written if the address contains X
+				begin
+				if (sehold_wr_adr[4:0] == i)
+					begin
+					// read the current 80-bit entry
+					temp_tlvl[79:0] = tsa_mem[i]; 
+					// modify & write: replace only the nibbles whose enable is set
+					tsa_mem[i] = 
+					(temp_tlvl[79:0] & ~write_mask[79:0]) |
+					(sehold_din[79:0] &  write_mask[79:0]) ;
+					end
+				end
+	end
+
+always @ ( posedge clk ) // normal read port: loads the output register on an enabled read while reset_l is high
+	begin
+              begin
+				if (sehold_rd_en & reset_l)
+					begin 
+						if (wr_vld & (sehold_wr_adr[4:0] == sehold_rd_adr[4:0]) ) // same-cycle write to the entry being read
+							local_dout[79:0] <= 80'hx; // collision: read data is modelled as unknown
+						else	
+							for (j=0;j<NUM_ENTRIES;j=j+1) // linear search; nothing is loaded if the address contains X
+							begin
+								if (sehold_rd_adr[4:0] == j)
+								local_dout[79:0] <= tsa_mem[j] ;
+							end
+					end	
+            end
+	end
+
+assign dout[79:0] = local_dout[79:0];
+
+
+endmodule
+`endif
+
Index: /trunk/T1-common/srams/bw_r_frf.v
===================================================================
--- /trunk/T1-common/srams/bw_r_frf.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_frf.v	(revision 6)
@@ -0,0 +1,147 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_frf.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: bw_r_frf
+//	Description: This is the floating point register file.  It has one R/W port that is
+//		 78 bits (64 bits data, 14 bits ecc) wide.
+*/
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_FRF
+`endif
+
+module bw_r_frf (/*AUTOARG*/
+   // Outputs
+   so, frf_dp_data, 
+   // Inputs
+   rclk, si, se, sehold, rst_tri_en, ctl_frf_wen, ctl_frf_ren, 
+   dp_frf_data, ctl_frf_addr
+   ) ;
+   input rclk;                  // clock for the input/output flops and the write port
+   input si;                    // scan in (enters the chain at controlin_dff)
+   input se;                    // scan enable, qualified by ~sehold below
+   input sehold;                // hold: recirculates the flopped data/controls and blocks scan shift
+   input rst_tri_en;            // sampled on the falling clock edge; gates writes
+   input [1:0] ctl_frf_wen;     // write enables: bit 0 = low 39-bit half, bit 1 = high 39-bit half
+   input ctl_frf_ren;           // read enable
+   input [77:0] dp_frf_data;    // write data: [77:39] high half, [38:0] low half
+   input [6:0]   ctl_frf_addr;  // entry address shared by read and write
+
+   output so;                   // scan out (last bit of the read-data flops)
+   output [77:0] frf_dp_data;   // registered read data
+
+   wire [7:0]    regfile_index; // not referenced elsewhere in this module (see the sas note below)
+   //XST WA CR436004
+        (* keep = "yes" *) wire [7:0]   regfile_index_low;
+	(* keep = "yes" *) wire	[7:0]	regfile_index_high;
+   //
+
+`ifdef FPGA_SYN_FRF
+   reg [38:0]     regfile_high [127:0];
+   reg [38:0]     regfile_low [127:0];
+`else   
+   reg [38:0]     regfile [255:0];
+`endif
+
+   reg            rst_tri_en_negedge; // rst_tri_en as captured on negedge rclk
+   wire [77:0]    read_data;
+   wire           ren_d1;             // _d1 signals are the flopped copies of the corresponding inputs
+   wire [6:0]     addr_d1;
+   wire [1:0]     wen_d1;
+   wire [77:0]    write_data_d1;
+   wire [77:0]    sehold_write_data;
+   wire [9:0]     sehold_cntl_data;
+
+   wire [9:0]     cntl_scan_data;
+   wire [38:0]    write_scan_data_hi;
+   wire [38:0]    write_scan_data_lo;
+   wire [38:0]    read_scan_data_hi;
+   wire [38:0]    read_scan_data_lo;
+
+   wire           real_se;
+   assign         real_se = se & ~sehold; // scan shifting is suppressed while sehold is high
+
+   // This is for sas comparisons
+   assign        regfile_index[7:0] = {ctl_frf_addr[6:0], 1'b0};
+   // Even index = low 39-bit half, odd index = high 39-bit half of the addressed entry
+   assign        regfile_index_low[7:0] = {addr_d1[6:0], 1'b0};
+   assign        regfile_index_high[7:0] = {addr_d1[6:0], 1'b1};
+
+   assign         sehold_write_data[77:0] = (sehold)? write_data_d1[77:0]: dp_frf_data[77:0];
+   assign sehold_cntl_data[9:0] = (sehold)? {addr_d1[6:0],wen_d1[1:0], ren_d1}:
+                                            {ctl_frf_addr[6:0],ctl_frf_wen[1:0],ctl_frf_ren};
+   // All inputs go through flop
+   dff_s #(39) datain_dff1(.din(sehold_write_data[77:39]), .clk(rclk), .q(write_data_d1[77:39]),
+                         .se(real_se), .si({cntl_scan_data[0],write_scan_data_lo[38:1]}), 
+                         .so(write_scan_data_hi[38:0]));
+   dff_s #(39) datain_dff2(.din(sehold_write_data[38:0]), .clk(rclk), .q(write_data_d1[38:0]),
+                         .se(real_se), .si(write_scan_data_hi[38:0]), .so(write_scan_data_lo[38:0]));
+   dff_s #(10) controlin_dff(.din(sehold_cntl_data[9:0]),
+                           .q({addr_d1[6:0],wen_d1[1:0],ren_d1}),
+                           .clk(rclk), .se(real_se), .si({si,cntl_scan_data[9:1]}), .so(cntl_scan_data[9:0]));
+
+   // Read logic: 0 when not reading, X if any write enable is set together with the read, else {high,low} halves
+`ifdef FPGA_SYN_FRF
+   assign read_data[77:0] = (~ren_d1)?             78'b0: 
+                            (wen_d1[1]|wen_d1[0])? {78{1'bx}}:
+                               {regfile_high[regfile_index_high[7:1]],regfile_low[regfile_index_low[7:1]]}; // [7:1] drops the half-select bit, leaving addr_d1
+`else
+   assign read_data[77:0] = (~ren_d1)?             78'b0: 
+                            (wen_d1[1]|wen_d1[0])? {78{1'bx}}:
+                               {regfile[regfile_index_high],regfile[regfile_index_low]};
+`endif
+
+   // Output flops: frf_dp_data is read_data delayed by one rclk edge
+   dff_s #(39) dataout_dff1(.din(read_data[77:39]), .clk(rclk), .q(frf_dp_data[77:39]),
+                          .se(real_se), .si(read_scan_data_lo[38:0]), .so(read_scan_data_hi[38:0]));
+   dff_s #(39) dataout_dff2(.din(read_data[38:0]), .clk(rclk), .q(frf_dp_data[38:0]),
+                          .se(real_se), .si({read_scan_data_hi[37:0],write_scan_data_lo[0]}), 
+                          .so(read_scan_data_lo[38:0]));
+   assign so = read_scan_data_hi[38];
+   // Each 39-bit half is written only when its enable is set and no read (ren_d1) is active in the same cycle
+   always @ (posedge rclk) begin
+      // Write port
+      // write is gated by rst_tri_en (as captured on the falling clock edge)
+`ifdef FPGA_SYN_FRF
+      if (wen_d1[0] & ~ren_d1 & ~rst_tri_en_negedge) begin
+   	regfile_low[regfile_index_low[7:1]] <= write_data_d1[38:0];
+      end
+      if (wen_d1[1] & ~ren_d1 & ~rst_tri_en_negedge) begin
+         regfile_high[regfile_index_high[7:1]] <= write_data_d1[77:39];
+      end
+`else
+      if (wen_d1[0] & ~ren_d1 & ~rst_tri_en_negedge) begin
+         regfile[regfile_index_low] <= write_data_d1[38:0];
+      end
+      if (wen_d1[1] & ~ren_d1 & ~rst_tri_en_negedge) begin
+         regfile[regfile_index_high] <= write_data_d1[77:39];
+      end
+`endif
+   end
+   always @ (negedge rclk) begin
+      // latch rst_tri_en on the falling edge so the write gate is stable at the next rising edge
+      rst_tri_en_negedge <= rst_tri_en;
+   end
+   
+endmodule // bw_r_frf
+
Index: /trunk/T1-common/srams/bw_r_tlb_fpga.v
===================================================================
--- /trunk/T1-common/srams/bw_r_tlb_fpga.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_tlb_fpga.v	(revision 6)
@@ -0,0 +1,1901 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_tlb.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//	Description:	Common TLB for Instruction Fetch and Load/Stores
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which contains the 
+					// time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+`include	"lsu.h"
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN
+`define FPGA_SYN_TLB
+`endif
+
+
+`ifdef FPGA_SYN_TLB
+`ifdef FPGA_SYN_8TLB
+  `define TLB_ENTRIES 8
+  `define TLB_INDEX_WIDTH 3
+`else
+`ifdef FPGA_SYN_16TLB
+  `define TLB_ENTRIES 16
+  `define TLB_INDEX_WIDTH 4
+`else
+`ifdef FPGA_SYN_32TLB
+  `define TLB_ENTRIES 32
+  `define TLB_INDEX_WIDTH 5
+`else
+  `define TLB_ENTRIES 64
+  `define TLB_INDEX_WIDTH 6
+`endif
+`endif
+`endif
+
+module bw_r_tlb_fpga ( /*AUTOARG*/
+   // Outputs
+   tlb_rd_tte_tag, tlb_rd_tte_data, tlb_pgnum, tlb_pgnum_crit, 
+   tlb_cam_hit, cache_way_hit, cache_hit, so, 
+   // Inputs
+   tlb_cam_vld, tlb_cam_key, tlb_cam_pid,  
+   tlb_demap_key, tlb_addr_mask_l, tlb_ctxt, 
+   tlb_wr_vld, tlb_wr_tte_tag, tlb_wr_tte_data, tlb_rd_tag_vld, 
+   tlb_rd_data_vld, tlb_rw_index, tlb_rw_index_vld, tlb_demap, 
+   tlb_demap_auto, tlb_demap_all, cache_ptag_w0, cache_ptag_w1, 
+   cache_ptag_w2, cache_ptag_w3, cache_set_vld, tlb_bypass_va, 
+   tlb_bypass, se, si, hold, adj, arst_l, rst_soft_l, rclk,
+   rst_tri_en
+   ) ;	
+
+
+input			tlb_cam_vld ;		// ld/st requires xlation. 
+input	[40:0]		tlb_cam_key ;		// cam data for loads/stores;includes vld 
+						// CHANGE : add real bit for cam.
+input	[2:0]		tlb_cam_pid ;		// NEW: pid for cam. 
+input	[40:0]		tlb_demap_key ;		// cam data for demap; includes vlds. 
+						// CHANGE : add real bit for demap
+input			tlb_addr_mask_l ;	// address masking occurs
+input	[12:0]		tlb_ctxt ;		// context for cam xslate/demap. 
+input			tlb_wr_vld;		// write to tlb. 
+input	[58:0]		tlb_wr_tte_tag;		// CHANGE:tte tag to be written (55+4-1)
+						// R(+1b),PID(+3b),G(-1b). 
+input	[42:0]		tlb_wr_tte_data;	// tte data to be written.
+						// No change(!!!) - G bit becomes spare
+input			tlb_rd_tag_vld ;	// read tag
+input			tlb_rd_data_vld ;	// read data
+input	[5:0]		tlb_rw_index ;		// index to read/write tlb.
+input			tlb_rw_index_vld ;	// indexed write else use algorithm.
+input			tlb_demap ;		// demap : page/ctxt/all/auto.  
+input			tlb_demap_auto ;	// demap is of type auto 
+input			tlb_demap_all;		// demap-all operation : encoded separately.
+input  	[29:0]    	cache_ptag_w0;       	// way1 30b(D)/29b(I) tag.
+input  	[29:0]    	cache_ptag_w1;       	// way2 30b(D)/29b(I) tag.
+input  	[29:0]     	cache_ptag_w2;       	// way0 30b(D)/29b(I) tag.
+input  	[29:0]     	cache_ptag_w3;       	// way3 30b(D)/29b(I) tag.
+input	[3:0]		cache_set_vld;       	// set vld-4 ways
+input	[12:10]		tlb_bypass_va;	   	// bypass va.other va bits from cam-data
+input			tlb_bypass;		// bypass tlb xslation
+
+input			se ;			// scan-enable ; unused
+input			si ;			// scan data in ; unused
+input			hold ;			// scan hold signal
+input	[7:0]		adj ;			// self-time adjustment ; unused
+input			arst_l ;		// synchronous for tlb ; unused	
+input			rst_soft_l ;		// software reset - asi
+input			rclk;
+input			rst_tri_en ;
+
+output	[58:0]		tlb_rd_tte_tag;		// CHANGE: tte tag read from tlb.
+output	[42:0]		tlb_rd_tte_data;	// tte data read from tlb.
+// Need two ports for tlb_pgnum - critical and non-critical.
+output	[39:10]		tlb_pgnum ;		// bypass or xslated pgnum
+output	[39:10]		tlb_pgnum_crit ;	// bypass or xslated pgnum - critical
+output			tlb_cam_hit ;		// xlation hits in tlb.
+output	[3:0]		cache_way_hit;		// tag comparison results.
+output			cache_hit;		// tag comparison result - 'or' of above.
+
+//output			tlb_writeable ;		// tlb can be written in current cycle.
+
+output			so ;		// scan data out ; unused
+
+wire	[53:0]		tlb_cam_data ;
+wire	[58:0]		wr_tte_tag ;	// CHANGE
+wire	[42:0]		wr_tte_data ;
+wire	[29:3]		phy_pgnum_m;
+wire	[29:0]		pgnum_m;
+wire 	[`TLB_ENTRIES-1:0] used ;
+wire			tlb_not_writeable ;
+wire	[40:25] 	tlb_cam_key_masked ;
+wire	[26:0]		tlb_cam_comp_key ;
+wire			cam_vld ;
+wire			demap_other ;
+wire	[3:0]   	cache_way_hit ;
+wire	[`TLB_ENTRIES-1:0]		mismatch;
+
+reg			tlb_not_writeable_d1 ;
+reg			tlb_writeable ;
+wire	[`TLB_ENTRIES-1:0]		tlb_entry_locked ;
+wire	[`TLB_ENTRIES-1:0]		cam_hit ;
+wire	[`TLB_ENTRIES-1:0]		demap_hit ;
+reg	[`TLB_ENTRIES-1:0]		ademap_hit ;
+wire	[58:0]		rd_tte_tag ;	// CHANGE
+wire	[42:0]		rd_tte_data ;	
+wire	[42:0]		tlb_rd_tte_data ;	
+reg			cam_vld_tmp ;
+reg	[2:0]		cam_pid ;
+reg	[53:0]		cam_data ;
+reg			demap_auto, demap_other_tmp, demap_all ;
+reg	[`TLB_ENTRIES-1:0]		tlb_entry_vld ;
+wire	[`TLB_ENTRIES-1:0]		tlb_entry_used ;
+reg	[`TLB_ENTRIES-1:0]		tlb_entry_replace ;
+reg	[`TLB_ENTRIES-1:0]		tlb_entry_replace_d2 ;
+wire	[29:0]		pgnum_g ;
+reg     [3:0]		cache_set_vld_g;
+reg	[29:0]		cache_ptag_w0_g,cache_ptag_w1_g;
+reg	[29:0]		cache_ptag_w2_g,cache_ptag_w3_g;
+reg	[`TLB_ENTRIES-1:0]		rw_wdline ;
+
+reg			rd_tag; 
+reg			rd_data;
+reg			wr_vld_tmp;
+reg	[`TLB_INDEX_WIDTH-1:0]		rw_index;
+reg			rw_index_vld;
+reg	[29:0] 		vrtl_pgnum_m;
+wire			bypass ;
+reg         bypass_d;
+wire			wr_vld ;
+
+integer	i,j,k,l,m,n,p,r,s,t,u,w;
+
+`define	CAM_CTXT_12_0_HI 	12 	// 13b ctxt
+`define	CAM_CTXT_12_0_LO 	0 		
+
+
+//=========================================================================================
+//	What's Left :
+//=========================================================================================
+
+// Scan Insertion - scan to be ignored in formal verification for now.
+
+//=========================================================================================
+//	Design Notes.
+//=========================================================================================
+
+// - Supported Demap Operations - By Page, By Context, All But
+// Locked, Autodemap, Invalidate-All i.e., reset. Demap Partition is
+// not supported - it is mapped to demap-all by logic. 
+// - Interpretation of demap inputs
+//	- tlb_demap - this is used to signal demap by page, by ctxt
+//	,all, and autodemap. 
+//	- tlb_demap_ctxt - If a demap_by_ctxt operation is occurring then
+//	this signal and tlb_demap must be active.
+//	- tlb_demap_all - demap all operation. If a demap_all operation is
+//	occurring, then tlb_demap_all must be asserted with tlb_demap. 
+// - Reset is similar to demap-all except that *all* entries
+// are invalidated. The action is initiated by software. The reset occurs
+// on the negedge and is synchronous with the clk.
+// - TTE Tag and Data
+// 	- The TTE tag and data can be read together. Each will have its 
+//	own bus and the muxing will occur externally. The tag needs to
+//	be read on a data request to supply the valid bit.
+// 	- The TTE tag and data can be written together.
+// - The cam hit is a separate output signal based on
+// the match signals.
+// - Read/Write may occur based on supplied index. If not valid
+// then use replacement way determined by algorithm to write.
+// - Only write can use replacement way determined by algorithm.
+// - Data is formatted appr. on read or write in the MMU. 
+// - The TLB will generate a signal which reports whether the 
+// tlb can be filled in the current cycle or not.
+// **Physical Tag Comparison**
+// For I-SIDE, comparison is of 28b, whereas for D-side, comparison is of 29b. The actual
+// comparison, due to legacy, is for 30b.
+// For the I-TLB, va[11:10] must be hardwired to the same value as the lsb of the 4 tags
+// at the port level. Since the itag is only 28b, add two least significant bits to extend it to 30b.
+// Similarly, for the dside, va[10] needs to be made same.	
+// **Differentiating among Various TLB Operations**
+// Valid bits are now associated with the key to allow selective incorporation of
+// match results. The 5 valid bits are : v4(b47-28),v3(b27-22),v2(21-16),v1(b15-13)
+// and Gk(G bit for auto-demap). The rules of use are :
+//	- cam: v4-v1 are set high. G=~cam_real=0/1.
+//	- demap_by_page : v4-v1 are set high. G=1. cam_real=0.
+// 	- demap_by_ctxt : v4-v1 are low. G=1. cam_real=0
+//	- demap_all : v4-v1 are don't-care. G=x. cam_real=x
+//	- autodemap : v4-v1 are based on page size of incoming tte. G=~cam_real=0/1.
+// Note : Gk is now used only to void a context match on a Real Translation.
+// In general, if a valid bit is low then the corresponding va field will not take
+// part in the match. Similarly, for the ctxt, if Gk=1, the ctxt will participate
+// in the match.
+//
+// Demap Table (For Satya) :
+// Note : To include a context match, Gk must be set to 1.
+//--------------------------------------------------------------------------------------------------------
+//tlb_demap tlb_demap_all  tlb_ctxt Gk	Vk4 Vk3	Vk2 Vk1 Real	Operation
+//--------------------------------------------------------------------------------------------------------
+//0		x		x   x	x   x	x   x   0	No demap operation
+//1		0		0   1	1   1	1   1	0	Demap by page
+//1		0		0   1	1   0	0   0	0/1	256M demap(auto demap)
+//1		0		0   0	1   0	0   0	0	256M demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	0   0	0/1	4M demap(auto demap)
+//1		0		0   0	1   1	0   0	0	4M demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	1   0	0/1	64k demap(auto demap)
+//1		0		0   0	1   1	1   0	0	64k demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	1   1	0/1	8k demap(auto demap)
+//1		0		0   0	1   1	1   1	0	8k demap(auto demap) (*Illgl*)
+//1		0		1   1	0   0	0   0	0	demap by ctxt
+//1		1		x   x	x   x	x   x	0	demap_all
+//------------------------------------------------------------------------------------------
+//-----
+//All other are illegal combinations
+//
+//=========================================================================================
+//	Changes related to Hypervisor/Legacy Compatibility
+//=========================================================================================
+//
+// - Add PID. PID does not affect demap-all. Otherwise it is included in cam, other demap
+// operations and auto-demap.
+// - Add R. Real translation ignores context. This is controlled externally by Gk.
+// - Remove G bit for tte. Input remains in demap-key/cam-key to allow for disabling
+//   of context match Real Translation  
+// - Final Page Size support - 8KB,64KB,4M,256M
+// - SPARC_HPV_EN has been defined to enable new tlb design support. 
+// Issues : 
+// -Max ptag size is now 28b. Satya, will this help the speed at all. I doubt it !
+
+//=========================================================================================
+//	Miscellaneous
+//=========================================================================================
+   // Internal clock alias: the processes below are all timed from rclk.
+   wire clk = rclk;
+
+// Reset decode.
+//	async_reset : hardware reset, asserted while arst_l is low.
+//	sync_reset  : software reset, asserted while rst_soft_l is low and
+//	              rst_tri_en is not asserted.
+wire async_reset = ~arst_l ;
+wire sync_reset  = (~rst_soft_l & ~rst_tri_en) ;
+
+// INNO - reads and writes are gated off while arst_l is low or rst_tri_en
+// is asserted.
+// NOTE(review): the original remark read "Note required as rst_tri_en is
+// asserted, but implemented in addition in schematic"; presumably it meant
+// "Not required" - confirm.
+wire rw_disable  = ~arst_l | rst_tri_en ;
+
+
+// Encode the per-entry cam_hit vector into an entry index.
+// Entries are scanned in ascending order and every hit overwrites the
+// previous result, so the highest-numbered hitting entry wins. With no
+// hit the index is zero and cam_hit_any stays low.
+reg     [`TLB_INDEX_WIDTH-1:0]   cam_hit_encoded;
+reg                              cam_hit_any;
+integer ii;
+
+always @(cam_hit)
+  begin
+    cam_hit_encoded = {`TLB_INDEX_WIDTH{1'b0}};
+    cam_hit_any     = 1'b0;
+    for (ii = 0; ii < `TLB_ENTRIES; ii = ii + 1)
+      if (cam_hit[ii])
+        begin
+          cam_hit_any     = 1'b1;
+          cam_hit_encoded = ii[`TLB_INDEX_WIDTH-1:0];
+        end
+  end
+
+// Registered "any cam hit, or bypass" flag; it qualifies the cache way-hit
+// comparison one clock later.
+reg cam_hit_any_or_bypass;
+
+always @(posedge clk)
+  begin
+    cam_hit_any_or_bypass <= bypass | cam_hit_any;
+  end
+
+
+
+//=========================================================================================
+// 	Stage Data
+//=========================================================================================
+// Apply address masking
+// tlb_addr_mask_l is active low: when it is 0 the upper 16 bits of the
+// 47:28 VA field of the cam key are forced to zero, when it is 1 they pass
+// through unchanged. Only the cam key is masked; the demap key is not.
+assign	tlb_cam_key_masked[40:25]
+	= {16{tlb_addr_mask_l}} & 
+		tlb_cam_key[`CAM_VA_47_28_HI:`CAM_VA_47_28_LO+4] ;
+
+// Reconstitute cam data CHANGE : add additional bit for real mapping
+// Bits [53:13] carry the 41b key: the demap key during a demap, otherwise
+// the masked upper key bits concatenated with the unmasked lower key bits.
+assign	tlb_cam_data[53:13] = tlb_demap ? 
+	tlb_demap_key[40:0] :
+	{tlb_cam_key_masked[40:25],tlb_cam_key[`CAM_VA_47_28_LO+3:0]} ; 
+
+// 27b compare key built from the same source as above, but with key bits
+// 20, 13, 6 and 2:0 dropped (12b + 6b + 6b + 3b). It is staged into
+// va_tag_plus for the bypass/physical page number path.
+// NOTE(review): the dropped bits look like the per-field valid, Gk and Real
+// bits of the key format - confirm against the CAM_* defines.
+assign tlb_cam_comp_key[26:0] = 
+		tlb_demap ?
+			{tlb_demap_key[32:21], tlb_demap_key[19:14],tlb_demap_key[12:7],
+			tlb_demap_key[5:3]} :
+			{tlb_cam_key_masked[32:25],tlb_cam_key[24:21],
+			tlb_cam_key[19:14],tlb_cam_key[12:7],tlb_cam_key[5:3]} ;
+
+// The low 13 bits of the cam data are the context, supplied on its own port.
+assign	tlb_cam_data[12:0] = tlb_ctxt[12:0] ;
+
+// These signals are flow-thru.
+assign	wr_tte_tag[58:0] 	= tlb_wr_tte_tag[58:0] ;	// CHANGE
+assign	wr_tte_data[42:0] 	= tlb_wr_tte_data[42:0] ;
+
+// CHANGE(SATYA) - Currently the rw_index/rw_index_vld are shared by both reads
+// and writes. However, writes are done in the cycle of broadcast, whereas
+// the reads are done a cycle later, as given in the model(incorrect) 
+// They have to be treated uniformly. To make the model work, I've assumed the read/write 
+// are done in the cycle the valids are broadcast. 
+always @ (posedge clk)
+	begin
+	// Stage the request inputs on the rising edge. While hold is asserted
+	// nothing is loaded, so every staged value keeps its previous state.
+	if (hold)
+		begin
+		// hold : retain all staged state
+		end
+	else
+		begin
+		// cam / demap request
+		cam_vld_tmp		<= tlb_cam_vld ;
+		cam_pid[2:0]		<= tlb_cam_pid[2:0] ;
+		cam_data[53:0] 		<= tlb_cam_data[53:0] ;
+		demap_all		<= tlb_demap_all ;
+		demap_auto		<= tlb_demap_auto ;
+		demap_other_tmp		<= tlb_demap ;
+		// indexed read / write request
+		rw_index[`TLB_INDEX_WIDTH-1:0]	<= tlb_rw_index[`TLB_INDEX_WIDTH-1:0] ;
+		rw_index_vld		<= tlb_rw_index_vld ;
+		rd_data			<= tlb_rd_data_vld ;
+		rd_tag 			<= tlb_rd_tag_vld ;
+		wr_vld_tmp 		<= tlb_wr_vld ;
+		end
+	end
+
+// INNO - gate cam,demap,wr with rst_tri_en.
+// The staged cam, demap and write valids are forced low while rst_tri_en is
+// asserted, so none of those operations can take effect.
+reg rst_tri_en_lat;
+
+// Sensitive to any change of clk, so rst_tri_en is re-sampled on both the
+// rising and the falling clock transition.
+ always        @ (clk)
+ rst_tri_en_lat = rst_tri_en;
+
+// NOTE(review): cam_vld is gated with the sampled copy while demap_other and
+// wr_vld use rst_tri_en directly - confirm that the difference is intended.
+assign	cam_vld = cam_vld_tmp & ~rst_tri_en_lat ;
+assign	demap_other = demap_other_tmp & ~rst_tri_en ;
+assign	wr_vld = wr_vld_tmp & ~rst_tri_en ;
+
+//=========================================================================================
+//	Generate Write Wordlines
+//=========================================================================================
+
+
+// Read results from the tag and data RAM models are passed straight to the
+// module outputs; no extra staging is added here.
+assign tlb_rd_tte_tag[58:0] = rd_tte_tag[58:0] ;	// CHANGE
+
+assign tlb_rd_tte_data[42:0] 	= rd_tte_data[42:0] ;
+
+//=========================================================================================
+//	CAM/DEMAP STLB for xlation
+//=========================================================================================
+
+
+// Demap and CAM operation are mutually exclusive.
+
+// Capture the autodemap hit vector on the falling clock edge.
+// An entry is recorded as an autodemap hit when it is valid and the demap
+// key does not mismatch it. The vector is only reloaded while an autodemap
+// is in progress (demap_auto and demap_other both high); otherwise it holds
+// its value so that the tlb_entry_vld update can consume it when the
+// following write arrives.
+// Nonblocking assignment is used because tlb_entry_vld is updated by another
+// process on the same edge and reads ademap_hit; a blocking assignment here
+// would make the result depend on simulator process ordering.
+always  @ ( negedge clk )
+	begin
+		// The qualifier does not depend on the entry, so test it once.
+		if (demap_auto & demap_other)
+			begin
+			for (n=0;n<`TLB_ENTRIES;n=n+1)
+				ademap_hit[n] <= (~mismatch[n] & tlb_entry_vld[n]) ;
+			end
+
+	end  // always
+
+
+// Translation hit output: high when any entry reports a cam hit.
+assign	tlb_cam_hit = |cam_hit[`TLB_ENTRIES-1:0] ;
+
+// Change tlb_entry_vld handling for multi-threaded tlb writes.
+// A write is always preceded by an autodemap. The intent is to make the result of autodemap
+// (clearing of vld bit if hit) invisible until write occurs. In the same cycle that the write
+// occurs, the vld bit for an entry will be cleared if there is an autodemap hit. The write
+// and admp action may even be to same entry. The write must dominate. There is no need to
+// clear the dmp latches after the write/clear has occurred as the subsequent admp will set
+// up new state in the latches.
+
+// Define valid bit based on write/demap/reset. 
+
+// Decode rw_index into a one-hot wordline.
+// A line is raised only for the entry selected by rw_index, and only when an
+// indexed write (wr_vld_tmp with rw_index_vld), a tag read or a data read is
+// staged; every other line is low.
+always  @ (/*AUTOSENSE*/rd_data or rd_tag or rw_index or rw_index_vld
+           or wr_vld_tmp)
+        begin
+                for (i=0;i<`TLB_ENTRIES;i=i+1)
+                        begin
+                        // default low, raised below for the addressed entry
+                        rw_wdline[i] = 1'b0 ;
+                        if ((rw_index[`TLB_INDEX_WIDTH-1:0] == i) & ((wr_vld_tmp & rw_index_vld) | rd_tag | rd_data))
+                                rw_wdline[i] = 1'b1 ;
+                        end
+
+        end
+
+
+// Valid-bit update, evaluated for every entry on the falling clock edge.
+// Priority, highest first:
+//	1. async_reset (sampled on this edge)			-> clear
+//	2. non-auto demap hit, or sync_reset			-> clear
+//	3. write, by index or to the replacement way		-> load V bit of the tag
+//	4. recorded autodemap hit while a write is valid	-> clear
+// If none applies the valid bit keeps its value.
+always @ (negedge clk)
+	begin
+	for (r=0;r<`TLB_ENTRIES;r=r+1)
+		begin // for
+		if (async_reset)
+			tlb_entry_vld[r] <= 1'b0 ;
+		else if ((demap_hit[r] & ~demap_auto) | sync_reset)	// non-auto-demap, reset
+			tlb_entry_vld[r] <= 1'b0 ;
+		else if (((rw_index_vld & rw_wdline[r]) | (~rw_index_vld & tlb_entry_replace_d2[r])) &
+				wr_vld & ~rw_disable)
+			tlb_entry_vld[r] <= wr_tte_tag[`STLB_TAG_V] ;	// write
+		else if (ademap_hit[r] & wr_vld)			// autodemap specifically
+			tlb_entry_vld[r] <= 1'b0 ;
+		end // for
+	end
+
+
+//=========================================================================================
+//	TAG COMPARISON
+//=========================================================================================
+
+// Staged compare key, bypass VA bits and bypass flag:
+//	[30:4] compare key, [3:1] tlb_bypass_va[12:10], [0] tlb_bypass.
+reg [30:0] va_tag_plus ;
+
+// Stage to m
+always @(posedge clk)
+		begin
+		// INNO - add hold to this input
+		if (hold)
+			begin
+			// hold : keep the staged value
+			end
+		else
+			va_tag_plus[30:0]
+			<= {tlb_cam_comp_key[26:0],tlb_bypass_va[12:10],tlb_bypass};
+		end
+
+// One further stage: the virtual page number and a delayed bypass flag.
+always @(posedge clk)
+   begin
+      bypass_d           <= va_tag_plus[0];
+      vrtl_pgnum_m[29:0] <= va_tag_plus[30:1] ;
+   end
+
+// Undelayed bypass flag, taken directly from the first stage.
+assign bypass = va_tag_plus[0] ;
+
+// Mux to bypass va or form pa tag based on tte-data.
+
+// Physical page number assembled from the four PA fields of the data read
+// from the data RAM (39:28, 27:22, 21:16, 15:13).
+assign	phy_pgnum_m[29:3] = 
+	{rd_tte_data[`STLB_DATA_PA_39_28_HI:`STLB_DATA_PA_39_28_LO],
+		rd_tte_data[`STLB_DATA_PA_27_22_HI:`STLB_DATA_PA_27_22_LO],
+			rd_tte_data[`STLB_DATA_PA_21_16_HI:`STLB_DATA_PA_21_16_LO],
+				rd_tte_data[`STLB_DATA_PA_15_13_HI:`STLB_DATA_PA_15_13_LO]};
+
+// Derive the tlb-based physical address.
+// Bits [2:0] always come from the virtual address. Each of the next three
+// fields takes the physical bits only when its SEL bit in the tte data is 0
+// and the access is not a bypass; otherwise the virtual bits pass through.
+// The top field is physical unless the access is a bypass.
+assign pgnum_m[2:0] = vrtl_pgnum_m[2:0];
+assign pgnum_m[5:3] = (~rd_tte_data[`STLB_DATA_15_13_SEL] & ~bypass_d)
+				? phy_pgnum_m[5:3] : vrtl_pgnum_m[5:3] ;
+assign pgnum_m[11:6] = (~rd_tte_data[`STLB_DATA_21_16_SEL] & ~bypass_d)  
+				? phy_pgnum_m[11:6] : vrtl_pgnum_m[11:6] ;
+assign pgnum_m[17:12] = (~rd_tte_data[`STLB_DATA_27_22_SEL] & ~bypass_d)
+				? phy_pgnum_m[17:12] : vrtl_pgnum_m[17:12] ;
+assign pgnum_m[29:18] = ~bypass_d ? phy_pgnum_m[29:18] : vrtl_pgnum_m[29:18];
+
+// Stage to g
+// Flop tags in tlb itself and do comparison immediately after rising edge.
+// Similarly stage va/pa tag to g
+
+// In this version pgnum_g is a direct copy of pgnum_m; no flop is inserted
+// between the two.
+assign pgnum_g[29:0] = pgnum_m[29:0];
+
+// Register the cache set valids and the four way tags on the rising edge so
+// that they line up with pgnum_g for the way-hit comparison.
+always @(posedge clk)
+		begin
+			
+			// rm hold on these inputs.
+			cache_set_vld_g[3:0]  	<= cache_set_vld[3:0] ;
+			cache_ptag_w0_g[29:0] 	<= cache_ptag_w0[29:0] ;
+			cache_ptag_w1_g[29:0] 	<= cache_ptag_w1[29:0] ;
+			cache_ptag_w2_g[29:0] 	<= cache_ptag_w2[29:0] ;
+			cache_ptag_w3_g[29:0] 	<= cache_ptag_w3[29:0] ;
+		end
+
+
+// Need to stage by a cycle where used.
+assign	tlb_pgnum[39:10] = pgnum_g[29:0] ;
+// Same cycle as cam - meant for one load on critical path
+assign	tlb_pgnum_crit[39:10] = pgnum_m[29:0] ;
+
+
+// Per-way cache hit: the registered way tag equals the page number, the way
+// is valid, and the access either hit in the TLB or bypassed translation
+// (registered flag cam_hit_any_or_bypass).
+assign	cache_way_hit[0] = 
+	(cache_ptag_w0_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[0] & cam_hit_any_or_bypass;
+assign	cache_way_hit[1] = 
+	(cache_ptag_w1_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[1] & cam_hit_any_or_bypass;
+assign	cache_way_hit[2] = 
+	(cache_ptag_w2_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[2] & cam_hit_any_or_bypass;
+assign	cache_way_hit[3] = 
+	(cache_ptag_w3_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[3] & cam_hit_any_or_bypass;
+
+// Overall cache hit is the OR of the four way hits.
+assign	cache_hit = |cache_way_hit[3:0];
+
+
+//=========================================================================================
+//	TLB ENTRY REPLACEMENT
+//=========================================================================================
+
+// A single Used bit is used to track the replacement state of each entry.
+// Only an unused entry can be replaced.
+// An Unused entry is :
+//			- an invalid entry
+//			- a valid entry which has had its Used bit cleared.
+//				- on write of a valid entry, the Used bit is set.
+//				- The Used bit of a valid entry is cleared if all
+//				entries have their Used bits set and the entry itself is not Locked.
+// A locked entry should always appear to be Used.
+// A single priority-encoder is required to evaluate the used status. Priority is static
+// and used entry0 is of the highest priority if unused.
+
+// Timing :
+// Used bit gets updated by cam-hit or hit on negedge.
+// After Used bit gets updated off negedge, the replacement entry can be generated in
+// Phase2. In parallel, it is determined whether all Used bits are set or not. If
+// so, then they are cleared on the next negedge with the replacement entry generated
+// in the related Phase1 
+
+// Choosing replacement entry
+// Replacement entry is integer k
+
+// High when every entry is both valid and marked Used, i.e. the replacement
+// logic has no unused entry to offer.
+assign	tlb_not_writeable = &used[`TLB_ENTRIES-1:0] ;
+/*
+// Used bit can be set because of write or because of cam-hit.
+always @(negedge clk)
+	begin
+		for (s=0;s<`TLB_ENTRIES;s=s+1)
+			begin
+				if (cam_hit[s]) 
+					tlb_entry_used[s] <= 1'b1;			
+			end
+
+// Clear on following edge if necessary.
+// CHANGE(SATYA) : tlb_entry_used qualified with valid needs to be used to determine
+// whether the Used bits are to be cleared. This allows invalid entries created
+// by a demap to be used for replacement. Else we will ignore these entries
+// for replacement
+
+		if (tlb_not_writeable)
+			begin
+				for (t=0;t<`TLB_ENTRIES;t=t+1)
+					begin
+						if (~tlb_entry_locked[t])
+							tlb_entry_used[t] <= 1'b0;
+					end
+			end
+	end
+*/
+
+// Effective Used state per entry: the Used bit only counts while the entry
+// is valid, so an invalid entry always appears unused to the replacement
+// logic.
+
+assign	used[`TLB_ENTRIES-1:0] = tlb_entry_used[`TLB_ENTRIES-1:0] & tlb_entry_vld[`TLB_ENTRIES-1:0] ;
+
+
+// Based on updated Used state, generate replacement entry.
+// So, replacement entries can be generated on a cycle-by-cycle basis. 
+//always @(/*AUTOSENSE*/squash or used)
+
+	// Replacement-way selection.
+	// tlb_entry_replace_d1 is a one-hot vector marking the lowest-numbered
+	// entry whose effective Used bit is clear. If every entry is Used, the
+	// highest-numbered entry is selected instead. The result is registered
+	// twice (tlb_entry_replace, tlb_entry_replace_d2) on the rising edge.
+	reg	[`TLB_ENTRIES-1:0]	tlb_entry_replace_d1;
+	reg		tlb_replace_flag;	// set once a candidate has been found
+	always @(/*AUTOSENSE*/used)
+	begin
+  	  tlb_replace_flag=1'b0;
+  	  // Clear the full vector; the replication count must equal the
+  	  // vector width (it was previously one bit short).
+  	  tlb_entry_replace_d1 = {`TLB_ENTRIES{1'b0}};
+  	  // Priority is given to entry0
+   	  for (u=0;u<`TLB_ENTRIES;u=u+1)
+  	  begin
+    	    if(~tlb_replace_flag & ~used[u])
+    	    begin
+      	      tlb_entry_replace_d1[u] = ~used[u] ;
+      	      tlb_replace_flag=1'b1; 
+    	    end
+  	  end
+  	  // No unused entry found: fall back to the last entry.
+  	  if(~tlb_replace_flag) begin
+      	     tlb_entry_replace_d1[`TLB_ENTRIES-1] = 1'b1;
+ 	  end
+	end
+	always @(posedge clk)
+	begin
+	  // named in this manner to keep arch model happy.
+  	  tlb_entry_replace <= tlb_entry_replace_d1 ;
+	end
+	// INNO - 2 stage delay before update is visible
+	always @(posedge clk)
+	begin
+  	  tlb_entry_replace_d2 <= tlb_entry_replace ;
+	end
+
+// Binary index of the replacement way, for the RAM models.
+// tlb_index_a1 encodes the registered selection vector tlb_entry_replace
+// (highest set bit wins, zero when no bit is set); tlb_index_vld_a1 tells
+// whether any bit is set. Both are registered once more so that they line
+// up with tlb_entry_replace_d2.
+reg  [`TLB_INDEX_WIDTH-1:0]  tlb_index_a1;
+reg  [`TLB_INDEX_WIDTH-1:0]  tlb_index;
+reg                          tlb_index_vld;
+wire                         tlb_index_vld_a1;
+integer jj;
+
+assign tlb_index_vld_a1 = |tlb_entry_replace;
+
+always @(tlb_entry_replace)
+  begin
+    tlb_index_a1 = {`TLB_INDEX_WIDTH{1'b0}};
+    for (jj = 0; jj < `TLB_ENTRIES; jj = jj + 1)
+      begin
+        if (tlb_entry_replace[jj])
+          tlb_index_a1 = jj[`TLB_INDEX_WIDTH-1:0];
+      end
+  end
+
+always @(posedge clk)
+  begin
+    tlb_index_vld <= tlb_index_vld_a1;
+    tlb_index     <= tlb_index_a1;  //use instead of tlb_entry_replace_d2;
+  end
+
+
+  
+
+//=========================================================================================
+//	TLB WRITEABLE DETECTION
+//=========================================================================================
+
+// tlb_writeable is the inverse of tlb_not_writeable delayed by two rising
+// clock edges (first stage tlb_not_writeable_d1, second stage inverts).
+always @(posedge clk)
+	begin
+		tlb_writeable        <= ~tlb_not_writeable_d1 ;
+		tlb_not_writeable_d1 <= tlb_not_writeable ;
+	end
+
+// Tag storage and CAM compare. Receives the staged cam/demap key and the
+// per-entry valid bits from this module, and returns the per-entry
+// mismatch, cam_hit and demap_hit vectors, the Used/Locked state and the
+// tag read data.
+bw_r_tlb_tag_ram_fpga bw_r_tlb_tag_ram_fpga (
+	.rd_tag(rd_tag),
+	.rw_index_vld(rw_index_vld),
+	.wr_vld_tmp(wr_vld_tmp),
+	.clk(clk),
+	.rw_index(rw_index),
+	.tlb_index(tlb_index),
+	.tlb_index_vld(tlb_index_vld),
+	.rw_disable(rw_disable),
+	.rst_tri_en(rst_tri_en),
+	.wr_tte_tag(wr_tte_tag),
+	.tlb_entry_vld(tlb_entry_vld),
+	.tlb_entry_used(tlb_entry_used),
+	.tlb_entry_locked(tlb_entry_locked),
+	.rd_tte_tag(rd_tte_tag),
+	.mismatch(mismatch),
+	.tlb_writeable(tlb_writeable),
+	.cam_vld(cam_vld),
+	.wr_vld(wr_vld),
+	.cam_data(cam_data),
+	.cam_hit(cam_hit),
+	.cam_pid(cam_pid),
+	.demap_all(demap_all),
+	.demap_hit(demap_hit),
+	.demap_other(demap_other)
+);
+
+// Data storage. Read either by rw_index (rd_data) or by the encoded cam hit
+// index; written by rw_index or by the replacement index tlb_index.
+bw_r_tlb_data_ram_fpga bw_r_tlb_data_ram_fpga (
+	.rd_data(rd_data),
+	.rw_index_vld(rw_index_vld),
+	.wr_vld_tmp(wr_vld_tmp),
+	.clk(clk),
+	.cam_vld(cam_vld),
+	.cam_index(cam_hit_encoded),
+        .cam_hit_any(cam_hit_any),
+	.rw_index(rw_index),
+	.tlb_index(tlb_index),
+	.tlb_index_vld(tlb_index_vld),
+	.rw_disable(rw_disable),
+	.rst_tri_en(rst_tri_en),
+	.wr_tte_data(wr_tte_data),
+	.rd_tte_data(rd_tte_data),
+	.wr_vld(wr_vld)
+);
+
+
+endmodule
+
+//=========================================================================================
+//	bw_r_tlb_tag_ram_fpga - TLB tag storage and CAM compare
+//=========================================================================================
+// Storage
+//	- Tags are kept in two arrays that are always written together on the
+//	  falling clock edge: tte_tag_ram serves indexed tag reads, tte_tag_ram2
+//	  serves the CAM compare loop.
+//	- A write happens for an indexed write (wren) or a replacement write
+//	  (tlben); the address is rw_index for wren, otherwise tlb_index.
+//	- The Used and Locked bits of every entry are held in flops
+//	  (tlb_entry_used, tlb_entry_locked) and loaded from the written tag.
+// Used-bit maintenance (falling edge, when no write occurs)
+//	- A cam hit sets the Used bit of the hitting entry.
+//	- Used bits of entries that are not Locked are cleared when the TLB is
+//	  not writeable and no cam, write, tag read or rst_tri_en is active.
+// Tag read
+//	- The addressed tag is captured on the falling edge (tmp_tag) and
+//	  presented on the following rising edge in rd_tte_tag, with bits 26:24
+//	  replaced by the live valid, locked and used state of the entry.
+//	- When no tag read is active but a write is, rd_tte_tag takes the value
+//	  being written.
+// CAM compare
+//	- For every entry, the four VA fields, the context, the PID and the
+//	  Real bit are compared with cam_data/cam_pid and combined into
+//	  mismatch[n]; cam_hit and demap_hit are derived from it.
+// All clocked processes use nonblocking assignments so that results do not
+// depend on the order in which the simulator runs processes on an edge.
+//=========================================================================================
+module bw_r_tlb_tag_ram_fpga(
+	rd_tag,
+	rw_index_vld,
+	wr_vld_tmp,
+	clk,
+	rw_index,
+	tlb_index,
+	tlb_index_vld,
+	rw_disable,
+	rst_tri_en,
+	wr_tte_tag,
+	tlb_entry_vld,
+	tlb_entry_used,
+	tlb_entry_locked,
+	rd_tte_tag,
+	mismatch,
+	tlb_writeable,
+	wr_vld,
+	cam_vld,
+	cam_data,
+	cam_hit,
+	cam_pid,
+	demap_all,
+	demap_other,
+	demap_hit);
+
+input		rd_tag; 		// tag read request
+input		rw_index_vld;		// rw_index is valid (indexed access)
+input		wr_vld_tmp;		// write request before rst_tri_en gating
+input		clk;
+input	[`TLB_INDEX_WIDTH-1:0]	rw_index;	// index for read / indexed write
+input	[`TLB_INDEX_WIDTH-1:0]	tlb_index;	// replacement index
+input		tlb_index_vld;		// tlb_index is valid
+input		rw_disable;		// blocks reads and writes
+input		rst_tri_en;
+input	[58:0]	wr_tte_tag;		// tag to be written
+input	[`TLB_ENTRIES-1:0]	tlb_entry_vld;	// per-entry valid, kept by the parent
+input		tlb_writeable;
+input		wr_vld;			// write request after rst_tri_en gating
+input	[2:0]	cam_pid;
+input		demap_all;
+input		demap_other;
+input	[53:0]	cam_data;		// key[53:13] and context[12:0]
+input		cam_vld ;
+
+output	[`TLB_ENTRIES-1:0]	cam_hit ;
+output	[`TLB_ENTRIES-1:0]	demap_hit ;
+output	[`TLB_ENTRIES-1:0]	tlb_entry_used;
+output	[`TLB_ENTRIES-1:0]	tlb_entry_locked;
+reg	[`TLB_ENTRIES-1:0]	tlb_entry_locked ;
+
+output	[58:0]	rd_tte_tag;
+reg	[58:0]	rd_tte_tag;
+output	[`TLB_ENTRIES-1:0]	mismatch;
+
+reg	[`TLB_ENTRIES-1:0]	sat;
+
+reg	[`TLB_ENTRIES-1:0]	mismatch;
+reg	[`TLB_ENTRIES-1:0]	cam_hit ;
+reg	[`TLB_ENTRIES-1:0]	demap_all_but_locked_hit ;
+reg	[58:0]	tag ;	// CHANGE
+
+
+reg	[`TLB_ENTRIES-1:0]	mismatch_va_b47_28;
+reg	[`TLB_ENTRIES-1:0]	mismatch_va_b27_22;
+reg	[`TLB_ENTRIES-1:0]	mismatch_va_b21_16;
+reg	[`TLB_ENTRIES-1:0]	mismatch_va_b15_13;
+reg	[`TLB_ENTRIES-1:0]	mismatch_ctxt;
+reg	[`TLB_ENTRIES-1:0]	mismatch_pid;
+reg	[`TLB_ENTRIES-1:0]	mismatch_type;
+reg	[`TLB_ENTRIES-1:0]	tlb_entry_used ;
+
+integer i,j,n,m, w, p, k, s, t;
+
+
+reg	[58:0]		tte_tag_ram  [`TLB_ENTRIES-1:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+
+reg	[58:0]	tmp_tag ;
+
+// Write decode: wren is an indexed write, tlben a write to the replacement
+// index; both are blocked by rw_disable.
+wire wren = rw_index_vld & wr_vld_tmp & ~rw_disable;
+wire tlben = tlb_index_vld & ~rw_index_vld & wr_vld_tmp & ~rw_disable;
+wire  [`TLB_INDEX_WIDTH-1:0] wr_addr = wren ? rw_index : tlb_index;
+
+
+always	@ (negedge clk) begin
+//=========================================================================================
+//	Write TLB
+//=========================================================================================
+
+	if(wren | tlben) begin
+		tte_tag_ram[wr_addr] <= wr_tte_tag[58:0];
+		tlb_entry_used[wr_addr] <= wr_tte_tag[`STLB_TAG_U];
+		// Nonblocking, like the other state updated on this edge.
+		tlb_entry_locked[wr_addr] <= wr_tte_tag[`STLB_TAG_L];
+	end else begin
+	  // No write: a cam hit sets Used; Used bits of unlocked entries are
+	  // cleared when the TLB is not writeable and it is otherwise idle.
+	  tlb_entry_used <= (tlb_entry_used | cam_hit) & (tlb_entry_locked | ~{`TLB_ENTRIES{~tlb_writeable & ~cam_vld & ~wr_vld & ~rd_tag & ~rst_tri_en}}) ;
+        end
+
+//=========================================================================================
+//	Read STLB
+//=========================================================================================
+
+	if(rd_tag & ~rw_disable) begin
+		tmp_tag  <= tte_tag_ram[rw_index];
+	end
+
+
+end // always
+
+// Output register for tag reads. Bits 26:24 of the stored tag are replaced
+// by the live valid / locked / used state of the addressed entry. A write
+// without a concurrent tag read is reflected on the read bus.
+always @(posedge clk) begin
+  if(rd_tag & ~rw_disable)
+    rd_tte_tag[58:0] <= {tmp_tag[58:27], tlb_entry_vld[rw_index], tlb_entry_locked[rw_index], tlb_entry_used[rw_index], tmp_tag[23:0]};
+  else if(wren | tlben)
+    rd_tte_tag[58:0] <= wr_tte_tag[58:0];
+end
+
+// Second copy of the tags, read by the compare loop below.
+reg	[58:0]		tte_tag_ram2  [`TLB_ENTRIES-1:0];
+
+always	@ (negedge clk) begin
+  if(wren | tlben)
+    tte_tag_ram2[wr_addr] <= wr_tte_tag[58:0];
+end
+
+
+// Per-entry compare of the stored tag against the cam / demap key.
+// NOTE(review): tte_tag_ram2 is read here but is not in the sensitivity
+// list, so in simulation the results are not re-evaluated after a tag write
+// until one of the listed signals changes - confirm this is acceptable.
+always	@ (cam_data or cam_pid or cam_vld or demap_all
+           or demap_other or tlb_entry_vld)
+	begin
+	
+		for (n=0;n<`TLB_ENTRIES;n=n+1)
+			begin
+			tag[58:0] = tte_tag_ram2[n] ;	// CHANGE
+
+			mismatch_va_b47_28[n] = 
+			(tag[`STLB_TAG_VA_47_28_HI:`STLB_TAG_VA_47_28_LO] 
+			!= cam_data[`CAM_VA_47_28_HI+13:`CAM_VA_47_28_LO+13]);
+
+			mismatch_va_b27_22[n] = 
+			(tag[`STLB_TAG_VA_27_22_HI:`STLB_TAG_VA_27_22_LO] 
+			!= cam_data[`CAM_VA_27_22_HI+13:`CAM_VA_27_22_LO+13]);
+
+			mismatch_va_b21_16[n] = 
+			(tag[`STLB_TAG_VA_21_16_HI:`STLB_TAG_VA_21_16_LO]
+			!= cam_data[`CAM_VA_21_16_HI+13:`CAM_VA_21_16_LO+13]) ;
+
+			mismatch_va_b15_13[n] = 
+			(tag[`STLB_TAG_VA_15_13_HI:`STLB_TAG_VA_15_13_LO]
+			!= cam_data[`CAM_VA_15_13_HI+13:`CAM_VA_15_13_LO+13]) ;
+
+			mismatch_ctxt[n] = 
+			(tag[`STLB_TAG_CTXT_12_0_HI:`STLB_TAG_CTXT_12_0_LO] 
+			!= cam_data[`CAM_CTXT_12_0_HI:`CAM_CTXT_12_0_LO]) ;
+			
+			mismatch_pid[n] = (tag[`STLB_TAG_PID_HI:`STLB_TAG_PID_LO] != cam_pid[2:0]) ;
+			mismatch_type[n] = (tag[`STLB_TAG_R] ^ cam_data[`CAM_REAL_V+13]);
+
+			// A VA field counts only when the key marks it valid and,
+			// for the three lower fields, the stored tag does too. The
+			// context counts only while the key's GK bit is 0. The
+			// Real-bit compare is dropped for demap_all.
+			mismatch[n] =
+			(mismatch_va_b47_28[n] & cam_data[`CAM_VA_47_28_V+13]) 				|
+			(mismatch_va_b27_22[n] & tag[`STLB_TAG_VA_27_22_V] & cam_data[`CAM_VA_27_22_V+13]) 	|
+			(mismatch_va_b21_16[n] & tag[`STLB_TAG_VA_21_16_V] & cam_data[`CAM_VA_21_16_V+13]) 	|
+			(mismatch_va_b15_13[n] & tag[`STLB_TAG_VA_15_13_V] & cam_data[`CAM_VA_15_13_V+13]) 	|
+			(mismatch_ctxt[n] & ~cam_data[`CAM_CTXT_GK+13])	|
+			(mismatch_type[n] & ~demap_all)  	| 
+			mismatch_pid[n] ;	// pid always included in mismatch calculations
+
+			// demap_all only applies to entries whose stored L bit is 0.
+			demap_all_but_locked_hit[n] = ~tag[`STLB_TAG_L] & demap_all ;
+
+			cam_hit[n] 	= ~mismatch[n] & cam_vld   & tlb_entry_vld[n] ;
+		end
+
+	end  // always
+
+	// Demap hit: a valid, matching entry while demap_other is active; for
+	// demap_all the entry must additionally be unlocked.
+	assign demap_hit = demap_all ? ~mismatch & demap_all_but_locked_hit & tlb_entry_vld & {`TLB_ENTRIES{demap_other}}
+				     : ~mismatch & tlb_entry_vld & {`TLB_ENTRIES{demap_other}};
+
+endmodule
+
+
+
+// bw_r_tlb_data_ram_fpga - TLB data storage (64 entries x 43 bits).
+//
+// Write (rising edge): to rw_index for an indexed write, otherwise to the
+// replacement index tlb_index; blocked by rw_disable.
+// Read (rising edge): from rw_index for a data read, otherwise from the
+// encoded cam hit index. A cam access without any hit loads X into the read
+// register (simulation aid).
+// Output: after a write the output shows the written data (delayed by one
+// clock) until the next enabled read switches it back to the read register.
+// The select flop has no reset, so it is unknown until the first enabled
+// write or read.
+// rst_tri_en and wr_vld are ports of the module but are not used inside it.
+module bw_r_tlb_data_ram_fpga(rd_data, rw_index_vld, wr_vld_tmp, clk, cam_vld,
+        rw_index, tlb_index, tlb_index_vld, rw_disable, rst_tri_en, wr_tte_data,
+        rd_tte_data, cam_index, cam_hit_any, wr_vld);
+
+        input                   clk;
+        input                   rw_disable;
+        input                   rst_tri_en;
+
+        // indexed read / write request
+        input                   rd_data;
+        input                   rw_index_vld;
+        input   [5:0]           rw_index;
+        input                   wr_vld_tmp;
+        input                   wr_vld;
+        input   [42:0]          wr_tte_data;
+
+        // replacement index
+        input                   tlb_index_vld;
+        input   [5:0]           tlb_index;
+
+        // cam lookup
+        input                   cam_vld;
+        input                   cam_hit_any;
+        input   [5:0]           cam_index;
+
+        output  [42:0]          rd_tte_data;
+        wire    [42:0]          rd_tte_data;
+
+        reg     [42:0]          tte_data_ram [63:0];
+
+        reg     [42:0]          ram_rd_q;       // registered RAM read data
+        reg     [42:0]          wr_data_q;      // write data, one clock late
+        reg                     fwd_wr_data;    // 1 : output shows wr_data_q
+
+        // ---------------------------------------------------------------
+        // Write port
+        // ---------------------------------------------------------------
+        wire                    indexed_wr = rw_index_vld & wr_vld_tmp;
+        wire                    replace_wr = (tlb_index_vld & (~rw_index_vld)) & wr_vld_tmp;
+        wire                    wr_en      = (indexed_wr & (~rw_disable)) |
+                                             (replace_wr & (~rw_disable));
+        wire    [5:0]           wr_addr    = indexed_wr ? rw_index : tlb_index;
+
+        always @(posedge clk)
+          begin
+            if (wr_en)
+              tte_data_ram[wr_addr] <= wr_tte_data[42:0];
+          end
+
+        // ---------------------------------------------------------------
+        // Read port
+        // ---------------------------------------------------------------
+        wire                    cam_rd  = cam_vld & (~rw_disable);
+        wire                    rd_en   = (rd_data & (~rw_disable)) | cam_rd;
+        wire    [5:0]           rd_addr = rd_data ? rw_index : cam_index;
+
+        always @(posedge clk)
+          begin
+            //required for simulation; otherwise regression fails...
+            if (cam_rd & (!cam_hit_any))
+              ram_rd_q <= 43'bx;
+            else if (rd_en)
+              ram_rd_q[42:0] <= tte_data_ram[rd_addr];
+          end
+
+        // ---------------------------------------------------------------
+        // Output select: last enabled operation decides the source
+        // ---------------------------------------------------------------
+        always @(posedge clk)
+          begin
+            wr_data_q <= wr_tte_data;
+            if (wr_en)
+              fwd_wr_data <= 1'b1;
+            else if (rd_en)
+              fwd_wr_data <= 1'b0;
+          end
+
+        assign rd_tte_data = fwd_wr_data ? wr_data_q : ram_rd_q;
+
+endmodule
+
+`else
+
+module bw_r_tlb ( /*AUTOARG*/
+   // Outputs
+   tlb_rd_tte_tag, tlb_rd_tte_data, tlb_pgnum, tlb_pgnum_crit, 
+   tlb_cam_hit, cache_way_hit, cache_hit, so, 
+   // Inputs
+   tlb_cam_vld, tlb_cam_key, tlb_cam_pid,  
+   tlb_demap_key, tlb_addr_mask_l, tlb_ctxt, 
+   tlb_wr_vld, tlb_wr_tte_tag, tlb_wr_tte_data, tlb_rd_tag_vld, 
+   tlb_rd_data_vld, tlb_rw_index, tlb_rw_index_vld, tlb_demap, 
+   tlb_demap_auto, tlb_demap_all, cache_ptag_w0, cache_ptag_w1, 
+   cache_ptag_w2, cache_ptag_w3, cache_set_vld, tlb_bypass_va, 
+   tlb_bypass, se, si, hold, adj, arst_l, rst_soft_l, rclk,
+   rst_tri_en
+   ) ;	
+
+
+input			tlb_cam_vld ;		// ld/st requires xlation. 
+input	[40:0]		tlb_cam_key ;		// cam data for loads/stores;includes vld 
+						// CHANGE : add real bit for cam.
+input	[2:0]		tlb_cam_pid ;		// NEW: pid for cam. 
+input	[40:0]		tlb_demap_key ;		// cam data for demap; includes vlds. 
+						// CHANGE : add real bit for demap
+input			tlb_addr_mask_l ;	// address masking occurs
+input	[12:0]		tlb_ctxt ;		// context for cam xslate/demap. 
+input			tlb_wr_vld;		// write to tlb. 
+input	[58:0]		tlb_wr_tte_tag;		// CHANGE:tte tag to be written (55+4-1)
+						// R(+1b),PID(+3b),G(-1b). 
+input	[42:0]		tlb_wr_tte_data;	// tte data to be written.
+						// No change(!!!) - G bit becomes spare
+input			tlb_rd_tag_vld ;	// read tag
+input			tlb_rd_data_vld ;	// read data
+input	[5:0]		tlb_rw_index ;		// index to read/write tlb.
+input			tlb_rw_index_vld ;	// indexed write else use algorithm.
+input			tlb_demap ;		// demap : page/ctxt/all/auto.  
+input			tlb_demap_auto ;	// demap is of type auto 
+input			tlb_demap_all;		// demap-all operation : encoded separately.
+input  	[29:0]    	cache_ptag_w0;       	// way1 30b(D)/29b(I) tag.
+input  	[29:0]    	cache_ptag_w1;       	// way2 30b(D)/29b(I) tag.
+input  	[29:0]     	cache_ptag_w2;       	// way0 30b(D)/29b(I) tag.
+input  	[29:0]     	cache_ptag_w3;       	// way3 30b(D)/29b(I) tag.
+input	[3:0]		cache_set_vld;       	// set vld-4 ways
+input	[12:10]		tlb_bypass_va;	   	// bypass va.other va bits from cam-data
+input			tlb_bypass;		// bypass tlb xslation
+
+input			se ;			// scan-enable ; unused
+input			si ;			// scan data in ; unused
+input			hold ;			// scan hold signal
+input	[7:0]		adj ;			// self-time adjustment ; unused
+input			arst_l ;		// synchronous for tlb ; unused	
+input			rst_soft_l ;		// software reset - asi
+input			rclk;
+input			rst_tri_en ;
+
+output	[58:0]		tlb_rd_tte_tag;		// CHANGE: tte tag read from tlb.
+output	[42:0]		tlb_rd_tte_data;	// tte data read from tlb.
+// Need two ports for tlb_pgnum - critical and non-critical.
+output	[39:10]		tlb_pgnum ;		// bypass or xslated pgnum
+output	[39:10]		tlb_pgnum_crit ;	// bypass or xslated pgnum - critical
+output			tlb_cam_hit ;		// xlation hits in tlb.
+output	[3:0]		cache_way_hit;		// tag comparison results.
+output			cache_hit;		// tag comparison result - 'or' of above.
+
+//output			tlb_writeable ;		// tlb can be written in current cycle.
+
+output			so ;		// scan data out ; unused
+
+wire	[53:0]		tlb_cam_data ;
+wire	[58:0]		wr_tte_tag ;	// CHANGE
+wire	[42:0]		wr_tte_data ;
+wire	[29:3]		phy_pgnum_m;
+wire	[29:0]		pgnum_m;
+wire 	[63:0]		used ;
+wire			tlb_not_writeable ;
+wire	[40:25] 	tlb_cam_key_masked ;
+wire	[26:0]		tlb_cam_comp_key ;
+wire			cam_vld ;
+wire			demap_other ;
+wire	[3:0]   	cache_way_hit ;
+
+reg			tlb_not_writeable_d1 ;
+reg			tlb_writeable ;
+reg	[58:0]		tte_tag_ram  [63:0] ;	// CHANGE
+reg	[42:0]		tte_data_ram [63:0] ;
+reg	[63:0]		tlb_entry_vld ;
+reg	[63:0]		tlb_entry_locked ;
+reg	[63:0]		ademap_hit ;
+reg	[58:0]		rd_tte_tag ;	// CHANGE
+reg	[42:0]		rd_tte_data ;	
+reg	[58:0]		tlb_rd_tte_tag ; // CHANGE	
+reg	[42:0]		tlb_rd_tte_data ;	
+reg			cam_vld_tmp ;
+reg	[2:0]		cam_pid ;
+reg	[53:0]		cam_data ;
+reg			demap_auto, demap_other_tmp, demap_all ;
+reg	[63:0]		mismatch ;
+reg	[63:0]		cam_hit ;
+reg	[63:0]		demap_hit ;
+reg	[63:0]		demap_all_but_locked_hit ;
+reg	[63:0]		mismatch_va_b47_28 ;
+reg	[63:0]		mismatch_va_b27_22 ;
+reg	[63:0]		mismatch_va_b21_16 ;
+reg	[63:0]		mismatch_va_b15_13 ;
+reg	[63:0]		mismatch_ctxt ;
+reg	[63:0]		mismatch_pid ;
+reg	[58:0]		tag ;	// CHANGE
+reg	[63:0]		rw_wdline ;
+reg	[63:0]		tlb_entry_used ;
+reg	[63:0]		tlb_entry_replace ;
+reg	[63:0]		tlb_entry_replace_d2 ;
+reg	[29:0]		pgnum_g ;
+reg     [3:0]		cache_set_vld_g;
+reg	[29:0]		cache_ptag_w0_g,cache_ptag_w1_g;
+reg	[29:0]		cache_ptag_w2_g,cache_ptag_w3_g;
+reg			wr_vld_tmp;
+reg			rd_tag; 
+reg			rd_data;
+reg			rw_index_vld;
+reg	[5:0]		rw_index;
+reg	[63:0]		sat ;
+
+wire	[29:0] 		vrtl_pgnum_m;
+wire			bypass ;
+
+wire			wr_vld ;
+
+integer	i,j,k,l,m,n,p,r,s,t,u,w;
+
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+
+// Some bits are removed from the tag and data. 
+// 'U' must be defined as a '1' on a write.
+// 'L' required for demap all function.
+// Do not need an internal valid bit for va range 47:22.
+// These bits are always valid for a page. 
+// 
+// TTE STLB_TAG
+//
+//`define	STLB_TAG_PID_HI		58	: NEW PID - bit2
+//`define	STLB_TAG_PID_LO		56	: NEW PID - bit0
+//`define	STLB_TAG_R		55	: NEW Real bit
+//`define 	STLB_TAG_PARITY		54	// Parity kept in same posn to avoid having
+//`define	STLB_TAG_VA_47_28_HI 	53	// to redo interface
+//`define	STLB_TAG_VA_47_28_LO 	34
+//`define	STLB_TAG_VA_27_22_HI 	33	
+//`define	STLB_TAG_VA_27_22_LO 	28
+//`define	STLB_TAG_27_22_V	27	
+//`define	STLB_TAG_V		26	: valid for entry. Write of 0 resets it.
+//`define	STLB_TAG_L		25
+//`define	STLB_TAG_U		24	
+//`define	STLB_TAG_VA_21_16_HI 	23
+//`define	STLB_TAG_VA_21_16_LO  	18
+//`define	STLB_TAG_VA_21_16_V  	17	  	
+//`define	STLB_TAG_VA_15_13_HI 	16
+//`define	STLB_TAG_VA_15_13_LO  	14
+//`define	STLB_TAG_VA_15_13_V  	13
+//`define	STLB_TAG_CTXT_12_0_HI  	12	// removed Global bit
+//`define	STLB_TAG_CTXT_12_0_LO  	0
+//// 				Total - 59b
+////
+//// TTE STLB_DATA
+////
+//// Soft[12:7] & Soft2[58:50] are removed.
+//// Diag[49:41] are removed. Used bit used for Diag[0] on read.
+//// CV is included for software correctness.
+//// PA<40> is removed as it is not used.
+//// G/L present in data even though present in tag : can't read out simultaneously.
+//   (Unfortunately this is no longer correct. For data read, tag is also read
+//   simultaneously to get valid bit, used bits).
+//`define 	STLB_DATA_PARITY  	42 
+//`define 	STLB_DATA_PA_39_28_HI 	41	// CHANGE
+//`define 	STLB_DATA_PA_39_28_LO 	30
+//`define 	STLB_DATA_PA_27_22_HI 	29	// CHANGE
+//`define 	STLB_DATA_PA_27_22_LO 	24
+//`define 	STLB_DATA_27_22_SEL	23
+//`define 	STLB_DATA_PA_21_16_HI 	22	// CHANGE
+//`define 	STLB_DATA_PA_21_16_LO 	17
+//`define 	STLB_DATA_21_16_SEL	16
+//`define 	STLB_DATA_PA_15_13_HI 	15	
+//`define 	STLB_DATA_PA_15_13_LO 	13
+//`define 	STLB_DATA_15_13_SEL	12
+//`define 	STLB_DATA_V  		11	: static, does not get modified.
+//`define 	STLB_DATA_NFO  		10
+//`define 	STLB_DATA_IE   		9
+//`define 	STLB_DATA_L 		8 	: added for read.
+//`define 	STLB_DATA_CP 		7 
+//`define 	STLB_DATA_CV 		6 
+//`define 	STLB_DATA_E  		5 
+//`define 	STLB_DATA_P  		4 
+//`define 	STLB_DATA_W  		3 
+//`define 	STLB_DATA_SPARE_HI  	2	: Global bit has been removed
+//`define 	STLB_DATA_SPARE_LO	0  	 
+// 				Total - 43b
+
+// Valid bits for key(tlb_cam_key/tlb_demap_key).
+// Total - 41b
+//`define	CAM_VA_47_28_HI  	40
+//`define	CAM_VA_47_28_LO  	21
+//`define	CAM_VA_47_28_V  	20	// b47-28 participate in match
+//`define	CAM_VA_27_22_HI  	19
+//`define	CAM_VA_27_22_LO  	14
+//`define	CAM_VA_27_22_V  	13	// b27-22 participate in match
+//`define	CAM_VA_21_16_HI  	12
+//`define	CAM_VA_21_16_LO  	7
+//`define	CAM_VA_21_16_V  	6	// b21-16 participate in match
+//`define	CAM_VA_15_13_HI 	5	
+//`define	CAM_VA_15_13_LO 	3	
+//`define	CAM_VA_15_13_V 	 	2	// b15-13 participate in match
+//`define	CAM_CTXT_GK 		1	// Context participates in match
+//`define	CAM_REAL_V 		0	// cam/demap applies to real mapping
+					
+
+// ctxt port is different from cam key port even though both are
+// required for cam. (tlb_ctxt)
+// If Gk is set then ctxt will not participate in match.
+// Total - 14b
+`define	CAM_CTXT_12_0_HI 	12 	// 13b ctxt
+`define	CAM_CTXT_12_0_LO 	0 		
+
+
+//=========================================================================================
+//	What's Left :
+//=========================================================================================
+
+// Scan Insertion - scan to be ignored in formal verification for now.
+
+//=========================================================================================
+//	Design Notes.
+//=========================================================================================
+
+// - Supported Demap Operations - By Page, By Context, All But
+// Locked, Autodemap, Invalidate-All i.e., reset. Demap Partition is
+// not supported - it is mapped to demap-all by logic. 
+// - Interpretation of demap inputs
+//	- tlb_demap - this is used to signal demap by page, by ctxt
+//	,all, and autodemap. 
+//	- tlb_demap_ctxt - If a demap_by_ctxt operation is occurring then
+//	this signal and tlb_demap must be active.
+//	- tlb_demap_all - demap all operation. If a demap_all operation is
+//	occurring, then tlb_demap_all must be asserted with tlb_demap. 
+// - Reset is similar to demap-all except that *all* entries
+// are invalidated. The action is initiated by software. The reset occurs
+// on the negedge and is synchronous with the clk.
+// - TTE Tag and Data
+// 	- The TTE tag and data can be read together. Each will have its 
+//	own bus and the muxing will occur externally. The tag needs to
+//	be read on a data request to supply the valid bit.
+// 	- The TTE tag and data can be written together.
+// - The cam hit is a separate output signal based on
+// the match signals.
+// - Read/Write may occur based on supplied index. If not valid
+// then use replacement way determined by algorithm to write.
+// - Only write can use replacement way determined by algorithm.
+// - Data is formatted appr. on read or write in the MMU. 
+// - The TLB will generate a signal which reports whether the 
+// tlb can be filled in the current cycle or not.
+// **Physical Tag Comparison**
+// For I-SIDE, comparison is of 28b, whereas for D-side, comparison is of 29b. The actual
+// comparison, due to legacy, is for 30b.
+// For the I-TLB, va[11:10] must be hardwired to the same value as the lsb of the 4 tags
+// at the port level. Since the itag is only 28b, add two least significant bits to extend it to 30b.
+// Similarly, for the dside, va[10] needs to be made same.	
+// **Differentiating among Various TLB Operations**
+// Valid bits are now associated with the key to allow selective incorporation of
+// match results. The 5 valid bits are : v4(b47-28),v3(b27-22),v2(21-16),v1(b15-13)
+// and Gk(G bit for auto-demap). The rules of use are :
+//	- cam: v4-v1 are set high. G=~cam_real=0/1.
+//	- demap_by_page : v4-v1 are set high. G=1. cam_real=0.
+// 	- demap_by_ctxt : v4-v1 are low. G=1. cam_real=0
+//	- demap_all : v4-v1 are don't-care. G=x. cam_real=x
+//	- autodemap : v4-v1 are based on page size of incoming tte. G=~cam_real=0/1.
+// Note : Gk is now used only to void a context match on a Real Translation.
+// In general, if a valid bit is low then the corresponding va field will not take
+// part in the match. Similarly, for the ctxt, if Gk=1, the ctxt will participate
+// in the match.
+//
+// Demap Table (For Satya) :
+// Note : To include a context match, Gk must be set to 1.
+//--------------------------------------------------------------------------------------------------------
+//tlb_demap tlb_demap_all  tlb_ctxt Gk	Vk4 Vk3	Vk2 Vk1 Real	Operation
+//--------------------------------------------------------------------------------------------------------
+//0		x		x   x	x   x	x   x   0	No demap operation
+//1		0		0   1	1   1	1   1	0	Demap by page
+//1		0		0   1	1   0	0   0	0/1	256M demap(auto demap)
+//1		0		0   0	1   0	0   0	0	256M demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	0   0	0/1	4M demap(auto demap)
+//1		0		0   0	1   1	0   0	0	4M demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	1   0	0/1	64k demap(auto demap)
+//1		0		0   0	1   1	1   0	0	64k demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	1   1	0/1	8k demap(auto demap)
+//1		0		0   0	1   1	1   1	0	8k demap(auto demap) (*Illgl*)
+//1		0		1   1	0   0	0   0	0	demap by ctxt
+//1		1		x   x	x   x	x   x	0	demap_all
+//------------------------------------------------------------------------------------------
+//-----
+//All other are illegal combinations
+//
+//=========================================================================================
+//	Changes related to Hypervisor/Legacy Compatibility
+//=========================================================================================
+//
+// - Add PID. PID does not affect demap-all. Otherwise it is included in cam, other demap
+// operations and auto-demap.
+// - Add R. Real translation ignores context. This is controlled externally by Gk.
+// - Remove G bit for tte. Input remains in demap-key/cam-key to allow for disabling
+//   of context match Real Translation  
+// - Final Page Size support - 8KB,64KB,4M,256M
+// - SPARC_HPV_EN has been defined to enable new tlb design support. 
+// Issues : 
+// -Max ptag size is now 28b. Satya, will this help the speed at all. I doubt it !
+
+//=========================================================================================
+//	Miscellaneous
+//=========================================================================================
+   wire clk;			// local alias of rclk
+   assign clk = rclk;
+   
+wire async_reset, sync_reset ;
+assign	async_reset = ~arst_l ; 			// hardware: active-high copy of arst_l
+assign	sync_reset = (~rst_soft_l & ~rst_tri_en) ;	// software: suppressed while rst_tri_en is high
+
+wire rw_disable ;
+// INNO - wr/rd gated off. Not required as rst_tri_en is
+// asserted, but implemented in addition in schematic.
+assign	rw_disable = ~arst_l | rst_tri_en ;
+
+//=========================================================================================
+// 	Stage Data
+//=========================================================================================
+// Apply address masking
+assign	tlb_cam_key_masked[40:25]
+	= {16{tlb_addr_mask_l}} & 
+		tlb_cam_key[`CAM_VA_47_28_HI:`CAM_VA_47_28_LO+4] ;
+
+// Reconstitute cam data CHANGE : add additional bit for real mapping
+assign	tlb_cam_data[53:13] = tlb_demap ? 
+	tlb_demap_key[40:0] :
+	{tlb_cam_key_masked[40:25],tlb_cam_key[`CAM_VA_47_28_LO+3:0]} ; 
+
+assign tlb_cam_comp_key[26:0] = 
+		tlb_demap ?
+			{tlb_demap_key[32:21], tlb_demap_key[19:14],tlb_demap_key[12:7],
+			tlb_demap_key[5:3]} :
+			{tlb_cam_key_masked[32:25],tlb_cam_key[24:21],
+			tlb_cam_key[19:14],tlb_cam_key[12:7],tlb_cam_key[5:3]} ;
+
+assign	tlb_cam_data[12:0] = tlb_ctxt[12:0] ;
+
+// These signals are flow-thru.
+assign	wr_tte_tag[58:0] 	= tlb_wr_tte_tag[58:0] ;	// CHANGE
+assign	wr_tte_data[42:0] 	= tlb_wr_tte_data[42:0] ;
+
+// CHANGE(SATYA) - Currently the rw_index/rw_index_vld are shared by both reads
+// and writes. However, writes are done in the cycle of broadcast, whereas
+// the reads are done a cycle later, as given in the model(incorrect) 
+// They have to be treated uniformly. To make the model work, I've assumed the read/write 
+// are done in the cycle the valids are broadcast. 
+always @ (posedge clk)
+	begin
+	if (hold)	// hold asserted: every staged request register keeps its value
+		begin
+		cam_pid[2:0]		<= cam_pid[2:0] ;
+		cam_vld_tmp		<= cam_vld_tmp ;
+		cam_data[53:0] 		<= cam_data[53:0] ;
+		demap_other_tmp		<= demap_other_tmp ;
+		demap_auto		<= demap_auto ;
+		demap_all		<= demap_all ;
+		wr_vld_tmp 		<= wr_vld_tmp ;
+		rd_tag 			<= rd_tag ;
+		rd_data			<= rd_data ;
+		rw_index_vld		<= rw_index_vld ;
+		rw_index[5:0]		<= rw_index[5:0] ; 	
+		end
+	else		// otherwise: capture this cycle's tlb_* request inputs
+		begin
+		cam_pid[2:0]		<= tlb_cam_pid[2:0] ;
+		cam_vld_tmp		<= tlb_cam_vld ;	// gated with rst_tri_en_lat to form cam_vld
+		cam_data[53:0] 		<= tlb_cam_data[53:0] ;	// [53:13] = cam/demap key, [12:0] = tlb_ctxt
+		demap_other_tmp		<= tlb_demap ;		// gated with rst_tri_en to form demap_other
+		demap_auto		<= tlb_demap_auto ;
+		demap_all		<= tlb_demap_all ;
+		wr_vld_tmp 		<= tlb_wr_vld ;		// gated with rst_tri_en to form wr_vld
+		rd_tag 			<= tlb_rd_tag_vld ;
+		rd_data			<= tlb_rd_data_vld ;
+		rw_index_vld		<= tlb_rw_index_vld ;
+		rw_index[5:0]		<= tlb_rw_index[5:0] ; 	
+		end
+
+	end
+
+// INNO - gate cam,demap,wr with rst_tri_en.
+reg rst_tri_en_lat;
+// rst_tri_en_lat resamples rst_tri_en on every transition of clk (the event control has no edge qualifier).
+ always        @ (clk)
+ rst_tri_en_lat = rst_tri_en;
+
+assign	cam_vld = cam_vld_tmp & ~rst_tri_en_lat ;	// cam uses the resampled copy
+assign	demap_other = demap_other_tmp & ~rst_tri_en ;	// demap and write use rst_tri_en directly
+assign	wr_vld = wr_vld_tmp & ~rst_tri_en ;
+
+//=========================================================================================
+//	Generate Write Wordlines
+//=========================================================================================
+
+// Based on static rw index	
+// This generates the wordlines for a read/write to the tlb based on index. Wordlines for
+// the write based on replacement alg. are muxed in later.
+always	@ (/*AUTOSENSE*/rd_data or rd_tag or rw_index or rw_index_vld
+           or wr_vld_tmp)
+	begin	// one-hot decode of rw_index into rw_wdline[63:0]; all zero when no access qualifies
+		for (i=0;i<64;i=i+1)
+			if ((rw_index[5:0] == i) & ((wr_vld_tmp & rw_index_vld) | rd_tag | rd_data))	// indexed write, or any read
+				rw_wdline[i] = 1'b1 ;
+			else	rw_wdline[i] = 1'b0 ;
+					
+	end
+
+//=========================================================================================
+//	Write TLB
+//=========================================================================================
+
+reg	[58:0]	tmp_tag ;	// scratch copy of one tte_tag_ram word
+reg	[42:0]	tmp_data ;	// scratch copy of one tte_data_ram word
+
+// Currently TLB_TAG and TLB_DATA RAMs are written in the B phase. 
+// Used bit is set on write in later code as it is also affected by read of tlb.
+always	@ (negedge clk)
+	begin	// order below: write, indexed read, cam-hit data read, used-bit set, used-bit clear
+		for (j=0;j<64;j=j+1)
+			if (((rw_index_vld & rw_wdline[j]) | (~rw_index_vld & tlb_entry_replace_d2[j])) & wr_vld_tmp & ~rw_disable)	// indexed entry, else replacement entry
+				begin
+				if (~rst_tri_en)
+					begin
+					tte_tag_ram[j] <= wr_tte_tag[58:0];	// CHANGE
+					tte_data_ram[j] <= wr_tte_data[42:0];
+					//tlb_entry_vld[j] <= wr_tte_tag[`STLB_TAG_V] ;
+					tlb_entry_used[j] <= wr_tte_tag[`STLB_TAG_U] ;
+					tlb_entry_locked[j] = wr_tte_tag[`STLB_TAG_L] ;	// NOTE(review): blocking, unlike the neighbouring assignments
+					// write-thru 
+					rd_tte_tag[58:0]  <= wr_tte_tag[58:0] ;	// CHANGE 
+					rd_tte_data[42:0] <=  wr_tte_data[42:0];
+
+					end
+				else	// NOTE(review): rw_disable already includes rst_tri_en, so this branch looks unreachable - confirm
+					begin
+					tmp_tag[58:0]=tte_tag_ram[j]; // use non-blocking
+					tmp_data[42:0]=tte_data_ram[j];
+					// INNO - read wins.
+					rd_tte_tag[58:0] <=	
+					{tmp_tag[58:27], tlb_entry_vld[j],tlb_entry_locked[j], 
+					tlb_entry_used[j], tmp_tag[23:0]}  ;
+					rd_tte_data[42:0] <= {tmp_data[42:12],tmp_data[11:0]} ;
+					end
+			
+			end
+
+//=========================================================================================
+//	Read STLB
+//=========================================================================================
+
+		for (m=0;m<64;m=m+1)
+			if (rw_wdline[m] & (rd_tag | rd_data) & ~rw_disable)
+				begin
+					tmp_tag  = tte_tag_ram[m] ;
+					tmp_data = tte_data_ram[m] ;
+					if (rd_tag)	// tag bits [26:24] come from the vld/locked/used arrays, not the RAM word
+						rd_tte_tag[58:0] <=	// CHANGE - Bug 2185
+						{tmp_tag[58:27], tlb_entry_vld[m],tlb_entry_locked[m], 
+						tlb_entry_used[m], tmp_tag[23:0]}  ;
+						//{tmp_tag[58:29], tlb_entry_vld[m],tlb_entry_locked[m], 
+						//tlb_entry_used[m], tmp_tag[25:0]}  ;
+					if (rd_data) begin
+						rd_tte_data[42:0] <= {tmp_data[42:12],tmp_data[11:0]} ;	// i.e. the full 43-bit data word
+					end
+
+				end
+
+		if (cam_vld & ~rw_disable)
+  		begin
+    			//Checking for no hit and multiple hits
+    			sat = 64'd0;	// sat = number of entries with cam_hit set
+    			for (w=0;w<64;w=w+1)
+    			begin
+      				if(cam_hit[w])
+      				begin
+        				sat = sat + 64'd1 ;
+      				end
+    			end
+			// Only one hit occur read the data
+    			if(sat == 64'd1)
+    			begin
+                        	for (p=0;p<64;p=p+1)
+				begin
+                                	if (cam_hit[p])
+                                	begin
+                                        	rd_tte_data[42:0] <= tte_data_ram[p] ;
+                                	end
+				end
+			end
+			else	// zero hits or more than one hit
+			begin
+				// INNO - just to keep the tool happy.
+				// ram cell will not be corrupted.
+				for (k=0;k<64;k=k+1)
+				begin
+					if (cam_hit[k])
+                        		tte_data_ram[k] <= 43'bx ;	// NOTE(review): the model does write x into the hit entries here
+				end
+                        	rd_tte_data[42:0] <= 43'bx ;
+			end
+		end
+
+                for (s=0;s<64;s=s+1)
+                        begin
+                                if (cam_hit[s])
+                                        tlb_entry_used[s] <= 1'b1;	// any cam hit marks the entry used
+                        end
+
+// Clear on following edge if necessary.
+// CHANGE(SATYA) : tlb_entry_used qualified with valid needs to be used to determine
+// whether the Used bits are to be cleared. This allows invalid entries created
+// by a demap to be used for replacement. Else we will ignore these entries
+// for replacement
+
+                //if (tlb_not_writeable)
+                if (~tlb_writeable & ~cam_vld & ~wr_vld & ~rd_tag & ~rst_tri_en)	// clear only when no cam, write or tag read is active
+                        begin
+                                for (t=0;t<64;t=t+1)
+                                        begin
+                                                //if (~tlb_entry_locked[t])
+                                                if (~tlb_entry_locked[t] & ~cam_vld & ~wr_vld)	// locked entries keep their used bit
+                                                        tlb_entry_used[t] <= 1'b0;
+                                        end
+                        end
+	end
+
+// Stage to next cycle: rd_tte_tag/rd_tte_data (written on negedge clk) are re-registered on the next posedge.
+always	@ (posedge clk)
+	begin
+		tlb_rd_tte_tag[58:0] 	<= rd_tte_tag[58:0] ;	// CHANGE
+		tlb_rd_tte_data[42:0] 	<= rd_tte_data[42:0] ;
+	end
+
+//=========================================================================================
+//	CAM/DEMAP STLB for xlation
+//=========================================================================================
+
+//  no_hit logic does not work because it is set in multiple clock
+//  domains and is reset before ever having a chance to be effective
+//reg	no_hit ;
+
+
+// Demap and CAM operation are mutually exclusive.
+
+always  @ ( negedge clk )
+	begin
+	// Per-entry autodemap hit: updated only when demap_auto & demap_other is true at this edge, held otherwise.
+		for (n=0;n<64;n=n+1)
+			begin
+                        /*if (demap_all)  begin
+                                if (demap_auto & demap_other) ademap_hit[n]   =
+                                        (~mismatch[n] & demap_all_but_locked_hit[n] & demap_other
+                                                & tlb_entry_vld[n]) ;
+                                end
+                        else    begin */
+                                if (demap_auto & demap_other) ademap_hit[n]    <=	// non-blocking: the tlb_entry_vld update reads ademap_hit on this same negedge
+                                        (~mismatch[n] & demap_other & tlb_entry_vld[n]) ;
+                                //end
+			end
+
+	end  // always
+
+always	@ (/*AUTOSENSE*/ /*memory or*/ 
+           cam_data or cam_pid or cam_vld or demap_all
+           or demap_other or tlb_entry_vld)
+	begin	// NOTE(review): tte_tag_ram is read below but is not in the sensitivity list
+	
+		for (n=0;n<64;n=n+1)
+			begin
+			tag[58:0] = tte_tag_ram[n] ;	// CHANGE
+
+			mismatch_va_b47_28[n] = 
+			(tag[`STLB_TAG_VA_47_28_HI:`STLB_TAG_VA_47_28_LO] 
+			!= cam_data[`CAM_VA_47_28_HI+13:`CAM_VA_47_28_LO+13]);	// +13: the key sits above the 13-bit ctxt in cam_data
+
+			mismatch_va_b27_22[n] = 
+			(tag[`STLB_TAG_VA_27_22_HI:`STLB_TAG_VA_27_22_LO] 
+			!= cam_data[`CAM_VA_27_22_HI+13:`CAM_VA_27_22_LO+13]);
+
+			mismatch_va_b21_16[n] = 
+			(tag[`STLB_TAG_VA_21_16_HI:`STLB_TAG_VA_21_16_LO]
+			!= cam_data[`CAM_VA_21_16_HI+13:`CAM_VA_21_16_LO+13]) ;
+
+			mismatch_va_b15_13[n] = 
+			(tag[`STLB_TAG_VA_15_13_HI:`STLB_TAG_VA_15_13_LO]
+			!= cam_data[`CAM_VA_15_13_HI+13:`CAM_VA_15_13_LO+13]) ;
+
+			mismatch_ctxt[n] = 
+			(tag[`STLB_TAG_CTXT_12_0_HI:`STLB_TAG_CTXT_12_0_LO] 
+			!= cam_data[`CAM_CTXT_12_0_HI:`CAM_CTXT_12_0_LO]) ;
+			
+			mismatch_pid[n] = (tag[`STLB_TAG_PID_HI:`STLB_TAG_PID_LO] != cam_pid[2:0]) ;
+
+			mismatch[n] =
+			(mismatch_va_b47_28[n] & cam_data[`CAM_VA_47_28_V+13]) 				|
+			(mismatch_va_b27_22[n] & tag[`STLB_TAG_VA_27_22_V] & cam_data[`CAM_VA_27_22_V+13]) 	|
+			(mismatch_va_b21_16[n] & tag[`STLB_TAG_VA_21_16_V] & cam_data[`CAM_VA_21_16_V+13]) 	|
+			(mismatch_va_b15_13[n] & tag[`STLB_TAG_VA_15_13_V] & cam_data[`CAM_VA_15_13_V+13]) 	|
+			(mismatch_ctxt[n] & ~cam_data[`CAM_CTXT_GK+13])	|	// as coded: ctxt mismatch counts only when the key's GK bit is 0
+			// mismatch is request type not equal to entry type. types are real/virtual.
+			((tag[`STLB_TAG_R] ^ cam_data[`CAM_REAL_V+13]) & ~demap_all)  	| 
+			//(mismatch_real[n] & cam_data[`CAM_REAL_V+13])  	|
+			mismatch_pid[n] ;	// pid always included in mismatch calculations
+
+			demap_all_but_locked_hit[n] = 
+			~tag[`STLB_TAG_L] & demap_all ;	// uses the L bit stored in the tag RAM word
+
+			cam_hit[n] 	= 
+				~mismatch[n] & cam_vld   & tlb_entry_vld[n] ;
+
+                        if (demap_all)  begin
+                                // Satya(10/3) - I've simplified the demap-all equation
+                                // Pls confirm that this is okay. Otherwise we will need
+                                // qualifying bits for the pid and r fields.
+                                /*demap_hit[n]  =
+                                        (demap_all_but_locked_hit[n] & demap_other) ;*/
+                                demap_hit[n]    =
+                                        (~mismatch[n] & demap_all_but_locked_hit[n] & demap_other
+                                                & tlb_entry_vld[n]) ;
+				// qualification with demap_auto to prevent ademap_hit from
+				// being cleared. Satya-we could get rid of this.
+                                // ademap_hit[n] is a phase A device and needs to be in a clocked always block
+                                //if (demap_auto & demap_other & clk) ademap_hit[n]   =
+                                //        (~mismatch[n] & demap_all_but_locked_hit[n] & demap_other
+                                //                & tlb_entry_vld[n]) ;
+                                end
+                        else    begin
+                                demap_hit[n]    =
+                                        (~mismatch[n] & demap_other & tlb_entry_vld[n]) ;
+				// qualification with demap_auto to prevent ademap_hit from
+				// being cleared. Satya-this is the only one we need.
+                                //if (demap_auto & demap_other & clk) ademap_hit[n]    =
+                                //        (~mismatch[n] & demap_other & tlb_entry_vld[n]) ;
+                                end
+//			no_hit = cam_vld ;
+			end
+
+	end  // always
+
+assign	tlb_cam_hit = |cam_hit[63:0] ;
+
+// Read on CAM hit occurs on negedge.
+/* MOVED TO COMMON ALWAYS BLOCK
+always @ (negedge clk)
+	begin
+		if (|cam_hit[63:0])	
+			begin
+			for (p=0;p<64;p=p+1)
+				if (cam_hit[p])	
+				begin
+					rd_tte_data[42:0] <= tte_data_ram[p] ;
+				end
+//				no_hit = 1'b0 ;
+			end
+//		else	if (no_hit) begin
+//			rd_tte_data[42:0] <= {43{1'bx}};
+//			no_hit = 1'b0 ;
+//			end
+	end
+*/
+// Change tlb_entry_vld handling for multi-threaded tlb writes.
+// A write is always preceded by an autodemap. The intent is to make the result of autodemap
+// (clearing of vld bit if hit) invisible until write occurs. In the same cycle that the write
+// occurs, the vld bit for an entry will be cleared if there is an autodemap hit. The write
+// and admp action may even be to same entry. The write must dominate. There is no need to
+// clear the dmp latches after the write/clear has occurred as the subsequent admp will set
+// up new state in the latches.
+
+// Define valid bit based on write/demap/reset. 
+always @ (negedge clk)
+	begin
+	for (r=0;r<64;r=r+1)
+	begin // for
+	if (((rw_index_vld & rw_wdline[r]) | (~rw_index_vld & tlb_entry_replace_d2[r])) & 	// same entry select as the tag/data write
+		wr_vld & ~rw_disable)
+			tlb_entry_vld[r] <= wr_tte_tag[`STLB_TAG_V] ;	// write
+	else	begin
+		if (ademap_hit[r] & wr_vld)			// autodemap specifically
+			tlb_entry_vld[r] <= 1'b0 ;		
+		end
+	  if ((demap_hit[r] & ~demap_auto) | sync_reset)	// non-auto-demap, reset; not in the else, so it overrides the write above
+			tlb_entry_vld[r] <= 1'b0 ;	
+	end // for
+	end
+
+
+// async reset: runs on every change of async_reset (no level check), clearing all valid bits each time.
+always  @ (async_reset) 
+	begin
+	for (l=0;l<64;l=l+1)
+		begin
+	  	tlb_entry_vld[l] <= 1'b0 ;	// second procedural writer of tlb_entry_vld besides the negedge block
+		end
+	end
+
+//=========================================================================================
+//	TAG COMPARISON
+//=========================================================================================
+
+reg [30:0] va_tag_plus ;	// {tlb_cam_comp_key[26:0], tlb_bypass_va[12:10], tlb_bypass}
+
+// Stage to m
+always @(posedge clk)
+		begin
+		// INNO - add hold to this input
+		if (hold)
+			va_tag_plus[30:0] <= va_tag_plus[30:0] ;
+		else
+			va_tag_plus[30:0] 
+			<= {tlb_cam_comp_key[26:0],tlb_bypass_va[12:10],tlb_bypass}; 
+		end
+			
+assign vrtl_pgnum_m[29:0] = va_tag_plus[30:1] ;	// staged key bits plus bypass va bits
+assign bypass = va_tag_plus[0] ;			// staged tlb_bypass
+
+// Mux to bypass va or form pa tag based on tte-data.
+
+assign	phy_pgnum_m[29:3] = 
+	{rd_tte_data[`STLB_DATA_PA_39_28_HI:`STLB_DATA_PA_39_28_LO],
+		rd_tte_data[`STLB_DATA_PA_27_22_HI:`STLB_DATA_PA_27_22_LO],
+			rd_tte_data[`STLB_DATA_PA_21_16_HI:`STLB_DATA_PA_21_16_LO],
+				rd_tte_data[`STLB_DATA_PA_15_13_HI:`STLB_DATA_PA_15_13_LO]};
+
+// Derive the tlb-based physical address.
+assign pgnum_m[2:0] = vrtl_pgnum_m[2:0];
+assign pgnum_m[5:3] = (~rd_tte_data[`STLB_DATA_15_13_SEL] & ~bypass)
+				? phy_pgnum_m[5:3] : vrtl_pgnum_m[5:3] ;
+assign pgnum_m[11:6] = (~rd_tte_data[`STLB_DATA_21_16_SEL] & ~bypass)  
+				? phy_pgnum_m[11:6] : vrtl_pgnum_m[11:6] ;
+assign pgnum_m[17:12] = (~rd_tte_data[`STLB_DATA_27_22_SEL] & ~bypass)
+				? phy_pgnum_m[17:12] : vrtl_pgnum_m[17:12] ;
+assign pgnum_m[29:18] = ~bypass ? phy_pgnum_m[29:18] : vrtl_pgnum_m[29:18];
+
+// Stage to g
+// Flop tags in tlb itself and do comparison immediately after rising edge.
+// Similarly stage va/pa tag to g
+always @(posedge clk)
+		begin
+			pgnum_g[29:0] <= pgnum_m[29:0];
+			// rm hold on these inputs.
+			cache_set_vld_g[3:0]  	<= cache_set_vld[3:0] ;
+			cache_ptag_w0_g[29:0] 	<= cache_ptag_w0[29:0] ;
+			cache_ptag_w1_g[29:0] 	<= cache_ptag_w1[29:0] ;
+			cache_ptag_w2_g[29:0] 	<= cache_ptag_w2[29:0] ;
+			cache_ptag_w3_g[29:0] 	<= cache_ptag_w3[29:0] ;
+		end
+
+
+// Need to stage by a cycle where used.
+assign	tlb_pgnum[39:10] = pgnum_g[29:0] ;
+// Same cycle as cam - meant for one load on critical path
+assign	tlb_pgnum_crit[39:10] = pgnum_m[29:0] ;
+
+
+assign	cache_way_hit[0] = 
+	(cache_ptag_w0_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[0];
+assign	cache_way_hit[1] = 
+	(cache_ptag_w1_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[1];
+assign	cache_way_hit[2] = 
+	(cache_ptag_w2_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[2];
+assign	cache_way_hit[3] = 
+	(cache_ptag_w3_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[3];
+
+assign	cache_hit = |cache_way_hit[3:0];
+
+
+//=========================================================================================
+//	TLB ENTRY REPLACEMENT
+//=========================================================================================
+
+// A single Used bit is used to track the replacement state of each entry.
+// Only an unused entry can be replaced.
+// An Unused entry is :
+//			- an invalid entry
+//			- a valid entry which has had its Used bit cleared.
+//				- on write of a valid entry, the Used bit is set.
+//				- The Used bit of a valid entry is cleared if all
+//				entries have their Used bits set and the entry itself is not Locked.
+// A locked entry should always appear to be Used.
+// A single priority-encoder is required to evaluate the used status. Priority is static
+// and entry0 has the highest priority if unused.
+
+// Timing :
+// Used bit gets updated by cam-hit or hit on negedge.
+// After Used bit gets updated off negedge, the replacement entry can be generated in
+// Phase2. In parallel, it is determined whether all Used bits are set or not. If
+// so, then they are cleared on the next negedge with the replacement entry generated
+// in the related Phase1 
+
+// Choosing replacement entry
+// Replacement entry is integer k
+
+assign	tlb_not_writeable = &used[63:0] ;
+/*
+// Used bit can be set because of write or because of cam-hit.
+always @(negedge clk)
+	begin
+		for (s=0;s<64;s=s+1)
+			begin
+				if (cam_hit[s]) 
+					tlb_entry_used[s] <= 1'b1;			
+			end
+
+// Clear on following edge if necessary.
+// CHANGE(SATYA) : tlb_entry_used qualified with valid needs to be used to determine
+// whether the Used bits are to be cleared. This allows invalid entries created
+// by a demap to be used for replacement. Else we will ignore these entries
+// for replacement
+
+		if (tlb_not_writeable)
+			begin
+				for (t=0;t<64;t=t+1)
+					begin
+						if (~tlb_entry_locked[t])
+							tlb_entry_used[t] <= 1'b0;
+					end
+			end
+	end
+*/
+
+// Determine whether entry should be squashed.
+
+assign	used[63:0] = tlb_entry_used[63:0] & tlb_entry_vld[63:0] ;
+
+/*assign squash[0] = 1'b0 ;
+assign squash[1] = ~used[0] ;
+assign squash[2] = |(~used[1:0]) ;
+assign squash[3] = |(~used[2:0]) ;
+assign squash[4] = |(~used[3:0]) ;
+assign squash[5] = |(~used[4:0]) ;
+assign squash[6] = |(~used[5:0]) ;
+assign squash[7] = |(~used[6:0]) ;
+assign squash[8] = |(~used[7:0]) ;
+assign squash[9] = |(~used[8:0]) ;
+assign squash[10] = |(~used[9:0]) ;
+assign squash[11] = |(~used[10:0]) ;
+assign squash[12] = |(~used[11:0]) ;
+assign squash[13] = |(~used[12:0]) ;
+assign squash[14] = |(~used[13:0]) ;
+assign squash[15] = |(~used[14:0]) ;
+assign squash[16] = |(~used[15:0]) ;
+assign squash[17] = |(~used[16:0]) ;
+assign squash[18] = |(~used[17:0]) ;
+assign squash[19] = |(~used[18:0]) ;
+assign squash[20] = |(~used[19:0]) ;
+assign squash[21] = |(~used[20:0]) ;
+assign squash[22] = |(~used[21:0]) ;
+assign squash[23] = |(~used[22:0]) ;
+assign squash[24] = |(~used[23:0]) ;
+assign squash[25] = |(~used[24:0]) ;
+assign squash[26] = |(~used[25:0]) ;
+assign squash[27] = |(~used[26:0]) ;
+assign squash[28] = |(~used[27:0]) ;
+assign squash[29] = |(~used[28:0]) ;
+assign squash[30] = |(~used[29:0]) ;
+assign squash[31] = |(~used[30:0]) ;
+assign squash[32] = |(~used[31:0]) ;
+assign squash[33] = |(~used[32:0]) ;
+assign squash[34] = |(~used[33:0]) ;
+assign squash[35] = |(~used[34:0]) ;
+assign squash[36] = |(~used[35:0]) ;
+assign squash[37] = |(~used[36:0]) ;
+assign squash[38] = |(~used[37:0]) ;
+assign squash[39] = |(~used[38:0]) ;
+assign squash[40] = |(~used[39:0]) ;
+assign squash[41] = |(~used[40:0]) ;
+assign squash[42] = |(~used[41:0]) ;
+assign squash[43] = |(~used[42:0]) ;
+assign squash[44] = |(~used[43:0]) ;
+assign squash[45] = |(~used[44:0]) ;
+assign squash[46] = |(~used[45:0]) ;
+assign squash[47] = |(~used[46:0]) ;
+assign squash[48] = |(~used[47:0]) ;
+assign squash[49] = |(~used[48:0]) ;
+assign squash[50] = |(~used[49:0]) ;
+assign squash[51] = |(~used[50:0]) ;
+assign squash[52] = |(~used[51:0]) ;
+assign squash[53] = |(~used[52:0]) ;
+assign squash[54] = |(~used[53:0]) ;
+assign squash[55] = |(~used[54:0]) ;
+assign squash[56] = |(~used[55:0]) ;
+assign squash[57] = |(~used[56:0]) ;
+assign squash[58] = |(~used[57:0]) ;
+assign squash[59] = |(~used[58:0]) ;
+assign squash[60] = |(~used[59:0]) ;
+assign squash[61] = |(~used[60:0]) ;
+assign squash[62] = |(~used[61:0]) ;
+assign squash[63] = |(~used[62:0]) ; */
+
+// Based on updated Used state, generate replacement entry.
+// So, replacement entries can be generated on a cycle-by-cycle basis. 
+//always @(/*AUTOSENSE*/squash or used)
+
+	reg	[63:0]	tlb_entry_replace_d1;	// one-hot replacement choice derived from used[]
+	reg		tlb_replace_flag;	// set once the first unused entry has been found
+	always @(/*AUTOSENSE*/used)
+	begin
+  	  tlb_replace_flag=1'b0;
+  	  tlb_entry_replace_d1 = 64'b0;
+  	  // Priority is given to entry0
+   	  for (u=0;u<64;u=u+1)
+  	  begin
+    	    if(~tlb_replace_flag & ~used[u])
+    	    begin
+      	      tlb_entry_replace_d1[u] = ~used[u] ;	// always 1 here, since used[u] is 0 inside this branch
+      	      tlb_replace_flag=1'b1; 
+    	    end
+  	  end
+  	  if(~tlb_replace_flag) begin	// no unused entry found: fall back to entry 63
+      	     tlb_entry_replace_d1[63] = 1'b1;
+ 	  end
+	end
+	always @(posedge clk)
+	begin
+	  // named in this manner to keep arch model happy.
+  	  tlb_entry_replace <= tlb_entry_replace_d1 ;
+	end
+	// INNO - 2 stage delay before update is visible
+	always @(posedge clk)
+	begin
+  	  tlb_entry_replace_d2 <= tlb_entry_replace ;	// second flop stage; this is the copy the write logic uses
+	end
+
+//=========================================================================================
+//	TLB WRITEABLE DETECTION
+//=========================================================================================
+
+// 2 cycles later, the tlb becomes writeable
+always @(posedge clk)
+	begin
+		tlb_not_writeable_d1 <= tlb_not_writeable ;	// first flop stage
+	end
+
+always @(posedge clk)
+	begin
+		tlb_writeable <= ~tlb_not_writeable_d1 ;	// second stage, inverted
+	end
+
+endmodule
+
+`endif
+
Index: /trunk/T1-common/srams/regfile_1w_4r.v
===================================================================
--- /trunk/T1-common/srams/regfile_1w_4r.v	(revision 6)
+++ /trunk/T1-common/srams/regfile_1w_4r.v	(revision 6)
@@ -0,0 +1,91 @@
+module regfile_1w_4r(	// 72-bit register file with one write port and four read ports
+   input clk,
+   
+   input  [71:0] din,		// write data, shared by all four copies
+   input  [ 7:0] wraddr,	// write address
+   input         wren,		// write enable
+   input  [ 7:0] rdaddr0,	// read address, port 0
+   input  [ 7:0] rdaddr1,	// read address, port 1
+   input  [ 7:0] rdaddr2,	// read address, port 2
+   input  [ 7:0] rdaddr3,	// read address, port 3
+   input         rd0,		// read enable, port 0
+   input         rd1,		// read enable, port 1
+   input         rd2,		// read enable, port 2
+   input         rd3,		// read enable, port 3
+
+   output [71:0] dout0,		// read data, port 0
+   output [71:0] dout1,		// read data, port 1
+   output [71:0] dout2,		// read data, port 2
+   output [71:0] dout3		// read data, port 3
+);
+// Read addresses and enables are registered on the rising edge of clk before reaching the memories.
+reg [7:0] rdaddr0_d;
+reg [7:0] rdaddr1_d;
+reg [7:0] rdaddr2_d;
+reg [7:0] rdaddr3_d;
+reg       rd0_d;
+reg       rd1_d;
+reg       rd2_d;
+reg       rd3_d;
+
+always @(posedge clk)
+   begin
+      rdaddr0_d<=rdaddr0;
+      rdaddr1_d<=rdaddr1;
+      rdaddr2_d<=rdaddr2;
+      rdaddr3_d<=rdaddr3;
+      rd0_d<=rd0;
+      rd1_d<=rd1;
+      rd2_d<=rd2;
+      rd3_d<=rd3;
+   end
+// Four regfile1 copies (module defined elsewhere): identical write port, one read port each.
+regfile1 regfile_inst0(
+   .wrclock(clk),
+   .rdclock(~clk),		// read side is clocked by the inverted clk
+  
+   .data(din),
+   .rdaddress(rdaddr0_d),
+   .rden(rd0_d),
+   .wraddress(wraddr),
+   .wren(wren),
+   .q(dout0)
+);
+
+regfile1 regfile_inst1(		// read port 1
+   .wrclock(clk),
+   .rdclock(~clk),
+  
+   .data(din),
+   .rdaddress(rdaddr1_d),
+   .rden(rd1_d),
+   .wraddress(wraddr),
+   .wren(wren),
+   .q(dout1)
+);
+
+regfile1 regfile_inst2(		// read port 2
+   .wrclock(clk),
+   .rdclock(~clk),
+  
+   .data(din),
+   .rdaddress(rdaddr2_d),
+   .rden(rd2_d),
+   .wraddress(wraddr),
+   .wren(wren),
+   .q(dout2)
+);
+
+regfile1 regfile_inst3(		// read port 3
+   .wrclock(clk),
+   .rdclock(~clk),
+  
+   .data(din),
+   .rdaddress(rdaddr3_d),
+   .rden(rd3_d),
+   .wraddress(wraddr),
+   .wren(wren),
+   .q(dout3)
+);
+
+endmodule
Index: /trunk/T1-common/srams/bw_r_irf.v
===================================================================
--- /trunk/T1-common/srams/bw_r_irf.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_irf.v	(revision 6)
@@ -0,0 +1,2630 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_irf.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name: bw_r_irf
+//	Description: Register file with 3 read ports and 2 write ports.  Has 
+//				32 registers per thread with 4 threads.  Reading and writing
+//				the same register concurrently produces x.
+*/
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_IRF
+`endif
+
+
+`ifdef FPGA_SYN_IRF
+`ifdef FPGA_SYN_1THREAD
+module bw_r_irf (/*AUTOARG*/
+   // Outputs
+   so, irf_byp_rs1_data_d_l, irf_byp_rs2_data_d_l, 
+   irf_byp_rs3_data_d_l, irf_byp_rs3h_data_d_l, 
+   // Inputs
+   rclk, reset_l, si, se, sehold, rst_tri_en, ifu_exu_tid_s2, 
+   ifu_exu_rs1_s, ifu_exu_rs2_s, ifu_exu_rs3_s, ifu_exu_ren1_s, 
+   ifu_exu_ren2_s, ifu_exu_ren3_s, ecl_irf_wen_w, ecl_irf_wen_w2, 
+   ecl_irf_rd_m, ecl_irf_rd_g, byp_irf_rd_data_w, byp_irf_rd_data_w2, 
+   ecl_irf_tid_m, ecl_irf_tid_g, rml_irf_old_lo_cwp_e, 
+   rml_irf_new_lo_cwp_e, rml_irf_old_e_cwp_e, rml_irf_new_e_cwp_e, 
+   rml_irf_swap_even_e, rml_irf_swap_odd_e, rml_irf_swap_local_e, 
+   rml_irf_kill_restore_w, rml_irf_cwpswap_tid_e, rml_irf_old_agp, 
+   rml_irf_new_agp, rml_irf_swap_global, rml_irf_global_tid
+   ) ; // FPGA_SYN_IRF + FPGA_SYN_1THREAD variant: stage flops and swap control here, storage in bw_r_irf_core
+   input rclk;                  // clock; aliased to clk below
+   input reset_l;               // not referenced in this variant (only in a commented-out clock gate)
+   input si;                    // scan in; not connected to the stage flops here
+   input se;                    // scan enable, wired to every dff_s below
+   input sehold;                // when set, staged control values are held (see the muxes below)
+   input rst_tri_en;            // masks the swap valids and, with rst_tri_en_neg, the write enables
+   input [1:0]  ifu_exu_tid_s2;  // s stage thread
+   input [4:0]  ifu_exu_rs1_s;  // source addresses
+   input [4:0]  ifu_exu_rs2_s;
+   input [4:0]  ifu_exu_rs3_s;
+   input ifu_exu_ren1_s;        // read enables for all 3 ports
+   input ifu_exu_ren2_s;
+   input ifu_exu_ren3_s;
+   input ecl_irf_wen_w;        // write enables for both write ports
+   input ecl_irf_wen_w2;
+   input [4:0]  ecl_irf_rd_m;   // w destination (flopped into ecl_irf_rd_w below)
+   input [4:0]  ecl_irf_rd_g;  // w2 destination (flopped into ecl_irf_rd_w2 below)
+   input [71:0] byp_irf_rd_data_w;// write data from w1
+   input [71:0] byp_irf_rd_data_w2;     // write data from w2
+   input [1:0]  ecl_irf_tid_m;  // w stage thread (flopped into ecl_irf_tid_w below)
+   input [1:0]  ecl_irf_tid_g; // w2 thread (flopped into ecl_irf_tid_w2 below)
+
+   input [2:0]  rml_irf_old_lo_cwp_e;  // current window pointer for locals and odds
+   input [2:0]  rml_irf_new_lo_cwp_e;  // target window pointer for locals and odds
+   input [2:1]  rml_irf_old_e_cwp_e;  // current window pointer for evens
+   input [2:1]  rml_irf_new_e_cwp_e;  // target window pointer for evens
+   input        rml_irf_swap_even_e;
+   input        rml_irf_swap_odd_e;
+   input        rml_irf_swap_local_e;
+   input        rml_irf_kill_restore_w;
+   input [1:0]  rml_irf_cwpswap_tid_e;
+
+   input [1:0]  rml_irf_old_agp; // alternate global pointer
+   input [1:0]  rml_irf_new_agp; // alternate global pointer
+   input        rml_irf_swap_global;
+   input [1:0]  rml_irf_global_tid;
+   
+   output       so;             // scan out; no driver for it appears in this module
+   output [71:0] irf_byp_rs1_data_d_l;
+   output [71:0] irf_byp_rs2_data_d_l;
+   output [71:0] irf_byp_rs3_data_d_l;
+   output [31:0] irf_byp_rs3h_data_d_l; // only the low 32 bits of the rs3h read are exported
+   // Active-high read data returned by bw_r_irf_core (inverted onto the _l outputs)
+   wire [71:0] irf_byp_rs1_data_d;
+   wire [71:0] irf_byp_rs2_data_d;
+   wire [71:0] irf_byp_rs3_data_d;
+   wire [71:0] irf_byp_rs3h_data_d;
+   // Outputs of the stage flops below (d / w / w2 copies of the s / m / g inputs)
+   wire [1:0]  ecl_irf_tid_w;  // w stage thread
+   wire [1:0]  ecl_irf_tid_w2; // w2 thread
+   wire [4:0]  ecl_irf_rd_w;   // w destination
+   wire [4:0]  ecl_irf_rd_w2;  // w2 destination
+   wire [1:0]  ifu_exu_thr_d;  // d stage thread
+   wire ifu_exu_ren1_d;        // read enables for all 3 ports
+   wire ifu_exu_ren2_d;
+   wire ifu_exu_ren3_d;
+   wire [4:0]  ifu_exu_rs1_d;  // source addresses
+   wire [4:0]  ifu_exu_rs2_d;
+   wire [4:0]  ifu_exu_rs3_d;
+   wire [6:0]    thr_rs1;       // these 6 are a combination of the thr and reg
+   wire [6:0]    thr_rs2;       // so that comparison can be done more easily
+   wire [6:0]    thr_rs3;
+   wire [6:0]    thr_rs3h;
+   wire [6:0]    thr_rd_w;
+   wire [6:0]    thr_rd_w2;
+   // Staged window-swap (cwp) and global-swap (agp) controls
+   reg [1:0] cwpswap_tid_m;
+   reg [1:0] cwpswap_tid_w;
+   reg [2:0] old_lo_cwp_m;
+   reg [2:0] new_lo_cwp_m;
+   reg [2:0] new_lo_cwp_w;
+   reg [1:0] old_e_cwp_m;
+   reg [1:0] new_e_cwp_m;
+   reg [1:0] new_e_cwp_w;
+   reg       swap_local_m;
+   reg       swap_local_w;
+   reg       swap_even_m;
+   reg       swap_even_w;
+   reg       swap_odd_m;
+   reg       swap_odd_w;
+   reg       kill_restore_d1;
+   reg        swap_global_d1;
+   reg        swap_global_d2;
+   reg [1:0]  global_tid_d1;
+   reg [1:0]  global_tid_d2;
+   reg [1:0] old_agp_d1,
+             new_agp_d1,
+             new_agp_d2;
+   // Write-port staging: plain wires with FPGA_SYN_SAVE_BRAM, negedge-clk registers otherwise
+`ifdef FPGA_SYN_SAVE_BRAM
+   wire [71:0] active_win_thr_rd_w_neg;
+   wire [71:0] active_win_thr_rd_w2_neg;
+   wire [6:0]  thr_rd_w_neg;
+   wire [6:0]  thr_rd_w2_neg;
+   wire        active_win_thr_rd_w_neg_wr_en;
+   wire        active_win_thr_rd_w2_neg_wr_en;
+   wire        rst_tri_en_neg;
+`else
+   reg [71:0] active_win_thr_rd_w_neg;
+   reg [71:0] active_win_thr_rd_w2_neg;
+   reg [6:0]  thr_rd_w_neg;
+   reg [6:0]  thr_rd_w2_neg;
+   reg        active_win_thr_rd_w_neg_wr_en;
+   reg        active_win_thr_rd_w2_neg_wr_en;
+   reg        rst_tri_en_neg;
+`endif
+   
+   wire          se;
+   wire          clk;
+//   assign        clk = rclk & reset_l;
+   assign 	 clk = rclk;
+   // Stage-flop inputs, selected by sehold in the assign below
+   wire          ren1_s;
+   wire          ren2_s;
+   wire          ren3_s;
+   wire [4:0]    rs1_s;
+   wire [4:0]    rs2_s;
+   wire [4:0]    rs3_s;
+   wire [1:0]    tid_s;
+   wire [1:0]    tid_g;
+   wire [1:0]    tid_m;
+   wire [4:0]    rd_m;
+   wire [4:0]    rd_g;
+   wire          kill_restore_w;
+   wire          swap_global_d1_vld;
+   wire          swap_local_m_vld;
+   wire          swap_even_m_vld;
+   wire          swap_odd_m_vld;
+   // sehold=1: recirculate the flop outputs; sehold=0: load the new s/g/m inputs
+   assign {ren1_s,ren2_s,ren3_s,rs1_s[4:0],rs2_s[4:0],rs3_s[4:0],tid_s[1:0],tid_g[1:0],tid_m[1:0],
+           rd_m[4:0], rd_g[4:0]} = (sehold)?
+          {ifu_exu_ren1_d,ifu_exu_ren2_d,ifu_exu_ren3_d,ifu_exu_rs1_d[4:0],ifu_exu_rs2_d[4:0],
+           ifu_exu_rs3_d[4:0],ifu_exu_thr_d[1:0],ecl_irf_tid_w2[1:0],ecl_irf_tid_w[1:0],
+           ecl_irf_rd_w[4:0],ecl_irf_rd_w2[4:0]}:
+          {ifu_exu_ren1_s,ifu_exu_ren2_s,ifu_exu_ren3_s,ifu_exu_rs1_s[4:0],ifu_exu_rs2_s[4:0],
+           ifu_exu_rs3_s[4:0],ifu_exu_tid_s2[1:0],ecl_irf_tid_g[1:0],ecl_irf_tid_m[1:0],
+           ecl_irf_rd_m[4:0],ecl_irf_rd_g[4:0]};
+   // Pipeline flops for irf control signals
+   dff_s dff_ren1_s2d(.din(ren1_s), .clk(clk), .q(ifu_exu_ren1_d), .se(se),
+                    .si(), .so());
+   dff_s dff_ren2_s2d(.din(ren2_s), .clk(clk), .q(ifu_exu_ren2_d), .se(se),
+                    .si(), .so());
+   dff_s dff_ren3_s2d(.din(ren3_s), .clk(clk), .q(ifu_exu_ren3_d), .se(se),
+                    .si(), .so());
+   dff_s #5 dff_rs1_s2d(.din(rs1_s[4:0]), .clk(clk), .q(ifu_exu_rs1_d[4:0]), .se(se),
+                      .si(),.so());
+   dff_s #5 dff_rs2_s2d(.din(rs2_s[4:0]), .clk(clk), .q(ifu_exu_rs2_d[4:0]), .se(se),
+                      .si(),.so());
+   dff_s #5 dff_rs3_s2d(.din(rs3_s[4:0]), .clk(clk), .q(ifu_exu_rs3_d[4:0]), .se(se),
+                      .si(),.so());
+   dff_s #2 dff_thr_s2d(.din(tid_s[1:0]), .clk(clk), .q(ifu_exu_thr_d[1:0]), .se(se),
+                      .si(),.so());
+   dff_s #2 dff_thr_g2w2(.din(tid_g[1:0]), .clk(clk), .q(ecl_irf_tid_w2[1:0]), .se(se),
+                      .si(),.so());
+   dff_s #2 dff_thr_m2w(.din(tid_m[1:0]), .clk(clk), .q(ecl_irf_tid_w[1:0]), .se(se),
+                      .si(),.so());
+   dff_s #5 dff_rd_m2w(.din(rd_m[4:0]), .clk(clk), .q(ecl_irf_rd_w[4:0]), .se(se),
+                      .si(),.so());
+   dff_s #5 dff_rd_g2w2(.din(rd_g[4:0]), .clk(clk), .q(ecl_irf_rd_w2[4:0]), .se(se),
+                      .si(),.so());
+   
+   // Concatenate the thread and rs1/rd bits together
+   assign        thr_rs1[6:0] = {ifu_exu_thr_d, ifu_exu_rs1_d};
+   assign        thr_rs2[6:0] = {ifu_exu_thr_d, ifu_exu_rs2_d};
+   assign        thr_rs3[6:0] = {ifu_exu_thr_d, ifu_exu_rs3_d[4:0]};
+   assign        thr_rs3h[6:0] = {ifu_exu_thr_d[1:0], ifu_exu_rs3_d[4:1], 1'b1}; // rs3 with bit 0 forced to 1
+   assign        thr_rd_w[6:0] = {ecl_irf_tid_w, ecl_irf_rd_w};
+   assign        thr_rd_w2[6:0] = {ecl_irf_tid_w2, ecl_irf_rd_w2};
+
+   // Active low outputs
+   assign        irf_byp_rs1_data_d_l[71:0] = ~irf_byp_rs1_data_d[71:0];
+   assign        irf_byp_rs2_data_d_l[71:0] = ~irf_byp_rs2_data_d[71:0];
+   assign        irf_byp_rs3_data_d_l[71:0] = ~irf_byp_rs3_data_d[71:0]; 
+   assign        irf_byp_rs3h_data_d_l[31:0] = ~irf_byp_rs3h_data_d[31:0];
+   
+/////////////////////////////////////////////////////////////////
+///  Write ports
+////////////////////////////////////////////////////////////////
+   // This is a latch that works if both wen is high and clk is low
+   // SAVE_BRAM path: no staging and no write-conflict check; rd == 0 never enables a write
+`ifdef FPGA_SYN_SAVE_BRAM
+  assign rst_tri_en_neg = rst_tri_en;
+  assign active_win_thr_rd_w_neg = byp_irf_rd_data_w;
+  assign active_win_thr_rd_w2_neg = byp_irf_rd_data_w2;
+  assign thr_rd_w_neg = thr_rd_w;
+  assign thr_rd_w2_neg = thr_rd_w2;
+  assign active_win_thr_rd_w_neg_wr_en = ecl_irf_wen_w & (thr_rd_w[4:0] != 5'b0);
+  assign active_win_thr_rd_w2_neg_wr_en = ecl_irf_wen_w2 & (thr_rd_w2[4:0] != 5'b0);
+`else
+   // Default path: capture both write ports on the falling edge of clk
+   always @(negedge clk) begin
+      rst_tri_en_neg <= rst_tri_en;
+      // write conflict results in X written to destination
+      if (ecl_irf_wen_w & ecl_irf_wen_w2 & (thr_rd_w[6:0] == thr_rd_w2[6:0])) begin
+         active_win_thr_rd_w_neg <= {72{1'bx}};
+         thr_rd_w_neg <= thr_rd_w;
+         active_win_thr_rd_w_neg_wr_en <= 1'b1;
+         active_win_thr_rd_w2_neg_wr_en <= 1'b0; // the conflicting write goes out on port 1 only
+      end
+      else begin
+         // W1 write port
+         if (ecl_irf_wen_w & (thr_rd_w[4:0] != 5'b0)) begin
+            active_win_thr_rd_w_neg <= byp_irf_rd_data_w;
+            thr_rd_w_neg <= thr_rd_w;
+            active_win_thr_rd_w_neg_wr_en <= 1'b1;
+         end
+         else
+           active_win_thr_rd_w_neg_wr_en <= 1'b0;
+         
+         // W2 write port
+         if (ecl_irf_wen_w2 & (thr_rd_w2[4:0] != 5'b0)) begin
+            active_win_thr_rd_w2_neg <= byp_irf_rd_data_w2;
+            thr_rd_w2_neg <= thr_rd_w2;
+            active_win_thr_rd_w2_neg_wr_en <= 1'b1;
+         end
+         else
+           active_win_thr_rd_w2_neg_wr_en <= 1'b0;
+      end
+   end
+
+`endif
+   
+
+
+/* MOVED TO CMP ENVIRONMENT
+   initial begin
+      // Hardcode R0 to zero
+      active_window[{2'b00, 5'b00000}] = 72'b0;
+      active_window[{2'b01, 5'b00000}] = 72'b0;
+      active_window[{2'b10, 5'b00000}] = 72'b0;
+      active_window[{2'b11, 5'b00000}] = 72'b0;
+   end
+*/
+   //////////////////////////////////////////////////
+   // Window management logic
+   //////////////////////////////////////////////////
+   // Pipeline flops for control signals
+
+   // cwp swap signals
+   assign kill_restore_w = (sehold)? kill_restore_d1: rml_irf_kill_restore_w;
+   assign swap_local_m_vld = swap_local_m & ~rst_tri_en; // swap valids are forced low while rst_tri_en is high
+   assign swap_odd_m_vld = swap_odd_m & ~rst_tri_en;
+   assign swap_even_m_vld = swap_even_m & ~rst_tri_en;
+   assign swap_global_d1_vld = swap_global_d1 & ~rst_tri_en;
+   
+   always @ (posedge clk) begin
+      cwpswap_tid_m[1:0] <= (sehold)? cwpswap_tid_m[1:0]: rml_irf_cwpswap_tid_e[1:0];
+      cwpswap_tid_w[1:0] <= cwpswap_tid_m[1:0];
+      old_lo_cwp_m[2:0] <= (sehold)? old_lo_cwp_m[2:0]: rml_irf_old_lo_cwp_e[2:0];
+      new_lo_cwp_m[2:0] <= (sehold)? new_lo_cwp_m[2:0]: rml_irf_new_lo_cwp_e[2:0];
+      new_lo_cwp_w[2:0] <= new_lo_cwp_m[2:0];
+      old_e_cwp_m[1:0] <= (sehold)? old_e_cwp_m[1:0]: rml_irf_old_e_cwp_e[2:1];
+      new_e_cwp_m[1:0] <= (sehold)? new_e_cwp_m[1:0]: rml_irf_new_e_cwp_e[2:1];
+      new_e_cwp_w[1:0] <= new_e_cwp_m[1:0];
+      swap_local_m <= (sehold)? swap_local_m & rst_tri_en: rml_irf_swap_local_e; // under sehold the flag survives only while rst_tri_en is high
+      swap_local_w <= swap_local_m_vld;
+      swap_odd_m <= (sehold)? swap_odd_m & rst_tri_en: rml_irf_swap_odd_e;
+      swap_odd_w <= swap_odd_m_vld;
+      swap_even_m <= (sehold)? swap_even_m & rst_tri_en: rml_irf_swap_even_e;
+      swap_even_w <= swap_even_m_vld;
+      kill_restore_d1 <= kill_restore_w;
+   end  
+   // global swap signals    
+   always @ (posedge clk) begin
+      swap_global_d1 <= (sehold)? swap_global_d1 & rst_tri_en: rml_irf_swap_global;
+      swap_global_d2 <= swap_global_d1_vld;
+      global_tid_d1[1:0] <= (sehold)? global_tid_d1[1:0]: rml_irf_global_tid[1:0];
+      global_tid_d2[1:0] <= global_tid_d1[1:0];
+      old_agp_d1[1:0] <= (sehold)? old_agp_d1[1:0]: rml_irf_old_agp[1:0];
+      new_agp_d1[1:0] <= (sehold)? new_agp_d1[1:0]: rml_irf_new_agp[1:0];
+      new_agp_d2[1:0] <= new_agp_d1[1:0];
+   end
+  // Core write enables: blocked only when rst_tri_en and rst_tri_en_neg are both high
+  wire wr_en  = active_win_thr_rd_w_neg_wr_en & (~rst_tri_en | ~rst_tri_en_neg);
+  wire wr_en2 = active_win_thr_rd_w2_neg_wr_en & (~rst_tri_en | ~rst_tri_en_neg);
+// Simulation-only access trace; each read print waits for one more posedge clk before displaying
+// synthesis translate_off
+  always @(posedge clk) begin
+    if(wr_en) 
+      $display("Write Port 1: %h %h", active_win_thr_rd_w_neg, thr_rd_w_neg );
+    if(wr_en2) 
+      $display("Write Port 2: %h %h", active_win_thr_rd_w2_neg, thr_rd_w2_neg );
+    if(ifu_exu_ren1_d) begin
+      @(posedge clk); // the block is suspended here, so it does not restart on this edge
+      $display("Read Port 1: %h %h", irf_byp_rs1_data_d, thr_rs1);
+    end
+    if(ifu_exu_ren2_d) begin
+      @(posedge clk);
+      $display("Read Port 2: %h %h", irf_byp_rs2_data_d, thr_rs2);
+    end
+    if(ifu_exu_ren3_d) begin
+      @(posedge clk);
+      $display("Read Port 3: %h %h", irf_byp_rs3_data_d, thr_rs3);
+    end
+  end
+//synthesis translate_on
+// Register storage and window/global swap handling are delegated to bw_r_irf_core
+bw_r_irf_core bw_r_irf_core (
+        .clk			(clk),
+        .ifu_exu_ren1_d		(ifu_exu_ren1_d),
+        .ifu_exu_ren2_d		(ifu_exu_ren2_d),
+        .ifu_exu_ren3_d		(ifu_exu_ren3_d),
+        .thr_rs1		(thr_rs1),
+        .thr_rs2		(thr_rs2),
+        .thr_rs3		(thr_rs3),
+        .thr_rs3h		(thr_rs3h),
+        .irf_byp_rs1_data_d	(irf_byp_rs1_data_d),
+        .irf_byp_rs2_data_d	(irf_byp_rs2_data_d),
+        .irf_byp_rs3_data_d	(irf_byp_rs3_data_d),
+        .irf_byp_rs3h_data_d	(irf_byp_rs3h_data_d),
+        .wr_en			(wr_en),
+        .wr_en2			(wr_en2),
+        .active_win_thr_rd_w_neg(active_win_thr_rd_w_neg),
+        .active_win_thr_rd_w2_neg(active_win_thr_rd_w2_neg),
+        .thr_rd_w_neg		(thr_rd_w_neg),
+        .thr_rd_w2_neg		(thr_rd_w2_neg),
+        .swap_global_d1_vld	(swap_global_d1_vld),
+        .swap_global_d2		(swap_global_d2),
+        .global_tid_d1		(global_tid_d1),
+        .global_tid_d2		(global_tid_d2),
+        .old_agp_d1		(old_agp_d1),
+        .new_agp_d2		(new_agp_d2),
+        .swap_local_m_vld	(swap_local_m_vld),
+        .swap_local_w		(swap_local_w),
+        .old_lo_cwp_m		(old_lo_cwp_m),
+        .new_lo_cwp_w		(new_lo_cwp_w),
+        .swap_even_m_vld	(swap_even_m_vld),
+        .swap_even_w		(swap_even_w),
+        .old_e_cwp_m		(old_e_cwp_m),
+        .new_e_cwp_w		(new_e_cwp_w),
+        .swap_odd_m_vld		(swap_odd_m_vld),
+        .swap_odd_w		(swap_odd_w),
+        .cwpswap_tid_m		(cwpswap_tid_m),
+        .cwpswap_tid_w		(cwpswap_tid_w),
+        .kill_restore_w		(kill_restore_w)
+	);
+
+endmodule // bw_r_irf
+
+module bw_r_irf_core(
+	clk,
+	ifu_exu_ren1_d,
+	ifu_exu_ren2_d,
+	ifu_exu_ren3_d,
+	thr_rs1,       
+	thr_rs2,       
+	thr_rs3,
+	thr_rs3h,
+	irf_byp_rs1_data_d,
+	irf_byp_rs2_data_d,
+	irf_byp_rs3_data_d,
+	irf_byp_rs3h_data_d,
+	wr_en,
+	wr_en2,
+	active_win_thr_rd_w_neg,
+	active_win_thr_rd_w2_neg,
+	thr_rd_w_neg,
+	thr_rd_w2_neg,
+	swap_global_d1_vld,
+	swap_global_d2,
+	global_tid_d1,
+	global_tid_d2,
+	old_agp_d1,
+	new_agp_d2,
+	swap_local_m_vld,
+	swap_local_w,
+	old_lo_cwp_m,
+	new_lo_cwp_w,
+	swap_even_m_vld,
+	swap_even_w,
+	old_e_cwp_m,
+	new_e_cwp_w,
+	swap_odd_m_vld,
+	swap_odd_w,
+	cwpswap_tid_m,
+	cwpswap_tid_w,
+	kill_restore_w);
+
+
+	input		clk;
+	input		ifu_exu_ren1_d;
+	input		ifu_exu_ren2_d;
+	input		ifu_exu_ren3_d;
+
+	input	[6:0]	thr_rs1;       
+	input	[6:0]	thr_rs2;       
+	input	[6:0]	thr_rs3;
+	input	[6:0]	thr_rs3h;
+
+	output	[71:0]	irf_byp_rs1_data_d;
+	output	[71:0]	irf_byp_rs2_data_d;
+	output	[71:0]	irf_byp_rs3_data_d;
+	output	[71:0]	irf_byp_rs3h_data_d;
+
+	
+	reg	[71:0]	irf_byp_rs1_data_d;
+	reg	[71:0]	irf_byp_rs2_data_d;
+	reg	[71:0]	irf_byp_rs3_data_d;
+	reg	[71:0]	irf_byp_rs3h_data_d;
+
+	input		wr_en;
+	input		wr_en2;
+	input	[71:0]	active_win_thr_rd_w_neg;
+	input	[71:0]	active_win_thr_rd_w2_neg;
+	input	[6:0]	thr_rd_w_neg;
+	input	[6:0]	thr_rd_w2_neg;
+
+	input		swap_global_d1_vld;
+	input		swap_global_d2;
+	input	[1:0]	global_tid_d1;
+	input	[1:0]	global_tid_d2;
+	input	[1:0]	old_agp_d1;
+	input	[1:0]	new_agp_d2;
+
+	input		swap_local_m_vld;
+	input		swap_local_w;
+	input	[2:0]	old_lo_cwp_m;
+	input	[2:0]	new_lo_cwp_w;
+
+	input		swap_even_m_vld;
+	input		swap_even_w;
+	input	[1:0]	old_e_cwp_m;
+	input	[1:0]	new_e_cwp_w;
+
+	input		swap_odd_m_vld;
+	input		swap_odd_w;
+
+	input	[1:0]	cwpswap_tid_m;
+	input	[1:0]	cwpswap_tid_w;
+
+   	input		kill_restore_w;
+
+
+	wire	[71:0]	rd_data00;
+	wire	[71:0]	rd_data01;
+	wire	[71:0]	rd_data02;
+	wire	[71:0]	rd_data03;
+	wire	[71:0]	rd_data04;
+	wire	[71:0]	rd_data05;
+	wire	[71:0]	rd_data06;
+	wire	[71:0]	rd_data07;
+	wire	[71:0]	rd_data08;
+	wire	[71:0]	rd_data09;
+	wire	[71:0]	rd_data10;
+	wire	[71:0]	rd_data11;
+	wire	[71:0]	rd_data12;
+	wire	[71:0]	rd_data13;
+	wire	[71:0]	rd_data14;
+	wire	[71:0]	rd_data15;
+	wire	[71:0]	rd_data16;
+	wire	[71:0]	rd_data17;
+	wire	[71:0]	rd_data18;
+	wire	[71:0]	rd_data19;
+	wire	[71:0]	rd_data20;
+	wire	[71:0]	rd_data21;
+	wire	[71:0]	rd_data22;
+	wire	[71:0]	rd_data23;
+	wire	[71:0]	rd_data24;
+	wire	[71:0]	rd_data25;
+	wire	[71:0]	rd_data26;
+	wire	[71:0]	rd_data27;
+	wire	[71:0]	rd_data28;
+	wire	[71:0]	rd_data29;
+	wire	[71:0]	rd_data30;
+	wire	[71:0]	rd_data31;
+
+// synthesis translate_off
+always @(posedge clk) begin
+	if(ifu_exu_ren1_d | ifu_exu_ren2_d | ifu_exu_ren3_d) begin
+		if(thr_rs1[6:5] != 2'b00) begin
+			$display("Accessing thread # other than 0");
+			$finish;	
+		end
+	end
+end
+// synthesis translate_on
+   
+   //reg [71:0]    active_window [127:0];// 32x4 72 bit registers
+
+	always @(negedge clk) 
+	  if(ifu_exu_ren1_d) //comes from a posedge clk
+	  case(thr_rs1[4:0])
+	    5'b00000: irf_byp_rs1_data_d <= rd_data00;
+	    5'b00001: irf_byp_rs1_data_d <= rd_data01;
+	    5'b00010: irf_byp_rs1_data_d <= rd_data02;
+	    5'b00011: irf_byp_rs1_data_d <= rd_data03;
+	    5'b00100: irf_byp_rs1_data_d <= rd_data04;
+	    5'b00101: irf_byp_rs1_data_d <= rd_data05;
+	    5'b00110: irf_byp_rs1_data_d <= rd_data06;
+	    5'b00111: irf_byp_rs1_data_d <= rd_data07;
+	    5'b01000: irf_byp_rs1_data_d <= rd_data08;
+	    5'b01001: irf_byp_rs1_data_d <= rd_data09;
+	    5'b01010: irf_byp_rs1_data_d <= rd_data10;
+	    5'b01011: irf_byp_rs1_data_d <= rd_data11;
+	    5'b01100: irf_byp_rs1_data_d <= rd_data12;
+	    5'b01101: irf_byp_rs1_data_d <= rd_data13;
+	    5'b01110: irf_byp_rs1_data_d <= rd_data14;
+	    5'b01111: irf_byp_rs1_data_d <= rd_data15;
+	    5'b10000: irf_byp_rs1_data_d <= rd_data16;
+	    5'b10001: irf_byp_rs1_data_d <= rd_data17;
+	    5'b10010: irf_byp_rs1_data_d <= rd_data18;
+	    5'b10011: irf_byp_rs1_data_d <= rd_data19;
+	    5'b10100: irf_byp_rs1_data_d <= rd_data20;
+	    5'b10101: irf_byp_rs1_data_d <= rd_data21;
+	    5'b10110: irf_byp_rs1_data_d <= rd_data22;
+	    5'b10111: irf_byp_rs1_data_d <= rd_data23;
+	    5'b11000: irf_byp_rs1_data_d <= rd_data24;
+	    5'b11001: irf_byp_rs1_data_d <= rd_data25;
+	    5'b11010: irf_byp_rs1_data_d <= rd_data26;
+	    5'b11011: irf_byp_rs1_data_d <= rd_data27;
+	    5'b11100: irf_byp_rs1_data_d <= rd_data28;
+	    5'b11101: irf_byp_rs1_data_d <= rd_data29;
+	    5'b11110: irf_byp_rs1_data_d <= rd_data30;
+	    5'b11111: irf_byp_rs1_data_d <= rd_data31;
+	  endcase
+
+	always @(negedge clk) 
+	  if(ifu_exu_ren2_d)
+	  case(thr_rs2[4:0])
+	    5'b00000: irf_byp_rs2_data_d <= rd_data00;
+	    5'b00001: irf_byp_rs2_data_d <= rd_data01;
+	    5'b00010: irf_byp_rs2_data_d <= rd_data02;
+	    5'b00011: irf_byp_rs2_data_d <= rd_data03;
+	    5'b00100: irf_byp_rs2_data_d <= rd_data04;
+	    5'b00101: irf_byp_rs2_data_d <= rd_data05;
+	    5'b00110: irf_byp_rs2_data_d <= rd_data06;
+	    5'b00111: irf_byp_rs2_data_d <= rd_data07;
+	    5'b01000: irf_byp_rs2_data_d <= rd_data08;
+	    5'b01001: irf_byp_rs2_data_d <= rd_data09;
+	    5'b01010: irf_byp_rs2_data_d <= rd_data10;
+	    5'b01011: irf_byp_rs2_data_d <= rd_data11;
+	    5'b01100: irf_byp_rs2_data_d <= rd_data12;
+	    5'b01101: irf_byp_rs2_data_d <= rd_data13;
+	    5'b01110: irf_byp_rs2_data_d <= rd_data14;
+	    5'b01111: irf_byp_rs2_data_d <= rd_data15;
+	    5'b10000: irf_byp_rs2_data_d <= rd_data16;
+	    5'b10001: irf_byp_rs2_data_d <= rd_data17;
+	    5'b10010: irf_byp_rs2_data_d <= rd_data18;
+	    5'b10011: irf_byp_rs2_data_d <= rd_data19;
+	    5'b10100: irf_byp_rs2_data_d <= rd_data20;
+	    5'b10101: irf_byp_rs2_data_d <= rd_data21;
+	    5'b10110: irf_byp_rs2_data_d <= rd_data22;
+	    5'b10111: irf_byp_rs2_data_d <= rd_data23;
+	    5'b11000: irf_byp_rs2_data_d <= rd_data24;
+	    5'b11001: irf_byp_rs2_data_d <= rd_data25;
+	    5'b11010: irf_byp_rs2_data_d <= rd_data26;
+	    5'b11011: irf_byp_rs2_data_d <= rd_data27;
+	    5'b11100: irf_byp_rs2_data_d <= rd_data28;
+	    5'b11101: irf_byp_rs2_data_d <= rd_data29;
+	    5'b11110: irf_byp_rs2_data_d <= rd_data30;
+	    5'b11111: irf_byp_rs2_data_d <= rd_data31;
+	  endcase
+
+	always @(negedge clk) 
+	  if(ifu_exu_ren3_d)
+	  case(thr_rs3[4:0])
+	    5'b00000: irf_byp_rs3_data_d <= rd_data00;
+	    5'b00001: irf_byp_rs3_data_d <= rd_data01;
+	    5'b00010: irf_byp_rs3_data_d <= rd_data02;
+	    5'b00011: irf_byp_rs3_data_d <= rd_data03;
+	    5'b00100: irf_byp_rs3_data_d <= rd_data04;
+	    5'b00101: irf_byp_rs3_data_d <= rd_data05;
+	    5'b00110: irf_byp_rs3_data_d <= rd_data06;
+	    5'b00111: irf_byp_rs3_data_d <= rd_data07;
+	    5'b01000: irf_byp_rs3_data_d <= rd_data08;
+	    5'b01001: irf_byp_rs3_data_d <= rd_data09;
+	    5'b01010: irf_byp_rs3_data_d <= rd_data10;
+	    5'b01011: irf_byp_rs3_data_d <= rd_data11;
+	    5'b01100: irf_byp_rs3_data_d <= rd_data12;
+	    5'b01101: irf_byp_rs3_data_d <= rd_data13;
+	    5'b01110: irf_byp_rs3_data_d <= rd_data14;
+	    5'b01111: irf_byp_rs3_data_d <= rd_data15;
+	    5'b10000: irf_byp_rs3_data_d <= rd_data16;
+	    5'b10001: irf_byp_rs3_data_d <= rd_data17;
+	    5'b10010: irf_byp_rs3_data_d <= rd_data18;
+	    5'b10011: irf_byp_rs3_data_d <= rd_data19;
+	    5'b10100: irf_byp_rs3_data_d <= rd_data20;
+	    5'b10101: irf_byp_rs3_data_d <= rd_data21;
+	    5'b10110: irf_byp_rs3_data_d <= rd_data22;
+	    5'b10111: irf_byp_rs3_data_d <= rd_data23;
+	    5'b11000: irf_byp_rs3_data_d <= rd_data24;
+	    5'b11001: irf_byp_rs3_data_d <= rd_data25;
+	    5'b11010: irf_byp_rs3_data_d <= rd_data26;
+	    5'b11011: irf_byp_rs3_data_d <= rd_data27;
+	    5'b11100: irf_byp_rs3_data_d <= rd_data28;
+	    5'b11101: irf_byp_rs3_data_d <= rd_data29;
+	    5'b11110: irf_byp_rs3_data_d <= rd_data30;
+	    5'b11111: irf_byp_rs3_data_d <= rd_data31;
+	  endcase
+
+	always @(negedge clk) 
+	  if(ifu_exu_ren3_d)
+	  case(thr_rs3h[4:1])
+	    4'b0000: irf_byp_rs3h_data_d <= rd_data01;
+	    4'b0001: irf_byp_rs3h_data_d <= rd_data03;
+	    4'b0010: irf_byp_rs3h_data_d <= rd_data05;
+	    4'b0011: irf_byp_rs3h_data_d <= rd_data07;
+	    4'b0100: irf_byp_rs3h_data_d <= rd_data09;
+	    4'b0101: irf_byp_rs3h_data_d <= rd_data11;
+	    4'b0110: irf_byp_rs3h_data_d <= rd_data13;
+	    4'b0111: irf_byp_rs3h_data_d <= rd_data15;
+	    4'b1000: irf_byp_rs3h_data_d <= rd_data17;
+	    4'b1001: irf_byp_rs3h_data_d <= rd_data19;
+	    4'b1010: irf_byp_rs3h_data_d <= rd_data21;
+	    4'b1011: irf_byp_rs3h_data_d <= rd_data23;
+	    4'b1100: irf_byp_rs3h_data_d <= rd_data25;
+	    4'b1101: irf_byp_rs3h_data_d <= rd_data27;
+	    4'b1110: irf_byp_rs3h_data_d <= rd_data29;
+	    4'b1111: irf_byp_rs3h_data_d <= rd_data31;
+	  endcase
+
+wire wren = wr_en | wr_en2;
+wire [4:0] wr_addr = wr_en ? thr_rd_w_neg[4:0] : thr_rd_w2_neg[4:0];
+wire [71:0] wr_data = wr_en ? active_win_thr_rd_w_neg : active_win_thr_rd_w2_neg;
+
+//GLOBALs
+bw_r_irf_register register00(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b00000)), 
+		.save(swap_global_d1_vld),
+		.save_addr({1'b0,old_agp_d1[1:0]}), 
+		.restore(swap_global_d2), 
+		.restore_addr({1'b0,new_agp_d2[1:0]}),
+		.wr_data(72'b0), 
+		.rd_data(rd_data00)
+);
+
+bw_r_irf_register register01(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b00001)), 
+		.save(swap_global_d1_vld),
+		.save_addr({1'b0,old_agp_d1[1:0]}), 
+		.restore(swap_global_d2), 
+		.restore_addr({1'b0,new_agp_d2[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data01)
+);
+
+bw_r_irf_register register02(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b00010)), 
+		.save(swap_global_d1_vld),
+		.save_addr({1'b0,old_agp_d1[1:0]}), 
+		.restore(swap_global_d2), 
+		.restore_addr({1'b0,new_agp_d2[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data02)
+);
+
+bw_r_irf_register register03(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b00011)), 
+		.save(swap_global_d1_vld),
+		.save_addr({1'b0,old_agp_d1[1:0]}), 
+		.restore(swap_global_d2), 
+		.restore_addr({1'b0,new_agp_d2[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data03)
+);
+
+bw_r_irf_register register04(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b00100)), 
+		.save(swap_global_d1_vld),
+		.save_addr({1'b0,old_agp_d1[1:0]}), 
+		.restore(swap_global_d2), 
+		.restore_addr({1'b0,new_agp_d2[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data04)
+);
+
+bw_r_irf_register register05(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b00101)), 
+		.save(swap_global_d1_vld),
+		.save_addr({1'b0,old_agp_d1[1:0]}), 
+		.restore(swap_global_d2), 
+		.restore_addr({1'b0,new_agp_d2[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data05)
+);
+
+bw_r_irf_register register06(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b00110)), 
+		.save(swap_global_d1_vld),
+		.save_addr({1'b0,old_agp_d1[1:0]}), 
+		.restore(swap_global_d2), 
+		.restore_addr({1'b0,new_agp_d2[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data06)
+);
+
+bw_r_irf_register register07(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b00111)), 
+		.save(swap_global_d1_vld),
+		.save_addr({1'b0,old_agp_d1[1:0]}), 
+		.restore(swap_global_d2), 
+		.restore_addr({1'b0,new_agp_d2[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data07)
+);
+
+//ODDs
+bw_r_irf_register register08(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b01000)), 
+		.save(swap_odd_m_vld),
+		.save_addr({1'b0,old_lo_cwp_m[2:1]}), 
+		.restore(swap_odd_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_lo_cwp_w[2:1]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data08)
+);
+
+bw_r_irf_register register09(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b01001)), 
+		.save(swap_odd_m_vld),
+		.save_addr({1'b0,old_lo_cwp_m[2:1]}), 
+		.restore(swap_odd_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_lo_cwp_w[2:1]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data09)
+);
+
+bw_r_irf_register register10(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b01010)), 
+		.save(swap_odd_m_vld),
+		.save_addr({1'b0,old_lo_cwp_m[2:1]}), 
+		.restore(swap_odd_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_lo_cwp_w[2:1]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data10)
+);
+
+bw_r_irf_register register11(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b01011)), 
+		.save(swap_odd_m_vld),
+		.save_addr({1'b0,old_lo_cwp_m[2:1]}), 
+		.restore(swap_odd_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_lo_cwp_w[2:1]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data11)
+);
+
+bw_r_irf_register register12(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b01100)), 
+		.save(swap_odd_m_vld),
+		.save_addr({1'b0,old_lo_cwp_m[2:1]}), 
+		.restore(swap_odd_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_lo_cwp_w[2:1]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data12)
+);
+
+bw_r_irf_register register13(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b01101)), 
+		.save(swap_odd_m_vld),
+		.save_addr({1'b0,old_lo_cwp_m[2:1]}), 
+		.restore(swap_odd_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_lo_cwp_w[2:1]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data13)
+);
+
+bw_r_irf_register register14(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b01110)), 
+		.save(swap_odd_m_vld),
+		.save_addr({1'b0,old_lo_cwp_m[2:1]}), 
+		.restore(swap_odd_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_lo_cwp_w[2:1]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data14)
+);
+
+bw_r_irf_register register15(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b01111)), 
+		.save(swap_odd_m_vld),
+		.save_addr({1'b0,old_lo_cwp_m[2:1]}), 
+		.restore(swap_odd_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_lo_cwp_w[2:1]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data15)
+);
+
+//LOCALs
+bw_r_irf_register register16(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b10000)), 
+		.save(swap_local_m_vld),
+		.save_addr({old_lo_cwp_m[2:0]}), 
+		.restore(swap_local_w & ~kill_restore_w), 
+		.restore_addr({new_lo_cwp_w[2:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data16)
+);
+
+bw_r_irf_register register17(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b10001)), 
+		.save(swap_local_m_vld),
+		.save_addr({old_lo_cwp_m[2:0]}), 
+		.restore(swap_local_w & ~kill_restore_w), 
+		.restore_addr({new_lo_cwp_w[2:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data17)
+);
+
+bw_r_irf_register register18(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b10010)), 
+		.save(swap_local_m_vld),
+		.save_addr({old_lo_cwp_m[2:0]}), 
+		.restore(swap_local_w & ~kill_restore_w), 
+		.restore_addr({new_lo_cwp_w[2:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data18)
+);
+
+bw_r_irf_register register19(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b10011)), 
+		.save(swap_local_m_vld),
+		.save_addr({old_lo_cwp_m[2:0]}), 
+		.restore(swap_local_w & ~kill_restore_w), 
+		.restore_addr({new_lo_cwp_w[2:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data19)
+);
+
+bw_r_irf_register register20(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b10100)), 
+		.save(swap_local_m_vld),
+		.save_addr({old_lo_cwp_m[2:0]}), 
+		.restore(swap_local_w & ~kill_restore_w), 
+		.restore_addr({new_lo_cwp_w[2:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data20)
+);
+
+bw_r_irf_register register21(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b10101)), 
+		.save(swap_local_m_vld),
+		.save_addr({old_lo_cwp_m[2:0]}), 
+		.restore(swap_local_w & ~kill_restore_w), 
+		.restore_addr({new_lo_cwp_w[2:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data21)
+);
+
+bw_r_irf_register register22(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b10110)), 
+		.save(swap_local_m_vld),
+		.save_addr({old_lo_cwp_m[2:0]}), 
+		.restore(swap_local_w & ~kill_restore_w), 
+		.restore_addr({new_lo_cwp_w[2:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data22)
+);
+
+bw_r_irf_register register23(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b10111)), 
+		.save(swap_local_m_vld),
+		.save_addr({old_lo_cwp_m[2:0]}), 
+		.restore(swap_local_w & ~kill_restore_w), 
+		.restore_addr({new_lo_cwp_w[2:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data23)
+);
+
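+// EVENs: registers 24-31 (wr_addr 5'b11000..5'b11111). Saved on
+// swap_even_m_vld and restored on swap_even_w (unless kill_restore_w),
+// using {1'b0, 2-bit even-CWP} as the save/restore address.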
+bw_r_irf_register register24(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b11000)), 
+		.save(swap_even_m_vld),
+		.save_addr({1'b0,old_e_cwp_m[1:0]}),
+		.restore(swap_even_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_e_cwp_w[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data24)
+);
+
+bw_r_irf_register register25(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b11001)), 
+		.save(swap_even_m_vld),
+		.save_addr({1'b0,old_e_cwp_m[1:0]}),
+		.restore(swap_even_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_e_cwp_w[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data25)
+);
+
+bw_r_irf_register register26(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b11010)), 
+		.save(swap_even_m_vld),
+		.save_addr({1'b0,old_e_cwp_m[1:0]}),
+		.restore(swap_even_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_e_cwp_w[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data26)
+);
+
+bw_r_irf_register register27(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b11011)), 
+		.save(swap_even_m_vld),
+		.save_addr({1'b0,old_e_cwp_m[1:0]}),
+		.restore(swap_even_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_e_cwp_w[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data27)
+);
+
+bw_r_irf_register register28(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b11100)), 
+		.save(swap_even_m_vld),
+		.save_addr({1'b0,old_e_cwp_m[1:0]}),
+		.restore(swap_even_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_e_cwp_w[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data28)
+);
+
+bw_r_irf_register register29(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b11101)), 
+		.save(swap_even_m_vld),
+		.save_addr({1'b0,old_e_cwp_m[1:0]}),
+		.restore(swap_even_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_e_cwp_w[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data29)
+);
+
+bw_r_irf_register register30(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b11110)), 
+		.save(swap_even_m_vld),
+		.save_addr({1'b0,old_e_cwp_m[1:0]}),
+		.restore(swap_even_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_e_cwp_w[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data30)
+);
+
+bw_r_irf_register register31(
+		.clk(clk),
+		.wren(wren & (wr_addr == 5'b11111)), 
+		.save(swap_even_m_vld),
+		.save_addr({1'b0,old_e_cwp_m[1:0]}),
+		.restore(swap_even_w & ~kill_restore_w), 
+		.restore_addr({1'b0,new_e_cwp_w[1:0]}),
+		.wr_data(wr_data), 
+		.rd_data(rd_data31)
+);
+
+endmodule
+
+
+`else
+
+
+// bw_r_irf: wrapper around bw_r_irf_core.
+//
+// This module
+//   * registers the three read-port requests (enable, register number,
+//     thread id) and the two write-port destinations through dff_s flops,
+//   * forms 7-bit {thread[1:0], reg[4:0]} addresses for the core,
+//   * captures write data/address/enable for both write ports on the
+//     falling clock edge,
+//   * pipelines the window-swap (local/odd/even) and global-swap controls,
+//   * returns the core read data inverted on the *_l outputs.
+//
+// The _s/_d/_e/_m/_w/_g suffixes presumably name pipeline stages -- confirm
+// against the pipeline documentation.
+//
+// NOTE(review): within this module `so` is never driven, and `reset_l` and
+// `si` are never read. Confirm that the scan chain / reset are intentionally
+// unmodelled here.
+module bw_r_irf(so, irf_byp_rs1_data_d_l, irf_byp_rs2_data_d_l, 
+	irf_byp_rs3_data_d_l, irf_byp_rs3h_data_d_l, rclk, reset_l, si, se, 
+	sehold, rst_tri_en, ifu_exu_tid_s2, ifu_exu_rs1_s, ifu_exu_rs2_s, 
+	ifu_exu_rs3_s, ifu_exu_ren1_s, ifu_exu_ren2_s, ifu_exu_ren3_s, 
+	ecl_irf_wen_w, ecl_irf_wen_w2, ecl_irf_rd_m, ecl_irf_rd_g, 
+	byp_irf_rd_data_w, byp_irf_rd_data_w2, ecl_irf_tid_m, ecl_irf_tid_g, 
+	rml_irf_old_lo_cwp_e, rml_irf_new_lo_cwp_e, rml_irf_old_e_cwp_e, 
+	rml_irf_new_e_cwp_e, rml_irf_swap_even_e, rml_irf_swap_odd_e, 
+	rml_irf_swap_local_e, rml_irf_kill_restore_w, rml_irf_cwpswap_tid_e, 
+	rml_irf_old_agp, rml_irf_new_agp, rml_irf_swap_global, 
+	rml_irf_global_tid);
+
+	// Clock, scan and hold controls.
+	input			rclk;
+	input			reset_l;
+	input			si;
+	input			se;
+	input			sehold;
+	input			rst_tri_en;
+	// Read-port requests: thread id, three register numbers, three enables.
+	input	[1:0]		ifu_exu_tid_s2;
+	input	[4:0]		ifu_exu_rs1_s;
+	input	[4:0]		ifu_exu_rs2_s;
+	input	[4:0]		ifu_exu_rs3_s;
+	input			ifu_exu_ren1_s;
+	input			ifu_exu_ren2_s;
+	input			ifu_exu_ren3_s;
+	// Write ports: enables, destination register numbers, data, thread ids.
+	input			ecl_irf_wen_w;
+	input			ecl_irf_wen_w2;
+	input	[4:0]		ecl_irf_rd_m;
+	input	[4:0]		ecl_irf_rd_g;
+	input	[71:0]		byp_irf_rd_data_w;
+	input	[71:0]		byp_irf_rd_data_w2;
+	input	[1:0]		ecl_irf_tid_m;
+	input	[1:0]		ecl_irf_tid_g;
+	// Window-swap controls. Note the even-CWP buses are indexed [2:1].
+	input	[2:0]		rml_irf_old_lo_cwp_e;
+	input	[2:0]		rml_irf_new_lo_cwp_e;
+	input	[2:1]		rml_irf_old_e_cwp_e;
+	input	[2:1]		rml_irf_new_e_cwp_e;
+	input			rml_irf_swap_even_e;
+	input			rml_irf_swap_odd_e;
+	input			rml_irf_swap_local_e;
+	input			rml_irf_kill_restore_w;
+	input	[1:0]		rml_irf_cwpswap_tid_e;
+	// Global-register swap controls.
+	input	[1:0]		rml_irf_old_agp;
+	input	[1:0]		rml_irf_new_agp;
+	input			rml_irf_swap_global;
+	input	[1:0]		rml_irf_global_tid;
+	output			so;
+	// Read data outputs, bitwise inverted relative to the core outputs.
+	output	[71:0]		irf_byp_rs1_data_d_l;
+	output	[71:0]		irf_byp_rs2_data_d_l;
+	output	[71:0]		irf_byp_rs3_data_d_l;
+	output	[31:0]		irf_byp_rs3h_data_d_l;
+
+	// Non-inverted read data coming back from bw_r_irf_core.
+	wire	[71:0]		irf_byp_rs1_data_d;
+	wire	[71:0]		irf_byp_rs2_data_d;
+	wire	[71:0]		irf_byp_rs3_data_d;
+	wire	[71:0]		irf_byp_rs3h_data_d;
+	// Outputs of the dff_s staging flops instantiated below.
+	wire	[1:0]		ecl_irf_tid_w;
+	wire	[1:0]		ecl_irf_tid_w2;
+	wire	[4:0]		ecl_irf_rd_w;
+	wire	[4:0]		ecl_irf_rd_w2;
+	wire	[1:0]		ifu_exu_thr_d;
+	wire			ifu_exu_ren1_d;
+	wire			ifu_exu_ren2_d;
+	wire			ifu_exu_ren3_d;
+	wire	[4:0]		ifu_exu_rs1_d;
+	wire	[4:0]		ifu_exu_rs2_d;
+	wire	[4:0]		ifu_exu_rs3_d;
+	// 7-bit {thread, register} addresses for the read and write ports.
+	wire	[6:0]		thr_rs1;
+	wire	[6:0]		thr_rs2;
+	wire	[6:0]		thr_rs3;
+	wire	[6:0]		thr_rs3h;
+	wire	[6:0]		thr_rd_w;
+	wire	[6:0]		thr_rd_w2;
+	// Pipelined copies of the window-swap controls (posedge clk).
+	reg	[1:0]		cwpswap_tid_m;
+	reg	[1:0]		cwpswap_tid_w;
+	reg	[2:0]		old_lo_cwp_m;
+	reg	[2:0]		new_lo_cwp_m;
+	reg	[2:0]		new_lo_cwp_w;
+	reg	[1:0]		old_e_cwp_m;
+	reg	[1:0]		new_e_cwp_m;
+	reg	[1:0]		new_e_cwp_w;
+	reg			swap_local_m;
+	reg			swap_local_w;
+	reg			swap_even_m;
+	reg			swap_even_w;
+	reg			swap_odd_m;
+	reg			swap_odd_w;
+	reg			kill_restore_d1;
+	// Pipelined copies of the global-swap controls (posedge clk).
+	reg			swap_global_d1;
+	reg			swap_global_d2;
+	reg	[1:0]		global_tid_d1;
+	reg	[1:0]		global_tid_d2;
+	reg	[1:0]		old_agp_d1;
+	reg	[1:0]		new_agp_d1;
+	reg	[1:0]		new_agp_d2;
+	// Write-port data, address and enable captured on negedge clk.
+	reg	[71:0]		active_win_thr_rd_w_neg;
+	reg	[71:0]		active_win_thr_rd_w2_neg;
+	reg	[6:0]		thr_rd_w_neg;
+	reg	[6:0]		thr_rd_w2_neg;
+	reg			active_win_thr_rd_w_neg_wr_en;
+	reg			active_win_thr_rd_w2_neg_wr_en;
+	reg			rst_tri_en_neg;
+	wire			clk;
+	// Outputs of the sehold mux; these drive the dff_s data inputs.
+	wire			ren1_s;
+	wire			ren2_s;
+	wire			ren3_s;
+	wire	[4:0]		rs1_s;
+	wire	[4:0]		rs2_s;
+	wire	[4:0]		rs3_s;
+	wire	[1:0]		tid_s;
+	wire	[1:0]		tid_g;
+	wire	[1:0]		tid_m;
+	wire	[4:0]		rd_m;
+	wire	[4:0]		rd_g;
+	wire			kill_restore_w;
+	wire			swap_global_d1_vld;
+	wire			swap_local_m_vld;
+	wire			swap_even_m_vld;
+	wire			swap_odd_m_vld;
+	wire			wr_en;
+	wire			wr_en2;
+
+	// Local alias for the incoming clock.
+	assign clk = rclk;
+	// sehold mux: with sehold high every dff_s below is fed its own q output
+	// (so the staged values recirculate); with sehold low the fresh module
+	// inputs are selected. The two concatenations are position-matched to
+	// the left-hand side, field by field.
+	assign {ren1_s, ren2_s, ren3_s, rs1_s[4:0], rs2_s[4:0], rs3_s[4:0], 
+		tid_s[1:0], tid_g[1:0], tid_m[1:0], rd_m[4:0], rd_g[4:0]} = (
+		sehold ? {ifu_exu_ren1_d, ifu_exu_ren2_d, ifu_exu_ren3_d, 
+		ifu_exu_rs1_d[4:0], ifu_exu_rs2_d[4:0], ifu_exu_rs3_d[4:0], 
+		ifu_exu_thr_d[1:0], ecl_irf_tid_w2[1:0], ecl_irf_tid_w[1:0], 
+		ecl_irf_rd_w[4:0], ecl_irf_rd_w2[4:0]} : {ifu_exu_ren1_s, 
+		ifu_exu_ren2_s, ifu_exu_ren3_s, ifu_exu_rs1_s[4:0], 
+		ifu_exu_rs2_s[4:0], ifu_exu_rs3_s[4:0], ifu_exu_tid_s2[1:0], 
+		ecl_irf_tid_g[1:0], ecl_irf_tid_m[1:0], ecl_irf_rd_m[4:0], 
+		ecl_irf_rd_g[4:0]});
+	// Read addresses: all three read ports share the same staged thread id.
+	assign thr_rs1[6:0] = {ifu_exu_thr_d, ifu_exu_rs1_d};
+	assign thr_rs2[6:0] = {ifu_exu_thr_d, ifu_exu_rs2_d};
+	assign thr_rs3[6:0] = {ifu_exu_thr_d, ifu_exu_rs3_d[4:0]};
+	// rs3h uses rs3 with the low bit forced to 1, i.e. the odd-numbered
+	// register sharing rs3[4:1].
+	assign thr_rs3h[6:0] = {ifu_exu_thr_d[1:0], ifu_exu_rs3_d[4:1], 1'b1};
+	// Write addresses for the two write ports.
+	assign thr_rd_w[6:0] = {ecl_irf_tid_w, ecl_irf_rd_w};
+	assign thr_rd_w2[6:0] = {ecl_irf_tid_w2, ecl_irf_rd_w2};
+	// Outputs are the bitwise complement of the core read data. Only the
+	// low 32 bits of the 72-bit rs3h bus are exported.
+	assign irf_byp_rs1_data_d_l[71:0] = (~irf_byp_rs1_data_d[71:0]);
+	assign irf_byp_rs2_data_d_l[71:0] = (~irf_byp_rs2_data_d[71:0]);
+	assign irf_byp_rs3_data_d_l[71:0] = (~irf_byp_rs3_data_d[71:0]);
+	assign irf_byp_rs3h_data_d_l[31:0] = (~irf_byp_rs3h_data_d[31:0]);
+	// Under sehold the previously registered kill value is reused instead
+	// of the live input.
+	assign kill_restore_w = (sehold ? kill_restore_d1 : 
+		rml_irf_kill_restore_w);
+	// Save requests are suppressed while rst_tri_en is asserted.
+	assign swap_local_m_vld = (swap_local_m & (~rst_tri_en));
+	assign swap_odd_m_vld = (swap_odd_m & (~rst_tri_en));
+	assign swap_even_m_vld = (swap_even_m & (~rst_tri_en));
+	assign swap_global_d1_vld = (swap_global_d1 & (~rst_tri_en));
+	// Writes are blocked only when rst_tri_en AND its negedge-sampled copy
+	// are both high; if either is low the captured enable passes through.
+	assign wr_en = (active_win_thr_rd_w_neg_wr_en & ((~rst_tri_en) | (~
+		rst_tri_en_neg)));
+	assign wr_en2 = (active_win_thr_rd_w2_neg_wr_en & ((~rst_tri_en) | (~
+		rst_tri_en_neg)));
+
+	// Staging flops. dff_s is defined elsewhere; presumably a clocked
+	// register with scan enable `se` -- confirm against its definition.
+	// Read-port enables.
+	dff_s dff_ren1_s2d(
+		.din				(ren1_s), 
+		.clk				(clk), 
+		.q				(ifu_exu_ren1_d), 
+		.se				(se));
+	dff_s dff_ren2_s2d(
+		.din				(ren2_s), 
+		.clk				(clk), 
+		.q				(ifu_exu_ren2_d), 
+		.se				(se));
+	dff_s dff_ren3_s2d(
+		.din				(ren3_s), 
+		.clk				(clk), 
+		.q				(ifu_exu_ren3_d), 
+		.se				(se));
+	// Read-port register numbers.
+	dff_s #(5) dff_rs1_s2d(
+		.din				(rs1_s[4:0]), 
+		.clk				(clk), 
+		.q				(ifu_exu_rs1_d[4:0]), 
+		.se				(se));
+	dff_s #(5) dff_rs2_s2d(
+		.din				(rs2_s[4:0]), 
+		.clk				(clk), 
+		.q				(ifu_exu_rs2_d[4:0]), 
+		.se				(se));
+	dff_s #(5) dff_rs3_s2d(
+		.din				(rs3_s[4:0]), 
+		.clk				(clk), 
+		.q				(ifu_exu_rs3_d[4:0]), 
+		.se				(se));
+	// Read-port thread id.
+	dff_s #(2) dff_thr_s2d(
+		.din				(tid_s[1:0]), 
+		.clk				(clk), 
+		.q				(ifu_exu_thr_d[1:0]), 
+		.se				(se));
+	// Write-port thread ids (port 2, then port 1).
+	dff_s #(2) dff_thr_g2w2(
+		.din				(tid_g[1:0]), 
+		.clk				(clk), 
+		.q				(ecl_irf_tid_w2[1:0]), 
+		.se				(se));
+	dff_s #(2) dff_thr_m2w(
+		.din				(tid_m[1:0]), 
+		.clk				(clk), 
+		.q				(ecl_irf_tid_w[1:0]), 
+		.se				(se));
+	// Write-port destination register numbers (port 1, then port 2).
+	dff_s #(5) dff_rd_m2w(
+		.din				(rd_m[4:0]), 
+		.clk				(clk), 
+		.q				(ecl_irf_rd_w[4:0]), 
+		.se				(se));
+	dff_s #(5) dff_rd_g2w2(
+		.din				(rd_g[4:0]), 
+		.clk				(clk), 
+		.q				(ecl_irf_rd_w2[4:0]), 
+		.se				(se));
+	// Register storage: receives the staged read addresses/enables, the
+	// negedge-captured write ports and the pipelined swap controls.
+	bw_r_irf_core bw_r_irf_core(
+		.clk				(clk), 
+		.ifu_exu_ren1_d			(ifu_exu_ren1_d), 
+		.ifu_exu_ren2_d			(ifu_exu_ren2_d), 
+		.ifu_exu_ren3_d			(ifu_exu_ren3_d), 
+		.thr_rs1			(thr_rs1), 
+		.thr_rs2			(thr_rs2), 
+		.thr_rs3			(thr_rs3), 
+		.thr_rs3h			(thr_rs3h), 
+		.irf_byp_rs1_data_d		(irf_byp_rs1_data_d), 
+		.irf_byp_rs2_data_d		(irf_byp_rs2_data_d), 
+		.irf_byp_rs3_data_d		(irf_byp_rs3_data_d), 
+		.irf_byp_rs3h_data_d		(irf_byp_rs3h_data_d), 
+		.wr_en				(wr_en), 
+		.wr_en2				(wr_en2), 
+		.active_win_thr_rd_w_neg	(active_win_thr_rd_w_neg), 
+		.active_win_thr_rd_w2_neg	(active_win_thr_rd_w2_neg), 
+		.thr_rd_w_neg			(thr_rd_w_neg), 
+		.thr_rd_w2_neg			(thr_rd_w2_neg), 
+		.swap_global_d1_vld		(swap_global_d1_vld), 
+		.swap_global_d2			(swap_global_d2), 
+		.global_tid_d1			(global_tid_d1), 
+		.global_tid_d2			(global_tid_d2), 
+		.old_agp_d1			(old_agp_d1), 
+		.new_agp_d2			(new_agp_d2), 
+		.swap_local_m_vld		(swap_local_m_vld), 
+		.swap_local_w			(swap_local_w), 
+		.old_lo_cwp_m			(old_lo_cwp_m), 
+		.new_lo_cwp_w			(new_lo_cwp_w), 
+		.swap_even_m_vld		(swap_even_m_vld), 
+		.swap_even_w			(swap_even_w), 
+		.old_e_cwp_m			(old_e_cwp_m), 
+		.new_e_cwp_w			(new_e_cwp_w), 
+		.swap_odd_m_vld			(swap_odd_m_vld), 
+		.swap_odd_w			(swap_odd_w), 
+		.cwpswap_tid_m			(cwpswap_tid_m), 
+		.cwpswap_tid_w			(cwpswap_tid_w), 
+		.kill_restore_w			(kill_restore_w));
+
+	// Write-port capture on the falling clock edge.
+	//  * Both ports enabled and targeting the same {thread, reg}: port 1 is
+	//    enabled with its data forced to X, port 2 is disabled. The
+	//    register-number != 0 check is not applied in this branch, and the
+	//    port-2 data/address registers keep their previous values.
+	//  * Otherwise each port is captured independently and is enabled only
+	//    when its write enable is set and the 5-bit register number is
+	//    non-zero; when a port is not enabled its data/address registers
+	//    keep their previous values.
+	always @(negedge clk) begin
+	  rst_tri_en_neg <= rst_tri_en;
+	  if ((ecl_irf_wen_w & ecl_irf_wen_w2) & (thr_rd_w[6:0] == 
+		  thr_rd_w2[6:0])) begin
+	    active_win_thr_rd_w_neg <= {72 {1'bx}};
+	    thr_rd_w_neg <= thr_rd_w;
+	    active_win_thr_rd_w_neg_wr_en <= 1'b1;
+	    active_win_thr_rd_w2_neg_wr_en <= 1'b0;
+	  end
+	  else
+	    begin
+	      if (ecl_irf_wen_w & (thr_rd_w[4:0] != 5'b0)) begin
+		active_win_thr_rd_w_neg <= byp_irf_rd_data_w;
+		thr_rd_w_neg <= thr_rd_w;
+		active_win_thr_rd_w_neg_wr_en <= 1'b1;
+	      end
+	      else begin
+		active_win_thr_rd_w_neg_wr_en <= 1'b0;
+	      end
+	      if (ecl_irf_wen_w2 & (thr_rd_w2[4:0] != 5'b0)) begin
+		active_win_thr_rd_w2_neg <= byp_irf_rd_data_w2;
+		thr_rd_w2_neg <= thr_rd_w2;
+		active_win_thr_rd_w2_neg_wr_en <= 1'b1;
+	      end
+	      else begin
+		active_win_thr_rd_w2_neg_wr_en <= 1'b0;
+	      end
+	    end
+	end
+	// Window-swap control pipeline (rising edge): *_e inputs -> *_m -> *_w.
+	// With sehold high the *_m thread-id/CWP registers hold their value; the
+	// swap_*_m flags are kept only while rst_tri_en is also high, otherwise
+	// they clear. The *_w swap flags take the rst_tri_en-gated *_m_vld
+	// values. The 2-bit even-CWP registers are loaded from bits [2:1] of
+	// the corresponding inputs.
+	always @(posedge clk) begin
+	  cwpswap_tid_m[1:0] <= (sehold ? cwpswap_tid_m[1:0] : 
+		  rml_irf_cwpswap_tid_e[1:0]);
+	  cwpswap_tid_w[1:0] <= cwpswap_tid_m[1:0];
+	  old_lo_cwp_m[2:0] <= (sehold ? old_lo_cwp_m[2:0] : 
+		  rml_irf_old_lo_cwp_e[2:0]);
+	  new_lo_cwp_m[2:0] <= (sehold ? new_lo_cwp_m[2:0] : 
+		  rml_irf_new_lo_cwp_e[2:0]);
+	  new_lo_cwp_w[2:0] <= new_lo_cwp_m[2:0];
+	  old_e_cwp_m[1:0] <= (sehold ? old_e_cwp_m[1:0] : 
+		  rml_irf_old_e_cwp_e[2:1]);
+	  new_e_cwp_m[1:0] <= (sehold ? new_e_cwp_m[1:0] : 
+		  rml_irf_new_e_cwp_e[2:1]);
+	  new_e_cwp_w[1:0] <= new_e_cwp_m[1:0];
+	  swap_local_m <= (sehold ? (swap_local_m & rst_tri_en) : 
+		  rml_irf_swap_local_e);
+	  swap_local_w <= swap_local_m_vld;
+	  swap_odd_m <= (sehold ? (swap_odd_m & rst_tri_en) : rml_irf_swap_odd_e
+		  );
+	  swap_odd_w <= swap_odd_m_vld;
+	  swap_even_m <= (sehold ? (swap_even_m & rst_tri_en) : 
+		  rml_irf_swap_even_e);
+	  swap_even_w <= swap_even_m_vld;
+	  kill_restore_d1 <= kill_restore_w;
+	end
+	// Global-swap control pipeline (rising edge): inputs -> *_d1 -> *_d2,
+	// with the same sehold / rst_tri_en treatment as the window-swap
+	// pipeline above.
+	always @(posedge clk) begin
+	  swap_global_d1 <= (sehold ? (swap_global_d1 & rst_tri_en) : 
+		  rml_irf_swap_global);
+	  swap_global_d2 <= swap_global_d1_vld;
+	  global_tid_d1[1:0] <= (sehold ? global_tid_d1[1:0] : 
+		  rml_irf_global_tid[1:0]);
+	  global_tid_d2[1:0] <= global_tid_d1[1:0];
+	  old_agp_d1[1:0] <= (sehold ? old_agp_d1[1:0] : rml_irf_old_agp[1:0]);
+	  new_agp_d1[1:0] <= (sehold ? new_agp_d1[1:0] : rml_irf_new_agp[1:0]);
+	  new_agp_d2[1:0] <= new_agp_d1[1:0];
+	end
+	// Disabled simulation-only trace of the write and read ports.
+/*
+	always @(posedge clk) begin
+	  if (wr_en) begin
+	    $display("Write Port 1: %h %h", active_win_thr_rd_w_neg, 
+		    thr_rd_w_neg);
+	  end
+	  if (wr_en2) begin
+	    $display("Write Port 2: %h %h", active_win_thr_rd_w2_neg, 
+		    thr_rd_w2_neg);
+	  end
+	  if (ifu_exu_ren1_d) begin
+	    @(posedge clk) ;
+	    $display("Read Port 1: %h %h", irf_byp_rs1_data_d, thr_rs1);
+	  end
+	  if (ifu_exu_ren2_d) begin
+	    @(posedge clk) ;
+	    $display("Read Port 2: %h %h", irf_byp_rs2_data_d, thr_rs2);
+	  end
+	  if (ifu_exu_ren3_d) begin
+	    @(posedge clk) ;
+	    $display("Read Port 3: %h %h", irf_byp_rs3_data_d, thr_rs3);
+	  end
+	end
+*/
+endmodule
+
+module bw_r_irf_core(clk, ifu_exu_ren1_d, ifu_exu_ren2_d, ifu_exu_ren3_d, 
+	thr_rs1, thr_rs2, thr_rs3, thr_rs3h, irf_byp_rs1_data_d, 
+	irf_byp_rs2_data_d, irf_byp_rs3_data_d, irf_byp_rs3h_data_d, wr_en, 
+	wr_en2, active_win_thr_rd_w_neg, active_win_thr_rd_w2_neg, thr_rd_w_neg,
+	thr_rd_w2_neg, swap_global_d1_vld, swap_global_d2, global_tid_d1, 
+	global_tid_d2, old_agp_d1, new_agp_d2, swap_local_m_vld, swap_local_w, 
+	old_lo_cwp_m, new_lo_cwp_w, swap_even_m_vld, swap_even_w, old_e_cwp_m, 
+	new_e_cwp_w, swap_odd_m_vld, swap_odd_w, cwpswap_tid_m, cwpswap_tid_w, 
+	kill_restore_w);
+
+	input			clk;
+	input			ifu_exu_ren1_d;
+	input			ifu_exu_ren2_d;
+	input			ifu_exu_ren3_d;
+	input	[6:0]		thr_rs1;
+	input	[6:0]		thr_rs2;
+	input	[6:0]		thr_rs3;
+	input	[6:0]		thr_rs3h;
+	output	[71:0]		irf_byp_rs1_data_d;
+	output	[71:0]		irf_byp_rs2_data_d;
+	output	[71:0]		irf_byp_rs3_data_d;
+	output	[71:0]		irf_byp_rs3h_data_d;
+	input			wr_en;
+	input			wr_en2;
+	input	[71:0]		active_win_thr_rd_w_neg;
+	input	[71:0]		active_win_thr_rd_w2_neg;
+	input	[6:0]		thr_rd_w_neg;
+	input	[6:0]		thr_rd_w2_neg;
+	input			swap_global_d1_vld;
+	input			swap_global_d2;
+	input	[1:0]		global_tid_d1;
+	input	[1:0]		global_tid_d2;
+	input	[1:0]		old_agp_d1;
+	input	[1:0]		new_agp_d2;
+	input			swap_local_m_vld;
+	input			swap_local_w;
+	input	[2:0]		old_lo_cwp_m;
+	input	[2:0]		new_lo_cwp_w;
+	input			swap_even_m_vld;
+	input			swap_even_w;
+	input	[1:0]		old_e_cwp_m;
+	input	[1:0]		new_e_cwp_w;
+	input			swap_odd_m_vld;
+	input			swap_odd_w;
+	input	[1:0]		cwpswap_tid_m;
+	input	[1:0]		cwpswap_tid_w;
+	input			kill_restore_w;
+
+	reg	[71:0]		irf_byp_rs1_data_d;
+	reg	[71:0]		irf_byp_rs2_data_d;
+	reg	[71:0]		irf_byp_rs3_data_d;
+	reg	[71:0]		irf_byp_rs3h_data_d;
+	wire	[71:0]		rd_data00;
+	wire	[71:0]		rd_data01;
+	wire	[71:0]		rd_data02;
+	wire	[71:0]		rd_data03;
+	wire	[71:0]		rd_data04;
+	wire	[71:0]		rd_data05;
+	wire	[71:0]		rd_data06;
+	wire	[71:0]		rd_data07;
+	wire	[71:0]		rd_data08;
+	wire	[71:0]		rd_data09;
+	wire	[71:0]		rd_data10;
+	wire	[71:0]		rd_data11;
+	wire	[71:0]		rd_data12;
+	wire	[71:0]		rd_data13;
+	wire	[71:0]		rd_data14;
+	wire	[71:0]		rd_data15;
+	wire	[71:0]		rd_data16;
+	wire	[71:0]		rd_data17;
+	wire	[71:0]		rd_data18;
+	wire	[71:0]		rd_data19;
+	wire	[71:0]		rd_data20;
+	wire	[71:0]		rd_data21;
+	wire	[71:0]		rd_data22;
+	wire	[71:0]		rd_data23;
+	wire	[71:0]		rd_data24;
+	wire	[71:0]		rd_data25;
+	wire	[71:0]		rd_data26;
+	wire	[71:0]		rd_data27;
+	wire	[71:0]		rd_data28;
+	wire	[71:0]		rd_data29;
+	wire	[71:0]		rd_data30;
+	wire	[71:0]		rd_data31;
+	wire			wren;
+	wire	[4:0]		wr_addr;
+	wire	[71:0]		wr_data;
+
+
+	wire 	[127:0]	wr_en1s = (wr_en << {thr_rd_w_neg[4:0],thr_rd_w_neg[6:5]});
+	wire	[127:0] wr_en2s = (wr_en2 << {thr_rd_w2_neg[4:0],thr_rd_w2_neg[6:5]});
+	wire	[127:0]	wrens = wr_en1s | wr_en2s;
+
+	wire	[3:0]	wr_th1 = wr_en << thr_rd_w_neg[6:5];
+
+	wire	[71:0]	wr_data0 = wr_th1[0] ? active_win_thr_rd_w_neg : active_win_thr_rd_w2_neg;
+	wire	[71:0]	wr_data1 = wr_th1[1] ? active_win_thr_rd_w_neg : active_win_thr_rd_w2_neg;
+	wire	[71:0]	wr_data2 = wr_th1[2] ? active_win_thr_rd_w_neg : active_win_thr_rd_w2_neg;
+	wire	[71:0]	wr_data3 = wr_th1[3] ? active_win_thr_rd_w_neg : active_win_thr_rd_w2_neg;
+
+
+	bw_r_irf_register register00(
+		.clk				(clk), 
+		.wrens				(wrens[3:0]),
+		.save				(swap_global_d1_vld), 
+		.save_addr			({global_tid_d1, 1'b0, old_agp_d1[1:0]}), 
+		.restore			(swap_global_d2), 
+		.restore_addr			({global_tid_d2, 1'b0, new_agp_d2[1:0]}), 
+		.wr_data0			(72'b0), 
+		.wr_data1			(72'b0), 
+		.wr_data2			(72'b0), 
+		.wr_data3			(72'b0), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data00));
+	bw_r_irf_register register01(
+		.clk				(clk), 
+		.wrens				(wrens[7:4]), 
+		.save				(swap_global_d1_vld), 
+		.save_addr			({global_tid_d1, 1'b0, old_agp_d1[1:0]}), 
+		.restore			(swap_global_d2), 
+		.restore_addr			({global_tid_d2, 1'b0, new_agp_d2[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data01));
+	bw_r_irf_register register02(
+		.clk				(clk), 
+		.wrens				(wrens[11:8]), 
+		.save				(swap_global_d1_vld), 
+		.save_addr			({global_tid_d1, 1'b0, old_agp_d1[1:0]}), 
+		.restore			(swap_global_d2), 
+		.restore_addr			({global_tid_d2, 1'b0, new_agp_d2[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data02));
+	bw_r_irf_register register03(
+		.clk				(clk), 
+		.wrens				(wrens[15:12]), 
+		.save				(swap_global_d1_vld), 
+		.save_addr			({global_tid_d1, 1'b0, old_agp_d1[1:0]}), 
+		.restore			(swap_global_d2), 
+		.restore_addr			({global_tid_d2, 1'b0, new_agp_d2[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data03));
+	bw_r_irf_register register04(
+		.clk				(clk), 
+		.wrens				(wrens[19:16]), 
+		.save				(swap_global_d1_vld), 
+		.save_addr			({global_tid_d1, 1'b0, old_agp_d1[1:0]}), 
+		.restore			(swap_global_d2), 
+		.restore_addr			({global_tid_d2, 1'b0, new_agp_d2[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data04));
+	bw_r_irf_register register05(
+		.clk				(clk), 
+		.wrens				(wrens[23:20]), 
+		.save				(swap_global_d1_vld), 
+		.save_addr			({global_tid_d1, 1'b0, old_agp_d1[1:0]}), 
+		.restore			(swap_global_d2), 
+		.restore_addr			({global_tid_d2, 1'b0, new_agp_d2[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data05));
+	bw_r_irf_register register06(
+		.clk				(clk), 
+		.wrens				(wrens[27:24]), 
+		.save				(swap_global_d1_vld), 
+		.save_addr			({global_tid_d1, 1'b0, old_agp_d1[1:0]}), 
+		.restore			(swap_global_d2), 
+		.restore_addr			({global_tid_d2, 1'b0, new_agp_d2[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data06));
+	bw_r_irf_register register07(
+		.clk				(clk), 
+		.wrens				(wrens[31:28]), 
+		.save				(swap_global_d1_vld), 
+		.save_addr			({global_tid_d1, 1'b0, old_agp_d1[1:0]}), 
+		.restore			(swap_global_d2), 
+		.restore_addr			({global_tid_d2, 1'b0, new_agp_d2[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data07));
+
+	bw_r_irf_register register08(
+		.clk				(clk), 
+		.wrens				(wrens[35:32]),
+		.save				(swap_odd_m_vld), 
+		.save_addr			({cwpswap_tid_m, 1'b0, old_lo_cwp_m[2:1]}), 
+		.restore			((swap_odd_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w,1'b0, new_lo_cwp_w[2:1]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data08));
+	bw_r_irf_register register09(
+		.clk				(clk), 
+		.wrens				(wrens[39:36]),
+		.save				(swap_odd_m_vld), 
+		.save_addr			({cwpswap_tid_m, 1'b0, old_lo_cwp_m[2:1]}), 
+		.restore			((swap_odd_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w,1'b0, new_lo_cwp_w[2:1]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data09));
+	bw_r_irf_register register10(
+		.clk				(clk), 
+		.wrens				(wrens[43:40]),
+		.save				(swap_odd_m_vld), 
+		.save_addr			({cwpswap_tid_m, 1'b0, old_lo_cwp_m[2:1]}), 
+		.restore			((swap_odd_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w,1'b0, new_lo_cwp_w[2:1]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data10));
+	bw_r_irf_register register11(
+		.clk				(clk), 
+		.wrens				(wrens[47:44]),
+		.save				(swap_odd_m_vld), 
+		.save_addr			({cwpswap_tid_m, 1'b0, old_lo_cwp_m[2:1]}), 
+		.restore			((swap_odd_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w,1'b0, new_lo_cwp_w[2:1]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data11));
+	bw_r_irf_register register12(
+		.clk				(clk), 
+		.wrens				(wrens[51:48]),
+		.save				(swap_odd_m_vld), 
+		.save_addr			({cwpswap_tid_m, 1'b0, old_lo_cwp_m[2:1]}), 
+		.restore			((swap_odd_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w,1'b0, new_lo_cwp_w[2:1]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data12));
+	bw_r_irf_register register13(
+		.clk				(clk), 
+		.wrens				(wrens[55:52]),
+		.save				(swap_odd_m_vld), 
+		.save_addr			({cwpswap_tid_m, 1'b0, old_lo_cwp_m[2:1]}), 
+		.restore			((swap_odd_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w,1'b0, new_lo_cwp_w[2:1]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data13));
+	bw_r_irf_register register14(
+		.clk				(clk), 
+		.wrens				(wrens[59:56]),
+		.save				(swap_odd_m_vld), 
+		.save_addr			({cwpswap_tid_m, 1'b0, old_lo_cwp_m[2:1]}), 
+		.restore			((swap_odd_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w,1'b0, new_lo_cwp_w[2:1]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data14));
+	bw_r_irf_register register15(
+		.clk				(clk), 
+		.wrens				(wrens[63:60]),
+		.save				(swap_odd_m_vld), 
+		.save_addr			({cwpswap_tid_m, 1'b0, old_lo_cwp_m[2:1]}), 
+		.restore			((swap_odd_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w,1'b0, new_lo_cwp_w[2:1]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data15));
+
+	bw_r_irf_register register16(
+		.clk				(clk), 
+		.wrens				(wrens[67:64]), 
+		.save				(swap_local_m_vld), 
+		.save_addr			({cwpswap_tid_m, old_lo_cwp_m[2:0]}), 
+		.restore			((swap_local_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w, new_lo_cwp_w[2:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data16));
+	bw_r_irf_register register17(
+		.clk				(clk), 
+		.wrens				(wrens[71:68]), 
+		.save				(swap_local_m_vld), 
+		.save_addr			({cwpswap_tid_m, old_lo_cwp_m[2:0]}), 
+		.restore			((swap_local_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w, new_lo_cwp_w[2:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data17));
+	bw_r_irf_register register18(
+		.clk				(clk), 
+		.wrens				(wrens[75:72]), 
+		.save				(swap_local_m_vld), 
+		.save_addr			({cwpswap_tid_m, old_lo_cwp_m[2:0]}), 
+		.restore			((swap_local_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w, new_lo_cwp_w[2:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data18));
+	bw_r_irf_register register19(
+		.clk				(clk), 
+		.wrens				(wrens[79:76]), 
+		.save				(swap_local_m_vld), 
+		.save_addr			({cwpswap_tid_m, old_lo_cwp_m[2:0]}), 
+		.restore			((swap_local_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w, new_lo_cwp_w[2:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data19));
+	bw_r_irf_register register20(
+		.clk				(clk), 
+		.wrens				(wrens[83:80]), 
+		.save				(swap_local_m_vld), 
+		.save_addr			({cwpswap_tid_m, old_lo_cwp_m[2:0]}), 
+		.restore			((swap_local_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w, new_lo_cwp_w[2:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data20));
+	bw_r_irf_register register21(
+		.clk				(clk), 
+		.wrens				(wrens[87:84]), 
+		.save				(swap_local_m_vld), 
+		.save_addr			({cwpswap_tid_m, old_lo_cwp_m[2:0]}), 
+		.restore			((swap_local_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w, new_lo_cwp_w[2:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data21));
+	bw_r_irf_register register22(
+		.clk				(clk), 
+		.wrens				(wrens[91:88]), 
+		.save				(swap_local_m_vld), 
+		.save_addr			({cwpswap_tid_m, old_lo_cwp_m[2:0]}), 
+		.restore			((swap_local_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w, new_lo_cwp_w[2:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data22));
+	bw_r_irf_register register23(
+		.clk				(clk), 
+		.wrens				(wrens[95:92]), 
+		.save				(swap_local_m_vld), 
+		.save_addr			({cwpswap_tid_m, old_lo_cwp_m[2:0]}), 
+		.restore			((swap_local_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w, new_lo_cwp_w[2:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data23));
+
+	bw_r_irf_register register24(
+		.clk				(clk), 
+		.wrens				(wrens[99:96]),
+		.save				(swap_even_m_vld), 
+		.save_addr			({cwpswap_tid_m[1:0], 1'b0, old_e_cwp_m[1:0]}), 
+		.restore			((swap_even_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w[1:0], 1'b0, new_e_cwp_w[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data24));
+	bw_r_irf_register register25(
+		.clk				(clk), 
+		.wrens				(wrens[103:100]),
+		.save				(swap_even_m_vld), 
+		.save_addr			({cwpswap_tid_m[1:0], 1'b0, old_e_cwp_m[1:0]}), 
+		.restore			((swap_even_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w[1:0], 1'b0, new_e_cwp_w[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data25));
+	bw_r_irf_register register26(	// storage behind rd_data26; write enables wrens[107:104] (presumably one per thread - confirm in bw_r_irf_register)
+		.clk				(clk), 
+		.wrens				(wrens[107:104]),
+		.save				(swap_even_m_vld), 	// save/restore are driven by the even-window swap controls
+		.save_addr			({cwpswap_tid_m[1:0], 1'b0, old_e_cwp_m[1:0]}), 
+		.restore			((swap_even_w & (~kill_restore_w))), 	// restore is suppressed by kill_restore_w
+		.restore_addr			({cwpswap_tid_w[1:0], 1'b0, new_e_cwp_w[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data26));
+	bw_r_irf_register register27(	// storage behind rd_data27; write enables wrens[111:108]; same even-window swap wiring
+		.clk				(clk), 
+		.wrens				(wrens[111:108]),
+		.save				(swap_even_m_vld), 
+		.save_addr			({cwpswap_tid_m[1:0], 1'b0, old_e_cwp_m[1:0]}), 
+		.restore			((swap_even_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w[1:0], 1'b0, new_e_cwp_w[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data27));
+	bw_r_irf_register register28(	// storage behind rd_data28; write enables wrens[115:112]; same even-window swap wiring
+		.clk				(clk), 
+		.wrens				(wrens[115:112]),
+		.save				(swap_even_m_vld), 
+		.save_addr			({cwpswap_tid_m[1:0], 1'b0, old_e_cwp_m[1:0]}), 
+		.restore			((swap_even_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w[1:0], 1'b0, new_e_cwp_w[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data28));
+	bw_r_irf_register register29(	// storage behind rd_data29; write enables wrens[119:116]; same even-window swap wiring
+		.clk				(clk), 
+		.wrens				(wrens[119:116]),
+		.save				(swap_even_m_vld), 
+		.save_addr			({cwpswap_tid_m[1:0], 1'b0, old_e_cwp_m[1:0]}), 
+		.restore			((swap_even_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w[1:0], 1'b0, new_e_cwp_w[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data29));
+	bw_r_irf_register register30(	// storage behind rd_data30; write enables wrens[123:120]; same even-window swap wiring
+		.clk				(clk), 
+		.wrens				(wrens[123:120]),
+		.save				(swap_even_m_vld), 
+		.save_addr			({cwpswap_tid_m[1:0], 1'b0, old_e_cwp_m[1:0]}), 
+		.restore			((swap_even_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w[1:0], 1'b0, new_e_cwp_w[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data30));
+	bw_r_irf_register register31(	// storage behind rd_data31; write enables wrens[127:124]; same even-window swap wiring
+		.clk				(clk), 
+		.wrens				(wrens[127:124]),
+		.save				(swap_even_m_vld), 
+		.save_addr			({cwpswap_tid_m[1:0], 1'b0, old_e_cwp_m[1:0]}), 
+		.restore			((swap_even_w & (~kill_restore_w))), 
+		.restore_addr			({cwpswap_tid_w[1:0], 1'b0, new_e_cwp_w[1:0]}), 
+		.wr_data0			(wr_data0), 
+		.wr_data1			(wr_data1), 
+		.wr_data2			(wr_data2), 
+		.wr_data3			(wr_data3), 
+		.rd_thread			(thr_rs1[6:5]),
+		.rd_data			(rd_data31));
+
+
+	// Read port rs1: on each falling clk edge with ifu_exu_ren1_d set, capture the rd_dataNN picked by thr_rs1[4:0]; otherwise hold.
+	always @(negedge clk) if (ifu_exu_ren1_d)
+	  case (thr_rs1[4:0])
+	    5'd0:
+	      irf_byp_rs1_data_d <= rd_data00;
+	    5'd1:
+	      irf_byp_rs1_data_d <= rd_data01;
+	    5'd2:
+	      irf_byp_rs1_data_d <= rd_data02;
+	    5'd3:
+	      irf_byp_rs1_data_d <= rd_data03;
+	    5'd4:
+	      irf_byp_rs1_data_d <= rd_data04;
+	    5'd5:
+	      irf_byp_rs1_data_d <= rd_data05;
+	    5'd6:
+	      irf_byp_rs1_data_d <= rd_data06;
+	    5'd7:
+	      irf_byp_rs1_data_d <= rd_data07;
+	    5'd8:
+	      irf_byp_rs1_data_d <= rd_data08;
+	    5'd9:
+	      irf_byp_rs1_data_d <= rd_data09;
+	    5'd10:
+	      irf_byp_rs1_data_d <= rd_data10;
+	    5'd11:
+	      irf_byp_rs1_data_d <= rd_data11;
+	    5'd12:
+	      irf_byp_rs1_data_d <= rd_data12;
+	    5'd13:
+	      irf_byp_rs1_data_d <= rd_data13;
+	    5'd14:
+	      irf_byp_rs1_data_d <= rd_data14;
+	    5'd15:
+	      irf_byp_rs1_data_d <= rd_data15;
+	    5'd16:
+	      irf_byp_rs1_data_d <= rd_data16;
+	    5'd17:
+	      irf_byp_rs1_data_d <= rd_data17;
+	    5'd18:
+	      irf_byp_rs1_data_d <= rd_data18;
+	    5'd19:
+	      irf_byp_rs1_data_d <= rd_data19;
+	    5'd20:
+	      irf_byp_rs1_data_d <= rd_data20;
+	    5'd21:
+	      irf_byp_rs1_data_d <= rd_data21;
+	    5'd22:
+	      irf_byp_rs1_data_d <= rd_data22;
+	    5'd23:
+	      irf_byp_rs1_data_d <= rd_data23;
+	    5'd24:
+	      irf_byp_rs1_data_d <= rd_data24;
+	    5'd25:
+	      irf_byp_rs1_data_d <= rd_data25;
+	    5'd26:
+	      irf_byp_rs1_data_d <= rd_data26;
+	    5'd27:
+	      irf_byp_rs1_data_d <= rd_data27;
+	    5'd28:
+	      irf_byp_rs1_data_d <= rd_data28;
+	    5'd29:
+	      irf_byp_rs1_data_d <= rd_data29;
+	    5'd30:
+	      irf_byp_rs1_data_d <= rd_data30;
+	    5'd31:
+	      irf_byp_rs1_data_d <= rd_data31;
+	  endcase
+	// Read port rs2: on each falling clk edge with ifu_exu_ren2_d set, capture the rd_dataNN picked by thr_rs2[4:0]; otherwise hold.
+	always @(negedge clk) if (ifu_exu_ren2_d)
+	  case (thr_rs2[4:0])
+	    5'd0:
+	      irf_byp_rs2_data_d <= rd_data00;
+	    5'd1:
+	      irf_byp_rs2_data_d <= rd_data01;
+	    5'd2:
+	      irf_byp_rs2_data_d <= rd_data02;
+	    5'd3:
+	      irf_byp_rs2_data_d <= rd_data03;
+	    5'd4:
+	      irf_byp_rs2_data_d <= rd_data04;
+	    5'd5:
+	      irf_byp_rs2_data_d <= rd_data05;
+	    5'd6:
+	      irf_byp_rs2_data_d <= rd_data06;
+	    5'd7:
+	      irf_byp_rs2_data_d <= rd_data07;
+	    5'd8:
+	      irf_byp_rs2_data_d <= rd_data08;
+	    5'd9:
+	      irf_byp_rs2_data_d <= rd_data09;
+	    5'd10:
+	      irf_byp_rs2_data_d <= rd_data10;
+	    5'd11:
+	      irf_byp_rs2_data_d <= rd_data11;
+	    5'd12:
+	      irf_byp_rs2_data_d <= rd_data12;
+	    5'd13:
+	      irf_byp_rs2_data_d <= rd_data13;
+	    5'd14:
+	      irf_byp_rs2_data_d <= rd_data14;
+	    5'd15:
+	      irf_byp_rs2_data_d <= rd_data15;
+	    5'd16:
+	      irf_byp_rs2_data_d <= rd_data16;
+	    5'd17:
+	      irf_byp_rs2_data_d <= rd_data17;
+	    5'd18:
+	      irf_byp_rs2_data_d <= rd_data18;
+	    5'd19:
+	      irf_byp_rs2_data_d <= rd_data19;
+	    5'd20:
+	      irf_byp_rs2_data_d <= rd_data20;
+	    5'd21:
+	      irf_byp_rs2_data_d <= rd_data21;
+	    5'd22:
+	      irf_byp_rs2_data_d <= rd_data22;
+	    5'd23:
+	      irf_byp_rs2_data_d <= rd_data23;
+	    5'd24:
+	      irf_byp_rs2_data_d <= rd_data24;
+	    5'd25:
+	      irf_byp_rs2_data_d <= rd_data25;
+	    5'd26:
+	      irf_byp_rs2_data_d <= rd_data26;
+	    5'd27:
+	      irf_byp_rs2_data_d <= rd_data27;
+	    5'd28:
+	      irf_byp_rs2_data_d <= rd_data28;
+	    5'd29:
+	      irf_byp_rs2_data_d <= rd_data29;
+	    5'd30:
+	      irf_byp_rs2_data_d <= rd_data30;
+	    5'd31:
+	      irf_byp_rs2_data_d <= rd_data31;
+	  endcase
+	// Read port rs3: on each falling clk edge with ifu_exu_ren3_d set, capture the rd_dataNN picked by thr_rs3[4:0]; otherwise hold.
+	always @(negedge clk) if (ifu_exu_ren3_d)
+	  case (thr_rs3[4:0])
+	    5'd0:
+	      irf_byp_rs3_data_d <= rd_data00;
+	    5'd1:
+	      irf_byp_rs3_data_d <= rd_data01;
+	    5'd2:
+	      irf_byp_rs3_data_d <= rd_data02;
+	    5'd3:
+	      irf_byp_rs3_data_d <= rd_data03;
+	    5'd4:
+	      irf_byp_rs3_data_d <= rd_data04;
+	    5'd5:
+	      irf_byp_rs3_data_d <= rd_data05;
+	    5'd6:
+	      irf_byp_rs3_data_d <= rd_data06;
+	    5'd7:
+	      irf_byp_rs3_data_d <= rd_data07;
+	    5'd8:
+	      irf_byp_rs3_data_d <= rd_data08;
+	    5'd9:
+	      irf_byp_rs3_data_d <= rd_data09;
+	    5'd10:
+	      irf_byp_rs3_data_d <= rd_data10;
+	    5'd11:
+	      irf_byp_rs3_data_d <= rd_data11;
+	    5'd12:
+	      irf_byp_rs3_data_d <= rd_data12;
+	    5'd13:
+	      irf_byp_rs3_data_d <= rd_data13;
+	    5'd14:
+	      irf_byp_rs3_data_d <= rd_data14;
+	    5'd15:
+	      irf_byp_rs3_data_d <= rd_data15;
+	    5'd16:
+	      irf_byp_rs3_data_d <= rd_data16;
+	    5'd17:
+	      irf_byp_rs3_data_d <= rd_data17;
+	    5'd18:
+	      irf_byp_rs3_data_d <= rd_data18;
+	    5'd19:
+	      irf_byp_rs3_data_d <= rd_data19;
+	    5'd20:
+	      irf_byp_rs3_data_d <= rd_data20;
+	    5'd21:
+	      irf_byp_rs3_data_d <= rd_data21;
+	    5'd22:
+	      irf_byp_rs3_data_d <= rd_data22;
+	    5'd23:
+	      irf_byp_rs3_data_d <= rd_data23;
+	    5'd24:
+	      irf_byp_rs3_data_d <= rd_data24;
+	    5'd25:
+	      irf_byp_rs3_data_d <= rd_data25;
+	    5'd26:
+	      irf_byp_rs3_data_d <= rd_data26;
+	    5'd27:
+	      irf_byp_rs3_data_d <= rd_data27;
+	    5'd28:
+	      irf_byp_rs3_data_d <= rd_data28;
+	    5'd29:
+	      irf_byp_rs3_data_d <= rd_data29;
+	    5'd30:
+	      irf_byp_rs3_data_d <= rd_data30;
+	    5'd31:
+	      irf_byp_rs3_data_d <= rd_data31;
+	  endcase
+	// Read port rs3h: on each falling clk edge with ifu_exu_ren3_d set, capture the odd-numbered rd_dataNN picked by thr_rs3h[4:1]; otherwise hold.
+	always @(negedge clk) if (ifu_exu_ren3_d)
+	  case (thr_rs3h[4:1])
+	    4'd0:
+	      irf_byp_rs3h_data_d <= rd_data01;
+	    4'd1:
+	      irf_byp_rs3h_data_d <= rd_data03;
+	    4'd2:
+	      irf_byp_rs3h_data_d <= rd_data05;
+	    4'd3:
+	      irf_byp_rs3h_data_d <= rd_data07;
+	    4'd4:
+	      irf_byp_rs3h_data_d <= rd_data09;
+	    4'd5:
+	      irf_byp_rs3h_data_d <= rd_data11;
+	    4'd6:
+	      irf_byp_rs3h_data_d <= rd_data13;
+	    4'd7:
+	      irf_byp_rs3h_data_d <= rd_data15;
+	    4'd8:
+	      irf_byp_rs3h_data_d <= rd_data17;
+	    4'd9:
+	      irf_byp_rs3h_data_d <= rd_data19;
+	    4'd10:
+	      irf_byp_rs3h_data_d <= rd_data21;
+	    4'd11:
+	      irf_byp_rs3h_data_d <= rd_data23;
+	    4'd12:
+	      irf_byp_rs3h_data_d <= rd_data25;
+	    4'd13:
+	      irf_byp_rs3h_data_d <= rd_data27;
+	    4'd14:
+	      irf_byp_rs3h_data_d <= rd_data29;
+	    4'd15:
+	      irf_byp_rs3h_data_d <= rd_data31;
+	  endcase
+endmodule
+
+`endif
+`else
+
+module bw_r_irf (/*AUTOARG*/
+   // Outputs
+   so, irf_byp_rs1_data_d_l, irf_byp_rs2_data_d_l, 
+   irf_byp_rs3_data_d_l, irf_byp_rs3h_data_d_l, 
+   // Inputs
+   rclk, reset_l, si, se, sehold, rst_tri_en, ifu_exu_tid_s2, 
+   ifu_exu_rs1_s, ifu_exu_rs2_s, ifu_exu_rs3_s, ifu_exu_ren1_s, 
+   ifu_exu_ren2_s, ifu_exu_ren3_s, ecl_irf_wen_w, ecl_irf_wen_w2, 
+   ecl_irf_rd_m, ecl_irf_rd_g, byp_irf_rd_data_w, byp_irf_rd_data_w2, 
+   ecl_irf_tid_m, ecl_irf_tid_g, rml_irf_old_lo_cwp_e, 
+   rml_irf_new_lo_cwp_e, rml_irf_old_e_cwp_e, rml_irf_new_e_cwp_e, 
+   rml_irf_swap_even_e, rml_irf_swap_odd_e, rml_irf_swap_local_e, 
+   rml_irf_kill_restore_w, rml_irf_cwpswap_tid_e, rml_irf_old_agp, 
+   rml_irf_new_agp, rml_irf_swap_global, rml_irf_global_tid
+   ) ; // Behavioral register-file model: 4 threads x 32 active 72-bit registers plus backing store for locals/evens/odds/globals
+   input rclk;                  // raw clock; ANDed with reset_l to form the internal clk
+   input reset_l;               // active low; only use in this module is gating rclk
+   input si;                    // scan in (not referenced inside this module)
+   input se;                    // scan enable, passed to the pipeline flops
+   input sehold;                // when high the pipeline flops recirculate their current values
+   input rst_tri_en;            // masks window swaps and (together with rst_tri_en_neg) active-window writes
+   input [1:0]  ifu_exu_tid_s2;  // s stage thread
+   input [4:0]  ifu_exu_rs1_s;  // source addresses
+   input [4:0]  ifu_exu_rs2_s;
+   input [4:0]  ifu_exu_rs3_s;
+   input ifu_exu_ren1_s;        // read enables for all 3 ports
+   input ifu_exu_ren2_s;
+   input ifu_exu_ren3_s;
+   input ecl_irf_wen_w;        // write enables for both write ports
+   input ecl_irf_wen_w2;
+   input [4:0]  ecl_irf_rd_m;   // m stage destination (flopped to w inside this module)
+   input [4:0]  ecl_irf_rd_g;  // g stage destination (flopped to w2 inside this module)
+   input [71:0] byp_irf_rd_data_w;// write data from w1
+   input [71:0] byp_irf_rd_data_w2;     // write data from w2
+   input [1:0]  ecl_irf_tid_m;  // m stage thread (flopped to w inside this module)
+   input [1:0]  ecl_irf_tid_g; // g stage thread (flopped to w2 inside this module)
+
+   input [2:0]  rml_irf_old_lo_cwp_e;  // current window pointer for locals and odds
+   input [2:0]  rml_irf_new_lo_cwp_e;  // target window pointer for locals and odds
+   input [2:1]  rml_irf_old_e_cwp_e;  // current window pointer for evens
+   input [2:1]  rml_irf_new_e_cwp_e;  // target window pointer for evens
+   input        rml_irf_swap_even_e;      // request swap of active registers 24-31
+   input        rml_irf_swap_odd_e;       // request swap of active registers 8-15
+   input        rml_irf_swap_local_e;     // request swap of active registers 16-23
+   input        rml_irf_kill_restore_w;   // cancels the restore half of a window swap
+   input [1:0]  rml_irf_cwpswap_tid_e;    // thread whose window is being swapped
+
+   input [1:0]  rml_irf_old_agp; // alternate global pointer to save the active globals under
+   input [1:0]  rml_irf_new_agp; // alternate global pointer to load the new globals from
+   input        rml_irf_swap_global;      // request swap of active registers 0-7
+   input [1:0]  rml_irf_global_tid;       // thread whose globals are being swapped
+   
+   output       so;             // scan out (never driven inside this module)
+   output [71:0] irf_byp_rs1_data_d_l;
+   output [71:0] irf_byp_rs2_data_d_l;
+   output [71:0] irf_byp_rs3_data_d_l;
+   output [31:0] irf_byp_rs3h_data_d_l;
+   reg [71:0] irf_byp_rs1_data_d;
+   reg [71:0] irf_byp_rs2_data_d;
+   reg [71:0] irf_byp_rs3_data_d;
+   reg [71:0] irf_byp_rs3h_data_d;   // only bits [31:0] are driven onto the output
+
+   reg [71:0]    active_window [127:0];// 32x4 72 bit registers, index {thread[1:0], reg[4:0]}
+   reg [71:0]    locals[255:0];      // 4x8x8 registers, index {thread[1:0], lo_cwp[2:0], reg[2:0]}
+   reg [71:0]    evens[127:0];      // 4x4x8 registers, index {thread[1:0], e_cwp[1:0], reg[2:0]}
+   reg [71:0]    odds[127:0];      // 4x4x8 registers, index {thread[1:0], lo_cwp[2:1], reg[2:0]}
+   reg [71:0]    globals[127:0];      // 4x4x8 registers, index {thread[1:0], agp[1:0], reg[2:0]}
+   // registers for manipulating windows
+   reg [6:0] active_pointer;    // scratch index into active_window
+   reg [7:0] regfile_pointer;   // scratch index into locals/evens/odds/globals
+   reg [5:0] i;                 // loop counter for the save/restore loops
+
+   wire [1:0]  ecl_irf_tid_w;  // w stage thread
+   wire [1:0]  ecl_irf_tid_w2; // w2 thread
+   wire [4:0]  ecl_irf_rd_w;   // w destination
+   wire [4:0]  ecl_irf_rd_w2;  // w2 destination
+   wire [1:0]  ifu_exu_thr_d;  // d stage thread
+   wire ifu_exu_ren1_d;        // read enables for all 3 ports
+   wire ifu_exu_ren2_d;
+   wire ifu_exu_ren3_d;
+   wire [4:0]  ifu_exu_rs1_d;  // source addresses
+   wire [4:0]  ifu_exu_rs2_d;
+   wire [4:0]  ifu_exu_rs3_d;
+   wire [6:0]    thr_rs1;       // these 6 are a combination of the thr and reg
+   wire [6:0]    thr_rs2;       // so that comparison can be done more easily
+   wire [6:0]    thr_rs3;
+   wire [6:0]    thr_rs3h;
+   wire [6:0]    thr_rd_w;
+   wire [6:0]    thr_rd_w2;
+
+   reg [1:0] cwpswap_tid_m;
+   reg [1:0] cwpswap_tid_w;
+   reg [2:0] old_lo_cwp_m;
+   reg [2:0] new_lo_cwp_m;
+   reg [2:0] new_lo_cwp_w;
+   reg [1:0] old_e_cwp_m;
+   reg [1:0] new_e_cwp_m;
+   reg [1:0] new_e_cwp_w;
+   reg       swap_local_m;
+   reg       swap_local_w;
+   reg       swap_even_m;
+   reg       swap_even_w;
+   reg       swap_odd_m;
+   reg       swap_odd_w;
+   reg       kill_restore_d1;
+   reg        swap_global_d1;
+   reg        swap_global_d2;
+   reg [1:0]  global_tid_d1;
+   reg [1:0]  global_tid_d2;
+   reg [1:0] old_agp_d1,
+             new_agp_d1,
+             new_agp_d2;
+
+   reg [71:0] active_win_thr_rd_w_neg;          // W1 write data sampled at negedge clk
+   reg        active_win_thr_rd_w_neg_wr_en;    // W1 write pending for the next posedge
+   reg [6:0]  thr_rd_w_neg;                     // W1 write index sampled at negedge clk
+   reg [71:0] active_win_thr_rd_w2_neg;         // W2 write data sampled at negedge clk
+   reg        active_win_thr_rd_w2_neg_wr_en;   // W2 write pending for the next posedge
+   reg [6:0]  thr_rd_w2_neg;                    // W2 write index sampled at negedge clk
+   reg        rst_tri_en_neg;                   // rst_tri_en sampled at negedge clk
+   
+   wire          se;
+   wire          clk;
+   assign        clk = rclk & reset_l;   // clk is held low whenever reset_l is low
+   wire          ren1_s;
+   wire          ren2_s;
+   wire          ren3_s;
+   wire [4:0]    rs1_s;
+   wire [4:0]    rs2_s;
+   wire [4:0]    rs3_s;
+   wire [1:0]    tid_s;
+   wire [1:0]    tid_g;
+   wire [1:0]    tid_m;
+   wire [4:0]    rd_m;
+   wire [4:0]    rd_g;
+   wire          kill_restore_w;
+   wire          swap_global_d1_vld;
+   wire          swap_local_m_vld;
+   wire          swap_even_m_vld;
+   wire          swap_odd_m_vld;
+
+   assign {ren1_s,ren2_s,ren3_s,rs1_s[4:0],rs2_s[4:0],rs3_s[4:0],tid_s[1:0],tid_g[1:0],tid_m[1:0],
+           rd_m[4:0], rd_g[4:0]} = (sehold)?
+          {ifu_exu_ren1_d,ifu_exu_ren2_d,ifu_exu_ren3_d,ifu_exu_rs1_d[4:0],ifu_exu_rs2_d[4:0],
+           ifu_exu_rs3_d[4:0],ifu_exu_thr_d[1:0],ecl_irf_tid_w2[1:0],ecl_irf_tid_w[1:0],
+           ecl_irf_rd_w[4:0],ecl_irf_rd_w2[4:0]}:   // sehold: feed the flop outputs back to their inputs
+          {ifu_exu_ren1_s,ifu_exu_ren2_s,ifu_exu_ren3_s,ifu_exu_rs1_s[4:0],ifu_exu_rs2_s[4:0],
+           ifu_exu_rs3_s[4:0],ifu_exu_tid_s2[1:0],ecl_irf_tid_g[1:0],ecl_irf_tid_m[1:0],
+           ecl_irf_rd_m[4:0],ecl_irf_rd_g[4:0]};   // otherwise take the new module inputs
+   // Pipeline flops for irf control signals
+   dff_s dff_ren1_s2d(.din(ren1_s), .clk(clk), .q(ifu_exu_ren1_d), .se(se),
+                    .si(), .so());
+   dff_s dff_ren2_s2d(.din(ren2_s), .clk(clk), .q(ifu_exu_ren2_d), .se(se),
+                    .si(), .so());
+   dff_s dff_ren3_s2d(.din(ren3_s), .clk(clk), .q(ifu_exu_ren3_d), .se(se),
+                    .si(), .so());
+   dff_s #5 dff_rs1_s2d(.din(rs1_s[4:0]), .clk(clk), .q(ifu_exu_rs1_d[4:0]), .se(se),
+                      .si(),.so());
+   dff_s #5 dff_rs2_s2d(.din(rs2_s[4:0]), .clk(clk), .q(ifu_exu_rs2_d[4:0]), .se(se),
+                      .si(),.so());
+   dff_s #5 dff_rs3_s2d(.din(rs3_s[4:0]), .clk(clk), .q(ifu_exu_rs3_d[4:0]), .se(se),
+                      .si(),.so());
+   dff_s #2 dff_thr_s2d(.din(tid_s[1:0]), .clk(clk), .q(ifu_exu_thr_d[1:0]), .se(se),
+                      .si(),.so());
+   dff_s #2 dff_thr_g2w2(.din(tid_g[1:0]), .clk(clk), .q(ecl_irf_tid_w2[1:0]), .se(se),
+                      .si(),.so());
+   dff_s #2 dff_thr_m2w(.din(tid_m[1:0]), .clk(clk), .q(ecl_irf_tid_w[1:0]), .se(se),
+                      .si(),.so());
+   dff_s #5 dff_rd_m2w(.din(rd_m[4:0]), .clk(clk), .q(ecl_irf_rd_w[4:0]), .se(se),
+                      .si(),.so());
+   dff_s #5 dff_rd_g2w2(.din(rd_g[4:0]), .clk(clk), .q(ecl_irf_rd_w2[4:0]), .se(se),
+                      .si(),.so());
+   
+   // Concatenate the thread and rs1/rd bits together
+   assign        thr_rs1[6:0] = {ifu_exu_thr_d, ifu_exu_rs1_d};
+   assign        thr_rs2[6:0] = {ifu_exu_thr_d, ifu_exu_rs2_d};
+   assign        thr_rs3[6:0] = {ifu_exu_thr_d, ifu_exu_rs3_d[4:0]};
+   assign        thr_rs3h[6:0] = {ifu_exu_thr_d[1:0], ifu_exu_rs3_d[4:1], 1'b1};   // rs3 index with bit 0 forced to 1
+   assign        thr_rd_w[6:0] = {ecl_irf_tid_w, ecl_irf_rd_w};
+   assign        thr_rd_w2[6:0] = {ecl_irf_tid_w2, ecl_irf_rd_w2};
+
+   // Active low outputs
+   assign        irf_byp_rs1_data_d_l[71:0] = ~irf_byp_rs1_data_d[71:0];
+   assign        irf_byp_rs2_data_d_l[71:0] = ~irf_byp_rs2_data_d[71:0];
+   assign        irf_byp_rs3_data_d_l[71:0] = ~irf_byp_rs3_data_d[71:0]; 
+   assign        irf_byp_rs3h_data_d_l[31:0] = ~irf_byp_rs3h_data_d[31:0];
+   
+   // Read port 1: driven to X while clk is high, evaluated once on each falling edge of clk
+   always @ ( clk ) begin
+      if (clk) irf_byp_rs1_data_d <= {72{1'bx}};
+      else begin
+         if (ifu_exu_ren1_d) begin // read enable must be high
+            if (thr_rs1[4:0] == 5'b0) irf_byp_rs1_data_d <= {72{1'b0}};   // register 0 always reads as zero
+            else begin
+               if ((ecl_irf_wen_w && (thr_rs1 == thr_rd_w)) || // check r/w conflict
+                   (ecl_irf_wen_w2 && (thr_rs1 == thr_rd_w2))) begin
+                  irf_byp_rs1_data_d <= {72{1'bx}};  // rw conflict gives x
+               end
+               else begin 
+                  irf_byp_rs1_data_d <= active_window[thr_rs1[6:0]];
+               end
+            end
+         end
+         // output disabled
+         else begin
+            irf_byp_rs1_data_d <= {72{1'bx}};
+         end
+      end
+   end
+   
+   // Read port 2: same behaviour as read port 1, using thr_rs2 / ifu_exu_ren2_d
+   always @ ( clk ) begin
+      if (clk) irf_byp_rs2_data_d <= {72{1'bx}};
+      else begin
+         if (ifu_exu_ren2_d) begin
+            if (thr_rs2[4:0] == 5'b0) irf_byp_rs2_data_d <= {72{1'b0}};   // register 0 always reads as zero
+            else if ((ecl_irf_wen_w && (thr_rs2 == thr_rd_w)) || 
+                     (ecl_irf_wen_w2 && (thr_rs2 == thr_rd_w2)))
+              irf_byp_rs2_data_d <= {72{1'bx}};   // rw conflict gives x
+            else begin 
+               irf_byp_rs2_data_d <= active_window[thr_rs2];
+            end
+         end
+         // output disabled
+         else irf_byp_rs2_data_d <= {72{1'bx}};
+      end
+   end
+   
+   // Read port 3: same behaviour as read port 1, using thr_rs3 / ifu_exu_ren3_d
+   always @ ( clk ) begin
+      if (clk) irf_byp_rs3_data_d <= {72{1'bx}};
+      else begin 
+         if (ifu_exu_ren3_d) begin
+            if (thr_rs3[4:0] == 5'b0) irf_byp_rs3_data_d[71:0] <= {72{1'b0}};   // register 0 always reads as zero
+            else if ((ecl_irf_wen_w && (thr_rs3 == thr_rd_w)) || 
+                     (ecl_irf_wen_w2 && (thr_rs3 == thr_rd_w2))) 
+              begin	
+                 irf_byp_rs3_data_d[71:0] <= {72{1'bx}};   // rw conflict gives x
+              end
+            else begin
+               irf_byp_rs3_data_d[71:0] <= active_window[thr_rs3];
+            end
+         end
+         // output disabled
+         else begin
+            irf_byp_rs3_data_d[71:0] <= {72{1'bx}};
+         end
+      end
+   end
+      
+   // Read port 3h: shares ifu_exu_ren3_d with port 3 and reads the register at thr_rs3h (rs3 with bit 0 set)
+   always @ ( clk ) begin
+      if (clk) irf_byp_rs3h_data_d[71:0] <= {72{1'bx}};
+      else begin
+         if (ifu_exu_ren3_d) begin
+            if (thr_rs3h[4:0] == 5'b0) irf_byp_rs3h_data_d[71:0] <= 72'b0;   // never taken: thr_rs3h[0] is tied to 1
+            else if ((ecl_irf_wen_w && (thr_rs3h == thr_rd_w)) || 
+                     (ecl_irf_wen_w2 && (thr_rs3h == thr_rd_w2))) 
+              begin	
+                 irf_byp_rs3h_data_d[71:0] <= {72{1'bx}};   // rw conflict gives x
+              end
+            else begin
+               irf_byp_rs3h_data_d[71:0] <= active_window[thr_rs3h];
+            end
+         end
+         // output disabled
+         else begin
+            irf_byp_rs3h_data_d[71:0] <= {72{1'bx}};
+         end
+      end
+   end
+   
+/////////////////////////////////////////////////////////////////
+///  Write ports
+////////////////////////////////////////////////////////////////
+   // This is a latch that works if both wen is high and clk is low
+   // Modelled here as: sample data/index/enable at negedge clk, commit to active_window in the posedge block further down
+   always @(negedge clk) begin
+      rst_tri_en_neg <= rst_tri_en;
+      // write conflict results in X written to destination (this path has no register-0 exclusion)
+      if (ecl_irf_wen_w & ecl_irf_wen_w2 & (thr_rd_w[6:0] == thr_rd_w2[6:0])) begin
+         active_win_thr_rd_w_neg <= {72{1'bx}};
+         thr_rd_w_neg <= thr_rd_w;
+         active_win_thr_rd_w_neg_wr_en <= 1'b1;    // the X is committed through the W1 path only
+         active_win_thr_rd_w2_neg_wr_en <= 1'b0;
+      end
+      else begin
+         // W1 write port
+         if (ecl_irf_wen_w & (thr_rd_w[4:0] != 5'b0)) begin   // writes to register 0 are dropped
+            active_win_thr_rd_w_neg <= byp_irf_rd_data_w;
+            thr_rd_w_neg <= thr_rd_w;
+            active_win_thr_rd_w_neg_wr_en <= 1'b1;
+         end
+         else
+           active_win_thr_rd_w_neg_wr_en <= 1'b0;
+         
+         // W2 write port
+         if (ecl_irf_wen_w2 & (thr_rd_w2[4:0] != 5'b0)) begin   // writes to register 0 are dropped
+            active_win_thr_rd_w2_neg <= byp_irf_rd_data_w2;
+            thr_rd_w2_neg <= thr_rd_w2;
+            active_win_thr_rd_w2_neg_wr_en <= 1'b1;
+         end
+         else
+           active_win_thr_rd_w2_neg_wr_en <= 1'b0;
+      end
+   end
+   
+
+
+/* MOVED TO CMP ENVIRONMENT
+   initial begin
+      // Hardcode R0 to zero
+      active_window[{2'b00, 5'b00000}] = 72'b0;
+      active_window[{2'b01, 5'b00000}] = 72'b0;
+      active_window[{2'b10, 5'b00000}] = 72'b0;
+      active_window[{2'b11, 5'b00000}] = 72'b0;
+   end
+*/
+   //////////////////////////////////////////////////
+   // Window management logic
+   //////////////////////////////////////////////////
+   // Pipeline flops for control signals
+
+   // cwp swap signals
+   assign kill_restore_w = (sehold)? kill_restore_d1: rml_irf_kill_restore_w;   // under sehold reuse the flopped value
+   assign swap_local_m_vld = swap_local_m & ~rst_tri_en;   // swap requests are ignored while rst_tri_en is high
+   assign swap_odd_m_vld = swap_odd_m & ~rst_tri_en;
+   assign swap_even_m_vld = swap_even_m & ~rst_tri_en;
+   assign swap_global_d1_vld = swap_global_d1 & ~rst_tri_en;
+   
+   always @ (posedge clk) begin
+      cwpswap_tid_m[1:0] <= (sehold)? cwpswap_tid_m[1:0]: rml_irf_cwpswap_tid_e[1:0];
+      cwpswap_tid_w[1:0] <= cwpswap_tid_m[1:0];
+      old_lo_cwp_m[2:0] <= (sehold)? old_lo_cwp_m[2:0]: rml_irf_old_lo_cwp_e[2:0];
+      new_lo_cwp_m[2:0] <= (sehold)? new_lo_cwp_m[2:0]: rml_irf_new_lo_cwp_e[2:0];
+      new_lo_cwp_w[2:0] <= new_lo_cwp_m[2:0];
+      old_e_cwp_m[1:0] <= (sehold)? old_e_cwp_m[1:0]: rml_irf_old_e_cwp_e[2:1];
+      new_e_cwp_m[1:0] <= (sehold)? new_e_cwp_m[1:0]: rml_irf_new_e_cwp_e[2:1];
+      new_e_cwp_w[1:0] <= new_e_cwp_m[1:0];
+      swap_local_m <= (sehold)? swap_local_m & rst_tri_en: rml_irf_swap_local_e;   // under sehold the request survives only while rst_tri_en is high
+      swap_local_w <= swap_local_m_vld;
+      swap_odd_m <= (sehold)? swap_odd_m & rst_tri_en: rml_irf_swap_odd_e;
+      swap_odd_w <= swap_odd_m_vld;
+      swap_even_m <= (sehold)? swap_even_m & rst_tri_en: rml_irf_swap_even_e;
+      swap_even_w <= swap_even_m_vld;
+      kill_restore_d1 <= kill_restore_w;
+   end  
+   // global swap signals    
+   always @ (posedge clk) begin
+      swap_global_d1 <= (sehold)? swap_global_d1 & rst_tri_en: rml_irf_swap_global;
+      swap_global_d2 <= swap_global_d1_vld;
+      global_tid_d1[1:0] <= (sehold)? global_tid_d1[1:0]: rml_irf_global_tid[1:0];
+      global_tid_d2[1:0] <= global_tid_d1[1:0];
+      old_agp_d1[1:0] <= (sehold)? old_agp_d1[1:0]: rml_irf_old_agp[1:0];
+      new_agp_d1[1:0] <= (sehold)? new_agp_d1[1:0]: rml_irf_new_agp[1:0];
+      new_agp_d2[1:0] <= new_agp_d1[1:0];
+   end
+
+
+   /////////////////////////////////////////////
+   // Globals
+   //-----------------------------------
+   // rml inputs are latched on rising edge
+   // 1st cycle used for decode
+   // 2nd cycle stores active window in phase 1
+   // 3rd cycle loads new globals in phase 1
+   /////////////////////////////////////////////
+   
+   always @ (posedge clk) begin
+      // commit the writes sampled at the last negedge first; blocking assignments, so the swaps below see the written data
+      if (active_win_thr_rd_w_neg_wr_en & (~rst_tri_en | ~rst_tri_en_neg)) begin   // blocked only if rst_tri_en is high now and was high at the negedge
+         active_window[thr_rd_w_neg] = active_win_thr_rd_w_neg;
+      end
+      if (active_win_thr_rd_w2_neg_wr_en & (~rst_tri_en | ~rst_tri_en_neg)) begin
+         active_window[thr_rd_w2_neg] = active_win_thr_rd_w2_neg;
+      end
+      // save active globals in phase 1
+      if (swap_global_d1_vld) begin
+         for (i = 6'd0; i < 6'd8; i = i + 1) begin
+            active_pointer[6:0] = {global_tid_d1[1:0], i[4:0]};   // active registers 0-7 of the swapping thread
+            regfile_pointer[7:0] = {1'b0, global_tid_d1[1:0], old_agp_d1[1:0], i[2:0]};
+            // prevent back to back swaps on same thread
+            if (swap_global_d2 & (global_tid_d1[1:0] == global_tid_d2[1:0])) begin
+               globals[regfile_pointer[6:0]] = {72{1'bx}};
+            end
+            else globals[regfile_pointer[6:0]] = active_window[active_pointer[6:0]];
+         end
+      end
+               
+    // load in new active globals (swap_global_d2 is swap_global_d1_vld delayed by one clk)
+      if (swap_global_d2) begin
+         for (i = 6'd0; i < 6'd8; i = i + 1) begin
+            active_pointer[6:0] = {global_tid_d2[1:0], i[4:0]};
+            regfile_pointer[7:0] = {1'b0, global_tid_d2[1:0], new_agp_d2[1:0], i[2:0]};
+            if (swap_global_d1_vld & (global_tid_d1[1:0] == global_tid_d2[1:0])) begin   // back to back swap on same thread: poison both copies
+               active_window[active_pointer] = {72{1'bx}};
+               globals[regfile_pointer[6:0]] = {72{1'bx}};
+            end
+            else active_window[active_pointer] = globals[regfile_pointer[6:0]];
+         end
+      end
+
+   ////////////////////////////
+   // locals, ins and outs
+   //-------------------------
+   // E - set up inputs to flop
+   // M - Decode
+   // W (phase 1) - Save
+   // W (phase 2) - write is allowed for save because restore will get killed
+   // W2 (phase 1) - Restore
+   // W2 (phase 2) - write is allowed
+   //
+   // actions that occur in phase one are modelled as occurring on the
+   // rising edge
+   //
+   // swaps to the same thread in consecutive cycles not allowed
+   /////////////////////////////
+       if (swap_local_m_vld) begin
+          // save the locals (16-23 in active window)
+          for (i = 6'd16; i < 6'd24; i = i + 1) begin
+             active_pointer[6:0] = {cwpswap_tid_m[1:0], i[4:0]};
+             regfile_pointer[7:0] = {cwpswap_tid_m[1:0], old_lo_cwp_m[2:0], i[2:0]};
+             if (swap_local_w & ~kill_restore_w & (cwpswap_tid_m[1:0] == cwpswap_tid_w[1:0]))   // same-thread save overlapping a live restore: store X
+               locals[regfile_pointer[7:0]] = {72{1'bx}};
+             else 
+               locals[regfile_pointer[7:0]] = active_window[active_pointer];
+          end
+       end
+       if (swap_even_m_vld) begin
+          // save the ins in even window (24-31 in active window)
+          for (i = 6'd24; i < 6'd32; i = i + 1) begin
+             active_pointer[6:0] = {cwpswap_tid_m[1:0], i[4:0]};
+             regfile_pointer[7:0] = {1'b0, cwpswap_tid_m[1:0], old_e_cwp_m[1:0], i[2:0]};
+             if (swap_even_w & ~kill_restore_w & (cwpswap_tid_m[1:0] == cwpswap_tid_w[1:0]))
+               evens[regfile_pointer[6:0]] = {72{1'bx}};
+             else
+               evens[regfile_pointer[6:0]] = active_window[active_pointer];
+          end
+       end
+       if (swap_odd_m_vld) begin
+          // save the ins in odd window (8-15 in active window)
+          for (i = 6'd8; i < 6'd16; i = i + 1) begin
+             active_pointer[6:0] = {cwpswap_tid_m[1:0], i[4:0]};
+             regfile_pointer[7:0] = {1'b0, cwpswap_tid_m[1:0], old_lo_cwp_m[2:1], i[2:0]};
+             if (swap_odd_w & ~kill_restore_w & (cwpswap_tid_m[1:0] == cwpswap_tid_w[1:0]))
+               odds[regfile_pointer[6:0]] = {72{1'bx}};
+             else
+               odds[regfile_pointer[6:0]] = active_window[active_pointer];
+          end
+       end
+       if(~kill_restore_w) begin   // every restore below is skipped when the restore is killed
+          if (swap_local_w) begin
+            // restore the locals (16-23 in active window)
+            for (i = 6'd16; i < 6'd24; i = i + 1) begin
+               active_pointer[6:0] = {cwpswap_tid_w[1:0], i[4:0]};
+               regfile_pointer[7:0] = {cwpswap_tid_w[1:0], new_lo_cwp_w[2:0], i[2:0]};
+               if (swap_local_m_vld & (cwpswap_tid_m[1:0] == cwpswap_tid_w[1:0])) begin   // consecutive swap on same thread: poison both copies
+                 active_window[active_pointer] = {72{1'bx}};
+                 locals[regfile_pointer[7:0]] = {72{1'bx}};
+               end
+               else
+                 active_window[active_pointer] = locals[regfile_pointer[7:0]];
+            end
+         end
+         if (swap_even_w) begin
+            // restore the ins in even window (24-31 in active window)
+            for (i = 6'd24; i < 6'd32; i = i + 1) begin
+               active_pointer[6:0] = {cwpswap_tid_w[1:0], i[4:0]};
+               regfile_pointer[7:0] = {1'b0, cwpswap_tid_w[1:0], new_e_cwp_w[1:0], i[2:0]};
+               if (swap_even_m_vld & (cwpswap_tid_m[1:0] == cwpswap_tid_w[1:0])) begin
+                 active_window[active_pointer] = {72{1'bx}};
+                 evens[regfile_pointer[6:0]] = {72{1'bx}};
+	       end
+               else
+                 active_window[active_pointer] = evens[regfile_pointer[6:0]];
+            end
+         end
+         if (swap_odd_w) begin
+            // restore the ins in odd window (8-15 in active window)
+            for (i = 6'd8; i < 6'd16; i = i + 1) begin
+               active_pointer[6:0] = {cwpswap_tid_w[1:0], i[4:0]};
+               regfile_pointer[7:0] = {1'b0, cwpswap_tid_w[1:0], new_lo_cwp_w[2:1], i[2:0]};
+               if (swap_odd_m_vld & (cwpswap_tid_m[1:0] == cwpswap_tid_w[1:0])) begin
+                 active_window[active_pointer] = {72{1'bx}};
+                 odds[regfile_pointer[6:0]]  = {72{1'bx}};
+	       end
+               else
+                 active_window[active_pointer] = odds[regfile_pointer[6:0]];
+            end
+         end
+       end
+    end // always @ (posedge clk)
+
+endmodule // bw_r_irf
+
+`endif
Index: /trunk/T1-common/srams/bw_r_tlb.v
===================================================================
--- /trunk/T1-common/srams/bw_r_tlb.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_tlb.v	(revision 6)
@@ -0,0 +1,1898 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_tlb.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//	Description:	Common TLB for Instruction Fetch and Load/Stores
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which contains the 
+					// time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+`include	"lsu.h"
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN
+`define FPGA_SYN_TLB
+`endif
+
+
+`ifdef FPGA_SYN_TLB
+`ifdef FPGA_SYN_8TLB
+  `define TLB_ENTRIES 8
+  `define TLB_INDEX_WIDTH 3
+`else
+`ifdef FPGA_SYN_16TLB
+  `define TLB_ENTRIES 16
+  `define TLB_INDEX_WIDTH 4
+`else
+`ifdef FPGA_SYN_32TLB
+  `define TLB_ENTRIES 32
+  `define TLB_INDEX_WIDTH 5
+`else
+  `define TLB_ENTRIES 64
+  `define TLB_INDEX_WIDTH 6
+`endif
+`endif
+`endif
+
+module bw_r_tlb ( /*AUTOARG*/
+   // Outputs
+   tlb_rd_tte_tag, tlb_rd_tte_data, tlb_pgnum, tlb_pgnum_crit, 
+   tlb_cam_hit, cache_way_hit, cache_hit, so, 
+   // Inputs
+   tlb_cam_vld, tlb_cam_key, tlb_cam_pid,  
+   tlb_demap_key, tlb_addr_mask_l, tlb_ctxt, 
+   tlb_wr_vld, tlb_wr_tte_tag, tlb_wr_tte_data, tlb_rd_tag_vld, 
+   tlb_rd_data_vld, tlb_rw_index, tlb_rw_index_vld, tlb_demap, 
+   tlb_demap_auto, tlb_demap_all, cache_ptag_w0, cache_ptag_w1, 
+   cache_ptag_w2, cache_ptag_w3, cache_set_vld, tlb_bypass_va, 
+   tlb_bypass, se, si, hold, adj, arst_l, rst_soft_l, rclk,
+   rst_tri_en
+   ) ;	
+
+
+input			tlb_cam_vld ;		// ld/st requires xlation. 
+input	[40:0]		tlb_cam_key ;		// cam data for loads/stores;includes vld 
+						// CHANGE : add real bit for cam.
+input	[2:0]		tlb_cam_pid ;		// NEW: pid for cam. 
+input	[40:0]		tlb_demap_key ;		// cam data for demap; includes vlds. 
+						// CHANGE : add real bit for demap
+input			tlb_addr_mask_l ;	// address masking occurs
+input	[12:0]		tlb_ctxt ;		// context for cam xslate/demap. 
+input			tlb_wr_vld;		// write to tlb. 
+input	[58:0]		tlb_wr_tte_tag;		// CHANGE:tte tag to be written (55+4-1)
+						// R(+1b),PID(+3b),G(-1b). 
+input	[42:0]		tlb_wr_tte_data;	// tte data to be written.
+						// No change(!!!) - G bit becomes spare
+input			tlb_rd_tag_vld ;	// read tag
+input			tlb_rd_data_vld ;	// read data
+input	[5:0]		tlb_rw_index ;		// index to read/write tlb.
+input			tlb_rw_index_vld ;	// indexed write else use algorithm.
+input			tlb_demap ;		// demap : page/ctxt/all/auto.  
+input			tlb_demap_auto ;	// demap is of type auto 
+input			tlb_demap_all;		// demap-all operation : encoded separately.
+input  	[29:0]    	cache_ptag_w0;       	// way1 30b(D)/29b(I) tag.
+input  	[29:0]    	cache_ptag_w1;       	// way2 30b(D)/29b(I) tag.
+input  	[29:0]     	cache_ptag_w2;       	// way0 30b(D)/29b(I) tag.
+input  	[29:0]     	cache_ptag_w3;       	// way3 30b(D)/29b(I) tag.
+input	[3:0]		cache_set_vld;       	// set vld-4 ways
+input	[12:10]		tlb_bypass_va;	   	// bypass va.other va bits from cam-data
+input			tlb_bypass;		// bypass tlb xslation
+
+input			se ;			// scan-enable ; unused
+input			si ;			// scan data in ; unused
+input			hold ;			// scan hold signal
+input	[7:0]		adj ;			// self-time adjustment ; unused
+input			arst_l ;		// synchronous for tlb ; unused	
+input			rst_soft_l ;		// software reset - asi
+input			rclk;
+input			rst_tri_en ;
+
+output	[58:0]		tlb_rd_tte_tag;		// CHANGE: tte tag read from tlb.
+output	[42:0]		tlb_rd_tte_data;	// tte data read from tlb.
+// Need two ports for tlb_pgnum - critical and non-critical.
+output	[39:10]		tlb_pgnum ;		// bypass or xslated pgnum
+output	[39:10]		tlb_pgnum_crit ;	// bypass or xslated pgnum - critical
+output			tlb_cam_hit ;		// xlation hits in tlb.
+output	[3:0]		cache_way_hit;		// tag comparison results.
+output			cache_hit;		// tag comparison result - 'or' of above.
+
+//output			tlb_writeable ;		// tlb can be written in current cycle.
+
+output			so ;		// scan data out ; unused
+
+wire	[53:0]		tlb_cam_data ;
+wire	[58:0]		wr_tte_tag ;	// CHANGE
+wire	[42:0]		wr_tte_data ;
+wire	[29:3]		phy_pgnum_m;
+wire	[29:0]		pgnum_m;
+wire 	[`TLB_ENTRIES-1:0] used ;
+wire			tlb_not_writeable ;
+wire	[40:25] 	tlb_cam_key_masked ;
+wire	[26:0]		tlb_cam_comp_key ;
+wire			cam_vld ;
+wire			demap_other ;
+wire	[3:0]   	cache_way_hit ;
+wire	[`TLB_ENTRIES-1:0]		mismatch;
+
+reg			tlb_not_writeable_d1 ;
+reg			tlb_writeable ;
+wire	[`TLB_ENTRIES-1:0]		tlb_entry_locked ;
+wire	[`TLB_ENTRIES-1:0]		cam_hit ;
+wire	[`TLB_ENTRIES-1:0]		demap_hit ;
+reg	[`TLB_ENTRIES-1:0]		ademap_hit ;
+wire	[58:0]		rd_tte_tag ;	// CHANGE
+wire	[42:0]		rd_tte_data ;	
+reg	[42:0]		tlb_rd_tte_data ;	
+reg			cam_vld_tmp ;
+reg	[2:0]		cam_pid ;
+reg	[53:0]		cam_data ;
+reg			demap_auto, demap_other_tmp, demap_all ;
+reg	[`TLB_ENTRIES-1:0]		tlb_entry_vld ;
+wire	[`TLB_ENTRIES-1:0]		tlb_entry_used ;
+reg	[`TLB_ENTRIES-1:0]		tlb_entry_replace ;
+reg	[`TLB_ENTRIES-1:0]		tlb_entry_replace_d2 ;
+reg	[29:0]		pgnum_g ;
+reg     [3:0]		cache_set_vld_g;
+reg	[29:0]		cache_ptag_w0_g,cache_ptag_w1_g;
+reg	[29:0]		cache_ptag_w2_g,cache_ptag_w3_g;
+reg	[`TLB_ENTRIES-1:0]		rw_wdline ;
+
+reg			rd_tag; 
+reg			rd_data;
+reg			wr_vld_tmp;
+reg	[`TLB_INDEX_WIDTH-1:0]		rw_index;
+reg			rw_index_vld;
+wire	[29:0] 		vrtl_pgnum_m;
+wire			bypass ;
+
+wire			wr_vld ;
+
+integer	i,j,k,l,m,n,p,r,s,t,u,w;
+
+`define	CAM_CTXT_12_0_HI 	12 	// 13b ctxt
+`define	CAM_CTXT_12_0_LO 	0 		
+
+
+//=========================================================================================
+//	What's Left :
+//=========================================================================================
+
+// Scan Insertion - scan to be ignored in formal verification for now.
+
+//=========================================================================================
+//	Design Notes.
+//=========================================================================================
+
+// - Supported Demap Operations - By Page, By Context, All But
+// Locked, Autodemap, Invalidate-All i.e., reset. Demap Partition is
+// not supported - it is mapped to demap-all by logic. 
+// - Interpretation of demap inputs
+//	- tlb_demap - this is used to signal demap by page, by ctxt
+//	,all, and autodemap. 
+//	- tlb_demap_ctxt - If a demap_by_ctxt operation is occurring then
+//	this signal and tlb_demap must be active.
+//	- tlb_demap_all - demap all operation. If a demap_all operation is
+//	occurring, then tlb_demap_all must be asserted with tlb_demap. 
+// - Reset is similar to demap-all except that *all* entries
+// are invalidated. The action is initiated by software. The reset occurs
+// on the negedge and is synchronous with the clk.
+// - TTE Tag and Data
+// 	- The TTE tag and data can be read together. Each will have its 
+//	own bus and the muxing will occur externally. The tag needs to
+//	be read on a data request to supply the valid bit.
+// 	- The TTE tag and data can be written together.
+// - The cam hit is a separate output signal based on the 
+// the match signals.
+// - Read/Write may occur based on supplied index. If not valid
+// then use replacement way determined by algorithm to write.
+// - Only write can use replacement way determined by algorithm.
+// - Data is formatted appr. on read or write in the MMU. 
+// - The TLB will generate a signal which reports whether the 
+// tlb can be filled in the current cycle or not.
+// **Physical Tag Comparison**
+// For I-SIDE, comparison is of 28b, whereas for D-side, comparison is of 29b. The actual
+// comparison, due to legacy, is for 30b.
+// For the I-TLB, va[11:10] must be hardwired to the same value as the lsb of the 4 tags
+// at the port level. Since the itag is only 28b, add two least significant bits to extend it to 30b.
+// Similarly, for the dside, va[10] needs to be made same.	
+// **Differentiating among Various TLB Operations**
+// Valid bits are now associated with the key to allow selective incorporation of
+// match results. The 5 valid bits are : v4(b47-28),v3(b27-22),v2(21-16),v1(b15-13)
+// and Gk(G bit for auto-demap). The rules of use are :
+//	- cam: v4-v1 are set high. G=~cam_real=0/1.
+//	- demap_by_page : v4-v1 are set high. G=1. cam_real=0.
+// 	- demap_by_ctxt : v4-v1 are low. G=1. cam_real=0
+//	- demap_all : v4-v1 are don't-care. G=x. cam_real=x
+//	- autodemap : v4-v1 are based on page size of incoming tte. G=~cam_real=0/1.
+// Note : Gk is now used only to void a context match on a Real Translation.
+// In general, if a valid bit is low then the corresponding va field will not take
+// part in the match. Similarly, for the ctxt, if Gk=1, the ctxt will participate
+// in the match.
+//
+// Demap Table (For Satya) :
+// Note : To include a context match, Gk must be set to 1.
+//--------------------------------------------------------------------------------------------------------
+//tlb_demap tlb_demap_all  tlb_ctxt Gk	Vk4 Vk3	Vk2 Vk1 Real	Operation
+//--------------------------------------------------------------------------------------------------------
+//0		x		x   x	x   x	x   x   0	No demap operation
+//1		0		0   1	1   1	1   1	0	Demap by page
+//1		0		0   1	1   0	0   0	0/1	256M demap(auto demap)
+//1		0		0   0	1   0	0   0	0	256M demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	0   0	0/1	4M demap(auto demap)
+//1		0		0   0	1   1	0   0	0	4M demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	1   0	0/1	64k demap(auto demap)
+//1		0		0   0	1   1	1   0	0	64k demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	1   1	0/1	8k demap(auto demap)
+//1		0		0   0	1   1	1   1	0	8k demap(auto demap) (*Illgl*)
+//1		0		1   1	0   0	0   0	0	demap by ctxt
+//1		1		x   x	x   x	x   x	0	demap_all
+//------------------------------------------------------------------------------------------
+//-----
+//All other are illegal combinations
+//
+//=========================================================================================
+//	Changes related to Hypervisor/Legacy Compatibility
+//=========================================================================================
+//
+// - Add PID. PID does not affect demap-all. Otherwise it is included in cam, other demap
+// operations and auto-demap.
+// - Add R. Real translation ignores context. This is controlled externally by Gk.
+// - Remove G bit for tte. Input remains in demap-key/cam-key to allow for disabling
+//   of context match Real Translation  
+// - Final Page Size support - 8KB,64KB,4M,256M
+// - SPARC_HPV_EN has been defined to enable new tlb design support. 
+// Issues : 
+// -Max ptag size is now 28b. Satya, will this help the speed at all. I doubt it !
+
+//=========================================================================================
+//	Miscellaneous
+//=========================================================================================
+   wire clk;
+   assign clk = rclk;
+   
+wire async_reset, sync_reset ;
+assign	async_reset = ~arst_l ; 			// hardware
+assign	sync_reset = (~rst_soft_l & ~rst_tri_en) ;	// software
+
+wire rw_disable ;
+// INNO - wr/rd gated off. Not required as rst_tri_en is
+// asserted, but implemented in addition in schematic.
+assign	rw_disable = ~arst_l | rst_tri_en ;
+
+
+reg     [`TLB_INDEX_WIDTH-1:0]   cam_hit_encoded;
+integer ii;
+
+reg cam_hit_any;
+
+// Priority encoder over the per-entry CAM match vector.
+//   cam_hit_any     : 1 when at least one bit of cam_hit is set.
+//   cam_hit_encoded : index of the highest-numbered set bit of cam_hit,
+//                     or all zeros when no bit is set.
+// The scan walks from the top entry downwards and keeps the first hit it
+// meets, so when several bits are set the highest index is reported.
+always @(cam_hit) begin
+  cam_hit_encoded = {`TLB_INDEX_WIDTH{1'b0}};
+  cam_hit_any     = 1'b0;
+  for (ii = `TLB_ENTRIES-1; ii >= 0; ii = ii - 1) begin
+    // Only the first (highest) matching entry is recorded.
+    if (cam_hit[ii] & ~cam_hit_any) begin
+      cam_hit_any     = 1'b1;
+      cam_hit_encoded = ii;
+    end
+  end
+end
+
+reg cam_hit_any_or_bypass;
+
+always @(posedge clk) 
+  cam_hit_any_or_bypass <= cam_hit_any | bypass;  
+
+
+
+//=========================================================================================
+// 	Stage Data
+//=========================================================================================
+// Apply address masking
+assign	tlb_cam_key_masked[40:25]
+	= {16{tlb_addr_mask_l}} & 
+		tlb_cam_key[`CAM_VA_47_28_HI:`CAM_VA_47_28_LO+4] ;
+
+// Reconstitute cam data CHANGE : add additional bit for real mapping
+assign	tlb_cam_data[53:13] = tlb_demap ? 
+	tlb_demap_key[40:0] :
+	{tlb_cam_key_masked[40:25],tlb_cam_key[`CAM_VA_47_28_LO+3:0]} ; 
+
+assign tlb_cam_comp_key[26:0] = 
+		tlb_demap ?
+			{tlb_demap_key[32:21], tlb_demap_key[19:14],tlb_demap_key[12:7],
+			tlb_demap_key[5:3]} :
+			{tlb_cam_key_masked[32:25],tlb_cam_key[24:21],
+			tlb_cam_key[19:14],tlb_cam_key[12:7],tlb_cam_key[5:3]} ;
+
+assign	tlb_cam_data[12:0] = tlb_ctxt[12:0] ;
+
+// These signals are flow-thru.
+assign	wr_tte_tag[58:0] 	= tlb_wr_tte_tag[58:0] ;	// CHANGE
+assign	wr_tte_data[42:0] 	= tlb_wr_tte_data[42:0] ;
+
+// CHANGE(SATYA) - Currently the rw_index/rw_index_vld are shared by both reads
+// and writes. However, writes are done in the cycle of broadcast, whereas
+// the reads are done a cycle later, as given in the model(incorrect) 
+// They have to be treated uniformly. To make the model work, I've assumed the read/write 
+// are done in the cycle the valids are broadcast. 
+// Input staging registers.
+// On every rising edge of clk the cam / demap / read / write controls, the
+// cam pid and data, and the read/write index are captured from the tlb_*
+// inputs. While hold is asserted nothing is captured and every staged
+// register keeps its current value.
+always @ (posedge clk)
+	begin
+	if (hold)
+		begin
+		// hold asserted: all staged registers retain their values.
+		end
+	else
+		begin
+		// index and its qualifier
+		rw_index[`TLB_INDEX_WIDTH-1:0]		<= tlb_rw_index[`TLB_INDEX_WIDTH-1:0] ;
+		rw_index_vld		<= tlb_rw_index_vld ;
+		// read / write requests
+		rd_data			<= tlb_rd_data_vld ;
+		rd_tag 			<= tlb_rd_tag_vld ;
+		wr_vld_tmp 		<= tlb_wr_vld ;
+		// demap controls
+		demap_all		<= tlb_demap_all ;
+		demap_auto		<= tlb_demap_auto ;
+		demap_other_tmp		<= tlb_demap ;
+		// cam request
+		cam_data[53:0] 		<= tlb_cam_data[53:0] ;
+		cam_vld_tmp		<= tlb_cam_vld ;
+		cam_pid[2:0]		<= tlb_cam_pid[2:0] ;
+		end
+
+	end
+
+// INNO - gate cam,demap,wr with rst_tri_en.
+reg rst_tri_en_lat;
+
+ always        @ (clk)
+ rst_tri_en_lat = rst_tri_en;
+
+assign	cam_vld = cam_vld_tmp & ~rst_tri_en_lat ;
+assign	demap_other = demap_other_tmp & ~rst_tri_en ;
+assign	wr_vld = wr_vld_tmp & ~rst_tri_en ;
+
+//=========================================================================================
+//	Generate Write Wordlines
+//=========================================================================================
+
+
+assign tlb_rd_tte_tag[58:0] = rd_tte_tag[58:0] ;	// CHANGE
+
+// Stage to next cycle.
+always	@ (posedge clk)
+	begin
+		tlb_rd_tte_data[42:0] 	<= rd_tte_data[42:0] ;
+	end
+
+//=========================================================================================
+//	CAM/DEMAP STLB for xlation
+//=========================================================================================
+
+
+// Demap and CAM operation are mutually exclusive.
+
+// Auto-demap hit capture.
+// On each falling edge of clk, while an auto-demap is in progress
+// (demap_auto & demap_other), every entry records whether it is valid and
+// matched the demap key (~mismatch). When no auto-demap is in progress the
+// previously captured ademap_hit bits are left unchanged (there is no else
+// branch), so the result stays available for a later write.
+// NOTE(review): ademap_hit is updated with a blocking assignment inside a
+// clocked block and is also read by the negedge block that updates
+// tlb_entry_vld; the value seen there in the same edge may depend on
+// simulator process ordering - confirm this is intended before changing it.
+always  @ ( negedge clk )
+	begin
+	
+		for (n=0;n<`TLB_ENTRIES;n=n+1)
+			begin
+                                if (demap_auto & demap_other) 
+					ademap_hit[n] = (~mismatch[n] & demap_other & tlb_entry_vld[n]) ;
+			end
+
+	end  // always
+
+
+assign	tlb_cam_hit = |cam_hit[`TLB_ENTRIES-1:0] ;
+
+// Change tlb_entry_vld handling for multi-threaded tlb writes.
+// A write is always preceded by an autodemap. The intent is to make the result of autodemap
+// (clearing of vld bit if hit) invisible until write occurs. In the same cycle that the write
+// occurs, the vld bit for an entry will be cleared if there is an autodemap hit. The write
+// and admp action may even be to same entry. The write must dominate. There is no need to
+// clear the dmp latches after the write/clear has occurred as the subsequent admp will set
+// up new state in the latches.
+
+// Define valid bit based on write/demap/reset. 
+
+// Word-line decoder for indexed accesses.
+// rw_wdline[i] is set for the single entry selected by rw_index when an
+// indexed write (wr_vld_tmp & rw_index_vld), a tag read (rd_tag) or a data
+// read (rd_data) is staged; every other bit is cleared.
+always  @ (/*AUTOSENSE*/rd_data or rd_tag or rw_index or rw_index_vld
+           or wr_vld_tmp)
+        begin : wdline_decode
+                reg     indexed_access ;        // any operation that uses rw_index
+
+                indexed_access = (wr_vld_tmp & rw_index_vld) | rd_tag | rd_data ;
+
+                for (i=0;i<`TLB_ENTRIES;i=i+1)
+                        begin
+                        if (indexed_access & (rw_index[`TLB_INDEX_WIDTH-1:0] == i))
+                                rw_wdline[i] = 1'b1 ;
+                        else
+                                rw_wdline[i] = 1'b0 ;
+                        end
+
+        end
+
+
+// Per-entry valid bit update, evaluated on the falling edge of clk.
+// For each entry r the statements below are applied in order; because they
+// are nonblocking assignments to the same bit, the LAST one that fires wins:
+//   1. write    : the entry is the write target (indexed via rw_wdline, or
+//                 the replacement entry tlb_entry_replace_d2 when no index is
+//                 valid) and wr_vld is set with rw_disable clear -> the valid
+//                 bit is loaded from the V bit of the incoming tag.
+//   2. autodemap: otherwise, a recorded auto-demap hit coinciding with a
+//                 write clears the bit (so the write dominates auto-demap on
+//                 the entry being written).
+//   3. demap / soft reset : a non-auto demap hit or sync_reset clears the bit.
+//   4. async_reset        : clears the bit.
+// Note that the two trailing "if" statements are NOT part of the else branch
+// despite their indentation, so cases 3 and 4 override a write in case 1.
+// async_reset is only sampled here on the clock edge (no asynchronous
+// sensitivity), i.e. it acts synchronously in this block.
+always @ (negedge clk)
+	begin
+	for (r=0;r<`TLB_ENTRIES;r=r+1)
+	begin // for
+	if (((rw_index_vld & rw_wdline[r]) | (~rw_index_vld & tlb_entry_replace_d2[r])) & 
+		wr_vld & ~rw_disable)
+			tlb_entry_vld[r] <= wr_tte_tag[`STLB_TAG_V] ;	// write
+	else	begin
+		if (ademap_hit[r] & wr_vld)			// autodemap specifically
+			tlb_entry_vld[r] <= 1'b0 ;		
+		end
+	  if ((demap_hit[r] & ~demap_auto) | sync_reset)	// non-auto-demap, reset
+			tlb_entry_vld[r] <= 1'b0 ;	
+	  if(async_reset) tlb_entry_vld[r] <= 1'b0 ;
+
+	end // for
+	end
+
+
+//=========================================================================================
+//	TAG COMPARISON
+//=========================================================================================
+
+reg [30:0] va_tag_plus ;
+
+// Stage to m
+// M-stage register for the virtual tag.
+// Captures {tlb_cam_comp_key, tlb_bypass_va, tlb_bypass} on the rising edge
+// of clk (tlb_bypass lands in bit 0); the register is frozen while hold is
+// asserted.
+always @(posedge clk)
+		begin
+		// INNO - add hold to this input
+		if (hold)
+			begin
+			// hold asserted: va_tag_plus keeps its current value.
+			end
+		else
+			va_tag_plus[30:0]
+			<= {tlb_cam_comp_key[26:0],tlb_bypass_va[12:10],tlb_bypass};
+		end
+			
+assign vrtl_pgnum_m[29:0] = va_tag_plus[30:1] ;
+assign bypass = va_tag_plus[0] ;
+
+// Mux to bypass va or form pa tag based on tte-data.
+
+assign	phy_pgnum_m[29:3] = 
+	{rd_tte_data[`STLB_DATA_PA_39_28_HI:`STLB_DATA_PA_39_28_LO],
+		rd_tte_data[`STLB_DATA_PA_27_22_HI:`STLB_DATA_PA_27_22_LO],
+			rd_tte_data[`STLB_DATA_PA_21_16_HI:`STLB_DATA_PA_21_16_LO],
+				rd_tte_data[`STLB_DATA_PA_15_13_HI:`STLB_DATA_PA_15_13_LO]};
+
+// Derive the tlb-based physical address.
+assign pgnum_m[2:0] = vrtl_pgnum_m[2:0];
+assign pgnum_m[5:3] = (~rd_tte_data[`STLB_DATA_15_13_SEL] & ~bypass)
+				? phy_pgnum_m[5:3] : vrtl_pgnum_m[5:3] ;
+assign pgnum_m[11:6] = (~rd_tte_data[`STLB_DATA_21_16_SEL] & ~bypass)  
+				? phy_pgnum_m[11:6] : vrtl_pgnum_m[11:6] ;
+assign pgnum_m[17:12] = (~rd_tte_data[`STLB_DATA_27_22_SEL] & ~bypass)
+				? phy_pgnum_m[17:12] : vrtl_pgnum_m[17:12] ;
+assign pgnum_m[29:18] = ~bypass ? phy_pgnum_m[29:18] : vrtl_pgnum_m[29:18];
+
+// Stage to g
+// Flop tags in tlb itself and do comparison immediately after rising edge.
+// Similarly stage va/pa tag to g
+always @(posedge clk)
+		begin
+			pgnum_g[29:0] <= pgnum_m[29:0];
+			// rm hold on these inputs.
+			cache_set_vld_g[3:0]  	<= cache_set_vld[3:0] ;
+			cache_ptag_w0_g[29:0] 	<= cache_ptag_w0[29:0] ;
+			cache_ptag_w1_g[29:0] 	<= cache_ptag_w1[29:0] ;
+			cache_ptag_w2_g[29:0] 	<= cache_ptag_w2[29:0] ;
+			cache_ptag_w3_g[29:0] 	<= cache_ptag_w3[29:0] ;
+		end
+
+
+// Need to stage by a cycle where used.
+assign	tlb_pgnum[39:10] = pgnum_g[29:0] ;
+// Same cycle as cam - meant for one load on critical path
+assign	tlb_pgnum_crit[39:10] = pgnum_m[29:0] ;
+
+
+assign	cache_way_hit[0] = 
+	(cache_ptag_w0_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[0] & cam_hit_any_or_bypass;
+assign	cache_way_hit[1] = 
+	(cache_ptag_w1_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[1] & cam_hit_any_or_bypass;
+assign	cache_way_hit[2] = 
+	(cache_ptag_w2_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[2] & cam_hit_any_or_bypass;
+assign	cache_way_hit[3] = 
+	(cache_ptag_w3_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[3] & cam_hit_any_or_bypass;
+
+assign	cache_hit = |cache_way_hit[3:0];
+
+
+//=========================================================================================
+//	TLB ENTRY REPLACEMENT
+//=========================================================================================
+
+// A single Used bit is used to track the replacement state of each entry.
+// Only an unused entry can be replaced.
+// An Unused entry is :
+//			- an invalid entry
+//			- a valid entry which has had its Used bit cleared.
+//				- on write of a valid entry, the Used bit is set.
+//				- The Used bit of a valid entry is cleared if all
+//				entries have their Used bits set and the entry itself is not Locked.
+// A locked entry should always appear to be Used.
+// A single priority-encoder is required to evaluate the used status. Priority is static
+// and used entry0 is of the highest priority if unused.
+
+// Timing :
+// Used bit gets updated by cam-hit or hit on negedge.
+// After Used bit gets updated off negedge, the replacement entry can be generated in
+// Phase2. In parallel, it is determined whether all Used bits are set or not. If
+// so, then they are cleared on the next negedge with the replacement entry generated
+// in the related Phase1 
+
+// Choosing replacement entry
+// Replacement entry is integer k
+
+assign	tlb_not_writeable = &used[`TLB_ENTRIES-1:0] ;
+/*
+// Used bit can be set because of write or because of cam-hit.
+always @(negedge clk)
+	begin
+		for (s=0;s<`TLB_ENTRIES;s=s+1)
+			begin
+				if (cam_hit[s]) 
+					tlb_entry_used[s] <= 1'b1;			
+			end
+
+// Clear on following edge if necessary.
+// CHANGE(SATYA) : tlb_entry_used qualified with valid needs to be used to determine
+// whether the Used bits are to be cleared. This allows invalid entries created
+// by a demap to be used for replacement. Else we will ignore these entries
+// for replacement
+
+		if (tlb_not_writeable)
+			begin
+				for (t=0;t<`TLB_ENTRIES;t=t+1)
+					begin
+						if (~tlb_entry_locked[t])
+							tlb_entry_used[t] <= 1'b0;
+					end
+			end
+	end
+*/
+
+// Determine whether entry should be squashed.
+
+assign	used[`TLB_ENTRIES-1:0] = tlb_entry_used[`TLB_ENTRIES-1:0] & tlb_entry_vld[`TLB_ENTRIES-1:0] ;
+
+
+// Based on updated Used state, generate replacement entry.
+// So, replacement entries can be generated on a cycle-by-cycle basis. 
+//always @(/*AUTOSENSE*/squash or used)
+
+	reg	[`TLB_ENTRIES-1:0]	tlb_entry_replace_d1;
+	reg		tlb_replace_flag;
+	// Replacement-entry selector (combinational, one-hot result).
+	//   input  used                 : per-entry "used and valid" flags.
+	//   output tlb_entry_replace_d1 : one-hot vector marking the entry to
+	//                                 replace.
+	//   output tlb_replace_flag     : 1 when an unused entry was found.
+	// The lowest-numbered entry whose used bit is clear is selected. When
+	// every entry is used, the last entry (TLB_ENTRIES-1) is selected as a
+	// fallback.
+	always @(/*AUTOSENSE*/used)
+	begin
+	  tlb_replace_flag = 1'b0;
+	  // The replication count must be TLB_ENTRIES so that the constant is
+	  // exactly as wide as tlb_entry_replace_d1 (no implicit zero-extension).
+	  tlb_entry_replace_d1 = {`TLB_ENTRIES{1'b0}};
+	  // Priority is given to entry0
+	  for (u=0;u<`TLB_ENTRIES;u=u+1)
+	  begin
+	    if(~tlb_replace_flag & ~used[u])
+	    begin
+	      tlb_entry_replace_d1[u] = ~used[u] ;
+	      tlb_replace_flag = 1'b1;
+	    end
+	  end
+	  // No unused entry: fall back to the highest-numbered entry.
+	  if(~tlb_replace_flag) begin
+	    tlb_entry_replace_d1[`TLB_ENTRIES-1] = 1'b1;
+	  end
+	end
+	always @(posedge clk)
+	begin
+	  // named in this manner to keep arch model happy.
+  	  tlb_entry_replace <= tlb_entry_replace_d1 ;
+	end
+	// INNO - 2 stage delay before update is visible
+	always @(posedge clk)
+	begin
+  	  tlb_entry_replace_d2 <= tlb_entry_replace ;
+	end
+
+reg [`TLB_INDEX_WIDTH-1:0]  tlb_index_a1;
+reg [`TLB_INDEX_WIDTH-1:0]  tlb_index;
+wire tlb_index_vld_a1 = |tlb_entry_replace;
+reg  tlb_index_vld;
+integer jj;
+// Encode the replacement vector into an entry index.
+// tlb_index_a1 is the index of the highest-numbered set bit of
+// tlb_entry_replace, or all zeros when no bit is set. The scan runs from the
+// top entry downwards and keeps the first set bit it finds.
+always @(tlb_entry_replace) begin : replace_index_encode
+  reg picked;                           // a set bit has already been encoded
+
+  picked       = 1'b0;
+  tlb_index_a1 = {`TLB_INDEX_WIDTH{1'b0}};
+  for (jj = `TLB_ENTRIES-1; jj >= 0; jj = jj - 1) begin
+    if (tlb_entry_replace[jj] & ~picked) begin
+      picked       = 1'b1;
+      tlb_index_a1 = jj;
+    end
+  end
+end
+always @(posedge clk) begin
+  tlb_index <= tlb_index_a1;  //use instead of tlb_entry_replace_d2;
+  tlb_index_vld <= tlb_index_vld_a1;
+end
+
+
+  
+
+//=========================================================================================
+//	TLB WRITEABLE DETECTION
+//=========================================================================================
+
+// 2-cycles later, tlb become writeable
+always @(posedge clk)
+	begin
+		tlb_not_writeable_d1 <= tlb_not_writeable ;
+	end
+
+always @(posedge clk)
+	begin
+		tlb_writeable <= ~tlb_not_writeable_d1 ;
+	end
+
+bw_r_tlb_tag_ram bw_r_tlb_tag_ram (
+	.rd_tag(rd_tag),
+	.rw_index_vld(rw_index_vld),
+	.wr_vld_tmp(wr_vld_tmp),
+	.clk(clk),
+	.rw_index(rw_index),
+	.tlb_index(tlb_index),
+	.tlb_index_vld(tlb_index_vld),
+	.rw_disable(rw_disable),
+	.rst_tri_en(rst_tri_en),
+	.wr_tte_tag(wr_tte_tag),
+	.tlb_entry_vld(tlb_entry_vld),
+	.tlb_entry_used(tlb_entry_used),
+	.tlb_entry_locked(tlb_entry_locked),
+	.rd_tte_tag(rd_tte_tag),
+	.mismatch(mismatch),
+	.tlb_writeable(tlb_writeable),
+	.cam_vld(cam_vld),
+	.wr_vld(wr_vld),
+	.cam_data(cam_data),
+	.cam_hit(cam_hit),
+	.cam_pid(cam_pid),
+	.demap_all(demap_all),
+	.demap_hit(demap_hit),
+	.demap_other(demap_other)
+);
+
+bw_r_tlb_data_ram bw_r_tlb_data_ram (
+	.rd_data(rd_data),
+	.rw_index_vld(rw_index_vld),
+	.wr_vld_tmp(wr_vld_tmp),
+	.clk(clk),
+	.cam_vld(cam_vld),
+	.cam_index(cam_hit_encoded),
+        .cam_hit_any(cam_hit_any),
+	.rw_index(rw_index),
+	.tlb_index(tlb_index),
+	.tlb_index_vld(tlb_index_vld),
+	.rw_disable(rw_disable),
+	.rst_tri_en(rst_tri_en),
+	.wr_tte_data(wr_tte_data),
+	.rd_tte_data(rd_tte_data),
+	.wr_vld(wr_vld)
+);
+
+
+endmodule
+
+module bw_r_tlb_tag_ram(
+	rd_tag,
+	rw_index_vld,
+	wr_vld_tmp,
+	clk,
+	rw_index,
+	tlb_index,
+	tlb_index_vld,
+	rw_disable,
+	rst_tri_en,
+	wr_tte_tag,
+	tlb_entry_vld,
+	tlb_entry_used,
+	tlb_entry_locked,
+	rd_tte_tag,
+	mismatch,
+	tlb_writeable,
+	wr_vld,
+	cam_vld,
+	cam_data,
+	cam_hit,
+	cam_pid,
+	demap_all,
+	demap_other,
+	demap_hit);
+
+input		rd_tag; 
+input		rw_index_vld;
+input		wr_vld_tmp;
+input		clk;
+input	[`TLB_INDEX_WIDTH-1:0]	rw_index;
+input	[`TLB_INDEX_WIDTH-1:0]	tlb_index;
+input		tlb_index_vld;
+input		rw_disable;
+input		rst_tri_en;
+input	[58:0]	wr_tte_tag;
+input	[`TLB_ENTRIES-1:0]	tlb_entry_vld;
+input		tlb_writeable;
+input		wr_vld;
+input	[2:0]	cam_pid;
+input		demap_all;
+input		demap_other;
+input	[53:0]	cam_data;
+input		cam_vld ;
+
+output	[`TLB_ENTRIES-1:0]	cam_hit ;
+output	[`TLB_ENTRIES-1:0]	demap_hit ;
+output	[`TLB_ENTRIES-1:0]	tlb_entry_used;
+output	[`TLB_ENTRIES-1:0]	tlb_entry_locked;
+reg	[`TLB_ENTRIES-1:0]	tlb_entry_locked ;
+
+output	[58:0]	rd_tte_tag;
+reg	[58:0]	rd_tte_tag;
+output	[`TLB_ENTRIES-1:0]	mismatch;
+
+reg	[`TLB_ENTRIES-1:0]	sat;
+
+reg	[`TLB_ENTRIES-1:0]	mismatch;
+reg	[`TLB_ENTRIES-1:0]	cam_hit ;
+reg	[`TLB_ENTRIES-1:0]	demap_all_but_locked_hit ;
+reg	[58:0]	tag ;	// CHANGE
+
+
+reg	[`TLB_ENTRIES-1:0]	mismatch_va_b47_28;
+reg	[`TLB_ENTRIES-1:0]	mismatch_va_b27_22;
+reg	[`TLB_ENTRIES-1:0]	mismatch_va_b21_16;
+reg	[`TLB_ENTRIES-1:0]	mismatch_va_b15_13;
+reg	[`TLB_ENTRIES-1:0]	mismatch_ctxt;
+reg	[`TLB_ENTRIES-1:0]	mismatch_pid;
+reg	[`TLB_ENTRIES-1:0]	mismatch_type;
+reg	[`TLB_ENTRIES-1:0]	tlb_entry_used ;
+
+integer i,j,n,m, w, p, k, s, t;
+
+
+reg	[58:0]		tte_tag_ram  [`TLB_ENTRIES-1:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+
+reg	[58:0]	tmp_tag ;
+
+wire wren = rw_index_vld & wr_vld_tmp & ~rw_disable;
+wire tlben = tlb_index_vld & ~rw_index_vld & wr_vld_tmp & ~rw_disable;
+wire  [`TLB_INDEX_WIDTH-1:0] wr_addr = wren ? rw_index : tlb_index;
+
+
+// Tag RAM write port, Used/Locked bookkeeping and tag read port.
+// Everything here is clocked on the falling edge of clk and uses
+// nonblocking assignments throughout, so no state written by this block is
+// visible to other processes until the edge has been fully evaluated.
+always	@ (negedge clk) begin
+//=========================================================================================
+//	Write TLB
+//=========================================================================================
+
+	if(wren | tlben) begin
+		// Indexed write (wren) or replacement write (tlben): store the
+		// tag and take the Used / Locked bits of the target entry from
+		// the incoming tag.
+		tte_tag_ram[wr_addr] <= wr_tte_tag[58:0];
+		tlb_entry_used[wr_addr] <= wr_tte_tag[`STLB_TAG_U];
+		tlb_entry_locked[wr_addr] <= wr_tte_tag[`STLB_TAG_L];
+	end else begin
+	  // No write this cycle: a cam hit sets the Used bit of the hit entry.
+	  // When the TLB is not writeable and no cam / write / tag-read is in
+	  // flight (and rst_tri_en is low), the Used bits of all entries that
+	  // are not Locked are cleared; Locked entries keep their Used bit.
+	  tlb_entry_used <= (tlb_entry_used | cam_hit) & (tlb_entry_locked | ~{`TLB_ENTRIES{~tlb_writeable & ~cam_vld & ~wr_vld & ~rd_tag & ~rst_tri_en}}) ;
+        end
+
+//=========================================================================================
+//	Read STLB
+//=========================================================================================
+
+	// Indexed tag read into the intermediate register tmp_tag.
+	if(rd_tag & ~rw_disable) begin
+		tmp_tag  <= tte_tag_ram[rw_index];
+	end
+
+
+end // always
+
+// Tag read-out register, updated on the rising edge of clk.
+//   - tag read (rd_tag & ~rw_disable): the tag previously fetched into
+//     tmp_tag is presented with bits [26:24] replaced by the current
+//     {valid, locked, used} state of entry rw_index.
+//   - otherwise, on a write (wren | tlben): the tag being written is echoed.
+//   - otherwise the register keeps its value.
+// Nonblocking assignments are used because rd_tte_tag is a registered module
+// output; this keeps other posedge-clocked logic from observing the new
+// value within the same edge.
+always @(posedge clk) begin
+  if(rd_tag & ~rw_disable)
+    rd_tte_tag[58:0] <= {tmp_tag[58:27], tlb_entry_vld[rw_index], tlb_entry_locked[rw_index], tlb_entry_used[rw_index], tmp_tag[23:0]};
+  else if(wren | tlben)
+    rd_tte_tag[58:0] <= wr_tte_tag[58:0];
+end
+
+reg	[58:0]		tte_tag_ram2  [`TLB_ENTRIES-1:0];
+
+always	@ (negedge clk) begin
+  if(wren | tlben)
+    tte_tag_ram2[wr_addr] <= wr_tte_tag[58:0];
+end
+
+
+// CAM / demap comparison (combinational).
+// For every entry n the stored tag (read from tte_tag_ram2) is compared
+// against the staged cam_data / cam_pid:
+//   - the four VA fields are compared separately; the three lower fields
+//     only count when both the tag's and the key's field-valid bits are set,
+//     the top field only needs the key's valid bit;
+//   - a context mismatch counts only when the key's Gk bit is 0 (see the
+//     expression below: mismatch_ctxt & ~cam_data[CAM_CTXT_GK+13]);
+//   - a Real-bit mismatch counts unless demap_all is set;
+//   - a partition-id mismatch always counts.
+// Outputs per entry: mismatch[n], demap_all_but_locked_hit[n] (entry not
+// locked and demap_all set) and cam_hit[n] (no mismatch, cam_vld set and the
+// entry valid).
+// NOTE(review): tte_tag_ram2 is read here but does not appear in the
+// sensitivity list (while demap_other, which is listed, is not read in this
+// block). In simulation a tag write alone therefore does not re-evaluate
+// this block; it is only re-evaluated when one of the listed signals
+// changes. Confirm this is acceptable before relying on the results
+// immediately after a write.
+always	@ (cam_data or cam_pid or cam_vld or demap_all
+           or demap_other or tlb_entry_vld)
+	begin
+	
+		for (n=0;n<`TLB_ENTRIES;n=n+1)
+			begin
+			tag[58:0] = tte_tag_ram2[n] ;	// CHANGE
+
+			mismatch_va_b47_28[n] = 
+			(tag[`STLB_TAG_VA_47_28_HI:`STLB_TAG_VA_47_28_LO] 
+			!= cam_data[`CAM_VA_47_28_HI+13:`CAM_VA_47_28_LO+13]);
+
+			mismatch_va_b27_22[n] = 
+			(tag[`STLB_TAG_VA_27_22_HI:`STLB_TAG_VA_27_22_LO] 
+			!= cam_data[`CAM_VA_27_22_HI+13:`CAM_VA_27_22_LO+13]);
+
+			mismatch_va_b21_16[n] = 
+			(tag[`STLB_TAG_VA_21_16_HI:`STLB_TAG_VA_21_16_LO]
+			!= cam_data[`CAM_VA_21_16_HI+13:`CAM_VA_21_16_LO+13]) ;
+
+			mismatch_va_b15_13[n] = 
+			(tag[`STLB_TAG_VA_15_13_HI:`STLB_TAG_VA_15_13_LO]
+			!= cam_data[`CAM_VA_15_13_HI+13:`CAM_VA_15_13_LO+13]) ;
+
+			mismatch_ctxt[n] = 
+			(tag[`STLB_TAG_CTXT_12_0_HI:`STLB_TAG_CTXT_12_0_LO] 
+			!= cam_data[`CAM_CTXT_12_0_HI:`CAM_CTXT_12_0_LO]) ;
+			
+			mismatch_pid[n] = (tag[`STLB_TAG_PID_HI:`STLB_TAG_PID_LO] != cam_pid[2:0]) ;
+			mismatch_type[n] = (tag[`STLB_TAG_R] ^ cam_data[`CAM_REAL_V+13]);
+
+			mismatch[n] =
+			(mismatch_va_b47_28[n] & cam_data[`CAM_VA_47_28_V+13]) 				|
+			(mismatch_va_b27_22[n] & tag[`STLB_TAG_VA_27_22_V] & cam_data[`CAM_VA_27_22_V+13]) 	|
+			(mismatch_va_b21_16[n] & tag[`STLB_TAG_VA_21_16_V] & cam_data[`CAM_VA_21_16_V+13]) 	|
+			(mismatch_va_b15_13[n] & tag[`STLB_TAG_VA_15_13_V] & cam_data[`CAM_VA_15_13_V+13]) 	|
+			(mismatch_ctxt[n] & ~cam_data[`CAM_CTXT_GK+13])	|
+			(mismatch_type[n] & ~demap_all)  	| 
+			mismatch_pid[n] ;	// pid always included in mismatch calculations
+
+			demap_all_but_locked_hit[n] = ~tag[`STLB_TAG_L] & demap_all ;
+
+			cam_hit[n] 	= ~mismatch[n] & cam_vld   & tlb_entry_vld[n] ;
+		end
+
+	end  // always
+
+	assign demap_hit = demap_all ? ~mismatch & demap_all_but_locked_hit & tlb_entry_vld & {`TLB_ENTRIES{demap_other}}
+				     : ~mismatch & tlb_entry_vld & {`TLB_ENTRIES{demap_other}};
+
+endmodule
+
+
+
+// bw_r_tlb_data_ram
+// 64-entry x 43-bit TTE data array for the TLB. The write port, the read port
+// and the write-bypass state are all clocked on the falling edge of clk.
+//   write : entry wr_addr is loaded from wr_tte_data when wr_en is high.
+//           wr_addr is rw_index for an indexed write (rw_index_vld &
+//           wr_vld_tmp), otherwise tlb_index.
+//   read  : entry rd_addr (rw_index when rd_data is high, else cam_index) is
+//           captured into rd_tte_data_temp when rd_en is high.
+//   bypass: once a write has been seen, rd_tte_data returns wr_tte_data_d (a
+//           copy of wr_tte_data re-sampled every falling edge) until an
+//           enabled read occurs in a cycle without a write.
+// rst_tri_en and wr_vld are ports that are not referenced inside this module.
+module bw_r_tlb_data_ram(rd_data, rw_index_vld, wr_vld_tmp, clk, cam_vld,
+        rw_index, tlb_index, tlb_index_vld, rw_disable, rst_tri_en, wr_tte_data,
+        rd_tte_data, cam_index, cam_hit_any, wr_vld);
+
+        // clock and global gating
+        input                   clk;
+        input                   rw_disable;
+        input                   rst_tri_en;
+
+        // indexed read/write request
+        input                   rd_data;
+        input                   rw_index_vld;
+        input   [5:0]           rw_index;
+
+        // write request
+        input                   wr_vld_tmp;
+        input                   wr_vld;
+        input   [5:0]           tlb_index;
+        input                   tlb_index_vld;
+        input   [42:0]          wr_tte_data;
+
+        // cam (translation) read request
+        input                   cam_vld;
+        input                   cam_hit_any;
+        input   [5:0]           cam_index;
+
+        output  [42:0]          rd_tte_data;
+        wire    [42:0]          rd_tte_data;
+
+        reg     [42:0]          tte_data_ram[63:0];
+        reg     [42:0]          rd_tte_data_temp;
+        reg     [42:0]          wr_tte_data_d;
+        reg                     rdwe;
+
+        // ------------------------------------------------------------------
+        // Write port
+        // ------------------------------------------------------------------
+        wire       indexed_wr = rw_index_vld & wr_vld_tmp;
+        wire [5:0] wr_addr    = indexed_wr ? rw_index : tlb_index;
+        wire       wr_en      = (indexed_wr & (~rw_disable)) |
+                                (((tlb_index_vld & (~rw_index_vld)) & wr_vld_tmp) & (~rw_disable));
+
+        always @(negedge clk) begin
+          if (wr_en)
+            tte_data_ram[wr_addr] <= wr_tte_data[42:0];
+        end
+
+        // ------------------------------------------------------------------
+        // Read port
+        // ------------------------------------------------------------------
+        wire       cam_rd  = cam_vld & (~rw_disable);
+        wire [5:0] rd_addr = rd_data ? rw_index : cam_index;
+        wire       rd_en   = (rd_data & (~rw_disable)) | cam_rd;
+
+        always @(negedge clk) begin
+          // A cam access that hits nothing drives X onto the read register;
+          // the original notes this is required for simulation regressions.
+          if (cam_rd & (!cam_hit_any))
+            rd_tte_data_temp <= 43'bx;
+          else if (rd_en)
+            rd_tte_data_temp[42:0] <= tte_data_ram[rd_addr];
+        end
+
+        // ------------------------------------------------------------------
+        // Write bypass: rdwe remembers that the most recent access was a
+        // write, in which case the registered write data is returned.
+        // ------------------------------------------------------------------
+        always @(negedge clk) begin
+          wr_tte_data_d <= wr_tte_data;
+          if (wr_en)
+            rdwe <= 1'b1;
+          else if (rd_en)
+            rdwe <= 1'b0;
+        end
+
+        assign rd_tte_data = rdwe ? wr_tte_data_d : rd_tte_data_temp;
+
+endmodule
+
+`else
+
+module bw_r_tlb ( /*AUTOARG*/
+   // Outputs
+   tlb_rd_tte_tag, tlb_rd_tte_data, tlb_pgnum, tlb_pgnum_crit, 
+   tlb_cam_hit, cache_way_hit, cache_hit, so, 
+   // Inputs
+   tlb_cam_vld, tlb_cam_key, tlb_cam_pid,  
+   tlb_demap_key, tlb_addr_mask_l, tlb_ctxt, 
+   tlb_wr_vld, tlb_wr_tte_tag, tlb_wr_tte_data, tlb_rd_tag_vld, 
+   tlb_rd_data_vld, tlb_rw_index, tlb_rw_index_vld, tlb_demap, 
+   tlb_demap_auto, tlb_demap_all, cache_ptag_w0, cache_ptag_w1, 
+   cache_ptag_w2, cache_ptag_w3, cache_set_vld, tlb_bypass_va, 
+   tlb_bypass, se, si, hold, adj, arst_l, rst_soft_l, rclk,
+   rst_tri_en
+   ) ;	
+
+
+// Port declarations for bw_r_tlb. Inputs are sampled into staging registers on
+// the rising edge of the clock (see the "Stage Data" section of this module).
+input			tlb_cam_vld ;		// ld/st requires xlation. 
+input	[40:0]		tlb_cam_key ;		// cam data for loads/stores;includes vld 
+						// CHANGE : add real bit for cam.
+input	[2:0]		tlb_cam_pid ;		// NEW: pid for cam. 
+input	[40:0]		tlb_demap_key ;		// cam data for demap; includes vlds. 
+						// CHANGE : add real bit for demap
+input			tlb_addr_mask_l ;	// active-low address mask: when 0, the upper
+						// 16 key bits are forced to 0 for a cam
+input	[12:0]		tlb_ctxt ;		// context for cam xslate/demap. 
+input			tlb_wr_vld;		// write to tlb. 
+input	[58:0]		tlb_wr_tte_tag;		// CHANGE:tte tag to be written (55+4-1)
+						// R(+1b),PID(+3b),G(-1b). 
+input	[42:0]		tlb_wr_tte_data;	// tte data to be written.
+						// No change(!!!) - G bit becomes spare
+input			tlb_rd_tag_vld ;	// read tag
+input			tlb_rd_data_vld ;	// read data
+input	[5:0]		tlb_rw_index ;		// index to read/write tlb.
+input			tlb_rw_index_vld ;	// indexed write else use algorithm.
+input			tlb_demap ;		// demap : page/ctxt/all/auto.  
+input			tlb_demap_auto ;	// demap is of type auto 
+input			tlb_demap_all;		// demap-all operation : encoded separately.
+// The four physical tags below are compared against the translated page number
+// to form cache_way_hit[0..3] respectively. (The original comments labelled
+// them way1/way2/way0/way3, which does not match that compare order.)
+input  	[29:0]    	cache_ptag_w0;       	// way0 30b(D)/29b(I) tag.
+input  	[29:0]    	cache_ptag_w1;       	// way1 30b(D)/29b(I) tag.
+input  	[29:0]     	cache_ptag_w2;       	// way2 30b(D)/29b(I) tag.
+input  	[29:0]     	cache_ptag_w3;       	// way3 30b(D)/29b(I) tag.
+input	[3:0]		cache_set_vld;       	// set vld-4 ways
+input	[12:10]		tlb_bypass_va;	   	// bypass va.other va bits from cam-data
+input			tlb_bypass;		// bypass tlb xslation
+
+input			se ;			// scan-enable ; unused
+input			si ;			// scan data in ; unused
+input			hold ;			// scan hold signal; when high the input
+						// staging registers keep their value
+input	[7:0]		adj ;			// self-time adjustment ; unused
+input			arst_l ;		// active-low reset. Original comment says
+						// "synchronous for tlb ; unused", but it is
+						// inverted to form async_reset and also
+						// gates reads/writes through rw_disable.
+input			rst_soft_l ;		// software reset - asi
+input			rclk;
+input			rst_tri_en ;
+
+output	[58:0]		tlb_rd_tte_tag;		// CHANGE: tte tag read from tlb.
+output	[42:0]		tlb_rd_tte_data;	// tte data read from tlb.
+// Need two ports for tlb_pgnum - critical and non-critical.
+output	[39:10]		tlb_pgnum ;		// bypass or xslated pgnum
+output	[39:10]		tlb_pgnum_crit ;	// bypass or xslated pgnum - critical
+output			tlb_cam_hit ;		// xlation hits in tlb.
+output	[3:0]		cache_way_hit;		// tag comparison results.
+output			cache_hit;		// tag comparison result - 'or' of above.
+
+//output			tlb_writeable ;		// tlb can be written in current cycle.
+
+output			so ;		// scan data out ; unused
+
+wire	[53:0]		tlb_cam_data ;
+wire	[58:0]		wr_tte_tag ;	// CHANGE
+wire	[42:0]		wr_tte_data ;
+wire	[29:3]		phy_pgnum_m;
+wire	[29:0]		pgnum_m;
+wire 	[63:0]		used ;
+wire			tlb_not_writeable ;
+wire	[40:25] 	tlb_cam_key_masked ;
+wire	[26:0]		tlb_cam_comp_key ;
+wire			cam_vld ;
+wire			demap_other ;
+wire	[3:0]   	cache_way_hit ;
+
+reg			tlb_not_writeable_d1 ;
+reg			tlb_writeable ;
+reg	[58:0]		tte_tag_ram  [63:0] ;	// CHANGE
+reg	[42:0]		tte_data_ram [63:0] ;
+reg	[63:0]		tlb_entry_vld ;
+reg	[63:0]		tlb_entry_locked ;
+reg	[63:0]		ademap_hit ;
+reg	[58:0]		rd_tte_tag ;	// CHANGE
+reg	[42:0]		rd_tte_data ;	
+reg	[58:0]		tlb_rd_tte_tag ; // CHANGE	
+reg	[42:0]		tlb_rd_tte_data ;	
+reg			cam_vld_tmp ;
+reg	[2:0]		cam_pid ;
+reg	[53:0]		cam_data ;
+reg			demap_auto, demap_other_tmp, demap_all ;
+reg	[63:0]		mismatch ;
+reg	[63:0]		cam_hit ;
+reg	[63:0]		demap_hit ;
+reg	[63:0]		demap_all_but_locked_hit ;
+reg	[63:0]		mismatch_va_b47_28 ;
+reg	[63:0]		mismatch_va_b27_22 ;
+reg	[63:0]		mismatch_va_b21_16 ;
+reg	[63:0]		mismatch_va_b15_13 ;
+reg	[63:0]		mismatch_ctxt ;
+reg	[63:0]		mismatch_pid ;
+reg	[58:0]		tag ;	// CHANGE
+reg	[63:0]		rw_wdline ;
+reg	[63:0]		tlb_entry_used ;
+reg	[63:0]		tlb_entry_replace ;
+reg	[63:0]		tlb_entry_replace_d2 ;
+reg	[29:0]		pgnum_g ;
+reg     [3:0]		cache_set_vld_g;
+reg	[29:0]		cache_ptag_w0_g,cache_ptag_w1_g;
+reg	[29:0]		cache_ptag_w2_g,cache_ptag_w3_g;
+reg			wr_vld_tmp;
+reg			rd_tag; 
+reg			rd_data;
+reg			rw_index_vld;
+reg	[5:0]		rw_index;
+reg	[63:0]		sat ;
+
+wire	[29:0] 		vrtl_pgnum_m;
+wire			bypass ;
+
+wire			wr_vld ;
+
+integer	i,j,k,l,m,n,p,r,s,t,u,w;
+
+/*AUTOWIRE*/
+// Beginning of automatic wires (for undeclared instantiated-module outputs)
+// End of automatics
+
+// Some bits are removed from the tag and data. 
+// 'U' must be defined as a '1' on a write.
+// 'L' required for demap all function.
+// Do not need an internal valid bit for va range 47:22.
+// These bits are always valid for a page. 
+// 
+// TTE STLB_TAG
+//
+//`define	STLB_TAG_PID_HI		58	: NEW PID - bit2
+//`define	STLB_TAG_PID_LO		56	: NEW PID - bit0
+//`define	STLB_TAG_R		55	: NEW Real bit
+//`define 	STLB_TAG_PARITY		54	// Parity kept in same posn to avoid having
+//`define	STLB_TAG_VA_47_28_HI 	53	// to redo interface
+//`define	STLB_TAG_VA_47_28_LO 	34
+//`define	STLB_TAG_VA_27_22_HI 	33	
+//`define	STLB_TAG_VA_27_22_LO 	28
+//`define	STLB_TAG_27_22_V	27	
+//`define	STLB_TAG_V		26	: valid for entry. Write of 0 resets it.
+//`define	STLB_TAG_L		25
+//`define	STLB_TAG_U		24	
+//`define	STLB_TAG_VA_21_16_HI 	23
+//`define	STLB_TAG_VA_21_16_LO  	18
+//`define	STLB_TAG_VA_21_16_V  	17	  	
+//`define	STLB_TAG_VA_15_13_HI 	16
+//`define	STLB_TAG_VA_15_13_LO  	14
+//`define	STLB_TAG_VA_15_13_V  	13
+//`define	STLB_TAG_CTXT_12_0_HI  	12	// removed Global bit
+//`define	STLB_TAG_CTXT_12_0_LO  	0
+//// 				Total - 59b
+////
+//// TTE STLB_DATA
+////
+//// Soft[12:7] & Soft2[58:50] are removed.
+//// Diag[49:41] are removed. Used bit used for Diag[0] on read.
+//// CV is included for software correctness.
+//// PA<40> is removed as it is not used.
+//// G/L present in data even though present in tag : can't read out simultaneously.
+//   (Unfortunately this is no longer correct. For data read, tag is also read
+//   simultaneously to get valid bit, used bits).
+//`define 	STLB_DATA_PARITY  	42 
+//`define 	STLB_DATA_PA_39_28_HI 	41	// CHANGE
+//`define 	STLB_DATA_PA_39_28_LO 	30
+//`define 	STLB_DATA_PA_27_22_HI 	29	// CHANGE
+//`define 	STLB_DATA_PA_27_22_LO 	24
+//`define 	STLB_DATA_27_22_SEL	23
+//`define 	STLB_DATA_PA_21_16_HI 	22	// CHANGE
+//`define 	STLB_DATA_PA_21_16_LO 	17
+//`define 	STLB_DATA_21_16_SEL	16
+//`define 	STLB_DATA_PA_15_13_HI 	15	
+//`define 	STLB_DATA_PA_15_13_LO 	13
+//`define 	STLB_DATA_15_13_SEL	12
+//`define 	STLB_DATA_V  		11	: static, does not get modified.
+//`define 	STLB_DATA_NFO  		10
+//`define 	STLB_DATA_IE   		9
+//`define 	STLB_DATA_L 		8 	: added for read.
+//`define 	STLB_DATA_CP 		7 
+//`define 	STLB_DATA_CV 		6 
+//`define 	STLB_DATA_E  		5 
+//`define 	STLB_DATA_P  		4 
+//`define 	STLB_DATA_W  		3 
+//`define 	STLB_DATA_SPARE_HI  	2	: Global bit has been removed
+//`define 	STLB_DATA_SPARE_LO	0  	 
+// 				Total - 43b
+
+// Valid bits for key(tlb_cam_key/tlb_demap_key).
+// Total - 41b
+//`define	CAM_VA_47_28_HI  	40
+//`define	CAM_VA_47_28_LO  	21
+//`define	CAM_VA_47_28_V  	20	// b47-28 participate in match
+//`define	CAM_VA_27_22_HI  	19
+//`define	CAM_VA_27_22_LO  	14
+//`define	CAM_VA_27_22_V  	13	// b27-22 participate in match
+//`define	CAM_VA_21_16_HI  	12
+//`define	CAM_VA_21_16_LO  	7
+//`define	CAM_VA_21_16_V  	6	// b21-16 participate in match
+//`define	CAM_VA_15_13_HI 	5	
+//`define	CAM_VA_15_13_LO 	3	
+//`define	CAM_VA_15_13_V 	 	2	// b15-13 participate in match
+//`define	CAM_CTXT_GK 		1	// Context participates in match
+//`define	CAM_REAL_V 		0	// cam/demap applies to real mapping
+					
+
+// ctxt port is different from cam key port even though both are
+// required for cam. (tlb_ctxt)
+// If Gk is set then ctxt will not participate in match.
+// Total - 14b
+`define	CAM_CTXT_12_0_HI 	12 	// 13b ctxt
+`define	CAM_CTXT_12_0_LO 	0 		
+
+
+//=========================================================================================
+//	What's Left :
+//=========================================================================================
+
+// Scan Insertion - scan to be ignored in formal verification for now.
+
+//=========================================================================================
+//	Design Notes.
+//=========================================================================================
+
+// - Supported Demap Operations - By Page, By Context, All But
+// Locked, Autodemap, Invalidate-All i.e., reset. Demap Partition is
+// not supported - it is mapped to demap-all by logic. 
+// - Interpretation of demap inputs
+//	- tlb_demap - this is used to signal demap by page, by ctxt
+//	,all, and autodemap. 
+//	- tlb_demap_ctxt - If a demap_by_ctxt operation is occurring then
+//	this signal and tlb_demap must be active.
+//	- tlb_demap_all - demap all operation. If a demap_all operation is
+//	occurring, then tlb_demap_all must be asserted with tlb_demap. 
+// - Reset is similar to demap-all except that *all* entries
+// are invalidated. The action is initiated by software. The reset occurs
+// on the negedge and is synchronous with the clk.
+// - TTE Tag and Data
+// 	- The TTE tag and data can be read together. Each will have its 
+//	own bus and the muxing will occur externally. The tag needs to
+//	be read on a data request to supply the valid bit.
+// 	- The TTE tag and data can be written together.
+// - The cam hit is a separate output signal based on
+// the match signals.
+// - Read/Write may occur based on supplied index. If not valid
+// then use replacement way determined by algorithm to write.
+// - Only write can use replacement way determined by algorithm.
+// - Data is formatted appr. on read or write in the MMU. 
+// - The TLB will generate a signal which reports whether the 
+// tlb can be filled in the current cycle or not.
+// **Physical Tag Comparison**
+// For I-SIDE, comparison is of 28b, whereas for D-side, comparison is of 29b. The actual
+// comparison, due to legacy, is for 30b.
+// For the I-TLB, va[11:10] must be hardwired to the same value as the lsb of the 4 tags
+// at the port level. Since the itag is only 28b, add two least significant bits to extend it to 30b.
+// Similarly, for the dside, va[10] needs to be made same.	
+// **Differentiating among Various TLB Operations**
+// Valid bits are now associated with the key to allow selective incorporation of
+// match results. The 5 valid bits are : v4(b47-28),v3(b27-22),v2(21-16),v1(b15-13)
+// and Gk(G bit for auto-demap). The rules of use are :
+//	- cam: v4-v1 are set high. G=~cam_real=0/1.
+//	- demap_by_page : v4-v1 are set high. G=1. cam_real=0.
+// 	- demap_by_ctxt : v4-v1 are low. G=1. cam_real=0
+//	- demap_all : v4-v1 are don't-care. G=x. cam_real=x
+//	- autodemap : v4-v1 are based on page size of incoming tte. G=~cam_real=0/1.
+// Note : Gk is now used only to void a context match on a Real Translation.
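+// In general, if a valid bit is low then the corresponding va field will not take
+// part in the match. Similarly, for the ctxt, if Gk=1, the ctxt will participate
+// in the match.
+// NOTE(review): the sentence above disagrees with the CAM_CTXT note earlier in
+// this file ("If Gk is set then ctxt will not participate in match") and with
+// the match equation, which only counts a context mismatch when the Gk key bit
+// is 0. Confirm the intended polarity of Gk.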
+//
+// Demap Table (For Satya) :
+// Note : To include a context match, Gk must be set to 1.
+//--------------------------------------------------------------------------------------------------------
+//tlb_demap tlb_demap_all  tlb_ctxt Gk	Vk4 Vk3	Vk2 Vk1 Real	Operation
+//--------------------------------------------------------------------------------------------------------
+//0		x		x   x	x   x	x   x   0	No demap operation
+//1		0		0   1	1   1	1   1	0	Demap by page
+//1		0		0   1	1   0	0   0	0/1	256M demap(auto demap)
+//1		0		0   0	1   0	0   0	0	256M demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	0   0	0/1	4M demap(auto demap)
+//1		0		0   0	1   1	0   0	0	4M demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	1   0	0/1	64k demap(auto demap)
+//1		0		0   0	1   1	1   0	0	64k demap(auto demap) (*Illgl*)
+//1		0		0   1	1   1	1   1	0/1	8k demap(auto demap)
+//1		0		0   0	1   1	1   1	0	8k demap(auto demap) (*Illgl*)
+//1		0		1   1	0   0	0   0	0	demap by ctxt
+//1		1		x   x	x   x	x   x	0	demap_all
+//------------------------------------------------------------------------------------------
+//-----
+//All other are illegal combinations
+//
+//=========================================================================================
+//	Changes related to Hypervisor/Legacy Compatibility
+//=========================================================================================
+//
+// - Add PID. PID does not affect demap-all. Otherwise it is included in cam, other demap
+// operations and auto-demap.
+// - Add R. Real translation ignores context. This is controlled externally by Gk.
+// - Remove G bit for tte. Input remains in demap-key/cam-key to allow for disabling
+//   of context match Real Translation  
+// - Final Page Size support - 8KB,64KB,4M,256M
+// - SPARC_HPV_EN has been defined to enable new tlb design support. 
+// Issues : 
+// -Max ptag size is now 28b. Satya, will this help the speed at all. I doubt it !
+
+//=========================================================================================
+//	Miscellaneous
+//=========================================================================================
+   // Internal clock is a plain alias of the rclk port.
+   wire clk = rclk ;
+
+// Reset decode.
+//   async_reset : hardware reset, the inverse of arst_l.
+//   sync_reset  : software reset, active when rst_soft_l is low and
+//                 rst_tri_en is not asserted.
+wire	async_reset = ~arst_l ;
+wire	sync_reset  = ~(rst_soft_l | rst_tri_en) ;
+
+// INNO - wr/rd gated off whenever the hardware reset is active or rst_tri_en
+// is asserted. The original note reads "Note required as rst_tri_en is
+// asserted, but implemented in addition in schematic" (presumably "Not
+// required" was meant - confirm).
+wire	rw_disable = rst_tri_en | ~arst_l ;
+
+//=========================================================================================
+// 	Stage Data
+//=========================================================================================
+// Address masking: the upper 16 bits of the cam key VA[47:28] field are forced
+// to zero whenever tlb_addr_mask_l is low.
+assign	tlb_cam_key_masked[40:25] =
+	tlb_cam_key[`CAM_VA_47_28_HI:`CAM_VA_47_28_LO+4] & {16{tlb_addr_mask_l}} ;
+
+// Reconstituted cam data (CHANGE : carries the additional bit for real mapping).
+//   [53:13] : the demap key on a demap, else the (masked) cam key
+//   [12:0]  : context
+assign	tlb_cam_data[53:0] =
+	{ (tlb_demap ? tlb_demap_key[40:0]
+	             : {tlb_cam_key_masked[40:25],tlb_cam_key[`CAM_VA_47_28_LO+3:0]}),
+	  tlb_ctxt[12:0] } ;
+
+// Compare key: the VA fields of the selected key with the per-field valid
+// bits (key bits 20, 13 and 6) and key bits 2:0 dropped.
+assign	tlb_cam_comp_key[26:0] =
+	tlb_demap
+		? {tlb_demap_key[32:21],
+		   tlb_demap_key[19:14],
+		   tlb_demap_key[12:7],
+		   tlb_demap_key[5:3]}
+		: {tlb_cam_key_masked[32:25],tlb_cam_key[24:21],
+		   tlb_cam_key[19:14],
+		   tlb_cam_key[12:7],
+		   tlb_cam_key[5:3]} ;
+
+// Write tag/data pass straight through from the ports.
+assign	wr_tte_data[42:0] 	= tlb_wr_tte_data[42:0] ;
+assign	wr_tte_tag[58:0] 	= tlb_wr_tte_tag[58:0] ;	// CHANGE
+
+// CHANGE(SATYA) - Currently the rw_index/rw_index_vld are shared by both reads
+// and writes. However, writes are done in the cycle of broadcast, whereas
+// the reads are done a cycle later, as given in the model(incorrect) 
+// They have to be treated uniformly. To make the model work, I've assumed the read/write 
+// are done in the cycle the valids are broadcast. 
+// Input staging registers. Every request input is captured on the rising edge
+// of clk unless hold is asserted, in which case all of them retain their value.
+always @ (posedge clk)
+	begin
+	if (hold)
+		begin
+		// hold asserted : nothing is loaded, staged values are kept.
+		end
+	else
+		begin
+		// cam / demap request
+		cam_vld_tmp		<= tlb_cam_vld ;
+		cam_pid[2:0]		<= tlb_cam_pid[2:0] ;
+		cam_data[53:0] 		<= tlb_cam_data[53:0] ;
+		{demap_other_tmp, demap_auto, demap_all}
+					<= {tlb_demap, tlb_demap_auto, tlb_demap_all} ;
+		// indexed read / write request
+		{wr_vld_tmp, rd_tag, rd_data}
+					<= {tlb_wr_vld, tlb_rd_tag_vld, tlb_rd_data_vld} ;
+		{rw_index_vld, rw_index[5:0]}
+					<= {tlb_rw_index_vld, tlb_rw_index[5:0]} ;
+		end
+	end
+
+// INNO - gate cam,demap,wr with rst_tri_en.
+// rst_tri_en_lat is a copy of rst_tri_en that is refreshed whenever clk
+// changes value (the block is sensitive to any change of clk, not to one edge).
+// NOTE(review): looks like this was meant to model a latch or a flop; confirm
+// the intended sampling point before changing the sensitivity list.
+reg rst_tri_en_lat;
+
+ always        @ (clk)
+ rst_tri_en_lat = rst_tri_en;
+
+// cam uses the sampled copy of rst_tri_en; demap and write use rst_tri_en directly.
+assign	cam_vld = cam_vld_tmp & ~rst_tri_en_lat ;
+assign	demap_other = demap_other_tmp & ~rst_tri_en ;
+assign	wr_vld = wr_vld_tmp & ~rst_tri_en ;
+
+//=========================================================================================
+//	Generate Write Wordlines
+//=========================================================================================
+
+// Wordline decode for index-based accesses.
+// rw_wdline[i] is high only for the entry selected by rw_index, and only
+// while an indexed write (wr_vld_tmp & rw_index_vld), a tag read or a data
+// read is staged. Wordlines for writes that use the replacement algorithm
+// are muxed in later.
+always	@ (/*AUTOSENSE*/rd_data or rd_tag or rw_index or rw_index_vld
+           or wr_vld_tmp)
+	begin : rw_wdline_decode
+		reg	indexed_access ;	// any access that uses rw_index
+
+		indexed_access = (wr_vld_tmp & rw_index_vld) | rd_tag | rd_data ;
+
+		for (i=0;i<64;i=i+1)
+			begin
+			if (indexed_access & (rw_index[5:0] == i))
+				rw_wdline[i] = 1'b1 ;
+			else
+				rw_wdline[i] = 1'b0 ;
+			end
+	end
+
+//=========================================================================================
+//	Write TLB
+//=========================================================================================
+
+// Scratch copies of one tag/data entry, used while formatting read results.
+reg	[58:0]	tmp_tag ;
+reg	[42:0]	tmp_data ;
+
+// Currently TLB_TAG and TLB_DATA RAMs are written in the B phase. 
+// Used bit is set on write in later code as it is also affected by read of tlb.
+//
+// This single falling-edge block performs, in order:
+//   1. write of tag/data (indexed, or to the replacement entry) with write-thru
+//      to the read registers,
+//   2. indexed read of tag and/or data,
+//   3. data read for a cam access (X on zero or multiple hits),
+//   4. setting of the Used bit for every cam hit,
+//   5. clearing of the Used bits of unlocked entries when the tlb is not
+//      writeable and no cam/write/tag-read is in progress.
+// Several steps assign the same registers with non-blocking assignments, so
+// the statement that executes last in this block determines the final value.
+always	@ (negedge clk)
+	begin
+		for (j=0;j<64;j=j+1)
+			if (((rw_index_vld & rw_wdline[j]) | (~rw_index_vld & tlb_entry_replace_d2[j])) & wr_vld_tmp & ~rw_disable)
+				begin
+				if (~rst_tri_en)
+					begin
+					tte_tag_ram[j] <= wr_tte_tag[58:0];	// CHANGE
+					tte_data_ram[j] <= wr_tte_data[42:0];
+					//tlb_entry_vld[j] <= wr_tte_tag[`STLB_TAG_V] ;
+					tlb_entry_used[j] <= wr_tte_tag[`STLB_TAG_U] ;
+					// NOTE(review): blocking assignment, unlike its neighbours.
+					// Reads of tlb_entry_locked later in this block therefore see
+					// the new value in the same edge - confirm this is intended.
+					tlb_entry_locked[j] = wr_tte_tag[`STLB_TAG_L] ;
+					// write-thru 
+					rd_tte_tag[58:0]  <= wr_tte_tag[58:0] ;	// CHANGE 
+					rd_tte_data[42:0] <=  wr_tte_data[42:0];
+
+					end
+				else
+					begin
+					// rw_disable already includes rst_tri_en, so with known
+					// signal values the enclosing condition is false whenever
+					// rst_tri_en is high and this branch is not reached.
+					// The original comment here said "use non-blocking"; the
+					// two scratch assignments below are blocking.
+					tmp_tag[58:0]=tte_tag_ram[j];
+					tmp_data[42:0]=tte_data_ram[j];
+					// INNO - read wins.
+					rd_tte_tag[58:0] <=	
+					{tmp_tag[58:27], tlb_entry_vld[j],tlb_entry_locked[j], 
+					tlb_entry_used[j], tmp_tag[23:0]}  ;
+					rd_tte_data[42:0] <= {tmp_data[42:12],tmp_data[11:0]} ;
+					end
+			
+			end
+
+//=========================================================================================
+//	Read STLB
+//=========================================================================================
+
+		// Indexed read. Tag bits 26:24 (V, L, U) are taken from the separate
+		// tlb_entry_vld / tlb_entry_locked / tlb_entry_used vectors, not the RAM.
+		for (m=0;m<64;m=m+1)
+			if (rw_wdline[m] & (rd_tag | rd_data) & ~rw_disable)
+				begin
+					tmp_tag  = tte_tag_ram[m] ;
+					tmp_data = tte_data_ram[m] ;
+					if (rd_tag)
+						rd_tte_tag[58:0] <=	// CHANGE - Bug 2185
+						{tmp_tag[58:27], tlb_entry_vld[m],tlb_entry_locked[m], 
+						tlb_entry_used[m], tmp_tag[23:0]}  ;
+						//{tmp_tag[58:29], tlb_entry_vld[m],tlb_entry_locked[m], 
+						//tlb_entry_used[m], tmp_tag[25:0]}  ;
+					if (rd_data) begin
+						rd_tte_data[42:0] <= {tmp_data[42:12],tmp_data[11:0]} ;
+					end
+
+				end
+
+		// Data read for a cam access.
+		if (cam_vld & ~rw_disable)
+  		begin
+    			//Checking for no hit and multiple hits
+    			// sat counts the number of asserted cam_hit bits.
+    			sat = 64'd0;
+    			for (w=0;w<64;w=w+1)
+    			begin
+      				if(cam_hit[w])
+      				begin
+        				sat = sat + 64'd1 ;
+      				end
+    			end
+			// Only one hit occur read the data
+    			if(sat == 64'd1)
+    			begin
+                        	for (p=0;p<64;p=p+1)
+				begin
+                                	if (cam_hit[p])
+                                	begin
+                                        	rd_tte_data[42:0] <= tte_data_ram[p] ;
+                                	end
+				end
+			end
+			else
+			begin
+				// INNO - just to keep the tool happy.
+				// ram cell will not be corrupted.
+				// NOTE(review): on multiple hits this model does write X into
+				// every hitting tte_data_ram entry; with zero hits only the
+				// read register becomes X.
+				for (k=0;k<64;k=k+1)
+				begin
+					if (cam_hit[k])
+                        		tte_data_ram[k] <= 43'bx ;
+				end
+                        	rd_tte_data[42:0] <= 43'bx ;
+			end
+		end
+
+                // Any entry that hits in the cam is marked Used.
+                for (s=0;s<64;s=s+1)
+                        begin
+                                if (cam_hit[s])
+                                        tlb_entry_used[s] <= 1'b1;
+                        end
+
+// Clear on following edge if necessary.
+// CHANGE(SATYA) : tlb_entry_used qualified with valid needs to be used to determine
+// whether the Used bits are to be cleared. This allows invalid entries created
+// by a demap to be used for replacement. Else we will ignore these entries
+// for replacement
+
+                //if (tlb_not_writeable)
+                if (~tlb_writeable & ~cam_vld & ~wr_vld & ~rd_tag & ~rst_tri_en)
+                        begin
+                                for (t=0;t<64;t=t+1)
+                                        begin
+                                                //if (~tlb_entry_locked[t])
+                                                // ~cam_vld & ~wr_vld repeat the outer qualification.
+                                                if (~tlb_entry_locked[t] & ~cam_vld & ~wr_vld)
+                                                        tlb_entry_used[t] <= 1'b0;
+                                        end
+                        end
+	end
+
+// Output registers: the tag and data captured on the falling edge are
+// presented on the tlb_rd_tte_* ports at the following rising edge.
+always	@ (posedge clk)
+	{tlb_rd_tte_tag[58:0], tlb_rd_tte_data[42:0]}	// CHANGE (59b tag)
+		<= {rd_tte_tag[58:0], rd_tte_data[42:0]} ;
+
+//=========================================================================================
+//	CAM/DEMAP STLB for xlation
+//=========================================================================================
+
+//  no_hit logic does not work because it is set in multiple clock
+//  domains and is reset before ever having a chance to be effective
+//reg	no_hit ;
+
+
+// Demap and CAM operation are mutually exclusive.
+
+// Auto-demap hit capture.
+// On a falling clock edge with an auto-demap staged (demap_auto & demap_other)
+// every ademap_hit bit is reloaded with "valid entry that matches the key".
+// On any other edge ademap_hit keeps its previous contents.
+// An earlier variant that also qualified the hit with demap_all_but_locked_hit
+// for demap_all was commented out in the original and is not implemented.
+always  @ ( negedge clk )
+	begin
+	if (demap_auto & demap_other)
+		begin
+		for (n=0;n<64;n=n+1)
+			ademap_hit[n] = (~mismatch[n] & demap_other & tlb_entry_vld[n]) ;
+		end
+	end  // always
+
+// Combinational match of the staged key against all 64 tag entries.
+// For each entry n this computes the per-field mismatches, the combined
+// mismatch[n], cam_hit[n] and demap_hit[n].
+// NOTE(review): tte_tag_ram is read here but is not in the sensitivity list
+// (see the commented-out "memory or"), so a tag write alone does not
+// re-evaluate this block. The loop index n is also used by the ademap_hit
+// always block.
+always	@ (/*AUTOSENSE*/ /*memory or*/ 
+           cam_data or cam_pid or cam_vld or demap_all
+           or demap_other or tlb_entry_vld)
+	begin
+	
+		for (n=0;n<64;n=n+1)
+			begin
+			tag[58:0] = tte_tag_ram[n] ;	// CHANGE
+
+			// cam_data[53:13] holds the 41-bit key, hence the +13 offsets.
+			mismatch_va_b47_28[n] = 
+			(tag[`STLB_TAG_VA_47_28_HI:`STLB_TAG_VA_47_28_LO] 
+			!= cam_data[`CAM_VA_47_28_HI+13:`CAM_VA_47_28_LO+13]);
+
+			mismatch_va_b27_22[n] = 
+			(tag[`STLB_TAG_VA_27_22_HI:`STLB_TAG_VA_27_22_LO] 
+			!= cam_data[`CAM_VA_27_22_HI+13:`CAM_VA_27_22_LO+13]);
+
+			mismatch_va_b21_16[n] = 
+			(tag[`STLB_TAG_VA_21_16_HI:`STLB_TAG_VA_21_16_LO]
+			!= cam_data[`CAM_VA_21_16_HI+13:`CAM_VA_21_16_LO+13]) ;
+
+			mismatch_va_b15_13[n] = 
+			(tag[`STLB_TAG_VA_15_13_HI:`STLB_TAG_VA_15_13_LO]
+			!= cam_data[`CAM_VA_15_13_HI+13:`CAM_VA_15_13_LO+13]) ;
+
+			// cam_data[12:0] is the context.
+			mismatch_ctxt[n] = 
+			(tag[`STLB_TAG_CTXT_12_0_HI:`STLB_TAG_CTXT_12_0_LO] 
+			!= cam_data[`CAM_CTXT_12_0_HI:`CAM_CTXT_12_0_LO]) ;
+			
+			mismatch_pid[n] = (tag[`STLB_TAG_PID_HI:`STLB_TAG_PID_LO] != cam_pid[2:0]) ;
+
+			// A VA field counts only when the key's valid bit is set and, for
+			// the three lower fields, the entry's own valid bit is set too.
+			// A context mismatch counts only when the key's Gk bit is 0.
+			mismatch[n] =
+			(mismatch_va_b47_28[n] & cam_data[`CAM_VA_47_28_V+13]) 				|
+			(mismatch_va_b27_22[n] & tag[`STLB_TAG_VA_27_22_V] & cam_data[`CAM_VA_27_22_V+13]) 	|
+			(mismatch_va_b21_16[n] & tag[`STLB_TAG_VA_21_16_V] & cam_data[`CAM_VA_21_16_V+13]) 	|
+			(mismatch_va_b15_13[n] & tag[`STLB_TAG_VA_15_13_V] & cam_data[`CAM_VA_15_13_V+13]) 	|
+			(mismatch_ctxt[n] & ~cam_data[`CAM_CTXT_GK+13])	|
+			// mismatch is request type not equal to entry type. types are real/virtual.
+			((tag[`STLB_TAG_R] ^ cam_data[`CAM_REAL_V+13]) & ~demap_all)  	| 
+			//(mismatch_real[n] & cam_data[`CAM_REAL_V+13])  	|
+			mismatch_pid[n] ;	// pid always included in mismatch calculations
+
+			// Only unlocked entries are candidates for demap-all.
+			demap_all_but_locked_hit[n] = 
+			~tag[`STLB_TAG_L] & demap_all ;
+
+			cam_hit[n] 	= 
+				~mismatch[n] & cam_vld   & tlb_entry_vld[n] ;
+
+                        if (demap_all)  begin
+                                // Satya(10/3) - I've simplified the demap-all equation
+                                // Pls confirm that this is okay. Otherwise we will need
+                                // qualifying bits for the pid and r fields.
+                                /*demap_hit[n]  =
+                                        (demap_all_but_locked_hit[n] & demap_other) ;*/
+                                demap_hit[n]    =
+                                        (~mismatch[n] & demap_all_but_locked_hit[n] & demap_other
+                                                & tlb_entry_vld[n]) ;
+				// qualification with demap_auto to prevent ademap_hit from
+				// being cleared. Satya-we could get rid of this.
+                                // ademap_hit[n] is a phase A device and needs to be in a clocked always block
+                                //if (demap_auto & demap_other & clk) ademap_hit[n]   =
+                                //        (~mismatch[n] & demap_all_but_locked_hit[n] & demap_other
+                                //                & tlb_entry_vld[n]) ;
+                                end
+                        else    begin
+                                demap_hit[n]    =
+                                        (~mismatch[n] & demap_other & tlb_entry_vld[n]) ;
+				// qualification with demap_auto to prevent ademap_hit from
+				// being cleared. Satya-this is the only one we need.
+                                //if (demap_auto & demap_other & clk) ademap_hit[n]    =
+                                //        (~mismatch[n] & demap_other & tlb_entry_vld[n]) ;
+                                end
+//			no_hit = cam_vld ;
+			end
+
+	end  // always
+
+// Translation hit: reduction-OR of the 64 per-entry cam_hit bits.
+assign	tlb_cam_hit = |cam_hit[63:0] ;
+
+// Read on CAM hit occurs on negedge.
+/* MOVED TO COMMON ALWAYS BLOCK
+always @ (negedge clk)
+	begin
+		if (|cam_hit[63:0])	
+			begin
+			for (p=0;p<64;p=p+1)
+				if (cam_hit[p])	
+				begin
+					rd_tte_data[42:0] <= tte_data_ram[p] ;
+				end
+//				no_hit = 1'b0 ;
+			end
+//		else	if (no_hit) begin
+//			rd_tte_data[42:0] <= {43{1'bx}};
+//			no_hit = 1'b0 ;
+//			end
+	end
+*/
+// Change tlb_entry_vld handling for multi-threaded tlb writes.
+// A write is always preceded by an autodemap. The intent is to make the result of autodemap
+// (clearing of vld bit if hit) invisible until write occurs. In the same cycle that the write
+// occurs, the vld bit for an entry will be cleared if there is an autodemap hit. The write
+// and admp action may even be to same entry. The write must dominate. There is no need to
+// clear the dmp latches after the write/clear has occurred as the subsequent admp will set
+// up new state in the latches.
+
+// Define valid bit based on write/demap/reset. 
+// Evaluated per entry on the falling clock edge:
+//   - an enabled write to the entry (indexed, or to the replacement entry)
+//     loads the V bit of the tag being written;
+//   - otherwise, an entry recorded in ademap_hit is invalidated in the cycle
+//     the write is valid (wr_vld);
+//   - independently of the two cases above, a non-auto demap hit or
+//     sync_reset invalidates the entry. This assignment comes last, so it
+//     takes precedence when both apply in the same cycle.
+always @ (negedge clk)
+	begin
+	for (r=0;r<64;r=r+1)
+	begin // for
+	if (((rw_index_vld & rw_wdline[r]) | (~rw_index_vld & tlb_entry_replace_d2[r])) & 
+		wr_vld & ~rw_disable)
+			tlb_entry_vld[r] <= wr_tte_tag[`STLB_TAG_V] ;	// write
+	else	begin
+		if (ademap_hit[r] & wr_vld)			// autodemap specifically
+			tlb_entry_vld[r] <= 1'b0 ;		
+		end
+	  if ((demap_hit[r] & ~demap_auto) | sync_reset)	// non-auto-demap, reset
+			tlb_entry_vld[r] <= 1'b0 ;	
+	end // for
+	end
+
+
+// async reset.
+// Clears every valid bit whenever async_reset changes value. The block is
+// sensitive to any change of async_reset and has no level check, so it runs
+// on deassertion as well as assertion.
+// NOTE(review): this is a second procedural driver of tlb_entry_vld (the
+// falling-edge block also assigns it) - confirm how the target tools treat it.
+always  @ (async_reset) 
+	begin
+	for (l=0;l<64;l=l+1)
+		begin
+	  	tlb_entry_vld[l] <= 1'b0 ;
+		end
+	end
+
+//=========================================================================================
+//	TAG COMPARISON
+//=========================================================================================
+
+// M-stage register holding, from MSB to LSB:
+//   [30:4] compare key, [3:1] bypass va[12:10], [0] bypass request.
+reg [30:0] va_tag_plus ;
+
+// Stage to m
+always @(posedge clk)
+	begin
+	if (hold)
+		begin
+		// INNO - hold added on this input : the staged value is kept.
+		end
+	else
+		va_tag_plus[30:0]
+		<= {tlb_cam_comp_key[26:0],tlb_bypass_va[12:10],tlb_bypass};
+	end
+
+// Split the staged word into the virtual page number and the bypass flag.
+assign {vrtl_pgnum_m[29:0], bypass} = va_tag_plus[30:0] ;
+
+// Physical page number fields read from the tte data, PA[39:13].
+assign	phy_pgnum_m[29:3] =
+	{rd_tte_data[`STLB_DATA_PA_39_28_HI:`STLB_DATA_PA_39_28_LO],
+	 rd_tte_data[`STLB_DATA_PA_27_22_HI:`STLB_DATA_PA_27_22_LO],
+	 rd_tte_data[`STLB_DATA_PA_21_16_HI:`STLB_DATA_PA_21_16_LO],
+	 rd_tte_data[`STLB_DATA_PA_15_13_HI:`STLB_DATA_PA_15_13_LO]};
+
+// Page number mux. Each field takes the virtual bits when the tlb is
+// bypassed or, for the three lower fields, when the entry's SEL bit for that
+// field is set; otherwise it takes the physical bits from the tte data.
+// The three lowest bits always come from the virtual side.
+assign pgnum_m[29:18] = bypass
+				? vrtl_pgnum_m[29:18] : phy_pgnum_m[29:18] ;
+assign pgnum_m[17:12] = (rd_tte_data[`STLB_DATA_27_22_SEL] | bypass)
+				? vrtl_pgnum_m[17:12] : phy_pgnum_m[17:12] ;
+assign pgnum_m[11:6]  = (rd_tte_data[`STLB_DATA_21_16_SEL] | bypass)
+				? vrtl_pgnum_m[11:6]  : phy_pgnum_m[11:6] ;
+assign pgnum_m[5:3]   = (rd_tte_data[`STLB_DATA_15_13_SEL] | bypass)
+				? vrtl_pgnum_m[5:3]   : phy_pgnum_m[5:3] ;
+assign pgnum_m[2:0]   = vrtl_pgnum_m[2:0] ;
+
+// Stage to g
+// Flop tags in tlb itself and do comparison immediately after rising edge.
+// Similarly stage va/pa tag to g
+always @(posedge clk)
+		begin
+			pgnum_g[29:0] <= pgnum_m[29:0];
+			// rm hold on these inputs.
+			cache_set_vld_g[3:0]  	<= cache_set_vld[3:0] ;
+			cache_ptag_w0_g[29:0] 	<= cache_ptag_w0[29:0] ;
+			cache_ptag_w1_g[29:0] 	<= cache_ptag_w1[29:0] ;
+			cache_ptag_w2_g[29:0] 	<= cache_ptag_w2[29:0] ;
+			cache_ptag_w3_g[29:0] 	<= cache_ptag_w3[29:0] ;
+		end
+
+
+// Need to stage by a cycle where used.
+assign	tlb_pgnum[39:10] = pgnum_g[29:0] ;
+// Same cycle as cam - meant for one load on critical path
+assign	tlb_pgnum_crit[39:10] = pgnum_m[29:0] ;
+
+
+assign	cache_way_hit[0] = 
+	(cache_ptag_w0_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[0];
+assign	cache_way_hit[1] = 
+	(cache_ptag_w1_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[1];
+assign	cache_way_hit[2] = 
+	(cache_ptag_w2_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[2];
+assign	cache_way_hit[3] = 
+	(cache_ptag_w3_g[29:0] == pgnum_g[29:0]) & cache_set_vld_g[3];
+
+assign	cache_hit = |cache_way_hit[3:0];
+
+
+//=========================================================================================
+//	TLB ENTRY REPLACEMENT
+//=========================================================================================
+
+// A single Used bit is used to track the replacement state of each entry.
+// Only an unused entry can be replaced.
+// An Unused entry is :
+//			- an invalid entry
+//			- a valid entry which has had its Used bit cleared.
+//				- on write of a valid entry, the Used bit is set.
+//				- The Used bit of a valid entry is cleared if all
+//				entries have their Used bits set and the entry itself is not Locked.
+// A locked entry should always appear to be Used.
+// A single priority-encoder is required to evaluate the used status. Priority is static
+// and used entry0 is of the highest priority if unused.
+
+// Timing :
+// Used bit gets updated by cam-hit or hit on negedge.
+// After Used bit gets updated off negedge, the replacement entry can be generated in
+// Phase2. In parallel, it is determined whether all Used bits are set or not. If
+// so, then they are cleared on the next negedge with the replacement entry generated
+// in the related Phase1 
+
+// Choosing replacement entry
+// Replacement entry is integer k
+
+assign	tlb_not_writeable = &used[63:0] ;
+/*
+// Used bit can be set because of write or because of cam-hit.
+always @(negedge clk)
+	begin
+		for (s=0;s<64;s=s+1)
+			begin
+				if (cam_hit[s]) 
+					tlb_entry_used[s] <= 1'b1;			
+			end
+
+// Clear on following edge if necessary.
+// CHANGE(SATYA) : tlb_entry_used qualified with valid needs to be used to determine
+// whether the Used bits are to be cleared. This allows invalid entries created
+// by a demap to be used for replacement. Else we will ignore these entries
+// for replacement
+
+		if (tlb_not_writeable)
+			begin
+				for (t=0;t<64;t=t+1)
+					begin
+						if (~tlb_entry_locked[t])
+							tlb_entry_used[t] <= 1'b0;
+					end
+			end
+	end
+*/
+
+// Determine whether entry should be squashed.
+
+assign	used[63:0] = tlb_entry_used[63:0] & tlb_entry_vld[63:0] ;
+
+/*assign squash[0] = 1'b0 ;
+assign squash[1] = ~used[0] ;
+assign squash[2] = |(~used[1:0]) ;
+assign squash[3] = |(~used[2:0]) ;
+assign squash[4] = |(~used[3:0]) ;
+assign squash[5] = |(~used[4:0]) ;
+assign squash[6] = |(~used[5:0]) ;
+assign squash[7] = |(~used[6:0]) ;
+assign squash[8] = |(~used[7:0]) ;
+assign squash[9] = |(~used[8:0]) ;
+assign squash[10] = |(~used[9:0]) ;
+assign squash[11] = |(~used[10:0]) ;
+assign squash[12] = |(~used[11:0]) ;
+assign squash[13] = |(~used[12:0]) ;
+assign squash[14] = |(~used[13:0]) ;
+assign squash[15] = |(~used[14:0]) ;
+assign squash[16] = |(~used[15:0]) ;
+assign squash[17] = |(~used[16:0]) ;
+assign squash[18] = |(~used[17:0]) ;
+assign squash[19] = |(~used[18:0]) ;
+assign squash[20] = |(~used[19:0]) ;
+assign squash[21] = |(~used[20:0]) ;
+assign squash[22] = |(~used[21:0]) ;
+assign squash[23] = |(~used[22:0]) ;
+assign squash[24] = |(~used[23:0]) ;
+assign squash[25] = |(~used[24:0]) ;
+assign squash[26] = |(~used[25:0]) ;
+assign squash[27] = |(~used[26:0]) ;
+assign squash[28] = |(~used[27:0]) ;
+assign squash[29] = |(~used[28:0]) ;
+assign squash[30] = |(~used[29:0]) ;
+assign squash[31] = |(~used[30:0]) ;
+assign squash[32] = |(~used[31:0]) ;
+assign squash[33] = |(~used[32:0]) ;
+assign squash[34] = |(~used[33:0]) ;
+assign squash[35] = |(~used[34:0]) ;
+assign squash[36] = |(~used[35:0]) ;
+assign squash[37] = |(~used[36:0]) ;
+assign squash[38] = |(~used[37:0]) ;
+assign squash[39] = |(~used[38:0]) ;
+assign squash[40] = |(~used[39:0]) ;
+assign squash[41] = |(~used[40:0]) ;
+assign squash[42] = |(~used[41:0]) ;
+assign squash[43] = |(~used[42:0]) ;
+assign squash[44] = |(~used[43:0]) ;
+assign squash[45] = |(~used[44:0]) ;
+assign squash[46] = |(~used[45:0]) ;
+assign squash[47] = |(~used[46:0]) ;
+assign squash[48] = |(~used[47:0]) ;
+assign squash[49] = |(~used[48:0]) ;
+assign squash[50] = |(~used[49:0]) ;
+assign squash[51] = |(~used[50:0]) ;
+assign squash[52] = |(~used[51:0]) ;
+assign squash[53] = |(~used[52:0]) ;
+assign squash[54] = |(~used[53:0]) ;
+assign squash[55] = |(~used[54:0]) ;
+assign squash[56] = |(~used[55:0]) ;
+assign squash[57] = |(~used[56:0]) ;
+assign squash[58] = |(~used[57:0]) ;
+assign squash[59] = |(~used[58:0]) ;
+assign squash[60] = |(~used[59:0]) ;
+assign squash[61] = |(~used[60:0]) ;
+assign squash[62] = |(~used[61:0]) ;
+assign squash[63] = |(~used[62:0]) ; */
+
+// Based on updated Used state, generate replacement entry.
+// So, replacement entries can be generated on a cycle-by-cycle basis. 
+//always @(/*AUTOSENSE*/squash or used)
+
+	reg	[63:0]	tlb_entry_replace_d1;
+	reg		tlb_replace_flag;
+	always @(/*AUTOSENSE*/used)
+	begin
+  	  tlb_replace_flag=1'b0;
+  	  tlb_entry_replace_d1 = 64'b0;
+  	  // Priority is given to entry0
+   	  for (u=0;u<64;u=u+1)
+  	  begin
+    	    if(~tlb_replace_flag & ~used[u])
+    	    begin
+      	      tlb_entry_replace_d1[u] = ~used[u] ;
+      	      tlb_replace_flag=1'b1; 
+    	    end
+  	  end
+  	  if(~tlb_replace_flag) begin
+      	     tlb_entry_replace_d1[63] = 1'b1;
+ 	  end
+	end
+	always @(posedge clk)
+	begin
+	  // named in this manner to keep arch model happy.
+  	  tlb_entry_replace <= tlb_entry_replace_d1 ;
+	end
+	// INNO - 2 stage delay before update is visible
+	always @(posedge clk)
+	begin
+  	  tlb_entry_replace_d2 <= tlb_entry_replace ;
+	end
+
+//=========================================================================================
+//	TLB WRITEABLE DETECTION
+//=========================================================================================
+
+// 2-cycles later, tlb become writeable
+always @(posedge clk)
+	begin
+		tlb_not_writeable_d1 <= tlb_not_writeable ;
+	end
+
+always @(posedge clk)
+	begin
+		tlb_writeable <= ~tlb_not_writeable_d1 ;
+	end
+
+endmodule
+
+`endif
+
Index: /trunk/T1-common/srams/bw_r_l2d.v
===================================================================
--- /trunk/T1-common/srams/bw_r_l2d.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_l2d.v	(revision 6)
@@ -0,0 +1,269 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_l2d.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module bw_r_l2d (/*AUTOARG*/
+   // Outputs
+   wr_en_buf, word_en_buf, way_sel_buf, so, set_buf, 
+   scdata_scbuf_decc_top_buf, scdata_scbuf_decc_bot_buf, 
+   scbuf_scdata_fbdecc_top_buf, scbuf_scdata_fbdecc_bot_buf, 
+   l2d_fuse_data_out, decc_out, decc_in_buf, col_offset_buf, 
+   fuse_l2d_rid_buf, fuse_l2d_data_in_buf, arst_l_buf, se_buf, 
+   sehold_buf, fuse_l2d_rden_buf, fuse_l2d_wren_buf, fuse_clk1_buf, 
+   fuse_clk2_buf, mem_write_disable_buf, 
+   // Inputs
+   wr_en, word_en, way_sel, si, set, sehold, se, 
+   scdata_scbuf_decc_top, scdata_scbuf_decc_bot, 
+   scbuf_scdata_fbdecc_top, scbuf_scdata_fbdecc_bot, rclk, 
+   mem_write_disable, fuse_read_data_in, fuse_l2d_wren, fuse_l2d_rid, 
+   fuse_l2d_rden, fuse_l2d_data_in, efc_scdata_fuse_clk2, 
+   efc_scdata_fuse_clk1, decc_read_in, decc_in, col_offset, arst_l
+   );
+
+   /*AUTOINPUT*/
+   // Beginning of automatic inputs (from unused autoinst inputs)
+   input		arst_l;			// To bot_rep of bw_r_l2d_rep_bot.v
+   input		col_offset;		// To bot_rep of bw_r_l2d_rep_bot.v
+   input [155:0]	decc_in;		// To bot_rep of bw_r_l2d_rep_bot.v
+   input [155:0]	decc_read_in;		// To mem_0 of bw_r_l2d_32k.v
+   input		efc_scdata_fuse_clk1;	// To bot_rep of bw_r_l2d_rep_bot.v
+   input		efc_scdata_fuse_clk2;	// To bot_rep of bw_r_l2d_rep_bot.v
+   input		fuse_l2d_data_in;	// To bot_rep of bw_r_l2d_rep_bot.v
+   input		fuse_l2d_rden;		// To bot_rep of bw_r_l2d_rep_bot.v
+   input [2:0]		fuse_l2d_rid;		// To bot_rep of bw_r_l2d_rep_bot.v
+   input [5:0]		fuse_l2d_wren;		// To bot_rep of bw_r_l2d_rep_bot.v
+   input		fuse_read_data_in;	// To bot_rep of bw_r_l2d_rep_bot.v
+   input		mem_write_disable;	// To bot_rep of bw_r_l2d_rep_bot.v
+   input		rclk;			// To mem_0 of bw_r_l2d_32k.v, ...
+   input [155:0]	scbuf_scdata_fbdecc_bot;// To top_rep of bw_r_l2d_rep_top.v
+   input [155:0]	scbuf_scdata_fbdecc_top;// To top_rep of bw_r_l2d_rep_top.v
+   input [155:0]	scdata_scbuf_decc_bot;	// To bot_rep of bw_r_l2d_rep_bot.v
+   input [155:0]	scdata_scbuf_decc_top;	// To bot_rep of bw_r_l2d_rep_bot.v
+   input		se;			// To bot_rep of bw_r_l2d_rep_bot.v
+   input		sehold;			// To bot_rep of bw_r_l2d_rep_bot.v
+   input [9:0]		set;			// To bot_rep of bw_r_l2d_rep_bot.v
+   input		si;			// To bot_rep of bw_r_l2d_rep_bot.v
+   input [11:0]		way_sel;		// To bot_rep of bw_r_l2d_rep_bot.v
+   input [3:0]		word_en;		// To bot_rep of bw_r_l2d_rep_bot.v
+   input		wr_en;			// To bot_rep of bw_r_l2d_rep_bot.v
+   // End of automatics
+
+   output [2:0]		fuse_l2d_rid_buf;	// From bot_rep of bw_r_l2d_rep_bot.v
+   output 		fuse_l2d_data_in_buf;	// From bot_rep of bw_r_l2d_rep_bot.v
+   output 		arst_l_buf;		// From bot_rep of bw_r_l2d_rep_bot.v
+   output 		se_buf;			// From bot_rep of bw_r_l2d_rep_bot.v
+   output		sehold_buf;		// From bot_rep of bw_r_l2d_rep_bot.v
+   output 		fuse_l2d_rden_buf;	// From bot_rep of bw_r_l2d_rep_bot.v
+   output [5:0]		fuse_l2d_wren_buf;	// From bot_rep of bw_r_l2d_rep_bot.v
+   output 		fuse_clk1_buf;
+   output 		fuse_clk2_buf;
+   output 		mem_write_disable_buf;
+   
+   /*AUTOOUTPUT*/
+   // Beginning of automatic outputs (from unused autoinst outputs)
+   output		col_offset_buf;		// From top_rep of bw_r_l2d_rep_top.v
+   output [155:0]	decc_in_buf;		// From top_rep of bw_r_l2d_rep_top.v
+   output [155:0]	decc_out;		// From mem_1 of bw_r_l2d_32k.v
+   output		l2d_fuse_data_out;	// From mem_0 of bw_r_l2d_32k.v
+   output [155:0]	scbuf_scdata_fbdecc_bot_buf;// From bot_rep of bw_r_l2d_rep_bot.v
+   output [155:0]	scbuf_scdata_fbdecc_top_buf;// From bot_rep of bw_r_l2d_rep_bot.v
+   output [155:0]	scdata_scbuf_decc_bot_buf;// From top_rep of bw_r_l2d_rep_top.v
+   output [155:0]	scdata_scbuf_decc_top_buf;// From top_rep of bw_r_l2d_rep_top.v
+   output [9:0]		set_buf;		// From top_rep of bw_r_l2d_rep_top.v
+   output		so;			// From mem_0 of bw_r_l2d_32k.v
+   output [11:0]	way_sel_buf;		// From top_rep of bw_r_l2d_rep_top.v
+   output [3:0]		word_en_buf;		// From top_rep of bw_r_l2d_rep_top.v
+   output		wr_en_buf;		// From top_rep of bw_r_l2d_rep_top.v
+   // End of automatics
+
+   wire [155:0] 	decc_out_0;		// read data: mem_0.decc_out -> mem_1.decc_read_in
+   wire 		l2d_fuse_data_out_0;	// fuse data: mem_1.l2d_fuse_data_out -> mem_0.fuse_read_data_in
+   wire 		scan_out_0;		// scan chain: mem_1.so -> mem_0.si (declared explicitly; was an implicit net)
+   /*AUTOWIRE*/
+   // Beginning of automatic wires (for undeclared instantiated-module outputs)
+   wire			col_offset_l;		// From bot_rep of bw_r_l2d_rep_bot.v
+   wire [155:0]		decc_in_l;		// From bot_rep of bw_r_l2d_rep_bot.v
+   wire [155:0]		fbdb_l;			// From top_rep of bw_r_l2d_rep_top.v
+   wire [155:0]		fbdt_l;			// From top_rep of bw_r_l2d_rep_top.v
+   wire			fuse_read_data_in_buf;	// From bot_rep of bw_r_l2d_rep_bot.v
+   wire [155:0]		sbdb_l;			// From bot_rep of bw_r_l2d_rep_bot.v
+   wire [155:0]		sbdt_l;			// From bot_rep of bw_r_l2d_rep_bot.v
+   wire [9:0]		set_l;			// From bot_rep of bw_r_l2d_rep_bot.v
+   wire			si_buf;			// From bot_rep of bw_r_l2d_rep_bot.v
+   wire [11:0]		way_sel_l;		// From bot_rep of bw_r_l2d_rep_bot.v
+   wire [3:0]		word_en_l;		// From bot_rep of bw_r_l2d_rep_bot.v
+   wire			wr_en_l;		// From bot_rep of bw_r_l2d_rep_bot.v
+   // End of automatics
+
+
+
+   bw_r_l2d_rep_bot  bot_rep (/*AUTOINST*/
+			      // Outputs
+			      .fuse_l2d_rden_buf(fuse_l2d_rden_buf),
+			      .fuse_l2d_wren_buf(fuse_l2d_wren_buf[5:0]),
+			      .si_buf	(si_buf),
+			      .arst_l_buf(arst_l_buf),
+			      .se_buf	(se_buf),
+			      .sehold_buf(sehold_buf),
+			      .fuse_l2d_rid_buf(fuse_l2d_rid_buf[2:0]),
+			      .fuse_read_data_in_buf(fuse_read_data_in_buf),
+			      .fuse_l2d_data_in_buf(fuse_l2d_data_in_buf),
+			      .word_en_l(word_en_l[3:0]),
+			      .col_offset_l(col_offset_l),
+			      .set_l	(set_l[9:0]),
+			      .wr_en_l	(wr_en_l),
+			      .way_sel_l(way_sel_l[11:0]),
+			      .decc_in_l(decc_in_l[155:0]),
+			      .scbuf_scdata_fbdecc_top_buf(scbuf_scdata_fbdecc_top_buf[155:0]),
+			      .scbuf_scdata_fbdecc_bot_buf(scbuf_scdata_fbdecc_bot_buf[155:0]),
+			      .sbdt_l	(sbdt_l[155:0]),
+			      .sbdb_l	(sbdb_l[155:0]),
+			      .fuse_clk1_buf(fuse_clk1_buf),
+			      .fuse_clk2_buf(fuse_clk2_buf),
+			      .mem_write_disable_buf(mem_write_disable_buf),
+			      // Inputs
+			      .fuse_l2d_rden(fuse_l2d_rden),
+			      .fuse_l2d_wren(fuse_l2d_wren[5:0]),
+			      .si	(si),
+			      .arst_l	(arst_l),
+			      .se	(se),
+			      .sehold	(sehold),
+			      .fuse_l2d_rid(fuse_l2d_rid[2:0]),
+			      .fuse_read_data_in(fuse_read_data_in),
+			      .fuse_l2d_data_in(fuse_l2d_data_in),
+			      .word_en	(word_en[3:0]),
+			      .col_offset(col_offset),
+			      .set	(set[9:0]),
+			      .wr_en	(wr_en),
+			      .way_sel	(way_sel[11:0]),
+			      .decc_in	(decc_in[155:0]),
+			      .fbdt_l	(fbdt_l[155:0]),
+			      .fbdb_l	(fbdb_l[155:0]),
+			      .scdata_scbuf_decc_top(scdata_scbuf_decc_top[155:0]),
+			      .scdata_scbuf_decc_bot(scdata_scbuf_decc_bot[155:0]),
+			      .efc_scdata_fuse_clk1(efc_scdata_fuse_clk1),
+			      .efc_scdata_fuse_clk2(efc_scdata_fuse_clk2),
+			      .mem_write_disable(mem_write_disable));
+   
+
+   bw_r_l2d_rep_top  top_rep (/*AUTOINST*/
+			      // Outputs
+			      .word_en_buf(word_en_buf[3:0]),
+			      .col_offset_buf(col_offset_buf),
+			      .set_buf	(set_buf[9:0]),
+			      .wr_en_buf(wr_en_buf),
+			      .way_sel_buf(way_sel_buf[11:0]),
+			      .decc_in_buf(decc_in_buf[155:0]),
+			      .fbdt_l	(fbdt_l[155:0]),
+			      .fbdb_l	(fbdb_l[155:0]),
+			      .scdata_scbuf_decc_top_buf(scdata_scbuf_decc_top_buf[155:0]),
+			      .scdata_scbuf_decc_bot_buf(scdata_scbuf_decc_bot_buf[155:0]),
+			      // Inputs
+			      .word_en_l(word_en_l[3:0]),
+			      .col_offset_l(col_offset_l),
+			      .set_l	(set_l[9:0]),
+			      .wr_en_l	(wr_en_l),
+			      .way_sel_l(way_sel_l[11:0]),
+			      .decc_in_l(decc_in_l[155:0]),
+			      .scbuf_scdata_fbdecc_top(scbuf_scdata_fbdecc_top[155:0]),
+			      .scbuf_scdata_fbdecc_bot(scbuf_scdata_fbdecc_bot[155:0]),
+			      .sbdt_l	(sbdt_l[155:0]),
+			      .sbdb_l	(sbdb_l[155:0]));
+   
+
+			      
+   /*
+    bw_r_l2d_32k	AUTO_TEMPLATE(
+    .way_sel_l(way_sel_l[@"(+ 9 (* @ 2))":@"(+ 8 (* @ 2))"]),
+    .fuse_l2d_wren(fuse_l2d_wren_buf[@"(+ 4 @)"]),
+    .fuse_l2d_rden(fuse_l2d_rden_buf),
+    .si(si_buf),
+    .se(se_buf),
+    .arst_l(arst_l_buf),
+    .sehold(sehold_buf),
+    .mem_write_disable(mem_write_disable_buf),
+    .fuse_l2d_rid(fuse_l2d_rid_buf[2:0]),
+    .fuse_clk1(fuse_clk1_buf),
+    .fuse_clk2(fuse_clk2_buf),
+    .fuse_l2d_data_in(fuse_l2d_data_in_buf),
+    .fuse_read_data_in(fuse_read_data_in_buf));
+    */
+
+   // mem_0: bw_r_l2d_32k instance wired to way_sel_l[9:8] and fuse_l2d_wren_buf[4]; last stage of the scan and fuse-data chains.
+   bw_r_l2d_32k mem_0(
+		      //Inputs
+           	      .si		(scan_out_0),		// scan-in comes from mem_1.so
+		      .fuse_read_data_in(l2d_fuse_data_out_0),	// fuse data comes from mem_1.l2d_fuse_data_out
+		      //Outputs
+		      .decc_out(decc_out_0[155:0]),		// feeds mem_1.decc_read_in
+		      /*AUTOINST*/
+		      // Outputs
+		      .so		(so),
+		      .l2d_fuse_data_out(l2d_fuse_data_out),
+		      // Inputs
+		      .decc_in_l	(decc_in_l[155:0]),
+		      .decc_read_in	(decc_read_in[155:0]),
+		      .word_en_l	(word_en_l[3:0]),
+		      .way_sel_l	(way_sel_l[9:8]),	 // Templated
+		      .set_l		(set_l[9:0]),
+		      .col_offset_l	(col_offset_l),
+		      .wr_en_l		(wr_en_l),
+		      .rclk		(rclk),
+		      .arst_l		(arst_l_buf),		 // Templated
+		      .mem_write_disable(mem_write_disable_buf), // Templated
+		      .sehold		(sehold_buf),		 // Templated
+		      .se		(se_buf),		 // Templated
+		      .fuse_l2d_wren	(fuse_l2d_wren_buf[4]),	 // Templated
+		      .fuse_l2d_rden	(fuse_l2d_rden_buf),	 // Templated
+		      .fuse_l2d_rid	(fuse_l2d_rid_buf[2:0]), // Templated
+		      .fuse_clk1	(fuse_clk1_buf),	 // Templated
+		      .fuse_clk2	(fuse_clk2_buf),	 // Templated
+		      .fuse_l2d_data_in	(fuse_l2d_data_in_buf));	 // Templated
+   // mem_1: bw_r_l2d_32k instance wired to way_sel_l[11:10] and fuse_l2d_wren_buf[5]; first stage of the scan and fuse-data chains.
+   bw_r_l2d_32k  mem_1(
+		       //Inputs
+		       .decc_read_in(decc_out_0),		// read data comes from mem_0.decc_out
+		       //Outputs
+		       .l2d_fuse_data_out(l2d_fuse_data_out_0),	// feeds mem_0.fuse_read_data_in
+		       .so		(scan_out_0),		// feeds mem_0.si
+		       /*AUTOINST*/
+		       // Outputs
+		       .decc_out	(decc_out[155:0]),
+		       // Inputs
+		       .decc_in_l	(decc_in_l[155:0]),
+		       .word_en_l	(word_en_l[3:0]),
+		       .way_sel_l	(way_sel_l[11:10]),	 // Templated
+		       .set_l		(set_l[9:0]),
+		       .col_offset_l	(col_offset_l),
+		       .wr_en_l		(wr_en_l),
+		       .rclk		(rclk),
+		       .arst_l		(arst_l_buf),		 // Templated
+		       .mem_write_disable(mem_write_disable_buf), // Templated
+		       .sehold		(sehold_buf),		 // Templated
+		       .se		(se_buf),		 // Templated
+		       .si		(si_buf),		 // Templated
+		       .fuse_l2d_wren	(fuse_l2d_wren_buf[5]),	 // Templated
+		       .fuse_l2d_rden	(fuse_l2d_rden_buf),	 // Templated
+		       .fuse_l2d_rid	(fuse_l2d_rid_buf[2:0]), // Templated
+		       .fuse_clk1	(fuse_clk1_buf),	 // Templated
+		       .fuse_clk2	(fuse_clk2_buf),	 // Templated
+		       .fuse_l2d_data_in(fuse_l2d_data_in_buf),	 // Templated
+		       .fuse_read_data_in(fuse_read_data_in_buf)); // Templated
+
+endmodule // bw_r_l2d
Index: /trunk/T1-common/srams/bw_r_scm.v
===================================================================
--- /trunk/T1-common/srams/bw_r_scm.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_scm.v	(revision 6)
@@ -0,0 +1,363 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_scm.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//	Description:	Store Buffer of Load/Store Unit (CAM Side)
+//		 - Physically divided into CAM and DATA RAMs.
+//                              - CAM RAM has a single cam port and a single
+//                              port for read/writes. The cam port is for loads,
+//                              write for stores, read for test/diagnostic purposes.
+//                              rd or write can be simultaneous with cam. can rd and cam
+//                              a single entry simultaneously. cannot write and cam
+//                              the same entry.
+//                              - DATA RAM read occurs for a load raw match in the
+//                              stb CAM RAM. DATA RAM write occurs a store. Both
+//                              actions are architecturally guaranteed to be
+//                              mutex.
+//                              - Write occurs simultaneously to both arrays.
+//                              - Reads are not necessarily simultaneous and are
+//                              controlled by individual read signals.
+//                              - Certain bits are maintained outside the array
+//                              in the stb's control section, such as the valid
+//                              bits.                	
+//
+*/
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_SCM
+`endif
+
+module bw_r_scm (/*AUTOARG*/
+   // Outputs
+   stb_rdata_ramc, stb_ld_full_raw, stb_ld_partial_raw, 
+   stb_cam_hit_ptr, stb_cam_hit, stb_cam_mhit, 
+   // Inputs
+   stb_cam_data, stb_alt_wr_data, stb_camwr_data, stb_alt_wsel, 
+   stb_cam_vld, stb_cam_cm_tid, stb_cam_sqsh_msk, stb_cam_rw_ptr, 
+   stb_cam_wptr_vld, stb_cam_rptr_vld, stb_cam_rw_tid, 
+   stb_quad_ld_cam, rclk, rst_tri_en
+   ) ;	
+
+parameter NUMENTRIES = 32 ;				// number of entries in stb
+
+input	[44:15]		stb_cam_data ;	  // msb of data for compare; also the in-pipe write data msb
+input	[44:15]		stb_alt_wr_data ;	  // alternate write data msb, used when stb_alt_wsel is set
+input	[14:0]		stb_camwr_data ;  // data for compare/write; common lsb
+input			stb_alt_wsel ;	  // selects stb_alt_wr_data over stb_cam_data for the write msb
+input			stb_cam_vld ;	  // cam is required.
+input	[1:0]		stb_cam_cm_tid ;  // thread id for cam operation.
+input	[7:0]		stb_cam_sqsh_msk; // mask for squashing cam results.
+
+input 	[2:0]		stb_cam_rw_ptr ;  // rd/wr pointer for single port.
+input 	     		stb_cam_wptr_vld ;// write pointer vld
+input 	     		stb_cam_rptr_vld ;// read pointer vld
+input	[1:0]		stb_cam_rw_tid ;  // thread id for rw.
+input 			stb_quad_ld_cam ; // quad-ld cam.
+
+input			rclk ;		  // clock
+
+//input			scan_ena ;	  // no longer required !
+//input	[7:0]		adj ;
+
+input			rst_tri_en ;	  // when set, array writes are blocked and the read enable is forced on
+
+output	[44:0]		stb_rdata_ramc ;  // rd data from CAM RAM.
+// raw output is muxed on a thread basis.
+output	[7:0]		stb_ld_full_raw ; // ld with full raw.
+output	[7:0]		stb_ld_partial_raw ; // ld with partial raw.
+output	[2:0]		stb_cam_hit_ptr ; // 3-bit encode of the hitting entry (OR of cam_hit bits)
+output			stb_cam_hit ;	  // any hit in stb
+output			stb_cam_mhit ;	  // multiple hits in stb	
+
+
+/*UTOREG*/
+// Beginning of automatic regs (for this module's undeclared outputs)
+// End of automatics
+reg [44:0]		stb_rdata_ramc ;
+reg [31:0]		rw_wdline ;
+reg [44:0]		stb_ramc [NUMENTRIES-1:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */;
+reg [44:0]		ramc_entry ;
+reg [36:0]		cam_tag ;
+reg [31:0]		ptag_hit ;
+reg [7:0]		cam_bmask ;
+reg [31:0]		byte_match ;
+reg [31:0]		byte_overlap ;
+reg [31:0]		ld_full_raw ;
+reg [31:0]		ld_partial_raw ;
+reg [44:15]		alt_wr_data ;
+wire [44:15]		pipe_wr_data ;
+reg [14:0]		camwr_data ;
+reg			wptr_vld ; 
+reg			rptr_vld_tmp ; 
+reg [1:0]	  	cam_tid ;
+reg [1:0]	  	cam_vld ;
+reg			alt_wsel ;
+
+wire		rptr_vld ; 
+wire 		ldq ;
+wire	[7:0]	sqsh_msk ;
+wire 	[7:0]	ld_full_raw_mx ;
+wire    [7:0]	ld_partial_raw_mx ;
+wire	[7:0]	ptag_hit_mx ;
+wire	[7:0]	byte_overlap_mx ;
+wire	[7:0]	byte_match_mx ;
+wire	[7:0]	cam_hit ;
+wire	[44:0]	wdata_ramc ;
+wire	[44:0]	cam_data ;
+wire	[44:15] wr_data ;
+`ifdef FPGA_SYN_SCM
+reg	[4:0]	stb_addr;
+`endif
+
+   
+integer	i,j,k,l ;
+
+
+wire	scan_ena ;
+assign	scan_ena = 1'b0 ;
+
+//=========================================================================================
+//	generate wordlines
+//=========================================================================================
+
+assign	sqsh_msk[7:0]	= stb_cam_sqsh_msk[7:0]; 
+
+// cam_vld and cam_tid_tmp are set-up a phase earlier. 
+// Comment out - Now setup to posedge.
+/*always @(negedge clk)
+	begin
+		cam_tid_tmp[1:0]	<= stb_cam_cm_tid[1:0] ;
+		cam_vld_tmp		<= stb_cam_vld ;
+	end */
+
+`ifdef FPGA_SYN_SCM
+`else
+// Wordlines need to be generated locally 
+always @ (posedge rclk)
+        begin
+                for (i=0;i<32;i=i+1)
+                        begin
+                        if ({stb_cam_rw_tid[1:0],stb_cam_rw_ptr[2:0]} == i)
+                                rw_wdline[i]  <= 1'b1;
+                        else
+                                rw_wdline[i]  <= 1'b0;
+                        end
+        end
+`endif
+
+assign pipe_wr_data[44:15] = stb_cam_data[44:15];
+
+always @(posedge rclk)
+	begin
+		alt_wr_data[44:15] <= stb_alt_wr_data[44:15];
+		camwr_data[14:0] <= stb_camwr_data[14:0];
+		wptr_vld 	<= stb_cam_wptr_vld ;
+		rptr_vld_tmp 	<= stb_cam_rptr_vld ;
+		cam_tid[1:0]	<= stb_cam_cm_tid[1:0] ;
+		//cam_tid[1:0]	<= cam_tid_tmp[1:0] ;
+ 		//ldq 		<=  stb_quad_ld_cam ; Bug 2870
+		alt_wsel 	<= stb_alt_wsel ;
+`ifdef FPGA_SYN_SCM
+                stb_addr	<= {stb_cam_rw_tid[1:0],stb_cam_rw_ptr[2:0]};
+`endif
+	end
+
+assign 	ldq =  stb_quad_ld_cam ;
+assign  rptr_vld = rptr_vld_tmp | rst_tri_en ;
+
+//=========================================================================================
+//	write or read to/from memory
+//=========================================================================================
+
+// For blk-st, select out-of-pipe.
+assign	wr_data[44:15] = alt_wsel ? 
+                alt_wr_data[44:15] : pipe_wr_data[44:15] ;	
+
+assign	wdata_ramc[44:0] = {wr_data[44:15],camwr_data[14:0]};
+
+// Write port: on negedge rclk the addressed entry takes wdata_ramc unless rst_tri_en blocks the write.
+always @ (negedge rclk)
+	begin
+`ifdef FPGA_SYN_SCM
+	if(wptr_vld) begin
+		if(~rst_tri_en) begin
+			stb_ramc[stb_addr] <= wdata_ramc[44:0];
+			stb_rdata_ramc[44:0] <=  wdata_ramc[44:0];
+                end else begin
+			stb_rdata_ramc[44:0] <=  stb_ramc[stb_addr];
+		end
+	end
+`else
+		for (j=0;j<NUMENTRIES;j=j+1)
+			begin
+			if (rw_wdline[j] & wptr_vld)
+				begin
+				if (~rst_tri_en)
+					begin
+					stb_ramc[j] <=  wdata_ramc[44:0];
+					// write data is write-thru
+					stb_rdata_ramc[44:0] <=  wdata_ramc[44:0];
+					end
+				else
+					begin
+					// INNO - default rd if wr squashed (the squash condition here is rst_tri_en).
+					stb_rdata_ramc[44:0] <=  stb_ramc[j];
+					end
+				end
+			end
+`endif
+// Read port: evaluated after the write section in the same block, so its stb_rdata_ramc assignment wins when both fire.
+`ifdef FPGA_SYN_SCM
+		if(rptr_vld & ~scan_ena) begin
+			if (rptr_vld & wptr_vld & ~rst_tri_en) begin
+				stb_rdata_ramc[44:0] <=  wdata_ramc[44:0];
+			end
+			else begin
+				stb_rdata_ramc[44:0] <=  stb_ramc[stb_addr];
+			end
+		end
+`else
+		for (k=0;k<NUMENTRIES;k=k+1)
+			begin
+			if (rw_wdline[k] & rptr_vld & ~scan_ena)
+				begin
+				if (rptr_vld & wptr_vld & ~rst_tri_en) // INNO - write-thru
+                                        stb_rdata_ramc[44:0] <=  wdata_ramc[44:0];
+                                else
+                                        stb_rdata_ramc[44:0] <=  stb_ramc[k];
+				end
+			end
+`endif
+	end
+
+//=========================================================================================
+//	CAM contents of CAM RAM
+//=========================================================================================
+
+// - Generate full/partial raw for incoming load.
+// - Output signals need to be qualified with per entry
+// vlds before causing any subsequent event, the read of
+// the DATA RAM specifically.
+// - full_raw & vld will cause rd of DATA RAM.
+// - partial_raw & vld will cause ld to follow corresponding
+// st on way out to xbar.
+// - logic to generate partial and full raws may be done outside
+// but that would require an additional signal per entry to
+// be output.
+
+// Mapping of cam/write data
+// 
+//	| 	40-3=37b(pa)	| 1b(stquad) 	|	8b(bytemask)	| <- use
+//	|	45:9		| 8		|	7:0		| <- input port
+//				**^ stquad rm'ed
+
+reg [14:0] stb_camwr_data_d;
+reg        ldq_d;
+reg        stb_cam_vld_d;
+reg        scan_ena_d;
+reg [44:0]		stb_ramc_d [NUMENTRIES-1:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */;
+
+always @(posedge rclk)
+   begin
+      stb_camwr_data_d[14:0]<=stb_camwr_data[14:0];
+      ldq_d<=ldq;
+      stb_cam_vld_d<=stb_cam_vld;
+      scan_ena_d<=scan_ena;
+		for (l=0;l<NUMENTRIES;l=l+1)
+         stb_ramc_d[l]<=stb_ramc[l];
+   end
+
+assign	cam_data[44:0] = {stb_cam_data[44:15],stb_camwr_data_d[14:0]}; 
+
+// CAM compare (combinational) of every staged entry against cam_data. Uses @* because stb_ramc_d is an array and cannot be named in an explicit list; the previous list held only this block's own outputs, so it never fired in simulation.
+always @*
+  begin
+	for (l=0;l<NUMENTRIES;l=l+1)
+    begin
+		ramc_entry[44:0] = stb_ramc_d[l] ;	// staged copy of entry l
+		cam_tag[36:0] = ramc_entry[44:8] ;	// address tag portion of the entry
+		cam_bmask[7:0] = ramc_entry[7:0] ;	// byte mask portion of the entry
+      ptag_hit[l] = (cam_tag[36:1] == cam_data[44:9]) & 	// tag match; the lsb compare is skipped when ldq_d is set
+		  (((cam_tag[0] == cam_data[8]) & ~ldq_d) | ldq_d) & stb_cam_vld_d & ~scan_ena_d ;
+		byte_match[l] = |(cam_bmask[7:0] & cam_data[7:0]) & stb_cam_vld_d & ~scan_ena_d ;	// some requested byte is held by the entry
+		// Simplification : overlap = some requested byte is NOT held by the entry
+		byte_overlap[l] = |(~cam_bmask[7:0] & cam_data[7:0]) & stb_cam_vld_d & ~scan_ena_d ;
+    end
+  end
+// Mux the raw signals down to 8b quantities. Squash mask comes mid-way thru cycle.
+
+assign	byte_overlap_mx[7:0] =
+	(cam_tid[1:0] == 2'b00) ? byte_overlap[7:0] :
+		(cam_tid[1:0] == 2'b01) ? byte_overlap[15:8] :
+			(cam_tid[1:0] == 2'b10) ? byte_overlap[23:16] :
+				(cam_tid[1:0] == 2'b11) ? byte_overlap[31:24] : 8'bxxxx_xxxx ;
+
+assign	byte_match_mx[7:0] =
+	(cam_tid[1:0] == 2'b00) ? byte_match[7:0] :
+		(cam_tid[1:0] == 2'b01) ? byte_match[15:8] :
+			(cam_tid[1:0] == 2'b10) ? byte_match[23:16] :
+				(cam_tid[1:0] == 2'b11) ? byte_match[31:24] : 8'bxxxx_xxxx ;
+
+assign	ptag_hit_mx[7:0] =
+	(cam_tid[1:0] == 2'b00) ? ptag_hit[7:0] :
+		(cam_tid[1:0] == 2'b01) ? ptag_hit[15:8] :
+			(cam_tid[1:0] == 2'b10) ? ptag_hit[23:16] :
+				(cam_tid[1:0] == 2'b11) ? ptag_hit[31:24] : 8'bxxxx_xxxx ;
+
+assign	stb_ld_full_raw[7:0] =  
+	ptag_hit_mx[7:0] & byte_match_mx[7:0] & ~byte_overlap_mx[7:0] & ~sqsh_msk[7:0] ;
+assign	stb_ld_partial_raw[7:0] =  
+	ptag_hit_mx[7:0] & byte_match_mx[7:0] &  byte_overlap_mx[7:0] & ~sqsh_msk[7:0] ;
+
+assign	cam_hit[7:0] = 
+	ptag_hit_mx[7:0] & byte_match_mx[7:0] & ~sqsh_msk[7:0] ;
+assign	stb_cam_hit = |(cam_hit[7:0]);
+
+// The stb data is meant to be read for single hit full raw case. It may actually be read
+// for full raw, partial raw or multiple hit case but the read output will be ignored for
+// partial and multiple hit case. Multiple hits will not cause a hazard as the ptr is first
+// encoded and then decoded to form the wdline for the stb-data
+// Use cam_hit result to void false hits.
+assign	stb_cam_hit_ptr[0] 	=  cam_hit[1] | cam_hit[3] | cam_hit[5] | cam_hit[7] ;
+assign	stb_cam_hit_ptr[1] 	=  cam_hit[2] | cam_hit[3] | cam_hit[6] | cam_hit[7] ;
+assign	stb_cam_hit_ptr[2] 	=  cam_hit[4] | cam_hit[5] | cam_hit[6] | cam_hit[7] ;
+
+//Generating multiple hits
+assign  stb_cam_mhit            =  (cam_hit[0]  & cam_hit[1]) | (cam_hit[2] & cam_hit[3])  |
+                                   (cam_hit[4]  & cam_hit[5]) | (cam_hit[6] & cam_hit[7])  |
+                                   ((cam_hit[0] | cam_hit[1]) & (cam_hit[2] | cam_hit[3])) |
+                                   ((cam_hit[4] | cam_hit[5]) & (cam_hit[6] | cam_hit[7])) |
+                                   ((|cam_hit[3:0]) & (|cam_hit[7:4]));
+
+//--------------------------------------------------------------
+// Error Checking.
+//--------------------------------------------------------------
+
+// 1. simultaneous rd/wr on single port - terminate 
+// 2. simultaneous cam and wr - terminate
+// * PUT OUTSIDE OF SRAM RTL, AS RST NOT AVAILABLE. *
+
+endmodule
Index: /trunk/T1-common/srams/bw_r_idct.v
===================================================================
--- /trunk/T1-common/srams/bw_r_idct.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_idct.v	(revision 6)
@@ -0,0 +1,419 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_idct.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+ //  Module Name:  bw_r_idct.v
+ //  Description:	
+ //    Contains the RTL for the icache and dcache tag blocks.  
+ //    This is a 1RW 512 entry X 33b macro, with 132b rd and 132b wr,
+ //    broken into 4 33b segments with its own write enable.
+ //    Address and Control inputs are available the stage before
+ //    array access, which is referred to as "_x".  Write data is
+ //    available in the same stage as the write to the ram, referred
+ //    to as "_y".  Read data is also read out and available in "_y".
+ //
+ //            X       |      Y
+ //     index          |  ram access
+ //     index sel      |  write_tag 
+ //     rd/wr req      |     -> read_tag
+ //     way enable     |
+ */
+
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_IDCT
+`endif
+
+`ifdef FPGA_SYN_IDCT
+
+module bw_r_idct(rdtag_w0_y, rdtag_w1_y, rdtag_w2_y, rdtag_w3_y, so, rclk, se, 
+	si, reset_l, sehold, rst_tri_en, index0_x, index1_x, index_sel_x, 
+	dec_wrway_x, rdreq_x, wrreq_x, wrtag_w0_y, wrtag_w1_y, wrtag_w2_y, 
+	wrtag_w3_y, adj);
+	// FPGA_SYN_IDCT variant: four bw_r_idct_array instances (one per way) share one flopped index.
+	input			rclk;		// clock; forwarded unchanged as clk
+	input			se;		// not referenced in this variant
+	input			si;		// not referenced in this variant
+	input			reset_l;	// active low; a write enable is only produced while this is high
+	input			sehold;		// when high, the x->y input flops keep their value
+	input			rst_tri_en;	// when high, all write enables are forced low
+	input	[6:0]		index0_x;	// address candidate 0
+	input	[6:0]		index1_x;	// address candidate 1
+	input			index_sel_x;	// 1 selects index1_x, 0 selects index0_x
+	input	[3:0]		dec_wrway_x;	// per-way write select (bit n -> way n)
+	input			rdreq_x;	// flopped into rdreq_y, which is not used further here
+	input			wrreq_x;	// write request; qualifies the write enables one cycle later
+	input	[32:0]		wrtag_w0_y;	// write data for way 0 (not flopped in this module)
+	input	[32:0]		wrtag_w1_y;	// write data for way 1
+	input	[32:0]		wrtag_w2_y;	// write data for way 2
+	input	[32:0]		wrtag_w3_y;	// write data for way 3
+	input	[3:0]		adj;		// not referenced in this variant
+	output	[32:0]		rdtag_w0_y;	// read data, way 0
+	output	[32:0]		rdtag_w1_y;	// read data, way 1
+	output	[32:0]		rdtag_w2_y;	// read data, way 2
+	output	[32:0]		rdtag_w3_y;	// read data, way 3
+	output			so;		// NOTE(review): never driven in this variant
+	// Internal signals: the *_y registers are the *_x inputs delayed by one rising clk edge.
+	wire			clk;
+	reg	[6:0]		index_y;
+	reg			rdreq_y;
+	reg			wrreq_y;
+	reg	[3:0]		dec_wrway_y;
+	wire	[6:0]		index_x;
+	wire	[3:0]		we;
+
+   	reg [131:0]  rdtag_sa_y; //for error_inject XMR; never assigned inside this module
+	// we[n] = wrreq_y & reset_l & ~rst_tri_en & dec_wrway_y[n]
+	assign clk = rclk;
+	assign index_x = (index_sel_x ? index1_x : index0_x);
+    assign we = ({4 {((wrreq_y & reset_l) & (~rst_tri_en))}} & dec_wrway_y);
+	// x -> y staging flops; frozen while sehold is high.
+	always @(posedge clk) begin
+	  if (~sehold) begin
+	    rdreq_y <= rdreq_x;
+	    wrreq_y <= wrreq_x;
+	    index_y <= index_x;
+	    dec_wrway_y <= dec_wrway_x;
+	  end
+	end
+	// One array per way; all four see the same address, each has its own write enable and data.
+	bw_r_idct_array ictag_ary_00(
+		.we	(we[0]),
+		.clk	(clk),
+        .way (2'b00),
+		.rd_data(rdtag_w0_y), 
+		.wr_data(wrtag_w0_y),
+		.addr	(index_y),
+        .dec_wrway_y (dec_wrway_y));
+
+	bw_r_idct_array ictag_ary_01(
+		.we	(we[1]),
+		.clk	(clk),
+        .way (2'b01),
+		.rd_data(rdtag_w1_y),
+		.wr_data(wrtag_w1_y),
+		.addr	(index_y),
+        .dec_wrway_y (dec_wrway_y));
+
+	bw_r_idct_array ictag_ary_10(
+		.we	(we[2]),
+		.clk	(clk),
+        .way(2'b10),
+		.rd_data(rdtag_w2_y),
+		.wr_data(wrtag_w2_y),
+		.addr	(index_y),
+        .dec_wrway_y (dec_wrway_y));
+
+	bw_r_idct_array ictag_ary_11(
+		.we	(we[3]),
+		.clk	(clk),
+        .way(2'b11),
+		.rd_data(rdtag_w3_y),
+		.wr_data(wrtag_w3_y),
+		.addr	(index_y),
+        .dec_wrway_y (dec_wrway_y));
+
+endmodule
+
+module bw_r_idct_array(we, clk, rd_data, wr_data, addr,dec_wrway_y,way);
+// Single-way 33-bit tag RAM: on each falling clk edge it either writes (we=1) or reads (we=0).
+input we;                  // 1: write wr_data at addr; 0: read addr into rd_data
+input clk;                 // memory is accessed on the falling edge
+input [32:0] wr_data;
+input [6:0] addr;          // shared read/write address
+input [3:0] dec_wrway_y;   // not referenced inside this module
+input [1:0] way;           // not referenced inside this module
+output [32:0] rd_data;     // registered read data; keeps its value during a write
+reg [32:0] rd_data;
+// NOTE(review): addr is 7 bits, so only entries 0..127 of this 512-entry array are reachable here.
+reg	[32:0]		array[511:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+integer i;                 // not used in this module
+// Contents are only preloaded when DO_MEM_INIT is defined; otherwise this initial block is empty.
+initial begin
+`ifdef DO_MEM_INIT
+    // Add the memory init file in the database
+    $readmemb("/import/dtg-data11/sandeep/niagara/design/sys/iop/srams/rtl/mem_init_idct.txt",array);
+`endif
+end
+// Write and read are mutually exclusive per edge: a write cycle does not update rd_data.
+	always @(negedge clk) begin
+	  if (we) 
+          begin
+              array[addr] <= wr_data;
+          end
+	  else 
+          rd_data <= array[addr];
+	end
+endmodule
+
+`else
+
+module bw_r_idct(/*AUTOARG*/
+   // Outputs
+   rdtag_w0_y, rdtag_w1_y, rdtag_w2_y, rdtag_w3_y, so, 
+   // Inputs
+   rclk, se, si, reset_l, sehold, rst_tri_en, index0_x, index1_x, 
+   index_sel_x, dec_wrway_x, rdreq_x, wrreq_x, wrtag_w0_y, 
+   wrtag_w1_y, wrtag_w2_y, wrtag_w3_y, adj
+   );
+   // Variant compiled when FPGA_SYN_IDCT is not defined: behavioral array, or an l1_tag instance under DEFINE_0IN.
+   input          rclk, 
+                  se, 
+                  si, 
+                  reset_l;      // active LOW reset
+
+   input          sehold;       // when high, the x->y input flops hold their value
+   input          rst_tri_en;   // when high, writes to the behavioral array are blocked
+   
+   input [6:0]    index0_x;     // read/write address0
+   input [6:0]    index1_x;     // read/write address1
+
+   input          index_sel_x;  // selects between index1 and index0
+
+   input [3:0]    dec_wrway_x;  // way -- functions as a write enable
+                                // per 33b
+                                   
+   input          rdreq_x,      // read enable
+		              wrreq_x;      // write enable
+
+   // Don't use rdreq and wrreq to gate off the clock, since these are
+   // critical.  A separate power down signal can be supplied if
+   // needed. 
+   
+   input [32:0]   wrtag_w0_y;      // write data, not flopped
+   input [32:0]   wrtag_w1_y;      //
+   input [32:0]   wrtag_w2_y;      //
+   input [32:0]   wrtag_w3_y;      //
+
+   input [3:0]    adj;             // not referenced in this module body
+   
+
+   output [32:0] rdtag_w0_y;    // read data split into 4 ports
+   output [32:0] rdtag_w1_y;    // not flopped
+   output [32:0] rdtag_w2_y;    // 
+   output [32:0] rdtag_w3_y;    // 
+
+   output        so;            // NOTE(review): never assigned in this module body
+
+
+   // Declarations
+   // local signals
+`ifdef DEFINE_0IN 
+`else
+   reg [32:0]   ictag_ary  [511:0]; // addressed as {index_y, way[1:0]}
+   reg [131:0]  rdtag_bl_y,         // raw read value for all four ways; X when no valid read
+                rdtag_sa_y;         // latched copy of rdtag_bl_y that drives the outputs
+`endif
+
+   wire         clk;
+   
+
+   reg [6:0]    index_y;
+   reg          rdreq_y,
+		            wrreq_y;
+   reg [3:0]    dec_wrway_y;
+
+   wire [6:0]   index_x;
+
+   
+   //----------------
+   // Code starts here 
+   //----------------
+
+   assign       clk = rclk;
+   
+   //-------------------------
+   // 2:1 mux on address input
+   //-------------------------
+   // address inputs are critical and this mux needs to be merged with 
+   // the receiving flop.
+   assign index_x = index_sel_x ? index1_x :
+                                  index0_x;
+
+   //------------------------
+   // input flops from x to y
+   //------------------------
+   // these need to be scannable
+   always @ (posedge clk)
+     begin
+        if (~sehold)
+          begin
+                   rdreq_y <= rdreq_x;
+                   wrreq_y <= wrreq_x;
+                   index_y <= index_x;
+                   dec_wrway_y <= dec_wrway_x;
+          end
+     end
+   
+`ifdef DEFINE_0IN 
+wire [131:0] wm = { {33{(dec_wrway_y[3])}},{33{(dec_wrway_y[2])}},{33{(dec_wrway_y[1])}},{33{(dec_wrway_y[0])}} };
+wire         we = wrreq_y & ~se;
+// The tag storage is delegated to an l1_tag instance (defined outside this file) clocked on ~clk.
+l1_tag l1_tag ( .nclk(~clk), .adr(index_y[6:0]), .we(we), .wm(wm),
+                                              .din ({wrtag_w3_y,wrtag_w2_y,wrtag_w1_y,wrtag_w0_y}),
+                                              .dout({rdtag_w3_y,rdtag_w2_y,rdtag_w1_y,rdtag_w0_y}) );
+`else
+   // Behavioral array model: combinational read, latch on the outputs, write on the falling edge.
+   //----------------------------------------------------------------------
+   // Read Operation
+   //----------------------------------------------------------------------
+   // The array itself is not in the sensitivity list, so a write alone does not re-evaluate the read.
+   always @(/*AUTOSENSE*/ /*memory or*/ index_y or rdreq_y or reset_l
+            or wrreq_y) 
+     begin
+	      if (rdreq_y & reset_l)
+          begin
+             if (wrreq_y)    // rd_wr conflict
+	             begin
+	                rdtag_bl_y = {132{1'bx}};
+	             end 
+             
+	           else   // no write, read only
+	             begin
+                  rdtag_bl_y[32:0] = ictag_ary[{index_y,2'b00}];  // way0
+                  rdtag_bl_y[65:33] = ictag_ary[{index_y,2'b01}]; // way1
+                  rdtag_bl_y[98:66] = ictag_ary[{index_y,2'b10}]; // way2
+                  rdtag_bl_y[131:99] = ictag_ary[{index_y,2'b11}];// way3
+	             end
+          end
+        else    // no read
+          begin
+             rdtag_bl_y =  {132{1'bx}};
+          end
+        
+     end // always @ (...
+
+   // rdtag_sa_y follows rdtag_bl_y only while clk is low with a read pending and reset_l high.
+   // SA latch -- to make 0in happy
+   always @ (/*AUTOSENSE*/clk or rdreq_y or rdtag_bl_y or reset_l)
+     begin
+        if (rdreq_y & ~clk & reset_l)
+          begin
+             rdtag_sa_y <= rdtag_bl_y;
+          end
+     end
+
+   // Output is held the same if there is no read.  This is not a
+   // hard requirement, please let me know if the output has to
+   // be something else for ease of implementation.
+
+   // Output behavior during reset is currently not coded.
+   // Functionally there is no preference, though it should be
+   // unchanging to keep the power low.
+
+   // Final Output
+   assign rdtag_w0_y = rdtag_sa_y[32:0];
+   assign rdtag_w1_y = rdtag_sa_y[65:33];
+   assign rdtag_w2_y = rdtag_sa_y[98:66];
+   assign rdtag_w3_y = rdtag_sa_y[131:99];
+   
+
+   //----------------------------------------------------------------------
+   // Write Operation
+   //----------------------------------------------------------------------
+   // Writes should be blocked off during scan shift.
+   always @ (negedge clk)
+     begin
+	   if (wrreq_y & reset_l & ~rst_tri_en)
+	   begin
+             if (dec_wrway_y[0])
+	             ictag_ary[{index_y, 2'b00}] = wrtag_w0_y;
+             if (dec_wrway_y[1])
+	             ictag_ary[{index_y, 2'b01}] = wrtag_w1_y;
+             if (dec_wrway_y[2])
+	             ictag_ary[{index_y, 2'b10}] = wrtag_w2_y;
+             if (dec_wrway_y[3])
+	             ictag_ary[{index_y, 2'b11}] = wrtag_w3_y;
+	   end 
+     end
+
+   // TBD: Need to model rd-wr contention
+`endif
+
+   //******************************************************
+   // The stuff below is not part of the main functionality
+   // and has no representation in the actual circuit.
+   //******************************************************
+
+   // synopsys translate_off
+   
+   //-----------------------
+   // Contention Monitor
+   //-----------------------
+ `ifdef INNO_MUXEX
+ `else
+   always @ (negedge clk)
+   begin
+      if (rdreq_y & wrreq_y & reset_l)
+        begin
+           // 0in <fire -message "FATAL ERROR: rd and wr contention in idct"
+           //$error("IDtag Contention", "ERROR rd and wr contention in idct");
+        end
+   end // always @ (negedge clk)
+
+ `endif
+   
+
+   //--------------------------------
+//   // For dump_cache.v
+//   //--------------------------------
+//   //fake to make dump_cache.v happy
+//   reg [29:0] w0 [127:0];
+//   reg [29:0] w1 [127:0];
+//   reg [29:0] w2 [127:0];
+//   reg [29:0] w3 [127:0];
+//      
+//   always @ (negedge clk)
+//     begin
+//	      if (wrreq_y & ~se)
+//	        begin
+//             if (rdreq_y) begin // rd/wr contention
+//               case (dec_wrway_y)
+//                 4'b0001 : w0[index_y[6:0]] ={30{1'bx}};
+//                 4'b0010 : w1[index_y[6:0]] ={30{1'bx}};
+//                 4'b0100 : w2[index_y[6:0]] ={30{1'bx}};
+//                 4'b1000 : w3[index_y[6:0]] ={30{1'bx}};
+//               endcase // case(wrway_y)
+//             end
+//             else begin 
+//               case (dec_wrway_y)
+//                 4'b0001 : w0[index_y[6:0]] = wrtag_w0_y[29:0];
+//                 4'b0010 : w1[index_y[6:0]] = wrtag_w1_y[29:0];
+//                 4'b0100 : w2[index_y[6:0]] = wrtag_w2_y[29:0];
+//                 4'b1000 : w3[index_y[6:0]] = wrtag_w3_y[29:0];
+//               endcase // case(wrway_y)
+//             end
+//	        end 
+//     end
+
+   // synopsys translate_on    
+ 
+   
+endmodule // bw_r_idct
+
+`endif
+
+
Index: /trunk/T1-common/srams/bw_r_irf_register.v
===================================================================
--- /trunk/T1-common/srams/bw_r_irf_register.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_irf_register.v	(revision 6)
@@ -0,0 +1,303 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_irf_register.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+`ifdef FPGA_SYN_1THREAD
+
+`ifdef FPGA_SYN_SAVE_BRAM
+
+
+module bw_r_irf_register(clk, wren, save, save_addr, restore, restore_addr, wr_data, rd_data);
+	input		clk;
+	input		wren;		// load onereg from wr_data (unless restore is also set)
+	input		save;		// store the current value into window[] on the falling edge
+	input	[2:0]	save_addr;	// window slot used while save is high
+	input		restore;	// load onereg from the value last read out of window[]
+	input	[2:0]	restore_addr;	// window slot used while save is low
+	input	[71:0]	wr_data;
+	output	[71:0]	rd_data;	// always equal to onereg
+`ifdef FPGA_SYN_ALTERA
+    reg	[35:0]	window[15:0]/* synthesis syn_ramstyle = block_ram*/; //  syn_ramstyle = no_rw_check */;
+`else
+    reg	[35:0]	window[15:0]/* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */;
+`endif    
+reg	[71:0]	onereg;
+  // Each 72-bit value occupies two 36-bit entries: {1'b1,addr} = bits 71:36, {1'b0,addr} = bits 35:0.
+  initial onereg = 72'h0;
+
+  assign rd_data = onereg;
+
+  reg [71:0] restore_data;
+  wire [71:0] wrdata = restore ? restore_data : wr_data;
+  // restore takes priority over wr_data in the data select above; either request enables the load.
+  wire wr_en = wren | restore;
+
+  always @(posedge clk) begin
+    if(wr_en) onereg <= wrdata;
+  end
+  // One address is shared by both halves: save_addr while saving, restore_addr otherwise.
+  wire [2:0] addr = save ? save_addr : restore_addr;
+
+  wire [3:0] addr1 = {1'b1, addr};
+  wire [3:0] addr0 = {1'b0, addr};
+  // Upper half: a save stores wr_data when wren is set, else the current register value.
+  always @(negedge clk) begin
+    if(save) window[addr1] <= wren ? wr_data[71:36] : rd_data[71:36];
+    else restore_data[71:36] <= window[addr1];
+  end
+  // Lower half: same behaviour for bits 35:0.
+  always @(negedge clk) begin
+    if(save) window[addr0] <= wren ? wr_data[35:0] : rd_data[35:0];
+    else restore_data[35:0] <= window[addr0];
+  end
+
+
+endmodule
+
+
+`else
+
+
+module bw_r_irf_register(clk, wren, save, save_addr, restore, restore_addr, wr_data, rd_data);
+	input		clk;
+	input		wren;		// load onereg from wr_data (unless restore is also set)
+	input		save;		// flopped to save_d; copies onereg into window[] half a cycle later
+	input	[2:0]	save_addr;	// flopped to wr_addr on the rising edge
+	input		restore;	// load onereg from window[rd_addr]
+	input	[2:0]	restore_addr;	// flopped to rd_addr on the falling edge
+	input	[71:0]	wr_data;
+	output	[71:0]	rd_data;	// always equal to onereg
+`ifdef FPGA_SYN_ALTERA
+    reg	[71:0]	window[7:0]/* synthesis syn_ramstyle = block_ram*/; //  syn_ramstyle = no_rw_check */;
+`else
+reg	[71:0]	window[7:0]/* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */;
+`endif
+reg	[71:0]	onereg;
+
+reg	[2:0]	rd_addr;
+reg	[2:0]	wr_addr;
+reg		save_d;
+`ifdef FPGA_SYN_ALTERA
+    integer k;
+    // Altera builds start with a zeroed window[].
+    initial
+    begin
+        for (k = 0; k < 8 ; k = k + 1)
+        begin
+            window[k] = 72'h0;	
+        end
+    end
+`endif
+
+  initial 
+      begin
+          onereg = 72'b0;
+          wr_addr = 3'h0;
+          rd_addr = 3'h0;
+      end
+  // rd_addr is a falling-edge flop; non-blocking like every other flop in this module.
+  always @(negedge clk) begin
+    rd_addr <= restore_addr;
+  end
+
+  always @(posedge clk) begin
+    wr_addr <= save_addr;
+  end
+  always @(posedge clk) begin
+    save_d <= save;
+  end
+
+  assign rd_data = onereg;
+  // Unclocked read of the window at the registered address.
+  wire [71:0] restore_data = window[rd_addr];
+  wire [71:0] wrdata = restore ? restore_data : wr_data;
+  // A restore is skipped when the registered save and restore addresses are equal.
+  wire wr_en = wren | (restore & (wr_addr != rd_addr));
+
+  always @(posedge clk) begin
+    if(wr_en) onereg <= wrdata;
+  end
+    
+  always @(negedge clk) begin
+    if(save_d) window[wr_addr] <= rd_data;
+  end
+
+endmodule
+
+`endif
+
+`else
+
+
+module bw_r_irf_register(clk, wrens, save, save_addr, restore, restore_addr, wr_data0, wr_data1, wr_data2, wr_data3, rd_thread, rd_data);
+	input		clk;
+	input	[3:0]	wrens;		// per-thread load enable (bit n -> reg_th<n>)
+	input		save;		// flopped to save_d; copies one thread register into window[]
+	input	[4:0]	save_addr;	// flopped to wr_addr; bits 4:3 pick the thread register saved
+	input		restore;	// load one thread register from window[rd_addr]
+	input	[4:0]	restore_addr;	// flopped to rd_addr; bits 4:3 pick the thread register restored
+	input	[71:0]	wr_data0;
+	input	[71:0]	wr_data1;
+	input	[71:0]	wr_data2;
+	input	[71:0]	wr_data3;
+	input	[1:0]	rd_thread;	// selects which thread register drives rd_data
+	output	[71:0]	rd_data;
+`ifdef FPGA_SYN_ALTERA
+    reg	[71:0]	window[31:0]/* synthesis syn_ramstyle = block_ram*/; //  syn_ramstyle = no_rw_check */;
+`else
+    reg	[71:0]	window[31:0]/* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */;
+`endif
+reg	[71:0]	reg_th0, reg_th1, reg_th2, reg_th3;
+
+reg	[4:0]	rd_addr;
+reg	[4:0]	wr_addr;
+reg		save_d;
+
+initial begin
+  reg_th0 = 72'b0;
+  reg_th1 = 72'b0;
+  reg_th2 = 72'b0;
+  reg_th3 = 72'b0;
+end
+// Read port: rd_data is the register of the thread named by rd_thread.
+bw_r_irf_72_4x1_mux mux4_1(
+	.sel(rd_thread),
+	.x0(reg_th0),
+	.x1(reg_th1),
+	.x2(reg_th2),
+	.x3(reg_th3),
+	.y(rd_data)
+	);
+  // rd_addr is a falling-edge flop; non-blocking like every other flop in this module.
+  always @(negedge clk) begin
+    rd_addr <= restore_addr;
+  end
+
+  wire [71:0] restore_data = window[rd_addr];
+
+  always @(posedge clk) begin
+    wr_addr <= save_addr;
+  end
+  always @(posedge clk) begin
+    save_d <= save;
+  end
+
+  wire [71:0] save_data;
+  // Save path: the register picked by wr_addr[4:3] is written into window[wr_addr].
+  bw_r_irf_72_4x1_mux mux4_2(
+        .sel(wr_addr[4:3]),
+        .x0(reg_th0),
+        .x1(reg_th1),
+        .x2(reg_th2),
+        .x3(reg_th3),
+        .y(save_data)
+        );
+
+  always @(negedge clk) begin
+    if(save_d) window[wr_addr] <= save_data;
+  end
+
+//Register implementation for 4 threads / 2 write & 1 restore port
+  // restores is one-hot on rd_addr[4:3] while restore is high, all zero otherwise.
+  wire [3:0] restores = (1'b1 << rd_addr[4:3]) & {4{restore}};
+  //wire [3:0] wren1s = (1'b1 << wr1_th) & {4{wren1}};
+  //wire [3:0] wren2s = (1'b1 << wr2_th) & {4{wren2}};
+
+  wire [71:0] wrdata0, wrdata1, wrdata2, wrdata3;
+  // Per thread: restore data wins over that thread's wr_data when its restores bit is set.
+  bw_r_irf_72_2x1_mux mux2_5(
+        .sel(restores[0]),
+        .x0(wr_data0),
+        .x1(restore_data),
+        .y(wrdata0)
+        );
+
+  bw_r_irf_72_2x1_mux mux2_6(
+        .sel(restores[1]),
+        .x0(wr_data1),
+        .x1(restore_data),
+        .y(wrdata1)
+        );
+
+  bw_r_irf_72_2x1_mux mux2_7(
+        .sel(restores[2]),
+        .x0(wr_data2),
+        .x1(restore_data),
+        .y(wrdata2)
+        );
+
+  bw_r_irf_72_2x1_mux mux2_8(
+        .sel(restores[3]),
+        .x0(wr_data3),
+        .x1(restore_data),
+        .y(wrdata3)
+        );
+  // A restore is skipped when the registered save and restore addresses are equal.
+  //wire [3:0] wr_en = wren1s | wren2s | (restores & {4{(wr_addr[4:0] != rd_addr[4:0])}});
+  wire [3:0] wr_en = wrens | (restores & {4{(wr_addr[4:0] != rd_addr[4:0])}});
+
+  //288 Flops
+  always @(posedge clk) begin
+    if(wr_en[0]) reg_th0 <= wrdata0;
+    if(wr_en[1]) reg_th1 <= wrdata1;
+    if(wr_en[2]) reg_th2 <= wrdata2;
+    if(wr_en[3]) reg_th3 <= wrdata3;
+  end
+    
+endmodule
+
+
+module bw_r_irf_72_4x1_mux(sel, y, x0, x1, x2, x3);
+	// 72-bit 4:1 mux. There is no default arm, so y is left unchanged when sel has X/Z bits.
+	output	[71:0]	y;
+	input	[71:0]	x0, x1, x2, x3;
+	input	[1:0]	sel;
+	reg	[71:0]	y;
+
+	always @(x0 or x1 or x2 or x3 or sel)
+	begin
+		case (sel)
+			2'd0: y = x0;
+			2'd1: y = x1;
+			2'd2: y = x2;
+			2'd3: y = x3;
+		endcase
+	end
+
+endmodule
+	
+
+module bw_r_irf_72_2x1_mux(sel, y, x0, x1);
+	// 72-bit 2:1 mux. There is no default arm, so y is left unchanged when sel is X/Z.
+	output	[71:0]	y;
+	input	[71:0]	x0, x1;
+	input		sel;
+	reg	[71:0]	y;
+
+	always @(x0 or x1 or sel)
+		case (sel)
+			1'd0: y = x0;
+			1'd1: y = x1;
+		endcase
+
+endmodule
+	
+`endif
+
+
Index: /trunk/T1-common/srams/Flist.srams
===================================================================
--- /trunk/T1-common/srams/Flist.srams	(revision 6)
+++ /trunk/T1-common/srams/Flist.srams	(revision 6)
@@ -0,0 +1,57 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: Flist.srams
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+//ffu
+-v bw_r_frf.v
+//exu
+-v bw_r_irf.v
+-v bw_r_irf_register.v
+//lsu
+-v bw_r_scm.v
+-v bw_r_dcd.v
+-v bw_r_rf32x152b.v
+//lsu+ifu
+-v ../../common/rtl/cmp_sram_redhdr.v
+-v bw_r_rf16x32.v
+//lsu+ifu+spu
+-v bw_r_idct.v
+//lsu+tlu
+-v bw_r_rf32x80.v
+//tlu+spu+sctag+scbuf+jbi
+-v bw_r_rf16x160.v
+//sctag
+-v bw_r_dcm.v
+-v bw_r_l2t.v
+-v bw_r_rf16x128d.v
+-v bw_r_rf32x108.v
+-v bw_r_cm16x40.v
+-v bw_r_cm16x40b.v
+//scdata
+-v bw_r_l2d.v
+-v bw_r_l2d_rep_bot.v
+-v bw_r_l2d_rep_top.v
+-v bw_r_l2d_32k.v
+//dram+jbi+iobdg
+-v bw_rf_16x65.v
+//jbi
+-v bw_rf_16x81.v
+//iobdg
+//-v bw_r_rf16x160iop.v
+//-v bw_rf_64x15.v
Index: /trunk/T1-common/srams/bw_r_dcd.v
===================================================================
--- /trunk/T1-common/srams/bw_r_dcd.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_dcd.v	(revision 6)
@@ -0,0 +1,636 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_dcd.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:
+//  Description:  LSU Data Cache.
+//      - Physically-Indexed Physically Tagged (PIPT)
+//      - 8KB
+//      - 4 way set-associative.
+//      - 16B lines
+//      - 2:1 column select by choosing either lower
+//      or upper half of 16B line.
+//      - Parity protected on a byte basis.
+//      - Byte enables for byte-wide stores.
+//
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+//`include  "sys.h" // system level definition file which contains the 
+                  // time scale definition
+
+//`include "iop.h"
+//`include "fabric.h"
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_DCD
+`endif
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module bw_r_dcd ( /*AUTOARG*/
+   // Outputs
+   so, dcache_rdata_wb, dcache_rparity_wb, dcache_rparity_err_wb, 
+   dcache_rdata_msb_w0_m, dcache_rdata_msb_w1_m, 
+   dcache_rdata_msb_w2_m, dcache_rdata_msb_w3_m, 
+   dcd_fuse_repair_value, dcd_fuse_repair_en, 
+   // Inputs
+   dcache_rd_addr_e, dcache_alt_addr_e, dcache_rvld_e, dcache_wvld_e, 
+   dcache_wdata_e, dcache_wr_rway_e, dcache_byte_wr_en_e, 
+   dcache_alt_rsel_way_e, dcache_rsel_way_wb, dcache_alt_mx_sel_e, 
+   si, se, sehold, rst_tri_en, arst_l, rclk, dcache_alt_data_w0_m, 
+   dcache_arry_data_sel_m, efc_spc_fuse_clk1, fuse_dcd_wren, 
+   fuse_dcd_rid, fuse_dcd_repair_value, fuse_dcd_repair_en
+   ) ;  
+
+input [10:3]    dcache_rd_addr_e;     // read cache index [10:4] + bit [3] offset
+input [10:3]    dcache_alt_addr_e;    // write/bist/diagnostic read cache index + offset 
+
+input           dcache_rvld_e;        // read accesses d$.
+input           dcache_wvld_e;        // valid write setup to m-stage.
+   
+input [143:0]   dcache_wdata_e;       // write data - 16Bx8 + 8b parity.
+input [3:0]     dcache_wr_rway_e;     // replacement way for load miss/store.
+input [15:0]    dcache_byte_wr_en_e;  // 16b byte wr enable for stores.
+
+input [3:0]     dcache_alt_rsel_way_e ; // bist/diagnostic read way select
+input [3:0]     dcache_rsel_way_wb;     // load way select, connect to cache_way_hit
+input           dcache_alt_mx_sel_e;
+       
+input           si;
+input           se;
+input           sehold;
+   
+output          so;
+
+input		rst_tri_en ;		
+
+input           arst_l;	// used for redundancy flops - do not reset on wrm reset.
+
+input           rclk;
+
+output  [63:0]  dcache_rdata_wb;
+output  [7:0]   dcache_rparity_wb;
+output          dcache_rparity_err_wb; 
+
+//=================================
+//    dc_fill critical path
+//=================================  
+   input [63:0] dcache_alt_data_w0_m; //from qdp1
+   input        dcache_arry_data_sel_m;            //from dctl
+   
+   output [7:0] dcache_rdata_msb_w0_m;    //to dcdp
+   output [7:0] dcache_rdata_msb_w1_m;    //to dcdp
+   output [7:0] dcache_rdata_msb_w2_m;    //to dcdp
+   output [7:0] dcache_rdata_msb_w3_m;    //to dcdp
+
+//-----------------------------------------------------------------------------
+// 32KB block fuse inputs
+//-----------------------------------------------------------------------------
+// efuse non ovl clks
+input           efc_spc_fuse_clk1;
+   
+input           fuse_dcd_wren;          //redundancy register write enable, qualified
+input [2:0]     fuse_dcd_rid;           //redundancy register id
+input [7:0]     fuse_dcd_repair_value;  //data in for redundancy register
+input [1:0]	    fuse_dcd_repair_en;     //enable bits to turn on redundancy
+output [7:0]    dcd_fuse_repair_value;  //data out for redundancy register
+output [1:0]	  dcd_fuse_repair_en;     //enable bits out 
+   
+// Memory declaration.
+
+`ifdef DEFINE_0IN
+wire [143:0]   temp_w0a;
+wire [143:0]   temp_w1a;
+wire [143:0]   temp_w2a;
+wire [143:0]   temp_w3a;
+`else
+reg [143:0]   w0 [127:0]/* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */  ;   // way0, byte0. Data+Parity. 
+reg [143:0]   w1 [127:0]/* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */  ;   // way0, byte0. Data+Parity.
+reg [143:0]   w2 [127:0]/* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */  ;   // way0, byte0. Data+Parity.
+reg [143:0]   w3 [127:0]/* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */  ;   // way0, byte0. Data+Parity.
+
+reg [143:0]   temp_w0a_reg;
+reg [143:0]   temp_w1a_reg;
+reg [143:0]   temp_w2a_reg;
+reg [143:0]   temp_w3a_reg;
+  
+wire [143:0]   temp_w0a;
+wire [143:0]   temp_w1a;
+wire [143:0]   temp_w2a;
+wire [143:0]   temp_w3a;
+
+reg [143:0]   temp_w0;
+reg [143:0]   temp_w1;
+reg [143:0]   temp_w2;
+reg [143:0]   temp_w3;
+`endif
+reg [10:3]    dcache_rwaddr_m ;
+reg [10:3]    dcache_raddr_m ;
+reg           dcache_rvld_m ;
+reg           wvld_m ;
+reg [143:0]   dcache_wdata_m ;
+reg [127:0]   rw_wdline ;
+reg [3:0]     dcache_wr_rway_m ;
+
+reg [63:0]    dcache_rdata_w0_wb;   // way0 64b data.
+reg [63:0]    dcache_rdata_w1_wb;   // way1 64b data.
+reg [63:0]    dcache_rdata_w2_wb;   // way2 64b data.
+reg [63:0]    dcache_rdata_w3_wb;   // way3 64b data.
+reg [15:0]    byte_wr_enable ;
+reg [7:0]     ctr;
+
+reg           dcache_alt_mx_sel_m, dcache_alt_mx_sel_wb;
+reg [3:0]     dcache_alt_rsel_way_m, dcache_alt_rsel_way_wb;
+   
+integer       i,j;
+
+wire            dcache_wvld_m ;
+wire  [63:0]    dcache_rdata_w0_m;    // way0 64b data.
+wire  [63:0]    dcache_rdata_w1_m;    // way1 64b data.
+wire  [63:0]    dcache_rdata_w2_m;    // way2 64b data.
+wire  [63:0]    dcache_rdata_w3_m;    // way3 64b data.
+wire  [7:0]     dcache_rparity_w0_m;  // way0 8b parity.
+wire  [7:0]     dcache_rparity_w1_m;  // way1 8b parity.
+wire  [7:0]     dcache_rparity_w2_m;  // way2 8b parity.
+wire  [7:0]     dcache_rparity_w3_m;  // way3 8b parity.
+
+   wire [7:0]   rd_parity_err_w0_m;
+   wire [7:0]   rd_parity_err_w1_m;
+   wire [7:0]   rd_parity_err_w2_m;
+   wire [7:0]   rd_parity_err_w3_m;
+   
+   
+wire  [143:0]   way_mask ;
+wire  [143:0]   way_mask_inv ;
+
+wire  [10:3]    dcache_rwaddr_e ;
+wire  [10:3]    dcache_raddr_e ;
+
+//calculated parity based on read-out data
+wire [7:0]  gen_dcache_parity_w0_m;
+wire [7:0]  gen_dcache_parity_w1_m;
+wire [7:0]  gen_dcache_parity_w2_m;
+wire [7:0]  gen_dcache_parity_w3_m;
+
+   wire     clk;
+   assign   clk = rclk;
+   
+//=========================================================================================
+//  Staging
+//=========================================================================================
+
+// BIST Rd used fill address port.
+assign  dcache_rwaddr_e[10:3] = 
+  (dcache_alt_mx_sel_e) ? dcache_alt_addr_e[10:3] : dcache_rd_addr_e[10:3] ; 
+
+assign  dcache_raddr_e[10:3] = 
+  (dcache_alt_mx_sel_e) ? dcache_alt_addr_e[10:3] : dcache_rd_addr_e[10:3] ; 
+   
+always @(posedge clk)  // E -> M staging registers
+  begin
+    dcache_alt_mx_sel_m   <= sehold ? dcache_alt_mx_sel_m : dcache_alt_mx_sel_e;
+    // sehold high: every register in this block keeps its value; sehold low: it loads the E-stage input.
+    dcache_alt_rsel_way_m <= sehold ? dcache_alt_rsel_way_m : dcache_alt_rsel_way_e;
+    // M-stage copy of the array index; bit 3 later picks the upper or lower 64b half of the line.
+    dcache_rwaddr_m[10:3] <= sehold ? dcache_rwaddr_m[10:3] : dcache_rwaddr_e[10:3] ;
+    // Loaded from dcache_raddr_e, which is driven by the same mux expression as dcache_rwaddr_e.
+    dcache_raddr_m[10:3] <= sehold ? dcache_raddr_m[10:3] : dcache_raddr_e[10:3] ;
+    // Read valid for the M stage.
+    dcache_rvld_m         <= sehold ? dcache_rvld_m  : dcache_rvld_e ;
+    // Raw write valid; it is ANDed with ~rst_tri_en after this section to form dcache_wvld_m.
+    wvld_m                <= sehold ?  wvld_m : dcache_wvld_e ;
+    // 144b write line.
+    dcache_wdata_m[143:0] <= sehold ?  dcache_wdata_m[143:0] : dcache_wdata_e[143:0] ;
+    // Per-way write enables: bit N gates the write into way N.
+    dcache_wr_rway_m[3:0] <= sehold ?  dcache_wr_rway_m[3:0] : dcache_wr_rway_e[3:0] ;
+    // Byte write enables; expanded into the 144b way_mask in the write section.
+    byte_wr_enable[15:0]  <= sehold ? byte_wr_enable[15:0] : dcache_byte_wr_en_e[15:0] ;
+
+  end
+
+always @ (posedge clk)  // M -> WB staging of the alternate-select controls
+  begin
+// JC modified begin: the two unconditional flops below are kept for reference only
+//    dcache_alt_mx_sel_wb <= dcache_alt_mx_sel_m;
+//    dcache_alt_rsel_way_wb  <= dcache_alt_rsel_way_m;
+    dcache_alt_mx_sel_wb <= sehold ? dcache_alt_mx_sel_wb :dcache_alt_mx_sel_m;  // hold while sehold is high
+    dcache_alt_rsel_way_wb  <= sehold ? dcache_alt_rsel_way_wb :dcache_alt_rsel_way_m;  // hold while sehold is high
+// JC modified end
+  end
+   
+assign	dcache_wvld_m = wvld_m & ~rst_tri_en ;
+
+
+`ifdef DEFINE_0IN
+wire [3:0] dc_we = dcache_wvld_m ? dcache_wr_rway_m : 4'b0;
+
+dc_data dc_data0 ( .nclk(~clk), .adr(dcache_rwaddr_m[10:4]),
+                                 .we(dc_we           [0]  ), .wm(way_mask  [143:0]),
+                                .din(dcache_wdata_m[143:0]), .dout(temp_w0a[143:0]) );
+dc_data dc_data1 ( .nclk(~clk), .adr(dcache_rwaddr_m[10:4]),
+                                 .we(dc_we           [1]  ), .wm(way_mask  [143:0]),
+                                .din(dcache_wdata_m[143:0]), .dout(temp_w1a[143:0]) );
+dc_data dc_data2 ( .nclk(~clk), .adr(dcache_rwaddr_m[10:4]),
+                                 .we(dc_we           [2]  ), .wm(way_mask  [143:0]),
+                                .din(dcache_wdata_m[143:0]), .dout(temp_w2a[143:0]) );
+dc_data dc_data3 ( .nclk(~clk), .adr(dcache_rwaddr_m[10:4]),
+                                 .we(dc_we           [3]  ), .wm(way_mask  [143:0]),
+                                .din(dcache_wdata_m[143:0]), .dout(temp_w3a[143:0]) );
+`else
+//=========================================================================================
+//  generate wordlines
+//=========================================================================================
+
+// Generate at posedge of clk.
+// JC modified begin
+/*
+always @ (posedge clk)
+  begin
+    for (ctr=8'h00;ctr<128;ctr=ctr+1)
+      begin
+      if (clk & ({1'b0,dcache_rwaddr_e[10:4]} == ctr) & 
+         (dcache_rvld_e | dcache_wvld_e))  
+        rw_wdline[ctr]  = 1'b1;
+      else  
+        rw_wdline[ctr]  = 1'b0;
+      end 
+  end
+*/
+
+`ifdef FPGA_SYN_DCD
+`else
+always @ (clk or dcache_rwaddr_m or dcache_wvld_m or dcache_rvld_m)  // level-sensitive, not edge-triggered
+  begin
+   if (clk) begin  // no else branch: rw_wdline keeps its last value while clk is low
+    for (ctr=8'h00;ctr<128;ctr=ctr+1)  // visit each of the 128 wordlines
+      begin
+      if (({1'b0,dcache_rwaddr_m[10:4]} == ctr) &   // 7b index, zero-extended to 8b for the compare
+         (dcache_rvld_m | dcache_wvld_m))    // a read or a write is valid in M
+        rw_wdline[ctr]  = 1'b1;  // the addressed line is driven high
+      else  
+        rw_wdline[ctr]  = 1'b0;  // every other line is cleared
+      end 
+   end
+  end
+// JC modified end
+`endif
+
+
+//=========================================================================================
+//  Read from Memory.
+//=========================================================================================
+
+`ifdef FPGA_SYN_DCD
+always @(posedge clk) begin  // FPGA read port: fetch all four ways at the E-stage index
+  temp_w0a_reg[143:0] <= w0[dcache_raddr_e[10:4]];  // Non-blocking on purpose: these are flops, and
+  temp_w1a_reg[143:0] <= w1[dcache_raddr_e[10:4]];  // the WB staging block samples logic derived from
+  temp_w2a_reg[143:0] <= w2[dcache_raddr_e[10:4]];  // them on this same clock edge.  A blocking update
+  temp_w3a_reg[143:0] <= w3[dcache_raddr_e[10:4]];  // would race with that sample in simulation.
+end
+`else
+// Read
+always @ (negedge clk)  // behavioural read: samples the array on the falling edge
+  begin
+    for (i=0;i<128;i=i+1)  // scan all 128 wordlines
+      begin
+        if (rw_wdline[i] & dcache_rvld_m)   // active line and a read is valid in M
+          begin
+            temp_w0a_reg[143:0] <= w0[i];  // all four ways of the line are read together;
+            temp_w1a_reg[143:0] <= w1[i];  // way selection happens later, in WB
+            temp_w2a_reg[143:0] <= w2[i];
+            temp_w3a_reg[143:0] <= w3[i];
+          end  // no else: the registers keep their previous contents when nothing is read
+      end
+  end
+`endif
+
+// Stabilizer removed: the outputs are forced to zero whenever no read is valid.
+assign  temp_w0a[143:0] = !dcache_rvld_m ? {144{1'b0}} : temp_w0a_reg[143:0];  // way 0
+assign  temp_w1a[143:0] = !dcache_rvld_m ? {144{1'b0}} : temp_w1a_reg[143:0];  // way 1
+assign  temp_w2a[143:0] = !dcache_rvld_m ? {144{1'b0}} : temp_w2a_reg[143:0];  // way 2
+assign  temp_w3a[143:0] = !dcache_rvld_m ? {144{1'b0}} : temp_w3a_reg[143:0];  // way 3
+
+`endif
+
+// Prior to SA, column mux (64(D)+8(P))x4 bits. Assume parity is
+// at the end of the 144b line. Entry is wX||Parity
+
+// Select either upper or lower 64b from each of the 4 ways.
+assign  dcache_rdata_w0_m[63:0] = ~dcache_rwaddr_m[3] ? temp_w0a[143:80] : temp_w0a[79:16] ; 
+assign  dcache_rdata_w1_m[63:0] = ~dcache_rwaddr_m[3] ? temp_w1a[143:80] : temp_w1a[79:16] ; 
+assign  dcache_rdata_w2_m[63:0] = ~dcache_rwaddr_m[3] ? temp_w2a[143:80] : temp_w2a[79:16] ; 
+assign  dcache_rdata_w3_m[63:0] = ~dcache_rwaddr_m[3] ? temp_w3a[143:80] : temp_w3a[79:16] ; 
+
+   wire [7:0] dcache_msb_w0_m;
+   wire [7:0] dcache_alt_data_w0_msb_m;
+   
+//MSB sent out to dcdp in M stage   
+assign dcache_msb_w0_m[7:0]=
+    {dcache_rdata_w0_m[63], 
+     dcache_rdata_w0_m[55], 
+     dcache_rdata_w0_m[47], 
+     dcache_rdata_w0_m[39],
+	   dcache_rdata_w0_m[31], 
+     dcache_rdata_w0_m[23], 
+     dcache_rdata_w0_m[15], 
+     dcache_rdata_w0_m[07]} ;
+   
+assign dcache_alt_data_w0_msb_m [7:0]=
+    {dcache_alt_data_w0_m[63], 
+     dcache_alt_data_w0_m[55], 
+     dcache_alt_data_w0_m[47], 
+     dcache_alt_data_w0_m[39],
+	   dcache_alt_data_w0_m[31], 
+     dcache_alt_data_w0_m[23], 
+     dcache_alt_data_w0_m[15], 
+     dcache_alt_data_w0_m[07]} ;
+
+//2-to-1 mux   
+assign dcache_rdata_msb_w0_m[7:0] = dcache_arry_data_sel_m ? 
+                                    dcache_msb_w0_m[7:0] : 
+                                    dcache_alt_data_w0_msb_m[7:0];
+   
+assign dcache_rdata_msb_w1_m[7:0]=
+    {dcache_rdata_w1_m[63], 
+     dcache_rdata_w1_m[55], 
+     dcache_rdata_w1_m[47], 
+     dcache_rdata_w1_m[39],
+	   dcache_rdata_w1_m[31], 
+     dcache_rdata_w1_m[23], 
+     dcache_rdata_w1_m[15], 
+     dcache_rdata_w1_m[07]} ;
+
+assign dcache_rdata_msb_w2_m[7:0]=
+    {dcache_rdata_w2_m[63], 
+     dcache_rdata_w2_m[55], 
+     dcache_rdata_w2_m[47], 
+     dcache_rdata_w2_m[39],
+	   dcache_rdata_w2_m[31], 
+     dcache_rdata_w2_m[23], 
+     dcache_rdata_w2_m[15], 
+     dcache_rdata_w2_m[07]} ;
+   
+assign dcache_rdata_msb_w3_m[7:0]=
+    {dcache_rdata_w3_m[63], 
+     dcache_rdata_w3_m[55], 
+     dcache_rdata_w3_m[47], 
+     dcache_rdata_w3_m[39],
+	   dcache_rdata_w3_m[31], 
+     dcache_rdata_w3_m[23], 
+     dcache_rdata_w3_m[15], 
+     dcache_rdata_w3_m[07]} ;
+   
+   wire [63:0] rdata_w0_m;
+   wire [63:0] rdata_w1_m;
+   wire [63:0] rdata_w2_m;
+   wire [63:0] rdata_w3_m;
+
+//2-to-1 mux   
+//dcache_alt_mx_sel default 0001 (way 0) when not in MBIST mode (logic in qdp2)
+assign rdata_w0_m[63:0] = dcache_arry_data_sel_m ? 
+                          dcache_rdata_w0_m[63:0] : dcache_alt_data_w0_m[63:0];
+
+//assign rdata_w0_m[63:0] = dcache_rdata_w0_m[63:0];
+assign rdata_w1_m[63:0] = dcache_rdata_w1_m[63:0];
+assign rdata_w2_m[63:0] = dcache_rdata_w2_m[63:0];
+assign rdata_w3_m[63:0] = dcache_rdata_w3_m[63:0];
+
+// Select upper half or lower half of parity. 
+assign  dcache_rparity_w0_m[7:0] = ~dcache_rwaddr_m[3] ? temp_w0a[15:8] : temp_w0a[7:0] ; 
+assign  dcache_rparity_w1_m[7:0] = ~dcache_rwaddr_m[3] ? temp_w1a[15:8] : temp_w1a[7:0] ; 
+assign  dcache_rparity_w2_m[7:0] = ~dcache_rwaddr_m[3] ? temp_w2a[15:8] : temp_w2a[7:0] ; 
+assign  dcache_rparity_w3_m[7:0] = ~dcache_rwaddr_m[3] ? temp_w3a[15:8] : temp_w3a[7:0] ; 
+
+   reg [7:0] dcache_rparity_w0_wb;
+   reg [7:0] dcache_rparity_w1_wb;
+   reg [7:0] dcache_rparity_w2_wb;
+   reg [7:0] dcache_rparity_w3_wb;
+
+   reg [7:0] rd_parity_err_w0_wb;
+   reg [7:0] rd_parity_err_w1_wb;
+   reg [7:0] rd_parity_err_w2_wb;
+   reg [7:0] rd_parity_err_w3_wb;
+   
+   
+// Stage to WB
+always @(posedge clk) begin
+    // way 0: read data (after the alternate-data mux), stored parity, per-byte mismatch bits
+    dcache_rdata_w0_wb[63:0]  <= rdata_w0_m[63:0];
+    dcache_rparity_w0_wb[7:0] <= dcache_rparity_w0_m[7:0];
+    rd_parity_err_w0_wb[7:0]  <= rd_parity_err_w0_m[7:0];
+    // way 1
+    dcache_rdata_w1_wb[63:0]  <= rdata_w1_m[63:0];
+    dcache_rparity_w1_wb[7:0] <= dcache_rparity_w1_m[7:0];
+    rd_parity_err_w1_wb[7:0]  <= rd_parity_err_w1_m[7:0];
+    // way 2
+    dcache_rdata_w2_wb[63:0]  <= rdata_w2_m[63:0];
+    dcache_rparity_w2_wb[7:0] <= dcache_rparity_w2_m[7:0];
+    rd_parity_err_w2_wb[7:0]  <= rd_parity_err_w2_m[7:0];
+    // way 3
+    dcache_rdata_w3_wb[63:0]  <= rdata_w3_m[63:0];
+    dcache_rparity_w3_wb[7:0] <= dcache_rparity_w3_m[7:0];
+    rd_parity_err_w3_wb[7:0]  <= rd_parity_err_w3_m[7:0];
+end
+   
+//parity calculation and check are done in M stage for 4 way data   
+   wire rd_parity_err_w0;
+   wire rd_parity_err_w1;
+   wire rd_parity_err_w2;
+   wire rd_parity_err_w3;
+  
+lsu_dc_parity_gen #(8,8)  parity_gen_w0 (  // NOTE(review): #(8,8) presumably means 8 groups of 8 bits; the generator is defined elsewhere
+                .data_in        (dcache_rdata_w0_m[63:0]),
+                .parity_out     (gen_dcache_parity_w0_m[7:0])
+        );
+   // Mismatch bits = stored parity XOR regenerated parity; forced to all ones when no read is valid in M.
+assign rd_parity_err_w0_m[7:0] = dcache_rvld_m ? (dcache_rparity_w0_m[7:0] ^ gen_dcache_parity_w0_m[7:0]) :
+                                                  8'hff;
+ // Way 0 parity is regenerated from the raw array data (dcache_rdata_w0_m), not from
+// the alternate-muxed rdata_w0_m that is staged to WB.
+lsu_dc_parity_gen #(8,8)  parity_gen_w1 (
+                .data_in        (dcache_rdata_w1_m[63:0]),
+                .parity_out     (gen_dcache_parity_w1_m[7:0])
+        );   
+// Way 1 mismatch bits, same rule as way 0.
+assign rd_parity_err_w1_m[7:0] = dcache_rvld_m ? (dcache_rparity_w1_m[7:0] ^ gen_dcache_parity_w1_m[7:0]) :
+                                                 8'hff;
+// Way 2.
+lsu_dc_parity_gen #(8,8)  parity_gen_w2 (
+                .data_in        (dcache_rdata_w2_m[63:0]),
+                .parity_out     (gen_dcache_parity_w2_m[7:0])
+        );   
+assign rd_parity_err_w2_m[7:0] = dcache_rvld_m ? (dcache_rparity_w2_m[7:0] ^ gen_dcache_parity_w2_m[7:0])  :
+                                                   8'hff;
+   // Way 3.
+lsu_dc_parity_gen #(8,8)  parity_gen_w3 (
+                .data_in        (dcache_rdata_w3_m[63:0]),
+                .parity_out     (gen_dcache_parity_w3_m[7:0])
+        );   
+assign rd_parity_err_w3_m[7:0] =  dcache_rvld_m ? (dcache_rparity_w3_m[7:0] ^ gen_dcache_parity_w3_m[7:0]) :
+                                                  8'hff;
+
+
+// way select mux on READ
+// Select one of four ways from indexed cache set.
+
+wire [3:0] dcache_rd_sel_way_wb;  // effective way select in WB
+assign dcache_rd_sel_way_wb[3:0] = dcache_alt_mx_sel_wb ? dcache_alt_rsel_way_wb[3:0] :   // alternate select wins when set
+                                                          dcache_rsel_way_wb[3:0];
+   // AND-OR mux: selected ways are ORed together, so the result is a single way's data only for a one-hot select.
+assign  dcache_rdata_wb[63:0]  =  
+ (dcache_rd_sel_way_wb[0] ? dcache_rdata_w0_wb[63:0] : 64'b0) |
+ (dcache_rd_sel_way_wb[1] ? dcache_rdata_w1_wb[63:0] : 64'b0) |
+ (dcache_rd_sel_way_wb[2] ? dcache_rdata_w2_wb[63:0] : 64'b0) |
+ (dcache_rd_sel_way_wb[3] ? dcache_rdata_w3_wb[63:0] : 64'b0);
+// Collapse each way's 8 staged mismatch bits into one error flag per way.
+//parity err in W-stage, cache_way_hit may not be one-hot 
+assign rd_parity_err_w0 =   |(rd_parity_err_w0_wb[7:0]);
+assign rd_parity_err_w1 =   |(rd_parity_err_w1_wb[7:0]);
+assign rd_parity_err_w2 =   |(rd_parity_err_w2_wb[7:0]);
+assign rd_parity_err_w3 =   |(rd_parity_err_w3_wb[7:0]);
+// An error is reported only for a way that is selected (& binds tighter than |).
+assign dcache_rparity_err_wb = rd_parity_err_w3 & dcache_rd_sel_way_wb[3] |
+                               rd_parity_err_w2 & dcache_rd_sel_way_wb[2] |
+                               rd_parity_err_w1 & dcache_rd_sel_way_wb[1] |
+                               rd_parity_err_w0 & dcache_rd_sel_way_wb[0] ;
+   
+//mux4ds #(64) dcache_rdata_wb_mx (
+//  .in0  (dcache_rdata_w0_wb[63:0]),
+//  .in1  (dcache_rdata_w1_wb[63:0]),
+//  .in2  (dcache_rdata_w2_wb[63:0]),
+//  .in3  (dcache_rdata_w3_wb[63:0]),
+//  .sel0 (dcache_rd_sel_way_wb[0]),
+//  .sel1 (dcache_rd_sel_way_wb[1]),
+//  .sel2 (dcache_rd_sel_way_wb[2]),
+//  .sel3 (dcache_rd_sel_way_wb[3]),
+//  .dout (dcache_rdata_wb[63:0])
+//);
+                      
+// dcache_rparity_wb only used by MBIST   
+//mux4ds #(8) dcache_rparity_wb_mx (
+//  .in0 (dcache_rparity_w0_wb[7:0]),                        
+//  .in1 (dcache_rparity_w1_wb[7:0]),                        
+//  .in2 (dcache_rparity_w2_wb[7:0]),                        
+//  .in3 (dcache_rparity_w3_wb[7:0]),                        
+//  .sel0(dcache_alt_rsel_way_wb[0]),
+//  .sel1(dcache_alt_rsel_way_wb[1]),
+//  .sel2(dcache_alt_rsel_way_wb[2]),
+//  .sel3(dcache_alt_rsel_way_wb[3]),
+//  .dout(dcache_rparity_wb[7:0])
+//);
+
+assign  dcache_rparity_wb[7:0] =  // AND-OR mux of the staged parity bytes; uses dcache_rd_sel_way_wb, whereas the commented-out mux4ds used dcache_alt_rsel_way_wb
+( dcache_rd_sel_way_wb[0] ? dcache_rparity_w0_wb[7:0] : 8'b0 ) |
+( dcache_rd_sel_way_wb[1] ? dcache_rparity_w1_wb[7:0] : 8'b0 ) |
+( dcache_rd_sel_way_wb[2] ? dcache_rparity_w2_wb[7:0] : 8'b0 ) |
+( dcache_rd_sel_way_wb[3] ? dcache_rparity_w3_wb[7:0] : 8'b0 ) ;
+   
+  
+//=========================================================================================
+//  Write to Memory
+//=========================================================================================
+
+// Reads and writes are mutex as array is single-ported.
+
+
+// Includes data(128b)+parity(16b).
+assign way_mask[143:0]  =  // bit-level write mask: 1 = take the bit from dcache_wdata_m
+    {{8{byte_wr_enable[15]}},{8{byte_wr_enable[14]}},{8{byte_wr_enable[13]}},   // [143:16]: each byte enable
+     {8{byte_wr_enable[12]}},{8{byte_wr_enable[11]}},{8{byte_wr_enable[10]}},   // replicated over the 8 bits
+     {8{byte_wr_enable[9]}}, {8{byte_wr_enable[8]}}, {8{byte_wr_enable[7]}},    // of its byte, enable 15 at
+     {8{byte_wr_enable[6]}}, {8{byte_wr_enable[5]}}, {8{byte_wr_enable[4]}},    // the top
+     {8{byte_wr_enable[3]}}, {8{byte_wr_enable[2]}}, {8{byte_wr_enable[1]}},  
+     {8{byte_wr_enable[0]}}, byte_wr_enable[15:0]} ;    // [15:0]: one mask bit per byte enable (the parity field of the line)
+// Complement mask: 1 = keep the bit already stored in the array.
+assign way_mask_inv[143:0] =  ~way_mask[143:0];
+
+   
+always @ (negedge clk)
+  begin
+    // Read-modify-write on the falling edge: bits with way_mask=1 come from dcache_wdata_m, the rest are preserved.
+`ifdef FPGA_SYN_DCD
+	// FPGA variant: indexed directly by dcache_rwaddr_m; the preserved bits come from temp_wNa_reg (last posedge read).
+	if(dcache_wvld_m & dcache_wr_rway_m[0]) begin
+		w0[dcache_rwaddr_m[10:4]] = (temp_w0a_reg[143:0] & way_mask_inv[143:0]) | 
+					   (dcache_wdata_m[143:0] & way_mask[143:0]) ;
+	end
+	if(dcache_wvld_m & dcache_wr_rway_m[1]) begin
+		w1[dcache_rwaddr_m[10:4]] = (temp_w1a_reg[143:0] & way_mask_inv[143:0]) | 
+					   (dcache_wdata_m[143:0] & way_mask[143:0]) ;
+	end
+	if(dcache_wvld_m & dcache_wr_rway_m[2]) begin
+		w2[dcache_rwaddr_m[10:4]] = (temp_w2a_reg[143:0] & way_mask_inv[143:0]) | 
+					   (dcache_wdata_m[143:0] & way_mask[143:0]) ;
+	end
+	if(dcache_wvld_m & dcache_wr_rway_m[3]) begin
+		w3[dcache_rwaddr_m[10:4]] = (temp_w3a_reg[143:0] & way_mask_inv[143:0]) | 
+					   (dcache_wdata_m[143:0] & way_mask[143:0]) ;
+	end
+	// NOTE(review): assumes temp_wNa_reg holds the line at dcache_rwaddr_m[10:4] -- confirm for writes after a sehold cycle.
+`else // !`ifdef FPGA_SYN_DCD
+    // Behavioural variant: scan the wordlines and update the active one in every enabled way.
+    for (j=0;j<128;j=j+1)
+      begin
+      if (rw_wdline[j] & dcache_wvld_m & dcache_wr_rway_m[0]) 
+        begin
+        // read 
+        temp_w0[143:0] = w0[j]; 
+        // modify & write
+        w0[j] = (temp_w0[143:0] & way_mask_inv[143:0]) |    
+            (dcache_wdata_m[143:0] & way_mask[143:0]) ;
+        end
+      if (rw_wdline[j] & dcache_wvld_m & dcache_wr_rway_m[1]) 
+        begin
+        // read
+	           temp_w1[143:0] = w1[j];
+        // modify & write
+        w1[j] = (temp_w1[143:0] & way_mask_inv[143:0]) |    
+            (dcache_wdata_m[143:0] & way_mask[143:0]) ;
+        end
+      if (rw_wdline[j] & dcache_wvld_m & dcache_wr_rway_m[2]) 
+        begin
+        // read 
+        temp_w2[143:0] = w2[j];
+        // modify & write
+        w2[j] = (temp_w2[143:0] & way_mask_inv[143:0]) |    
+            (dcache_wdata_m[143:0] & way_mask[143:0]) ;
+        end 
+      if (rw_wdline[j] & dcache_wvld_m & dcache_wr_rway_m[3]) 
+        begin
+        // read 
+        temp_w3[143:0] = w3[j];
+        // modify & write.
+        w3[j] = (temp_w3[143:0] & way_mask_inv[143:0])  |
+		            (dcache_wdata_m[143:0] & way_mask[143:0]) ;
+        end
+      end
+`endif // !`ifdef FPGA_SYN_DCD
+    // The four way checks are independent, so several bits set in dcache_wr_rway_m write the same data to several ways.
+  end // always @ (negedge clk)
+   
+endmodule
+
+
Index: /trunk/T1-common/srams/bw_r_l2d_rep_bot.v
===================================================================
--- /trunk/T1-common/srams/bw_r_l2d_rep_bot.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_l2d_rep_bot.v	(revision 6)
@@ -0,0 +1,115 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_l2d_rep_bot.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module bw_r_l2d_rep_bot (/*AUTOARG*/
+   // Outputs
+   fuse_l2d_rden_buf, fuse_l2d_wren_buf, si_buf, arst_l_buf, se_buf, 
+   sehold_buf, fuse_l2d_rid_buf, fuse_read_data_in_buf, 
+   fuse_l2d_data_in_buf, word_en_l, col_offset_l, set_l, wr_en_l, 
+   way_sel_l, decc_in_l, scbuf_scdata_fbdecc_top_buf, 
+   scbuf_scdata_fbdecc_bot_buf, sbdt_l, sbdb_l, fuse_clk1_buf, 
+   fuse_clk2_buf, mem_write_disable_buf, 
+   // Inputs
+   fuse_l2d_rden, fuse_l2d_wren, si, arst_l, se, sehold, 
+   fuse_l2d_rid, fuse_read_data_in, fuse_l2d_data_in, word_en, 
+   col_offset, set, wr_en, way_sel, decc_in, fbdt_l, fbdb_l, 
+   scdata_scbuf_decc_top, scdata_scbuf_decc_bot, 
+   efc_scdata_fuse_clk1, efc_scdata_fuse_clk2, mem_write_disable
+   );
+
+   // Combinational repeater: every output is a plain copy or the bitwise
+   // inverse of exactly one input.  There is no clock and no state.
+
+   ///////////////////////////////////////////////////////////////////////
+   // Non-inverting buffers (output = input)
+   ///////////////////////////////////////////////////////////////////////
+   input           fuse_l2d_rden;
+   output          fuse_l2d_rden_buf;
+   assign          fuse_l2d_rden_buf = fuse_l2d_rden;
+   input  [5:0]    fuse_l2d_wren;
+   output [5:0]    fuse_l2d_wren_buf;
+   assign          fuse_l2d_wren_buf[5:0] = fuse_l2d_wren[5:0];
+   input           si;
+   output          si_buf;
+   assign          si_buf = si;
+   input           arst_l;
+   output          arst_l_buf;
+   assign          arst_l_buf = arst_l;
+   input           se;
+   output          se_buf;
+   assign          se_buf = se;
+   input           sehold;
+   output          sehold_buf;
+   assign          sehold_buf = sehold;
+   input  [2:0]    fuse_l2d_rid;
+   output [2:0]    fuse_l2d_rid_buf;
+   assign          fuse_l2d_rid_buf[2:0] = fuse_l2d_rid[2:0];
+   input           fuse_read_data_in;
+   output          fuse_read_data_in_buf;
+   assign          fuse_read_data_in_buf = fuse_read_data_in;
+   input           fuse_l2d_data_in;
+   output          fuse_l2d_data_in_buf;
+   assign          fuse_l2d_data_in_buf = fuse_l2d_data_in;
+   input           efc_scdata_fuse_clk1;
+   output          fuse_clk1_buf;
+   assign          fuse_clk1_buf = efc_scdata_fuse_clk1;
+   input           efc_scdata_fuse_clk2;
+   output          fuse_clk2_buf;
+   assign          fuse_clk2_buf = efc_scdata_fuse_clk2;
+   input           mem_write_disable;
+   output          mem_write_disable_buf;
+   assign          mem_write_disable_buf = mem_write_disable;
+
+   ///////////////////////////////////////////////////////////////////////
+   // Inverting buffers (output = ~input)
+   ///////////////////////////////////////////////////////////////////////
+   input  [3:0]    word_en;
+   output [3:0]    word_en_l;
+   assign          word_en_l[3:0] = ~word_en[3:0];
+   input           col_offset;
+   output          col_offset_l;
+   assign          col_offset_l = ~col_offset;
+   input  [9:0]    set;
+   output [9:0]    set_l;
+   assign          set_l[9:0] = ~set[9:0];
+   input           wr_en;
+   output          wr_en_l;
+   assign          wr_en_l = ~wr_en;
+   input  [11:0]   way_sel;
+   output [11:0]   way_sel_l;
+   assign          way_sel_l[11:0] = ~way_sel[11:0];
+   input  [155:0]  decc_in;
+   output [155:0]  decc_in_l;
+   assign          decc_in_l[155:0] = ~decc_in[155:0];
+   input  [155:0]  fbdt_l;
+   output [155:0]  scbuf_scdata_fbdecc_top_buf;
+   assign          scbuf_scdata_fbdecc_top_buf[155:0] = ~fbdt_l[155:0];
+   input  [155:0]  fbdb_l;
+   output [155:0]  scbuf_scdata_fbdecc_bot_buf;
+   assign          scbuf_scdata_fbdecc_bot_buf[155:0] = ~fbdb_l[155:0];
+   input  [155:0]  scdata_scbuf_decc_top;
+   output [155:0]  sbdt_l;
+   assign          sbdt_l[155:0] = ~scdata_scbuf_decc_top[155:0];
+   input  [155:0]  scdata_scbuf_decc_bot;
+   output [155:0]  sbdb_l;
+   assign          sbdb_l[155:0] = ~scdata_scbuf_decc_bot[155:0];
+endmodule // bw_r_l2d_rep_bot
+
+
Index: /trunk/T1-common/srams/bw_r_efa.v
===================================================================
--- /trunk/T1-common/srams/bw_r_efa.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_efa.v	(revision 6)
@@ -0,0 +1,173 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_efa.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+//****************************************************************
+//
+//      Module:         bw_r_efa
+//
+//      Description:   RTL model for EFA (EFuse Array)
+//
+//****************************************************************
+`include "sys.h"
+
+module bw_r_efa (
+	vpp,
+	pi_efa_prog_en, 
+	sbc_efa_read_en,
+	sbc_efa_word_addr,	
+	sbc_efa_bit_addr,
+	sbc_efa_margin0_rd,
+	sbc_efa_margin1_rd,
+	efa_sbc_data,
+ 	pwr_ok,
+	por_n,
+        sbc_efa_sup_det_rd,
+	sbc_efa_power_down,
+	so,
+	si,
+	se,
+	vddo,
+	clk
+);
+// Behavioural (simulation) model of the e-fuse array: 64 words of 32 bits.
+// Programming sets one bit per l1clk edge; a read returns a whole word after EFA_READ_LAT.
+input            vpp;			// VPP input from I/O (not referenced by this model)
+
+output  [31:0]   efa_sbc_data;		// Data from e-fuse array to SBC
+input            pi_efa_prog_en; 	// e-fuse array program enable
+input            sbc_efa_read_en; 	// e-fuse array read enable
+input	[5:0]    sbc_efa_word_addr;	// e-fuse array word addr
+input   [4:0]    sbc_efa_bit_addr;	// e-fuse array bit addr
+input            sbc_efa_margin0_rd; 	// e-fuse array margin0 read (not modelled)
+input            sbc_efa_margin1_rd;	// e-fuse array margin1 read (not modelled)
+
+input		 pwr_ok;		// power_ok reset
+input		 por_n;			// por_n reset
+input		 sbc_efa_sup_det_rd;	// e-fuse array supply detect read
+input		 sbc_efa_power_down;	// e-fuse power down signal from SBC
+
+output           so;		// Scan ports
+input            si;
+input		 se;
+input 		 vddo;			// not referenced by this model
+input            clk; 			// cpu clk
+
+/*--------------------------------------------------------------------------*/
+
+//** Parameters and define **//
+parameter MAXFILENAME=200;
+//parameter 	EFA_READ_LAT = 5670 ; // 7 system cycles (150Mhz) - 1/4(sys clk); about 45ns
+				     // 840 ticks = 1 system cycle
+parameter 	EFA_READ_LAT = 45000 ; //  about 45ns (timescale is 1 ps)
+/* The access time has been specified to be 45ns for a worst case read */
+
+//** Wire and Reg declarations **//
+
+reg [MAXFILENAME*8-1:0]  efuse_data_filename;
+reg [31:0] efuse_array[0:63],efuse_row,efa_read_data;	//EFUSE ARRAY
+integer file_get_status,i;
+reg [31:0] fpInVec;
+wire [31:0] efa_sbc_data;
+wire	l1clk;		
+wire   	lvl_det_l;           // level detect ok
+wire    vddc_ok_l;           // vddc ok
+wire    vddo_ok_l;           // vddo ok
+wire    vpp_ok_l;            // vpp ok
+reg     efuse_rd_progress;   // set while a read is outstanding, cleared when read_en drops
+reg	efuse_enable_write_check;
+
+/*--------------------------------------------------------------------------*/
+
+// Process data file
+ 
+// synopsys translate_off
+initial begin
+  efuse_enable_write_check = 1;
+  efuse_rd_progress = 1'b0;  // start idle: with an X here a read_en that is high at the first clock is never accepted
+  // Get Efuse data file from plusarg.
+  if ($value$plusargs("efuse_data_file=%s", efuse_data_filename))
+    begin
+      // Read Efuse data file if present 
+      $display("INFO: efuse data file is being read--filename=%0s", 
+      			efuse_data_filename);
+      $readmemh(efuse_data_filename, efuse_array);
+      $display("INFO: completed reading efuse data file");
+    end
+  else 
+    begin 
+      //if file not present, initialize efuse_array with default value
+      $display("INFO: Using default efuse data for the efuse array");
+      for (i=0;i<=63;i=i+1) begin
+	efuse_array[i] = 32'b0;
+      end
+    end
+end   
+
+// Process power down signal
+assign l1clk   = clk & ~sbc_efa_power_down;
+
+// Scan logic not in RTL 
+assign so = se ? si : 1'bx;
+
+//assign supply detect signals to valid values (circuit cannot be impl in model)
+assign vddc_ok_l = 1'b0;
+assign vddo_ok_l = 1'b0;
+assign vpp_ok_l  = 1'b0;
+assign lvl_det_l = 1'b0;
+// Programming: only when prog_en, pwr_ok and por_n are all exactly 1 (=== rejects X/Z).
+// A bit can only be set to 1 here; nothing in this model clears a fuse.
+always @(posedge l1clk) begin
+  // Write operation , one bit at a time
+  if ((pi_efa_prog_en === 1'b1) && (pwr_ok === 1'b1) && (por_n === 1'b1))  begin
+    efuse_row = efuse_array[sbc_efa_word_addr];
+    efuse_row[sbc_efa_bit_addr] = 1'b1;
+    efuse_array[sbc_efa_word_addr] <= efuse_row;
+  end
+end
+
+
+// efa_read_data is from the VPP_CORE which is reset to 0 in ckt when read is de-asserted
+// However in RTL it is reset to X because I want to simulate the wait time where
+// efa_read_data is indeed X till the latency period
+// margin reads are not modelled in the RTL
+always @(posedge l1clk) begin
+  // Read operation  , 32 bits at a time
+  if ((sbc_efa_read_en) & ~efuse_rd_progress)  begin
+   // About 45ns
+   efa_read_data[31:0] <= #EFA_READ_LAT efuse_array[sbc_efa_word_addr];
+   efuse_rd_progress = 1'b1;  // blocking: the checks below see the updated flag in this same pass
+  end
+  if (~(sbc_efa_read_en))  begin
+    efuse_rd_progress = 1'b0;
+  end
+  if (~efuse_rd_progress) begin
+    efa_read_data[31:0] <= 32'bx;
+  end
+end
+// synopsys translate_on
+
+// In ckt, when sbc_efa_read_en is low, output remains the same.
+// Modelled by feeding efa_sbc_data back into itself on the hold branch; por_n low forces zero.
+assign efa_sbc_data[31:0] = por_n ? ((pwr_ok & sbc_efa_read_en) ? (sbc_efa_sup_det_rd ?
+				{28'bx,~lvl_det_l,~vddc_ok_l,~vddo_ok_l,~vpp_ok_l}
+				: efa_read_data[31:0] ) : efa_sbc_data[31:0]) : 32'b0;
+
+
+endmodule
Index: /trunk/T1-common/srams/bw_r_rf16x160.v
===================================================================
--- /trunk/T1-common/srams/bw_r_rf16x160.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_rf16x160.v	(revision 6)
@@ -0,0 +1,734 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_rf16x160.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+ ////////////////////////////////////////////////////////////////////////
+// 16 X 160 R1 W1 RF macro
+// Read/Write ports can be accessed in PH1 only.
+////////////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_16x160
+`endif
+
+module bw_r_rf16x160(/*AUTOARG*/
+   // Outputs
+   dout, so_w, so_r, 
+   // Inputs
+   din, rd_adr, wr_adr, read_en, wr_en, rst_tri_en, word_wen, 
+   byte_wen, rd_clk, wr_clk, se, si_r, si_w, reset_l, sehold
+   );
+
+   input [159:0]  din; // data input
+   input [3:0]    rd_adr;   // read addr 
+   input [3:0]	  wr_adr;  // write addr
+   input          read_en;  
+   input	  wr_en;	//   used in conjunction with
+				//  word_wen and byte_wen 
+   input	  rst_tri_en ; // gates off writes during SCAN.
+   input [3:0]    word_wen; // word enables ( if you don't use these
+			    // tie them to Vdd )
+   input [19:0]	  byte_wen;	// byte enables ( if you don't use these
+                            // tie them to Vdd )
+   input          rd_clk;
+   input          wr_clk;
+   input          se, si_r, si_w ;
+   input	  reset_l;
+   input	  sehold; // hold scan in data.
+
+   output [159:0] dout;
+   output         so_w;
+   output         so_r;
+   
+
+   // local signals
+   reg  [159:0]   wrdata_d1;
+
+   reg  [3:0]     rdptr_d1, wrptr_d1;
+   reg            ren_d1;
+   reg 	          wr_en_d1;
+
+
+`ifdef DEFINE_0IN
+   wire          so;
+`else
+   wire          so;
+
+`ifdef FPGA_SYN_16x160
+`else
+   reg [159:0] dout;
+   // memory array
+   reg [159:0]  inq_ary [15:0];
+`endif
+`endif
+
+   // internal variable
+   integer          i, j;
+   reg     [159:0]  temp, data_in, tmp_dout;
+   reg     [3:0]    word_wen_d1;
+   reg     [3:0]    word_wen_d2;
+   reg     [19:0]   byte_wen_d1;
+   reg			rst_tri_en_d1;
+
+
+//-------
+
+always @ (posedge wr_clk)
+begin
+  // Stage the write port; while sehold is high every staged value recirculates.
+  wrptr_d1    <= sehold ? wrptr_d1    : wr_adr;
+  wrdata_d1   <= sehold ? wrdata_d1   : din;
+  wr_en_d1    <= sehold ? wr_en_d1    : wr_en;
+  byte_wen_d1 <= sehold ? byte_wen_d1 : byte_wen;
+  word_wen_d1 <= sehold ? word_wen_d1 : word_wen;
+  // word enables already qualified with wr_en and the scan gate rst_tri_en
+  word_wen_d2 <= sehold ? word_wen_d2 : (word_wen & {4{wr_en & ~rst_tri_en}});
+  rst_tri_en_d1 <= rst_tri_en ; // not a real flop; only used to re-trigger the array blocks
+end
+//-------
+
+
+`ifdef DEFINE_0IN
+   wire [159:0] bit_en_d1;
+
+  assign bit_en_d1[0]  = word_wen_d1[0] & byte_wen_d1[0] & ~rst_tri_en;
+  assign bit_en_d1[1]  = word_wen_d1[1] & byte_wen_d1[0] & ~rst_tri_en;
+  assign bit_en_d1[2]  = word_wen_d1[2] & byte_wen_d1[0] & ~rst_tri_en;
+  assign bit_en_d1[3]  = word_wen_d1[3] & byte_wen_d1[0] & ~rst_tri_en;
+  assign bit_en_d1[4]  = word_wen_d1[0] & byte_wen_d1[0] & ~rst_tri_en;
+  assign bit_en_d1[5]  = word_wen_d1[1] & byte_wen_d1[0] & ~rst_tri_en;
+  assign bit_en_d1[6]  = word_wen_d1[2] & byte_wen_d1[0] & ~rst_tri_en;
+  assign bit_en_d1[7]  = word_wen_d1[3] & byte_wen_d1[0] & ~rst_tri_en;
+  assign bit_en_d1[8]  = word_wen_d1[0] & byte_wen_d1[1] & ~rst_tri_en;
+  assign bit_en_d1[9]  = word_wen_d1[1] & byte_wen_d1[1] & ~rst_tri_en;
+  assign bit_en_d1[10] = word_wen_d1[2] & byte_wen_d1[1] & ~rst_tri_en;
+  assign bit_en_d1[11] = word_wen_d1[3] & byte_wen_d1[1] & ~rst_tri_en;
+  assign bit_en_d1[12] = word_wen_d1[0] & byte_wen_d1[1] & ~rst_tri_en;
+  assign bit_en_d1[13] = word_wen_d1[1] & byte_wen_d1[1] & ~rst_tri_en;
+  assign bit_en_d1[14] = word_wen_d1[2] & byte_wen_d1[1] & ~rst_tri_en;
+  assign bit_en_d1[15] = word_wen_d1[3] & byte_wen_d1[1] & ~rst_tri_en;
+  assign bit_en_d1[16] = word_wen_d1[0] & byte_wen_d1[2] & ~rst_tri_en;
+  assign bit_en_d1[17] = word_wen_d1[1] & byte_wen_d1[2] & ~rst_tri_en;
+  assign bit_en_d1[18] = word_wen_d1[2] & byte_wen_d1[2] & ~rst_tri_en;
+  assign bit_en_d1[19] = word_wen_d1[3] & byte_wen_d1[2] & ~rst_tri_en;
+  assign bit_en_d1[20] = word_wen_d1[0] & byte_wen_d1[2] & ~rst_tri_en;
+  assign bit_en_d1[21] = word_wen_d1[1] & byte_wen_d1[2] & ~rst_tri_en;
+  assign bit_en_d1[22] = word_wen_d1[2] & byte_wen_d1[2] & ~rst_tri_en;
+  assign bit_en_d1[23] = word_wen_d1[3] & byte_wen_d1[2] & ~rst_tri_en;
+  assign bit_en_d1[24] = word_wen_d1[0] & byte_wen_d1[3] & ~rst_tri_en;
+  assign bit_en_d1[25] = word_wen_d1[1] & byte_wen_d1[3] & ~rst_tri_en;
+  assign bit_en_d1[26] = word_wen_d1[2] & byte_wen_d1[3] & ~rst_tri_en;
+  assign bit_en_d1[27] = word_wen_d1[3] & byte_wen_d1[3] & ~rst_tri_en;
+  assign bit_en_d1[28] = word_wen_d1[0] & byte_wen_d1[3] & ~rst_tri_en;
+  assign bit_en_d1[29] = word_wen_d1[1] & byte_wen_d1[3] & ~rst_tri_en;
+  assign bit_en_d1[30] = word_wen_d1[2] & byte_wen_d1[3] & ~rst_tri_en;
+  assign bit_en_d1[31] = word_wen_d1[3] & byte_wen_d1[3] & ~rst_tri_en;
+  assign bit_en_d1[32] = word_wen_d1[0] & byte_wen_d1[4] & ~rst_tri_en;
+  assign bit_en_d1[33] = word_wen_d1[1] & byte_wen_d1[4] & ~rst_tri_en;
+  assign bit_en_d1[34] = word_wen_d1[2] & byte_wen_d1[4] & ~rst_tri_en;
+  assign bit_en_d1[35] = word_wen_d1[3] & byte_wen_d1[4] & ~rst_tri_en;
+  assign bit_en_d1[36] = word_wen_d1[0] & byte_wen_d1[4] & ~rst_tri_en;
+  assign bit_en_d1[37] = word_wen_d1[1] & byte_wen_d1[4] & ~rst_tri_en;
+  assign bit_en_d1[38] = word_wen_d1[2] & byte_wen_d1[4] & ~rst_tri_en;
+  assign bit_en_d1[39] = word_wen_d1[3] & byte_wen_d1[4] & ~rst_tri_en;
+  assign bit_en_d1[40] = word_wen_d1[0] & byte_wen_d1[5] & ~rst_tri_en;
+  assign bit_en_d1[41] = word_wen_d1[1] & byte_wen_d1[5] & ~rst_tri_en;
+  assign bit_en_d1[42] = word_wen_d1[2] & byte_wen_d1[5] & ~rst_tri_en;
+  assign bit_en_d1[43] = word_wen_d1[3] & byte_wen_d1[5] & ~rst_tri_en;
+  assign bit_en_d1[44] = word_wen_d1[0] & byte_wen_d1[5] & ~rst_tri_en;
+  assign bit_en_d1[45] = word_wen_d1[1] & byte_wen_d1[5] & ~rst_tri_en;
+  assign bit_en_d1[46] = word_wen_d1[2] & byte_wen_d1[5] & ~rst_tri_en;
+  assign bit_en_d1[47] = word_wen_d1[3] & byte_wen_d1[5] & ~rst_tri_en;
+  assign bit_en_d1[48] = word_wen_d1[0] & byte_wen_d1[6] & ~rst_tri_en;
+  assign bit_en_d1[49] = word_wen_d1[1] & byte_wen_d1[6] & ~rst_tri_en;
+  assign bit_en_d1[50] = word_wen_d1[2] & byte_wen_d1[6] & ~rst_tri_en;
+  assign bit_en_d1[51] = word_wen_d1[3] & byte_wen_d1[6] & ~rst_tri_en;
+  assign bit_en_d1[52] = word_wen_d1[0] & byte_wen_d1[6] & ~rst_tri_en;
+  assign bit_en_d1[53] = word_wen_d1[1] & byte_wen_d1[6] & ~rst_tri_en;
+  assign bit_en_d1[54] = word_wen_d1[2] & byte_wen_d1[6] & ~rst_tri_en;
+  assign bit_en_d1[55] = word_wen_d1[3] & byte_wen_d1[6] & ~rst_tri_en;
+  assign bit_en_d1[56] = word_wen_d1[0] & byte_wen_d1[7] & ~rst_tri_en;
+  assign bit_en_d1[57] = word_wen_d1[1] & byte_wen_d1[7] & ~rst_tri_en;
+  assign bit_en_d1[58] = word_wen_d1[2] & byte_wen_d1[7] & ~rst_tri_en;
+  assign bit_en_d1[59] = word_wen_d1[3] & byte_wen_d1[7] & ~rst_tri_en;
+  assign bit_en_d1[60] = word_wen_d1[0] & byte_wen_d1[7] & ~rst_tri_en;
+  assign bit_en_d1[61] = word_wen_d1[1] & byte_wen_d1[7] & ~rst_tri_en;
+  assign bit_en_d1[62] = word_wen_d1[2] & byte_wen_d1[7] & ~rst_tri_en;
+  assign bit_en_d1[63] = word_wen_d1[3] & byte_wen_d1[7] & ~rst_tri_en;
+  assign bit_en_d1[64] = word_wen_d1[0] & byte_wen_d1[8] & ~rst_tri_en;
+  assign bit_en_d1[65] = word_wen_d1[1] & byte_wen_d1[8] & ~rst_tri_en;
+  assign bit_en_d1[66] = word_wen_d1[2] & byte_wen_d1[8] & ~rst_tri_en;
+  assign bit_en_d1[67] = word_wen_d1[3] & byte_wen_d1[8] & ~rst_tri_en;
+  assign bit_en_d1[68] = word_wen_d1[0] & byte_wen_d1[8] & ~rst_tri_en;
+  assign bit_en_d1[69] = word_wen_d1[1] & byte_wen_d1[8] & ~rst_tri_en;
+  assign bit_en_d1[70] = word_wen_d1[2] & byte_wen_d1[8] & ~rst_tri_en;
+  assign bit_en_d1[71] = word_wen_d1[3] & byte_wen_d1[8] & ~rst_tri_en;
+  assign bit_en_d1[72] = word_wen_d1[0] & byte_wen_d1[9] & ~rst_tri_en;
+  assign bit_en_d1[73] = word_wen_d1[1] & byte_wen_d1[9] & ~rst_tri_en;
+  assign bit_en_d1[74] = word_wen_d1[2] & byte_wen_d1[9] & ~rst_tri_en;
+  assign bit_en_d1[75] = word_wen_d1[3] & byte_wen_d1[9] & ~rst_tri_en;
+  assign bit_en_d1[76] = word_wen_d1[0] & byte_wen_d1[9] & ~rst_tri_en;
+  assign bit_en_d1[77] = word_wen_d1[1] & byte_wen_d1[9] & ~rst_tri_en;
+  assign bit_en_d1[78] = word_wen_d1[2] & byte_wen_d1[9] & ~rst_tri_en;
+  assign bit_en_d1[79] = word_wen_d1[3] & byte_wen_d1[9] & ~rst_tri_en;
+  assign bit_en_d1[80] = word_wen_d1[0] & byte_wen_d1[10] & ~rst_tri_en;
+  assign bit_en_d1[81] = word_wen_d1[1] & byte_wen_d1[10] & ~rst_tri_en;
+  assign bit_en_d1[82] = word_wen_d1[2] & byte_wen_d1[10] & ~rst_tri_en;
+  assign bit_en_d1[83] = word_wen_d1[3] & byte_wen_d1[10] & ~rst_tri_en;
+  assign bit_en_d1[84] = word_wen_d1[0] & byte_wen_d1[10] & ~rst_tri_en;
+  assign bit_en_d1[85] = word_wen_d1[1] & byte_wen_d1[10] & ~rst_tri_en;
+  assign bit_en_d1[86] = word_wen_d1[2] & byte_wen_d1[10] & ~rst_tri_en;
+  assign bit_en_d1[87] = word_wen_d1[3] & byte_wen_d1[10] & ~rst_tri_en;
+  assign bit_en_d1[88] = word_wen_d1[0] & byte_wen_d1[11] & ~rst_tri_en;
+  assign bit_en_d1[89] = word_wen_d1[1] & byte_wen_d1[11] & ~rst_tri_en;
+  assign bit_en_d1[90] = word_wen_d1[2] & byte_wen_d1[11] & ~rst_tri_en;
+  assign bit_en_d1[91] = word_wen_d1[3] & byte_wen_d1[11] & ~rst_tri_en;
+  assign bit_en_d1[92] = word_wen_d1[0] & byte_wen_d1[11] & ~rst_tri_en;
+  assign bit_en_d1[93] = word_wen_d1[1] & byte_wen_d1[11] & ~rst_tri_en;
+  assign bit_en_d1[94] = word_wen_d1[2] & byte_wen_d1[11] & ~rst_tri_en;
+  assign bit_en_d1[95] = word_wen_d1[3] & byte_wen_d1[11] & ~rst_tri_en;
+  assign bit_en_d1[96] = word_wen_d1[0] & byte_wen_d1[12] & ~rst_tri_en;
+  assign bit_en_d1[97] = word_wen_d1[1] & byte_wen_d1[12] & ~rst_tri_en;
+  assign bit_en_d1[98] = word_wen_d1[2] & byte_wen_d1[12] & ~rst_tri_en;
+  assign bit_en_d1[99] = word_wen_d1[3] & byte_wen_d1[12] & ~rst_tri_en;
+  assign bit_en_d1[100] = word_wen_d1[0] & byte_wen_d1[12] & ~rst_tri_en;
+  assign bit_en_d1[101] = word_wen_d1[1] & byte_wen_d1[12] & ~rst_tri_en;
+  assign bit_en_d1[102] = word_wen_d1[2] & byte_wen_d1[12] & ~rst_tri_en;
+  assign bit_en_d1[103] = word_wen_d1[3] & byte_wen_d1[12] & ~rst_tri_en;
+  assign bit_en_d1[104] = word_wen_d1[0] & byte_wen_d1[13] & ~rst_tri_en;
+  assign bit_en_d1[105] = word_wen_d1[1] & byte_wen_d1[13] & ~rst_tri_en;
+  assign bit_en_d1[106] = word_wen_d1[2] & byte_wen_d1[13] & ~rst_tri_en;
+  assign bit_en_d1[107] = word_wen_d1[3] & byte_wen_d1[13] & ~rst_tri_en;
+  assign bit_en_d1[108] = word_wen_d1[0] & byte_wen_d1[13] & ~rst_tri_en;
+  assign bit_en_d1[109] = word_wen_d1[1] & byte_wen_d1[13] & ~rst_tri_en;
+  assign bit_en_d1[110] = word_wen_d1[2] & byte_wen_d1[13] & ~rst_tri_en;
+  assign bit_en_d1[111] = word_wen_d1[3] & byte_wen_d1[13] & ~rst_tri_en;
+  assign bit_en_d1[112] = word_wen_d1[0] & byte_wen_d1[14] & ~rst_tri_en;
+  assign bit_en_d1[113] = word_wen_d1[1] & byte_wen_d1[14] & ~rst_tri_en;
+  assign bit_en_d1[114] = word_wen_d1[2] & byte_wen_d1[14] & ~rst_tri_en;
+  assign bit_en_d1[115] = word_wen_d1[3] & byte_wen_d1[14] & ~rst_tri_en;
+  assign bit_en_d1[116] = word_wen_d1[0] & byte_wen_d1[14] & ~rst_tri_en;
+  assign bit_en_d1[117] = word_wen_d1[1] & byte_wen_d1[14] & ~rst_tri_en;
+  assign bit_en_d1[118] = word_wen_d1[2] & byte_wen_d1[14] & ~rst_tri_en;
+  assign bit_en_d1[119] = word_wen_d1[3] & byte_wen_d1[14] & ~rst_tri_en;
+  assign bit_en_d1[120] = word_wen_d1[0] & byte_wen_d1[15] & ~rst_tri_en;
+  assign bit_en_d1[121] = word_wen_d1[1] & byte_wen_d1[15] & ~rst_tri_en;
+  assign bit_en_d1[122] = word_wen_d1[2] & byte_wen_d1[15] & ~rst_tri_en;
+  assign bit_en_d1[123] = word_wen_d1[3] & byte_wen_d1[15] & ~rst_tri_en;
+  assign bit_en_d1[124] = word_wen_d1[0] & byte_wen_d1[15] & ~rst_tri_en;
+  assign bit_en_d1[125] = word_wen_d1[1] & byte_wen_d1[15] & ~rst_tri_en;
+  assign bit_en_d1[126] = word_wen_d1[2] & byte_wen_d1[15] & ~rst_tri_en;
+  assign bit_en_d1[127] = word_wen_d1[3] & byte_wen_d1[15] & ~rst_tri_en;
+  assign bit_en_d1[128] = word_wen_d1[0] & byte_wen_d1[16] & ~rst_tri_en;
+  assign bit_en_d1[129] = word_wen_d1[1] & byte_wen_d1[16] & ~rst_tri_en;
+  assign bit_en_d1[130] = word_wen_d1[2] & byte_wen_d1[16] & ~rst_tri_en;
+  assign bit_en_d1[131] = word_wen_d1[3] & byte_wen_d1[16] & ~rst_tri_en;
+  assign bit_en_d1[132] = word_wen_d1[0] & byte_wen_d1[16] & ~rst_tri_en;
+  assign bit_en_d1[133] = word_wen_d1[1] & byte_wen_d1[16] & ~rst_tri_en;
+  assign bit_en_d1[134] = word_wen_d1[2] & byte_wen_d1[16] & ~rst_tri_en;
+  assign bit_en_d1[135] = word_wen_d1[3] & byte_wen_d1[16] & ~rst_tri_en;
+  assign bit_en_d1[136] = word_wen_d1[0] & byte_wen_d1[17] & ~rst_tri_en;
+  assign bit_en_d1[137] = word_wen_d1[1] & byte_wen_d1[17] & ~rst_tri_en;
+  assign bit_en_d1[138] = word_wen_d1[2] & byte_wen_d1[17] & ~rst_tri_en;
+  assign bit_en_d1[139] = word_wen_d1[3] & byte_wen_d1[17] & ~rst_tri_en;
+  assign bit_en_d1[140] = word_wen_d1[0] & byte_wen_d1[17] & ~rst_tri_en;
+  assign bit_en_d1[141] = word_wen_d1[1] & byte_wen_d1[17] & ~rst_tri_en;
+  assign bit_en_d1[142] = word_wen_d1[2] & byte_wen_d1[17] & ~rst_tri_en;
+  assign bit_en_d1[143] = word_wen_d1[3] & byte_wen_d1[17] & ~rst_tri_en;
+  assign bit_en_d1[144] = word_wen_d1[0] & byte_wen_d1[18] & ~rst_tri_en;
+  assign bit_en_d1[145] = word_wen_d1[1] & byte_wen_d1[18] & ~rst_tri_en;
+  assign bit_en_d1[146] = word_wen_d1[2] & byte_wen_d1[18] & ~rst_tri_en;
+  assign bit_en_d1[147] = word_wen_d1[3] & byte_wen_d1[18] & ~rst_tri_en;
+  assign bit_en_d1[148] = word_wen_d1[0] & byte_wen_d1[18] & ~rst_tri_en;
+  assign bit_en_d1[149] = word_wen_d1[1] & byte_wen_d1[18] & ~rst_tri_en;
+  assign bit_en_d1[150] = word_wen_d1[2] & byte_wen_d1[18] & ~rst_tri_en;
+  assign bit_en_d1[151] = word_wen_d1[3] & byte_wen_d1[18] & ~rst_tri_en;
+  assign bit_en_d1[152] = word_wen_d1[0] & byte_wen_d1[19] & ~rst_tri_en;
+  assign bit_en_d1[153] = word_wen_d1[1] & byte_wen_d1[19] & ~rst_tri_en;
+  assign bit_en_d1[154] = word_wen_d1[2] & byte_wen_d1[19] & ~rst_tri_en;
+  assign bit_en_d1[155] = word_wen_d1[3] & byte_wen_d1[19] & ~rst_tri_en;
+  assign bit_en_d1[156] = word_wen_d1[0] & byte_wen_d1[19] & ~rst_tri_en;
+  assign bit_en_d1[157] = word_wen_d1[1] & byte_wen_d1[19] & ~rst_tri_en;
+  assign bit_en_d1[158] = word_wen_d1[2] & byte_wen_d1[19] & ~rst_tri_en;
+  assign bit_en_d1[159] = word_wen_d1[3] & byte_wen_d1[19] & ~rst_tri_en;
+`else
+
+`endif
+//-------
+
+always @ (posedge rd_clk)
+begin  // read-port staging: sehold recirculates the staged address and enable
+  rdptr_d1 <= sehold ? rdptr_d1 : rd_adr;
+  ren_d1   <= sehold ? ren_d1   : read_en;
+end
+//-------
+
+
+`ifdef DEFINE_0IN
+rf16x160 rf16x160 ( .rdclk(rd_clk), .wrclk(~wr_clk), .radr(rdptr_d1), .wadr(wrptr_d1), .ren(ren_d1),
+                        .we(wr_en_d1), .wm(bit_en_d1), .din(wrdata_d1), .dout(dout) );
+`else
+
+`ifdef FPGA_SYN_16x160
+
+bw_r_rf16x2 arr0 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 0]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[  7:  0]),
+	.rd_data(dout[  7:  0]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr1 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 1]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 15:  8]),
+	.rd_data(dout[ 15:  8]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr2 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 2]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 23: 16]),
+	.rd_data(dout[ 23: 16]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr3 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 3]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 31: 24]),
+	.rd_data(dout[ 31: 24]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr4 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 4]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 39: 32]),
+	.rd_data(dout[ 39: 32]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr5 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 5]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 47: 40]),
+	.rd_data(dout[ 47: 40]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr6 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 6]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 55: 48]),
+	.rd_data(dout[ 55: 48]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr7 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 7]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 63: 56]),
+	.rd_data(dout[ 63: 56]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr8 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 8]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 71: 64]),
+	.rd_data(dout[ 71: 64]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr9 (
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[ 9]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 79: 72]),
+	.rd_data(dout[ 79: 72]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr10(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[10]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 87: 80]),
+	.rd_data(dout[ 87: 80]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr11(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[11]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[ 95: 88]),
+	.rd_data(dout[ 95: 88]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr12(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[12]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[103: 96]),
+	.rd_data(dout[103: 96]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr13(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[13]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[111:104]),
+	.rd_data(dout[111:104]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr14(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[14]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[119:112]),
+	.rd_data(dout[119:112]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr15(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[15]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[127:120]),
+	.rd_data(dout[127:120]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr16(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[16]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[135:128]),
+	.rd_data(dout[135:128]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr17(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[17]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[143:136]),
+	.rd_data(dout[143:136]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr18(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[18]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[151:144]),
+	.rd_data(dout[151:144]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+bw_r_rf16x2 arr19(
+	.word_wen(word_wen_d2),
+	.wen(byte_wen_d1[19]),
+	.ren(ren_d1),
+	.wr_addr(wrptr_d1),
+	.rd_addr(rdptr_d1),
+	.wr_data(wrdata_d1[159:152]),
+	.rd_data(dout[159:152]),
+	.clk(wr_clk),
+	.rd_clk(rd_clk),
+	.reset_l(reset_l));
+
+
+`else
+//
+// Read Operation: dout follows inq_ary[rdptr_d1] while ren_d1 is 1; bits being
+// written to the same entry read back as X; reset_l low forces dout to zero.
+
+  always @(/*AUTOSENSE*/ /*memory or*/ byte_wen_d1 or rdptr_d1
+           or ren_d1 or reset_l or rst_tri_en_d1 or word_wen_d1
+           or wr_en_d1 or wrptr_d1)
+    begin
+      if (reset_l)
+        begin
+          if (ren_d1==1'b1)
+            begin
+            // Flag any X/Z bit on the rd pointer input when read is enabled.
+            // (== against 4'bx is never true, so reduction-XOR with === is used.)
+// synopsys translate_off
+
+`ifdef INNO_MUXEX
+`else
+              if ((^rdptr_d1) === 1'bx)
+                begin
+                  $error("rf_error"," read pointer error %h ", rdptr_d1[3:0]);
+                end
+`endif
+
+// synopsys translate_on
+
+              tmp_dout = inq_ary[rdptr_d1] ;
+              j = 0;
+              // i walks the bytes, j is the byte index; bit i+k uses word_wen_d1[k%4]
+              for (i=0; i<= 159; i=i+8)
+                begin
+                  if (rdptr_d1 == wrptr_d1)
+                    begin
+                      //dout[i]   = (wr_en_d1 & bit_en_d1[i]) ? 1'bx : tmp_dout[i];
+
+                      dout[i]   = (wr_en_d1 & word_wen_d1[0] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                      1'bx : tmp_dout[i] ;
+                      dout[i+1] = (wr_en_d1 & word_wen_d1[1] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                      1'bx : tmp_dout[i+1] ;
+                      dout[i+2] = (wr_en_d1 & word_wen_d1[2] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                      1'bx : tmp_dout[i+2] ;
+                      dout[i+3] = (wr_en_d1 & word_wen_d1[3] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                      1'bx : tmp_dout[i+3] ;
+                      dout[i+4] = (wr_en_d1 & word_wen_d1[0] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                      1'bx : tmp_dout[i+4] ;
+                      dout[i+5] = (wr_en_d1 & word_wen_d1[1] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                      1'bx : tmp_dout[i+5] ;
+                      dout[i+6] = (wr_en_d1 & word_wen_d1[2] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                      1'bx : tmp_dout[i+6] ;
+                      dout[i+7] = (wr_en_d1 & word_wen_d1[3] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                      1'bx : tmp_dout[i+7] ;
+                      j = j+1;
+                    end
+                  else
+                    begin
+                      //dout[i]   = tmp_dout[i] ;
+                      dout[i]   = tmp_dout[i] ;
+                      dout[i+1] = tmp_dout[i+1] ;
+                      dout[i+2] = tmp_dout[i+2] ;
+                      dout[i+3] = tmp_dout[i+3] ;
+                      dout[i+4] = tmp_dout[i+4] ;
+                      dout[i+5] = tmp_dout[i+5] ;
+                      dout[i+6] = tmp_dout[i+6] ;
+                      dout[i+7] = tmp_dout[i+7] ;
+                    end
+                end
+            end
+        end
+      else dout[159:0] = 160'b0 ;
+  end // always @ (...
+
+
+
+//
+// Write Operation: merges wrdata_d1 into inq_ary[wrptr_d1] under the word/byte
+// enables while wr_en_d1 is set and rst_tri_en is low; nothing is written in reset.
+  always @ (/*AUTOSENSE*/byte_wen_d1 or reset_l or rst_tri_en_d1
+            or word_wen_d1 or wr_en_d1 or wrdata_d1 or wrptr_d1)
+    begin
+      if (reset_l)
+        begin
+        // Checking for Xs on bit write enables that are derived from
+        // the word_enables and wr enable input.
+        // (== against an x literal is never true, so reduction-XOR with === is used.)
+// synopsys translate_off
+
+`ifdef INNO_MUXEX
+`else
+          if ((^wr_en_d1) === 1'bx)
+            begin
+              $error("rf_error"," write enable error %h ", wr_en_d1);
+            end
+          if ((^word_wen_d1) === 1'bx)
+            begin
+              $error("rf_error"," word enable error %h ", word_wen_d1[3:0]);
+            end
+          if ((^byte_wen_d1) === 1'bx)
+            begin
+              $error("rf_error"," byte enable error %h ", byte_wen_d1[19:0]);
+            end
+`endif
+
+// synopsys translate_on
+
+          if (wr_en_d1 & ~rst_tri_en)
+            begin
+
+// synopsys translate_off
+
+`ifdef INNO_MUXEX
+`else
+              if ((^wrptr_d1) === 1'bx)
+                begin
+                  $error("rf_error"," write pointer error %h ", wrptr_d1[3:0]);
+                end
+`endif
+
+// synopsys translate_on
+
+              temp = inq_ary[wrptr_d1];
+              j = 0;
+              // i walks the bytes, j is the byte index; bit i+k uses word_wen_d1[k%4]
+              for (i=0; i<=159; i=i+8)
+                begin
+                  //data_in[i] = (bit_en_d1[i]) ? wrdata_d1[i] : temp[i] ;
+                  data_in[i]   = (wr_en_d1 & word_wen_d1[0] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                   wrdata_d1[i]   : temp[i] ;
+                  data_in[i+1] = (wr_en_d1 & word_wen_d1[1] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                   wrdata_d1[i+1] : temp[i+1] ;
+                  data_in[i+2] = (wr_en_d1 & word_wen_d1[2] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                   wrdata_d1[i+2] : temp[i+2] ;
+                  data_in[i+3] = (wr_en_d1 & word_wen_d1[3] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                   wrdata_d1[i+3] : temp[i+3] ;
+                  data_in[i+4] = (wr_en_d1 & word_wen_d1[0] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                   wrdata_d1[i+4] : temp[i+4] ;
+                  data_in[i+5] = (wr_en_d1 & word_wen_d1[1] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                   wrdata_d1[i+5] : temp[i+5] ;
+                  data_in[i+6] = (wr_en_d1 & word_wen_d1[2] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                   wrdata_d1[i+6] : temp[i+6] ;
+                  data_in[i+7] = (wr_en_d1 & word_wen_d1[3] & byte_wen_d1[j] & ~rst_tri_en) ?
+                                   wrdata_d1[i+7] : temp[i+7] ;
+                  j = j+1;
+                end
+              inq_ary[wrptr_d1] = data_in ;
+            end
+        end 
+    end // always @ (...
+
+`endif
+`endif
+
+endmodule // rf_16x160
+
+`ifdef FPGA_SYN_16x160
+
+module bw_r_rf16x2(word_wen, wen, ren, wr_addr, rd_addr, wr_data,
+	rd_data, clk, rd_clk, reset_l);
+  input [3:0] word_wen;   // bit k gates the write into inq_aryk
+  input	      wen;        // write enable for this 8-bit slice
+  input	      ren;        // read enable
+  input	[3:0] wr_addr;    // entry to write
+  input [3:0] rd_addr;    // entry to read
+  input [7:0] wr_data;
+  output [7:0] rd_data;
+  input	clk;              // write clock, rising edge
+  input	rd_clk;           // read clock, falling edge
+  input reset_l;          // low: writes are blocked and the read register is cleared
+  // 16-entry x 8-bit slice built from four 16x2 arrays; array k holds data bits k and k+4.
+  reg	[7:0] rd_data_temp;
+
+  reg [1:0] inq_ary0[15:0];
+  reg [1:0] inq_ary1[15:0];
+  reg [1:0] inq_ary2[15:0];
+  reg [1:0] inq_ary3[15:0];
+
+  always @(posedge clk) begin  // nonblocking writes: no race with other logic on this edge
+    if(reset_l & wen & word_wen[0])
+      inq_ary0[wr_addr] <= {wr_data[4],wr_data[0]};
+    if(reset_l & wen & word_wen[1])
+      inq_ary1[wr_addr] <= {wr_data[5],wr_data[1]};
+    if(reset_l & wen & word_wen[2])
+      inq_ary2[wr_addr] <= {wr_data[6],wr_data[2]};
+    if(reset_l & wen & word_wen[3])
+      inq_ary3[wr_addr] <= {wr_data[7],wr_data[3]};
+  end
+
+  always @(negedge rd_clk) begin  // registered read; value is held when ren is not 1
+    if (~reset_l) begin
+      rd_data_temp <= 8'b0;
+    end else if(ren == 1'b1) begin
+        rd_data_temp <= {inq_ary3[rd_addr], inq_ary2[rd_addr], inq_ary1[rd_addr], inq_ary0[rd_addr]};
+    end
+  end
+  // Undo the per-array interleave so rd_data[k] returns what was written as wr_data[k].
+  assign rd_data = {rd_data_temp[7], rd_data_temp[5], rd_data_temp[3], 
+		rd_data_temp[1], rd_data_temp[6], rd_data_temp[4], 
+		rd_data_temp[2], rd_data_temp[0]};
+
+endmodule
+`endif
+
+
Index: /trunk/T1-common/srams/bw_r_rf32x108.v
===================================================================
--- /trunk/T1-common/srams/bw_r_rf32x108.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_rf32x108.v	(revision 6)
@@ -0,0 +1,354 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_rf32x108.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+// 32 X 108 R1 W1 RF macro
+// Read/Write ports can be accessed in PH1 only.
+////////////////////////////////////////////////////////////////////////
+
+//----------------------------------------------------------------------
+// bw_r_rf32x108 : behavioural model of a 32-entry x 108-bit register
+// file with one read port and one write port.
+//
+// All inputs are registered on the rising edge of rclk (held while
+// sehold is high).  The registered values then drive two
+// level-sensitive processes:
+//   * read  - when reset_l and the registered read enable are high,
+//             dout takes the addressed entry.  If the read and write
+//             pointers match, every bit that is being written is
+//             driven to x.  dout is forced to zero while reset_l is
+//             low and keeps its value when the read enable is low.
+//   * write - when reset_l is high and the registered write enable is
+//             high with rst_tri_en low, the addressed entry is updated.
+//             word_wen[k] enables every data bit whose index modulo 4
+//             equals k; other bits keep their stored value.
+//
+// With DEFINE_0IN set, the storage is an external rf32x108 instance
+// driven by a per-bit write mask instead of the processes above.
+//----------------------------------------------------------------------
+module bw_r_rf32x108(/*AUTOARG*/
+   // Outputs
+   dout, so, 
+   // Inputs
+   din, rd_adr1, rd_adr2, sel_rdaddr1, wr_adr, read_en, wr_en, 
+   word_wen, rst_tri_en, rclk, se, si, reset_l, sehold
+   );
+
+   input [107:0]  din;         // write data
+   input [4:0]    rd_adr1;     // read address 1
+   input [4:0]    rd_adr2;     // read address 2
+   input          sel_rdaddr1; // 1: use rd_adr1, 0: use rd_adr2
+   input [4:0]    wr_adr;      // write address
+   input          read_en;     // read enable
+   input          wr_en;       // write enable, qualified by word_wen
+   input [3:0]    word_wen;    // bit-lane write enables (tie high if unused)
+   input          rst_tri_en;  // gates off writes during scan
+   input          rclk;        // clock for the input flops
+   input          se, si;      // scan enable / scan in (not referenced here)
+   input          reset_l;     // active low reset
+   input          sehold;      // 1: input flops hold their value
+
+   output [107:0] dout;        // read data
+   output         so;          // scan out (not driven by this model)
+
+   // Inputs registered on the rising edge of rclk.
+   reg [107:0]   wrdata_d1;
+   reg [3:0]     word_wen_d1;
+   reg [4:0]     rdptr_d1, wrptr_d1;
+   reg           ren_d1;
+   reg           wr_en_d1;
+   reg           rst_tri_en_d1;
+
+`ifdef DEFINE_0IN
+   reg           so;
+`else
+   reg [107:0]   dout;
+
+   wire [122:0]  scan_out;
+
+   // memory array
+   reg [107:0]   inq_ary [31:0];
+`endif
+
+   // Scratch variables.  Each level-sensitive process has its own loop
+   // index so the two processes do not share a counter.
+   integer       rd_i, wr_i;
+   reg [107:0]   temp, data_in, tmp_dout;
+
+`ifdef DEFINE_0IN
+   // Per-bit write mask: bit n is word_wen_d1[n % 4] gated by the write
+   // enable and by rst_tri_en (27 copies of the 4-bit lane mask).
+   wire [107:0]  bit_en_d1;
+
+   assign bit_en_d1 = {27{word_wen_d1 & {4{wr_en_d1 & ~rst_tri_en}}}};
+`endif
+
+   // Input flops; sehold recirculates the current value.
+   always @(posedge rclk) begin
+      wrdata_d1     <= (sehold) ? wrdata_d1   : din;
+      word_wen_d1   <= (sehold) ? word_wen_d1 : word_wen;
+      wrptr_d1      <= (sehold) ? wrptr_d1    : wr_adr;
+      ren_d1        <= (sehold) ? ren_d1      : read_en;
+      wr_en_d1      <= (sehold) ? wr_en_d1    : wr_en;
+      rdptr_d1      <= (sehold) ? rdptr_d1    : ((sel_rdaddr1) ? rd_adr1 : rd_adr2);
+      rst_tri_en_d1 <= rst_tri_en;  // dummy flop, only used as a trigger
+   end
+
+`ifdef DEFINE_0IN
+rf32x108 rf32x108 ( .rclk(rclk), .radr(rdptr_d1), .wadr(wrptr_d1), .ren(ren_d1),
+                        .we(reset_l), .wm(bit_en_d1), .din(wrdata_d1), .dout(dout) ); 
+`else
+
+/////////////////////////////////////////////////////////////////////////////////
+// Read Operation
+/////////////////////////////////////////////////////////////////////////////////
+
+   // The memory itself is deliberately absent from the sensitivity list;
+   // the process is re-evaluated when the registered controls change.
+   always @(/*AUTOSENSE*/ /*memory or*/ rdptr_d1 or ren_d1 or reset_l
+            or rst_tri_en_d1 or word_wen_d1 or wr_en_d1 or wrptr_d1)
+     begin
+        if (reset_l)
+          begin
+             if (ren_d1)
+               begin
+
+                  // Report an unknown read pointer while a read is enabled.
+                  // The reduction XOR is x as soon as any pointer bit is
+                  // x or z; a plain "== 5'bx" compare evaluates to x and
+                  // would never take the branch.
+`ifdef INNO_MUXEX
+`else
+                  if ((^rdptr_d1) === 1'bx) begin
+                  `ifdef MODELSIM
+                     $display("rf_error"," read pointer error %h ", rdptr_d1[4:0]);
+                  `else
+                     $error("rf_error"," read pointer error %h ", rdptr_d1[4:0]);
+                  `endif
+                  end
+`endif
+
+                  tmp_dout = inq_ary[rdptr_d1];
+
+                  for (rd_i = 0; rd_i < 108; rd_i = rd_i + 4) begin
+                     if (rdptr_d1 == wrptr_d1) begin
+                        // Read/write conflict: bits being written read as x.
+                        dout[rd_i]   = (word_wen_d1[0] & wr_en_d1 & ~rst_tri_en) ?
+                                         1'bx : tmp_dout[rd_i];
+                        dout[rd_i+1] = (word_wen_d1[1] & wr_en_d1 & ~rst_tri_en) ?
+                                         1'bx : tmp_dout[rd_i+1];
+                        dout[rd_i+2] = (word_wen_d1[2] & wr_en_d1 & ~rst_tri_en) ?
+                                         1'bx : tmp_dout[rd_i+2];
+                        dout[rd_i+3] = (word_wen_d1[3] & wr_en_d1 & ~rst_tri_en) ?
+                                         1'bx : tmp_dout[rd_i+3];
+                     end
+                     else begin
+                        dout[rd_i]   = tmp_dout[rd_i];
+                        dout[rd_i+1] = tmp_dout[rd_i+1];
+                        dout[rd_i+2] = tmp_dout[rd_i+2];
+                        dout[rd_i+3] = tmp_dout[rd_i+3];
+                     end
+                  end // of for
+
+               end
+          end // of if reset_l
+        else
+          dout = 108'b0;
+     end
+
+/////////////////////////////////////////////////////////////////////////////////
+// Write Operation
+/////////////////////////////////////////////////////////////////////////////////
+
+   always @(/*AUTOSENSE*/reset_l or rst_tri_en_d1 or word_wen_d1 or wr_en_d1
+            or wrdata_d1 or wrptr_d1)
+     begin
+        if (reset_l)
+          begin
+             // Report unknown bit-lane write enables (word enables gated
+             // by the write enable and rst_tri_en).
+`ifdef INNO_MUXEX
+`else
+             if ((^(word_wen_d1 & {4{wr_en_d1 & ~rst_tri_en}})) === 1'bx) begin
+             `ifdef MODELSIM
+                $display("rf_error"," write enable error %h ", word_wen_d1[3:0]);
+             `else
+                $error("rf_error"," write enable error %h ", word_wen_d1[3:0]);
+             `endif
+             end
+`endif
+
+             if (wr_en_d1 & ~rst_tri_en) begin
+
+`ifdef INNO_MUXEX
+`else
+                // Report an unknown write pointer while a write is enabled.
+                if ((^wrptr_d1) === 1'bx) begin
+                `ifdef MODELSIM
+                   $display("rf_error"," write pointer error %h ", wrptr_d1[4:0]);
+                `else
+                   $error("rf_error"," write pointer error %h ", wrptr_d1[4:0]);
+                `endif
+                end
+`endif
+
+                // Read-modify-write: lanes that are not enabled keep the
+                // stored value.
+                temp = inq_ary[wrptr_d1];
+                for (wr_i = 0; wr_i < 108; wr_i = wr_i + 4) begin
+                   data_in[wr_i]   = (word_wen_d1[0] & wr_en_d1 & ~rst_tri_en) ?
+                                       wrdata_d1[wr_i]   : temp[wr_i];
+                   data_in[wr_i+1] = (word_wen_d1[1] & wr_en_d1 & ~rst_tri_en) ?
+                                       wrdata_d1[wr_i+1] : temp[wr_i+1];
+                   data_in[wr_i+2] = (word_wen_d1[2] & wr_en_d1 & ~rst_tri_en) ?
+                                       wrdata_d1[wr_i+2] : temp[wr_i+2];
+                   data_in[wr_i+3] = (word_wen_d1[3] & wr_en_d1 & ~rst_tri_en) ?
+                                       wrdata_d1[wr_i+3] : temp[wr_i+3];
+                end
+                inq_ary[wrptr_d1] = data_in;
+
+             end
+
+          end
+     end // always @ (...
+
+`endif
+
+
+endmodule // bw_r_rf32x108
+
+
+
Index: /trunk/T1-common/srams/bw_rf_16x81.v
===================================================================
--- /trunk/T1-common/srams/bw_rf_16x81.v	(revision 6)
+++ /trunk/T1-common/srams/bw_rf_16x81.v	(revision 6)
@@ -0,0 +1,399 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_rf_16x81.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+// bw_rf_16x81: behavioural model of a 16-entry x 81-bit memory with
+// separate read and write clocks.
+//
+// Address, data and chip-select inputs are captured by master/slave latch
+// models that also implement a scan chain and a hold mode.  The array is
+// written while wr_clk is low and read while rd_clk is high.  The output
+// "do" is either the latched array output or, when testmux_sel is high,
+// the registered write data.  "listen_out" is a registered copy of "do".
+module bw_rf_16x81(
+   rd_clk,    // read clock
+   wr_clk,    // write clock
+   csn_rd,    // read enable -- active low 
+   csn_wr,    // write enable -- active low
+   hold,      // hold signal -- unflopped -- hold =1 holds input data 
+   testmux_sel, // bypass signal -- unflopped -- testmux_sel = 1 drives do from the registered di (di_ff)
+   scan_en,   // Scan enable unflopped  
+   margin,    // Delay setting for the circuits (5 bits); the margin check below compares against 5'b10101
+   rd_a,      // read address  
+   wr_a,      // Write address
+   di,        // Data input
+   si,        // scan in  
+   so,        // scan out  
+   listen_out, // Listening flop-- registered copy of do
+   do          // Data out
+
+);
+
+   input rd_clk;
+   input wr_clk;
+   input csn_rd;
+   input csn_wr;
+   input hold;
+   input testmux_sel;
+   input scan_en;
+   input [4:0] margin;
+   input [3:0] rd_a;
+   input [3:0] wr_a;
+   input [80:0] di;
+   input si;
+   output so;
+   output [80:0] do;
+   output [80:0] listen_out;
+
+// Each simulation check below is active only when its parameter is 0,
+// so with these defaults (1) all of them are disabled.
+parameter  SYNC_CLOCK_CHK1 = 1;
+parameter  SYNC_CLOCK_CHK2 = 1;
+parameter  SYNC_CLOCK_CHK3 = 1;
+parameter  MARGIN_WARNING = 1; // margin warning fires only when this is 0; the default of 1 disables it
+
+
+
+
+// Start code
+reg [80:0] memarray[15:0] ;   // storage array
+reg [80:0] array_out  ;       // raw read result
+
+reg [80:0] array_out_latch    ; // array_out behind a latch that is open while rd_clk is high
+
+
+
+// For every captured input: *_ff is the captured value, *_ff_si the scan
+// input and *_ff_so the scan output of that stage.
+reg  [3:0] rd_a_ff   ;
+wire [3:0] rd_a_ff_so;
+wire [3:0] rd_a_ff_si ;
+
+reg  [3:0] wr_a_ff   ;
+wire [3:0] wr_a_ff_so;
+wire [3:0] wr_a_ff_si ;
+
+reg  [80:0] di_ff   ;
+wire [80:0] di_ff_so;
+wire [80:0] di_ff_si;
+
+wire [80:0] listen_out_so;
+wire [80:0] listen_out_si ;
+reg  [80:0] listen_out     ;
+
+
+reg        csn_rd_ff ;
+wire       csn_rd_ff_si ;
+wire       csn_rd_ff_so ;
+
+reg        csn_wr_ff ;
+wire       csn_wr_ff_si ;
+wire       csn_wr_ff_so ;
+
+reg        di_ff_latch_so ;
+///////////////////////////////////////
+// Scan chain connections            //
+///////////////////////////////////////
+// Chain order as wired below:
+//   si -> wr_a_ff[3..0] -> csn_wr_ff -> di_ff[80..0] -> di_ff_latch_so
+//      -> listen_out[0..80] -> csn_rd_ff -> rd_a_ff[0..3] -> so
+assign wr_a_ff_si[3:0] = {si      , wr_a_ff_so[3:1]} ;
+assign csn_wr_ff_si    = wr_a_ff_so[0] ;
+assign di_ff_si        = {csn_wr_ff_so, di_ff_so[80:1]};
+assign listen_out_si   = {listen_out_so[79:0], di_ff_latch_so} ;
+assign csn_rd_ff_si    = listen_out_so[80] ;
+assign rd_a_ff_si[3:0] = {rd_a_ff_so[2:0], csn_rd_ff_so} ;
+assign so              = rd_a_ff_so[3] ;
+///////////////////////////////////////
+// Instantiate a clock headers        //
+///////////////////////////////////////
+
+// *_local_clk is low only when the clock is low and neither scan_en nor
+// hold is set: it opens the master latches to the functional inputs.
+// *_smclk is low only when the clock is low and scan_en or hold is set:
+// it opens the master latches to the scan/hold inputs instead.
+wire   rd_ssclk       = rd_clk ; // clk_en & rd_clk ;
+wire   rd_local_clk   = rd_ssclk | scan_en | hold ; 
+wire   rd_smclk       = rd_ssclk |  ~(scan_en | hold) ;
+
+wire   wr_ssclk       = wr_clk ; // clk_en & wr_clk ;
+wire   wr_local_clk   = wr_ssclk | scan_en | hold ; 
+wire   wr_smclk       = wr_ssclk |  ~(scan_en | hold) ;
+
+
+/////////////////////////////////////////////////////
+// csn_rd Flop                                     //
+/////////////////////////////////////////////////////
+
+reg                     csn_rd_ff_inst_mdata ;
+wire                    csn_rd_ff_inst_smin ;
+reg                     csn_rd_ff_scan_out ;
+
+// Scan/hold input of the master latch: recirculate when hold is set,
+// otherwise take the scan input.
+assign csn_rd_ff_inst_smin  = hold ?  csn_rd_ff_scan_out :  csn_rd_ff_si ; 
+// Master latch.  The second "if" is evaluated last, so the scan/hold
+// input wins if both conditions are true in the same evaluation.
+always @(rd_smclk or rd_local_clk or csn_rd or csn_rd_ff_inst_smin ) begin
+       if (!rd_local_clk) begin
+          csn_rd_ff_inst_mdata = csn_rd ;
+       end
+       if (!rd_smclk) begin
+          csn_rd_ff_inst_mdata = csn_rd_ff_inst_smin;
+       end
+end
+// Scan output: master value sampled on the rising edge of rd_clk.
+always @(posedge rd_ssclk) begin
+    csn_rd_ff_scan_out    <=  csn_rd_ff_inst_mdata ; 
+end
+// Functional output: latch that is open while rd_local_clk is high.
+always @(rd_local_clk or csn_rd_ff_inst_mdata) begin
+   if (rd_local_clk ) begin
+    csn_rd_ff    <=  csn_rd_ff_inst_mdata ; 
+   end
+end
+assign csn_rd_ff_so =  csn_rd_ff_scan_out;
+        
+/////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////
+// rd_a Flop                                       //
+/////////////////////////////////////////////////////
+// Same structure as the csn_rd stage above, 4 bits wide.
+reg     [3:0]   rd_a_ff_inst_mdata ;
+wire    [3:0]   rd_a_ff_inst_smin ;
+reg     [3:0]   rd_a_ff_scan_out ;
+
+assign rd_a_ff_inst_smin[3:0]  = hold ?  rd_a_ff_scan_out[3:0] :  rd_a_ff_si[3:0] ; 
+always @(rd_smclk or rd_local_clk or rd_a or rd_a_ff_inst_smin ) begin
+       if (!rd_local_clk) begin
+          rd_a_ff_inst_mdata = rd_a[3:0] ;
+       end
+       if (!rd_smclk) begin
+          rd_a_ff_inst_mdata = rd_a_ff_inst_smin;
+       end
+end
+always @(posedge rd_ssclk) begin
+    rd_a_ff_scan_out[3:0]   <=  rd_a_ff_inst_mdata ; 
+end
+always @(rd_local_clk or rd_a_ff_inst_mdata) begin
+   if (rd_local_clk) begin
+    rd_a_ff[3:0]   <=  rd_a_ff_inst_mdata ; 
+   end
+end
+assign rd_a_ff_so[3:0] = rd_a_ff_scan_out[3:0] ;
+/////////////////////////////////////////////////////
+        
+/////////////////////////////////////////////////////
+// csn_wr Flop                                     //
+/////////////////////////////////////////////////////
+// Write-side stages: master latch followed by a register clocked on the
+// rising edge of wr_clk; the register output is also the scan output.
+reg                     csn_wr_ff_inst_mdata ;
+wire                    csn_wr_ff_inst_smin ;
+
+assign csn_wr_ff_inst_smin  = hold ?  csn_wr_ff :  csn_wr_ff_si ; 
+always @(wr_smclk or wr_local_clk or csn_wr or csn_wr_ff_inst_smin ) begin
+       if (!wr_local_clk) begin
+          csn_wr_ff_inst_mdata = csn_wr ;
+       end
+       if (!wr_smclk) begin
+          csn_wr_ff_inst_mdata = csn_wr_ff_inst_smin;
+       end
+end
+always @(posedge wr_ssclk) begin
+    csn_wr_ff    <=  csn_wr_ff_inst_mdata ; 
+end
+assign csn_wr_ff_so =  csn_wr_ff;
+/////////////////////////////////////////////////////
+        
+/////////////////////////////////////////////////////
+// wr_a Flop                                       //
+/////////////////////////////////////////////////////
+reg     [3:0]   wr_a_ff_inst_mdata ;
+wire    [3:0]   wr_a_ff_inst_smin ;
+
+assign wr_a_ff_inst_smin[3:0]  = hold ?  wr_a_ff[3:0] :  wr_a_ff_si[3:0] ; 
+always @(wr_smclk or wr_local_clk or wr_a or wr_a_ff_inst_smin ) begin
+       if (!wr_local_clk) begin
+          wr_a_ff_inst_mdata = wr_a[3:0] ;
+       end
+       if (!wr_smclk) begin
+          wr_a_ff_inst_mdata = wr_a_ff_inst_smin;
+       end
+end
+always @(posedge wr_ssclk) begin
+    wr_a_ff[3:0]   <=  wr_a_ff_inst_mdata ; 
+end
+assign wr_a_ff_so[3:0] = wr_a_ff[3:0] ;
+/////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////
+// di Flop                                         //
+/////////////////////////////////////////////////////
+reg     [80:0]  di_ff_inst_mdata ;
+wire    [80:0]  di_ff_inst_smin ;
+
+assign di_ff_inst_smin[80:0]  = hold ?  di_ff[80:0] :  di_ff_si[80:0] ; 
+always @(wr_smclk or wr_local_clk or di or di_ff_inst_smin ) begin
+       if (!wr_local_clk) begin
+          di_ff_inst_mdata = di[80:0] ;
+       end
+       if (!wr_smclk) begin
+          di_ff_inst_mdata = di_ff_inst_smin;
+       end
+end
+always @(posedge wr_ssclk) begin
+    di_ff[80:0]   <=  di_ff_inst_mdata ; 
+end
+assign di_ff_so[80:0] = di_ff[80:0] ;
+/////////////////////////////////////////////////////
+
+// Array accesses are disabled whenever scan_en is high.
+wire wr_enable_l = csn_wr_ff | scan_en ;
+wire rd_enable_l = csn_rd_ff | scan_en ;
+
+// Write: performed while wr_clk is low and the write is enabled.
+// wire wr_clk_qual = wr_ssclk & ~scan_en ; 
+always @(wr_ssclk or wr_a_ff or wr_enable_l or di_ff ) begin
+     if (!wr_ssclk) begin
+        if (!wr_enable_l) begin
+               memarray[wr_a_ff] <= di_ff[80:0] ; 
+        end
+     end 
+end
+        
+// Read: evaluated while rd_clk is high.  An enabled read returns the
+// addressed entry, a disabled read returns all ones, and an unknown
+// enable returns all x.
+// wire  rd_clk_qual =  (rd_ssclk & ~scan_en) ; 
+always @(rd_ssclk or rd_a_ff or rd_enable_l) begin
+     if (rd_ssclk) begin
+        if (rd_enable_l == 1'b0) begin
+             array_out[80:0] <= memarray[rd_a_ff] ;
+        end else if (rd_enable_l == 1'b1) begin
+             array_out[80:0] <= 81'h1FFFFFFFFFFFFFFFFFFFF;
+        end else begin 
+             array_out[80:0] <= 81'hXXXXXXXXXXXXXXXXXXXXX;
+        end
+     end
+end
+
+
+// synopsys translate_off
+
+// Check 1 (active only when SYNC_CLOCK_CHK1 == 0): both ports enabled on
+// the same address drives array_out to x and reports a conflict.
+`ifdef  INNO_MUXEX
+`else
+  always @(csn_rd_ff or csn_wr_ff or rd_a_ff or wr_a_ff)   begin
+   if ((SYNC_CLOCK_CHK1 == 0) & !csn_rd_ff & !csn_wr_ff & (rd_a_ff == wr_a_ff)) begin
+      array_out   <= 81'hxxxxxxxxxxxxxxxxxxxxx;
+	`ifdef MODELSIM  
+      $display ("sram_conflict", "conflict between read: %h and write: %h pointers", rd_a_ff, wr_a_ff);
+	`else
+      $error ("sram_conflict", "conflict between read: %h and write: %h pointers", rd_a_ff, wr_a_ff);
+	`endif
+   end
+  end
+`endif
+
+///////////////////////////////////////////////////////////////
+// Purely ERROR checking code.                               //
+///////////////////////////////////////////////////////////////
+// rd_clk_del follows rd_local_clk after a delay of 300 time units; the
+// read address and enable are re-sampled on its rising edge.
+reg  [3:0] rd_a_ff_del ;
+reg        csn_rd_ff_del ; 
+reg        rd_clk_del ; 
+always @(rd_local_clk) begin
+     if (rd_local_clk)  rd_clk_del = #300 rd_local_clk;
+     else              rd_clk_del = #300 rd_local_clk;
+end
+always @(posedge rd_clk_del) begin
+       rd_a_ff_del <= rd_a_ff ;
+       csn_rd_ff_del <= csn_rd_ff ;
+end 
+
+// Check 2 (active only when SYNC_CLOCK_CHK2 == 0): delayed read against
+// the current write.
+`ifdef  INNO_MUXEX
+`else
+  always @(csn_rd_ff_del or csn_wr_ff or rd_a_ff_del or wr_a_ff or rd_clk_del or wr_ssclk)   begin
+   if (SYNC_CLOCK_CHK2 == 0) begin
+       if (rd_clk_del & !wr_ssclk & !csn_rd_ff_del & !csn_wr_ff & (rd_a_ff_del == wr_a_ff)) begin
+	`ifdef MODELSIM   
+	      $display ("sram_conflict", "conflict between read: %h and write: %h pointers ", rd_a_ff_del, wr_a_ff);
+	`else
+	      $error ("sram_conflict", "conflict between read: %h and write: %h pointers ", rd_a_ff_del, wr_a_ff);
+	`endif
+       end 
+   end
+  end
+`endif
+
+// wr_clk_del follows wr_clk after a delay of 300 time units; the write
+// address and enable are re-sampled on its rising edge.
+reg  [3:0] wr_a_ff_del ;
+reg        csn_wr_ff_del ; 
+reg        wr_clk_del ; 
+always @(wr_ssclk) begin
+     if (wr_ssclk)  wr_clk_del = #300 wr_ssclk;
+     else              wr_clk_del = #300 wr_ssclk;
+end
+always @(posedge wr_clk_del) begin
+       wr_a_ff_del <= wr_a_ff ;
+       csn_wr_ff_del <= csn_wr_ff ;
+end 
+
+// Check 3 (active only when SYNC_CLOCK_CHK3 == 0): current read against
+// the delayed write.  Unlike checks 1 and 2 this one always uses $display.
+`ifdef  INNO_MUXEX
+`else
+  always @(csn_rd_ff or csn_wr_ff_del or rd_a_ff or wr_a_ff_del or rd_local_clk or wr_clk_del)   begin
+   if (SYNC_CLOCK_CHK3 == 0) begin
+       if (rd_local_clk & !wr_clk_del & !csn_rd_ff & !csn_wr_ff_del & (rd_a_ff == wr_a_ff_del)) begin
+      $display ("sram_conflict", "conflict between read: %h and write: %h pointers ", rd_a_ff, wr_a_ff_del);
+       end
+   end
+  end
+`endif
+
+///////////////////////////////////////////////////////////////
+// end the ERROR checking code.                              // 
+///////////////////////////////////////////////////////////////
+///////////////////////////////////////
+
+
+// synopsys translate_on
+
+///////////////////////////////////
+// Transparent latches (this model has no reset on them)
+///////////////////////////////////
+
+// Output latch: follows array_out while rd_clk is high.
+always @(array_out or rd_ssclk) begin
+     if (rd_ssclk) begin
+        array_out_latch <= array_out ;
+     end
+end
+
+// Scan-chain latch between di_ff[0] and listen_out[0]: open while wr_clk
+// is low.
+always @(di_ff_so[0] or wr_ssclk) begin
+     if (!wr_ssclk) begin
+        di_ff_latch_so <= di_ff_so[0] ;
+     end
+end
+
+
+// Output mux: registered write data when testmux_sel is high, otherwise
+// the latched array output.
+assign do  = testmux_sel ? di_ff : array_out_latch ;
+
+/////////////////////////////////////////////////////
+// listen_out Flop                                 //
+/////////////////////////////////////////////////////
+// Captures "do": master latch on the read clock, then a register on the
+// rising edge of rd_clk.  In hold mode the master takes "do" as well.
+reg     [80:0]  listen_out_ff_inst_mdata ;
+wire    [80:0]  listen_out_ff_inst_smin ;
+
+assign listen_out_ff_inst_smin[80:0]  = hold ?  do[80:0] :  listen_out_si[80:0] ; 
+always @(rd_smclk or rd_local_clk or do or listen_out_ff_inst_smin ) begin
+       if (!rd_local_clk) begin
+          listen_out_ff_inst_mdata = do[80:0] ;
+       end
+       if (!rd_smclk) begin
+          listen_out_ff_inst_mdata = listen_out_ff_inst_smin;
+       end
+end
+always @(posedge rd_ssclk) begin
+    listen_out[80:0]   <=  listen_out_ff_inst_mdata ; 
+end
+assign listen_out_so[80:0] = listen_out[80:0] ;
+
+// synopsys translate_off 
+
+// Margin check (active only when MARGIN_WARNING == 0): reports on every
+// rising edge of rd_clk while margin differs from 5'b10101.
+`ifdef  INNO_MUXEX
+`else
+   always @(posedge rd_clk) begin
+     if ((MARGIN_WARNING == 0) & margin != 5'b10101) begin
+	`ifdef MODELSIM 
+          $display ("sram_margin", "margin is not set to the default value") ;
+	`else
+          $error ("sram_margin", "margin is not set to the default value") ;
+	`endif
+     end
+   end
+`endif
+
+// synopsys translate_on 
+
+endmodule
Index: /trunk/T1-common/srams/bw_r_irf_fpga1.v
===================================================================
--- /trunk/T1-common/srams/bw_r_irf_fpga1.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_irf_fpga1.v	(revision 6)
@@ -0,0 +1,376 @@
+module bw_r_irf_fpga1 (   // integer register file built from four regfile_1w_4r instances, one per thread
+   input [ 11:0]  current_cwp,   // four 3-bit window pointers: thread 0 in [2:0] ... thread 3 in [11:9]
+   input         rclk,
+   input         reset_l,        // active low, used as an asynchronous reset
+   // si, se and sehold are not referenced anywhere in this module
+   input         si,
+   input         se,
+   input         sehold,
+   input         rst_tri_en,     // when set, blocks register writes and global pointer updates
+
+   input  [ 1:0] ifu_exu_tid_s2,  // s stage thread
+   input  [ 4:0] ifu_exu_rs1_s,  // source addresses
+   input  [ 4:0] ifu_exu_rs2_s,
+   input  [ 4:0] ifu_exu_rs3_s,
+   input         ifu_exu_ren1_s,        // read enables for all 3 ports
+   input         ifu_exu_ren2_s,
+   input         ifu_exu_ren3_s,
+   input         ecl_irf_wen_w,        // write enables for both write ports
+   input         ecl_irf_wen_w2,
+   input  [ 4:0] ecl_irf_rd_m,   // w destination, registered here before it is combined with ecl_irf_wen_w
+   input  [ 4:0] ecl_irf_rd_g,  // w2 destination, registered here before it is combined with ecl_irf_wen_w2
+   input  [71:0] byp_irf_rd_data_w,// write data from w1
+   input  [71:0] byp_irf_rd_data_w2,     // write data from w2
+   input  [ 1:0] ecl_irf_tid_m,  // w stage thread
+   input  [ 1:0] ecl_irf_tid_g, // w2 thread
+
+   input  [ 2:0] rml_irf_old_lo_cwp_e,  // current window pointer for locals and odds
+   input  [ 2:0] rml_irf_new_lo_cwp_e,  // target window pointer for locals and odds
+   input  [ 2:1] rml_irf_old_e_cwp_e,  // current window pointer for evens
+   input  [ 2:1] rml_irf_new_e_cwp_e,  // target window pointer for evens
+   input         rml_irf_swap_even_e,
+   input         rml_irf_swap_odd_e,
+   input         rml_irf_swap_local_e,
+   input         rml_irf_kill_restore_w,
+   input  [ 1:0] rml_irf_cwpswap_tid_e,
+
+   input  [ 1:0] rml_irf_old_agp, // alternate global pointer
+   input  [ 1:0] rml_irf_new_agp, // alternate global pointer, loaded into current_global on rml_irf_swap_global
+   input         rml_irf_swap_global,
+   input  [ 1:0] rml_irf_global_tid,
+   // read data outputs carry the bitwise inverse of the register file data
+   output        so,             // never driven in this module
+   output reg [71:0] irf_byp_rs1_data_d_l,
+   output reg [71:0] irf_byp_rs2_data_d_l,
+   output reg [71:0] irf_byp_rs3_data_d_l,
+   output reg [31:0] irf_byp_rs3h_data_d_l   // low 32 bits of the fourth read port
+);
+
+wire [71:0] dout0_0;
+wire [71:0] dout0_1;
+wire [71:0] dout0_2;
+wire [71:0] dout0_3;
+wire [71:0] dout1_0;
+wire [71:0] dout1_1;
+wire [71:0] dout1_2;
+wire [71:0] dout1_3;
+wire [71:0] dout2_0;
+wire [71:0] dout2_1;
+wire [71:0] dout2_2;
+wire [71:0] dout2_3;
+wire [71:0] dout3_0;
+wire [71:0] dout3_1;
+wire [71:0] dout3_2;
+wire [71:0] dout3_3;
+
+reg [1:0] ecl_irf_tid_m_d;  
+reg [1:0] ecl_irf_tid_g_d;  
+reg [4:0] ecl_irf_rd_m_d;
+reg [4:0] ecl_irf_rd_g_d;
+
+wire wen0_0 = ecl_irf_wen_w  && !rst_tri_en && (ecl_irf_tid_m_d == 2'd0) && (ecl_irf_rd_m_d != 0);   // thread 0, w port
+wire wen0_1 = ecl_irf_wen_w2 && !rst_tri_en && (ecl_irf_tid_g_d == 2'd0) && (ecl_irf_rd_g_d != 0);   // thread 0, w2 port
+wire wen1_0 = ecl_irf_wen_w  && !rst_tri_en && (ecl_irf_tid_m_d == 2'd1) && (ecl_irf_rd_m_d != 0);   // thread 1, w port
+wire wen1_1 = ecl_irf_wen_w2 && !rst_tri_en && (ecl_irf_tid_g_d == 2'd1) && (ecl_irf_rd_g_d != 0);   // thread 1, w2 port
+wire wen2_0 = ecl_irf_wen_w  && !rst_tri_en && (ecl_irf_tid_m_d == 2'd2) && (ecl_irf_rd_m_d != 0);   // thread 2, w port
+wire wen2_1 = ecl_irf_wen_w2 && !rst_tri_en && (ecl_irf_tid_g_d == 2'd2) && (ecl_irf_rd_g_d != 0);   // thread 2, w2 port
+wire wen3_0 = ecl_irf_wen_w  && !rst_tri_en && (ecl_irf_tid_m_d == 2'd3) && (ecl_irf_rd_m_d != 0);   // thread 3, w port
+wire wen3_1 = ecl_irf_wen_w2 && !rst_tri_en && (ecl_irf_tid_g_d == 2'd3) && (ecl_irf_rd_g_d != 0);   // thread 3, w2 port; index 0 is never written
+
+
+reg [2:0] wr0_window; 
+reg [2:0] wr1_window; 
+reg [2:0] rd0_window; 
+reg [2:0] rd1_window; 
+reg [2:0] rd2_window;  
+
+reg [2:0] current_global[3:0];
+reg [2:0] current_window[3:0];
+reg [2:0] current_read[3:0];
+reg [2:0] current_write[3:0];
+reg [2:0] current_write_d[3:0];
+
+reg [1:0] cwpswap_tid_d;
+reg [2:0] new_lo_cwp_d;
+reg [2:0] old_lo_cwp_d;
+reg       swap_local_d;
+
+reg [1:0] cwpswap_tid_d1;
+reg [2:0] new_lo_cwp_d1;
+reg [2:0] old_lo_cwp_d1;
+reg       swap_local_d1;
+
+reg [1:0] cwpswap_tid_d2;
+reg [2:0] new_lo_cwp_d2;
+reg [2:0] old_lo_cwp_d2;
+reg       swap_local_d2;
+
+reg [1:0] ifu_exu_tid_s2_d;
+
+integer i;
+
+// Pointer state and the delayed copies of the write/read decode inputs.
+// Every register written in the clocked branch is also cleared by reset, so
+// none of them starts as X or picks up an implied hold-while-in-reset enable.
+// The per-thread arrays are cleared with the module-level loop index i.
+always @(posedge rclk or negedge reset_l)
+   if(~reset_l)
+      begin
+         for(i=0;i<4;i=i+1)   // one entry per thread
+            begin
+               current_global[i]<=3'd3;
+               current_window[i]<=0;
+               current_write[i]<=0;
+               current_read[i]<=0;
+            end
+         ifu_exu_tid_s2_d<=0;
+         ecl_irf_tid_m_d<=0;  ecl_irf_tid_g_d<=0;
+         ecl_irf_rd_m_d<=0;   ecl_irf_rd_g_d<=0;   // destination 0 keeps every wenN_x low
+         swap_local_d<=0;     swap_local_d1<=0;    swap_local_d2<=0;
+         cwpswap_tid_d<=0;    cwpswap_tid_d1<=0;   cwpswap_tid_d2<=0;
+         new_lo_cwp_d<=0;     new_lo_cwp_d1<=0;    new_lo_cwp_d2<=0;
+         old_lo_cwp_d<=0;     old_lo_cwp_d1<=0;    old_lo_cwp_d2<=0;
+      end
+   else
+      begin
+         // !!! Maybe we should flop that on negedge also
+         if(ifu_exu_ren1_s || ifu_exu_ren2_s || ifu_exu_ren3_s)
+            ifu_exu_tid_s2_d<=ifu_exu_tid_s2;
+         
+         ecl_irf_tid_m_d<=ecl_irf_tid_m;  
+         ecl_irf_tid_g_d<=ecl_irf_tid_g;  
+         ecl_irf_rd_m_d<=ecl_irf_rd_m;
+         ecl_irf_rd_g_d<=ecl_irf_rd_g;
+         
+         swap_local_d<=rml_irf_swap_local_e & ~rst_tri_en;
+         cwpswap_tid_d<=rml_irf_cwpswap_tid_e;
+         new_lo_cwp_d<=rml_irf_new_lo_cwp_e;
+         old_lo_cwp_d<=rml_irf_old_lo_cwp_e;
+         
+         swap_local_d1<=swap_local_d;
+         cwpswap_tid_d1<=cwpswap_tid_d;
+         new_lo_cwp_d1<=new_lo_cwp_d;
+         old_lo_cwp_d1<=old_lo_cwp_d;
+         
+         swap_local_d2<=swap_local_d1;
+         cwpswap_tid_d2<=cwpswap_tid_d1;
+         new_lo_cwp_d2<=new_lo_cwp_d1;
+         old_lo_cwp_d2<=old_lo_cwp_d1;
+
+         if(rml_irf_swap_global & ~rst_tri_en)
+            current_global[rml_irf_global_tid]<={1'b0,rml_irf_new_agp};
+         
+         /*if(swap_local_d)
+            begin
+               current_write[cwpswap_tid_d]<=new_lo_cwp_d;
+               current_read[cwpswap_tid_d]<=new_lo_cwp_d;
+            end
+         else
+            if(swap_local_d2 && (new_lo_cwp_d2[0]!=exu_ifu_oddwin_s[cwpswap_tid_d2]))
+               begin
+                  current_write[cwpswap_tid_d2]<=old_lo_cwp_d2;
+                  current_read[cwpswap_tid_d2]<=old_lo_cwp_d2;
+               end*/
+               
+         
+         /*   
+         if(rml_irf_swap_local_e)
+           current_write[rml_irf_cwpswap_tid_e]<=rml_irf_old_lo_cwp_e;
+         else
+            if(swap_local_d)
+               current_write[cwpswap_tid_d]<=new_lo_cwp_d;
+            
+         for(i=0;i<4;i=i+1)
+            current_write_d[i]<=current_write[i];
+            
+         if(rml_irf_swap_local_e)
+            current_read[cwpswap_tid_d1]<=rml_irf_old_lo_cwp_e;
+         else
+            if(swap_local_d1)
+               current_read[cwpswap_tid_d1]<=new_lo_cwp_d1;
+         */
+      end  
+
+/*
+always @( * )
+   begin
+      wr0_window<=ecl_irf_rd_m_d[4:3]==2'b0 ? current_global[ecl_irf_tid_m_d]:(rml_irf_swap_local_e  && (ecl_irf_tid_m_d==rml_irf_cwpswap_tid_e) ? rml_irf_old_lo_cwp_e:current_write[ecl_irf_tid_m_d]);
+      wr1_window<=ecl_irf_rd_g_d[4:3]==2'b0 ? current_global[ecl_irf_tid_g_d]:(rml_irf_swap_local_e  && (ecl_irf_tid_g_d==rml_irf_cwpswap_tid_e) ? rml_irf_old_lo_cwp_e:current_write[ecl_irf_tid_g_d]);
+      rd0_window<=ifu_exu_rs1_s[4:3]==2'b0 ? current_global[ifu_exu_tid_s2]:(rml_irf_swap_local_e  && (ifu_exu_tid_s2==rml_irf_cwpswap_tid_e) ? rml_irf_old_lo_cwp_e:current_read[ifu_exu_tid_s2]);
+      rd1_window<=ifu_exu_rs2_s[4:3]==2'b0 ? current_global[ifu_exu_tid_s2]:(rml_irf_swap_local_e  && (ifu_exu_tid_s2==rml_irf_cwpswap_tid_e) ? rml_irf_old_lo_cwp_e:current_read[ifu_exu_tid_s2]);
+      rd2_window<=ifu_exu_rs3_s[4:3]==2'b0 ? current_global[ifu_exu_tid_s2]:(rml_irf_swap_local_e  && (ifu_exu_tid_s2==rml_irf_cwpswap_tid_e) ? rml_irf_old_lo_cwp_e:current_read[ifu_exu_tid_s2]);
+   end
+*/
+
+reg [2:0] wr0_cwp;
+reg [2:0] wr1_cwp;
+reg [2:0] rd_cwp;
+
+always @( * )   // combinational: window pointer of the thread held in ecl_irf_tid_m_d (blocking assignments)
+   case(ecl_irf_tid_m_d)
+      2'b00:wr0_cwp=current_cwp[2:0];
+      2'b01:wr0_cwp=current_cwp[5:3];
+      2'b10:wr0_cwp=current_cwp[8:6];
+      2'b11:wr0_cwp=current_cwp[11:9];
+   endcase
+      
+always @( * )   // combinational: window pointer of the thread held in ecl_irf_tid_g_d (blocking assignments)
+   case(ecl_irf_tid_g_d)
+      2'b00:wr1_cwp=current_cwp[2:0];
+      2'b01:wr1_cwp=current_cwp[5:3];
+      2'b10:wr1_cwp=current_cwp[8:6];
+      2'b11:wr1_cwp=current_cwp[11:9];
+   endcase
+      
+always @( * )   // combinational: window pointer of the reading thread ifu_exu_tid_s2 (blocking assignments)
+   case(ifu_exu_tid_s2)
+      2'b00:rd_cwp=current_cwp[2:0];
+      2'b01:rd_cwp=current_cwp[5:3];
+      2'b10:rd_cwp=current_cwp[8:6];
+      2'b11:rd_cwp=current_cwp[11:9];
+   endcase
+      
+always @( * )   // indices 0-7 (bits [4:3]==0) use the thread's current_global entry, all others its window pointer
+   begin
+      wr0_window=ecl_irf_rd_m_d[4:3]==2'b0 ? current_global[ecl_irf_tid_m_d]:wr0_cwp;
+      wr1_window=ecl_irf_rd_g_d[4:3]==2'b0 ? current_global[ecl_irf_tid_g_d]:wr1_cwp;
+      rd0_window=ifu_exu_rs1_s[4:3]==2'b0 ? current_global[ifu_exu_tid_s2]:rd_cwp;
+      rd1_window=ifu_exu_rs2_s[4:3]==2'b0 ? current_global[ifu_exu_tid_s2]:rd_cwp;
+      rd2_window=ifu_exu_rs3_s[4:3]==2'b0 ? current_global[ifu_exu_tid_s2]:rd_cwp;
+   end
+
+wire [4:0] wraddr0_swapoe=(!wr0_window[0] && ecl_irf_rd_m_d[3]) ? {~ecl_irf_rd_m_d[4],ecl_irf_rd_m_d[3:0]}:ecl_irf_rd_m_d; // window LSB 0 and index bit 3 set: invert bit 4 (8-15 <-> 24-31)
+wire [4:0] wraddr1_swapoe=(!wr1_window[0] && ecl_irf_rd_g_d[3]) ? {~ecl_irf_rd_g_d[4],ecl_irf_rd_g_d[3:0]}:ecl_irf_rd_g_d;
+wire [4:0] rdaddr0_swapoe=(!rd0_window[0] && ifu_exu_rs1_s[3]) ? {~ifu_exu_rs1_s[4],ifu_exu_rs1_s[3:0]}:ifu_exu_rs1_s;
+wire [4:0] rdaddr1_swapoe=(!rd1_window[0] && ifu_exu_rs2_s[3]) ? {~ifu_exu_rs2_s[4],ifu_exu_rs2_s[3:0]}:ifu_exu_rs2_s;
+wire [4:0] rdaddr2_swapoe=(!rd2_window[0] && ifu_exu_rs3_s[3]) ? {~ifu_exu_rs3_s[4],ifu_exu_rs3_s[3:0]}:ifu_exu_rs3_s;
+// Add 16 * window to the (possibly swapped) index; the sum is kept to 7 bits, so it wraps modulo 128.
+wire [6:0] wraddr0_wa={2'b0,wraddr0_swapoe}+{wr0_window,4'b0};
+wire [6:0] wraddr1_wa={2'b0,wraddr1_swapoe}+{wr1_window,4'b0};
+wire [6:0] rdaddr0_wa={2'b0,rdaddr0_swapoe}+{rd0_window,4'b0};
+wire [6:0] rdaddr1_wa={2'b0,rdaddr1_swapoe}+{rd1_window,4'b0};
+wire [6:0] rdaddr2_wa={2'b0,rdaddr2_swapoe}+{rd2_window,4'b0};
+// Final 8-bit address: indices 8-31 (bits [4:3]!=0) get a further offset of 64, indices 0-7 get none.
+wire [7:0] wraddr0={1'b0,wraddr0_wa}+(ecl_irf_rd_m_d[4:3]!=2'b0 ? 8'd64:8'd0);
+wire [7:0] wraddr1={1'b0,wraddr1_wa}+(ecl_irf_rd_g_d[4:3]!=2'b0 ? 8'd64:8'd0);
+wire [7:0] rdaddr0={1'b0,rdaddr0_wa}+(ifu_exu_rs1_s[4:3]!=2'b0 ? 8'd64:8'd0);
+wire [7:0] rdaddr1={1'b0,rdaddr1_wa}+(ifu_exu_rs2_s[4:3]!=2'b0 ? 8'd64:8'd0);
+wire [7:0] rdaddr2={1'b0,rdaddr2_wa}+(ifu_exu_rs3_s[4:3]!=2'b0 ? 8'd64:8'd0);
+
+regfile_1w_4r regfile_thr0(   // thread 0 storage: one write port shared by w and w2, four read ports
+   .clk    (rclk),
+   // write side: data and address come from w2 whenever wen0_1 is set, otherwise from w
+   .wren   (wen0_0 || wen0_1),
+   .wraddr (wen0_1 ? wraddr1 : wraddr0),
+   .din    (wen0_1 ? byp_irf_rd_data_w2 : byp_irf_rd_data_w),
+   // read side: ports 0-2 take rs1-rs3, port 3 takes the rs3 address with bit 0 forced to 1
+   .rdaddr0(rdaddr0),
+   .rd0    (ifu_exu_ren1_s && (ifu_exu_tid_s2 == 2'b00)),
+   .rdaddr1(rdaddr1),
+   .rd1    (ifu_exu_ren2_s && (ifu_exu_tid_s2 == 2'b00)),
+   .rdaddr2(rdaddr2),
+   .rd2    (ifu_exu_ren3_s && (ifu_exu_tid_s2 == 2'b00)),
+   .rdaddr3({rdaddr2[7:1], 1'b1}),
+   .rd3    (ifu_exu_ren3_s && (ifu_exu_tid_s2 == 2'b00)),
+   .dout0  (dout0_0),
+   .dout1  (dout0_1),
+   .dout2  (dout0_2),
+   .dout3  (dout0_3)
+);
+
+regfile_1w_4r regfile_thr1(   // thread 1 storage: one write port shared by w and w2, four read ports
+   .clk    (rclk),
+   // write side: data and address come from w2 whenever wen1_1 is set, otherwise from w
+   .wren   (wen1_0 || wen1_1),
+   .wraddr (wen1_1 ? wraddr1 : wraddr0),
+   .din    (wen1_1 ? byp_irf_rd_data_w2 : byp_irf_rd_data_w),
+   // read side: ports 0-2 take rs1-rs3, port 3 takes the rs3 address with bit 0 forced to 1
+   .rdaddr0(rdaddr0),
+   .rd0    (ifu_exu_ren1_s && (ifu_exu_tid_s2 == 2'b01)),
+   .rdaddr1(rdaddr1),
+   .rd1    (ifu_exu_ren2_s && (ifu_exu_tid_s2 == 2'b01)),
+   .rdaddr2(rdaddr2),
+   .rd2    (ifu_exu_ren3_s && (ifu_exu_tid_s2 == 2'b01)),
+   .rdaddr3({rdaddr2[7:1], 1'b1}),
+   .rd3    (ifu_exu_ren3_s && (ifu_exu_tid_s2 == 2'b01)),
+   .dout0  (dout1_0),
+   .dout1  (dout1_1),
+   .dout2  (dout1_2),
+   .dout3  (dout1_3)
+);
+
+regfile_1w_4r regfile_thr2(   // thread 2 storage: one write port shared by w and w2, four read ports
+   .clk    (rclk),
+   // write side: data and address come from w2 whenever wen2_1 is set, otherwise from w
+   .wren   (wen2_0 || wen2_1),
+   .wraddr (wen2_1 ? wraddr1 : wraddr0),
+   .din    (wen2_1 ? byp_irf_rd_data_w2 : byp_irf_rd_data_w),
+   // read side: ports 0-2 take rs1-rs3, port 3 takes the rs3 address with bit 0 forced to 1
+   .rdaddr0(rdaddr0),
+   .rd0    (ifu_exu_ren1_s && (ifu_exu_tid_s2 == 2'b10)),
+   .rdaddr1(rdaddr1),
+   .rd1    (ifu_exu_ren2_s && (ifu_exu_tid_s2 == 2'b10)),
+   .rdaddr2(rdaddr2),
+   .rd2    (ifu_exu_ren3_s && (ifu_exu_tid_s2 == 2'b10)),
+   .rdaddr3({rdaddr2[7:1], 1'b1}),
+   .rd3    (ifu_exu_ren3_s && (ifu_exu_tid_s2 == 2'b10)),
+   .dout0  (dout2_0),
+   .dout1  (dout2_1),
+   .dout2  (dout2_2),
+   .dout3  (dout2_3)
+);
+
+regfile_1w_4r regfile_thr3(   // thread 3 storage: one write port shared by w and w2, four read ports
+   .clk    (rclk),
+   // write side: data and address come from w2 whenever wen3_1 is set, otherwise from w
+   .wren   (wen3_0 || wen3_1),
+   .wraddr (wen3_1 ? wraddr1 : wraddr0),
+   .din    (wen3_1 ? byp_irf_rd_data_w2 : byp_irf_rd_data_w),
+   // read side: ports 0-2 take rs1-rs3, port 3 takes the rs3 address with bit 0 forced to 1
+   .rdaddr0(rdaddr0),
+   .rd0    (ifu_exu_ren1_s && (ifu_exu_tid_s2 == 2'b11)),
+   .rdaddr1(rdaddr1),
+   .rd1    (ifu_exu_ren2_s && (ifu_exu_tid_s2 == 2'b11)),
+   .rdaddr2(rdaddr2),
+   .rd2    (ifu_exu_ren3_s && (ifu_exu_tid_s2 == 2'b11)),
+   .rdaddr3({rdaddr2[7:1], 1'b1}),
+   .rd3    (ifu_exu_ren3_s && (ifu_exu_tid_s2 == 2'b11)),
+   .dout0  (dout3_0),
+   .dout1  (dout3_1),
+   .dout2  (dout3_2),
+   .dout3  (dout3_3)
+);
+
+always @( * )   // combinational output mux: picks one thread's read data by ifu_exu_tid_s2_d and drives it inverted
+   case(ifu_exu_tid_s2_d)
+      2'b00:
+         begin
+            irf_byp_rs1_data_d_l=~dout0_0;
+            irf_byp_rs2_data_d_l=~dout0_1;
+            irf_byp_rs3_data_d_l=~dout0_2;
+            irf_byp_rs3h_data_d_l=~dout0_3[31:0];
+         end
+      2'b01:
+         begin
+            irf_byp_rs1_data_d_l=~dout1_0;
+            irf_byp_rs2_data_d_l=~dout1_1;
+            irf_byp_rs3_data_d_l=~dout1_2;
+            irf_byp_rs3h_data_d_l=~dout1_3[31:0];
+         end
+      2'b10:
+         begin
+            irf_byp_rs1_data_d_l=~dout2_0;
+            irf_byp_rs2_data_d_l=~dout2_1;
+            irf_byp_rs3_data_d_l=~dout2_2;
+            irf_byp_rs3h_data_d_l=~dout2_3[31:0];
+         end
+      2'b11:
+         begin
+            irf_byp_rs1_data_d_l=~dout3_0;
+            irf_byp_rs2_data_d_l=~dout3_1;
+            irf_byp_rs3_data_d_l=~dout3_2;
+            irf_byp_rs3h_data_d_l=~dout3_3[31:0];
+         end
+   endcase
+
+endmodule
Index: /trunk/T1-common/srams/bw_r_rf32x152b.v
===================================================================
--- /trunk/T1-common/srams/bw_r_rf32x152b.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_rf32x152b.v	(revision 6)
@@ -0,0 +1,288 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_rf32x152b.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//      Description:    DCache Fill Queue of Load Store Unit.
+//                      - Contains invalidates and loads.
+//                      - loads will bypass and/or fill dcache.
+//                      - Entry at head of queue may have to
+//                      be held for multiple passes.
+//
+*/
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_32x152
+`endif
+
+`ifdef FPGA_SYN_32x152
+
+
+module bw_r_rf32x152b(dout, so, rd_en, rd_adr, wr_en, wr_adr, din, si, se, 
+	sehold, rclk, rst_tri_en, reset_l);
+	// FPGA model: NUMENTRIES x 152-bit memory, one clocked write port, one clocked (registered) read port.
+	parameter		NUMENTRIES	= 32;
+
+	input	[4:0]		rd_adr;
+	input			rd_en;
+	input			wr_en;
+	input	[4:0]		wr_adr;
+	input	[151:0]		din;
+	input			rclk;
+	input			reset_l;
+	input			rst_tri_en;
+	input			sehold;		// not referenced in this model
+	input			si;		// not referenced in this model
+	input			se;		// not referenced in this model
+	output	[151:0]		dout;
+	reg	[151:0]		dout;
+	output			so;		// not driven in this model
+
+	wire			clk;
+	wire			wr_vld;
+
+	reg	[151:0]		dfq_mem[(NUMENTRIES - 1):0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+
+	assign clk = rclk;
+	assign wr_vld = ((wr_en & (~rst_tri_en)) & reset_l);	// writes are blocked by rst_tri_en and while reset_l is low
+	// Nonblocking write: the read below no longer races with it; a same-edge read of the written entry sees the old word.
+	always @(posedge clk) begin
+	  if (wr_vld) begin
+	    dfq_mem[wr_adr] <= din;
+	  end
+	end
+	always @(posedge clk) begin
+	  if (rd_en) begin
+	    dout[151:0] <= dfq_mem[rd_adr[4:0]];
+	  end
+	end
+endmodule
+
+
+`else
+
+module bw_r_rf32x152b (/*AUTOARG*/
+   // Outputs
+   dout, so, 
+   // Inputs
+   rd_en, rd_adr, wr_en, wr_adr, din, 
+   si, se, sehold, rclk, rst_tri_en, reset_l);  
+
+parameter NUMENTRIES = 32 ;     // number of entries in dfq 
+
+input [4:0]   rd_adr;     // read adr. 
+input         rd_en;      // read pointer
+input         wr_en;      // write pointer vld
+input [4:0]   wr_adr;     // write adr.
+input [151:0] din;            // wr data 
+input             rclk;       // clock
+input         reset_l;    // active low reset
+input         rst_tri_en; // reset and scan  
+input         sehold;     // scan hold 
+input             si;             // scan in 
+input             se;             // scan enable 
+
+output  [151:0] dout ; // data read out
+output                  so ;   // scan out  
+
+wire [151:0]    dout;
+wire clk; 
+wire wr_vld; 
+
+reg     [151:0]         dfq_mem [NUMENTRIES-1:0];
+
+reg [151:0]     local_dout;
+// reg                  so; 
+
+integer i,j;
+
+//
+// added for atpg support
+wire [4:0]   sehold_rd_adr;        // output of sehold mux - read adr. 
+wire         sehold_rd_en;         // output of sehold mux - read pointer
+wire         sehold_wr_en;         // output of sehold mux - write pointer vld
+wire [4:0]   sehold_wr_adr;        // output of sehold mux - write adr.
+wire [151:0]  sehold_din;          // wr data 
+
+wire [4:0]   rd_adr_d1;    // flopped read adr. 
+wire         rd_en_d1;     // flopped read pointer
+wire         wr_en_d1;     // flopped write pointer vld
+wire [4:0]   wr_adr_d1;    // flopped write adr.
+wire [151:0]  din_d1;      // flopped wr data 
+
+//
+// creating local clock
+assign clk=rclk;
+// 
+//=========================================================================================
+//      support for atpg pattern generation
+//=========================================================================================
+//
+// read controls
+// Read address/enable path: mux between the live inputs (in0) and the copy held in dff_rd_ctrl_d1 (in1), steered by sehold.
+dp_mux2es #(6) mux_sehold_rd_ctrl (
+    .sel  (sehold),
+    .in0  ({rd_adr, rd_en}),
+    .in1  ({rd_adr_d1, rd_en_d1}),
+    .dout ({sehold_rd_adr, sehold_rd_en})
+);
+dff_s #(6) dff_rd_ctrl_d1(
+    .clk (clk),
+    .din ({sehold_rd_adr, sehold_rd_en}),
+    .q   ({rd_adr_d1, rd_en_d1}),
+    .se  (se),
+    .si  (),   // left unconnected
+    .so  ()    // left unconnected
+);
+//
+// write controls
+// Write address/enable path: mux between the live inputs (in0) and the copy held in dff_wr_ctrl_d1 (in1), steered by sehold.
+dp_mux2es #(6) mux_sehold_wr_ctrl (
+    .sel  (sehold),
+    .in0  ({wr_adr, wr_en}),
+    .in1  ({wr_adr_d1, wr_en_d1}),
+    .dout ({sehold_wr_adr, sehold_wr_en})
+);
+dff_s #(6) dff_wr_ctrl_d1(
+    .clk (clk),
+    .din ({sehold_wr_adr, sehold_wr_en}),
+    .q   ({wr_adr_d1, wr_en_d1}),
+    .se  (se),
+    .si  (),   // left unconnected
+    .so  ()    // left unconnected
+);
+//
+// write data
+// Write data path: mux between the live data (in0) and the copy held in dff_din_d1 (in1), steered by sehold.
+dp_mux2es #(152) mux_sehold_din (
+    .sel  (sehold),
+    .in0  (din),
+    .in1  (din_d1),
+    .dout (sehold_din)
+);
+dff_s #(152) dff_din_d1(
+    .clk (clk),
+    .din (sehold_din),
+    .q   (din_d1),
+    .se  (se),
+    .si  (),   // left unconnected
+    .so  ()    // left unconnected
+);
+
+//
+// disable writes to the register file during reset or scan
+assign wr_vld = sehold_wr_en & ~rst_tri_en & reset_l; 
+
+//    always @ (posedge clk)
+//      begin
+//         so <= 1'bx;
+//      end
+
+//=========================================================================================
+//      generate wordlines
+//=========================================================================================
+
+// Word-Line Generation skipped. Implicit in read and write.
+
+//=========================================================================================
+//      write or read to/from memory
+//=========================================================================================
+
+
+// Array write: store the sehold-muxed data word at the sehold-muxed address when wr_vld is set.
+always @(posedge clk)
+  if (wr_vld) begin
+    dfq_mem[sehold_wr_adr[4:0]] = sehold_din;
+  end
+
+// Registered read port. A read of the entry being written in the same cycle
+// returns X; otherwise the entry whose index equals the read address is
+// captured. When no index compares equal, local_dout keeps its value.
+always @(posedge clk)
+  if (sehold_rd_en) begin
+    if (wr_vld && (sehold_rd_adr[4:0] == sehold_wr_adr[4:0])) begin
+      local_dout <= {152{1'bx}};
+    end
+    else begin
+      for (j = 0; j < NUMENTRIES; j = j + 1)
+        if (j == sehold_rd_adr[4:0])
+          local_dout <= dfq_mem[j];
+    end
+  end
+
+always @ ( ~reset_l ) 	// NOTE(review): event on an expression -- runs on every change of reset_l, release as well as assertion
+	begin
+           	local_dout[151:0] <=  	// read data is forced to all-X
+		152'hxx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx ;
+	end
+
+
+assign dout[151:0] = local_dout[151:0];
+
+// Error Checking : Termination Conditions
+
+always @ (posedge clk) // X checks; unknowns are tested with === / reduction XOR because == against an x literal yields x, never true
+        begin
+                if (((rd_en === 1'bx) | // rd_en is undefined, thus terminate
+                        (sehold_rd_en & ((^sehold_rd_adr[4:0]) === 1'bx))) & reset_l) // check outside reset.
+                        if ((^sehold_rd_adr[4:0]) === 1'bx) // report only when the read address itself has unknown bits
+                        begin
+`ifdef INNO_MUXEX
+`else
+`ifdef DEFINE_0IN
+                                // 0in <fire -message "rf32x152b_error, read pointer error (X)"
+                                $display("rf32x152b_error"," read pointer error (X) %h ", rd_adr[4:0]);
+`else
+						`ifdef MODELSIM
+                                $display("rf32x152b_error"," read pointer error (X) %h ", rd_adr[4:0]);
+						`else
+                                $error("rf32x152b_error"," read pointer error (X) %h ", rd_adr[4:0]);
+						`endif		
+`endif
+`endif
+                        end
+                if (((wr_vld === 1'bx) | // wr is undefined, thus terminate
+                        (wr_vld & ((^sehold_wr_adr[4:0]) === 1'bx))) & reset_l) // check outside reset.
+                        begin
+`ifdef INNO_MUXEX
+`else
+`ifdef DEFINE_0IN
+                                // 0in <fire -message "rf32x152b_error, write error (X)"
+                                $display("rf32x152b_error"," write error (X) %h ", wr_adr[4:0]);
+`else
+						`ifdef MODELSIM
+	                            $display("rf32x152b_error"," write error (X) %h ", wr_adr[4:0]);
+						`else
+                                $error("rf32x152b_error"," write error (X) %h ", wr_adr[4:0]);
+						`endif		
+`endif
+`endif
+                        end
+        end
+
+endmodule
+
+`endif
+
Index: /trunk/T1-common/srams/bw_r_icd.v
===================================================================
--- /trunk/T1-common/srams/bw_r_icd.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_icd.v	(revision 6)
@@ -0,0 +1,944 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_icd.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+ //  Module Name:  bw_r_icd
+ //  Description:	
+ //    The ICD contains the icache data.  
+ //    32B line size.  
+ //    Write BW: 16B
+ //    Read BW: 16Bx2 (fetdata and topdata), collapsed to 4Bx2
+ //    Associativity: 4
+ //    Write boundary: 34b (32b inst + parity + predec bit)
+ //    NOTES: 
+ //    1. No clock enable.  Rd/Wr enable is used to trigger the
+ //    operation.
+ //    2. 2:1 mux on address input.  Selects provided externally.
+ //    3. 3:1 mux on data input.   Selects provided and guaranteed
+ //    exclusive, externally.
+ //    
+ */
+
+
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+//`include "sys.h" // system level definition file which contains the 
+// time scale definition
+
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+`include "ifu.h"
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_ICD
+`endif
+
+`ifdef FPGA_SYN_ICD
+
+module bw_r_icd(icd_wsel_fetdata_s1, icd_wsel_topdata_s1, icd_fuse_repair_value,
+	icd_fuse_repair_en, so, rclk, se, si, reset_l, sehold, fdp_icd_index_bf,
+	ifq_icd_index_bf, fcl_icd_index_sel_ifq_bf, ifq_icd_wrway_bf, 
+	ifq_icd_worden_bf, ifq_icd_wrdata_i2, fcl_icd_rdreq_bf, 
+	fcl_icd_wrreq_bf, bist_ic_data, rst_tri_en, ifq_icd_data_sel_old_i2, 
+	ifq_icd_data_sel_fill_i2, ifq_icd_data_sel_bist_i2, fuse_icd_wren, 
+	fuse_icd_rid, fuse_icd_repair_value, fuse_icd_repair_en, 
+	efc_spc_fuse_clk1);
+
+	input			rclk;
+	input			se;
+	input			si;
+	input			reset_l;
+	input			sehold;
+	input	[11:2]		fdp_icd_index_bf;
+	input	[11:2]		ifq_icd_index_bf;
+	input			fcl_icd_index_sel_ifq_bf;
+	input	[1:0]		ifq_icd_wrway_bf;
+	input	[3:0]		ifq_icd_worden_bf;
+	input	[135:0]		ifq_icd_wrdata_i2;
+	input			fcl_icd_rdreq_bf;
+	input			fcl_icd_wrreq_bf;
+	input	[7:0]		bist_ic_data;
+	input			rst_tri_en;
+	input			ifq_icd_data_sel_old_i2;
+	input			ifq_icd_data_sel_fill_i2;
+	input			ifq_icd_data_sel_bist_i2;
+	input			fuse_icd_wren;
+	input	[3:0]		fuse_icd_rid;
+	input	[7:0]		fuse_icd_repair_value;
+	input	[1:0]		fuse_icd_repair_en;
+	input			efc_spc_fuse_clk1;
+	output	[135:0]		icd_wsel_fetdata_s1;
+	output	[135:0]		icd_wsel_topdata_s1;
+	output	[7:0]		icd_fuse_repair_value;
+	output	[1:0]		icd_fuse_repair_en;
+	output			so;
+
+	reg	[7:0]		icd_fuse_repair_value;
+	reg	[1:0]		icd_fuse_repair_en;
+	reg	[135:0]		fetdata_f;
+	reg	[135:0]		topdata_f;
+	reg	[135:0]		fetdata_sa;
+	reg	[135:0]		topdata_sa;
+	reg	[135:0]		fetdata_s1;
+	reg	[135:0]		topdata_s1;
+	wire			clk;
+	wire	[135:0]		next_wrdata_bf;
+	wire	[135:0]		wrdata_f;
+	wire	[135:0]		bist_data_expand;
+    `ifdef FPGA_SYN_ALTERA
+        reg	[11:2]		index_bf;
+    `else
+        wire [11:2]     index_bf;
+    `endif
+	reg	[11:2]		index_f;
+	reg	[11:0]		wr_index0;
+	reg	[11:0]		wr_index1;
+	reg	[11:0]		wr_index2;
+	reg	[11:0]		wr_index3;
+	reg			rdreq_f;
+	reg			wrreq_f;
+	reg	[3:0]		worden_f;
+	reg	[1:0]		wrway_f;
+    `ifdef FPGA_SYN_ALTERA
+
+        reg [33:0]     icdata_ary_00_00  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_00_01  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_00_10  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_00_11  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_01_00  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_01_01  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_01_10  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_01_11  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_10_00  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_10_01  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_10_10  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_10_11  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_11_00  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_11_01  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_11_10  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+        reg [33:0]     icdata_ary_11_11  [255:0] /* synthesis syn_ramstyle = block_ram  */ ;/* syn_ramstyle = no_rw_check */ 
+    `else
+        reg [33:0]     icdata_ary_00_00  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_00_01  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_00_10  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_00_11  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_01_00  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_01_01  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_01_10  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_01_11  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_10_00  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_10_01  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_10_10  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_10_11  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_11_00  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_11_01  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_11_10  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+        reg [33:0]     icdata_ary_11_11  [255:0] /* synthesis syn_ramstyle = block_ram  syn_ramstyle = no_rw_check */ ;
+    `endif
+
+
+
+
+
+	assign clk = rclk; // local clock is a plain alias of rclk
+    `ifdef FPGA_SYN_ALTERA
+    `else
+        assign index_bf = (fcl_icd_index_sel_ifq_bf ? ifq_icd_index_bf : 
+            fdp_icd_index_bf); // non-Altera build: index select is purely combinational
+    `endif
+//	Altera build: index_bf is not driven here; it is assigned inside the clocked array-read block further down.
+//	Previous unconditional form: assign index_bf = (fcl_icd_index_sel_ifq_bf ? ifq_icd_index_bf : fdp_icd_index_bf);
+  	wire [11:2] top_index = {index_f[11:3] , 1'b1}; // index_f with bit 2 forced to 1
+
+	assign bist_data_expand = 136'b0; // BIST input of the write-data mux is tied to zero in this variant
+	assign icd_wsel_fetdata_s1 = fetdata_s1; // module outputs are the S1-stage registers
+	assign icd_wsel_topdata_s1 = topdata_s1;
+
+	mux3ds #(136) icden_mux( // picks next write data; mux3ds is defined elsewhere (presumably one-hot selects -- confirm)
+		.dout				(next_wrdata_bf), 
+		.in0				(wrdata_f), // recirculate the currently held write data
+		.in1				(ifq_icd_wrdata_i2), // fill data from the IFQ
+		.in2				(bist_data_expand), // BIST data (all zero here)
+		.sel0				(ifq_icd_data_sel_old_i2), 
+		.sel1				(ifq_icd_data_sel_fill_i2), 
+		.sel2				(ifq_icd_data_sel_bist_i2));
+	dffe_s #(136) wrdata_reg( // write-data register; scan-in/scan-out ports are left off this instance
+		.din				(next_wrdata_bf), 
+		.clk				(clk), 
+		.q				(wrdata_f), 
+		.en				((~sehold)), // loads only while sehold is low
+		.se				(se));
+
+	always @(posedge clk) begin // input flops (held while sehold is high) plus S1 read-data flops
+	  if (~sehold) begin
+	    rdreq_f <= fcl_icd_rdreq_bf;
+	    wrreq_f <= fcl_icd_wrreq_bf;
+        `ifdef FPGA_SYN_ALTERA
+        `else
+            index_f <= index_bf; // non-Altera only; the Altera build updates index_f in the array-read block
+        `endif
+	    wrway_f <= ifq_icd_wrway_bf;
+	    worden_f <= ifq_icd_worden_bf;
+	    wr_index0 <= {index_bf[11:4], 2'b0, ifq_icd_wrway_bf}; // write address layout: {line index, word, way}
+	    wr_index1 <= {index_bf[11:4], 2'b1, ifq_icd_wrway_bf}; // 2'b1 is word 01
+	    wr_index2 <= {index_bf[11:4], 2'b10, ifq_icd_wrway_bf};
+	    wr_index3 <= {index_bf[11:4], 2'b11, ifq_icd_wrway_bf};
+	  end
+	  fetdata_s1 <= fetdata_f; // S1 flops are outside the sehold gate and load every cycle
+	  topdata_s1 <= topdata_f;
+  end
+
+
+	reg [33:0] fetch_00_00; // fetch_WW_YY holds the registered read of icdata_ary_WW_YY (one 34b entry)
+	reg [33:0] fetch_00_01;
+	reg [33:0] fetch_00_10;
+	reg [33:0] fetch_00_11;
+
+	reg [33:0] fetch_01_00;
+	reg [33:0] fetch_01_01;
+	reg [33:0] fetch_01_10;
+	reg [33:0] fetch_01_11;
+
+	reg [33:0] fetch_10_00;
+	reg [33:0] fetch_10_01;
+	reg [33:0] fetch_10_10;
+	reg [33:0] fetch_10_11;
+
+	reg [33:0] fetch_11_00;
+	reg [33:0] fetch_11_01;
+	reg [33:0] fetch_11_10;
+	reg [33:0] fetch_11_11;
+    `ifdef FPGA_SYN_ALTERA
+	// NOTE(review): the *_d registers and delay_half_cycle below are declared but not referenced in the rest of this module.
+	reg [33:0] fetch_00_00_d;
+	reg [33:0] fetch_00_01_d;
+	reg [33:0] fetch_00_10_d;
+	reg [33:0] fetch_00_11_d;
+
+	reg [33:0] fetch_01_00_d;
+	reg [33:0] fetch_01_01_d;
+	reg [33:0] fetch_01_10_d;
+	reg [33:0] fetch_01_11_d;
+
+	reg [33:0] fetch_10_00_d;
+	reg [33:0] fetch_10_01_d;
+	reg [33:0] fetch_10_10_d;
+	reg [33:0] fetch_10_11_d;
+
+	reg [33:0] fetch_11_00_d;
+	reg [33:0] fetch_11_01_d;
+	reg [33:0] fetch_11_10_d;
+	reg [33:0] fetch_11_11_d;
+    reg        delay_half_cycle;
+
+	// Registered read of all 16 arrays. Altera build: clocked on negedge clk; other builds: posedge clk.
+	always @(negedge clk) begin // Sandeep Changed this to negedge clock from posedge clock
+        // Can we push the reads to the next negedge? Delay this read!! Looks
+        // like the previous write does not get through
+    `else
+    always @(posedge clk) begin
+    `endif
+	  fetch_00_00 <= icdata_ary_00_00[index_bf[11:4]]; // every array is addressed by the same 8-bit line index
+	  fetch_00_01 <= icdata_ary_00_01[index_bf[11:4]];
+	  fetch_00_10 <= icdata_ary_00_10[index_bf[11:4]];
+	  fetch_00_11 <= icdata_ary_00_11[index_bf[11:4]];
+          
+	  fetch_01_00 <= icdata_ary_01_00[index_bf[11:4]];
+	  fetch_01_01 <= icdata_ary_01_01[index_bf[11:4]];
+	  fetch_01_10 <= icdata_ary_01_10[index_bf[11:4]];
+	  fetch_01_11 <= icdata_ary_01_11[index_bf[11:4]];
+          
+	  fetch_10_00 <= icdata_ary_10_00[index_bf[11:4]];
+	  fetch_10_01 <= icdata_ary_10_01[index_bf[11:4]];
+	  fetch_10_10 <= icdata_ary_10_10[index_bf[11:4]];
+	  fetch_10_11 <= icdata_ary_10_11[index_bf[11:4]];
+          
+	  fetch_11_00 <= icdata_ary_11_00[index_bf[11:4]];
+	  fetch_11_01 <= icdata_ary_11_01[index_bf[11:4]];
+	  fetch_11_10 <= icdata_ary_11_10[index_bf[11:4]];
+	  fetch_11_11 <= icdata_ary_11_11[index_bf[11:4]];
+      `ifdef FPGA_SYN_ALTERA
+          index_f <= index_bf; // Sandeep moved this logic 1/2 cycle forward for altera; nonblocking, so this takes the same (old) index_bf used for the reads above
+          index_bf <= (fcl_icd_index_sel_ifq_bf ? ifq_icd_index_bf : // Moved this logic from a continuous assignment to a synchronous assignment
+              fdp_icd_index_bf);
+      `endif
+	end
+
+
+	always @(index_f or rdreq_f or fetch_00_00 or fetch_01_00 or fetch_10_00 or fetch_11_00
+				    or fetch_00_01 or fetch_01_01 or fetch_10_01 or fetch_11_01
+				    or fetch_00_10 or fetch_01_10 or fetch_10_10 or fetch_11_10
+				    or fetch_00_11 or fetch_01_11 or fetch_10_11 or fetch_11_11) begin
+//	  if (rdreq_f) begin   -- read gating is disabled, so rdreq_f is listed above but never read in this block
+	    case(index_f[3:2]) // fetdata way 0 (*_00 arrays): word chosen by index_f[3:2]
+	      2'b00: fetdata_f[33:0] = fetch_00_00;
+	      2'b01: fetdata_f[33:0] = fetch_01_00;
+	      2'b10: fetdata_f[33:0] = fetch_10_00;
+	      2'b11: fetdata_f[33:0] = fetch_11_00;
+	    endcase
+	    case(index_f[3:2]) // fetdata way 1 (*_01 arrays)
+	      2'b00: fetdata_f[67:34] = fetch_00_01;
+	      2'b01: fetdata_f[67:34] = fetch_01_01;
+	      2'b10: fetdata_f[67:34] = fetch_10_01;
+	      2'b11: fetdata_f[67:34] = fetch_11_01;
+	    endcase
+	    case(index_f[3:2]) // fetdata way 2 (*_10 arrays)
+	      2'b00: fetdata_f[101:68] = fetch_00_10;
+	      2'b01: fetdata_f[101:68] = fetch_01_10;
+	      2'b10: fetdata_f[101:68] = fetch_10_10;
+	      2'b11: fetdata_f[101:68] = fetch_11_10;
+	    endcase
+	    case(index_f[3:2]) // fetdata way 3 (*_11 arrays)
+	      2'b00: fetdata_f[135:102] = fetch_00_11;
+	      2'b01: fetdata_f[135:102] = fetch_01_11;
+	      2'b10: fetdata_f[135:102] = fetch_10_11;
+	      2'b11: fetdata_f[135:102] = fetch_11_11;
+	    endcase
+	    case(index_f[3]) // topdata way 0: always an odd word, i.e. word {index_f[3], 1'b1}
+              1'b0: topdata_f[33:0] = fetch_01_00;
+	      1'b1: topdata_f[33:0] = fetch_11_00;
+	    endcase
+	    case(index_f[3]) // topdata way 1
+              1'b0: topdata_f[67:34] = fetch_01_01;
+	      1'b1: topdata_f[67:34] = fetch_11_01;
+	    endcase
+	    case(index_f[3]) // topdata way 2
+              1'b0: topdata_f[101:68] = fetch_01_10;
+	      1'b1: topdata_f[101:68] = fetch_11_10;
+	    endcase
+	    case(index_f[3]) // topdata way 3
+              1'b0: topdata_f[135:102] = fetch_01_11;
+	      1'b1: topdata_f[135:102] = fetch_11_11;
+	    endcase
+	  end
+//	  else
+//	    begin
+//	      fetdata_f = 136'b0;
+//	      topdata_f = 136'b0;
+//	    end
+//	end
+
+	always @(negedge clk) begin // Writes happening at the negedge; arrays are icdata_ary_<word>_<way>
+	  if (wrreq_f & (~rst_tri_en)) begin // no array is written while rst_tri_en is high
+	    if (worden_f[0]) begin // word 0 takes the most significant 34 bits of wrdata_f
+	      if (wr_index0[1:0] == 2'b0) begin // wr_indexN[1:0] is the registered write way
+		icdata_ary_00_00[wr_index0[11:4]] <= wrdata_f[135:102];
+	      end
+	      if (wr_index0[1:0] == 2'b1) begin
+		icdata_ary_00_01[wr_index0[11:4]] <= wrdata_f[135:102];
+	      end
+	      if (wr_index0[1:0] == 2'b10) begin
+		icdata_ary_00_10[wr_index0[11:4]] <= wrdata_f[135:102];
+	      end
+	      if (wr_index0[1:0] == 2'b11) begin
+		icdata_ary_00_11[wr_index0[11:4]] <= wrdata_f[135:102];
+	      end
+	    end
+	    if (worden_f[1]) begin // word 1 <- wrdata_f[101:68]
+	      if (wr_index1[1:0] == 2'b0) begin
+		icdata_ary_01_00[wr_index1[11:4]] <= wrdata_f[101:68];
+	      end
+	      if (wr_index1[1:0] == 2'b1) begin
+		icdata_ary_01_01[wr_index1[11:4]] <= wrdata_f[101:68];
+	      end
+	      if (wr_index1[1:0] == 2'b10) begin
+		icdata_ary_01_10[wr_index1[11:4]] <= wrdata_f[101:68];
+	      end
+	      if (wr_index1[1:0] == 2'b11) begin
+		icdata_ary_01_11[wr_index1[11:4]] <= wrdata_f[101:68];
+	      end
+	    end
+	    if (worden_f[2]) begin // word 2 <- wrdata_f[67:34]
+	      if (wr_index2[1:0] == 2'b0) begin
+		icdata_ary_10_00[wr_index2[11:4]] <= wrdata_f[67:34];
+	      end
+	      if (wr_index2[1:0] == 2'b1) begin
+		icdata_ary_10_01[wr_index2[11:4]] <= wrdata_f[67:34];
+	      end
+	      if (wr_index2[1:0] == 2'b10) begin
+		icdata_ary_10_10[wr_index2[11:4]] <= wrdata_f[67:34];
+	      end
+	      if (wr_index2[1:0] == 2'b11) begin
+		icdata_ary_10_11[wr_index2[11:4]] <= wrdata_f[67:34];
+	      end
+	    end
+	    if (worden_f[3]) begin // word 3 <- wrdata_f[33:0]
+	      if (wr_index3[1:0] == 2'b0) begin
+		icdata_ary_11_00[wr_index3[11:4]] <= wrdata_f[33:0];
+	      end
+	      if (wr_index3[1:0] == 2'b1) begin
+		icdata_ary_11_01[wr_index3[11:4]] <= wrdata_f[33:0];
+	      end
+	      if (wr_index3[1:0] == 2'b10) begin
+		icdata_ary_11_10[wr_index3[11:4]] <= wrdata_f[33:0];
+	      end
+	      if (wr_index3[1:0] == 2'b11) begin
+		icdata_ary_11_11[wr_index3[11:4]] <= wrdata_f[33:0];
+	      end
+	    end
+	  end
+	end
+endmodule
+
+`else
+
+module bw_r_icd(/*AUTOARG*/
+   // Outputs
+   icd_wsel_fetdata_s1, icd_wsel_topdata_s1, icd_fuse_repair_value, 
+   icd_fuse_repair_en, so, 
+   // Inputs
+   rclk, se, si, reset_l, sehold, fdp_icd_index_bf, ifq_icd_index_bf, 
+   fcl_icd_index_sel_ifq_bf, ifq_icd_wrway_bf, ifq_icd_worden_bf, 
+   ifq_icd_wrdata_i2, fcl_icd_rdreq_bf, fcl_icd_wrreq_bf, 
+   bist_ic_data, rst_tri_en, ifq_icd_data_sel_old_i2, 
+   ifq_icd_data_sel_fill_i2, ifq_icd_data_sel_bist_i2, fuse_icd_wren, 
+   fuse_icd_rid, fuse_icd_repair_value, fuse_icd_repair_en, 
+   efc_spc_fuse_clk1
+   );
+   // I-cache data array model: reads 4 ways x 34b for the fetch word and for the "top" word; writes up to 4 words into one way.
+   input          rclk,                // clock; aliased to clk below
+                  se,                  // scan enable, forwarded to wrdata_reg
+                  si,                  // scan in (not read anywhere in this module)
+                  reset_l;             // active-low asynchronous reset of the redundancy registers
+   input          sehold;              // when high, the input flops and wrdata_reg keep their values
+   
+   input [11:2]   fdp_icd_index_bf,    // index to write to/read from
+                  ifq_icd_index_bf;
+   input          fcl_icd_index_sel_ifq_bf; // 1: use ifq_icd_index_bf, 0: use fdp_icd_index_bf
+
+   input [1:0]    ifq_icd_wrway_bf;    // way to write to
+   input [3:0]    ifq_icd_worden_bf;   // word to write to (ignore index 1:0)
+   input [135:0]  ifq_icd_wrdata_i2;   // 128b data, 4b sw, 4b parity
+
+   input          fcl_icd_rdreq_bf,    // read request
+		              fcl_icd_wrreq_bf;    // write request
+
+   input [7:0]    bist_ic_data;        // needs to be expanded (see bist_data_expand)
+   input          rst_tri_en;          // when high, array writes are suppressed
+   
+   // datain mux selects
+   input          ifq_icd_data_sel_old_i2,
+                  ifq_icd_data_sel_fill_i2,
+                  ifq_icd_data_sel_bist_i2;
+
+   // efuse values for redundancy
+   input         fuse_icd_wren;          // write enable for the register selected by fuse_icd_rid
+   input [3:0]   fuse_icd_rid;           // selects 1 of 16 redundancy registers (read and write)
+   input [7:0]   fuse_icd_repair_value;
+   input [1:0]   fuse_icd_repair_en;
+
+   // efuse non ovl clks
+   input         efc_spc_fuse_clk1;  // use this clk to talk to fuse hdr
+   // outputs
+   output [135:0]  icd_wsel_fetdata_s1,
+		               icd_wsel_topdata_s1;
+
+   // redundancy reg read
+   output [7:0]    icd_fuse_repair_value;
+   output [1:0]    icd_fuse_repair_en;
+   
+   output          so;
+   // NOTE(review): 'so' is never assigned in this module and 'si' is never read; wrdata_reg leaves its scan pins unconnected.
+
+   //----------------------------------------------------------------------
+   // Declarations
+   //----------------------------------------------------------------------
+
+   // local signals
+`ifdef DEFINE_0IN
+   reg [135:0]    fetdata_s1, 
+                  topdata_s1;
+   wire [135:0]   fetdata_sa,            // driven by the ic_data read muxes in this build
+                  topdata_sa;
+`else
+   reg [33:0]     icdata_ary  [4095:0];  // 12b address = {index[11:2], way[1:0]}
+
+   reg [135:0]    fetdata_f,             // way0 is lsb, way3 is msb
+		              topdata_f,
+                  fetdata_sa,
+                  topdata_sa,
+   		            fetdata_s1,
+		              topdata_s1;
+`endif
+
+   wire           clk;
+   
+   wire [135:0]   next_wrdata_bf,
+                  wrdata_f,
+                  bist_data_expand;
+
+   wire [11:2]     top_index,
+                   index_bf;
+   
+   reg  [11:2]     index_f;
+
+   wire [11:0]     wr_index0,             // {index_f[11:4], word, wrway_f}, one per word
+		               wr_index1,
+		               wr_index2,
+		               wr_index3;
+   
+   reg            rdreq_f,
+		              wrreq_f;
+   reg [3:0]      worden_f;
+   reg [1:0]      wrway_f;
+
+
+   // redundancy (fuse repair) registers: rows hold {en[1:0], value[5:0]}, cols hold {en[1:0], value[7:0]}
+   reg [7:0] red0_ev_row,
+             red0_od_row;
+   reg [9:0] red0_ev_col,
+             red0_od_col;
+   reg [7:0] red1_ev_row,
+             red1_od_row;
+   reg [9:0] red1_ev_col,
+             red1_od_col;
+   reg [7:0] red2_ev_row,
+             red2_od_row;
+   reg [9:0] red2_ev_col,
+             red2_od_col;
+   reg [7:0] red3_ev_row,
+             red3_od_row;
+   reg [9:0] red3_ev_col,
+             red3_od_col;
+
+   reg [7:0] icd_fuse_repair_value;
+   reg [1:0] icd_fuse_repair_en;
+   
+   
+   //
+   // Code starts here 
+   //
+
+   // clk header derives clk from rclk
+   assign         clk = rclk;
+
+
+   // mux merged with flop
+   assign index_bf = fcl_icd_index_sel_ifq_bf ? ifq_icd_index_bf :
+                                                fdp_icd_index_bf;
+
+   always @ (posedge clk)
+     begin
+	      // input flops (hold their value while sehold is high)
+        if (~sehold)
+          begin
+	           rdreq_f <= fcl_icd_rdreq_bf;
+	           wrreq_f <= fcl_icd_wrreq_bf;
+	           index_f <= index_bf;
+	           wrway_f <= ifq_icd_wrway_bf;
+	           worden_f <= ifq_icd_worden_bf;
+          end
+	      // S stage flops (for rd data); not gated by sehold
+	      fetdata_s1 <= fetdata_sa;
+	      topdata_s1 <= topdata_sa;
+	      
+     end // always @ (posedge clk)
+
+   // BIST data: four identical 34b groups, each {bist_ic_data[1:0], 4 copies of bist_ic_data[7:0]}
+   assign   bist_data_expand = {bist_ic_data[1:0], {4{bist_ic_data[7:0]}},
+                                bist_ic_data[1:0], {4{bist_ic_data[7:0]}},
+                                bist_ic_data[1:0], {4{bist_ic_data[7:0]}},
+                                bist_ic_data[1:0], {4{bist_ic_data[7:0]}}};   
+
+   
+   // Mux + flop for write data input
+   // ic data enable mux
+   mux3ds #(136) icden_mux(.dout (next_wrdata_bf),
+			                     .in0  (wrdata_f),
+			                     .in1  (ifq_icd_wrdata_i2),
+			                     .in2  (bist_data_expand),
+			                     .sel0 (ifq_icd_data_sel_old_i2),
+			                     .sel1 (ifq_icd_data_sel_fill_i2),
+			                     .sel2 (ifq_icd_data_sel_bist_i2));
+   // write data register
+   // se hold is taken care of by external logic (in ifqctl)
+   dffe_s #(136)  wrdata_reg(.din (next_wrdata_bf),
+			                     .clk (clk),
+			                     .q   (wrdata_f),
+                           .en  (~sehold),
+			                     .se  (se), .si(), .so());
+
+   
+   //----------------------------------------------------------------------
+   // Read Operation
+   //----------------------------------------------------------------------
+
+   // The index has 3 parts. 
+   //    1. The 16B half-line index -- bits 11:4
+   //    2. The word offset -- bits 3:2 for reads, xx for writes
+   //    3. The way -- wrway_f for writes, xx for reads
+   // i.e. we read 1 word from each of 4 ways, but 
+   //      we write 4 words to 1 way
+   
+   assign top_index = {index_f[11:3] , 1'b1}; // index_f with bit 2 forced to 1
+
+`ifdef DEFINE_0IN
+// physical implementation: ignore this and use else portion
+   
+   wire [15:0] we_wrd = ({ 3'b0,worden_f[3], 3'b0,worden_f[2],
+                           3'b0,worden_f[1], 3'b0,worden_f[0] }) << wrway_f; // bit (4*word + way) set for each enabled word
+
+   wire [543:0] we = (~wrreq_f        )   ? 544'h0 :  
+                { {34{we_wrd[15]}}, {34{we_wrd[14]}}, {34{we_wrd[13]}}, {34{we_wrd[12]}},
+                  {34{we_wrd[11]}}, {34{we_wrd[10]}}, {34{we_wrd[ 9]}}, {34{we_wrd[ 8]}},
+                  {34{we_wrd[ 7]}}, {34{we_wrd[ 6]}}, {34{we_wrd[ 5]}}, {34{we_wrd[ 4]}},
+                  {34{we_wrd[ 3]}}, {34{we_wrd[ 2]}}, {34{we_wrd[ 1]}}, {34{we_wrd[ 0]}} };
+
+   wire [543:0] din = ({ {4{wrdata_f[ 33: 0]}}, {4{wrdata_f[ 67: 34]}},
+                         {4{wrdata_f[101:68]}}, {4{wrdata_f[135:102]}} }); // each word's data replicated across its 4 way slots
+   wire [543:0] dout;
+
+   ic_data ic_data ( .nclk(~clk), .adr(index_f[11:4]), .we(we), .din(din), .dout(dout) );
+
+   wire [271:0] dout_l1 = index_f[3] ? dout[543:272] : dout[271:0];
+
+   assign       fetdata_sa[135:0] = index_f[2] ? dout_l1[271:136] : dout_l1[135:0];
+   assign       topdata_sa[135:0] =              dout_l1[271:136];
+
+
+`else 
+
+   // for physical implementation use this
+   // NOTE(review): icdata_ary is not in the sensitivity list below ("memory or" is commented out); re-evaluation relies on the listed signals changing.
+   // read (inst[31:0] + sw bit + par bit) * 4 ways
+   always @(/*AUTOSENSE*/ /*memory or*/ index_f or rdreq_f
+            or top_index or wrreq_f)
+     begin
+        if (rdreq_f)
+          begin
+             if (wrreq_f)  // rd-wr contention
+               begin
+	                fetdata_f = 136'bx;
+	                topdata_f = 136'bx;
+	             end
+	           else 
+	             begin  // regular read
+	                fetdata_f[33:0] = icdata_ary[{index_f,2'b00}];    // way 0
+	                fetdata_f[67:34] = icdata_ary[{index_f,2'b01}];   // way 1
+	                fetdata_f[101:68] = icdata_ary[{index_f,2'b10}];  // way 2
+	                fetdata_f[135:102] = icdata_ary[{index_f,2'b11}]; // way 3
+
+	                topdata_f[33:0] = icdata_ary[{top_index, 2'b00}];      // way 0
+	                topdata_f[67:34] = icdata_ary[{top_index, 2'b01}];     // way 1
+	                topdata_f[101:68] = icdata_ary[{top_index, 2'b10}];    // way 2
+	                topdata_f[135:102] = icdata_ary[{top_index, 2'b11}];   // way 3
+	             end // else: !if(wrreq_f)
+          end // if (rdreq_f)
+
+	      else      // icache disabled or rd disabled
+	        begin
+// JC modified begin: drive zeros instead of x when no read is requested
+//                 fetdata_f = 136'bx;
+//                 topdata_f = 136'bx;
+                   fetdata_f = 136'b0;
+                   topdata_f = 136'b0;
+// JC modified end
+	        end // else: !if(rdreq_f)
+     end // always @ (...
+
+
+   // SA latch -- to make 0in happy; transparent while clk is low, holds while clk is high
+   always @ (clk or fetdata_f or topdata_f)
+     begin
+        if (~clk)
+          begin
+             fetdata_sa <= fetdata_f;
+             topdata_sa <= topdata_f;
+          end
+     end
+`endif // !`ifdef DEFINE_0IN
+
+   // final outputs (272bits)
+   assign icd_wsel_fetdata_s1 = fetdata_s1;
+   assign icd_wsel_topdata_s1 = topdata_s1;
+   
+   
+   //----------------------------------------------------------------------
+   // Write Operation
+   //----------------------------------------------------------------------
+   
+   // The index has 3 parts. 
+   //    1. The 16B half-line index -- bits 11:4 of index_f
+   //    2. The word offset -- bits 3:2 for reads, xx for writes
+   //    3. The way -- wrway_f for writes, xx for reads
+
+   //                  index          word    way
+   //                  -----          ----    ---
+   assign wr_index0 = {index_f[11:4], 2'b00, wrway_f};
+   assign wr_index1 = {index_f[11:4], 2'b01, wrway_f};
+   assign wr_index2 = {index_f[11:4], 2'b10, wrway_f};
+   assign wr_index3 = {index_f[11:4], 2'b11, wrway_f};
+
+`ifdef DEFINE_0IN
+`else
+   // assume write happens @ negedge clk  (i.e. phase 1)
+   always @ (negedge clk)
+     begin
+	      if (wrreq_f & ~rst_tri_en)
+	        begin
+	           // instructions always Big Endian: word 0 is the most significant 34 bits of wrdata_f
+	           if (worden_f[0]) 
+			icdata_ary[wr_index0] <= wrdata_f[135:102];
+	           if (worden_f[1]) 
+			icdata_ary[wr_index1] <= wrdata_f[101:68];
+	           if (worden_f[2]) 
+			icdata_ary[wr_index2] <= wrdata_f[67:34];
+	           if (worden_f[3]) 
+			icdata_ary[wr_index3] <= wrdata_f[33:0];
+	        end // if (wrreq_f)
+     end // always @ (...
+`endif // !`ifdef DEFINE_0IN
+
+
+   //--------------------------------------------------------------
+   // Redundancy Registers
+   //--------------------------------------------------------------
+   //
+   // read red regs 
+   // 16:1 mux selected by fuse_icd_rid; row entries return a 6b value zero-extended to 8b
+   always @ (/*AUTOSENSE*/fuse_icd_rid or red0_ev_col or red0_ev_row
+             or red0_od_col or red0_od_row or red1_ev_col
+             or red1_ev_row or red1_od_col or red1_od_row
+             or red2_ev_col or red2_ev_row or red2_od_col
+             or red2_od_row or red3_ev_col or red3_ev_row
+             or red3_od_col or red3_od_row)
+     begin
+        // sub array 0
+        if (fuse_icd_rid[3:0] == 4'b0)
+          begin
+             icd_fuse_repair_value = {2'b0, red0_ev_row[5:0]};
+             icd_fuse_repair_en = red0_ev_row[7:6];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b1)
+          begin
+             icd_fuse_repair_value =  {2'b0, red0_od_row[5:0]};
+             icd_fuse_repair_en = red0_od_row[7:6];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b10)
+          begin
+             icd_fuse_repair_value = red0_ev_col[7:0];
+             icd_fuse_repair_en = red0_ev_col[9:8];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b11)
+          begin
+             icd_fuse_repair_value = red0_od_col[7:0];
+             icd_fuse_repair_en = red0_od_col[9:8];
+          end
+
+        // sub array 1
+        else if (fuse_icd_rid[3:0] == 4'b100)
+          begin
+             icd_fuse_repair_value =  {2'b0, red1_ev_row[5:0]};
+             icd_fuse_repair_en = red1_ev_row[7:6];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b101)
+          begin
+             icd_fuse_repair_value =  {2'b0, red1_od_row[5:0]};
+             icd_fuse_repair_en = red1_od_row[7:6];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b110)
+          begin
+             icd_fuse_repair_value = red1_ev_col[7:0];
+             icd_fuse_repair_en = red1_ev_col[9:8];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b111)
+          begin
+             icd_fuse_repair_value = red1_od_col[7:0];
+             icd_fuse_repair_en = red1_od_col[9:8];
+          end
+
+        // sub array 2
+        else if (fuse_icd_rid[3:0] == 4'b1000)
+          begin
+             icd_fuse_repair_value =  {2'b0, red2_ev_row[5:0]};
+             icd_fuse_repair_en = red2_ev_row[7:6];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b1001)
+          begin
+             icd_fuse_repair_value =  {2'b0, red2_od_row[5:0]};
+             icd_fuse_repair_en = red2_od_row[7:6];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b1010)
+          begin
+             icd_fuse_repair_value = red2_ev_col[7:0];
+             icd_fuse_repair_en = red2_ev_col[9:8];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b1011)
+          begin
+             icd_fuse_repair_value = red2_od_col[7:0];
+             icd_fuse_repair_en = red2_od_col[9:8];
+          end
+
+        // sub array 3
+        else if (fuse_icd_rid[3:0] == 4'b1100)
+          begin
+             icd_fuse_repair_value =  {2'b0, red3_ev_row[5:0]};
+             icd_fuse_repair_en = red3_ev_row[7:6];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b1101)
+          begin
+             icd_fuse_repair_value =  {2'b0, red3_od_row[5:0]};
+             icd_fuse_repair_en = red3_od_row[7:6];
+          end
+        else if (fuse_icd_rid[3:0] == 4'b1110)
+          begin
+             icd_fuse_repair_value = red3_ev_col[7:0];
+             icd_fuse_repair_en = red3_ev_col[9:8];
+          end
+        else // if (fuse_icd_rid[3:0] == 4'b1111)
+          begin
+             icd_fuse_repair_value = red3_od_col[7:0];
+             icd_fuse_repair_en = red3_od_col[9:8];
+          end
+     end // always @ (...
+
+
+   //
+   // write red regs
+   //
+   // use clk1 to latch anything to/from the hdr
+   //
+   // reset_l is an asynchronous reset.  Only the repair enables [9:8]
+   // need to be reset.  However, the actual circuit resets all the bits.
+   always @ (posedge efc_spc_fuse_clk1 or negedge reset_l)
+     begin
+        if (~reset_l)
+          begin // async reset
+             red0_ev_row[7:0] <= 8'b0;
+             red1_ev_row[7:0] <= 8'b0;
+             red2_ev_row[7:0] <= 8'b0;
+             red3_ev_row[7:0] <= 8'b0;
+
+             red0_od_row[7:0] <= 8'b0;
+             red1_od_row[7:0] <= 8'b0;
+             red2_od_row[7:0] <= 8'b0;
+             red3_od_row[7:0] <= 8'b0;
+
+             red0_ev_col[9:0] <= 10'b0;
+             red1_ev_col[9:0] <= 10'b0;
+             red2_ev_col[9:0] <= 10'b0;
+             red3_ev_col[9:0] <= 10'b0;
+
+             red0_od_col[9:0] <= 10'b0;
+             red1_od_col[9:0] <= 10'b0;
+             red2_od_col[9:0] <= 10'b0;
+             red3_od_col[9:0] <= 10'b0;
+          end // if (~reset_l)
+             
+        else if (fuse_icd_wren & reset_l)
+          begin    // 4:16 decode; sub array 0 first. Row entries keep only repair_value[5:0].
+             if (fuse_icd_rid[3:0] == 4'b0)
+               begin
+                  red0_ev_row <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[5:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b1)
+               begin
+                  red0_od_row <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[5:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b10)
+               begin
+                  red0_ev_col <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[7:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b11)
+               begin
+                  red0_od_col <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[7:0]};
+               end
+
+             // sub array 1
+             else if (fuse_icd_rid[3:0] == 4'b100)
+               begin
+                  red1_ev_row <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[5:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b101)
+               begin
+                  red1_od_row <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[5:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b110)
+               begin
+                  red1_ev_col <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[7:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b111)
+               begin
+                  red1_od_col <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[7:0]};
+               end
+
+             // sub array 2
+             else if (fuse_icd_rid[3:0] == 4'b1000)
+               begin
+                  red2_ev_row <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[5:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b1001)
+               begin
+                  red2_od_row <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[5:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b1010)
+               begin
+                  red2_ev_col <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[7:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b1011)
+               begin
+                  red2_od_col <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[7:0]};
+               end
+
+             // sub array 3
+             else if (fuse_icd_rid[3:0] == 4'b1100)
+               begin
+                  red3_ev_row <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[5:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b1101)
+               begin
+                  red3_od_row <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[5:0]};
+               end
+             else if (fuse_icd_rid[3:0] == 4'b1110)
+               begin
+                  red3_ev_col <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[7:0]};
+               end
+             else // if (fuse_icd_rid[3:0] == 4'b1111)
+               begin
+                  red3_od_col <= {fuse_icd_repair_en[1:0],
+                                 fuse_icd_repair_value[7:0]};
+               end
+          end // if (fuse_icd_wren)
+     end // always @ (...
+
+endmodule // bw_r_icd
+
+`endif
Index: /trunk/T1-common/srams/bw_rf_16x65.v
===================================================================
--- /trunk/T1-common/srams/bw_rf_16x65.v	(revision 6)
+++ /trunk/T1-common/srams/bw_rf_16x65.v	(revision 6)
@@ -0,0 +1,410 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_rf_16x65.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module bw_rf_16x65(
+   rd_clk,    // read clock
+   wr_clk,    // write clock
+   csn_rd,    // read enable -- active low 
+   csn_wr,    // write enable -- active low
+   hold,      // hold signal -- unflopped -- hold =1 holds input data 
+   testmux_sel, // bypass signal -- unflopped -- testmux_sel = 1 drives do from the flopped di (di_ff) 
+   scan_en,   // Scan enable unflopped  
+   margin,    // Delay for the circuits -- 5 bits; the margin check expects 5'b10101 
+   rd_a,      // read address  
+   wr_a,      // Write address
+   di,        // Data input
+   si,        // scan in  
+   so,        // scan out  
+   listen_out, // Listening flop -- registers do on the read clock 
+   do          // Data out
+
+);
+// Behavioral model of a 16-entry x 65-bit register file with separate read and write clocks.
+   input rd_clk;
+   input wr_clk;
+   input csn_rd;
+   input csn_wr;
+   input hold;
+   input testmux_sel;
+   input scan_en;
+   input [4:0] margin;
+   input [3:0] rd_a;
+   input [3:0] wr_a;
+   input [64:0] di;
+   input si;
+   output so;
+   output [64:0] do;
+   output [64:0] listen_out;
+// Simulation-check controls: each check is active when its parameter is 0.
+parameter  SYNC_CLOCK_CHK1 = 0; // read/write address conflict check on the flopped controls
+parameter  SYNC_CLOCK_CHK2 = 0; // conflict check using the read side delayed by #300
+parameter  SYNC_CLOCK_CHK3 = 0; // conflict check using the write side delayed by #300
+parameter  MARGIN_WARNING = 0; // margin warning is on by default
+
+
+// Start code
+`ifdef DEFINE_0IN
+wire [64:0] array_out  ;
+`else
+reg [64:0] memarray[15:0] ;
+reg [64:0] array_out  ;
+`endif
+
+reg [64:0] array_out_latch    ;
+
+
+reg  [3:0] rd_a_ff   ;
+wire [3:0] rd_a_ff_so;
+wire [3:0] rd_a_ff_si ;
+
+reg  [3:0] wr_a_ff   ;
+wire [3:0] wr_a_ff_so;
+wire [3:0] wr_a_ff_si ;
+
+reg  [64:0] di_ff   ;
+wire [64:0] di_ff_so;
+wire [64:0] di_ff_si;
+
+wire [64:0] listen_out_so;
+wire [64:0] listen_out_si ;
+reg  [64:0] listen_out     ;
+
+
+reg        csn_rd_ff ;
+wire       csn_rd_ff_si ;
+wire       csn_rd_ff_so ;
+
+reg        csn_wr_ff ;
+wire       csn_wr_ff_si ;
+wire       csn_wr_ff_so ;
+
+reg        di_ff_latch_so ;
+// Scan chain connections. Shift order:
+//   si -> wr_a_ff[3..0] -> csn_wr_ff -> di_ff[64..0] -> di_ff_latch_so
+//      -> listen_out[0..64] -> csn_rd_ff -> rd_a_ff[0..3] -> so
+assign wr_a_ff_si[3:0] = {si      , wr_a_ff_so[3:1]} ;
+assign csn_wr_ff_si    = wr_a_ff_so[0] ;
+assign di_ff_si        = {csn_wr_ff_so, di_ff_so[64:1]};
+assign listen_out_si   = {listen_out_so[63:0], di_ff_latch_so} ;
+assign csn_rd_ff_si    = listen_out_so[64] ;
+assign rd_a_ff_si[3:0] = {rd_a_ff_so[2:0], csn_rd_ff_so} ;
+assign so              = rd_a_ff_so[3] ;
+// Clock headers. This model applies no clock enable (see the commented-out clk_en terms).
+//   *_local_clk low : the input latches sample the functional inputs
+//   *_smclk low     : the input latches sample the scan/hold path (*_inst_smin)
+
+wire   rd_ssclk       = rd_clk ; // clk_en & rd_clk ;
+wire   rd_local_clk   = rd_ssclk | scan_en | hold ; 
+wire   rd_smclk       = rd_ssclk |  ~(scan_en | hold) ;
+
+wire   wr_ssclk       = wr_clk ; // clk_en & wr_clk ;
+wire   wr_local_clk   = wr_ssclk | scan_en | hold ; 
+wire   wr_smclk       = wr_ssclk |  ~(scan_en | hold) ;
+
+
+/////////////////////////////////////////////////////
+// csn_rd Flop                                     //
+// Input latch (mdata), a posedge rd_ssclk scan/hold flop (scan_out), and a
+// functional output latch (csn_rd_ff) that is transparent while rd_local_clk is high.
+reg                     csn_rd_ff_inst_mdata ;
+wire                    csn_rd_ff_inst_smin ;
+reg                     csn_rd_ff_scan_out ;
+// scan/hold input: recirculate scan_out when hold = 1, otherwise take the scan chain
+assign csn_rd_ff_inst_smin  = hold ?  csn_rd_ff_scan_out :  csn_rd_ff_si ; 
+always @(rd_smclk or rd_local_clk or csn_rd or csn_rd_ff_inst_smin ) begin
+       if (!rd_local_clk) begin
+          csn_rd_ff_inst_mdata = csn_rd ;
+       end
+       if (!rd_smclk) begin
+          csn_rd_ff_inst_mdata = csn_rd_ff_inst_smin;
+       end
+end
+always @(posedge rd_ssclk) begin
+    csn_rd_ff_scan_out    <=  csn_rd_ff_inst_mdata ; 
+end
+always @(rd_local_clk or csn_rd_ff_inst_mdata) begin
+   if (rd_local_clk ) begin
+    csn_rd_ff    <=  csn_rd_ff_inst_mdata ; 
+   end
+end
+assign csn_rd_ff_so =  csn_rd_ff_scan_out;
+        
+/////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////
+// rd_a Flop -- same structure as the csn_rd flop, 4 bits wide
+/////////////////////////////////////////////////////
+reg     [3:0]   rd_a_ff_inst_mdata ;
+wire    [3:0]   rd_a_ff_inst_smin ;
+reg     [3:0]   rd_a_ff_scan_out ;
+// scan/hold input: recirculate scan_out when hold = 1, otherwise take the scan chain
+assign rd_a_ff_inst_smin[3:0]  = hold ?  rd_a_ff_scan_out[3:0] :  rd_a_ff_si[3:0] ; 
+always @(rd_smclk or rd_local_clk or rd_a or rd_a_ff_inst_smin ) begin
+       if (!rd_local_clk) begin
+          rd_a_ff_inst_mdata = rd_a[3:0] ;
+       end
+       if (!rd_smclk) begin
+          rd_a_ff_inst_mdata = rd_a_ff_inst_smin;
+       end
+end
+always @(posedge rd_ssclk) begin
+    rd_a_ff_scan_out[3:0]   <=  rd_a_ff_inst_mdata ; 
+end
+always @(rd_local_clk or rd_a_ff_inst_mdata) begin
+   if (rd_local_clk) begin
+    rd_a_ff[3:0]   <=  rd_a_ff_inst_mdata ; 
+   end
+end
+assign rd_a_ff_so[3:0] = rd_a_ff_scan_out[3:0] ;
+/////////////////////////////////////////////////////
+        
+/////////////////////////////////////////////////////
+// csn_wr Flop -- input latch (mdata) followed by a posedge wr_ssclk flop; //
+// csn_wr_ff is both the functional output and the scan-out.               //
+reg                     csn_wr_ff_inst_mdata ;
+wire                    csn_wr_ff_inst_smin ;
+// scan/hold input: recirculate the flop output when hold = 1, otherwise take the scan chain
+assign csn_wr_ff_inst_smin  = hold ?  csn_wr_ff :  csn_wr_ff_si ; 
+always @(wr_smclk or wr_local_clk or csn_wr or csn_wr_ff_inst_smin ) begin
+       if (!wr_local_clk) begin
+          csn_wr_ff_inst_mdata = csn_wr ;
+       end
+       if (!wr_smclk) begin
+          csn_wr_ff_inst_mdata = csn_wr_ff_inst_smin;
+       end
+end
+always @(posedge wr_ssclk) begin
+    csn_wr_ff    <=  csn_wr_ff_inst_mdata ; 
+end
+assign csn_wr_ff_so =  csn_wr_ff;
+/////////////////////////////////////////////////////
+        
+/////////////////////////////////////////////////////
+// wr_a Flop -- same structure as the csn_wr flop, 4 bits wide
+/////////////////////////////////////////////////////
+reg     [3:0]   wr_a_ff_inst_mdata ;
+wire    [3:0]   wr_a_ff_inst_smin ;
+// scan/hold input: recirculate the flop output when hold = 1, otherwise take the scan chain
+assign wr_a_ff_inst_smin[3:0]  = hold ?  wr_a_ff[3:0] :  wr_a_ff_si[3:0] ; 
+always @(wr_smclk or wr_local_clk or wr_a or wr_a_ff_inst_smin ) begin
+       if (!wr_local_clk) begin
+          wr_a_ff_inst_mdata = wr_a[3:0] ;
+       end
+       if (!wr_smclk) begin
+          wr_a_ff_inst_mdata = wr_a_ff_inst_smin;
+       end
+end
+always @(posedge wr_ssclk) begin
+    wr_a_ff[3:0]   <=  wr_a_ff_inst_mdata ; 
+end
+assign wr_a_ff_so[3:0] = wr_a_ff[3:0] ;
+/////////////////////////////////////////////////////
+
+/////////////////////////////////////////////////////
+// di Flop -- same structure as the csn_wr flop, 65 bits wide
+/////////////////////////////////////////////////////
+reg     [64:0]  di_ff_inst_mdata ;
+wire    [64:0]  di_ff_inst_smin ;
+// scan/hold input: recirculate the flop output when hold = 1, otherwise take the scan chain
+assign di_ff_inst_smin[64:0]  = hold ?  di_ff[64:0] :  di_ff_si[64:0] ; 
+always @(wr_smclk or wr_local_clk or di or di_ff_inst_smin ) begin
+       if (!wr_local_clk) begin
+          di_ff_inst_mdata = di[64:0] ;
+       end
+       if (!wr_smclk) begin
+          di_ff_inst_mdata = di_ff_inst_smin;
+       end
+end
+always @(posedge wr_ssclk) begin
+    di_ff[64:0]   <=  di_ff_inst_mdata ; 
+end
+assign di_ff_so[64:0] = di_ff[64:0] ;
+/////////////////////////////////////////////////////
+
+wire wr_enable_l = csn_wr_ff | scan_en ;
+wire rd_enable_l = csn_rd_ff | scan_en ;
+
+// wire wr_clk_qual = wr_ssclk & ~scan_en ; 
+`ifdef DEFINE_0IN
+rf16x65  rf16x65 ( .rdclk(rd_ssclk), .wrclk(wr_ssclk), .radr(rd_a_ff), .wadr(wr_a_ff), .ren(!rd_enable_l),
+                        .we(!wr_enable_l), .wm(65'h1FFFFFFFFFFFFFFFF), .din(di_ff), .dout(array_out) );
+`else
+// Write port: store di_ff at wr_a_ff while the write clock is low and the
+// (active-low) write enable is a known 0.
+always @(wr_ssclk or wr_a_ff or wr_enable_l or di_ff ) begin
+     if (!wr_ssclk && !wr_enable_l) begin
+        memarray[wr_a_ff] <= di_ff[64:0] ;
+     end
+end
+// wire  rd_clk_qual =  (rd_ssclk & ~scan_en) ; 
+// Read port: evaluated while the read clock is high. An enabled read returns
+// the addressed word, a disabled read returns all ones, and an unknown
+// enable returns X.
+always @(rd_ssclk or rd_a_ff or rd_enable_l) begin
+     if (rd_ssclk) begin
+        case (rd_enable_l)
+          1'b0:    array_out[64:0] <= memarray[rd_a_ff] ;
+          1'b1:    array_out[64:0] <= 65'h1FFFFFFFFFFFFFFFF;
+          default: array_out[64:0] <= 65'hXXXXXXXXXXXXXXXXX;
+        endcase
+     end
+end
+`endif
+
+// synopsys translate_off
+
+`ifdef DEFINE_0IN
+`else
+`ifdef INNO_MUXEX
+`else
+always @(csn_rd_ff or csn_wr_ff or rd_a_ff or wr_a_ff)   begin // same-address read/write conflict check
+   if ((SYNC_CLOCK_CHK1 == 0) & !csn_rd_ff & !csn_wr_ff & (rd_a_ff == wr_a_ff)) begin
+      array_out   <= 65'hxxxxxxxxxxxxxxxxx; // the conflicting read returns X
+	`ifdef MODELSIM  
+      $display ("sram_conflict", "conflict between read: %h and write: %h pointers", rd_a_ff, wr_a_ff);
+	`else
+      $error ("sram_conflict", "conflict between read: %h and write: %h pointers", rd_a_ff, wr_a_ff);
+	`endif
+   end
+end
+`endif
+
+///////////////////////////////////////////////////////////////
+// Purely ERROR checking code.                               //
+///////////////////////////////////////////////////////////////
+// Read-side signals delayed by 300 time units for the SYNC_CLOCK_CHK2 check.
+reg        rd_clk_del ; 
+reg        csn_rd_ff_del ; 
+reg  [3:0] rd_a_ff_del ;
+always @(rd_local_clk) begin
+     rd_clk_del = #300 rd_local_clk;
+end
+always @(posedge rd_clk_del) begin
+       csn_rd_ff_del <= csn_rd_ff ;
+       rd_a_ff_del <= rd_a_ff ;
+end 
+`ifdef INNO_MUXEX
+`else
+// Report a delayed read that coincides with the low phase of the write clock at the same address.
+always @(csn_rd_ff_del or csn_wr_ff or rd_a_ff_del or wr_a_ff or rd_clk_del or wr_ssclk)   begin
+   if ((SYNC_CLOCK_CHK2 == 0) &&
+       (rd_clk_del & !wr_ssclk & !csn_rd_ff_del & !csn_wr_ff & (rd_a_ff_del == wr_a_ff))) begin
+	`ifdef MODELSIM   
+	      $display ("sram_conflict", "conflict between read: %h and write: %h pointers ", rd_a_ff_del, wr_a_ff);
+	`else
+	      $error ("sram_conflict", "conflict between read: %h and write: %h pointers ", rd_a_ff_del, wr_a_ff);
+	`endif	  
+   end
+end
+`endif
+// Write-side signals delayed by 300 time units for the SYNC_CLOCK_CHK3 check.
+reg        wr_clk_del ; 
+reg        csn_wr_ff_del ; 
+reg  [3:0] wr_a_ff_del ;
+always @(wr_ssclk) begin
+     wr_clk_del = #300 wr_ssclk;
+end
+always @(posedge wr_clk_del) begin
+       csn_wr_ff_del <= csn_wr_ff ;
+       wr_a_ff_del <= wr_a_ff ;
+end 
+`ifdef INNO_MUXEX
+`else
+// Report a read that coincides with the low phase of the delayed write clock at the same address.
+always @(csn_rd_ff or csn_wr_ff_del or rd_a_ff or wr_a_ff_del or rd_local_clk or wr_clk_del)   begin
+   if ((SYNC_CLOCK_CHK3 == 0) &&
+       (rd_local_clk & !wr_clk_del & !csn_rd_ff & !csn_wr_ff_del & (rd_a_ff == wr_a_ff_del))) begin
+	`ifdef MODELSIM   
+	      $display ("sram_conflict", "conflict between read: %h and write: %h pointers ", rd_a_ff, wr_a_ff_del);
+	`else	  
+	      $error ("sram_conflict", "conflict between read: %h and write: %h pointers ", rd_a_ff, wr_a_ff_del);
+	`endif	  
+   end
+end
+`endif
+`endif
+///////////////////////////////////////////////////////////////
+// end the ERROR checking code.                              // 
+///////////////////////////////////////////////////////////////
+///////////////////////////////////////
+
+// synopsys translate_on
+
+
+///////////////////////////////////
+// Transparent latches (no reset term in this model)
+///////////////////////////////////
+// Read data latch: transparent while rd_ssclk is high.
+always @(array_out or rd_ssclk) begin
+     if (rd_ssclk) begin
+        array_out_latch <= array_out ;
+     end
+end
+// Scan-path latch on di_ff_so[0]: transparent while wr_ssclk is low.
+always @(di_ff_so[0] or wr_ssclk) begin
+     if (!wr_ssclk) begin
+        di_ff_latch_so <= di_ff_so[0] ;
+     end
+end
+
+// Output mux: testmux_sel = 1 bypasses the array and drives the flopped write data.
+assign do  = testmux_sel ? di_ff : array_out_latch ;
+
+/////////////////////////////////////////////////////
+// listen_out Flop -- captures do on the rising edge of rd_ssclk //
+/////////////////////////////////////////////////////
+reg     [64:0]  listen_out_ff_inst_mdata ;
+wire    [64:0]  listen_out_ff_inst_smin ;
+// With hold = 1 the scan-path input is do itself rather than the flop's own output.
+assign listen_out_ff_inst_smin[64:0]  = hold ?  do[64:0] :  listen_out_si[64:0] ; 
+always @(rd_smclk or rd_local_clk or do or listen_out_ff_inst_smin ) begin
+       if (!rd_local_clk) begin
+          listen_out_ff_inst_mdata = do[64:0] ;
+       end
+       if (!rd_smclk) begin
+          listen_out_ff_inst_mdata = listen_out_ff_inst_smin;
+       end
+end
+always @(posedge rd_ssclk) begin
+    listen_out[64:0]   <=  listen_out_ff_inst_mdata ; 
+end
+assign listen_out_so[64:0] = listen_out[64:0] ;
+
+// synopsys translate_off 
+ 
+`ifdef DEFINE_0IN
+`else
+`ifdef INNO_MUXEX
+`else
+always @(posedge rd_clk) begin // simulation-only check, repeated on every rising read clock edge
+     if ((MARGIN_WARNING == 0) & margin != 5'b10101) begin // != binds tighter than &
+	`ifdef MODELSIM 
+          $display ("sram_margin", "margin is not set to the default value") ;
+	`else	  
+          $error ("sram_margin", "margin is not set to the default value") ;
+	`endif
+     end
+end
+`endif
+`endif
+
+// synopsys translate_on 
+
+endmodule
Index: /trunk/T1-common/srams/bw_r_l2d_32k.v
===================================================================
--- /trunk/T1-common/srams/bw_r_l2d_32k.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_l2d_32k.v	(revision 6)
@@ -0,0 +1,433 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_l2d_32k.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_RED
+`endif
+
+module bw_r_l2d_32k (/*AUTOARG*/
+   // Outputs
+   decc_out, so, l2d_fuse_data_out, 
+   // Inputs
+   decc_in_l, decc_read_in, word_en_l, way_sel_l, set_l, 
+   col_offset_l, wr_en_l, rclk, arst_l, mem_write_disable, 
+   sehold, se, si, fuse_l2d_wren, fuse_l2d_rden, 
+   fuse_l2d_rid, fuse_clk1, fuse_clk2, 
+   fuse_l2d_data_in, fuse_read_data_in
+   );
+
+   input [155:0]	decc_in_l;
+   input [155:0] 	decc_read_in;
+   input [3:0] 		word_en_l;
+   input [1:0] 		way_sel_l;
+   input [9:0] 		set_l;
+   input		col_offset_l;
+   input		wr_en_l;
+   input 		rclk;
+   input 		arst_l;
+   
+   // Test signals
+   input  		mem_write_disable;
+   input 		sehold;
+   input 		se;
+   input 		si;
+   
+
+   // Efuse inputs
+   input 		fuse_l2d_wren;
+   input 		fuse_l2d_rden;
+   input [2:0] 		fuse_l2d_rid;
+   input 		fuse_clk1;
+   input 		fuse_clk2;
+   input 		fuse_l2d_data_in;
+   input 		fuse_read_data_in;
+
+
+   output [155:0] 	decc_out ;
+   output 		so;
+
+   // Efuse outputs
+   output 		l2d_fuse_data_out;
+      
+   reg [155:0] 		tmp_decc_out;
+   reg [155:0] 		decc_out_tmp;
+   reg [155:0] 		reg_decc_in;   
+
+`ifdef DEFINE_0IN
+`else
+   reg [155:0] 		way0_decc[1023:0] ;
+   reg [155:0] 		way1_decc[1023:0] ;
+`endif
+    
+   wire			acc_en_d1;
+   reg [1:0] 		way_sel_d1;
+   reg [9:0] 		set_d1;
+   reg [3:0] 		word_en_d1;
+   reg 			wr_en_d1;
+   reg [155:0] 		decc_in_d1;
+   reg [155:0] 		decc_out_d1;
+   reg 			col_offset_d1;
+   
+   wire	[1:0] 		way_sel_sehold;
+   wire	[9:0] 		set_sehold;
+   wire	[3:0] 		word_en_sehold;
+   wire 		wr_en_sehold;
+   wire [155:0] 	decc_in_sehold;
+   wire 		col_offset;
+   
+   wire [155:0] 	decc_out ;
+
+// JC begin
+// This array is a 2-cycle block; the two flags below only help Innologic verification.
+// stop_1_cyc:  set after a cycle with col_offset = 1 and a way selected; the next
+//              cycle is then ignored (col_offset is forced to 0 while it is set)
+// keep_rd_out: set after a read access; the output data is kept for another cycle
+   reg			stop_1_cyc;
+   reg			keep_rd_out;
+   always @(posedge rclk) begin
+      if (acc_en_d1 & ~wr_en_d1)
+	 keep_rd_out <= 1'b1;
+      else
+	 keep_rd_out <= 1'b0;
+      if (col_offset && (|way_sel_sehold))
+         stop_1_cyc <= 1'b1;
+      else
+         stop_1_cyc <= 1'b0;
+   end
+// JC end  
+   // Input muxes: while sehold is high the registered (*_d1) controls are recirculated;
+   // otherwise the active-low inputs are inverted to active-high.
+   assign 		wr_en_sehold = (sehold) ? wr_en_d1 : ~wr_en_l;
+   assign 		set_sehold = (sehold) ? set_d1 : ~set_l;
+   assign 		way_sel_sehold = (sehold) ? way_sel_d1 : ~way_sel_l;
+   assign 		word_en_sehold = (sehold) ? word_en_d1 : ~word_en_l;
+// In the circuit, se is used to disable writes. To verify write disable, the
+// testbench was modified as follows:
+//     force inno_tb_top.xtor.xcnt.se_l = ~mem_write_disable ;
+   // col_offset is forced low while stop_1_cyc or mem_write_disable is set.
+   assign 		col_offset = (stop_1_cyc || mem_write_disable ) ? (1'b0) : ~col_offset_l ;
+   // An access is active when col_offset_d1 is set and at least one way is selected.
+   assign acc_en_d1 = col_offset_d1 & (|way_sel_d1);   
+
+   // Stage-1 registers: capture the (sehold-muxed) controls, the inverted
+   // write data and the previous read result on every rising edge of rclk.
+   always @(posedge rclk) begin
+      // JC: these are NOT output flops, but they let the read data be
+      // kept for 2 cycles.
+      decc_out_d1   <= decc_out_tmp;
+      // JC: there is no write-data latch in the circuit, but the write-data
+      // drivers act like a latch gated by the word-enable signals.
+      decc_in_d1    <= ~decc_in_l;
+      // access controls
+      wr_en_d1      <= wr_en_sehold;
+      word_en_d1    <= word_en_sehold;
+      set_d1        <= set_sehold;
+      way_sel_d1    <= way_sel_sehold;
+      col_offset_d1 <= col_offset;
+   end      
+  
+
+`ifdef DEFINE_0IN
+   wire [155:0] decc_out0, decc_out1;
+
+   wire [155:0] wm  = { {39{word_en_d1[3]}}, {39{word_en_d1[2]}}, {39{word_en_d1[1]}}, {39{word_en_d1[0]}} };
+   wire         we0 = acc_en_d1 & wr_en_d1 & way_sel_d1[0];
+   wire         we1 = acc_en_d1 & wr_en_d1 & way_sel_d1[1];
+
+l2data_axis     data_array0 (.data_out  (decc_out0[155:0]),
+                             .rclk      (rclk),
+                             .adr       (set_d1[9:0]),
+                             .data_in   (decc_in_d1[155:0]),
+                             .we        (we0),
+                             .wm        (wm[155:0]) );
+l2data_axis     data_array1 (.data_out  (decc_out1[155:0]),
+                             .rclk      (rclk),
+                             .adr       (set_d1[9:0]),
+                             .data_in   (decc_in_d1[155:0]),
+                             .we        (we1),
+                             .wm        (wm[155:0]) );
+
+   always @(/*AUTOSENSE*/acc_en_d1 or decc_in_d1 or decc_out0
+	    or decc_out1 or way_sel_d1 or word_en_d1 or wr_en_d1) begin
+        if (acc_en_d1 & ~wr_en_d1) begin
+           //////////////////////////
+           // 16 or 64B byte read
+           //////////////////////////
+           decc_out_tmp = way_sel_d1[0] ? decc_out0[155:0] : decc_out1[155:0];
+        end
+
+        if (acc_en_d1 & wr_en_d1) begin
+           //////////////////////////
+           // Store word/dword OR 64B store
+           //////////////////////////
+           tmp_decc_out = way_sel_d1[0] ? decc_out0[155:0] : decc_out1[155:0];
+
+           //////////////////////////////////////
+           // Write data based on Word enables.
+           //////////////////////////////////////
+
+           reg_decc_in[155:117] = (decc_in_d1[155:117] & {39{word_en_d1[3]}} |
+                                   tmp_decc_out[155:117] & {39{~word_en_d1[3]}});
+           reg_decc_in[116:78] = (decc_in_d1[116:78] & {39{word_en_d1[2]}} |
+                                  tmp_decc_out[116:78] & {39{~word_en_d1[2]}});
+           reg_decc_in[77:39] = (decc_in_d1[77:39] & {39{word_en_d1[1]}} |
+                                 tmp_decc_out[77:39] & {39{~word_en_d1[1]}});
+           reg_decc_in[38:0] = (decc_in_d1[38:0] & {39{word_en_d1[0]}} |
+				tmp_decc_out[38:0] & {39{~word_en_d1[0]}});
+          
+           //////////////////////////////////////////////////////////
+           // the store data gets reflected onto the read output bus
+           //////////////////////////////////////////////////////////
+          
+//           decc_out_tmp[155:0] = reg_decc_in[155:0];
+// Store data is *not* reflected onto the read output bus in the physical implementation
+	     decc_out_tmp[155:0] = 156'b0;
+          
+        end // of write operation
+
+      if (~acc_en_d1) begin
+        // no access
+         decc_out_tmp[155:0] = 156'b0;
+      end
+
+   end // of always block
+
+`else
+
+   always @(/*AUTOSENSE*/acc_en_d1 or decc_in_d1 or set_d1
+	    or way_sel_d1 or word_en_d1 or wr_en_d1) begin
+      // Behavioral model of one access to way0_decc/way1_decc, driven by the *_d1 registers.
+`ifdef	INNO_MUXEX
+`else
+//----- PURELY FOR VERIFICATION -----------------------
+      if(wr_en_d1===1'bx) begin // case equality: "== 1'bx" evaluates to x and never fires
+	`ifdef MODELSIM
+         $display("L2_DATA_ERR"," wr en error %b ", wr_en_d1);
+	`else
+         $error("L2_DATA_ERR"," wr en error %b ", wr_en_d1);
+	`endif	
+      end
+//----- PURELY FOR VERIFICATION -----------------------
+`endif
+
+
+//////////////////
+// MEMORY ACCESS
+//////////////////
+
+      if (acc_en_d1) begin
+
+`ifdef	INNO_MUXEX
+`else
+//----- PURELY FOR VERIFICATION -----------------------
+	 if((^set_d1)===1'bx) begin // any unknown index bit; "== 10'bx" never fires
+	`ifdef MODELSIM 
+            $display("L2_DATA_ERR"," index error %h ", set_d1[9:0]);
+	`else
+            $error("L2_DATA_ERR"," index error %h ", set_d1[9:0]);
+	`endif
+	 end
+//----- PURELY FOR VERIFICATION -----------------------
+`endif
+
+
+	if (~wr_en_d1) begin
+	   //////////////////////////
+	   // 16 or 64B byte read 
+	   //////////////////////////
+	   decc_out_tmp = way_sel_d1[0] ? way0_decc[set_d1] : way1_decc[set_d1];
+//JC: For keeping data for 2 cycle
+//	   keep_rd_out = 2'b01;	   
+	end
+
+	else begin
+	   //////////////////////////
+      	   // Store word/dword OR 64B store
+	   //////////////////////////
+	   tmp_decc_out = way_sel_d1[0] ? way0_decc[set_d1] : way1_decc[set_d1];
+	   
+//	   keep_rd_out = 2'b00;	   
+	   //////////////////////////////////////
+	   // Write data based on Word enables.
+	   //////////////////////////////////////
+	   // Each word enable selects one 39-bit field: new data when set, old contents otherwise.
+	   reg_decc_in[155:117] = (decc_in_d1[155:117] & {39{word_en_d1[3]}} |
+				   tmp_decc_out[155:117] & {39{~word_en_d1[3]}});
+	   reg_decc_in[116:78] = (decc_in_d1[116:78] & {39{word_en_d1[2]}} |
+				  tmp_decc_out[116:78] & {39{~word_en_d1[2]}});
+	   reg_decc_in[77:39] = (decc_in_d1[77:39] & {39{word_en_d1[1]}} |
+				 tmp_decc_out[77:39] & {39{~word_en_d1[1]}});
+	   reg_decc_in[38:0] = (decc_in_d1[38:0] & {39{word_en_d1[0]}} |
+				tmp_decc_out[38:0] & {39{~word_en_d1[0]}});
+	   
+	   if (way_sel_d1[0]) way0_decc[set_d1] =  reg_decc_in;
+	   if (way_sel_d1[1]) way1_decc[set_d1] =  reg_decc_in;
+	   
+	   //////////////////////////////////////////////////////////
+	   // the store data gets reflected onto the read output bus
+	   //////////////////////////////////////////////////////////
+
+//           decc_out_tmp[155:0] = reg_decc_in[155:0];
+// Store data is *not* reflected onto the read output bus in the physical implementation
+
+	     decc_out_tmp[155:0] = 156'b0;
+	   
+	end // of write operation
+
+      end
+      
+      else begin
+	// no access
+	 decc_out_tmp[155:0] = 156'b0;
+      end
+      
+   end // of always block
+`endif
+   // Modeling wired-OR
+
+// JC we don't have any flop in this level
+//   assign decc_out[155:0] = decc_out_d1[155:0] | decc_read_in[155:0];   
+   // X during a read access cycle; else decc_out_d1 if keep_rd_out, else decc_out_tmp (each ORed with decc_read_in).
+   assign decc_out[155:0] = (acc_en_d1 & ~wr_en_d1) ? 156'bX : (keep_rd_out) ? 
+			    (decc_out_d1[155:0] | decc_read_in[155:0]) : 
+			    (decc_out_tmp[155:0] | decc_read_in[155:0]);
+   
+/////////////////////////////////////////////////////////////////////
+// Redundancy Registers
+/////////////////////////////////////////////////////////////////////
+
+   reg [8:0] 	s_red_reg0;
+   reg [8:0] 	s_red_reg1;
+   reg [8:0] 	s_red_reg2;
+   reg [8:0] 	s_red_reg3;
+   reg [8:0] 	s_red_reg4;
+   reg [8:0] 	s_red_reg5;
+ 		
+   reg [8:0] 	m_red_reg0;
+   reg [8:0] 	m_red_reg1;
+   reg [8:0] 	m_red_reg2;
+   reg [8:0] 	m_red_reg3;
+   reg [8:0] 	m_red_reg4;
+   reg [8:0] 	m_red_reg5;
+ 		   
+   wire	     l2d_fuse_data_out;
+   
+assign l2d_fuse_data_out = s_red_reg5[8];
+   
+   always @(arst_l or fuse_clk1 or fuse_l2d_rid or fuse_l2d_wren or fuse_l2d_rden 
+            or fuse_l2d_data_in or fuse_read_data_in 
+	    or s_red_reg0 or s_red_reg1 or s_red_reg2 
+	    or s_red_reg3 or s_red_reg4 or s_red_reg5) begin
+      // First stage (m_red_reg*): level-sensitive; cleared while arst_l is low.
+      if (!arst_l) begin
+         m_red_reg0[8:0] = 9'b0;
+         m_red_reg1[8:0] = 9'b0;
+         m_red_reg2[8:0] = 9'b0;
+         m_red_reg3[8:0] = 9'b0;
+         m_red_reg4[8:0] = 9'b0;
+         m_red_reg5[8:0] = 9'b0;
+      end
+      // Out of reset, the stage is updated while fuse_clk1 is high.
+      if (arst_l && fuse_clk1) begin
+      
+      /////////////////////////////////
+      // Write operation: shift fuse_l2d_data_in into bit 0 of the register selected by fuse_l2d_rid
+      /////////////////////////////////
+      
+         if (fuse_l2d_wren) begin
+	    case (fuse_l2d_rid) //selecting among the six registers
+	      3'b101: m_red_reg0[8:0] = {s_red_reg0[7:0], fuse_l2d_data_in};// bottom odd row
+	      3'b011: m_red_reg1[8:0] = {s_red_reg1[7:0], fuse_l2d_data_in};// bottom even row
+	      3'b010: m_red_reg2[8:0] = {s_red_reg2[7:0], fuse_l2d_data_in};// bottom column
+	      3'b100: m_red_reg3[8:0] = {s_red_reg3[7:0], fuse_l2d_data_in};// top odd row
+	      3'b001: m_red_reg4[8:0] = {s_red_reg4[7:0], fuse_l2d_data_in};// top even row
+	      3'b000: m_red_reg5[8:0] = {s_red_reg5[7:0], fuse_l2d_data_in};// top column
+	      default: ;
+	    endcase // case(fuse_l2d_rid)
+         end // if (fuse_l2d_wren)
+      
+      /////////////////////////////////
+      // Read operation: all six registers shift as one chain, fuse_read_data_in -> reg0 -> ... -> reg5
+      /////////////////////////////////
+      // NOTE: fuse_l2d_rid is not decoded on the read path below.
+//JC This is just temporary fix for read operation, rid = 3'b111 will turn on everything
+         else if (fuse_l2d_rden) begin
+		      m_red_reg0[8:0] = {s_red_reg0[7:0], fuse_read_data_in};
+		      m_red_reg1[8:0] = {s_red_reg1[7:0], s_red_reg0[8]};
+		      m_red_reg2[8:0] = {s_red_reg2[7:0], s_red_reg1[8]};
+		      m_red_reg3[8:0] = {s_red_reg3[7:0], s_red_reg2[8]};
+		      m_red_reg4[8:0] = {s_red_reg4[7:0], s_red_reg3[8]};
+		      m_red_reg5[8:0] = {s_red_reg5[7:0], s_red_reg4[8]};
+              end // if (fuse_l2d_rden)
+
+      end // if (fuse_clk1)
+
+   end // always @ (fuse_clk1 or...
+
+//   always @(posedge efc_scdata_fuse_clk1) begin
+
+   always @(arst_l or fuse_clk2 or fuse_l2d_rid or fuse_l2d_wren or fuse_l2d_rden 
+	    or m_red_reg0 or m_red_reg1 or m_red_reg2 
+	    or m_red_reg3 or m_red_reg4 or m_red_reg5) begin
+      // Second stage (s_red_reg*): copies from m_red_reg* while fuse_clk2 is high.
+`ifdef DEFINE_0IN
+`else
+  `ifdef FPGA_SYN_RED
+  `else
+      if (!arst_l) begin
+         m_red_reg0[8:0] = 9'b0;
+         m_red_reg1[8:0] = 9'b0;
+         m_red_reg2[8:0] = 9'b0;
+         m_red_reg3[8:0] = 9'b0;
+         m_red_reg4[8:0] = 9'b0;
+         m_red_reg5[8:0] = 9'b0;
+      end
+  `endif
+`endif
+      // NOTE(review): the reset above clears m_red_reg* (also cleared in the fuse_clk1 block); s_red_reg* is never reset here -- confirm whether s_red_reg* was intended.
+      if (fuse_clk2) begin
+      
+         if (fuse_l2d_wren) begin
+	    case (fuse_l2d_rid) //selecting among the six registers
+	      3'b101: s_red_reg0[8:0] = m_red_reg0[8:0];// bottom odd row
+	      3'b011: s_red_reg1[8:0] = m_red_reg1[8:0];// bottom even row
+	      3'b010: s_red_reg2[8:0] = m_red_reg2[8:0];// bottom column
+	      3'b100: s_red_reg3[8:0] = m_red_reg3[8:0];// top odd row
+	      3'b001: s_red_reg4[8:0] = m_red_reg4[8:0];// top even row
+	      3'b000: s_red_reg5[8:0] = m_red_reg5[8:0];// top column
+	     default: ;
+	    endcase // case(fuse_l2d_rid)
+         end // if (fuse_l2d_wren)
+         else if (fuse_l2d_rden) begin
+	        s_red_reg0[8:0] = m_red_reg0[8:0];// bottom odd row
+	        s_red_reg1[8:0] = m_red_reg1[8:0];// bottom even row
+	        s_red_reg2[8:0] = m_red_reg2[8:0];// bottom column
+	        s_red_reg3[8:0] = m_red_reg3[8:0];// top odd row
+	        s_red_reg4[8:0] = m_red_reg4[8:0];// top even row
+	      	s_red_reg5[8:0] = m_red_reg5[8:0];// top column
+              end // if (fuse_l2d_rden)
+
+      end // if (fuse_clk2)
+
+   end // always @ (fuse_clk2 or...
+   
+endmodule // bw_r_l2d_32k
+
Index: /trunk/T1-common/srams/bw_r_l2t.v
===================================================================
--- /trunk/T1-common/srams/bw_r_l2t.v	(revision 6)
+++ /trunk/T1-common/srams/bw_r_l2t.v	(revision 6)
@@ -0,0 +1,984 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_r_l2t.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local define
+// The sctag_pcx*** signals need to be appropriately bound in the
+// instantiation made in sctag.v
+////////////////////////////////////////////////////////////////////////
+
+module bw_r_l2t( /*AUTOARG*/
+   // Outputs
+   so, l2t_fuse_repair_value, l2t_fuse_repair_en, way_sel, way_sel_1, 
+   tag_way0, tag_way1, tag_way2, tag_way3, tag_way4, tag_way5, 
+   tag_way6, tag_way7, tag_way8, tag_way9, tag_way10, tag_way11, 
+   // Inputs
+   index, bist_index, rd_en, bist_rd_en, way, bist_way, wr_en, 
+   bist_wr_en, wrdata0, bist_wrdata0, wrdata1, bist_wrdata1, 
+   lkup_tag_d1, rclk, fuse_l2t_wren, fuse_l2t_rid, 
+   fuse_l2t_repair_value, fuse_l2t_repair_en, efc_sctag_fuse_clk1, 
+   rst_tri_en, si, se, arst_l, sehold
+   );
+// L2 tag array wrapper: six bw_r_l2t_subbank instances, two ways each (12 ways in total).
+// select xbar
+// index, rd_en, wr_en and way each have a bist_* twin; both are passed on to the subbanks.
+input	[9:0]	index ; // from addrdp
+input	[9:0]	bist_index ; // BIST INPUT
+
+
+input		rd_en ;  // enable from arbctl is speculatively asserted.
+input		bist_rd_en ; // BIST INPUT
+
+input	[11:0]	way; // way for a fill/tag write
+input	[11:0]	bist_way;// BIST INPUT
+
+input	 	wr_en; // on a fill in px2 or a diag/tecc write.
+input		bist_wr_en ; // BIST INPUT
+
+input	[27:0]	wrdata0 ; // wr tag for subbanks 01/45/89 (via wrdata0_d1_l)
+input	[7:0]	bist_wrdata0 ; // BIST pattern, expanded to 28 bits and used instead of wrdata0 when bist_wr_en
+input	[27:0]	wrdata1 ; // wr tag for subbanks 23/67/ab (via wrdata1_d1_l)
+input	[7:0]	bist_wrdata1 ; // BIST pattern, expanded to 28 bits and used instead of wrdata1 when bist_wr_en
+
+input	[27:1]	lkup_tag_d1 ; //ecc bits are appended to this tag.
+
+input		rclk; // clocks the write-data flops below; also passed to every subbank
+
+// input	[3:0]	tag_stm ;  ?? may not be needed.
+
+
+input          fuse_l2t_wren;          //redundancy reg wr enable, qualified
+input [5:0]    fuse_l2t_rid;           //redundancy register id: <5:2> decoded into gbl_red_bank_id (values 0-11), <1:0> passed to the subbanks as gbl_red_rid (row/col red.)
+input [6:0]    fuse_l2t_repair_value;  //data in for redundancy register  
+input [1:0]    fuse_l2t_repair_en;     //enable bits to turn on redundancy
+input	       efc_sctag_fuse_clk1; // passed to every subbank as fclk1
+
+
+// NOTE(review): si is not referenced and so is not driven anywhere in this module;
+// the sin/sout ports of all six subbank instances are left unconnected.
+input		rst_tri_en;
+input 		si, se;
+output 		so;
+input		arst_l;
+input		sehold;
+
+output  [6:0]    l2t_fuse_repair_value;  //data out for redundancy register
+output  [1:0]    l2t_fuse_repair_en;     //enable bits out
+
+output	[11:0]	way_sel; // compare outputs
+output	[11:0]	way_sel_1; // compare outputs (copy of way_sel)
+// Per-way tag outputs, driven in pairs by the six subbanks.
+output	[27:0] tag_way0;
+output	[27:0] tag_way1;
+output	[27:0] tag_way2;
+output	[27:0] tag_way3;
+output	[27:0] tag_way4;
+output	[27:0] tag_way5;
+output	[27:0] tag_way6;
+output	[27:0] tag_way7;
+output	[27:0] tag_way8;
+output	[27:0] tag_way9;
+output	[27:0] tag_way10;
+output	[27:0] tag_way11;
+
+reg	[27:0]	wrdata0_d1_l, wrdata1_d1_l ; // inverted write data, flopped on rclk
+wire	[11:0]	gbl_red_bank_id; // one bit per value 0-11 of fuse_l2t_rid[5:2]
+reg	[6:0]	l2t_fuse_repair_value;
+reg	[1:0]	l2t_fuse_repair_en;
+// Redundancy register outputs of each subbank; muxed onto l2t_fuse_repair_* further down.
+wire	[6:0]	red_reg_q_ab, red_reg_q_89, red_reg_q_67, red_reg_q_45 ;
+wire	[6:0]	red_reg_q_01, red_reg_q_23;
+wire	[1:0]	red_reg_enq_ab, red_reg_enq_89, red_reg_enq_67, red_reg_enq_45 ;
+wire	[1:0]	red_reg_enq_01, red_reg_enq_23;
+wire	[5:0]	wr_en_subbank; // redundancy write enable, one bit per subbank
+wire	[27:0]	tag_wrdata0_px2, tag_wrdata1_px2 ; // write data after the BIST mux
+
+// Write-data select: with bist_wr_en the 28-bit word is {bist[3:0], bist[7:0] x3}, else wrdata.
+assign  tag_wrdata0_px2 = bist_wr_en ? { bist_wrdata0[3:0], bist_wrdata0[7:0],
+                                         bist_wrdata0[7:0], bist_wrdata0[7:0] } : wrdata0;
+assign  tag_wrdata1_px2 = bist_wr_en ? { bist_wrdata1[3:0], bist_wrdata1[7:0],
+                                         bist_wrdata1[7:0], bist_wrdata1[7:0] } : wrdata1;
+
+// Inputs that are flopped
+
+// Write-data staging flops, followed by a verification-only check of the way select.
+always @(posedge rclk) begin
+	// Each rclk edge captures the inverted, BIST-muxed write data.
+	// While sehold is set the stored value is recirculated instead.
+	wrdata0_d1_l <= sehold ? wrdata0_d1_l : ~tag_wrdata0_px2;
+	wrdata1_d1_l <= sehold ? wrdata1_d1_l : ~tag_wrdata1_px2;
+
+`ifdef	INNO_MUXEX
+`else
+//----- PURELY FOR VERIFICATION -----------------------
+	// When wr_en is set, way[11:0] must match one of the twelve
+	// single-bit codes; any other pattern is reported.
+	if (wr_en) begin
+		case (way)
+			12'h001, 12'h002, 12'h004, 12'h008,
+			12'h010, 12'h020, 12'h040, 12'h080,
+			12'h100, 12'h200, 12'h400, 12'h800:
+				; // exactly one way selected: nothing to report
+			// MODELSIM builds print the message with $display,
+			// all other builds use $error.
+			default:
+			`ifdef MODELSIM
+				$display("L2_TAG_ERR"," way select error %h ", way[11:0]);
+			`else
+				$error("L2_TAG_ERR"," way select error %h ", way[11:0]);
+			`endif
+		endcase
+	end // if (wr_en)
+
+//----- PURELY FOR VERIFICATION -----------------------
+`endif
+end
+
+// way_sel_1 is a second copy of the compare outputs.
+assign way_sel_1 = way_sel;
+
+// Decode of fuse_l2t_rid[5:2]: bit N of gbl_red_bank_id is the result of
+// comparing the field with N. Field values 12 through 15 match no bit.
+// The concatenation lists bit 11 first and bit 0 last.
+assign gbl_red_bank_id[11:0] = {
+	(fuse_l2t_rid[5:2] == 4'd11), (fuse_l2t_rid[5:2] == 4'd10),
+	(fuse_l2t_rid[5:2] == 4'd9),  (fuse_l2t_rid[5:2] == 4'd8),
+	(fuse_l2t_rid[5:2] == 4'd7),  (fuse_l2t_rid[5:2] == 4'd6),
+	(fuse_l2t_rid[5:2] == 4'd5),  (fuse_l2t_rid[5:2] == 4'd4),
+	(fuse_l2t_rid[5:2] == 4'd3),  (fuse_l2t_rid[5:2] == 4'd2),
+	(fuse_l2t_rid[5:2] == 4'd1),  (fuse_l2t_rid[5:2] == 4'd0)
+};
+
+
+// Earlier per-subbank gating of the write enable (disabled, kept for reference):
+//   wr_en_subbank[0] = fuse_l2t_wren & ( |(gbl_red_bank_id[1:0]) );
+//   wr_en_subbank[1] = fuse_l2t_wren & ( |(gbl_red_bank_id[5:4]) );
+//   wr_en_subbank[2] = fuse_l2t_wren & ( |(gbl_red_bank_id[9:8]) );
+//   wr_en_subbank[3] = fuse_l2t_wren & ( |(gbl_red_bank_id[3:2]) );
+//   wr_en_subbank[4] = fuse_l2t_wren & ( |(gbl_red_bank_id[7:6]) );
+//   wr_en_subbank[5] = fuse_l2t_wren & ( |(gbl_red_bank_id[11:10]) );
+
+// JC modified begin
+// Write enable signal goes directly to subbank without any gating circuits:
+// every bit of wr_en_subbank is fuse_l2t_wren.
+// Bits 0..5 are wired to subbank01, 45, 89, 23, 67 and ab respectively;
+// each subbank also receives its own pair of gbl_red_bank_id bits.
+assign  wr_en_subbank[5:0] = {6{fuse_l2t_wren}};
+
+// JC modified end
+
+
+
+// Readback mux for the fuse redundancy registers.
+//
+// gbl_red_bank_id is compared against the twelve single-bit codes; the two
+// codes that belong to one subbank both select that subbank's outputs:
+//
+//     bit 0  or bit 1   ->  red_reg_enq_01 / red_reg_q_01
+//     bit 2  or bit 3   ->  red_reg_enq_23 / red_reg_q_23
+//     bit 4  or bit 5   ->  red_reg_enq_45 / red_reg_q_45
+//     bit 6  or bit 7   ->  red_reg_enq_67 / red_reg_q_67
+//     bit 8  or bit 9   ->  red_reg_enq_89 / red_reg_q_89
+//     bit 10 or bit 11  ->  red_reg_enq_ab / red_reg_q_ab
+//
+// Every other value of gbl_red_bank_id drives zeros on both outputs.
+always  @(/*AUTOSENSE*/gbl_red_bank_id or red_reg_enq_01
+          or red_reg_enq_23 or red_reg_enq_45 or red_reg_enq_67
+          or red_reg_enq_89 or red_reg_enq_ab or red_reg_q_01
+          or red_reg_q_23 or red_reg_q_45 or red_reg_q_67
+          or red_reg_q_89 or red_reg_q_ab)begin
+
+	case (gbl_red_bank_id)
+
+	// subbank01
+	12'h001, 12'h002: begin
+		l2t_fuse_repair_en[1:0]    = red_reg_enq_01[1:0];
+		l2t_fuse_repair_value[6:0] = red_reg_q_01[6:0];
+	end
+
+	// subbank23
+	12'h004, 12'h008: begin
+		l2t_fuse_repair_en[1:0]    = red_reg_enq_23[1:0];
+		l2t_fuse_repair_value[6:0] = red_reg_q_23[6:0];
+	end
+
+	// subbank45
+	12'h010, 12'h020: begin
+		l2t_fuse_repair_en[1:0]    = red_reg_enq_45[1:0];
+		l2t_fuse_repair_value[6:0] = red_reg_q_45[6:0];
+	end
+
+	// subbank67
+	12'h040, 12'h080: begin
+		l2t_fuse_repair_en[1:0]    = red_reg_enq_67[1:0];
+		l2t_fuse_repair_value[6:0] = red_reg_q_67[6:0];
+	end
+
+	// subbank89
+	12'h100, 12'h200: begin
+		l2t_fuse_repair_en[1:0]    = red_reg_enq_89[1:0];
+		l2t_fuse_repair_value[6:0] = red_reg_q_89[6:0];
+	end
+
+	// subbankab
+	12'h400, 12'h800: begin
+		l2t_fuse_repair_en[1:0]    = red_reg_enq_ab[1:0];
+		l2t_fuse_repair_value[6:0] = red_reg_q_ab[6:0];
+	end
+
+	default: begin
+// JC added begin
+// remove implicit latch.
+		l2t_fuse_repair_en[1:0]    = 2'b0;
+		l2t_fuse_repair_value[6:0] = 7'b0;
+// JC added end
+	end
+
+	endcase
+
+end
+
+/* bw_r_l2t_subbank 	AUTO_TEMPLATE	 (
+                            // Outputs
+                            .wayselect0 (way_sel[0]),
+                            .wayselect1 (way_sel[1]),
+                            .tag_way0   (tag_way0[27:0]),
+                            .tag_way1   (tag_way1[27:0]),
+                            .red_reg_q_array2(red_reg_q_01[6:0]),
+                            .red_reg_enq_array2(red_reg_enq_01[1:0]),
+                            // Inputs
+                            .way        (way[1:0]),
+                            .bist_way   (bist_way[1:0]),
+                            .wd_b_l     (wrdata0_d1_l[27:0]),
+                            .lkuptag    (lkup_tag_d1[27:1]),
+                            .rclk       (rclk),
+                            .sehold     (sehold),
+                            .se         (se),
+                            .sin        (),
+                            .sout        (),
+                            .rst_tri_en (rst_tri_en),
+                            .arst_l     (arst_l),
+                            .gbl_red_rid(fuse_l2t_rid[1:0]),
+                            .gbl_red_reg_en(fuse_l2t_repair_en[1:0]),
+                            .gbl_red_reg_d(fuse_l2t_repair_value[6:0]),
+                            .fclk1      (efc_sctag_fuse_clk1),
+                            .gbl_red_bank_id_top(gbl_red_bank_id[0]),
+                            .gbl_red_bank_id_bottom(gbl_red_bank_id[1]),
+                            .gbl_red_wr_en(wr_en_subbank[0]));
+
+*/
+
+ bw_r_l2t_subbank 	subbank01(/*AUTOINST*/
+                              // Outputs: ways 0 and 1 (way_sel[1:0], tag_way0/1, redundancy regs *_01)
+                              .sout     (),                      // Templated, scan-out left unconnected
+                              .wayselect0(way_sel[0]),           // Templated
+                              .wayselect1(way_sel[1]),           // Templated
+                              .tag_way0 (tag_way0[27:0]),        // Templated
+                              .tag_way1 (tag_way1[27:0]),        // Templated
+                              .red_reg_q_array2(red_reg_q_01[6:0]), // Templated
+                              .red_reg_enq_array2(red_reg_enq_01[1:0]), // Templated
+                              // Inputs (ports without a Templated tag are wired identically on all six subbanks)
+                              .index    (index[9:0]),
+                              .bist_index(bist_index[9:0]),
+                              .wr_en    (wr_en),
+                              .bist_wr_en(bist_wr_en),
+                              .rd_en    (rd_en),
+                              .bist_rd_en(bist_rd_en),
+                              .way      (way[1:0]),              // Templated
+                              .bist_way (bist_way[1:0]),         // Templated
+                              .wd_b_l   (wrdata0_d1_l[27:0]),    // Templated, flopped copy of write data 0
+                              .lkuptag  (lkup_tag_d1[27:1]),     // Templated
+                              .rclk     (rclk),                  // Templated
+                              .sehold   (sehold),                // Templated
+                              .se       (se),                    // Templated
+                              .sin      (),                      // Templated, scan-in left unconnected
+                              .rst_tri_en(rst_tri_en),           // Templated
+                              .arst_l   (arst_l),                // Templated
+                              .gbl_red_rid(fuse_l2t_rid[1:0]),   // Templated
+                              .gbl_red_reg_en(fuse_l2t_repair_en[1:0]), // Templated
+                              .gbl_red_reg_d(fuse_l2t_repair_value[6:0]), // Templated
+                              .fclk1    (efc_sctag_fuse_clk1),   // Templated
+                              .gbl_red_bank_id_top(gbl_red_bank_id[0]), // Templated
+                              .gbl_red_bank_id_bottom(gbl_red_bank_id[1]), // Templated
+                              .gbl_red_wr_en(wr_en_subbank[0]));  // Templated
+
+/* bw_r_l2t_subbank     AUTO_TEMPLATE    (
+                            // Outputs
+                            .wayselect0 (way_sel[4]),
+                            .wayselect1 (way_sel[5]),
+                            .tag_way0   (tag_way4[27:0]),
+                            .tag_way1   (tag_way5[27:0]),
+                            .red_reg_q_array2(red_reg_q_45[6:0]),
+                            .red_reg_enq_array2(red_reg_enq_45[1:0]),
+                            // Inputs
+                            .way        (way[5:4]),
+                            .bist_way   (bist_way[5:4]),
+                            .wd_b_l     (wrdata0_d1_l[27:0]),
+                            .lkuptag    (lkup_tag_d1[27:1]),
+                            .rclk       (rclk),
+                            .sehold     (sehold),
+                            .se         (se),
+                            .sin        (),
+                            .sout        (),
+                            .rst_tri_en (rst_tri_en),
+                            .arst_l     (arst_l),
+                            .gbl_red_rid(fuse_l2t_rid[1:0]),
+                            .gbl_red_reg_en(fuse_l2t_repair_en[1:0]),
+                            .gbl_red_reg_d(fuse_l2t_repair_value[6:0]),
+                            .fclk1      (efc_sctag_fuse_clk1),
+                            .gbl_red_bank_id_top(gbl_red_bank_id[4]),
+                            .gbl_red_bank_id_bottom(gbl_red_bank_id[5]),
+                            .gbl_red_wr_en(wr_en_subbank[1]));
+
+*/
+
+ bw_r_l2t_subbank 	subbank45(/*AUTOINST*/
+                              // Outputs: ways 4 and 5 (way_sel[5:4], tag_way4/5, redundancy regs *_45)
+                              .sout     (),                      // Templated, scan-out left unconnected
+                              .wayselect0(way_sel[4]),           // Templated
+                              .wayselect1(way_sel[5]),           // Templated
+                              .tag_way0 (tag_way4[27:0]),        // Templated
+                              .tag_way1 (tag_way5[27:0]),        // Templated
+                              .red_reg_q_array2(red_reg_q_45[6:0]), // Templated
+                              .red_reg_enq_array2(red_reg_enq_45[1:0]), // Templated
+                              // Inputs (ports without a Templated tag are wired identically on all six subbanks)
+                              .index    (index[9:0]),
+                              .bist_index(bist_index[9:0]),
+                              .wr_en    (wr_en),
+                              .bist_wr_en(bist_wr_en),
+                              .rd_en    (rd_en),
+                              .bist_rd_en(bist_rd_en),
+                              .way      (way[5:4]),              // Templated
+                              .bist_way (bist_way[5:4]),         // Templated
+                              .wd_b_l   (wrdata0_d1_l[27:0]),    // Templated, flopped copy of write data 0
+                              .lkuptag  (lkup_tag_d1[27:1]),     // Templated
+                              .rclk     (rclk),                  // Templated
+                              .sehold   (sehold),                // Templated
+                              .se       (se),                    // Templated
+                              .sin      (),                      // Templated, scan-in left unconnected
+                              .rst_tri_en(rst_tri_en),           // Templated
+                              .arst_l   (arst_l),                // Templated
+                              .gbl_red_rid(fuse_l2t_rid[1:0]),   // Templated
+                              .gbl_red_reg_en(fuse_l2t_repair_en[1:0]), // Templated
+                              .gbl_red_reg_d(fuse_l2t_repair_value[6:0]), // Templated
+                              .fclk1    (efc_sctag_fuse_clk1),   // Templated
+                              .gbl_red_bank_id_top(gbl_red_bank_id[4]), // Templated
+                              .gbl_red_bank_id_bottom(gbl_red_bank_id[5]), // Templated
+                              .gbl_red_wr_en(wr_en_subbank[1]));  // Templated
+
+/* bw_r_l2t_subbank     AUTO_TEMPLATE    (
+                            // Outputs
+                            .wayselect0 (way_sel[8]),
+                            .wayselect1 (way_sel[9]),
+                            .tag_way0   (tag_way8[27:0]),
+                            .tag_way1   (tag_way9[27:0]),
+                            .red_reg_q_array2(red_reg_q_89[6:0]),
+                            .red_reg_enq_array2(red_reg_enq_89[1:0]),
+                            // Inputs
+                            .way        (way[9:8]),
+                            .bist_way   (bist_way[9:8]),
+                            .wd_b_l     (wrdata0_d1_l[27:0]),
+                            .lkuptag    (lkup_tag_d1[27:1]),
+                            .rclk       (rclk),
+                            .sehold     (sehold),
+                            .se         (se),
+                            .sin        (),
+                            .sout        (),
+                            .rst_tri_en (rst_tri_en),
+                            .arst_l     (arst_l),
+                            .gbl_red_rid(fuse_l2t_rid[1:0]),
+                            .gbl_red_reg_en(fuse_l2t_repair_en[1:0]),
+                            .gbl_red_reg_d(fuse_l2t_repair_value[6:0]),
+                            .fclk1      (efc_sctag_fuse_clk1),
+                            .gbl_red_bank_id_top(gbl_red_bank_id[8]),
+                            .gbl_red_bank_id_bottom(gbl_red_bank_id[9]),
+                            .gbl_red_wr_en(wr_en_subbank[2]));
+
+*/
+
+
+ bw_r_l2t_subbank 	subbank89(/*AUTOINST*/
+                              // Outputs: ways 8 and 9 (way_sel[9:8], tag_way8/9, redundancy regs *_89)
+                              .sout     (),                      // Templated, scan-out left unconnected
+                              .wayselect0(way_sel[8]),           // Templated
+                              .wayselect1(way_sel[9]),           // Templated
+                              .tag_way0 (tag_way8[27:0]),        // Templated
+                              .tag_way1 (tag_way9[27:0]),        // Templated
+                              .red_reg_q_array2(red_reg_q_89[6:0]), // Templated
+                              .red_reg_enq_array2(red_reg_enq_89[1:0]), // Templated
+                              // Inputs (ports without a Templated tag are wired identically on all six subbanks)
+                              .index    (index[9:0]),
+                              .bist_index(bist_index[9:0]),
+                              .wr_en    (wr_en),
+                              .bist_wr_en(bist_wr_en),
+                              .rd_en    (rd_en),
+                              .bist_rd_en(bist_rd_en),
+                              .way      (way[9:8]),              // Templated
+                              .bist_way (bist_way[9:8]),         // Templated
+                              .wd_b_l   (wrdata0_d1_l[27:0]),    // Templated, flopped copy of write data 0
+                              .lkuptag  (lkup_tag_d1[27:1]),     // Templated
+                              .rclk     (rclk),                  // Templated
+                              .sehold   (sehold),                // Templated
+                              .se       (se),                    // Templated
+                              .sin      (),                      // Templated, scan-in left unconnected
+                              .rst_tri_en(rst_tri_en),           // Templated
+                              .arst_l   (arst_l),                // Templated
+                              .gbl_red_rid(fuse_l2t_rid[1:0]),   // Templated
+                              .gbl_red_reg_en(fuse_l2t_repair_en[1:0]), // Templated
+                              .gbl_red_reg_d(fuse_l2t_repair_value[6:0]), // Templated
+                              .fclk1    (efc_sctag_fuse_clk1),   // Templated
+                              .gbl_red_bank_id_top(gbl_red_bank_id[8]), // Templated
+                              .gbl_red_bank_id_bottom(gbl_red_bank_id[9]), // Templated
+                              .gbl_red_wr_en(wr_en_subbank[2]));  // Templated
+
+/* bw_r_l2t_subbank     AUTO_TEMPLATE    (
+                            // Outputs
+                            .wayselect0 (way_sel[2]),
+                            .wayselect1 (way_sel[3]),
+                            .tag_way0   (tag_way2[27:0]),
+                            .tag_way1   (tag_way3[27:0]),
+                            .red_reg_q_array2(red_reg_q_23[6:0]),
+                            .red_reg_enq_array2(red_reg_enq_23[1:0]),
+                            // Inputs
+                            .way        (way[3:2]),
+                            .bist_way   (bist_way[3:2]),
+                            .wd_b_l     (wrdata1_d1_l[27:0]),
+                            .lkuptag    (lkup_tag_d1[27:1]),
+                            .rclk       (rclk),
+                            .sehold     (sehold),
+                            .se         (se),
+                            .sin        (),
+                            .sout        (),
+                            .rst_tri_en (rst_tri_en),
+                            .arst_l     (arst_l),
+                            .gbl_red_rid(fuse_l2t_rid[1:0]),
+                            .gbl_red_reg_en(fuse_l2t_repair_en[1:0]),
+                            .gbl_red_reg_d(fuse_l2t_repair_value[6:0]),
+                            .fclk1      (efc_sctag_fuse_clk1),
+                            .gbl_red_bank_id_top(gbl_red_bank_id[2]),
+                            .gbl_red_bank_id_bottom(gbl_red_bank_id[3]),
+                            .gbl_red_wr_en(wr_en_subbank[3]));
+
+*/
+
+ bw_r_l2t_subbank 	subbank23(/*AUTOINST*/
+                              // Outputs: ways 2 and 3 (way_sel[3:2], tag_way2/3, redundancy regs *_23)
+                              .sout     (),                      // Templated, scan-out left unconnected
+                              .wayselect0(way_sel[2]),           // Templated
+                              .wayselect1(way_sel[3]),           // Templated
+                              .tag_way0 (tag_way2[27:0]),        // Templated
+                              .tag_way1 (tag_way3[27:0]),        // Templated
+                              .red_reg_q_array2(red_reg_q_23[6:0]), // Templated
+                              .red_reg_enq_array2(red_reg_enq_23[1:0]), // Templated
+                              // Inputs (ports without a Templated tag are wired identically on all six subbanks)
+                              .index    (index[9:0]),
+                              .bist_index(bist_index[9:0]),
+                              .wr_en    (wr_en),
+                              .bist_wr_en(bist_wr_en),
+                              .rd_en    (rd_en),
+                              .bist_rd_en(bist_rd_en),
+                              .way      (way[3:2]),              // Templated
+                              .bist_way (bist_way[3:2]),         // Templated
+                              .wd_b_l   (wrdata1_d1_l[27:0]),    // Templated, flopped copy of write data 1
+                              .lkuptag  (lkup_tag_d1[27:1]),     // Templated
+                              .rclk     (rclk),                  // Templated
+                              .sehold   (sehold),                // Templated
+                              .se       (se),                    // Templated
+                              .sin      (),                      // Templated, scan-in left unconnected
+                              .rst_tri_en(rst_tri_en),           // Templated
+                              .arst_l   (arst_l),                // Templated
+                              .gbl_red_rid(fuse_l2t_rid[1:0]),   // Templated
+                              .gbl_red_reg_en(fuse_l2t_repair_en[1:0]), // Templated
+                              .gbl_red_reg_d(fuse_l2t_repair_value[6:0]), // Templated
+                              .fclk1    (efc_sctag_fuse_clk1),   // Templated
+                              .gbl_red_bank_id_top(gbl_red_bank_id[2]), // Templated
+                              .gbl_red_bank_id_bottom(gbl_red_bank_id[3]), // Templated
+                              .gbl_red_wr_en(wr_en_subbank[3]));  // Templated
+
+/* bw_r_l2t_subbank     AUTO_TEMPLATE    (
+                            // Outputs
+                            .wayselect0 (way_sel[6]),
+                            .wayselect1 (way_sel[7]),
+                            .tag_way0   (tag_way6[27:0]),
+                            .tag_way1   (tag_way7[27:0]),
+                            .red_reg_q_array2(red_reg_q_67[6:0]),
+                            .red_reg_enq_array2(red_reg_enq_67[1:0]),
+                            // Inputs
+                            .way        (way[7:6]),
+                            .bist_way   (bist_way[7:6]),
+                            .wd_b_l     (wrdata1_d1_l[27:0]),
+                            .lkuptag    (lkup_tag_d1[27:1]),
+                            .rclk       (rclk),
+                            .sehold     (sehold),
+                            .se         (se),
+                            .sin        (),
+                            .sout        (),
+                            .rst_tri_en (rst_tri_en),
+                            .arst_l     (arst_l),
+                            .gbl_red_rid(fuse_l2t_rid[1:0]),
+                            .gbl_red_reg_en(fuse_l2t_repair_en[1:0]),
+                            .gbl_red_reg_d(fuse_l2t_repair_value[6:0]),
+                            .fclk1      (efc_sctag_fuse_clk1),
+                            .gbl_red_bank_id_top(gbl_red_bank_id[6]),
+                            .gbl_red_bank_id_bottom(gbl_red_bank_id[7]),
+                            .gbl_red_wr_en(wr_en_subbank[4]));
+
+*/
+
+ bw_r_l2t_subbank       subbank67(/*AUTOINST*/
+                                  // Outputs: ways 6 and 7 (way_sel[7:6], tag_way6/7, redundancy regs *_67)
+                                  .sout (),                      // Templated, scan-out left unconnected
+                                  .wayselect0(way_sel[6]),       // Templated
+                                  .wayselect1(way_sel[7]),       // Templated
+                                  .tag_way0(tag_way6[27:0]),     // Templated
+                                  .tag_way1(tag_way7[27:0]),     // Templated
+                                  .red_reg_q_array2(red_reg_q_67[6:0]), // Templated
+                                  .red_reg_enq_array2(red_reg_enq_67[1:0]), // Templated
+                                  // Inputs (ports without a Templated tag are wired identically on all six subbanks)
+                                  .index(index[9:0]),
+                                  .bist_index(bist_index[9:0]),
+                                  .wr_en(wr_en),
+                                  .bist_wr_en(bist_wr_en),
+                                  .rd_en(rd_en),
+                                  .bist_rd_en(bist_rd_en),
+                                  .way  (way[7:6]),              // Templated
+                                  .bist_way(bist_way[7:6]),      // Templated
+                                  .wd_b_l(wrdata1_d1_l[27:0]),   // Templated, flopped copy of write data 1
+                                  .lkuptag(lkup_tag_d1[27:1]),   // Templated
+                                  .rclk (rclk),                  // Templated
+                                  .sehold(sehold),               // Templated
+                                  .se   (se),                    // Templated
+                                  .sin  (),                      // Templated, scan-in left unconnected
+                                  .rst_tri_en(rst_tri_en),       // Templated
+                                  .arst_l(arst_l),               // Templated
+                                  .gbl_red_rid(fuse_l2t_rid[1:0]), // Templated
+                                  .gbl_red_reg_en(fuse_l2t_repair_en[1:0]), // Templated
+                                  .gbl_red_reg_d(fuse_l2t_repair_value[6:0]), // Templated
+                                  .fclk1(efc_sctag_fuse_clk1),   // Templated
+                                  .gbl_red_bank_id_top(gbl_red_bank_id[6]), // Templated
+                                  .gbl_red_bank_id_bottom(gbl_red_bank_id[7]), // Templated
+                                  .gbl_red_wr_en(wr_en_subbank[4])); // Templated
+
+/* bw_r_l2t_subbank     AUTO_TEMPLATE    (
+                            // Outputs
+                            .wayselect0 (way_sel[10]),
+                            .wayselect1 (way_sel[11]),
+                            .tag_way0   (tag_way10[27:0]),
+                            .tag_way1   (tag_way11[27:0]),
+                            .red_reg_q_array2(red_reg_q_ab[6:0]),
+                            .red_reg_enq_array2(red_reg_enq_ab[1:0]),
+                            // Inputs
+                            .way        (way[11:10]),
+                            .bist_way   (bist_way[11:10]),
+                            .wd_b_l     (wrdata1_d1_l[27:0]),
+                            .lkuptag    (lkup_tag_d1[27:1]),
+                            .rclk       (rclk),
+                            .sehold     (sehold),
+                            .se         (se),
+                            .sin        (),
+                            .sout        (),
+                            .rst_tri_en (rst_tri_en),
+                            .arst_l     (arst_l),
+                            .gbl_red_rid(fuse_l2t_rid[1:0]),
+                            .gbl_red_reg_en(fuse_l2t_repair_en[1:0]),
+                            .gbl_red_reg_d(fuse_l2t_repair_value[6:0]),
+                            .fclk1      (efc_sctag_fuse_clk1),
+                            .gbl_red_bank_id_top(gbl_red_bank_id[10]),
+                            .gbl_red_bank_id_bottom(gbl_red_bank_id[11]),
+                            .gbl_red_wr_en(wr_en_subbank[5]));
+
+*/
+
+ bw_r_l2t_subbank       subbankab(/*AUTOINST*/
+                                  // Outputs: ways 10 and 11 (way_sel[11:10], tag_way10/11, redundancy regs *_ab)
+                                  .sout (),                      // Templated, scan-out left unconnected
+                                  .wayselect0(way_sel[10]),      // Templated
+                                  .wayselect1(way_sel[11]),      // Templated
+                                  .tag_way0(tag_way10[27:0]),    // Templated
+                                  .tag_way1(tag_way11[27:0]),    // Templated
+                                  .red_reg_q_array2(red_reg_q_ab[6:0]), // Templated
+                                  .red_reg_enq_array2(red_reg_enq_ab[1:0]), // Templated
+                                  // Inputs (ports without a Templated tag are wired identically on all six subbanks)
+                                  .index(index[9:0]),
+                                  .bist_index(bist_index[9:0]),
+                                  .wr_en(wr_en),
+                                  .bist_wr_en(bist_wr_en),
+                                  .rd_en(rd_en),
+                                  .bist_rd_en(bist_rd_en),
+                                  .way  (way[11:10]),            // Templated
+                                  .bist_way(bist_way[11:10]),    // Templated
+                                  .wd_b_l(wrdata1_d1_l[27:0]),   // Templated, flopped copy of write data 1
+                                  .lkuptag(lkup_tag_d1[27:1]),   // Templated
+                                  .rclk (rclk),                  // Templated
+                                  .sehold(sehold),               // Templated
+                                  .se   (se),                    // Templated
+                                  .sin  (),                      // Templated, scan-in left unconnected
+                                  .rst_tri_en(rst_tri_en),       // Templated
+                                  .arst_l(arst_l),               // Templated
+                                  .gbl_red_rid(fuse_l2t_rid[1:0]), // Templated
+                                  .gbl_red_reg_en(fuse_l2t_repair_en[1:0]), // Templated
+                                  .gbl_red_reg_d(fuse_l2t_repair_value[6:0]), // Templated
+                                  .fclk1(efc_sctag_fuse_clk1),   // Templated
+                                  .gbl_red_bank_id_top(gbl_red_bank_id[10]), // Templated
+                                  .gbl_red_bank_id_bottom(gbl_red_bank_id[11]), // Templated
+                                  .gbl_red_wr_en(wr_en_subbank[5])); // Templated
+
+
+endmodule
+
+
+
+
+
+module bw_r_l2t_subbank(/*AUTOARG*/
+   // Outputs
+   sout, wayselect0, wayselect1, tag_way0, tag_way1,
+   red_reg_q_array2, red_reg_enq_array2,
+   // Inputs
+   index, bist_index, wr_en, bist_wr_en, rd_en, bist_rd_en, way,
+   bist_way, wd_b_l, lkuptag, rclk, sehold, se, sin, rst_tri_en,
+   arst_l, gbl_red_rid, gbl_red_reg_en, gbl_red_reg_d, fclk1,
+   gbl_red_bank_id_top, gbl_red_bank_id_bottom, gbl_red_wr_en
+   );
+// Behavioral model of one L2 tag sub-bank: two ways of 1024 x 28-bit tags,
+// a registered tag compare per way, and fuse-loaded redundancy registers.
+// !!! Changed gbl_red_wren to gbl_red_wr_en as it is in schematic !!!
+//////////////
+// INPUTS
+//////////////
+
+input  [9:0]  index;          // functional row index
+input  [9:0]  bist_index;     // row index taken when a BIST enable is high
+input         wr_en;          // functional write request
+input         bist_wr_en;     // BIST write request
+input         rd_en;          // functional read/compare request
+input         bist_rd_en;     // BIST read/compare request
+input  [1:0]  way;            // write way: 2'b01 = way0, 2'b10 = way1
+input  [1:0]  bist_way;       // way taken when a BIST enable is high
+
+input  [27:0] wd_b_l ;        // inverted write data. not flopped here
+input  [27:1] lkuptag;        // compared with stored bits [27:1]. not flopped here
+
+input         rclk;
+input         sehold;         // high: the request flops keep their value
+input         se;             // not referenced by this model
+input         sin;            // not referenced by this model
+input         rst_tri_en;     // high: array writes are suppressed
+
+// not coded in the spec
+// arst function
+
+input         arst_l;         // active-low async clear of redundancy registers.
+
+input  [1:0]  gbl_red_rid;    // low two bits of the redundancy register select
+
+input  [1:0]  gbl_red_reg_en;
+input  [6:0]  gbl_red_reg_d;
+
+input         fclk1;          // clock of the redundancy registers
+input         gbl_red_bank_id_top;
+input         gbl_red_bank_id_bottom;
+
+input         gbl_red_wr_en ; // load enable for the selected redundancy register
+
+// !!! Changed gbl_red_wren to gbl_red_wr_en as it is in schematic !!!
+
+
+
+//////////////
+// OUTPUTS
+//////////////
+
+output        sout;           // declared only; never driven by this model
+output        wayselect0;     // registered hit flag for way0
+output        wayselect1;     // registered hit flag for way1
+
+output [27:0] tag_way0 ;      // registered read data of way0
+output [27:0] tag_way1 ;      // registered read data of way1
+
+
+output [6:0]  red_reg_q_array2;
+output [1:0]  red_reg_enq_array2;
+
+// !!! Taken out ssclk !!!
+
+// !!! Registering all tag outputs including wayselect as it is how implemented in design !!!
+wire          temp_wayselect0; // compare result before the output flop
+wire          temp_wayselect1; // compare result before the output flop
+
+reg           wayselect0; // Registering wayselect signal
+reg           wayselect1; // Registering wayselect signal
+
+reg    [27:0] temp_tag_way0 ; // tag read out data before the output flop
+reg    [27:0] temp_tag_way1 ; // tag read out data before the output flop
+// !!! Registering all tag outputs including wayselect as it is how implemented in design !!!
+
+reg    [9:0]  index_d1;        // request fields captured on posedge rclk
+reg    [1:0]  way_d1;
+reg           wren_d1, rden_d1 ;
+reg    [27:0] way0[1023:0] ;   // tag storage, way0
+reg    [27:0] way1[1023:0] ;   // tag storage, way1
+reg    [27:0] tag_way0, tag_way1 ;
+
+// JC modified begin
+// the size of row redundant register is 1 bit smaller than
+// the size of column one.
+reg    [7:0]   rid_subbank0_reg0 ;
+reg    [7:0]   rid_subbank0_reg1 ;
+// JC modified end
+reg    [8:0]   rid_subbank0_reg2 ;
+reg    [8:0]   rid_subbank0_reg3 ;
+
+// JC modified begin
+reg    [7:0]   rid_subbank1_reg0 ;
+reg    [7:0]   rid_subbank1_reg1 ;
+// JC modified end
+
+reg    [8:0]   rid_subbank1_reg2 ;
+reg    [8:0]   rid_subbank1_reg3 ;
+
+reg [1:0]      red_reg_enq_array2;
+reg [6:0]      red_reg_q_array2;
+wire   [3:0]   red_reg;
+
+
+////////////////////////////
+// REDUNDANCY LOGIC
+////////////////////////////
+assign red_reg = { gbl_red_bank_id_top, gbl_red_bank_id_bottom, gbl_red_rid[1:0] };
+
+// JC modified begin
+// The following modification include
+// 1. the size of row redundant register changes.
+// 2. the redundant output does not gate with clock
+// arst_l is listed as negedge: a bare level next to posedge fclk1 is not
+// synthesizable and re-ran the block when arst_l was released, so a register
+// could load without any fclk1 edge. Flops use non-blocking assignments.
+always @(posedge fclk1 or negedge arst_l) begin
+
+        if(!arst_l) begin
+                rid_subbank0_reg0 <= 8'b0 ;
+                rid_subbank0_reg1 <= 8'b0 ;
+                rid_subbank0_reg2 <= 9'b0 ;
+                rid_subbank0_reg3 <= 9'b0 ;
+                rid_subbank1_reg0 <= 8'b0 ;
+                rid_subbank1_reg1 <= 8'b0 ;
+                rid_subbank1_reg2 <= 9'b0 ;
+                rid_subbank1_reg3 <= 9'b0 ;
+        end
+
+        else if(gbl_red_wr_en) begin
+                case(red_reg)
+
+                4'b1000:        rid_subbank0_reg0 <= {gbl_red_reg_d[5:0], gbl_red_reg_en[1:0]};
+
+                4'b1001:        rid_subbank0_reg1 <= {gbl_red_reg_d[5:0], gbl_red_reg_en[1:0]};
+
+                4'b1010:        rid_subbank0_reg2 <= {gbl_red_reg_d[6:0], gbl_red_reg_en[1:0]};
+
+                4'b1011:        rid_subbank0_reg3 <= {gbl_red_reg_d[6:0], gbl_red_reg_en[1:0]};
+
+                4'b0100:        rid_subbank1_reg0 <= {gbl_red_reg_d[5:0], gbl_red_reg_en[1:0]};
+
+                4'b0101:        rid_subbank1_reg1 <= {gbl_red_reg_d[5:0], gbl_red_reg_en[1:0]};
+
+                4'b0110:        rid_subbank1_reg2 <= {gbl_red_reg_d[6:0], gbl_red_reg_en[1:0]};
+
+                4'b0111:        rid_subbank1_reg3 <= {gbl_red_reg_d[6:0], gbl_red_reg_en[1:0]};
+
+                default: ; // Do nothing
+
+                endcase
+        end // of else if
+
+end // of always
+// Read-back mux: red_reg picks the register shown; 8-bit ones are zero-extended.
+always  @( red_reg or rid_subbank0_reg0 or rid_subbank0_reg1 or rid_subbank0_reg2 or rid_subbank0_reg3 or
+           rid_subbank1_reg0 or rid_subbank1_reg1 or rid_subbank1_reg2 or rid_subbank1_reg3) begin
+
+                case(red_reg)
+
+                4'b1000:
+                { red_reg_q_array2, red_reg_enq_array2 }  = {1'b0,rid_subbank0_reg0};
+
+                4'b1001:
+                { red_reg_q_array2, red_reg_enq_array2 }  = {1'b0,rid_subbank0_reg1};
+
+                4'b1010:
+                { red_reg_q_array2, red_reg_enq_array2 }  = rid_subbank0_reg2;
+
+                4'b1011:
+                { red_reg_q_array2, red_reg_enq_array2 }  = rid_subbank0_reg3;
+
+                4'b0100:
+                { red_reg_q_array2, red_reg_enq_array2 }  = {1'b0,rid_subbank1_reg0};
+
+                4'b0101:
+                { red_reg_q_array2, red_reg_enq_array2 }  = {1'b0,rid_subbank1_reg1};
+
+                4'b0110:
+                { red_reg_q_array2, red_reg_enq_array2 }  = rid_subbank1_reg2;
+
+                4'b0111:
+                { red_reg_q_array2, red_reg_enq_array2 }  = rid_subbank1_reg3;
+
+                default:
+                { red_reg_q_array2, red_reg_enq_array2 }  = 9'b0;
+
+                endcase
+end
+
+// Request capture: sehold freezes the flops, a BIST enable selects bist_index/bist_way.
+always @(posedge rclk) begin
+
+        index_d1 <=     ( sehold) ? index_d1 :
+                ( bist_wr_en | bist_rd_en ) ? bist_index : index ;
+        way_d1  <=      (sehold)? way_d1 :
+                ( bist_wr_en | bist_rd_en ) ? bist_way : way ;
+        wren_d1 <=      ( sehold)? wren_d1 :
+                ( bist_wr_en | wr_en ) ;
+        rden_d1 <=      ( sehold)? rden_d1 :
+                ( bist_rd_en | rd_en );
+
+end
+
+// !!! Flopping output signals !!!
+always @(posedge rclk) begin
+        wayselect0 <= temp_wayselect0;
+        wayselect1 <= temp_wayselect1;
+        tag_way0 <= temp_tag_way0;
+        tag_way1 <= temp_tag_way1;
+end
+// !!! Flopping output signals !!!
+
+////////////////////////////////
+// COMPARE OPERATION 
+////////////////////////////////
+
+// !!! Also, we are gating wayselect with rd_en so, in other cycles (write or no op)
+// all wayselect signals are miss. !!!
+
+assign temp_wayselect0 = (rden_d1) ? ( lkuptag == temp_tag_way0[27:1] ) : 0 ;
+assign temp_wayselect1 = (rden_d1) ? ( lkuptag == temp_tag_way1[27:1] ) : 0 ;
+
+////////////////////////////////
+// READ OPERATION
+////////////////////////////////
+always @( /*AUTOSENSE*/ /*memory or*/ index_d1 or rden_d1
+          or rst_tri_en or wren_d1) begin
+        // X checks use === : a plain "== 1'bx" evaluates to x and never fires.
+`ifdef	INNO_MUXEX
+`else
+        if(wren_d1===1'bx) begin
+		`ifdef MODELSIM
+		       $display("L2_TAG_ERR"," wr en error %b ", wren_d1);
+		`else	   
+               $error("L2_TAG_ERR"," wr en error %b ", wren_d1);
+		`endif	   
+        end
+`endif
+
+
+        if( rden_d1)  begin
+
+`ifdef  INNO_MUXEX
+`else
+//----- PURELY FOR VERIFICATION -----------------------
+         if((^index_d1)===1'bx) begin   // true when any index bit is unknown
+		`ifdef MODELSIM
+                $display("L2_TAG_ERR"," index error %h ", index_d1[9:0]);
+		`else		
+                $error("L2_TAG_ERR"," index error %h ", index_d1[9:0]);
+		`endif		
+         end
+//----- PURELY FOR VERIFICATION -----------------------
+`endif
+         if( wren_d1 )  begin   // read and write requested together: data unknown
+                temp_tag_way0 = 28'bx ;
+                temp_tag_way1 = 28'bx ;
+         end
+         else   begin
+                temp_tag_way0 = way0[index_d1] ;
+                temp_tag_way1 = way1[index_d1] ;
+         end
+
+        end // of if rden_d1
+
+        else  begin
+// !!! When Tag is in write or no-op cycles, all output will be "0" since SAs are precharged !!!
+
+                temp_tag_way0 = 0;
+                temp_tag_way1 = 0;
+
+        end
+
+end
+
+////////////////////////////////
+// WRITE OPERATION 
+////////////////////////////////
+always @(negedge rclk ) begin
+        if( wren_d1 & ~rst_tri_en) begin
+
+`ifdef	INNO_MUXEX
+`else
+//----- PURELY FOR VERIFICATION -----------------------
+          if((^index_d1)===1'bx) begin   // true when any index bit is unknown
+		`ifdef MODELSIM  
+                $display("L2_TAG_ERR"," index error %h ", index_d1[9:0]);
+		`else
+                $error("L2_TAG_ERR"," index error %h ", index_d1[9:0]);
+		`endif		
+          end
+//----- PURELY FOR VERIFICATION -----------------------
+`endif
+
+// !!! When Tag is in write or no-op cycles, all output will be "0" since SAs are precharged !!!
+                // NOTE: temp_tag_way* are also assigned by the read block above.
+                temp_tag_way0 = 0;
+                temp_tag_way1 = 0;
+
+                case(way_d1)
+                2'b01 : begin
+                                way0[index_d1] = ~wd_b_l;   // store true-polarity tag
+                          end
+                2'b10 : begin
+                                way1[index_d1] = ~wd_b_l;   // store true-polarity tag
+                          end
+
+                default: ;
+                endcase
+         end
+end
+
+
+
+
+
+
+
+
+
+
+
+
+endmodule
+
+
Index: /trunk/T1-common/u1/u1.V
===================================================================
--- /trunk/T1-common/u1/u1.V	(revision 6)
+++ /trunk/T1-common/u1/u1.V	(revision 6)
@@ -0,0 +1,4332 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: u1.behV
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+//
+// basic gates {
+//
+////////////////////////////////////////////////////////////////////////
+
+
+// bw_u1_inv_0p6x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_0p6x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_1x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_1x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_1p4x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_1p4x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_2x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_2x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_3x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_3x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_4x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_4x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+
+// bw_u1_inv_5x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_5x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_8x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_8x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_10x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_10x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_15x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_15x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_20x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_20x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_30x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_30x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_inv_40x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_inv_40x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+// bw_u1_invh_15x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_invh_15x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+// bw_u1_invh_25x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_invh_25x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_invh_30x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_invh_30x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_invh_50x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_invh_50x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+// bw_u1_invh_60x
+// Inverter functional model: z = ~a.
+// No delay or drive strength is modelled.
+
+module bw_u1_invh_60x (
+    z,
+    a );
+
+    input  a;
+    output z;
+
+    not ( z, a );
+
+endmodule
+
+
+
+
+// bw_u1_nand2_0p4x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_0p4x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_0p6x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_0p6x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_1x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_1x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_1p4x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_1p4x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_2x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_2x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_3x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_3x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_4x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_4x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_5x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_5x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_7x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_7x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_10x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_10x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand2_15x
+// 2-input NAND functional model: z = ~(a & b).
+// No delay or drive strength is modelled.
+module bw_u1_nand2_15x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nand ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nand3_0p4x
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_0p4x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+
+
+// bw_u1_nand3_0p6x
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_0p6x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+
+// bw_u1_nand3_1x
+
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_1x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nand3_1p4x
+
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_1p4x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nand3_2x
+
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_2x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nand3_3x
+
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_3x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nand3_4x
+
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_4x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nand3_5x
+
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_5x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nand3_7x
+
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_7x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nand3_10x
+
+// 3-input NAND functional model: z = ~(a & b & c).
+// No delay or drive strength is modelled.
+module bw_u1_nand3_10x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nand ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nand4_0p6x
+
+// 4-input NAND functional model: z = ~(a & b & c & d).
+// No delay or drive strength is modelled.
+module bw_u1_nand4_0p6x (
+    z,
+    a,
+    b,
+    c,
+    d );
+
+    input  a;
+    input  b;
+    input  c;
+    input  d;
+    output z;
+
+    nand ( z, a, b, c, d );
+
+endmodule
+
+
+// bw_u1_nand4_1x
+// 4-input NAND functional model: z = ~(a & b & c & d).
+// No delay or drive strength is modelled.
+module bw_u1_nand4_1x (
+    z,
+    a,
+    b,
+    c,
+    d );
+
+    input  a;
+    input  b;
+    input  c;
+    input  d;
+    output z;
+
+    nand ( z, a, b, c, d );
+
+endmodule
+
+
+// bw_u1_nand4_1p4x
+// 4-input NAND functional model: z = ~(a & b & c & d).
+// No delay or drive strength is modelled.
+module bw_u1_nand4_1p4x (
+    z,
+    a,
+    b,
+    c,
+    d );
+
+    input  a;
+    input  b;
+    input  c;
+    input  d;
+    output z;
+
+    nand ( z, a, b, c, d );
+
+endmodule
+
+
+// bw_u1_nand4_2x
+// 4-input NAND functional model: z = ~(a & b & c & d).
+// No delay or drive strength is modelled.
+module bw_u1_nand4_2x (
+    z,
+    a,
+    b,
+    c,
+    d );
+
+    input  a;
+    input  b;
+    input  c;
+    input  d;
+    output z;
+
+    nand ( z, a, b, c, d );
+
+endmodule
+
+
+// bw_u1_nand4_3x
+// 4-input NAND functional model: z = ~(a & b & c & d).
+// No delay or drive strength is modelled.
+module bw_u1_nand4_3x (
+    z,
+    a,
+    b,
+    c,
+    d );
+
+    input  a;
+    input  b;
+    input  c;
+    input  d;
+    output z;
+
+    nand ( z, a, b, c, d );
+
+endmodule
+
+
+// bw_u1_nand4_4x
+// 4-input NAND functional model: z = ~(a & b & c & d).
+// No delay or drive strength is modelled.
+module bw_u1_nand4_4x (
+    z,
+    a,
+    b,
+    c,
+    d );
+
+    input  a;
+    input  b;
+    input  c;
+    input  d;
+    output z;
+
+    nand ( z, a, b, c, d );
+
+endmodule
+
+
+// bw_u1_nand4_6x
+// 4-input NAND functional model: z = ~(a & b & c & d).
+// No delay or drive strength is modelled.
+
+module bw_u1_nand4_6x (
+    z,
+    a,
+    b,
+    c,
+    d );
+
+    input  a;
+    input  b;
+    input  c;
+    input  d;
+    output z;
+
+
+    assign z = ~( a & b & c & d );
+
+endmodule
+
+// bw_u1_nand4_8x
+// 4-input NAND functional model: z = ~(a & b & c & d).
+// No delay or drive strength is modelled.
+
+module bw_u1_nand4_8x (
+    z,
+    a,
+    b,
+    c,
+    d );
+
+    input  a;
+    input  b;
+    input  c;
+    input  d;
+    output z;
+
+
+    assign z = ~( a & b & c & d );
+
+endmodule
+
+// bw_u1_nor2_0p6x
+// 2-input NOR functional model: z = ~(a | b).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor2_0p6x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nor ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nor2_1x
+// 2-input NOR functional model: z = ~(a | b).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor2_1x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nor ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nor2_1p4x
+// 2-input NOR functional model: z = ~(a | b).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor2_1p4x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nor ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nor2_2x
+// 2-input NOR functional model: z = ~(a | b).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor2_2x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nor ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nor2_3x
+// 2-input NOR functional model: z = ~(a | b).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor2_3x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nor ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nor2_4x
+// 2-input NOR functional model: z = ~(a | b).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor2_4x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nor ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nor2_6x
+// 2-input NOR functional model: z = ~(a | b).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor2_6x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nor ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nor2_8x
+// 2-input NOR functional model: z = ~(a | b).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor2_8x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nor ( z, a, b );
+
+endmodule
+
+
+// bw_u1_nor2_12x
+// 2-input NOR functional model: z = ~(a | b).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor2_12x (
+    z,
+    a,
+    b );
+
+    input  a;
+    input  b;
+    output z;
+
+    nor ( z, a, b );
+
+endmodule
+
+
+
+
+// bw_u1_nor3_0p6x
+// 3-input NOR functional model: z = ~(a | b | c).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor3_0p6x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nor ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nor3_1x
+// 3-input NOR functional model: z = ~(a | b | c).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor3_1x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nor ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nor3_1p4x
+// 3-input NOR functional model: z = ~(a | b | c).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor3_1p4x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nor ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nor3_2x
+// 3-input NOR functional model: z = ~(a | b | c).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor3_2x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nor ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nor3_3x
+// 3-input NOR functional model: z = ~(a | b | c).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor3_3x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nor ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nor3_4x
+// 3-input NOR functional model: z = ~(a | b | c).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor3_4x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nor ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nor3_6x
+// 3-input NOR functional model: z = ~(a | b | c).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor3_6x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nor ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_nor3_8x
+// 3-input NOR functional model: z = ~(a | b | c).
+// No delay or drive strength is modelled.
+
+module bw_u1_nor3_8x (
+    z,
+    a,
+    b,
+    c );
+
+    input  a;
+    input  b;
+    input  c;
+    output z;
+
+    nor ( z, a, b, c );
+
+endmodule
+
+
+// bw_u1_aoi21_0p4x
+// AND-OR-invert functional model: z = ~((b1 & b2) | a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi21_0p4x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi21_1x
+// AND-OR-invert functional model: z = ~((b1 & b2) | a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi21_1x (
+
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi21_2x
+// AND-OR-invert functional model: z = ~((b1 & b2) | a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi21_2x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi21_4x
+// AND-OR-invert functional model: z = ~((b1 & b2) | a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi21_4x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi21_8x
+// AND-OR-invert functional model: z = ~((b1 & b2) | a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi21_8x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi21_12x
+// AND-OR-invert functional model: z = ~((b1 & b2) | a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi21_12x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi22_0p4x
+// AND-OR-invert functional model: z = ~((a1 & a2) | (b1 & b2)).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi22_0p4x (
+    z,
+    a1,
+    a2,
+    b1,
+    b2 );
+
+    input  b1;
+    input  b2;
+    input  a1;
+    input  a2;
+    output z;
+
+    assign z = ~( a1 & a2 ) & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi22_1x
+// AND-OR-invert functional model: z = ~((a1 & a2) | (b1 & b2)).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi22_1x (
+    z,
+    b1,
+    b2,
+    a1,
+    a2 );
+
+    input  a1;
+    input  a2;
+    input  b1;
+    input  b2;
+    output z;
+
+
+    assign z = ~( a1 & a2 ) & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi22_2x
+// AND-OR-invert functional model: z = ~((a1 & a2) | (b1 & b2)).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi22_2x (
+
+
+    z,
+    b1,
+    b2,
+    a1,
+    a2 );
+
+    input  a1;
+    input  a2;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~( a1 & a2 ) & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi22_4x
+// AND-OR-invert functional model: z = ~((a1 & a2) | (b1 & b2)).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi22_4x (
+
+    z,
+    b1,
+    b2,
+    a1,
+    a2 );
+
+    input  a1;
+    input  a2;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~( a1 & a2 ) & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi22_8x
+// AND-OR-invert functional model: z = ~((a1 & a2) | (b1 & b2)).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi22_8x (
+
+    z,
+    b1,
+    b2,
+    a1,
+    a2 );
+
+    input  a1;
+    input  a2;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~( a1 & a2 ) & ~( b1 & b2 );
+
+endmodule
+// bw_u1_aoi211_0p3x
+// AND-OR-invert functional model: z = ~((c1 & c2) | a | b).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi211_0p3x (
+
+    z,
+    c1,
+    c2,
+    b,
+    a );
+
+    input  a;
+    input  b;
+    input  c1;
+    input  c2;
+    output z;
+
+    assign z = ~a & ~b & ~( c1 & c2 );
+
+endmodule
+
+// bw_u1_aoi211_1x
+// AND-OR-invert functional model: z = ~((c1 & c2) | a | b).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi211_1x (
+
+    z,
+    c1,
+    c2,
+    b,
+    a );
+
+    input  a;
+    input  b;
+    input  c1;
+    input  c2;
+    output z;
+
+    assign z = ~a & ~b & ~( c1 & c2 );
+
+endmodule
+
+// bw_u1_aoi211_2x
+// AND-OR-invert functional model: z = ~((c1 & c2) | a | b).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi211_2x (
+
+
+
+    z,
+    c1,
+    c2,
+    b,
+    a );
+
+    input  a;
+    input  b;
+    input  c1;
+    input  c2;
+    output z;
+
+
+    assign z = ~a & ~b & ~( c1 & c2 );
+
+endmodule
+
+// bw_u1_aoi211_4x
+// AND-OR-invert functional model: z = ~((c1 & c2) | a | b).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi211_4x (
+
+
+    z,
+    c1,
+    c2,
+    b,
+    a );
+
+    input  a;
+    input  b;
+    input  c1;
+    input  c2;
+    output z;
+
+
+
+    assign z = ~a & ~b & ~( c1 & c2 );
+
+endmodule
+
+// bw_u1_aoi211_8x
+// AND-OR-invert functional model: z = ~((c1 & c2) | a | b).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_aoi211_8x (
+
+
+    z,
+    c1,
+    c2,
+    b,
+    a );
+
+    input  a;
+    input  b;
+    input  c1;
+    input  c2;
+    output z;
+
+
+
+    assign z = ~a & ~b & ~( c1 & c2 );
+
+endmodule
+
+// bw_u1_oai21_0p4x
+// OR-AND-invert functional model: z = ~((b1 | b2) & a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_oai21_0p4x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a | ~( b1 | b2 );
+
+endmodule
+
+
+
+// bw_u1_oai21_1x
+// OR-AND-invert functional model: z = ~((b1 | b2) & a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_oai21_1x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a | ~( b1 | b2 );
+
+endmodule
+
+
+
+// bw_u1_oai21_2x
+// OR-AND-invert functional model: z = ~((b1 | b2) & a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_oai21_2x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a | ~( b1 | b2 );
+
+endmodule
+
+
+
+// bw_u1_oai21_4x
+// OR-AND-invert functional model: z = ~((b1 | b2) & a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_oai21_4x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a | ~( b1 | b2 );
+
+endmodule
+
+
+
+// bw_u1_oai21_8x
+// OR-AND-invert functional model: z = ~((b1 | b2) & a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_oai21_8x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a | ~( b1 | b2 );
+
+endmodule
+
+
+
+// bw_u1_oai21_12x
+// OR-AND-invert functional model: z = ~((b1 | b2) & a).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_oai21_12x (
+    z,
+    b1,
+    b2,
+    a );
+
+    input  a;
+    input  b1;
+    input  b2;
+    output z;
+
+    assign z = ~a | ~( b1 | b2 );
+
+endmodule
+
+
+
+// bw_u1_oai22_0p4x
+// OR-AND-invert model, De Morgan form of z = ~((a1 | a2) & (b1 | b2)).
+module bw_u1_oai22_0p4x (
+    z,
+    a1,
+    a2,
+    b1,
+    b2 );
+
+    input  b1;
+    input  b2;
+    input  a1;
+    input  a2;
+    output z;
+
+    assign z = ~( a1 | a2 ) | ~( b1 | b2 );
+
+endmodule
+
+// bw_u1_oai22_1x
+// OR-AND-invert model, De Morgan form of z = ~((a1 | a2) & (b1 | b2)).
+module bw_u1_oai22_1x (
+    z,
+    a1,
+    a2,
+    b1,
+    b2 );
+
+    input  b1;
+    input  b2;
+    input  a1;
+    input  a2;
+    output z;
+
+    assign z = ~( a1 | a2 ) | ~( b1 | b2 );
+
+endmodule
+
+// bw_u1_oai22_2x
+// OR-AND-invert model, De Morgan form of z = ~((a1 | a2) & (b1 | b2)).
+module bw_u1_oai22_2x (
+    z,
+    a1,
+    a2,
+    b1,
+    b2 );
+
+    input  b1;
+    input  b2;
+    input  a1;
+    input  a2;
+    output z;
+
+    assign z = ~( a1 | a2 ) | ~( b1 | b2 );
+
+endmodule
+
+// bw_u1_oai22_4x
+// OR-AND-invert model, De Morgan form of z = ~((a1 | a2) & (b1 | b2)).
+module bw_u1_oai22_4x (
+    z,
+    a1,
+    a2,
+    b1,
+    b2 );
+
+    input  b1;
+    input  b2;
+    input  a1;
+    input  a2;
+    output z;
+
+    assign z = ~( a1 | a2 ) | ~( b1 | b2 );
+
+endmodule
+
+// bw_u1_oai22_8x
+// OR-AND-invert model, De Morgan form of z = ~((a1 | a2) & (b1 | b2)).
+module bw_u1_oai22_8x (
+    z,
+    a1,
+    a2,
+    b1,
+    b2 );
+
+    input  b1;
+    input  b2;
+    input  a1;
+    input  a2;
+    output z;
+
+    assign z = ~( a1 | a2 ) | ~( b1 | b2 );
+
+endmodule
+
+// bw_u1_oai211_0p3x
+// OR-AND-invert functional model: z = ~((c1 | c2) & a & b).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_oai211_0p3x (
+    z,
+    c1,
+    c2,
+    b,
+    a );
+
+    input  a;
+    input  b;
+    input  c1;
+    input  c2;
+    output z;
+
+    assign z = ~a | ~b | ~( c1 | c2 );
+
+endmodule
+
+// bw_u1_oai211_1x
+// OR-AND-invert functional model: z = ~((c1 | c2) & a & b).
+// Written in De Morgan form; no delay or strength is modelled.
+module bw_u1_oai211_1x (
+    z,
+    c1,
+    c2,
+    b,
+    a );
+
+    input  a;
+    input  b;
+    input  c1;
+    input  c2;
+    output z;
+
+    assign z = ~a | ~b | ~( c1 | c2 );
+
+endmodule
+
+//bw_u1_oai211_2x
+//
+//
+// OR-AND-invert gate (2-1-1): z = ~((c1 | c2) & a & b).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai211_2x (z, c1, c2, b, a);
+
+    output z;       // inverted result
+    input  c1, c2;  // OR-group inputs
+    input  b;       // ANDed with the OR group
+    input  a;       // ANDed with the OR group
+
+    // Collapse the c pair first, then NAND it with a and b.
+    wire   c_any;
+
+    assign c_any = c1 | c2;
+    assign z     = ~(c_any & a & b);
+
+endmodule
+
+//bw_u1_oai211_4x
+//
+//
+// OR-AND-invert gate (2-1-1): z = ~((c1 | c2) & a & b).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai211_4x (z, c1, c2, b, a);
+
+    output z;       // inverted result
+    input  c1, c2;  // OR-group inputs
+    input  b;       // ANDed with the OR group
+    input  a;       // ANDed with the OR group
+
+    // Collapse the c pair first, then NAND it with a and b.
+    wire   c_any;
+
+    assign c_any = c1 | c2;
+    assign z     = ~(c_any & a & b);
+
+endmodule
+
+//bw_u1_oai211_8x
+//
+//
+// OR-AND-invert gate (2-1-1): z = ~((c1 | c2) & a & b).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai211_8x (z, c1, c2, b, a);
+
+    output z;       // inverted result
+    input  c1, c2;  // OR-group inputs
+    input  b;       // ANDed with the OR group
+    input  a;       // ANDed with the OR group
+
+    // Collapse the c pair first, then NAND it with a and b.
+    wire   c_any;
+
+    assign c_any = c1 | c2;
+    assign z     = ~(c_any & a & b);
+
+endmodule
+
+//bw_u1_aoi31_1x
+//
+// 
+// AND-OR-invert gate (3-1): z = ~((b1 & b2 & b3) | a).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi31_1x (z, b1, b2, b3, a);
+
+    output z;   // inverted result
+    input  b1;  // AND-group input
+    input  b2;  // AND-group input
+    input  b3;  // AND-group input
+    input  a;   // ORed with the AND group
+
+    // AND the b triple on its own net,
+    // then NOR it with a.
+    wire   b_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign z     = ~(b_all | a);
+
+endmodule
+//bw_u1_aoi31_2x
+//
+// 
+// AND-OR-invert gate (3-1): z = ~((b1 & b2 & b3) | a).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi31_2x (z, b1, b2, b3, a);
+
+    output z;   // inverted result
+    input  b1;  // AND-group input
+    input  b2;  // AND-group input
+    input  b3;  // AND-group input
+    input  a;   // ORed with the AND group
+
+    // AND the b triple on its own net, then NOR it with a.
+    wire   b_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign z     = ~(b_all | a);
+
+endmodule
+//bw_u1_aoi31_4x
+//
+// 
+// AND-OR-invert gate (3-1): z = ~((b1 & b2 & b3) | a).
+module bw_u1_aoi31_4x (z, b1, b2, b3, a);
+
+    output z;   // inverted result
+    input  b1;  // AND-group input
+    input  b2;  // AND-group input
+    input  b3;  // AND-group input
+    input  a;   // ORed with the AND group
+
+    // AND the b triple on its own net, then NOR it with a.
+    wire   b_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign z     = ~(b_all | a);
+
+endmodule
+//bw_u1_aoi31_8x
+//
+// 
+// AND-OR-invert gate (3-1): z = ~((b1 & b2 & b3) | a).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi31_8x (z, b1, b2, b3, a);
+
+    output z;   // inverted result
+    input  b1;  // AND-group input
+    input  b2;  // AND-group input
+    input  b3;  // AND-group input
+    input  a;   // ORed with the AND group
+
+    // AND the b triple on its own net, then NOR it with a.
+    wire   b_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign z     = ~(b_all | a);
+
+endmodule
+//bw_u1_aoi32_1x
+//
+// 
+// AND-OR-invert gate (3-2): z = ~((b1 & b2 & b3) | (a1 & a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi32_1x (z, b1, b2, b3, a1, a2);
+
+    output z;           // inverted result
+    input  b1, b2, b3;  // three-input AND group
+    input  a1, a2;      // two-input AND group
+
+    // Each AND group gets its own net;
+    // z is the NOR of the two nets.
+    wire   b_all;
+    wire   a_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign a_all = a1 & a2;
+    assign z     = ~(b_all | a_all);
+
+endmodule
+
+//bw_u1_aoi32_2x
+//
+// 
+// AND-OR-invert gate (3-2): z = ~((b1 & b2 & b3) | (a1 & a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi32_2x (z, b1, b2, b3, a1, a2);
+
+    output z;       // inverted result
+    input  b1;      // three-input AND group
+    input  b2;      // three-input AND group
+    input  b3;      // three-input AND group
+    input  a1, a2;  // two-input AND group
+
+    // Each AND group gets its own net;
+    // z is the NOR of the two nets.
+    wire   b_all;
+    wire   a_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign a_all = a1 & a2;
+    assign z     = ~(b_all | a_all);
+
+endmodule
+
+//bw_u1_aoi32_4x
+//
+// 
+// AND-OR-invert gate (3-2): z = ~((b1 & b2 & b3) | (a1 & a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi32_4x (z, b1, b2, b3, a1, a2);
+
+    output z;   // inverted result
+    input  b1;  // three-input AND group
+    input  b2;  // three-input AND group
+    input  b3;  // three-input AND group
+    input  a1;  // two-input AND group
+    input  a2;  // two-input AND group
+
+    // Each AND group gets its own net;
+    // z is the NOR of the two nets.
+    wire   b_all;
+    wire   a_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign a_all = a1 & a2;
+    assign z     = ~(b_all | a_all);
+
+endmodule
+
+//bw_u1_aoi32_8x
+//
+// 
+// AND-OR-invert gate (3-2): z = ~((b1 & b2 & b3) | (a1 & a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi32_8x (z, b1, b2, b3, a1, a2);
+
+    output z;       // inverted result
+    input  b1;      // three-input AND group
+    input  b2;      // three-input AND group
+    input  b3;      // three-input AND group
+    input  a1, a2;  // two-input AND group
+
+    // Each AND group gets its own net;
+    // z is the NOR of the two nets.
+    wire   b_all;
+    wire   a_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign a_all = a1 & a2;
+    assign z     = ~(b_all | a_all);
+
+endmodule
+
+//bw_u1_aoi33_1x
+//
+//
+// AND-OR-invert gate (3-3): z = ~((b1 & b2 & b3) | (a1 & a2 & a3)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi33_1x (
+    z, b1, b2, b3,
+    a1, a2, a3 );
+
+    output z;   // inverted result
+    input  b1;  // first AND group
+    input  b2;  // first AND group
+    input  b3;  // first AND group
+    input  a1;  // second AND group
+    input  a2;  // second AND group
+    input  a3;  // second AND group
+
+    // Each AND group gets its own net;
+    // z is the NOR of the two nets.
+    wire   b_all;
+    wire   a_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign a_all = a1 & a2 & a3;
+    assign z     = ~(b_all | a_all);
+
+endmodule
+
+
+//bw_u1_aoi33_2x
+//
+//
+// AND-OR-invert gate (3-3): z = ~((b1 & b2 & b3) | (a1 & a2 & a3)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi33_2x (
+    z, b1, b2, b3, a1, a2, a3 );
+
+    output z;   // inverted result
+    input  b1;  // first AND group
+    input  b2;  // first AND group
+    input  b3;  // first AND group
+    input  a1;  // second AND group
+    input  a2;  // second AND group
+    input  a3;  // second AND group
+
+    // Each AND group gets its own net;
+    // z is the NOR of the two nets.
+    wire   b_all;
+    wire   a_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign a_all = a1 & a2 & a3;
+    assign z     = ~(b_all | a_all);
+
+endmodule
+
+
+//bw_u1_aoi33_4x
+//
+//
+// AND-OR-invert gate (3-3): z = ~((b1 & b2 & b3) | (a1 & a2 & a3)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi33_4x (
+    z, b1, b2, b3,
+    a1, a2, a3 );
+
+    output z;   // inverted result
+    input  b1;  // first AND group
+    input  b2;  // first AND group
+    input  b3;  // first AND group
+    input  a1;  // second AND group
+    input  a2;  // second AND group
+    input  a3;  // second AND group
+
+    // Each AND group gets its own net;
+    // z is the NOR of the two nets.
+    wire   b_all;
+    wire   a_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign a_all = a1 & a2 & a3;
+    assign z     = ~(b_all | a_all);
+
+endmodule
+
+
+//bw_u1_aoi33_8x
+//
+//
+// AND-OR-invert gate (3-3): z = ~((b1 & b2 & b3) | (a1 & a2 & a3)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi33_8x (
+    z, b1, b2, b3, a1, a2, a3 );
+
+    output z;   // inverted result
+    input  b1;  // first AND group
+    input  b2;  // first AND group
+    input  b3;  // first AND group
+    input  a1;  // second AND group
+    input  a2;  // second AND group
+    input  a3;  // second AND group
+
+    // Each AND group gets its own net;
+    // z is the NOR of the two nets.
+    wire   b_all;
+    wire   a_all;
+
+    assign b_all = b1 & b2 & b3;
+    assign a_all = a1 & a2 & a3;
+    assign z     = ~(b_all | a_all);
+
+endmodule
+
+
+//bw_u1_aoi221_1x
+//
+// 
+// AND-OR-invert gate (2-2-1): z = ~((c1 & c2) | (b1 & b2) | a).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi221_1x (z, c1, c2, b1, b2, a);
+
+    output z;       // inverted result
+    input  c1, c2;  // first AND group
+    input  b1, b2;  // second AND group
+    input  a;       // ORed with both AND groups
+
+    // Each AND group gets its own net;
+    // z is the NOR of those nets and a.
+    wire   c_all;
+    wire   b_all;
+
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign z     = ~(c_all | b_all | a);
+
+endmodule
+
+
+//bw_u1_aoi221_2x
+//
+// 
+// AND-OR-invert gate (2-2-1): z = ~((c1 & c2) | (b1 & b2) | a).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi221_2x (
+    z, c1, c2, b1, b2, a );
+
+    output z;       // inverted result
+    input  c1, c2;  // first AND group
+    input  b1, b2;  // second AND group
+    input  a;       // ORed with both AND groups
+
+    // Each AND group gets its own net;
+    // z is the NOR of those nets and a.
+    wire   c_all;
+    wire   b_all;
+
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign z     = ~(c_all | b_all | a);
+
+endmodule
+
+
+//bw_u1_aoi221_4x
+//
+// 
+// AND-OR-invert gate (2-2-1): z = ~((c1 & c2) | (b1 & b2) | a).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi221_4x (
+    z, c1, c2,
+    b1, b2, a );
+
+    output z;       // inverted result
+    input  c1;      // first AND group
+    input  c2;      // first AND group
+    input  b1, b2;  // second AND group
+    input  a;       // ORed with both AND groups
+
+    // Each AND group gets its own net;
+    // z is the NOR of those nets and a.
+    wire   c_all;
+    wire   b_all;
+
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign z     = ~(c_all | b_all | a);
+
+endmodule
+
+
+//bw_u1_aoi221_8x
+//
+// 
+// AND-OR-invert gate (2-2-1): z = ~((c1 & c2) | (b1 & b2) | a).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi221_8x (z, c1, c2, b1, b2, a);
+
+    output z;       // inverted result
+    input  c1, c2;  // first AND group
+    input  b1, b2;  // second AND group
+    input  a;       // ORed with both AND groups
+
+    // Each AND group gets its own net;
+    // z is the NOR of those nets and a.
+    wire   c_all;
+    wire   b_all;
+
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign z     = ~(c_all | b_all | a);
+
+endmodule
+
+
+//bw_u1_aoi222_1x
+//
+//
+// AND-OR-invert gate (2-2-2): z = ~((c1 & c2) | (b1 & b2) | (a1 & a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi222_1x (z, a1, a2, b1, b2, c1, c2);
+
+    output z;       // inverted result
+    input  a1, a2;  // AND group a
+    input  b1, b2;  // AND group b
+    input  c1, c2;  // AND group c
+
+    // Each AND group gets its own net;
+    // z is the NOR of the three nets.
+    wire   c_all;
+    wire   b_all;
+    wire   a_all;
+
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign a_all = a1 & a2;
+    assign z     = ~(c_all | b_all | a_all);
+
+endmodule
+
+//bw_u1_aoi222_2x
+//
+//
+// AND-OR-invert gate (2-2-2): z = ~((c1 & c2) | (b1 & b2) | (a1 & a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi222_2x (z, a1, a2, b1, b2, c1, c2);
+
+    output z;       // inverted result
+    input  a1, a2;  // AND group a
+    input  b1, b2;  // AND group b
+    input  c1, c2;  // AND group c
+
+    // Each AND group gets its own net;
+    // z is the NOR of the three nets.
+    wire   c_all;
+    wire   b_all;
+    wire   a_all;
+
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign a_all = a1 & a2;
+    assign z     = ~(c_all | b_all | a_all);
+
+endmodule
+
+
+//bw_u1_aoi222_4x
+//
+//
+// AND-OR-invert gate (2-2-2): z = ~((c1 & c2) | (b1 & b2) | (a1 & a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi222_4x (z, a1, a2, b1, b2, c1, c2);
+
+    output z;       // inverted result
+    input  a1, a2;  // AND group a
+    input  b1, b2;  // AND group b
+    input  c1, c2;  // AND group c
+
+    // Each AND group gets its own net;
+    // z is the NOR of the three nets.
+    wire   c_all;
+    wire   b_all;
+    wire   a_all;
+
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign a_all = a1 & a2;
+    assign z     = ~(c_all | b_all | a_all);
+
+endmodule
+
+
+//bw_u1_aoi311_1x
+//
+//
+// AND-OR-invert gate (3-1-1): z = ~((c1 & c2 & c3) | a | b).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi311_1x (z, c1, c2, c3, b, a);
+
+    output z;   // inverted result
+    input  c1;  // AND-group input
+    input  c2;  // AND-group input
+    input  c3;  // AND-group input
+    input  b;   // ORed with the AND group
+    input  a;   // ORed with the AND group
+
+    // AND the c triple on its own net,
+    // then NOR it with a and b.
+    wire   c_all;
+
+    assign c_all = c1 & c2 & c3;
+    assign z     = ~(c_all | a | b);
+
+endmodule
+
+
+
+
+//bw_u1_aoi311_2x
+//
+//
+// AND-OR-invert gate (3-1-1): z = ~((c1 & c2 & c3) | a | b).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi311_2x (z, c1, c2, c3, b, a);
+
+    output z;   // inverted result
+    input  c1;  // AND-group input
+    input  c2;  // AND-group input
+    input  c3;  // AND-group input
+    input  b;   // ORed with the AND group
+    input  a;   // ORed with the AND group
+
+    // AND the c triple on its own net, then NOR it with a and b.
+    wire   c_all;
+
+    assign c_all = c1 & c2 & c3;
+    assign z     = ~(c_all | a | b);
+
+endmodule
+
+
+
+
+//bw_u1_aoi311_4x
+//
+//
+// AND-OR-invert gate (3-1-1): z = ~((c1 & c2 & c3) | a | b).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi311_4x (z, c1, c2, c3, b, a);
+
+    output z;   // inverted result
+    input  c1;  // AND-group input
+    input  c2;  // AND-group input
+    input  c3;  // AND-group input
+    input  b;   // ORed with the AND group
+    input  a;   // ORed with the AND group
+
+    // AND the c triple on its own net,
+    // then NOR it with a and b.
+    wire   c_all;
+
+    assign c_all = c1 & c2 & c3;
+    assign z     = ~(c_all | a | b);
+
+endmodule
+
+
+
+
+//bw_u1_aoi311_8x
+//
+//
+// AND-OR-invert gate (3-1-1): z = ~((c1 & c2 & c3) | a | b).
+// Purely combinational; every port is a single bit.
+module bw_u1_aoi311_8x (z, c1, c2, c3, b, a);
+
+    output z;   // inverted result
+    input  c1;  // AND-group input
+    input  c2;  // AND-group input
+    input  c3;  // AND-group input
+    input  b;   // ORed with the AND group
+    input  a;   // ORed with the AND group
+
+    // AND the c triple on its own net, then NOR it with a and b.
+    wire   c_all;
+
+    assign c_all = c1 & c2 & c3;
+    assign z     = ~(c_all | a | b);
+
+endmodule
+
+
+
+
+//bw_u1_oai31_1x
+//
+//
+// OR-AND-invert gate (3-1): z = ~((b1 | b2 | b3) & a).
+module bw_u1_oai31_1x (z, b1, b2, b3, a);
+
+    output z;   // inverted result
+    input  b1;  // OR-group input
+    input  b2;  // OR-group input
+    input  b3;  // OR-group input
+    input  a;   // ANDed with the OR group
+
+    // OR the b triple on its own net, then NAND it with a.
+    wire   b_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign z     = ~(b_any & a);
+
+endmodule
+
+
+
+
+//bw_u1_oai31_2x
+//
+//
+// OR-AND-invert gate (3-1): z = ~((b1 | b2 | b3) & a).
+module bw_u1_oai31_2x (z, b1, b2, b3, a);
+
+    output z;   // inverted result
+    input  b1;  // OR-group input
+    input  b2;  // OR-group input
+    input  b3;  // OR-group input
+    input  a;   // ANDed with the OR group
+
+    // OR the b triple on its own net, then NAND it with a.
+    wire   b_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign z     = ~(b_any & a);
+
+endmodule
+
+
+
+
+//bw_u1_oai31_4x
+//
+//
+// OR-AND-invert gate (3-1): z = ~((b1 | b2 | b3) & a).
+module bw_u1_oai31_4x (z, b1, b2, b3, a);
+
+    output z;   // inverted result
+    input  b1;  // OR-group input
+    input  b2;  // OR-group input
+    input  b3;  // OR-group input
+    input  a;   // ANDed with the OR group
+
+    // OR the b triple on its own net, then NAND it with a.
+    wire   b_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign z     = ~(b_any & a);
+
+endmodule
+
+
+
+
+//bw_u1_oai31_8x
+//
+//
+// OR-AND-invert gate (3-1): z = ~((b1 | b2 | b3) & a).
+module bw_u1_oai31_8x (z, b1, b2, b3, a);
+
+    output z;   // inverted result
+    input  b1;  // OR-group input
+    input  b2;  // OR-group input
+    input  b3;  // OR-group input
+    input  a;   // ANDed with the OR group
+
+    // OR the b triple on its own net, then NAND it with a.
+    wire   b_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign z     = ~(b_any & a);
+
+endmodule
+
+
+
+
+//bw_u1_oai32_1x
+//
+//
+// OR-AND-invert gate (3-2): z = ~((b1 | b2 | b3) & (a1 | a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai32_1x (z, b1, b2, b3, a1, a2);
+
+    output z;           // inverted result
+    input  b1, b2, b3;  // three-input OR group
+    input  a1, a2;      // two-input OR group
+
+    // Each OR group gets its own net;
+    // z is the NAND of the two nets.
+    wire   b_any;
+    wire   a_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign a_any = a1 | a2;
+    assign z     = ~(b_any & a_any);
+
+endmodule
+
+
+
+//bw_u1_oai32_2x
+//
+//
+// OR-AND-invert gate (3-2): z = ~((b1 | b2 | b3) & (a1 | a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai32_2x (z, b1, b2, b3, a1, a2);
+
+    output z;           // inverted result
+    input  b1, b2, b3;  // three-input OR group
+    input  a1, a2;      // two-input OR group
+
+    // Each OR group gets its own net;
+    // z is the NAND of the two nets.
+    wire   b_any;
+    wire   a_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign a_any = a1 | a2;
+    assign z     = ~(b_any & a_any);
+
+endmodule
+
+
+
+//bw_u1_oai32_4x
+//
+//
+// OR-AND-invert gate (3-2): z = ~((b1 | b2 | b3) & (a1 | a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai32_4x (z, b1, b2, b3, a1, a2);
+
+    output z;           // inverted result
+    input  b1, b2, b3;  // three-input OR group
+    input  a1, a2;      // two-input OR group
+
+    // Each OR group gets its own net;
+    // z is the NAND of the two nets.
+    wire   b_any;
+    wire   a_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign a_any = a1 | a2;
+    assign z     = ~(b_any & a_any);
+
+endmodule
+
+
+
+//bw_u1_oai32_8x
+//
+//
+// OR-AND-invert gate (3-2): z = ~((b1 | b2 | b3) & (a1 | a2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai32_8x (z, b1, b2, b3, a1, a2);
+
+    output z;           // inverted result
+    input  b1, b2, b3;  // three-input OR group
+    input  a1, a2;      // two-input OR group
+
+    // Each OR group gets its own net;
+    // z is the NAND of the two nets.
+    wire   b_any;
+    wire   a_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign a_any = a1 | a2;
+    assign z     = ~(b_any & a_any);
+
+endmodule
+
+
+
+//bw_u1_oai33_1x
+//
+//
+// OR-AND-invert gate (3-3): z = ~((b1 | b2 | b3) & (a1 | a2 | a3)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai33_1x (
+    z, b1, b2, b3,
+    a1, a2, a3 );
+
+    output z;           // inverted result
+    input  b1, b2, b3;  // first OR group
+    input  a1, a2, a3;  // second OR group
+
+    // Each OR group gets its own net;
+    // z is the NAND of the two nets.
+    wire   b_any;
+    wire   a_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign a_any = a1 | a2 | a3;
+    assign z     = ~(b_any & a_any);
+
+endmodule
+
+
+//bw_u1_oai33_2x
+//
+//
+// OR-AND-invert gate (3-3): z = ~((b1 | b2 | b3) & (a1 | a2 | a3)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai33_2x (
+    z, b1, b2, b3,
+    a1, a2, a3 );
+
+    output z;           // inverted result
+    input  b1, b2, b3;  // first OR group
+    input  a1, a2, a3;  // second OR group
+
+    // Each OR group gets its own net;
+    // z is the NAND of the two nets.
+    wire   b_any;
+    wire   a_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign a_any = a1 | a2 | a3;
+    assign z     = ~(b_any & a_any);
+
+endmodule
+
+
+//bw_u1_oai33_4x
+//
+//
+// OR-AND-invert gate (3-3): z = ~((b1 | b2 | b3) & (a1 | a2 | a3)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai33_4x (
+    z, b1, b2, b3,
+    a1, a2, a3 );
+
+    output z;           // inverted result
+    input  b1, b2, b3;  // first OR group
+    input  a1, a2, a3;  // second OR group
+
+    // Each OR group gets its own net;
+    // z is the NAND of the two nets.
+    wire   b_any;
+    wire   a_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign a_any = a1 | a2 | a3;
+    assign z     = ~(b_any & a_any);
+
+endmodule
+
+
+//bw_u1_oai33_8x
+//
+//
+// OR-AND-invert gate (3-3): z = ~((b1 | b2 | b3) & (a1 | a2 | a3)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai33_8x (
+    z, b1, b2, b3,
+    a1, a2, a3 );
+
+    output z;           // inverted result
+    input  b1, b2, b3;  // first OR group
+    input  a1, a2, a3;  // second OR group
+
+    // Each OR group gets its own net;
+    // z is the NAND of the two nets.
+    wire   b_any;
+    wire   a_any;
+
+    assign b_any = b1 | b2 | b3;
+    assign a_any = a1 | a2 | a3;
+    assign z     = ~(b_any & a_any);
+
+endmodule
+
+
+//bw_u1_oai221_1x
+//
+//
+// OR-AND-invert gate (2-2-1): z = ~((c1 | c2) & a & (b1 | b2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai221_1x (z, c1, c2, b1, b2, a);
+
+    output z;       // inverted result
+    input  c1, c2;  // first OR group
+    input  b1, b2;  // second OR group
+    input  a;       // ANDed with both OR groups
+
+    // Each OR group gets its own net; z is the NAND of those nets and a.
+    wire   c_any;
+    wire   b_any;
+
+    assign c_any = c1 | c2;
+    assign b_any = b1 | b2;
+    assign z     = ~(c_any & a & b_any);
+
+endmodule
+
+//bw_u1_oai221_2x
+//
+//
+// OR-AND-invert gate (2-2-1): z = ~((c1 | c2) & a & (b1 | b2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai221_2x (z, c1, c2, b1, b2, a);
+
+    output z;       // inverted result
+    input  c1, c2;  // first OR group
+    input  b1, b2;  // second OR group
+    input  a;       // ANDed with both OR groups
+
+    // Each OR group gets its own net; z is the NAND of those nets and a.
+    wire   c_any;
+    wire   b_any;
+
+    assign c_any = c1 | c2;
+    assign b_any = b1 | b2;
+    assign z     = ~(c_any & a & b_any);
+
+endmodule
+
+//bw_u1_oai221_4x
+//
+//
+// OR-AND-invert gate (2-2-1): z = ~((c1 | c2) & a & (b1 | b2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai221_4x (z, c1, c2, b1, b2, a);
+
+    output z;       // inverted result
+    input  c1, c2;  // first OR group
+    input  b1, b2;  // second OR group
+    input  a;       // ANDed with both OR groups
+
+    // Each OR group gets its own net; z is the NAND of those nets and a.
+    wire   c_any;
+    wire   b_any;
+
+    assign c_any = c1 | c2;
+    assign b_any = b1 | b2;
+    assign z     = ~(c_any & a & b_any);
+
+endmodule
+
+//bw_u1_oai221_8x
+//
+//
+// OR-AND-invert gate (2-2-1): z = ~((c1 | c2) & a & (b1 | b2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai221_8x (z, c1, c2, b1, b2, a);
+
+    output z;       // inverted result
+    input  c1, c2;  // first OR group
+    input  b1, b2;  // second OR group
+    input  a;       // ANDed with both OR groups
+
+    // Each OR group gets its own net; z is the NAND of those nets and a.
+    wire   c_any;
+    wire   b_any;
+
+    assign c_any = c1 | c2;
+    assign b_any = b1 | b2;
+    assign z     = ~(c_any & a & b_any);
+
+endmodule
+
+//bw_u1_oai222_1x
+//
+//
+// OR-AND-invert gate (2-2-2): z = ~((c1 | c2) & (a1 | a2) & (b1 | b2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai222_1x (z, c1, c2, b1, b2, a1, a2);
+
+    output z;       // inverted result
+    input  c1, c2;  // OR group c
+    input  b1, b2;  // OR group b
+    input  a1, a2;  // OR group a
+
+    // Each OR group gets its own net; z is the NAND of the three nets.
+    wire   c_any;
+    wire   a_any;
+    wire   b_any;
+
+    assign c_any = c1 | c2;
+    assign a_any = a1 | a2;
+    assign b_any = b1 | b2;
+    assign z     = ~(c_any & a_any & b_any);
+
+endmodule
+
+
+//bw_u1_oai222_2x
+//
+//
+// OR-AND-invert gate (2-2-2): z = ~((c1 | c2) & (a1 | a2) & (b1 | b2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai222_2x (z, c1, c2, b1, b2, a1, a2);
+
+    output z;       // inverted result
+    input  c1, c2;  // OR group c
+    input  b1, b2;  // OR group b
+    input  a1, a2;  // OR group a
+
+    // Each OR group gets its own net; z is the NAND of the three nets.
+    wire   c_any;
+    wire   a_any;
+    wire   b_any;
+
+    assign c_any = c1 | c2;
+    assign a_any = a1 | a2;
+    assign b_any = b1 | b2;
+    assign z     = ~(c_any & a_any & b_any);
+
+endmodule
+
+
+//bw_u1_oai222_4x
+//
+//
+// OR-AND-invert gate (2-2-2): z = ~((c1 | c2) & (a1 | a2) & (b1 | b2)).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai222_4x (z, c1, c2, b1, b2, a1, a2);
+
+    output z;       // inverted result
+    input  c1, c2;  // OR group c
+    input  b1, b2;  // OR group b
+    input  a1, a2;  // OR group a
+
+    // Each OR group gets its own net; z is the NAND of the three nets.
+    wire   c_any;
+    wire   a_any;
+    wire   b_any;
+
+    assign c_any = c1 | c2;
+    assign a_any = a1 | a2;
+    assign b_any = b1 | b2;
+    assign z     = ~(c_any & a_any & b_any);
+
+endmodule
+
+
+//bw_u1_oai311_1x
+//
+//
+// OR-AND-invert gate (3-1-1): z = ~((c1 | c2 | c3) & a & b).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai311_1x (z, c1, c2, c3, b, a);
+
+    output z;   // inverted result
+    input  c1;  // OR-group input
+    input  c2;  // OR-group input
+    input  c3;  // OR-group input
+    input  b;   // ANDed with the OR group
+    input  a;   // ANDed with the OR group
+
+    // OR the c triple on its own net, then NAND it with a and b.
+    wire   c_any;
+
+    assign c_any = c1 | c2 | c3;
+    assign z     = ~(c_any & a & b);
+
+endmodule
+
+
+//bw_u1_oai311_2x
+//
+//
+// OR-AND-invert gate (3-1-1): z = ~((c1 | c2 | c3) & a & b).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai311_2x (z, c1, c2, c3, b, a);
+
+    output z;   // inverted result
+    input  c1;  // OR-group input
+    input  c2;  // OR-group input
+    input  c3;  // OR-group input
+    input  b;   // ANDed with the OR group
+    input  a;   // ANDed with the OR group
+
+    // OR the c triple on its own net, then NAND it with a and b.
+    wire   c_any;
+
+    assign c_any = c1 | c2 | c3;
+    assign z     = ~(c_any & a & b);
+
+endmodule
+
+
+//bw_u1_oai311_4x
+//
+//
+// OR-AND-invert gate (3-1-1): z = ~((c1 | c2 | c3) & a & b).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai311_4x (z, c1, c2, c3, b, a);
+
+    output z;   // inverted result
+    input  c1;  // OR-group input
+    input  c2;  // OR-group input
+    input  c3;  // OR-group input
+    input  b;   // ANDed with the OR group
+    input  a;   // ANDed with the OR group
+
+    // OR the c triple on its own net, then NAND it with a and b.
+    wire   c_any;
+
+    assign c_any = c1 | c2 | c3;
+    assign z     = ~(c_any & a & b);
+
+endmodule
+
+
+//bw_u1_oai311_8x
+//
+//
+// OR-AND-invert gate (3-1-1): z = ~((c1 | c2 | c3) & a & b).
+// Purely combinational; every port is a single bit.
+module bw_u1_oai311_8x (z, c1, c2, c3, b, a);
+
+    output z;   // inverted result
+    input  c1;  // OR-group input
+    input  c2;  // OR-group input
+    input  c3;  // OR-group input
+    input  b;   // ANDed with the OR group
+    input  a;   // ANDed with the OR group
+
+    // OR the c triple on its own net, then NAND it with a and b.
+    wire   c_any;
+
+    assign c_any = c1 | c2 | c3;
+    assign z     = ~(c_any & a & b);
+
+endmodule
+
+
+//bw_u1_muxi21_0p6x
+
+
+
+module bw_u1_muxi21_0p6x (z, d0, d1, s);
+    // Inverting 2:1 mux: z is ~d1 when s is 1 and ~d0 when s is 0.
+    output z;
+    input  d0, d1, s;
+    assign z = ~(s ? d1 : d0);  // select first, invert once
+endmodule
+
+
+//bw_u1_muxi21_1x
+
+
+
+module bw_u1_muxi21_1x (z, d0, d1, s);
+    // Inverting 2:1 mux: z is ~d1 when s is 1 and ~d0 when s is 0.
+    output z;
+    input  d0, d1, s;
+    assign z = ~(s ? d1 : d0);  // select first, invert once
+endmodule
+
+
+
+
+
+
+
+//bw_u1_muxi21_2x
+
+
+
+module bw_u1_muxi21_2x (z, d0, d1, s);
+    // Inverting 2:1 mux: z is ~d1 when s is 1 and ~d0 when s is 0.
+    output z;
+    input  d0, d1, s;
+    assign z = ~(s ? d1 : d0);  // select first, invert once
+endmodule
+
+
+//bw_u1_muxi21_4x
+
+
+
+module bw_u1_muxi21_4x (z, d0, d1, s);
+    // Inverting 2:1 mux: z is ~d1 when s is 1 and ~d0 when s is 0.
+    output z;
+    input  d0, d1, s;
+    assign z = ~(s ? d1 : d0);  // select first, invert once
+endmodule
+
+
+
+
+//bw_u1_muxi21_6x
+
+
+module bw_u1_muxi21_6x (z, d0, d1, s);
+    // Inverting 2:1 mux: z is ~d1 when s is 1 and ~d0 when s is 0.
+    output z;
+    input  d0, d1, s;
+    assign z = ~(s ? d1 : d0);  // select first, invert once
+endmodule
+
+//bw_u1_muxi31d_4x
+//
+
+module bw_u1_muxi31d_4x (z, d0, d1, d2, s0, s1, s2);  // wrapper: all muxing is done by zmuxi31d_prim
+    output z;
+    input  d0, d1, d2, s0, s1, s2;
+    zmuxi31d_prim i0 (.z(z), .d0(d0), .d1(d1), .d2(d2), .s0(s0), .s1(s1), .s2(s2));
+endmodule
+
+//bw_u1_muxi41d_4x
+//
+
+module bw_u1_muxi41d_4x (z, d0, d1, d2, d3, s0, s1, s2, s3);  // wrapper: all muxing is done by zmuxi41d_prim
+    output z;
+    input  d0, d1, d2, d3, s0, s1, s2, s3;
+    zmuxi41d_prim i0 (.z(z), .d0(d0), .d1(d1), .d2(d2), .d3(d3), .s0(s0), .s1(s1), .s2(s2), .s3(s3));
+endmodule
+
+//bw_u1_muxi41d_6x
+//
+
+module bw_u1_muxi41d_6x (z, d0, d1, d2, d3, s0, s1, s2, s3);  // wrapper: all muxing is done by zmuxi41d_prim
+    output z;
+    input  d0, d1, d2, d3, s0, s1, s2, s3;
+    zmuxi41d_prim i0 (.z(z), .d0(d0), .d1(d1), .d2(d2), .d3(d3), .s0(s0), .s1(s1), .s2(s2), .s3(s3));
+endmodule
+ 
+
+//bw_u1_xor2_0p6x
+//
+// 
+// Two-input exclusive-OR gate: z = a ^ b.
+// Purely combinational; every port is a single bit.
+module bw_u1_xor2_0p6x (z, a, b);
+
+    output z;  // 1 when a and b differ
+    input  a;  // first operand
+    input  b;  // second operand
+
+    // Built-in gate primitive: output first, then the inputs.
+    xor (z, a, b);
+
+endmodule
+//bw_u1_xor2_1x
+//
+// 
+// Two-input exclusive-OR gate: z = a ^ b.
+// Purely combinational; every port is a single bit.
+module bw_u1_xor2_1x (z, a, b);
+
+    output z;  // 1 when a and b differ
+    input  a;  // first operand
+    input  b;  // second operand
+
+    // Built-in gate primitive: output first, then the inputs.
+    xor (z, a, b);
+
+endmodule
+//bw_u1_xor2_2x
+//
+// 
+// Two-input exclusive-OR gate: z = a ^ b.
+// Purely combinational; every port is a single bit.
+module bw_u1_xor2_2x (z, a, b);
+
+    output z;  // 1 when a and b differ
+    input  a;  // first operand
+    input  b;  // second operand
+
+    // Built-in gate primitive: output first, then the inputs.
+    xor (z, a, b);
+
+endmodule
+//bw_u1_xor2_4x
+//
+// 
+// Two-input exclusive-OR gate: z = a ^ b.
+// Purely combinational; every port is a single bit.
+module bw_u1_xor2_4x (z, a, b);
+
+    output z;  // 1 when a and b differ
+    input  a;  // first operand
+    input  b;  // second operand
+
+    // Built-in gate primitive: output first, then the inputs.
+    xor (z, a, b);
+
+endmodule
+//bw_u1_xnor2_0p6x
+//
+// 
+// Two-input exclusive-NOR gate: z = ~(a ^ b).
+// Purely combinational; every port is a single bit.
+module bw_u1_xnor2_0p6x (z, a, b);
+
+    output z;  // 1 when a and b are equal
+    input  a;  // first operand
+    input  b;  // second operand
+
+    // Built-in gate primitive: output first, then the inputs.
+    xnor (z, a, b);
+
+endmodule
+//bw_u1_xnor2_1x
+//
+// 
+// Two-input exclusive-NOR gate: z = ~(a ^ b).
+// Purely combinational; every port is a single bit.
+module bw_u1_xnor2_1x (z, a, b);
+
+    output z;  // 1 when a and b are equal
+    input  a;  // first operand
+    input  b;  // second operand
+
+    // Built-in gate primitive: output first, then the inputs.
+    xnor (z, a, b);
+
+endmodule
+//bw_u1_xnor2_2x
+//
+// 
+// Two-input exclusive-NOR gate: z = ~(a ^ b).
+// Purely combinational; every port is a single bit.
+module bw_u1_xnor2_2x (z, a, b);
+
+    output z;  // 1 when a and b are equal
+    input  a;  // first operand
+    input  b;  // second operand
+
+    // Built-in gate primitive: output first, then the inputs.
+    xnor (z, a, b);
+
+endmodule
+//bw_u1_xnor2_4x
+//
+// 
+// Two-input exclusive-NOR gate: z = ~(a ^ b).
+// Purely combinational; every port is a single bit.
+module bw_u1_xnor2_4x (z, a, b);
+
+    output z;  // 1 when a and b are equal
+    input  a;  // first operand
+    input  b;  // second operand
+
+    // Built-in gate primitive: output first, then the inputs.
+    xnor (z, a, b);
+
+endmodule
+
+//bw_u1_buf_1x
+//
+
+// Non-inverting buffer: z follows a.
+module bw_u1_buf_1x (z, a);
+
+    output z;  // copy of a
+    input  a;  // value to pass through
+
+    // Continuous assignment, so z tracks every change on a.
+    assign z = a;
+
+endmodule
+
+//bw_u1_buf_5x
+//
+
+// Non-inverting buffer: z follows a.
+module bw_u1_buf_5x (z, a);
+
+    output z;  // copy of a
+    input  a;  // value to pass through
+
+    // Continuous assignment, so z tracks every change on a.
+    assign z = a;
+
+endmodule
+
+
+//bw_u1_buf_10x
+//
+
+// Non-inverting buffer: z follows a.
+module bw_u1_buf_10x (z, a);
+
+    output z;  // copy of a
+    input  a;  // value to pass through
+
+    // Continuous assignment, so z tracks every change on a.
+    assign z = a;
+
+endmodule
+
+
+//bw_u1_buf_15x
+//
+
+// Non-inverting buffer: z follows a.
+module bw_u1_buf_15x (z, a);
+
+    output z;  // copy of a
+    input  a;  // value to pass through
+
+    // Continuous assignment, so z tracks every change on a.
+    assign z = a;
+
+endmodule
+
+
+//bw_u1_buf_20x
+//
+
+// Non-inverting buffer: z follows a.
+module bw_u1_buf_20x (z, a);
+
+    output z;  // copy of a
+    input  a;  // value to pass through
+
+    // Continuous assignment, so z tracks every change on a.
+    assign z = a;
+
+endmodule
+
+
+//bw_u1_buf_30x
+//
+
+// Non-inverting buffer: z follows a.
+module bw_u1_buf_30x (z, a);
+
+    output z;  // copy of a
+    input  a;  // value to pass through
+
+    // Continuous assignment, so z tracks every change on a.
+    assign z = a;
+
+endmodule
+
+
+//bw_u1_buf_40x
+//
+
+// Non-inverting buffer: z follows a.
+module bw_u1_buf_40x (z, a);
+
+    output z;  // copy of a
+    input  a;  // value to pass through
+
+    // Continuous assignment, so z tracks every change on a.
+    assign z = a;
+
+endmodule
+
+
+//bw_u1_ao2222_1x
+//
+//
+// AND-OR gate (2-2-2-2), non-inverting:
+//   z = (d1 & d2) | (c1 & c2) | (b1 & b2) | (a1 & a2).
+module bw_u1_ao2222_1x (
+    z, a1, a2, b1, b2, c1, c2, d1, d2 );
+
+    output z;       // 1 when any AND pair is all ones
+    input  a1, a2;  // AND pair a
+    input  b1, b2;  // AND pair b
+    input  c1, c2;  // AND pair c
+    input  d1, d2;  // AND pair d
+
+    // Each AND pair gets its own net;
+    // z is the OR of the four nets.
+    wire   d_all;
+    wire   c_all;
+    wire   b_all;
+    wire   a_all;
+
+    assign d_all = d1 & d2;
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign a_all = a1 & a2;
+    assign z     = d_all | c_all | b_all | a_all;
+
+endmodule
+
+
+//bw_u1_ao2222_2x
+//
+//
+// AND-OR gate (2-2-2-2), non-inverting:
+//   z = (d1 & d2) | (c1 & c2) | (b1 & b2) | (a1 & a2).
+module bw_u1_ao2222_2x (
+    z, a1, a2, b1, b2, c1, c2, d1, d2 );
+
+    output z;       // 1 when any AND pair is all ones
+    input  a1, a2;  // AND pair a
+    input  b1, b2;  // AND pair b
+    input  c1, c2;  // AND pair c
+    input  d1, d2;  // AND pair d
+
+    // Each AND pair gets its own net;
+    // z is the OR of the four nets.
+    wire   d_all;
+    wire   c_all;
+    wire   b_all;
+    wire   a_all;
+
+    assign d_all = d1 & d2;
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign a_all = a1 & a2;
+    assign z     = d_all | c_all | b_all | a_all;
+
+endmodule
+
+//bw_u1_ao2222_4x
+//
+//
+// AND-OR gate (2-2-2-2), non-inverting:
+//   z = (d1 & d2) | (c1 & c2) | (b1 & b2) | (a1 & a2).
+module bw_u1_ao2222_4x (
+    z, a1, a2, b1, b2, c1, c2, d1, d2 );
+
+    output z;       // 1 when any AND pair is all ones
+    input  a1, a2;  // AND pair a
+    input  b1, b2;  // AND pair b
+    input  c1, c2;  // AND pair c
+    input  d1, d2;  // AND pair d
+
+    // Each AND pair gets its own net;
+    // z is the OR of the four nets.
+    wire   d_all;
+    wire   c_all;
+    wire   b_all;
+    wire   a_all;
+
+    assign d_all = d1 & d2;
+    assign c_all = c1 & c2;
+    assign b_all = b1 & b2;
+    assign a_all = a1 & a2;
+    assign z     = d_all | c_all | b_all | a_all;
+
+endmodule
+
+////////////////////////////////////////////////////////////////////////
+//
+// flipflops {
+//
+////////////////////////////////////////////////////////////////////////
+
+//      scanable D-flipflop with scanout
+
+module bw_u1_soff_1x (q, so, ck, d, se, sd);  // scan flop: thin wrapper around zsoff_prim
+    output q, so;
+    input  ck, d, se, sd;
+    zsoff_prim i0 (.q(q), .so(so), .ck(ck), .d(d), .se(se), .sd(sd));
+endmodule
+
+module bw_u1_soff_2x (q, so, ck, d, se, sd);  // scan flop: thin wrapper around zsoff_prim
+    output q, so;
+    input  ck, d, se, sd;
+    zsoff_prim i0 (.q(q), .so(so), .ck(ck), .d(d), .se(se), .sd(sd));
+endmodule
+
+module bw_u1_soff_4x (q, so, ck, d, se, sd);  // scan flop: thin wrapper around zsoff_prim
+    output q, so;
+    input  ck, d, se, sd;
+    zsoff_prim i0 (.q(q), .so(so), .ck(ck), .d(d), .se(se), .sd(sd));
+endmodule
+
+module bw_u1_soff_8x (q, so, ck, d, se, sd);  // scan flop: thin wrapper around zsoff_prim
+    output q, so;
+    input  ck, d, se, sd;
+    zsoff_prim i0 (.q(q), .so(so), .ck(ck), .d(d), .se(se), .sd(sd));
+endmodule
+
+//      fast scanable D-flipflop with scanout with inverted Q output
+
+module bw_u1_soffi_4x (q_l, so, ck, d, se, sd);  // scan flop with inverted output: wrapper around zsoffi_prim
+    output q_l, so;
+    input  ck, d, se, sd;
+    zsoffi_prim i0 (.q_l(q_l), .so(so), .ck(ck), .d(d), .se(se), .sd(sd));
+endmodule
+  
+module bw_u1_soffi_8x (q_l, so, ck, d, se, sd);  // scan flop with inverted output: wrapper around zsoffi_prim
+    output q_l, so;
+    input  ck, d, se, sd;
+    zsoffi_prim i0 (.q_l(q_l), .so(so), .ck(ck), .d(d), .se(se), .sd(sd));
+endmodule
+
+//      scanable D-flipflop with scanout with 2-to-1 input mux
+
+module bw_u1_soffm2_4x (q, so, ck, d0, d1, s, se, sd);  // scan flop with 2:1 input mux: wrapper around zsoffm2_prim
+    output q, so;
+    input  ck, d0, d1, s, se, sd;
+    zsoffm2_prim i0 (.q(q), .so(so), .ck(ck), .d0(d0), .d1(d1), .s(s), .se(se), .sd(sd));
+endmodule
+
+module bw_u1_soffm2_8x (q, so, ck, d0, d1, s, se, sd);  // scan flop with 2:1 input mux: wrapper around zsoffm2_prim
+    output q, so;
+    input  ck, d0, d1, s, se, sd;
+    zsoffm2_prim i0 (.q(q), .so(so), .ck(ck), .d0(d0), .d1(d1), .s(s), .se(se), .sd(sd));
+endmodule
+
+//      scanable D-flipflop with scanout with sync reset-bar
+
+module bw_u1_soffr_2x (q, so, ck, d, se, sd, r_l);  // scan flop with active-low sync reset: wrapper around zsoffr_prim
+    output q, so;
+    input  ck, d, se, sd, r_l;
+    zsoffr_prim i0 (.q(q), .so(so), .ck(ck), .d(d), .se(se), .sd(sd), .r_l(r_l));
+endmodule
+  
+module bw_u1_soffr_4x (q, so, ck, d, se, sd, r_l);  // scan flop with active-low sync reset: wrapper around zsoffr_prim
+    output q, so;
+    input  ck, d, se, sd, r_l;
+    zsoffr_prim i0 (.q(q), .so(so), .ck(ck), .d(d), .se(se), .sd(sd), .r_l(r_l));
+endmodule
+
+module bw_u1_soffr_8x (q, so, ck, d, se, sd, r_l);  // scan flop with active-low sync reset: wrapper around zsoffr_prim
+    output q, so;
+    input  ck, d, se, sd, r_l;
+    zsoffr_prim i0 (.q(q), .so(so), .ck(ck), .d(d), .se(se), .sd(sd), .r_l(r_l));
+endmodule
+
+//bw_u1_soffasr_2x
+
+module bw_u1_soffasr_2x (q, so, ck, d, r_l, s_l, se, sd);  // wrapper: behaviour comes entirely from zsoffasr_prim
+    output q, so;
+    input  ck, d, r_l, s_l, se, sd;
+    zsoffasr_prim i0 (q, so, ck, d, r_l, s_l, se, sd);  // positional, same order as this port list
+endmodule
+
+
+//bw_u1_ckbuf_1p5x
+
+
+module bw_u1_ckbuf_1p5x (clk, rclk);  // clock buffer: clk is a buffered copy of rclk
+    output clk;
+    input  rclk;
+    buf (clk, rclk);
+endmodule
+
+
+//bw_u1_ckbuf_3x
+
+
+module bw_u1_ckbuf_3x (clk, rclk);  // clock buffer: clk is a buffered copy of rclk
+    output clk;
+    input  rclk;
+    buf (clk, rclk);
+endmodule
+
+//bw_u1_ckbuf_4p5x
+
+
+module bw_u1_ckbuf_4p5x (clk, rclk);  // clock buffer: clk is a buffered copy of rclk
+    output clk;
+    input  rclk;
+    buf (clk, rclk);
+endmodule
+
+
+//bw_u1_ckbuf_6x
+
+
+module bw_u1_ckbuf_6x (clk, rclk);  // clock buffer: clk is a buffered copy of rclk
+    output clk;
+    input  rclk;
+    buf (clk, rclk);
+endmodule
+
+//bw_u1_ckbuf_7x
+//
+
+module bw_u1_ckbuf_7x (clk, rclk);  // clock buffer: clk is a buffered copy of rclk
+    output clk;
+    input  rclk;
+    buf (clk, rclk);
+endmodule
+
+//bw_u1_ckbuf_8x
+//
+module bw_u1_ckbuf_8x (clk, rclk);  // clock buffer: clk is a buffered copy of rclk
+    output clk;
+    input  rclk;
+    buf (clk, rclk);
+endmodule
+
+
+//bw_u1_ckbuf_11x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_11x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+//bw_u1_ckbuf_14x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_14x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+//bw_u1_ckbuf_17x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_17x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+
+
+
+//bw_u1_ckbuf_19x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_19x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+
+
+
+//bw_u1_ckbuf_22x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_22x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+//bw_u1_ckbuf_25x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_25x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+
+//bw_u1_ckbuf_28x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_28x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+
+//bw_u1_ckbuf_30x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_30x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+//bw_u1_ckbuf_33x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_33x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+//bw_u1_ckbuf_40x
+//
+
+// Clock buffer: clk is driven directly from rclk.
+module bw_u1_ckbuf_40x (clk, rclk);
+    output clk;   // clock out
+    input  rclk;  // clock in
+
+    assign clk = rclk;
+endmodule
+
+
+// gated clock buffers
+
+
+module bw_u1_ckenbuf_6x (clk, rclk, en_l, tm_l);  // gated clock buffer; the gating itself lives in zckenbuf_prim
+    output clk;
+    input  rclk, en_l, tm_l;
+    zckenbuf_prim i0 (clk, rclk, en_l, tm_l);  // positional, same order as this port list
+endmodule
+
+module bw_u1_ckenbuf_14x (clk, rclk, en_l, tm_l);  // gated clock buffer; the gating itself lives in zckenbuf_prim
+    output clk;
+    input  rclk, en_l, tm_l;
+    zckenbuf_prim i0 (clk, rclk, en_l, tm_l);  // positional, same order as this port list
+endmodule
+
+////////////////////////////////////////////////////////////////////////
+//
+// half cells
+//
+////////////////////////////////////////////////////////////////////////
+
+
+
+module bw_u1_zhinv_0p6x (z, a);  // inverter: z = ~a
+    output z;
+    input  a;
+    assign z = ~a;
+endmodule
+
+
+module bw_u1_zhinv_1x (z, a);  // inverter: z = ~a
+    output z;
+    input  a;
+    assign z = ~a;
+endmodule
+
+
+
+module bw_u1_zhinv_1p4x (z, a);  // inverter: z = ~a
+    output z;
+    input  a;
+    assign z = ~a;
+endmodule
+
+
+module bw_u1_zhinv_2x (z, a);  // inverter: z = ~a
+    output z;
+    input  a;
+    assign z = ~a;
+endmodule
+
+
+
+module bw_u1_zhinv_3x (z, a);  // inverter: z = ~a
+    output z;
+    input  a;
+    assign z = ~a;
+endmodule
+
+
+
+module bw_u1_zhinv_4x (z, a);  // inverter: z = ~a
+    output z;
+    input  a;
+    assign z = ~a;
+endmodule
+
+
+
+module bw_u1_zhnand2_0p4x (z, a, b);  // 2-input NAND: z = ~(a & b)
+    output z;
+    input  a, b;
+    assign z = ~(a & b);
+endmodule
+
+
+module bw_u1_zhnand2_0p6x (z, a, b);  // 2-input NAND: z = ~(a & b)
+    output z;
+    input  a, b;
+    assign z = ~(a & b);
+endmodule
+
+
+module bw_u1_zhnand2_1x (z, a, b);  // 2-input NAND: z = ~(a & b)
+    output z;
+    input  a, b;
+    assign z = ~(a & b);
+endmodule
+
+
+module bw_u1_zhnand2_1p4x (z, a, b);  // 2-input NAND: z = ~(a & b)
+    output z;
+    input  a, b;
+    assign z = ~(a & b);
+endmodule
+
+
+module bw_u1_zhnand2_2x (z, a, b);  // 2-input NAND: z = ~(a & b)
+    output z;
+    input  a, b;
+    assign z = ~(a & b);
+endmodule
+
+
+module bw_u1_zhnand2_3x (z, a, b);  // 2-input NAND: z = ~(a & b)
+    output z;
+    input  a, b;
+    assign z = ~(a & b);
+endmodule
+
+
+module bw_u1_zhnand3_0p6x (z, a, b, c);  // 3-input NAND: z = ~(a & b & c)
+    output z;
+    input  a, b, c;
+    assign z = ~(a & b & c);
+endmodule
+
+module bw_u1_zhnand3_1x (z, a, b, c);  // 3-input NAND: z = ~(a & b & c)
+    output z;
+    input  a, b, c;
+    assign z = ~(a & b & c);
+endmodule
+
+module bw_u1_zhnand3_2x (z, a, b, c);  // 3-input NAND: z = ~(a & b & c)
+    output z;
+    input  a, b, c;
+    assign z = ~(a & b & c);
+endmodule
+
+
+module bw_u1_zhnand4_0p6x (z, a, b, c, d);  // 4-input NAND: z = ~(a & b & c & d)
+    output z;
+    input  a, b, c, d;
+    assign z = ~(a & b & c & d);
+endmodule
+
+module bw_u1_zhnand4_1x (z, a, b, c, d);  // 4-input NAND: z = ~(a & b & c & d)
+    output z;
+    input  a, b, c, d;
+    assign z = ~(a & b & c & d);
+endmodule
+
+module bw_u1_zhnand4_2x (z, a, b, c, d);  // 4-input NAND: z = ~(a & b & c & d)
+    output z;
+    input  a, b, c, d;
+    assign z = ~(a & b & c & d);
+endmodule
+
+
+        
+module bw_u1_zhnor2_0p6x (z, a, b);  // 2-input NOR: z = ~(a | b)
+    output z;
+    input  a, b;
+    assign z = ~(a | b);
+endmodule
+
+module bw_u1_zhnor2_1x (z, a, b);  // 2-input NOR: z = ~(a | b)
+    output z;
+    input  a, b;
+    assign z = ~(a | b);
+endmodule
+
+module bw_u1_zhnor2_2x (z, a, b);  // 2-input NOR: z = ~(a | b)
+    output z;
+    input  a, b;
+    assign z = ~(a | b);
+endmodule
+
+
+
+module bw_u1_zhnor3_0p6x (z, a, b, c);  // 3-input NOR: z = ~(a | b | c)
+    output z;
+    input  a, b, c;
+    assign z = ~(a | b | c);
+endmodule
+
+
+// AND-OR-invert gate (2-1): z = ~((b1 & b2) | a).
+module bw_u1_zhaoi21_0p4x (z, b1, b2, a);
+    output z;       // inverted result
+    input  b1, b2;  // AND-group inputs
+    input  a;       // ORed with the AND group
+
+    wire   b_all = b1 & b2;
+
+    assign z = ~(b_all | a);
+endmodule
+
+
+
+// AND-OR-invert gate (2-1): z = ~((b1 & b2) | a).
+module bw_u1_zhaoi21_1x (z, a, b1, b2);  // note the port order: a comes before the b pair
+    output z;       // inverted result
+    input  b1, b2;  // AND-group inputs
+    input  a;       // ORed with the AND group
+
+    wire   b_all = b1 & b2;
+
+    assign z = ~(b_all | a);
+endmodule
+
+
+
+// OR-AND-invert gate (2-1): z = ~((b1 | b2) & a).
+module bw_u1_zhoai21_1x (z, b1, b2, a);
+    output z;       // inverted result
+    input  b1, b2;  // OR-group inputs
+    input  a;       // ANDed with the OR group
+
+    wire   b_any = b1 | b2;
+
+    assign z = ~(b_any & a);
+endmodule
+
+
+
+
+// OR-AND-invert gate (2-1-1): z = ~((c1 | c2) & a & b).
+module bw_u1_zhoai211_0p3x (z, a, b, c1, c2);
+    output z;       // inverted result
+    input  c1, c2;  // OR-group inputs
+    input  b, a;    // each ANDed with the OR group
+
+    wire   c_any = c1 | c2;
+
+    assign z = ~(c_any & a & b);
+endmodule
+
+
+
+
+
+module bw_u1_zhoai211_1x (z, a, b, c1, c2);  // OR-AND-invert (2-1-1): z = ~((c1 | c2) & a & b)
+    output z;
+    input  a, b, c1, c2;
+    wire   c_any = c1 | c2;
+    assign z = ~(c_any & a & b);
+endmodule
+
+
+
+
+
+/////////////// Scan data lock up latch ///////////////
+
+// Scan lock-up latch with scan-enable gating.
+// While ck is low, so follows (sd & se); while ck is high, so holds.
+module bw_u1_scanlg_2x (so, sd, ck, se);
+    output so;
+    input  sd, ck, se;
+    reg    so_l;  // stored value, kept inverted
+
+    always @ (ck or sd or se)
+        if (!ck) so_l <= ~(sd & se);
+    assign so = ~so_l;
+endmodule
+
+// Scan lock-up latch.
+// While ck is low, so follows sd; while ck is high, so holds.
+module bw_u1_scanl_2x (so, sd, ck);
+    output so;
+    input  sd, ck;
+    reg    so_l;  // stored value, kept inverted
+
+    always @ (ck or sd)
+        if (!ck) so_l <= ~sd;
+    assign so = ~so_l;
+endmodule
+
+
+
+////////////////// Synchronizer ////////////////
+
+// Behavioural model of the library's synchronizer cell: one scan flop.
+// Each rising ck edge stores sd when se is 1, otherwise d.
+module bw_u1_syncff_4x (q, so, ck, d, se, sd);
+    output q, so;          // both outputs show the stored bit
+    input  ck, d, se, sd;
+    reg    q_r;
+    assign q = q_r, so = q_r;
+
+    always @ (posedge ck)
+        q_r <= se ? sd : d;
+endmodule
+
+
+
+
+////////////////////////////////////////////////////////////////////////
+//
+// non library cells
+// 
+////////////////////////////////////////////////////////////////////////
+
+// These cells are used only in custom DP macros
+// Do not use in any block design without prior permission
+
+
+module bw_u1_zzeccxor2_5x (z, a, b);  // two-input XOR: z = a ^ b
+    output z;
+    input  a, b;
+
+    xor (z, a, b);
+endmodule
+
+
+
+// 4:2 carry-save compressor slice: a + b + c + d + cin == sum + 2*(carry + cout).
+module bw_u1_zzmulcsa42_5x (sum, carry, cout, a, b, c, d, cin);
+output sum, carry, cout;
+input  a, b, c, d, cin;
+        // cout is the majority of a, c, d only; it never depends on cin or b.
+        assign sum   = cin ^ a ^ b ^ c ^ d;
+        assign carry = (cin & b) | ((cin | b) & (a ^ c ^ d));
+        assign cout  = (a & c) | (a & d) | (c & d);
+endmodule
+
+
+
+// 3:2 carry-save (full-adder) slice: a + b + c == sum + 2*cout.
+module bw_u1_zzmulcsa32_5x (sum, cout, a, b, c);
+output sum, cout;
+input  a, b, c;
+        assign sum  = a ^ b ^ c;
+        assign cout = (a & b) | (a & c) | (b & c);  // majority of the three inputs
+endmodule
+
+
+
+module bw_u1_zzmulppmuxi21_2x (z, d0, d1, s);  // inverting 2:1 mux: ~d1 when s is 1, ~d0 when s is 0
+    output z;
+    input  d0, d1, s;
+    assign z = ~(s ? d1 : d0);  // select first, invert once
+endmodule
+
+
+
+// Two-input NAND: z = ~(a & b).
+module bw_u1_zzmulnand2_2x (z, a, b);
+    output z;
+    input  a, b;
+    nand (z, a, b);  // gate primitive: output first, then the inputs
+endmodule
+
+
+
+// Primitives
+
+
+
+
+module zmuxi31d_prim (z, d0, d1, d2, s0, s1, s2); // inverting 3:1 mux, one select line per data input
+output z; // inverse of the selected data input, or x (see default below)
+input  d0, d1, d2, s0, s1, s2; // sN selects dN
+// for Blacktie (the $constraint below is compiled only when VERPLEX is defined)
+`ifdef VERPLEX
+   $constraint dp_1h3 ($one_hot ({s0,s1,s2}));
+`endif
+wire [2:0] sel = {s0,s1,s2}; // 0in one_hot
+reg z; // written only by the always block below
+    always @ (s2 or d2 or s1 or d1 or s0 or d0) // every input is in the sensitivity list
+        casez ({s2,d2,s1,d1,s0,d0}) // bit order: s2 d2 s1 d1 s0 d0; '?' is a don't-care
+            6'b0?0?10: z = 1'b1;  // only s0 set: z = ~d0
+            6'b0?0?11: z = 1'b0;  
+            6'b0?100?: z = 1'b1;  // only s1 set: z = ~d1
+            6'b0?110?: z = 1'b0;  
+            6'b0?1010: z = 1'b1;  // s1 and s0 set, d1 and d0 agree
+            6'b0?1111: z = 1'b0;  
+            6'b100?0?: z = 1'b1;  // only s2 set: z = ~d2
+            6'b110?0?: z = 1'b0;  
+            6'b100?10: z = 1'b1;  // s2 and s0 set, d2 and d0 agree
+            6'b110?11: z = 1'b0;  
+            6'b10100?: z = 1'b1;  // s2 and s1 set, d2 and d1 agree
+            6'b11110?: z = 1'b0;  
+            6'b101010: z = 1'b1;  // all selects set, all data agree
+            6'b111111: z = 1'b0;  
+            default: z = 1'bx; // no pattern matched, e.g. no select set or selected data disagree
+        endcase
+endmodule
+
+
+
+
+
+
+
+module zmuxi41d_prim (z, d0, d1, d2, d3, s0, s1, s2, s3); // inverting 4:1 mux, one select line per data input
+output z; // inverse of the selected data input, or x (see default below)
+input  d0, d1, d2, d3, s0, s1, s2, s3; // sN selects dN
+// for Blacktie (the $constraint below is compiled only when VERPLEX is defined)
+`ifdef VERPLEX
+   $constraint dp_1h4 ($one_hot ({s0,s1,s2,s3}));
+`endif
+wire [3:0] sel = {s0,s1,s2,s3}; // 0in one_hot
+reg z; // written only by the always block below
+    always @ (s3 or d3 or s2 or d2 or s1 or d1 or s0 or d0) // every input is in the sensitivity list
+        casez ({s3,d3,s2,d2,s1,d1,s0,d0}) // bit order: s3 d3 s2 d2 s1 d1 s0 d0; '?' is a don't-care
+            8'b0?0?0?10: z = 1'b1; // only s0 set: z = ~d0
+            8'b0?0?0?11: z = 1'b0;
+            8'b0?0?100?: z = 1'b1; // only s1 set: z = ~d1
+            8'b0?0?110?: z = 1'b0;
+            8'b0?0?1010: z = 1'b1; // s1 and s0 set, data agree
+            8'b0?0?1111: z = 1'b0;
+            8'b0?100?0?: z = 1'b1; // only s2 set: z = ~d2
+            8'b0?110?0?: z = 1'b0;
+            8'b0?100?10: z = 1'b1; // s2 and s0 set, data agree
+            8'b0?110?11: z = 1'b0;
+            8'b0?10100?: z = 1'b1; // s2 and s1 set, data agree
+            8'b0?11110?: z = 1'b0;
+            8'b0?101010: z = 1'b1; // s2, s1 and s0 set, data agree
+            8'b0?111111: z = 1'b0;
+            8'b100?0?0?: z = 1'b1; // only s3 set: z = ~d3
+            8'b110?0?0?: z = 1'b0;
+            8'b100?0?10: z = 1'b1; // s3 and s0 set, data agree
+            8'b110?0?11: z = 1'b0;
+            8'b100?100?: z = 1'b1; // s3 and s1 set, data agree
+            8'b110?110?: z = 1'b0;
+            8'b100?1010: z = 1'b1; // s3, s1 and s0 set, data agree
+            8'b110?1111: z = 1'b0;
+            8'b10100?0?: z = 1'b1; // s3 and s2 set, data agree
+            8'b11110?0?: z = 1'b0;
+            8'b10100?10: z = 1'b1; // s3, s2 and s0 set, data agree
+            8'b11110?11: z = 1'b0;
+            8'b1010100?: z = 1'b1; // s3, s2 and s1 set, data agree
+            8'b1111110?: z = 1'b0;
+            8'b10101010: z = 1'b1; // all selects set, all data agree
+            8'b11111111: z = 1'b0;
+            default: z = 1'bx; // no pattern matched, e.g. no select set or selected data disagree
+        endcase   
+endmodule
+
+
+
+module zsoff_prim (q, so, ck, d, se, sd); // scan flop: q and so share one storage bit
+input  ck;                 // clock; state changes on the rising edge only
+input  d, se, sd;          // functional data, scan enable, scan data
+output q, so;              // functional output and scan output
+reg    q_r;                // the storage bit behind both outputs
+  always @ (posedge ck)
+      q_r <= se ? sd : d;  // se high captures sd, otherwise d
+  assign {q, so} = {q_r, q_r};
+endmodule
+
+
+module zsoffr_prim (q, so, ck, d, se, sd, r_l); // scan flop whose data input is gated by r_l
+input  ck;                        // clock; state changes on the rising edge only
+input  d, se, sd, r_l;            // data, scan enable, scan data, active-low clear
+output q, so;                     // functional output and scan output
+reg    q_r;                       // the storage bit behind both outputs
+  always @ (posedge ck)
+      q_r <= se ? sd : (r_l & d); // r_l low loads 0 on the clock edge unless se is high
+  assign {q, so} = {q_r, q_r};
+endmodule
+
+
+module zsoffi_prim (q_l, so, ck, d, se, sd); // scan flop with an inverted functional output
+input  ck;                 // clock; state changes on the rising edge only
+input  d, se, sd;          // functional data, scan enable, scan data
+output q_l, so;            // q_l is the complement of the state, so is the true state
+reg    q_r;                // the storage bit behind both outputs
+  always @ (posedge ck)
+      q_r <= se ? sd : d;  // se high captures sd, otherwise d
+  assign {q_l, so} = {~q_r, q_r};
+endmodule
+
+
+
+module zsoffm2_prim (q, so, ck, d0, d1, s, se, sd); // scan flop with a 2:1 mux on its data input
+input  ck;                             // clock; state changes on the rising edge only
+input  d0, d1, s;                      // s = 0 picks d0, s = 1 picks d1
+input  se, sd;                         // scan enable and scan data
+output q, so;                          // functional output and scan output
+reg    q_r;                            // the storage bit behind both outputs
+  always @ (posedge ck)
+      q_r <= se ? sd : (s ? d1 : d0);  // scan data wins over the muxed functional data
+  assign {q, so} = {q_r, q_r}; endmodule
+
+module zsoffasr_prim (q, so, ck, d, r_l, s_l, se, sd); // scan flop with active-low async reset and set
+  output q, so;
+  input ck, d, r_l, s_l, se, sd;
+
+  // asynchronous reset and asynchronous set
+  // (priority: r_l > s_l > se > d)
+  reg q;
+  wire so;
+
+  always @ (posedge ck or negedge r_l or negedge s_l) begin
+		if(~r_l) q <= 1'b0; // reset asserted: clear
+		else if (~s_l) q <= r_l; // set asserted: loads r_l (1 when r_l is a clean 1, otherwise r_l's unknown value)
+		else if (se) q <= r_l & s_l & sd; // scan capture; the r_l/s_l terms only matter when they are not a clean 1
+		else q <= r_l & s_l & (~se) & d; // functional capture, same gating
+  end
+
+  assign so = q | ~se; // so is held at 1 while se is 0 and follows q only while se is 1
+
+endmodule
+
+
+
+module zckenbuf_prim (clk, rclk, en_l, tm_l); // latch-based clock gate
+input  rclk;        // incoming clock
+input  en_l, tm_l;  // active-low controls: either one low enables the output clock
+output clk;         // rclk ANDed with the latched enable
+reg    clken;       // enable captured while rclk is low, held while rclk is high
+  always @ (rclk or en_l or tm_l)
+    if (~rclk)  // latch is transparent only during the low phase of rclk
+      clken <= ~(en_l & tm_l);
+  assign clk = rclk & clken;
+
+endmodule
+
+module bw_mckbuf_40x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_33x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_30x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_28x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_25x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_22x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_19x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_17x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_14x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_11x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_8x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_7x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_6x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_4p5x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_3x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+module bw_mckbuf_1p5x (clk, rclk, en); // AND-type clock gate (no enable latch)
+input  rclk, en;     // incoming clock and active-high enable
+output clk;          // equals rclk while en is 1, 0 while en is 0
+wire   clk;
+
+    assign clk = en & rclk;
+
+endmodule
+
+//bw_u1_minbuf_1x
+//
+
+module bw_u1_minbuf_1x (z, a);
+    // Non-inverting buffer cell: z is a direct copy of a.
+
+    input  a;   // buffer input
+    output z;   // buffer output
+    wire   z;
+
+    assign z = a;
+
+endmodule
+
+//bw_u1_minbuf_4x
+//
+
+module bw_u1_minbuf_4x (z, a);
+    // Non-inverting buffer cell: z is a direct copy of a.
+
+    input  a;   // buffer input
+    output z;   // buffer output
+    wire   z;
+
+    assign z = a;
+
+endmodule
+
+//bw_u1_minbuf_5x
+//
+
+module bw_u1_minbuf_5x (z, a);
+    // Non-inverting buffer cell: z is a direct copy of a.
+
+    input  a;   // buffer input
+    output z;   // buffer output
+    wire   z;
+
+    assign z = a;
+
+endmodule
+
+module bw_u1_ckenbuf_4p5x  (clk, rclk, en_l, tm_l); // thin wrapper around zckenbuf_prim
+input  rclk, en_l, tm_l;   // passed straight through to the primitive
+output clk;                // gated clock produced by the primitive
+        zckenbuf_prim i0 ( .clk(clk), .rclk(rclk), .en_l(en_l), .tm_l(tm_l) );
+endmodule 
+
+// dummy fill modules to get rid of DFT "CAP" property errors (bug 5487)
+
+module bw_u1_fill_1x(\vdd! ); // empty filler cell: one port, no logic inside
+input \vdd! ; // escaped identifier; the blank before ';' terminates the name
+endmodule
+
+module bw_u1_fill_2x(\vdd! ); // empty filler cell: one port, no logic inside
+input \vdd! ; // escaped identifier; the blank before ';' terminates the name
+endmodule
+
+module bw_u1_fill_3x(\vdd! ); // empty filler cell: one port, no logic inside
+input \vdd! ; // escaped identifier; the blank before ';' terminates the name
+endmodule
+
+module bw_u1_fill_4x(\vdd! ); // empty filler cell: one port, no logic inside
+input \vdd! ; // escaped identifier; the blank before ';' terminates the name
+endmodule
Index: /trunk/T1-common/common/ucb_flow_spi.v
===================================================================
--- /trunk/T1-common/common/ucb_flow_spi.v	(revision 6)
+++ /trunk/T1-common/common/ucb_flow_spi.v	(revision 6)
@@ -0,0 +1,441 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: ucb_flow_spi.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:        ucb_flow_spi
+//	Description:	Unit Control Block
+//                      - supports 1B/2B/4B/8B read with flow control
+//                      - supports 1B/2B/4B/8B write with flow control
+//                      - supports 4B ifill request
+//                      - supports interrupt return to IO Bridge
+//                      - provides 1+2 deep buffer for incoming requests
+//                        from the IO Bridge
+//                      - provides single buffer for returns going back
+//                        to the IO Bridge
+//
+//                      This module is customized for the SPI.
+//
+//                      Data bus width to and from the IO Bridge is
+//                      configured through parameters UCB_IOB_WIDTH and
+//                      IOB_UCB_WIDTH.  Supported widths are:
+//
+//                      IOB_UCB_WIDTH  UCB_IOB_WIDTH
+//                      ----------------------------
+//                      32             8
+//                      16             8
+//                       8             8
+//                       4             4             
+ */ 
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which 
+                        // contains the time scale definition
+
+`include        "iop.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+// (UCB_BUF_DEPTH stays a bare number: it prefixes 'b literals; UCB_BUF_WIDTH is parenthesized so it expands safely)
+`define         UCB_BUF_DEPTH   2
+`define         UCB_BUF_WIDTH   (64+(`UCB_ADDR_HI-`UCB_ADDR_LO+1)+(`UCB_SIZE_HI-`UCB_SIZE_LO+1)+(`UCB_BUF_HI-`UCB_BUF_LO+1)+(`UCB_THR_HI-`UCB_THR_LO+1)+1+1+1)
+
+module ucb_flow_spi (/*AUTOARG*/
+   // Outputs
+   ucb_iob_stall, rd_req_vld, wr_req_vld, ifill_req_vld, thr_id_in, 
+   buf_id_in, size_in, addr_in, data_in, ack_busy, int_busy, 
+   ucb_iob_vld, ucb_iob_data, 
+   // Inputs
+   clk, rst_l, iob_ucb_vld, iob_ucb_data, req_acpted, rd_ack_vld, 
+   rd_nack_vld, ifill_ack_vld, ifill_nack_vld, thr_id_out, 
+   buf_id_out, data128, data_out, int_vld, int_typ, int_thr_id, 
+   dev_id, int_stat, int_vec, iob_ucb_stall
+   );
+   // synopsys template
+   // Purpose (from the logic below): queue IOB requests for the local unit and return acks/nacks/interrupts to the IOB.
+   parameter IOB_UCB_WIDTH = 32;  // data bus width from IOB to UCB
+   parameter UCB_IOB_WIDTH = 8;   // data bus width from UCB to IOB
+   parameter REG_WIDTH     = 64;  // please do not change this parameter
+   
+
+   // Globals
+   input                                clk;
+   input 				rst_l;
+   
+   // Request from IO Bridge
+   input 				iob_ucb_vld;
+   input [IOB_UCB_WIDTH-1:0] 		iob_ucb_data;
+   output 				ucb_iob_stall;
+
+   // Request to local unit
+   output 				rd_req_vld;
+   output 				wr_req_vld;
+   output 				ifill_req_vld;
+   output [`UCB_THR_HI-`UCB_THR_LO:0]   thr_id_in;
+   output [`UCB_BUF_HI-`UCB_BUF_LO:0]   buf_id_in;
+   output [`UCB_SIZE_HI-`UCB_SIZE_LO:0] size_in;   // only pertinent to JBI and SPI
+   output [`UCB_ADDR_HI-`UCB_ADDR_LO:0] addr_in;
+   output [`UCB_DATA_HI-`UCB_DATA_LO:0] data_in;
+   input 				req_acpted;
+   
+   // Ack/Nack from local unit
+   input 				rd_ack_vld;
+   input 				rd_nack_vld;
+   input 				ifill_ack_vld;
+   input 				ifill_nack_vld;
+   input [`UCB_THR_HI-`UCB_THR_LO:0] 	thr_id_out;
+   input [`UCB_BUF_HI-`UCB_BUF_LO:0] 	buf_id_out;
+   input 				data128;   // set to 1 if data returned is 128 bit
+   input [REG_WIDTH-1:0] 		data_out;
+   output 				ack_busy;
+
+   // Interrupt from local unit
+   input 				int_vld;
+   input [`UCB_PKT_HI-`UCB_PKT_LO:0] 	int_typ;          // interrupt type
+   input [`UCB_THR_HI-`UCB_THR_LO:0] 	int_thr_id;       // interrupt thread ID
+   input [`UCB_INT_DEV_HI-`UCB_INT_DEV_LO:0] dev_id;      // interrupt device ID
+   input [`UCB_INT_STAT_HI-`UCB_INT_STAT_LO:0] int_stat;  // interrupt status
+   input [`UCB_INT_VEC_HI-`UCB_INT_VEC_LO:0]   int_vec;   // interrupt vector
+   output 				int_busy;
+   
+   // Output to IO Bridge
+   output 				ucb_iob_vld;
+   output [UCB_IOB_WIDTH-1:0] 		ucb_iob_data;
+   input 				iob_ucb_stall;
+   
+   // Local signals
+   wire                                 indata_buf_vld;
+   wire [127:0]                         indata_buf;
+   wire                                 ucb_iob_stall_a1;
+   
+   wire                                 read_pending;
+   wire                                 write_pending;
+   wire 				ifill_pending;
+   
+   wire 				rd_buf;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_head_next;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_head;
+   wire 				wr_buf;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_tail_next;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_tail;
+   wire 				buf_full_next;
+   wire 				buf_full;
+   wire 				buf_empty_next;
+   wire 				buf_empty;
+   wire [`UCB_BUF_WIDTH-1:0] 		req_in;
+   wire 				buf0_en;
+   wire [`UCB_BUF_WIDTH-1:0] 		buf0;
+   wire 				buf1_en;
+   wire [`UCB_BUF_WIDTH-1:0] 		buf1;
+   wire [`UCB_BUF_WIDTH-1:0] 		req_out;
+   wire 				rd_req_vld_nq;
+   wire 				wr_req_vld_nq;
+   wire 			        ifill_req_vld_nq;
+
+   wire                                 ack_buf_rd;
+   wire                                 ack_buf_wr;
+   wire                                 ack_buf_vld;
+   wire                                 ack_buf_vld_next;
+   wire                                 ack_buf_is_nack;
+   wire 				ack_buf_is_data128;
+   wire [`UCB_PKT_HI-`UCB_PKT_LO:0]     ack_typ_out;
+   wire [REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO:0] ack_buf_in;
+   wire [REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO:0] ack_buf;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] ack_buf_vec;
+   
+   wire                                 int_buf_rd;
+   wire                                 int_buf_wr;
+   wire                                 int_buf_vld;
+   wire                                 int_buf_vld_next;
+   wire [`UCB_INT_VEC_HI-`UCB_PKT_LO:0] int_buf_in;
+   wire [`UCB_INT_VEC_HI-`UCB_PKT_LO:0] int_buf;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] int_buf_vec;
+   
+   wire                                 int_last_rd;
+   wire                                 outdata_buf_busy;
+   wire                                 outdata_buf_wr;
+   wire [REG_WIDTH+63:0]                outdata_buf_in;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] outdata_vec_in;
+   
+   
+////////////////////////////////////////////////////////////////////////
+// Code starts here
+////////////////////////////////////////////////////////////////////////
+   /************************************************************
+    * Inbound Data
+    ************************************************************/
+   // Register size is hardcoded to 64 bits here
+   ucb_bus_in #(IOB_UCB_WIDTH,64) ucb_bus_in (.rst_l(rst_l),
+                                              .clk(clk),
+                                              .vld(iob_ucb_vld),
+                                              .data(iob_ucb_data),
+                                              .stall(ucb_iob_stall),
+                                              .indata_buf_vld(indata_buf_vld),
+                                              .indata_buf(indata_buf),
+                                              .stall_a1(ucb_iob_stall_a1));
+
+
+   /************************************************************
+    * Decode inbound packet type
+    ************************************************************/
+   assign 	 read_pending = (indata_buf[`UCB_PKT_HI:`UCB_PKT_LO] ==
+				 `UCB_READ_REQ) &
+			        indata_buf_vld;
+
+   assign 	 write_pending = (indata_buf[`UCB_PKT_HI:`UCB_PKT_LO] == 
+				  `UCB_WRITE_REQ) &
+        	                  indata_buf_vld;
+
+   assign 	 ifill_pending = (indata_buf[`UCB_PKT_HI:`UCB_PKT_LO] == 
+				  `UCB_IFILL_REQ) &
+        	                  indata_buf_vld;
+
+   assign 	 ucb_iob_stall_a1 = (read_pending | write_pending | ifill_pending) & buf_full; // a decoded request is waiting but the buffer is full
+
+   
+   /************************************************************
+    * Inbound buffer
+    ************************************************************/
+   // Head pointer (one-hot; reset to 01, rotates by one position when the local unit accepts a request)
+   assign 	 rd_buf = req_acpted;
+   assign 	 buf_head_next = ~rst_l ? `UCB_BUF_DEPTH'b01 :
+                                 rd_buf ? {buf_head[`UCB_BUF_DEPTH-2:0],
+				           buf_head[`UCB_BUF_DEPTH-1]} :
+	                                  buf_head;
+   dff_ns #(`UCB_BUF_DEPTH) buf_head_ff (.din(buf_head_next),
+					 .clk(clk),
+					 .q(buf_head));
+
+   // Tail pointer (one-hot; reset to 01, rotates by one position when a decoded request is written and the buffer is not full)
+   assign 	 wr_buf = (read_pending |
+			   write_pending |
+			   ifill_pending) &
+			  ~buf_full;
+   assign 	 buf_tail_next = ~rst_l ? `UCB_BUF_DEPTH'b01 :
+                                 wr_buf ? {buf_tail[`UCB_BUF_DEPTH-2:0],
+				           buf_tail[`UCB_BUF_DEPTH-1]} :
+	                                  buf_tail;
+   dff_ns #(`UCB_BUF_DEPTH) buf_tail_ff (.din(buf_tail_next),
+					 .clk(clk),
+					 .q(buf_tail));
+
+   // Buffer full: set when a write makes the pointers meet; the flag only updates on a read or a write
+   assign 	 buf_full_next = (buf_head_next == buf_tail_next) &
+				 wr_buf;
+   dffrle_ns #(1) buf_full_ff (.din(buf_full_next),
+			       .rst_l(rst_l),
+			       .en(rd_buf|wr_buf),
+			       .clk(clk),
+			       .q(buf_full));
+
+   // Buffer empty: set by reset, or when a read makes the pointers meet
+   assign 	 buf_empty_next = ((buf_head_next == buf_tail_next) &
+				   rd_buf) | ~rst_l;
+   dffe_ns #(1) buf_empty_ff (.din(buf_empty_next),
+			      .en(rd_buf|wr_buf|~rst_l), 
+			      .clk(clk),
+			      .q(buf_empty));
+   
+   // Request word stored in the buffer; the field order here must match the unpacking assign further down
+   assign 	 req_in = {indata_buf[`UCB_DATA_HI:`UCB_DATA_LO],
+			   indata_buf[`UCB_ADDR_HI:`UCB_ADDR_LO],
+			   indata_buf[`UCB_SIZE_HI:`UCB_SIZE_LO],
+			   indata_buf[`UCB_BUF_HI:`UCB_BUF_LO],
+			   indata_buf[`UCB_THR_HI:`UCB_THR_LO],
+			   ifill_pending,
+			   write_pending,
+			   read_pending};
+	  
+   // Buffer 0
+   assign 	 buf0_en = buf_tail[0] & wr_buf;
+   dffe_ns #(`UCB_BUF_WIDTH) buf0_ff (.din(req_in),
+				      .en(buf0_en),
+				      .clk(clk),
+				      .q(buf0));
+   // Buffer 1
+   assign 	 buf1_en = buf_tail[1] & wr_buf;
+   dffe_ns #(`UCB_BUF_WIDTH) buf1_ff (.din(req_in),
+				      .en(buf1_en),
+				      .clk(clk),
+				      .q(buf1));
+   // Read side: the one-hot head pointer picks the entry presented to the local unit (all zeros if neither bit is set)
+   assign 	 req_out = buf_head[0] ? buf0 :
+	                   buf_head[1] ? buf1 :
+	                                 {`UCB_BUF_WIDTH{1'b0}};
+
+   
+   /************************************************************
+    * Inbound interface to local unit
+    ************************************************************/
+   assign 	 {data_in,
+		  addr_in,
+		  size_in,
+		  buf_id_in,
+		  thr_id_in,
+		  ifill_req_vld_nq,
+		  wr_req_vld_nq,
+		  rd_req_vld_nq} = req_out;
+   // The stored request-type flags are qualified with ~buf_empty so nothing is reported valid while the buffer is empty
+   assign 	 rd_req_vld = rd_req_vld_nq & ~buf_empty;
+   assign 	 wr_req_vld = wr_req_vld_nq & ~buf_empty;
+   assign 	 ifill_req_vld = ifill_req_vld_nq & ~buf_empty;
+   
+	  
+   /************************************************************
+    * Outbound Ack/Nack
+    ************************************************************/
+   assign        ack_buf_wr = rd_ack_vld | rd_nack_vld | ifill_ack_vld | ifill_nack_vld;
+   // A new ack/nack write takes precedence over a simultaneous read when the valid bit is updated
+   assign        ack_buf_vld_next = ack_buf_wr ? 1'b1 :
+                                    ack_buf_rd ? 1'b0 :
+                                                 ack_buf_vld;
+   
+   dffrl_ns #(1) ack_buf_vld_ff (.din(ack_buf_vld_next),
+                                 .clk(clk),
+                                 .rst_l(rst_l),
+                                 .q(ack_buf_vld));
+   
+   dffe_ns #(1) ack_buf_is_nack_ff (.din(rd_nack_vld|ifill_nack_vld),
+                                    .en(ack_buf_wr),
+                                    .clk(clk),
+                                    .q(ack_buf_is_nack));
+   
+   dffe_ns #(1) ack_buf_is_data128_ff (.din(data128),
+                                       .en(ack_buf_wr),
+                                       .clk(clk),
+                                       .q(ack_buf_is_data128));
+   // Response packet type; if several *_vld inputs are high the first match in this chain wins, and IFILL_NACK is the fall-through
+   assign        ack_typ_out = rd_ack_vld    ? `UCB_READ_ACK:
+                               rd_nack_vld   ? `UCB_READ_NACK:
+		               ifill_ack_vld ? `UCB_IFILL_ACK:
+		                               `UCB_IFILL_NACK;
+
+   assign        ack_buf_in = {data_out,
+                               buf_id_out,
+                               thr_id_out,
+                               ack_typ_out};
+   
+   dffe_ns #(REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO+1) ack_buf_ff (.din(ack_buf_in),
+                                                              .en(ack_buf_wr),
+                                                              .clk(clk),
+                                                              .q(ack_buf));
+   // Vector handed to ucb_bus_out (presumably one bit per UCB_IOB_WIDTH-wide beat - confirm there); nack clears the upper part; with REG_WIDTH = 64 the last two cases are identical
+   assign        ack_buf_vec = ack_buf_is_nack    ? {{REG_WIDTH/UCB_IOB_WIDTH{1'b0}},
+                                                     {64/UCB_IOB_WIDTH{1'b1}}} :
+		               ack_buf_is_data128 ? {(REG_WIDTH+64)/UCB_IOB_WIDTH{1'b1}} :
+                                                    {(64+64)/UCB_IOB_WIDTH{1'b1}};
+   
+   assign        ack_busy = ack_buf_vld;
+   
+
+   /************************************************************
+    * Outbound Interrupt
+    ************************************************************/
+   // Assertion: int_buf_wr shouldn't be asserted while the interrupt buffer is busy (int_busy)
+   assign        int_buf_wr = int_vld;
+   
+   assign        int_buf_vld_next = int_buf_wr ? 1'b1 :
+                                    int_buf_rd ? 1'b0 :
+                                                 int_buf_vld;
+   
+   dffrl_ns #(1) int_buf_vld_ff (.din(int_buf_vld_next),
+                                 .clk(clk),
+                                 .rst_l(rst_l),
+                                 .q(int_buf_vld));
+
+   assign        int_buf_in = {int_vec,
+                               int_stat,
+                               dev_id,
+                               int_thr_id,
+                               int_typ};
+   
+   dffe_ns #(`UCB_INT_VEC_HI-`UCB_PKT_LO+1) int_buf_ff (.din(int_buf_in),
+                                                        .en(int_buf_wr),
+                                                        .clk(clk),
+                                                        .q(int_buf));
+
+   assign        int_buf_vec = {{REG_WIDTH/UCB_IOB_WIDTH{1'b0}},
+                                {64/UCB_IOB_WIDTH{1'b1}}};
+
+   assign        int_busy = int_buf_vld;
+
+
+   /************************************************************
+    * Outbound ack/interrupt Arbitration
+    ************************************************************/
+   dffrle_ns #(1) int_last_rd_ff (.din(int_buf_rd),
+                                  .en(ack_buf_rd|int_buf_rd),
+                                  .rst_l(rst_l),
+                                  .clk(clk),
+                                  .q(int_last_rd));
+   // When both buffers are valid they alternate: int_last_rd records whether the previous read was the interrupt buffer
+   assign        ack_buf_rd = ~outdata_buf_busy & ack_buf_vld &
+                              (~int_buf_vld | int_last_rd);
+   
+   assign        int_buf_rd = ~outdata_buf_busy & int_buf_vld &
+                              (~ack_buf_vld | ~int_last_rd);
+
+   assign        outdata_buf_wr = ack_buf_rd | int_buf_rd;
+   
+   assign        outdata_buf_in = ack_buf_rd ? {ack_buf[REG_WIDTH+`UCB_BUF_HI:`UCB_BUF_HI+1],
+                                                {(`UCB_RSV_HI-`UCB_RSV_LO+1){1'b0}},
+                                                {(`UCB_ADDR_HI-`UCB_ADDR_LO+1){1'b0}},
+                                                {(`UCB_SIZE_HI-`UCB_SIZE_LO+1){1'b0}},
+                                                ack_buf[`UCB_BUF_HI:`UCB_BUF_LO],
+                                                ack_buf[`UCB_THR_HI:`UCB_THR_LO],
+                                                ack_buf[`UCB_PKT_HI:`UCB_PKT_LO]}:
+                                               {{REG_WIDTH{1'b0}},
+                                                {(`UCB_INT_RSV_HI-`UCB_INT_RSV_LO+1){1'b0}},
+                                                int_buf[`UCB_INT_VEC_HI:`UCB_INT_VEC_LO],
+                                                int_buf[`UCB_INT_STAT_HI:`UCB_INT_STAT_LO],
+                                                int_buf[`UCB_INT_DEV_HI:`UCB_INT_DEV_LO],
+                                                int_buf[`UCB_THR_HI:`UCB_THR_LO],
+                                                int_buf[`UCB_PKT_HI:`UCB_PKT_LO]};
+   
+   assign        outdata_vec_in = ack_buf_rd ? ack_buf_vec :
+                                               int_buf_vec;
+   
+   ucb_bus_out #(UCB_IOB_WIDTH, REG_WIDTH) ucb_bus_out (.rst_l(rst_l),
+                                                        .clk(clk),
+                                                        .outdata_buf_wr(outdata_buf_wr),
+                                                        .outdata_buf_in(outdata_buf_in),
+                                                        .outdata_vec_in(outdata_vec_in),
+                                                        .outdata_buf_busy(outdata_buf_busy),
+                                                        .vld(ucb_iob_vld),
+                                                        .data(ucb_iob_data),
+                                                        .stall(iob_ucb_stall));
+   
+
+`undef		UCB_BUF_WIDTH
+	  
+endmodule // ucb_flow_spi
+
+
+// Local Variables:
+// verilog-library-directories:(".")
+// End:
+
+
+
+
+
+
+
Index: /trunk/T1-common/common/swrvr_clib.v
===================================================================
--- /trunk/T1-common/common/swrvr_clib.v	(revision 6)
+++ /trunk/T1-common/common/swrvr_clib.v	(revision 6)
@@ -0,0 +1,914 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: swrvr_clib.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////
+/*
+//
+//  Module Name: swrvr_clib.v
+//      Description: Design control behavioural library
+*/                 
+
+`ifdef FPGA_SYN // FPGA synthesis builds select the NO_SCAN variants of the flops in this file
+`define NO_SCAN 
+`endif // FPGA_SYN
+
+// POSITIVE-EDGE TRIGGERED FLOP with SCAN
+module dff_s (din, clk, q, se, si, so);
+// synopsys template
+
+parameter SIZE = 1;              // register width in bits
+
+input               clk;         // functional clock, also used as scan clock
+input  [SIZE-1:0]   din;         // functional data
+input               se;          // scan enable (unused when NO_SCAN is defined)
+input  [SIZE-1:0]   si;          // scan data in, one bit per register bit
+
+output [SIZE-1:0]   q;           // registered data
+output [SIZE-1:0]   so;          // scan data out (left undriven when NO_SCAN is defined)
+
+reg    [SIZE-1:0]   q;
+
+`ifdef NO_SCAN
+// Scan logic removed: plain D register.
+always @ (posedge clk)
+  q <= din;
+`else
+// Scan build: se selects the scan input instead of din.
+always @ (posedge clk)
+  q <= se ? si : din;
+
+// The scan output mirrors the register contents.
+assign so = q;
+
+`endif
+
+endmodule // dff_s
+
+// POSITIVE-EDGE TRIGGERED FLOP with SCAN for Shadow-scan
+module dff_sscan (din, clk, q, se, si, so);
+// synopsys template
+
+parameter SIZE = 1;              // register width in bits
+
+input               clk;         // functional clock, also used as scan clock
+input  [SIZE-1:0]   din;         // functional data
+input               se;          // scan enable (used only with CONNECT_SHADOW_SCAN)
+input  [SIZE-1:0]   si;          // scan data in (used only with CONNECT_SHADOW_SCAN)
+
+output [SIZE-1:0]   q;           // registered data
+output [SIZE-1:0]   so;          // scan data out, tied to zero without CONNECT_SHADOW_SCAN
+
+reg    [SIZE-1:0]   q;
+
+
+`ifdef CONNECT_SHADOW_SCAN
+
+// Shadow scan connected: se selects the scan input instead of din.
+always @ (posedge clk)
+  q <= se ? si : din;
+
+assign so = q;
+
+`else
+// Shadow scan not connected: plain D register with a constant-zero scan output.
+always @ (posedge clk)
+  q <= din;
+assign so = {SIZE{1'b0}};
+`endif
+
+endmodule // dff_sscan
+
+// POSITIVE-EDGE TRIGGERED FLOP without SCAN
+module dff_ns (din, clk, q);
+// synopsys template
+
+parameter SIZE = 1;              // register width in bits
+
+input               clk;         // clock
+input  [SIZE-1:0]   din;         // data captured on each rising clock edge
+
+output [SIZE-1:0]   q;           // registered data
+
+reg    [SIZE-1:0]   q;
+
+// Plain D register: no reset, no enable, no scan.
+always @ (posedge clk)
+  q <= din;
+
+endmodule // dff_ns
+
+// POSITIVE-EDGE TRIGGERED FLOP with SCAN, RESET
+module dffr_s (din, clk, rst, q, se, si, so);
+// synopsys template
+
+parameter SIZE = 1;              // register width in bits
+
+input               clk;         // functional clock, also used as scan clock
+input               rst;         // synchronous reset, active high
+input  [SIZE-1:0]   din;         // functional data
+
+input               se;          // scan enable (unused when NO_SCAN is defined)
+input  [SIZE-1:0]   si;          // scan data in
+
+output [SIZE-1:0]   q;           // registered data
+output [SIZE-1:0]   so;          // scan data out (left undriven when NO_SCAN is defined)
+
+reg    [SIZE-1:0]   q;
+
+`ifdef NO_SCAN
+always @ (posedge clk)
+  q <= rst ? {SIZE{1'b0}} : din;
+`else
+
+// Scan enable wins over reset: with se high, si is loaded even while rst is asserted.
+always @ (posedge clk)
+  q <= se ? si : (rst ? {SIZE{1'b0}} : din);
+
+// The scan output mirrors the register contents.
+assign so = q;
+`endif
+
+endmodule // dffr_s
+
+// POSITIVE-EDGE TRIGGERED FLOP with SCAN, RESET_L
+module dffrl_s (din, clk, rst_l, q, se, si, so);
+// synopsys template
+
+parameter SIZE = 1;              // register width in bits
+
+input               clk;         // functional clock, also used as scan clock
+input               rst_l;       // synchronous reset, active low
+input  [SIZE-1:0]   din;         // functional data
+
+input               se;          // scan enable (unused when NO_SCAN is defined)
+input  [SIZE-1:0]   si;          // scan data in
+
+output [SIZE-1:0]   q;           // registered data
+output [SIZE-1:0]   so;          // scan data out (left undriven when NO_SCAN is defined)
+
+reg    [SIZE-1:0]   q;
+
+`ifdef NO_SCAN
+always @ (posedge clk)
+  q <= rst_l ? din : {SIZE{1'b0}};
+`else
+
+// Reset wins over scan: with rst_l low the register clears even while se is high.
+always @ (posedge clk)
+  q <= rst_l ? (se ? si : din) : {SIZE{1'b0}};
+
+// The scan output mirrors the register contents.
+assign so = q;
+`endif
+
+endmodule // dffrl_s
+
+// POSITIVE-EDGE TRIGGERED FLOP with RESET, without SCAN
+module dffr_ns (din, clk, rst, q);
+// synopsys template
+
+parameter SIZE = 1;              // register width in bits
+
+input               clk;         // clock
+input               rst;         // synchronous reset, active high
+input  [SIZE-1:0]   din;         // data captured when rst is low
+
+output [SIZE-1:0]   q;           // registered data
+
+reg    [SIZE-1:0]   q;
+
+// synopsys sync_set_reset "rst"
+always @ (posedge clk)
+  q <= rst ? {SIZE{1'b0}} : din;
+
+endmodule // dffr_ns
+
+// POSITIVE-EDGE TRIGGERED FLOP with RESET_L, without SCAN
+module dffrl_ns (din, clk, rst_l, q);
+// synopsys template
+
+parameter SIZE = 1;              // register width in bits
+
+input               clk;         // clock
+input               rst_l;       // synchronous reset, active low
+input  [SIZE-1:0]   din;         // data captured when rst_l is high
+
+output [SIZE-1:0]   q;           // registered data
+
+reg    [SIZE-1:0]   q;
+
+// synopsys sync_set_reset "rst_l"
+always @ (posedge clk)
+  q <= rst_l ? din : {SIZE{1'b0}};
+
+endmodule // dffrl_ns
+
+// POSITIVE-EDGE TRIGGERED FLOP with SCAN and FUNCTIONAL ENABLE
+module dffe_s (din, en, clk, q, se, si, so);
+// synopsys template
+
+parameter SIZE = 1;              // register width in bits
+
+input               clk;         // functional clock, also used as scan clock
+input               en;          // functional enable: load din when high, hold when low
+input  [SIZE-1:0]   din;         // functional data
+
+input               se;          // scan enable (unused when NO_SCAN is defined)
+input  [SIZE-1:0]   si;          // scan data in
+
+output [SIZE-1:0]   q;           // registered data
+output [SIZE-1:0]   so;          // scan data out (left undriven when NO_SCAN is defined)
+
+reg    [SIZE-1:0]   q;
+
+// Next-state selection. Ultimate interpretation depends on design.
+//
+//   se  en | next q
+//   -------+--------
+//    1   x | si    (scan build only; scan dominates)
+//    0   1 | din
+//    0   0 | q     (hold)
+//
+
+`ifdef NO_SCAN
+always @ (posedge clk)
+  q <= en ? din : q;
+`else
+
+always @ (posedge clk)
+  q <= se ? si : (en ? din : q);
+
+// The scan output mirrors the register contents.
+assign so = q;
+`endif
+
+endmodule // dffe_s
+
+// POSITIVE-EDGE TRIGGERED FLOP with enable, without SCAN
+module dffe_ns (din, en, clk, q);
+// synopsys template
+
+parameter SIZE = 1;              // register width in bits
+
+input               clk;         // clock
+input               en;          // enable: load din when high, hold when low
+input  [SIZE-1:0]   din;         // data to load
+
+output [SIZE-1:0]   q;           // registered data
+
+reg    [SIZE-1:0]   q;
+
+// Enabled D register without reset or scan.
+always @ (posedge clk)
+  q <= en ? din : q;
+endmodule // dffe_ns
+
+// POSITIVE-EDGE TRIGGERED FLOP with RESET, FUNCTIONAL ENABLE, SCAN.
+// SIZE-bit register with a synchronous active-high reset and a load enable.
+// Two compile-time variants are selected by the NO_SCAN macro:
+//   NO_SCAN defined   : se and si are not read and so is left undriven;
+//                       reset dominates, then en picks din or holds q.
+//   NO_SCAN undefined : scan-enable is tested BEFORE reset, so se = 1
+//                       loads si even while rst is asserted.
+module dffre_s (din, rst, en, clk, q, se, si, so);
+// synopsys template
+
+parameter SIZE = 1;
+
+input	[SIZE-1:0]	din ;	// data in
+input			en ;	// functional enable
+input			rst ;	// reset
+input			clk ;	// clk or scan clk
+
+output	[SIZE-1:0]	q ;	// output
+
+input			se ;	// scan-enable
+input	[SIZE-1:0]	si ;	// scan-input
+output	[SIZE-1:0]	so ;	// scan-output
+
+reg 	[SIZE-1:0]	q ;
+
+// Enable Interpretation. Ultimate interpretation depends on design
+// (table describes the active scan-build assignment below)
+// 
+// se	rst	en	out
+//--------------------------
+// 1	x	x	si  ; scan dominates, even over reset
+// 0	1	x	0   ; synchronous reset
+// 0	0	1  	din
+// 0 	0 	0	q
+//
+
+`ifdef NO_SCAN
+// Scan-less build: reset first, then enable.
+always @ (posedge clk)
+	q[SIZE-1:0]  <= (rst ? {SIZE{1'b0}} : ((en) ? din[SIZE-1:0] : q[SIZE-1:0])) ;
+`else
+
+always @ (posedge clk)
+
+// Earlier reset-dominant form, kept for reference only:
+//	q[SIZE-1:0]  <= rst ? {SIZE{1'b0}} : ((se) ? si[SIZE-1:0]  : ((en) ? din[SIZE-1:0] : q[SIZE-1:0])) ;
+	q[SIZE-1:0]  <= se ? si[SIZE-1:0]  : (rst ? {SIZE{1'b0}} : ((en) ? din[SIZE-1:0] : q[SIZE-1:0])) ;
+
+// Scan output is simply the current register contents.
+assign so[SIZE-1:0] = q[SIZE-1:0] ;
+
+`endif
+
+endmodule // dffre_s
+
+// POSITIVE-EDGE TRIGGERED FLOP with RESET_L, FUNCTIONAL ENABLE, SCAN.
+// SIZE-bit register with a synchronous active-low reset and a load enable.
+// Two compile-time variants are selected by the NO_SCAN macro:
+//   NO_SCAN defined   : se and si are not read and so is left undriven;
+//                       reset dominates, then en picks din or holds q.
+//   NO_SCAN undefined : scan-enable is tested BEFORE reset, so se = 1
+//                       loads si even while rst_l is low.
+module dffrle_s (din, rst_l, en, clk, q, se, si, so);
+// synopsys template
+
+parameter SIZE = 1;
+
+input	[SIZE-1:0]	din ;	// data in
+input			en ;	// functional enable
+input			rst_l ;	// reset
+input			clk ;	// clk or scan clk
+
+output	[SIZE-1:0]	q ;	// output
+
+input			se ;	// scan-enable
+input	[SIZE-1:0]	si ;	// scan-input
+output	[SIZE-1:0]	so ;	// scan-output
+
+reg 	[SIZE-1:0]	q ;
+
+// Enable Interpretation. Ultimate interpretation depends on design
+// (table describes the active scan-build assignment below)
+// 
+// se	rst_l	en	out
+//--------------------------
+// 1	x	x	si  ; scan dominates, even over reset
+// 0	0	x	0   ; synchronous reset
+// 0	1	1  	din
+// 0 	1 	0	q
+//
+
+`ifdef NO_SCAN
+// Scan-less build: reset first, then enable.
+always @ (posedge clk)
+	 q[SIZE-1:0]  <= (rst_l ? ((en) ? din[SIZE-1:0] : q[SIZE-1:0]) : {SIZE{1'b0}}) ;
+`else
+
+always @ (posedge clk)
+
+// Earlier reset-dominant form, kept for reference only:
+//	q[SIZE-1:0]  <= rst_l ? ((se) ? si[SIZE-1:0]  : ((en) ? din[SIZE-1:0] : q[SIZE-1:0])) : {SIZE{1'b0}} ;
+	q[SIZE-1:0]  <= se ? si[SIZE-1:0]  : (rst_l ? ((en) ? din[SIZE-1:0] : q[SIZE-1:0]) : {SIZE{1'b0}}) ;
+
+// Scan output is simply the current register contents.
+assign so[SIZE-1:0] = q[SIZE-1:0] ;
+`endif
+
+endmodule // dffrle_s
+
+// dffre_ns: rising-edge register, SIZE bits wide, with an active-high
+// synchronous clear, a load enable, and no scan ports.
+//
+//   rst  en  | q after posedge clk
+//   ---------+---------------------
+//    1   x   | all zeros   (reset wins over enable)
+//    0   1   | din
+//    0   0   | unchanged
+module dffre_ns (din, rst, en, clk, q);
+// synopsys template
+
+parameter SIZE = 1;		// register width in bits
+
+input			clk ;	// sampling clock, rising edge
+input			rst ;	// synchronous clear, active high
+input			en ;	// load enable, active high
+input	[SIZE-1:0]	din ;	// value to capture
+output	[SIZE-1:0]	q ;	// registered value
+
+reg	[SIZE-1:0]	q ;
+
+// synopsys sync_set_reset "rst"
+always @ (posedge clk)
+begin
+  q <= rst ? {SIZE{1'b0}} : (en ? din : q);
+end
+
+endmodule // dffre_ns
+
+// dffrle_ns: rising-edge register, SIZE bits wide, with an active-low
+// synchronous clear, a load enable, and no scan ports.
+//
+//   rst_l  en  | q after posedge clk
+//   -----------+---------------------
+//     0    x   | all zeros   (reset wins over enable)
+//     1    1   | din
+//     1    0   | unchanged
+module dffrle_ns (din, rst_l, en, clk, q);
+// synopsys template
+
+parameter SIZE = 1;		// register width in bits
+
+input			clk ;	// sampling clock, rising edge
+input			rst_l ;	// synchronous clear, active low
+input			en ;	// load enable, active high
+input	[SIZE-1:0]	din ;	// value to capture
+output	[SIZE-1:0]	q ;	// registered value
+
+reg	[SIZE-1:0]	q ;
+
+// synopsys sync_set_reset "rst_l"
+always @ (posedge clk)
+begin
+  q <= rst_l ? (en ? din : q) : {SIZE{1'b0}};
+end
+
+endmodule // dffrle_ns
+
+// POSITIVE-EDGE TRIGGERED FLOP with SCAN, and ASYNC RESET
+// SIZE-bit register cleared asynchronously by a rising edge on rst (rst is
+// in the sensitivity list) and held at zero on clock edges while rst is high.
+// Two compile-time variants are selected by the NO_SCAN macro:
+//   NO_SCAN defined   : se and si are not read and so is left undriven.
+//   NO_SCAN undefined : with rst low, se picks si over din; so mirrors q.
+module dffr_async (din, clk, rst, q, se, si, so);
+// synopsys template
+
+parameter SIZE = 1;
+
+input   [SIZE-1:0]      din ;   // data in
+input                   clk ;   // clk or scan clk
+input                   rst ;   // reset
+
+output  [SIZE-1:0]      q ;     // output
+
+input                   se ;    // scan-enable
+input   [SIZE-1:0]      si ;    // scan-input
+output  [SIZE-1:0]      so ;    // scan-output
+
+reg     [SIZE-1:0]      q ;
+
+`ifdef NO_SCAN
+// Scan-less build: async clear, otherwise capture din.
+always @ (posedge clk or posedge rst)
+	q[SIZE-1:0]  <= rst ? {SIZE{1'b0}} : din[SIZE-1:0];
+`else
+
+// Reset dominates
+// (rst is tested before se, unlike the synchronous dffre_s scan build)
+always @ (posedge clk or posedge rst)
+  q[SIZE-1:0]  <= rst ? {SIZE{1'b0}} : ((se) ? si[SIZE-1:0]  : din[SIZE-1:0] );
+
+// Scan output is simply the current register contents.
+assign so[SIZE-1:0] = q[SIZE-1:0] ;
+
+`endif
+
+endmodule // dffr_async
+
+// POSITIVE-EDGE TRIGGERED FLOP with SCAN, and ASYNC RESET_L
+// SIZE-bit register cleared asynchronously by a falling edge on rst_l
+// (rst_l is in the sensitivity list) and held at zero on clock edges while
+// rst_l is low.
+// Two compile-time variants are selected by the NO_SCAN macro:
+//   NO_SCAN defined   : se and si are not read and so is left undriven.
+//   NO_SCAN undefined : with rst_l high, se picks si over din; so mirrors q.
+module dffrl_async (din, clk, rst_l, q, se, si, so);
+// synopsys template
+
+parameter SIZE = 1;
+
+input   [SIZE-1:0]      din ;   // data in
+input                   clk ;   // clk or scan clk
+input                   rst_l ;   // reset
+
+output  [SIZE-1:0]      q ;     // output
+
+input                   se ;    // scan-enable
+input   [SIZE-1:0]      si ;    // scan-input
+output  [SIZE-1:0]      so ;    // scan-output
+
+reg     [SIZE-1:0]      q ;
+
+`ifdef NO_SCAN
+// Scan-less build: async clear, otherwise capture din.
+always @ (posedge clk or negedge rst_l)
+	q[SIZE-1:0]  <= (!rst_l) ? {SIZE{1'b0}} : din[SIZE-1:0];
+`else
+
+// Reset dominates
+// (rst_l is tested before se)
+always @ (posedge clk or negedge rst_l)
+  q[SIZE-1:0]  <= (!rst_l) ? {SIZE{1'b0}} : ((se) ? si[SIZE-1:0]  : din[SIZE-1:0] );
+
+// Scan output is simply the current register contents.
+assign so[SIZE-1:0] = q[SIZE-1:0] ;
+
+`endif
+
+endmodule // dffrl_async
+
+// POSITIVE-EDGE TRIGGERED FLOP with ASYNC RESET, without SCAN
+//module dffr_async_ns (din, clk, rst, q);
+//// synopsys template
+//parameter SIZE = 1;
+//input   [SIZE-1:0]      din ;   // data in
+//input                   clk ;   // clk or scan clk
+//input                   rst ;   // reset
+//output  [SIZE-1:0]      q ;     // output
+//reg     [SIZE-1:0]      q ;
+// Reset dominates
+//// synopsys async_set_reset "rst"
+//always @ (posedge clk or posedge rst)
+//        if(rst) q[SIZE-1:0]  <= {SIZE{1'b0}};
+//        else if(clk) q[SIZE-1:0]  <= din[SIZE-1:0];
+//endmodule // dffr_async_ns
+
+// POSITIVE-EDGE TRIGGERED FLOP with ASYNC RESET_L, without SCAN
+// SIZE-bit register cleared asynchronously by a falling edge on rst_l
+// (rst_l is in the sensitivity list); otherwise captures din on the rising
+// edge of clk.
+module dffrl_async_ns (din, clk, rst_l, q);
+// synopsys template
+
+parameter SIZE = 1;
+
+input   [SIZE-1:0]      din ;   // data in
+input                   clk ;   // clk or scan clk
+input                   rst_l ;   // reset
+
+output  [SIZE-1:0]      q ;     // output
+
+// Reset dominates
+// synopsys async_set_reset "rst_l"
+ reg [SIZE-1:0] q;   
+// The data arm is additionally ANDed with rst_l. With rst_l = 1 this is
+// just din; the extra term only matters when rst_l is x/z, where it forces
+// x onto the bits of din that are 1.
+// NOTE(review): presumably deliberate pessimism for unknown reset -- do not
+// simplify without confirming.
+always @ (posedge clk or negedge rst_l)
+  q[SIZE-1:0] <= ~rst_l ?  {SIZE{1'b0}} : ({SIZE{rst_l}} & din[SIZE-1:0]);
+
+// Disabled alternative below: appears to be a latch-level (qm/qs) model of
+// the same flop, kept for reference only.
+//   reg  [SIZE-1:0]   qm, qs, qm_l, qs_l, qm_f, qs_f;
+//   wire              s_l;
+//   assign            s_l = 1'b1;
+//
+//   always @ (rst_l or qm)   qm_l = ~(qm & {SIZE{rst_l}});
+//   always @ (s_l or qs)   qs_l = ~(qs & {SIZE{s_l}});
+//   always @ (s_l or qm_l) qm_f = ~(qm_l & {SIZE{s_l}});
+//   always @ (rst_l or qs_l) qs_f = ~(qs_l & {SIZE{rst_l}});
+//
+//   always @ (clk or din or qm_f)
+//      qm <= clk ? qm_f : din;
+//
+//   always @ (clk or qm_l or qs_f)
+//      qs <= clk ? qm_l : qs_f;
+//
+//   assign q  = ~qs;
+
+endmodule // dffrl_async_ns
+
+// mux2ds: 2-to-1 multiplexer, SIZE bits wide, steered by two decoded
+// (one-hot) select lines.
+//   {sel1,sel0} = 01 : dout = in0
+//   {sel1,sel0} = 10 : dout = in1
+//   anything else    : dout = all x (00, 11, or a select holding x/z)
+// There is no priority between the selects; a non-one-hot pattern is
+// reported as x rather than resolved.
+module mux2ds (dout, in0, in1, sel0, sel1) ;
+// synopsys template
+
+parameter SIZE = 1;		// data width in bits
+
+output	[SIZE-1:0]	dout;
+input			sel0;	// selects in0
+input			sel1;	// selects in1
+input	[SIZE-1:0]	in0;
+input	[SIZE-1:0]	in1;
+
+// dout is a reg only because it is assigned from an always block; the
+// block is combinational and holds no state.
+reg	[SIZE-1:0]	dout ;
+
+`ifdef VERPLEX
+   $constraint cl_1h_chk2 ($one_hot ({sel1,sel0}));
+`endif
+
+// Not read by any logic here; exists to carry the trailing annotation.
+wire [1:0] sel = {sel1, sel0}; // 0in one_hot
+
+always @ (in0 or in1 or sel0 or sel1)
+
+	case ({sel1,sel0}) // synopsys infer_mux
+		2'b01   : dout = in0 ;
+		2'b10   : dout = in1 ;
+		default : dout = {SIZE{1'bx}} ;	// none, both, or unknown
+	endcase
+
+endmodule // mux2ds
+
+// mux3ds: 3-to-1 multiplexer, SIZE bits wide, steered by three decoded
+// (one-hot) select lines.
+//   {sel2,sel1,sel0} = 001 : dout = in0
+//   {sel2,sel1,sel0} = 010 : dout = in1
+//   {sel2,sel1,sel0} = 100 : dout = in2
+//   anything else          : dout = all x
+// There is no priority between the selects; a non-one-hot pattern is
+// reported as x rather than resolved.
+module mux3ds (dout, in0, in1, in2, sel0, sel1, sel2) ;
+// synopsys template
+
+parameter SIZE = 1;		// data width in bits
+
+output	[SIZE-1:0]	dout;
+input			sel0;	// selects in0
+input			sel1;	// selects in1
+input			sel2;	// selects in2
+input	[SIZE-1:0]	in0;
+input	[SIZE-1:0]	in1;
+input	[SIZE-1:0]	in2;
+
+// dout is a reg only because it is assigned from an always block; the
+// block is combinational and holds no state.
+reg	[SIZE-1:0]	dout ;
+
+`ifdef VERPLEX
+   $constraint cl_1h_chk3 ($one_hot ({sel2,sel1,sel0}));
+`endif
+
+// Not read by any logic here; exists to carry the trailing annotation.
+wire [2:0] sel = {sel2,sel1,sel0}; // 0in one_hot
+
+always @ (in0 or in1 or in2 or sel0 or sel1 or sel2)
+
+	case ({sel2,sel1,sel0})
+		3'b001  : dout = in0 ;
+		3'b010  : dout = in1 ;
+		3'b100  : dout = in2 ;
+		default : dout = {SIZE{1'bx}} ;	// zero-hot, multi-hot, or unknown
+	endcase
+
+endmodule // mux3ds
+
+// mux4ds: 4-to-1 multiplexer, SIZE bits wide, steered by four decoded
+// (one-hot) select lines.
+//   {sel3,sel2,sel1,sel0} = 0001 : dout = in0
+//   {sel3,sel2,sel1,sel0} = 0010 : dout = in1
+//   {sel3,sel2,sel1,sel0} = 0100 : dout = in2
+//   {sel3,sel2,sel1,sel0} = 1000 : dout = in3
+//   anything else                : dout = all x
+// There is no priority between the selects; a non-one-hot pattern is
+// reported as x rather than resolved.
+module mux4ds (dout, in0, in1, in2, in3, sel0, sel1, sel2, sel3) ;
+// synopsys template
+
+parameter SIZE = 1;		// data width in bits
+
+output	[SIZE-1:0]	dout;
+input			sel0;	// selects in0
+input			sel1;	// selects in1
+input			sel2;	// selects in2
+input			sel3;	// selects in3
+input	[SIZE-1:0]	in0;
+input	[SIZE-1:0]	in1;
+input	[SIZE-1:0]	in2;
+input	[SIZE-1:0]	in3;
+
+// dout is a reg only because it is assigned from an always block; the
+// block is combinational and holds no state.
+reg	[SIZE-1:0]	dout ;
+
+`ifdef VERPLEX
+   $constraint cl_1h_chk4 ($one_hot ({sel3,sel2,sel1,sel0}));
+`endif
+
+// Not read by any logic here; exists to carry the trailing annotation.
+wire [3:0] sel = {sel3,sel2,sel1,sel0}; // 0in one_hot
+
+always @ (in0 or in1 or in2 or in3 or sel0 or sel1 or sel2 or sel3)
+
+	case ({sel3,sel2,sel1,sel0})
+		4'b0001 : dout = in0 ;
+		4'b0010 : dout = in1 ;
+		4'b0100 : dout = in2 ;
+		4'b1000 : dout = in3 ;
+		default : dout = {SIZE{1'bx}} ;	// zero-hot, multi-hot, or unknown
+	endcase
+
+endmodule // mux4ds
+
+// sink: termination cell for an otherwise unloaded SIZE-bit signal.
+// The module has no outputs. When FPGA_SYN is defined the bits of `in`
+// are OR-reduced into a local wire that nothing reads; the original note
+// says XST (as of 8.2) needs this dead logic to remove the module.
+module sink (in);
+// synopsys template
+
+parameter SIZE = 1;		// width of the terminated signal
+
+input [SIZE-1:0] in;
+
+`ifdef FPGA_SYN
+   // Intentionally unused reduction of the input.
+   wire    a = | in;
+`endif
+
+endmodule //sink
+
+// source: tie-off cell that drives a SIZE-bit output with constant zeros.
+// An x-driving variant guarded by FOUR_STATE was sketched by the original
+// authors but is not enabled; the output is always all zeros.
+module source (out) ;
+// synopsys template
+
+parameter SIZE = 1;		// width of the driven signal
+
+output  [SIZE-1:0] out;
+
+// Disabled alternative (pending a 4-state/2-state modelling decision):
+//   assign  out = {SIZE{1'bx}};
+assign  out[SIZE-1:0] = {SIZE{1'b0}};
+
+endmodule //source
+
+// 2:1 MUX WITH PRIORITY ENCODED SELECTS
+//module mux2es (dout, in0, in1, sel0, sel1) ;
+//
+//parameter SIZE = 1;
+//
+//output 	[SIZE-1:0] 	dout;
+//input	[SIZE-1:0]	in0;
+//input	[SIZE-1:0]	in1;
+//input			sel0;
+//input			sel1;
+//
+//// reg declaration does not imply state being maintained
+//// across cycles. Used to construct case statement and
+//// always updated by inputs every cycle.
+//reg	[SIZE-1:0]	dout ;
+//
+//// must take into account lack of mutex selects.
+//// there is no reason for handling of x and z conditions.
+//// This will be dictated by design.
+//always @ (sel0 or sel1 or in0 or in1)
+//
+//	case ({sel1,sel0})
+//		2'b1x : dout = in1 ; // 10(in1),11(z) 
+//		2'b0x :	dout = in0 ; // 01(in0),00(x)
+//	endcase
+//
+//endmodule // mux2es
+
+// CLK Header for gating off the clock of
+// a FF.
+// clk - output of the clk header
+// rclk - input clk
+// enb_l - Active low clock enable
+// tmb_l  - Active low clock enable ( in scan mode, this input is !se )
+//
+// Two compile-time variants are selected by the FPGA_SYN macro:
+//   FPGA_SYN defined   : clk is rclk unchanged; enb_l, tmb_l and clken
+//                        are not used, so no gating takes place.
+//   FPGA_SYN undefined : clken is a level-sensitive latch that follows
+//                        (!enb_l | !tmb_l) while rclk is low and holds
+//                        while rclk is high; clk = clken & rclk.
+
+module clken_buf (clk, rclk, enb_l, tmb_l);
+output clk;
+input  rclk, enb_l, tmb_l;
+reg    clken;	// latched enable (only assigned in the non-FPGA build)
+
+`ifdef FPGA_SYN
+   assign clk = rclk;
+`else
+  // No else branch on purpose: with rclk high, clken keeps its last value,
+  // so the enable cannot change during the high phase of the clock.
+  always @ (rclk or enb_l or tmb_l)
+    if (!rclk)  //latch opens on rclk low phase
+      clken = !enb_l | !tmb_l;
+  assign clk = clken & rclk;
+`endif
+endmodule
+
+
+
+// The following flops are maintained and used in ENET , MAC IP  ONLY
+// -- Mimi X61467
+
+// dffsl_ns: rising-edge register, SIZE bits wide, with an active-low
+// synchronous set and no scan ports.
+//   set_l = 0 at posedge clk : q is set to all ones
+//   set_l = 1 at posedge clk : q captures din
+
+module dffsl_ns (din, clk, set_l, q);
+// synopsys template
+parameter SIZE = 1;		// register width in bits
+
+input                   clk ;   // sampling clock, rising edge
+input                   set_l ; // synchronous set, active low
+input   [SIZE-1:0]      din ;   // value to capture
+output  [SIZE-1:0]      q ;     // registered value
+
+reg     [SIZE-1:0]      q ;
+
+// synopsys sync_set_reset "set_l"
+always @ (posedge clk)
+begin
+  q <= set_l ? din : {SIZE{1'b1}};
+end
+
+endmodule // dffsl_ns
+
+// POSITIVE-EDGE TRIGGERED FLOP with ASYNC SET_L, without SCAN.
+// SIZE-bit register set to all ones asynchronously by a falling edge on
+// set_l (set_l is in the sensitivity list); otherwise captures din on the
+// rising edge of clk.
+
+module dffsl_async_ns (din, clk, set_l, q);
+// synopsys template
+parameter SIZE = 1;
+
+input   [SIZE-1:0]      din ;   // data in
+input                   clk ;   // clk or scan clk
+input                   set_l ; // set
+
+output  [SIZE-1:0]      q ;     // output
+
+reg     [SIZE-1:0]      q ;
+
+// The data arm is additionally ORed with ~set_l. With set_l = 1 this is
+// just din; the extra term only matters when set_l is x/z, where it forces
+// x onto the bits of din that are 0.
+// NOTE(review): presumably deliberate pessimism for unknown set -- do not
+// simplify without confirming.
+// synopsys async_set_reset "set_l"
+always @ (posedge clk or negedge set_l)
+  q[SIZE-1:0] <= ~set_l ? {SIZE{1'b1}} : ({SIZE{~set_l}} | din[SIZE-1:0]);
+
+endmodule // dffsl_async_ns
+
+// dffr_ns_r1: rising-edge register, SIZE bits wide, whose active-high
+// synchronous "reset" forces the value to all ONES, without scan ports.
+//   rst = 1 at posedge clk : q is set to all ones
+//   rst = 0 at posedge clk : q captures din
+
+module dffr_ns_r1 (din, clk, rst, q);
+// synopsys template
+parameter SIZE = 1;		// register width in bits
+
+input                   clk ;   // sampling clock, rising edge
+input                   rst ;   // synchronous set-to-ones, active high
+input   [SIZE-1:0]      din ;   // value to capture
+output  [SIZE-1:0]      q ;     // registered value
+
+reg     [SIZE-1:0]      q ;
+
+// synopsys sync_set_reset "rst"
+always @ (posedge clk)
+begin
+  q <= rst ? {SIZE{1'b1}} : din;
+end
+
+endmodule // dffr_ns_r1
+
+// dffr_async_ns: rising-edge register, SIZE bits wide, with an active-high
+// ASYNCHRONOUS clear and no scan ports.
+//   posedge rst              : q is cleared to all zeros immediately
+//   posedge clk with rst = 1 : q stays all zeros (reset dominates)
+//   posedge clk with rst = 0 : q captures din
+
+module dffr_async_ns (din, clk, rst, q);
+// synopsys template
+
+parameter SIZE = 1;		// register width in bits
+
+input                   clk ;   // sampling clock, rising edge
+input                   rst ;   // asynchronous clear, active high
+input   [SIZE-1:0]      din ;   // value to capture
+output  [SIZE-1:0]      q ;     // registered value
+
+reg     [SIZE-1:0]      q ;
+
+// synopsys async_set_reset "rst"
+always @ (posedge clk or posedge rst)
+begin
+  q <= rst ? {SIZE{1'b0}} : din;
+end
+
+endmodule // dffr_async_ns
+
+// dffr_async_ns_r1: rising-edge register, SIZE bits wide, whose active-high
+// ASYNCHRONOUS "reset" forces the value to all ONES, without scan ports.
+//   posedge rst              : q is set to all ones immediately
+//   posedge clk with rst = 1 : q stays all ones (reset dominates)
+//   posedge clk with rst = 0 : q captures din
+
+module dffr_async_ns_r1 (din, clk, rst, q);
+// synopsys template
+
+parameter SIZE = 1;		// register width in bits
+
+input                   clk ;   // sampling clock, rising edge
+input                   rst ;   // asynchronous set-to-ones, active high
+input   [SIZE-1:0]      din ;   // value to capture
+output  [SIZE-1:0]      q ;     // registered value
+
+reg     [SIZE-1:0]      q ;
+
+// synopsys async_set_reset "rst"
+always @ (posedge clk or posedge rst)
+begin
+  q <= rst ? {SIZE{1'b1}} : din;
+end
+
+endmodule // dffr_async_ns_r1
+
+
+// NEGATIVE-EDGE TRIGGERED ASYNC SET_H FLOP , without SCAN.
+// SIZE-bit register clocked on the FALLING edge of clkl, whose active-high
+// asynchronous "reset" forces the value to all ONES.
+//   posedge rst               : q is set to all ones immediately
+//   negedge clkl with rst = 1 : q stays all ones (reset dominates)
+//   negedge clkl with rst = 0 : q captures din
+
+module dffr_async_ns_cl_r1 (din, clkl, rst, q);
+// synopsys template
+parameter SIZE = 1;
+
+input   [SIZE-1:0]      din ;   // data in
+input                   clkl ;  // clk or scan clk (falling-edge active)
+input                   rst ;   // asynchronous set-to-ones, active high
+
+output  [SIZE-1:0]      q ;     // output
+
+reg     [SIZE-1:0]      q ;
+
+// Set to 1
+// rst appears in the sensitivity list, so it is an asynchronous control;
+// the directive is async_set_reset to match (it previously said
+// sync_set_reset, contradicting the RTL and the sibling dffr_async_ns_r1).
+// synopsys async_set_reset "rst"
+always @ (negedge clkl or posedge rst)
+  q[SIZE-1:0] <= rst ? {SIZE{1'b1}} : din[SIZE-1:0];
+
+endmodule // dffr_async_ns_cl_r1
+
Index: /trunk/T1-common/common/swrvr_dlib.v
===================================================================
--- /trunk/T1-common/common/swrvr_dlib.v	(revision 6)
+++ /trunk/T1-common/common/swrvr_dlib.v	(revision 6)
@@ -0,0 +1,270 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: swrvr_dlib.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+// DP library
+
+// dp_mux2es: 2-to-1 datapath multiplexer, SIZE bits wide, with a single
+// encoded select.
+//   sel = 0 : dout = in0
+//   sel = 1 : dout = in1
+//   sel = x/z : dout = in0 when (in0 == in1) evaluates true, else all x
+module dp_mux2es (dout, in0, in1, sel) ;
+// synopsys template
+
+parameter SIZE = 1;		// data width in bits
+
+output 	[SIZE-1:0] 	dout;
+input			sel;
+input	[SIZE-1:0]	in0;
+input	[SIZE-1:0]	in1;
+
+// reg only because it is assigned from an always block; no state is kept.
+reg	[SIZE-1:0]	dout ;
+
+always @ (in0 or in1 or sel)
+	case (sel)
+	  1'b0: dout = in0;
+	  1'b1: dout = in1;
+	  default:
+	    // Unknown select: the result is still known if both inputs agree.
+	    if (in0 == in1)
+	      dout = in0;
+	    else
+	      dout = {SIZE{1'bx}};
+	endcase // case(sel)
+
+endmodule // dp_mux2es
+
+// ----------------------------------------------------------------------
+
+
+// dp_mux4ds: 4-to-1 datapath multiplexer, SIZE bits wide, steered by four
+// decoded ACTIVE-LOW (one-cold) select lines.
+//   {sel3_l,sel2_l,sel1_l,sel0_l} = 1110 : dout = in0
+//   {sel3_l,sel2_l,sel1_l,sel0_l} = 1101 : dout = in1
+//   {sel3_l,sel2_l,sel1_l,sel0_l} = 1011 : dout = in2
+//   {sel3_l,sel2_l,sel1_l,sel0_l} = 0111 : dout = in3
+//   anything else                        : dout = all x
+module dp_mux4ds (dout, in0, in1, in2, in3, 
+		     sel0_l, sel1_l, sel2_l, sel3_l) ;
+// synopsys template
+
+parameter SIZE = 1;		// data width in bits
+
+output 	[SIZE-1:0] 	dout;
+input			sel0_l;	// low selects in0
+input			sel1_l;	// low selects in1
+input			sel2_l;	// low selects in2
+input			sel3_l;	// low selects in3
+input	[SIZE-1:0]	in0;
+input	[SIZE-1:0]	in1;
+input	[SIZE-1:0]	in2;
+input	[SIZE-1:0]	in3;
+
+// dout is a reg only because it is assigned from an always block; the
+// block is combinational and holds no state.
+reg	[SIZE-1:0]	dout ;
+
+`ifdef VERPLEX
+   $constraint dl_1c_chk4 ($one_cold ({sel3_l,sel2_l,sel1_l,sel0_l}));
+`endif
+
+// Not read by any logic here; exists to carry the trailing annotation.
+wire [3:0] sel = {sel3_l,sel2_l,sel1_l,sel0_l}; // 0in one_cold
+
+always @ (in0 or in1 or in2 or in3 or sel0_l or sel1_l or sel2_l or sel3_l)
+
+	case ({sel3_l,sel2_l,sel1_l,sel0_l})
+		4'b1110 : dout = in0 ;
+		4'b1101 : dout = in1 ;
+		4'b1011 : dout = in2 ;
+		4'b0111 : dout = in3 ;
+		default : dout = {SIZE{1'bx}} ;	// none, several, or unknown
+	endcase
+
+endmodule // dp_mux4ds
+
+// ----------------------------------------------------------------------
+
+
+// dp_mux5ds: 5-to-1 datapath multiplexer, SIZE bits wide, steered by five
+// decoded ACTIVE-LOW (one-cold) select lines. Exactly one selN_l low
+// routes inN to dout; every other select pattern yields all x.
+module dp_mux5ds (dout, in0, in1, in2, in3,  in4,
+		     sel0_l, sel1_l, sel2_l, sel3_l, sel4_l) ;
+// synopsys template
+
+parameter SIZE = 1;		// data width in bits
+
+output 	[SIZE-1:0] 	dout;
+input			sel0_l;	// low selects in0
+input			sel1_l;	// low selects in1
+input			sel2_l;	// low selects in2
+input			sel3_l;	// low selects in3
+input			sel4_l;	// low selects in4
+input	[SIZE-1:0]	in0;
+input	[SIZE-1:0]	in1;
+input	[SIZE-1:0]	in2;
+input	[SIZE-1:0]	in3;
+input	[SIZE-1:0]	in4;
+
+// dout is a reg only because it is assigned from an always block; the
+// block is combinational and holds no state.
+reg	[SIZE-1:0]	dout ;
+
+`ifdef VERPLEX
+   $constraint dl_1c_chk5 ($one_cold ({sel4_l,sel3_l,sel2_l,sel1_l,sel0_l}));
+`endif
+
+// Not read by any logic here; exists to carry the trailing annotation.
+wire [4:0] sel = {sel4_l,sel3_l,sel2_l,sel1_l,sel0_l}; // 0in one_cold
+
+always @ (in0 or in1 or in2 or in3 or in4 or
+		sel0_l or sel1_l or sel2_l or sel3_l or sel4_l)
+
+	case ({sel4_l,sel3_l,sel2_l,sel1_l,sel0_l})
+		5'b11110 : dout = in0 ;
+		5'b11101 : dout = in1 ;
+		5'b11011 : dout = in2 ;
+		5'b10111 : dout = in3 ;
+		5'b01111 : dout = in4 ;
+		default  : dout = {SIZE{1'bx}} ;	// none, several, or unknown
+	endcase
+
+endmodule // dp_mux5ds
+
+// --------------------------------------------------------------------
+
+// dp_mux8ds: 8-to-1 datapath multiplexer, SIZE bits wide, steered by eight
+// decoded ACTIVE-LOW (one-cold) select lines. Exactly one selN_l low
+// routes inN to dout; every other select pattern yields all x.
+module dp_mux8ds (dout, in0, in1, in2, in3, 
+			in4, in5, in6, in7,
+		     sel0_l, sel1_l, sel2_l, sel3_l,
+		     sel4_l, sel5_l, sel6_l, sel7_l) ;
+// synopsys template
+
+parameter SIZE = 1;		// data width in bits
+
+output 	[SIZE-1:0] 	dout;
+input			sel0_l;	// low selects in0
+input			sel1_l;	// low selects in1
+input			sel2_l;	// low selects in2
+input			sel3_l;	// low selects in3
+input			sel4_l;	// low selects in4
+input			sel5_l;	// low selects in5
+input			sel6_l;	// low selects in6
+input			sel7_l;	// low selects in7
+input	[SIZE-1:0]	in0;
+input	[SIZE-1:0]	in1;
+input	[SIZE-1:0]	in2;
+input	[SIZE-1:0]	in3;
+input	[SIZE-1:0]	in4;
+input	[SIZE-1:0]	in5;
+input	[SIZE-1:0]	in6;
+input	[SIZE-1:0]	in7;
+
+// dout is a reg only because it is assigned from an always block; the
+// block is combinational and holds no state.
+reg	[SIZE-1:0]	dout ;
+
+`ifdef VERPLEX
+   $constraint dl_1c_chk8 ($one_cold ({sel7_l,sel6_l,sel5_l,sel4_l,
+				       sel3_l,sel2_l,sel1_l,sel0_l}));
+`endif
+
+// Not read by any logic here; exists to carry the trailing annotation.
+wire [7:0] sel = {sel7_l,sel6_l,sel5_l,sel4_l,
+                  sel3_l,sel2_l,sel1_l,sel0_l}; // 0in one_cold
+
+always @ (in0 or in1 or in2 or in3 or in4 or in5 or in6 or in7 or
+	  sel0_l or sel1_l or sel2_l or sel3_l or
+	  sel4_l or sel5_l or sel6_l or sel7_l)
+
+	case ({sel7_l,sel6_l,sel5_l,sel4_l,sel3_l,sel2_l,sel1_l,sel0_l})
+		8'b11111110 : dout = in0 ;
+		8'b11111101 : dout = in1 ;
+		8'b11111011 : dout = in2 ;
+		8'b11110111 : dout = in3 ;
+		8'b11101111 : dout = in4 ;
+		8'b11011111 : dout = in5 ;
+		8'b10111111 : dout = in6 ;
+		8'b01111111 : dout = in7 ;
+		default     : dout = {SIZE{1'bx}} ;	// none, several, or unknown
+	endcase
+
+endmodule // dp_mux8ds
+
+
+// ----------------------------------------------------------------------
+
+
+// dp_mux3ds: 3-to-1 datapath multiplexer, SIZE bits wide, steered by three
+// decoded ACTIVE-LOW (one-cold) select lines.
+//   {sel2_l,sel1_l,sel0_l} = 110 : dout = in0
+//   {sel2_l,sel1_l,sel0_l} = 101 : dout = in1
+//   {sel2_l,sel1_l,sel0_l} = 011 : dout = in2
+//   anything else                : dout = all x
+module dp_mux3ds (dout, in0, in1, in2, 
+		     sel0_l, sel1_l, sel2_l);
+// synopsys template
+
+parameter SIZE = 1;		// data width in bits
+
+output 	[SIZE-1:0] 	dout;
+input			sel0_l;	// low selects in0
+input			sel1_l;	// low selects in1
+input			sel2_l;	// low selects in2
+input	[SIZE-1:0]	in0;
+input	[SIZE-1:0]	in1;
+input	[SIZE-1:0]	in2;
+
+// dout is a reg only because it is assigned from an always block; the
+// block is combinational and holds no state.
+reg	[SIZE-1:0]	dout ;
+
+`ifdef VERPLEX
+   $constraint dl_1c_chk3 ($one_cold ({sel2_l,sel1_l,sel0_l}));
+`endif
+
+// Not read by any logic here; exists to carry the trailing annotation.
+wire [2:0] sel = {sel2_l,sel1_l,sel0_l}; // 0in one_cold
+
+always @ (in0 or in1 or in2 or sel0_l or sel1_l or sel2_l)
+begin
+	case ({sel2_l,sel1_l,sel0_l})
+		3'b011  : dout = in2 ;
+		3'b101  : dout = in1 ;
+		3'b110  : dout = in0 ;
+		default : dout = {SIZE{1'bx}} ;	// none, several, or unknown
+	endcase
+end
+
+endmodule // dp_mux3ds
+
+// ----------------------------------------------------------------------
+
+
+// dp_buffer: SIZE-bit pass-through; dout is continuously driven from in
+// with no logic in between.
+module dp_buffer(dout, in);
+// synopsys template
+
+parameter SIZE = 1;		// bus width in bits
+
+input	[SIZE-1:0]	in;
+output 	[SIZE-1:0] 	dout;
+
+assign dout[SIZE-1:0] = in[SIZE-1:0];
+
+endmodule // dp_buffer
+
+
+
+
+
+
+
+
+
Index: /trunk/T1-common/common/ucb_flow_2buf.v
===================================================================
--- /trunk/T1-common/common/ucb_flow_2buf.v	(revision 6)
+++ /trunk/T1-common/common/ucb_flow_2buf.v	(revision 6)
@@ -0,0 +1,434 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: ucb_flow_2buf.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:        ucb_flow_2buf
+//	Description:	Unit Control Block
+//                      - supports 64-bit or 128-bit read with flow control
+//                      - supports 64-bit write with flow control
+//                      - automatically drops non-64-bit writes
+//                      - supports interrupt return to IO Bridge
+//                      - provides 1+2 deep buffer for incoming requests
+//                        from the IO Bridge
+//                      - provides single buffer for returns going back
+//                        to the IO Bridge
+//
+//                      This module is intended for units that have
+//                      64-bit (no 128-bit) registers.
+//
+//                      Data bus width to and from the IO Bridge is
+//                      configured through parameters UCB_IOB_WIDTH and
+//                      IOB_UCB_WIDTH.  Supported widths are:
+//
+//                      IOB_UCB_WIDTH  UCB_IOB_WIDTH
+//                      ----------------------------
+//                      32             8
+//                      16             8
+//                       8             8
+//                       4             4             
+ */ 
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which 
+                        // contains the time scale definition
+
+`include        "iop.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+// UCB_BUF_DEPTH sizes the buf_head / buf_tail vectors declared in
+// ucb_flow_2buf; presumably it is the number of request-buffer entries
+// (buf0, buf1) -- confirm against the buffer logic.
+`define         UCB_BUF_DEPTH   2
+// UCB_BUF_WIDTH is the bit width of req_in, buf0, buf1 and req_out:
+// 64 plus the widths of the UCB address, size, buffer-ID and thread-ID
+// fields, plus two single bits.
+// NOTE(review): the two trailing "+1" terms are presumably the read and
+// write request flags -- confirm against where req_in is assembled.
+// NOTE(review): the macro body is not wrapped in parentheses, so it only
+// expands safely in additive contexts such as [`UCB_BUF_WIDTH-1:0].
+`define         UCB_BUF_WIDTH   64+(`UCB_ADDR_HI-`UCB_ADDR_LO+1)+(`UCB_SIZE_HI-`UCB_SIZE_LO+1)+(`UCB_BUF_HI-`UCB_BUF_LO+1)+(`UCB_THR_HI-`UCB_THR_LO+1)+1+1
+
+module ucb_flow_2buf (/*AUTOARG*/
+   // Outputs
+   ucb_iob_stall, rd_req_vld, wr_req_vld, thr_id_in, buf_id_in, 
+   size_in, addr_in, data_in, ack_busy, int_busy, ucb_iob_vld, 
+   ucb_iob_data, 
+   // Inputs
+   clk, rst_l, iob_ucb_vld, iob_ucb_data, req_acpted, rd_ack_vld, 
+   rd_nack_vld, thr_id_out, buf_id_out, data128, data_out, int_vld, 
+   int_typ, int_thr_id, dev_id, int_stat, int_vec, iob_ucb_stall
+   );
+   // synopsys template
+   
+   parameter IOB_UCB_WIDTH = 32;  // data bus width from IOB to UCB
+   parameter UCB_IOB_WIDTH = 8;   // data bus width from UCB to IOB
+   parameter REG_WIDTH     = 64;  // please do not change this parameter
+   
+
+   // Globals
+   input                                clk;
+   input 				rst_l;
+   
+   // Request from IO Bridge
+   input 				iob_ucb_vld;
+   input [IOB_UCB_WIDTH-1:0] 		iob_ucb_data;
+   output 				ucb_iob_stall;
+
+   // Request to local unit
+   output 				rd_req_vld;
+   output 				wr_req_vld;
+   output [`UCB_THR_HI-`UCB_THR_LO:0]   thr_id_in;
+   output [`UCB_BUF_HI-`UCB_BUF_LO:0]   buf_id_in;
+   output [`UCB_SIZE_HI-`UCB_SIZE_LO:0] size_in;   // only pertinent to PCI
+   output [`UCB_ADDR_HI-`UCB_ADDR_LO:0] addr_in;
+   output [`UCB_DATA_HI-`UCB_DATA_LO:0] data_in;
+   input 				req_acpted;
+   
+   // Ack/Nack from local unit
+   input 				rd_ack_vld;
+   input 				rd_nack_vld;
+   input [`UCB_THR_HI-`UCB_THR_LO:0] 	thr_id_out;
+   input [`UCB_BUF_HI-`UCB_BUF_LO:0] 	buf_id_out;
+   input 				data128;   // set to 1 if data returned is 128 bit
+   input [REG_WIDTH-1:0] 		data_out;
+   output 				ack_busy;
+
+   // Interrupt from local unit
+   input 				int_vld;
+   input [`UCB_PKT_HI-`UCB_PKT_LO:0] 	int_typ;          // interrupt type
+   input [`UCB_THR_HI-`UCB_THR_LO:0] 	int_thr_id;       // interrupt thread ID
+   input [`UCB_INT_DEV_HI-`UCB_INT_DEV_LO:0] dev_id;      // interrupt device ID
+   input [`UCB_INT_STAT_HI-`UCB_INT_STAT_LO:0] int_stat;  // interrupt status
+   input [`UCB_INT_VEC_HI-`UCB_INT_VEC_LO:0]   int_vec;   // interrupt vector
+   output 				int_busy;
+   
+   // Output to IO Bridge
+   output 				ucb_iob_vld;
+   output [UCB_IOB_WIDTH-1:0] 		ucb_iob_data;
+   input 				iob_ucb_stall;
+   
+   // Local signals
+   wire                                 indata_buf_vld;
+   wire [127:0]                         indata_buf;
+   wire                                 ucb_iob_stall_a1;
+   
+   wire                                 read_pending;
+   wire                                 write_pending;
+   wire                                 illegal_write_size;
+
+   wire 				rd_buf;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_head_next;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_head;
+   wire 				wr_buf;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_tail_next;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_tail;
+   wire 				buf_full_next;
+   wire 				buf_full;
+   wire 				buf_empty_next;
+   wire 				buf_empty;
+   wire [`UCB_BUF_WIDTH-1:0] 		req_in;
+   wire 				buf0_en;
+   wire [`UCB_BUF_WIDTH-1:0] 		buf0;
+   wire 				buf1_en;
+   wire [`UCB_BUF_WIDTH-1:0] 		buf1;
+   wire [`UCB_BUF_WIDTH-1:0] 		req_out;
+   wire 				rd_req_vld_nq;
+   wire 				wr_req_vld_nq;
+
+   wire                                 ack_buf_rd;
+   wire                                 ack_buf_wr;
+   wire                                 ack_buf_vld;
+   wire                                 ack_buf_vld_next;
+   wire                                 ack_buf_is_nack;
+   wire                                 ack_buf_is_data128;
+   wire [`UCB_PKT_HI-`UCB_PKT_LO:0]     ack_typ_out;
+   wire [REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO:0] ack_buf_in;
+   wire [REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO:0] ack_buf;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] ack_buf_vec;
+   
+   wire                                 int_buf_rd;
+   wire                                 int_buf_wr;
+   wire                                 int_buf_vld;
+   wire                                 int_buf_vld_next;
+   wire [`UCB_INT_VEC_HI-`UCB_PKT_LO:0] int_buf_in;
+   wire [`UCB_INT_VEC_HI-`UCB_PKT_LO:0] int_buf;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] int_buf_vec;
+   
+   wire                                 int_last_rd;
+   wire                                 outdata_buf_busy;
+   wire                                 outdata_buf_wr;
+   wire [REG_WIDTH+63:0]                outdata_buf_in;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] outdata_vec_in;
+   
+   
+////////////////////////////////////////////////////////////////////////
+// Code starts here
+////////////////////////////////////////////////////////////////////////
+   /************************************************************
+    * Inbound Data
+    ************************************************************/
+   // Register size is hardcoded to 64 bits here because all
+   // units using the UCB module will only write to 64 bit registers.
+   ucb_bus_in #(IOB_UCB_WIDTH,64) ucb_bus_in (.rst_l(rst_l),
+                                              .clk(clk),
+                                              .vld(iob_ucb_vld),
+                                              .data(iob_ucb_data),
+                                              .stall(ucb_iob_stall),
+                                              .indata_buf_vld(indata_buf_vld),
+                                              .indata_buf(indata_buf),
+                                              .stall_a1(ucb_iob_stall_a1));
+
+
+   /************************************************************
+    * Decode inbound packet type
+    ************************************************************/
+   assign 	 read_pending = (indata_buf[`UCB_PKT_HI:`UCB_PKT_LO] ==
+				 `UCB_READ_REQ) &
+			        indata_buf_vld;
+
+   assign 	 write_pending = (indata_buf[`UCB_PKT_HI:`UCB_PKT_LO] == 
+				  `UCB_WRITE_REQ) &
+        	                  indata_buf_vld;
+
+   // 3'b011 is the encoding for double word.  All writes have to be
+   // 64 bits except writes going to PCI.  PCI will instantiate a
+   // customized version of UCB.
+   assign 	 illegal_write_size = (indata_buf[`UCB_SIZE_HI:`UCB_SIZE_LO] !=
+				       3'b011);
+   
+   assign 	 ucb_iob_stall_a1 = (read_pending | write_pending) & buf_full;
+
+   
+   /************************************************************
+    * Inbound buffer
+    ************************************************************/
+   // Head pointer
+   assign 	 rd_buf = req_acpted;
+   assign 	 buf_head_next = ~rst_l ? `UCB_BUF_DEPTH'b01 :
+                                 rd_buf ? {buf_head[`UCB_BUF_DEPTH-2:0],
+				           buf_head[`UCB_BUF_DEPTH-1]} :
+	                                  buf_head;
+   dff_ns #(`UCB_BUF_DEPTH) buf_head_ff (.din(buf_head_next),
+					 .clk(clk),
+					 .q(buf_head));
+
+   // Tail pointer
+   assign 	 wr_buf = (read_pending |
+		           (write_pending & ~illegal_write_size)) &
+			  ~buf_full;
+   assign 	 buf_tail_next = ~rst_l ? `UCB_BUF_DEPTH'b01 :
+                                 wr_buf ? {buf_tail[`UCB_BUF_DEPTH-2:0],
+				           buf_tail[`UCB_BUF_DEPTH-1]} :
+	                                  buf_tail;
+   dff_ns #(`UCB_BUF_DEPTH) buf_tail_ff (.din(buf_tail_next),
+					 .clk(clk),
+					 .q(buf_tail));
+
+   // Buffer full
+   assign 	 buf_full_next = (buf_head_next == buf_tail_next) &
+				 wr_buf;
+   dffrle_ns #(1) buf_full_ff (.din(buf_full_next),
+			       .rst_l(rst_l),
+			       .en(rd_buf|wr_buf),
+			       .clk(clk),
+			       .q(buf_full));
+
+   // Buffer empty
+   assign 	 buf_empty_next = ((buf_head_next == buf_tail_next) &
+				   rd_buf) | ~rst_l;
+   dffe_ns #(1) buf_empty_ff (.din(buf_empty_next),
+			      .en(rd_buf|wr_buf|~rst_l), 
+			      .clk(clk),
+			      .q(buf_empty));
+   
+
+   assign 	 req_in = {indata_buf[`UCB_DATA_HI:`UCB_DATA_LO],
+			   indata_buf[`UCB_ADDR_HI:`UCB_ADDR_LO],
+			   indata_buf[`UCB_SIZE_HI:`UCB_SIZE_LO],
+			   indata_buf[`UCB_BUF_HI:`UCB_BUF_LO],
+			   indata_buf[`UCB_THR_HI:`UCB_THR_LO],
+			   write_pending & ~illegal_write_size,
+			   read_pending};
+	  
+   // Buffer 0
+   assign 	 buf0_en = buf_tail[0] & wr_buf;
+   dffe_ns #(`UCB_BUF_WIDTH) buf0_ff (.din(req_in),
+				      .en(buf0_en),
+				      .clk(clk),
+				      .q(buf0));
+   // Buffer 1
+   assign 	 buf1_en = buf_tail[1] & wr_buf;
+   dffe_ns #(`UCB_BUF_WIDTH) buf1_ff (.din(req_in),
+				      .en(buf1_en),
+				      .clk(clk),
+				      .q(buf1));
+
+   assign 	 req_out = buf_head[0] ? buf0 :
+	                   buf_head[1] ? buf1 :
+	                                 {`UCB_BUF_WIDTH{1'b0}};
+
+   
+   /************************************************************
+    * Inbound interface to local unit
+    ************************************************************/
+   assign 	 {data_in,
+		  addr_in,
+		  size_in,
+		  buf_id_in,
+		  thr_id_in,
+		  wr_req_vld_nq,
+		  rd_req_vld_nq} = req_out;
+   
+   assign 	 rd_req_vld = rd_req_vld_nq & ~buf_empty;
+   assign 	 wr_req_vld = wr_req_vld_nq & ~buf_empty;
+   
+	  
+   /************************************************************
+    * Outbound Ack/Nack
+    ************************************************************/
+   assign        ack_buf_wr = rd_ack_vld | rd_nack_vld;
+   
+   assign        ack_buf_vld_next = ack_buf_wr ? 1'b1 :
+                                    ack_buf_rd ? 1'b0 :
+                                                 ack_buf_vld;
+   
+   dffrl_ns #(1) ack_buf_vld_ff (.din(ack_buf_vld_next),
+                                 .clk(clk),
+                                 .rst_l(rst_l),
+                                 .q(ack_buf_vld));
+   
+   dffe_ns #(1) ack_buf_is_nack_ff (.din(rd_nack_vld),
+                                    .en(ack_buf_wr),
+                                    .clk(clk),
+                                    .q(ack_buf_is_nack));
+
+   dffe_ns #(1) ack_buf_is_data128_ff (.din(data128),
+                                       .en(ack_buf_wr),
+                                       .clk(clk),
+                                       .q(ack_buf_is_data128));
+
+   assign        ack_typ_out = rd_ack_vld ? `UCB_READ_ACK:
+                                            `UCB_READ_NACK;
+
+   assign        ack_buf_in = {data_out,
+                               buf_id_out,
+                               thr_id_out,
+                               ack_typ_out};
+   
+   dffe_ns #(REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO+1) ack_buf_ff (.din(ack_buf_in),
+                                                              .en(ack_buf_wr),
+                                                              .clk(clk),
+                                                              .q(ack_buf));
+
+   assign        ack_buf_vec = ack_buf_is_nack    ? {{REG_WIDTH/UCB_IOB_WIDTH{1'b0}},
+                                                     {64/UCB_IOB_WIDTH{1'b1}}} :
+                               ack_buf_is_data128 ? {(REG_WIDTH+64)/UCB_IOB_WIDTH{1'b1}} :
+                                                    {(64+64)/UCB_IOB_WIDTH{1'b1}};
+   
+   assign        ack_busy = ack_buf_vld;
+   
+
+   /************************************************************
+    * Outbound Interrupt
+    ************************************************************/
+   // Assertion: int_buf_wr shouldn't be asserted while int_busy (int_buf_vld) is set
+   assign        int_buf_wr = int_vld;
+   
+   assign        int_buf_vld_next = int_buf_wr ? 1'b1 :
+                                    int_buf_rd ? 1'b0 :
+                                                 int_buf_vld;
+   
+   dffrl_ns #(1) int_buf_vld_ff (.din(int_buf_vld_next),
+                                 .clk(clk),
+                                 .rst_l(rst_l),
+                                 .q(int_buf_vld));
+
+   assign        int_buf_in = {int_vec,
+                               int_stat,
+                               dev_id,
+                               int_thr_id,
+                               int_typ};
+   
+   dffe_ns #(`UCB_INT_VEC_HI-`UCB_PKT_LO+1) int_buf_ff (.din(int_buf_in),
+                                                        .en(int_buf_wr),
+                                                        .clk(clk),
+                                                        .q(int_buf));
+
+   assign        int_buf_vec = {{REG_WIDTH/UCB_IOB_WIDTH{1'b0}},
+                                {64/UCB_IOB_WIDTH{1'b1}}};
+
+   assign        int_busy = int_buf_vld;
+
+
+   /************************************************************
+    * Outbound ack/interrupt Arbitration
+    ************************************************************/
+   dffrle_ns #(1) int_last_rd_ff (.din(int_buf_rd),
+                                  .en(ack_buf_rd|int_buf_rd),
+                                  .rst_l(rst_l),
+                                  .clk(clk),
+                                  .q(int_last_rd));
+                           
+   assign        ack_buf_rd = ~outdata_buf_busy & ack_buf_vld &
+                              (~int_buf_vld | int_last_rd);
+   
+   assign        int_buf_rd = ~outdata_buf_busy & int_buf_vld &
+                              (~ack_buf_vld | ~int_last_rd);
+
+   assign        outdata_buf_wr = ack_buf_rd | int_buf_rd;
+   
+   assign        outdata_buf_in = ack_buf_rd ? {ack_buf[REG_WIDTH+`UCB_BUF_HI:`UCB_BUF_HI+1],
+                                                {(`UCB_RSV_HI-`UCB_RSV_LO+1){1'b0}},
+                                                {(`UCB_ADDR_HI-`UCB_ADDR_LO+1){1'b0}},
+                                                {(`UCB_SIZE_HI-`UCB_SIZE_LO+1){1'b0}},
+                                                ack_buf[`UCB_BUF_HI:`UCB_BUF_LO],
+                                                ack_buf[`UCB_THR_HI:`UCB_THR_LO],
+                                                ack_buf[`UCB_PKT_HI:`UCB_PKT_LO]}:
+                                               {{REG_WIDTH{1'b0}},
+                                                {(`UCB_INT_RSV_HI-`UCB_INT_RSV_LO+1){1'b0}},
+                                                int_buf[`UCB_INT_VEC_HI:`UCB_INT_VEC_LO],
+                                                int_buf[`UCB_INT_STAT_HI:`UCB_INT_STAT_LO],
+                                                int_buf[`UCB_INT_DEV_HI:`UCB_INT_DEV_LO],
+                                                int_buf[`UCB_THR_HI:`UCB_THR_LO],
+                                                int_buf[`UCB_PKT_HI:`UCB_PKT_LO]};
+   
+   assign        outdata_vec_in = ack_buf_rd ? ack_buf_vec :
+                                               int_buf_vec;
+   
+   ucb_bus_out #(UCB_IOB_WIDTH, REG_WIDTH) ucb_bus_out (.rst_l(rst_l),
+                                                        .clk(clk),
+                                                        .outdata_buf_wr(outdata_buf_wr),
+                                                        .outdata_buf_in(outdata_buf_in),
+                                                        .outdata_vec_in(outdata_vec_in),
+                                                        .outdata_buf_busy(outdata_buf_busy),
+                                                        .vld(ucb_iob_vld),
+                                                        .data(ucb_iob_data),
+                                                        .stall(iob_ucb_stall));
+   
+
+`undef		UCB_BUF_WIDTH
+	  
+endmodule // ucb_flow_2buf
+
+
+// Local Variables:
+// verilog-library-directories:(".")
+// End:
+
+
+
+
+
+
+
Index: /trunk/T1-common/common/test_stub_scan.v
===================================================================
--- /trunk/T1-common/common/test_stub_scan.v	(revision 6)
+++ /trunk/T1-common/common/test_stub_scan.v	(revision 6)
@@ -0,0 +1,146 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: test_stub_scan.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+// ____________________________________________________________________________
+//
+//  test_stub_scan - Test Stub with Scan Support
+// ____________________________________________________________________________
+//
+// Description: DBB interface for test signal generation
+// ____________________________________________________________________________
+
+module test_stub_scan (/*AUTOARG*/
+// Outputs
+mux_drive_disable, mem_write_disable, sehold, se, testmode_l, 
+mem_bypass, so_0, so_1, so_2, 
+// Inputs
+ctu_tst_pre_grst_l, arst_l, global_shift_enable, 
+ctu_tst_scan_disable, ctu_tst_scanmode, ctu_tst_macrotest, 
+ctu_tst_short_chain, long_chain_so_0, short_chain_so_0, 
+long_chain_so_1, short_chain_so_1, long_chain_so_2, short_chain_so_2
+);
+
+   // Combinational test stub for one cluster.  It converts the CTU
+   // test-control inputs into the local scan and memory-protection
+   // controls, and selects whether the long or the short scan chain
+   // configuration drives each of the (up to three) scan-out ports.
+   // There is no clock and no state: every output is a continuous
+   // assignment of the inputs.
+
+   // ----------------------------------------------------------------
+   // Test-control inputs
+   // ----------------------------------------------------------------
+   input        ctu_tst_pre_grst_l;    // when low, both *_disable outputs are forced high
+   input        arst_l;                // no longer used
+   input        global_shift_enable;   // drives se
+   input        ctu_tst_scan_disable;  // redefined as pin_based_scan
+   input        ctu_tst_scanmode;      // inverted to form testmode_l
+   input        ctu_tst_macrotest;     // enables sehold, blocks mem_bypass
+   input        ctu_tst_short_chain;   // requests the short chain configuration
+
+   // ----------------------------------------------------------------
+   // Scan-out of the internal chains.  Chain 0 is mandatory, chains
+   // 1 and 2 are optional.  The short chain is typically the first
+   // tenth of the long chain and should contain the memory collar
+   // flops for deep arrays.
+   // ----------------------------------------------------------------
+   input        long_chain_so_0;
+   input        short_chain_so_0;
+   input        long_chain_so_1;
+   input        short_chain_so_1;
+   input        long_chain_so_2;
+   input        short_chain_so_2;
+
+   // ----------------------------------------------------------------
+   // Static outputs
+   // ----------------------------------------------------------------
+   output       testmode_l;
+   output       mem_bypass;
+
+   // ----------------------------------------------------------------
+   // Dynamic outputs
+   // ----------------------------------------------------------------
+   output       se;
+   output       sehold;
+   output       mux_drive_disable;
+   output       mem_write_disable;
+
+   // ----------------------------------------------------------------
+   // Cluster scan-out ports; connect these directly to the scanout
+   // port(s) of the cluster.  so_0 is mandatory, so_1/so_2 optional.
+   // ----------------------------------------------------------------
+   output       so_0;
+   output       so_1;
+   output       so_2;
+
+   // Internal nets
+   wire         pre_grst_active;       // high while ctu_tst_pre_grst_l is low
+   wire         pin_based_scan;
+   wire         short_chain_en;
+   wire         short_chain_select;
+
+   // se: scan enable, a straight copy of global_shift_enable.
+   assign  se = global_shift_enable;
+
+   // testmode_l: local testmode control (active low) for overriding
+   // gated clocks, asynchronous resets, etc.  Asserted for all
+   // shift-based test modes.
+   assign  testmode_l = ~ctu_tst_scanmode;
+
+   // mem_bypass: memory bypass control for arrays without output
+   // flops, allowing shadow logic to be tested.  Asserted for scan
+   // test, de-asserted for macrotest.
+   assign  mem_bypass = ~(ctu_tst_macrotest | testmode_l);
+
+   // sehold: must be set during macrotest so the holding flops in the
+   // array collars keep their shifted data through capture.  It is
+   // the inverse of scan enable while in macrotest, otherwise low.
+   assign  sehold = ~se & ctu_tst_macrotest;
+
+   // Short chain selection.  The CTU requests the short configuration
+   // through ctu_tst_short_chain; the request is honoured only while
+   // testmode_l is low, and is dropped when pin_based_scan and se are both high.
+   assign  pin_based_scan     = ctu_tst_scan_disable;
+   assign  short_chain_en     = ~pin_based_scan | ~se;
+   assign  short_chain_select = short_chain_en & ctu_tst_short_chain &
+                                ~testmode_l;
+
+   // mem_write_disable (short chain protection): protects contents of
+   // short chain memories during shift and POR.
+   assign  pre_grst_active    = ~ctu_tst_pre_grst_l;
+   assign  mem_write_disable  = se | pre_grst_active;
+
+   // mux_drive_disable (mux/long chain protection): activates the
+   // one-hot mux protection circuitry during scan shift and reset.
+   // Formerly known as rst_tri_en.  Also used by long chain memories
+   // with embedded control.  Additionally asserted whenever the short
+   // chain is selected.
+   assign  mux_drive_disable  = se | pre_grst_active |
+                                short_chain_select;
+
+   // Scan-out muxes: every port follows its short chain when the
+   // short configuration is selected, its long chain otherwise.
+   assign  {so_2, so_1, so_0} = short_chain_select
+                                ? {short_chain_so_2, short_chain_so_1,
+                                   short_chain_so_0}
+                                : {long_chain_so_2, long_chain_so_1,
+                                   long_chain_so_0};
+
+endmodule // test_stub_scan
Index: /trunk/T1-common/common/ucb_flow_jbi.v
===================================================================
--- /trunk/T1-common/common/ucb_flow_jbi.v	(revision 6)
+++ /trunk/T1-common/common/ucb_flow_jbi.v	(revision 6)
@@ -0,0 +1,426 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: ucb_flow_jbi.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:        ucb_flow_jbi
+//	Description:	Unit Control Block
+//                      - supports 1B/2B/4B/8B/16B read with flow control
+//                      - supports 1B/2B/4B/8B write with flow control
+//                      - does NOT support ifill request
+//                      - supports interrupt return to IO Bridge
+//                      - provides 1+2 deep buffer for incoming requests
+//                        from the IO Bridge
+//                      - provides single buffer for returns going back
+//                        to the IO Bridge
+//
+//                      This module is customized for the JBI.
+//
+//                      Data bus width to and from the IO Bridge is
+//                      configured through parameters UCB_IOB_WIDTH and
+//                      IOB_UCB_WIDTH.  Supported widths are:
+//
+//                      IOB_UCB_WIDTH  UCB_IOB_WIDTH
+//                      ----------------------------
+//                      32             8
+//                      16             8
+//                       8             8
+//                       4             4             
+ */ 
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which 
+                        // contains the time scale definition
+
+`include        "iop.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines.  UCB_BUF_DEPTH: entries in the inbound request buffer.  UCB_BUF_WIDTH: bits per entry = 64 (data) + addr + size + buf id + thr id + 1 (write flag) + 1 (read flag); the expansion is not parenthesized.
+////////////////////////////////////////////////////////////////////////
+`define         UCB_BUF_DEPTH   2
+`define         UCB_BUF_WIDTH   64+(`UCB_ADDR_HI-`UCB_ADDR_LO+1)+(`UCB_SIZE_HI-`UCB_SIZE_LO+1)+(`UCB_BUF_HI-`UCB_BUF_LO+1)+(`UCB_THR_HI-`UCB_THR_LO+1)+1+1
+
+module ucb_flow_jbi (/*AUTOARG*/
+   // Outputs
+   ucb_iob_stall, rd_req_vld, wr_req_vld, thr_id_in, buf_id_in, 
+   size_in, addr_in, data_in, ack_busy, int_busy, ucb_iob_vld, 
+   ucb_iob_data, 
+   // Inputs
+   clk, rst_l, iob_ucb_vld, iob_ucb_data, req_acpted, rd_ack_vld, 
+   rd_nack_vld, thr_id_out, buf_id_out, data128, data_out, int_vld, 
+   int_typ, int_thr_id, dev_id, int_stat, int_vec, iob_ucb_stall
+   );
+   // synopsys template
+   
+   parameter IOB_UCB_WIDTH = 32;  // data bus width from IOB to UCB
+   parameter UCB_IOB_WIDTH = 8;   // data bus width from UCB to IOB
+   parameter REG_WIDTH     = 128;  // please do not change this parameter
+   
+
+   // Globals
+   input                                clk;
+   input 				rst_l;
+   
+   // Request from IO Bridge
+   input 				iob_ucb_vld;
+   input [IOB_UCB_WIDTH-1:0] 		iob_ucb_data;
+   output 				ucb_iob_stall;
+
+   // Request to local unit
+   output 				rd_req_vld;
+   output 				wr_req_vld;
+   output [`UCB_THR_HI-`UCB_THR_LO:0]   thr_id_in;
+   output [`UCB_BUF_HI-`UCB_BUF_LO:0]   buf_id_in;
+   output [`UCB_SIZE_HI-`UCB_SIZE_LO:0] size_in;   // only pertinent to JBI and SPI
+   output [`UCB_ADDR_HI-`UCB_ADDR_LO:0] addr_in;
+   output [`UCB_DATA_HI-`UCB_DATA_LO:0] data_in;
+   input 				req_acpted;
+   
+   // Ack/Nack from local unit
+   input 				rd_ack_vld;
+   input 				rd_nack_vld;
+   input [`UCB_THR_HI-`UCB_THR_LO:0] 	thr_id_out;
+   input [`UCB_BUF_HI-`UCB_BUF_LO:0] 	buf_id_out;
+   input 				data128;   // set to 1 if data returned is 128 bit
+   input [REG_WIDTH-1:0] 		data_out;
+   output 				ack_busy;
+
+   // Interrupt from local unit
+   input 				int_vld;
+   input [`UCB_PKT_HI-`UCB_PKT_LO:0] 	int_typ;          // interrupt type
+   input [`UCB_THR_HI-`UCB_THR_LO:0] 	int_thr_id;       // interrupt thread ID
+   input [`UCB_INT_DEV_HI-`UCB_INT_DEV_LO:0] dev_id;      // interrupt device ID
+   input [`UCB_INT_STAT_HI-`UCB_INT_STAT_LO:0] int_stat;  // interrupt status
+   input [`UCB_INT_VEC_HI-`UCB_INT_VEC_LO:0]   int_vec;   // interrupt vector
+   output 				int_busy;
+   
+   // Output to IO Bridge
+   output 				ucb_iob_vld;
+   output [UCB_IOB_WIDTH-1:0] 		ucb_iob_data;
+   input 				iob_ucb_stall;
+   
+   // Local signals
+   wire                                 indata_buf_vld;
+   wire [127:0]                         indata_buf;
+   wire                                 ucb_iob_stall_a1;
+   
+   wire                                 read_pending;
+   wire                                 write_pending;
+   
+   wire 				rd_buf;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_head_next;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_head;
+   wire 				wr_buf;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_tail_next;
+   wire [`UCB_BUF_DEPTH-1:0] 		buf_tail;
+   wire 				buf_full_next;
+   wire 				buf_full;
+   wire 				buf_empty_next;
+   wire 				buf_empty;
+   wire [`UCB_BUF_WIDTH-1:0] 		req_in;
+   wire 				buf0_en;
+   wire [`UCB_BUF_WIDTH-1:0] 		buf0;
+   wire 				buf1_en;
+   wire [`UCB_BUF_WIDTH-1:0] 		buf1;
+   wire [`UCB_BUF_WIDTH-1:0] 		req_out;
+   wire 				rd_req_vld_nq;
+   wire 				wr_req_vld_nq;
+
+   wire                                 ack_buf_rd;
+   wire                                 ack_buf_wr;
+   wire                                 ack_buf_vld;
+   wire                                 ack_buf_vld_next;
+   wire                                 ack_buf_is_nack;
+   wire                                 ack_buf_is_data128;
+   wire [`UCB_PKT_HI-`UCB_PKT_LO:0]     ack_typ_out;
+   wire [REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO:0] ack_buf_in;
+   wire [REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO:0] ack_buf;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] ack_buf_vec;
+   
+   wire                                 int_buf_rd;
+   wire                                 int_buf_wr;
+   wire                                 int_buf_vld;
+   wire                                 int_buf_vld_next;
+   wire [`UCB_INT_VEC_HI-`UCB_PKT_LO:0] int_buf_in;
+   wire [`UCB_INT_VEC_HI-`UCB_PKT_LO:0] int_buf;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] int_buf_vec;
+   
+   wire                                 int_last_rd;
+   wire                                 outdata_buf_busy;
+   wire                                 outdata_buf_wr;
+   wire [REG_WIDTH+63:0]                outdata_buf_in;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] outdata_vec_in;
+   
+   
+////////////////////////////////////////////////////////////////////////
+// Code starts here
+////////////////////////////////////////////////////////////////////////
+   /************************************************************
+    * Inbound Data
+    ************************************************************/
+   // Register size is hardcoded to 64 bits here
+   ucb_bus_in #(IOB_UCB_WIDTH,64) ucb_bus_in (.rst_l(rst_l),
+                                              .clk(clk),
+                                              .vld(iob_ucb_vld),
+                                              .data(iob_ucb_data),
+                                              .stall(ucb_iob_stall),
+                                              .indata_buf_vld(indata_buf_vld),
+                                              .indata_buf(indata_buf),
+                                              .stall_a1(ucb_iob_stall_a1));
+
+
+   /************************************************************
+    * Decode inbound packet type
+    ************************************************************/
+   assign 	 read_pending = (indata_buf[`UCB_PKT_HI:`UCB_PKT_LO] ==
+				 `UCB_READ_REQ) &
+			        indata_buf_vld;
+
+   assign 	 write_pending = (indata_buf[`UCB_PKT_HI:`UCB_PKT_LO] == 
+				  `UCB_WRITE_REQ) &
+        	                  indata_buf_vld;
+
+   assign 	 ucb_iob_stall_a1 = (read_pending | write_pending) & buf_full;
+
+   
+   /************************************************************
+    * Inbound buffer
+    ************************************************************/
+   // Head pointer
+   assign 	 rd_buf = req_acpted;
+   assign 	 buf_head_next = ~rst_l ? `UCB_BUF_DEPTH'b01 :
+                                 rd_buf ? {buf_head[`UCB_BUF_DEPTH-2:0],
+				           buf_head[`UCB_BUF_DEPTH-1]} :
+	                                  buf_head;
+   dff_ns #(`UCB_BUF_DEPTH) buf_head_ff (.din(buf_head_next),
+					 .clk(clk),
+					 .q(buf_head));
+
+   // Tail pointer
+   assign 	 wr_buf = (read_pending |
+		           write_pending) &
+			  ~buf_full;
+   assign 	 buf_tail_next = ~rst_l ? `UCB_BUF_DEPTH'b01 :
+                                 wr_buf ? {buf_tail[`UCB_BUF_DEPTH-2:0],
+				           buf_tail[`UCB_BUF_DEPTH-1]} :
+	                                  buf_tail;
+   dff_ns #(`UCB_BUF_DEPTH) buf_tail_ff (.din(buf_tail_next),
+					 .clk(clk),
+					 .q(buf_tail));
+
+   // Buffer full
+   assign 	 buf_full_next = (buf_head_next == buf_tail_next) &
+				 wr_buf;
+   dffrle_ns #(1) buf_full_ff (.din(buf_full_next),
+			       .rst_l(rst_l),
+			       .en(rd_buf|wr_buf),
+			       .clk(clk),
+			       .q(buf_full));
+
+   // Buffer empty
+   assign 	 buf_empty_next = ((buf_head_next == buf_tail_next) &
+				   rd_buf) | ~rst_l;
+   dffe_ns #(1) buf_empty_ff (.din(buf_empty_next),
+			      .en(rd_buf|wr_buf|~rst_l), 
+			      .clk(clk),
+			      .q(buf_empty));
+   
+
+   assign 	 req_in = {indata_buf[`UCB_DATA_HI:`UCB_DATA_LO],
+			   indata_buf[`UCB_ADDR_HI:`UCB_ADDR_LO],
+			   indata_buf[`UCB_SIZE_HI:`UCB_SIZE_LO],
+			   indata_buf[`UCB_BUF_HI:`UCB_BUF_LO],
+			   indata_buf[`UCB_THR_HI:`UCB_THR_LO],
+			   write_pending,
+			   read_pending};
+	  
+   // Buffer 0
+   assign 	 buf0_en = buf_tail[0] & wr_buf;
+   dffe_ns #(`UCB_BUF_WIDTH) buf0_ff (.din(req_in),
+				      .en(buf0_en),
+				      .clk(clk),
+				      .q(buf0));
+   // Buffer 1
+   assign 	 buf1_en = buf_tail[1] & wr_buf;
+   dffe_ns #(`UCB_BUF_WIDTH) buf1_ff (.din(req_in),
+				      .en(buf1_en),
+				      .clk(clk),
+				      .q(buf1));
+
+   assign 	 req_out = buf_head[0] ? buf0 :
+	                   buf_head[1] ? buf1 :
+	                                 {`UCB_BUF_WIDTH{1'b0}};
+
+   
+   /************************************************************
+    * Inbound interface to local unit
+    ************************************************************/
+   assign 	 {data_in,
+		  addr_in,
+		  size_in,
+		  buf_id_in,
+		  thr_id_in,
+		  wr_req_vld_nq,
+		  rd_req_vld_nq} = req_out;
+   
+   assign 	 rd_req_vld = rd_req_vld_nq & ~buf_empty;
+   assign 	 wr_req_vld = wr_req_vld_nq & ~buf_empty;
+   
+	  
+   /************************************************************
+    * Outbound Ack/Nack
+    ************************************************************/
+   assign        ack_buf_wr = rd_ack_vld | rd_nack_vld;
+   
+   assign        ack_buf_vld_next = ack_buf_wr ? 1'b1 :
+                                    ack_buf_rd ? 1'b0 :
+                                                 ack_buf_vld;
+   
+   dffrl_ns #(1) ack_buf_vld_ff (.din(ack_buf_vld_next),
+                                 .clk(clk),
+                                 .rst_l(rst_l),
+                                 .q(ack_buf_vld));
+   
+   dffe_ns #(1) ack_buf_is_nack_ff (.din(rd_nack_vld),
+                                    .en(ack_buf_wr),
+                                    .clk(clk),
+                                    .q(ack_buf_is_nack));
+
+   dffe_ns #(1) ack_buf_is_data128_ff (.din(data128),
+                                       .en(ack_buf_wr),
+                                       .clk(clk),
+                                       .q(ack_buf_is_data128));
+
+   assign        ack_typ_out = rd_ack_vld ? `UCB_READ_ACK:
+                                            `UCB_READ_NACK;
+   
+
+   assign        ack_buf_in = {data_out,
+                               buf_id_out,
+                               thr_id_out,
+                               ack_typ_out};
+   
+   dffe_ns #(REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO+1) ack_buf_ff (.din(ack_buf_in),
+                                                              .en(ack_buf_wr),
+                                                              .clk(clk),
+                                                              .q(ack_buf));
+
+   assign        ack_buf_vec = ack_buf_is_nack    ? {{REG_WIDTH/UCB_IOB_WIDTH{1'b0}},
+                                                     {64/UCB_IOB_WIDTH{1'b1}}} :
+                               ack_buf_is_data128 ? {(REG_WIDTH+64)/UCB_IOB_WIDTH{1'b1}} :
+                                                    {(64+64)/UCB_IOB_WIDTH{1'b1}};
+   
+   assign        ack_busy = ack_buf_vld;
+   
+
+   /************************************************************
+    * Outbound Interrupt
+    ************************************************************/
+   // Assertion: int_buf_wr shouldn't be asserted while int_busy (int_buf_vld) is set
+   assign        int_buf_wr = int_vld;
+   
+   assign        int_buf_vld_next = int_buf_wr ? 1'b1 :
+                                    int_buf_rd ? 1'b0 :
+                                                 int_buf_vld;
+   
+   dffrl_ns #(1) int_buf_vld_ff (.din(int_buf_vld_next),
+                                 .clk(clk),
+                                 .rst_l(rst_l),
+                                 .q(int_buf_vld));
+
+   assign        int_buf_in = {int_vec,
+                               int_stat,
+                               dev_id,
+                               int_thr_id,
+                               int_typ};
+   
+   dffe_ns #(`UCB_INT_VEC_HI-`UCB_PKT_LO+1) int_buf_ff (.din(int_buf_in),
+                                                        .en(int_buf_wr),
+                                                        .clk(clk),
+                                                        .q(int_buf));
+
+   assign        int_buf_vec = {{REG_WIDTH/UCB_IOB_WIDTH{1'b0}},
+                                {64/UCB_IOB_WIDTH{1'b1}}};
+
+   assign        int_busy = int_buf_vld;
+
+
+   /************************************************************
+    * Outbound ack/interrupt Arbitration
+    ************************************************************/
+   dffrle_ns #(1) int_last_rd_ff (.din(int_buf_rd),
+                                  .en(ack_buf_rd|int_buf_rd),
+                                  .rst_l(rst_l),
+                                  .clk(clk),
+                                  .q(int_last_rd));
+                           
+   assign        ack_buf_rd = ~outdata_buf_busy & ack_buf_vld &
+                              (~int_buf_vld | int_last_rd);
+   
+   assign        int_buf_rd = ~outdata_buf_busy & int_buf_vld &
+                              (~ack_buf_vld | ~int_last_rd);
+
+   assign        outdata_buf_wr = ack_buf_rd | int_buf_rd;
+   
+   assign        outdata_buf_in = ack_buf_rd ? {ack_buf[REG_WIDTH+`UCB_BUF_HI:`UCB_BUF_HI+1],
+                                                {(`UCB_RSV_HI-`UCB_RSV_LO+1){1'b0}},
+                                                {(`UCB_ADDR_HI-`UCB_ADDR_LO+1){1'b0}},
+                                                {(`UCB_SIZE_HI-`UCB_SIZE_LO+1){1'b0}},
+                                                ack_buf[`UCB_BUF_HI:`UCB_BUF_LO],
+                                                ack_buf[`UCB_THR_HI:`UCB_THR_LO],
+                                                ack_buf[`UCB_PKT_HI:`UCB_PKT_LO]}:
+                                               {{REG_WIDTH{1'b0}},
+                                                {(`UCB_INT_RSV_HI-`UCB_INT_RSV_LO+1){1'b0}},
+                                                int_buf[`UCB_INT_VEC_HI:`UCB_INT_VEC_LO],
+                                                int_buf[`UCB_INT_STAT_HI:`UCB_INT_STAT_LO],
+                                                int_buf[`UCB_INT_DEV_HI:`UCB_INT_DEV_LO],
+                                                int_buf[`UCB_THR_HI:`UCB_THR_LO],
+                                                int_buf[`UCB_PKT_HI:`UCB_PKT_LO]};
+   
+   assign        outdata_vec_in = ack_buf_rd ? ack_buf_vec :
+                                               int_buf_vec;
+   
+   ucb_bus_out #(UCB_IOB_WIDTH, REG_WIDTH) ucb_bus_out (.rst_l(rst_l),
+                                                        .clk(clk),
+                                                        .outdata_buf_wr(outdata_buf_wr),
+                                                        .outdata_buf_in(outdata_buf_in),
+                                                        .outdata_vec_in(outdata_vec_in),
+                                                        .outdata_buf_busy(outdata_buf_busy),
+                                                        .vld(ucb_iob_vld),
+                                                        .data(ucb_iob_data),
+                                                        .stall(iob_ucb_stall));
+   
+
+`undef		UCB_BUF_WIDTH
+	  
+endmodule // ucb_flow_jbi
+
+
+// Local Variables:
+// verilog-library-directories:(".")
+// End:
+
+
+
+
+
+
+
Index: /trunk/T1-common/common/cluster_header_sync.v
===================================================================
--- /trunk/T1-common/common/cluster_header_sync.v	(revision 6)
+++ /trunk/T1-common/common/cluster_header_sync.v	(revision 6)
@@ -0,0 +1,68 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: cluster_header_sync.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+// The sync pulse cluster header is instantiated as a hard macro.
+// This model is for simulation only.
+
+module cluster_header_sync (/*AUTOARG*/
+   // Outputs
+   dram_rx_sync_local, dram_tx_sync_local, jbus_rx_sync_local, 
+   jbus_tx_sync_local, so, 
+   // Inputs
+   dram_rx_sync_global, dram_tx_sync_global, jbus_rx_sync_global, 
+   jbus_tx_sync_global, cmp_gclk, cmp_rclk, si, se
+   );
+
+   // Clocks, forwarded unchanged to the wrapped bw_clk_cclk_sync instance.
+   input  cmp_gclk, cmp_rclk;
+
+   // Scan ports, forwarded unchanged to the wrapped instance.
+   input  si, se;
+   output so;
+
+   // Global sync signals in, local sync signals out (DRAM and JBUS, rx/tx).
+   input  dram_rx_sync_global, dram_tx_sync_global;
+   input  jbus_rx_sync_global, jbus_tx_sync_global;
+   output dram_rx_sync_local,  dram_tx_sync_local;
+   output jbus_rx_sync_local,  jbus_tx_sync_local;
+
+   // Declared but not connected to anything inside this module.
+   wire   dram_rx_so, dram_tx_so, jbus_rx_so;
+
+   // This module is a pure pass-through: every port maps one-to-one onto
+   // the same-named port of bw_clk_cclk_sync.
+   bw_clk_cclk_sync sync_wrapper (
+      // clocks
+      .cmp_gclk            (cmp_gclk),
+      .cmp_rclk            (cmp_rclk),
+      // sync signals
+      .dram_rx_sync_global (dram_rx_sync_global),
+      .dram_rx_sync_local  (dram_rx_sync_local),
+      .dram_tx_sync_global (dram_tx_sync_global),
+      .dram_tx_sync_local  (dram_tx_sync_local),
+      .jbus_rx_sync_global (jbus_rx_sync_global),
+      .jbus_rx_sync_local  (jbus_rx_sync_local),
+      .jbus_tx_sync_global (jbus_tx_sync_global),
+      .jbus_tx_sync_local  (jbus_tx_sync_local),
+      // scan
+      .si (si), .se (se), .so (so)
+   );
+
+endmodule // cluster_header_sync
Index: /trunk/T1-common/common/cluster_header.v
===================================================================
--- /trunk/T1-common/common/cluster_header.v	(revision 6)
+++ /trunk/T1-common/common/cluster_header.v	(revision 6)
@@ -0,0 +1,117 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: cluster_header.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+// The cluster header is instantiated as a hard macro.
+// This model is for simulation only.
+`include "sys.h"
+
+module cluster_header (/*AUTOARG*/
+   // Outputs
+   dbginit_l, cluster_grst_l, rclk, so, 
+   // Inputs
+   gclk, cluster_cken, arst_l, grst_l, adbginit_l, gdbginit_l, si, 
+   se
+   );
+   // Derives rclk from gclk and hands grst_l / gdbginit_l on to the cluster.
+   input       gclk;           // source clock
+   input       cluster_cken;   // rclk enable (used by the non-FPGA model only)
+   input       arst_l;         // arst_l of rst_repeater (non-FPGA model only)
+   input       grst_l;         // source of cluster_grst_l
+   input       adbginit_l;     // arst_l of dbginit_repeater (non-FPGA model only)
+   input       gdbginit_l;     // source of dbginit_l
+   output      dbginit_l; 
+   output      cluster_grst_l;
+   output      rclk;           // gclk after a #10 delay; also gated by sync_enable when not FPGA_SYN
+
+   input       si; // scan ports for reset flop repeaters
+   input       se;
+   output      so;
+
+`ifdef FPGA_SYN
+// FPGA build: rclk is gclk after #10 (no gating) and both resets are simply
+// registered on the falling edge of rclk.  There is no scan chain here, so
+// so is tied low instead of being left undriven.
+assign so = 1'b0;
+
+reg      dbginit_l; 
+reg      cluster_grst_l;
+
+assign #10 rclk = gclk;
+
+always @(negedge rclk) begin
+  dbginit_l <= gdbginit_l;
+  cluster_grst_l <= grst_l;
+end
+
+`else
+   
+   wire        pre_sync_enable;   // cluster_cken after sync_cluster_master
+   wire        sync_enable;       // pre_sync_enable after sync_cluster_slave; gates rclk
+   wire        cluster_grst_l;
+   wire        dbginit_l;
+   wire        rst_sync_so;       // scan link: rst_repeater.so -> dbginit_repeater.si
+
+   bw_u1_syncff_4x sync_cluster_master ( // no scan hook-up
+                                        .so(),
+                                        .q (pre_sync_enable),
+                                        .ck (gclk),
+                                        .d (cluster_cken),
+                                        .sd(1'b0),
+                                        .se(1'b0)
+                                        );
+   
+
+   bw_u1_scanl_2x sync_cluster_slave ( // use scan lock-up latch
+                                      .so (sync_enable),
+                                      .ck (gclk),
+                                      .sd (pre_sync_enable)
+                                      );
+   
+// NOTE! Pound delay in the below statement is meant to provide 10 ps
+// delay between gclk and rclk to allow the synchronizer for rst, dbginit,
+// and sync pulses to be modelled accurately.  gclk and rclk need to have 
+// at least one simulator timestep separation to allow the flop->flop 
+// synchronizer to work correctly.
+   assign #10 rclk = gclk & sync_enable;
+
+   synchronizer_asr rst_repeater (
+                                 .sync_out(cluster_grst_l),
+                                 .so(rst_sync_so),
+                                 .async_in(grst_l),
+                                 .gclk(gclk),
+                                 .rclk(rclk),
+                                 .arst_l(arst_l),
+                                 .si(si),
+                                 .se(se)
+                                 );
+   
+   synchronizer_asr dbginit_repeater (
+                                     .sync_out(dbginit_l),
+                                     .so(so),
+                                     .async_in(gdbginit_l),
+                                     .gclk(gclk),
+                                     .rclk(rclk),
+                                     .arst_l(adbginit_l),
+                                     .si(rst_sync_so),
+                                     .se(se)
+                                     );
+`endif
+
+endmodule // cluster_header
Index: /trunk/T1-common/common/cmp_sram_redhdr.v
===================================================================
--- /trunk/T1-common/common/cmp_sram_redhdr.v	(revision 6)
+++ /trunk/T1-common/common/cmp_sram_redhdr.v	(revision 6)
@@ -0,0 +1,216 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: cmp_sram_redhdr.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+//
+//    Cluster Name:  Efuse Cluster
+//    Unit Name:  cmp_redhdr (sram redundancy header)
+//    Block Name: EFC
+//
+//    This is the header used to read and write the fuse values to the
+//    RAM blocks.  It is used to drive the ICD, DCD and L2T.  It is
+//    outside the array it is driving.
+//
+//    Top level signal renaming:
+//       s/ary/<your_ary_name>/g
+//       s/xfuse/<your_ary_initial>fuse/g
+//
+//       E.g.  fuse_ary_wren -> fuse_icd_wren
+//             efc_spc_xfuse_data -> efc_spc_ifuse_data, efc_sct_fuse_data
+//
+//-----------------------------------------------------------------------------
+`include "sys.h"
+`include "iop.h"
+
+//FPGA_SYN enables all FPGA related modifications
+`ifdef FPGA_SYN 
+`define FPGA_SYN_CLK
+`endif
+
+module cmp_sram_redhdr (/*AUTOARG*/
+   // Outputs
+   fuse_ary_wren, fuse_ary_rid, fuse_ary_repair_value, 
+   fuse_ary_repair_en, spc_efc_xfuse_data, scanout, 
+   // Inputs
+   rclk, se, scanin, arst_l, testmode_l, efc_spc_fuse_clk1, 
+   efc_spc_fuse_clk2, efc_spc_xfuse_data, efc_spc_xfuse_ashift, 
+   efc_spc_xfuse_dshift, ary_fuse_repair_value, ary_fuse_repair_en
+   );
+
+   input		rclk;			// clocks the scan lockup latches; also replaces both fuse clocks when testmode_l is low
+   input		se;			// scan enable, wired to every flop and lockup latch
+   input		scanin;			// scan data in (to si_lockup); the former "CMP clock, L1 phase" note here presumably described rclk
+   input    arst_l;			// rst_l of the dffrl_async control flops
+   input    testmode_l;		// low: fuse clocks are swapped for rclk and fuse_ary_wren is forced low
+
+   // eFuse controller interface
+   input		efc_spc_fuse_clk1;	// clocks the shift registers and delay flops (as int_clk1)
+   input		efc_spc_fuse_clk2;	// clocks wren_reg (as int_clk2)
+   input		efc_spc_xfuse_data;	// serial data in, enters bit 0 of both shift registers
+   input		efc_spc_xfuse_ashift;	// addr shift; low during rst
+   input		efc_spc_xfuse_dshift;	// data shift; low during rst
+
+   // interface to cache redundancy logic
+   input [7:0] ary_fuse_repair_value;  //data out for redundancy register
+   input [1:0] ary_fuse_repair_en;     //enable bits out 
+   
+
+   // outputs
+   // interface to icache
+   output      fuse_ary_wren;         //redundancy reg wr enable, qualified
+   output [5:0] fuse_ary_rid;         //redundancy register id
+   output [7:0] fuse_ary_repair_value;//data in for redundancy register
+   output [1:0] fuse_ary_repair_en;   //enable bits to turn on redundancy
+
+   // serial rd data to controller
+   output       spc_efc_xfuse_data;
+
+   // normal scan out
+   output       scanout;
+   // FPGA build (FPGA_SYN_CLK): the header is stubbed out, every output is tied to 0.
+`ifdef FPGA_SYN_CLK
+   assign fuse_ary_wren = 1'b0;
+   assign fuse_ary_rid = 6'b0;
+   assign fuse_ary_repair_value = 8'b0;
+   assign fuse_ary_repair_en = 2'b0;
+   assign spc_efc_xfuse_data = 1'b0;
+   assign scanout = 1'b0;
+`else
+
+   // local signals
+   wire         clk;
+   wire         int_clk1;
+   wire         int_clk2;
+   wire         int_scanout; // !! hook up to last flop in scan chain !!
+   wire         int_scanin;  // !! hook up to 1st flop in scan chain !!
+
+   wire [6:0]   addr_shft_nxt;
+   wire [6:0]   addr_shft_ff;
+   wire         addr_shft_en;
+   wire         wren_bit;
+
+   wire [11:0]  data_shft_nxt;
+   wire [11:0]  data_shft_ff;
+   wire         data_shft_en;
+
+   wire         dshift_dly1_ff;
+   wire         dshift_dly2_ff;
+   wire         ashift_dly1_ff;
+   wire         ashift_dly2_ff;
+   wire         wren_ff;
+   wire         wren_ph1;
+   wire         rden_ph1;
+
+   /*AUTOWIRE*/
+   // Beginning of automatic wires (for undeclared instantiated-module outputs)
+   // End of automatics
+
+   //
+   // Code Begins Here
+   //
+
+   assign       clk = rclk;
+   
+   //  Test logic: with testmode_l low both internal clocks are driven by rclk
+   assign       int_clk1 = (~testmode_l) ? rclk : efc_spc_fuse_clk1;
+   assign       int_clk2 = (~testmode_l) ? rclk : efc_spc_fuse_clk2;
+   assign       int_scanout = 1'b0; // no flop is chained in; the scanout lockup is fed a constant 0
+
+   // Need latch to avoid hold time problems
+   // connect int_scanout to last flop in scan chain
+   bw_u1_scanlg_2x so_lockup(.so (scanout), 
+                             .sd (int_scanout), 
+                             .ck (clk),  .se(se));
+   // connect int_scanin to first flop in scan chain (currently unused: every flop below leaves .si() open)
+   bw_u1_scanlg_2x si_lockup(.so (int_scanin),
+                             .sd (scanin), 
+                             .ck (clk), .se(se));
+
+   //  Shift registers
+   //  Address: 7 bits, enabled by efc_spc_xfuse_ashift, serial data enters at bit 0
+   assign   addr_shft_en = efc_spc_xfuse_ashift;
+   assign   addr_shft_nxt = {addr_shft_ff[5:0], efc_spc_xfuse_data};
+
+   dffe_s #(7) addr_shft_reg (.din  (addr_shft_nxt), 
+		                        .q    (addr_shft_ff), 
+                            .en   (addr_shft_en), 
+                            .clk  (int_clk1), .se(se), .si(), .so());
+   
+   assign   fuse_ary_rid[5:0] = addr_shft_ff[6:1];
+   assign   wren_bit = addr_shft_ff[0]; // 1 allows wren_ph1 (write), 0 allows rden_ph1 (read)
+
+   // Data: 12 bits, enabled by dshift, by its first delay stage, or by a read load
+   assign   data_shft_en = efc_spc_xfuse_dshift | dshift_dly1_ff | rden_ph1;
+
+   // mux2es: on rden_ph1 load the array's repair value/enables in parallel, otherwise shift in serial data
+   assign   data_shft_nxt = rden_ph1
+            ? {{3{ary_fuse_repair_en[1]}},
+               ary_fuse_repair_value[7:0], 
+               ary_fuse_repair_en[0]}
+            : {data_shft_ff[10:0], 
+               efc_spc_xfuse_data};
+   
+   // data_shft_ff[10:9] are not tapped by any fuse_ary output (only bits 11, 8:1 and 0 are)
+   dffe_s #(12) 	 data_shft_reg (.din  (data_shft_nxt),
+			                          .q    (data_shft_ff), 
+                                .en   (data_shft_en), 
+                                .clk  (int_clk1), .se(se), .si(), .so());
+   
+   assign   fuse_ary_repair_value = data_shft_ff[8:1];
+   assign   fuse_ary_repair_en    = {(data_shft_ff[11] & wren_ff),
+                                     (data_shft_ff[0] & wren_ff)};
+
+   // Control: two delay stages each for ashift and dshift, used below to detect that a shift has ended
+   dff_s #(1) ashift_dly1_reg (.din (efc_spc_xfuse_ashift), 
+                             .q   (ashift_dly1_ff),
+				                     .clk (int_clk1), .se(se), .si(), .so());
+   dff_s #(1) ashift_dly2_reg (.din (ashift_dly1_ff), 
+                             .q   (ashift_dly2_ff),
+				                     .clk (int_clk1), .se(se), .si(), .so());
+
+   dffrl_async #(1) dshift_dly1_reg (.din (efc_spc_xfuse_dshift), 
+                             .q   (dshift_dly1_ff), 
+                             .rst_l (arst_l),
+				                     .clk (int_clk1), .se(se), .si(), .so());
+   dffrl_async #(1) dshift_dly2_reg (.din (dshift_dly1_ff), 
+                             .q   (dshift_dly2_ff), 
+                             .rst_l (arst_l),
+				                     .clk (int_clk1), .se(se), .si(), .so());
+   // wren_ph1 / rden_ph1 are high while the dly2 stage is still set but dly1 has already cleared
+   assign   wren_ph1 = dshift_dly2_ff && ~dshift_dly1_ff && wren_bit;
+   assign   rden_ph1 = ashift_dly2_ff && ~ashift_dly1_ff && ~wren_bit;
+
+   // use phase two for wren since array writes in phase one
+   dffrl_async #(1) wren_reg (.din  (wren_ph1), 
+                      .q     (wren_ff),
+                      .rst_l (arst_l),
+				              .clk   (int_clk2), .se(se), .si(), .so());
+
+   // address is never shifted out
+   assign   spc_efc_xfuse_data = data_shft_ff[11];
+   assign   fuse_ary_wren = wren_ff & testmode_l; // forced low while testmode_l is low
+`endif
+   
+endmodule // cmp_sram_redhdr
+
+// Local Variables:
+// verilog-library-directories:("." "../../common/rtl")
+// verilog-library-files:      ("../../common/rtl/swrvr_clib.v")
+// verilog-auto-sense-defines-constant:t
+// End:
Index: /trunk/T1-common/common/ucb_noflow.v
===================================================================
--- /trunk/T1-common/common/ucb_noflow.v	(revision 6)
+++ /trunk/T1-common/common/ucb_noflow.v	(revision 6)
@@ -0,0 +1,345 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: ucb_noflow.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:        ucb_noflow
+//	Description:	Unit Control Block
+//                      - supports 64 or 128-bit read with flow control
+//                      - supports 64-bit write without flow control
+//                      - automatically drops non-64-bit writes
+//                      - supports interrupt return to IO Bridge
+//                      - provides only single buffer at each interface 
+//
+//                      This module is intended for units that have
+//                      both 64 and 128 bit registers.
+//
+//                      Data bus width to and from the IO Bridge is
+//                      configured through parameters UCB_IOB_WIDTH and
+//                      IOB_UCB_WIDTH.  Supported widths are:
+//
+//                      IOB_UCB_WIDTH  UCB_IOB_WIDTH
+//                      ----------------------------
+//                      32             8
+//                      16             8
+//                       8             8
+//                       4             4             
+ */ 
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which 
+                        // contains the time scale definition
+
+`include        "iop.h"
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module ucb_noflow (/*AUTOARG*/
+   // Outputs
+   ucb_iob_stall, rd_req_vld, wr_req_vld, thr_id_in, buf_id_in, 
+   size_in, addr_in, data_in, int_busy, ucb_iob_vld, ucb_iob_data, 
+   // Inputs
+   clk, rst_l, iob_ucb_vld, iob_ucb_data, rd_ack_vld, rd_nack_vld, 
+   thr_id_out, buf_id_out, data128, data_out, int_vld, int_typ, 
+   int_thr_id, dev_id, int_stat, int_vec, iob_ucb_stall
+   );
+
+   // synopsys template
+   
+   parameter IOB_UCB_WIDTH = 32;  // data bus width from IOB to UCB
+   parameter UCB_IOB_WIDTH = 8;   // data bus width from UCB to IOB
+   parameter REG_WIDTH = 64;      // set this to 128 if unit needs to
+                                  // return 128-bit data
+   
+
+   // Globals
+   input                                clk;
+   input 				rst_l;
+   
+   // Request from IO Bridge
+   input 				iob_ucb_vld;
+   input [IOB_UCB_WIDTH-1:0] 		iob_ucb_data;
+   output 				ucb_iob_stall;
+
+   // Request to local unit
+   output 				rd_req_vld;
+   output 				wr_req_vld;
+   output [`UCB_THR_HI-`UCB_THR_LO:0]   thr_id_in;
+   output [`UCB_BUF_HI-`UCB_BUF_LO:0]   buf_id_in;
+   output [`UCB_SIZE_HI-`UCB_SIZE_LO:0] size_in;   // only pertinent to PCI
+   output [`UCB_ADDR_HI-`UCB_ADDR_LO:0] addr_in;
+   output [`UCB_DATA_HI-`UCB_DATA_LO:0] data_in;
+
+   // Ack/Nack from local unit
+   input 				rd_ack_vld;
+   input 				rd_nack_vld;
+   input [`UCB_THR_HI-`UCB_THR_LO:0] 	thr_id_out;
+   input [`UCB_BUF_HI-`UCB_BUF_LO:0] 	buf_id_out;
+   input 				data128;   // set to 1 if data returned is 128 bit
+   input [REG_WIDTH-1:0] 		data_out;
+
+   // Interrupt from local unit
+   input 				int_vld;
+   input [`UCB_PKT_HI-`UCB_PKT_LO:0] 	int_typ;          // interrupt type
+   input [`UCB_THR_HI-`UCB_THR_LO:0] 	int_thr_id;       // interrupt thread ID
+   input [`UCB_INT_DEV_HI-`UCB_INT_DEV_LO:0] dev_id;      // interrupt device ID
+   input [`UCB_INT_STAT_HI-`UCB_INT_STAT_LO:0] int_stat;  // interrupt status
+   input [`UCB_INT_VEC_HI-`UCB_INT_VEC_LO:0]   int_vec;   // interrupt vector
+   output 				int_busy;         // interrupt buffer busy
+   
+   // Output to IO Bridge
+   output 				ucb_iob_vld;
+   output [UCB_IOB_WIDTH-1:0] 		ucb_iob_data;
+   input 				iob_ucb_stall;
+   
+   // Local signals
+   wire 				indata_buf_vld;
+   wire [127:0] 			indata_buf;
+   wire 				ucb_iob_stall_a1;
+   
+   wire 				read_pending;
+   wire 				read_outstanding;
+   wire 				read_outstanding_next;
+   wire 				write_pending;
+   wire 				illegal_write_size;
+   
+   wire 				ack_buf_rd;
+   wire 				ack_buf_wr;
+   wire 				ack_buf_vld;
+   wire 				ack_buf_vld_next;
+   wire 				ack_buf_is_nack;
+   wire 				ack_buf_is_data128;
+   wire [`UCB_PKT_HI-`UCB_PKT_LO:0] 	ack_typ_out;
+   wire [REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO:0] ack_buf_in;
+   wire [REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO:0] ack_buf;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] ack_buf_vec;
+   
+   wire 				int_buf_rd;
+   wire 				int_buf_wr;
+   wire 				int_buf_vld;
+   wire 				int_buf_vld_next;
+   wire [`UCB_INT_VEC_HI-`UCB_PKT_LO:0] int_buf_in;
+   wire [`UCB_INT_VEC_HI-`UCB_PKT_LO:0] int_buf;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] int_buf_vec;
+   
+   wire 				int_last_rd;
+   wire 				outdata_buf_busy;
+   wire 				outdata_buf_wr;
+   wire [REG_WIDTH+63:0] 		outdata_buf_in;
+   wire [(REG_WIDTH+64)/UCB_IOB_WIDTH-1:0] outdata_vec_in;
+
+
+////////////////////////////////////////////////////////////////////////
+// Code starts here
+////////////////////////////////////////////////////////////////////////
+   /************************************************************
+    * Inbound Data
+    ************************************************************/
+   // Register size is hardcoded to 64 bits here because all
+   // units using the UCB module will only write to 64 bit registers.
+   ucb_bus_in #(IOB_UCB_WIDTH,64) ucb_bus_in (.rst_l(rst_l),
+					      .clk(clk),
+					      .vld(iob_ucb_vld),
+					      .data(iob_ucb_data),
+					      .stall(ucb_iob_stall),
+					      .indata_buf_vld(indata_buf_vld),
+					      .indata_buf(indata_buf),
+					      .stall_a1(ucb_iob_stall_a1));
+
+   
+   /************************************************************
+    * Decode inbound packet type
+    ************************************************************/
+   assign 	 read_pending = (indata_buf[`UCB_PKT_HI:`UCB_PKT_LO] ==
+				 `UCB_READ_REQ) &
+			        indata_buf_vld;
+
+   // Assertion: rd_req_vld and ack_buf_rd must be
+   //            mutually exclusive
+   assign 	 read_outstanding_next = rd_req_vld ? 1'b1 :
+	                                 ack_buf_rd ? 1'b0 :
+	                                              read_outstanding;
+   dffrl_ns #(1) read_outstanding_ff (.din(read_outstanding_next),
+				      .clk(clk),
+				      .rst_l(rst_l),
+				      .q(read_outstanding));
+   // Raise stall_a1 towards ucb_bus_in while a new read waits behind one that is still outstanding.
+   assign 	 ucb_iob_stall_a1 = read_pending & read_outstanding;
+
+   assign 	 write_pending = (indata_buf[`UCB_PKT_HI:`UCB_PKT_LO] == 
+				  `UCB_WRITE_REQ) &
+        	                 indata_buf_vld;
+
+   // 3'b011 is the encoding for double word.  All writes have to be
+   // 64 bits except writes going to PCI.  PCI will instantiate a
+   // customized version of UCB.
+   assign 	 illegal_write_size = (indata_buf[`UCB_SIZE_HI:`UCB_SIZE_LO] !=
+				       3'b011);
+   
+   
+   /************************************************************
+    * Inbound interface to local unit
+    ************************************************************/
+   assign 	 rd_req_vld = read_pending & ~read_outstanding; // a read is forwarded only when none is outstanding
+   assign 	 wr_req_vld = write_pending & ~illegal_write_size; // writes of any other size never raise wr_req_vld
+   assign 	 thr_id_in = indata_buf[`UCB_THR_HI:`UCB_THR_LO];
+   assign 	 buf_id_in = indata_buf[`UCB_BUF_HI:`UCB_BUF_LO];
+   assign 	 size_in = indata_buf[`UCB_SIZE_HI:`UCB_SIZE_LO];
+   assign 	 addr_in = indata_buf[`UCB_ADDR_HI:`UCB_ADDR_LO];
+   assign 	 data_in = indata_buf[`UCB_DATA_HI:`UCB_DATA_LO];
+   
+   
+   /************************************************************
+    * Outbound Ack/Nack
+    ************************************************************/
+   assign 	 ack_buf_wr = rd_ack_vld | rd_nack_vld;
+   // A write to the ack buffer takes priority over a read of it when computing the valid bit.
+   assign 	 ack_buf_vld_next = ack_buf_wr ? 1'b1 :
+		                    ack_buf_rd ? 1'b0 :
+		                                 ack_buf_vld;
+   
+   dffrl_ns #(1) ack_buf_vld_ff (.din(ack_buf_vld_next),
+				 .clk(clk),
+				 .rst_l(rst_l),
+				 .q(ack_buf_vld));
+   // rd_nack_vld and data128 are captured together with the ack payload (same enable, ack_buf_wr).
+   dffe_ns #(1) ack_buf_is_nack_ff (.din(rd_nack_vld),
+				    .en(ack_buf_wr),
+				    .clk(clk),
+				    .q(ack_buf_is_nack));
+
+   dffe_ns #(1) ack_buf_is_data128_ff (.din(data128),
+				       .en(ack_buf_wr),
+				       .clk(clk),
+				       .q(ack_buf_is_data128));
+
+   assign 	 ack_typ_out = rd_ack_vld ? `UCB_READ_ACK:
+	                                    `UCB_READ_NACK;
+
+   assign 	 ack_buf_in = {data_out,
+			       buf_id_out,
+			       thr_id_out,
+			       ack_typ_out};
+   
+   dffe_ns #(REG_WIDTH+`UCB_BUF_HI-`UCB_PKT_LO+1) ack_buf_ff (.din(ack_buf_in),
+							      .en(ack_buf_wr),
+							      .clk(clk),
+							      .q(ack_buf));
+   // Slice mask (1 bit per UCB_IOB_WIDTH bits): nack = low 64 bits, data128 = all, else low 128 bits
+   assign        ack_buf_vec = ack_buf_is_nack    ? {{REG_WIDTH/UCB_IOB_WIDTH{1'b0}},
+                                                     {64/UCB_IOB_WIDTH{1'b1}}} :
+                               ack_buf_is_data128 ? {(REG_WIDTH+64)/UCB_IOB_WIDTH{1'b1}} :
+                                                    {(64+64)/UCB_IOB_WIDTH{1'b1}};
+   
+
+   /************************************************************
+    * Outbound Interrupt
+    ************************************************************/
+   // Assertion: int_buf_wr shouldn't be asserted while int_busy (int_buf_vld) is set
+   assign 	 int_buf_wr = int_vld;
+   
+   assign 	 int_buf_vld_next = int_buf_wr ? 1'b1 :
+		                    int_buf_rd ? 1'b0 :
+		                                 int_buf_vld;
+   
+   dffrl_ns #(1) int_buf_vld_ff (.din(int_buf_vld_next),
+				 .clk(clk),
+				 .rst_l(rst_l),
+				 .q(int_buf_vld));
+
+   assign 	 int_buf_in = {int_vec,
+			       int_stat,
+			       dev_id,
+			       int_thr_id,
+			       int_typ};
+   
+   dffe_ns #(`UCB_INT_VEC_HI-`UCB_PKT_LO+1) int_buf_ff (.din(int_buf_in),
+							.en(int_buf_wr),
+							.clk(clk),
+							.q(int_buf));
+   // Slice mask for interrupts: only the slices covering the low 64 bits are set.
+   assign 	 int_buf_vec = {{REG_WIDTH/UCB_IOB_WIDTH{1'b0}},
+                                {64/UCB_IOB_WIDTH{1'b1}}};
+
+   assign 	 int_busy = int_buf_vld;
+
+
+   /************************************************************
+    * Outbound ack/interrupt Arbitration
+    ************************************************************/
+   dffrle_ns #(1) int_last_rd_ff (.din(int_buf_rd),
+				  .en(ack_buf_rd|int_buf_rd),
+				  .rst_l(rst_l),
+				  .clk(clk),
+				  .q(int_last_rd));
+   // With both buffers valid, int_last_rd selects the one that was not read last.
+   assign 	 ack_buf_rd = ~outdata_buf_busy & ack_buf_vld &
+		              (~int_buf_vld | int_last_rd);
+   
+   assign 	 int_buf_rd = ~outdata_buf_busy & int_buf_vld &
+		              (~ack_buf_vld | ~int_last_rd);
+
+   assign 	 outdata_buf_wr = ack_buf_rd | int_buf_rd;
+   // Ack packet: data above a header whose reserved, address and size fields are zeroed; otherwise the interrupt packet.
+   assign 	 outdata_buf_in = ack_buf_rd ? {ack_buf[REG_WIDTH+`UCB_BUF_HI:`UCB_BUF_HI+1],
+						{(`UCB_RSV_HI-`UCB_RSV_LO+1){1'b0}},
+						{(`UCB_ADDR_HI-`UCB_ADDR_LO+1){1'b0}},
+						{(`UCB_SIZE_HI-`UCB_SIZE_LO+1){1'b0}},
+						ack_buf[`UCB_BUF_HI:`UCB_BUF_LO],
+						ack_buf[`UCB_THR_HI:`UCB_THR_LO],
+						ack_buf[`UCB_PKT_HI:`UCB_PKT_LO]}:
+		                               {{REG_WIDTH{1'b0}},
+						{(`UCB_INT_RSV_HI-`UCB_INT_RSV_LO+1){1'b0}},
+						int_buf[`UCB_INT_VEC_HI:`UCB_INT_VEC_LO],
+						int_buf[`UCB_INT_STAT_HI:`UCB_INT_STAT_LO],
+						int_buf[`UCB_INT_DEV_HI:`UCB_INT_DEV_LO],
+						int_buf[`UCB_THR_HI:`UCB_THR_LO],
+						int_buf[`UCB_PKT_HI:`UCB_PKT_LO]};
+   
+   assign 	 outdata_vec_in = ack_buf_rd ? ack_buf_vec :
+	                                       int_buf_vec;
+   
+   ucb_bus_out #(UCB_IOB_WIDTH, REG_WIDTH) ucb_bus_out (.rst_l(rst_l),
+							.clk(clk),
+							.outdata_buf_wr(outdata_buf_wr),
+							.outdata_buf_in(outdata_buf_in),
+							.outdata_vec_in(outdata_vec_in),
+							.outdata_buf_busy(outdata_buf_busy),
+							.vld(ucb_iob_vld),
+							.data(ucb_iob_data),
+							.stall(iob_ucb_stall));
+   
+   
+endmodule // ucb_noflow
+
+
+// Local Variables:
+// verilog-library-directories:(".")
+// End:
+
+
+
+
+
+
+
Index: /trunk/T1-common/common/dbl_buf.v
===================================================================
--- /trunk/T1-common/common/dbl_buf.v	(revision 6)
+++ /trunk/T1-common/common/dbl_buf.v	(revision 6)
@@ -0,0 +1,148 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: dbl_buf.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:        dbl_buf
+//	Description:	A simple double buffer
+//                      First-in first-out.  Asserts full when both entries
+//                      are occupied.
+*/ 
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which 
+                        // contains the time scale definition
+
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module dbl_buf (/*AUTOARG*/
+   // Outputs
+   dout, vld, full, 
+   // Inputs
+   clk, rst_l, wr, rd, din
+   );
+   // synopsys template
+   // Two-entry first-in first-out buffer: wr stores din in a free entry, dout presents the older entry.
+   parameter BUF_WIDTH = 64;      // width of the buffer
+   // NOTE(review): with both entries valid, wr asserts wr_buf0 and wr_buf1 together, so both
+   // entries are overwritten; presumably callers hold wr low while full is high - confirm.
+   // Globals
+   input          clk;
+   input 	  rst_l;
+   
+   // Buffer Input
+   input 	  wr;                    // write request: loads din into the selected entry
+   input 	  rd;                    // read request: retires the older entry
+   input [BUF_WIDTH-1:0] din;
+
+   // Buffer Output
+   output [BUF_WIDTH-1:0] dout;   // data of the older entry (not qualified by vld here)
+   output 	  vld;                   // at least one entry is valid
+   output 	  full;                  // both entries are valid
+   
+   // Internal signals
+   wire 	  wr_buf0;               // load entry 0
+   wire 	  wr_buf1;               // load entry 1
+   wire 	  buf0_vld;
+   wire 	  buf1_vld;
+   wire 	  buf1_older;            // 1: entry 1 is read next, 0: entry 0 is read next
+   wire 	  rd_buf0;
+   wire 	  rd_buf1;
+   wire 	  rd_buf;                // rd qualified by at least one valid entry
+   wire 	  en_vld0;
+   wire 	  en_vld1;
+   wire [BUF_WIDTH-1:0] buf0_obj;
+   wire [BUF_WIDTH-1:0] buf1_obj;
+   
+   
+////////////////////////////////////////////////////////////////////////
+// Code starts here
+////////////////////////////////////////////////////////////////////////
+   // if both entries are empty, write to entry pointed to by the older pointer
+   // if only one entry is empty, then write to the empty entry
+   assign 	  wr_buf0 = wr &
+                            (buf1_vld | (~buf0_vld & ~buf1_older));
+   assign         wr_buf1 = wr &
+                            (buf0_vld | (~buf1_vld & buf1_older));
+
+   // read from the older entry
+   assign         rd_buf0 = rd & ~buf1_older;
+   assign         rd_buf1 = rd & buf1_older;
+
+   // flip older pointer when an entry is read (rd is ignored here while both entries are invalid)
+   assign 	  rd_buf = rd & (buf0_vld | buf1_vld);
+   dffrle_ns buf1_older_ff (.din(~buf1_older),
+			    .rst_l(rst_l),
+                            .en(rd_buf),
+			    .clk(clk),
+			    .q(buf1_older));
+
+   // set valid bit for writes and reset for reads (the write wins if both hit the same entry)
+   assign         en_vld0 = wr_buf0 | rd_buf0;
+   assign         en_vld1 = wr_buf1 | rd_buf1;
+
+   // valid bits and data storage of the two entries
+   dffrle_ns buf0_vld_ff (.din(wr_buf0),
+			  .rst_l(rst_l),
+			  .en(en_vld0),
+			  .clk(clk),
+			  .q(buf0_vld));
+
+   dffrle_ns buf1_vld_ff (.din(wr_buf1),
+			  .rst_l(rst_l),
+			  .en(en_vld1),
+			  .clk(clk),
+			  .q(buf1_vld));
+
+   dffe_ns #(BUF_WIDTH) buf0_obj_ff (.din(din),
+				     .en(wr_buf0),
+                                     .clk(clk),
+				     .q(buf0_obj));
+   
+   dffe_ns #(BUF_WIDTH) buf1_obj_ff (.din(din),
+				     .en(wr_buf1),
+                                     .clk(clk),
+				     .q(buf1_obj));
+   
+   // mux out the older entry
+   assign         dout = (buf1_older) ? buf1_obj:buf0_obj;
+
+   assign 	  vld = buf0_vld | buf1_vld;
+   assign 	  full = buf0_vld & buf1_vld;
+   
+	  
+endmodule // dbl_buf
+
+
+
+// Local Variables:
+// verilog-library-directories:(".")
+// End:
+
+
+
+
+
+
+
Index: /trunk/T1-common/common/test_stub_bist.v
===================================================================
--- /trunk/T1-common/common/test_stub_bist.v	(revision 6)
+++ /trunk/T1-common/common/test_stub_bist.v	(revision 6)
@@ -0,0 +1,247 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: test_stub_bist.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+// ____________________________________________________________________________
+//
+//  test_stub_bist - Test Stub with BIST Support
+// ____________________________________________________________________________
+//
+// Description: DBB interface for test signal generation and BIST execution
+// ____________________________________________________________________________
+
+module test_stub_bist (/*AUTOARG*/
+// Outputs
+mux_drive_disable, mem_write_disable, sehold, se, testmode_l, 
+mem_bypass, so_0, so_1, so_2, so, tst_ctu_mbist_done, 
+tst_ctu_mbist_fail, bist_ctl_reg_out, mbist_bisi_mode, 
+mbist_stop_on_next_fail, mbist_stop_on_fail, mbist_loop_mode, 
+mbist_loop_on_addr, mbist_data_mode, mbist_start, 
+// Inputs
+ctu_tst_pre_grst_l, arst_l, cluster_grst_l, global_shift_enable, 
+ctu_tst_scan_disable, ctu_tst_scanmode, ctu_tst_macrotest, 
+ctu_tst_short_chain, long_chain_so_0, short_chain_so_0, 
+long_chain_so_1, short_chain_so_1, long_chain_so_2, short_chain_so_2, 
+si, ctu_tst_mbist_enable, rclk, bist_ctl_reg_in, bist_ctl_reg_wr_en, 
+mbist_done, mbist_err
+);
+// Wraps test_stub_scan and adds the 11-bit BIST control/status register with a CSR and a serial load path.
+// Scan interface
+
+input          ctu_tst_pre_grst_l;
+input 	       arst_l;
+input 	       cluster_grst_l;
+input 	       global_shift_enable;
+input 	       ctu_tst_scan_disable;
+input 	       ctu_tst_scanmode;
+input 	       ctu_tst_macrotest;
+input 	       ctu_tst_short_chain;
+input 	       long_chain_so_0;
+input 	       short_chain_so_0;
+input 	       long_chain_so_1;
+input 	       short_chain_so_1;
+input 	       long_chain_so_2;
+input 	       short_chain_so_2;
+input 	       si;
+output 	       mux_drive_disable;
+output 	       mem_write_disable;
+output 	       sehold;
+output 	       se;
+output 	       testmode_l;
+output 	       mem_bypass;
+output 	       so_0;
+output 	       so_1;
+output 	       so_2;
+output 	       so;
+
+// Global BIST control interface
+
+input          ctu_tst_mbist_enable;
+output         tst_ctu_mbist_done;
+output         tst_ctu_mbist_fail;
+
+// CSR interface
+
+input 	       rclk;
+input  [6:0]   bist_ctl_reg_in;
+input 	       bist_ctl_reg_wr_en;
+output [10:0]  bist_ctl_reg_out;
+
+// BIST diagnostic interface
+
+input          mbist_done;
+input  [2:0]   mbist_err;
+output         mbist_bisi_mode;
+output 	       mbist_stop_on_next_fail;
+output 	       mbist_stop_on_fail;
+output 	       mbist_loop_mode;
+output 	       mbist_loop_on_addr;
+output 	       mbist_data_mode;
+output 	       mbist_start;
+
+// Internal wires
+
+wire           csr_write;               // write enable for bist_ctl_reg
+wire 	       mbist_enable_d1;         // delayed version of ctu_tst_mbist_enable
+wire 	       mbist_enable_d2;         // delayed version of mbist_enable_d1
+wire 	       mbist_stop_serial_in;    // delayed version of mbist_start (not referenced in this module)
+wire [6:0]     bist_diag_mode;          // data written to bist_ctl_reg
+wire 	       mbist_done_delayed;      // flopped version of mbist_done
+wire 	       clr_mbist_ctl_l;         // flag to clear mbist control bits
+wire 	       mbist_fail_flag;         // OR-reduction of mbist_err[2:0]
+wire           serial_setup_mode;       // serial setup mode flag
+wire           serial_setup_mode_ctl;   // serial setup mode control (not referenced in this module)
+wire           serial_setup_start;      // edge to enable serial setup mode
+wire 	       serial_setup_enable;     // kick off serial setup mode
+wire           serial_setup_stop;       // reset for serial setup mode
+wire 	       serial_setup_valid;      // bist start qualifier
+wire 	       si;                      // scanin place holder (not read in this module)
+wire 	       so;                      // scanout place holder (not driven in this module)
+
+// Scan control
+
+test_stub_scan scan_ctls (
+			  .ctu_tst_pre_grst_l(ctu_tst_pre_grst_l),
+			  .arst_l(arst_l),
+			  .global_shift_enable(global_shift_enable),
+			  .ctu_tst_scan_disable(ctu_tst_scan_disable),
+			  .ctu_tst_scanmode(ctu_tst_scanmode),
+			  .ctu_tst_macrotest(ctu_tst_macrotest),
+			  .ctu_tst_short_chain(ctu_tst_short_chain),
+			  .long_chain_so_0(long_chain_so_0),
+			  .short_chain_so_0(short_chain_so_0),
+  			  .long_chain_so_1(long_chain_so_1),
+  			  .short_chain_so_1(short_chain_so_1),
+  			  .long_chain_so_2(long_chain_so_2),
+ 			  .short_chain_so_2(short_chain_so_2),
+			  .mux_drive_disable(mux_drive_disable),
+			  .mem_write_disable(mem_write_disable),
+ 			  .sehold(sehold),
+  			  .se(se),
+			  .testmode_l(testmode_l),
+ 			  .mem_bypass(mem_bypass),
+			  .so_0(so_0),
+  			  .so_1(so_1),
+			  .so_2(so_2)
+			  );
+
+// BIST control
+// csr_write enables the control bits on a CSR write or while serial_setup_mode is high.
+assign csr_write = bist_ctl_reg_wr_en | serial_setup_mode;
+assign mbist_done_delayed = bist_ctl_reg_out[10];
+assign clr_mbist_ctl_l = cluster_grst_l & ~serial_setup_start;
+assign {mbist_bisi_mode,
+	   mbist_stop_on_next_fail,
+	   mbist_stop_on_fail,
+	   mbist_loop_mode,
+	   mbist_loop_on_addr,
+	   mbist_data_mode,
+	   mbist_start
+	   } = bist_ctl_reg_out[6:0];
+// clr_mbist_ctl_l (rst_l of bits 6:0) is low when cluster_grst_l is low or serial_setup_start is high.
+// Software accessible CSR (parallel interface)
+//
+// Bit  Type Function
+// ---  ____ -----------------
+//  10   S   Done flag
+//   9   S   Array 2 fail flag
+//   8   S   Array 1 fail flag
+//   7   S   Array 0 fail flag
+//   6   C   Bisi mode
+//   5   C   Stop on next fail
+//   4   C   Stop on fail
+//   3   C   Loop
+//   2   C   Loop on address
+//   1   C   User data mode
+//   0   C   Start
+// Bits 10:7 come from mbist_done/mbist_err through dffrl_ns (no enable); bits 6:0 use csr_write as enable.
+dffrl_ns #(4) bist_ctl_reg_10_7 (
+				 .din({mbist_done,mbist_err[2:0]}),
+				 .clk(rclk),
+				 .rst_l(cluster_grst_l),
+				 .q(bist_ctl_reg_out[10:7])
+				 );
+
+dffrle_ns #(1) bist_ctl_reg_6 (
+                               .din(bist_diag_mode[6]),
+                               .clk(rclk),
+			       .rst_l(clr_mbist_ctl_l),
+		               .en(csr_write),
+		     	       .q(bist_ctl_reg_out[6])
+			       );
+					       
+dffrle_ns #(5) bist_ctl_reg_5_1 (
+				 .din(bist_diag_mode[5:1]),
+				 .clk(rclk),
+				 .rst_l(clr_mbist_ctl_l),
+				 .en(csr_write),
+				 .q(bist_ctl_reg_out[5:1])
+				 );
+
+dffrle_ns #(1) bist_ctl_reg_0 (
+			       .din(bist_diag_mode[0]),
+			       .clk(rclk),
+			       .rst_l(clr_mbist_ctl_l),
+			       .en(csr_write),
+			       .q(bist_ctl_reg_out[0])
+			       );
+
+// CTU serial BIST interface. Bit ordering is 5,4,3,2,1,6,0.
+// In serial setup mode mbist_enable_d2 enters at bit 5 and moves 5->4->3->2->1->6->0; bit 0 is gated by serial_setup_valid.
+assign tst_ctu_mbist_done = mbist_done_delayed;
+assign mbist_fail_flag = |mbist_err[2:0];
+assign serial_setup_start = mbist_enable_d1 & ~mbist_enable_d2 & ~serial_setup_mode;
+assign serial_setup_stop = cluster_grst_l & ~serial_setup_valid;
+assign serial_setup_enable = serial_setup_start | serial_setup_mode;
+assign bist_diag_mode[5:1] = serial_setup_mode ? {mbist_enable_d2, bist_ctl_reg_out[5:2]} : bist_ctl_reg_in[5:1];
+assign bist_diag_mode[6]   = serial_setup_mode ? bist_ctl_reg_out[1] : bist_ctl_reg_in[6];
+assign bist_diag_mode[0]   = serial_setup_mode ? bist_ctl_reg_out[6] & serial_setup_valid : bist_ctl_reg_in[0];
+
+dff_ns #(1) tst_ctu_mbist_fail_reg (
+				    .din(mbist_fail_flag),
+				    .clk(rclk),
+				    .q(tst_ctu_mbist_fail)
+				    );
+
+dff_ns #(1) mbist_enable_d1_reg (
+			              .din(ctu_tst_mbist_enable),
+			              .clk(rclk),
+			              .q(mbist_enable_d1)
+  			              );
+
+dff_ns #(1) mbist_enable_d2_reg (
+			              .din(mbist_enable_d1),
+			              .clk(rclk),
+			              .q(mbist_enable_d2)
+  			              );
+// serial_setup_valid is bist_ctl_reg_out[6] passed through one dff_ns clocked by rclk.
+dff_ns #(1) serial_setup_valid_reg (
+			            .din(bist_ctl_reg_out[6]),
+			            .clk(rclk),
+			            .q(serial_setup_valid)
+  			            );
+// serial_setup_stop drives rst_l below: it is low when cluster_grst_l is low or serial_setup_valid is high.
+dffrl_ns #(1) serial_setup_mode_reg (
+       			             .din (serial_setup_enable),
+				     .clk(rclk),
+				     .rst_l(serial_setup_stop),
+				     .q(serial_setup_mode)
+				     );
+
+endmodule // test_stub_bist
Index: /trunk/T1-common/common/cluster_header_dup.v
===================================================================
--- /trunk/T1-common/common/cluster_header_dup.v	(revision 6)
+++ /trunk/T1-common/common/cluster_header_dup.v	(revision 6)
@@ -0,0 +1,101 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: cluster_header_dup.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+// The cluster header is instantiated as a hard macro.
+// This model is for simulation only.
+`include "sys.h"
+
+module cluster_header_dup (/*AUTOARG*/
+   // Outputs
+   dbginit_l, cluster_grst_l, rclk, so, 
+   // Inputs
+   gclk, cluster_cken, arst_l, grst_l, adbginit_l, gdbginit_l, si, 
+   se
+   );
+   // Simulation model: rclk is gclk gated by sync_enable; grst_l and gdbginit_l pass through synchronizer modules.
+   input       gclk;
+   input       cluster_cken;
+   input       arst_l;
+   input       grst_l;
+   input       adbginit_l;
+   input       gdbginit_l; 
+   output      dbginit_l; 
+   output      cluster_grst_l;
+   output      rclk;
+
+   input       si; // scan ports for reset flop repeaters
+   input       se;
+   output      so;
+   
+   wire        pre_sync_enable;
+   wire        sync_enable;
+   wire        cluster_grst_l;
+   wire        dbginit_l;
+   wire        rst_sync_so;
+   // cluster_cken goes through sync_cluster_master, then sync_cluster_slave (both on gclk), to form sync_enable.
+   bw_u1_soffasr_2x sync_cluster_master ( // no scan hook-up
+                                        .so(),
+                                        .q (pre_sync_enable),
+                                        .ck (gclk),
+                                        .d (cluster_cken),
+					.r_l (arst_l),	
+					.s_l (1'b1),	
+                                        .sd(1'b0),
+                                        .se(1'b0)
+				        );
+   
+
+   bw_clk_cclk_scanlasr_2x sync_cluster_slave ( // use scan lock-up latch
+                                      .r_l (arst_l),
+                                      .so (sync_enable),
+                                      .ck (gclk),
+                                      .sd (pre_sync_enable)
+        			      );
+   // NOTE(review): #10 below equals 10 ps only if the time unit set via sys.h is 1 ps - confirm.
+// NOTE! Pound delay in the below statement is meant to provide 10 ps
+// delay between gclk and rclk to allow the synchronizer for rst, dbginit,
+// and sync pulses to be modelled accurately.  gclk and rclk need to have 
+// at least one simulator timestep separation to allow the flop->flop 
+// synchronizer to work correctly.
+   assign #10 rclk = gclk & sync_enable;
+   // Scan chain order: si -> rst_repeater -> rst_sync_so -> dbginit_repeater -> so.
+   synchronizer_asr rst_repeater (
+				 .sync_out(cluster_grst_l),
+				 .so(rst_sync_so),
+				 .async_in(grst_l),
+				 .gclk(gclk),
+				 .rclk(rclk),
+				 .arst_l(arst_l),
+				 .si(si),
+				 .se(se)
+				 );
+   
+   synchronizer_asr_dup dbginit_repeater (
+				     .sync_out(dbginit_l),
+				     .so(so),
+				     .async_in(gdbginit_l),
+				     .gclk(gclk),
+				     .rclk(rclk),
+				     .arst_l(adbginit_l),
+				     .si(rst_sync_so),
+				     .se(se)
+				     );
+
+endmodule // cluster_header_dup
Index: /trunk/T1-common/common/sync_pulse_synchronizer.v
===================================================================
--- /trunk/T1-common/common/sync_pulse_synchronizer.v	(revision 6)
+++ /trunk/T1-common/common/sync_pulse_synchronizer.v	(revision 6)
@@ -0,0 +1,70 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: sync_pulse_synchronizer.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+// Synchronizer without reset for use in cluster_header_sync.
+// For simulation only.
+
+module sync_pulse_synchronizer (/*AUTOARG*/
+   // Outputs
+   sync_out, so, 
+   // Inputs
+   async_in, gclk, rclk, si, se
+   );
+   // Two stages: 'repeater' takes async_in on gclk, 'syncff' takes its output on rclk and drives sync_out.
+   output sync_out;
+   output so;
+   
+   input  async_in;
+   input  gclk;
+   input  rclk;
+   input  si;
+   input  se;
+
+   wire   pre_sync_out;
+   wire   so_rptr;
+   wire   so_lockup;
+   // Scan path: si -> repeater -> lockup (clocked by gclk) -> syncff -> so.
+   // Flop drive strengths to be adjusted as necessary
+   
+   bw_u1_soff_8x repeater (
+                           .q (pre_sync_out),
+                           .so (so_rptr),
+                           .ck (gclk),
+                           .d (async_in),	
+                           .se (se),
+                           .sd (si)
+			   );
+
+   bw_u1_scanl_2x lockup (
+			   .so (so_lockup),
+			   .sd (so_rptr),
+			   .ck (gclk)
+			   );
+
+   bw_u1_soff_8x syncff (
+                         .q (sync_out),
+                         .so (so),
+                         .ck (rclk),
+                         .d (pre_sync_out),
+                         .se (se),
+                         .sd (so_lockup)
+			 );
+
+endmodule // sync_pulse_synchronizer
Index: /trunk/T1-common/common/synchronizer_asr.v
===================================================================
--- /trunk/T1-common/common/synchronizer_asr.v	(revision 6)
+++ /trunk/T1-common/common/synchronizer_asr.v	(revision 6)
@@ -0,0 +1,69 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: synchronizer_asr.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module synchronizer_asr (/*AUTOARG*/
+   // Outputs
+   sync_out, so, 
+   // Inputs
+   async_in, gclk, rclk, arst_l, si, se
+   );
+   // Two stages: 'repeater' takes async_in on gclk, 'syncff' takes its output on rclk and drives sync_out.
+   output sync_out;
+   output so;
+   
+   input  async_in;
+   input  gclk;
+   input  rclk;
+   input  arst_l;
+   input  si;
+   input  se;
+
+   wire   pre_sync_out;
+   wire   so_rptr;
+   wire   so_lockup;
+   // Both flops get arst_l on r_l and have s_l tied to 1'b1; scan path: si -> repeater -> lockup -> syncff -> so.
+   bw_u1_soffasr_2x repeater (
+                              .q (pre_sync_out),
+                              .so (so_rptr),
+                              .ck (gclk),
+                              .d (async_in),	
+                              .r_l (arst_l),	
+                              .s_l (1'b1),	
+                              .se (se),
+                              .sd (si)
+			      );
+   bw_u1_scanl_2x lockup (
+			   .so (so_lockup),
+			   .sd (so_rptr),
+			   .ck (gclk)
+			   );
+
+   bw_u1_soffasr_2x syncff (
+                            .q (sync_out),
+                            .so (so),
+                            .ck (rclk),
+                            .d (pre_sync_out),
+                            .r_l (arst_l),	
+                            .s_l (1'b1),	
+                            .se (se),
+                            .sd (so_lockup)
+			    );
+
+endmodule // synchronizer_asr
Index: /trunk/T1-common/common/cluster_header_ctu.v
===================================================================
--- /trunk/T1-common/common/cluster_header_ctu.v	(revision 6)
+++ /trunk/T1-common/common/cluster_header_ctu.v	(revision 6)
@@ -0,0 +1,100 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: cluster_header_ctu.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+// The cluster header is instantiated as a hard macro.
+// This model is for simulation only.
+`include "sys.h"
+
+module cluster_header_ctu (/*AUTOARG*/
+   // Outputs
+   dbginit_l, cluster_grst_l, rclk, so, 
+   // Inputs
+   gclk, cluster_cken, arst_l, grst_l, adbginit_l, gdbginit_l, si, 
+   se
+   );
+   // Simulation model: rclk is a #10-delayed copy of gclk; grst_l and gdbginit_l pass through synchronizer_asr.
+   input       gclk;
+   input       cluster_cken;
+   input       arst_l;
+   input       grst_l;
+   input       adbginit_l;
+   input       gdbginit_l; 
+   output      dbginit_l; 
+   output      cluster_grst_l;
+   output      rclk;
+
+   input       si; // scan ports for reset flop repeaters
+   input       se;
+   output      so;
+   
+   wire        pre_sync_enable;
+   wire        sync_enable;
+   wire        cluster_grst_l;
+   wire        dbginit_l;
+   wire        rst_sync_so;
+   // NOTE(review): sync_enable is driven by sync_cluster_slave but never read here, so rclk is not gated by cluster_cken.
+   bw_u1_syncff_4x sync_cluster_master ( // no scan hook-up
+                                        .so(),
+                                        .q (pre_sync_enable),
+                                        .ck (gclk),
+                                        .d (cluster_cken),
+                                        .sd(1'b0),
+                                        .se(1'b0)
+				        );
+   
+
+   bw_u1_scanl_2x sync_cluster_slave ( // use scan lock-up latch
+                                      .so (sync_enable),
+                                      .ck (gclk),
+                                      .sd (pre_sync_enable)
+        			      );
+   
+// NOTE! Pound delay in the always block below is meant to provide 10 ps
+// delay between gclk and rclk to allow the synchronizer for rst, dbginit,
+// and sync pulses to be modelled accurately.  gclk and rclk need to have 
+// at least one simulator timestep separation to allow the flop->flop 
+// synchronizer to work correctly.
+   reg      rclk_reg;
+   always @(gclk)  rclk_reg = #10 gclk;
+   assign rclk = rclk_reg;
+   // Scan chain order: si -> rst_repeater -> rst_sync_so -> dbginit_repeater -> so.
+   synchronizer_asr rst_repeater (
+				 .sync_out(cluster_grst_l),
+				 .so(rst_sync_so),
+				 .async_in(grst_l),
+				 .gclk(gclk),
+				 .rclk(rclk),
+				 .arst_l(arst_l),
+				 .si(si),
+				 .se(se)
+				 );
+   
+   synchronizer_asr dbginit_repeater (
+				     .sync_out(dbginit_l),
+				     .so(so),
+				     .async_in(gdbginit_l),
+				     .gclk(gclk),
+				     .rclk(rclk),
+				     .arst_l(adbginit_l),
+				     .si(rst_sync_so),
+				     .se(se)
+				     );
+
+endmodule // cluster_header_ctu
Index: /trunk/T1-common/common/ucb_bus_in.v
===================================================================
--- /trunk/T1-common/common/ucb_bus_in.v	(revision 6)
+++ /trunk/T1-common/common/ucb_bus_in.v	(revision 6)
@@ -0,0 +1,219 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: ucb_bus_in.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:	ucb_bus_in (ucb bus inbound interface block)
+//  Description:	This interface block is instantiated by the
+//                      UCB modules and IO Bridge to receive packets
+//                      on the UCB bus.
+*/
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which contains the 
+			// time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////
+// Interface signal list declarations
+////////////////////////////////////////////////////////////////////////
+module ucb_bus_in (/*AUTOARG*/
+   // Outputs
+   stall, indata_buf_vld, indata_buf, 
+   // Inputs
+   rst_l, clk, vld, data, stall_a1
+   );
+   // Collects UCB_BUS_WIDTH-bit beats (vld/data) into the (REG_WIDTH+64)-bit indata_buf shift register.
+   // synopsys template
+   // Beats pass through an interface flop and, around a stall, through a two-deep skid buffer.
+   parameter UCB_BUS_WIDTH = 32;   // bits taken from data per beat
+   parameter REG_WIDTH = 64;       // indata_buf is REG_WIDTH+64 bits wide
+   
+   
+////////////////////////////////////////////////////////////////////////
+// Signal declarations
+////////////////////////////////////////////////////////////////////////
+   // Global interface
+   input                     rst_l;
+   input 		     clk;
+
+
+   // UCB bus interface
+   input 		     vld;
+   input [UCB_BUS_WIDTH-1:0] data;
+   output 		     stall;          // stall_a1 after one flop (stall_ff)
+
+   
+   // Local interface
+   output 		     indata_buf_vld;
+   output [REG_WIDTH+63:0]   indata_buf;
+   input 		     stall_a1;
+   
+   
+   // Internal signals
+   wire 		     vld_d1;
+   wire 		     stall_d1;
+   wire [UCB_BUS_WIDTH-1:0]  data_d1;
+   wire 		     skid_buf0_en;
+   wire 		     vld_buf0;
+   wire [UCB_BUS_WIDTH-1:0]  data_buf0;
+   wire 		     skid_buf1_en;
+   wire 		     vld_buf1;
+   wire [UCB_BUS_WIDTH-1:0]  data_buf1;
+   wire 		     skid_buf0_sel;
+   wire 		     skid_buf1_sel;
+   wire 		     vld_mux;
+   wire [UCB_BUS_WIDTH-1:0]  data_mux;
+   wire [(REG_WIDTH+64)/UCB_BUS_WIDTH-1:0] indata_vec_next;
+   wire [(REG_WIDTH+64)/UCB_BUS_WIDTH-1:0] indata_vec;
+   wire [REG_WIDTH+63:0]     indata_buf_next;
+   wire 		     indata_vec0_d1;
+   
+   
+////////////////////////////////////////////////////////////////////////
+// Code starts here
+////////////////////////////////////////////////////////////////////////
+   /************************************************************
+    * UCB bus interface flops
+    * This is to make signals going between IOB and UCB flop-to-flop
+    * to improve timing.
+    ************************************************************/
+   dffrle_ns #(1) vld_d1_ff (.din(vld),
+			     .rst_l(rst_l),
+			     .en(~stall_d1),
+			     .clk(clk),
+			     .q(vld_d1));
+   
+   dffe_ns #(UCB_BUS_WIDTH) data_d1_ff (.din(data),
+					.en(~stall_d1),
+					.clk(clk),
+					.q(data_d1));
+
+   dffrl_ns #(1) stall_ff (.din(stall_a1),
+			   .clk(clk),
+			   .rst_l(rst_l),
+			   .q(stall));
+   
+   dffrl_ns #(1) stall_d1_ff (.din(stall),
+			      .clk(clk),
+			      .rst_l(rst_l),
+			      .q(stall_d1));
+
+   
+   /************************************************************
+    * Skid buffer
+    * We need a two deep skid buffer to handle stalling.
+    ************************************************************/
+   // Assertion: stall has to be deasserted for more than 1 cycle
+   //            ie time between two separate stalls has to be
+   //            at least two cycles.  Otherwise, contents from
+   //            skid buffer will be lost.
+   // skid_buf0_en is high when stall_a1 is set but its flopped copy (stall) is not yet set.
+   // Buffer 0
+   assign 	 skid_buf0_en = stall_a1 & ~stall;
+
+   dffrle_ns #(1) vld_buf0_ff (.din(vld_d1),
+			       .rst_l(rst_l),
+			       .en(skid_buf0_en),
+			       .clk(clk),
+			       .q(vld_buf0));
+   
+   dffe_ns #(UCB_BUS_WIDTH) data_buf0_ff (.din(data_d1),
+					  .en(skid_buf0_en),
+					  .clk(clk),
+					  .q(data_buf0));
+   
+   // Buffer 1 (enabled by skid_buf0_en delayed through one flop)
+   dffrl_ns #(1) skid_buf1_en_ff (.din(skid_buf0_en),
+				  .clk(clk),
+				  .rst_l(rst_l),
+				  .q(skid_buf1_en));
+   
+   dffrle_ns #(1) vld_buf1_ff (.din(vld_d1),
+			       .rst_l(rst_l),
+			       .en(skid_buf1_en),
+			       .clk(clk),
+			       .q(vld_buf1));
+   
+   dffe_ns #(UCB_BUS_WIDTH) data_buf1_ff (.din(data_d1),
+					  .en(skid_buf1_en),
+					  .clk(clk),
+					  .q(data_buf1));
+
+   
+   /************************************************************
+    * Mux between skid buffer and interface flop
+    ************************************************************/
+   // Assertion: stall has to be deasserted for more than 1 cycle
+   //            ie time between two separate stalls has to be
+   //            at least two cycles.  Otherwise, contents from
+   //            skid buffer will be lost.
+   // skid_buf0_sel is high when stall_a1 is clear but its flopped copy (stall) is still set.
+   assign 	 skid_buf0_sel = ~stall_a1 & stall;
+   
+   dffrl_ns #(1) skid_buf1_sel_ff (.din(skid_buf0_sel),
+				   .clk(clk),
+				   .rst_l(rst_l),
+				   .q(skid_buf1_sel));
+
+   assign 	 vld_mux = skid_buf0_sel ? vld_buf0 :
+		           skid_buf1_sel ? vld_buf1 :
+		                           vld_d1;
+   
+   assign 	 data_mux = skid_buf0_sel ? data_buf0 :
+		            skid_buf1_sel ? data_buf1 :
+		                            data_d1;
+   
+   // Each beat enters at the top of indata_vec/indata_buf and shifts toward bit 0; both are enabled by ~stall_a1.
+   /************************************************************
+    * Assemble inbound data
+    ************************************************************/
+   // valid vector
+   assign 	 indata_vec_next = {vld_mux,
+				    indata_vec[(REG_WIDTH+64)/UCB_BUS_WIDTH-1:1]};
+   dffrle_ns #((REG_WIDTH+64)/UCB_BUS_WIDTH) indata_vec_ff (.din(indata_vec_next),
+							    .en(~stall_a1),
+							    .rst_l(rst_l),
+							    .clk(clk),
+							    .q(indata_vec));
+
+   // data buffer
+   assign 	 indata_buf_next = {data_mux,
+				    indata_buf[REG_WIDTH+63:UCB_BUS_WIDTH]};
+   dffe_ns #(REG_WIDTH+64) indata_buf_ff (.din(indata_buf_next),
+					  .en(~stall_a1),
+					  .clk(clk),
+					  .q(indata_buf));
+   
+   // detect a new packet	  
+   dffrle_ns #(1) indata_vec0_d1_ff (.din(indata_vec[0]),
+				     .rst_l(rst_l),
+				     .en(~stall_a1),
+				     .clk(clk),
+				     .q(indata_vec0_d1));
+   // indata_buf_vld is high while indata_vec[0] is set and its flopped copy indata_vec0_d1 is not.
+   assign        indata_buf_vld = indata_vec[0] & ~indata_vec0_d1;
+
+   
+endmodule // ucb_bus_in
Index: /trunk/T1-common/common/ucb_bus_out.v
===================================================================
--- /trunk/T1-common/common/ucb_bus_out.v	(revision 6)
+++ /trunk/T1-common/common/ucb_bus_out.v	(revision 6)
@@ -0,0 +1,128 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: ucb_bus_out.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+/*
+//  Module Name:        ucb_bus_out (ucb bus outbound interface block)
+//	Description:	This interface block is instantiated by the
+//                      UCB modules and IO Bridge to transmit packets
+//                      on the UCB bus.
+*/ 
+////////////////////////////////////////////////////////////////////////
+// Global header file includes
+////////////////////////////////////////////////////////////////////////
+`include	"sys.h" // system level definition file which 
+                        // contains the time scale definition
+
+////////////////////////////////////////////////////////////////////////
+// Local header file includes / local defines
+////////////////////////////////////////////////////////////////////////
+
+module ucb_bus_out (/*AUTOARG*/
+   // Outputs
+   vld, data, outdata_buf_busy, 
+   // Inputs
+   clk, rst_l, stall, outdata_buf_in, outdata_vec_in, outdata_buf_wr
+   );
+
+   // synopsys template
+
+   // Width in bits of the slice of the buffer that is driven onto data.
+   parameter UCB_BUS_WIDTH = 32;
+   // Maximum data bits to send (64 or 128); the buffer is REG_WIDTH+64 bits.
+   parameter REG_WIDTH = 64;
+
+   // Globals
+   input                                    clk;
+   input                                    rst_l;
+
+   // UCB bus interface
+   output                                   vld;
+   output [UCB_BUS_WIDTH-1:0]               data;
+   input                                    stall;
+
+   // Local interface: packet, per-slice valid vector and write strobe
+   output                                   outdata_buf_busy;
+   input [REG_WIDTH+63:0]                   outdata_buf_in;
+   input [(REG_WIDTH+64)/UCB_BUS_WIDTH-1:0] outdata_vec_in;
+   input                                    outdata_buf_wr;
+
+   // Local signals
+   wire                                     stall_d1;   // stall after stall_d1_ff
+   wire                                     load_outdata;
+   wire                                     shift_outdata;
+   wire [(REG_WIDTH+64)/UCB_BUS_WIDTH-1:0]  outdata_vec;
+   wire [(REG_WIDTH+64)/UCB_BUS_WIDTH-1:0]  outdata_vec_step;
+   wire [(REG_WIDTH+64)/UCB_BUS_WIDTH-1:0]  outdata_vec_next;
+   wire [REG_WIDTH+63:0]                    outdata_buf;
+   wire [REG_WIDTH+63:0]                    outdata_buf_step;
+   wire [REG_WIDTH+63:0]                    outdata_buf_next;
+
+////////////////////////////////////////////////////////////////////////
+// Code starts here
+////////////////////////////////////////////////////////////////////////
+   /************************************************************
+    * UCB bus interface: the low slice of the buffer drives the bus
+    ************************************************************/
+   assign data = outdata_buf[UCB_BUS_WIDTH-1:0];
+   assign vld  = outdata_vec[0];
+
+   dffrl_ns #(1) stall_d1_ff (.clk(clk),
+                              .rst_l(rst_l),
+                              .din(stall),
+                              .q(stall_d1));
+
+   /************************************************************
+    * Outbound data control
+    ************************************************************/
+   // Busy while the low valid bit is set or stall_d1 is set.
+   assign outdata_buf_busy = outdata_vec[0] | stall_d1;
+
+   // Accept new data only if there is none being processed.
+   assign load_outdata = ~outdata_buf_busy & outdata_buf_wr;
+
+   // Move to the next slice when one is valid and stall_d1 is clear.
+   assign shift_outdata = ~stall_d1 & outdata_vec[0];
+
+   /************************************************************
+    * Valid vector and data buffer: a load wins over a shift;
+    * with neither, the current contents are recirculated.
+    ************************************************************/
+   assign outdata_vec_step = shift_outdata ? outdata_vec >> 1 : outdata_vec;
+   assign outdata_vec_next = load_outdata  ? outdata_vec_in : outdata_vec_step;
+   dffrl_ns #((REG_WIDTH+64)/UCB_BUS_WIDTH) outdata_vec_ff (.clk(clk),
+                                                            .rst_l(rst_l),
+                                                            .din(outdata_vec_next),
+                                                            .q(outdata_vec));
+
+   assign outdata_buf_step = shift_outdata ? (outdata_buf >> UCB_BUS_WIDTH) : outdata_buf;
+   assign outdata_buf_next = load_outdata  ? outdata_buf_in : outdata_buf_step;
+   // The data flop instance has no rst_l connection; only the valid vector has one.
+   dff_ns #(REG_WIDTH+64) outdata_buf_ff (.clk(clk),
+                                          .din(outdata_buf_next),
+                                          .q(outdata_buf));
+
+endmodule // ucb_bus_out
+
+
+
+
+
+
Index: /trunk/T1-common/common/synchronizer_asr_dup.v
===================================================================
--- /trunk/T1-common/common/synchronizer_asr_dup.v	(revision 6)
+++ /trunk/T1-common/common/synchronizer_asr_dup.v	(revision 6)
@@ -0,0 +1,67 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: synchronizer_asr_dup.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module synchronizer_asr_dup (/*AUTOARG*/
+   // Outputs
+   sync_out, so, 
+   // Inputs
+   async_in, gclk, rclk, arst_l, si, se
+   );
+
+   // Data path: async_in -> repeater (gclk) -> syncff (rclk) -> sync_out.
+   // Scan path: si -> repeater -> lockup -> syncff -> so.
+   input  async_in;      // d input of the gclk flop
+   input  gclk;          // clocks repeater and lockup
+   input  rclk;          // clocks syncff
+   input  arst_l;        // wired to r_l of syncff
+   input  si;            // scan data in
+   input  se;            // scan enable for both flops
+
+   output sync_out;      // q of syncff
+   output so;            // scan data out of syncff
+
+   wire   pre_sync_out;  // repeater q, drives syncff d
+   wire   so_rptr;       // repeater so, drives lockup sd
+   wire   so_lockup;     // lockup so, drives syncff sd
+
+   bw_u1_soff_8x repeater (
+                           .ck (gclk),
+                           .d  (async_in),
+                           .q  (pre_sync_out),
+                           .se (se),
+                           .sd (si),
+                           .so (so_rptr)
+                           );
+   bw_u1_scanl_2x lockup (
+                          .ck (gclk),
+                          .sd (so_rptr),
+                          .so (so_lockup)
+                          );
+   bw_u1_soffasr_2x syncff (
+                            .ck  (rclk),
+                            .d   (pre_sync_out),
+                            .q   (sync_out),
+                            .r_l (arst_l),
+                            .s_l (1'b1),      // s_l input tied high
+                            .se  (se),
+                            .sd  (so_lockup),
+                            .so  (so)
+                            );
+endmodule // synchronizer_asr_dup
Index: /trunk/T1-common/m1/m1.V
===================================================================
--- /trunk/T1-common/m1/m1.V	(revision 6)
+++ /trunk/T1-common/m1/m1.V	(revision 6)
@@ -0,0 +1,1034 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: m1.behV
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+////////////////////////////////////////////////////////////////////////
+// 64 bit nor gate with first 32 bits out
+
+module zznor64_32 ( znor64, znor32, a );
+  input  [63:0] a;       // operand to test for zero
+  output        znor64;  // 1 when every bit of a[63:0] is 0
+  output        znor32;  // 1 when every bit of a[31:0] is 0
+
+  // Each half of the operand is collapsed with a reduction-OR; the two
+  // outputs are the inverted results, so both are active high on "all
+  // zero".  znor32 ignores a[63:32] entirely.
+  wire any_lo;
+  wire any_hi;
+
+  assign any_lo = |a[31:0];
+  assign any_hi = |a[63:32];
+
+  // NOR of the low word only.
+  assign znor32 = ~any_lo;
+
+  // NOR of the full double word.
+  assign znor64 = ~(any_lo | any_hi);
+endmodule // zznor64_32
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 36 bit or gate
+
+module zzor36 ( z, a );
+  input  [35:0] a;   // bits to combine
+  output        z;   // 1 when any bit of a is 1
+
+  // The 36-term OR is written as a reduction-OR over the whole
+  // vector.  The result is the same single-bit "any bit set"
+  // indication: 1 if at least one of a[35:0] is 1, 0 if all
+  // of them are 0.
+  assign z = |a[35:0];
+
+endmodule // zzor36
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 32 bit or gate
+
+module zzor32 ( z, a );
+  input  [31:0] a;   // bits to combine
+  output        z;   // 1 when any bit of a is 1
+
+  // Reduction-OR over all 32 bits: z is 1 if at least one
+  // bit of a is 1 and 0 when the whole vector
+  // is zero.
+  assign z = |a[31:0];
+
+endmodule // zzor32
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 24 bit nor gate
+
+module zznor24 ( z, a );
+  input  [23:0] a;   // bits to combine
+  output        z;   // 1 only when every bit of a is 0
+
+  // Inverted reduction-OR over all 24 bits: any set bit
+  // forces z low.
+  assign z = ~(|a[23:0]);
+
+endmodule // zznor24
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 16 bit nor gate
+
+module zznor16 ( z, a );
+  input  [15:0] a;   // bits to combine
+  output        z;   // 1 only when every bit of a is 0
+
+  // Inverted reduction-OR over all 16 bits.
+  assign z = ~(|a[15:0]);
+
+endmodule // zznor16
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 8 bit or gate
+
+module zzor8 ( z, a );
+  input  [7:0] a;   // bits to combine
+  output       z;   // 1 when any bit of a is 1
+
+  assign z = |a[7:0];   // reduction-OR over the whole byte
+
+endmodule // zzor8
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+//  Description:	This block implements the adder for the sparc FPU.
+//  			It takes two operands and a carry bit.  It adds them together
+//			and sends the output to adder_out. 
+
+module zzadd13 ( rs1_data, rs2_data, cin, adder_out );
+
+  output [12:0] adder_out;  // sum, truncated to 13 bits
+  input  [12:0] rs1_data;   // first addend
+  input  [12:0] rs2_data;   // second addend
+  input         cin;        // carry in, added at bit 0
+
+  // Zero-extend the carry to operand width, then add; no carry out is kept.
+  wire   [12:0] carry_in = {12'b0, cin};
+  assign adder_out = rs1_data + rs2_data + carry_in;
+endmodule // zzadd13
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+//  Description:	This block implements the adder for the sparc FPU.
+//  			It takes two operands and a carry bit.  It adds them together
+//			and sends the output to adder_out. 
+
+module zzadd56 ( rs1_data, rs2_data, cin, adder_out );
+
+  output [55:0] adder_out;  // sum, truncated to 56 bits
+  input  [55:0] rs1_data;   // first addend
+  input  [55:0] rs2_data;   // second addend
+  input         cin;        // carry in, added at bit 0
+
+  // Zero-extend the carry to operand width, then add; no carry out is kept.
+  wire   [55:0] carry_in = {55'b0, cin};
+  assign adder_out = rs1_data + rs2_data + carry_in;
+endmodule // zzadd56
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+module zzadd48 ( rs1_data, rs2_data, cin, adder_out );
+
+  output [47:0] adder_out;  // sum, truncated to 48 bits
+  input  [47:0] rs1_data;   // first addend
+  input  [47:0] rs2_data;   // second addend
+  input         cin;        // carry in, added at bit 0
+
+  // Zero-extend the carry to operand width, then add; no carry out is kept.
+  wire   [47:0] carry_in = {47'b0, cin};
+  assign adder_out = rs1_data + rs2_data + carry_in;
+endmodule // zzadd48
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+//  This adder is primarily used in the multiplier.
+//  The cin to out path is optimized.
+
+module zzadd34c ( rs1_data, rs2_data, cin, adder_out );
+
+  output [33:0] adder_out;  // sum, truncated to 34 bits
+  input  [33:0] rs1_data;   // first addend
+  input  [33:0] rs2_data;   // second addend
+  input         cin;        // carry in, added at bit 0
+
+  // Zero-extend the carry to operand width, then add; no carry out is kept.
+  wire   [33:0] carry_in = {33'b0, cin};
+  assign adder_out = rs1_data + rs2_data + carry_in;
+
+endmodule // zzadd34c
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+module zzadd32 ( rs1_data, rs2_data, cin, adder_out, cout );
+
+  output [31:0] adder_out;  // low 32 bits of the sum
+  output        cout;       // bit 32 of the sum
+  input  [31:0] rs1_data;   // first addend
+  input  [31:0] rs2_data;   // second addend
+  input         cin;        // carry in, added at bit 0
+
+  // Evaluate the sum one bit wider than the operands so the carry is kept.
+  wire   [32:0] sum = {1'b0, rs1_data} + {1'b0, rs2_data} + {32'b0, cin};
+  assign {cout, adder_out} = sum;
+endmodule // zzadd32
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+module zzadd18 ( rs1_data, rs2_data, cin, adder_out, cout );
+
+  output [17:0] adder_out;  // low 18 bits of the sum
+  output        cout;       // bit 18 of the sum
+  input  [17:0] rs1_data;   // first addend
+  input  [17:0] rs2_data;   // second addend
+  input         cin;        // carry in, added at bit 0
+
+  // Evaluate the sum one bit wider than the operands so the carry is kept.
+  wire   [18:0] sum = {1'b0, rs1_data} + {1'b0, rs2_data} + {18'b0, cin};
+  assign {cout, adder_out} = sum;
+endmodule // zzadd18
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+module zzadd8 ( rs1_data, rs2_data, cin, adder_out, cout );
+
+  output [7:0] adder_out;  // low 8 bits of the sum
+  output       cout;       // bit 8 of the sum
+  input  [7:0] rs1_data;   // first addend
+  input  [7:0] rs2_data;   // second addend
+  input        cin;        // carry in, added at bit 0
+
+  // Evaluate the sum one bit wider than the operands so the carry is kept.
+  wire   [8:0] sum = {1'b0, rs1_data} + {1'b0, rs2_data} + {8'b0, cin};
+  assign {cout, adder_out} = sum;
+endmodule // zzadd8
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Special 4-operand 32b adder used in spu_shamd5
+//  Description:        This block implements the 4-operand 32-bit adder for the SPU.
+//			It takes four 32-bit operands, adds them together and
+//			outputs the 32-bit result on adder_out. Any carry into
+//			bit 32 and above is ignored.
+
+module zzadd32op4 ( rs1_data, rs2_data, rs3_data, rs4_data, adder_out );
+  output [31:0] adder_out;  // sum of all four operands, modulo 2^32
+  input  [31:0] rs1_data;   // operand 1
+  input  [31:0] rs2_data;   // operand 2
+  input  [31:0] rs3_data;   // operand 3
+  input  [31:0] rs4_data;   // operand 4
+
+  // Add the operands pairwise, then combine; every stage is 32 bits wide.
+  wire   [31:0] sum12 = rs1_data + rs2_data;
+  wire   [31:0] sum34 = rs3_data + rs4_data;
+  assign adder_out = sum12 + sum34;
+endmodule // zzadd32op4
+
+
+////////////////////////////////////////////////////////////////////////////////
+//  Description:	This block implements the adder for the sparc alu.
+//  			It takes two operands and a carry bit.  It adds them together
+//			and sends the output to adder_out.  It outputs the overflow
+//			and carry condition codes for both 64 bit and 32 bit operations.
+
+module zzadd64 ( rs1_data, rs2_data, cin, adder_out, cout32, cout64 );
+   output [63:0] adder_out;  // 64-bit sum
+   output        cout32;     // carry out from lower 32 bit add
+   output        cout64;     // carry out from 64 bit add
+   input  [63:0] rs1_data;   // first addend
+   input  [63:0] rs2_data;   // second addend
+   input         cin;        // carry in, added at bit 0
+
+   // Two 32-bit adds, each evaluated 33 bits wide; the low carry feeds the high add.
+   wire   [32:0] lo_sum = {1'b0, rs1_data[31:0]}  + {1'b0, rs2_data[31:0]}  + {32'b0, cin};
+   wire   [32:0] hi_sum = {1'b0, rs1_data[63:32]} + {1'b0, rs2_data[63:32]} + {32'b0, cout32};
+   assign {cout32, adder_out[31:0]}  = lo_sum;
+   assign {cout64, adder_out[63:32]} = hi_sum;
+endmodule // zzadd64
+
+
+
+///////////////////////////////////////////////////////////////////////
+/*
+//      Description: This is the ffu VIS adder.  It can do either
+//                              2 16 bit adds or 1 32 bit add.
+*/
+
+module zzadd32v (/*AUTOARG*/
+   // Outputs
+   z,
+   // Inputs
+   a, b, cin, add32
+   ) ;
+   output [31:0] z;      // {upper 16-bit sum, lower 16-bit sum}
+   input  [31:0] a;
+   input  [31:0] b;
+   input         cin;    // carry in to the lower add (and to the upper add when add32 = 0)
+   input         add32;  // 1: lower carry out feeds the upper add; 0: upper add takes cin
+
+   // Each half is summed 17 bits wide so that its carry out is available.
+   wire   [16:0] lo_sum;
+   wire   [16:0] hi_sum;
+   wire          cout15 = lo_sum[16];            // carry out from lower 16 bit add
+   wire          cin16  = add32 ? cout15 : cin;  // carry in to the upper 16 bit add
+   wire          cout31 = hi_sum[16];            // carry out from the upper 16 bit add (not read)
+
+   assign lo_sum = {1'b0, a[15:0]}  + {1'b0, b[15:0]}  + {16'b0, cin};
+   assign hi_sum = {1'b0, a[31:16]} + {1'b0, b[31:16]} + {16'b0, cin16};
+   assign z      = {hi_sum[15:0], lo_sum[15:0]};
+endmodule // zzadd32v
+
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 64-bit incrementer
+
+module zzinc64 ( in, out );
+
+  output [63:0] out;  // in + 1, wrapping to 0 from all ones
+  input  [63:0] in;   // value to increment
+
+  // Add a full-width constant one; there is no carry output.
+  assign out = in + 64'd1;
+
+endmodule // zzinc64
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 48-bit incrementer
+
+module zzinc48 ( in, out, overflow );
+
+  output [47:0] out;       // in + 1
+  output        overflow;  // bit 47 went from 0 to 1
+  input  [47:0] in;        // value to increment
+
+  // overflow compares the top bit before and after the increment.
+  wire   [47:0] incremented = in + 48'd1;
+  assign out      = incremented;
+  assign overflow = incremented[47] & ~in[47];
+endmodule // zzinc48
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 32-bit incrementer
+
+module zzinc32 ( in, out );
+
+  output [31:0] out;  // in + 1, wrapping to 0 from all ones
+  input  [31:0] in;   // value to increment
+
+  // Add a full-width constant one; there is no carry output.
+  assign out = in + 32'd1;
+
+endmodule // zzinc32
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+module zzecc_exu_chkecc2 ( q,ce, ue, ne, d, p, vld );
+   input [63:0] d;     // data word being checked
+   input [7:0]  p;     // check bits: p[6:0] enter q[6:0]; p[7] enters only the overall parity
+   input        vld;   // qualifies ce and ue; when low, ne is forced high
+   output [6:0] q;     // q[i] = p[i] XOR a fixed subset of d (equations below)
+   output       ce,
+                ue,
+                ne;
+   // NOTE(review): ce/ue/ne presumably mean correctable / uncorrectable / no error -- confirm with callers.
+   wire       parity;  // XOR of all 64 data bits and all 8 check bits
+   // ce: vld is high and the overall parity is 1.
+   assign     ce = vld & parity;
+   // ue: vld is high, the overall parity is 0, but at least one q bit is 1.
+   assign ue = vld & ~parity & (q[6] | q[5] | q[4] | q[3] | q[2] | q[1] | q[0]);
+   // ne: vld is low, or the overall parity and every q bit are 0.
+   assign ne = ~vld | ~(parity | q[6] | q[5] | q[4] | q[3] | q[2] | q[1] | q[0]);
+
+   // Per-bit check equations: each q bit folds its own subset of d together with p[i].
+   assign q[0] = d[0]  ^ d[1]  ^ d[3]  ^ d[4]  ^ d[6]  ^ d[8]  ^ d[10]
+               ^ d[11] ^ d[13] ^ d[15] ^ d[17] ^ d[19] ^ d[21] ^ d[23]
+               ^ d[25] ^ d[26] ^ d[28] ^ d[30] ^ d[32] ^ d[34] ^ d[36]
+               ^ d[38] ^ d[40] ^ d[42] ^ d[44] ^ d[46] ^ d[48] ^ d[50]
+               ^ d[52] ^ d[54] ^ d[56] ^ d[57] ^ d[59] ^ d[61] ^ d[63]
+               ^ p[0]  ;
+
+   assign q[1] = d[0]  ^ d[2]  ^ d[3]  ^ d[5]  ^ d[6]  ^ d[9]  ^ d[10]
+               ^ d[12] ^ d[13] ^ d[16] ^ d[17] ^ d[20] ^ d[21] ^ d[24]
+               ^ d[25] ^ d[27] ^ d[28] ^ d[31] ^ d[32] ^ d[35] ^ d[36]
+               ^ d[39] ^ d[40] ^ d[43] ^ d[44] ^ d[47] ^ d[48] ^ d[51]
+               ^ d[52] ^ d[55] ^ d[56] ^ d[58] ^ d[59] ^ d[62] ^ d[63]
+               ^ p[1]  ;
+
+   assign q[2] = d[1]  ^ d[2]  ^ d[3]  ^ d[7]  ^ d[8]  ^ d[9]  ^ d[10]
+               ^ d[14] ^ d[15] ^ d[16] ^ d[17] ^ d[22] ^ d[23] ^ d[24]
+               ^ d[25] ^ d[29] ^ d[30] ^ d[31] ^ d[32] ^ d[37] ^ d[38]
+               ^ d[39] ^ d[40] ^ d[45] ^ d[46] ^ d[47] ^ d[48] ^ d[53]
+               ^ d[54] ^ d[55] ^ d[56] ^ d[60] ^ d[61] ^ d[62] ^ d[63]
+               ^ p[2]  ;
+
+   assign q[3] = d[4]  ^ d[5]  ^ d[6]  ^ d[7]  ^ d[8]  ^ d[9]  ^ d[10]
+               ^ d[18] ^ d[19] ^ d[20] ^ d[21] ^ d[22] ^ d[23] ^ d[24]
+               ^ d[25] ^ d[33] ^ d[34] ^ d[35] ^ d[36] ^ d[37] ^ d[38]
+               ^ d[39] ^ d[40] ^ d[49] ^ d[50] ^ d[51] ^ d[52] ^ d[53]
+               ^ d[54] ^ d[55] ^ d[56] ^ p[3]  ;
+
+   assign q[4] = d[11] ^ d[12] ^ d[13] ^ d[14] ^ d[15] ^ d[16] ^ d[17]
+               ^ d[18] ^ d[19] ^ d[20] ^ d[21] ^ d[22] ^ d[23] ^ d[24]
+               ^ d[25] ^ d[41] ^ d[42] ^ d[43] ^ d[44] ^ d[45] ^ d[46]
+               ^ d[47] ^ d[48] ^ d[49] ^ d[50] ^ d[51] ^ d[52] ^ d[53]
+               ^ d[54] ^ d[55] ^ d[56] ^ p[4]  ;
+
+   assign q[5] = d[26] ^ d[27] ^ d[28] ^ d[29] ^ d[30] ^ d[31] ^ d[32]
+               ^ d[33] ^ d[34] ^ d[35] ^ d[36] ^ d[37] ^ d[38] ^ d[39]
+               ^ d[40] ^ d[41] ^ d[42] ^ d[43] ^ d[44] ^ d[45] ^ d[46]
+               ^ d[47] ^ d[48] ^ d[49] ^ d[50] ^ d[51] ^ d[52] ^ d[53]
+               ^ d[54] ^ d[55] ^ d[56] ^ p[5]  ;
+
+   assign q[6] = d[57] ^ d[58] ^ d[59] ^ d[60] ^ d[61] ^ d[62] ^ d[63] ^ p[6] ;
+
+   assign parity = d[0]  ^ d[1]  ^ d[2]  ^ d[3]  ^ d[4]  ^ d[5]  ^ d[6]  ^ d[7]
+                 ^ d[8]  ^ d[9]  ^ d[10] ^ d[11] ^ d[12] ^ d[13] ^ d[14] ^ d[15]
+                 ^ d[16] ^ d[17] ^ d[18] ^ d[19] ^ d[20] ^ d[21] ^ d[22] ^ d[23]
+                 ^ d[24] ^ d[25] ^ d[26] ^ d[27] ^ d[28] ^ d[29] ^ d[30] ^ d[31]
+                 ^ d[32] ^ d[33] ^ d[34] ^ d[35] ^ d[36] ^ d[37] ^ d[38] ^ d[39]
+                 ^ d[40] ^ d[41] ^ d[42] ^ d[43] ^ d[44] ^ d[45] ^ d[46] ^ d[47]
+                 ^ d[48] ^ d[49] ^ d[50] ^ d[51] ^ d[52] ^ d[53] ^ d[54] ^ d[55]
+                 ^ d[56] ^ d[57] ^ d[58] ^ d[59] ^ d[60] ^ d[61] ^ d[62] ^ d[63]
+                 ^ p[0]  ^ p[1]  ^ p[2]  ^ p[3]  ^ p[4]  ^ p[5]  ^ p[6]  ^ p[7];
+
+endmodule // zzecc_exu_chkecc2
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+module zzecc_sctag_24b_gen ( din, dout, parity ) ;
+// Computes six check bits over a 24-bit word; the word itself is passed through.
+// Input Ports
+input  [23:0] din ;       // word to protect
+
+// Output Ports
+output [23:0] dout ;      // same value as din
+output [5:0]  parity ;    // {p30, p16, p8, p4, p2, p1}
+
+wire   [23:0] dout ;
+wire   [5:0]  parity ;
+
+// Local Reg and Wires
+wire          p1 ;
+wire          p2 ;
+wire          p4 ;
+wire          p8 ;
+wire          p16 ;
+wire          p30 ;
+
+// Table: columns are code-word positions 1..30, rows mark the din bits XORed into each check bit.
+//----|--|--|--|--|--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+//    |1 |2 |3 |4 |5 |6 |7 |8 |9 |10|11|12|13|14|15 |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |
+//    |P1|P2|D0|P4|D1|D2|D3|P8|D4|D5|D6|D7|D8|D9|D10|P16|D11|D12|D13|D14|D15|D16|D17|D18|D19|D20|D21|D22|D23|P30|
+//----|--|--|--|--|--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+//P1  |  |  |* |  |* |  |* |  |* |  |* |  |* |  | * |   | * |   | * |   | * |   | * |   | * |   | * |   | * |   |
+//P2  |  |  |* |  |  |* |* |  |  |* |* |  |  |* | * |   |   | * | * |   |   | * | * |   |   | * | * |   |   |   |
+//P4  |  |  |  |  |* |* |* |  |  |  |  |* |* |* | * |   |   |   |   | * | * | * | * |   |   |   |   | * | * |   |
+//P8  |  |  |  |  |  |  |  |  |* |* |* |* |* |* | * |   |   |   |   |   |   |   |   | * | * | * | * | * | * |   |
+//P16 |  |  |  |  |  |  |  |  |  |  |  |  |  |  |   |   | * | * | * | * | * | * | * | * | * | * | * | * | * |   |
+//----|--|--|--|--|--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+//p30 |  |  |* |  |* |* |  |  |* |* |  |* |  |  | * |   | * | * |   | * |   |   | * | * |   |   | * |   | * |   |
+//----|--|--|--|--|--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+
+// Each assign below is the XOR of the din bits starred in the matching table row.
+assign p1  = din[0]  ^ din[1]  ^ din[3]  ^ din[4]  ^ din[6]  ^ din[8]  ^
+             din[10] ^ din[11] ^ din[13] ^ din[15] ^ din[17] ^ din[19] ^
+             din[21] ^ din[23] ;
+
+assign p2  = din[0]  ^ din[2]  ^ din[3]  ^ din[5]  ^ din[6]  ^ din[9]  ^
+             din[10] ^ din[12] ^ din[13] ^ din[16] ^ din[17] ^ din[20] ^
+             din[21] ;
+
+assign p4  = din[1]  ^ din[2]  ^ din[3]  ^ din[7]  ^ din[8]  ^ din[9]  ^
+             din[10] ^ din[14] ^ din[15] ^ din[16] ^ din[17] ^ din[22] ^
+             din[23] ;
+
+assign p8  = din[4]  ^ din[5]  ^ din[6]  ^ din[7]  ^ din[8]  ^ din[9]  ^
+             din[10] ^ din[18] ^ din[19] ^ din[20] ^ din[21] ^ din[22] ^
+             din[23] ;
+
+assign p16 = din[11] ^ din[12] ^ din[13] ^ din[14] ^ din[15] ^ din[16] ^
+             din[17] ^ din[18] ^ din[19] ^ din[20] ^ din[21] ^ din[22] ^
+             din[23] ;
+
+assign p30 = din[0]  ^ din[1]  ^ din[2]  ^ din[4]  ^ din[5]  ^
+             din[7]  ^ din[10] ^ din[11] ^ din[12] ^ din[14] ^
+             din[17] ^ din[18] ^ din[21] ^ din[23] ;
+
+assign dout   = din ;                               // data is not modified here
+assign parity = {p30, p16, p8, p4, p2, p1} ;        // p1 is parity[0], p30 is parity[5]
+
+endmodule
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+module zzecc_sctag_30b_cor ( din, parity, dout, corrected_bit ) ;
+// Recomputes five check bits over din, XORs them with parity, and flips the din bit they point at.
+// Input Ports
+input  [23:0] din ;            // data word as read
+input  [4:0]  parity ;         // stored check bits, parity[0]..parity[4] pair with p1..p16
+
+// Output Ports
+output [23:0] dout ;           // din with at most one bit inverted
+output [4:0]  corrected_bit ;  // {p16, p8, p4, p2, p1}: the raw 5-bit check result
+
+wire   [23:0] dout ;
+wire   [4:0]  corrected_bit ;
+
+// Local Reg and Wires
+wire          p1 ;
+wire          p2 ;
+wire          p4 ;
+wire          p8 ;
+wire          p16 ;
+wire [23:0]   error_bit ;      // one bit per din bit; set when the check result names that bit
+
+// Table: columns are code-word positions 1..30.  Only rows P1..P16 are evaluated in this
+//----|--|--|--|--|--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+//    |1 |2 |3 |4 |5 |6 |7 |8 |9 |10|11|12|13|14|15 |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |
+//    |P1|P2|D0|P4|D1|D2|D3|P8|D4|D5|D6|D7|D8|D9|D10|P16|D11|D12|D13|D14|D15|D16|D17|D18|D19|D20|D21|D22|D23|P30|
+//----|--|--|--|--|--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+//P1  |* |  |* |  |* |  |* |  |* |  |* |  |* |  | * |   | * |   | * |   | * |   | * |   | * |   | * |   | * |   |
+//P2  |  |* |* |  |  |* |* |  |  |* |* |  |  |* | * |   |   | * | * |   |   | * | * |   |   | * | * |   |   |   |
+//P4  |  |  |  |* |* |* |* |  |  |  |  |* |* |* | * |   |   |   |   | * | * | * | * |   |   |   |   | * | * |   |
+//P8  |  |  |  |  |  |  |  |* |* |* |* |* |* |* | * |   |   |   |   |   |   |   |   | * | * | * | * | * | * |   |
+//P16 |  |  |  |  |  |  |  |  |  |  |  |  |  |  |   | * | * | * | * | * | * | * | * | * | * | * | * | * | * |   |
+//----|--|--|--|--|--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+//p30 |* |* |* |* |* |* |* |* |* |* |* |* |* |* | * | * | * | * | * | * | * | * | * | * | * | * | * | * | * | * |
+//----|--|--|--|--|--|--|--|--|--|--|--|--|--|--|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+// module: there is no P30 input (parity is [4:0]) and no p30 term is computed.
+
+assign p1  = parity[0] ^
+             din[0]  ^ din[1]  ^ din[3]  ^ din[4]  ^ din[6]  ^ din[8]  ^
+             din[10] ^ din[11] ^ din[13] ^ din[15] ^ din[17] ^ din[19] ^
+             din[21] ^ din[23] ;
+
+assign p2  = parity[1] ^
+             din[0]  ^ din[2]  ^ din[3]  ^ din[5]  ^ din[6]  ^ din[9]  ^
+             din[10] ^ din[12] ^ din[13] ^ din[16] ^ din[17] ^ din[20] ^
+             din[21] ;
+
+assign p4  = parity[2] ^
+             din[1]  ^ din[2]  ^ din[3]  ^ din[7]  ^ din[8]  ^ din[9]  ^
+             din[10] ^ din[14] ^ din[15] ^ din[16] ^ din[17] ^ din[22] ^
+             din[23] ;
+
+assign p8  = parity[3] ^
+             din[4]  ^ din[5]  ^ din[6]  ^ din[7]  ^ din[8]  ^ din[9]  ^
+             din[10] ^ din[18] ^ din[19] ^ din[20] ^ din[21] ^ din[22] ^
+             din[23] ;
+
+assign p16 = parity[4] ^
+             din[11] ^ din[12] ^ din[13] ^ din[14] ^ din[15] ^ din[16] ^
+             din[17] ^ din[18] ^ din[19] ^ din[20] ^ din[21] ^ din[22] ^
+             din[23] ;
+// Decode {p16,p8,p4,p2,p1}: the trailing number is the table position that selects each din bit.
+assign  error_bit[0]  = !p16 & !p8 & !p4 &  p2 &  p1 ; // 3
+assign  error_bit[1]  = !p16 & !p8 &  p4 & !p2 &  p1 ; // 5
+assign  error_bit[2]  = !p16 & !p8 &  p4 &  p2 & !p1 ; // 6
+assign  error_bit[3]  = !p16 & !p8 &  p4 &  p2 &  p1 ; // 7
+assign  error_bit[4]  = !p16 &  p8 & !p4 & !p2 &  p1 ; // 9
+assign  error_bit[5]  = !p16 &  p8 & !p4 &  p2 & !p1 ; // 10
+assign  error_bit[6]  = !p16 &  p8 & !p4 &  p2 &  p1 ; // 11
+assign  error_bit[7]  = !p16 &  p8 &  p4 & !p2 & !p1 ; // 12
+assign  error_bit[8]  = !p16 &  p8 &  p4 & !p2 &  p1 ; // 13
+assign  error_bit[9]  = !p16 &  p8 &  p4 &  p2 & !p1 ; // 14
+assign  error_bit[10] = !p16 &  p8 &  p4 &  p2 &  p1 ; // 15
+assign  error_bit[11] =  p16 & !p8 & !p4 & !p2 &  p1 ; // 17
+assign  error_bit[12] =  p16 & !p8 & !p4 &  p2 & !p1 ; // 18
+assign  error_bit[13] =  p16 & !p8 & !p4 &  p2 &  p1 ; // 19
+assign  error_bit[14] =  p16 & !p8 &  p4 & !p2 & !p1 ; // 20
+assign  error_bit[15] =  p16 & !p8 &  p4 & !p2 &  p1 ; // 21
+assign  error_bit[16] =  p16 & !p8 &  p4 &  p2 & !p1 ; // 22
+assign  error_bit[17] =  p16 & !p8 &  p4 &  p2 &  p1 ; // 23
+assign  error_bit[18] =  p16 &  p8 & !p4 & !p2 & !p1 ; // 24
+assign  error_bit[19] =  p16 &  p8 & !p4 & !p2 &  p1 ; // 25
+assign  error_bit[20] =  p16 &  p8 & !p4 &  p2 & !p1 ; // 26
+assign  error_bit[21] =  p16 &  p8 & !p4 &  p2 &  p1 ; // 27
+assign  error_bit[22] =  p16 &  p8 &  p4 & !p2 & !p1 ; // 28
+assign  error_bit[23] =  p16 &  p8 &  p4 & !p2 &  p1 ; // 29
+// Codes not listed above (0, 1, 2, 4, 8, 16, 30, 31) set no error_bit, so dout equals din.
+assign  dout          = din ^ error_bit ;
+assign  corrected_bit = {p16, p8, p4, p2, p1} ;
+
+endmodule
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+//Module Name: zzecc_sctag_ecc39
+//Function: Error Detection and Correction
+//
+//
+
+module zzecc_sctag_ecc39 ( dout, cflag, pflag, parity, din);
+
+   // Single-error-correcting decoder for 32 data bits guarded by 7 check
+   // bits.  Data bit dN occupies the Hamming position documented in
+   // zzecc_sctag_pgen_32b (d0 = 3, d1 = 5, ... d31 = 38).
+   //   dout  - din with the bit addressed by the syndrome inverted
+   //   cflag - syndrome {c5,c4,c3,c2,c1,c0}
+   //   pflag - XOR over all 32 data bits and all 7 parity bits
+   //   (parity[6] contributes to pflag only, never to the syndrome)
+   output [31:0] dout;
+   output [5:0]  cflag;
+   output        pflag;
+
+   input  [31:0] din;
+   input  [6:0]  parity;
+
+   wire          c0, c1, c2, c3, c4, c5;
+   wire [5:0]    syn;
+   wire [31:0]   err_bit_pos;
+
+   // Syndrome bit k = received check bit k XOR the check bit recomputed
+   // from din.
+   assign c0 = parity[0]
+             ^ din[0]  ^ din[1]  ^ din[3]  ^ din[4]  ^ din[6]  ^ din[8]
+             ^ din[10] ^ din[11] ^ din[13] ^ din[15] ^ din[17] ^ din[19]
+             ^ din[21] ^ din[23] ^ din[25] ^ din[26] ^ din[28] ^ din[30];
+
+   assign c1 = parity[1]
+             ^ din[0]  ^ din[2]  ^ din[3]  ^ din[5]  ^ din[6]  ^ din[9]
+             ^ din[10] ^ din[12] ^ din[13] ^ din[16] ^ din[17] ^ din[20]
+             ^ din[21] ^ din[24] ^ din[25] ^ din[27] ^ din[28] ^ din[31];
+
+   assign c2 = parity[2]
+             ^ din[1]  ^ din[2]  ^ din[3]  ^ din[7]  ^ din[8]  ^ din[9]
+             ^ din[10] ^ din[14] ^ din[15] ^ din[16] ^ din[17] ^ din[22]
+             ^ din[23] ^ din[24] ^ din[25] ^ din[29] ^ din[30] ^ din[31];
+
+   // The upper checks cover contiguous data ranges, so reduce part-selects.
+   assign c3 = parity[3] ^ (^din[10:4]) ^ (^din[25:18]);
+   assign c4 = parity[4] ^ (^din[25:11]);
+   assign c5 = parity[5] ^ (^din[31:26]);
+
+   assign syn   = {c5, c4, c3, c2, c1, c0};
+   assign cflag = syn;
+
+   // Overall parity across the whole 39-bit code word.
+   assign pflag = ^{parity, din};
+
+   // Syndrome-to-bit decoder: bit N is set when syn equals dN's position.
+   // Syndromes 0, 1, 2, 4, 8, 16, 32 and anything above 38 select no bit.
+   assign err_bit_pos[0]  = &(syn ~^ 6'd3);
+   assign err_bit_pos[1]  = &(syn ~^ 6'd5);
+   assign err_bit_pos[2]  = &(syn ~^ 6'd6);
+   assign err_bit_pos[3]  = &(syn ~^ 6'd7);
+   assign err_bit_pos[4]  = &(syn ~^ 6'd9);
+   assign err_bit_pos[5]  = &(syn ~^ 6'd10);
+   assign err_bit_pos[6]  = &(syn ~^ 6'd11);
+   assign err_bit_pos[7]  = &(syn ~^ 6'd12);
+   assign err_bit_pos[8]  = &(syn ~^ 6'd13);
+   assign err_bit_pos[9]  = &(syn ~^ 6'd14);
+   assign err_bit_pos[10] = &(syn ~^ 6'd15);
+   assign err_bit_pos[11] = &(syn ~^ 6'd17);
+   assign err_bit_pos[12] = &(syn ~^ 6'd18);
+   assign err_bit_pos[13] = &(syn ~^ 6'd19);
+   assign err_bit_pos[14] = &(syn ~^ 6'd20);
+   assign err_bit_pos[15] = &(syn ~^ 6'd21);
+   assign err_bit_pos[16] = &(syn ~^ 6'd22);
+   assign err_bit_pos[17] = &(syn ~^ 6'd23);
+   assign err_bit_pos[18] = &(syn ~^ 6'd24);
+   assign err_bit_pos[19] = &(syn ~^ 6'd25);
+   assign err_bit_pos[20] = &(syn ~^ 6'd26);
+   assign err_bit_pos[21] = &(syn ~^ 6'd27);
+   assign err_bit_pos[22] = &(syn ~^ 6'd28);
+   assign err_bit_pos[23] = &(syn ~^ 6'd29);
+   assign err_bit_pos[24] = &(syn ~^ 6'd30);
+   assign err_bit_pos[25] = &(syn ~^ 6'd31);
+   assign err_bit_pos[26] = &(syn ~^ 6'd33);
+   assign err_bit_pos[27] = &(syn ~^ 6'd34);
+   assign err_bit_pos[28] = &(syn ~^ 6'd35);
+   assign err_bit_pos[29] = &(syn ~^ 6'd36);
+   assign err_bit_pos[30] = &(syn ~^ 6'd37);
+   assign err_bit_pos[31] = &(syn ~^ 6'd38);
+
+   // Flip the flagged data bit.  Only a single-bit error can be repaired;
+   // a syndrome that points at a check bit leaves din untouched.
+   assign dout = din ^ err_bit_pos;
+
+endmodule // zzecc_sctag_ecc39
+
+
+////////////////////////////////////////////////////////////////////////////////
+//Module Name: zzecc_sctag_pgen_32b
+//Function: Generate 7 parity bits for 32bits input data
+//
+
+module zzecc_sctag_pgen_32b ( dout, parity, din);
+
+   // Check-bit generator for the 32-bit Hamming code: passes the data word
+   // through and derives its 7 parity bits.
+   //   dout   - copy of din
+   //   parity - {overall parity, P32, P16, P8, P4, P2, P1}
+   output [31:0] dout;
+   output [6:0]  parity;
+
+   input  [31:0] din;
+
+   // Data is forwarded unchanged.
+   assign dout = din;
+
+   // Layout of the protected word (position:contents).  Check bits own the
+   // power-of-two positions and are named after them; data bits fill the
+   // remaining positions in order:
+   //    1:P1    2:P2    3:d0    4:P4    5:d1    6:d2    7:d3    8:P8
+   //    9:d4   10:d5   11:d6   12:d7   13:d8   14:d9   15:d10  16:P16
+   //   17:d11  18:d12  19:d13  20:d14  21:d15  22:d16  23:d17  24:d18
+   //   25:d19  26:d20  27:d21  28:d22  29:d23  30:d24  31:d25  32:P32
+   //   33:d26  34:d27  35:d28  36:d29  37:d30  38:d31
+   //
+   // With each position written as a 6-bit number, check bit P(2^k) is the
+   // XOR of all data bits whose position has bit k set.
+   // parity[0] .. parity[5] carry P1, P2, P4, P8, P16, P32 in that order.
+
+   // P1: odd positions
+   assign parity[0] = din[0]  ^ din[1]  ^ din[3]  ^ din[4]  ^ din[6]  ^ din[8]
+                    ^ din[10] ^ din[11] ^ din[13] ^ din[15] ^ din[17] ^ din[19]
+                    ^ din[21] ^ din[23] ^ din[25] ^ din[26] ^ din[28] ^ din[30];
+
+   // P2: positions 2-3, 6-7, 10-11, ... 38
+   assign parity[1] = din[0]  ^ din[2]  ^ din[3]  ^ din[5]  ^ din[6]  ^ din[9]
+                    ^ din[10] ^ din[12] ^ din[13] ^ din[16] ^ din[17] ^ din[20]
+                    ^ din[21] ^ din[24] ^ din[25] ^ din[27] ^ din[28] ^ din[31];
+
+   // P4: positions 4-7, 12-15, 20-23, 28-31, 36-38
+   assign parity[2] = din[1]  ^ din[2]  ^ din[3]  ^ din[7]  ^ din[8]  ^ din[9]
+                    ^ din[10] ^ din[14] ^ din[15] ^ din[16] ^ din[17] ^ din[22]
+                    ^ din[23] ^ din[24] ^ din[25] ^ din[29] ^ din[30] ^ din[31];
+
+   // P8: positions 8-15 and 24-31, i.e. d4..d10 and d18..d25
+   assign parity[3] = (^din[10:4]) ^ (^din[25:18]);
+
+   // P16: positions 16-31, i.e. d11..d25
+   assign parity[4] = ^din[25:11];
+
+   // P32: positions 32-38, i.e. d26..d31
+   assign parity[5] = ^din[31:26];
+
+   // parity[6] is the parity of the other 38 bits:
+   //     parity[6] = (^din) ^ (^parity[5:0])
+   // After expanding parity[5:0], data bit dN appears once for itself plus
+   // once per 1 in its position number.  Terms that appear an even number
+   // of times cancel, so dN survives only if its position has an even
+   // number of 1s:
+   //
+   //    bit  pos  1s  kept   |   bit  pos  1s  kept
+   //    ---------------------+---------------------
+   //    d0     3   2  yes    |   d16   22   3  no
+   //    d1     5   2  yes    |   d17   23   4  yes
+   //    d2     6   2  yes    |   d18   24   2  yes
+   //    d3     7   3  no     |   d19   25   3  no
+   //    d4     9   2  yes    |   d20   26   3  no
+   //    d5    10   2  yes    |   d21   27   4  yes
+   //    d6    11   3  no     |   d22   28   3  no
+   //    d7    12   2  yes    |   d23   29   4  yes
+   //    d8    13   3  no     |   d24   30   4  yes
+   //    d9    14   3  no     |   d25   31   5  no
+   //    d10   15   4  yes    |   d26   33   2  yes
+   //    d11   17   2  yes    |   d27   34   2  yes
+   //    d12   18   2  yes    |   d28   35   3  no
+   //    d13   19   3  no     |   d29   36   2  yes
+   //    d14   20   2  yes    |   d30   37   3  no
+   //    d15   21   3  no     |   d31   38   3  no
+   //
+   // The 18 surviving bits are XORed below.
+
+   assign parity[6] = din[0]  ^ din[1]  ^ din[2]  ^ din[4]
+                    ^ din[5]  ^ din[7]  ^ din[10] ^ din[11]
+                    ^ din[12] ^ din[14] ^ din[17] ^ din[18]
+                    ^ din[21] ^ din[23] ^ din[24] ^ din[26]
+                    ^ din[27] ^ din[29];
+
+endmodule // zzecc_sctag_pgen_32b
+
+////////////////////////////////////////////////////////////////////////////////
+// 34 bit parity tree
+
+module zzpar34 ( z, d );
+   // z is the XOR of all 34 bits of d: 1 when an odd number of bits are
+   // set, 0 otherwise.
+   input  [33:0] d;
+   output        z;
+
+   // Unary ^ XORs every bit of the vector together, so no per-bit term
+   // list is needed.
+   assign z = ^d;
+
+endmodule // zzpar34
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 32 bit parity tree
+
+module zzpar32 ( z, d );
+   // z is the XOR of all 32 bits of d: 1 when an odd number of bits are
+   // set, 0 otherwise.
+   input  [31:0] d;
+   output        z;
+
+   // Unary ^ XORs every bit of the vector together.
+   assign z = ^d;
+
+endmodule // zzpar32
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 28 bit parity tree
+
+module zzpar28 ( z, d );
+   // z is the XOR of all 28 bits of d: 1 when an odd number of bits are
+   // set, 0 otherwise.
+   input  [27:0] d;
+   output        z;
+
+   // Unary ^ XORs every bit of the vector together.
+   assign z = ^d;
+
+endmodule // zzpar28
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 16 bit parity tree
+
+module zzpar16 ( z, d );
+   // z is the XOR of all 16 bits of d (1 for an odd number of set bits).
+   input  [15:0] d;
+   output        z;
+
+   assign z = ^d;   // unary ^ folds the whole vector
+
+endmodule // zzpar16
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+// 8 bit parity tree
+
+module zzpar8 ( z, d );
+   // z is the XOR of all 8 bits of d (1 for an odd number of set bits).
+   input  [7:0] d;
+   output       z;
+
+   assign z = ^d;   // unary ^ folds the whole vector
+endmodule // zzpar8
+
+
+
+////////////////////////////////////////////////////////////////////////////////
+//    64 -> 6 priority encoder
+//    Bit 63 has the highest priority
+
+module zzpenc64 (/*AUTOARG*/
+   // Outputs
+   z,
+   // Inputs
+   a
+   );
+   // z = index of the highest set bit of a (bit 63 wins); 0 if none is set.
+   input      [63:0] a;
+   output reg [5:0]  z;
+   integer           k;
+   reg               hit;   // set once the winning bit has been recorded
+
+   always @ (a) begin
+      z   = 6'b0;
+      hit = 1'b0;
+      for (k = 63; k >= 0; k = k - 1)   // scan from the top down
+         if (a[k] && !hit) begin
+            z   = k;
+            hit = 1'b1;
+         end
+   end
+endmodule // zzpenc64
+
+////////////////////////////////////////////////////////////////////////////////
+//    4-bit 60x buffers
+
+module zzbufh_60x4 (
+   output [3:0] z,   // buffered copy of a
+   input  [3:0] a    // signals to be buffered
+   );
+
+   // Purely behavioural model: each output bit follows the matching
+   // input bit with no logic in between.  Nothing in this module
+   // encodes the "60x" of the name (presumably a drive-strength tag
+   // of the original cell - TODO confirm).
+
+   assign z = a;
+
+endmodule //zzbufh_60x4
+
+// LVT modules added below
+
+module zzadd64_lv ( rs1_data, rs2_data, cin, adder_out, cout32, cout64 );
+   // 64-bit add done as two chained 32-bit adds so that the carry out of
+   // bit 31 (cout32) is visible alongside the final carry (cout64).
+   input  [63:0] rs1_data, rs2_data;   // the two addends
+   input         cin;                  // carry into bit 0
+   output [63:0] adder_out;            // low 64 bits of the sum
+   output        cout32, cout64;       // carries out of bit 31 / bit 63
+
+   wire   [32:0] sum_lo, sum_hi;       // bit 32 of each half is its carry
+   assign sum_lo = rs1_data[31:0]  + rs2_data[31:0]  + cin;
+   assign sum_hi = rs1_data[63:32] + rs2_data[63:32] + sum_lo[32];
+   assign {cout32, adder_out[31:0]}  = sum_lo;
+   assign {cout64, adder_out[63:32]} = sum_hi;
+endmodule // zzadd64_lv
+
+module zzpar8_lv ( z, d );
+   // z is the XOR of all 8 bits of d (1 for an odd number of set bits).
+   input  [7:0] d;
+   output       z;
+
+   assign z = ^d;   // unary ^ folds the whole vector
+endmodule // zzpar8_lv
+
+
+module zzpar32_lv ( z, d );
+   // z is the XOR of all 32 bits of d: 1 when an odd number of bits are
+   // set, 0 otherwise.
+   input  [31:0] d;
+   output        z;
+
+   // Unary ^ XORs every bit of the vector together.
+   assign z = ^d;
+
+endmodule // zzpar32_lv
+
+
+
+module zznor64_32_lv ( znor64, znor32, a );
+  // Zero detector for a 64-bit vector and for its low word.
+  //   znor32 - 1 when a[31:0] contains no set bit
+  //   znor64 - 1 when a[63:0] contains no set bit
+  input  [63:0] a;
+  output        znor64;
+  output        znor32;
+
+  // NOR-reduce each 32-bit half once; the low half feeds both
+  // outputs.
+  wire          lo_clear;
+  wire          hi_clear;
+
+  assign lo_clear = ~|a[31:0];
+  assign hi_clear = ~|a[63:32];
+
+  assign znor32 = lo_clear;
+  assign znor64 = lo_clear & hi_clear;
+
+endmodule // zznor64_32_lv
+
+////////////////////////////////////////////////////////////////////////////////
+//    64 -> 6 priority encoder
+//    Bit 63 has the highest priority
+//    LVT version
+
+module zzpenc64_lv (/*AUTOARG*/
+   // Outputs
+   z,
+   // Inputs
+   a
+   );
+   // z = index of the highest set bit of a (bit 63 wins); 0 if none is set.
+   input      [63:0] a;
+   output reg [5:0]  z;
+   integer           k;
+   reg               hit;   // set once the winning bit has been recorded
+
+   always @ (a) begin
+      z   = 6'b0;
+      hit = 1'b0;
+      for (k = 63; k >= 0; k = k - 1)   // scan from the top down
+         if (a[k] && !hit) begin
+            z   = k;
+            hit = 1'b1;
+         end
+   end
+endmodule // zzpenc64_lv
+
+////////////////////////////////////////////////////////////////////////////////
+// 36 bit or gate
+// LVT version
+
+module zzor36_lv ( z, a );
+  // z is 1 when at least one of the 36 bits of a is set, 0 when a is
+  // all zero.
+  input  [35:0] a;
+  output        z;
+
+  // Unary | ORs every bit of the vector together, so no per-bit term
+  // list is needed.
+  assign z = |a;
+
+endmodule // zzor36_lv
+
+////////////////////////////////////////////////////////////////////////////////
+// 34 bit parity tree
+// LVT version
+
+module zzpar34_lv ( z, d );
+   // z is the XOR of all 34 bits of d: 1 when an odd number of bits are
+   // set, 0 otherwise.
+   input  [33:0] d;
+   output        z;
+
+   // Unary ^ XORs every bit of the vector together, so no per-bit term
+   // list is needed.
+   assign z = ^d;
+
+endmodule // zzpar34_lv
+
+
Index: /trunk/Xilinx/pcx_fifo.v
===================================================================
--- /trunk/Xilinx/pcx_fifo.v	(revision 6)
+++ /trunk/Xilinx/pcx_fifo.v	(revision 6)
@@ -0,0 +1,171 @@
+/*******************************************************************************
+*     This file is owned and controlled by Xilinx and must be used             *
+*     solely for design, simulation, implementation and creation of            *
+*     design files limited to Xilinx devices or technologies. Use              *
+*     with non-Xilinx devices or technologies is expressly prohibited          *
+*     and immediately terminates your license.                                 *
+*                                                                              *
+*     XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS"            *
+*     SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR                  *
+*     XILINX DEVICES.  BY PROVIDING THIS DESIGN, CODE, OR INFORMATION          *
+*     AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION              *
+*     OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS                *
+*     IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT,                  *
+*     AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE         *
+*     FOR YOUR IMPLEMENTATION.  XILINX EXPRESSLY DISCLAIMS ANY                 *
+*     WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE                  *
+*     IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR           *
+*     REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF          *
+*     INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS          *
+*     FOR A PARTICULAR PURPOSE.                                                *
+*                                                                              *
+*     Xilinx products are not intended for use in life support                 *
+*     appliances, devices, or systems. Use in such applications are            *
+*     expressly prohibited.                                                    *
+*                                                                              *
+*     (c) Copyright 1995-2009 Xilinx, Inc.                                     *
+*     All rights reserved.                                                     *
+*******************************************************************************/
+// The synthesis directives "translate_off/translate_on" specified below are
+// supported by Xilinx, Mentor Graphics and Synplicity synthesis
+// tools. Ensure they are correct for your synthesis tool(s).
+
+// You must compile the wrapper file pcx_fifo.v when simulating
+// the core, pcx_fifo. When compiling the wrapper file, be sure to
+// reference the XilinxCoreLib Verilog simulation library. For detailed
+// instructions, please refer to the "CORE Generator Help".
+
+`timescale 1ns/1ps
+
+module pcx_fifo(
+	clk,
+	rst,
+	din,
+	wr_en,
+	rd_en,
+	dout,
+	full,
+	empty);
+// Wrapper for the generated FIFO core: 130-bit data in and out, 32 entries
+// (C_DIN_WIDTH / C_WR_DEPTH below), clocked by clk, full/empty flags only.
+input clk;
+input rst;
+input [129 : 0] din;
+input wr_en;
+input rd_en;
+output [129 : 0] dout;
+output full;
+output empty;
+
+// synthesis translate_off
+// Behavioural core model; the directive above hides it from implementation.
+      FIFO_GENERATOR_V6_2 #(
+		.C_COMMON_CLOCK(1),
+		.C_COUNT_TYPE(0),
+		.C_DATA_COUNT_WIDTH(6),
+		.C_DEFAULT_VALUE("BlankString"),
+		.C_DIN_WIDTH(130),
+		.C_DOUT_RST_VAL("0"),
+		.C_DOUT_WIDTH(130),
+		.C_ENABLE_RLOCS(0),
+		.C_ENABLE_RST_SYNC(1),
+		.C_ERROR_INJECTION_TYPE(0),
+		.C_FAMILY("virtex5"),
+		.C_FULL_FLAGS_RST_VAL(1),
+		.C_HAS_ALMOST_EMPTY(0),
+		.C_HAS_ALMOST_FULL(0),
+		.C_HAS_BACKUP(0),
+		.C_HAS_DATA_COUNT(0),
+		.C_HAS_INT_CLK(0),
+		.C_HAS_MEMINIT_FILE(0),
+		.C_HAS_OVERFLOW(0),
+		.C_HAS_RD_DATA_COUNT(0),
+		.C_HAS_RD_RST(0),
+		.C_HAS_RST(1),
+		.C_HAS_SRST(0),
+		.C_HAS_UNDERFLOW(0),
+		.C_HAS_VALID(0),
+		.C_HAS_WR_ACK(0),
+		.C_HAS_WR_DATA_COUNT(0),
+		.C_HAS_WR_RST(0),
+		.C_IMPLEMENTATION_TYPE(0),
+		.C_INIT_WR_PNTR_VAL(0),
+		.C_MEMORY_TYPE(1),
+		.C_MIF_FILE_NAME("BlankString"),
+		.C_MSGON_VAL(1),
+		.C_OPTIMIZATION_MODE(0),
+		.C_OVERFLOW_LOW(0),
+		.C_PRELOAD_LATENCY(0),
+		.C_PRELOAD_REGS(1),
+		.C_PRIM_FIFO_TYPE("512x72"),
+		.C_PROG_EMPTY_THRESH_ASSERT_VAL(4),
+		.C_PROG_EMPTY_THRESH_NEGATE_VAL(5),
+		.C_PROG_EMPTY_TYPE(0),
+		.C_PROG_FULL_THRESH_ASSERT_VAL(31),
+		.C_PROG_FULL_THRESH_NEGATE_VAL(30),
+		.C_PROG_FULL_TYPE(0),
+		.C_RD_DATA_COUNT_WIDTH(6),
+		.C_RD_DEPTH(32),
+		.C_RD_FREQ(1),
+		.C_RD_PNTR_WIDTH(5),
+		.C_UNDERFLOW_LOW(0),
+		.C_USE_DOUT_RST(1),
+		.C_USE_ECC(0),
+		.C_USE_EMBEDDED_REG(0),
+		.C_USE_FIFO16_FLAGS(0),
+		.C_USE_FWFT_DATA_COUNT(1),
+		.C_VALID_LOW(0),
+		.C_WR_ACK_LOW(0),
+		.C_WR_DATA_COUNT_WIDTH(6),
+		.C_WR_DEPTH(32),
+		.C_WR_FREQ(1),
+		.C_WR_PNTR_WIDTH(5),
+		.C_WR_RESPONSE_LATENCY(1))
+	inst (
+		.CLK(clk),
+		.RST(rst),
+		.DIN(din),
+		.WR_EN(wr_en),
+		.RD_EN(rd_en),
+		.DOUT(dout),
+		.FULL(full),
+		.EMPTY(empty),
+		.BACKUP(),
+		.BACKUP_MARKER(),
+		.SRST(),
+		.WR_CLK(),
+		.WR_RST(),
+		.RD_CLK(),
+		.RD_RST(),
+		.PROG_EMPTY_THRESH(),
+		.PROG_EMPTY_THRESH_ASSERT(),
+		.PROG_EMPTY_THRESH_NEGATE(),
+		.PROG_FULL_THRESH(),
+		.PROG_FULL_THRESH_ASSERT(),
+		.PROG_FULL_THRESH_NEGATE(),
+		.INT_CLK(),
+		.INJECTDBITERR(),
+		.INJECTSBITERR(),
+		.ALMOST_FULL(),
+		.WR_ACK(),
+		.OVERFLOW(),
+		.ALMOST_EMPTY(),
+		.VALID(),
+		.UNDERFLOW(),
+		.DATA_COUNT(),
+		.RD_DATA_COUNT(),
+		.WR_DATA_COUNT(),
+		.PROG_FULL(),
+		.PROG_EMPTY(),
+		.SBITERR(),
+		.DBITERR());
+// Only the eight wrapper ports are wired; every other model port is left
+// unconnected.
+// synthesis translate_on
+
+// XST black box declaration
+// box_type "black_box"
+// synthesis attribute box_type of pcx_fifo is "black_box"
+
+endmodule
+
Index: /trunk/Xilinx/pcx_fifo.veo
===================================================================
--- /trunk/Xilinx/pcx_fifo.veo	(revision 6)
+++ /trunk/Xilinx/pcx_fifo.veo	(revision 6)
@@ -0,0 +1,50 @@
+/*******************************************************************************
+*     This file is owned and controlled by Xilinx and must be used             *
+*     solely for design, simulation, implementation and creation of            *
+*     design files limited to Xilinx devices or technologies. Use              *
+*     with non-Xilinx devices or technologies is expressly prohibited          *
+*     and immediately terminates your license.                                 *
+*                                                                              *
+*     XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS"            *
+*     SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR                  *
+*     XILINX DEVICES.  BY PROVIDING THIS DESIGN, CODE, OR INFORMATION          *
+*     AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION              *
+*     OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS                *
+*     IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT,                  *
+*     AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE         *
+*     FOR YOUR IMPLEMENTATION.  XILINX EXPRESSLY DISCLAIMS ANY                 *
+*     WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE                  *
+*     IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR           *
+*     REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF          *
+*     INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS          *
+*     FOR A PARTICULAR PURPOSE.                                                *
+*                                                                              *
+*     Xilinx products are not intended for use in life support                 *
+*     appliances, devices, or systems. Use in such applications are            *
+*     expressly prohibited.                                                    *
+*                                                                              *
+*     (c) Copyright 1995-2009 Xilinx, Inc.                                     *
+*     All rights reserved.                                                     *
+*******************************************************************************/
+// The following must be inserted into your Verilog file for this
+// core to be instantiated. Change the instance name and port connections
+// (in parentheses) to your own signal names.
+
+//----------- Begin Cut here for INSTANTIATION Template ---// INST_TAG
+pcx_fifo YourInstanceName (
+	.clk(clk),
+	.rst(rst),
+	.din(din), // Bus [129 : 0] 
+	.wr_en(wr_en),
+	.rd_en(rd_en),
+	.dout(dout), // Bus [129 : 0] 
+	.full(full),
+	.empty(empty));
+
+// INST_TAG_END ------ End INSTANTIATION Template ---------
+
+// You must compile the wrapper file pcx_fifo.v when simulating
+// the core, pcx_fifo. When compiling the wrapper file, be sure to
+// reference the XilinxCoreLib Verilog simulation library. For detailed
+// instructions, please refer to the "CORE Generator Help".
+
Index: /trunk/Xilinx/pcx_fifo.xco
===================================================================
--- /trunk/Xilinx/pcx_fifo.xco	(revision 6)
+++ /trunk/Xilinx/pcx_fifo.xco	(revision 6)
@@ -0,0 +1,84 @@
+##############################################################
+#
+# Xilinx Core Generator version 12.3
+# Date: Tue Mar 15 18:00:14 2011
+#
+##############################################################
+#
+#  This file contains the customisation parameters for a
+#  Xilinx CORE Generator IP GUI. It is strongly recommended
+#  that you do not manually alter this file as it may cause
+#  unexpected and unsupported behavior.
+#
+##############################################################
+#
+# BEGIN Project Options
+SET addpads = false
+SET asysymbol = true
+SET busformat = BusFormatAngleBracketNotRipped
+SET createndf = false
+SET designentry = Verilog
+SET device = xc5vlx110t
+SET devicefamily = virtex5
+SET flowvendor = Foundation_ISE
+SET formalverification = false
+SET foundationsym = false
+SET implementationfiletype = Ngc
+SET package = ff1136
+SET removerpms = false
+SET simulationfiles = Behavioral
+SET speedgrade = -3
+SET verilogsim = true
+SET vhdlsim = true
+# END Project Options
+# BEGIN Select
+SELECT Fifo_Generator family Xilinx,_Inc. 6.2
+# END Select
+# BEGIN Parameters
+CSET almost_empty_flag=false
+CSET almost_full_flag=false
+CSET component_name=pcx_fifo
+CSET data_count=false
+CSET data_count_width=6
+CSET disable_timing_violations=false
+CSET dout_reset_value=0
+CSET empty_threshold_assert_value=4
+CSET empty_threshold_negate_value=5
+CSET enable_ecc=false
+CSET enable_int_clk=false
+CSET enable_reset_synchronization=true
+CSET fifo_implementation=Common_Clock_Block_RAM
+CSET full_flags_reset_value=1
+CSET full_threshold_assert_value=31
+CSET full_threshold_negate_value=30
+CSET inject_dbit_error=false
+CSET inject_sbit_error=false
+CSET input_data_width=130
+CSET input_depth=32
+CSET output_data_width=130
+CSET output_depth=32
+CSET overflow_flag=false
+CSET overflow_sense=Active_High
+CSET performance_options=First_Word_Fall_Through
+CSET programmable_empty_type=No_Programmable_Empty_Threshold
+CSET programmable_full_type=No_Programmable_Full_Threshold
+CSET read_clock_frequency=1
+CSET read_data_count=false
+CSET read_data_count_width=6
+CSET reset_pin=true
+CSET reset_type=Asynchronous_Reset
+CSET underflow_flag=false
+CSET underflow_sense=Active_High
+CSET use_dout_reset=true
+CSET use_embedded_registers=false
+CSET use_extra_logic=true
+CSET valid_flag=false
+CSET valid_sense=Active_High
+CSET write_acknowledge_flag=false
+CSET write_acknowledge_sense=Active_High
+CSET write_clock_frequency=1
+CSET write_data_count=false
+CSET write_data_count_width=6
+# END Parameters
+GENERATE
+# CRC: 9ecc93b6
Index: /trunk/Xilinx/pll.xaw
===================================================================
--- /trunk/Xilinx/pll.xaw	(revision 6)
+++ /trunk/Xilinx/pll.xaw	(revision 6)
@@ -0,0 +1,3 @@
+XILINX-XDB 0.1 STUB 0.1 ASCII
+XILINX-XDM V1.6e
+$6gx7f=(`fgn#~oc/Egpoqiw'zzj~n8;sna,|pv2dq;:<y!fpbmqaZbnz&$$|ab!x`u2?53<8':;<55?.126456?39$:9>.as493*06792;=7<>!0120?467<2;9">?42291453<=':;<<577:CQGMQNR8<0M_YU_NLO]ZEKC@D:S[OCUD35?DTPRVEE@TQLLJKM6ZPFD\O:<6O]W[]LJI_XKEAXIA?n;@PT^ZIIDPUNBLLCE^AOOJSSLJG:86O]W[]LJI_XN@D:SX@FTR37?DTPRVEE@TQIIO0\QKOS[8k0M_YU_NLO]ZVKDVLI__MPCMILQQ7c3HX\VRAALX]SHIYAJZXHSNBDOTVGGH703HX\VRAALX]QAFIIN@MT\D@\1`9BVR\XGGFRS_KI_KKWP@TX]GC__k5NW[]@HNDRN]S[I<?8;@UY[FJLAG;TEC^ZT^@VBCCU9=1J[WQLLJKM5ZUUKVCIYKI>7:CT^ZEKC@D9SD@_UU]AQC@BZ8>0MZTPCMIJJ7YTZJUBNXHH189BS_YDDBE^XRJFVHFF[ACAI\F_o6OXZ^DOKRR^XL20MZTPP@PW5d=F_SU[@AQJSU]IMQRBZVIGGB[[0e9BS_YTQG^CXBAC9:@BJAQNL]B?7O[IG99@HN@ELJG:96MCKG@GGH&pa}bdyy.MUGE4?FJLNKCE46MCKG@LQQ723JF@JOAZT!ujpmir|)H^JJ??;BNHBGIR\VIGGKLFNe9@HN@EG\^TAXB[e:AOOCDH]]U[DL]J6:AOOLH6k2IGGD@>_H@VBB763JF@EC?PICWECZOI[]30OAEFN0]JJa=DDBCE=R^JRHMG2>EKC@D9h6MCKHL1[UCUAFN37NBDIOQFH2=DDBE^X=l4CMILQQ6XJ\L:=6MCKNWW4ZDRNNUBB^Zj;BNHKPR7WMC]EIK>2:AOOJSS8VN^XTQLYBNFf>EKCF__<RAZTe9@HNIR\9U[DL]J7:AOOJSS9>1H@FAZT358GIMH]]9<7NBDOTV73>EKCF__9l5LLJMVPAEJ8h1H@FAZTEAN5d=DDBE^XIMB2`9@HNIR\MIF?l5LLJMVPAEJ<h1H@FAZTEAN1<=DGDGBXYKKc:ALIUCI[H^BCC94CSGBP@B23MKOH_:4DBNH7>BBF;1OEi5KIWAOOZBN^@NN>6J@4:FQ@\5<L^O>7KOCSD38M0=NJ\LL:6B@CJGGe>JHKBOOSB[[3:OK^2=IM]]D^F?4O89LTQOJASOO56^FNICPFPUf3YCEE^IADDF;?UOIAZ\HO55_IOUJ@QN03YFGSLJYc:ROHZGC^VCE_Y64PSKNP\VB991[^B^PPHLJWDESA_O97]]n;SGE[OOS\LX87_KC3:PPP3=U[]UBBo5\IFG[P@TIIE<0_D@HLD31?V_T\LGT_T@LHSMMKPU?3Zse|agiy29WKU2<\[_N46[\E^@VBB?<]ZOTNXHHS49UM@Q6j2RJ[RXJRRKLJ4b<P@FBBU#WDC"3*4&T\\H+<#?/ARAJM3=_lkUBh<>4Xeo\Idlhz_oydaa119[`hYJageyZh||inl6?^6=8T<0W=4>5\78_5<3U=1j~zt8;ean[ujk?2|n~db`gc9uawungg*:"=?=;ya6ri~699~&?kh>72:~DEv412JKt=4I:382V2>2>31;n4>33faa`<5;?39wc9;:09m30<13-=86:<4}R64>2?=?j0:??jmed8172533m=26=4>:0yP0<<013=h6<==dcgf>7511;1}X:>50;395?1|[=31;448c;306adbm388:4<4b7a94?6=;3=p(h48a:&2b?1e3-8;6:j4$3092g=#?8027o8?:182g?6=8r.==7;i;%d9f>"68320(<?56:&26?353-;86894$06912=#9<0956*>6;05?!702<80(<65629'66<2i2.8=7m4$2193>"6m3n0(>65d:&0f?4<,:3196*;8;4b?!2b2?1/>o4n;%0`>4=#>=087)8
j:7d8 4d=>m1/=i49d:&2e?0>3-;h6;74$3390>o5l3:1(8k5779'27<1l21b??4?:%7f>20<,<i1:i5+6385`>=n:>0;6);j:648 34=>m10e>:50;&6a?113-?o6;j4$7092a=<a?<1<7*:e;55?!052?n07d:n:18'1`<002.>o78k;%7a>3b<3`>o6=4+5d843>"2k3<o7);m:7f8?l36290/9h486:&6g?0c32c?o7>5$4g933=#=j0=h65f4c83>!3b2><0(8m56e98m33=83.>i799;%7`>3b<,?81:i54o2794?"2m3==7)8=:7f8 4?=:=10c>m50;&6a?1132e8h7>5$4g933=<g:o1<7*:e;55?>i4n3:1(8k57798k16=83.>i799;:m1<?6=,<o1;;5+6385`>=h=<0;6);j:648 34=>m10c8>50;&6a?1132e?j7>5$4g932=#=k0=h65`4083>!3b2><07b:=:18'1`<0>21d8>4?:%7f>20<3f>?6=4+5d842>=h<<0;6);j:648?j21290/9h486:9l60<72-?n6:84;|`1b?6=:3:1<v*91;77?l34290/9h486:&56?0c32e>57>5$4g933=#>;0=h65rb2594?4=83:p(;?5259j16<72-?n6:84$7092a=<g<31<7*:e;55?!052?n07p}=e;296~;1838o70<i:4;8yv4f2909w08?:358936=:11v;950;3x936=>?1/:54:3:p73<72;q6:=4<5:?03?343ty>:7>51z?54?323-<36874}r13>5<6s48m68=4$7:916=z{:k1<7?t=2591<=#>10>56srn3`94?7|ug8h6=4>{|l1`?6=9rwe>h4?:0y~j7`=83;pqc=?:182xh493:1=vsr}|BCG~5>20;=;o6?3|BCF~6zHIZpqMN
Index: /trunk/Xilinx/dram_fifo.v
===================================================================
--- /trunk/Xilinx/dram_fifo.v	(revision 6)
+++ /trunk/Xilinx/dram_fifo.v	(revision 6)
@@ -0,0 +1,175 @@
+/*******************************************************************************
+*     This file is owned and controlled by Xilinx and must be used             *
+*     solely for design, simulation, implementation and creation of            *
+*     design files limited to Xilinx devices or technologies. Use              *
+*     with non-Xilinx devices or technologies is expressly prohibited          *
+*     and immediately terminates your license.                                 *
+*                                                                              *
+*     XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS"            *
+*     SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR                  *
+*     XILINX DEVICES.  BY PROVIDING THIS DESIGN, CODE, OR INFORMATION          *
+*     AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION              *
+*     OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS                *
+*     IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT,                  *
+*     AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE         *
+*     FOR YOUR IMPLEMENTATION.  XILINX EXPRESSLY DISCLAIMS ANY                 *
+*     WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE                  *
+*     IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR           *
+*     REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF          *
+*     INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS          *
+*     FOR A PARTICULAR PURPOSE.                                                *
+*                                                                              *
+*     Xilinx products are not intended for use in life support                 *
+*     appliances, devices, or systems. Use in such applications are            *
+*     expressly prohibited.                                                    *
+*                                                                              *
+*     (c) Copyright 1995-2009 Xilinx, Inc.                                     *
+*     All rights reserved.                                                     *
+*******************************************************************************/
+// The synthesis directives "translate_off/translate_on" specified below are
+// supported by Xilinx, Mentor Graphics and Synplicity synthesis
+// tools. Ensure they are correct for your synthesis tool(s).
+
+// You must compile the wrapper file dram_fifo.v when simulating
+// the core, dram_fifo. When compiling the wrapper file, be sure to
+// reference the XilinxCoreLib Verilog simulation library. For detailed
+// instructions, please refer to the "CORE Generator Help".
+
+`timescale 1ns/1ps
+
+module dram_fifo(	// CORE Generator wrapper around the FIFO_GENERATOR_V6_2 model
+	rst,
+	wr_clk,
+	rd_clk,
+	din,
+	wr_en,
+	rd_en,
+	dout,
+	full,
+	empty,
+	wr_data_count);
+// FIFO with separate write/read clocks: 104-bit words enter on wr_clk and leave on rd_clk.
+// The instance below is simulation-only; for XST the module is an empty black box.
+input rst;	// wired to the core's RST port
+input wr_clk;	// write-side clock
+input rd_clk;	// read-side clock
+input [103 : 0] din;	// write data (C_DIN_WIDTH = 104)
+input wr_en;	// write enable
+input rd_en;	// read enable
+output [103 : 0] dout;	// read data (C_DOUT_WIDTH = 104)
+output full;
+output empty;
+output [7 : 0] wr_data_count;	// NOTE(review): 8 bits for a 1024-deep FIFO - presumably the upper bits of the fill level; confirm in the FIFO Generator docs
+// Tools that honor translate_off/translate_on skip everything up to translate_on.
+// synthesis translate_off
+// Behavioral core from XilinxCoreLib; the settings correspond to dram_fifo.xco.
+      FIFO_GENERATOR_V6_2 #(
+		.C_COMMON_CLOCK(0),	// separate wr_clk / rd_clk (xco: Independent_Clocks_Block_RAM)
+		.C_COUNT_TYPE(0),
+		.C_DATA_COUNT_WIDTH(10),
+		.C_DEFAULT_VALUE("BlankString"),
+		.C_DIN_WIDTH(104),
+		.C_DOUT_RST_VAL("0"),
+		.C_DOUT_WIDTH(104),
+		.C_ENABLE_RLOCS(0),
+		.C_ENABLE_RST_SYNC(1),
+		.C_ERROR_INJECTION_TYPE(0),
+		.C_FAMILY("virtex5"),
+		.C_FULL_FLAGS_RST_VAL(1),
+		.C_HAS_ALMOST_EMPTY(0),
+		.C_HAS_ALMOST_FULL(0),
+		.C_HAS_BACKUP(0),
+		.C_HAS_DATA_COUNT(0),
+		.C_HAS_INT_CLK(0),
+		.C_HAS_MEMINIT_FILE(0),
+		.C_HAS_OVERFLOW(0),
+		.C_HAS_RD_DATA_COUNT(0),
+		.C_HAS_RD_RST(0),
+		.C_HAS_RST(1),
+		.C_HAS_SRST(0),
+		.C_HAS_UNDERFLOW(0),
+		.C_HAS_VALID(0),
+		.C_HAS_WR_ACK(0),
+		.C_HAS_WR_DATA_COUNT(1),	// the write-side count is the only count output enabled
+		.C_HAS_WR_RST(0),
+		.C_IMPLEMENTATION_TYPE(2),
+		.C_INIT_WR_PNTR_VAL(0),
+		.C_MEMORY_TYPE(1),
+		.C_MIF_FILE_NAME("BlankString"),
+		.C_MSGON_VAL(1),
+		.C_OPTIMIZATION_MODE(0),
+		.C_OVERFLOW_LOW(0),
+		.C_PRELOAD_LATENCY(1),
+		.C_PRELOAD_REGS(0),
+		.C_PRIM_FIFO_TYPE("1kx36"),
+		.C_PROG_EMPTY_THRESH_ASSERT_VAL(2),
+		.C_PROG_EMPTY_THRESH_NEGATE_VAL(3),
+		.C_PROG_EMPTY_TYPE(0),
+		.C_PROG_FULL_THRESH_ASSERT_VAL(1021),
+		.C_PROG_FULL_THRESH_NEGATE_VAL(1020),
+		.C_PROG_FULL_TYPE(0),
+		.C_RD_DATA_COUNT_WIDTH(10),
+		.C_RD_DEPTH(1024),
+		.C_RD_FREQ(1),
+		.C_RD_PNTR_WIDTH(10),
+		.C_UNDERFLOW_LOW(0),
+		.C_USE_DOUT_RST(1),
+		.C_USE_ECC(0),
+		.C_USE_EMBEDDED_REG(0),
+		.C_USE_FIFO16_FLAGS(0),
+		.C_USE_FWFT_DATA_COUNT(0),
+		.C_VALID_LOW(0),
+		.C_WR_ACK_LOW(0),
+		.C_WR_DATA_COUNT_WIDTH(8),	// matches the 8-bit wr_data_count port
+		.C_WR_DEPTH(1024),	// 1024 entries, 10-bit write pointer below
+		.C_WR_FREQ(1),
+		.C_WR_PNTR_WIDTH(10),
+		.C_WR_RESPONSE_LATENCY(1))
+	inst (
+		.RST(rst),
+		.WR_CLK(wr_clk),
+		.RD_CLK(rd_clk),
+		.DIN(din),
+		.WR_EN(wr_en),
+		.RD_EN(rd_en),
+		.DOUT(dout),
+		.FULL(full),
+		.EMPTY(empty),
+		.WR_DATA_COUNT(wr_data_count),
+		.BACKUP(),	// from here on, every remaining core port is left unconnected
+		.BACKUP_MARKER(),
+		.CLK(),
+		.SRST(),
+		.WR_RST(),
+		.RD_RST(),
+		.PROG_EMPTY_THRESH(),
+		.PROG_EMPTY_THRESH_ASSERT(),
+		.PROG_EMPTY_THRESH_NEGATE(),
+		.PROG_FULL_THRESH(),
+		.PROG_FULL_THRESH_ASSERT(),
+		.PROG_FULL_THRESH_NEGATE(),
+		.INT_CLK(),
+		.INJECTDBITERR(),
+		.INJECTSBITERR(),
+		.ALMOST_FULL(),
+		.WR_ACK(),
+		.OVERFLOW(),
+		.ALMOST_EMPTY(),
+		.VALID(),
+		.UNDERFLOW(),
+		.DATA_COUNT(),
+		.RD_DATA_COUNT(),
+		.PROG_FULL(),
+		.PROG_EMPTY(),
+		.SBITERR(),
+		.DBITERR());
+
+
+// synthesis translate_on
+// The attribute comments below are tool directives - keep their text unchanged.
+// XST black box declaration
+// box_type "black_box"
+// synthesis attribute box_type of dram_fifo is "black_box"
+
+endmodule
+
Index: /trunk/Xilinx/dram.veo
===================================================================
--- /trunk/Xilinx/dram.veo	(revision 6)
+++ /trunk/Xilinx/dram.veo	(revision 6)
@@ -0,0 +1,194 @@
+//*****************************************************************************
+// DISCLAIMER OF LIABILITY
+//
+// This file contains proprietary and confidential information of
+// Xilinx, Inc. ("Xilinx"), that is distributed under a license
+// from Xilinx, and may be used, copied and/or disclosed only
+// pursuant to the terms of a valid license agreement with Xilinx.
+//
+// XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION
+// ("MATERIALS") "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+// EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING WITHOUT
+// LIMITATION, ANY WARRANTY WITH RESPECT TO NONINFRINGEMENT,
+// MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE. Xilinx
+// does not warrant that functions included in the Materials will
+// meet the requirements of Licensee, or that the operation of the
+// Materials will be uninterrupted or error-free, or that defects
+// in the Materials will be corrected. Furthermore, Xilinx does
+// not warrant or make any representations regarding use, or the
+// results of the use, of the Materials in terms of correctness,
+// accuracy, reliability or otherwise.
+//
+// Xilinx products are not designed or intended to be fail-safe,
+// or for use in any application requiring fail-safe performance,
+// such as life-support or safety devices or systems, Class III
+// medical devices, nuclear facilities, applications related to
+// the deployment of airbags, or any other applications that could
+// lead to death, personal injury or severe property or
+// environmental damage (individually and collectively, "critical
+// applications"). Customer assumes the sole risk and liability
+// of any use of Xilinx products in critical applications,
+// subject only to applicable laws and regulations governing
+// limitations on product liability.
+//
+// Copyright 2007, 2008 Xilinx, Inc.
+// All rights reserved.
+//
+// This disclaimer and copyright notice must be retained as part
+// of this file at all times.
+//*****************************************************************************
+//   ____  ____
+//  /   /\/   /
+// /___/  \  /   Vendor             : Xilinx
+// \   \   \/    Version            : 3.6
+//  \   \        Application        : MIG
+//  /   /        Filename           : dram.veo
+// /___/   /\    Date Last Modified : $Date: 2010/06/09 18:13:34 $
+// \   \  /  \   Date Created       : Wed May 2 2007
+//  \___\/\___\
+//
+// Purpose     : Template file containing code that can be used as a model
+//               for instantiating a CORE Generator module in a HDL design.
+// Revision History:
+//*****************************************************************************
+
+// The following must be inserted into your Verilog file for this
+// core to be instantiated. Change the instance name and port connections
+// (in parentheses) to your own signal names.
+
+//----------- Begin Cut here for INSTANTIATION Template ---// INST_TAG
+
+ dram # (
+    .BANK_WIDTH(2),   
+                                       // # of memory bank addr bits.
+    .CKE_WIDTH(1),   
+                                       // # of memory clock enable outputs.
+    .CLK_WIDTH(2),   
+                                       // # of clock outputs.
+    .COL_WIDTH(10),   
+                                       // # of memory column bits.
+    .CS_NUM(1),   
+                                       // # of separate memory chip selects.
+    .CS_WIDTH(1),   
+                                       // # of total memory chip selects.
+    .CS_BITS(0),   
+                                       // set to log2(CS_NUM) (rounded up).
+    .DM_WIDTH(8),   
+                                       // # of data mask bits.
+    .DQ_WIDTH(64),   
+                                       // # of data width.
+    .DQ_PER_DQS(8),   
+                                       // # of DQ data bits per strobe.
+    .DQS_WIDTH(8),   
+                                       // # of DQS strobes.
+    .DQ_BITS(6),   
+                                       // set to log2(DQS_WIDTH*DQ_PER_DQS).
+    .DQS_BITS(3),   
+                                       // set to log2(DQS_WIDTH).
+    .ODT_WIDTH(1),   
+                                       // # of memory on-die term enables.
+    .ROW_WIDTH(13),   
+                                       // # of memory row and # of addr bits.
+    .ADDITIVE_LAT(0),   
+                                       // additive write latency.
+    .BURST_LEN(4),   
+                                       // burst length (in double words).
+    .BURST_TYPE(0),   
+                                       // burst type (=0 seq; =1 interleaved).
+    .CAS_LAT(3),   
+                                       // CAS latency.
+    .ECC_ENABLE(0),   
+                                       // enable ECC (=1 enable).
+    .APPDATA_WIDTH(128),   
+                                       // # of usr read/write data bus bits.
+    .MULTI_BANK_EN(1),   
+                                       // Keeps multiple banks open. (= 1 enable).
+    .TWO_T_TIME_EN(1),   
+                                       // 2t timing for unbuffered dimms.
+    .ODT_TYPE(1),   
+                                       // ODT (=0(none),=1(75),=2(150),=3(50)).
+    .REDUCE_DRV(0),   
+                                       // reduced strength mem I/O (=1 yes).
+    .REG_ENABLE(0),   
+                                       // registered addr/ctrl (=1 yes).
+    .TREFI_NS(7800),   
+                                       // auto refresh interval (ns).
+    .TRAS(40000),   
+                                       // active->precharge delay.
+    .TRCD(15000),   
+                                       // active->read/write delay.
+    .TRFC(105000),   
+                                       // refresh->refresh, refresh->active delay.
+    .TRP(15000),   
+                                       // precharge->command delay.
+    .TRTP(7500),   
+                                       // read->precharge delay.
+    .TWR(15000),   
+                                       // used to determine write->precharge.
+    .TWTR(7500),   
+                                       // write->read delay.
+    .HIGH_PERFORMANCE_MODE("TRUE"),   
+                              // # = TRUE, the IODELAY performance mode is set
+                              // to high.
+                              // # = FALSE, the IODELAY performance mode is set
+                              // to low.
+    .SIM_ONLY(0),   
+                                       // = 1 to skip SDRAM power up delay.
+    .DEBUG_EN(0),   
+                                       // Enable debug signals/controls.
+                                       // When this parameter is changed from 0 to 1,
+                                       // make sure to uncomment the coregen commands
+                                       // in ise_flow.bat or create_ise.bat files in
+                                       // par folder.
+    .CLK_PERIOD(5000),   
+                                       // Core/Memory clock period (in ps).
+    .DLL_FREQ_MODE("HIGH"),   
+                                       // DCM Frequency range.
+    .CLK_TYPE("SINGLE_ENDED"),   
+                                       // # = "DIFFERENTIAL" -> Differential input clocks,
+                                       // # = "SINGLE_ENDED" -> Single ended input clocks.
+    .NOCLK200(0),   
+                                       // clk200 enable and disable.
+    .RST_ACT_LOW(1)     
+                                       // =1 for active low reset, =0 for active high.
+)
+u_dram (
+    .ddr2_dq                   (ddr2_dq),
+    .ddr2_a                    (ddr2_a),
+    .ddr2_ba                   (ddr2_ba),
+    .ddr2_ras_n                (ddr2_ras_n),
+    .ddr2_cas_n                (ddr2_cas_n),
+    .ddr2_we_n                 (ddr2_we_n),
+    .ddr2_cs_n                 (ddr2_cs_n),
+    .ddr2_odt                  (ddr2_odt),
+    .ddr2_cke                  (ddr2_cke),
+    .ddr2_dm                   (ddr2_dm),
+    .sys_clk                   (sys_clk),
+    .idly_clk_200              (idly_clk_200),
+    .sys_rst_n                 (sys_rst_n),
+    .phy_init_done             (phy_init_done),
+    .rst0_tb                   (rst0_tb),
+    .clk0_tb                   (clk0_tb),
+    .app_wdf_afull             (app_wdf_afull),
+    .app_af_afull              (app_af_afull),
+    .rd_data_valid             (rd_data_valid),
+    .app_wdf_wren              (app_wdf_wren),
+    .app_af_wren               (app_af_wren),
+    .app_af_addr               (app_af_addr),
+    .app_af_cmd                (app_af_cmd),
+    .rd_data_fifo_out          (rd_data_fifo_out),
+    .app_wdf_data              (app_wdf_data),
+    .app_wdf_mask_data         (app_wdf_mask_data),
+    .ddr2_dqs                  (ddr2_dqs),
+    .ddr2_dqs_n                (ddr2_dqs_n),
+    .ddr2_ck                   (ddr2_ck),
+    .ddr2_ck_n                 (ddr2_ck_n)
+);
+
+// INST_TAG_END ------ End INSTANTIATION Template ---------
+
+// You must compile the wrapper file dram.v when simulating
+// the core, dram. When compiling the wrapper file, be sure to
+// reference the XilinxCoreLib Verilog simulation library. For detailed
+// instructions, please refer to the "CORE Generator Help".
+
Index: /trunk/Xilinx/dram.xco
===================================================================
--- /trunk/Xilinx/dram.xco	(revision 6)
+++ /trunk/Xilinx/dram.xco	(revision 6)
@@ -0,0 +1,42 @@
+##############################################################
+#
+# Xilinx Core Generator version 12.3
+# Date: Mon Mar 14 23:37:43 2011
+#
+##############################################################
+#
+#  This file contains the customisation parameters for a
+#  Xilinx CORE Generator IP GUI. It is strongly recommended
+#  that you do not manually alter this file as it may cause
+#  unexpected and unsupported behavior.
+#
+##############################################################
+#
+# BEGIN Project Options
+SET addpads = false
+SET asysymbol = true
+SET busformat = BusFormatAngleBracketNotRipped
+SET createndf = false
+SET designentry = VHDL
+SET device = xc5vlx110t
+SET devicefamily = virtex5
+SET flowvendor = Foundation_ISE
+SET formalverification = false
+SET foundationsym = false
+SET implementationfiletype = Ngc
+SET package = ff1136
+SET removerpms = false
+SET simulationfiles = Behavioral
+SET speedgrade = -3
+SET verilogsim = true
+SET vhdlsim = true
+# END Project Options
+# BEGIN Select
+SELECT MIG family Xilinx,_Inc. 3.6
+# END Select
+# BEGIN Parameters
+CSET component_name=dram
+CSET xml_input_file=./dram/user_design/mig.prj
+# END Parameters
+GENERATE
+# CRC: f2eca964
Index: /trunk/Xilinx/pll.v
===================================================================
--- /trunk/Xilinx/pll.v	(revision 6)
+++ /trunk/Xilinx/pll.v	(revision 6)
@@ -0,0 +1,82 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 1995-2010 Xilinx, Inc.  All rights reserved.
+////////////////////////////////////////////////////////////////////////////////
+//   ____  ____ 
+//  /   /\/   / 
+// /___/  \  /    Vendor: Xilinx 
+// \   \   \/     Version : 12.3
+//  \   \         Application : xaw2verilog
+//  /   /         Filename : pll.v
+// /___/   /\     Timestamp : 03/18/2011 15:52:04
+// \   \  /  \ 
+//  \___\/\___\ 
+//
+//Command: xaw2verilog -st /home/sal/Desktop/sparc64soc/xup5lx110t/ipcore_dir/./pll.xaw /home/sal/Desktop/sparc64soc/xup5lx110t/ipcore_dir/./pll
+//Design Name: pll
+//Device: xc5vlx110t-3ff1738
+//
+// Module pll
+// Generated by Xilinx Architecture Wizard
+// Written for synthesis tool: Synplify
+// For block PLL_ADV_INST, Estimated PLL Jitter for CLKOUT0 = 0.153 ns
+`timescale 1ns / 1ps
+
+module pll(CLKIN1_IN, 	// reference clock input; goes through an IBUFG
+           RST_IN, 	// drives the RST pin of PLL_ADV
+           CLKOUT0_OUT, 	// CLKOUT0 after a BUFG
+           LOCKED_OUT);	// LOCKED output of PLL_ADV
+// Single-output clock generator: IBUFG -> PLL_ADV -> BUFG, with the feedback looped inside the module.
+    input CLKIN1_IN;
+    input RST_IN;
+   output CLKOUT0_OUT;
+   output LOCKED_OUT;
+   
+   wire CLKFBOUT_CLKFBIN;	// connects CLKFBOUT directly back to CLKFBIN
+   wire CLKIN1_IBUFG;	// buffered copy of CLKIN1_IN
+   wire CLKOUT0_BUF;	// raw CLKOUT0, before the BUFG
+   wire GND_BIT;
+   wire [4:0] GND_BUS_5;
+   wire [15:0] GND_BUS_16;
+   wire VCC_BIT;
+   
+   assign GND_BIT = 0;	// constant tie-offs for the unused PLL_ADV inputs
+   assign GND_BUS_5 = 5'b00000;
+   assign GND_BUS_16 = 16'b0000000000000000;
+   assign VCC_BIT = 1;
+   IBUFG  CLKIN1_IBUFG_INST (.I(CLKIN1_IN), 
+                            .O(CLKIN1_IBUFG));
+   BUFG  CLKOUT0_BUFG_INST (.I(CLKOUT0_BUF), 
+                           .O(CLKOUT0_OUT));
+   PLL_ADV #( .BANDWIDTH("OPTIMIZED"), .CLKIN1_PERIOD(5.000), 	// CLKIN1 declared as a 5.000 ns (200 MHz) clock
+         .CLKIN2_PERIOD(10.000), .CLKOUT0_DIVIDE(8), .CLKOUT0_PHASE(0.000), 
+         .CLKOUT0_DUTY_CYCLE(0.500), .COMPENSATION("SYSTEM_SYNCHRONOUS"), 
+         .DIVCLK_DIVIDE(1), .CLKFBOUT_MULT(2), .CLKFBOUT_PHASE(0.0), 	// NOTE(review): with the usual PLL relation Fout = Fin*2/(1*8), i.e. 50 MHz from 200 MHz - confirm
+         .REF_JITTER(0.005000) ) PLL_ADV_INST (.CLKFBIN(CLKFBOUT_CLKFBIN), 
+                         .CLKINSEL(VCC_BIT), 	// input select tied to constant 1
+                         .CLKIN1(CLKIN1_IBUFG), 
+                         .CLKIN2(GND_BIT), 	// second clock input unused, tied to 0
+                         .DADDR(GND_BUS_5[4:0]), 	// DADDR/DCLK/DEN/DI/DWE are all tied to 0
+                         .DCLK(GND_BIT), 
+                         .DEN(GND_BIT), 
+                         .DI(GND_BUS_16[15:0]), 
+                         .DWE(GND_BIT), 
+                         .REL(GND_BIT), 
+                         .RST(RST_IN), 
+                         .CLKFBDCM(), 
+                         .CLKFBOUT(CLKFBOUT_CLKFBIN), 
+                         .CLKOUTDCM0(), 
+                         .CLKOUTDCM1(), 
+                         .CLKOUTDCM2(), 
+                         .CLKOUTDCM3(), 
+                         .CLKOUTDCM4(), 
+                         .CLKOUTDCM5(), 
+                         .CLKOUT0(CLKOUT0_BUF), 	// the only clock output in use
+                         .CLKOUT1(), 
+                         .CLKOUT2(), 
+                         .CLKOUT3(), 
+                         .CLKOUT4(), 
+                         .CLKOUT5(), 
+                         .DO(), 
+                         .DRDY(), 
+                         .LOCKED(LOCKED_OUT));
+endmodule
Index: /trunk/Xilinx/dram_fifo.veo
===================================================================
--- /trunk/Xilinx/dram_fifo.veo	(revision 6)
+++ /trunk/Xilinx/dram_fifo.veo	(revision 6)
@@ -0,0 +1,52 @@
+/*******************************************************************************
+*     This file is owned and controlled by Xilinx and must be used             *
+*     solely for design, simulation, implementation and creation of            *
+*     design files limited to Xilinx devices or technologies. Use              *
+*     with non-Xilinx devices or technologies is expressly prohibited          *
+*     and immediately terminates your license.                                 *
+*                                                                              *
+*     XILINX IS PROVIDING THIS DESIGN, CODE, OR INFORMATION "AS IS"            *
+*     SOLELY FOR USE IN DEVELOPING PROGRAMS AND SOLUTIONS FOR                  *
+*     XILINX DEVICES.  BY PROVIDING THIS DESIGN, CODE, OR INFORMATION          *
+*     AS ONE POSSIBLE IMPLEMENTATION OF THIS FEATURE, APPLICATION              *
+*     OR STANDARD, XILINX IS MAKING NO REPRESENTATION THAT THIS                *
+*     IMPLEMENTATION IS FREE FROM ANY CLAIMS OF INFRINGEMENT,                  *
+*     AND YOU ARE RESPONSIBLE FOR OBTAINING ANY RIGHTS YOU MAY REQUIRE         *
+*     FOR YOUR IMPLEMENTATION.  XILINX EXPRESSLY DISCLAIMS ANY                 *
+*     WARRANTY WHATSOEVER WITH RESPECT TO THE ADEQUACY OF THE                  *
+*     IMPLEMENTATION, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OR           *
+*     REPRESENTATIONS THAT THIS IMPLEMENTATION IS FREE FROM CLAIMS OF          *
+*     INFRINGEMENT, IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS          *
+*     FOR A PARTICULAR PURPOSE.                                                *
+*                                                                              *
+*     Xilinx products are not intended for use in life support                 *
+*     appliances, devices, or systems. Use in such applications are            *
+*     expressly prohibited.                                                    *
+*                                                                              *
+*     (c) Copyright 1995-2009 Xilinx, Inc.                                     *
+*     All rights reserved.                                                     *
+*******************************************************************************/
+// The following must be inserted into your Verilog file for this
+// core to be instantiated. Change the instance name and port connections
+// (in parentheses) to your own signal names.
+
+//----------- Begin Cut here for INSTANTIATION Template ---// INST_TAG
+dram_fifo YourInstanceName (
+	.rst(rst),
+	.wr_clk(wr_clk),
+	.rd_clk(rd_clk),
+	.din(din), // Bus [103 : 0] 
+	.wr_en(wr_en),
+	.rd_en(rd_en),
+	.dout(dout), // Bus [103 : 0] 
+	.full(full),
+	.empty(empty),
+	.wr_data_count(wr_data_count)); // Bus [7 : 0] 
+
+// INST_TAG_END ------ End INSTANTIATION Template ---------
+
+// You must compile the wrapper file dram_fifo.v when simulating
+// the core, dram_fifo. When compiling the wrapper file, be sure to
+// reference the XilinxCoreLib Verilog simulation library. For detailed
+// instructions, please refer to the "CORE Generator Help".
+
Index: /trunk/Xilinx/dram_fifo.xco
===================================================================
--- /trunk/Xilinx/dram_fifo.xco	(revision 6)
+++ /trunk/Xilinx/dram_fifo.xco	(revision 6)
@@ -0,0 +1,58 @@
+##############################################################
+##############################################################
+##############################################################
+SET designentry = Verilog
+SET BusFormat = BusFormatAngleBracketNotRipped
+SET devicefamily = virtex5
+SET device = xc5vlx110t
+SET package = ff1136
+SET speedgrade = -3
+SET FlowVendor = Foundation_ISE
+SET VerilogSim = True
+SET VHDLSim = True
+SELECT Fifo_Generator family Xilinx,_Inc. 6.2
+CSET almost_empty_flag=false
+CSET almost_full_flag=false
+CSET component_name=dram_fifo
+CSET data_count=false
+CSET data_count_width=10
+CSET disable_timing_violations=false
+CSET dout_reset_value=0
+CSET empty_threshold_assert_value=2
+CSET empty_threshold_negate_value=3
+CSET enable_ecc=false
+CSET enable_int_clk=false
+CSET enable_reset_synchronization=true
+CSET fifo_implementation=Independent_Clocks_Block_RAM
+CSET full_flags_reset_value=1
+CSET full_threshold_assert_value=1021
+CSET full_threshold_negate_value=1020
+CSET inject_dbit_error=false
+CSET inject_sbit_error=false
+CSET input_data_width=104
+CSET input_depth=1024
+CSET output_data_width=104
+CSET output_depth=1024
+CSET overflow_flag=false
+CSET overflow_sense=Active_High
+CSET performance_options=Standard_FIFO
+CSET programmable_empty_type=No_Programmable_Empty_Threshold
+CSET programmable_full_type=No_Programmable_Full_Threshold
+CSET read_clock_frequency=1
+CSET read_data_count=false
+CSET read_data_count_width=10
+CSET reset_pin=true
+CSET reset_type=Asynchronous_Reset
+CSET underflow_flag=false
+CSET underflow_sense=Active_High
+CSET use_dout_reset=true
+CSET use_embedded_registers=false
+CSET use_extra_logic=false
+CSET valid_flag=false
+CSET valid_sense=Active_High
+CSET write_acknowledge_flag=false
+CSET write_acknowledge_sense=Active_High
+CSET write_clock_frequency=1
+CSET write_data_count=true
+CSET write_data_count_width=8
+GENERATE
Index: /trunk/T1-FPU/fpu_cnt_lead0_53b.v
===================================================================
--- /trunk/T1-FPU/fpu_cnt_lead0_53b.v	(revision 6)
+++ /trunk/T1-FPU/fpu_cnt_lead0_53b.v	(revision 6)
@@ -0,0 +1,438 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_cnt_lead0_53b.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	53 bit lead 0 counter.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_cnt_lead0_53b (
+	din,
+
+	lead0
+);
+
+
+input [52:0]	din;			// data in- count its leading 0's
+
+output [5:0]	lead0;			// number of leading 0's in data in
+
+
+wire		din_52_49_eq_0;
+wire		din_52_51_eq_0;
+wire		lead0_52_49_0;
+wire		din_48_45_eq_0;
+wire		din_48_47_eq_0;
+wire		lead0_48_45_0;
+wire		din_44_41_eq_0;
+wire		din_44_43_eq_0;
+wire		lead0_44_41_0;
+wire		din_40_37_eq_0;
+wire		din_40_39_eq_0;
+wire		lead0_40_37_0;
+wire		din_36_33_eq_0;
+wire		din_36_35_eq_0;
+wire		lead0_36_33_0;
+wire		din_32_29_eq_0;
+wire		din_32_31_eq_0;
+wire		lead0_32_29_0;
+wire		din_28_25_eq_0;
+wire		din_28_27_eq_0;
+wire		lead0_28_25_0;
+wire		din_24_21_eq_0;
+wire		din_24_23_eq_0;
+wire		lead0_24_21_0;
+wire		din_20_17_eq_0;
+wire		din_20_19_eq_0;
+wire		lead0_20_17_0;
+wire		din_16_13_eq_0;
+wire		din_16_15_eq_0;
+wire		lead0_16_13_0;
+wire		din_12_9_eq_0;
+wire		din_12_11_eq_0;
+wire		lead0_12_9_0;
+wire		din_8_5_eq_0;
+wire		din_8_7_eq_0;
+wire		lead0_8_5_0;
+wire		din_4_1_eq_0;
+wire		din_4_3_eq_0;
+wire		lead0_4_1_0;
+wire		lead0_0_0;
+wire		din_52_45_eq_0;
+wire		lead0_52_45_1;
+wire		lead0_52_45_0;
+wire		din_44_37_eq_0;
+wire		lead0_44_37_1;
+wire		lead0_44_37_0;
+wire		din_36_29_eq_0;
+wire		lead0_36_29_1;
+wire		lead0_36_29_0;
+wire		din_28_21_eq_0;
+wire		lead0_28_21_1;
+wire		lead0_28_21_0;
+wire		din_20_13_eq_0;
+wire		lead0_20_13_1;
+wire		lead0_20_13_0;
+wire		din_12_5_eq_0;
+wire		lead0_12_5_1;
+wire		lead0_12_5_0;
+wire		lead0_4_0_1;
+wire		lead0_4_0_0;
+wire		din_52_37_eq_0;
+wire		lead0_52_37_2;
+wire		lead0_52_37_1;
+wire		lead0_52_37_0;
+wire		din_36_21_eq_0;
+wire		lead0_36_21_2;
+wire		lead0_36_21_1;
+wire		lead0_36_21_0;
+wire		din_20_5_eq_0;
+wire		lead0_20_5_2;
+wire		lead0_20_5_1;
+wire		lead0_20_5_0;
+wire		lead0_4_0_2;
+wire		din_52_21_eq_0;
+wire		lead0_52_21_3;
+wire		lead0_52_21_2;
+wire		lead0_52_21_1;
+wire		lead0_52_21_0;
+wire		lead0_20_0_3;
+wire		lead0_20_0_2;
+wire		lead0_20_0_1;
+wire		lead0_20_0_0;
+wire		lead0_5;
+wire		lead0_4;
+wire		lead0_3;
+wire		lead0_2;
+wire		lead0_1;
+wire		lead0_0;
+wire [5:0]	lead0;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Instantiations of lead 0 building blocks.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_52_49 (
+	.din			(din[52:49]),
+
+	.din_3_0_eq_0		(din_52_49_eq_0),
+	.din_3_2_eq_0		(din_52_51_eq_0),
+	.lead0_4b_0		(lead0_52_49_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_48_45 (
+        .din                    (din[48:45]),
+
+        .din_3_0_eq_0           (din_48_45_eq_0),
+        .din_3_2_eq_0           (din_48_47_eq_0),
+        .lead0_4b_0             (lead0_48_45_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_44_41 (
+        .din                    (din[44:41]),
+
+        .din_3_0_eq_0           (din_44_41_eq_0),
+        .din_3_2_eq_0           (din_44_43_eq_0),
+        .lead0_4b_0             (lead0_44_41_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_40_37 (
+        .din                    (din[40:37]),
+
+        .din_3_0_eq_0           (din_40_37_eq_0),
+        .din_3_2_eq_0           (din_40_39_eq_0),
+        .lead0_4b_0             (lead0_40_37_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_36_33 (
+        .din                    (din[36:33]),
+
+        .din_3_0_eq_0           (din_36_33_eq_0),
+        .din_3_2_eq_0           (din_36_35_eq_0),
+        .lead0_4b_0             (lead0_36_33_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_32_29 (
+        .din                    (din[32:29]),
+
+        .din_3_0_eq_0           (din_32_29_eq_0),
+        .din_3_2_eq_0           (din_32_31_eq_0),
+        .lead0_4b_0             (lead0_32_29_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_28_25 (
+        .din                    (din[28:25]),
+
+        .din_3_0_eq_0           (din_28_25_eq_0),
+        .din_3_2_eq_0           (din_28_27_eq_0),
+        .lead0_4b_0             (lead0_28_25_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_24_21 (
+        .din                    (din[24:21]),
+
+        .din_3_0_eq_0           (din_24_21_eq_0),
+        .din_3_2_eq_0           (din_24_23_eq_0),
+        .lead0_4b_0             (lead0_24_21_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_20_17 (
+        .din                    (din[20:17]),
+
+        .din_3_0_eq_0           (din_20_17_eq_0),
+        .din_3_2_eq_0           (din_20_19_eq_0),
+        .lead0_4b_0             (lead0_20_17_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_16_13 (
+        .din                    (din[16:13]),
+
+        .din_3_0_eq_0           (din_16_13_eq_0),
+        .din_3_2_eq_0           (din_16_15_eq_0),
+        .lead0_4b_0             (lead0_16_13_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_12_9 (
+        .din                    (din[12:9]),
+
+        .din_3_0_eq_0           (din_12_9_eq_0),
+        .din_3_2_eq_0           (din_12_11_eq_0),
+        .lead0_4b_0             (lead0_12_9_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_8_5 (
+        .din                    (din[8:5]),
+
+        .din_3_0_eq_0           (din_8_5_eq_0),
+        .din_3_2_eq_0           (din_8_7_eq_0),
+        .lead0_4b_0             (lead0_8_5_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_4_1 (
+        .din                    (din[4:1]),
+
+        .din_3_0_eq_0           (din_4_1_eq_0),
+        .din_3_2_eq_0           (din_4_3_eq_0),
+        .lead0_4b_0             (lead0_4_1_0)
+);
+
+assign lead0_0_0= (!din[0]);
+
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_52_45 (
+	.din_7_4_eq_0		(din_52_49_eq_0),
+	.din_7_6_eq_0		(din_52_51_eq_0),
+	.lead0_4b_0_hi		(lead0_52_49_0),
+	.din_3_0_eq_0		(din_48_45_eq_0),
+	.din_3_2_eq_0		(din_48_47_eq_0),
+	.lead0_4b_0_lo		(lead0_48_45_0),
+
+	.din_7_0_eq_0		(din_52_45_eq_0),
+	.lead0_8b_1		(lead0_52_45_1),
+	.lead0_8b_0		(lead0_52_45_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_44_37 (
+        .din_7_4_eq_0           (din_44_41_eq_0),
+        .din_7_6_eq_0           (din_44_43_eq_0),
+        .lead0_4b_0_hi          (lead0_44_41_0),
+        .din_3_0_eq_0           (din_40_37_eq_0),
+        .din_3_2_eq_0           (din_40_39_eq_0),
+        .lead0_4b_0_lo          (lead0_40_37_0),
+
+        .din_7_0_eq_0           (din_44_37_eq_0),
+        .lead0_8b_1             (lead0_44_37_1),
+        .lead0_8b_0             (lead0_44_37_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_36_29 (
+        .din_7_4_eq_0           (din_36_33_eq_0),
+        .din_7_6_eq_0           (din_36_35_eq_0),
+        .lead0_4b_0_hi          (lead0_36_33_0),
+        .din_3_0_eq_0           (din_32_29_eq_0),
+        .din_3_2_eq_0           (din_32_31_eq_0),
+        .lead0_4b_0_lo          (lead0_32_29_0),
+
+        .din_7_0_eq_0           (din_36_29_eq_0),
+        .lead0_8b_1             (lead0_36_29_1),
+        .lead0_8b_0             (lead0_36_29_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_28_21 (
+        .din_7_4_eq_0           (din_28_25_eq_0),
+        .din_7_6_eq_0           (din_28_27_eq_0),
+        .lead0_4b_0_hi          (lead0_28_25_0),
+        .din_3_0_eq_0           (din_24_21_eq_0),
+        .din_3_2_eq_0           (din_24_23_eq_0),
+        .lead0_4b_0_lo          (lead0_24_21_0),
+
+        .din_7_0_eq_0           (din_28_21_eq_0),
+        .lead0_8b_1             (lead0_28_21_1),
+        .lead0_8b_0             (lead0_28_21_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_20_13 (
+        .din_7_4_eq_0           (din_20_17_eq_0),
+        .din_7_6_eq_0           (din_20_19_eq_0),
+        .lead0_4b_0_hi          (lead0_20_17_0),
+        .din_3_0_eq_0           (din_16_13_eq_0),
+        .din_3_2_eq_0           (din_16_15_eq_0),
+        .lead0_4b_0_lo          (lead0_16_13_0),
+
+        .din_7_0_eq_0           (din_20_13_eq_0),
+        .lead0_8b_1             (lead0_20_13_1),
+        .lead0_8b_0             (lead0_20_13_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_12_5 (
+        .din_7_4_eq_0           (din_12_9_eq_0),
+        .din_7_6_eq_0           (din_12_11_eq_0),
+        .lead0_4b_0_hi          (lead0_12_9_0),
+        .din_3_0_eq_0           (din_8_5_eq_0),
+        .din_3_2_eq_0           (din_8_7_eq_0),
+        .lead0_4b_0_lo          (lead0_8_5_0),
+
+        .din_7_0_eq_0           (din_12_5_eq_0),
+        .lead0_8b_1             (lead0_12_5_1),
+        .lead0_8b_0             (lead0_12_5_0)
+);
+
+assign lead0_4_0_1= (!din_4_1_eq_0) && din_4_3_eq_0;	// count bit 1 for the din[4:0] tail: set when din_4_3_eq_0 is high but din_4_1_eq_0 is not
+
+assign lead0_4_0_0= ((!din_4_1_eq_0) && lead0_4_1_0)	// count bit 0: use the din[4:1] cell's bit while din_4_1_eq_0 is low,
+		|| (din_4_1_eq_0 && lead0_0_0);		// otherwise use lead0_0_0 (which is !din[0])
+
+
+fpu_cnt_lead0_lvl3 i_fpu_cnt_lead0_lvl3_52_37 (
+	.din_15_8_eq_0		(din_52_45_eq_0),
+	.din_15_12_eq_0		(din_52_49_eq_0),
+	.lead0_8b_1_hi		(lead0_52_45_1),
+	.lead0_8b_0_hi		(lead0_52_45_0),
+	.din_7_0_eq_0		(din_44_37_eq_0),
+	.din_7_4_eq_0		(din_44_41_eq_0),
+	.lead0_8b_1_lo		(lead0_44_37_1),
+	.lead0_8b_0_lo		(lead0_44_37_0),
+
+	.din_15_0_eq_0		(din_52_37_eq_0),
+	.lead0_16b_2		(lead0_52_37_2),
+	.lead0_16b_1		(lead0_52_37_1),
+	.lead0_16b_0		(lead0_52_37_0)
+);
+
+fpu_cnt_lead0_lvl3 i_fpu_cnt_lead0_lvl3_36_21 (
+        .din_15_8_eq_0          (din_36_29_eq_0),
+        .din_15_12_eq_0         (din_36_33_eq_0),           
+        .lead0_8b_1_hi          (lead0_36_29_1),
+        .lead0_8b_0_hi          (lead0_36_29_0),
+        .din_7_0_eq_0           (din_28_21_eq_0),
+        .din_7_4_eq_0           (din_28_25_eq_0),
+        .lead0_8b_1_lo          (lead0_28_21_1),
+        .lead0_8b_0_lo          (lead0_28_21_0),
+
+        .din_15_0_eq_0          (din_36_21_eq_0),
+        .lead0_16b_2            (lead0_36_21_2),
+        .lead0_16b_1            (lead0_36_21_1),
+        .lead0_16b_0            (lead0_36_21_0)
+);
+
+fpu_cnt_lead0_lvl3 i_fpu_cnt_lead0_lvl3_20_5 (
+        .din_15_8_eq_0          (din_20_13_eq_0),
+        .din_15_12_eq_0         (din_20_17_eq_0),           
+        .lead0_8b_1_hi          (lead0_20_13_1),
+        .lead0_8b_0_hi          (lead0_20_13_0),
+        .din_7_0_eq_0           (din_12_5_eq_0),
+        .din_7_4_eq_0           (din_12_9_eq_0),
+        .lead0_8b_1_lo          (lead0_12_5_1),
+        .lead0_8b_0_lo          (lead0_12_5_0),
+
+        .din_15_0_eq_0          (din_20_5_eq_0),
+        .lead0_16b_2            (lead0_20_5_2),
+        .lead0_16b_1            (lead0_20_5_1),
+        .lead0_16b_0            (lead0_20_5_0)
+);
+
+assign lead0_4_0_2= din_4_1_eq_0;
+
+
+fpu_cnt_lead0_lvl4 i_fpu_cnt_lead0_lvl4_52_21 (
+	.din_31_16_eq_0		(din_52_37_eq_0),
+	.din_31_24_eq_0		(din_52_45_eq_0),
+	.lead0_16b_2_hi		(lead0_52_37_2),
+	.lead0_16b_1_hi		(lead0_52_37_1),
+	.lead0_16b_0_hi		(lead0_52_37_0),
+	.din_15_0_eq_0		(din_36_21_eq_0),
+	.din_15_8_eq_0		(din_36_29_eq_0),
+	.lead0_16b_2_lo		(lead0_36_21_2),
+	.lead0_16b_1_lo		(lead0_36_21_1),
+	.lead0_16b_0_lo		(lead0_36_21_0),
+
+	.din_31_0_eq_0		(din_52_21_eq_0),
+	.lead0_32b_3		(lead0_52_21_3),
+	.lead0_32b_2		(lead0_52_21_2),
+	.lead0_32b_1		(lead0_52_21_1),
+	.lead0_32b_0		(lead0_52_21_0)
+);
+
+fpu_cnt_lead0_lvl4 i_fpu_cnt_lead0_lvl4_20_0 (	// lower lvl4 cell: hi side = din[20:5] results, lo side = hand-built din[4:0] count
+        .din_31_16_eq_0         (din_20_5_eq_0),
+        .din_31_24_eq_0         (din_20_13_eq_0),
+        .lead0_16b_2_hi         (lead0_20_5_2),
+        .lead0_16b_1_hi         (lead0_20_5_1),
+        .lead0_16b_0_hi         (lead0_20_5_0),
+        .din_15_0_eq_0          (1'b0),     	// constant 0: no all-zero flag is wired for the 5-bit lo side
+        .din_15_8_eq_0          (1'b0),	// constant 0, same reason
+        .lead0_16b_2_lo         (lead0_4_0_2),	// lead0_4_0_* are the assign-built count bits for din[4:0]
+        .lead0_16b_1_lo         (lead0_4_0_1),
+        .lead0_16b_0_lo         (lead0_4_0_0),
+
+        .din_31_0_eq_0          (            ),	// left unconnected; the final lead0 mux below does not reference it
+        .lead0_32b_3            (lead0_20_0_3),
+        .lead0_32b_2            (lead0_20_0_2),
+        .lead0_32b_1            (lead0_20_0_1),
+        .lead0_32b_0            (lead0_20_0_0)
+);
+
+assign lead0_5= din_52_21_eq_0;	// count bit 5 is the din_31_0_eq_0 output of the upper lvl4 cell (i_fpu_cnt_lead0_lvl4_52_21)
+
+assign lead0_4= ((!din_52_21_eq_0) && din_52_37_eq_0)	// count bit 4: all-zero flag of the upper 16-bit group of the selected half
+		|| (din_52_21_eq_0 && din_20_5_eq_0);		// (din_52_37_eq_0 for the upper half, din_20_5_eq_0 for the lower half)
+
+assign lead0_3= ((!din_52_21_eq_0) && lead0_52_21_3)	// count bits 3..0: AND-OR 2:1 mux between the two lvl4 cells,
+		|| (din_52_21_eq_0 && lead0_20_0_3);		// selecting the lower cell when din_52_21_eq_0 is high
+
+assign lead0_2= ((!din_52_21_eq_0) && lead0_52_21_2)
+		|| (din_52_21_eq_0 && lead0_20_0_2);
+
+assign lead0_1= ((!din_52_21_eq_0) && lead0_52_21_1)
+		|| (din_52_21_eq_0 && lead0_20_0_1);
+
+assign lead0_0= ((!din_52_21_eq_0) && lead0_52_21_0)
+		|| (din_52_21_eq_0 && lead0_20_0_0);
+
+assign lead0[5:0]= {lead0_5, lead0_4, lead0_3, lead0_2, lead0_1, lead0_0};	// pack the six count bits, MSB first
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_add_frac_dp.v
===================================================================
--- /trunk/T1-FPU/fpu_add_frac_dp.v	(revision 6)
+++ /trunk/T1-FPU/fpu_add_frac_dp.v	(revision 6)
@@ -0,0 +1,1040 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_add_frac_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipeline fraction datapath.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_add_frac_dp (
+	inq_in1,
+	inq_in2,
+	a1stg_step,
+	a1stg_sngop,
+	a1stg_expadd3_11,
+	a1stg_norm_dbl_in1,
+	a1stg_denorm_dbl_in1,
+	a1stg_norm_sng_in1,
+	a1stg_denorm_sng_in1,
+	a1stg_norm_dbl_in2,
+	a1stg_denorm_dbl_in2,
+	a1stg_norm_sng_in2,
+	a1stg_denorm_sng_in2,
+	a1stg_intlngop,
+	a2stg_frac1_in_frac1,
+	a2stg_frac1_in_frac2,
+	a1stg_2nan_in_inv,
+	a1stg_faddsubop_inv,
+	a2stg_frac1_in_qnan,
+	a2stg_frac1_in_nv,
+	a2stg_frac1_in_nv_dbl,
+	a6stg_step,
+	a2stg_frac2_in_frac1,
+	a2stg_frac2_in_qnan,
+	a2stg_shr_cnt_in,
+	a2stg_shr_cnt_5_inv_in,
+	a2stg_shr_frac2_shr_int,
+	a2stg_shr_frac2_shr_dbl,
+	a2stg_shr_frac2_shr_sng,
+	a2stg_shr_frac2_max,
+	a2stg_expadd_11,
+	a2stg_sub_step,
+	a2stg_fracadd_frac2_inv_in,
+	a2stg_fracadd_frac2_inv_shr1_in,
+	a2stg_fracadd_frac2,
+	a2stg_fracadd_cin_in,
+	a2stg_exp,
+	a2stg_expdec_neq_0,
+	a3stg_faddsubopa,
+	a3stg_sub_in,
+	a3stg_exp10_0_eq0,
+	a3stg_exp10_1_eq0,
+	a3stg_exp_0,
+	a4stg_rnd_frac_add_inv,
+	a3stg_fdtos_inv,
+	a4stg_fixtos_fxtod_inv,
+	a4stg_rnd_sng,
+	a4stg_rnd_dbl,
+	a4stg_shl_cnt_in,
+	add_frac_out_rndadd,
+	add_frac_out_rnd_frac,
+	a4stg_in_of,
+	add_frac_out_shl,
+	a4stg_to_0,
+	fadd_clken_l,
+	rclk,
+	
+	a1stg_in2_neq_in1_frac,
+	a1stg_in2_gt_in1_frac,
+	a1stg_in2_eq_in1_exp,
+	a2stg_frac2_63,
+	a2stg_frac2hi_neq_0,
+	a2stg_frac2lo_neq_0,
+	a3stg_fsdtoix_nx,
+	a3stg_fsdtoi_nx,
+	a3stg_denorm,
+	a3stg_denorm_inv,
+	a3stg_lead0,
+	a4stg_round,
+	a4stg_shl_cnt,
+	a4stg_denorm_inv,
+	a3stg_inc_exp_inv,
+	a3stg_same_exp_inv,
+	a3stg_dec_exp_inv,
+	a4stg_rnd_frac_40,
+	a4stg_rnd_frac_39,
+	a4stg_rnd_frac_11,
+	a4stg_rnd_frac_10,
+	a4stg_rndadd_cout,
+	a4stg_frac_9_0_nx,
+	a4stg_frac_dbl_nx,
+	a4stg_frac_38_0_nx,
+	a4stg_frac_sng_nx,
+	a4stg_frac_neq_0,
+	a4stg_shl_data_neq_0,
+	add_of_out_cout,
+	add_frac_out,
+
+	se,
+        si,
+        so
+);
+
+
+input [62:0]	inq_in1;		// request operand 1 to op pipes
+input [63:0]	inq_in2;		// request operand 2 to op pipes
+input		a1stg_step;		// add pipe load
+input		a1stg_sngop;		// single precision operation- add 1 stg
+input		a1stg_expadd3_11;	// exponent adder sign out- add 1 stg
+input		a1stg_norm_dbl_in1;	// select line to normalized fraction 1
+input		a1stg_denorm_dbl_in1;	// select line to normalized fraction 1
+input		a1stg_norm_sng_in1;	// select line to normalized fraction 1
+input		a1stg_denorm_sng_in1;	// select line to normalized fraction 1
+input		a1stg_norm_dbl_in2;	// select line to normalized fraction 2
+input		a1stg_denorm_dbl_in2;	// select line to normalized fraction 2
+input		a1stg_norm_sng_in2;	// select line to normalized fraction 2
+input		a1stg_denorm_sng_in2;	// select line to normalized fraction 2
+input		a1stg_intlngop;		// integer/long input- add 1 stage
+input		a2stg_frac1_in_frac1;	// select line to a2stg_frac1
+input		a2stg_frac1_in_frac2;	// select line to a2stg_frac1
+input		a1stg_2nan_in_inv;	// 2 NaN inputs- a1 stage
+input		a1stg_faddsubop_inv;	// add/subtract- a1 stage
+input		a2stg_frac1_in_qnan;	// make fraction 1 a QNaN
+input		a2stg_frac1_in_nv;	// NV- make a new prec QNaN
+input		a2stg_frac1_in_nv_dbl;	// NV- make a new double prec QNaN
+input		a6stg_step;		// advance the add pipe
+input		a2stg_frac2_in_frac1;	// select line to a2stg_frac2
+input		a2stg_frac2_in_qnan;	// make fraction 2 a QNaN
+input [5:0]	a2stg_shr_cnt_in;	// right shift count input- add 1 stage
+input		a2stg_shr_cnt_5_inv_in;	// right shift count input[5]- add 1 stg
+input		a2stg_shr_frac2_shr_int; // select line to a3stg_frac2
+input		a2stg_shr_frac2_shr_dbl; // select line to a3stg_frac2
+input		a2stg_shr_frac2_shr_sng; // select line to a3stg_frac2
+input		a2stg_shr_frac2_max;	// select line to a3stg_frac2
+input		a2stg_expadd_11;	// exponent adder[11]- add 2 stage
+input		a2stg_sub_step;		// select line to a3stg_frac2
+input		a2stg_fracadd_frac2_inv_in; // sel line to main adder input 2
+input		a2stg_fracadd_frac2_inv_shr1_in; // sel line to main adder in 2
+input		a2stg_fracadd_frac2;	// select line to main adder input 2
+input		a2stg_fracadd_cin_in;	// carry in to main adder- add 1 stage
+input [5:0]	a2stg_exp;		// exponent add 2 stage bits[5:0]
+input		a2stg_expdec_neq_0;	// exponent will be < 54
+input [1:0]	a3stg_faddsubopa;	// denorm compare lead0[10] input select
+input		a3stg_sub_in;		// subtract in main adder- add 3 stage
+input		a3stg_exp10_0_eq0;	// exponent[10:0]==0- add 3 stg
+input		a3stg_exp10_1_eq0;	// exponent[10:1]==0- add 3 stg
+input		a3stg_exp_0;		// exponent[0]- add 3 stg
+input		a4stg_rnd_frac_add_inv;	// select line to a4stg_rnd_frac
+input		a3stg_fdtos_inv;	// double to single convert- add 3 stg
+input		a4stg_fixtos_fxtod_inv;	// int to single/double cvt- add 4 stg
+input		a4stg_rnd_sng;		// round to single precision- add 4 stg
+input		a4stg_rnd_dbl;		// round to double precision- add 4 stg
+input [9:0]	a4stg_shl_cnt_in;	// postnorm shift left count- add 3 stg
+input		add_frac_out_rndadd;	// select line to add_frac_out
+input		add_frac_out_rnd_frac;	// select line to add_frac_out
+input		a4stg_in_of;		// add overflow- select fraction out
+input		add_frac_out_shl;	// select line to add_frac_out
+input		a4stg_to_0;		// result to max finite on overflow
+input		fadd_clken_l;           // add pipe clk enable - asserted low
+input		rclk;		// global clock
+
+output		a1stg_in2_neq_in1_frac;	// operand 2 fraction != oprnd 1 frac
+output		a1stg_in2_gt_in1_frac;	// operand 2 fraction > oprnd 1 frac
+output		a1stg_in2_eq_in1_exp;	// operand 2 exponent == oprnd 1 exp
+output		a2stg_frac2_63;		// fraction 2 bit[63]- add 2 stage
+output		a2stg_frac2hi_neq_0;	// fraction 2[62:32]in add 2 stage != 0
+output		a2stg_frac2lo_neq_0;	// fraction 2[31:11] in add 2 stage != 0
+output		a3stg_fsdtoix_nx;	// inexact result for flt -> ints
+output		a3stg_fsdtoi_nx;	// inexact result for flt -> 32b ints
+output		a3stg_denorm;		// denorm output- add 3 stage
+output		a3stg_denorm_inv;	// result is not a denorm- add 3 stage
+output [5:0]	a3stg_lead0;		// leading 0's count- add 3 stage
+output		a4stg_round;		// round the result- add 4 stage
+output [5:0]	a4stg_shl_cnt;		// postnorm shift left count- add 4 stage (by name, cf. a4stg_shl_cnt_in; driver not in view- TODO confirm)
+output		a4stg_denorm_inv;	// 0 the exponent
+output		a3stg_inc_exp_inv;	// increment the exponent- add 3 stg
+output		a3stg_same_exp_inv;	// keep the exponent- add 3 stg
+output		a3stg_dec_exp_inv;	// decrement the exponent- add 3 stg
+output		a4stg_rnd_frac_40;	// rounded fraction[40]- add 4 stage
+output		a4stg_rnd_frac_39;	// rounded fraction[39]- add 4 stage
+output		a4stg_rnd_frac_11;	// rounded fraction[11]- add 4 stage
+output		a4stg_rnd_frac_10;	// rounded fraction[10]- add 4 stage
+output		a4stg_rndadd_cout;	// fraction rounding adder carry out
+output		a4stg_frac_9_0_nx;	// inexact double precision result
+output		a4stg_frac_dbl_nx;	// inexact double precision result
+output		a4stg_frac_38_0_nx;	// inexact single precision result
+output		a4stg_frac_sng_nx;	// inexact single precision result
+output		a4stg_frac_neq_0;	// fraction != 0- add 4 stage
+output		a4stg_shl_data_neq_0;	// left shift result != 0- add 4 stage
+output		add_of_out_cout;	// fraction rounding adder carry out
+output [63:0]	add_frac_out;		// add fraction output
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+
+
+wire [62:0]	a1stg_in1;
+wire [54:0]	a1stg_in1a;
+wire		a1stg_in1_31_0_neq_0;
+wire		a1stg_in1_50_32_neq_0;
+wire		a1stg_in1_50_0_neq_0;
+wire		a1stg_in1_53_32_neq_0;
+wire		a1stg_in1_51;
+wire		a1stg_in1_54;
+wire [63:0]	a1stg_in2;
+wire [54:0]	a1stg_in2a;
+wire		a1stg_in2_31_0_neq_0;
+wire		a1stg_in2_50_32_neq_0;
+wire		a1stg_in2_50_0_neq_0;
+wire		a1stg_in2_53_32_neq_0;
+wire		a1stg_in2_51;
+wire		a1stg_in2_54;
+wire		a1stg_in2_neq_in1_frac;
+wire		a1stg_in2_gt_in1_frac;
+wire		a1stg_in2_gt_in1;
+wire		a1stg_in2_eq_in1_exp;
+wire [63:0]	a1stg_norm_frac1;
+wire [63:0]	a1stg_norm_frac2;
+wire [63:0]	a2stg_frac1_in;
+wire [63:0]	a2stg_frac1;
+wire [63:0]	a2stg_frac2_in;
+wire [63:0]	a2stg_frac2;
+wire [63:0]	a2stg_frac2a;
+wire		a2stg_frac2_63;
+wire		a2stg_frac2hi_neq_0;
+wire		a2stg_frac2lo_neq_0;
+wire [115:52]	a2stg_shr;
+wire		a2stg_fsdtoix_nx;
+wire		a2stg_fsdtoi_nx;
+wire		a2stg_shr_60_0_neq_0;
+wire [63:0]	a2stg_shr_frac2_inv;
+wire [63:0]	a3stg_frac2_in;
+wire [63:0]	a3stg_frac2;
+wire [63:0]	a3stg_frac1;
+wire [63:0]	a2stg_fracadd_in2;
+wire [63:0]	a2stg_fracadd;
+wire [63:0]	a3stg_ld0_frac;
+wire [53:0]	a2stg_expdec_tmp;
+wire [53:0]	a2stg_expdec;
+wire [53:0]	a3stg_expdec;
+wire		a3stg_ld0_dnrm_10;
+wire		a3stg_denorm;
+wire		a3stg_denorm_inv;
+wire		a3stg_denorma;
+wire		a3stg_denorm_inva;
+wire [5:0]	a3stg_lead0;
+wire [63:0]	a3stg_fracadd;
+wire		a4stg_round_in;
+wire		a4stg_round;
+wire [5:0]	a2stg_shr_cnt;
+wire [5:3]	a2stg_shr_cnta;
+wire [2:0]	a2stg_shr_cnta_5;
+wire [3:0]	a2stg_shr_cnt_5_inv;
+wire [3:0]	a2stg_shr_cnt_5;
+wire [4:0]	a2stg_shr_cnt_4;
+wire [4:0]	a2stg_shr_cnt_3;
+wire [1:0]	a2stg_shr_cnt_2;
+wire [1:0]	a2stg_shr_cnt_1;
+wire [1:0]	a2stg_shr_cnt_0;
+wire		a3stg_sub;
+wire		a3stg_suba;
+wire [2:0]	a4stg_shl_cnt_dec54_0;
+wire [2:0]	a4stg_shl_cnt_dec54_1;
+wire [2:0]	a4stg_shl_cnt_dec54_2;
+wire [2:0]	a4stg_shl_cnt_dec54_3;
+wire [5:0]	a4stg_shl_cnt;
+wire		a2stg_fracadd_frac2_inv;
+wire		a2stg_fracadd_frac2_inv_shr1;
+wire		a4stg_denorm_inv;
+wire		a3stg_fsdtoix_nx;
+wire		a3stg_fsdtoi_nx;
+wire		a2stg_fracadd_cin;
+wire [63:0]	astg_xtra_regs;
+wire		a3stg_inc_exp_inv;
+wire		a3stg_same_exp_inv;
+wire		a3stg_dec_exp_inv;
+wire		a3stg_inc_exp_inva;
+wire		a3stg_fsame_exp_inv;
+wire		a3stg_fdec_exp_inv;
+wire [63:0]	a4stg_rnd_frac_pre1_in;
+wire [63:0]	a4stg_rnd_frac_pre1;
+wire [63:0]	a4stg_rnd_frac_pre2_in;
+wire [63:0]	a4stg_rnd_frac_pre2;
+wire [63:0]	a4stg_rnd_frac_pre3_in;
+wire [63:0]	a4stg_rnd_frac_pre3;
+wire [63:0]	a4stg_rnd_frac;
+wire [63:0]	a4stg_rnd_fraca;
+wire		a4stg_rnd_frac_40;
+wire		a4stg_rnd_frac_39;
+wire		a4stg_rnd_frac_11;
+wire		a4stg_rnd_frac_10;
+wire [63:0]	a4stg_shl_data_in;
+wire [63:0]	a4stg_shl_data;
+wire [52:0]	a4stg_rndadd_tmp;
+wire		a4stg_rndadd_cout;
+wire [51:0]	a4stg_rndadd;
+wire		a4stg_frac_9_0_nx;
+wire		a4stg_frac_dbl_nx;
+wire		a4stg_frac_38_0_nx;
+wire		a4stg_frac_sng_nx;
+wire		a4stg_frac_neq_0;
+wire		a4stg_shl_data_neq_0;
+wire [126:0]	a4stg_shl_tmp;
+wire [63:0]	a4stg_shl;
+wire		add_of_out_cout;
+wire		a5stg_frac_out_rndadd;
+wire		a5stg_frac_out_rnd_frac;
+wire		a5stg_in_of;
+wire		a5stg_frac_out_shl;
+wire		a5stg_to_0;
+wire [51:0]	a5stg_rndadd;
+wire [63:0]	a5stg_rnd_frac;
+wire [63:0]	a5stg_shl;
+wire [63:0]	add_frac_out;
+
+wire [63:0] a2stg_shr_tmp2;
+wire [63:0] a2stg_shr_tmp4;
+wire [63:0] a2stg_shr_tmp6;
+wire [63:0] a2stg_shr_tmp8;
+wire [63:0] a2stg_shr_tmp10;
+wire [63:0] a2stg_shr_tmp13;
+wire [63:0] a2stg_shr_tmp18;
+wire [63:20] a2stg_nx_neq0_84_tmp_1;
+wire [63:36] a2stg_nx_neq0_84_tmp_2;
+wire [63:44] a2stg_nx_neq0_84_tmp_3;
+wire [63:48] a2stg_nx_neq0_84_tmp_4;
+wire [61:50] a2stg_nx_neq0_84_tmp_5;
+wire [60:59] a2stg_nx_neq0_84_tmp_6;
+wire a2stg_nx_neq0_84_tmp_6_51;
+
+wire [63:0] a4stg_shl_tmp4;
+
+wire se_l;	// inverted scan enable
+
+assign se_l = ~se;
+
+
+clken_buf  ckbuf_add_frac_dp (	// clock buffer cell; its clk pin is the net wired to .clk of the flop instances that follow
+  .clk(clk),	// clk has no wire declaration in the list above, so it is an implicitly declared 1-bit net
+  .rclk(rclk),	// global clock input of this module
+  .enb_l(fadd_clken_l),	// add pipe clock enable, asserted low
+  .tmb_l(se_l)	// driven with the inverted scan enable
+  );
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add fraction inputs.
+//
+//	Add input stage.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(63) i_a1stg_in1 (	// 63-bit dffe_s for operand 1 (inq_in1[62:0]); en = a1stg_step
+        .din    (inq_in1[62:0]),
+        .en     (a1stg_step),
+        .clk    (clk),
+ 
+        .q      (a1stg_in1[62:0]),
+
+        .se     (se),
+        .si     (),	// scan-in left unconnected (same on every flop instance shown in this section)
+        .so     ()	// scan-out left unconnected
+);
+
+dffe_s #(55) i_a1stg_in1a (	// second copy of inq_in1[54:0]; in the code shown here it feeds only i_a1stg_in2_gt_in1_frac
+        .din	(inq_in1[54:0]),
+        .en	(a1stg_step),
+        .clk	(clk),
+ 
+        .q	(a1stg_in1a[54:0]),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(64) i_a1stg_in2 (	// 64-bit dffe_s for operand 2 (inq_in2[63:0]); en = a1stg_step
+        .din    (inq_in2[63:0]),
+        .en     (a1stg_step),
+        .clk    (clk),
+ 
+        .q      (a1stg_in2[63:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(55) i_a1stg_in2a (	// second copy of inq_in2[54:0]; in the code shown here it feeds only i_a1stg_in2_gt_in1_frac
+        .din	(inq_in2[54:0]),
+        .en	(a1stg_step),
+        .clk	(clk),
+ 
+        .q	(a1stg_in2a[54:0]),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add normalization and special input injection.
+//
+//	Add stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+fpu_in2_gt_in1_frac i_a1stg_in2_gt_in1_frac (	// operand comparator working on the duplicated [54:0] operand copies
+	.din1			(a1stg_in1a[54:0]),
+	.din2			(a1stg_in2a[54:0]),
+	.sngop			(a1stg_sngop),
+	.expadd11		(a1stg_expadd3_11),
+	.expeq			(a1stg_in2_eq_in1_exp),	// exponent-equal flag computed by the assign just below
+
+	.din2_neq_din1		(a1stg_in2_neq_in1_frac),	// module output
+	.din2_gt_din1		(a1stg_in2_gt_in1_frac),	// module output
+	.din2_gt1_din1		(a1stg_in2_gt_in1)	// internal only: steers the a2stg_frac1_in / a2stg_frac2_in selects
+);
+
+assign a1stg_in2_eq_in1_exp= (&{(~(a1stg_in1[62:55] ^ a1stg_in2[62:55])),	// AND-reduce of bitwise XNOR over bits [62:55] ...
+				((~(a1stg_in1[54:52] ^ a1stg_in2[54:52]))	// ... and over bits [54:52],
+					| {3{a1stg_sngop}})});			// where [54:52] are forced to "equal" when a1stg_sngop is set
+
+assign a1stg_norm_frac1[63:0]= ({64{a1stg_norm_dbl_in1}}	// AND-OR mux; selects are presumably mutually exclusive (drivers not in view)
+			    & {1'b1, a1stg_in1[51:0], 11'b0})	// 1 + 52 + 11 bits: explicit leading 1 at bit 63
+		| ({64{a1stg_denorm_dbl_in1}}
+			    & {a1stg_in1[51:0], 12'b0})	// 52 + 12 bits: no leading 1, in1[51] lands at bit 63
+		| ({64{a1stg_norm_sng_in1}}
+			    & {1'b1, a1stg_in1[54:32], 40'b0})	// 1 + 23 + 40 bits
+		| ({64{a1stg_denorm_sng_in1}}
+			    & {a1stg_in1[54:32], 41'b0});	// 23 + 41 bits
+
+assign a1stg_norm_frac2[63:0]= ({64{a1stg_norm_dbl_in2}}	// same four formats as a1stg_norm_frac1, applied to operand 2
+			    & {1'b1, a1stg_in2[51:0], 11'b0})
+                | ({64{a1stg_denorm_dbl_in2}}
+                            & {a1stg_in2[51:0], 12'b0})
+                | ({64{a1stg_norm_sng_in2}} 
+                            & {1'b1, a1stg_in2[54:32], 40'b0})
+                | ({64{a1stg_denorm_sng_in2}}
+                            & {a1stg_in2[54:32], 41'b0})
+		| ({64{a1stg_intlngop}}	// extra term, operand 2 only:
+			    & a1stg_in2[63:0]);	// all 64 bits of in2 pass through unmodified
+
+assign a2stg_frac1_in[63:0]= ({64{(a1stg_faddsubop_inv	// term 1 (normalized fraction 1) is enabled when a1stg_faddsubop_inv is high,
+				|| (!((a1stg_in2_gt_in1 && a1stg_2nan_in_inv)	// or when neither (in2_gt_in1 && 2nan_in_inv)
+						|| a2stg_frac1_in_frac1)))}}	// nor a2stg_frac1_in_frac1 is high
+			    & {a1stg_norm_frac1[63],
+				(a1stg_norm_frac1[62] || a2stg_frac1_in_qnan),	// bit 62 forced to 1 by a2stg_frac1_in_qnan
+				(a1stg_norm_frac1[61:40]
+						| {22{a2stg_frac1_in_nv}}),	// bits [61:40] forced to 1s by a2stg_frac1_in_nv
+				(a1stg_norm_frac1[39:11]
+						| {29{a2stg_frac1_in_nv_dbl}}),	// bits [39:11] forced to 1s by a2stg_frac1_in_nv_dbl
+				a1stg_norm_frac1[10:0]})
+		| ({64{(a2stg_frac1_in_frac2	// term 2 (normalized fraction 2) is enabled when a2stg_frac1_in_frac2 is high
+				    && (a1stg_in2_gt_in1 || a2stg_frac1_in_frac1))}}	// together with in2_gt_in1 or a2stg_frac1_in_frac1
+			    & {a1stg_norm_frac2[63],
+				(a1stg_norm_frac2[62] || a2stg_frac1_in_qnan),	// same bit forcing as in term 1
+				(a1stg_norm_frac2[61:40]
+						| {22{a2stg_frac1_in_nv}}),
+				(a1stg_norm_frac2[39:11]
+						| {29{a2stg_frac1_in_nv_dbl}}),
+				a1stg_norm_frac2[10:0]});
+
+dffe_s #(64) i_a2stg_frac1 (	// 64-bit dffe_s holding a2stg_frac1; en = a6stg_step
+	.din	(a2stg_frac1_in[63:0]),
+	.en	(a6stg_step),
+	.clk    (clk),
+
+        .q      (a2stg_frac1[63:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a2stg_frac2_in[63:0]= ({64{a1stg_faddsubop_inv}}	// term 1: fraction 2 (bit 62 OR a2stg_frac2_in_qnan) when a1stg_faddsubop_inv is high
+			    & {a1stg_norm_frac2[63],
+				(a1stg_norm_frac2[62] || a2stg_frac2_in_qnan),
+                                a1stg_norm_frac2[61:0]})
+		| ({64{(a2stg_frac2_in_frac1 && (!a1stg_in2_gt_in1))}}	// term 2: same data word as term 1, different enable
+			    & {a1stg_norm_frac2[63],
+        			(a1stg_norm_frac2[62] || a2stg_frac2_in_qnan),
+        			a1stg_norm_frac2[61:0]})
+		| ({64{(a2stg_frac2_in_frac1 && a1stg_in2_gt_in1)}}	// term 3: unmodified fraction 1 when a1stg_in2_gt_in1 is high
+			    & a1stg_norm_frac1[63:0]);
+
+dffe_s #(64) i_a2stg_frac2 (	// a2stg_frac2: used below for the frac2 flags and the main adder input
+	.din	(a2stg_frac2_in[63:0]),
+	.en	(a6stg_step),
+	.clk    (clk),
+
+        .q      (a2stg_frac2[63:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(64) i_a2stg_frac2a (	// a2stg_frac2a: second register with the same din/en; feeds the right shifter and nx chain below
+	.din	(a2stg_frac2_in[63:0]),
+	.en	(a6stg_step),
+	.clk	(clk),
+
+	.q	(a2stg_frac2a[63:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipe right shift
+//		- shift the smaller fraction right for adds and subtracts
+//		- shift the fraction right for float to integer conversion
+//
+//	Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a2stg_frac2_63= a2stg_frac2[63];	// top bit of the stage-2 fraction 2
+
+assign a2stg_frac2hi_neq_0= (|a2stg_frac2[62:32]);	// OR-reduce of bits [62:32]
+
+assign a2stg_frac2lo_neq_0= (|a2stg_frac2[31:11]);	// OR-reduce of bits [31:11]; bits [10:0] are not examined
+
+
+// a2 stage right shifter: six cascaded stages (by 32, 16, 8, 4, 2, 1) on a2stg_frac2a. Stage 1: by 32 per cnt_5 / pass per cnt_5_inv; one select copy per slice [63:40],[39:24],[23:11],[10:0].
+assign a2stg_shr_tmp2[63:0] = ({{24{a2stg_shr_cnt_5[0]}}, {16{a2stg_shr_cnt_5[1]}}, {13{a2stg_shr_cnt_5[2]}}, {11{a2stg_shr_cnt_5[3]}}} & {32'h00000000, a2stg_frac2a[63:32]})
+	| ({{24{a2stg_shr_cnt_5_inv[0]}}, {16{a2stg_shr_cnt_5_inv[1]}}, {13{a2stg_shr_cnt_5_inv[2]}}, {11{a2stg_shr_cnt_5_inv[3]}}} & a2stg_frac2a[63:0]);
+// Stage 2: shift right by 16 (a2stg_shr_cnt_4[3:0], per slice) or pass (a2stg_shr_cnt_4[4] low). Output is true polarity.
+assign a2stg_shr_tmp4[63:0] = ({{24{a2stg_shr_cnt_4[0]}}, {16{a2stg_shr_cnt_4[1]}}, {13{a2stg_shr_cnt_4[2]}}, {11{a2stg_shr_cnt_4[3]}}} & {16'h0000, a2stg_shr_tmp2[63:16]})
+	| ({{43{~a2stg_shr_cnt_4[4]}}, {21{~a2stg_shr_cnt_4[4]}}} & a2stg_shr_tmp2[63:0]);
+// Stage 3: shift right by 8 or pass. Note the outer ~ : a2stg_shr_tmp6 carries INVERTED data.
+assign a2stg_shr_tmp6[63:0] = ~(({{24{a2stg_shr_cnt_3[0]}}, {16{a2stg_shr_cnt_3[1]}}, {13{a2stg_shr_cnt_3[2]}}, {11{a2stg_shr_cnt_3[3]}}} & {8'h00, a2stg_shr_tmp4[63:8]})
+	| ({64{~a2stg_shr_cnt_3[4]}} & a2stg_shr_tmp4[63:0]));
+// Stage 4: pass (cnt_2[0] low) or shift right by 4 (cnt_2[1] high), in OR-AND form on the inverted input (hence the 4'hf fill). Output is true polarity.
+assign a2stg_shr_tmp8[63:0] = ~(({{43{a2stg_shr_cnt_2[0]}}, {21{a2stg_shr_cnt_2[0]}}} | a2stg_shr_tmp6[63:0])
+	& ({64{~a2stg_shr_cnt_2[1]}} | {4'hf, a2stg_shr_tmp6[63:4]}));
+// Stage 5: shift right by 2 (cnt_1[0] high) or pass (cnt_1[1] low). a2stg_shr_tmp10 carries INVERTED data.
+assign a2stg_shr_tmp10[63:0] = ~(({{43{a2stg_shr_cnt_1[0]}}, {21{a2stg_shr_cnt_1[0]}}} & {2'b00, a2stg_shr_tmp8[63:2]})
+	| ({64{~a2stg_shr_cnt_1[1]}} & a2stg_shr_tmp8[63:0]));
+// Stage 6: pass (cnt_0[0] low) or shift right by 1 (cnt_0[1] high), OR-AND form with 1'b1 fill on inverted input. a2stg_shr[115:52] is true polarity.
+assign a2stg_shr[115:52] = ~(({{43{a2stg_shr_cnt_0[0]}}, {21{a2stg_shr_cnt_0[0]}}} | a2stg_shr_tmp10[63:0])
+	 & ({64{~a2stg_shr_cnt_0[1]}} | {1'b1, a2stg_shr_tmp10[63:1]}));
+// Taps for the nx logic: tmp18 is the inverse of stage 1, tmp13 is a plain copy of stage 2. (The a2stg_shr_cnt_* copies are presumably replicas of the count bits; drivers not in view.)
+assign a2stg_shr_tmp18[63:0] = ~a2stg_shr_tmp2[63:0];
+assign a2stg_shr_tmp13[63:0] = a2stg_shr_tmp4[63:0];
+
+// a2 stage nx signals: a2stg_fsdtoi_nx ORs one slice from each shifter stage below bit 32; inverted stages (tmp6, tmp10) are tested as ~(&...)
+assign a2stg_fsdtoi_nx = (| a2stg_shr_tmp13[31:0])	// any 1 in stage-2 output [31:0]
+	| (~(& a2stg_shr_tmp6[31:24]))	// any 1 (true polarity) in stage-3 bits [31:24]
+	| (| a2stg_shr_tmp8[31:28])	// any 1 in stage-4 bits [31:28]
+	| (~(& a2stg_shr_tmp10[31:30]))	// any 1 (true polarity) in stage-5 bits [31:30]
+	| a2stg_shr[83];	// final shifter output, index 83 (= position 31 of the 64-bit [115:52] window)
+
+// Side chain of muxes, one per shift-count bit [5]..[0], over low-order fraction bits. Polarity alternates: tmp_1/3/5 inverted, tmp_2/4/6 true.
+assign a2stg_nx_neq0_84_tmp_1[63:20] = ~((a2stg_frac2a[43:0] & {44{a2stg_shr_cnt[5]}})	// cnt[5] set: frac2a[43:0]
+	| ({a2stg_frac2a[11:0], 32'h00000000} & {44{~a2stg_shr_cnt[5]}}));	// cnt[5] clear: frac2a[11:0] on top, zeros below; result inverted
+// tmp_2: OR-AND mux on inverted inputs (tmp18 and tmp_1 are both inverted), selected by cnt[4]; result is true polarity.
+assign a2stg_nx_neq0_84_tmp_2[63:36] = ~(({a2stg_shr_tmp18[27:12], a2stg_nx_neq0_84_tmp_1[63:52]} | {28{~a2stg_shr_cnt[4]}})
+	& (a2stg_nx_neq0_84_tmp_1[63:36] | {28{a2stg_shr_cnt[4]}}));
+// tmp_3: AND-OR mux on true-polarity inputs (tmp13, tmp_2), selected by cnt[3]; result inverted.
+assign a2stg_nx_neq0_84_tmp_3[63:44] = ~(({a2stg_shr_tmp13[19:12], a2stg_nx_neq0_84_tmp_2[63:52]} & {20{a2stg_shr_cnt[3]}})
+	| (a2stg_nx_neq0_84_tmp_2[63:44] & {20{~a2stg_shr_cnt[3]}}));
+// tmp_4: OR-AND mux on inverted inputs (tmp6, tmp_3), selected by cnt[2]; result is true polarity.
+assign a2stg_nx_neq0_84_tmp_4[63:48] = ~(({a2stg_shr_tmp6[15:12], a2stg_nx_neq0_84_tmp_3[63:52]} | {16{~a2stg_shr_cnt[2]}})
+	& (a2stg_nx_neq0_84_tmp_3[63:48] | {16{a2stg_shr_cnt[2]}}));
+// tmp_5: AND-OR mux between tmp_4[63:52] and tmp_4[61:50], selected by cnt[1]; result inverted.
+assign a2stg_nx_neq0_84_tmp_5[61:50] = ~((a2stg_nx_neq0_84_tmp_4[63:52] & {12{a2stg_shr_cnt[1]}})
+	| (a2stg_nx_neq0_84_tmp_4[61:50] & {12{~a2stg_shr_cnt[1]}}));
+// tmp_6 bits: final cnt[0] stage, true polarity (each term ANDs the complement of a tmp_5 bit with cnt[0] or ~cnt[0]).
+assign a2stg_nx_neq0_84_tmp_6[59] = ~(a2stg_shr_cnt[0] | a2stg_nx_neq0_84_tmp_5[60]);
+assign a2stg_nx_neq0_84_tmp_6[60] = ~(~a2stg_shr_cnt[0] | a2stg_nx_neq0_84_tmp_5[61]);
+assign a2stg_nx_neq0_84_tmp_6_51 = ~((a2stg_nx_neq0_84_tmp_5[52] | ~a2stg_shr_cnt[0])
+	& (a2stg_nx_neq0_84_tmp_5[51] | a2stg_shr_cnt[0]));
+// a2stg_fsdtoix_nx: OR of one slice per chain stage; inverted stages are tested with ~(&...), true stages with (|...).
+assign a2stg_fsdtoix_nx = (~(& a2stg_nx_neq0_84_tmp_1[51:20])
+	| (| a2stg_nx_neq0_84_tmp_2[51:36])
+	| ~(& a2stg_nx_neq0_84_tmp_3[51:44])
+	| (| a2stg_nx_neq0_84_tmp_4[51:48])
+	| ~(& a2stg_nx_neq0_84_tmp_5[51:50])
+	| a2stg_nx_neq0_84_tmp_6_51);
+// a2stg_shr_60_0_neq_0: same construction with wider slices (up to bit 60); used as the single sticky bit in the shr_int term below.
+assign a2stg_shr_60_0_neq_0 = (~(& a2stg_nx_neq0_84_tmp_1[60:20])
+	| (| a2stg_nx_neq0_84_tmp_2[60:45])
+	| ~(& a2stg_nx_neq0_84_tmp_3[60:53])
+	| (| a2stg_nx_neq0_84_tmp_4[60:57])
+	| ~(& a2stg_nx_neq0_84_tmp_5[60:59])
+	| (| a2stg_nx_neq0_84_tmp_6[60:59]));
+
+
+assign a2stg_shr_frac2_inv[63:0]= (~(({64{a2stg_shr_frac2_shr_int}}	// inverted AND-OR mux of the five candidates below
+			    & {1'b0, a2stg_shr[115:61], a2stg_shr_60_0_neq_0,	// int: 0, 55 shifter bits, 1 sticky bit,
+					7'b0})					// then 7 zero bits (1+55+1+7 = 64)
+		| ({64{(a2stg_shr_frac2_shr_dbl && a2stg_expadd_11)}}
+			    & a2stg_shr[115:52])	// dbl: full 64-bit shifter output
+		| ({64{(a2stg_shr_frac2_shr_sng && a2stg_expadd_11)}}
+			    & {a2stg_shr[115:84], 32'b0})	// sng: upper 32 shifter bits, lower 32 bits zeroed
+		| ({64{(a2stg_shr_frac2_max && (!a2stg_expadd_11))}}
+			    & 64'h7fffffffffffffff)	// max: constant with every bit set except bit 63
+		| ({64{(!a6stg_step)}}
+			    & a3stg_frac2[63:0])));	// no step: feed back the current a3stg_frac2 (the flop below has no enable)
+
+assign a3stg_frac2_in[63:0]= (~(a2stg_shr_frac2_inv[63:0]	// XNOR with the inverted mux output, i.e. the selected value
+		^ {64{a2stg_sub_step}}));			// is complemented when a2stg_sub_step is high and passed otherwise
+
+dff_s #(64) i_a3stg_frac2 (	// plain dff_s (no en pin): hold behaviour comes from the !a6stg_step feedback term above
+	.din	(a3stg_frac2_in[63:0]),
+        .clk    (clk),
+
+        .q      (a3stg_frac2[63:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Pipe the other/larger fraction to stage 3.
+//
+//	Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(64) i_a3stg_frac1 (	// stage-3 copy of fraction 1; en = a6stg_step
+	.din    ({1'b0, a2stg_frac1[63:1]}),	// stored shifted right by one: bit 63 is 0 and a2stg_frac1[0] is dropped
+	.en     (a6stg_step),
+        .clk    (clk),
+
+        .q      (a3stg_frac1[63:0]),
+         
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipe adder.
+//
+//	Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a2stg_fracadd_in2[63:0]= ({64{a2stg_fracadd_frac2_inv}}	// second adder operand, AND-OR mux of three forms of a2stg_frac2:
+			    & (~a2stg_frac2[63:0]))	// bitwise complement
+		| ({64{a2stg_fracadd_frac2_inv_shr1}}
+			    & (~{1'b0, a2stg_frac2[63:1]}))	// complement of frac2 shifted right by one (so bit 63 becomes 1)
+		| ({64{a2stg_fracadd_frac2}}
+			    & a2stg_frac2[63:0]);	// unmodified
+
+assign a2stg_fracadd[63:0]= (a2stg_frac1[63:0]	// 64-bit sum of frac1, the selected operand and a 1-bit carry in;
+			+ a2stg_fracadd_in2[63:0]	// the carry out of bit 63 is discarded by the 64-bit assignment
+			+ {63'b0, a2stg_fracadd_cin});
+
+dffe_s #(64) i_a3stg_ld0_frac (	// registers the sum for the stage-3 denorm compare and leading-zero count
+	.din	(a2stg_fracadd[63:0]),
+	.en     (a6stg_step),
+        .clk    (clk),
+
+        .q      (a3stg_ld0_frac[63:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipe exponent decode- used to identify denorm results.
+//
+//      Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// assign a2stg_expdec_tmp[107:0]= ({54'b0, 54'h20000000000000} >> a2stg_exp[5:0]);
+assign a2stg_expdec_tmp[53:0] =          54'h20000000000000  >> a2stg_exp[5:0] ;	// one-hot: bit 53 moved down by a2stg_exp[5:0]; all zeros once the count exceeds 53
+// Mask: the decoded vector is forced to all zeros unless a2stg_expdec_neq_0 is set.
+assign a2stg_expdec[53:0]= a2stg_expdec_tmp[53:0] & {54{a2stg_expdec_neq_0}};
+
+dffe_s #(54) i_a3stg_expdec (
+	.din	(a2stg_expdec[53:0]),
+	.en     (a6stg_step),
+        .clk    (clk),
+
+        .q      (a3stg_expdec[53:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipe denorm comparator to identify denorm results.
+//
+//      Add stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a3stg_ld0_dnrm_10= (a3stg_faddsubopa[0] && a3stg_ld0_frac[10])
+		|| ((!a3stg_faddsubopa[0]) && (|a3stg_ld0_frac[10:0]));
+
+fpu_denorm_frac i_a3stg_denorm (
+	.din1			({a3stg_ld0_frac[63:11], a3stg_ld0_dnrm_10}),
+	.din2			(a3stg_expdec[53:0]),
+
+	.din2_din1_denorm	(a3stg_denorm),
+	.din2_din1_denorm_inv	(a3stg_denorm_inv),
+	.din2_din1_denorma	(a3stg_denorma),
+	.din2_din1_denorm_inva	(a3stg_denorm_inva)
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipe leading 0's counter.
+//
+//      Add stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+fpu_cnt_lead0_64b i_a3stg_lead0 (
+	.din	(a3stg_ld0_frac[63:0]),
+
+	.lead0	(a3stg_lead0[5:0])
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe main adder.
+//
+//      Add stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a3stg_fracadd[63:0]= (a3stg_frac1[63:0]
+			+ a3stg_frac2[63:0]
+			+ {63'b0, a3stg_suba});
+
+dffe_s #(64) i_astg_xtra_regs (
+	.din	({{4{a2stg_shr_cnt_5_inv_in}}, {4{a2stg_shr_cnt_in[5]}},
+			a2stg_shr_cnt_in[5:3],
+			{5{a2stg_shr_cnt_in[4]}}, {5{a2stg_shr_cnt_in[3]}},
+			a2stg_shr_cnt_in[5:0], a4stg_round_in,
+			{2{a2stg_shr_cnt_in[2]}}, {2{a2stg_shr_cnt_in[1]}},
+			{2{a2stg_shr_cnt_in[0]}},
+			{3{a4stg_shl_cnt_in[6]}},
+			{3{a4stg_shl_cnt_in[7]}},
+			{3{a4stg_shl_cnt_in[8]}},
+			{3{a4stg_shl_cnt_in[9]}},
+			a4stg_shl_cnt_in[5:0],
+			{3{a2stg_shr_cnt_in[5]}},
+			a2stg_fracadd_frac2_inv_in,
+			a2stg_fracadd_frac2_inv_shr1_in,
+			a3stg_denorm_inva,
+			a2stg_fsdtoix_nx, a2stg_fsdtoi_nx,
+			1'b0, a2stg_fracadd_cin_in, {2{a3stg_sub_in}}}),
+	.en	(a6stg_step),
+	.clk	(clk),
+
+	.q	({a2stg_shr_cnt_5_inv[3:0], a2stg_shr_cnt_5[3:0],
+			a2stg_shr_cnta[5:3],
+			a2stg_shr_cnt_4[4:0], a2stg_shr_cnt_3[4:0],
+			a2stg_shr_cnt[5:0], a4stg_round,
+			a2stg_shr_cnt_2[1:0], a2stg_shr_cnt_1[1:0],
+			a2stg_shr_cnt_0[1:0],
+			a4stg_shl_cnt_dec54_0[2:0],
+			a4stg_shl_cnt_dec54_1[2:0],
+			a4stg_shl_cnt_dec54_2[2:0],
+			a4stg_shl_cnt_dec54_3[2:0],
+			a4stg_shl_cnt[5:0],
+			a2stg_shr_cnta_5[2:0],
+			a2stg_fracadd_frac2_inv,
+			a2stg_fracadd_frac2_inv_shr1,
+			a4stg_denorm_inv,
+			a3stg_fsdtoix_nx, a3stg_fsdtoi_nx,
+			astg_xtra_regs[3], a2stg_fracadd_cin,
+			a3stg_sub, a3stg_suba}),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a4stg_round_in= a3stg_fracadd[61]
+		|| a3stg_fracadd[62]
+		|| a3stg_fracadd[63];
+
+assign a3stg_inc_exp_inv= (!a3stg_fracadd[63]);
+
+assign a3stg_same_exp_inv= (!(((!a3stg_fracadd[63]) && a3stg_fracadd[62])
+		|| ((!a3stg_fracadd[63]) && a3stg_exp10_0_eq0)));
+
+assign a3stg_dec_exp_inv= (!((!a3stg_fracadd[63])
+		&& (!a3stg_fracadd[62])
+		&& a3stg_fracadd[61]
+		&& (!a3stg_exp10_0_eq0)));
+
+assign a3stg_inc_exp_inva= (!a3stg_fracadd[63]);
+
+assign a3stg_fsame_exp_inv= (!(((!a3stg_fracadd[63])
+			&& (!a3stg_fracadd[62])
+			&& a3stg_fracadd[61]
+			&& a3stg_exp10_1_eq0
+			&& a3stg_exp_0)
+		|| ((!a3stg_fracadd[63])
+			&& a3stg_fracadd[62]
+			&& (!a3stg_exp10_0_eq0))));
+
+assign a3stg_fdec_exp_inv= (!((!a3stg_fracadd[63])
+		&& (!a3stg_fracadd[62])
+		&& a3stg_fracadd[61]
+		&& (!a3stg_exp10_1_eq0)));
+
+assign a4stg_rnd_frac_pre1_in[63:0]= ({64{(a3stg_faddsubopa[1] && a6stg_step
+					&& (!a3stg_fdec_exp_inv))}}
+			    & {a3stg_fracadd[61:0], 2'b00});
+
+dff_s #(64) i_a4stg_rnd_frac_pre1 (
+	.din	(a4stg_rnd_frac_pre1_in[63:0]),
+        .clk    (clk),
+
+        .q      (a4stg_rnd_frac_pre1[63:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a4stg_rnd_frac_pre3_in[63:0]= ({64{(a3stg_faddsubopa[1] && a6stg_step
+					&& (!a3stg_fsame_exp_inv))}}
+			    & {a3stg_fracadd[62:0], 1'b0});
+
+dff_s #(64) i_a4stg_rnd_frac_pre3 (
+	.din	(a4stg_rnd_frac_pre3_in[63:0]),
+        .clk    (clk),
+
+        .q      (a4stg_rnd_frac_pre3[63:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a4stg_rnd_frac_pre2_in[63:0]= ({64{(a3stg_faddsubopa[1] && a6stg_step
+					&& (!a3stg_inc_exp_inva))}}
+			    & a3stg_fracadd[63:0])
+		| ({64{((!a4stg_rnd_frac_add_inv) && a6stg_step)}}
+			    & a3stg_fracadd[63:0])
+		| ({64{((!a3stg_fdtos_inv) && a6stg_step)}}
+			    & {a3stg_fracadd[62:0], 1'b0})
+		| ({64{((!a4stg_fixtos_fxtod_inv) && a6stg_step)}}
+			    & a4stg_shl[63:0])
+		| ({64{(!a6stg_step)}}
+			    & a4stg_rnd_frac[63:0]);
+
+dff_s #(64) i_a4stg_rnd_frac_pre2 (
+	.din	(a4stg_rnd_frac_pre2_in[63:0]),
+        .clk    (clk),
+
+        .q      (a4stg_rnd_frac_pre2[63:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe input to left shift.
+//
+//      Add stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a4stg_shl_data_in[63:0]= ({64{a3stg_denorm_inva}}
+			    & a3stg_ld0_frac[63:0])
+		| ({64{a3stg_denorma}}
+			    & {1'b0, a3stg_ld0_frac[63:1]});
+
+dffe_s #(64) i_a4stg_shl_data (
+	.din	(a4stg_shl_data_in[63:0]),
+	.en     (a6stg_step),
+        .clk    (clk),
+
+        .q      (a4stg_shl_data[63:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipe rounding adder.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a4stg_rnd_frac[63:0]= (a4stg_rnd_frac_pre1[63:0]
+				| a4stg_rnd_frac_pre2[63:0]
+				| a4stg_rnd_frac_pre3[63:0]);
+
+assign a4stg_rnd_frac_40= a4stg_rnd_frac[40];
+
+assign a4stg_rnd_frac_39= a4stg_rnd_frac[39];
+
+assign a4stg_rnd_frac_11= a4stg_rnd_frac[11];
+
+assign a4stg_rnd_frac_10= a4stg_rnd_frac[10];
+
+assign a4stg_frac_9_0_nx= (|a4stg_rnd_frac[9:0]);
+
+assign a4stg_frac_dbl_nx= a4stg_frac_9_0_nx || a4stg_rnd_frac[10];
+
+assign a4stg_frac_38_0_nx= a4stg_frac_dbl_nx || (|a4stg_rnd_frac[38:11]);
+
+assign a4stg_frac_sng_nx= a4stg_frac_38_0_nx || a4stg_rnd_frac[39];
+
+assign a4stg_frac_neq_0= a4stg_frac_sng_nx || (|a4stg_rnd_frac[63:40]);
+
+assign a4stg_rndadd_tmp[52:0]= {1'b0, a4stg_rnd_frac[62:11]}
+			+ {23'b0, a4stg_rnd_sng, 28'b0, a4stg_rnd_dbl};	// a4stg_rnd_dbl adds at a4stg_rnd_frac[11], a4stg_rnd_sng at a4stg_rnd_frac[40]
+// Bit 52 of the 53-bit sum is the carry out of the 52-bit addition.
+assign a4stg_rndadd_cout= a4stg_rndadd_tmp[52];
+// The low 52 bits of the sum are passed on unchanged.
+assign a4stg_rndadd[51:0]= a4stg_rndadd_tmp[51:0];
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe left shift.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a4stg_shl_data_neq_0= (|a4stg_shl_data[63:0]);
+
+assign a4stg_shl_tmp4[63:0] = ({{32{a4stg_shl_cnt_dec54_0[0]}}, {21{a4stg_shl_cnt_dec54_0[1]}}, {11{a4stg_shl_cnt_dec54_0[2]}}} & a4stg_shl_data[63:0])
+	| ({{32{a4stg_shl_cnt_dec54_1[0]}}, {21{a4stg_shl_cnt_dec54_1[1]}}, {11{a4stg_shl_cnt_dec54_1[2]}}} & {a4stg_shl_data[47:0], 16'h0000})
+	| ({{32{a4stg_shl_cnt_dec54_2[0]}}, {21{a4stg_shl_cnt_dec54_2[1]}}, {11{a4stg_shl_cnt_dec54_2[2]}}} & {a4stg_shl_data[31:0], 32'h00000000})
+	| ({{32{a4stg_shl_cnt_dec54_3[0]}}, {21{a4stg_shl_cnt_dec54_3[1]}}, {11{a4stg_shl_cnt_dec54_3[2]}}} & {a4stg_shl_data[15:0], 32'h00000000, 16'h0000});
+
+assign a4stg_shl[63:0] = a4stg_shl_tmp4[63:0] << a4stg_shl_cnt[3:0];
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe fraction output.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(58) i_a5stg_rndadd (
+	.din	({a4stg_rndadd_cout, add_frac_out_rndadd, add_frac_out_rnd_frac,
+			a4stg_in_of, add_frac_out_shl, a4stg_to_0,
+			a4stg_rndadd[51:0]}),
+	.en	(a6stg_step),
+        .clk    (clk),
+
+	.q	({add_of_out_cout, a5stg_frac_out_rndadd,
+			a5stg_frac_out_rnd_frac, a5stg_in_of,
+			a5stg_frac_out_shl, a5stg_to_0,
+			a5stg_rndadd[51:0]}),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(64) i_a5stg_rnd_frac (
+	.din	(a4stg_rnd_frac[63:0]),
+	.en	(a6stg_step),
+	.clk	(clk),
+
+	.q	(a5stg_rnd_frac[63:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(64) i_a5stg_shl (
+	.din	(a4stg_shl[63:0]),
+	.en	(a6stg_step),
+	.clk	(clk),
+
+	.q	(a5stg_shl[63:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign add_frac_out[63:0]= ({64{a5stg_frac_out_rndadd}}
+			    & {1'b0, a5stg_rndadd[51:0], 11'b0})
+		| ({64{a5stg_frac_out_rnd_frac}}
+			    & a5stg_rnd_frac[63:0])
+		| ({64{a5stg_in_of}}
+			    & {64{a5stg_to_0}})
+		| ({64{a5stg_frac_out_shl}}
+			    & a5stg_shl[63:0]);
+
+
+endmodule
+
Index: /trunk/T1-FPU/fpu_mul_frac_dp.v
===================================================================
--- /trunk/T1-FPU/fpu_mul_frac_dp.v	(revision 6)
+++ /trunk/T1-FPU/fpu_mul_frac_dp.v	(revision 6)
@@ -0,0 +1,504 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_mul_frac_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply pipeline fraction datapath.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+module fpu_mul_frac_dp (
+	inq_in1,
+	inq_in2,
+	m6stg_step,
+	m2stg_frac1_dbl_norm,
+	m2stg_frac1_dbl_dnrm,
+	m2stg_frac1_sng_norm,
+	m2stg_frac1_sng_dnrm,
+	m2stg_frac1_inf,
+	m1stg_snan_dbl_in1,
+	m1stg_snan_sng_in1,
+	m2stg_frac2_dbl_norm,
+	m2stg_frac2_dbl_dnrm,
+	m2stg_frac2_sng_norm,
+	m2stg_frac2_sng_dnrm,
+	m2stg_frac2_inf,
+	m1stg_snan_dbl_in2,
+	m1stg_snan_sng_in2,
+	m1stg_inf_zero_in,
+	m1stg_inf_zero_in_dbl,
+	m1stg_dblop,
+	m1stg_dblop_inv,
+	m4stg_frac,
+	m4stg_sh_cnt_in,
+	m3bstg_ld0_inv,
+	m4stg_left_shift_step,
+	m4stg_right_shift_step,
+	m5stg_fmuls,
+	m5stg_fmulda,
+	mul_frac_out_fracadd,
+	mul_frac_out_frac,
+	m5stg_in_of,
+	m5stg_to_0,
+	fmul_clken_l,
+	rclk,
+	// outputs
+	m2stg_frac1_array_in,
+	m2stg_frac2_array_in,
+	m1stg_ld0_1,
+	m1stg_ld0_2,
+	m4stg_frac_105,
+	m3stg_ld0_inv,
+	m4stg_shl_54,
+	m4stg_shl_55,
+	m5stg_frac_32_0,
+	m5stg_frac_dbl_nx,
+	m5stg_frac_sng_nx,
+	m5stg_frac_neq_0,
+	m5stg_fracadd_cout,
+	mul_frac_out,
+// scan ports: se drives every flop's .se; si and so are not connected to anything inside this module
+	se,
+	si,
+	so
+);
+// Direction/width declarations. rclk only feeds clken_buf below; every flop in this module is clocked from the net clk.
+
+input [54:0]	inq_in1;		// request operand 1 to op pipes
+input [54:0]	inq_in2;		// request operand 2 to op pipes
+input		m6stg_step;		// advance the multiply pipe
+input		m2stg_frac1_dbl_norm;	// select line to m2stg_frac1
+input		m2stg_frac1_dbl_dnrm;	// select line to m2stg_frac1
+input		m2stg_frac1_sng_norm;	// select line to m2stg_frac1
+input		m2stg_frac1_sng_dnrm;	// select line to m2stg_frac1
+input		m2stg_frac1_inf;	// select line to m2stg_frac1
+input		m1stg_snan_dbl_in1;	// operand 1 is double signalling NaN
+input		m1stg_snan_sng_in1;	// operand 1 is single signalling NaN
+input		m2stg_frac2_dbl_norm;	// select line to m2stg_frac2
+input		m2stg_frac2_dbl_dnrm;	// select line to m2stg_frac2
+input		m2stg_frac2_sng_norm;	// select line to m2stg_frac2
+input		m2stg_frac2_sng_dnrm;	// select line to m2stg_frac2
+input		m2stg_frac2_inf;	// select line to m2stg_frac2
+input		m1stg_snan_dbl_in2;	// operand 2 is double signalling NaN
+input		m1stg_snan_sng_in2;	// operand 2 is single signalling NaN
+input		m1stg_inf_zero_in;	// 1 operand is infinity; other is 0
+input		m1stg_inf_zero_in_dbl;	// 1 opnd is infinity; other is 0- dbl
+input		m1stg_dblop;		// double precision operation- mul 1 stg
+input		m1stg_dblop_inv;	// single or int operation- mul 1 stg
+input [105:0]	m4stg_frac;		// multiply array output
+input [5:0]	m4stg_sh_cnt_in;	// multiply normalization shift count
+input [6:0]	m3bstg_ld0_inv;		// leading 0's in multiply operands
+input		m4stg_left_shift_step;	// select line to m5stg_frac
+input		m4stg_right_shift_step;	// select line to m5stg_frac
+input		m5stg_fmuls;		// fmuls- multiply 5 stage
+input		m5stg_fmulda;		// fmuld- multiply 5 stage
+input		mul_frac_out_fracadd;	// select line to mul_frac_out
+input		mul_frac_out_frac;	// select line to mul_frac_out
+input		m5stg_in_of;		// multiply overflow- select exp out
+input		m5stg_to_0;		// result to max finite on overflow
+input		fmul_clken_l;           // multiply pipe clk enable - asserted low
+input		rclk;		// global clock
+
+output [52:0]	m2stg_frac1_array_in;	// multiply array input 1
+output [52:0]	m2stg_frac2_array_in;	// multiply array input 2
+output [5:0]	m1stg_ld0_1;		// denorm operand 1 leading 0's
+output [5:0]	m1stg_ld0_2;		// denorm operand 2 leading 0's
+output		m4stg_frac_105;		// multiply stage 4a fraction input[105]
+output [6:0]	m3stg_ld0_inv;		// leading 0's in multiply operands
+output		m4stg_shl_54;		// multiply shift left output bit[54]
+output		m4stg_shl_55;		// multiply shift left output bit[55]
+output [32:0]	m5stg_frac_32_0;	// multiply stage 5 fraction input
+output		m5stg_frac_dbl_nx;	// double precision inexact result
+output		m5stg_frac_sng_nx;	// single precision inexact result
+output		m5stg_frac_neq_0;	// fraction input to mul 5 stage != 0
+output		m5stg_fracadd_cout;	// fraction rounding adder carry out
+output [51:0]	mul_frac_out;		// multiply fraction output
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+// Output ports are re-declared as wires below, together with the internal nets.
+
+wire [54:0]	mul_frac_in1;
+wire [54:0]	mul_frac_in2;
+wire [52:0]	m2stg_frac1_in;
+wire [52:0]	m2stg_frac1_array_in;
+wire [52:0]	m2stg_frac2_in;
+wire [52:0]	m2stg_frac2_array_in;
+wire [52:0]	m1stg_ld0_1_din;
+wire [5:0]	m1stg_ld0_1;
+wire [52:0]	m1stg_ld0_2_din;
+wire [5:0]	m1stg_ld0_2;
+wire		m4stg_frac_105;
+wire [5:0]	m4stg_sh_cnt_5;
+wire [5:0]	m4stg_sh_cnt_4;
+wire [5:0]	m4stg_sh_cnt;
+wire [6:0]	m3stg_ld0_inv;
+wire [168:63]	m4stg_shl_tmp;
+wire [55:0]	m4stg_shl;
+wire		m4stg_shl_54;
+wire		m4stg_shl_55;
+
+// 2/18/03: Changed from 219:0 to 225:0 (for easier LEC matching plus closer to implementation).
+// In this version the vector is 168:0: the 57 leading zero bits of the 226-bit form are not carried (see the m4stg_shr_tmp assign).
+wire [168:0]	m4stg_shr_tmp;
+
+wire [55:0]	m4stg_shr;
+wire [54:0]	m5stg_frac_pre1_in;
+wire [54:0]	m5stg_frac_pre1;
+wire [54:0]	m5stg_frac_pre2_in;
+wire [54:0]	m5stg_frac_pre2;
+wire [54:0]	m5stg_frac_pre3_in;
+wire [54:0]	m5stg_frac_pre3;
+wire [54:0]	m5stg_frac_pre4_in;
+wire [54:0]	m5stg_frac_pre4;
+wire [54:33]	m5stg_frac_54_33;
+wire [32:0]	m5stg_frac_32_0;
+wire [54:3]	m5stg_fraca;
+wire [54:0]	m5stg_fracb;
+wire		m5stg_frac_dbl_nx;
+wire		m5stg_frac_sng_nx;
+wire		m5stg_frac_neq_0;
+wire [52:0]	m5stg_fracadd_tmp;
+wire		m5stg_fracadd_cout;
+wire [51:0]	m5stg_fracadd;
+wire [51:0]	mul_frac_out_in;
+wire [51:0]	mul_frac_out;
+wire [30:0] mstg_xtra_regs;	// outputs of the 31 spare flops in i_mstg_xtra_regs (inputs tied to 0); not read in this module
+
+wire se_l;	// inverted scan enable, used only as clken_buf.tmb_l
+
+assign se_l = ~se;
+// clk has no declaration in this module: it is an implicit 1-bit net created by the port connection below. Presumably clken_buf drives it from rclk gated by enb_l/tmb_l - TODO confirm in clken_buf.
+clken_buf  ckbuf_mul_frac_dp (
+  .clk(clk),
+  .rclk(rclk),
+  .enb_l(fmul_clken_l),
+  .tmb_l(se_l)
+  );
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply fraction inputs.
+//
+//	Multiply input stage.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(55) i_mul_frac_in1 (
+        .din    (inq_in1[54:0]),
+        .en     (m6stg_step),
+        .clk    (clk),
+ 
+        .q      (mul_frac_in1[54:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(55) i_mul_frac_in2 (
+        .din    (inq_in2[54:0]),
+        .en     (m6stg_step),
+        .clk    (clk),
+ 
+        .q      (mul_frac_in2[54:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply normalization and special input injection.
+//
+//	Multiply stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m2stg_frac1_in[52:0]= ({53{m2stg_frac1_dbl_norm}}
+			    & {1'b1, (mul_frac_in1[51] || m1stg_snan_dbl_in1),
+				mul_frac_in1[50:0]})
+		| ({53{m2stg_frac1_dbl_dnrm}}
+                            & {mul_frac_in1[51:0], 1'b0})
+                | ({53{m2stg_frac1_sng_norm}}
+                            & {1'b1, (mul_frac_in1[54] || m1stg_snan_sng_in1),
+                                mul_frac_in1[53:32], 29'b0})
+                | ({53{m2stg_frac1_sng_dnrm}}
+                            & {mul_frac_in1[54:32], 30'b0})
+		| ({53{m2stg_frac1_inf}}
+			    & 53'h10000000000000);
+
+assign m2stg_frac1_array_in[52:0]= (~m2stg_frac1_in[52:0]);	// operand 1 leaves this module bitwise inverted; operand 2 (below) is passed through as is
+
+assign m2stg_frac2_in[52:0]= ({53{m2stg_frac2_dbl_norm}}
+                            & {1'b1, (mul_frac_in2[51] || m1stg_snan_dbl_in2),
+                                mul_frac_in2[50:0]})
+                | ({53{m2stg_frac2_dbl_dnrm}}
+                            & {mul_frac_in2[51:0], 1'b0})
+                | ({53{m2stg_frac2_sng_norm}}
+                            & {1'b1, (mul_frac_in2[54] || m1stg_snan_sng_in2),
+                                mul_frac_in2[53:32], 29'b0})
+                | ({53{m2stg_frac2_sng_dnrm}}
+                            & {mul_frac_in2[54:32], 30'b0})
+                | ({53{m2stg_frac2_inf}}
+			    & {1'b1, {23{m1stg_inf_zero_in}},
+					{29{m1stg_inf_zero_in_dbl}}});
+ 
+assign m2stg_frac2_array_in[52:0]= m2stg_frac2_in[52:0];
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply leading 0 counts.
+//
+//	Multiply stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m1stg_ld0_1_din[52:0]= ({53{m1stg_dblop_inv}}
+			    & {mul_frac_in1[54:32], 30'b0})
+		| ({53{m1stg_dblop}}
+			    & {mul_frac_in1[51:0], 1'b0});
+// The counter input uses the same bit alignment as the *_sng_dnrm / *_dbl_dnrm terms of m2stg_frac1_in above.
+fpu_cnt_lead0_53b i_m1stg_ld0_1 (
+	.din	(m1stg_ld0_1_din[52:0]),
+
+	.lead0	(m1stg_ld0_1[5:0])
+);
+
+assign m1stg_ld0_2_din[52:0]= ({53{m1stg_dblop_inv}}
+			    & {mul_frac_in2[54:32], 30'b0})
+		| ({53{m1stg_dblop}}
+			    & {mul_frac_in2[51:0], 1'b0});
+
+fpu_cnt_lead0_53b i_m1stg_ld0_2 (
+	.din	(m1stg_ld0_2_din[52:0]),
+
+	.lead0	(m1stg_ld0_2[5:0])
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply shifts for post-normalization/denormalization.
+//
+//	Multiply stage 4a.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m4stg_frac_105= m4stg_frac[105];
+// 56-bit register = 6 + 6 + 6 + 7 + 31: six copies each of shift-count bits 5 and 4, the count itself, the leading-0 count, and 31 spare bits tied to 0.
+dffe_s #(56) i_mstg_xtra_regs (
+	.din	({{6{m4stg_sh_cnt_in[5]}}, 
+			{6{m4stg_sh_cnt_in[4]}},
+			m4stg_sh_cnt_in[5:0],
+			m3bstg_ld0_inv[6:0],
+			31'h0000_0000}),
+	.en     (m6stg_step),
+        .clk    (clk),
+
+	.q	({m4stg_sh_cnt_5[5:0],
+			m4stg_sh_cnt_4[5:0],
+			m4stg_sh_cnt[5:0],
+			m3stg_ld0_inv[6:0],
+			mstg_xtra_regs[30:0]}),
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+// Of the replicated copies only m4stg_sh_cnt_5[0] is read below; m4stg_sh_cnt_5[5:1] and m4stg_sh_cnt_4 are not referenced in this module.
+//assign m4stg_shl_tmp[168:0]= {m4stg_frac[105:0], 63'b0}
+//		<< {m4stg_sh_cnt_5[0], m4stg_sh_cnt[4:0]};
+// Active form: a 106-bit shift, so product bits moved above [105] are discarded; the result equals bits [168:63] of the 169-bit form above.
+  assign m4stg_shl_tmp[168:63]=  m4stg_frac[105:0]
+		<< {m4stg_sh_cnt_5[0], m4stg_sh_cnt[4:0]};
+// m4stg_shl[55:1] are the top 55 shifted bits; m4stg_shl[0] is the OR-reduction of the remaining 51 bits.
+assign m4stg_shl[55:0]= {m4stg_shl_tmp[168:114], (|m4stg_shl_tmp[113:63])};
+assign m4stg_shl_54= m4stg_shl[54];
+
+assign m4stg_shl_55= m4stg_shl[55];
+
+// 2/18/03: changed below to match implementation plus easier LEC
+// assign m4stg_shr_tmp[219:0]= {57'b0, m4stg_frac[105:0], 57'b0} 
+// 						>> m4stg_sh_cnt[5:0];
+// assign m4stg_shr[55:0]= {m4stg_shr_tmp[162:108], (|m4stg_shr_tmp[107:0])};
+
+//assign m4stg_shr_tmp[225:0]= {57'b0, m4stg_frac[105:0], 63'b0} >> m4stg_sh_cnt[5:0];
+  assign m4stg_shr_tmp[168:0]= {       m4stg_frac[105:0], 63'b0} >> m4stg_sh_cnt[5:0];
+// m4stg_shr[55:1] are the top 55 bits after the right shift; m4stg_shr[0] is the OR-reduction of all 114 bits below them.
+
+assign m4stg_shr[55:0]= {m4stg_shr_tmp[168:114], (|m4stg_shr_tmp[113:0])};
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Select post-normalization or denormalization result.
+//
+//	Multiply stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// 2/18/03: Inverted the logic (nand instead of and) to reflect implementation and easier LEC
+// assign m5stg_frac_pre1_in[54:0]= ({55{(m4stg_left_shift_step && m4stg_shl[55])}}
+//           & m4stg_shl[54:0])
+//     | ({55{(!m6stg_step)}}
+//           & m5stg_fracb[54:0]);
+
+assign m5stg_frac_pre1_in[54:0]= ~(({55{(m4stg_left_shift_step && m4stg_shl[55])}}
+			    & m4stg_shl[54:0])
+		| ({55{(!m6stg_step)}}
+			    & m5stg_fracb[54:0]));
+// dff_s is instantiated without an enable: the (!m6stg_step) term above feeds m5stg_fracb back so the stage can keep its value (assumes the shift-step selects are low then - TODO confirm in the control block).
+dff_s #(55) i_m5stg_frac_pre1 (
+	.din	(m5stg_frac_pre1_in[54:0]),
+	.clk    (clk),
+
+        .q      (m5stg_frac_pre1[54:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+// 2/18/03: Inverted the logic (nand instead of and) to reflect implementation and easier LEC
+// assign m5stg_frac_pre2_in[54:0]= ({55{(m4stg_left_shift_step
+//           && (!m4stg_shl[55]))}}
+//           & {m4stg_shl[53:0], 1'b0});
+
+
+assign m5stg_frac_pre2_in[54:0]= ~({55{(m4stg_left_shift_step
+					&& (!m4stg_shl[55]))}}
+			    & {m4stg_shl[53:0], 1'b0});
+
+dff_s #(55) i_m5stg_frac_pre2 (
+	.din	(m5stg_frac_pre2_in[54:0]),
+	.clk	(clk),
+
+	.q	(m5stg_frac_pre2[54:0]),
+
+	.se	(se),
+		.si	(),
+	.so	()
+);
+
+// 2/18/03: Inverted the logic (nand instead of and) to reflect implementation and easier LEC
+// assign m5stg_frac_pre3_in[54:0]= ({55{(m4stg_right_shift_step
+//           && m4stg_shr[55])}}
+//           & m4stg_shr[54:0]);
+
+assign m5stg_frac_pre3_in[54:0]= ~({55{(m4stg_right_shift_step
+					&& m4stg_shr[55])}}
+			    & m4stg_shr[54:0]);
+
+dff_s #(55) i_m5stg_frac_pre3 (
+	.din	(m5stg_frac_pre3_in[54:0]),
+	.clk	(clk),
+
+	.q	(m5stg_frac_pre3[54:0]),
+
+	.se	(se),
+		.si	(),
+	.so	()
+);
+
+// 2/18/03: Inverted the logic (nand instead of and) to reflect implementation and easier LEC
+// assign m5stg_frac_pre4_in[54:0]= ({55{(m4stg_right_shift_step
+//           && (!m4stg_shr[55]))}}
+//           & {m4stg_shr[53:0], 1'b0});
+
+assign m5stg_frac_pre4_in[54:0]= ~({55{(m4stg_right_shift_step
+					&& (!m4stg_shr[55]))}}
+			    & {m4stg_shr[53:0], 1'b0});
+
+dff_s #(55) i_m5stg_frac_pre4 (
+	.din	(m5stg_frac_pre4_in[54:0]),
+	.clk	(clk),
+
+	.q	(m5stg_frac_pre4[54:0]),
+
+	.se	(se),
+		.si	(),
+	.so	()
+);
+
+// 2/18/03: Inverted the logic (nand instead of or) to reflect implementation and easier LEC
+// assign m5stg_frac[54:0]= (m5stg_frac_pre1[54:0]
+//     | m5stg_frac_pre2[54:0]
+//     | m5stg_frac_pre3[54:0]
+//     | m5stg_frac_pre4[54:0]);
+
+assign {m5stg_frac_54_33[54:33], m5stg_frac_32_0[32:0]} = ~(m5stg_frac_pre1[54:0]
+		& m5stg_frac_pre2[54:0]
+		& m5stg_frac_pre3[54:0]
+		& m5stg_frac_pre4[54:0]);
+// m5stg_fraca omits bits [2:0] of the stage-5 fraction; m5stg_fracb is the full 55-bit value.
+
+assign m5stg_fraca[54:3]= {m5stg_frac_54_33[54:33], m5stg_frac_32_0[32:3]};
+
+assign m5stg_fracb[54:0]= {m5stg_frac_54_33[54:33], m5stg_frac_32_0[32:0]};
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply rounding.
+//
+//	Multiply stage 5.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m5stg_frac_dbl_nx= (|m5stg_fracb[2:0]);
+
+assign m5stg_frac_sng_nx= m5stg_frac_dbl_nx || (|m5stg_fracb[31:3]);
+
+assign m5stg_frac_neq_0= m5stg_frac_sng_nx || (|m5stg_fracb[54:32]);
+// 53-bit add: m5stg_fmulda increments at m5stg_fraca[3], m5stg_fmuls at m5stg_fraca[32]; bit 52 of the sum is the carry out.
+assign m5stg_fracadd_tmp[52:0]= {1'b0, m5stg_fraca[54:3]}
+			+ {23'b0, m5stg_fmuls, 28'b0, m5stg_fmulda};
+
+assign m5stg_fracadd_cout= m5stg_fracadd_tmp[52];
+
+assign m5stg_fracadd[51:0]= m5stg_fracadd_tmp[51:0];
+// AND-OR select of the output fraction; the m5stg_in_of term contributes 52 copies of m5stg_to_0. The three selects are assumed mutually exclusive - TODO confirm.
+assign mul_frac_out_in[51:0]= ({52{mul_frac_out_fracadd}}
+			    & m5stg_fracadd[51:0])
+		| ({52{mul_frac_out_frac}}
+			    & m5stg_fracb[54:3])
+		| ({52{m5stg_in_of}}
+			    & {52{m5stg_to_0}});
+
+dffe_s #(52) i_mul_frac_out (
+	.din	(mul_frac_out_in[51:0]),
+	.en     (m6stg_step),
+        .clk    (clk),
+
+        .q      (mul_frac_out[51:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_in2_gt_in1_3b.v
===================================================================
--- /trunk/T1-FPU/fpu_in2_gt_in1_3b.v	(revision 6)
+++ /trunk/T1-FPU/fpu_in2_gt_in1_3b.v	(revision 6)
@@ -0,0 +1,59 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_in2_gt_in1_3b.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Three bit comparison of two inputs that can have any value.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_in2_gt_in1_3b (
+	din1,
+	din2,
+// outputs: purely combinational functions of din1 and din2 (no clock, no state)
+	din2_neq_din1,
+	din2_gt_din1
+);
+
+// Unsigned 3-bit comparator: bit 2 is compared first, then bit 1, then bit 0.
+input [2:0]	din1;			// input 1- 3 bits
+input [2:0]	din2;			// input 2- 3 bits
+
+output		din2_neq_din1;		// input 2 doesn't equal input 1
+output		din2_gt_din1;		// input 2 is greater than input 1
+
+
+wire [2:0]	din2_eq_din1;
+wire		din2_neq_din1;
+wire		din2_gt_din1;
+
+// Per-bit equality: din2_eq_din1[i] is 1 when din1[i] and din2[i] match (bitwise XNOR).
+assign din2_eq_din1[2:0]= (~(din1 ^ din2));
+// Not equal as soon as one bit position differs.
+assign din2_neq_din1= (!(&din2_eq_din1));
+// Greater-than: at the highest bit position where the inputs differ, din2 has the 1 and din1 the 0.
+assign din2_gt_din1= ((!din1[2]) && din2[2])
+		|| (din2_eq_din1[2] && (!din1[1]) && din2[1])
+		|| ((&din2_eq_din1[2:1]) && (!din1[0]) && din2[0]);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_div.v
===================================================================
--- /trunk/T1-FPU/fpu_div.v	(revision 6)
+++ /trunk/T1-FPU/fpu_div.v	(revision 6)
@@ -0,0 +1,444 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_div.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	FPU divide pipe.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+module fpu_div (
+	inq_op,
+	inq_rnd_mode,
+	inq_id,
+	inq_in1,
+	inq_in1_53_0_neq_0,
+	inq_in1_50_0_neq_0,
+	inq_in1_53_32_neq_0,
+	inq_in1_exp_eq_0,
+	inq_in1_exp_neq_ffs,
+	inq_in2,
+	inq_in2_53_0_neq_0,
+	inq_in2_50_0_neq_0,
+	inq_in2_53_32_neq_0,
+	inq_in2_exp_eq_0,
+	inq_in2_exp_neq_ffs,
+	inq_div,
+	div_dest_rdy,
+	fdiv_clken_l,
+	fdiv_clken_l_div_exp_buf1,
+	arst_l,
+	grst_l,
+	rclk,
+	
+	div_pipe_active,
+	d1stg_step,
+	d8stg_fdiv_in,
+	div_id_out_in,
+	div_exc_out,
+	d8stg_fdivd,
+	d8stg_fdivs,
+	div_sign_out,
+	div_exp_outa,
+	div_frac_outa,
+
+	se,
+	si,
+	so
+);
+// Structural top level of the divide pipe: it contains no logic of its own and
+// only wires together fpu_div_ctl, fpu_div_exp_dp and fpu_div_frac_dp.
+
+input [7:0]	inq_op;			// request opcode to op pipes
+input [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+input [4:0]	inq_id;			// request ID to the operation pipes
+input [63:0]	inq_in1;		// request operand 1 to op pipes
+input		inq_in1_53_0_neq_0;	// request operand 1[53:0]!=0
+input		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+input		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+input		inq_in1_exp_eq_0;	// request operand 1 exp==0
+input		inq_in1_exp_neq_ffs;	// request operand 1 exp!=0xff's
+input [63:0]	inq_in2;		// request operand 2 to op pipes
+input		inq_in2_53_0_neq_0;	// request operand 2[53:0]!=0
+input		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+input		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+input		inq_in2_exp_eq_0;	// request operand 2 exp==0
+input		inq_in2_exp_neq_ffs;	// request operand 2 exp!=0xff's
+input		inq_div;		// divide pipe request
+input		div_dest_rdy;		// divide result req accepted for CPX
+input		fdiv_clken_l;           // fdiv clock enable for div_frac_dp
+input		fdiv_clken_l_div_exp_buf1;           // fdiv clock enable for div_exp_dp
+input		arst_l;			// global async. reset- asserted low
+input		grst_l;			// global sync. reset- asserted low
+input		rclk;			// global clock
+
+output		div_pipe_active;        // div pipe is executing a valid instr
+output		d1stg_step;		// divide pipe load
+output		d8stg_fdiv_in;		// div pipe output request next cycle
+output [9:0]	div_id_out_in;		// div pipe output ID next cycle
+output [4:0]	div_exc_out;		// divide pipe result- exception flags
+output		d8stg_fdivd;		// divide double- divide stage 8
+output		d8stg_fdivs;		// divide single- divide stage 8
+output		div_sign_out;		// divide sign output
+output [10:0]	div_exp_outa;		// divide exponent output
+output [51:0]	div_frac_outa;		// divide fraction output
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_div_ctl.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire		d1stg_snan_sng_in1;	// operand 1 is single signalling NaN
+wire		d1stg_snan_dbl_in1;	// operand 1 is double signalling NaN
+wire		d1stg_snan_sng_in2;	// operand 2 is single signalling NaN
+wire		d1stg_snan_dbl_in2;	// operand 2 is double signalling NaN
+wire		d1stg_step;		// divide pipe load
+wire		d1stg_dblop;		// double precision operation- d1 stg
+wire		d234stg_fdiv;		// select line to div_expadd1
+wire		d3stg_fdiv;		// divide operation- divide stage 3
+wire		d4stg_fdiv;		// divide operation- divide stage 4
+wire		d5stg_fdiva;		// divide operation- divide stage 5
+wire		d5stg_fdivb;		// divide operation- divide stage 5
+wire		d5stg_fdivs;		// divide single- divide stage 5
+wire		d5stg_fdivd;		// divide double- divide stage 5
+wire		d6stg_fdiv;		// divide operation- divide stage 6
+wire		d6stg_fdivs;		// divide single- divide stage 6
+wire		d6stg_fdivd;		// divide double- divide stage 6
+wire		d7stg_fdiv;		// divide operation- divide stage 7
+wire		d7stg_fdivd;		// divide double- divide stage 7
+wire		d8stg_fdiv_in;		// div pipe output request next cycle
+wire		d8stg_fdivs;		// divide single- divide stage 8
+wire		d8stg_fdivd;		// divide double- divide stage 8
+wire [9:0]	div_id_out_in;		// div pipe output ID next cycle
+wire		div_sign_out;		// divide sign output
+wire [4:0]	div_exc_out;		// divide pipe result- exception flags
+wire		div_norm_frac_in1_dbl_norm; // select line to div_norm
+wire		div_norm_frac_in1_dbl_dnrm; // select line to div_norm
+wire		div_norm_frac_in1_sng_norm; // select line to div_norm
+wire		div_norm_frac_in1_sng_dnrm; // select line to div_norm
+wire		div_norm_frac_in2_dbl_norm; // select line to div_norm
+wire		div_norm_frac_in2_dbl_dnrm; // select line to div_norm
+wire		div_norm_frac_in2_sng_norm; // select line to div_norm
+wire		div_norm_frac_in2_sng_dnrm; // select line to div_norm
+wire		div_norm_inf;		// select line to div_norm
+wire		div_norm_qnan;		// select line to div_norm
+wire		div_norm_zero;		// select line to div_norm
+wire		div_frac_add_in2_load;	// load enable to div_frac_add_in2
+wire		d6stg_frac_out_shl1;	// select line to d6stg_frac
+wire		d6stg_frac_out_nosh;	// select line to d6stg_frac
+wire		div_frac_add_in1_add;	// select line to div_frac_add_in1
+wire		div_frac_add_in1_load;	// load enable to div_frac_add_in1
+wire		d7stg_rndup_inv;	// no rounding increment
+wire		d7stg_to_0;		// result to max finite on overflow
+wire		d7stg_to_0_inv;		// result to infinity on overflow
+wire		div_frac_out_add_in1;	// select line to div_frac_out
+wire		div_frac_out_add;	// select line to div_frac_out
+wire		div_frac_out_shl1_dbl;	// select line to div_frac_out
+wire		div_frac_out_shl1_sng;	// select line to div_frac_out
+wire		div_frac_out_of;	// select line to div_frac_out
+wire		div_frac_out_load;	// load enable to div_frac_out
+wire		div_expadd1_in1_dbl;	// select line to div_expadd1
+wire		div_expadd1_in1_sng;	// select line to div_expadd1
+wire		div_expadd1_in2_exp_in2_dbl; // select line to div_expadd1
+wire		div_expadd1_in2_exp_in2_sng; //select line to div_expadd1
+wire		div_exp1_expadd1;	// select line to div_exp1
+wire		div_exp1_0835;		// select line to div_exp1
+wire		div_exp1_0118;		// select line to div_exp1
+wire		div_exp1_zero;		// select line to div_exp1
+wire		div_exp1_load;		// load enable to div_exp1
+wire		div_expadd2_in1_exp_out; // select line to div_expadd2
+wire		div_expadd2_no_decr_inv; // no exponent decrement
+wire		div_expadd2_cin;	// carry in to 2nd exponent adder
+wire		div_exp_out_expadd22_inv; // select line to div_exp_out
+wire		div_exp_out_expadd2;	// select line to div_exp_out
+wire		div_exp_out_of;		// overflow to exponent output
+wire		div_exp_out_exp_out;	// select line to div_exp_out
+wire		div_exp_out_load;	// load enable to div_exp_out
+wire		div_pipe_active;        // div pipe is executing a valid instr
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_div_exp_dp.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [12:0]	div_exp1;		// divide exponent- intermediate value
+wire [12:12]	div_expadd2;		// divide exponent- 2nd adder output
+wire [12:0]	div_exp_out;		// divide exponent output- fpu_div
+wire [10:0]	div_exp_outa;		// divide exponent output
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_div_frac_dp.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [5:0]	div_shl_cnt;		// divide left shift amount
+wire		d6stg_frac_0;		// divide fraction[0]- intermediate val
+wire		d6stg_frac_1;		// divide fraction[1]- intermediate val
+wire		d6stg_frac_2;		// divide fraction[2]- intermediate val
+wire		d6stg_frac_29;		// divide fraction[29]- intermediate val
+wire		d6stg_frac_30;		// divide fraction[30]- intermediate val
+wire		d6stg_frac_31;		// divide fraction[31]- intermediate val
+wire		div_frac_add_in1_neq_0;	// div_frac_add_in1 != 0
+wire		div_frac_add_52_inv;	// div_frac_add bit[52] inverted
+wire		div_frac_add_52_inva;	// div_frac_add bit[52] inverted copy
+wire [54:53]	div_frac_out;		// divide fraction output- fpu_div
+wire [51:0]	div_frac_outa;		// divide fraction output
+
+// Scan chain (si -> ctl -> exp_dp -> frac_dp -> so); declared explicitly so no implicit nets are needed.
+wire		scan_out_fpu_div_ctl;	// fpu_div_ctl so -> fpu_div_exp_dp si
+wire		scan_out_fpu_div_exp_dp; // fpu_div_exp_dp so -> fpu_div_frac_dp si
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Instantiations.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+fpu_div_ctl fpu_div_ctl (
+	.inq_in1_51			(inq_in1[51]),
+	.inq_in1_54			(inq_in1[54]),
+	.inq_in1_53_0_neq_0		(inq_in1_53_0_neq_0),
+	.inq_in1_50_0_neq_0		(inq_in1_50_0_neq_0),
+	.inq_in1_53_32_neq_0		(inq_in1_53_32_neq_0),
+	.inq_in1_exp_eq_0		(inq_in1_exp_eq_0),
+	.inq_in1_exp_neq_ffs		(inq_in1_exp_neq_ffs),
+	.inq_in2_51			(inq_in2[51]),
+	.inq_in2_54			(inq_in2[54]),
+	.inq_in2_53_0_neq_0		(inq_in2_53_0_neq_0),
+	.inq_in2_50_0_neq_0		(inq_in2_50_0_neq_0),
+	.inq_in2_53_32_neq_0		(inq_in2_53_32_neq_0),
+	.inq_in2_exp_eq_0		(inq_in2_exp_eq_0),
+	.inq_in2_exp_neq_ffs		(inq_in2_exp_neq_ffs),
+	.inq_op				(inq_op[7:0]),
+	.div_exp1			(div_exp1[12:0]),
+	.div_dest_rdy			(div_dest_rdy),
+	.inq_rnd_mode			(inq_rnd_mode[1:0]),
+	.inq_id				(inq_id[4:0]),
+	.inq_in1_63			(inq_in1[63]),
+	.inq_in2_63			(inq_in2[63]),
+	.inq_div			(inq_div),
+	.div_exp_out			(div_exp_out[12:0]),
+	.div_frac_add_52_inva		(div_frac_add_52_inva),
+	.div_frac_add_in1_neq_0		(div_frac_add_in1_neq_0),
+	.div_frac_out_54		(div_frac_out[54]),
+	.d6stg_frac_0			(d6stg_frac_0),
+	.d6stg_frac_1			(d6stg_frac_1),
+	.d6stg_frac_2			(d6stg_frac_2),
+	.d6stg_frac_29			(d6stg_frac_29),
+	.d6stg_frac_30			(d6stg_frac_30),
+	.d6stg_frac_31			(d6stg_frac_31),
+	.div_frac_out_53		(div_frac_out[53]),
+	.div_expadd2_12			(div_expadd2[12]),
+	.arst_l				(arst_l),
+	.grst_l				(grst_l),
+	.rclk			(rclk),
+
+	.div_pipe_active		(div_pipe_active),
+	.d1stg_snan_sng_in1		(d1stg_snan_sng_in1),
+	.d1stg_snan_dbl_in1		(d1stg_snan_dbl_in1),
+	.d1stg_snan_sng_in2		(d1stg_snan_sng_in2),
+	.d1stg_snan_dbl_in2		(d1stg_snan_dbl_in2),
+	.d1stg_step			(d1stg_step),
+	.d1stg_dblop			(d1stg_dblop),
+	.d234stg_fdiv			(d234stg_fdiv),
+	.d3stg_fdiv			(d3stg_fdiv),
+	.d4stg_fdiv			(d4stg_fdiv),
+	.d5stg_fdiva			(d5stg_fdiva),
+	.d5stg_fdivb			(d5stg_fdivb),
+	.d5stg_fdivs			(d5stg_fdivs),
+	.d5stg_fdivd			(d5stg_fdivd),
+	.d6stg_fdiv			(d6stg_fdiv),
+	.d6stg_fdivs			(d6stg_fdivs),
+	.d6stg_fdivd			(d6stg_fdivd),
+	.d7stg_fdiv			(d7stg_fdiv),
+	.d7stg_fdivd			(d7stg_fdivd),
+	.d8stg_fdiv_in			(d8stg_fdiv_in),
+	.d8stg_fdivs			(d8stg_fdivs),
+	.d8stg_fdivd			(d8stg_fdivd),
+	.div_id_out_in			(div_id_out_in[9:0]),
+	.div_sign_out			(div_sign_out),
+	.div_exc_out			(div_exc_out[4:0]),
+	.div_norm_frac_in1_dbl_norm	(div_norm_frac_in1_dbl_norm),
+	.div_norm_frac_in1_dbl_dnrm	(div_norm_frac_in1_dbl_dnrm),
+	.div_norm_frac_in1_sng_norm	(div_norm_frac_in1_sng_norm),
+	.div_norm_frac_in1_sng_dnrm	(div_norm_frac_in1_sng_dnrm),
+	.div_norm_frac_in2_dbl_norm	(div_norm_frac_in2_dbl_norm),
+	.div_norm_frac_in2_dbl_dnrm	(div_norm_frac_in2_dbl_dnrm),
+	.div_norm_frac_in2_sng_norm	(div_norm_frac_in2_sng_norm),
+	.div_norm_frac_in2_sng_dnrm	(div_norm_frac_in2_sng_dnrm),
+	.div_norm_inf			(div_norm_inf),
+	.div_norm_qnan			(div_norm_qnan),
+	.div_norm_zero			(div_norm_zero),
+	.div_frac_add_in2_load		(div_frac_add_in2_load),
+	.d6stg_frac_out_shl1		(d6stg_frac_out_shl1),
+	.d6stg_frac_out_nosh		(d6stg_frac_out_nosh),
+	.div_frac_add_in1_add		(div_frac_add_in1_add),
+	.div_frac_add_in1_load		(div_frac_add_in1_load),
+	.d7stg_rndup_inv		(d7stg_rndup_inv),
+	.d7stg_to_0			(d7stg_to_0),
+	.d7stg_to_0_inv			(d7stg_to_0_inv),
+	.div_frac_out_add_in1		(div_frac_out_add_in1),
+	.div_frac_out_add		(div_frac_out_add),
+	.div_frac_out_shl1_dbl		(div_frac_out_shl1_dbl),
+	.div_frac_out_shl1_sng		(div_frac_out_shl1_sng),
+	.div_frac_out_of		(div_frac_out_of),
+	.div_frac_out_load		(div_frac_out_load),
+	.div_expadd1_in1_dbl		(div_expadd1_in1_dbl),
+	.div_expadd1_in1_sng		(div_expadd1_in1_sng),
+	.div_expadd1_in2_exp_in2_dbl	(div_expadd1_in2_exp_in2_dbl),
+	.div_expadd1_in2_exp_in2_sng	(div_expadd1_in2_exp_in2_sng),
+	.div_exp1_expadd1		(div_exp1_expadd1),
+	.div_exp1_0835			(div_exp1_0835),
+	.div_exp1_0118			(div_exp1_0118),
+	.div_exp1_zero			(div_exp1_zero),
+	.div_exp1_load			(div_exp1_load),
+	.div_expadd2_in1_exp_out	(div_expadd2_in1_exp_out),
+	.div_expadd2_no_decr_inv	(div_expadd2_no_decr_inv),
+	.div_expadd2_cin		(div_expadd2_cin),
+	.div_exp_out_expadd22_inv	(div_exp_out_expadd22_inv),
+	.div_exp_out_expadd2		(div_exp_out_expadd2),
+	.div_exp_out_of			(div_exp_out_of),
+	.div_exp_out_exp_out		(div_exp_out_exp_out),
+	.div_exp_out_load		(div_exp_out_load),
+
+	.se				(se),
+	.si				(si),
+	.so				(scan_out_fpu_div_ctl)
+);
+
+
+fpu_div_exp_dp fpu_div_exp_dp (
+	.inq_in1			(inq_in1[62:52]),
+	.inq_in2			(inq_in2[62:52]),
+	.d1stg_step			(d1stg_step),
+	.d234stg_fdiv			(d234stg_fdiv),
+	.div_expadd1_in1_dbl		(div_expadd1_in1_dbl),
+	.div_expadd1_in1_sng		(div_expadd1_in1_sng),
+	.div_expadd1_in2_exp_in2_dbl	(div_expadd1_in2_exp_in2_dbl),
+	.div_expadd1_in2_exp_in2_sng	(div_expadd1_in2_exp_in2_sng),
+	.d3stg_fdiv			(d3stg_fdiv),
+	.d4stg_fdiv			(d4stg_fdiv),
+	.div_shl_cnt			(div_shl_cnt[5:0]),
+	.div_exp1_expadd1		(div_exp1_expadd1),
+	.div_exp1_0835			(div_exp1_0835),
+	.div_exp1_0118			(div_exp1_0118),
+	.div_exp1_zero			(div_exp1_zero),
+	.div_exp1_load			(div_exp1_load),
+	.div_expadd2_in1_exp_out	(div_expadd2_in1_exp_out),
+	.d5stg_fdiva			(d5stg_fdiva),
+	.d5stg_fdivd			(d5stg_fdivd),
+	.d5stg_fdivs			(d5stg_fdivs),
+	.d6stg_fdiv			(d6stg_fdiv),
+	.d7stg_fdiv			(d7stg_fdiv),
+	.div_expadd2_no_decr_inv	(div_expadd2_no_decr_inv),
+	.div_expadd2_cin		(div_expadd2_cin),
+	.div_exp_out_expadd2		(div_exp_out_expadd2),
+	.div_exp_out_expadd22_inv	(div_exp_out_expadd22_inv),
+	.div_exp_out_of			(div_exp_out_of),
+	.d7stg_to_0_inv			(d7stg_to_0_inv),
+	.d7stg_fdivd			(d7stg_fdivd),
+	.div_exp_out_exp_out		(div_exp_out_exp_out),
+	.d7stg_rndup_inv		(d7stg_rndup_inv),
+	.div_frac_add_52_inv		(div_frac_add_52_inv),
+	.div_exp_out_load		(div_exp_out_load),
+	.fdiv_clken_l			(fdiv_clken_l_div_exp_buf1),
+	.rclk			(rclk),
+
+	.div_exp1			(div_exp1[12:0]),
+	.div_expadd2_12			(div_expadd2[12]),
+	.div_exp_out			(div_exp_out[12:0]),
+	.div_exp_outa			(div_exp_outa[10:0]),
+
+	.se				(se),
+	.si				(scan_out_fpu_div_ctl),
+	.so				(scan_out_fpu_div_exp_dp)
+);
+
+
+fpu_div_frac_dp fpu_div_frac_dp (
+	.inq_in1			(inq_in1[54:0]),
+	.inq_in2			(inq_in2[54:0]),
+	.d1stg_step			(d1stg_step),
+	.div_norm_frac_in1_dbl_norm	(div_norm_frac_in1_dbl_norm),
+	.div_norm_frac_in1_dbl_dnrm	(div_norm_frac_in1_dbl_dnrm),
+	.div_norm_frac_in1_sng_norm	(div_norm_frac_in1_sng_norm),
+	.div_norm_frac_in1_sng_dnrm	(div_norm_frac_in1_sng_dnrm),
+	.div_norm_frac_in2_dbl_norm	(div_norm_frac_in2_dbl_norm),
+	.div_norm_frac_in2_dbl_dnrm	(div_norm_frac_in2_dbl_dnrm),
+	.div_norm_frac_in2_sng_norm	(div_norm_frac_in2_sng_norm),
+	.div_norm_frac_in2_sng_dnrm	(div_norm_frac_in2_sng_dnrm),
+	.div_norm_inf			(div_norm_inf),
+	.div_norm_qnan			(div_norm_qnan),
+	.d1stg_dblop			(d1stg_dblop),
+	.div_norm_zero			(div_norm_zero),
+	.d1stg_snan_dbl_in1		(d1stg_snan_dbl_in1),
+	.d1stg_snan_sng_in1		(d1stg_snan_sng_in1),
+	.d1stg_snan_dbl_in2		(d1stg_snan_dbl_in2),
+	.d1stg_snan_sng_in2		(d1stg_snan_sng_in2),
+	.d3stg_fdiv			(d3stg_fdiv),
+	.d6stg_fdiv			(d6stg_fdiv),
+	.d6stg_fdivd			(d6stg_fdivd),
+	.d6stg_fdivs			(d6stg_fdivs),
+	.div_frac_add_in2_load		(div_frac_add_in2_load),
+	.d6stg_frac_out_shl1		(d6stg_frac_out_shl1),
+	.d6stg_frac_out_nosh		(d6stg_frac_out_nosh),
+	.d4stg_fdiv			(d4stg_fdiv),
+	.div_frac_add_in1_add		(div_frac_add_in1_add),
+	.div_frac_add_in1_load		(div_frac_add_in1_load),
+	.d5stg_fdivb			(d5stg_fdivb),
+	.div_frac_out_add_in1		(div_frac_out_add_in1),
+	.div_frac_out_add		(div_frac_out_add),
+	.div_frac_out_shl1_dbl		(div_frac_out_shl1_dbl),
+	.div_frac_out_shl1_sng		(div_frac_out_shl1_sng),
+	.div_frac_out_of		(div_frac_out_of),
+	.d7stg_to_0			(d7stg_to_0),
+	.div_frac_out_load		(div_frac_out_load),
+	.fdiv_clken_l			(fdiv_clken_l),
+	.rclk			(rclk),
+
+	.div_shl_cnt			(div_shl_cnt[5:0]),
+	.d6stg_frac_0			(d6stg_frac_0),
+	.d6stg_frac_1			(d6stg_frac_1),
+	.d6stg_frac_2			(d6stg_frac_2),
+	.d6stg_frac_29			(d6stg_frac_29),
+	.d6stg_frac_30			(d6stg_frac_30),
+	.d6stg_frac_31			(d6stg_frac_31),
+	.div_frac_add_in1_neq_0		(div_frac_add_in1_neq_0),
+	.div_frac_add_52_inv		(div_frac_add_52_inv),
+	.div_frac_add_52_inva		(div_frac_add_52_inva),
+	.div_frac_out_54_53      	(div_frac_out[54:53]),
+	.div_frac_outa			(div_frac_outa[51:0]),
+
+	.se				(se),
+	.si				(scan_out_fpu_div_exp_dp),
+	.so				(so)
+);
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_in2_gt_in1_frac.v
===================================================================
--- /trunk/T1-FPU/fpu_in2_gt_in1_frac.v	(revision 6)
+++ /trunk/T1-FPU/fpu_in2_gt_in1_frac.v	(revision 6)
@@ -0,0 +1,379 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_in2_gt_in1_frac.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Fraction comparison of two inputs that can have any value.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_in2_gt_in1_frac (
+	din1,
+	din2,
+	sngop,
+	expadd11,
+	expeq,
+
+	din2_neq_din1,
+	din2_gt_din1,
+	din2_gt1_din1
+);
+// Three-level comparator tree over the 55-bit fraction inputs din1 and din2.
+
+input  [54:0]	din1;			// fraction operand 1
+input  [54:0]	din2;			// fraction operand 2
+input		sngop;			// single precision inputs; enables the [54:52] group
+input		expadd11;		// exponent sign bit
+input		expeq;			// exponents are equal
+
+output		din2_neq_din1;		// fraction of din2 differs from fraction of din1
+output		din2_gt_din1;		// fraction of din2 exceeds fraction of din1
+output		din2_gt1_din1;		// expadd11, or (expeq and din2_gt_din1)
+
+
+// Leaf results: one not-equal / greater-than pair per 2- or 3-bit slice.
+wire		neq_54_52;
+wire		gt_54_52;
+wire		neq_51_50;
+wire		gt_51_50;
+wire		neq_49_48;
+wire		gt_49_48;
+wire		neq_47_45;
+wire		gt_47_45;
+wire		neq_44_42;
+wire		gt_44_42;
+wire		neq_41_39;
+wire		gt_41_39;
+wire		neq_38_36;
+wire		gt_38_36;
+wire		neq_35_33;
+wire		gt_35_33;
+wire		neq_32_30;
+wire		gt_32_30;
+wire		neq_29_27;
+wire		gt_29_27;
+wire		neq_26_24;
+wire		gt_26_24;
+wire		neq_23_21;
+wire		gt_23_21;
+wire		neq_20_18;
+wire		gt_20_18;
+wire		neq_17_15;
+wire		gt_17_15;
+wire		neq_14_12;
+wire		gt_14_12;
+wire		neq_11_9;
+wire		gt_11_9;
+wire		neq_8_6;
+wire		gt_8_6;
+wire		neq_5_3;
+wire		gt_5_3;
+wire		neq_2_0;
+wire		gt_2_0;
+// First merge level: three adjacent leaf results combined per cell.
+wire		neq_51_45;
+wire		gt_51_45;
+wire		neq_44_36;
+wire		gt_44_36;
+wire		neq_35_27;
+wire		gt_35_27;
+wire		neq_26_18;
+wire		gt_26_18;
+wire		neq_17_9;
+wire		gt_17_9;
+wire		neq_8_0;
+wire		gt_8_0;
+// Second merge level: results for bits [51:27] and [26:0].
+wire		neq_51_27;
+wire		gt_51_27;
+wire		neq_26_0;
+wire		gt_26_0;
+
+// Leaf comparators on 2- or 3-bit slices of the operands.
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_54_52 (
+	.din1		(din1[54:52]),
+	.din2		(din2[54:52]),
+
+	.din2_neq_din1	(neq_54_52),
+	.din2_gt_din1	(gt_54_52)
+);
+
+fpu_in2_gt_in1_2b fpu_in2_gt_in1_51_50 (
+	.din1		(din1[51:50]),
+	.din2		(din2[51:50]),
+
+	.din2_neq_din1	(neq_51_50),
+	.din2_gt_din1	(gt_51_50)
+);
+
+fpu_in2_gt_in1_2b fpu_in2_gt_in1_49_48 (
+	.din1		(din1[49:48]),
+	.din2		(din2[49:48]),
+
+	.din2_neq_din1	(neq_49_48),
+	.din2_gt_din1	(gt_49_48)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_47_45 (
+	.din1		(din1[47:45]),
+	.din2		(din2[47:45]),
+
+	.din2_neq_din1	(neq_47_45),
+	.din2_gt_din1	(gt_47_45)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_44_42 (
+	.din1		(din1[44:42]),
+	.din2		(din2[44:42]),
+
+	.din2_neq_din1	(neq_44_42),
+	.din2_gt_din1	(gt_44_42)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_41_39 (
+	.din1		(din1[41:39]),
+	.din2		(din2[41:39]),
+
+	.din2_neq_din1	(neq_41_39),
+	.din2_gt_din1	(gt_41_39)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_38_36 (
+	.din1		(din1[38:36]),
+	.din2		(din2[38:36]),
+
+	.din2_neq_din1	(neq_38_36),
+	.din2_gt_din1	(gt_38_36)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_35_33 (
+	.din1		(din1[35:33]),
+	.din2		(din2[35:33]),
+
+	.din2_neq_din1	(neq_35_33),
+	.din2_gt_din1	(gt_35_33)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_32_30 (
+	.din1		(din1[32:30]),
+	.din2		(din2[32:30]),
+
+	.din2_neq_din1	(neq_32_30),
+	.din2_gt_din1	(gt_32_30)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_29_27 (
+	.din1		(din1[29:27]),
+	.din2		(din2[29:27]),
+
+	.din2_neq_din1	(neq_29_27),
+	.din2_gt_din1	(gt_29_27)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_26_24 (
+	.din1		(din1[26:24]),
+	.din2		(din2[26:24]),
+
+	.din2_neq_din1	(neq_26_24),
+	.din2_gt_din1	(gt_26_24)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_23_21 (
+	.din1		(din1[23:21]),
+	.din2		(din2[23:21]),
+
+	.din2_neq_din1	(neq_23_21),
+	.din2_gt_din1	(gt_23_21)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_20_18 (
+	.din1		(din1[20:18]),
+	.din2		(din2[20:18]),
+
+	.din2_neq_din1	(neq_20_18),
+	.din2_gt_din1	(gt_20_18)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_17_15 (
+	.din1		(din1[17:15]),
+	.din2		(din2[17:15]),
+
+	.din2_neq_din1	(neq_17_15),
+	.din2_gt_din1	(gt_17_15)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_14_12 (
+	.din1		(din1[14:12]),
+	.din2		(din2[14:12]),
+
+	.din2_neq_din1	(neq_14_12),
+	.din2_gt_din1	(gt_14_12)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_11_9 (
+	.din1		(din1[11:9]),
+	.din2		(din2[11:9]),
+
+	.din2_neq_din1	(neq_11_9),
+	.din2_gt_din1	(gt_11_9)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_8_6 (
+	.din1		(din1[8:6]),
+	.din2		(din2[8:6]),
+
+	.din2_neq_din1	(neq_8_6),
+	.din2_gt_din1	(gt_8_6)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_5_3 (
+	.din1		(din1[5:3]),
+	.din2		(din2[5:3]),
+
+	.din2_neq_din1	(neq_5_3),
+	.din2_gt_din1	(gt_5_3)
+);
+
+fpu_in2_gt_in1_3b fpu_in2_gt_in1_2_0 (
+	.din1		(din1[2:0]),
+	.din2		(din2[2:0]),
+
+	.din2_neq_din1	(neq_2_0),
+	.din2_gt_din1	(gt_2_0)
+);
+
+// First merge level: each 3to1 cell combines three adjacent leaf results.
+fpu_in2_gt_in1_3to1 fpu_in2_gt_in1_51_45 (
+	.din2_neq_din1_hi	(neq_51_50),
+	.din2_gt_din1_hi	(gt_51_50),
+	.din2_neq_din1_mid	(neq_49_48),
+	.din2_gt_din1_mid	(gt_49_48),
+	.din2_neq_din1_lo	(neq_47_45),
+	.din2_gt_din1_lo	(gt_47_45),
+
+	.din2_neq_din1		(neq_51_45),
+	.din2_gt_din1		(gt_51_45)
+);
+
+fpu_in2_gt_in1_3to1 fpu_in2_gt_in1_44_36 (
+	.din2_neq_din1_hi	(neq_44_42),
+	.din2_gt_din1_hi	(gt_44_42),
+	.din2_neq_din1_mid	(neq_41_39),
+	.din2_gt_din1_mid	(gt_41_39),
+	.din2_neq_din1_lo	(neq_38_36),
+	.din2_gt_din1_lo	(gt_38_36),
+
+	.din2_neq_din1		(neq_44_36),
+	.din2_gt_din1		(gt_44_36)
+);
+
+fpu_in2_gt_in1_3to1 fpu_in2_gt_in1_35_27 (
+	.din2_neq_din1_hi	(neq_35_33),
+	.din2_gt_din1_hi	(gt_35_33),
+	.din2_neq_din1_mid	(neq_32_30),
+	.din2_gt_din1_mid	(gt_32_30),
+	.din2_neq_din1_lo	(neq_29_27),
+	.din2_gt_din1_lo	(gt_29_27),
+
+	.din2_neq_din1		(neq_35_27),
+	.din2_gt_din1		(gt_35_27)
+);
+
+fpu_in2_gt_in1_3to1 fpu_in2_gt_in1_26_18 (
+	.din2_neq_din1_hi	(neq_26_24),
+	.din2_gt_din1_hi	(gt_26_24),
+	.din2_neq_din1_mid	(neq_23_21),
+	.din2_gt_din1_mid	(gt_23_21),
+	.din2_neq_din1_lo	(neq_20_18),
+	.din2_gt_din1_lo	(gt_20_18),
+
+	.din2_neq_din1		(neq_26_18),
+	.din2_gt_din1		(gt_26_18)
+);
+
+fpu_in2_gt_in1_3to1 fpu_in2_gt_in1_17_9 (
+	.din2_neq_din1_hi	(neq_17_15),
+	.din2_gt_din1_hi	(gt_17_15),
+	.din2_neq_din1_mid	(neq_14_12),
+	.din2_gt_din1_mid	(gt_14_12),
+	.din2_neq_din1_lo	(neq_11_9),
+	.din2_gt_din1_lo	(gt_11_9),
+
+	.din2_neq_din1		(neq_17_9),
+	.din2_gt_din1		(gt_17_9)
+);
+
+fpu_in2_gt_in1_3to1 fpu_in2_gt_in1_8_0 (
+	.din2_neq_din1_hi	(neq_8_6),
+	.din2_gt_din1_hi	(gt_8_6),
+	.din2_neq_din1_mid	(neq_5_3),
+	.din2_gt_din1_mid	(gt_5_3),
+	.din2_neq_din1_lo	(neq_2_0),
+	.din2_gt_din1_lo	(gt_2_0),
+
+	.din2_neq_din1		(neq_8_0),
+	.din2_gt_din1		(gt_8_0)
+);
+
+// Second merge level: combine the first-level results into [51:27] and [26:0].
+fpu_in2_gt_in1_3to1 fpu_in2_gt_in1_51_27 (
+	.din2_neq_din1_hi	(neq_51_45),
+	.din2_gt_din1_hi	(gt_51_45),
+	.din2_neq_din1_mid	(neq_44_36),
+	.din2_gt_din1_mid	(gt_44_36),
+	.din2_neq_din1_lo	(neq_35_27),
+	.din2_gt_din1_lo	(gt_35_27),
+
+	.din2_neq_din1		(neq_51_27),
+	.din2_gt_din1		(gt_51_27)
+);
+
+fpu_in2_gt_in1_3to1 fpu_in2_gt_in1_26_0 (
+	.din2_neq_din1_hi	(neq_26_18),
+	.din2_gt_din1_hi	(gt_26_18),
+	.din2_neq_din1_mid	(neq_17_9),
+	.din2_gt_din1_mid	(gt_17_9),
+	.din2_neq_din1_lo	(neq_8_0),
+	.din2_gt_din1_lo	(gt_8_0),
+
+	.din2_neq_din1		(neq_26_0),
+	.din2_gt_din1		(gt_26_0)
+);
+
+
+// The [54:52] group takes part in the comparison only when sngop is set.
+wire		msb_differ = neq_54_52 & sngop;
+
+assign din2_neq_din1 = msb_differ | neq_51_27 | neq_26_0;
+
+// The most significant group that differs decides the result.
+assign din2_gt_din1 = (msb_differ & gt_54_52)
+		| (~msb_differ & neq_51_27 & gt_51_27)
+		| (~msb_differ & ~neq_51_27 & gt_26_0);
+
+// expadd11 alone asserts the output; otherwise the fraction result
+// is used only when expeq is set.
+assign din2_gt1_din1 = expadd11
+		| (din2_gt_din1 & expeq);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_div_ctl.v
===================================================================
--- /trunk/T1-FPU/fpu_div_ctl.v	(revision 6)
+++ /trunk/T1-FPU/fpu_div_ctl.v	(revision 6)
@@ -0,0 +1,2151 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_div_ctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide pipeline synthesizable logic
+//              - special input cases
+//              - opcode pipeline
+//              - sign logic
+//              - exception logic
+//              - datapath control- select lines and control logic
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_div_ctl (
+	inq_in1_51,
+	inq_in1_54,
+	inq_in1_53_0_neq_0,
+	inq_in1_50_0_neq_0,
+	inq_in1_53_32_neq_0,
+	inq_in1_exp_eq_0,
+	inq_in1_exp_neq_ffs,
+	inq_in2_51,
+	inq_in2_54,
+	inq_in2_53_0_neq_0,
+	inq_in2_50_0_neq_0,
+	inq_in2_53_32_neq_0,
+	inq_in2_exp_eq_0,
+	inq_in2_exp_neq_ffs,
+	inq_op,
+	div_exp1,
+	div_dest_rdy,
+	inq_rnd_mode,
+	inq_id,
+	inq_in1_63,
+	inq_in2_63,
+	inq_div,
+	div_exp_out,
+	div_frac_add_52_inva,
+	div_frac_add_in1_neq_0,
+	div_frac_out_54,
+	d6stg_frac_0,
+	d6stg_frac_1,
+	d6stg_frac_2,
+	d6stg_frac_29,
+	d6stg_frac_30,
+	d6stg_frac_31,
+	div_frac_out_53,
+	div_expadd2_12,
+	arst_l,
+	grst_l,
+	rclk,
+
+	div_pipe_active,	
+	d1stg_snan_sng_in1,
+	d1stg_snan_dbl_in1,
+	d1stg_snan_sng_in2,
+	d1stg_snan_dbl_in2,
+	d1stg_step,
+	d1stg_dblop,
+	d234stg_fdiv,
+	d3stg_fdiv,
+	d4stg_fdiv,
+	d5stg_fdiva,
+	d5stg_fdivb,
+	d5stg_fdivs,
+	d5stg_fdivd,
+	d6stg_fdiv,
+	d6stg_fdivs,
+	d6stg_fdivd,
+	d7stg_fdiv,
+	d7stg_fdivd,
+	d8stg_fdiv_in,
+	d8stg_fdivs,
+	d8stg_fdivd,
+	div_id_out_in,
+	div_sign_out,
+	div_exc_out,
+	div_norm_frac_in1_dbl_norm,
+	div_norm_frac_in1_dbl_dnrm,
+	div_norm_frac_in1_sng_norm,
+	div_norm_frac_in1_sng_dnrm,
+	div_norm_frac_in2_dbl_norm,
+	div_norm_frac_in2_dbl_dnrm,
+	div_norm_frac_in2_sng_norm,
+	div_norm_frac_in2_sng_dnrm,
+	div_norm_inf,
+	div_norm_qnan,
+	div_norm_zero,
+	div_frac_add_in2_load,
+	d6stg_frac_out_shl1,
+	d6stg_frac_out_nosh,
+	div_frac_add_in1_add,
+	div_frac_add_in1_load,
+	d7stg_rndup_inv,
+	d7stg_to_0,
+	d7stg_to_0_inv,
+	div_frac_out_add_in1,
+	div_frac_out_add,
+	div_frac_out_shl1_dbl,
+	div_frac_out_shl1_sng,
+	div_frac_out_of,
+	div_frac_out_load,
+	div_expadd1_in1_dbl,
+	div_expadd1_in1_sng,
+	div_expadd1_in2_exp_in2_dbl,
+	div_expadd1_in2_exp_in2_sng,
+	div_exp1_expadd1,
+	div_exp1_0835,
+	div_exp1_0118,
+	div_exp1_zero,
+	div_exp1_load,
+	div_expadd2_in1_exp_out,
+	div_expadd2_no_decr_inv,
+	div_expadd2_cin,
+	div_exp_out_expadd22_inv,
+	div_exp_out_expadd2,
+	div_exp_out_of,
+	div_exp_out_exp_out,
+	div_exp_out_load,
+
+	se,
+	si,
+	so
+);
+
+
+parameter						// opcodes matched against d1stg_op[7:0]
+		FDIVS=  8'h4d,				// divide single: bit[0]=1, bit[1]=0
+		FDIVD=	8'h4e;				// divide double: bit[0]=0, bit[1]=1
+
+
+input		inq_in1_51;		// request operand 1[51]
+input		inq_in1_54;		// request operand 1[54]
+input		inq_in1_53_0_neq_0;	// request operand 1[53:0]!=0
+input		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+input		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+input		inq_in1_exp_eq_0;	// request operand 1[62:52]==0
+input		inq_in1_exp_neq_ffs;	// request operand 1[62:52]!=0x7ff
+input		inq_in2_51;		// request operand 2[51]
+input		inq_in2_54;		// request operand 2[54]
+input		inq_in2_53_0_neq_0;	// request operand 2[53:0]!=0
+input		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+input		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+input		inq_in2_exp_eq_0;	// request operand 2[62:52]==0
+input		inq_in2_exp_neq_ffs;	// request operand 2[62:52]!=0x7ff
+input [7:0]	inq_op;			// request opcode to op pipes
+input [12:0]	div_exp1;		// divide exponent- intermediate value
+input		div_dest_rdy;		// divide result req accepted for CPX
+input [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+input [4:0]	inq_id;			// request ID to the operation pipes
+input		inq_in1_63;		// request operand 1 to op pipes- sign
+input		inq_in2_63;		// request operand 2 to op pipes- sign
+input		inq_div;		// divide pipe request
+input [12:0]	div_exp_out;		// divide exponent output
+input		div_frac_add_52_inva;	// div_frac_add bit[52] inverted
+input		div_frac_add_in1_neq_0;	// div_frac_add_in1 != 0
+input		div_frac_out_54;	// div_frac_out bit[54]
+input		d6stg_frac_0;		// divide fraction[0]- intermediate val
+input		d6stg_frac_1;		// divide fraction[1]- intermediate val
+input		d6stg_frac_2;		// divide fraction[2]- intermediate val
+input		d6stg_frac_29;		// divide fraction[29]- intermediate val
+input		d6stg_frac_30;		// divide fraction[30]- intermediate val
+input		d6stg_frac_31;		// divide fraction[31]- intermediate val
+input		div_frac_out_53;	// div_frac_out bit[53]
+input		div_expadd2_12;		// div_expadd2 bit[12]
+input		arst_l;			// global async. reset- asserted low
+input		grst_l;			// global sync. reset- asserted low
+input		rclk;		// global clock
+
+output		div_pipe_active;        // div pipe is executing a valid instr
+output		d1stg_snan_sng_in1;	// operand 1 is single signalling NaN
+output		d1stg_snan_dbl_in1;	// operand 1 is double signalling NaN
+output		d1stg_snan_sng_in2;	// operand 2 is single signalling NaN
+output		d1stg_snan_dbl_in2;	// operand 2 is double signalling NaN
+output		d1stg_step;		// divide pipe load
+output		d1stg_dblop;		// double precision operation- d1 stg
+output		d234stg_fdiv;		// select line to div_expadd1
+output		d3stg_fdiv;		// divide operation- divide stage 3
+output		d4stg_fdiv;		// divide operation- divide stage 4
+output		d5stg_fdiva;		// divide operation- divide stage 5
+output		d5stg_fdivb;		// divide operation- divide stage 5
+output		d5stg_fdivs;		// divide single- divide stage 5
+output		d5stg_fdivd;		// divide double- divide stage 5
+output		d6stg_fdiv;		// divide operation- divide stage 6
+output		d6stg_fdivs;		// divide single- divide stage 6
+output		d6stg_fdivd;		// divide double- divide stage 6
+output		d7stg_fdiv;		// divide operation- divide stage 7
+output		d7stg_fdivd;		// divide double- divide stage 7
+output		d8stg_fdiv_in;		// div pipe output request next cycle
+output		d8stg_fdivs;		// divide single- divide stage 8
+output		d8stg_fdivd;		// divide double- divide stage 8
+output [9:0]	div_id_out_in;		// div pipe output ID next cycle
+output		div_sign_out;		// divide sign output
+output [4:0]	div_exc_out;		// divide pipe result- exception flags
+output		div_norm_frac_in1_dbl_norm; // select line to div_norm
+output		div_norm_frac_in1_dbl_dnrm; // select line to div_norm
+output		div_norm_frac_in1_sng_norm; // select line to div_norm
+output		div_norm_frac_in1_sng_dnrm; // select line to div_norm
+output		div_norm_frac_in2_dbl_norm; // select line to div_norm
+output		div_norm_frac_in2_dbl_dnrm; // select line to div_norm
+output		div_norm_frac_in2_sng_norm; // select line to div_norm
+output		div_norm_frac_in2_sng_dnrm; // select line to div_norm
+output		div_norm_inf;		// select line to div_norm
+output		div_norm_qnan;		// select line to div_norm
+output		div_norm_zero;		// select line to div_norm
+output		div_frac_add_in2_load;	// load enable to div_frac_add_in2
+output		d6stg_frac_out_shl1;	// select line to d6stg_frac
+output		d6stg_frac_out_nosh;	// select line to d6stg_frac
+output		div_frac_add_in1_add;	// select line to div_frac_add_in1
+output		div_frac_add_in1_load;	// load enable to div_frac_add_in1
+output		d7stg_rndup_inv;	// no rounding increment
+output		d7stg_to_0;		// result to max finite on overflow
+output		d7stg_to_0_inv;		// result to infinity on overflow
+output		div_frac_out_add_in1;	// select line to div_frac_out
+output		div_frac_out_add;	// select line to div_frac_out
+output		div_frac_out_shl1_dbl;	// select line to div_frac_out
+output		div_frac_out_shl1_sng;	// select line to div_frac_out
+output		div_frac_out_of;	// select line to div_frac_out
+output		div_frac_out_load;	// load enable to div_frac_out
+output		div_expadd1_in1_dbl;	// select line to div_expadd1
+output		div_expadd1_in1_sng;	// select line to div_expadd1
+output		div_expadd1_in2_exp_in2_dbl; // select line to div_expadd1
+output		div_expadd1_in2_exp_in2_sng; //select line to div_expadd1
+output		div_exp1_expadd1;	// select line to div_exp1
+output		div_exp1_0835;		// select line to div_exp1
+output		div_exp1_0118;		// select line to div_exp1
+output		div_exp1_zero;		// select line to div_exp1
+output		div_exp1_load;		// load enable to div_exp1
+output		div_expadd2_in1_exp_out; // select line to div_expadd2
+output		div_expadd2_no_decr_inv; // no exponent decrement
+output		div_expadd2_cin;	// carry in to 2nd exponent adder
+output		div_exp_out_expadd22_inv; // select line to div_exp_out
+output		div_exp_out_expadd2;	// select line to div_exp_out
+output		div_exp_out_of;		// overflow to exponent output
+output		div_exp_out_exp_out;	// select line to div_exp_out
+output		div_exp_out_load;	// load enable to div_exp_out
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+
+
+wire		reset;
+wire		div_frac_in1_51;
+wire		div_frac_in1_54;
+wire		div_frac_in1_53_0_neq_0;
+wire		div_frac_in1_50_0_neq_0;
+wire		div_frac_in1_53_32_neq_0;
+wire		div_exp_in1_exp_eq_0;
+wire		div_exp_in1_exp_neq_ffs;
+wire		div_frac_in2_51;
+wire		div_frac_in2_54;
+wire		div_frac_in2_53_0_neq_0;
+wire		div_frac_in2_50_0_neq_0;
+wire		div_frac_in2_53_32_neq_0;
+wire		div_exp_in2_exp_eq_0;
+wire		div_exp_in2_exp_neq_ffs;
+wire		d1stg_denorm_sng_in1;
+wire		d1stg_denorm_dbl_in1;
+wire		d1stg_denorm_sng_in2;
+wire		d1stg_denorm_dbl_in2;
+wire		d2stg_denorm_sng_in2;
+wire		d2stg_denorm_dbl_in2;
+wire		d1stg_norm_sng_in1;
+wire		d1stg_norm_dbl_in1;
+wire		d1stg_norm_sng_in2;
+wire		d1stg_norm_dbl_in2;
+wire		d2stg_norm_sng_in2;
+wire		d2stg_norm_dbl_in2;
+wire		d1stg_snan_sng_in1;
+wire		d1stg_snan_dbl_in1;
+wire		d1stg_snan_sng_in2;
+wire		d1stg_snan_dbl_in2;
+wire		d1stg_qnan_sng_in1;
+wire		d1stg_qnan_dbl_in1;
+wire		d1stg_qnan_sng_in2;
+wire		d1stg_qnan_dbl_in2;
+wire		d1stg_snan_in1;
+wire		d1stg_snan_in2;
+wire		d1stg_qnan_in1;
+wire		d1stg_qnan_in2;
+wire		d1stg_nan_sng_in1;
+wire		d1stg_nan_dbl_in1;
+wire		d1stg_nan_sng_in2;
+wire		d1stg_nan_dbl_in2;
+wire		d1stg_nan_in1;
+wire		d1stg_nan_in2;
+wire		d1stg_nan_in;
+wire		d2stg_snan_in1;
+wire		d2stg_snan_in2;
+wire		d2stg_qnan_in1;
+wire		d2stg_qnan_in2;
+wire		d2stg_nan_in2;
+wire		d2stg_nan_in;
+wire		d1stg_inf_sng_in1;
+wire		d1stg_inf_dbl_in1;
+wire		d1stg_inf_sng_in2;
+wire		d1stg_inf_dbl_in2;
+wire		d1stg_inf_in1;
+wire		d1stg_inf_in2;
+wire		d1stg_inf_in;
+wire		d1stg_2inf_in;
+wire		d2stg_inf_in1;
+wire		d2stg_inf_in2;
+wire		d2stg_2inf_in;
+wire		d1stg_infnan_sng_in1;
+wire		d1stg_infnan_dbl_in1;
+wire		d1stg_infnan_sng_in2;
+wire		d1stg_infnan_dbl_in2;
+wire		d1stg_infnan_in1;
+wire		d1stg_infnan_in2;
+wire		d1stg_infnan_in;
+wire		d2stg_infnan_in1;
+wire		d2stg_infnan_in2;
+wire		d2stg_infnan_in;
+wire		d1stg_zero_in1;
+wire		d1stg_zero_in2;
+wire		d1stg_zero_in;
+wire		d1stg_2zero_in;
+wire		d2stg_zero_in1;
+wire		d2stg_zero_in2;
+wire		d2stg_zero_in;
+wire		d2stg_2zero_in;
+wire		d1stg_hold;
+wire		d1stg_holda;
+wire		d1stg_step;
+wire		d1stg_stepa;
+wire [7:0]	d1stg_op_in;
+wire [7:0]	d1stg_op;
+wire		d1stg_div_in;
+wire		d1stg_div;
+wire [4:0]	d1stg_sngopa;
+wire		d1stg_dblop;
+wire [4:0]	d1stg_dblopa;
+wire		d1stg_fdiv;
+wire		d1stg_fdivs;
+wire		d1stg_fdivd;
+wire [2:0]	d1stg_opdec;
+wire		d234stg_fdiv_in;
+wire [2:0]	d2stg_opdec;
+wire		d234stg_fdiv;
+wire		d2stg_fdiv;
+wire		d2stg_fdivs;
+wire		d2stg_fdivd;
+wire [2:0]	d3stg_opdec;
+wire		d3stg_fdiv;
+wire [2:0]	d4stg_opdec;
+wire		d4stg_fdiv;
+wire		d4stg_fdivs;
+wire		d4stg_fdivd;
+wire		d5stg_step;
+wire [2:0]	d5stg_opdec;
+wire		d5stg_fdiva;
+wire		d5stg_fdivb_in;
+wire		d5stg_fdivb;
+wire		d5stg_fdiv;
+wire		d5stg_fdivs;
+wire		d5stg_fdivd;
+wire		d6stg_step;
+wire [2:0]	d6stg_opdec_in;
+wire [2:0]	d6stg_opdec;
+wire		d6stg_fdiv;
+wire		d6stg_fdivs;
+wire		d6stg_fdivd;
+wire [2:0]	d7stg_opdec;
+wire		d7stg_fdiv;
+wire		d7stg_fdivs;
+wire		d7stg_fdivd;
+wire		d8stg_fdiv_in;
+wire [2:0]	d8stg_opdec;
+wire		d8stg_fdiv;
+wire		d8stg_fdivs;
+wire		d8stg_fdivd;
+wire		d8stg_hold;
+wire		d8stg_step;
+wire [1:0]	d1stg_rnd_mode;
+wire [4:0]	d1stg_id;
+wire		d1stg_sign1;
+wire		d1stg_sign2;
+wire		d1stg_sign;
+wire		div_bkend_step;
+wire [1:0]	div_rnd_mode;
+wire [9:0]	div_id_out_in;
+wire [9:0]	div_id_out;
+wire		div_sign_out;
+wire [5:0]	div_cnt_plus1;
+wire [5:0]	div_cnt_in;
+wire		div_cnt_step;
+wire [5:0]	div_cnt;
+wire		div_cnt_lt_step;
+wire		divs_cnt_lt_23_in;
+wire		divs_cnt_lt_23;
+wire		divs_cnt_lt_23a;
+wire		divd_cnt_lt_52_in;
+wire		divd_cnt_lt_52;
+wire		divd_cnt_lt_52a;
+wire		div_exc_step;
+wire		div_of_mask_in;
+wire		div_of_mask;
+wire		div_nv_out_in;
+wire		div_nv_out;
+wire		div_dz_out_in;
+wire		div_dz_out;
+wire		d7stg_in_of;
+wire		div_of_out_tmp1_in;
+wire		div_of_out_tmp1;
+wire		div_of_out_tmp2;
+wire		div_out_52_inv;
+wire		div_of_out;
+wire		div_uf_out_in;
+wire		div_uf_out;
+wire		div_nx_out_in;
+wire		div_nx_out;
+wire [4:0]	div_exc_out;
+wire		d1stg_spc_rslt;
+wire		div_norm_frac_in1_dbl_norm;
+wire		div_norm_frac_in1_dbl_dnrm;
+wire		div_norm_frac_in1_sng_norm;
+wire		div_norm_frac_in1_sng_dnrm;
+wire		div_norm_frac_in2_dbl_norm;
+wire		div_norm_frac_in2_dbl_dnrm;
+wire		div_norm_frac_in2_sng_norm;
+wire		div_norm_frac_in2_sng_dnrm;
+wire		div_norm_inf;
+wire		div_norm_qnan;
+wire		div_norm_zero;
+wire		div_frac_add_in2_load;
+wire		d6stg_frac_out_shl1;
+wire		d6stg_frac_out_nosh;
+wire		div_frac_add_in1_add;
+wire		div_frac_add_in1_load;
+wire		d7stg_lsb_in;
+wire		d7stg_grd_in;
+wire		d7stg_stk_in;
+wire		d7stg_lsb;
+wire		d7stg_grd;
+wire		d7stg_stk;
+wire		d7stg_rndup;
+wire		d7stg_rndup_inv;
+wire		d7stg_to_0;
+wire		d7stg_to_0_inv;
+wire		div_frac_out_add_in1;
+wire		div_frac_out_add;
+wire		div_frac_out_shl1_dbl;
+wire		div_frac_out_shl1_sng;
+wire		div_frac_out_of;
+wire		div_frac_out_load;
+wire		div_expadd1_in1_dbl_in;
+wire		div_expadd1_in1_dbl;
+wire		div_expadd1_in1_sng_in;
+wire		div_expadd1_in1_sng;
+wire		div_expadd1_in2_exp_in2_dbl;
+wire		div_expadd1_in2_exp_in2_sng;
+wire		div_exp1_expadd1;
+wire		div_exp1_0835;
+wire		div_exp1_0118;
+wire		div_exp1_zero;
+wire		d2stg_max_exp;
+wire		d2stg_zero_exp;
+wire		div_exp1_load;
+wire		div_expadd2_in1_exp_out_in;
+wire		div_expadd2_in1_exp_out;
+wire		div_expadd2_no_decr_inv_in;
+wire		div_expadd2_no_decr_load;
+wire		div_expadd2_no_decr_inv;
+wire		div_expadd2_cin;
+wire		div_exp_out_zero;
+wire		div_exp_out_expadd22_inv;
+wire		div_exp_out_expadd2;
+wire		div_exp_out_of;
+wire		div_exp_out_exp_out;
+wire		div_exp_out_load;
+wire		div_pipe_active_in;
+wire		div_pipe_active;
+
+
+dffrl_async #(1)  dffrl_div_ctl (	// local copy of the reset for this control block
+  .din  (grst_l),			// sync reset (asserted low) is the flop data input
+  .clk  (rclk),
+  .rst_l(arst_l),			// async reset (asserted low)
+  .q    (div_ctl_rst_l),		// NOTE(review): not in the wire list above; implicit 1-bit net
+	.se (se),
+	.si (),				// scan in left unconnected
+	.so ()				// scan out left unconnected
+  );
+
+assign reset= (!div_ctl_rst_l);	// inverted polarity; drives .rst of the dffr_s/dffre_s flops
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide pipeline special input cases.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_div_frac_in1_51 (
+	.din	(inq_in1_51),
+	.en     (d1stg_step),
+        .clk    (rclk),
+ 
+        .q      (div_frac_in1_51),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_div_frac_in1_54 (
+	.din	(inq_in1_54),
+	.en     (d1stg_step),
+        .clk    (rclk),
+ 
+        .q      (div_frac_in1_54),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_div_frac_in1_53_0_neq_0 (
+	.din	(inq_in1_53_0_neq_0),
+	.en     (d1stg_step),
+        .clk    (rclk),
+ 
+        .q      (div_frac_in1_53_0_neq_0),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_div_frac_in1_50_0_neq_0 (
+	.din	(inq_in1_50_0_neq_0),
+	.en	(d1stg_step),
+	.clk	(rclk),
+
+	.q	(div_frac_in1_50_0_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_div_frac_in1_53_32_neq_0 (
+	.din	(inq_in1_53_32_neq_0),
+	.en	(d1stg_step),
+	.clk	(rclk),
+
+	.q	(div_frac_in1_53_32_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_div_exp_in1_exp_eq_0 (
+        .din	(inq_in1_exp_eq_0),
+        .en	(d1stg_step),
+        .clk	(rclk),
+ 
+        .q	(div_exp_in1_exp_eq_0),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(1) i_div_exp_in1_exp_neq_ffs (
+	.din	(inq_in1_exp_neq_ffs),
+	.en	(d1stg_step),
+	.clk	(rclk),
+
+	.q	(div_exp_in1_exp_neq_ffs),
+
+   	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_div_frac_in2_51 (
+	.din	(inq_in2_51),
+	.en	(d1stg_step),
+	.clk	(rclk),
+
+	.q	(div_frac_in2_51),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_div_frac_in2_54 (
+	.din	(inq_in2_54),
+	.en	(d1stg_step),
+	.clk	(rclk),
+
+	.q	(div_frac_in2_54),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_div_frac_in2_53_0_neq_0 (
+	.din	(inq_in2_53_0_neq_0),
+	.en  	(d1stg_step),
+	.clk	(rclk),
+
+	.q	(div_frac_in2_53_0_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_div_frac_in2_50_0_neq_0 (
+	.din	(inq_in2_50_0_neq_0),
+	.en	(d1stg_step),
+	.clk	(rclk),
+
+	.q	(div_frac_in2_50_0_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_div_frac_in2_53_32_neq_0 (
+	.din	(inq_in2_53_32_neq_0),
+	.en	(d1stg_step),
+	.clk	(rclk),
+
+	.q	(div_frac_in2_53_32_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_div_exp_in2_exp_eq_0 (
+	.din	(inq_in2_exp_eq_0),
+	 .en	(d1stg_step),
+	.clk	(rclk),
+
+	.q	(div_exp_in2_exp_eq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_div_exp_in2_exp_neq_ffs (
+        .din	(inq_in2_exp_neq_ffs),
+        .en	(d1stg_step),
+        .clk	(rclk),
+ 
+        .q	(div_exp_in2_exp_neq_ffs),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Denorm divide inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d1stg_denorm_sng_in1= div_exp_in1_exp_eq_0 && d1stg_sngopa[0];
+
+assign d1stg_denorm_dbl_in1= div_exp_in1_exp_eq_0 && d1stg_dblopa[0];
+
+assign d1stg_denorm_sng_in2= div_exp_in2_exp_eq_0 && d1stg_sngopa[0];
+
+assign d1stg_denorm_dbl_in2= div_exp_in2_exp_eq_0 && d1stg_dblopa[0];
+
+dff_s #(1) i_d2stg_denorm_sng_in2 (
+	.din	(d1stg_denorm_sng_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_denorm_sng_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_denorm_dbl_in2 (
+	.din	(d1stg_denorm_dbl_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_denorm_dbl_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Non-denorm divide inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d1stg_norm_sng_in1= (!div_exp_in1_exp_eq_0) && d1stg_sngopa[0];
+
+assign d1stg_norm_dbl_in1= (!div_exp_in1_exp_eq_0) && d1stg_dblopa[0];
+
+assign d1stg_norm_sng_in2= (!div_exp_in2_exp_eq_0) && d1stg_sngopa[0];
+
+assign d1stg_norm_dbl_in2= (!div_exp_in2_exp_eq_0) && d1stg_dblopa[0];
+
+dff_s #(1) i_d2stg_norm_sng_in2 (
+	.din	(d1stg_norm_sng_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_norm_sng_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_norm_dbl_in2 (
+	.din	(d1stg_norm_dbl_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_norm_dbl_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	NaN divide inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d1stg_snan_sng_in1= (!div_exp_in1_exp_neq_ffs) && (!div_frac_in1_54)
+		&& div_frac_in1_53_32_neq_0 && d1stg_sngopa[1];
+
+assign d1stg_snan_dbl_in1= (!div_exp_in1_exp_neq_ffs) && (!div_frac_in1_51)
+		&& div_frac_in1_50_0_neq_0 && d1stg_dblopa[1];
+
+assign d1stg_snan_sng_in2= (!div_exp_in2_exp_neq_ffs) && (!div_frac_in2_54)
+                && div_frac_in2_53_32_neq_0 && d1stg_sngopa[1];
+
+assign d1stg_snan_dbl_in2= (!div_exp_in2_exp_neq_ffs) && (!div_frac_in2_51)
+                && div_frac_in2_50_0_neq_0 && d1stg_dblopa[1];
+
+assign d1stg_qnan_sng_in1= (!div_exp_in1_exp_neq_ffs) && div_frac_in1_54
+		&& d1stg_sngopa[1];
+
+assign d1stg_qnan_dbl_in1= (!div_exp_in1_exp_neq_ffs) && div_frac_in1_51
+		&& d1stg_dblopa[1];
+
+assign d1stg_qnan_sng_in2= (!div_exp_in2_exp_neq_ffs) && div_frac_in2_54
+                && d1stg_sngopa[1];
+
+assign d1stg_qnan_dbl_in2= (!div_exp_in2_exp_neq_ffs) && div_frac_in2_51
+                && d1stg_dblopa[1];
+
+assign d1stg_snan_in1= d1stg_snan_sng_in1 || d1stg_snan_dbl_in1;
+
+assign d1stg_snan_in2= d1stg_snan_sng_in2 || d1stg_snan_dbl_in2;
+
+assign d1stg_qnan_in1= d1stg_qnan_sng_in1 || d1stg_qnan_dbl_in1;
+ 
+assign d1stg_qnan_in2= d1stg_qnan_sng_in2 || d1stg_qnan_dbl_in2;
+
+assign d1stg_nan_sng_in1= (!div_exp_in1_exp_neq_ffs)
+		&& (div_frac_in1_54 || div_frac_in1_53_32_neq_0)
+		&& d1stg_sngopa[2];
+
+assign d1stg_nan_dbl_in1= (!div_exp_in1_exp_neq_ffs)
+		&& (div_frac_in1_51 || div_frac_in1_50_0_neq_0)
+		&& d1stg_dblopa[2];
+
+assign d1stg_nan_sng_in2= (!div_exp_in2_exp_neq_ffs)
+		&& (div_frac_in2_54 || div_frac_in2_53_32_neq_0)
+		&& d1stg_sngopa[2];
+
+assign d1stg_nan_dbl_in2= (!div_exp_in2_exp_neq_ffs)
+		&& (div_frac_in2_51 || div_frac_in2_50_0_neq_0)
+		&& d1stg_dblopa[2];
+
+assign d1stg_nan_in1= d1stg_nan_sng_in1 || d1stg_nan_dbl_in1;
+
+assign d1stg_nan_in2= d1stg_nan_sng_in2 || d1stg_nan_dbl_in2;
+
+assign d1stg_nan_in= d1stg_nan_in1 || d1stg_nan_in2;
+
+dff_s #(1) i_d2stg_snan_in1 (
+	.din	(d1stg_snan_in1),
+	.clk	(rclk),
+
+	.q	(d2stg_snan_in1),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_snan_in2 (
+	.din	(d1stg_snan_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_snan_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_qnan_in1 (
+	.din	(d1stg_qnan_in1),
+	.clk	(rclk),
+
+	.q	(d2stg_qnan_in1),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_qnan_in2 (
+	.din	(d1stg_qnan_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_qnan_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_nan_in2 (
+	.din	(d1stg_nan_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_nan_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_nan_in (
+	.din	(d1stg_nan_in),
+	.clk	(rclk),
+
+	.q	(d2stg_nan_in),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Infinity divide inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d1stg_inf_sng_in1= (!div_exp_in1_exp_neq_ffs)
+		&& (!div_frac_in1_54) && (!div_frac_in1_53_32_neq_0)
+		&& d1stg_sngopa[2];
+
+assign d1stg_inf_dbl_in1= (!div_exp_in1_exp_neq_ffs)
+		&& (!div_frac_in1_51) && (!div_frac_in1_50_0_neq_0)
+		&& d1stg_dblopa[2];
+
+assign d1stg_inf_sng_in2= (!div_exp_in2_exp_neq_ffs)
+		&& (!div_frac_in2_54) && (!div_frac_in2_53_32_neq_0)
+		&& d1stg_sngopa[2];
+
+assign d1stg_inf_dbl_in2= (!div_exp_in2_exp_neq_ffs)
+		&& (!div_frac_in2_51) && (!div_frac_in2_50_0_neq_0)
+		&& d1stg_dblopa[2];
+
+assign d1stg_inf_in1= d1stg_inf_sng_in1 || d1stg_inf_dbl_in1;
+
+assign d1stg_inf_in2= d1stg_inf_sng_in2 || d1stg_inf_dbl_in2;
+
+assign d1stg_inf_in= d1stg_inf_in1 || d1stg_inf_in2;
+
+assign d1stg_2inf_in= d1stg_inf_in1 && d1stg_inf_in2;
+
+dff_s #(1) i_d2stg_inf_in1 (
+	.din	(d1stg_inf_in1),
+	.clk	(rclk),
+
+	.q	(d2stg_inf_in1),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_inf_in2 (
+	.din	(d1stg_inf_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_inf_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_2inf_in (
+	.din	(d1stg_2inf_in),
+	.clk	(rclk),
+
+	.q	(d2stg_2inf_in),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Infinity/NaN divide inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d1stg_infnan_sng_in1= (!div_exp_in1_exp_neq_ffs) && d1stg_sngopa[3];
+
+assign d1stg_infnan_dbl_in1= (!div_exp_in1_exp_neq_ffs) && d1stg_dblopa[3];
+
+assign d1stg_infnan_sng_in2= (!div_exp_in2_exp_neq_ffs) && d1stg_sngopa[3];
+
+assign d1stg_infnan_dbl_in2= (!div_exp_in2_exp_neq_ffs) && d1stg_dblopa[3];
+
+assign d1stg_infnan_in1= d1stg_infnan_sng_in1 || d1stg_infnan_dbl_in1;
+
+assign d1stg_infnan_in2= d1stg_infnan_sng_in2 || d1stg_infnan_dbl_in2;
+
+assign d1stg_infnan_in= d1stg_infnan_in1 || d1stg_infnan_in2;
+
+dff_s #(1) i_d2stg_infnan_in1 (
+	.din	(d1stg_infnan_in1),
+	.clk	(rclk),
+
+	.q	(d2stg_infnan_in1),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_infnan_in2 (
+	.din	(d1stg_infnan_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_infnan_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_infnan_in (
+	.din	(d1stg_infnan_in),
+	.clk	(rclk),
+
+	.q	(d2stg_infnan_in),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Zero divide inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d1stg_zero_in1= div_exp_in1_exp_eq_0
+		&& (!div_frac_in1_53_0_neq_0) && (!div_frac_in1_54);
+
+assign d1stg_zero_in2= div_exp_in2_exp_eq_0
+		&& (!div_frac_in2_53_0_neq_0) && (!div_frac_in2_54);
+
+assign d1stg_zero_in= d1stg_zero_in1 || d1stg_zero_in2;
+ 
+assign d1stg_2zero_in= d1stg_zero_in1 && d1stg_zero_in2;
+
+dff_s #(1) i_d2stg_zero_in1 (
+	.din	(d1stg_zero_in1),
+	.clk	(rclk),
+
+	.q	(d2stg_zero_in1),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_zero_in2 (
+	.din	(d1stg_zero_in2),
+	.clk	(rclk),
+
+	.q	(d2stg_zero_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_zero_in (
+	.din	(d1stg_zero_in),
+	.clk	(rclk),
+
+	.q	(d2stg_zero_in),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dff_s #(1) i_d2stg_2zero_in (
+	.din	(d1stg_2zero_in),
+	.clk	(rclk),
+
+	.q	(d2stg_2zero_in),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Floating point divide control pipeline.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Opcode pipeline- divide pipeline input.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d1stg_hold= d1stg_div 		// hold stage 1 while a divide request sits in d1,
+		|| d234stg_fdiv		//   or d234stg_fdiv is set (registered d1|d2|d3 fdiv),
+		|| divs_cnt_lt_23	//   or the single count flag is set (presumably driven later in the module),
+		|| divd_cnt_lt_52;	//   or the double count flag is set (presumably driven later in the module)
+
+assign d1stg_holda= d1stg_div		// same terms as d1stg_hold, using the "a" copies of the count flags
+        	|| d234stg_fdiv
+        	|| divs_cnt_lt_23a	// NOTE(review): "a" copies look like fanout duplicates -- confirm
+		|| divd_cnt_lt_52a;
+
+assign d1stg_step= (!d1stg_hold);	// enable for the operand-capture flops above
+
+assign d1stg_stepa= (!d1stg_holda);	// enable/gate for the opcode input flops below
+
+
+assign d1stg_op_in[7:0]= ({8{d1stg_stepa}}			// opcode enters d1 only when d1stg_stepa is set
+			    & (inq_op[7:0] & {8{inq_div}}));	//   and inq_div is set; otherwise all zeros
+
+dffr_s #(8) i_d1stg_op (
+	.din	(d1stg_op_in[7:0]),
+	.rst	(reset),
+	.clk	(rclk),
+
+	.q	(d1stg_op[7:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign d1stg_div_in= inq_div && d1stg_stepa;
+
+dffr_s #(1) i_d1stg_div (
+	.din	(d1stg_div_in),
+	.rst	(reset),
+        .clk	(rclk),
+ 
+        .q	(d1stg_div),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(5) i_d1stg_sngopa (
+        .din	({5{inq_op[0]}}),
+        .en	(d1stg_stepa),
+        .clk	(rclk),
+ 
+        .q	(d1stg_sngopa[4:0]),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(1) i_d1stg_dblop (
+        .din    (inq_op[1]),
+        .en     (d1stg_stepa),
+        .clk    (rclk),
+ 
+        .q      (d1stg_dblop),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(5) i_d1stg_dblopa (
+        .din	({5{inq_op[1]}}),
+        .en	(d1stg_stepa),
+        .clk	(rclk),
+ 
+        .q	(d1stg_dblopa[4:0]),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Opcode decode- divide stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d1stg_fdiv= (d1stg_op[7:0]==FDIVS) || (d1stg_op[7:0]==FDIVD);	// either divide opcode
+
+assign d1stg_fdivs= (d1stg_op[7:0]==FDIVS);	// divide single
+
+assign d1stg_fdivd= (d1stg_op[7:0]==FDIVD);	// divide double
+
+assign d1stg_opdec[2:0]= {d1stg_fdiv,		// [2]= fdiv
+			d1stg_fdivs,		// [1]= fdivs
+			d1stg_fdivd};		// [0]= fdivd; same bit order is unpacked in later stages
+
+assign d234stg_fdiv_in= d1stg_fdiv || d2stg_fdiv || d3stg_fdiv;	// D input of the d234stg_fdiv flop
+
+dffr_s #(3) i_d2stg_opdec (
+	.din	(d1stg_opdec[2:0]),
+	.rst	(reset),
+	.clk	(rclk),
+
+	.q	(d2stg_opdec[2:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffr_s #(1) i_d234stg_fdiv (
+	.din	(d234stg_fdiv_in),
+	.rst    (reset),
+        .clk    (rclk),
+
+	.q	(d234stg_fdiv),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Opcode pipeline- divide stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d2stg_fdiv= d2stg_opdec[2];
+assign d2stg_fdivs= d2stg_opdec[1];
+assign d2stg_fdivd= d2stg_opdec[0];
+
+dffr_s #(3) i_d3stg_opdec (
+        .din    (d2stg_opdec[2:0]),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (d3stg_opdec[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- divide stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign d3stg_fdiv= d3stg_opdec[2];
+//assign d3stg_fdivs= d3stg_opdec[1];
+//assign d3stg_fdivd= d3stg_opdec[0];
+
+dffr_s #(3) i_d4stg_opdec (
+        .din    (d3stg_opdec[2:0]),
+        .rst    (reset),
+        .clk    (rclk),
+ 
+        .q      (d4stg_opdec[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+ 
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- divide stage 4.
+//	Also derives d5stg_step, the load enable for the stage 5 registers.
+///////////////////////////////////////////////////////////////////////////////
+
+assign d4stg_fdiv= d4stg_opdec[2];
+assign d4stg_fdivs= d4stg_opdec[1];
+assign d4stg_fdivd= d4stg_opdec[0];
+// Stage 5 may load when it holds no divide, or when d6stg_step is asserted.
+assign d5stg_step= (!d5stg_fdiv) || d6stg_step;
+
+dffre_s #(3) i_d5stg_opdec (
+	.din	(d4stg_opdec[2:0]),
+	.en	(d5stg_step),
+	.rst    (reset),
+        .clk    (rclk),
+
+        .q	(d5stg_opdec[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Separate register for decode bit [2], loaded under the same enable as above.
+dffre_s #(1) i_d5stg_fdiva (
+	.din	(d4stg_fdiv),
+	.en	(d5stg_step),
+	.rst	(reset),
+	.clk	(rclk),
+
+	.q	(d5stg_fdiva),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Load/hold mux written out by hand: take d4stg_fdiv on a step, else keep d5stg_fdiv.
+assign d5stg_fdivb_in= ((d5stg_step && d4stg_fdiv)
+			|| ((!d5stg_step) && d5stg_fdiv))
+		&& (!reset);	// reset forces the next value low
+// dff_s is instantiated without .en/.rst, hence the explicit mux and (!reset) term.
+dff_s #(1) i_d5stg_fdivb (
+	.din	(d5stg_fdivb_in),
+	.clk	(rclk),
+
+	.q	(d5stg_fdivb),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- divide stage 5.
+//	Stage 5 holds the divide while div_cnt advances; d6stg_step releases it.
+///////////////////////////////////////////////////////////////////////////////
+
+assign d5stg_fdiv= d5stg_opdec[2];
+assign d5stg_fdivs= d5stg_opdec[1];
+assign d5stg_fdivd= d5stg_opdec[0];
+// Release conditions (6'h36 = 54, 6'h19 = 25); div_cnt is zero-extended to 13 bits.
+assign d6stg_step= (d5stg_fdivd && (div_cnt[5:0]==6'h36))
+		|| (d5stg_fdivs && (div_cnt[5:0]==6'h19))
+		|| (d5stg_fdiv && ((({7'b0, div_cnt[5:0]}==div_exp1[12:0])
+					&& (div_exp1[12:0]!=13'b0))	// count reached a non-zero div_exp1
+				|| (({7'b0, div_cnt[5:0]}==div_exp1[12:0])
+					&& (div_exp1[12:0]==13'b0)	// both zero: also needs d8stg_step
+					&& d8stg_step)
+				|| (div_exp1[12] && d8stg_step)));	// NOTE(review): [12] looks like a sign bit - confirm
+// Decode bits are gated by d6stg_step: stage 6 sees zeros unless stage 5 is releasing.
+assign d6stg_opdec_in[2:0]= ({3{d6stg_step}}
+			    & d5stg_opdec[2:0]);
+
+dffr_s #(3) i_d6stg_opdec (
+	.din	(d6stg_opdec_in[2:0]),
+	.rst    (reset),
+        .clk    (rclk),
+
+        .q      (d6stg_opdec[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- divide stage 6.
+//	Unpacks the stage 6 decode bundle and registers it for stage 7.
+///////////////////////////////////////////////////////////////////////////////
+
+assign d6stg_fdiv  = d6stg_opdec[2];
+assign d6stg_fdivs = d6stg_opdec[1];
+assign d6stg_fdivd = d6stg_opdec[0];
+// Stage 6 -> stage 7 decode register.
+dffr_s #(3) i_d7stg_opdec (
+	.clk	(rclk),
+	.rst	(reset),
+	.din	(d6stg_opdec[2:0]),
+	.q	(d7stg_opdec[2:0]),
+
+	// scan enable only; si/so are left unconnected
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- divide stage 7.
+//	Stage 8 only loads when d8stg_step is asserted.
+///////////////////////////////////////////////////////////////////////////////
+
+assign d7stg_fdiv= d7stg_opdec[2];
+assign d7stg_fdivs= d7stg_opdec[1];
+assign d7stg_fdivd= d7stg_opdec[0];
+// Next-cycle value of d8stg_fdiv: load d7stg_fdiv on a step, else hold; low in reset.
+assign d8stg_fdiv_in= (d8stg_step && (!reset) && d7stg_fdiv)
+                || ((!d8stg_step) && (!reset) && d8stg_fdiv);
+// NOTE(review): d8stg_fdiv_in feeds no flop in this section - presumably a module output.
+dffre_s #(3) i_d8stg_opdec (
+        .din    (d7stg_opdec[2:0]),
+	.en	(d8stg_step),
+	.rst    (reset),
+        .clk    (rclk),
+
+        .q      (d8stg_opdec[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- divide pipeline output.
+//	d8stg_step is the back-pressure signal used by the earlier stages.
+///////////////////////////////////////////////////////////////////////////////
+
+assign d8stg_fdiv= d8stg_opdec[2];
+assign d8stg_fdivs= d8stg_opdec[1];
+assign d8stg_fdivd= d8stg_opdec[0];
+// Stall while stage 8 holds a divide and div_dest_rdy is low.
+assign d8stg_hold= d8stg_fdiv && (!div_dest_rdy);
+
+assign d8stg_step= (!d8stg_hold);	// stage 8 may load whenever it is not stalled
+
+// Austin update
+// Power management update
+// Busy flag: asserted (after one register stage) while any of stages 1-8 holds a divide.
+assign div_pipe_active_in =  // div pipe is executing a valid instr
+   d1stg_fdiv || d2stg_fdiv || d3stg_fdiv || d4stg_fdiv ||
+   d5stg_fdiv || d6stg_fdiv || d7stg_fdiv || d8stg_fdiv ;
+// Enable is tied high, so din is captured every cycle; .rst is driven by reset.
+dffre_s #(1) i_div_pipe_active (
+	.din	(div_pipe_active_in),
+	.en     (1'b1),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (div_pipe_active),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide information pipeline
+//		- rounding mode
+//		- ID
+//		- sign logic
+//	Front end of the pipeline: four registers that all load on d1stg_stepa.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Rounding mode of the incoming request.
+dffe_s #(2) i_d1stg_rnd_mode (
+	.clk	(rclk),
+	.en	(d1stg_stepa),
+	.din	(inq_rnd_mode[1:0]),
+	.q	(d1stg_rnd_mode[1:0]),
+
+	// scan enable only; si/so are left unconnected
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Request ID.
+dffe_s #(5) i_d1stg_id (
+	.clk	(rclk),
+	.en	(d1stg_stepa),
+	.din	(inq_id[4:0]),
+	.q	(d1stg_id[4:0]),
+
+	// scan enable only; si/so are left unconnected
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Bit 63 of operand 1.
+dffe_s #(1) i_d1stg_sign1 (
+	.clk	(rclk),
+	.en	(d1stg_stepa),
+	.din	(inq_in1_63),
+	.q	(d1stg_sign1),
+
+	// scan enable only; si/so are left unconnected
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Bit 63 of operand 2.
+dffe_s #(1) i_d1stg_sign2 (
+	.clk	(rclk),
+	.en	(d1stg_stepa),
+	.din	(inq_in2_63),
+	.q	(d1stg_sign2),
+
+	// scan enable only; si/so are left unconnected
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide information pipeline
+//              - rounding mode
+//              - ID
+//              - sign logic
+//      Back end of the pipeline.
+//	All back-end registers here load (or re-load) on div_bkend_step.
+///////////////////////////////////////////////////////////////////////////////
+// Result sign: XOR of the operand signs, each masked off by the NaN terms below.
+assign d1stg_sign= ((d1stg_sign1
+				&& (!d2stg_snan_in2)
+				&& (!(d2stg_qnan_in2 && (!d2stg_snan_in1))))	// sign1 dropped in these in2-NaN cases
+			^ (d1stg_sign2
+				&& (!(d2stg_snan_in1 && (!d2stg_snan_in2)))
+				&& (!(d2stg_qnan_in1 && (!d2stg_nan_in2)))))	// sign2 dropped in these in1-NaN cases
+		&& (!(d2stg_2inf_in || d2stg_2zero_in));	// forced to 0 when either flag is set
+// Asserted when stage 5 holds a divide, div_cnt is 0 and stage 8 is not stalled.
+assign div_bkend_step= (d5stg_fdiv && (div_cnt[5:0]==6'b0) && d8stg_step);
+
+dffe_s #(2) i_div_rnd_mode (
+	.din	(d1stg_rnd_mode[1:0]),
+	.en	(div_bkend_step),
+	.clk    (rclk),
+
+        .q      (div_rnd_mode[1:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+// Bits [9:2]: one-hot decode of d1stg_id[4:2] (octal 7 down to 0); bits [1:0]: d1stg_id[1:0].
+assign div_id_out_in[9:0]= ({10{div_bkend_step}}
+			    & {(d1stg_id[4:2]==3'o7),
+				(d1stg_id[4:2]==3'o6),
+				(d1stg_id[4:2]==3'o5),
+				(d1stg_id[4:2]==3'o4),
+				(d1stg_id[4:2]==3'o3),
+				(d1stg_id[4:2]==3'o2),
+				(d1stg_id[4:2]==3'o1),
+				(d1stg_id[4:2]==3'o0),
+				d1stg_id[1:0]})
+		| ({10{(!div_bkend_step)}}
+			    & div_id_out[9:0]);	// otherwise hold the current value
+// Plain dff_s (no .en here): the hold path is the feedback term in div_id_out_in.
+dff_s #(10) i_div_id_out (
+	.din	(div_id_out_in[9:0]),
+	.clk    (rclk),
+ 
+        .q      (div_id_out[9:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_div_sign_out (
+	.din	(d1stg_sign),
+	.en	(div_bkend_step),
+	.clk    (rclk),
+ 
+        .q      (div_sign_out),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide counter.
+//
+//	Tracks the number of subtraction iterations.
+//	Also keeps registered "count below 23 / below 52" flags (two copies of each).
+///////////////////////////////////////////////////////////////////////////////
+
+assign div_cnt_plus1[5:0]= (div_cnt[5:0] + 6'h01);
+// Next count: +1 while stage 5 steps; the d4stg_fdiv term contributes all zeros.
+assign div_cnt_in[5:0]= ({6{(d5stg_fdiv && d8stg_step)}}
+			    & div_cnt_plus1[5:0])
+		| ({6{d4stg_fdiv}}
+			    & 6'b0);
+// Load on either condition, so d4stg_fdiv alone writes zero into the counter.
+assign div_cnt_step= (d5stg_fdiv && d8stg_step)
+		|| d4stg_fdiv;
+
+dffre_s #(6) i_div_cnt (
+	.din	(div_cnt_in[5:0]),
+	.en	(div_cnt_step),
+	.rst	(reset),
+	.clk    (rclk),
+ 
+        .q      (div_cnt[5:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+// Shared load enable for the four comparison flags below.
+assign div_cnt_lt_step= (!d5stg_fdiv) || d6stg_step || d8stg_step;
+// Set by d4stg_fdivs, then kept while the incremented count is below 6'h17 (23).
+assign divs_cnt_lt_23_in= d4stg_fdivs
+		|| (d5stg_fdivs && (!d6stg_step) && (div_cnt_plus1[5:0]<6'h17));
+
+dffre_s #(1) i_divs_cnt_lt_23 (
+	.din	(divs_cnt_lt_23_in),
+	.en	(div_cnt_lt_step),
+	.rst	(reset),
+	.clk	(rclk),
+
+	.q	(divs_cnt_lt_23),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+// Second register with identical din/en/rst (presumably a fanout copy - confirm).
+dffre_s #(1) i_divs_cnt_lt_23a (
+        .din	(divs_cnt_lt_23_in),
+        .en	(div_cnt_lt_step),
+        .rst	(reset),
+        .clk	(rclk),
+ 
+        .q	(divs_cnt_lt_23a),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+// Set by d4stg_fdivd, then kept while the incremented count is below 6'h34 (52).
+assign divd_cnt_lt_52_in= d4stg_fdivd
+		|| (d5stg_fdivd && (!d6stg_step) && (div_cnt_plus1[5:0]<6'h34));
+
+dffre_s #(1) i_divd_cnt_lt_52 (
+	.din	(divd_cnt_lt_52_in),
+	.en	(div_cnt_lt_step),
+	.rst	(reset),
+	.clk	(rclk),
+
+	.q	(divd_cnt_lt_52),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+// Second register with identical din/en/rst (presumably a fanout copy - confirm).
+dffre_s #(1) i_divd_cnt_lt_52a (
+        .din	(divd_cnt_lt_52_in),
+        .en	(div_cnt_lt_step),
+        .rst	(reset),
+        .clk	(rclk),
+ 
+        .q	(divd_cnt_lt_52a),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide exception logic.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide overflow exception enabled.
+//	div_of_mask gates the overflow and underflow terms later in this file.
+///////////////////////////////////////////////////////////////////////////////
+
+assign div_exc_step= d5stg_fdiv && (div_cnt[5:0]==6'b0) && d8stg_step;	// same expression as div_bkend_step
+// Mask is clear when either d1stg_infnan_in or d1stg_zero_in is set.
+assign div_of_mask_in= (!(d1stg_infnan_in || d1stg_zero_in));
+
+dffe_s #(1) i_div_of_mask (
+	.din	(div_of_mask_in),
+	.en	(div_exc_step),
+	.clk    (rclk),
+
+        .q      (div_of_mask),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide invalid exception.
+//	Set by an SNaN on either operand, or by d1stg_2inf_in / d1stg_2zero_in.
+///////////////////////////////////////////////////////////////////////////////
+
+assign div_nv_out_in = d1stg_2zero_in || d1stg_2inf_in
+		|| d1stg_snan_in2 || d1stg_snan_in1;
+// Captured when div_exc_step is asserted.
+dffe_s #(1) i_div_nv_out (
+	.clk	(rclk),
+	.en	(div_exc_step),
+	.din	(div_nv_out_in),
+	.q	(div_nv_out),
+
+	// scan enable only; si/so are left unconnected
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide by zero exception.
+//	Operand 2 is zero while operand 1 is neither zero nor flagged infnan.
+///////////////////////////////////////////////////////////////////////////////
+
+assign div_dz_out_in = d1stg_zero_in2
+		&& (!(d1stg_zero_in1 || d1stg_infnan_in1));
+// Captured when div_exc_step is asserted.
+dffe_s #(1) i_div_dz_out (
+	.clk	(rclk),
+	.en	(div_exc_step),
+	.din	(div_dz_out_in),
+	.q	(div_dz_out),
+
+	// scan enable only; si/so are left unconnected
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide overflow exception.
+//	Two partial flags are registered in stage 7 and combined into div_of_out.
+///////////////////////////////////////////////////////////////////////////////
+
+assign d7stg_in_of= ((!div_exp_out[12])
+			&& d7stg_fdivd
+			&& (div_exp_out[11] || (&div_exp_out[10:0]))	// bit 11 set, or [10:0] all ones
+			&& div_of_mask)
+		|| ((!div_exp_out[12])
+			&& d7stg_fdivs
+			&& ((|div_exp_out[11:8]) || (&div_exp_out[7:0]))	// any of [11:8] set, or [7:0] all ones
+			&& div_of_mask);
+// Second term: exponent all ones above bit 0, combined with a round-up in stage 7.
+assign div_of_out_tmp1_in= ((!div_exp_out[12])
+			&& d7stg_fdivd
+			&& (&div_exp_out[10:1])
+			&& d7stg_rndup
+			&& div_of_mask)
+		|| ((!div_exp_out[12])
+	                && d7stg_fdivs
+			&& (&div_exp_out[7:1])
+	                && d7stg_rndup
+	                && div_of_mask);
+
+dffe_s #(1) i_div_of_out_tmp1 (
+	.din	(div_of_out_tmp1_in),
+	.en	(d7stg_fdiv),
+	.clk    (rclk),
+
+        .q      (div_of_out_tmp1),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_div_of_out_tmp2 (
+	.din	(d7stg_in_of),
+	.en	(d7stg_fdiv),
+	.clk	(rclk),
+
+	.q	(div_of_out_tmp2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Registers div_frac_add_52_inva (driven outside this section) alongside the flags.
+dffe_s #(1) i_div_out_52_inv (
+	.din	(div_frac_add_52_inva),
+	.en	(d7stg_fdiv),
+        .clk	(rclk),
+ 
+        .q	(div_out_52_inv),
+
+// Austin update
+// include se pin
+	.se	(se),
+	.si	(),
+        .so	()
+);
+// Overflow: tmp2 alone, or tmp1 when div_out_52_inv is low.
+assign div_of_out= div_of_out_tmp2
+		|| (div_of_out_tmp1 && (!div_out_52_inv));
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide underflow exception.
+//	Both terms are gated by div_of_mask; the flag is captured in stage 7.
+///////////////////////////////////////////////////////////////////////////////
+
+assign div_uf_out_in= ((!(|div_exp_out[11:0]))	// exponent bits [11:0] all zero ...
+			&& (div_frac_add_in1_neq_0
+				|| d7stg_grd
+				|| d7stg_stk)	// ... with any of these three non-zero
+			&& div_of_mask)
+		|| (div_exp_out[12]	// or exponent bit 12 set
+			&& div_of_mask);
+
+dffe_s #(1) i_div_uf_out (
+        .din    (div_uf_out_in),
+        .en     (d7stg_fdiv),
+        .clk    (rclk),
+ 
+        .q      (div_uf_out),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide inexact exception.
+//	Set when either the guard or the sticky bit of stage 7 is set.
+///////////////////////////////////////////////////////////////////////////////
+
+assign div_nx_out_in = d7stg_stk || d7stg_grd;
+// Captured while stage 7 holds a divide.
+dffe_s #(1) i_div_nx_out (
+	.clk	(rclk),
+	.en	(d7stg_fdiv),
+	.din	(div_nx_out_in),
+	.q	(div_nx_out),
+
+	// scan enable only; si/so are left unconnected
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide exception output.
+//	Bit order, MSB first: invalid, overflow, underflow, divide-by-zero, inexact.
+///////////////////////////////////////////////////////////////////////////////
+
+// Austin update
+// Overflow is always accompanied by inexact.
+// Previously this was handled within the FFU.
+
+// assign div_exc_out[4:0]= {div_nv_out, div_of_out, div_uf_out, div_dz_out,
+//				div_nx_out};
+
+assign div_exc_out[4:0] =
+  {div_nv_out,
+   div_of_out,
+   div_uf_out,
+   div_dz_out,
+   (div_nx_out || div_of_out)};  // Overflow is always accompanied by inexact
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide pipeline control logic.
+//
+///////////////////////////////////////////////////////////////////////////////
+ 
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Select lines- divide normalization and special input injection.
+//	Operand 1 selects are stage 1 terms; operand 2 selects mix stage 1 and stage 2.
+///////////////////////////////////////////////////////////////////////////////
+// "Special result": an inf or zero input is flagged and no NaN input is flagged.
+assign d1stg_spc_rslt= (d1stg_inf_in || d1stg_zero_in) && (!d1stg_nan_in);
+// Operand 1 selects: blocked by an SNaN in2, by a QNaN in2 (see each term), or by a special result.
+assign div_norm_frac_in1_dbl_norm= d1stg_fdiv && d1stg_norm_dbl_in1
+		&& (!d1stg_snan_dbl_in2)
+		&& ((!d1stg_qnan_dbl_in2) || d1stg_snan_dbl_in1)	// QNaN in2 blocks unless in1 is an SNaN
+		&& (!d1stg_spc_rslt);
+
+assign div_norm_frac_in1_dbl_dnrm= d1stg_fdiv && d1stg_denorm_dbl_in1
+		&& (!d1stg_snan_dbl_in2)
+		&& (!d1stg_qnan_dbl_in2)
+		&& (!d1stg_spc_rslt);
+
+assign div_norm_frac_in1_sng_norm= d1stg_fdiv && d1stg_norm_sng_in1
+		&& (!d1stg_snan_sng_in2)
+		&& ((!d1stg_qnan_sng_in2) || d1stg_snan_sng_in1)	// QNaN in2 blocks unless in1 is an SNaN
+		&& (!d1stg_spc_rslt);
+
+assign div_norm_frac_in1_sng_dnrm= d1stg_fdiv && d1stg_denorm_sng_in1
+		&& (!d1stg_snan_sng_in2)
+		&& (!d1stg_qnan_sng_in2)
+		&& (!d1stg_spc_rslt);
+// Operand 2 selects: the normal stage 2 path, plus stage 1 terms for a NaN in2.
+assign div_norm_frac_in2_dbl_norm= (d2stg_fdiv && d2stg_norm_dbl_in2
+			&& (!d2stg_infnan_in) && (!d2stg_zero_in))
+		|| (d1stg_fdiv && d1stg_snan_dbl_in2)
+		|| (d1stg_fdiv && d1stg_qnan_dbl_in2 && (!d1stg_snan_dbl_in1));
+
+assign div_norm_frac_in2_dbl_dnrm= d2stg_fdiv && d2stg_denorm_dbl_in2
+			&& (!d2stg_infnan_in) && (!d2stg_zero_in);
+
+assign div_norm_frac_in2_sng_norm= (d2stg_fdiv && d2stg_norm_sng_in2
+			&& (!d2stg_infnan_in) && (!d2stg_zero_in))
+		|| (d1stg_fdiv && d1stg_snan_sng_in2)
+		|| (d1stg_fdiv && d1stg_qnan_sng_in2 && (!d1stg_snan_sng_in1));
+
+assign div_norm_frac_in2_sng_dnrm= d2stg_fdiv && d2stg_denorm_sng_in2
+			&& (!d2stg_infnan_in) && (!d2stg_zero_in);
+// Special value injection selects (inf / qnan / zero).
+assign div_norm_inf= (d2stg_fdiv && (d2stg_infnan_in || d2stg_zero_in))
+		|| (d1stg_fdiv && ((d1stg_inf_in1 && (!d1stg_infnan_in2))
+				|| (d1stg_zero_in2 && (!d1stg_infnan_in1)
+					&& (!d1stg_zero_in1))));
+
+assign div_norm_qnan= d1stg_fdiv && (d1stg_2inf_in || d1stg_2zero_in);
+
+assign div_norm_zero= d1stg_fdiv
+		&& ((d1stg_inf_in2 && (!d1stg_infnan_in1))
+			|| (d1stg_zero_in1 && (!d1stg_infnan_in2)
+				&& (!d1stg_zero_in2)));
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- divide left shift.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign div_frac_add_in2_load= d4stg_fdiv || d6stg_fdiv;	// load while a divide is in stage 4 or 6
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- divide adder/subtractor 2nd input.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Shift left by one only if frac_out_54 is clear, exp_out[12] is clear and exp_out[11:1] != 0.
+assign d6stg_frac_out_shl1= (!div_frac_out_54) && (!div_exp_out[12])
+		&& (div_exp_out[11:1]!=11'b0);
+
+assign d6stg_frac_out_nosh= (!d6stg_frac_out_shl1);	// complementary select
+// Stage 5 add select: divide in stage 5, div_exp1[12] clear, stage 8 not stalled.
+assign div_frac_add_in1_add= d5stg_fdiv && (!div_exp1[12]) && d8stg_step;
+
+assign div_frac_add_in1_load= d4stg_fdiv
+		|| (d5stg_fdiv && (!div_exp1[12]) && d8stg_step)	// same term as div_frac_add_in1_add
+		|| d6stg_fdiv;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide rounding bits.
+//	fdivd taps fraction bits 2/1/0; everything else taps bits 31/30/29.
+///////////////////////////////////////////////////////////////////////////////
+
+assign d7stg_lsb_in= (d6stg_fdivd && d6stg_frac_2)
+		|| ((!d6stg_fdivd) && d6stg_frac_31);
+
+assign d7stg_grd_in= (d6stg_fdivd && d6stg_frac_1)
+		|| ((!d6stg_fdivd) && d6stg_frac_30);
+// Sticky additionally ORs in div_frac_add_in1_neq_0 for both precisions.
+assign d7stg_stk_in= (d6stg_fdivd && d6stg_frac_0)
+		|| ((!d6stg_fdivd) && d6stg_frac_29)
+		|| div_frac_add_in1_neq_0;
+// The three rounding bits are captured while a divide is in stage 6.
+dffe_s #(1) i_d7stg_lsb (
+	.din	(d7stg_lsb_in),
+	.en	(d6stg_fdiv),
+	.clk    (rclk),
+
+        .q      (d7stg_lsb),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_d7stg_grd (
+        .din    (d7stg_grd_in),
+        .en     (d6stg_fdiv),
+        .clk    (rclk),
+ 
+        .q      (d7stg_grd),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_d7stg_stk (
+        .din    (d7stg_stk_in),
+        .en     (d6stg_fdiv),
+        .clk    (rclk),
+ 
+        .q      (d7stg_stk),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Select lines- divide adder/subtractor and fraction output register.
+//	NOTE(review): mode values look like SPARC FSR.RD (00 near, 01 zero, 10 +inf, 11 -inf) - confirm.
+///////////////////////////////////////////////////////////////////////////////
+
+assign d7stg_rndup= ((div_rnd_mode[1:0]==2'b10) && (!div_sign_out)
+			&& (d7stg_grd || d7stg_stk))	// mode 10: sign clear and result inexact
+		|| ((div_rnd_mode[1:0]==2'b11) && div_sign_out
+			&& (d7stg_grd || d7stg_stk))	// mode 11: sign set and result inexact
+		|| ((div_rnd_mode[1:0]==2'b00)
+			&& ((d7stg_grd && d7stg_stk)
+				|| (d7stg_grd && (!d7stg_stk) && d7stg_lsb)));	// mode 00: guard set, tie broken by lsb
+
+assign d7stg_rndup_inv= (!d7stg_rndup);
+// Modes in which the sign points away from the rounding direction, plus mode 01.
+assign d7stg_to_0= (div_rnd_mode[1:0]==2'b01)
+		|| ((div_rnd_mode[1:0]==2'b10) && div_sign_out)
+		|| ((div_rnd_mode[1:0]==2'b11) && (!div_sign_out));
+
+assign d7stg_to_0_inv= (!d7stg_to_0);
+// Stage 7 fraction output selects: unrounded, rounded-up, or overflow value.
+assign div_frac_out_add_in1= d7stg_fdiv && (!d7stg_rndup) && (!d7stg_in_of);
+
+assign div_frac_out_add= d7stg_fdiv && d7stg_rndup && (!d7stg_in_of);
+// Stage 5 shift selects, per precision; same qualifiers as div_frac_add_in1_add.
+assign div_frac_out_shl1_dbl= d5stg_fdivd && (!div_exp1[12]) && d8stg_step;
+
+assign div_frac_out_shl1_sng= d5stg_fdivs && (!div_exp1[12]) && d8stg_step;
+
+assign div_frac_out_of= d7stg_fdiv && d7stg_in_of;
+// Load enable: a divide in stage 4 or 7, or either shift select above.
+assign div_frac_out_load= d4stg_fdiv
+		|| d7stg_fdiv
+		|| div_frac_out_shl1_dbl
+		|| div_frac_out_shl1_sng;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- divide exponent adder in the front end of the divide pipe.
+//	The in1 selects are registered; both are forced low while a divide is in stages 1-3.
+///////////////////////////////////////////////////////////////////////////////
+// Follows inq_op[1] on a stage 1 load, else d1stg_dblopa[4]; (!reset) disables the gating in reset.
+assign div_expadd1_in1_dbl_in= ((d1stg_stepa && inq_op[1])
+			|| ((!d1stg_stepa) && d1stg_dblopa[4]))
+		&& (!((d1stg_fdiv || d2stg_fdiv || d3stg_fdiv) && (!reset)));
+
+dff_s #(1) i_div_expadd1_in1_dbl (
+	.din	(div_expadd1_in1_dbl_in),
+        .clk    (rclk),
+
+	.q	(div_expadd1_in1_dbl),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+// Same structure for the single precision select, using inq_op[0] and d1stg_sngopa[4].
+assign div_expadd1_in1_sng_in= ((d1stg_stepa && inq_op[0])
+			|| ((!d1stg_stepa) && d1stg_sngopa[4]))
+		&& (!((d1stg_fdiv || d2stg_fdiv || d3stg_fdiv) && (!reset)));
+
+dff_s #(1) i_div_expadd1_in1_sng (
+	.din	(div_expadd1_in1_sng_in),
+	.clk	(rclk),
+
+	.q	(div_expadd1_in1_sng),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Second adder input follows the stage 2 precision bits directly.
+assign div_expadd1_in2_exp_in2_dbl= d2stg_fdivd;
+
+assign div_expadd1_in2_exp_in2_sng= d2stg_fdivs;
+// div_exp1 source selects; in stage 2 the adder result is used only without inf/NaN/zero flags.
+assign div_exp1_expadd1= d1stg_fdiv
+		|| (d2stg_fdiv && (!d2stg_infnan_in) && (!d2stg_zero_in))
+		|| d3stg_fdiv
+		|| d4stg_fdiv;
+// NOTE(review): names suggest constants 0x0835 / 0x0118 / zero are injected - confirm in the exp datapath.
+assign div_exp1_0835= d2stg_fdivd && d2stg_max_exp;
+
+assign div_exp1_0118= d2stg_fdivs && d2stg_max_exp;
+
+assign div_exp1_zero= d2stg_fdiv && d2stg_zero_exp;
+
+assign d2stg_max_exp= d2stg_nan_in || d2stg_inf_in1 || d2stg_zero_in2;
+
+assign d2stg_zero_exp= (d2stg_inf_in2 && (!d2stg_infnan_in1))
+		|| (d2stg_zero_in1 && (!d2stg_infnan_in2) && (!d2stg_zero_in2));
+// div_exp1 loads whenever a divide is in any of stages 1-4.
+assign div_exp1_load= d1stg_fdiv || d2stg_fdiv || d3stg_fdiv || d4stg_fdiv;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- divide exponent adder in the back end of the divide pipe.
+//	Covers the second exponent adder inputs and the div_exp_out source selects.
+///////////////////////////////////////////////////////////////////////////////
+// Uses the next-cycle stage 6 divide bit as well as the current one.
+assign div_expadd2_in1_exp_out_in= d6stg_opdec_in[2] || d6stg_fdiv;
+
+dffr_s #(1) i_div_expadd2_in1_exp_out (
+	.din	(div_expadd2_in1_exp_out_in),
+	.rst	(reset),
+	.clk	(rclk),
+
+	.q	(div_expadd2_in1_exp_out),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Compare value is 12'h018 (24) for fdivs, otherwise 12'h035 (53).
+assign div_expadd2_no_decr_inv_in= (!(div_frac_out_53
+		|| (div_exp1[11:0]==(({12{(!d5stg_fdivs)}} & 12'h035)
+					| ({12{d5stg_fdivs}} & 12'h018)))
+		|| div_expadd2_12));
+// Captured whenever stage 5 holds a divide and stage 8 is not stalled.
+assign div_expadd2_no_decr_load= d5stg_fdiv && d8stg_step;
+
+dffe_s #(1) i_div_expadd2_no_decr_inv (
+	.din	(div_expadd2_no_decr_inv_in),
+	.en	(div_expadd2_no_decr_load),
+	.clk	(rclk),
+
+	.q	(div_expadd2_no_decr_inv),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+// Carry-in is asserted while a divide is in stage 5 or stage 7.
+assign div_expadd2_cin= d5stg_fdiv || d7stg_fdiv;
+// Stage 7 with exponent bit 12 set; this suppresses two of the selects below.
+assign div_exp_out_zero= d7stg_fdiv && div_exp_out[12];
+
+assign div_exp_out_expadd22_inv= (!(d6stg_fdiv
+			|| (d5stg_fdiv && (div_cnt[5:0]==6'b0) && d8stg_step)));
+// Adder result is selected on a stage 7 round-up, on the first stage 5 step, or in stage 6.
+assign div_exp_out_expadd2= ((d7stg_fdiv && d7stg_rndup && (!d7stg_in_of))
+			|| (d5stg_fdiv && (div_cnt[5:0]==6'b0) && d8stg_step)
+			|| d6stg_fdiv)
+		&& (!div_exp_out_zero);
+
+assign div_exp_out_of= d7stg_fdiv && d7stg_in_of;
+// Keep the current exponent in stage 7 when neither overflow nor the zero case applies.
+assign div_exp_out_exp_out= d7stg_fdiv
+		&& (!d7stg_in_of)
+		&& (!div_exp_out_zero);
+// Load enable for div_exp_out: first stage 5 step, stage 6, or stage 7.
+assign div_exp_out_load= (d5stg_fdiv && (div_cnt[5:0]==6'b0) && d8stg_step)
+		|| d6stg_fdiv
+		|| d7stg_fdiv;
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_add.v
===================================================================
--- /trunk/T1-FPU/fpu_add.v	(revision 6)
+++ /trunk/T1-FPU/fpu_add.v	(revision 6)
@@ -0,0 +1,576 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_add.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	FPU add pipe.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_add (
+	inq_op,
+	inq_rnd_mode,
+	inq_id,
+	inq_fcc,
+	inq_in1,
+	inq_in1_50_0_neq_0,
+	inq_in1_53_32_neq_0,
+	inq_in1_exp_eq_0,
+	inq_in1_exp_neq_ffs,
+	inq_in2,
+	inq_in2_50_0_neq_0,
+	inq_in2_53_32_neq_0,
+	inq_in2_exp_eq_0,
+	inq_in2_exp_neq_ffs,
+	inq_add,
+	add_dest_rdy,
+	fadd_clken_l,
+	arst_l,
+	grst_l,
+	rclk,
+
+	add_pipe_active,	
+	a1stg_step,
+	a6stg_fadd_in,
+	add_id_out_in,
+	a6stg_fcmpop,
+	add_exc_out,
+	a6stg_dbl_dst,
+	a6stg_sng_dst,
+	a6stg_long_dst,
+	a6stg_int_dst,
+	add_sign_out,
+	add_exp_out,
+	add_frac_out,
+	add_cc_out,
+	add_fcc_out,
+
+	se_add_exp,
+	se_add_frac,
+	si,
+	so
+);
+
+
+input [7:0]	inq_op;			// request opcode to op pipes
+input [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+input [4:0]	inq_id;			// request ID to the operation pipes
+input [1:0]	inq_fcc;		// request cc ID to op pipes
+input [63:0]	inq_in1;		// request operand 1 to op pipes
+input		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+input		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+input		inq_in1_exp_eq_0;	// request operand 1 exp==0
+input		inq_in1_exp_neq_ffs;	// request operand 1 exp!=0xff's
+input [63:0]	inq_in2;		// request operand 2 to op pipes
+input		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+input		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+input		inq_in2_exp_eq_0;	// request operand 2 exp==0
+input		inq_in2_exp_neq_ffs;	// request operand 2 exp!=0xff's
+input		inq_add;		// add pipe request
+input		add_dest_rdy;		// add result req accepted for CPX
+input		fadd_clken_l;           // fadd clock enable
+input		arst_l;			// global async. reset- asserted low
+input		grst_l;			// global sync. reset- asserted low
+input		rclk;			// global clock
+
+output		add_pipe_active;        // add pipe is executing a valid instr
+output		a1stg_step;		// add pipe load
+output		a6stg_fadd_in;		// add pipe output request next cycle
+output [9:0]    add_id_out_in;		// add pipe output ID next cycle
+output		a6stg_fcmpop;		// compare- add 6 stage
+output [4:0]	add_exc_out;		// add pipe result- exception flags
+output		a6stg_dbl_dst;		// float double result- add 6 stage
+output		a6stg_sng_dst;		// float single result- add 6 stage
+output		a6stg_long_dst;		// 64bit integer result- add 6 stage
+output		a6stg_int_dst;		// 32bit integer result- add 6 stage
+output		add_sign_out;		// add sign output
+output [10:0]	add_exp_out;		// add exponent output
+output [63:0]	add_frac_out;		// add fraction output
+output [1:0]	add_cc_out;		// add pipe result- condition
+output [1:0]	add_fcc_out;		// add pipe input fcc passed through
+
+input           se_add_exp;     // scan_enable for add_exp_dp, add_ctl
+input           se_add_frac;    // scan_enable for add_frac_dp
+input           si;                     // scan in
+output          so;                     // scan out
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_add_ctl.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire		a1stg_denorm_sng_in1;	// select line to normalized fraction 1
+wire		a1stg_denorm_dbl_in1;	// select line to normalized fraction 1
+wire		a1stg_denorm_sng_in2;	// select line to normalized fraction 2
+wire		a1stg_denorm_dbl_in2;	// select line to normalized fraction 2
+wire		a1stg_norm_sng_in1;	// select line to normalized fraction 1
+wire		a1stg_norm_dbl_in1;	// select line to normalized fraction 1
+wire		a1stg_norm_sng_in2;	// select line to normalized fraction 2
+wire		a1stg_norm_dbl_in2;	// select line to normalized fraction 2
+wire		a1stg_step;		// add pipe load
+wire		a1stg_stepa;		// add pipe load- copy
+wire		a1stg_sngop;		// single precision operation- add 1 stg
+wire		a1stg_intlngop;		// integer/long input- add 1 stage
+wire		a1stg_fsdtoix;		// float to integer convert- add 1 stg
+wire		a1stg_fstod;		// fstod- add 1 stage
+wire		a1stg_fstoi;		// fstoi- add 1 stage
+wire		a1stg_fstox;		// fstox- add 1 stage
+wire		a1stg_fdtoi;		// fdtoi- add 1 stage
+wire		a1stg_fdtox;		// fdtox- add 1 stage
+wire		a1stg_faddsubs;		// add/subtract single- add 1 stg
+wire		a1stg_faddsubd;		// add/subtract double- add 1 stg
+wire		a1stg_fdtos;		// fdtos- add 1 stage
+wire		a2stg_faddsubop;	// float add or subtract- add 2 stage
+wire		a2stg_fsdtoix_fdtos;	// float to integer convert- add 2 stg
+wire		a2stg_fitos;		// fitos- add 2 stage
+wire		a2stg_fitod;		// fitod- add 2 stage
+wire		a2stg_fxtos;		// fxtos- add 2 stage
+wire		a2stg_fxtod;		// fxtod- add 2 stage
+wire		a3stg_faddsubop;	// denorm compare lead0[10] input select
+wire [1:0]	a3stg_faddsubopa;	// denorm compare lead0[10] input select
+wire		a4stg_dblop;		// double precision operation- add 4 stg
+wire		a6stg_fadd_in;		// add pipe output request next cycle
+wire [9:0]	add_id_out_in;		// add pipe output ID next cycle
+wire [1:0]	add_fcc_out;		// add pipe input fcc passed through
+wire		a6stg_dbl_dst;		// float double result- add 6 stage
+wire		a6stg_sng_dst;		// float single result- add 6 stage
+wire		a6stg_long_dst;		// 64bit integer result- add 6 stage
+wire		a6stg_int_dst;		// 32bit integer result- add 6 stage
+wire		a6stg_fcmpop;		// compare- add 6 stage
+wire		a6stg_step;		// advance the add pipe
+wire		a3stg_sub_in;		// subtract in main adder- add 3 stage
+wire		add_sign_out;		// add sign output
+wire [1:0]	add_cc_out;		// add pipe result- condition
+wire		a4stg_in_of;		// add overflow- select exp out
+wire [4:0]	add_exc_out;		// add pipe result- exception flags
+wire		a2stg_frac1_in_frac1;	// select line to a2stg_frac1
+wire		a2stg_frac1_in_frac2;	// select line to a2stg_frac1
+wire		a1stg_2nan_in_inv;	// 2 NaN inputs- a1 stage
+wire		a1stg_faddsubop_inv;	// add/subtract- a1 stage
+wire		a2stg_frac1_in_qnan;	// make fraction 1 a QNaN
+wire		a2stg_frac1_in_nv;	// NV- make a new QNaN
+wire		a2stg_frac1_in_nv_dbl;	// NV- make a new double prec QNaN
+wire		a2stg_frac2_in_frac1;	// select line to a2stg_frac2
+wire		a2stg_frac2_in_qnan;	// make fraction 2 a QNaN
+wire [5:0]	a2stg_shr_cnt_in;	// right shift count input- add 1 stage
+wire 		a2stg_shr_cnt_5_inv_in; // right shift count input[5]- add 1 stg
+wire		a2stg_shr_frac2_shr_int; // select line to a3stg_frac2
+wire		a2stg_shr_frac2_shr_dbl; // select line to a3stg_frac2
+wire		a2stg_shr_frac2_shr_sng; // select line to a3stg_frac2
+wire		a2stg_shr_frac2_max;	// select line to a3stg_frac2
+wire		a2stg_sub_step;		// select line to a3stg_frac2
+wire		a2stg_fracadd_frac2_inv_in; // sel line to main adder input 2
+wire		a2stg_fracadd_frac2_inv_shr1_in; // sel line to main adder in 2
+wire		a2stg_fracadd_frac2;	// select line to main adder input 2
+wire		a2stg_fracadd_cin_in;	// carry in to main adder- add 1 stage
+wire		a3stg_exp_7ff;		// select line to a3stg_exp
+wire		a3stg_exp_ff;		// select line to a3stg_exp
+wire		a3stg_exp_add;		// select line to a3stg_exp
+wire		a2stg_expdec_neq_0;	// exponent will be < 54
+wire		a3stg_exp10_0_eq0;	// exponent[10:0]==0- add 3 stage
+wire		a3stg_exp10_1_eq0;	// exponent[10:1]==0- add 3 stage
+wire		a3stg_fdtos_inv;	// double to single convert- add 3 stg
+wire		a4stg_fixtos_fxtod_inv;	// int to single/double cvt- add 4 stg
+wire		a4stg_rnd_frac_add_inv;	// select line to a4stg_rnd_frac
+wire [9:0]	a4stg_shl_cnt_in;	// postnorm shift left count- add 3 stg
+wire		a4stg_rnd_sng;		// round to single precision- add 4 stg
+wire		a4stg_rnd_dbl;		// round to double precision- add 4 stg
+wire		add_frac_out_rndadd;	// select line to add_frac_out
+wire		add_frac_out_rnd_frac;	// select line to add_frac_out
+wire		add_frac_out_shl;	// select line to add_frac_out
+wire		a4stg_to_0;		// result to max finite on overflow
+wire		add_exp_out_expinc;	// select line to add_exp_out
+wire		add_exp_out_exp;	// select line to add_exp_out
+wire		add_exp_out_exp1;	// select line to add_exp_out
+wire		add_exp_out_expadd;	// select line to add_exp_out
+wire		a4stg_to_0_inv;		// result to infinity on overflow
+wire            add_pipe_active;        // add pipe is executing a valid instr
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_add_exp_dp.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire        	a1stg_expadd3_11;	// exponent adder 3 output- add 1 stage
+wire [11:0]	a1stg_expadd1_11_0;	// exponent adder 1 output- add 1 stage
+wire [10:0]	a1stg_expadd4_inv;	// exponent adder 4 output- add 1 stage
+wire [5:0]	a1stg_expadd2_5_0;	// exponent adder 2 output- add 1 stage
+wire [11:0]	a2stg_exp;		// exponent- add 2 stage
+wire [12:0]	a2stg_expadd;		// exponent adder- add 2 stage; fpu_add_ctl gets [11:0], fpu_add_frac_dp gets bit [12]
+wire [10:0]	a3stg_exp_10_0;		// exponent- add 3 stage
+wire [11:0]	a4stg_exp_11_0;		// exponent- add 4 stage
+wire [10:0]	add_exp_out;		// add exponent output
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_add_frac_dp.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire		a1stg_in2_neq_in1_frac;	// operand 2 fraction != oprnd 1 frac
+wire		a1stg_in2_gt_in1_frac;	// operand 2 fraction > oprnd 1 frac
+wire		a1stg_in2_eq_in1_exp;	// operand 2 exponent == oprnd 1 exp
+wire		a2stg_frac2_63;		// fraction 2 bit[63]- add 2 stage
+wire		a2stg_frac2hi_neq_0;	// fraction 2[62:32]in add 2 stage != 0
+wire		a2stg_frac2lo_neq_0;	// fraction 2[31:11] in add 2 stage != 0
+wire		a3stg_fsdtoix_nx;	// inexact result for flt -> ints
+wire		a3stg_fsdtoi_nx;	// inexact result for flt -> 32b ints
+wire		a3stg_denorm;		// denorm output- add 3 stage
+wire		a3stg_denorm_inv;	// result is not a denorm- add 3 stage
+wire [5:0]	a3stg_lead0;		// leading 0's count- add 3 stage
+wire		a4stg_round;		// round the result- add 4 stage
+wire [5:0]	a4stg_shl_cnt;		// left shift count- add 4 stage (goes to fpu_add_exp_dp)
+wire		a4stg_denorm_inv;	// 0 the exponent
+wire		a3stg_inc_exp_inv;	// increment the exponent- add 3 stg
+wire		a3stg_same_exp_inv;	// keep the exponent- add 3 stg
+wire		a3stg_dec_exp_inv;	// decrement the exponent- add 3 stg
+wire		a4stg_rnd_frac_40;	// rounded fraction[40]- add 4 stage
+wire		a4stg_rnd_frac_39;	// rounded fraction[39]- add 4 stage
+wire		a4stg_rnd_frac_11;	// rounded fraction[11]- add 4 stage
+wire		a4stg_rnd_frac_10;	// rounded fraction[10]- add 4 stage
+wire		a4stg_rndadd_cout;	// fraction rounding adder carry out
+wire		a4stg_frac_9_0_nx;	// inexact double precision result
+wire		a4stg_frac_dbl_nx;	// inexact double precision result
+wire		a4stg_frac_38_0_nx;	// inexact single precision result
+wire		a4stg_frac_sng_nx;	// inexact single precision result
+wire		a4stg_frac_neq_0;	// fraction != 0- add 4 stage
+wire		a4stg_shl_data_neq_0;	// left shift result != 0- add 4 stage
+wire		add_of_out_cout;	// fraction rounding adder carry out
+wire [63:0]	add_frac_out;		// add fraction output
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Instantiations.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+fpu_add_ctl fpu_add_ctl (	// add pipe control logic (fpu_add_ctl.v); inputs are listed first
+	.inq_in1_51			(inq_in1[51]),
+	.inq_in1_54			(inq_in1[54]),
+	.inq_in1_63			(inq_in1[63]),
+	.inq_in1_50_0_neq_0		(inq_in1_50_0_neq_0),
+	.inq_in1_53_32_neq_0		(inq_in1_53_32_neq_0),
+	.inq_in1_exp_eq_0		(inq_in1_exp_eq_0),
+	.inq_in1_exp_neq_ffs		(inq_in1_exp_neq_ffs),
+	.inq_in2_51			(inq_in2[51]),
+	.inq_in2_54			(inq_in2[54]),
+	.inq_in2_63			(inq_in2[63]),
+	.inq_in2_50_0_neq_0		(inq_in2_50_0_neq_0),
+	.inq_in2_53_32_neq_0		(inq_in2_53_32_neq_0),
+	.inq_in2_exp_eq_0		(inq_in2_exp_eq_0),
+	.inq_in2_exp_neq_ffs		(inq_in2_exp_neq_ffs),
+	.inq_op				(inq_op[7:0]),
+	.inq_rnd_mode			(inq_rnd_mode[1:0]),
+	.inq_id				(inq_id[4:0]),
+	.inq_fcc			(inq_fcc[1:0]),
+	.inq_add			(inq_add),
+	.add_dest_rdy			(add_dest_rdy),
+	.a1stg_in2_neq_in1_frac		(a1stg_in2_neq_in1_frac),	// from here to a4stg_frac_9_0_nx: fed back from fpu_add_exp_dp / fpu_add_frac_dp
+	.a1stg_in2_gt_in1_frac		(a1stg_in2_gt_in1_frac),
+	.a1stg_in2_eq_in1_exp		(a1stg_in2_eq_in1_exp),
+	.a1stg_expadd1			(a1stg_expadd1_11_0[11:0]),
+	.a2stg_expadd			(a2stg_expadd[11:0]),	// low 12 bits of the 13-bit a2stg_expadd net
+	.a2stg_frac2hi_neq_0		(a2stg_frac2hi_neq_0),
+	.a2stg_frac2lo_neq_0		(a2stg_frac2lo_neq_0),
+	.a2stg_exp			(a2stg_exp[11:0]),
+	.a3stg_fsdtoix_nx		(a3stg_fsdtoix_nx),
+	.a3stg_fsdtoi_nx		(a3stg_fsdtoi_nx),
+	.a2stg_frac2_63			(a2stg_frac2_63),
+	.a4stg_exp			(a4stg_exp_11_0[11:0]),
+	.add_of_out_cout		(add_of_out_cout),
+	.a4stg_frac_neq_0		(a4stg_frac_neq_0),
+	.a4stg_shl_data_neq_0		(a4stg_shl_data_neq_0),
+	.a4stg_frac_dbl_nx		(a4stg_frac_dbl_nx),
+	.a4stg_frac_sng_nx		(a4stg_frac_sng_nx),
+	.a1stg_expadd2			(a1stg_expadd2_5_0[5:0]),
+	.a1stg_expadd4_inv		(a1stg_expadd4_inv[10:0]),
+	.a3stg_denorm			(a3stg_denorm),
+	.a3stg_denorm_inv		(a3stg_denorm_inv),
+	.a4stg_denorm_inv		(a4stg_denorm_inv),
+	.a3stg_exp			(a3stg_exp_10_0[10:0]),
+	.a4stg_round			(a4stg_round),
+	.a3stg_lead0			(a3stg_lead0[5:0]),
+	.a4stg_rnd_frac_40		(a4stg_rnd_frac_40),
+	.a4stg_rnd_frac_39		(a4stg_rnd_frac_39),
+	.a4stg_rnd_frac_11		(a4stg_rnd_frac_11),
+	.a4stg_rnd_frac_10		(a4stg_rnd_frac_10),
+	.a4stg_frac_38_0_nx		(a4stg_frac_38_0_nx),
+	.a4stg_frac_9_0_nx		(a4stg_frac_9_0_nx),
+	.arst_l				(arst_l),
+	.grst_l				(grst_l),
+	.rclk			(rclk),
+	// ---- outputs of fpu_add_ctl (declared as outputs in fpu_add_ctl.v) ----
+	.add_pipe_active                (add_pipe_active),
+	.a1stg_denorm_sng_in1		(a1stg_denorm_sng_in1),
+	.a1stg_denorm_dbl_in1		(a1stg_denorm_dbl_in1),
+	.a1stg_denorm_sng_in2		(a1stg_denorm_sng_in2),
+	.a1stg_denorm_dbl_in2		(a1stg_denorm_dbl_in2),
+	.a1stg_norm_sng_in1		(a1stg_norm_sng_in1),
+	.a1stg_norm_dbl_in1		(a1stg_norm_dbl_in1),
+	.a1stg_norm_sng_in2		(a1stg_norm_sng_in2),
+	.a1stg_norm_dbl_in2		(a1stg_norm_dbl_in2),
+	.a1stg_step			(a1stg_step),
+	.a1stg_stepa			(a1stg_stepa),	// copy of a1stg_step; this copy feeds both datapath instances
+	.a1stg_sngop			(a1stg_sngop),
+	.a1stg_intlngop			(a1stg_intlngop),
+	.a1stg_fsdtoix			(a1stg_fsdtoix),
+	.a1stg_fstod			(a1stg_fstod),
+	.a1stg_fstoi			(a1stg_fstoi),
+	.a1stg_fstox			(a1stg_fstox),
+	.a1stg_fdtoi			(a1stg_fdtoi),
+	.a1stg_fdtox			(a1stg_fdtox),
+	.a1stg_faddsubs			(a1stg_faddsubs),
+	.a1stg_faddsubd			(a1stg_faddsubd),
+	.a1stg_fdtos			(a1stg_fdtos),
+	.a2stg_faddsubop		(a2stg_faddsubop),
+	.a2stg_fsdtoix_fdtos		(a2stg_fsdtoix_fdtos),
+	.a2stg_fitos			(a2stg_fitos),
+	.a2stg_fitod			(a2stg_fitod),
+	.a2stg_fxtos			(a2stg_fxtos),
+	.a2stg_fxtod			(a2stg_fxtod),
+	.a3stg_faddsubop		(a3stg_faddsubop),
+	.a3stg_faddsubopa		(a3stg_faddsubopa[1:0]),
+	.a4stg_dblop			(a4stg_dblop),
+	.a6stg_fadd_in			(a6stg_fadd_in),
+	.add_id_out_in			(add_id_out_in[9:0]),
+	.add_fcc_out			(add_fcc_out[1:0]),
+	.a6stg_dbl_dst			(a6stg_dbl_dst),
+	.a6stg_sng_dst			(a6stg_sng_dst),
+	.a6stg_long_dst			(a6stg_long_dst),
+	.a6stg_int_dst			(a6stg_int_dst),
+	.a6stg_fcmpop			(a6stg_fcmpop),
+	.a6stg_step			(a6stg_step),
+	.a3stg_sub_in			(a3stg_sub_in),
+	.add_sign_out			(add_sign_out),
+	.add_cc_out			(add_cc_out[1:0]),
+	.a4stg_in_of			(a4stg_in_of),
+	.add_exc_out			(add_exc_out[4:0]),
+	.a2stg_frac1_in_frac1		(a2stg_frac1_in_frac1),
+	.a2stg_frac1_in_frac2		(a2stg_frac1_in_frac2),
+	.a1stg_2nan_in_inv		(a1stg_2nan_in_inv),
+	.a1stg_faddsubop_inv		(a1stg_faddsubop_inv),
+	.a2stg_frac1_in_qnan		(a2stg_frac1_in_qnan),
+	.a2stg_frac1_in_nv		(a2stg_frac1_in_nv),
+	.a2stg_frac1_in_nv_dbl		(a2stg_frac1_in_nv_dbl),
+	.a2stg_frac2_in_frac1		(a2stg_frac2_in_frac1),
+	.a2stg_frac2_in_qnan		(a2stg_frac2_in_qnan),
+	.a2stg_shr_cnt_in		(a2stg_shr_cnt_in[5:0]),
+	.a2stg_shr_cnt_5_inv_in   (a2stg_shr_cnt_5_inv_in),
+	.a2stg_shr_frac2_shr_int	(a2stg_shr_frac2_shr_int),
+	.a2stg_shr_frac2_shr_dbl	(a2stg_shr_frac2_shr_dbl),
+	.a2stg_shr_frac2_shr_sng	(a2stg_shr_frac2_shr_sng),
+	.a2stg_shr_frac2_max		(a2stg_shr_frac2_max),
+	.a2stg_sub_step			(a2stg_sub_step),
+	.a2stg_fracadd_frac2_inv_in	(a2stg_fracadd_frac2_inv_in),
+	.a2stg_fracadd_frac2_inv_shr1_in (a2stg_fracadd_frac2_inv_shr1_in),
+	.a2stg_fracadd_frac2		(a2stg_fracadd_frac2),
+	.a2stg_fracadd_cin_in		(a2stg_fracadd_cin_in),
+	.a3stg_exp_7ff			(a3stg_exp_7ff),
+	.a3stg_exp_ff			(a3stg_exp_ff),
+	.a3stg_exp_add			(a3stg_exp_add),
+	.a2stg_expdec_neq_0		(a2stg_expdec_neq_0),
+	.a3stg_exp10_0_eq0		(a3stg_exp10_0_eq0),
+	.a3stg_exp10_1_eq0		(a3stg_exp10_1_eq0),
+	.a3stg_fdtos_inv		(a3stg_fdtos_inv),
+	.a4stg_fixtos_fxtod_inv		(a4stg_fixtos_fxtod_inv),
+	.a4stg_rnd_frac_add_inv		(a4stg_rnd_frac_add_inv),
+	.a4stg_shl_cnt_in		(a4stg_shl_cnt_in[9:0]),
+	.a4stg_rnd_sng			(a4stg_rnd_sng),
+	.a4stg_rnd_dbl			(a4stg_rnd_dbl),
+	.add_frac_out_rndadd		(add_frac_out_rndadd),
+	.add_frac_out_rnd_frac		(add_frac_out_rnd_frac),
+	.add_frac_out_shl		(add_frac_out_shl),
+	.a4stg_to_0			(a4stg_to_0),
+	.add_exp_out_expinc		(add_exp_out_expinc),
+	.add_exp_out_exp		(add_exp_out_exp),
+	.add_exp_out_exp1		(add_exp_out_exp1),
+	.add_exp_out_expadd		(add_exp_out_expadd),
+	.a4stg_to_0_inv			(a4stg_to_0_inv),
+	// ---- scan: first link of the chain si -> ctl -> exp_dp -> frac_dp -> so ----
+	.se				(se_add_exp),	// same scan enable net as fpu_add_exp_dp
+	.si				(si),
+	.so				(scan_out_fpu_add_ctl)
+);
+
+
+fpu_add_exp_dp fpu_add_exp_dp (	// add pipe exponent block (module source not in view); inputs are listed first
+	.inq_in1			(inq_in1[62:52]),	// 11-bit exponent field of operand 1
+	.inq_in2			(inq_in2[62:52]),	// 11-bit exponent field of operand 2
+	.inq_op				(inq_op[1:0]),	// only opcode bits [1:0] and [7] are passed to this block
+	.inq_op_7			(inq_op[7]),
+	.a1stg_step			(a1stg_stepa),	// driven by the "stepa" copy of the add pipe load
+	.a1stg_faddsubd			(a1stg_faddsubd),
+	.a1stg_faddsubs			(a1stg_faddsubs),
+	.a1stg_fsdtoix			(a1stg_fsdtoix),
+	.a6stg_step			(a6stg_step),
+	.a1stg_fstod			(a1stg_fstod),
+	.a1stg_fdtos			(a1stg_fdtos),
+	.a1stg_fstoi			(a1stg_fstoi),
+	.a1stg_fstox			(a1stg_fstox),
+	.a1stg_fdtoi			(a1stg_fdtoi),
+	.a1stg_fdtox			(a1stg_fdtox),
+	.a2stg_fsdtoix_fdtos		(a2stg_fsdtoix_fdtos),
+	.a2stg_faddsubop		(a2stg_faddsubop),
+	.a2stg_fitos			(a2stg_fitos),
+	.a2stg_fitod			(a2stg_fitod),
+	.a2stg_fxtos			(a2stg_fxtos),
+	.a2stg_fxtod			(a2stg_fxtod),
+	.a3stg_exp_7ff			(a3stg_exp_7ff),
+	.a3stg_exp_ff			(a3stg_exp_ff),
+	.a3stg_exp_add			(a3stg_exp_add),
+	.a3stg_inc_exp_inv		(a3stg_inc_exp_inv),	// next three selects are outputs of fpu_add_frac_dp
+	.a3stg_same_exp_inv		(a3stg_same_exp_inv),
+	.a3stg_dec_exp_inv		(a3stg_dec_exp_inv),
+	.a3stg_faddsubop		(a3stg_faddsubop),
+	.a3stg_fdtos_inv		(a3stg_fdtos_inv),
+	.a4stg_fixtos_fxtod_inv		(a4stg_fixtos_fxtod_inv),
+	.a4stg_shl_cnt			(a4stg_shl_cnt[5:0]),	// output of fpu_add_frac_dp
+	.a4stg_denorm_inv		(a4stg_denorm_inv),	// output of fpu_add_frac_dp
+	.a4stg_rndadd_cout		(a4stg_rndadd_cout),	// output of fpu_add_frac_dp
+	.add_exp_out_expinc		(add_exp_out_expinc),
+	.add_exp_out_exp		(add_exp_out_exp),
+	.add_exp_out_exp1		(add_exp_out_exp1),
+	.a4stg_in_of			(a4stg_in_of),
+	.add_exp_out_expadd		(add_exp_out_expadd),
+	.a4stg_dblop			(a4stg_dblop),
+	.a4stg_to_0_inv			(a4stg_to_0_inv),
+	.fadd_clken_l			(fadd_clken_l),
+	.rclk			(rclk),
+	// ---- outputs of fpu_add_exp_dp ----
+	.a1stg_expadd3_11		(a1stg_expadd3_11),
+	.a1stg_expadd1_11_0		(a1stg_expadd1_11_0[11:0]),
+	.a1stg_expadd4_inv		(a1stg_expadd4_inv[10:0]),
+	.a1stg_expadd2_5_0		(a1stg_expadd2_5_0[5:0]),
+	.a2stg_exp			(a2stg_exp[11:0]),
+	.a2stg_expadd			(a2stg_expadd[12:0]),	// full 13 bits; ctl uses [11:0], frac_dp uses bit [12]
+	.a3stg_exp_10_0			(a3stg_exp_10_0[10:0]),
+	.a4stg_exp_11_0			(a4stg_exp_11_0[11:0]),
+	.add_exp_out			(add_exp_out[10:0]),
+	// ---- scan: second link of the chain (after fpu_add_ctl, before fpu_add_frac_dp) ----
+	.se                             (se_add_exp),
+        .si                             (scan_out_fpu_add_ctl),
+        .so                             (scan_out_fpu_add_exp_dp)
+);
+
+
+fpu_add_frac_dp fpu_add_frac_dp (	// add pipe fraction block (module source not in view); inputs are listed first
+	.inq_in1			(inq_in1[62:0]),	// operand 1 is passed without bit [63]
+	.inq_in2			(inq_in2[63:0]),	// operand 2 is passed with all 64 bits
+	.a1stg_step			(a1stg_stepa),	// driven by the "stepa" copy of the add pipe load
+	.a1stg_sngop			(a1stg_sngop),
+	.a1stg_expadd3_11		(a1stg_expadd3_11),	// output of fpu_add_exp_dp
+	.a1stg_norm_dbl_in1		(a1stg_norm_dbl_in1),
+	.a1stg_denorm_dbl_in1		(a1stg_denorm_dbl_in1),
+	.a1stg_norm_sng_in1		(a1stg_norm_sng_in1),
+	.a1stg_denorm_sng_in1		(a1stg_denorm_sng_in1),
+	.a1stg_norm_dbl_in2		(a1stg_norm_dbl_in2),
+	.a1stg_denorm_dbl_in2		(a1stg_denorm_dbl_in2),
+	.a1stg_norm_sng_in2		(a1stg_norm_sng_in2),
+	.a1stg_denorm_sng_in2		(a1stg_denorm_sng_in2),
+	.a1stg_intlngop			(a1stg_intlngop),
+	.a2stg_frac1_in_frac1		(a2stg_frac1_in_frac1),
+	.a2stg_frac1_in_frac2		(a2stg_frac1_in_frac2),
+	.a1stg_2nan_in_inv		(a1stg_2nan_in_inv),
+	.a1stg_faddsubop_inv		(a1stg_faddsubop_inv),
+	.a2stg_frac1_in_qnan		(a2stg_frac1_in_qnan),
+	.a2stg_frac1_in_nv		(a2stg_frac1_in_nv),
+	.a2stg_frac1_in_nv_dbl		(a2stg_frac1_in_nv_dbl),
+	.a6stg_step			(a6stg_step),
+	.a2stg_frac2_in_frac1		(a2stg_frac2_in_frac1),
+	.a2stg_frac2_in_qnan		(a2stg_frac2_in_qnan),
+	.a2stg_shr_cnt_in		(a2stg_shr_cnt_in[5:0]),
+	.a2stg_shr_cnt_5_inv_in (a2stg_shr_cnt_5_inv_in),
+	.a2stg_shr_frac2_shr_int	(a2stg_shr_frac2_shr_int),
+	.a2stg_shr_frac2_shr_dbl	(a2stg_shr_frac2_shr_dbl),
+	.a2stg_shr_frac2_shr_sng	(a2stg_shr_frac2_shr_sng),
+	.a2stg_shr_frac2_max		(a2stg_shr_frac2_max),
+	.a2stg_expadd_11		(a2stg_expadd[12]),	// NOTE(review): port is named _11 but is driven by bit [12] of the 13-bit net- confirm intended
+	.a2stg_sub_step			(a2stg_sub_step),
+	.a2stg_fracadd_frac2_inv_in	(a2stg_fracadd_frac2_inv_in),
+	.a2stg_fracadd_frac2_inv_shr1_in (a2stg_fracadd_frac2_inv_shr1_in),
+	.a2stg_fracadd_frac2		(a2stg_fracadd_frac2),
+	.a2stg_fracadd_cin_in		(a2stg_fracadd_cin_in),
+	.a2stg_exp			(a2stg_exp[5:0]),	// low 6 bits of the 12-bit add 2 stage exponent
+	.a2stg_expdec_neq_0		(a2stg_expdec_neq_0),
+	.a3stg_faddsubopa		(a3stg_faddsubopa[1:0]),
+	.a3stg_sub_in			(a3stg_sub_in),
+	.a3stg_exp10_0_eq0		(a3stg_exp10_0_eq0),
+	.a3stg_exp10_1_eq0		(a3stg_exp10_1_eq0),
+	.a3stg_exp_0			(a3stg_exp_10_0[0]),	// bit 0 of the add 3 stage exponent
+	.a4stg_rnd_frac_add_inv		(a4stg_rnd_frac_add_inv),
+	.a3stg_fdtos_inv		(a3stg_fdtos_inv),
+	.a4stg_fixtos_fxtod_inv		(a4stg_fixtos_fxtod_inv),
+	.a4stg_rnd_sng			(a4stg_rnd_sng),
+	.a4stg_rnd_dbl			(a4stg_rnd_dbl),
+	.a4stg_shl_cnt_in		(a4stg_shl_cnt_in[9:0]),
+	.add_frac_out_rndadd		(add_frac_out_rndadd),
+	.add_frac_out_rnd_frac		(add_frac_out_rnd_frac),
+	.a4stg_in_of			(a4stg_in_of),
+	.add_frac_out_shl		(add_frac_out_shl),
+	.a4stg_to_0			(a4stg_to_0),
+	.fadd_clken_l			(fadd_clken_l),
+	.rclk			(rclk),
+	// ---- outputs of fpu_add_frac_dp ----
+	.a1stg_in2_neq_in1_frac		(a1stg_in2_neq_in1_frac),
+	.a1stg_in2_gt_in1_frac		(a1stg_in2_gt_in1_frac),
+	.a1stg_in2_eq_in1_exp		(a1stg_in2_eq_in1_exp),
+	.a2stg_frac2_63			(a2stg_frac2_63),
+	.a2stg_frac2hi_neq_0		(a2stg_frac2hi_neq_0),
+	.a2stg_frac2lo_neq_0		(a2stg_frac2lo_neq_0),
+	.a3stg_fsdtoix_nx		(a3stg_fsdtoix_nx),
+	.a3stg_fsdtoi_nx		(a3stg_fsdtoi_nx),
+	.a3stg_denorm			(a3stg_denorm),
+	.a3stg_denorm_inv		(a3stg_denorm_inv),
+	.a3stg_lead0			(a3stg_lead0[5:0]),
+	.a4stg_round			(a4stg_round),
+	.a4stg_shl_cnt			(a4stg_shl_cnt[5:0]),
+	.a4stg_denorm_inv		(a4stg_denorm_inv),
+	.a3stg_inc_exp_inv		(a3stg_inc_exp_inv),
+	.a3stg_same_exp_inv		(a3stg_same_exp_inv),
+	.a3stg_dec_exp_inv		(a3stg_dec_exp_inv),
+	.a4stg_rnd_frac_40		(a4stg_rnd_frac_40),
+	.a4stg_rnd_frac_39		(a4stg_rnd_frac_39),
+	.a4stg_rnd_frac_11		(a4stg_rnd_frac_11),
+	.a4stg_rnd_frac_10		(a4stg_rnd_frac_10),
+	.a4stg_rndadd_cout		(a4stg_rndadd_cout),
+	.a4stg_frac_9_0_nx		(a4stg_frac_9_0_nx),
+	.a4stg_frac_dbl_nx		(a4stg_frac_dbl_nx),
+	.a4stg_frac_38_0_nx		(a4stg_frac_38_0_nx),
+	.a4stg_frac_sng_nx		(a4stg_frac_sng_nx),
+	.a4stg_frac_neq_0		(a4stg_frac_neq_0),
+	.a4stg_shl_data_neq_0		(a4stg_shl_data_neq_0),
+	.add_of_out_cout		(add_of_out_cout),
+	.add_frac_out			(add_frac_out[63:0]),
+	// ---- scan: last link of the chain; has its own scan enable (se_add_frac) ----
+	.se                             (se_add_frac),
+        .si                             (scan_out_fpu_add_exp_dp),
+        .so                             (so)
+);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_add_ctl.v
===================================================================
--- /trunk/T1-FPU/fpu_add_ctl.v	(revision 6)
+++ /trunk/T1-FPU/fpu_add_ctl.v	(revision 6)
@@ -0,0 +1,2742 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_add_ctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipeline synthesizable logic
+//		- special input cases
+//		- opcode pipeline
+//		- sign logic
+//		- exception logic
+//		- datapath control- select lines and control logic
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_add_ctl (
+	inq_in1_51,
+	inq_in1_54,
+	inq_in1_63,
+	inq_in1_50_0_neq_0,
+	inq_in1_53_32_neq_0,
+	inq_in1_exp_eq_0,
+	inq_in1_exp_neq_ffs,
+	inq_in2_51,
+	inq_in2_54,
+	inq_in2_63,
+	inq_in2_50_0_neq_0,
+	inq_in2_53_32_neq_0,
+	inq_in2_exp_eq_0,
+	inq_in2_exp_neq_ffs,
+	inq_op,
+	inq_rnd_mode,
+	inq_id,
+	inq_fcc,
+	inq_add,
+	add_dest_rdy,
+	a1stg_in2_neq_in1_frac,
+	a1stg_in2_gt_in1_frac,
+	a1stg_in2_eq_in1_exp,
+	a1stg_expadd1,
+	a2stg_expadd,
+	a2stg_frac2hi_neq_0,
+	a2stg_frac2lo_neq_0,
+	a2stg_exp,
+	a3stg_fsdtoix_nx,
+	a3stg_fsdtoi_nx,
+	a2stg_frac2_63,
+	a4stg_exp,
+	add_of_out_cout,
+	a4stg_frac_neq_0,
+	a4stg_shl_data_neq_0,
+	a4stg_frac_dbl_nx,
+	a4stg_frac_sng_nx,
+	a1stg_expadd2,
+	a1stg_expadd4_inv,
+	a3stg_denorm,
+	a3stg_denorm_inv,
+	a4stg_denorm_inv,
+	a3stg_exp,
+	a4stg_round,
+	a3stg_lead0,
+	a4stg_rnd_frac_40,
+	a4stg_rnd_frac_39,
+	a4stg_rnd_frac_11,
+	a4stg_rnd_frac_10,
+	a4stg_frac_38_0_nx,
+	a4stg_frac_9_0_nx,
+	arst_l,
+	grst_l,
+	rclk,
+	
+	add_pipe_active,
+	a1stg_denorm_sng_in1,
+	a1stg_denorm_dbl_in1,
+	a1stg_denorm_sng_in2,
+	a1stg_denorm_dbl_in2,
+	a1stg_norm_sng_in1,
+	a1stg_norm_dbl_in1,
+	a1stg_norm_sng_in2,
+	a1stg_norm_dbl_in2,
+	a1stg_step,
+	a1stg_stepa,
+	a1stg_sngop,
+	a1stg_intlngop,
+	a1stg_fsdtoix,
+	a1stg_fstod,
+	a1stg_fstoi,
+	a1stg_fstox,
+	a1stg_fdtoi,
+	a1stg_fdtox,
+	a1stg_faddsubs,
+	a1stg_faddsubd,
+	a1stg_fdtos,
+	a2stg_faddsubop,
+	a2stg_fsdtoix_fdtos,
+	a2stg_fitos,
+	a2stg_fitod,
+	a2stg_fxtos,
+	a2stg_fxtod,
+	a3stg_faddsubop,
+	a3stg_faddsubopa,
+	a4stg_dblop,
+	a6stg_fadd_in,
+	add_id_out_in,
+	add_fcc_out,
+	a6stg_dbl_dst,
+	a6stg_sng_dst,
+	a6stg_long_dst,
+	a6stg_int_dst,
+	a6stg_fcmpop,
+	a6stg_step,
+	a3stg_sub_in,
+	add_sign_out,
+	add_cc_out,
+	a4stg_in_of,
+	add_exc_out,
+	a2stg_frac1_in_frac1,
+	a2stg_frac1_in_frac2,
+	a1stg_2nan_in_inv,
+	a1stg_faddsubop_inv,
+	a2stg_frac1_in_qnan,
+	a2stg_frac1_in_nv,
+	a2stg_frac1_in_nv_dbl,
+	a2stg_frac2_in_frac1,
+	a2stg_frac2_in_qnan,
+	a2stg_shr_cnt_in,
+	a2stg_shr_cnt_5_inv_in,
+	a2stg_shr_frac2_shr_int,
+	a2stg_shr_frac2_shr_dbl,
+	a2stg_shr_frac2_shr_sng,
+	a2stg_shr_frac2_max,
+	a2stg_sub_step,
+	a2stg_fracadd_frac2_inv_in,
+	a2stg_fracadd_frac2_inv_shr1_in,
+	a2stg_fracadd_frac2,
+	a2stg_fracadd_cin_in,
+	a3stg_exp_7ff,
+	a3stg_exp_ff,
+	a3stg_exp_add,
+	a2stg_expdec_neq_0,
+	a3stg_exp10_0_eq0,
+	a3stg_exp10_1_eq0,
+	a3stg_fdtos_inv,
+	a4stg_fixtos_fxtod_inv,
+	a4stg_rnd_frac_add_inv,
+	a4stg_shl_cnt_in,
+	a4stg_rnd_sng,
+	a4stg_rnd_dbl,
+	add_frac_out_rndadd,
+	add_frac_out_rnd_frac,
+	add_frac_out_shl,
+	a4stg_to_0,
+	add_exp_out_expinc,
+	add_exp_out_exp,
+	add_exp_out_exp1,
+	add_exp_out_expadd,
+	a4stg_to_0_inv,
+
+	se,
+	si,
+	so
+);
+
+
+parameter		// NOTE(review): 8-bit opcode constants, presumably matched against inq_op[7:0]; decode logic not in view- confirm
+		FADDS=	8'h41,	// add- single
+		FADDD=	8'h42,	// add- double
+		FSUBS=	8'h45,	// subtract- single
+		FSUBD=	8'h46,	// subtract- double
+		FCMPS=	8'h51,	// compare- single
+		FCMPD=	8'h52,	// compare- double
+		FCMPES=	8'h55,	// compare- single, "E" variant (SPARC V9 FCMPEs: exception if unordered)
+		FCMPED=	8'h56,	// compare- double, "E" variant (SPARC V9 FCMPEd: exception if unordered)
+		FSTOX=	8'h81,	// convert single -> 64bit integer
+		FDTOX=	8'h82,	// convert double -> 64bit integer
+		FSTOI=	8'hd1,	// convert single -> 32bit integer
+		FDTOI=	8'hd2,	// convert double -> 32bit integer
+		FSTOD=	8'hc9,	// convert single -> double
+		FDTOS=	8'hc6,	// convert double -> single
+		FXTOS=	8'h84,	// convert 64bit integer -> single
+		FXTOD=	8'h88,	// convert 64bit integer -> double
+		FITOS=	8'hc4,	// convert 32bit integer -> single
+		FITOD=	8'hc8;	// convert 32bit integer -> double
+
+
+input		inq_in1_51;		// request operand 1[51]
+input		inq_in1_54;		// request operand 1[54]
+input		inq_in1_63;		// request operand 1[63]
+input		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+input		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+input		inq_in1_exp_eq_0;	// request operand 1[62:52]==0
+input		inq_in1_exp_neq_ffs;	// request operand 1[62:52]!=0x7ff
+input		inq_in2_51;		// request operand 2[51]
+input		inq_in2_54;		// request operand 2[54]
+input		inq_in2_63;		// request operand 2[63]
+input		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+input		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+input		inq_in2_exp_eq_0;	// request operand 2[62:52]==0
+input		inq_in2_exp_neq_ffs;	// request operand 2[62:52]!=0x7ff
+input [7:0]	inq_op;			// request opcode to op pipes
+input [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+input [4:0]	inq_id;			// request ID to the operation pipes
+input [1:0]	inq_fcc;		// request cc ID to op pipes
+input		inq_add;		// add pipe request
+input		add_dest_rdy;		// add result req accepted for CPX
+input		a1stg_in2_neq_in1_frac;	// operand 2 fraction != oprnd 1 frac
+input		a1stg_in2_gt_in1_frac;	// operand 2 fraction > oprnd 1 frac
+input		a1stg_in2_eq_in1_exp;	// operand 2 exponent == oprnd 1 exp
+input [11:0]	a1stg_expadd1;		// exponent adder 1 output- add 1 stage
+input [11:0]	a2stg_expadd;		// exponent adder- add 2 stage
+input		a2stg_frac2hi_neq_0;	// fraction 2[62:32]in add 2 stage != 0
+input		a2stg_frac2lo_neq_0;	// fraction 2[31:11] in add 2 stage != 0
+input [11:0]	a2stg_exp;		// exponent- add 2 stage
+input		a3stg_fsdtoix_nx;	// inexact result for flt -> ints
+input		a3stg_fsdtoi_nx;	// inexact result for flt -> 32b ints
+input		a2stg_frac2_63;		// fraction 2 bit[63]- add 2 stage
+input [11:0]	a4stg_exp;		// exponent- add 4 stage
+input		add_of_out_cout;	// fraction rounding adder carry out
+input		a4stg_frac_neq_0;	// fraction != 0- add 4 stage
+input		a4stg_shl_data_neq_0;	// left shift result != 0- add 4 stage
+input		a4stg_frac_dbl_nx;	// inexact double precision result
+input		a4stg_frac_sng_nx;	// inexact single precision result
+input [5:0]	a1stg_expadd2;		// exponent adder 2 output- add 1 stage
+input [10:0]	a1stg_expadd4_inv;	// exponent adder 4 output- add 1 stage
+input		a3stg_denorm;		// denorm output- add 3 stage
+input		a3stg_denorm_inv;	// result is not a denorm- add 3 stage
+input		a4stg_denorm_inv;	// 0 the exponent
+input [10:0]	a3stg_exp;		// exponent- add 3 stage
+input		a4stg_round;		// round the result- add 4 stage
+input [5:0]	a3stg_lead0;		// leading 0's count- add 3 stage
+input		a4stg_rnd_frac_40;	// rounded fraction[40]- add 4 stage
+input		a4stg_rnd_frac_39;	// rounded fraction[39]- add 4 stage
+input		a4stg_rnd_frac_11;	// rounded fraction[11]- add 4 stage
+input		a4stg_rnd_frac_10;	// rounded fraction[10]- add 4 stage
+input		a4stg_frac_38_0_nx;	// inexact single precision result
+input		a4stg_frac_9_0_nx;	// inexact double precision result
+input		arst_l;			// global asynchronous reset- asserted low
+input		grst_l;			// global synchronous reset- asserted low
+input		rclk;		// global clock
+
+output		add_pipe_active;        // add pipe is executing a valid instr
+output		a1stg_denorm_sng_in1;	// select line to normalized fraction 1
+output		a1stg_denorm_dbl_in1;	// select line to normalized fraction 1
+output		a1stg_denorm_sng_in2;	// select line to normalized fraction 2
+output		a1stg_denorm_dbl_in2;	// select line to normalized fraction 2
+output		a1stg_norm_sng_in1;	// select line to normalized fraction 1
+output		a1stg_norm_dbl_in1;	// select line to normalized fraction 1
+output		a1stg_norm_sng_in2;	// select line to normalized fraction 2
+output		a1stg_norm_dbl_in2;	// select line to normalized fraction 2
+output		a1stg_step;		// add pipe load
+output		a1stg_stepa;		// add pipe load- copy
+output		a1stg_sngop;		// single precision operation- add 1 stg
+output		a1stg_intlngop;		// integer/long input- add 1 stage
+output		a1stg_fsdtoix;		// float to integer convert- add 1 stg
+output		a1stg_fstod;		// fstod- add 1 stage
+output		a1stg_fstoi;		// fstoi- add 1 stage
+output		a1stg_fstox;		// fstox- add 1 stage
+output		a1stg_fdtoi;		// fdtoi- add 1 stage
+output		a1stg_fdtox;		// fdtox- add 1 stage
+output		a1stg_faddsubs;		// add/subtract single- add 1 stg
+output		a1stg_faddsubd;		// add/subtract double- add 1 stg
+output		a1stg_fdtos;		// fdtos- add 1 stage
+output		a2stg_faddsubop;	// float add or subtract- add 2 stage
+output		a2stg_fsdtoix_fdtos;	// float to integer convert- add 2 stg
+output		a2stg_fitos;		// fitos- add 2 stage
+output		a2stg_fitod;		// fitod- add 2 stage
+output		a2stg_fxtos;		// fxtos- add 2 stage
+output		a2stg_fxtod;		// fxtod- add 2 stage
+output		a3stg_faddsubop;	// denorm compare lead0[10] input select
+output [1:0]	a3stg_faddsubopa;	// denorm compare lead0[10] input select
+output		a4stg_dblop;		// double precision operation- add 4 stg
+output		a6stg_fadd_in;		// add pipe output request next cycle
+output [9:0]	add_id_out_in;		// add pipe output ID next cycle
+output [1:0]	add_fcc_out;		// add pipe input fcc passed through
+output		a6stg_dbl_dst;		// float double result- add 6 stage
+output		a6stg_sng_dst;		// float single result- add 6 stage
+output		a6stg_long_dst;		// 64bit integer result- add 6 stage
+output		a6stg_int_dst;		// 32bit integer result- add 6 stage
+output		a6stg_fcmpop;		// compare- add 6 stage
+output		a6stg_step;		// advance the add pipe
+output		a3stg_sub_in;		// subtract in main adder- add 3 stage
+output		add_sign_out;		// add sign output
+output [1:0]	add_cc_out;		// add pipe result- condition
+output		a4stg_in_of;		// add overflow- select exp out
+output [4:0]	add_exc_out;		// add pipe result- exception flags
+output		a2stg_frac1_in_frac1;	// select line to a2stg_frac1
+output		a2stg_frac1_in_frac2;	// select line to a2stg_frac1
+output		a1stg_2nan_in_inv;	// 2 NaN inputs- a1 stage
+output		a1stg_faddsubop_inv;	// add/subtract- a1 stage
+output		a2stg_frac1_in_qnan;	// make fraction 1 a QNaN
+output		a2stg_frac1_in_nv;	// NV- make a new QNaN
+output		a2stg_frac1_in_nv_dbl;	// NV- make a new double prec QNaN
+output		a2stg_frac2_in_frac1;	// select line to a2stg_frac2
+output		a2stg_frac2_in_qnan;	// make fraction 2 a QNaN
+output [5:0]	a2stg_shr_cnt_in;	// right shift count input- add 1 stage
+output    a2stg_shr_cnt_5_inv_in; // right shift count input[5]- add 1 stg
+output		a2stg_shr_frac2_shr_int; // select line to a3stg_frac2
+output		a2stg_shr_frac2_shr_dbl; // select line to a3stg_frac2
+output		a2stg_shr_frac2_shr_sng; // select line to a3stg_frac2
+output		a2stg_shr_frac2_max;	// select line to a3stg_frac2
+output		a2stg_sub_step;		// select line to a3stg_frac2
+output		a2stg_fracadd_frac2_inv_in; // sel line to main adder input 2
+output		a2stg_fracadd_frac2_inv_shr1_in; // sel line to main adder in 2
+output		a2stg_fracadd_frac2;	// select line to main adder input 2
+output		a2stg_fracadd_cin_in;	// carry in to main adder- add 1 stage
+output		a3stg_exp_7ff;		// select line to a3stg_exp
+output		a3stg_exp_ff;		// select line to a3stg_exp
+output		a3stg_exp_add;		// select line to a3stg_exp
+output		a2stg_expdec_neq_0;	// exponent will be < 54
+output		a3stg_exp10_0_eq0;	// exponent[10:0]==0- add 3 stage
+output		a3stg_exp10_1_eq0;	// exponent[10:1]==0- add 3 stage
+output		a3stg_fdtos_inv;	// double to single convert- add 3 stg
+output		a4stg_fixtos_fxtod_inv;	// int to single/double cvt- add 4 stg
+output		a4stg_rnd_frac_add_inv; // select line to a4stg_rnd_frac
+output [9:0]	a4stg_shl_cnt_in;	// postnorm shift left count- add 3 stg
+output		a4stg_rnd_sng;		// round to single precision- add 4 stg
+output		a4stg_rnd_dbl;		// round to double precision- add 4 stg
+output		add_frac_out_rndadd;	// select line to add_frac_out
+output		add_frac_out_rnd_frac;	// select line to add_frac_out
+output		add_frac_out_shl;	// select line to add_frac_out
+output		a4stg_to_0;		// result to max finite on overflow
+output		add_exp_out_expinc;	// select line to add_exp_out
+output		add_exp_out_exp;	// select line to add_exp_out
+output		add_exp_out_exp1;	// select line to add_exp_out
+output		add_exp_out_expadd;	// select line to add_exp_out
+output		a4stg_to_0_inv;		// result to infinity on overflow
+
+input		se;			// scan_enable
+input		si;			// scan in
+output		so;			// scan out
+
+
+wire		reset;
+wire		a1stg_in1_51;
+wire		a1stg_in1_54;
+wire		a1stg_in1_63;
+wire		a1stg_in1_50_0_neq_0;
+wire		a1stg_in1_53_32_neq_0;
+wire		a1stg_in1_exp_eq_0;
+wire		a1stg_in1_exp_neq_ffs;
+wire		a1stg_in2_51;
+wire		a1stg_in2_54;
+wire		a1stg_in2_63;
+wire		a1stg_in2_50_0_neq_0;
+wire		a1stg_in2_53_32_neq_0;
+wire		a1stg_in2_exp_eq_0;
+wire		a1stg_in2_exp_neq_ffs;
+wire		a1stg_denorm_sng_in1;
+wire		a1stg_denorm_dbl_in1;
+wire		a1stg_denorm_sng_in2;
+wire		a1stg_denorm_dbl_in2;
+wire		a1stg_norm_sng_in1;
+wire		a1stg_norm_dbl_in1;
+wire		a1stg_norm_sng_in2;
+wire		a1stg_norm_dbl_in2;
+wire		a1stg_snan_sng_in1;
+wire		a1stg_snan_dbl_in1;
+wire		a1stg_snan_sng_in2;
+wire		a1stg_snan_dbl_in2;
+wire		a1stg_qnan_sng_in1;
+wire		a1stg_qnan_dbl_in1;
+wire		a1stg_qnan_sng_in2;
+wire		a1stg_qnan_dbl_in2;
+wire		a1stg_snan_in1;
+wire		a1stg_snan_in2;
+wire		a1stg_qnan_in1;
+wire		a1stg_qnan_in2;
+wire		a1stg_nan_sng_in1;
+wire		a1stg_nan_dbl_in1;
+wire		a1stg_nan_sng_in2;
+wire		a1stg_nan_dbl_in2;
+wire		a1stg_nan_in1;
+wire		a1stg_nan_in2;
+wire		a1stg_nan_in;
+wire		a1stg_2nan_in;
+wire		a1stg_inf_sng_in1;
+wire		a1stg_inf_dbl_in1;
+wire		a1stg_inf_sng_in2;
+wire		a1stg_inf_dbl_in2;
+wire		a1stg_inf_in1;
+wire		a1stg_inf_in2;
+wire		a1stg_2inf_in;
+wire		a1stg_infnan_sng_in1;
+wire		a1stg_infnan_dbl_in1;
+wire		a1stg_infnan_sng_in2;
+wire		a1stg_infnan_dbl_in2;
+wire		a1stg_infnan_in1;
+wire		a1stg_infnan_in2;
+wire		a1stg_infnan_in;
+wire		a1stg_2zero_in;
+wire		a1stg_step;
+wire		a1stg_stepa;
+wire [7:0]	a1stg_op_in;
+wire [7:0]	a1stg_op;
+wire		a1stg_sngop;
+wire [3:0]	a1stg_sngopa;
+wire		a1stg_dblop;
+wire [3:0]	a1stg_dblopa;
+wire [1:0]	a1stg_rnd_mode;
+wire [4:0]	a1stg_id;
+wire [1:0]	a1stg_fcc;
+wire		a1stg_fadd;
+wire		a1stg_dbl_dst;
+wire		a1stg_sng_dst;
+wire		a1stg_long_dst;
+wire		a1stg_int_dst;
+wire		a1stg_intlngop;
+wire		a1stg_faddsubop;
+wire		a1stg_fsubop;
+wire		a1stg_fsdtox;
+wire		a1stg_fcmpesd;
+wire		a1stg_fcmpsd;
+wire		a1stg_faddsub_dtosop;
+wire		a1stg_fdtoix;
+wire		a1stg_fstoix;
+wire		a1stg_fsdtoix;
+wire		a1stg_fixtosd;
+wire		a1stg_fstod;
+wire		a1stg_fstoi;
+wire		a1stg_fstox;
+wire		a1stg_fdtoi;
+wire		a1stg_fdtox;
+wire		a1stg_fsdtoix_fdtos;
+wire		a1stg_fitos;
+wire		a1stg_fitod;
+wire		a1stg_fxtos;
+wire		a1stg_fcmpop;
+wire		a1stg_f4cycop;
+wire		a1stg_fixtos_fxtod;
+wire		a1stg_faddsubs_fdtos;
+wire		a1stg_faddsubs;
+wire		a1stg_faddsubd;
+wire		a1stg_fdtos;
+wire		a1stg_fistod;
+wire		a1stg_fixtos;
+wire		a1stg_fxtod;
+wire            a1stg_opdec_36;
+wire [34:28]	a1stg_opdec;
+wire [3:0]      a1stg_opdec_24_21;
+wire [8:0]      a1stg_opdec_19_11;
+wire [9:0]      a1stg_opdec_9_0;
+wire		fixtosd_hold;
+wire [30:0]	a2stg_opdec_in;
+wire            a2stg_opdec_36;
+wire [34:28]	a2stg_opdec;
+wire [3:0]      a2stg_opdec_24_21;
+wire [8:0]      a2stg_opdec_19_11;
+wire [9:0]      a2stg_opdec_9_0;
+wire [1:0]	a2stg_rnd_mode;
+wire [4:0]	a2stg_id;
+wire [1:0]	a2stg_fcc;
+wire		a2stg_fadd;
+wire		a2stg_long_dst;
+wire		a2stg_faddsubop;
+wire		a2stg_fsubop;
+wire		a2stg_faddsub_dtosop;
+wire		a2stg_fdtoix;
+wire		a2stg_fstoix;
+wire		a2stg_fsdtoix;
+wire		a2stg_fstod;
+wire		a2stg_fstoi;
+wire		a2stg_fstox;
+wire		a2stg_fdtoi;
+wire		a2stg_fdtox;
+wire		a2stg_fsdtoix_fdtos;
+wire		a2stg_fitos;
+wire		a2stg_fitod;
+wire		a2stg_fxtos;
+wire		a2stg_fcmpop;
+wire		a2stg_fixtos_fxtod;
+wire		a2stg_fdtos;
+wire		a2stg_fxtod;
+wire            a3stg_opdec_36;
+wire [34:29]	a3stg_opdec;
+wire            a3stg_opdec_24;
+wire            a3stg_opdec_21;
+wire [9:0]      a3stg_opdec_9_0;
+wire [1:0]	a3stg_rnd_mode;
+wire [4:0]	a3stg_id;
+wire [1:0]	a3stg_fcc;
+wire		a3stg_fadd;
+wire		a3stg_int_dst;
+wire		a3stg_faddsubop;
+wire [1:0]	a3stg_faddsubopa;
+wire		a3stg_fsdtoix;
+wire		a3stg_f4cycop;
+wire		a3stg_fixtos_fxtod;
+wire		a3stg_fdtos;
+wire            a4stg_opdec_36;
+wire [34:29]	a4stg_opdec;
+wire            a4stg_opdec_24;
+wire            a4stg_opdec_21;
+wire            a4stg_opdec_9;
+wire [7:0]      a4stg_opdec_7_0;
+wire [1:0]	a4stg_rnd_mode_in;
+wire [1:0]	a4stg_rnd_mode;
+wire [1:0]	a4stg_rnd_mode2;
+wire [9:0]	a4stg_id_in;
+wire [9:0]	a4stg_id;
+wire [1:0]	a4stg_fcc;
+wire		a4stg_dblop;
+wire		a4stg_fadd;
+wire		a4stg_faddsubop;
+wire		a4stg_faddsub_dtosop;
+wire		a4stg_fsdtoix;
+wire		a4stg_fcmpop;
+wire		a4stg_fixtos_fxtod;
+wire		a4stg_faddsubs_fdtos;
+wire		a4stg_faddsubs;
+wire		a4stg_faddsubd;
+wire		a4stg_fdtos;
+wire		a4stg_fistod;
+wire [34:30]	a5stg_opdec;
+wire            a5stg_opdec_9;
+wire            a5stg_opdec_7;
+wire            a5stg_opdec_1;
+wire            a5stg_opdec_0;
+wire [9:0]	a5stg_id;
+wire		a5stg_fadd;
+wire		a5stg_fixtos_fxtod;
+wire		a5stg_fixtos;
+wire		a5stg_fxtod;
+wire [34:30]	a6stg_opdec_in;
+wire            a6stg_opdec_in_9;
+wire		a6stg_fadd_in;
+wire [34:30]	a6stg_opdec;
+wire            a6stg_opdec_9;
+wire [9:0]	add_id_out_in;
+wire [9:0]	add_id_out;
+wire [1:0]	add_fcc_out_in;
+wire [1:0]	add_fcc_out;
+wire		a6stg_fadd;
+wire		a6stg_dbl_dst;
+wire		a6stg_sng_dst;
+wire		a6stg_long_dst;
+wire		a6stg_int_dst;
+wire		a6stg_fcmpop;
+wire		a6stg_hold;
+wire		a6stg_step;
+wire		a1stg_sub;
+wire		a2stg_sign1;
+wire		a2stg_sign2;
+wire		a2stg_sub;
+wire		a2stg_in2_neq_in1_frac;
+wire		a2stg_in2_gt_in1_frac;
+wire		a2stg_in2_eq_in1_exp;
+wire		a2stg_in2_gt_in1_exp;
+wire		a2stg_nan_in;
+wire		a2stg_nan_in2;
+wire		a2stg_snan_in2;
+wire		a2stg_qnan_in2;
+wire		a2stg_snan_in1;
+wire		a2stg_qnan_in1;
+wire		a2stg_2zero_in;
+wire		a2stg_2inf_in;
+wire		a2stg_in2_eq_in1;
+wire		a2stg_in2_gt_in1;
+wire		a3stg_sub_in;
+wire		a2stg_faddsub_sign;
+wire		a3stg_sign_in;
+wire		a3stg_sign;
+wire		a2stg_cc_1;
+wire		a2stg_cc_0;
+wire [1:0]	a2stg_cc;
+wire [1:0]	a3stg_cc;
+wire		a4stg_sign_in;
+wire		a4stg_sign;
+wire		a4stg_sign2;
+wire [1:0]	a4stg_cc;
+wire		add_sign_out;
+wire [1:0]	add_cc_out_in;
+wire [1:0]	add_cc_out;
+wire		a1stg_nv;
+wire		a2stg_nv;
+wire		a1stg_of_mask;
+wire		a2stg_of_mask;
+wire		a3stg_nv_in;
+wire		a3stg_nv;
+wire		a3stg_of_mask;
+wire		a2stg_nx_tmp1;
+wire		a2stg_nx_tmp2;
+wire		a2stg_nx_tmp3;
+wire		a3stg_a2_expadd_11;
+wire		a3stg_nx_tmp1;
+wire		a3stg_nx_tmp2;
+wire		a3stg_nx_tmp3;
+wire		a3stg_nx;
+wire		a4stg_nv_in;
+wire		a4stg_nv;
+wire		a4stg_nv2;
+wire		a4stg_of_mask_in;
+wire		a4stg_of_mask;
+wire		a4stg_of_mask2;
+wire		a4stg_nx_in;
+wire		a4stg_nx;
+wire		a4stg_nx2;
+wire		add_nv_out;
+wire		a4stg_in_of;
+wire		add_of_out_tmp1_in;
+wire		add_of_out_tmp1;
+wire		add_of_out_tmp2;
+wire		add_of_out;
+wire		a4stg_uf;
+wire		add_uf_out;
+wire		add_nx_out_in;
+wire		add_nx_out;
+wire [4:0]	add_exc_out;
+wire		a2stg_frac1_in_frac1;
+wire		a2stg_frac1_in_frac2;
+wire		a1stg_2nan_in_inv;
+wire		a1stg_faddsubop_inv;
+wire		a2stg_frac1_in_qnan;
+wire		a2stg_frac1_in_nv;
+wire		a2stg_frac1_in_nv_dbl;
+wire		a2stg_frac2_in_frac1;
+wire		a2stg_frac2_in_qnan;
+wire		a1stg_exp_diff_add1;
+wire		a1stg_exp_diff_add2;
+wire		a1stg_exp_diff_5;
+wire [10:0]	a1stg_exp_diff;
+wire [5:0]	a1stg_clamp63;
+wire [5:0]	a2stg_shr_cnt_in;
+wire    a2stg_shr_cnt_5_inv_in;
+wire		a2stg_shr_frac2_shr_int;
+wire		a2stg_shr_frac2_shr_dbl;
+wire		a2stg_shr_frac2_shr_sng;
+wire		a2stg_shr_frac2_max;
+wire		a2stg_sub_step;
+wire		a1stg_faddsub_clamp63_0;
+wire		a2stg_fracadd_frac2_inv_in;
+wire		a2stg_fracadd_frac2_inv_shr1_in;
+wire		a2stg_fracadd_frac2_in;
+wire		a2stg_fracadd_frac2;
+wire		a2stg_fracadd_cin_in;
+wire		a3stg_exp_7ff;
+wire		a3stg_exp_ff;
+wire		a3stg_exp_add;
+wire		a2stg_expdec_neq_0;
+wire		a3stg_exp10_0_eq0;
+wire		a3stg_exp10_1_eq0;
+wire		a3stg_fdtos_inv;
+wire		a4stg_fixtos_fxtod_inv;
+wire		a4stg_rnd_frac_add_inv;
+wire [9:0]	a4stg_shl_cnt_in;
+wire		a4stg_rnd_sng;
+wire		a4stg_rnd_dbl;
+wire		a4stg_rndup_sng;
+wire		a4stg_rndup_dbl;
+wire		a4stg_rndup;
+wire		a5stg_rndup;
+wire		add_frac_out_rndadd;
+wire		add_frac_out_rnd_frac;
+wire		add_frac_out_shl;
+wire		a4stg_to_0;
+wire		add_exp_out_expinc;
+wire		add_exp_out_exp;
+wire		add_exp_out_exp1;
+wire		add_exp_out_expadd;
+wire		a4stg_to_0_inv;
+wire		add_pipe_active_in;
+wire		add_pipe_active;
+
+
+dffrl_async #(1) dffrl_add_ctl (	// registers grst_l on rclk; arst_l drives the rst_l pin
+	.clk	(rclk),
+	.rst_l	(arst_l),
+	.din	(grst_l),
+	.q	(add_ctl_rst_l),
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Active-high reset: the inverse of the registered add_ctl_rst_l.
+assign reset = !add_ctl_rst_l;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//      Add pipeline special input cases.
+//      Every inq_in* status bit is copied to its a1stg_in* counterpart by a
+//      1-bit dffe_s instance clocked by rclk and enabled by a1stg_step.
+///////////////////////////////////////////////////////////////////////////////
+
+// a1stg_in1_51: feeds the double-precision NaN/infinity terms and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in1_51 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in1_51),
+	.q	(a1stg_in1_51),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in1_54: feeds the single-precision NaN/infinity terms and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in1_54 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in1_54),
+	.q	(a1stg_in1_54),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in1_63: feeds a1stg_sub and the a2stg_sign1 flop.
+dffe_s #(1) i_a1stg_in1_63 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in1_63),
+	.q	(a1stg_in1_63),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in1_50_0_neq_0: feeds the double-precision NaN/infinity terms and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in1_50_0_neq_0 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in1_50_0_neq_0),
+	.q	(a1stg_in1_50_0_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in1_53_32_neq_0: feeds the single-precision NaN/infinity terms and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in1_53_32_neq_0 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in1_53_32_neq_0),
+	.q	(a1stg_in1_53_32_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in1_exp_eq_0: feeds the denorm/norm flags and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in1_exp_eq_0 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in1_exp_eq_0),
+	.q	(a1stg_in1_exp_eq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in1_exp_neq_ffs: used inverted by the NaN, infinity and inf/NaN terms.
+dffe_s #(1) i_a1stg_in1_exp_neq_ffs (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in1_exp_neq_ffs),
+	.q	(a1stg_in1_exp_neq_ffs),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in2_51: feeds the double-precision NaN/infinity terms and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in2_51 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in2_51),
+	.q	(a1stg_in2_51),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in2_54: feeds the single-precision NaN/infinity terms and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in2_54 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in2_54),
+	.q	(a1stg_in2_54),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in2_63: feeds a1stg_sub and the a2stg_sign2 flop.
+dffe_s #(1) i_a1stg_in2_63 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in2_63),
+	.q	(a1stg_in2_63),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in2_50_0_neq_0: feeds the double-precision NaN/infinity terms and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in2_50_0_neq_0 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in2_50_0_neq_0),
+	.q	(a1stg_in2_50_0_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in2_53_32_neq_0: feeds the single-precision NaN/infinity terms and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in2_53_32_neq_0 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in2_53_32_neq_0),
+	.q	(a1stg_in2_53_32_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in2_exp_eq_0: feeds the denorm/norm flags and a1stg_2zero_in.
+dffe_s #(1) i_a1stg_in2_exp_eq_0 (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in2_exp_eq_0),
+	.q	(a1stg_in2_exp_eq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_in2_exp_neq_ffs: used inverted by the NaN, infinity and inf/NaN terms.
+dffe_s #(1) i_a1stg_in2_exp_neq_ffs (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_in2_exp_neq_ffs),
+	.q	(a1stg_in2_exp_neq_ffs),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//	Denorm add inputs.
+//
+//	An operand is flagged when its a1stg_inN_exp_eq_0 bit is set, qualified
+//	by bit [0] of a1stg_sngopa or a1stg_dblopa.
+///////////////////////////////////////////////////////////////////////////////
+
+// Qualified by a1stg_sngopa[0].
+assign a1stg_denorm_sng_in1 = a1stg_sngopa[0] & a1stg_in1_exp_eq_0;
+assign a1stg_denorm_sng_in2 = a1stg_sngopa[0] & a1stg_in2_exp_eq_0;
+// Qualified by a1stg_dblopa[0].
+assign a1stg_denorm_dbl_in1 = a1stg_dblopa[0] & a1stg_in1_exp_eq_0;
+assign a1stg_denorm_dbl_in2 = a1stg_dblopa[0] & a1stg_in2_exp_eq_0;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//	Non-denorm add inputs.
+//
+//	An operand is flagged when its a1stg_inN_exp_eq_0 bit is clear,
+//	qualified by bit [0] of a1stg_sngopa or a1stg_dblopa.
+///////////////////////////////////////////////////////////////////////////////
+
+// Qualified by a1stg_sngopa[0].
+assign a1stg_norm_sng_in1 = a1stg_sngopa[0] & ~a1stg_in1_exp_eq_0;
+assign a1stg_norm_sng_in2 = a1stg_sngopa[0] & ~a1stg_in2_exp_eq_0;
+// Qualified by a1stg_dblopa[0].
+assign a1stg_norm_dbl_in1 = a1stg_dblopa[0] & ~a1stg_in1_exp_eq_0;
+assign a1stg_norm_dbl_in2 = a1stg_dblopa[0] & ~a1stg_in2_exp_eq_0;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//	Nan add inputs.
+//	Every term needs a1stg_inN_exp_neq_ffs clear and is qualified by a
+//	copy of the a1stg_sngopa / a1stg_dblopa precision flag.
+///////////////////////////////////////////////////////////////////////////////
+
+assign a1stg_snan_sng_in1 = a1stg_sngopa[1] & ~a1stg_in1_exp_neq_ffs
+		& ~a1stg_in1_54 & a1stg_in1_53_32_neq_0;
+
+assign a1stg_snan_dbl_in1 = a1stg_dblopa[1] & ~a1stg_in1_exp_neq_ffs
+		& ~a1stg_in1_51 & a1stg_in1_50_0_neq_0;
+
+assign a1stg_snan_sng_in2 = a1stg_sngopa[1] & ~a1stg_in2_exp_neq_ffs
+		& ~a1stg_in2_54 & a1stg_in2_53_32_neq_0;
+
+assign a1stg_snan_dbl_in2 = a1stg_dblopa[1] & ~a1stg_in2_exp_neq_ffs
+		& ~a1stg_in2_51 & a1stg_in2_50_0_neq_0;
+// qnan terms: same exponent test, but the inN_54 / inN_51 bit is set.
+assign a1stg_qnan_sng_in1 = a1stg_sngopa[1] & ~a1stg_in1_exp_neq_ffs
+		& a1stg_in1_54;
+
+assign a1stg_qnan_dbl_in1 = a1stg_dblopa[1] & ~a1stg_in1_exp_neq_ffs
+		& a1stg_in1_51;
+
+assign a1stg_qnan_sng_in2 = a1stg_sngopa[1] & ~a1stg_in2_exp_neq_ffs
+		& a1stg_in2_54;
+
+assign a1stg_qnan_dbl_in2 = a1stg_dblopa[1] & ~a1stg_in2_exp_neq_ffs
+		& a1stg_in2_51;
+// Per-operand merge of the single and double precision results.
+assign a1stg_snan_in1 = a1stg_snan_dbl_in1 | a1stg_snan_sng_in1;
+
+assign a1stg_snan_in2 = a1stg_snan_dbl_in2 | a1stg_snan_sng_in2;
+
+assign a1stg_qnan_in1 = a1stg_qnan_dbl_in1 | a1stg_qnan_sng_in1;
+
+assign a1stg_qnan_in2 = a1stg_qnan_dbl_in2 | a1stg_qnan_sng_in2;
+// nan terms: either the inN_54 / inN_51 bit or the lower-fraction flag is set.
+assign a1stg_nan_sng_in1 = a1stg_sngopa[2]
+		& ~a1stg_in1_exp_neq_ffs
+		& (a1stg_in1_53_32_neq_0 | a1stg_in1_54);
+
+assign a1stg_nan_dbl_in1 = a1stg_dblopa[2]
+		& ~a1stg_in1_exp_neq_ffs
+		& (a1stg_in1_50_0_neq_0 | a1stg_in1_51);
+
+assign a1stg_nan_sng_in2 = a1stg_sngopa[2]
+		& ~a1stg_in2_exp_neq_ffs
+		& (a1stg_in2_53_32_neq_0 | a1stg_in2_54);
+
+assign a1stg_nan_dbl_in2 = a1stg_dblopa[2]
+		& ~a1stg_in2_exp_neq_ffs
+		& (a1stg_in2_50_0_neq_0 | a1stg_in2_51);
+
+assign a1stg_nan_in1 = a1stg_nan_dbl_in1 | a1stg_nan_sng_in1;
+
+assign a1stg_nan_in2 = a1stg_nan_dbl_in2 | a1stg_nan_sng_in2;
+// Either operand / both operands.
+assign a1stg_nan_in = a1stg_nan_in2 | a1stg_nan_in1;
+
+assign a1stg_2nan_in = a1stg_nan_in2 & a1stg_nan_in1;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//	Infinity add inputs.
+//	Flagged when a1stg_inN_exp_neq_ffs and both fraction indicators
+//	(inN_54 / inN_53_32_neq_0, or inN_51 / inN_50_0_neq_0) are all clear.
+///////////////////////////////////////////////////////////////////////////////
+
+assign a1stg_inf_sng_in1 = a1stg_sngopa[2]
+		& ~(a1stg_in1_exp_neq_ffs | a1stg_in1_54
+			| a1stg_in1_53_32_neq_0);
+
+assign a1stg_inf_dbl_in1 = a1stg_dblopa[2]
+		& ~(a1stg_in1_exp_neq_ffs | a1stg_in1_51
+			| a1stg_in1_50_0_neq_0);
+
+assign a1stg_inf_sng_in2 = a1stg_sngopa[2]
+		& ~(a1stg_in2_exp_neq_ffs | a1stg_in2_54
+			| a1stg_in2_53_32_neq_0);
+
+assign a1stg_inf_dbl_in2 = a1stg_dblopa[2]
+		& ~(a1stg_in2_exp_neq_ffs | a1stg_in2_51
+			| a1stg_in2_50_0_neq_0);
+// Per-operand merge of the single and double precision results.
+assign a1stg_inf_in1 = a1stg_inf_dbl_in1 | a1stg_inf_sng_in1;
+
+assign a1stg_inf_in2 = a1stg_inf_dbl_in2 | a1stg_inf_sng_in2;
+// Both operands flagged.
+assign a1stg_2inf_in = a1stg_inf_in2 & a1stg_inf_in1;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//      Infinity/Nan add inputs.
+//      Only the exponent flag is examined: a1stg_inN_exp_neq_ffs clear,
+//      qualified by bit [3] of a1stg_sngopa / a1stg_dblopa.
+///////////////////////////////////////////////////////////////////////////////
+
+assign a1stg_infnan_sng_in1 = a1stg_sngopa[3] & ~a1stg_in1_exp_neq_ffs;
+
+assign a1stg_infnan_dbl_in1 = a1stg_dblopa[3] & ~a1stg_in1_exp_neq_ffs;
+
+assign a1stg_infnan_sng_in2 = a1stg_sngopa[3] & ~a1stg_in2_exp_neq_ffs;
+
+assign a1stg_infnan_dbl_in2 = a1stg_dblopa[3] & ~a1stg_in2_exp_neq_ffs;
+// Per-operand merge, then either operand.
+assign a1stg_infnan_in1 = a1stg_infnan_dbl_in1 | a1stg_infnan_sng_in1;
+
+assign a1stg_infnan_in2 = a1stg_infnan_dbl_in2 | a1stg_infnan_sng_in2;
+
+assign a1stg_infnan_in = a1stg_infnan_in2 | a1stg_infnan_in1;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Zero inputs.
+//	a1stg_2zero_in: every zero-check term below holds for both operands.
+///////////////////////////////////////////////////////////////////////////////
+
+// Austin update
+// correctly detect case where both single precision operands are zero
+// Superseded equation, kept for reference (it did not test inN_54 / inN_53_32_neq_0):
+// assign a1stg_2zero_in= a1stg_in1_exp_eq_0 && (!a1stg_in1_51)
+//		&& (!a1stg_in1_50_0_neq_0)
+//		&& a1stg_in2_exp_eq_0 && (!a1stg_in2_51)
+//		&& (!a1stg_in2_50_0_neq_0);
+
+assign a1stg_2zero_in =
+// operand 1 terms
+		a1stg_in1_exp_eq_0                          &&
+                (!a1stg_in1_54          || a1stg_dblopa[3]) &&  // in1_54 clear, or term bypassed when a1stg_dblopa[3] (dp) is set
+                (!a1stg_in1_53_32_neq_0 || a1stg_dblopa[3]) &&  // in1_53_32_neq_0 clear, or dp; old note said "!bit53 && !bit52" but the name suggests bits 53:32 - TODO confirm
+                (!a1stg_in1_51)                             &&
+                (!a1stg_in1_50_0_neq_0)                     &&
+// operand 2 terms
+                a1stg_in2_exp_eq_0                          &&
+                (!a1stg_in2_54          || a1stg_dblopa[3]) &&  // in2_54 clear, or term bypassed when a1stg_dblopa[3] (dp) is set
+                (!a1stg_in2_53_32_neq_0 || a1stg_dblopa[3]) &&  // in2_53_32_neq_0 clear, or dp; old note said "!bit53 && !bit52" but the name suggests bits 53:32 - TODO confirm
+                (!a1stg_in2_51)                             &&
+                (!a1stg_in2_50_0_neq_0);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Floating point add control pipeline.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+//      Opcode pipeline- add input stage.
+//      Stage 1 advances only when neither fixtosd_hold nor a6stg_hold is set;
+//      the opcode presented to the flop is all zeros unless inq_add is set.
+///////////////////////////////////////////////////////////////////////////////
+
+assign a1stg_step = ~(fixtosd_hold | a6stg_hold);
+
+assign a1stg_stepa = a1stg_step;
+
+assign a1stg_op_in[7:0] = inq_op[7:0]
+			    & {8{inq_add}};
+
+// Stage-1 opcode register; the only stage-1 input flop here that is tied to reset.
+dffre_s #(8) i_a1stg_op (
+	.clk	(rclk),
+	.rst	(reset),
+	.en	(a1stg_step),
+	.din	(a1stg_op_in[7:0]),
+	.q	(a1stg_op[7:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_sngop: registered copy of inq_op[0].
+dffe_s #(1) i_a1stg_sngop (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_op[0]),
+	.q	(a1stg_sngop),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_sngopa[3:0]: four identical registered copies of inq_op[0] (presumably for fanout).
+dffe_s #(4) i_a1stg_sngopa (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	({4{inq_op[0]}}),
+	.q	(a1stg_sngopa[3:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_dblop: registered copy of inq_op[1].
+dffe_s #(1) i_a1stg_dblop (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_op[1]),
+	.q	(a1stg_dblop),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_dblopa[3:0]: four identical registered copies of inq_op[1] (presumably for fanout).
+dffe_s #(4) i_a1stg_dblopa (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	({4{inq_op[1]}}),
+	.q	(a1stg_dblopa[3:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_rnd_mode[1:0]: inq_rnd_mode captured for stage 1.
+dffe_s #(2) i_a1stg_rnd_mode (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_rnd_mode[1:0]),
+	.q	(a1stg_rnd_mode[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_id[4:0]: inq_id captured for stage 1.
+dffe_s #(5) i_a1stg_id (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_id[4:0]),
+	.q	(a1stg_id[4:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a1stg_fcc[1:0]: inq_fcc captured for stage 1.
+dffe_s #(2) i_a1stg_fcc (
+	.clk	(rclk),
+	.en	(a1stg_step),
+	.din	(inq_fcc[1:0]),
+	.q	(a1stg_fcc[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode decode- add stage 1.
+//      Each flag compares the registered a1stg_op[7:0] with F* opcode constants.
+///////////////////////////////////////////////////////////////////////////////
+// NOTE(review): the F* constants are declared outside this section.
+assign a1stg_fadd= (a1stg_op[7:0]==FADDS) || (a1stg_op[7:0]==FADDD)
+		|| (a1stg_op[7:0]==FSUBS) || (a1stg_op[7:0]==FSUBD)
+		|| (a1stg_op[7:0]==FCMPES) || (a1stg_op[7:0]==FCMPED)
+		|| (a1stg_op[7:0]==FCMPS) || (a1stg_op[7:0]==FCMPD)
+		|| (a1stg_op[7:0]==FITOS) || (a1stg_op[7:0]==FITOD)
+		|| (a1stg_op[7:0]==FXTOS) || (a1stg_op[7:0]==FXTOD)
+		|| (a1stg_op[7:0]==FSTOI) || (a1stg_op[7:0]==FSTOX)
+		|| (a1stg_op[7:0]==FDTOI) || (a1stg_op[7:0]==FDTOX)
+		|| (a1stg_op[7:0]==FSTOD) || (a1stg_op[7:0]==FDTOS);
+
+assign a1stg_dbl_dst= (a1stg_op[7:0]==FADDD) || (a1stg_op[7:0]==FSUBD)
+		|| (a1stg_op[7:0]==FITOD) || (a1stg_op[7:0]==FXTOD)
+		|| (a1stg_op[7:0]==FSTOD);
+
+assign a1stg_sng_dst= (a1stg_op[7:0]==FADDS) || (a1stg_op[7:0]==FSUBS)
+		|| (a1stg_op[7:0]==FITOS) || (a1stg_op[7:0]==FXTOS)
+		|| (a1stg_op[7:0]==FDTOS);
+
+assign a1stg_long_dst= (a1stg_op[7:0]==FSTOX) || (a1stg_op[7:0]==FDTOX);
+
+assign a1stg_int_dst= (a1stg_op[7:0]==FSTOI) || (a1stg_op[7:0]==FDTOI);
+// Set when neither a1stg_sngopa[3] nor a1stg_dblop is set.
+assign a1stg_intlngop= (!(a1stg_sngopa[3] || a1stg_dblop));
+
+assign a1stg_faddsubop= (a1stg_op[7:0]==FADDS) || (a1stg_op[7:0]==FADDD)
+		|| (a1stg_op[7:0]==FSUBS) || (a1stg_op[7:0]==FSUBD);
+
+assign a1stg_fsubop= (a1stg_op[7:0]==FSUBS) || (a1stg_op[7:0]==FSUBD);
+// Same opcode set as a1stg_long_dst.
+assign a1stg_fsdtox= (a1stg_op[7:0]==FSTOX) || (a1stg_op[7:0]==FDTOX);
+
+assign a1stg_fcmpesd= (a1stg_op[7:0]==FCMPES) || (a1stg_op[7:0]==FCMPED);
+
+assign a1stg_fcmpsd= (a1stg_op[7:0]==FCMPS) || (a1stg_op[7:0]==FCMPD);
+
+assign a1stg_faddsub_dtosop= (a1stg_op[7:0]==FADDS) || (a1stg_op[7:0]==FADDD)
+                || (a1stg_op[7:0]==FSUBS) || (a1stg_op[7:0]==FSUBD)
+		|| (a1stg_op[7:0]==FDTOS);
+
+assign a1stg_fdtoix= (a1stg_op[7:0]==FDTOI) || (a1stg_op[7:0]==FDTOX);
+
+assign a1stg_fstoix= (a1stg_op[7:0]==FSTOI) || (a1stg_op[7:0]==FSTOX);
+
+assign a1stg_fsdtoix= (a1stg_op[7:0]==FSTOI) || (a1stg_op[7:0]==FSTOX)
+		|| (a1stg_op[7:0]==FDTOI) || (a1stg_op[7:0]==FDTOX);
+
+assign a1stg_fixtosd= (a1stg_op[7:0]==FITOS) || (a1stg_op[7:0]==FITOD)
+		|| (a1stg_op[7:0]==FXTOS) || (a1stg_op[7:0]==FXTOD);
+
+assign a1stg_fstod= (a1stg_op[7:0]==FSTOD);
+
+assign a1stg_fstoi= (a1stg_op[7:0]==FSTOI);
+
+assign a1stg_fstox= (a1stg_op[7:0]==FSTOX);
+
+assign a1stg_fdtoi= (a1stg_op[7:0]==FDTOI);
+
+assign a1stg_fdtox= (a1stg_op[7:0]==FDTOX);
+
+assign a1stg_fsdtoix_fdtos= (a1stg_op[7:0]==FSTOI) || (a1stg_op[7:0]==FSTOX)
+                || (a1stg_op[7:0]==FDTOI) || (a1stg_op[7:0]==FDTOX)
+		|| (a1stg_op[7:0]==FDTOS);
+
+assign a1stg_fitos= (a1stg_op[7:0]==FITOS);
+
+assign a1stg_fitod= (a1stg_op[7:0]==FITOD);
+
+assign a1stg_fxtos= (a1stg_op[7:0]==FXTOS);
+
+assign a1stg_fcmpop= (a1stg_op[7:0]==FCMPS) || (a1stg_op[7:0]==FCMPD)
+		|| (a1stg_op[7:0]==FCMPES) || (a1stg_op[7:0]==FCMPED);
+// The stage-3 copy of this flag picks a3stg_rnd_mode over a4stg_rnd_mode2 for stage 4.
+assign a1stg_f4cycop= (a1stg_op[7:0]==FADDS) || (a1stg_op[7:0]==FADDD)
+                || (a1stg_op[7:0]==FSUBS) || (a1stg_op[7:0]==FSUBD)
+                || (a1stg_op[7:0]==FDTOS) || (a1stg_op[7:0]==FSTOD)
+		|| (a1stg_op[7:0]==FITOD);
+// The fixtosd ops minus FITOD; the stage-2 copy of this flag drives fixtosd_hold.
+assign a1stg_fixtos_fxtod= (a1stg_op[7:0]==FITOS) || (a1stg_op[7:0]==FXTOS)
+		|| (a1stg_op[7:0]==FXTOD);
+
+assign a1stg_faddsubs_fdtos= (a1stg_op[7:0]==FADDS) || (a1stg_op[7:0]==FSUBS)
+		|| (a1stg_op[7:0]==FDTOS);
+
+assign a1stg_faddsubs= (a1stg_op[7:0]==FADDS) || (a1stg_op[7:0]==FSUBS);
+
+assign a1stg_faddsubd= (a1stg_op[7:0]==FADDD) || (a1stg_op[7:0]==FSUBD);
+
+assign a1stg_fdtos= (a1stg_op[7:0]==FDTOS);
+
+assign a1stg_fistod= (a1stg_op[7:0]==FITOD) || (a1stg_op[7:0]==FSTOD);
+
+assign a1stg_fixtos= (a1stg_op[7:0]==FITOS) || (a1stg_op[7:0]==FXTOS);
+
+assign a1stg_fxtod= (a1stg_op[7:0]==FXTOD);
+
+assign a1stg_opdec_36 = a1stg_dblop;	// piped stage by stage; read back as a4stg_dblop
+// Bit positions noted below follow from the concatenation order (MSB first).
+assign a1stg_opdec[34:28] =
+			 {a1stg_fadd,		// [34]
+			  a1stg_dbl_dst,	// [33]
+			  a1stg_sng_dst,	// [32]
+			  a1stg_long_dst,	// [31]
+			  a1stg_int_dst,	// [30]
+			  a1stg_faddsubop,	// [29]
+			  a1stg_fsubop};	// [28]
+
+assign a1stg_opdec_24_21[3:0] =
+			 {a1stg_faddsub_dtosop,	// [3]
+			  a1stg_fdtoix,		// [2]
+			  a1stg_fstoix,		// [1]
+			  a1stg_fsdtoix};	// [0]
+
+assign a1stg_opdec_19_11[8:0] =
+			 {a1stg_fstod,		// [8]
+			  a1stg_fstoi,		// [7]
+			  a1stg_fstox,		// [6]
+			  a1stg_fdtoi,		// [5]
+			  a1stg_fdtox,		// [4]
+			  a1stg_fsdtoix_fdtos,	// [3]
+			  a1stg_fitos,		// [2]
+			  a1stg_fitod,		// [1]
+			  a1stg_fxtos};		// [0]
+ 
+assign a1stg_opdec_9_0[9:0] = 
+			 {a1stg_fcmpop,		// [9]
+			  a1stg_f4cycop,	// [8]
+			  a1stg_fixtos_fxtod,	// [7]
+			  a1stg_faddsubs_fdtos,	// [6]
+			  a1stg_faddsubs,	// [5]
+			  a1stg_faddsubd,	// [4]
+			  a1stg_fdtos,		// [3]
+			  a1stg_fistod,		// [2]
+			  a1stg_fixtos,		// [1]
+			  a1stg_fxtod};		// [0]
+
+assign fixtosd_hold= a2stg_fixtos_fxtod	// stage 2 holds a fixtos_fxtod op ...
+		&& (!(a1stg_op[7] && (!a1stg_op[1]) && (!a1stg_op[0])	// ... unless stage-1 opcode has op[7]=1, op[1:0]=00
+			&& (a1stg_op[2] || (!a1stg_op[6]))));	// and (op[2]=1 or op[6]=0)
+// NOTE(review): the exempted bit pattern presumably matches FITOS/FXTOS/FXTOD - confirm against the opcode constants.
+assign a2stg_opdec_in[30:0]= {31{(!fixtosd_hold)}}	// all 31 decode bits forced to 0 while fixtosd_hold is set
+			    & {a1stg_opdec_36, a1stg_opdec[34:28],
+                               a1stg_opdec_24_21[3:0], a1stg_opdec_19_11[8:0],
+                               a1stg_opdec_9_0[9:0]};
+// Stage-2 decode register: 1+7+4+9+10 = 31 bits, enabled by a6stg_step, tied to reset.
+dffre_s #(31) i_a2stg_opdec (
+	.din	(a2stg_opdec_in[30:0]),
+	.en	(a6stg_step),
+	.rst    (reset),
+        .clk    (rclk),
+
+        .q      ({a2stg_opdec_36, a2stg_opdec[34:28], a2stg_opdec_24_21[3:0],
+                  a2stg_opdec_19_11[8:0], a2stg_opdec_9_0[9:0]}),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+// a2stg_rnd_mode[1:0]: a1stg_rnd_mode advanced one stage when a6stg_step is set.
+dffe_s #(2) i_a2stg_rnd_mode (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a1stg_rnd_mode[1:0]),
+	.q	(a2stg_rnd_mode[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a2stg_id[4:0]: a1stg_id advanced one stage when a6stg_step is set.
+dffe_s #(5) i_a2stg_id (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a1stg_id[4:0]),
+	.q	(a2stg_id[4:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a2stg_fcc[1:0]: a1stg_fcc advanced one stage when a6stg_step is set.
+dffe_s #(2) i_a2stg_fcc (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a1stg_fcc[1:0]),
+	.q	(a2stg_fcc[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- add stage 2.
+//      Named stage-2 flags are single-bit slices of the a2stg_opdec* buses.
+///////////////////////////////////////////////////////////////////////////////
+// Slice indices match the stage-1 packing order of a1stg_opdec*.
+assign a2stg_fadd= a2stg_opdec[34];
+assign a2stg_long_dst= a2stg_opdec[31];
+assign a2stg_faddsubop= a2stg_opdec[29];
+assign a2stg_fsubop= a2stg_opdec[28];
+assign a2stg_faddsub_dtosop= a2stg_opdec_24_21[3];
+assign a2stg_fdtoix= a2stg_opdec_24_21[2];
+assign a2stg_fstoix= a2stg_opdec_24_21[1];
+assign a2stg_fsdtoix= a2stg_opdec_24_21[0];
+assign a2stg_fstod= a2stg_opdec_19_11[8];
+assign a2stg_fstoi= a2stg_opdec_19_11[7];
+assign a2stg_fstox= a2stg_opdec_19_11[6];
+assign a2stg_fdtoi= a2stg_opdec_19_11[5];
+assign a2stg_fdtox= a2stg_opdec_19_11[4];
+assign a2stg_fsdtoix_fdtos= a2stg_opdec_19_11[3];
+assign a2stg_fitos= a2stg_opdec_19_11[2];
+assign a2stg_fitod= a2stg_opdec_19_11[1];
+assign a2stg_fxtos= a2stg_opdec_19_11[0];
+assign a2stg_fcmpop= a2stg_opdec_9_0[9];
+assign a2stg_fixtos_fxtod= a2stg_opdec_9_0[7];	// also drives fixtosd_hold
+assign a2stg_fdtos= a2stg_opdec_9_0[3];
+assign a2stg_fxtod= a2stg_opdec_9_0[0];
+
+// Stage-3 decode register: 1+6+1+1+10 = 19 of the stage-2 decode bits.
+dffre_s #(19) i_a3stg_opdec (
+	.clk	(rclk),
+	.rst	(reset),
+	.en	(a6stg_step),
+	.din	({a2stg_opdec_36, a2stg_opdec[34:29], a2stg_opdec_24_21[3],
+		  a2stg_opdec_24_21[0], a2stg_opdec_9_0[9:0]}),
+	.q	({a3stg_opdec_36, a3stg_opdec[34:29], a3stg_opdec_24,
+		  a3stg_opdec_21, a3stg_opdec_9_0[9:0]}),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a3stg_faddsubopa[1:0]: two identical registered copies of a2stg_faddsubop.
+dffre_s #(2) i_a3stg_faddsubopa (
+	.clk	(rclk),
+	.rst	(reset),
+	.en	(a6stg_step),
+	.din	({2{a2stg_faddsubop}}),
+	.q	(a3stg_faddsubopa[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a3stg_rnd_mode[1:0]: a2stg_rnd_mode advanced one stage when a6stg_step is set.
+dffe_s #(2) i_a3stg_rnd_mode (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a2stg_rnd_mode[1:0]),
+	.q	(a3stg_rnd_mode[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a3stg_id[4:0]: a2stg_id advanced one stage when a6stg_step is set.
+dffe_s #(5) i_a3stg_id (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a2stg_id[4:0]),
+	.q	(a3stg_id[4:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a3stg_fcc[1:0]: a2stg_fcc advanced one stage when a6stg_step is set.
+dffe_s #(2) i_a3stg_fcc (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a2stg_fcc[1:0]),
+	.q	(a3stg_fcc[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- add stage 3.
+//      Named stage-3 flags are single-bit slices of the a3stg_opdec* nets.
+///////////////////////////////////////////////////////////////////////////////
+
+assign a3stg_fadd= a3stg_opdec[34];
+assign a3stg_int_dst= a3stg_opdec[30];
+assign a3stg_faddsubop= a3stg_opdec[29];
+assign a3stg_fsdtoix= a3stg_opdec_21;
+assign a3stg_f4cycop= a3stg_opdec_9_0[8];	// selects the source of a4stg_rnd_mode_in
+assign a3stg_fixtos_fxtod= a3stg_opdec_9_0[7];
+assign a3stg_fdtos= a3stg_opdec_9_0[3];
+// Stage-4 decode register: 18 bits; bit [8] of a3stg_opdec_9_0 (f4cycop) is not carried forward.
+dffre_s #(18) i_a4stg_opdec (
+        .din    ({a3stg_opdec_36, a3stg_opdec[34:29], a3stg_opdec_24,
+                  a3stg_opdec_21, a3stg_opdec_9_0[9], a3stg_opdec_9_0[7:0]}),
+        .en     (a6stg_step),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      ({a4stg_opdec_36, a4stg_opdec[34:29], a4stg_opdec_24,
+                  a4stg_opdec_21, a4stg_opdec_9, a4stg_opdec_7_0[7:0]}),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a4stg_rnd_mode_in[1:0] = ({2{~a3stg_f4cycop}}	// not f4cycop: reuse the delayed copy
+			    & a4stg_rnd_mode2[1:0])
+		| ({2{a3stg_f4cycop}}			// f4cycop: take the stage-3 value
+			    & a3stg_rnd_mode[1:0]);
+
+// a4stg_rnd_mode[1:0]: registered output of the select above.
+dffe_s #(2) i_a4stg_rnd_mode (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a4stg_rnd_mode_in[1:0]),
+	.q	(a4stg_rnd_mode[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a4stg_rnd_mode2[1:0]: a3stg_rnd_mode advanced unconditionally on a6stg_step.
+dffe_s #(2) i_a4stg_rnd_mode2 (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a3stg_rnd_mode[1:0]),
+	.q	(a4stg_rnd_mode2[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign a4stg_id_in[9:0] = {(a3stg_id[4:2] == 3'd7),	// [9]   one-hot expansion of id[4:2]
+				(a3stg_id[4:2] == 3'd6),	// [8]
+				(a3stg_id[4:2] == 3'd5),	// [7]
+				(a3stg_id[4:2] == 3'd4),	// [6]
+				(a3stg_id[4:2] == 3'd3),	// [5]
+				(a3stg_id[4:2] == 3'd2),	// [4]
+				(a3stg_id[4:2] == 3'd1),	// [3]
+				(a3stg_id[4:2] == 3'd0),	// [2]
+				a3stg_id[1:0]};			// [1:0] passed through unchanged
+
+// a4stg_id[9:0]: registered expanded id.
+dffe_s #(10) i_a4stg_id (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a4stg_id_in[9:0]),
+	.q	(a4stg_id[9:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+// a4stg_fcc[1:0]: a3stg_fcc advanced one stage when a6stg_step is set.
+dffe_s #(2) i_a4stg_fcc (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(a3stg_fcc[1:0]),
+	.q	(a4stg_fcc[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- add stages 4 and 5.
+//      Named flags are single-bit slices of the a4stg_opdec* / a5stg_opdec* nets.
+///////////////////////////////////////////////////////////////////////////////
+
+assign a4stg_dblop= a4stg_opdec_36;
+assign a4stg_fadd= a4stg_opdec[34];
+assign a4stg_faddsubop= a4stg_opdec[29];
+assign a4stg_faddsub_dtosop= a4stg_opdec_24;
+assign a4stg_fsdtoix= a4stg_opdec_21;
+assign a4stg_fcmpop= a4stg_opdec_9;
+assign a4stg_fixtos_fxtod= a4stg_opdec_7_0[7];
+assign a4stg_faddsubs_fdtos= a4stg_opdec_7_0[6];
+assign a4stg_faddsubs= a4stg_opdec_7_0[5];
+assign a4stg_faddsubd= a4stg_opdec_7_0[4];
+assign a4stg_fdtos= a4stg_opdec_7_0[3];
+assign a4stg_fistod= a4stg_opdec_7_0[2];
+// Stage-5 decode register: 5+1+1+1+1 = 9 bits taken from the stage-4 decode.
+dffre_s #(9) i_a5stg_opdec (
+        .din    ({a4stg_opdec[34:30], a4stg_opdec_9, a4stg_opdec_7_0[7],
+                  a4stg_opdec_7_0[1], a4stg_opdec_7_0[0]}),
+        .en     (a6stg_step),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      ({a5stg_opdec[34:30], a5stg_opdec_9, a5stg_opdec_7,
+                  a5stg_opdec_1, a5stg_opdec_0}),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// a5stg_id[9:0]: a4stg_id advanced one stage when a6stg_step is set.
+dffe_s #(10) i_a5stg_id (
+        .din    (a4stg_id[9:0]),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a5stg_id[9:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Stage-5 named flags.
+assign a5stg_fadd= a5stg_opdec[34];
+assign a5stg_fixtos_fxtod= a5stg_opdec_7;
+assign a5stg_fixtos= a5stg_opdec_1;
+assign a5stg_fxtod= a5stg_opdec_0;
+
+assign a6stg_opdec_in[34:30] = ({5{a5stg_fixtos_fxtod}}	// stage 5 holds a fixtos_fxtod op: take stage 5
+			    & a5stg_opdec[34:30])
+		| ({5{((!a4stg_fixtos_fxtod) && (!a5stg_fixtos_fxtod))}}	// neither stage holds one: take stage 4
+			    & a4stg_opdec[34:30]);
+// When only stage 4 holds a fixtos_fxtod op, neither term is selected and the result is all zeros.
+assign a6stg_opdec_in_9 = (a5stg_fixtos_fxtod
+			    & a5stg_opdec_9)
+		| (((!a4stg_fixtos_fxtod) && (!a5stg_fixtos_fxtod))
+			    & a4stg_opdec_9);
+// Same selection for fadd, plus reset gating and a hold term that recirculates a6stg_fadd when a6stg_step is clear.
+assign a6stg_fadd_in= (a5stg_fixtos_fxtod && a6stg_step && (!reset)
+			&& a5stg_fadd)
+		|| ((!a4stg_fixtos_fxtod) && (!a5stg_fixtos_fxtod)
+			&& a6stg_step && (!reset) && a4stg_fadd)
+		|| ((!a6stg_step) && (!reset) && a6stg_fadd);
+// Stage-6 decode register: opdec[34:30] plus the bit-9 (fcmpop) flag.
+dffre_s #(6) i_a6stg_opdec (
+	.din	({a6stg_opdec_in[34:30], a6stg_opdec_in_9}),
+	.en     (a6stg_step),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      ({a6stg_opdec[34:30], a6stg_opdec_9}),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign add_id_out_in[9:0] = ({10{~a6stg_step}}			// not stepping: recirculate
+			    & add_id_out[9:0])
+		| ({10{(a6stg_step & a5stg_fixtos_fxtod)}}	// stepping, fixtos_fxtod in stage 5
+			    & a5stg_id[9:0])
+		| ({10{(a6stg_step & ~a5stg_fixtos_fxtod)}}	// stepping otherwise: stage-4 id
+			    & a4stg_id[9:0]);
+
+// add_id_out[9:0]: plain dff_s with no enable; the hold is done in add_id_out_in.
+dff_s #(10) i_add_id_out (
+	.clk	(rclk),
+	.din	(add_id_out_in[9:0]),
+	.q	(add_id_out[9:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign add_fcc_out_in[1:0] = a4stg_fcc
+			    & {2{a4stg_fcmpop}};	// zero unless a4stg_fcmpop is set
+
+// add_fcc_out[1:0]: registered when a6stg_step is set.
+dffe_s #(2) i_add_fcc_out (
+	.clk	(rclk),
+	.en	(a6stg_step),
+	.din	(add_fcc_out_in[1:0]),
+	.q	(add_fcc_out[1:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- add pipeline output.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a6stg_fadd= a6stg_opdec[34];	// stage-6 decode bits, registered by i_a6stg_opdec
+assign a6stg_dbl_dst= a6stg_opdec[33];
+assign a6stg_sng_dst= a6stg_opdec[32];
+assign a6stg_long_dst= a6stg_opdec[31];
+assign a6stg_int_dst= a6stg_opdec[30];
+assign a6stg_fcmpop= a6stg_opdec_9;
+// Hold stage 6 while it carries an fadd op and add_dest_rdy is low.
+assign a6stg_hold= a6stg_fadd && (!add_dest_rdy);
+// a6stg_step is the inverse of hold; it is the .en input of the dffe_s/dffre_s pipeline flops in this section.
+assign a6stg_step= (!a6stg_hold);
+
+// Austin update
+// Power management update
+
+assign add_pipe_active_in =  // add pipe is executing a valid instr
+   a1stg_fadd || a2stg_fadd || a3stg_fadd || a4stg_fadd || a5stg_fadd || a6stg_fadd;
+
+dffre_s #(1) i_add_pipe_active (
+	.din	(add_pipe_active_in),
+	.en     (1'b1),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (add_pipe_active),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add sign and exception logic.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add sign logic.
+//
+//	Add stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a1stg_sub= (a1stg_fsubop ^ (a1stg_in1_63 ^ a1stg_in2_63))	// fsubop XOR (bit 63 of in1 differs from bit 63 of in2)
+		&& (!a1stg_fdtos)	// forced low for fdtos
+		&& (!(a1stg_faddsubop && a1stg_nan_in));	// forced low for add/sub ops with a NaN input
+
+dffe_s #(1) i_a2stg_sign1 (
+	.din	(a1stg_in1_63),
+	.en	(a6stg_step),
+	.clk    (rclk),
+
+        .q      (a2stg_sign1),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_sign2 (
+	.din    (a1stg_in2_63),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_sign2),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_sub (
+        .din    (a1stg_sub),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_sub),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_in2_neq_in1_frac (
+        .din    (a1stg_in2_neq_in1_frac),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_in2_neq_in1_frac),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_in2_gt_in1_frac (
+        .din    (a1stg_in2_gt_in1_frac),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_in2_gt_in1_frac),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_in2_eq_in1_exp (
+        .din    (a1stg_in2_eq_in1_exp),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_in2_eq_in1_exp),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_in2_gt_in1_exp (
+        .din    (a1stg_expadd1[11]),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_in2_gt_in1_exp),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_nan_in (
+        .din    (a1stg_nan_in),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_nan_in),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_nan_in2 (
+        .din    (a1stg_nan_in2),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_nan_in2),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_snan_in2 (
+        .din    (a1stg_snan_in2),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_snan_in2),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_qnan_in2 (
+        .din    (a1stg_qnan_in2),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_qnan_in2),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_snan_in1 (
+        .din    (a1stg_snan_in1),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_snan_in1),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_qnan_in1 (
+        .din    (a1stg_qnan_in1),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_qnan_in1),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_2zero_in (
+        .din    (a1stg_2zero_in),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_2zero_in),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a2stg_2inf_in (
+        .din    (a1stg_2inf_in),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a2stg_2inf_in),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add sign logic.
+//
+//      Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a2stg_in2_eq_in1= a2stg_in2_eq_in1_exp && (!a2stg_in2_neq_in1_frac);	// exponents equal and fractions not flagged unequal
+
+assign a2stg_in2_gt_in1= a2stg_in2_gt_in1_exp	// in2 exponent flagged greater, or
+		|| (a2stg_in2_eq_in1_exp && a2stg_in2_neq_in1_frac	// exponents equal, fractions differ,
+			&& a2stg_in2_gt_in1_frac);	// and in2 fraction flagged greater
+
+assign a3stg_sub_in= a2stg_sub
+		&& (!a2stg_nan_in)
+		&& (!(a2stg_fsdtoix && (!a2stg_expadd[11])));
+
+assign a2stg_faddsub_sign= (a2stg_sign1
+			&& (!a2stg_nan_in)
+			&& (a2stg_sign2 ^ a2stg_fsubop)
+			&& (!(a2stg_2inf_in && a2stg_sub)))
+		|| (a2stg_sign1
+			&& (!a2stg_nan_in)
+			&& (!a2stg_in2_eq_in1)
+			&& (!a2stg_in2_gt_in1)
+			&& (!(a2stg_2inf_in && a2stg_sub)))
+		|| ((!a2stg_in2_eq_in1)
+			&& a2stg_in2_gt_in1
+			&& (!a2stg_nan_in)
+			&& (a2stg_sign2 ^ a2stg_fsubop)
+			&& (!(a2stg_2inf_in && a2stg_sub)))
+		|| (a2stg_sign2
+			&& (a2stg_snan_in2
+				|| (a2stg_qnan_in2 && (!a2stg_snan_in1))))
+		|| (a2stg_sign1
+			&& ((a2stg_snan_in1 && (!a2stg_snan_in2))
+				|| (a2stg_qnan_in1 && (!a2stg_nan_in2))))
+		|| ((a2stg_rnd_mode[1:0]==2'b11)
+			&& a2stg_in2_eq_in1
+			&& (a2stg_sign1 ^ (a2stg_sign2 ^ a2stg_fsubop))
+			&& (!a2stg_nan_in)
+			&& (!a2stg_2inf_in));
+
+assign a3stg_sign_in= (a2stg_faddsubop && a2stg_faddsub_sign)
+		|| ((!a2stg_faddsubop) && a2stg_sign2);
+
+dffe_s #(1) i_a3stg_sign (
+	.din	(a3stg_sign_in),
+	.en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a3stg_sign),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a2stg_cc_1= ((a2stg_sign2 && (!a2stg_2zero_in) && a2stg_sub)
+			|| ((!a2stg_in2_eq_in1) && (!a2stg_sub)
+				&& (a2stg_in2_gt_in1 ^ (!a2stg_sign2)))
+			|| a2stg_nan_in)
+		&& a2stg_fcmpop;
+
+assign a2stg_cc_0= (((!a2stg_sign2) && (!a2stg_2zero_in) && a2stg_sub)
+			|| ((!a2stg_in2_eq_in1) && (!a2stg_sub)
+				&& (a2stg_in2_gt_in1 ^ a2stg_sign2))
+			|| a2stg_nan_in)
+		&& a2stg_fcmpop;
+
+assign a2stg_cc[1:0]= {a2stg_cc_1, a2stg_cc_0};
+
+dffe_s #(2) i_a3stg_cc (
+	.din	(a2stg_cc[1:0]),
+	.en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a3stg_cc[1:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add sign logic.
+//
+//      Add stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a4stg_sign_in= (a3stg_f4cycop && a3stg_sign)
+		|| ((!a3stg_f4cycop) && a4stg_sign2);
+
+dffe_s #(1) i_a4stg_sign (
+	.din	(a4stg_sign_in),
+	.en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a4stg_sign),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a4stg_sign2 (
+	.din	(a3stg_sign),
+	.en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a4stg_sign2),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(2) i_a4stg_cc (
+        .din    (a3stg_cc[1:0]),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a4stg_cc[1:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add sign logic.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_add_sign_out (
+	.din	(a4stg_sign),
+	.en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (add_sign_out),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign add_cc_out_in[1:0]= ({2{a4stg_fcmpop}}
+			    & a4stg_cc[1:0]);
+
+dffe_s #(2) i_add_cc_out (
+	.din	(add_cc_out_in[1:0]),
+	.en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (add_cc_out[1:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add exception logic.
+//
+//      Add stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a1stg_nv= (a1stg_faddsubop
+			&& ((a1stg_2inf_in && a1stg_sub)
+				|| a1stg_snan_in1
+				|| a1stg_snan_in2))
+		|| (a1stg_fstod && a1stg_snan_in2)
+		|| (a1stg_fdtos && a1stg_snan_in2)
+		|| (a1stg_fcmpesd && a1stg_nan_in)
+		|| (a1stg_fcmpsd
+			&& (a1stg_snan_in1 || a1stg_snan_in2));
+
+dffe_s #(1) i_a2stg_nv (
+	.din	(a1stg_nv),
+	.en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a2stg_nv),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a1stg_of_mask= (!(a1stg_faddsub_dtosop && a1stg_infnan_in));
+
+dffe_s #(1) i_a2stg_of_mask (
+        .din    (a1stg_of_mask),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a2stg_of_mask),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add exception logic.
+//
+//      Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a3stg_nv_in= ((!a2stg_expadd[11])
+			&& a2stg_fsdtoix
+			&& ((!a2stg_sign2)
+				|| (|a2stg_expadd[10:0])
+				|| a2stg_frac2hi_neq_0
+				|| (a2stg_long_dst && a2stg_frac2lo_neq_0)))
+		|| a2stg_nv;
+
+dffe_s #(1) i_a3stg_nv (
+	.din	(a3stg_nv_in),
+	.en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a3stg_nv),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a3stg_of_mask (
+        .din    (a2stg_of_mask),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a3stg_of_mask),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a2stg_nx_tmp1= (a2stg_fdtoix && (|a2stg_exp[11:10]))
+		|| (a2stg_fstoix && (|a2stg_exp[11:7]));
+
+assign a2stg_nx_tmp2= ((a2stg_fdtoix && (!(|a2stg_exp[11:10])))
+			|| (a2stg_fstoix && (!(|a2stg_exp[11:7]))))
+		&& ((|a2stg_exp[10:1])
+			|| a2stg_frac2hi_neq_0
+			|| a2stg_frac2lo_neq_0
+			|| a2stg_frac2_63);
+
+assign a2stg_nx_tmp3= (a2stg_exp[11:0]==12'h41f)
+		&& a2stg_sign2
+		&& (!a2stg_frac2hi_neq_0)
+		&& a2stg_frac2lo_neq_0
+		&& a2stg_fdtoi;
+
+dffe_s #(1) i_a3stg_a2_expadd_11 (
+	.din	(a2stg_expadd[11]),
+	.en	(a6stg_step),
+	.clk	(rclk),
+
+	.q	(a3stg_a2_expadd_11),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_a3stg_nx_tmp1 (
+	.din	(a2stg_nx_tmp1),
+	.en	(a6stg_step),
+	.clk	(rclk),
+
+	.q	(a3stg_nx_tmp1),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_a3stg_nx_tmp2 (
+	.din	(a2stg_nx_tmp2),
+	.en	(a6stg_step),
+	.clk	(rclk),
+
+	.q	(a3stg_nx_tmp2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_a3stg_nx_tmp3 (
+	.din	(a2stg_nx_tmp3),
+	.en	(a6stg_step),
+	.clk	(rclk),
+
+	.q	(a3stg_nx_tmp3),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add exception logic.
+//
+//      Add stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a3stg_nx= (a3stg_a2_expadd_11
+		    && ((a3stg_nx_tmp1
+				&& ((a3stg_fsdtoi_nx && a3stg_int_dst)
+					|| a3stg_fsdtoix_nx))
+			|| a3stg_nx_tmp2))
+		|| a3stg_nx_tmp3;
+
+assign a4stg_nv_in= ((a3stg_fadd && (!a3stg_fixtos_fxtod))
+			&& a3stg_nv)
+		|| ((!(a3stg_fadd && (!a3stg_fixtos_fxtod)))
+			&& a4stg_nv2);
+dffe_s #(1) i_a4stg_nv (
+        .din    (a4stg_nv_in),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a4stg_nv),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a4stg_nv2 (
+        .din    (a3stg_nv),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a4stg_nv2),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a4stg_of_mask_in= ((a3stg_fadd && (!a3stg_fixtos_fxtod))
+                        && a3stg_of_mask)
+		|| ((!(a3stg_fadd && (!a3stg_fixtos_fxtod)))
+                        && a4stg_of_mask2);
+
+dffe_s #(1) i_a4stg_of_mask (
+        .din    (a4stg_of_mask_in),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a4stg_of_mask),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a4stg_of_mask2 (
+        .din    (a3stg_of_mask),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a4stg_of_mask2),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a4stg_nx_in= ((a3stg_fadd && (!a3stg_fixtos_fxtod))
+                        && a3stg_nx)
+                || ((!(a3stg_fadd && (!a3stg_fixtos_fxtod)))
+                        && a4stg_nx2);
+
+dffe_s #(1) i_a4stg_nx (
+        .din    (a4stg_nx_in),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (a4stg_nx),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_a4stg_nx2 (
+        .din    (a3stg_nx),
+        .en     (a6stg_step),
+        .clk    (rclk),
+ 
+        .q      (a4stg_nx2),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add exception logic.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_add_nv_out (
+        .din    (a4stg_nv),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (add_nv_out),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a4stg_in_of= ((a4stg_exp[11] || (&a4stg_exp[10:0]))
+			&& a4stg_faddsubd
+			&& a4stg_of_mask)
+		|| (((|a4stg_exp[11:8]) || (&a4stg_exp[7:0]))
+			&& a4stg_faddsubs_fdtos
+			&& a4stg_of_mask);
+
+assign add_of_out_tmp1_in= ((&a4stg_exp[10:1]) && a4stg_rndup && a4stg_round
+			&& a4stg_faddsubd
+                        && a4stg_of_mask)
+		|| ((&a4stg_exp[7:1]) && a4stg_rndup
+			&& (a4stg_round || a4stg_fdtos)
+			&& a4stg_faddsubs_fdtos
+			&& a4stg_of_mask);
+
+dffe_s #(1) i_add_of_out_tmp1 (
+        .din    (add_of_out_tmp1_in),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (add_of_out_tmp1),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_add_of_out_tmp2 (
+	.din	(a4stg_in_of),
+        .en	(a6stg_step),
+        .clk	(rclk),
+ 
+        .q	(add_of_out_tmp2),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+assign add_of_out= add_of_out_tmp2	// registered a4stg_in_of (see i_add_of_out_tmp2)
+		|| (add_of_out_tmp1 && add_of_out_cout);	// registered add_of_out_tmp1_in, qualified by add_of_out_cout (driven outside this view)
+
+assign a4stg_uf= ((!(|a4stg_exp[10:0]))
+			&& a4stg_frac_neq_0
+			&& (a4stg_round || a4stg_fdtos)
+			&& a4stg_faddsub_dtosop)
+		|| (a4stg_faddsubop
+			&& (!(a4stg_round || a4stg_fdtos))
+			&& (!a4stg_denorm_inv)
+			&& a4stg_shl_data_neq_0);
+
+dffe_s #(1) i_add_uf_out (
+        .din    (a4stg_uf),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (add_uf_out),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign add_nx_out_in= (a4stg_of_mask
+			&& a4stg_frac_dbl_nx
+			&& (a4stg_faddsubd || a5stg_fxtod)
+			&& ((!a4stg_faddsubd) || a4stg_round))
+		|| (a4stg_of_mask
+			&& a4stg_frac_sng_nx
+			&& (a4stg_faddsubs_fdtos || a5stg_fixtos)
+			&& ((!a4stg_faddsubs) || a4stg_round))
+		|| a4stg_nx;
+
+dffe_s #(1) i_add_nx_out (
+        .din    (add_nx_out_in),
+        .en     (a6stg_step),
+        .clk    (rclk),
+
+        .q      (add_nx_out),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipe exception output.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Austin update
+// Overflow is always accompanied by inexact.
+// Previously this was handled within the FFU.
+
+// assign add_exc_out[4:0]= {add_nv_out, add_of_out, add_uf_out, 1'b0, add_nx_out};
+
+assign add_exc_out[4:0] =
+  {add_nv_out,
+   add_of_out,
+   add_uf_out,
+   1'b0,
+   (add_nx_out || add_of_out)};  // Overflow is always accompanied by inexact
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipeline control logic.
+//
+///////////////////////////////////////////////////////////////////////////////
+ 
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- add normalization and special input injection.
+//
+//	Add stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a2stg_frac1_in_frac1= a1stg_snan_in2
+		|| (a1stg_qnan_in2 && (!a1stg_snan_in1));
+
+assign a2stg_frac1_in_frac2= a1stg_faddsubop
+		&& ((!a1stg_2nan_in)
+			|| a1stg_snan_in2
+        		|| (a1stg_qnan_in2 && (!a1stg_snan_in1)));
+
+assign a1stg_2nan_in_inv= (!a1stg_2nan_in);
+
+assign a1stg_faddsubop_inv= (!a1stg_faddsubop);
+
+assign a2stg_frac1_in_qnan= (a1stg_nan_in
+			|| (a1stg_2inf_in && a1stg_sub))
+		&& a1stg_faddsubop;
+
+assign a2stg_frac1_in_nv= a1stg_2inf_in && a1stg_sub && a1stg_faddsubop;
+
+assign a2stg_frac1_in_nv_dbl= a1stg_2inf_in && a1stg_sub && a1stg_faddsubd;
+
+assign a2stg_frac2_in_frac1= a1stg_faddsubop && (!a1stg_infnan_in);
+
+assign a2stg_frac2_in_qnan= a1stg_snan_in2 && (!a1stg_faddsubop);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines and control logic- add pipe right shift
+//		count calculation.
+//
+//      Add stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a1stg_exp_diff_add1= a1stg_faddsub_dtosop && (!a1stg_expadd1[11]);
+
+assign a1stg_exp_diff_add2= a1stg_faddsubop && a1stg_expadd1[11];
+
+assign a1stg_exp_diff_5= (!a1stg_expadd2[5]) && a1stg_fsdtox;
+
+assign a1stg_exp_diff[10:0]= ({11{a1stg_exp_diff_add1}}
+			    & a1stg_expadd1[10:0])
+		| ({11{a1stg_exp_diff_add2}}
+			    & (~a1stg_expadd4_inv[10:0]))
+		| ({11{a1stg_fsdtoix}}
+			    & {5'b0, a1stg_exp_diff_5, (~a1stg_expadd2[4:0])});
+
+assign a1stg_clamp63[5:0]= a1stg_exp_diff[5:0] | {6{(|a1stg_exp_diff[10:6])}};	// saturate: any of bits 10:6 set forces 6'b111111 (63)
+
+assign a2stg_shr_cnt_in[5:0]= a1stg_clamp63[5:0];	// clamped value is the stage-2 shift count input
+
+assign a2stg_shr_cnt_5_inv_in= (!a1stg_clamp63[5]);	// inverted copy of clamped bit 5
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- add pipe right shift.
+//
+//      Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a2stg_shr_frac2_shr_int= a2stg_faddsub_dtosop && a6stg_step;
+
+assign a2stg_shr_frac2_shr_dbl= ((a2stg_fdtox && (|a2stg_exp[11:10]))
+			|| (a2stg_fstox && (|a2stg_exp[11:7])))
+		&& a6stg_step;
+
+assign a2stg_shr_frac2_shr_sng= ((a2stg_fdtoi && (|a2stg_exp[11:10]))
+			|| (a2stg_fstoi && (|a2stg_exp[11:7])))
+		&& a6stg_step;
+
+assign a2stg_shr_frac2_max= a2stg_fsdtoix && a6stg_step;
+
+assign a2stg_sub_step= a2stg_sub && a6stg_step;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- add pipe adder.
+//
+//      Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a1stg_faddsub_clamp63_0= (|(({6{a1stg_expadd1[11]}}
+			    & (~{a1stg_expadd4_inv[10:6],
+						a1stg_expadd4_inv[0]}))
+		| ({6{(!a1stg_expadd1[11])}}
+			    & {a1stg_expadd1[10:6], a1stg_expadd1[0]})));
+
+assign a2stg_fracadd_frac2_inv_in= (a1stg_fixtosd && a1stg_in2_63)
+		|| (a1stg_faddsubop && a1stg_sub
+			&& (!a1stg_faddsub_clamp63_0));
+
+assign a2stg_fracadd_frac2_inv_shr1_in= a1stg_faddsubop && a1stg_sub
+			&& a1stg_faddsub_clamp63_0;
+
+assign a2stg_fracadd_frac2_in= (a1stg_fixtosd && (!a1stg_in2_63))
+		|| a1stg_fstod
+		|| (a1stg_faddsubop && (!a1stg_sub));
+
+dffe_s #(1) i_a2stg_fracadd_frac2 (
+	.din	(a2stg_fracadd_frac2_in),
+	.en	(a6stg_step),
+	.clk	(rclk),
+
+	.q	(a2stg_fracadd_frac2),
+
+	.se	(se),
+	.si	(),
+	.so   	()
+);
+
+assign a2stg_fracadd_cin_in= (a1stg_fixtosd && a1stg_in2_63)
+		|| (a1stg_faddsubop && a1stg_sub);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- add pipe exponent adjustment.
+//
+//      Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a3stg_exp_7ff= a2stg_fstod && (&a2stg_exp[7:0]);
+
+assign a3stg_exp_ff= a2stg_fdtos && (&a2stg_exp[10:0]);
+
+assign a3stg_exp_add= (a2stg_fstod && (!(&a2stg_exp[7:0])))
+		|| (a2stg_fdtos && (!(&a2stg_exp[10:0])));
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- add pipe exponent decode- used to identify denorm results.
+//
+//      Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a2stg_expdec_neq_0= a2stg_faddsubop && (a2stg_exp[10:0]<11'h36);	// add/sub op with exp[10:0] below 54 (11'h36)
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Select lines and control logic
+//		- add pipe main adder
+//		- add pipe exponent increment/decrement adjustment
+//
+//      Add stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a3stg_exp10_0_eq0= (a3stg_exp[10:0]==11'b0);
+
+assign a3stg_exp10_1_eq0= (a3stg_exp[10:1]==10'b0);
+
+assign a3stg_fdtos_inv= (!a3stg_fdtos);
+
+assign a4stg_fixtos_fxtod_inv= (!a4stg_fixtos_fxtod);
+
+assign a4stg_rnd_frac_add_inv= (!(a3stg_fsdtoix
+		|| (a3stg_faddsubop && a3stg_exp10_0_eq0)));
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Control logic- add pipe left shift count.
+//
+//      Add stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a4stg_shl_cnt_in[9:0]= ({10{a3stg_denorm}}	// a3stg_denorm: count comes from a3stg_exp[5:0]
+			    & {(a3stg_exp[5:4]==2'b11),	// bits 9:6 are a one-hot decode of count[5:4]
+				(a3stg_exp[5:4]==2'b10),
+				(a3stg_exp[5:4]==2'b01),
+				(a3stg_exp[5:4]==2'b00),
+				a3stg_exp[5:0]})	// bits 5:0 are the raw count
+		| ({10{a3stg_denorm_inv}}	// a3stg_denorm_inv: count comes from a3stg_lead0[5:0]
+			    & {(a3stg_lead0[5:4]==2'b11),	// same one-hot decode of count[5:4]
+				(a3stg_lead0[5:4]==2'b10),
+				(a3stg_lead0[5:4]==2'b01),
+				(a3stg_lead0[5:4]==2'b00),
+				a3stg_lead0[5:0]});	// bits 5:0 are the raw count
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines and control logic- add pipe rounding adder.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a4stg_rnd_sng= a5stg_fixtos || a4stg_faddsubs_fdtos;
+
+assign a4stg_rnd_dbl= a5stg_fxtod || a4stg_faddsubd;
+
+	
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines and control logic- add pipe fraction output.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a4stg_rndup_sng= ((a4stg_rnd_mode==2'b10) && (!a4stg_sign)	// mode 2'b10: sign is 0
+			&& a4stg_frac_sng_nx)	// and the single-precision inexact flag is set
+		|| ((a4stg_rnd_mode==2'b11) && a4stg_sign	// mode 2'b11: sign is 1
+			&& a4stg_frac_sng_nx)	// and the single-precision inexact flag is set
+		|| ((a4stg_rnd_mode==2'b00) && a4stg_rnd_frac_39	// mode 2'b00: rnd_frac_39 set and
+			&& (a4stg_frac_38_0_nx || a4stg_rnd_frac_40));	// (frac_38_0_nx or rnd_frac_40 set)
+// Double-precision version: same structure, using frac_dbl_nx, rnd_frac_10, frac_9_0_nx and rnd_frac_11.
+assign a4stg_rndup_dbl= ((a4stg_rnd_mode==2'b10) && (!a4stg_sign)
+                        && a4stg_frac_dbl_nx)
+                || ((a4stg_rnd_mode==2'b11) && a4stg_sign
+                        && a4stg_frac_dbl_nx)
+                || ((a4stg_rnd_mode==2'b00) && a4stg_rnd_frac_10
+			&& (a4stg_frac_9_0_nx || a4stg_rnd_frac_11));
+// NOTE(review): mode encodings look like SPARC FSR.RD (0 nearest, 2 +inf, 3 -inf) - confirm against the spec.
+assign a4stg_rndup= (a4stg_faddsubd && a4stg_rndup_dbl)	// double add/sub uses the dbl decision
+		|| (a4stg_faddsubs && a4stg_rndup_sng)	// single add/sub uses the sng decision
+		|| (a4stg_fdtos && a4stg_rndup_sng && a4stg_of_mask);	// fdtos uses the sng decision, gated by a4stg_of_mask
+// Round-up decision for the ops flagged in stage 5 (fxtod / fixtos).
+assign a5stg_rndup= (a5stg_fxtod && a4stg_rndup_dbl)
+		|| (a5stg_fixtos && a4stg_rndup_sng);
+
+assign add_frac_out_rndadd= (a4stg_faddsubop && a4stg_round && a4stg_rndup
+			&& (!a4stg_in_of))
+		|| (a4stg_fdtos && a4stg_rndup && (!a4stg_in_of))
+		|| (a5stg_fixtos_fxtod && a5stg_rndup);
+
+assign add_frac_out_rnd_frac= (a4stg_faddsubop && a4stg_round && (!a4stg_rndup)
+			&& (!a4stg_in_of))
+		|| (a4stg_fdtos && (!a4stg_rndup) && (!a4stg_in_of))
+		|| (a5stg_fixtos_fxtod && (!a5stg_rndup))
+		|| a4stg_fsdtoix;
+
+assign add_frac_out_shl= (a4stg_faddsubop && (!a4stg_round) && (!a4stg_in_of))
+		|| a4stg_fistod;
+
+assign a4stg_to_0= (!((a4stg_rnd_mode==2'b00)	// high only for rnd_mode 2'b01,
+			|| ((a4stg_rnd_mode==2'b10) && (!a4stg_sign))	// for 2'b10 with sign 1,
+			|| ((a4stg_rnd_mode==2'b11) && a4stg_sign)));	// or for 2'b11 with sign 0
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines and control logic- add pipe exponent output.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign add_exp_out_expinc= (a4stg_faddsubop && a4stg_round && a4stg_rndup
+			&& (!a4stg_in_of))
+		|| (a4stg_fdtos && a4stg_rndup
+			&& (!a4stg_in_of))
+		|| (a5stg_fixtos_fxtod && a5stg_rndup);
+
+assign add_exp_out_exp= (a4stg_faddsubop && a4stg_round
+			&& (!a4stg_in_of))
+		|| (a4stg_fdtos
+			&& (!a4stg_in_of))
+		|| a5stg_fixtos_fxtod;
+
+assign add_exp_out_exp1= (a4stg_faddsubop && a4stg_round
+			&& (!a4stg_rndup)
+			&& (!a4stg_in_of))
+		|| (a4stg_fdtos
+			&& (!a4stg_rndup)
+			&& (!a4stg_in_of))
+		|| (a5stg_fixtos_fxtod
+			&& (!a5stg_rndup));
+
+assign add_exp_out_expadd= (a4stg_faddsubop && (!a4stg_round) && (!a4stg_in_of))
+		|| a4stg_fistod;
+
+assign a4stg_to_0_inv= (!a4stg_to_0);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_out_dp.v
===================================================================
--- /trunk/T1-FPU/fpu_out_dp.v	(revision 6)
+++ /trunk/T1-FPU/fpu_out_dp.v	(revision 6)
@@ -0,0 +1,216 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_out_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//      FPU output datapath.
+//
+///////////////////////////////////////////////////////////////////////////////
+ 
+
+module fpu_out_dp (
+	dest_rdy,
+	req_thread,
+	div_exc_out,
+	d8stg_fdivd,
+	d8stg_fdivs,
+	div_sign_out,
+	div_exp_out,
+	div_frac_out,
+	mul_exc_out,
+	m6stg_fmul_dbl_dst,
+	m6stg_fmuls,
+	mul_sign_out,
+	mul_exp_out,
+	mul_frac_out,
+	add_exc_out,
+	a6stg_fcmpop,
+	add_cc_out,
+	add_fcc_out,
+	a6stg_dbl_dst,
+	a6stg_sng_dst,
+	a6stg_long_dst,
+	a6stg_int_dst,
+	add_sign_out,
+	add_exp_out,
+	add_frac_out,
+	rclk,
+	
+	fp_cpx_data_ca,
+
+	se,
+	si,
+	so
+);
+// Formats the add/mul/div pipe results to 64 bits, selects one per dest_rdy, and registers
+// the result plus a header into fp_cpx_data_ca on clk. si is never read and so is never driven here.
+input [2:0]	dest_rdy;		// pipe with result request this cycle
+input [1:0]	req_thread;		// thread ID of result req this cycle
+input [4:0]	div_exc_out;		// divide pipe result- exception flags
+input		d8stg_fdivd;		// divide double- divide stage 8
+input		d8stg_fdivs;		// divide single- divide stage 8
+input		div_sign_out;		// divide sign output
+input [10:0]	div_exp_out;		// divide exponent output
+input [51:0]	div_frac_out;		// divide fraction output
+input [4:0]	mul_exc_out;		// multiply pipe result- exception flags
+input		m6stg_fmul_dbl_dst;	// double precision multiply result
+input		m6stg_fmuls;		// fmuls- multiply 6 stage
+input		mul_sign_out;		// multiply sign output
+input [10:0]	mul_exp_out;		// multiply exponent output
+input [51:0]	mul_frac_out;		// multiply fraction output
+input [4:0]	add_exc_out;		// add pipe result- exception flags
+input		a6stg_fcmpop;		// compare- add 6 stage
+input [1:0]	add_cc_out;		// add pipe result- condition
+input [1:0]	add_fcc_out;		// add pipe input fcc passed through
+input		a6stg_dbl_dst;		// float double result- add 6 stage
+input		a6stg_sng_dst;		// float single result- add 6 stage
+input		a6stg_long_dst;		// 64bit integer result- add 6 stage
+input		a6stg_int_dst;		// 32bit integer result- add 6 stage
+input		add_sign_out;		// add sign output
+input [10:0]	add_exp_out;		// add exponent output
+input [63:0]	add_frac_out;		// add fraction output
+input		rclk;		// global clock
+
+output [144:0]	fp_cpx_data_ca;		// FPU result to CPX
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+
+
+wire [63:0]	add_out;
+wire [63:0]	mul_out;
+wire [63:0]	div_out;
+wire [7:0]	fp_cpx_data_ca_84_77_in;
+wire [76:0]	fp_cpx_data_ca_76_0_in;
+wire [7:0]	fp_cpx_data_ca_84_77;
+wire [76:0]	fp_cpx_data_ca_76_0;
+wire [144:0]	fp_cpx_data_ca;
+
+wire		clk;			// explicit declaration (was an implicit net): tied to ckbuf_out_dp.clk, clocks both output flops
+wire se_l;
+
+assign se_l = ~se;
+// NOTE(review): clken_buf is defined elsewhere; presumably it drives clk from rclk with enb_l tied low (always enabled) - confirm.
+clken_buf  ckbuf_out_dp (
+  .clk(clk),
+  .rclk(rclk),
+  .enb_l(1'b0),
+  .tmb_l(se_l)
+  );
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe output.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign add_out[63:0]= ({64{a6stg_dbl_dst}}	// double: sign(1) + exp(11) + frac[62:11](52)
+			    & {add_sign_out, add_exp_out[10:0],
+				add_frac_out[62:11]})
+		| ({64{a6stg_sng_dst}}	// single: sign(1) + exp(8) + frac[62:40](23), low 32 bits zero
+			    & {add_sign_out, add_exp_out[7:0],
+				add_frac_out[62:40], 32'b0})
+		| ({64{a6stg_long_dst}}	// 64-bit integer: full fraction datapath
+			    & add_frac_out[63:0])
+		| ({64{a6stg_int_dst}}	// 32-bit integer: upper word, low 32 bits zero
+			    & {add_frac_out[63:32], 32'b0});
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply output.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign mul_out[63:0]= ({64{m6stg_fmul_dbl_dst}}	// double: sign(1) + exp(11) + frac(52)
+			    & {mul_sign_out, mul_exp_out[10:0],
+				mul_frac_out[51:0]})
+		| ({64{m6stg_fmuls}}	// single: sign(1) + exp(8) + frac[51:29](23), low 32 bits zero
+			    & {mul_sign_out, mul_exp_out[7:0],
+				mul_frac_out[51:29], 32'b0});
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide output.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign div_out[63:0]= ({64{d8stg_fdivd}}	// double: sign(1) + exp(11) + frac(52)
+			    & {div_sign_out, div_exp_out[10:0],
+				div_frac_out[51:0]})
+		| ({64{d8stg_fdivs}}	// single: sign(1) + exp(8) + frac[51:29](23), low 32 bits zero
+			    & {div_sign_out, div_exp_out[7:0],
+				div_frac_out[51:29], 32'b0});
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Choose the output data.
+//
+//	Input to the CPX data (CA) stage.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign fp_cpx_data_ca_84_77_in[7:0]= ({8{(|dest_rdy)}}	// all zero unless some pipe has a result this cycle
+			    & {1'b1, 4'b1000, 1'b0, req_thread[1:0]});	// NOTE(review): presumably valid bit + FP return type + thread id - confirm against the CPX packet spec
+
+assign fp_cpx_data_ca_76_0_in[76:0]= ({77{dest_rdy[2]}}	// dest_rdy[2]: divide pipe (5 flags + 8 zeros + 64 data)
+			    & {div_exc_out[4:0], 8'b0, div_out[63:0]})
+		| ({77{dest_rdy[1]}}	// dest_rdy[1]: multiply pipe (same layout)
+			    & {mul_exc_out[4:0], 8'b0, mul_out[63:0]})
+		| ({77{dest_rdy[0]}}	// dest_rdy[0]: add pipe; the 8 middle bits carry compare info
+			    & {add_exc_out[4:0], 2'b0, a6stg_fcmpop,
+				add_cc_out[1:0], add_fcc_out[1:0], 1'b0,
+				add_out[63:0]});
+
+dff_s #(8) i_fp_cpx_data_ca_84_77 (
+	.din	(fp_cpx_data_ca_84_77_in[7:0]),
+	.clk    (clk),
+
+        .q      (fp_cpx_data_ca_84_77[7:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dff_s #(77) i_fp_cpx_data_ca_76_0 (
+	.din	(fp_cpx_data_ca_76_0_in[76:0]),
+	.clk    (clk),
+
+        .q      (fp_cpx_data_ca_76_0[76:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+// 145-bit layout: [144:140]=hdr[7:3], [139:137]=0, [136:134]=hdr[2:0], [133:77]=0, [76:0]=flags+data.
+assign fp_cpx_data_ca[144:0]= {fp_cpx_data_ca_84_77[7:3],
+				3'b0,
+				fp_cpx_data_ca_84_77[2:0],
+				57'b0,
+				fp_cpx_data_ca_76_0[76:0]};
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_mul_exp_dp.v
===================================================================
--- /trunk/T1-FPU/fpu_mul_exp_dp.v	(revision 6)
+++ /trunk/T1-FPU/fpu_mul_exp_dp.v	(revision 6)
@@ -0,0 +1,491 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_mul_exp_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply pipeline exponent datapath.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+module fpu_mul_exp_dp (	// multiply-pipe exponent datapath: builds mul_exp_out[10:0] from the two operand exponent fields
+	inq_in1,
+	inq_in2,
+	m6stg_step,
+	m1stg_dblop,
+	m1stg_sngop,
+	m2stg_exp_expadd,
+	m2stg_exp_0bff,
+	m2stg_exp_017f,
+	m2stg_exp_04ff,
+	m2stg_exp_zero,
+	m1stg_fsmuld,
+	m2stg_fmuld,
+	m2stg_fmuls,
+	m2stg_fsmuld,
+	m3stg_ld0_inv,
+	m5stg_fracadd_cout,
+	mul_exp_out_exp_plus1,
+	mul_exp_out_exp,
+	m5stg_in_of,
+	m5stg_fmuld,
+	m5stg_to_0_inv,
+	m4stg_shl_54,
+	m4stg_shl_55,
+	m4stg_inc_exp_54,
+	m4stg_inc_exp_55,
+	m4stg_inc_exp_105,
+	fmul_clken_l,
+	rclk,
+	
+	m3stg_exp,
+	m3stg_expadd_eq_0,
+	m3stg_expadd_lte_0_inv,
+	m4stg_exp,
+	m5stg_exp,
+	mul_exp_out,
+
+	se,
+	si,
+	so
+);
+
+
+input [62:52]	inq_in1;		// request operand 1 to op pipes
+input [62:52]	inq_in2;		// request operand 2 to op pipes
+input		m6stg_step;		// advance the multiply pipe
+input		m1stg_dblop;		// double precision operation- mul 1 stg
+input		m1stg_sngop;		// single precision operation- mul 1 stg
+input		m2stg_exp_expadd;	// select line to m2stg_exp
+input		m2stg_exp_0bff;		// select line to m2stg_exp
+input		m2stg_exp_017f;		// select line to m2stg_exp
+input		m2stg_exp_04ff;		// select line to m2stg_exp
+input		m2stg_exp_zero;		// select line to m2stg_exp
+input		m1stg_fsmuld;		// fsmuld- multiply 1 stage
+input		m2stg_fmuld;		// fmuld- multiply 2 stage
+input		m2stg_fmuls;		// fmuls- multiply 2 stage
+input		m2stg_fsmuld;		// fsmuld- multiply 2 stage
+input [6:0]	m3stg_ld0_inv;		// leading 0's in multiply operands
+input           m4stg_inc_exp_54;       // select line to m5stg_exp
+input           m4stg_inc_exp_55;       // select line to m5stg_exp
+input           m4stg_inc_exp_105;      // select line to m5stg_exp
+input		m5stg_fracadd_cout;	// fraction rounding adder carry out
+input		mul_exp_out_exp_plus1;	// select line to mul_exp_out
+input		mul_exp_out_exp;	// select line to mul_exp_out
+input		m5stg_in_of;		// multiply overflow- select exp out
+input		m5stg_fmuld;		// fmuld- multiply 5 stage
+input		m5stg_to_0_inv;		// result to infinity on overflow
+input		m4stg_shl_54;		// multiply shift left output bit[54]
+input		m4stg_shl_55;		// multiply shift left output bit[55]
+input		fmul_clken_l;           // multiply pipe clk enable - asserted low
+input		rclk; 		// global clock; only consumer here is the clken_buf that produces clk
+
+output [12:0]	m3stg_exp;		// exponent input- multiply 3 stage
+output		m3stg_expadd_eq_0;	// mul stage 3 exponent adder sum == 0
+output		m3stg_expadd_lte_0_inv;	// mul stage 3 exponent adder sum <= 0
+output [12:0]	m4stg_exp;		// exponent input- multiply 4 stage
+output [12:0]	m5stg_exp;		// exponent input- multiply 5 stage
+output [10:0]	mul_exp_out;		// multiply exponent output
+
+input           se;                     // scan_enable
+input           si;                     // scan in (unused here: every flop's .si is left open)
+output          so;                     // scan out (never driven inside this module)
+
+
+wire [10:0]	m1stg_exp_in1;
+wire [10:0]	m1stg_exp_in2;
+wire [12:0]	m1stg_expadd_in1;
+wire [12:0]	m1stg_expadd_in2;
+wire [12:0]	m1stg_expadd;
+wire [12:0]	m2stg_exp_in;
+wire [12:0]	m2stg_exp;
+wire [12:0]	m2stg_expadd_in2;
+wire [12:0]	m2stg_expadd;
+wire [12:0]	m3astg_exp;
+wire [12:0]	m3bstg_exp;
+wire [12:0]	m3stg_exp;
+wire [12:0]	m3stg_expa;
+wire [12:0]	m3stg_expadd;
+wire		m3stg_expadd_eq_0;
+wire		m3stg_expadd_lte_0_inv;
+wire [12:0]	m4stg_exp_in;
+wire [12:0]	m4stg_exp;
+wire [12:0]	m4stg_exp_plus1;
+wire [12:0]	m5stg_exp_pre1_in;
+wire [12:0]	m5stg_exp_pre1;
+wire [12:0]	m5stg_exp_pre2_in;
+wire [12:0]	m5stg_exp_pre2;
+wire [12:0]	m5stg_exp_pre3_in;
+wire [12:0]	m5stg_exp_pre3;
+wire [12:0]	m5stg_exp;
+wire [12:0]	m5stg_expa;
+wire [12:0]	m5stg_exp_plus1;
+wire [10:0]	mul_exp_out_in;
+wire [10:0]	mul_exp_out;
+
+
+wire se_l;	// inverted scan enable, connected to the tmb_l port of the clock buffer
+
+assign se_l = ~se;
+
+clken_buf  ckbuf_mul_exp_dp (	// NOTE(review): presumably gates rclk into clk under fmul_clken_l - confirm against clken_buf
+  .clk(clk),	// clk has no explicit declaration (implicit 1-bit net); it clocks every flop in this module
+  .rclk(rclk),
+  .enb_l(fmul_clken_l),
+  .tmb_l(se_l)
+  );
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply exponent inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(11) i_m1stg_exp_in1 (	// latch operand 1 bits [62:52] whenever the pipe steps
+        .din    (inq_in1[62:52]),
+        .en     (m6stg_step),
+        .clk    (clk),
+ 
+        .q      (m1stg_exp_in1[10:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(11) i_m1stg_exp_in2 (	// latch operand 2 bits [62:52] whenever the pipe steps
+        .din    (inq_in2[62:52]),
+        .en     (m6stg_step),
+        .clk    (clk),
+ 
+        .q      (m1stg_exp_in2[10:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply exponent adder.
+//
+//	Multiply stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m1stg_expadd_in1[12:0]= ({13{m1stg_dblop}}
+			    & {2'b0, m1stg_exp_in1[10:0]})	// double: all 11 latched bits, zero-extended to 13
+		| ({13{m1stg_sngop}}
+			    & {5'b0, m1stg_exp_in1[10:3]});	// single: top 8 latched bits (operand bits 62:55)
+
+assign m1stg_expadd_in2[12:0]= ({13{m1stg_dblop}}
+                            & {2'b0, m1stg_exp_in2[10:0]})	// same selection as operand 1
+                | ({13{m1stg_sngop}}
+                            & {5'b0, m1stg_exp_in2[10:3]});
+
+assign m1stg_expadd[12:0]= (m1stg_expadd_in1[12:0]
+			+ m1stg_expadd_in2[12:0]
+			+ 13'h0001);	// exp1 + exp2 + 1; the stage-2 adder below applies a further constant offset
+
+assign m2stg_exp_in[12:0]= ({13{m2stg_exp_expadd}}	// AND-OR mux: the selected terms are ORed, no priority
+			    & m1stg_expadd[12:0])
+		| ({13{m2stg_exp_0bff}}
+			    & 13'h0bff)
+		| ({13{m2stg_exp_017f}}
+			    & 13'h017f)
+		| ({13{m2stg_exp_04ff}}
+			    & 13'h04ff)
+		| ({13{m2stg_exp_zero}}
+			    & {{3{m1stg_fsmuld}}, 10'b0});	// 13'h1c00 when m1stg_fsmuld is set, otherwise 13'h0000
+
+dffe_s #(13) i_m2stg_exp (
+	.din	(m2stg_exp_in[12:0]),
+	.en     (m6stg_step),
+        .clk    (clk),
+
+        .q      (m2stg_exp[12:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply exponent adder.
+//
+//      Multiply stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m2stg_expadd_in2[12:0]= ({13{m2stg_fmuld}}	// NOTE(review): offsets look like IEEE bias removal (1023 dbl, 127 sng, 769 = 1023 - 2*127) - confirm
+			    & 13'h1c00)	// -1024 in 13-bit two's complement; with the stage-1 "+1" the net offset is -1023
+		| ({13{m2stg_fmuls}}
+			    & 13'h1f80)	// -128 in 13-bit two's complement; with the stage-1 "+1" the net offset is -127
+		| ({13{m2stg_fsmuld}}
+			    & 13'h0300);	// +768; with the stage-1 "+1" the net offset is +769
+
+assign m2stg_expadd[12:0]= m2stg_exp[12:0]
+			+ m2stg_expadd_in2[12:0];
+
+dffe_s #(13) i_m3astg_exp (
+	.din	(m2stg_expadd[12:0]),
+	.en     (m6stg_step),
+        .clk    (clk),
+
+        .q      (m3astg_exp[12:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply exponent.
+//
+//      Multiply stage 3a.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(13) i_m3bstg_exp (	// plain pipeline delay: m3astg_exp -> m3bstg_exp
+        .din    (m3astg_exp[12:0]),
+	.en     (m6stg_step),
+        .clk    (clk),
+
+        .q      (m3bstg_exp[12:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply exponent.
+//
+//      Multiply stage 3b.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(13) i_m3stg_exp (	// copy of m3bstg_exp that is exported as the m3stg_exp output
+        .din    (m3bstg_exp[12:0]),
+        .en     (m6stg_step),
+        .clk    (clk),
+
+        .q      (m3stg_exp[12:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(13) i_m3stg_expa (	// second register with the same din/en as i_m3stg_exp; feeds the local stage-3 adder
+	.din	(m3bstg_exp[12:0]),
+	.en	(m6stg_step),
+	.clk	(clk),
+
+	.q	(m3stg_expa[12:0]),
+
+	.se	(se),
+	.si	(),
+  	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply exponent adder.
+//
+//      Multiply stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m3stg_expadd[12:0]= (m3stg_expa[12:0]
+			+ {6'h3f, m3stg_ld0_inv[6:0]}	// 7-bit m3stg_ld0_inv with the upper 6 bits forced to 1
+			+ 13'h0001);
+
+assign m3stg_expadd_eq_0= (&(m3stg_exp[12:0] ^ {6'h3f, m3stg_ld0_inv[6:0]}));	// a + b + 1 == 0 (mod 2^13) exactly when a ^ b is all ones
+
+assign m3stg_expadd_lte_0_inv= (!(m3stg_expadd[12] || m3stg_expadd_eq_0));	// high when the sum has bit 12 clear and is not zero
+
+assign m4stg_exp_in[12:0]= (m3stg_expadd[12:0] & {13{(!m3stg_expadd[12])}});	// forced to zero when bit 12 of the sum is set
+
+dffe_s #(13) i_m4stg_exp (
+        .din    (m4stg_exp_in[12:0]),
+	.en     (m6stg_step),
+        .clk    (clk),
+
+        .q      (m4stg_exp[12:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply exponent increment.
+//
+//      Multiply stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m4stg_exp_plus1[12:0]= m4stg_exp[12:0]
+			+ 13'h0001;
+
+
+// Austin update
+// uarch timing fix
+// Endpoint: fpu_mul_exp_dp/i_m5stg_exp_pre2_10
+
+// assign m5stg_exp_pre1_in[12:0]= (~({13{m4stg_inc_exp}}
+//			    & m4stg_exp_plus1[12:0]));
+
+assign m5stg_exp_pre1_in[12:0]= ( ({13{m6stg_step}}	// incremented candidate; all zeros when the pipe does not step
+			    & m4stg_exp_plus1[12:0]));
+
+dff_s #(13) i_m5stg_exp_pre1 (
+	.din	(m5stg_exp_pre1_in[12:0]),
+	.clk    (clk),
+ 
+        .q      (m5stg_exp_pre1[12:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+// Austin update
+// uarch timing fix
+// Endpoint: fpu_mul_exp_dp/i_m5stg_exp_pre2_10
+
+// assign m5stg_exp_pre2_in[12:0]= (~({13{m4stg_inc_exp_inv}}
+//			    & m4stg_exp[12:0]));
+
+assign m5stg_exp_pre2_in[12:0]= ( ({13{m6stg_step}}	// non-incremented candidate; all zeros when the pipe does not step
+			    & m4stg_exp[12:0]));
+
+dff_s #(13) i_m5stg_exp_pre2 (
+	.din	(m5stg_exp_pre2_in[12:0]),
+	.clk	(clk),
+
+	.q	(m5stg_exp_pre2[12:0]),
+
+	.se	(se),
+	 .si	(),
+	.so	()
+);
+
+assign m5stg_exp_pre3_in[12:0]= (~({13{(!m6stg_step)}}	// hold path, stored inverted: ~m5stg_expa when not stepping, all ones when stepping
+			    & m5stg_expa[12:0]));
+
+dff_s #(13) i_m5stg_exp_pre3 (
+	.din	(m5stg_exp_pre3_in[12:0]),
+	.clk	(clk),
+
+	.q	(m5stg_exp_pre3[12:0]),
+
+	.se	(se),
+	 .si	(),
+	.so	()
+);
+
+
+// Austin update
+// uarch timing fix
+// Endpoint: fpu_mul_exp_dp/i_m5stg_exp_pre2_10
+
+//assign m5stg_exp[12:0]= (~m5stg_exp_pre1[12:0])
+//		| (~m5stg_exp_pre2[12:0])
+//		| (~m5stg_exp_pre3[12:0]);
+
+dff_s #(5) i_m5stg_inc_exp (	// registers the five stage-4 select inputs so the pre1/pre2 choice is made after the flops
+	.din	({m4stg_shl_55,m4stg_shl_54,
+                  m4stg_inc_exp_54,m4stg_inc_exp_55,m4stg_inc_exp_105}),
+	.clk	(clk),
+
+	.q	({m5stg_shl_55,m5stg_shl_54,	// these five m5stg_* nets are not declared: implicit 1-bit wires
+                  m5stg_inc_exp_54,m5stg_inc_exp_55,m5stg_inc_exp_105}),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign m5stg_exp[12:0] =	// pre1 if the increment condition holds, else pre2; ORed with the re-inverted hold value
+
+          ( {13{((m5stg_shl_54 & m5stg_inc_exp_54) |
+                 (m5stg_shl_55 & m5stg_inc_exp_55) |
+                 (m5stg_inc_exp_105)                )}} & m5stg_exp_pre1[12:0]) |
+
+          (~{13{((m5stg_shl_54 & m5stg_inc_exp_54) |
+                 (m5stg_shl_55 & m5stg_inc_exp_55) |
+                 (m5stg_inc_exp_105)                )}} & m5stg_exp_pre2[12:0]) |
+
+         ~(m5stg_exp_pre3[12:0]);	// zero after a step cycle; the previous m5stg_exp after a non-step cycle (pre1/pre2 are then zero)
+
+
+assign m5stg_expa[12:0]= m5stg_exp[12:0];	// plain alias of m5stg_exp used by the local logic
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply rounding.
+//      Multiply stage 5.
+//
+///////////////////////////////////////////////////////////////////////////////
+ 
+assign m5stg_exp_plus1[12:0]= m5stg_expa[12:0]
+                        + 13'h0001;
+
+assign mul_exp_out_in[10:0]= ({11{(mul_exp_out_exp_plus1	// AND-OR mux over the low 11 exponent bits
+					&& m5stg_fracadd_cout)}}
+			    & m5stg_exp_plus1[10:0])	// exponent + 1 when selected and the rounding adder carried out
+		| ({11{mul_exp_out_exp}}
+			    & m5stg_expa[10:0])
+		| ({11{((!m5stg_fracadd_cout) && (!m5stg_in_of))}}
+			    & m5stg_expa[10:0])	// also passes the exponent when there is no carry and no overflow
+		| ({11{m5stg_in_of}}
+			    & {{3{m5stg_fmuld}}, 7'h7f, m5stg_to_0_inv});	// overflow: 11'h7fe/7ff when m5stg_fmuld, else 11'h0fe/0ff; LSB = m5stg_to_0_inv
+
+
+dffe_s #(11) i_mul_exp_out (
+	.din	(mul_exp_out_in[10:0]),
+	.en     (m6stg_step),
+        .clk    (clk),
+
+        .q      (mul_exp_out[10:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_mul.v
===================================================================
--- /trunk/T1-FPU/fpu_mul.v	(revision 6)
+++ /trunk/T1-FPU/fpu_mul.v	(revision 6)
@@ -0,0 +1,467 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_mul.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	FPU multiply pipe.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+module fpu_mul (	// structural top of the multiply pipe: wires fpu_mul_ctl, fpu_mul_exp_dp, fpu_mul_frac_dp and mul64 together
+	inq_op,
+	inq_rnd_mode,
+	inq_id,
+	inq_in1,
+	inq_in1_53_0_neq_0,
+	inq_in1_50_0_neq_0,
+	inq_in1_53_32_neq_0,
+	inq_in1_exp_eq_0,
+	inq_in1_exp_neq_ffs,
+	inq_in2,
+	inq_in2_53_0_neq_0,
+	inq_in2_50_0_neq_0,
+	inq_in2_53_32_neq_0,
+	inq_in2_exp_eq_0,
+	inq_in2_exp_neq_ffs,
+	inq_mul,
+	mul_dest_rdy,
+	mul_dest_rdya,
+	fmul_clken_l,
+	fmul_clken_l_buf1,
+	arst_l,
+	grst_l,
+	rclk,
+	
+	mul_pipe_active,
+	m1stg_step,
+	m6stg_fmul_in,
+	m6stg_id_in,
+	mul_exc_out,
+	m6stg_fmul_dbl_dst,
+	m6stg_fmuls,
+	mul_sign_out,
+	mul_exp_out,
+	mul_frac_out,
+
+	se_mul,
+	se_mul64,
+	si,
+	so
+);
+
+
+input [7:0]	inq_op;			// request opcode to op pipes
+input [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+input [4:0]	inq_id;			// request ID to the operation pipes
+input [63:0]	inq_in1;		// request operand 1 to op pipes
+input		inq_in1_53_0_neq_0;	// request operand 1[53:0]!=0
+input		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+input		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+input		inq_in1_exp_eq_0;	// request operand 1 exp==0
+input		inq_in1_exp_neq_ffs;	// request operand 1 exp!=0xff's
+input [63:0]	inq_in2;		// request operand 2 to op pipes
+input		inq_in2_53_0_neq_0;	// request operand 2[53:0]!=0
+input		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+input		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+input		inq_in2_exp_eq_0;	// request operand 2 exp==0
+input		inq_in2_exp_neq_ffs;	// request operand 2 exp!=0xff's
+input		inq_mul;		// multiply pipe request
+input		mul_dest_rdy;		// multiply result req accepted for CPX
+input		mul_dest_rdya;		// multiply result req accepted for CPX
+input		fmul_clken_l;           // fmul clock enable for mul_frac_dp
+input		fmul_clken_l_buf1;           // fmul clock enable for mul_exp_dp
+input		arst_l;			// global asynch. reset- asserted low
+input		grst_l;			// global synch. reset- asserted low
+input		rclk;			// global clock
+
+output		mul_pipe_active;        // mul pipe is executing a valid instr
+output		m1stg_step;		// multiply pipe load
+output		m6stg_fmul_in;		// mul pipe output request next cycle
+output [9:0]	m6stg_id_in;		// mul pipe output ID next cycle
+output [4:0]	mul_exc_out;		// multiply pipe result- exception flags
+output		m6stg_fmul_dbl_dst;	// double precision multiply result
+output		m6stg_fmuls;		// fmuls- multiply 6 stage
+output		mul_sign_out;		// multiply sign output
+output [10:0]	mul_exp_out;		// multiply exponent output
+output [51:0]	mul_frac_out;		// multiply fraction output
+
+input           se_mul;     // scan_enable for mul_frac_dp, mul_ctl, mul_exp_dp
+input           se_mul64;       // scan_enable for mul64
+input           si;                     // scan in (enters the chain at fpu_mul_ctl)
+output          so;                     // scan out (driven by the mul64 instance)
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_mul_ctl.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire		m1stg_snan_sng_in1;	// operand 1 is single signalling NaN
+wire		m1stg_snan_dbl_in1;	// operand 1 is double signalling NaN
+wire		m1stg_snan_sng_in2;	// operand 2 is single signalling NaN
+wire		m1stg_snan_dbl_in2;	// operand 2 is double signalling NaN
+wire		m1stg_step;		// multiply pipe load
+wire		m1stg_sngop;		// single precision operation- mul 1 stg
+wire		m1stg_dblop;		// double precision operation- mul 1 stg
+wire		m1stg_dblop_inv;	// single or int operation- mul 1 stg
+wire		m1stg_fmul;		// multiply operation- mul 1 stage
+wire		m1stg_fsmuld;		// fsmuld- multiply 1 stage
+wire		m2stg_fmuls;		// fmuls- multiply 2 stage
+wire		m2stg_fmuld;		// fmuld- multiply 2 stage
+wire		m2stg_fsmuld;		// fsmuld- multiply 2 stage
+wire		m5stg_fmuls;		// fmuls- multiply 5 stage
+wire		m5stg_fmuld;		// fmuld- multiply 5 stage
+wire		m5stg_fmulda;		// fmuld- multiply 5 stage copy
+wire		m6stg_fmul_in;		// mul pipe output request next cycle
+wire [9:0]	m6stg_id_in;		// mul pipe output ID next cycle
+wire		m6stg_fmul_dbl_dst;	// double precision multiply result
+wire		m6stg_fmuls;		// fmuls- multiply 6 stage
+wire		m6stg_step;		// advance the multiply pipe
+wire		mul_sign_out;		// multiply sign output
+wire		m5stg_in_of;		// multiply overflow- select exp out
+wire [4:0]	mul_exc_out;		// multiply pipe result- exception flags
+wire		m2stg_frac1_dbl_norm;	// select line to m2stg_frac1
+wire		m2stg_frac1_dbl_dnrm;	// select line to m2stg_frac1
+wire		m2stg_frac1_sng_norm;	// select line to m2stg_frac1
+wire		m2stg_frac1_sng_dnrm;	// select line to m2stg_frac1
+wire		m2stg_frac1_inf;	// select line to m2stg_frac1
+wire		m2stg_frac2_dbl_norm;	// select line to m2stg_frac2
+wire		m2stg_frac2_dbl_dnrm;	// select line to m2stg_frac2
+wire		m2stg_frac2_sng_norm;	// select line to m2stg_frac2
+wire		m2stg_frac2_sng_dnrm;	// select line to m2stg_frac2
+wire		m2stg_frac2_inf;	// select line to m2stg_frac2
+wire		m1stg_inf_zero_in;	// 1 operand is infinity; other is 0
+wire		m1stg_inf_zero_in_dbl;	// 1 opnd is infinity; other is 0- dbl
+wire		m2stg_exp_expadd;	// select line to m2stg_exp
+wire		m2stg_exp_0bff;		// select line to m2stg_exp
+wire		m2stg_exp_017f;		// select line to m2stg_exp
+wire		m2stg_exp_04ff;		// select line to m2stg_exp
+wire		m2stg_exp_zero;		// select line to m2stg_exp
+wire [6:0]	m3bstg_ld0_inv;		// leading 0's in multiply operands
+wire [5:0]	m4stg_sh_cnt_in;	// multiply normalization shift count
+wire            m4stg_inc_exp_54;       // select line to m5stg_exp
+wire            m4stg_inc_exp_55;       // select line to m5stg_exp
+wire            m4stg_inc_exp_105;      // select line to m5stg_exp
+wire		m4stg_left_shift_step;	// select line to m5stg_frac
+wire		m4stg_right_shift_step;	// select line to m5stg_frac
+wire		m5stg_to_0;		// result to max finite on overflow
+wire		m5stg_to_0_inv;		// result to infinity on overflow
+wire		mul_frac_out_fracadd;	// select line to mul_frac_out
+wire		mul_frac_out_frac;	// select line to mul_frac_out
+wire		mul_exp_out_exp_plus1;	// select line to mul_exp_out
+wire		mul_exp_out_exp;	// select line to mul_exp_out
+wire		mul_pipe_active;        // mul pipe is executing a valid instr
+
+// 3/14/03: output of dffrl_async is an input to mul64
+wire mul_rst_l; // reset (active low) signal for mul64
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_mul_exp_dp.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [12:0]	m3stg_exp;		// exponent input- multiply 3 stage
+wire		m3stg_expadd_eq_0;	// mul stage 3 exponent adder sum == 0
+wire		m3stg_expadd_lte_0_inv;	// mul stage 3 exponent adder sum <= 0
+wire [12:0]	m4stg_exp;		// exponent input- multiply 4 stage
+wire [12:0]	m5stg_exp;		// exponent input- multiply 5 stage
+wire [10:0]	mul_exp_out;		// multiply exponent output
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_mul_frac_dp.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [52:0]	m2stg_frac1_array_in;	// multiply array input 1
+wire [52:0]	m2stg_frac2_array_in;	// multiply array input 2
+wire [5:0]	m1stg_ld0_1;		// denorm operand 1 leading 0's
+wire [5:0]	m1stg_ld0_2;		// denorm operand 2 leading 0's
+wire		m4stg_frac_105;		// multiply stage 4a fraction input[105]
+wire [6:0]	m3stg_ld0_inv;		// leading 0's in multiply operands
+wire		m4stg_shl_54;		// multiply shift left output bit[54]
+wire		m4stg_shl_55;		// multiply shift left output bit[55]
+wire [32:0]	m5stg_frac_32_0;	// multiply stage 5 fraction input
+wire		m5stg_frac_dbl_nx;	// double precision inexact result
+wire		m5stg_frac_sng_nx;	// single precision inexact result
+wire		m5stg_frac_neq_0;	// fraction input to mul 5 stage != 0
+wire		m5stg_fracadd_cout;	// fraction rounding adder carry out
+wire [51:0]	mul_frac_out;		// multiply fraction output
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of mul64
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [105:0]	m4stg_frac;		// multiply array output
+wire [29:0] m4stg_frac_unused; // unused upper 30 bits (135:106) of the mul64 output
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Instantiations.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+fpu_mul_ctl fpu_mul_ctl (	// control block; scan order in this module is ctl -> exp_dp -> frac_dp -> mul64
+	.inq_in1_51			(inq_in1[51]),
+	.inq_in1_54			(inq_in1[54]),
+	.inq_in1_53_0_neq_0		(inq_in1_53_0_neq_0),
+	.inq_in1_50_0_neq_0		(inq_in1_50_0_neq_0),
+	.inq_in1_53_32_neq_0		(inq_in1_53_32_neq_0),
+	.inq_in1_exp_eq_0		(inq_in1_exp_eq_0),
+	.inq_in1_exp_neq_ffs		(inq_in1_exp_neq_ffs),
+	.inq_in2_51			(inq_in2[51]),
+	.inq_in2_54			(inq_in2[54]),
+	.inq_in2_53_0_neq_0		(inq_in2_53_0_neq_0),
+	.inq_in2_50_0_neq_0		(inq_in2_50_0_neq_0),
+	.inq_in2_53_32_neq_0		(inq_in2_53_32_neq_0),
+	.inq_in2_exp_eq_0		(inq_in2_exp_eq_0),
+	.inq_in2_exp_neq_ffs		(inq_in2_exp_neq_ffs),
+	.inq_op				(inq_op[7:0]),
+	.inq_mul			(inq_mul),
+	.inq_rnd_mode			(inq_rnd_mode[1:0]),
+	.inq_id				(inq_id[4:0]),
+	.inq_in1_63			(inq_in1[63]),
+	.inq_in2_63			(inq_in2[63]),
+	.mul_dest_rdy			(mul_dest_rdy),
+	.mul_dest_rdya			(mul_dest_rdya),
+	.m5stg_exp			(m5stg_exp[12:0]),
+	.m5stg_fracadd_cout		(m5stg_fracadd_cout),
+	.m5stg_frac_neq_0		(m5stg_frac_neq_0),
+	.m5stg_frac_dbl_nx		(m5stg_frac_dbl_nx),
+	.m5stg_frac_sng_nx		(m5stg_frac_sng_nx),
+	.m1stg_ld0_1			(m1stg_ld0_1[5:0]),
+	.m1stg_ld0_2			(m1stg_ld0_2[5:0]),
+	.m3stg_exp			(m3stg_exp[12:0]),
+	.m3stg_expadd_eq_0		(m3stg_expadd_eq_0),
+	.m3stg_expadd_lte_0_inv		(m3stg_expadd_lte_0_inv),
+	.m3stg_ld0_inv			(m3stg_ld0_inv[5:0]),	// only bits [5:0] reach the control block; fpu_mul_exp_dp gets all of [6:0]
+	.m4stg_exp			(m4stg_exp[12:0]),
+	.m4stg_frac_105			(m4stg_frac_105),
+	.m5stg_frac			(m5stg_frac_32_0[32:0]),
+	.arst_l				(arst_l),
+	.grst_l				(grst_l),
+	.mula_rst_l    (mul_rst_l),	// same net as the mul_rst_l port of the mul64 instance below
+	.rclk			(rclk),
+
+	.mul_pipe_active                (mul_pipe_active),
+	.m1stg_snan_sng_in1		(m1stg_snan_sng_in1),
+	.m1stg_snan_dbl_in1		(m1stg_snan_dbl_in1),
+	.m1stg_snan_sng_in2		(m1stg_snan_sng_in2),
+	.m1stg_snan_dbl_in2		(m1stg_snan_dbl_in2),
+	.m1stg_step			(m1stg_step),
+	.m1stg_sngop			(m1stg_sngop),
+	.m1stg_dblop			(m1stg_dblop),
+	.m1stg_dblop_inv		(m1stg_dblop_inv),
+	.m1stg_fmul			(m1stg_fmul),
+	.m1stg_fsmuld			(m1stg_fsmuld),
+	.m2stg_fmuls			(m2stg_fmuls),
+	.m2stg_fmuld			(m2stg_fmuld),
+	.m2stg_fsmuld			(m2stg_fsmuld),
+	.m5stg_fmuls			(m5stg_fmuls),
+	.m5stg_fmuld			(m5stg_fmuld),
+	.m5stg_fmulda			(m5stg_fmulda),
+	.m6stg_fmul_in			(m6stg_fmul_in),
+	.m6stg_id_in			(m6stg_id_in[9:0]),
+	.m6stg_fmul_dbl_dst		(m6stg_fmul_dbl_dst),
+	.m6stg_fmuls			(m6stg_fmuls),
+	.m6stg_step			(m6stg_step),
+	.mul_sign_out			(mul_sign_out),
+	.m5stg_in_of			(m5stg_in_of),
+	.mul_exc_out			(mul_exc_out[4:0]),
+	.m2stg_frac1_dbl_norm		(m2stg_frac1_dbl_norm),
+	.m2stg_frac1_dbl_dnrm		(m2stg_frac1_dbl_dnrm),
+	.m2stg_frac1_sng_norm		(m2stg_frac1_sng_norm),
+	.m2stg_frac1_sng_dnrm		(m2stg_frac1_sng_dnrm),
+	.m2stg_frac1_inf		(m2stg_frac1_inf),
+	.m2stg_frac2_dbl_norm		(m2stg_frac2_dbl_norm),
+	.m2stg_frac2_dbl_dnrm		(m2stg_frac2_dbl_dnrm),
+	.m2stg_frac2_sng_norm		(m2stg_frac2_sng_norm),
+	.m2stg_frac2_sng_dnrm		(m2stg_frac2_sng_dnrm),
+	.m2stg_frac2_inf		(m2stg_frac2_inf),
+	.m1stg_inf_zero_in		(m1stg_inf_zero_in),
+	.m1stg_inf_zero_in_dbl		(m1stg_inf_zero_in_dbl),
+	.m2stg_exp_expadd		(m2stg_exp_expadd),
+	.m2stg_exp_0bff			(m2stg_exp_0bff),
+	.m2stg_exp_017f			(m2stg_exp_017f),
+	.m2stg_exp_04ff			(m2stg_exp_04ff),
+	.m2stg_exp_zero			(m2stg_exp_zero),
+	.m3bstg_ld0_inv			(m3bstg_ld0_inv[6:0]),
+	.m4stg_sh_cnt_in		(m4stg_sh_cnt_in[5:0]),
+	.m4stg_inc_exp_54		(m4stg_inc_exp_54),
+	.m4stg_inc_exp_55		(m4stg_inc_exp_55),
+	.m4stg_inc_exp_105		(m4stg_inc_exp_105),
+	.m4stg_left_shift_step		(m4stg_left_shift_step),
+	.m4stg_right_shift_step		(m4stg_right_shift_step),
+	.m5stg_to_0			(m5stg_to_0),
+	.m5stg_to_0_inv			(m5stg_to_0_inv),
+	.mul_frac_out_fracadd		(mul_frac_out_fracadd),
+	.mul_frac_out_frac		(mul_frac_out_frac),
+	.mul_exp_out_exp_plus1		(mul_exp_out_exp_plus1),
+	.mul_exp_out_exp		(mul_exp_out_exp),
+
+	.se                             (se_mul),
+        .si                             (si),
+        .so                             (scan_out_fpu_mul_ctl)	// scan_out_* nets are not declared: implicit 1-bit wires
+);
+
+
+fpu_mul_exp_dp fpu_mul_exp_dp (	// exponent datapath; second element of the scan chain
+	.inq_in1			(inq_in1[62:52]),
+	.inq_in2			(inq_in2[62:52]),
+	.m6stg_step			(m6stg_step),
+	.m1stg_dblop			(m1stg_dblop),
+	.m1stg_sngop			(m1stg_sngop),
+	.m2stg_exp_expadd		(m2stg_exp_expadd),
+	.m2stg_exp_0bff			(m2stg_exp_0bff),
+	.m2stg_exp_017f			(m2stg_exp_017f),
+	.m2stg_exp_04ff			(m2stg_exp_04ff),
+	.m2stg_exp_zero			(m2stg_exp_zero),
+	.m1stg_fsmuld			(m1stg_fsmuld),
+	.m2stg_fmuld			(m2stg_fmuld),
+	.m2stg_fmuls			(m2stg_fmuls),
+	.m2stg_fsmuld			(m2stg_fsmuld),
+	.m3stg_ld0_inv			(m3stg_ld0_inv[6:0]),
+	.m4stg_inc_exp_54		(m4stg_inc_exp_54),
+	.m4stg_inc_exp_55		(m4stg_inc_exp_55),
+	.m4stg_inc_exp_105		(m4stg_inc_exp_105),
+	.m5stg_fracadd_cout		(m5stg_fracadd_cout),
+	.mul_exp_out_exp_plus1		(mul_exp_out_exp_plus1),
+	.mul_exp_out_exp		(mul_exp_out_exp),
+	.m5stg_in_of			(m5stg_in_of),
+	.m5stg_fmuld			(m5stg_fmuld),
+	.m5stg_to_0_inv			(m5stg_to_0_inv),
+	.m4stg_shl_54			(m4stg_shl_54),
+	.m4stg_shl_55			(m4stg_shl_55),
+	.fmul_clken_l			(fmul_clken_l_buf1),	// exp_dp takes the _buf1 copy of the clock enable; frac_dp takes fmul_clken_l
+	.rclk			(rclk),
+
+	.m3stg_exp			(m3stg_exp[12:0]),
+	.m3stg_expadd_eq_0		(m3stg_expadd_eq_0),
+	.m3stg_expadd_lte_0_inv		(m3stg_expadd_lte_0_inv),
+	.m4stg_exp			(m4stg_exp[12:0]),
+	.m5stg_exp			(m5stg_exp[12:0]),
+	.mul_exp_out			(mul_exp_out[10:0]),
+
+	.se                             (se_mul),
+        .si                             (scan_out_fpu_mul_ctl),
+        .so                             (scan_out_fpu_mul_exp_dp)
+);
+
+
+fpu_mul_frac_dp fpu_mul_frac_dp (	// fraction datapath; third element of the scan chain
+	.inq_in1			(inq_in1[54:0]),
+	.inq_in2			(inq_in2[54:0]),
+	.m6stg_step			(m6stg_step),
+	.m2stg_frac1_dbl_norm		(m2stg_frac1_dbl_norm),
+	.m2stg_frac1_dbl_dnrm		(m2stg_frac1_dbl_dnrm),
+	.m2stg_frac1_sng_norm		(m2stg_frac1_sng_norm),
+	.m2stg_frac1_sng_dnrm		(m2stg_frac1_sng_dnrm),
+	.m2stg_frac1_inf		(m2stg_frac1_inf),
+	.m1stg_snan_dbl_in1		(m1stg_snan_dbl_in1),
+	.m1stg_snan_sng_in1		(m1stg_snan_sng_in1),
+	.m2stg_frac2_dbl_norm		(m2stg_frac2_dbl_norm),
+	.m2stg_frac2_dbl_dnrm		(m2stg_frac2_dbl_dnrm),
+	.m2stg_frac2_sng_norm		(m2stg_frac2_sng_norm),
+	.m2stg_frac2_sng_dnrm		(m2stg_frac2_sng_dnrm),
+	.m2stg_frac2_inf		(m2stg_frac2_inf),
+	.m1stg_snan_dbl_in2		(m1stg_snan_dbl_in2),
+	.m1stg_snan_sng_in2		(m1stg_snan_sng_in2),
+	.m1stg_inf_zero_in		(m1stg_inf_zero_in),
+	.m1stg_inf_zero_in_dbl		(m1stg_inf_zero_in_dbl),
+	.m1stg_dblop			(m1stg_dblop),
+	.m1stg_dblop_inv		(m1stg_dblop_inv),
+	.m4stg_frac			(m4stg_frac),	// whole 106-bit vector (no explicit range on this connection)
+	.m4stg_sh_cnt_in		(m4stg_sh_cnt_in[5:0]),
+	.m3bstg_ld0_inv			(m3bstg_ld0_inv[6:0]),
+	.m4stg_left_shift_step		(m4stg_left_shift_step),
+	.m4stg_right_shift_step		(m4stg_right_shift_step),
+	.m5stg_fmuls			(m5stg_fmuls),
+	.m5stg_fmulda			(m5stg_fmulda),
+	.mul_frac_out_fracadd		(mul_frac_out_fracadd),
+	.mul_frac_out_frac		(mul_frac_out_frac),
+	.m5stg_in_of			(m5stg_in_of),
+	.m5stg_to_0			(m5stg_to_0),
+	.fmul_clken_l			(fmul_clken_l),
+	.rclk			(rclk),
+
+	.m2stg_frac1_array_in		(m2stg_frac1_array_in),
+	.m2stg_frac2_array_in		(m2stg_frac2_array_in),
+	.m1stg_ld0_1			(m1stg_ld0_1),
+	.m1stg_ld0_2			(m1stg_ld0_2),
+	.m4stg_frac_105			(m4stg_frac_105),
+	.m3stg_ld0_inv			(m3stg_ld0_inv[6:0]),
+	.m4stg_shl_54			(m4stg_shl_54),
+	.m4stg_shl_55			(m4stg_shl_55),
+	.m5stg_frac_32_0		(m5stg_frac_32_0[32:0]),
+	.m5stg_frac_dbl_nx		(m5stg_frac_dbl_nx),
+	.m5stg_frac_sng_nx		(m5stg_frac_sng_nx),
+	.m5stg_frac_neq_0		(m5stg_frac_neq_0),
+	.m5stg_fracadd_cout		(m5stg_fracadd_cout),
+	.mul_frac_out			(mul_frac_out[51:0]),
+
+	.se                             (se_mul),
+        .si                             (scan_out_fpu_mul_exp_dp),
+        .so                             (scan_out_fpu_mul_frac_dp)
+);
+
+
+// 3/14/03: Promoted mul64 to unit level. Got rid of fpu_mul_array. Same representation for RTL and gates. Also, now agreed that mul64 will not have dffrl_async inside, so the staged signal mul_rst_l is sent from fpu_mul_ctl. mul64 port step has been renamed to mul_step to avoid conflicts with DEF keyword STEP in some backend flows (WARP).
+mul64 i_m4stg_frac (	// multiplier array; last element of the scan chain, drives the module-level so
+	.rs1_l ({1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1, 1'b1,	// 11 one bits above the 53-bit fraction = 64 bits
+		1'b1, 1'b1, 1'b1, m2stg_frac1_array_in[52:0]}),
+	.rs2 ({1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,	// 11 zero bits above the 53-bit fraction = 64 bits
+		1'b0, 1'b0, 1'b0, m2stg_frac2_array_in[52:0]}),
+	.valid(m1stg_fmul),
+	.areg ({1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,	// areg is tied to 97 zero bits (12 rows of 8 plus 1)
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0,
+		1'b0}),
+	.accreg ({1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0, 1'b0}),	// accreg is tied to 7 zero bits
+	.x2 (1'b0),
+	.rclk (rclk),
+	.si (scan_out_fpu_mul_frac_dp),
+	.se (se_mul64),
+	.mul_rst_l (mul_rst_l),
+	.mul_step (m6stg_step),
+	.so (so),
+	.out ({m4stg_frac_unused[29:0], m4stg_frac[105:0]})	// 136-bit result: low 106 bits used, upper 30 discarded
+);
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_mul_ctl.v
===================================================================
--- /trunk/T1-FPU/fpu_mul_ctl.v	(revision 6)
+++ /trunk/T1-FPU/fpu_mul_ctl.v	(revision 6)
@@ -0,0 +1,2234 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_mul_ctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply pipeline synthesizable logic
+//              - special input cases
+//              - opcode pipeline
+//              - sign logic
+//              - exception logic
+//              - datapath control- select lines and control logic
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+module fpu_mul_ctl (
+	inq_in1_51,
+	inq_in1_54,
+	inq_in1_53_0_neq_0,
+	inq_in1_50_0_neq_0,
+	inq_in1_53_32_neq_0,
+	inq_in1_exp_eq_0,
+	inq_in1_exp_neq_ffs,
+	inq_in2_51,
+	inq_in2_54,
+	inq_in2_53_0_neq_0,
+	inq_in2_50_0_neq_0,
+	inq_in2_53_32_neq_0,
+	inq_in2_exp_eq_0,
+	inq_in2_exp_neq_ffs,
+	inq_op,
+	inq_mul,
+	inq_rnd_mode,
+	inq_id,
+	inq_in1_63,
+	inq_in2_63,
+	mul_dest_rdy,
+	mul_dest_rdya,
+	m5stg_exp,
+	m5stg_fracadd_cout,
+	m5stg_frac_neq_0,
+	m5stg_frac_dbl_nx,
+	m5stg_frac_sng_nx,
+	m1stg_ld0_1,
+	m1stg_ld0_2,
+	m3stg_exp,
+	m3stg_expadd_eq_0,
+	m3stg_expadd_lte_0_inv,
+	m3stg_ld0_inv,
+	m4stg_exp,
+	m4stg_frac_105,
+	m5stg_frac,
+	arst_l,
+	grst_l,
+	rclk,
+
+	mul_pipe_active,
+	m1stg_snan_sng_in1,
+	m1stg_snan_dbl_in1,
+	m1stg_snan_sng_in2,
+	m1stg_snan_dbl_in2,
+	m1stg_step,
+	m1stg_sngop,
+	m1stg_dblop,
+	m1stg_dblop_inv,
+	m1stg_fmul,
+	m1stg_fsmuld,
+	m2stg_fmuls,
+	m2stg_fmuld,
+	m2stg_fsmuld,
+	m5stg_fmuls,
+	m5stg_fmuld,
+	m5stg_fmulda,
+	m6stg_fmul_in,
+	m6stg_id_in,
+	m6stg_fmul_dbl_dst,
+	m6stg_fmuls,
+	m6stg_step,
+	mul_sign_out,
+	m5stg_in_of,
+	mul_exc_out,
+	m2stg_frac1_dbl_norm,
+	m2stg_frac1_dbl_dnrm,
+	m2stg_frac1_sng_norm,
+	m2stg_frac1_sng_dnrm,
+	m2stg_frac1_inf,
+	m2stg_frac2_dbl_norm,
+	m2stg_frac2_dbl_dnrm,
+	m2stg_frac2_sng_norm,
+	m2stg_frac2_sng_dnrm,
+	m2stg_frac2_inf,
+	m1stg_inf_zero_in,
+	m1stg_inf_zero_in_dbl,
+	m2stg_exp_expadd,
+	m2stg_exp_0bff,
+	m2stg_exp_017f,
+	m2stg_exp_04ff,
+	m2stg_exp_zero,
+	m3bstg_ld0_inv,
+	m4stg_sh_cnt_in,
+	m4stg_inc_exp_54,
+	m4stg_inc_exp_55,
+	m4stg_inc_exp_105,
+	m4stg_left_shift_step,
+	m4stg_right_shift_step,
+	m5stg_to_0,
+	m5stg_to_0_inv,
+	mul_frac_out_fracadd,
+	mul_frac_out_frac,
+	mul_exp_out_exp_plus1,
+	mul_exp_out_exp,
+	mula_rst_l,
+
+	se,
+	si,
+	so
+);
+
+
+parameter
+		FMULS=  8'h49,
+		FMULD=	8'h4a,
+		FSMULD=	8'h69;
+
+
+input		inq_in1_51;		// request operand 1[51]
+input		inq_in1_54;		// request operand 1[54]
+input		inq_in1_53_0_neq_0;	// request operand 1[53:0]!=0
+input		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+input		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+input		inq_in1_exp_eq_0;	// request operand 1[62:52]==0
+input		inq_in1_exp_neq_ffs;	// request operand 1[62:52]!=0x7ff
+input		inq_in2_51;		// request operand 2[51]
+input		inq_in2_54;		// request operand 2[54]
+input		inq_in2_53_0_neq_0;	// request operand 2[53:0]!=0
+input		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+input		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+input		inq_in2_exp_eq_0;	// request operand 2[62:52]==0
+input		inq_in2_exp_neq_ffs;	// request operand 2[62:52]!=0x7ff
+input [7:0]	inq_op;			// request opcode to op pipes
+input		inq_mul;		// multiply pipe request
+input [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+input [4:0]	inq_id;			// request ID to the operation pipes
+input		inq_in1_63;		// request[63] operand 1 to op pipes
+input		inq_in2_63;		// request[63] operand 2 to op pipes
+input		mul_dest_rdy;		// multiply result req accepted for CPX
+input		mul_dest_rdya;		// multiply result req accepted for CPX
+input [12:0]	m5stg_exp;		// exponent input- multiply 5 stage
+input		m5stg_fracadd_cout;	// fraction rounding adder carry out
+input		m5stg_frac_neq_0;	// fraction input to mul 5 stage != 0
+input		m5stg_frac_dbl_nx;	// double precision inexact result
+input		m5stg_frac_sng_nx;	// single precision inexact result
+input [5:0]	m1stg_ld0_1;		// denorm operand 1 leading 0's
+input [5:0]	m1stg_ld0_2;		// denorm operand 2 leading 0's
+input [12:0]	m3stg_exp;		// exponent input- multiply 3 stage
+input		m3stg_expadd_eq_0;	// mul stage 3 exponent adder sum == 0
+input		m3stg_expadd_lte_0_inv;	// mul stage 3 exponent adder sum <= 0
+input [5:0]	m3stg_ld0_inv;		// leading 0's in multiply operands
+input [12:0]	m4stg_exp;		// exponent input- multiply 4 stage
+input		m4stg_frac_105;	// multiply stage 4a fraction input[105]
+input [32:0]	m5stg_frac;		// multiply stage 5 fraction input
+input		arst_l;			// asynchronous global reset- asserted low
+input		grst_l;			// synchronous global reset- asserted low
+input		rclk;		// global clock
+
+output		mul_pipe_active;        // mul pipe is executing a valid instr
+output		m1stg_snan_sng_in1;	// operand 1 is single signalling NaN
+output		m1stg_snan_dbl_in1;	// operand 1 is double signalling NaN
+output		m1stg_snan_sng_in2;	// operand 2 is single signalling NaN
+output		m1stg_snan_dbl_in2;	// operand 2 is double signalling NaN
+output		m1stg_step;		// multiply pipe load
+output		m1stg_sngop;		// single precision operation- mul 1 stg
+output		m1stg_dblop;		// double precision operation- mul 1 stg
+output		m1stg_dblop_inv;	// single or int operation- mul 1 stg
+output		m1stg_fmul;		// multiply operation- mul 1 stage
+output		m1stg_fsmuld;		// fsmuld- multiply 1 stage
+output		m2stg_fmuls;		// fmuls- multiply 2 stage
+output		m2stg_fmuld;		// fmuld- multiply 2 stage
+output		m2stg_fsmuld;		// fsmuld- multiply 2 stage
+output		m5stg_fmuls;		// fmuls- multiply 5 stage
+output		m5stg_fmuld;		// fmuld- multiply 5 stage
+output		m5stg_fmulda;		// fmuld- multiply 5 stage copy
+output		m6stg_fmul_in;		// mul pipe output request next cycle
+output [9:0]	m6stg_id_in;		// mul pipe output ID next cycle
+output		m6stg_fmul_dbl_dst;	// double precision multiply result
+output		m6stg_fmuls;		// fmuls- multiply 6 stage
+output		m6stg_step;		// advance the multiply pipe
+output		mul_sign_out;		// multiply sign output
+output		m5stg_in_of;		// multiply overflow- select exp out
+output [4:0]	mul_exc_out;		// multiply pipe result- exception flags
+output		m2stg_frac1_dbl_norm;	// select line to m2stg_frac1
+output		m2stg_frac1_dbl_dnrm;	// select line to m2stg_frac1
+output		m2stg_frac1_sng_norm;	// select line to m2stg_frac1
+output		m2stg_frac1_sng_dnrm;	// select line to m2stg_frac1
+output		m2stg_frac1_inf;	// select line to m2stg_frac1
+output		m2stg_frac2_dbl_norm;	// select line to m2stg_frac2
+output		m2stg_frac2_dbl_dnrm;	// select line to m2stg_frac2
+output		m2stg_frac2_sng_norm;	// select line to m2stg_frac2
+output		m2stg_frac2_sng_dnrm;	// select line to m2stg_frac2
+output		m2stg_frac2_inf;	// select line to m2stg_frac2
+output		m1stg_inf_zero_in;	// 1 operand is infinity; other is 0
+output		m1stg_inf_zero_in_dbl;	// 1 opnd is infinity; other is 0- dbl
+output		m2stg_exp_expadd;	// select line to m2stg_exp
+output		m2stg_exp_0bff;		// select line to m2stg_exp
+output		m2stg_exp_017f;		// select line to m2stg_exp
+output		m2stg_exp_04ff;		// select line to m2stg_exp
+output		m2stg_exp_zero;		// select line to m2stg_exp
+output [6:0]	m3bstg_ld0_inv;		// leading 0's in multiply operands
+output [5:0]	m4stg_sh_cnt_in;	// multiply normalization shift count
+output          m4stg_inc_exp_54;       // select line to m5stg_exp
+output          m4stg_inc_exp_55;       // select line to m5stg_exp
+output          m4stg_inc_exp_105;      // select line to m5stg_exp
+output		m4stg_left_shift_step;	// select line to m5stg_frac
+output		m4stg_right_shift_step;	// select line to m5stg_frac
+output		m5stg_to_0;		// result to max finite on overflow
+output		m5stg_to_0_inv;		// result to infinity on overflow
+output		mul_frac_out_fracadd;	// select line to mul_frac_out
+output		mul_frac_out_frac;	// select line to mul_frac_out
+output		mul_exp_out_exp_plus1;	// select line to mul_exp_out
+output		mul_exp_out_exp;	// select line to mul_exp_out
+output    mula_rst_l; // reset for mul64
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+
+
+wire		reset;
+wire		mul_frac_in1_51;
+wire		mul_frac_in1_54;
+wire		mul_frac_in1_53_0_neq_0;
+wire		mul_frac_in1_50_0_neq_0;
+wire		mul_frac_in1_53_32_neq_0;
+wire		mul_exp_in1_exp_eq_0;
+wire		mul_exp_in1_exp_neq_ffs;
+wire		mul_frac_in2_51;
+wire		mul_frac_in2_54;
+wire		mul_frac_in2_53_0_neq_0;
+wire		mul_frac_in2_50_0_neq_0;
+wire		mul_frac_in2_53_32_neq_0;
+wire		mul_exp_in2_exp_eq_0;
+wire		mul_exp_in2_exp_neq_ffs;
+wire		m1stg_denorm_sng_in1;
+wire		m1stg_denorm_dbl_in1;
+wire		m1stg_denorm_sng_in2;
+wire		m1stg_denorm_dbl_in2;
+wire		m1stg_denorm_in1;
+wire		m1stg_denorm_in2;
+wire		m1stg_norm_sng_in1;
+wire		m1stg_norm_dbl_in1;
+wire		m1stg_norm_sng_in2;
+wire		m1stg_norm_dbl_in2;
+wire		m1stg_snan_sng_in1;
+wire		m1stg_snan_dbl_in1;
+wire		m1stg_snan_sng_in2;
+wire		m1stg_snan_dbl_in2;
+wire		m1stg_qnan_sng_in1;
+wire		m1stg_qnan_dbl_in1;
+wire		m1stg_qnan_sng_in2;
+wire		m1stg_qnan_dbl_in2;
+wire		m1stg_snan_in1;
+wire		m1stg_snan_in2;
+wire		m1stg_qnan_in1;
+wire		m1stg_qnan_in2;
+wire		m2stg_snan_in1;
+wire		m2stg_snan_in2;
+wire		m2stg_qnan_in1;
+wire		m2stg_qnan_in2;
+wire		m1stg_nan_sng_in1;
+wire		m1stg_nan_dbl_in1;
+wire		m1stg_nan_sng_in2;
+wire		m1stg_nan_dbl_in2;
+wire		m1stg_nan_in1;
+wire		m1stg_nan_in2;
+wire		m2stg_nan_in2;
+wire		m1stg_inf_sng_in1;
+wire		m1stg_inf_dbl_in1;
+wire		m1stg_inf_sng_in2;
+wire		m1stg_inf_dbl_in2;
+wire		m1stg_inf_in1;
+wire		m1stg_inf_in2;
+wire		m1stg_inf_in;
+wire		m2stg_inf_in1;
+wire		m2stg_inf_in2;
+wire		m2stg_inf_in;
+wire		m1stg_infnan_sng_in1;
+wire		m1stg_infnan_dbl_in1;
+wire		m1stg_infnan_sng_in2;
+wire		m1stg_infnan_dbl_in2;
+wire		m1stg_infnan_in1;
+wire		m1stg_infnan_in2;
+wire		m1stg_infnan_in;
+wire		m1stg_zero_in1;
+wire		m1stg_zero_in2;
+wire		m1stg_zero_in;
+wire		m2stg_zero_in1;
+wire		m2stg_zero_in2;
+wire		m2stg_zero_in;
+wire		m1stg_step;
+wire [7:0]	m1stg_op_in;
+wire [7:0]	m1stg_op;
+wire		m1stg_mul_in;
+wire		m1stg_mul;
+wire		m1stg_sngop;
+wire [3:0]	m1stg_sngopa;
+wire		m1stg_dblop;
+wire [3:0]	m1stg_dblopa;
+wire		m1stg_dblop_inv_in;
+wire		m1stg_dblop_inv;
+wire [1:0]	m1stg_rnd_mode;
+wire [4:0]	m1stg_id;
+wire		m1stg_fmul;
+wire		m1stg_fmul_dbl_dst;
+wire		m1stg_fmuls;
+wire		m1stg_fmuld;
+wire		m1stg_fsmuld;
+wire [4:0]	m1stg_opdec;
+wire [4:0]	m2stg_opdec;
+wire [1:0]	m2stg_rnd_mode;
+wire [4:0]	m2stg_id;
+wire		m2stg_fmul;
+wire		m2stg_fmuls;
+wire		m2stg_fmuld;
+wire		m2stg_fsmuld;
+wire [4:1]	m3astg_opdec;
+wire [1:0]	m3astg_rnd_mode;
+wire [4:0]	m3astg_id;
+wire [4:1]	m3bstg_opdec;
+wire [1:0]	m3bstg_rnd_mode;
+wire [4:0]	m3bstg_id;
+wire [4:1]	m3stg_opdec;
+wire [1:0]	m3stg_rnd_mode;
+wire [4:0]	m3stg_id;
+wire		m3stg_fmul;
+wire [4:1]	m4stg_opdec;
+wire [1:0]	m4stg_rnd_mode;
+wire [4:0]	m4stg_id;
+wire		m4stg_fmul;
+wire		m4stg_fmuld;
+wire [4:1]	m5stg_opdec;
+wire [1:0]	m5stg_rnd_mode;
+wire [4:0]	m5stg_id;
+wire		m5stg_fmul;
+wire		m5stg_fmuls;
+wire		m5stg_fmuld;
+wire		m5stg_fmulda;
+wire		m6stg_fmul_in;
+wire [4:2]	m6stg_opdec;
+wire [9:0]	m6stg_id_in;
+wire [9:0]	m6stg_id;
+wire		m6stg_fmul;
+wire		m6stg_fmul_dbl_dst;
+wire		m6stg_fmuls;
+wire		m6stg_hold;
+wire		m6stg_holda;
+wire		m6stg_step;
+wire		m6stg_stepa;
+wire		m1stg_sign1;
+wire		m1stg_sign2;
+wire		m2stg_sign1;
+wire		m2stg_sign2;
+wire		m1stg_of_mask;
+wire		m2stg_of_mask;
+wire		m2stg_sign;
+wire		m3astg_sign;
+wire		m2stg_nv;
+wire		m3astg_nv;
+wire		m3astg_of_mask;
+wire		m3bstg_sign;
+wire		m3bstg_nv;
+wire		m3stg_sign;
+wire		m3stg_nv;
+wire		m3stg_of_mask;
+wire		m4stg_sign;
+wire		m4stg_nv;
+wire		m4stg_of_mask;
+wire		m5stg_sign;
+wire		m5stg_nv;
+wire		m5stg_of_mask;
+wire		mul_sign_out;
+wire		mul_nv_out;
+wire		m5stg_in_of;
+wire		mul_of_out_tmp1_in;
+wire		mul_of_out_tmp1;
+wire		mul_of_out_tmp2;
+wire		mul_of_out_cout;
+wire		mul_of_out;
+wire		mul_uf_out_in;
+wire		mul_uf_out;
+wire		mul_nx_out_in;
+wire		mul_nx_out;
+wire [4:0]	mul_exc_out;
+wire		m2stg_frac1_dbl_norm;
+wire		m2stg_frac1_dbl_dnrm;
+wire		m2stg_frac1_sng_norm;
+wire		m2stg_frac1_sng_dnrm;
+wire		m2stg_frac1_inf;
+wire		m2stg_frac2_dbl_norm;
+wire		m2stg_frac2_dbl_dnrm;
+wire		m2stg_frac2_sng_norm;
+wire		m2stg_frac2_sng_dnrm;
+wire		m2stg_frac2_inf;
+wire		m1stg_inf_zero_in;
+wire		m1stg_inf_zero_in_dbl;
+wire [5:0]	m2stg_ld0_1_in;
+wire [5:0]	m2stg_ld0_1;
+wire [5:0]	m2stg_ld0_2_in;
+wire [5:0]	m2stg_ld0_2;
+wire		m2stg_exp_expadd;
+wire		m2stg_exp_0bff;
+wire		m2stg_exp_017f;
+wire		m2stg_exp_04ff;
+wire		m2stg_exp_zero;
+wire [6:0]	m2stg_ld0;
+wire [6:0]	m2stg_ld0_inv;
+wire [6:0]	m3astg_ld0_inv;
+wire [6:0]	m3bstg_ld0_inv;
+wire		m4stg_expadd_eq_0;
+wire		m3stg_exp_lte_0;
+wire		m4stg_right_shift_in;
+wire		m4stg_right_shift;
+wire [5:0]	m3stg_exp_minus1;
+wire [5:0]	m3stg_exp_inv_plus2;
+wire		m3stg_exp_lt_neg57;
+wire [5:0]	m4stg_sh_cnt_in;
+wire		m4stg_left_shift_step;
+wire		m4stg_right_shift_step;
+wire		m4stg_inc_exp_54;
+wire		m4stg_inc_exp_55;
+wire		m4stg_inc_exp_105;
+wire		m5stg_rndup;
+wire		m5stg_to_0;
+wire		m5stg_to_0_inv;
+wire		mul_frac_out_fracadd;
+wire		mul_frac_out_frac;
+wire		mul_exp_out_exp_plus1;
+wire		mul_exp_out_exp;
+wire		mul_pipe_active_in;
+wire		mul_pipe_active;
+wire    mula_rst_l;
+wire		mul_ctl_rst_l;	// registered grst_l; declared explicitly instead of relying on an implicit net
+
+dffrl_async #(1)  dffrl_mul_ctl (	// registers grst_l on rclk; arst_l drives the cell's rst_l pin
+  .din  (grst_l),
+  .clk  (rclk),
+  .rst_l(arst_l),
+  .q    (mul_ctl_rst_l),
+	.se (se),
+	.si (),
+	.so ()
+  );
+
+assign reset= (!mul_ctl_rst_l);	// active-high form; used as .rst / (!reset) by the control logic below
+
+// 3/14/03 reset signal for mul64
+assign mula_rst_l = mul_ctl_rst_l;
+
+
+///////////////////////////////////////////////////////////////////////////////
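+//	Multiply pipeline special input cases.
+//	The dffe_s cells in this section register the pre-decoded operand
+//	status inputs (inq_in1_*, inq_in2_*); all are enabled by m6stg_step.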
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_mul_frac_in1_51 (
+	.din	(inq_in1_51),
+	.en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (mul_frac_in1_51),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_mul_frac_in1_54 (
+	.din	(inq_in1_54),
+	.en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (mul_frac_in1_54),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_mul_frac_in1_53_0_neq_0 (
+	.din	(inq_in1_53_0_neq_0),
+	.en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (mul_frac_in1_53_0_neq_0),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_mul_frac_in1_50_0_neq_0 (
+	.din	(inq_in1_50_0_neq_0),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(mul_frac_in1_50_0_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_frac_in1_53_32_neq_0 (
+	.din	(inq_in1_53_32_neq_0),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(mul_frac_in1_53_32_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_exp_in1_exp_eq_0 (
+        .din	(inq_in1_exp_eq_0),
+        .en	(m6stg_step),
+        .clk	(rclk),
+ 
+        .q	(mul_exp_in1_exp_eq_0),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(1) i_mul_exp_in1_exp_neq_ffs (
+	.din	(inq_in1_exp_neq_ffs),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(mul_exp_in1_exp_neq_ffs),
+
+   	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_frac_in2_51 (
+	.din	(inq_in2_51),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(mul_frac_in2_51),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_frac_in2_54 (
+	.din	(inq_in2_54),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(mul_frac_in2_54),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_frac_in2_53_0_neq_0 (
+	.din	(inq_in2_53_0_neq_0),
+	.en  	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(mul_frac_in2_53_0_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_frac_in2_50_0_neq_0 (
+	.din	(inq_in2_50_0_neq_0),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(mul_frac_in2_50_0_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_frac_in2_53_32_neq_0 (
+	.din	(inq_in2_53_32_neq_0),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(mul_frac_in2_53_32_neq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_exp_in2_exp_eq_0 (
+	.din	(inq_in2_exp_eq_0),
+	 .en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(mul_exp_in2_exp_eq_0),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_exp_in2_exp_neq_ffs (
+        .din	(inq_in2_exp_neq_ffs),
+        .en	(m6stg_step),
+        .clk	(rclk),
+ 
+        .q	(mul_exp_in2_exp_neq_ffs),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Denorm multiply inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m1stg_denorm_sng_in1= mul_exp_in1_exp_eq_0 && m1stg_sngopa[0];
+
+assign m1stg_denorm_dbl_in1= mul_exp_in1_exp_eq_0 && m1stg_dblopa[0];
+
+assign m1stg_denorm_sng_in2= mul_exp_in2_exp_eq_0 && m1stg_sngopa[0];
+
+assign m1stg_denorm_dbl_in2= mul_exp_in2_exp_eq_0 && m1stg_dblopa[0];
+
+assign m1stg_denorm_in1= m1stg_denorm_sng_in1 || m1stg_denorm_dbl_in1;
+
+assign m1stg_denorm_in2= m1stg_denorm_sng_in2 || m1stg_denorm_dbl_in2;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Non-denorm multiply inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m1stg_norm_sng_in1= (!mul_exp_in1_exp_eq_0) && m1stg_sngopa[0];
+
+assign m1stg_norm_dbl_in1= (!mul_exp_in1_exp_eq_0) && m1stg_dblopa[0];
+
+assign m1stg_norm_sng_in2= (!mul_exp_in2_exp_eq_0) && m1stg_sngopa[0];
+
+assign m1stg_norm_dbl_in2= (!mul_exp_in2_exp_eq_0) && m1stg_dblopa[0];
+
+
+///////////////////////////////////////////////////////////////////////////////
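+//	Nan multiply inputs.
+//	Every term requires exponent all ones (!exp_neq_ffs). Signalling: bit 54
+//	(sng) / 51 (dbl) clear and [53:32] / [50:0] != 0. Quiet: that bit set.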
+///////////////////////////////////////////////////////////////////////////////
+
+assign m1stg_snan_sng_in1= (!mul_exp_in1_exp_neq_ffs) && (!mul_frac_in1_54)
+		&& (mul_frac_in1_53_32_neq_0) && m1stg_sngopa[1];
+
+assign m1stg_snan_dbl_in1= (!mul_exp_in1_exp_neq_ffs)
+		&& (!mul_frac_in1_51) && mul_frac_in1_50_0_neq_0
+		&& m1stg_dblopa[1];
+
+assign m1stg_snan_sng_in2= (!mul_exp_in2_exp_neq_ffs) && (!mul_frac_in2_54)
+                && (mul_frac_in2_53_32_neq_0) && m1stg_sngopa[1];
+
+assign m1stg_snan_dbl_in2= (!mul_exp_in2_exp_neq_ffs)
+                && (!mul_frac_in2_51) && mul_frac_in2_50_0_neq_0
+                && m1stg_dblopa[1];
+
+assign m1stg_qnan_sng_in1= (!mul_exp_in1_exp_neq_ffs) && mul_frac_in1_54
+		&& m1stg_sngopa[1];
+
+assign m1stg_qnan_dbl_in1= (!mul_exp_in1_exp_neq_ffs) && mul_frac_in1_51
+		&& m1stg_dblopa[1];
+
+assign m1stg_qnan_sng_in2= (!mul_exp_in2_exp_neq_ffs) && mul_frac_in2_54
+                && m1stg_sngopa[1];
+
+assign m1stg_qnan_dbl_in2= (!mul_exp_in2_exp_neq_ffs) && mul_frac_in2_51
+                && m1stg_dblopa[1];
+
+assign m1stg_snan_in1= m1stg_snan_sng_in1 || m1stg_snan_dbl_in1;
+
+assign m1stg_snan_in2= m1stg_snan_sng_in2 || m1stg_snan_dbl_in2;
+
+assign m1stg_qnan_in1= m1stg_qnan_sng_in1 || m1stg_qnan_dbl_in1;
+ 
+assign m1stg_qnan_in2= m1stg_qnan_sng_in2 || m1stg_qnan_dbl_in2;
+
+dffe_s #(1) i_m2stg_snan_in1 (
+	.din	(m1stg_snan_in1),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_snan_in1),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m2stg_snan_in2 (
+	.din	(m1stg_snan_in2),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_snan_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_m2stg_qnan_in1 (
+	.din	(m1stg_qnan_in1),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_qnan_in1),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_m2stg_qnan_in2 (
+	.din	(m1stg_qnan_in2),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_qnan_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign m1stg_nan_sng_in1= (!mul_exp_in1_exp_neq_ffs)
+		&& (mul_frac_in1_54 || mul_frac_in1_53_32_neq_0)
+		&& m1stg_sngopa[2];
+
+assign m1stg_nan_dbl_in1= (!mul_exp_in1_exp_neq_ffs)
+		&& (mul_frac_in1_51 || mul_frac_in1_50_0_neq_0)
+		&& m1stg_dblopa[2];
+
+assign m1stg_nan_sng_in2= (!mul_exp_in2_exp_neq_ffs)
+		&& (mul_frac_in2_54 || mul_frac_in2_53_32_neq_0)
+		&& m1stg_sngopa[2];
+
+assign m1stg_nan_dbl_in2= (!mul_exp_in2_exp_neq_ffs)
+		&& (mul_frac_in2_51 || mul_frac_in2_50_0_neq_0)
+		&& m1stg_dblopa[2];
+
+assign m1stg_nan_in1= m1stg_nan_sng_in1 || m1stg_nan_dbl_in1;
+
+assign m1stg_nan_in2= m1stg_nan_sng_in2 || m1stg_nan_dbl_in2;
+
+dffe_s #(1) i_m2stg_nan_in2 (
+	.din	(m1stg_nan_in2),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_nan_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
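+//	Infinity multiply inputs.
+//	Exponent all ones with bit 54 and [53:32] zero (sng), or bit 51 and
+//	[50:0] zero (dbl); the i_m2stg_inf_* flops register the results.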
+///////////////////////////////////////////////////////////////////////////////
+
+assign m1stg_inf_sng_in1= (!mul_exp_in1_exp_neq_ffs)
+		&& (!mul_frac_in1_54) && (!mul_frac_in1_53_32_neq_0)
+		&& m1stg_sngopa[2];
+
+assign m1stg_inf_dbl_in1= (!mul_exp_in1_exp_neq_ffs)
+		&& (!mul_frac_in1_51) && (!mul_frac_in1_50_0_neq_0)
+		&& m1stg_dblopa[2];
+
+assign m1stg_inf_sng_in2= (!mul_exp_in2_exp_neq_ffs)
+		&& (!mul_frac_in2_54) && (!mul_frac_in2_53_32_neq_0)
+		&& m1stg_sngopa[2];
+
+assign m1stg_inf_dbl_in2= (!mul_exp_in2_exp_neq_ffs)
+		&& (!mul_frac_in2_51) && (!mul_frac_in2_50_0_neq_0)
+		&& m1stg_dblopa[2];
+
+assign m1stg_inf_in1= m1stg_inf_sng_in1 || m1stg_inf_dbl_in1;
+
+assign m1stg_inf_in2= m1stg_inf_sng_in2 || m1stg_inf_dbl_in2;
+
+assign m1stg_inf_in= m1stg_inf_in1 || m1stg_inf_in2;
+
+dffe_s #(1) i_m2stg_inf_in1 (
+	.din	(m1stg_inf_in1),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_inf_in1),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_m2stg_inf_in2 (
+	.din	(m1stg_inf_in2),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_inf_in2),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(1) i_m2stg_inf_in (
+	.din	(m1stg_inf_in),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_inf_in),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Infinity/Nan multiply inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m1stg_infnan_sng_in1= (!mul_exp_in1_exp_neq_ffs) && m1stg_sngopa[3];
+
+assign m1stg_infnan_dbl_in1= (!mul_exp_in1_exp_neq_ffs) && m1stg_dblopa[3];
+
+assign m1stg_infnan_sng_in2= (!mul_exp_in2_exp_neq_ffs) && m1stg_sngopa[3];
+
+assign m1stg_infnan_dbl_in2= (!mul_exp_in2_exp_neq_ffs) && m1stg_dblopa[3];
+
+assign m1stg_infnan_in1= m1stg_infnan_sng_in1 || m1stg_infnan_dbl_in1;
+
+assign m1stg_infnan_in2= m1stg_infnan_sng_in2 || m1stg_infnan_dbl_in2;
+
+assign m1stg_infnan_in= m1stg_infnan_in1 || m1stg_infnan_in2;
+
+
+///////////////////////////////////////////////////////////////////////////////
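+//	Zero multiply inputs.
+//	Exponent == 0 with bit 54 and [53:0] all zero; unlike the sections
+//	above, this test is not qualified by the sng/dbl operation flags.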
+///////////////////////////////////////////////////////////////////////////////
+
+assign m1stg_zero_in1= mul_exp_in1_exp_eq_0
+		&& (!mul_frac_in1_53_0_neq_0) && (!mul_frac_in1_54);
+
+assign m1stg_zero_in2= mul_exp_in2_exp_eq_0
+                && (!mul_frac_in2_53_0_neq_0) && (!mul_frac_in2_54);
+
+assign m1stg_zero_in= m1stg_zero_in1 || m1stg_zero_in2;
+
+dffe_s #(1) i_m2stg_zero_in1 (
+	.din	(m1stg_zero_in1),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_zero_in1),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_m2stg_zero_in2 (
+	.din	(m1stg_zero_in2),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_zero_in2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_m2stg_zero_in (
+	.din	(m1stg_zero_in),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m2stg_zero_in),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+ 
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Floating point multiply control pipeline.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- multiply input stage.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m1stg_step= m6stg_stepa && (!m1stg_mul);	// stage-1 load enable: m6stg_stepa high and no multiply flagged in stage 1
+
+assign m1stg_op_in[7:0]= ({8{(m1stg_step && (!reset))}}	// load term: request opcode, masked to 0 unless inq_mul
+			    & (inq_op[7:0] & {8{inq_mul}}))
+		| ({8{((!m6stg_step) && (!reset))}}	// hold term: recirculate m1stg_op while m6stg_step is low
+			    & m1stg_op[7:0]);	// if neither term is selected (or reset is high) the next opcode is 0
+
+dff_s #(8) i_m1stg_op (	// no enable pin is connected; holding is done by the feedback term in m1stg_op_in
+	.din	(m1stg_op_in[7:0]),
+	.clk	(rclk),
+
+	.q	(m1stg_op[7:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign m1stg_mul_in= (m1stg_step && (!reset) && inq_mul)	// same load/hold structure as m1stg_op_in, for the 1-bit request flag
+		|| ((!m6stg_step) && (!reset) && m1stg_mul);
+
+dff_s #(1) i_m1stg_mul (	// m1stg_mul feeds back into m1stg_step above
+        .din    (m1stg_mul_in),
+	.clk    (rclk),
+ 
+        .q      (m1stg_mul),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m1stg_sngop (
+	.din	(inq_op[0]),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	.q	(m1stg_sngop),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(4) i_m1stg_sngopa (
+	.din	({4{inq_op[0]}}),
+	.en	(m6stg_step),
+        .clk	(rclk),
+ 
+        .q	(m1stg_sngopa[3:0]),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(1) i_m1stg_dblop (
+        .din    (inq_op[1]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m1stg_dblop),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(4) i_m1stg_dblopa (
+	.din	({4{inq_op[1]}}),
+	.en	(m6stg_step),
+	.clk	(rclk),
+
+	 .q	(m1stg_dblopa[3:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign m1stg_dblop_inv_in= (!inq_op[1]);
+
+dffe_s #(1) i_m1stg_dblop_inv (
+        .din	(m1stg_dblop_inv_in),
+        .en	(m6stg_step),
+        .clk	(rclk),
+ 
+        .q	(m1stg_dblop_inv),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(2) i_m1stg_rnd_mode (
+	.din	(inq_rnd_mode[1:0]),
+	.en	(m6stg_step),
+	.clk    (rclk),
+
+        .q      (m1stg_rnd_mode[1:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(5) i_m1stg_id (
+	.din	(inq_id[4:0]),
+	.en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m1stg_id[4:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Opcode decode- multiply stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Stage-1 opcode decode: m1stg_op is compared against each of the
+// three multiply opcode parameters (FMULS, FMULD, FSMULD).
+assign m1stg_fmuls  = (FMULS  == m1stg_op[7:0]);
+assign m1stg_fmuld  = (FMULD  == m1stg_op[7:0]);
+assign m1stg_fsmuld = (FSMULD == m1stg_op[7:0]);
+
+// Any of the three multiply opcodes is present in stage 1.
+assign m1stg_fmul = m1stg_fmuls || m1stg_fmuld || m1stg_fsmuld;
+
+// Opcodes grouped under the "dbl_dst" decode: fmuld and fsmuld.
+assign m1stg_fmul_dbl_dst = m1stg_fmuld || m1stg_fsmuld;
+
+// Decode bundle handed to the i_m2stg_opdec register:
+//   [4] fmul  [3] fmul_dbl_dst  [2] fmuls  [1] fmuld  [0] fsmuld
+assign m1stg_opdec[4:0] = {m1stg_fmul, m1stg_fmul_dbl_dst,
+			   m1stg_fmuls, m1stg_fmuld, m1stg_fsmuld};
+
+dffre_s #(5) i_m2stg_opdec (
+	.din	(m1stg_opdec[4:0]),
+	.en	(m6stg_step),
+	.rst	(reset),
+	.clk	(rclk),
+
+	.q	(m2stg_opdec[4:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(2) i_m2stg_rnd_mode (
+        .din    (m1stg_rnd_mode[1:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m2stg_rnd_mode[1:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+ 
+dffe_s #(5) i_m2stg_id (
+        .din    (m1stg_id[4:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m2stg_id[4:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Opcode pipeline- multiply stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m2stg_fmul= m2stg_opdec[4];
+assign m2stg_fmuls= m2stg_opdec[2];
+assign m2stg_fmuld= m2stg_opdec[1];
+assign m2stg_fsmuld= m2stg_opdec[0];
+
+dffre_s #(4) i_m3astg_opdec (
+        .din    (m2stg_opdec[4:1]),
+        .en     (m6stg_step),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (m3astg_opdec[4:1]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(2) i_m3astg_rnd_mode (
+        .din    (m2stg_rnd_mode[1:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3astg_rnd_mode[1:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(5) i_m3astg_id (
+        .din    (m2stg_id[4:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3astg_id[4:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- multiply stage 3a.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffre_s #(4) i_m3bstg_opdec (
+        .din    (m3astg_opdec[4:1]),
+        .en     (m6stg_step),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (m3bstg_opdec[4:1]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(2) i_m3bstg_rnd_mode (
+        .din    (m3astg_rnd_mode[1:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m3bstg_rnd_mode[1:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(5) i_m3bstg_id (
+        .din    (m3astg_id[4:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3bstg_id[4:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- multiply stage 3b.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffre_s #(4) i_m3stg_opdec (  // moves opdec[4:1] from m3bstg to m3stg
+        .din    (m3bstg_opdec[4:1]),
+        .en     (m6stg_step),  // advances only when m6stg_step is set (m6stg_step = !m6stg_hold)
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (m3stg_opdec[4:1]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(2) i_m3stg_rnd_mode (  // moves rnd_mode from m3bstg to m3stg
+        .din    (m3bstg_rnd_mode[1:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3stg_rnd_mode[1:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(5) i_m3stg_id (  // moves the 5-bit id from m3bstg to m3stg
+        .din    (m3bstg_id[4:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m3stg_id[4:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- multiply stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m3stg_fmul= m3stg_opdec[4];  // opdec bit 4 is the per-stage fmul flag
+
+dffre_s #(4) i_m4stg_opdec (  // moves opdec[4:1] from m3stg to m4stg
+        .din    (m3stg_opdec[4:1]),
+        .en     (m6stg_step),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (m4stg_opdec[4:1]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(2) i_m4stg_rnd_mode (  // moves rnd_mode from m3stg to m4stg
+        .din    (m3stg_rnd_mode[1:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m4stg_rnd_mode[1:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(5) i_m4stg_id (  // moves the 5-bit id from m3stg to m4stg
+        .din    (m3stg_id[4:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m4stg_id[4:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- multiply stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m4stg_fmul= m4stg_opdec[4];  // opdec bit 4: fmul flag for stage 4
+assign m4stg_fmuld= m4stg_opdec[1];  // opdec bit 1: fmuld flag for stage 4
+
+dffre_s #(4) i_m5stg_opdec (  // moves opdec[4:1] from m4stg to m5stg
+        .din    (m4stg_opdec[4:1]),
+        .en     (m6stg_step),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (m5stg_opdec[4:1]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(2) i_m5stg_rnd_mode (  // moves rnd_mode from m4stg to m5stg
+        .din    (m4stg_rnd_mode[1:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m5stg_rnd_mode[1:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(5) i_m5stg_id (  // moves the 5-bit id from m4stg to m5stg
+        .din    (m4stg_id[4:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m5stg_id[4:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(1) i_m5stg_fmulda (  // second registered copy of the fmuld flag; m5stg_opdec[1] holds the same data
+	.din	(m4stg_fmuld),
+	.en	(m6stg_step),
+	.rst	(reset),
+	.clk	(rclk),
+
+  	.q	(m5stg_fmulda),  // NOTE(review): presumably a fanout duplicate; its consumer is not visible here
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- multiply stage 5.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m5stg_fmul= m5stg_opdec[4];  // stage 5 flags are taken straight from the opdec bits
+assign m5stg_fmuls= m5stg_opdec[2];
+assign m5stg_fmuld= m5stg_opdec[1];
+
+assign m6stg_fmul_in= (m6stg_stepa && (!reset)  // next m6stg_fmul: take m5stg_fmul when stepping,
+			&& m5stg_fmul)
+		|| ((!m6stg_stepa) && (!reset)  // otherwise keep the current m6stg_fmul; reset forces 0
+			&& m6stg_fmul);
+
+dffre_s #(3) i_m6stg_opdec (  // only opdec[4:2] continue into stage 6; bit 1 (fmuld) is dropped
+        .din    (m5stg_opdec[4:2]),
+        .en     (m6stg_step),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (m6stg_opdec[4:2]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign m6stg_id_in[9:0]= ({10{m6stg_stepa}}  // when stepping: widen the 5-bit id to 10 bits
+			    & {(m5stg_id[4:2]==3'o7),  // bits 9:2 are a one-hot decode of m5stg_id[4:2]
+				(m5stg_id[4:2]==3'o6),
+				(m5stg_id[4:2]==3'o5),
+				(m5stg_id[4:2]==3'o4),
+				(m5stg_id[4:2]==3'o3),
+				(m5stg_id[4:2]==3'o2),
+				(m5stg_id[4:2]==3'o1),
+				(m5stg_id[4:2]==3'o0),
+				m5stg_id[1:0]})  // bits 1:0 pass through undecoded
+		| ({10{(!m6stg_stepa)}}  // when not stepping: recirculate the current m6stg_id
+			    & m6stg_id[9:0]);
+
+dffe_s #(10) i_m6stg_id (  // the mux above selects on m6stg_stepa while this enable is m6stg_step
+        .din    (m6stg_id_in[9:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m6stg_id[9:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Opcode pipeline- multiply pipeline output.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m6stg_fmul= m6stg_opdec[4];  // stage 6 flags decoded from the three opdec bits that reach it
+assign m6stg_fmul_dbl_dst= m6stg_opdec[3];
+assign m6stg_fmuls= m6stg_opdec[2];
+
+assign m6stg_hold= m6stg_fmul && (!mul_dest_rdy);  // hold while stage 6 has an fmul and mul_dest_rdy is low
+assign m6stg_holda= m6stg_fmul && (!mul_dest_rdya);  // same form on mul_dest_rdya (NOTE(review): presumably a duplicate of mul_dest_rdy; confirm)
+
+assign m6stg_step= (!m6stg_hold);  // enable for the pipeline registers in this module
+assign m6stg_stepa= (!m6stg_holda);  // used by the m6stg_fmul_in and m6stg_id_in muxes
+
+// Austin update
+// Power management update
+
+assign mul_pipe_active_in =  // mul pipe is executing a valid instr
+   m1stg_fmul || m2stg_fmul || m3astg_opdec[4] || m3bstg_opdec[4] ||  // OR of the fmul flag of every stage
+   m3stg_fmul || m4stg_fmul || m5stg_fmul      || m6stg_fmul;
+
+dffre_s #(1) i_mul_pipe_active (  // registers the activity flag; unlike the stage registers its enable is tied high
+	.din	(mul_pipe_active_in),
+	.en     (1'b1),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (mul_pipe_active),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply sign and exception logic.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply sign inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_m1stg_sign1 (  // captures inq_in1_63 as m1stg_sign1 (presumably bit 63, the sign, of operand 1; confirm at the source of inq_in1_63)
+        .din    (inq_in1_63),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m1stg_sign1),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m1stg_sign2 (  // captures inq_in2_63 as m1stg_sign2, the operand 2 counterpart
+        .din    (inq_in2_63),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m1stg_sign2),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply sign and exceptions.
+//
+//	Multiply stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_m2stg_sign1 (  // moves sign1 from m1stg to m2stg
+        .din	(m1stg_sign1),
+        .en	(m6stg_step),
+        .clk	(rclk),
+ 
+        .q	(m2stg_sign1),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+ 
+dffe_s #(1) i_m2stg_sign2 (  // moves sign2 from m1stg to m2stg
+        .din	(m1stg_sign2),
+        .en	(m6stg_step),
+        .clk	(rclk),
+ 
+        .q	(m2stg_sign2),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+assign m1stg_of_mask= (!m1stg_infnan_in);  // mask is cleared when m1stg_infnan_in is set; it later gates the overflow terms
+
+dffe_s #(1) i_m2stg_of_mask (  // first register of the of_mask chain that runs down to m5stg_of_mask
+        .din    (m1stg_of_mask),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m2stg_of_mask),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply sign and exceptions.
+//
+//      Multiply stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m2stg_sign= ((m2stg_sign1  // result sign: XOR of the two operand signs, each masked by the NaN terms below
+				&& (!m2stg_snan_in2)  // sign1 is dropped when in2 is an snan,
+				&& (!(m2stg_qnan_in2 && (!m2stg_snan_in1))))  // or when in2 is a qnan and in1 is not an snan
+			^ (m2stg_sign2
+				&& (!(m2stg_snan_in1 && (!m2stg_snan_in2)))  // sign2 is dropped when only in1 is an snan,
+				&& (!(m2stg_qnan_in1 && (!m2stg_nan_in2)))))  // or when in1 is a qnan and in2 is not a nan
+		&& (!(m2stg_inf_in && m2stg_zero_in));  // forced to 0 when m2stg_inf_in and m2stg_zero_in are both set
+
+dffe_s #(1) i_m3astg_sign (  // registers the computed sign into stage 3a
+        .din    (m2stg_sign),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3astg_sign),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign m2stg_nv= m2stg_snan_in1  // invalid flag: either operand is an snan,
+		|| m2stg_snan_in2
+		|| (m2stg_zero_in1 && m2stg_inf_in2)  // or the operands are a zero/inf pair in either order
+		|| (m2stg_inf_in1 && m2stg_zero_in2);
+
+dffe_s #(1) i_m3astg_nv (  // registers the invalid flag into stage 3a
+        .din    (m2stg_nv),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3astg_nv),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m3astg_of_mask (  // moves of_mask from m2stg to m3astg
+        .din    (m2stg_of_mask),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3astg_of_mask),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply sign and exceptions.
+//
+//      Multiply stage 3a.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_m3bstg_sign (  // moves sign from m3astg to m3bstg; no logic is applied in this stage
+        .din    (m3astg_sign),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3bstg_sign),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m3bstg_nv (  // moves the invalid flag from m3astg to m3bstg
+        .din    (m3astg_nv),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3bstg_nv),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m3bstg_of_mask (  // moves of_mask from m3astg to m3bstg
+        .din    (m3astg_of_mask),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3bstg_of_mask),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply sign and exceptions.
+//
+//      Multiply stage 3b.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_m3stg_sign (  // moves sign from m3bstg to m3stg; no logic is applied in this stage
+        .din    (m3bstg_sign),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3stg_sign),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m3stg_nv (  // moves the invalid flag from m3bstg to m3stg
+        .din    (m3bstg_nv),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3stg_nv),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m3stg_of_mask (  // moves of_mask from m3bstg to m3stg
+        .din    (m3bstg_of_mask),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3stg_of_mask),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply sign and exceptions.
+//
+//      Multiply stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+ 
+dffe_s #(1) i_m4stg_sign (  // moves sign from m3stg to m4stg; no logic is applied in this stage
+        .din    (m3stg_sign),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m4stg_sign),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+ 
+dffe_s #(1) i_m4stg_nv (  // moves the invalid flag from m3stg to m4stg
+        .din    (m3stg_nv),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m4stg_nv),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+ 
+dffe_s #(1) i_m4stg_of_mask (  // moves of_mask from m3stg to m4stg
+        .din    (m3stg_of_mask),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m4stg_of_mask),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply sign and exceptions.
+//
+//      Multiply stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_m5stg_sign (  // moves sign from m4stg to m5stg; m5stg_sign also feeds the rounding logic (m5stg_rndup, m5stg_to_0)
+        .din    (m4stg_sign),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m5stg_sign),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m5stg_nv (  // moves the invalid flag from m4stg to m5stg
+        .din    (m4stg_nv),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m5stg_nv),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_m5stg_of_mask (  // moves of_mask from m4stg to m5stg, where it gates the overflow terms
+        .din    (m4stg_of_mask),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m5stg_of_mask),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply sign and exceptions.
+//
+//      Multiply stage 5.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_mul_sign_out (  // final register for the sign: m5stg_sign -> mul_sign_out
+	.din	(m5stg_sign),
+	.en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (mul_sign_out),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_mul_nv_out (  // final register for the invalid flag: m5stg_nv -> mul_nv_out (bit 4 of mul_exc_out)
+	.din	(m5stg_nv),
+	.en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (mul_nv_out),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign m5stg_in_of= ((!m5stg_exp[12])  // overflow seen directly on the stage 5 exponent; requires exp[12] clear
+                        && m5stg_fmuld
+                        && (m5stg_exp[11] || (&m5stg_exp[10:0]))  // fmuld: exp[11] set, or exp[10:0] all ones
+                        && m5stg_of_mask)
+                || ((!m5stg_exp[12])
+                        && m5stg_fmuls
+                        && ((|m5stg_exp[11:8]) || (&m5stg_exp[7:0]))  // fmuls: any of exp[11:8] set, or exp[7:0] all ones
+                        && m5stg_of_mask);
+
+assign mul_of_out_tmp1_in= ((!m5stg_exp[12])  // candidate overflow that still depends on the rounding carry-out
+                        && m5stg_fmuld
+                        && (&m5stg_exp[10:1])  // fmuld: exp[10:1] all ones (bit 0 is not tested)
+                        && m5stg_rndup
+                        && m5stg_of_mask)
+                || ((!m5stg_exp[12])
+                        && m5stg_fmuls
+                        && (&m5stg_exp[7:1])  // fmuls: exp[7:1] all ones (bit 0 is not tested)
+                        && m5stg_rndup
+                        && m5stg_of_mask);
+
+dffe_s #(1) i_mul_of_out_tmp1 (  // registers the carry-dependent overflow candidate
+        .din    (mul_of_out_tmp1_in),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (mul_of_out_tmp1),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(1) i_mul_of_out_tmp2 (  // registers the direct overflow term m5stg_in_of
+	.din	(m5stg_in_of),
+	.en	(m6stg_step),
+    	.clk	(rclk),
+
+	.q	(mul_of_out_tmp2),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(1) i_mul_of_out_cout (  // registers m5stg_fracadd_cout (driven outside this section)
+	.din	(m5stg_fracadd_cout),
+	.en	(m6stg_step),
+    	.clk	(rclk),
+
+	.q	(mul_of_out_cout),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign mul_of_out= mul_of_out_tmp2  // overflow: the direct term, or the candidate confirmed by the registered carry-out
+		|| (mul_of_out_tmp1 && mul_of_out_cout);
+
+assign mul_uf_out_in= (m5stg_exp[12] || (!(|m5stg_exp[11:0])))  // underflow: exp[12] set or exp[11:0] all zero,
+		&& m5stg_frac_neq_0;  // and m5stg_frac_neq_0 is set
+
+dffe_s #(1) i_mul_uf_out (  // registers the underflow flag (bit 2 of mul_exc_out)
+        .din    (mul_uf_out_in),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (mul_uf_out),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign mul_nx_out_in= (m5stg_fmuld && m5stg_frac_dbl_nx)  // inexact: picks the dbl or sng inexact input by operation type
+		|| (m5stg_fmuls && m5stg_frac_sng_nx);
+
+dffe_s #(1) i_mul_nx_out (  // registers the inexact flag; mul_exc_out[0] additionally ORs in mul_of_out
+        .din    (mul_nx_out_in),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (mul_nx_out),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Multiply exception output.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Austin update
+// Overflow is always accompanied by inexact.
+// Previously this was handled within the FFU.
+
+// assign mul_exc_out[4:0]= {mul_nv_out, mul_of_out, mul_uf_out, 1'b0, mul_nx_out};
+
+assign mul_exc_out[4:0] =  // 5-bit exception vector, bit 4 down to bit 0
+  {mul_nv_out,  // [4] invalid
+   mul_of_out,  // [3] overflow
+   mul_uf_out,  // [2] underflow
+   1'b0,  // [1] always 0 (NOTE(review): presumably the divide-by-zero slot of the SPARC FSR cexc order; confirm)
+   (mul_nx_out || mul_of_out)};  // Overflow is always accompanied by inexact
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Multiply pipeline control logic.
+//
+///////////////////////////////////////////////////////////////////////////////
+ 
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- multiply normalization and special input injection.
+//
+//	Multiply stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m2stg_frac1_dbl_norm= m1stg_norm_dbl_in1  // frac1 select, dbl norm: no inf/nan operand, or in1 is the nan that is kept
+		&& ((!(m1stg_infnan_dbl_in1 || m1stg_infnan_dbl_in2))
+			|| (m1stg_snan_dbl_in1 && (!m1stg_snan_dbl_in2))  // in1 snan is kept only if in2 is not an snan
+			|| (m1stg_qnan_dbl_in1 && (!m1stg_nan_dbl_in2)));  // in1 qnan is kept only if in2 is not a nan at all
+
+assign m2stg_frac1_dbl_dnrm= m1stg_denorm_dbl_in1  // frac1 select, dbl denorm: only when neither operand is inf/nan
+		&& (!(m1stg_infnan_dbl_in1 || m1stg_infnan_dbl_in2));
+
+assign m2stg_frac1_sng_norm= m1stg_norm_sng_in1  // single precision counterpart of m2stg_frac1_dbl_norm
+		&& ((!(m1stg_infnan_sng_in1 || m1stg_infnan_sng_in2))
+                        || (m1stg_snan_sng_in1 && (!m1stg_snan_sng_in2))
+                        || (m1stg_qnan_sng_in1 && (!m1stg_nan_sng_in2)));
+
+assign m2stg_frac1_sng_dnrm= m1stg_denorm_sng_in1  // single precision counterpart of m2stg_frac1_dbl_dnrm
+		&& (!(m1stg_infnan_sng_in1 || m1stg_infnan_sng_in2));
+
+assign m2stg_frac1_inf= (m1stg_inf_in && (!m1stg_nan_in1) && (!m1stg_nan_in2))  // frac1 special value: inf with no nan,
+		|| m1stg_snan_in2  // or the cases where the in2 nan is kept (mirrors the frac2 norm terms)
+		|| (m1stg_qnan_in2 && (!m1stg_snan_in1));
+
+assign m2stg_frac2_dbl_norm= m1stg_norm_dbl_in2  // frac2 select, dbl norm: no inf/nan operand, or in2 is the nan that is kept
+		&& ((!(m1stg_infnan_dbl_in1 || m1stg_infnan_dbl_in2))
+			|| m1stg_snan_dbl_in2  // an in2 snan is always kept
+			|| (m1stg_qnan_dbl_in2 && (!m1stg_snan_dbl_in1)));  // an in2 qnan is kept unless in1 is an snan
+
+assign m2stg_frac2_dbl_dnrm= m1stg_denorm_dbl_in2  // frac2 select, dbl denorm: only when neither operand is inf/nan
+		&& (!(m1stg_infnan_dbl_in1 || m1stg_infnan_dbl_in2));
+
+assign m2stg_frac2_sng_norm= m1stg_norm_sng_in2  // single precision counterpart of m2stg_frac2_dbl_norm
+		&& ((!(m1stg_infnan_sng_in1 || m1stg_infnan_sng_in2))
+                        || m1stg_snan_sng_in2
+                        || (m1stg_qnan_sng_in2 && (!m1stg_snan_sng_in1)));
+
+assign m2stg_frac2_sng_dnrm= m1stg_denorm_sng_in2  // single precision counterpart of m2stg_frac2_dbl_dnrm
+		&& (!(m1stg_infnan_sng_in1 || m1stg_infnan_sng_in2));
+
+assign m2stg_frac2_inf= (m1stg_inf_in && (!m1stg_nan_in1) && (!m1stg_nan_in2))  // frac2 special value: inf with no nan,
+		|| (m1stg_snan_in1 && (!m1stg_snan_in2))  // or the cases where the in1 nan is kept (mirrors the frac1 norm terms)
+		|| (m1stg_qnan_in1 && (!m1stg_nan_in2));
+
+assign m1stg_inf_zero_in= (m1stg_inf_in1 && m1stg_zero_in2)  // one operand inf and the other zero, either order
+		|| (m1stg_zero_in1 && m1stg_inf_in2);
+
+assign m1stg_inf_zero_in_dbl= ((m1stg_inf_in1 && m1stg_zero_in2)  // same condition, additionally qualified by m1stg_fmul_dbl_dst
+			|| (m1stg_zero_in1 && m1stg_inf_in2))
+		&& m1stg_fmul_dbl_dst;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Select lines and control logic- multiply leading 0 counts.
+//
+//      Multiply stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m2stg_ld0_1_in[5:0]= ({6{(m1stg_denorm_in1 && (!m1stg_infnan_in))}}  // pass m1stg_ld0_1 only for a denorm in1 with no inf/nan; else 0
+		& m1stg_ld0_1[5:0]);
+
+dffe_s #(6) i_m2stg_ld0_1 (  // registers the gated 6-bit count for operand 1 into stage 2
+	.din	(m2stg_ld0_1_in[5:0]),
+	.en	(m6stg_step),
+	.clk    (rclk),
+
+        .q      (m2stg_ld0_1[5:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign m2stg_ld0_2_in[5:0]= ({6{(m1stg_denorm_in2 && (!m1stg_infnan_in))}}  // same gating for operand 2
+		& m1stg_ld0_2[5:0]);
+
+dffe_s #(6) i_m2stg_ld0_2 (  // registers the gated 6-bit count for operand 2 into stage 2
+        .din    (m2stg_ld0_2_in[5:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m2stg_ld0_2[5:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines- multiply exponent adder.
+//
+//      Multiply stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m2stg_exp_expadd= (!m1stg_infnan_in) && (!m1stg_zero_in);  // exponent select when there is no inf/nan and no zero input
+
+assign m2stg_exp_0bff= m1stg_fmuld && m1stg_infnan_in;  // inf/nan case for fmuld (NOTE(review): name suggests a constant 0x0bff exponent; confirm in the exponent datapath)
+
+assign m2stg_exp_017f= m1stg_fmuls && m1stg_infnan_in;  // inf/nan case for fmuls
+
+assign m2stg_exp_04ff= m1stg_fsmuld && m1stg_infnan_in;  // inf/nan case for fsmuld
+ 
+assign m2stg_exp_zero= m1stg_zero_in && (!m1stg_infnan_in);  // zero input with no inf/nan; exactly one of these five selects is set when one of fmuld/fmuls/fsmuld is set
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Total the leading 0's.
+//
+//	Multiply stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m2stg_ld0[6:0]= {1'b0, m2stg_ld0_1[5:0]}  // 7-bit sum of the two zero-extended 6-bit counts, so the add cannot wrap
+			+ {1'b0, m2stg_ld0_2[5:0]};
+
+assign m2stg_ld0_inv[6:0]= (~m2stg_ld0[6:0]);  // the sum is carried down the pipeline in bitwise-inverted form
+
+dffe_s #(7) i_m3astg_ld0_inv (  // registers the inverted sum into stage 3a
+	.din	(m2stg_ld0_inv[6:0]),
+	.en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3astg_ld0_inv[6:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Leading 0's.
+//
+//      Multiply stage 3a.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(7) i_m3bstg_ld0_inv (  // moves the inverted leading-zero sum from m3astg to m3bstg
+        .din    (m3astg_ld0_inv[6:0]),
+        .en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m3bstg_ld0_inv[6:0]),  // not consumed in this section; the shift count below reads m3stg_ld0_inv
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Post-normalization/denormalization shift count and direction.
+//
+//	Multiply stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(1) i_m4stg_expadd_eq_0 (  // moves m3stg_expadd_eq_0 into stage 4, where m4stg_inc_exp_105 uses it
+        .din    (m3stg_expadd_eq_0),
+        .en     (m6stg_step),
+        .clk    (rclk),
+ 
+        .q      (m4stg_expadd_eq_0),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign m3stg_exp_lte_0= (!(|m3stg_exp[11:0])) || m3stg_exp[12];  // exp[11:0] all zero, or exp[12] set (presumably the sign bit of a 13-bit two's complement exponent)
+
+assign m4stg_right_shift_in= (!m3stg_expadd_lte_0_inv) && m3stg_exp_lte_0;  // right shift only when expadd_lte_0_inv is clear and exp <= 0
+
+dffe_s #(1) i_m4stg_right_shift (  // registers the shift direction for stage 4
+	.din	(m4stg_right_shift_in),
+	.en     (m6stg_step),
+        .clk    (rclk),
+
+        .q      (m4stg_right_shift),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign m3stg_exp_minus1[5:0]= m3stg_exp[5:0]  // adding 6'h3f is a subtract of 1 modulo 64
+			+ 6'h3f;
+
+assign m3stg_exp_inv_plus2[5:0]= (~m3stg_exp[5:0])  // ~x + 2 equals 1 - x modulo 64
+			+ 6'h02;
+
+assign m3stg_exp_lt_neg57= ((!(&m3stg_exp[11:6]))  // exp[12] set, and either exp[11:6] not all ones
+			|| (!(|m3stg_exp[5:3])))  // or exp[5:3] all zero
+		&& m3stg_exp[12];
+
+assign m4stg_sh_cnt_in[5:0]= ({6{((!m3stg_expadd_lte_0_inv)  // 4-way AND-OR mux with mutually exclusive selects
+				&& (!m3stg_exp_lte_0))}}
+			    & m3stg_exp_minus1[5:0])  // exp > 0: count is exp - 1
+		| ({6{((!m3stg_expadd_lte_0_inv) && m3stg_exp_lte_0
+				&& m3stg_exp_lt_neg57)}}
+			    & 6'h39)  // far negative exp: count is the constant 57 (6'h39)
+		| ({6{((!m3stg_expadd_lte_0_inv) && m3stg_exp_lte_0
+				&& (!m3stg_exp_lt_neg57))}}
+			    & m3stg_exp_inv_plus2[5:0])  // other exp <= 0 cases: count is 1 - exp
+		| ({6{m3stg_expadd_lte_0_inv}}
+			    & (~m3stg_ld0_inv[5:0]));  // expadd_lte_0_inv set: count is the re-inverted low 6 bits of m3stg_ld0_inv
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines and control logic- multiply shifts for
+//              post-normalization/denormalization.
+//
+//      Multiply stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m4stg_left_shift_step= (!m4stg_right_shift) && m6stg_step;  // left shift, qualified by the pipeline step
+
+assign m4stg_right_shift_step= m4stg_right_shift && m6stg_step;  // right shift, qualified by the pipeline step
+
+// Austin update
+// uarch timing fix
+// Endpoint: fpu_mul_exp_dp/i_m5stg_exp_pre2_10
+
+// assign m4stg_inc_exp= (((!(|m4stg_exp[12:0])) && (!m4stg_right_shift)
+//				&& m4stg_shl_54)
+//			|| (m4stg_expadd_eq_0 && m4stg_right_shift
+//				&& m4stg_frac_105)
+//			|| ((!m4stg_right_shift) && m4stg_shl_55))
+//	 	&& m6stg_step;
+//
+// assign m4stg_inc_exp_inv= (!m4stg_inc_exp) && m6stg_step;
+
+assign m4stg_inc_exp_54  = (!(|m4stg_exp[12:0])) && (!m4stg_right_shift);  // first term of the old m4stg_inc_exp without its m4stg_shl_54 factor
+assign m4stg_inc_exp_55  = !m4stg_right_shift;  // third term of the old expression without its m4stg_shl_55 factor
+assign m4stg_inc_exp_105 = m4stg_expadd_eq_0 && m4stg_right_shift && m4stg_frac_105;  // second term unchanged; none of the three includes m6stg_step any more
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Select lines and control logic- multiply rounding.
+//
+//      Multiply stage 5.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign m5stg_rndup= ((((m5stg_rnd_mode[1:0]==2'b10) && (!m5stg_sign)  // fmuld, mode 10: round up a positive result
+					&& (m5stg_frac[2:0]!=3'b0))  // when any of frac[2:0] is set
+				|| ((m5stg_rnd_mode[1:0]==2'b11) && m5stg_sign  // mode 11: same test for a negative result
+					&& (m5stg_frac[2:0]!=3'b0))
+				|| ((m5stg_rnd_mode[1:0]==2'b00)  // mode 00: needs frac[2] set
+					&& m5stg_frac[2]
+					&& ((m5stg_frac[1:0]!=2'b0)  // plus either a lower bit set
+						|| m5stg_frac[3])))  // or frac[3] set (ties go to even if frac[3] is the result lsb; confirm)
+			&& m5stg_fmuld)
+		|| ((((m5stg_rnd_mode[1:0]==2'b10) && (!m5stg_sign)  // fmuls: same three cases, 29 bit positions higher
+					&& (m5stg_frac[31:0]!=32'b0))
+				|| ((m5stg_rnd_mode[1:0]==2'b11) && m5stg_sign
+                                        && (m5stg_frac[31:0]!=32'b0))
+				|| ((m5stg_rnd_mode[1:0]==2'b00)
+                                        && m5stg_frac[31]
+					&& ((m5stg_frac[30:0]!=31'b0)
+						|| m5stg_frac[32])))
+			&& m5stg_fmuls);
+
+assign m5stg_to_0= (m5stg_rnd_mode[1:0]==2'b01)  // magnitude is never rounded up: mode 01,
+                || ((m5stg_rnd_mode[1:0]==2'b10) && m5stg_sign)  // mode 10 with a negative result,
+                || ((m5stg_rnd_mode[1:0]==2'b11) && (!m5stg_sign));  // or mode 11 with a positive result (consistent with SPARC FSR.RD 01=zero, 10=+inf, 11=-inf; confirm)
+
+assign m5stg_to_0_inv= (!m5stg_to_0);
+
+assign mul_frac_out_fracadd= m5stg_rndup && (!m5stg_in_of);  // rounded-up fraction is selected when rounding up without overflow
+
+assign mul_frac_out_frac= (!m5stg_rndup) && (!m5stg_in_of);  // unrounded fraction otherwise; neither select is set when m5stg_in_of is set
+
+assign mul_exp_out_exp_plus1= m5stg_rndup && (!m5stg_in_of);  // exponent selects use the same two conditions as the fraction selects
+
+assign mul_exp_out_exp= (!m5stg_rndup) && (!m5stg_in_of);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_in2_gt_in1_3to1.v
===================================================================
--- /trunk/T1-FPU/fpu_in2_gt_in1_3to1.v	(revision 6)
+++ /trunk/T1-FPU/fpu_in2_gt_in1_3to1.v	(revision 6)
@@ -0,0 +1,66 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_in2_gt_in1_3to1.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Reduce three fpu_in2_gt_in1_*b results to one set of results.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_in2_gt_in1_3to1 (
+	din2_neq_din1_hi,
+	din2_gt_din1_hi,
+	din2_neq_din1_mid,
+	din2_gt_din1_mid,
+	din2_neq_din1_lo,
+	din2_gt_din1_lo,
+
+	din2_neq_din1,
+	din2_gt_din1
+);
+
+// Partial compare results, one pair per group (hi, mid, lo in priority order).
+input		din2_neq_din1_hi;	// high group: input 2 differs from input 1
+input		din2_gt_din1_hi;	// high group: input 2 exceeds input 1
+input		din2_neq_din1_mid;	// middle group: input 2 differs from input 1
+input		din2_gt_din1_mid;	// middle group: input 2 exceeds input 1
+input		din2_neq_din1_lo;	// low group: input 2 differs from input 1
+input		din2_gt_din1_lo;	// low group: input 2 exceeds input 1
+
+output		din2_neq_din1;		// any group reports a difference
+output		din2_gt_din1;		// highest-priority deciding group says in2 > in1
+
+wire		din2_neq_din1;
+wire		din2_gt_din1;
+wire		gt_mid_or_lo;		// greater-than verdict of the mid and lo groups
+
+// The inputs differ as soon as one group differs.
+assign din2_neq_din1= din2_neq_din1_hi | din2_neq_din1_mid | din2_neq_din1_lo;
+
+// Below the high group: mid decides when it differs, otherwise lo decides.
+assign gt_mid_or_lo= (din2_neq_din1_mid & din2_gt_din1_mid)
+		| ((~din2_neq_din1_mid) & din2_gt_din1_lo);
+// The high group decides when it differs, otherwise defer to mid/lo.
+assign din2_gt_din1= (din2_neq_din1_hi & din2_gt_din1_hi)
+		| ((~din2_neq_din1_hi) & gt_mid_or_lo);
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_cnt_lead0_64b.v
===================================================================
--- /trunk/T1-FPU/fpu_cnt_lead0_64b.v	(revision 6)
+++ /trunk/T1-FPU/fpu_cnt_lead0_64b.v	(revision 6)
@@ -0,0 +1,515 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_cnt_lead0_64b.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//	64 bit lead 0 counter: a 4b -> 8b -> 16b -> 32b tree of lead 0 building
+//	blocks whose two 32 bit results are merged into lead0[5:0].  If both
+//	halves are reported all 0's, the internal bit 6 is set and lead0 reads 0.
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_cnt_lead0_64b (
+        din,
+
+        lead0
+);
+
+
+input [63:0]    din;                    // data in- count its leading 0's
+
+output [5:0]    lead0;                  // number of leading 0's in data in
+
+
+wire		din_63_60_eq_0;
+wire		din_63_62_eq_0;
+wire		lead0_63_60_0;
+wire		din_59_56_eq_0;
+wire		din_59_58_eq_0;
+wire		lead0_59_56_0;
+wire		din_55_52_eq_0;
+wire		din_55_54_eq_0;
+wire		lead0_55_52_0;
+wire		din_51_48_eq_0;
+wire		din_51_50_eq_0;
+wire		lead0_51_48_0;
+wire		din_47_44_eq_0;
+wire		din_47_46_eq_0;
+wire		lead0_47_44_0;
+wire		din_43_40_eq_0;
+wire		din_43_42_eq_0;
+wire		lead0_43_40_0;
+wire		din_39_36_eq_0;
+wire		din_39_38_eq_0;
+wire		lead0_39_36_0;
+wire		din_35_32_eq_0;
+wire		din_35_34_eq_0;
+wire		lead0_35_32_0;
+wire		din_31_28_eq_0;
+wire		din_31_30_eq_0;
+wire		lead0_31_28_0;
+wire		din_27_24_eq_0;
+wire		din_27_26_eq_0;
+wire		lead0_27_24_0;
+wire		din_23_20_eq_0;
+wire		din_23_22_eq_0;
+wire		lead0_23_20_0;
+wire		din_19_16_eq_0;
+wire		din_19_18_eq_0;
+wire		lead0_19_16_0;
+wire		din_15_12_eq_0;
+wire		din_15_14_eq_0;
+wire		lead0_15_12_0;
+wire		din_11_8_eq_0;
+wire		din_11_10_eq_0;
+wire		lead0_11_8_0;
+wire		din_7_4_eq_0;
+wire		din_7_6_eq_0;
+wire		lead0_7_4_0;
+wire		din_3_0_eq_0;
+wire		din_3_2_eq_0;
+wire		lead0_3_0_0;
+wire		din_63_56_eq_0;
+wire		lead0_63_56_1;
+wire		lead0_63_56_0;
+wire		din_55_48_eq_0;
+wire		lead0_55_48_1;
+wire		lead0_55_48_0;
+wire		din_47_40_eq_0;
+wire		lead0_47_40_1;
+wire		lead0_47_40_0;
+wire		din_39_32_eq_0;
+wire		lead0_39_32_1;
+wire		lead0_39_32_0;
+wire		din_31_24_eq_0;
+wire		lead0_31_24_1;
+wire		lead0_31_24_0;
+wire		din_23_16_eq_0;
+wire		lead0_23_16_1;
+wire		lead0_23_16_0;
+wire		din_15_8_eq_0;
+wire		lead0_15_8_1;
+wire		lead0_15_8_0;
+wire		din_7_0_eq_0;
+wire		lead0_7_0_1;
+wire		lead0_7_0_0;
+wire		din_63_48_eq_0;
+wire		lead0_63_48_2;
+wire		lead0_63_48_1;
+wire		lead0_63_48_0;
+wire		din_47_32_eq_0;
+wire		lead0_47_32_2;
+wire		lead0_47_32_1;
+wire		lead0_47_32_0;
+wire		din_31_16_eq_0;
+wire		lead0_31_16_2;
+wire		lead0_31_16_1;
+wire		lead0_31_16_0;
+wire		din_15_0_eq_0;
+wire		lead0_15_0_2;
+wire		lead0_15_0_1;
+wire		lead0_15_0_0;
+wire		din_63_32_eq_0;
+wire		lead0_63_32_3;
+wire		lead0_63_32_2;
+wire		lead0_63_32_1;
+wire		lead0_63_32_0;		// was an implicit net; declared explicitly
+wire		din_31_0_eq_0;
+wire		lead0_31_0_3;
+wire		lead0_31_0_2;
+wire		lead0_31_0_1;
+wire		lead0_31_0_0;
+wire		lead0_6;		// internal only- not part of lead0[5:0]
+wire		lead0_5;
+wire		lead0_4;
+wire		lead0_3;
+wire		lead0_2;
+wire		lead0_1;
+wire		lead0_0;
+wire [5:0]	lead0;
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Instantiations of lead 0 building blocks.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_63_60 (
+	.din			(din[63:60]),
+
+	.din_3_0_eq_0		(din_63_60_eq_0),
+	.din_3_2_eq_0		(din_63_62_eq_0),
+	.lead0_4b_0		(lead0_63_60_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_59_56 (
+        .din                    (din[59:56]),
+
+        .din_3_0_eq_0           (din_59_56_eq_0),
+        .din_3_2_eq_0           (din_59_58_eq_0),
+        .lead0_4b_0             (lead0_59_56_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_55_52 (
+        .din                    (din[55:52]),
+
+        .din_3_0_eq_0           (din_55_52_eq_0),
+        .din_3_2_eq_0           (din_55_54_eq_0),
+        .lead0_4b_0             (lead0_55_52_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_51_48 (
+        .din                    (din[51:48]),
+
+        .din_3_0_eq_0           (din_51_48_eq_0),
+        .din_3_2_eq_0           (din_51_50_eq_0),
+        .lead0_4b_0             (lead0_51_48_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_47_44 (
+        .din                    (din[47:44]),
+
+        .din_3_0_eq_0           (din_47_44_eq_0),
+        .din_3_2_eq_0           (din_47_46_eq_0),
+        .lead0_4b_0             (lead0_47_44_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_43_40 (
+        .din                    (din[43:40]),
+
+        .din_3_0_eq_0           (din_43_40_eq_0),
+        .din_3_2_eq_0           (din_43_42_eq_0),
+        .lead0_4b_0             (lead0_43_40_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_39_36 (
+        .din                    (din[39:36]),
+
+        .din_3_0_eq_0           (din_39_36_eq_0),
+        .din_3_2_eq_0           (din_39_38_eq_0),
+        .lead0_4b_0             (lead0_39_36_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_35_32 (
+        .din                    (din[35:32]),
+
+        .din_3_0_eq_0           (din_35_32_eq_0),
+        .din_3_2_eq_0           (din_35_34_eq_0),
+        .lead0_4b_0             (lead0_35_32_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_31_28 (
+        .din                    (din[31:28]),
+
+        .din_3_0_eq_0           (din_31_28_eq_0),
+        .din_3_2_eq_0           (din_31_30_eq_0),
+        .lead0_4b_0             (lead0_31_28_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_27_24 (
+        .din                    (din[27:24]),
+
+        .din_3_0_eq_0           (din_27_24_eq_0),
+        .din_3_2_eq_0           (din_27_26_eq_0),
+        .lead0_4b_0             (lead0_27_24_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_23_20 (
+        .din                    (din[23:20]),
+
+        .din_3_0_eq_0           (din_23_20_eq_0),
+        .din_3_2_eq_0           (din_23_22_eq_0),
+        .lead0_4b_0             (lead0_23_20_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_19_16 (
+        .din                    (din[19:16]),
+
+        .din_3_0_eq_0           (din_19_16_eq_0),
+        .din_3_2_eq_0           (din_19_18_eq_0),
+        .lead0_4b_0             (lead0_19_16_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_15_12 (
+        .din                    (din[15:12]),
+
+        .din_3_0_eq_0           (din_15_12_eq_0),
+        .din_3_2_eq_0           (din_15_14_eq_0),
+        .lead0_4b_0             (lead0_15_12_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_11_8 (
+        .din                    (din[11:8]),
+
+        .din_3_0_eq_0           (din_11_8_eq_0),
+        .din_3_2_eq_0           (din_11_10_eq_0),
+        .lead0_4b_0             (lead0_11_8_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_7_4 (
+        .din                    (din[7:4]),
+
+        .din_3_0_eq_0           (din_7_4_eq_0),
+        .din_3_2_eq_0           (din_7_6_eq_0),
+        .lead0_4b_0             (lead0_7_4_0)
+);
+
+fpu_cnt_lead0_lvl1 i_fpu_cnt_lead0_lvl1_3_0 (
+        .din                    (din[3:0]),
+
+        .din_3_0_eq_0           (din_3_0_eq_0),
+        .din_3_2_eq_0           (din_3_2_eq_0),
+        .lead0_4b_0             (lead0_3_0_0)
+);
+
+// Level 2: pair up the 4 bit results into 8 bit results.
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_63_56 (
+	.din_7_4_eq_0		(din_63_60_eq_0),
+	.din_7_6_eq_0		(din_63_62_eq_0),
+	.lead0_4b_0_hi		(lead0_63_60_0),
+	.din_3_0_eq_0		(din_59_56_eq_0),
+	.din_3_2_eq_0		(din_59_58_eq_0),
+	.lead0_4b_0_lo		(lead0_59_56_0),
+
+	.din_7_0_eq_0		(din_63_56_eq_0),
+	.lead0_8b_1		(lead0_63_56_1),
+	.lead0_8b_0		(lead0_63_56_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_55_48 (
+        .din_7_4_eq_0           (din_55_52_eq_0),
+        .din_7_6_eq_0           (din_55_54_eq_0),
+        .lead0_4b_0_hi          (lead0_55_52_0),
+        .din_3_0_eq_0           (din_51_48_eq_0),
+        .din_3_2_eq_0           (din_51_50_eq_0),
+        .lead0_4b_0_lo          (lead0_51_48_0),
+
+        .din_7_0_eq_0           (din_55_48_eq_0),
+        .lead0_8b_1             (lead0_55_48_1),
+        .lead0_8b_0             (lead0_55_48_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_47_40 (
+        .din_7_4_eq_0           (din_47_44_eq_0),
+        .din_7_6_eq_0           (din_47_46_eq_0),
+        .lead0_4b_0_hi          (lead0_47_44_0),
+        .din_3_0_eq_0           (din_43_40_eq_0),
+        .din_3_2_eq_0           (din_43_42_eq_0),
+        .lead0_4b_0_lo          (lead0_43_40_0),
+
+        .din_7_0_eq_0           (din_47_40_eq_0),
+        .lead0_8b_1             (lead0_47_40_1),
+        .lead0_8b_0             (lead0_47_40_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_39_32 (
+        .din_7_4_eq_0           (din_39_36_eq_0),
+        .din_7_6_eq_0           (din_39_38_eq_0),
+        .lead0_4b_0_hi          (lead0_39_36_0),
+        .din_3_0_eq_0           (din_35_32_eq_0),
+        .din_3_2_eq_0           (din_35_34_eq_0),
+        .lead0_4b_0_lo          (lead0_35_32_0),
+
+        .din_7_0_eq_0           (din_39_32_eq_0),
+        .lead0_8b_1             (lead0_39_32_1),
+        .lead0_8b_0             (lead0_39_32_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_31_24 (
+        .din_7_4_eq_0           (din_31_28_eq_0),
+        .din_7_6_eq_0           (din_31_30_eq_0),
+        .lead0_4b_0_hi          (lead0_31_28_0),
+        .din_3_0_eq_0           (din_27_24_eq_0),
+        .din_3_2_eq_0           (din_27_26_eq_0),
+        .lead0_4b_0_lo          (lead0_27_24_0),
+
+        .din_7_0_eq_0           (din_31_24_eq_0),
+        .lead0_8b_1             (lead0_31_24_1),
+        .lead0_8b_0             (lead0_31_24_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_23_16 (
+        .din_7_4_eq_0           (din_23_20_eq_0),
+        .din_7_6_eq_0           (din_23_22_eq_0),
+        .lead0_4b_0_hi          (lead0_23_20_0),
+        .din_3_0_eq_0           (din_19_16_eq_0),
+        .din_3_2_eq_0           (din_19_18_eq_0),
+        .lead0_4b_0_lo          (lead0_19_16_0),
+
+        .din_7_0_eq_0           (din_23_16_eq_0),
+        .lead0_8b_1             (lead0_23_16_1),
+        .lead0_8b_0             (lead0_23_16_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_15_8 (
+        .din_7_4_eq_0           (din_15_12_eq_0),
+        .din_7_6_eq_0           (din_15_14_eq_0),
+        .lead0_4b_0_hi          (lead0_15_12_0),
+        .din_3_0_eq_0           (din_11_8_eq_0),
+        .din_3_2_eq_0           (din_11_10_eq_0),
+        .lead0_4b_0_lo          (lead0_11_8_0),
+
+        .din_7_0_eq_0           (din_15_8_eq_0),
+        .lead0_8b_1             (lead0_15_8_1),
+        .lead0_8b_0             (lead0_15_8_0)
+);
+
+fpu_cnt_lead0_lvl2 i_fpu_cnt_lead0_lvl2_7_0 (
+        .din_7_4_eq_0           (din_7_4_eq_0),
+        .din_7_6_eq_0           (din_7_6_eq_0),
+        .lead0_4b_0_hi          (lead0_7_4_0),
+        .din_3_0_eq_0           (din_3_0_eq_0),
+        .din_3_2_eq_0           (din_3_2_eq_0),
+        .lead0_4b_0_lo          (lead0_3_0_0),
+
+        .din_7_0_eq_0           (din_7_0_eq_0),
+        .lead0_8b_1             (lead0_7_0_1),
+        .lead0_8b_0             (lead0_7_0_0)
+);
+
+// Level 3: pair up the 8 bit results into 16 bit results.
+fpu_cnt_lead0_lvl3 i_fpu_cnt_lead0_lvl3_63_48 (
+	.din_15_8_eq_0		(din_63_56_eq_0),
+	.din_15_12_eq_0		(din_63_60_eq_0),
+	.lead0_8b_1_hi		(lead0_63_56_1),
+	.lead0_8b_0_hi		(lead0_63_56_0),
+	.din_7_0_eq_0		(din_55_48_eq_0),
+	.din_7_4_eq_0		(din_55_52_eq_0),
+	.lead0_8b_1_lo		(lead0_55_48_1),
+	.lead0_8b_0_lo		(lead0_55_48_0),
+
+	.din_15_0_eq_0		(din_63_48_eq_0),
+	.lead0_16b_2		(lead0_63_48_2),
+	.lead0_16b_1		(lead0_63_48_1),
+	.lead0_16b_0		(lead0_63_48_0)
+);
+
+fpu_cnt_lead0_lvl3 i_fpu_cnt_lead0_lvl3_47_32 (
+        .din_15_8_eq_0          (din_47_40_eq_0),
+        .din_15_12_eq_0         (din_47_44_eq_0),
+        .lead0_8b_1_hi          (lead0_47_40_1),
+        .lead0_8b_0_hi          (lead0_47_40_0),
+        .din_7_0_eq_0           (din_39_32_eq_0),
+        .din_7_4_eq_0           (din_39_36_eq_0),
+        .lead0_8b_1_lo          (lead0_39_32_1),
+        .lead0_8b_0_lo          (lead0_39_32_0),
+
+        .din_15_0_eq_0          (din_47_32_eq_0),
+        .lead0_16b_2            (lead0_47_32_2),
+        .lead0_16b_1            (lead0_47_32_1),
+        .lead0_16b_0            (lead0_47_32_0)
+);
+
+fpu_cnt_lead0_lvl3 i_fpu_cnt_lead0_lvl3_31_16 (
+        .din_15_8_eq_0          (din_31_24_eq_0),
+        .din_15_12_eq_0         (din_31_28_eq_0),
+        .lead0_8b_1_hi          (lead0_31_24_1),
+        .lead0_8b_0_hi          (lead0_31_24_0),
+        .din_7_0_eq_0           (din_23_16_eq_0),
+        .din_7_4_eq_0           (din_23_20_eq_0),
+        .lead0_8b_1_lo          (lead0_23_16_1),
+        .lead0_8b_0_lo          (lead0_23_16_0),
+
+        .din_15_0_eq_0          (din_31_16_eq_0),
+        .lead0_16b_2            (lead0_31_16_2),
+        .lead0_16b_1            (lead0_31_16_1),
+        .lead0_16b_0            (lead0_31_16_0)
+);
+
+fpu_cnt_lead0_lvl3 i_fpu_cnt_lead0_lvl3_15_0 (
+        .din_15_8_eq_0          (din_15_8_eq_0),
+        .din_15_12_eq_0         (din_15_12_eq_0),
+        .lead0_8b_1_hi          (lead0_15_8_1),
+        .lead0_8b_0_hi          (lead0_15_8_0),
+        .din_7_0_eq_0           (din_7_0_eq_0),
+        .din_7_4_eq_0           (din_7_4_eq_0),
+        .lead0_8b_1_lo          (lead0_7_0_1),
+        .lead0_8b_0_lo          (lead0_7_0_0),
+
+        .din_15_0_eq_0          (din_15_0_eq_0),
+        .lead0_16b_2            (lead0_15_0_2),
+        .lead0_16b_1            (lead0_15_0_1),
+        .lead0_16b_0            (lead0_15_0_0)
+);
+
+// Level 4: pair up the 16 bit results into 32 bit results.
+fpu_cnt_lead0_lvl4 i_fpu_cnt_lead0_lvl4_63_32 (
+	.din_31_16_eq_0		(din_63_48_eq_0),
+	.din_31_24_eq_0		(din_63_56_eq_0),
+	.lead0_16b_2_hi		(lead0_63_48_2),
+	.lead0_16b_1_hi		(lead0_63_48_1),
+	.lead0_16b_0_hi		(lead0_63_48_0),
+	.din_15_0_eq_0		(din_47_32_eq_0),
+	.din_15_8_eq_0		(din_47_40_eq_0),
+	.lead0_16b_2_lo		(lead0_47_32_2),
+	.lead0_16b_1_lo		(lead0_47_32_1),
+	.lead0_16b_0_lo		(lead0_47_32_0),
+
+	.din_31_0_eq_0		(din_63_32_eq_0),
+	.lead0_32b_3		(lead0_63_32_3),
+	.lead0_32b_2		(lead0_63_32_2),
+	.lead0_32b_1		(lead0_63_32_1),
+	.lead0_32b_0		(lead0_63_32_0)
+);
+
+fpu_cnt_lead0_lvl4 i_fpu_cnt_lead0_lvl4_31_0 (
+        .din_31_16_eq_0         (din_31_16_eq_0),
+        .din_31_24_eq_0         (din_31_24_eq_0),
+        .lead0_16b_2_hi         (lead0_31_16_2),
+        .lead0_16b_1_hi         (lead0_31_16_1),
+        .lead0_16b_0_hi         (lead0_31_16_0),
+        .din_15_0_eq_0          (din_15_0_eq_0),
+        .din_15_8_eq_0          (din_15_8_eq_0),
+        .lead0_16b_2_lo         (lead0_15_0_2),
+        .lead0_16b_1_lo         (lead0_15_0_1),
+        .lead0_16b_0_lo         (lead0_15_0_0),
+
+        .din_31_0_eq_0          (din_31_0_eq_0),
+        .lead0_32b_3            (lead0_31_0_3),
+        .lead0_32b_2            (lead0_31_0_2),
+        .lead0_32b_1            (lead0_31_0_1),
+        .lead0_32b_0            (lead0_31_0_0)
+);
+
+// Top level merge of the two 32 bit results; the upper half has priority.
+assign lead0_6= din_63_32_eq_0 && din_31_0_eq_0;	// both halves all 0
+
+assign lead0_5= (!lead0_6) && din_63_32_eq_0;	// first 1 is in the lower half
+
+assign lead0_4= ((!din_63_32_eq_0) && din_63_48_eq_0)
+		|| (din_63_32_eq_0 && din_31_16_eq_0 && (!lead0_6));
+
+// Bits 3:0 come from whichever 32 bit half holds the first 1.
+assign lead0_3= ((!din_63_32_eq_0) && lead0_63_32_3)
+		|| (din_63_32_eq_0 && lead0_31_0_3 && (!lead0_6));
+
+assign lead0_2= ((!din_63_32_eq_0) && lead0_63_32_2)
+		|| (din_63_32_eq_0 && lead0_31_0_2 && (!lead0_6));
+
+assign lead0_1= ((!din_63_32_eq_0) && lead0_63_32_1)
+		|| (din_63_32_eq_0 && lead0_31_0_1 && (!lead0_6));
+
+assign lead0_0= ((!din_63_32_eq_0) && lead0_63_32_0)
+		|| (din_63_32_eq_0 && lead0_31_0_0 && (!lead0_6));
+
+// lead0_6 is intentionally not driven out; the output is only 6 bits wide.
+assign lead0[5:0]= {lead0_5, lead0_4, lead0_3, lead0_2, lead0_1,
+		lead0_0};
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_in.v
===================================================================
--- /trunk/T1-FPU/fpu_in.v	(revision 6)
+++ /trunk/T1-FPU/fpu_in.v	(revision 6)
@@ -0,0 +1,310 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_in.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//	FPU request input: wraps fpu_in_ctl (input Q control, pipe requests and
+//	pipe clock enables) and fpu_in_dp (request fields and operands).
+//	The scan chain runs si -> fpu_in_ctl -> fpu_in_dp -> so.
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_in (
+	pcx_fpio_data_rdy_px2,
+	pcx_fpio_data_px2,
+	a1stg_step,
+	m1stg_step,
+	d1stg_step,
+	add_pipe_active,
+	mul_pipe_active,
+	div_pipe_active,
+	inq_dout,
+	sehold,
+	arst_l,
+	grst_l,
+	rclk,
+
+	fadd_clken_l,
+	fmul_clken_l,
+	fdiv_clken_l,
+
+	inq_add,
+	inq_mul,
+	inq_div,
+	inq_id,
+	inq_rnd_mode,
+	inq_fcc,
+	inq_op,
+	inq_in1_exp_neq_ffs,
+	inq_in1_exp_eq_0,
+	inq_in1_53_0_neq_0,
+	inq_in1_50_0_neq_0,
+	inq_in1_53_32_neq_0,
+	inq_in1,
+	inq_in2_exp_neq_ffs,
+	inq_in2_exp_eq_0,
+	inq_in2_53_0_neq_0,
+	inq_in2_50_0_neq_0,
+	inq_in2_53_32_neq_0,
+	inq_in2,
+
+	fp_id_in,
+	fp_rnd_mode_in,
+	fp_fcc_in,
+	fp_op_in,
+	fp_src1_in,
+	fp_src2_in,
+	inq_rdaddr,
+	inq_wraddr,
+	inq_read_en,
+	inq_we,
+
+	se,
+	si,
+	so
+);
+
+
+input		pcx_fpio_data_rdy_px2;	// FPU request ready from PCX
+input [123:0]	pcx_fpio_data_px2;	// FPU request data from PCX
+input		a1stg_step;		// add pipe load
+input		m1stg_step;		// multiply pipe load
+input		d1stg_step;		// divide pipe load
+input 		add_pipe_active;        // add pipe is executing a valid instr
+input 		mul_pipe_active;        // mul pipe is executing a valid instr
+input 		div_pipe_active;        // div pipe is executing a valid instr
+input [154:0] inq_dout; // data read out from input Q SRAM
+input sehold; // macrotest hold for sram output mux in fpu_in_dp
+input		arst_l;			// global async. reset- asserted low
+input		grst_l;			// global sync. reset- asserted low
+input		rclk;			// global clock
+
+output		fadd_clken_l;		// add      pipe clk enable - asserted low
+output		fmul_clken_l;		// multiply pipe clk enable - asserted low
+output		fdiv_clken_l;		// divide   pipe clk enable - asserted low
+
+output		inq_add;		// add pipe request
+output		inq_mul;		// multiply pipe request
+output		inq_div;		// divide pipe request
+output [4:0]	inq_id;			// request ID to the operation pipes
+output [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+output [1:0]	inq_fcc;		// request cc ID to op pipes
+output [7:0]	inq_op;			// request opcode to op pipes
+output		inq_in1_exp_neq_ffs;	// request operand 1 exp!=ff's
+output		inq_in1_exp_eq_0;	// request operand 1 exp==0
+output		inq_in1_53_0_neq_0;	// request operand 1[53:0]!=0
+output		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+output		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+output [63:0]	inq_in1;		// request operand 1 to op pipes
+output		inq_in2_exp_neq_ffs;	// request operand 2 exp!=ff's
+output		inq_in2_exp_eq_0;	// request operand 2 exp==0
+output		inq_in2_53_0_neq_0;	// request operand 2[53:0]!=0
+output		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+output		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+output [63:0]	inq_in2;		// request operand 2 to op pipes
+
+// 6/20/03: New outputs to drive fpu-level i_fpu_inq_sram inputs 
+output [4:0] fp_id_in; // id to be written into inq_sram
+output [1:0] fp_rnd_mode_in; // rnd_mode to be written into inq_sram
+output [1:0] fp_fcc_in; // fcc to be written into inq_sram
+output [7:0] fp_op_in; // op field to be written into inq_sram
+output [68:0] fp_src1_in; // operand1 and its pre-computed bits portion
+output [68:0] fp_src2_in; // operand2, includes pre-computed bits
+output [3:0] inq_rdaddr; // read address for inq_sram
+output [3:0] inq_wraddr; // write address for inq_sram
+output inq_read_en; // read enable for inq_sram
+output inq_we; // write enable for inq_sram
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+
+
+// Assertions
+//
+// PCX/FPU Protocol Assumptions:
+// -----------------------------
+// 
+// (1) If a split transaction occurs (fpu packet type A --> N stall cycles -->
+// fpu packet type B), the next valid packet after the N stall cycles will always
+// be "fpu packet type B"
+//
+// not0in state_transition -var {pcx_fpio_data_rdy_px2, (pcx_fpio_data_px2[123] & (pcx_fpio_data_px2[122:118]==5'h0a)), (pcx_fpio_data_px2[122:118]==5'h0b)} -val {1'b1, 1'b1, 1'b0} -next {1'b1, 1'b0, 1'b1} {1'b0, 1'b0, 1'b0} {1'b0, 1'b0, 1'b1} {1'b0, 1'b1, 1'b0} -match_by_cycle -message "PCX/FPU protocol violation"
+// 
+// (3) Crossbar always provides a two beat fpu transfer (packet types A and B).
+// Single source instructions produce an invalid transfer on the second beat
+// (packet type B).
+//
+// not0in custom -fire (pcx_fpio_data_rdy_px2 & pcx_fpio_data_px2[123] & (pcx_fpio_data_px2[122:118]==5'h0b) & pcx_fpio_data_px2[79]) -message "FPU given valid PCX packet B for single src fpop"
+// 
+// (4) For single precision operands, the unused 32-bit region of the 64-bit
+// source is forced to zero by the FFU. The 32-bits of single precision data is
+// always contained in the upper 32-bits of the 64-bit source.
+//
+// not0in custom -fire (pcx_fpio_data_rdy_px2 & pcx_fpio_data_px2[123] & (pcx_fpio_data_px2[122:118]==5'h0a) & ~pcx_fpio_data_px2[73] & ~(pcx_fpio_data_px2[31:0]==32'b0)) -message "FPU given invalid SP data in PCX packet A"
+// not0in custom -fire (pcx_fpio_data_rdy_px2 & pcx_fpio_data_px2[123] & (pcx_fpio_data_px2[122:118]==5'h0b) & ~pcx_fpio_data_px2[73] & ~(pcx_fpio_data_px2[31:0]==32'b0)) -message "FPU given invalid SP data in PCX packet B"
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_in_ctl.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire		inq_we;			// input Q write enable
+wire [3:0]	inq_wraddr;		// input Q write address
+wire            inq_read_en;            // input Q read enable
+wire [3:0]	inq_rdaddr;		// input Q read address
+wire		inq_bp;			// bypass the input Q SRAM
+wire		inq_bp_inv;		// don't bypass the input Q SRAM
+wire		inq_fwrd;		// input Q is fwrd
+wire		inq_fwrd_inv;		// input Q is not fwrd
+wire		inq_add;		// add pipe request
+wire		inq_mul;		// multiply pipe request
+wire		inq_div;		// divide pipe request
+wire  		fadd_clken_l;		// add      pipe clk enable - asserted low
+wire 		fmul_clken_l;		// multiply pipe clk enable - asserted low
+wire 		fdiv_clken_l;		// divide   pipe clk enable - asserted low
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_in_dp.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [7:0]	fp_op_in;		// request opcode
+wire            fp_op_in_7in;           // request opcode
+wire [4:0]	inq_id;			// request ID to the operation pipes
+wire [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+wire [1:0]	inq_fcc;		// request cc ID to op pipes
+wire [7:0]	inq_op;			// request opcode to op pipes
+wire		inq_in1_exp_neq_ffs;	// request operand 1 exp!=ff's
+wire		inq_in1_exp_eq_0;	// request operand 1 exp==0
+wire		inq_in1_53_0_neq_0;	// request operand 1[53:0]!=0
+wire		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+wire		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+wire [63:0]	inq_in1;		// request operand 1 to op pipes
+wire		inq_in2_exp_neq_ffs;	// request operand 2 exp!=ff's
+wire		inq_in2_exp_eq_0;	// request operand 2 exp==0
+wire		inq_in2_53_0_neq_0;	// request operand 2[53:0]!=0
+wire		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+wire		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+wire [63:0]	inq_in2;		// request operand 2 to op pipes
+
+// 6/20/03: New outputs to drive fpu-level i_fpu_inq_sram inputs 
+wire [4:0] fp_id_in; // id to be written into inq_sram
+wire [1:0] fp_rnd_mode_in; // rnd_mode to be written into inq_sram
+wire [1:0] fp_fcc_in; // fcc to be written into inq_sram
+wire [68:0] fp_src1_in; // operand1 and its pre-computed bits portion
+wire [68:0] fp_src2_in; // operand2, includes pre-computed bits
+
+wire fp_data_rdy; // driven by fpu_in_ctl, consumed by fpu_in_dp
+wire scan_out_fpu_in_ctl; // scan link fpu_in_ctl.so -> fpu_in_dp.si (was implicit)
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Instantiations.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+fpu_in_ctl fpu_in_ctl (
+	.pcx_fpio_data_rdy_px2		(pcx_fpio_data_rdy_px2),
+	.pcx_fpio_data_px2		(pcx_fpio_data_px2[123:118]),	// top 6 bits only
+	.fp_op_in    			(fp_op_in[3:2]),	// 2 opcode bits from fpu_in_dp
+        .fp_op_in_7in                   (fp_op_in_7in),
+	.a1stg_step			(a1stg_step),
+	.m1stg_step			(m1stg_step),
+	.d1stg_step			(d1stg_step),
+	.add_pipe_active		(add_pipe_active),
+	.mul_pipe_active		(mul_pipe_active),
+	.div_pipe_active		(div_pipe_active),
+	.sehold (sehold),
+	.arst_l				(arst_l),
+	.grst_l				(grst_l),
+	.rclk			(rclk),
+
+        .fp_data_rdy			(fp_data_rdy),
+	.fadd_clken_l			(fadd_clken_l),
+	.fmul_clken_l			(fmul_clken_l),
+	.fdiv_clken_l			(fdiv_clken_l),
+
+	.inq_we				(inq_we),
+	.inq_wraddr			(inq_wraddr[3:0]),
+	.inq_read_en			(inq_read_en),
+	.inq_rdaddr			(inq_rdaddr[3:0]),
+	.inq_bp				(inq_bp),
+	.inq_bp_inv			(inq_bp_inv),
+	.inq_fwrd			(inq_fwrd),
+	.inq_fwrd_inv			(inq_fwrd_inv),
+	.inq_add			(inq_add),
+	.inq_mul			(inq_mul),
+	.inq_div			(inq_div),
+
+	.se				(se),
+	.si				(si),			// head of the scan chain
+	.so				(scan_out_fpu_in_ctl)
+);
+
+
+fpu_in_dp fpu_in_dp (
+        .fp_data_rdy			(fp_data_rdy),
+        .fpio_data_px2_116_112          (pcx_fpio_data_px2[116:112]),
+        .fpio_data_px2_79_72            (pcx_fpio_data_px2[79:72]),
+        .fpio_data_px2_67_0             (pcx_fpio_data_px2[67:0]),
+	.inq_fwrd			(inq_fwrd),
+	.inq_fwrd_inv			(inq_fwrd_inv),
+	.inq_bp				(inq_bp),
+	.inq_bp_inv			(inq_bp_inv),
+	.inq_dout    (inq_dout[154:0]),
+	.rclk			(rclk),
+
+        .fp_op_in_7in                   (fp_op_in_7in),
+	.inq_id				(inq_id[4:0]),
+	.inq_rnd_mode			(inq_rnd_mode[1:0]),
+	.inq_fcc			(inq_fcc[1:0]),
+	.inq_op				(inq_op[7:0]),
+	.inq_in1_exp_neq_ffs		(inq_in1_exp_neq_ffs),
+	.inq_in1_exp_eq_0		(inq_in1_exp_eq_0),
+	.inq_in1_53_0_neq_0		(inq_in1_53_0_neq_0),
+	.inq_in1_50_0_neq_0		(inq_in1_50_0_neq_0),
+	.inq_in1_53_32_neq_0		(inq_in1_53_32_neq_0),
+	.inq_in1			(inq_in1[63:0]),
+	.inq_in2_exp_neq_ffs		(inq_in2_exp_neq_ffs),
+	.inq_in2_exp_eq_0		(inq_in2_exp_eq_0),
+	.inq_in2_53_0_neq_0		(inq_in2_53_0_neq_0),
+	.inq_in2_50_0_neq_0		(inq_in2_50_0_neq_0),
+	.inq_in2_53_32_neq_0		(inq_in2_53_32_neq_0),
+	.inq_in2			(inq_in2[63:0]),
+
+	.fp_id_in (fp_id_in[4:0]),
+	.fp_rnd_mode_in (fp_rnd_mode_in[1:0]),
+	.fp_fcc_in (fp_fcc_in[1:0]),
+	.fp_op_in (fp_op_in[7:0]),
+	.fp_src1_in (fp_src1_in[68:0]),
+	.fp_src2_in (fp_src2_in[68:0]),
+
+	.se                             (se),
+        .si                             (scan_out_fpu_in_ctl),
+        .so                             (so)			// tail of the scan chain
+);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_out.v
===================================================================
--- /trunk/T1-FPU/fpu_out.v	(revision 6)
+++ /trunk/T1-FPU/fpu_out.v	(revision 6)
@@ -0,0 +1,209 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_out.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	FPU result output.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+module fpu_out (
+	d8stg_fdiv_in,
+	m6stg_fmul_in,
+	a6stg_fadd_in,
+	div_id_out_in,
+	m6stg_id_in,
+	add_id_out_in,
+	div_exc_out,
+	d8stg_fdivd,
+	d8stg_fdivs,
+	div_sign_out,
+	div_exp_out,
+	div_frac_out,
+	mul_exc_out,
+	m6stg_fmul_dbl_dst,
+	m6stg_fmuls,
+	mul_sign_out,
+	mul_exp_out,
+	mul_frac_out,
+	add_exc_out,
+	a6stg_fcmpop,
+	add_cc_out,
+	add_fcc_out,
+	a6stg_dbl_dst,
+	a6stg_sng_dst,
+	a6stg_long_dst,
+	a6stg_int_dst,
+	add_sign_out,
+	add_exp_out,
+	add_frac_out,
+	arst_l,
+	grst_l,
+	rclk,
+
+	fp_cpx_req_cq,
+	add_dest_rdy,
+	mul_dest_rdy,
+	div_dest_rdy,
+	fp_cpx_data_ca,
+
+	se,
+	si,
+	so
+);
+// Structural wrapper only: instantiates fpu_out_ctl (drives fp_cpx_req_cq,
+// req_thread, dest_rdy, *_dest_rdy) and fpu_out_dp (drives fp_cpx_data_ca).
+input		d8stg_fdiv_in;		// div pipe output request next cycle
+input		m6stg_fmul_in;		// mul pipe output request next cycle
+input		a6stg_fadd_in;		// add pipe output request next cycle
+input [9:0]	div_id_out_in;		// div pipe output ID next cycle
+input [9:0]	m6stg_id_in;		// mul pipe output ID next cycle
+input [9:0]	add_id_out_in;		// add pipe output ID next cycle
+input [4:0]	div_exc_out;		// divide pipe result- exception flags
+input		d8stg_fdivd;		// divide double- divide stage 8
+input		d8stg_fdivs;		// divide single- divide stage 8
+input		div_sign_out;		// divide sign output
+input [10:0]	div_exp_out;		// divide exponent output
+input [51:0]	div_frac_out;		// divide fraction output
+input [4:0]	mul_exc_out;		// multiply pipe result- exception flags
+input		m6stg_fmul_dbl_dst;	// double precision multiply result
+input		m6stg_fmuls;		// fmuls- multiply 6 stage
+input		mul_sign_out;		// multiply sign output
+input [10:0]	mul_exp_out;		// multiply exponent output
+input [51:0]	mul_frac_out;		// multiply fraction output
+input [4:0]	add_exc_out;		// add pipe result- exception flags
+input		a6stg_fcmpop;		// compare- add 6 stage
+input [1:0]	add_cc_out;		// add pipe result- condition
+input [1:0]	add_fcc_out;		// add pipe input fcc passed through
+input		a6stg_dbl_dst;		// float double result- add 6 stage
+input		a6stg_sng_dst;		// float single result- add 6 stage
+input		a6stg_long_dst;		// 64bit integer result- add 6 stage
+input		a6stg_int_dst;		// 32bit integer result- add 6 stage
+input		add_sign_out;		// add sign output
+input [10:0]	add_exp_out;		// add exponent output
+input [63:0]	add_frac_out;		// add fraction output
+input		arst_l;			// global async. reset- asserted low
+input		grst_l;			// global sync. reset- asserted low
+input		rclk;			// global clock
+
+output [7:0]	fp_cpx_req_cq;		// FPU result request to CPX
+output		add_dest_rdy;		// add pipe result request this cycle
+output		mul_dest_rdy;		// mul pipe result request this cycle
+output		div_dest_rdy;		// div pipe result request this cycle
+output [144:0]	fp_cpx_data_ca;		// FPU result to CPX
+
+input           se;                     // scan_enable (fanned out to both sub-blocks)
+input           si;                     // scan in  (enters fpu_out_ctl)
+output          so;                     // scan out (leaves fpu_out_dp)
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_out_ctl.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [7:0]	fp_cpx_req_cq;		// FPU result request to CPX
+wire [1:0]	req_thread;		// thread ID of result req this cycle
+wire [2:0]	dest_rdy;		// pipe with result request this cycle
+wire		add_dest_rdy;		// add pipe result request this cycle
+wire		mul_dest_rdy;		// mul pipe result request this cycle
+wire		div_dest_rdy;		// div pipe result request this cycle
+wire		scan_out_fpu_out_ctl;	// fpu_out_ctl scan out, feeds fpu_out_dp scan in
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_out_dp.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [144:0]	fp_cpx_data_ca;		// FPU result to CPX
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Instantiations.
+//	Scan order: si -> fpu_out_ctl -> scan_out_fpu_out_ctl -> fpu_out_dp -> so
+///////////////////////////////////////////////////////////////////////////////
+
+fpu_out_ctl fpu_out_ctl (
+	.d8stg_fdiv_in			(d8stg_fdiv_in),
+	.m6stg_fmul_in			(m6stg_fmul_in),
+	.a6stg_fadd_in			(a6stg_fadd_in),
+	.div_id_out_in			(div_id_out_in[9:0]),
+	.m6stg_id_in			(m6stg_id_in[9:0]),
+	.add_id_out_in			(add_id_out_in[9:0]),
+	.arst_l				(arst_l),
+	.grst_l				(grst_l),
+	.rclk			(rclk),
+
+	.fp_cpx_req_cq			(fp_cpx_req_cq[7:0]),
+	.req_thread			(req_thread[1:0]),
+	.dest_rdy			(dest_rdy[2:0]),
+	.add_dest_rdy			(add_dest_rdy),
+	.mul_dest_rdy			(mul_dest_rdy),
+	.div_dest_rdy			(div_dest_rdy),
+
+	.se                             (se),
+        .si                             (si),
+        .so                             (scan_out_fpu_out_ctl)
+);
+
+
+fpu_out_dp fpu_out_dp (
+	.dest_rdy			(dest_rdy[2:0]),
+	.req_thread			(req_thread[1:0]),
+	.div_exc_out			(div_exc_out[4:0]),
+	.d8stg_fdivd			(d8stg_fdivd),
+	.d8stg_fdivs			(d8stg_fdivs),
+	.div_sign_out			(div_sign_out),
+	.div_exp_out			(div_exp_out[10:0]),
+	.div_frac_out			(div_frac_out[51:0]),
+	.mul_exc_out			(mul_exc_out[4:0]),
+	.m6stg_fmul_dbl_dst		(m6stg_fmul_dbl_dst),
+	.m6stg_fmuls			(m6stg_fmuls),
+	.mul_sign_out			(mul_sign_out),
+	.mul_exp_out			(mul_exp_out[10:0]),
+	.mul_frac_out			(mul_frac_out[51:0]),
+	.add_exc_out			(add_exc_out[4:0]),
+	.a6stg_fcmpop			(a6stg_fcmpop),
+	.add_cc_out			(add_cc_out[1:0]),
+	.add_fcc_out			(add_fcc_out[1:0]),
+	.a6stg_dbl_dst			(a6stg_dbl_dst),
+	.a6stg_sng_dst			(a6stg_sng_dst),
+	.a6stg_long_dst			(a6stg_long_dst),
+	.a6stg_int_dst			(a6stg_int_dst),
+	.add_sign_out			(add_sign_out),
+	.add_exp_out			(add_exp_out[10:0]),
+	.add_frac_out			(add_frac_out[63:0]),
+	.rclk			(rclk),
+
+	.fp_cpx_data_ca			(fp_cpx_data_ca[144:0]),
+
+	.se                             (se),
+        .si                             (scan_out_fpu_out_ctl),
+        .so                             (so)
+);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_denorm_3b.v
===================================================================
--- /trunk/T1-FPU/fpu_denorm_3b.v	(revision 6)
+++ /trunk/T1-FPU/fpu_denorm_3b.v	(revision 6)
@@ -0,0 +1,60 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_denorm_3b.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Three bit comparison of two inputs when both will always have
+//		leading 0s.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_denorm_3b (
+	din1,
+	din2,
+
+	din2_din1_nz,
+	din2_din1_denorm
+);
+
+// Operands: two 3-bit fields examined together, one bit position at a time.
+input  [2:0]	din1;
+input  [2:0]	din2;
+
+// Results; the assigns further down give the exact definitions.
+output		din2_din1_nz;		// at least one bit of din1 or din2 is 1
+output		din2_din1_denorm;	// highest 1 in (din1 | din2) is a din2 bit
+
+// Bitwise OR of the operands: a 0 means both operands are 0 in that bit.
+wire [2:0]	either_set;
+
+assign either_set = din1 | din2;
+
+// Asserted as soon as any bit of either operand is set.
+assign din2_din1_nz = |either_set;
+
+// Scan from bit 2 down to bit 0: a din2 bit counts only if every higher bit
+// position is 0 in both operands.  The result is 0 when din2 is all zeros.
+assign din2_din1_denorm = din2[2]
+		| (~either_set[2] & din2[1])
+		| (~either_set[2] & ~either_set[1] & din2[0]);
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_in_ctl.v
===================================================================
--- /trunk/T1-FPU/fpu_in_ctl.v	(revision 6)
+++ /trunk/T1-FPU/fpu_in_ctl.v	(revision 6)
@@ -0,0 +1,873 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_in_ctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//      FPU input control logic.
+//
+///////////////////////////////////////////////////////////////////////////////
+ 
+module fpu_in_ctl (
+	pcx_fpio_data_rdy_px2,
+	pcx_fpio_data_px2,
+	fp_op_in,
+        fp_op_in_7in,
+	a1stg_step,
+	m1stg_step,
+	d1stg_step,
+	add_pipe_active,
+	mul_pipe_active,
+	div_pipe_active,
+	sehold,
+	arst_l,
+	grst_l,
+	rclk,
+
+        fp_data_rdy,
+	fadd_clken_l,
+	fmul_clken_l,
+	fdiv_clken_l,
+
+	inq_we,
+	inq_wraddr,
+	inq_read_en,
+	inq_rdaddr,
+	inq_bp,
+	inq_bp_inv,
+	inq_fwrd,
+	inq_fwrd_inv,
+	inq_add,
+	inq_mul,
+	inq_div,
+
+	se,
+	si,
+	so
+);
+
+
+input		pcx_fpio_data_rdy_px2;	// FPU request ready from PCX
+input [123:118]	pcx_fpio_data_px2;	// FPU request data from PCX
+input [3:2]	fp_op_in;		// request opcode
+input         	fp_op_in_7in;		// request opcode
+input		a1stg_step;		// add pipe load
+input		m1stg_step;		// multiply pipe load
+input		d1stg_step;		// divide pipe load
+input 		add_pipe_active;        // add pipe is executing a valid instr
+input 		mul_pipe_active;        // mul pipe is executing a valid instr
+input 		div_pipe_active;        // div pipe is executing a valid instr
+input sehold; // hold sram output MUX (for inq_data[155:0] in fpu_in_dp) for macrotest
+input		arst_l;			// global asynchronous reset- asserted low
+input		grst_l;			// global synchronous reset- asserted low
+input		rclk;		// global clock
+
+output          fp_data_rdy;		// pcx_fpio_data_rdy_px2 after dffr_s i_fp_data_rdy
+
+output		fadd_clken_l;		// add      pipe clk enable - asserted low
+output		fmul_clken_l;		// multiply pipe clk enable - asserted low
+output		fdiv_clken_l;		// divide   pipe clk enable - asserted low
+
+output		inq_we;			// input Q write enable
+output [3:0]	inq_wraddr;		// input Q write address
+output          inq_read_en;            // input Q read enable
+output [3:0]	inq_rdaddr;		// input Q read address
+output		inq_bp;			// bypass the input Q SRAM
+output		inq_bp_inv;		// don't bypass the input Q SRAM
+output		inq_fwrd;		// forward incoming packet (queue empty, packet valid)
+output		inq_fwrd_inv;		// complement of inq_fwrd
+output		inq_add;		// add pipe request
+output		inq_mul;		// multiply pipe request
+output		inq_div;		// divide pipe request
+
+input           se;                     // scan_enable
+input           si;                     // scan in  (not connected to any flop in this module)
+output          so;                     // scan out (not driven in this module)
+
+wire		in_ctl_rst_l;		// q of dffrl_in_ctl; inverted to form reset
+wire		reset;
+wire		fp_data_rdy;
+wire		fp_vld_in;
+wire [4:0]	fp_type_in;
+wire  		fadd_clken_l;
+wire 		fmul_clken_l;
+wire 		fdiv_clken_l;
+wire		fp_op_in_7;
+wire		fp_op_in_7_inv;
+wire		inq_we;
+wire            inq_read_en;
+wire [3:0]	inq_wrptr_plus1;
+wire		inq_wrptr_step;
+wire [3:0]	inq_wrptr;
+wire [3:0]	inq_div_wrptr_plus1;
+wire		inq_div_wrptr_step;
+wire [3:0]	inq_div_wrptr;
+wire [3:0]	inq_wraddr;
+wire [3:0]	inq_wraddr_del;
+wire		inq_re;
+wire [3:0]	inq_rdptr_plus1;
+wire [3:0]	inq_rdptr_in;
+wire [3:0]	inq_rdptr;
+wire		inq_div_re;
+wire [3:0]	inq_div_rdptr_plus1;
+wire [3:0]	inq_div_rdptr_in;
+wire [3:0]	inq_div_rdptr;
+wire		inq_div_rd_in;
+wire		inq_div_rd;
+wire [3:0]	inq_rdaddr;
+wire [3:0]	inq_rdaddr_del;
+wire		inq_bp;
+wire		inq_bp_inv;
+wire		inq_empty;
+wire		inq_div_empty;
+wire		inq_fwrd;
+wire		inq_fwrd_inv;
+wire		fp_add_in;
+wire		fp_mul_in;
+wire		fp_div_in;
+wire [7:0]	inq_rdptr_dec_in;
+wire [7:0]	inq_rdptr_dec;
+wire [7:0]	inq_div_rdptr_dec_in;
+wire [7:0]	inq_div_rdptr_dec;
+wire [15:0]	inq_rdaddr_del_dec_in;
+wire [15:0]	inq_rdaddr_del_dec;
+wire		inq_pipe0_we;
+wire		inq_pipe1_we;
+wire		inq_pipe2_we;
+wire		inq_pipe3_we;
+wire		inq_pipe4_we;
+wire		inq_pipe5_we;
+wire		inq_pipe6_we;
+wire		inq_pipe7_we;
+wire		inq_pipe8_we;
+wire		inq_pipe9_we;
+wire		inq_pipe10_we;
+wire		inq_pipe11_we;
+wire		inq_pipe12_we;
+wire		inq_pipe13_we;
+wire		inq_pipe14_we;
+wire		inq_pipe15_we;
+wire [2:0]	inq_pipe0;
+wire [2:0]	inq_pipe1;
+wire [2:0]	inq_pipe2;
+wire [2:0]	inq_pipe3;
+wire [2:0]	inq_pipe4;
+wire [2:0]	inq_pipe5;
+wire [2:0]	inq_pipe6;
+wire [2:0]	inq_pipe7;
+wire [2:0]	inq_pipe8;
+wire [2:0]	inq_pipe9;
+wire [2:0]	inq_pipe10;
+wire [2:0]	inq_pipe11;
+wire [2:0]	inq_pipe12;
+wire [2:0]	inq_pipe13;
+wire [2:0]	inq_pipe14;
+wire [2:0]	inq_pipe15;
+wire [2:0]	inq_pipe;
+wire		inq_div;
+wire		inq_diva;
+wire		inq_diva_dly;
+wire		d1stg_step_dly;
+wire		inq_mul;
+wire		inq_mula;
+wire		inq_add;
+wire		inq_adda;
+wire		valid_packet;
+wire            valid_packet_dly;
+wire		tag_sel;
+wire sehold_inv;
+wire		inq_adda_dly, inq_mula_dly;	// q of i_inq_adda_dly / i_inq_mula_dly
+
+dffrl_async #(1)  dffrl_in_ctl (
+  .din  (grst_l),
+  .clk  (rclk),
+  .rst_l(arst_l),
+  .q    (in_ctl_rst_l),
+	.se (se),
+	.si (),
+	.so ()
+  );
+// Active-high local reset: the inverse of the dffrl_in_ctl output.
+assign reset= (!in_ctl_rst_l);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Capture request and input control information.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffr_s #(1) i_fp_data_rdy (
+	.din	(pcx_fpio_data_rdy_px2),
+	.rst    (reset),
+        .clk    (rclk),
+
+        .q      (fp_data_rdy),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dff_s #(1) i_fp_vld_in (
+	.din	(pcx_fpio_data_px2[123]),
+	.clk    (rclk),
+
+        .q      (fp_vld_in),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dff_s #(5) i_fp_type_in (
+	.din	(pcx_fpio_data_px2[122:118]),
+        .clk    (rclk),
+
+        .q      (fp_type_in[4:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Select lines- extract the two operands.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign fp_op_in_7= fp_op_in_7in;
+
+assign fp_op_in_7_inv= (!fp_op_in_7);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Input queue control logic
+//		- write enables
+//		- write pointers
+//		- read enables
+//		- read pointers
+//		- write address
+//		- read address
+//	Address bit 3 selects the divide entries (fp_div_in / inq_div_rd_in).
+///////////////////////////////////////////////////////////////////////////////
+// Write when a ready, valid request has type 5'h0a with op bit 7 set or type 5'h0b with it clear.
+assign inq_we= fp_data_rdy && fp_vld_in
+		&& (((fp_type_in[4:0]==5'h0a) && fp_op_in_7)
+			|| ((fp_type_in[4:0]==5'h0b) && fp_op_in_7_inv));
+
+assign inq_wrptr_plus1[3:0]= inq_wrptr[3:0] + 4'h1;
+
+assign inq_wrptr_step= inq_we && (!fp_div_in);
+
+dffre_s #(4) i_inq_wrptr (
+	.din	(inq_wrptr_plus1[3:0]),
+	.en	(inq_wrptr_step),
+	.rst	(reset),
+	.clk    (rclk),
+
+        .q      (inq_wrptr[3:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign inq_div_wrptr_plus1[3:0]= inq_div_wrptr[3:0] + 4'h1;
+
+assign inq_div_wrptr_step= inq_we && fp_div_in;
+
+dffre_s #(4) i_inq_div_wrptr (
+        .din    (inq_div_wrptr_plus1[3:0]),
+        .en     (inq_div_wrptr_step),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_div_wrptr[3:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Write address: bit 3 = divide request, bits 2:0 = low bits of the matching write pointer.
+assign inq_wraddr[3:0]= {fp_div_in,
+		(({3{fp_div_in}}
+			    & inq_div_wrptr[2:0])
+		    | ({3{(!fp_div_in)}}
+			    & inq_wrptr[2:0]))};
+
+dff_s #(4) i_inq_wraddr_del (
+	.din	(inq_wraddr[3:0]),
+	.clk	(rclk),
+
+	.q	(inq_wraddr_del[3:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Read enable whenever either pointer pair differs (non-divide or divide entries pending).
+assign inq_read_en = ~inq_empty | ~inq_div_empty;
+// Non-divide read: the selected tag is add or multiply and that pipe's step is asserted.
+assign inq_re= (inq_adda && a1stg_step)
+		|| (inq_mula && m1stg_step);
+
+assign inq_rdptr_plus1[3:0]= inq_rdptr[3:0] + 4'h1;
+// Next read pointer: 0 while reset, +1 on inq_re, otherwise unchanged.
+assign inq_rdptr_in[3:0]= ({4{(inq_re && (!reset))}}
+			    & inq_rdptr_plus1[3:0])
+		| ({4{((!inq_re) && (!reset))}}
+			    & inq_rdptr[3:0]);
+
+dff_s #(4) i_inq_rdptr (
+	.din	(inq_rdptr_in[3:0]),
+	.clk    (rclk),
+
+        .q      (inq_rdptr[3:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Divide read: the selected tag is divide and d1stg_step is asserted.
+assign inq_div_re= (inq_diva && d1stg_step);
+
+assign inq_div_rdptr_plus1[3:0]= inq_div_rdptr[3:0] + 4'h1;
+
+assign inq_div_rdptr_in[3:0]= ({4{(inq_div_re && (!reset))}}
+                            & inq_div_rdptr_plus1[3:0])
+                | ({4{((!inq_div_re) && (!reset))}}
+                            & inq_div_rdptr[3:0]);
+
+dff_s #(4) i_inq_div_rdptr (
+        .din    (inq_div_rdptr_in[3:0]),
+        .clk    (rclk),
+
+        .q      (inq_div_rdptr[3:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Address the divide entries when they are pending, d1stg_step is set and the current tag is not divide.
+assign inq_div_rd_in= (!inq_div_empty) && d1stg_step && (!inq_diva);
+
+dff_s #(1) i_inq_div_rd (
+	.din	(inq_div_rd_in),
+	.clk    (rclk),
+
+        .q      (inq_div_rd),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Read address: bit 3 = divide read; bits 2:0 = divide read pointer (0 in reset) or next non-divide pointer.
+assign inq_rdaddr[3:0]= {inq_div_rd_in,
+		(({3{inq_div_rd_in}}
+			    & (inq_div_rdptr[2:0] & {3{(!reset)}}))
+		    | ({3{(!inq_div_rd_in)}}
+			    & inq_rdptr_in[2:0]))};
+
+dff_s #(4) i_inq_rdaddr_del (
+	.din	(inq_rdaddr[3:0]),
+        .clk	(rclk),
+
+        .q	(inq_rdaddr_del[3:0]),
+
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Input queue empty and bypass signals.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Power management update
+
+assign valid_packet = fp_data_rdy && fp_vld_in &&
+                      ((fp_type_in[4:0]==5'h0a) || (fp_type_in[4:0]==5'h0b));
+
+dffre_s #(1) i_valid_packet_dly (
+	.din	(valid_packet),
+	.en     (1'b1),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (valid_packet_dly),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+// Never bypass/forward invalid packets to the execution pipes
+// assign inq_bp= (inq_wraddr_del[3:0]==inq_rdaddr_del[3:0]);
+
+// 11/11/03: macrotest (AND with sehold_inv) 
+assign sehold_inv = ~sehold;
+
+assign inq_bp= (inq_wraddr_del[3:0]==inq_rdaddr_del[3:0]) && valid_packet_dly && sehold_inv;
+
+assign inq_bp_inv= (!inq_bp);
+
+assign inq_empty= (inq_wrptr[3:0]==inq_rdptr[3:0]);
+
+assign inq_div_empty= (inq_div_wrptr[3:0]==inq_div_rdptr[3:0]);
+
+// Power management update
+// Never bypass/forward invalid packets to the execution pipes
+// assign inq_fwrd= (inq_empty && (!inq_div_rd))
+//		|| (inq_div_empty && fp_div_in && fp_data_rdy && fp_vld_in
+//			&& d1stg_step);
+
+// 11/11/03: macrotest change (AND with sehold_inv) 
+assign inq_fwrd= ((inq_empty && (!inq_div_rd))
+  		|| (inq_div_empty && fp_div_in
+  			&& d1stg_step)) && valid_packet && sehold_inv;
+
+assign inq_fwrd_inv= (!inq_fwrd);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	FPU pipe selection flags.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Add request: type 5'h0a with op bit 7 set, or type 5'h0b with op bit 7 and fp_op_in[3] clear.
+assign fp_add_in= fp_data_rdy && fp_vld_in && (fp_type_in[4:1]==4'h5)
+		&& ((fp_op_in_7 && (!fp_type_in[0]))
+			|| (fp_op_in_7_inv && (!fp_op_in[3]) && fp_type_in[0]));
+// Multiply request: type 5'h0b, op bit 7 clear, fp_op_in[3:2] == 2'b10.
+assign fp_mul_in= fp_data_rdy && fp_vld_in && (fp_type_in[4:0]==5'h0b)
+		&& fp_op_in_7_inv && (fp_op_in[3:2]==2'b10);
+// Divide request: type 5'h0b, op bit 7 clear, fp_op_in[3:2] == 2'b11.
+assign fp_div_in= fp_data_rdy && fp_vld_in && (fp_type_in[4:0]==5'h0b)
+                && fp_op_in_7_inv && (fp_op_in[3:2]==2'b11);
+// Decoded non-divide read pointer: 8'h01 in reset, rotated left by one on inq_re, else held.
+assign inq_rdptr_dec_in[7:0]= ({8{reset}}
+			    & 8'h01)
+		| ({8{(inq_re && (!reset))}}
+			    & {inq_rdptr_dec[6:0], inq_rdptr_dec[7]})
+		| ({8{((!inq_re) && (!reset))}}
+			    & inq_rdptr_dec[7:0]);
+
+dff_s #(8) i_inq_rdptr_dec (
+	.din	(inq_rdptr_dec_in[7:0]),
+	.clk	(rclk),
+
+	.q	(inq_rdptr_dec[7:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+// Decoded divide read pointer: 8'h01 in reset, rotated left by one on inq_div_re, else held.
+assign inq_div_rdptr_dec_in[7:0]= ({8{reset}}
+                            & 8'h01)
+                | ({8{(inq_div_re && (!reset))}}
+                            & {inq_div_rdptr_dec[6:0], inq_div_rdptr_dec[7]})
+                | ({8{((!inq_div_re) && (!reset))}}
+                            & inq_div_rdptr_dec[7:0]);
+
+dff_s #(8) i_inq_div_rdptr_dec (
+        .din    (inq_div_rdptr_dec_in[7:0]),
+        .clk    (rclk),
+
+        .q      (inq_div_rdptr_dec[7:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Decoded read address: bits 15:8 carry the divide pointer on a divide read, else bits 7:0 carry the non-divide one.
+assign inq_rdaddr_del_dec_in[15:0]= ({16{((!inq_div_empty) && d1stg_step
+					&& (!inq_diva))}}
+			    & {(inq_div_rdptr_dec[7:1] & {7{(!reset)}}),
+				(inq_div_rdptr_dec[0] || reset), 8'b0})
+		| ({16{(!((!inq_div_empty) && d1stg_step && (!inq_diva)))}}
+			    & {8'b0, inq_rdptr_dec_in[7:0]});
+
+dff_s #(16) i_inq_rdaddr_del_dec (
+	.din	(inq_rdaddr_del_dec_in[15:0]),
+	.clk	(rclk),
+
+	.q	(inq_rdaddr_del_dec[15:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Per-entry tag write enables: entries 0-7 for non-divide requests, 8-15 for divide requests.
+assign inq_pipe0_we= inq_we && (!fp_div_in) && (inq_wrptr[2:0]==3'h0);
+assign inq_pipe1_we= inq_we && (!fp_div_in) && (inq_wrptr[2:0]==3'h1);
+assign inq_pipe2_we= inq_we && (!fp_div_in) && (inq_wrptr[2:0]==3'h2);
+assign inq_pipe3_we= inq_we && (!fp_div_in) && (inq_wrptr[2:0]==3'h3);
+assign inq_pipe4_we= inq_we && (!fp_div_in) && (inq_wrptr[2:0]==3'h4);
+assign inq_pipe5_we= inq_we && (!fp_div_in) && (inq_wrptr[2:0]==3'h5);
+assign inq_pipe6_we= inq_we && (!fp_div_in) && (inq_wrptr[2:0]==3'h6);
+assign inq_pipe7_we= inq_we && (!fp_div_in) && (inq_wrptr[2:0]==3'h7);
+
+assign inq_pipe8_we= inq_we && fp_div_in && (inq_div_wrptr[2:0]==3'h0);
+assign inq_pipe9_we= inq_we && fp_div_in && (inq_div_wrptr[2:0]==3'h1);
+assign inq_pipe10_we= inq_we && fp_div_in && (inq_div_wrptr[2:0]==3'h2);
+assign inq_pipe11_we= inq_we && fp_div_in && (inq_div_wrptr[2:0]==3'h3);
+assign inq_pipe12_we= inq_we && fp_div_in && (inq_div_wrptr[2:0]==3'h4);
+assign inq_pipe13_we= inq_we && fp_div_in && (inq_div_wrptr[2:0]==3'h5);
+assign inq_pipe14_we= inq_we && fp_div_in && (inq_div_wrptr[2:0]==3'h6);
+assign inq_pipe15_we= inq_we && fp_div_in && (inq_div_wrptr[2:0]==3'h7);
+// Tag registers: each entry captures {fp_div_in, fp_mul_in, fp_add_in} when its write enable fires.
+dffre_s #(3) i_inq_pipe0 (
+	.din	({fp_div_in, fp_mul_in, fp_add_in}),
+	.en	(inq_pipe0_we),
+        .rst    (reset),
+	.clk    (rclk),
+
+        .q      (inq_pipe0[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe1 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe1_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe1[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe2 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe2_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe2[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe3 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe3_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe3[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe4 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe4_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe4[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe5 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe5_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe5[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe6 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe6_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe6[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe7 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe7_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe7[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe8 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe8_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe8[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe9 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe9_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe9[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe10 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe10_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe10[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe11 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe11_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe11[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe12 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe12_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe12[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe13 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe13_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe13[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe14 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe14_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe14[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(3) i_inq_pipe15 (
+        .din    ({fp_div_in, fp_mul_in, fp_add_in}),
+        .en     (inq_pipe15_we),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_pipe15[2:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+// Power management update
+// 3-bit fifo pipe tags (div,mul,add) are not cleared after use.
+// Now that inq_fwrd is qualified by valid_packet, inq_fwrd can't be
+// used for inq_pipe[2:0] selection.
+
+assign tag_sel = (inq_empty && (!inq_div_rd))
+  		|| (inq_div_empty && fp_div_in && fp_data_rdy && fp_vld_in
+  			&& d1stg_step);
+
+assign inq_pipe[2:0]= ({3{tag_sel}}
+                                // Austin update
+                                // performance change: allow div to bypass FIFO (2 cyc latency reduction)
+			    & {(inq_div_empty && fp_div_in && fp_data_rdy && fp_vld_in
+				&& d1stg_step
+				&& d1stg_step_dly && (!inq_diva_dly)),
+                                fp_mul_in,
+				fp_add_in})
+		| ({3{(!tag_sel)}}
+			    & (({3{inq_rdaddr_del_dec[0]}}
+					& inq_pipe0[2:0])
+				| ({3{inq_rdaddr_del_dec[1]}}
+                                        & inq_pipe1[2:0])
+                                | ({3{inq_rdaddr_del_dec[2]}}
+                                        & inq_pipe2[2:0])
+                                | ({3{inq_rdaddr_del_dec[3]}}
+                                        & inq_pipe3[2:0])
+                                | ({3{inq_rdaddr_del_dec[4]}}
+                                        & inq_pipe4[2:0])
+                                | ({3{inq_rdaddr_del_dec[5]}}
+                                        & inq_pipe5[2:0])
+                                | ({3{inq_rdaddr_del_dec[6]}}
+                                        & inq_pipe6[2:0])
+                                | ({3{inq_rdaddr_del_dec[7]}}
+                                        & inq_pipe7[2:0])
+                                | ({3{inq_rdaddr_del_dec[8]}}
+                                        & inq_pipe8[2:0])
+                                | ({3{inq_rdaddr_del_dec[9]}}
+                                        & inq_pipe9[2:0])
+                                | ({3{inq_rdaddr_del_dec[10]}}
+                                        & inq_pipe10[2:0])
+                                | ({3{inq_rdaddr_del_dec[11]}}
+                                        & inq_pipe11[2:0])
+                                | ({3{inq_rdaddr_del_dec[12]}}
+                                        & inq_pipe12[2:0])
+                                | ({3{inq_rdaddr_del_dec[13]}}
+                                        & inq_pipe13[2:0])
+                                | ({3{inq_rdaddr_del_dec[14]}}
+                                        & inq_pipe14[2:0])
+                                | ({3{inq_rdaddr_del_dec[15]}}
+                                        & inq_pipe15[2:0])));
+// Unpack the selected tag; each *a net is an identical copy of the matching output.
+assign inq_div= inq_pipe[2];
+assign inq_diva= inq_pipe[2];
+assign inq_mul= inq_pipe[1];
+assign inq_mula= inq_pipe[1];
+assign inq_add= inq_pipe[0];
+assign inq_adda= inq_pipe[0];
+
+
+// Power management update
+// Gate the clocks on a per pipe basis (add, mul, div independently)
+// when a given pipe is not in use
+
+dffre_s #(1) i_inq_adda_dly (
+	.din	(inq_adda),
+	.en     (1'b1),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_adda_dly),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(1) i_inq_mula_dly (
+	.din	(inq_mula),
+	.en     (1'b1),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_mula_dly),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(1) i_inq_diva_dly (
+	.din	(inq_diva),
+	.en     (1'b1),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (inq_diva_dly),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffre_s #(1) i_d1stg_step_dly (
+	.din	(d1stg_step),
+	.en     (1'b1),
+        .rst    (reset),
+        .clk    (rclk),
+
+        .q      (d1stg_step_dly),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Enables are active low: driven low while the pipe is active, its tag or delayed tag is set, or in reset.
+assign fadd_clken_l = !(add_pipe_active || inq_adda || inq_adda_dly || reset);
+assign fmul_clken_l = !(mul_pipe_active || inq_mula || inq_mula_dly || reset);
+assign fdiv_clken_l = !(div_pipe_active || inq_diva || inq_diva_dly || reset);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_out_ctl.v
===================================================================
--- /trunk/T1-FPU/fpu_out_ctl.v	(revision 6)
+++ /trunk/T1-FPU/fpu_out_ctl.v	(revision 6)
@@ -0,0 +1,245 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_out_ctl.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	FPU output control logic.
+//
+///////////////////////////////////////////////////////////////////////////////
+ 
+
+module fpu_out_ctl (
+	d8stg_fdiv_in,
+	m6stg_fmul_in,
+	a6stg_fadd_in,
+	div_id_out_in,
+	m6stg_id_in,
+	add_id_out_in,
+	arst_l,
+	grst_l,
+	rclk,
+	
+	fp_cpx_req_cq,
+	req_thread,
+	dest_rdy,
+	add_dest_rdy,
+	mul_dest_rdy,
+	div_dest_rdy,
+
+	se,
+	si,
+	so
+);
+
+input		d8stg_fdiv_in;		// div pipe output request next cycle
+input		m6stg_fmul_in;		// mul pipe output request next cycle
+input		a6stg_fadd_in;		// add pipe output request next cycle
+input [9:0]	div_id_out_in;		// div pipe output ID next cycle
+input [9:0]	m6stg_id_in;		// mul pipe output ID next cycle
+input [9:0]	add_id_out_in;		// add pipe output ID next cycle
+input		arst_l;			// global async. reset- asserted low
+input		grst_l;			// global sync. reset- asserted low
+input		rclk;		// global clock
+
+output [7:0]	fp_cpx_req_cq;		// FPU result request to CPX
+output [1:0]	req_thread;		// thread ID of result req this cycle
+output [2:0]	dest_rdy;		// pipe with result request this cycle
+output		add_dest_rdy;		// add pipe result request this cycle
+output		mul_dest_rdy;		// mul pipe result request this cycle
+output		div_dest_rdy;		// div pipe result request this cycle
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+
+// NOTE: the si/so ports are not connected to anything inside this module.
+wire		reset;			// active high copy of out_ctl_rst_l
+wire		out_ctl_rst_l;		// grst_l after the dffrl_out_ctl flop
+wire		add_req_in;		// next value of add_req (its inverse)
+wire		add_req_step;		// add_req update enable
+wire		add_req;		// round robin state- set: add wins a tie
+wire		div_req_sel;		// div pipe granted
+wire		mul_req_sel;		// mul pipe granted
+wire		add_req_sel;		// add pipe granted
+wire [9:0]	out_id;			// ID of the granted pipe, else 0
+wire [7:0]	fp_cpx_req_cq;
+wire [1:0]	req_thread;
+wire [2:0]	dest_rdy_in;
+wire [2:0]	dest_rdy;
+wire		add_dest_rdy;
+wire		mul_dest_rdy;
+wire		div_dest_rdy;
+
+dffrl_async #(1)  dffrl_out_ctl (
+  .din  (grst_l),
+  .clk  (rclk),
+  .rst_l(arst_l),
+  .q    (out_ctl_rst_l),
+	.se (se),
+	.si (),
+	.so ()
+  );
+
+assign reset= (!out_ctl_rst_l);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Arbitrate for the output.
+//
+//	Top priority- divide.
+//	Low priority- round robin arbitration between the add and multiply
+//		pipes.  add_req set: add wins a tie; clear: mul wins.
+//		add_req is inverted whenever add or mul is granted.
+///////////////////////////////////////////////////////////////////////////////
+
+assign add_req_in= (!add_req);
+
+assign add_req_step= add_req_sel || mul_req_sel;
+
+dffre_s #(1) i_add_req (
+	.din	(add_req_in),
+	.en	(add_req_step),
+	.rst    (reset),
+        .clk    (rclk),
+
+        .q      (add_req),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign div_req_sel= d8stg_fdiv_in;
+
+assign mul_req_sel= m6stg_fmul_in
+		&& ((!add_req) || (!a6stg_fadd_in))
+		&& (!div_req_sel);
+
+assign add_req_sel= a6stg_fadd_in
+		&& (add_req || (!m6stg_fmul_in))
+		&& (!div_req_sel);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Generate the request.
+//
+//	Input to the output request (CQ) stage.
+//	out_id is the ID of the granted pipe; bits [9:2] are registered here.
+///////////////////////////////////////////////////////////////////////////////
+
+assign out_id[9:0]= ({10{div_req_sel}}
+			    & div_id_out_in[9:0])
+		| ({10{mul_req_sel}}
+			    & m6stg_id_in[9:0])
+		| ({10{add_req_sel}}
+			    & add_id_out_in[9:0]);
+
+dff_s #(8) i_fp_cpx_req_cq (
+	.din	(out_id[9:2]),
+	.clk    (rclk),
+
+        .q      (fp_cpx_req_cq[7:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Capture the thread.
+//
+//      Input to the output request (CQ) stage.
+//	The thread is taken from bits [1:0] of the granted pipe's ID.
+///////////////////////////////////////////////////////////////////////////////
+
+dff_s #(2) i_req_thread (
+	.din	(out_id[1:0]),
+	.clk    (rclk),
+
+        .q      (req_thread[1:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Capture the pipe that wins the output request.
+//
+//      Input to the output request (CQ) stage.
+//	dest_rdy bit order is {div, mul, add}.
+///////////////////////////////////////////////////////////////////////////////
+
+assign dest_rdy_in[2:0]= {div_req_sel, mul_req_sel, add_req_sel};
+
+dff_s #(3) i_dest_rdy (
+	.din	(dest_rdy_in[2:0]),
+	.clk    (rclk),
+
+        .q      (dest_rdy[2:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dff_s i_add_dest_rdy (
+	.din	(add_req_sel),		// same source as dest_rdy_in[0]
+	.clk	(rclk),
+
+	.q	(add_dest_rdy),
+
+	.se	(se),
+        .si	(),
+        .so	()
+);
+
+dff_s i_mul_dest_rdy (
+	.din	(mul_req_sel),		// same source as dest_rdy_in[1]
+	.clk	(rclk),
+
+	.q	(mul_dest_rdy),
+
+	.se	(se),
+        .si	(),
+        .so	()
+);
+
+dff_s i_div_dest_rdy (
+	.din	(div_req_sel),		// same source as dest_rdy_in[2]
+	.clk	(rclk),
+
+	.q	(div_dest_rdy),
+
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_rptr_min_global.v
===================================================================
--- /trunk/T1-FPU/fpu_rptr_min_global.v	(revision 6)
+++ /trunk/T1-FPU/fpu_rptr_min_global.v	(revision 6)
@@ -0,0 +1,78 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_rptr_min_global.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+
+// global (bufrpt_grp4 used to buffer rst_l, scan signals) and mintiming buffers in this file 
+
+// fpu_bufrpt_grp4: 4 bit wide to fix max trans time for scan, reset
+module fpu_bufrpt_grp4 (
+	input  wire [3:0]	in,	// 4-bit bundle entering the block
+	output wire [3:0]	out	// the same 4 bits, passed straight through
+);
+
+	// Pass-through: every bit of "in" is driven onto the same bit of
+	// "out" with no logic in between.
+
+	assign out = in;
+
+endmodule
+
+
+// fpu_rptr_fp_cpx_grp16: 16 bit wide vertical MSB top mintiming buffer for fp_cpx*
+module fpu_rptr_fp_cpx_grp16 (
+	input  wire [15:0]	in,	// 16-bit bundle entering the block
+	output wire [15:0]	out	// the same 16 bits, passed straight through
+);
+
+	// Pass-through: every bit of "in" is driven onto the same bit of
+	// "out" with no logic in between.
+
+	assign out = in;
+
+endmodule
+
+
+// fpu_rptr_pcx_fpio_grp16: 16 bit wide mintiming vertical buffer, MSB top, for pcx_fpio*
+// use minbuf_5x -> buf_5x -> buf_30x
+module fpu_rptr_pcx_fpio_grp16 (
+	input  wire [15:0]	in,	// 16-bit bundle entering the block
+	output wire [15:0]	out	// the same 16 bits, passed straight through
+);
+
+	// Pass-through: every bit of "in" is driven onto the same bit of
+	// "out" with no logic in between.
+
+	assign out = in;
+
+endmodule
+
+// fpu_rptr_inq: 156 bits wide mintiming buffer for inq_sram din (matched to inq_sram bit order)
+module fpu_rptr_inq (
+	input  wire [155:0]	in,	// 156-bit bundle entering the block
+	output wire [155:0]	out	// the same 156 bits, passed straight through
+);
+
+	// Pass-through: every bit of "in" is driven onto the same bit of
+	// "out" with no logic in between.
+
+	assign out = in;
+
+endmodule
+
Index: /trunk/T1-FPU/fpu_in2_gt_in1_2b.v
===================================================================
--- /trunk/T1-FPU/fpu_in2_gt_in1_2b.v	(revision 6)
+++ /trunk/T1-FPU/fpu_in2_gt_in1_2b.v	(revision 6)
@@ -0,0 +1,58 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_in2_gt_in1_2b.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Two bit comparison of two inputs that can have any value.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_in2_gt_in1_2b (
+	input  wire [1:0]	din1,		// input 1- 2 bits
+	input  wire [1:0]	din2,		// input 2- 2 bits
+
+	output wire		din2_neq_din1,	// input 2 doesn't equal input 1
+	output wire		din2_gt_din1	// input 2 is greater than input 1
+);
+
+
+// Per-bit difference: a set bit marks a position where the inputs differ.
+wire [1:0]	bit_diff;
+
+assign bit_diff= (din1 ^ din2);
+
+// The inputs are unequal as soon as any bit position differs.
+assign din2_neq_din1= (|bit_diff);
+
+
+// Unsigned magnitude compare, most significant bit first:
+//	msb_wins- din2[1] is set while din1[1] is clear.
+//	lsb_wins- the MSBs match and din2[0] is set while din1[0] is clear.
+wire		msb_wins;
+wire		lsb_wins;
+
+assign msb_wins= din2[1] & (~din1[1]);
+assign lsb_wins= (~bit_diff[1]) & din2[0] & (~din1[0]);
+
+assign din2_gt_din1= msb_wins | lsb_wins;
+
+endmodule
+
+
Index: /trunk/T1-FPU/bw_clk_cl_fpu_cmp.v
===================================================================
--- /trunk/T1-FPU/bw_clk_cl_fpu_cmp.v	(revision 6)
+++ /trunk/T1-FPU/bw_clk_cl_fpu_cmp.v	(revision 6)
@@ -0,0 +1,65 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: bw_clk_cl_fpu_cmp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module bw_clk_cl_fpu_cmp (
+	output wire	so,
+	output wire	dbginit_l,
+	output wire	cluster_grst_l,
+	output wire	rclk,
+	input  wire	si,
+	input  wire	se,
+	input  wire	adbginit_l,
+	input  wire	gdbginit_l,
+	input  wire	arst_l,
+	input  wire	grst_l,
+	input  wire	cluster_cken,
+	input  wire	gclk
+);
+
+	// Wrapper holding a single cluster_header instance (I0).
+	//
+	// Each port of this module is tied by name to the identically named
+	// port of cluster_header; nothing else is instantiated or assigned.
+	//
+	//   passed down : gclk, cluster_cken, arst_l, grst_l,
+	//                 adbginit_l, gdbginit_l, se, si
+	//   brought up  : rclk, cluster_grst_l, dbginit_l, so
+	//
+	// NOTE(review): cluster_header is defined outside this file.  Going by
+	// the port names alone it presumably produces rclk from gclk and the
+	// *_l outputs from the reset/debug-init inputs- confirm against it.
+
+	cluster_header I0 (
+		.gclk            (gclk),
+		.cluster_cken    (cluster_cken),
+		.arst_l          (arst_l),
+		.grst_l          (grst_l),
+		.adbginit_l      (adbginit_l),
+		.gdbginit_l      (gdbginit_l),
+		.se              (se),
+		.si              (si),
+		.rclk            (rclk),
+		.cluster_grst_l  (cluster_grst_l),
+		.dbginit_l       (dbginit_l),
+		.so              (so)
+	);
+
+endmodule
+
Index: /trunk/T1-FPU/fpu_denorm_frac.v
===================================================================
--- /trunk/T1-FPU/fpu_denorm_frac.v	(revision 6)
+++ /trunk/T1-FPU/fpu_denorm_frac.v	(revision 6)
@@ -0,0 +1,361 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_denorm_frac.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Fraction comparison of two inputs that both have leading 0's.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_denorm_frac (
+	din1,
+	din2,
+
+	din2_din1_denorm,
+	din2_din1_denorm_inv,
+	din2_din1_denorma,
+	din2_din1_denorm_inva
+);
+
+// Both fractions are 54 bits wide and are examined in 3-bit slices below.
+input [53:0]	din1;                   // input 1- fraction
+input [53:0]    din2;                   // input 2- fraction
+
+output		din2_din1_denorm;	// input 1 == denorm
+output		din2_din1_denorm_inv;	// input 1 != denorm
+output		din2_din1_denorma;	// input 1 == denorm- copy
+output		din2_din1_denorm_inva;	// input 1 != denorm- copy
+// NOTE(review): what nz/denorm mean per range is set by fpu_denorm_3b and
+// fpu_denorm_3to1, which are defined elsewhere- confirm there.
+wire		din2_din1_nz_53_51;
+wire		din2_din1_denorm_53_51;
+wire		din2_din1_nz_50_48;
+wire		din2_din1_denorm_50_48;
+wire		din2_din1_nz_47_45;
+wire		din2_din1_denorm_47_45;
+wire		din2_din1_nz_44_42;
+wire		din2_din1_denorm_44_42;
+wire		din2_din1_nz_41_39;
+wire		din2_din1_denorm_41_39;
+wire		din2_din1_nz_38_36;
+wire		din2_din1_denorm_38_36;
+wire		din2_din1_nz_35_33;
+wire		din2_din1_denorm_35_33;
+wire		din2_din1_nz_32_30;
+wire		din2_din1_denorm_32_30;
+wire		din2_din1_nz_29_27;
+wire		din2_din1_denorm_29_27;
+wire		din2_din1_nz_26_24;
+wire		din2_din1_denorm_26_24;
+wire		din2_din1_nz_23_21;
+wire		din2_din1_denorm_23_21;
+wire		din2_din1_nz_20_18;
+wire		din2_din1_denorm_20_18;
+wire		din2_din1_nz_17_15;
+wire		din2_din1_denorm_17_15;
+wire		din2_din1_nz_14_12;
+wire		din2_din1_denorm_14_12;
+wire		din2_din1_nz_11_9;
+wire		din2_din1_denorm_11_9;
+wire		din2_din1_nz_8_6;
+wire		din2_din1_denorm_8_6;
+wire		din2_din1_nz_5_3;
+wire		din2_din1_denorm_5_3;
+wire		din2_din1_nz_2_0;
+wire		din2_din1_denorm_2_0;
+wire		din2_din1_nz_53_45;
+wire		din2_din1_denorm_53_45;
+wire		din2_din1_nz_44_36;
+wire		din2_din1_denorm_44_36;
+wire		din2_din1_nz_35_27;
+wire		din2_din1_denorm_35_27;
+wire		din2_din1_nz_26_18;
+wire		din2_din1_denorm_26_18;
+wire		din2_din1_nz_17_9;
+wire		din2_din1_denorm_17_9;
+wire		din2_din1_nz_8_0;
+wire		din2_din1_denorm_8_0;
+wire		din2_din1_nz_53_27;
+wire		din2_din1_denorm_53_27;
+wire		din2_din1_nz_26_0;
+wire		din2_din1_denorm_26_0;
+wire		din2_din1_denorm;
+wire		din2_din1_denorm_inv;
+wire		din2_din1_denorma;
+wire		din2_din1_denorm_inva;
+
+// Level 1: one fpu_denorm_3b per 3-bit slice, [53:51] down to [2:0].
+fpu_denorm_3b i_fpu_denorm_53_51 (
+	.din1			(din1[53:51]),
+	.din2			(din2[53:51]),
+
+	.din2_din1_nz		(din2_din1_nz_53_51),
+	.din2_din1_denorm	(din2_din1_denorm_53_51)
+);
+
+fpu_denorm_3b i_fpu_denorm_50_48 (
+        .din1                   (din1[50:48]),
+        .din2                   (din2[50:48]),
+
+        .din2_din1_nz           (din2_din1_nz_50_48),
+        .din2_din1_denorm       (din2_din1_denorm_50_48)
+);
+
+fpu_denorm_3b i_fpu_denorm_47_45 (
+        .din1                   (din1[47:45]),
+        .din2                   (din2[47:45]),
+
+        .din2_din1_nz           (din2_din1_nz_47_45),
+        .din2_din1_denorm       (din2_din1_denorm_47_45)
+);
+
+fpu_denorm_3b i_fpu_denorm_44_42 (
+        .din1                   (din1[44:42]),
+        .din2                   (din2[44:42]),
+
+        .din2_din1_nz           (din2_din1_nz_44_42),
+        .din2_din1_denorm       (din2_din1_denorm_44_42)
+);
+
+fpu_denorm_3b i_fpu_denorm_41_39 (
+        .din1                   (din1[41:39]),
+        .din2                   (din2[41:39]),
+
+        .din2_din1_nz           (din2_din1_nz_41_39),
+        .din2_din1_denorm       (din2_din1_denorm_41_39)
+);
+
+fpu_denorm_3b i_fpu_denorm_38_36 (
+        .din1                   (din1[38:36]),
+        .din2                   (din2[38:36]),
+
+        .din2_din1_nz           (din2_din1_nz_38_36),
+        .din2_din1_denorm       (din2_din1_denorm_38_36)
+);
+
+fpu_denorm_3b i_fpu_denorm_35_33 (
+        .din1                   (din1[35:33]),
+        .din2                   (din2[35:33]),
+
+        .din2_din1_nz           (din2_din1_nz_35_33),
+        .din2_din1_denorm       (din2_din1_denorm_35_33)
+);
+
+fpu_denorm_3b i_fpu_denorm_32_30 (
+        .din1                   (din1[32:30]),
+        .din2                   (din2[32:30]),
+
+        .din2_din1_nz           (din2_din1_nz_32_30),
+        .din2_din1_denorm       (din2_din1_denorm_32_30)
+);
+
+fpu_denorm_3b i_fpu_denorm_29_27 (
+        .din1                   (din1[29:27]),
+        .din2                   (din2[29:27]),
+
+        .din2_din1_nz           (din2_din1_nz_29_27),
+        .din2_din1_denorm       (din2_din1_denorm_29_27)
+);
+
+fpu_denorm_3b i_fpu_denorm_26_24 (
+        .din1                   (din1[26:24]),
+        .din2                   (din2[26:24]),
+
+        .din2_din1_nz           (din2_din1_nz_26_24),
+        .din2_din1_denorm       (din2_din1_denorm_26_24)
+);
+
+fpu_denorm_3b i_fpu_denorm_23_21 (
+        .din1                   (din1[23:21]),
+        .din2                   (din2[23:21]),
+
+        .din2_din1_nz           (din2_din1_nz_23_21),
+        .din2_din1_denorm       (din2_din1_denorm_23_21)
+);
+
+fpu_denorm_3b i_fpu_denorm_20_18 (
+        .din1                   (din1[20:18]),
+        .din2                   (din2[20:18]),
+
+        .din2_din1_nz           (din2_din1_nz_20_18),
+        .din2_din1_denorm       (din2_din1_denorm_20_18)
+);
+
+fpu_denorm_3b i_fpu_denorm_17_15 (
+        .din1                   (din1[17:15]),
+        .din2                   (din2[17:15]),
+
+        .din2_din1_nz           (din2_din1_nz_17_15),
+        .din2_din1_denorm       (din2_din1_denorm_17_15)
+);
+
+fpu_denorm_3b i_fpu_denorm_14_12 (
+        .din1                   (din1[14:12]),
+        .din2                   (din2[14:12]),
+
+        .din2_din1_nz           (din2_din1_nz_14_12),
+        .din2_din1_denorm       (din2_din1_denorm_14_12)
+);
+
+fpu_denorm_3b i_fpu_denorm_11_9 (
+        .din1                   (din1[11:9]),
+        .din2                   (din2[11:9]),
+
+        .din2_din1_nz           (din2_din1_nz_11_9),
+        .din2_din1_denorm       (din2_din1_denorm_11_9)
+);
+
+fpu_denorm_3b i_fpu_denorm_8_6 (
+        .din1                   (din1[8:6]),
+        .din2                   (din2[8:6]),
+
+        .din2_din1_nz           (din2_din1_nz_8_6),
+        .din2_din1_denorm       (din2_din1_denorm_8_6)
+);
+
+fpu_denorm_3b i_fpu_denorm_5_3 (
+        .din1                   (din1[5:3]),
+        .din2                   (din2[5:3]),
+
+        .din2_din1_nz           (din2_din1_nz_5_3),
+        .din2_din1_denorm       (din2_din1_denorm_5_3)
+);
+
+fpu_denorm_3b i_fpu_denorm_2_0 (
+        .din1                   (din1[2:0]),
+        .din2                   (din2[2:0]),
+
+        .din2_din1_nz           (din2_din1_nz_2_0),
+        .din2_din1_denorm       (din2_din1_denorm_2_0)
+);
+
+// Level 2: fpu_denorm_3to1 merges three slice results into a 9-bit range.
+fpu_denorm_3to1 i_fpu_denorm_53_45 (
+	.din2_din1_nz_hi	(din2_din1_nz_53_51),
+	.din2_din1_denorm_hi	(din2_din1_denorm_53_51),
+	.din2_din1_nz_mid	(din2_din1_nz_50_48),
+	.din2_din1_denorm_mid	(din2_din1_denorm_50_48),
+	.din2_din1_nz_lo	(din2_din1_nz_47_45),
+	.din2_din1_denorm_lo	(din2_din1_denorm_47_45),
+
+	.din2_din1_nz		(din2_din1_nz_53_45),
+	.din2_din1_denorm	(din2_din1_denorm_53_45)
+);
+
+fpu_denorm_3to1 i_fpu_denorm_44_36 (
+        .din2_din1_nz_hi        (din2_din1_nz_44_42),
+        .din2_din1_denorm_hi    (din2_din1_denorm_44_42),
+        .din2_din1_nz_mid       (din2_din1_nz_41_39),
+        .din2_din1_denorm_mid   (din2_din1_denorm_41_39),
+        .din2_din1_nz_lo        (din2_din1_nz_38_36),
+        .din2_din1_denorm_lo    (din2_din1_denorm_38_36),
+
+        .din2_din1_nz           (din2_din1_nz_44_36),
+        .din2_din1_denorm       (din2_din1_denorm_44_36)
+);
+
+fpu_denorm_3to1 i_fpu_denorm_35_27 (
+        .din2_din1_nz_hi        (din2_din1_nz_35_33),
+        .din2_din1_denorm_hi    (din2_din1_denorm_35_33),
+        .din2_din1_nz_mid       (din2_din1_nz_32_30),
+        .din2_din1_denorm_mid   (din2_din1_denorm_32_30),
+        .din2_din1_nz_lo        (din2_din1_nz_29_27),
+        .din2_din1_denorm_lo    (din2_din1_denorm_29_27),
+
+        .din2_din1_nz           (din2_din1_nz_35_27),
+        .din2_din1_denorm       (din2_din1_denorm_35_27)
+);
+
+fpu_denorm_3to1 i_fpu_denorm_26_18 (
+        .din2_din1_nz_hi        (din2_din1_nz_26_24),
+        .din2_din1_denorm_hi    (din2_din1_denorm_26_24),
+        .din2_din1_nz_mid       (din2_din1_nz_23_21),
+        .din2_din1_denorm_mid   (din2_din1_denorm_23_21),
+        .din2_din1_nz_lo        (din2_din1_nz_20_18),
+        .din2_din1_denorm_lo    (din2_din1_denorm_20_18),
+
+        .din2_din1_nz           (din2_din1_nz_26_18),
+        .din2_din1_denorm       (din2_din1_denorm_26_18)
+);
+
+fpu_denorm_3to1 i_fpu_denorm_17_9 (
+        .din2_din1_nz_hi        (din2_din1_nz_17_15),
+        .din2_din1_denorm_hi    (din2_din1_denorm_17_15),
+        .din2_din1_nz_mid       (din2_din1_nz_14_12),
+        .din2_din1_denorm_mid   (din2_din1_denorm_14_12),
+        .din2_din1_nz_lo        (din2_din1_nz_11_9),
+        .din2_din1_denorm_lo    (din2_din1_denorm_11_9),
+
+        .din2_din1_nz           (din2_din1_nz_17_9),
+        .din2_din1_denorm       (din2_din1_denorm_17_9)
+);
+
+fpu_denorm_3to1 i_fpu_denorm_8_0 (
+        .din2_din1_nz_hi        (din2_din1_nz_8_6),
+        .din2_din1_denorm_hi    (din2_din1_denorm_8_6),
+        .din2_din1_nz_mid       (din2_din1_nz_5_3),
+        .din2_din1_denorm_mid   (din2_din1_denorm_5_3),
+        .din2_din1_nz_lo        (din2_din1_nz_2_0),
+        .din2_din1_denorm_lo    (din2_din1_denorm_2_0),
+
+        .din2_din1_nz           (din2_din1_nz_8_0),
+        .din2_din1_denorm       (din2_din1_denorm_8_0)
+);
+
+// Level 3: fpu_denorm_3to1 merges three 9-bit ranges into a 27-bit half.
+fpu_denorm_3to1 i_fpu_denorm_53_27 (
+	.din2_din1_nz_hi	(din2_din1_nz_53_45),
+	.din2_din1_denorm_hi	(din2_din1_denorm_53_45),
+	.din2_din1_nz_mid	(din2_din1_nz_44_36),
+	.din2_din1_denorm_mid	(din2_din1_denorm_44_36),
+	.din2_din1_nz_lo	(din2_din1_nz_35_27),
+	.din2_din1_denorm_lo	(din2_din1_denorm_35_27),
+
+	.din2_din1_nz		(din2_din1_nz_53_27),
+	.din2_din1_denorm	(din2_din1_denorm_53_27)
+);
+
+fpu_denorm_3to1 i_fpu_denorm_26_0 (
+        .din2_din1_nz_hi        (din2_din1_nz_26_18),
+        .din2_din1_denorm_hi    (din2_din1_denorm_26_18),
+        .din2_din1_nz_mid       (din2_din1_nz_17_9),
+        .din2_din1_denorm_mid   (din2_din1_denorm_17_9),
+        .din2_din1_nz_lo        (din2_din1_nz_8_0),
+        .din2_din1_denorm_lo    (din2_din1_denorm_8_0),
+
+        .din2_din1_nz           (din2_din1_nz_26_0),
+        .din2_din1_denorm       (din2_din1_denorm_26_0)
+);
+// Final select: the [53:27] denorm flag is used when that half's nz flag is
+// set; otherwise the result is 1 if [26:0] has nz clear or denorm set.
+assign din2_din1_denorm= (din2_din1_nz_53_27 && din2_din1_denorm_53_27)
+		|| ((!din2_din1_nz_53_27) && (!din2_din1_nz_26_0))
+		|| ((!din2_din1_nz_53_27) && din2_din1_denorm_26_0);
+
+assign din2_din1_denorm_inv= (!din2_din1_denorm);	// complement of the result
+
+assign din2_din1_denorma= din2_din1_denorm;	// second output, same value
+
+assign din2_din1_denorm_inva= din2_din1_denorm_inv;	// second output, same value
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu.v
===================================================================
--- /trunk/T1-FPU/fpu.v	(revision 6)
+++ /trunk/T1-FPU/fpu.v	(revision 6)
@@ -0,0 +1,740 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Floating Point Unit.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu (
+	pcx_fpio_data_rdy_px2,
+	pcx_fpio_data_px2,
+	arst_l,
+	grst_l,
+	gclk,
+	cluster_cken,
+	
+	fp_cpx_req_cq,
+	fp_cpx_data_ca,
+
+	ctu_tst_pre_grst_l,
+	global_shift_enable,
+	ctu_tst_scan_disable,
+	ctu_tst_scanmode,
+	ctu_tst_macrotest,
+	ctu_tst_short_chain,
+
+	si,
+	so
+);
+// Structural top level: wires together fpu_in, i_fpu_inq_sram, fpu_add, fpu_mul, fpu_div, fpu_out, test_stub, cluster_header and fpu_rptr_groups.
+
+input		pcx_fpio_data_rdy_px2;	// FPU request ready from PCX
+input [123:0]	pcx_fpio_data_px2;	// FPU request data from PCX
+input		arst_l;			// chip async. reset- asserted low
+input		grst_l;			// chip sync. reset- asserted low
+input		gclk;			// chip clock
+input		cluster_cken;			// cluster clock enable
+
+output [7:0]	fp_cpx_req_cq;		// FPU result request to CPX
+output [144:0]	fp_cpx_data_ca;		// FPU result to CPX
+// The six ctu_tst_*/global_shift_enable inputs below go to fpu_rptr_groups; test_stub is wired to the matching *_buf1 nets.
+input						ctu_tst_pre_grst_l;
+input						global_shift_enable;
+input						ctu_tst_scan_disable;
+input						ctu_tst_scanmode;
+input 					ctu_tst_macrotest;
+input 					ctu_tst_short_chain;
+input           si;                     // scan in
+output          so;                     // scan out
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_in.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire		inq_add;		// add pipe request
+wire		inq_mul;		// multiply pipe request
+wire		inq_div;		// divide pipe request
+wire [4:0]	inq_id;			// request ID to the operation pipes
+wire [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+wire [1:0]	inq_fcc;		// request cc ID to op pipes
+wire [7:0]	inq_op;			// request opcode to op pipes
+wire		inq_in1_exp_neq_ffs;	// request operand 1 exp!=ff's
+wire		inq_in1_exp_eq_0;	// request operand 1 exp==0
+wire		inq_in1_53_0_neq_0;	// request operand 1[53:0]!=0
+wire		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+wire		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+wire [63:0]	inq_in1;		// request operand 1 to op pipes
+wire		inq_in2_exp_neq_ffs;	// request operand 2 exp!=ff's
+wire		inq_in2_exp_eq_0;	// request operand 2 exp==0
+wire		inq_in2_53_0_neq_0;	// request operand 2[53:0]!=0
+wire		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+wire		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+wire [63:0]	inq_in2;		// request operand 2 to op pipes
+wire  		fadd_clken_l;		// add      pipe clk enable - asserted low
+wire 		fmul_clken_l;		// multiply pipe clk enable - asserted low
+wire 		fdiv_clken_l;		// divide   pipe clk enable - asserted low
+
+// 6/20/03: New outputs of fpu_in for fpu-level i_fpu_inq_sram 
+
+wire [4:0] fp_id_in; // id to be written into inq_sram
+wire [1:0] fp_rnd_mode_in; // rnd_mode to be written into inq_sram
+wire [1:0] fp_fcc_in; // fcc to be written into inq_sram
+wire [7:0] fp_op_in; // op field to be written into inq_sram
+wire [68:0] fp_src1_in; // operand1, includes pre-computed bits matching special values, such as exp all ffs
+wire [68:0] fp_src2_in; // operand2, includes pre-computed bits matching special values, such as exp all ffs
+wire [3:0] inq_rdaddr; // read address for inq_sram
+wire [3:0] inq_wraddr; // write address for inq_sram
+wire inq_read_en; // read enable for inq_sram
+wire inq_we; // write enable for inq_sram
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Outputs of i_fpu_inq_sram
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [154:0] inq_dout; // fpu op packet read out from inq_sram
+wire [4:0] inq_dout_unused; // unused bits from sram
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_add.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire		a1stg_step;		// add pipe load
+wire		a6stg_fadd_in;		// add pipe output request next cycle
+wire [9:0]	add_id_out_in;		// add pipe output ID next cycle
+wire		a6stg_fcmpop;		// compare- add 6 stage
+wire [4:0]	add_exc_out;		// add pipe result- exception flags
+wire		a6stg_dbl_dst;		// float double result- add 6 stage
+wire		a6stg_sng_dst;		// float single result- add 6 stage
+wire		a6stg_long_dst;		// 64bit integer result- add 6 stage
+wire		a6stg_int_dst;		// 32bit integer result- add 6 stage
+wire		add_sign_out;		// add sign output
+wire [10:0]	add_exp_out;		// add exponent output
+wire [63:0]	add_frac_out;		// add fraction output
+wire [1:0]	add_cc_out;		// add pipe result- condition
+wire [1:0]	add_fcc_out;		// add pipe input fcc passed through
+wire		add_pipe_active;        // add pipe is executing a valid instr
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_mul.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire		m1stg_step;		// multiply pipe load
+wire		m6stg_fmul_in;		// mul pipe output request next cycle
+wire [9:0]	m6stg_id_in;		// mul pipe output ID next cycle
+wire [4:0]	mul_exc_out;		// multiply pipe result- exception flags
+wire		m6stg_fmul_dbl_dst;	// double precision multiply result
+wire		m6stg_fmuls;		// fmuls- multiply 6 stage
+wire		mul_sign_out;		// multiply sign output
+wire [10:0]	mul_exp_out;		// multiply exponent output
+wire [51:0]	mul_frac_out;		// multiply fraction output
+wire		mul_pipe_active;        // mul pipe is executing a valid instr
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_div.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire		d1stg_step;		// divide pipe load
+wire		d8stg_fdiv_in;		// div pipe output request next cycle
+wire [9:0]	div_id_out_in;		// div pipe output ID next cycle
+wire [4:0]	div_exc_out;		// divide pipe result- exception flags
+wire		d8stg_fdivd;		// divide double- divide stage 8
+wire		d8stg_fdivs;		// divide single- divide stage 8
+wire		div_sign_out;		// divide sign output
+wire [10:0]	div_exp_out;		// divide exponent output
+wire [51:0]	div_frac_out;		// divide fraction output
+wire		div_pipe_active;        // div pipe is executing a valid instr
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_out.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [7:0]	fp_cpx_req_cq_unbuf;		// FPU result request to CPX
+wire		add_dest_rdy;		// add pipe result request this cycle
+wire		mul_dest_rdy;		// mul pipe result request this cycle
+wire		div_dest_rdy;		// div pipe result request this cycle
+wire [144:0]	fp_cpx_data_ca_unbuf;		// FPU result to CPX
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of cluster_header, test_stub.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire rclk; // ref. clock 
+
+wire		sehold; // scan in data hold
+
+wire fpu_grst_l;
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Outputs of fpu_rptr_groups.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+wire [63:0] inq_in1_add_buf1;
+wire [63:0] inq_in1_mul_buf1;
+wire [63:0] inq_in1_div_buf1;
+wire [63:0] inq_in2_add_buf1;
+wire [63:0] inq_in2_mul_buf1;
+wire [63:0] inq_in2_div_buf1;
+wire [4:0] inq_id_add_buf1;
+wire [4:0] inq_id_mul_buf1;
+wire [4:0] inq_id_div_buf1;
+wire [7:0] inq_op_add_buf1;
+wire [7:0] inq_op_mul_buf1;
+wire [7:0] inq_op_div_buf1;
+wire [1:0] inq_rnd_mode_add_buf1;
+wire [1:0] inq_rnd_mode_mul_buf1;
+wire [1:0] inq_rnd_mode_div_buf1;
+wire inq_in1_50_0_neq_0_add_buf1;
+wire inq_in1_50_0_neq_0_mul_buf1;
+wire inq_in1_50_0_neq_0_div_buf1;
+wire inq_in1_53_0_neq_0_add_buf1;
+wire inq_in1_53_0_neq_0_mul_buf1;
+wire inq_in1_53_0_neq_0_div_buf1;
+wire inq_in1_53_32_neq_0_add_buf1;
+wire inq_in1_53_32_neq_0_mul_buf1;
+wire inq_in1_53_32_neq_0_div_buf1;
+wire inq_in1_exp_eq_0_add_buf1;
+wire inq_in1_exp_eq_0_mul_buf1;
+wire inq_in1_exp_eq_0_div_buf1;
+wire inq_in1_exp_neq_ffs_add_buf1;
+wire inq_in1_exp_neq_ffs_mul_buf1;
+wire inq_in1_exp_neq_ffs_div_buf1;
+wire inq_in2_50_0_neq_0_add_buf1;
+wire inq_in2_50_0_neq_0_mul_buf1;
+wire inq_in2_50_0_neq_0_div_buf1;
+wire inq_in2_53_0_neq_0_add_buf1;
+wire inq_in2_53_0_neq_0_mul_buf1;
+wire inq_in2_53_0_neq_0_div_buf1;
+wire inq_in2_53_32_neq_0_add_buf1;
+wire inq_in2_53_32_neq_0_mul_buf1;
+wire inq_in2_53_32_neq_0_div_buf1;
+wire inq_in2_exp_eq_0_add_buf1;
+wire inq_in2_exp_eq_0_mul_buf1;
+wire inq_in2_exp_eq_0_div_buf1;
+wire inq_in2_exp_neq_ffs_add_buf1;
+wire inq_in2_exp_neq_ffs_mul_buf1;
+wire inq_in2_exp_neq_ffs_div_buf1;
+
+wire [123:0] pcx_fpio_data_px2_buf1;
+wire [155:0] inq_sram_din_buf1;
+// NOTE(review): several nets used below (se, rst_tri_en, so_unbuf, si_buf1, scan_manual_*, manual_scan_0, scan_inq_sram_w, and the se_*/arst_l_*/fpu_grst_l_* buffered nets, among others) have no wire declaration in this module; they rely on implicit scalar nets.
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Instantiations.
+//
+///////////////////////////////////////////////////////////////////////////////
+// fpu_in: connected to the *_buf1 PCX request nets and the SRAM read data (inq_dout). Scan: manual_scan_0 -> scan_manual_1.
+fpu_in fpu_in (
+	.pcx_fpio_data_rdy_px2		(pcx_fpio_data_rdy_px2_buf1),
+	.pcx_fpio_data_px2		(pcx_fpio_data_px2_buf1[123:0]),
+	.a1stg_step			(a1stg_step),
+	.m1stg_step			(m1stg_step),
+	.d1stg_step			(d1stg_step),
+	.add_pipe_active		(add_pipe_active),
+	.mul_pipe_active		(mul_pipe_active),
+	.div_pipe_active		(div_pipe_active),
+	.inq_dout    (inq_dout[154:0]),
+	.sehold (sehold),
+	.arst_l				(arst_l_in_buf3),
+	.grst_l				(fpu_grst_l_in_buf2),
+	.rclk				(rclk),
+
+	.fadd_clken_l			(fadd_clken_l),
+	.fmul_clken_l			(fmul_clken_l),
+	.fdiv_clken_l			(fdiv_clken_l),
+
+	.inq_add			(inq_add),
+	.inq_mul			(inq_mul),
+	.inq_div			(inq_div),
+	.inq_id				(inq_id[4:0]),
+	.inq_rnd_mode			(inq_rnd_mode[1:0]),
+	.inq_fcc			(inq_fcc[1:0]),
+	.inq_op				(inq_op[7:0]),
+	.inq_in1_exp_neq_ffs		(inq_in1_exp_neq_ffs),
+	.inq_in1_exp_eq_0		(inq_in1_exp_eq_0),
+	.inq_in1_53_0_neq_0		(inq_in1_53_0_neq_0),
+	.inq_in1_50_0_neq_0		(inq_in1_50_0_neq_0),
+	.inq_in1_53_32_neq_0		(inq_in1_53_32_neq_0),
+	.inq_in1			(inq_in1[63:0]),
+	.inq_in2_exp_neq_ffs		(inq_in2_exp_neq_ffs),
+	.inq_in2_exp_eq_0		(inq_in2_exp_eq_0),
+	.inq_in2_53_0_neq_0		(inq_in2_53_0_neq_0),
+	.inq_in2_50_0_neq_0		(inq_in2_50_0_neq_0),
+	.inq_in2_53_32_neq_0		(inq_in2_53_32_neq_0),
+	.inq_in2			(inq_in2[63:0]),
+
+// new outputs of fpu_in to drive i_fpu_inq_sram 
+	.fp_id_in (fp_id_in[4:0]),
+	.fp_rnd_mode_in (fp_rnd_mode_in[1:0]),
+	.fp_fcc_in (fp_fcc_in[1:0]),
+	.fp_op_in (fp_op_in[7:0]),
+	.fp_src1_in (fp_src1_in[68:0]),
+	.fp_src2_in (fp_src2_in[68:0]),
+	.inq_rdaddr (inq_rdaddr[3:0]),
+	.inq_wraddr (inq_wraddr[3:0]),
+	.inq_read_en (inq_read_en),
+	.inq_we (inq_we),
+
+	.se (se_in_buf3),
+  .si (manual_scan_0),
+  .so (scan_manual_1)
+);
+
+
+// 7/30/03: updated scan ports from si to si_r, si_w and so to so_r, so_w 
+// 06/20/03: Pulled up i_fpu_inq_sram from fpu_in_dp into fpu 
+// Scan path through the SRAM: si_buf1 -> si_r, so_r -> scan_inq_sram_w -> si_w, so_w -> manual_scan_0.
+bw_r_rf16x160 i_fpu_inq_sram (
+	.din ({inq_sram_din_buf1[155:0], 4'b0000}), // 156 data bits + 4 constant-zero low bits = 160-bit concatenation
+	.rd_adr (inq_rdaddr[3:0]),
+	.wr_adr (inq_wraddr[3:0]),
+	.read_en (inq_read_en),
+	.wr_en (inq_we),
+	.word_wen (4'hf), // all word write-enable bits tied to 1
+	.byte_wen (20'hfffff), // all byte write-enable bits tied to 1
+	.rd_clk (rclk), // read and write sides share rclk
+	.wr_clk (rclk),
+	.se (se),
+	.si_r (si_buf1),
+	.si_w (scan_inq_sram_w),
+	.reset_l (arst_l_in_buf3),
+	.sehold (sehold),
+	.rst_tri_en (rst_tri_en),
+	.dout ({inq_dout[154:0], inq_dout_unused[4:0]}), // 155 used bits + 5 unused low bits = 160-bit concatenation
+	.so_r (scan_inq_sram_w),
+	.so_w (manual_scan_0)
+);
+// fpu_add: operand signals use the *_add_buf1 nets; inq_fcc and inq_add are connected directly. Scan: scan_manual_1 -> scan_manual_2.
+
+fpu_add fpu_add (
+	.inq_op				(inq_op_add_buf1[7:0]),
+	.inq_rnd_mode			(inq_rnd_mode_add_buf1[1:0]),
+	.inq_id				(inq_id_add_buf1[4:0]),
+	.inq_fcc			(inq_fcc[1:0]),
+	.inq_in1			(inq_in1_add_buf1[63:0]),
+	.inq_in1_50_0_neq_0		(inq_in1_50_0_neq_0_add_buf1),
+	.inq_in1_53_32_neq_0		(inq_in1_53_32_neq_0_add_buf1),
+	.inq_in1_exp_eq_0		(inq_in1_exp_eq_0_add_buf1),
+	.inq_in1_exp_neq_ffs		(inq_in1_exp_neq_ffs_add_buf1),
+	.inq_in2			(inq_in2_add_buf1[63:0]),
+	.inq_in2_50_0_neq_0		(inq_in2_50_0_neq_0_add_buf1),
+	.inq_in2_53_32_neq_0		(inq_in2_53_32_neq_0_add_buf1),
+	.inq_in2_exp_eq_0		(inq_in2_exp_eq_0_add_buf1),
+	.inq_in2_exp_neq_ffs		(inq_in2_exp_neq_ffs_add_buf1),
+	.inq_add			(inq_add),
+	.add_dest_rdy			(add_dest_rdy),
+	.fadd_clken_l			(fadd_clken_l),
+	.arst_l				(arst_l_add_buf4),
+	.grst_l				(fpu_grst_l_add_buf3),
+	.rclk				(rclk),
+
+	.add_pipe_active                (add_pipe_active),
+	.a1stg_step			(a1stg_step),
+	.a6stg_fadd_in			(a6stg_fadd_in),
+	.add_id_out_in			(add_id_out_in[9:0]),
+	.a6stg_fcmpop			(a6stg_fcmpop),
+	.add_exc_out			(add_exc_out[4:0]),
+	.a6stg_dbl_dst			(a6stg_dbl_dst),
+	.a6stg_sng_dst			(a6stg_sng_dst),
+	.a6stg_long_dst			(a6stg_long_dst),
+	.a6stg_int_dst			(a6stg_int_dst),
+	.add_sign_out			(add_sign_out),
+	.add_exp_out			(add_exp_out[10:0]),
+	.add_frac_out			(add_frac_out[63:0]),
+	.add_cc_out			(add_cc_out[1:0]),
+	.add_fcc_out			(add_fcc_out[1:0]),
+
+	.se_add_exp     (se_add_exp_buf2),
+	.se_add_frac    (se_add_frac_buf2),
+  .si             (scan_manual_1),
+  .so             (scan_manual_2)
+);
+// fpu_mul: operand signals use the *_mul_buf1 nets except inq_in{1,2}_53_0_neq_0 (see notes on those lines). Scan: scan_manual_2 -> scan_manual_3.
+
+fpu_mul fpu_mul (
+	.inq_op				(inq_op_mul_buf1[7:0]),
+	.inq_rnd_mode			(inq_rnd_mode_mul_buf1[1:0]),
+	.inq_id				(inq_id_mul_buf1[4:0]),
+	.inq_in1			(inq_in1_mul_buf1[63:0]),
+	.inq_in1_53_0_neq_0		(inq_in1_53_0_neq_0), // NOTE(review): unbuffered net; inq_in1_53_0_neq_0_mul_buf1 is declared but not used here - confirm intent
+	.inq_in1_50_0_neq_0		(inq_in1_50_0_neq_0_mul_buf1),
+	.inq_in1_53_32_neq_0		(inq_in1_53_32_neq_0_mul_buf1),
+	.inq_in1_exp_eq_0		(inq_in1_exp_eq_0_mul_buf1),
+	.inq_in1_exp_neq_ffs		(inq_in1_exp_neq_ffs_mul_buf1),
+	.inq_in2			(inq_in2_mul_buf1[63:0]),
+	.inq_in2_53_0_neq_0		(inq_in2_53_0_neq_0), // NOTE(review): unbuffered net; inq_in2_53_0_neq_0_mul_buf1 is declared but not used here - confirm intent
+	.inq_in2_50_0_neq_0		(inq_in2_50_0_neq_0_mul_buf1),
+	.inq_in2_53_32_neq_0		(inq_in2_53_32_neq_0_mul_buf1),
+	.inq_in2_exp_eq_0		(inq_in2_exp_eq_0_mul_buf1),
+	.inq_in2_exp_neq_ffs		(inq_in2_exp_neq_ffs_mul_buf1),
+	.inq_mul			(inq_mul),
+	.mul_dest_rdy			(mul_dest_rdy),
+	.mul_dest_rdya			(mul_dest_rdy), // second port tied to the same mul_dest_rdy net
+	.fmul_clken_l			(fmul_clken_l),
+	.fmul_clken_l_buf1			(fmul_clken_l_buf1),
+	.arst_l				(arst_l_mul_buf2),
+	.grst_l				(fpu_grst_l_mul_buf1),
+	.rclk				(rclk),
+
+	.mul_pipe_active                (mul_pipe_active),
+	.m1stg_step			(m1stg_step),
+	.m6stg_fmul_in			(m6stg_fmul_in),
+	.m6stg_id_in			(m6stg_id_in[9:0]),
+	.mul_exc_out			(mul_exc_out[4:0]),
+	.m6stg_fmul_dbl_dst		(m6stg_fmul_dbl_dst),
+	.m6stg_fmuls			(m6stg_fmuls),
+	.mul_sign_out			(mul_sign_out),
+	.mul_exp_out			(mul_exp_out[10:0]),
+	.mul_frac_out			(mul_frac_out[51:0]),
+
+	.se_mul           (se_mul_buf4),
+	.se_mul64 (se_mul64_buf2),
+  .si              (scan_manual_2),
+  .so              (scan_manual_3)
+);
+// fpu_div: operand signals use the *_div_buf1 nets; grst_l takes fpu_grst_l directly. Scan: scan_manual_3 -> scan_manual_4.
+
+fpu_div fpu_div (
+	.inq_op				(inq_op_div_buf1[7:0]),
+	.inq_rnd_mode			(inq_rnd_mode_div_buf1[1:0]),
+	.inq_id				(inq_id_div_buf1[4:0]),
+	.inq_in1			(inq_in1_div_buf1[63:0]),
+	.inq_in1_53_0_neq_0		(inq_in1_53_0_neq_0_div_buf1),
+	.inq_in1_50_0_neq_0		(inq_in1_50_0_neq_0_div_buf1),
+	.inq_in1_53_32_neq_0		(inq_in1_53_32_neq_0_div_buf1),
+	.inq_in1_exp_eq_0		(inq_in1_exp_eq_0_div_buf1),
+	.inq_in1_exp_neq_ffs		(inq_in1_exp_neq_ffs_div_buf1),
+	.inq_in2			(inq_in2_div_buf1[63:0]),
+	.inq_in2_53_0_neq_0		(inq_in2_53_0_neq_0_div_buf1),
+	.inq_in2_50_0_neq_0		(inq_in2_50_0_neq_0_div_buf1),
+	.inq_in2_53_32_neq_0		(inq_in2_53_32_neq_0_div_buf1),
+	.inq_in2_exp_eq_0		(inq_in2_exp_eq_0_div_buf1),
+	.inq_in2_exp_neq_ffs		(inq_in2_exp_neq_ffs_div_buf1),
+	.inq_div			(inq_div),
+	.div_dest_rdy			(div_dest_rdy),
+  .fdiv_clken_l			(fdiv_clken_l_div_frac_buf1),
+  .fdiv_clken_l_div_exp_buf1 (fdiv_clken_l_div_exp_buf1),
+	.arst_l				(arst_l_div_buf2),
+	.grst_l				(fpu_grst_l),
+	.rclk				(rclk),
+
+	.div_pipe_active                (div_pipe_active),
+	.d1stg_step			(d1stg_step),
+	.d8stg_fdiv_in			(d8stg_fdiv_in),
+	.div_id_out_in			(div_id_out_in[9:0]),
+	.div_exc_out			(div_exc_out[4:0]),
+	.d8stg_fdivd			(d8stg_fdivd),
+	.d8stg_fdivs			(d8stg_fdivs),
+	.div_sign_out			(div_sign_out),
+	.div_exp_outa			(div_exp_out[10:0]), // port is named div_exp_outa; the net div_exp_out feeds fpu_out
+	.div_frac_outa			(div_frac_out[51:0]), // port is named div_frac_outa; the net div_frac_out feeds fpu_out
+
+	.se              (se_div_buf5),
+  .si              (scan_manual_3),
+  .so              (scan_manual_4)
+);
+// fpu_out: takes the add/mul/div pipe results; connects the *_unbuf CPX outputs and the per-pipe *_dest_rdy nets. Scan: scan_manual_4 -> scan_manual_5.
+
+fpu_out fpu_out (
+	.d8stg_fdiv_in			(d8stg_fdiv_in),
+	.m6stg_fmul_in			(m6stg_fmul_in),
+	.a6stg_fadd_in			(a6stg_fadd_in),
+	.div_id_out_in			(div_id_out_in[9:0]),
+	.m6stg_id_in			(m6stg_id_in[9:0]),
+	.add_id_out_in			(add_id_out_in[9:0]),
+	.div_exc_out			(div_exc_out[4:0]),
+	.d8stg_fdivd			(d8stg_fdivd),
+	.d8stg_fdivs			(d8stg_fdivs),
+	.div_sign_out			(div_sign_out),
+	.div_exp_out			(div_exp_out[10:0]),
+	.div_frac_out			(div_frac_out[51:0]),
+	.mul_exc_out			(mul_exc_out[4:0]),
+	.m6stg_fmul_dbl_dst		(m6stg_fmul_dbl_dst),
+	.m6stg_fmuls			(m6stg_fmuls),
+	.mul_sign_out			(mul_sign_out),
+	.mul_exp_out			(mul_exp_out[10:0]),
+	.mul_frac_out			(mul_frac_out[51:0]),
+	.add_exc_out			(add_exc_out[4:0]),
+	.a6stg_fcmpop			(a6stg_fcmpop),
+	.add_cc_out			(add_cc_out[1:0]),
+	.add_fcc_out			(add_fcc_out[1:0]),
+	.a6stg_dbl_dst			(a6stg_dbl_dst),
+	.a6stg_sng_dst			(a6stg_sng_dst),
+	.a6stg_long_dst			(a6stg_long_dst),
+	.a6stg_int_dst			(a6stg_int_dst),
+	.add_sign_out			(add_sign_out),
+	.add_exp_out			(add_exp_out[10:0]),
+	.add_frac_out			(add_frac_out[63:0]),
+	.arst_l				(arst_l_out_buf3),
+	.grst_l				(fpu_grst_l_add_buf3), // same buffered reset net that fpu_add uses
+	.rclk				(rclk),
+
+	.fp_cpx_req_cq			(fp_cpx_req_cq_unbuf[7:0]),
+	.add_dest_rdy			(add_dest_rdy),
+	.mul_dest_rdy			(mul_dest_rdy),
+	.div_dest_rdy			(div_dest_rdy),
+	.fp_cpx_data_ca			(fp_cpx_data_ca_unbuf[144:0]),
+
+	.se               (se_out_buf2),
+  .si           (scan_manual_4),
+  .so           (scan_manual_5)
+);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      FPU test_stub.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Connects se, sehold, rst_tri_en (mem_write_disable port) and so_unbuf; presumably test_stub drives them - confirm against test_stub_scan.
+test_stub_scan test_stub (
+	.ctu_tst_pre_grst_l (ctu_tst_pre_grst_l_buf1),
+	.arst_l (arst_l_add_buf4), // same buffered reset net that fpu_add uses
+	.global_shift_enable (global_shift_enable_buf1),
+	.ctu_tst_scan_disable (ctu_tst_scan_disable_buf1),
+	.ctu_tst_scanmode (ctu_tst_scanmode_buf1),
+	.ctu_tst_macrotest (ctu_tst_macrotest_buf1),
+	.ctu_tst_short_chain (ctu_tst_short_chain_buf1),
+	.long_chain_so_0 (scan_manual_6_buf1), // connect to long scan chain
+	.short_chain_so_0 (manual_scan_0), // connect to short scan chain (from fpu_inq_sram)
+	.long_chain_so_1 (1'b0),
+	.short_chain_so_1 (1'b0),
+	.long_chain_so_2 (1'b0),
+	.short_chain_so_2 (1'b0),
+
+	.mux_drive_disable (), // left unconnected
+	.mem_write_disable (rst_tri_en),
+	.sehold (sehold),
+	.se (se),
+	.testmode_l (), // left unconnected
+	.mem_bypass (), // left unconnected
+	.so_0 (so_unbuf),
+	.so_1 (), // left unconnected
+	.so_2 () // left unconnected
+);
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      FPU cluster_header.
+//
+///////////////////////////////////////////////////////////////////////////////
+// cluster_header: takes gclk; rclk and fpu_grst_l (cluster_grst_l port) are its outputs. Scan: scan_manual_5 -> scan_manual_6.
+bw_clk_cl_fpu_cmp cluster_header (
+	.gclk (gclk),
+	.cluster_cken (cluster_cken_buf1),
+	.arst_l (arst_l_cluster_header_buf2),
+	.grst_l (grst_l_buf1),
+	.adbginit_l (1'b1),
+	.gdbginit_l (1'b1),
+	.dbginit_l (), // left unconnected
+	.cluster_grst_l (fpu_grst_l),
+	.rclk (rclk),
+	.se (se_cluster_header_buf2),
+	.si (scan_manual_5),
+	.so (scan_manual_6)
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      FPU repeater_groups.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Takes the raw module inputs and internal nets, and connects the *_buf* nets used by the other instances plus the module outputs so, fp_cpx_data_ca and fp_cpx_req_cq.
+// 3/14/03: Added repeater groups
+fpu_rptr_groups fpu_rptr_groups (
+	.inq_in1 (inq_in1[63:0]),
+	.inq_in2 (inq_in2[63:0]),
+	.inq_id (inq_id[4:0]),
+	.inq_op (inq_op[7:0]),
+	.inq_rnd_mode (inq_rnd_mode[1:0]),
+	.inq_in1_50_0_neq_0 (inq_in1_50_0_neq_0),
+	.inq_in1_53_0_neq_0 (inq_in1_53_0_neq_0),
+	.inq_in1_53_32_neq_0 (inq_in1_53_32_neq_0),
+	.inq_in1_exp_eq_0 (inq_in1_exp_eq_0),
+	.inq_in1_exp_neq_ffs (inq_in1_exp_neq_ffs),
+	.inq_in2_50_0_neq_0 (inq_in2_50_0_neq_0),
+	.inq_in2_53_0_neq_0 (inq_in2_53_0_neq_0),
+	.inq_in2_53_32_neq_0 (inq_in2_53_32_neq_0),
+	.inq_in2_exp_eq_0 (inq_in2_exp_eq_0),
+	.inq_in2_exp_neq_ffs (inq_in2_exp_neq_ffs),
+
+	.ctu_tst_macrotest (ctu_tst_macrotest),
+	.ctu_tst_pre_grst_l (ctu_tst_pre_grst_l),
+	.ctu_tst_scan_disable (ctu_tst_scan_disable),
+	.ctu_tst_scanmode (ctu_tst_scanmode),
+	.ctu_tst_short_chain (ctu_tst_short_chain),
+	.global_shift_enable (global_shift_enable),
+
+	.grst_l (grst_l),
+	.cluster_cken (cluster_cken),
+
+	.se (se),
+
+	.arst_l (arst_l),
+
+	.fpu_grst_l (fpu_grst_l),
+
+	.fmul_clken_l (fmul_clken_l),
+	.fdiv_clken_l (fdiv_clken_l),
+
+	.scan_manual_6 (scan_manual_6),
+
+	.si (si),
+	.so_unbuf (so_unbuf),
+
+	.pcx_fpio_data_px2 (pcx_fpio_data_px2[123:0]),
+	.pcx_fpio_data_rdy_px2 (pcx_fpio_data_rdy_px2),
+
+	.fp_cpx_data_ca (fp_cpx_data_ca_unbuf[144:0]),
+	.fp_cpx_req_cq (fp_cpx_req_cq_unbuf[7:0]),
+
+	.inq_sram_din_unbuf ({fp_id_in[4:0],
+		fp_rnd_mode_in[1:0],
+		fp_fcc_in[1:0],
+		fp_op_in[7:0],
+		fp_src1_in[68:0],
+		fp_src2_in[68:0], 1'b0}), // 5+2+2+8+69+69+1 = 156 bits, matching inq_sram_din_buf1[155:0]
+
+	.inq_in1_add_buf1 (inq_in1_add_buf1[63:0]),
+	.inq_in1_mul_buf1 (inq_in1_mul_buf1[63:0]),
+	.inq_in1_div_buf1 (inq_in1_div_buf1[63:0]),
+	.inq_in2_add_buf1 (inq_in2_add_buf1[63:0]),
+	.inq_in2_mul_buf1 (inq_in2_mul_buf1[63:0]),
+	.inq_in2_div_buf1 (inq_in2_div_buf1[63:0]),
+	.inq_id_add_buf1 (inq_id_add_buf1[4:0]),
+	.inq_id_div_buf1 (inq_id_div_buf1[4:0]),
+	.inq_id_mul_buf1 (inq_id_mul_buf1[4:0]),
+	.inq_op_add_buf1 (inq_op_add_buf1[7:0]),
+	.inq_op_mul_buf1 (inq_op_mul_buf1[7:0]),
+	.inq_op_div_buf1 (inq_op_div_buf1[7:0]),
+	.inq_rnd_mode_add_buf1 (inq_rnd_mode_add_buf1[1:0]),
+	.inq_rnd_mode_mul_buf1 (inq_rnd_mode_mul_buf1[1:0]),
+	.inq_rnd_mode_div_buf1 (inq_rnd_mode_div_buf1[1:0]),
+	.inq_in1_50_0_neq_0_add_buf1 (inq_in1_50_0_neq_0_add_buf1),
+	.inq_in1_50_0_neq_0_mul_buf1 (inq_in1_50_0_neq_0_mul_buf1),
+	.inq_in1_50_0_neq_0_div_buf1 (inq_in1_50_0_neq_0_div_buf1),
+	.inq_in1_53_0_neq_0_add_buf1 (inq_in1_53_0_neq_0_add_buf1),
+	.inq_in1_53_0_neq_0_mul_buf1 (inq_in1_53_0_neq_0_mul_buf1),
+	.inq_in1_53_0_neq_0_div_buf1 (inq_in1_53_0_neq_0_div_buf1),
+	.inq_in1_53_32_neq_0_add_buf1 (inq_in1_53_32_neq_0_add_buf1),
+	.inq_in1_53_32_neq_0_mul_buf1 (inq_in1_53_32_neq_0_mul_buf1),
+	.inq_in1_53_32_neq_0_div_buf1 (inq_in1_53_32_neq_0_div_buf1),
+	.inq_in1_exp_eq_0_add_buf1 (inq_in1_exp_eq_0_add_buf1),
+	.inq_in1_exp_eq_0_mul_buf1 (inq_in1_exp_eq_0_mul_buf1),
+	.inq_in1_exp_eq_0_div_buf1 (inq_in1_exp_eq_0_div_buf1),
+	.inq_in1_exp_neq_ffs_add_buf1 (inq_in1_exp_neq_ffs_add_buf1),
+	.inq_in1_exp_neq_ffs_mul_buf1 (inq_in1_exp_neq_ffs_mul_buf1),
+	.inq_in1_exp_neq_ffs_div_buf1 (inq_in1_exp_neq_ffs_div_buf1),
+	.inq_in2_50_0_neq_0_add_buf1 (inq_in2_50_0_neq_0_add_buf1),
+	.inq_in2_50_0_neq_0_mul_buf1 (inq_in2_50_0_neq_0_mul_buf1),
+	.inq_in2_50_0_neq_0_div_buf1 (inq_in2_50_0_neq_0_div_buf1),
+	.inq_in2_53_0_neq_0_add_buf1 (inq_in2_53_0_neq_0_add_buf1),
+	.inq_in2_53_0_neq_0_mul_buf1 (inq_in2_53_0_neq_0_mul_buf1),
+	.inq_in2_53_0_neq_0_div_buf1 (inq_in2_53_0_neq_0_div_buf1),
+	.inq_in2_53_32_neq_0_add_buf1 (inq_in2_53_32_neq_0_add_buf1),
+	.inq_in2_53_32_neq_0_mul_buf1 (inq_in2_53_32_neq_0_mul_buf1),
+	.inq_in2_53_32_neq_0_div_buf1 (inq_in2_53_32_neq_0_div_buf1),
+	.inq_in2_exp_eq_0_add_buf1 (inq_in2_exp_eq_0_add_buf1),
+	.inq_in2_exp_eq_0_mul_buf1 (inq_in2_exp_eq_0_mul_buf1),
+	.inq_in2_exp_eq_0_div_buf1 (inq_in2_exp_eq_0_div_buf1),
+	.inq_in2_exp_neq_ffs_add_buf1 (inq_in2_exp_neq_ffs_add_buf1),
+	.inq_in2_exp_neq_ffs_mul_buf1 (inq_in2_exp_neq_ffs_mul_buf1),
+	.inq_in2_exp_neq_ffs_div_buf1 (inq_in2_exp_neq_ffs_div_buf1),
+
+	.ctu_tst_macrotest_buf1 (ctu_tst_macrotest_buf1),
+	.ctu_tst_pre_grst_l_buf1 (ctu_tst_pre_grst_l_buf1),
+	.ctu_tst_scan_disable_buf1 (ctu_tst_scan_disable_buf1),
+	.ctu_tst_scanmode_buf1 (ctu_tst_scanmode_buf1),
+	.ctu_tst_short_chain_buf1 (ctu_tst_short_chain_buf1),
+	.global_shift_enable_buf1 (global_shift_enable_buf1),
+
+	.grst_l_buf1 (grst_l_buf1),
+	.cluster_cken_buf1 (cluster_cken_buf1),
+
+	.se_add_exp_buf2 (se_add_exp_buf2),
+	.se_add_frac_buf2 (se_add_frac_buf2),
+	.se_out_buf2 (se_out_buf2),
+	.se_mul64_buf2 (se_mul64_buf2),
+	.se_cluster_header_buf2 (se_cluster_header_buf2),
+	.se_in_buf3 (se_in_buf3),
+	.se_mul_buf4 (se_mul_buf4),
+	.se_div_buf5 (se_div_buf5),
+
+	.arst_l_div_buf2 (arst_l_div_buf2),
+	.arst_l_mul_buf2 (arst_l_mul_buf2),
+	.arst_l_cluster_header_buf2 (arst_l_cluster_header_buf2),
+	.arst_l_in_buf3 (arst_l_in_buf3),
+	.arst_l_out_buf3 (arst_l_out_buf3),
+	.arst_l_add_buf4 (arst_l_add_buf4),
+
+	.fpu_grst_l_mul_buf1 (fpu_grst_l_mul_buf1),
+	.fpu_grst_l_in_buf2 (fpu_grst_l_in_buf2),
+	.fpu_grst_l_add_buf3 (fpu_grst_l_add_buf3),
+
+	.fmul_clken_l_buf1 (fmul_clken_l_buf1),
+	.fdiv_clken_l_div_exp_buf1 (fdiv_clken_l_div_exp_buf1),
+	.fdiv_clken_l_div_frac_buf1 (fdiv_clken_l_div_frac_buf1),
+
+	.scan_manual_6_buf1 (scan_manual_6_buf1),
+
+	.si_buf1 (si_buf1),
+	.so (so),
+
+	.pcx_fpio_data_px2_buf1 (pcx_fpio_data_px2_buf1[123:0]),
+	.pcx_fpio_data_rdy_px2_buf1 (pcx_fpio_data_rdy_px2_buf1),
+
+	.fp_cpx_data_ca_buf1 (fp_cpx_data_ca[144:0]),
+	.fp_cpx_req_cq_buf1 (fp_cpx_req_cq[7:0]),
+
+	.inq_sram_din_buf1 (inq_sram_din_buf1[155:0])
+
+);
+
+
+endmodule
+
+// Local Variables:
+// verilog-library-directories:("." "../../../srams/rtl")
+// End:
+
Index: /trunk/T1-FPU/fpu_div_exp_dp.v
===================================================================
--- /trunk/T1-FPU/fpu_div_exp_dp.v	(revision 6)
+++ /trunk/T1-FPU/fpu_div_exp_dp.v	(revision 6)
@@ -0,0 +1,281 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_div_exp_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide pipeline exponent datapath.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_div_exp_dp (
+	inq_in1,
+	inq_in2,
+	d1stg_step,
+	d234stg_fdiv,
+	div_expadd1_in1_dbl,
+	div_expadd1_in1_sng,
+	div_expadd1_in2_exp_in2_dbl,
+	div_expadd1_in2_exp_in2_sng,
+	d3stg_fdiv,
+	d4stg_fdiv,
+	div_shl_cnt,
+	div_exp1_expadd1,
+	div_exp1_0835,
+	div_exp1_0118,
+	div_exp1_zero,
+	div_exp1_load,
+	div_expadd2_in1_exp_out,
+	d5stg_fdiva,
+	d5stg_fdivd,
+	d5stg_fdivs,
+	d6stg_fdiv,
+	d7stg_fdiv,
+	div_expadd2_no_decr_inv,
+	div_expadd2_cin,
+	div_exp_out_expadd2,
+	div_exp_out_expadd22_inv,
+	div_exp_out_of,
+	d7stg_to_0_inv,
+	d7stg_fdivd,
+	div_exp_out_exp_out,
+	d7stg_rndup_inv,
+	div_frac_add_52_inv,
+	div_exp_out_load,
+	fdiv_clken_l,
+	rclk,
+	// datapath outputs
+	div_exp1,
+	div_expadd2_12,
+	div_exp_out,
+	div_exp_outa,
+	// scan interface
+	se,
+	si,
+	so
+);
+// Exponent datapath of the divide pipe: two 13-bit adders (div_expadd1,
+// div_expadd2) whose results are selected into the div_exp1 and div_exp_out registers.
+input [62:52]	inq_in1;		// request operand 1 to op pipes
+input [62:52]	inq_in2;		// request operand 2 to op pipes
+input		d1stg_step;		// divide pipe load
+input		d234stg_fdiv;		// select line to div_expadd1
+input		div_expadd1_in1_dbl;	// select line to div_expadd1
+input		div_expadd1_in1_sng;	// select line to div_expadd1
+input		div_expadd1_in2_exp_in2_dbl; // select line to div_expadd1
+input		div_expadd1_in2_exp_in2_sng; // select line to div_expadd1
+input		d3stg_fdiv;		// divide operation- divide stage 3
+input		d4stg_fdiv;		// divide operation- divide stage 4
+input [5:0]	div_shl_cnt;		// divide left shift amount
+input		div_exp1_expadd1;	// select line to div_exp1
+input		div_exp1_0835;		// select line to div_exp1
+input		div_exp1_0118;		// select line to div_exp1
+input		div_exp1_zero;		// select line to div_exp1
+input		div_exp1_load;		// load enable to div_exp1
+input		div_expadd2_in1_exp_out; // select line to div_expadd2
+input		d5stg_fdiva;		// divide operation- divide stage 5
+input		d5stg_fdivd;		// divide double- divide stage 5
+input		d5stg_fdivs;		// divide single- divide stage 5
+input		d6stg_fdiv;		// divide operation- divide stage 6
+input		d7stg_fdiv;		// divide operation- divide stage 7
+input		div_expadd2_no_decr_inv; // no exponent decrement
+input		div_expadd2_cin;	// carry in to 2nd exponent adder
+input		div_exp_out_expadd2;	// select line to div_exp_out
+input		div_exp_out_expadd22_inv; // select line to div_exp_out
+input		div_exp_out_of;		// overflow to exponent output
+input		d7stg_to_0_inv;		// result to infinity on overflow
+input		d7stg_fdivd;		// divide double- divide stage 7
+input		div_exp_out_exp_out;	// select line to div_exp_out
+input		d7stg_rndup_inv;	// no rounding increment
+input		div_frac_add_52_inv;	// div_frac_add bit[52] inverted
+input		div_exp_out_load;	// load enable to div_exp_out
+input		fdiv_clken_l;           // div pipe clk enable - asserted low
+input		rclk;		// global clock
+
+output [12:0]	div_exp1;		// divide exponent- intermediate value
+output        	div_expadd2_12;		// divide exponent- bit 12 of the 2nd adder output
+output [12:0]	div_exp_out;		// divide exponent output
+output [10:0]	div_exp_outa;		// divide exponent output- buffered copy
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+
+
+wire [10:0]	div_exp_in1;
+wire [10:0]	div_exp_in2;
+wire [12:0]	div_expadd1_in1;
+wire [12:0]	div_expadd1_in2;
+wire [12:0]	div_expadd1;
+wire [12:0]	div_exp1_in;
+wire [12:0]	div_exp1;
+wire [12:0]	div_expadd2_in1;
+wire [12:0]	div_expadd2_in2;
+wire [12:0]     div_expadd2;
+wire         	div_expadd2_12;
+wire [12:0]	div_exp_out_in;
+wire [12:0]	div_exp_out;
+wire [10:0]	div_exp_outa;
+
+// Nets used by the clock-enable buffer instance below.
+wire se_l;
+wire clk;	// net between ckbuf_div_exp_dp and the flops; explicit so no implicit net is inferred
+assign se_l = ~se;
+// NOTE(review): clken_buf is defined elsewhere; presumably se high keeps clk enabled for scan - confirm.
+clken_buf  ckbuf_div_exp_dp (
+  .clk(clk),
+  .rclk(rclk),
+  .enb_l(fdiv_clken_l),
+  .tmb_l(se_l)
+  );
+// NOTE: scan is not stitched in this module: si is unused, so is undriven, flop si/so are left open.
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide exponent inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Operand bits [62:52] go to dffe_s registers enabled by d1stg_step (dffe_s is defined elsewhere).
+dffe_s #(11) i_div_exp_in1 (
+        .din    (inq_in1[62:52]),
+        .en     (d1stg_step),
+        .clk    (clk),
+ 
+        .q      (div_exp_in1[10:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(11) i_div_exp_in2 (
+        .din    (inq_in2[62:52]),
+        .en     (d1stg_step),
+        .clk    (clk),
+ 
+        .q      (div_exp_in2[10:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide exponent adder in the front end of the divide pipe.
+//
+///////////////////////////////////////////////////////////////////////////////
+// AND-OR mux: each candidate operand is gated by its select replicated to 13 bits.
+assign div_expadd1_in1[12:0]= ({13{d234stg_fdiv}}
+			    & div_exp1[12:0])			// current div_exp1 register value
+		| ({13{div_expadd1_in1_dbl}}
+			    & {2'b0, div_exp_in1[10:0]})	// all 11 captured bits, zero extended
+		| ({13{div_expadd1_in1_sng}}
+			    & {5'b0, div_exp_in1[10:3]});	// upper 8 captured bits, zero extended
+// Second operand: a constant, an inverted (one's complement) exponent, or the shift count.
+assign div_expadd1_in2[12:0]= ({13{div_expadd1_in1_dbl}}
+			    & 13'h0436)				// 1078 decimal
+		| ({13{div_expadd1_in1_sng}}
+			    & 13'h0099)				// 153 decimal
+		| ({13{div_expadd1_in2_exp_in2_dbl}}
+			    & (~{2'b0, div_exp_in2[10:0]}))	// ~x == -x - 1 (mod 2^13)
+		| ({13{div_expadd1_in2_exp_in2_sng}}
+			    & (~{5'b0, div_exp_in2[10:3]}))	// ~x == -x - 1 (mod 2^13)
+		| ({13{d3stg_fdiv}}
+			    & (~{7'b0, div_shl_cnt[5:0]}))	// subtracts (div_shl_cnt + 1)
+		| ({13{d4stg_fdiv}}
+			    & {7'b0, div_shl_cnt[5:0]});	// adds div_shl_cnt
+// 13-bit add with no carry-in, so an inverted operand contributes -x - 1.
+assign div_expadd1[12:0]= (div_expadd1_in1[12:0]
+			+ div_expadd1_in2[12:0]);
+// Next div_exp1: the adder result or a constant (the div_exp1_zero term is all zeros).
+assign div_exp1_in[12:0]= ({13{div_exp1_expadd1}}
+			    & div_expadd1[12:0])
+		| ({13{div_exp1_0835}}
+			    & 13'h0835)				// 2101 decimal
+		| ({13{div_exp1_0118}}
+			    & 13'h0118)				// 280 decimal
+		| ({13{div_exp1_zero}}
+			    & 13'h0000);
+
+dffe_s #(13) i_div_exp1 (
+	.din	(div_exp1_in[12:0]),
+	.en	(div_exp1_load),
+	.clk    (clk),
+
+        .q      (div_exp1[12:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide exponent adder in the back end of the divide pipe.
+//
+///////////////////////////////////////////////////////////////////////////////
+// First operand: div_exp_out fed back, or div_exp1 when d5stg_fdiva is set.
+assign div_expadd2_in1[12:0]= ({13{div_expadd2_in1_exp_out}}
+			    & div_exp_out[12:0])
+		| ({13{d5stg_fdiva}}
+			    & div_exp1[12:0]);
+// Second operand: a two's-complement adjustment selected by the stage 5/6/7 signals.
+assign div_expadd2_in2[12:0]= ({13{d5stg_fdiva}}
+			    & {7'h7f, d5stg_fdivs, 1'b0, d5stg_fdivd,
+				d5stg_fdivs, 1'b1, d5stg_fdivs})	// fdivd only: 13'h1fca (-54); fdivs only: 13'h1fe7 (-25)
+		| ({13{d6stg_fdiv}}
+			    & {13{div_expadd2_no_decr_inv}})	// all ones (-1) when div_expadd2_no_decr_inv is set
+		| ({13{d7stg_fdiv}}
+			    & 13'h0000);			// contributes nothing to the OR
+// Three-input add; div_expadd2_cin enters at bit 0.
+assign div_expadd2[12:0]= (div_expadd2_in1[12:0]
+			+ div_expadd2_in2[12:0]
+			+ {12'b0, div_expadd2_cin});
+assign div_expadd2_12 = div_expadd2[12];
+// Next div_exp_out: adder result, forced overflow exponent, or hold of the current value.
+assign div_exp_out_in[12:0]= ({13{(div_exp_out_expadd2
+				&& (!(div_frac_add_52_inv
+					&& div_exp_out_expadd22_inv)))}}
+			    & div_expadd2[12:0])
+		| ({13{div_exp_out_of}}
+			    & {2'b00, {3{d7stg_fdivd}}, 7'h7f, d7stg_to_0_inv})	// 13'h07fe or 13'h00fe, bit 0 = d7stg_to_0_inv
+		| ({13{(div_exp_out_exp_out
+			&& (div_frac_add_52_inv || d7stg_rndup_inv))}}
+			    & div_exp_out[12:0]);
+
+dffe_s #(13) i_div_exp_out (
+	.din	(div_exp_out_in[12:0]),
+	.en	(div_exp_out_load),
+	.clk    (clk),
+
+        .q      (div_exp_out[12:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+// Copy of the low 11 bits of div_exp_out.
+assign div_exp_outa[10:0]= div_exp_out[10:0];
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_rptr_groups.v
===================================================================
--- /trunk/T1-FPU/fpu_rptr_groups.v	(revision 6)
+++ /trunk/T1-FPU/fpu_rptr_groups.v	(revision 6)
@@ -0,0 +1,1087 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_rptr_groups.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+
+module fpu_rptr_groups (
+	inq_in1,
+	inq_in2,
+	inq_id,
+	inq_op,
+	inq_rnd_mode,
+	inq_in1_50_0_neq_0,
+	inq_in1_53_0_neq_0,
+	inq_in1_53_32_neq_0,
+	inq_in1_exp_eq_0,
+	inq_in1_exp_neq_ffs,
+	inq_in2_50_0_neq_0,
+	inq_in2_53_0_neq_0,
+	inq_in2_53_32_neq_0,
+	inq_in2_exp_eq_0,
+	inq_in2_exp_neq_ffs,
+	ctu_tst_macrotest,
+	ctu_tst_pre_grst_l,
+	ctu_tst_scan_disable,
+	ctu_tst_scanmode,
+	ctu_tst_short_chain,
+	global_shift_enable,
+	grst_l,
+	cluster_cken,
+	se,
+	arst_l,
+	fpu_grst_l,
+	fmul_clken_l,
+	fdiv_clken_l,
+	scan_manual_6,
+	si,
+	so_unbuf,
+	pcx_fpio_data_px2,
+	pcx_fpio_data_rdy_px2,
+	fp_cpx_req_cq,
+	fp_cpx_data_ca,
+	inq_sram_din_unbuf,
+
+	inq_in1_add_buf1,
+	inq_in1_mul_buf1,
+	inq_in1_div_buf1,
+	inq_in2_add_buf1,
+	inq_in2_mul_buf1,
+	inq_in2_div_buf1,
+	inq_id_add_buf1,
+	inq_id_mul_buf1,
+	inq_id_div_buf1,
+	inq_op_add_buf1,
+	inq_op_div_buf1,
+	inq_op_mul_buf1,
+	inq_rnd_mode_add_buf1,
+	inq_rnd_mode_div_buf1,
+	inq_rnd_mode_mul_buf1,
+	inq_in1_50_0_neq_0_add_buf1,
+	inq_in1_50_0_neq_0_mul_buf1,
+	inq_in1_50_0_neq_0_div_buf1,
+	inq_in1_53_0_neq_0_add_buf1,
+	inq_in1_53_0_neq_0_mul_buf1,
+	inq_in1_53_0_neq_0_div_buf1,
+	inq_in1_53_32_neq_0_add_buf1,
+	inq_in1_53_32_neq_0_mul_buf1,
+	inq_in1_53_32_neq_0_div_buf1,
+	inq_in1_exp_eq_0_add_buf1,
+	inq_in1_exp_eq_0_mul_buf1,
+	inq_in1_exp_eq_0_div_buf1,
+	inq_in1_exp_neq_ffs_add_buf1,
+	inq_in1_exp_neq_ffs_mul_buf1,
+	inq_in1_exp_neq_ffs_div_buf1,
+	inq_in2_50_0_neq_0_add_buf1,
+	inq_in2_50_0_neq_0_mul_buf1,
+	inq_in2_50_0_neq_0_div_buf1,
+	inq_in2_53_0_neq_0_add_buf1,
+	inq_in2_53_0_neq_0_mul_buf1,
+	inq_in2_53_0_neq_0_div_buf1,
+	inq_in2_53_32_neq_0_add_buf1,
+	inq_in2_53_32_neq_0_mul_buf1,
+	inq_in2_53_32_neq_0_div_buf1,
+	inq_in2_exp_eq_0_add_buf1,
+	inq_in2_exp_eq_0_mul_buf1,
+	inq_in2_exp_eq_0_div_buf1,
+	inq_in2_exp_neq_ffs_add_buf1,
+	inq_in2_exp_neq_ffs_mul_buf1,
+	inq_in2_exp_neq_ffs_div_buf1,
+	ctu_tst_macrotest_buf1,
+	ctu_tst_pre_grst_l_buf1,
+	ctu_tst_scan_disable_buf1,
+	ctu_tst_scanmode_buf1,
+	ctu_tst_short_chain_buf1,
+	global_shift_enable_buf1,
+	grst_l_buf1,
+	cluster_cken_buf1,
+	se_add_exp_buf2,
+	se_add_frac_buf2,
+	se_out_buf2,
+	se_mul64_buf2,
+	se_cluster_header_buf2,
+	se_in_buf3,
+	se_mul_buf4,
+	se_div_buf5,
+	arst_l_div_buf2,
+	arst_l_mul_buf2,
+	arst_l_cluster_header_buf2,
+	arst_l_in_buf3,
+	arst_l_out_buf3,
+	arst_l_add_buf4,
+	fpu_grst_l_mul_buf1,
+	fpu_grst_l_in_buf2,
+	fpu_grst_l_add_buf3,
+	fmul_clken_l_buf1,
+	fdiv_clken_l_div_exp_buf1,
+	fdiv_clken_l_div_frac_buf1,
+	scan_manual_6_buf1,
+	si_buf1,
+	so,
+	pcx_fpio_data_px2_buf1,
+	pcx_fpio_data_rdy_px2_buf1,
+	fp_cpx_req_cq_buf1,
+	fp_cpx_data_ca_buf1,
+	inq_sram_din_buf1
+);
+
+	input [63:0] inq_in1;
+	input [63:0] inq_in2;
+	input [4:0] inq_id;
+	input [7:0] inq_op;
+	input [1:0] inq_rnd_mode;
+	input inq_in1_50_0_neq_0;
+	input inq_in1_53_0_neq_0;
+	input inq_in1_53_32_neq_0;
+	input inq_in1_exp_eq_0;
+	input inq_in1_exp_neq_ffs;
+	input inq_in2_50_0_neq_0;
+	input inq_in2_53_0_neq_0;
+	input inq_in2_53_32_neq_0;
+	input inq_in2_exp_eq_0;
+	input inq_in2_exp_neq_ffs;
+
+	input ctu_tst_macrotest;
+	input ctu_tst_pre_grst_l;
+	input ctu_tst_scan_disable;
+	input ctu_tst_scanmode;
+	input ctu_tst_short_chain;
+	input global_shift_enable;
+
+	input grst_l;
+	input cluster_cken;
+
+	input se;
+
+	input arst_l;
+
+	input fpu_grst_l;
+
+	input fmul_clken_l;
+	input fdiv_clken_l;
+
+	input scan_manual_6;
+
+	input si;
+	input so_unbuf;
+
+	input [123:0] pcx_fpio_data_px2;
+	input pcx_fpio_data_rdy_px2;
+
+	input [7:0] fp_cpx_req_cq;
+	input [144:0] fp_cpx_data_ca;
+
+	input [155:0] inq_sram_din_unbuf;
+
+	output [63:0] inq_in1_add_buf1;
+	output [63:0] inq_in1_mul_buf1;
+	output [63:0] inq_in1_div_buf1;
+	output [63:0] inq_in2_add_buf1;
+	output [63:0] inq_in2_mul_buf1;
+	output [63:0] inq_in2_div_buf1;
+	output [4:0] inq_id_add_buf1;
+	output [4:0] inq_id_mul_buf1;
+	output [4:0] inq_id_div_buf1;
+	output [7:0] inq_op_add_buf1;
+	output [7:0] inq_op_mul_buf1;
+	output [7:0] inq_op_div_buf1;
+	output [1:0] inq_rnd_mode_add_buf1;
+	output [1:0] inq_rnd_mode_mul_buf1;
+	output [1:0] inq_rnd_mode_div_buf1;
+	output inq_in1_50_0_neq_0_add_buf1;
+	output inq_in1_50_0_neq_0_mul_buf1;
+	output inq_in1_50_0_neq_0_div_buf1;
+	output inq_in1_53_0_neq_0_add_buf1;
+	output inq_in1_53_0_neq_0_mul_buf1;
+	output inq_in1_53_0_neq_0_div_buf1;
+	output inq_in1_53_32_neq_0_add_buf1;
+	output inq_in1_53_32_neq_0_mul_buf1;
+	output inq_in1_53_32_neq_0_div_buf1;
+	output inq_in1_exp_eq_0_add_buf1;
+	output inq_in1_exp_eq_0_mul_buf1;
+	output inq_in1_exp_eq_0_div_buf1;
+	output inq_in1_exp_neq_ffs_add_buf1;
+	output inq_in1_exp_neq_ffs_mul_buf1;
+	output inq_in1_exp_neq_ffs_div_buf1;
+	output inq_in2_50_0_neq_0_add_buf1;
+	output inq_in2_50_0_neq_0_mul_buf1;
+	output inq_in2_50_0_neq_0_div_buf1;
+	output inq_in2_53_0_neq_0_add_buf1;
+	output inq_in2_53_0_neq_0_mul_buf1;
+	output inq_in2_53_0_neq_0_div_buf1;
+	output inq_in2_53_32_neq_0_add_buf1;
+	output inq_in2_53_32_neq_0_mul_buf1;
+	output inq_in2_53_32_neq_0_div_buf1;
+	output inq_in2_exp_eq_0_add_buf1;
+	output inq_in2_exp_eq_0_mul_buf1;
+	output inq_in2_exp_eq_0_div_buf1;
+	output inq_in2_exp_neq_ffs_add_buf1;
+	output inq_in2_exp_neq_ffs_mul_buf1;
+	output inq_in2_exp_neq_ffs_div_buf1;
+
+	output ctu_tst_macrotest_buf1;
+	output ctu_tst_pre_grst_l_buf1;
+	output ctu_tst_scan_disable_buf1;
+	output ctu_tst_scanmode_buf1;
+	output ctu_tst_short_chain_buf1;
+	output global_shift_enable_buf1;
+
+	output grst_l_buf1;
+	output cluster_cken_buf1;
+
+	output se_add_exp_buf2;
+	output se_add_frac_buf2;
+	output se_out_buf2;
+	output se_mul64_buf2;
+	output se_cluster_header_buf2;
+	output se_in_buf3;
+	output se_mul_buf4;
+	output se_div_buf5;
+
+	output arst_l_div_buf2;
+	output arst_l_mul_buf2;
+	output arst_l_cluster_header_buf2;
+	output arst_l_in_buf3;
+	output arst_l_out_buf3;
+	output arst_l_add_buf4;
+
+	output fpu_grst_l_mul_buf1;
+	output fpu_grst_l_in_buf2;
+	output fpu_grst_l_add_buf3;
+
+	output fmul_clken_l_buf1;
+	output fdiv_clken_l_div_exp_buf1;
+	output fdiv_clken_l_div_frac_buf1;
+
+	output scan_manual_6_buf1;
+
+	output si_buf1;
+	output so;
+
+	output [123:0] pcx_fpio_data_px2_buf1;
+	output pcx_fpio_data_rdy_px2_buf1;
+
+	output [7:0] fp_cpx_req_cq_buf1;
+	output [144:0] fp_cpx_data_ca_buf1;
+
+	output [155:0] inq_sram_din_buf1;
+
+	wire [3:0] inq_id_add_buf1_unused;
+	wire [2:0] inq_id_mul_buf1_unused;
+	wire [4:0] inq_id_div_buf1_unused;
+
+	wire [1:0] ctu_tst_buf1_lo_unused;
+
+	wire [1:0] cluster_cken_buf1_unused;
+
+	wire [1:0] se_mul64_buf2_unused;
+
+	wire [2:0] arst_l_buf1_unused;
+
+	wire [1:0] fdiv_clken_l_buf1_unused;
+
+	wire [2:0] so_cluster_header_buf1_unused;
+	wire [2:0] si_buf1_unused;
+
+	wire [2:0] pcx_fpio_data_px2_buf1_unused;
+	wire [5:0] fp_cpx_buf1_9_unused;
+
+	// inq_in1
+	fpu_bufrpt_grp32 i_inq_in1_add_buf1_hi (
+		.in (inq_in1[63:32]),
+		.out (inq_in1_add_buf1[63:32])
+	);
+	fpu_bufrpt_grp32 i_inq_in1_add_buf1_lo (
+		.in (inq_in1[31:0]),
+		.out (inq_in1_add_buf1[31:0])
+	);
+	fpu_bufrpt_grp32 i_inq_in1_mul_buf1_hi (
+		.in (inq_in1[63:32]),
+		.out (inq_in1_mul_buf1[63:32])
+	);
+	fpu_bufrpt_grp32 i_inq_in1_mul_buf1_lo (
+		.in (inq_in1[31:0]),
+		.out (inq_in1_mul_buf1[31:0])
+	);
+	fpu_bufrpt_grp64 i_inq_in1_div_buf1 (
+		.in (inq_in1[63:0]),
+		.out (inq_in1_div_buf1[63:0])
+	);
+
+	// inq_in2
+	fpu_bufrpt_grp32 i_inq_in2_add_buf1_hi (
+		.in (inq_in2[63:32]),
+		.out (inq_in2_add_buf1[63:32])
+	);
+	fpu_bufrpt_grp32 i_inq_in2_add_buf1_lo (
+		.in (inq_in2[31:0]),
+		.out (inq_in2_add_buf1[31:0])
+	);
+	fpu_bufrpt_grp32 i_inq_in2_mul_buf1_hi (
+		.in (inq_in2[63:32]),
+		.out (inq_in2_mul_buf1[63:32])
+	);
+	fpu_bufrpt_grp32 i_inq_in2_mul_buf1_lo (
+		.in (inq_in2[31:0]),
+		.out (inq_in2_mul_buf1[31:0])
+	);
+	fpu_bufrpt_grp64 i_inq_in2_div_buf1 (
+		.in (inq_in2[63:0]),
+		.out (inq_in2_div_buf1[63:0])
+	);
+
+	// group inq_*eq_*
+	fpu_bufrpt_grp32 i_inq_id_add_buf1 (
+		.in ({4'h0,
+			se_out_buf2,
+			arst_l_out_buf3,
+			fpu_grst_l_in_buf2,
+			inq_id[4:0],
+			inq_op[7:0],
+			inq_rnd_mode[1:0],
+			inq_in1_50_0_neq_0,
+			inq_in1_53_0_neq_0,
+			inq_in1_53_32_neq_0,
+			inq_in1_exp_eq_0,
+			inq_in1_exp_neq_ffs,
+			inq_in2_50_0_neq_0,
+			inq_in2_53_0_neq_0,
+			inq_in2_53_32_neq_0,
+			inq_in2_exp_eq_0,
+			inq_in2_exp_neq_ffs}),
+		.out ({inq_id_add_buf1_unused[3:0],
+			se_in_buf3,
+			arst_l_add_buf4,
+			fpu_grst_l_add_buf3,
+			inq_id_add_buf1[4:0],
+			inq_op_add_buf1[7:0],
+			inq_rnd_mode_add_buf1[1:0],
+			inq_in1_50_0_neq_0_add_buf1,
+			inq_in1_53_0_neq_0_add_buf1,
+			inq_in1_53_32_neq_0_add_buf1,
+			inq_in1_exp_eq_0_add_buf1,
+			inq_in1_exp_neq_ffs_add_buf1,
+			inq_in2_50_0_neq_0_add_buf1,
+			inq_in2_53_0_neq_0_add_buf1,
+			inq_in2_53_32_neq_0_add_buf1,
+			inq_in2_exp_eq_0_add_buf1,
+			inq_in2_exp_neq_ffs_add_buf1})
+	);
+
+	fpu_bufrpt_grp32 i_inq_id_mul_buf1 (
+		.in ({3'h0,
+			se_in_buf3,
+			arst_l_mul_buf2,
+			fpu_grst_l_mul_buf1,
+			fmul_clken_l,
+			inq_id[4:0],
+			inq_op[7:0],
+			inq_rnd_mode[1:0],
+			inq_in1_50_0_neq_0,
+			inq_in1_53_0_neq_0,
+			inq_in1_53_32_neq_0,
+			inq_in1_exp_eq_0,
+			inq_in1_exp_neq_ffs,
+			inq_in2_50_0_neq_0,
+			inq_in2_53_0_neq_0,
+			inq_in2_53_32_neq_0,
+			inq_in2_exp_eq_0,
+			inq_in2_exp_neq_ffs}),
+		.out ({inq_id_mul_buf1_unused[2:0],
+			se_mul_buf4,
+			arst_l_out_buf3,
+			fpu_grst_l_in_buf2,
+			fmul_clken_l_buf1,
+			inq_id_mul_buf1[4:0],
+			inq_op_mul_buf1[7:0],
+			inq_rnd_mode_mul_buf1[1:0],
+			inq_in1_50_0_neq_0_mul_buf1,
+			inq_in1_53_0_neq_0_mul_buf1,
+			inq_in1_53_32_neq_0_mul_buf1,
+			inq_in1_exp_eq_0_mul_buf1,
+			inq_in1_exp_neq_ffs_mul_buf1,
+			inq_in2_50_0_neq_0_mul_buf1,
+			inq_in2_53_0_neq_0_mul_buf1,
+			inq_in2_53_32_neq_0_mul_buf1,
+			inq_in2_exp_eq_0_mul_buf1,
+			inq_in2_exp_neq_ffs_mul_buf1})
+	);
+
+	fpu_bufrpt_grp32 i_inq_id_div_buf1 (
+		.in ({5'h00,
+			se_mul_buf4,
+			arst_l_mul_buf2,
+			inq_id[4:0],
+			inq_op[7:0],
+			inq_rnd_mode[1:0],
+			inq_in1_50_0_neq_0,
+			inq_in1_53_0_neq_0,
+			inq_in1_53_32_neq_0,
+			inq_in1_exp_eq_0,
+			inq_in1_exp_neq_ffs,
+			inq_in2_50_0_neq_0,
+			inq_in2_53_0_neq_0,
+			inq_in2_53_32_neq_0,
+			inq_in2_exp_eq_0,
+			inq_in2_exp_neq_ffs}),
+		.out ({inq_id_div_buf1_unused[4:0],
+			se_div_buf5,
+			arst_l_in_buf3,
+			inq_id_div_buf1[4:0],
+			inq_op_div_buf1[7:0],
+			inq_rnd_mode_div_buf1[1:0],
+			inq_in1_50_0_neq_0_div_buf1,
+			inq_in1_53_0_neq_0_div_buf1,
+			inq_in1_53_32_neq_0_div_buf1,
+			inq_in1_exp_eq_0_div_buf1,
+			inq_in1_exp_neq_ffs_div_buf1,
+			inq_in2_50_0_neq_0_div_buf1,
+			inq_in2_53_0_neq_0_div_buf1,
+			inq_in2_53_32_neq_0_div_buf1,
+			inq_in2_exp_eq_0_div_buf1,
+			inq_in2_exp_neq_ffs_div_buf1})
+	);
+
+	// buffer ctu_tst signals
+	fpu_bufrpt_grp4 i_ctu_tst_buf1_hi (
+		.in ({ctu_tst_short_chain,
+			ctu_tst_macrotest,
+			ctu_tst_scan_disable,
+			ctu_tst_pre_grst_l}),
+		.out ({ctu_tst_short_chain_buf1,
+			ctu_tst_macrotest_buf1,
+			ctu_tst_scan_disable_buf1,
+			ctu_tst_pre_grst_l_buf1})
+	);
+
+	fpu_bufrpt_grp4 i_ctu_tst_buf1_lo (
+		.in ({ctu_tst_scanmode,
+			global_shift_enable,
+			2'b00}),
+		.out ({ctu_tst_scanmode_buf1,
+			global_shift_enable_buf1,
+			ctu_tst_buf1_lo_unused[1:0]})
+	);
+
+	// buffer cluster_header inputs
+	fpu_bufrpt_grp4 i_cluster_cken_buf1 (
+		.in ({cluster_cken,
+			grst_l,
+			2'b00}),
+		.out ({cluster_cken_buf1,
+			grst_l_buf1,
+			cluster_cken_buf1_unused[1:0]})
+	);
+
+	// buffers for se (scan enable driven from test_stub_scan)
+	fpu_bufrpt_grp4 i_se_buf1 (
+		.in ({se,
+			se,
+			so_unbuf,
+			1'b0}),
+		.out ({se_add_buf1,
+			se_mul64_buf1,
+			so_buf1,
+			se_buf1_unused})
+	);
+
+	fpu_bufrpt_grp4 i_se_add_buf2 (
+		.in ({se_add_buf1,
+			se_add_buf1,
+			se_add_buf1,
+			1'b0}),
+		.out ({se_add_exp_buf2,
+			se_add_frac_buf2,
+			se_out_buf2,
+			se_add_buf2_unused})
+	);
+
+	fpu_bufrpt_grp4 i_se_mul64_buf2 (
+		.in ({se_mul64_buf1,
+			se_mul64_buf1,
+			2'b00}),
+		.out ({se_mul64_buf2,
+			se_cluster_header_buf2,
+			se_mul64_buf2_unused[1:0]})
+	);
+
+	// buffers for arst_l, also use to buffer fpu_grst_l
+	fpu_bufrpt_grp4 i_arst_l_buf1 (
+		.in ({arst_l,
+			3'b000}),
+		.out ({arst_l_buf1,
+			arst_l_buf1_unused[2:0]})
+	);
+
+	fpu_bufrpt_grp4 i_arst_l_buf2 (
+		.in ({arst_l_buf1,
+			arst_l_buf1,
+			arst_l_buf1,
+			fpu_grst_l}),
+		.out ({arst_l_mul_buf2,
+			arst_l_cluster_header_buf2,
+			arst_l_div_buf2,
+			fpu_grst_l_mul_buf1})
+	);
+
+	// buffers for fdiv_clken_l
+	fpu_bufrpt_grp4 i_fdiv_clken_l_buf1 (
+		.in ({fdiv_clken_l,
+			fdiv_clken_l,
+			2'b00}),
+		.out ({fdiv_clken_l_div_exp_buf1,
+			fdiv_clken_l_div_frac_buf1,
+			fdiv_clken_l_buf1_unused[1:0]})
+	);
+
+	// buffer scan_out from cluster_header (internal driver 2X) to test_stub (long_chain_so_0)
+	fpu_bufrpt_grp4 i_so_cluster_header_buf1 (
+		.in ({scan_manual_6,
+			3'b000}),
+		.out ({scan_manual_6_buf1,
+			so_cluster_header_buf1_unused[2:0]})
+	);
+
+	// buffer si at FPU cluster right edge
+	fpu_bufrpt_grp4 i_si_buf1 (
+		.in ({si,
+			3'b000}),
+		.out ({si_buf1,
+			si_buf1_unused[2:0]})
+	);
+
+	// pcx_fpio* signals buffered for mintiming
+	fpu_rptr_pcx_fpio_grp16 i_pcx_fpio_buf1_0 (
+		.in ({pcx_fpio_data_px2[108],
+			pcx_fpio_data_px2[109],
+			pcx_fpio_data_px2[110],
+			pcx_fpio_data_px2[111],
+			pcx_fpio_data_px2[112],
+			pcx_fpio_data_px2[113],
+			pcx_fpio_data_px2[114],
+			pcx_fpio_data_px2[115],
+			pcx_fpio_data_px2[116],
+			pcx_fpio_data_px2[117],
+			pcx_fpio_data_px2[118],
+			pcx_fpio_data_px2[119],
+			pcx_fpio_data_px2[120],
+			pcx_fpio_data_px2[121],
+			pcx_fpio_data_px2[122],
+			pcx_fpio_data_px2[123]}),
+		.out ({pcx_fpio_data_px2_buf1[108],
+			pcx_fpio_data_px2_buf1[109],
+			pcx_fpio_data_px2_buf1[110],
+			pcx_fpio_data_px2_buf1[111],
+			pcx_fpio_data_px2_buf1[112],
+			pcx_fpio_data_px2_buf1[113],
+			pcx_fpio_data_px2_buf1[114],
+			pcx_fpio_data_px2_buf1[115],
+			pcx_fpio_data_px2_buf1[116],
+			pcx_fpio_data_px2_buf1[117],
+			pcx_fpio_data_px2_buf1[118],
+			pcx_fpio_data_px2_buf1[119],
+			pcx_fpio_data_px2_buf1[120],
+			pcx_fpio_data_px2_buf1[121],
+			pcx_fpio_data_px2_buf1[122],
+			pcx_fpio_data_px2_buf1[123]})
+	);
+
+	fpu_rptr_pcx_fpio_grp16 i_pcx_fpio_buf1_1 (
+		.in ({pcx_fpio_data_px2[92],
+			pcx_fpio_data_px2[93],
+			pcx_fpio_data_px2[94],
+			pcx_fpio_data_px2[95],
+			pcx_fpio_data_px2[96],
+			pcx_fpio_data_px2[97],
+			pcx_fpio_data_px2[98],
+			pcx_fpio_data_px2[99],
+			pcx_fpio_data_px2[100],
+			pcx_fpio_data_px2[101],
+			pcx_fpio_data_px2[102],
+			pcx_fpio_data_px2[103],
+			pcx_fpio_data_px2[104],
+			pcx_fpio_data_px2[105],
+			pcx_fpio_data_px2[106],
+			pcx_fpio_data_px2[107]}),
+		.out ({pcx_fpio_data_px2_buf1[92],
+			pcx_fpio_data_px2_buf1[93],
+			pcx_fpio_data_px2_buf1[94],
+			pcx_fpio_data_px2_buf1[95],
+			pcx_fpio_data_px2_buf1[96],
+			pcx_fpio_data_px2_buf1[97],
+			pcx_fpio_data_px2_buf1[98],
+			pcx_fpio_data_px2_buf1[99],
+			pcx_fpio_data_px2_buf1[100],
+			pcx_fpio_data_px2_buf1[101],
+			pcx_fpio_data_px2_buf1[102],
+			pcx_fpio_data_px2_buf1[103],
+			pcx_fpio_data_px2_buf1[104],
+			pcx_fpio_data_px2_buf1[105],
+			pcx_fpio_data_px2_buf1[106],
+			pcx_fpio_data_px2_buf1[107]})
+	);
+
+	fpu_rptr_pcx_fpio_grp16 i_pcx_fpio_buf1_2 (
+		.in ({pcx_fpio_data_px2[76],
+			pcx_fpio_data_px2[77],
+			pcx_fpio_data_px2[78],
+			pcx_fpio_data_px2[79],
+			pcx_fpio_data_px2[80],
+			pcx_fpio_data_px2[81],
+			pcx_fpio_data_px2[82],
+			pcx_fpio_data_px2[83],
+			pcx_fpio_data_px2[84],
+			pcx_fpio_data_px2[85],
+			pcx_fpio_data_px2[86],
+			pcx_fpio_data_px2[87],
+			pcx_fpio_data_px2[88],
+			pcx_fpio_data_px2[89],
+			pcx_fpio_data_px2[90],
+			pcx_fpio_data_px2[91]}),
+		.out ({pcx_fpio_data_px2_buf1[76],
+			pcx_fpio_data_px2_buf1[77],
+			pcx_fpio_data_px2_buf1[78],
+			pcx_fpio_data_px2_buf1[79],
+			pcx_fpio_data_px2_buf1[80],
+			pcx_fpio_data_px2_buf1[81],
+			pcx_fpio_data_px2_buf1[82],
+			pcx_fpio_data_px2_buf1[83],
+			pcx_fpio_data_px2_buf1[84],
+			pcx_fpio_data_px2_buf1[85],
+			pcx_fpio_data_px2_buf1[86],
+			pcx_fpio_data_px2_buf1[87],
+			pcx_fpio_data_px2_buf1[88],
+			pcx_fpio_data_px2_buf1[89],
+			pcx_fpio_data_px2_buf1[90],
+			pcx_fpio_data_px2_buf1[91]})
+	);
+
+	fpu_rptr_pcx_fpio_grp16 i_pcx_fpio_buf1_3 (
+		.in ({pcx_fpio_data_px2[3:0],
+			pcx_fpio_data_px2[64],
+			pcx_fpio_data_px2[65],
+			pcx_fpio_data_px2[66],
+			pcx_fpio_data_px2[67],
+			pcx_fpio_data_px2[68],
+			pcx_fpio_data_px2[69],
+			pcx_fpio_data_px2[70],
+			pcx_fpio_data_px2[71],
+			pcx_fpio_data_px2[72],
+			pcx_fpio_data_px2[73],
+			pcx_fpio_data_px2[74],
+			pcx_fpio_data_px2[75]}),
+		.out ({pcx_fpio_data_px2_buf1[3:0],
+			pcx_fpio_data_px2_buf1[64],
+			pcx_fpio_data_px2_buf1[65],
+			pcx_fpio_data_px2_buf1[66],
+			pcx_fpio_data_px2_buf1[67],
+			pcx_fpio_data_px2_buf1[68],
+			pcx_fpio_data_px2_buf1[69],
+			pcx_fpio_data_px2_buf1[70],
+			pcx_fpio_data_px2_buf1[71],
+			pcx_fpio_data_px2_buf1[72],
+			pcx_fpio_data_px2_buf1[73],
+			pcx_fpio_data_px2_buf1[74],
+			pcx_fpio_data_px2_buf1[75]})
+	);
+
+	fpu_rptr_pcx_fpio_grp16 i_pcx_fpio_buf1_4 (
+		.in (pcx_fpio_data_px2[19:4]),
+		.out (pcx_fpio_data_px2_buf1[19:4])
+	);
+
+	fpu_rptr_pcx_fpio_grp16 i_pcx_fpio_buf1_5 (
+		.in (pcx_fpio_data_px2[35:20]),
+		.out (pcx_fpio_data_px2_buf1[35:20])
+	);
+
+	fpu_rptr_pcx_fpio_grp16 i_pcx_fpio_buf1_6 (
+		.in ({pcx_fpio_data_rdy_px2,
+			pcx_fpio_data_px2[50:36]}),
+		.out ({pcx_fpio_data_rdy_px2_buf1,
+			pcx_fpio_data_px2_buf1[50:36]})
+	);
+
+	fpu_rptr_pcx_fpio_grp16 i_pcx_fpio_buf1_7 (
+		.in ({3'b000,
+			pcx_fpio_data_px2[63:51]}),
+		.out ({pcx_fpio_data_px2_buf1_unused[2:0],
+			pcx_fpio_data_px2_buf1[63:51]})
+	);
+
+	// buffer fp_cpx_* signals for mintiming
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_0 (
+		.in ({
+			fp_cpx_data_ca[142],
+			fp_cpx_data_ca[140],
+			fp_cpx_data_ca[138],
+			fp_cpx_data_ca[136],
+			fp_cpx_data_ca[134],
+			fp_cpx_data_ca[132],
+			fp_cpx_data_ca[130],
+			fp_cpx_data_ca[128],
+			fp_cpx_req_cq[6],
+			fp_cpx_req_cq[7],
+			fp_cpx_req_cq[3],
+			fp_cpx_req_cq[2],
+			fp_cpx_req_cq[5],
+			fp_cpx_req_cq[1],
+			fp_cpx_req_cq[0],
+			fp_cpx_req_cq[4]}),
+		.out ({
+			fp_cpx_data_ca_buf1[142],
+			fp_cpx_data_ca_buf1[140],
+			fp_cpx_data_ca_buf1[138],
+			fp_cpx_data_ca_buf1[136],
+			fp_cpx_data_ca_buf1[134],
+			fp_cpx_data_ca_buf1[132],
+			fp_cpx_data_ca_buf1[130],
+			fp_cpx_data_ca_buf1[128],
+			fp_cpx_req_cq_buf1[6],
+			fp_cpx_req_cq_buf1[7],
+			fp_cpx_req_cq_buf1[3],
+			fp_cpx_req_cq_buf1[2],
+			fp_cpx_req_cq_buf1[5],
+			fp_cpx_req_cq_buf1[1],
+			fp_cpx_req_cq_buf1[0],
+			fp_cpx_req_cq_buf1[4]})
+	);
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_1 (
+		.in ({
+			fp_cpx_data_ca[34],
+			fp_cpx_data_ca[36],
+			fp_cpx_data_ca[38],
+			fp_cpx_data_ca[40],
+			fp_cpx_data_ca[42],
+			fp_cpx_data_ca[44],
+			fp_cpx_data_ca[46],
+			fp_cpx_data_ca[48],
+			fp_cpx_data_ca[50],
+			fp_cpx_data_ca[52],
+			fp_cpx_data_ca[54],
+			fp_cpx_data_ca[56],
+			fp_cpx_data_ca[58],
+			fp_cpx_data_ca[60],
+			fp_cpx_data_ca[62],
+			fp_cpx_data_ca[144]}),
+		.out ({
+			fp_cpx_data_ca_buf1[34],
+			fp_cpx_data_ca_buf1[36],
+			fp_cpx_data_ca_buf1[38],
+			fp_cpx_data_ca_buf1[40],
+			fp_cpx_data_ca_buf1[42],
+			fp_cpx_data_ca_buf1[44],
+			fp_cpx_data_ca_buf1[46],
+			fp_cpx_data_ca_buf1[48],
+			fp_cpx_data_ca_buf1[50],
+			fp_cpx_data_ca_buf1[52],
+			fp_cpx_data_ca_buf1[54],
+			fp_cpx_data_ca_buf1[56],
+			fp_cpx_data_ca_buf1[58],
+			fp_cpx_data_ca_buf1[60],
+			fp_cpx_data_ca_buf1[62],
+			fp_cpx_data_ca_buf1[144]})
+	);
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_2 (
+		.in ({
+			fp_cpx_data_ca[2],
+			fp_cpx_data_ca[4],
+			fp_cpx_data_ca[6],
+			fp_cpx_data_ca[8],
+			fp_cpx_data_ca[10],
+			fp_cpx_data_ca[12],
+			fp_cpx_data_ca[14],
+			fp_cpx_data_ca[16],
+			fp_cpx_data_ca[18],
+			fp_cpx_data_ca[20],
+			fp_cpx_data_ca[22],
+			fp_cpx_data_ca[24],
+			fp_cpx_data_ca[26],
+			fp_cpx_data_ca[28],
+			fp_cpx_data_ca[30],
+			fp_cpx_data_ca[32]}),
+		.out ({
+			fp_cpx_data_ca_buf1[2],
+			fp_cpx_data_ca_buf1[4],
+			fp_cpx_data_ca_buf1[6],
+			fp_cpx_data_ca_buf1[8],
+			fp_cpx_data_ca_buf1[10],
+			fp_cpx_data_ca_buf1[12],
+			fp_cpx_data_ca_buf1[14],
+			fp_cpx_data_ca_buf1[16],
+			fp_cpx_data_ca_buf1[18],
+			fp_cpx_data_ca_buf1[20],
+			fp_cpx_data_ca_buf1[22],
+			fp_cpx_data_ca_buf1[24],
+			fp_cpx_data_ca_buf1[26],
+			fp_cpx_data_ca_buf1[28],
+			fp_cpx_data_ca_buf1[30],
+			fp_cpx_data_ca_buf1[32]})
+	);
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_3 (
+		.in ({
+			fp_cpx_data_ca[31],
+			fp_cpx_data_ca[27],
+			fp_cpx_data_ca[23],
+			fp_cpx_data_ca[25],
+			fp_cpx_data_ca[21],
+			fp_cpx_data_ca[17],
+			fp_cpx_data_ca[19],
+			fp_cpx_data_ca[15],
+			fp_cpx_data_ca[11],
+			fp_cpx_data_ca[13],
+			fp_cpx_data_ca[9],
+			fp_cpx_data_ca[5],
+			fp_cpx_data_ca[7],
+			fp_cpx_data_ca[3],
+			fp_cpx_data_ca[0],
+			fp_cpx_data_ca[1]}),
+		.out ({
+			fp_cpx_data_ca_buf1[31],
+			fp_cpx_data_ca_buf1[27],
+			fp_cpx_data_ca_buf1[23],
+			fp_cpx_data_ca_buf1[25],
+			fp_cpx_data_ca_buf1[21],
+			fp_cpx_data_ca_buf1[17],
+			fp_cpx_data_ca_buf1[19],
+			fp_cpx_data_ca_buf1[15],
+			fp_cpx_data_ca_buf1[11],
+			fp_cpx_data_ca_buf1[13],
+			fp_cpx_data_ca_buf1[9],
+			fp_cpx_data_ca_buf1[5],
+			fp_cpx_data_ca_buf1[7],
+			fp_cpx_data_ca_buf1[3],
+			fp_cpx_data_ca_buf1[0],
+			fp_cpx_data_ca_buf1[1]})
+	);
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_4 (
+		.in ({
+			fp_cpx_data_ca[59],
+			fp_cpx_data_ca[61],
+			fp_cpx_data_ca[57],
+			fp_cpx_data_ca[53],
+			fp_cpx_data_ca[55],
+			fp_cpx_data_ca[51],
+			fp_cpx_data_ca[47],
+			fp_cpx_data_ca[49],
+			fp_cpx_data_ca[45],
+			fp_cpx_data_ca[41],
+			fp_cpx_data_ca[43],
+			fp_cpx_data_ca[39],
+			fp_cpx_data_ca[35],
+			fp_cpx_data_ca[37],
+			fp_cpx_data_ca[33],
+			fp_cpx_data_ca[29]}),
+		.out ({
+			fp_cpx_data_ca_buf1[59],
+			fp_cpx_data_ca_buf1[61],
+			fp_cpx_data_ca_buf1[57],
+			fp_cpx_data_ca_buf1[53],
+			fp_cpx_data_ca_buf1[55],
+			fp_cpx_data_ca_buf1[51],
+			fp_cpx_data_ca_buf1[47],
+			fp_cpx_data_ca_buf1[49],
+			fp_cpx_data_ca_buf1[45],
+			fp_cpx_data_ca_buf1[41],
+			fp_cpx_data_ca_buf1[43],
+			fp_cpx_data_ca_buf1[39],
+			fp_cpx_data_ca_buf1[35],
+			fp_cpx_data_ca_buf1[37],
+			fp_cpx_data_ca_buf1[33],
+			fp_cpx_data_ca_buf1[29]})
+	);
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_5 (
+		.in ({
+			fp_cpx_data_ca[113],
+			fp_cpx_data_ca[117],
+			fp_cpx_data_ca[121],
+			fp_cpx_data_ca[119],
+			fp_cpx_data_ca[123],
+			fp_cpx_data_ca[127],
+			fp_cpx_data_ca[125],
+			fp_cpx_data_ca[129],
+			fp_cpx_data_ca[133],
+			fp_cpx_data_ca[131],
+			fp_cpx_data_ca[135],
+			fp_cpx_data_ca[139],
+			fp_cpx_data_ca[137],
+			fp_cpx_data_ca[141],
+			fp_cpx_data_ca[143],
+			fp_cpx_data_ca[63]}),
+		.out ({
+			fp_cpx_data_ca_buf1[113],
+			fp_cpx_data_ca_buf1[117],
+			fp_cpx_data_ca_buf1[121],
+			fp_cpx_data_ca_buf1[119],
+			fp_cpx_data_ca_buf1[123],
+			fp_cpx_data_ca_buf1[127],
+			fp_cpx_data_ca_buf1[125],
+			fp_cpx_data_ca_buf1[129],
+			fp_cpx_data_ca_buf1[133],
+			fp_cpx_data_ca_buf1[131],
+			fp_cpx_data_ca_buf1[135],
+			fp_cpx_data_ca_buf1[139],
+			fp_cpx_data_ca_buf1[137],
+			fp_cpx_data_ca_buf1[141],
+			fp_cpx_data_ca_buf1[143],
+			fp_cpx_data_ca_buf1[63]})
+	);
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_6 (
+		.in ({
+			fp_cpx_data_ca[85],
+			fp_cpx_data_ca[83],
+			fp_cpx_data_ca[87],
+			fp_cpx_data_ca[91],
+			fp_cpx_data_ca[89],
+			fp_cpx_data_ca[93],
+			fp_cpx_data_ca[97],
+			fp_cpx_data_ca[95],
+			fp_cpx_data_ca[99],
+			fp_cpx_data_ca[103],
+			fp_cpx_data_ca[101],
+			fp_cpx_data_ca[105],
+			fp_cpx_data_ca[109],
+			fp_cpx_data_ca[107],
+			fp_cpx_data_ca[111],
+			fp_cpx_data_ca[115]}),
+		.out ({
+			fp_cpx_data_ca_buf1[85],
+			fp_cpx_data_ca_buf1[83],
+			fp_cpx_data_ca_buf1[87],
+			fp_cpx_data_ca_buf1[91],
+			fp_cpx_data_ca_buf1[89],
+			fp_cpx_data_ca_buf1[93],
+			fp_cpx_data_ca_buf1[97],
+			fp_cpx_data_ca_buf1[95],
+			fp_cpx_data_ca_buf1[99],
+			fp_cpx_data_ca_buf1[103],
+			fp_cpx_data_ca_buf1[101],
+			fp_cpx_data_ca_buf1[105],
+			fp_cpx_data_ca_buf1[109],
+			fp_cpx_data_ca_buf1[107],
+			fp_cpx_data_ca_buf1[111],
+			fp_cpx_data_ca_buf1[115]})
+	);
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_7 (
+		.in ({
+			fp_cpx_data_ca[114],
+			fp_cpx_data_ca[116],
+			fp_cpx_data_ca[118],
+			fp_cpx_data_ca[120],
+			fp_cpx_data_ca[122],
+			fp_cpx_data_ca[124],
+			fp_cpx_data_ca[126],
+			fp_cpx_data_ca[65],
+			fp_cpx_data_ca[67],
+			fp_cpx_data_ca[69],
+			fp_cpx_data_ca[73],
+			fp_cpx_data_ca[71],
+			fp_cpx_data_ca[75],
+			fp_cpx_data_ca[79],
+			fp_cpx_data_ca[77],
+			fp_cpx_data_ca[81]}),
+		.out ({
+			fp_cpx_data_ca_buf1[114],
+			fp_cpx_data_ca_buf1[116],
+			fp_cpx_data_ca_buf1[118],
+			fp_cpx_data_ca_buf1[120],
+			fp_cpx_data_ca_buf1[122],
+			fp_cpx_data_ca_buf1[124],
+			fp_cpx_data_ca_buf1[126],
+			fp_cpx_data_ca_buf1[65],
+			fp_cpx_data_ca_buf1[67],
+			fp_cpx_data_ca_buf1[69],
+			fp_cpx_data_ca_buf1[73],
+			fp_cpx_data_ca_buf1[71],
+			fp_cpx_data_ca_buf1[75],
+			fp_cpx_data_ca_buf1[79],
+			fp_cpx_data_ca_buf1[77],
+			fp_cpx_data_ca_buf1[81]})
+	);
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_8 (
+		.in ({
+			fp_cpx_data_ca[82],
+			fp_cpx_data_ca[84],
+			fp_cpx_data_ca[86],
+			fp_cpx_data_ca[88],
+			fp_cpx_data_ca[90],
+			fp_cpx_data_ca[92],
+			fp_cpx_data_ca[94],
+			fp_cpx_data_ca[96],
+			fp_cpx_data_ca[98],
+			fp_cpx_data_ca[100],
+			fp_cpx_data_ca[102],
+			fp_cpx_data_ca[104],
+			fp_cpx_data_ca[106],
+			fp_cpx_data_ca[108],
+			fp_cpx_data_ca[110],
+			fp_cpx_data_ca[112]}),
+		.out ({
+			fp_cpx_data_ca_buf1[82],
+			fp_cpx_data_ca_buf1[84],
+			fp_cpx_data_ca_buf1[86],
+			fp_cpx_data_ca_buf1[88],
+			fp_cpx_data_ca_buf1[90],
+			fp_cpx_data_ca_buf1[92],
+			fp_cpx_data_ca_buf1[94],
+			fp_cpx_data_ca_buf1[96],
+			fp_cpx_data_ca_buf1[98],
+			fp_cpx_data_ca_buf1[100],
+			fp_cpx_data_ca_buf1[102],
+			fp_cpx_data_ca_buf1[104],
+			fp_cpx_data_ca_buf1[106],
+			fp_cpx_data_ca_buf1[108],
+			fp_cpx_data_ca_buf1[110],
+			fp_cpx_data_ca_buf1[112]})
+	);
+	fpu_rptr_fp_cpx_grp16 i_fp_cpx_buf1_9 (
+		.in ({
+			6'b000000,
+			so_buf1,
+			fp_cpx_data_ca[64],
+			fp_cpx_data_ca[66],
+			fp_cpx_data_ca[68],
+			fp_cpx_data_ca[70],
+			fp_cpx_data_ca[72],
+			fp_cpx_data_ca[74],
+			fp_cpx_data_ca[76],
+			fp_cpx_data_ca[78],
+			fp_cpx_data_ca[80]}),
+		.out ({
+			fp_cpx_buf1_9_unused[5:0],
+			so,
+			fp_cpx_data_ca_buf1[64],
+			fp_cpx_data_ca_buf1[66],
+			fp_cpx_data_ca_buf1[68],
+			fp_cpx_data_ca_buf1[70],
+			fp_cpx_data_ca_buf1[72],
+			fp_cpx_data_ca_buf1[74],
+			fp_cpx_data_ca_buf1[76],
+			fp_cpx_data_ca_buf1[78],
+			fp_cpx_data_ca_buf1[80]})
+	);
+
+	// buffer fpu_in_dp outputs  (sram din inputs) for mintiming
+
+	fpu_rptr_inq i_inq_sram_din_buf1 (
+		.in (inq_sram_din_unbuf[155:0]),
+		.out (inq_sram_din_buf1[155:0])
+	);
+
+endmodule
+
Index: /trunk/T1-FPU/fpu_cnt_lead0_lvl1.v
===================================================================
--- /trunk/T1-FPU/fpu_cnt_lead0_lvl1.v	(revision 6)
+++ /trunk/T1-FPU/fpu_cnt_lead0_lvl1.v	(revision 6)
@@ -0,0 +1,57 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_cnt_lead0_lvl1.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Lowest level of lead 0 counters.  Lead 0 count for 4 bits.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_cnt_lead0_lvl1 (
+	din,
+	// outputs
+	din_3_0_eq_0,
+	din_3_2_eq_0,
+	lead0_4b_0
+);
+
+// Leaf of the lead 0 counter tree: inspects a single 4-bit group.
+input [3:0]	din;			// 4-bit group under test
+
+output		din_3_0_eq_0;		// high when din[3:0] is all zeros
+output		din_3_2_eq_0;		// high when din[3:2] is all zeros
+output		lead0_4b_0;		// bit[0] of the lead 0 count for din[3:0]
+
+wire		hi_pair_zero;		// local name for the din[3:2] == 0 test
+wire		din_3_0_eq_0;
+wire		din_3_2_eq_0;
+wire		lead0_4b_0;
+
+assign hi_pair_zero = (~din[3]) & (~din[2]);
+assign din_3_2_eq_0 = hi_pair_zero;
+assign din_3_0_eq_0 = hi_pair_zero & (~din[1]) & (~din[0]);
+
+// din[3:2] zero: bit[0] is the inverse of din[1]; otherwise the inverse of din[3].
+assign lead0_4b_0 = (hi_pair_zero & (~din[1]))
+		| ((~hi_pair_zero) & (~din[3]));
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_add_exp_dp.v
===================================================================
--- /trunk/T1-FPU/fpu_add_exp_dp.v	(revision 6)
+++ /trunk/T1-FPU/fpu_add_exp_dp.v	(revision 6)
@@ -0,0 +1,737 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_add_exp_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipeline exponent datapath.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+module fpu_add_exp_dp (
+	inq_in1,
+	inq_in2,
+	inq_op,
+	inq_op_7,
+	a1stg_step,
+	a1stg_faddsubd,
+	a1stg_faddsubs,
+	a1stg_fsdtoix,
+	a6stg_step,
+	a1stg_fstod,
+	a1stg_fdtos,
+	a1stg_fstoi,
+	a1stg_fstox,
+	a1stg_fdtoi,
+	a1stg_fdtox,
+	a2stg_fsdtoix_fdtos,
+	a2stg_faddsubop,
+	a2stg_fitos,
+	a2stg_fitod,
+	a2stg_fxtos,
+	a2stg_fxtod,
+	a3stg_exp_7ff,
+	a3stg_exp_ff,
+	a3stg_exp_add,
+	a3stg_inc_exp_inv,
+	a3stg_same_exp_inv,
+	a3stg_dec_exp_inv,
+	a3stg_faddsubop,
+	a3stg_fdtos_inv,
+	a4stg_fixtos_fxtod_inv,
+	a4stg_shl_cnt,
+	a4stg_denorm_inv,
+	a4stg_rndadd_cout,
+	add_exp_out_expinc,
+	add_exp_out_exp,
+	add_exp_out_exp1,
+	a4stg_in_of,
+	add_exp_out_expadd,
+	a4stg_dblop,
+	a4stg_to_0_inv,
+	fadd_clken_l,
+	rclk,
+	
+	a1stg_expadd3_11,
+	a1stg_expadd1_11_0,
+	a1stg_expadd4_inv,
+	a1stg_expadd2_5_0,
+	a2stg_exp,
+	a2stg_expadd,
+	a3stg_exp_10_0,
+	a4stg_exp_11_0,
+	add_exp_out,
+
+	se,
+	si,
+	so
+);
+
+// Input-stage flops load on a1stg_step; the later pipeline registers are qualified by a6stg_step.
+input [62:52]	inq_in1;		// request operand 1 to op pipes
+input [62:52]	inq_in2;		// request operand 2 to op pipes
+input [1:0]	inq_op;			// request opcode[1:0]
+input		inq_op_7;		// request opcode[7]
+input		a1stg_step;		// add pipe load
+input		a1stg_faddsubd;		// add/subtract double- add 1 stg
+input		a1stg_faddsubs;		// add/subtract single- add 1 stg
+input		a1stg_fsdtoix;		// float to integer convert- add 1 stg
+input		a6stg_step;		// advance the add pipe
+input		a1stg_fstod;		// fstod- add 1 stage
+input		a1stg_fdtos;		// fdtos- add 1 stage
+input		a1stg_fstoi;		// fstoi- add 1 stage
+input		a1stg_fstox;		// fstox- add 1 stage
+input		a1stg_fdtoi;		// fdtoi- add 1 stage
+input		a1stg_fdtox;		// fdtox- add 1 stage
+input		a2stg_fsdtoix_fdtos;	// float to integer convert- add 2 stg
+input		a2stg_faddsubop;	// float add or subtract- add 2 stage
+input		a2stg_fitos;		// fitos- add 2 stage
+input		a2stg_fitod;		// fitod- add 2 stage
+input		a2stg_fxtos;		// fxtos- add 2 stage
+input		a2stg_fxtod;		// fxtod- add 2 stage
+input		a3stg_exp_7ff;		// select line to a3stg_exp
+input		a3stg_exp_ff;		// select line to a3stg_exp
+input		a3stg_exp_add;		// select line to a3stg_exp
+input		a3stg_inc_exp_inv;	// increment the exponent- add 3 stg
+input		a3stg_same_exp_inv;	// keep the exponent- add 3 stg
+input		a3stg_dec_exp_inv;	// decrement the exponent- add 3 stg
+input		a3stg_faddsubop;	// add/subtract- add 3 stage
+input		a3stg_fdtos_inv;	// double to single convert- add 3 stg
+input		a4stg_fixtos_fxtod_inv;	// int to single/double cvt- add 4 stg
+input [5:0]	a4stg_shl_cnt;		// postnorm shift left count- add 4 stg
+input		a4stg_denorm_inv;	// 0 the exponent
+input		a4stg_rndadd_cout;	// fraction rounding adder carry out
+input		add_exp_out_expinc;	// select line to add_exp_out
+input		add_exp_out_exp;	// select line to add_exp_out
+input		add_exp_out_exp1;	// select line to add_exp_out
+input		a4stg_in_of;		// add overflow- select exp out
+input		add_exp_out_expadd;	// select line to add_exp_out
+input		a4stg_dblop;		// double precision operation- add 4 stg
+input		a4stg_to_0_inv;		// result to infinity on overflow
+input		fadd_clken_l;           // add pipe clk enable - asserted low
+input		rclk;		// global clock
+
+output        	a1stg_expadd3_11;	// exponent adder 3 output- add 1 stage
+output [11:0]	a1stg_expadd1_11_0;	// exponent adder 1 output- add 1 stage
+output [10:0]	a1stg_expadd4_inv;	// exponent adder 4 output- add 1 stage
+output [5:0]	a1stg_expadd2_5_0;	// exponent adder 2 output- add 1 stage
+output [11:0]	a2stg_exp;		// exponent- add 2 stage
+output [12:0]	a2stg_expadd;		// exponent adder- add 2 stage
+output [10:0]	a3stg_exp_10_0;		// exponent adder- add 3 stage
+output [11:0]	a4stg_exp_11_0;		// exponent adder- add 4 stage
+output [10:0]	add_exp_out;		// add exponent output
+
+input           se;                     // scan_enable
+input           si;                     // scan in
+output          so;                     // scan out
+
+// NOTE(review): si is unused and so is never driven in this module; every flop leaves si/so open - confirm scan is stitched elsewhere.
+wire [62:52]	a1stg_in1;
+wire [62:52]	a1stg_in1a;
+wire [62:52]	a1stg_in2;
+wire [62:52]	a1stg_in2a;
+wire [12:0]	a1stg_dp_sngop;
+wire [12:0]	a1stg_dp_sngopa;
+wire [12:0]	a1stg_dp_dblop;
+wire [12:0]	a1stg_dp_dblopa;
+wire [9:7]      a1stg_op_7;
+wire            a1stg_op_7_0;
+wire [10:0]	a1stg_expadd3_in1;
+wire [10:0]	a1stg_expadd3_in2_in;
+wire [10:0]	a1stg_expadd3_in2;
+wire [12:0]	a1stg_expadd3;
+wire            a1stg_expadd3_11;
+wire [12:0]	a1stg_expadd1_in1;
+wire [12:0]	a1stg_expadd1_in2;
+wire [12:0]	a1stg_expadd1;
+wire [11:0]     a1stg_expadd1_11_0;
+wire [12:0]	a1stg_expadd4_in1;
+wire [12:0]	a1stg_expadd4_in2;
+wire [12:0]	a1stg_expadd4;
+wire [10:0]	a1stg_expadd4_inv;
+wire [12:0]	a1stg_expadd2_in1;
+wire [12:0]	a1stg_expadd2;
+wire [5:0]      a1stg_expadd2_5_0;
+wire [12:0]	a2stg_exp_in;
+wire [11:0]	a2stg_exp;
+wire [12:0]	a2stg_expa;
+wire [12:0]	a2stg_expadd_in2_in;
+wire [12:0]	a2stg_expadd_in2;
+wire [12:0]	a2stg_expadd;
+wire [12:0]	a3stg_exp_in;
+wire [12:0]	a3stg_exp;
+wire [10:0]     a3stg_exp_10_0;
+wire [12:0]	a3stg_exp_plus1;
+wire [12:0]	a3stg_exp_minus1;
+wire [12:0]	a4stg_exp_pre1_in;
+wire [12:0]	a4stg_exp_pre1;
+wire [12:0]	a4stg_exp_pre3_in;
+wire [12:0]	a4stg_exp_pre3;
+wire [12:0]	a4stg_exp_pre2_in;
+wire [12:0]	a4stg_exp_pre2;
+wire [12:0]	a4stg_exp_pre4_in;
+wire [12:0]	a4stg_exp_pre4;
+wire [12:0]	a4stg_exp;
+wire [11:0]	a4stg_exp_11_0;
+wire [12:0]	a4stg_exp2;
+wire [12:0]	a4stg_expinc;
+wire [12:0]	a4stg_expadd_in2;
+wire [12:0]	a4stg_expadd;
+wire [12:0]	a4stg_expshl;
+wire [10:0]	add_exp_out_in1;
+wire [10:0]	add_exp_out1;
+wire [10:0]	add_exp_out_in2;
+wire [10:0]	add_exp_out2;
+wire [10:0]	add_exp_out_in3;
+wire [10:0]	add_exp_out3;
+wire [10:0]	add_exp_out4;
+wire [10:0]	add_exp_out;
+
+
+// 6/23/03: Removed tm_l input port. Using locally generated se_l instead for cken_buf 
+wire se_l;
+wire clk;	// explicit declaration (was an implicit net): connects ckbuf_add_exp_dp.clk to every flop below
+assign se_l = ~se;
+
+clken_buf  ckbuf_add_exp_dp (
+  .clk(clk),
+  .rclk(rclk),
+  .enb_l(fadd_clken_l),
+  .tmb_l(se_l)
+  );
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add exponent inputs.
+//
+//	Add input stage.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dffe_s #(11) i_a1stg_in1 (
+        .din    (inq_in1[62:52]),
+        .en     (a1stg_step),
+        .clk    (clk),
+ 
+        .q      (a1stg_in1[62:52]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(11) i_a1stg_in1a (
+	.din	(inq_in1[62:52]),
+	.en	(a1stg_step),
+	.clk	(clk),
+
+	.q	(a1stg_in1a[62:52]),
+
+	.se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(11) i_a1stg_in2 (
+	.din	(inq_in2[62:52]),
+	.en	(a1stg_step),
+	.clk	(clk),
+
+	.q	(a1stg_in2[62:52]),
+
+	.se	(se),
+	.si	(),
+   	.so	()
+);
+
+dffe_s #(11) i_a1stg_in2a (
+        .din	(inq_in2[62:52]),
+        .en	(a1stg_step),
+        .clk	(clk),
+ 
+        .q	(a1stg_in2a[62:52]),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(13) i_a1stg_dp_sngop (
+	.din	({13{inq_op[0]}}),
+	.en	(a1stg_step),
+	.clk	(clk),
+
+	.q	(a1stg_dp_sngop[12:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(13) i_a1stg_dp_sngopa (
+        .din	({13{inq_op[0]}}),
+        .en	(a1stg_step),
+        .clk	(clk),
+ 
+        .q	(a1stg_dp_sngopa[12:0]),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+dffe_s #(13) i_a1stg_dp_dblop (
+	.din	({13{inq_op[1]}}),
+	.en	(a1stg_step),
+	.clk	(clk),
+
+	.q	(a1stg_dp_dblop[12:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(13) i_a1stg_dp_dblopa (
+	.din	({13{inq_op[1]}}),
+	.en	(a1stg_step),
+	.clk	(clk),
+
+	.q	(a1stg_dp_dblopa[12:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(4) i_a1stg_op_7 (
+	.din	({4{inq_op_7}}),
+	.en	(a1stg_step),
+	.clk	(clk),
+
+	.q	({a1stg_op_7[9:7], a1stg_op_7_0}),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(11) i_a1stg_expadd3_in1 (
+	.din	(inq_in1[62:52]),
+	.en	(a1stg_step),
+	.clk	(clk),
+
+	.q	(a1stg_expadd3_in1[10:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// Operand 2 is stored inverted (low 3 bits masked to 0 first unless inq_op[1]) so stage 1 subtracts with an add.
+assign a1stg_expadd3_in2_in[10:0]= (~(inq_in2[62:52] 
+		& {8'hff, {3{inq_op[1]}}}));
+
+dffe_s #(11) i_a1stg_expadd3_in2 (
+	.din	(a1stg_expadd3_in2_in[10:0]),
+	.en	(a1stg_step),
+	.clk	(clk),
+
+	.q	(a1stg_expadd3_in2[10:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipe exponent comparison.
+//
+//	Add stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+// expadd3 = in1 - (masked in2) in 13-bit two's complement: inverted in2, 2'b11 extension, plus 1.
+assign a1stg_expadd3[12:0]= ({2'b00, a1stg_expadd3_in1[10:0]}
+			+ {2'b11, a1stg_expadd3_in2[10:0]}
+			+ 13'h0001);
+assign a1stg_expadd3_11 = a1stg_expadd3[11];
+
+// expadd1 = (in1 field | op_7 bits 9:7 and 0) - in2 field; dblop selects [62:52], sngop selects [62:55].
+assign a1stg_expadd1_in1[12:0]= (a1stg_dp_dblopa
+			    & {2'b0, a1stg_in1[62:52]})
+		| (a1stg_dp_sngopa
+			    & {5'b0, a1stg_in1[62:55]})
+		| {3'b0, a1stg_op_7[9:7], 6'b0, a1stg_op_7_0};
+
+assign a1stg_expadd1_in2[12:0]= (~((a1stg_dp_dblop
+			    & {2'b0, a1stg_in2[62:52]})
+		| (a1stg_dp_sngop
+			    & {5'b0, a1stg_in2[62:55]})));
+
+assign a1stg_expadd1[12:0]= (a1stg_expadd1_in1[12:0]
+			+ a1stg_expadd1_in2[12:0]
+			+ 13'h0001);
+assign a1stg_expadd1_11_0[11:0] = a1stg_expadd1[11:0];
+// expadd4 = in2 field - in1 field (operands swapped relative to expadd1, no op_7 term).
+assign a1stg_expadd4_in1[12:0]= (a1stg_dp_dblopa
+			    & {2'b0, a1stg_in2a[62:52]})
+                | (a1stg_dp_sngopa
+			    & {5'b0, a1stg_in2a[62:55]});
+
+assign a1stg_expadd4_in2[12:0]= (~((a1stg_dp_dblop
+                            & {2'b0, a1stg_in1a[62:52]})
+		| (a1stg_dp_sngop
+			    & {5'b0, a1stg_in1a[62:55]})));
+
+assign a1stg_expadd4[12:0]= (a1stg_expadd4_in1[12:0]
+			+ a1stg_expadd4_in2[12:0]
+			+ 13'h0001);
+assign a1stg_expadd4_inv[10:0]= (~a1stg_expadd4[10:0]);
+// expadd2 = in2 field + 1.
+assign a1stg_expadd2_in1[12:0]= (a1stg_dp_dblopa
+			    & {2'b0, a1stg_in2a[62:52]})
+                | (a1stg_dp_sngopa
+			    & {5'b0, a1stg_in2a[62:55]});
+
+assign a1stg_expadd2[12:0]= (a1stg_expadd2_in1[12:0]
+			+ 13'h0001);
+assign a1stg_expadd2_5_0[5:0] = a1stg_expadd2[5:0];
+// Stage 2 exponent select: for add/sub a1stg_expadd1[12] picks in2 over in1; fdtos/fstod take in2; fsdtoix takes expadd2.
+assign a2stg_exp_in[12:0]= ({13{(a1stg_faddsubd && (!a1stg_expadd1[12]))}}
+			    & {2'b0, a1stg_in1a[62:52]})
+		| ({13{(a1stg_faddsubs && (!a1stg_expadd1[12]))}}
+			    & {5'b0, a1stg_in1a[62:55]})
+		| ({13{(a1stg_faddsubd && a1stg_expadd1[12])}}
+			    & {2'b0, a1stg_in2[62:52]})
+		| ({13{a1stg_fdtos}}
+			    & {2'b0, a1stg_in2[62:52]})
+		| ({13{(a1stg_faddsubs && a1stg_expadd1[12])}}
+			    & {5'b0, a1stg_in2[62:55]})
+		| ({13{a1stg_fstod}}
+			    & {5'b0, a1stg_in2[62:55]})
+		| ({13{a1stg_fsdtoix}}
+			    & a1stg_expadd2[12:0]);
+
+dffe_s #(12) i_a2stg_exp (
+	.din	(a2stg_exp_in[11:0]),
+	.en     (a6stg_step),
+        .clk    (clk),
+ 
+        .q      (a2stg_exp[11:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(13) i_a2stg_expa (
+	.din	(a2stg_exp_in[12:0]),
+	.en	(a6stg_step),
+	.clk	(clk),
+
+	.q	(a2stg_expa[12:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe exponent adjustment.
+//
+//      Add stage 1.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Per-conversion addend constant, chosen by the stage 1 opcode decodes and registered for the stage 2 adder.
+assign a2stg_expadd_in2_in[12:0]= ({13{a1stg_fstod}}
+			    & 13'h0380)
+		| ({13{a1stg_fdtos}}
+			    & (~13'h0380))
+		| ({13{a1stg_fstoi}}
+			    & (~13'h009f))
+		| ({13{a1stg_fstox}}
+			    & (~13'h00bf))
+		| ({13{a1stg_fdtoi}}
+			    & (~13'h041f))
+		| ({13{a1stg_fdtox}}
+			    & (~13'h043f));
+
+dffe_s #(13) i_a2stg_expadd2_in2 (
+        .din	(a2stg_expadd_in2_in[12:0]),
+        .en	(a6stg_step),
+        .clk	(clk),
+ 
+        .q	(a2stg_expadd_in2[12:0]),
+ 
+        .se	(se),
+        .si	(),
+        .so	()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Add pipe exponent adjustment.
+//
+//	Add stage 2.
+//
+///////////////////////////////////////////////////////////////////////////////
+// a2stg_expadd = a2stg_expa + registered constant, with a +1 carry-in when a2stg_fsdtoix_fdtos (turns ~K into -K).
+assign a2stg_expadd[12:0]= (a2stg_expa[12:0]
+			+ a2stg_expadd_in2[12:0]
+			+ {12'b0, a2stg_fsdtoix_fdtos});
+
+assign a3stg_exp_in[12:0]= ({13{a2stg_faddsubop}}
+			    & a2stg_expa[12:0])
+		| ({13{a2stg_fitos}}
+			    & 13'h009e)
+		| ({13{a2stg_fitod}}
+			    & 13'h041e)
+		| ({13{a2stg_fxtos}}
+			    & 13'h00be)
+		| ({13{a2stg_fxtod}}
+			    & 13'h043e)
+		| ({13{a3stg_exp_7ff}}
+			    & 13'h07ff)
+		| ({13{a3stg_exp_ff}}
+			    & 13'h00ff)
+		| ({13{a3stg_exp_add}}
+			    & (a2stg_expadd[12:0] & {13{(!a2stg_expadd[11])}}));
+
+dffe_s #(13) i_a3stg_exp (
+        .din    (a3stg_exp_in[12:0]),
+        .en     (a6stg_step),
+        .clk    (clk),
+ 
+        .q      (a3stg_exp[12:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a3stg_exp_10_0[10:0] = a3stg_exp[10:0];
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe exponent increment/decrement adjustment.
+//
+//      Add stage 3.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+assign a3stg_exp_plus1[12:0]= a3stg_exp[12:0] + 13'h0001;
+
+assign a3stg_exp_minus1[12:0]= a3stg_exp[12:0] - 13'h0001;
+// The stage 4 exponent is pre-decoded into four registers (pre1..pre4) that are ORed together further down.
+assign a4stg_exp_pre1_in[12:0]= ({13{(a3stg_faddsubop && a6stg_step
+					&& (!a3stg_inc_exp_inv))}}
+			    & a3stg_exp_plus1[12:0]);
+
+dff_s #(13) i_a4stg_exp_pre1 (
+	.din	(a4stg_exp_pre1_in[12:0]),
+	.clk	(clk),
+
+	.q	(a4stg_exp_pre1[12:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a4stg_exp_pre3_in[12:0]= ({13{(a3stg_faddsubop && a6stg_step
+					&& (!a3stg_dec_exp_inv))}}
+			    & a3stg_exp_minus1[12:0]);
+
+dff_s #(13) i_a4stg_exp_pre3 (
+	.din	(a4stg_exp_pre3_in[12:0]),
+	.clk	(clk),
+
+	.q	(a4stg_exp_pre3[12:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+// pre2 also recirculates a4stg_exp while a6stg_step is low, holding the stage 4 value.
+assign a4stg_exp_pre2_in[12:0]= ({13{((!a3stg_fdtos_inv) && a6stg_step)}}
+			    & a3stg_exp[12:0])
+		| ({13{((!a4stg_fixtos_fxtod_inv) && a6stg_step)}}
+			    & a4stg_expshl[12:0])
+		| ({13{(!a6stg_step)}}
+			    & a4stg_exp[12:0]);
+
+dff_s #(13) i_a4stg_exp_pre2 (
+	.din	(a4stg_exp_pre2_in[12:0]),
+        .clk    (clk),
+
+        .q      (a4stg_exp_pre2[12:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign a4stg_exp_pre4_in[12:0]= ({13{(a3stg_faddsubop && a6stg_step
+					&& (!a3stg_same_exp_inv))}}
+			    & a3stg_exp[12:0]);
+
+dff_s #(13) i_a4stg_exp_pre4 (
+	.din	(a4stg_exp_pre4_in[12:0]),
+	.clk	(clk),
+
+	.q	(a4stg_exp_pre4[12:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(13) i_a4stg_exp2 (
+	.din	(a3stg_exp[12:0]),
+	.en     (a6stg_step),
+        .clk    (clk),
+
+        .q      (a4stg_exp2[12:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe exponent rounding increment.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+// OR-merge of the pre-decoded registers. NOTE(review): relies on at most one of them being non-zero - confirm.
+assign a4stg_exp[12:0]= (a4stg_exp_pre1[12:0]
+		| a4stg_exp_pre2[12:0]
+		| a4stg_exp_pre3[12:0]
+		| a4stg_exp_pre4[12:0]);
+
+assign a4stg_exp_11_0[11:0] = a4stg_exp[11:0];
+
+assign a4stg_expinc[12:0]= a4stg_exp[12:0] + 13'h0001;
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe exponent adjustment for post normalization left shift.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+// a4stg_expadd = a4stg_exp2 - a4stg_shl_cnt (inverted zero-extended count plus 1).
+assign a4stg_expadd_in2[12:0]= (~{7'b0, a4stg_shl_cnt[5:0]});
+
+assign a4stg_expadd[12:0]= (a4stg_exp2[12:0]
+			+ a4stg_expadd_in2[12:0]
+			+ 13'h0001);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Add pipe exponent output.
+//
+//      Add stage 4.
+//
+///////////////////////////////////////////////////////////////////////////////
+// The select terms are registered inverted (add_exp_out1..3) and recombined after the flops.
+assign a4stg_expshl[12:0]= (a4stg_expadd[12:0] & {13{a4stg_denorm_inv}});
+
+assign add_exp_out_in1[10:0]= (~(({11{add_exp_out_exp1}}
+			    & a4stg_exp[10:0])
+		| ({11{a4stg_in_of}}
+			    & {{3{a4stg_dblop}}, 7'h7f, a4stg_to_0_inv})
+		| ({11{add_exp_out_expadd}}
+			    & a4stg_expshl[10:0])));
+
+dffe_s #(11) i_add_exp_out1 (
+	.din	(add_exp_out_in1[10:0]),
+	.en     (a6stg_step),
+        .clk    (clk),
+
+        .q      (add_exp_out1[10:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign add_exp_out_in2[10:0]= (~({11{(add_exp_out_expinc
+					&& a4stg_rndadd_cout)}}
+			    & a4stg_expinc[10:0]));
+
+dffe_s #(11) i_add_exp_out2 (
+	.din	(add_exp_out_in2[10:0]),
+	.en	(a6stg_step),
+	.clk	(clk),
+
+	.q	(add_exp_out2[10:0]),
+
+	.se  	(se),
+	.si	(),
+	.so	()
+);
+
+assign add_exp_out_in3[10:0]= (~({11{add_exp_out_exp}}
+			    & a4stg_exp[10:0]));
+
+dffe_s #(11) i_add_exp_out3 (
+	.din	(add_exp_out_in3[10:0]),
+	.en	(a6stg_step),
+	.clk	(clk),
+
+	.q	(add_exp_out3[10:0]),
+
+	.se  	(se),
+	.si	(),
+	.so	()
+);
+
+dffe_s #(11) i_add_exp_out4 (
+	.din	({11{a4stg_rndadd_cout}}),
+	.en	(a6stg_step),
+	.clk	(clk),
+
+	.q	(add_exp_out4[10:0]),
+
+	.se  	(se),
+	.si	(),
+	.so	()
+);
+// By De Morgan: add_exp_out = term1 | term2 | (term3 & ~rndadd_cout), all taken from the registered values.
+assign add_exp_out[10:0]= (~(add_exp_out1[10:0]
+		& add_exp_out2[10:0]
+		& (add_exp_out3[10:0] | add_exp_out4[10:0])));
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_cnt_lead0_lvl2.v
===================================================================
--- /trunk/T1-FPU/fpu_cnt_lead0_lvl2.v	(revision 6)
+++ /trunk/T1-FPU/fpu_cnt_lead0_lvl2.v	(revision 6)
@@ -0,0 +1,68 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_cnt_lead0_lvl2.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	2nd level of lead 0 counters.  Lead 0 count for 8 bits.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_cnt_lead0_lvl2 (
+	din_7_4_eq_0,
+	din_7_6_eq_0,
+	lead0_4b_0_hi,
+	din_3_0_eq_0,
+	din_3_2_eq_0,
+	lead0_4b_0_lo,
+	// outputs
+	din_7_0_eq_0,
+	lead0_8b_1,
+	lead0_8b_0
+);
+
+// Combines the lead 0 results of two 4-bit groups into the result for 8 bits.
+input		din_7_4_eq_0;		// din[7:4] is all zeros
+input		din_7_6_eq_0;		// din[7:6] is all zeros
+input		lead0_4b_0_hi;		// lead 0 count bit[0] for din[7:4]
+input		din_3_0_eq_0;		// din[3:0] is all zeros
+input		din_3_2_eq_0;		// din[3:2] is all zeros
+input		lead0_4b_0_lo;		// lead 0 count bit[0] for din[3:0]
+
+output		din_7_0_eq_0;		// din[7:0] is all zeros
+output		lead0_8b_1;		// lead 0 count bit[1] for din[7:0]
+output		lead0_8b_0;		// lead 0 count bit[0] for din[7:0]
+
+wire		din_7_0_eq_0;
+wire		lead0_8b_1;
+wire		lead0_8b_0;
+wire		use_lo;			// high group is zero: forward the low-group inputs
+
+assign use_lo = din_7_4_eq_0;
+assign din_7_0_eq_0 = use_lo & din_3_0_eq_0;
+
+assign lead0_8b_1 = (use_lo & din_3_2_eq_0)
+		| ((~use_lo) & din_7_6_eq_0);
+
+assign lead0_8b_0 = (use_lo & lead0_4b_0_lo)
+		| ((~use_lo) & lead0_4b_0_hi);
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_cnt_lead0_lvl3.v
===================================================================
--- /trunk/T1-FPU/fpu_cnt_lead0_lvl3.v	(revision 6)
+++ /trunk/T1-FPU/fpu_cnt_lead0_lvl3.v	(revision 6)
@@ -0,0 +1,79 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_cnt_lead0_lvl3.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	3rd level of lead 0 counters.  Lead 0 count for 16 bits.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_cnt_lead0_lvl3 (
+	din_15_8_eq_0,
+	din_15_12_eq_0,
+	lead0_8b_1_hi,
+	lead0_8b_0_hi,
+	din_7_0_eq_0,
+	din_7_4_eq_0,
+	lead0_8b_1_lo,
+	lead0_8b_0_lo,
+	// outputs
+	din_15_0_eq_0,
+	lead0_16b_2,
+	lead0_16b_1,
+	lead0_16b_0
+);
+
+// Combines the lead 0 results of two 8-bit groups into the result for 16 bits.
+input		din_15_8_eq_0;		// din[15:8] is all zeros
+input		din_15_12_eq_0;		// din[15:12] is all zeros
+input		lead0_8b_1_hi;		// lead 0 count bit[1] for din[15:8]
+input		lead0_8b_0_hi;		// lead 0 count bit[0] for din[15:8]
+input		din_7_0_eq_0;		// din[7:0] is all zeros
+input		din_7_4_eq_0;		// din[7:4] is all zeros
+input		lead0_8b_1_lo;		// lead 0 count bit[1] for din[7:0]
+input		lead0_8b_0_lo;		// lead 0 count bit[0] for din[7:0]
+
+output		din_15_0_eq_0;		// din[15:0] is all zeros
+output		lead0_16b_2;		// lead 0 count bit[2] for din[15:0]
+output		lead0_16b_1;		// lead 0 count bit[1] for din[15:0]
+output		lead0_16b_0;		// lead 0 count bit[0] for din[15:0]
+
+wire		use_lo;			// high group is zero: forward the low-group inputs
+wire		din_15_0_eq_0;
+wire		lead0_16b_2;
+wire		lead0_16b_1;
+wire		lead0_16b_0;
+
+assign use_lo = din_15_8_eq_0;
+assign din_15_0_eq_0 = use_lo & din_7_0_eq_0;
+
+assign lead0_16b_2 = (use_lo & din_7_4_eq_0)
+		| ((~use_lo) & din_15_12_eq_0);
+
+assign lead0_16b_1 = (use_lo & lead0_8b_1_lo)
+		| ((~use_lo) & lead0_8b_1_hi);
+
+assign lead0_16b_0 = (use_lo & lead0_8b_0_lo)
+		| ((~use_lo) & lead0_8b_0_hi);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_in_dp.v
===================================================================
--- /trunk/T1-FPU/fpu_in_dp.v	(revision 6)
+++ /trunk/T1-FPU/fpu_in_dp.v	(revision 6)
@@ -0,0 +1,323 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_in_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//      FPU input datapath.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_in_dp (
+	fp_data_rdy,
+        fpio_data_px2_116_112,
+        fpio_data_px2_79_72,
+        fpio_data_px2_67_0,
+	inq_fwrd,
+	inq_fwrd_inv,
+	inq_bp,
+	inq_bp_inv,
+	inq_dout,
+	rclk,
+	
+        fp_op_in_7in,
+	inq_id,
+	inq_rnd_mode,
+	inq_fcc,
+	inq_op,
+	inq_in1_exp_neq_ffs,
+	inq_in1_exp_eq_0,
+	inq_in1_53_0_neq_0,
+	inq_in1_50_0_neq_0,
+	inq_in1_53_32_neq_0,
+	inq_in1,
+	inq_in2_exp_neq_ffs,
+	inq_in2_exp_eq_0,
+	inq_in2_53_0_neq_0,
+	inq_in2_50_0_neq_0,
+	inq_in2_53_32_neq_0,
+	inq_in2,
+
+	fp_id_in,
+	fp_rnd_mode_in,
+	fp_fcc_in,
+	fp_op_in,
+	fp_src1_in,
+	fp_src2_in,
+
+	se,
+	si,
+	so
+);
+
+// Registers the PCX request fields, derives operand flags, and muxes the input-queue output word.
+input           fp_data_rdy;
+input [116:112] fpio_data_px2_116_112;  // FPU request data from PCX
+input [79:72]   fpio_data_px2_79_72;    // FPU request data from PCX
+input [67:0]    fpio_data_px2_67_0;     // FPU request data from PCX
+input		inq_fwrd;		// input Q is empty
+input		inq_fwrd_inv;		// input Q is not empty
+input		inq_bp;			// bypass the input Q SRAM
+input		inq_bp_inv;		// don't bypass the input Q SRAM
+input [154:0] inq_dout; // data read out from input Q SRAM
+input		rclk;		// global clock
+
+output          fp_op_in_7in;           // request opcode
+output [4:0]	inq_id;			// request ID to the operation pipes
+output [1:0]	inq_rnd_mode;		// request rounding mode to op pipes
+output [1:0]	inq_fcc;		// request cc ID to op pipes
+output [7:0]	inq_op;			// request opcode to op pipes
+output		inq_in1_exp_neq_ffs;	// request operand 1 exp!=ff's
+output		inq_in1_exp_eq_0;	// request operand 1 exp==0
+output		inq_in1_53_0_neq_0;	// request operand 1[53:0]!=0
+output		inq_in1_50_0_neq_0;	// request operand 1[50:0]!=0
+output		inq_in1_53_32_neq_0;	// request operand 1[53:32]!=0
+output [63:0]	inq_in1;		// request operand 1 to op pipes
+output		inq_in2_exp_neq_ffs;	// request operand 2 exp!=ff's
+output		inq_in2_exp_eq_0;	// request operand 2 exp==0
+output		inq_in2_53_0_neq_0;	// request operand 2[53:0]!=0
+output		inq_in2_50_0_neq_0;	// request operand 2[50:0]!=0
+output		inq_in2_53_32_neq_0;	// request operand 2[53:32]!=0
+output [63:0]	inq_in2;		// request operand 2 to op pipes
+
+// 6/20/03: New outputs to drive fpu-level i_fpu_inq_sram inputs 
+output [4:0] fp_id_in; // id to be written into inq_sram
+output [1:0] fp_rnd_mode_in; // rnd_mode to be written into inq_sram
+output [1:0] fp_fcc_in; // fcc to be written into inq_sram
+output [7:0] fp_op_in; // request opcode
+output [68:0] fp_src1_in; // operand1 and its pre-computed bits portion
+output [68:0] fp_src2_in; // operand2, includes pre-computed bits
+
+input           se;                     // scan_enable
+input           si;                     // scan in (not referenced inside this module)
+output          so;                     // scan out (not driven inside this module)
+
+
+wire [154:0]	inq_dout;
+wire [4:0]	fp_id_in;
+wire [7:0]	fp_op_in;
+wire		fp_op_in_7;		// request opcode bit[7]
+wire		fp_op_in_7_inv;		// inverted request opcode bit[7]
+wire            fp_op_in_7in;
+wire [1:0]	fp_fcc_in;
+wire [1:0]	fp_rnd_mode_in;
+wire [63:0]	fp_srca_in;
+wire		fp_srca_53_0_neq_0;
+wire		fp_srca_50_0_neq_0;
+wire		fp_srca_53_32_neq_0;
+wire		fp_srca_exp_eq_0;
+wire		fp_srca_exp_neq_ffs;
+wire [68:0]	fp_srcb_in;
+wire [68:0]	fp_src1_in;
+wire [68:0]	fp_src2_in;
+wire [154:0]	inq_din_d1;
+wire [154:0]	inq_data;
+wire [4:0]	inq_id;
+wire [1:0]	inq_rnd_mode;
+wire [1:0]	inq_fcc;
+wire [7:0]	inq_op;
+wire		inq_in1_exp_neq_ffs;
+wire		inq_in1_exp_eq_0;
+wire		inq_in1_53_0_neq_0;
+wire		inq_in1_50_0_neq_0;
+wire		inq_in1_53_32_neq_0;
+wire [63:0]	inq_in1;
+wire		inq_in2_exp_neq_ffs;
+wire		inq_in2_exp_eq_0;
+wire		inq_in2_53_0_neq_0;
+wire		inq_in2_50_0_neq_0;
+wire		inq_in2_53_32_neq_0;
+wire [63:0]	inq_in2;
+
+wire clk; // clock output of ckbuf_in_dp; clocks every flop in this module
+
+wire se_l;
+
+// 6/23/03: Replaced tm_l with se_l 
+assign se_l = ~se;
+
+clken_buf  ckbuf_in_dp (
+  .clk(clk),
+  .rclk(rclk),
+  .enb_l(1'b0), // enable input tied to constant 0 (presumably "always enabled" - confirm in clken_buf)
+  .tmb_l(se_l)
+  );
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Capture input information.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Each request field is registered on clk; the flops' si/so pins are left unconnected.
+dff_s #(5) i_fp_id_in (
+	.din	(fpio_data_px2_116_112[116:112]),
+	.clk    (clk),
+ 
+        .q      (fp_id_in[4:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dff_s #(8) i_fp_op_in (
+        .din    (fpio_data_px2_79_72[79:72]),
+        .clk    (clk),
+
+        .q      (fp_op_in[7:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign fp_op_in_7in = fp_op_in[7]; // exported copy of opcode bit[7]
+assign fp_op_in_7 = fp_op_in[7]; // local select for the src1/src2 muxes below
+assign fp_op_in_7_inv = ~fp_op_in[7]; // complementary select
+
+dff_s #(2) i_fp_fcc_in (
+        .din    (fpio_data_px2_67_0[67:66]),
+        .clk    (clk),
+
+        .q      (fp_fcc_in[1:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dff_s #(2) i_fp_rnd_mode_in (
+        .din    (fpio_data_px2_67_0[65:64]),
+        .clk    (clk),
+
+        .q      (fp_rnd_mode_in[1:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+dff_s #(64) i_fp_srca_in (
+	.din    (fpio_data_px2_67_0[63:0]),
+        .clk    (clk),
+
+        .q      (fp_srca_in[63:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Pre-computed flags derived from the registered operand fp_srca_in.
+assign fp_srca_53_0_neq_0= (|fp_srca_in[53:0]);
+
+assign fp_srca_50_0_neq_0= (|fp_srca_in[50:0]);
+
+assign fp_srca_53_32_neq_0= (|fp_srca_in[53:32]);
+// exp==0: bits[62:55] all 0, and bits[54:52] also all 0 when fp_op_in[1] is set.
+assign fp_srca_exp_eq_0= (!((|fp_srca_in[62:55])
+		|| (fp_op_in[1] && (|fp_srca_in[54:52]))));
+// exp!=ff's: NOT(bits[62:55] all 1 and (fp_op_in[0] set or bits[54:52] all 1)).
+assign fp_srca_exp_neq_ffs= (!((&fp_srca_in[62:55])
+		&& (fp_op_in[0] || (&fp_srca_in[54:52]))));
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Extract the two operands.
+//
+///////////////////////////////////////////////////////////////////////////////
+// fp_srcb_in: the {flags, operand} bundle captured by an enabled flop (en = fp_data_rdy).
+dffe_s #(69) i_fp_srcb_in (
+	.din	({fp_srca_exp_neq_ffs, fp_srca_exp_eq_0, fp_srca_53_0_neq_0,
+			fp_srca_50_0_neq_0, fp_srca_53_32_neq_0,
+			fp_srca_in[63:0]}),
+        .en     (fp_data_rdy),
+	.clk    (clk),
+
+        .q      (fp_srcb_in[68:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// fp_op_in[7]==0: src1 = current bundle; fp_op_in[7]==1: src1 = constant with only bits 68 and 67 (the two exp flags) set.
+assign fp_src1_in[68:0]= ({69{fp_op_in_7_inv}}
+			    & {fp_srca_exp_neq_ffs, fp_srca_exp_eq_0,
+				fp_srca_53_0_neq_0, fp_srca_50_0_neq_0,
+				fp_srca_53_32_neq_0, fp_srca_in[63:0]})
+		| ({69{fp_op_in_7}}
+			    & 69'h180000000000000000);
+// fp_op_in[7]==0: src2 = fp_srcb_in; fp_op_in[7]==1: src2 = current bundle.
+assign fp_src2_in[68:0]= ({69{fp_op_in_7_inv}}
+			    & fp_srcb_in[68:0])
+		| ({69{fp_op_in_7}}
+			    & {fp_srca_exp_neq_ffs, fp_srca_exp_eq_0,
+				fp_srca_53_0_neq_0, fp_srca_50_0_neq_0,
+				fp_srca_53_32_neq_0, fp_srca_in[63:0]});
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Input queue FIFO bypass and output.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Registered (one clk later) copy of the packed 155-bit request word.
+dff_s #(155) i_inq_din_d1 (
+	.din	({fp_id_in[4:0], fp_rnd_mode_in[1:0], fp_fcc_in[1:0],
+                        fp_op_in[7:0], fp_src1_in[68:0], fp_src2_in[68:0]}),
+	.clk    (clk),
+
+        .q      (inq_din_d1[154:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+// AND-OR select: inq_fwrd -> current word; inq_fwrd_inv & inq_bp -> registered copy; inq_fwrd_inv & inq_bp_inv -> inq_dout.
+assign inq_data[154:0]= ({155{inq_fwrd}}
+			    & {fp_id_in[4:0], fp_rnd_mode_in[1:0],
+				fp_fcc_in[1:0], fp_op_in[7:0],
+				fp_src1_in[68:0], fp_src2_in[68:0]})
+		| ({155{inq_fwrd_inv}}
+			    & (({155{inq_bp}}
+					& inq_din_d1[154:0])
+				| ({155{inq_bp_inv}}
+					& inq_dout[154:0])));
+// Unpack the word: {id[4:0], rnd_mode[1:0], fcc[1:0], op[7:0], src1[68:0], src2[68:0]}.
+assign inq_id[4:0]= inq_data[154:150];
+assign inq_rnd_mode[1:0]= inq_data[149:148];
+assign inq_fcc[1:0]= inq_data[147:146];
+assign inq_op[7:0]= inq_data[145:138];
+assign inq_in1_exp_neq_ffs= inq_data[137];
+assign inq_in1_exp_eq_0= inq_data[136];
+assign inq_in1_53_0_neq_0= inq_data[135];
+assign inq_in1_50_0_neq_0= inq_data[134];
+assign inq_in1_53_32_neq_0= inq_data[133];
+assign inq_in1[63:0]= inq_data[132:69];
+assign inq_in2_exp_neq_ffs= inq_data[68];
+assign inq_in2_exp_eq_0= inq_data[67];
+assign inq_in2_53_0_neq_0= inq_data[66];
+assign inq_in2_50_0_neq_0= inq_data[65];
+assign inq_in2_53_32_neq_0= inq_data[64];
+assign inq_in2[63:0]= inq_data[63:0];
+
+
+endmodule
+
Index: /trunk/T1-FPU/fpu_cnt_lead0_lvl4.v
===================================================================
--- /trunk/T1-FPU/fpu_cnt_lead0_lvl4.v	(revision 6)
+++ /trunk/T1-FPU/fpu_cnt_lead0_lvl4.v	(revision 6)
@@ -0,0 +1,89 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_cnt_lead0_lvl4.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	4th level of lead 0 counters.  Lead 0 count for 32 bits.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_cnt_lead0_lvl4 (
+	din_31_16_eq_0,
+	din_31_24_eq_0,
+	lead0_16b_2_hi,
+	lead0_16b_1_hi,
+	lead0_16b_0_hi,
+	din_15_0_eq_0,
+	din_15_8_eq_0,
+	lead0_16b_2_lo,
+	lead0_16b_1_lo,
+	lead0_16b_0_lo,
+
+	din_31_0_eq_0,
+	lead0_32b_3,
+	lead0_32b_2,
+	lead0_32b_1,
+	lead0_32b_0
+);
+
+// Merges the lead-zero results of two 16-bit halves into one 32-bit result.
+input		din_31_16_eq_0;		// high half din[31:16] is all zero
+input		din_31_24_eq_0;		// top byte of high half is all zero
+input		lead0_16b_2_hi;		// count bit[2] computed on din[31:16]
+input		lead0_16b_1_hi;		// count bit[1] computed on din[31:16]
+input		lead0_16b_0_hi;		// count bit[0] computed on din[31:16]
+input		din_15_0_eq_0;		// low half din[15:0] is all zero
+input		din_15_8_eq_0;		// top byte of low half is all zero
+input		lead0_16b_2_lo;		// count bit[2] computed on din[15:0]
+input		lead0_16b_1_lo;		// count bit[1] computed on din[15:0]
+input		lead0_16b_0_lo;		// count bit[0] computed on din[15:0]
+
+output		din_31_0_eq_0;		// all 32 bits are zero
+output		lead0_32b_3;		// merged count bit[3]
+output		lead0_32b_2;		// merged count bit[2]
+output		lead0_32b_1;		// merged count bit[1]
+output		lead0_32b_0;		// merged count bit[0]
+
+wire		din_31_0_eq_0;
+wire		lead0_32b_3;
+wire		lead0_32b_2;
+wire		lead0_32b_1;
+wire		lead0_32b_0;
+wire		sel_hi= ~din_31_16_eq_0;	// take the high-half terms
+wire		sel_lo= din_31_16_eq_0;	// high half empty: take the low-half terms
+
+assign din_31_0_eq_0= din_31_16_eq_0 & din_15_0_eq_0;
+
+assign lead0_32b_3= (sel_hi & din_31_24_eq_0)
+		| (sel_lo & din_15_8_eq_0);
+
+assign lead0_32b_2= (sel_hi & lead0_16b_2_hi)
+		| (sel_lo & lead0_16b_2_lo);
+
+assign lead0_32b_1= (sel_hi & lead0_16b_1_hi)
+		| (sel_lo & lead0_16b_1_lo);
+
+assign lead0_32b_0= (sel_hi & lead0_16b_0_hi)
+		| (sel_lo & lead0_16b_0_lo);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_denorm_3to1.v
===================================================================
--- /trunk/T1-FPU/fpu_denorm_3to1.v	(revision 6)
+++ /trunk/T1-FPU/fpu_denorm_3to1.v	(revision 6)
@@ -0,0 +1,67 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_denorm_3to1.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Reduce three fpu_denorm_3b results to one set of results.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+module fpu_denorm_3to1 (
+	din2_din1_nz_hi,
+	din2_din1_denorm_hi,
+	din2_din1_nz_mid,
+	din2_din1_denorm_mid,
+	din2_din1_nz_lo,
+	din2_din1_denorm_lo,
+
+	din2_din1_nz,
+	din2_din1_denorm
+);
+
+// Combines the nz/denorm pairs of three 3-bit groups (hi, mid, lo) into one pair.
+input		din2_din1_nz_hi;	// nz flag of the high 3-bit group
+input		din2_din1_denorm_hi;	// denorm flag of the high 3-bit group
+input		din2_din1_nz_mid;	// nz flag of the mid 3-bit group
+input		din2_din1_denorm_mid;	// denorm flag of the mid 3-bit group
+input		din2_din1_nz_lo;	// nz flag of the low 3-bit group
+input		din2_din1_denorm_lo;	// denorm flag of the low 3-bit group
+
+output		din2_din1_nz;		// OR of the three nz flags
+output		din2_din1_denorm;	// denorm flag of the selected group
+
+wire		din2_din1_nz;
+wire		din2_din1_denorm;
+wire		hi_clear= ~din2_din1_nz_hi;	// high group nz flag is 0
+wire		mid_clear= ~din2_din1_nz_mid;	// mid group nz flag is 0
+
+assign din2_din1_nz= din2_din1_nz_hi | din2_din1_nz_mid
+		| din2_din1_nz_lo;
+
+// Priority select: hi wins when its nz flag is set, then mid; otherwise
+// the low group's denorm flag is passed through regardless of nz_lo.
+assign din2_din1_denorm= (din2_din1_nz_hi & din2_din1_denorm_hi)
+		| (hi_clear & din2_din1_nz_mid & din2_din1_denorm_mid)
+		| (hi_clear & mid_clear & din2_din1_denorm_lo);
+
+
+endmodule
+
+
Index: /trunk/T1-FPU/fpu_rptr_macros.v
===================================================================
--- /trunk/T1-FPU/fpu_rptr_macros.v	(revision 6)
+++ /trunk/T1-FPU/fpu_rptr_macros.v	(revision 6)
@@ -0,0 +1,44 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_rptr_macros.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+module fpu_bufrpt_grp64 (
+	in,
+	out
+);
+
+	// 64-bit repeater group, modelled as a plain wire-through.
+	input  [63:0] in;
+	output [63:0] out;
+	wire   [63:0] out;
+	assign out = in;
+endmodule
+
+module fpu_bufrpt_grp32 (
+	in,
+	out
+);
+
+	// 32-bit repeater group, modelled as a plain wire-through.
+	input  [31:0] in;
+	output [31:0] out;
+	wire   [31:0] out;
+	assign out = in;
+endmodule
+
Index: /trunk/T1-FPU/fpu_div_frac_dp.v
===================================================================
--- /trunk/T1-FPU/fpu_div_frac_dp.v	(revision 6)
+++ /trunk/T1-FPU/fpu_div_frac_dp.v	(revision 6)
@@ -0,0 +1,445 @@
+// ========== Copyright Header Begin ==========================================
+// 
+// OpenSPARC T1 Processor File: fpu_div_frac_dp.v
+// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
+// 
+// The above named program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public
+// License version 2 as published by the Free Software Foundation.
+// 
+// The above named program is distributed in the hope that it will be 
+// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public
+// License along with this work; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+// 
+// ========== Copyright Header End ============================================
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide pipeline fraction datapath.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+module fpu_div_frac_dp (
+	inq_in1,
+	inq_in2,
+	d1stg_step,
+	div_norm_frac_in1_dbl_norm,
+	div_norm_frac_in1_dbl_dnrm,
+	div_norm_frac_in1_sng_norm,
+	div_norm_frac_in1_sng_dnrm,
+	div_norm_frac_in2_dbl_norm,
+	div_norm_frac_in2_dbl_dnrm,
+	div_norm_frac_in2_sng_norm,
+	div_norm_frac_in2_sng_dnrm,
+	div_norm_inf,
+	div_norm_qnan,
+	d1stg_dblop,
+	div_norm_zero,
+	d1stg_snan_dbl_in1,
+	d1stg_snan_sng_in1,
+	d1stg_snan_dbl_in2,
+	d1stg_snan_sng_in2,
+	d3stg_fdiv,
+	d6stg_fdiv,
+	d6stg_fdivd,
+	d6stg_fdivs,
+	div_frac_add_in2_load,
+	d6stg_frac_out_shl1,
+	d6stg_frac_out_nosh,
+	d4stg_fdiv,
+	div_frac_add_in1_add,
+	div_frac_add_in1_load,
+	d5stg_fdivb,
+	div_frac_out_add_in1,
+	div_frac_out_add,
+	div_frac_out_shl1_dbl,
+	div_frac_out_shl1_sng,
+	div_frac_out_of,
+	d7stg_to_0,
+	div_frac_out_load,
+	fdiv_clken_l,
+	rclk,
+	
+	div_shl_cnt,
+	d6stg_frac_0,
+	d6stg_frac_1,
+	d6stg_frac_2,
+	d6stg_frac_29,
+	d6stg_frac_30,
+	d6stg_frac_31,
+	div_frac_add_in1_neq_0,
+	div_frac_add_52_inv,
+	div_frac_add_52_inva,
+	div_frac_out_54_53,
+	div_frac_outa,
+
+	se,
+	si,
+	so
+);
+
+// Fraction datapath of the divide pipe: operand capture, normalization, left shift, adder and result register.
+input [54:0]	inq_in1;		// request operand 1 to op pipes
+input [54:0]	inq_in2;		// request operand 2 to op pipes
+input		d1stg_step;		// divide pipe load
+input		div_norm_frac_in1_dbl_norm; // select line to div_norm
+input		div_norm_frac_in1_dbl_dnrm; // select line to div_norm
+input		div_norm_frac_in1_sng_norm; // select line to div_norm
+input		div_norm_frac_in1_sng_dnrm; // select line to div_norm
+input		div_norm_frac_in2_dbl_norm; // select line to div_norm
+input		div_norm_frac_in2_dbl_dnrm; // select line to div_norm
+input		div_norm_frac_in2_sng_norm; // select line to div_norm
+input		div_norm_frac_in2_sng_dnrm; // select line to div_norm
+input		div_norm_inf;		// select line to div_norm
+input		div_norm_qnan;		// select line to div_norm
+input		d1stg_dblop;		// double precision operation- d1 stg
+input		div_norm_zero;		// select line to div_norm
+input		d1stg_snan_dbl_in1;	// operand 1 is double signalling NaN
+input		d1stg_snan_sng_in1;	// operand 1 is single signalling NaN
+input		d1stg_snan_dbl_in2;	// operand 2 is double signalling NaN
+input		d1stg_snan_sng_in2;	// operand 2 is single signalling NaN
+input		d3stg_fdiv;		// divide operation- divide stage 3
+input		d6stg_fdiv;		// divide operation- divide stage 6
+input		d6stg_fdivd;		// divide double- divide stage 6
+input		d6stg_fdivs;		// divide single- divide stage 6
+input		div_frac_add_in2_load;	// load enable to div_frac_add_in2
+input		d6stg_frac_out_shl1;	// select line to d6stg_frac
+input		d6stg_frac_out_nosh;	// select line to d6stg_frac
+input		d4stg_fdiv;		// divide operation- divide stage 4
+input		div_frac_add_in1_add;	// select line to div_frac_add_in1
+input		div_frac_add_in1_load;	// load enable to div_frac_add_in1
+input		d5stg_fdivb;		// divide operation- divide stage 5
+input		div_frac_out_add_in1;	// select line to div_frac_out
+input		div_frac_out_add;	// select line to div_frac_out
+input		div_frac_out_shl1_dbl;	// select line to div_frac_out
+input		div_frac_out_shl1_sng;	// select line to div_frac_out
+input		div_frac_out_of;	// select line to div_frac_out
+input		d7stg_to_0;		// result to max finite on overflow
+input		div_frac_out_load;	// load enable to div_frac_out
+input		fdiv_clken_l;           // div pipe clk enable - asserted low
+input		rclk;		// global clock
+
+output [5:0]	div_shl_cnt;		// divide left shift amount
+output		d6stg_frac_0;		// divide fraction[0]- intermediate val
+output		d6stg_frac_1;		// divide fraction[1]- intermediate val
+output		d6stg_frac_2;		// divide fraction[2]- intermediate val
+output		d6stg_frac_29;		// divide fraction[29]- intermediate val
+output		d6stg_frac_30;		// divide fraction[30]- intermediate val
+output		d6stg_frac_31;		// divide fraction[31]- intermediate val
+output		div_frac_add_in1_neq_0;	// div_frac_add_in1 != 0
+output		div_frac_add_52_inv;	// div_frac_add bit[52] inverted
+output		div_frac_add_52_inva;	// div_frac_add bit[52] inverted copy
+output [1:0]  	div_frac_out_54_53;	// divide fraction output
+output [51:0]	div_frac_outa;		// divide fraction output- buffered copy
+
+input           se;                     // scan_enable
+input           si;                     // scan in (not referenced inside this module)
+output          so;                     // scan out (not driven inside this module)
+
+
+wire [54:0]	div_frac_in1;
+wire [54:0]	div_frac_in2;
+wire [52:0]	div_norm_inv_in;
+wire [52:0]	div_norm_inv;
+wire [52:0]	div_norm;
+wire [5:0]	div_lead0;
+wire [5:0]	div_shl_cnt;
+wire [5:0]	div_shl_cnta;
+wire [52:0]	div_shl_data;
+wire [105:53]	div_shl_tmp;
+wire [52:0]	div_shl;
+wire [54:0]	div_shl_save;
+wire [54:0]	div_frac_add_in2_in;
+wire [54:0]	div_frac_add_in2;
+wire [53:0]	d6stg_frac;
+wire		d6stg_frac_0;
+wire		d6stg_frac_1;
+wire		d6stg_frac_2;
+wire		d6stg_frac_29;
+wire		d6stg_frac_30;
+wire		d6stg_frac_31;
+wire [54:0]	div_frac_add_in1_in;
+wire [54:0]	div_frac_add_in1;
+wire [54:0]	div_frac_add_in1a;
+wire		div_frac_add_in1_neq_0;
+wire [54:0]	div_frac_add;
+wire		div_frac_add_52_inv;
+wire		div_frac_add_52_inva;
+wire [54:0]	div_frac_out_in;
+wire  [1:0]     div_frac_out_54_53;
+wire [54:0]	div_frac_out;
+wire [51:0]	div_frac_outa;
+
+wire		clk;			// clock output of ckbuf_div_frac_dp (declared explicitly, no implicit net)
+wire se_l;
+
+assign se_l = ~se;
+
+clken_buf  ckbuf_div_frac_dp (
+  .clk(clk),
+  .rclk(rclk),
+  .enb_l(fdiv_clken_l),
+  .tmb_l(se_l)
+  );
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide fraction inputs.
+//
+///////////////////////////////////////////////////////////////////////////////
+// Both operands are captured by enabled flops with en = d1stg_step.
+dffe_s #(55) i_div_frac_in1 (
+	.din	(inq_in1[54:0]),
+	.en	(d1stg_step),
+	.clk    (clk),
+ 
+        .q      (div_frac_in1[54:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(55) i_div_frac_in2 (
+        .din    (inq_in2[54:0]),
+        .en     (d1stg_step),
+        .clk    (clk),
+ 
+        .q      (div_frac_in2[54:0]),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide normalization and special input injection.
+//
+///////////////////////////////////////////////////////////////////////////////
+// AND-OR select among operand variants and special constants; the result is registered inverted.
+assign div_norm_inv_in[52:0]= (~(({53{div_norm_frac_in1_dbl_norm}}
+			    & {1'b1, (div_frac_in1[51] || d1stg_snan_dbl_in1),
+				div_frac_in1[50:0]})
+		| ({53{div_norm_frac_in1_dbl_dnrm}}
+			    & {div_frac_in1[51:0], 1'b0})
+		| ({53{div_norm_frac_in1_sng_norm}}
+			    & {1'b1, (div_frac_in1[54] || d1stg_snan_sng_in1),
+				div_frac_in1[53:32], 29'b0})
+		| ({53{div_norm_frac_in1_sng_dnrm}}
+			    & {div_frac_in1[54:32], 30'b0})
+		| ({53{div_norm_frac_in2_dbl_norm}}
+			    & {1'b1, (div_frac_in2[51] || d1stg_snan_dbl_in2),
+				div_frac_in2[50:0]})
+		| ({53{div_norm_frac_in2_dbl_dnrm}}
+			    & {div_frac_in2[51:0], 1'b0})
+		| ({53{div_norm_frac_in2_sng_norm}}
+			    & {1'b1, (div_frac_in2[54] || d1stg_snan_sng_in2),
+				div_frac_in2[53:32], 29'b0})
+		| ({53{div_norm_frac_in2_sng_dnrm}}
+			    & {div_frac_in2[54:32], 30'b0})
+		| ({53{div_norm_inf}}
+			    & 53'h10000000000000)
+		| ({53{div_norm_qnan}}
+			    & {24'hffffff, {29{d1stg_dblop}}})
+		| ({53{div_norm_zero}}
+			    & 53'h00000000000000)));
+
+dff_s #(53) i_div_norm_inv (
+	.din	(div_norm_inv_in[52:0]),
+	.clk	(clk),
+
+	.q	(div_norm_inv[52:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign div_norm[52:0]= (~div_norm_inv);	// undo the inversion applied ahead of the flop
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//	Divide lead zero count.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+fpu_cnt_lead0_53b i_div_lead0 (
+	.din	(div_norm[52:0]),
+
+	.lead0 (div_lead0[5:0])
+);
+// Two registered copies of the count: div_shl_cnta drives the local shifter, div_shl_cnt is exported.
+dff_s #(12) i_dstg_xtra_regs (
+        .din    ({div_lead0[5:0], div_lead0[5:0]}),
+        .clk    (clk),
+
+        .q      ({div_shl_cnta[5:0], div_shl_cnt[5:0]}),
+
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide left shift.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+dff_s #(53) i_div_shl_data (
+	.din	(div_norm[52:0]),
+	.clk    (clk),
+
+        .q      (div_shl_data[52:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+// Left shift by the registered count; only 53 result bits are kept (the commented-out line shows a 106-bit formulation).
+//assign div_shl_tmp[105:0]= {div_shl_data[52:0], 53'b0} << div_shl_cnta[5:0];
+  assign div_shl_tmp[105:53]= div_shl_data[52:0]         << div_shl_cnta[5:0];
+
+assign div_shl[52:0]= div_shl_tmp[105:53];
+
+dffe_s #(55) i_div_shl_save (
+	.din	({2'b0, div_shl[52:0]}),
+	.en	(d3stg_fdiv),
+        .clk    (clk),
+ 
+        .q      (div_shl_save[54:0]),
+ 
+        .se     (se),
+        .si     (),
+        .so     ()
+);
+// Adder 2nd input: ~{2'b0, div_shl} when d4stg_fdiv; a single 1 at bit 29 (d6stg_fdivs) or bit 0 (d6stg_fdivd) when d6stg_fdiv.
+assign div_frac_add_in2_in[54:0]= ({55{d4stg_fdiv}}
+			    & (~{2'b0, div_shl[52:0]}))
+		| ({55{d6stg_fdiv}}
+			    & {25'b0, d6stg_fdivs, 28'b0, d6stg_fdivd});
+
+dffe_s #(55) i_div_frac_add_in2 (
+	.din	(div_frac_add_in2_in[54:0]),
+	.en	(div_frac_add_in2_load),
+	.clk    (clk),
+
+        .q      (div_frac_add_in2[54:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide adder/subtractor 1st input (div_frac_add_in1).
+//
+///////////////////////////////////////////////////////////////////////////////
+// d6stg_frac: div_frac_out shifted left by one, or unshifted, per the two select lines.
+assign d6stg_frac[53:0]= ({54{d6stg_frac_out_shl1}}
+			    & {div_frac_out[52:0], 1'b0})
+		| ({54{d6stg_frac_out_nosh}}
+			    & div_frac_out[53:0]);
+
+assign d6stg_frac_0= d6stg_frac[0];
+assign d6stg_frac_1= d6stg_frac[1];
+assign d6stg_frac_2= d6stg_frac[2];
+assign d6stg_frac_29= d6stg_frac[29];
+assign d6stg_frac_30= d6stg_frac[30];
+assign d6stg_frac_31= d6stg_frac[31];
+// in1 next value: div_shl_save at d4stg_fdiv; on add steps 2x the sum if div_frac_add[54]==0, else 2x the old in1; d6stg_frac bits at d6stg_fdiv.
+assign div_frac_add_in1_in[54:0]= ({55{d4stg_fdiv}}
+			    & div_shl_save[54:0])
+		| ({55{(div_frac_add_in1_add && (!div_frac_add[54]))}}
+			    & {div_frac_add[53:0], 1'b0})
+		| ({55{(div_frac_add_in1_add && div_frac_add[54])}}
+			    & {div_frac_add_in1[53:0], 1'b0})
+		| ({55{d6stg_fdiv}}
+			    & {3'b0, d6stg_frac[53:31],
+				(d6stg_frac[30:2] & {29{d6stg_fdivd}})});
+// in1 and in1a are loaded identically; in1a feeds the adder, in1 feeds the feedback, neq_0 and output-mux paths.
+dffe_s #(55) i_div_frac_add_in1 (
+	.din	(div_frac_add_in1_in[54:0]),
+	.en	(div_frac_add_in1_load),
+	.clk    (clk),
+
+        .q      (div_frac_add_in1[54:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+dffe_s #(55) i_div_frac_add_in1a (
+	.din	(div_frac_add_in1_in[54:0]),
+	.en	(div_frac_add_in1_load),
+	.clk	(clk),
+
+	.q	(div_frac_add_in1a[54:0]),
+
+	.se	(se),
+	.si	(),
+	.so	()
+);
+
+assign div_frac_add_in1_neq_0= (|div_frac_add_in1[54:0]);
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+//      Divide adder/subtractor.
+//
+///////////////////////////////////////////////////////////////////////////////
+// 55-bit sum of the two registered inputs with d5stg_fdivb added at bit 0.
+assign div_frac_add[54:0]= (div_frac_add_in1a[54:0]
+			+ div_frac_add_in2[54:0]
+			+ {54'b0, d5stg_fdivb});
+
+assign div_frac_add_52_inv= (!div_frac_add[52]);
+assign div_frac_add_52_inva= (!div_frac_add[52]);
+// div_frac_out next value: zero at d4stg_fdiv, else add_in1 / sum / shift-in of !div_frac_add[54] / fill with d7stg_to_0.
+assign div_frac_out_in[54:0]= ({55{d4stg_fdiv}}
+			    & 55'b0)
+		| ({55{div_frac_out_add_in1}}
+			    & div_frac_add_in1[54:0])
+		| ({55{div_frac_out_add}}
+			    & div_frac_add[54:0])
+		| ({55{div_frac_out_shl1_dbl}}
+			    & {div_frac_out[53:0], (!div_frac_add[54])})
+		| ({55{div_frac_out_shl1_sng}}
+			    & {div_frac_out[53:29], (!div_frac_add[54]), 29'b0})
+		| ({55{div_frac_out_of}}
+			    & {55{d7stg_to_0}});
+
+dffe_s #(55) i_div_frac_out (
+	.din	(div_frac_out_in[54:0]),
+	.en	(div_frac_out_load),
+	.clk    (clk),
+
+        .q      (div_frac_out[54:0]),
+
+	.se     (se),
+        .si     (),
+        .so     ()
+);
+
+assign div_frac_out_54_53[1:0] = div_frac_out[54:53];
+
+assign div_frac_outa[51:0]= div_frac_out[51:0];
+
+endmodule
+
+
Index: /trunk/os2wb/l1dir.v
===================================================================
--- /trunk/os2wb/l1dir.v	(revision 6)
+++ /trunk/os2wb/l1dir.v	(revision 6)
@@ -0,0 +1,262 @@
+module l1dir( // L1 directory top level: one D-cache (l1ddir) and one I-cache (l1idir) directory per CPU (cpu0/cpu1)
+   input clk,
+   input reset,
+   // Request inputs; the control flags are decoded combinationally into per-directory commands below
+   input        cpu,     // Issuing CPU number
+   input        strobe,  // Start transaction
+   input [ 1:0] way,     // Way to allocate for allocating loads
+   input [39:0] address, // Request address; [39:11]/[10:4] feed the D directories, [39:12]/[11:5] feed the I directories
+   input        load,
+   input        ifill,
+   input        store,
+   input        cas,
+   input        swap,
+   input        strload,
+   input        strstore,
+   input        cacheable,
+   input        prefetch,
+   input        invalidate, // Combined with load/ifill to pick the D or I directory (invalidate_d/invalidate_i)
+   input        blockstore,
+   // Results
+   output [111:0] inval_vect0,    // Invalidation vector
+   output [111:0] inval_vect1,    // Second invalidation vector, built from the hit1 outputs of the D directories
+   output [  1:0] othercachehit, // Other cache hit in the same CPU, wayval0/wayval1
+   output [  1:0] othercpuhit,   // Any cache hit in the other CPU, wayval0/wayval1
+   output [  1:0] wayval0,       // Way valid
+   output [  1:0] wayval1,       // Second way valid for ifill
+   output         ready         // Directory init done   
+);
+
+wire [3:0] rdy; // Ready flags: [0]=cpu0 D dir, [1]=cpu1 D dir, [2]=cpu0 I dir, [3]=cpu1 I dir
+wire dquery0=(!cpu) && store && (!blockstore); // Query cpu0 D dir: non-block store issued by cpu0
+wire dquery1=  cpu  && store && (!blockstore); // Query cpu1 D dir: non-block store issued by cpu1
+wire dalloc0=(!cpu) && cacheable && (!invalidate) && load && (!prefetch); // Allocate in cpu0 D dir: cacheable, non-prefetch, non-invalidate load from cpu0
+wire dalloc1=  cpu  && cacheable && (!invalidate) && load && (!prefetch); // Same for cpu1
+wire ddealloc0=((!cpu) && ((ifill && (!prefetch) && (!invalidate)) || cas || swap || strstore || (store && blockstore))) || // Deallocate from cpu0 D dir, case 1: request from cpu0 itself (ifill, cas, swap, strstore, block store)
+               (  cpu  && ((load && cacheable && (!prefetch) && (!invalidate)) || (ifill && (!prefetch) && (!invalidate)) || store || cas || swap || strload || strstore)); // case 2: request from cpu1 (allocating load/ifill, any store, cas, swap, stream load/store)
+wire ddealloc1=(  cpu  && ((ifill && (!prefetch) && (!invalidate)) || cas || swap || strstore || (store && blockstore))) || // Mirror of ddealloc0 for the cpu1 D dir: request from cpu1 itself
+               ((!cpu) && ((load && cacheable && (!prefetch) && (!invalidate)) || (ifill && (!prefetch) && (!invalidate)) || store || cas || swap || strload || strstore)); // or request from cpu0
+// I-directory commands (suffix 0 = cpu0 directory, 1 = cpu1 directory)
+wire iquery0=0; // The I directories are never queried
+wire iquery1=0;
+wire ialloc0=(!cpu) && cacheable && (!invalidate) && ifill; // Allocate in cpu0 I dir: cacheable, non-invalidate ifill from cpu0
+wire ialloc1=  cpu  && cacheable && (!invalidate) && ifill; // Same for cpu1
+wire idealloc0=((!cpu) && ((load && cacheable && (!prefetch) && (!invalidate))          || store || cas || swap || strstore)) || // Deallocate from cpu0 I dir, case 1: cpu0 allocating load, store, cas, swap or strstore
+               (  cpu  && ((load && cacheable && (!prefetch) && (!invalidate)) || (ifill && (!prefetch) && (!invalidate)) || store || cas || swap || strload || strstore)); // case 2: cpu1 allocating load/ifill, store, cas, swap, stream load/store
+wire idealloc1=(  cpu  && ((load && cacheable && (!prefetch) && (!invalidate))          || store || cas || swap || strstore )) || // Mirror of idealloc0 for the cpu1 I dir: request from cpu1 itself
+               ((!cpu) && ((load && cacheable && (!prefetch) && (!invalidate)) || (ifill && (!prefetch) && (!invalidate)) || store || cas || swap || strload || strstore)); // or request from cpu0
+// Directory results. Bit [2] is used below as the hit flag; bits [1:0] are ORed into wayval0/wayval1
+// (presumably the number of the hitting way - confirm against l1ddir/l1idir)
+wire [2:0] cpu0_dhit0;
+wire [2:0] cpu0_dhit1;
+wire [2:0] cpu1_dhit0;
+wire [2:0] cpu1_dhit1;
+wire [2:0] cpu0_ihit;
+wire [2:0] cpu1_ihit;
+wire invalidate_d=invalidate && load;  // Invalidate request routed to a D directory
+wire invalidate_i=invalidate && ifill; // Invalidate request routed to an I directory
+// Request attributes captured on strobe; the result decoding below uses these latched copies
+reg        ifill_d;
+reg        load_d;
+reg        cacheable_d;
+reg        cpu_d;
+reg [39:0] address_d;
+reg        strobe_d;  // strobe delayed by one clock; the delayed strobes are referenced only by the commented-out ILA block
+reg        strobe_d1; // strobe delayed by two clocks
+reg        strobe_d2; // strobe delayed by three clocks
+// Strobe delay chain
+always @(posedge clk)
+   begin
+      strobe_d<=strobe;
+      strobe_d1<=strobe_d;
+      strobe_d2<=strobe_d1;
+   end
+// Latch the request attributes when a transaction starts; held until the next strobe
+always @(posedge clk)
+   if(strobe)
+      begin
+         ifill_d<=ifill;
+         load_d<=load;
+         cacheable_d<=cacheable;
+         cpu_d<=cpu;
+         address_d<=address;
+      end
+// D-cache directory for cpu0
+l1ddir cpu0_ddir(
+   .clk(clk),
+   .reset(reset),
+   
+   .index(address[10:4]),
+   .way(way),
+   .tag(address[39:11]),
+	.strobe(strobe),
+   .query(dquery0),
+   .allocate(dalloc0),
+   .deallocate(ddealloc0),
+   .dualdealloc(ifill), // NOTE(review): presumably lets an ifill deallocate two D lines at once - confirm in l1ddir
+   .invalidate(invalidate_d && !cpu), // Only for D invalidates issued by cpu0
+   
+   .hit0(cpu0_dhit0),
+   .hit1(cpu0_dhit1),
+   
+   .ready(rdy[0])
+);
+// D-cache directory for cpu1 (same index/tag/way inputs, cpu1 command set)
+l1ddir cpu1_ddir(
+   .clk(clk),
+   .reset(reset),
+   
+   .index(address[10:4]),
+   .way(way),
+   .tag(address[39:11]),
+	.strobe(strobe),
+   .query(dquery1),
+   .allocate(dalloc1),
+   .deallocate(ddealloc1),
+   .dualdealloc(ifill), // NOTE(review): see the same port on cpu0_ddir - confirm semantics in l1ddir
+   .invalidate(invalidate_d && cpu), // Only for D invalidates issued by cpu1
+   
+   .hit0(cpu1_dhit0),
+   .hit1(cpu1_dhit1),
+   
+   .ready(rdy[1])
+);
+// I-cache directory for cpu0 (index and tag are taken one address bit higher than for the D directories)
+l1idir cpu0_idir(
+   .clk(clk),
+   .reset(reset),
+   
+   .index(address[11:5]),
+   .way(way),
+   .tag(address[39:12]),
+	.strobe(strobe),
+   .query(iquery0),
+   .allocate(ialloc0),
+   .deallocate(idealloc0),
+   .invalidate(invalidate_i && !cpu), // Only for I invalidates issued by cpu0
+   
+   .hit(cpu0_ihit),
+   
+   .ready(rdy[2])
+);
+// I-cache directory for cpu1
+l1idir cpu1_idir(
+   .clk(clk),
+   .reset(reset),
+   
+   .index(address[11:5]),
+   .way(way),
+   .tag(address[39:12]),
+	.strobe(strobe),
+   .query(iquery1),
+   .allocate(ialloc1),
+   .deallocate(idealloc1),
+   .invalidate(invalidate_i && cpu), // Only for I invalidates issued by cpu1
+   
+   .hit(cpu1_ihit),
+   
+   .ready(rdy[3])
+);
+// ready is high only when all four directories report ready
+assign ready=(!rdy[0] | !rdy[1] | !rdy[2] | !rdy[3]) ? 0:1;
+assign inval_vect0[3:0]={wayval0,cpu0_ihit[2] && (!address_d[5]),cpu0_dhit0[2] && (address_d[5:4]==2'b00)}; // Group at bit 0, cpu0 field: {way, I hit when address_d[5]=0, D hit when address_d[5:4]=00}
+assign inval_vect0[7:4]={wayval0,cpu1_ihit[2] && (!address_d[5]),cpu1_dhit0[2] && (address_d[5:4]==2'b00)}; // Group at bit 0, cpu1 field (same layout, same shared wayval0)
+assign inval_vect0[31:8]=0;
+assign inval_vect0[34:32]={wayval0,cpu0_dhit0[2] && (address_d[5:4]==2'b01)}; // Group at bit 32, cpu0 field: {way, D hit when address_d[5:4]=01}; no I bit in this group
+assign inval_vect0[37:35]={wayval0,cpu1_dhit0[2] && (address_d[5:4]==2'b01)}; // Group at bit 32, cpu1 field
+assign inval_vect0[55:38]=0;
+assign inval_vect0[59:56]={wayval0,cpu0_ihit[2] && address_d[5],cpu0_dhit0[2] && (address_d[5:4]==2'b10)}; // Group at bit 56, cpu0 field: {way, I hit when address_d[5]=1, D hit when address_d[5:4]=10}
+assign inval_vect0[63:60]={wayval0,cpu1_ihit[2] && address_d[5],cpu1_dhit0[2] && (address_d[5:4]==2'b10)}; // Group at bit 56, cpu1 field
+assign inval_vect0[87:64]=0;
+assign inval_vect0[90:88]={wayval0,cpu0_dhit0[2] && (address_d[5:4]==2'b11)}; // Group at bit 88, cpu0 field: {way, D hit when address_d[5:4]=11}
+assign inval_vect0[93:91]={wayval0,cpu1_dhit0[2] && (address_d[5:4]==2'b11)}; // Group at bit 88, cpu1 field
+assign inval_vect0[111:94]=0;
+// Disabled earlier version of the inval_vect1 mapping:
+/*assign inval_vect1[3:0]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b00)};
+assign inval_vect1[7:4]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b00)};
+assign inval_vect1[31:8]=0;
+assign inval_vect1[34:32]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b01)};
+assign inval_vect1[37:35]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b01)};
+assign inval_vect1[55:38]=0;
+assign inval_vect1[59:56]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b10)};
+assign inval_vect1[63:60]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b10)};
+assign inval_vect1[87:64]=0;
+assign inval_vect1[90:88]={wayval1,cpu0_dhit1[2] && (address_d[5:4]==2'b11)};
+assign inval_vect1[93:91]={wayval1,cpu1_dhit1[2] && (address_d[5:4]==2'b11)};
+assign inval_vect1[111:94]=0;*/
+// inval_vect1: only the groups at bits 32 and 88 are driven, selected by address_d[5] alone; every other bit is zero
+assign inval_vect1[3:0]=0;
+assign inval_vect1[7:4]=0;
+assign inval_vect1[31:8]=0;
+assign inval_vect1[34:32]={wayval1,cpu0_dhit1[2] && (address_d[5]==0)}; // cpu0 field: {way, D hit1 when address_d[5]=0}
+assign inval_vect1[37:35]={wayval1,cpu1_dhit1[2] && (address_d[5]==0)}; // cpu1 field
+assign inval_vect1[55:38]=0;
+assign inval_vect1[59:56]=0;
+assign inval_vect1[63:60]=0;
+assign inval_vect1[87:64]=0;
+assign inval_vect1[90:88]={wayval1,cpu0_dhit1[2] && (address_d[5]==1)}; // cpu0 field: {way, D hit1 when address_d[5]=1}
+assign inval_vect1[93:91]={wayval1,cpu1_dhit1[2] && (address_d[5]==1)}; // cpu1 field
+assign inval_vect1[111:94]=0;
+// Way outputs: bitwise OR of bits [1:0] of the directory results (hit0 and I hits for wayval0, hit1 for wayval1)
+assign wayval0=cpu0_dhit0[1:0] | cpu1_dhit0[1:0] | cpu0_ihit[1:0] | cpu1_ihit[1:0];
+assign wayval1=cpu0_dhit1[1:0] | cpu1_dhit1[1:0];
+assign othercachehit[0]=((!cpu_d) && ifill_d && cpu0_dhit0[2]) || // ifill from cpu0 hitting the cpu0 D dir (hit0)
+                        (  cpu_d  && ifill_d && cpu1_dhit0[2]) || // ifill from cpu1 hitting the cpu1 D dir (hit0)
+                        ((!cpu_d) && load_d && cacheable_d && cpu0_ihit[2]) || // cacheable load from cpu0 hitting the cpu0 I dir
+                        (  cpu_d  && load_d && cacheable_d && cpu1_ihit[2]);   // cacheable load from cpu1 hitting the cpu1 I dir
+assign othercachehit[1]=((!cpu_d) && ifill_d && cpu0_dhit1[2]) || // ifill hitting the issuing CPU's own D dir on the hit1 output
+                        (  cpu_d  && ifill_d && cpu1_dhit1[2]);
+assign othercpuhit[0]=((!cpu_d) && (cpu1_dhit0[2] || cpu1_ihit[2])) || // any D (hit0) or I hit in the directories of the CPU that did not issue the request
+                      (  cpu_d  && (cpu0_dhit0[2] || cpu0_ihit[2]));
+assign othercpuhit[1]=((!cpu_d) && ifill_d && cpu1_dhit1[2]) || // ifill with a hit1 in the other CPU's D dir
+                      (  cpu_d  && ifill_d && cpu0_dhit1[2]);
+// Disabled debug capture (st2 instance fed by ILA_DATA):
+//wire [149:0] ILA_DATA;
+
+/*st2 st2_inst(
+	.acq_clk(clk),
+	.acq_data_in(ILA_DATA),
+	.acq_trigger_in(ILA_DATA),
+	.storage_enable(strobe || strobe_d || strobe_d1 || strobe_d2)
+);
+
+assign ILA_DATA[39:0]=address;
+assign ILA_DATA[41:40]=way;
+assign ILA_DATA[42]=strobe;
+assign ILA_DATA[43]=load;
+assign ILA_DATA[44]=ifill;
+assign ILA_DATA[45]=store;
+assign ILA_DATA[46]=cas;
+assign ILA_DATA[47]=swap;
+assign ILA_DATA[48]=strload;
+assign ILA_DATA[49]=strstore;
+assign ILA_DATA[50]=cacheable;
+assign ILA_DATA[51]=prefetch;
+assign ILA_DATA[52]=invalidate;
+assign ILA_DATA[53]=blockstore;
+assign ILA_DATA[55:54]=othercachehit;
+assign ILA_DATA[57:56]=othercpuhit;
+assign ILA_DATA[59:58]=wayval0;
+assign ILA_DATA[61:60]=wayval1;
+assign ILA_DATA[69:62]=inval_vect0[7:0];
+assign ILA_DATA[75:70]=inval_vect0[37:32];
+assign ILA_DATA[83:76]=inval_vect0[63:56];
+assign ILA_DATA[89:84]=inval_vect0[93:88];
+assign ILA_DATA[97:90]=inval_vect1[7:0];
+assign ILA_DATA[103:98]=inval_vect1[37:32];
+assign ILA_DATA[111:104]=inval_vect1[63:56];
+assign ILA_DATA[117:112]=inval_vect1[93:88];
+assign ILA_DATA[118]=dquery0;
+assign ILA_DATA[119]=dquery1;
+assign ILA_DATA[120]=dalloc0;
+assign ILA_DATA[121]=dalloc1;
+assign ILA_DATA[122]=ddealloc0;
+assign ILA_DATA[123]=ddealloc1;
+assign ILA_DATA[124]=iquery0;
+assign ILA_DATA[125]=iquery1;
+assign ILA_DATA[126]=ialloc0;
+assign ILA_DATA[127]=ialloc1;
+assign ILA_DATA[128]=idealloc0;
+assign ILA_DATA[129]=idealloc1;
+*/
+endmodule
Index: /trunk/os2wb/os2wb.v
===================================================================
--- /trunk/os2wb/os2wb.v	(revision 6)
+++ /trunk/os2wb/os2wb.v	(revision 6)
@@ -0,0 +1,1262 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:  (C) Athree, 2009
+// Engineer: Dmitry Rozhdestvenskiy 
+// Email dmitry.rozhdestvenskiy@srisc.com dmitryr@a3.spb.ru divx4log@narod.ru
+// 
+// Design Name:    Bridge from SPARC Core to Wishbone Master
+// Module Name:    os2wb 
+// Project Name:   SPARC SoC single-core
+//
+// LICENSE:
+// This is a Free Hardware Design; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// version 2 as published by the Free Software Foundation.
+// The above named program is distributed in the hope that it will
+// be useful, but WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU General Public License for more details.
+//
+//////////////////////////////////////////////////////////////////////////////////
+module os2wb(
+    input              clk,
+    input              rstn,
+    
+    // Core interface 
+    input      [  4:0] pcx_req,
+    input              pcx_atom,
+    input      [123:0] pcx_data,
+    output reg [  4:0] pcx_grant,
+    output reg         cpx_ready,
+    output reg [144:0] cpx_packet,
+    
+    // Wishbone master interface
+    input      [ 63:0] wb_data_i,
+    input              wb_ack,
+    output reg         wb_cycle,
+    output reg         wb_strobe,
+    output reg         wb_we,
+    output reg [  7:0] wb_sel,
+    output reg [ 63:0] wb_addr,
+    output reg [ 63:0] wb_data_o,
+    
+    // FPU interface
+    output reg [123:0] fp_pcx,
+    output reg         fp_req,
+    input      [144:0] fp_cpx,
+    input              fp_rdy,
+    
+    // Ethernet interrupt, sensed on posedge, mapped to vector 'd29
+    input              eth_int
+);
+
+reg [123:0] pcx_packet_d;    // Latched incoming PCX packet
+reg [123:0] pcx_packet_2nd;  // Second packet for atomic (CAS)
+reg [  4:0] pcx_req_d;       // Latched request
+reg         pcx_atom_d;      // Latched atomic flasg
+reg [  4:0] state;           // FSM state
+reg [144:0] cpx_packet_1;    // First CPX packet
+reg [144:0] cpx_packet_2;    // Second CPX packet (for atomics and cached IFILLs)
+reg         cpx_two_packet;  // CPX answer is two-packet (!=atomic, SWAP has atomic==0 and answer is two-packet)
+
+reg  [ 3:0] inval_vect0; // Invalidate, instr/data, way
+reg  [ 3:0] inval_vect1; // IFill may cause two D lines invalidation at a time
+
+wire [111:0] store_inv_vec; // Store invalidation vector
+
+assign store_inv_vec[111:91]=0;
+assign store_inv_vec[90:88]=((pcx_packet_d[64+5:64+4]==2'b11) && inval_vect0[3:2]==2'b11) ? {inval_vect0[1:0],1'b1}:3'b000;
+assign store_inv_vec[87:60]=0;
+assign store_inv_vec[59:56]=((pcx_packet_d[64+5:64+4]==2'b10) && inval_vect0[3:2]==2'b11) || ((pcx_packet_d[64+5]==1'b1) && inval_vect0[3:2]==2'b10) ? {inval_vect0[1:0],!inval_vect0[2],inval_vect0[2]}:4'b0000;
+assign store_inv_vec[55:35]=0;
+assign store_inv_vec[34:32]=((pcx_packet_d[64+5:64+4]==2'b01) && inval_vect0[3:2]==2'b11) ? {inval_vect0[1:0],1'b1}:3'b000;
+assign store_inv_vec[31:4]=0;
+assign store_inv_vec[3:0]=((pcx_packet_d[64+5:64+4]==2'b00) && inval_vect0[3:2]==2'b11) || ((pcx_packet_d[64+5]==1'b0) && inval_vect0[3:2]==2'b10) ? {inval_vect0[1:0],!inval_vect0[2],inval_vect0[2]}:4'b0000;
+
+wire [28:0] dcache0_do0;
+wire [28:0] dcache0_do1;
+wire [28:0] dcache1_do0;
+wire [28:0] dcache1_do1;
+wire [28:0] dcache2_do0;
+wire [28:0] dcache2_do1;
+wire [28:0] dcache3_do0;
+wire [28:0] dcache3_do1;
+wire [28:0] icache0_do;
+wire [28:0] icache1_do;
+wire [28:0] icache2_do;
+wire [28:0] icache3_do;
+
+`define TEST_DRAM_1      5'b00000
+`define TEST_DRAM_2      5'b00001
+`define TEST_DRAM_3      5'b00010
+`define TEST_DRAM_4      5'b00011
+`define INIT_DRAM_1      5'b00100
+`define INIT_DRAM_2      5'b00101
+`define WAKEUP           5'b00110
+`define PCX_IDLE         5'b00111
+`define GOT_PCX_REQ      5'b01000
+`define PCX_REQ_2ND      5'b01001
+`define PCX_REQ_STEP1    5'b01010
+`define PCX_REQ_STEP1_1  5'b01011
+`define PCX_REQ_STEP2    5'b01100
+`define PCX_REQ_STEP2_1  5'b01101
+`define PCX_REQ_STEP3    5'b01110
+`define PCX_REQ_STEP3_1  5'b01111
+`define PCX_REQ_STEP4    5'b10000
+`define PCX_REQ_STEP4_1  5'b10001
+`define PCX_BIS          5'b10010
+`define PCX_BIS_1        5'b10011
+`define PCX_BIS_2        5'b10100
+`define CPX_READY_1      5'b10101
+`define CPX_READY_2      5'b10110
+`define PCX_UNKNOWN      5'b11000
+`define PCX_FP_1         5'b11001
+`define PCX_FP_2         5'b11010
+`define FP_WAIT          5'b11011
+`define CPX_FP           5'b11100
+`define CPX_SEND_ETH_IRQ 5'b11101
+`define CPX_INT_VEC_DIS  5'b11110
+`define PCX_REQ_CAS_COMPARE 5'b11111
+
+`define MEM_SIZE         64'h00000000_10000000
+
+`define TEST_DRAM        1
+`define DEBUGGING        1
+
+reg        cache_init;
+wire [3:0] dcache0_hit;
+wire [3:0] dcache1_hit;
+wire [3:0] icache_hit;
+reg        multi_hit;
+reg        multi_hit1;
+reg        eth_int_d;
+reg        eth_int_send;
+reg        eth_int_sent;
+reg  [3:0] cnt;
+
+// PCX channel FIFO
+wire [129:0] pcx_data_fifo;
+wire         pcx_fifo_empty;
+reg  [  4:0] pcx_req_1;
+reg  [  4:0] pcx_req_2;
+reg          pcx_atom_1;
+reg          pcx_atom_2;
+reg          pcx_data_123_d;
+
+always @(posedge clk)
+   begin
+      pcx_req_1<=pcx_req;
+      pcx_atom_1<=pcx_atom;
+      pcx_atom_2<=pcx_atom_1;
+      pcx_req_2<=pcx_atom_1 ? pcx_req_1:5'b0;
+      pcx_grant<=(pcx_req_1 | pcx_req_2);
+      pcx_data_123_d<=pcx_data[123];
+   end
+        
+pcx_fifo pcx_fifo_inst( 
+       // FIFO should be first word fall-through
+       // It has no full flag as the core will send only limited number of requests,
+       // in original design we used it 32 words deep
+       // Just make it deeper if you experience overflow - 
+       // you can't just send no grant on full because the core expects immediate
+       // grant for at least two requests for each zone
+    .aclr(!rstn),
+    .clock(clk),
+    .data({pcx_atom_1,pcx_req_1,pcx_data}),
+    .rdreq(fifo_rd),
+    .wrreq((pcx_req_1!=5'b00000 && pcx_data[123]) || (pcx_atom_2 && pcx_data_123_d)), 
+       // Second atomic packet for FPU may be invalid, but should be sent to FPU
+       // so if the first atomic packet is valid we latch both
+    .empty(pcx_fifo_empty),
+    .q(pcx_data_fifo)
+);
+// --------------------------
+
+always @(posedge clk or negedge rstn)
+   if(!rstn)
+      eth_int_send<=0;
+   else
+      begin
+         eth_int_d<=eth_int;
+         if(eth_int && !eth_int_d)
+            eth_int_send<=1;
+         else
+            if(eth_int_sent)
+               eth_int_send<=0;
+      end
+
+reg fifo_rd;
+wire [123:0] pcx_packet;
+assign pcx_packet=pcx_data_fifo[123:0];
+
+always @(posedge clk or negedge rstn)
+   if(rstn==0)
+      begin
+         if(`TEST_DRAM)
+            state<=`TEST_DRAM_1;
+         else
+            state<=`INIT_DRAM_1; // DRAM initialization is mandatory!
+         cpx_ready<=0;
+         fifo_rd<=0;
+         cpx_packet<=145'b0;
+         wb_cycle<=0;
+         wb_strobe<=0;
+         wb_we<=0;
+         wb_sel<=0;
+         wb_addr<=64'b0;
+         wb_data_o<=64'b0;
+         pcx_packet_d<=124'b0;
+         fp_pcx<=124'b0;
+         fp_req<=0;
+      end
+   else
+      case(state)
+         `TEST_DRAM_1:
+            begin
+               wb_cycle<=1;
+               wb_strobe<=1;
+               wb_sel<=8'hFF;
+               wb_we<=1;
+               state<=`TEST_DRAM_2;
+            end
+         `TEST_DRAM_2:
+            if(wb_ack)
+               begin
+                  wb_strobe<=0;
+                  if(wb_addr<`MEM_SIZE-8)
+                     begin
+                        wb_addr[31:0]<=wb_addr[31:0]+8;
+                        wb_data_o<={wb_addr[31:0]+8,wb_addr[31:0]+8};
+                        state<=`TEST_DRAM_1;
+                     end
+                  else
+                     begin
+                        state<=`TEST_DRAM_3;
+                        wb_cycle<=0;
+                        wb_sel<=0;
+                        wb_we<=0;
+                        wb_data_o<=64'b0;
+                        wb_addr<=64'b0;
+                     end
+               end               
+         `TEST_DRAM_3:
+            begin
+               wb_cycle<=1;
+               wb_strobe<=1;
+               wb_sel<=8'hFF;
+               state<=`TEST_DRAM_4;
+            end
+         `TEST_DRAM_4:
+            if(wb_ack)
+               begin
+                  wb_strobe<=0;
+                  if(wb_addr<`MEM_SIZE-8)
+                     begin
+                        if(wb_data_i=={wb_addr[31:0],wb_addr[31:0]})
+                           begin
+                              wb_addr[31:0]<=wb_addr[31:0]+8;
+                              state<=`TEST_DRAM_3;
+                           end
+                     end
+                  else
+                     begin
+                        state<=`INIT_DRAM_1;
+                        wb_cycle<=0;
+                        wb_sel<=0;
+                        wb_we<=0;
+                        wb_data_o<=64'b0;
+                        wb_addr<=64'b0;
+                     end
+               end               
+         `INIT_DRAM_1:
+            begin
+               wb_cycle<=1;
+               wb_strobe<=1;
+               wb_sel<=8'hFF;
+               wb_we<=1;
+               cache_init<=1; // We also init cache directories here
+               state<=`INIT_DRAM_2;
+            end
+         `INIT_DRAM_2:
+            if(wb_ack)
+               begin
+                  wb_strobe<=0;
+                  if(wb_addr<`MEM_SIZE-8)
+                     begin
+                        wb_addr[31:0]<=wb_addr[31:0]+8;
+                        pcx_packet_d[64+11:64+4]<=pcx_packet_d[64+11:64+4]+1; // Address for cachedir init
+                        state<=`INIT_DRAM_1;
+                     end
+                  else
+                     begin
+                        state<=`WAKEUP;
+                        wb_cycle<=0;
+                        wb_sel<=0;
+                        wb_we<=0;
+                        cache_init<=0;
+                        wb_addr<=64'b0;
+                     end
+               end               
+         `WAKEUP:
+            begin
+               cpx_packet<=145'h1700000000000000000000000000000010001;
+               cpx_ready<=1;
+               state<=`PCX_IDLE;
+            end
+         `PCX_IDLE:
+            begin
+               cnt<=0;
+               cpx_packet<=145'b0;
+               cpx_ready<=0;
+               cpx_two_packet<=0;
+               inval_vect0[3]<=0;
+               inval_vect1[3]<=0;
+               multi_hit<=0;
+               multi_hit1<=0;
+               if(eth_int_send)
+                  begin
+                     state<=`CPX_SEND_ETH_IRQ;
+                     eth_int_sent<=1;
+                  end
+               else
+                  if(!pcx_fifo_empty)
+                     begin
+                        pcx_req_d<=pcx_data_fifo[128:124];
+                        pcx_atom_d<=pcx_data_fifo[129];
+                        fifo_rd<=1;
+                        state<=`GOT_PCX_REQ;
+                     end
+            end
+         `GOT_PCX_REQ:
+            begin
+               pcx_packet_d<=pcx_packet;
+               if(`DEBUGGING)
+                  begin
+                     wb_sel[1:0]<=pcx_packet[113:112];
+                     wb_sel[2]<=1;
+                  end
+               if(pcx_packet[103:64]==40'h9800000800 && pcx_packet[122:118]==5'b00001)
+                  begin
+                     state<=`CPX_INT_VEC_DIS;
+                     fifo_rd<=0;
+                  end
+               else
+                  if(pcx_atom_d==0)
+                     begin
+                        fifo_rd<=0;
+                        if(pcx_packet[122:118]==5'b01010) // FP req
+                           begin
+                              state<=`PCX_FP_1;
+                              pcx_packet_2nd[123]<=0;
+                           end
+                        else
+                           state<=`PCX_REQ_STEP1;
+                     end
+                  else
+                     state<=`PCX_REQ_2ND;
+            end
+         `PCX_REQ_2ND:
+            begin
+               pcx_packet_2nd<=pcx_packet; //Latch second packet for atomics
+               if(`DEBUGGING)
+                  if(pcx_fifo_empty)
+                     wb_sel<=8'h67;
+               fifo_rd<=0;
+               if(pcx_packet_d[122:118]==5'b01010) // FP req
+                  state<=`PCX_FP_1;
+               else               
+                  state<=`PCX_REQ_STEP1;
+            end
+         `PCX_REQ_STEP1:
+            begin
+               if(pcx_packet_d[111]==1'b1) // Invalidate request
+                  begin
+                     cpx_packet_1[144]<=1;     // Valid
+                     cpx_packet_1[143:140]<=4'b0100; // Invalidate reply is Store ACK
+                     cpx_packet_1[139]<=1;     // L2 miss
+                     cpx_packet_1[138:137]<=0; // Error
+                     cpx_packet_1[136]<=pcx_packet_d[117]; // Non-cacheble
+                     cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
+                     cpx_packet_1[133:131]<=0; // Way valid
+                     cpx_packet_1[130]<=((pcx_packet_d[122:118]==5'b10000) && (pcx_req_d==5'b10000)) ? 1:0; // Four byte fill
+                     cpx_packet_1[129]<=pcx_atom_d;
+                     cpx_packet_1[128]<=pcx_packet_d[110]; // Prefetch
+                     cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,pcx_packet_d[122:118]==5'b00000 ? 2'b01:2'b10,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],112'b0};
+                     state<=`CPX_READY_1;
+                  end
+               else
+                  if(pcx_packet_d[122:118]!=5'b01001) // Not INT
+                     begin
+                        wb_cycle<=1'b1;
+                        wb_strobe<=1'b1;
+                        if((pcx_packet_d[122:118]==5'b00000 && !pcx_req_d[4]) || pcx_packet_d[122:118]==5'b00010 || pcx_packet_d[122:118]==5'b00100 || pcx_packet_d[122:118]==5'b00110)
+                           wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+4],4'b0000}; //DRAM load/streamload, CAS and SWAP always use DRAM and load first 
+                        else
+                           if(pcx_packet_d[122:118]==5'b10000 && !pcx_req_d[4])
+                              wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b00000}; //DRAM ifill
+                           else
+                              if(pcx_packet_d[64+39:64+28]==12'hFFF && pcx_packet_d[64+27:64+24]!=4'b0) // flash remap FFF1->FFF8
+                                 wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3]+37'h0000E00000,3'b000};
+                              else
+                                 wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000};
+                        wb_data_o<=pcx_packet_d[63:0];
+                        state<=`PCX_REQ_STEP1_1;
+                     end
+                  else
+                     if((pcx_packet_d[12:10]!=3'b000) && !pcx_packet_d[117]) // Not FLUSH int and not this core
+                        state<=`PCX_IDLE; 
+                     else
+                        state<=`CPX_READY_1;
+               case(pcx_packet_d[122:118]) // Packet type
+                  5'b00000://Load
+                     begin
+                        wb_we<=0;
+                        if(!pcx_packet_d[110] && !pcx_packet_d[117])
+                           case(icache_hit)
+                              4'b0000:;
+                              4'b0001:inval_vect0<=4'b1_0_00;
+                              4'b0010:inval_vect0<=4'b1_0_01;
+                              4'b0100:inval_vect0<=4'b1_0_10;
+                              4'b1000:inval_vect0<=4'b1_0_11;
+                              default:multi_hit<=1;
+                           endcase
+                        if(!pcx_req_d[4])
+                           wb_sel<=8'b11111111; // DRAM requests are always 128 bit
+                        else
+                           case(pcx_packet_d[106:104]) //Size
+                              3'b000://Byte
+                                 case(pcx_packet_d[64+2:64])
+                                    3'b000:wb_sel<=8'b10000000;
+                                    3'b001:wb_sel<=8'b01000000;
+                                    3'b010:wb_sel<=8'b00100000;
+                                    3'b011:wb_sel<=8'b00010000;
+                                    3'b100:wb_sel<=8'b00001000;
+                                    3'b101:wb_sel<=8'b00000100;
+                                    3'b110:wb_sel<=8'b00000010;
+                                    3'b111:wb_sel<=8'b00000001;
+                                 endcase
+                              3'b001://Halfword
+                                 case(pcx_packet_d[64+2:64+1])
+                                    2'b00:wb_sel<=8'b11000000;
+                                    2'b01:wb_sel<=8'b00110000;
+                                    2'b10:wb_sel<=8'b00001100;
+                                    2'b11:wb_sel<=8'b00000011;
+                                 endcase
+                              3'b010://Word
+                                 wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
+                              3'b011://Doubleword
+                                 wb_sel<=8'b11111111;
+                              3'b100://Quadword
+                                 wb_sel<=8'b11111111;
+                              3'b111://Cacheline
+                                 wb_sel<=8'b11111111;
+                              default:
+                                 wb_sel<=8'b01011010; // Unreal eye-catching value for debug
+                           endcase
+                     end
+                  5'b00001://Store
+                     begin
+                        wb_we<=1;
+                        case({icache_hit,dcache0_hit})
+                           8'b00000000:;
+                           8'b00000001:inval_vect0<=4'b1_1_00;
+                           8'b00000010:inval_vect0<=4'b1_1_01;
+                           8'b00000100:inval_vect0<=4'b1_1_10;
+                           8'b00001000:inval_vect0<=4'b1_1_11;
+                           8'b00010000:inval_vect0<=4'b1_0_00;
+                           8'b00100000:inval_vect0<=4'b1_0_01;
+                           8'b01000000:inval_vect0<=4'b1_0_10;
+                           8'b10000000:inval_vect0<=4'b1_0_11;
+                           default:multi_hit<=1;
+                        endcase
+                        if(pcx_packet_d[110:109]!=2'b00) //Block (or init) store
+                           wb_sel<=8'b11111111; // Blocks are always 64 bit
+                        else
+                           case(pcx_packet_d[106:104]) //Size
+                              3'b000://Byte
+                                 case(pcx_packet_d[64+2:64])
+                                    3'b000:wb_sel<=8'b10000000;
+                                    3'b001:wb_sel<=8'b01000000;
+                                    3'b010:wb_sel<=8'b00100000;
+                                    3'b011:wb_sel<=8'b00010000;
+                                    3'b100:wb_sel<=8'b00001000;
+                                    3'b101:wb_sel<=8'b00000100;
+                                    3'b110:wb_sel<=8'b00000010;
+                                    3'b111:wb_sel<=8'b00000001;
+                                 endcase
+                              3'b001://Halfword
+                                 case(pcx_packet_d[64+2:64+1])
+                                    2'b00:wb_sel<=8'b11000000;
+                                    2'b01:wb_sel<=8'b00110000;
+                                    2'b10:wb_sel<=8'b00001100;
+                                    2'b11:wb_sel<=8'b00000011;
+                                 endcase
+                              3'b010://Word
+                                 wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
+                              3'b011://Doubleword
+                                 wb_sel<=8'b11111111;
+                              default:
+                                 if(`DEBUGGING)
+                                    wb_sel<=8'b01011010; // Unreal eye-catching value for debug
+                           endcase
+                     end
+                  5'b00010://CAS
+                     begin
+                        wb_we<=0; //Load first
+                        case({icache_hit,dcache0_hit})
+                           8'b00000000:;
+                           8'b00000001:inval_vect0<=4'b1_1_00;
+                           8'b00000010:inval_vect0<=4'b1_1_01;
+                           8'b00000100:inval_vect0<=4'b1_1_10;
+                           8'b00001000:inval_vect0<=4'b1_1_11;
+                           8'b00010000:inval_vect0<=4'b1_0_00;
+                           8'b00100000:inval_vect0<=4'b1_0_01;
+                           8'b01000000:inval_vect0<=4'b1_0_10;
+                           8'b10000000:inval_vect0<=4'b1_0_11;
+                           default:multi_hit<=1;
+                        endcase
+                        wb_sel<=8'b11111111; // CAS loads are as cacheline
+                     end
+                  5'b00100://STRLOAD
+                     begin
+                        wb_we<=0;
+                        wb_sel<=8'b11111111; // Stream loads are always 128 bit
+                     end
+                  5'b00101://STRSTORE
+                     begin
+                        wb_we<=1;
+                        case({icache_hit,dcache0_hit})
+                           8'b00000000:;
+                           8'b00000001:inval_vect0<=4'b1_1_00;
+                           8'b00000010:inval_vect0<=4'b1_1_01;
+                           8'b00000100:inval_vect0<=4'b1_1_10;
+                           8'b00001000:inval_vect0<=4'b1_1_11;
+                           8'b00010000:inval_vect0<=4'b1_0_00;
+                           8'b00100000:inval_vect0<=4'b1_0_01;
+                           8'b01000000:inval_vect0<=4'b1_0_10;
+                           8'b10000000:inval_vect0<=4'b1_0_11;
+                           default:multi_hit<=1;
+                        endcase
+                        case(pcx_packet_d[106:104]) //Size
+                           3'b000://Byte
+                              case(pcx_packet_d[64+2:64])
+                                 3'b000:wb_sel<=8'b10000000;
+                                 3'b001:wb_sel<=8'b01000000;
+                                 3'b010:wb_sel<=8'b00100000;
+                                 3'b011:wb_sel<=8'b00010000;
+                                 3'b100:wb_sel<=8'b00001000;
+                                 3'b101:wb_sel<=8'b00000100;
+                                 3'b110:wb_sel<=8'b00000010;
+                                 3'b111:wb_sel<=8'b00000001;
+                              endcase
+                           3'b001://Halfword
+                              case(pcx_packet_d[64+2:64+1])
+                                 2'b00:wb_sel<=8'b11000000;
+                                 2'b01:wb_sel<=8'b00110000;
+                                 2'b10:wb_sel<=8'b00001100;
+                                 2'b11:wb_sel<=8'b00000011;
+                              endcase
+                           3'b010://Word
+                              wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
+                           3'b011://Doubleword
+                              wb_sel<=8'b11111111;
+                           3'b100://Quadword
+                              wb_sel<=8'b11111111;
+                           3'b111://Cacheline
+                              wb_sel<=8'b11111111;
+                           default:
+                              wb_sel<=8'b01011010; // Unreal eye-catching value for debug
+                        endcase
+                     end
+                  5'b00110://SWAP/LDSTUB
+                     begin
+                        case({icache_hit,dcache0_hit})
+                           8'b00000000:;
+                           8'b00000001:inval_vect0<=4'b1_1_00;
+                           8'b00000010:inval_vect0<=4'b1_1_01;
+                           8'b00000100:inval_vect0<=4'b1_1_10;
+                           8'b00001000:inval_vect0<=4'b1_1_11;
+                           8'b00010000:inval_vect0<=4'b1_0_00;
+                           8'b00100000:inval_vect0<=4'b1_0_01;
+                           8'b01000000:inval_vect0<=4'b1_0_10;
+                           8'b10000000:inval_vect0<=4'b1_0_11;
+                           default:multi_hit<=1;
+                        endcase
+                        wb_we<=0; // Load first, as CAS
+                        wb_sel<=8'b11111111; // SWAP/LDSTUB loads are as cacheline
+                     end
+                  5'b01001://INT
+                     if(pcx_packet_d[117]) // Flush
+                        cpx_packet_1<={9'h171,pcx_packet_d[113:112],11'h0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],30'h0,pcx_packet_d[17:0],46'b0,pcx_packet_d[17:0]}; //FLUSH instruction answer
+                     else // Tread-to-thread interrupt
+                        cpx_packet_1<={9'h170,pcx_packet_d[113:112],52'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]}; 
+                  //5'b01010: FP1 - processed by separate state
+                  //5'b01011: FP2 - processed by separate state
+                  //5'b01101: FWDREQ - not implemented
+                  //5'b01110: FWDREPL - not implemented
+                  5'b10000://IFILL
+                     begin
+                        wb_we<=0;
+                        if(!pcx_req_d[4]) // not I/O access
+                           begin
+                              case(dcache0_hit)
+                                 4'b0000:;
+                                 4'b0001:inval_vect0<=4'b1_1_00;
+                                 4'b0010:inval_vect0<=4'b1_1_01;
+                                 4'b0100:inval_vect0<=4'b1_1_10;
+                                 4'b1000:inval_vect0<=4'b1_1_11;
+                                 default:multi_hit<=1;
+                              endcase
+                              case(dcache1_hit)
+                                 4'b0000:;
+                                 4'b0001:inval_vect1<=4'b1_1_00;
+                                 4'b0010:inval_vect1<=4'b1_1_01;
+                                 4'b0100:inval_vect1<=4'b1_1_10;
+                                 4'b1000:inval_vect1<=4'b1_1_11;
+                                 default:multi_hit1<=1;
+                              endcase
+                           end
+                        if(pcx_req_d[4]) // I/O access
+                           wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
+                        else
+                           wb_sel<=8'b11111111;
+                     end
+                  default:
+                     begin
+                        wb_we<=0;
+                        wb_sel<=8'b10101010; // Unreal eye-catching value for debug
+                     end
+               endcase
+            end
+         `PCX_REQ_STEP1_1:
+            // Completion of the first Wishbone access. Nothing happens until wb_ack;
+            // then the bus request lines are returned to idle, bits [144:128] of both
+            // CPX reply packets are filled in, the read data is captured and the next
+            // state is selected from the PCX packet type in pcx_packet_d[122:118].
+            begin
+               if(wb_ack)
+                  begin
+                     // Wishbone request lines back to idle. wb_cycle is dropped in the
+                     // per-type code below only when no further access follows.
+                     wb_strobe<=0;
+                     wb_we<=0;
+                     wb_sel<=8'h00;
+                     wb_addr<=64'h0;
+                     wb_data_o<=64'h0;
+                     // ---- Header of the first CPX packet ----
+                     cpx_packet_1[144]<=1'b1;      // Valid
+                     cpx_packet_1[139]<=(pcx_packet_d[122:118]==5'b00000) || (pcx_packet_d[122:118]==5'b10000); // L2 always miss on load and ifill
+                     cpx_packet_1[138:137]<=2'b00; // Error
+                     cpx_packet_1[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001); // Non-cacheable, is set on store too
+                     cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
+                     if((pcx_packet_d[122:118]==5'b10000) || ((pcx_packet_d[122:118]==5'b00000) && !pcx_packet_d[117] && !pcx_packet_d[110])) // IFill or cacheable Load
+                        cpx_packet_1[133:131]<={inval_vect0[3],inval_vect0[1:0]};
+                     else
+                        cpx_packet_1[133:131]<=3'b000; // Way valid
+                     case(pcx_packet_d[122:118])
+                        5'b00100:cpx_packet_1[130]<=pcx_packet_d[106]; // Strload: A
+                        5'b00101:cpx_packet_1[130]<=pcx_packet_d[108]; // Stream store: A
+                        default: cpx_packet_1[130]<=(pcx_packet_d[122:118]==5'b10000) && pcx_req_d[4]; // Four byte fill (I/O IFill)
+                     endcase
+                     if(pcx_packet_d[122:118]==5'b00100) // Strload
+                        cpx_packet_1[129]<=pcx_packet_d[105]; // B
+                     else
+                        cpx_packet_1[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110); // SWAP is single-packet but needs atom in CPX
+                     cpx_packet_1[128]<=pcx_packet_d[110] && (pcx_packet_d[122:118]==5'b00000); // Prefetch
+                     // ---- Header of the second CPX packet ----
+                     cpx_packet_2[144]<=1'b1;      // Valid
+                     cpx_packet_2[139]<=1'b0;      // L2 miss
+                     cpx_packet_2[138:137]<=2'b00; // Error
+                     cpx_packet_2[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001); // Non-cacheable, is set on store too
+                     cpx_packet_2[135:134]<=pcx_packet_d[113:112]; // Thread ID
+                     if(pcx_packet_d[122:118]==5'b10000) // IFill
+                        cpx_packet_2[133:131]<={inval_vect1[3],inval_vect1[1:0]};
+                     else
+                        cpx_packet_2[133:131]<=3'b000; // Way valid
+                     cpx_packet_2[130]<=1'b0; // Four byte fill
+                     cpx_packet_2[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110) || ((pcx_packet_d[122:118]==5'b10000) && !pcx_req_d[4]);
+                     cpx_packet_2[128]<=1'b0; // Prefetch
+                     // ---- Reply type, payload and next state ----
+                     case(pcx_packet_d[122:118]) // Packet type
+                        5'b00000://Load
+                           begin
+                              cpx_packet_1[143:140]<=4'b0000; // Type
+                              if(!pcx_req_d[4])
+                                 begin
+                                    // Not an I/O access: keep the bus and fetch the second 64 bits
+                                    cpx_packet_1[127:0]<={2{wb_data_i}};
+                                    state<=`PCX_REQ_STEP2;
+                                 end
+                              else
+                                 begin
+                                    // I/O load: single access, the addressed item is replicated
+                                    // over all 128 data bits and the bus is released
+                                    wb_cycle<=0;
+                                    state<=`CPX_READY_1; // overridden below for an unknown size code
+                                    case(pcx_packet_d[106:104]) //Size
+                                       3'b000://Byte
+                                          case(pcx_packet_d[64+2:64])
+                                             3'b000:cpx_packet_1[127:0]<={16{wb_data_i[63:56]}};
+                                             3'b001:cpx_packet_1[127:0]<={16{wb_data_i[55:48]}};
+                                             3'b010:cpx_packet_1[127:0]<={16{wb_data_i[47:40]}};
+                                             3'b011:cpx_packet_1[127:0]<={16{wb_data_i[39:32]}};
+                                             3'b100:cpx_packet_1[127:0]<={16{wb_data_i[31:24]}};
+                                             3'b101:cpx_packet_1[127:0]<={16{wb_data_i[23:16]}};
+                                             3'b110:cpx_packet_1[127:0]<={16{wb_data_i[15: 8]}};
+                                             3'b111:cpx_packet_1[127:0]<={16{wb_data_i[ 7: 0]}};
+                                          endcase
+                                       3'b001://Halfword
+                                          case(pcx_packet_d[64+2:64+1])
+                                             2'b00:cpx_packet_1[127:0]<={8{wb_data_i[63:48]}};
+                                             2'b01:cpx_packet_1[127:0]<={8{wb_data_i[47:32]}};
+                                             2'b10:cpx_packet_1[127:0]<={8{wb_data_i[31:16]}};
+                                             2'b11:cpx_packet_1[127:0]<={8{wb_data_i[15: 0]}};
+                                          endcase
+                                       3'b010://Word
+                                          if(pcx_packet_d[64+2]==0)
+                                             cpx_packet_1[127:0]<={4{wb_data_i[63:32]}};
+                                          else
+                                             cpx_packet_1[127:0]<={4{wb_data_i[31:0]}};
+                                       3'b011,3'b100,3'b111://Doubleword, Quadword, Cacheline
+                                          cpx_packet_1[127:0]<={2{wb_data_i}}; // 16 byte access to PROM should just duplicate the data
+                                       default:
+                                          begin
+                                             cpx_packet_1[127:0]<={2{wb_data_i}};
+                                             state<=`PCX_UNKNOWN;
+                                          end
+                                    endcase
+                                 end
+                           end
+                        5'b00001://Store
+                           begin
+                              cpx_packet_1[143:140]<=4'b0100; // Type
+                              cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,2'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],store_inv_vec};
+                              // Block init store handling is disabled; kept for reference:
+//                              if((pcx_packet_d[110:109]==2'b01) && (pcx_packet_d[64+5:64]==0) && !inval_vect0[3] && !inval_vect1[3]) // Block init store
+//                                 state<=`PCX_BIS;
+//                              else
+                              wb_cycle<=0;
+                              state<=`CPX_READY_1;
+                           end
+                        5'b00010,5'b00110://CAS, SWAP/LDSTUB
+                           begin
+                              cpx_packet_1[143:140]<=4'b0000; // Load return for first packet
+                              cpx_packet_1[127:0]<={2{wb_data_i}};
+                              cpx_packet_2[143:140]<=4'b0100; // Store ACK for second packet
+                              cpx_packet_2[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],store_inv_vec};
+                              state<=`PCX_REQ_STEP2;
+                           end
+                        5'b00100://STRLOAD
+                           begin
+                              cpx_packet_1[143:140]<=4'b0010; // Type
+                              cpx_packet_1[127:0]<={2{wb_data_i}};
+                              state<=`PCX_REQ_STEP2;
+                           end
+                        5'b00101://STRSTORE
+                           begin
+                              cpx_packet_1[143:140]<=4'b0110; // Type
+                              cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],store_inv_vec};
+                              wb_cycle<=0;
+                              state<=`CPX_READY_1;
+                           end
+                        5'b10000://IFILL
+                           begin
+                              cpx_packet_1[143:140]<=4'b0001; // Type
+                              cpx_packet_2[143:140]<=4'b0001; // Type
+                              if(pcx_req_d[4]) // I/O access: one 32-bit word, replicated
+                                 begin
+                                    if(pcx_packet_d[64+2]==0)
+                                       cpx_packet_1[127:0]<={4{wb_data_i[63:32]}};
+                                    else
+                                       cpx_packet_1[127:0]<={4{wb_data_i[31:0]}};
+                                    wb_cycle<=0;
+                                    state<=`CPX_READY_1;
+                                 end
+                              else // memory access: continue reading the rest of the line
+                                 begin
+                                    cpx_packet_1[127:0]<={2{wb_data_i}};
+                                    state<=`PCX_REQ_STEP2;
+                                 end
+                           end
+                        default:
+                           begin
+                              wb_cycle<=0;
+                              state<=`PCX_UNKNOWN;
+                           end
+                     endcase
+                  end
+               end
+         `PCX_REQ_STEP2: // IFill, Load/strload, CAS, SWAP, LDSTUB - always a load
+            // Issues the second 64-bit read, at offset 8 of the aligned block.
+            begin
+               state<=`PCX_REQ_STEP2_1;
+               wb_strobe<=1'b1;
+               wb_sel<=8'hFF; // It is always full width for subsequent IFill and load accesses
+               if(pcx_packet_d[122:118]==5'b10000)
+                  wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b01000}; // IFill: address bits [4:0] forced to 8
+               else
+                  wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+4],4'b1000};  // others: address bits [3:0] forced to 8
+            end
+         `PCX_REQ_STEP2_1:
+            // Waits for the acknowledge of the second read and stores the data as the
+            // low 64 bits of the first CPX packet, then branches on the packet type.
+            if(wb_ack==1)
+               begin
+                  cpx_packet_1[63:0]<=wb_data_i;
+                  // Wishbone request lines back to idle
+                  wb_strobe<=0;
+                  wb_we<=0;
+                  wb_sel<=8'h00;
+                  wb_addr<=64'h0;
+                  wb_data_o<=64'h0;
+                  if((pcx_packet_d[122:118]!=5'b00000) && (pcx_packet_d[122:118]!=5'b00100))
+                     begin
+                        if(pcx_packet_d[122:118]!=5'b00010) // IFill, SWAP
+                           state<=`PCX_REQ_STEP3;
+                        else
+                           state<=`PCX_REQ_CAS_COMPARE; // CAS
+                     end
+                  else // Load and Strload are complete after two reads
+                     begin
+                        wb_cycle<=0;
+                        state<=`CPX_READY_1;
+                     end
+               end
+         `PCX_REQ_CAS_COMPARE:
+            // Compare step of CAS. The 128 bits read from memory are held in
+            // cpx_packet_1[127:0]; the compare operand is taken from pcx_packet_d.
+            //   match    -> PCX_REQ_STEP3 performs the store (bus cycle is kept)
+            //   mismatch -> no store; the reply packets are sent from CPX_READY_1
+            // A two-packet reply is produced in both cases.
+            // On a mismatch wb_cycle is released here: every other path that ends a
+            // bus transaction clears wb_cycle before entering CPX_READY_1, and this
+            // path previously left it asserted.
+            begin
+               cpx_two_packet<=1;
+               if(pcx_packet_d[106:104]==3'b010) // 32-bit: word selected by address bits [3:2]
+                  case(pcx_packet_d[64+3:64+2])
+                     2'b00:
+                        if(cpx_packet_1[127:96]==pcx_packet_d[63:32])
+                           state<=`PCX_REQ_STEP3;
+                        else
+                           begin
+                              wb_cycle<=0;
+                              state<=`CPX_READY_1;
+                           end
+                     2'b01:
+                        if(cpx_packet_1[95:64]==pcx_packet_d[63:32])
+                           state<=`PCX_REQ_STEP3;
+                        else
+                           begin
+                              wb_cycle<=0;
+                              state<=`CPX_READY_1;
+                           end
+                     2'b10:
+                        if(cpx_packet_1[63:32]==pcx_packet_d[63:32])
+                           state<=`PCX_REQ_STEP3;
+                        else
+                           begin
+                              wb_cycle<=0;
+                              state<=`CPX_READY_1;
+                           end
+                     2'b11:
+                        if(cpx_packet_1[31:0]==pcx_packet_d[63:32])
+                           state<=`PCX_REQ_STEP3;
+                        else
+                           begin
+                              wb_cycle<=0;
+                              state<=`CPX_READY_1;
+                           end
+                  endcase
+               else // 64-bit: doubleword selected by address bit [3]
+                  if(pcx_packet_d[64+3]==0)
+                     begin
+                        if(cpx_packet_1[127:64]==pcx_packet_d[63:0])
+                           state<=`PCX_REQ_STEP3;
+                        else
+                           begin
+                              wb_cycle<=0;
+                              state<=`CPX_READY_1;
+                           end
+                     end
+                  else
+                     begin
+                        if(cpx_packet_1[63:0]==pcx_packet_d[63:0])
+                           state<=`PCX_REQ_STEP3;
+                        else
+                           begin
+                              wb_cycle<=0;
+                              state<=`CPX_READY_1;
+                           end
+                     end
+            end
+         `PCX_REQ_STEP3: // 256-bit IFILL; CAS, SWAP and LDSTUB store
+            // Third bus access: a read at offset 16 of the line for IFill, otherwise
+            // the write that completes CAS, SWAP or LDSTUB. Always a two-packet reply.
+            begin
+               cpx_two_packet<=1;
+               wb_strobe<=1'b1;
+               state<=`PCX_REQ_STEP3_1;
+               // Address and direction
+               if(pcx_packet_d[122:118]==5'b10000)
+                  begin
+                     wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b10000};
+                     wb_we<=0;
+                  end
+               else
+                  begin
+                     wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000}; // CAS or SWAP save
+                     wb_we<=1;
+                  end
+               // Byte lane selection
+               if(pcx_packet_d[122:118]==5'b00010) // CAS
+                  begin
+                     if(pcx_packet_d[106:104]==3'b010)
+                        wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
+                     else
+                        wb_sel<=8'hFF; //CASX
+                  end
+               else
+                  begin
+                     if(pcx_packet_d[122:118]==5'b00110) //SWAP or LDSTUB
+                        begin
+                           if(pcx_packet_d[106:104]==3'b000)  //LDSTUB: one byte lane
+                              case(pcx_packet_d[64+2:64])
+                                 3'b000:wb_sel<=8'b10000000;
+                                 3'b001:wb_sel<=8'b01000000;
+                                 3'b010:wb_sel<=8'b00100000;
+                                 3'b011:wb_sel<=8'b00010000;
+                                 3'b100:wb_sel<=8'b00001000;
+                                 3'b101:wb_sel<=8'b00000100;
+                                 3'b110:wb_sel<=8'b00000010;
+                                 3'b111:wb_sel<=8'b00000001;
+                              endcase
+                           else
+                              wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; ///SWAP is always 32-bit
+                        end
+                     else
+                        wb_sel<=8'hFF; // It is always full width for subsequent IFill accesses
+                  end
+               // Write data (not used for the IFill read since wb_we is 0 there)
+               if(pcx_packet_d[122:118]==5'b00110) //SWAP or LDSTUB
+                  wb_data_o<={2{pcx_packet_d[63:32]}};
+//                  wb_data_o<=pcx_packet_d[63:0];
+               else
+                  wb_data_o<=pcx_packet_2nd[63:0]; // CAS store second packet data
+//                  if(pcx_packet_d[106:104]==3'b010)
+//                     wb_data_o<={pcx_packet_2nd[63:32],pcx_packet_2nd[63:32]}; // CAS store second packet data
+//                  else
+//                     wb_data_o<=pcx_packet_2nd[63:0];
+            end
+         `PCX_REQ_STEP3_1:
+            // Waits for the acknowledge of the third access. IFill keeps the data as
+            // the high half of the second packet and goes on to the fourth read;
+            // CAS/SWAP/LDSTUB have finished their store and release the bus.
+            if(wb_ack==1)
+               begin
+                  // Wishbone request lines back to idle
+                  wb_strobe<=0;
+                  wb_we<=0;
+                  wb_sel<=8'h00;
+                  wb_addr<=64'h0;
+                  wb_data_o<=64'h0;
+                  if(pcx_packet_d[122:118]==5'b10000) // IFill
+                     begin
+                        cpx_packet_2[127:64]<=wb_data_i;
+                        state<=`PCX_REQ_STEP4;
+                     end
+                  else
+                     begin
+                        wb_cycle<=0;
+                        state<=`CPX_READY_1;
+                     end
+               end
+         `PCX_REQ_STEP4: // 256-bit IFILL only
+            // Issues the fourth read, at offset 24 of the instruction line.
+            begin
+               state<=`PCX_REQ_STEP4_1;
+               wb_strobe<=1'b1;
+               wb_sel<=8'hFF; // It is always full width for subsequent accesses
+               wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b11000};
+            end
+         `PCX_REQ_STEP4_1:
+            // Waits for the last IFill read; its data becomes the low half of the
+            // second CPX packet. The bus cycle ends here.
+            if(wb_ack==1)
+               begin
+                  cpx_packet_2[63:0]<=wb_data_i;
+                  wb_cycle<=0;
+                  wb_strobe<=0;
+                  wb_we<=0;
+                  wb_sel<=8'h00;
+                  wb_addr<=64'h0;
+                  state<=`CPX_READY_1;
+               end
+         `PCX_BIS: // Block init store
+            // Starts a sequence of zero-filling 64-bit writes, beginning at offset 8
+            // of the 64-byte aligned block. Note: the transition into this state from
+            // the Store branch of PCX_REQ_STEP1_1 is commented out.
+            begin
+               state<=`PCX_BIS_1;
+               wb_strobe<=1'b1;
+               wb_we<=1;
+               wb_sel<=8'hFF;
+               wb_data_o<=64'h0;
+               wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+6],6'b001000};
+            end
+         `PCX_BIS_1:
+            // Waits for one block-init write to be acknowledged. Loops through
+            // PCX_BIS_2 while the current address is below packet address + 56,
+            // otherwise releases the bus and sends the reply.
+            if(wb_ack)
+               begin
+                  wb_strobe<=0;
+                  if(wb_addr[39:0]<(pcx_packet_d[64+39:64]+8*7))
+                     state<=`PCX_BIS_2; // more doublewords to clear
+                  else
+                     begin
+                        wb_cycle<=0;
+                        wb_we<=0;
+                        wb_sel<=0;
+                        wb_addr<=64'h0;
+                        state<=`CPX_READY_1;
+                     end
+               end
+         `PCX_BIS_2:
+            // Advances the block-init write address by one doubleword (8 bytes) and
+            // starts the next write; only address bits [5:0] are incremented.
+            begin
+               wb_addr[5:0]<=wb_addr[5:0]+8;
+               wb_strobe<=1'b1;
+               state<=`PCX_BIS_1;
+            end
+         `PCX_FP_1:
+            // Hands the first PCX packet of a floating point request to the FP unit
+            // and raises fp_req. With `DEBUGGING set, the packet's address and data
+            // fields are mirrored onto the Wishbone outputs, tagged by wb_sel=8'h22.
+            begin
+               state<=`PCX_FP_2;
+               fp_req<=1;
+               fp_pcx<=pcx_packet_d;
+               if(`DEBUGGING)
+                  begin
+                     wb_sel<=8'h22;
+                     wb_addr<=pcx_packet_d[103:64];
+                     wb_data_o<=pcx_packet_d[63:0];
+                  end
+            end
+         `PCX_FP_2:
+            // Hands the second PCX packet of a floating point request to the FP unit,
+            // then waits for the result in FP_WAIT. With `DEBUGGING set, values are
+            // mirrored onto the Wishbone outputs, tagged by wb_sel=8'h23.
+            begin
+               fp_pcx<=pcx_packet_2nd;
+               state<=`FP_WAIT;
+               if(`DEBUGGING)
+                  begin
+                     wb_addr<=pcx_packet_2nd[103:64];
+                     // NOTE(review): the address above comes from pcx_packet_2nd but the
+                     // data below comes from pcx_packet_d (first packet) - possibly a
+                     // copy-paste from PCX_FP_1; confirm which packet is intended.
+                     wb_data_o<=pcx_packet_d[63:0];
+                     wb_sel<=8'h23;
+                  end
+            end
+         `FP_WAIT:
+            // Withdraws the FP request and stays here until the FP unit raises fp_rdy.
+            // With `DEBUGGING set, wb_sel carries the tag 8'h24.
+            begin
+               fp_req<=0;
+               fp_pcx<=124'b0;
+               if(`DEBUGGING)
+                  wb_sel<=8'h24;
+               if(fp_rdy)
+                  state<=`CPX_FP;
+            end
+         `CPX_FP:
+            // Picks up the reply of the FP unit. When fp_cpx is valid it becomes the
+            // first CPX packet and is sent from CPX_READY_1. Otherwise, if fp_rdy has
+            // gone away, the FSM returns to FP_WAIT; while fp_rdy stays high without a
+            // valid packet it simply remains in this state.
+            if(fp_cpx[144]) // Packet valid
+               begin               
+                  cpx_packet_1<=fp_cpx;
+                  state<=`CPX_READY_1;
+                  if(`DEBUGGING)
+                     begin
+                        // Debug only: mirror the reply payload onto the Wishbone outputs
+                        wb_addr<=fp_cpx[63:0];
+                        wb_data_o<=fp_cpx[127:64];
+                     end
+               end
+            else
+               if(!fp_rdy)
+                  state<=`FP_WAIT; // Else wait for another one if it is not here still
+         `CPX_SEND_ETH_IRQ:
+            // Builds a fixed CPX packet for the Ethernet interrupt and sends it.
+            // The constant has the same shape as the interrupt replies built for INT
+            // packets: top 9 bits are 9'h170 (valid bit + type 4'b0111), and the value
+            // 'h1D is placed both at bits [81:64] and at bits [17:0].
+            // eth_int_sent is cleared here.
+            begin
+               cpx_packet_1<=145'h1_7_000_000000000000001D_000000000000_001D;
+               eth_int_sent<=0;
+               state<=`CPX_READY_1;
+            end
+         `CPX_INT_VEC_DIS:
+            // Builds the reply to a write of the interrupt dispatch data carried in
+            // pcx_packet_d[17:0]: first packet is a store acknowledge, second packet is
+            // the interrupt itself, sent only when the two-packet flag is raised.
+            begin
+               state<=`CPX_READY_1;
+               if(pcx_packet_d[12:10]==3'b000)
+                  cpx_two_packet<=1; // Send interrupt only if it is for this core
+               // First packet: header fields, then the acknowledge payload
+               cpx_packet_1[144:140]<=5'b10100; // Valid + type 4'b0100
+               cpx_packet_1[139:137]<=3'b000;
+               cpx_packet_1[136]<=1'b1;
+               cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
+               cpx_packet_1[133:130]<=4'b0000;
+               cpx_packet_1[129]<=pcx_atom_d;
+               cpx_packet_1[128]<=1'b0;
+               cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],3'b0,pcx_packet_d[64+11:64+6],112'b0};
+               // Second packet: interrupt with the written data in both payload halves
+               cpx_packet_2<={9'h170,54'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]};
+            end
+         `CPX_READY_1:
+            // Presents the first CPX packet with cpx_ready raised and counts it in cnt.
+            // A second packet follows from CPX_READY_2 when cpx_two_packet is set.
+            begin
+               cpx_packet<=cpx_packet_1;
+               cpx_ready<=1;
+               cnt<=cnt+1;
+               if(`DEBUGGING)
+                  begin
+                     if(multi_hit || multi_hit1)
+                        wb_sel<=8'h11; // Debug tag: more than one directory hit was seen
+                  end
+               if(!cpx_two_packet)
+                  state<=`PCX_IDLE;
+               else
+                  //if(cnt==4'b1111 || pcx_packet_d[103:64]!=40'h9800000800)
+                     state<=`CPX_READY_2;
+            end
+         `CPX_READY_2:
+            // Presents the second CPX packet and returns to idle.
+            begin
+               cpx_packet<=cpx_packet_2;
+               cpx_ready<=1;
+               state<=`PCX_IDLE;
+            end
+         `PCX_UNKNOWN:
+            // Reached for packet types or access sizes that are not handled.
+            // No CPX reply is produced here; wb_sel gets a marker pattern and the
+            // FSM returns to idle.
+            begin
+               wb_sel<=8'b10100101; // Illegal eye-catching value for debugging
+               state<=`PCX_IDLE;
+            end
+      endcase
+
+/* Cache directory checking:
+  Load:  allocate D if cacheable, check I, invalidate&deallocate if found
+  Store: check I, invalidate&deallocate if found; check D, invalidate if found
+  IFill: allocate I if cacheable, check D, invalidate&deallocate if found
+  SWAP/LDSTUB:  check I, invalidate&deallocate if found; check D, invalidate&deallocate if found
+  CAS: Like SWAP
+  
+  Allocation and querying is made simultaneously at GOT_PCX_REQ
+     (memory read mode does not matter as long as allocation and invalidation
+      are never made to the same directory, so if memory is written its output will not be checked)
+  Invalidation vectors are built during PCX_REQ_STEP1, or Invalidate all ways issued
+  During PCX_REQ_STEP1_1 directory is deallocated if needed
+  
+*/
+
+// Directory enable: active during cache_init, in GOT_PCX_REQ and PCX_REQ_STEP1,
+// and in PCX_REQ_STEP1_1 on the cycle the Wishbone acknowledge arrives
+assign dir_en=cache_init ||
+              (state==`GOT_PCX_REQ) || (state==`PCX_REQ_STEP1) ||
+              ((state==`PCX_REQ_STEP1_1) && wb_ack);
+
+// ICache deallocation flag: packet types that remove a matching ICache entry
+assign loadstore=(pcx_packet_d[122:118]==5'b00001) || (pcx_packet_d[122:118]==5'b00010) || // Store, CAS
+                 (pcx_packet_d[122:118]==5'b00110) || (pcx_packet_d[122:118]==5'b00101) || // SWAP/LDSTUB, StrStore
+                 ((pcx_packet_d[122:118]==5'b00000) && !pcx_packet_d[117] && !pcx_packet_d[110]); // cacheable load, not prefetch
+
+// DCache deallocation flag: packet types that remove a matching DCache entry
+assign ifillcas=(pcx_packet_d[122:118]==5'b10000) || (pcx_packet_d[122:118]==5'b00101) || // IFill, StrStore
+                (pcx_packet_d[122:118]==5'b00110) || (pcx_packet_d[122:118]==5'b00010) || // SWAP, CAS
+                ((pcx_packet_d[122:118]==5'b00001) && (pcx_packet_d[110:109]!=2'b00)); // Block (or init) store
+
+// DCache allocation flag (taken from pcx_packet, not from the registered pcx_packet_d)
+assign cacheload=(pcx_packet[122:118]==5'b00000) && !pcx_packet[117] && !pcx_packet[111] && !pcx_packet[110];
+
+// ICache allocation flag (taken from pcx_packet, not from the registered pcx_packet_d)
+assign cacheifill=(pcx_packet[122:118]==5'b10000) && !pcx_packet[117] && !pcx_packet[111];
+
+assign dcache0_alloc=(state==`GOT_PCX_REQ) && (pcx_packet[108:107]==2'b00) && cacheload;
+assign dcache0_dealloc0=(state==`PCX_REQ_STEP1_1) && (inval_vect0==4'b1_1_00) && ifillcas;
+assign dcache0_dealloc1=(state==`PCX_REQ_STEP1_1) && (inval_vect1==4'b1_1_00) && ifillcas;
+
+assign dcache1_alloc=(state==`GOT_PCX_REQ) && (pcx_packet[108:107]==2'b01) && cacheload;
+assign dcache1_dealloc0=(state==`PCX_REQ_STEP1_1) && (inval_vect0==4'b1_1_01) && ifillcas;
+assign dcache1_dealloc1=(state==`PCX_REQ_STEP1_1) && (inval_vect1==4'b1_1_01) && ifillcas;
+
+assign dcache2_alloc=(state==`GOT_PCX_REQ) && (pcx_packet[108:107]==2'b10) && cacheload;
+assign dcache2_dealloc0=(state==`PCX_REQ_STEP1_1) && (inval_vect0==4'b1_1_10) && ifillcas;
+assign dcache2_dealloc1=(state==`PCX_REQ_STEP1_1) && (inval_vect1==4'b1_1_10) && ifillcas;
+
+assign dcache3_alloc=(state==`GOT_PCX_REQ) && (pcx_packet[108:107]==2'b11) && cacheload;
+assign dcache3_dealloc0=(state==`PCX_REQ_STEP1_1) && (inval_vect0==4'b1_1_11) && ifillcas;
+assign dcache3_dealloc1=(state==`PCX_REQ_STEP1_1) && (inval_vect1==4'b1_1_11) && ifillcas;
+
+assign icache0_alloc=(state==`GOT_PCX_REQ) && (pcx_packet[108:107]==2'b00) && cacheifill;
+assign icache0_dealloc=(state==`PCX_REQ_STEP1_1) && (inval_vect0==4'b1_0_00) && loadstore;
+
+assign icache1_alloc=(state==`GOT_PCX_REQ) && (pcx_packet[108:107]==2'b01) && cacheifill;
+assign icache1_dealloc=(state==`PCX_REQ_STEP1_1) && (inval_vect0==4'b1_0_01) && loadstore;
+
+assign icache2_alloc=(state==`GOT_PCX_REQ) && (pcx_packet[108:107]==2'b10) && cacheifill;
+assign icache2_dealloc=(state==`PCX_REQ_STEP1_1) && (inval_vect0==4'b1_0_10) && loadstore;
+
+assign icache3_alloc=(state==`GOT_PCX_REQ) && (pcx_packet[108:107]==2'b11) && cacheifill;
+assign icache3_dealloc=(state==`PCX_REQ_STEP1_1) && (inval_vect0==4'b1_0_11) && loadstore;
+
+assign dcache_inval_all=(state==`PCX_REQ_STEP1) && pcx_packet_d[111] && pcx_packet_d[122:118]==5'b00000;
+assign icache_inval_all=(state==`PCX_REQ_STEP1) && pcx_packet_d[111] && pcx_packet_d[122:118]==5'b10000;
+
+`define INVAL_TAG 29'h10000000
+
+// DCache least address bit for first bank
+// it should be 0 for IFill (1 is hardcoded for second bank)
+assign dcache_la=(state==`GOT_PCX_REQ) ? (pcx_packet[122:118]==5'b10000 ? 1'b0:pcx_packet[64+4]):
+                 (pcx_packet_d[122:118]==5'b10000 ? 1'b0:pcx_packet_d[64+4]);
+
+wire [ 6:0] dcache_index;
+wire [28:0] dcache_data;
+assign dcache_index=(state==`GOT_PCX_REQ) ? pcx_packet[64+10:64+5]:pcx_packet_d[64+10:64+5];
+assign dcache_data=(state==`GOT_PCX_REQ) ? pcx_packet[64+39:64+11]:`INVAL_TAG;
+
+cachedir dcache0 (
+   .clock(clk),
+   .enable(dir_en),
+   .wren_a(dcache0_alloc || dcache0_dealloc0 || dcache_inval_all || cache_init),
+   .address_a({1'b0,dcache_index,dcache_la}),
+   .data_a(dcache_data),
+   .q_a(dcache0_do0),
+  
+   .wren_b(dcache0_dealloc1),
+   .address_b({1'b0,dcache_index,1'b1}),
+   .data_b(`INVAL_TAG),
+   .q_b(dcache0_do1) 
+);
+
+cachedir dcache1 (
+   .clock(clk),
+   .enable(dir_en),
+   .wren_a(dcache1_alloc || dcache1_dealloc0 || dcache_inval_all || cache_init),
+   .address_a({1'b0,dcache_index,dcache_la}),
+   .data_a(dcache_data),
+   .q_a(dcache1_do0),
+   
+   .wren_b(dcache1_dealloc1),
+   .address_b({1'b0,dcache_index,1'b1}),
+   .data_b(`INVAL_TAG),
+   .q_b(dcache1_do1) 
+);
+
+cachedir dcache2 (
+   .clock(clk),
+   .enable(dir_en),
+   .wren_a(dcache2_alloc || dcache2_dealloc0 || dcache_inval_all || cache_init),
+   .address_a({1'b0,dcache_index,dcache_la}),
+   .data_a(dcache_data),
+   .q_a(dcache2_do0),
+   
+   .wren_b(dcache2_dealloc1),
+   .address_b({1'b0,dcache_index,1'b1}),
+   .data_b(`INVAL_TAG),
+   .q_b(dcache2_do1) 
+);
+
+cachedir dcache3 (
+   .clock(clk),
+   .enable(dir_en),
+   .wren_a(dcache3_alloc || dcache3_dealloc0 || dcache_inval_all || cache_init),
+   .address_a({1'b0,dcache_index,dcache_la}),
+   .data_a(dcache_data),
+   .q_a(dcache3_do0),
+   
+   .wren_b(dcache3_dealloc1),
+   .address_b({1'b0,dcache_index,1'b1}),
+   .data_b(`INVAL_TAG),
+   .q_b(dcache3_do1) 
+);
+
+assign dcache0_hit={dcache3_do0==pcx_packet_d[64+39:64+11],
+                    dcache2_do0==pcx_packet_d[64+39:64+11],
+                    dcache1_do0==pcx_packet_d[64+39:64+11],
+                    dcache0_do0==pcx_packet_d[64+39:64+11]};
+assign dcache1_hit={dcache3_do1==pcx_packet_d[64+39:64+11],
+                    dcache2_do1==pcx_packet_d[64+39:64+11],
+                    dcache1_do1==pcx_packet_d[64+39:64+11],
+                    dcache0_do1==pcx_packet_d[64+39:64+11]};
+
+wire [ 6:0] icache_index;
+wire [28:0] icache_data;
+assign icache_index=(state==`GOT_PCX_REQ) ? pcx_packet[64+11:64+5]:pcx_packet_d[64+11:64+5];
+assign icache_data=(state==`GOT_PCX_REQ) ? {pcx_packet[64+39:64+12],1'b0}:`INVAL_TAG;
+
+cachedir icache01 (
+   .clock(clk),
+   .enable(dir_en),
+   .wren_a(icache0_alloc || icache0_dealloc || icache_inval_all || cache_init),
+   .address_a({1'b0,icache_index}),
+   .data_a(icache_data),
+   .q_a(icache0_do),
+   
+   .wren_b(icache1_alloc || icache1_dealloc || icache_inval_all || cache_init),
+   .address_b({1'b1,icache_index}),
+   .data_b(icache_data),
+   .q_b(icache1_do) 
+);
+
+cachedir icache23 (
+   .clock(clk),
+   .enable(dir_en),
+   .wren_a(icache2_alloc || icache2_dealloc || icache_inval_all || cache_init),
+   .address_a({1'b0,icache_index}),
+   .data_a(icache_data),
+   .q_a(icache2_do),
+   
+   .wren_b(icache3_alloc || icache3_dealloc || icache_inval_all || cache_init),
+   .address_b({1'b1,icache_index}),
+   .data_b(icache_data),
+   .q_b(icache3_do) 
+);
+
+assign icache_hit={icache3_do[28:1]==pcx_packet_d[64+39:64+12],
+                   icache2_do[28:1]==pcx_packet_d[64+39:64+12],
+                   icache1_do[28:1]==pcx_packet_d[64+39:64+12],
+                   icache0_do[28:1]==pcx_packet_d[64+39:64+12]};
+
+/*
+               case(pcx_packet_d[122:118]) // Packet type
+                  5'b00000://Load
+                  5'b00001://Store
+                  5'b00010://CAS
+                  5'b00100://STRLOAD
+                  5'b00101://STRSTORE
+                  5'b00110://SWAP
+                  5'b01001://INT
+                  //5'b01010://FP1
+                  //5'b01011://FP2
+                  //5'b01101://FWDREQ
+                  //5'b01110://FWDREPL
+                  5'b10000://IFILL
+               endcase
+*/
+endmodule
Index: /trunk/os2wb/rst_ctrl.v
===================================================================
--- /trunk/os2wb/rst_ctrl.v	(revision 6)
+++ /trunk/os2wb/rst_ctrl.v	(revision 6)
@@ -0,0 +1,92 @@
+/*
+ * Reset Controller
+ *
+ * (C) Copyleft 2007 Simply RISC LLP
+ * AUTHOR: Fabrizio Fazzino <fabrizio.fazzino@srisc.com>
+ *
+ * LICENSE:
+ * This is a Free Hardware Design; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * The above named program is distributed in the hope that it will
+ * be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * DESCRIPTION:
+ * This block implements the Reset Controller used by the S1 Core
+ * to wake up the SPARC Core of the OpenSPARC T1; its behavior was
+ * reverse-engineered from the OpenSPARC waveforms.
+ */
+
+module rst_ctrl (
+    // Reset sequencer: once the resampled reset input goes low, the outputs
+    // below switch one after another at fixed sys_clock_i cycle counts.
+
+    // System inputs
+    input  sys_clock_i,           // System Clock
+    input  sys_reset_i,           // System Reset, active high
+
+    // Outputs, kept in the original positional port order.
+    // All of them are combinational: gclk_o is the input clock passed
+    // through, the others are decoded from sys_reset / cycle_counter.
+    output cluster_cken_o,
+    output gclk_o,
+    output cmp_grst_o,
+    output cmp_arst_o,
+    output ctu_tst_pre_grst_o,
+    output adbginit_o,
+    output gdbginit_o,
+    output sys_reset_final_o
+  );
+
+  /*
+   * Sequencing thresholds: value of cycle_counter (clocks elapsed with
+   * sys_reset low) at which each output changes state.
+   */
+  localparam [12:0] CKEN_AT       = 13'd20;   // cluster_cken_o goes high
+  localparam [12:0] PRE_GRST_AT   = 13'd60;   // ctu_tst_pre_grst_o goes high
+  localparam [12:0] GRST_AT       = 13'd120;  // cmp_grst_o and gdbginit_o go high
+  localparam [12:0] RESET_DONE_AT = 13'd126;  // sys_reset_final_o goes low
+
+  /*
+   * Registers
+   */
+
+  // Two-stage register chain that resamples sys_reset_i on sys_clock_i
+  reg rst_sync;
+  reg sys_reset;
+
+  // Counter used as a timer to strobe the reset signals
+  reg [12:0] cycle_counter;
+
+  /*
+   * Procedural blocks
+   */
+
+  // Reset resampling and timer: the counter is held at zero while sys_reset
+  // is high, then counts up and freezes as soon as bit 12 becomes set.
+  always @(posedge sys_clock_i)
+     begin
+        rst_sync  <= sys_reset_i;
+        sys_reset <= rst_sync;
+        if (sys_reset)
+           cycle_counter <= 13'd0;
+        else if (!cycle_counter[12])
+           cycle_counter <= cycle_counter + 13'd1;
+     end
+
+  /*
+   * Output decoding
+   */
+
+  assign gclk_o             = sys_clock_i;
+  assign cmp_arst_o         = ~sys_reset;
+  assign adbginit_o         = ~sys_reset;
+  assign cluster_cken_o     = (cycle_counter >= CKEN_AT);
+  assign ctu_tst_pre_grst_o = (cycle_counter >= PRE_GRST_AT);
+  assign gdbginit_o         = (cycle_counter >= GRST_AT);
+  assign cmp_grst_o         = (cycle_counter >= GRST_AT);
+  assign sys_reset_final_o  = (cycle_counter <  RESET_DONE_AT);
+
+endmodule
Index: /trunk/os2wb/l1ddir.v
===================================================================
--- /trunk/os2wb/l1ddir.v	(revision 6)
+++ /trunk/os2wb/l1ddir.v	(revision 6)
@@ -0,0 +1,250 @@
+// L1 D-cache tag directory built from four dual-port cachedir RAMs, one per
+// way. After reset every entry the directory uses is overwritten with
+// `INVAL_TAG and ready goes high. Commands are accepted in the idle state while
+// strobe is high, with priority invalidate > allocate > deallocate > query.
+module l1ddir(
+   input clk,
+   input reset,             // asynchronous, active high; restarts the init sweep
+
+   input [ 6:0] index,
+   input [ 1:0] way,
+   input [28:0] tag,
+   input        strobe,     // qualifies the command inputs below
+   input        query,      // read all ways at index and compare them with tag
+   input        allocate,   //tag->{way,index}
+   input        deallocate, //if({way,index}==tag) {way,index}<-INVAL_TAG
+   input        dualdealloc,// deallocate in both entries {index[6:1],0} and {index[6:1],1}
+   input        invalidate, //all ways
+
+   output reg [2:0] hit0,   // {hit,way} from port A; 3'b000 on miss or multiple match
+   output reg [2:0] hit1,   // {hit,way} from port B, dual deallocate only
+
+   output reg       ready // directory init completed
+);
+
+`define INVAL_TAG 29'h10000000
+
+reg [28:0] tag_d;           // tag captured with the last query/deallocate
+reg [ 6:0] addr0;           // port A entry address
+reg [ 5:0] addr1;           // port B entry address, LSB is hardwired to 1
+reg [ 3:0] we0;             // per-way write enable, port A
+reg [ 3:0] we1;             // per-way write enable, port B
+reg [ 3:0] re;              // per-way read enable
+reg [28:0] di;              // port A write data
+reg        dualdealloc_d;   // dualdealloc captured together with tag_d
+wire [28:0] do0_0;          // doW_P: read data of way W, port P (0=A, 1=B)
+wire [28:0] do1_0;
+wire [28:0] do2_0;
+wire [28:0] do3_0;
+wire [28:0] do0_1;
+wire [28:0] do1_1;
+wire [28:0] do2_1;
+wire [28:0] do3_1;
+reg query_d;
+reg deallocate_d;
+reg query_d1;
+reg deallocate_d1;
+
+// Capture the tag and the dual-deallocate flag of an accepted query or
+// deallocate so the later compare does not depend on the inputs staying stable.
+always @(posedge clk)
+   if(strobe && (query || deallocate))
+      begin
+         tag_d<=tag;
+         dualdealloc_d<=dualdealloc;
+      end
+
+// query/deallocate strobes delayed by one and two clocks; the two-clock
+// versions time the update of hit0/hit1.
+always @(posedge clk)
+   begin
+      query_d<=query && strobe;
+      deallocate_d<=deallocate && strobe;
+      query_d1<=query_d;
+      deallocate_d1<=deallocate_d;
+   end
+
+// One RAM per way. Port A reads or writes the entry at addr0; port B reads the
+// odd entry {addr1,1'b1} and can only write `INVAL_TAG to it.
+cachedir dcache0 (
+   .clock(clk),
+   .enable(we0[0] || we1[0] || re[0]),
+   .wren_a(we0[0]), .address_a({1'b0,addr0}),      .data_a(di),         .q_a(do0_0),
+   .wren_b(we1[0]), .address_b({1'b0,addr1,1'b1}), .data_b(`INVAL_TAG), .q_b(do0_1)
+);
+
+cachedir dcache1 (
+   .clock(clk),
+   .enable(we0[1] || we1[1] || re[1]),
+   .wren_a(we0[1]), .address_a({1'b0,addr0}),      .data_a(di),         .q_a(do1_0),
+   .wren_b(we1[1]), .address_b({1'b0,addr1,1'b1}), .data_b(`INVAL_TAG), .q_b(do1_1)
+);
+
+cachedir dcache2 (
+   .clock(clk),
+   .enable(we0[2] || we1[2] || re[2]),
+   .wren_a(we0[2]), .address_a({1'b0,addr0}),      .data_a(di),         .q_a(do2_0),
+   .wren_b(we1[2]), .address_b({1'b0,addr1,1'b1}), .data_b(`INVAL_TAG), .q_b(do2_1)
+);
+
+cachedir dcache3 (
+   .clock(clk),
+   .enable(we0[3] || we1[3] || re[3]),
+   .wren_a(we0[3]), .address_a({1'b0,addr0}),      .data_a(di),         .q_a(do3_0),
+   .wren_b(we1[3]), .address_b({1'b0,addr1,1'b1}), .data_b(`INVAL_TAG), .q_b(do3_1)
+);
+
+// Per-way match of the RAM outputs against the captured tag (bit N = way N)
+wire [3:0] hitvect0={(do3_0==tag_d),(do2_0==tag_d),(do1_0==tag_d),(do0_0==tag_d)};
+wire [3:0] hitvect1={(do3_1==tag_d),(do2_1==tag_d),(do1_1==tag_d),(do0_1==tag_d)};
+
+`define L1DDIR_RESET   3'b000
+`define L1DDIR_INIT    3'b001
+`define L1DDIR_IDLE    3'b010
+`define L1DDIR_READ    3'b011
+`define L1DDIR_DEALLOC 3'b100
+
+reg [2:0] state;
+
+// Control FSM. RESET/INIT sweep the directory with `INVAL_TAG after reset,
+// IDLE decodes a command while strobe is high, READ/DEALLOC complete a
+// deallocate by reading the addressed entries and clearing the ways that match
+// tag_d. Addresses and enables are registered here before they drive the RAMs.
+always @(posedge clk or posedge reset)
+   if(reset)
+      begin
+         state<=`L1DDIR_RESET;
+         ready<=0;
+      end
+   else
+      case(state)
+         // Arm the init sweep: both ports write `INVAL_TAG to all four ways
+         `L1DDIR_RESET:
+            begin
+               addr0<=7'b0;
+               addr1<=6'b0;
+               di<=`INVAL_TAG;
+               we0<=4'b1111;
+               we1<=4'b1111;
+               re<=4'b0; // fix: re used to stay undefined until a later IDLE command assigned it
+               state<=`L1DDIR_INIT;
+            end
+         // Port A walks the even entries (addr0 += 2), port B the odd ones
+         `L1DDIR_INIT:
+            begin
+               addr0<=addr0+2;
+               addr1<=addr1+1;
+               if(addr0==7'b1111110) // last even entry is being written this cycle
+                  begin
+                     we0<=4'b0;
+                     we1<=4'b0;
+                     ready<=1;
+                     state<=`L1DDIR_IDLE;
+                  end
+            end
+         // Commands are only looked at while strobe is high
+         `L1DDIR_IDLE:
+            if(strobe)
+               begin
+                  if(invalidate) // clear the entry at index in all ways
+                     begin
+                        we0<=4'b1111;
+                        we1<=0;
+                        addr0<=index;
+                        di<=`INVAL_TAG;
+                     end
+                  else if(allocate) // write tag into the selected way only
+                     begin
+                        case(way)
+                           2'b00:we0<=4'b0001;
+                           2'b01:we0<=4'b0010;
+                           2'b10:we0<=4'b0100;
+                           2'b11:we0<=4'b1000;
+                        endcase
+                        we1<=0;
+                        addr0<=index;
+                        di<=tag;
+                     end
+                  else if(deallocate) // read first, matching ways are cleared in DEALLOC
+                     begin
+                        re<=4'b1111;
+                        we0<=0;
+                        we1<=0;
+                        if(dualdealloc)
+                           begin
+                              addr0<={index[6:1],1'b0};
+                              addr1<=index[6:1];
+                           end
+                        else
+                           addr0<=index;
+                        state<=`L1DDIR_READ;
+                     end
+                  else if(query) // read only, the FSM stays in IDLE
+                     begin
+                        addr0<=index;
+                        re<=4'b1111;
+                        we0<=0;
+                        we1<=0;
+                     end
+                  else // strobe without a command: drop all enables
+                     begin
+                        we0<=0;
+                        we1<=0;
+                        re<=0;
+                     end
+               end
+         // Spacer cycle before the compare (presumably the cachedir read latency)
+         `L1DDIR_READ:
+            state<=`L1DDIR_DEALLOC;
+         // Overwrite the ways whose stored tag matched tag_d with `INVAL_TAG
+         `L1DDIR_DEALLOC:
+            begin
+               re<=0;
+               di<=`INVAL_TAG;
+               we0<=hitvect0;
+               if(dualdealloc_d)
+                  we1<=hitvect1;
+               else
+                  we1<=0;
+               state<=`L1DDIR_IDLE;
+            end
+         // Fix: unused encodings 3'b101..3'b111 used to hang the FSM; re-run the init
+         default:
+            begin
+               ready<=0;
+               state<=`L1DDIR_RESET;
+            end
+      endcase
+
+// Result registers: on the delayed query/deallocate strobe the one-hot match
+// vector is encoded as {1'b1,way}; no match or more than one match gives 3'b000.
+// Outside of that, any cycle with strobe high clears both results.
+always @(posedge clk)
+   if(query_d1 || deallocate_d1)
+      begin
+         case(hitvect0)
+            4'b0001:hit0<=3'b100;
+            4'b0010:hit0<=3'b101;
+            4'b0100:hit0<=3'b110;
+            4'b1000:hit0<=3'b111;
+            default:hit0<=3'b000; // Hits will be ORed then
+         endcase
+         if(dualdealloc_d && deallocate_d1)
+            case(hitvect1)
+               4'b0001:hit1<=3'b100;
+               4'b0010:hit1<=3'b101;
+               4'b0100:hit1<=3'b110;
+               4'b1000:hit1<=3'b111;
+               default:hit1<=3'b000;
+            endcase
+         else
+            hit1<=3'b000;
+      end
+   else
+      if(strobe)
+         begin
+            hit0<=3'b000;
+            hit1<=3'b000;
+         end
+
+endmodule
Index: /trunk/os2wb/os2wb_dual.v
===================================================================
--- /trunk/os2wb/os2wb_dual.v	(revision 6)
+++ /trunk/os2wb/os2wb_dual.v	(revision 6)
@@ -0,0 +1,1123 @@
+`timescale 1ns / 1ps
+//////////////////////////////////////////////////////////////////////////////////
+// Company:  (C) Athree, 2009
+// Engineer: Dmitry Rozhdestvenskiy 
+// Email dmitry.rozhdestvenskiy@srisc.com dmitryr@a3.spb.ru divx4log@narod.ru
+// 
+// Design Name:    Bridge from SPARC Core to Wishbone Master
+// Module Name:    os2wb_dual 
+// Project Name:   SPARC SoC single-core
+//
+// LICENSE:
+// This is a Free Hardware Design; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// version 2 as published by the Free Software Foundation.
+// The above named program is distributed in the hope that it will
+// be useful, but WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU General Public License for more details.
+//
+//////////////////////////////////////////////////////////////////////////////////
+module os2wb_dual(
+    input              clk,
+    input              rstn,
+    
+    // Core interface 
+    input      [  4:0] pcx_req,
+    input              pcx_atom,
+    input      [123:0] pcx_data,
+    output reg [  4:0] pcx_grant,
+    output reg         cpx_ready,
+    output reg [144:0] cpx_packet,
+    
+    // Core 2nd interface 
+    input      [  4:0] pcx1_req,
+    input              pcx1_atom,
+    input      [123:0] pcx1_data,
+    output reg [  4:0] pcx1_grant,
+    output reg         cpx1_ready,
+    output reg [144:0] cpx1_packet,
+    
+    // Wishbone master interface
+    input      [ 63:0] wb_data_i,
+    input              wb_ack,
+    output reg         wb_cycle,
+    output reg         wb_strobe,
+    output reg         wb_we,
+    output reg [  7:0] wb_sel,
+    output reg [ 63:0] wb_addr,
+    output reg [ 63:0] wb_data_o,
+    
+    // FPU interface
+    output reg [123:0] fp_pcx,
+    output reg         fp_req,
+    input      [144:0] fp_cpx,
+    input              fp_rdy,
+    
+    // Ethernet interrupt, sensed on posedge, mapped to vector 'd29
+    input              eth_int
+);
+
+reg [123:0] pcx_packet_d;    // Latched incoming PCX packet
+reg [123:0] pcx_packet_2nd;  // Second packet for atomic (CAS)
+reg [  4:0] pcx_req_d;       // Latched request
+reg         pcx_atom_d;      // Latched atomic flasg
+reg [  4:0] state;           // FSM state
+reg [144:0] cpx_packet_1;    // First CPX packet
+reg [144:0] cpx_packet_2;    // Second CPX packet (for atomics and cached IFILLs)
+reg         cpx_two_packet;  // CPX answer is two-packet (!=atomic, SWAP has atomic==0 and answer is two-packet)
+
+wire [111:0] inval_vect0; // Invalidate, instr/data, way
+wire [111:0] inval_vect1; // IFill may cause two D lines invalidation at a time
+
+wire [1:0] othercachehit;
+wire [1:0] othercpuhit;
+wire [1:0] wayval0;
+wire [1:0] wayval1;
+
+`define TEST_DRAM_1      5'b00000
+`define TEST_DRAM_2      5'b00001
+`define TEST_DRAM_3      5'b00010
+`define TEST_DRAM_4      5'b00011
+`define INIT_DRAM_1      5'b00100
+`define INIT_DRAM_2      5'b00101
+`define WAKEUP           5'b00110
+`define PCX_IDLE         5'b00111
+`define GOT_PCX_REQ      5'b01000
+`define PCX_REQ_2ND      5'b01001
+`define PCX_REQ_STEP1    5'b01010
+`define PCX_REQ_STEP1_1  5'b01011
+`define PCX_REQ_STEP2    5'b01100
+`define PCX_REQ_STEP2_1  5'b01101
+`define PCX_REQ_STEP3    5'b01110
+`define PCX_REQ_STEP3_1  5'b01111
+`define PCX_REQ_STEP4    5'b10000
+`define PCX_REQ_STEP4_1  5'b10001
+`define PCX_BIS          5'b10010
+`define PCX_BIS_1        5'b10011
+`define PCX_BIS_2        5'b10100
+`define CPX_READY_1      5'b10101
+`define CPX_READY_2      5'b10110
+`define PCX_REQ_STEP1_2  5'b10111
+`define PCX_UNKNOWN      5'b11000
+`define PCX_FP_1         5'b11001
+`define PCX_FP_2         5'b11010
+`define FP_WAIT          5'b11011
+`define CPX_FP           5'b11100
+`define CPX_SEND_ETH_IRQ 5'b11101
+`define CPX_INT_VEC_DIS  5'b11110
+`define PCX_REQ_CAS_COMPARE 5'b11111
+
+`define MEM_SIZE         64'h00000000_10000000
+
+`define TEST_DRAM        1
+`define DEBUGGING        1
+
+reg        cache_init;
+wire [3:0] dcache0_hit;
+wire [3:0] dcache1_hit;
+wire [3:0] icache_hit;
+reg        multi_hit;
+reg        multi_hit1;
+reg        eth_int_d;
+reg        eth_int_send;
+reg        eth_int_sent;
+reg  [3:0] cnt;
+
+// PCX channel FIFO
+wire [129:0] pcx_data_fifo;
+wire         pcx_fifo_empty;
+reg  [  4:0] pcx_req_1;
+reg  [  4:0] pcx_req_2;
+reg          pcx_atom_1;
+reg          pcx_atom_2;
+reg          pcx_data_123_d;
+
+// PCX 2nd channel FIFO
+wire [129:0] pcx1_data_fifo;
+wire         pcx1_fifo_empty;
+reg  [  4:0] pcx1_req_1;
+reg  [  4:0] pcx1_req_2;
+reg          pcx1_atom_1;
+reg          pcx1_atom_2;
+reg          pcx1_data_123_d;
+
+reg fifo_rd;
+reg fifo_rd1;
+
+always @(posedge clk)
+   begin
+      pcx_req_1<=pcx_req;
+      pcx_atom_1<=pcx_atom;
+      pcx_atom_2<=pcx_atom_1;
+      pcx_req_2<=pcx_atom_1 ? pcx_req_1:5'b0;
+      pcx_grant<=(pcx_req_1 | pcx_req_2);
+      pcx_data_123_d<=pcx_data[123];
+
+      pcx1_req_1<=pcx1_req;
+      pcx1_atom_1<=pcx1_atom;
+      pcx1_atom_2<=pcx1_atom_1;
+      pcx1_req_2<=pcx1_atom_1 ? pcx1_req_1:5'b0;
+      pcx1_grant<=(pcx1_req_1 | pcx1_req_2);
+      pcx1_data_123_d<=pcx1_data[123];
+   end
+        
+/*pcx_fifo pcx_fifo_inst( 
+       // FIFO should be first word fall-through
+       // It has no full flag as the core will send only limited number of requests,
+       // in original design we used it 32 words deep
+       // Just make it deeper if you experience overflow - 
+       // you can't just send no grant on full because the core expects immediate
+       // grant for at least two requests for each zone
+    .aclr(!rstn),
+    .clock(clk),
+    .data({pcx_atom_1,pcx_req_1,pcx_data}),
+    .rdreq(fifo_rd),
+    .wrreq((pcx_req_1!=5'b00000 && pcx_data[123]) || (pcx_atom_2 && pcx_data_123_d)), 
+       // Second atomic packet for FPU may be invalid, but should be sent to FPU
+       // so if the first atomic packet is valid we latch both
+    .empty(pcx_fifo_empty),
+    .q(pcx_data_fifo)
+);
+*/
+pcx_fifo pcx_fifo_inst( 
+    .clk(clk),
+	 .rst(!rstn),
+    .din({pcx_atom_1,pcx_req_1,pcx_data}),
+    .rd_en(fifo_rd),
+    .wr_en((pcx_req_1!=5'b00000 && pcx_data[123]) || (pcx_atom_2 && pcx_data_123_d)), 
+    .empty(pcx_fifo_empty),
+    .dout(pcx_data_fifo)
+);
+	
+	
+pcx_fifo pcx_fifo_inst1( 
+    .clk(clk),
+	 .rst(!rstn),
+    .din({pcx1_atom_1,pcx1_req_1,pcx1_data}),
+    .rd_en(fifo_rd1),
+    .wr_en((pcx1_req_1!=5'b00000 && pcx1_data[123]) || (pcx1_atom_2 && pcx1_data_123_d)), 
+    .empty(pcx1_fifo_empty),
+    .dout(pcx1_data_fifo)
+);
+// --------------------------
+
+reg wb_ack_d;
+
+always @(posedge clk or negedge rstn)
+  begin
+  if(!rstn)
+      eth_int_send<=0;
+   else
+      begin
+         wb_ack_d<=wb_ack;
+         eth_int_d<=eth_int;
+         if(eth_int && !eth_int_d)
+            eth_int_send<=1;
+         else
+            if(eth_int_sent)
+               eth_int_send<=0;
+      end
+  end
+wire [123:0] pcx_packet;
+reg cpu;
+assign pcx_packet=cpu ? pcx1_data_fifo[123:0]:pcx_data_fifo[123:0];
+reg cpu2;
+
+always @(posedge clk or negedge rstn)
+   if(rstn==0)
+      begin
+         if(`TEST_DRAM)
+            state<=`TEST_DRAM_1;
+         else
+            state<=`INIT_DRAM_1; // DRAM initialization is mandatory!
+         cpx_ready<=0;
+         fifo_rd<=0;
+         cpx_packet<=145'b0;
+         wb_cycle<=0;
+         wb_strobe<=0;
+         wb_we<=0;
+         wb_sel<=0;
+         wb_addr<=64'b0;
+         wb_data_o<=64'b0;
+         pcx_packet_d<=124'b0;
+         fp_pcx<=124'b0;
+         fp_req<=0;
+      end
+   else
+      case(state)
+         `TEST_DRAM_1:
+            begin
+               wb_cycle<=1;
+               wb_strobe<=1;
+               wb_sel<=8'hFF;
+               wb_we<=1;
+               state<=`TEST_DRAM_2;
+            end
+         `TEST_DRAM_2:
+            if(wb_ack)
+               begin
+                  wb_strobe<=0;
+                  if(wb_addr<`MEM_SIZE-8)
+                     begin
+                        wb_addr[31:0]<=wb_addr[31:0]+8;
+                        wb_data_o<={wb_addr[31:0]+8,wb_addr[31:0]+8};
+                        state<=`TEST_DRAM_1;
+                     end
+                  else
+                     begin
+                        state<=`TEST_DRAM_3;
+                        wb_cycle<=0;
+                        wb_sel<=0;
+                        wb_we<=0;
+                        wb_data_o<=64'b0;
+                        wb_addr<=64'b0;
+                     end
+               end               
+         `TEST_DRAM_3:
+            begin
+               wb_cycle<=1;
+               wb_strobe<=1;
+               wb_sel<=8'hFF;
+               state<=`TEST_DRAM_4;
+            end
+         `TEST_DRAM_4:
+            if(wb_ack)
+               begin
+                  wb_strobe<=0;
+                  if(wb_addr<`MEM_SIZE-8)
+                     begin
+                        if(wb_data_i=={wb_addr[31:0],wb_addr[31:0]})
+                           begin
+                              wb_addr[31:0]<=wb_addr[31:0]+8;
+                              state<=`TEST_DRAM_3;
+                           end
+                     end
+                  else
+                     begin
+                        state<=`INIT_DRAM_1;
+                        wb_cycle<=0;
+                        wb_sel<=0;
+                        wb_we<=0;
+                        wb_data_o<=64'b0;
+                        wb_addr<=64'b0;
+                     end
+               end               
+         `INIT_DRAM_1:
+            begin
+               wb_cycle<=1;
+               wb_strobe<=1;
+               wb_sel<=8'hFF;
+               wb_we<=1;
+               cache_init<=1; // We also init cache directories here
+               state<=`INIT_DRAM_2;
+            end
+         `INIT_DRAM_2:
+            if(wb_ack)
+               begin
+                  wb_strobe<=0;
+                  if(wb_addr<`MEM_SIZE-8)
+                     begin
+                        wb_addr[31:0]<=wb_addr[31:0]+8;
+                        pcx_packet_d[64+11:64+4]<=pcx_packet_d[64+11:64+4]+1; // Address for cachedir init
+                        state<=`INIT_DRAM_1;
+                     end
+                  else
+                     begin
+                        state<=`WAKEUP;
+                        wb_cycle<=0;
+                        wb_sel<=0;
+                        wb_we<=0;
+                        cache_init<=0;
+                        wb_addr<=64'b0;
+                     end
+               end               
+         `WAKEUP:
+            begin
+               cpx_packet<=145'h1700000000000000000000000000000010001;
+               cpx_ready<=1;
+               state<=`PCX_IDLE;
+            end
+         `PCX_IDLE:
+            begin
+               cnt<=0;
+               cpx_packet<=145'b0;
+               cpx_ready<=0;
+               cpx1_packet<=145'b0;
+               cpx1_ready<=0;
+               cpx_two_packet<=0;
+               multi_hit<=0;
+               multi_hit1<=0;
+               if(eth_int_send)
+                  begin
+                     state<=`CPX_SEND_ETH_IRQ;
+                     eth_int_sent<=1;
+                  end
+               else
+                  if(!pcx_fifo_empty)
+                     begin
+                        pcx_req_d<=pcx_data_fifo[128:124];
+                        pcx_atom_d<=pcx_data_fifo[129];
+                        fifo_rd<=1;
+                        state<=`GOT_PCX_REQ;
+								cpu<=0;
+								cpu2<=0;
+                     end
+						else
+                     if(!pcx1_fifo_empty)
+                        begin
+                           pcx_req_d<=pcx1_data_fifo[128:124];
+                           pcx_atom_d<=pcx1_data_fifo[129];
+                           fifo_rd1<=1;
+                           state<=`GOT_PCX_REQ;
+						   		cpu<=1;
+									cpu2<=1;
+                        end
+            end
+         `GOT_PCX_REQ:
+            begin
+               pcx_packet_d<=pcx_packet;
+               if(`DEBUGGING)
+                  begin
+                     wb_sel[1:0]<=pcx_packet[113:112];
+                     wb_sel[2]<=1;
+                  end
+               if(pcx_packet[103:64]==40'h9800000800 && pcx_packet[122:118]==5'b00001)
+                  begin
+                     state<=`CPX_INT_VEC_DIS;
+                     fifo_rd<=0;
+							fifo_rd1<=0;
+                  end
+               else
+                  if(pcx_atom_d==0)
+                     begin
+                        fifo_rd<=0;
+								fifo_rd1<=0;
+                        if(pcx_packet[122:118]==5'b01010) // FP req
+                           begin
+                              state<=`PCX_FP_1;
+                              pcx_packet_2nd[123]<=0;
+                           end
+                        else
+                           state<=`PCX_REQ_STEP1;
+                     end
+                  else
+                     state<=`PCX_REQ_2ND;
+            end
+         `PCX_REQ_2ND:
+            begin
+               pcx_packet_2nd<=pcx_packet; //Latch second packet for atomics
+               if(`DEBUGGING)
+                  if(pcx_fifo_empty)
+                     wb_sel<=8'h67;
+               fifo_rd<=0;
+					fifo_rd1<=0;
+               if(pcx_packet_d[122:118]==5'b01010) // FP req
+                  state<=`PCX_FP_1;
+               else               
+                  state<=`PCX_REQ_STEP1;
+            end
+         `PCX_REQ_STEP1: // Decode the latched request: answer at once or start the first bus access
+            begin
+               if(pcx_packet_d[111]==1'b1) // Invalidate request
+                  begin
+                     cpx_packet_1[144]<=1;     // Valid
+                     cpx_packet_1[143:140]<=4'b0100; // Invalidate reply is Store ACK
+                     cpx_packet_1[139]<=1;     // L2 miss
+                     cpx_packet_1[138:137]<=0; // Error
+                     cpx_packet_1[136]<=pcx_packet_d[117]; // Non-cacheable
+                     cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
+                     cpx_packet_1[133:131]<=0; // Way valid
+                     cpx_packet_1[130]<=((pcx_packet_d[122:118]==5'b10000) && (pcx_req_d==5'b10000)) ? 1:0; // Four byte fill
+                     cpx_packet_1[129]<=pcx_atom_d;
+                     cpx_packet_1[128]<=pcx_packet_d[110]; // Prefetch
+                     cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,pcx_packet_d[122:118]==5'b00000 ? 2'b01:2'b10,pcx_packet_d[64+5:64+4],2'b0,cpu,pcx_packet_d[64+11:64+6],112'b0}; // Reply payload; the low 112 bits are zero
+                     state<=`CPX_READY_1; // This branch raises neither wb_cycle nor wb_strobe
+                  end
+               else
+                  if(pcx_packet_d[122:118]!=5'b01001) // Not INT
+                     begin
+                        wb_cycle<=1'b1; // Open a bus cycle for the first access
+                        wb_strobe<=1'b1;
+                        if((pcx_packet_d[122:118]==5'b00000 && !pcx_req_d[4]) || pcx_packet_d[122:118]==5'b00010 || pcx_packet_d[122:118]==5'b00100 || pcx_packet_d[122:118]==5'b00110) // Load without pcx_req_d[4], CAS, stream load or SWAP/LDSTUB
+                           wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+4],4'b0000}; //DRAM load/streamload, CAS and SWAP always use DRAM and load first; address low 4 bits forced to zero
+                        else
+                           if(pcx_packet_d[122:118]==5'b10000 && !pcx_req_d[4])
+                              wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b00000}; //DRAM ifill; address low 5 bits forced to zero
+                           else
+                              if(pcx_packet_d[64+39:64+28]==12'hFFF && pcx_packet_d[64+27:64+24]!=4'b0) // flash remap FFF1->FFF8
+                                 wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3]+37'h0000E00000,3'b000}; // Offset is added to address bits [39:3], i.e. 0x07000000 in byte terms
+                              else
+                                 wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000}; // Everything else: address with the low 3 bits forced to zero
+                        wb_data_o<=pcx_packet_d[63:0]; // Request data word goes out unchanged
+                        state<=`PCX_REQ_STEP1_1;
+                     end
+                  else
+                     //if((pcx_packet_d[12:10]!=3'b000) && !pcx_packet_d[117]) // Not FLUSH int and not this core
+                     //   state<=`PCX_IDLE; 
+                     //else
+                        state<=`CPX_READY_1; // INT packets are answered without a bus access
+               case(pcx_packet_d[122:118]) // Packet type; evaluated for every request, including the invalidate and INT paths above
+                  5'b00000://Load
+                     begin
+                        wb_we<=0;
+                        if(!pcx_req_d[4])
+                           wb_sel<=8'b11111111; // DRAM requests are always 128 bit
+                        else
+                           case(pcx_packet_d[106:104]) //Size
+                              3'b000://Byte
+                                 case(pcx_packet_d[64+2:64]) // One lane per byte offset; offset 0 selects wb_sel[7]
+                                    3'b000:wb_sel<=8'b10000000;
+                                    3'b001:wb_sel<=8'b01000000;
+                                    3'b010:wb_sel<=8'b00100000;
+                                    3'b011:wb_sel<=8'b00010000;
+                                    3'b100:wb_sel<=8'b00001000;
+                                    3'b101:wb_sel<=8'b00000100;
+                                    3'b110:wb_sel<=8'b00000010;
+                                    3'b111:wb_sel<=8'b00000001;
+                                 endcase
+                              3'b001://Halfword
+                                 case(pcx_packet_d[64+2:64+1]) // Two adjacent lanes per halfword offset
+                                    2'b00:wb_sel<=8'b11000000;
+                                    2'b01:wb_sel<=8'b00110000;
+                                    2'b10:wb_sel<=8'b00001100;
+                                    2'b11:wb_sel<=8'b00000011;
+                                 endcase
+                              3'b010://Word
+                                 wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; // Address bit 2 picks the upper or lower four lanes
+                              3'b011://Doubleword
+                                 wb_sel<=8'b11111111;
+                              3'b100://Quadword
+                                 wb_sel<=8'b11111111;
+                              3'b111://Cacheline
+                                 wb_sel<=8'b11111111;
+                              default:
+                                 wb_sel<=8'b01011010; // Unreal eye-catching value for debug
+                           endcase
+                     end
+                  5'b00001://Store
+                     begin
+                        wb_we<=1;
+                        if(pcx_packet_d[110:109]!=2'b00) //Block (or init) store
+                           wb_sel<=8'b11111111; // Blocks are always 64 bit
+                        else
+                           case(pcx_packet_d[106:104]) //Size
+                              3'b000://Byte
+                                 case(pcx_packet_d[64+2:64]) // Same lane mapping as for loads
+                                    3'b000:wb_sel<=8'b10000000;
+                                    3'b001:wb_sel<=8'b01000000;
+                                    3'b010:wb_sel<=8'b00100000;
+                                    3'b011:wb_sel<=8'b00010000;
+                                    3'b100:wb_sel<=8'b00001000;
+                                    3'b101:wb_sel<=8'b00000100;
+                                    3'b110:wb_sel<=8'b00000010;
+                                    3'b111:wb_sel<=8'b00000001;
+                                 endcase
+                              3'b001://Halfword
+                                 case(pcx_packet_d[64+2:64+1])
+                                    2'b00:wb_sel<=8'b11000000;
+                                    2'b01:wb_sel<=8'b00110000;
+                                    2'b10:wb_sel<=8'b00001100;
+                                    2'b11:wb_sel<=8'b00000011;
+                                 endcase
+                              3'b010://Word
+                                 wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
+                              3'b011://Doubleword
+                                 wb_sel<=8'b11111111;
+                              default: // Other store sizes leave wb_sel unchanged unless `DEBUGGING is non-zero
+                                 if(`DEBUGGING)
+                                    wb_sel<=8'b01011010; // Unreal eye-catching value for debug
+                           endcase
+                     end
+                  5'b00010://CAS
+                     begin
+                        wb_we<=0; //Load first
+                        wb_sel<=8'b11111111; // CAS loads are as cacheline
+                     end
+                  5'b00100://STRLOAD
+                     begin
+                        wb_we<=0;
+                        wb_sel<=8'b11111111; // Stream loads are always 128 bit
+                     end
+                  5'b00101://STRSTORE
+                     begin
+                        wb_we<=1;
+                        case(pcx_packet_d[106:104]) //Size
+                           3'b000://Byte
+                              case(pcx_packet_d[64+2:64]) // Same lane mapping as for loads
+                                 3'b000:wb_sel<=8'b10000000;
+                                 3'b001:wb_sel<=8'b01000000;
+                                 3'b010:wb_sel<=8'b00100000;
+                                 3'b011:wb_sel<=8'b00010000;
+                                 3'b100:wb_sel<=8'b00001000;
+                                 3'b101:wb_sel<=8'b00000100;
+                                 3'b110:wb_sel<=8'b00000010;
+                                 3'b111:wb_sel<=8'b00000001;
+                              endcase
+                           3'b001://Halfword
+                              case(pcx_packet_d[64+2:64+1])
+                                 2'b00:wb_sel<=8'b11000000;
+                                 2'b01:wb_sel<=8'b00110000;
+                                 2'b10:wb_sel<=8'b00001100;
+                                 2'b11:wb_sel<=8'b00000011;
+                              endcase
+                           3'b010://Word
+                              wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111;
+                           3'b011://Doubleword
+                              wb_sel<=8'b11111111;
+                           3'b100://Quadword
+                              wb_sel<=8'b11111111;
+                           3'b111://Cacheline
+                              wb_sel<=8'b11111111;
+                           default:
+                              wb_sel<=8'b01011010; // Unreal eye-catching value for debug
+                        endcase
+                     end
+                  5'b00110://SWAP/LDSTUB
+                     begin
+                        wb_we<=0; // Load first, as CAS
+                        wb_sel<=8'b11111111; // SWAP/LDSTUB loads are as cacheline
+                     end
+                  5'b01001://INT
+                     if(pcx_packet_d[117]) // Flush
+							   begin
+                           cpx_packet_1<={9'h171,pcx_packet_d[113:112],11'h0,pcx_packet_d[64+5:64+4],2'b0,cpu,pcx_packet_d[64+11:64+6],30'h0,pcx_packet_d[17:0],46'b0,pcx_packet_d[17:0]}; //FLUSH instruction answer
+                           //cpx_packet_2<={9'h171,pcx_packet_d[113:112],11'h0,pcx_packet_d[64+5:64+4],2'b0,cpu,pcx_packet_d[64+11:64+6],30'h0,pcx_packet_d[17:0],46'b0,pcx_packet_d[17:0]}; //FLUSH instruction answer
+									//cpx_two_packet<=1;
+									//cpu2<=!cpu; // Flush should be sent to both cores
+								end
+                     else // Thread-to-thread interrupt
+							   begin
+                           cpx_packet_1<={9'h170,pcx_packet_d[113:112],52'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]}; // Packet bits [17:0] are copied to reply bits [81:64] and [17:0]
+									cpu<=pcx_packet_d[10]; // cpu register reloaded from packet bit 10
+							   end
+                  //5'b01010: FP1 - processed by separate state
+                  //5'b01011: FP2 - processed by separate state
+                  //5'b01101: FWDREQ - not implemented
+                  //5'b01110: FWDREPL - not implemented
+                  5'b10000://IFILL
+                     begin
+                        wb_we<=0;
+                        if(pcx_req_d[4]) // I/O access
+                           wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; // Four lanes chosen by address bit 2
+                        else
+                           wb_sel<=8'b11111111;
+                     end
+                  default:
+                     begin
+                        wb_we<=0;
+                        wb_sel<=8'b10101010; // Unreal eye-catching value for debug
+                     end
+               endcase
+            end
+         `PCX_REQ_STEP1_1: // Pure wait state: no outputs change here
+            state<=`PCX_REQ_STEP1_2; // Unconditional one-clock step (original note: delay for L1 directory)
+         `PCX_REQ_STEP1_2: // On the first acknowledge: fill both reply headers, capture the first data beat, pick the next state
+            begin
+               if(wb_ack || wb_ack_d) // Nothing happens in this state until wb_ack or wb_ack_d is set (wb_ack_d is driven outside this view)
+                  begin
+                     cpx_packet_1[144]<=1;     // Valid
+                     cpx_packet_1[139]<=(pcx_packet_d[122:118]==5'b00000) || (pcx_packet_d[122:118]==5'b10000) ? 1:0;     // L2 always miss on load and ifill
+                     cpx_packet_1[138:137]<=0; // Error
+                     cpx_packet_1[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001) ? 1:0; // Non-cacheable is set on store too
+                     cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
+                     if((pcx_packet_d[122:118]==5'b00000 && !pcx_packet_d[117] && !pcx_packet_d[110]) || (pcx_packet_d[122:118]==5'b10000)) // Cacheable Load or IFill
+                        cpx_packet_1[133:131]<={othercachehit[0],wayval0};
+                     else
+                        cpx_packet_1[133:131]<=3'b000; // Way valid
+                     if(pcx_packet_d[122:118]==5'b00100) // Strload
+                        cpx_packet_1[130]<=pcx_packet_d[106]; // A
+                     else
+                        if(pcx_packet_d[122:118]==5'b00101) // Stream store
+                           cpx_packet_1[130]<=pcx_packet_d[108]; // A
+                        else
+                           cpx_packet_1[130]<=((pcx_packet_d[122:118]==5'b10000) && pcx_req_d[4]) ? 1:0; // Four byte fill
+                     if(pcx_packet_d[122:118]==5'b00100) // Strload
+                        cpx_packet_1[129]<=pcx_packet_d[105]; // B
+                     else      
+                        cpx_packet_1[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110); // SWAP is single-packet but needs atom in CPX
+                     cpx_packet_1[128]<=pcx_packet_d[110] && pcx_packet_d[122:118]==5'b00000; // Prefetch
+                     cpx_packet_2[144]<=1;     // Valid
+                     cpx_packet_2[139]<=0;     // L2 miss
+                     cpx_packet_2[138:137]<=0; // Error
+                     cpx_packet_2[136]<=pcx_packet_d[117] || (pcx_packet_d[122:118]==5'b00001) ? 1:0; // Non-cacheable is set on store too
+                     cpx_packet_2[135:134]<=pcx_packet_d[113:112]; // Thread ID
+                     if(pcx_packet_d[122:118]==5'b10000) // IFill
+                        cpx_packet_2[133:131]<={othercachehit[1],wayval1};
+                     else
+                        cpx_packet_2[133:131]<=3'b000; // Way valid
+                     cpx_packet_2[130]<=0; // Four byte fill
+                     cpx_packet_2[129]<=pcx_atom_d || (pcx_packet_d[122:118]==5'b00110) || ((pcx_packet_d[122:118]==5'b10000) && !pcx_req_d[4]); // Also set for IFILL without pcx_req_d[4]
+                     cpx_packet_2[128]<=0; // Prefetch
+                     wb_strobe<=0; // Request outputs are cleared; wb_cycle is handled per packet type below
+                     wb_sel<=8'b0;
+                     wb_addr<=64'b0;
+                     wb_data_o<=64'b0;
+                     wb_we<=0;
+                     case(pcx_packet_d[122:118]) // Packet type
+                        5'b00000://Load
+                           begin
+                              cpx_packet_1[143:140]<=4'b0000; // Type
+                              if(!pcx_req_d[4])
+                                 begin
+                                    cpx_packet_1[127:0]<={wb_data_i,wb_data_i};   // Bits [63:0] are overwritten by the second beat in PCX_REQ_STEP2_1
+                                    state<=`PCX_REQ_STEP2;
+                                 end
+                              else
+                                 case(pcx_packet_d[106:104]) //Size
+                                    3'b000://Byte
+                                       begin
+                                          case(pcx_packet_d[64+2:64]) // Addressed byte is replicated into all 16 byte positions
+                                             3'b000:cpx_packet_1[127:0]<={wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56],wb_data_i[63:56]};
+                                             3'b001:cpx_packet_1[127:0]<={wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48],wb_data_i[55:48]};
+                                             3'b010:cpx_packet_1[127:0]<={wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40],wb_data_i[47:40]};
+                                             3'b011:cpx_packet_1[127:0]<={wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32],wb_data_i[39:32]};
+                                             3'b100:cpx_packet_1[127:0]<={wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24],wb_data_i[31:24]};
+                                             3'b101:cpx_packet_1[127:0]<={wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16],wb_data_i[23:16]};
+                                             3'b110:cpx_packet_1[127:0]<={wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8],wb_data_i[15: 8]};
+                                             3'b111:cpx_packet_1[127:0]<={wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0],wb_data_i[ 7: 0]};
+                                          endcase                      
+                                          wb_cycle<=0;
+                                          state<=`CPX_READY_1;
+                                       end
+                                    3'b001://Halfword
+                                       begin
+                                          case(pcx_packet_d[64+2:64+1]) // Addressed halfword is replicated eight times
+                                             2'b00:cpx_packet_1[127:0]<={wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48],wb_data_i[63:48]};
+                                             2'b01:cpx_packet_1[127:0]<={wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32],wb_data_i[47:32]};
+                                             2'b10:cpx_packet_1[127:0]<={wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16],wb_data_i[31:16]};
+                                             2'b11:cpx_packet_1[127:0]<={wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0],wb_data_i[15: 0]};
+                                          endcase                     
+                                          wb_cycle<=0;
+                                          state<=`CPX_READY_1;
+                                       end
+                                    3'b010://Word
+                                       begin
+                                          if(pcx_packet_d[64+2]==0) // Address bit 2 picks the upper or lower word, replicated four times
+                                             cpx_packet_1[127:0]<={wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32]};
+                                          else
+                                             cpx_packet_1[127:0]<={wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0]};
+                                          wb_cycle<=0;
+                                          state<=`CPX_READY_1;
+                                       end
+                                    3'b011://Doubleword
+                                       begin
+                                          cpx_packet_1[127:0]<={wb_data_i,wb_data_i};   
+                                          wb_cycle<=0;
+                                          state<=`CPX_READY_1;
+                                       end
+                                    3'b100://Quadword
+                                       begin
+                                          cpx_packet_1[127:0]<={wb_data_i,wb_data_i};   
+                                          wb_cycle<=0;
+                                          state<=`CPX_READY_1; // 16 byte access to PROM should just duplicate the data
+                                       end
+                                    3'b111://Cacheline
+                                       begin
+                                          cpx_packet_1[127:0]<={wb_data_i,wb_data_i};   
+                                          wb_cycle<=0;
+                                          state<=`CPX_READY_1; // 16 byte access to PROM should just duplicate the data
+                                       end
+                                    default:
+                                       begin
+                                          cpx_packet_1[127:0]<={wb_data_i,wb_data_i};   
+                                          wb_cycle<=0;
+                                          state<=`PCX_UNKNOWN; // Unsupported load size
+                                       end
+                                 endcase
+                           end
+                        5'b00001://Store
+                           begin
+                              cpx_packet_1[143:140]<=4'b0100; // Type
+                              cpx_packet_1[127:0]<={2'b0,pcx_packet_d[109]/*BIS*/,2'b0,pcx_packet_d[64+5:64+4],2'b0,cpu,pcx_packet_d[64+11:64+6],inval_vect0};
+//                              if((pcx_packet_d[110:109]==2'b01) && (pcx_packet_d[64+5:64]==0) && !inval_vect0[3] && !inval_vect1[3]) // Block init store
+//                                 state<=`PCX_BIS;
+//                              else
+//                                 begin
+                                    wb_cycle<=0; // With the branch above commented out, every store finishes here
+                                    state<=`CPX_READY_1;
+//                                 end
+                           end
+                        5'b00010://CAS
+                           begin
+                              cpx_packet_1[143:140]<=4'b0000; // Load return for first packet
+                              cpx_packet_2[143:140]<=4'b0100; // Store ACK for second packet
+                              cpx_packet_2[127:0]<={5'b0,pcx_packet_d[64+5:64+4],2'b0,cpu,pcx_packet_d[64+11:64+6],inval_vect0};
+                              cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
+                              state<=`PCX_REQ_STEP2; // wb_cycle stays asserted for the following accesses
+                           end
+                        5'b00100://STRLOAD
+                           begin
+                              cpx_packet_1[143:140]<=4'b0010; // Type
+                              cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
+                              state<=`PCX_REQ_STEP2;
+                           end
+                        5'b00101://STRSTORE
+                           begin
+                              cpx_packet_1[143:140]<=4'b0110; // Type
+                              cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],2'b0,cpu,pcx_packet_d[64+11:64+6],inval_vect0};
+                              wb_cycle<=0;
+                              state<=`CPX_READY_1;
+                           end
+                        5'b00110://SWAP/LDSTUB
+                           begin
+                              cpx_packet_1[143:140]<=4'b0000; // Load return for first packet
+                              cpx_packet_2[143:140]<=4'b0100; // Store ACK for second packet
+                              cpx_packet_2[127:0]<={5'b0,pcx_packet_d[64+5:64+4],2'b0,cpu,pcx_packet_d[64+11:64+6],inval_vect0};
+                              cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
+                              state<=`PCX_REQ_STEP2; 
+                           end
+                        5'b10000://IFILL
+                           begin
+                              cpx_packet_1[143:140]<=4'b0001; // Type
+                              cpx_packet_2[143:140]<=4'b0001; // Type
+                              if(pcx_req_d[4]) // I/O access
+                                 begin
+                                    if(pcx_packet_d[64+2]==0) // Single-beat fill: the addressed word is replicated four times
+                                       cpx_packet_1[127:0]<={wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32],wb_data_i[63:32]};
+                                    else
+                                       cpx_packet_1[127:0]<={wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0],wb_data_i[31:0]};
+                                    state<=`CPX_READY_1;
+                                    wb_cycle<=0; 
+                                 end
+                              else
+                                 begin
+                                    cpx_packet_1[127:0]<={wb_data_i,wb_data_i};
+                                    state<=`PCX_REQ_STEP2; // Remaining beats are read in PCX_REQ_STEP2..PCX_REQ_STEP4_1
+                                 end
+                           end
+                        default:
+                           begin
+                              wb_cycle<=0;
+                              state<=`PCX_UNKNOWN; // Packet type not handled by this case
+                           end
+                     endcase
+                  end               
+               end
+         `PCX_REQ_STEP2: // Issue the second read (base + 8) for IFILL, load/stream load, CAS, SWAP and LDSTUB
+            begin
+               state<=`PCX_REQ_STEP2_1; // The acknowledge is awaited there
+               wb_sel<=8'hFF; // Follow-up reads always enable all eight byte lanes
+               if(pcx_packet_d[122:118]==5'h10)
+                  wb_addr<={pcx_req_d,19'h0,pcx_packet_d[103:69],5'h08}; // IFILL: offset 8 within the 32-byte block
+               else
+                  wb_addr<={pcx_req_d,19'h0,pcx_packet_d[103:68],4'h8}; // Others: offset 8 within the 16-byte block
+               wb_strobe<=1'b1;
+            end
+         `PCX_REQ_STEP2_1: // Capture the second read beat and choose the follow-up state
+            if(wb_ack==1)
+               begin
+                  cpx_packet_1[63:0]<=wb_data_i; // Second beat fills the low half of the first reply packet
+                  wb_we<=0;
+                  wb_data_o<=64'h0;
+                  wb_addr<=64'h0;
+                  wb_sel<=8'h00;
+                  wb_strobe<=0;
+                  if((pcx_packet_d[122:118]!=5'h00) && (pcx_packet_d[122:118]!=5'h04)) // Neither load nor stream load
+                     if(pcx_packet_d[122:118]!=5'h02)
+                        state<=`PCX_REQ_STEP3; // IFILL keeps reading; SWAP/LDSTUB goes on to its store
+                     else
+                        state<=`PCX_REQ_CAS_COMPARE; // CAS compares before storing
+                  else
+                     begin // Load and stream load are complete after two beats
+                        state<=`CPX_READY_1;
+                        wb_cycle<=0;
+                     end
+               end
+         `PCX_REQ_CAS_COMPARE: // CAS: go on to the store (PCX_REQ_STEP3) only when the loaded data equals the request data
+            begin // NOTE(review): wb_cycle is not cleared here on a mismatch; presumably released later - confirm
+               if(pcx_packet_d[106:104]==3'h2) // 32-bit: address bits [3:2] pick one word of the 16 bytes read
+                  case(pcx_packet_d[67:66])
+                     2'h0:state<=(cpx_packet_1[127:96]==pcx_packet_d[63:32]) ? `PCX_REQ_STEP3:`CPX_READY_1;
+                     2'h1:state<=(cpx_packet_1[ 95:64]==pcx_packet_d[63:32]) ? `PCX_REQ_STEP3:`CPX_READY_1;
+                     2'h2:state<=(cpx_packet_1[ 63:32]==pcx_packet_d[63:32]) ? `PCX_REQ_STEP3:`CPX_READY_1;
+                     2'h3:state<=(cpx_packet_1[ 31: 0]==pcx_packet_d[63:32]) ? `PCX_REQ_STEP3:`CPX_READY_1;
+                  endcase
+               else // Any other size: address bit 3 picks one doubleword
+                  if(pcx_packet_d[67]==0)
+                     state<=(cpx_packet_1[127:64]==pcx_packet_d[63:0]) ? `PCX_REQ_STEP3:`CPX_READY_1;
+                  else
+                     state<=(cpx_packet_1[ 63: 0]==pcx_packet_d[63:0]) ? `PCX_REQ_STEP3:`CPX_READY_1;
+               cpx_two_packet<=1; // Set on both the match and the mismatch path
+            end
+         `PCX_REQ_STEP3: // 256-bit IFILL; CAS, SWAP and LDSTUB store
+            begin
+               if(pcx_packet_d[122:118]==5'b10000)
+                  wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+5],5'b10000}; // IFILL: read at offset 16 of the 32-byte block
+               else
+                  wb_addr<={pcx_req_d,19'b0,pcx_packet_d[103:64+3],3'b000}; // CAS or SWAP save
+               cpx_two_packet<=1; // presumably makes the CPX_READY states send cpx_packet_2 as well - confirm outside this view
+               if(pcx_packet_d[122:118]==5'b10000)
+                  wb_we<=0; // IFILL keeps reading
+               else
+                  wb_we<=1; // Every other type reaching this state writes
+               wb_strobe<=1'b1;
+               if(pcx_packet_d[122:118]==5'b00010) // CAS
+                  if(pcx_packet_d[106:104]==3'b010)
+                     wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; // 32-bit CAS: four lanes chosen by address bit 2
+                  else
+                     wb_sel<=8'b11111111; //CASX
+               else
+                  if(pcx_packet_d[122:118]==5'b00110) //SWAP or LDSTUB
+                     if(pcx_packet_d[106:104]==3'b000)  //LDSTUB
+                        case(pcx_packet_d[64+2:64]) // Single byte lane; offset 0 selects wb_sel[7]
+                           3'b000:wb_sel<=8'b10000000;
+                           3'b001:wb_sel<=8'b01000000;
+                           3'b010:wb_sel<=8'b00100000;
+                           3'b011:wb_sel<=8'b00010000;
+                           3'b100:wb_sel<=8'b00001000;
+                           3'b101:wb_sel<=8'b00000100;
+                           3'b110:wb_sel<=8'b00000010;
+                           3'b111:wb_sel<=8'b00000001;
+                        endcase
+                     else   
+                        wb_sel<=(pcx_packet_d[64+2]==0) ? 8'b11110000:8'b00001111; ///SWAP is always 32-bit
+                  else
+                     wb_sel<=8'b11111111; // It is always full width for subsequent IFill accesses
+               if(pcx_packet_d[122:118]==5'b00110) //SWAP or LDSTUB
+                  wb_data_o<={pcx_packet_d[63:32],pcx_packet_d[63:32]}; // Upper request word duplicated into both halves
+//                  wb_data_o<=pcx_packet_d[63:0];
+               else
+                  wb_data_o<=pcx_packet_2nd[63:0]; // CAS store second packet data
+//                  if(pcx_packet_d[106:104]==3'b010)
+//                     wb_data_o<={pcx_packet_2nd[63:32],pcx_packet_2nd[63:32]}; // CAS store second packet data
+//                  else
+//                     wb_data_o<=pcx_packet_2nd[63:0];
+               state<=`PCX_REQ_STEP3_1; // Acknowledge is awaited there
+            end
+         `PCX_REQ_STEP3_1: // Finish the third access: IFILL data capture, or end of the CAS/SWAP/LDSTUB store
+            if(wb_ack==1)
+               begin
+                  wb_data_o<=64'h0;
+                  wb_we<=0;
+                  wb_addr<=64'h0;
+                  wb_sel<=8'h00;
+                  wb_strobe<=0;
+                  if(pcx_packet_d[122:118]==5'h10) // IFILL: one more read follows
+                     begin
+                        state<=`PCX_REQ_STEP4;
+                        cpx_packet_2[127:64]<=wb_data_i; // Third beat is the upper half of the second reply packet
+                     end
+                  else
+                     begin // Store is done; end the bus cycle
+                        state<=`CPX_READY_1;
+                        wb_cycle<=0;
+                     end
+               end
+         `PCX_REQ_STEP4: // IFILL only: issue the fourth read, offset 24 of the 32-byte block
+            begin
+               state<=`PCX_REQ_STEP4_1;
+               wb_sel<=8'hFF; // All eight byte lanes, as for every follow-up access
+               wb_addr<={pcx_req_d,19'h0,pcx_packet_d[103:69],5'h18};
+               wb_strobe<=1'b1;
+            end 
+         `PCX_REQ_STEP4_1: // Last IFILL beat: capture it and end the bus cycle
+            if(wb_ack==1)  
+               begin
+                  cpx_packet_2[63:0]<=wb_data_i; // Fourth beat is the lower half of the second reply packet
+                  state<=`CPX_READY_1;
+                  wb_we<=0;
+                  wb_addr<=64'h0;
+                  wb_sel<=8'h00;
+                  wb_strobe<=0;
+                  wb_cycle<=0;
+               end
+         `PCX_BIS: // Block init store: first zero write, at offset 8 of the 64-byte block (the only jump here is commented out in PCX_REQ_STEP1_2)
+            begin
+               state<=`PCX_BIS_1;
+               wb_data_o<=64'h0; // Data written is all zeroes
+               wb_sel<=8'hFF;
+               wb_addr<={pcx_req_d,19'h0,pcx_packet_d[103:70],6'h08};
+               wb_we<=1;
+               wb_strobe<=1'b1;
+            end
+         `PCX_BIS_1: // After each acknowledged write: continue until the address reaches request address + 56
+            if(wb_ack)
+               begin
+                  if(wb_addr[39:0]<(pcx_packet_d[103:64]+56))
+                     state<=`PCX_BIS_2; // More doublewords to write
+                  else
+                     begin // Last doubleword written: release the bus
+                        state<=`CPX_READY_1;
+                        wb_addr<=64'h0;
+                        wb_we<=0;
+                        wb_sel<=8'h00;
+                        wb_cycle<=0;
+                     end
+                  wb_strobe<=0; // Dropped after every acknowledge
+               end
+         `PCX_BIS_2: // Advance to the next doubleword and strobe again
+            begin
+               state<=`PCX_BIS_1;
+               wb_addr[5:0]<=wb_addr[5:0]+8; // Only the low six address bits change
+               wb_strobe<=1'b1;
+            end
+         `PCX_FP_1: // FP request, first packet: present it on fp_pcx and raise fp_req
+            begin
+               state<=`PCX_FP_2;
+               fp_req<=1;
+               fp_pcx<=pcx_packet_d;
+               if(`DEBUGGING) // Debug builds mirror the packet onto the bus outputs
+                  begin
+                     wb_sel<=8'h22; // Marker value for this state
+                     wb_data_o<=pcx_packet_d[63:0];
+                     wb_addr<=pcx_packet_d[103:64];
+                  end
+            end
+         `PCX_FP_2: // FP request, second packet: present pcx_packet_2nd on fp_pcx
+            begin
+               fp_pcx<=pcx_packet_2nd;
+               state<=`FP_WAIT;
+               if(`DEBUGGING) // Debug builds mirror the packet onto the bus outputs
+                  begin
+                     wb_addr<=pcx_packet_2nd[103:64];
+                     wb_data_o<=pcx_packet_d[63:0]; // NOTE(review): address comes from pcx_packet_2nd but data from pcx_packet_d - possible copy-paste slip, confirm intent
+                     wb_sel<=8'h23; // Marker value for this state
+                  end
+            end
+         `FP_WAIT: // Request handed over: clear the FP request outputs and wait for fp_rdy
+            begin
+               fp_req<=0;
+               fp_pcx<=124'h0;
+               if(`DEBUGGING)
+                  wb_sel<=8'h24; // Marker value for this state
+               if(fp_rdy)
+                  state<=`CPX_FP; // Otherwise stay in this state
+            end
+         `CPX_FP: // Take the FP reply once fp_cpx[144] is set
+            if(fp_cpx[144]) // Packet valid
+               begin
+                  state<=`CPX_READY_1;
+                  cpx_packet_1<=fp_cpx; // The reply is forwarded unchanged
+                  if(`DEBUGGING) // Debug builds mirror the payload onto the bus outputs
+                     begin
+                        wb_data_o<=fp_cpx[127:64];
+                        wb_addr<=fp_cpx[63:0];
+                     end
+               end
+            else
+               if(!fp_rdy)
+                  state<=`FP_WAIT; // Not valid and fp_rdy low: go back and wait; not valid with fp_rdy high: stay here
+         `CPX_SEND_ETH_IRQ:
+            begin
+               cpx_packet_1<=145'h1_7_000_000000000000001D_000000000000_001D;
+               eth_int_sent<=0;
+               state<=`CPX_READY_1;
+            end
+         `CPX_INT_VEC_DIS:
+            begin
+               //if(pcx_packet_d[12:10]==3'b000) // Send interrupt only if it is for this core
+                  cpx_two_packet<=1; 
+				   cpu2<=pcx_packet_d[10];
+               cpx_packet_1[144:140]<=5'b10100;
+               cpx_packet_1[139:137]<=0;
+               cpx_packet_1[136]<=1;
+               cpx_packet_1[135:134]<=pcx_packet_d[113:112]; // Thread ID
+               cpx_packet_1[133:130]<=0;
+               cpx_packet_1[129]<=pcx_atom_d;
+               cpx_packet_1[128]<=0;
+               cpx_packet_1[127:0]<={5'b0,pcx_packet_d[64+5:64+4],2'b0,cpu,pcx_packet_d[64+11:64+6],112'b0};
+               cpx_packet_2<={9'h170,54'h0,pcx_packet_d[17:0],46'h0,pcx_packet_d[17:0]}; 
+               state<=`CPX_READY_1;
+            end
+         `CPX_READY_1:
+            begin
+				   if(!cpu)
+					   begin
+                     cpx_ready<=1;
+                     cpx_packet<=cpx_packet_1;
+							if(othercpuhit[0])
+							   begin
+                           cpx1_ready<=1;
+                           cpx1_packet<={1'b1,4'b0011,12'b0,5'b0,pcx_packet_d[64+5:64+4],3'b001,pcx_packet_d[64+11:64+6],inval_vect0};
+								end
+					   end
+					else
+					   begin
+                     cpx1_ready<=1;
+                     cpx1_packet<=cpx_packet_1;
+							if(othercpuhit[0])
+							   begin
+                           cpx_ready<=1;
+                           cpx_packet<={1'b1,4'b0011,12'b0,5'b0,pcx_packet_d[64+5:64+4],3'b000,pcx_packet_d[64+11:64+6],inval_vect0};
+								end
+					   end
+               cnt<=cnt+1;
+               if(`DEBUGGING)
+                  if(multi_hit || multi_hit1)
+                     wb_sel<=8'h11;
+                state<=`CPX_READY_2;
+            end
+         `CPX_READY_2:
+            begin
+				   if(cpx_two_packet && !cpu2)
+					   begin
+						   cpx_ready<=1;
+						   cpx_packet<=cpx_packet_2;
+						end
+					else
+					   if(cpu2 && othercpuhit[1])
+						   begin
+                        cpx_ready<=1;
+                        cpx_packet<={1'b1,4'b0011,12'b0,5'b0,pcx_packet_d[64+5],1'b1,3'b000,pcx_packet_d[64+11:64+6],inval_vect1};
+      					end
+						else
+						   begin
+							   cpx_ready<=0;
+								cpx_packet<=145'b0;
+							end
+				   if(cpx_two_packet && cpu2)
+					   begin
+						   cpx1_ready<=1;
+						   cpx1_packet<=cpx_packet_2;
+						end
+					else
+					   if(!cpu2 && othercpuhit[1])
+						   begin
+                        cpx1_ready<=1;
+                        cpx1_packet<={1'b1,4'b0011,12'b0,5'b0,pcx_packet_d[64+5],1'b1,3'b001,pcx_packet_d[64+11:64+6],inval_vect1};
+      					end
+						else
+						   begin
+							   cpx1_ready<=0;
+								cpx1_packet<=145'b0;
+							end
+					state<=`PCX_IDLE;
+            end
+         `PCX_UNKNOWN:
+            begin
+               wb_sel<=8'b10100101; // Illegal eye-catching value for debugging
+               state<=`PCX_IDLE;
+            end
+      endcase
+
+l1dir l1dir_inst(
+   .clk(clk),
+   .reset(!rstn),
+   
+   .cpu(cpu),     // Issuing CPU number
+   .strobe(state==`GOT_PCX_REQ),
+   .way(pcx_packet[108:107]),     // Way to allocate for allocating loads
+   .address(pcx_packet[64+39:64]),
+   .load(pcx_packet[122:118]==5'b00000),
+   .ifill(pcx_packet[122:118]==5'b10000),
+   .store(pcx_packet[122:118]==5'b00001),
+   .cas(pcx_packet[122:118]==5'b00010),
+   .swap(pcx_packet[122:118]==5'b00110),
+   .strload(pcx_packet[122:118]==5'b00100),
+   .strstore(pcx_packet[122:118]==5'b00101),
+   .cacheable((!pcx_packet[117]) && (!pcx_req_d[4])),
+   .prefetch(pcx_packet[110]),
+   .invalidate(pcx_packet[111]),
+   .blockstore(pcx_packet[109] | pcx_packet[110]),
+   
+   .inval_vect0(inval_vect0),    // Invalidation vector
+   .inval_vect1(inval_vect1),    
+   .othercachehit(othercachehit), // Other cache hit in the same CPU, wayval0/wayval1
+   .othercpuhit(othercpuhit),   // Any cache hit in the other CPU, wayval0/wayval1
+   .wayval0(wayval0),       // Way valid
+   .wayval1(wayval1),       // Second way valid for ifill
+   .ready(ready)         // Directory init done   
+);
+
+endmodule
Index: /trunk/os2wb/l1idir.v
===================================================================
--- /trunk/os2wb/l1idir.v	(revision 6)
+++ /trunk/os2wb/l1idir.v	(revision 6)
@@ -0,0 +1,178 @@
+module l1idir(
+   input clk,
+   input reset,             // asynchronous, active high
+   // Request inputs, acted on only in the idle state while strobe is high
+   input [ 6:0] index,      // entry address, common to all four ways
+   input [ 1:0] way,        // way written by allocate
+   input [27:0] tag,        // tag stored by allocate, compared by query/deallocate
+   input        strobe,     // qualifies the request inputs
+   input        query,      // read the four ways at index, report a match on hit
+   input        allocate,   // entry {way,index} <- tag
+   input        deallocate, // ways at index whose stored tag equals tag <- `INVAL_TAG
+   input        invalidate, // all four ways at index <- `INVAL_TAG
+   // Priority if several are high: invalidate, allocate, deallocate, query
+   output reg [2:0] hit,    // {1'b1,way} when exactly one way matches, else 3'b000
+   
+   output reg       ready   // directory init completed
+);
+
+`define INVAL_TAG 28'h8000000
+
+reg [27:0] tag_d; // tag of the last query/deallocate request, used for the compare
+reg [ 6:0] addr;  // entry address driven to both tag memories
+reg [ 3:0] we;    // per-way write enables
+reg [ 3:0] re;    // per-way read requests (only gate the memory enables)
+reg [28:0] di;    // write data, {tag,1'b0}
+
+wire [28:0] do0;  // read data of ways 0..3; bits [28:1] hold the stored tag
+wire [28:0] do1;
+wire [28:0] do2;
+wire [28:0] do3;
+reg query_d, query_d1;           // query && strobe, delayed one and two clocks
+reg deallocate_d, deallocate_d1; // deallocate && strobe, delayed one and two clocks
+
+// Keep the compared tag stable until the next query/deallocate request
+always @(posedge clk)
+   if(strobe && (query || deallocate))
+      tag_d<=tag;
+
+// Two-stage delay line selecting the clock on which hitvect is captured into hit
+always @(posedge clk)
+   begin
+      query_d<=query && strobe;
+      deallocate_d<=deallocate && strobe;
+      query_d1<=query_d;
+      deallocate_d1<=deallocate_d;
+   end
+// Ways 0 and 1: one memory, way 0 at addresses {1'b0,addr}, way 1 at {1'b1,addr}
+cachedir icache01 (
+   .clock(clk),
+   .enable(we[0] || re[0] || we[1] || re[1]),
+   .wren_a(we[0]),
+   .address_a({1'b0,addr}),
+   .data_a(di),
+   .q_a(do0),
+   
+   .wren_b(we[1]),
+   .address_b({1'b1,addr}),
+   .data_b(di),
+   .q_b(do1) 
+);
+// Ways 2 and 3, same arrangement as above
+cachedir icache23 (
+   .clock(clk),
+   .enable(we[2] || re[2] || we[3] || re[3]),
+   .wren_a(we[2]),
+   .address_a({1'b0,addr}),
+   .data_a(di),
+   .q_a(do2),
+   
+   .wren_b(we[3]),
+   .address_b({1'b1,addr}),
+   .data_b(di),
+   .q_b(do3) 
+);
+
+wire [3:0] hitvect={(do3[28:1]==tag_d),(do2[28:1]==tag_d),(do1[28:1]==tag_d),(do0[28:1]==tag_d)}; // bit n: way n matches tag_d
+
+`define L1IDIR_RESET   3'b000
+`define L1IDIR_INIT    3'b001
+`define L1IDIR_IDLE    3'b010
+`define L1IDIR_READ    3'b011
+`define L1IDIR_DEALLOC 3'b100
+
+reg [2:0] state;
+
+// Control FSM: after reset write `INVAL_TAG to every entry, then serve requests
+always @(posedge clk or posedge reset)
+   if(reset)
+      begin
+         state<=`L1IDIR_RESET;
+         ready<=0;
+      end
+   else
+      case(state)
+         `L1IDIR_RESET:
+            begin
+               addr<=7'b0;
+               di<={`INVAL_TAG,1'b0};
+               we<=4'b1111;
+               re<=4'b0000; // defined value, otherwise the memory enables are X after init
+               state<=`L1IDIR_INIT;
+            end
+         `L1IDIR_INIT:
+            begin
+               addr<=addr+1;
+               if(addr==7'b1111111) // last entry is being written in this clock
+                  begin
+                     we<=4'b0;
+                     ready<=1;
+                     state<=`L1IDIR_IDLE;
+                  end
+            end
+         `L1IDIR_IDLE:
+            if(strobe)
+               if(invalidate)
+                  begin
+                     we<=4'b1111;
+                     addr<=index;
+                     di<={`INVAL_TAG,1'b0};
+                  end
+               else
+                  if(allocate)
+                     begin
+                        case(way) // one-hot write enable for the selected way
+                           2'b00:we<=4'b0001;
+                           2'b01:we<=4'b0010;
+                           2'b10:we<=4'b0100;
+                           2'b11:we<=4'b1000;
+                        endcase
+                        addr<=index;
+                        di<={tag,1'b0};
+                     end
+                  else
+                     if(deallocate)
+                        begin
+                           re<=4'b1111;
+                           we<=0;
+                           addr<=index;
+                           state<=`L1IDIR_READ;
+                        end
+                     else
+                        if(query)
+                           begin
+                              addr<=index;
+                              re<=4'b1111;
+                              we<=0;
+                           end
+                        else // strobe without any request: release the memories
+                           begin
+                              we<=0;
+                              re<=0;
+                           end
+         `L1IDIR_READ: // one clock for the read (assumes cachedir latency fits - TODO confirm)
+            state<=`L1IDIR_DEALLOC;
+         `L1IDIR_DEALLOC:
+            begin
+               re<=0;
+               di<={`INVAL_TAG,1'b0};
+               we<=hitvect; // overwrite only the ways that matched
+               state<=`L1IDIR_IDLE;
+            end
+      endcase
+
+// Report the matching way two clocks after a query/deallocate; cleared by any other strobe
+always @(posedge clk)
+   if(query_d1 || deallocate_d1)
+      case(hitvect)
+         4'b0001:hit<=3'b100;
+         4'b0010:hit<=3'b101;
+         4'b0100:hit<=3'b110;
+         4'b1000:hit<=3'b111;
+         default:hit<=3'b000; // Hits will be ORed then
+      endcase
+   else
+      if(strobe)
+         hit<=3'b000;
+
+endmodule
Index: /trunk/os2wb/s1_top.v
===================================================================
--- /trunk/os2wb/s1_top.v	(revision 6)
+++ /trunk/os2wb/s1_top.v	(revision 6)
@@ -0,0 +1,358 @@
+/*
+ * Simply RISC S1 Core Top-Level
+ *
+ * (C) 2007 Simply RISC LLP
+ * AUTHOR: Fabrizio Fazzino <fabrizio.fazzino@srisc.com>
+ *
+ * LICENSE:
+ * This is a Free Hardware Design; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ * The above named program is distributed in the hope that it will
+ * be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * DESCRIPTION:
+ * This block implements the top-level of the S1 Core.
+ * It is just a schematic with five instances:
+ * 1) two SPARC Cores of the OpenSPARC T1 (sparc_0, sparc_1);
+ * 2) a dual-core SPARC Core to Wishbone Master bridge (os2wb_dual);
+ * 3) an FPU attached to the bridge;
+ * 4) a Reset Controller.
+ *
+ */
+	 
+module s1_top (
+    input         sys_clock_i,   // system clock, fed to the bridge and the reset controller
+    input         sys_reset_i,   // system reset, fed to the reset controller
+    
+    input         eth_irq_i,     // not used: the bridge's eth_int input is tied low below
+    
+    input         wbm_ack_i,     // Wishbone master port, driven by the os2wb_dual bridge
+    input  [63:0] wbm_data_i,
+    output        wbm_cycle_o, 
+    output        wbm_strobe_o, 
+    output        wbm_we_o, 
+    output [63:0] wbm_addr_o, 
+    output [63:0] wbm_data_o, 
+    output [ 7:0] wbm_sel_o
+);
+  /*
+   * Wires
+   */
+
+  // Wires connected to SPARC Core outputs (spc1_* belong to the second core)
+
+  // pcx
+  wire [4:0]   spc_pcx_req_pq;    // processor to pcx request
+  wire         spc_pcx_atom_pq;   // processor to pcx atomic request
+  wire [123:0] spc_pcx_data_pa;  // processor to pcx packet
+  wire [4:0]   spc1_pcx_req_pq;    // processor to pcx request
+  wire         spc1_pcx_atom_pq;   // processor to pcx atomic request
+  wire [123:0] spc1_pcx_data_pa;  // processor to pcx packet
+
+  // shadow scan (declared only: the matching port hookups are commented out below)
+  wire     spc_sscan_so;         // From ifu of sparc_ifu.v
+  wire     spc_scanout0;         // From test_stub of test_stub_bist.v
+  wire     spc_scanout1;         // From test_stub of test_stub_bist.v
+
+  // bist (declared only: the matching port hookups are commented out below)
+  wire     tst_ctu_mbist_done;  // From test_stub of test_stub_two_bist.v
+  wire     tst_ctu_mbist_fail;  // From test_stub of test_stub_two_bist.v
+
+  // fuse (declared only: the matching port hookups are commented out below)
+  wire     spc_efc_ifuse_data;     // From ifu of sparc_ifu.v
+  wire     spc_efc_dfuse_data;     // From ifu of sparc_ifu.v
+
+  // Wires connected to SPARC Core inputs
+
+  // cpx interface
+  wire [4:0] pcx_spc_grant_px; // pcx to processor grant info  
+  wire       cpx_spc_data_rdy_cx2; // cpx data inflight to sparc  
+  wire [144:0] cpx_spc_data_cx2;     // cpx to sparc data packet
+  wire [4:0] pcx1_spc_grant_px; // pcx to processor grant info  
+  wire       cpx1_spc_data_rdy_cx2; // cpx data inflight to sparc  
+  wire [144:0] cpx1_spc_data_cx2;     // cpx to sparc data packet
+
+  wire [3:0]  const_cpuid;            // core id of sparc_0
+  wire [3:0]  const_cpuid1;           // core id of sparc_1
+  wire [7:0]  const_maskid;           // To ifu of sparc_ifu.v
+
+  // sscan
+  wire        ctu_tck;                // To ifu of sparc_ifu.v
+  wire        ctu_sscan_se;           // To ifu of sparc_ifu.v
+  wire        ctu_sscan_snap;         // To ifu of sparc_ifu.v
+  wire [3:0]  ctu_sscan_tid;          // To ifu of sparc_ifu.v
+
+  // bist
+  wire        ctu_tst_mbist_enable;   // To test_stub of test_stub_bist.v
+
+  // efuse
+  wire        efc_spc_fuse_clk1;
+  wire        efc_spc_fuse_clk2;
+  wire        efc_spc_ifuse_ashift;
+  wire        efc_spc_ifuse_dshift;
+  wire        efc_spc_ifuse_data;
+  wire        efc_spc_dfuse_ashift;
+  wire        efc_spc_dfuse_dshift;
+  wire        efc_spc_dfuse_data;
+
+  // scan and macro test
+  wire        ctu_tst_macrotest;      // To test_stub of test_stub_bist.v
+  wire        ctu_tst_scan_disable;   // To test_stub of test_stub_bist.v
+  wire        ctu_tst_short_chain;    // To test_stub of test_stub_bist.v
+  wire        global_shift_enable;    // To test_stub of test_stub_two_bist.v
+  wire        ctu_tst_scanmode;       // To test_stub of test_stub_two_bist.v
+  wire        spc_scanin0;
+  wire        spc_scanin1;
+   
+  // clk
+  wire        cluster_cken;           // To spc_hdr of cluster_header.v
+  wire        gclk;                   // To spc_hdr of cluster_header.v
+
+  // reset
+  wire        cmp_grst_l;
+  wire        cmp_arst_l;
+  wire        ctu_tst_pre_grst_l;     // To test_stub of test_stub_bist.v
+
+  wire        adbginit_l;             // To spc_hdr of cluster_header.v
+  wire        gdbginit_l;             // To spc_hdr of cluster_header.v
+
+  // Reset signal from the reset controller to the bridge
+  wire sys_reset_final;
+
+  // No interrupt source is wired up: the bridge's eth_int input is tied low below
+
+  /*
+   * SPARC Core module instances
+   */
+reg [  4:0] pcx_spc_grant_px_fifo;  // not referenced anywhere in this module
+reg [  4:0] pcx1_spc_grant_px_fifo; // not referenced anywhere in this module
+
+  sparc sparc_0 (
+
+    // Wires connected to SPARC Core outputs
+    .spc_pcx_req_pq(spc_pcx_req_pq),
+    .spc_pcx_atom_pq(spc_pcx_atom_pq),
+    .spc_pcx_data_pa(spc_pcx_data_pa),
+    //.spc_sscan_so(spc_sscan_so),
+    //.spc_scanout0(spc_scanout0),
+    //.spc_scanout1(spc_scanout1),
+    //.tst_ctu_mbist_done(tst_ctu_mbist_done),
+    //.tst_ctu_mbist_fail(tst_ctu_mbist_fail),
+    //.spc_efc_ifuse_data(spc_efc_ifuse_data),
+    //.spc_efc_dfuse_data(spc_efc_dfuse_data),
+
+    // Wires connected to SPARC Core inputs
+    .pcx_spc_grant_px(pcx_spc_grant_px),
+    .cpx_spc_data_rdy_cx2(cpx_spc_data_rdy_cx2),
+    .cpx_spc_data_cx2(cpx_spc_data_cx2),
+    .const_cpuid(const_cpuid),
+    .const_maskid(const_maskid),
+    .ctu_tck(ctu_tck),
+    .ctu_sscan_se(ctu_sscan_se),
+    .ctu_sscan_snap(ctu_sscan_snap),
+    .ctu_sscan_tid(ctu_sscan_tid),
+    .ctu_tst_mbist_enable(ctu_tst_mbist_enable),
+    .efc_spc_fuse_clk1(efc_spc_fuse_clk1),
+    .efc_spc_fuse_clk2(efc_spc_fuse_clk2),
+    .efc_spc_ifuse_ashift(efc_spc_ifuse_ashift),
+    .efc_spc_ifuse_dshift(efc_spc_ifuse_dshift),
+    .efc_spc_ifuse_data(efc_spc_ifuse_data),
+    .efc_spc_dfuse_ashift(efc_spc_dfuse_ashift),
+    .efc_spc_dfuse_dshift(efc_spc_dfuse_dshift),
+    .efc_spc_dfuse_data(efc_spc_dfuse_data),
+    .ctu_tst_macrotest(ctu_tst_macrotest),
+    .ctu_tst_scan_disable(ctu_tst_scan_disable),
+    .ctu_tst_short_chain(ctu_tst_short_chain),
+    .global_shift_enable(global_shift_enable),
+    .ctu_tst_scanmode(ctu_tst_scanmode),
+    .spc_scanin0(spc_scanin0),
+    .spc_scanin1(spc_scanin1),
+    .cluster_cken(cluster_cken),
+    .gclk(gclk),
+    .cmp_grst_l(cmp_grst_l),
+    .cmp_arst_l(cmp_arst_l),
+    .ctu_tst_pre_grst_l(ctu_tst_pre_grst_l),
+    .adbginit_l(adbginit_l),
+    .gdbginit_l(gdbginit_l)
+
+  );
+
+  sparc sparc_1 (
+
+    // Wires connected to SPARC Core outputs
+    .spc_pcx_req_pq(spc1_pcx_req_pq),
+    .spc_pcx_atom_pq(spc1_pcx_atom_pq),
+    .spc_pcx_data_pa(spc1_pcx_data_pa),
+    //.spc_sscan_so(spc_sscan_so),
+    //.spc_scanout0(spc_scanout0),
+    //.spc_scanout1(spc_scanout1),
+    //.tst_ctu_mbist_done(tst_ctu_mbist_done),
+    //.tst_ctu_mbist_fail(tst_ctu_mbist_fail),
+    //.spc_efc_ifuse_data(spc_efc_ifuse_data),
+    //.spc_efc_dfuse_data(spc_efc_dfuse_data),
+
+    // Wires connected to SPARC Core inputs
+    .pcx_spc_grant_px(pcx1_spc_grant_px),
+    .cpx_spc_data_rdy_cx2(cpx1_spc_data_rdy_cx2),
+    .cpx_spc_data_cx2(cpx1_spc_data_cx2),
+    .const_cpuid(const_cpuid1),
+    .const_maskid(const_maskid),
+    .ctu_tck(ctu_tck),
+    .ctu_sscan_se(ctu_sscan_se),
+    .ctu_sscan_snap(ctu_sscan_snap),
+    .ctu_sscan_tid(ctu_sscan_tid),
+    .ctu_tst_mbist_enable(ctu_tst_mbist_enable),
+    .efc_spc_fuse_clk1(efc_spc_fuse_clk1),
+    .efc_spc_fuse_clk2(efc_spc_fuse_clk2),
+    .efc_spc_ifuse_ashift(efc_spc_ifuse_ashift),
+    .efc_spc_ifuse_dshift(efc_spc_ifuse_dshift),
+    .efc_spc_ifuse_data(efc_spc_ifuse_data),
+    .efc_spc_dfuse_ashift(efc_spc_dfuse_ashift),
+    .efc_spc_dfuse_dshift(efc_spc_dfuse_dshift),
+    .efc_spc_dfuse_data(efc_spc_dfuse_data),
+    .ctu_tst_macrotest(ctu_tst_macrotest),
+    .ctu_tst_scan_disable(ctu_tst_scan_disable),
+    .ctu_tst_short_chain(ctu_tst_short_chain),
+    .global_shift_enable(global_shift_enable),
+    .ctu_tst_scanmode(ctu_tst_scanmode),
+    .spc_scanin0(spc_scanin0),
+    .spc_scanin1(spc_scanin1),
+    .cluster_cken(cluster_cken),
+    .gclk(gclk),
+    .cmp_grst_l(cmp_grst_l),
+    .cmp_arst_l(cmp_arst_l),
+    .ctu_tst_pre_grst_l(ctu_tst_pre_grst_l),
+    .adbginit_l(adbginit_l),
+    .gdbginit_l(gdbginit_l)
+
+  );
+
+  /*
+   * SPARC Core to Wishbone Master bridge
+   */
+
+wire         fp_req;   // bridge -> FPU: request packet valid
+wire [123:0] fp_pcx;   // bridge -> FPU: request packet
+wire [  7:0] fp_rdy;   // FPU -> bridge: request vector, reduced to one flag at the bridge port
+wire [144:0] fp_cpx;   // FPU -> bridge: reply packet
+
+os2wb_dual os2wb_inst (
+    .clk(sys_clock_i), 
+    .rstn(~sys_reset_final), 
+    
+    .pcx_req(spc_pcx_req_pq), 
+    .pcx_atom(spc_pcx_atom_pq), 
+    .pcx_data(spc_pcx_data_pa), 
+    .pcx_grant(pcx_spc_grant_px), 
+    .cpx_ready(cpx_spc_data_rdy_cx2), 
+    .cpx_packet(cpx_spc_data_cx2), 
+
+    .pcx1_req(spc1_pcx_req_pq), 
+    .pcx1_atom(spc1_pcx_atom_pq), 
+    .pcx1_data(spc1_pcx_data_pa), 
+    .pcx1_grant(pcx1_spc_grant_px), 
+    .cpx1_ready(cpx1_spc_data_rdy_cx2), 
+    .cpx1_packet(cpx1_spc_data_cx2), 
+
+    .wb_data_i(wbm_data_i), 
+    .wb_ack(wbm_ack_i), 
+    .wb_cycle(wbm_cycle_o), 
+    .wb_strobe(wbm_strobe_o), 
+    .wb_we(wbm_we_o), 
+    .wb_sel(wbm_sel_o), 
+    .wb_addr(wbm_addr_o), 
+    .wb_data_o(wbm_data_o),
+    
+    .fp_pcx(fp_pcx),
+    .fp_req(fp_req),
+    .fp_cpx(fp_cpx),
+    .fp_rdy(fp_rdy!=8'h00),
+    
+    .eth_int(1'b0/*eth_irq_i*/) // sized constant: an unsized 0 is 32 bits wide
+);
+
+// FPU
+fpu fpu_inst(
+    .pcx_fpio_data_rdy_px2(fp_req),
+    .pcx_fpio_data_px2(fp_pcx),
+    .arst_l(cmp_arst_l),
+    .grst_l(cmp_grst_l),
+    .gclk(gclk),
+    .cluster_cken(cluster_cken),
+
+    .fp_cpx_req_cq(fp_rdy),
+    .fp_cpx_data_ca(fp_cpx),
+
+    .ctu_tst_pre_grst_l(ctu_tst_pre_grst_l),
+    .global_shift_enable(global_shift_enable),
+    .ctu_tst_scan_disable(ctu_tst_scan_disable),
+    .ctu_tst_scanmode(ctu_tst_scanmode),
+    .ctu_tst_macrotest(ctu_tst_macrotest),
+    .ctu_tst_short_chain(ctu_tst_short_chain),
+
+    .si(1'b0), // scan input tied low with a sized constant
+    .so()
+);
+
+  /*
+   * Reset Controller
+   */
+
+  rst_ctrl rst_ctrl_0 (
+
+    // Top-level system inputs
+    .sys_clock_i(sys_clock_i),
+    .sys_reset_i(sys_reset_i),
+
+    // Reset Controller outputs connected to SPARC Core inputs
+    .cluster_cken_o(cluster_cken),
+    .gclk_o(gclk),
+    .cmp_grst_o(cmp_grst_l),
+    .cmp_arst_o(cmp_arst_l),
+    .ctu_tst_pre_grst_o(ctu_tst_pre_grst_l),
+    .adbginit_o(adbginit_l),
+    .gdbginit_o(gdbginit_l),
+    .sys_reset_final_o(sys_reset_final)
+
+  );
+
+  /*
+   * Continuous assignments
+   */
+
+  assign const_cpuid = 4'h0;
+  assign const_cpuid1 = 4'h1;
+  assign const_maskid = 8'h20;
+
+  // sscan
+  assign ctu_tck = 1'b0;
+  assign ctu_sscan_se = 1'b0;
+  assign ctu_sscan_snap = 1'b0;
+  assign ctu_sscan_tid = 4'h1;
+
+  // bist
+  assign ctu_tst_mbist_enable = 1'b0;
+
+  // efuse
+  assign efc_spc_fuse_clk1 = 1'b0;     // Activity
+  assign efc_spc_fuse_clk2 = 1'b0;     // Activity
+  assign efc_spc_ifuse_ashift = 1'b0;
+  assign efc_spc_ifuse_dshift = 1'b0;
+  assign efc_spc_ifuse_data = 1'b0;    // Activity
+  assign efc_spc_dfuse_ashift = 1'b0;
+  assign efc_spc_dfuse_dshift = 1'b0;
+  assign efc_spc_dfuse_data = 1'b0;    // Activity
+
+  // scan and macro test
+  assign ctu_tst_macrotest = 1'b0;
+  assign ctu_tst_scan_disable = 1'b0;
+  assign ctu_tst_short_chain = 1'b0;
+  assign global_shift_enable = 1'b0;
+  assign ctu_tst_scanmode = 1'b0;
+  assign spc_scanin0 = 1'b0;
+  assign spc_scanin1 = 1'b0;
+
+endmodule
Index: /trunk/OC-Ethernet/eth_spram_256x32.v
===================================================================
--- /trunk/OC-Ethernet/eth_spram_256x32.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_spram_256x32.v	(revision 6)
@@ -0,0 +1,309 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_spram_256x32.v                                          ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001, 2002 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.9  2003/12/05 12:43:06  tadejm
+// Corrected address mismatch for xilinx RAMB4_S8 model which has wider address than RAMB4_S16.
+//
+// Revision 1.8  2003/12/04 14:59:13  simons
+// Lapsus fixed (!we -> ~we).
+//
+// Revision 1.7  2003/11/12 18:24:59  tadejm
+// WISHBONE slave changed and tested from only 32-bit access to byte access.
+//
+// Revision 1.6  2003/10/17 07:46:15  markom
+// mbist signals updated according to newest convention
+//
+// Revision 1.5  2003/08/14 16:42:58  simons
+// Artisan ram instance added.
+//
+// Revision 1.4  2002/10/18 17:04:20  tadejm
+// Changed BIST scan signals.
+//
+// Revision 1.3  2002/10/10 16:29:30  mohor
+// BIST added.
+//
+// Revision 1.2  2002/09/23 18:24:31  mohor
+// ETH_VIRTUAL_SILICON_RAM supported (for ASIC implementation).
+//
+// Revision 1.1  2002/07/23 16:36:09  mohor
+// ethernet spram added. So far a generic ram and xilinx RAMB4 are used.
+//
+//
+//
+
+`include "eth_defines.v"
+`include "timescale.v"
+
+module eth_spram_256x32(
+	// Generic synchronous single-port RAM interface
+	clk, rst, ce, we, oe, addr, di, do
+
+`ifdef ETH_BIST
+  ,
+  // debug chain signals
+  mbist_si_i,       // bist scan serial in
+  mbist_so_o,       // bist scan serial out
+  mbist_ctrl_i        // bist chain shift control
+`endif
+
+
+
+);
+
+	//
+	// Generic synchronous single-port RAM interface
+	//
+	input           clk;  // Clock, rising edge
+	input           rst;  // Reset, active high
+	input           ce;   // Chip enable input, active high
+	input  [3:0]    we;   // Byte write enables, active high: we[n] writes di[8n+7:8n]
+	input           oe;   // Output enable input, active high
+	input  [7:0]    addr; // address bus inputs
+	input  [31:0]   di;   // input data bus
+	output [31:0]   do;   // output data bus
+
+
+`ifdef ETH_BIST
+  input   mbist_si_i;       // bist scan serial in
+  output  mbist_so_o;       // bist scan serial out
+  input [`ETH_MBIST_CTRL_WIDTH - 1:0] mbist_ctrl_i;       // bist chain shift control
+`endif
+// Exactly one implementation below is compiled, chosen by the ETH_* macros; the generic model is the fallback
+`ifdef ETH_XILINX_RAMB4
+
+    /*RAMB4_S16 ram0
+    (
+        .DO      (do[15:0]),
+        .ADDR    (addr),
+        .DI      (di[15:0]),
+        .EN      (ce),
+        .CLK     (clk),
+        .WE      (we),
+        .RST     (rst)
+    );
+
+    RAMB4_S16 ram1
+    (
+        .DO      (do[31:16]),
+        .ADDR    (addr),
+        .DI      (di[31:16]),
+        .EN      (ce),
+        .CLK     (clk),
+        .WE      (we),
+        .RST     (rst)
+    );*/
+
+    RAMB4_S8 ram0
+    (
+        .DO      (do[7:0]),
+        .ADDR    ({1'b0, addr}),
+        .DI      (di[7:0]),
+        .EN      (ce),
+        .CLK     (clk),
+        .WE      (we[0]),
+        .RST     (rst)
+    );
+
+    RAMB4_S8 ram1
+    (
+        .DO      (do[15:8]),
+        .ADDR    ({1'b0, addr}),
+        .DI      (di[15:8]),
+        .EN      (ce),
+        .CLK     (clk),
+        .WE      (we[1]),
+        .RST     (rst)
+    );
+
+    RAMB4_S8 ram2
+    (
+        .DO      (do[23:16]),
+        .ADDR    ({1'b0, addr}),
+        .DI      (di[23:16]),
+        .EN      (ce),
+        .CLK     (clk),
+        .WE      (we[2]),
+        .RST     (rst)
+    );
+
+    RAMB4_S8 ram3
+    (
+        .DO      (do[31:24]),
+        .ADDR    ({1'b0, addr}),
+        .DI      (di[31:24]),
+        .EN      (ce),
+        .CLK     (clk),
+        .WE      (we[3]),
+        .RST     (rst)
+    );
+
+`else   // !ETH_XILINX_RAMB4
+`ifdef  ETH_VIRTUAL_SILICON_RAM
+  `ifdef ETH_BIST
+      //vs_hdsp_256x32_bist ram0_bist
+      vs_hdsp_256x32_bw_bist ram0_bist
+  `else
+      //vs_hdsp_256x32 ram0
+      vs_hdsp_256x32_bw ram0
+  `endif
+      (
+        .CK         (clk),
+        .CEN        (!ce),
+        .WEN        (~we),
+        .OEN        (!oe),
+        .ADR        (addr),
+        .DI         (di),
+        .DOUT       (do)
+
+      `ifdef ETH_BIST
+        ,
+        // debug chain signals
+        .mbist_si_i       (mbist_si_i),
+        .mbist_so_o       (mbist_so_o),
+        .mbist_ctrl_i       (mbist_ctrl_i)
+      `endif
+      );
+
+`else   // !ETH_VIRTUAL_SILICON_RAM
+
+`ifdef  ETH_ARTISAN_RAM
+  `ifdef ETH_BIST
+      //art_hssp_256x32_bist ram0_bist
+      art_hssp_256x32_bw_bist ram0_bist
+  `else
+      //art_hssp_256x32 ram0
+      art_hssp_256x32_bw ram0
+  `endif
+      (
+        .CLK        (clk),
+        .CEN        (!ce),
+        .WEN        (~we),
+        .OEN        (!oe),
+        .A          (addr),
+        .D          (di),
+        .Q          (do)
+
+      `ifdef ETH_BIST
+        ,
+        // debug chain signals
+        .mbist_si_i       (mbist_si_i),
+        .mbist_so_o       (mbist_so_o),
+        .mbist_ctrl_i     (mbist_ctrl_i)
+      `endif
+      );
+
+`else   // !ETH_ARTISAN_RAM
+`ifdef ETH_ALTERA_ALTSYNCRAM
+    // NOTE(review): ce & we is 4 bits wide; if wren is a 1-bit port only we[0] takes effect - confirm against altera_spram_256x32
+    altera_spram_256x32	altera_spram_256x32_inst
+    (
+  	  .address        (addr),
+  	  .wren           (ce & we),
+  	  .clock          (clk),
+  	  .data           (di),
+  	  .q              (do)
+  	);  //exemplar attribute altera_spram_256x32_inst NOOPT TRUE
+
+`else   // !ETH_ALTERA_ALTSYNCRAM
+
+
+	//
+	// Generic single-port synchronous RAM model
+	//
+
+	//
+	// Generic RAM's registers and wires
+	//
+	reg  [ 7: 0] mem0 [255:0]; // RAM content, byte lane 0 (written when we[0])
+	reg  [15: 8] mem1 [255:0]; // RAM content, byte lane 1 (written when we[1])
+	reg  [23:16] mem2 [255:0]; // RAM content, byte lane 2 (written when we[2])
+	reg  [31:24] mem3 [255:0]; // RAM content, byte lane 3 (written when we[3])
+	wire [31:0]  q;            // RAM output
+	reg  [7:0]   raddr;        // RAM read address
+	//
+	// Data output drivers
+	//
+	assign do = (oe & ce) ? q : {32{1'bz}}; // high-Z unless both oe and ce are high
+
+	//
+	// RAM read and write
+	//
+
+	// read operation
+	always@(posedge clk)
+	  if (ce) // && !we)
+		raddr <= #1 addr;    // read address needs to be registered to read clock
+	// rst only forces the read data to zero; the stored contents are not cleared
+	assign #1 q = rst ? {32{1'b0}} : {mem3[raddr], mem2[raddr], mem1[raddr], mem0[raddr]};
+
+	// write operation
+	always@(posedge clk)
+        begin
+		if (ce && we[3])
+			mem3[addr] <= #1 di[31:24];
+		if (ce && we[2])
+			mem2[addr] <= #1 di[23:16];
+		if (ce && we[1])
+			mem1[addr] <= #1 di[15: 8];
+		if (ce && we[0])
+			mem0[addr] <= #1 di[ 7: 0];
+        end
+
+	// Task prints range of memory
+	// *** Remember that tasks are non reentrant, don't call this task in parallel for multiple instantiations. 
+	task print_ram;
+	input [7:0] start;
+	input [7:0] finish;
+	integer rnum;
+  	begin
+    		for (rnum=start;rnum<=finish;rnum=rnum+1)
+      			$display("Addr %h = %0h %0h %0h %0h",rnum,mem3[rnum],mem2[rnum],mem1[rnum],mem0[rnum]);
+  	end
+	endtask
+
+`endif  // !ETH_ALTERA_ALTSYNCRAM
+`endif  // !ETH_ARTISAN_RAM
+`endif  // !ETH_VIRTUAL_SILICON_RAM
+`endif  // !ETH_XILINX_RAMB4
+
+endmodule
Index: /trunk/OC-Ethernet/eth_shiftreg.v
===================================================================
--- /trunk/OC-Ethernet/eth_shiftreg.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_shiftreg.v	(revision 6)
@@ -0,0 +1,151 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_shiftreg.v                                              ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.5  2002/08/14 18:16:59  mohor
+// LinkFail signal was not latching appropriate bit.
+//
+// Revision 1.4  2002/03/02 21:06:01  mohor
+// LinkFail signal was not latching appropriate bit.
+//
+// Revision 1.3  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+// Revision 1.3  2001/06/01 22:28:56  mohor
+// This files (MIIM) are fully working. They were thoroughly tested. The testbench is not updated.
+//
+//
+
+`include "timescale.v"
+
+
+module eth_shiftreg(Clk, Reset, MdcEn_n, Mdi, Fiad, Rgad, CtrlData, WriteOp, ByteSelect,
+                    LatchByte, ShiftedBit, Prsd, LinkFail);
+
+// MIIM shift register.  While MdcEn_n is high: a non-zero ByteSelect loads the
+// selected frame byte into ShiftReg; otherwise ShiftReg shifts left by one bit
+// with Mdi entering bit 0, and LatchByte copies the shifted value into Prsd.
+// ShiftedBit is always ShiftReg[7].  Reset is asynchronous and active high.
+
+parameter Tp=1;               // Delay applied to every register update
+
+input       Clk;              // Input clock (Host clock)
+input       Reset;            // Reset signal
+input       MdcEn_n;          // Enable signal is asserted for one Clk period before Mdc falls.
+input       Mdi;              // MII input data
+input [4:0] Fiad;             // PHY address
+input [4:0] Rgad;             // Register address (within the selected PHY)
+input [15:0]CtrlData;         // Control data (data to be written to the PHY)
+input       WriteOp;          // The current operation is a PHY register write operation
+input [3:0] ByteSelect;       // Byte select (one bit per frame byte to load)
+input [1:0] LatchByte;        // Byte select for latching (read operation)
+
+output      ShiftedBit;       // Bit shifted out of the shift register
+output[15:0]Prsd;             // Read Status Data (data read from the PHY)
+output      LinkFail;         // Link Integrity Signal
+
+reg   [7:0] ShiftReg;         // Shift register for shifting the data in and out
+reg   [15:0]Prsd;
+reg         LinkFail;
+
+// ShiftReg, Prsd and LinkFail are all updated by this single clocked process
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      ShiftReg[7:0] <= #Tp 8'h0;
+      Prsd[15:0] <= #Tp 16'h0;
+      LinkFail <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(MdcEn_n)
+        begin
+          if(|ByteSelect)
+            begin
+              // Explicit default (rather than a synthesis full_case pragma) so that
+              // a ByteSelect value with more than one bit set holds ShiftReg in
+              // synthesis exactly as it does in simulation.
+              case (ByteSelect[3:0])
+                4'h1 :    ShiftReg[7:0] <= #Tp {2'b01, ~WriteOp, WriteOp, Fiad[4:1]};  // 01, op code, PHY address [4:1]
+                4'h2 :    ShiftReg[7:0] <= #Tp {Fiad[0], Rgad[4:0], 2'b10};            // PHY address [0], register address, 10
+                4'h4 :    ShiftReg[7:0] <= #Tp CtrlData[15:8];                         // write data, upper byte
+                4'h8 :    ShiftReg[7:0] <= #Tp CtrlData[7:0];                          // write data, lower byte
+                default : ShiftReg[7:0] <= #Tp ShiftReg[7:0];                          // not one-hot: hold
+              endcase
+            end
+          else
+            begin
+              ShiftReg[7:0] <= #Tp {ShiftReg[6:0], Mdi};
+              if(LatchByte[0])
+                begin
+                  Prsd[7:0] <= #Tp {ShiftReg[6:0], Mdi};
+                  if(Rgad == 5'h01)
+                    LinkFail <= #Tp ~ShiftReg[1];  // becomes Prsd bit [2]; read from ShiftReg[1] because it is not shifted yet
+                end
+              else if(LatchByte[1])
+                Prsd[15:8] <= #Tp {ShiftReg[6:0], Mdi};
+            end
+        end
+    end
+end
+
+assign ShiftedBit = ShiftReg[7];
+
+endmodule
Index: /trunk/OC-Ethernet/eth_rxethmac.v
===================================================================
--- /trunk/OC-Ethernet/eth_rxethmac.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_rxethmac.v	(revision 6)
@@ -0,0 +1,377 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_rxethmac.v                                              ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////      - Novan Hartadi (novan@vlsi.itb.ac.id)                  ////
+////      - Mahmud Galela (mgalela@vlsi.itb.ac.id)                ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.12  2004/04/26 15:26:23  igorm
+// - Bug connected to the TX_BD_NUM_Wr signal fixed (bug came in with the
+//   previous update of the core.
+// - TxBDAddress is set to 0 after the TX is enabled in the MODER register.
+// - RxBDAddress is set to r_TxBDNum<<1 after the RX is enabled in the MODER
+//   register. (thanks to Mathias and Torbjorn)
+// - Multicast reception was fixed. Thanks to Ulrich Gries
+//
+// Revision 1.11  2004/03/17 09:32:15  igorm
+// Multicast detection fixed. Only the LSB of the first byte is checked.
+//
+// Revision 1.10  2002/11/22 01:57:06  mohor
+// Rx Flow control fixed. CF flag added to the RX buffer descriptor. RxAbort
+// synchronized.
+//
+// Revision 1.9  2002/11/19 17:35:35  mohor
+// AddressMiss status is connecting to the Rx BD. AddressMiss is identifying
+// that a frame was received because of the promiscous mode.
+//
+// Revision 1.8  2002/02/16 07:15:27  mohor
+// Testbench fixed, code simplified, unused signals removed.
+//
+// Revision 1.7  2002/02/15 13:44:28  mohor
+// RxAbort is an output. No need to have is declared as wire.
+//
+// Revision 1.6  2002/02/15 11:17:48  mohor
+// File format changed.
+//
+// Revision 1.5  2002/02/14 20:48:43  billditt
+// Addition  of new module eth_addrcheck.v
+//
+// Revision 1.4  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.3  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.2  2001/09/11 14:17:00  mohor
+// Few little NCSIM warnings fixed.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+// Revision 1.1  2001/06/27 21:26:19  mohor
+// Initial release of the RxEthMAC module.
+//
+//
+//
+//
+//
+
+`include "timescale.v"
+
+
+module eth_rxethmac (MRxClk, MRxDV, MRxD, Reset, Transmitting, MaxFL, r_IFG, HugEn, DlyCrcEn,
+                     RxData, RxValid, RxStartFrm, RxEndFrm, ByteCnt, ByteCntEq0, ByteCntGreat2,
+                     ByteCntMaxFrame, CrcError, StateIdle, StatePreamble, StateSFD, StateData,
+                     MAC, r_Pro, r_Bro, r_HASH0, r_HASH1, RxAbort, AddressMiss, PassAll, ControlFrmAddressOK
+                    );
+
+// Receive MAC top level.  Connects the receive state machine, the receive
+// counters, the address checker and the CRC unit, assembles the incoming MRxD
+// nibbles into bytes, and generates the RxData / RxValid / RxStartFrm /
+// RxEndFrm stream together with the Broadcast and Multicast flags.
+
+parameter Tp = 1;                   // register update delay
+
+// Clock, reset and line-side inputs
+input         MRxClk;
+input         Reset;
+input         MRxDV;
+input   [3:0] MRxD;
+input         Transmitting;
+
+// Configuration inputs
+input         HugEn;
+input         DlyCrcEn;
+input  [15:0] MaxFL;
+input         r_IFG;
+input  [47:0] MAC;                  // station address
+input         r_Bro;                // broadcast disable
+input         r_Pro;                // promiscuous enable
+input  [31:0] r_HASH0;              // lower 4 bytes of the hash table
+input  [31:0] r_HASH1;              // upper 4 bytes of the hash table
+input         PassAll;
+input         ControlFrmAddressOK;
+
+// Received byte stream and frame status
+output  [7:0] RxData;
+output        RxValid;
+output        RxStartFrm;
+output        RxEndFrm;
+output        RxAbort;
+output        AddressMiss;
+output        CrcError;
+
+// Counter and state-machine status
+output [15:0] ByteCnt;
+output        ByteCntEq0;
+output        ByteCntGreat2;
+output        ByteCntMaxFrame;
+output        StateIdle;
+output        StatePreamble;
+output        StateSFD;
+output  [1:0] StateData;
+
+// Registers
+reg     [7:0] RxData;
+reg     [7:0] RxData_d;
+reg     [7:0] LatchedByte;
+reg           DelayData;
+reg           RxValid;
+reg           RxValid_d;
+reg           RxStartFrm;
+reg           RxStartFrm_d;
+reg           RxEndFrm;
+reg           RxEndFrm_d;
+reg           Broadcast;
+reg           Multicast;
+reg     [5:0] CrcHash;
+reg           CrcHashGood;
+
+// Internal nets
+wire          MRxDEqD;
+wire          MRxDEq5;
+wire          StateDrop;
+wire          IFGCounterEq24;
+wire    [3:0] DlyCrcCnt;
+wire          ByteCntEq1;
+wire          ByteCntEq2;
+wire          ByteCntEq3;
+wire          ByteCntEq4;
+wire          ByteCntEq5;
+wire          ByteCntEq6;
+wire          ByteCntEq7;
+wire          ByteCntSmall7;
+wire   [31:0] Crc;
+wire    [3:0] Data_Crc;
+wire          Enable_Crc;
+wire          Initialize_Crc;
+wire          GenerateRxValid;
+wire          GenerateRxStartFrm;
+wire          GenerateRxEndFrm;
+wire          DribbleRxEndFrm;
+
+// Nibble decodes
+assign MRxDEqD = (MRxD == 4'hd);
+assign MRxDEq5 = (MRxD == 4'h5);
+
+// Receive state machine
+eth_rxstatem rxstatem1
+  (.MRxClk          (MRxClk),
+   .Reset           (Reset),
+   .MRxDV           (MRxDV),
+   .MRxDEq5         (MRxDEq5),
+   .MRxDEqD         (MRxDEqD),
+   .Transmitting    (Transmitting),
+   .IFGCounterEq24  (IFGCounterEq24),
+   .ByteCntEq0      (ByteCntEq0),
+   .ByteCntGreat2   (ByteCntGreat2),
+   .ByteCntMaxFrame (ByteCntMaxFrame),
+   .StateIdle       (StateIdle),
+   .StatePreamble   (StatePreamble),
+   .StateSFD        (StateSFD),
+   .StateData       (StateData),
+   .StateDrop       (StateDrop)
+  );
+
+// Receive counters
+eth_rxcounters rxcounters1
+  (.MRxClk(MRxClk),               .Reset(Reset),                   .MRxDV(MRxDV),
+   .MRxDEqD(MRxDEqD),             .Transmitting(Transmitting),
+   .StateIdle(StateIdle),         .StatePreamble(StatePreamble),   .StateSFD(StateSFD),
+   .StateData(StateData),         .StateDrop(StateDrop),
+   .DlyCrcEn(DlyCrcEn),           .DlyCrcCnt(DlyCrcCnt),
+   .MaxFL(MaxFL),                 .r_IFG(r_IFG),                   .HugEn(HugEn),
+   .IFGCounterEq24(IFGCounterEq24),
+   .ByteCntEq0(ByteCntEq0),       .ByteCntEq1(ByteCntEq1),         .ByteCntEq2(ByteCntEq2),
+   .ByteCntEq3(ByteCntEq3),       .ByteCntEq4(ByteCntEq4),         .ByteCntEq5(ByteCntEq5),
+   .ByteCntEq6(ByteCntEq6),       .ByteCntEq7(ByteCntEq7),
+   .ByteCntGreat2(ByteCntGreat2), .ByteCntSmall7(ByteCntSmall7),   .ByteCntMaxFrame(ByteCntMaxFrame),
+   .ByteCntOut(ByteCnt)
+  );
+
+// Destination address check
+eth_rxaddrcheck rxaddrcheck1
+  (.MRxClk(MRxClk),               .Reset(Reset),
+   .RxData(RxData),               .RxEndFrm(RxEndFrm),             .StateData(StateData),
+   .ByteCntEq2(ByteCntEq2),       .ByteCntEq3(ByteCntEq3),         .ByteCntEq4(ByteCntEq4),
+   .ByteCntEq5(ByteCntEq5),       .ByteCntEq6(ByteCntEq6),         .ByteCntEq7(ByteCntEq7),
+   .MAC(MAC),                     .HASH0(r_HASH0),                 .HASH1(r_HASH1),
+   .CrcHash(CrcHash),             .CrcHashGood(CrcHashGood),
+   .Broadcast(Broadcast),         .Multicast(Multicast),
+   .r_Bro(r_Bro),                 .r_Pro(r_Pro),                   .PassAll(PassAll),
+   .ControlFrmAddressOK(ControlFrmAddressOK),
+   .RxAbort(RxAbort),             .AddressMiss(AddressMiss)
+  );
+
+// CRC is enabled for nibbles received in the data states while
+// ByteCntMaxFrame is low; it is re-initialised in the SFD state and, with
+// DlyCrcEn set, while DlyCrcCnt is between 1 and 8
+assign Enable_Crc     = MRxDV & ((|StateData) & ~ByteCntMaxFrame);
+assign Initialize_Crc = StateSFD | (DlyCrcEn & (|DlyCrcCnt) & (DlyCrcCnt < 4'h9));
+
+// Each nibble is presented to the CRC unit bit-reversed
+assign Data_Crc = {MRxD[0], MRxD[1], MRxD[2], MRxD[3]};
+
+eth_crc crcrx
+  (.Clk        (MRxClk),
+   .Reset      (Reset),
+   .Data       (Data_Crc),
+   .Enable     (Enable_Crc),
+   .Initialize (Initialize_Crc),
+   .Crc        (Crc),
+   .CrcError   (CrcError)
+  );
+
+// Hash-table index: the top six CRC bits, captured in StateData[0] when
+// ByteCntEq6 is set.  CrcHashGood is that same capture condition, registered.
+always @ (posedge MRxClk)
+begin
+  CrcHashGood <= #Tp StateData[0] & ByteCntEq6;
+end
+
+always @ (posedge MRxClk)
+begin
+  if(Reset | StateIdle)
+    CrcHash <= #Tp 6'h0;
+  else if(StateData[0] & ByteCntEq6)
+    CrcHash <= #Tp Crc[31:26];
+end
+
+// Byte assembly and output data pipeline
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      LatchedByte <= #Tp 8'h0;
+      DelayData   <= #Tp 1'b0;
+      RxData_d    <= #Tp 8'h0;
+      RxData      <= #Tp 8'h0;
+    end
+  else
+    begin
+      // The new nibble enters at the top; the previous upper nibble moves down
+      LatchedByte <= #Tp {MRxD[3:0], LatchedByte[7:4]};
+      DelayData   <= #Tp StateData[0];
+
+      // Capture the assembled byte (forced to zero outside the data states);
+      // otherwise clear it unless StateData[0] was active one cycle earlier
+      if(GenerateRxValid)
+        RxData_d <= #Tp LatchedByte & {8{|StateData}};
+      else if(~DelayData)
+        RxData_d <= #Tp 8'h0;
+
+      RxData <= #Tp RxData_d;         // output data byte
+    end
+end
+
+// Broadcast: set when the byte latched at ByteCntEq1 is all ones, cleared when
+// a byte that is not all ones is latched while ByteCntSmall7 is set, and
+// cleared on RxAbort / RxEndFrm
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    Broadcast <= #Tp 1'b0;
+  else if(StateData[0] & ~(&LatchedByte) & ByteCntSmall7)
+    Broadcast <= #Tp 1'b0;
+  else if(StateData[0] & (&LatchedByte) & ByteCntEq1)
+    Broadcast <= #Tp 1'b1;
+  else if(RxAbort | RxEndFrm)
+    Broadcast <= #Tp 1'b0;
+end
+
+// Multicast: set when bit 0 of the byte latched at ByteCntEq1 is 1, cleared on
+// RxAbort / RxEndFrm
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    Multicast <= #Tp 1'b0;
+  else if(StateData[0] & ByteCntEq1 & LatchedByte[0])
+    Multicast <= #Tp 1'b1;
+  else if(RxAbort | RxEndFrm)
+    Multicast <= #Tp 1'b0;
+end
+
+// Frame qualifiers.  Each Generate* term passes through two register stages;
+// DribbleRxEndFrm is merged into RxEndFrm at the second stage.
+assign GenerateRxValid    = StateData[0] & (~ByteCntEq0 | (DlyCrcCnt >= 4'h3));
+assign GenerateRxStartFrm = StateData[0] & ((ByteCntEq1 & ~DlyCrcEn) | ((DlyCrcCnt == 4'h3) & DlyCrcEn));
+assign GenerateRxEndFrm   = StateData[0] & ((~MRxDV & ByteCntGreat2) | ByteCntMaxFrame);
+assign DribbleRxEndFrm    = StateData[1] & ~MRxDV & ByteCntGreat2;
+
+// RxValid pipeline
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    {RxValid, RxValid_d} <= #Tp 2'b00;
+  else
+    {RxValid, RxValid_d} <= #Tp {RxValid_d, GenerateRxValid};
+end
+
+// RxStartFrm pipeline
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    {RxStartFrm, RxStartFrm_d} <= #Tp 2'b00;
+  else
+    {RxStartFrm, RxStartFrm_d} <= #Tp {RxStartFrm_d, GenerateRxStartFrm};
+end
+
+// RxEndFrm pipeline
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    {RxEndFrm, RxEndFrm_d} <= #Tp 2'b00;
+  else
+    {RxEndFrm, RxEndFrm_d} <= #Tp {RxEndFrm_d | DribbleRxEndFrm, GenerateRxEndFrm};
+end
+
+endmodule
Index: /trunk/OC-Ethernet/eth_rxcounters.v
===================================================================
--- /trunk/OC-Ethernet/eth_rxcounters.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_rxcounters.v	(revision 6)
@@ -0,0 +1,218 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_rxcounters.v                                            ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////      - Novan Hartadi (novan@vlsi.itb.ac.id)                  ////
+////      - Mahmud Galela (mgalela@vlsi.itb.ac.id)                ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.5  2002/02/15 11:13:29  mohor
+// Format of the file changed a bit.
+//
+// Revision 1.4  2002/02/14 20:19:41  billditt
+// Modified for Address Checking,
+// addition of eth_addrcheck.v
+//
+// Revision 1.3  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+// Revision 1.1  2001/06/27 21:26:19  mohor
+// Initial release of the RxEthMAC module.
+//
+//
+//
+//
+//
+//
+
+
+`include "timescale.v"
+
+
+module eth_rxcounters (MRxClk, Reset, MRxDV, StateIdle, StateSFD, StateData, StateDrop, StatePreamble,
+                       MRxDEqD, DlyCrcEn, DlyCrcCnt, Transmitting, MaxFL, r_IFG, HugEn, IFGCounterEq24,
+                       ByteCntEq0, ByteCntEq1, ByteCntEq2, ByteCntEq3, ByteCntEq4, ByteCntEq5, ByteCntEq6,
+                       ByteCntEq7, ByteCntGreat2, ByteCntSmall7, ByteCntMaxFrame, ByteCntOut
+                      );
+
+// Receive-side counters, all clocked by MRxClk with an asynchronous Reset:
+//   ByteCnt    - 16-bit byte counter and its decoded comparison flags
+//   IFGCounter - 5-bit inter-frame gap counter
+//   DlyCrcCnt  - 4-bit counter that runs only when DlyCrcEn is set
+
+parameter Tp = 1;                       // register update delay
+
+// Clock and reset
+input         MRxClk;
+input         Reset;
+
+// Receive data qualifiers and receive state-machine state
+input         MRxDV;
+input         MRxDEqD;
+input         StateIdle;
+input         StatePreamble;
+input         StateSFD;
+input [1:0]   StateData;
+input         StateDrop;
+input         Transmitting;
+
+// Configuration
+input         DlyCrcEn;
+input         HugEn;
+input [15:0]  MaxFL;
+input         r_IFG;
+
+output        IFGCounterEq24;           // IFG counter equals 24, or r_IFG is set
+output [3:0]  DlyCrcCnt;                // Delayed CRC counter
+output        ByteCntEq0;               // ByteCnt == 0
+output        ByteCntEq1;               // ByteCnt == 1
+output        ByteCntEq2;               // ByteCnt == 2
+output        ByteCntEq3;               // ByteCnt == 3
+output        ByteCntEq4;               // ByteCnt == 4
+output        ByteCntEq5;               // ByteCnt == 5
+output        ByteCntEq6;               // ByteCnt == 6
+output        ByteCntEq7;               // ByteCnt == 7
+output        ByteCntGreat2;            // ByteCnt >  2
+output        ByteCntSmall7;            // ByteCnt <  7
+output        ByteCntMaxFrame;          // ByteCnt == MaxFL while HugEn is clear
+output [15:0] ByteCntOut;               // ByteCnt, plus 4 when DlyCrcEn is set
+
+reg   [15:0]  ByteCnt;
+reg   [4:0]   IFGCounter;
+reg   [3:0]   DlyCrcCnt;
+
+wire          ResetByteCounter;
+wire          IncrementByteCounter;
+wire          ByteCntMax;
+wire  [15:0]  ByteCntDelayed;
+wire          ResetIFGCounter;
+wire          IncrementIFGCounter;
+
+// Byte counter clear: MRxDV with MRxDEqD in the SFD state, or with ByteCntMaxFrame in StateData[0]
+assign ResetByteCounter = MRxDV & ((StateSFD & MRxDEqD) | (StateData[0] & ByteCntMaxFrame));
+
+// Counts, while MRxDV is high and no clear is requested, in the preamble and
+// SFD states, in idle when not transmitting, and in StateData[1] unless the
+// count is already 16'hffff or (with DlyCrcEn) DlyCrcCnt is non-zero
+assign IncrementByteCounter = ~ResetByteCounter & MRxDV &
+                              ( StatePreamble
+                              | StateSFD
+                              | (StateIdle & ~Transmitting)
+                              | (StateData[1] & ~ByteCntMax & ~(DlyCrcEn & (|DlyCrcCnt)))
+                              );
+
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    ByteCnt <= #Tp 16'h0;
+  else if(ResetByteCounter)
+    ByteCnt <= #Tp 16'h0;
+  else if(IncrementByteCounter)
+    ByteCnt <= #Tp ByteCnt + 1'b1;
+end
+
+assign ByteCntDelayed = ByteCnt + 3'h4;
+assign ByteCntOut     = DlyCrcEn ? ByteCntDelayed : ByteCnt;
+
+// Decoded byte-counter values
+assign ByteCntEq0      = (ByteCnt == 16'h0);
+assign ByteCntEq1      = (ByteCnt == 16'h1);
+assign ByteCntEq2      = (ByteCnt == 16'h2);
+assign ByteCntEq3      = (ByteCnt == 16'h3);
+assign ByteCntEq4      = (ByteCnt == 16'h4);
+assign ByteCntEq5      = (ByteCnt == 16'h5);
+assign ByteCntEq6      = (ByteCnt == 16'h6);
+assign ByteCntEq7      = (ByteCnt == 16'h7);
+assign ByteCntGreat2   = (ByteCnt >  16'h2);
+assign ByteCntSmall7   = (ByteCnt <  16'h7);
+assign ByteCntMax      = (ByteCnt == 16'hffff);
+assign ByteCntMaxFrame = (ByteCnt == MaxFL[15:0]) & ~HugEn;
+
+// IFG counter clear: MRxDEqD in the SFD state (with MRxDV high), and throughout StateDrop
+assign ResetIFGCounter = (StateSFD & MRxDV & MRxDEqD) | StateDrop;
+
+// Advances in the idle, preamble and SFD states until IFGCounterEq24 is set (the StateDrop term is masked by the clear)
+assign IncrementIFGCounter = ~ResetIFGCounter & ~IFGCounterEq24 &
+                             (StateDrop | StateIdle | StatePreamble | StateSFD);
+
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    IFGCounter <= #Tp 5'h0;
+  else if(ResetIFGCounter)
+    IFGCounter <= #Tp 5'h0;
+  else if(IncrementIFGCounter)
+    IFGCounter <= #Tp IFGCounter + 1'b1;
+end
+
+// 24 counts (24*400 = 9600 ns per the original note), or forced when r_IFG is set to 1
+assign IFGCounterEq24 = (IFGCounter == 5'h18) | r_IFG;
+
+// Delayed CRC counter: set to 1 in the SFD state when DlyCrcEn is set, then counts up and returns to 0 after reaching 9
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    DlyCrcCnt <= #Tp 4'h0;
+  else if(DlyCrcCnt == 4'h9)
+    DlyCrcCnt <= #Tp 4'h0;
+  else if(DlyCrcEn & StateSFD)
+    DlyCrcCnt <= #Tp 4'h1;
+  else if(DlyCrcEn & (|DlyCrcCnt))
+    DlyCrcCnt <= #Tp DlyCrcCnt + 1'b1;
+end
+
+endmodule
Index: /trunk/OC-Ethernet/eth_txethmac.v
===================================================================
--- /trunk/OC-Ethernet/eth_txethmac.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_txethmac.v	(revision 6)
@@ -0,0 +1,492 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_txethmac.v                                              ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////      - Novan Hartadi (novan@vlsi.itb.ac.id)                  ////
+////      - Mahmud Galela (mgalela@vlsi.itb.ac.id)                ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.8  2003/01/30 13:33:24  mohor
+// When padding was enabled and crc disabled, frame was not ended correctly.
+//
+// Revision 1.7  2002/02/26 16:24:01  mohor
+// RetryCntLatched was unused and removed from design
+//
+// Revision 1.6  2002/02/22 12:56:35  mohor
+// Retry is not activated when a Tx Underrun occurred
+//
+// Revision 1.5  2002/02/11 09:18:22  mohor
+// Tx status is written back to the BD.
+//
+// Revision 1.4  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.3  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.2  2001/09/11 14:17:00  mohor
+// Few little NCSIM warnings fixed.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed to have an eth_ prefix in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joined together.
+//
+// Revision 1.3  2001/06/19 18:16:40  mohor
+// TxClk changed to MTxClk (as described in the documentation).
+// Crc changed so only one file can be used instead of two.
+//
+// Revision 1.2  2001/06/19 10:38:08  mohor
+// Minor changes in header.
+//
+// Revision 1.1  2001/06/19 10:27:58  mohor
+// TxEthMAC initial release.
+//
+//
+//
+
+`include "timescale.v"
+
+
+module eth_txethmac (MTxClk, Reset, TxStartFrm, TxEndFrm, TxUnderRun, TxData, CarrierSense, 
+                     Collision, Pad, CrcEn, FullD, HugEn, DlyCrcEn, MinFL, MaxFL, IPGT, 
+                     IPGR1, IPGR2, CollValid, MaxRet, NoBckof, ExDfrEn, 
+                     MTxD, MTxEn, MTxErr, TxDone, TxRetry, TxAbort, TxUsedData, WillTransmit, 
+                     ResetCollision, RetryCnt, StartTxDone, StartTxAbort, MaxCollisionOccured,
+                     LateCollision, DeferIndication, StatePreamble, StateData
+
+                    );
+
+parameter Tp = 1;
+
+
+input MTxClk;                   // Transmit clock (from PHY)
+input Reset;                    // Reset
+input TxStartFrm;               // Transmit packet start frame
+input TxEndFrm;                 // Transmit packet end frame
+input TxUnderRun;               // Transmit packet under-run
+input [7:0] TxData;             // Transmit packet data byte
+input CarrierSense;             // Carrier sense (synchronized)
+input Collision;                // Collision (synchronized)
+input Pad;                      // Pad enable (from register)
+input CrcEn;                    // Crc enable (from register)
+input FullD;                    // Full duplex (from register)
+input HugEn;                    // Huge packets enable (from register)
+input DlyCrcEn;                 // Delayed Crc enabled (from register)
+input [15:0] MinFL;             // Minimum frame length (from register)
+input [15:0] MaxFL;             // Maximum frame length (from register)
+input [6:0] IPGT;               // Back to back transmit inter packet gap parameter (from register)
+input [6:0] IPGR1;              // Non back to back transmit inter packet gap parameter IPGR1 (from register)
+input [6:0] IPGR2;              // Non back to back transmit inter packet gap parameter IPGR2 (from register)
+input [5:0] CollValid;          // Valid collision window (from register)
+input [3:0] MaxRet;             // Maximum retry number (from register)
+input NoBckof;                  // No backoff (from register)
+input ExDfrEn;                  // Excessive deferral enable (from register)
+
+output [3:0] MTxD;              // Transmit nibble (to PHY)
+output MTxEn;                   // Transmit enable (to PHY)
+output MTxErr;                  // Transmit error (to PHY)
+output TxDone;                  // Transmit packet done (to RISC)
+output TxRetry;                 // Transmit packet retry (to RISC)
+output TxAbort;                 // Transmit packet abort (to RISC)
+output TxUsedData;              // Transmit packet used data (to RISC)
+output WillTransmit;            // Will transmit (to RxEthMAC)
+output ResetCollision;          // Reset Collision (for synchronizing collision)
+output [3:0] RetryCnt;          // Latched Retry Counter for tx status purposes
+output StartTxDone;             // Frame ended without collision (StateFCS & NibCntEq7, or end of data when CrcEn is 0)
+output StartTxAbort;            // Any abort cause: TooBig, UnderRun, excessive defer, late or max collision
+output MaxCollisionOccured;     // Jam started inside the collision window with RetryCnt == MaxRet
+output LateCollision;           // Jam started outside the collision window (and no under-run)
+output DeferIndication;         // Connected to the txstatem1 instance
+output StatePreamble;           // Preamble state flag, connected to the txstatem1 instance
+output [1:0] StateData;         // Data state flags: [0] selects TxData[3:0], [1] selects TxData[7:4]
+
+reg [3:0] MTxD;
+reg MTxEn;
+reg MTxErr;
+reg TxDone;
+reg TxRetry;
+reg TxAbort;
+reg TxUsedData;
+reg WillTransmit;
+reg ColWindow;
+reg StopExcessiveDeferOccured;
+reg [3:0] RetryCnt;
+reg [3:0] MTxD_d;
+reg StatusLatch;
+reg PacketFinished_q;
+reg PacketFinished;
+
+
+wire ExcessiveDeferOccured;
+wire StartIPG;
+wire StartPreamble;
+wire [1:0] StartData;
+wire StartFCS;
+wire StartJam;
+wire StartDefer;
+wire StartBackoff;
+wire StateDefer;
+wire StateIPG;
+wire StateIdle;
+wire StatePAD;
+wire StateFCS;
+wire StateJam;
+wire StateJam_q;
+wire StateBackOff;
+wire StateSFD;
+wire StartTxRetry;
+wire UnderRun;
+wire TooBig;
+wire [31:0] Crc;
+wire CrcError;
+wire [2:0] DlyCrcCnt;
+wire [15:0] NibCnt;
+wire NibCntEq7;
+wire NibCntEq15;
+wire NibbleMinFl;
+wire ExcessiveDefer;
+wire [15:0] ByteCnt;
+wire MaxFrame;
+wire RetryMax;
+wire RandomEq0;
+wire RandomEqByteCnt;
+wire PacketFinished_d;
+
+
+// High whenever the transmitter is in none of the Preamble, Data, PAD or FCS states
+assign ResetCollision = ~(StatePreamble | (|StateData) | StatePAD | StateFCS);
+// Excessive defer while a frame is requested in Defer state; StopExcessiveDeferOccured masks repeats
+assign ExcessiveDeferOccured = TxStartFrm & StateDefer & ExcessiveDefer & ~StopExcessiveDeferOccured;
+// Good end of frame: StateFCS & NibCntEq7, or (CrcEn = 0) TxEndFrm in StateData[1] with Pad off or NibbleMinFl set
+assign StartTxDone = ~Collision & (StateFCS & NibCntEq7 | StateData[1] & TxEndFrm & (~Pad | Pad & NibbleMinFl) & ~CrcEn);
+// Under-run is only recognised in StateData[0] and only without a collision
+assign UnderRun = StateData[0] & TxUnderRun & ~Collision;
+// MaxFrame reached without collision: in StateData[0] with no under-run, or in StateFCS
+assign TooBig = ~Collision & MaxFrame & (StateData[0] & ~TxUnderRun | StateFCS);
+// Retry: jam starts inside the collision window, retry limit not reached, no under-run (older form kept below)
+// assign StartTxRetry = StartJam & (ColWindow & ~RetryMax);
+assign StartTxRetry = StartJam & (ColWindow & ~RetryMax) & ~UnderRun;
+// Jam starts after the collision window has closed
+assign LateCollision = StartJam & ~ColWindow & ~UnderRun;
+// Jam starts inside the collision window but RetryCnt already equals MaxRet
+assign MaxCollisionOccured = StartJam & ColWindow & RetryMax;
+// Preamble state with NibCntEq15; the nibble mux drives 4'hd (SFD) in this condition
+assign StateSFD = StatePreamble & NibCntEq15;
+// OR of every abort cause
+assign StartTxAbort = TooBig | UnderRun | ExcessiveDeferOccured | LateCollision | MaxCollisionOccured;
+
+
+// StopExcessiveDeferOccured: set by ExcessiveDeferOccured, held until TxStartFrm goes low
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    StopExcessiveDeferOccured <= #Tp 1'b0;
+  else
+    begin
+      if(~TxStartFrm)                                  // no frame requested: re-arm
+        StopExcessiveDeferOccured <= #Tp 1'b0;
+      else
+      if(ExcessiveDeferOccured)                        // event seen: its own assign is masked from now on
+        StopExcessiveDeferOccured <= #Tp 1'b1;
+    end
+end
+
+
+// Collision window: ColWindow resets to 1, drops when ByteCnt[5:0] == CollValid, is set again in Idle/IPG
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    ColWindow <= #Tp 1'b1;
+  else
+    begin  // '==' binds tighter than '&' below: ~Collision & (ByteCnt[5:0] == CollValid[5:0]) & (...)
+      if(~Collision & ByteCnt[5:0] == CollValid[5:0] & (StateData[1] | StatePAD & NibCnt[0] | StateFCS & NibCnt[0]))
+        ColWindow <= #Tp 1'b0;                         // window closes; later jams count as late collisions
+      else
+      if(StateIdle | StateIPG)
+        ColWindow <= #Tp 1'b1;                         // window re-opens between frames
+    end
+end
+
+
+// StatusLatch: set while TxStartFrm is high once StateIdle or an excessive defer is seen; gates status clearing
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    StatusLatch <= #Tp 1'b0;
+  else
+    begin
+      if(~TxStartFrm)                                  // request gone: clear
+        StatusLatch <= #Tp 1'b0;
+      else
+      if(ExcessiveDeferOccured | StateIdle)
+        StatusLatch <= #Tp 1'b1;                       // from now on TxDone/TxRetry/TxAbort are no longer cleared
+     end
+end
+
+
+// Transmit packet used data: registered OR of the two StartData bits
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    TxUsedData <= #Tp 1'b0;
+  else
+    TxUsedData <= #Tp |StartData;                      // high for the clock after either StartData bit is set
+end
+
+
+// Transmit packet done: sticky flag set by StartTxDone
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    TxDone <= #Tp 1'b0;
+  else
+    begin
+      if(TxStartFrm & ~StatusLatch)                    // frame requested but StatusLatch not yet set: clear old status
+        TxDone <= #Tp 1'b0;
+      else
+      if(StartTxDone)
+        TxDone <= #Tp 1'b1;
+    end
+end
+
+
+// Transmit packet retry: sticky flag set by StartTxRetry
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    TxRetry <= #Tp 1'b0;
+  else
+    begin
+      if(TxStartFrm & ~StatusLatch)                    // frame requested but StatusLatch not yet set: clear old status
+        TxRetry <= #Tp 1'b0;
+      else
+      if(StartTxRetry)
+        TxRetry <= #Tp 1'b1;
+     end
+end
+
+
+// Transmit packet abort: sticky flag set by StartTxAbort
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    TxAbort <= #Tp 1'b0;
+  else
+    begin
+      if(TxStartFrm & ~StatusLatch & ~ExcessiveDeferOccured) // not cleared in a cycle where excessive defer fires
+        TxAbort <= #Tp 1'b0;
+      else
+      if(StartTxAbort)                                 // StartTxAbort includes ExcessiveDeferOccured
+        TxAbort <= #Tp 1'b1;
+    end
+end
+
+
+// Retry counter: cleared when a frame ends or aborts, incremented after a jam inside the collision window
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    RetryCnt[3:0] <= #Tp 4'h0;
+  else
+    begin
+      if(ExcessiveDeferOccured | UnderRun | TooBig | StartTxDone | TxUnderRun 
+          | StateJam & NibCntEq7 & (~ColWindow | RetryMax)) // jam at NibCntEq7 after a late collision or at the retry limit
+        RetryCnt[3:0] <= #Tp 4'h0;
+      else                                             // jam at NibCntEq7 with no backoff needed, or backoff reaching RandomEqByteCnt
+      if(StateJam & NibCntEq7 & ColWindow & (RandomEq0 | NoBckof) | StateBackOff & RandomEqByteCnt)
+        RetryCnt[3:0] <= #Tp RetryCnt[3:0] + 1'b1;
+    end
+end
+
+
+assign RetryMax = RetryCnt[3:0] == MaxRet[3:0];        // retry limit reached
+
+
+// Transmit nibble (combinational): priority mux producing the value registered into MTxD
+always @ (StatePreamble or StateData or StateFCS or StateJam or TxData or 
+          Crc or NibCntEq15)                                    // lists exactly the signals read below
+begin
+  if(StateData[0])
+    MTxD_d[3:0] = TxData[3:0];                                  // Lower nibble
+  else
+  if(StateData[1])
+    MTxD_d[3:0] = TxData[7:4];                                  // Higher nibble
+  else
+  if(StateFCS)
+    MTxD_d[3:0] = {~Crc[28], ~Crc[29], ~Crc[30], ~Crc[31]};     // Crc[31:28], inverted and bit-reversed
+  else
+  if(StateJam)
+    MTxD_d[3:0] = 4'h9;                                         // Jam pattern
+  else
+  if(StatePreamble)
+    if(NibCntEq15)
+      MTxD_d[3:0] = 4'hd;                                       // SFD
+    else                                                        // this else pairs with if(NibCntEq15)
+      MTxD_d[3:0] = 4'h5;                                       // Preamble
+  else                                                          // this else pairs with if(StatePreamble)
+    MTxD_d[3:0] = 4'h0;                                         // no transmitting state: drive zeros
+end
+
+
+// Transmit Enable: registered OR of the Preamble, Data, PAD, FCS and Jam state flags
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    MTxEn <= #Tp 1'b0;
+  else
+    MTxEn <= #Tp StatePreamble | (|StateData) | StatePAD | StateFCS | StateJam;
+end
+
+
+// Transmit nibble (registered): MTxD follows the MTxD_d mux output one clock later
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    MTxD[3:0] <= #Tp 4'h0;
+  else
+    MTxD[3:0] <= #Tp MTxD_d[3:0];
+end
+
+
+// Transmit error: registered TooBig | UnderRun (not sticky; follows the two events)
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    MTxErr <= #Tp 1'b0;
+  else
+    MTxErr <= #Tp TooBig | UnderRun;
+end
+
+
+// WillTransmit: same terms as MTxEn plus StartPreamble
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    WillTransmit <= #Tp  1'b0;
+  else
+    WillTransmit <= #Tp StartPreamble | StatePreamble | (|StateData) | StatePAD | StateFCS | StateJam;
+end
+
+
+assign PacketFinished_d = StartTxDone | TooBig | UnderRun | LateCollision | MaxCollisionOccured | ExcessiveDeferOccured;
+
+
+// Packet finished: PacketFinished_d delayed by one clock (PacketFinished) and by two (PacketFinished_q)
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      PacketFinished <= #Tp 1'b0;
+      PacketFinished_q  <= #Tp 1'b0;
+    end
+  else
+    begin
+      PacketFinished <= #Tp PacketFinished_d;
+      PacketFinished_q  <= #Tp PacketFinished;         // this twice-delayed copy is what txcounters1 receives
+    end
+end
+
+
+// Counters instance (eth_txcounters): connects NibCnt, ByteCnt, DlyCrcCnt and their flags; directions not visible here
+eth_txcounters txcounters1 (.StatePreamble(StatePreamble), .StateIPG(StateIPG), .StateData(StateData), 
+                            .StatePAD(StatePAD), .StateFCS(StateFCS), .StateJam(StateJam), .StateBackOff(StateBackOff), 
+                            .StateDefer(StateDefer), .StateIdle(StateIdle), .StartDefer(StartDefer), .StartIPG(StartIPG), 
+                            .StartFCS(StartFCS), .StartJam(StartJam), .TxStartFrm(TxStartFrm), .MTxClk(MTxClk), 
+                            .Reset(Reset), .MinFL(MinFL), .MaxFL(MaxFL), .HugEn(HugEn), .ExDfrEn(ExDfrEn), 
+                            .PacketFinished_q(PacketFinished_q), .DlyCrcEn(DlyCrcEn), .StartBackoff(StartBackoff), 
+                            .StateSFD(StateSFD), .ByteCnt(ByteCnt), .NibCnt(NibCnt), .ExcessiveDefer(ExcessiveDefer), 
+                            .NibCntEq7(NibCntEq7), .NibCntEq15(NibCntEq15), .MaxFrame(MaxFrame), .NibbleMinFl(NibbleMinFl), 
+                            .DlyCrcCnt(DlyCrcCnt)
+                           );
+
+
+// State machine instance (eth_txstatem): connects the State* and Start* flags; only NibCnt[6:0] is passed in
+eth_txstatem txstatem1 (.MTxClk(MTxClk), .Reset(Reset), .ExcessiveDefer(ExcessiveDefer), .CarrierSense(CarrierSense), 
+                        .NibCnt(NibCnt[6:0]), .IPGT(IPGT), .IPGR1(IPGR1), .IPGR2(IPGR2), .FullD(FullD), 
+                        .TxStartFrm(TxStartFrm), .TxEndFrm(TxEndFrm), .TxUnderRun(TxUnderRun), .Collision(Collision), 
+                        .UnderRun(UnderRun), .StartTxDone(StartTxDone), .TooBig(TooBig), .NibCntEq7(NibCntEq7), 
+                        .NibCntEq15(NibCntEq15), .MaxFrame(MaxFrame), .Pad(Pad), .CrcEn(CrcEn), 
+                        .NibbleMinFl(NibbleMinFl), .RandomEq0(RandomEq0), .ColWindow(ColWindow), .RetryMax(RetryMax), 
+                        .NoBckof(NoBckof), .RandomEqByteCnt(RandomEqByteCnt), .StateIdle(StateIdle), 
+                        .StateIPG(StateIPG), .StatePreamble(StatePreamble), .StateData(StateData), .StatePAD(StatePAD), 
+                        .StateFCS(StateFCS), .StateJam(StateJam), .StateJam_q(StateJam_q), .StateBackOff(StateBackOff), 
+                        .StateDefer(StateDefer), .StartFCS(StartFCS), .StartJam(StartJam), .StartBackoff(StartBackoff), 
+                        .StartDefer(StartDefer), .DeferIndication(DeferIndication), .StartPreamble(StartPreamble), .StartData(StartData), .StartIPG(StartIPG)
+                       );
+
+
+wire Enable_Crc;                                       // connected to .Enable of txcrc
+wire [3:0] Data_Crc;                                   // connected to .Data of txcrc
+wire Initialize_Crc;                                   // connected to .Initialize of txcrc
+// CRC enable is low only in StateFCS, the state in which the Crc bits are sent on MTxD
+assign Enable_Crc = ~StateFCS;
+
+// CRC input nibble: the current data nibble in reversed bit order, 4'h0 outside the two data states
+assign Data_Crc = StateData[0] ? {TxData[0], TxData[1], TxData[2], TxData[3]} :
+                  StateData[1] ? {TxData[4], TxData[5], TxData[6], TxData[7]} :
+                                 4'h0;
+
+assign Initialize_Crc = StateIdle | StatePreamble | (|DlyCrcCnt); // asserted in Idle, in Preamble and while DlyCrcCnt is non-zero
+
+
+// CRC instance (eth_crc): nibble-wide data input, 32-bit Crc used by the transmit mux in StateFCS
+eth_crc txcrc (.Clk(MTxClk), .Reset(Reset), .Data(Data_Crc), .Enable(Enable_Crc), .Initialize(Initialize_Crc), 
+               .Crc(Crc), .CrcError(CrcError)
+              );
+
+
+// Random instance (eth_random): connects RandomEq0 and RandomEqByteCnt; only ByteCnt[9:0] is passed in
+eth_random random1 (.MTxClk(MTxClk), .Reset(Reset), .StateJam(StateJam), .StateJam_q(StateJam_q), .RetryCnt(RetryCnt), 
+                    .NibCnt(NibCnt), .ByteCnt(ByteCnt[9:0]), .RandomEq0(RandomEq0), .RandomEqByteCnt(RandomEqByteCnt));
+
+
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_defines.v
===================================================================
--- /trunk/OC-Ethernet/eth_defines.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_defines.v	(revision 6)
@@ -0,0 +1,345 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_defines.v                                               ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001, 2002 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.33  2003/11/12 18:24:58  tadejm
+// WISHBONE slave changed and tested from only 32-bit access to byte access.
+//
+// Revision 1.32  2003/10/17 07:46:13  markom
+// mbist signals updated according to newest convention
+//
+// Revision 1.31  2003/08/14 16:42:58  simons
+// Artisan ram instance added.
+//
+// Revision 1.30  2003/06/13 11:55:37  mohor
+// Define file in eth_cop.v is changed to eth_defines.v. Some defines were
+// moved from tb_eth_defines.v to eth_defines.v.
+//
+// Revision 1.29  2002/11/19 18:13:49  mohor
+// r_MiiMRst is not used for resetting the MIIM module. wb_rst used instead.
+//
+// Revision 1.28  2002/11/15 14:27:15  mohor
+// Since r_Rst bit is not used any more, default value is changed to 0xa000.
+//
+// Revision 1.27  2002/11/01 18:19:34  mohor
+// Defines fixed to use generic RAM by default.
+//
+// Revision 1.26  2002/10/24 18:53:03  mohor
+// fpga define added.
+//
+// Revision 1.3  2002/10/11 16:57:54  igorm
+// eth_defines.v tagged with rel_5 used.
+//
+// Revision 1.25  2002/10/10 16:47:44  mohor
+// Defines changed to have ETH_ prolog.
+// ETH_WISHBONE_B# define added.
+//
+// Revision 1.24  2002/10/10 16:33:11  mohor
+// Bist added.
+//
+// Revision 1.23  2002/09/23 18:22:48  mohor
+// Virtual Silicon RAM might be used in the ASIC implementation of the ethernet
+// core.
+//
+// Revision 1.22  2002/09/04 18:36:49  mohor
+// Defines for control registers added (ETH_TXCTRL and ETH_RXCTRL).
+//
+// Revision 1.21  2002/08/16 22:09:47  mohor
+// Defines for register width added. mii_rst signal in MIIMODER register
+// changed.
+//
+// Revision 1.20  2002/08/14 19:31:48  mohor
+// Register TX_BD_NUM is changed so it contains value of the Tx buffer descriptors. No
+// need to multiply or divide any more.
+//
+// Revision 1.19  2002/07/23 15:28:31  mohor
+// Ram , used for BDs changed from generic_spram to eth_spram_256x32.
+//
+// Revision 1.18  2002/05/03 10:15:50  mohor
+// Outputs registered. Reset changed for eth_wishbone module.
+//
+// Revision 1.17  2002/04/24 08:52:19  mohor
+// Compiler directives added. Tx and Rx fifo size incremented. A "late collision"
+// bug fixed.
+//
+// Revision 1.16  2002/03/19 12:53:29  mohor
+// Some defines that are used in testbench only were moved to tb_eth_defines.v
+// file.
+//
+// Revision 1.15  2002/02/26 16:11:32  mohor
+// Number of interrupts changed
+//
+// Revision 1.14  2002/02/16 14:03:44  mohor
+// Registered trimmed. Unused registers removed.
+//
+// Revision 1.13  2002/02/16 13:06:33  mohor
+// EXTERNAL_DMA used instead of WISHBONE_DMA.
+//
+// Revision 1.12  2002/02/15 10:58:31  mohor
+// Changed that were lost with last update put back to the file.
+//
+// Revision 1.11  2002/02/14 20:19:41  billditt
+// Modified for Address Checking,
+// addition of eth_addrcheck.v
+//
+// Revision 1.10  2002/02/12 17:01:19  mohor
+// HASH0 and HASH1 registers added. 
+
+// Revision 1.9  2002/02/08 16:21:54  mohor
+// Rx status is written back to the BD.
+//
+// Revision 1.8  2002/02/05 16:44:38  mohor
+// Both rx and tx part are finished. Tested with wb_clk_i between 10 and 200
+// MHz. Statuses, overrun, control frame transmission and reception still  need
+// to be fixed.
+//
+// Revision 1.7  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.6  2001/12/05 15:00:16  mohor
+// RX_BD_NUM changed to TX_BD_NUM (holds number of TX descriptors
+// instead of the number of RX descriptors).
+//
+// Revision 1.5  2001/12/05 10:21:37  mohor
+// ETH_RX_BD_ADR register deleted. ETH_RX_BD_NUM is used instead.
+//
+// Revision 1.4  2001/11/13 14:23:56  mohor
+// Generic memory model is used. Defines are changed for the same reason.
+//
+// Revision 1.3  2001/10/18 12:07:11  mohor
+// Status signals changed, Address decoding changed, interrupt controller
+// added.
+//
+// Revision 1.2  2001/09/24 15:02:56  mohor
+// Defines changed (All precede with ETH_). Small changes because some
+// tools generate warnings when two operands are together. Synchronization
+// between two clocks domains in eth_wishbonedma.v is changed (due to ASIC
+// demands).
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed to have an eth_ prefix in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joined together.
+//
+//
+//
+//
+//
+
+
+
+//`define ETH_BIST                    // Bist for usage with Virtual Silicon RAMS
+
+`define ETH_MBIST_CTRL_WIDTH 3        // width of MBIST control bus
+
+// Ethernet implemented in Xilinx Chips (uncomment following lines)
+// `define ETH_FIFO_XILINX             // Use Xilinx distributed ram for tx and rx fifo
+// `define ETH_XILINX_RAMB4            // Selection of the used memory for Buffer descriptors
+                                      // Core is going to be implemented in Virtex FPGA and contains Virtex 
+                                      // specific elements. 
+
+// Ethernet implemented in Altera Chips (uncomment following lines)
+//`define ETH_ALTERA_ALTSYNCRAM
+
+// Ethernet implemented in ASIC with Virtual Silicon RAMs
+// `define ETH_VIRTUAL_SILICON_RAM     // Virtual Silicon RAMS used storing buffer descriptors (ASIC implementation)
+
+// Ethernet implemented in ASIC with Artisan RAMs
+// `define ETH_ARTISAN_RAM             // Artisan RAMS used storing buffer descriptors (ASIC implementation)
+
+// Uncomment when Avalon bus is used
+//`define ETH_AVALON_BUS
+// Register addresses as 8-bit word indexes; the trailing comment on each line gives the byte offset (index * 4)
+`define ETH_MODER_ADR         8'h0    // 0x0 
+`define ETH_INT_SOURCE_ADR    8'h1    // 0x4 
+`define ETH_INT_MASK_ADR      8'h2    // 0x8 
+`define ETH_IPGT_ADR          8'h3    // 0xC 
+`define ETH_IPGR1_ADR         8'h4    // 0x10
+`define ETH_IPGR2_ADR         8'h5    // 0x14
+`define ETH_PACKETLEN_ADR     8'h6    // 0x18
+`define ETH_COLLCONF_ADR      8'h7    // 0x1C
+`define ETH_TX_BD_NUM_ADR     8'h8    // 0x20
+`define ETH_CTRLMODER_ADR     8'h9    // 0x24
+`define ETH_MIIMODER_ADR      8'hA    // 0x28
+`define ETH_MIICOMMAND_ADR    8'hB    // 0x2C
+`define ETH_MIIADDRESS_ADR    8'hC    // 0x30
+`define ETH_MIITX_DATA_ADR    8'hD    // 0x34
+`define ETH_MIIRX_DATA_ADR    8'hE    // 0x38
+`define ETH_MIISTATUS_ADR     8'hF    // 0x3C
+`define ETH_MAC_ADDR0_ADR     8'h10   // 0x40
+`define ETH_MAC_ADDR1_ADR     8'h11   // 0x44
+`define ETH_HASH0_ADR         8'h12   // 0x48
+`define ETH_HASH1_ADR         8'h13   // 0x4C
+`define ETH_TX_CTRL_ADR       8'h14   // 0x50
+`define ETH_RX_CTRL_ADR       8'h15   // 0x54
+
+// Register default values, one define per register slice (presumably _0 is the least significant - confirm)
+`define ETH_MODER_DEF_0         8'h00
+`define ETH_MODER_DEF_1         8'hA0
+`define ETH_MODER_DEF_2         1'h0
+`define ETH_INT_MASK_DEF_0      7'h0
+`define ETH_IPGT_DEF_0          7'h12
+`define ETH_IPGR1_DEF_0         7'h0C
+`define ETH_IPGR2_DEF_0         7'h12
+`define ETH_PACKETLEN_DEF_0     8'h00
+`define ETH_PACKETLEN_DEF_1     8'h06
+`define ETH_PACKETLEN_DEF_2     8'h40
+`define ETH_PACKETLEN_DEF_3     8'h00
+`define ETH_COLLCONF_DEF_0      6'h3f
+`define ETH_COLLCONF_DEF_2      4'hF
+`define ETH_TX_BD_NUM_DEF_0     8'h40
+`define ETH_CTRLMODER_DEF_0     3'h0
+`define ETH_MIIMODER_DEF_0      8'h64
+`define ETH_MIIMODER_DEF_1      1'h0
+`define ETH_MIIADDRESS_DEF_0    5'h00
+`define ETH_MIIADDRESS_DEF_1    5'h00
+`define ETH_MIITX_DATA_DEF_0    8'h00
+`define ETH_MIITX_DATA_DEF_1    8'h00
+`define ETH_MIIRX_DATA_DEF     16'h0000 // not written from WB
+`define ETH_MAC_ADDR0_DEF_0     8'h00
+`define ETH_MAC_ADDR0_DEF_1     8'h00
+`define ETH_MAC_ADDR0_DEF_2     8'h00
+`define ETH_MAC_ADDR0_DEF_3     8'h00
+`define ETH_MAC_ADDR1_DEF_0     8'h00
+`define ETH_MAC_ADDR1_DEF_1     8'h00
+`define ETH_HASH0_DEF_0         8'h00
+`define ETH_HASH0_DEF_1         8'h00
+`define ETH_HASH0_DEF_2         8'h00
+`define ETH_HASH0_DEF_3         8'h00
+`define ETH_HASH1_DEF_0         8'h00
+`define ETH_HASH1_DEF_1         8'h00
+`define ETH_HASH1_DEF_2         8'h00
+`define ETH_HASH1_DEF_3         8'h00
+`define ETH_TX_CTRL_DEF_0       8'h00 //
+`define ETH_TX_CTRL_DEF_1       8'h00 //
+`define ETH_TX_CTRL_DEF_2       1'h0  //
+`define ETH_RX_CTRL_DEF_0       8'h00
+`define ETH_RX_CTRL_DEF_1       8'h00
+
+// Width in bits of each register slice; each value matches the bit width of the corresponding _DEF_ literal
+`define ETH_MODER_WIDTH_0       8
+`define ETH_MODER_WIDTH_1       8
+`define ETH_MODER_WIDTH_2       1
+`define ETH_INT_SOURCE_WIDTH_0  7
+`define ETH_INT_MASK_WIDTH_0    7
+`define ETH_IPGT_WIDTH_0        7
+`define ETH_IPGR1_WIDTH_0       7
+`define ETH_IPGR2_WIDTH_0       7
+`define ETH_PACKETLEN_WIDTH_0   8
+`define ETH_PACKETLEN_WIDTH_1   8
+`define ETH_PACKETLEN_WIDTH_2   8
+`define ETH_PACKETLEN_WIDTH_3   8
+`define ETH_COLLCONF_WIDTH_0    6
+`define ETH_COLLCONF_WIDTH_2    4
+`define ETH_TX_BD_NUM_WIDTH_0   8
+`define ETH_CTRLMODER_WIDTH_0   3
+`define ETH_MIIMODER_WIDTH_0    8
+`define ETH_MIIMODER_WIDTH_1    1
+`define ETH_MIICOMMAND_WIDTH_0  3
+`define ETH_MIIADDRESS_WIDTH_0  5
+`define ETH_MIIADDRESS_WIDTH_1  5
+`define ETH_MIITX_DATA_WIDTH_0  8
+`define ETH_MIITX_DATA_WIDTH_1  8
+`define ETH_MIIRX_DATA_WIDTH    16 // not written from WB
+`define ETH_MIISTATUS_WIDTH     3 // not written from WB
+`define ETH_MAC_ADDR0_WIDTH_0   8
+`define ETH_MAC_ADDR0_WIDTH_1   8
+`define ETH_MAC_ADDR0_WIDTH_2   8
+`define ETH_MAC_ADDR0_WIDTH_3   8
+`define ETH_MAC_ADDR1_WIDTH_0   8
+`define ETH_MAC_ADDR1_WIDTH_1   8
+`define ETH_HASH0_WIDTH_0       8
+`define ETH_HASH0_WIDTH_1       8
+`define ETH_HASH0_WIDTH_2       8
+`define ETH_HASH0_WIDTH_3       8
+`define ETH_HASH1_WIDTH_0       8
+`define ETH_HASH1_WIDTH_1       8
+`define ETH_HASH1_WIDTH_2       8
+`define ETH_HASH1_WIDTH_3       8
+`define ETH_TX_CTRL_WIDTH_0     8
+`define ETH_TX_CTRL_WIDTH_1     8
+`define ETH_TX_CTRL_WIDTH_2     1
+`define ETH_RX_CTRL_WIDTH_0     8
+`define ETH_RX_CTRL_WIDTH_1     8
+
+
+// Outputs are registered (enabled here; comment out the define when not needed)
+`define ETH_REGISTERED_OUTPUTS
+
+// Settings for TX FIFO (NOTE(review): CNT_WIDTH 5 presumably lets the counter hold 0..DEPTH - confirm)
+`define ETH_TX_FIFO_CNT_WIDTH  5
+`define ETH_TX_FIFO_DEPTH      16
+`define ETH_TX_FIFO_DATA_WIDTH 32
+
+// Settings for RX FIFO (same values as the TX FIFO)
+`define ETH_RX_FIFO_CNT_WIDTH  5
+`define ETH_RX_FIFO_DEPTH      16
+`define ETH_RX_FIFO_DATA_WIDTH 32
+
+// Burst length
+`define ETH_BURST_LENGTH       4    // Change also ETH_BURST_CNT_WIDTH
+`define ETH_BURST_CNT_WIDTH    3    // The counter must be wide enough to count to ETH_BURST_LENGTH
+
+// WISHBONE interface is Revision B3 compliant (uncomment when needed)
+//`define ETH_WISHBONE_B3
+
+
+// Following defines are needed when eth_cop.v is used. Otherwise they may be deleted.
+`define ETH_BASE              32'hd0000000  // first address decoded as slave 1 (S1)
+`define ETH_WIDTH             32'h800       // size of the S1 window; its end (BASE + WIDTH) is exclusive
+`define MEMORY_BASE           32'h2000      // first address decoded as slave 2 (S2)
+`define MEMORY_WIDTH          32'h10000     // size of the S2 window; its end (BASE + WIDTH) is exclusive
+
+`define M1_ADDRESSED_S1 ( (m1_wb_adr_i >= `ETH_BASE)    & (m1_wb_adr_i < (`ETH_BASE    + `ETH_WIDTH   )) )
+`define M1_ADDRESSED_S2 ( (m1_wb_adr_i >= `MEMORY_BASE) & (m1_wb_adr_i < (`MEMORY_BASE + `MEMORY_WIDTH)) )
+`define M2_ADDRESSED_S1 ( (m2_wb_adr_i >= `ETH_BASE)    & (m2_wb_adr_i < (`ETH_BASE    + `ETH_WIDTH   )) )
+`define M2_ADDRESSED_S2 ( (m2_wb_adr_i >= `MEMORY_BASE) & (m2_wb_adr_i < (`MEMORY_BASE + `MEMORY_WIDTH)) )
+// Previous defines are only needed for eth_cop.v
+
Index: /trunk/OC-Ethernet/eth_wishbone.v
===================================================================
--- /trunk/OC-Ethernet/eth_wishbone.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_wishbone.v	(revision 6)
@@ -0,0 +1,2556 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_wishbone.v                                              ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001, 2002 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.57  2005/02/21 11:35:33  igorm
+// Defer indication fixed.
+//
+// Revision 1.56  2004/04/30 10:30:00  igorm
+// Accidently deleted line put back.
+//
+// Revision 1.55  2004/04/26 15:26:23  igorm
+// - Bug connected to the TX_BD_NUM_Wr signal fixed (bug came in with the
+//   previous update of the core.
+// - TxBDAddress is set to 0 after the TX is enabled in the MODER register.
+// - RxBDAddress is set to r_TxBDNum<<1 after the RX is enabled in the MODER
+//   register. (thanks to Mathias and Torbjorn)
+// - Multicast reception was fixed. Thanks to Ulrich Gries
+//
+// Revision 1.54  2003/11/12 18:24:59  tadejm
+// WISHBONE slave changed and tested from only 32-bit accesss to byte access.
+//
+// Revision 1.53  2003/10/17 07:46:17  markom
+// mbist signals updated according to newest convention
+//
+// Revision 1.52  2003/01/30 14:51:31  mohor
+// Reset has priority in some flipflops.
+//
+// Revision 1.51  2003/01/30 13:36:22  mohor
+// A new bug (entered with previous update) fixed. When abort occured sometimes
+// data transmission was blocked.
+//
+// Revision 1.50  2003/01/22 13:49:26  tadejm
+// When control packets were received, they were ignored in some cases.
+//
+// Revision 1.49  2003/01/21 12:09:40  mohor
+// When receiving normal data frame and RxFlow control was switched on, RXB
+// interrupt was not set.
+//
+// Revision 1.48  2003/01/20 12:05:26  mohor
+// When in full duplex, transmit was sometimes blocked. Fixed.
+//
+// Revision 1.47  2002/11/22 13:26:21  mohor
+// Registers RxStatusWrite_rck and RxStatusWriteLatched were not used
+// anywhere. Removed.
+//
+// Revision 1.46  2002/11/22 01:57:06  mohor
+// Rx Flow control fixed. CF flag added to the RX buffer descriptor. RxAbort
+// synchronized.
+//
+// Revision 1.45  2002/11/19 17:33:34  mohor
+// AddressMiss status is connecting to the Rx BD. AddressMiss is identifying
+// that a frame was received because of the promiscous mode.
+//
+// Revision 1.44  2002/11/13 22:21:40  tadejm
+// RxError is not generated when small frame reception is enabled and small
+// frames are received.
+//
+// Revision 1.43  2002/10/18 20:53:34  mohor
+// case changed to casex.
+//
+// Revision 1.42  2002/10/18 17:04:20  tadejm
+// Changed BIST scan signals.
+//
+// Revision 1.41  2002/10/18 15:42:09  tadejm
+// Igor added WB burst support and repaired BUG when handling TX under-run and retry.
+//
+// Revision 1.40  2002/10/14 16:07:02  mohor
+// TxStatus is written after last access to the TX fifo is finished (in case of abort
+// or retry). TxDone is fixed.
+//
+// Revision 1.39  2002/10/11 15:35:20  mohor
+// txfifo_cnt and rxfifo_cnt counters width is defined in the eth_define.v file,
+// TxDone and TxRetry are generated after the current WISHBONE access is
+// finished.
+//
+// Revision 1.38  2002/10/10 16:29:30  mohor
+// BIST added.
+//
+// Revision 1.37  2002/09/11 14:18:46  mohor
+// Sometimes both RxB_IRQ and RxE_IRQ were activated. Bug fixed.
+//
+// Revision 1.36  2002/09/10 13:48:46  mohor
+// Reception is possible after RxPointer is read and not after BD is read. For
+// that reason RxBDReady is changed to RxReady.
+// Busy_IRQ interrupt connected. When there is no RxBD ready and frame
+// comes, interrupt is generated.
+//
+// Revision 1.35  2002/09/10 10:35:23  mohor
+// Ethernet debug registers removed.
+//
+// Revision 1.34  2002/09/08 16:31:49  mohor
+// Async reset for WB_ACK_O removed (when core was in reset, it was
+// impossible to access BDs).
+// RxPointers and TxPointers names changed to be more descriptive.
+// TxUnderRun synchronized.
+//
+// Revision 1.33  2002/09/04 18:47:57  mohor
+// Debug registers reg1, 2, 3, 4 connected. Synchronization of many signals
+// changed (bugs fixed). Access to un-alligned buffers fixed. RxAbort signal
+// was not used OK.
+//
+// Revision 1.32  2002/08/14 19:31:48  mohor
+// Register TX_BD_NUM is changed so it contains value of the Tx buffer descriptors. No
+// need to multiply or devide any more.
+//
+// Revision 1.31  2002/07/25 18:29:01  mohor
+// WriteRxDataToMemory signal changed so end of frame (when last word is
+// written to fifo) is changed.
+//
+// Revision 1.30  2002/07/23 15:28:31  mohor
+// Ram , used for BDs changed from generic_spram to eth_spram_256x32.
+//
+// Revision 1.29  2002/07/20 00:41:32  mohor
+// ShiftEnded synchronization changed.
+//
+// Revision 1.28  2002/07/18 16:11:46  mohor
+// RxBDAddress takes `ETH_TX_BD_NUM_DEF value after reset.
+//
+// Revision 1.27  2002/07/11 02:53:20  mohor
+// RxPointer bug fixed.
+//
+// Revision 1.26  2002/07/10 13:12:38  mohor
+// Previous bug wasn't succesfully removed. Now fixed.
+//
+// Revision 1.25  2002/07/09 23:53:24  mohor
+// Master state machine had a bug when switching from master write to
+// master read.
+//
+// Revision 1.24  2002/07/09 20:44:41  mohor
+// m_wb_cyc_o signal released after every single transfer.
+//
+// Revision 1.23  2002/05/03 10:15:50  mohor
+// Outputs registered. Reset changed for eth_wishbone module.
+//
+// Revision 1.22  2002/04/24 08:52:19  mohor
+// Compiler directives added. Tx and Rx fifo size incremented. A "late collision"
+// bug fixed.
+//
+// Revision 1.21  2002/03/29 16:18:11  lampret
+// Small typo fixed.
+//
+// Revision 1.20  2002/03/25 16:19:12  mohor
+// Any address can be used for Tx and Rx BD pointers. Address does not need
+// to be aligned.
+//
+// Revision 1.19  2002/03/19 12:51:50  mohor
+// Comments in Slovene language removed.
+//
+// Revision 1.18  2002/03/19 12:46:52  mohor
+// casex changed with case, fifo reset changed.
+//
+// Revision 1.17  2002/03/09 16:08:45  mohor
+// rx_fifo was not always cleared ok. Fixed.
+//
+// Revision 1.16  2002/03/09 13:51:20  mohor
+// Status was not latched correctly sometimes. Fixed.
+//
+// Revision 1.15  2002/03/08 06:56:46  mohor
+// Big Endian problem when sending frames fixed.
+//
+// Revision 1.14  2002/03/02 19:12:40  mohor
+// Byte ordering changed (Big Endian used). casex changed with case because
+// Xilinx Foundation had problems. Tested in HW. It WORKS.
+//
+// Revision 1.13  2002/02/26 16:59:55  mohor
+// Small fixes for external/internal DMA missmatches.
+//
+// Revision 1.12  2002/02/26 16:22:07  mohor
+// Interrupts changed
+//
+// Revision 1.11  2002/02/15 17:07:39  mohor
+// Status was not written correctly when frames were discarted because of
+// address mismatch.
+//
+// Revision 1.10  2002/02/15 12:17:39  mohor
+// RxStartFrm cleared when abort or retry comes.
+//
+// Revision 1.9  2002/02/15 11:59:10  mohor
+// Changes that were lost when updating from 1.5 to 1.8 fixed.
+//
+// Revision 1.8  2002/02/14 20:54:33  billditt
+// Addition  of new module eth_addrcheck.v
+//
+// Revision 1.7  2002/02/12 17:03:47  mohor
+// RxOverRun added to statuses.
+//
+// Revision 1.6  2002/02/11 09:18:22  mohor
+// Tx status is written back to the BD.
+//
+// Revision 1.5  2002/02/08 16:21:54  mohor
+// Rx status is written back to the BD.
+//
+// Revision 1.4  2002/02/06 14:10:21  mohor
+// non-DMA host interface added. Select the right configutation in eth_defines.
+//
+// Revision 1.3  2002/02/05 16:44:39  mohor
+// Both rx and tx part are finished. Tested with wb_clk_i between 10 and 200
+// MHz. Statuses, overrun, control frame transmission and reception still  need
+// to be fixed.
+//
+// Revision 1.2  2002/02/01 12:46:51  mohor
+// Tx part finished. TxStatus needs to be fixed. Pause request needs to be
+// added.
+//
+// Revision 1.1  2002/01/23 10:47:59  mohor
+// Initial version. Equals to eth_wishbonedma.v at this moment.
+//
+//
+//
+
+`include "eth_defines.v"
+`include "timescale.v"
+
+
+module eth_wishbone
+   (
+
+    // WISHBONE common
+    WB_CLK_I, WB_DAT_I, WB_DAT_O, 
+
+    // WISHBONE slave
+ 		WB_ADR_I, WB_WE_I, WB_ACK_O, 
+    BDCs, 
+
+    Reset, 
+
+    // WISHBONE master
+    m_wb_adr_o, m_wb_sel_o, m_wb_we_o, 
+    m_wb_dat_o, m_wb_dat_i, m_wb_cyc_o, 
+    m_wb_stb_o, m_wb_ack_i, m_wb_err_i, 
+
+`ifdef ETH_WISHBONE_B3
+    m_wb_cti_o, m_wb_bte_o, 
+`endif
+
+    //TX
+    MTxClk, TxStartFrm, TxEndFrm, TxUsedData, TxData, 
+    TxRetry, TxAbort, TxUnderRun, TxDone, PerPacketCrcEn, 
+    PerPacketPad, 
+
+    //RX
+    MRxClk, RxData, RxValid, RxStartFrm, RxEndFrm, RxAbort, RxStatusWriteLatched_sync2, 
+    
+    // Register
+    r_TxEn, r_RxEn, r_TxBDNum, r_RxFlow, r_PassAll, 
+
+    // Interrupts
+    TxB_IRQ, TxE_IRQ, RxB_IRQ, RxE_IRQ, Busy_IRQ, 
+    
+    // Rx Status
+    InvalidSymbol, LatchedCrcError, RxLateCollision, ShortFrame, DribbleNibble,
+    ReceivedPacketTooBig, RxLength, LoadRxStatus, ReceivedPacketGood, AddressMiss, 
+    ReceivedPauseFrm, 
+    
+    // Tx Status
+    RetryCntLatched, RetryLimit, LateCollLatched, DeferLatched, RstDeferLatched, CarrierSenseLost
+
+    // Bist
+`ifdef ETH_BIST
+    ,
+    // debug chain signals
+    mbist_si_i,       // bist scan serial in
+    mbist_so_o,       // bist scan serial out
+    mbist_ctrl_i        // bist chain shift control
+`endif
+    
+
+
+		);
+
+
+parameter Tp = 1;
+
+
+// WISHBONE common
+input           WB_CLK_I;       // WISHBONE clock
+input  [31:0]   WB_DAT_I;       // WISHBONE data input
+output [31:0]   WB_DAT_O;       // WISHBONE data output
+
+// WISHBONE slave
+input   [9:2]   WB_ADR_I;       // WISHBONE address input
+input           WB_WE_I;        // WISHBONE write enable input
+input   [3:0]   BDCs;           // Buffer descriptors are selected
+output          WB_ACK_O;       // WISHBONE acknowledge output
+
+// WISHBONE master
+output  [29:0]  m_wb_adr_o;     // 
+output   [3:0]  m_wb_sel_o;     // 
+output          m_wb_we_o;      // 
+output  [31:0]  m_wb_dat_o;     // 
+output          m_wb_cyc_o;     // 
+output          m_wb_stb_o;     // 
+input   [31:0]  m_wb_dat_i;     // 
+input           m_wb_ack_i;     // 
+input           m_wb_err_i;     // 
+
+`ifdef ETH_WISHBONE_B3
+output   [2:0]  m_wb_cti_o;     // Cycle Type Identifier
+output   [1:0]  m_wb_bte_o;     // Burst Type Extension
+reg      [2:0]  m_wb_cti_o;     // Cycle Type Identifier
+`endif
+
+input           Reset;       // Reset signal
+
+// Rx Status signals
+input           InvalidSymbol;    // Invalid symbol was received during reception in 100 Mbps mode
+input           LatchedCrcError;  // CRC error
+input           RxLateCollision;  // Late collision occurred while receiving frame
+input           ShortFrame;       // Frame shorter than the minimum size (r_MinFL) was received while small packets are enabled (r_RecSmall)
+input           DribbleNibble;    // Extra nibble received
+input           ReceivedPacketTooBig;// Received packet is bigger than r_MaxFL
+input    [15:0] RxLength;         // Length of the incoming frame
+input           LoadRxStatus;     // Rx status was loaded
+input           ReceivedPacketGood;// Received packet's length and CRC are good
+input           AddressMiss;      // When a packet is received AddressMiss status is written to the Rx BD
+input           r_RxFlow;         // presumably the Rx flow-control enable register bit - TODO confirm
+input           r_PassAll;        // presumably the "pass all frames" register bit - TODO confirm
+input           ReceivedPauseFrm; // presumably flags a received pause (control) frame - TODO confirm
+
+// Tx Status signals
+input     [3:0] RetryCntLatched;  // Latched Retry Counter
+input           RetryLimit;       // Retry limit reached (Retry Max value + 1 attempts were made)
+input           LateCollLatched;  // Late collision occurred
+input           DeferLatched;     // Defer indication (Frame was deferred before successfully sent)
+output          RstDeferLatched;  // MTxClk-domain pulse: BlockingTxStatusWrite_sync2 & ~BlockingTxStatusWrite_sync3
+input           CarrierSenseLost; // Carrier Sense was lost during the frame transmission
+
+// Tx
+input           MTxClk;         // Transmit clock (from PHY)
+input           TxUsedData;     // Transmit packet used data
+input           TxRetry;        // Transmit packet retry
+input           TxAbort;        // Transmit packet abort
+input           TxDone;         // Transmission ended
+output          TxStartFrm;     // Transmit packet start frame
+output          TxEndFrm;       // Transmit packet end frame
+output  [7:0]   TxData;         // Transmit packet data byte
+output          TxUnderRun;     // Transmit packet under-run
+output          PerPacketCrcEn; // Per packet crc enable
+output          PerPacketPad;   // Per packet padding
+
+// Rx
+input           MRxClk;         // Receive clock (from PHY)
+input   [7:0]   RxData;         // Received data byte (from PHY)
+input           RxValid;        // 
+input           RxStartFrm;     // 
+input           RxEndFrm;       // 
+input           RxAbort;        // This signal is set when address doesn't match.
+output          RxStatusWriteLatched_sync2;
+
+//Register
+input           r_TxEn;         // Transmit enable
+input           r_RxEn;         // Receive enable
+input   [7:0]   r_TxBDNum;      // Number of Tx buffer descriptors (TX_BD_NUM register, see rev. 1.32 note) - not an Rx BD count
+
+// Interrupts
+output TxB_IRQ;
+output TxE_IRQ;
+output RxB_IRQ;
+output RxE_IRQ;
+output Busy_IRQ;
+
+
+// Bist
+`ifdef ETH_BIST
+input   mbist_si_i;       // bist scan serial in
+output  mbist_so_o;       // bist scan serial out
+input [`ETH_MBIST_CTRL_WIDTH - 1:0] mbist_ctrl_i;       // bist chain shift control
+`endif
+
+reg TxB_IRQ;
+reg TxE_IRQ;
+reg RxB_IRQ;
+reg RxE_IRQ;
+
+reg             TxStartFrm;
+reg             TxEndFrm;
+reg     [7:0]   TxData;
+
+reg             TxUnderRun;
+reg             TxUnderRun_wb;
+
+reg             TxBDRead;
+wire            TxStatusWrite;
+
+reg     [1:0]   TxValidBytesLatched;
+
+reg    [15:0]   TxLength;
+reg    [15:0]   LatchedTxLength;
+reg   [14:11]   TxStatus;
+
+reg   [14:13]   RxStatus;
+
+reg             TxStartFrm_wb;
+reg             TxRetry_wb;
+reg             TxAbort_wb;
+reg             TxDone_wb;
+
+reg             TxDone_wb_q;
+reg             TxAbort_wb_q;
+reg             TxRetry_wb_q;
+reg             TxRetryPacket;
+reg             TxRetryPacket_NotCleared;
+reg             TxDonePacket;
+reg             TxDonePacket_NotCleared;
+reg             TxAbortPacket;
+reg             TxAbortPacket_NotCleared;
+reg             RxBDReady;
+reg             RxReady;
+reg             TxBDReady;
+
+reg             RxBDRead;
+
+reg    [31:0]   TxDataLatched;
+reg     [1:0]   TxByteCnt;
+reg             LastWord;
+reg             ReadTxDataFromFifo_tck;
+
+reg             BlockingTxStatusWrite;
+reg             BlockingTxBDRead;
+
+reg             Flop;
+
+reg     [7:1]   TxBDAddress;
+reg     [7:1]   RxBDAddress;
+
+reg             TxRetrySync1;
+reg             TxAbortSync1;
+reg             TxDoneSync1;
+
+reg             TxAbort_q;
+reg             TxRetry_q;
+reg             TxUsedData_q;
+
+reg    [31:0]   RxDataLatched2;
+
+reg    [31:8]   RxDataLatched1;     // Big Endian Byte Ordering
+
+reg     [1:0]   RxValidBytes;
+reg     [1:0]   RxByteCnt;
+reg             LastByteIn;
+reg             ShiftWillEnd;
+
+reg             WriteRxDataToFifo;
+reg    [15:0]   LatchedRxLength;
+reg             RxAbortLatched;
+
+reg             ShiftEnded;
+reg             RxOverrun;
+
+reg     [3:0]   BDWrite;                    // BD Write Enable for access from WISHBONE side
+reg             BDRead;                     // BD Read access from WISHBONE side
+wire   [31:0]   RxBDDataIn;                 // Rx BD data in
+wire   [31:0]   TxBDDataIn;                 // Tx BD data in
+
+reg             TxEndFrm_wb;
+
+wire            TxRetryPulse;
+wire            TxDonePulse;
+wire            TxAbortPulse;
+
+wire            StartRxBDRead;
+
+wire            StartTxBDRead;
+
+wire            TxIRQEn;
+wire            WrapTxStatusBit;
+
+wire            RxIRQEn;
+wire            WrapRxStatusBit;
+
+wire    [1:0]   TxValidBytes;
+
+wire    [7:1]   TempTxBDAddress;
+wire    [7:1]   TempRxBDAddress;
+
+wire            RxStatusWrite;
+wire            RxBufferFull;
+wire            RxBufferAlmostEmpty;
+wire            RxBufferEmpty;
+
+reg             WB_ACK_O;
+
+wire    [8:0]   RxStatusIn;
+reg     [8:0]   RxStatusInLatched;
+
+reg WbEn, WbEn_q;
+reg RxEn, RxEn_q;
+reg TxEn, TxEn_q;
+reg r_TxEn_q;
+reg r_RxEn_q;
+
+wire ram_ce;
+wire [3:0]  ram_we;
+wire ram_oe;
+reg [7:0]   ram_addr;
+reg [31:0]  ram_di;
+wire [31:0] ram_do;
+
+wire StartTxPointerRead;
+reg  TxPointerRead;
+reg TxEn_needed;
+reg RxEn_needed;
+
+wire StartRxPointerRead;
+reg RxPointerRead; 
+
+`ifdef ETH_WISHBONE_B3
+assign m_wb_bte_o = 2'b00;    // Linear burst
+`endif
+
+assign m_wb_stb_o = m_wb_cyc_o;
+
+always @ (posedge WB_CLK_I)  // no reset term: rev. 1.34 removed it so the BDs stay accessible while the core is in reset
+begin
+  WB_ACK_O <=#Tp (WbEn & WbEn_q & (|BDWrite)) | (WbEn & ~WbEn_q & BDRead);  // write acknowledge | read acknowledge
+end
+
+assign WB_DAT_O = ram_do;
+
+// Buffer descriptor RAM: generic synchronous single-port RAM (8-bit address, 32-bit data, per-byte write enables)
+eth_spram_256x32 bd_ram (
+	.clk(WB_CLK_I), .rst(Reset), .ce(ram_ce), .we(ram_we), .oe(ram_oe), .addr(ram_addr), .di(ram_di), .do(ram_do)
+`ifdef ETH_BIST
+  ,
+  .mbist_si_i       (mbist_si_i),
+  .mbist_so_o       (mbist_so_o),
+  .mbist_ctrl_i       (mbist_ctrl_i)
+`endif
+);
+
+assign ram_ce = 1'b1;  // RAM is permanently enabled
+assign ram_we = {4{TxStatusWrite | RxStatusWrite}} | ({4{WbEn & WbEn_q}} & BDWrite);  // all four lanes for status write-back, BDWrite lanes for host writes
+assign ram_oe = (WbEn & WbEn_q & BDRead) | (TxEn & TxEn_q & (TxPointerRead | TxBDRead)) | (RxEn & RxEn_q & (RxPointerRead | RxBDRead));
+
+
+// TxEn_needed requests a Tx slot on the BD RAM. It is raised in the first cycle of a WISHBONE
+// slot while Tx is enabled and no Tx BD is ready, and dropped when the Tx pointer word is read.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxEn_needed <=#Tp 1'b0;
+  else if(r_TxEn & ~TxBDReady & ~WbEn_q & WbEn)
+    TxEn_needed <=#Tp 1'b1;
+  else if(TxPointerRead & TxEn_q & TxEn)
+    TxEn_needed <=#Tp 1'b0;
+end
+
+// Time-multiplexing the single-port BD RAM between three users: WISHBONE slave (WbEn), Rx (RxEn) and Tx (TxEn).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      WbEn <=#Tp 1'b1;
+      RxEn <=#Tp 1'b0;
+      TxEn <=#Tp 1'b0;
+      ram_addr <=#Tp 8'h0;
+      ram_di <=#Tp 32'h0;
+      BDRead <=#Tp 1'b0;
+      BDWrite <=#Tp 4'h0;  // BDWrite is 4 bits wide: width-matched reset literal
+    end
+  else
+    begin
+      // The next stage is selected from the delayed current stage and the pending Rx/Tx requests
+      case ({WbEn_q, RxEn_q, TxEn_q, RxEn_needed, TxEn_needed})  // synopsys parallel_case
+        5'b100_10, 5'b100_11 :
+          begin
+            WbEn <=#Tp 1'b0;
+            RxEn <=#Tp 1'b1;  // wb access stage and RxEn_needed is set (Rx is served before Tx)
+            TxEn <=#Tp 1'b0;
+            ram_addr <=#Tp {RxBDAddress, RxPointerRead};
+            ram_di <=#Tp RxBDDataIn;
+          end
+        5'b100_01 :
+          begin
+            WbEn <=#Tp 1'b0;
+            RxEn <=#Tp 1'b0;
+            TxEn <=#Tp 1'b1;  // wb access stage, RxEn_needed is clear but TxEn_needed is set
+            ram_addr <=#Tp {TxBDAddress, TxPointerRead};
+            ram_di <=#Tp TxBDDataIn;
+          end
+        5'b010_00, 5'b010_10 :
+          begin
+            WbEn <=#Tp 1'b1;  // RxEn access stage and TxEn_needed is clear: back to wb access stage
+            RxEn <=#Tp 1'b0;
+            TxEn <=#Tp 1'b0;
+            ram_addr <=#Tp WB_ADR_I[9:2];
+            ram_di <=#Tp WB_DAT_I;
+            BDWrite <=#Tp BDCs[3:0] & {4{WB_WE_I}};
+            BDRead <=#Tp (|BDCs) & ~WB_WE_I;
+          end
+        5'b010_01, 5'b010_11 :
+          begin
+            WbEn <=#Tp 1'b0;
+            RxEn <=#Tp 1'b0;
+            TxEn <=#Tp 1'b1;  // RxEn access stage and TxEn_needed is set: Tx is served next
+            ram_addr <=#Tp {TxBDAddress, TxPointerRead};
+            ram_di <=#Tp TxBDDataIn;
+          end
+        5'b001_00, 5'b001_01, 5'b001_10, 5'b001_11 :
+          begin
+            WbEn <=#Tp 1'b1;  // TxEn access stage (we always go to wb access stage)
+            RxEn <=#Tp 1'b0;
+            TxEn <=#Tp 1'b0;
+            ram_addr <=#Tp WB_ADR_I[9:2];
+            ram_di <=#Tp WB_DAT_I;
+            BDWrite <=#Tp BDCs[3:0] & {4{WB_WE_I}};
+            BDRead <=#Tp (|BDCs) & ~WB_WE_I;
+          end
+        5'b100_00 :
+          begin
+            WbEn <=#Tp 1'b0;  // WbEn access stage and no other stage is requested. WbEn is dropped for a cycle; all other registers hold
+          end
+        5'b000_00 :
+          begin
+            WbEn <=#Tp 1'b1;  // Idle state. We go to WbEn access stage.
+            RxEn <=#Tp 1'b0;
+            TxEn <=#Tp 1'b0;
+            ram_addr <=#Tp WB_ADR_I[9:2];
+            ram_di <=#Tp WB_DAT_I;
+            BDWrite <=#Tp BDCs[3:0] & {4{WB_WE_I}};
+            BDRead <=#Tp (|BDCs) & ~WB_WE_I;
+          end
+      endcase
+    end
+end
+
+
+// One-cycle-delayed copies of the three BD RAM stage enables. Together with the
+// undelayed signal they distinguish the first cycle of a stage (X & ~X_q) from
+// the following one (X & X_q).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    {WbEn_q, RxEn_q, TxEn_q} <=#Tp 3'b000;
+  else
+    {WbEn_q, RxEn_q, TxEn_q} <=#Tp {WbEn, RxEn, TxEn};
+end
+
+// One-cycle-delayed copies of the r_TxEn and r_RxEn register inputs, cleared
+// asynchronously by Reset like the stage enables above.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    {r_TxEn_q, r_RxEn_q} <=#Tp 2'b00;
+  else
+    {r_TxEn_q, r_RxEn_q} <=#Tp {r_TxEn, r_RxEn};
+end
+
+// Flop toggles on every MTxClk cycle in which TxUsedData is high and is forced
+// low by TxDone, TxAbort or TxRetry_q. Per the original author it is used so that
+// changes for tx occur every second clock.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    Flop <=#Tp 1'b0;
+  else if(TxRetry_q | TxAbort | TxDone)
+    Flop <=#Tp 1'b0;
+  else if(TxUsedData)
+    Flop <=#Tp ~Flop;
+end
+
+wire ResetTxBDReady;
+assign ResetTxBDReady = TxDonePulse | TxAbortPulse | TxRetryPulse;
+
+// TxBDReady: READY status of the Tx buffer descriptor. Sampled once, when the BD
+// word is read in a Tx slot: bit 15 must be set and the length field [31:16] must
+// be larger than 4 bytes. Cleared by ResetTxBDReady.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxBDReady <=#Tp 1'b0;
+  else if(TxBDRead & TxEn & TxEn_q)
+    TxBDReady <=#Tp (ram_do[31:16] > 4) & ram_do[15];
+  else if(ResetTxBDReady)
+    TxBDReady <=#Tp 1'b0;
+end
+
+
+// Reading the Tx buffer descriptor
+assign StartTxBDRead = (TxRetryPacket_NotCleared | TxStatusWrite) & ~BlockingTxBDRead & ~TxBDReady;
+
+// TxBDRead: the Tx side wants to fetch a buffer descriptor. It comes out of reset
+// asserted, is re-armed by StartTxBDRead and is released while TxBDReady is set.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxBDRead <=#Tp 1'b1;
+  else if(StartTxBDRead)
+    TxBDRead <=#Tp 1'b1;
+  else if(TxBDReady)
+    TxBDRead <=#Tp 1'b0;
+end
+
+
+// Reading Tx BD pointer
+assign StartTxPointerRead = TxBDRead & TxBDReady;
+
+// TxPointerRead: the Tx side is fetching the buffer pointer word of the current BD.
+// It also forms the LSB of the RAM address in a Tx slot ({TxBDAddress, TxPointerRead}).
+// Set by StartTxPointerRead, cleared again while TxEn_q is high.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxPointerRead <=#Tp 1'b0;
+  else if(StartTxPointerRead)
+    TxPointerRead <=#Tp 1'b1;
+  else if(TxEn_q)
+    TxPointerRead <=#Tp 1'b0;
+end
+
+
+// Writing status back to the Tx buffer descriptor
+assign TxStatusWrite = (TxDonePacket_NotCleared | TxAbortPacket_NotCleared) & TxEn & TxEn_q & ~BlockingTxStatusWrite;
+
+
+
+// BlockingTxStatusWrite makes the Tx status write-back a single event: it is set
+// by TxStatusWrite (which it then masks) and is released only when both TxDone_wb
+// and TxAbort_wb are low.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    BlockingTxStatusWrite <=#Tp 1'b0;
+  else if(~(TxDone_wb | TxAbort_wb))
+    BlockingTxStatusWrite <=#Tp 1'b0;
+  else if(TxStatusWrite)
+    BlockingTxStatusWrite <=#Tp 1'b1;
+end
+
+
+reg BlockingTxStatusWrite_sync1;
+reg BlockingTxStatusWrite_sync2;
+reg BlockingTxStatusWrite_sync3;
+
+// BlockingTxStatusWrite (WB_CLK_I domain) is passed through a three-stage
+// register chain clocked by MTxClk:
+//   _sync1 : first capture of the signal in the MTxClk domain
+//   _sync2 : _sync1 delayed by one MTxClk
+//   _sync3 : _sync2 delayed by one MTxClk
+// All three stages are cleared asynchronously by Reset. The registers are
+// written with non-blocking assignments, so every stage samples the value its
+// predecessor held before the clock edge.
+// RstDeferLatched (assigned right after this block) is _sync2 & ~_sync3, i.e. it
+// is high for one MTxClk cycle after a rising edge has reached _sync2.
+//
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      BlockingTxStatusWrite_sync1 <=#Tp 1'b0;
+      BlockingTxStatusWrite_sync2 <=#Tp 1'b0;
+      BlockingTxStatusWrite_sync3 <=#Tp 1'b0;
+    end
+  else
+    begin
+      BlockingTxStatusWrite_sync1 <=#Tp BlockingTxStatusWrite;
+      BlockingTxStatusWrite_sync2 <=#Tp BlockingTxStatusWrite_sync1;
+      BlockingTxStatusWrite_sync3 <=#Tp BlockingTxStatusWrite_sync2;
+    end
+end
+
+assign RstDeferLatched = BlockingTxStatusWrite_sync2 & ~BlockingTxStatusWrite_sync3;
+
+// BlockingTxBDRead lets the TxBDRead state be activated only once: it is set when
+// StartTxBDRead fires (and masks StartTxBDRead from then on) and is released when
+// neither StartTxBDRead nor TxBDReady is active.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    BlockingTxBDRead <=#Tp 1'b0;
+  else if(StartTxBDRead)
+    BlockingTxBDRead <=#Tp 1'b1;
+  else if(~(StartTxBDRead | TxBDReady))
+    BlockingTxBDRead <=#Tp 1'b0;
+end
+
+
+// Latching status bits [14:11] from the tx buffer descriptor when the BD word is read in a Tx slot
+// Data is available one cycle after the access is started (original note: at that time signal TxEn is not active - NOTE(review): confirm)
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxStatus <=#Tp 4'h0;
+  else
+  if(TxEn & TxEn_q & TxBDRead)
+    TxStatus <=#Tp ram_do[14:11];
+end
+
+reg ReadTxDataFromMemory;
+wire WriteRxDataToMemory;
+
+reg MasterWbTX;
+reg MasterWbRX;
+
+reg [29:0] m_wb_adr_o;
+reg        m_wb_cyc_o;
+reg  [3:0] m_wb_sel_o;
+reg        m_wb_we_o;
+
+wire TxLengthEq0;
+wire TxLengthLt4;
+
+reg BlockingIncrementTxPointer;
+reg [31:2] TxPointerMSB;
+reg [1:0]  TxPointerLSB;
+reg [1:0]  TxPointerLSB_rst;
+reg [31:2] RxPointerMSB;
+reg [1:0]  RxPointerLSB_rst;
+
+wire RxBurstAcc;
+wire RxWordAcc;
+wire RxHalfAcc;
+wire RxByteAcc;
+
+// TxLength: number of bytes of the current frame that still have to be fetched
+// from memory. It is loaded from BD bits [31:16] when the descriptor is read.
+// After every acknowledged Tx master access it is
+//   - forced to zero when fewer than 4 bytes were left (TxLengthLt4), or
+//   - reduced by the number of valid bytes in the word just read, which is
+//     4 minus the byte offset held in TxPointerLSB_rst.
+// TxPointerLSB_rst is non-zero only for the first access of a buffer, so all
+// later accesses subtract 4.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxLength <=#Tp 16'h0;
+  else if(TxBDRead & TxEn & TxEn_q)
+    TxLength <=#Tp ram_do[31:16];
+  else if(m_wb_ack_i & MasterWbTX)
+    begin
+      if(TxLengthLt4)
+        TxLength <=#Tp 16'h0;
+      else
+        case (TxPointerLSB_rst)
+          2'h0 : TxLength <=#Tp TxLength - 3'h4;    // word-aligned: 4 valid bytes
+          2'h1 : TxLength <=#Tp TxLength - 3'h3;    // offset 1: 3 valid bytes
+          2'h2 : TxLength <=#Tp TxLength - 3'h2;    // offset 2: 2 valid bytes
+          2'h3 : TxLength <=#Tp TxLength - 3'h1;    // offset 3: 1 valid byte
+        endcase
+    end
+end
+
+
+
+// LatchedTxLength keeps the frame length (BD bits [31:16]) exactly as read from
+// the descriptor; unlike TxLength it is not decremented by this block.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    LatchedTxLength <=#Tp 16'h0;
+  else if(TxBDRead & TxEn_q & TxEn)
+    LatchedTxLength <=#Tp ram_do[31:16];
+end
+
+assign TxLengthEq0 = TxLength == 0;
+assign TxLengthLt4 = TxLength < 4;
+
+reg cyc_cleared;
+reg IncrTxPointer;
+
+
+// TxPointerMSB: word address (pointer bits [31:2]) of the Tx buffer in memory.
+// Loaded from the pointer word of the BD; the two LSBs are not kept here because
+// this register is only used for word-aligned accesses. It advances by one word
+// whenever IncrTxPointer is high and not masked by BlockingIncrementTxPointer.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxPointerMSB <=#Tp 30'h0;
+  else if(TxPointerRead & TxEn & TxEn_q)
+    TxPointerMSB <=#Tp ram_do[31:2];
+  else if(~BlockingIncrementTxPointer & IncrTxPointer)
+    TxPointerMSB <=#Tp TxPointerMSB + 1'b1;
+end
+
+
+// Latching the 2 LSB bits of the Tx buffer pointer (ram_do[1:0] of the pointer word).
+// Since word accesses are performed, valid data does not necessarily start at byte 0
+// (it could be byte 0, 1, 2 or 3). These two bits are used for proper selection of the
+// start byte; per the original author, TxData and TxByteCnt are set by these two bits.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxPointerLSB[1:0] <=#Tp 0;
+  else
+  if(TxEn & TxEn_q & TxPointerRead)
+    TxPointerLSB[1:0] <=#Tp ram_do[1:0];
+end
+
+
+// TxPointerLSB_rst: byte offset of the first valid byte, used for the TxLength
+// bookkeeping. It is loaded with the 2 LSBs of the Tx buffer pointer (ram_do[1:0])
+// when the pointer word is read, and it is cleared by the first acknowledged Tx
+// master access. After that access the pointer is word aligned, so all four
+// bytes of every following word are valid and TxLength is reduced by 4 per
+// access.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxPointerLSB_rst[1:0] <=#Tp 2'b00;
+  else if(TxPointerRead & TxEn & TxEn_q)
+    TxPointerLSB_rst[1:0] <=#Tp ram_do[1:0];
+  else if(m_wb_ack_i & MasterWbTX)
+    TxPointerLSB_rst[1:0] <=#Tp 2'b00;
+end
+
+
+reg  [3:0] RxByteSel;
+wire MasterAccessFinished;
+
+
+// BlockingIncrementTxPointer masks further TxPointerMSB increments: it is set
+// while IncrTxPointer is high and released by MasterAccessFinished.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    BlockingIncrementTxPointer <=#Tp 1'b0;
+  else if(MasterAccessFinished)
+    BlockingIncrementTxPointer <=#Tp 1'b0;
+  else if(IncrTxPointer)
+    BlockingIncrementTxPointer <=#Tp 1'b1;
+end
+
+
+wire TxBufferAlmostFull;
+wire TxBufferFull;
+wire TxBufferEmpty;
+wire TxBufferAlmostEmpty;
+wire SetReadTxDataFromMemory;
+
+reg BlockReadTxDataFromMemory;
+
+assign SetReadTxDataFromMemory = TxEn & TxEn_q & TxPointerRead;
+
+// ReadTxDataFromMemory: Tx data is to be fetched from memory. Started by
+// SetReadTxDataFromMemory; stopped when TxLength is 0 or on a Tx abort/retry pulse.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    ReadTxDataFromMemory <=#Tp 1'b0;
+  else if(TxRetryPulse | TxAbortPulse | TxLengthEq0)
+    ReadTxDataFromMemory <=#Tp 1'b0;
+  else if(SetReadTxDataFromMemory)
+    ReadTxDataFromMemory <=#Tp 1'b1;
+end
+
+// Burst enable flags for the Tx and Rx master accesses
+reg tx_burst_en;
+reg rx_burst_en;
+
+wire [31:0] TxData_wb;
+wire        ReadTxDataFromFifo_wb;
+
+// Tx memory read request, masked while BlockReadTxDataFromMemory is set
+wire ReadTxDataFromMemory_2 = ~BlockReadTxDataFromMemory & ReadTxDataFromMemory;
+
+// Same request, but only when a Tx burst is enabled
+wire tx_burst = tx_burst_en & ReadTxDataFromMemory_2;
+
+// BlockReadTxDataFromMemory
+//   set   : during a Tx master access (MasterWbTX, cycle not cleared) when the Tx FIFO
+//           is almost full or TxLength <= 4, provided no abort/retry is still pending
+//           (TxAbortPacket_NotCleared / TxRetryPacket_NotCleared both low)
+//   clear : when a word is read from the FIFO or a done/abort/retry packet event occurs
+// The set condition has priority over the clear condition.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    BlockReadTxDataFromMemory <=#Tp 1'b0;
+  else
+    begin
+      if( (TxBufferAlmostFull | (TxLength <= 4))
+        & MasterWbTX
+        & (~cyc_cleared)
+        & (!(TxAbortPacket_NotCleared | TxRetryPacket_NotCleared)) )
+        BlockReadTxDataFromMemory <=#Tp 1'b1;
+      else if(ReadTxDataFromFifo_wb | TxDonePacket | TxAbortPacket | TxRetryPacket)
+        BlockReadTxDataFromMemory <=#Tp 1'b0;
+    end
+end
+
+
+// A master access is finished by either an acknowledge or an error response.
+assign MasterAccessFinished = m_wb_err_i | m_wb_ack_i;
+
+// FIFO fill counters
+wire [`ETH_TX_FIFO_CNT_WIDTH-1:0] txfifo_cnt;
+wire [`ETH_RX_FIFO_CNT_WIDTH-1:0] rxfifo_cnt;
+
+// Position counters inside a Tx / Rx burst
+reg  [`ETH_BURST_CNT_WIDTH-1:0] tx_burst_cnt;
+reg  [`ETH_BURST_CNT_WIDTH-1:0] rx_burst_cnt;
+
+wire rx_burst;
+wire enough_data_in_rxfifo_for_burst;
+wire enough_data_in_rxfifo_for_burst_plus1;
+
+// Enabling master wishbone access to the memory for two devices TX and RX.
+//
+// This single clocked process arbitrates the wishbone master port between the
+// Tx side (memory reads, m_wb_we_o = 0) and the Rx side (memory writes,
+// m_wb_we_o = 1). The next action is selected by a casex over an 8-bit vector,
+// listed here MSB first (the "_" groups in the case items are pairs of bits):
+//   MasterWbTX, MasterWbRX                       - which side owns the current access
+//   ReadTxDataFromMemory_2, WriteRxDataToMemory  - pending Tx read / Rx write requests
+//   MasterAccessFinished, cyc_cleared            - ack/err seen / m_wb_cyc_o was dropped
+//   tx_burst, rx_burst                           - request qualifies for a burst
+// In the case item comments MR/MW denote a single master read/write and MRB/MWB
+// the burst branches (marked "tx burst" / "rx burst" below).
+// Burst branches count accesses in tx_burst_cnt/rx_burst_cnt, take the start address
+// from TxPointerMSB/RxPointerMSB when the counter is 0 and increment m_wb_adr_o
+// otherwise. When the counter reaches `ETH_BURST_LENGTH-1 the burst enable is cleared.
+// With ETH_WISHBONE_B3 defined m_wb_cti_o is driven as 3'b010 inside a burst, 3'b111
+// on its last access and 3'b0 otherwise (see the WISHBONE B3 cycle type identifiers).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      MasterWbTX <=#Tp 1'b0;
+      MasterWbRX <=#Tp 1'b0;
+      m_wb_adr_o <=#Tp 30'h0;
+      m_wb_cyc_o <=#Tp 1'b0;
+      m_wb_we_o  <=#Tp 1'b0;
+      m_wb_sel_o <=#Tp 4'h0;
+      cyc_cleared<=#Tp 1'b0;
+      tx_burst_cnt<=#Tp 0;
+      rx_burst_cnt<=#Tp 0;
+      IncrTxPointer<=#Tp 1'b0;
+      tx_burst_en<=#Tp 1'b1;
+      rx_burst_en<=#Tp 1'b0;
+      `ifdef ETH_WISHBONE_B3
+        m_wb_cti_o <=#Tp 3'b0;
+      `endif
+    end
+  else
+    begin
+      // Switching between two stages depends on enable signals
+      casex ({MasterWbTX, MasterWbRX, ReadTxDataFromMemory_2, WriteRxDataToMemory, MasterAccessFinished, cyc_cleared, tx_burst, rx_burst})  // synopsys parallel_case
+        8'b00_10_00_10,             // Idle and MRB needed
+        8'b10_1x_10_1x,             // MRB continues
+        8'b10_10_01_10,             // Clear (previously MR) and MRB needed
+        8'b01_1x_01_1x :            // Clear (previously MW) and MRB needed
+          begin
+            MasterWbTX <=#Tp 1'b1;  // tx burst
+            MasterWbRX <=#Tp 1'b0;
+            m_wb_cyc_o <=#Tp 1'b1;
+            m_wb_we_o  <=#Tp 1'b0;
+            m_wb_sel_o <=#Tp 4'hf;
+            cyc_cleared<=#Tp 1'b0;
+            IncrTxPointer<=#Tp 1'b1;
+            tx_burst_cnt <=#Tp tx_burst_cnt+3'h1;
+            if(tx_burst_cnt==0)
+              m_wb_adr_o <=#Tp TxPointerMSB;
+            else
+              m_wb_adr_o <=#Tp m_wb_adr_o+1'b1;
+
+            if(tx_burst_cnt==(`ETH_BURST_LENGTH-1))
+              begin
+                tx_burst_en<=#Tp 1'b0;
+              `ifdef ETH_WISHBONE_B3
+                m_wb_cti_o <=#Tp 3'b111;
+              `endif
+              end
+            else
+              begin
+              `ifdef ETH_WISHBONE_B3
+                m_wb_cti_o <=#Tp 3'b010;
+              `endif
+              end
+          end
+        8'b00_x1_00_x1,             // Idle and MWB needed
+        8'b01_x1_10_x1,             // MWB continues
+        8'b01_01_01_01,             // Clear (previously MW) and MWB needed
+        8'b10_x1_01_x1 :            // Clear (previously MR) and MWB needed
+          begin
+            MasterWbTX <=#Tp 1'b0;  // rx burst
+            MasterWbRX <=#Tp 1'b1;
+            m_wb_cyc_o <=#Tp 1'b1;
+            m_wb_we_o  <=#Tp 1'b1;
+            m_wb_sel_o <=#Tp RxByteSel;
+            IncrTxPointer<=#Tp 1'b0;
+            cyc_cleared<=#Tp 1'b0;
+            rx_burst_cnt <=#Tp rx_burst_cnt+3'h1;
+
+            if(rx_burst_cnt==0)
+              m_wb_adr_o <=#Tp RxPointerMSB;
+            else
+              m_wb_adr_o <=#Tp m_wb_adr_o+1'b1;
+
+            if(rx_burst_cnt==(`ETH_BURST_LENGTH-1))
+              begin
+                rx_burst_en<=#Tp 1'b0;
+              `ifdef ETH_WISHBONE_B3
+                m_wb_cti_o <=#Tp 3'b111;
+              `endif
+              end
+            else
+              begin
+              `ifdef ETH_WISHBONE_B3
+                m_wb_cti_o <=#Tp 3'b010;
+              `endif
+              end
+          end
+        8'b00_x1_00_x0 :            // idle and MW is needed (data write to rx buffer)
+          begin
+            MasterWbTX <=#Tp 1'b0;
+            MasterWbRX <=#Tp 1'b1;
+            m_wb_adr_o <=#Tp RxPointerMSB;
+            m_wb_cyc_o <=#Tp 1'b1;
+            m_wb_we_o  <=#Tp 1'b1;
+            m_wb_sel_o <=#Tp RxByteSel;
+            IncrTxPointer<=#Tp 1'b0;
+          end
+        8'b00_10_00_00 :            // idle and MR is needed (data read from tx buffer)
+          begin
+            MasterWbTX <=#Tp 1'b1;
+            MasterWbRX <=#Tp 1'b0;
+            m_wb_adr_o <=#Tp TxPointerMSB;
+            m_wb_cyc_o <=#Tp 1'b1;
+            m_wb_we_o  <=#Tp 1'b0;
+            m_wb_sel_o <=#Tp 4'hf;
+            IncrTxPointer<=#Tp 1'b1;
+          end
+        8'b10_10_01_00,             // MR and MR is needed (data read from tx buffer)
+        8'b01_1x_01_0x  :           // MW and MR is needed (data read from tx buffer)
+          begin
+            MasterWbTX <=#Tp 1'b1;
+            MasterWbRX <=#Tp 1'b0;
+            m_wb_adr_o <=#Tp TxPointerMSB;
+            m_wb_cyc_o <=#Tp 1'b1;
+            m_wb_we_o  <=#Tp 1'b0;
+            m_wb_sel_o <=#Tp 4'hf;
+            cyc_cleared<=#Tp 1'b0;
+            IncrTxPointer<=#Tp 1'b1;
+          end
+        8'b01_01_01_00,             // MW and MW needed (data write to rx buffer)
+        8'b10_x1_01_x0  :           // MR and MW is needed (data write to rx buffer)
+          begin
+            MasterWbTX <=#Tp 1'b0;
+            MasterWbRX <=#Tp 1'b1;
+            m_wb_adr_o <=#Tp RxPointerMSB;
+            m_wb_cyc_o <=#Tp 1'b1;
+            m_wb_we_o  <=#Tp 1'b1;
+            m_wb_sel_o <=#Tp RxByteSel;
+            cyc_cleared<=#Tp 1'b0;
+            IncrTxPointer<=#Tp 1'b0;
+          end
+        8'b01_01_10_00,             // MW and MW needed (cycle is cleared between previous and next access)
+        8'b01_1x_10_x0,             // MW and MW or MR or MRB needed (cycle is cleared between previous and next access)
+        8'b10_10_10_00,             // MR and MR needed (cycle is cleared between previous and next access)
+        8'b10_x1_10_0x :            // MR and MR or MW or MWB (cycle is cleared between previous and next access)
+          begin
+            m_wb_cyc_o <=#Tp 1'b0;  // whatever and master read or write is needed. We need to clear m_wb_cyc_o before next access is started
+            cyc_cleared<=#Tp 1'b1;
+            IncrTxPointer<=#Tp 1'b0;
+            tx_burst_cnt<=#Tp 0;
+            // A Tx burst is re-enabled only if the FIFO has room for a whole burst and
+            // more than one burst plus one word (in bytes) is still left in TxLength.
+            tx_burst_en<=#Tp txfifo_cnt<(`ETH_TX_FIFO_DEPTH-`ETH_BURST_LENGTH) & (TxLength>(`ETH_BURST_LENGTH*4+4));
+            rx_burst_cnt<=#Tp 0;
+            rx_burst_en<=#Tp MasterWbRX ? enough_data_in_rxfifo_for_burst_plus1 : enough_data_in_rxfifo_for_burst;  // Counter is not decremented, yet, so plus1 is used.
+            `ifdef ETH_WISHBONE_B3
+              m_wb_cti_o <=#Tp 3'b0;
+            `endif
+          end
+        8'bxx_00_10_00,             // whatever and no master read or write is needed (ack or err comes finishing previous access)
+        8'bxx_00_01_00 :            // Between cyc_cleared request was cleared
+          begin
+            MasterWbTX <=#Tp 1'b0;
+            MasterWbRX <=#Tp 1'b0;
+            m_wb_cyc_o <=#Tp 1'b0;
+            cyc_cleared<=#Tp 1'b0;
+            IncrTxPointer<=#Tp 1'b0;
+            rx_burst_cnt<=#Tp 0;
+            rx_burst_en<=#Tp MasterWbRX ? enough_data_in_rxfifo_for_burst_plus1 : enough_data_in_rxfifo_for_burst;  // Counter is not decremented, yet, so plus1 is used.
+            `ifdef ETH_WISHBONE_B3
+              m_wb_cti_o <=#Tp 3'b0;
+            `endif
+          end
+        8'b00_00_00_00:             // whatever and no master read or write is needed (ack or err comes finishing previous access)
+          begin
+            // Fully idle: only the Tx burst bookkeeping is refreshed.
+            tx_burst_cnt<=#Tp 0;
+            tx_burst_en<=#Tp txfifo_cnt<(`ETH_TX_FIFO_DEPTH-`ETH_BURST_LENGTH) & (TxLength>(`ETH_BURST_LENGTH*4+4));
+          end
+        default:                    // Don't touch
+          begin
+            MasterWbTX <=#Tp MasterWbTX;
+            MasterWbRX <=#Tp MasterWbRX;
+            m_wb_cyc_o <=#Tp m_wb_cyc_o;
+            m_wb_sel_o <=#Tp m_wb_sel_o;
+            IncrTxPointer<=#Tp IncrTxPointer;
+          end
+      endcase
+    end
+end
+
+
+// The Tx FIFO is flushed when a packet is aborted or has to be retried.
+wire TxFifoClear = TxRetryPacket | TxAbortPacket;
+
+// Tx FIFO (WB_CLK_I domain). A word is written for every acknowledged Tx master
+// access; a read request is ignored while the FIFO is empty.
+eth_fifo #(`ETH_TX_FIFO_DATA_WIDTH, `ETH_TX_FIFO_DEPTH, `ETH_TX_FIFO_CNT_WIDTH)
+tx_fifo (
+          .clk          (WB_CLK_I),
+          .reset        (Reset),
+          .clear        (TxFifoClear),
+          .data_in      (m_wb_dat_i),
+          .write        (MasterWbTX & m_wb_ack_i),
+          .data_out     (TxData_wb),
+          .read         (ReadTxDataFromFifo_wb & ~TxBufferEmpty),
+          .full         (TxBufferFull),
+          .almost_full  (TxBufferAlmostFull),
+          .almost_empty (TxBufferAlmostEmpty),
+          .empty        (TxBufferEmpty),
+          .cnt          (txfifo_cnt)
+        );
+
+
+reg StartOccured;
+reg TxStartFrm_sync1;
+reg TxStartFrm_sync2;
+reg TxStartFrm_syncb1;
+reg TxStartFrm_syncb2;
+
+
+
+// Start: Generation of the TxStartFrm_wb which is then synchronized to the MTxClk
+
+// TxStartFrm_wb (WB_CLK_I): set when TxBDReady is high, no start has occurred yet
+// and either the Tx FIFO is full or TxLengthEq0 is active. Cleared once the
+// request has travelled to the MTxClk domain and back (TxStartFrm_syncb2).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxStartFrm_wb <=#Tp 1'b0;
+  else
+    begin
+      if((TxLengthEq0 | TxBufferFull) & ~StartOccured & TxBDReady)
+        TxStartFrm_wb <=#Tp 1'b1;
+      else if(TxStartFrm_syncb2)
+        TxStartFrm_wb <=#Tp 1'b0;
+    end
+end
+
+// StartOccured: TxStartFrm_wb occurs only once at the beginning. Then it's blocked
+// until ResetTxBDReady.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    StartOccured <=#Tp 1'b0;
+  else
+    begin
+      if(TxStartFrm_wb)
+        StartOccured <=#Tp 1'b1;
+      else if(ResetTxBDReady)
+        StartOccured <=#Tp 1'b0;
+    end
+end
+
+// Synchronizing TxStartFrm_wb to MTxClk (two flip-flops)
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      TxStartFrm_sync1 <=#Tp 1'b0;
+      TxStartFrm_sync2 <=#Tp 1'b0;
+    end
+  else
+    begin
+      TxStartFrm_sync1 <=#Tp TxStartFrm_wb;
+      TxStartFrm_sync2 <=#Tp TxStartFrm_sync1;
+    end
+end
+
+// Synchronizing TxStartFrm_sync2 back to WB_CLK_I (two flip-flops)
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      TxStartFrm_syncb1 <=#Tp 1'b0;
+      TxStartFrm_syncb2 <=#Tp 1'b0;
+    end
+  else
+    begin
+      TxStartFrm_syncb1 <=#Tp TxStartFrm_sync2;
+      TxStartFrm_syncb2 <=#Tp TxStartFrm_syncb1;
+    end
+end
+
+// TxStartFrm (MTxClk): set by the synchronized start; cleared by TxUsedData_q or
+// by a rising edge of TxRetry / TxAbort.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    TxStartFrm <=#Tp 1'b0;
+  else
+    begin
+      if(TxStartFrm_sync2)
+        TxStartFrm <=#Tp 1'b1;
+      else if(TxUsedData_q | (~TxStartFrm_sync2 & ((TxRetry & (~TxRetry_q)) | (TxAbort & (~TxAbort_q)))))
+        TxStartFrm <=#Tp 1'b0;
+    end
+end
+// End: Generation of the TxStartFrm_wb which is then synchronized to the MTxClk
+
+
+// TxEndFrm_wb: indicator of the end of frame.
+// Set when TxLengthEq0 is active, the Tx FIFO is almost empty and TxUsedData is
+// high; cleared by a retry, done or abort pulse (the set has priority).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxEndFrm_wb <=#Tp 1'b0;
+  else
+    begin
+      if(TxUsedData & TxBufferAlmostEmpty & TxLengthEq0)
+        TxEndFrm_wb <=#Tp 1'b1;
+      else if(TxAbortPulse | TxDonePulse | TxRetryPulse)
+        TxEndFrm_wb <=#Tp 1'b0;
+    end
+end
+
+
+// Marks which bytes are valid within the word: TxLength[1:0] while TxLengthLt4 is
+// active, otherwise 0.
+assign TxValidBytes = TxLengthLt4 ? TxLength[1:0] : 2'b0;
+
+reg LatchValidBytes;
+reg LatchValidBytes_q;
+
+// LatchValidBytes registers (TxLengthLt4 & TxBDReady); LatchValidBytes_q is the same
+// signal delayed by one more WB clock. The pair is used below as a rising-edge detector.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      LatchValidBytes   <=#Tp 1'b0;
+      LatchValidBytes_q <=#Tp 1'b0;
+    end
+  else
+    begin
+      if(TxBDReady & TxLengthLt4)
+        LatchValidBytes <=#Tp 1'b1;
+      else
+        LatchValidBytes <=#Tp 1'b0;
+
+      LatchValidBytes_q <=#Tp LatchValidBytes;
+    end
+end
+
+
+// Latching valid bytes on the rising edge of LatchValidBytes; the latched value is
+// cleared by a retry, done or abort pulse.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxValidBytesLatched <=#Tp 2'h0;
+  else
+    begin
+      if(~LatchValidBytes_q & LatchValidBytes)
+        TxValidBytesLatched <=#Tp TxValidBytes;
+      else if(TxAbortPulse | TxDonePulse | TxRetryPulse)
+        TxValidBytesLatched <=#Tp 2'h0;
+    end
+end
+
+
+// Control bits taken from the latched Tx buffer descriptor status
+assign TxIRQEn         = TxStatus[14];
+assign WrapTxStatusBit = TxStatus[13];
+assign PerPacketPad    = TxStatus[12];
+assign PerPacketCrcEn  = TxStatus[11];
+
+
+// Control bits taken from the latched Rx buffer descriptor status
+assign RxIRQEn         = RxStatus[14];
+assign WrapRxStatusBit = RxStatus[13];
+
+
+// Temporary Tx and Rx buffer descriptor address
+
+// Tx: TxBDAddress + 1 while the status is written and the wrap bit is clear,
+//     all zeros otherwise (wrap on the last BD).
+assign TempTxBDAddress[7:1] = (TxBDAddress + 1'b1) & {7{TxStatusWrite & ~WrapTxStatusBit}};
+
+// Rx: r_TxBDNum[6:0] when the wrap bit is set (using first Rx BD),
+//     RxBDAddress + 1 otherwise (using next Rx BD, incremented address).
+assign TempRxBDAddress[7:1] = ({7{~WrapRxStatusBit}} & (RxBDAddress + 1'b1)) |
+                              ({7{ WrapRxStatusBit}} & (r_TxBDNum[6:0]));
+
+
+// Latching Tx buffer descriptor address.
+// Restarts from 0 on the rising edge of r_TxEn; takes TempTxBDAddress on every
+// Tx status write.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxBDAddress <=#Tp 7'h0;
+  else
+    begin
+      if((~r_TxEn_q) & r_TxEn)
+        TxBDAddress <=#Tp 7'h0;
+      else if(TxStatusWrite)
+        TxBDAddress <=#Tp TempTxBDAddress;
+    end
+end
+
+
+// Latching Rx buffer descriptor address.
+// Loaded with r_TxBDNum[6:0] on the rising edge of r_RxEn; takes TempRxBDAddress
+// on every Rx status write.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    RxBDAddress <=#Tp 7'h0;
+  else
+    begin
+      if((~r_RxEn_q) & r_RxEn)
+        RxBDAddress <=#Tp r_TxBDNum[6:0];
+      else if(RxStatusWrite)
+        RxBDAddress <=#Tp TempRxBDAddress;
+    end
+end
+
+// Tx status bits that are written back to the buffer descriptor
+wire [8:0] TxStatusInLatched = {TxUnderRun,
+                                RetryCntLatched[3:0],
+                                RetryLimit,
+                                LateCollLatched,
+                                DeferLatched,
+                                CarrierSenseLost};
+
+// Words written back to the Rx / Tx buffer descriptor: length, a zero bit,
+// the latched control status, zero padding and the latched status bits.
+assign RxBDDataIn = {LatchedRxLength, 1'b0, RxStatus, 4'h0, RxStatusInLatched};
+assign TxBDDataIn = {LatchedTxLength, 1'b0, TxStatus, 2'h0, TxStatusInLatched};
+
+
+// One-clock pulses (WB_CLK_I) on the rising edge of the synchronized Tx events
+assign TxRetryPulse   = ~TxRetry_wb_q & TxRetry_wb;
+assign TxDonePulse    = ~TxDone_wb_q  & TxDone_wb;
+assign TxAbortPulse   = ~TxAbort_wb_q & TxAbort_wb;
+
+
+
+// Generating delayed signals (MTxClk domain): one-clock delayed copies of
+// TxAbort, TxRetry and TxUsedData.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      TxUsedData_q   <=#Tp 1'b0;
+      TxRetry_q      <=#Tp 1'b0;
+      TxAbort_q      <=#Tp 1'b0;
+    end
+  else
+    begin
+      TxUsedData_q   <=#Tp TxUsedData;
+      TxRetry_q      <=#Tp TxRetry;
+      TxAbort_q      <=#Tp TxAbort;
+    end
+end
+
+// Generating delayed signals (WB_CLK_I domain): one-clock delayed copies of the
+// synchronized done/abort/retry flags, used for edge detection.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      TxRetry_wb_q  <=#Tp 1'b0;
+      TxAbort_wb_q  <=#Tp 1'b0;
+      TxDone_wb_q   <=#Tp 1'b0;
+    end
+  else
+    begin
+      TxRetry_wb_q  <=#Tp TxRetry_wb;
+      TxAbort_wb_q  <=#Tp TxAbort_wb;
+      TxDone_wb_q   <=#Tp TxDone_wb;
+    end
+end
+
+
+reg TxAbortPacketBlocked;
+
+// Common set condition for TxAbortPacket and TxAbortPacket_NotCleared: an abort is
+// pending, not blocked, and either no Tx master access is active or a non-burst
+// Tx master access has just finished.
+wire TxAbortPacketSet = TxAbort_wb & (~tx_burst_en) & MasterWbTX & MasterAccessFinished & (~TxAbortPacketBlocked) |
+                        TxAbort_wb & (~MasterWbTX) & (~TxAbortPacketBlocked);
+
+// TxAbortPacket: registered copy of the set condition
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxAbortPacket <=#Tp 1'b0;
+  else
+    begin
+      if(TxAbortPacketSet)
+        TxAbortPacket <=#Tp 1'b1;
+      else
+        TxAbortPacket <=#Tp 1'b0;
+    end
+end
+
+
+// TxAbortPacket_NotCleared: sticky version, cleared when TxEn and TxEn_q are both
+// high while the flag is set (the clear has priority over the set).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxAbortPacket_NotCleared <=#Tp 1'b0;
+  else
+    begin
+      if(TxAbortPacket_NotCleared & TxEn_q & TxEn)
+        TxAbortPacket_NotCleared <=#Tp 1'b0;
+      else if(TxAbortPacketSet)
+        TxAbortPacket_NotCleared <=#Tp 1'b1;
+    end
+end
+
+
+// TxAbortPacketBlocked: set by TxAbortPacket and released on the falling edge of
+// TxAbort_wb, so the set condition above fires once per abort.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxAbortPacketBlocked <=#Tp 1'b0;
+  else
+    begin
+      if(!TxAbort_wb & TxAbort_wb_q)
+        TxAbortPacketBlocked <=#Tp 1'b0;
+      else if(TxAbortPacket)
+        TxAbortPacketBlocked <=#Tp 1'b1;
+    end
+end
+
+
+reg TxRetryPacketBlocked;
+
+// Common set condition for TxRetryPacket and TxRetryPacket_NotCleared: a retry is
+// pending, not blocked, and either no Tx master access is active or a non-burst
+// Tx master access has just finished.
+wire TxRetryPacketSet = TxRetry_wb & !tx_burst_en & MasterWbTX & MasterAccessFinished & !TxRetryPacketBlocked |
+                        TxRetry_wb & !MasterWbTX & !TxRetryPacketBlocked;
+
+// TxRetryPacket: registered copy of the set condition
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxRetryPacket <=#Tp 1'b0;
+  else
+    begin
+      if(TxRetryPacketSet)
+        TxRetryPacket <=#Tp 1'b1;
+      else
+        TxRetryPacket <=#Tp 1'b0;
+    end
+end
+
+
+// TxRetryPacket_NotCleared: sticky version, cleared by StartTxBDRead (the clear has
+// priority over the set).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxRetryPacket_NotCleared <=#Tp 1'b0;
+  else
+    begin
+      if(StartTxBDRead)
+        TxRetryPacket_NotCleared <=#Tp 1'b0;
+      else if(TxRetryPacketSet)
+        TxRetryPacket_NotCleared <=#Tp 1'b1;
+    end
+end
+
+
+// TxRetryPacketBlocked: set by TxRetryPacket and released on the falling edge of
+// TxRetry_wb, so the set condition above fires once per retry.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxRetryPacketBlocked <=#Tp 1'b0;
+  else
+    begin
+      if(!TxRetry_wb & TxRetry_wb_q)
+        TxRetryPacketBlocked <=#Tp 1'b0;
+      else if(TxRetryPacket)
+        TxRetryPacketBlocked <=#Tp 1'b1;
+    end
+end
+
+
+reg TxDonePacketBlocked;
+
+// Common set condition for TxDonePacket and TxDonePacket_NotCleared: done is
+// pending, not blocked, and either no Tx master access is active or a non-burst
+// Tx master access has just finished.
+wire TxDonePacketSet = TxDone_wb & !tx_burst_en & MasterWbTX & MasterAccessFinished & !TxDonePacketBlocked |
+                       TxDone_wb & !MasterWbTX & !TxDonePacketBlocked;
+
+// TxDonePacket: registered copy of the set condition
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxDonePacket <=#Tp 1'b0;
+  else
+    begin
+      if(TxDonePacketSet)
+        TxDonePacket <=#Tp 1'b1;
+      else
+        TxDonePacket <=#Tp 1'b0;
+    end
+end
+
+
+// TxDonePacket_NotCleared: sticky version, cleared when TxEn and TxEn_q are both
+// high while the flag is set (the clear has priority over the set).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxDonePacket_NotCleared <=#Tp 1'b0;
+  else
+    begin
+      if(TxDonePacket_NotCleared & TxEn_q & TxEn)
+        TxDonePacket_NotCleared <=#Tp 1'b0;
+      else if(TxDonePacketSet)
+        TxDonePacket_NotCleared <=#Tp 1'b1;
+    end
+end
+
+
+// TxDonePacketBlocked: set by TxDonePacket and released on the falling edge of
+// TxDone_wb, so the set condition above fires once per done event.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxDonePacketBlocked <=#Tp 1'b0;
+  else
+    begin
+      if(!TxDone_wb & TxDone_wb_q)
+        TxDonePacketBlocked <=#Tp 1'b0;
+      else if(TxDonePacket)
+        TxDonePacketBlocked <=#Tp 1'b1;
+    end
+end
+
+
+// Indication of the last word (MTxClk domain).
+// Cleared on Flop when the frame ends, aborts or retries; otherwise TxEndFrm_wb is
+// sampled each time the last byte of a word (TxByteCnt == 3) is used.
+// NOTE(review): TxEndFrm_wb is registered on WB_CLK_I elsewhere in this file and is
+// sampled here on MTxClk without a synchronizer - confirm this is intended.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    LastWord <=#Tp 1'b0;
+  else
+    begin
+      if(Flop & (TxEndFrm | TxAbort | TxRetry))
+        LastWord <=#Tp 1'b0;
+      else if(TxUsedData & Flop & (TxByteCnt == 2'h3))
+        LastWord <=#Tp TxEndFrm_wb;
+    end
+end
+
+
+// Tx end frame generation (MTxClk domain).
+// While the last word is being sent, TxEndFrm is driven with the comparison of
+// TxByteCnt against the byte position selected by TxValidBytesLatched
+// (1 -> byte 0, 2 -> byte 1, 3 -> byte 2, 0 -> byte 3, i.e. all four bytes valid).
+// It is cleared on Flop once asserted, and by TxAbort or TxRetry_q.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    TxEndFrm <=#Tp 1'b0;
+  else
+    begin
+      if((Flop & TxEndFrm) | TxAbort | TxRetry_q)
+        TxEndFrm <=#Tp 1'b0;
+      else if(Flop & LastWord)
+        begin
+          case (TxValidBytesLatched)  // synopsys parallel_case
+            0 : TxEndFrm <=#Tp TxByteCnt == 2'h3;
+            1 : TxEndFrm <=#Tp TxByteCnt == 2'h0;
+            2 : TxEndFrm <=#Tp TxByteCnt == 2'h1;
+            3 : TxEndFrm <=#Tp TxByteCnt == 2'h2;
+            default : TxEndFrm <=#Tp 1'b0;
+          endcase
+        end
+    end
+end
+
+
+// Tx data selection (latching), MTxClk domain. Big Endian byte ordering: byte 0 of
+// a word is bits [31:24].
+//   1. At the start of a frame the first byte is taken directly from the FIFO
+//      output word, at the position given by TxPointerLSB.
+//   2. If the frame started at byte 3, the next byte is byte 0 of the FIFO output.
+//   3. Otherwise the next byte is picked from TxDataLatched by TxByteCnt.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    TxData <=#Tp 0;
+  else
+    begin
+      if(TxStartFrm_sync2 & ~TxStartFrm)
+        begin
+          case(TxPointerLSB)  // synopsys parallel_case
+            2'h0 : TxData <=#Tp TxData_wb[31:24];
+            2'h1 : TxData <=#Tp TxData_wb[23:16];
+            2'h2 : TxData <=#Tp TxData_wb[15:08];
+            2'h3 : TxData <=#Tp TxData_wb[07:00];
+          endcase
+        end
+      else if(TxStartFrm & TxUsedData & (TxPointerLSB==2'h3))
+        begin
+          TxData <=#Tp TxData_wb[31:24];
+        end
+      else if(TxUsedData & Flop)
+        begin
+          case(TxByteCnt)  // synopsys parallel_case
+            0 : TxData <=#Tp TxDataLatched[31:24];
+            1 : TxData <=#Tp TxDataLatched[23:16];
+            2 : TxData <=#Tp TxDataLatched[15:8];
+            3 : TxData <=#Tp TxDataLatched[7:0];
+          endcase
+        end
+    end
+end
+
+
+// Latching tx data (MTxClk domain). A new FIFO output word is captured
+//   - at the start of a frame,
+//   - when the last byte of the current word (TxByteCnt == 3) is used, or
+//   - during TxStartFrm when byte 0 is used (TxByteCnt == 0).
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    TxDataLatched[31:0] <=#Tp 32'h0;
+  else if( (TxStartFrm_sync2 & ~TxStartFrm)
+         | (TxUsedData & Flop & (TxByteCnt == 2'h3))
+         | (TxStartFrm & TxUsedData & Flop & (TxByteCnt == 2'h0)) )
+    TxDataLatched[31:0] <=#Tp TxData_wb[31:0];
+end
+
+
+// Tx under run (WB_CLK_I domain): set when a FIFO read is requested while the Tx
+// FIFO is empty; cleared by the abort pulse (the clear has priority).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    TxUnderRun_wb <=#Tp 1'b0;
+  else
+    begin
+      if(TxAbortPulse)
+        TxUnderRun_wb <=#Tp 1'b0;
+      else if(ReadTxDataFromFifo_wb & TxBufferEmpty)
+        TxUnderRun_wb <=#Tp 1'b1;
+    end
+end
+
+
+reg TxUnderRun_sync1;
+
+// Tx under run (MTxClk domain): TxUnderRun_sync1 captures TxUnderRun_wb and
+// TxUnderRun follows it one clock later. Both are cleared by
+// BlockingTxStatusWrite_sync2; for TxUnderRun_sync1 the set wins over the clear,
+// for TxUnderRun the clear wins over the set.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      TxUnderRun_sync1 <=#Tp 1'b0;
+      TxUnderRun       <=#Tp 1'b0;
+    end
+  else
+    begin
+      if(TxUnderRun_wb)
+        TxUnderRun_sync1 <=#Tp 1'b1;
+      else if(BlockingTxStatusWrite_sync2)
+        TxUnderRun_sync1 <=#Tp 1'b0;
+
+      if(BlockingTxStatusWrite_sync2)
+        TxUnderRun <=#Tp 1'b0;
+      else if(TxUnderRun_sync1)
+        TxUnderRun <=#Tp 1'b1;
+    end
+end
+
+
+// Tx Byte counter (MTxClk domain).
+// Cleared after an abort or retry. While TxStartFrm is active and no data has been
+// used yet it is preset to the byte following the start byte (TxPointerLSB + 1,
+// wrapping 3 -> 0); afterwards it increments with every used byte.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    TxByteCnt <=#Tp 2'h0;
+  else
+    begin
+      if(TxAbort_q | TxRetry_q)
+        TxByteCnt <=#Tp 2'h0;
+      else if(TxStartFrm & ~TxUsedData)
+        begin
+          case(TxPointerLSB)  // synopsys parallel_case
+            2'h0 : TxByteCnt <=#Tp 2'h1;
+            2'h1 : TxByteCnt <=#Tp 2'h2;
+            2'h2 : TxByteCnt <=#Tp 2'h3;
+            2'h3 : TxByteCnt <=#Tp 2'h0;
+          endcase
+        end
+      else if(TxUsedData & Flop)
+        TxByteCnt <=#Tp TxByteCnt + 1'b1;
+    end
+end
+
+
+// Start: Generation of the ReadTxDataFromFifo_tck signal and synchronization to the WB_CLK_I
+reg ReadTxDataFromFifo_sync1;
+reg ReadTxDataFromFifo_sync2;
+reg ReadTxDataFromFifo_sync3;
+reg ReadTxDataFromFifo_syncb1;
+reg ReadTxDataFromFifo_syncb2;
+reg ReadTxDataFromFifo_syncb3;
+
+
+// Read request (MTxClk domain). Raised under the same conditions that capture a new
+// word into TxDataLatched, except that no request is made for the last word.
+// Dropped on the rising edge of the acknowledge coming back from the WB_CLK_I domain.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    ReadTxDataFromFifo_tck <=#Tp 1'b0;
+  else
+    begin
+      if( (TxStartFrm_sync2 & ~TxStartFrm)
+        | (TxUsedData & Flop & (TxByteCnt == 2'h3) & ~LastWord)
+        | (TxStartFrm & TxUsedData & Flop & (TxByteCnt == 2'h0)) )
+        ReadTxDataFromFifo_tck <=#Tp 1'b1;
+      else if(ReadTxDataFromFifo_syncb2 & ~ReadTxDataFromFifo_syncb3)
+        ReadTxDataFromFifo_tck <=#Tp 1'b0;
+    end
+end
+
+// Synchronizing ReadTxDataFromFifo_tck to WB_CLK_I (sync1, sync2) and keeping one
+// more delayed copy (sync3) for edge detection.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      ReadTxDataFromFifo_sync1 <=#Tp 1'b0;
+      ReadTxDataFromFifo_sync2 <=#Tp 1'b0;
+      ReadTxDataFromFifo_sync3 <=#Tp 1'b0;
+    end
+  else
+    begin
+      ReadTxDataFromFifo_sync1 <=#Tp ReadTxDataFromFifo_tck;
+      ReadTxDataFromFifo_sync2 <=#Tp ReadTxDataFromFifo_sync1;
+      ReadTxDataFromFifo_sync3 <=#Tp ReadTxDataFromFifo_sync2;
+    end
+end
+
+// Synchronizing ReadTxDataFromFifo_sync2 back to MTxClk (syncb1, syncb2) and keeping
+// one more delayed copy (syncb3) for edge detection.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      ReadTxDataFromFifo_syncb1 <=#Tp 1'b0;
+      ReadTxDataFromFifo_syncb2 <=#Tp 1'b0;
+      ReadTxDataFromFifo_syncb3 <=#Tp 1'b0;
+    end
+  else
+    begin
+      ReadTxDataFromFifo_syncb1 <=#Tp ReadTxDataFromFifo_sync2;
+      ReadTxDataFromFifo_syncb2 <=#Tp ReadTxDataFromFifo_syncb1;
+      ReadTxDataFromFifo_syncb3 <=#Tp ReadTxDataFromFifo_syncb2;
+    end
+end
+
+// One-clock FIFO read pulse in the WB_CLK_I domain (rising edge of sync2)
+assign ReadTxDataFromFifo_wb = ~ReadTxDataFromFifo_sync3 & ReadTxDataFromFifo_sync2;
+// End: Generation of the ReadTxDataFromFifo_tck signal and synchronization to the WB_CLK_I
+
+
+// Synchronizing TxRetry signal (synchronized to WISHBONE clock, two flip-flops)
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      TxRetrySync1 <=#Tp 1'b0;
+      TxRetry_wb   <=#Tp 1'b0;
+    end
+  else
+    begin
+      TxRetrySync1 <=#Tp TxRetry;
+      TxRetry_wb   <=#Tp TxRetrySync1;
+    end
+end
+
+
+// Synchronizing TxDone signal (synchronized to WISHBONE clock, two flip-flops)
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      TxDoneSync1 <=#Tp 1'b0;
+      TxDone_wb   <=#Tp 1'b0;
+    end
+  else
+    begin
+      TxDoneSync1 <=#Tp TxDone;
+      TxDone_wb   <=#Tp TxDoneSync1;
+    end
+end
+
+
+// Synchronizing TxAbort signal (synchronized to WISHBONE clock, two flip-flops)
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      TxAbortSync1 <=#Tp 1'b0;
+      TxAbort_wb   <=#Tp 1'b0;
+    end
+  else
+    begin
+      TxAbortSync1 <=#Tp TxAbort;
+      TxAbort_wb   <=#Tp TxAbortSync1;
+    end
+end
+
+
+reg RxAbortSync1;
+reg RxAbortSync2;
+reg RxAbortSync3;
+reg RxAbortSync4;
+reg RxAbortSyncb1;
+reg RxAbortSyncb2;
+
+// A read of the Rx buffer descriptor is started after an Rx status write, on the
+// rising edge of the synchronized Rx abort, or on the rising edge of r_RxEn.
+assign StartRxBDRead = (r_RxEn & ~r_RxEn_q) | (RxAbortSync3 & ~RxAbortSync4) | RxStatusWrite;
+
+// Reading the Rx buffer descriptor: RxBDRead is set by StartRxBDRead while RxReady
+// is low and cleared once the descriptor is found ready (RxBDReady).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    RxBDRead <=#Tp 1'b0;
+  else
+    begin
+      if(~RxReady & StartRxBDRead)
+        RxBDRead <=#Tp 1'b1;
+      else if(RxBDReady)
+        RxBDRead <=#Tp 1'b0;
+    end
+end
+
+
+// Reading of the next receive buffer descriptor starts after reception status is
+// written to the previous one.
+
+// Latching the READY bit (ram_do[15]) and the status bits (ram_do[14:13]) of the Rx
+// buffer descriptor while RxBDRead is active and RxEn, RxEn_q are both high.
+// Data is available one cycle after the access is started (at that time signal RxEn
+// is not active). RxBDReady is cleared again as soon as the pointer read starts.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      RxBDReady <=#Tp 1'b0;
+      RxStatus  <=#Tp 2'h0;
+    end
+  else
+    begin
+      if(RxPointerRead)
+        RxBDReady <=#Tp 1'b0;
+      else if(RxBDRead & RxEn_q & RxEn)
+        RxBDReady <=#Tp ram_do[15]; // RxBDReady is sampled only once at the beginning
+
+      if(RxBDRead & RxEn_q & RxEn)
+        RxStatus <=#Tp ram_do[14:13];
+    end
+end
+
+
+// RxReady generation: set when the Rx pointer has been read; cleared when shifting
+// has ended, on the rising edge of the synchronized Rx abort, or on the falling
+// edge of r_RxEn (the clear has priority over the set).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    RxReady <=#Tp 1'b0;
+  else
+    begin
+      if((~r_RxEn & r_RxEn_q) | (RxAbortSync2 & ~RxAbortSync3) | ShiftEnded)
+        RxReady <=#Tp 1'b0;
+      else if(RxPointerRead & RxEn_q & RxEn)
+        RxReady <=#Tp 1'b1;
+    end
+end
+
+
+// Reading Rx BD pointer
+
+// The pointer read starts once the descriptor being read is marked ready.
+assign StartRxPointerRead = RxBDReady & RxBDRead;
+
+// RxPointerRead: set by StartRxPointerRead and cleared when RxEn and RxEn_q are
+// both high (the set has priority over the clear).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    RxPointerRead <=#Tp 1'b0;
+  else
+    begin
+      if(StartRxPointerRead)
+        RxPointerRead <=#Tp 1'b1;
+      else if(RxEn_q & RxEn)
+        RxPointerRead <=#Tp 1'b0;
+    end
+end
+
+
+// Latching Rx buffer pointer (word address, ram_do[31:2]) from buffer descriptor.
+// The address is then advanced by one word after every acknowledged Rx master access
+// (always word access; m_wb_sel_o is used for selecting bytes).
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    RxPointerMSB <=#Tp 30'h0;
+  else
+    begin
+      if(RxPointerRead & RxEn_q & RxEn)
+        RxPointerMSB <=#Tp ram_do[31:2];
+      else if(m_wb_ack_i & MasterWbRX)
+        RxPointerMSB <=#Tp RxPointerMSB + 1'b1;
+    end
+end
+
+
+// Latching the two lowest address bits from the buffer descriptor (used as
+// byte/half-word indicator). They are cleared after the first acknowledged Rx write,
+// after which all RxByteSel bits are active. The clear has priority over the load.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    RxPointerLSB_rst[1:0] <=#Tp 2'h0;
+  else
+    begin
+      if(m_wb_ack_i & MasterWbRX)
+        RxPointerLSB_rst[1:0] <=#Tp 2'h0;
+      else if(RxPointerRead & RxEn_q & RxEn)
+        RxPointerLSB_rst[1:0] <=#Tp ram_do[1:0];
+    end
+end
+
+
+// Byte select for the Rx write: the bytes in front of the start offset are masked
+// (offset 0 -> 4'hf, 1 -> 4'h7, 2 -> 4'h3, 3 -> 4'h1).
+always @ (RxPointerLSB_rst)
+begin
+  case(RxPointerLSB_rst[1:0])  // synopsys parallel_case
+    2'h3 : RxByteSel[3:0] = 4'h1;
+    2'h2 : RxByteSel[3:0] = 4'h3;
+    2'h1 : RxByteSel[3:0] = 4'h7;
+    2'h0 : RxByteSel[3:0] = 4'hf;
+  endcase
+end
+
+
+// RxEn_needed: set on the rising edge of WbEn while the receiver is enabled
+// (r_RxEn) and RxReady is low; cleared when the Rx pointer read completes.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    RxEn_needed <=#Tp 1'b0;
+  else
+    begin
+      if((WbEn & ~WbEn_q) & r_RxEn & ~RxReady)
+        RxEn_needed <=#Tp 1'b1;
+      else if(RxEn_q & RxEn & RxPointerRead)
+        RxEn_needed <=#Tp 1'b0;
+    end
+end
+
+
+// Reception status is written back to the buffer descriptor after the end of frame is detected.
+assign RxStatusWrite = RxEn_q & RxEn & ShiftEnded;
+
+reg RxEnableWindow;
+
+// Indicating that last byte is being received (MRxClk domain).
+// Set when the end of frame arrives while the current word is not yet complete
+// (RxByteCnt != 3); cleared when the word is complete during ShiftWillEnd or on
+// RxAbort (the clear has priority over the set).
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    LastByteIn <=#Tp 1'b0;
+  else
+    begin
+      if((ShiftWillEnd & (&RxByteCnt)) | RxAbort)
+        LastByteIn <=#Tp 1'b0;
+      else if(RxValid & RxReady & RxEndFrm & ~(&RxByteCnt) & RxEnableWindow)
+        LastByteIn <=#Tp 1'b1;
+    end
+end
+
+reg ShiftEnded_rck;
+reg ShiftEndedSync1;
+reg ShiftEndedSync2;
+reg ShiftEndedSync3;
+reg ShiftEndedSync_c1;
+reg ShiftEndedSync_c2;
+
+// Start condition for ShiftWillEnd: the last byte is pending, or the end of frame
+// arrives exactly on the last byte of a word (RxByteCnt == 3).
+wire StartShiftWillEnd;
+assign StartShiftWillEnd = (RxValid & RxEndFrm & (&RxByteCnt) & RxEnableWindow) | LastByteIn;
+
+// Indicating that data reception will end (MRxClk domain). Cleared by
+// ShiftEnded_rck or RxAbort, which have priority over the set.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    ShiftWillEnd <=#Tp 1'b0;
+  else
+    begin
+      if(RxAbort | ShiftEnded_rck)
+        ShiftWillEnd <=#Tp 1'b0;
+      else if(StartShiftWillEnd)
+        ShiftWillEnd <=#Tp 1'b1;
+    end
+end
+
+
+
+// Receive byte counter (MRxClk domain).
+// Cleared by ShiftEnded_rck or RxAbort. On the first byte of a frame it is preset
+// to the byte following the start offset (RxPointerLSB_rst + 1, wrapping 3 -> 0);
+// afterwards it increments for every accepted byte and while LastByteIn is set.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    RxByteCnt <=#Tp 2'h0;
+  else
+    begin
+      if(RxAbort | ShiftEnded_rck)
+        RxByteCnt <=#Tp 2'h0;
+      else if(RxValid & RxStartFrm & RxReady)
+        begin
+          case(RxPointerLSB_rst)  // synopsys parallel_case
+            2'h0 : RxByteCnt <=#Tp 2'h1;
+            2'h1 : RxByteCnt <=#Tp 2'h2;
+            2'h2 : RxByteCnt <=#Tp 2'h3;
+            2'h3 : RxByteCnt <=#Tp 2'h0;
+          endcase
+        end
+      else if((RxValid & RxEnableWindow & RxReady) | LastByteIn)
+        RxByteCnt <=#Tp RxByteCnt + 1'b1;
+    end
+end
+
+
+// Indicates how many bytes are valid within the last word (MRxClk domain).
+// Preset from the start offset on the first byte of a frame (RxPointerLSB_rst + 1,
+// wrapping 3 -> 0) and incremented for every further valid byte inside the enable
+// window, except while LastByteIn is set. The reset value is 1.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    RxValidBytes <=#Tp 2'h1;
+  else
+    begin
+      if(RxValid & RxStartFrm)
+        begin
+          case(RxPointerLSB_rst)  // synopsys parallel_case
+            2'h0 : RxValidBytes <=#Tp 2'h1;
+            2'h1 : RxValidBytes <=#Tp 2'h2;
+            2'h2 : RxValidBytes <=#Tp 2'h3;
+            2'h3 : RxValidBytes <=#Tp 2'h0;
+          endcase
+        end
+      else if(RxValid & ~LastByteIn & ~RxStartFrm & RxEnableWindow)
+        RxValidBytes <=#Tp RxValidBytes + 1'b1;
+    end
+end
+
+
+// Collecting the first three bytes of a received word (MRxClk domain), Big Endian
+// byte ordering. The byte lane is selected by RxPointerLSB_rst for the first byte of
+// a frame and by RxByteCnt afterwards. Lane 3 is not stored here; it is taken
+// directly from RxData when the complete word is assembled.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    RxDataLatched1       <=#Tp 24'h0;
+  else if(RxValid & RxReady & ~LastByteIn)
+    begin
+      if(RxStartFrm)
+        begin
+          case(RxPointerLSB_rst)     // synopsys parallel_case
+            2'h0:        RxDataLatched1[31:24] <=#Tp RxData;
+            2'h1:        RxDataLatched1[23:16] <=#Tp RxData;
+            2'h2:        RxDataLatched1[15:8]  <=#Tp RxData;
+            2'h3:        RxDataLatched1        <=#Tp RxDataLatched1;
+          endcase
+        end
+      else if(RxEnableWindow)
+        begin
+          case(RxByteCnt)     // synopsys parallel_case
+            2'h0:        RxDataLatched1[31:24] <=#Tp RxData;
+            2'h1:        RxDataLatched1[23:16] <=#Tp RxData;
+            2'h2:        RxDataLatched1[15:8]  <=#Tp RxData;
+            2'h3:        RxDataLatched1        <=#Tp RxDataLatched1;
+          endcase
+        end
+    end
+end
+
+wire SetWriteRxDataToFifo;
+
+// Assembling data that will be written to the rx_fifo (MRxClk domain), Big Endian
+// byte ordering. A normal word is the three latched bytes plus the current RxData.
+// For the final word (ShiftWillEnd) the bytes beyond RxValidBytes are zeroed;
+// RxValidBytes == 0 means that all four bytes are valid.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    RxDataLatched2 <=#Tp 32'h0;
+  else
+    begin
+      if(SetWriteRxDataToFifo & ~ShiftWillEnd)
+        RxDataLatched2 <=#Tp {RxDataLatched1[31:8], RxData};
+      else if(SetWriteRxDataToFifo & ShiftWillEnd)
+        begin
+          case(RxValidBytes)  // synopsys parallel_case
+            0 : RxDataLatched2 <=#Tp {RxDataLatched1[31:8],  RxData};
+            1 : RxDataLatched2 <=#Tp {RxDataLatched1[31:24], 24'h0};
+            2 : RxDataLatched2 <=#Tp {RxDataLatched1[31:16], 16'h0};
+            3 : RxDataLatched2 <=#Tp {RxDataLatched1[31:8],   8'h0};
+          endcase
+        end
+    end
+end
+
+
+reg WriteRxDataToFifoSync1;
+reg WriteRxDataToFifoSync2;
+reg WriteRxDataToFifoSync3;
+
+
+// Request to latch an assembled word for the rx_fifo. Asserted when either
+//  - a valid, accepted byte (RxValid & RxReady) that is not the first byte of
+//    the frame arrives inside the receive window while RxByteCnt == 3, or
+//  - the first byte of a frame is accepted while RxPointerLSB_rst == 3, or
+//  - ShiftWillEnd and LastByteIn are both set while RxByteCnt == 3.
+assign SetWriteRxDataToFifo = (RxValid & RxReady & ~RxStartFrm & RxEnableWindow & (&RxByteCnt)) | 
+                              (RxValid & RxReady &  RxStartFrm & (&RxPointerLSB_rst))           | 
+                              (ShiftWillEnd & LastByteIn & (&RxByteCnt));
+
+// Write request flag towards the rx_fifo (MRxClk domain).
+// Set when a word is ready (SetWriteRxDataToFifo) and no abort is pending;
+// cleared once WriteRxDataToFifoSync2 is seen high, or on RxAbort.
+// Setting has priority over clearing.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      WriteRxDataToFifo <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(SetWriteRxDataToFifo & ~RxAbort)
+        WriteRxDataToFifo <= #Tp 1'b1;
+      else if(WriteRxDataToFifoSync2 | RxAbort)
+        WriteRxDataToFifo <= #Tp 1'b0;
+    end
+end
+
+
+
+// WriteRxDataToFifo sampled by three cascaded flops clocked by WB_CLK_I.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      WriteRxDataToFifoSync1 <= #Tp 1'b0;
+      WriteRxDataToFifoSync2 <= #Tp 1'b0;
+      WriteRxDataToFifoSync3 <= #Tp 1'b0;
+    end
+  else
+    begin
+      // first stage samples the flag
+      if(WriteRxDataToFifo)
+        WriteRxDataToFifoSync1 <= #Tp 1'b1;
+      else
+        WriteRxDataToFifoSync1 <= #Tp 1'b0;
+
+      // second and third stage simply delay it
+      WriteRxDataToFifoSync2 <= #Tp WriteRxDataToFifoSync1;
+      WriteRxDataToFifoSync3 <= #Tp WriteRxDataToFifoSync2;
+    end
+end
+
+// High for one WB_CLK_I cycle on each rising edge of WriteRxDataToFifoSync2
+wire WriteRxDataToFifo_wb;
+assign WriteRxDataToFifo_wb = (~WriteRxDataToFifoSync3) & WriteRxDataToFifoSync2;
+
+
+reg  LatchedRxStartFrm;
+reg  SyncRxStartFrm;
+reg  SyncRxStartFrm_q;
+reg  SyncRxStartFrm_q2;
+wire RxFifoReset;
+
+// MRxClk side: hold RxStartFrm until SyncRxStartFrm_q shows that the
+// WB_CLK_I side has seen it.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    LatchedRxStartFrm <= #Tp 1'b0;
+  else
+    begin
+      if(RxStartFrm & ~SyncRxStartFrm_q)
+        LatchedRxStartFrm <= #Tp 1'b1;
+      else if(SyncRxStartFrm_q)
+        LatchedRxStartFrm <= #Tp 1'b0;
+    end
+end
+
+
+// WB_CLK_I side: three cascaded flops fed by LatchedRxStartFrm.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      SyncRxStartFrm    <= #Tp 1'b0;
+      SyncRxStartFrm_q  <= #Tp 1'b0;
+      SyncRxStartFrm_q2 <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(LatchedRxStartFrm)
+        SyncRxStartFrm <= #Tp 1'b1;
+      else
+        SyncRxStartFrm <= #Tp 1'b0;
+
+      SyncRxStartFrm_q  <= #Tp SyncRxStartFrm;
+      SyncRxStartFrm_q2 <= #Tp SyncRxStartFrm_q;
+    end
+end
+
+
+// One WB_CLK_I cycle pulse on the rising edge of SyncRxStartFrm_q;
+// drives the clear input of the rx_fifo.
+assign RxFifoReset = (~SyncRxStartFrm_q2) & SyncRxStartFrm_q;
+
+
+// Receive FIFO, clocked by WB_CLK_I. A word is written on each
+// WriteRxDataToFifo_wb pulse unless the FIFO is full, and read on every
+// acknowledged master RX access (MasterWbRX & m_wb_ack_i).
+eth_fifo #(`ETH_RX_FIFO_DATA_WIDTH, `ETH_RX_FIFO_DEPTH, `ETH_RX_FIFO_CNT_WIDTH)
+rx_fifo (
+         .clk(WB_CLK_I),
+         .reset(Reset),
+         .clear(RxFifoReset),
+         .data_in(RxDataLatched2),
+         .write(WriteRxDataToFifo_wb & ~RxBufferFull),
+         .data_out(m_wb_dat_o),
+         .read(MasterWbRX & m_wb_ack_i),
+         .full(RxBufferFull),
+         .almost_full(),
+         .almost_empty(RxBufferAlmostEmpty),
+         .empty(RxBufferEmpty),
+         .cnt(rxfifo_cnt)
+        );
+
+// Fill level compared against the burst length
+assign enough_data_in_rxfifo_for_burst       = rxfifo_cnt >= `ETH_BURST_LENGTH;
+assign enough_data_in_rxfifo_for_burst_plus1 = rxfifo_cnt >  `ETH_BURST_LENGTH;
+
+// Data is pending for memory whenever the FIFO is not empty
+assign WriteRxDataToMemory = ~RxBufferEmpty;
+assign rx_burst            = WriteRxDataToMemory & rx_burst_en;
+
+
+// End-of-frame flag in the MRxClk domain.
+// Set when a word is handed over (SetWriteRxDataToFifo) while
+// StartShiftWillEnd is active and no abort is pending; cleared on RxAbort or
+// once both ShiftEndedSync_c1 and ShiftEndedSync_c2 are high.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    ShiftEnded_rck <= #Tp 1'b0;
+  else
+    begin
+      if(~RxAbort & SetWriteRxDataToFifo & StartShiftWillEnd)
+        ShiftEnded_rck <= #Tp 1'b1;
+      else if(RxAbort | (ShiftEndedSync_c1 & ShiftEndedSync_c2))
+        ShiftEnded_rck <= #Tp 1'b0;
+    end
+end
+
+// ShiftEnded_rck sampled in the WB_CLK_I domain.
+// ShiftEndedSync1/2 form a two-flop chain; ShiftEndedSync3 is set on the
+// rising edge seen between them and cleared when ShiftEnded is high.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      ShiftEndedSync1 <= #Tp 1'b0;
+      ShiftEndedSync2 <= #Tp 1'b0;
+      ShiftEndedSync3 <= #Tp 1'b0;
+    end
+  else
+    begin
+      ShiftEndedSync1 <= #Tp ShiftEnded_rck;
+      ShiftEndedSync2 <= #Tp ShiftEndedSync1;
+
+      if(ShiftEndedSync1 & ~ShiftEndedSync2)
+        ShiftEndedSync3 <= #Tp 1'b1;
+      else if(ShiftEnded)
+        ShiftEndedSync3 <= #Tp 1'b0;
+    end
+end
+
+// End-of-frame flag in the WB_CLK_I domain.
+// Set on an acknowledged master RX access while ShiftEndedSync3 is pending
+// and the rx_fifo reports almost empty; cleared by RxStatusWrite.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    ShiftEnded <= #Tp 1'b0;
+  else
+    begin
+      if(ShiftEndedSync3 & MasterWbRX & m_wb_ack_i & RxBufferAlmostEmpty & ~ShiftEnded)
+        ShiftEnded <= #Tp 1'b1;
+      else if(RxStatusWrite)
+        ShiftEnded <= #Tp 1'b0;
+    end
+end
+
+// ShiftEndedSync2 brought back into the MRxClk domain through two flops;
+// both outputs are used to clear ShiftEnded_rck.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      ShiftEndedSync_c1 <= #Tp 1'b0;
+      ShiftEndedSync_c2 <= #Tp 1'b0;
+    end
+  else
+    begin
+      ShiftEndedSync_c1 <= #Tp ShiftEndedSync2;
+      ShiftEndedSync_c2 <= #Tp ShiftEndedSync_c1;
+    end
+end
+
+// Receive window: opened by RxStartFrm, closed by RxEndFrm or RxAbort.
+// RxStartFrm has priority when both occur in the same cycle.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    RxEnableWindow <= #Tp 1'b0;
+  else
+    begin
+      if(RxStartFrm)
+        RxEnableWindow <= #Tp 1'b1;
+      else if(RxEndFrm | RxAbort)
+        RxEnableWindow <= #Tp 1'b0;
+    end
+end
+
+
+// RxAbortLatched passed through four cascaded flops in the WB_CLK_I domain.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      RxAbortSync1 <= #Tp 1'b0;
+      RxAbortSync2 <= #Tp 1'b0;
+      RxAbortSync3 <= #Tp 1'b0;
+      RxAbortSync4 <= #Tp 1'b0;
+    end
+  else
+    begin
+      RxAbortSync1 <= #Tp RxAbortLatched;
+      RxAbortSync2 <= #Tp RxAbortSync1;
+      RxAbortSync3 <= #Tp RxAbortSync2;
+      RxAbortSync4 <= #Tp RxAbortSync3;
+    end
+end
+
+// RxAbortSync2 returned to the MRxClk domain through two flops.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      RxAbortSyncb1 <= #Tp 1'b0;
+      RxAbortSyncb2 <= #Tp 1'b0;
+    end
+  else
+    begin
+      RxAbortSyncb1 <= #Tp RxAbortSync2;
+      RxAbortSyncb2 <= #Tp RxAbortSyncb1;
+    end
+end
+
+
+// RxAbort is held in RxAbortLatched until the returned copy (RxAbortSyncb2)
+// arrives; the clear has priority over a new RxAbort.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    RxAbortLatched <= #Tp 1'b0;
+  else
+    begin
+      if(RxAbortSyncb2)
+        RxAbortLatched <= #Tp 1'b0;
+      else if(RxAbort)
+        RxAbortLatched <= #Tp 1'b1;
+    end
+end
+
+
+// Receive status vector, MSB first:
+// [8] ReceivedPauseFrm, [7] AddressMiss, [6] RxOverrun, [5] InvalidSymbol,
+// [4] DribbleNibble, [3] ReceivedPacketTooBig, [2] ShortFrame,
+// [1] LatchedCrcError, [0] RxLateCollision
+assign RxStatusIn = {ReceivedPauseFrm,
+                     AddressMiss,
+                     RxOverrun,
+                     InvalidSymbol,
+                     DribbleNibble,
+                     ReceivedPacketTooBig,
+                     ShortFrame,
+                     LatchedCrcError,
+                     RxLateCollision};
+
+
+// Frame length and status are both captured on LoadRxStatus.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      LatchedRxLength[15:0] <= #Tp 16'h0;
+      RxStatusInLatched     <= #Tp 'h0;
+    end
+  else
+    begin
+      if(LoadRxStatus)
+        begin
+          LatchedRxLength[15:0] <= #Tp RxLength[15:0];
+          RxStatusInLatched     <= #Tp RxStatusIn;
+        end
+    end
+end
+
+
+// Rx overrun: set when a word is offered (WriteRxDataToFifo_wb) while the
+// rx_fifo is full; cleared by RxStatusWrite, which has priority.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    RxOverrun <= #Tp 1'b0;
+  else
+    begin
+      if(RxStatusWrite)
+        RxOverrun <= #Tp 1'b0;
+      else if(RxBufferFull & WriteRxDataToFifo_wb)
+        RxOverrun <= #Tp 1'b1;
+    end
+end
+
+
+
+// Any of the transmit failure conditions
+wire TxError;
+assign TxError = TxUnderRun
+               | RetryLimit
+               | LateCollLatched
+               | CarrierSenseLost;
+
+// Receive error: any latched status bit except [8:7] and [2].
+// ShortFrame (RxStatusInLatched[2]) can not set an error because short frames
+// are aborted when signal r_RecSmall is set to 0 in MODER register.
+// AddressMiss identifies a frame that was received because of promiscuous
+// mode and is not an error.
+wire RxError;
+assign RxError = |{RxStatusInLatched[6:3], RxStatusInLatched[1:0]};
+
+
+
+reg RxStatusWriteLatched;
+reg RxStatusWriteLatched_sync1;
+reg RxStatusWriteLatched_sync2;
+reg RxStatusWriteLatched_syncb1;
+reg RxStatusWriteLatched_syncb2;
+
+
+// Latching and synchronizing RxStatusWrite signal. This signal is used for
+// clearing the ReceivedPauseFrm signal.
+// WB_CLK_I side: RxStatusWrite is held in RxStatusWriteLatched until the copy
+// returned from the MRxClk side (..._syncb2) arrives; the clear has priority.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      RxStatusWriteLatched        <= #Tp 1'b0;
+      RxStatusWriteLatched_syncb1 <= #Tp 1'b0;
+      RxStatusWriteLatched_syncb2 <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(RxStatusWriteLatched_syncb2)
+        RxStatusWriteLatched <= #Tp 1'b0;
+      else if(RxStatusWrite)
+        RxStatusWriteLatched <= #Tp 1'b1;
+
+      RxStatusWriteLatched_syncb1 <= #Tp RxStatusWriteLatched_sync2;
+      RxStatusWriteLatched_syncb2 <= #Tp RxStatusWriteLatched_syncb1;
+    end
+end
+
+
+// MRxClk side: two-flop chain fed by RxStatusWriteLatched.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      RxStatusWriteLatched_sync1 <= #Tp 1'b0;
+      RxStatusWriteLatched_sync2 <= #Tp 1'b0;
+    end
+  else
+    begin
+      RxStatusWriteLatched_sync1 <= #Tp RxStatusWriteLatched;
+      RxStatusWriteLatched_sync2 <= #Tp RxStatusWriteLatched_sync1;
+    end
+end
+
+
+
+// Tx Done / Tx Error interrupts.
+// Both are evaluated on TxStatusWrite when TxIRQEn is set: TxB_IRQ reports a
+// frame without TxError, TxE_IRQ one with TxError. In every other cycle both
+// are driven low, so each is a single WB_CLK_I cycle pulse.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      TxB_IRQ <= #Tp 1'b0;
+      TxE_IRQ <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(TxStatusWrite & TxIRQEn)
+        begin
+          TxB_IRQ <= #Tp ~TxError;
+          TxE_IRQ <= #Tp TxError;
+        end
+      else
+        begin
+          TxB_IRQ <= #Tp 1'b0;
+          TxE_IRQ <= #Tp 1'b0;
+        end
+    end
+end
+
+
+// Rx Done / Rx Error interrupts.
+// Evaluated on RxStatusWrite when RxIRQEn is set. Pause frames raise an
+// interrupt only when r_PassAll is set and r_RxFlow is clear. RxB_IRQ
+// additionally requires ReceivedPacketGood.
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      RxB_IRQ <= #Tp 1'b0;
+      RxE_IRQ <= #Tp 1'b0;
+    end
+  else
+    begin
+      // Rx Done
+      if(RxStatusWrite & RxIRQEn & ReceivedPacketGood & (~ReceivedPauseFrm | (ReceivedPauseFrm & r_PassAll & (~r_RxFlow))))
+        RxB_IRQ <= #Tp (~RxError);
+      else
+        RxB_IRQ <= #Tp 1'b0;
+
+      // Rx Error
+      if(RxStatusWrite & RxIRQEn & (~ReceivedPauseFrm | (ReceivedPauseFrm & r_PassAll & (~r_RxFlow))))
+        RxE_IRQ <= #Tp RxError;
+      else
+        RxE_IRQ <= #Tp 1'b0;
+    end
+end
+
+
+// Busy Interrupt
+//
+// Busy_IRQ_rck (MRxClk domain) is set when the first byte of a frame arrives
+// (RxValid & RxStartFrm) while RxReady is low. It is carried into the
+// WB_CLK_I domain through Busy_IRQ_sync1..3; Busy_IRQ is a one-cycle pulse on
+// the rising edge of Busy_IRQ_sync2. Busy_IRQ_sync2 is returned to the
+// MRxClk domain through Busy_IRQ_syncb1/2 and clears Busy_IRQ_rck.
+//
+// All synchronizer flops are asynchronously reset, like the other
+// synchronizers in this module, so that Busy_IRQ and the clear of
+// Busy_IRQ_rck are defined immediately after Reset instead of depending on
+// the power-up contents of the flops.
+
+reg Busy_IRQ_rck;
+reg Busy_IRQ_sync1;
+reg Busy_IRQ_sync2;
+reg Busy_IRQ_sync3;
+reg Busy_IRQ_syncb1;
+reg Busy_IRQ_syncb2;
+
+
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    Busy_IRQ_rck <=#Tp 1'b0;
+  else
+  if(RxValid & RxStartFrm & ~RxReady)
+    Busy_IRQ_rck <=#Tp 1'b1;
+  else
+  if(Busy_IRQ_syncb2)
+    Busy_IRQ_rck <=#Tp 1'b0;
+end
+
+// MRxClk -> WB_CLK_I
+always @ (posedge WB_CLK_I or posedge Reset)
+begin
+  if(Reset)
+    begin
+      Busy_IRQ_sync1 <=#Tp 1'b0;
+      Busy_IRQ_sync2 <=#Tp 1'b0;
+      Busy_IRQ_sync3 <=#Tp 1'b0;
+    end
+  else
+    begin
+      Busy_IRQ_sync1 <=#Tp Busy_IRQ_rck;
+      Busy_IRQ_sync2 <=#Tp Busy_IRQ_sync1;
+      Busy_IRQ_sync3 <=#Tp Busy_IRQ_sync2;
+    end
+end
+
+// WB_CLK_I -> MRxClk (acknowledge path)
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      Busy_IRQ_syncb1 <=#Tp 1'b0;
+      Busy_IRQ_syncb2 <=#Tp 1'b0;
+    end
+  else
+    begin
+      Busy_IRQ_syncb1 <=#Tp Busy_IRQ_sync2;
+      Busy_IRQ_syncb2 <=#Tp Busy_IRQ_syncb1;
+    end
+end
+
+assign Busy_IRQ = Busy_IRQ_sync2 & ~Busy_IRQ_sync3;
+
+
+         
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_txcounters.v
===================================================================
--- /trunk/OC-Ethernet/eth_txcounters.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_txcounters.v	(revision 6)
@@ -0,0 +1,221 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_txcounters.v                                            ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////      - Novan Hartadi (novan@vlsi.itb.ac.id)                  ////
+////      - Mahmud Galela (mgalela@vlsi.itb.ac.id)                ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.5  2002/04/22 14:54:14  mohor
+// FCS should not be included in NibbleMinFl.
+//
+// Revision 1.4  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.3  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.2  2001/09/11 14:17:00  mohor
+// Few little NCSIM warnings fixed.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+// Revision 1.4  2001/06/27 21:27:45  mohor
+// Few typos fixed.
+//
+// Revision 1.2  2001/06/19 10:38:07  mohor
+// Minor changes in header.
+//
+// Revision 1.1  2001/06/19 10:27:57  mohor
+// TxEthMAC initial release.
+//
+//
+//
+
+
+`include "timescale.v"
+
+
+// eth_txcounters
+//
+// Transmit-side counters clocked by MTxClk:
+//   NibCnt    - nibble counter, cleared by ResetNibCnt, advanced by IncrementNibCnt
+//   ByteCnt   - byte counter, cleared by ResetByteCnt, advanced by IncrementByteCnt
+//               (it stops at 16'hffff in the data, PAD and FCS states)
+//   DlyCrcCnt - delayed CRC counter, started in StateSFD when DlyCrcEn is set
+// and the flags decoded from them (ExcessiveDefer, NibCntEq7, NibCntEq15,
+// MaxFrame, NibbleMinFl).
+module eth_txcounters (StatePreamble, StateIPG, StateData, StatePAD, StateFCS, StateJam, 
+                       StateBackOff, StateDefer, StateIdle, StartDefer, StartIPG, StartFCS, 
+                       StartJam, StartBackoff, TxStartFrm, MTxClk, Reset, MinFL, MaxFL, HugEn, 
+                       ExDfrEn, PacketFinished_q, DlyCrcEn, StateSFD, ByteCnt, NibCnt, 
+                       ExcessiveDefer, NibCntEq7, NibCntEq15, MaxFrame, NibbleMinFl, DlyCrcCnt
+                      );
+
+parameter Tp = 1;
+
+input MTxClk;             // Tx clock
+input Reset;              // Reset
+input StatePreamble;      // Preamble state
+input StateIPG;           // IPG state
+input [1:0] StateData;    // Data state
+input StatePAD;           // PAD state
+input StateFCS;           // FCS state
+input StateJam;           // Jam state
+input StateBackOff;       // Backoff state
+input StateDefer;         // Defer state
+input StateIdle;          // Idle state
+input StateSFD;           // SFD state
+input StartDefer;         // Defer state will be activated in next clock
+input StartIPG;           // IPG state will be activated in next clock
+input StartFCS;           // FCS state will be activated in next clock
+input StartJam;           // Jam state will be activated in next clock
+input StartBackoff;       // Backoff state will be activated in next clock
+input TxStartFrm;         // Tx start frame
+input [15:0] MinFL;       // Minimum frame length (in bytes)
+input [15:0] MaxFL;       // Maximum frame length (in bytes)
+input HugEn;              // Packets bigger than MaxFL enabled
+input ExDfrEn;            // Excessive deferral enabled (masks ExcessiveDefer when set)
+input PacketFinished_q;   // Clears ByteCnt and DlyCrcCnt
+input DlyCrcEn;           // Delayed CRC enabled
+
+output [15:0] ByteCnt;    // Byte counter
+output [15:0] NibCnt;     // Nibble counter
+output ExcessiveDefer;    // Excessive Deferral occurring
+output NibCntEq7;         // Nibble counter is equal to 7
+output NibCntEq15;        // Nibble counter is equal to 15
+output MaxFrame;          // Maximum frame occurred
+output NibbleMinFl;       // Nibble counter is greater than the minimum frame length
+output [2:0] DlyCrcCnt;   // Delayed CRC Count
+
+wire ExcessiveDeferCnt;
+wire ResetNibCnt;
+wire IncrementNibCnt;
+wire ResetByteCnt;
+wire IncrementByteCnt;
+wire ByteCntMax;
+
+reg [15:0] NibCnt;
+reg [15:0] ByteCnt;
+reg  [2:0] DlyCrcCnt;
+
+
+// The nibble counter runs in every active state; in the Defer state it only
+// runs while a frame is waiting (TxStartFrm) and the excessive deferral
+// limit has not been reached.
+assign IncrementNibCnt = StateIPG | StatePreamble | (|StateData) | StatePAD 
+                       | StateFCS | StateJam | StateBackOff
+                       | (StateDefer & ~ExcessiveDefer & TxStartFrm);
+
+
+assign ResetNibCnt = (StateDefer & ExcessiveDefer & ~TxStartFrm)
+                   | (StatePreamble & NibCntEq15)
+                   | (StateJam & NibCntEq7)
+                   | StateIdle | StartDefer | StartIPG | StartFCS | StartJam;
+
+// Nibble Counter (reset has priority over increment)
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    NibCnt <= #Tp 16'h0;
+  else
+    begin
+      if(ResetNibCnt)
+        NibCnt <= #Tp 16'h0;
+      else
+      if(IncrementNibCnt)
+        NibCnt <= #Tp NibCnt + 1'b1;
+     end
+end
+
+
+// Low three / four bits all ones
+assign NibCntEq7   = &NibCnt[2:0];
+assign NibCntEq15  = &NibCnt[3:0];
+
+assign NibbleMinFl = NibCnt >= (((MinFL-3'h4)<<1) -1);  // FCS should not be included in NibbleMinFl
+
+// Excessive deferral limit: 6071 nibbles. Only NibCnt[13:0] takes part in the
+// comparison, so the literal is sized to 14 bits to match the operand.
+assign ExcessiveDeferCnt = NibCnt[13:0] == 14'h17b7;
+
+assign ExcessiveDefer  = ExcessiveDeferCnt & ~ExDfrEn;
+
+// ByteCnt counts bytes in the data state, every second nibble in PAD/FCS
+// (both only until ByteCntMax), and once per 128 nibbles in backoff.
+assign IncrementByteCnt = (StateData[1] & ~ByteCntMax)
+                        | (StateBackOff & (&NibCnt[6:0]))
+                        | ((StatePAD | StateFCS) & NibCnt[0] & ~ByteCntMax);
+
+assign ResetByteCnt = StartBackoff | (StateIdle & TxStartFrm) | PacketFinished_q;
+
+
+// Transmit Byte Counter (reset has priority over increment)
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    ByteCnt[15:0] <= #Tp 16'h0;
+  else
+    begin
+      if(ResetByteCnt)
+        ByteCnt[15:0] <= #Tp 16'h0;
+      else
+      if(IncrementByteCnt)
+        ByteCnt[15:0] <= #Tp ByteCnt[15:0] + 1'b1;
+    end
+end
+
+
+// Frame reached MaxFL bytes and huge frames are not enabled
+assign MaxFrame = (ByteCnt[15:0] == MaxFL[15:0]) & ~HugEn;
+
+assign ByteCntMax = &ByteCnt[15:0];
+
+
+// Delayed CRC counter: starts in StateSFD when DlyCrcEn is set, keeps
+// counting in StateData[1] while non-zero, and returns to zero when it
+// reaches 4 in StateData[1], on StartJam, or on PacketFinished_q.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    DlyCrcCnt <= #Tp 3'h0;
+  else
+    begin        
+      if((StateData[1] & (DlyCrcCnt == 3'h4)) | StartJam | PacketFinished_q)
+        DlyCrcCnt <= #Tp 3'h0;
+      else
+      if(DlyCrcEn & (StateSFD | (StateData[1] & (|DlyCrcCnt[2:0]))))
+        DlyCrcCnt <= #Tp DlyCrcCnt + 1'b1;
+    end
+end
+
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_random.v
===================================================================
--- /trunk/OC-Ethernet/eth_random.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_random.v	(revision 6)
@@ -0,0 +1,141 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_random.v                                                ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////      - Novan Hartadi (novan@vlsi.itb.ac.id)                  ////
+////      - Mahmud Galela (mgalela@vlsi.itb.ac.id)                ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.3  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+// Revision 1.3  2001/06/19 18:16:40  mohor
+// TxClk changed to MTxClk (as discribed in the documentation).
+// Crc changed so only one file can be used instead of two.
+//
+// Revision 1.2  2001/06/19 10:38:07  mohor
+// Minor changes in header.
+//
+// Revision 1.1  2001/06/19 10:27:57  mohor
+// TxEthMAC initial release.
+//
+//
+//
+//
+
+`include "timescale.v"
+
+// eth_random
+//
+// Pseudo-random backoff value for the transmitter. A 10-bit shift register
+// (x) runs continuously on MTxClk with XNOR feedback from bits 2 and 9.
+// The number of register bits passed on grows with RetryCnt (bit 0 always,
+// bit n only when RetryCnt > n). The value is captured while StateJam and
+// StateJam_q are both high and then compared against ByteCnt.
+module eth_random (MTxClk, Reset, StateJam, StateJam_q, RetryCnt, NibCnt, ByteCnt, 
+                   RandomEq0, RandomEqByteCnt);
+
+parameter Tp = 1;
+
+input        MTxClk;
+input        Reset;
+input        StateJam;
+input        StateJam_q;
+input  [3:0] RetryCnt;
+input [15:0] NibCnt;
+input  [9:0] ByteCnt;
+output       RandomEq0;
+output       RandomEqByteCnt;
+
+wire       Feedback;
+reg  [9:0] x;
+wire [9:0] Random;
+reg  [9:0] RandomLatched;
+
+
+// Shift register: new bit enters at x[0]
+assign Feedback = x[2] ~^ x[9];
+
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      x[9:0] <= #Tp 0;
+    end
+  else
+    begin
+      x[9:0] <= #Tp {x[8:0], Feedback};
+    end
+end
+
+// Mask the upper bits according to the retry count
+assign Random = {(RetryCnt > 9) ? x[9] : 1'b0,
+                 (RetryCnt > 8) ? x[8] : 1'b0,
+                 (RetryCnt > 7) ? x[7] : 1'b0,
+                 (RetryCnt > 6) ? x[6] : 1'b0,
+                 (RetryCnt > 5) ? x[5] : 1'b0,
+                 (RetryCnt > 4) ? x[4] : 1'b0,
+                 (RetryCnt > 3) ? x[3] : 1'b0,
+                 (RetryCnt > 2) ? x[2] : 1'b0,
+                 (RetryCnt > 1) ? x[1] : 1'b0,
+                 x[0]};
+
+
+// Capture the masked value while StateJam and StateJam_q overlap
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    RandomLatched <= #Tp 10'h000;
+  else if(StateJam & StateJam_q)
+    RandomLatched <= #Tp Random;
+end
+
+// Random Number == 0      IEEE 802.3 page 68. If 0 we go to defer and not to backoff.
+assign RandomEq0 = (RandomLatched == 10'h0);
+
+// Latched value reached by ByteCnt while the low seven NibCnt bits are all ones
+assign RandomEqByteCnt = (ByteCnt[9:0] == RandomLatched) & (&NibCnt[6:0]);
+
+endmodule
Index: /trunk/OC-Ethernet/eth_cop.v
===================================================================
--- /trunk/OC-Ethernet/eth_cop.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_cop.v	(revision 6)
@@ -0,0 +1,388 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_cop.v                                                   ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001, 2002 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.3  2002/10/10 16:43:59  mohor
+// Minor $display change.
+//
+// Revision 1.2  2002/09/09 12:54:13  mohor
+// error acknowledge cycle termination added to display.
+//
+// Revision 1.1  2002/08/14 17:16:07  mohor
+// Traffic cop with 2 wishbone master interfaces and 2 wishbone slave
+// interfaces:
+// - Host connects to the master interface
+// - Ethernet master (DMA) connects to the second master interface
+// - Memory interface connects to the slave interface
+// - Ethernet slave interface (access to registers and BDs) connects to second
+//   slave interface
+//
+//
+//
+//
+//
+
+`include "eth_defines.v"
+`include "timescale.v"
+
+module eth_cop
+(
+  // WISHBONE common
+  wb_clk_i, wb_rst_i, 
+
+  // WISHBONE MASTER 1
+  m1_wb_adr_i, m1_wb_sel_i, m1_wb_we_i,  m1_wb_dat_o, 
+  m1_wb_dat_i, m1_wb_cyc_i, m1_wb_stb_i, m1_wb_ack_o, 
+  m1_wb_err_o, 
+
+  // WISHBONE MASTER 2
+  m2_wb_adr_i, m2_wb_sel_i, m2_wb_we_i,  m2_wb_dat_o, 
+  m2_wb_dat_i, m2_wb_cyc_i, m2_wb_stb_i, m2_wb_ack_o, 
+  m2_wb_err_o, 
+
+  // WISHBONE slave 1
+  s1_wb_adr_o, s1_wb_sel_o, s1_wb_we_o,  s1_wb_cyc_o, 
+  s1_wb_stb_o, s1_wb_ack_i, s1_wb_err_i, s1_wb_dat_i,
+  s1_wb_dat_o, 
+
+  // WISHBONE slave 2
+  s2_wb_adr_o, s2_wb_sel_o, s2_wb_we_o,  s2_wb_cyc_o, 
+  s2_wb_stb_o, s2_wb_ack_i, s2_wb_err_i, s2_wb_dat_i,
+  s2_wb_dat_o
+);
+// Connects two WISHBONE masters to two slaves, one transfer at a time (m1 wins a tie).
+parameter Tp=1;  // delay applied to every registered assignment
+// The target slave is chosen by the M?_ADDRESSED_S? macros, which are not defined in this module.
+// WISHBONE common
+input wb_clk_i, wb_rst_i;
+
+// WISHBONE MASTER 1
+input  [31:0] m1_wb_adr_i, m1_wb_dat_i;
+input   [3:0] m1_wb_sel_i;
+input         m1_wb_cyc_i, m1_wb_stb_i, m1_wb_we_i;
+output [31:0] m1_wb_dat_o;
+output        m1_wb_ack_o, m1_wb_err_o;
+
+// WISHBONE MASTER 2
+input  [31:0] m2_wb_adr_i, m2_wb_dat_i;
+input   [3:0] m2_wb_sel_i;
+input         m2_wb_cyc_i, m2_wb_stb_i, m2_wb_we_i;
+output [31:0] m2_wb_dat_o;
+output        m2_wb_ack_o, m2_wb_err_o;
+
+// WISHBONE slave 1
+input  [31:0] s1_wb_dat_i;
+input         s1_wb_ack_i, s1_wb_err_i;
+output [31:0] s1_wb_adr_o, s1_wb_dat_o;
+output  [3:0] s1_wb_sel_o;
+output        s1_wb_we_o,  s1_wb_cyc_o, s1_wb_stb_o;
+
+// WISHBONE slave 2
+input  [31:0] s2_wb_dat_i;
+input         s2_wb_ack_i, s2_wb_err_i;
+output [31:0] s2_wb_adr_o, s2_wb_dat_o;
+output  [3:0] s2_wb_sel_o;
+output        s2_wb_we_o,  s2_wb_cyc_o, s2_wb_stb_o;
+
+reg           m1_in_progress;
+reg           m2_in_progress;
+reg    [31:0] s1_wb_adr_o;
+reg     [3:0] s1_wb_sel_o;
+reg           s1_wb_we_o;
+reg    [31:0] s1_wb_dat_o;
+reg           s1_wb_cyc_o;
+reg           s1_wb_stb_o;
+reg    [31:0] s2_wb_adr_o;
+reg     [3:0] s2_wb_sel_o;
+reg           s2_wb_we_o;
+reg    [31:0] s2_wb_dat_o;
+reg           s2_wb_cyc_o;
+reg           s2_wb_stb_o;
+// Responses to the masters; assigned in the combinational always blocks further down.
+reg           m1_wb_ack_o;
+reg    [31:0] m1_wb_dat_o;
+reg           m2_wb_ack_o;
+reg    [31:0] m2_wb_dat_o;
+
+reg           m1_wb_err_o;
+reg           m2_wb_err_o;
+// m?_req: cyc and stb are asserted and the address decodes to slave 1 or slave 2.
+wire m_wb_access_finished;
+wire m1_req = m1_wb_cyc_i & m1_wb_stb_i & (`M1_ADDRESSED_S1 | `M1_ADDRESSED_S2);
+wire m2_req = m2_wb_cyc_i & m2_wb_stb_i & (`M2_ADDRESSED_S1 | `M2_ADDRESSED_S2);
+// Arbiter and slave request registers.  Key = {m1 busy, m2 busy, m1_req, m2_req, finished}.
+always @ (posedge wb_clk_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    begin
+      m1_in_progress <=#Tp 0;
+      m2_in_progress <=#Tp 0;
+      s1_wb_adr_o    <=#Tp 0;
+      s1_wb_sel_o    <=#Tp 0;
+      s1_wb_we_o     <=#Tp 0;
+      s1_wb_dat_o    <=#Tp 0;
+      s1_wb_cyc_o    <=#Tp 0;
+      s1_wb_stb_o    <=#Tp 0;
+      s2_wb_adr_o    <=#Tp 0;
+      s2_wb_sel_o    <=#Tp 0;
+      s2_wb_we_o     <=#Tp 0;
+      s2_wb_dat_o    <=#Tp 0;
+      s2_wb_cyc_o    <=#Tp 0;
+      s2_wb_stb_o    <=#Tp 0;
+    end
+  else
+    begin
+      case({m1_in_progress, m2_in_progress, m1_req, m2_req, m_wb_access_finished})  // synopsys_full_case synopsys_paralel_case
+        5'b00_10_0, 5'b00_11_0 :
+          begin
+            m1_in_progress <=#Tp 1'b1;  // idle: m1 or (m1 & m2) want access: m1 -> m
+            if(`M1_ADDRESSED_S1)
+              begin
+                s1_wb_adr_o <=#Tp m1_wb_adr_i;
+                s1_wb_sel_o <=#Tp m1_wb_sel_i;
+                s1_wb_we_o  <=#Tp m1_wb_we_i;
+                s1_wb_dat_o <=#Tp m1_wb_dat_i;
+                s1_wb_cyc_o <=#Tp 1'b1;
+                s1_wb_stb_o <=#Tp 1'b1;
+              end
+            else if(`M1_ADDRESSED_S2)
+              begin
+                s2_wb_adr_o <=#Tp m1_wb_adr_i;
+                s2_wb_sel_o <=#Tp m1_wb_sel_i;
+                s2_wb_we_o  <=#Tp m1_wb_we_i;
+                s2_wb_dat_o <=#Tp m1_wb_dat_i;
+                s2_wb_cyc_o <=#Tp 1'b1;
+                s2_wb_stb_o <=#Tp 1'b1;
+              end
+            else
+              $display("(%t)(%m)WISHBONE ERROR: Unspecified address space accessed", $time);
+          end
+        5'b00_01_0 :
+          begin
+            m2_in_progress <=#Tp 1'b1;  // idle: m2 wants access: m2 -> m
+            if(`M2_ADDRESSED_S1)
+              begin
+                s1_wb_adr_o <=#Tp m2_wb_adr_i;
+                s1_wb_sel_o <=#Tp m2_wb_sel_i;
+                s1_wb_we_o  <=#Tp m2_wb_we_i;
+                s1_wb_dat_o <=#Tp m2_wb_dat_i;
+                s1_wb_cyc_o <=#Tp 1'b1;
+                s1_wb_stb_o <=#Tp 1'b1;
+              end
+            else if(`M2_ADDRESSED_S2)
+              begin
+                s2_wb_adr_o <=#Tp m2_wb_adr_i;
+                s2_wb_sel_o <=#Tp m2_wb_sel_i;
+                s2_wb_we_o  <=#Tp m2_wb_we_i;
+                s2_wb_dat_o <=#Tp m2_wb_dat_i;
+                s2_wb_cyc_o <=#Tp 1'b1;
+                s2_wb_stb_o <=#Tp 1'b1;
+              end
+            else
+              $display("(%t)(%m)WISHBONE ERROR: Unspecified address space accessed", $time);
+          end
+        5'b10_10_1, 5'b10_11_1 :
+          begin
+            m1_in_progress <=#Tp 1'b0;  // m1 in progress. Cycle is finished. Send ack or err to m1.
+            if(`M1_ADDRESSED_S1)
+              begin
+                s1_wb_cyc_o <=#Tp 1'b0;
+                s1_wb_stb_o <=#Tp 1'b0;
+              end
+            else if(`M1_ADDRESSED_S2)
+              begin
+                s2_wb_cyc_o <=#Tp 1'b0;
+                s2_wb_stb_o <=#Tp 1'b0;
+              end
+          end
+        5'b01_01_1, 5'b01_11_1 :
+          begin
+            m2_in_progress <=#Tp 1'b0;  // m2 in progress. Cycle is finished. Send ack or err to m2.
+            if(`M2_ADDRESSED_S1)
+              begin
+                s1_wb_cyc_o <=#Tp 1'b0;
+                s1_wb_stb_o <=#Tp 1'b0;
+              end
+            else if(`M2_ADDRESSED_S2)
+              begin
+                s2_wb_cyc_o <=#Tp 1'b0;
+                s2_wb_stb_o <=#Tp 1'b0;
+              end
+          end
+      endcase
+    end
+end
+// Generating Ack for master 1: while m1 owns the bus, ack and read data come from the
+// addressed slave; otherwise ack is 0 and m1_wb_dat_o is left unassigned (keeps its value).
+always @ (m1_in_progress or m1_wb_adr_i or s1_wb_ack_i or s2_wb_ack_i or s1_wb_dat_i or s2_wb_dat_i or `M1_ADDRESSED_S1 or `M1_ADDRESSED_S2)
+begin
+  if(m1_in_progress)
+    begin
+      if(`M1_ADDRESSED_S1) begin
+        m1_wb_ack_o <= s1_wb_ack_i;
+        m1_wb_dat_o <= s1_wb_dat_i;
+      end
+      else if(`M1_ADDRESSED_S2) begin
+        m1_wb_ack_o <= s2_wb_ack_i;
+        m1_wb_dat_o <= s2_wb_dat_i;
+      end
+    end
+  else
+    m1_wb_ack_o <= 0;
+end
+
+// Generating Ack for master 2: while m2 owns the bus, ack and read data come from the
+// addressed slave; otherwise ack is 0 and m2_wb_dat_o is left unassigned (keeps its value).
+always @ (m2_in_progress or m2_wb_adr_i or s1_wb_ack_i or s2_wb_ack_i or s1_wb_dat_i or s2_wb_dat_i or `M2_ADDRESSED_S1 or `M2_ADDRESSED_S2)
+begin
+  if(m2_in_progress)
+    begin
+      if(`M2_ADDRESSED_S1) begin
+        m2_wb_ack_o <= s1_wb_ack_i;
+        m2_wb_dat_o <= s1_wb_dat_i;
+      end
+      else if(`M2_ADDRESSED_S2) begin
+        m2_wb_ack_o <= s2_wb_ack_i;
+        m2_wb_dat_o <= s2_wb_dat_i;
+      end
+    end
+  else
+    m2_wb_ack_o <= 0;
+end
+
+// Generating Err for master 1: while m1 owns the bus err comes from the addressed slave;
+// when idle, err is raised if m1 strobes an address that decodes to neither slave.
+always @ (m1_in_progress or m1_wb_adr_i or s1_wb_err_i or s2_wb_err_i or `M1_ADDRESSED_S1 or `M1_ADDRESSED_S2 or
+          m1_wb_cyc_i or m1_wb_stb_i)
+begin
+  if(m1_in_progress)  begin
+    if(`M1_ADDRESSED_S1)
+      m1_wb_err_o <= s1_wb_err_i;
+    else if(`M1_ADDRESSED_S2)
+      m1_wb_err_o <= s2_wb_err_i;
+  end
+  else if(m1_wb_cyc_i & m1_wb_stb_i & ~`M1_ADDRESSED_S1 & ~`M1_ADDRESSED_S2)
+    m1_wb_err_o <= 1'b1;
+  else
+    m1_wb_err_o <= 1'b0;
+end
+
+// Generating Err for master 2: while m2 owns the bus err comes from the addressed slave;
+// when idle, err is raised if m2 strobes an address that decodes to neither slave.
+always @ (m2_in_progress or m2_wb_adr_i or s1_wb_err_i or s2_wb_err_i or `M2_ADDRESSED_S1 or `M2_ADDRESSED_S2 or
+          m2_wb_cyc_i or m2_wb_stb_i)
+begin
+  if(m2_in_progress)  begin
+    if(`M2_ADDRESSED_S1)
+      m2_wb_err_o <= s1_wb_err_i;
+    else if(`M2_ADDRESSED_S2)
+      m2_wb_err_o <= s2_wb_err_i;
+  end
+  else if(m2_wb_cyc_i & m2_wb_stb_i & ~`M2_ADDRESSED_S1 & ~`M2_ADDRESSED_S2)
+    m2_wb_err_o <= 1'b1;
+  else
+    m2_wb_err_o <= 1'b0;
+end
+
+// A transfer is finished as soon as either master sees ack or err.
+assign m_wb_access_finished = m1_wb_ack_o | m1_wb_err_o | m2_wb_ack_o | m2_wb_err_o;
+
+// Activity monitor (simulation aid): cnt counts clocks with a slave cycle open and no
+// ack/err from either slave; reaching 1000 prints the pending request and calls $stop.
+integer cnt;
+always @ (posedge wb_clk_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    cnt <=#Tp 0;
+  else
+  if(s1_wb_ack_i | s1_wb_err_i | s2_wb_ack_i | s2_wb_err_i)
+    cnt <=#Tp 0;
+  else
+  if(s1_wb_cyc_o | s2_wb_cyc_o)
+    cnt <=#Tp cnt+1;
+end
+
+always @ (posedge wb_clk_i)
+begin
+  if(cnt==1000) begin
+    $display("(%0t)(%m) ERROR: WB activity ??? ", $time);
+    if(s1_wb_cyc_o) begin
+      $display("s1_wb_dat_o = 0x%0x", s1_wb_dat_o);
+      $display("s1_wb_adr_o = 0x%0x", s1_wb_adr_o);
+      $display("s1_wb_sel_o = 0x%0x", s1_wb_sel_o);
+      $display("s1_wb_we_o = 0x%0x", s1_wb_we_o);
+    end
+    else if(s2_wb_cyc_o) begin
+      $display("s2_wb_dat_o = 0x%0x", s2_wb_dat_o);
+      $display("s2_wb_adr_o = 0x%0x", s2_wb_adr_o);
+      $display("s2_wb_sel_o = 0x%0x", s2_wb_sel_o);
+      $display("s2_wb_we_o = 0x%0x", s2_wb_we_o);
+    end
+
+    $stop;
+  end
+end
+
+// Simulation aid: report and $stop when a slave answers an open cycle with err.
+always @ (posedge wb_clk_i)
+begin
+  if(s1_wb_err_i & s1_wb_cyc_o) begin
+    $display("(%0t) ERROR: WB cycle finished with error acknowledge ", $time);
+    $display("s1_wb_dat_o = 0x%0x", s1_wb_dat_o);
+    $display("s1_wb_adr_o = 0x%0x", s1_wb_adr_o);
+    $display("s1_wb_sel_o = 0x%0x", s1_wb_sel_o);
+    $display("s1_wb_we_o = 0x%0x", s1_wb_we_o);
+    $stop;
+  end
+  if(s2_wb_err_i & s2_wb_cyc_o) begin
+    $display("(%0t) ERROR: WB cycle finished with error acknowledge ", $time);
+    $display("s2_wb_dat_o = 0x%0x", s2_wb_dat_o);
+    $display("s2_wb_adr_o = 0x%0x", s2_wb_adr_o);
+    $display("s2_wb_sel_o = 0x%0x", s2_wb_sel_o);
+    $display("s2_wb_we_o = 0x%0x", s2_wb_we_o);
+    $stop;
+  end
+end
+
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_rxaddrcheck.v
===================================================================
--- /trunk/OC-Ethernet/eth_rxaddrcheck.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_rxaddrcheck.v	(revision 6)
@@ -0,0 +1,207 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_rxaddrcheck.v                                           ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/cores/ethmac/                      ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Bill Dittenhofer (billditt@aol.com)                   ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.8  2002/11/19 17:34:52  mohor
+// AddressMiss status is connecting to the Rx BD. AddressMiss is identifying
+// that a frame was received because of the promiscuous mode.
+//
+// Revision 1.7  2002/09/04 18:41:06  mohor
+// Bug when last byte of destination address was not checked fixed.
+//
+// Revision 1.6  2002/03/20 15:14:11  mohor
+// When in promiscuous mode some frames were not received correctly. Fixed.
+//
+// Revision 1.5  2002/03/02 21:06:32  mohor
+// Log info was missing.
+//
+//
+// Revision 1.1  2002/02/08 12:51:54  ditt
+// Initial release of the ethernet addresscheck module.
+//
+//
+//
+//
+//
+
+
+`include "timescale.v"
+
+
+module eth_rxaddrcheck(MRxClk,  Reset, RxData, Broadcast ,r_Bro ,r_Pro,
+                       ByteCntEq2, ByteCntEq3, ByteCntEq4, ByteCntEq5,
+                       ByteCntEq6, ByteCntEq7, HASH0, HASH1, 
+                       CrcHash,    CrcHashGood, StateData, RxEndFrm,
+                       Multicast, MAC, RxAbort, AddressMiss, PassAll,
+                       ControlFrmAddressOK
+                      );
+
+// Receive address filter.  While StateData is non-zero the incoming bytes are
+// compared with MAC (unicast), Broadcast is qualified by r_Bro, and multicast
+// frames are looked up in the 64-bit HASH1:HASH0 table.  When none of these
+// match and r_Pro is clear, RxAbort is raised on the clock where ByteCntEq7 is high.
+
+parameter Tp = 1;                // delay applied to every registered assignment
+
+  input        MRxClk;            // clock for every register in this module
+  input        Reset;             // asynchronous, active high
+
+  // Received data and byte-position strobes
+  input [7:0]  RxData;
+  input [1:0]  StateData;         // any bit set enables the checks (RxCheckEn)
+  input        ByteCntEq2;        // RxData is compared with MAC[47:40]
+  input        ByteCntEq3;        //                         MAC[39:32]
+  input        ByteCntEq4;        //                         MAC[31:24]
+  input        ByteCntEq5;        //                         MAC[23:16]
+  input        ByteCntEq6;        //                         MAC[15:8]
+  input        ByteCntEq7;        //                         MAC[7:0]; RxAbort/AddressMiss update
+  input        RxEndFrm;          // clears UnicastOK and MulticastOK
+
+  // Address-class indications and configuration
+  input        Broadcast;
+  input        Multicast;
+  input        r_Bro;             // 1 = Broadcast does not count as a match
+  input        r_Pro;             // 1 = RxAbort is never raised
+  input        PassAll;           // together with ControlFrmAddressOK clears AddressMiss
+  input        ControlFrmAddressOK;
+  input [47:0] MAC;
+
+  // Multicast hash filter
+  input [31:0] HASH0;             // table half used when CrcHash[5] = 0
+  input [31:0] HASH1;             // table half used when CrcHash[5] = 1
+  input [5:0]  CrcHash;
+  input        CrcHashGood;       // with Multicast: MulticastOK is loaded from HashBit
+
+  output       RxAbort;
+  output       AddressMiss;
+
+  reg         RxAbort;
+  reg         AddressMiss;
+  reg         UnicastOK;          // running result of the six MAC byte compares
+  reg         MulticastOK;        // hash-table bit captured for the current frame
+  reg  [7:0]  ByteHash;           // byte of IntHash selected by CrcHash[4:3]
+  wire [31:0] IntHash;            // HASH1 or HASH0, selected by CrcHash[5]
+  wire        HashBit;            // bit of ByteHash selected by CrcHash[2:0]
+  wire        BroadcastOK;
+  wire        RxCheckEn;
+  wire        RxAddressInvalid;
+
+// The address is invalid only when no match source is set and r_Pro is clear.
+assign RxAddressInvalid = ~UnicastOK & ~BroadcastOK & ~MulticastOK & ~r_Pro;
+assign BroadcastOK      = Broadcast & ~r_Bro;
+assign RxCheckEn        = StateData[1] | StateData[0];
+
+// Address Error Reported at end of address cycle
+// RxAbort clears after one cycle
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    RxAbort <= #Tp 1'b0;
+  else
+    begin
+      if(RxCheckEn & ByteCntEq7 & RxAddressInvalid)
+        RxAbort <= #Tp 1'b1;
+      else
+        RxAbort <= #Tp 1'b0;
+    end
+end
+
+// This ff holds the "Address Miss" information that is written to the RX BD status.
+// It is only updated while ByteCntEq7 and RxCheckEn are both high.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    AddressMiss <= #Tp 1'b0;
+  else if(RxCheckEn & ByteCntEq7)
+    AddressMiss <= #Tp ~(UnicastOK | BroadcastOK | MulticastOK | (PassAll & ControlFrmAddressOK));
+end
+
+// Hash Address Check, Multicast
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    MulticastOK <= #Tp 1'b0;
+  else if(RxEndFrm | RxAbort)
+    MulticastOK <= #Tp 1'b0;
+  else if(Multicast & CrcHashGood)
+    MulticastOK <= #Tp HashBit;
+end
+
+// Address Detection (unicast)
+// start with ByteCntEq2 due to delay of address from RxData
+// The first strobe loads UnicastOK; each later strobe ANDs in one more byte compare.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    UnicastOK <= #Tp 1'b0;
+  else if(RxCheckEn & ByteCntEq2)
+    UnicastOK <= #Tp (RxData[7:0] == MAC[47:40]);
+  else if(RxCheckEn & ByteCntEq3)
+    UnicastOK <= #Tp UnicastOK & (RxData[7:0] == MAC[39:32]);
+  else if(RxCheckEn & ByteCntEq4)
+    UnicastOK <= #Tp UnicastOK & (RxData[7:0] == MAC[31:24]);
+  else if(RxCheckEn & ByteCntEq5)
+    UnicastOK <= #Tp UnicastOK & (RxData[7:0] == MAC[23:16]);
+  else if(RxCheckEn & ByteCntEq6)
+    UnicastOK <= #Tp UnicastOK & (RxData[7:0] == MAC[15:8]);
+  else if(RxCheckEn & ByteCntEq7)
+    UnicastOK <= #Tp UnicastOK & (RxData[7:0] == MAC[7:0]);
+  else if(RxEndFrm | RxAbort)
+    UnicastOK <= #Tp 1'b0;
+end
+
+// Hash lookup: CrcHash[5] picks the word, [4:3] the byte, [2:0] the bit.
+assign IntHash = CrcHash[5] ? HASH1 : HASH0;
+
+always @ (CrcHash or IntHash)
+begin
+  case(CrcHash[4:3])
+    2'b11: ByteHash = IntHash[31:24];
+    2'b10: ByteHash = IntHash[23:16];
+    2'b01: ByteHash = IntHash[15:8];
+    2'b00: ByteHash = IntHash[7:0];
+  endcase
+end
+
+assign HashBit = ByteHash[CrcHash[2:0]];
+
+endmodule
Index: /trunk/OC-Ethernet/eth_fifo.v
===================================================================
--- /trunk/OC-Ethernet/eth_fifo.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_fifo.v	(revision 6)
@@ -0,0 +1,186 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_fifo.v                                                  ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.3  2002/04/22 13:45:52  mohor
+// Generic ram or Xilinx ram can be used in fifo (selectable by setting
+// ETH_FIFO_XILINX in eth_defines.v).
+//
+// Revision 1.2  2002/03/25 13:33:04  mohor
+// When clear and read/write are active at the same time, cnt and pointers are
+// set to 1.
+//
+// Revision 1.1  2002/02/05 16:44:39  mohor
+// Both rx and tx part are finished. Tested with wb_clk_i between 10 and 200
+// MHz. Statuses, overrun, control frame transmission and reception still  need
+// to be fixed.
+//
+//
+
+`include "eth_defines.v"
+`include "timescale.v"
+
+module eth_fifo (data_in, data_out, clk, reset, write, read, clear, almost_full, full, almost_empty, empty, cnt);
+// Synchronous FIFO with a fill counter.  The pointers are CNT_WIDTH-1 bits wide and wrap
+// naturally, so DEPTH is expected to equal 2**(CNT_WIDTH-1) (true for the defaults).
+parameter DATA_WIDTH    = 32;   // width of one FIFO word
+parameter DEPTH         = 8;    // number of words stored
+parameter CNT_WIDTH     = 4;    // width of cnt; must be able to hold the value DEPTH
+
+parameter Tp            = 1;    // delay applied to every registered assignment
+
+input                     clk;
+input                     reset;   // asynchronous, active high
+input                     write;   // store data_in; ignored while full
+input                     read;    // advance to the next word; ignored while empty
+input                     clear;   // synchronous restart of counter and pointers
+input   [DATA_WIDTH-1:0]  data_in;
+
+output  [DATA_WIDTH-1:0]  data_out;
+output                    almost_full;   // cnt[CNT_WIDTH-2:0] is all ones
+output                    full;          // cnt == DEPTH
+output                    almost_empty;  // cnt == 1
+output                    empty;         // cnt == 0
+output  [CNT_WIDTH-1:0]   cnt;           // number of stored words
+
+`ifdef ETH_FIFO_XILINX
+`else
+  `ifdef ETH_ALTERA_ALTSYNCRAM
+  `else
+    reg     [DATA_WIDTH-1:0]  fifo  [0:DEPTH-1];
+    reg     [DATA_WIDTH-1:0]  data_out;
+  `endif
+`endif
+
+reg     [CNT_WIDTH-1:0]   cnt;
+reg     [CNT_WIDTH-2:0]   read_pointer;
+reg     [CNT_WIDTH-2:0]   write_pointer;
+
+// Accesses actually carried out this cycle; counter and pointers share them to stay in step.
+wire do_read  = read  & ~empty;
+wire do_write = write & ~full;
+// Fill counter.  On clear it restarts at read^write (0 or 1).
+always @ (posedge clk or posedge reset)
+begin
+  if(reset)
+    cnt <=#Tp 0;
+  else
+  if(clear)
+    cnt <=#Tp { {(CNT_WIDTH-1){1'b0}}, read^write};
+  else
+  if(do_read ^ do_write)
+    if(do_read)
+      cnt <=#Tp cnt - 1'b1;
+    else
+      cnt <=#Tp cnt + 1'b1;
+end
+// Read pointer.  On clear it restarts at 0, or at 1 when read is high as well.
+always @ (posedge clk or posedge reset)
+begin
+  if(reset)
+    read_pointer <=#Tp 0;
+  else
+  if(clear)
+    read_pointer <=#Tp { {(CNT_WIDTH-2){1'b0}}, read};
+  else
+  if(do_read)
+    read_pointer <=#Tp read_pointer + 1'b1;
+end
+// Write pointer.  On clear it restarts at 0, or at 1 when write is high as well.
+always @ (posedge clk or posedge reset)
+begin
+  if(reset)
+    write_pointer <=#Tp 0;
+  else
+  if(clear)
+    write_pointer <=#Tp { {(CNT_WIDTH-2){1'b0}}, write};
+  else
+  if(do_write)
+    write_pointer <=#Tp write_pointer + 1'b1;
+end
+
+assign empty = ~(|cnt);
+assign almost_empty = cnt == 1;
+assign full  = cnt == DEPTH;
+assign almost_full  = &cnt[CNT_WIDTH-2:0];
+
+`ifdef ETH_FIFO_XILINX
+  xilinx_dist_ram_16x32 fifo
+  ( .data_out(data_out), 
+    .we(write & ~full),
+    .data_in(data_in),
+    .read_address( clear ? {CNT_WIDTH-1{1'b0}} : read_pointer),
+    .write_address(clear ? {CNT_WIDTH-1{1'b0}} : write_pointer),
+    .wclk(clk)
+  );
+`else   // !ETH_FIFO_XILINX
+`ifdef ETH_ALTERA_ALTSYNCRAM
+  altera_dpram_16x32	altera_dpram_16x32_inst
+  (
+  	.data             (data_in),
+  	.wren             (write & ~full),
+  	.wraddress        (clear ? {CNT_WIDTH-1{1'b0}} : write_pointer),
+  	.rdaddress        (clear ? {CNT_WIDTH-1{1'b0}} : read_pointer ),
+  	.clock            (clk),
+  	.q                (data_out)
+  );  //exemplar attribute altera_dpram_16x32_inst NOOPT TRUE
+`else   // !ETH_ALTERA_ALTSYNCRAM
+  always @ (posedge clk)
+  begin
+    if(write & clear)
+      fifo[0] <=#Tp data_in;
+    else
+   if(write & ~full)
+      fifo[write_pointer] <=#Tp data_in;
+  end
+
+  always @ (posedge clk)
+  begin
+    if(clear)
+      data_out <=#Tp fifo[0];
+    else
+      data_out <=#Tp fifo[read_pointer];
+  end
+`endif  // !ETH_ALTERA_ALTSYNCRAM
+`endif  // !ETH_FIFO_XILINX
+
+endmodule
Index: /trunk/OC-Ethernet/eth_receivecontrol.v
===================================================================
--- /trunk/OC-Ethernet/eth_receivecontrol.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_receivecontrol.v	(revision 6)
@@ -0,0 +1,438 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_receivecontrol.v                                        ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.4  2002/11/22 01:57:06  mohor
+// Rx Flow control fixed. CF flag added to the RX buffer descriptor. RxAbort
+// synchronized.
+//
+// Revision 1.3  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+// Revision 1.1  2001/07/03 12:51:54  mohor
+// Initial release of the MAC Control module.
+//
+//
+//
+//
+//
+
+
+`include "timescale.v"
+
+
+module eth_receivecontrol (MTxClk, MRxClk, TxReset, RxReset, RxData, RxValid, RxStartFrm, 
+                           RxEndFrm, RxFlow, ReceiveEnd, MAC, DlyCrcEn, TxDoneIn, 
+                           TxAbortIn, TxStartFrmOut, ReceivedLengthOK, ReceivedPacketGood, 
+                           TxUsedDataOutDetected, Pause, ReceivedPauseFrm, AddressOK, 
+                           RxStatusWriteLatched_sync2, r_PassAll, SetPauseTimer
+                          );
+// Receive-side PAUSE handling: detects PAUSE control frames and runs the pause timer on MRxClk.
+parameter Tp = 1;  // Delay used in every "<= #Tp" register assignment
+// The Pause output itself is registered on MTxClk from a two-flop synchronized "timer is zero" flag.
+
+input       MTxClk;                   // Clocks Pause and the PauseTimerEq0 synchronizer
+input       MRxClk;                   // Clocks every other register in this module
+input       TxReset;                  // Asynchronous reset (active high) of the MTxClk registers
+input       RxReset;                  // Asynchronous reset (active high) of the MRxClk registers
+input [7:0] RxData;                   // Received byte that is compared against the PAUSE fields
+input       RxValid;                  // Qualifies every ByteCntEqN decode and the byte counters
+input       RxStartFrm;               // Clears AssembledTimerValue
+input       RxEndFrm;                 // Resets ByteCnt; together with RxValid it clears DlyCrcCnt
+input       RxFlow;                   // Gates SetPauseTimer, Pause, Divider2 and the slot timer
+input       ReceiveEnd;               // Clears the per-frame flags and reopens DetectionWindow
+input [47:0]MAC;                      // Compared byte-wise with the bytes at ByteCnt 0..5
+input       DlyCrcEn;                 // When set, ByteCnt advances only once DlyCrcCnt[2] is set
+input       TxDoneIn;
+input       TxAbortIn;
+input       TxStartFrmOut;
+input       ReceivedLengthOK;         // Required for SetPauseTimer
+input       ReceivedPacketGood;       // Required for SetPauseTimer
+input       TxUsedDataOutDetected;
+input       RxStatusWriteLatched_sync2; // Clears ReceivedPauseFrm when r_PassAll is set
+input       r_PassAll;                // Selects how ReceivedPauseFrm is cleared
+
+output      Pause;                    // Registered on MTxClk
+output      ReceivedPauseFrm;         // Type/Length and opcode matched (no address check)
+output      AddressOK;
+output      SetPauseTimer;            // Loads PauseTimer with LatchedTimerValue
+
+
+reg         Pause;
+reg         AddressOK;                // Multicast or unicast address detected
+reg         TypeLengthOK;             // Type/Length field contains 0x8808
+reg         DetectionWindow;          // Detection of the PAUSE frame is possible within this window
+reg         OpCodeOK;                 // PAUSE opcode detected (0x0001)
+reg  [2:0]  DlyCrcCnt;
+reg  [4:0]  ByteCnt;
+reg [15:0]  AssembledTimerValue;
+reg [15:0]  LatchedTimerValue;
+reg         ReceivedPauseFrm;
+reg         ReceivedPauseFrmWAddr;
+reg         PauseTimerEq0_sync1;
+reg         PauseTimerEq0_sync2;
+reg [15:0]  PauseTimer;
+reg         Divider2;
+reg  [5:0]  SlotTimer;
+
+wire [47:0] ReservedMulticast;        // 0x0180C2000001
+wire [15:0] TypeLength;               // 0x8808
+wire        ResetByteCnt;             // Clears ByteCnt (driven by RxEndFrm)
+wire        IncrementByteCnt;         // Enables the ByteCnt increment
+wire        ByteCntEq0;               // ByteCnt = 0
+wire        ByteCntEq1;               // ByteCnt = 1
+wire        ByteCntEq2;               // ByteCnt = 2
+wire        ByteCntEq3;               // ByteCnt = 3
+wire        ByteCntEq4;               // ByteCnt = 4
+wire        ByteCntEq5;               // ByteCnt = 5
+wire        ByteCntEq12;              // ByteCnt = 12
+wire        ByteCntEq13;              // ByteCnt = 13
+wire        ByteCntEq14;              // ByteCnt = 14
+wire        ByteCntEq15;              // ByteCnt = 15
+wire        ByteCntEq16;              // ByteCnt = 16
+wire        ByteCntEq17;              // ByteCnt = 17
+wire        ByteCntEq18;              // ByteCnt = 18
+wire        DecrementPauseTimer;      // SlotFinished while PauseTimer is non-zero
+wire        PauseTimerEq0;            // PauseTimer = 0 (MRxClk domain)
+wire        ResetSlotTimer;           // Driven by RxReset
+wire        IncrementSlotTimer;       // Pause & RxFlow & Divider2
+wire        SlotFinished;             // SlotTimer is all ones and is being incremented
+
+
+
+// Reserved multicast address and Type/Length for PAUSE control
+assign ReservedMulticast = 48'h0180C2000001;
+assign TypeLength = 16'h8808;
+
+// Address Detection (Multicast or unicast), one received byte per ByteCnt value 0..5.
+// NOTE(review): every byte may match either address, so a byte-wise mix of the two also passes.
+always @ (posedge MRxClk or posedge RxReset)
+begin
+  if(RxReset)
+    AddressOK <= #Tp 1'b0;
+  else
+  if(DetectionWindow & ByteCntEq0)
+    AddressOK <= #Tp  RxData[7:0] == ReservedMulticast[47:40] | RxData[7:0] == MAC[47:40];
+  else
+  if(DetectionWindow & ByteCntEq1)
+    AddressOK <= #Tp (RxData[7:0] == ReservedMulticast[39:32] | RxData[7:0] == MAC[39:32]) & AddressOK;
+  else
+  if(DetectionWindow & ByteCntEq2)
+    AddressOK <= #Tp (RxData[7:0] == ReservedMulticast[31:24] | RxData[7:0] == MAC[31:24]) & AddressOK;
+  else
+  if(DetectionWindow & ByteCntEq3)
+    AddressOK <= #Tp (RxData[7:0] == ReservedMulticast[23:16] | RxData[7:0] == MAC[23:16]) & AddressOK;
+  else
+  if(DetectionWindow & ByteCntEq4)
+    AddressOK <= #Tp (RxData[7:0] == ReservedMulticast[15:8]  | RxData[7:0] == MAC[15:8])  & AddressOK;
+  else
+  if(DetectionWindow & ByteCntEq5)
+    AddressOK <= #Tp (RxData[7:0] == ReservedMulticast[7:0]   | RxData[7:0] == MAC[7:0])   & AddressOK;
+  else
+  if(ReceiveEnd)
+    AddressOK <= #Tp 1'b0;
+end
+
+
+// The ByteCntEq12/13 terms on the right-hand sides repeat what the branch conditions already test.
+// TypeLengthOK (Type/Length Control frame detected)
+always @ (posedge MRxClk or posedge RxReset )
+begin
+  if(RxReset)
+    TypeLengthOK <= #Tp 1'b0;
+  else
+  if(DetectionWindow & ByteCntEq12)
+    TypeLengthOK <= #Tp ByteCntEq12 & (RxData[7:0] == TypeLength[15:8]);
+  else
+  if(DetectionWindow & ByteCntEq13)
+    TypeLengthOK <= #Tp ByteCntEq13 & (RxData[7:0] == TypeLength[7:0]) & TypeLengthOK;
+  else
+  if(ReceiveEnd)
+    TypeLengthOK <= #Tp 1'b0;
+end
+
+
+
+// Latch Control Frame Opcode: bytes 14 and 15 must be 0x00 and 0x01; cleared again at byte 16
+always @ (posedge MRxClk or posedge RxReset )
+begin
+  if(RxReset)
+    OpCodeOK <= #Tp 1'b0;
+  else
+  if(ByteCntEq16)
+    OpCodeOK <= #Tp 1'b0;
+  else
+    begin
+      if(DetectionWindow & ByteCntEq14)
+        OpCodeOK <= #Tp ByteCntEq14 & RxData[7:0] == 8'h00;
+    
+      if(DetectionWindow & ByteCntEq15)
+        OpCodeOK <= #Tp ByteCntEq15 & RxData[7:0] == 8'h01 & OpCodeOK;
+    end
+end
+
+
+// ReceivedPauseFrmWAddr (+Address Check): set at byte 16, cleared by ReceiveEnd
+always @ (posedge MRxClk or posedge RxReset )
+begin
+  if(RxReset)
+    ReceivedPauseFrmWAddr <= #Tp 1'b0;
+  else
+  if(ReceiveEnd)
+    ReceivedPauseFrmWAddr <= #Tp 1'b0;
+  else
+  if(ByteCntEq16 & TypeLengthOK & OpCodeOK & AddressOK)
+    ReceivedPauseFrmWAddr <= #Tp 1'b1;        
+end
+
+
+
+// Assembling 16-bit timer value from two 8-bit data (high byte at ByteCnt 16, low byte at 17)
+always @ (posedge MRxClk or posedge RxReset )
+begin
+  if(RxReset)
+    AssembledTimerValue[15:0] <= #Tp 16'h0;
+  else
+  if(RxStartFrm)
+    AssembledTimerValue[15:0] <= #Tp 16'h0;
+  else
+    begin
+      if(DetectionWindow & ByteCntEq16)
+        AssembledTimerValue[15:8] <= #Tp RxData[7:0];
+      if(DetectionWindow & ByteCntEq17)
+        AssembledTimerValue[7:0] <= #Tp RxData[7:0];
+    end
+end
+
+// The window is open after reset, closes at ByteCntEq18 and reopens at ReceiveEnd.
+// Detection window (while PAUSE detection is possible)
+always @ (posedge MRxClk or posedge RxReset )
+begin
+  if(RxReset)
+    DetectionWindow <= #Tp 1'b1;
+  else
+  if(ByteCntEq18)
+    DetectionWindow <= #Tp 1'b0;
+  else
+  if(ReceiveEnd)
+    DetectionWindow <= #Tp 1'b1;
+end
+
+
+
+// Latching Timer Value at byte 18 of an address-checked PAUSE frame; cleared by ReceiveEnd
+always @ (posedge MRxClk or posedge RxReset )
+begin
+  if(RxReset)
+    LatchedTimerValue[15:0] <= #Tp 16'h0;
+  else
+  if(DetectionWindow &  ReceivedPauseFrmWAddr &  ByteCntEq18)
+    LatchedTimerValue[15:0] <= #Tp AssembledTimerValue[15:0];
+  else
+  if(ReceiveEnd)
+    LatchedTimerValue[15:0] <= #Tp 16'h0;
+end
+
+
+
+// Delayed CRC counter: counts valid bytes up to 4 and holds there until RxValid & RxEndFrm
+always @ (posedge MRxClk or posedge RxReset)
+begin
+  if(RxReset)
+    DlyCrcCnt <= #Tp 3'h0;
+  else
+  if(RxValid & RxEndFrm)
+    DlyCrcCnt <= #Tp 3'h0;
+  else
+  if(RxValid & ~RxEndFrm & ~DlyCrcCnt[2])
+    DlyCrcCnt <= #Tp DlyCrcCnt + 1'b1;
+end
+
+// ByteCnt advances on valid bytes inside the window; with DlyCrcEn only once DlyCrcCnt[2] is set.
+             
+assign ResetByteCnt = RxEndFrm;
+assign IncrementByteCnt = RxValid & DetectionWindow & ~ByteCntEq18 & (~DlyCrcEn | DlyCrcEn & DlyCrcCnt[2]);
+
+
+// Byte counter
+always @ (posedge MRxClk or posedge RxReset)
+begin
+  if(RxReset)
+    ByteCnt[4:0] <= #Tp 5'h0;
+  else
+  if(ResetByteCnt)
+    ByteCnt[4:0] <= #Tp 5'h0;
+  else
+  if(IncrementByteCnt)
+    ByteCnt[4:0] <= #Tp ByteCnt[4:0] + 1'b1;
+end
+
+// Byte position decodes, all qualified by RxValid (ByteCntEq18 additionally by DetectionWindow).
+assign ByteCntEq0 = RxValid & ByteCnt[4:0] == 5'h0;
+assign ByteCntEq1 = RxValid & ByteCnt[4:0] == 5'h1;
+assign ByteCntEq2 = RxValid & ByteCnt[4:0] == 5'h2;
+assign ByteCntEq3 = RxValid & ByteCnt[4:0] == 5'h3;
+assign ByteCntEq4 = RxValid & ByteCnt[4:0] == 5'h4;
+assign ByteCntEq5 = RxValid & ByteCnt[4:0] == 5'h5;
+assign ByteCntEq12 = RxValid & ByteCnt[4:0] == 5'h0C;
+assign ByteCntEq13 = RxValid & ByteCnt[4:0] == 5'h0D;
+assign ByteCntEq14 = RxValid & ByteCnt[4:0] == 5'h0E;
+assign ByteCntEq15 = RxValid & ByteCnt[4:0] == 5'h0F;
+assign ByteCntEq16 = RxValid & ByteCnt[4:0] == 5'h10;
+assign ByteCntEq17 = RxValid & ByteCnt[4:0] == 5'h11;
+assign ByteCntEq18 = RxValid & ByteCnt[4:0] == 5'h12 & DetectionWindow;
+
+// The timer is loaded at ReceiveEnd of a good PAUSE frame and decremented once per finished slot.
+assign SetPauseTimer = ReceiveEnd & ReceivedPauseFrmWAddr & ReceivedPacketGood & ReceivedLengthOK & RxFlow;
+assign DecrementPauseTimer = SlotFinished & |PauseTimer;
+
+
+// PauseTimer[15:0]
+always @ (posedge MRxClk or posedge RxReset)
+begin
+  if(RxReset)
+    PauseTimer[15:0] <= #Tp 16'h0;
+  else
+  if(SetPauseTimer)
+    PauseTimer[15:0] <= #Tp LatchedTimerValue[15:0];
+  else
+  if(DecrementPauseTimer)
+    PauseTimer[15:0] <= #Tp PauseTimer[15:0] - 1'b1;
+end
+
+assign PauseTimerEq0 = ~(|PauseTimer[15:0]);
+
+
+
+// Synchronization of the pause timer zero flag into the MTxClk domain (two flops)
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    begin
+      PauseTimerEq0_sync1 <= #Tp 1'b1;
+      PauseTimerEq0_sync2 <= #Tp 1'b1;
+    end
+  else
+    begin
+      PauseTimerEq0_sync1 <= #Tp PauseTimerEq0;
+      PauseTimerEq0_sync2 <= #Tp PauseTimerEq0_sync1;
+    end
+end
+
+
+// Pause signal generation
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    Pause <= #Tp 1'b0;
+  else
+  if((TxDoneIn | TxAbortIn | ~TxUsedDataOutDetected) & ~TxStartFrmOut)
+    Pause <= #Tp RxFlow & ~PauseTimerEq0_sync2;
+end
+
+
+// Divider2 is used for incrementing the Slot timer every other clock
+always @ (posedge MRxClk or posedge RxReset)
+begin
+  if(RxReset)
+    Divider2 <= #Tp 1'b0;
+  else
+  if(|PauseTimer[15:0] & RxFlow)
+    Divider2 <= #Tp ~Divider2;
+  else
+    Divider2 <= #Tp 1'b0;
+end
+
+// NOTE(review): Pause is registered on MTxClk but used below in MRxClk logic without a synchronizer - confirm.
+assign ResetSlotTimer = RxReset;  // same signal as the asynchronous reset of SlotTimer
+assign IncrementSlotTimer =  Pause & RxFlow & Divider2;
+
+
+// SlotTimer
+always @ (posedge MRxClk or posedge RxReset)
+begin
+  if(RxReset)
+    SlotTimer[5:0] <= #Tp 6'h0;
+  else
+  if(ResetSlotTimer)
+    SlotTimer[5:0] <= #Tp 6'h0;
+  else
+  if(IncrementSlotTimer)
+    SlotTimer[5:0] <= #Tp SlotTimer[5:0] + 1'b1;
+end
+
+
+assign SlotFinished = &SlotTimer[5:0] & IncrementSlotTimer;  // Slot is 512 bits (64 bytes)
+
+
+// With r_PassAll clear the flag is cleared on the clock after it is set; otherwise by RxStatusWriteLatched_sync2.
+// Pause Frame received
+always @ (posedge MRxClk or posedge RxReset)
+begin
+  if(RxReset)
+    ReceivedPauseFrm <=#Tp 1'b0;
+  else
+  if(RxStatusWriteLatched_sync2 & r_PassAll | ReceivedPauseFrm & (~r_PassAll))
+    ReceivedPauseFrm <=#Tp 1'b0;
+  else
+  if(ByteCntEq16 & TypeLengthOK & OpCodeOK)
+    ReceivedPauseFrm <=#Tp 1'b1;        
+end
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_register.v
===================================================================
--- /trunk/OC-Ethernet/eth_register.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_register.v	(revision 6)
@@ -0,0 +1,108 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_register.v                                              ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001, 2002 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.5  2002/08/16 12:33:27  mohor
+// Parameter ResetValue changed to capital letters.
+//
+// Revision 1.4  2002/02/26 16:18:08  mohor
+// Reset values are passed to registers through parameters
+//
+// Revision 1.3  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+//
+//
+//
+//
+//
+//
+
+`include "timescale.v"
+
+
+module eth_register(DataIn, DataOut, Write, Clk, Reset, SyncReset);
+
+// Generic WIDTH-bit register: asynchronous reset, synchronous reset and
+// write enable. Both resets load RESET_VALUE.
+
+parameter WIDTH       = 8;  // register width in bits
+parameter RESET_VALUE = 0;  // value loaded by Reset and by SyncReset
+
+input              Clk;        // rising-edge clock
+input              Reset;      // asynchronous reset, active high
+input              SyncReset;  // synchronous reset, takes priority over Write
+input              Write;      // load enable
+input  [WIDTH-1:0] DataIn;     // value captured when Write is set
+
+output [WIDTH-1:0] DataOut;    // registered value
+reg    [WIDTH-1:0] DataOut;
+
+// Priority: Reset, then SyncReset, then Write; otherwise DataOut is held.
+always @ (posedge Clk or posedge Reset)
+  begin
+    if (Reset)
+      DataOut <= #1 RESET_VALUE;
+    else
+      begin
+        if (SyncReset)
+          DataOut <= #1 RESET_VALUE;
+        else if (Write)
+          DataOut <= #1 DataIn;
+      end
+  end
+
+endmodule   // eth_register
Index: /trunk/OC-Ethernet/eth_clockgen.v
===================================================================
--- /trunk/OC-Ethernet/eth_clockgen.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_clockgen.v	(revision 6)
@@ -0,0 +1,131 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_clockgen.v                                              ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.3  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+// Revision 1.3  2001/06/01 22:28:55  mohor
+// This files (MIIM) are fully working. They were thoroughly tested. The testbench is not updated.
+//
+//
+
+`include "timescale.v"
+
+module eth_clockgen(Clk, Reset, Divider, MdcEn, MdcEn_n, Mdc);
+
+// Generates Mdc from Clk. One Mdc half period lasts (Divider >> 1) Clk cycles;
+// Divider values below 2 are treated as 2.
+// MdcEn / MdcEn_n flag the Clk cycle in which the half-period counter expires.
+
+parameter Tp=1;                 // delay applied to every register update
+
+input        Clk;               // Input clock (Host clock)
+input        Reset;             // Asynchronous reset, active high
+input  [7:0] Divider;           // Clk-to-Mdc division factor
+
+output       Mdc;               // Divided output clock
+output       MdcEn;             // High for the Clk period before Mdc rises
+output       MdcEn_n;           // High for the Clk period before Mdc falls
+
+reg          Mdc;
+reg    [7:0] Counter;           // Down-counter timing one half period
+
+wire   [7:0] TempDivider;       // Divider clamped to a minimum of 2
+wire   [7:0] CounterPreset;     // Reload value: half period minus one
+wire         CountEq0;          // Counter has reached zero
+
+
+// Clamp the divider and derive the reload value (half period minus one).
+assign TempDivider[7:0]   = (Divider[7:0] >= 2) ? Divider[7:0] : 8'h02;
+assign CounterPreset[7:0] = (TempDivider[7:0] >> 1) - 1'b1;
+assign CountEq0           = (Counter == 8'h0);
+
+
+// Half-period counter and Mdc toggle. When the counter hits zero it is
+// reloaded and Mdc changes level; otherwise it keeps counting down.
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      Counter[7:0] <= #Tp 8'h1;
+      Mdc          <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(CountEq0)
+        begin
+          Counter[7:0] <= #Tp CounterPreset[7:0];
+          Mdc          <= #Tp ~Mdc;
+        end
+      else
+        Counter[7:0] <= #Tp Counter - 8'h1;
+    end
+end
+
+
+// Enables are decoded from the counter expiry and the current Mdc level.
+assign MdcEn   = CountEq0 & ~Mdc;
+assign MdcEn_n = CountEq0 & Mdc;
+
+
+
+endmodule
+
+
Index: /trunk/OC-Ethernet/eth_miim.v
===================================================================
--- /trunk/OC-Ethernet/eth_miim.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_miim.v	(revision 6)
@@ -0,0 +1,448 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_miim.v                                                  ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.6  2005/02/21 12:48:07  igorm
+// Warning fixes.
+//
+// Revision 1.5  2003/05/16 10:08:27  mohor
+// Busy was set 2 cycles too late. Reported by Dennis Scott.
+//
+// Revision 1.4  2002/08/14 18:32:10  mohor
+// - Busy signal was not set on time when scan status operation was performed
+// and clock was divided with more than 2.
+// - Nvalid remains valid two more clocks (was previously cleared too soon).
+//
+// Revision 1.3  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed to have an eth_ prefix in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.2  2001/08/02 09:25:31  mohor
+// Unconnected signals are now connected.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joined together.
+//
+// Revision 1.3  2001/06/01 22:28:56  mohor
+// These files (MIIM) are fully working. They were thoroughly tested. The testbench is not updated.
+//
+//
+
+`include "timescale.v"
+
+
+module eth_miim                   // MII management block: sequences write, read and scan operations
+(
+  Clk,
+  Reset,
+  Divider,
+  NoPre,
+  CtrlData,
+  Rgad,
+  Fiad,
+  WCtrlData,
+  RStat,
+  ScanStat,
+  Mdi,
+  Mdo,
+  MdoEn,
+  Mdc,
+  Busy,
+  Prsd,
+  LinkFail,
+  Nvalid,
+  WCtrlDataStart,
+  RStatStart,
+  UpdateMIIRX_DATAReg
+);
+// WCtrlData and RStat are rising-edge detected on Clk; ScanStat is sampled on MdcEn and starts an operation whenever none is in progress.
+// BitCounter paces each operation; eth_clockgen supplies Mdc and its enables, eth_shiftreg and eth_outputcontrol handle the serial data.
+
+input         Clk;                // Host Clock
+input         Reset;              // General Reset
+input   [7:0] Divider;            // Divider for the host clock
+input  [15:0] CtrlData;           // Control Data (to be written to the PHY reg.)
+input   [4:0] Rgad;               // Register Address (within the PHY)
+input   [4:0] Fiad;               // PHY Address
+input         NoPre;              // No Preamble (no 32-bit preamble)
+input         WCtrlData;          // Write Control Data operation
+input         RStat;              // Read Status operation
+input         ScanStat;           // Scan Status operation
+input         Mdi;                // MII Management Data In
+
+output        Mdc;                // MII Management Data Clock
+output        Mdo;                // MII Management Data Output
+output        MdoEn;              // MII Management Data Output Enable
+output        Busy;               // Busy Signal
+output        LinkFail;           // Link Integrity Signal
+output        Nvalid;             // Invalid Status (qualifier for the valid scan result)
+
+output [15:0] Prsd;               // Read Status Data (data read from the PHY)
+
+output        WCtrlDataStart;     // This signal resets the WCTRLDATA bit in the MIIM Command register
+output        RStatStart;         // This signal resets the RSTAT bit in the MIIM Command register
+output        UpdateMIIRX_DATAReg;// Updates MII RX_DATA register with read data
+
+parameter Tp = 1;                 // Delay applied to every nonblocking assignment (#Tp)
+
+
+reg           Nvalid;
+reg           EndBusy_d;          // Pre-end Busy signal
+reg           EndBusy;            // End Busy signal (stops the operation in progress)
+
+reg           WCtrlData_q1;       // Write Control Data operation delayed 1 Clk cycle
+reg           WCtrlData_q2;       // Write Control Data operation delayed 2 Clk cycles
+reg           WCtrlData_q3;       // Write Control Data operation delayed 3 Clk cycles
+reg           WCtrlDataStart;     // Start Write Control Data Command (positive edge detected)
+reg           WCtrlDataStart_q;   // WCtrlDataStart delayed 1 Clk cycle (not updated while EndBusy is high)
+reg           WCtrlDataStart_q1;  // Start Write Control Data Command delayed 1 Mdc cycle
+reg           WCtrlDataStart_q2;  // Start Write Control Data Command delayed 2 Mdc cycles
+
+reg           RStat_q1;           // Read Status operation delayed 1 Clk cycle
+reg           RStat_q2;           // Read Status operation delayed 2 Clk cycles
+reg           RStat_q3;           // Read Status operation delayed 3 Clk cycles
+reg           RStatStart;         // Start Read Status Command (positive edge detected)
+reg           RStatStart_q1;      // Start Read Status Command delayed 1 Mdc cycle
+reg           RStatStart_q2;      // Start Read Status Command delayed 2 Mdc cycles
+
+reg           ScanStat_q1;        // Scan Status operation delayed 1 cycle
+reg           ScanStat_q2;        // Scan Status operation delayed 2 cycles
+reg           SyncStatMdcEn;      // Scan Status operation delayed 2 cycles (ScanStat_q2) and then sampled when MdcEn is asserted
+
+wire          WriteDataOp;        // Write Data Operation (positive edge detected)
+wire          ReadStatusOp;       // Read Status Operation (positive edge detected)
+wire          ScanStatusOp;       // Scan Status Operation (SyncStatMdcEn high while InProgress, InProgress_q1 and InProgress_q2 are low)
+wire          StartOp;            // Start Operation (start of any of the preceding operations)
+wire          EndOp;              // End of Operation
+
+reg           InProgress;         // Operation in progress
+reg           InProgress_q1;      // Operation in progress delayed 1 Mdc cycle
+reg           InProgress_q2;      // Operation in progress delayed 2 Mdc cycles
+reg           InProgress_q3;      // Operation in progress delayed 3 Mdc cycles
+
+reg           WriteOp;            // Write Operation Latch (When asserted, write operation is in progress)
+reg     [6:0] BitCounter;         // Bit Counter
+
+
+wire    [3:0] ByteSelect;         // Byte Select defines which byte (preamble, data, operation, etc.) is loaded and shifted through the shift register.
+wire          MdcEn;              // MII Management Data Clock Enable signal is asserted for one Clk period before Mdc rises.
+wire          ShiftedBit;         // This bit is output of the shift register and is connected to the Mdo signal
+wire          MdcEn_n;            // MII Management Data Clock Enable signal is asserted for one Clk period before Mdc falls.
+
+wire          LatchByte1_d2;      // High while a non-write operation is at BitCounter 0x37
+wire          LatchByte0_d2;      // High while a non-write operation is at BitCounter 0x3F
+reg           LatchByte1_d;       // LatchByte1_d2 delayed 1 Mdc cycle
+reg           LatchByte0_d;       // LatchByte0_d2 delayed 1 Mdc cycle
+reg     [1:0] LatchByte;          // Latch Byte selects which part of Read Status Data is updated from the shift register
+
+reg           UpdateMIIRX_DATAReg;// Updates MII RX_DATA register with read data
+
+
+
+
+
+// Generation of the EndBusy signal. It is used for ending the MII Management operation.
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      EndBusy_d <= #Tp 1'b0;
+      EndBusy <= #Tp 1'b0;
+    end
+  else
+    begin
+      EndBusy_d <= #Tp ~InProgress_q2 & InProgress_q3;   // InProgress_q2 already low while InProgress_q3 is still high
+      EndBusy   <= #Tp EndBusy_d;
+    end
+end
+
+
+// Update MII RX_DATA register: asserted on the Clk after EndBusy is high while WCtrlDataStart_q is low
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    UpdateMIIRX_DATAReg <= #Tp 0;
+  else
+  if(EndBusy & ~WCtrlDataStart_q)
+    UpdateMIIRX_DATAReg <= #Tp 1;
+  else
+    UpdateMIIRX_DATAReg <= #Tp 0;    
+end
+
+
+
+// Generation of the delayed signals used for positive edge triggering.
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      WCtrlData_q1 <= #Tp 1'b0;
+      WCtrlData_q2 <= #Tp 1'b0;
+      WCtrlData_q3 <= #Tp 1'b0;
+      
+      RStat_q1 <= #Tp 1'b0;
+      RStat_q2 <= #Tp 1'b0;
+      RStat_q3 <= #Tp 1'b0;
+
+      ScanStat_q1  <= #Tp 1'b0;
+      ScanStat_q2  <= #Tp 1'b0;
+      SyncStatMdcEn <= #Tp 1'b0;
+    end
+  else
+    begin
+      WCtrlData_q1 <= #Tp WCtrlData;
+      WCtrlData_q2 <= #Tp WCtrlData_q1;
+      WCtrlData_q3 <= #Tp WCtrlData_q2;
+
+      RStat_q1 <= #Tp RStat;
+      RStat_q2 <= #Tp RStat_q1;
+      RStat_q3 <= #Tp RStat_q2;
+
+      ScanStat_q1  <= #Tp ScanStat;
+      ScanStat_q2  <= #Tp ScanStat_q1;
+      if(MdcEn)
+        SyncStatMdcEn  <= #Tp ScanStat_q2;
+    end
+end
+
+
+// Generation of the Start Commands (Write Control Data or Read Status)
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      WCtrlDataStart <= #Tp 1'b0;
+      WCtrlDataStart_q <= #Tp 1'b0;
+      RStatStart <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(EndBusy)
+        begin
+          WCtrlDataStart <= #Tp 1'b0;
+          RStatStart <= #Tp 1'b0;
+        end
+      else
+        begin
+          if(WCtrlData_q2 & ~WCtrlData_q3)
+            WCtrlDataStart <= #Tp 1'b1;
+          if(RStat_q2 & ~RStat_q3)
+            RStatStart <= #Tp 1'b1;
+          WCtrlDataStart_q <= #Tp WCtrlDataStart;   // Only updated in this branch, so it keeps its value while EndBusy is high
+        end
+    end
+end 
+
+
+// Generation of the Nvalid signal (indicates when the status is invalid)
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    Nvalid <= #Tp 1'b0;
+  else
+    begin
+      if(~InProgress_q2 & InProgress_q3)
+        begin
+          Nvalid <= #Tp 1'b0;
+        end
+      else
+        begin
+          if(ScanStat_q2  & ~SyncStatMdcEn)
+            Nvalid <= #Tp 1'b1;
+        end
+    end
+end 
+
+// Signals used for the generation of the Operation signals (positive edge); all advance only when MdcEn is asserted
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      WCtrlDataStart_q1 <= #Tp 1'b0;
+      WCtrlDataStart_q2 <= #Tp 1'b0;
+
+      RStatStart_q1 <= #Tp 1'b0;
+      RStatStart_q2 <= #Tp 1'b0;
+
+      InProgress_q1 <= #Tp 1'b0;
+      InProgress_q2 <= #Tp 1'b0;
+      InProgress_q3 <= #Tp 1'b0;
+
+  	  LatchByte0_d <= #Tp 1'b0;
+  	  LatchByte1_d <= #Tp 1'b0;
+
+  	  LatchByte <= #Tp 2'b00;
+    end
+  else
+    begin
+      if(MdcEn)
+        begin
+          WCtrlDataStart_q1 <= #Tp WCtrlDataStart;
+          WCtrlDataStart_q2 <= #Tp WCtrlDataStart_q1;
+
+          RStatStart_q1 <= #Tp RStatStart;
+          RStatStart_q2 <= #Tp RStatStart_q1;
+
+          LatchByte[0] <= #Tp LatchByte0_d;
+          LatchByte[1] <= #Tp LatchByte1_d;
+
+          LatchByte0_d <= #Tp LatchByte0_d2;
+          LatchByte1_d <= #Tp LatchByte1_d2;
+
+          InProgress_q1 <= #Tp InProgress;
+          InProgress_q2 <= #Tp InProgress_q1;
+          InProgress_q3 <= #Tp InProgress_q2;
+        end
+    end
+end 
+
+
+// Generation of the Operation signals
+assign WriteDataOp  = WCtrlDataStart_q1 & ~WCtrlDataStart_q2;    
+assign ReadStatusOp = RStatStart_q1     & ~RStatStart_q2;
+assign ScanStatusOp = SyncStatMdcEn     & ~InProgress & ~InProgress_q1 & ~InProgress_q2;
+assign StartOp      = WriteDataOp | ReadStatusOp | ScanStatusOp;
+
+// Busy: OR of every pending request, start flag, in-progress stage, EndBusy and Nvalid
+assign Busy = WCtrlData | WCtrlDataStart | RStat | RStatStart | SyncStatMdcEn | EndBusy | InProgress | InProgress_q3 | Nvalid;
+
+
+// Generation of the InProgress signal (indicates when an operation is in progress)
+// Generation of the WriteOp signal (indicates when a write is in progress)
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      InProgress <= #Tp 1'b0;
+      WriteOp <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(MdcEn)
+        begin
+          if(StartOp)
+            begin
+              if(~InProgress)                       // Operation type is latched only when no operation is already in progress
+                WriteOp <= #Tp WriteDataOp;
+              InProgress <= #Tp 1'b1;
+            end
+          else
+            begin
+              if(EndOp)
+                begin
+                  InProgress <= #Tp 1'b0;
+                  WriteOp <= #Tp 1'b0;
+                end
+            end
+        end
+    end
+end
+
+
+
+// Bit Counter increments on MdcEn while InProgress is set and is cleared to 0 otherwise (with NoPre it jumps from 0 straight to 0x21)
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    BitCounter[6:0] <= #Tp 7'h0;
+  else
+    begin
+      if(MdcEn)
+        begin
+          if(InProgress)
+            begin
+              if(NoPre & ( BitCounter == 7'h0 ))
+                BitCounter[6:0] <= #Tp 7'h21;
+              else
+                BitCounter[6:0] <= #Tp BitCounter[6:0] + 1'b1;
+            end
+          else
+            BitCounter[6:0] <= #Tp 7'h0;
+        end
+    end
+end
+
+
+// Operation ends when the Bit Counter reaches 63
+assign EndOp = BitCounter==63;
+
+assign ByteSelect[0] = InProgress & ((NoPre & (BitCounter == 7'h0)) | (~NoPre & (BitCounter == 7'h20)));
+assign ByteSelect[1] = InProgress & (BitCounter == 7'h28);
+assign ByteSelect[2] = InProgress & WriteOp & (BitCounter == 7'h30);   // Selected during write operations only
+assign ByteSelect[3] = InProgress & WriteOp & (BitCounter == 7'h38);   // Selected during write operations only
+
+
+// Latch Byte selects which part of Read Status Data is updated from the shift register
+assign LatchByte1_d2 = InProgress & ~WriteOp & BitCounter == 7'h37;   // == is evaluated before &
+assign LatchByte0_d2 = InProgress & ~WriteOp & BitCounter == 7'h3F;   // == is evaluated before &
+
+
+// Connecting the Clock Generator Module
+eth_clockgen clkgen(.Clk(Clk), .Reset(Reset), .Divider(Divider[7:0]), .MdcEn(MdcEn), .MdcEn_n(MdcEn_n), .Mdc(Mdc) 
+                   );
+
+// Connecting the Shift Register Module
+eth_shiftreg shftrg(.Clk(Clk), .Reset(Reset), .MdcEn_n(MdcEn_n), .Mdi(Mdi), .Fiad(Fiad), .Rgad(Rgad), 
+                    .CtrlData(CtrlData), .WriteOp(WriteOp), .ByteSelect(ByteSelect), .LatchByte(LatchByte), 
+                    .ShiftedBit(ShiftedBit), .Prsd(Prsd), .LinkFail(LinkFail)
+                   );
+
+// Connecting the Output Control Module
+eth_outputcontrol outctrl(.Clk(Clk), .Reset(Reset), .MdcEn_n(MdcEn_n), .InProgress(InProgress), 
+                          .ShiftedBit(ShiftedBit), .BitCounter(BitCounter), .WriteOp(WriteOp), .NoPre(NoPre), 
+                          .Mdo(Mdo), .MdoEn(MdoEn)
+                         );
+
+endmodule
Index: /trunk/OC-Ethernet/xilinx_dist_ram_16x32.v
===================================================================
--- /trunk/OC-Ethernet/xilinx_dist_ram_16x32.v	(revision 6)
+++ /trunk/OC-Ethernet/xilinx_dist_ram_16x32.v	(revision 6)
@@ -0,0 +1,50 @@
+module xilinx_dist_ram_16x32   // 16-word x 32-bit memory assembled from 32 one-bit RAM16X1D instances
+(
+    data_out,
+    we,
+    data_in,
+    read_address,
+    write_address,
+    wclk
+);
+    output [31:0] data_out;      // Bit i is driven by the DPO pin of instance i
+    input we, wclk;              // Connected to the WE and WCLK pins of every instance
+    input [31:0] data_in;        // Bit i is connected to the D pin of instance i
+    input [3:0] write_address, read_address;   // write_address drives A0..A3, read_address drives DPRA0..DPRA3
+    // NOTE(review): RAM16X1D is not defined in this file; presumably the Xilinx distributed-RAM primitive - confirm its read/write timing there.
+    wire [3:0] waddr = write_address ;
+    wire [3:0] raddr = read_address ;
+    // One instance per data bit; all instances share waddr, raddr, wclk and we, and SPO is left unconnected.
+    RAM16X1D ram00 (.DPO(data_out[0]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[0]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram01 (.DPO(data_out[1]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[1]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram02 (.DPO(data_out[2]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[2]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram03 (.DPO(data_out[3]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[3]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram04 (.DPO(data_out[4]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[4]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram05 (.DPO(data_out[5]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[5]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram06 (.DPO(data_out[6]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[6]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram07 (.DPO(data_out[7]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[7]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram08 (.DPO(data_out[8]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[8]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram09 (.DPO(data_out[9]),  .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[9]),  .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram10 (.DPO(data_out[10]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[10]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram11 (.DPO(data_out[11]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[11]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram12 (.DPO(data_out[12]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[12]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram13 (.DPO(data_out[13]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[13]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram14 (.DPO(data_out[14]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[14]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram15 (.DPO(data_out[15]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[15]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram16 (.DPO(data_out[16]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[16]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram17 (.DPO(data_out[17]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[17]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram18 (.DPO(data_out[18]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[18]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram19 (.DPO(data_out[19]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[19]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram20 (.DPO(data_out[20]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[20]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram21 (.DPO(data_out[21]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[21]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram22 (.DPO(data_out[22]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[22]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram23 (.DPO(data_out[23]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[23]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram24 (.DPO(data_out[24]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[24]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram25 (.DPO(data_out[25]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[25]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram26 (.DPO(data_out[26]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[26]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram27 (.DPO(data_out[27]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[27]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram28 (.DPO(data_out[28]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[28]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram29 (.DPO(data_out[29]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[29]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram30 (.DPO(data_out[30]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[30]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+    RAM16X1D ram31 (.DPO(data_out[31]), .SPO(), .A0(waddr[0]), .A1(waddr[1]), .A2(waddr[2]), .A3(waddr[3]), .D(data_in[31]), .DPRA0(raddr[0]), .DPRA1(raddr[1]), .DPRA2(raddr[2]), .DPRA3(raddr[3]), .WCLK(wclk), .WE(we));
+endmodule
Index: /trunk/OC-Ethernet/eth_outputcontrol.v
===================================================================
--- /trunk/OC-Ethernet/eth_outputcontrol.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_outputcontrol.v	(revision 6)
@@ -0,0 +1,147 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_outputcontrol.v                                         ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.3  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed to have an eth_ prefix in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joined together.
+//
+// Revision 1.3  2001/06/01 22:28:56  mohor
+// These files (MIIM) are fully working. They were thoroughly tested. The testbench is not updated.
+//
+//
+
+`include "timescale.v"
+
+module eth_outputcontrol(Clk, Reset, InProgress, ShiftedBit, BitCounter, WriteOp, NoPre, MdcEn_n, Mdo, MdoEn);
+// Produces the registered Mdo and MdoEn outputs; every register here advances only on Clk edges where MdcEn_n is high.
+parameter Tp = 1;                 // Delay applied to every nonblocking assignment (#Tp)
+
+input         Clk;                // Host Clock
+input         Reset;              // General Reset
+input         WriteOp;            // Write Operation Latch (When asserted, write operation is in progress)
+input         NoPre;              // No Preamble (no 32-bit preamble)
+input         InProgress;         // Operation in progress
+input         ShiftedBit;         // This bit is output of the shift register and is connected to the Mdo signal
+input   [6:0] BitCounter;         // Bit Counter
+input         MdcEn_n;            // MII Management Data Clock Enable signal is asserted for one Clk period before Mdc falls.
+
+output        Mdo;                // MII Management Data Output
+output        MdoEn;              // MII Management Data Output Enable
+
+wire          SerialEn;           // Enables the serialization of the data
+
+reg           MdoEn_2d;           // First stage of the MdoEn pipeline
+reg           MdoEn_d;            // Second stage of the MdoEn pipeline
+reg           MdoEn;
+
+reg           Mdo_2d;             // High for BitCounter 0..31 unless SerialEn is set
+reg           Mdo_d;              // ShiftedBit ORed with Mdo_2d
+reg           Mdo;                // MII Management Data Output
+
+
+// Write: SerialEn for BitCounter 32 and above. Non-write: BitCounter 32..45 only. With NoPre also BitCounter 0 in both cases.
+// Generation of the Serial Enable signal (enables the serialization of the data)
+assign SerialEn =  WriteOp & InProgress & ( BitCounter>31 | ( ( BitCounter == 0 ) & NoPre ) )
+                | ~WriteOp & InProgress & (( BitCounter>31 & BitCounter<46 ) | ( ( BitCounter == 0 ) & NoPre ));
+
+
+// Generation of the MdoEn signal (three register stages clocked on MdcEn_n)
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      MdoEn_2d <= #Tp 1'b0;
+      MdoEn_d <= #Tp 1'b0;
+      MdoEn <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(MdcEn_n)
+        begin
+          MdoEn_2d <= #Tp SerialEn | InProgress & BitCounter<32;   // Also enabled for BitCounter 0..31 of an operation
+          MdoEn_d <= #Tp MdoEn_2d;
+          MdoEn <= #Tp MdoEn_d;
+        end
+    end
+end
+
+
+// Generation of the Mdo signal.
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      Mdo_2d <= #Tp 1'b0;
+      Mdo_d <= #Tp 1'b0;
+      Mdo <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(MdcEn_n)
+        begin
+          Mdo_2d <= #Tp ~SerialEn & BitCounter<32;
+          Mdo_d <= #Tp ShiftedBit | Mdo_2d;        // Forced to 1 while Mdo_2d is high, otherwise follows ShiftedBit
+          Mdo <= #Tp Mdo_d;
+        end
+    end
+end
+
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_maccontrol.v
===================================================================
--- /trunk/OC-Ethernet/eth_maccontrol.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_maccontrol.v	(revision 6)
@@ -0,0 +1,271 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_maccontrol.v                                            ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.6  2002/11/22 01:57:06  mohor
+// Rx Flow control fixed. CF flag added to the RX buffer descriptor. RxAbort
+// synchronized.
+//
+// Revision 1.5  2002/11/21 00:14:39  mohor
+// TxDone and TxAbort changed so they're not propagated to the wishbone
+// module when control frame is transmitted.
+//
+// Revision 1.4  2002/11/19 17:37:32  mohor
+// When control frame (PAUSE) was sent, status was written in the
+// eth_wishbone module and both TXB and TXC interrupts were set. Fixed.
+// Only TXC interrupt is set.
+//
+// Revision 1.3  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed to have an eth_ prefix in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE). The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joined together.
+//
+// Revision 1.1  2001/07/03 12:51:54  mohor
+// Initial release of the MAC Control module.
+//
+//
+//
+//
+
+
+`include "timescale.v"
+
+
+module eth_maccontrol (MTxClk, MRxClk, TxReset, RxReset, TPauseRq, TxDataIn, TxStartFrmIn, TxUsedDataIn, 
+                       TxEndFrmIn, TxDoneIn, TxAbortIn, RxData, RxValid, RxStartFrm, RxEndFrm, ReceiveEnd, 
+                       ReceivedPacketGood, ReceivedLengthOK, TxFlow, RxFlow, DlyCrcEn, TxPauseTV, 
+                       MAC, PadIn, PadOut, CrcEnIn, CrcEnOut, TxDataOut, TxStartFrmOut, TxEndFrmOut, 
+                       TxDoneOut, TxAbortOut, TxUsedDataOut, WillSendControlFrame, TxCtrlEndFrm, 
+                       ReceivedPauseFrm, ControlFrmAddressOK, SetPauseTimer, r_PassAll, RxStatusWriteLatched_sync2
+                      );
+// Multiplexes the host transmit path with the control-frame path of transmitcontrol1 (selected by CtrlMux)
+// and instantiates receivecontrol1; TxStartFrmOut for host frames is suppressed while Pause is asserted.
+parameter   Tp = 1;                     // Delay applied to every non-blocking assignment in this module (#Tp)
+
+
+input         MTxClk;                   // Transmit clock (from PHY)
+input         MRxClk;                   // Receive clock (from PHY)
+input         TxReset;                  // Transmit reset
+input         RxReset;                  // Receive reset
+input         TPauseRq;                 // Transmit control frame (from host)
+input   [7:0] TxDataIn;                 // Transmit packet data byte (from host)
+input         TxStartFrmIn;             // Transmit packet start frame input (from host)
+input         TxUsedDataIn;             // Transmit packet used data (from TxEthMAC)
+input         TxEndFrmIn;               // Transmit packet end frame input (from host)
+input         TxDoneIn;                 // Transmit packet done (from TxEthMAC)
+input         TxAbortIn;                // Transmit packet abort (input from TxEthMAC)
+input         PadIn;                    // Padding (input from registers)
+input         CrcEnIn;                  // Crc append (input from registers)
+input   [7:0] RxData;                   // Receive Packet Data (from RxEthMAC)
+input         RxValid;                  // Received a valid packet
+input         RxStartFrm;               // Receive packet start frame (input from RxEthMAC)
+input         RxEndFrm;                 // Receive packet end frame (input from RxEthMAC)
+input         ReceiveEnd;               // End of receiving of the current packet (input from RxEthMAC)
+input         ReceivedPacketGood;       // Received packet is good
+input         ReceivedLengthOK;         // Length of the received packet is OK
+input         TxFlow;                   // Tx flow control (from registers)
+input         RxFlow;                   // Rx flow control (from registers)
+input         DlyCrcEn;                 // Delayed CRC enabled (from registers)
+input  [15:0] TxPauseTV;                // Transmit Pause Timer Value (from registers)
+input  [47:0] MAC;                      // MAC address (from registers)
+input         RxStatusWriteLatched_sync2; // Passed unchanged to receivecontrol1
+input         r_PassAll;                // Passed unchanged to receivecontrol1
+
+output  [7:0] TxDataOut;                // Transmit Packet Data (to TxEthMAC)
+output        TxStartFrmOut;            // Transmit packet start frame (output to TxEthMAC)
+output        TxEndFrmOut;              // Transmit packet end frame (output to TxEthMAC)
+output        TxDoneOut;                // Transmit packet done (to host)
+output        TxAbortOut;               // Transmit packet aborted (to host)
+output        TxUsedDataOut;            // Transmit packet used data (to host)
+output        PadOut;                   // Padding (output to TxEthMAC)
+output        CrcEnOut;                 // Crc append (output to TxEthMAC)
+output        WillSendControlFrame;     // Connected to transmitcontrol1 (.WillSendControlFrame)
+output        TxCtrlEndFrm;             // Connected to transmitcontrol1; selected as TxEndFrmOut when CtrlMux is set
+output        ReceivedPauseFrm;         // Connected to receivecontrol1 (.ReceivedPauseFrm)
+output        ControlFrmAddressOK;      // Connected to receivecontrol1 port AddressOK
+output        SetPauseTimer;            // Connected to receivecontrol1 (.SetPauseTimer)
+
+reg           TxUsedDataOutDetected;    // Set by TxUsedDataOut, cleared by TxDoneIn or TxAbortIn
+reg           TxAbortInLatched;         // TxAbortIn delayed by one MTxClk
+reg           TxDoneInLatched;          // TxDoneIn delayed by one MTxClk
+reg           MuxedDone;                // Latched rising edge of TxDoneIn; used as TxDoneOut source when CtrlMux is set
+reg           MuxedAbort;               // Latched rising edge of TxAbortIn; used as TxAbortOut source when CtrlMux is set
+
+wire          Pause;                    // Connected to receivecontrol1; masks TxStartFrmIn on the host path
+wire          TxCtrlStartFrm;           // Connected to transmitcontrol1; selected as TxStartFrmOut when CtrlMux is set
+wire    [7:0] ControlData;              // Connected to transmitcontrol1; selected as TxDataOut when CtrlMux is set
+wire          CtrlMux;                  // Connected to transmitcontrol1; 1 selects the control-frame path
+wire          SendingCtrlFrm;           // Sending Control Frame (enables padding and CRC)
+wire          BlockTxDone;              // Connected to transmitcontrol1; forces TxDoneOut and TxAbortOut low
+
+
+// Signal TxUsedDataOut was detected (a transfer is already in progress); the TxDoneIn/TxAbortIn clear has priority over the set
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    TxUsedDataOutDetected <= #Tp 1'b0;
+  else
+  if(TxDoneIn | TxAbortIn)
+    TxUsedDataOutDetected <= #Tp 1'b0;
+  else
+  if(TxUsedDataOut)
+    TxUsedDataOutDetected <= #Tp 1'b1;
+end    
+
+
+// One-cycle delayed copies of TxAbortIn and TxDoneIn, used below to detect their rising edges
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    begin
+      TxAbortInLatched <= #Tp 1'b0;
+      TxDoneInLatched  <= #Tp 1'b0;
+    end
+  else
+    begin
+      TxAbortInLatched <= #Tp TxAbortIn;
+      TxDoneInLatched  <= #Tp TxDoneIn;
+    end
+end
+
+
+
+// Generating muxed abort signal: set on a rising edge of TxAbortIn while TxUsedDataOutDetected is set; TxStartFrmIn clears it and has priority
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    MuxedAbort <= #Tp 1'b0;
+  else
+  if(TxStartFrmIn)
+    MuxedAbort <= #Tp 1'b0;
+  else
+  if(TxAbortIn & ~TxAbortInLatched & TxUsedDataOutDetected)
+    MuxedAbort <= #Tp 1'b1;
+end
+
+
+// Generating muxed done signal: set on a rising edge of TxDoneIn while TxUsedDataOutDetected is set; TxStartFrmIn clears it and has priority
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    MuxedDone <= #Tp 1'b0;
+  else
+  if(TxStartFrmIn)
+    MuxedDone <= #Tp 1'b0;
+  else
+  if(TxDoneIn & (~TxDoneInLatched) & TxUsedDataOutDetected)
+    MuxedDone <= #Tp 1'b1;
+end
+
+
+// TxDoneOut: MuxedDone when CtrlMux is set, TxDoneIn otherwise; forced low by TxStartFrmIn or BlockTxDone in both cases
+assign TxDoneOut  = CtrlMux? ((~TxStartFrmIn) & (~BlockTxDone) & MuxedDone) : 
+                             ((~TxStartFrmIn) & (~BlockTxDone) & TxDoneIn);
+
+// TxAbortOut: MuxedAbort when CtrlMux is set, TxAbortIn otherwise; forced low by TxStartFrmIn or BlockTxDone in both cases
+assign TxAbortOut  = CtrlMux? ((~TxStartFrmIn) & (~BlockTxDone) & MuxedAbort) :
+                              ((~TxStartFrmIn) & (~BlockTxDone) & TxAbortIn);
+
+// TxUsedDataOut: TxUsedDataIn is passed on only while CtrlMux is low
+assign TxUsedDataOut  = ~CtrlMux & TxUsedDataIn;
+
+// TxStartFrmOut: TxCtrlStartFrm when CtrlMux is set; otherwise TxStartFrmIn, masked while Pause is asserted
+assign TxStartFrmOut = CtrlMux? TxCtrlStartFrm : (TxStartFrmIn & ~Pause);
+
+
+// TxEndFrmOut: TxCtrlEndFrm when CtrlMux is set, TxEndFrmIn otherwise
+assign TxEndFrmOut = CtrlMux? TxCtrlEndFrm : TxEndFrmIn;
+
+
+// TxDataOut[7:0]: ControlData when CtrlMux is set, TxDataIn otherwise
+assign TxDataOut[7:0] = CtrlMux? ControlData[7:0] : TxDataIn[7:0];
+
+
+// PadOut: PadIn, forced high while SendingCtrlFrm is set
+assign PadOut = PadIn | SendingCtrlFrm;
+
+
+// CrcEnOut: CrcEnIn, forced high while SendingCtrlFrm is set
+assign CrcEnOut = CrcEnIn | SendingCtrlFrm;
+
+
+
+// Connecting receivecontrol module
+eth_receivecontrol receivecontrol1 
+(
+ .MTxClk(MTxClk), .MRxClk(MRxClk), .TxReset(TxReset), .RxReset(RxReset), .RxData(RxData), 
+ .RxValid(RxValid), .RxStartFrm(RxStartFrm), .RxEndFrm(RxEndFrm), .RxFlow(RxFlow), 
+ .ReceiveEnd(ReceiveEnd), .MAC(MAC), .DlyCrcEn(DlyCrcEn), .TxDoneIn(TxDoneIn), 
+ .TxAbortIn(TxAbortIn), .TxStartFrmOut(TxStartFrmOut), .ReceivedLengthOK(ReceivedLengthOK), 
+ .ReceivedPacketGood(ReceivedPacketGood), .TxUsedDataOutDetected(TxUsedDataOutDetected), 
+ .Pause(Pause), .ReceivedPauseFrm(ReceivedPauseFrm), .AddressOK(ControlFrmAddressOK), 
+ .r_PassAll(r_PassAll), .RxStatusWriteLatched_sync2(RxStatusWriteLatched_sync2), .SetPauseTimer(SetPauseTimer)
+);
+
+// Connecting transmitcontrol module
+eth_transmitcontrol transmitcontrol1
+(
+ .MTxClk(MTxClk), .TxReset(TxReset), .TxUsedDataIn(TxUsedDataIn), .TxUsedDataOut(TxUsedDataOut), 
+ .TxDoneIn(TxDoneIn), .TxAbortIn(TxAbortIn), .TxStartFrmIn(TxStartFrmIn), .TPauseRq(TPauseRq), 
+ .TxUsedDataOutDetected(TxUsedDataOutDetected), .TxFlow(TxFlow), .DlyCrcEn(DlyCrcEn), .TxPauseTV(TxPauseTV), 
+ .MAC(MAC), .TxCtrlStartFrm(TxCtrlStartFrm), .TxCtrlEndFrm(TxCtrlEndFrm), .SendingCtrlFrm(SendingCtrlFrm), 
+ .CtrlMux(CtrlMux), .ControlData(ControlData), .WillSendControlFrame(WillSendControlFrame), .BlockTxDone(BlockTxDone)
+);
+
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_top.v
===================================================================
--- /trunk/OC-Ethernet/eth_top.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_top.v	(revision 6)
@@ -0,0 +1,968 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_top.v                                                   ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001, 2002 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.51  2005/02/21 11:13:17  igorm
+// Defer indication fixed.
+//
+// Revision 1.50  2004/04/26 15:26:23  igorm
+// - Bug connected to the TX_BD_NUM_Wr signal fixed (bug came in with the
+//   previous update of the core).
+// - TxBDAddress is set to 0 after the TX is enabled in the MODER register.
+// - RxBDAddress is set to r_TxBDNum<<1 after the RX is enabled in the MODER
+//   register. (thanks to Mathias and Torbjorn)
+// - Multicast reception was fixed. Thanks to Ulrich Gries
+//
+// Revision 1.49  2003/11/12 18:24:59  tadejm
+// WISHBONE slave changed and tested from only 32-bit access to byte access.
+//
+// Revision 1.48  2003/10/17 07:46:16  markom
+// mbist signals updated according to newest convention
+//
+// Revision 1.47  2003/10/06 15:43:45  knguyen
+// Update RxEnSync only when mrxdv_pad_i is inactive (LOW).
+//
+// Revision 1.46  2003/01/30 13:30:22  tadejm
+// Defer indication changed.
+//
+// Revision 1.45  2003/01/22 13:49:26  tadejm
+// When control packets were received, they were ignored in some cases.
+//
+// Revision 1.44  2003/01/21 12:09:40  mohor
+// When receiving normal data frame and RxFlow control was switched on, RXB
+// interrupt was not set.
+//
+// Revision 1.43  2002/11/22 01:57:06  mohor
+// Rx Flow control fixed. CF flag added to the RX buffer descriptor. RxAbort
+// synchronized.
+//
+// Revision 1.42  2002/11/21 00:09:19  mohor
+// TPauseRq synchronized to tx_clk.
+//
+// Revision 1.41  2002/11/19 18:13:49  mohor
+// r_MiiMRst is not used for resetting the MIIM module. wb_rst used instead.
+//
+// Revision 1.40  2002/11/19 17:34:25  mohor
+// AddressMiss status is connecting to the Rx BD. AddressMiss is identifying
+// that a frame was received because of the promiscuous mode.
+//
+// Revision 1.39  2002/11/18 17:31:55  mohor
+// wb_rst_i is used for MIIM reset.
+//
+// Revision 1.38  2002/11/14 18:37:20  mohor
+// r_Rst signal does not reset any module any more and is removed from the design.
+//
+// Revision 1.37  2002/11/13 22:25:36  tadejm
+// All modules are reset with wb_rst instead of the r_Rst. Exception is MII module.
+//
+// Revision 1.36  2002/10/18 17:04:20  tadejm
+// Changed BIST scan signals.
+//
+// Revision 1.35  2002/10/11 13:36:58  mohor
+// Typo error fixed. (When using Bist)
+//
+// Revision 1.34  2002/10/10 16:49:50  mohor
+// Signals for WISHBONE B3 compliant interface added.
+//
+// Revision 1.33  2002/10/10 16:29:30  mohor
+// BIST added.
+//
+// Revision 1.32  2002/09/20 17:12:58  mohor
+// CsMiss added. When address between 0x800 and 0xfff is accessed within
+// Ethernet Core, error acknowledge is generated.
+//
+// Revision 1.31  2002/09/12 14:50:17  mohor
+// CarrierSenseLost bug fixed when operating in full duplex mode.
+//
+// Revision 1.30  2002/09/10 10:35:23  mohor
+// Ethernet debug registers removed.
+//
+// Revision 1.29  2002/09/09 13:03:13  mohor
+// Error acknowledge is generated when accessing BDs and RST bit in the
+// MODER register (r_Rst) is set.
+//
+// Revision 1.28  2002/09/04 18:44:10  mohor
+// Signals related to the control frames connected. Debug registers reg1, 2, 3, 4
+// connected.
+//
+// Revision 1.27  2002/07/25 18:15:37  mohor
+// RxAbort changed. Packets received with MRxErr (from PHY) are also
+// aborted.
+//
+// Revision 1.26  2002/07/17 18:51:50  mohor
+// EXTERNAL_DMA removed. External DMA not supported.
+//
+// Revision 1.25  2002/05/03 10:15:50  mohor
+// Outputs registered. Reset changed for eth_wishbone module.
+//
+// Revision 1.24  2002/04/22 14:15:42  mohor
+// Wishbone signals are registered when ETH_REGISTERED_OUTPUTS is
+// selected in eth_defines.v
+//
+// Revision 1.23  2002/03/25 13:33:53  mohor
+// md_padoen_o changed to md_padoe_o. Signal was always active high, just
+// name was incorrect.
+//
+// Revision 1.22  2002/02/26 16:59:54  mohor
+// Small fixes for external/internal DMA mismatches.
+//
+// Revision 1.21  2002/02/26 16:21:00  mohor
+// Interrupts changed in the top file
+//
+// Revision 1.20  2002/02/18 10:40:17  mohor
+// Small fixes.
+//
+// Revision 1.19  2002/02/16 14:03:44  mohor
+// Registered trimmed. Unused registers removed.
+//
+// Revision 1.18  2002/02/16 13:06:33  mohor
+// EXTERNAL_DMA used instead of WISHBONE_DMA.
+//
+// Revision 1.17  2002/02/16 07:15:27  mohor
+// Testbench fixed, code simplified, unused signals removed.
+//
+// Revision 1.16  2002/02/15 13:49:39  mohor
+// RxAbort is connected differently.
+//
+// Revision 1.15  2002/02/15 11:38:26  mohor
+// Changes that were lost when updating from 1.11 to 1.14 fixed.
+//
+// Revision 1.14  2002/02/14 20:19:11  billditt
+// Modified for Address Checking,
+// addition of eth_addrcheck.v
+//
+// Revision 1.13  2002/02/12 17:03:03  mohor
+// HASH0 and HASH1 registers added. Registers address width was
+// changed to 8 bits.
+//
+// Revision 1.12  2002/02/11 09:18:22  mohor
+// Tx status is written back to the BD.
+//
+// Revision 1.11  2002/02/08 16:21:54  mohor
+// Rx status is written back to the BD.
+//
+// Revision 1.10  2002/02/06 14:10:21  mohor
+// non-DMA host interface added. Select the right configuration in eth_defines.
+//
+// Revision 1.9  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.8  2001/12/05 15:00:16  mohor
+// RX_BD_NUM changed to TX_BD_NUM (holds number of TX descriptors
+// instead of the number of RX descriptors).
+//
+// Revision 1.7  2001/12/05 10:45:59  mohor
+// ETH_RX_BD_ADR register deleted. ETH_RX_BD_NUM is used instead.
+//
+// Revision 1.6  2001/10/19 11:24:29  mohor
+// Number of addresses (wb_adr_i) minimized.
+//
+// Revision 1.5  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.4  2001/10/18 12:07:11  mohor
+// Status signals changed, Address decoding changed, interrupt controller
+// added.
+//
+// Revision 1.3  2001/09/24 15:02:56  mohor
+// Defines changed (All precede with ETH_). Small changes because some
+// tools generate warnings when two operands are together. Synchronization
+// between two clocks domains in eth_wishbonedma.v is changed (due to ASIC
+// demands).
+//
+// Revision 1.2  2001/08/15 14:03:59  mohor
+// Signal names changed on the top level for easier pad insertion (ASIC).
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed to have an eth_ prefix in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE). The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.2  2001/08/02 09:25:31  mohor
+// Unconnected signals are now connected.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joined together.
+//
+//
+//
+// 
+
+
+`include "eth_defines.v"
+`include "timescale.v"
+
+
+module eth_top
+(
+  // WISHBONE common
+  wb_clk_i, wb_rst_i, wb_dat_i, wb_dat_o, 
+
+  // WISHBONE slave
+  wb_adr_i, wb_sel_i, wb_we_i, wb_cyc_i, wb_stb_i, wb_ack_o, wb_err_o, 
+
+  // WISHBONE master
+  m_wb_adr_o, m_wb_sel_o, m_wb_we_o, 
+  m_wb_dat_o, m_wb_dat_i, m_wb_cyc_o, 
+  m_wb_stb_o, m_wb_ack_i, m_wb_err_i, 
+
+`ifdef ETH_WISHBONE_B3
+  m_wb_cti_o, m_wb_bte_o, 
+`endif
+
+  //TX
+  mtx_clk_pad_i, mtxd_pad_o, mtxen_pad_o, mtxerr_pad_o,
+
+  //RX
+  mrx_clk_pad_i, mrxd_pad_i, mrxdv_pad_i, mrxerr_pad_i, mcoll_pad_i, mcrs_pad_i, 
+  
+  // MIIM
+  mdc_pad_o, md_pad_i, md_pad_o, md_padoe_o,
+
+  int_o
+
+  // Bist
+`ifdef ETH_BIST
+  ,
+  // debug chain signals
+  mbist_si_i,       // bist scan serial in
+  mbist_so_o,       // bist scan serial out
+  mbist_ctrl_i        // bist chain shift control
+`endif
+
+);
+
+
+parameter Tp = 1;
+
+
+// WISHBONE common
+input           wb_clk_i;     // WISHBONE clock
+input           wb_rst_i;     // WISHBONE reset
+input   [31:0]  wb_dat_i;     // WISHBONE data input
+output  [31:0]  wb_dat_o;     // WISHBONE data output
+output          wb_err_o;     // WISHBONE error output
+
+// WISHBONE slave
+input   [11:2]  wb_adr_i;     // WISHBONE address input
+input    [3:0]  wb_sel_i;     // WISHBONE byte select input
+input           wb_we_i;      // WISHBONE write enable input
+input           wb_cyc_i;     // WISHBONE cycle input
+input           wb_stb_i;     // WISHBONE strobe input
+output          wb_ack_o;     // WISHBONE acknowledge output
+
+// WISHBONE master
+output  [31:0]  m_wb_adr_o;
+output   [3:0]  m_wb_sel_o;
+output          m_wb_we_o;
+input   [31:0]  m_wb_dat_i;
+output  [31:0]  m_wb_dat_o;
+output          m_wb_cyc_o;
+output          m_wb_stb_o;
+input           m_wb_ack_i;
+input           m_wb_err_i;
+
+wire    [29:0]  m_wb_adr_tmp;
+
+`ifdef ETH_WISHBONE_B3
+output   [2:0]  m_wb_cti_o;   // Cycle Type Identifier
+output   [1:0]  m_wb_bte_o;   // Burst Type Extension
+`endif
+
+// Tx
+input           mtx_clk_pad_i; // Transmit clock (from PHY)
+output   [3:0]  mtxd_pad_o;    // Transmit nibble (to PHY)
+output          mtxen_pad_o;   // Transmit enable (to PHY)
+output          mtxerr_pad_o;  // Transmit error (to PHY)
+
+// Rx
+input           mrx_clk_pad_i; // Receive clock (from PHY)
+input    [3:0]  mrxd_pad_i;    // Receive nibble (from PHY)
+input           mrxdv_pad_i;   // Receive data valid (from PHY)
+input           mrxerr_pad_i;  // Receive data error (from PHY)
+
+// Common Tx and Rx
+input           mcoll_pad_i;   // Collision (from PHY)
+input           mcrs_pad_i;    // Carrier sense (from PHY)
+
+// MII Management interface
+input           md_pad_i;      // MII data input (from I/O cell)
+output          mdc_pad_o;     // MII Management data clock (to PHY)
+output          md_pad_o;      // MII data output (to I/O cell)
+output          md_padoe_o;    // MII data output enable (to I/O cell)
+
+output          int_o;         // Interrupt output
+
+// Bist
+`ifdef ETH_BIST
+input   mbist_si_i;       // bist scan serial in
+output  mbist_so_o;       // bist scan serial out
+input [`ETH_MBIST_CTRL_WIDTH - 1:0] mbist_ctrl_i;       // bist chain shift control
+`endif
+
+wire     [7:0]  r_ClkDiv;
+wire            r_MiiNoPre;
+wire    [15:0]  r_CtrlData;
+wire     [4:0]  r_FIAD;
+wire     [4:0]  r_RGAD;
+wire            r_WCtrlData;
+wire            r_RStat;
+wire            r_ScanStat;
+wire            NValid_stat;
+wire            Busy_stat;
+wire            LinkFail;
+wire    [15:0]  Prsd;             // Read Status Data (data read from the PHY)
+wire            WCtrlDataStart;
+wire            RStatStart;
+wire            UpdateMIIRX_DATAReg;
+
+wire            TxStartFrm;
+wire            TxEndFrm;
+wire            TxUsedData;
+wire     [7:0]  TxData;
+wire            TxRetry;
+wire            TxAbort;
+wire            TxUnderRun;
+wire            TxDone;
+
+
+reg             WillSendControlFrame_sync1;
+reg             WillSendControlFrame_sync2;
+reg             WillSendControlFrame_sync3;
+reg             RstTxPauseRq;
+
+reg             TxPauseRq_sync1;
+reg             TxPauseRq_sync2;
+reg             TxPauseRq_sync3;
+reg             TPauseRq;
+
+
+// Connecting Miim module
+eth_miim miim1
+(
+  .Clk(wb_clk_i),                         .Reset(wb_rst_i),                   .Divider(r_ClkDiv), 
+  .NoPre(r_MiiNoPre),                     .CtrlData(r_CtrlData),              .Rgad(r_RGAD), 
+  .Fiad(r_FIAD),                          .WCtrlData(r_WCtrlData),            .RStat(r_RStat), 
+  .ScanStat(r_ScanStat),                  .Mdi(md_pad_i),                     .Mdo(md_pad_o), 
+  .MdoEn(md_padoe_o),                     .Mdc(mdc_pad_o),                    .Busy(Busy_stat), 
+  .Prsd(Prsd),                            .LinkFail(LinkFail),                .Nvalid(NValid_stat), 
+  .WCtrlDataStart(WCtrlDataStart),        .RStatStart(RStatStart),            .UpdateMIIRX_DATAReg(UpdateMIIRX_DATAReg)
+);
+
+
+
+
+wire  [3:0] RegCs;          // Connected to registers
+wire [31:0] RegDataOut;     // Multiplexed to wb_dat_o
+wire        r_RecSmall;     // Receive small frames
+wire        r_LoopBck;      // Loopback
+wire        r_TxEn;         // Tx Enable
+wire        r_RxEn;         // Rx Enable
+
+wire        MRxDV_Lb;       // Muxed MII receive data valid
+wire        MRxErr_Lb;      // Muxed MII Receive Error
+wire  [3:0] MRxD_Lb;        // Muxed MII Receive Data
+wire        Transmitting;   // Indication that TxEthMAC is transmitting
+wire        r_HugEn;        // Huge packet enable
+wire        r_DlyCrcEn;     // Delayed CRC enabled
+wire [15:0] r_MaxFL;        // Maximum frame length
+
+wire [15:0] r_MinFL;        // Minimum frame length
+wire        ShortFrame;
+wire        DribbleNibble;  // Extra nibble received
+wire        ReceivedPacketTooBig; // Received packet is too big
+wire [47:0] r_MAC;          // MAC address
+wire        LoadRxStatus;   // Rx status was loaded
+wire [31:0] r_HASH0;        // HASH table, lower 4 bytes
+wire [31:0] r_HASH1;        // HASH table, upper 4 bytes
+wire  [7:0] r_TxBDNum;      // Number of Tx buffer descriptors (TX_BD_NUM)
+wire  [6:0] r_IPGT;         // 
+wire  [6:0] r_IPGR1;        // 
+wire  [6:0] r_IPGR2;        // 
+wire  [5:0] r_CollValid;    // 
+wire [15:0] r_TxPauseTV;    // Transmit PAUSE value
+wire        r_TxPauseRq;    // Transmit PAUSE request
+
+wire  [3:0] r_MaxRet;       //
+wire        r_NoBckof;      // 
+wire        r_ExDfrEn;      // 
+wire        r_TxFlow;       // Tx flow control enable
+wire        r_IFG;          // Minimum interframe gap for incoming packets
+
+wire        TxB_IRQ;        // Interrupt Tx Buffer
+wire        TxE_IRQ;        // Interrupt Tx Error
+wire        RxB_IRQ;        // Interrupt Rx Buffer
+wire        RxE_IRQ;        // Interrupt Rx Error
+wire        Busy_IRQ;       // Interrupt Busy (lack of buffers)
+
+//wire        DWord;
+wire        ByteSelected;
+wire        BDAck;
+wire [31:0] BD_WB_DAT_O;    // wb_dat_o that comes from the Wishbone module (for buffer descriptors read/write)
+wire  [3:0] BDCs;           // Buffer descriptor CS
+wire        CsMiss;         // When access to the address between 0x800 and 0xfff occurs, acknowledge is set
+                            // but data is not valid.
+wire        r_Pad;
+wire        r_CrcEn;
+wire        r_FullD;
+wire        r_Pro;
+wire        r_Bro;
+wire        r_NoPre;
+wire        r_RxFlow;
+wire        r_PassAll;
+wire        TxCtrlEndFrm;
+wire        StartTxDone;
+wire        SetPauseTimer;
+wire        TxUsedDataIn;
+wire        TxDoneIn;
+wire        TxAbortIn;
+wire        PerPacketPad;
+wire        PadOut;
+wire        PerPacketCrcEn;
+wire        CrcEnOut;
+wire        TxStartFrmOut;
+wire        TxEndFrmOut;
+wire        ReceivedPauseFrm;
+wire        ControlFrmAddressOK;
+wire        RxStatusWriteLatched_sync2;
+wire        LateCollision;
+wire        DeferIndication;
+wire        LateCollLatched;
+wire        DeferLatched;
+wire        RstDeferLatched;
+wire        CarrierSenseLost;
+
+wire        temp_wb_ack_o;
+wire [31:0] temp_wb_dat_o;
+wire        temp_wb_err_o;
+
+`ifdef ETH_REGISTERED_OUTPUTS
+  reg         temp_wb_ack_o_reg;
+  reg [31:0]  temp_wb_dat_o_reg;
+  reg         temp_wb_err_o_reg;
+`endif
+
+// WISHBONE slave address decode (byte addresses):
+//   0x000 - 0x3FF : Ethernet registers   -> RegCs, one select per byte lane
+//   0x400 - 0x7FF : buffer descriptors   -> BDCs,  one select per byte lane
+//   0x800 - 0xFFF : nothing mapped       -> CsMiss
+//assign DWord = &wb_sel_i;
+assign ByteSelected = |wb_sel_i;
+
+// Each group of selects is the region-qualified strobe replicated over the four
+// byte lanes and masked with the corresponding byte enables.
+assign {RegCs[3], RegCs[2], RegCs[1], RegCs[0]} =
+         {4{wb_stb_i & wb_cyc_i & ByteSelected & ~wb_adr_i[11] & ~wb_adr_i[10]}}
+       & {wb_sel_i[3], wb_sel_i[2], wb_sel_i[1], wb_sel_i[0]};                        // 0x0   - 0x3FF
+
+assign {BDCs[3], BDCs[2], BDCs[1], BDCs[0]} =
+         {4{wb_stb_i & wb_cyc_i & ByteSelected & ~wb_adr_i[11] &  wb_adr_i[10]}}
+       & {wb_sel_i[3], wb_sel_i[2], wb_sel_i[1], wb_sel_i[0]};                        // 0x400 - 0x7FF
+
+assign CsMiss = wb_stb_i & wb_cyc_i & ByteSelected & wb_adr_i[11];                   // 0x800 - 0xfFF
+
+// Read data comes from the register block on a register read, otherwise from
+// the buffer-descriptor side (BD_WB_DAT_O).
+assign temp_wb_dat_o = ((|RegCs) & ~wb_we_i) ? RegDataOut
+                                             : BD_WB_DAT_O;
+
+// Error: an active cycle with no byte lane selected, or one that hits the unmapped region.
+assign temp_wb_err_o = wb_stb_i & wb_cyc_i & (~ByteSelected | CsMiss);
+
+// Slave acknowledge source: any register select or a buffer-descriptor acknowledge.
+`ifdef ETH_AVALON_BUS
+  // Avalon has no counterpart of the WISHBONE "error" signal, so an access to the
+  // unmapped region (CsMiss) is (erroneously) acknowledged as well. This is a grey
+  // area between the Avalon and WISHBONE specs: the original author's understanding
+  // is that Avalon expects all memory addressable by the addr bus feeding a slave
+  // to be, at the very minimum, readable.
+  assign temp_wb_ack_o = CsMiss | BDAck | (|RegCs);
+`else // WISHBONE
+  assign temp_wb_ack_o = BDAck | (|RegCs);
+`endif
+
+// Slave output stage: registered or purely combinational, chosen at compile time.
+`ifdef ETH_REGISTERED_OUTPUTS
+  // ack and err are each masked with their own registered value, so neither
+  // register can be set on two consecutive wb_clk_i edges.
+  always @ (posedge wb_clk_i or posedge wb_rst_i)
+  begin
+    if(wb_rst_i)
+      begin
+        temp_wb_dat_o_reg <=#Tp 32'h0;
+        temp_wb_ack_o_reg <=#Tp 1'b0;
+        temp_wb_err_o_reg <=#Tp 1'b0;
+      end
+    else
+      begin
+        temp_wb_dat_o_reg <=#Tp temp_wb_dat_o;
+        temp_wb_ack_o_reg <=#Tp ~temp_wb_ack_o_reg & temp_wb_ack_o;
+        temp_wb_err_o_reg <=#Tp ~temp_wb_err_o_reg & temp_wb_err_o;
+      end
+  end
+
+  assign wb_dat_o[31:0] = temp_wb_dat_o_reg;
+  assign wb_ack_o = temp_wb_ack_o_reg;
+  assign wb_err_o = temp_wb_err_o_reg;
+`else
+  assign wb_dat_o[31:0] = temp_wb_dat_o;
+  assign wb_ack_o = temp_wb_ack_o;
+  assign wb_err_o = temp_wb_err_o;
+`endif
+
+
+// Connecting Ethernet registers
+//
+// The register block is addressed with the word address wb_adr_i[9:2] and selected
+// per byte lane through RegCs. It runs on wb_clk_i / wb_rst_i; the MII transmit and
+// receive clocks are supplied as well (TxClk / RxClk). The r_* nets connected here
+// are the same nets that are wired to the MAC sub-blocks instantiated in this module.
+// r_Iam is intentionally left unconnected.
+eth_registers ethreg1
+(
+  .DataIn(wb_dat_i),                      .Address(wb_adr_i[9:2]),                    .Rw(wb_we_i), 
+  .Cs(RegCs),                             .Clk(wb_clk_i),                             .Reset(wb_rst_i), 
+  .DataOut(RegDataOut),                   .r_RecSmall(r_RecSmall), 
+  .r_Pad(r_Pad),                          .r_HugEn(r_HugEn),                          .r_CrcEn(r_CrcEn), 
+  .r_DlyCrcEn(r_DlyCrcEn),                .r_FullD(r_FullD), 
+  .r_ExDfrEn(r_ExDfrEn),                  .r_NoBckof(r_NoBckof),                      .r_LoopBck(r_LoopBck), 
+  .r_IFG(r_IFG),                          .r_Pro(r_Pro),                              .r_Iam(), 
+  .r_Bro(r_Bro),                          .r_NoPre(r_NoPre),                          .r_TxEn(r_TxEn), 
+  .r_RxEn(r_RxEn),                        .Busy_IRQ(Busy_IRQ),                        .RxE_IRQ(RxE_IRQ), 
+  .RxB_IRQ(RxB_IRQ),                      .TxE_IRQ(TxE_IRQ),                          .TxB_IRQ(TxB_IRQ), 
+  .r_IPGT(r_IPGT), 
+  .r_IPGR1(r_IPGR1),                      .r_IPGR2(r_IPGR2),                          .r_MinFL(r_MinFL), 
+  .r_MaxFL(r_MaxFL),                      .r_MaxRet(r_MaxRet),                        .r_CollValid(r_CollValid), 
+  .r_TxFlow(r_TxFlow),                    .r_RxFlow(r_RxFlow),                        .r_PassAll(r_PassAll), 
+  .r_MiiNoPre(r_MiiNoPre),                .r_ClkDiv(r_ClkDiv), 
+  .r_WCtrlData(r_WCtrlData),              .r_RStat(r_RStat),                          .r_ScanStat(r_ScanStat), 
+  .r_RGAD(r_RGAD),                        .r_FIAD(r_FIAD),                            .r_CtrlData(r_CtrlData), 
+  .NValid_stat(NValid_stat),              .Busy_stat(Busy_stat),                   
+  .LinkFail(LinkFail),                    .r_MAC(r_MAC),                              .WCtrlDataStart(WCtrlDataStart),
+  .RStatStart(RStatStart),                .UpdateMIIRX_DATAReg(UpdateMIIRX_DATAReg),  .Prsd(Prsd), 
+  .r_TxBDNum(r_TxBDNum),                  .int_o(int_o),
+  .r_HASH0(r_HASH0),                      .r_HASH1(r_HASH1),                          .r_TxPauseRq(r_TxPauseRq), 
+  .r_TxPauseTV(r_TxPauseTV),              .RstTxPauseRq(RstTxPauseRq),                .TxCtrlEndFrm(TxCtrlEndFrm), 
+  .StartTxDone(StartTxDone),              .TxClk(mtx_clk_pad_i),                      .RxClk(mrx_clk_pad_i), 
+  .SetPauseTimer(SetPauseTimer)
+  
+);
+
+
+
+// Receive byte stream and framing strobes driven by rxethmac1; they are consumed
+// by maccontrol1 and by the eth_wishbone instance.
+wire  [7:0] RxData;
+wire        RxValid;
+wire        RxStartFrm;
+wire        RxEndFrm;
+wire        RxAbort;
+
+wire        WillTransmit;            // Will transmit (to RxEthMAC)
+wire        ResetCollision;          // Reset Collision (for synchronizing collision)
+wire  [7:0] TxDataOut;               // Transmit Packet Data (to TxEthMAC)
+wire        WillSendControlFrame;
+wire        ReceiveEnd;
+wire        ReceivedPacketGood;
+wire        ReceivedLengthOK;
+wire        InvalidSymbol;
+wire        LatchedCrcError;
+wire        RxLateCollision;
+wire  [3:0] RetryCntLatched;   
+wire  [3:0] RetryCnt;   
+wire        StartTxAbort;   
+wire        MaxCollisionOccured;   
+wire        RetryLimit;   
+wire        StatePreamble;   
+wire  [1:0] StateData; 
+
+// Connecting MACControl
+//
+// maccontrol1 sits in the transmit path between the eth_wishbone instance and
+// txethmac1: it takes TxData / TxStartFrm / TxEndFrm and hands TxDataOut /
+// TxStartFrmOut / TxEndFrmOut on to txethmac1. In the opposite direction it takes
+// the TxUsedDataIn / TxDoneIn / TxAbortIn status from txethmac1 and returns
+// TxUsedData / TxDone / TxAbort. The pad and CRC enables passed in are the OR of
+// the global register bit and the per-packet bit. Both TxReset and RxReset are
+// tied to wb_rst_i.
+eth_maccontrol maccontrol1
+(
+  .MTxClk(mtx_clk_pad_i),                       .TPauseRq(TPauseRq), 
+  .TxPauseTV(r_TxPauseTV),                      .TxDataIn(TxData), 
+  .TxStartFrmIn(TxStartFrm),                    .TxEndFrmIn(TxEndFrm), 
+  .TxUsedDataIn(TxUsedDataIn),                  .TxDoneIn(TxDoneIn), 
+  .TxAbortIn(TxAbortIn),                        .MRxClk(mrx_clk_pad_i), 
+  .RxData(RxData),                              .RxValid(RxValid), 
+  .RxStartFrm(RxStartFrm),                      .RxEndFrm(RxEndFrm),
+  .ReceiveEnd(ReceiveEnd),                      .ReceivedPacketGood(ReceivedPacketGood),
+  .TxFlow(r_TxFlow), 
+  .RxFlow(r_RxFlow),                            .DlyCrcEn(r_DlyCrcEn),
+  .MAC(r_MAC),                                  .PadIn(r_Pad | PerPacketPad), 
+  .PadOut(PadOut),                              .CrcEnIn(r_CrcEn | PerPacketCrcEn), 
+  .CrcEnOut(CrcEnOut),                          .TxReset(wb_rst_i), 
+  .RxReset(wb_rst_i),                           .ReceivedLengthOK(ReceivedLengthOK),
+  .TxDataOut(TxDataOut),                        .TxStartFrmOut(TxStartFrmOut), 
+  .TxEndFrmOut(TxEndFrmOut),                    .TxUsedDataOut(TxUsedData), 
+  .TxDoneOut(TxDone),                           .TxAbortOut(TxAbort), 
+  .WillSendControlFrame(WillSendControlFrame),  .TxCtrlEndFrm(TxCtrlEndFrm), 
+  .ReceivedPauseFrm(ReceivedPauseFrm),          .ControlFrmAddressOK(ControlFrmAddressOK),
+  .SetPauseTimer(SetPauseTimer),
+  .RxStatusWriteLatched_sync2(RxStatusWriteLatched_sync2),                .r_PassAll(r_PassAll)
+);
+
+
+
+// Carrier sense and collision as seen by the transmitter (Tx clock domain)
+wire TxCarrierSense;          // Synchronized CarrierSense (to Tx clock)
+wire Collision;               // Synchronized Collision
+
+// Synchronizer stages for the MII carrier-sense and collision inputs
+reg  CarrierSense_Tx1, CarrierSense_Tx2;
+reg  Collision_Tx1,    Collision_Tx2;
+
+reg  RxEnSync;                // Synchronized Receive Enable
+reg  WillTransmit_q, WillTransmit_q2;
+
+
+// Receive-side input selection.
+// With r_LoopBck set the receiver is fed from this core's own MII transmit
+// outputs; otherwise it is fed from the MII receive pads, with data-valid and
+// error additionally gated by RxEnSync.
+assign MRxDV_Lb     = r_LoopBck ? mtxen_pad_o     : (mrxdv_pad_i  & RxEnSync);   // receive data valid
+assign MRxErr_Lb    = r_LoopBck ? mtxerr_pad_o    : (mrxerr_pad_i & RxEnSync);   // receive error
+assign MRxD_Lb[3:0] = r_LoopBck ? mtxd_pad_o[3:0] : mrxd_pad_i[3:0];             // receive data nibble
+
+
+
+// Connecting TxEthMAC
+//
+// Transmit MAC instance. It is clocked by mtx_clk_pad_i, takes its data and framing
+// strobes from maccontrol1 (TxDataOut / TxStartFrmOut / TxEndFrmOut, PadOut,
+// CrcEnOut) and drives the MII transmit pads (mtxen_pad_o, mtxd_pad_o,
+// mtxerr_pad_o). CarrierSense and Collision are the Tx-clock-synchronized,
+// r_FullD-gated versions generated in this module.
+eth_txethmac txethmac1
+(
+  .MTxClk(mtx_clk_pad_i),             .Reset(wb_rst_i),                   .CarrierSense(TxCarrierSense), 
+  .Collision(Collision),              .TxData(TxDataOut),                 .TxStartFrm(TxStartFrmOut), 
+  .TxUnderRun(TxUnderRun),            .TxEndFrm(TxEndFrmOut),             .Pad(PadOut),  
+  .MinFL(r_MinFL),                    .CrcEn(CrcEnOut),                   .FullD(r_FullD), 
+  .HugEn(r_HugEn),                    .DlyCrcEn(r_DlyCrcEn),              .IPGT(r_IPGT), 
+  .IPGR1(r_IPGR1),                    .IPGR2(r_IPGR2),                    .CollValid(r_CollValid), 
+  .MaxRet(r_MaxRet),                  .NoBckof(r_NoBckof),                .ExDfrEn(r_ExDfrEn), 
+  .MaxFL(r_MaxFL),                    .MTxEn(mtxen_pad_o),                .MTxD(mtxd_pad_o), 
+  .MTxErr(mtxerr_pad_o),              .TxUsedData(TxUsedDataIn),          .TxDone(TxDoneIn), 
+  .TxRetry(TxRetry),                  .TxAbort(TxAbortIn),                .WillTransmit(WillTransmit), 
+  .ResetCollision(ResetCollision),    .RetryCnt(RetryCnt),                .StartTxDone(StartTxDone), 
+  .StartTxAbort(StartTxAbort),        .MaxCollisionOccured(MaxCollisionOccured), .LateCollision(LateCollision),   
+  .DeferIndication(DeferIndication),  .StatePreamble(StatePreamble),      .StateData(StateData)   
+);
+
+
+
+
+// Receiver byte counter, CRC and state-machine status nets. They connect
+// rxethmac1 to macstatus1; RxByteCnt is also passed to the eth_wishbone instance
+// as RxLength.
+wire  [15:0]  RxByteCnt;
+wire          RxByteCntEq0;
+wire          RxByteCntGreat2;
+wire          RxByteCntMaxFrame;
+wire          RxCrcError;
+wire          RxStateIdle;
+wire          RxStatePreamble;
+wire          RxStateSFD;
+wire   [1:0]  RxStateData;
+wire          AddressMiss;
+
+
+
+// Connecting RxEthMAC
+//
+// Receive MAC instance. It is clocked by mrx_clk_pad_i and fed from the
+// loopback-muxed MII inputs MRxDV_Lb / MRxD_Lb. It produces the byte stream
+// RxData with RxValid / RxStartFrm / RxEndFrm, plus RxAbort and AddressMiss.
+eth_rxethmac rxethmac1
+(
+  .MRxClk(mrx_clk_pad_i),               .MRxDV(MRxDV_Lb),                     .MRxD(MRxD_Lb),
+  .Transmitting(Transmitting),          .HugEn(r_HugEn),                      .DlyCrcEn(r_DlyCrcEn), 
+  .MaxFL(r_MaxFL),                      .r_IFG(r_IFG),                        .Reset(wb_rst_i),
+  .RxData(RxData),                      .RxValid(RxValid),                    .RxStartFrm(RxStartFrm), 
+  .RxEndFrm(RxEndFrm),                  .ByteCnt(RxByteCnt), 
+  .ByteCntEq0(RxByteCntEq0),            .ByteCntGreat2(RxByteCntGreat2),      .ByteCntMaxFrame(RxByteCntMaxFrame), 
+  .CrcError(RxCrcError),                .StateIdle(RxStateIdle),              .StatePreamble(RxStatePreamble), 
+  .StateSFD(RxStateSFD),                .StateData(RxStateData),
+  .MAC(r_MAC),                          .r_Pro(r_Pro),                        .r_Bro(r_Bro),
+  .r_HASH0(r_HASH0),                    .r_HASH1(r_HASH1),                    .RxAbort(RxAbort), 
+  .AddressMiss(AddressMiss),            .PassAll(r_PassAll),                  .ControlFrmAddressOK(ControlFrmAddressOK)
+);
+
+
+// MII Carrier Sense Synchronization
+// Two flops clocked by mtx_clk_pad_i form a shift chain:
+//   mcrs_pad_i -> CarrierSense_Tx1 -> CarrierSense_Tx2
+always @ (posedge mtx_clk_pad_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    {CarrierSense_Tx2, CarrierSense_Tx1} <= #Tp 2'b00;
+  else
+    {CarrierSense_Tx2, CarrierSense_Tx1} <= #Tp {CarrierSense_Tx1, mcrs_pad_i};
+end
+
+// The transmitter only sees carrier sense while r_FullD is clear
+assign TxCarrierSense = CarrierSense_Tx2 & ~r_FullD;
+
+
+// MII Collision Synchronization
+// Stage 1: plain sample of the collision pad on the Tx clock
+always @ (posedge mtx_clk_pad_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    Collision_Tx1 <= #Tp 1'b0;
+  else
+    Collision_Tx1 <= #Tp mcoll_pad_i;
+end
+
+// Stage 2: sticky flag. Set once stage 1 has seen a collision and held until
+// ResetCollision clears it; the clear wins when both are active.
+always @ (posedge mtx_clk_pad_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    Collision_Tx2 <= #Tp 1'b0;
+  else
+    begin
+      if(ResetCollision)
+        Collision_Tx2 <= #Tp 1'b0;
+      else if(Collision_Tx1)
+        Collision_Tx2 <= #Tp 1'b1;
+    end
+end
+
+
+// Synchronized Collision (only reported while r_FullD is clear)
+assign Collision = Collision_Tx2 & ~r_FullD;
+
+
+
+// Delayed WillTransmit
+// WillTransmit is shifted through two flops on the Rx clock (these flops have no reset):
+//   WillTransmit -> WillTransmit_q -> WillTransmit_q2
+always @ (posedge mrx_clk_pad_i)
+  {WillTransmit_q2, WillTransmit_q} <= #Tp {WillTransmit_q, WillTransmit};
+
+
+// Reported to the receive side only while r_FullD is clear
+assign Transmitting = WillTransmit_q2 & ~r_FullD;
+
+
+
+// Synchronized Receive Enable
+// r_RxEn is copied into the Rx clock domain only while mrxdv_pad_i is low, so the
+// enable seen by the receive path cannot change while data-valid is asserted.
+always @ (posedge mrx_clk_pad_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    begin
+      RxEnSync <= #Tp 1'b0;
+    end
+  else if(mrxdv_pad_i == 1'b0)
+    begin
+      RxEnSync <= #Tp r_RxEn;
+    end
+end 
+
+
+
+// Synchronizing WillSendControlFrame to WB_CLK;
+// Three-stage shift chain on wb_clk_i:
+//   WillSendControlFrame -> _sync1 -> _sync2 -> _sync3
+// RstTxPauseRq registers the rising edge seen between stages 2 and 3, so it is a
+// single wb_clk_i cycle pulse per 0 -> 1 transition of the synchronized signal.
+always @ (posedge wb_clk_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    begin
+      WillSendControlFrame_sync1 <= 1'b0;
+      WillSendControlFrame_sync2 <= 1'b0;
+      WillSendControlFrame_sync3 <= 1'b0;
+      RstTxPauseRq               <= 1'b0;
+    end
+  else
+    begin
+      WillSendControlFrame_sync1 <=#Tp WillSendControlFrame;
+      WillSendControlFrame_sync2 <=#Tp WillSendControlFrame_sync1;
+      WillSendControlFrame_sync3 <=#Tp WillSendControlFrame_sync2;
+      RstTxPauseRq               <=#Tp ~WillSendControlFrame_sync3 & WillSendControlFrame_sync2;
+    end
+end
+
+
+
+
+// TX Pause request Synchronization
+// (r_TxPauseRq & r_TxFlow) is shifted through three flops on the Tx clock;
+// TPauseRq registers the rising edge seen between stages 2 and 3, giving a
+// single mtx_clk_pad_i cycle pulse per request.
+always @ (posedge mtx_clk_pad_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    begin
+      {TxPauseRq_sync3, TxPauseRq_sync2} <= #Tp 2'b00;
+      TxPauseRq_sync1                    <= #Tp 1'b0;
+      TPauseRq                           <= #Tp 1'b0;
+    end
+  else
+    begin
+      TxPauseRq_sync1                    <= #Tp (r_TxPauseRq & r_TxFlow);
+      {TxPauseRq_sync3, TxPauseRq_sync2} <= #Tp {TxPauseRq_sync2, TxPauseRq_sync1};
+      TPauseRq                           <= #Tp (~TxPauseRq_sync3) & TxPauseRq_sync2;
+    end
+end
+
+
+wire LatchedMRxErr;
+reg RxAbort_latch;
+reg RxAbort_sync1;
+reg RxAbort_wb;
+reg RxAbortRst_sync1;
+reg RxAbortRst;
+
+// Synchronizing RxAbort to the WISHBONE clock
+//
+// Request/acknowledge handshake between the Rx clock and the WISHBONE clock:
+//   1. RxAbort_latch (Rx clock) is set by any abort cause and held.
+//   2. RxAbort_sync1 -> RxAbort_wb (WISHBONE clock) carry it across; RxAbort_wb is
+//      what the eth_wishbone instance receives as RxAbort.
+//   3. RxAbortRst_sync1 -> RxAbortRst (Rx clock) bring RxAbort_wb back and clear
+//      the latch once no abort cause is active any more.
+
+// Abort latch. Causes: RxAbort, a short frame while r_RecSmall is clear, a latched
+// MII receive error that is not an invalid symbol, or a received pause frame while
+// r_PassAll is clear. Setting has priority over clearing.
+always @ (posedge mrx_clk_pad_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    RxAbort_latch <= #Tp 1'b0;
+  else if(RxAbort | (ShortFrame & ~r_RecSmall) | (LatchedMRxErr & ~InvalidSymbol) | (ReceivedPauseFrm & ~r_PassAll))
+    RxAbort_latch <= #Tp 1'b1;
+  else if(RxAbortRst)
+    RxAbort_latch <= #Tp 1'b0;
+end
+
+// Rx clock -> WISHBONE clock, two stages
+always @ (posedge wb_clk_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    begin
+      RxAbort_sync1 <= #Tp 1'b0;
+      RxAbort_wb    <= #Tp 1'b0;
+    end
+  else
+    begin
+      RxAbort_sync1 <= #Tp RxAbort_latch;
+      RxAbort_wb    <= #Tp RxAbort_sync1;
+    end
+end
+
+// WISHBONE clock -> Rx clock, two stages (acknowledge path that clears the latch)
+always @ (posedge mrx_clk_pad_i or posedge wb_rst_i)
+begin
+  if(wb_rst_i)
+    begin
+      RxAbortRst_sync1 <= #Tp 1'b0;
+      RxAbortRst       <= #Tp 1'b0;
+    end
+  else
+    begin
+      RxAbortRst_sync1 <= #Tp RxAbort_wb;
+      RxAbortRst       <= #Tp RxAbortRst_sync1;
+    end
+end
+
+
+
+// Connecting Wishbone module
+//
+// eth_wishbone has three sides here:
+//   - a WISHBONE slave port for the buffer descriptors (selected by BDCs,
+//     acknowledged through BDAck, read data on BD_WB_DAT_O);
+//   - a WISHBONE master port (m_wb_*), whose address output m_wb_adr_tmp is
+//     extended with two zero LSBs below to form m_wb_adr_o;
+//   - the Tx and Rx data/handshake nets shared with maccontrol1, txethmac1,
+//     rxethmac1 and macstatus1.
+// RxAbort is taken from RxAbort_wb, the WISHBONE-clock copy of the abort latch.
+eth_wishbone wishbone
+(
+  .WB_CLK_I(wb_clk_i),                .WB_DAT_I(wb_dat_i), 
+  .WB_DAT_O(BD_WB_DAT_O), 
+
+  // WISHBONE slave
+  .WB_ADR_I(wb_adr_i[9:2]),           .WB_WE_I(wb_we_i), 
+  .BDCs(BDCs),                        .WB_ACK_O(BDAck), 
+
+  .Reset(wb_rst_i), 
+
+  // WISHBONE master
+  .m_wb_adr_o(m_wb_adr_tmp),          .m_wb_sel_o(m_wb_sel_o),                  .m_wb_we_o(m_wb_we_o), 
+  .m_wb_dat_i(m_wb_dat_i),            .m_wb_dat_o(m_wb_dat_o),                  .m_wb_cyc_o(m_wb_cyc_o), 
+  .m_wb_stb_o(m_wb_stb_o),            .m_wb_ack_i(m_wb_ack_i),                  .m_wb_err_i(m_wb_err_i), 
+  
+`ifdef ETH_WISHBONE_B3
+  .m_wb_cti_o(m_wb_cti_o),            .m_wb_bte_o(m_wb_bte_o), 
+`endif
+  
+
+    //TX
+  .MTxClk(mtx_clk_pad_i),             .TxStartFrm(TxStartFrm),                  .TxEndFrm(TxEndFrm), 
+  .TxUsedData(TxUsedData),            .TxData(TxData), 
+  .TxRetry(TxRetry),                  .TxAbort(TxAbort),                        .TxUnderRun(TxUnderRun), 
+  .TxDone(TxDone), 
+  .PerPacketCrcEn(PerPacketCrcEn),    .PerPacketPad(PerPacketPad), 
+
+  // Register
+  .r_TxEn(r_TxEn),                    .r_RxEn(r_RxEn),                          .r_TxBDNum(r_TxBDNum), 
+  .r_RxFlow(r_RxFlow),                      .r_PassAll(r_PassAll), 
+
+  //RX
+  .MRxClk(mrx_clk_pad_i),             .RxData(RxData),                          .RxValid(RxValid), 
+  .RxStartFrm(RxStartFrm),            .RxEndFrm(RxEndFrm),                      
+  .Busy_IRQ(Busy_IRQ),                .RxE_IRQ(RxE_IRQ),                        .RxB_IRQ(RxB_IRQ), 
+  .TxE_IRQ(TxE_IRQ),                  .TxB_IRQ(TxB_IRQ), 
+
+  .RxAbort(RxAbort_wb),               .RxStatusWriteLatched_sync2(RxStatusWriteLatched_sync2), 
+
+  .InvalidSymbol(InvalidSymbol),      .LatchedCrcError(LatchedCrcError),        .RxLength(RxByteCnt),
+  .RxLateCollision(RxLateCollision),  .ShortFrame(ShortFrame),                  .DribbleNibble(DribbleNibble),
+  .ReceivedPacketTooBig(ReceivedPacketTooBig), .LoadRxStatus(LoadRxStatus),     .RetryCntLatched(RetryCntLatched),
+  .RetryLimit(RetryLimit),            .LateCollLatched(LateCollLatched),        .DeferLatched(DeferLatched),   
+  .RstDeferLatched(RstDeferLatched), 
+  .CarrierSenseLost(CarrierSenseLost),.ReceivedPacketGood(ReceivedPacketGood),  .AddressMiss(AddressMiss),
+  .ReceivedPauseFrm(ReceivedPauseFrm)
+  
+`ifdef ETH_BIST
+  ,
+  .mbist_si_i       (mbist_si_i),
+  .mbist_so_o       (mbist_so_o),
+  .mbist_ctrl_i       (mbist_ctrl_i)
+`endif
+);
+
+// The master address from eth_wishbone carries no byte-offset bits; append two zeros.
+assign m_wb_adr_o = {m_wb_adr_tmp, 2'h0};
+
+// Connecting MacStatus module
+//
+// Status collection for both directions. On the receive side it takes the
+// loopback-muxed MII inputs (MRxDV_Lb, MRxErr_Lb, MRxD_Lb) and the receiver
+// counter/state nets from rxethmac1. On the transmit side it takes the
+// retry/collision/defer indications from txethmac1 and returns the latched
+// versions (RetryCntLatched, LateCollLatched, DeferLatched, ...) that the
+// eth_wishbone instance consumes.
+// NOTE(review): Collision is connected to the raw mcoll_pad_i pad while
+// CarrierSense uses the synchronized CarrierSense_Tx2 - confirm this asymmetry
+// is intended.
+eth_macstatus macstatus1 
+(
+  .MRxClk(mrx_clk_pad_i),             .Reset(wb_rst_i),
+  .ReceiveEnd(ReceiveEnd),            .ReceivedPacketGood(ReceivedPacketGood),     .ReceivedLengthOK(ReceivedLengthOK), 
+  .RxCrcError(RxCrcError),            .MRxErr(MRxErr_Lb),                          .MRxDV(MRxDV_Lb), 
+  .RxStateSFD(RxStateSFD),            .RxStateData(RxStateData),                   .RxStatePreamble(RxStatePreamble), 
+  .RxStateIdle(RxStateIdle),          .Transmitting(Transmitting),                 .RxByteCnt(RxByteCnt), 
+  .RxByteCntEq0(RxByteCntEq0),        .RxByteCntGreat2(RxByteCntGreat2),           .RxByteCntMaxFrame(RxByteCntMaxFrame), 
+  .InvalidSymbol(InvalidSymbol),
+  .MRxD(MRxD_Lb),                     .LatchedCrcError(LatchedCrcError),           .Collision(mcoll_pad_i),
+  .CollValid(r_CollValid),            .RxLateCollision(RxLateCollision),           .r_RecSmall(r_RecSmall),
+  .r_MinFL(r_MinFL),                  .r_MaxFL(r_MaxFL),                           .ShortFrame(ShortFrame),
+  .DribbleNibble(DribbleNibble),      .ReceivedPacketTooBig(ReceivedPacketTooBig), .r_HugEn(r_HugEn),
+  .LoadRxStatus(LoadRxStatus),        .RetryCnt(RetryCnt),                         .StartTxDone(StartTxDone),
+  .StartTxAbort(StartTxAbort),        .RetryCntLatched(RetryCntLatched),           .MTxClk(mtx_clk_pad_i),
+  .MaxCollisionOccured(MaxCollisionOccured), .RetryLimit(RetryLimit),              .LateCollision(LateCollision),
+  .LateCollLatched(LateCollLatched),  .DeferIndication(DeferIndication),           .DeferLatched(DeferLatched),
+  .RstDeferLatched(RstDeferLatched), 
+  .TxStartFrm(TxStartFrmOut),         .StatePreamble(StatePreamble),               .StateData(StateData),
+  .CarrierSense(CarrierSense_Tx2),    .CarrierSenseLost(CarrierSenseLost),         .TxUsedData(TxUsedDataIn),
+  .LatchedMRxErr(LatchedMRxErr),      .Loopback(r_LoopBck),                        .r_FullD(r_FullD)
+);
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_transmitcontrol.v
===================================================================
--- /trunk/OC-Ethernet/eth_transmitcontrol.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_transmitcontrol.v	(revision 6)
@@ -0,0 +1,326 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_transmitcontrol.v                                       ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.5  2002/11/19 17:37:32  mohor
+// When control frame (PAUSE) was sent, status was written in the
+// eth_wishbone module and both TXB and TXC interrupts were set. Fixed.
+// Only TXC interrupt is set.
+//
+// Revision 1.4  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.3  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.2  2001/09/11 14:17:00  mohor
+// Few little NCSIM warnings fixed.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed to have an eth_ prefix in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joined together.
+//
+// Revision 1.1  2001/07/03 12:51:54  mohor
+// Initial release of the MAC Control module.
+//
+//
+//
+//
+//
+//
+
+
+`include "timescale.v"
+
+
+// eth_transmitcontrol
+//
+// Generates a transmit control frame (the PAUSE frame mentioned in the revision
+// history above) and the handshake signals needed to insert it into the
+// transmit stream.
+//
+// A request (TPauseRq while TxFlow is set) is latched in WillSendControlFrame.
+// When TxUsedDataOut is low, CtrlMux is raised to switch the data path to this
+// module, TxCtrlStartFrm starts the frame, and ByteCnt steps through the frame
+// contents until ControlEnd (ByteCnt == 6'h22), at which point TxCtrlEndFrm is
+// generated. The byte presented on ControlData is selected by the even values
+// of ByteCnt:
+//   0x00-0x0A : 01-80-C2-00-00-01   (reserved multicast address)
+//   0x0C-0x16 : MAC[47:0], most significant byte first
+//   0x18-0x1A : 88-08               (Type/Length)
+//   0x1C-0x1E : 00-01               (Opcode)
+//   0x20-0x22 : TxPauseTV[15:0], most significant byte first
+//
+// All registers are clocked by MTxClk. TxReset is an asynchronous, active-high
+// reset for every register except ControlEnd_q and TxCtrlStartFrm_q, which have
+// no reset.
+module eth_transmitcontrol (MTxClk, TxReset, TxUsedDataIn, TxUsedDataOut, TxDoneIn, TxAbortIn, 
+                            TxStartFrmIn, TPauseRq, TxUsedDataOutDetected, TxFlow, DlyCrcEn, 
+                            TxPauseTV, MAC, TxCtrlStartFrm, TxCtrlEndFrm, SendingCtrlFrm, CtrlMux, 
+                            ControlData, WillSendControlFrame, BlockTxDone
+                           );
+
+parameter Tp = 1;                       // delay applied to every non-blocking assignment
+
+
+input         MTxClk;                   // clock for all registers in this module
+input         TxReset;                  // asynchronous reset, active high
+input         TxUsedDataIn;             // advances ByteCnt / DlyCrcCnt while CtrlMux is set
+input         TxUsedDataOut;            // while high, CtrlMux / TxCtrlStartFrm are not raised
+input         TxDoneIn;                 // clears CtrlMux and SendingCtrlFrm; resets the counters
+input         TxAbortIn;                // resets the counters; also allows TxCtrlStartFrm to be set
+input         TxStartFrmIn;             // clears BlockTxDone; also allows TxCtrlStartFrm to be set
+input         TPauseRq;                 // control frame request (latched when TxFlow is set)
+input         TxUsedDataOutDetected;    // when low, TxCtrlStartFrm may be set without waiting for done/abort/start
+input         TxFlow;                   // qualifies TPauseRq
+input         DlyCrcEn;                 // when set, ByteCnt only advances while DlyCrcCnt[1:0] == 2'b11
+input  [15:0] TxPauseTV;                // value placed in the last two bytes of the frame
+input  [47:0] MAC;                      // address placed in bytes 6..11 of the frame
+
+output        TxCtrlStartFrm;           // start-of-frame strobe for the control frame
+output        TxCtrlEndFrm;             // end-of-frame strobe for the control frame
+output        SendingCtrlFrm;           // set from control frame start until TxDoneIn
+output        CtrlMux;                  // selects control-frame path over the normal path
+output [7:0]  ControlData;              // control frame byte stream
+output        WillSendControlFrame;     // latched request, cleared at the end of the control frame
+output        BlockTxDone;              // set by TxCtrlStartFrm, cleared by TxStartFrmIn
+
+reg           SendingCtrlFrm;
+reg           CtrlMux;
+reg           WillSendControlFrame;
+reg    [3:0]  DlyCrcCnt;
+reg    [5:0]  ByteCnt;
+reg           ControlEnd_q;
+reg    [7:0]  MuxedCtrlData;
+reg           TxCtrlStartFrm;
+reg           TxCtrlStartFrm_q;
+reg           TxCtrlEndFrm;
+reg    [7:0]  ControlData;
+reg           TxUsedDataIn_q;
+reg           BlockTxDone;
+
+wire          IncrementDlyCrcCnt;
+wire          ResetByteCnt;
+wire          IncrementByteCnt;
+wire          ControlEnd;
+wire          IncrementByteCntBy2;
+wire          EnableCnt;
+
+
+// A command for Sending the control frame is active (latched)
+// Clearing (end of the control frame while CtrlMux is set) has priority over setting.
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    WillSendControlFrame <= #Tp 1'b0;
+  else
+  if(TxCtrlEndFrm & CtrlMux)
+    WillSendControlFrame <= #Tp 1'b0;
+  else
+  if(TPauseRq & TxFlow)
+    WillSendControlFrame <= #Tp 1'b1;
+end
+
+
+// Generation of the transmit control packet start frame
+// Set when a control frame is pending, TxUsedDataOut is low and one of
+// TxDoneIn / TxAbortIn / TxStartFrmIn / ~TxUsedDataOutDetected holds.
+// Cleared one clock after TxUsedDataIn is seen while CtrlMux is set.
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    TxCtrlStartFrm <= #Tp 1'b0;
+  else
+  if(TxUsedDataIn_q & CtrlMux)
+    TxCtrlStartFrm <= #Tp 1'b0;
+  else
+  if(WillSendControlFrame & ~TxUsedDataOut & (TxDoneIn | TxAbortIn | TxStartFrmIn | (~TxUsedDataOutDetected)))
+    TxCtrlStartFrm <= #Tp 1'b1;
+end
+
+
+
+// Generation of the transmit control packet end frame
+// High on the clocks following ControlEnd or its one-clock-delayed copy.
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    TxCtrlEndFrm <= #Tp 1'b0;
+  else
+  if(ControlEnd | ControlEnd_q)
+    TxCtrlEndFrm <= #Tp 1'b1;
+  else
+    TxCtrlEndFrm <= #Tp 1'b0;
+end
+
+
+// Generation of the multiplexer signal (controls muxes for switching between
+// normal and control packets)
+// Setting (pending control frame, TxUsedDataOut low) has priority over clearing on TxDoneIn.
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    CtrlMux <= #Tp 1'b0;
+  else
+  if(WillSendControlFrame & ~TxUsedDataOut)
+    CtrlMux <= #Tp 1'b1;
+  else
+  if(TxDoneIn)
+    CtrlMux <= #Tp 1'b0;
+end
+
+
+
+// Generation of the Sending Control Frame signal (enables padding and CRC)
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    SendingCtrlFrm <= #Tp 1'b0;
+  else
+  if(WillSendControlFrame & TxCtrlStartFrm)
+    SendingCtrlFrm <= #Tp 1'b1;
+  else
+  if(TxDoneIn)
+    SendingCtrlFrm <= #Tp 1'b0;
+end
+
+
+// One-clock-delayed copy of TxUsedDataIn (used to clear TxCtrlStartFrm)
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    TxUsedDataIn_q <= #Tp 1'b0;
+  else
+    TxUsedDataIn_q <= #Tp TxUsedDataIn;
+end
+
+
+
+// Generation of the signal that will block sending the Done signal to the eth_wishbone module
+// While sending the control frame
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    BlockTxDone <= #Tp 1'b0;
+  else
+  if(TxCtrlStartFrm)
+    BlockTxDone <= #Tp 1'b1;
+  else
+  if(TxStartFrmIn)
+    BlockTxDone <= #Tp 1'b0;
+end
+
+
+// One-clock-delayed copies of ControlEnd and TxCtrlStartFrm.
+// Note: these two registers are not reset by TxReset.
+always @ (posedge MTxClk)
+begin
+  ControlEnd_q     <= #Tp ControlEnd;
+  TxCtrlStartFrm_q <= #Tp TxCtrlStartFrm;
+end
+
+
+// DlyCrcCnt stops incrementing once bit 2 is set (i.e. at the value 4)
+assign IncrementDlyCrcCnt = CtrlMux & TxUsedDataIn &  ~DlyCrcCnt[2];
+
+
+// Delayed CRC counter
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    DlyCrcCnt <= #Tp 4'h0;
+  else
+  if(ResetByteCnt)
+    DlyCrcCnt <= #Tp 4'h0;
+  else
+  if(IncrementDlyCrcCnt)
+    DlyCrcCnt <= #Tp DlyCrcCnt + 1'b1;
+end
+
+             
+// Both counters are cleared on reset, and on TxDoneIn / TxAbortIn when no control frame start is active
+assign ResetByteCnt = TxReset | (~TxCtrlStartFrm & (TxDoneIn | TxAbortIn));
+// +1: on the rising edge of TxCtrlStartFrm without TxUsedDataIn, or on TxUsedDataIn before ControlEnd
+assign IncrementByteCnt = CtrlMux & (TxCtrlStartFrm & ~TxCtrlStartFrm_q & ~TxUsedDataIn | TxUsedDataIn & ~ControlEnd);
+assign IncrementByteCntBy2 = CtrlMux & TxCtrlStartFrm & (~TxCtrlStartFrm_q) & TxUsedDataIn;     // When TxUsedDataIn and CtrlMux are set at the same time
+
+// NOTE(review): with DlyCrcEn set, counting is enabled only while DlyCrcCnt[1:0] == 2'b11,
+// yet DlyCrcCnt keeps incrementing until bit 2 is set (value 4, where [1:0] == 2'b00).
+// Confirm that ByteCnt is meant to advance only while DlyCrcCnt reads 3.
+assign EnableCnt = (~DlyCrcEn | DlyCrcEn & (&DlyCrcCnt[1:0]));
+// Byte counter
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    ByteCnt <= #Tp 6'h0;
+  else
+  if(ResetByteCnt)
+    ByteCnt <= #Tp 6'h0;
+  else
+  if(IncrementByteCntBy2 & EnableCnt)
+    ByteCnt <= #Tp (ByteCnt[5:0] ) + 2'h2;
+  else
+  if(IncrementByteCnt & EnableCnt)
+    ByteCnt <= #Tp (ByteCnt[5:0] ) + 1'b1;
+end
+
+
+// ByteCnt value that selects the last byte of the frame contents (TxPauseTV[7:0])
+assign ControlEnd = ByteCnt[5:0] == 6'h22;
+
+
+// Control data generation (goes to the TxEthMAC module)
+// Combinational lookup: only even ByteCnt values select a frame byte; odd values
+// fall into the default branch (the result is not latched for odd values, see
+// the ControlData register below).
+always @ (ByteCnt or DlyCrcEn or MAC or TxPauseTV or DlyCrcCnt)
+begin
+  case(ByteCnt)
+    6'h0:    if(~DlyCrcEn | DlyCrcEn & (&DlyCrcCnt[1:0]))
+               MuxedCtrlData[7:0] = 8'h01;                   // Reserved Multicast Address
+             else
+						 	 MuxedCtrlData[7:0] = 8'h0;
+    6'h2:      MuxedCtrlData[7:0] = 8'h80;
+    6'h4:      MuxedCtrlData[7:0] = 8'hC2;
+    6'h6:      MuxedCtrlData[7:0] = 8'h00;
+    6'h8:      MuxedCtrlData[7:0] = 8'h00;
+    6'hA:      MuxedCtrlData[7:0] = 8'h01;
+    6'hC:      MuxedCtrlData[7:0] = MAC[47:40];
+    6'hE:      MuxedCtrlData[7:0] = MAC[39:32];
+    6'h10:     MuxedCtrlData[7:0] = MAC[31:24];
+    6'h12:     MuxedCtrlData[7:0] = MAC[23:16];
+    6'h14:     MuxedCtrlData[7:0] = MAC[15:8];
+    6'h16:     MuxedCtrlData[7:0] = MAC[7:0];
+    6'h18:     MuxedCtrlData[7:0] = 8'h88;                   // Type/Length
+    6'h1A:     MuxedCtrlData[7:0] = 8'h08;
+    6'h1C:     MuxedCtrlData[7:0] = 8'h00;                   // Opcode
+    6'h1E:     MuxedCtrlData[7:0] = 8'h01;
+    6'h20:     MuxedCtrlData[7:0] = TxPauseTV[15:8];         // Pause timer value
+    6'h22:     MuxedCtrlData[7:0] = TxPauseTV[7:0];
+    default:   MuxedCtrlData[7:0] = 8'h0;
+  endcase
+end
+
+
+// Latched Control data
+// Updated only while ByteCnt is even, so the byte is held while ByteCnt is odd.
+always @ (posedge MTxClk or posedge TxReset)
+begin
+  if(TxReset)
+    ControlData[7:0] <= #Tp 8'h0;
+  else
+  if(~ByteCnt[0])
+    ControlData[7:0] <= #Tp MuxedCtrlData[7:0];
+end
+
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_macstatus.v
===================================================================
--- /trunk/OC-Ethernet/eth_macstatus.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_macstatus.v	(revision 6)
@@ -0,0 +1,425 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_macstatus.v                                             ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001, 2002 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.16  2005/02/21 10:42:11  igorm
+// Defer indication fixed.
+//
+// Revision 1.15  2003/01/30 13:28:19  tadejm
+// Defer indication changed.
+//
+// Revision 1.14  2002/11/22 01:57:06  mohor
+// Rx Flow control fixed. CF flag added to the RX buffer descriptor. RxAbort
+// synchronized.
+//
+// Revision 1.13  2002/11/13 22:30:58  tadejm
+// Late collision is reported only when not in the full duplex.
+// Sample is taken (for status) as soon as MRxDV is not valid (regardless
+// of the received byte cnt).
+//
+// Revision 1.12  2002/09/12 14:50:16  mohor
+// CarrierSenseLost bug fixed when operating in full duplex mode.
+//
+// Revision 1.11  2002/09/04 18:38:03  mohor
+// CarrierSenseLost status is not set when working in loopback mode.
+//
+// Revision 1.10  2002/07/25 18:17:46  mohor
+// InvalidSymbol generation changed.
+//
+// Revision 1.9  2002/04/22 13:51:44  mohor
+// Short frame and ReceivedLengthOK were not detected correctly.
+//
+// Revision 1.8  2002/02/18 10:40:17  mohor
+// Small fixes.
+//
+// Revision 1.7  2002/02/15 17:07:39  mohor
+// Status was not written correctly when frames were discarted because of
+// address mismatch.
+//
+// Revision 1.6  2002/02/11 09:18:21  mohor
+// Tx status is written back to the BD.
+//
+// Revision 1.5  2002/02/08 16:21:54  mohor
+// Rx status is written back to the BD.
+//
+// Revision 1.4  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.3  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.2  2001/09/11 14:17:00  mohor
+// Few little NCSIM warnings fixed.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+//
+//
+//
+//
+
+`include "timescale.v"
+
+
+module eth_macstatus(
+                      MRxClk, Reset, ReceivedLengthOK, ReceiveEnd, ReceivedPacketGood, RxCrcError, 
+                      MRxErr, MRxDV, RxStateSFD, RxStateData, RxStatePreamble, RxStateIdle, Transmitting, 
+                      RxByteCnt, RxByteCntEq0, RxByteCntGreat2, RxByteCntMaxFrame, 
+                      InvalidSymbol, MRxD, LatchedCrcError, Collision, CollValid, RxLateCollision,
+                      r_RecSmall, r_MinFL, r_MaxFL, ShortFrame, DribbleNibble, ReceivedPacketTooBig, r_HugEn,
+                      LoadRxStatus, StartTxDone, StartTxAbort, RetryCnt, RetryCntLatched, MTxClk, MaxCollisionOccured, 
+                      RetryLimit, LateCollision, LateCollLatched, DeferIndication, DeferLatched, RstDeferLatched, TxStartFrm,
+                      StatePreamble, StateData, CarrierSense, CarrierSenseLost, TxUsedData, LatchedMRxErr, Loopback, 
+                      r_FullD
+                    );
+
+
+
+parameter Tp = 1;
+
+
+input         MRxClk;               // clock for all receive-side status flops
+input         Reset;                // asynchronous reset for every flop in this module (both clock domains)
+input         RxCrcError;           // captured into LatchedCrcError while RxStateData[0] is high
+input         MRxErr;               // with MRxDV: sets LatchedMRxErr and qualifies the invalid-symbol check
+input         MRxDV;
+
+input         RxStateSFD;           // clears LatchedCrcError and DribbleNibble
+input   [1:0] RxStateData;
+input         RxStatePreamble;
+input         RxStateIdle;          // sets RxColWindow back to 1
+input         Transmitting;         // when high, masks the RxStateIdle term of LatchedMRxErr
+input  [15:0] RxByteCnt;            // compared against r_MinFL, r_MaxFL and CollValid
+input         RxByteCntEq0;         // when high, 0 is latched into LatchedCrcError instead of RxCrcError
+input         RxByteCntGreat2;      // not used by any active logic in this module
+input         RxByteCntMaxFrame;    // triggers TakeSample while MRxDV is still high
+input   [3:0] MRxD;                 // 4'he together with MRxErr and MRxDV flags an invalid symbol
+input         Collision;
+input   [5:0] CollValid;            // RxByteCnt[5:0] value at which RxColWindow is cleared
+input         r_RecSmall;
+input  [15:0] r_MinFL;              // lower bound for ReceivedLengthOK / ShortFrame
+input  [15:0] r_MaxFL;              // upper bound for ReceivedLengthOK / ReceivedPacketTooBig
+input         r_HugEn;              // when high, ReceivedPacketTooBig is never set
+input         StartTxDone;          // StartTxDone | StartTxAbort is the load enable of the Tx status latches
+input         StartTxAbort;
+input   [3:0] RetryCnt;
+input         MTxClk;               // clock for all transmit-side status flops
+input         MaxCollisionOccured;
+input         LateCollision;
+input         DeferIndication;      // sets DeferLatched (has priority over RstDeferLatched)
+input         TxStartFrm;           // clears CarrierSenseLost
+input         StatePreamble;
+input   [1:0] StateData;
+input         CarrierSense;
+input         TxUsedData;           // not used by any logic in this module
+input         Loopback;
+input         r_FullD;              // when high, RxLateCollision and CarrierSenseLost are never set
+
+
+output        ReceivedLengthOK;     // combinational: r_MinFL <= RxByteCnt <= r_MaxFL
+output        ReceiveEnd;           // LoadRxStatus delayed by one MRxClk cycle
+output        ReceivedPacketGood;   // combinational: ~LatchedCrcError
+output        InvalidSymbol;
+output        LatchedCrcError;
+output        RxLateCollision;
+output        ShortFrame;
+output        DribbleNibble;
+output        ReceivedPacketTooBig;
+output        LoadRxStatus;         // TakeSample delayed by one MRxClk cycle
+output  [3:0] RetryCntLatched;
+output        RetryLimit;
+output        LateCollLatched;
+output        DeferLatched;
+input         RstDeferLatched;      // input declared among the outputs: clears DeferLatched when DeferIndication is low
+output        CarrierSenseLost;
+output        LatchedMRxErr;
+
+
+reg           ReceiveEnd;
+
+reg           LatchedCrcError;
+reg           LatchedMRxErr;
+reg           LoadRxStatus;
+reg           InvalidSymbol;
+reg     [3:0] RetryCntLatched;
+reg           RetryLimit;
+reg           LateCollLatched;
+reg           DeferLatched;
+reg           CarrierSenseLost;
+
+wire          TakeSample;           // combinational strobe that starts the Rx status sampling
+wire          SetInvalidSymbol; // Invalid symbol was received during reception in 100Mbps 
+
+// LatchedCrcError: CRC error flag. Cleared by Reset and whenever RxStateSFD is
+// high; while RxStateData[0] is high it follows RxCrcError, forced to 0 when
+// RxByteCntEq0 is set. Otherwise it holds its value.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if (Reset)
+    LatchedCrcError <= #Tp 1'b0;
+  else if (RxStateSFD)
+    LatchedCrcError <= #Tp 1'b0;
+  else if (RxStateData[0])
+    LatchedCrcError <= #Tp (~RxByteCntEq0) & RxCrcError;
+end
+
+
+// LatchedMRxErr: registered (not sticky) flag, high for the cycle after MRxErr and MRxDV
+// are both high in preamble, SFD or data state, or in idle while Transmitting is low.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if (Reset)
+    LatchedMRxErr <= #Tp 1'b0;
+  else if (MRxDV & MRxErr & (RxStatePreamble | RxStateSFD | (|RxStateData) | (RxStateIdle & ~Transmitting)))
+    LatchedMRxErr <= #Tp 1'b1;
+  else
+    LatchedMRxErr <= #Tp 1'b0;
+end
+
+
+// ReceivedPacketGood: asserted whenever no CRC error is latched.
+assign ReceivedPacketGood = !LatchedCrcError;
+
+// ReceivedLengthOK: RxByteCnt lies within the inclusive range
+// [r_MinFL, r_MaxFL].
+assign ReceivedLengthOK = (r_MinFL[15:0] <= RxByteCnt[15:0]) & (r_MaxFL[15:0] >= RxByteCnt[15:0]);
+
+
+
+
+
+// Time to take a sample: MRxDV low in any data state, or RxByteCntMaxFrame set in RxStateData[0] with MRxDV high
+//assign TakeSample = |RxStateData     & ~MRxDV & RxByteCntGreat2  |   (earlier form, disabled)
+assign TakeSample = ((|RxStateData) & ~MRxDV) |
+                    (RxStateData[0] & MRxDV & RxByteCntMaxFrame);
+
+
+// LoadRxStatus: TakeSample registered on MRxClk, so it is TakeSample
+// delayed by one receive clock cycle. Cleared asynchronously by Reset.
+// Several status flops in this module use it as their clear term.
+always @ (posedge MRxClk or posedge Reset)
+  if (Reset)
+    LoadRxStatus <= #Tp 1'b0;
+  else
+    LoadRxStatus <= #Tp TakeSample;
+
+
+
+// ReceiveEnd: LoadRxStatus registered on MRxClk, so it is LoadRxStatus
+// delayed by one receive clock cycle. Cleared asynchronously by Reset.
+// No enable term: the flop is rewritten on every MRxClk edge.
+always @ (posedge MRxClk or posedge Reset)
+  if (Reset)
+    ReceiveEnd <= #Tp 1'b0;
+  else
+    ReceiveEnd <= #Tp LoadRxStatus;
+
+
+// Invalid Symbol received during 100Mbps mode: MRxDV and MRxErr high with MRxD == 4'hE
+assign SetInvalidSymbol = (MRxD[3:0] == 4'hE) & MRxErr & MRxDV;
+
+
+// InvalidSymbol: sticky flag set by SetInvalidSymbol. It is cleared by Reset
+// and by LoadRxStatus, except that a SetInvalidSymbol arriving in the same
+// cycle as LoadRxStatus wins and sets the flag.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if (Reset)
+    InvalidSymbol <= #Tp 1'b0;
+  else if (LoadRxStatus & ~SetInvalidSymbol)
+    InvalidSymbol <= #Tp 1'b0;
+  else if (SetInvalidSymbol)
+    InvalidSymbol <= #Tp 1'b1;
+end
+
+
+// Late Collision
+
+reg RxLateCollision;
+reg RxColWindow;
+// RxLateCollision: set by a Collision while r_FullD is low, provided the
+// collision window has closed (RxColWindow low) or r_RecSmall is set.
+// Cleared by Reset and by LoadRxStatus (which has priority over setting).
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if (Reset)
+    RxLateCollision <= #Tp 1'b0;
+  else if (LoadRxStatus)
+    RxLateCollision <= #Tp 1'b0;
+  else if (Collision & ~r_FullD & (r_RecSmall | ~RxColWindow))
+    RxLateCollision <= #Tp 1'b1;
+end
+
+// Collision Window: RxColWindow resets to 1 and is set again in RxStateIdle.
+// It is cleared in RxStateData[1] when RxByteCnt[5:0] reaches CollValid[5:0]
+// with no Collision present; the clear has priority over the idle set.
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if (Reset)
+    RxColWindow <= #Tp 1'b1;
+  else if ((~Collision) & (RxByteCnt[5:0] == CollValid[5:0]) & RxStateData[1])
+    RxColWindow <= #Tp 1'b0;
+  else if (RxStateIdle)
+    RxColWindow <= #Tp 1'b1;
+end
+
+
+// ShortFrame: on TakeSample, records whether RxByteCnt is below r_MinFL.
+// Cleared by Reset and by LoadRxStatus, which takes priority over a
+// simultaneous TakeSample.
+reg ShortFrame;
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if (Reset)
+    ShortFrame <= #Tp 1'b0;
+  else if (LoadRxStatus)
+    ShortFrame <= #Tp 1'b0;
+  else if (TakeSample)
+    ShortFrame <= #Tp (r_MinFL[15:0] > RxByteCnt[15:0]);
+end
+
+
+// DribbleNibble: sticky flag set when MRxDV is low while RxStateData[1] is
+// high. Cleared by Reset and by RxStateSFD, which takes priority over the
+// set condition.
+reg DribbleNibble;
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if (Reset)
+    DribbleNibble <= #Tp 1'b0;
+  else if (RxStateSFD)
+    DribbleNibble <= #Tp 1'b0;
+  else if (RxStateData[1] & ~MRxDV)
+    DribbleNibble <= #Tp 1'b1;
+end
+
+
+// ReceivedPacketTooBig: on TakeSample, records RxByteCnt > r_MaxFL unless
+// r_HugEn is set. Cleared by Reset and by LoadRxStatus (priority over TakeSample).
+reg ReceivedPacketTooBig;
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if (Reset)
+    ReceivedPacketTooBig <= #Tp 1'b0;
+  else if (LoadRxStatus)
+    ReceivedPacketTooBig <= #Tp 1'b0;
+  else if (TakeSample)
+    ReceivedPacketTooBig <= #Tp (r_MaxFL[15:0] < RxByteCnt[15:0]) & ~r_HugEn;
+end
+
+
+
+// Latched Retry counter for tx status: RetryCnt is captured on MTxClk
+// whenever StartTxDone or StartTxAbort is high, and held otherwise.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if (Reset)
+    RetryCntLatched <= #Tp 4'h0;
+  else if (StartTxAbort | StartTxDone)
+    RetryCntLatched <= #Tp RetryCnt;
+end
+
+
+// Latched Retransmission limit: MaxCollisionOccured is captured on MTxClk
+// whenever StartTxDone or StartTxAbort is high, and held otherwise.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if (Reset)
+    RetryLimit <= #Tp 1'b0;
+  else if (StartTxAbort | StartTxDone)
+    RetryLimit <= #Tp MaxCollisionOccured;
+end
+
+
+// Latched Late Collision: LateCollision is captured on MTxClk whenever
+// StartTxDone or StartTxAbort is high, and held otherwise.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if (Reset)
+    LateCollLatched <= #Tp 1'b0;
+  else if (StartTxAbort | StartTxDone)
+    LateCollLatched <= #Tp LateCollision;
+end
+
+
+
+// Latched Defer state: sticky flag set by DeferIndication and cleared by
+// RstDeferLatched. The set has priority, so RstDeferLatched only clears the
+// flag in a cycle where DeferIndication is low.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if (Reset)
+    DeferLatched <= #Tp 1'b0;
+  else if (DeferIndication)
+    DeferLatched <= #Tp 1'b1;
+  else if (RstDeferLatched)
+    DeferLatched <= #Tp 1'b0;
+end
+
+
+// CarrierSenseLost: sticky flag set during StatePreamble or either StateData
+// state when none of CarrierSense, Loopback, Collision and r_FullD is high.
+// Cleared by Reset and by TxStartFrm; the set condition has priority.
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if (Reset)
+    CarrierSenseLost <= #Tp 1'b0;
+  else if ((StatePreamble | (|StateData)) & ~(CarrierSense | Loopback | Collision | r_FullD))
+    CarrierSenseLost <= #Tp 1'b1;
+  else if (TxStartFrm)
+    CarrierSenseLost <= #Tp 1'b0;
+end
+
+
+endmodule
Index: /trunk/OC-Ethernet/timescale.v
===================================================================
--- /trunk/OC-Ethernet/timescale.v	(revision 6)
+++ /trunk/OC-Ethernet/timescale.v	(revision 6)
@@ -0,0 +1,50 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  timescale.v                                                 ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.2  2001/10/19 11:36:31  mohor
+// Log file added.
+//
+//
+//
+
+`timescale 1ns / 1ns
Index: /trunk/OC-Ethernet/eth_registers.v
===================================================================
--- /trunk/OC-Ethernet/eth_registers.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_registers.v	(revision 6)
@@ -0,0 +1,1181 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_registers.v                                             ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001, 2002 Authors                             ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.28  2004/04/26 15:26:23  igorm
+// - Bug connected to the TX_BD_NUM_Wr signal fixed (bug came in with the
+//   previous update of the core.
+// - TxBDAddress is set to 0 after the TX is enabled in the MODER register.
+// - RxBDAddress is set to r_TxBDNum<<1 after the RX is enabled in the MODER
+//   register. (thanks to Mathias and Torbjorn)
+// - Multicast reception was fixed. Thanks to Ulrich Gries
+//
+// Revision 1.27  2004/04/26 11:42:17  igorm
+// TX_BD_NUM_Wr error fixed. Error was entered with the last check-in.
+//
+// Revision 1.26  2003/11/12 18:24:59  tadejm
+// WISHBONE slave changed and tested from only 32-bit accesss to byte access.
+//
+// Revision 1.25  2003/04/18 16:26:25  mohor
+// RxBDAddress was updated also when value to r_TxBDNum was written with
+// greater value than allowed.
+//
+// Revision 1.24  2002/11/22 01:57:06  mohor
+// Rx Flow control fixed. CF flag added to the RX buffer descriptor. RxAbort
+// synchronized.
+//
+// Revision 1.23  2002/11/19 18:13:49  mohor
+// r_MiiMRst is not used for resetting the MIIM module. wb_rst used instead.
+//
+// Revision 1.22  2002/11/14 18:37:20  mohor
+// r_Rst signal does not reset any module any more and is removed from the design.
+//
+// Revision 1.21  2002/09/10 10:35:23  mohor
+// Ethernet debug registers removed.
+//
+// Revision 1.20  2002/09/04 18:40:25  mohor
+// ETH_TXCTRL and ETH_RXCTRL registers added. Interrupts related to
+// the control frames connected.
+//
+// Revision 1.19  2002/08/19 16:01:40  mohor
+// Only values smaller or equal to 0x80 can be written to TX_BD_NUM register.
+// r_TxEn and r_RxEn depend on the limit values of the TX_BD_NUMOut.
+//
+// Revision 1.18  2002/08/16 22:28:23  mohor
+// Syntax error fixed.
+//
+// Revision 1.17  2002/08/16 22:23:03  mohor
+// Syntax error fixed.
+//
+// Revision 1.16  2002/08/16 22:14:22  mohor
+// Synchronous reset added to all registers. Defines used for width. r_MiiMRst
+// changed from bit position 10 to 9.
+//
+// Revision 1.15  2002/08/14 18:26:37  mohor
+// LinkFailRegister is reflecting the status of the PHY's link fail status bit.
+//
+// Revision 1.14  2002/04/22 14:03:44  mohor
+// Interrupts are visible in the ETH_INT_SOURCE regardless if they are enabled
+// or not.
+//
+// Revision 1.13  2002/02/26 16:18:09  mohor
+// Reset values are passed to registers through parameters
+//
+// Revision 1.12  2002/02/17 13:23:42  mohor
+// Define missmatch fixed.
+//
+// Revision 1.11  2002/02/16 14:03:44  mohor
+// Registered trimmed. Unused registers removed.
+//
+// Revision 1.10  2002/02/15 11:08:25  mohor
+// File format fixed a bit.
+//
+// Revision 1.9  2002/02/14 20:19:41  billditt
+// Modified for Address Checking,
+// addition of eth_addrcheck.v
+//
+// Revision 1.8  2002/02/12 17:01:19  mohor
+// HASH0 and HASH1 registers added. 
+//
+// Revision 1.7  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.6  2001/12/05 15:00:16  mohor
+// RX_BD_NUM changed to TX_BD_NUM (holds number of TX descriptors
+// instead of the number of RX descriptors).
+//
+// Revision 1.5  2001/12/05 10:22:19  mohor
+// ETH_RX_BD_ADR register deleted. ETH_RX_BD_NUM is used instead.
+//
+// Revision 1.4  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.3  2001/10/18 12:07:11  mohor
+// Status signals changed, Adress decoding changed, interrupt controller
+// added.
+//
+// Revision 1.2  2001/09/24 15:02:56  mohor
+// Defines changed (All precede with ETH_). Small changes because some
+// tools generate warnings when two operands are together. Synchronization
+// between two clocks domains in eth_wishbonedma.v is changed (due to ASIC
+// demands).
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.2  2001/08/02 09:25:31  mohor
+// Unconnected signals are now connected.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+//
+//
+//
+//
+//
+
+`include "eth_defines.v"
+`include "timescale.v"
+
+
+module eth_registers( DataIn, Address, Rw, Cs, Clk, Reset, DataOut, 
+                      r_RecSmall, r_Pad, r_HugEn, r_CrcEn, r_DlyCrcEn, 
+                      r_FullD, r_ExDfrEn, r_NoBckof, r_LoopBck, r_IFG, 
+                      r_Pro, r_Iam, r_Bro, r_NoPre, r_TxEn, r_RxEn, 
+                      TxB_IRQ, TxE_IRQ, RxB_IRQ, RxE_IRQ, Busy_IRQ, 
+                      r_IPGT, r_IPGR1, r_IPGR2, r_MinFL, r_MaxFL, r_MaxRet, 
+                      r_CollValid, r_TxFlow, r_RxFlow, r_PassAll, 
+                      r_MiiNoPre, r_ClkDiv, r_WCtrlData, r_RStat, r_ScanStat, 
+                      r_RGAD, r_FIAD, r_CtrlData, NValid_stat, Busy_stat, 
+                      LinkFail, r_MAC, WCtrlDataStart, RStatStart,
+                      UpdateMIIRX_DATAReg, Prsd, r_TxBDNum, int_o,
+                      r_HASH0, r_HASH1, r_TxPauseTV, r_TxPauseRq, RstTxPauseRq, TxCtrlEndFrm, 
+                      StartTxDone, TxClk, RxClk, SetPauseTimer
+                    );
+
+parameter Tp = 1;
+
+input [31:0] DataIn;
+input [7:0] Address;
+
+input Rw;
+input [3:0] Cs;
+input Clk;
+input Reset;
+
+input WCtrlDataStart;
+input RStatStart;
+
+input UpdateMIIRX_DATAReg;
+input [15:0] Prsd;
+
+output [31:0] DataOut;
+reg    [31:0] DataOut;
+
+output r_RecSmall;
+output r_Pad;
+output r_HugEn;
+output r_CrcEn;
+output r_DlyCrcEn;
+output r_FullD;
+output r_ExDfrEn;
+output r_NoBckof;
+output r_LoopBck;
+output r_IFG;
+output r_Pro;
+output r_Iam;
+output r_Bro;
+output r_NoPre;
+output r_TxEn;
+output r_RxEn;
+output [31:0] r_HASH0;
+output [31:0] r_HASH1;
+
+input TxB_IRQ;
+input TxE_IRQ;
+input RxB_IRQ;
+input RxE_IRQ;
+input Busy_IRQ;
+
+output [6:0] r_IPGT;
+
+output [6:0] r_IPGR1;
+
+output [6:0] r_IPGR2;
+
+output [15:0] r_MinFL;
+output [15:0] r_MaxFL;
+
+output [3:0] r_MaxRet;
+output [5:0] r_CollValid;
+
+output r_TxFlow;
+output r_RxFlow;
+output r_PassAll;
+
+output r_MiiNoPre;
+output [7:0] r_ClkDiv;
+
+output r_WCtrlData;
+output r_RStat;
+output r_ScanStat;
+
+output [4:0] r_RGAD;
+output [4:0] r_FIAD;
+
+output [15:0]r_CtrlData;
+
+
+input NValid_stat;
+input Busy_stat;
+input LinkFail;
+
+output [47:0]r_MAC;
+output [7:0] r_TxBDNum;
+output       int_o;
+output [15:0]r_TxPauseTV;
+output       r_TxPauseRq;
+input        RstTxPauseRq;
+input        TxCtrlEndFrm;
+input        StartTxDone;
+input        TxClk;
+input        RxClk;
+input        SetPauseTimer;
+
+reg          irq_txb;
+reg          irq_txe;
+reg          irq_rxb;
+reg          irq_rxe;
+reg          irq_busy;
+reg          irq_txc;
+reg          irq_rxc;
+
+reg SetTxCIrq_txclk;
+reg SetTxCIrq_sync1, SetTxCIrq_sync2, SetTxCIrq_sync3;
+reg SetTxCIrq;
+reg ResetTxCIrq_sync1, ResetTxCIrq_sync2;
+
+reg SetRxCIrq_rxclk;
+reg SetRxCIrq_sync1, SetRxCIrq_sync2, SetRxCIrq_sync3;
+reg SetRxCIrq;
+reg ResetRxCIrq_sync1;
+reg ResetRxCIrq_sync2;
+reg ResetRxCIrq_sync3;
+
+wire [3:0] Write = {4{Rw}} & Cs;     // per-byte-lane write enables: Cs gated by Rw
+wire       Read  = ~Rw & (|Cs);      // any Cs bit set while Rw is low
+
+// Address decode: each <REG>_Sel wire is high while Address equals that register's `ETH_<REG>_ADR constant.
+wire MODER_Sel      = (Address == `ETH_MODER_ADR       );
+wire INT_SOURCE_Sel = (Address == `ETH_INT_SOURCE_ADR  );
+wire INT_MASK_Sel   = (Address == `ETH_INT_MASK_ADR    );
+wire IPGT_Sel       = (Address == `ETH_IPGT_ADR        );
+wire IPGR1_Sel      = (Address == `ETH_IPGR1_ADR       );
+wire IPGR2_Sel      = (Address == `ETH_IPGR2_ADR       );
+wire PACKETLEN_Sel  = (Address == `ETH_PACKETLEN_ADR   );
+wire COLLCONF_Sel   = (Address == `ETH_COLLCONF_ADR    );
+wire CTRLMODER_Sel  = (Address == `ETH_CTRLMODER_ADR   );
+wire MIIMODER_Sel   = (Address == `ETH_MIIMODER_ADR    );
+wire MIICOMMAND_Sel = (Address == `ETH_MIICOMMAND_ADR  );
+wire MIIADDRESS_Sel = (Address == `ETH_MIIADDRESS_ADR  );
+wire MIITX_DATA_Sel = (Address == `ETH_MIITX_DATA_ADR  );
+wire MAC_ADDR0_Sel  = (Address == `ETH_MAC_ADDR0_ADR   );
+wire MAC_ADDR1_Sel  = (Address == `ETH_MAC_ADDR1_ADR   );
+wire HASH0_Sel      = (Address == `ETH_HASH0_ADR       );
+wire HASH1_Sel      = (Address == `ETH_HASH1_ADR       );
+wire TXCTRL_Sel     = (Address == `ETH_TX_CTRL_ADR     );
+wire RXCTRL_Sel     = (Address == `ETH_RX_CTRL_ADR     );
+wire TX_BD_NUM_Sel  = (Address == `ETH_TX_BD_NUM_ADR   );
+
+
+wire [2:0] MODER_Wr;
+wire [0:0] INT_SOURCE_Wr;
+wire [0:0] INT_MASK_Wr;
+wire [0:0] IPGT_Wr;
+wire [0:0] IPGR1_Wr;
+wire [0:0] IPGR2_Wr;
+wire [3:0] PACKETLEN_Wr;
+wire [2:0] COLLCONF_Wr;
+wire [0:0] CTRLMODER_Wr;
+wire [1:0] MIIMODER_Wr;
+wire [0:0] MIICOMMAND_Wr;
+wire [1:0] MIIADDRESS_Wr;
+wire [1:0] MIITX_DATA_Wr;
+wire       MIIRX_DATA_Wr;
+wire [3:0] MAC_ADDR0_Wr;
+wire [1:0] MAC_ADDR1_Wr;
+wire [3:0] HASH0_Wr;
+wire [3:0] HASH1_Wr;
+wire [2:0] TXCTRL_Wr;
+wire [0:0] TX_BD_NUM_Wr;
+
+// Per-register write strobes.
+//
+// General form: <REG>_Wr[n] = Write[n] & <REG>_Sel, with one strobe bit per
+// implemented byte lane of the register.
+//
+// Exceptions:
+//   - COLLCONF_Wr[1] is tied low (byte lane 1 is not used).
+//   - MIIRX_DATA_Wr is driven directly by UpdateMIIRX_DATAReg, not by a bus write.
+//   - TX_BD_NUM_Wr additionally requires DataIn <= 'h80.
+
+// Registers with byte lanes 0..2
+assign MODER_Wr      = Write[2:0] & {3{MODER_Sel}};
+assign TXCTRL_Wr     = Write[2:0] & {3{TXCTRL_Sel}};
+
+// Registers with byte lane 0 only
+assign INT_SOURCE_Wr = Write[0] & INT_SOURCE_Sel;
+assign INT_MASK_Wr   = Write[0] & INT_MASK_Sel;
+assign IPGT_Wr       = Write[0] & IPGT_Sel;
+assign IPGR1_Wr      = Write[0] & IPGR1_Sel;
+assign IPGR2_Wr      = Write[0] & IPGR2_Sel;
+assign CTRLMODER_Wr  = Write[0] & CTRLMODER_Sel;
+assign MIICOMMAND_Wr = Write[0] & MIICOMMAND_Sel;
+
+// Registers with all four byte lanes
+assign PACKETLEN_Wr  = Write & {4{PACKETLEN_Sel}};
+assign MAC_ADDR0_Wr  = Write & {4{MAC_ADDR0_Sel}};
+assign HASH0_Wr      = Write & {4{HASH0_Sel}};
+assign HASH1_Wr      = Write & {4{HASH1_Sel}};
+
+// Registers with byte lanes 0..1
+assign MIIMODER_Wr   = Write[1:0] & {2{MIIMODER_Sel}};
+assign MIIADDRESS_Wr = Write[1:0] & {2{MIIADDRESS_Sel}};
+assign MIITX_DATA_Wr = Write[1:0] & {2{MIITX_DATA_Sel}};
+assign MAC_ADDR1_Wr  = Write[1:0] & {2{MAC_ADDR1_Sel}};
+
+// COLLCONF: lanes 0 and 2 only, lane 1 not used
+assign COLLCONF_Wr   = {Write[2] & COLLCONF_Sel, 1'b0, Write[0] & COLLCONF_Sel};
+
+// MIIRX_DATA: strobe comes straight from UpdateMIIRX_DATAReg
+assign MIIRX_DATA_Wr = UpdateMIIRX_DATAReg;
+
+// TX_BD_NUM: lane-0 writes are accepted only while DataIn <= 'h80
+assign TX_BD_NUM_Wr  = Write[0] & TX_BD_NUM_Sel & (DataIn<='h80);
+
+
+
+// 32-bit read-back buses, one per register. The read multiplexer selects one
+// of them onto DataOut according to Address.
+wire [31:0] MODEROut, INT_SOURCEOut, INT_MASKOut;
+wire [31:0] IPGTOut, IPGR1Out, IPGR2Out;
+wire [31:0] PACKETLENOut, COLLCONFOut, CTRLMODEROut;
+wire [31:0] MIIMODEROut, MIICOMMANDOut, MIIADDRESSOut;
+wire [31:0] MIITX_DATAOut, MIIRX_DATAOut, MIISTATUSOut;
+wire [31:0] MAC_ADDR0Out, MAC_ADDR1Out;
+wire [31:0] TX_BD_NUMOut;
+wire [31:0] HASH0Out, HASH1Out;
+wire [31:0] TXCTRLOut;
+
+// MODER register.
+// Three eth_register slices placed at bits 0, 8 and 16 of the data bus; each
+// slice is written by its own strobe MODER_Wr[n]. SyncReset is tied off.
+eth_register #(`ETH_MODER_WIDTH_0, `ETH_MODER_DEF_0)        MODER_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MODER_Wr[0]),
+   .DataIn    (DataIn[`ETH_MODER_WIDTH_0 - 1:0]),
+   .DataOut   (MODEROut[`ETH_MODER_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_MODER_WIDTH_1, `ETH_MODER_DEF_1)        MODER_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MODER_Wr[1]),
+   .DataIn    (DataIn[`ETH_MODER_WIDTH_1 + 7:8]),
+   .DataOut   (MODEROut[`ETH_MODER_WIDTH_1 + 7:8])
+  );
+eth_register #(`ETH_MODER_WIDTH_2, `ETH_MODER_DEF_2)        MODER_2
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MODER_Wr[2]),
+   .DataIn    (DataIn[`ETH_MODER_WIDTH_2 + 15:16]),
+   .DataOut   (MODEROut[`ETH_MODER_WIDTH_2 + 15:16])
+  );
+// Bits above the third slice read as zero.
+assign MODEROut[31:`ETH_MODER_WIDTH_2 + 16] = 0;
+
+// INT_MASK register.
+// Single slice at bit 0, written by INT_MASK_Wr[0]; SyncReset is tied off.
+eth_register #(`ETH_INT_MASK_WIDTH_0, `ETH_INT_MASK_DEF_0)  INT_MASK_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (INT_MASK_Wr[0]),
+   .DataIn    (DataIn[`ETH_INT_MASK_WIDTH_0 - 1:0]),
+   .DataOut   (INT_MASKOut[`ETH_INT_MASK_WIDTH_0 - 1:0])
+  );
+// Bits above the implemented width read as zero.
+assign INT_MASKOut[31:`ETH_INT_MASK_WIDTH_0] = 0;
+
+// IPGT register.
+// Single slice at bit 0, written by IPGT_Wr[0]; SyncReset is tied off.
+eth_register #(`ETH_IPGT_WIDTH_0, `ETH_IPGT_DEF_0)          IPGT_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (IPGT_Wr[0]),
+   .DataIn    (DataIn[`ETH_IPGT_WIDTH_0 - 1:0]),
+   .DataOut   (IPGTOut[`ETH_IPGT_WIDTH_0 - 1:0])
+  );
+// Bits above the implemented width read as zero.
+assign IPGTOut[31:`ETH_IPGT_WIDTH_0] = 0;
+
+// IPGR1 register.
+// Single slice at bit 0, written by IPGR1_Wr[0]; SyncReset is tied off.
+eth_register #(`ETH_IPGR1_WIDTH_0, `ETH_IPGR1_DEF_0)        IPGR1_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (IPGR1_Wr[0]),
+   .DataIn    (DataIn[`ETH_IPGR1_WIDTH_0 - 1:0]),
+   .DataOut   (IPGR1Out[`ETH_IPGR1_WIDTH_0 - 1:0])
+  );
+// Bits above the implemented width read as zero.
+assign IPGR1Out[31:`ETH_IPGR1_WIDTH_0] = 0;
+
+// IPGR2 register.
+// Single slice at bit 0, written by IPGR2_Wr[0]; SyncReset is tied off.
+eth_register #(`ETH_IPGR2_WIDTH_0, `ETH_IPGR2_DEF_0)        IPGR2_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (IPGR2_Wr[0]),
+   .DataIn    (DataIn[`ETH_IPGR2_WIDTH_0 - 1:0]),
+   .DataOut   (IPGR2Out[`ETH_IPGR2_WIDTH_0 - 1:0])
+  );
+// Bits above the implemented width read as zero.
+assign IPGR2Out[31:`ETH_IPGR2_WIDTH_0] = 0;
+
+// PACKETLEN register.
+// Four eth_register slices placed at bits 0, 8, 16 and 24 of the data bus;
+// slice n is written by PACKETLEN_Wr[n]. SyncReset is tied off.
+eth_register #(`ETH_PACKETLEN_WIDTH_0, `ETH_PACKETLEN_DEF_0) PACKETLEN_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (PACKETLEN_Wr[0]),
+   .DataIn    (DataIn[`ETH_PACKETLEN_WIDTH_0 - 1:0]),
+   .DataOut   (PACKETLENOut[`ETH_PACKETLEN_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_PACKETLEN_WIDTH_1, `ETH_PACKETLEN_DEF_1) PACKETLEN_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (PACKETLEN_Wr[1]),
+   .DataIn    (DataIn[`ETH_PACKETLEN_WIDTH_1 + 7:8]),
+   .DataOut   (PACKETLENOut[`ETH_PACKETLEN_WIDTH_1 + 7:8])
+  );
+eth_register #(`ETH_PACKETLEN_WIDTH_2, `ETH_PACKETLEN_DEF_2) PACKETLEN_2
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (PACKETLEN_Wr[2]),
+   .DataIn    (DataIn[`ETH_PACKETLEN_WIDTH_2 + 15:16]),
+   .DataOut   (PACKETLENOut[`ETH_PACKETLEN_WIDTH_2 + 15:16])
+  );
+eth_register #(`ETH_PACKETLEN_WIDTH_3, `ETH_PACKETLEN_DEF_3) PACKETLEN_3
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (PACKETLEN_Wr[3]),
+   .DataIn    (DataIn[`ETH_PACKETLEN_WIDTH_3 + 23:24]),
+   .DataOut   (PACKETLENOut[`ETH_PACKETLEN_WIDTH_3 + 23:24])
+  );
+
+// COLLCONF register.
+// Two eth_register slices, at bit 0 (COLLCONF_Wr[0]) and at bit 16
+// (COLLCONF_Wr[2]); there is no slice at bit 8. SyncReset is tied off.
+eth_register #(`ETH_COLLCONF_WIDTH_0, `ETH_COLLCONF_DEF_0)   COLLCONF_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (COLLCONF_Wr[0]),
+   .DataIn    (DataIn[`ETH_COLLCONF_WIDTH_0 - 1:0]),
+   .DataOut   (COLLCONFOut[`ETH_COLLCONF_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_COLLCONF_WIDTH_2, `ETH_COLLCONF_DEF_2)   COLLCONF_2
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (COLLCONF_Wr[2]),
+   .DataIn    (DataIn[`ETH_COLLCONF_WIDTH_2 + 15:16]),
+   .DataOut   (COLLCONFOut[`ETH_COLLCONF_WIDTH_2 + 15:16])
+  );
+// Bits not covered by either slice read as zero.
+assign COLLCONFOut[15:`ETH_COLLCONF_WIDTH_0] = 0;
+assign COLLCONFOut[31:`ETH_COLLCONF_WIDTH_2 + 16] = 0;
+
+// TX_BD_NUM register.
+// Single slice at bit 0, written by TX_BD_NUM_Wr[0]; SyncReset is tied off.
+eth_register #(`ETH_TX_BD_NUM_WIDTH_0, `ETH_TX_BD_NUM_DEF_0) TX_BD_NUM_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (TX_BD_NUM_Wr[0]),
+   .DataIn    (DataIn[`ETH_TX_BD_NUM_WIDTH_0 - 1:0]),
+   .DataOut   (TX_BD_NUMOut[`ETH_TX_BD_NUM_WIDTH_0 - 1:0])
+  );
+// Bits above the implemented width read as zero.
+assign TX_BD_NUMOut[31:`ETH_TX_BD_NUM_WIDTH_0] = 0;
+
+// CTRLMODER register.
+// Single slice at bit 0, written by CTRLMODER_Wr[0]; SyncReset is tied off.
+eth_register #(`ETH_CTRLMODER_WIDTH_0, `ETH_CTRLMODER_DEF_0)  CTRLMODER_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (CTRLMODER_Wr[0]),
+   .DataIn    (DataIn[`ETH_CTRLMODER_WIDTH_0 - 1:0]),
+   .DataOut   (CTRLMODEROut[`ETH_CTRLMODER_WIDTH_0 - 1:0])
+  );
+// Bits above the implemented width read as zero.
+assign CTRLMODEROut[31:`ETH_CTRLMODER_WIDTH_0] = 0;
+
+// MIIMODER register.
+// Two eth_register slices, at bit 0 (MIIMODER_Wr[0]) and at bit 8
+// (MIIMODER_Wr[1]). SyncReset is tied off.
+eth_register #(`ETH_MIIMODER_WIDTH_0, `ETH_MIIMODER_DEF_0)    MIIMODER_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MIIMODER_Wr[0]),
+   .DataIn    (DataIn[`ETH_MIIMODER_WIDTH_0 - 1:0]),
+   .DataOut   (MIIMODEROut[`ETH_MIIMODER_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_MIIMODER_WIDTH_1, `ETH_MIIMODER_DEF_1)    MIIMODER_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MIIMODER_Wr[1]),
+   .DataIn    (DataIn[`ETH_MIIMODER_WIDTH_1 + 7:8]),
+   .DataOut   (MIIMODEROut[`ETH_MIIMODER_WIDTH_1 + 7:8])
+  );
+// Bits above the second slice read as zero.
+assign MIIMODEROut[31:`ETH_MIIMODER_WIDTH_1 + 8] = 0;
+
+// MIICOMMAND register.
+// Three independent 1-bit registers (bits 0, 1 and 2), all written by
+// MIICOMMAND_Wr[0]. Bit 1 has RStatStart and bit 2 has WCtrlDataStart on its
+// SyncReset input (presumably so the command bit clears itself once the
+// operation starts - confirm against eth_register); bit 0 has SyncReset tied off.
+eth_register #(1, 0)                                      MIICOMMAND0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MIICOMMAND_Wr[0]),
+   .DataIn    (DataIn[0]),
+   .DataOut   (MIICOMMANDOut[0])
+  );
+eth_register #(1, 0)                                      MIICOMMAND1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (RStatStart),
+   .Write     (MIICOMMAND_Wr[0]),
+   .DataIn    (DataIn[1]),
+   .DataOut   (MIICOMMANDOut[1])
+  );
+eth_register #(1, 0)                                      MIICOMMAND2
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (WCtrlDataStart),
+   .Write     (MIICOMMAND_Wr[0]),
+   .DataIn    (DataIn[2]),
+   .DataOut   (MIICOMMANDOut[2])
+  );
+// Remaining upper bits read as zero.
+assign MIICOMMANDOut[31:`ETH_MIICOMMAND_WIDTH_0] = 29'h0;
+
+// MIIADDRESS register.
+// Two eth_register slices, at bit 0 (MIIADDRESS_Wr[0]) and at bit 8
+// (MIIADDRESS_Wr[1]). SyncReset is tied off.
+eth_register #(`ETH_MIIADDRESS_WIDTH_0, `ETH_MIIADDRESS_DEF_0) MIIADDRESS_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MIIADDRESS_Wr[0]),
+   .DataIn    (DataIn[`ETH_MIIADDRESS_WIDTH_0 - 1:0]),
+   .DataOut   (MIIADDRESSOut[`ETH_MIIADDRESS_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_MIIADDRESS_WIDTH_1, `ETH_MIIADDRESS_DEF_1) MIIADDRESS_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MIIADDRESS_Wr[1]),
+   .DataIn    (DataIn[`ETH_MIIADDRESS_WIDTH_1 + 7:8]),
+   .DataOut   (MIIADDRESSOut[`ETH_MIIADDRESS_WIDTH_1 + 7:8])
+  );
+// Bits not covered by either slice read as zero.
+assign MIIADDRESSOut[7:`ETH_MIIADDRESS_WIDTH_0] = 0;
+assign MIIADDRESSOut[31:`ETH_MIIADDRESS_WIDTH_1 + 8] = 0;
+
+// MIITX_DATA register.
+// Two eth_register slices, at bit 0 (MIITX_DATA_Wr[0]) and at bit 8
+// (MIITX_DATA_Wr[1]). SyncReset is tied off.
+eth_register #(`ETH_MIITX_DATA_WIDTH_0, `ETH_MIITX_DATA_DEF_0) MIITX_DATA_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MIITX_DATA_Wr[0]),
+   .DataIn    (DataIn[`ETH_MIITX_DATA_WIDTH_0 - 1:0]),
+   .DataOut   (MIITX_DATAOut[`ETH_MIITX_DATA_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_MIITX_DATA_WIDTH_1, `ETH_MIITX_DATA_DEF_1) MIITX_DATA_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MIITX_DATA_Wr[1]),
+   .DataIn    (DataIn[`ETH_MIITX_DATA_WIDTH_1 + 7:8]),
+   .DataOut   (MIITX_DATAOut[`ETH_MIITX_DATA_WIDTH_1 + 7:8])
+  );
+// Bits above the second slice read as zero.
+assign MIITX_DATAOut[31:`ETH_MIITX_DATA_WIDTH_1 + 8] = 0;
+
+// MIIRX_DATA register.
+// Unlike the other registers, this one takes its data from Prsd rather than
+// from DataIn, and is loaded on MIIRX_DATA_Wr (it is not written from WB).
+eth_register #(`ETH_MIIRX_DATA_WIDTH, `ETH_MIIRX_DATA_DEF) MIIRX_DATA
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MIIRX_DATA_Wr),
+   .DataIn    (Prsd[`ETH_MIIRX_DATA_WIDTH-1:0]),
+   .DataOut   (MIIRX_DATAOut[`ETH_MIIRX_DATA_WIDTH-1:0])
+  );
+// Bits above the implemented width read as zero.
+assign MIIRX_DATAOut[31:`ETH_MIIRX_DATA_WIDTH] = 0;
+
+// MAC_ADDR0 register.
+// Four eth_register slices placed at bits 0, 8, 16 and 24 of the data bus;
+// slice n is written by MAC_ADDR0_Wr[n]. SyncReset is tied off.
+eth_register #(`ETH_MAC_ADDR0_WIDTH_0, `ETH_MAC_ADDR0_DEF_0)  MAC_ADDR0_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MAC_ADDR0_Wr[0]),
+   .DataIn    (DataIn[`ETH_MAC_ADDR0_WIDTH_0 - 1:0]),
+   .DataOut   (MAC_ADDR0Out[`ETH_MAC_ADDR0_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_MAC_ADDR0_WIDTH_1, `ETH_MAC_ADDR0_DEF_1)  MAC_ADDR0_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MAC_ADDR0_Wr[1]),
+   .DataIn    (DataIn[`ETH_MAC_ADDR0_WIDTH_1 + 7:8]),
+   .DataOut   (MAC_ADDR0Out[`ETH_MAC_ADDR0_WIDTH_1 + 7:8])
+  );
+eth_register #(`ETH_MAC_ADDR0_WIDTH_2, `ETH_MAC_ADDR0_DEF_2)  MAC_ADDR0_2
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MAC_ADDR0_Wr[2]),
+   .DataIn    (DataIn[`ETH_MAC_ADDR0_WIDTH_2 + 15:16]),
+   .DataOut   (MAC_ADDR0Out[`ETH_MAC_ADDR0_WIDTH_2 + 15:16])
+  );
+eth_register #(`ETH_MAC_ADDR0_WIDTH_3, `ETH_MAC_ADDR0_DEF_3)  MAC_ADDR0_3
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MAC_ADDR0_Wr[3]),
+   .DataIn    (DataIn[`ETH_MAC_ADDR0_WIDTH_3 + 23:24]),
+   .DataOut   (MAC_ADDR0Out[`ETH_MAC_ADDR0_WIDTH_3 + 23:24])
+  );
+
+// MAC_ADDR1 register.
+// Two eth_register slices, at bit 0 (MAC_ADDR1_Wr[0]) and at bit 8
+// (MAC_ADDR1_Wr[1]). SyncReset is tied off.
+eth_register #(`ETH_MAC_ADDR1_WIDTH_0, `ETH_MAC_ADDR1_DEF_0)  MAC_ADDR1_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MAC_ADDR1_Wr[0]),
+   .DataIn    (DataIn[`ETH_MAC_ADDR1_WIDTH_0 - 1:0]),
+   .DataOut   (MAC_ADDR1Out[`ETH_MAC_ADDR1_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_MAC_ADDR1_WIDTH_1, `ETH_MAC_ADDR1_DEF_1)  MAC_ADDR1_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (MAC_ADDR1_Wr[1]),
+   .DataIn    (DataIn[`ETH_MAC_ADDR1_WIDTH_1 + 7:8]),
+   .DataOut   (MAC_ADDR1Out[`ETH_MAC_ADDR1_WIDTH_1 + 7:8])
+  );
+// Bits above the second slice read as zero.
+assign MAC_ADDR1Out[31:`ETH_MAC_ADDR1_WIDTH_1 + 8] = 0;
+
+// RXHASH0 register.
+// Four eth_register slices placed at bits 0, 8, 16 and 24 of the data bus;
+// slice n is written by HASH0_Wr[n]. SyncReset is tied off.
+eth_register #(`ETH_HASH0_WIDTH_0, `ETH_HASH0_DEF_0)          RXHASH0_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (HASH0_Wr[0]),
+   .DataIn    (DataIn[`ETH_HASH0_WIDTH_0 - 1:0]),
+   .DataOut   (HASH0Out[`ETH_HASH0_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_HASH0_WIDTH_1, `ETH_HASH0_DEF_1)          RXHASH0_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (HASH0_Wr[1]),
+   .DataIn    (DataIn[`ETH_HASH0_WIDTH_1 + 7:8]),
+   .DataOut   (HASH0Out[`ETH_HASH0_WIDTH_1 + 7:8])
+  );
+eth_register #(`ETH_HASH0_WIDTH_2, `ETH_HASH0_DEF_2)          RXHASH0_2
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (HASH0_Wr[2]),
+   .DataIn    (DataIn[`ETH_HASH0_WIDTH_2 + 15:16]),
+   .DataOut   (HASH0Out[`ETH_HASH0_WIDTH_2 + 15:16])
+  );
+eth_register #(`ETH_HASH0_WIDTH_3, `ETH_HASH0_DEF_3)          RXHASH0_3
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (HASH0_Wr[3]),
+   .DataIn    (DataIn[`ETH_HASH0_WIDTH_3 + 23:24]),
+   .DataOut   (HASH0Out[`ETH_HASH0_WIDTH_3 + 23:24])
+  );
+
+// RXHASH1 register.
+// Four eth_register slices placed at bits 0, 8, 16 and 24 of the data bus;
+// slice n is written by HASH1_Wr[n]. SyncReset is tied off.
+eth_register #(`ETH_HASH1_WIDTH_0, `ETH_HASH1_DEF_0)          RXHASH1_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (HASH1_Wr[0]),
+   .DataIn    (DataIn[`ETH_HASH1_WIDTH_0 - 1:0]),
+   .DataOut   (HASH1Out[`ETH_HASH1_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_HASH1_WIDTH_1, `ETH_HASH1_DEF_1)          RXHASH1_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (HASH1_Wr[1]),
+   .DataIn    (DataIn[`ETH_HASH1_WIDTH_1 + 7:8]),
+   .DataOut   (HASH1Out[`ETH_HASH1_WIDTH_1 + 7:8])
+  );
+eth_register #(`ETH_HASH1_WIDTH_2, `ETH_HASH1_DEF_2)          RXHASH1_2
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (HASH1_Wr[2]),
+   .DataIn    (DataIn[`ETH_HASH1_WIDTH_2 + 15:16]),
+   .DataOut   (HASH1Out[`ETH_HASH1_WIDTH_2 + 15:16])
+  );
+eth_register #(`ETH_HASH1_WIDTH_3, `ETH_HASH1_DEF_3)          RXHASH1_3
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (HASH1_Wr[3]),
+   .DataIn    (DataIn[`ETH_HASH1_WIDTH_3 + 23:24]),
+   .DataOut   (HASH1Out[`ETH_HASH1_WIDTH_3 + 23:24])
+  );
+
+// TXCTRL register.
+// Three eth_register slices placed at bits 0, 8 and 16 of the data bus; slice n
+// is written by TXCTRL_Wr[n]. Only the slice at bit 16 uses SyncReset.
+eth_register #(`ETH_TX_CTRL_WIDTH_0, `ETH_TX_CTRL_DEF_0)  TXCTRL_0
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (TXCTRL_Wr[0]),
+   .DataIn    (DataIn[`ETH_TX_CTRL_WIDTH_0 - 1:0]),
+   .DataOut   (TXCTRLOut[`ETH_TX_CTRL_WIDTH_0 - 1:0])
+  );
+eth_register #(`ETH_TX_CTRL_WIDTH_1, `ETH_TX_CTRL_DEF_1)  TXCTRL_1
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (1'b0),
+   .Write     (TXCTRL_Wr[1]),
+   .DataIn    (DataIn[`ETH_TX_CTRL_WIDTH_1 + 7:8]),
+   .DataOut   (TXCTRLOut[`ETH_TX_CTRL_WIDTH_1 + 7:8])
+  );
+// The request bit held in this slice is synchronously reset by RstTxPauseRq.
+eth_register #(`ETH_TX_CTRL_WIDTH_2, `ETH_TX_CTRL_DEF_2)  TXCTRL_2
+  (
+   .Clk       (Clk),
+   .Reset     (Reset),
+   .SyncReset (RstTxPauseRq),
+   .Write     (TXCTRL_Wr[2]),
+   .DataIn    (DataIn[`ETH_TX_CTRL_WIDTH_2 + 15:16]),
+   .DataOut   (TXCTRLOut[`ETH_TX_CTRL_WIDTH_2 + 15:16])
+  );
+// Bits above the third slice read as zero.
+assign TXCTRLOut[31:`ETH_TX_CTRL_WIDTH_2 + 16] = 0;
+
+
+
+// Reading data from registers (combinational read multiplexer).
+// While Read is asserted, DataOut carries the read-back value of the register
+// selected by Address; an address that matches no register yields 32'h0.
+// While Read is low, DataOut is 32'h0.
+// Blocking assignments are used because this block is purely combinational;
+// the sensitivity list names every signal the block reads.
+always @ (Address       or Read           or MODEROut       or INT_SOURCEOut  or
+          INT_MASKOut   or IPGTOut        or IPGR1Out       or IPGR2Out       or
+          PACKETLENOut  or COLLCONFOut    or CTRLMODEROut   or MIIMODEROut    or
+          MIICOMMANDOut or MIIADDRESSOut  or MIITX_DATAOut  or MIIRX_DATAOut  or
+          MIISTATUSOut  or MAC_ADDR0Out   or MAC_ADDR1Out   or TX_BD_NUMOut   or
+          HASH0Out      or HASH1Out       or TXCTRLOut
+         )
+begin
+  if(Read)  // read
+    begin
+      case(Address)
+        `ETH_MODER_ADR        :  DataOut = MODEROut;
+        `ETH_INT_SOURCE_ADR   :  DataOut = INT_SOURCEOut;
+        `ETH_INT_MASK_ADR     :  DataOut = INT_MASKOut;
+        `ETH_IPGT_ADR         :  DataOut = IPGTOut;
+        `ETH_IPGR1_ADR        :  DataOut = IPGR1Out;
+        `ETH_IPGR2_ADR        :  DataOut = IPGR2Out;
+        `ETH_PACKETLEN_ADR    :  DataOut = PACKETLENOut;
+        `ETH_COLLCONF_ADR     :  DataOut = COLLCONFOut;
+        `ETH_CTRLMODER_ADR    :  DataOut = CTRLMODEROut;
+        `ETH_MIIMODER_ADR     :  DataOut = MIIMODEROut;
+        `ETH_MIICOMMAND_ADR   :  DataOut = MIICOMMANDOut;
+        `ETH_MIIADDRESS_ADR   :  DataOut = MIIADDRESSOut;
+        `ETH_MIITX_DATA_ADR   :  DataOut = MIITX_DATAOut;
+        `ETH_MIIRX_DATA_ADR   :  DataOut = MIIRX_DATAOut;
+        `ETH_MIISTATUS_ADR    :  DataOut = MIISTATUSOut;
+        `ETH_MAC_ADDR0_ADR    :  DataOut = MAC_ADDR0Out;
+        `ETH_MAC_ADDR1_ADR    :  DataOut = MAC_ADDR1Out;
+        `ETH_TX_BD_NUM_ADR    :  DataOut = TX_BD_NUMOut;
+        `ETH_HASH0_ADR        :  DataOut = HASH0Out;
+        `ETH_HASH1_ADR        :  DataOut = HASH1Out;
+        `ETH_TX_CTRL_ADR      :  DataOut = TXCTRLOut;
+
+        default:             DataOut = 32'h0;
+      endcase
+    end
+  else
+    DataOut = 32'h0;
+end
+
+
+// Register fields exported to the rest of the core, listed in ascending bit
+// order within each register.
+
+// MODER. MODEROut[11] (formerly r_Rst) is not used any more.
+assign r_RxEn             = MODEROut[0] & (TX_BD_NUMOut<'h80);  // Reception is enabled when there is at least one RxBD.
+assign r_TxEn             = MODEROut[1] & (TX_BD_NUMOut>0);     // Transmission is enabled when there is at least one TxBD.
+assign r_NoPre            = MODEROut[2];
+assign r_Bro              = MODEROut[3];
+assign r_Iam              = MODEROut[4];
+assign r_Pro              = MODEROut[5];
+assign r_IFG              = MODEROut[6];
+assign r_LoopBck          = MODEROut[7];
+assign r_NoBckof          = MODEROut[8];
+assign r_ExDfrEn          = MODEROut[9];
+assign r_FullD            = MODEROut[10];
+assign r_DlyCrcEn         = MODEROut[12];
+assign r_CrcEn            = MODEROut[13];
+assign r_HugEn            = MODEROut[14];
+assign r_Pad              = MODEROut[15];
+assign r_RecSmall         = MODEROut[16];
+
+// IPGT, IPGR1, IPGR2
+assign r_IPGT[6:0]        = IPGTOut[6:0];
+assign r_IPGR1[6:0]       = IPGR1Out[6:0];
+assign r_IPGR2[6:0]       = IPGR2Out[6:0];
+
+// PACKETLEN: maximum frame length in the low half, minimum in the high half
+assign r_MaxFL[15:0]      = PACKETLENOut[15:0];
+assign r_MinFL[15:0]      = PACKETLENOut[31:16];
+
+// COLLCONF
+assign r_CollValid[5:0]   = COLLCONFOut[5:0];
+assign r_MaxRet[3:0]      = COLLCONFOut[19:16];
+
+// CTRLMODER
+assign r_PassAll          = CTRLMODEROut[0];
+assign r_RxFlow           = CTRLMODEROut[1];
+assign r_TxFlow           = CTRLMODEROut[2];
+
+// MIIMODER
+assign r_ClkDiv[7:0]      = MIIMODEROut[7:0];
+assign r_MiiNoPre         = MIIMODEROut[8];
+
+// MIICOMMAND
+assign r_ScanStat         = MIICOMMANDOut[0];
+assign r_RStat            = MIICOMMANDOut[1];
+assign r_WCtrlData        = MIICOMMANDOut[2];
+
+// MIIADDRESS
+assign r_FIAD[4:0]        = MIIADDRESSOut[4:0];
+assign r_RGAD[4:0]        = MIIADDRESSOut[12:8];
+
+// MIITX_DATA
+assign r_CtrlData[15:0]   = MIITX_DATAOut[15:0];
+
+// MIISTATUS is not a stored register: its read-back value is assembled
+// directly from the status inputs, with the unused upper bits tied to zero.
+assign MIISTATUSOut[0]    = LinkFail;
+assign MIISTATUSOut[1]    = Busy_stat;
+assign MIISTATUSOut[2]    = NValid_stat;
+assign MIISTATUSOut[31:`ETH_MIISTATUS_WIDTH] = 0;
+
+// MAC address: low 32 bits from MAC_ADDR0, high 16 bits from MAC_ADDR1
+assign r_MAC[31:0]        = MAC_ADDR0Out[31:0];
+assign r_MAC[47:32]       = MAC_ADDR1Out[15:0];
+
+// Hash registers
+assign r_HASH0[31:0]      = HASH0Out;
+assign r_HASH1[31:0]      = HASH1Out;
+
+// TX_BD_NUM
+assign r_TxBDNum[7:0]     = TX_BD_NUMOut[7:0];
+
+// TXCTRL: pause timer value in bits 15:0, pause request in bit 16
+assign r_TxPauseTV[15:0]  = TXCTRLOut[15:0];
+assign r_TxPauseRq        = TXCTRLOut[16];
+
+
+// Synchronizing TxC Interrupt
+// TxClk-domain request flag. It is set when TxCtrlEndFrm, StartTxDone and
+// r_TxFlow are all high, and cleared when ResetTxCIrq_sync2 is high; setting
+// takes priority over clearing.
+always @ (posedge TxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      SetTxCIrq_txclk <=#Tp 1'b0;
+    end
+  else if(TxCtrlEndFrm & StartTxDone & r_TxFlow)
+    begin
+      SetTxCIrq_txclk <=#Tp 1'b1;
+    end
+  else if(ResetTxCIrq_sync2)
+    begin
+      SetTxCIrq_txclk <=#Tp 1'b0;
+    end
+end
+
+
+// Clk-domain side of the TxC interrupt path.
+// SetTxCIrq_txclk (TxClk domain) is registered into SetTxCIrq_sync1 and then
+// SetTxCIrq_sync2. SetTxCIrq_sync3 is SetTxCIrq_sync2 delayed by one Clk, so
+// SetTxCIrq is a one-Clk pulse following each rising edge of SetTxCIrq_sync2.
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      SetTxCIrq_sync1 <=#Tp 1'b0;
+      SetTxCIrq_sync2 <=#Tp 1'b0;
+      SetTxCIrq_sync3 <=#Tp 1'b0;
+      SetTxCIrq       <=#Tp 1'b0;
+    end
+  else
+    begin
+      SetTxCIrq_sync1 <=#Tp SetTxCIrq_txclk;
+      SetTxCIrq_sync2 <=#Tp SetTxCIrq_sync1;
+      SetTxCIrq_sync3 <=#Tp SetTxCIrq_sync2;
+      SetTxCIrq       <=#Tp SetTxCIrq_sync2 & ~SetTxCIrq_sync3;
+    end
+end
+
+// Return path of the TxC handshake: SetTxCIrq_sync2 (Clk domain) is
+// registered on TxClk into ResetTxCIrq_sync1.
+always @ (posedge TxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      ResetTxCIrq_sync1 <=#Tp 1'b0;
+    end
+  else
+    begin
+      ResetTxCIrq_sync1 <=#Tp SetTxCIrq_sync2;
+    end
+end
+
+// Second stage of the TxClk-domain synchronizer for the TxC interrupt
+// acknowledge. ResetTxCIrq_sync2 clears SetTxCIrq_txclk.
+// It takes its input from ResetTxCIrq_sync1 (the first TxClk-domain stage)
+// rather than directly from the Clk-domain signal SetTxCIrq_sync1, so the
+// acknowledge passes through two TxClk flops, matching the RxC path
+// (ResetRxCIrq_sync1 -> ResetRxCIrq_sync2).
+always @ (posedge TxClk or posedge Reset)
+begin
+  if(Reset)
+    ResetTxCIrq_sync2 <=#Tp 1'b0;
+  else
+    ResetTxCIrq_sync2 <=#Tp ResetTxCIrq_sync1;
+end
+
+
+// Synchronizing RxC Interrupt
+// RxClk-domain request flag. It is set when SetPauseTimer and r_RxFlow are
+// both high, and cleared on a rising edge of ResetRxCIrq_sync2 (high while
+// ResetRxCIrq_sync3 is still low); setting takes priority over clearing.
+always @ (posedge RxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      SetRxCIrq_rxclk <=#Tp 1'b0;
+    end
+  else if(SetPauseTimer & r_RxFlow)
+    begin
+      SetRxCIrq_rxclk <=#Tp 1'b1;
+    end
+  else if(ResetRxCIrq_sync2 & (~ResetRxCIrq_sync3))
+    begin
+      SetRxCIrq_rxclk <=#Tp 1'b0;
+    end
+end
+
+
+// Clk-domain side of the RxC interrupt path.
+// SetRxCIrq_rxclk (RxClk domain) is registered into SetRxCIrq_sync1 and then
+// SetRxCIrq_sync2. SetRxCIrq_sync3 is SetRxCIrq_sync2 delayed by one Clk, so
+// SetRxCIrq is a one-Clk pulse following each rising edge of SetRxCIrq_sync2.
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      SetRxCIrq_sync1 <=#Tp 1'b0;
+      SetRxCIrq_sync2 <=#Tp 1'b0;
+      SetRxCIrq_sync3 <=#Tp 1'b0;
+      SetRxCIrq       <=#Tp 1'b0;
+    end
+  else
+    begin
+      SetRxCIrq_sync1 <=#Tp SetRxCIrq_rxclk;
+      SetRxCIrq_sync2 <=#Tp SetRxCIrq_sync1;
+      SetRxCIrq_sync3 <=#Tp SetRxCIrq_sync2;
+      SetRxCIrq       <=#Tp SetRxCIrq_sync2 & ~SetRxCIrq_sync3;
+    end
+end
+
+// Return path of the RxC handshake, RxClk domain.
+// SetRxCIrq_sync2 (Clk domain) is registered through ResetRxCIrq_sync1 and
+// ResetRxCIrq_sync2; ResetRxCIrq_sync3 is ResetRxCIrq_sync2 delayed by one
+// RxClk and is used to detect its rising edge.
+always @ (posedge RxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      ResetRxCIrq_sync1 <=#Tp 1'b0;
+      ResetRxCIrq_sync2 <=#Tp 1'b0;
+      ResetRxCIrq_sync3 <=#Tp 1'b0;
+    end
+  else
+    begin
+      ResetRxCIrq_sync1 <=#Tp SetRxCIrq_sync2;
+      ResetRxCIrq_sync2 <=#Tp ResetRxCIrq_sync1;
+      ResetRxCIrq_sync3 <=#Tp ResetRxCIrq_sync2;
+    end
+end
+
+
+
+// Interrupt generation
+// Seven sticky interrupt flags. Each flag is set by its event input and is
+// cleared when INT_SOURCE is written (INT_SOURCE_Wr[0]) with a 1 in the flag's
+// bit position of DataIn. If both happen in the same cycle, setting wins.
+//   bit 0: irq_txb  <- TxB_IRQ      bit 4: irq_busy <- Busy_IRQ
+//   bit 1: irq_txe  <- TxE_IRQ      bit 5: irq_txc  <- SetTxCIrq
+//   bit 2: irq_rxb  <- RxB_IRQ      bit 6: irq_rxc  <- SetRxCIrq
+//   bit 3: irq_rxe  <- RxE_IRQ
+always @ (posedge Clk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      irq_txb  <= 1'b0;
+      irq_txe  <= 1'b0;
+      irq_rxb  <= 1'b0;
+      irq_rxe  <= 1'b0;
+      irq_busy <= 1'b0;
+      irq_txc  <= 1'b0;
+      irq_rxc  <= 1'b0;
+    end
+  else
+    begin
+      if(TxB_IRQ)
+        irq_txb <= #Tp 1'b1;
+      else if(INT_SOURCE_Wr[0] & DataIn[0])
+        irq_txb <= #Tp 1'b0;
+
+      if(TxE_IRQ)
+        irq_txe <= #Tp 1'b1;
+      else if(INT_SOURCE_Wr[0] & DataIn[1])
+        irq_txe <= #Tp 1'b0;
+
+      if(RxB_IRQ)
+        irq_rxb <= #Tp 1'b1;
+      else if(INT_SOURCE_Wr[0] & DataIn[2])
+        irq_rxb <= #Tp 1'b0;
+
+      if(RxE_IRQ)
+        irq_rxe <= #Tp 1'b1;
+      else if(INT_SOURCE_Wr[0] & DataIn[3])
+        irq_rxe <= #Tp 1'b0;
+
+      if(Busy_IRQ)
+        irq_busy <= #Tp 1'b1;
+      else if(INT_SOURCE_Wr[0] & DataIn[4])
+        irq_busy <= #Tp 1'b0;
+
+      if(SetTxCIrq)
+        irq_txc <= #Tp 1'b1;
+      else if(INT_SOURCE_Wr[0] & DataIn[5])
+        irq_txc <= #Tp 1'b0;
+
+      if(SetRxCIrq)
+        irq_rxc <= #Tp 1'b1;
+      else if(INT_SOURCE_Wr[0] & DataIn[6])
+        irq_rxc <= #Tp 1'b0;
+    end
+end
+
+// Generating interrupt signal
+// int_o is asserted while any interrupt flag is set and its INT_MASK bit is 1.
+assign int_o = (irq_txb  & INT_MASKOut[0]) |
+               (irq_txe  & INT_MASKOut[1]) |
+               (irq_rxb  & INT_MASKOut[2]) |
+               (irq_rxe  & INT_MASKOut[3]) |
+               (irq_busy & INT_MASKOut[4]) |
+               (irq_txc  & INT_MASKOut[5]) |
+               (irq_rxc  & INT_MASKOut[6]);
+
+// For reading interrupt status
+// The flags are packed with irq_txb in bit 0 up to irq_rxc in bit 6; the
+// remaining upper bits are filled with zeros.
+assign INT_SOURCEOut = {
+                        {(32-`ETH_INT_SOURCE_WIDTH_0){1'b0}},
+                        irq_rxc,
+                        irq_txc,
+                        irq_busy,
+                        irq_rxe,
+                        irq_rxb,
+                        irq_txe,
+                        irq_txb
+                       };
+
+
+
+endmodule
Index: /trunk/OC-Ethernet/eth_crc.v
===================================================================
--- /trunk/OC-Ethernet/eth_crc.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_crc.v	(revision 6)
@@ -0,0 +1,145 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_crc.v                                                   ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////      - Novan Hartadi (novan@vlsi.itb.ac.id)                  ////
+////      - Mahmud Galela (mgalela@vlsi.itb.ac.id)                ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.2  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed to have an eth_ prefix in the name.
+// File eth_timescale.v is used to define timescale.
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE). The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joined together.
+//
+// Revision 1.3  2001/06/19 18:16:40  mohor
+// TxClk changed to MTxClk (as described in the documentation).
+// Crc changed so only one file can be used instead of two.
+//
+// Revision 1.2  2001/06/19 10:38:07  mohor
+// Minor changes in header.
+//
+// Revision 1.1  2001/06/19 10:27:57  mohor
+// TxEthMAC initial release.
+//
+//
+//
+
+
+`include "timescale.v"
+
+// eth_crc: 32-bit CRC register updated four data bits (one nibble) per clock.
+//
+// Ports:
+//   Clk        - clock; Crc is updated on every rising edge.
+//   Reset      - asynchronous, active-high reset; loads Crc with 32'hffffffff.
+//   Data       - 4-bit data nibble folded into the CRC when Enable is high.
+//   Enable     - when high, Data and Crc[31:28] are fed back into the next
+//                value; when low, the feedback terms are gated off and the
+//                next value is simply Crc shifted left by four with zeros
+//                shifted in.
+//   Initialize - synchronous load of 32'hffffffff; takes priority over the
+//                normal update.
+//   Crc        - current CRC register value.
+//   CrcError   - high whenever Crc differs from the magic number
+//                32'hc704dd7b.
+//
+// Parameter:
+//   Tp         - delay applied to every register assignment.
+//
+// NOTE(review): the feedback equations and the magic number look like the
+// IEEE 802.3 CRC-32 processed nibble-wise - confirm against the MAC spec.
+module eth_crc (Clk, Reset, Data, Enable, Initialize, Crc, CrcError);
+
+
+parameter Tp = 1;
+
+input Clk;
+input Reset;
+input [3:0] Data;
+input Enable;
+input Initialize;
+
+output [31:0] Crc;
+output CrcError;
+
+reg  [31:0] Crc;
+
+wire [31:0] CrcNext;
+
+
+// Next-state equations for one nibble step. Bits 20, 21, 30 and 31 have no
+// feedback term and are a plain shift of the current value.
+assign CrcNext[0] = Enable & (Data[0] ^ Crc[28]);
+assign CrcNext[1] = Enable & (Data[1] ^ Data[0] ^ Crc[28] ^ Crc[29]);
+assign CrcNext[2] = Enable & (Data[2] ^ Data[1] ^ Data[0] ^ Crc[28] ^ Crc[29] ^ Crc[30]);
+assign CrcNext[3] = Enable & (Data[3] ^ Data[2] ^ Data[1] ^ Crc[29] ^ Crc[30] ^ Crc[31]);
+assign CrcNext[4] = (Enable & (Data[3] ^ Data[2] ^ Data[0] ^ Crc[28] ^ Crc[30] ^ Crc[31])) ^ Crc[0];
+assign CrcNext[5] = (Enable & (Data[3] ^ Data[1] ^ Data[0] ^ Crc[28] ^ Crc[29] ^ Crc[31])) ^ Crc[1];
+assign CrcNext[6] = (Enable & (Data[2] ^ Data[1] ^ Crc[29] ^ Crc[30])) ^ Crc[ 2];
+assign CrcNext[7] = (Enable & (Data[3] ^ Data[2] ^ Data[0] ^ Crc[28] ^ Crc[30] ^ Crc[31])) ^ Crc[3];
+assign CrcNext[8] = (Enable & (Data[3] ^ Data[1] ^ Data[0] ^ Crc[28] ^ Crc[29] ^ Crc[31])) ^ Crc[4];
+assign CrcNext[9] = (Enable & (Data[2] ^ Data[1] ^ Crc[29] ^ Crc[30])) ^ Crc[5];
+assign CrcNext[10] = (Enable & (Data[3] ^ Data[2] ^ Data[0] ^ Crc[28] ^ Crc[30] ^ Crc[31])) ^ Crc[6];
+assign CrcNext[11] = (Enable & (Data[3] ^ Data[1] ^ Data[0] ^ Crc[28] ^ Crc[29] ^ Crc[31])) ^ Crc[7];
+assign CrcNext[12] = (Enable & (Data[2] ^ Data[1] ^ Data[0] ^ Crc[28] ^ Crc[29] ^ Crc[30])) ^ Crc[8];
+assign CrcNext[13] = (Enable & (Data[3] ^ Data[2] ^ Data[1] ^ Crc[29] ^ Crc[30] ^ Crc[31])) ^ Crc[9];
+assign CrcNext[14] = (Enable & (Data[3] ^ Data[2] ^ Crc[30] ^ Crc[31])) ^ Crc[10];
+assign CrcNext[15] = (Enable & (Data[3] ^ Crc[31])) ^ Crc[11];
+assign CrcNext[16] = (Enable & (Data[0] ^ Crc[28])) ^ Crc[12];
+assign CrcNext[17] = (Enable & (Data[1] ^ Crc[29])) ^ Crc[13];
+assign CrcNext[18] = (Enable & (Data[2] ^ Crc[30])) ^ Crc[14];
+assign CrcNext[19] = (Enable & (Data[3] ^ Crc[31])) ^ Crc[15];
+assign CrcNext[20] = Crc[16];
+assign CrcNext[21] = Crc[17];
+assign CrcNext[22] = (Enable & (Data[0] ^ Crc[28])) ^ Crc[18];
+assign CrcNext[23] = (Enable & (Data[1] ^ Data[0] ^ Crc[29] ^ Crc[28])) ^ Crc[19];
+assign CrcNext[24] = (Enable & (Data[2] ^ Data[1] ^ Crc[30] ^ Crc[29])) ^ Crc[20];
+assign CrcNext[25] = (Enable & (Data[3] ^ Data[2] ^ Crc[31] ^ Crc[30])) ^ Crc[21];
+assign CrcNext[26] = (Enable & (Data[3] ^ Data[0] ^ Crc[31] ^ Crc[28])) ^ Crc[22];
+assign CrcNext[27] = (Enable & (Data[1] ^ Crc[29])) ^ Crc[23];
+assign CrcNext[28] = (Enable & (Data[2] ^ Crc[30])) ^ Crc[24];
+assign CrcNext[29] = (Enable & (Data[3] ^ Crc[31])) ^ Crc[25];
+assign CrcNext[30] = Crc[26];
+assign CrcNext[31] = Crc[27];
+
+
+// CRC register. The reset branch uses the Tp parameter like the other
+// assignments, so overriding Tp changes every delay consistently.
+always @ (posedge Clk or posedge Reset)
+begin
+  if (Reset)
+    Crc <= #Tp 32'hffffffff;
+  else
+  if(Initialize)
+    Crc <= #Tp 32'hffffffff;
+  else
+    Crc <= #Tp CrcNext;
+end
+
+assign CrcError = Crc[31:0] != 32'hc704dd7b;  // CRC not equal to magic number
+
+endmodule
Index: /trunk/OC-Ethernet/eth_rxstatem.v
===================================================================
--- /trunk/OC-Ethernet/eth_rxstatem.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_rxstatem.v	(revision 6)
@@ -0,0 +1,196 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_rxstatem.v                                              ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////      - Novan Hartadi (novan@vlsi.itb.ac.id)                  ////
+////      - Mahmud Galela (mgalela@vlsi.itb.ac.id)                ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.5  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.4  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.3  2001/10/18 12:07:11  mohor
+// Status signals changed, Adress decoding changed, interrupt controller
+// added.
+//
+// Revision 1.2  2001/09/11 14:17:00  mohor
+// Few little NCSIM warnings fixed.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+// Revision 1.2  2001/07/03 12:55:41  mohor
+// Minor changes because of the synthesys warnings.
+//
+//
+// Revision 1.1  2001/06/27 21:26:19  mohor
+// Initial release of the RxEthMAC module.
+//
+//
+//
+//
+
+
+`include "timescale.v"
+
+
+module eth_rxstatem (MRxClk, Reset, MRxDV, ByteCntEq0, ByteCntGreat2, Transmitting, MRxDEq5, MRxDEqD, 
+                     IFGCounterEq24, ByteCntMaxFrame, StateData, StateIdle, StatePreamble, StateSFD, 
+                     StateDrop
+                    );
+// Receive state machine with one flip-flop per state: Idle, Preamble, SFD, Data0, Data1 and Drop.
+parameter Tp = 1;               // delay applied to every registered assignment in the state register block
+// Inputs
+input         MRxClk;           // clock for all state registers
+input         Reset;            // asynchronous, active high; forces the Drop state
+input         MRxDV;            // low: return to Idle; high: required by every other Start* term
+input         ByteCntEq0;       // not referenced inside this module
+input         ByteCntGreat2;    // not referenced inside this module
+input         MRxDEq5;          // presumably "received nibble == 0x5" (preamble) - confirm at the instantiation
+input         Transmitting;     // while high, Idle goes to Drop instead of Preamble/SFD
+input         MRxDEqD;          // presumably "received nibble == 0xD" (SFD) - confirm at the instantiation
+input         IFGCounterEq24;   // with MRxDEqD in SFD: high -> Data0, low -> Drop
+input         ByteCntMaxFrame;  // in Data0: high -> Drop, low -> Data1
+// Outputs: the current state. StateData is {StateData1, StateData0}.
+output [1:0]  StateData;
+output        StateIdle;
+output        StateDrop;
+output        StatePreamble;
+output        StateSFD;
+// State registers
+reg           StateData0;
+reg           StateData1;
+reg           StateIdle;
+reg           StateDrop;
+reg           StatePreamble;
+reg           StateSFD;
+// Start<X> is high in the clock cycle before state <X> is entered
+wire          StartIdle;
+wire          StartDrop;
+wire          StartData0;
+wire          StartData1;
+wire          StartPreamble;
+wire          StartSFD;
+
+
+// Next-state terms. Idle: MRxDV dropped while in any state other than Idle.
+assign StartIdle = ~MRxDV & (StateDrop | StatePreamble | StateSFD | (|StateData));
+// Preamble: first valid nibble in Idle is not the MRxDEq5 pattern and we are not transmitting.
+assign StartPreamble = MRxDV & ~MRxDEq5 & (StateIdle & ~Transmitting);
+// SFD: MRxDEq5 seen either in Idle (not transmitting) or in Preamble.
+assign StartSFD = MRxDV & MRxDEq5 & (StateIdle & ~Transmitting | StatePreamble);
+// Data0: MRxDEqD seen in SFD with IFGCounterEq24 set, or the Data1 -> Data0 alternation.
+assign StartData0 = MRxDV & (StateSFD & MRxDEqD & IFGCounterEq24 | StateData1);
+// Data1: follows Data0 unless ByteCntMaxFrame is set.
+assign StartData1 = MRxDV & StateData0 & (~ByteCntMaxFrame);
+// Drop: reception while transmitting, MRxDEqD in SFD without IFGCounterEq24, or ByteCntMaxFrame in Data0.
+assign StartDrop = MRxDV & (StateIdle & Transmitting | StateSFD & ~IFGCounterEq24 &  MRxDEqD 
+                         |  StateData0 &  ByteCntMaxFrame
+                           );
+
+// Rx State Machine: each register holds its value unless one of its clear or set terms is active
+always @ (posedge MRxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      StateIdle     <= #Tp 1'b0;
+      StateDrop     <= #Tp 1'b1;   // the machine comes out of reset in Drop
+      StatePreamble <= #Tp 1'b0;
+      StateSFD      <= #Tp 1'b0;
+      StateData0    <= #Tp 1'b0;
+      StateData1    <= #Tp 1'b0;
+    end
+  else
+    begin
+      if(StartPreamble | StartSFD | StartDrop)
+        StateIdle <= #Tp 1'b0;
+      else
+      if(StartIdle)
+        StateIdle <= #Tp 1'b1;
+      // Drop is left only through StartIdle
+      if(StartIdle)
+        StateDrop <= #Tp 1'b0;
+      else
+      if(StartDrop)
+        StateDrop <= #Tp 1'b1;
+      // Preamble: clear terms are tested before the set term
+      if(StartSFD | StartIdle | StartDrop)
+        StatePreamble <= #Tp 1'b0;
+      else
+      if(StartPreamble)
+        StatePreamble <= #Tp 1'b1;
+      // SFD: clear terms are tested before the set term
+      if(StartPreamble | StartIdle | StartData0 | StartDrop)
+        StateSFD <= #Tp 1'b0;
+      else
+      if(StartSFD)
+        StateSFD <= #Tp 1'b1;
+      // Data0 is cleared when Data1 starts, and Data1 is cleared when Data0 starts
+      if(StartIdle | StartData1 | StartDrop)
+        StateData0 <= #Tp 1'b0;
+      else
+      if(StartData0)
+        StateData0 <= #Tp 1'b1;
+
+      if(StartIdle | StartData0 | StartDrop)
+        StateData1 <= #Tp 1'b0;
+      else
+      if(StartData1)
+        StateData1 <= #Tp 1'b1;
+    end
+end
+// Pack the two data sub-states into the 2-bit output
+assign StateData[1:0] = {StateData1, StateData0};
+
+endmodule
Index: /trunk/OC-Ethernet/eth_sgmii.v
===================================================================
--- /trunk/OC-Ethernet/eth_sgmii.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_sgmii.v	(revision 6)
@@ -0,0 +1,201 @@
+//////////////////////////////////////////////////////////////////////////////////
+// Company:  (C) Athree, 2009
+// Engineer: Dmitry Rozhdestvenskiy 
+// Email dmitry.rozhdestvenskiy@srisc.com dmitryr@a3.spb.ru divx4log@narod.ru
+// 
+// Design Name:    OpenCores 10/100 Ethernet combined with Altera MII->SGMII bridge
+// Module Name:    eth_sgmii 
+// Project Name:   SPARC SoC single-core
+//
+// LICENSE:
+// This is a Free Hardware Design; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License
+// version 2 as published by the Free Software Foundation.
+// The above named program is distributed in the hope that it will
+// be useful, but WITHOUT ANY WARRANTY; without even the implied
+// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+// See the GNU General Public License for more details.
+//
+//////////////////////////////////////////////////////////////////////////////////
+module eth_sgmii (
+    input  wb_clk_i,
+    input  wb_rst_i,
+    input  sysclk,
+    // 64-bit Wishbone slave port, narrowed to the 32-bit slave port of eth_top
+    input  [63:0] wb_dat_i,
+    output [63:0] wb_dat_o,
+    input  [63:0] wb_adr_i,
+    input  [ 7:0] wb_sel_i,
+    input         wb_we_i,
+    input         wb_cyc_i,
+    input         wb_stb_i,
+    output        wb_ack_o,
+    output        wb_err_o,
+    // 64-bit Wishbone master port, widened from the 32-bit master port of eth_top
+    output [63:0] m_wb_adr_o,
+    output [ 7:0] m_wb_sel_o,
+    output        m_wb_we_o,
+    output [63:0] m_wb_dat_o,
+    input  [63:0] m_wb_dat_i,
+    output        m_wb_cyc_o,
+    output        m_wb_stb_o,
+    input         m_wb_ack_i,
+    input         m_wb_err_i,
+    // Serial pins, wired to rxp/txp of the MII2SGMII instance
+    input         sgmii_rx,
+    output        sgmii_tx,
+    // Interrupt request, wired to int_o of eth_top
+    output        int_eth,
+    // Status outputs, all wired to the MII2SGMII instance
+    output        led_10,
+    output        led_100,
+    output        led_1000,
+    output        led_an,
+    output        led_disp_err,
+    output        led_char_err,
+    output        led_link,
+    // MDIO data pin (driven only while md_oe is high) and management clock
+    inout         md,
+    output        mdc
+);
+
+wire [ 3:0] mrxd;
+wire [ 3:0] mtxd;
+wire [31:0] dat_o;
+wire [ 3:0] sel_o;
+wire [31:0] mdat_o;
+// MAC <-> PCS and MDIO nets, declared explicitly rather than left as implicit nets
+wire        mtx_clk, mtxen, mtxerr;
+wire        mrx_clk, mrxdv, mrxerr, mcoll, mcrs;
+wire        md_i, md_o, md_oe;
+
+// Each 32-bit word is byte-reversed between the buses and replicated into both 64-bit halves.
+// Master accesses use the low half (selects [3:0]) when m_wb_adr_o[2] is set, else the high half.
+assign wb_dat_o={dat_o[7:0],dat_o[15:8],dat_o[23:16],dat_o[31:24],dat_o[7:0],dat_o[15:8],dat_o[23:16],dat_o[31:24]};
+assign m_wb_adr_o[63:32]=32'b0;
+assign m_wb_sel_o=m_wb_adr_o[2] ? {4'b0000,sel_o[0],sel_o[1],sel_o[2],sel_o[3]}:{sel_o[0],sel_o[1],sel_o[2],sel_o[3],4'b0000};
+assign m_wb_dat_o={mdat_o[7:0],mdat_o[15:8],mdat_o[23:16],mdat_o[31:24],mdat_o[7:0],mdat_o[15:8],mdat_o[23:16],mdat_o[31:24]};
+
+// OpenCores 10/100 Ethernet MAC
+eth_top eth_mac (
+    .wb_clk_i(wb_clk_i),
+    .wb_rst_i(wb_rst_i),
+    // Slave side: the low data half is used when no upper byte-select is set, the high half otherwise
+    .wb_dat_i(wb_sel_i[7:4]==4'b0 ? {wb_dat_i[7:0],wb_dat_i[15:8],wb_dat_i[23:16],wb_dat_i[31:24]}:{wb_dat_i[39:32],wb_dat_i[47:40],wb_dat_i[55:48],wb_dat_i[63:56]}),
+    .wb_dat_o(dat_o),
+    .wb_adr_i(wb_adr_i[31:0]),
+    .wb_sel_i(wb_sel_i[7:4]==4'b0 ? {wb_sel_i[0],wb_sel_i[1],wb_sel_i[2],wb_sel_i[3]}:{wb_sel_i[4],wb_sel_i[5],wb_sel_i[6],wb_sel_i[7]}),
+    .wb_we_i(wb_we_i),
+    .wb_cyc_i(wb_cyc_i),
+    .wb_stb_i(wb_stb_i),
+    .wb_ack_o(wb_ack_o),
+    .wb_err_o(wb_err_o),
+    .m_wb_adr_o(m_wb_adr_o[31:0]),
+    .m_wb_sel_o(sel_o),
+    .m_wb_we_o(m_wb_we_o),
+    .m_wb_dat_o(mdat_o),
+    .m_wb_dat_i(m_wb_adr_o[2] ? {m_wb_dat_i[7:0],m_wb_dat_i[15:8],m_wb_dat_i[23:16],m_wb_dat_i[31:24]}:{m_wb_dat_i[39:32],m_wb_dat_i[47:40],m_wb_dat_i[55:48],m_wb_dat_i[63:56]}),
+    .m_wb_cyc_o(m_wb_cyc_o),
+    .m_wb_stb_o(m_wb_stb_o),
+    .m_wb_ack_i(m_wb_ack_i),
+    .m_wb_err_i(m_wb_err_i),
+    // MII signals shared with the eth_pcs instance below, then MDIO and interrupt
+    .mtx_clk_pad_i(mtx_clk),
+    .mtxd_pad_o(mtxd),
+    .mtxen_pad_o(mtxen),
+    .mtxerr_pad_o(mtxerr),
+    .mrx_clk_pad_i(mrx_clk),
+    .mrxd_pad_i(mrxd),
+    .mrxdv_pad_i(mrxdv),
+    .mrxerr_pad_i(mrxerr),
+    .mcoll_pad_i(mcoll),
+    .mcrs_pad_i(mcrs),
+    .mdc_pad_o(mdc),
+    .md_pad_i(md_i),
+    .md_pad_o(md_o),
+    .md_padoe_o(md_oe),
+    .int_o(int_eth)
+);
+
+assign md_i=md;
+assign md=md_oe ? md_o:1'bZ;   // release the pin unless the MAC enables its MDIO output
+
+/*reg  [63:0] mdio_shift;
+reg  [ 5:0] mdio_cnt;
+wire [15:0] mdio_wrdata;
+wire [15:0] mdio_rdata;
+wire [ 4:0] mdio_addr;
+reg mdio_wr;
+
+assign mdio_rd=(mdio_cnt==6'd46) && mdio_shift[45:14]==32'hFFFFFFFF; // Address just latched, frame valid
+assign mdio_wrdata=mdio_shift[15:0];
+assign md_i=mdio_rdata[~mdio_cnt+1];
+assign mdio_addr=(mdio_cnt<6'd48) ? mdio_shift[4:0]:mdio_shift[22:18];
+
+always @(posedge mdc or posedge wb_rst_i)
+   if(wb_rst_i)
+      begin
+         mdio_cnt<=0;
+         mdio_shift<=64'b0;
+      end
+   else
+      begin
+          mdio_shift[0]<=md_o;
+          mdio_shift[63:1]<=mdio_shift[62:0];
+          mdio_cnt<=mdio_cnt+1;
+          if(mdio_cnt==6'd63 && mdio_shift[62:27]==36'hFFFFFFFF5)
+             mdio_wr<=1;
+          else
+             mdio_wr<=0;
+      end*/
+
+// Altera Ethernet controller in MII->SGMII bridge mode
+// You may generate it with Quartus and use it for free in test mode
+// (either time-limited or connected to PC)
+MII2SGMII eth_pcs(
+    .ref_clk(sysclk),
+    .reset(wb_rst_i),
+    // GMII side is not used: outputs left open, inputs tied low with sized constants
+    .gmii_rx_d(),
+    .gmii_rx_dv(),
+    .gmii_rx_err(),
+    .gmii_tx_d(8'b0),
+    .gmii_tx_en(1'b0),
+    .gmii_tx_err(1'b0),
+    // MII side, connected to eth_mac; all three resets share wb_rst_i
+    .tx_clk(mtx_clk),
+    .reset_tx_clk(wb_rst_i),
+    .tx_clkena(),
+    .mii_tx_d(mtxd),
+    .mii_tx_en(mtxen),
+    .mii_tx_err(mtxerr),
+    .rx_clk(mrx_clk),
+    .reset_rx_clk(wb_rst_i),
+    .rx_clkena(),
+    .mii_rx_d(mrxd),
+    .mii_rx_dv(mrxdv),
+    .mii_rx_err(mrxerr),
+    .mii_col(mcoll),
+    .mii_crs(mcrs),
+    .set_10(led_10),
+    .set_100(led_100),
+    .set_1000(led_1000),
+    .hd_ena(),
+    .txp(sgmii_tx),
+    .rxp(sgmii_rx),
+    .led_col(),
+    .led_crs(),
+    .led_an(led_an),
+    .led_disp_err(led_disp_err),
+    .led_char_err(led_char_err),
+    .led_link(led_link),
+    // Register port unused: clk/read/write held low; address/writedata tied off (5/16-bit widths assumed - confirm)
+    .clk(1'b0),
+    .readdata(),
+    .waitrequest(),
+    .address(5'b0),
+    .read(1'b0),
+    .writedata(16'b0),
+    .write(1'b0)
+);
+endmodule 
Index: /trunk/OC-Ethernet/eth_txstatem.v
===================================================================
--- /trunk/OC-Ethernet/eth_txstatem.v	(revision 6)
+++ /trunk/OC-Ethernet/eth_txstatem.v	(revision 6)
@@ -0,0 +1,284 @@
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+////  eth_txstatem.v                                              ////
+////                                                              ////
+////  This file is part of the Ethernet IP core project           ////
+////  http://www.opencores.org/projects/ethmac/                   ////
+////                                                              ////
+////  Author(s):                                                  ////
+////      - Igor Mohor (igorM@opencores.org)                      ////
+////      - Novan Hartadi (novan@vlsi.itb.ac.id)                  ////
+////      - Mahmud Galela (mgalela@vlsi.itb.ac.id)                ////
+////                                                              ////
+////  All additional information is available in the Readme.txt   ////
+////  file.                                                       ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+////                                                              ////
+//// Copyright (C) 2001 Authors                                   ////
+////                                                              ////
+//// This source file may be used and distributed without         ////
+//// restriction provided that this copyright statement is not    ////
+//// removed from the file and that any derivative work contains  ////
+//// the original copyright notice and the associated disclaimer. ////
+////                                                              ////
+//// This source file is free software; you can redistribute it   ////
+//// and/or modify it under the terms of the GNU Lesser General   ////
+//// Public License as published by the Free Software Foundation; ////
+//// either version 2.1 of the License, or (at your option) any   ////
+//// later version.                                               ////
+////                                                              ////
+//// This source is distributed in the hope that it will be       ////
+//// useful, but WITHOUT ANY WARRANTY; without even the implied   ////
+//// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      ////
+//// PURPOSE.  See the GNU Lesser General Public License for more ////
+//// details.                                                     ////
+////                                                              ////
+//// You should have received a copy of the GNU Lesser General    ////
+//// Public License along with this source; if not, download it   ////
+//// from http://www.opencores.org/lgpl.shtml                     ////
+////                                                              ////
+//////////////////////////////////////////////////////////////////////
+//
+// CVS Revision History
+//
+// $Log: not supported by cvs2svn $
+// Revision 1.5  2002/10/30 12:54:50  mohor
+// State machine goes from idle to the defer state when CarrierSense is 1. FCS (CRC appending) fixed to check the CrcEn bit also when padding is necessery.
+//
+// Revision 1.4  2002/01/23 10:28:16  mohor
+// Link in the header changed.
+//
+// Revision 1.3  2001/10/19 08:43:51  mohor
+// eth_timescale.v changed to timescale.v This is done because of the
+// simulation of the few cores in a one joined project.
+//
+// Revision 1.2  2001/09/11 14:17:00  mohor
+// Few little NCSIM warnings fixed.
+//
+// Revision 1.1  2001/08/06 14:44:29  mohor
+// A define FPGA added to select between Artisan RAM (for ASIC) and Block Ram (For Virtex).
+// Include files fixed to contain no path.
+// File names and module names changed ta have a eth_ prologue in the name.
+// File eth_timescale.v is used to define timescale
+// All pin names on the top module are changed to contain _I, _O or _OE at the end.
+// Bidirectional signal MDIO is changed to three signals (Mdc_O, Mdi_I, Mdo_O
+// and Mdo_OE. The bidirectional signal must be created on the top level. This
+// is done due to the ASIC tools.
+//
+// Revision 1.1  2001/07/30 21:23:42  mohor
+// Directory structure changed. Files checked and joind together.
+//
+// Revision 1.3  2001/06/19 18:16:40  mohor
+// TxClk changed to MTxClk (as discribed in the documentation).
+// Crc changed so only one file can be used instead of two.
+//
+// Revision 1.2  2001/06/19 10:38:07  mohor
+// Minor changes in header.
+//
+// Revision 1.1  2001/06/19 10:27:57  mohor
+// TxEthMAC initial release.
+//
+//
+//
+//
+
+
+`include "timescale.v"
+
+
+module eth_txstatem  (MTxClk, Reset, ExcessiveDefer, CarrierSense, NibCnt, IPGT, IPGR1, 
+                      IPGR2, FullD, TxStartFrm, TxEndFrm, TxUnderRun, Collision, UnderRun, 
+                      StartTxDone, TooBig, NibCntEq7, NibCntEq15, MaxFrame, Pad, CrcEn, 
+                      NibbleMinFl, RandomEq0, ColWindow, RetryMax, NoBckof, RandomEqByteCnt,
+                      StateIdle, StateIPG, StatePreamble, StateData, StatePAD, StateFCS, 
+                      StateJam, StateJam_q, StateBackOff, StateDefer, StartFCS, StartJam, 
+                      StartBackoff, StartDefer, DeferIndication, StartPreamble, StartData, StartIPG
+                     );
+// Transmit state machine: Defer, IPG, Idle, Preamble, Data[0]/Data[1], PAD, FCS, Jam and BackOff.
+parameter Tp = 1;         // delay applied to every registered assignment in this module
+// Inputs
+input MTxClk;             // clock for all registers in this module
+input Reset;              // asynchronous, active high; forces the Defer state
+input ExcessiveDefer;     // while high, Defer is not left for IPG
+input CarrierSense;       // used by StartIPG, StartPreamble, StartDefer and DeferIndication
+input [6:0] NibCnt;       // compared against IPGT, IPGR1 and IPGR2 while in IPG
+input [6:0] IPGT;         // IPG -> Idle threshold while Rule1 is set
+input [6:0] IPGR1;        // upper NibCnt bound for the IPG -> Defer term
+input [6:0] IPGR2;        // IPG -> Idle threshold while Rule1 is clear
+input FullD;              // sets Rule1 when neither Idle nor BackOff is active
+input TxStartFrm;         // starts Preamble from Idle when CarrierSense is low
+input TxEndFrm;           // in Data[1]: selects PAD/FCS instead of returning to Data[0]
+input TxUnderRun;         // blocks Data[0] -> Data[1]; also ends BackOff
+input Collision;          // blocks the Data/PAD/FCS start terms and triggers StartJam
+input UnderRun;           // triggers StartJam like Collision
+input StartTxDone;        // forces StartDefer
+input TooBig;             // forces StartDefer
+input NibCntEq7;          // qualifies both exits from Jam
+input NibCntEq15;         // qualifies the exits from Preamble
+input MaxFrame;           // blocks Data[0] -> Data[1]
+input Pad;                // with TxEndFrm: go to PAD while NibbleMinFl is low
+input CrcEn;              // required for StartFCS
+input NibbleMinFl;        // ends padding; see StartPAD and StartFCS
+input RandomEq0;          // RandomEq0, ColWindow, RetryMax and NoBckof select
+input ColWindow;          // whether Jam is followed by BackOff or by Defer
+input RetryMax;
+input NoBckof;
+input RandomEqByteCnt;    // ends BackOff
+
+// Outputs: current state
+output StateIdle;         // Idle state
+output StateIPG;          // IPG state
+output StatePreamble;     // Preamble state
+output [1:0] StateData;   // Data state
+output StatePAD;          // PAD state
+output StateFCS;          // FCS state
+output StateJam;          // Jam state
+output StateJam_q;        // Delayed Jam state
+output StateBackOff;      // Backoff state
+output StateDefer;        // Defer state
+// Outputs: next-state strobes, plus DeferIndication
+output StartFCS;          // FCS state will be activated in next clock
+output StartJam;          // Jam state will be activated in next clock
+output StartBackoff;      // Backoff state will be activated in next clock
+output StartDefer;        // Defer state will be activated in next clock
+output DeferIndication;   // high while Idle with CarrierSense set
+output StartPreamble;     // Preamble state will be activated in next clock
+output [1:0] StartData;   // Data state will be activated in next clock
+output StartIPG;          // IPG state will be activated in next clock
+// Internal next-state strobes
+wire StartIdle;           // Idle state will be activated in next clock
+wire StartPAD;            // PAD state will be activated in next clock
+
+// State registers; Rule1 selects the IPG threshold (see the last always block)
+reg StateIdle;
+reg StateIPG;
+reg StatePreamble;
+reg [1:0] StateData;
+reg StatePAD;
+reg StateFCS;
+reg StateJam;
+reg StateJam_q;
+reg StateBackOff;
+reg StateDefer;
+reg Rule1;
+
+
+// Next-state terms. IPG: leave Defer once neither ExcessiveDefer nor CarrierSense is set.
+assign StartIPG = StateDefer & ~ExcessiveDefer & ~CarrierSense;
+// Idle: comparisons bind tighter than &, i.e. Rule1 & (NibCnt >= IPGT) | ~Rule1 & (NibCnt >= IPGR2).
+assign StartIdle = StateIPG & (Rule1 & NibCnt[6:0] >= IPGT | ~Rule1 & NibCnt[6:0] >= IPGR2);
+// Preamble: a frame start is requested in Idle and the carrier is free.
+assign StartPreamble = StateIdle & TxStartFrm & ~CarrierSense;
+// Data[0]: end of Preamble (NibCntEq15), or back from Data[1] while the frame has not ended.
+assign StartData[0] = ~Collision & (StatePreamble & NibCntEq15 | StateData[1] & ~TxEndFrm);
+// Data[1]: follows Data[0] unless TxUnderRun or MaxFrame is set.
+assign StartData[1] = ~Collision & StateData[0] & ~TxUnderRun & ~MaxFrame;
+// PAD: frame ended in Data[1], padding requested and NibbleMinFl not yet set.
+assign StartPAD = ~Collision & StateData[1] & TxEndFrm & Pad & ~NibbleMinFl;
+// FCS (needs CrcEn): frame ended in Data[1] with no padding left to do, or PAD finished (NibbleMinFl).
+assign StartFCS = ~Collision & StateData[1] & TxEndFrm & (~Pad | Pad & NibbleMinFl) & CrcEn
+                | ~Collision & StatePAD & NibbleMinFl & CrcEn;
+// Jam: Collision or UnderRun at the end of Preamble or anywhere in Data, PAD or FCS.
+assign StartJam = (Collision | UnderRun) & ((StatePreamble & NibCntEq15) | (|StateData[1:0]) | StatePAD | StateFCS);
+// BackOff: Jam ends (NibCntEq7) and none of the conditions that skip the back-off applies.
+assign StartBackoff = StateJam & ~RandomEq0 & ColWindow & ~RetryMax & NibCntEq7 & ~NoBckof;
+// Defer: first term is StateIPG & ~Rule1 & CarrierSense & (NibCnt <= IPGR1) & (NibCnt != IPGR2).
+assign StartDefer = StateIPG & ~Rule1 & CarrierSense & NibCnt[6:0] <= IPGR1 & NibCnt[6:0] != IPGR2
+                  | StateIdle & CarrierSense 
+                  | StateJam & NibCntEq7 & (NoBckof | RandomEq0 | ~ColWindow | RetryMax)
+                  | StateBackOff & (TxUnderRun | RandomEqByteCnt)
+                  | StartTxDone | TooBig;
+// Same condition as the StateIdle term of StartDefer
+assign DeferIndication = StateIdle & CarrierSense;
+
+// Tx State Machine: every register except StateData and StateJam_q holds its value unless a clear or set term fires
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    begin
+      StateIPG        <= #Tp 1'b0;
+      StateIdle       <= #Tp 1'b0;
+      StatePreamble   <= #Tp 1'b0;
+      StateData[1:0]  <= #Tp 2'b0;
+      StatePAD        <= #Tp 1'b0;
+      StateFCS        <= #Tp 1'b0;
+      StateJam        <= #Tp 1'b0;
+      StateJam_q      <= #Tp 1'b0;
+      StateBackOff    <= #Tp 1'b0;
+      StateDefer      <= #Tp 1'b1;   // the machine comes out of reset in Defer
+    end
+  else
+    begin
+      StateData[1:0] <= #Tp StartData[1:0];   // reloaded every clock, no hold term
+      StateJam_q <= #Tp StateJam;             // StateJam delayed by one clock
+      // In each pair below the clear terms are tested before the set term
+      if(StartDefer | StartIdle)
+        StateIPG <= #Tp 1'b0;
+      else
+      if(StartIPG)
+        StateIPG <= #Tp 1'b1;
+
+      if(StartDefer | StartPreamble)
+        StateIdle <= #Tp 1'b0;
+      else
+      if(StartIdle)
+        StateIdle <= #Tp 1'b1;
+      // Preamble is cleared only by StartData[0] or StartJam, not by StartDefer
+      if(StartData[0] | StartJam)
+        StatePreamble <= #Tp 1'b0;
+      else
+      if(StartPreamble)
+        StatePreamble <= #Tp 1'b1;
+      // PAD is cleared only by StartFCS or StartJam, not by StartDefer
+      if(StartFCS | StartJam)
+        StatePAD <= #Tp 1'b0;
+      else
+      if(StartPAD)
+        StatePAD <= #Tp 1'b1;
+
+      if(StartJam | StartDefer)
+        StateFCS <= #Tp 1'b0;
+      else
+      if(StartFCS)
+        StateFCS <= #Tp 1'b1;
+
+      if(StartBackoff | StartDefer)
+        StateJam <= #Tp 1'b0;
+      else
+      if(StartJam)
+        StateJam <= #Tp 1'b1;
+
+      if(StartDefer)
+        StateBackOff <= #Tp 1'b0;
+      else
+      if(StartBackoff)
+        StateBackOff <= #Tp 1'b1;
+      // Defer is left only through StartIPG
+      if(StartIPG)
+        StateDefer <= #Tp 1'b0;
+      else
+      if(StartDefer)
+        StateDefer <= #Tp 1'b1;
+    end
+end
+
+
+// This section selects the inter-packet gap rule: Rule1 set -> StartIdle compares NibCnt with IPGT, clear -> IPGR2
+always @ (posedge MTxClk or posedge Reset)
+begin
+  if(Reset)
+    Rule1 <= #Tp 1'b0;
+  else
+    begin
+      if(StateIdle | StateBackOff)     // the clear condition is tested first
+        Rule1 <= #Tp 1'b0;
+      else
+      if(StatePreamble | FullD)
+        Rule1 <= #Tp 1'b1;
+    end
+end
+
+
+
+endmodule
