// ========== Copyright Header Begin ==========================================
// 
// OpenSPARC T1 Processor File: lsu_stb_rwctl.v
// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
// 
// The above named program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public
// License version 2 as published by the Free Software Foundation.
// 
// The above named program is distributed in the hope that it will be 
// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
// 
// You should have received a copy of the GNU General Public
// License along with this work; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
// 
// ========== Copyright Header End ============================================
///////////////////////////////////////////////////////////////////
/*
//  Description:  Control for Unified STB CAM/DATA of LSU
*/
////////////////////////////////////////////////////////////////////////
// Global header file includes
////////////////////////////////////////////////////////////////////////
`include "sys.h" // system level definition file which contains the
                 // time scale definition

`include "iop.h"

////////////////////////////////////////////////////////////////////////
// Local header file includes / local defines
////////////////////////////////////////////////////////////////////////

// lsu_stb_rwctl - read/write control for the store buffer (STB) CAM and
// data arrays.  The logic in this module produces, among other things:
//   - the shared CAM read/write pointer and its write/read valids,
//   - the data-array write pointer/valid and read pointer/valid,
//   - the 8-bit byte mask and the store-size selects,
//   - the control bits written alongside store data (bits 75:64).
// Signal suffixes name the pipeline stage the signal belongs to
// (_e, _m, _g/_w, _w2); each staging flop moves a signal one stage later.
module lsu_stb_rwctl (/*AUTOARG*/
   // Outputs
   so, lsu_stbctl_flush_pipe_w, stb_cam_wr_no_ivld_m,
   ld_rawp_st_ced_w2, stb_data_wr_ptr, stb_data_wptr_vld,
   stb_data_rd_ptr, stb_data_rptr_vld, stb_wdata_ramd_b75_b64,
   stb_cam_cm_tid, stb_ldst_byte_msk, stb_ldst_byte_msk_min,
   stb_cam_rw_ptr, stb_cam_wptr_vld, stb_cam_rptr_vld,
   lsu_st_sz_bhww_m, lsu_st_sz_dw_m, lsu_st_sz_bhw_m,
   lsu_st_sz_wdw_m, lsu_st_sz_b_m, lsu_st_sz_w_m, lsu_st_sz_hw_m,
   lsu_st_sz_hww_m, ld_rawp_st_ackid_w2, stb_flush_st_g,
   stb_cam_wvld_m, lsu_st_rq_type_m, lsu_stb_data_early_sel_e,
   lsu_stb_data_final_sel_m, lsu_ldquad_inst_m, stb_thrd_en_g,
   flsh_inst_m, lsu_stb_va_m, lsu_stb_empty_buf, lsu_spu_stb_empty,
   ifu_tlu_inst_vld_m_bf1, ifu_tlu_inst_vld_m_bf2, lsu_ifu_stbcnt0,
   lsu_ifu_stbcnt1, lsu_ifu_stbcnt2, lsu_ifu_stbcnt3,
   lsu_ffu_stb_full0, lsu_ffu_stb_full1, lsu_ffu_stb_full2,
   lsu_ffu_stb_full3,
   // Inputs
   rclk, rst_tri_en, si, se, ld_inst_vld_e, ldst_sz_e, st_inst_vld_e,
   stb_pcx_rptr0, stb_wrptr0, stb_pcx_rptr1, stb_wrptr1,
   stb_pcx_rptr2, stb_wrptr2, stb_pcx_rptr3, stb_wrptr3,
   stb_cam_hit_ptr, stb_cam_hit, lsu_ldst_va_m, sta_internal_m,
   ifu_tlu_thrid_e, tlu_exu_early_flush_pipe_w, lsu_ttype_vld_m2,
   ifu_lsu_flush_w, lsu_defr_trp_taken_g, ifu_lsu_casa_e,
   ifu_lsu_ldstub_e, ifu_lsu_swap_e, ifu_lsu_ldst_dbl_e,
   stb_state_ced0, stb_state_ced1, stb_state_ced2, stb_state_ced3,
   stb_ld_full_raw, stb_ld_partial_raw, stb_wrptr0_prev,
   stb_wrptr1_prev, stb_wrptr2_prev, stb_wrptr3_prev,
   ifu_lsu_alt_space_e, ifu_lsu_ldst_fp_e, lsu_quad_asi_e,
   lsu_st_rmo_m, lsu_bst_in_pipe_m, ffu_lsu_kill_fst_w,
   ffu_lsu_blk_st_e, ffu_lsu_blk_st_tid_m, ffu_lsu_blk_st_va_e,
   lsu_snap_blk_st_m, tlb_pgnum_b39_g, lsu_stb_empty,
   ifu_tlu_flsh_inst_e, stb_cam_mhit, ifu_tlu_inst_vld_m,
   lsu_st_pcx_rq_pick, lsu_st_pcx_rq_vld, stb_rdata_ramc_b8t0,
   lsu_stbcnt0, lsu_stbcnt1, lsu_stbcnt2, lsu_stbcnt3
   ) ;

// Clock, scan and test controls.
input                   rclk ;
//input                   grst_l ;
//input                   arst_l ;
input                   rst_tri_en;
input                   si;
input                   se;
output                  so;

input                   ld_inst_vld_e ;         // load in pipe.
input [1:0]             ldst_sz_e ;             // size of load.
input                   st_inst_vld_e ;         // store in pipe.
// Currently bypass flop make request
//input [3:0]           pcx_rq_for_stb ;        // pcx request rd of dfq - threaded

// Per-thread (0..3) pcx read pointers and write pointers.
//input [2:0]           stb_dfq_rptr0 ;         // dfq rptr for stb0
input [2:0]             stb_pcx_rptr0 ;         // pcx rptr for stb0
input [2:0]             stb_wrptr0 ;            // wrt ptr - stb0
//input [2:0]           stb_dfq_rptr1 ;         // dfq rptr for stb1
input [2:0]             stb_pcx_rptr1 ;         // pcx rptr for stb1
input [2:0]             stb_wrptr1 ;            // wrt ptr - stb1
//input [2:0]           stb_dfq_rptr2 ;         // dfq rptr for stb2
input [2:0]             stb_pcx_rptr2 ;         // pcx rptr for stb2
input [2:0]             stb_wrptr2 ;            // wrt ptr - stb2
//input [2:0]           stb_dfq_rptr3 ;         // dfq rptr for stb3
input [2:0]             stb_pcx_rptr3 ;         // pcx rptr for stb3
input [2:0]             stb_wrptr3 ;            // wrt ptr - stb3

input [2:0]             stb_cam_hit_ptr ;       // entry which hit
input                   stb_cam_hit ;           // hit has occurred

//input [7:0]           stb_state_vld0 ;        // valid bits - stb0
//input [7:0]           stb_state_vld1 ;        // valid bits - stb1
//input [7:0]           stb_state_vld2 ;        // valid bits - stb2
//input [7:0]           stb_state_vld3 ;        // valid bits - stb3

input [9:0]             lsu_ldst_va_m ;
input                   sta_internal_m ;        // internal stxa
input [1:0]             ifu_tlu_thrid_e ;       // thread-id.

// Flush sources; they are OR-ed together inside this module to form
// lsu_stbctl_flush_pipe_w.
// output               lsu_stbrwctl_flush_pipe_w ;  // tmp for tso_mon
input                   tlu_exu_early_flush_pipe_w;
input                   lsu_ttype_vld_m2;
input                   ifu_lsu_flush_w;
input                   lsu_defr_trp_taken_g;
output                  lsu_stbctl_flush_pipe_w;

input                   ifu_lsu_casa_e ;        // compare-swap instr
input                   ifu_lsu_ldstub_e ;      // ldstub
input                   ifu_lsu_swap_e ;        // swap
input                   ifu_lsu_ldst_dbl_e;     // ldst dbl, specifically for stquad.
//input [63:0]          lsu_stb_st_data_g ;     // data to be written to stb

input [7:0]             stb_state_ced0 ;
input [7:0]             stb_state_ced1 ;
input [7:0]             stb_state_ced2 ;
input [7:0]             stb_state_ced3 ;

input [7:0]             stb_ld_full_raw ;
input [7:0]             stb_ld_partial_raw ;

input [2:0]             stb_wrptr0_prev ;
input [2:0]             stb_wrptr1_prev ;
input [2:0]             stb_wrptr2_prev ;
input [2:0]             stb_wrptr3_prev ;

input                   ifu_lsu_alt_space_e ;   // alt_space inst
input                   ifu_lsu_ldst_fp_e ;
//input                 tlb_cam_hit ;           // tlb cam hit - mstage
input                   lsu_quad_asi_e ;        // quad ldst asi
//input [3:0]           lsu_st_ack_rq_stb ;
//input                 lsu_dtlb_bypass_e ;

input                   lsu_st_rmo_m ;          // rmo st in m cycle.
input                   lsu_bst_in_pipe_m ;     // 1st helper for bst.
input                   ffu_lsu_kill_fst_w ;    // ecc error on st.
input                   ffu_lsu_blk_st_e ;      // blk st helper signalled by ffu
input [1:0]             ffu_lsu_blk_st_tid_m ;  // blk st tid - from ffu_lsu_data
input [5:3]             ffu_lsu_blk_st_va_e ;   // bits 5:3 of va from increment
input                   lsu_snap_blk_st_m ;     // snap blk st state

input                   tlb_pgnum_b39_g ;
input [3:0]             lsu_stb_empty ;         // thread's stb is empty
input                   ifu_tlu_flsh_inst_e;
input                   stb_cam_mhit ;
input                   ifu_tlu_inst_vld_m ;
//input [3:0]           lsu_st_pcx_rq_kill_w2 ;
input [3:0]             lsu_st_pcx_rq_pick ;
input                   lsu_st_pcx_rq_vld ;
input [8:0]             stb_rdata_ramc_b8t0 ;   // scan-only

output                  stb_cam_wr_no_ivld_m ;
//output                ld_rawp_st_ced_g ;
output                  ld_rawp_st_ced_w2 ;

output [4:0]            stb_data_wr_ptr ;       // write ptr - stb data
output                  stb_data_wptr_vld ;     // wr vld for stb data
output [4:0]            stb_data_rd_ptr ;       // rd ptr for stb data
output                  stb_data_rptr_vld ;     // rptr vld for stb data
output [75:64]          stb_wdata_ramd_b75_b64 ; // write data for DATA RAM.
// partial or full raw required
output [1:0]            stb_cam_cm_tid ;        // cam tid - stb cam
//output [7:0]          stb_cam_sqsh_msk ;      // squash spurious hits
//output                stb_cam_vld ;
output [7:0]            stb_ldst_byte_msk ;     // byte mask for write/cam
output [7:0]            stb_ldst_byte_msk_min ; // byte mask for write/cam for min path
//output [3:0]          stb_rd_for_pcx_sel ;    // stb's st selected for read for pcx
output [4:0]            stb_cam_rw_ptr ;        // rw ptr for shared stb cam port
output                  stb_cam_wptr_vld ;      // wr vld for stb write
output                  stb_cam_rptr_vld ;      // rd vld for stb read
//output                lsu_stb_pcx_rvld_d1 ;   // stb has been read-delayby1cycle
//output                lsu_stb_dfq_rvld ;      // wr to dfq stb bypass ff

// Store-size selects; each is an OR of the decoded byte/hword/word/dword
// size terms named in its comment.
output                  lsu_st_sz_bhww_m ;      // byte or hword or word
output                  lsu_st_sz_dw_m ;        // double word
output                  lsu_st_sz_bhw_m ;       // byte or hword
output                  lsu_st_sz_wdw_m ;       // word or dword
output                  lsu_st_sz_b_m ;         // byte
output                  lsu_st_sz_w_m ;         // word
output                  lsu_st_sz_hw_m ;        // hword
output                  lsu_st_sz_hww_m ;       // hword or word

//output                ld_stb_full_raw_g ;
//output                ld_stb_partial_raw_g ;
//output [3:0]          ld_stb_full_raw_g ;
//output [3:0]          ld_stb_partial_raw_g ;
output [2:0]            ld_rawp_st_ackid_w2 ;
//output [2:0]          stb_dfq_rd_id ;         // stb entry being read for current thread

output [3:0]            stb_flush_st_g ;        // st is flushed in cycle g
output [3:0]            stb_cam_wvld_m ;
output [2:1]            lsu_st_rq_type_m ;
output [3:0]            lsu_stb_data_early_sel_e ; // select source of stb data.
output                  lsu_stb_data_final_sel_m ; // select source of stb data.
output                  lsu_ldquad_inst_m ;     // ldquad inst
//output                lsu_stdbl_inst_m ;      // stdbl inst
//output [1:0]          lsu_stb_rd_tid ;        // thread for which stb read occurs
output [3:0]            stb_thrd_en_g ;         // thread id for current stb access
output                  flsh_inst_m;
output [9:3]            lsu_stb_va_m;
output [3:0]            lsu_stb_empty_buf ;
output [3:0]            lsu_spu_stb_empty ;
output                  ifu_tlu_inst_vld_m_bf1;
output                  ifu_tlu_inst_vld_m_bf2;

input [3:0]             lsu_stbcnt0;
input [3:0]             lsu_stbcnt1;
input [3:0]             lsu_stbcnt2;
input [3:0]             lsu_stbcnt3;

output [3:0]            lsu_ifu_stbcnt0;
output [3:0]            lsu_ifu_stbcnt1;
output [3:0]            lsu_ifu_stbcnt2;
output [3:0]            lsu_ifu_stbcnt3;

output                  lsu_ffu_stb_full0;
output                  lsu_ffu_stb_full1;
output                  lsu_ffu_stb_full2;
output                  lsu_ffu_stb_full3;

/*AUTOWIRE*/
// Beginning of automatic wires (for undeclared instantiated-module outputs)
// End of automatics
// Beginning of automatic wires (for undeclared instantiated-module outputs)
// End of automatics

// Internal nets.
//wire [4:0]    stb_dequeue_ptr ;
wire [2:0]      stb_wptr_prev ;
wire [1:0]      st_thrid_m,st_thrid_g ;
wire [7:0]      ld_any_raw_vld ;
wire [7:0]      ld_any_raw_vld_d1 ;
//wire          ld_raw_mhit ;
wire [2:0]      st_rq_type_m,st_rq_type_g ;
wire [1:0]      ldst_sz_m,ldst_sz_g, pipe_ldst_sz_m ;
wire            ldst_byte, ldst_hwrd, ldst_word, ldst_dwrd ;
wire [7:0]      ldst_byte_mask ;
wire [2:0]      stb_wptr ;
wire [1:0]      thrid_m,thrid_g ;
wire            ld_inst_vld_m, st_inst_vld_m ;
wire            ldst_dbl_m;
wire            atomic_m ;
wire            ldstub_m ;
wire            casa_m, casa_g ;
wire            swap_m;
wire            flush_st_g ;
wire            cam_wptr_vld_g ;
wire [2:0]      cam_wptr_d1 ;
wire [2:0]      stb_rdptr0,stb_rdptr1 ;
wire [2:0]      stb_rdptr2,stb_rdptr3 ;
//wire [3:0]    stb_rd_mask ;
wire [3:0]      stb_select_rptr ;
wire [1:0]      stb_rd_thrid ;
//wire          cam_vld_g ;
wire [9:0]      ldst_va_m, pipe_ldst_va_m ;
wire [3:0]      ldst_va_g ;
wire [2:0]      cam_wr_ptr ;
wire            thread0_m, thread1_m, thread2_m, thread3_m ;
wire            thread0_g, thread1_g, thread2_g, thread3_g ;
wire [2:0]      ld_rawp_stb_id ;
//wire          rd_for_dfq_granted ;
wire [7:0]      stb_state_ced,stb_state_ced_d1 ;
//wire          stq_wr_en ;
//wire [3:0]    stq_wr_en_g ;
//wire [3:0]    stquad_vld ;
//wire [2:0]    stquad_ptr0,stquad_ptr1,stquad_ptr2,stquad_ptr3 ;
//wire [3:0]    ld_stq_hit_g ;
//wire          ldq_hit_g ;
//wire [3:0]    ldq_hit_g ;
wire            ldst_fp_m;
wire            ldstub_e,casa_e,ldst_dbl_e;
//wire          stb_data_final_sel_e ;
wire            alt_space_e,alt_space_m ;
wire            quad_asi_m ;
//wire          stquad_e, stquad_m ;
wire            stdbl_e ;
//wire          dfq_any_rq_for_stb ;
//wire [3:0]    stb_rd_for_dfq ;        // read rq for dfq - threaded
wire            blkst_m,blkst_g ;
wire            stb_not_empty ;

// All flops in this module are clocked by clk, a plain alias of rclk.
wire            clk;
assign          clk = rclk;

// wire rst_l;
// wire stb_rwctl_rst_l;
// dffrl_async rstff(.din (grst_l),
//                   .q   (stb_rwctl_rst_l),
//                   .clk (clk), .se(se), .si(), .so(),
//                   .rst_l (arst_l));

//=========================================================================================
//      MISC
//=========================================================================================

// Scan-only flops.
// The flopped value (stb_rdata_ramc_b8t0_so) is not read by any logic in
// the visible part of this module.
wire [8:0]      stb_rdata_ramc_b8t0_so ;
dff_s #(9) scmscan_ff (
        .din    (stb_rdata_ramc_b8t0[8:0]),
        .q      (stb_rdata_ramc_b8t0_so[8:0]),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

//=========================================================================================
//      INST_VLD_W GENERATION
//=========================================================================================

wire            flush_w_inst_vld_m ;
wire            lsu_inst_vld_w ;
wire            lsu_stbrwctl_flush_pipe_w;

//=======================================
//instantiate buffers
//======================================
// ifu_tlu_inst_vld_m is buffered once (bf0) for local use; bf1 and bf2 are
// further buffered copies of bf0 that leave the module as outputs.
wire            ifu_tlu_inst_vld_m_bf0;

bw_u1_buf_10x UZfix_ifu_tlu_inst_vld_m_bf0 ( .a(ifu_tlu_inst_vld_m), .z(ifu_tlu_inst_vld_m_bf0) );
bw_u1_buf_30x UZfix_ifu_tlu_inst_vld_m_bf1 ( .a(ifu_tlu_inst_vld_m_bf0), .z(ifu_tlu_inst_vld_m_bf1) );
bw_u1_buf_20x UZfix_ifu_tlu_inst_vld_m_bf2 ( .a(ifu_tlu_inst_vld_m_bf0), .z(ifu_tlu_inst_vld_m_bf2) );

// The M-stage instruction valid is dropped when the pipe is being flushed
// and the M-stage thread is the same thread as the one in the G stage.
assign  flush_w_inst_vld_m =
        ifu_tlu_inst_vld_m_bf0 &
        ~(lsu_stbrwctl_flush_pipe_w & (thrid_m[1:0] == thrid_g[1:0])) ; // really lsu_flush_pipe_w

dff_s stgw_ivld (
        .din    (flush_w_inst_vld_m),
        .q      (lsu_inst_vld_w),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

// Flush of the W-stage instruction: any of the early TLU flush, a valid
// trap type on a valid W-stage instruction, a deferred trap, or the IFU
// flush.  The same value drives both the output and the local copy.
wire            other_flush_pipe_w;
wire            tlu_early_flush_pipe_w;

assign  tlu_early_flush_pipe_w = tlu_exu_early_flush_pipe_w;
assign  other_flush_pipe_w =
        tlu_early_flush_pipe_w | (lsu_ttype_vld_m2 & lsu_inst_vld_w) | lsu_defr_trp_taken_g ;

wire            lsu_flush_pipe_w;

assign  lsu_flush_pipe_w = other_flush_pipe_w | ifu_lsu_flush_w ;
assign  lsu_stbctl_flush_pipe_w = lsu_flush_pipe_w ;
assign  lsu_stbrwctl_flush_pipe_w = lsu_flush_pipe_w ;

//=========================================================================================
//      STB Array Addr/Ctl Generation
//=========================================================================================

assign  ldstub_e = ifu_lsu_ldstub_e ;
assign  casa_e = ifu_lsu_casa_e ;
assign  ldst_dbl_e = ifu_lsu_ldst_dbl_e ;
assign  alt_space_e = ifu_lsu_alt_space_e ;

//assign  stdbl_e = ldst_dbl_e & (~alt_space_e | (alt_space_e & ~lsu_quad_asi_e)) ;
assign  stdbl_e = ldst_dbl_e ;

// wire lsu_stdbl_inst_m;
//dff  stq_stgm (
//        .din    (stdbl_e),
//        .q      (lsu_stdbl_inst_m),
//        .clk    (clk),
//        .se     (se), .si (), .so ()
//        );

// This path can probably be eased.
// Early store-data source select: [0] ldstub, [1] casa, [3] store double,
// [2] none of those.  While rst_tri_en is high only bit [2] is asserted.
assign  lsu_stb_data_early_sel_e[0] = ldstub_e & ~rst_tri_en;
assign  lsu_stb_data_early_sel_e[1] = casa_e & ~rst_tri_en;
assign  lsu_stb_data_early_sel_e[2] = ~(ldstub_e | casa_e | stdbl_e) | rst_tri_en;
assign  lsu_stb_data_early_sel_e[3] = stdbl_e & ~rst_tri_en ;

// modify for accepting bst data out of pipe.
//assign  stb_data_final_sel_e = ~(ldst_fp_e | ffu_lsu_blk_st_e) ;

/*dff  lsel_g (
        .din    (stb_data_final_sel_e), .q (lsu_stb_data_final_sel_m),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );*/

// Final select is deasserted for fp stores and block stores.
assign  lsu_stb_data_final_sel_m = ~(ldst_fp_m | blkst_m) ;

wire            real_st_m ;
wire            flsh_inst_m, flsh_inst_g ;
// !!! could qualify st_inst_vld_e with stxa_internal !!!
// E -> M staging of the instruction attributes (13 bits, in the order of
// the concatenations below).  The staged store valid is named real_st_m;
// st_inst_vld_m additionally includes the flush instruction.
dff_s #(13) stgm_vld (
        .din    ({ld_inst_vld_e,st_inst_vld_e,ldst_sz_e[1:0],
                  ifu_lsu_swap_e, ifu_lsu_ldstub_e, ifu_lsu_casa_e,ifu_lsu_ldst_dbl_e,
                  ifu_tlu_thrid_e[1:0],ifu_lsu_ldst_fp_e,lsu_quad_asi_e,ifu_tlu_flsh_inst_e}),
        .q      ({ld_inst_vld_m,real_st_m,pipe_ldst_sz_m[1:0],
                  swap_m,ldstub_m,casa_m,ldst_dbl_m,thrid_m[1:0],ldst_fp_m,quad_asi_m,flsh_inst_m}),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

assign  st_inst_vld_m = real_st_m | flsh_inst_m ;

// do we need ld/st unflushed ?
wire            sta_internal_g;

// M -> G staging (7 bits).
dff_s #(7) stgw_vld (
        .din    ({sta_internal_m, casa_m, thrid_m[1:0],ldst_sz_m[1:0], flsh_inst_m}),
        .q      ({sta_internal_g, casa_g, thrid_g[1:0],ldst_sz_g[1:0], flsh_inst_g}),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

// stb-cam will be written by st at rising edge of g-stage.
// However, st can be flushed after write. To keep the stb state consistent,
// the valid and write ptr will not be updated until the rising edge of w2.
wire            early_flush_cond_g,partial_flush_st_g ;

// Early flush: internal stxa, or no valid instruction in W and no block
// store in G, or the FFU kills the fp store.
assign  early_flush_cond_g =
        (sta_internal_g | ~(lsu_inst_vld_w | blkst_g) | ffu_lsu_kill_fst_w) ;

// Full flush adds the pipe flush and applies only when the CAM write was
// valid in the previous cycle.
assign  flush_st_g = (early_flush_cond_g | lsu_stbrwctl_flush_pipe_w) & cam_wptr_vld_g ;

//timing, send to stb_ctl and qualified by stb_cam_wvld_g (thread version of cam_wptr_vld_g)
//assign  partial_flush_st_g = early_flush_cond_g & cam_wptr_vld_g ;
assign  partial_flush_st_g = early_flush_cond_g ;

assign  atomic_m = (casa_m | ldstub_m | swap_m) & st_inst_vld_m ;

// WRITE PTR VALID GENERATION.
// meant specifically to squash pcx_rq_for_stb.
// Store-like write indication that is NOT qualified with the instruction
// valid (hence "no_ivld").
assign  stb_cam_wr_no_ivld_m = (st_inst_vld_m | casa_m | ldstub_m | swap_m | blkst_m) ;

//bug3610 - kill cam write vld(==stb data write vld next cycle) to avoid data read and write same cycle
//          to the same entry
wire            b2b_st_detect ;

// CAM write valid: a valid store/atomic in M, or a block store, unless the
// previous store is being flushed and this store is for the same thread.
assign  stb_cam_wptr_vld
        = (((st_inst_vld_m | atomic_m) & ifu_tlu_inst_vld_m_bf0) | blkst_m) & ~(flush_st_g & b2b_st_detect) ;
        //= ((st_inst_vld_m | atomic_m) & ifu_tlu_inst_vld_m_bf0) | blkst_m ; // bug3610
        //= (st_inst_vld_m | atomic_m | (ldst_dbl_m & st_inst_vld_m) | blkst_m) ;

dff_s wptr_g (
        .din    (stb_cam_wptr_vld),
        .q      (cam_wptr_vld_g),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

//flop move into mem cell (roll back)
// The data-array write valid is the CAM write valid delayed by one cycle.
assign  stb_data_wptr_vld = cam_wptr_vld_g ;

// WRITE PTR GENERATION
// It is assumed that if there is a store in the pipe, there is a
// free entry in the corresponding stb. Otherwise, the pipe would've
// stalled for the thread.
// If a store-like inst has been flushed, then the old ptr has to be restored
// and used. This is done within thread specific stb control

// One-hot decode of the store thread id in M.
assign  thread0_m = ~st_thrid_m[1] & ~st_thrid_m[0] ;
assign  thread1_m = ~st_thrid_m[1] &  st_thrid_m[0] ;
assign  thread2_m =  st_thrid_m[1] & ~st_thrid_m[0] ;
assign  thread3_m =  st_thrid_m[1] &  st_thrid_m[0] ;

dff_s #(4) stgg_thrd (
        .din    ({thread0_m,thread1_m,thread2_m,thread3_m}),
        .q      ({thread0_g,thread1_g,thread2_g,thread3_g}),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

assign  stb_thrd_en_g[0] = thread0_g ;
assign  stb_thrd_en_g[1] = thread1_g ;
assign  stb_thrd_en_g[2] = thread2_g ;
assign  stb_thrd_en_g[3] = thread3_g ;

//assign  stb_wptr[2:0] =
//        thread0_m ? stb_wrptr0[2:0] :
//        thread1_m ? stb_wrptr1[2:0] :
//        thread2_m ? stb_wrptr2[2:0] :
//        thread3_m ? stb_wrptr3[2:0] : 3'bxxx ;

// AND-OR form of the per-thread write pointer mux; exactly one threadN_m
// is high because they are a full decode of st_thrid_m.
assign  stb_wptr[2:0] =
        (thread0_m ? stb_wrptr0[2:0] : 3'b000) |
        (thread1_m ? stb_wrptr1[2:0] : 3'b000) |
        (thread2_m ? stb_wrptr2[2:0] : 3'b000) |
        (thread3_m ? stb_wrptr3[2:0] : 3'b000) ;

assign  b2b_st_detect =                 // detect back-to-back store
        (thread0_m & thread0_g) |
        (thread1_m & thread1_g) |
        (thread2_m & thread2_g) |
        (thread3_m & thread3_g) ;

// On a flushed back-to-back store the previous cycle's pointer is reused.
assign  cam_wr_ptr[2:0] = (flush_st_g & b2b_st_detect) ? cam_wptr_d1[2:0] : stb_wptr[2:0] ;

dff_s #(3) wptr_d1 (
        .din    (cam_wr_ptr[2:0]),
        .q      (cam_wptr_d1[2:0]),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

// Per-thread CAM write valids.
assign  stb_cam_wvld_m[0] = stb_cam_wptr_vld & thread0_m ;
assign  stb_cam_wvld_m[1] = stb_cam_wptr_vld & thread1_m ;
assign  stb_cam_wvld_m[2] = stb_cam_wptr_vld & thread2_m ;
assign  stb_cam_wvld_m[3] = stb_cam_wptr_vld & thread3_m ;

// contains potential flush conditions.
// All four bits carry the same unqualified value.
assign  stb_flush_st_g[0] = partial_flush_st_g ;
assign  stb_flush_st_g[1] = partial_flush_st_g ;
assign  stb_flush_st_g[2] = partial_flush_st_g ;
assign  stb_flush_st_g[3] = partial_flush_st_g ;

// stb-data has a delayed write in w2. Alignment of stb data will be done on write
// of 64b into stb. This allows write of stb cam and data to be done in the
// same cycle, and thus read can occur simultaneously for pcx.
//mem cell change to bw_r_rf32x80, flop move into mem cell (roll back)
//flop outside mem cell
// Data write pointer = {thread id, entry}, both delayed one cycle from M.
assign  stb_data_wr_ptr[4:0] = {st_thrid_g[1:0],cam_wptr_d1[2:0]};

// RD PTR/VLD GENERATION
// stb read for dfq dumps data into a bypass flop. Thus a read for the dfq can occur
// if a thread's stb has an acked entry and the bypass flop is empty.
// stb read for pcx occurs on availability of queue entry.
// Both dfq and pcx require a read of the cam and data. The reads
// can thus not happen when load that hits in the stb is in the w2 (change to W3)
// stage and a store is in the g-stage of the pipe. Both
// probabilities are low.
// ??Read for pcx takes priority over dfq. No deadlock can occur
// ??as at some point the pcx reads will be exhausted and the stb
// ??will have to drain itself. The stb is self-regulating in this regard.
// priority of stb read: ld_cam_hit (full raw bypass) > dfq > pcx

//====================================================================================
//raw bypass timing
//G/WB                           W2     W3                        W4
//cam_hit(from stb_cam output)   flop   stb_data rd_ptr/rd_vld    read STB_DATA/BYP
//====================================================================================

wire [1:0]      thrid_w2;
wire [2:0]      stb_cam_hit_ptr_w2;
wire            stb_cam_hit_w2;
wire            stb_cam_hit_w;

//bug3503
// A CAM hit only counts for a valid, unflushed W-stage instruction.
assign  stb_cam_hit_w = stb_cam_hit & lsu_inst_vld_w & ~lsu_stbrwctl_flush_pipe_w;

// Thread id, hit entry and qualified hit staged into W2.
dff_s #(6) stb_cam_hit_stg_w2 (
        .din    ({thrid_g[1:0],  stb_cam_hit_ptr[2:0],    stb_cam_hit_w }),
        .q      ({thrid_w2[1:0], stb_cam_hit_ptr_w2[2:0], stb_cam_hit_w2}),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

// logic moved to qctl1
// pcx is making request for data in current cycle. Can be multi-hot.
//assign  pcx_any_rq_for_stb = |pcx_rq_for_stb[3:0] ;
//assign  pcx_any_rq_for_stb =
//        (pcx_rq_for_stb[0] & ~lsu_st_pcx_rq_kill_w2[0]) |
//        (pcx_rq_for_stb[1] & ~lsu_st_pcx_rq_kill_w2[1]) |
//        (pcx_rq_for_stb[2] & ~lsu_st_pcx_rq_kill_w2[2]) |
//        (pcx_rq_for_stb[3] & ~lsu_st_pcx_rq_kill_w2[3]) ;

// ??ld-cam hit based read takes precedence
// ??Timing : This could be made pessimistic by using ld_inst_vld_g
//assign  stb_select_rptr[3:0] = pcx_rq_for_stb[3:0] ; // timing fix
assign  stb_select_rptr[3:0] = lsu_st_pcx_rq_pick[3:0] ;

// This could be a critical path. Be careful !
//assign  stb_rdptr0[2:0] = ~dfq_any_rq_for_stb ? stb_pcx_rptr0[2:0] : stb_dfq_rptr0[2:0] ;
assign  stb_rdptr0[2:0] = stb_pcx_rptr0[2:0] ;
assign  stb_rdptr1[2:0] = stb_pcx_rptr1[2:0] ;
assign  stb_rdptr2[2:0] = stb_pcx_rptr2[2:0] ;
assign  stb_rdptr3[2:0] = stb_pcx_rptr3[2:0] ;

// logic moved to qctl1
//wire [1:0] stb_rd_tid ;
//
//assign  stb_rd_tid[0] = pcx_rq_for_stb[1] | pcx_rq_for_stb[3] ;
//assign  stb_rd_tid[1] = pcx_rq_for_stb[2] | pcx_rq_for_stb[3] ;
//
//dff #(2) stbtid_stgd1 (
//        .din    (stb_rd_tid[1:0]), .q (lsu_stb_rd_tid[1:0]),
//        .clk    (clk),
//        .se     (se), .si (), .so ()
//        );

//assign  stb_dfq_rd_id[2:0] = stb_data_rd_ptr[2:0] ; // or cam rd ptr

//timing fix:5/6/03
//bug4988 - change the priority from 0->3 to 3->0; the reason is when select_rptr=0, the
//          default thread id(rptr[4:3])=thread0 but the default rptr[2:0]=thread3. If
//          thread0 and thread3 rptr are the same and the thread0 write is occurring, the
//          rptr[4:0] is same as wptr[4:0]
wire [2:0]      stb_rdptr ;

//assign  stb_rdptr[2:0] =
//        stb_select_rptr[0] ? stb_rdptr0[2:0] :
//        stb_select_rptr[1] ? stb_rdptr1[2:0] :
//        stb_select_rptr[2] ? stb_rdptr2[2:0] :
//                             stb_rdptr3[2:0] ;

//assign  stb_rdptr[2:0] =
//        stb_select_rptr[3] ? stb_rdptr3[2:0] :
//        stb_select_rptr[2] ? stb_rdptr2[2:0] :
//        stb_select_rptr[1] ? stb_rdptr1[2:0] :
//                             stb_rdptr0[2:0] ;

// AND-OR mux with no default leg: the result is 0 when no select bit is
// set, and the OR of the selected pointers if more than one is set.
assign  stb_rdptr[2:0] =
        (stb_select_rptr[3] ? stb_rdptr3[2:0] : 3'b0) |
        (stb_select_rptr[2] ? stb_rdptr2[2:0] : 3'b0) |
        (stb_select_rptr[1] ? stb_rdptr1[2:0] : 3'b0) |
        (stb_select_rptr[0] ? stb_rdptr0[2:0] : 3'b0) ;

//timing fix: 8/29/03 - remove the default select logic for stb_select_rptr since synthesis is forced to replace
//            4to1 mux w/ and-or mux or 2to1 mux
//wire stb_select_rptr_b3;
//assign stb_select_rptr_b3 = ~|stb_select_rptr[2:0];

// Complemented read pointer, consumed by the cam rw pointer mux cells and
// re-inverted for stb_data_rd_ptr.
wire [2:0]      stb_rdptr_l;

assign  stb_rdptr_l[2:0] = ~stb_rdptr[2:0] ;

//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b0_mux(
//                  .z(stb_rdptr_l[0]),
//                  .d0(stb_rdptr0[0]),
//                  .d1(stb_rdptr1[0]),
//                  .d2(stb_rdptr2[0]),
//                  .d3(stb_rdptr3[0]),
//                  .s0(stb_select_rptr[0]),
//                  .s1(stb_select_rptr[1]),
//                  .s2(stb_select_rptr[2]),
//                  .s3(stb_select_rptr[3]));
//
//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b1_mux(
//                  .z(stb_rdptr_l[1]),
//                  .d0(stb_rdptr0[1]),
//                  .d1(stb_rdptr1[1]),
//                  .d2(stb_rdptr2[1]),
//                  .d3(stb_rdptr3[1]),
//                  .s0(stb_select_rptr[0]),
//                  .s1(stb_select_rptr[1]),
//                  .s2(stb_select_rptr[2]),
//                  .s3(stb_select_rptr[3]));
//
//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b2_mux(
//                  .z(stb_rdptr_l[2]),
//                  .d0(stb_rdptr0[2]),
//                  .d1(stb_rdptr1[2]),
//                  .d2(stb_rdptr2[2]),
//                  .d3(stb_rdptr3[2]),
//                  .s0(stb_select_rptr[0]),
//                  .s1(stb_select_rptr[1]),
//                  .s2(stb_select_rptr[2]),
//                  .s3(stb_select_rptr[3]));
//

// Encode the picked thread (one-hot select -> 2-bit id).
assign  stb_rd_thrid[0] = stb_select_rptr[1] | stb_select_rptr[3] ;
assign  stb_rd_thrid[1] = stb_select_rptr[2] | stb_select_rptr[3] ;

// read
// this mux will have to be accommodated in path !!! Talk to Satya.
// Timing : This could be made pessimistic by using ld_inst_vld_g
// raw read STB at W3 (changed from W2)
assign  stb_data_rd_ptr[4:0] =
        stb_cam_hit_w2 ?
        {thrid_w2[1:0],stb_cam_hit_ptr_w2[2:0]} :      // rd based on ld hit
        {stb_rd_thrid[1:0],~stb_rdptr_l[2:0]} ;        // rd for pcx or dfq

// Blk-st modification for thread.
// A block store takes its thread id from the FFU instead of the pipe.
assign  st_thrid_m[1:0] = blkst_m ? ffu_lsu_blk_st_tid_m[1:0] : thrid_m[1:0] ;

dff_s #(2) stid_stgg (
        .din    (st_thrid_m[1:0]),
        .q      (st_thrid_g[1:0]),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

//timing fix: 5/6/03
//assign  stb_cam_rw_ptr[4:0] = stb_cam_wptr_vld ?
//        {st_thrid_m[1:0],cam_wr_ptr[2:0]} :     // write
//        {stb_rd_thrid[1:0],stb_rdptr[2:0]} ;    // read

// Shared CAM pointer: the write {thread, entry} is selected when
// stb_cam_wptr_vld is high, the read {thread, entry} otherwise.  The mux is
// built from library cells that are fed complemented inputs.
// NOTE(review): this presumes bw_u1_muxi21_2x is an inverting 2:1 mux, so
// that z carries the true-polarity pointer - confirm against the cell library.
wire [2:0]      cam_wr_ptr_l;
wire [1:0]      stb_rd_thrid_l;
wire [1:0]      st_thrid_m_l;

assign  cam_wr_ptr_l[2:0] = ~cam_wr_ptr[2:0];
assign  stb_rd_thrid_l[1:0] = ~stb_rd_thrid[1:0];
assign  st_thrid_m_l[1:0] = ~st_thrid_m[1:0];

bw_u1_muxi21_2x UZsize_stb_cam_rw_ptr_b0_mux(
        .z(stb_cam_rw_ptr[0]),
        .d0(stb_rdptr_l[0]),
        .d1(cam_wr_ptr_l[0]),
        .s(stb_cam_wptr_vld));

bw_u1_muxi21_2x UZsize_stb_cam_rw_ptr_b1_mux(
        .z(stb_cam_rw_ptr[1]),
        .d0(stb_rdptr_l[1]),
        .d1(cam_wr_ptr_l[1]),
        .s(stb_cam_wptr_vld));

bw_u1_muxi21_2x UZsize_stb_cam_rw_ptr_b2_mux(
        .z(stb_cam_rw_ptr[2]),
        .d0(stb_rdptr_l[2]),
        .d1(cam_wr_ptr_l[2]),
        .s(stb_cam_wptr_vld));

bw_u1_muxi21_2x UZsize_stb_cam_rw_ptr_b3_mux(
        .z(stb_cam_rw_ptr[3]),
        .d0(stb_rd_thrid_l[0]),
        .d1(st_thrid_m_l[0]),
        .s(stb_cam_wptr_vld));

bw_u1_muxi21_2x UZsize_stb_cam_rw_ptr_b4_mux(
        .z(stb_cam_rw_ptr[4]),
        .d0(stb_rd_thrid_l[1]),
        .d1(st_thrid_m_l[1]),
        .s(stb_cam_wptr_vld));

//raw read STB at W3 (not W2)
//timing fix: 9/2/03 - reduce fanout in stb_rwctl for lsu_st_pcx_rq_pick - gen separate signal for
//            stb_cam_rptr_vld and stb_data_rptr_vld
//bug4988 - qual lsu_st_pcx_rq_vld w/ no write vld to stb_data. use stb_cam_wr_no_ivld_m instead of write vld.
//          this is the same signal used to kill pcx_rq_for_stb
//          stb_cam_rptr_vld is not set if stb_cam_wptr_vld=1
// Data-array read valid: a pcx read request with no store-like write in M,
// or a load CAM hit staged to W2.
assign  stb_data_rptr_vld =
        //(|stb_select_rptr[3:0]) |                     // pcx/dfq rd - timing fix
        //lsu_st_pcx_rq_vld |                           // pcx/dfq rd // bug4988
        (lsu_st_pcx_rq_vld & ~stb_cam_wr_no_ivld_m) |   // pcx/dfq rd
        stb_cam_hit_w2 ;                // cam hit requires read whether single or multiple

//raw read STB at W3 (not W2)
//timing fix: 9/2/03 - reduce fanout in stb_rwctl for lsu_st_pcx_rq_pick - gen separate signal for
//            stb_cam_rptr_vld and stb_data_rptr_vld
// CAM read valid: a pcx read request with no W2 load hit and no CAM write
// in the same cycle.
assign  stb_cam_rptr_vld =
        //((|stb_select_rptr[3:0]) & ~(stb_cam_hit_w2)) &    // only pcx read - timing fix
        (lsu_st_pcx_rq_vld & ~(stb_cam_hit_w2)) &       // only pcx read
        ~stb_cam_wptr_vld ;                             // st,st-like write does not block

// lsu_stb_rd_vld_d1 - not used
//dff  stbrd_stgd1 (
//        .din    (stb_cam_rptr_vld), .q (lsu_stb_rd_vld_d1),
//        .clk    (clk),
//        .se     (se), .si (), .so ()
//        );

// logic moved to qctl1
//dff #(1) prvld_stgd1 (
//        .din    (pcx_any_rq_for_stb),
//        .q      (lsu_stb_pcx_rvld_d1),
//        .clk    (clk),
//        .se     (se), .si (), .so ()
//        );

// The CAM compare thread id is the M-stage pipe thread.
assign  stb_cam_cm_tid[1:0] = thrid_m[1:0] ;

//=========================================================================================
//      BYTE MASK FORMATTING
//=========================================================================================

// Write/CAM Data for CAM RAM.
// Physical dword aligned addr - PA[39:3] (37b)
// Byte Mask - (8b)
// Total - 45b

// | b7 | b6 | b5 | b4 | b3 | b2 | b1 | b0 |
// |   hw3   |   hw2   |   hw1   |   hw0   |
// |        w1         |        w0         |
// |                  dw                   |

//dff #(11) va_m (
//        .din    (exu_lsu_ldst_va_e[10:0]), .q (pipe_ldst_va_m[10:0]),
//        .clk    (clk),
//        .se     (se), .si (), .so ()
//        );

assign  pipe_ldst_va_m[9:0] = lsu_ldst_va_m[9:0];

// Size decode of ldst_sz_m.
// ldst_byte may not be needed
assign  ldst_byte = ~ldst_sz_m[1] & ~ldst_sz_m[0] ;    // 00
assign  ldst_hwrd = ~ldst_sz_m[1] &  ldst_sz_m[0] ;    // 01
assign  ldst_word =  ldst_sz_m[1] & ~ldst_sz_m[0] ;    // 10
assign  ldst_dwrd =  ldst_sz_m[1] &  ldst_sz_m[0] ;    // 11

// Note : dword term is common.
// Byte mask from va[2:0] and the decoded size.  Address offset 0 maps to
// mask bit 7 and offset 7 to mask bit 0.  In each equation the first term
// is the byte whose own offset selects that bit (true for every size); the
// remaining terms are the aligned hword/word/dword accesses that cover it.
assign  ldst_byte_mask[0] =
        ( ldst_va_m[2] &  ldst_va_m[1] &  ldst_va_m[0] ) |
        ( ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0] & (ldst_hwrd)) |
        ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_word)) |
        (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd)) ;

assign  ldst_byte_mask[1] =
        ( ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0]) |
        ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_word)) |
        (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd)) ;

assign  ldst_byte_mask[2] =
        ( ldst_va_m[2] & ~ldst_va_m[1] &  ldst_va_m[0]) |
        ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_hwrd | ldst_word)) |
        (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd)) ;

assign  ldst_byte_mask[3] =
        ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0]) |
        (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd)) ;

assign  ldst_byte_mask[4] =
        (~ldst_va_m[2] &  ldst_va_m[1] &  ldst_va_m[0]) |
        (~ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0] & (ldst_hwrd)) |
        (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd | ldst_word)) ;

assign  ldst_byte_mask[5] =
        (~ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0]) |
        (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd | ldst_word)) ;

assign  ldst_byte_mask[6] =
        (~ldst_va_m[2] & ~ldst_va_m[1] &  ldst_va_m[0]) |
        (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd | ldst_word | ldst_hwrd)) ;

assign  ldst_byte_mask[7] =
        (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0]) ;

assign  stb_ldst_byte_msk[7:0] = ldst_byte_mask[7:0];

// Second copy of the mask, driven bit-by-bit through minbuf cells for the
// "min path" output.
bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b0 (.a(ldst_byte_mask[0]), .z(stb_ldst_byte_msk_min[0]));
bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b1 (.a(ldst_byte_mask[1]), .z(stb_ldst_byte_msk_min[1]));
bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b2 (.a(ldst_byte_mask[2]), .z(stb_ldst_byte_msk_min[2]));
bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b3 (.a(ldst_byte_mask[3]), .z(stb_ldst_byte_msk_min[3]));
bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b4 (.a(ldst_byte_mask[4]), .z(stb_ldst_byte_msk_min[4]));
bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b5 (.a(ldst_byte_mask[5]), .z(stb_ldst_byte_msk_min[5]));
bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b6 (.a(ldst_byte_mask[6]), .z(stb_ldst_byte_msk_min[6]));
bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b7 (.a(ldst_byte_mask[7]), .z(stb_ldst_byte_msk_min[7]));

// Generate selects to format st data
assign  lsu_st_sz_bhww_m = ldst_byte | ldst_hwrd | ldst_word ; // byte or hword or word
assign  lsu_st_sz_dw_m   = ldst_dwrd ;                         // double word
assign  lsu_st_sz_bhw_m  = ldst_byte | ldst_hwrd ;             // byte or hword
assign  lsu_st_sz_wdw_m  = ldst_word | ldst_dwrd ;             // word or dword
assign  lsu_st_sz_b_m    = ldst_byte ;                         // byte
assign  lsu_st_sz_w_m    = ldst_word ;                         // word
assign  lsu_st_sz_hw_m   = ldst_hwrd ;                         // hword
assign  lsu_st_sz_hww_m  = ldst_hwrd | ldst_word ;             // hword or word

//=========================================================================================
//      BLK-ST HANDLING
//=========================================================================================

wire            blkst_m_tmp ;

dff_s stgm_bst (
        .din    (ffu_lsu_blk_st_e),
        .q      (blkst_m_tmp),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

// The block-store helper is suppressed when a store, flush instruction or
// load from the pipe is in M in the same cycle.
assign  blkst_m = blkst_m_tmp & ~(real_st_m | flsh_inst_m | ld_inst_vld_m) ; // Bug 3444

dff_s stgg_bst (
        .din    (blkst_m),
        .q      (blkst_g),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

wire            snap_blk_st_local_m ;

assign  snap_blk_st_local_m = lsu_snap_blk_st_m & ifu_tlu_inst_vld_m_bf0 ;

wire [1:0]      bst_sz_m ;
wire [9:0]      bst_va_m ;

// output to be used in m-stage.
// Enabled flop: captures the size and va[9:6], va[2:0] only while the snap
// enable is high; va[5:3] comes from the FFU every cycle (next flop).
dffe_s #(9) bst_state_m (
        .din    ({ldst_sz_m[1:0],ldst_va_m[9:6],ldst_va_m[2:0]}),
        .q      ({bst_sz_m[1:0],bst_va_m[9:6],bst_va_m[2:0]}),
        .en     (snap_blk_st_local_m),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

dff_s #(3) bsva_stgm (
        .din    (ffu_lsu_blk_st_va_e[5:3]),
        .q      (bst_va_m[5:3]),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

//assign  bst_va_m[5:3] = ffu_lsu_blk_st_va_e[5:3] ;

// A block store replaces the pipe's address and size with the snapped ones.
//assign  ldst_va_m[10] = pipe_ldst_va_m[10] ;
assign  ldst_va_m[9:0] = blkst_m ? bst_va_m[9:0] : pipe_ldst_va_m[9:0] ;

assign  lsu_stb_va_m[9:3] = ldst_va_m[9:3] ;

assign  ldst_sz_m[1:0] = blkst_m ? bst_sz_m[1:0] : pipe_ldst_sz_m[1:0] ;

//=========================================================================================
//      WRITE DATA FOR DATA RAM
//=========================================================================================

// Write Data for DATA RAM.
// Data - (64b)
// (8b parity is generated on read)
// Rqtype - (3b)
// Size - (3b).
// Addr - (3b). Lower 3b of 40b addr.
// (set index and way available from ctl state.
// Total - 73b.

// st-quad requires own encoding.
// assume does not have to be changed for blk-st
assign  st_rq_type_m[2:0] =
        casa_m               ? 3'b010 :        // cas pkt 1
        (ldstub_m | swap_m)  ? 3'b110 :        // ldstub/swap
        //(stquad_m)         ? 3'b111 :        // stquad-pkt1
                               3'b001 ;        // normal store or partial interrupt rq type

// Only the upper two bits of the request type leave the module.
//assign  lsu_st_rq_type_m[2:0] = st_rq_type_m[2:0] ;
assign  lsu_st_rq_type_m[2:1] = st_rq_type_m[2:1] ;

// Need ASI decode
/*wire lsu_stquad_inst_m ;
assign  lsu_stquad_inst_m = ldst_dbl_m & st_inst_vld_m & quad_asi_m ; */

wire            st_rmo_m,st_rmo_g ;

assign  st_rmo_m = lsu_st_rmo_m | blkst_m ;    // binit and blk rmo stores.

// NOTE(review): bst_in_pipe_g has no declaration in the visible part of the
// file, so it is an implicit 1-bit net here - confirm that is intended.
dff_s #(9) stgg_etc (
        .din    ({ldst_va_m[3:0],st_rq_type_m[2:0],st_rmo_m,lsu_bst_in_pipe_m}),
        .q      ({ldst_va_g[3:0],st_rq_type_g[2:0],st_rmo_g,bst_in_pipe_g}),
        .clk    (clk),
        .se     (se), .si (), .so ()
        );

wire            bst_any_helper ;

assign  bst_any_helper = blkst_g | bst_in_pipe_g ;     // Bug 3934

// Size will have to be changed to 2bits.
// 7 more bits could be added to data ram to save read of cam in providing dfq pkt !!!
// Field layout as actually packed below:
//   [75]    st_rmo_g
//   [74:72] st_rq_type_g[2:0]
//   [71]    flsh_inst_g
//   [70]    bst_any_helper
//   [69:68] ldst_sz_g[1:0]
//   [67:64] ldst_va_g[3:0]
assign  stb_wdata_ramd_b75_b64[75:64] =
        {st_rmo_g,st_rq_type_g[2:0],flsh_inst_g,bst_any_helper,ldst_sz_g[1:0],ldst_va_g[3:0]}; // Bug3395, 3934

//=========================================================================================
//      FULL/PARTIAL RAW CALCULATION
//=========================================================================================

// io load cannot bypass from stb. A stb hit results in an io-ld being treated
// as a partial-raw. (OR should it be serialized behind any io store ??)
wire            io_ld,io_ld_w2 ;

// An io load is identified by physical page number bit 39.
assign  io_ld = tlb_pgnum_b39_g ;      // Bug 4362

// full-raw is squashed on multiple hits in stb. Treated like partial raw.
// Ensure that all ld and ld-like instructions signal ld_inst_vld. We can then
// remove qualification with ld_inst_vld_g.
/*assign  ld_stb_full_raw_g =
        (|stb_ld_full_raw[7:0]) & ~(stb_cam_mhit | ldq_hit_g | io_ld) ;
assign  ld_stb_full_raw_g[0] =
        (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
        ~(stb_cam_mhit | ldq_hit_g[0] | io_ld) & thread0_g ;
        //~(ld_raw_mhit | ld_stq_hit_g[0] | io_ld) & thread0_g ;
assign  ld_stb_full_raw_g[1] =
        (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
        ~(stb_cam_mhit | ldq_hit_g[1] | io_ld) & thread1_g ;
assign  ld_stb_full_raw_g[2] =
        (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
        ~(stb_cam_mhit | ldq_hit_g[2] | io_ld) & thread2_g ;
assign  ld_stb_full_raw_g[3] =
        (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
        ~(stb_cam_mhit | ldq_hit_g[3] | io_ld) & thread3_g ; */

// Multiple full raws are also treated like a partial.
/*assign  ld_stb_partial_raw_g =
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g | (io_ld & stb_not_empty)) ;
assign  ld_stb_partial_raw_g[0] =
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[0] | (io_ld & stb_not_empty))
        & ld_inst_vld_g & thread0_g ;
assign  ld_stb_partial_raw_g[1] =
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[1] | (io_ld & stb_not_empty))
        & ld_inst_vld_g & thread1_g ;
assign  ld_stb_partial_raw_g[2] =
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[2] | (io_ld & stb_not_empty))
        & ld_inst_vld_g & thread2_g ;
assign  ld_stb_partial_raw_g[3] =
        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[3] | (io_ld & stb_not_empty))
        & ld_inst_vld_g & thread3_g; */

//=========================================================================================
//  STQ HANDLING
//=========================================================================================

/* REMOVE STQUAD */

//=========================================================================================
//  LD QUAD HANDLING
//=========================================================================================

// alt_space_e delayed by one flop.
dff_s  altsp_stgm (
  .clk  (clk),
  .din  (alt_space_e),
  .q    (alt_space_m),
  .se   (se),
  .si   (),
  .so   ()
);

// Asserted only when all four qualifiers are set : ld_inst_vld_m, ldst_dbl_m,
// alt_space_m and quad_asi_m.
assign  lsu_ldquad_inst_m =
        ld_inst_vld_m & ldst_dbl_m & alt_space_m & quad_asi_m ;

/*wire  ldquad_inst_g ;
dff_s  ldq_stgg (
  .din  (lsu_ldquad_inst_m),
  .q    (ldquad_inst_g),
  .clk  (clk),
  .se   (se),
  .si   (),
  .so   ()
);

wire  ldq_stb_cam_hit ;
assign  ldq_stb_cam_hit = stb_cam_hit & ldquad_inst_g ;
// Terms can be made common.
assign  ldq_hit_g = ldq_stb_cam_hit ; */
/*assign  ldq_hit_g[0] = thread0_g & ldq_stb_cam_hit ;
assign  ldq_hit_g[1] = thread1_g & ldq_stb_cam_hit ;
assign  ldq_hit_g[2] = thread2_g & ldq_stb_cam_hit ;
assign  ldq_hit_g[3] = thread3_g & ldq_stb_cam_hit ; */

//=========================================================================================
//  STB MULTIPLE HIT GENERATION
//=========================================================================================

// Multiple hits in stb is to be treated as a partial raw case. The ld however must wait
// until the youngest store which hit exits the stb. A ptr needs to be calculated for this case.
// A version of stb_wptr is used instead because it is easily available. (Would this have
// any significant performance impact ? - No)

// Per-entry "any raw" vector : an entry is flagged on either a full or a
// partial raw.
assign  ld_any_raw_vld[7:0] = stb_ld_partial_raw[7:0] | stb_ld_full_raw[7:0] ;

// One-cycle delayed copies (_d1) of the raw vector and of stb_state_ced.
dff_s #(16) stgw2_rvld (
  .clk  (clk),
  .din  ({ld_any_raw_vld[7:0],    stb_state_ced[7:0]}),
  .q    ({ld_any_raw_vld_d1[7:0], stb_state_ced_d1[7:0]}),
  .se   (se),
  .si   (),
  .so   ()
);

// This equation can be optimized for the grape flow.
// This can be obtained from stb.
/*assign  ld_raw_mhit =
        (ld_any_raw_vld[7] & |(ld_any_raw_vld[6:0])) |
        (ld_any_raw_vld[6] & |(ld_any_raw_vld[5:0])) |
        (ld_any_raw_vld[5] & |(ld_any_raw_vld[4:0])) |
        (ld_any_raw_vld[4] & |(ld_any_raw_vld[3:0])) |
        (ld_any_raw_vld[3] & |(ld_any_raw_vld[2:0])) |
        (ld_any_raw_vld[2] & |(ld_any_raw_vld[1:0])) |
        (ld_any_raw_vld[1] & ld_any_raw_vld[0]) ; */

//=========================================================================================
//  STB Partial Raw ptr generation
//=========================================================================================

// The loading on the raw output of the stb cam will be significant if the signal
// has to fan out to all 4 ctl blocks. That's why the control has to be localized.

// Using the ack bit may result in pessimistic issue of partial raw loads.
// For a single partial raw or multiple hit case, detecting whether there is any
// unacked store is sufficient. Calculation is for no unacked store.
// Can we use cam_hit ptr instead !!!
//assign  ld_rawp_st_ced_w2 = (~(|(ld_any_raw_vld_d1[7:0] & ~stb_state_ced_d1[7:0]))) ;

wire  [2:0] wptr_prev ;
wire  [7:0] wptr_dcd ;  // Bug 4294
wire        iold_st_ced_g ;
wire        iold_st_ced_w2 ;

assign  wptr_prev[2:0] = stb_wptr_prev[2:0] ;

// 3-to-8 one-hot decode of wptr_prev : bit N of wptr_dcd is set when
// wptr_prev == N.
assign  wptr_dcd[7:0] = {
           wptr_prev[2] &  wptr_prev[1] &  wptr_prev[0],   // 7
           wptr_prev[2] &  wptr_prev[1] & ~wptr_prev[0],   // 6
           wptr_prev[2] & ~wptr_prev[1] &  wptr_prev[0],   // 5
           wptr_prev[2] & ~wptr_prev[1] & ~wptr_prev[0],   // 4
          ~wptr_prev[2] &  wptr_prev[1] &  wptr_prev[0],   // 3
          ~wptr_prev[2] &  wptr_prev[1] & ~wptr_prev[0],   // 2
          ~wptr_prev[2] & ~wptr_prev[1] &  wptr_prev[0],   // 1
          ~wptr_prev[2] & ~wptr_prev[1] & ~wptr_prev[0]    // 0
        } ;

// stb_state_ced bit of the entry selected by wptr_prev.
assign  iold_st_ced_g = |(stb_state_ced[7:0] & wptr_dcd[7:0]) ;

// One-cycle delayed copies (_w2) of iold_st_ced_g and io_ld.
dff_s #(2) ioldced_stgw2 (
  .clk  (clk),
  .din  ({iold_st_ced_g,  io_ld}),
  .q    ({iold_st_ced_w2, io_ld_w2}),
  .se   (se),
  .si   (),
  .so   ()
);

// io_ld_w2 set   : report the ced bit of the entry selected by wptr_prev.
// io_ld_w2 clear : report 1 only if no entry flagged in ld_any_raw_vld_d1 has
//                  its stb_state_ced_d1 bit clear.
assign  ld_rawp_st_ced_w2 =
        io_ld_w2 ? iold_st_ced_w2
                 : ~|(ld_any_raw_vld_d1[7:0] & ~stb_state_ced_d1[7:0]) ;

// For the case of a single partial raw.
// The stb id used for a raw hit is taken directly from the cam hit pointer.
assign  ld_rawp_stb_id[2:0] = stb_cam_hit_ptr[2:0] ;

/*assign  ld_rawp_stb_id[0] = stb_ld_partial_raw[1] | stb_ld_partial_raw[3] |
                              stb_ld_partial_raw[5] | stb_ld_partial_raw[7] ;
assign  ld_rawp_stb_id[1] = stb_ld_partial_raw[2] | stb_ld_partial_raw[3] |
                            stb_ld_partial_raw[6] | stb_ld_partial_raw[7] ;
assign  ld_rawp_stb_id[2] = stb_ld_partial_raw[4] | stb_ld_partial_raw[5] |
                            stb_ld_partial_raw[6] | stb_ld_partial_raw[7] ; */

// 2-to-4 one-hot decode of thrid_g : bit N is set when thrid_g == N.
wire  [3:0] pipe_thread_g ;
assign  pipe_thread_g[3:0] = {
           thrid_g[1] &  thrid_g[0],    // thread 3
           thrid_g[1] & ~thrid_g[0],    // thread 2
          ~thrid_g[1] &  thrid_g[0],    // thread 1
          ~thrid_g[1] & ~thrid_g[0]     // thread 0
        } ;

// Per-thread state is selected by the one-hot thread decode (AND-OR mux).
assign  stb_state_ced[7:0] =
        ({8{pipe_thread_g[0]}} & stb_state_ced0[7:0]) |
        ({8{pipe_thread_g[1]}} & stb_state_ced1[7:0]) |
        ({8{pipe_thread_g[2]}} & stb_state_ced2[7:0]) |
        ({8{pipe_thread_g[3]}} & stb_state_ced3[7:0]) ;

assign  stb_wptr_prev[2:0] =
        ({3{pipe_thread_g[0]}} & stb_wrptr0_prev[2:0]) |
        ({3{pipe_thread_g[1]}} & stb_wrptr1_prev[2:0]) |
        ({3{pipe_thread_g[2]}} & stb_wrptr2_prev[2:0]) |
        ({3{pipe_thread_g[3]}} & stb_wrptr3_prev[2:0]) ;

// Set when the lsu_stb_empty bit of the selected thread is clear.
assign  stb_not_empty = |(pipe_thread_g[3:0] & ~lsu_stb_empty[3:0]) ;

// Two output copies of the per-thread empty vector.
assign  lsu_stb_empty_buf[3:0] = lsu_stb_empty[3:0] ;
assign  lsu_spu_stb_empty[3:0] = lsu_stb_empty[3:0] ;

//wire  ldstdbl_g ;
// stdbl should be qualified with quad_asi_g !!!
//assign  ldstdbl_g = ldst_dbl_g & (ld_inst_vld_g | st_inst_vld_g) & ~ldst_fp_g ;

// casa_g and stdbl_g may not be required.
//assign  ld_rawp_st_ackid_g[2:0] =
//        (casa_g | ldstdbl_g | stb_cam_mhit | (io_ld & stb_not_empty))
//        ? stb_wptr_prev[2:0] : ld_rawp_stb_id[2:0] ;

//===================================================
//casa: need st-st order
//st cam mhit: cannot figure out the youngest
//io: side effect
//remove int ldd and quad ldd, why need ldstdbl?
//===================================================
// stb_wptr_prev is used for casa, a multiple cam hit, or an io load with a
// non-empty stb; otherwise the cam hit pointer is used.
wire  [2:0] ld_rawp_st_ackid_g ;
assign  ld_rawp_st_ackid_g[2:0] =
        ((io_ld & stb_not_empty) | stb_cam_mhit | casa_g)
          ? stb_wptr_prev[2:0]
          : ld_rawp_stb_id[2:0] ;

// One-cycle delayed copy (_w2) of the ack id.
dff_s #(3) rawpackid_w2 (
  .clk  (clk),
  .din  (ld_rawp_st_ackid_g[2:0]),
  .q    (ld_rawp_st_ackid_w2[2:0]),
  .se   (se),
  .si   (),
  .so   ()
);

// Per-thread store-buffer counts are passed straight through to the
// lsu_ifu_stbcnt* outputs.
assign  lsu_ifu_stbcnt0[3:0] = lsu_stbcnt0[3:0] ;
assign  lsu_ifu_stbcnt1[3:0] = lsu_stbcnt1[3:0] ;
assign  lsu_ifu_stbcnt2[3:0] = lsu_stbcnt2[3:0] ;
assign  lsu_ifu_stbcnt3[3:0] = lsu_stbcnt3[3:0] ;

// The full indication of each thread is bit 3 of its 4-bit count.
assign  lsu_ffu_stb_full0 = lsu_stbcnt0[3] ;
assign  lsu_ffu_stb_full1 = lsu_stbcnt1[3] ;
assign  lsu_ffu_stb_full2 = lsu_stbcnt2[3] ;
assign  lsu_ffu_stb_full3 = lsu_stbcnt3[3] ;

endmodule