source: XOpenSparcT1/trunk/T1-CPU/lsu/lsu_stb_rwctl.v @ 6

Revision 6, 45.3 KB checked in by pntsvt00, 14 years ago (diff)

versione iniziale opensparc

Line 
1// ========== Copyright Header Begin ==========================================
2//
3// OpenSPARC T1 Processor File: lsu_stb_rwctl.v
4// Copyright (c) 2006 Sun Microsystems, Inc.  All Rights Reserved.
5// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES.
6//
7// The above named program is free software; you can redistribute it and/or
8// modify it under the terms of the GNU General Public
9// License version 2 as published by the Free Software Foundation.
10//
11// The above named program is distributed in the hope that it will be
12// useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14// General Public License for more details.
15//
16// You should have received a copy of the GNU General Public
17// License along with this work; if not, write to the Free Software
18// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19//
20// ========== Copyright Header End ============================================
21///////////////////////////////////////////////////////////////////
22/*
23//  Description:  Control for Unified STB CAM/DATA of LSU
24*/
25////////////////////////////////////////////////////////////////////////
26// Global header file includes
27////////////////////////////////////////////////////////////////////////
28`include  "sys.h" // system level definition file which contains the
29          // time scale definition
30
31`include "iop.h"
32
33////////////////////////////////////////////////////////////////////////
34// Local header file includes / local defines
35////////////////////////////////////////////////////////////////////////
36
37module lsu_stb_rwctl (/*AUTOARG*/
38   // Outputs
39   so, lsu_stbctl_flush_pipe_w, stb_cam_wr_no_ivld_m, 
40   ld_rawp_st_ced_w2, stb_data_wr_ptr, stb_data_wptr_vld, 
41   stb_data_rd_ptr, stb_data_rptr_vld, stb_wdata_ramd_b75_b64, 
42   stb_cam_cm_tid, stb_ldst_byte_msk, stb_ldst_byte_msk_min, 
43   stb_cam_rw_ptr, stb_cam_wptr_vld, stb_cam_rptr_vld, 
44   lsu_st_sz_bhww_m, lsu_st_sz_dw_m, lsu_st_sz_bhw_m, 
45   lsu_st_sz_wdw_m, lsu_st_sz_b_m, lsu_st_sz_w_m, lsu_st_sz_hw_m, 
46   lsu_st_sz_hww_m, ld_rawp_st_ackid_w2, stb_flush_st_g, 
47   stb_cam_wvld_m, lsu_st_rq_type_m, lsu_stb_data_early_sel_e, 
48   lsu_stb_data_final_sel_m, lsu_ldquad_inst_m, stb_thrd_en_g, 
49   flsh_inst_m, lsu_stb_va_m, lsu_stb_empty_buf, lsu_spu_stb_empty, 
50   ifu_tlu_inst_vld_m_bf1, ifu_tlu_inst_vld_m_bf2, lsu_ifu_stbcnt0, 
51   lsu_ifu_stbcnt1, lsu_ifu_stbcnt2, lsu_ifu_stbcnt3, 
52   lsu_ffu_stb_full0, lsu_ffu_stb_full1, lsu_ffu_stb_full2, 
53   lsu_ffu_stb_full3, 
54   // Inputs
55   rclk, rst_tri_en, si, se, ld_inst_vld_e, ldst_sz_e, st_inst_vld_e, 
56   stb_pcx_rptr0, stb_wrptr0, stb_pcx_rptr1, stb_wrptr1, 
57   stb_pcx_rptr2, stb_wrptr2, stb_pcx_rptr3, stb_wrptr3, 
58   stb_cam_hit_ptr, stb_cam_hit, lsu_ldst_va_m, sta_internal_m, 
59   ifu_tlu_thrid_e, tlu_exu_early_flush_pipe_w, lsu_ttype_vld_m2, 
60   ifu_lsu_flush_w, lsu_defr_trp_taken_g, ifu_lsu_casa_e, 
61   ifu_lsu_ldstub_e, ifu_lsu_swap_e, ifu_lsu_ldst_dbl_e, 
62   stb_state_ced0, stb_state_ced1, stb_state_ced2, stb_state_ced3, 
63   stb_ld_full_raw, stb_ld_partial_raw, stb_wrptr0_prev, 
64   stb_wrptr1_prev, stb_wrptr2_prev, stb_wrptr3_prev, 
65   ifu_lsu_alt_space_e, ifu_lsu_ldst_fp_e, lsu_quad_asi_e, 
66   lsu_st_rmo_m, lsu_bst_in_pipe_m, ffu_lsu_kill_fst_w, 
67   ffu_lsu_blk_st_e, ffu_lsu_blk_st_tid_m, ffu_lsu_blk_st_va_e, 
68   lsu_snap_blk_st_m, tlb_pgnum_b39_g, lsu_stb_empty, 
69   ifu_tlu_flsh_inst_e, stb_cam_mhit, ifu_tlu_inst_vld_m, 
70   lsu_st_pcx_rq_pick, lsu_st_pcx_rq_vld, stb_rdata_ramc_b8t0, 
71   lsu_stbcnt0, lsu_stbcnt1, lsu_stbcnt2, lsu_stbcnt3
72   ) ; 
73
74input     rclk ;     
75//input     grst_l ;   
76//input     arst_l ;   
77   input  rst_tri_en;
78   
79   input  si;
80   input  se;
81   output so;
82   
83
84input     ld_inst_vld_e ;   // load in pipe.
85input [1:0]   ldst_sz_e ;   // size of load.
86input     st_inst_vld_e ;   // store in pipe.
87// Currently bypass flop make request
88//input [3:0]   pcx_rq_for_stb ;  // pcx request rd of dfq - threaded
89//input [2:0]   stb_dfq_rptr0 ;   // dfq rptr for stb0
90input [2:0]   stb_pcx_rptr0 ;   // pcx rptr for stb0
91input [2:0]   stb_wrptr0 ;    // wrt ptr - stb0
92//input [2:0]   stb_dfq_rptr1 ;   // dfq rptr for stb1
93input [2:0]   stb_pcx_rptr1 ;   // pcx rptr for stb1
94input [2:0]   stb_wrptr1 ;    // wrt ptr - stb1
95//input [2:0]   stb_dfq_rptr2 ;   // dfq rptr for stb2
96input [2:0]   stb_pcx_rptr2 ;   // pcx rptr for stb2
97input [2:0]   stb_wrptr2 ;    // wrt ptr - stb2
98//input [2:0]   stb_dfq_rptr3 ;   // dfq rptr for stb3
99input [2:0]   stb_pcx_rptr3 ;   // pcx rptr for stb3
100input [2:0]   stb_wrptr3 ;    // wrt ptr - stb3
101input [2:0]     stb_cam_hit_ptr ; // entry which hit
102input     stb_cam_hit ;   // hit has occurred
103//input [7:0]     stb_state_vld0 ;  // valid bits - stb0
104//input [7:0]     stb_state_vld1 ;  // valid bits - stb1
105//input [7:0]     stb_state_vld2 ;  // valid bits - stb2
106//input [7:0]     stb_state_vld3 ;  // valid bits - stb3
107input [9:0]    lsu_ldst_va_m ;
108input     sta_internal_m ;   // internal stxa
109input [1:0]   ifu_tlu_thrid_e ; // thread-id.
110
111//   output     lsu_stbrwctl_flush_pipe_w ;  // tmp for tso_mon
112   input      tlu_exu_early_flush_pipe_w;
113   input      lsu_ttype_vld_m2;
114   
115   input      ifu_lsu_flush_w;
116   input      lsu_defr_trp_taken_g;
117   output     lsu_stbctl_flush_pipe_w;
118   
119   
120input                   ifu_lsu_casa_e ;        // compare-swap instr
121input                   ifu_lsu_ldstub_e ;      // ldstub
122input                   ifu_lsu_swap_e ;        // swap
123input     ifu_lsu_ldst_dbl_e; // ldst dbl, specifically for stquad.
124//input   [63:0]          lsu_stb_st_data_g ;     // data to be written to stb
125input [7:0]   stb_state_ced0 ;
126input [7:0]   stb_state_ced1 ;
127input [7:0]   stb_state_ced2 ;
128input [7:0]   stb_state_ced3 ;
129input [7:0]   stb_ld_full_raw ;
130input [7:0]   stb_ld_partial_raw ;
131input   [2:0]   stb_wrptr0_prev ;
132input   [2:0]   stb_wrptr1_prev ;
133input   [2:0]     stb_wrptr2_prev ;
134input   [2:0]   stb_wrptr3_prev ;
135input     ifu_lsu_alt_space_e ; // alt_space inst
136input     ifu_lsu_ldst_fp_e ;
137//input     tlb_cam_hit ;   // tlb cam hit - mstage
138input     lsu_quad_asi_e ;  // quad ldst asi
139//input  [3:0]      lsu_st_ack_rq_stb ;
140//input     lsu_dtlb_bypass_e ;
141input   lsu_st_rmo_m ;  // rmo st in m cycle.
142input   lsu_bst_in_pipe_m ;     // 1st helper for bst.
143input           ffu_lsu_kill_fst_w ;    // ecc error on st.
144input           ffu_lsu_blk_st_e ;      // blk st helper signalled by ffu
145input   [1:0]   ffu_lsu_blk_st_tid_m ;  // blk st tid - from ffu_lsu_data
146input   [5:3]   ffu_lsu_blk_st_va_e ;   // bits 5:3 of va from increment
147input           lsu_snap_blk_st_m ;             // snap blk st state
148input           tlb_pgnum_b39_g ;
149
150input   [3:0]   lsu_stb_empty ;         // thread's stb is empty
151input           ifu_tlu_flsh_inst_e;
152input           stb_cam_mhit ;
153input           ifu_tlu_inst_vld_m ;
154//input   [3:0]   lsu_st_pcx_rq_kill_w2 ;
155
156input [3:0]   lsu_st_pcx_rq_pick ; 
157
158input         lsu_st_pcx_rq_vld ;
159
160input   [8:0]   stb_rdata_ramc_b8t0 ;   // scan-only
161
162output          stb_cam_wr_no_ivld_m ;
163
164//output      ld_rawp_st_ced_g ;
165output      ld_rawp_st_ced_w2 ;
166output  [4:0]   stb_data_wr_ptr ; // write ptr - stb data
167output      stb_data_wptr_vld ; // wr vld for stb data
168output  [4:0]   stb_data_rd_ptr ; // rd ptr for stb data
169output      stb_data_rptr_vld ; // rptr vld for stb data
170output  [75:64]    stb_wdata_ramd_b75_b64 ;  // write data for DATA RAM.
171
172// partial or full raw required
173output  [1:0]   stb_cam_cm_tid ;  // cam tid - stb cam
174//output  [7:0]   stb_cam_sqsh_msk ;  // squash spurious hits
175//output      stb_cam_vld ;
176output  [7:0]   stb_ldst_byte_msk ; // byte mask for write/cam
177output  [7:0]   stb_ldst_byte_msk_min ; // byte mask for write/cam for min path
178
179//output  [3:0]   stb_rd_for_pcx_sel ;    // stb's st selected for read for pcx
180output  [4:0]   stb_cam_rw_ptr ;        // rw ptr for shared stb cam port
181output          stb_cam_wptr_vld ;      // wr vld for stb write   
182output          stb_cam_rptr_vld ;      // rd vld for stb write   
183
184
185//output      lsu_stb_pcx_rvld_d1 ; // stb has been read-delayby1cycle
186//output      lsu_stb_dfq_rvld ;  // wr to dfq stb bypass ff
187
188output                  lsu_st_sz_bhww_m ;      // byte or hword or word
189output                  lsu_st_sz_dw_m ;        // double word
190output                  lsu_st_sz_bhw_m ;       // byte or hword
191output                  lsu_st_sz_wdw_m ;       // word or dword
192output                  lsu_st_sz_b_m ;         // byte
193output                  lsu_st_sz_w_m ;         // word
194output                  lsu_st_sz_hw_m ;        // hword
195output                  lsu_st_sz_hww_m ;       // hword or word
196
197//output     ld_stb_full_raw_g ;
198//output     ld_stb_partial_raw_g ;
199//output  [3:0]   ld_stb_full_raw_g ;
200//output  [3:0]   ld_stb_partial_raw_g ;
201
202output  [2:0]   ld_rawp_st_ackid_w2 ;
203
204//output  [2:0]   stb_dfq_rd_id ;   // stb entry being read for current thread for current thread
205
206output  [3:0]     stb_flush_st_g ;  // st is flushed in cycle g
207output  [3:0]     stb_cam_wvld_m ;
208
209output  [2:1]   lsu_st_rq_type_m ;
210
211output  [3:0]   lsu_stb_data_early_sel_e ;// select source of stb data.
212output      lsu_stb_data_final_sel_m ;// select source of stb data.
213
214output      lsu_ldquad_inst_m ; // stquad inst
215//output      lsu_stdbl_inst_m ;  // stdbl inst
216
217//output  [1:0]   lsu_stb_rd_tid ;  // thread for which stb read occurs
218
219output  [3:0]   stb_thrd_en_g ; // thread id for current stb access
220
221   output     flsh_inst_m;
222
223   output [9:3] lsu_stb_va_m;
224
225output  [3:0]   lsu_stb_empty_buf ;
226output  [3:0]   lsu_spu_stb_empty ;
227
228   output     ifu_tlu_inst_vld_m_bf1;
229   output     ifu_tlu_inst_vld_m_bf2;
230
231   input [3:0] lsu_stbcnt0;
232   input [3:0] lsu_stbcnt1;
233   input [3:0] lsu_stbcnt2;
234   input [3:0] lsu_stbcnt3;
235
236   output [3:0] lsu_ifu_stbcnt0;
237   output [3:0] lsu_ifu_stbcnt1;
238   output [3:0] lsu_ifu_stbcnt2;
239   output [3:0] lsu_ifu_stbcnt3;
240
241   output       lsu_ffu_stb_full0;
242   output       lsu_ffu_stb_full1;
243   output       lsu_ffu_stb_full2;
244   output       lsu_ffu_stb_full3;
245 
246/*AUTOWIRE*/
247// Beginning of automatic wires (for undeclared instantiated-module outputs)
248// End of automatics
249// Beginning of automatic wires (for undeclared instantiated-module outputs)
250// End of automatics
251//wire  [4:0] stb_dequeue_ptr ;
252wire  [2:0] stb_wptr_prev ;
253wire  [1:0] st_thrid_m,st_thrid_g ;
254wire  [7:0] ld_any_raw_vld ;
255wire  [7:0] ld_any_raw_vld_d1 ;
256//wire    ld_raw_mhit ;
257wire  [2:0] st_rq_type_m,st_rq_type_g ;
258
259wire  [1:0] ldst_sz_m,ldst_sz_g, pipe_ldst_sz_m ;
260wire    ldst_byte, ldst_hwrd, ldst_word, ldst_dwrd ;
261wire  [7:0] ldst_byte_mask ;
262wire  [2:0] stb_wptr ; 
263wire  [1:0] thrid_m,thrid_g ;
264wire    ld_inst_vld_m, st_inst_vld_m ;
265
266wire    ldst_dbl_m;
267wire    atomic_m ;
268wire    ldstub_m ;
269wire    casa_m, casa_g ;
270wire    swap_m;
271wire    flush_st_g ;
272wire    cam_wptr_vld_g ;
273wire  [2:0] cam_wptr_d1 ;
274
275wire  [2:0] stb_rdptr0,stb_rdptr1 ;
276wire  [2:0] stb_rdptr2,stb_rdptr3 ;
277
278//wire  [3:0] stb_rd_mask ;
279wire  [3:0] stb_select_rptr ;
280wire  [1:0] stb_rd_thrid ;
281//wire    cam_vld_g ;
282wire  [9:0]  ldst_va_m, pipe_ldst_va_m ;
283wire  [3:0]  ldst_va_g ;
284wire  [2:0] cam_wr_ptr ;
285wire  thread0_m, thread1_m, thread2_m, thread3_m ;
286wire  thread0_g, thread1_g, thread2_g, thread3_g ;
287wire  [2:0]   ld_rawp_stb_id ;
288
289//wire  rd_for_dfq_granted ;
290wire  [7:0] stb_state_ced,stb_state_ced_d1 ;
291//wire    stq_wr_en ;
292//wire  [3:0] stq_wr_en_g ;
293//wire  [3:0] stquad_vld ;
294//wire  [2:0] stquad_ptr0,stquad_ptr1,stquad_ptr2,stquad_ptr3 ;
295//wire  [3:0] ld_stq_hit_g ;
296//wire  ldq_hit_g ;
297//wire  [3:0] ldq_hit_g ;
298wire  ldst_fp_m;
299wire  ldstub_e,casa_e,ldst_dbl_e;
300//wire  stb_data_final_sel_e ;
301wire  alt_space_e,alt_space_m ;
302wire  quad_asi_m ;
303//wire  stquad_e, stquad_m ;
304wire  stdbl_e ;
305//wire  dfq_any_rq_for_stb ;
306//wire  [3:0]   stb_rd_for_dfq ;  // read rq for dfq - threaded
307wire    blkst_m,blkst_g ;
308wire    stb_not_empty ;
309
310   wire       clk;
311   assign     clk = rclk;
312
313//   wire       rst_l;
314//   wire       stb_rwctl_rst_l;
315   
316//   dffrl_async rstff(.din (grst_l),
317//                     .q   (stb_rwctl_rst_l),
318//                     .clk (clk), .se(se), .si(), .so(),
319//                     .rst_l (arst_l));
320
321//=========================================================================================
322//  MISC
323//=========================================================================================
324
325// Scan-only flops.
326
327wire    [8:0]   stb_rdata_ramc_b8t0_so ;
328dff_s #(9)  scmscan_ff (
329        .din    (stb_rdata_ramc_b8t0[8:0]),
330        .q      (stb_rdata_ramc_b8t0_so[8:0]),
331        .clk    (clk),
332        .se   (se),       .si (),          .so ()
333        );
334
335//=========================================================================================
336//  INST_VLD_W GENERATION
337//=========================================================================================
338
339wire    flush_w_inst_vld_m ;
340wire    lsu_inst_vld_w ;
341wire    lsu_stbrwctl_flush_pipe_w;
342
343//=======================================
344//instaniate buffers
345//======================================
346
347   wire   ifu_tlu_inst_vld_m_bf0;
348   
349bw_u1_buf_10x UZfix_ifu_tlu_inst_vld_m_bf0 ( .a(ifu_tlu_inst_vld_m), .z(ifu_tlu_inst_vld_m_bf0) );
350bw_u1_buf_30x UZfix_ifu_tlu_inst_vld_m_bf1 ( .a(ifu_tlu_inst_vld_m_bf0), .z(ifu_tlu_inst_vld_m_bf1) );
351bw_u1_buf_20x UZfix_ifu_tlu_inst_vld_m_bf2 ( .a(ifu_tlu_inst_vld_m_bf0), .z(ifu_tlu_inst_vld_m_bf2) );
352   
353assign  flush_w_inst_vld_m =
354        ifu_tlu_inst_vld_m_bf0 &
355        ~(lsu_stbrwctl_flush_pipe_w & (thrid_m[1:0] == thrid_g[1:0])) ; // really lsu_flush_pipe_w
356
357dff_s  stgw_ivld (
358        .din    (flush_w_inst_vld_m),
359        .q      (lsu_inst_vld_w),
360        .clk    (clk),
361        .se   (se),       .si (),          .so ()
362        );
363
364   wire other_flush_pipe_w;
365   wire tlu_early_flush_pipe_w;
366   assign tlu_early_flush_pipe_w = tlu_exu_early_flush_pipe_w;
367   
368assign  other_flush_pipe_w = 
369tlu_early_flush_pipe_w | (lsu_ttype_vld_m2 & lsu_inst_vld_w) |
370lsu_defr_trp_taken_g ;
371   
372   wire lsu_flush_pipe_w;
373   
374assign  lsu_flush_pipe_w = other_flush_pipe_w | ifu_lsu_flush_w ;
375assign  lsu_stbctl_flush_pipe_w = lsu_flush_pipe_w ;
376assign  lsu_stbrwctl_flush_pipe_w = lsu_flush_pipe_w ;   
377
378//=========================================================================================
379//  STB Array Addr/Ctl Generation
380//=========================================================================================
381
382assign  ldstub_e = ifu_lsu_ldstub_e ;
383assign  casa_e   = ifu_lsu_casa_e ;
384assign  ldst_dbl_e = ifu_lsu_ldst_dbl_e ;
385
386assign  alt_space_e = ifu_lsu_alt_space_e ;
387
388//assign  stdbl_e =  ldst_dbl_e & (~alt_space_e | (alt_space_e & ~lsu_quad_asi_e)) ;
389assign  stdbl_e =  ldst_dbl_e ;
390
391//   wire lsu_stdbl_inst_m;
392   
393//dff  stq_stgm (
394//  .din  (stdbl_e),
395//  .q          (lsu_stdbl_inst_m), 
396//  .clk  (clk),
397//  .se (se), .si (), .so ()
398//  );
399
400// This path can probably be eased.
401assign  lsu_stb_data_early_sel_e[0] = ldstub_e  & ~rst_tri_en;
402assign  lsu_stb_data_early_sel_e[1] = casa_e & ~rst_tri_en;
403assign  lsu_stb_data_early_sel_e[2] = ~(ldstub_e | casa_e |  stdbl_e) | rst_tri_en;
404assign  lsu_stb_data_early_sel_e[3] = stdbl_e & ~rst_tri_en ;
405
406// modify for accepting bst data out of pipe.
407//assign  stb_data_final_sel_e = ~(ldst_fp_e | ffu_lsu_blk_st_e) ;
408
409/*dff  lsel_g (
410  .din  (stb_data_final_sel_e),
411  .q  (lsu_stb_data_final_sel_m),
412  .clk  (clk),
413  .se (se), .si (), .so ()
414  );*/
415
416assign  lsu_stb_data_final_sel_m = ~(ldst_fp_m | blkst_m) ;
417
418wire    real_st_m ;
419wire    flsh_inst_m, flsh_inst_g ;
420// !!! could qualify st_inst_vld_e with stxa_internal !!!
421dff_s #(13) stgm_vld  (
422  .din  ({ld_inst_vld_e,st_inst_vld_e,ldst_sz_e[1:0], 
423    ifu_lsu_swap_e, ifu_lsu_ldstub_e, ifu_lsu_casa_e,ifu_lsu_ldst_dbl_e,
424    ifu_tlu_thrid_e[1:0],ifu_lsu_ldst_fp_e,lsu_quad_asi_e,ifu_tlu_flsh_inst_e}), 
425  .({ld_inst_vld_m,real_st_m,pipe_ldst_sz_m[1:0], 
426    swap_m,ldstub_m,casa_m,ldst_dbl_m,thrid_m[1:0],ldst_fp_m,quad_asi_m,flsh_inst_m}),
427  .clk  (clk), 
428  .se   (se), .si (), .so ()
429  );
430
431assign  st_inst_vld_m = real_st_m | flsh_inst_m ;
432
433// do we need ld/st unflushed ?
434   wire sta_internal_g;
435   
436dff_s #(7) stgw_vld  (
437  .din  ({sta_internal_m,   
438    casa_m, thrid_m[1:0],ldst_sz_m[1:0], flsh_inst_m}), 
439  .q    ({sta_internal_g,   
440    casa_g, thrid_g[1:0],ldst_sz_g[1:0], flsh_inst_g}),
441  .clk  (clk), 
442  .se   (se), .si (), .so ()
443  );
444
445
446// stb-cam will be written by st at rising edge of g-stage.
447// However, st can be flushed after write. To keep, the stb state consistent,
448// The valid and write ptr will not be updated until the rising edge of w2.
449
450wire    early_flush_cond_g,partial_flush_st_g ;
451assign early_flush_cond_g = 
452(sta_internal_g | ~(lsu_inst_vld_w | blkst_g) | ffu_lsu_kill_fst_w) ;
453assign  flush_st_g = (early_flush_cond_g | lsu_stbrwctl_flush_pipe_w) & cam_wptr_vld_g ;
454
455//timing, send to stb_ctl and qualified by stb_cam_wvld_g (thread version of cam_wptr_vld_g)   
456//assign        partial_flush_st_g = early_flush_cond_g & cam_wptr_vld_g ;
457assign  partial_flush_st_g = early_flush_cond_g ; 
458
459assign  atomic_m = (casa_m | ldstub_m | swap_m) & st_inst_vld_m ;
460
461// WRITE PTR VALID GENERATION.
462
463// meant specifically to squash pcx_rq_for_stb.
464assign  stb_cam_wr_no_ivld_m
465  = (st_inst_vld_m | casa_m | ldstub_m | swap_m | blkst_m) ;
466
467//bug3610 - kill cam write vld(==stb data write vld next cycle) to avoid datat read and write same cycle
468//          to the same entry
469wire  b2b_st_detect ;
470
471assign  stb_cam_wptr_vld 
472  = (((st_inst_vld_m | atomic_m) & ifu_tlu_inst_vld_m_bf0) | blkst_m) & ~(flush_st_g & b2b_st_detect) ;
473  //= ((st_inst_vld_m | atomic_m) & ifu_tlu_inst_vld_m_bf0) | blkst_m ;  // bug3610
474  //= (st_inst_vld_m | atomic_m | (ldst_dbl_m & st_inst_vld_m) | blkst_m) ;
475
476dff_s  wptr_g (
477  .din  (stb_cam_wptr_vld), .(cam_wptr_vld_g),
478  .clk  (clk), 
479  .se   (se), .si (), .so ()
480  );
481
482//flop move into mem cell (roll back) 
483assign  stb_data_wptr_vld = cam_wptr_vld_g ;
484
485// WRITE PTR GENERATION
486
487// It is assumed that if there is a store in the pipe, there is a
488// free entry in the corresponding stb. Otherwise, the pipe would've
489// stalled for the thread.     
490
491// If a store-like inst has been flushed, then the old ptr has to be restored
492// and used.  This is done within thread specific stb control
493
494assign  thread0_m = ~st_thrid_m[1] & ~st_thrid_m[0] ;
495assign  thread1_m = ~st_thrid_m[1] &  st_thrid_m[0] ;
496assign  thread2_m =  st_thrid_m[1] & ~st_thrid_m[0] ;
497assign  thread3_m =  st_thrid_m[1] &  st_thrid_m[0] ;
498
499dff_s #(4) stgg_thrd (
500  .din  ({thread0_m,thread1_m,thread2_m,thread3_m}), 
501  .({thread0_g,thread1_g,thread2_g,thread3_g}), 
502  .clk  (clk), 
503  .se (se), .si (), .so ()
504  );
505
506assign  stb_thrd_en_g[0] = thread0_g ;
507assign  stb_thrd_en_g[1] = thread1_g ;
508assign  stb_thrd_en_g[2] = thread2_g ;
509assign  stb_thrd_en_g[3] = thread3_g ;
510
511//assign  stb_wptr[2:0] =
512//  thread0_m ? stb_wrptr0[2:0] :
513//    thread1_m ? stb_wrptr1[2:0] :
514//      thread2_m ? stb_wrptr2[2:0] :
515//        thread3_m ? stb_wrptr3[2:0] : 3'bxxx ;
516
517assign  stb_wptr[2:0] = 
518  (thread0_m ? stb_wrptr0[2:0] :  3'b000) |
519  (thread1_m ? stb_wrptr1[2:0] :  3'b000) |
520  (thread2_m ? stb_wrptr2[2:0] :  3'b000) |
521  (thread3_m ? stb_wrptr3[2:0] :  3'b000) ;
522   
523assign  b2b_st_detect =   // detect back-to-back store
524  (thread0_m & thread0_g) |
525  (thread1_m & thread1_g) |
526  (thread2_m & thread2_g) |
527  (thread3_m & thread3_g) ;
528
529assign  cam_wr_ptr[2:0] = (flush_st_g & b2b_st_detect) ? cam_wptr_d1[2:0] : stb_wptr[2:0] ;
530
531dff_s #(3)  wptr_d1 (
532  .din  (cam_wr_ptr[2:0]),  .(cam_wptr_d1[2:0]),
533  .clk  (clk), 
534  .se (se), .si (), .so ()
535  );
536
537assign  stb_cam_wvld_m[0] = stb_cam_wptr_vld & thread0_m ;
538assign  stb_cam_wvld_m[1] = stb_cam_wptr_vld & thread1_m ;
539assign  stb_cam_wvld_m[2] = stb_cam_wptr_vld & thread2_m ;
540assign  stb_cam_wvld_m[3] = stb_cam_wptr_vld & thread3_m ;
541
542// contains potential flush conditions.
543assign  stb_flush_st_g[0] = partial_flush_st_g ;
544assign  stb_flush_st_g[1] = partial_flush_st_g ;
545assign  stb_flush_st_g[2] = partial_flush_st_g ;
546assign  stb_flush_st_g[3] = partial_flush_st_g ;
547
548// stb-data has a delayed write in w2. Alignment of stb data will be done on write
549// of 64b into stb. This allows write of stb cam and data to be done in the
550// same cycle, and thus read can occur simultaneously for pcx.
551
552//mem cell change to bw_r_rf32x80, flop move into mem cell (roll back)
553//flop outside mem cell
554assign  stb_data_wr_ptr[4:0] =  {st_thrid_g[1:0],cam_wptr_d1[2:0]};
555   
556// RD PTR/VLD GENERATION
557
558// stb read for dfq dumps data into a bypass flop. Thus a read for the dfq can occur
559// if a thread's stb has an acked entry and the bypass flop is empty.
560// stb read for pcx occurs on availability of queue entry.
561
562// Both dfq and pcx require a read of the cam and data. The reads
563// can thus not happen when load that hits in the stb is in the w2 (change to W3)
564// stage and a store is in the g-stage of the pipe. Both
565// probabilities are low.
566
567// ??Read for pcx takes priority over dfq. No deadlock can occur
568// ??as at some point the pcx reads will be exhausted and the stb
569// ??will have to drain itself. The stb is self-regulating in this regard.
570
571// priority of stb read: ld_cam_hit (full raw bypass) > dfq > pcx
572
573//====================================================================================
574//raw bypass timing
575//G/WB                          W2     W3                      W4
576//cam_hit(from stb_cam output)  flop   stb_data rd_ptr/rd_vld  read STB_DATA/BYP
577//====================================================================================
578
579   wire [1:0] thrid_w2;
580   wire [2:0] stb_cam_hit_ptr_w2;
581   wire       stb_cam_hit_w2;   
582   wire       stb_cam_hit_w;   
583   
584   //bug3503
585   assign stb_cam_hit_w  =  stb_cam_hit & lsu_inst_vld_w & ~lsu_stbrwctl_flush_pipe_w;
586
587dff_s #(6) stb_cam_hit_stg_w2 (
588  .din  ({thrid_g[1:0],  stb_cam_hit_ptr[2:0],    stb_cam_hit_w   }), 
589  .q    ({thrid_w2[1:0], stb_cam_hit_ptr_w2[2:0], stb_cam_hit_w2}),
590  .clk  (clk), 
591  .se   (se), .si (), .so ()
592  );
593   
594// logic moved to qctl1
595// pcx is making request for data in current cycle. Can be multi-hot.
596//assign  pcx_any_rq_for_stb = |pcx_rq_for_stb[3:0] ;
597//assign  pcx_any_rq_for_stb =
598//      (pcx_rq_for_stb[0] & ~lsu_st_pcx_rq_kill_w2[0]) |
599//      (pcx_rq_for_stb[1] & ~lsu_st_pcx_rq_kill_w2[1]) |
600//      (pcx_rq_for_stb[2] & ~lsu_st_pcx_rq_kill_w2[2]) |
601//      (pcx_rq_for_stb[3] & ~lsu_st_pcx_rq_kill_w2[3]) ;
602
603// ??ld-cam hit based read takes precedence
604// ??Timing : This could be made pessimistic by using ld_inst_vld_g
605
606//assign  stb_select_rptr[3:0] =  pcx_rq_for_stb[3:0] ;  // timing fix
607assign  stb_select_rptr[3:0] =  lsu_st_pcx_rq_pick[3:0] ; 
608
609// This could be a critical path. Be careful !
610//assign  stb_rdptr0[2:0] = ~dfq_any_rq_for_stb ? stb_pcx_rptr0[2:0] : stb_dfq_rptr0[2:0] ;
611assign  stb_rdptr0[2:0] = stb_pcx_rptr0[2:0] ;
612assign  stb_rdptr1[2:0] = stb_pcx_rptr1[2:0] ;
613assign  stb_rdptr2[2:0] = stb_pcx_rptr2[2:0] ;
614assign  stb_rdptr3[2:0] = stb_pcx_rptr3[2:0] ;
615
616// logic moved to qctl1
617//wire  [1:0] stb_rd_tid ;
618//
619//assign  stb_rd_tid[0] = pcx_rq_for_stb[1] | pcx_rq_for_stb[3] ;
620//assign  stb_rd_tid[1] = pcx_rq_for_stb[2] | pcx_rq_for_stb[3] ;
621//   
622//dff #(2) stbtid_stgd1 (
623//  .din    (stb_rd_tid[1:0]),  .q  (lsu_stb_rd_tid[1:0]),
624//  .clk    (clk),
625//  .se   (se), .si (), .so ()
626//  );
627
628//assign  stb_dfq_rd_id[2:0] = stb_data_rd_ptr[2:0] ; // or cam rd ptr
629
630//timing fix:5/6/03
631//bug4988 - change the prirority from 0->3 to 3->0; the reason is when select_rptr=0, the
632//          default thread id(rptr[4:3])=thread0 but the default rptr[2:0]=thread3. If
633//          thread0 and thread3 rptr are the same and the thread0 write is occuring, the
634//          rptr[4:0] is same as wptr[4:0]
635wire  [2:0]  stb_rdptr ;
636//assign  stb_rdptr[2:0] =
637//  stb_select_rptr[0] ? stb_rdptr0[2:0] :
638//    stb_select_rptr[1] ? stb_rdptr1[2:0] :
639//      stb_select_rptr[2] ? stb_rdptr2[2:0] :
640//                             stb_rdptr3[2:0] ;
641
642//assign  stb_rdptr[2:0] =
643//  stb_select_rptr[3] ? stb_rdptr3[2:0] :
644//    stb_select_rptr[2] ? stb_rdptr2[2:0] :
645//      stb_select_rptr[1] ? stb_rdptr1[2:0] :
646//                             stb_rdptr0[2:0] ;
647
648assign  stb_rdptr[2:0] = 
649  (stb_select_rptr[3] ? stb_rdptr3[2:0] : 3'b0) |
650  (stb_select_rptr[2] ? stb_rdptr2[2:0] : 3'b0) |
651  (stb_select_rptr[1] ? stb_rdptr1[2:0] : 3'b0) |
652  (stb_select_rptr[0] ? stb_rdptr0[2:0] : 3'b0) ;
653   
654//timing fix: 8/29/03 - remove the default select logic for stb_select_rptr since synthesis is forced to replace
655//            4to1 mux w/ and-or mux or 2to1 mux
656//wire   stb_select_rptr_b3;
657//assign stb_select_rptr_b3 =  ~|stb_select_rptr[2:0];
658
659wire  [2:0]  stb_rdptr_l;
660
661assign stb_rdptr_l[2:0] =  ~stb_rdptr[2:0] ;
662//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b0_mux(
663//                  .z(stb_rdptr_l[0]),
664//                  .d0(stb_rdptr0[0]),
665//                  .d1(stb_rdptr1[0]),
666//                  .d2(stb_rdptr2[0]),
667//                  .d3(stb_rdptr3[0]),
668//                  .s0(stb_select_rptr[0]),
669//                  .s1(stb_select_rptr[1]),
670//                  .s2(stb_select_rptr[2]),
671//                  .s3(stb_select_rptr[3]));
672//   
673//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b1_mux(
674//                  .z(stb_rdptr_l[1]),
675//                  .d0(stb_rdptr0[1]),
676//                  .d1(stb_rdptr1[1]),
677//                  .d2(stb_rdptr2[1]),
678//                  .d3(stb_rdptr3[1]),
679//                  .s0(stb_select_rptr[0]),
680//                  .s1(stb_select_rptr[1]),
681//                  .s2(stb_select_rptr[2]),
682//                  .s3(stb_select_rptr[3]));
683//   
684//bw_u1_muxi41d_2x  UZsize_stb_rdptr_b2_mux(
685//                  .z(stb_rdptr_l[2]),
686//                  .d0(stb_rdptr0[2]),
687//                  .d1(stb_rdptr1[2]),
688//                  .d2(stb_rdptr2[2]),
689//                  .d3(stb_rdptr3[2]),
690//                  .s0(stb_select_rptr[0]),
691//                  .s1(stb_select_rptr[1]),
692//                  .s2(stb_select_rptr[2]),
693//                  .s3(stb_select_rptr[3]));
694//   
695   
696assign  stb_rd_thrid[0] = stb_select_rptr[1] | stb_select_rptr[3] ;
697assign  stb_rd_thrid[1] = stb_select_rptr[2] | stb_select_rptr[3] ;
698
699// read
700// this mux will have to be accommodated in path !!! Talk to Satya.
701// Timing : This could be made pessimistic by using ld_inst_vld_g
702
703// raw read STB at W3 (changed from W2)       
704assign  stb_data_rd_ptr[4:0] = stb_cam_hit_w2 ? 
705        {thrid_w2[1:0],stb_cam_hit_ptr_w2[2:0]} :  // rd based on ld hit
706        {stb_rd_thrid[1:0],~stb_rdptr_l[2:0]} ;       // rd for pcx or dfq
707   
708// Blk-st modification for thread.
709assign  st_thrid_m[1:0] = blkst_m ? ffu_lsu_blk_st_tid_m[1:0] : thrid_m[1:0] ;
710dff_s #(2)  stid_stgg (
711  .din  (st_thrid_m[1:0]), 
712  .q    (st_thrid_g[1:0]),
713  .clk  (clk), 
714  .se (se), .si (), .so ()
715  );
716
717//timing fix: 5/6/03
718//assign  stb_cam_rw_ptr[4:0]  = stb_cam_wptr_vld ?
719//        {st_thrid_m[1:0],cam_wr_ptr[2:0]} :  // write
720//        {stb_rd_thrid[1:0],stb_rdptr[2:0]} ;  // read
721
722wire [2:0] cam_wr_ptr_l;
723wire [1:0] stb_rd_thrid_l;
724wire [1:0] st_thrid_m_l;
725
726assign cam_wr_ptr_l[2:0]  =  ~cam_wr_ptr[2:0];
727assign stb_rd_thrid_l[1:0]  =  ~stb_rd_thrid[1:0];
728assign st_thrid_m_l[1:0]  =  ~st_thrid_m[1:0];
729
730bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b0_mux(
731                  .z(stb_cam_rw_ptr[0]), 
732                  .d0(stb_rdptr_l[0]), 
733                  .d1(cam_wr_ptr_l[0]), 
734                  .s(stb_cam_wptr_vld));
735   
736bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b1_mux(
737                  .z(stb_cam_rw_ptr[1]), 
738                  .d0(stb_rdptr_l[1]), 
739                  .d1(cam_wr_ptr_l[1]), 
740                  .s(stb_cam_wptr_vld));
741   
742bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b2_mux(
743                  .z(stb_cam_rw_ptr[2]), 
744                  .d0(stb_rdptr_l[2]), 
745                  .d1(cam_wr_ptr_l[2]), 
746                  .s(stb_cam_wptr_vld));
747   
748bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b3_mux(
749                  .z(stb_cam_rw_ptr[3]), 
750                  .d0(stb_rd_thrid_l[0]), 
751                  .d1(st_thrid_m_l[0]), 
752                  .s(stb_cam_wptr_vld));
753   
754bw_u1_muxi21_2x  UZsize_stb_cam_rw_ptr_b4_mux(
755                  .z(stb_cam_rw_ptr[4]), 
756                  .d0(stb_rd_thrid_l[1]), 
757                  .d1(st_thrid_m_l[1]), 
758                  .s(stb_cam_wptr_vld));
759   
760
761
762//raw read STB at W3 (not W2)
763//timing fix: 9/2/03 - reduce fanout in stb_rwctl for lsu_st_pcx_rq_pick - gen separate signal for
764//                     stb_cam_rptr_vld and stb_data_rptr_vld
765
766//bug4988 - qual lsu_st_pcx_rq_vld w/ no write vld to stb_data. use stb_cam_wr_no_ivld_m instead of write vld.
767//          this is the same signal used to kill pcx_rq_for_stb
768//          stb_cam_rptr_vld is not set if stb_cam_wptr_vld=1
769     
770assign  stb_data_rptr_vld = 
771  //(|stb_select_rptr[3:0]) |  // pcx/dfq rd - timing fix
772  //lsu_st_pcx_rq_vld |  // pcx/dfq rd  // bug4988
773   (lsu_st_pcx_rq_vld & ~stb_cam_wr_no_ivld_m) |  // pcx/dfq rd
774    stb_cam_hit_w2 ;         // cam hit requires read whether single or multiple
775
776//raw read STB at W3 (not W2)     
777//timing fix: 9/2/03 - reduce fanout in stb_rwctl for lsu_st_pcx_rq_pick - gen separate signal for
778//                     stb_cam_rptr_vld and stb_data_rptr_vld
779assign  stb_cam_rptr_vld = 
780  //((|stb_select_rptr[3:0]) & ~(stb_cam_hit_w2)) & // only pcx read  - timing fix
781  (lsu_st_pcx_rq_vld & ~(stb_cam_hit_w2)) & // only pcx read
782      ~stb_cam_wptr_vld ;   // st,st-like write does not block
783   
784// lsu_stb_rd_vld_d1 - not used
785//dff  stbrd_stgd1  (
786//  .din    (stb_cam_rptr_vld), .q  (lsu_stb_rd_vld_d1),
787//  .clk    (clk),
788//  .se   (se), .si (), .so ()
789//  );
790
791// logic moved to qctl1
792//dff #(1)  prvld_stgd1 (
793//  .din  (pcx_any_rq_for_stb),
794//  .q  (lsu_stb_pcx_rvld_d1),
795//  .clk  (clk),
796//  .se (se), .si (), .so ()
797//  );
798
799assign  stb_cam_cm_tid[1:0] = thrid_m[1:0] ;
800
801
802//=========================================================================================
803//  BYTE MASK FORMATTING
804//=========================================================================================
805
806
807// Write/CAM Data for CAM RAM.
808// Physical dword aligned addr - PA[39:3] (37b)
809// Byte Mask - (8b)
810// Total - 45b
811
812//  | b7  |  b6 | b5  | b4  | b3  | b2  | b1  | b0  |
813//  |   hw3 |   hw2 |   hw1 |   hw0 |
814//  |     w1    |   w0    |
815//  |       dw        |
816
817
818
819//dff  #(11) va_m (
820//  .din    (exu_lsu_ldst_va_e[10:0]),  .q  (pipe_ldst_va_m[10:0]),
821//  .clk    (clk),
822//  .se   (se), .si (), .so ()
823//  );
824
825assign pipe_ldst_va_m[9:0] = lsu_ldst_va_m[9:0];
826
827// ldst_byte may not be needed
828assign ldst_byte = ~ldst_sz_m[1] & ~ldst_sz_m[0] ;  // 00
829assign ldst_hwrd = ~ldst_sz_m[1] &  ldst_sz_m[0] ;  // 01
830assign ldst_word =  ldst_sz_m[1] & ~ldst_sz_m[0] ;  // 10
831assign ldst_dwrd =  ldst_sz_m[1] &  ldst_sz_m[0] ;  // 11
832
833// Note : dword term is common.
834assign ldst_byte_mask[0]  =
835  ( ldst_va_m[2] &  ldst_va_m[1] &  ldst_va_m[0] )       |
836  ( ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0] & (ldst_hwrd)) |
837  ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_word))  |
838  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd))  ; 
839assign ldst_byte_mask[1]  =
840  ( ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0])        |
841  ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_word))  |
842  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd))  ; 
843assign ldst_byte_mask[2]  =
844  ( ldst_va_m[2] & ~ldst_va_m[1] &  ldst_va_m[0])         |
845  ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_hwrd | ldst_word))  |
846  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd))  ; 
847assign ldst_byte_mask[3]  =
848  ( ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0])       |
849  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd))  ; 
850assign ldst_byte_mask[4]  =
851  (~ldst_va_m[2] &  ldst_va_m[1] &  ldst_va_m[0])        |
852  (~ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0] & (ldst_hwrd)) |
853  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd | ldst_word)) ;
854assign ldst_byte_mask[5]  =
855  (~ldst_va_m[2] &  ldst_va_m[1] & ~ldst_va_m[0])         |
856  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] &  (ldst_dwrd | ldst_word))  ;
857assign ldst_byte_mask[6]  =
858  (~ldst_va_m[2] & ~ldst_va_m[1] &  ldst_va_m[0])     |
859  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0] & (ldst_dwrd | ldst_word | ldst_hwrd)) ;
860assign ldst_byte_mask[7]  =
861  (~ldst_va_m[2] & ~ldst_va_m[1] & ~ldst_va_m[0])   ;
862
863assign  stb_ldst_byte_msk[7:0]  = ldst_byte_mask[7:0]; 
864
865   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b0 (.a(ldst_byte_mask[0]), .z(stb_ldst_byte_msk_min[0]));
866   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b1 (.a(ldst_byte_mask[1]), .z(stb_ldst_byte_msk_min[1]));
867   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b2 (.a(ldst_byte_mask[2]), .z(stb_ldst_byte_msk_min[2]));
868   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b3 (.a(ldst_byte_mask[3]), .z(stb_ldst_byte_msk_min[3]));
869   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b4 (.a(ldst_byte_mask[4]), .z(stb_ldst_byte_msk_min[4]));
870   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b5 (.a(ldst_byte_mask[5]), .z(stb_ldst_byte_msk_min[5]));
871   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b6 (.a(ldst_byte_mask[6]), .z(stb_ldst_byte_msk_min[6]));
872   bw_u1_minbuf_5x UZfix_stb_ldst_byte_msk_min_b7 (.a(ldst_byte_mask[7]), .z(stb_ldst_byte_msk_min[7]));
873   
874   
875// Generate selects to format st data
876assign  lsu_st_sz_bhww_m = ldst_byte | ldst_hwrd | ldst_word ;      // byte or hword or word
877assign  lsu_st_sz_dw_m   = ldst_dwrd ;            // double word
878assign  lsu_st_sz_bhw_m  = ldst_byte | ldst_hwrd ;      // byte or hword
879assign  lsu_st_sz_wdw_m  = ldst_word | ldst_dwrd ;      // word or dword
880assign  lsu_st_sz_b_m    = ldst_byte ;            // byte
881assign  lsu_st_sz_w_m    = ldst_word ;            // word
882assign  lsu_st_sz_hw_m   = ldst_hwrd ;            // hword
883assign  lsu_st_sz_hww_m  = ldst_hwrd | ldst_word ;      // hword or word
884
885//=========================================================================================
886//  BLK-ST HANDLING
887//=========================================================================================
888
889wire    blkst_m_tmp ;
890dff_s  stgm_bst (
891  .din (ffu_lsu_blk_st_e),
892  .q   (blkst_m_tmp),
893  .clk (clk),
894  .se   (se),       .si (),          .so ()
895);
896
897assign  blkst_m = blkst_m_tmp & ~(real_st_m  | flsh_inst_m |
898                ld_inst_vld_m) ; // Bug 3444
899
900dff_s  stgg_bst (
901  .din (blkst_m),
902  .q   (blkst_g),
903  .clk (clk),
904  .se   (se),       .si (),          .so ()
905);
906
907wire    snap_blk_st_local_m ;
908assign  snap_blk_st_local_m = lsu_snap_blk_st_m & ifu_tlu_inst_vld_m_bf0 ;
909
910wire    [1:0]   bst_sz_m ;
911wire    [9:0]   bst_va_m ;
912// output to be used in m-stage.
913dffe_s #(9) bst_state_m (
914        .din    ({ldst_sz_m[1:0],ldst_va_m[9:6],ldst_va_m[2:0]}),
915        .q      ({bst_sz_m[1:0],bst_va_m[9:6],bst_va_m[2:0]}),
916        .en     (snap_blk_st_local_m),
917        .clk    (clk),
918        .se   (se),       .si (),          .so ()
919        );
920
921dff_s #(3)  bsva_stgm (
922  .din    (ffu_lsu_blk_st_va_e[5:3]), .q (bst_va_m[5:3]),
923  .clk    (clk), 
924  .se   (se), .si (), .so ()
925  );
926
927//assign        bst_va_m[5:3]   = ffu_lsu_blk_st_va_e[5:3] ;
928
929//assign  ldst_va_m[10] =  pipe_ldst_va_m[10] ;
930assign  ldst_va_m[9:0] = blkst_m ?  bst_va_m[9:0] : pipe_ldst_va_m[9:0] ;
931
932assign  lsu_stb_va_m[9:3] = ldst_va_m[9:3] ;
933
934assign  ldst_sz_m[1:0]  =  blkst_m ? bst_sz_m[1:0] : pipe_ldst_sz_m[1:0] ;
935
936//=========================================================================================
937//  WRITE DATA FOR DATA RAM
938//=========================================================================================
939
940// Write Data for DATA RAM.
941// Data - (64b)
942// (8b parity is generated on read)
943// Rqtype - (3b)
944// Size - (3b).
945// Addr - (3b). Lower 3b of 40b addr.
946// (set index and way available from ctl state.
947// Total - 73b.
948
949// st-quad requires own encoding.
950// assume does not have to be changed for blk-st
951assign  st_rq_type_m[2:0] =
952                casa_m ? 3'b010 :                       // cas pkt 1
953                        (ldstub_m | swap_m) ? 3'b110 :  // ldstub/swap
954                          //(stquad_m)  ? 3'b111 :  // stquad-pkt1
955                                  3'b001 ;        // normal store or partial interrupt rq type
956
957//assign  lsu_st_rq_type_m[2:0] = st_rq_type_m[2:0] ;
958assign  lsu_st_rq_type_m[2:1] = st_rq_type_m[2:1] ;
959
960// Need ASI decode
961/*wire  lsu_stquad_inst_m ;
962assign  lsu_stquad_inst_m = ldst_dbl_m & st_inst_vld_m & quad_asi_m ;
963*/
964
965wire    st_rmo_m,st_rmo_g ;
966assign  st_rmo_m = lsu_st_rmo_m | blkst_m ; // binit and blk rmo stores.
967dff_s #(9)  stgg_etc  (
968  .din    ({ldst_va_m[3:0],st_rq_type_m[2:0],st_rmo_m,lsu_bst_in_pipe_m}), 
969  .q      ({ldst_va_g[3:0],st_rq_type_g[2:0],st_rmo_g,bst_in_pipe_g}),
970  .clk    (clk), 
971  .se   (se), .si (), .so ()
972  );
973
974wire    bst_any_helper ;
975assign  bst_any_helper = blkst_g | bst_in_pipe_g ; // Bug 3934
976
977// Size will have to be changed to 2bits.
978// 7 more bits could be added to data ram to save read of cam in providing dfq pkt !!!
979assign stb_wdata_ramd_b75_b64[75:64]   = 
980  {st_rmo_g,st_rq_type_g[2:0],flsh_inst_g,bst_any_helper,ldst_sz_g[1:0],ldst_va_g[3:0]}; 
981        // Bug3395, 3934
982
983//=========================================================================================
984//  FULL/PARTIAL RAW CALCULATION
985//=========================================================================================
986
987// io load cannot bypass from stb. A stb hit results in an io-ld being treated
988// as a partial-raw. (OR should it be serialized behind any io store ??)
989wire    io_ld,io_ld_w2 ;
990assign  io_ld = tlb_pgnum_b39_g ; // Bug 4362
991
992// full-raw is squashed on multiple hits in stb. Treated like partial raw.
993// Ensure that all ld and ld-like instructions signal ld_inst_vld. We can then
994// remove qualification with ld_inst_vld_g.
995/*assign  ld_stb_full_raw_g =
996        (|stb_ld_full_raw[7:0]) & ~(stb_cam_mhit | ldq_hit_g | io_ld) ;
997assign  ld_stb_full_raw_g[0] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
998          ~(stb_cam_mhit | ldq_hit_g[0] | io_ld) & thread0_g ;
999          //~(ld_raw_mhit | ld_stq_hit_g[0] | io_ld) & thread0_g ;
1000assign  ld_stb_full_raw_g[1] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
1001          ~(stb_cam_mhit | ldq_hit_g[1] | io_ld) & thread1_g ;
1002assign  ld_stb_full_raw_g[2] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
1003          ~(stb_cam_mhit | ldq_hit_g[2] | io_ld) & thread2_g ;
1004assign  ld_stb_full_raw_g[3] = (|stb_ld_full_raw[7:0]) & ld_inst_vld_g &
1005          ~(stb_cam_mhit | ldq_hit_g[3] | io_ld) & thread3_g ; */
1006// Multiple full raws are also treated like a partial.
1007/*assign  ld_stb_partial_raw_g =
1008        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g | (io_ld & stb_not_empty)) ;
1009assign  ld_stb_partial_raw_g[0] =
1010        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[0] | (io_ld & stb_not_empty))
1011          & ld_inst_vld_g & thread0_g ;
1012assign  ld_stb_partial_raw_g[1] =
1013        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[1] | (io_ld & stb_not_empty))
1014          & ld_inst_vld_g & thread1_g ;
1015assign  ld_stb_partial_raw_g[2] =
1016        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[2] | (io_ld & stb_not_empty))
1017          & ld_inst_vld_g & thread2_g ;
1018assign  ld_stb_partial_raw_g[3] =
1019        ((|stb_ld_partial_raw[7:0]) | stb_cam_mhit | ldq_hit_g[3] | (io_ld & stb_not_empty))
1020          & ld_inst_vld_g & thread3_g; */
1021
1022//=========================================================================================
1023//  STQ HANDLING
1024//=========================================================================================
1025
1026/*      REMOVE STQUAD */
1027
1028//=========================================================================================
1029//      LD QUAD HANDLING
1030//=========================================================================================
1031
1032dff_s  altsp_stgm (
1033  .din    (alt_space_e), .q (alt_space_m),
1034  .clk    (clk), 
1035  .se   (se), .si (), .so ()
1036  );
1037
1038assign  lsu_ldquad_inst_m = ldst_dbl_m & ld_inst_vld_m & quad_asi_m & alt_space_m ; 
1039
1040/*wire  ldquad_inst_g ;
1041dff_s  ldq_stgg (
1042  .din    (lsu_ldquad_inst_m), .q (ldquad_inst_g),
1043  .clk    (clk),
1044  .se   (se), .si (), .so ()
1045  );
1046
1047wire    ldq_stb_cam_hit ;
1048assign  ldq_stb_cam_hit = stb_cam_hit & ldquad_inst_g ;
1049// Terms can be made common.
1050assign  ldq_hit_g = ldq_stb_cam_hit ; */
1051/*assign  ldq_hit_g[0] = thread0_g & ldq_stb_cam_hit ;
1052assign  ldq_hit_g[1] = thread1_g & ldq_stb_cam_hit ;
1053assign  ldq_hit_g[2] = thread2_g & ldq_stb_cam_hit ;
1054assign  ldq_hit_g[3] = thread3_g & ldq_stb_cam_hit ; */
1055
1056//=========================================================================================
1057//  STB MULTIPLE HIT GENERATION
1058//=========================================================================================
1059
1060// Multiple hits in stb is to be treated as a partial raw case. The ld however must wait
1061// until the youngest store which hit exits the stb. A ptr needs to be calculated for this case.
1062// A version of stb_wptr is used instead because it is easily available. (Would this have
1063// any significant performance impact ? - No)
1064
1065assign  ld_any_raw_vld[7:0] = stb_ld_full_raw[7:0] | stb_ld_partial_raw[7:0] ;
1066
1067dff_s #(16)  stgw2_rvld (
1068        .din    ({ld_any_raw_vld[7:0],stb_state_ced[7:0]}),
1069        .q      ({ld_any_raw_vld_d1[7:0],stb_state_ced_d1[7:0]}),
1070        .clk    (clk),
1071        .se     (se),       .si (),          .so ()
1072        );
1073
1074
1075// This equation can be optimized for the grape flow.
1076// This can be obtained from stb.
1077/*assign  ld_raw_mhit =
1078  (ld_any_raw_vld[7] & |(ld_any_raw_vld[6:0])) |
1079  (ld_any_raw_vld[6] & |(ld_any_raw_vld[5:0])) |
1080  (ld_any_raw_vld[5] & |(ld_any_raw_vld[4:0])) |
1081  (ld_any_raw_vld[4] & |(ld_any_raw_vld[3:0])) |
1082  (ld_any_raw_vld[3] & |(ld_any_raw_vld[2:0])) |
1083  (ld_any_raw_vld[2] & |(ld_any_raw_vld[1:0])) |
1084  (ld_any_raw_vld[1] &   ld_any_raw_vld[0]) ; */
1085
1086//=========================================================================================
1087//  STB Partial Raw ptr generation
1088//=========================================================================================
1089
1090// The loading on the raw output of the stb cam will be significant if the signal
1091// has to fan out to all 4 ctl blocks. That's why the control has to be localized.
1092
1093// Using the ack bit may result in pessimistic issue of partial raw loads.
1094// For a single partial raw or multiple hit case, detecting whether there is any
1095// unacked store is sufficient. Calculation is for no unacked store.
1096// Can we use cam_hit ptr instead !!!
1097
1098//assign  ld_rawp_st_ced_w2 = (~(|(ld_any_raw_vld_d1[7:0] & ~stb_state_ced_d1[7:0]))) ;
1099wire [2:0] wptr_prev ;
1100assign  wptr_prev[2:0] = stb_wptr_prev[2:0] ;
1101wire [7:0] wptr_dcd ; // Bug 4294
1102assign  wptr_dcd[0] = ~wptr_prev[2] & ~wptr_prev[1] & ~wptr_prev[0] ;
1103assign  wptr_dcd[1] = ~wptr_prev[2] & ~wptr_prev[1] &  wptr_prev[0] ;
1104assign  wptr_dcd[2] = ~wptr_prev[2] &  wptr_prev[1] & ~wptr_prev[0] ;
1105assign  wptr_dcd[3] = ~wptr_prev[2] &  wptr_prev[1] &  wptr_prev[0] ;
1106assign  wptr_dcd[4] =  wptr_prev[2] & ~wptr_prev[1] & ~wptr_prev[0] ;
1107assign  wptr_dcd[5] =  wptr_prev[2] & ~wptr_prev[1] &  wptr_prev[0] ;
1108assign  wptr_dcd[6] =  wptr_prev[2] &  wptr_prev[1] & ~wptr_prev[0] ;
1109assign  wptr_dcd[7] =  wptr_prev[2] &  wptr_prev[1] &  wptr_prev[0] ;
1110
1111wire iold_st_ced_g,iold_st_ced_w2 ;
1112assign  iold_st_ced_g = |(wptr_dcd[7:0] & stb_state_ced[7:0]) ;
1113
1114dff_s #(2)   ioldced_stgw2  (
1115  .din  ({iold_st_ced_g,io_ld}), 
1116  .q    ({iold_st_ced_w2,io_ld_w2}),
1117  .clk  (clk), 
1118  .se   (se), .si (), .so ()
1119  );
1120
1121assign  ld_rawp_st_ced_w2 = 
1122        io_ld_w2 ? iold_st_ced_w2 :
1123        (~(|(ld_any_raw_vld_d1[7:0] & ~stb_state_ced_d1[7:0]))) ;
1124
1125// For the case of a single partial raw.
1126assign  ld_rawp_stb_id[0] = stb_cam_hit_ptr[0] ;
1127assign  ld_rawp_stb_id[1] = stb_cam_hit_ptr[1] ;
1128assign  ld_rawp_stb_id[2] = stb_cam_hit_ptr[2] ;
1129/*assign  ld_rawp_stb_id[0] = stb_ld_partial_raw[1] | stb_ld_partial_raw[3] |
1130        stb_ld_partial_raw[5] | stb_ld_partial_raw[7] ;
1131assign  ld_rawp_stb_id[1] = stb_ld_partial_raw[2] | stb_ld_partial_raw[3] |
1132        stb_ld_partial_raw[6] | stb_ld_partial_raw[7] ;
1133assign  ld_rawp_stb_id[2] = stb_ld_partial_raw[4] | stb_ld_partial_raw[5] |
1134        stb_ld_partial_raw[6] | stb_ld_partial_raw[7] ; */
1135
1136   wire [3:0] pipe_thread_g;
1137   assign     pipe_thread_g[0] = ~thrid_g[1] & ~thrid_g[0];
1138   assign     pipe_thread_g[1] = ~thrid_g[1] &  thrid_g[0];
1139   assign     pipe_thread_g[2] =  thrid_g[1] & ~thrid_g[0];
1140   assign     pipe_thread_g[3] =  thrid_g[1] &  thrid_g[0];
1141 
1142assign  stb_state_ced[7:0] = 
1143( pipe_thread_g[0] ? stb_state_ced0[7:0] : 8'b0 ) |
1144( pipe_thread_g[1] ? stb_state_ced1[7:0] : 8'b0 ) |
1145( pipe_thread_g[2] ? stb_state_ced2[7:0] : 8'b0 ) |
1146( pipe_thread_g[3] ? stb_state_ced3[7:0] : 8'b0 );
1147
1148assign  stb_wptr_prev[2:0] = 
1149  (pipe_thread_g[0] ? stb_wrptr0_prev[2:0] : 3'b0) |
1150  (pipe_thread_g[1] ? stb_wrptr1_prev[2:0] : 3'b0) |
1151  (pipe_thread_g[2] ? stb_wrptr2_prev[2:0] : 3'b0) |
1152  (pipe_thread_g[3] ? stb_wrptr3_prev[2:0] : 3'b0);
1153
1154assign  stb_not_empty  =
1155  (pipe_thread_g[0]  & ~lsu_stb_empty[0] ) | 
1156  (pipe_thread_g[1]  & ~lsu_stb_empty[1] ) |
1157  (pipe_thread_g[2]  & ~lsu_stb_empty[2] ) |
1158  (pipe_thread_g[3]  & ~lsu_stb_empty[3] ) ;
1159
1160assign  lsu_stb_empty_buf[3:0] = lsu_stb_empty[3:0] ;
1161assign  lsu_spu_stb_empty[3:0] = lsu_stb_empty[3:0] ;
1162
1163//wire ldstdbl_g ;
1164// stdbl should be qualified with quad_asi_g !!!
1165//assign  ldstdbl_g = ldst_dbl_g & (ld_inst_vld_g | st_inst_vld_g) & ~ldst_fp_g ;
1166
1167// casa_g and stdbl_g may not be required.
1168//assign  ld_rawp_st_ackid_g[2:0] =
1169//  (casa_g | ldstdbl_g | stb_cam_mhit | (io_ld & stb_not_empty))
1170//  ? stb_wptr_prev[2:0] : ld_rawp_stb_id[2:0] ;
1171
1172//===================================================
1173//casa: need st-st order
1174//st cam mhit: cannot figure out the youngest
1175//io: side effect
1176//remove int ldd and quad ldd, why need ldstdbl?
1177//===================================================
1178wire    [2:0]   ld_rawp_st_ackid_g ;
1179
1180assign  ld_rawp_st_ackid_g[2:0] = 
1181  (casa_g | stb_cam_mhit | (io_ld & stb_not_empty))?
1182   stb_wptr_prev[2:0] : ld_rawp_stb_id[2:0] ;
1183   
1184dff_s #(3)  rawpackid_w2 (
1185  .din  (ld_rawp_st_ackid_g[2:0]), 
1186  .q    (ld_rawp_st_ackid_w2[2:0]),
1187  .clk  (clk), 
1188  .se   (se), .si (), .so ()
1189  );
1190
1191
1192   assign lsu_ifu_stbcnt0[3:0] = lsu_stbcnt0[3:0] ;
1193   assign lsu_ifu_stbcnt1[3:0] = lsu_stbcnt1[3:0] ;
1194   assign lsu_ifu_stbcnt2[3:0] = lsu_stbcnt2[3:0] ;
1195   assign lsu_ifu_stbcnt3[3:0] = lsu_stbcnt3[3:0] ;
1196
1197   assign lsu_ffu_stb_full0 =    lsu_stbcnt0[3];
1198   assign lsu_ffu_stb_full1 =    lsu_stbcnt1[3];
1199   assign lsu_ffu_stb_full2 =    lsu_stbcnt2[3];
1200   assign lsu_ffu_stb_full3 =    lsu_stbcnt3[3];
1201   
1202endmodule
1203
Note: See TracBrowser for help on using the repository browser.