Context Navigation

ddr2_phy_calib.v @ 10

Revision 10, 108.1 KB checked in by pntsvt00, 14 years ago (diff)
versione sintetizzabile

Rev	Line
[10]	1	//*****************************************************************************
	2	// (c) Copyright 2006-2009 Xilinx, Inc. All rights reserved.
	3	//
	4	// This file contains confidential and proprietary information
	5	// of Xilinx, Inc. and is protected under U.S. and
	6	// international copyright and other intellectual property
	7	// laws.
	8	//
	9	// DISCLAIMER
	10	// This disclaimer is not a license and does not grant any
	11	// rights to the materials distributed herewith. Except as
	12	// otherwise provided in a valid license issued to you by
	13	// Xilinx, and to the maximum extent permitted by applicable
	14	// law: (1) THESE MATERIALS ARE MADE AVAILABLE "AS IS" AND
	15	// WITH ALL FAULTS, AND XILINX HEREBY DISCLAIMS ALL WARRANTIES
	16	// AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, INCLUDING
	17	// BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NON-
	18	// INFRINGEMENT, OR FITNESS FOR ANY PARTICULAR PURPOSE; and
	19	// (2) Xilinx shall not be liable (whether in contract or tort,
	20	// including negligence, or under any other theory of
	21	// liability) for any loss or damage of any kind or nature
	22	// related to, arising under or in connection with these
	23	// materials, including for any direct, or any indirect,
	24	// special, incidental, or consequential loss or damage
	25	// (including loss of data, profits, goodwill, or any type of
	26	// loss or damage suffered as a result of any action brought
	27	// by a third party) even if such damage or loss was
	28	// reasonably foreseeable or Xilinx had been advised of the
	29	// possibility of the same.
	30	//
	31	// CRITICAL APPLICATIONS
	32	// Xilinx products are not designed or intended to be fail-
	33	// safe, or for use in any application requiring fail-safe
	34	// performance, such as life-support or safety devices or
	35	// systems, Class III medical devices, nuclear facilities,
	36	// applications related to the deployment of airbags, or any
	37	// other applications that could lead to death, personal
	38	// injury, or severe property or environmental damage
	39	// (individually and collectively, "Critical
	40	// Applications"). Customer assumes the sole risk and
	41	// liability of any use of Xilinx products in Critical
	42	// Applications, subject only to applicable laws and
	43	// regulations governing limitations on product liability.
	44	//
	45	// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS
	46	// PART OF THIS FILE AT ALL TIMES.
	47	//*****************************************************************************
	48	// ____ ____
	49	// / /\/ /
	50	// /___/ \ / Vendor: Xilinx
	51	// \ \ \/ Version: 3.6
	52	// \ \ Application: MIG
	53	// / / Filename: ddr2_phy_calib.v
	54	// /___/ /\ Date Last Modified: $Date: 2010/06/29 12:03:43 $
	55	// \ \ / \ Date Created: Thu Aug 10 2006
	56	// \___\/\___\
	57	//
	58	//Device: Virtex-5
	59	//Design Name: DDR2
	60	//Purpose:
	61	// This module handles calibration after memory initialization.
	62	//Reference:
	63	//Revision History:
	64	// Rev 1.1 - Default statement is added for the CASE statement of
	65	// rdd_mux_sel logic. PK. 03/23/09
	66	// Rev 1.2 - Change training pattern detected for stage 3 calibration.
	67	// Use 2-bits per DQS group for stage 3 pattern detection.
	68	// RC. 09/21/09
	69	//*****************************************************************************
	70
	71	`timescale 1ns/1ps
	72
	73	module ddr2_phy_calib #
	74	(
	75	// Following parameters are for 72-bit RDIMM design (for ML561 Reference
	76	// board design). Actual values may be different. Actual parameters values
	77	// are passed from design top module dram module. Please refer to
	78	// the dram module for actual values.
	79	parameter DQ_WIDTH = 72,
	80	parameter DQ_BITS = 7,
	81	parameter DQ_PER_DQS = 8,
	82	parameter DQS_BITS = 4,
	83	parameter DQS_WIDTH = 9,
	84	parameter ADDITIVE_LAT = 0,
	85	parameter CAS_LAT = 5,
	86	parameter REG_ENABLE = 1,
	87	parameter CLK_PERIOD = 3000,
	88	parameter SIM_ONLY = 0,
	89	parameter DEBUG_EN = 0
	90	)
	91	(
	92	input clk,
	93	input clkdiv,
	94	input rstdiv,
	95	input [3:0] calib_start,
	96	input ctrl_rden,
	97	input phy_init_rden,
	98	input [DQ_WIDTH-1:0] rd_data_rise,
	99	input [DQ_WIDTH-1:0] rd_data_fall,
	100	input calib_ref_done,
	101	output reg [3:0] calib_done,
	102	output reg calib_ref_req,
	103	output [DQS_WIDTH-1:0] calib_rden,
	104	output reg [DQS_WIDTH-1:0] calib_rden_sel,
	105	output reg dlyrst_dq,
	106	output reg [DQ_WIDTH-1:0] dlyce_dq,
	107	output reg [DQ_WIDTH-1:0] dlyinc_dq,
	108	output reg dlyrst_dqs,
	109	output reg [DQS_WIDTH-1:0] dlyce_dqs,
	110	output reg [DQS_WIDTH-1:0] dlyinc_dqs,
	111	output reg [DQS_WIDTH-1:0] dlyrst_gate,
	112	output reg [DQS_WIDTH-1:0] dlyce_gate,
	113	output reg [DQS_WIDTH-1:0] dlyinc_gate,
	114	//(* XIL_PAR_NO_REG_ORDER = "TRUE", XIL_PAR_PATH="Q->u_iodelay_dq_ce.DATAIN", syn_keep = "1", keep = "TRUE"*)
	115	output [DQS_WIDTH-1:0] en_dqs,
	116	output [DQS_WIDTH-1:0] rd_data_sel,
	117	// Debug signals (optional use)
	118	input dbg_idel_up_all,
	119	input dbg_idel_down_all,
	120	input dbg_idel_up_dq,
	121	input dbg_idel_down_dq,
	122	input dbg_idel_up_dqs,
	123	input dbg_idel_down_dqs,
	124	input dbg_idel_up_gate,
	125	input dbg_idel_down_gate,
	126	input [DQ_BITS-1:0] dbg_sel_idel_dq,
	127	input dbg_sel_all_idel_dq,
	128	input [DQS_BITS:0] dbg_sel_idel_dqs,
	129	input dbg_sel_all_idel_dqs,
	130	input [DQS_BITS:0] dbg_sel_idel_gate,
	131	input dbg_sel_all_idel_gate,
	132	output [3:0] dbg_calib_done,
	133	output [3:0] dbg_calib_err,
	134	output [(6*DQ_WIDTH)-1:0] dbg_calib_dq_tap_cnt,
	135	output [(6*DQS_WIDTH)-1:0] dbg_calib_dqs_tap_cnt,
	136	output [(6*DQS_WIDTH)-1:0] dbg_calib_gate_tap_cnt,
	137	output [DQS_WIDTH-1:0] dbg_calib_rd_data_sel,
	138	output [(5*DQS_WIDTH)-1:0] dbg_calib_rden_dly,
	139	output [(5*DQS_WIDTH)-1:0] dbg_calib_gate_dly
	140	);
	141
	142	// minimum time (in IDELAY taps) for which capture data must be stable for
	143	// algorithm to consider
	144	localparam MIN_WIN_SIZE = 5;
	145	// IDEL_SET_VAL = (# of cycles - 1) to wait after changing IDELAY value
	146	// we only have to wait enough for input with new IDELAY value to
	147	// propagate through pipeline stages.
	148	localparam IDEL_SET_VAL = 3'b111;
	149	// # of clock cycles to delay read enable to determine if read data pattern
	150	// is correct for stage 3/4 (RDEN, DQS gate) calibration
	151	localparam CALIB_RDEN_PIPE_LEN = 31;
	152	// translate CAS latency into number of clock cycles for read valid delay
	153	// determination. Really only needed for CL = 2.5 (set to 2)
	154	localparam CAS_LAT_RDEN = (CAS_LAT == 25) ? 2 : CAS_LAT;
	155	// an SRL32 is used to delay CTRL_RDEN to generate read valid signal. This
	156	// is the minimum possible value of the delay through the SRL32
	157	localparam RDEN_BASE_DELAY = CAS_LAT_RDEN + ADDITIVE_LAT + REG_ENABLE;
	158	// an SRL32 is used to delay the CTRL_RDEN from the read postamble DQS
	159	// gate. This is min possible value the SRL32 delay can be:
	160	// - Delay from end of deassertion of CTRL_RDEN to last falling edge of
	161	// read burst = 3.5 (CTRL_RDEN -> CAS delay) + 3 (min CAS latency) = 6.5
	162	// - Minimum time for DQS gate circuit to be generated:
	163	// * 1 cyc to register CTRL_RDEN from controller
	164	// * 1 cyc after RDEN_CTRL falling edge
	165	// * 1 cyc min through SRL32
	166	// * 1 cyc through SRL32 output flop
	167	// * 0 (<1) cyc of synchronization to DQS domain via IDELAY
	168	// * 1 cyc of delay through IDDR to generate CE to DQ IDDR's
	169	// Total = 5 cyc < 6.5 cycles
	170	// The total should be less than 5.5 cycles to account for prop delays
	171	// adding one cycle to the synchronization time via the IDELAY.
	172	// NOTE: Value differs because of optional pipeline register added
	173	// for case of RDEN_BASE_DELAY > 3 to improve timing
	174	localparam GATE_BASE_DELAY = RDEN_BASE_DELAY - 3;
	175	localparam GATE_BASE_INIT = (GATE_BASE_DELAY <= 1) ? 0 : GATE_BASE_DELAY;
	176	// used for RDEN calibration: difference between shift value used during
	177	// calibration, and shift value for actual RDEN SRL. Only applies when
	178	// RDEN edge is immediately captured by CLKDIV0. If not (depends on phase
	179	// of CLK0 and CLKDIV0 when RDEN is asserted), then add 1 to this value.
	180	localparam CAL3_RDEN_SRL_DLY_DELTA = 6;
	181	// fix minimum value of DQS to be 1 to handle the case where there's only
	182	// one DQS group. We could also enforce that user always inputs minimum
	183	// value of 1 for DQS_BITS (even when DQS_WIDTH=1). Leave this as safeguard
	184	// Assume we don't have to do this for DQ, DQ_WIDTH always > 1
	185	localparam DQS_BITS_FIX = (DQS_BITS == 0) ? 1 : DQS_BITS;
	186	// how many taps to "pre-delay" DQ before stg 1 calibration - not needed for
	187	// current calibration, but leave for debug
	188	localparam DQ_IDEL_INIT = 6'b000000;
	189	// # IDELAY taps per bit time (i.e. half cycle). Limit to 63.
	190	localparam integer BIT_TIME_TAPS = (CLK_PERIOD/150 < 64) ?
	191	CLK_PERIOD/150 : 63;
	192
	193	// used in various places during stage 4 cal: (1) determines maximum taps
	194	// to increment when finding right edge, (2) amount to decrement after
	195	// finding left edge, (3) amount to increment after finding right edge
	196	localparam CAL4_IDEL_BIT_VAL = (BIT_TIME_TAPS >= 6'b100000) ?
	197	6'b100000 : BIT_TIME_TAPS;
	198
	199	localparam CAL1_IDLE = 4'h0;
	200	localparam CAL1_INIT = 4'h1;
	201	localparam CAL1_INC_IDEL = 4'h2;
	202	localparam CAL1_FIND_FIRST_EDGE = 4'h3;
	203	localparam CAL1_FIRST_EDGE_IDEL_WAIT = 4'h4;
	204	localparam CAL1_FOUND_FIRST_EDGE_WAIT = 4'h5;
	205	localparam CAL1_FIND_SECOND_EDGE = 4'h6;
	206	localparam CAL1_SECOND_EDGE_IDEL_WAIT = 4'h7;
	207	localparam CAL1_CALC_IDEL = 4'h8;
	208	localparam CAL1_DEC_IDEL = 4'h9;
	209	localparam CAL1_DONE = 4'hA;
	210
	211	localparam CAL2_IDLE = 4'h0;
	212	localparam CAL2_INIT = 4'h1;
	213	localparam CAL2_INIT_IDEL_WAIT = 4'h2;
	214	localparam CAL2_FIND_EDGE_POS = 4'h3;
	215	localparam CAL2_FIND_EDGE_IDEL_WAIT_POS = 4'h4;
	216	localparam CAL2_FIND_EDGE_NEG = 4'h5;
	217	localparam CAL2_FIND_EDGE_IDEL_WAIT_NEG = 4'h6;
	218	localparam CAL2_DEC_IDEL = 4'h7;
	219	localparam CAL2_DONE = 4'h8;
	220
	221	localparam CAL3_IDLE = 3'h0;
	222	localparam CAL3_INIT = 3'h1;
	223	localparam CAL3_DETECT = 3'h2;
	224	localparam CAL3_RDEN_PIPE_CLR_WAIT = 3'h3;
	225	localparam CAL3_DONE = 3'h4;
	226
	227	localparam CAL4_IDLE = 3'h0;
	228	localparam CAL4_INIT = 3'h1;
	229	localparam CAL4_FIND_WINDOW = 3'h2;
	230	localparam CAL4_FIND_EDGE = 3'h3;
	231	localparam CAL4_IDEL_WAIT = 3'h4;
	232	localparam CAL4_RDEN_PIPE_CLR_WAIT = 3'h5;
	233	localparam CAL4_ADJ_IDEL = 3'h6;
	234	localparam CAL4_DONE = 3'h7;
	235
	236	integer i, j;
	237
	238	reg [5:0] cal1_bit_time_tap_cnt;
	239	reg [1:0] cal1_data_chk_last;
	240	reg cal1_data_chk_last_valid;
	241	reg [1:0] cal1_data_chk_r;
	242	reg cal1_dlyce_dq;
	243	reg cal1_dlyinc_dq;
	244	reg cal1_dqs_dq_init_phase;
	245	reg cal1_detect_edge;
	246	reg cal1_detect_stable;
	247	reg cal1_found_second_edge;
	248	reg cal1_found_rising;
	249	reg cal1_found_window;
	250	reg cal1_first_edge_done;
	251	reg [5:0] cal1_first_edge_tap_cnt;
	252	reg [6:0] cal1_idel_dec_cnt;
	253	reg [5:0] cal1_idel_inc_cnt;
	254	reg [5:0] cal1_idel_max_tap;
	255	reg cal1_idel_max_tap_we;
	256	reg [5:0] cal1_idel_tap_cnt;
	257	reg cal1_idel_tap_limit_hit;
	258	reg [6:0] cal1_low_freq_idel_dec;
	259	reg cal1_ref_req;
	260	wire cal1_refresh;
	261	reg [3:0] cal1_state;
	262	reg [3:0] cal1_window_cnt;
	263	reg cal2_curr_sel;
	264	wire cal2_detect_edge;
	265	reg cal2_dlyce_dqs;
	266	reg cal2_dlyinc_dqs;
	267	reg [5:0] cal2_idel_dec_cnt;
	268	reg [5:0] cal2_idel_tap_cnt;
	269	reg [5:0] cal2_idel_tap_limit;
	270	reg cal2_idel_tap_limit_hit;
	271	reg cal2_rd_data_fall_last_neg;
	272	reg cal2_rd_data_fall_last_pos;
	273	reg cal2_rd_data_last_valid_neg;
	274	reg cal2_rd_data_last_valid_pos;
	275	reg cal2_rd_data_rise_last_neg;
	276	reg cal2_rd_data_rise_last_pos;
	277	reg [DQS_WIDTH-1:0] cal2_rd_data_sel;
	278	wire cal2_rd_data_sel_edge;
	279	reg [DQS_WIDTH-1:0] cal2_rd_data_sel_r;
	280	reg cal2_ref_req;
	281	reg [3:0] cal2_state;
	282	reg cal3_data_match;
	283	reg cal3_data_match_stgd;
	284	wire cal3_data_valid;
	285	wire cal3_match_found;
	286	wire [4:0] cal3_rden_dly;
	287	reg [4:0] cal3_rden_srl_a;
	288	reg [2:0] cal3_state;
	289	wire cal4_data_good;
	290	reg cal4_data_match;
	291	reg cal4_data_match_stgd;
	292	wire cal4_data_valid;
	293	reg cal4_dlyce_gate;
	294	reg cal4_dlyinc_gate;
	295	reg cal4_dlyrst_gate;
	296	reg [4:0] cal4_gate_srl_a;
	297	reg [5:0] cal4_idel_adj_cnt;
	298	reg cal4_idel_adj_inc;
	299	reg cal4_idel_bit_tap;
	300	reg [5:0] cal4_idel_tap_cnt;
	301	reg cal4_idel_max_tap;
	302	reg [4:0] cal4_rden_srl_a;
	303	reg cal4_ref_req;
	304	reg cal4_seek_left;
	305	reg cal4_stable_window;
	306	reg [2:0] cal4_state;
	307	reg [3:0] cal4_window_cnt;
	308	reg [3:0] calib_done_tmp; // only for stg1/2/4
	309	reg calib_ctrl_gate_pulse_r;
	310	reg calib_ctrl_rden;
	311	reg calib_ctrl_rden_r;
	312	wire calib_ctrl_rden_negedge;
	313	reg calib_ctrl_rden_negedge_r;
	314	reg [3:0] calib_done_r;
	315	reg [3:0] calib_err;
	316	reg [1:0] calib_err_2;
	317	wire calib_init_gate_pulse;
	318	reg calib_init_gate_pulse_r;
	319	reg calib_init_gate_pulse_r1;
	320	reg calib_init_rden;
	321	reg calib_init_rden_r;
	322	reg [4:0] calib_rden_srl_a;
	323	wire [4:0] calib_rden_srl_a_r;
	324	reg [(5*DQS_WIDTH)-1:0] calib_rden_dly;
	325	reg calib_rden_edge_r;
	326	reg [4:0] calib_rden_pipe_cnt;
	327	wire calib_rden_srl_out;
	328	wire calib_rden_srl_out_r;
	329	reg calib_rden_srl_out_r1;
	330	reg calib_rden_valid;
	331	reg calib_rden_valid_stgd;
	332	reg [DQ_BITS-1:0] count_dq;
	333	reg [DQS_BITS_FIX-1:0] count_dqs;
	334	reg [DQS_BITS_FIX-1:0] count_gate;
	335	reg [DQS_BITS_FIX-1:0] count_rden;
	336	reg ctrl_rden_r;
	337	wire dlyce_or;
	338	reg [(5*DQS_WIDTH)-1:0] gate_dly;
	339	wire [(5*DQS_WIDTH)-1:0] gate_dly_r;
	340	wire gate_srl_in;
	341	wire [DQS_WIDTH-1:0] gate_srl_out;
	342	wire [DQS_WIDTH-1:0] gate_srl_out_r;
	343	reg [2:0] idel_set_cnt;
	344	wire idel_set_wait;
	345	reg [DQ_BITS-1:0] next_count_dq;
	346	reg [DQS_BITS_FIX-1:0] next_count_dqs;
	347	reg [DQS_BITS_FIX-1:0] next_count_gate;
	348	reg phy_init_rden_r;
	349	reg phy_init_rden_r1;
	350	reg [DQS_WIDTH-1:0] rd_data_fall_1x_bit1_r1;
	351	reg [DQ_WIDTH-1:0] rd_data_fall_1x_r;
	352	reg [DQS_WIDTH-1:0] rd_data_fall_1x_r1;
	353	reg [DQS_WIDTH-1:0] rd_data_fall_2x_bit1_r;
	354	reg [DQS_WIDTH-1:0] rd_data_fall_2x_r;
	355	wire [DQS_WIDTH-1:0] rd_data_fall_chk_q1;
	356	wire [DQS_WIDTH-1:0] rd_data_fall_chk_q1_bit1;
	357	wire [DQS_WIDTH-1:0] rd_data_fall_chk_q2;
	358	wire [DQS_WIDTH-1:0] rd_data_fall_chk_q2_bit1;
	359	reg [DQS_WIDTH-1:0] rd_data_rise_1x_bit1_r1;
	360	reg [DQ_WIDTH-1:0] rd_data_rise_1x_r;
	361	reg [DQS_WIDTH-1:0] rd_data_rise_1x_r1;
	362	reg [DQS_WIDTH-1:0] rd_data_rise_2x_bit1_r;
	363	reg [DQS_WIDTH-1:0] rd_data_rise_2x_r;
	364	wire [DQS_WIDTH-1:0] rd_data_rise_chk_q1;
	365	wire [DQS_WIDTH-1:0] rd_data_rise_chk_q1_bit1;
	366	wire [DQS_WIDTH-1:0] rd_data_rise_chk_q2;
	367	wire [DQS_WIDTH-1:0] rd_data_rise_chk_q2_bit1;
	368	reg rdd_fall_q1;
	369	reg rdd_fall_q1_bit1;
	370	reg rdd_fall_q1_bit1_r;
	371	reg rdd_fall_q1_bit1_r1;
	372	reg rdd_fall_q1_r;
	373	reg rdd_fall_q1_r1;
	374	reg rdd_fall_q2;
	375	reg rdd_fall_q2_bit1;
	376	reg rdd_fall_q2_bit1_r;
	377	reg rdd_fall_q2_r;
	378	reg rdd_rise_q1;
	379	reg rdd_rise_q1_bit1;
	380	reg rdd_rise_q1_bit1_r;
	381	reg rdd_rise_q1_bit1_r1;
	382	reg rdd_rise_q1_r;
	383	reg rdd_rise_q1_r1;
	384	reg rdd_rise_q2;
	385	reg rdd_rise_q2_bit1;
	386	reg rdd_rise_q2_bit1_r;
	387	reg rdd_rise_q2_r;
	388	reg [DQS_BITS_FIX-1:0] rdd_mux_sel;
	389	reg rden_dec;
	390	reg [(5*DQS_WIDTH)-1:0] rden_dly;
	391	wire [(5*DQS_WIDTH)-1:0] rden_dly_r;
	392	reg [4:0] rden_dly_0;
	393	reg rden_inc;
	394	reg [DQS_WIDTH-1:0] rden_mux;
	395	wire [DQS_WIDTH-1:0] rden_srl_out;
	396
	397	// Debug
	398	integer x;
	399	reg [5:0] dbg_dq_tap_cnt [DQ_WIDTH-1:0];
	400	reg [5:0] dbg_dqs_tap_cnt [DQS_WIDTH-1:0];
	401	reg [5:0] dbg_gate_tap_cnt [DQS_WIDTH-1:0];
	402
	403	//***************************************************************************
	404	// Debug output ("dbg_phy_calib_*")
	405	// NOTES:
	406	// 1. All debug outputs coming out of PHY_CALIB are clocked off CLKDIV0,
	407	// although they are also static after calibration is complete. This
	408	// means the user can either connect them to a Chipscope ILA, or to
	409	// either a sync/async VIO input block. Using an async VIO has the
	410	// advantage of not requiring these paths to meet cycle-to-cycle timing.
	411	// 2. The widths of most of these debug buses are dependent on the # of
	412	// DQS/DQ bits (e.g. dq_tap_cnt width = 6 * (# of DQ bits))
	413	// SIGNAL DESCRIPTION:
	414	// 1. calib_done: 4 bits - each one asserted as each phase of calibration
	415	// is completed.
	416	// 2. calib_err: 4 bits - each one asserted when a calibration error
	417	// encountered for that stage. Some of these bits may not
	418	// be used (not all cal stages report an error).
	419	// 3. dq_tap_cnt: final IDELAY tap counts for all DQ IDELAYs
	420	// 4. dqs_tap_cnt: final IDELAY tap counts for all DQS IDELAYs
	421	// 5. gate_tap_cnt: final IDELAY tap counts for all DQS gate
	422	// synchronization IDELAYs
	423	// 6. rd_data_sel: final read capture MUX (either "positive" or "negative"
	424	// edge capture) settings for all DQS groups
	425	// 7. rden_dly: related to # of cycles after issuing a read until when
	426	// read data is valid - for all DQS groups
	427	// 8. gate_dly: related to # of cycles after issuing a read until when
	428	// clock enable for all DQ's is deasserted to prevent
	429	// effect of DQS postamble glitch - for all DQS groups
	430	//***************************************************************************
	431
	432	//*****************************************************************
	433	// Record IDELAY tap values by "snooping" IDELAY control signals
	434	//*****************************************************************
	435
	436	// record DQ IDELAY tap values: one 6-bit shadow counter per DQ bit, driven by the IDELAY control outputs
	437	genvar dbg_dq_tc_i;
	438	generate
	439	  for (dbg_dq_tc_i = 0; dbg_dq_tc_i < DQ_WIDTH;
	440	       dbg_dq_tc_i = dbg_dq_tc_i + 1) begin: gen_dbg_dq_tap_cnt
	441	    assign dbg_calib_dq_tap_cnt[(6*dbg_dq_tc_i)+5:(6*dbg_dq_tc_i)]
	442	      = dbg_dq_tap_cnt[dbg_dq_tc_i]; // 6-bit slice of the flattened debug bus
	443	    always @(posedge clkdiv)
	444	      if (rstdiv | dlyrst_dq) // clear on module reset or DQ IDELAY reset
	445	        dbg_dq_tap_cnt[dbg_dq_tc_i] <= 6'b000000;
	446	      else
	447	        if (dlyce_dq[dbg_dq_tc_i]) // count only when this bit's CE is asserted
	448	          if (dlyinc_dq[dbg_dq_tc_i])
	449	            dbg_dq_tap_cnt[dbg_dq_tc_i]
	450	              <= dbg_dq_tap_cnt[dbg_dq_tc_i] + 1;
	451	          else // this else pairs with the dlyinc test: CE with INC low decrements
	452	            dbg_dq_tap_cnt[dbg_dq_tc_i]
	453	              <= dbg_dq_tap_cnt[dbg_dq_tc_i] - 1;
	454	  end
	455	endgenerate
	456
	457	// record DQS IDELAY tap values: one 6-bit shadow counter per DQS, driven by the IDELAY control outputs
	458	genvar dbg_dqs_tc_i;
	459	generate
	460	  for (dbg_dqs_tc_i = 0; dbg_dqs_tc_i < DQS_WIDTH;
	461	       dbg_dqs_tc_i = dbg_dqs_tc_i + 1) begin: gen_dbg_dqs_tap_cnt
	462	    assign dbg_calib_dqs_tap_cnt[(6*dbg_dqs_tc_i)+5:(6*dbg_dqs_tc_i)]
	463	      = dbg_dqs_tap_cnt[dbg_dqs_tc_i]; // 6-bit slice of the flattened debug bus
	464	    always @(posedge clkdiv)
	465	      if (rstdiv | dlyrst_dqs) // clear on module reset or DQS IDELAY reset
	466	        dbg_dqs_tap_cnt[dbg_dqs_tc_i] <= 6'b000000;
	467	      else
	468	        if (dlyce_dqs[dbg_dqs_tc_i]) // count only when this DQS's CE is asserted
	469	          if (dlyinc_dqs[dbg_dqs_tc_i])
	470	            dbg_dqs_tap_cnt[dbg_dqs_tc_i]
	471	              <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] + 1;
	472	          else // this else pairs with the dlyinc test: CE with INC low decrements
	473	            dbg_dqs_tap_cnt[dbg_dqs_tc_i]
	474	              <= dbg_dqs_tap_cnt[dbg_dqs_tc_i] - 1;
	475	  end
	476	endgenerate
	477
	478	// record DQS gate IDELAY tap values: one 6-bit shadow counter per DQS group, driven by the gate IDELAY controls
	479	genvar dbg_gate_tc_i;
	480	generate
	481	  for (dbg_gate_tc_i = 0; dbg_gate_tc_i < DQS_WIDTH;
	482	       dbg_gate_tc_i = dbg_gate_tc_i + 1) begin: gen_dbg_gate_tap_cnt
	483	    assign dbg_calib_gate_tap_cnt[(6*dbg_gate_tc_i)+5:(6*dbg_gate_tc_i)]
	484	      = dbg_gate_tap_cnt[dbg_gate_tc_i]; // 6-bit slice of the flattened debug bus
	485	    always @(posedge clkdiv)
	486	      if (rstdiv | dlyrst_gate[dbg_gate_tc_i]) // gate reset is per group, unlike DQ/DQS
	487	        dbg_gate_tap_cnt[dbg_gate_tc_i] <= 6'b000000;
	488	      else
	489	        if (dlyce_gate[dbg_gate_tc_i]) // count only when this group's CE is asserted
	490	          if (dlyinc_gate[dbg_gate_tc_i])
	491	            dbg_gate_tap_cnt[dbg_gate_tc_i]
	492	              <= dbg_gate_tap_cnt[dbg_gate_tc_i] + 1;
	493	          else // this else pairs with the dlyinc test: CE with INC low decrements
	494	            dbg_gate_tap_cnt[dbg_gate_tc_i]
	495	              <= dbg_gate_tap_cnt[dbg_gate_tc_i] - 1;
	496	  end
	497	endgenerate
	498
	499	assign dbg_calib_gate_dly    = gate_dly;          // per-group gate delay settings
	500	assign dbg_calib_rden_dly    = rden_dly;          // per-group read-enable delay settings
	501	assign dbg_calib_rd_data_sel = cal2_rd_data_sel;  // stage 2 capture-edge selection
	502	assign dbg_calib_err         = calib_err;         // per-stage error flags
	503	assign dbg_calib_done        = calib_done;        // per-stage done flags
	504
	505	//***************************************************************************
	506	// Read data pipelining, and read data "ISERDES" data width expansion
	507	//***************************************************************************
	508
	509	// For all data bits, register incoming capture data to slow clock to improve
	510	// timing. Adding single pipeline stage does not affect functionality (as
	511	// long as we make sure to wait extra clock cycle after changing DQ IDELAY)
	512	// Also note in this case that we're "missing" every other clock cycle's
	513	// worth of data capture since we're sync'ing to the slow clock. This is
	514	// fine for stage 1 and stage 2 cal, but not for stage 3 and 4 (see below
	515	// for different circuit to handle those stages)
	516	always @(posedge clkdiv) begin // register the full-width capture buses on the slow clock
	517	  rd_data_fall_1x_r <= rd_data_fall;
	518	  rd_data_rise_1x_r <= rd_data_rise;
	519	end
	520
	521	// For every DQ_PER_DQS bit, generate what is essentially a ISERDES-type
	522	// data width expander. Will need this for stage 3 and 4 cal, where we need
	523	// to compare data over consecutive clock cycles. We can also use this for
	524	// stage 2 as well (stage 2 doesn't require every bit to be looked at, only
	525	// one bit per DQS group)
	526	// MIG 3.3: Expand to use lower two bits of each DQS group - use for stage
	527	// 3 calibration for added robustness, since we will be checking for the
	528	// training pattern from the memory even when the data bus is 3-stated.
	529	// Theoretically it is possible for whatever garbage data is on the bus
	530	// to be interpreted as the training sequence, although this can be made
	531	// very unlikely by the choice of training sequence (bit sequence, length)
	532	// and the number of bits compared for each DQS group.
	533	genvar rdd_i;
	534	generate
	535	  for (rdd_i = 0; rdd_i < DQS_WIDTH; rdd_i = rdd_i + 1) begin: gen_rdd
	536	    // Stage 1 (fast clock): register bit 0 and bit 1 of this DQS group,
	537	    // rise and fall, on CLK so the samples can be handed over to the
	538	    // slow clock domain by the second stage below
	539	    always @(posedge clk) begin
	540	      rd_data_fall_2x_bit1_r[rdd_i] <= rd_data_fall[(rdd_i*DQ_PER_DQS)+1];
	541	      rd_data_rise_2x_bit1_r[rdd_i] <= rd_data_rise[(rdd_i*DQ_PER_DQS)+1];
	542	      rd_data_fall_2x_r[rdd_i]      <= rd_data_fall[(rdd_i*DQ_PER_DQS)];
	543	      rd_data_rise_2x_r[rdd_i]      <= rd_data_rise[(rdd_i*DQ_PER_DQS)];
	544	    end
	545	    // Stage 2 (slow clock): re-register the stage 1 samples on CLKDIV. The
	546	    // rd_data_*_1x_r flops (clocked elsewhere) form the other half of it
	547	    always @(posedge clkdiv) begin
	548	      rd_data_fall_1x_bit1_r1[rdd_i] <= rd_data_fall_2x_bit1_r[rdd_i];
	549	      rd_data_rise_1x_bit1_r1[rdd_i] <= rd_data_rise_2x_bit1_r[rdd_i];
	550	      rd_data_fall_1x_r1[rdd_i]      <= rd_data_fall_2x_r[rdd_i];
	551	      rd_data_rise_1x_r1[rdd_i]      <= rd_data_rise_2x_r[rdd_i];
	552	    end
	553	    // Four taps per bit are now available: rise/fall over the last two
	554	    // fast clock cycles. Their time ordering is one of:
	555	    //   (1) Q2 = data @ time n,     Q1 = data @ time n+1
	556	    //   (2) Q2 = data @ time n - 1, Q1 = data @ time n ("staggered")
	557	    // The calibration stage consuming these taps must work out which case
	558	    // applies, if it matters to that stage at all (e.g. stage 2 cal does
	559	    // not care about the ordering)
	560	    assign rd_data_fall_chk_q2[rdd_i] =
	561	      rd_data_fall_1x_r1[rdd_i];
	562	    assign rd_data_fall_chk_q1[rdd_i] =
	563	      rd_data_fall_1x_r[(rdd_i*DQ_PER_DQS)];
	564	    assign rd_data_rise_chk_q2[rdd_i] =
	565	      rd_data_rise_1x_r1[rdd_i];
	566	    assign rd_data_rise_chk_q1[rdd_i] =
	567	      rd_data_rise_1x_r[(rdd_i*DQ_PER_DQS)];
	568	    // MIG 3.3: same four taps for the second bit of the DQS group (stage 3 cal)
	569	    assign rd_data_fall_chk_q2_bit1[rdd_i] =
	570	      rd_data_fall_1x_bit1_r1[rdd_i];
	571	    assign rd_data_fall_chk_q1_bit1[rdd_i] =
	572	      rd_data_fall_1x_r[(rdd_i*DQ_PER_DQS)+1];
	573	    assign rd_data_rise_chk_q2_bit1[rdd_i] =
	574	      rd_data_rise_1x_bit1_r1[rdd_i];
	575	    assign rd_data_rise_chk_q1_bit1[rdd_i] =
	576	      rd_data_rise_1x_r[(rdd_i*DQ_PER_DQS)+1];
	577	  end
	578	endgenerate
	579
	580	//*****************************************************************
	581	// Outputs of these simplified ISERDES circuits then feed MUXes based on
	582	// which DQ the current calibration algorithm needs to look at
	583	//*****************************************************************
	584
	585	// Registered select for the read-data check MUX, chosen from the low three
	586	// calib_done bits. The register adds a pipeline stage; whichever cal stage
	587	// consumes the MUX output is assumed to wait long enough after changing it
	588	always @(posedge clkdiv) begin
	589	  (* full_case, parallel_case *) case (calib_done[2:0])
	590	    3'b111:  rdd_mux_sel <= next_count_gate;
	591	    3'b011:  rdd_mux_sel <= count_rden;
	592	    3'b001:  rdd_mux_sel <= next_count_dqs;
	593	    default: rdd_mux_sel <= {DQS_BITS_FIX{1'bx}};
	594	  endcase
	595	end
	596
	597	always @(posedge clkdiv) begin // register the taps of the DQS group picked by rdd_mux_sel
	598	  rdd_fall_q2_bit1 <= rd_data_fall_chk_q2_bit1[rdd_mux_sel];
	599	  rdd_fall_q1_bit1 <= rd_data_fall_chk_q1_bit1[rdd_mux_sel];
	600	  rdd_rise_q2_bit1 <= rd_data_rise_chk_q2_bit1[rdd_mux_sel];
	601	  rdd_rise_q1_bit1 <= rd_data_rise_chk_q1_bit1[rdd_mux_sel];
	602	  rdd_fall_q2      <= rd_data_fall_chk_q2[rdd_mux_sel];
	603	  rdd_fall_q1      <= rd_data_fall_chk_q1[rdd_mux_sel];
	604	  rdd_rise_q2      <= rd_data_rise_chk_q2[rdd_mux_sel];
	605	  rdd_rise_q1      <= rd_data_rise_chk_q1[rdd_mux_sel];
	606	end
	607
	608	//***************************************************************************
	609	// Demultiplexor to control (reset, increment, decrement) IDELAY tap values
	610	// For DQ:
	611	// STG1: for per-bit-deskew, only inc/dec the current DQ. For non-per
	612	// deskew, increment all bits in the current DQS set
	613	// STG2: inc/dec all DQ's in the current DQS set.
	614	// NOTE: Nice to add some error checking logic here (or elsewhere in the
	615	// code) to check if logic attempts to overflow tap value
	616	//***************************************************************************
	617
	618	// DQ/DQS IDELAY resets are asserted only while RSTDIV is high; DLYRST is not
	619	// used afterwards. Revisit if recalibration after the initial reset is needed
	620	always @(posedge clkdiv)
	621	  if (rstdiv) begin
	622	    dlyrst_dqs <= 1'b1;
	623	    dlyrst_dq  <= 1'b1;
	624	  end else begin
	625	    dlyrst_dqs <= 1'b0;
	626	    dlyrst_dq  <= 1'b0;
	627	  end
	628
	629	always @(posedge clkdiv) begin // DQ/DQS IDELAY CE/INC demux; priority: stage 1, stage 2, debug
	630	  if (rstdiv) begin
	631	    dlyce_dq <= 'b0;
	632	    dlyinc_dq <= 'b0;
	633	    dlyce_dqs <= 'b0;
	634	    dlyinc_dqs <= 'b0;
	635	  end else begin
	636	    dlyce_dq <= 'b0; // defaults: every CE/INC is low unless overridden below
	637	    dlyinc_dq <= 'b0;
	638	    dlyce_dqs <= 'b0;
	639	    dlyinc_dqs <= 'b0;
	640
	641	    // stage 1 cal: change only specified DQ
	642	    if (cal1_dlyce_dq) begin
	643	      if (SIM_ONLY == 0) begin
	644	        dlyce_dq[count_dq] <= 1'b1;
	645	        dlyinc_dq[count_dq] <= cal1_dlyinc_dq;
	646	      end else begin
	647	        // if simulation, then calibrate only first DQ, apply results
	648	        // to all DQs (i.e. assume delay on all DQs is the same)
	649	        for (i = 0; i < DQ_WIDTH; i = i + 1) begin: loop_sim_dq_dly
	650	          dlyce_dq[i] <= 1'b1;
	651	          dlyinc_dq[i] <= cal1_dlyinc_dq;
	652	        end
	653	      end
	654	    end else if (cal2_dlyce_dqs) begin
	655	      // stage 2 cal: change DQS and all corresponding DQ's
	656	      if (SIM_ONLY == 0) begin
	657	        dlyce_dqs[count_dqs] <= 1'b1;
	658	        dlyinc_dqs[count_dqs] <= cal2_dlyinc_dqs;
	659	        for (i = 0; i < DQ_PER_DQS; i = i + 1) begin: loop_dqs_dly
	660	          dlyce_dq[(DQ_PER_DQS*count_dqs)+i] <= 1'b1;
	661	          dlyinc_dq[(DQ_PER_DQS*count_dqs)+i] <= cal2_dlyinc_dqs;
	662	        end
	663	      end else begin
	664	        for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_dqs_dly
	665	          // if simulation, then calibrate only first DQS
	666	          dlyce_dqs[i] <= 1'b1;
	667	          dlyinc_dqs[i] <= cal2_dlyinc_dqs;
	668	          for (j = 0; j < DQ_PER_DQS; j = j + 1) begin: loop_sim_dq_dqs_dly
	669	            dlyce_dq[(DQ_PER_DQS*i)+j] <= 1'b1;
	670	            dlyinc_dq[(DQ_PER_DQS*i)+j] <= cal2_dlyinc_dqs;
	671	          end
	672	        end
	673	      end
	674	    end else if (DEBUG_EN != 0) begin
	675	      // DEBUG: allow user to vary IDELAY tap settings
	676	      // For DQ IDELAY taps
	677	      if (dbg_idel_up_all || dbg_idel_down_all ||
	678	          dbg_sel_all_idel_dq) begin
	679	        for (x = 0; x < DQ_WIDTH; x = x + 1) begin: loop_dly_inc_dq
	680	          dlyce_dq[x] <= dbg_idel_up_all | dbg_idel_down_all |
	681	                         dbg_idel_up_dq | dbg_idel_down_dq;
	682	          dlyinc_dq[x] <= dbg_idel_up_all | dbg_idel_up_dq;
	683	        end
	684	      end else begin
	685	        dlyce_dq <= 'b0;
	686	        dlyce_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq |
	687	                                     dbg_idel_down_dq;
	688	        dlyinc_dq[dbg_sel_idel_dq] <= dbg_idel_up_dq;
	689	      end
	690	      // For DQS IDELAY taps
	691	      if (dbg_idel_up_all || dbg_idel_down_all ||
	692	          dbg_sel_all_idel_dqs) begin
	693	        for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_dqs
	694	          dlyce_dqs[x] <= dbg_idel_up_all | dbg_idel_down_all |
	695	                          dbg_idel_up_dqs | dbg_idel_down_dqs;
	696	          dlyinc_dqs[x] <= dbg_idel_up_all | dbg_idel_up_dqs;
	697	        end
	698	      end else begin
	699	        dlyce_dqs <= 'b0;
	700	        dlyce_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs |
	701	                                       dbg_idel_down_dqs;
	702	        dlyinc_dqs[dbg_sel_idel_dqs] <= dbg_idel_up_dqs;
	703	      end
	704	    end
	705	  end
	706	end
	707
	708	// GATE synchronization is handled directly by Stage 4 calibration FSM (debug inputs apply only when it is not driving CE)
	709	always @(posedge clkdiv)
	710	  if (rstdiv) begin
	711	    dlyrst_gate <= {DQS_WIDTH{1'b1}}; // all gate IDELAYs held in reset during RSTDIV
	712	    dlyce_gate <= {DQS_WIDTH{1'b0}};
	713	    dlyinc_gate <= {DQS_WIDTH{1'b0}};
	714	  end else begin
	715	    dlyrst_gate <= {DQS_WIDTH{1'b0}}; // defaults: nothing asserted unless overridden below
	716	    dlyce_gate <= {DQS_WIDTH{1'b0}};
	717	    dlyinc_gate <= {DQS_WIDTH{1'b0}};
	718
	719	    if (cal4_dlyrst_gate) begin
	720	      if (SIM_ONLY == 0)
	721	        dlyrst_gate[count_gate] <= 1'b1;
	722	      else
	723	        for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly_rst
	724	          dlyrst_gate[i] <= 1'b1;
	725	        end
	726	    end
	727
	728	    if (cal4_dlyce_gate) begin
	729	      if (SIM_ONLY == 0) begin
	730	        dlyce_gate[count_gate] <= 1'b1;
	731	        dlyinc_gate[count_gate] <= cal4_dlyinc_gate;
	732	      end else begin
	733	        // if simulation, then calibrate only first gate
	734	        for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_gate_sim_dly
	735	          dlyce_gate[i] <= 1'b1;
	736	          dlyinc_gate[i] <= cal4_dlyinc_gate;
	737	        end
	738	      end
	739	    end else if (DEBUG_EN != 0) begin
	740	      // DEBUG: allow user to vary IDELAY tap settings
	741	      if (dbg_idel_up_all || dbg_idel_down_all ||
	742	          dbg_sel_all_idel_gate) begin
	743	        for (x = 0; x < DQS_WIDTH; x = x + 1) begin: loop_dly_inc_gate
	744	          dlyce_gate[x] <= dbg_idel_up_all | dbg_idel_down_all |
	745	                           dbg_idel_up_gate | dbg_idel_down_gate;
	746	          dlyinc_gate[x] <= dbg_idel_up_all | dbg_idel_up_gate;
	747	        end
	748	      end else begin
	749	        dlyce_gate <= {DQS_WIDTH{1'b0}};
	750	        dlyce_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate |
	751	                                         dbg_idel_down_gate;
	752	        dlyinc_gate[dbg_sel_idel_gate] <= dbg_idel_up_gate;
	753	      end
	754	    end
	755	  end
	756
	757	//***************************************************************************
	758	// signal to tell calibration state machines to wait and give IDELAY time to
	759	// settle after its value is changed (both time for IDELAY chain to settle,
	760	// and for settled output to propagate through ISERDES). For general use: use
	761	// for any calibration state machines that modify any IDELAY.
	762	// Should give at least enough time for IDELAY output to settle (technically
	763	// for V5, this should be "glitchless" when IDELAY taps are changed, so don't
	764	// need any time here), and also time for new data to propagate through both
	765	// ISERDES and the "RDD" MUX + associated pipelining
	766	// For now, give very "generous" delay - doesn't really matter since only
	767	// needed during calibration
	768	//***************************************************************************
	769
	770	// determine if calibration polarity has changed
	771	always @(posedge clkdiv)
	772	cal2_rd_data_sel_r <= cal2_rd_data_sel; // one-cycle-delayed copy for change detection
	773
	774	assign cal2_rd_data_sel_edge = \|(cal2_rd_data_sel ^ cal2_rd_data_sel_r); // 1 when any bit differs from its previous-cycle value
	775
	776	// combine requests to modify any of the IDELAYs into one. Also when second
	777	// stage capture "edge" polarity is changed (IDELAY isn't changed in this
	778	// case, but use the same counter to stall cal logic)
	779	assign dlyce_or = cal1_dlyce_dq \|
	780	cal2_dlyce_dqs \|
	781	cal2_rd_data_sel_edge \|
	782	cal4_dlyce_gate \|
	783	cal4_dlyrst_gate;
	784
	785	// SYN_NOTE: Can later recode to avoid combinational path
	786	assign idel_set_wait = dlyce_or \|\| (idel_set_cnt != IDEL_SET_VAL); // wait while a request is active or the settle counter has not yet reached IDEL_SET_VAL
	787
	788	always @(posedge clkdiv)
	789	if (rstdiv)
	790	idel_set_cnt <= 4'b0000;
	791	else if (dlyce_or)
	792	idel_set_cnt <= 4'b0000; // any new request restarts the settle count
	793	else if (idel_set_cnt != IDEL_SET_VAL)
	794	idel_set_cnt <= idel_set_cnt + 1; // count up, then hold once IDEL_SET_VAL is reached
	795
	796	// generate request to PHY_INIT logic to issue auto-refresh
	797	// used by certain states to force prech/auto-refresh part way through
	798	// calibration to avoid a tRAS violation (which will happen if that
	799	// stage of calibration lasts long enough). This signal must meet the
	800	// following requirements: (1) only transition from 0->1 when the refresh
	801	// request is needed, (2) stay at 1 and only transition 1->0 when
	802	// CALIB_REF_DONE is asserted
	803	always @(posedge clkdiv)
	804	if (rstdiv)
	805	calib_ref_req <= 1'b0;
	806	else
	807	calib_ref_req <= cal1_ref_req \| cal2_ref_req \| cal4_ref_req; // registered OR of the per-stage refresh requests
	808
	809	// stage 1 calibration requests auto-refresh every 4 bits
	810	generate
	811	if (DQ_BITS < 2) begin: gen_cal1_refresh_dq_lte4
	812	assign cal1_refresh = 1'b0; // never request a stage 1 refresh; presumably NEXT_COUNT_DQ is too narrow for a [1:0] slice here - confirm against its declaration
	813	end else begin: gen_cal1_refresh_dq_gt4
	814	assign cal1_refresh = (next_count_dq[1:0] == 2'b00); // true when the low two bits of NEXT_COUNT_DQ are zero (every fourth DQ index)
	815	end
	816	endgenerate
	817
	818	//***************************************************************************
	819	// First stage calibration: DQ-DQS
	820	// Definitions:
	821	// edge: detected when varying IDELAY, and current capture data != prev
	822	// capture data
	823	// valid bit window: detected when current capture data == prev capture
	824	// data for more than half the bit time
	825	// starting conditions for DQS-DQ phase:
	826	// case 1: when DQS starts somewhere in rising edge bit window, or
	827	// on the right edge of the rising bit window.
	828	// case 2: when DQS starts somewhere in falling edge bit window, or
	829	// on the right edge of the falling bit window.
	830	// Algorithm Description:
	831	// 1. Increment DQ IDELAY until we find an edge.
	832	// 2. While we're finding the first edge, note whether a valid bit window
	833	// has been detected before we found an edge. If so, then figure out if
	834	// this is the rising or falling bit window. If rising, then our starting
	835	// DQS-DQ phase is case 1. If falling, then it's case 2. If don't detect
	836	// a valid bit window, then we must have started on the edge of a window.
	837	// Need to wait until later on to decide which case we are.
	838	// - Store FIRST_EDGE IDELAY value
	839	// 3. Now look for second edge.
	840	// 4. While we're finding the second edge, note whether valid bit window
	841	// is detected. If so, then use to, along with results from (2) to figure
	842	// out what the starting case is. If in rising bit window, then we're in
	843	// case 2. If falling, then case 1.
	844	// - Store SECOND_EDGE IDELAY value
	845	// NOTES:
	846	// a. Finding two edges allows us to calculate the bit time (although
	847	// not the "same" bit time polarity - need to investigate this
	848	// more).
	849	// b. If we run out of taps looking for the second edge, then the bit
	850	// time must be too long (>= 2.5ns, and DQS-DQ starting phase must be
	851	// case 1).
	852	// 5. Calculate absolute amount to delay DQ as:
	853	// If second edge found, and case 1:
	854	// - DQ_IDELAY = FIRST_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
	855	// If second edge found, and case 2:
	856	// - DQ_IDELAY = SECOND_EDGE - 0.5*(SECOND_EDGE - FIRST_EDGE)
	857	// If second edge not found, then need to make an approximation on
	858	// how much to shift by (should be okay, because we have more timing
	859	// margin):
	860	// - DQ_IDELAY = FIRST_EDGE - 0.5 * (bit_time)
	861	// NOTE: Does this account for either case 1 or case 2?????
	862	// NOTE: It's also possible, even when we find the second edge,
	863	// to instead just use half the bit time to subtract from either
	864	// FIRST or SECOND_EDGE. Finding the actual bit time (which is
	865	// what (SECOND_EDGE - FIRST_EDGE) is) is slightly more accurate,
	866	// since it takes into account duty cycle distortion.
	867	// 6. Repeat for each DQ in current DQS set.
	868	//***************************************************************************
	869
	870	//*****************************************************************
	871	// for first stage calibration - used for checking if DQS is aligned to the
	872	// particular DQ, such that we're in the data valid window. Basically, this
	873	// is one giant MUX.
	874	// = [falling data, rising data]
	875	// = [0, 1] = rising DQS aligned in proper (rising edge) bit window
	876	// = [1, 0] = rising DQS aligned in wrong (falling edge) bit window
	877	// = [0, 0], or [1,1] = in uncertain region between windows
	878	//*****************************************************************
	879
	880	// SYN_NOTE: May have to split this up into multiple levels - MUX can get
	881	// very wide - as wide as the data bus width
	882	always @(posedge clkdiv)
	883	cal1_data_chk_r <= {rd_data_fall_1x_r[next_count_dq], // bit 1 = falling data for the DQ selected by NEXT_COUNT_DQ
	884	rd_data_rise_1x_r[next_count_dq]}; // bit 0 = rising data for the same DQ
	885
	886	//*****************************************************************
	887	// determine when an edge has occurred - when either the current value
	888	// is different from the previous latched value or when the DATA_CHK
	889	// outputs are the same (rare, but indicates that we're at an edge)
	890	// This is only valid when the IDELAY output and propagation of the
	891	// data through the capture flops has had a chance to settle out.
	892	//*****************************************************************
	893
	894	// write CAL1_DETECT_EDGE and CAL1_DETECT_STABLE in such a way that
	895	// if X's are captured on the bus during functional simulation, that
	896	// the logic will register this as an edge detected. Do this to allow
	897	// use of this HDL with Denali memory models (Denali models drive DQ
	898	// to X's on both edges of the data valid window to simulate jitter)
	899	// This is only done for functional simulation purposes. Should not
	900	// make the final synthesized logic more complicated, but it does make
	901	// the HDL harder to understand b/c we have to "phrase" the logic
	902	// slightly differently than when not worrying about X's
	903	always @(*) begin
	904	// no edge found if: (1) we have a valid previously stored DATA_CHK
	905	// value, and the current value equals it, (2) we haven't yet stored a
	906	// previous value, but rise/fall data is equal to either [0,1] or [1,0]
	907	// (i.e. rise/fall data isn't either X's, or [0,0] or [1,1], which indicates
	908	// we're in the middle of an edge, since normally rise != fall data for stg1)
	909	if ((cal1_data_chk_last_valid &&
	910	(cal1_data_chk_r == cal1_data_chk_last)) \|\|
	911	(!cal1_data_chk_last_valid &&
	912	((cal1_data_chk_r == 2'b01) \|\| (cal1_data_chk_r == 2'b10))))
	913	cal1_detect_edge = 1'b0;
	914	else
	915	cal1_detect_edge = 1'b1; // else branch is also taken when the condition evaluates to X in simulation
	916	end
	917
	918	always @(*) begin
	919	// assert if we've found a region where data valid window is stable
	920	// over consecutive IDELAY taps, and either rise/fall = [1,0], or [0,1]
	921	if ((cal1_data_chk_last_valid &&
	922	(cal1_data_chk_r == cal1_data_chk_last)) &&
	923	((cal1_data_chk_r == 2'b01) \|\| (cal1_data_chk_r == 2'b10)))
	924	cal1_detect_stable = 1'b1;
	925	else
	926	cal1_detect_stable = 1'b0; // an X in the condition also lands here, so X data is never reported as stable
	927	end
	928
	929	//*****************************************************************
	930	// Find valid window: keep track of how long we've been in the same data
	931	// window. If it's been long enough, then declare that we've found a valid
	932	// window. Also returns whether we found a rising or falling window (only
	933	// valid when found_window is asserted)
	934	//*****************************************************************
	935
	936	always @(posedge clkdiv) begin
	937	if (cal1_state == CAL1_INIT) begin
	938	cal1_window_cnt <= 4'b0000;
	939	cal1_found_window <= 1'b0;
	940	cal1_found_rising <= 1'bx;
	941	end else if (!cal1_data_chk_last_valid) begin
	942	// if we haven't stored a previous value of CAL1_DATA_CHK (or it got
	943	// invalidated because we detected an edge, and are now looking for the
	944	// second edge), then make sure FOUND_WINDOW deasserted on following
	945	// clock edge (to avoid finding a false window immediately after finding
	946	// an edge). Note that because of jitter, it's possible to not find an
	947	// edge at the end of the IDELAY increment settling time, but to find an
	948	// edge on the next clock cycle (e.g. during CAL1_FIND_FIRST_EDGE)
	949	cal1_window_cnt <= 4'b0000;
	950	cal1_found_window <= 1'b0;
	951	cal1_found_rising <= 1'bx;
	952	end else if (((cal1_state == CAL1_FIRST_EDGE_IDEL_WAIT) \|\|
	953	(cal1_state == CAL1_SECOND_EDGE_IDEL_WAIT)) &&
	954	!idel_set_wait) begin
	955	// while finding the first and second edges, see if we can detect a
	956	// stable bit window (occurs over MIN_WIN_SIZE number of taps). If
	957	// so, then we're away from an edge, and can conclusively determine the
	958	// starting DQS-DQ phase.
	959	if (cal1_detect_stable) begin
	960	cal1_window_cnt <= cal1_window_cnt + 1; // one more consecutive stable sample
	961	if (cal1_window_cnt == MIN_WIN_SIZE-1) begin
	962	cal1_found_window <= 1'b1;
	963	if (cal1_data_chk_r == 2'b01) // [fall, rise] = [0, 1]: rising-edge bit window
	964	cal1_found_rising <= 1'b1;
	965	else
	966	cal1_found_rising <= 1'b0;
	967	end
	968	end else begin
	969	// otherwise, we're not in a data valid window, reset the window
	970	// counter, and indicate we're not currently in window. This should
	971	// happen by design at least once after finding the first edge.
	972	cal1_window_cnt <= 4'b0000;
	973	cal1_found_window <= 1'b0;
	974	cal1_found_rising <= 1'bx;
	975	end
	976	end
	977	end
	978
	979	//*****************************************************************
	980	// keep track of edge tap counts found, and whether we've
	981	// incremented to the maximum number of taps allowed
	982	//*****************************************************************
	983
	984	always @(posedge clkdiv)
	985	if (cal1_state == CAL1_INIT) begin
	986	cal1_idel_tap_limit_hit <= 1'b0;
	987	cal1_idel_tap_cnt <= 6'b000000;
	988	end else if (cal1_dlyce_dq) begin
	989	if (cal1_dlyinc_dq) begin
	990	cal1_idel_tap_cnt <= cal1_idel_tap_cnt + 1;
	991	cal1_idel_tap_limit_hit <= (cal1_idel_tap_cnt == 6'b111110); // set by the increment that takes the count from 62 to 63
	992	end else begin
	993	cal1_idel_tap_cnt <= cal1_idel_tap_cnt - 1;
	994	cal1_idel_tap_limit_hit <= 1'b0; // any decrement clears the limit flag
	995	end
	996	end
	997
	998	//*****************************************************************
	999	// Pipeline for better timing - amount to decrement by if second
	1000	// edge not found
	1001	//*****************************************************************
	1002	// if only one edge found (possible for low frequencies), then:
	1003	// 1. Assume starting DQS-DQ phase has DQS in DQ window (aka "case 1")
	1004	// 2. We have to decrement by (63 - first_edge_tap_cnt) + (BIT_TIME_TAPS/2)
	1005	// (i.e. decrement by 63-first_edge_tap_cnt to get to right edge of
	1006	// DQ window. Then decrement again by (BIT_TIME_TAPS/2) to get to center
	1007	// of DQ window.
	1008	// 3. Clamp the above value at 63 to ensure we don't underflow IDELAY
	1009	// (note: clamping happens in the CAL1 state machine)
	1010	always @(posedge clkdiv)
	1011	cal1_low_freq_idel_dec
	1012	<= (7'b0111111 - {1'b0, cal1_first_edge_tap_cnt}) + // 63 minus the zero-extended first-edge tap count
	1013	(BIT_TIME_TAPS/2); // plus half a bit time, expressed in taps
	1014
	1015	//*****************************************************************
	1016	// Keep track of max taps used during stage 1, use this to limit
	1017	// the number of taps that can be used in stage 2
	1018	//*****************************************************************
	1019
	1020	always @(posedge clkdiv)
	1021	if (rstdiv) begin
	1022	cal1_idel_max_tap <= 6'b000000;
	1023	cal1_idel_max_tap_we <= 1'b0;
	1024	end else begin
	1025	// pipeline latch enable for CAL1_IDEL_MAX_TAP - we have plenty
	1026	// of time, tap count gets updated, then dead cycles waiting for
	1027	// IDELAY output to settle
	1028	cal1_idel_max_tap_we <= (cal1_idel_max_tap < cal1_idel_tap_cnt); // registered "current tap count exceeds stored maximum"
	1029	// record maximum # of taps used for stg 1 cal
	1030	if ((cal1_state == CAL1_DONE) && cal1_idel_max_tap_we) // only sampled while the stage 1 FSM sits in CAL1_DONE
	1031	cal1_idel_max_tap <= cal1_idel_tap_cnt;
	1032	end
	1033
	1034	//*****************************************************************
	1035
	1036	always @(posedge clkdiv)
	1037	if (rstdiv) begin
	1038	calib_done[0] <= 1'b0;
	1039	calib_done_tmp[0] <= 1'bx;
	1040	calib_err[0] <= 1'b0;
	1041	count_dq <= {DQ_BITS{1'b0}};
	1042	next_count_dq <= {DQ_BITS{1'b0}};
	1043	cal1_bit_time_tap_cnt <= 6'bxxxxxx;
	1044	cal1_data_chk_last <= 2'bxx;
	1045	cal1_data_chk_last_valid <= 1'bx;
	1046	cal1_dlyce_dq <= 1'b0;
	1047	cal1_dlyinc_dq <= 1'b0;
	1048	cal1_dqs_dq_init_phase <= 1'bx;
	1049	cal1_first_edge_done <= 1'bx;
	1050	cal1_found_second_edge <= 1'bx;
	1051	cal1_first_edge_tap_cnt <= 6'bxxxxxx;
	1052	cal1_idel_dec_cnt <= 7'bxxxxxxx;
	1053	cal1_idel_inc_cnt <= 6'bxxxxxx;
	1054	cal1_ref_req <= 1'b0;
	1055	cal1_state <= CAL1_IDLE;
	1056	end else begin
	1057	// default values for all "pulse" outputs
	1058	cal1_ref_req <= 1'b0;
	1059	cal1_dlyce_dq <= 1'b0;
	1060	cal1_dlyinc_dq <= 1'b0;
	1061
	1062	case (cal1_state)
	1063	CAL1_IDLE: begin
	1064	count_dq <= {DQ_BITS{1'b0}};
	1065	next_count_dq <= {DQ_BITS{1'b0}};
	1066	if (calib_start[0]) begin
	1067	calib_done[0] <= 1'b0;
	1068	calib_done_tmp[0] <= 1'b0;
	1069	cal1_state <= CAL1_INIT;
	1070	end
	1071	end
	1072
	1073	CAL1_INIT: begin
	1074	cal1_data_chk_last_valid <= 1'b0;
	1075	cal1_found_second_edge <= 1'b0;
	1076	cal1_dqs_dq_init_phase <= 1'b0;
	1077	cal1_idel_inc_cnt <= 6'b000000;
	1078	cal1_state <= CAL1_INC_IDEL;
	1079	end
	1080
	1081	// increment DQ IDELAY so that either: (1) DQS starts somewhere in
	1082	// first rising DQ window, or (2) DQS starts in first falling DQ
	1083	// window. The amount to shift is frequency dependent (and is either
	1084	// precalculated by MIG or possibly adjusted by the user)
	1085	CAL1_INC_IDEL:
	1086	if ((cal1_idel_inc_cnt == DQ_IDEL_INIT) && !idel_set_wait) begin
	1087	cal1_state <= CAL1_FIND_FIRST_EDGE;
	1088	end else if (cal1_idel_inc_cnt != DQ_IDEL_INIT) begin
	1089	cal1_idel_inc_cnt <= cal1_idel_inc_cnt + 1;
	1090	cal1_dlyce_dq <= 1'b1;
	1091	cal1_dlyinc_dq <= 1'b1;
	1092	end
	1093
	1094	// look for first edge
	1095	CAL1_FIND_FIRST_EDGE: begin
	1096	// Determine DQS-DQ phase if we can detect enough of a valid window
	1097	if (cal1_found_window)
	1098	cal1_dqs_dq_init_phase <= ~cal1_found_rising;
	1099	// find first edge - if found then record position
	1100	if (cal1_detect_edge) begin
	1101	cal1_state <= CAL1_FOUND_FIRST_EDGE_WAIT;
	1102	cal1_first_edge_done <= 1'b0;
	1103	cal1_first_edge_tap_cnt <= cal1_idel_tap_cnt;
	1104	cal1_data_chk_last_valid <= 1'b0;
	1105	end else begin
	1106	// otherwise, store the current value of DATA_CHK, increment
	1107	// DQ IDELAY, and compare again
	1108	cal1_state <= CAL1_FIRST_EDGE_IDEL_WAIT;
	1109	cal1_data_chk_last <= cal1_data_chk_r;
	1110	// avoid comparing against DATA_CHK_LAST for previous iteration
	1111	cal1_data_chk_last_valid <= 1'b1;
	1112	cal1_dlyce_dq <= 1'b1;
	1113	cal1_dlyinc_dq <= 1'b1;
	1114	end
	1115	end
	1116
	1117	// wait for DQ IDELAY to settle
	1118	CAL1_FIRST_EDGE_IDEL_WAIT:
	1119	if (!idel_set_wait)
	1120	cal1_state <= CAL1_FIND_FIRST_EDGE;
	1121
	1122	// delay state between finding first edge and looking for second
	1123	// edge. Necessary in order to invalidate CAL1_FOUND_WINDOW before
	1124	// starting to look for second edge
	1125	CAL1_FOUND_FIRST_EDGE_WAIT:
	1126	cal1_state <= CAL1_FIND_SECOND_EDGE;
	1127
	1128	// Try and find second edge
	1129	CAL1_FIND_SECOND_EDGE: begin
	1130	// When looking for 2nd edge, first make sure data stabilized (by
	1131	// detecting valid data window) - needed to avoid false edges
	1132	if (cal1_found_window) begin
	1133	cal1_first_edge_done <= 1'b1;
	1134	cal1_dqs_dq_init_phase <= cal1_found_rising;
	1135	end
	1136	// exit if run out of taps to increment
	1137	if (cal1_idel_tap_limit_hit)
	1138	cal1_state <= CAL1_CALC_IDEL;
	1139	else begin
	1140	// found second edge, record the current edge count
	1141	if (cal1_first_edge_done && cal1_detect_edge) begin
	1142	cal1_state <= CAL1_CALC_IDEL;
	1143	cal1_found_second_edge <= 1'b1;
	1144	cal1_bit_time_tap_cnt <= cal1_idel_tap_cnt -
	1145	cal1_first_edge_tap_cnt + 1;
	1146	end else begin
	1147	cal1_state <= CAL1_SECOND_EDGE_IDEL_WAIT;
	1148	cal1_data_chk_last <= cal1_data_chk_r;
	1149	cal1_data_chk_last_valid <= 1'b1;
	1150	cal1_dlyce_dq <= 1'b1;
	1151	cal1_dlyinc_dq <= 1'b1;
	1152	end
	1153	end
	1154	end
	1155
	1156	// wait for DQ IDELAY to settle, then store ISERDES output
	1157	CAL1_SECOND_EDGE_IDEL_WAIT:
	1158	if (!idel_set_wait)
	1159	cal1_state <= CAL1_FIND_SECOND_EDGE;
	1160
	1161	// pipeline delay state to calculate amount to decrement DQ IDELAY
	1162	// NOTE: We're calculating the amount to decrement by, not the
	1163	// absolute setting for DQ IDELAY
	1164	CAL1_CALC_IDEL: begin
	1165	// if two edges found
	1166	if (cal1_found_second_edge)
	1167	// case 1: DQS was in DQ window to start with. First edge found
	1168	// corresponds to left edge of DQ rising window. Backup by 1.5*BT
	1169	// NOTE: In this particular case, it is possible to decrement
	1170	// "below 0" in the case where DQS delay is less than 0.5*BT,
	1171	// need to limit decrement to prevent IDELAY tap underflow
	1172	if (!cal1_dqs_dq_init_phase)
	1173	cal1_idel_dec_cnt <= {1'b0, cal1_bit_time_tap_cnt} +
	1174	{1'b0, (cal1_bit_time_tap_cnt >> 1)};
	1175	// case 2: DQS was in wrong DQ window (in DQ falling window).
	1176	// First edge found is right edge of DQ rising window. Second
	1177	// edge is left edge of DQ rising window. Backup by 0.5*BT
	1178	else
	1179	cal1_idel_dec_cnt <= {1'b0, (cal1_bit_time_tap_cnt >> 1)};
	1180	// if only one edge found - assume will always be case 1 - DQS in
	1181	// DQ window. Case 2 only possible if path delay on DQS > 5ns
	1182	else
	1183	cal1_idel_dec_cnt <= cal1_low_freq_idel_dec;
	1184	cal1_state <= CAL1_DEC_IDEL;
	1185	end
	1186
	1187	// decrement DQ IDELAY for final adjustment
	1188	CAL1_DEC_IDEL:
	1189	// once adjustment is complete, we're done with calibration for
	1190	// this DQ, now return to IDLE state and repeat for next DQ
	1191	// Add underflow protection for case of 2 edges found and DQS
	1192	// starting in DQ window (see comments for above state) - note we
	1193	// have to take into account delayed value of CAL1_IDEL_TAP_CNT -
	1194	// gets updated one clock cycle after CAL1_DLYCE/INC_DQ
	1195	if ((cal1_idel_dec_cnt == 7'b0000000) \|\|
	1196	(cal1_dlyce_dq && (cal1_idel_tap_cnt == 6'b000001))) begin
	1197	cal1_state <= CAL1_DONE;
	1198	// stop when all DQ's calibrated, or DQ[0] cal'ed (for sim)
	1199	if ((count_dq == DQ_WIDTH-1) \|\| (SIM_ONLY != 0))
	1200	calib_done_tmp[0] <= 1'b1;
	1201	else
	1202	// need for VHDL simulation to prevent out-of-index error
	1203	next_count_dq <= count_dq + 1;
	1204	end else begin
	1205	// keep decrementing until final tap count reached
	1206	cal1_idel_dec_cnt <= cal1_idel_dec_cnt - 1;
	1207	cal1_dlyce_dq <= 1'b1;
	1208	cal1_dlyinc_dq <= 1'b0;
	1209	end
	1210
	1211	// delay state to allow count_dq and DATA_CHK to point to the next
	1212	// DQ bit (allows us to potentially begin checking for an edge on
	1213	// next DQ right away).
	1214	CAL1_DONE:
	1215	if (!idel_set_wait) begin
	1216	count_dq <= next_count_dq;
	1217	if (calib_done_tmp[0]) begin
	1218	calib_done[0] <= 1'b1;
	1219	cal1_state <= CAL1_IDLE;
	1220	end else begin
	1221	// request auto-refresh after every 4 bits calibrated (see
	1222	// CAL1_REFRESH) to avoid tRAS violation
	1223	if (cal1_refresh) begin
	1224	cal1_ref_req <= 1'b1; // re-asserted every cycle spent here, overriding the pulse default above
	1225	if (calib_ref_done)
	1226	cal1_state <= CAL1_INIT;
	1227	end else
	1228	// if no need this time for refresh, proceed to next bit
	1229	cal1_state <= CAL1_INIT;
	1230	end
	1231	end
	1232	endcase
	1233	end
	1234
	1235	//***************************************************************************
	1236	// Second stage calibration: DQS-FPGA Clock
	1237	// Algorithm Description:
	1238	// 1. Assumes a training pattern that will produce a pattern oscillating at
	1239	// half the core clock frequency each on rise and fall outputs, and such
	1240	// that rise and fall outputs are 180 degrees out of phase from each
	1241	// other. Note that since the calibration logic runs at half the speed
	1242	// of the interface, expect that data sampled with the slow clock always
	1243	// to be constant (either always = 1, or = 0, and rise data != fall data)
	1244	// unless we cross the edge of the data valid window
	1245	// 2. Start by setting RD_DATA_SEL = 0. This selects the rising capture data
	1246	// sync'ed to rising edge of core clock, and falling edge data sync'ed
	1247	// to falling edge of core clock
	1248	// 3. Start looking for an edge. An edge is defined as either: (1) a
	1249	// change in capture value or (2) an invalid capture value (e.g. rising
	1250	// data != falling data for that same clock cycle).
	1251	// 4. If an edge is found, go to step (6). If edge hasn't been found, then
	1252	// set RD_DATA_SEL = 1, and try again.
	1253	// 5. If no edge is found, then increment IDELAY and return to step (3)
	1254	// 6. If an edge is found, then invert RD_DATA_SEL - this shifts the
	1255	// capture point 180 degrees from the edge of the window (minus duty
	1256	// cycle distortion, delay skew between rising/falling edge capture
	1257	// paths, etc.)
	1258	// 7. If no edge is found by CAL2_IDEL_TAP_LIMIT (= 63 - # taps used for
	1259	// stage 1 calibration), then decrement IDELAY (without reinverting
	1260	// RD_DATA_SEL) by CAL2_IDEL_TAP_LIMIT/2. This guarantees we at least
	1261	// have CAL2_IDEL_TAP_LIMIT/2 of slack both before and after the
	1262	// capture point (not optimal, but best we can do not having found an edge
	1263	// of the window). This happens only for very low frequencies.
	1264	// 8. Repeat for each DQS group.
	1265	// NOTE: Step 6 is not optimal. A better (and perhaps more complicated)
	1266	// algorithm might be to find both edges of the data valid window (using
	1267	// the same polarity of RD_DATA_SEL), and then decrement to the midpoint.
	1268	//***************************************************************************
	1269
	1270	// RD_DATA_SEL should be tagged with FROM-TO (multi-cycle) constraint in
	1271	// UCF file to relax timing. This net is "pseudo-static" (after value is
	1272	// changed, FSM waits number of cycles before using the output).
	1273	// Note that we are adding one clock cycle of delay (to isolate it from
	1274	// the other logic CAL2_RD_DATA_SEL feeds), make sure FSM waits long
	1275	// enough to compensate (by default it does, it waits a few cycles more
	1276	// than minimum # of clock cycles)
	1277	genvar rd_i;
	1278	generate
	1279	for (rd_i = 0; rd_i < DQS_WIDTH; rd_i = rd_i+1) begin: gen_rd_data_sel
	1280	FDRSE u_ff_rd_data_sel // one explicitly instantiated flop per DQS group
	1281	(
	1282	.Q (rd_data_sel[rd_i]),
	1283	.C (clkdiv),
	1284	.CE (1'b1), // always enabled
	1285	.D (cal2_rd_data_sel[rd_i]),
	1286	.R (1'b0), // reset tied off
	1287	.S (1'b0) // set tied off
	1288	) /* synthesis syn_preserve = 1 */
	1289	/* synthesis syn_replicate = 0 */;
	1290	end
	1291	endgenerate
	1292
	1293	//*****************************************************************
	1294	// Max number of taps used for stg2 cal dependent on number of taps
	1295	// used for stg1 (give priority to stg1 cal - let it use as many
	1296	// taps as it needs - the remainder of the IDELAY taps can be used
	1297	// by stg2)
	1298	//*****************************************************************
	1299
	1300	always @(posedge clkdiv)
	1301	cal2_idel_tap_limit <= 6'b111111 - cal1_idel_max_tap; // 63 minus the largest tap count recorded by stage 1
	1302
	1303	//*****************************************************************
	1304	// second stage calibration uses readback pattern of "1100" (i.e.
	1305	// 1st rising = 1, 1st falling = 1, 2nd rising = 0, 2nd falling = 0)
	1306	// only look at the first bit of each DQS group
	1307	//*****************************************************************
	1308
	1309	// asserted when captured data has changed since it was last stored
	1310	// for the currently selected capture polarity, or when we're right on
	1311	// the edge (i.e. rise data != fall data).
	1312	assign cal2_detect_edge =
	1313	((((rdd_rise_q1 != cal2_rd_data_rise_last_pos) \|\|
	1314	(rdd_fall_q1 != cal2_rd_data_fall_last_pos)) &&
	1315	cal2_rd_data_last_valid_pos && (!cal2_curr_sel)) \|\|
	1316	(((rdd_rise_q1 != cal2_rd_data_rise_last_neg) \|\|
	1317	(rdd_fall_q1 != cal2_rd_data_fall_last_neg)) &&
	1318	cal2_rd_data_last_valid_neg && (cal2_curr_sel)) \|\|
	1319	(rdd_rise_q1 != rdd_fall_q1));
	1320
	1321	//*****************************************************************
	1322	// keep track of edge tap counts found, and whether we've
	1323	// incremented to the maximum number of taps allowed
	1324	// NOTE: Assume stage 2 cal always increments the tap count (never
	1325	// decrements) when searching for edge of the data valid window
	1326	//*****************************************************************
	1327
	1328	always @(posedge clkdiv)
	1329	if (cal2_state == CAL2_INIT) begin
	1330	cal2_idel_tap_limit_hit <= 1'b0;
	1331	cal2_idel_tap_cnt <= 6'b000000;
	1332	end else if (cal2_dlyce_dqs) begin
	1333	cal2_idel_tap_cnt <= cal2_idel_tap_cnt + 1; // counts every CE pulse; CAL2_DLYINC_DQS is not examined here
	1334	cal2_idel_tap_limit_hit <= (cal2_idel_tap_cnt ==
	1335	cal2_idel_tap_limit - 1); // NOTE(review): behaviour when CAL2_IDEL_TAP_LIMIT is 0 depends on expression width - confirm
	1336	end
	1337
	1338	//*****************************************************************
	1339
	1340	always @(posedge clkdiv)
	1341	if (rstdiv) begin
	1342	calib_done[1] <= 1'b0;
	1343	calib_done_tmp[1] <= 1'bx;
	1344	calib_err[1] <= 1'b0;
	1345	count_dqs <= 'b0;
	1346	next_count_dqs <= 'b0;
	1347	cal2_dlyce_dqs <= 1'b0;
	1348	cal2_dlyinc_dqs <= 1'b0;
	1349	cal2_idel_dec_cnt <= 6'bxxxxxx;
	1350	cal2_rd_data_last_valid_neg <= 1'bx;
	1351	cal2_rd_data_last_valid_pos <= 1'bx;
	1352	cal2_rd_data_sel <= 'b0;
	1353	cal2_ref_req <= 1'b0;
	1354	cal2_state <= CAL2_IDLE;
	1355	end else begin
	1356	cal2_ref_req <= 1'b0; // default values for "pulse" outputs; states below override as needed
	1357	cal2_dlyce_dqs <= 1'b0;
	1358	cal2_dlyinc_dqs <= 1'b0;
	1359
	1360	case (cal2_state)
	1361	CAL2_IDLE: begin
	1362	count_dqs <= 'b0;
	1363	next_count_dqs <= 'b0;
	1364	if (calib_start[1]) begin
	1365	cal2_rd_data_sel <= {DQS_WIDTH{1'b0}};
	1366	calib_done[1] <= 1'b0;
	1367	calib_done_tmp[1] <= 1'b0;
	1368	cal2_state <= CAL2_INIT;
	1369	end
	1370	end
	1371
	1372	// Pass through this state every time we calibrate a new DQS group
	1373	CAL2_INIT: begin
	1374	cal2_curr_sel <= 1'b0; // NOTE(review): CAL2_CURR_SEL is not assigned in the reset branch above; it is first set here
	1375	cal2_rd_data_last_valid_neg <= 1'b0;
	1376	cal2_rd_data_last_valid_pos <= 1'b0;
	1377	cal2_state <= CAL2_INIT_IDEL_WAIT;
	1378	end
	1379
	1380	// Stall state only used if calibration run more than once. Can take
	1381	// this state out if design never runs calibration more than once.
	1382	// We need this state to give time for MUX'ed data to settle after
	1383	// resetting RD_DATA_SEL
	1384	CAL2_INIT_IDEL_WAIT:
	1385	if (!idel_set_wait)
	1386	cal2_state <= CAL2_FIND_EDGE_POS;
	1387
	1388	// Look for an edge - first check "positive-edge" stage 2 capture
	1389	CAL2_FIND_EDGE_POS: begin
	1390	// if found an edge, then switch to the opposite edge stage 2
	1391	// capture and we're done - no need to decrement the tap count,
	1392	// since switching to the opposite edge will shift the capture
	1393	// point by 180 degrees
	1394	if (cal2_detect_edge) begin
	1395	cal2_curr_sel <= 1'b1;
	1396	cal2_state <= CAL2_DONE;
	1397	// set all DQS groups to be the same for simulation
	1398	if (SIM_ONLY != 0)
	1399	cal2_rd_data_sel <= {DQS_WIDTH{1'b1}};
	1400	else
	1401	cal2_rd_data_sel[count_dqs] <= 1'b1;
	1402	if ((count_dqs == DQS_WIDTH-1) \|\| (SIM_ONLY != 0))
	1403	calib_done_tmp[1] <= 1'b1;
	1404	else
	1405	// MIG 2.1: Fix for simulation out-of-bounds error when
	1406	// SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
	1407	next_count_dqs <= count_dqs + 1;
	1408	end else begin
	1409	// otherwise, invert polarity of stage 2 capture and look for
	1410	// an edge with opposite capture clock polarity
	1411	cal2_curr_sel <= 1'b1;
	1412	cal2_rd_data_sel[count_dqs] <= 1'b1;
	1413	cal2_state <= CAL2_FIND_EDGE_IDEL_WAIT_POS;
	1414	cal2_rd_data_rise_last_pos <= rdd_rise_q1;
	1415	cal2_rd_data_fall_last_pos <= rdd_fall_q1;
	1416	cal2_rd_data_last_valid_pos <= 1'b1;
	1417	end
	1418	end
	1419
	1420	// Give time to switch from positive-edge to negative-edge second
	1421	// stage capture (need time for data to filter through pipe stages)
	1422	CAL2_FIND_EDGE_IDEL_WAIT_POS:
	1423	if (!idel_set_wait)
	1424	cal2_state <= CAL2_FIND_EDGE_NEG;
	1425
	1426	// Look for an edge - check "negative-edge" stage 2 capture
	1427	CAL2_FIND_EDGE_NEG:
	1428	if (cal2_detect_edge) begin
	1429	cal2_curr_sel <= 1'b0;
	1430	cal2_state <= CAL2_DONE;
	1431	// set all DQS groups to be the same for simulation
	1432	if (SIM_ONLY != 0)
	1433	cal2_rd_data_sel <= {DQS_WIDTH{1'b0}};
	1434	else
	1435	cal2_rd_data_sel[count_dqs] <= 1'b0;
	1436	if ((count_dqs == DQS_WIDTH-1) \|\| (SIM_ONLY != 0))
	1437	calib_done_tmp[1] <= 1'b1;
	1438	else
	1439	// MIG 2.1: Fix for simulation out-of-bounds error when
	1440	// SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
	1441	next_count_dqs <= count_dqs + 1;
	1442	end else if (cal2_idel_tap_limit_hit) begin
	1443	// otherwise, if we've run out of taps, then immediately
	1444	// backoff by half # of taps used - that's our best estimate
	1445	// for optimal calibration point. Doesn't matter which
	1446	// polarity we're using for capture (we don't know which one is
	1447	// best to use)
	1448	cal2_idel_dec_cnt <= {1'b0, cal2_idel_tap_limit[5:1]}; // CAL2_IDEL_TAP_LIMIT divided by two
	1449	cal2_state <= CAL2_DEC_IDEL;
	1450	if ((count_dqs == DQS_WIDTH-1) \|\| (SIM_ONLY != 0))
	1451	calib_done_tmp[1] <= 1'b1;
	1452	else
	1453	// MIG 2.1: Fix for simulation out-of-bounds error when
	1454	// SIM_ONLY=0, and DQS_WIDTH=(power of 2) (needed for VHDL)
	1455	next_count_dqs <= count_dqs + 1;
	1456	end else begin
	1457	// otherwise, increment IDELAY, and start looking for edge again
	1458	cal2_curr_sel <= 1'b0;
	1459	cal2_rd_data_sel[count_dqs] <= 1'b0;
	1460	cal2_state <= CAL2_FIND_EDGE_IDEL_WAIT_NEG;
	1461	cal2_rd_data_rise_last_neg <= rdd_rise_q1;
	1462	cal2_rd_data_fall_last_neg <= rdd_fall_q1;
	1463	cal2_rd_data_last_valid_neg <= 1'b1;
	1464	cal2_dlyce_dqs <= 1'b1;
	1465	cal2_dlyinc_dqs <= 1'b1;
	1466	end
	1467
	1468	CAL2_FIND_EDGE_IDEL_WAIT_NEG:
	1469	if (!idel_set_wait)
	1470	cal2_state <= CAL2_FIND_EDGE_POS;
	1471
	1472	// if no edge found, then decrement by half # of taps used
	1473	CAL2_DEC_IDEL: begin
	1474	if (cal2_idel_dec_cnt == 6'b000000)
	1475	cal2_state <= CAL2_DONE;
	1476	else begin
	1477	cal2_idel_dec_cnt <= cal2_idel_dec_cnt - 1;
	1478	cal2_dlyce_dqs <= 1'b1;
	1479	cal2_dlyinc_dqs <= 1'b0;
	1480	end
	1481	end
	1482
	1483	// delay state to allow count_dqs and ISERDES data to point to next
	1484	// DQ bit (DQS group) before going to INIT
	1485	CAL2_DONE:
	1486	if (!idel_set_wait) begin
	1487	count_dqs <= next_count_dqs;
	1488	if (calib_done_tmp[1]) begin
	1489	calib_done[1] <= 1'b1;
	1490	cal2_state <= CAL2_IDLE;
	1491	end else begin
	1492	// request auto-refresh after every DQS group calibrated to
	1493	// avoid tRAS violation
	1494	cal2_ref_req <= 1'b1; // re-asserted every cycle spent here until CALIB_REF_DONE is seen
	1495	if (calib_ref_done)
	1496	cal2_state <= CAL2_INIT;
	1497	end
	1498	end
	1499	endcase
	1500	end
	1501
	1502	//***************************************************************************
	1503	// Stage 3 calibration: Read Enable
	1504	// Description:
	1505	// read enable calibration determines the "round-trip" time (in # of CLK0
	1506	// cycles) between when a read command is issued by the controller, and
	1507	// when the corresponding read data is synchronized into the CLK0 domain.
	1508	// this is a long delay chain to delay read enable signal from controller/
	1509	// initialization logic (i.e. this is used for both initialization and
	1510	// during normal controller operation). Stage 3 calibration logic decides
	1511	// which delayed version is appropriate to use (which is affected by the
	1512	// round trip delay of DQ/DQS) as a "valid" signal to tell rest of logic
	1513	// when the captured data output from ISERDES is valid.
	1514	//***************************************************************************
	1515
	1516	//*****************************************************************
	1517	// Delay chains: Use shift registers
	1518	// Two sets of delay chains are used:
	1519	// 1. One to delay RDEN from PHY_INIT module for calibration
	1520	// purposes (delay required for RDEN for calibration is different
	1521	// than during normal operation)
	1522	// 2. One per DQS group to delay RDEN from controller for normal
	1523	// operation - the value to delay for each DQS group can be different
	1524	// as is determined during calibration
	1525	//*****************************************************************
	1526
	1527	//*****************************************************************
	1528	// First delay chain, use only for calibration
	1529	// input = asserted on rising edge of RDEN from PHY_INIT module
	1530	//*****************************************************************
	1531
	1532	always @(posedge clk) begin
	1533	ctrl_rden_r <= ctrl_rden;
	1534	phy_init_rden_r <= phy_init_rden;
	1535	phy_init_rden_r1 <= phy_init_rden_r;
	1536	calib_rden_edge_r <= phy_init_rden_r & ~phy_init_rden_r1;
	1537	end
	1538
	1539	// Calibration shift register used for both Stage 3 and Stage 4 cal
	1540	// (not strictly necessary for stage 4, but use as an additional check
	1541	// to make sure we're checking for correct data on the right clock cycle)
	1542	always @(posedge clkdiv)
	1543	if (!calib_done[2])
	1544	calib_rden_srl_a <= cal3_rden_srl_a;
	1545	else
	1546	calib_rden_srl_a <= cal4_rden_srl_a;
	1547
	1548	// Flops for targeting of multi-cycle path in UCF
		// One FDRSE per address bit (5 bits) re-registers calib_rden_srl_a into
		// calib_rden_srl_a_r on CLKDIV. CE is tied high and R/S are tied low,
		// so each instance is used as a plain D register.
		// NOTE(review): the syn_preserve/syn_replicate directives presumably
		// keep these flops from being removed or duplicated so the UCF
		// constraint can find them by name - confirm against the UCF.
	1549	genvar cal_rden_ff_i;
	1550	generate
	1551	for (cal_rden_ff_i = 0; cal_rden_ff_i < 5;
	1552	cal_rden_ff_i = cal_rden_ff_i+1) begin: gen_cal_rden_dly
	1553	FDRSE u_ff_cal_rden_dly
	1554	(
	1555	.Q (calib_rden_srl_a_r[cal_rden_ff_i]),
	1556	.C (clkdiv),
	1557	.CE (1'b1),
	1558	.D (calib_rden_srl_a[cal_rden_ff_i]),
	1559	.R (1'b0),
	1560	.S (1'b0)
	1561	) /* synthesis syn_preserve = 1 */
	1562	/* synthesis syn_replicate = 0 */;
	1563	end
	1564	endgenerate
	1565
		// Calibration read-enable delay line: shifts the RDEN rising-edge pulse
		// (calib_rden_edge_r) on CLK, always enabled. The tap presented on Q is
		// selected by the registered address calib_rden_srl_a_r; the cascade
		// output Q31 is left unconnected.
	1566	SRLC32E u_calib_rden_srl
	1567	(
	1568	.Q (calib_rden_srl_out),
	1569	.Q31 (),
	1570	.A (calib_rden_srl_a_r),
	1571	.CE (1'b1),
	1572	.CLK (clk),
	1573	.D (calib_rden_edge_r)
	1574	);
	1575
		// Register the SRL output on CLK (CE tied high, R/S tied low) to
		// produce calib_rden_srl_out_r
	1576	FDRSE u_calib_rden_srl_out_r
	1577	(
	1578	.Q (calib_rden_srl_out_r),
	1579	.C (clk),
	1580	.CE (1'b1),
	1581	.D (calib_rden_srl_out),
	1582	.R (1'b0),
	1583	.S (1'b0)
	1584	) /* synthesis syn_preserve = 1 */;
	1585
	1586	// convert to CLKDIV domain. Two version are generated because we need
	1587	// to be able to tell exactly which fast (clk) clock cycle the read
	1588	// enable was asserted in. Only one of CALIB_DATA_VALID or
	1589	// CALIB_DATA_VALID_STGD will be asserted for any given shift value
	1590	always @(posedge clk)
	1591	calib_rden_srl_out_r1 <= calib_rden_srl_out_r;
	1592
	1593	always @(posedge clkdiv) begin
	1594	calib_rden_valid <= calib_rden_srl_out_r;
	1595	calib_rden_valid_stgd <= calib_rden_srl_out_r1;
	1596	end
	1597
	1598	//*****************************************************************
	1599	// Second set of delay chains, used for normal reads
	1600	// input = RDEN from controller
	1601	//*****************************************************************
	1602
	1603	// Flops for targeting of multi-cycle path in UCF
		// One FDRSE per bit of rden_dly (5 address bits for each of the
		// DQS_WIDTH groups) re-registers rden_dly into rden_dly_r on CLKDIV.
		// CE is tied high and R/S are tied low, so each instance is used as a
		// plain D register.
	1604	genvar rden_ff_i;
	1605	generate
	1606	for (rden_ff_i = 0; rden_ff_i < 5*DQS_WIDTH;
	1607	rden_ff_i = rden_ff_i+1) begin: gen_rden_dly
	1608	FDRSE u_ff_rden_dly
	1609	(
	1610	.Q (rden_dly_r[rden_ff_i]),
	1611	.C (clkdiv),
	1612	.CE (1'b1),
	1613	.D (rden_dly[rden_ff_i]),
	1614	.R (1'b0),
	1615	.S (1'b0)
	1616	) /* synthesis syn_preserve = 1 */
	1617	/* synthesis syn_replicate = 0 */;
	1618	end
	1619	endgenerate
	1620
	1621	// Per-DQS-group read-enable delay lines for normal reads.
		// For each group, an SRLC32E shifts the registered controller read
		// enable (ctrl_rden_r) on CLK and taps it at the 5-bit address held in
		// rden_dly_r[5*group+4 : 5*group]. The tapped value is then registered
		// on CLK by an FDRSE to produce calib_rden[group]. The SRL cascade
		// output (Q31) is unused.
	1622	genvar rden_i;
	1623	generate
	1624	for (rden_i = 0; rden_i < DQS_WIDTH; rden_i = rden_i + 1) begin: gen_rden
	1625	SRLC32E u_rden_srl
	1626	(
	1627	.Q (rden_srl_out[rden_i]),
	1628	.Q31 (),
	1629	.A ({rden_dly_r[(rden_i*5)+4],
	1630	rden_dly_r[(rden_i*5)+3],
	1631	rden_dly_r[(rden_i*5)+2],
	1632	rden_dly_r[(rden_i*5)+1],
	1633	rden_dly_r[(rden_i*5)]}),
	1634	.CE (1'b1),
	1635	.CLK (clk),
	1636	.D (ctrl_rden_r)
	1637	);
	1638	FDRSE u_calib_rden_r
	1639	(
	1640	.Q (calib_rden[rden_i]),
	1641	.C (clk),
	1642	.CE (1'b1),
	1643	.D (rden_srl_out[rden_i]),
	1644	.R (1'b0),
	1645	.S (1'b0)
	1646	) /* synthesis syn_preserve = 1 */;
	1647	end
	1648	endgenerate
	1649
	1650	//*****************************************************************
	1651	// indicates that current received data is the correct pattern. Check both
	1652	// rising and falling data for first DQ in each DQS group. Note that
	1653	// we're checking using a pipelined version of read data, so need to take
	1654	// this inherent delay into account in determining final read valid delay
	1655	// Data is written to the memory in the following order (first -> last):
	1656	// 0x1, 0xE, 0xE, 0x1, 0xE, 0x1, 0xE, 0x1
	1657	// Looking at the two LSb bits, expect data in sequence (in binary):
	1658	// bit[0]: 1, 0, 0, 1, 0, 1, 0, 1
	1659	// bit[1]: 0, 1, 1, 0, 1, 0, 1, 0
	1660	// Check for the presence of the first 7 words, and compensate read valid
	1661	// delay accordingly. Don't check last falling edge data, it may be
	1662	// corrupted by the DQS tri-state glitch at end of read postamble
	1663	// (glitch protection not yet active until stage 4 cal)
	1664	//*****************************************************************
	1665
	1666	always @(posedge clkdiv) begin
	1667	rdd_rise_q1_r <= rdd_rise_q1;
	1668	rdd_fall_q1_r <= rdd_fall_q1;
	1669	rdd_rise_q2_r <= rdd_rise_q2;
	1670	rdd_fall_q2_r <= rdd_fall_q2;
	1671	rdd_rise_q1_r1 <= rdd_rise_q1_r;
	1672	rdd_fall_q1_r1 <= rdd_fall_q1_r;
	1673	// MIG 3.3: Added comparison for second bit in DQS group for stage 3 cal
	1674	rdd_rise_q1_bit1_r <= rdd_rise_q1_bit1;
	1675	rdd_fall_q1_bit1_r <= rdd_fall_q1_bit1;
	1676	rdd_rise_q2_bit1_r <= rdd_rise_q2_bit1;
	1677	rdd_fall_q2_bit1_r <= rdd_fall_q2_bit1;
	1678	rdd_rise_q1_bit1_r1 <= rdd_rise_q1_bit1_r;
	1679	rdd_fall_q1_bit1_r1 <= rdd_fall_q1_bit1_r;
	1680	end
	1681
	1682	always @(posedge clkdiv) begin
	1683	// For the following sequence from memory:
	1684	// rise[0], fall[0], rise[1], fall[1]
	1685	// if data is aligned out of fabric ISERDES:
	1686	// RDD_RISE_Q2 = rise[0]
	1687	// RDD_FALL_Q2 = fall[0]
	1688	// RDD_RISE_Q1 = rise[1]
	1689	// RDD_FALL_Q1 = fall[1]
	1690	cal3_data_match <= ((rdd_rise_q2_r == 1) &&
	1691	(rdd_fall_q2_r == 0) &&
	1692	(rdd_rise_q1_r == 0) &&
	1693	(rdd_fall_q1_r == 1) &&
	1694	(rdd_rise_q2 == 0) &&
	1695	(rdd_fall_q2 == 1) &&
	1696	(rdd_rise_q1 == 0) &&
	1697	(rdd_rise_q2_bit1_r == 0) &&
	1698	(rdd_fall_q2_bit1_r == 1) &&
	1699	(rdd_rise_q1_bit1_r == 1) &&
	1700	(rdd_fall_q1_bit1_r == 0) &&
	1701	(rdd_rise_q2_bit1 == 1) &&
	1702	(rdd_fall_q2_bit1 == 0) &&
	1703	(rdd_rise_q1_bit1 == 1));
	1704
	1705	// if data is staggered out of fabric ISERDES:
	1706	// RDD_RISE_Q1_R = rise[0]
	1707	// RDD_FALL_Q1_R = fall[0]
	1708	// RDD_RISE_Q2 = rise[1]
	1709	// RDD_FALL_Q2 = fall[1]
	1710	cal3_data_match_stgd <= ((rdd_rise_q1_r1 == 1) &&
	1711	(rdd_fall_q1_r1 == 0) &&
	1712	(rdd_rise_q2_r == 0) &&
	1713	(rdd_fall_q2_r == 1) &&
	1714	(rdd_rise_q1_r == 0) &&
	1715	(rdd_fall_q1_r == 1) &&
	1716	(rdd_rise_q2 == 0) &&
	1717	(rdd_rise_q1_bit1_r1 == 0) &&
	1718	(rdd_fall_q1_bit1_r1 == 1) &&
	1719	(rdd_rise_q2_bit1_r == 1) &&
	1720	(rdd_fall_q2_bit1_r == 0) &&
	1721	(rdd_rise_q1_bit1_r == 1) &&
	1722	(rdd_fall_q1_bit1_r == 0) &&
	1723	(rdd_rise_q2_bit1 == 1));
	1724	end
	1725
	1726	assign cal3_rden_dly = cal3_rden_srl_a - CAL3_RDEN_SRL_DLY_DELTA;
	1727	assign cal3_data_valid = (calib_rden_valid \| calib_rden_valid_stgd);
	1728	assign cal3_match_found
	1729	= ((calib_rden_valid && cal3_data_match) \|\|
	1730	(calib_rden_valid_stgd && cal3_data_match_stgd));
	1731
	1732	// when calibrating, check to see which clock cycle (after the read is
	1733	// issued) does the expected data pattern arrive. Record this result
	1734	// NOTE: Can add error checking here in case valid data not found on any
	1735	// of the available pipeline stages
	1736	always @(posedge clkdiv) begin
	1737	if (rstdiv) begin
	1738	cal3_rden_srl_a <= 5'bxxxxx;
	1739	cal3_state <= CAL3_IDLE;
	1740	calib_done[2] <= 1'b0;
	1741	calib_err_2[0] <= 1'b0;
	1742	count_rden <= {DQS_WIDTH{1'b0}};
	1743	rden_dly <= {5*DQS_WIDTH{1'b0}};
	1744	end else begin
	1745
	1746	case (cal3_state)
	1747	CAL3_IDLE: begin
	1748	count_rden <= {DQS_WIDTH{1'b0}};
	1749	if (calib_start[2]) begin
	1750	calib_done[2] <= 1'b0;
	1751	cal3_state <= CAL3_INIT;
	1752	end
	1753	end
	1754
	1755	CAL3_INIT: begin
	1756	cal3_rden_srl_a <= RDEN_BASE_DELAY;
	1757	// let SRL pipe clear after loading initial shift value
	1758	cal3_state <= CAL3_RDEN_PIPE_CLR_WAIT;
	1759	end
	1760
	1761	CAL3_DETECT:
	1762	if (cal3_data_valid)
	1763	// if match found at the correct clock cycle
	1764	if (cal3_match_found) begin
	1765
	1766	// For simulation, load SRL addresses for all DQS with same value
	1767	if (SIM_ONLY != 0) begin
	1768	for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_rden_dly
	1769	rden_dly[(i*5)] <= cal3_rden_dly[0];
	1770	rden_dly[(i*5)+1] <= cal3_rden_dly[1];
	1771	rden_dly[(i*5)+2] <= cal3_rden_dly[2];
	1772	rden_dly[(i*5)+3] <= cal3_rden_dly[3];
	1773	rden_dly[(i*5)+4] <= cal3_rden_dly[4];
	1774	end
	1775	end else begin
	1776	rden_dly[(count_rden*5)] <= cal3_rden_dly[0];
	1777	rden_dly[(count_rden*5)+1] <= cal3_rden_dly[1];
	1778	rden_dly[(count_rden*5)+2] <= cal3_rden_dly[2];
	1779	rden_dly[(count_rden*5)+3] <= cal3_rden_dly[3];
	1780	rden_dly[(count_rden*5)+4] <= cal3_rden_dly[4];
	1781	end
	1782
	1783	// Use for stage 4 calibration
	1784	calib_rden_dly[(count_rden*5)] <= cal3_rden_srl_a[0];
	1785	calib_rden_dly[(count_rden*5)+1] <= cal3_rden_srl_a[1];
	1786	calib_rden_dly[(count_rden*5)+2] <= cal3_rden_srl_a[2];
	1787	calib_rden_dly[(count_rden*5)+3] <= cal3_rden_srl_a[3];
	1788	calib_rden_dly[(count_rden*5)+4] <= cal3_rden_srl_a[4];
	1789	cal3_state <= CAL3_DONE;
	1790	end else begin
	1791	// If we run out of stages to shift, without finding correct
	1792	// result, the stop and assert error
	1793	if (cal3_rden_srl_a == 5'b11111) begin
	1794	calib_err_2[0] <= 1'b1;
	1795	cal3_state <= CAL3_IDLE;
	1796	end else begin
	1797	// otherwise, increase the shift value and try again
	1798	cal3_rden_srl_a <= cal3_rden_srl_a + 1;
	1799	cal3_state <= CAL3_RDEN_PIPE_CLR_WAIT;
	1800	end
	1801	end
	1802
	1803	// give additional time for RDEN_R pipe to clear from effects of
	1804	// previous pipeline or IDELAY tap change
	1805	CAL3_RDEN_PIPE_CLR_WAIT:
	1806	if (calib_rden_pipe_cnt == 5'b00000)
	1807	cal3_state <= CAL3_DETECT;
	1808
	1809	CAL3_DONE: begin
	1810	if ((count_rden == DQS_WIDTH-1) \|\| (SIM_ONLY != 0)) begin
	1811	calib_done[2] <= 1'b1;
	1812	cal3_state <= CAL3_IDLE;
	1813	end else begin
	1814	count_rden <= count_rden + 1;
	1815	cal3_state <= CAL3_INIT;
	1816	end
	1817	end
	1818	endcase
	1819	end
	1820	end
	1821
	1822	//*****************************************************************
	1823	// Last part of stage 3 calibration - compensate for differences
	1824	// in delay between different DQS groups. Assume that in the worst
	1825	// case, DQS groups can only differ by one clock cycle. Data for
	1826	// certain DQS groups must be delayed by one clock cycle.
	1827	// NOTE: May need to increase allowable variation to greater than
	1828	// one clock cycle in certain customer designs.
	1829	// Algorithm is:
	1830	// 1. Record shift delay value for DQS[0]
	1831	// 2. Compare each DQS[x] delay value to that of DQS[0]:
	1832	// - If different, then record this fact (RDEN_MUX)
	1833	// - If DQS[0] is greater than DQS[x], set RDEN_INC. Assume greater by
	1834	// one clock cycle only - this is a key assumption, assume no
	1835	// more than a one clock cycle variation.
	1836	// - If DQS[0] is less than DQS[x], set RDEN_DEC
	1837	// 3. After calibration is complete, set control for DQS group
	1838	// delay (CALIB_RDEN_SEL):
	1839	// - If RDEN_DEC = 1, then assume that DQS[0] is the lowest
	1840	// delay (and at least one other DQS group has a higher
	1841	// delay).
	1842	// - If RDEN_INC = 1, then assume that DQS[0] is the highest
	1843	// delay (and that all other DQS groups have the same or
	1844	// lower delay).
	1845	// - If both RDEN_INC and RDEN_DEC = 1, then flag error
	1846	// (variation is too high for this algorithm to handle)
	1847	//*****************************************************************
	1848
		// Inter-group read-enable compensation.
		// While stage 3 is running (calib_done[2] low), each match found in
		// CAL3_DETECT is compared against the SRL address recorded for DQS[0]
		// (rden_dly_0): rden_mux[group] flags a group whose address differs,
		// and the sticky flags rden_inc / rden_dec record whether DQS[0] was
		// larger / smaller than some other group. Once calib_done[2] is set,
		// calib_rden_sel is driven from rden_mux (inverted when rden_inc is
		// set) and calib_err_2[1] is raised if both flags are set.
	1849	always @(posedge clkdiv) begin
	1850	if (rstdiv) begin
	1851	calib_err_2[1] <= 1'b0;
	1852	calib_rden_sel <= {DQS_WIDTH{1'bx}};
	1853	rden_dec <= 1'b0;
	1854	rden_dly_0 <= 5'bxxxxx;
	1855	rden_inc <= 1'b0;
	1856	rden_mux <= {DQS_WIDTH{1'b0}};
	1857	end else begin
	1858	// if a match is found, then store the value of rden_dly
	1859	if (!calib_done[2]) begin
	1860	if ((cal3_state == CAL3_DETECT) && cal3_match_found) begin
	1861	// store the value for DQS[0] as a reference
	1862	if (count_rden == 0) begin
	1863	// for simulation, RDEN calibration only happens for DQS[0]
	1864	// set RDEN_MUX for all DQS groups to be the same as DQS[0]
	1865	if (SIM_ONLY != 0)
	1866	rden_mux <= {DQS_WIDTH{1'b0}};
	1867	else begin
	1868	// otherwise, load values for DQS[0]
	1869	rden_dly_0 <= cal3_rden_srl_a;
	1870	rden_mux[0] <= 1'b0;
	1871	end
	1872	end else if (SIM_ONLY == 0) begin
	1873	// for all other DQS groups, compare RDEN_DLY delay value with
	1874	// that of DQS[0]
	1875	if (rden_dly_0 != cal3_rden_srl_a) begin
	1876	// record that current DQS group has a different delay
	1877	// than DQS[0] (the "reference" DQS group)
	1878	rden_mux[count_rden] <= 1'b1;
		// RDEN_INC: DQS[0] delay is larger than this group's delay
	1879	if (rden_dly_0 > cal3_rden_srl_a)
	1880	rden_inc <= 1'b1;
		// RDEN_DEC: DQS[0] delay is smaller than this group's delay
	1881	else if (rden_dly_0 < cal3_rden_srl_a)
	1882	rden_dec <= 1'b1;
	1883	// otherwise, if current DQS group has same delay as DQS[0],
	1884	// then rden_mux[count_rden] remains at 0 (since rden_mux
	1885	// array contents initialized to 0)
	1886	end
	1887	end
	1888	end
	1889	end else begin
	1890	// Otherwise - if we're done w/ stage 3 calibration (calib_done[2]):
	1891	// set final value for RDEN data delay
	1892	// flag error if there's more than one cycle variation from DQS[0]
	1893	calib_err_2[1] <= (rden_inc && rden_dec);
	1894	if (rden_inc)
	1895	// if DQS[0] delay represents max delay
	1896	calib_rden_sel <= ~rden_mux;
	1897	else
	1898	// if DQS[0] delay represents min delay (or all the delays are
	1899	// the same between DQS groups)
	1900	calib_rden_sel <= rden_mux;
	1901	end
	1902	end
	1903	end
	1904
	1905	// flag error for stage 3 if appropriate
	1906	always @(posedge clkdiv)
	1907	calib_err[2] <= calib_err_2[0] \| calib_err_2[1];
	1908
	1909	//***************************************************************************
	1910	// Stage 4 calibration: DQS gate
	1911	//***************************************************************************
	1912
	1913	//*****************************************************************
	1914	// indicates that current received data is the correct pattern. Same as
	1915	// for READ VALID calibration, except that the expected data sequence is
	1916	// different since DQS gate is asserted after the 6th word.
	1917	// Data sequence:
	1918	// Arrives from memory (at FPGA input) (R, F): 1 0 0 1 1 0 0 1
	1919	// After gating the sequence looks like: 1 0 0 1 1 0 1 0 (7th word =
	1920	// 5th word, 8th word = 6th word)
	1921	// What if the gate timing is off? Need to make sure we can distinguish
	1922	// between the results of correct vs. incorrect gate timing. We also use
	1923	// the "read_valid" signal from stage 3 calibration to help us determine
	1924	// when to check for a valid sequence for stage 4 calibration (i.e. use
	1925	// CAL4_DATA_VALID in addition to CAL4_DATA_MATCH/CAL4_DATA_MATCH_STGD)
	1926	// Note that since the gate signal from the CLK0 domain is synchronized
	1927	// to the falling edge of DQS, that the effect of the gate will only be
	1928	// seen starting with a rising edge data (although it is possible
	1929	// the GATE IDDR output could go metastable and cause an unexpected result
	1930	// on the first rising and falling edges after the gate is enabled).
	1931	// Also note that the actual DQS glitch can come more than 0.5*tCK after
	1932	// the last falling edge of DQS and the constraint for this path can
	1933	// be > 0.5*tCK; however, this means when calibrating, the output of the
	1934	// GATE IDDR may miss the setup time requirement of the rising edge flop
	1935	// and only meet it for the falling edge flop. Therefore the rising
	1936	// edge data immediately following the assertion of the gate can either
	1937	// be a 1 or 0 (can rely on either)
	1938	// As the timing on the gate is varied, we expect to see (sequence of
	1939	// captured read data shown below):
	1940	// - 1 0 0 1 1 0 0 1 (gate is really early, starts and ends before
	1941	// read burst even starts)
	1942	// - x 0 0 1 1 0 0 1 (gate pulse starts before the burst, and ends
	1943	// - x y 0 1 1 0 0 1 sometime during the burst; x,y = 0, or 1, but
	1944	// - x y x 1 1 0 0 1 all bits that show an x are the same value,
	1945	// - x y x y 1 0 0 1 and y are the same value)
	1946	// - x y x y x 0 0 1
	1947	// - x y x y x y 0 1 (gate starts just before start of burst)
	1948	// - 1 0 x 0 x 0 x 0 (gate starts after 1st falling word. The "x"
	1949	// represents possibility that gate may not disable
	1950	// clock for 2nd rising word in time)
	1951	// - 1 0 0 1 x 1 x 1 (gate starts after 2nd falling word)
	1952	// - 1 0 0 1 1 0 x 0 (gate starts after 3rd falling word - GOOD!!)
	1953	// - 1 0 0 1 1 0 0 1 (gate starts after burst is already done)
	1954	//*****************************************************************
	1955
	1956	assign cal4_data_valid = calib_rden_valid \| calib_rden_valid_stgd;
	1957	assign cal4_data_good = (calib_rden_valid &
	1958	cal4_data_match) \|
	1959	(calib_rden_valid_stgd &
	1960	cal4_data_match_stgd);
	1961
	1962	always @(posedge clkdiv) begin
	1963	// if data is aligned out of fabric ISERDES:
	1964	cal4_data_match <= ((rdd_rise_q2_r == 1) &&
	1965	(rdd_fall_q2_r == 0) &&
	1966	(rdd_rise_q1_r == 0) &&
	1967	(rdd_fall_q1_r == 1) &&
	1968	(rdd_rise_q2 == 1) &&
	1969	(rdd_fall_q2 == 0) &&
	1970	// MIG 2.1: Last rising edge data value not
	1971	// guaranteed to be certain value at higher
	1972	// frequencies
	1973	// (rdd_rise_q1 == 0) &&
	1974	(rdd_fall_q1 == 0));
	1975	// if data is staggered out of fabric ISERDES:
	1976	cal4_data_match_stgd <= ((rdd_rise_q1_r1 == 1) &&
	1977	(rdd_fall_q1_r1 == 0) &&
	1978	(rdd_rise_q2_r == 0) &&
	1979	(rdd_fall_q2_r == 1) &&
	1980	(rdd_rise_q1_r == 1) &&
	1981	(rdd_fall_q1_r == 0) &&
	1982	// MIG 2.1: Last rising edge data value not
	1983	// guaranteed to be certain value at higher
	1984	// frequencies
	1985	// (rdd_rise_q2 == 0) &&
	1986	(rdd_fall_q2 == 0));
	1987	end
	1988
	1989	//*****************************************************************
	1990	// DQS gate enable generation:
	1991	// This signal gets synchronized to DQS domain, and drives IDDR
	1992	// register that in turn asserts/deasserts CE to all 4 or 8 DQ
	1993	// IDDR's in that DQS group.
	1994	// 1. During normal (post-cal) operation, this is only for 2 clock
	1995	// cycles following the end of a burst. Check for falling edge
	1996	// of RDEN. But must also make sure NOT assert for a read-idle-
	1997	// read (two non-consecutive reads, separated by exactly one
	1998	// idle cycle) - in this case, don't assert the gate because:
	1999	// (1) we don't have enough time to deassert the gate before the
	2000	// first rising edge of DQS for second burst (b/c of fact
	2001	// that DQS gate is generated in the fabric only off rising
	2002	// edge of CLK0 - if we somehow had an ODDR in fabric, we
	2003	// could pull this off, (2) assumption is that the DQS glitch
	2004	// will not rise enough to cause a glitch because the
	2005	// post-amble of the first burst is followed immediately by
	2006	// the pre-amble of the next burst
	2007	// 2. During stage 4 calibration, assert for 3 clock cycles
	2008	// (assert gate enable one clock cycle early), since we gate out
	2009	// the last two words (in addition to the crap on the DQ bus after
	2010	// the DQS read postamble).
	2011	// NOTE: PHY_INIT_RDEN and CTRL_RDEN have slightly different timing w/r
	2012	// to when they are asserted w/r to the start of the read burst
	2013	// (PHY_INIT_RDEN is one cycle earlier than CTRL_RDEN).
	2014	//*****************************************************************
	2015
	2016	// register for timing purposes for fast clock path - currently only
	2017	// calib_done_r[2] used
	2018	always @(posedge clk)
	2019	calib_done_r <= calib_done;
	2020
	2021	always @(*) begin
	2022	calib_ctrl_rden = ctrl_rden;
	2023	calib_init_rden = calib_done_r[2] & phy_init_rden;
	2024	end
	2025
	2026	assign calib_ctrl_rden_negedge = ~calib_ctrl_rden & calib_ctrl_rden_r;
	2027	// check for read-idle-read before asserting DQS pulse at end of read
	2028	assign calib_ctrl_gate_pulse = calib_ctrl_rden_negedge_r &
	2029	~calib_ctrl_rden;
	2030	always @(posedge clk) begin
	2031	calib_ctrl_rden_r <= calib_ctrl_rden;
	2032	calib_ctrl_rden_negedge_r <= calib_ctrl_rden_negedge;
	2033	calib_ctrl_gate_pulse_r <= calib_ctrl_gate_pulse;
	2034	end
	2035
	2036	assign calib_init_gate_pulse = ~calib_init_rden & calib_init_rden_r;
	2037	always @(posedge clk) begin
	2038	calib_init_rden_r <= calib_init_rden;
	2039	calib_init_gate_pulse_r <= calib_init_gate_pulse;
	2040	calib_init_gate_pulse_r1 <= calib_init_gate_pulse_r;
	2041	end
	2042
	2043	// Gate is asserted: (1) during cal, for 3 cycles, starting 1 cycle
	2044	// after falling edge of CTRL_RDEN, (2) during normal ops, for 2
	2045	// cycles, starting 2 cycles after falling edge of CTRL_RDEN
	2046	assign gate_srl_in = ~((calib_ctrl_gate_pulse \|
	2047	calib_ctrl_gate_pulse_r) \|
	2048	(calib_init_gate_pulse \|
	2049	calib_init_gate_pulse_r \|
	2050	calib_init_gate_pulse_r1));
	2051
	2052	//*****************************************************************
	2053	// generate DQS enable signal for each DQS group
	2054	// There are differences between DQS gate signal for calibration vs. during
	2055	// normal operation:
	2056	// * calibration gates the second to last clock cycle of the burst,
	2057	// rather than after the last word (e.g. for a 8-word, 4-cycle burst,
	2058	// cycle 4 is gated for calibration; during normal operation, cycle
	2059	// 5 (i.e. cycle after the last word) is gated)
	2060	// enable for DQS is deasserted for two clock cycles, except when
	2061	// we have the preamble for the next read immediately following
	2062	// the postamble of the current read - assume DQS does not glitch
	2063	// during this time, that it stays low. Also if we did have to gate
	2064	// the DQS for this case, then we don't have enough time to deassert
	2065	// the gate in time for the first rising edge of DQS for the second
	2066	// read
	2067	//*****************************************************************
	2068
	2069	// Flops for targeting of multi-cycle path in UCF
		// One FDRSE per bit of gate_dly (5 address bits for each of the
		// DQS_WIDTH groups) re-registers gate_dly into gate_dly_r on CLKDIV.
		// CE is tied high and R/S are tied low, so each instance is used as a
		// plain D register.
	2070	genvar gate_ff_i;
	2071	generate
	2072	for (gate_ff_i = 0; gate_ff_i < 5*DQS_WIDTH;
	2073	gate_ff_i = gate_ff_i+1) begin: gen_gate_dly
	2074	FDRSE u_ff_gate_dly
	2075	(
	2076	.Q (gate_dly_r[gate_ff_i]),
	2077	.C (clkdiv),
	2078	.CE (1'b1),
	2079	.D (gate_dly[gate_ff_i]),
	2080	.R (1'b0),
	2081	.S (1'b0)
	2082	) /* synthesis syn_preserve = 1 */
	2083	/* synthesis syn_replicate = 0 */;
	2084	end
	2085	endgenerate
	2086
		// Per-DQS-group gate delay lines. For each group:
		//  1. an SRLC32E shifts gate_srl_in on CLK and taps it at the 5-bit
		//     address held in gate_dly_r[5*group+4 : 5*group] (Q31 unused)
		//  2. when GATE_BASE_DELAY > 0, the tap is registered once more on CLK
		//     (gate_srl_out_r); otherwise gate_srl_out_r is a direct copy
		//  3. a final FDRSE on CLK drives en_dqs[group]
	2087	genvar gate_i;
	2088	generate
	2089	for (gate_i = 0; gate_i < DQS_WIDTH; gate_i = gate_i + 1) begin: gen_gate
	2090	SRLC32E u_gate_srl
	2091	(
	2092	.Q (gate_srl_out[gate_i]),
	2093	.Q31 (),
	2094	.A ({gate_dly_r[(gate_i*5)+4],
	2095	gate_dly_r[(gate_i*5)+3],
	2096	gate_dly_r[(gate_i*5)+2],
	2097	gate_dly_r[(gate_i*5)+1],
	2098	gate_dly_r[(gate_i*5)]}),
	2099	.CE (1'b1),
	2100	.CLK (clk),
	2101	.D (gate_srl_in)
	2102	);
	2103
	2104	// For GATE_BASE_DELAY > 0, have one extra cycle to register outputs
	2105	// from controller before generating DQS gate pulse. In PAR, the
	2106	// location of the controller logic can be far from the DQS gate
	2107	// logic (DQS gate logic located near the DQS I/O's), contributing
	2108	// to large net delays. Registering the controller outputs for
	2109	// CL >= 4 (above 200MHz) adds a stage of pipelining to reduce net
	2110	// delays
		// NOTE(review): the generate labels say "gt3"/"le3" while the condition
		// tests GATE_BASE_DELAY > 0 - confirm which one is intended.
	2111	if (GATE_BASE_DELAY > 0) begin: gen_gate_base_dly_gt3
	2112	// add flop between SRL32 and EN_DQS flop (which is located near the
	2113	// DDR2 IOB's)
	2114	FDRSE u_gate_srl_ff
	2115	(
	2116	.Q (gate_srl_out_r[gate_i]),
	2117	.C (clk),
	2118	.CE (1'b1),
	2119	.D (gate_srl_out[gate_i]),
	2120	.R (1'b0),
	2121	.S (1'b0)
	2122	) /* synthesis syn_preserve = 1 */;
	2123	end else begin: gen_gate_base_dly_le3
	2124	assign gate_srl_out_r[gate_i] = gate_srl_out[gate_i];
	2125	end
	2126
		// output register for the DQS enable of this group
	2127	FDRSE u_en_dqs_ff
	2128	(
	2129	.Q (en_dqs[gate_i]),
	2130	.C (clk),
	2131	.CE (1'b1),
	2132	.D (gate_srl_out_r[gate_i]),
	2133	.R (1'b0),
	2134	.S (1'b0)
	2135	) /* synthesis syn_preserve = 1 */
	2136	/* synthesis syn_replicate = 0 */;
	2137	end
	2138	endgenerate
	2139
	2140	//*****************************************************************
	2141	// Find valid window: keep track of how long we've been in the same data
	2142	// window. If it's been long enough, then declare that we've found a stable
	2143	// valid window - in particular, that we're past any region of instability
	2144	// associated with the edge of the window. Use only when finding left edge
	2145	//*****************************************************************
	2146
	2147	always @(posedge clkdiv)
	2148	// reset before we start to look for window
	2149	if (cal4_state == CAL4_INIT) begin
	2150	cal4_window_cnt <= 4'b0000;
	2151	cal4_stable_window <= 1'b0;
	2152	end else if ((cal4_state == CAL4_FIND_EDGE) && cal4_seek_left) begin
	2153	// if we're looking for left edge, and incrementing IDELAY, count
	2154	// consecutive taps over which we're in the window
	2155	if (cal4_data_valid) begin
	2156	if (cal4_data_good)
	2157	cal4_window_cnt <= cal4_window_cnt + 1;
	2158	else
	2159	cal4_window_cnt <= 4'b0000;
	2160	end
	2161
	2162	if (cal4_window_cnt == MIN_WIN_SIZE-1)
	2163	cal4_stable_window <= 1'b1;
	2164	end
	2165
	2166	//*****************************************************************
	2167	// keep track of edge tap counts found, and whether we've
	2168	// incremented to the maximum number of taps allowed
	2169	//*****************************************************************
	2170
	2171	always @(posedge clkdiv)
	2172	if ((cal4_state == CAL4_INIT) \|\| cal4_dlyrst_gate) begin
	2173	cal4_idel_max_tap <= 1'b0;
	2174	cal4_idel_bit_tap <= 1'b0;
	2175	cal4_idel_tap_cnt <= 6'b000000;
	2176	end else if (cal4_dlyce_gate) begin
	2177	if (cal4_dlyinc_gate) begin
	2178	cal4_idel_tap_cnt <= cal4_idel_tap_cnt + 1;
	2179	cal4_idel_bit_tap <= (cal4_idel_tap_cnt == CAL4_IDEL_BIT_VAL-2);
	2180	cal4_idel_max_tap <= (cal4_idel_tap_cnt == 6'b111110);
	2181	end else begin
	2182	cal4_idel_tap_cnt <= cal4_idel_tap_cnt - 1;
	2183	cal4_idel_bit_tap <= 1'b0;
	2184	cal4_idel_max_tap <= 1'b0;
	2185	end
	2186	end
	2187
	2188	always @(posedge clkdiv)
	2189	if ((cal4_state != CAL4_RDEN_PIPE_CLR_WAIT) &&
	2190	(cal3_state != CAL3_RDEN_PIPE_CLR_WAIT))
	2191	calib_rden_pipe_cnt <= CALIB_RDEN_PIPE_LEN-1;
	2192	else
	2193	calib_rden_pipe_cnt <= calib_rden_pipe_cnt - 1;
	2194
	2195	//*****************************************************************
	2196	// Stage 4 cal state machine
	2197	//*****************************************************************
	2198
	2199	always @(posedge clkdiv)
	2200	if (rstdiv) begin
	2201	calib_done[3] <= 1'b0;
	2202	calib_done_tmp[3] <= 1'b0;
	2203	calib_err[3] <= 1'b0;
	2204	count_gate <= 'b0;
	2205	gate_dly <= 'b0;
	2206	next_count_gate <= 'b0;
	2207	cal4_idel_adj_cnt <= 6'bxxxxxx;
	2208	cal4_dlyce_gate <= 1'b0;
	2209	cal4_dlyinc_gate <= 1'b0;
	2210	cal4_dlyrst_gate <= 1'b0; // reset handled elsewhere in code
	2211	cal4_gate_srl_a <= 5'bxxxxx;
	2212	cal4_rden_srl_a <= 5'bxxxxx;
	2213	cal4_ref_req <= 1'b0;
	2214	cal4_seek_left <= 1'bx;
	2215	cal4_state <= CAL4_IDLE;
	2216	end else begin
	2217	cal4_ref_req <= 1'b0;
	2218	cal4_dlyce_gate <= 1'b0;
	2219	cal4_dlyinc_gate <= 1'b0;
	2220	cal4_dlyrst_gate <= 1'b0;
	2221
	2222	case (cal4_state)
	2223	CAL4_IDLE: begin
	2224	count_gate <= 'b0;
	2225	next_count_gate <= 'b0;
	2226	if (calib_start[3]) begin
	2227	gate_dly <= 'b0;
	2228	calib_done[3] <= 1'b0;
	2229	cal4_state <= CAL4_INIT;
	2230	end
	2231	end
	2232
	2233	CAL4_INIT: begin
	2234	// load: (1) initial value of gate delay SRL, (2) appropriate
	2235	// value of RDEN SRL (so that we get correct "data valid" timing)
	2236	cal4_gate_srl_a <= GATE_BASE_INIT;
	2237	cal4_rden_srl_a <= {calib_rden_dly[(count_gate*5)+4],
	2238	calib_rden_dly[(count_gate*5)+3],
	2239	calib_rden_dly[(count_gate*5)+2],
	2240	calib_rden_dly[(count_gate*5)+1],
	2241	calib_rden_dly[(count_gate*5)]};
	2242	// let SRL pipe clear after loading initial shift value
	2243	cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
	2244	end
	2245
	2246	// sort of an initial state - start checking to see whether we're
	2247	// already in the window or not
	2248	CAL4_FIND_WINDOW:
	2249	// decide right away if we start in the proper window - this
	2250	// determines if we are then looking for the left (trailing) or
	2251	// right (leading) edge of the data valid window
	2252	if (cal4_data_valid) begin
	2253	// if we find a match - then we're already in window, now look
	2254	// for left edge. Otherwise, look for right edge of window
	2255	cal4_seek_left <= cal4_data_good;
	2256	cal4_state <= CAL4_FIND_EDGE;
	2257	end
	2258
	2259	CAL4_FIND_EDGE:
	2260	// don't do anything until the exact clock cycle at which to check
	2261	// whether the readback data is valid or not
	2262	if (cal4_data_valid) begin
	2263	// we started in the window (cal4_seek_left set): look for left edge
	2264	if (cal4_seek_left) begin
	2265	// make sure we've passed the right edge before trying to detect
	2266	// the left edge (i.e. avoid any edge "instability") - else, we
	2267	// may detect a "false" edge too soon. By design, if we start in
	2268	// the data valid window, always expect at least
	2269	// MIN(BIT_TIME_TAPS,32) (-/+ jitter, see below) taps of valid
	2270	// window before we hit the left edge. This is because when stage
	2271	// 4 calibration first begins (i.e., gate_dly = 00, and IDELAY =
	2272	// 00), we're guaranteed to NOT be in the window, and we always
	2273	// start by searching MIN(BIT_TIME_TAPS,32) taps for the right edge
	2274	// of window. If we don't find it, increment gate_dly, and if we
	2275	// now start in the window, we have at least approximately
	2276	// CLK_PERIOD-MIN(BIT_TIME_TAPS,32) = MIN(BIT_TIME_TAPS,32) taps.
	2277	// It's approximate because jitter, noise, etc. can bring this
	2278	// value down slightly. Because of this (although VERY UNLIKELY),
	2279	// we have to protect against decrementing IDELAY below 0
	2280	// during the adjustment phase.
	2281	if (cal4_stable_window && !cal4_data_good) begin
	2282	// found left edge of window, dec by MIN(BIT_TIME_TAPS,32)
	2283	cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
	2284	cal4_idel_adj_inc <= 1'b0; // adjust by decrementing
	2285	cal4_state <= CAL4_ADJ_IDEL;
	2286	end else begin
	2287	// Otherwise, keep looking for left edge:
	2288	if (cal4_idel_max_tap) begin
	2289	// ran out of taps looking for left edge (max=63) - happens
	2290	// for low frequency case, decrement by 32
	2291	cal4_idel_adj_cnt <= 6'b100000; // 6'b100000 = 32
	2292	cal4_idel_adj_inc <= 1'b0; // adjust by decrementing
	2293	cal4_state <= CAL4_ADJ_IDEL;
	2294	end else begin
	2295	cal4_dlyce_gate <= 1'b1; // request an increment (CE and INC both
	2296	cal4_dlyinc_gate <= 1'b1; // asserted), then wait before re-checking
	2297	cal4_state <= CAL4_IDEL_WAIT;
	2298	end
	2299	end
	2300	end else begin
	2301	// looking for right edge of window:
	2302	// look for the first match - this means we've found the right
	2303	// (leading) edge of the data valid window, increment by
	2304	// MIN(BIT_TIME_TAPS,32)
	2305	if (cal4_data_good) begin
	2306	cal4_idel_adj_cnt <= CAL4_IDEL_BIT_VAL;
	2307	cal4_idel_adj_inc <= 1'b1; // adjust by incrementing
	2308	cal4_state <= CAL4_ADJ_IDEL;
	2309	end else begin
	2310	// Otherwise, keep looking:
	2311	// only search MIN(BIT_TIME_TAPS,32) taps for the right edge;
	2312	// if we haven't found it by then, inc gate delay and try again
	2313	if (cal4_idel_bit_tap) begin
	2314	// if we're already maxed out on gate delay, then error out
	2315	// (simulation only - calib_err isn't currently connected)
	2316	if (cal4_gate_srl_a == 5'b11111) begin
	2317	calib_err[3] <= 1'b1;
	2318	cal4_state <= CAL4_IDLE;
	2319	end else begin
	2320	// otherwise, increment gate delay count, reset the gate
	2321	// IDELAY, and start over again
	2322	cal4_gate_srl_a <= cal4_gate_srl_a + 1;
	2323	cal4_dlyrst_gate <= 1'b1;
	2324	cal4_state <= CAL4_RDEN_PIPE_CLR_WAIT;
	2325	end
	2326	end else begin
	2327	// keep looking for right edge: request another increment
	2328	cal4_dlyce_gate <= 1'b1;
	2329	cal4_dlyinc_gate <= 1'b1;
	2330	cal4_state <= CAL4_IDEL_WAIT;
	2331	end
	2332	end
	2333	end
	2334	end
	2335
	2336	// wait for GATE IDELAY to settle, after reset or increment
	2337	CAL4_IDEL_WAIT: begin
	2338	// For simulation, load SRL addresses for all DQS with same value
	2339	if (SIM_ONLY != 0) begin
	2340	for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly
	2341	gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
	2342	gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
	2343	gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
	2344	gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
	2345	gate_dly[(i*5)] <= cal4_gate_srl_a[0];
	2346	end
	2347	end else begin // otherwise update only the 5-bit field at count_gate
	2348	gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
	2349	gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
	2350	gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
	2351	gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
	2352	gate_dly[(count_gate*5)] <= cal4_gate_srl_a[0];
	2353	end
	2354	// once idel_set_wait deasserts, go back and check for the window edge
	2355	if (!idel_set_wait)
	2356	cal4_state <= CAL4_FIND_EDGE;
	2357	end
	2358
	2359	// give additional time for RDEN_R pipe to clear from effects of
	2360	// previous pipeline (and IDELAY reset)
	2361	CAL4_RDEN_PIPE_CLR_WAIT: begin
	2362	// MIG 2.2: Bug fix - make sure to update GATE_DLY count, since a
	2363	// FIND_EDGE->RDEN_PIPE_CLR_WAIT->FIND_WINDOW transition is
	2364	// possible (i.e. need to make sure the gate count updated in
	2365	// FIND_EDGE gets reflected in GATE_DLY by the time we reach
	2366	// state FIND_WINDOW) - previously GATE_DLY was only being updated
	2367	// during state CAL4_IDEL_WAIT
	2368	if (SIM_ONLY != 0) begin // simulation: write every DQS group's field
	2369	for (i = 0; i < DQS_WIDTH; i = i + 1) begin: loop_sim_gate_dly_pipe
	2370	gate_dly[(i*5)+4] <= cal4_gate_srl_a[4];
	2371	gate_dly[(i*5)+3] <= cal4_gate_srl_a[3];
	2372	gate_dly[(i*5)+2] <= cal4_gate_srl_a[2];
	2373	gate_dly[(i*5)+1] <= cal4_gate_srl_a[1];
	2374	gate_dly[(i*5)] <= cal4_gate_srl_a[0];
	2375	end
	2376	end else begin // otherwise update only the 5-bit field at count_gate
	2377	gate_dly[(count_gate*5)+4] <= cal4_gate_srl_a[4];
	2378	gate_dly[(count_gate*5)+3] <= cal4_gate_srl_a[3];
	2379	gate_dly[(count_gate*5)+2] <= cal4_gate_srl_a[2];
	2380	gate_dly[(count_gate*5)+1] <= cal4_gate_srl_a[1];
	2381	gate_dly[(count_gate*5)] <= cal4_gate_srl_a[0];
	2382	end
	2383	// once calib_rden_pipe_cnt reaches zero, look for a new window
	2384	if (calib_rden_pipe_cnt == 5'b00000)
	2385	cal4_state <= CAL4_FIND_WINDOW;
	2386	end
	2387
	2388	// increment/decrement DQS/DQ IDELAY for final adjustment
	2389	CAL4_ADJ_IDEL:
	2390	// Stop when the adjustment count reaches zero. Also stop early as
	2391	// underflow protection for the corner case when the left edge was
	2392	// found using fewer than MIN(BIT_TIME_TAPS,32) taps: a decrement is
	2393	// being requested (CE high, INC low) while cal4_idel_tap_cnt is 1
	2392	if ((cal4_idel_adj_cnt == 6'b000000) ||
	2393	(cal4_dlyce_gate && !cal4_dlyinc_gate &&
	2394	(cal4_idel_tap_cnt == 6'b000001))) begin
	2395	cal4_state <= CAL4_DONE;
	2396	// stop when all gates calibrated, or gate[0] cal'ed (for sim)
	2397	if ((count_gate == DQS_WIDTH-1) || (SIM_ONLY != 0))
	2398	calib_done_tmp[3] <= 1'b1;
	2399	else
	2400	// only advance when not on the last gate - needed for VHDL
	2400	// simulation to prevent out-of-index error
	2401	next_count_gate <= count_gate + 1;
	2402	end else begin
	2403	cal4_idel_adj_cnt <= cal4_idel_adj_cnt - 1;
	2404	cal4_dlyce_gate <= 1'b1;
	2405	// whether inc or dec depends on whether left or right edge found
	2406	cal4_dlyinc_gate <= cal4_idel_adj_inc;
	2407	end
	2408
	2409	// wait for IDELAY output to settle after the final adjustment. Check
	2410	// current COUNT_GATE value and decide if we're done
	2411	CAL4_DONE:
	2412	if (!idel_set_wait) begin
	2413	count_gate <= next_count_gate;
	2414	if (calib_done_tmp[3]) begin
	2415	calib_done[3] <= 1'b1;
	2416	cal4_state <= CAL4_IDLE;
	2417	end else begin
	2418	// request auto-refresh after every DQS group calibrated to
	2419	// avoid tRAS violation
	2420	cal4_ref_req <= 1'b1;
	2421	if (calib_ref_done) // refresh done: restart stage 4 from CAL4_INIT
	2422	cal4_state <= CAL4_INIT;
	2423	end
	2424	end
	2425	endcase
	2426	end
	2427
	2428	endmodule

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: XOpenSparcT1/trunk/Xilinx/ddr2_phy_calib.v @ 10

Download in other formats: