1 | // ========== Copyright Header Begin ========================================== |
---|
2 | // |
---|
3 | // OpenSPARC T1 Processor File: sparc_exu_div.v |
---|
4 | // Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. |
---|
5 | // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. |
---|
6 | // |
---|
7 | // The above named program is free software; you can redistribute it and/or |
---|
8 | // modify it under the terms of the GNU General Public |
---|
9 | // License version 2 as published by the Free Software Foundation. |
---|
10 | // |
---|
11 | // The above named program is distributed in the hope that it will be |
---|
12 | // useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
14 | // General Public License for more details. |
---|
15 | // |
---|
16 | // You should have received a copy of the GNU General Public |
---|
17 | // License along with this work; if not, write to the Free Software |
---|
18 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. |
---|
19 | // |
---|
20 | // ========== Copyright Header End ============================================ |
---|
21 | //////////////////////////////////////////////////////////////////////// |
---|
22 | /* |
---|
23 | // Module Name: sparc_exu_div |
---|
24 | */ |
---|
25 | module sparc_exu_div (/*AUTOARG*/ |
---|
26 | // Outputs |
---|
27 | so, div_ecl_xin_msb_l, div_ecl_x_msb, div_ecl_d_msb, |
---|
28 | div_ecl_cout64, div_ecl_cout32, div_ecl_gencc_in_msb_l, |
---|
29 | div_ecl_gencc_in_31, div_ecl_upper32_equal, div_ecl_low32_nonzero, |
---|
30 | div_ecl_dividend_msb, div_byp_muldivout_g, div_byp_yreg_e, |
---|
31 | div_ecl_yreg_0_l, exu_mul_rs1_data, exu_mul_rs2_data, |
---|
32 | div_ecl_adder_out_31, div_ecl_detect_zero_low, |
---|
33 | div_ecl_detect_zero_high, div_ecl_d_62, |
---|
34 | // Inputs |
---|
35 | ecl_div_yreg_wen_w, ecl_div_yreg_wen_l, ecl_div_yreg_wen_g, |
---|
36 | ecl_div_yreg_shift_g, ecl_div_yreg_data_31_g, ecl_div_thr_e, |
---|
37 | byp_div_yreg_data_w, rclk, se, si, ecl_div_keep_d, |
---|
38 | ecl_div_ld_inputs, ecl_div_sel_adder, ecl_div_last_cycle, |
---|
39 | ecl_div_almostlast_cycle, ecl_div_div64, ecl_div_sel_u32, |
---|
40 | ecl_div_sel_pos32, ecl_div_sel_neg32, ecl_div_sel_64b, |
---|
41 | ecl_div_upper32_zero, ecl_div_upper33_one, ecl_div_upper33_zero, |
---|
42 | mul_exu_data_g, ecl_div_sel_div, ecl_div_mul_wen, |
---|
43 | ecl_div_dividend_sign, ecl_div_subtract_l, ecl_div_cin, |
---|
44 | ecl_div_newq, ecl_div_xinmask, ecl_div_keepx, |
---|
45 | ecl_div_mul_get_new_data, ecl_div_mul_keep_data, |
---|
46 | ecl_div_mul_get_32bit_data, ecl_div_mul_sext_rs2_e, |
---|
47 | ecl_div_mul_sext_rs1_e, byp_div_rs1_data_e, byp_div_rs2_data_e, |
---|
48 | ecl_div_muls_rs1_31_e_l, ecl_div_muls, ecl_div_zero_rs2_e |
---|
49 | ) ; |
---|
50 | /*AUTOINPUT*/ |
---|
51 | // Beginning of automatic inputs (from unused autoinst inputs) |
---|
52 | input [31:0] byp_div_yreg_data_w; // To yreg of sparc_exu_div_yreg.v |
---|
53 | input [3:0] ecl_div_thr_e; // To yreg of sparc_exu_div_yreg.v |
---|
54 | input ecl_div_yreg_data_31_g; // To yreg of sparc_exu_div_yreg.v |
---|
55 | input [3:0] ecl_div_yreg_shift_g; // To yreg of sparc_exu_div_yreg.v |
---|
56 | input [3:0] ecl_div_yreg_wen_g; // To yreg of sparc_exu_div_yreg.v |
---|
57 | input [3:0] ecl_div_yreg_wen_l; // To yreg of sparc_exu_div_yreg.v |
---|
58 | input [3:0] ecl_div_yreg_wen_w; // To yreg of sparc_exu_div_yreg.v |
---|
59 | // End of automatics |
---|
60 | input rclk; |
---|
61 | input se; |
---|
62 | input si; |
---|
63 | input ecl_div_keep_d; // d should store (w/ overflow calcs) |
---|
64 | input ecl_div_ld_inputs;// load in d and x |
---|
65 | input ecl_div_sel_adder;// d should use adder output |
---|
66 | input ecl_div_last_cycle;// last cycle of computations |
---|
67 | input ecl_div_almostlast_cycle;// 2nd to last cycle of div |
---|
68 | input ecl_div_div64; |
---|
69 | input ecl_div_sel_u32; |
---|
70 | input ecl_div_sel_pos32; |
---|
71 | input ecl_div_sel_neg32; |
---|
72 | input ecl_div_sel_64b; |
---|
73 | input ecl_div_upper32_zero; |
---|
74 | input ecl_div_upper33_one; |
---|
75 | input ecl_div_upper33_zero; |
---|
76 | input [63:0] mul_exu_data_g; |
---|
77 | input ecl_div_sel_div; |
---|
78 | input ecl_div_mul_wen; |
---|
79 | input ecl_div_dividend_sign; |
---|
80 | input ecl_div_subtract_l; // add/subtract to adder |
---|
81 | input ecl_div_cin; |
---|
82 | input ecl_div_newq; // newest q bit |
---|
83 | input ecl_div_xinmask; |
---|
84 | input ecl_div_keepx; |
---|
85 | input ecl_div_mul_get_new_data; |
---|
86 | input ecl_div_mul_keep_data; |
---|
87 | input ecl_div_mul_get_32bit_data; |
---|
88 | input ecl_div_mul_sext_rs2_e; |
---|
89 | input ecl_div_mul_sext_rs1_e; |
---|
90 | input [63:0] byp_div_rs1_data_e; |
---|
91 | input [63:0] byp_div_rs2_data_e; |
---|
92 | input ecl_div_muls_rs1_31_e_l; |
---|
93 | input ecl_div_muls; |
---|
94 | input ecl_div_zero_rs2_e; |
---|
95 | |
---|
96 | output so; |
---|
97 | output div_ecl_xin_msb_l; |
---|
98 | output div_ecl_x_msb; |
---|
99 | output div_ecl_d_msb; |
---|
100 | output div_ecl_cout64; // cout from adder |
---|
101 | output div_ecl_cout32; // cout from adder |
---|
102 | output div_ecl_gencc_in_msb_l; |
---|
103 | output div_ecl_gencc_in_31; |
---|
104 | output div_ecl_upper32_equal; |
---|
105 | output div_ecl_low32_nonzero; |
---|
106 | output div_ecl_dividend_msb; |
---|
107 | output [63:0] div_byp_muldivout_g; |
---|
108 | output [31:0] div_byp_yreg_e; |
---|
109 | output [3:0] div_ecl_yreg_0_l; |
---|
110 | output [63:0] exu_mul_rs1_data; |
---|
111 | output [63:0] exu_mul_rs2_data; |
---|
112 | output div_ecl_adder_out_31; |
---|
113 | output div_ecl_detect_zero_low; |
---|
114 | output div_ecl_detect_zero_high; |
---|
115 | output div_ecl_d_62; |
---|
116 | |
---|
117 | /*AUTOWIRE*/ |
---|
118 | // Beginning of automatic wires (for undeclared instantiated-module outputs) |
---|
119 | wire [31:0] yreg_mdq_y_e; // From yreg of sparc_exu_div_yreg.v |
---|
120 | // End of automatics |
---|
121 | wire clk; |
---|
122 | wire [127:0] din; // sign extended dividend |
---|
123 | wire [127:0] d; // current dividend/quotient |
---|
124 | wire [63:0] adder_out; // output of adder |
---|
125 | wire [127:0] dnext; // input to d flop |
---|
126 | wire [127:0] adder_dnext; // combination of adder out and quotient |
---|
127 | wire [63:0] x; // divisor |
---|
128 | wire [63:0] xin; // sign extended (for 32bit) divisor |
---|
129 | wire [63:0] xnext; // input to divisor flop |
---|
130 | wire [63:0] adderin1; // first input to adder |
---|
131 | wire [63:0] adderin2; // 2nd input to adder |
---|
132 | |
---|
133 | wire [63:0] curr_q; // current quotient |
---|
134 | wire [63:0] out64; // 64 bit result |
---|
135 | wire [63:0] pos32; // positive 32 bit result w/ ovfl |
---|
136 | wire [63:0] neg32; // negative 32 bit result w/ ovfl |
---|
137 | wire [63:0] u32; // unsigned 32 bit result w/ ovfl |
---|
138 | wire [63:0] gencc_in; |
---|
139 | wire [63:0] mul_result; |
---|
140 | wire [63:0] mul_result_next; |
---|
141 | wire [127:0] input_data_e; |
---|
142 | wire [63:0] dividend; |
---|
143 | wire [63:0] divisor; |
---|
144 | wire [127:0] next_mul_data; |
---|
145 | wire [127:0] mul_data_out; |
---|
146 | wire [127:0] mul32_input_data_e; |
---|
147 | wire subtract; |
---|
148 | wire [63:0] spr_out; |
---|
149 | wire [63:0] z_in; |
---|
150 | |
---|
151 | assign clk = rclk; |
---|
152 | /////////////////////////////////////// |
---|
153 | // Input masking for 32 bit operations |
---|
154 | /////////////////////////////////////// |
---|
155 | dp_buffer #(128) buf_input_data(.dout(input_data_e[127:0]), |
---|
156 | .in({byp_div_rs2_data_e[63:0], byp_div_rs1_data_e[63:0]})); |
---|
157 | // Mux in yreg into upper 32 bits on 32 bit divides |
---|
158 | dp_mux2es #(32) dividendmux(.dout(dividend[63:32]), |
---|
159 | .in0(yreg_mdq_y_e[31:0]), |
---|
160 | .in1(input_data_e[63:32]), |
---|
161 | .sel(ecl_div_div64)); |
---|
162 | assign dividend[31:0] = input_data_e[31:0]; |
---|
163 | assign divisor[63:0] = input_data_e[127:64]; |
---|
164 | |
---|
165 | |
---|
166 | ///////////////////// |
---|
167 | // Output assignment |
---|
168 | ///////////////////// |
---|
169 | dp_mux2es #(64) output_mux(.dout(div_byp_muldivout_g[63:0]), .in1(d[63:0]), |
---|
170 | .in0(mul_result[63:0]), |
---|
171 | .sel(ecl_div_sel_div)); |
---|
172 | /////////////////////////// |
---|
173 | // Generate Condition Codes and divide by zero exception and overflow |
---|
174 | /////////////////////////// |
---|
175 | dp_mux2es #(64) gencc_mux(.dout(gencc_in[63:0]), |
---|
176 | .in0(mul_result[63:0]), |
---|
177 | .in1(curr_q[63:0]), |
---|
178 | .sel(ecl_div_sel_div)); |
---|
179 | sparc_exu_div_32eql u32eql(.in(gencc_in[63:32]), .equal(div_ecl_upper32_equal)); |
---|
180 | sparc_exu_aluor32 low32or(// Outputs |
---|
181 | .out (div_ecl_low32_nonzero), |
---|
182 | // Inputs |
---|
183 | .in (gencc_in[31:0])); |
---|
184 | assign div_ecl_gencc_in_msb_l = ~gencc_in[63]; |
---|
185 | assign div_ecl_gencc_in_31 = gencc_in[31]; |
---|
186 | |
---|
187 | |
---|
188 | // Division overflow calculations |
---|
189 | assign curr_q = d[127:64]; |
---|
190 | assign u32 = {32'b0, (curr_q[31:0] | {32{~ecl_div_upper32_zero}})}; |
---|
191 | assign pos32 = {33'b0, (curr_q[30:0] | {31{~ecl_div_upper33_zero}})}; |
---|
192 | assign neg32 = {{33{1'b1}}, (curr_q[30:0] & {31{ecl_div_upper33_one}})}; |
---|
193 | |
---|
194 | mux4ds #(64) result_mux(.dout(out64[63:0]), .in0(curr_q[63:0]), .in1(u32[63:0]), |
---|
195 | .in2(pos32[63:0]), .in3(neg32[63:0]), .sel0(ecl_div_sel_64b), |
---|
196 | .sel1(ecl_div_sel_u32), .sel2(ecl_div_sel_pos32), |
---|
197 | .sel3(ecl_div_sel_neg32)); |
---|
198 | |
---|
199 | ////////////////////////// |
---|
200 | // Logic for D (dividend) |
---|
201 | ////////////////////////// |
---|
202 | |
---|
203 | // If signed div sign extend dividend to 127 bits |
---|
204 | assign div_ecl_dividend_msb = dividend[63]; |
---|
205 | assign din[62:0] = dividend[62:0]; |
---|
206 | dp_mux2es #(32) din_mux(.dout(din[94:63]), |
---|
207 | .in0({{31{ecl_div_dividend_sign}}, dividend[63]}), |
---|
208 | .in1({~ecl_div_muls_rs1_31_e_l, dividend[31:1]}), |
---|
209 | .sel(ecl_div_muls)); |
---|
210 | assign din[127:95] = {33{ecl_div_dividend_sign}}; |
---|
211 | // assign din = {{64{ecl_div_dividend_sign}}, dividend[63:0]}; |
---|
212 | |
---|
213 | |
---|
214 | // Select input to FF for d |
---|
215 | mux3ds #(128) d_mux(.dout(dnext[127:0]), .in0({d[127:64], out64[63:0]}), |
---|
216 | .in1(adder_dnext[127:0]), .in2(din[127:0]), |
---|
217 | .sel0(ecl_div_keep_d), |
---|
218 | .sel1(ecl_div_sel_adder), |
---|
219 | .sel2(ecl_div_ld_inputs)); |
---|
220 | assign div_ecl_d_62 = d[62]; |
---|
221 | |
---|
222 | // FF for d |
---|
223 | dff_s #(128) d_dff(.din(dnext[127:0]), .clk(clk), .q(d[127:0]), .se(se), .si(), .so()); |
---|
224 | |
---|
225 | //////////////////////////// |
---|
226 | // Logic for X (divisor) |
---|
227 | //////////////////////////// |
---|
228 | // if signed div and 32 bits sign extend to upper 32 bits |
---|
229 | dp_mux2es #(32) xin_mux(.dout(xin[63:32]), .in1(divisor[63:32]), |
---|
230 | .in0({32{ecl_div_xinmask}}), |
---|
231 | .sel(ecl_div_div64)); |
---|
232 | assign xin[31:0] = divisor[31:0] & {32{~ecl_div_zero_rs2_e}}; |
---|
233 | //assign xin[31:0] = divisor[31:0]; |
---|
234 | |
---|
235 | // Pick between x and divisor and 1 (use divisor on first cycle, 1 last cycle) |
---|
236 | mux3ds #(64) x_mux(.dout(xnext[63:0]), .in0(x[63:0]), .in1(xin[63:0]), .in2({64'b0}), |
---|
237 | .sel0(ecl_div_keepx), |
---|
238 | .sel1(ecl_div_ld_inputs), |
---|
239 | .sel2(ecl_div_almostlast_cycle)); |
---|
240 | |
---|
241 | // FF for x |
---|
242 | dff_s #(64) x_dff(.din(xnext[63:0]), .clk(clk), .q(x[63:0]), .se(se), .si(), .so()); |
---|
243 | |
---|
244 | |
---|
245 | /////////////////////////// |
---|
246 | // Logic for inputs to adder |
---|
247 | ////////////////////////// |
---|
248 | assign div_ecl_xin_msb_l = ~xin[63]; |
---|
249 | assign div_ecl_x_msb = x[63]; |
---|
250 | assign div_ecl_d_msb = d[127]; |
---|
251 | dp_mux2es #(64) in1_mux(.dout(adderin1[63:0]), .in0(d[126:63]), |
---|
252 | .in1({d[62:0], ecl_div_newq}), .sel(ecl_div_last_cycle)); |
---|
253 | |
---|
254 | assign subtract = ~ecl_div_subtract_l; |
---|
255 | assign adderin2[63:0] = x[63:0] ^ {64{subtract}}; |
---|
256 | |
---|
257 | ////////////////////////// |
---|
258 | // Adder |
---|
259 | ///////////////////////// |
---|
260 | sparc_exu_aluadder64 add64(// Outputs |
---|
261 | .adder_out(adder_out[63:0]), |
---|
262 | .cout32 (div_ecl_cout32), |
---|
263 | .cout64 (div_ecl_cout64), |
---|
264 | // Inputs |
---|
265 | .rs1_data (adderin1[63:0]), |
---|
266 | .rs2_data (adderin2[63:0]), |
---|
267 | .cin (ecl_div_cin)); |
---|
268 | |
---|
269 | assign adder_dnext = {adder_out[63:0], d[62:0], ecl_div_newq}; |
---|
270 | assign div_ecl_adder_out_31 = adder_out[31]; |
---|
271 | |
---|
272 | // sum predict and zero detection |
---|
273 | sparc_exu_aluspr spr(.rs1_data(adderin1[63:0]), .rs2_data(adderin2[63:0]), .cin(ecl_div_cin), |
---|
274 | .spr_out(spr_out[63:0])); |
---|
275 | dp_mux2es #(64) zero_detect_mux(.dout(z_in[63:0]), |
---|
276 | .in0(spr_out[63:0]), |
---|
277 | .in1(xin[63:0]), |
---|
278 | .sel(ecl_div_ld_inputs)); |
---|
279 | //sparc_exu_aluzcmp64 regzcmp(.in(z_in[63:0]), .zero64(div_ecl_detect_zero)); |
---|
280 | assign div_ecl_detect_zero_low = ~(|z_in[31:0]); |
---|
281 | assign div_ecl_detect_zero_high = ~(|z_in[63:32]); |
---|
282 | |
---|
283 | |
---|
284 | // y register |
---|
285 | assign div_byp_yreg_e = yreg_mdq_y_e; |
---|
286 | sparc_exu_div_yreg yreg(.mul_div_yreg_data_g(mul_exu_data_g[63:32]), |
---|
287 | /*AUTOINST*/ |
---|
288 | // Outputs |
---|
289 | .yreg_mdq_y_e(yreg_mdq_y_e[31:0]), |
---|
290 | .div_ecl_yreg_0_l(div_ecl_yreg_0_l[3:0]), |
---|
291 | // Inputs |
---|
292 | .clk (clk), |
---|
293 | .se (se), |
---|
294 | .byp_div_yreg_data_w(byp_div_yreg_data_w[31:0]), |
---|
295 | .ecl_div_thr_e(ecl_div_thr_e[3:0]), |
---|
296 | .ecl_div_yreg_wen_w(ecl_div_yreg_wen_w[3:0]), |
---|
297 | .ecl_div_yreg_wen_g(ecl_div_yreg_wen_g[3:0]), |
---|
298 | .ecl_div_yreg_wen_l(ecl_div_yreg_wen_l[3:0]), |
---|
299 | .ecl_div_yreg_data_31_g(ecl_div_yreg_data_31_g), |
---|
300 | .ecl_div_yreg_shift_g(ecl_div_yreg_shift_g[3:0])); |
---|
301 | |
---|
302 | |
---|
303 | ////////////////////////////////// |
---|
304 | // MULTIPLIER inputs |
---|
305 | ////////////////////////////////// |
---|
306 | assign mul32_input_data_e[127:64] = {{32{ecl_div_mul_sext_rs2_e}}, input_data_e[95:64]}; |
---|
307 | assign mul32_input_data_e[63:0] = {{32{ecl_div_mul_sext_rs1_e}}, input_data_e[31:0]}; |
---|
308 | mux3ds #(128) mul_data_mux(.dout(next_mul_data[127:0]), |
---|
309 | .in0(input_data_e[127:0]), |
---|
310 | .in1(mul32_input_data_e[127:0]), |
---|
311 | .in2(mul_data_out[127:0]), |
---|
312 | .sel0(ecl_div_mul_get_new_data), |
---|
313 | .sel1(ecl_div_mul_get_32bit_data), |
---|
314 | .sel2(ecl_div_mul_keep_data)); |
---|
315 | dff_s #(128) mul_data_dff(.din(next_mul_data[127:0]), .clk(clk), .q(mul_data_out[127:0]), |
---|
316 | .se(se), .si(), .so()); |
---|
317 | assign exu_mul_rs1_data = mul_data_out[63:0]; |
---|
318 | assign exu_mul_rs2_data = mul_data_out[127:64]; |
---|
319 | |
---|
320 | /////////////////////////////////// |
---|
321 | // Store output from mul |
---|
322 | ////////////////////////////////// |
---|
323 | dp_mux2es #(64) mul_result_mux(.dout(mul_result_next[63:0]), .in0(mul_result[63:0]), |
---|
324 | .in1(mul_exu_data_g[63:0]), |
---|
325 | .sel(ecl_div_mul_wen)); |
---|
326 | dff_s #(64) mul_result_dff(.din(mul_result_next[63:0]), .clk(clk), .q(mul_result[63:0]), |
---|
327 | .se(se), .si(), .so()); |
---|
328 | |
---|
329 | |
---|
330 | endmodule // sparc_exu_div |
---|