[6] | 1 | // ========== Copyright Header Begin ========================================== |
---|
| 2 | // |
---|
| 3 | // OpenSPARC T1 Processor File: sparc_exu_div.v |
---|
| 4 | // Copyright (c) 2006 Sun Microsystems, Inc. All Rights Reserved. |
---|
| 5 | // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES. |
---|
| 6 | // |
---|
| 7 | // The above named program is free software; you can redistribute it and/or |
---|
| 8 | // modify it under the terms of the GNU General Public |
---|
| 9 | // License version 2 as published by the Free Software Foundation. |
---|
| 10 | // |
---|
| 11 | // The above named program is distributed in the hope that it will be |
---|
| 12 | // useful, but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
| 14 | // General Public License for more details. |
---|
| 15 | // |
---|
| 16 | // You should have received a copy of the GNU General Public |
---|
| 17 | // License along with this work; if not, write to the Free Software |
---|
| 18 | // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. |
---|
| 19 | // |
---|
| 20 | // ========== Copyright Header End ============================================ |
---|
| 21 | //////////////////////////////////////////////////////////////////////// |
---|
| 22 | /* |
---|
| 23 | // Module Name: sparc_exu_div |
---|
| 24 | */ |
---|
module sparc_exu_div (/*AUTOARG*/
   // Outputs
   so, div_ecl_xin_msb_l, div_ecl_x_msb, div_ecl_d_msb,
   div_ecl_cout64, div_ecl_cout32, div_ecl_gencc_in_msb_l,
   div_ecl_gencc_in_31, div_ecl_upper32_equal, div_ecl_low32_nonzero,
   div_ecl_dividend_msb, div_byp_muldivout_g, div_byp_yreg_e,
   div_ecl_yreg_0_l, exu_mul_rs1_data, exu_mul_rs2_data,
   div_ecl_adder_out_31, div_ecl_detect_zero_low,
   div_ecl_detect_zero_high, div_ecl_d_62,
   // Inputs
   ecl_div_yreg_wen_w, ecl_div_yreg_wen_l, ecl_div_yreg_wen_g,
   ecl_div_yreg_shift_g, ecl_div_yreg_data_31_g, ecl_div_thr_e,
   byp_div_yreg_data_w, rclk, se, si, ecl_div_keep_d,
   ecl_div_ld_inputs, ecl_div_sel_adder, ecl_div_last_cycle,
   ecl_div_almostlast_cycle, ecl_div_div64, ecl_div_sel_u32,
   ecl_div_sel_pos32, ecl_div_sel_neg32, ecl_div_sel_64b,
   ecl_div_upper32_zero, ecl_div_upper33_one, ecl_div_upper33_zero,
   mul_exu_data_g, ecl_div_sel_div, ecl_div_mul_wen,
   ecl_div_dividend_sign, ecl_div_subtract_l, ecl_div_cin,
   ecl_div_newq, ecl_div_xinmask, ecl_div_keepx,
   ecl_div_mul_get_new_data, ecl_div_mul_keep_data,
   ecl_div_mul_get_32bit_data, ecl_div_mul_sext_rs2_e,
   ecl_div_mul_sext_rs1_e, byp_div_rs1_data_e, byp_div_rs2_data_e,
   ecl_div_muls_rs1_31_e_l, ecl_div_muls, ecl_div_zero_rs2_e
   ) ;

   // ------------------------------------------------------------------
   // Divide datapath.  This module contains:
   //   * a 128-bit register d and a 64-bit register x,
   //   * one 64-bit adder (plus a sum-predict block used for zero detect),
   //   * the Y register block (sparc_exu_div_yreg),
   //   * 128 bits of operand staging flops feeding the multiplier and a
   //     64-bit flop holding the multiplier result.
   // All sequencing comes from the ecl_div_* control inputs; there is no
   // state machine in this file.
   // ------------------------------------------------------------------

   // ----------------------------- outputs -----------------------------
   output         so;                       // not driven by any statement in this file
   output         div_ecl_xin_msb_l;        // ~xin[63]
   output         div_ecl_x_msb;            // x[63]
   output         div_ecl_d_msb;            // d[127]
   output         div_ecl_cout64;           // adder carry out (cout64 pin)
   output         div_ecl_cout32;           // adder carry out (cout32 pin)
   output         div_ecl_gencc_in_msb_l;   // ~gencc_in[63]
   output         div_ecl_gencc_in_31;      // gencc_in[31]
   output         div_ecl_upper32_equal;    // from u32eql on gencc_in[63:32]
   output         div_ecl_low32_nonzero;    // from low32or on gencc_in[31:0]
   output         div_ecl_dividend_msb;     // dividend[63]
   output [63:0]  div_byp_muldivout_g;      // d[63:0] or stored multiplier result
   output [31:0]  div_byp_yreg_e;           // copy of yreg_mdq_y_e
   output [3:0]   div_ecl_yreg_0_l;
   output [63:0]  exu_mul_rs1_data;         // staged operand, mul_data_out[63:0]
   output [63:0]  exu_mul_rs2_data;         // staged operand, mul_data_out[127:64]
   output         div_ecl_adder_out_31;     // adder_out[31]
   output         div_ecl_detect_zero_low;  // z_in[31:0]  is all zero
   output         div_ecl_detect_zero_high; // z_in[63:32] is all zero
   output         div_ecl_d_62;             // d[62]

   // ----------------------------- inputs ------------------------------
   /*AUTOINPUT*/
   // Beginning of automatic inputs (from unused autoinst inputs)
   input [31:0]         byp_div_yreg_data_w;    // To yreg of sparc_exu_div_yreg.v
   input [3:0]          ecl_div_thr_e;          // To yreg of sparc_exu_div_yreg.v
   input                ecl_div_yreg_data_31_g; // To yreg of sparc_exu_div_yreg.v
   input [3:0]          ecl_div_yreg_shift_g;   // To yreg of sparc_exu_div_yreg.v
   input [3:0]          ecl_div_yreg_wen_g;     // To yreg of sparc_exu_div_yreg.v
   input [3:0]          ecl_div_yreg_wen_l;     // To yreg of sparc_exu_div_yreg.v
   input [3:0]          ecl_div_yreg_wen_w;     // To yreg of sparc_exu_div_yreg.v
   // End of automatics

   // clock and scan
   input          rclk;
   input          se;
   input          si;                       // not referenced by any statement in this file

   // source operands
   input [63:0]   byp_div_rs1_data_e;
   input [63:0]   byp_div_rs2_data_e;

   // d register control
   input          ecl_div_keep_d;           // d should store (w/ overflow calcs)
   input          ecl_div_ld_inputs;        // load in d and x
   input          ecl_div_sel_adder;        // d should use adder output
   input          ecl_div_dividend_sign;    // fill value for the upper bits of din
   input          ecl_div_muls;             // select for din_mux
   input          ecl_div_muls_rs1_31_e_l;  // inverted into din[94] via din_mux in1

   // x register control
   input          ecl_div_keepx;
   input          ecl_div_almostlast_cycle; // 2nd to last cycle of div
   input          ecl_div_xinmask;          // fill value for xin[63:32] (xin_mux in0)
   input          ecl_div_div64;            // select for dividendmux and xin_mux
   input          ecl_div_zero_rs2_e;       // when set, xin[31:0] is forced to zero

   // adder control
   input          ecl_div_last_cycle;       // last cycle of computations
   input          ecl_div_subtract_l;       // add/subtract to adder (active low)
   input          ecl_div_cin;
   input          ecl_div_newq;             // newest q bit

   // result selection / overflow handling
   input          ecl_div_sel_64b;
   input          ecl_div_sel_u32;
   input          ecl_div_sel_pos32;
   input          ecl_div_sel_neg32;
   input          ecl_div_upper32_zero;
   input          ecl_div_upper33_one;
   input          ecl_div_upper33_zero;
   input          ecl_div_sel_div;          // select for output_mux and gencc_mux

   // multiplier interface
   input [63:0]   mul_exu_data_g;
   input          ecl_div_mul_wen;
   input          ecl_div_mul_get_new_data;
   input          ecl_div_mul_keep_data;
   input          ecl_div_mul_get_32bit_data;
   input          ecl_div_mul_sext_rs2_e;   // fill bit for upper 32 of the rs2 operand
   input          ecl_div_mul_sext_rs1_e;   // fill bit for upper 32 of the rs1 operand

   // ------------------------------ wires ------------------------------
   /*AUTOWIRE*/
   // Beginning of automatic wires (for undeclared instantiated-module outputs)
   wire [31:0]          yreg_mdq_y_e;           // From yreg of sparc_exu_div_yreg.v
   // End of automatics

   wire           clk;

   // buffered operands
   wire [127:0]   input_data_e;        // {rs2, rs1} after the input buffer
   wire [63:0]    dividend;
   wire [63:0]    divisor;

   // d register path
   wire [127:0]   din;                 // extended dividend (load value for d)
   wire [127:0]   d;                   // current dividend/quotient
   wire [127:0]   dnext;               // input to d flop
   wire [127:0]   adder_dnext;         // combination of adder out and quotient

   // x register path
   wire [63:0]    xin;                 // sign extended (for 32bit) divisor
   wire [63:0]    x;                   // divisor
   wire [63:0]    xnext;               // input to divisor flop

   // adder and zero detect
   wire           subtract;
   wire [63:0]    adderin1;            // first input to adder
   wire [63:0]    adderin2;            // 2nd input to adder
   wire [63:0]    adder_out;           // output of adder
   wire [63:0]    spr_out;
   wire [63:0]    z_in;

   // result selection
   wire [63:0]    curr_q;              // current quotient
   wire [63:0]    out64;               // 64 bit result
   wire [63:0]    u32;                 // unsigned 32 bit result w/ ovfl
   wire [63:0]    pos32;               // positive 32 bit result w/ ovfl
   wire [63:0]    neg32;               // negative 32 bit result w/ ovfl
   wire [63:0]    gencc_in;

   // multiplier staging
   wire [127:0]   mul32_input_data_e;
   wire [127:0]   next_mul_data;
   wire [127:0]   mul_data_out;
   wire [63:0]    mul_result;
   wire [63:0]    mul_result_next;

   assign clk = rclk;

   // NOTE(review): every dff_s below leaves its si/so pins open and the
   // module-level si/so ports are not connected to anything in this file.
   // Presumably the scan chain is stitched by a later flow -- confirm.

   ///////////////////////////////////////
   // Operand buffering and dividend/divisor formation
   ///////////////////////////////////////
   // rs2 occupies the upper 64 bits of input_data_e, rs1 the lower 64.
   dp_buffer #(128) buf_input_data(
      .dout (input_data_e[127:0]),
      .in   ({byp_div_rs2_data_e[63:0], byp_div_rs1_data_e[63:0]}));

   // Mux in yreg into upper 32 bits on 32 bit divides
   dp_mux2es #(32) dividendmux(
      .dout (dividend[63:32]),
      .in0  (yreg_mdq_y_e[31:0]),
      .in1  (input_data_e[63:32]),
      .sel  (ecl_div_div64));

   assign dividend[31:0] = input_data_e[31:0];
   assign divisor[63:0]  = input_data_e[127:64];

   ///////////////////////////
   // D register (dividend / quotient)
   ///////////////////////////
   // Load value din[127:0]:
   //   [62:0]   = dividend[62:0]
   //   [94:63]  = din_mux output:
   //                in0 = {31 copies of ecl_div_dividend_sign, dividend[63]}
   //                in1 = {~ecl_div_muls_rs1_31_e_l, dividend[31:1]}
   //              selected by ecl_div_muls
   //   [127:95] = 33 copies of ecl_div_dividend_sign
   assign div_ecl_dividend_msb = dividend[63];
   assign din[62:0]            = dividend[62:0];
   assign din[127:95]          = {33{ecl_div_dividend_sign}};

   dp_mux2es #(32) din_mux(
      .dout (din[94:63]),
      .in0  ({{31{ecl_div_dividend_sign}}, dividend[63]}),
      .in1  ({~ecl_div_muls_rs1_31_e_l, dividend[31:1]}),
      .sel  (ecl_div_muls));

   // Adder step value: the adder result goes to the upper 64 bits while the
   // low half shifts left by one and takes ecl_div_newq in bit 0.
   assign adder_dnext = {adder_out[63:0], d[62:0], ecl_div_newq};

   // Select input to FF for d
   //   in0 (ecl_div_keep_d)    : upper half held, lower half replaced by out64
   //   in1 (ecl_div_sel_adder) : adder step value
   //   in2 (ecl_div_ld_inputs) : fresh load value
   mux3ds #(128) d_mux(
      .dout (dnext[127:0]),
      .in0  ({d[127:64], out64[63:0]}),
      .in1  (adder_dnext[127:0]),
      .in2  (din[127:0]),
      .sel0 (ecl_div_keep_d),
      .sel1 (ecl_div_sel_adder),
      .sel2 (ecl_div_ld_inputs));

   // FF for d
   dff_s #(128) d_dff(
      .din (dnext[127:0]),
      .clk (clk),
      .q   (d[127:0]),
      .se  (se),
      .si  (),
      .so  ());

   assign div_ecl_d_msb = d[127];
   assign div_ecl_d_62  = d[62];

   ////////////////////////////
   // X register (divisor)
   ////////////////////////////
   // if signed div and 32 bits sign extend to upper 32 bits
   dp_mux2es #(32) xin_mux(
      .dout (xin[63:32]),
      .in0  ({32{ecl_div_xinmask}}),
      .in1  (divisor[63:32]),
      .sel  (ecl_div_div64));

   // Low word of the divisor, cleared when ecl_div_zero_rs2_e is set.
   assign xin[31:0] = divisor[31:0] & {32{~ecl_div_zero_rs2_e}};

   // Select input to FF for x
   //   in0 (ecl_div_keepx)            : hold x
   //   in1 (ecl_div_ld_inputs)        : load xin
   //   in2 (ecl_div_almostlast_cycle) : constant zero
   mux3ds #(64) x_mux(
      .dout (xnext[63:0]),
      .in0  (x[63:0]),
      .in1  (xin[63:0]),
      .in2  (64'b0),
      .sel0 (ecl_div_keepx),
      .sel1 (ecl_div_ld_inputs),
      .sel2 (ecl_div_almostlast_cycle));

   // FF for x
   dff_s #(64) x_dff(
      .din (xnext[63:0]),
      .clk (clk),
      .q   (x[63:0]),
      .se  (se),
      .si  (),
      .so  ());

   assign div_ecl_xin_msb_l = ~xin[63];
   assign div_ecl_x_msb     = x[63];

   ///////////////////////////
   // Adder and its operands
   ///////////////////////////
   // First operand: d[126:63] (in0) or {d[62:0], ecl_div_newq} (in1),
   // selected by ecl_div_last_cycle.
   dp_mux2es #(64) in1_mux(
      .dout (adderin1[63:0]),
      .in0  (d[126:63]),
      .in1  ({d[62:0], ecl_div_newq}),
      .sel  (ecl_div_last_cycle));

   // Second operand: x, bitwise inverted when subtracting.
   assign subtract       = ~ecl_div_subtract_l;
   assign adderin2[63:0] = x[63:0] ^ {64{subtract}};

   sparc_exu_aluadder64 add64(
      // Outputs
      .adder_out (adder_out[63:0]),
      .cout32    (div_ecl_cout32),
      .cout64    (div_ecl_cout64),
      // Inputs
      .rs1_data  (adderin1[63:0]),
      .rs2_data  (adderin2[63:0]),
      .cin       (ecl_div_cin));

   assign div_ecl_adder_out_31 = adder_out[31];

   // sum predict and zero detection
   sparc_exu_aluspr spr(
      .rs1_data (adderin1[63:0]),
      .rs2_data (adderin2[63:0]),
      .cin      (ecl_div_cin),
      .spr_out  (spr_out[63:0]));

   // Zero detect looks at spr_out (in0) or at xin (in1), selected by
   // ecl_div_ld_inputs.
   dp_mux2es #(64) zero_detect_mux(
      .dout (z_in[63:0]),
      .in0  (spr_out[63:0]),
      .in1  (xin[63:0]),
      .sel  (ecl_div_ld_inputs));

   assign div_ecl_detect_zero_low  = ~|z_in[31:0];
   assign div_ecl_detect_zero_high = ~|z_in[63:32];

   ///////////////////////////
   // Division result selection (with 32 bit overflow handling)
   ///////////////////////////
   // The value examined here is the upper half of d.
   assign curr_q = d[127:64];

   // u32  : upper 32 bits zero; low 32 bits forced to ones unless
   //        ecl_div_upper32_zero is set.
   // pos32: upper 33 bits zero; low 31 bits forced to ones unless
   //        ecl_div_upper33_zero is set.
   // neg32: upper 33 bits one;  low 31 bits forced to zero unless
   //        ecl_div_upper33_one is set.
   assign u32   = {32'b0,       (curr_q[31:0] | {32{~ecl_div_upper32_zero}})};
   assign pos32 = {33'b0,       (curr_q[30:0] | {31{~ecl_div_upper33_zero}})};
   assign neg32 = {{33{1'b1}},  (curr_q[30:0] & {31{ecl_div_upper33_one}})};

   mux4ds #(64) result_mux(
      .dout (out64[63:0]),
      .in0  (curr_q[63:0]),
      .in1  (u32[63:0]),
      .in2  (pos32[63:0]),
      .in3  (neg32[63:0]),
      .sel0 (ecl_div_sel_64b),
      .sel1 (ecl_div_sel_u32),
      .sel2 (ecl_div_sel_pos32),
      .sel3 (ecl_div_sel_neg32));

   ///////////////////////////
   // Generate Condition Codes and divide by zero exception and overflow
   ///////////////////////////
   dp_mux2es #(64) gencc_mux(
      .dout (gencc_in[63:0]),
      .in0  (mul_result[63:0]),
      .in1  (curr_q[63:0]),
      .sel  (ecl_div_sel_div));

   sparc_exu_div_32eql u32eql(
      .in    (gencc_in[63:32]),
      .equal (div_ecl_upper32_equal));

   sparc_exu_aluor32 low32or(
      // Outputs
      .out (div_ecl_low32_nonzero),
      // Inputs
      .in  (gencc_in[31:0]));

   assign div_ecl_gencc_in_msb_l = ~gencc_in[63];
   assign div_ecl_gencc_in_31    = gencc_in[31];

   /////////////////////
   // Output assignment
   /////////////////////
   // Stored multiplier result (in0) or low half of d (in1).
   dp_mux2es #(64) output_mux(
      .dout (div_byp_muldivout_g[63:0]),
      .in0  (mul_result[63:0]),
      .in1  (d[63:0]),
      .sel  (ecl_div_sel_div));

   /////////////////////
   // y register
   /////////////////////
   assign div_byp_yreg_e = yreg_mdq_y_e;

   sparc_exu_div_yreg yreg(.mul_div_yreg_data_g(mul_exu_data_g[63:32]),
                           /*AUTOINST*/
                           // Outputs
                           .yreg_mdq_y_e(yreg_mdq_y_e[31:0]),
                           .div_ecl_yreg_0_l(div_ecl_yreg_0_l[3:0]),
                           // Inputs
                           .clk         (clk),
                           .se          (se),
                           .byp_div_yreg_data_w(byp_div_yreg_data_w[31:0]),
                           .ecl_div_thr_e(ecl_div_thr_e[3:0]),
                           .ecl_div_yreg_wen_w(ecl_div_yreg_wen_w[3:0]),
                           .ecl_div_yreg_wen_g(ecl_div_yreg_wen_g[3:0]),
                           .ecl_div_yreg_wen_l(ecl_div_yreg_wen_l[3:0]),
                           .ecl_div_yreg_data_31_g(ecl_div_yreg_data_31_g),
                           .ecl_div_yreg_shift_g(ecl_div_yreg_shift_g[3:0]));

   //////////////////////////////////
   // MULTIPLIER inputs
   //////////////////////////////////
   // 32 bit operand form: the low word of each operand with its upper 32
   // bits filled from the matching ecl_div_mul_sext_* control bit.
   assign mul32_input_data_e[63:0]   = {{32{ecl_div_mul_sext_rs1_e}}, input_data_e[31:0]};
   assign mul32_input_data_e[127:64] = {{32{ecl_div_mul_sext_rs2_e}}, input_data_e[95:64]};

   //   in0 (ecl_div_mul_get_new_data)   : full width operands
   //   in1 (ecl_div_mul_get_32bit_data) : 32 bit operand form
   //   in2 (ecl_div_mul_keep_data)      : hold the staged operands
   mux3ds #(128) mul_data_mux(
      .dout (next_mul_data[127:0]),
      .in0  (input_data_e[127:0]),
      .in1  (mul32_input_data_e[127:0]),
      .in2  (mul_data_out[127:0]),
      .sel0 (ecl_div_mul_get_new_data),
      .sel1 (ecl_div_mul_get_32bit_data),
      .sel2 (ecl_div_mul_keep_data));

   dff_s #(128) mul_data_dff(
      .din (next_mul_data[127:0]),
      .clk (clk),
      .q   (mul_data_out[127:0]),
      .se  (se),
      .si  (),
      .so  ());

   assign exu_mul_rs1_data = mul_data_out[63:0];
   assign exu_mul_rs2_data = mul_data_out[127:64];

   ///////////////////////////////////
   // Store output from mul
   //////////////////////////////////
   // Hold the stored value (in0) or take mul_exu_data_g (in1), selected by
   // ecl_div_mul_wen.
   dp_mux2es #(64) mul_result_mux(
      .dout (mul_result_next[63:0]),
      .in0  (mul_result[63:0]),
      .in1  (mul_exu_data_g[63:0]),
      .sel  (ecl_div_mul_wen));

   dff_s #(64) mul_result_dff(
      .din (mul_result_next[63:0]),
      .clk (clk),
      .q   (mul_result[63:0]),
      .se  (se),
      .si  (),
      .so  ());

endmodule // sparc_exu_div
---|