/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
#include "sched-int.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
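
/* Illustrative only: a caller indexes the per-mode entries of the cost
   tables below with MODE_INDEX.  A sketch, assuming the mult_init and
   mult_bit fields described by the "cost of starting multiply" and
   "cost of multiply per each bit set" comments:

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)]
                    + nbits * ix86_cost->mult_bit;

   Index 4 ("other") covers every mode wider than DImode.  */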
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
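
/* Under that assumption COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a
   2-byte addition is the unit cost when tuning for size, just as a
   single-insn addition is the unit cost when tuning for speed.  */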
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
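
/* A note on the stringop descriptors used throughout the cost tables: each
   one is a stringop_algs initializer of the shape

     {alg_for_unknown_size, {{max_1, alg_1}, ..., {-1, alg_n}}}

   i.e. an algorithm for blocks of unknown size, followed by (maximum byte
   count, algorithm) pairs for known sizes, where max == -1 terminates the
   list and covers all larger blocks.  Each memcpy/memset entry below
   supplies two such descriptors, one for 32-bit and one for 64-bit code;
   DUMMY_STRINGOP_ALGS simply fills the variant that is not tuned.  */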
const
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),  /* cost of an add instruction */
  COSTS_N_BYTES (3),  /* cost of a lea instruction */
  COSTS_N_BYTES (2),  /* variable shift costs */
  COSTS_N_BYTES (3),  /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),  /* cost of movsx */
  COSTS_N_BYTES (3),  /* cost of movzx */
  2,  /* cost for loading QImode using movzbl */
  {2, 2, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 2},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {2, 2, 2},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  3,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {3, 3},  /* cost of storing MMX registers
              in SImode and DImode */
  3,  /* cost of moving SSE register */
  {3, 3, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {3, 3, 3},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_BYTES (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),  /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),  /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),  /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  1,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  1,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)},  /* other */
  1,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  4,  /* size of l1 cache.  486 has 8kB cache
         shared for code and data, so 4kB is
         not really precise.  */
  4,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  8,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  8,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache */
  32,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (2),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  1,  /* cost for loading QImode using movzbl */
  {1, 1, 1},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {1, 1, 1},  /* cost of storing integer registers */
  1,  /* cost of reg,reg fld/fst */
  {1, 1, 1},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 6, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  1,  /* cost of moving MMX register */
  {1, 1},  /* cost of loading MMX registers
              in SImode and DImode */
  {1, 1},  /* cost of storing MMX registers
              in SImode and DImode */
  1,  /* cost of moving SSE register */
  {1, 1, 1},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {1, 1, 1},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  1,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  128,  /* size of l2 cache.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)},  /* other */
  COSTS_N_INSNS (2),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  3,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  6,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  32,  /* size of l2 cache.  Some models
          have integrated l2 cache, but
          optimizing for k6 is not important
          enough to worry about that.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  COSTS_N_INSNS (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 3, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  5,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  3,  /* vec_unalign_load_cost.  */
  3,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  2,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
      /* On K8:
           MOVD reg64, xmmreg  Double  FSTORE 4
           MOVD reg32, xmmreg  Double  FSTORE 4
         On AMDFAM10:
           MOVD reg64, xmmreg  Double  FADD 3
           MOVD reg32, xmmreg  Double  FADD 3  */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  2,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
      /* On K8:
           MOVD reg64, xmmreg  Double  FSTORE 4
           MOVD reg32, xmmreg  Double  FSTORE 4
         On AMDFAM10:
           MOVD reg64, xmmreg  Double  FADD 3
           MOVD reg32, xmmreg  Double  FADD 3  */
  64,  /* size of l1 cache.  */
  1024,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  2,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (3),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (4),  /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  2,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  12,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  10,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  COSTS_N_INSNS (5),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
    {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  3,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  6,  /* cost of moving MMX register */
  {12, 12},  /* cost of loading MMX registers
                in SImode and DImode */
  {12, 12},  /* cost of storing MMX registers
                in SImode and DImode */
  6,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {12, 12, 12},  /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  8,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  1024,  /* size of l2 cache.  */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
    {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),  /* HI */
   COSTS_N_INSNS (22),  /* SI */
   COSTS_N_INSNS (22),  /* DI */
   COSTS_N_INSNS (22)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  16,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {6, 6, 6},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {6, 6},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {6, 6, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  2048,  /* size of l2 cache.  */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to a perhaps more appropriate value of 5.  */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
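
/* Illustrative only: passes consult the active table through ix86_cost.
   A sketch of the kind of query made when costing RTL (the real uses live
   in ix86_rtx_costs and friends elsewhere in this file):

     if (ix86_cost->lea <= ix86_cost->add)
       ;  /* a three-operand lea is no worse than an add here */

   The pointer is redirected to the table matching -mtune= (or to
   ix86_size_cost when optimizing for size) during option processing.  */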
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be the common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
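
/* For example, a table entry of (m_AMD_MULTIPLE | m_GENERIC) turns the
   corresponding feature on for Athlon, K8, AMDFAM10, BDVER1 and both
   generic tunings.  A sketch of how the masks below are folded into
   ix86_tune_features once the active processor is known (the actual
   folding is done during option processing):

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/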
1377 /* Feature tests against the various tunings. */
1378 unsigned char ix86_tune_features[X86_TUNE_LAST];
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_SINGLE_POP: Enable if a single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1487 /* X86_TUNE_DOUBLE_POP: Enable if a double pop insn is preferred
1488 over esp addition. */
1491 /* X86_TUNE_SINGLE_PUSH: Enable if a single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1495 /* X86_TUNE_DOUBLE_PUSH: Enable if a double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies. */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here between PPro/Pentium4-based chips that treat 128-bit
1509 SSE registers as single units and K8-based chips that divide SSE
1510 registers into two 64-bit halves. This knob promotes all store destinations
1511 to be 128-bit, allowing register renaming on 128-bit SSE units, but usually
1512 results in one extra microop on 64-bit SSE units. Experimental results
1513 show that disabling this option on P4 brings over a 20% SPECfp regression,
1514 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1515 masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in the proper format, leaving
1531 the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in a 16-byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1592 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1593 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
1597 a vector path on AMD machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on AMD machines. */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR than a MOV. */
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609 but one byte longer. */
1612 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1613 operand that cannot be represented using a modRM byte. The XOR
1614 replacement is long decoded, so this split helps here as well. */
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. */
1619 m_AMDFAM10 | m_GENERIC,
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector operations. */
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1669 static enum stringop_alg stringop_alg = no_stringop;
1671 /* In case the average insn count for a single function invocation is
1672 lower than this constant, emit a fast (but longer) prologue and epilogue. */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
1676 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1711 /* The "default" register map used in 32bit mode. */
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1724 /* The "default" register map used in 64bit mode. */
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780 register numbers. Note that these are all stack-top-relative numbers:
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
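/* A standalone sketch (illustrative, not part of this file) of how a map
   like svr4_dbx_register_map is consumed: the compiler-internal register
   number indexes the array, yielding the debugger's register number.  */
#if 0   /* example only; not compiled as part of i386.c */
#include <stdio.h>

int
main (void)
{
  /* Excerpt of the general-register row above, in gcc's order
     ax, dx, cx, bx, si, di, bp, sp.  */
  static const int svr4_map[8] = { 0, 2, 1, 3, 6, 7, 5, 4 };
  int gcc_regno = 4;                    /* %esi */
  printf ("gcc regno %d -> DWARF regno %d\n", gcc_regno, svr4_map[gcc_regno]);
  return 0;                     /* prints: gcc regno 4 -> DWARF regno 6 */
}
#endif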
1802 /* Define parameter passing and return registers. */
1804 static int const x86_64_int_parameter_registers[6] =
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1811 CX_REG, DX_REG, R8_REG, R9_REG
1814 static int const x86_64_int_return_registers[4] =
1816 AX_REG, DX_REG, DI_REG, SI_REG
1819 /* Define the structure for the machine field in struct function. */
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1825 struct stack_local_entry *next;
1828 /* Structure describing stack frame layout.
1829 Stack grows downward:
1835 saved static chain if ix86_static_chain_on_stack
1837 saved frame pointer if frame_pointer_needed
1838 <- HARD_FRAME_POINTER
1844 <- sse_regs_save_offset
1847 [va_arg registers] |
1851 [padding2] | = to_allocate
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1885 /* Which cpu are we scheduling for. */
1886 enum attr_cpu ix86_schedule;
1888 /* Which cpu are we optimizing for. */
1889 enum processor_type ix86_tune;
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1894 /* True if the sse prefetch instruction is not a NOP. */
1895 int x86_prefetch_sse;
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1900 /* -mstackrealign option */
1901 extern int ix86_force_align_arg_pointer;
1902 static const char ix86_force_align_arg_pointer_string[]
1903 = "force_align_arg_pointer";
1905 static rtx (*ix86_gen_leave) (void);
1906 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1908 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1909 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1910 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1912 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1913 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1914 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1916 /* Preferred alignment for stack boundary in bits. */
1917 unsigned int ix86_preferred_stack_boundary;
1919 /* Alignment for incoming stack boundary in bits specified at the command line. */
1921 static unsigned int ix86_user_incoming_stack_boundary;
1923 /* Default alignment for incoming stack boundary in bits. */
1924 static unsigned int ix86_default_incoming_stack_boundary;
1926 /* Alignment for incoming stack boundary in bits. */
1927 unsigned int ix86_incoming_stack_boundary;
1929 /* The abi used by target. */
1930 enum calling_abi ix86_abi;
1932 /* Values 1-5: see jump.c */
1933 int ix86_branch_cost;
1935 /* Calling abi specific va_list type nodes. */
1936 static GTY(()) tree sysv_va_list_type_node;
1937 static GTY(()) tree ms_va_list_type_node;
1939 /* Variables which are this size or smaller are put in the data/bss
1940 or ldata/lbss sections. */
1942 int ix86_section_threshold = 65536;
1944 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1945 char internal_label_prefix[16];
1946 int internal_label_prefix_len;
1948 /* Fence to use after loop using movnt. */
1951 /* Register class used for passing a given 64-bit part of the argument.
1952 These represent classes as documented by the PS ABI, with the exception
1953 of the SSESF and SSEDF classes, which are basically the SSE class; gcc will
1954 just use an SF or DFmode move instead of a DImode one to avoid reformatting penalties.
1956 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1957 whenever possible (the upper half does contain padding). */
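/* For example (per the classification described above, under the SysV
   x86-64 psABI), a struct { double d; int i; } argument spans two
   eightbytes: the first classifies as X86_64_SSEDF_CLASS (a lone double,
   moved as DFmode) and the second as X86_64_INTEGERSI_CLASS, since only
   its low 32 bits carry data and the upper half is padding.  */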
1958 enum x86_64_reg_class
1961 X86_64_INTEGER_CLASS,
1962 X86_64_INTEGERSI_CLASS,
1969 X86_64_COMPLEX_X87_CLASS,
1973 #define MAX_CLASSES 4
1975 /* Table of constants used by fldpi, fldln2, etc. */
1976 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1977 static bool ext_80387_constants_init = 0;
1980 static struct machine_function * ix86_init_machine_status (void);
1981 static rtx ix86_function_value (const_tree, const_tree, bool);
1982 static bool ix86_function_value_regno_p (const unsigned int);
1983 static rtx ix86_static_chain (const_tree, bool);
1984 static int ix86_function_regparm (const_tree, const_tree);
1985 static void ix86_compute_frame_layout (struct ix86_frame *);
1986 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1988 static void ix86_add_new_builtins (int);
1989 static rtx ix86_expand_vec_perm_builtin (tree);
1990 static tree ix86_canonical_va_list_type (tree);
1991 static void predict_jump (int);
1993 enum ix86_function_specific_strings
1995 IX86_FUNCTION_SPECIFIC_ARCH,
1996 IX86_FUNCTION_SPECIFIC_TUNE,
1997 IX86_FUNCTION_SPECIFIC_FPMATH,
1998 IX86_FUNCTION_SPECIFIC_MAX
2001 static char *ix86_target_string (int, int, const char *, const char *,
2002 const char *, bool);
2003 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2004 static void ix86_function_specific_save (struct cl_target_option *);
2005 static void ix86_function_specific_restore (struct cl_target_option *);
2006 static void ix86_function_specific_print (FILE *, int,
2007 struct cl_target_option *);
2008 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2009 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2010 static bool ix86_can_inline_p (tree, tree);
2011 static void ix86_set_current_function (tree);
2012 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2014 static enum calling_abi ix86_function_abi (const_tree);
2017 #ifndef SUBTARGET32_DEFAULT_CPU
2018 #define SUBTARGET32_DEFAULT_CPU "i386"
2021 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
2023 #ifndef DEFAULT_PCC_STRUCT_RETURN
2024 #define DEFAULT_PCC_STRUCT_RETURN 1
2027 /* Whether -mtune= or -march= were specified */
2028 static int ix86_tune_defaulted;
2029 static int ix86_arch_specified;
2031 /* A mask of ix86_isa_flags that includes bit X if X
2032 was set or cleared on the command line. */
2033 static int ix86_isa_flags_explicit;
2035 /* Define a set of ISAs which are available when a given ISA is
2036 enabled. MMX and SSE ISAs are handled separately. */
2038 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2039 #define OPTION_MASK_ISA_3DNOW_SET \
2040 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2042 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2043 #define OPTION_MASK_ISA_SSE2_SET \
2044 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2045 #define OPTION_MASK_ISA_SSE3_SET \
2046 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2047 #define OPTION_MASK_ISA_SSSE3_SET \
2048 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2049 #define OPTION_MASK_ISA_SSE4_1_SET \
2050 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2051 #define OPTION_MASK_ISA_SSE4_2_SET \
2052 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2053 #define OPTION_MASK_ISA_AVX_SET \
2054 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2055 #define OPTION_MASK_ISA_FMA_SET \
2056 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2058 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */
2060 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2062 #define OPTION_MASK_ISA_SSE4A_SET \
2063 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2064 #define OPTION_MASK_ISA_FMA4_SET \
2065 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2066 | OPTION_MASK_ISA_AVX_SET)
2067 #define OPTION_MASK_ISA_XOP_SET \
2068 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2069 #define OPTION_MASK_ISA_LWP_SET \
2072 /* AES and PCLMUL need SSE2 because they use xmm registers. */
2073 #define OPTION_MASK_ISA_AES_SET \
2074 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2075 #define OPTION_MASK_ISA_PCLMUL_SET \
2076 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2078 #define OPTION_MASK_ISA_ABM_SET \
2079 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2081 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2082 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2083 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2084 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2085 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2087 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2088 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2089 #define OPTION_MASK_ISA_F16C_SET \
2090 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
2092 /* Define a set of ISAs which aren't available when a given ISA is
2093 disabled. MMX and SSE ISAs are handled separately. */
2095 #define OPTION_MASK_ISA_MMX_UNSET \
2096 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2097 #define OPTION_MASK_ISA_3DNOW_UNSET \
2098 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2099 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2101 #define OPTION_MASK_ISA_SSE_UNSET \
2102 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2103 #define OPTION_MASK_ISA_SSE2_UNSET \
2104 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2105 #define OPTION_MASK_ISA_SSE3_UNSET \
2106 (OPTION_MASK_ISA_SSE3 \
2107 | OPTION_MASK_ISA_SSSE3_UNSET \
2108 | OPTION_MASK_ISA_SSE4A_UNSET )
2109 #define OPTION_MASK_ISA_SSSE3_UNSET \
2110 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2111 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2112 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2113 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2114 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2115 #define OPTION_MASK_ISA_AVX_UNSET \
2116 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2117 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2118 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2120 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same as -mno-sse4.1. */
2122 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2124 #define OPTION_MASK_ISA_SSE4A_UNSET \
2125 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2127 #define OPTION_MASK_ISA_FMA4_UNSET \
2128 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2129 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2130 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2132 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2133 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2134 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2135 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2136 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2137 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2138 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2139 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2141 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2142 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2143 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
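/* A standalone sketch (illustrative names only) of the SET/UNSET idiom
   above: each _SET macro ORs in every ISA the option implies, and each
   _UNSET macro ORs in every ISA that must be cleared along with it, so a
   single "flags |= ..._SET" or "flags &= ~..._UNSET" keeps the flag word
   closed under the implication chain.  */
#if 0   /* example only; not compiled as part of i386.c */
#include <stdio.h>

#define ISA_SSE        (1u << 0)
#define ISA_SSE2       (1u << 1)
#define ISA_SSE3       (1u << 2)

#define ISA_SSE_SET    ISA_SSE
#define ISA_SSE2_SET   (ISA_SSE2 | ISA_SSE_SET)
#define ISA_SSE3_SET   (ISA_SSE3 | ISA_SSE2_SET)

#define ISA_SSE3_UNSET ISA_SSE3
#define ISA_SSE2_UNSET (ISA_SSE2 | ISA_SSE3_UNSET)
#define ISA_SSE_UNSET  (ISA_SSE | ISA_SSE2_UNSET)

int
main (void)
{
  unsigned flags = 0;
  flags |= ISA_SSE3_SET;                /* -msse3 pulls in SSE2 and SSE */
  printf ("after -msse3:    %#x\n", flags);   /* 0x7 */
  flags &= ~ISA_SSE2_UNSET;             /* -mno-sse2 also clears SSE3 */
  printf ("after -mno-sse2: %#x\n", flags);   /* 0x1 */
  return 0;
}
#endif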
2145 /* Vectorization library interface and handlers. */
2146 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2148 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2151 /* Processor target table, indexed by processor number */
2154 const struct processor_costs *cost; /* Processor costs */
2155 const int align_loop; /* Default alignments. */
2156 const int align_loop_max_skip;
2157 const int align_jump;
2158 const int align_jump_max_skip;
2159 const int align_func;
2162 static const struct ptt processor_target_table[PROCESSOR_max] =
2164 {&i386_cost, 4, 3, 4, 3, 4},
2165 {&i486_cost, 16, 15, 16, 15, 16},
2166 {&pentium_cost, 16, 7, 16, 7, 16},
2167 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2168 {&geode_cost, 0, 0, 0, 0, 0},
2169 {&k6_cost, 32, 7, 32, 7, 32},
2170 {&athlon_cost, 16, 7, 16, 7, 16},
2171 {&pentium4_cost, 0, 0, 0, 0, 0},
2172 {&k8_cost, 16, 7, 16, 7, 16},
2173 {&nocona_cost, 0, 0, 0, 0, 0},
2174 {&core2_cost, 16, 10, 16, 10, 16},
2175 {&generic32_cost, 16, 7, 16, 7, 16},
2176 {&generic64_cost, 16, 10, 16, 10, 16},
2177 {&amdfam10_cost, 32, 24, 32, 7, 32},
2178 {&bdver1_cost, 32, 24, 32, 7, 32},
2179 {&atom_cost, 16, 7, 16, 7, 16}
2182 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2209 /* Return true if a red-zone is in use. */
2212 ix86_using_red_zone (void)
2214 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2217 /* Implement TARGET_HANDLE_OPTION. */
2220 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2227 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2228 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2232 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2240 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2241 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2245 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2246 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2256 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2257 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2261 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2262 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2269 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2270 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2274 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2275 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2282 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2283 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2287 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2288 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2295 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2296 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2300 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2301 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2308 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2309 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2313 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2314 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2321 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2322 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2326 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2327 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2334 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2335 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2339 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2340 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2347 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2348 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2352 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2353 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2358 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2359 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2363 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2364 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2370 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2371 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2375 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2376 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2383 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2384 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2388 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2389 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2396 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2397 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2401 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2402 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2409 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2410 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2414 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2415 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2422 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2423 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2427 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2428 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2435 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2436 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2440 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2441 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2448 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2449 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2453 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2454 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2461 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2462 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2466 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2467 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2474 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2475 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2479 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2480 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2487 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2488 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2492 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2493 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2500 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2501 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2505 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2506 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2513 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2514 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2518 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2519 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2526 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2527 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2531 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2532 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2539 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2540 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2544 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2545 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2552 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2553 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2557 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2558 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
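/* A standalone sketch (simplified, illustrative names) of the pattern in
   the handler above: every user decision is mirrored into an "explicit"
   mask, so that later default-setting code can enable an ISA only where
   the user has not already spoken, via "flags |= MASK & ~explicit".  */
#if 0   /* example only; not compiled as part of i386.c */
#include <stdio.h>

#define ISA_MMX (1u << 0)
#define ISA_SSE (1u << 1)

static unsigned isa_flags, isa_flags_explicit;

static void
handle_mno_mmx (void)
{
  isa_flags &= ~ISA_MMX;
  isa_flags_explicit |= ISA_MMX;        /* remember the user's choice */
}

int
main (void)
{
  handle_mno_mmx ();
  isa_flags |= ISA_SSE;                 /* suppose -msse was also given */
  /* Defaulting MMX on for SSE targets must not override -mno-mmx.  */
  isa_flags |= ISA_MMX & ~isa_flags_explicit;
  printf ("mmx=%d sse=%d\n",
          !!(isa_flags & ISA_MMX), !!(isa_flags & ISA_SSE));
  return 0;                             /* prints mmx=0 sse=1 */
}
#endif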
2567 /* Return a string that documents the current -m options. The caller is
2568 responsible for freeing the string. */
2571 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2572 const char *fpmath, bool add_nl_p)
2574 struct ix86_target_opts
2576 const char *option; /* option string */
2577 int mask; /* isa mask options */
2580 /* This table is ordered so that options like -msse4.2 that imply
2581 preceding options are matched first. */
2582 static struct ix86_target_opts isa_opts[] =
2584 { "-m64", OPTION_MASK_ISA_64BIT },
2585 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2586 { "-mfma", OPTION_MASK_ISA_FMA },
2587 { "-mxop", OPTION_MASK_ISA_XOP },
2588 { "-mlwp", OPTION_MASK_ISA_LWP },
2589 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2590 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2591 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2592 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2593 { "-msse3", OPTION_MASK_ISA_SSE3 },
2594 { "-msse2", OPTION_MASK_ISA_SSE2 },
2595 { "-msse", OPTION_MASK_ISA_SSE },
2596 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2597 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2598 { "-mmmx", OPTION_MASK_ISA_MMX },
2599 { "-mabm", OPTION_MASK_ISA_ABM },
2600 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2601 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2602 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2603 { "-maes", OPTION_MASK_ISA_AES },
2604 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2605 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2606 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2607 { "-mf16c", OPTION_MASK_ISA_F16C },
2611 static struct ix86_target_opts flag_opts[] =
2613 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2614 { "-m80387", MASK_80387 },
2615 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2616 { "-malign-double", MASK_ALIGN_DOUBLE },
2617 { "-mcld", MASK_CLD },
2618 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2619 { "-mieee-fp", MASK_IEEE_FP },
2620 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2621 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2622 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2623 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2624 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2625 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2626 { "-mno-red-zone", MASK_NO_RED_ZONE },
2627 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2628 { "-mrecip", MASK_RECIP },
2629 { "-mrtd", MASK_RTD },
2630 { "-msseregparm", MASK_SSEREGPARM },
2631 { "-mstack-arg-probe", MASK_STACK_PROBE },
2632 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2633 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2636 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2639 char target_other[40];
2648 memset (opts, '\0', sizeof (opts));
2650 /* Add -march= option. */
2653 opts[num][0] = "-march=";
2654 opts[num++][1] = arch;
2657 /* Add -mtune= option. */
2660 opts[num][0] = "-mtune=";
2661 opts[num++][1] = tune;
2664 /* Pick out the options present in the isa mask. */
2665 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2667 if ((isa & isa_opts[i].mask) != 0)
2669 opts[num++][0] = isa_opts[i].option;
2670 isa &= ~ isa_opts[i].mask;
2674 if (isa && add_nl_p)
2676 opts[num++][0] = isa_other;
2677 sprintf (isa_other, "(other isa: %#x)", isa);
2680 /* Add flag options. */
2681 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2683 if ((flags & flag_opts[i].mask) != 0)
2685 opts[num++][0] = flag_opts[i].option;
2686 flags &= ~ flag_opts[i].mask;
2690 if (flags && add_nl_p)
2692 opts[num++][0] = target_other;
2693 sprintf (target_other, "(other flags: %#x)", flags);
2696 /* Add -mfpmath= option. */
2699 opts[num][0] = "-mfpmath=";
2700 opts[num++][1] = fpmath;
2707 gcc_assert (num < ARRAY_SIZE (opts));
2709 /* Size the string. */
2711 sep_len = (add_nl_p) ? 3 : 1;
2712 for (i = 0; i < num; i++)
2715 for (j = 0; j < 2; j++)
2717 len += strlen (opts[i][j]);
2720 /* Build the string. */
2721 ret = ptr = (char *) xmalloc (len);
2724 for (i = 0; i < num; i++)
2728 for (j = 0; j < 2; j++)
2729 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2736 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2744 for (j = 0; j < 2; j++)
2747 memcpy (ptr, opts[i][j], len2[j]);
2749 line_len += len2[j];
2754 gcc_assert (ret + len >= ptr);
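/* A standalone sketch (illustrative) of the two-pass string building used
   above: collect (prefix, argument) pairs, size the buffer in one pass,
   then emit with separators in a second pass.  */
#if 0   /* example only; not compiled as part of i386.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
  static const char *const opts[][2]
    = { { "-march=", "k8" }, { "-msse2", "" } };
  size_t i, n = sizeof opts / sizeof opts[0];
  size_t len = 1;                       /* terminating NUL */
  char *ret, *ptr;

  for (i = 0; i < n; i++)
    len += strlen (opts[i][0]) + strlen (opts[i][1]) + 1;  /* + separator */

  ret = ptr = (char *) malloc (len);
  for (i = 0; i < n; i++)
    {
      if (i > 0)
        *ptr++ = ' ';
      ptr += sprintf (ptr, "%s%s", opts[i][0], opts[i][1]);
    }
  printf ("%s\n", ret);                 /* prints: -march=k8 -msse2 */
  free (ret);
  return 0;
}
#endif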
2759 /* Return TRUE if software prefetching is beneficial for the target. */
2763 software_prefetching_beneficial_p (void)
2767 case PROCESSOR_GEODE:
2769 case PROCESSOR_ATHLON:
2771 case PROCESSOR_AMDFAM10:
2779 /* Return true if profiling code should be emitted before the
2780 prologue, and false otherwise.
2781 Note: for x86, the "hotfix" case is rejected with sorry (). */
2783 ix86_profile_before_prologue (void)
2785 return flag_fentry != 0;
2788 /* Function that is callable from the debugger to print the current options. */
2791 ix86_debug_options (void)
2793 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2794 ix86_arch_string, ix86_tune_string,
2795 ix86_fpmath_string, true);
2799 fprintf (stderr, "%s\n\n", opts);
2803 fputs ("<no options>\n\n", stderr);
2808 /* Override various settings based on options. If MAIN_ARGS_P, the
2809 options are from the command line, otherwise they are from attributes. */
2813 ix86_option_override_internal (bool main_args_p)
2816 unsigned int ix86_arch_mask, ix86_tune_mask;
2817 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2822 /* Comes from final.c -- no real reason to change it. */
2823 #define MAX_CODE_ALIGN 16
2831 PTA_PREFETCH_SSE = 1 << 4,
2833 PTA_3DNOW_A = 1 << 6,
2837 PTA_POPCNT = 1 << 10,
2839 PTA_SSE4A = 1 << 12,
2840 PTA_NO_SAHF = 1 << 13,
2841 PTA_SSE4_1 = 1 << 14,
2842 PTA_SSE4_2 = 1 << 15,
2844 PTA_PCLMUL = 1 << 17,
2847 PTA_MOVBE = 1 << 20,
2851 PTA_FSGSBASE = 1 << 24,
2852 PTA_RDRND = 1 << 25,
2858 const char *const name; /* processor name or nickname. */
2859 const enum processor_type processor;
2860 const enum attr_cpu schedule;
2861 const unsigned /*enum pta_flags*/ flags;
2863 const processor_alias_table[] =
2865 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2866 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2867 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2868 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2869 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2870 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2871 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2872 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2873 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2874 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2875 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2876 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2877 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2879 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2882 PTA_MMX | PTA_SSE | PTA_SSE2},
2883 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX | PTA_SSE | PTA_SSE2},
2887 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2889 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2891 | PTA_CX16 | PTA_NO_SAHF},
2892 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2893 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2894 | PTA_SSSE3 | PTA_CX16},
2895 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2896 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2897 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2898 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2899 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2900 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2901 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2902 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2903 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2904 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2905 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2909 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"x86-64", PROCESSOR_K8, CPU_K8,
2914 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2915 {"k8", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2917 | PTA_SSE2 | PTA_NO_SAHF},
2918 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2919 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2920 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2921 {"opteron", PROCESSOR_K8, CPU_K8,
2922 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2923 | PTA_SSE2 | PTA_NO_SAHF},
2924 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2925 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2926 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2927 {"athlon64", PROCESSOR_K8, CPU_K8,
2928 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2929 | PTA_SSE2 | PTA_NO_SAHF},
2930 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2931 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2932 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2933 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2934 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2935 | PTA_SSE2 | PTA_NO_SAHF},
2936 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2937 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2938 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2939 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2940 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2941 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2942 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2943 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2944 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2945 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2946 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2947 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2948 0 /* flags are only used for -march switch. */ },
2949 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2950 PTA_64BIT /* flags are only used for -march switch. */ },
2953 int const pta_size = ARRAY_SIZE (processor_alias_table);
2955 /* Set up prefix/suffix so the error messages refer to either the command
2956 line argument, or the attribute(target). */
2965 prefix = "option(\"";
2970 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2971 SUBTARGET_OVERRIDE_OPTIONS;
2974 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2975 SUBSUBTARGET_OVERRIDE_OPTIONS;
2978 /* -fPIC is the default for x86_64. */
2979 if (TARGET_MACHO && TARGET_64BIT)
2982 /* Need to check -mtune=generic first. */
2983 if (ix86_tune_string)
2985 if (!strcmp (ix86_tune_string, "generic")
2986 || !strcmp (ix86_tune_string, "i686")
2987 /* As special support for cross compilers, we read -mtune=native
2988 as -mtune=generic. With native compilers we won't see
2989 -mtune=native, as it will have been rewritten by the driver. */
2990 || !strcmp (ix86_tune_string, "native"))
2993 ix86_tune_string = "generic64";
2995 ix86_tune_string = "generic32";
2997 /* If this call is for setting the option attribute, allow the
2998 generic32/generic64 that was previously set. */
2999 else if (!main_args_p
3000 && (!strcmp (ix86_tune_string, "generic32")
3001 || !strcmp (ix86_tune_string, "generic64")))
3003 else if (!strncmp (ix86_tune_string, "generic", 7))
3004 error ("bad value (%s) for %stune=%s %s",
3005 ix86_tune_string, prefix, suffix, sw);
3006 else if (!strcmp (ix86_tune_string, "x86-64"))
3007 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3008 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3009 prefix, suffix, prefix, suffix, prefix, suffix);
3013 if (ix86_arch_string)
3014 ix86_tune_string = ix86_arch_string;
3015 if (!ix86_tune_string)
3017 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3018 ix86_tune_defaulted = 1;
3021 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3022 need to use a sensible tune option. */
3023 if (!strcmp (ix86_tune_string, "generic")
3024 || !strcmp (ix86_tune_string, "x86-64")
3025 || !strcmp (ix86_tune_string, "i686"))
3028 ix86_tune_string = "generic64";
3030 ix86_tune_string = "generic32";
3034 if (ix86_stringop_string)
3036 if (!strcmp (ix86_stringop_string, "rep_byte"))
3037 stringop_alg = rep_prefix_1_byte;
3038 else if (!strcmp (ix86_stringop_string, "libcall"))
3039 stringop_alg = libcall;
3040 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3041 stringop_alg = rep_prefix_4_byte;
3042 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3044 /* rep; movq isn't available in 32-bit code. */
3045 stringop_alg = rep_prefix_8_byte;
3046 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3047 stringop_alg = loop_1_byte;
3048 else if (!strcmp (ix86_stringop_string, "loop"))
3049 stringop_alg = loop;
3050 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3051 stringop_alg = unrolled_loop;
3053 error ("bad value (%s) for %sstringop-strategy=%s %s",
3054 ix86_stringop_string, prefix, suffix, sw);
3057 if (!ix86_arch_string)
3058 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3060 ix86_arch_specified = 1;
3062 /* Validate -mabi= value. */
3063 if (ix86_abi_string)
3065 if (strcmp (ix86_abi_string, "sysv") == 0)
3066 ix86_abi = SYSV_ABI;
3067 else if (strcmp (ix86_abi_string, "ms") == 0)
3070 error ("unknown ABI (%s) for %sabi=%s %s",
3071 ix86_abi_string, prefix, suffix, sw);
3074 ix86_abi = DEFAULT_ABI;
3076 if (ix86_cmodel_string != 0)
3078 if (!strcmp (ix86_cmodel_string, "small"))
3079 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3080 else if (!strcmp (ix86_cmodel_string, "medium"))
3081 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3082 else if (!strcmp (ix86_cmodel_string, "large"))
3083 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3085 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3086 else if (!strcmp (ix86_cmodel_string, "32"))
3087 ix86_cmodel = CM_32;
3088 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3089 ix86_cmodel = CM_KERNEL;
3091 error ("bad value (%s) for %scmodel=%s %s",
3092 ix86_cmodel_string, prefix, suffix, sw);
3096 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3097 use of rip-relative addressing. This eliminates fixups that
3098 would otherwise be needed if this object is to be placed in a
3099 DLL, and is essentially just as efficient as direct addressing. */
3100 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3101 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3102 else if (TARGET_64BIT)
3103 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3105 ix86_cmodel = CM_32;
3107 if (ix86_asm_string != 0)
3110 && !strcmp (ix86_asm_string, "intel"))
3111 ix86_asm_dialect = ASM_INTEL;
3112 else if (!strcmp (ix86_asm_string, "att"))
3113 ix86_asm_dialect = ASM_ATT;
3115 error ("bad value (%s) for %sasm=%s %s",
3116 ix86_asm_string, prefix, suffix, sw);
3118 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3119 error ("code model %qs not supported in the %s bit mode",
3120 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3121 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3122 sorry ("%i-bit mode not compiled in",
3123 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3125 for (i = 0; i < pta_size; i++)
3126 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3128 ix86_schedule = processor_alias_table[i].schedule;
3129 ix86_arch = processor_alias_table[i].processor;
3130 /* Default cpu tuning to the architecture. */
3131 ix86_tune = ix86_arch;
3133 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3134 error ("CPU you selected does not support x86-64 "
3137 if (processor_alias_table[i].flags & PTA_MMX
3138 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3139 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3140 if (processor_alias_table[i].flags & PTA_3DNOW
3141 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3142 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3143 if (processor_alias_table[i].flags & PTA_3DNOW_A
3144 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3145 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3146 if (processor_alias_table[i].flags & PTA_SSE
3147 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3148 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3149 if (processor_alias_table[i].flags & PTA_SSE2
3150 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3151 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3152 if (processor_alias_table[i].flags & PTA_SSE3
3153 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3154 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3155 if (processor_alias_table[i].flags & PTA_SSSE3
3156 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3158 if (processor_alias_table[i].flags & PTA_SSE4_1
3159 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3160 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3161 if (processor_alias_table[i].flags & PTA_SSE4_2
3162 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3163 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3164 if (processor_alias_table[i].flags & PTA_AVX
3165 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3166 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3167 if (processor_alias_table[i].flags & PTA_FMA
3168 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3169 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3170 if (processor_alias_table[i].flags & PTA_SSE4A
3171 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3172 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3173 if (processor_alias_table[i].flags & PTA_FMA4
3174 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3175 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3176 if (processor_alias_table[i].flags & PTA_XOP
3177 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3178 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3179 if (processor_alias_table[i].flags & PTA_LWP
3180 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3181 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3182 if (processor_alias_table[i].flags & PTA_ABM
3183 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3184 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3185 if (processor_alias_table[i].flags & PTA_CX16
3186 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3187 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3188 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3189 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3190 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3191 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3192 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3193 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3194 if (processor_alias_table[i].flags & PTA_MOVBE
3195 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3196 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3197 if (processor_alias_table[i].flags & PTA_AES
3198 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3199 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3200 if (processor_alias_table[i].flags & PTA_PCLMUL
3201 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3202 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3203 if (processor_alias_table[i].flags & PTA_FSGSBASE
3204 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3205 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3206 if (processor_alias_table[i].flags & PTA_RDRND
3207 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3208 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3209 if (processor_alias_table[i].flags & PTA_F16C
3210 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3211 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3212 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3213 x86_prefetch_sse = true;
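/* For example, -march=core2 matches the processor_alias_table entry
   carrying PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
   | PTA_SSSE3 | PTA_CX16, so the tests above default MMX, SSE through
   SSSE3 and CX16 on (and set x86_prefetch_sse via PTA_SSE), except for
   any ISA the user explicitly enabled or disabled on the command line.  */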
3218 if (!strcmp (ix86_arch_string, "generic"))
3219 error ("generic CPU can be used only for %stune=%s %s",
3220 prefix, suffix, sw);
3221 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3222 error ("bad value (%s) for %sarch=%s %s",
3223 ix86_arch_string, prefix, suffix, sw);
3225 ix86_arch_mask = 1u << ix86_arch;
3226 for (i = 0; i < X86_ARCH_LAST; ++i)
3227 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3229 for (i = 0; i < pta_size; i++)
3230 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3232 ix86_schedule = processor_alias_table[i].schedule;
3233 ix86_tune = processor_alias_table[i].processor;
3234 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3236 if (ix86_tune_defaulted)
3238 ix86_tune_string = "x86-64";
3239 for (i = 0; i < pta_size; i++)
3240 if (! strcmp (ix86_tune_string,
3241 processor_alias_table[i].name))
3243 ix86_schedule = processor_alias_table[i].schedule;
3244 ix86_tune = processor_alias_table[i].processor;
3247 error ("CPU you selected does not support x86-64 "
3250 /* Intel CPUs have always interpreted SSE prefetch instructions as
3251 NOPs; so, we can enable SSE prefetch instructions even when
3252 -mtune (rather than -march) points us to a processor that has them.
3253 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3254 higher processors. */
3256 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3257 x86_prefetch_sse = true;
3261 if (ix86_tune_specified && i == pta_size)
3262 error ("bad value (%s) for %stune=%s %s",
3263 ix86_tune_string, prefix, suffix, sw);
3265 ix86_tune_mask = 1u << ix86_tune;
3266 for (i = 0; i < X86_TUNE_LAST; ++i)
3267 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3269 #ifndef USE_IX86_FRAME_POINTER
3270 #define USE_IX86_FRAME_POINTER 0
3273 /* Set the default values for switches whose default depends on TARGET_64BIT
3274 in case they weren't overwritten by command line options. */
3279 if (flag_omit_frame_pointer == 2)
3280 flag_omit_frame_pointer = 1;
3281 if (flag_asynchronous_unwind_tables == 2)
3282 flag_asynchronous_unwind_tables = 1;
3283 if (flag_pcc_struct_return == 2)
3284 flag_pcc_struct_return = 0;
3290 if (flag_omit_frame_pointer == 2)
3291 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3292 if (flag_asynchronous_unwind_tables == 2)
3293 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3294 if (flag_pcc_struct_return == 2)
3295 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3299 ix86_cost = &ix86_size_cost;
3301 ix86_cost = processor_target_table[ix86_tune].cost;
3303 /* Arrange to set up i386_stack_locals for all functions. */
3304 init_machine_status = ix86_init_machine_status;
3306 /* Validate -mregparm= value. */
3307 if (ix86_regparm_string)
3310 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3311 i = atoi (ix86_regparm_string);
3312 if (i < 0 || i > REGPARM_MAX)
3313 error ("%sregparm=%d%s is not between 0 and %d",
3314 prefix, i, suffix, REGPARM_MAX);
3319 ix86_regparm = REGPARM_MAX;
3321 /* If the user has provided any of the -malign-* options,
3322 warn and use that value only if -falign-* is not set.
3323 Remove this code in GCC 3.2 or later. */
3324 if (ix86_align_loops_string)
3326 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3327 prefix, suffix, suffix);
3328 if (align_loops == 0)
3330 i = atoi (ix86_align_loops_string);
3331 if (i < 0 || i > MAX_CODE_ALIGN)
3332 error ("%salign-loops=%d%s is not between 0 and %d",
3333 prefix, i, suffix, MAX_CODE_ALIGN);
3335 align_loops = 1 << i;
3339 if (ix86_align_jumps_string)
3341 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3342 prefix, suffix, suffix);
3343 if (align_jumps == 0)
3345 i = atoi (ix86_align_jumps_string);
3346 if (i < 0 || i > MAX_CODE_ALIGN)
3347 error ("%salign-loops=%d%s is not between 0 and %d",
3348 prefix, i, suffix, MAX_CODE_ALIGN);
3350 align_jumps = 1 << i;
3354 if (ix86_align_funcs_string)
3356 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3357 prefix, suffix, suffix);
3358 if (align_functions == 0)
3360 i = atoi (ix86_align_funcs_string);
3361 if (i < 0 || i > MAX_CODE_ALIGN)
3362 error ("%salign-loops=%d%s is not between 0 and %d",
3363 prefix, i, suffix, MAX_CODE_ALIGN);
3365 align_functions = 1 << i;
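/* Worked example (illustrative): -malign-functions=4 yields
   align_functions = 1 << 4 = 16, i.e. functions aligned to 16 bytes;
   as warned above, the -falign-* options are the preferred spelling.  */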
3369 /* Default align_* from the processor table. */
3370 if (align_loops == 0)
3372 align_loops = processor_target_table[ix86_tune].align_loop;
3373 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3375 if (align_jumps == 0)
3377 align_jumps = processor_target_table[ix86_tune].align_jump;
3378 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3380 if (align_functions == 0)
3382 align_functions = processor_target_table[ix86_tune].align_func;
3385 /* Validate -mbranch-cost= value, or provide default. */
3386 ix86_branch_cost = ix86_cost->branch_cost;
3387 if (ix86_branch_cost_string)
3389 i = atoi (ix86_branch_cost_string);
3391 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3393 ix86_branch_cost = i;
3395 if (ix86_section_threshold_string)
3397 i = atoi (ix86_section_threshold_string);
3399 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3401 ix86_section_threshold = i;
3404 if (ix86_tls_dialect_string)
3406 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3407 ix86_tls_dialect = TLS_DIALECT_GNU;
3408 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU2;
3411 error ("bad value (%s) for %stls-dialect=%s %s",
3412 ix86_tls_dialect_string, prefix, suffix, sw);
3415 if (ix87_precision_string)
3417 i = atoi (ix87_precision_string);
3418 if (i != 32 && i != 64 && i != 80)
3419 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3424 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3426 /* Enable by default the SSE and MMX builtins. Do allow the user to
3427 explicitly disable any of these. In particular, disabling SSE and
3428 MMX for kernel code is extremely useful. */
3429 if (!ix86_arch_specified)
3431 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3432 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3435 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3439 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3441 if (!ix86_arch_specified)
3443 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3445 /* The i386 ABI does not specify a red zone. It still makes sense to use
3446 one when the programmer takes care to keep the stack from being destroyed. */
3447 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3448 target_flags |= MASK_NO_RED_ZONE;
3451 /* Keep nonleaf frame pointers. */
3452 if (flag_omit_frame_pointer)
3453 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3454 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3455 flag_omit_frame_pointer = 1;
3457 /* If we're doing fast math, we don't care about comparison order
3458 wrt NaNs. This lets us use a shorter comparison sequence. */
3459 if (flag_finite_math_only)
3460 target_flags &= ~MASK_IEEE_FP;
3462 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3463 since the insns won't need emulation. */
3464 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3465 target_flags &= ~MASK_NO_FANCY_MATH_387;
3467 /* Likewise, if the target doesn't have a 387, or we've specified
3468 software floating point, don't use 387 inline intrinsics. */
3470 target_flags |= MASK_NO_FANCY_MATH_387;
3472 /* Turn on MMX builtins for -msse. */
3475 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3476 x86_prefetch_sse = true;
3479 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3480 if (TARGET_SSE4_2 || TARGET_ABM)
3481 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3483 /* Validate -mpreferred-stack-boundary= value or default it to
3484 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3485 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3486 if (ix86_preferred_stack_boundary_string)
3488 i = atoi (ix86_preferred_stack_boundary_string);
3489 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3490 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3491 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3493 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
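/* A worked example (hypothetical invocation): -mpreferred-stack-boundary=4
   yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
   stack alignment that the x86-64 psABI expects.  */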
3496 /* Set the default value for -mstackrealign. */
3497 if (ix86_force_align_arg_pointer == -1)
3498 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3500 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3502 /* Validate -mincoming-stack-boundary= value or default it to
3503 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3504 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3505 if (ix86_incoming_stack_boundary_string)
3507 i = atoi (ix86_incoming_stack_boundary_string);
3508 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3509 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3510 i, TARGET_64BIT ? 4 : 2);
3513 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3514 ix86_incoming_stack_boundary
3515 = ix86_user_incoming_stack_boundary;
3519 /* Accept -msseregparm only if at least SSE support is enabled. */
if (TARGET_SSEREGPARM
    && !TARGET_SSE)
3522 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3524 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3525 if (ix86_fpmath_string != 0)
3527 if (! strcmp (ix86_fpmath_string, "387"))
3528 ix86_fpmath = FPMATH_387;
3529 else if (! strcmp (ix86_fpmath_string, "sse"))
3533 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3534 ix86_fpmath = FPMATH_387;
3537 ix86_fpmath = FPMATH_SSE;
3539 else if (! strcmp (ix86_fpmath_string, "387,sse")
3540 || ! strcmp (ix86_fpmath_string, "387+sse")
3541 || ! strcmp (ix86_fpmath_string, "sse,387")
3542 || ! strcmp (ix86_fpmath_string, "sse+387")
3543 || ! strcmp (ix86_fpmath_string, "both"))
3547 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3548 ix86_fpmath = FPMATH_387;
3550 else if (!TARGET_80387)
3552 warning (0, "387 instruction set disabled, using SSE arithmetics");
3553 ix86_fpmath = FPMATH_SSE;
3556 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3559 error ("bad value (%s) for %sfpmath=%s %s",
3560 ix86_fpmath_string, prefix, suffix, sw);
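/* For illustration (hypothetical invocations): -mfpmath=387 selects
   FPMATH_387, -mfpmath=sse selects FPMATH_SSE (provided SSE is enabled),
   and any of 387,sse / 387+sse / sse,387 / sse+387 / both selects the
   combined FPMATH_SSE | FPMATH_387 when both units are available.  */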
3563 /* If the i387 is disabled, then do not return values in it. */
3565 target_flags &= ~MASK_FLOAT_RETURNS;
/* Use an external vectorized library when vectorizing intrinsics.  */
3568 if (ix86_veclibabi_string)
3570 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3571 ix86_veclib_handler = ix86_veclibabi_svml;
3572 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3573 ix86_veclib_handler = ix86_veclibabi_acml;
3575 error ("unknown vectorization library ABI type (%s) for "
3576 "%sveclibabi=%s %s", ix86_veclibabi_string,
3577 prefix, suffix, sw);
3580 if ((!USE_IX86_FRAME_POINTER
3581 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3582 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3584 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
/* ??? Unwind info is not correct around the CFG unless either a frame
   pointer is present or M_A_O_A is set.  Fixing this requires rewriting
   unwind info generation to be aware of the CFG and propagating states
   around edges.  */
3590 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3591 || flag_exceptions || flag_non_call_exceptions)
3592 && flag_omit_frame_pointer
3593 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3595 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3596 warning (0, "unwind tables currently require either a frame pointer "
3597 "or %saccumulate-outgoing-args%s for correctness",
3599 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3602 /* If stack probes are required, the space used for large function
3603 arguments on the stack must also be probed, so enable
3604 -maccumulate-outgoing-args so this happens in the prologue. */
3605 if (TARGET_STACK_PROBE
3606 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3608 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3609 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3610 "for correctness", prefix, suffix);
3611 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
/* For sane SSE instruction set generation we need the fcomi instruction.
   It is safe to enable all CMOVE instructions.  */
3619 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3622 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3623 p = strchr (internal_label_prefix, 'X');
3624 internal_label_prefix_len = p - internal_label_prefix;
/* When the scheduling description is not available, disable the scheduler
   pass so it won't slow down compilation and make x87 code slower.  */
3630 if (!TARGET_SCHEDULE)
3631 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3633 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3634 set_param_value ("simultaneous-prefetches",
3635 ix86_cost->simultaneous_prefetches);
3636 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3637 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3638 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3639 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3640 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3641 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
/* Enable software prefetching at -O3 for CPUs where prefetching is
   helpful.  */
3644 if (flag_prefetch_loop_arrays < 0
3647 && software_prefetching_beneficial_p ())
3648 flag_prefetch_loop_arrays = 1;
3650 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3651 can be optimized to ap = __builtin_next_arg (0). */
3653 targetm.expand_builtin_va_start = NULL;
3657 ix86_gen_leave = gen_leave_rex64;
3658 ix86_gen_add3 = gen_adddi3;
3659 ix86_gen_sub3 = gen_subdi3;
3660 ix86_gen_sub3_carry = gen_subdi3_carry;
3661 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3662 ix86_gen_monitor = gen_sse3_monitor64;
3663 ix86_gen_andsp = gen_anddi3;
3664 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3665 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3666 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3670 ix86_gen_leave = gen_leave;
3671 ix86_gen_add3 = gen_addsi3;
3672 ix86_gen_sub3 = gen_subsi3;
3673 ix86_gen_sub3_carry = gen_subsi3_carry;
3674 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3675 ix86_gen_monitor = gen_sse3_monitor;
3676 ix86_gen_andsp = gen_andsi3;
3677 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3678 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3679 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
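/* Usage sketch (schematic, mirroring the prologue/epilogue code): this
   indirection lets callers emit mode-correct RTL without checking
   TARGET_64BIT at every site, e.g.

     emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-UNITS_PER_WORD)));            */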
3683 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3685 target_flags |= MASK_CLD & ~target_flags_explicit;
3688 if (!TARGET_64BIT && flag_pic)
3690 if (flag_fentry > 0)
3691 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3694 if (flag_fentry < 0)
3696 #if defined(PROFILE_BEFORE_PROLOGUE)
/* Save the initial options in case the user uses function-specific
   options.  */
3705 target_option_default_node = target_option_current_node
3706 = build_target_option_node ();
3709 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3712 ix86_option_override (void)
3714 ix86_option_override_internal (true);
3717 /* Update register usage after having seen the compiler flags. */
3720 ix86_conditional_register_usage (void)
3725 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3727 if (fixed_regs[i] > 1)
3728 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3729 if (call_used_regs[i] > 1)
3730 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3733 /* The PIC register, if it exists, is fixed. */
3734 j = PIC_OFFSET_TABLE_REGNUM;
3735 if (j != INVALID_REGNUM)
3736 fixed_regs[j] = call_used_regs[j] = 1;
3738 /* The MS_ABI changes the set of call-used registers. */
3739 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3741 call_used_regs[SI_REG] = 0;
3742 call_used_regs[DI_REG] = 0;
3743 call_used_regs[XMM6_REG] = 0;
3744 call_used_regs[XMM7_REG] = 0;
3745 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3746 call_used_regs[i] = 0;
3749 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3750 other call-clobbered regs for 64-bit. */
3753 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3755 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3756 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3757 && call_used_regs[i])
3758 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3761 /* If MMX is disabled, squash the registers. */
3763 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3764 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3765 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3767 /* If SSE is disabled, squash the registers. */
3769 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3770 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3771 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3773 /* If the FPU is disabled, squash the registers. */
3774 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3775 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3776 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3777 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3779 /* If 32-bit, squash the 64-bit registers. */
3782 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3784 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3790 /* Save the current options */
3793 ix86_function_specific_save (struct cl_target_option *ptr)
3795 ptr->arch = ix86_arch;
3796 ptr->schedule = ix86_schedule;
3797 ptr->tune = ix86_tune;
3798 ptr->fpmath = ix86_fpmath;
3799 ptr->branch_cost = ix86_branch_cost;
3800 ptr->tune_defaulted = ix86_tune_defaulted;
3801 ptr->arch_specified = ix86_arch_specified;
3802 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3803 ptr->target_flags_explicit = target_flags_explicit;
3805 /* The fields are char but the variables are not; make sure the
3806 values fit in the fields. */
3807 gcc_assert (ptr->arch == ix86_arch);
3808 gcc_assert (ptr->schedule == ix86_schedule);
3809 gcc_assert (ptr->tune == ix86_tune);
3810 gcc_assert (ptr->fpmath == ix86_fpmath);
3811 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3814 /* Restore the current options */
3817 ix86_function_specific_restore (struct cl_target_option *ptr)
3819 enum processor_type old_tune = ix86_tune;
3820 enum processor_type old_arch = ix86_arch;
3821 unsigned int ix86_arch_mask, ix86_tune_mask;
3824 ix86_arch = (enum processor_type) ptr->arch;
3825 ix86_schedule = (enum attr_cpu) ptr->schedule;
3826 ix86_tune = (enum processor_type) ptr->tune;
3827 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3828 ix86_branch_cost = ptr->branch_cost;
3829 ix86_tune_defaulted = ptr->tune_defaulted;
3830 ix86_arch_specified = ptr->arch_specified;
3831 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3832 target_flags_explicit = ptr->target_flags_explicit;
3834 /* Recreate the arch feature tests if the arch changed */
3835 if (old_arch != ix86_arch)
3837 ix86_arch_mask = 1u << ix86_arch;
3838 for (i = 0; i < X86_ARCH_LAST; ++i)
3839 ix86_arch_features[i]
3840 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3843 /* Recreate the tune optimization tests */
3844 if (old_tune != ix86_tune)
3846 ix86_tune_mask = 1u << ix86_tune;
3847 for (i = 0; i < X86_TUNE_LAST; ++i)
3848 ix86_tune_features[i]
3849 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3853 /* Print the current options */
3856 ix86_function_specific_print (FILE *file, int indent,
3857 struct cl_target_option *ptr)
3860 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3861 NULL, NULL, NULL, false);
3863 fprintf (file, "%*sarch = %d (%s)\n",
3866 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3867 ? cpu_names[ptr->arch]
3870 fprintf (file, "%*stune = %d (%s)\n",
3873 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3874 ? cpu_names[ptr->tune]
3877 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3878 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3879 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3880 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3884 fprintf (file, "%*s%s\n", indent, "", target_string);
3885 free (target_string);
/* Inner function to process the attribute((target(...))): take one
   argument and set the current options from that argument.  If we have a
   list, recursively go over the list.  */
3895 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3900 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3901 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3902 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3903 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3918 enum ix86_opt_type type;
3923 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3924 IX86_ATTR_ISA ("abm", OPT_mabm),
3925 IX86_ATTR_ISA ("aes", OPT_maes),
3926 IX86_ATTR_ISA ("avx", OPT_mavx),
3927 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3928 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3929 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3930 IX86_ATTR_ISA ("sse", OPT_msse),
3931 IX86_ATTR_ISA ("sse2", OPT_msse2),
3932 IX86_ATTR_ISA ("sse3", OPT_msse3),
3933 IX86_ATTR_ISA ("sse4", OPT_msse4),
3934 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3935 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3936 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3937 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3938 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3939 IX86_ATTR_ISA ("xop", OPT_mxop),
3940 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3941 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3942 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3943 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3945 /* string options */
3946 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3947 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3948 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3951 IX86_ATTR_YES ("cld",
3955 IX86_ATTR_NO ("fancy-math-387",
3956 OPT_mfancy_math_387,
3957 MASK_NO_FANCY_MATH_387),
3959 IX86_ATTR_YES ("ieee-fp",
3963 IX86_ATTR_YES ("inline-all-stringops",
3964 OPT_minline_all_stringops,
3965 MASK_INLINE_ALL_STRINGOPS),
3967 IX86_ATTR_YES ("inline-stringops-dynamically",
3968 OPT_minline_stringops_dynamically,
3969 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3971 IX86_ATTR_NO ("align-stringops",
3972 OPT_mno_align_stringops,
3973 MASK_NO_ALIGN_STRINGOPS),
3975 IX86_ATTR_YES ("recip",
3981 /* If this is a list, recurse to get the options. */
3982 if (TREE_CODE (args) == TREE_LIST)
3986 for (; args; args = TREE_CHAIN (args))
3987 if (TREE_VALUE (args)
3988 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3994 else if (TREE_CODE (args) != STRING_CST)
3997 /* Handle multiple arguments separated by commas. */
3998 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4000 while (next_optstr && *next_optstr != '\0')
4002 char *p = next_optstr;
4004 char *comma = strchr (next_optstr, ',');
4005 const char *opt_string;
4006 size_t len, opt_len;
4011 enum ix86_opt_type type = ix86_opt_unknown;
4017 len = comma - next_optstr;
4018 next_optstr = comma + 1;
4026 /* Recognize no-xxx. */
4027 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4036 /* Find the option. */
4039 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4041 type = attrs[i].type;
4042 opt_len = attrs[i].len;
4043 if (ch == attrs[i].string[0]
4044 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4045 && memcmp (p, attrs[i].string, opt_len) == 0)
4048 mask = attrs[i].mask;
4049 opt_string = attrs[i].string;
4054 /* Process the option. */
4057 error ("attribute(target(\"%s\")) is unknown", orig_p);
4061 else if (type == ix86_opt_isa)
4062 ix86_handle_option (opt, p, opt_set_p);
4064 else if (type == ix86_opt_yes || type == ix86_opt_no)
4066 if (type == ix86_opt_no)
4067 opt_set_p = !opt_set_p;
4070 target_flags |= mask;
4072 target_flags &= ~mask;
4075 else if (type == ix86_opt_str)
4079 error ("option(\"%s\") was already specified", opt_string);
4083 p_strings[opt] = xstrdup (p + opt_len);
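/* For illustration (hypothetical user code, not part of GCC): the strings
   parsed above come from attributes such as

     int crc_fast (const char *p) __attribute__ ((target ("sse4.2")));
     int crc_base (const char *p)
	__attribute__ ((target ("arch=core2,no-3dnow")));

   Comma-separated items, "no-" prefixes and string options like arch=
   are all dispatched through the table above.  */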
4093 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4096 ix86_valid_target_attribute_tree (tree args)
4098 const char *orig_arch_string = ix86_arch_string;
4099 const char *orig_tune_string = ix86_tune_string;
4100 const char *orig_fpmath_string = ix86_fpmath_string;
4101 int orig_tune_defaulted = ix86_tune_defaulted;
4102 int orig_arch_specified = ix86_arch_specified;
4103 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4106 struct cl_target_option *def
4107 = TREE_TARGET_OPTION (target_option_default_node);
4109 /* Process each of the options on the chain. */
4110 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
/* If the changed options are different from the default, rerun
   ix86_option_override_internal, and then save the options away.
   The string options are attribute options, and will be undone
   when we copy the save structure.  */
4117 if (ix86_isa_flags != def->ix86_isa_flags
4118 || target_flags != def->target_flags
4119 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4120 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4121 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4123 /* If we are using the default tune= or arch=, undo the string assigned,
4124 and use the default. */
4125 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4126 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4127 else if (!orig_arch_specified)
4128 ix86_arch_string = NULL;
4130 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4131 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4132 else if (orig_tune_defaulted)
4133 ix86_tune_string = NULL;
4135 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4136 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4137 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4138 else if (!TARGET_64BIT && TARGET_SSE)
4139 ix86_fpmath_string = "sse,387";
4141 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4142 ix86_option_override_internal (false);
4144 /* Add any builtin functions with the new isa if any. */
4145 ix86_add_new_builtins (ix86_isa_flags);
/* Save the current options unless we are validating options for
   #pragma.  */
4149 t = build_target_option_node ();
4151 ix86_arch_string = orig_arch_string;
4152 ix86_tune_string = orig_tune_string;
4153 ix86_fpmath_string = orig_fpmath_string;
4155 /* Free up memory allocated to hold the strings */
4156 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4157 if (option_strings[i])
4158 free (option_strings[i]);
4164 /* Hook to validate attribute((target("string"))). */
4167 ix86_valid_target_attribute_p (tree fndecl,
4168 tree ARG_UNUSED (name),
4170 int ARG_UNUSED (flags))
4172 struct cl_target_option cur_target;
4174 tree old_optimize = build_optimization_node ();
4175 tree new_target, new_optimize;
4176 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4178 /* If the function changed the optimization levels as well as setting target
4179 options, start with the optimizations specified. */
4180 if (func_optimize && func_optimize != old_optimize)
4181 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4183 /* The target attributes may also change some optimization flags, so update
4184 the optimization options if necessary. */
4185 cl_target_option_save (&cur_target);
4186 new_target = ix86_valid_target_attribute_tree (args);
4187 new_optimize = build_optimization_node ();
4194 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4196 if (old_optimize != new_optimize)
4197 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4200 cl_target_option_restore (&cur_target);
4202 if (old_optimize != new_optimize)
4203 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4209 /* Hook to determine if one function can safely inline another. */
4212 ix86_can_inline_p (tree caller, tree callee)
4215 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4216 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4218 /* If callee has no option attributes, then it is ok to inline. */
/* If the caller has no option attributes but the callee does, then it
   is not ok to inline.  */
4229 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4230 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
/* The callee's ISA options should be a subset of the caller's, i.e. an
   SSE4 function can inline an SSE2 function, but an SSE2 function can't
   inline an SSE4 function.  */
4235 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4236 != callee_opts->ix86_isa_flags)
4239 /* See if we have the same non-isa options. */
4240 else if (caller_opts->target_flags != callee_opts->target_flags)
4243 /* See if arch, tune, etc. are the same. */
4244 else if (caller_opts->arch != callee_opts->arch)
4247 else if (caller_opts->tune != callee_opts->tune)
4250 else if (caller_opts->fpmath != callee_opts->fpmath)
4253 else if (caller_opts->branch_cost != callee_opts->branch_cost)
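/* For illustration (hypothetical user code, not part of GCC):

     __attribute__ ((target ("sse2")))   static void f_sse2 (void) { }
     __attribute__ ((target ("sse4.2"))) static void f_sse42 (void)
     { f_sse2 (); }

   f_sse42 may inline f_sse2, since the SSE2 ISA flags are a subset of
   the SSE4.2 ones; the reverse call could not be inlined.  */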
4264 /* Remember the last target of ix86_set_current_function. */
4265 static GTY(()) tree ix86_previous_fndecl;
4267 /* Establish appropriate back-end context for processing the function
4268 FNDECL. The argument might be NULL to indicate processing at top
4269 level, outside of any function scope. */
4271 ix86_set_current_function (tree fndecl)
4273 /* Only change the context if the function changes. This hook is called
4274 several times in the course of compiling a function, and we don't want to
4275 slow things down too much or call target_reinit when it isn't safe. */
4276 if (fndecl && fndecl != ix86_previous_fndecl)
4278 tree old_tree = (ix86_previous_fndecl
4279 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4282 tree new_tree = (fndecl
4283 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4286 ix86_previous_fndecl = fndecl;
4287 if (old_tree == new_tree)
4292 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4298 struct cl_target_option *def
4299 = TREE_TARGET_OPTION (target_option_current_node);
4301 cl_target_option_restore (def);
4308 /* Return true if this goes in large data/bss. */
4311 ix86_in_large_data_p (tree exp)
4313 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4316 /* Functions are never large data. */
4317 if (TREE_CODE (exp) == FUNCTION_DECL)
4320 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4322 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4323 if (strcmp (section, ".ldata") == 0
4324 || strcmp (section, ".lbss") == 0)
4330 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4332 /* If this is an incomplete type with size 0, then we can't put it
4333 in data because it might be too big when completed. */
4334 if (!size || size > ix86_section_threshold)
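/* For illustration (hypothetical invocation): with -mcmodel=medium
   -mlarge-data-threshold=65536, an object such as

     static char big_table[1 << 20];

   exceeds the threshold and goes to .ldata/.lbss, while smaller objects
   stay in the ordinary sections.  */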
4341 /* Switch to the appropriate section for output of DECL.
4342 DECL is either a `VAR_DECL' node or a constant of some sort.
4343 RELOC indicates whether forming the initial value of DECL requires
4344 link-time relocations. */
4346 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4350 x86_64_elf_select_section (tree decl, int reloc,
4351 unsigned HOST_WIDE_INT align)
4353 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4354 && ix86_in_large_data_p (decl))
4356 const char *sname = NULL;
4357 unsigned int flags = SECTION_WRITE;
4358 switch (categorize_decl_for_section (decl, reloc))
4363 case SECCAT_DATA_REL:
4364 sname = ".ldata.rel";
4366 case SECCAT_DATA_REL_LOCAL:
4367 sname = ".ldata.rel.local";
4369 case SECCAT_DATA_REL_RO:
4370 sname = ".ldata.rel.ro";
4372 case SECCAT_DATA_REL_RO_LOCAL:
4373 sname = ".ldata.rel.ro.local";
4377 flags |= SECTION_BSS;
4380 case SECCAT_RODATA_MERGE_STR:
4381 case SECCAT_RODATA_MERGE_STR_INIT:
4382 case SECCAT_RODATA_MERGE_CONST:
4386 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
4399 /* We might get called with string constants, but get_named_section
4400 doesn't like them as they are not DECLs. Also, we need to set
4401 flags in that case. */
4403 return get_section (sname, flags, NULL);
4404 return get_named_section (decl, sname, reloc);
4407 return default_elf_select_section (decl, reloc, align);
4410 /* Build up a unique section name, expressed as a
4411 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4412 RELOC indicates whether the initial value of EXP requires
4413 link-time relocations. */
4415 static void ATTRIBUTE_UNUSED
4416 x86_64_elf_unique_section (tree decl, int reloc)
4418 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4419 && ix86_in_large_data_p (decl))
4421 const char *prefix = NULL;
4422 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4423 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4425 switch (categorize_decl_for_section (decl, reloc))
4428 case SECCAT_DATA_REL:
4429 case SECCAT_DATA_REL_LOCAL:
4430 case SECCAT_DATA_REL_RO:
4431 case SECCAT_DATA_REL_RO_LOCAL:
4432 prefix = one_only ? ".ld" : ".ldata";
4435 prefix = one_only ? ".lb" : ".lbss";
4438 case SECCAT_RODATA_MERGE_STR:
4439 case SECCAT_RODATA_MERGE_STR_INIT:
4440 case SECCAT_RODATA_MERGE_CONST:
4441 prefix = one_only ? ".lr" : ".lrodata";
4443 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
4456 const char *name, *linkonce;
4459 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4460 name = targetm.strip_name_encoding (name);
4462 /* If we're using one_only, then there needs to be a .gnu.linkonce
4463 prefix to the section name. */
4464 linkonce = one_only ? ".gnu.linkonce" : "";
4466 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4468 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4472 default_unique_section (decl, reloc);
4475 #ifdef COMMON_ASM_OP
4476 /* This says how to output assembler code to declare an
4477 uninitialized external linkage data object.
For medium model x86-64 we need to use the .largecomm directive for
large objects.  */
4482 x86_elf_aligned_common (FILE *file,
4483 const char *name, unsigned HOST_WIDE_INT size,
4486 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4487 && size > (unsigned int)ix86_section_threshold)
4488 fputs (".largecomm\t", file);
4490 fputs (COMMON_ASM_OP, file);
4491 assemble_name (file, name);
4492 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4493 size, align / BITS_PER_UNIT);
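/* For illustration (hypothetical object): a 1 MiB common symbol "buf"
   with 32-byte alignment under the medium model is emitted as

     .largecomm	buf,1048576,32

   while small objects get the ordinary COMMON_ASM_OP form.  */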
4497 /* Utility function for targets to use in implementing
4498 ASM_OUTPUT_ALIGNED_BSS. */
4501 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4502 const char *name, unsigned HOST_WIDE_INT size,
4505 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4506 && size > (unsigned int)ix86_section_threshold)
4507 switch_to_section (get_named_section (decl, ".lbss", 0));
4509 switch_to_section (bss_section);
4510 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4511 #ifdef ASM_DECLARE_OBJECT_NAME
4512 last_assemble_variable_decl = decl;
4513 ASM_DECLARE_OBJECT_NAME (file, name, decl);
/* The standard thing is just to output a label for the object.  */
4516 ASM_OUTPUT_LABEL (file, name);
4517 #endif /* ASM_DECLARE_OBJECT_NAME */
4518 ASM_OUTPUT_SKIP (file, size ? size : 1);
4522 ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
4524 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4525 make the problem with not enough registers even worse. */
4526 #ifdef INSN_SCHEDULING
4528 flag_schedule_insns = 0;
4532 /* The Darwin libraries never set errno, so we might as well
4533 avoid calling them when that's the only reason we would. */
4534 flag_errno_math = 0;
/* The default values of these switches depend on TARGET_64BIT, which is
   not known at this moment.  Mark these values with 2 and let the user
   override them.  In case there is no command line option specifying
   them, we will set the defaults in
   ix86_option_override_internal.  */
4542 flag_omit_frame_pointer = 2;
4544 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4548 flag_pcc_struct_return = 2;
4549 flag_asynchronous_unwind_tables = 2;
4550 flag_vect_cost_model = 1;
4551 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4552 SUBTARGET_OPTIMIZATION_OPTIONS;
4556 /* Decide whether we must probe the stack before any space allocation
4557 on this target. It's essentially TARGET_STACK_PROBE except when
4558 -fstack-check causes the stack to be already probed differently. */
4561 ix86_target_stack_probe (void)
4563 /* Do not probe the stack twice if static stack checking is enabled. */
4564 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4567 return TARGET_STACK_PROBE;
4570 /* Decide whether we can make a sibling call to a function. DECL is the
4571 declaration of the function being targeted by the call and EXP is the
4572 CALL_EXPR representing the call. */
4575 ix86_function_ok_for_sibcall (tree decl, tree exp)
4577 tree type, decl_or_type;
4580 /* If we are generating position-independent code, we cannot sibcall
4581 optimize any indirect call, or a direct call to a global function,
4582 as the PLT requires %ebx be live. */
4583 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4586 /* If we need to align the outgoing stack, then sibcalling would
4587 unalign the stack, which may break the called function. */
4588 if (ix86_minimum_incoming_stack_boundary (true)
4589 < PREFERRED_STACK_BOUNDARY)
4594 decl_or_type = decl;
4595 type = TREE_TYPE (decl);
/* We're looking at the CALL_EXPR; we need the type of the function.  */
4600 type = CALL_EXPR_FN (exp); /* pointer expression */
4601 type = TREE_TYPE (type); /* pointer type */
4602 type = TREE_TYPE (type); /* function type */
4603 decl_or_type = type;
/* Check that the return value locations are the same.  For example, if
   we are returning floats on the 80387 register stack, we cannot
4608 make a sibcall from a function that doesn't return a float to a
4609 function that does or, conversely, from a function that does return
4610 a float to a function that doesn't; the necessary stack adjustment
4611 would not be executed. This is also the place we notice
4612 differences in the return value ABI. Note that it is ok for one
4613 of the functions to have void return type as long as the return
4614 value of the other is passed in a register. */
4615 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4616 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4618 if (STACK_REG_P (a) || STACK_REG_P (b))
4620 if (!rtx_equal_p (a, b))
4623 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4625 else if (!rtx_equal_p (a, b))
4630 /* The SYSV ABI has more call-clobbered registers;
4631 disallow sibcalls from MS to SYSV. */
4632 if (cfun->machine->call_abi == MS_ABI
4633 && ix86_function_type_abi (type) == SYSV_ABI)
4638 /* If this call is indirect, we'll need to be able to use a
4639 call-clobbered register for the address of the target function.
4640 Make sure that all such registers are not used for passing
4641 parameters. Note that DLLIMPORT functions are indirect. */
4643 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4645 if (ix86_function_regparm (type, NULL) >= 3)
4647 /* ??? Need to count the actual number of registers to be used,
4648 not the possible number of registers. Fix later. */
4654 /* Otherwise okay. That also includes certain types of indirect calls. */
4658 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4659 and "sseregparm" calling convention attributes;
4660 arguments as in struct attribute_spec.handler. */
4663 ix86_handle_cconv_attribute (tree *node, tree name,
4665 int flags ATTRIBUTE_UNUSED,
4668 if (TREE_CODE (*node) != FUNCTION_TYPE
4669 && TREE_CODE (*node) != METHOD_TYPE
4670 && TREE_CODE (*node) != FIELD_DECL
4671 && TREE_CODE (*node) != TYPE_DECL)
4673 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4675 *no_add_attrs = true;
4679 /* Can combine regparm with all attributes but fastcall. */
4680 if (is_attribute_p ("regparm", name))
4684 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4686 error ("fastcall and regparm attributes are not compatible");
4689 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4691 error ("regparam and thiscall attributes are not compatible");
4694 cst = TREE_VALUE (args);
4695 if (TREE_CODE (cst) != INTEGER_CST)
4697 warning (OPT_Wattributes,
4698 "%qE attribute requires an integer constant argument",
4700 *no_add_attrs = true;
4702 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4704 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4706 *no_add_attrs = true;
4714 /* Do not warn when emulating the MS ABI. */
4715 if ((TREE_CODE (*node) != FUNCTION_TYPE
4716 && TREE_CODE (*node) != METHOD_TYPE)
4717 || ix86_function_type_abi (*node) != MS_ABI)
4718 warning (OPT_Wattributes, "%qE attribute ignored",
4720 *no_add_attrs = true;
4724 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4725 if (is_attribute_p ("fastcall", name))
4727 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4729 error ("fastcall and cdecl attributes are not compatible");
4731 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4733 error ("fastcall and stdcall attributes are not compatible");
4735 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4737 error ("fastcall and regparm attributes are not compatible");
4739 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4741 error ("fastcall and thiscall attributes are not compatible");
/* Can combine stdcall with fastcall (redundant), regparm and
   sseregparm.  */
4747 else if (is_attribute_p ("stdcall", name))
4749 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4751 error ("stdcall and cdecl attributes are not compatible");
4753 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4755 error ("stdcall and fastcall attributes are not compatible");
4757 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4759 error ("stdcall and thiscall attributes are not compatible");
4763 /* Can combine cdecl with regparm and sseregparm. */
4764 else if (is_attribute_p ("cdecl", name))
4766 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4768 error ("stdcall and cdecl attributes are not compatible");
4770 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4772 error ("fastcall and cdecl attributes are not compatible");
4774 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4776 error ("cdecl and thiscall attributes are not compatible");
4779 else if (is_attribute_p ("thiscall", name))
4781 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4782 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
4784 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4786 error ("stdcall and thiscall attributes are not compatible");
4788 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4790 error ("fastcall and thiscall attributes are not compatible");
4792 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4794 error ("cdecl and thiscall attributes are not compatible");
4798 /* Can combine sseregparm with all attributes. */
4803 /* Return 0 if the attributes for two types are incompatible, 1 if they
4804 are compatible, and 2 if they are nearly compatible (which causes a
4805 warning to be generated). */
4808 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4810 /* Check for mismatch of non-default calling convention. */
4811 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4813 if (TREE_CODE (type1) != FUNCTION_TYPE
4814 && TREE_CODE (type1) != METHOD_TYPE)
4817 /* Check for mismatched fastcall/regparm types. */
4818 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4819 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4820 || (ix86_function_regparm (type1, NULL)
4821 != ix86_function_regparm (type2, NULL)))
4824 /* Check for mismatched sseregparm types. */
4825 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4826 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4829 /* Check for mismatched thiscall types. */
4830 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4831 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4834 /* Check for mismatched return types (cdecl vs stdcall). */
4835 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4836 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4842 /* Return the regparm value for a function with the indicated TYPE and DECL.
4843 DECL may be NULL when calling function indirectly
4844 or considering a libcall. */
4847 ix86_function_regparm (const_tree type, const_tree decl)
4853 return (ix86_function_type_abi (type) == SYSV_ABI
4854 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4856 regparm = ix86_regparm;
4857 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4860 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4864 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4867 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4870 /* Use register calling convention for local functions when possible. */
4872 && TREE_CODE (decl) == FUNCTION_DECL
4874 && !(profile_flag && !flag_fentry))
4876 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4877 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4880 int local_regparm, globals = 0, regno;
4882 /* Make sure no regparm register is taken by a
4883 fixed register variable. */
4884 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4885 if (fixed_regs[local_regparm])
4888 /* We don't want to use regparm(3) for nested functions as
4889 these use a static chain pointer in the third argument. */
4890 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
/* Each fixed register usage increases register pressure,
   so fewer registers should be used for argument passing.
   This functionality can be overridden by an explicit
   regparm value.  */
4897 for (regno = 0; regno <= DI_REG; regno++)
4898 if (fixed_regs[regno])
4902 = globals < local_regparm ? local_regparm - globals : 0;
4904 if (local_regparm > regparm)
4905 regparm = local_regparm;
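/* For illustration (hypothetical user code, not part of GCC):

     int __attribute__ ((regparm (3))) madd (int a, int b, int c);

   passes a, b and c in %eax, %edx and %ecx; for a local static function
   the code above may pick such a convention automatically.  */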
4912 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4913 DFmode (2) arguments in SSE registers for a function with the
4914 indicated TYPE and DECL. DECL may be NULL when calling function
4915 indirectly or considering a libcall. Otherwise return 0. */
4918 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4920 gcc_assert (!TARGET_64BIT);
4922 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4923 by the sseregparm attribute. */
4924 if (TARGET_SSEREGPARM
4925 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4932 error ("Calling %qD with attribute sseregparm without "
4933 "SSE/SSE2 enabled", decl);
4935 error ("Calling %qT with attribute sseregparm without "
4936 "SSE/SSE2 enabled", type);
4944 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4945 (and DFmode for SSE2) arguments in SSE registers. */
4946 if (decl && TARGET_SSE_MATH && optimize
4947 && !(profile_flag && !flag_fentry))
4949 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4950 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4952 return TARGET_SSE2 ? 2 : 1;
4958 /* Return true if EAX is live at the start of the function. Used by
4959 ix86_expand_prologue to determine if we need special help before
4960 calling allocate_stack_worker. */
4963 ix86_eax_live_at_start_p (void)
4965 /* Cheat. Don't bother working forward from ix86_function_regparm
4966 to the function type to whether an actual argument is located in
4967 eax. Instead just look at cfg info, which is still close enough
4968 to correct at this point. This gives false positives for broken
4969 functions that might use uninitialized data that happens to be
4970 allocated in eax, but who cares? */
4971 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4974 /* Value is the number of bytes of arguments automatically
4975 popped when returning from a subroutine call.
4976 FUNDECL is the declaration node of the function (as a tree),
4977 FUNTYPE is the data type of the function (as a tree),
4978 or for a library call it is an identifier node for the subroutine name.
4979 SIZE is the number of bytes of arguments passed on the stack.
4981 On the 80386, the RTD insn may be used to pop them if the number
4982 of args is fixed, but if the number is variable then the caller
4983 must pop them all. RTD can't be used for library calls now
4984 because the library is compiled with the Unix compiler.
4985 Use of RTD is a selectable option, since it is incompatible with
4986 standard Unix calling sequences. If the option is not selected,
4987 the caller must always pop the args.
4989 The attribute stdcall is equivalent to RTD on a per module basis. */
4992 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4996 /* None of the 64-bit ABIs pop arguments. */
5000 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5002 /* Cdecl functions override -mrtd, and never pop the stack. */
5003 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
/* Stdcall, fastcall and thiscall functions will pop the stack if the
   function does not take variable arguments.  */
5007 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5008 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5009 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5012 if (rtd && ! stdarg_p (funtype))
5016 /* Lose any fake structure return argument if it is passed on the stack. */
5017 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5018 && !KEEP_AGGREGATE_RETURN_POINTER)
5020 int nregs = ix86_function_regparm (funtype, fundecl);
5022 return GET_MODE_SIZE (Pmode);
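/* For illustration (hypothetical user code, not part of GCC):

     int __attribute__ ((stdcall)) f (int a, int b);

   makes the callee pop its two 4-byte arguments (returning with
   "ret $8"), so this function returns 8; for a cdecl function it
   returns 0 and the caller does the cleanup.  */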
5028 /* Argument support functions. */
5030 /* Return true when register may be used to pass function parameters. */
5032 ix86_function_arg_regno_p (int regno)
5035 const int *parm_regs;
5040 return (regno < REGPARM_MAX
5041 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5043 return (regno < REGPARM_MAX
5044 || (TARGET_MMX && MMX_REGNO_P (regno)
5045 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5046 || (TARGET_SSE && SSE_REGNO_P (regno)
5047 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5052 if (SSE_REGNO_P (regno) && TARGET_SSE)
5057 if (TARGET_SSE && SSE_REGNO_P (regno)
5058 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
/* TODO: The function should depend on the current function ABI, but
   builtins.c would need updating then.  Therefore we use the default
   ABI here.  */
/* RAX is used as a hidden argument to va_arg functions.  */
5067 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5070 if (ix86_abi == MS_ABI)
5071 parm_regs = x86_64_ms_abi_int_parameter_registers;
5073 parm_regs = x86_64_int_parameter_registers;
5074 for (i = 0; i < (ix86_abi == MS_ABI
5075 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5076 if (regno == parm_regs[i])
/* Return true if we do not know how to pass TYPE solely in registers.  */
5084 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5086 if (must_pass_in_stack_var_size_or_pad (mode, type))
5089 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5090 The layout_type routine is crafty and tries to trick us into passing
5091 currently unsupported vector types on the stack by using TImode. */
5092 return (!TARGET_64BIT && mode == TImode
5093 && type && TREE_CODE (type) != VECTOR_TYPE);
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */
5100 ix86_reg_parm_stack_space (const_tree fndecl)
5102 enum calling_abi call_abi = SYSV_ABI;
5103 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5104 call_abi = ix86_function_abi (fndecl);
5106 call_abi = ix86_function_type_abi (fndecl);
5107 if (call_abi == MS_ABI)
/* Returns SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
   call ABI used.  */
5115 ix86_function_type_abi (const_tree fntype)
5117 if (TARGET_64BIT && fntype != NULL)
5119 enum calling_abi abi = ix86_abi;
5120 if (abi == SYSV_ABI)
5122 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5125 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5133 ix86_function_ms_hook_prologue (const_tree fn)
5135 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5137 if (decl_function_context (fn) != NULL_TREE)
5138 error_at (DECL_SOURCE_LOCATION (fn),
5139 "ms_hook_prologue is not compatible with nested function");
5146 static enum calling_abi
5147 ix86_function_abi (const_tree fndecl)
5151 return ix86_function_type_abi (TREE_TYPE (fndecl));
/* Returns SYSV_ABI or MS_ABI, depending on cfun, specifying the
   call ABI used.  */
5157 ix86_cfun_abi (void)
5159 if (! cfun || ! TARGET_64BIT)
5161 return cfun->machine->call_abi;
5164 /* Write the extra assembler code needed to declare a function properly. */
5167 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5170 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5174 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5175 unsigned int filler_cc = 0xcccccccc;
5177 for (i = 0; i < filler_count; i += 4)
5178 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5181 ASM_OUTPUT_LABEL (asm_out_file, fname);
5183 /* Output magic byte marker, if hot-patch attribute is set. */
5188 /* leaq [%rsp + 0], %rsp */
5189 asm_fprintf (asm_out_file, ASM_BYTE
5190 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
/* movl.s %edi, %edi
   push   %ebp
   movl.s %esp, %ebp */
5197 asm_fprintf (asm_out_file, ASM_BYTE
5198 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5204 extern void init_regs (void);
/* Implementation of the call ABI switching target hook.  The call
   register sets specific to FNDECL are set up.  See also
   CONDITIONAL_REGISTER_USAGE for more details.  */
5210 ix86_call_abi_override (const_tree fndecl)
5212 if (fndecl == NULL_TREE)
5213 cfun->machine->call_abi = ix86_abi;
5215 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
/* The MS and SYSV ABIs have different sets of call-used registers.
   Avoid expensive re-initialization via init_regs each time we switch
   function context, since this is needed only during RTL expansion.  */
5222 ix86_maybe_switch_abi (void)
5225 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5229 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5230 for a call to a function whose data type is FNTYPE.
5231 For a library call, FNTYPE is 0. */
5234 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5235 tree fntype, /* tree ptr for function decl */
5236 rtx libname, /* SYMBOL_REF of library name or 0 */
5239 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5240 memset (cum, 0, sizeof (*cum));
5243 cum->call_abi = ix86_function_abi (fndecl);
5245 cum->call_abi = ix86_function_type_abi (fntype);
5246 /* Set up the number of registers to use for passing arguments. */
5248 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5249 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5250 "or subtarget optimization implying it");
5251 cum->nregs = ix86_regparm;
5254 cum->nregs = (cum->call_abi == SYSV_ABI
5255 ? X86_64_REGPARM_MAX
5256 : X86_64_MS_REGPARM_MAX);
5260 cum->sse_nregs = SSE_REGPARM_MAX;
5263 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5264 ? X86_64_SSE_REGPARM_MAX
5265 : X86_64_MS_SSE_REGPARM_MAX);
5269 cum->mmx_nregs = MMX_REGPARM_MAX;
5270 cum->warn_avx = true;
5271 cum->warn_sse = true;
5272 cum->warn_mmx = true;
/* Because the type might mismatch between caller and callee, we need to
   use the actual type of the function for local calls.
   FIXME: cgraph_analyze can be told to actually record if a function
   uses va_start, so for local functions maybe_vaarg can be made
   aggressive enough.
   FIXME: once the type system is fixed, we won't need this code
   anymore.  */
5281 fntype = TREE_TYPE (fndecl);
5282 cum->maybe_vaarg = (fntype
5283 ? (!prototype_p (fntype) || stdarg_p (fntype))
5288 /* If there are variable arguments, then we won't pass anything
5289 in registers in 32-bit mode. */
5290 if (stdarg_p (fntype))
5301 /* Use ecx and edx registers if function has fastcall attribute,
5302 else look for regparm information. */
5305 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5308 cum->fastcall = 1; /* Same first register as in fastcall. */
5310 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5316 cum->nregs = ix86_function_regparm (fntype, fndecl);
5319 /* Set up the number of SSE registers used for passing SFmode
5320 and DFmode arguments. Warn for mismatching ABI. */
5321 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
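/* For illustration (hypothetical user code, not part of GCC):

     int __attribute__ ((fastcall)) g (int a, int b);

   passes a in %ecx and b in %edx, while thiscall uses %ecx alone for
   the object pointer, matching the register setup above.  */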
5325 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5326 But in the case of vector types, it is some vector mode.
5328 When we have only some of our vector isa extensions enabled, then there
5329 are some modes for which vector_mode_supported_p is false. For these
5330 modes, the generic vector support in gcc will choose some non-vector mode
5331 in order to implement the type. By computing the natural mode, we'll
5332 select the proper ABI location for the operand and not depend on whatever
5333 the middle-end decides to do with these vector types.
   The middle-end can't deal with vector types wider than 16 bytes.  In
   that case, we return the original mode and warn about the ABI change
   if CUM isn't NULL.  */
5339 static enum machine_mode
5340 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5342 enum machine_mode mode = TYPE_MODE (type);
5344 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5346 HOST_WIDE_INT size = int_size_in_bytes (type);
5347 if ((size == 8 || size == 16 || size == 32)
5348 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5349 && TYPE_VECTOR_SUBPARTS (type) > 1)
5351 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5353 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5354 mode = MIN_MODE_VECTOR_FLOAT;
5356 mode = MIN_MODE_VECTOR_INT;
5358 /* Get the mode which has this inner mode and number of units. */
5359 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5360 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5361 && GET_MODE_INNER (mode) == innermode)
5363 if (size == 32 && !TARGET_AVX)
5365 static bool warnedavx;
5372 warning (0, "AVX vector argument without AVX "
5373 "enabled changes the ABI");
5375 return TYPE_MODE (type);
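/* For illustration (hypothetical user code, not part of GCC):

     typedef int v8si __attribute__ ((vector_size (32)));

   has natural mode V8SImode when AVX is enabled; without AVX the code
   above warns that the ABI changed and falls back to TYPE_MODE.  */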
5388 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5389 this may not agree with the mode that the type system has chosen for the
5390 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5391 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5394 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5399 if (orig_mode != BLKmode)
5400 tmp = gen_rtx_REG (orig_mode, regno);
5403 tmp = gen_rtx_REG (mode, regno);
5404 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5405 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
/* x86-64 register passing implementation.  See the x86-64 ABI for
   details.  The goal of this code is to classify each 8-byte chunk of
   the incoming argument by register class and assign registers
   accordingly.  */
5415 /* Return the union class of CLASS1 and CLASS2.
5416 See the x86-64 PS ABI for details. */
5418 static enum x86_64_reg_class
5419 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5421 /* Rule #1: If both classes are equal, this is the resulting class. */
5422 if (class1 == class2)
/* Rule #2: If one of the classes is NO_CLASS, the resulting class is
   the other class.  */
5427 if (class1 == X86_64_NO_CLASS)
5429 if (class2 == X86_64_NO_CLASS)
5432 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5433 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5434 return X86_64_MEMORY_CLASS;
5436 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5437 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5438 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5439 return X86_64_INTEGERSI_CLASS;
5440 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5441 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5442 return X86_64_INTEGER_CLASS;
/* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
   MEMORY is used.  */
5446 if (class1 == X86_64_X87_CLASS
5447 || class1 == X86_64_X87UP_CLASS
5448 || class1 == X86_64_COMPLEX_X87_CLASS
5449 || class2 == X86_64_X87_CLASS
5450 || class2 == X86_64_X87UP_CLASS
5451 || class2 == X86_64_COMPLEX_X87_CLASS)
5452 return X86_64_MEMORY_CLASS;
5454 /* Rule #6: Otherwise class SSE is used. */
5455 return X86_64_SSE_CLASS;
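/* A worked example (hypothetical type, not from this file):

     struct s { int i; float f; };

   classifies the int as INTEGERSI and the float as SSESF; both live in
   the same 8-byte chunk, so rule #4 merges them into INTEGERSI and the
   whole struct travels in a general-purpose register.  */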
5458 /* Classify the argument of type TYPE and mode MODE.
5459 CLASSES will be filled by the register class used to pass each word
5460 of the operand. The number of words is returned. In case the parameter
5461 should be passed in memory, 0 is returned. As a special case for zero
5462 sized containers, classes[0] will be NO_CLASS and 1 is returned.
   BIT_OFFSET is used internally for handling records; it specifies the
   offset in bits modulo 256 to avoid overflow cases.
5467 See the x86-64 PS ABI for details.
5471 classify_argument (enum machine_mode mode, const_tree type,
5472 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5474 HOST_WIDE_INT bytes =
5475 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5476 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5478 /* Variable sized entities are always passed/returned in memory. */
5482 if (mode != VOIDmode
5483 && targetm.calls.must_pass_in_stack (mode, type))
5486 if (type && AGGREGATE_TYPE_P (type))
5490 enum x86_64_reg_class subclasses[MAX_CLASSES];
5492 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5496 for (i = 0; i < words; i++)
5497 classes[i] = X86_64_NO_CLASS;
/* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
   signal the memory class, so handle this as a special case.  */
5503 classes[0] = X86_64_NO_CLASS;
5507 /* Classify each field of record and merge classes. */
5508 switch (TREE_CODE (type))
5511 /* And now merge the fields of structure. */
5512 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5514 if (TREE_CODE (field) == FIELD_DECL)
5518 if (TREE_TYPE (field) == error_mark_node)
5521 /* Bitfields are always classified as integer. Handle them
5522 early, since later code would consider them to be
5523 misaligned integers. */
5524 if (DECL_BIT_FIELD (field))
5526 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5527 i < ((int_bit_position (field) + (bit_offset % 64))
5528 + tree_low_cst (DECL_SIZE (field), 0)
5531 merge_classes (X86_64_INTEGER_CLASS,
5538 type = TREE_TYPE (field);
5540 /* Flexible array member is ignored. */
5541 if (TYPE_MODE (type) == BLKmode
5542 && TREE_CODE (type) == ARRAY_TYPE
5543 && TYPE_SIZE (type) == NULL_TREE
5544 && TYPE_DOMAIN (type) != NULL_TREE
5545 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5550 if (!warned && warn_psabi)
5553 inform (input_location,
5554 "The ABI of passing struct with"
5555 " a flexible array member has"
5556 " changed in GCC 4.4");
5560 num = classify_argument (TYPE_MODE (type), type,
5562 (int_bit_position (field)
5563 + bit_offset) % 256);
5566 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5567 for (i = 0; i < num && (i + pos) < words; i++)
5569 merge_classes (subclasses[i], classes[i + pos]);
5576 /* Arrays are handled as small records. */
5579 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5580 TREE_TYPE (type), subclasses, bit_offset);
5584 /* The partial classes are now full classes. */
5585 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5586 subclasses[0] = X86_64_SSE_CLASS;
5587 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5588 && !((bit_offset % 64) == 0 && bytes == 4))
5589 subclasses[0] = X86_64_INTEGER_CLASS;
5591 for (i = 0; i < words; i++)
5592 classes[i] = subclasses[i % num];
5597 case QUAL_UNION_TYPE:
5598 /* Unions are similar to RECORD_TYPE but the offset is always 0.
5600 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5602 if (TREE_CODE (field) == FIELD_DECL)
5606 if (TREE_TYPE (field) == error_mark_node)
5609 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5610 TREE_TYPE (field), subclasses,
5614 for (i = 0; i < num; i++)
5615 classes[i] = merge_classes (subclasses[i], classes[i]);
5626 /* When the size is > 16 bytes, if the first class isn't
5627 X86_64_SSE_CLASS or any of the others isn't
5628 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5630 if (classes[0] != X86_64_SSE_CLASS)
5633 for (i = 1; i < words; i++)
5634 if (classes[i] != X86_64_SSEUP_CLASS)
5638 /* Final merger cleanup. */
5639 for (i = 0; i < words; i++)
5641 /* If one class is MEMORY, everything should be passed in memory. */
5643 if (classes[i] == X86_64_MEMORY_CLASS)
5646 /* The X86_64_SSEUP_CLASS should always be preceded by
5647 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5648 if (classes[i] == X86_64_SSEUP_CLASS
5649 && classes[i - 1] != X86_64_SSE_CLASS
5650 && classes[i - 1] != X86_64_SSEUP_CLASS)
5652 /* The first one should never be X86_64_SSEUP_CLASS. */
5653 gcc_assert (i != 0);
5654 classes[i] = X86_64_SSE_CLASS;
5657 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5658 everything should be passed in memory. */
5659 if (classes[i] == X86_64_X87UP_CLASS
5660 && (classes[i - 1] != X86_64_X87_CLASS))
5664 /* The first one should never be X86_64_X87UP_CLASS. */
5665 gcc_assert (i != 0);
5666 if (!warned && warn_psabi)
5669 inform (input_location,
5670 "The ABI of passing union with long double"
5671 " has changed in GCC 4.4");
5679 /* Compute the alignment needed. We align all types to natural boundaries,
5680 with the exception of XFmode, which is aligned to 64 bits. */
5681 if (mode != VOIDmode && mode != BLKmode)
5683 int mode_alignment = GET_MODE_BITSIZE (mode);
5686 mode_alignment = 128;
5687 else if (mode == XCmode)
5688 mode_alignment = 256;
5689 if (COMPLEX_MODE_P (mode))
5690 mode_alignment /= 2;
5691 /* Misaligned fields are always returned in memory. */
5692 if (bit_offset % mode_alignment)
5696 /* For V1xx modes, just use the base mode. */
5697 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5698 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5699 mode = GET_MODE_INNER (mode);
5701 /* Classification of atomic types. */
5706 classes[0] = X86_64_SSE_CLASS;
5709 classes[0] = X86_64_SSE_CLASS;
5710 classes[1] = X86_64_SSEUP_CLASS;
5720 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5724 classes[0] = X86_64_INTEGERSI_CLASS;
5727 else if (size <= 64)
5729 classes[0] = X86_64_INTEGER_CLASS;
5732 else if (size <= 64+32)
5734 classes[0] = X86_64_INTEGER_CLASS;
5735 classes[1] = X86_64_INTEGERSI_CLASS;
5738 else if (size <= 64+64)
5740 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5748 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5752 /* OImode shouldn't be used directly. */
5757 if (!(bit_offset % 64))
5758 classes[0] = X86_64_SSESF_CLASS;
5760 classes[0] = X86_64_SSE_CLASS;
5763 classes[0] = X86_64_SSEDF_CLASS;
5766 classes[0] = X86_64_X87_CLASS;
5767 classes[1] = X86_64_X87UP_CLASS;
5770 classes[0] = X86_64_SSE_CLASS;
5771 classes[1] = X86_64_SSEUP_CLASS;
5774 classes[0] = X86_64_SSE_CLASS;
5775 if (!(bit_offset % 64))
5781 if (!warned && warn_psabi)
5784 inform (input_location,
5785 "The ABI of passing structure with complex float"
5786 " member has changed in GCC 4.4");
5788 classes[1] = X86_64_SSESF_CLASS;
5792 classes[0] = X86_64_SSEDF_CLASS;
5793 classes[1] = X86_64_SSEDF_CLASS;
5796 classes[0] = X86_64_COMPLEX_X87_CLASS;
5799 /* These modes are larger than 16 bytes. */
5807 classes[0] = X86_64_SSE_CLASS;
5808 classes[1] = X86_64_SSEUP_CLASS;
5809 classes[2] = X86_64_SSEUP_CLASS;
5810 classes[3] = X86_64_SSEUP_CLASS;
5818 classes[0] = X86_64_SSE_CLASS;
5819 classes[1] = X86_64_SSEUP_CLASS;
5827 classes[0] = X86_64_SSE_CLASS;
5833 gcc_assert (VECTOR_MODE_P (mode));
5838 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5840 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5841 classes[0] = X86_64_INTEGERSI_CLASS;
5843 classes[0] = X86_64_INTEGER_CLASS;
5844 classes[1] = X86_64_INTEGER_CLASS;
5845 return 1 + (bytes > 8);
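/* Worked example (hypothetical type):  classifying

     struct s { double d; long l; };

   yields two eightbytes; the first classifies as SSEDF (a double at a
   64-bit aligned offset) and the second as INTEGER, so the function
   returns 2 with classes = { SSEDF, INTEGER } and the struct is passed
   in one SSE register and one integer register.  */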
5849 /* Examine the argument and set the number of registers required in each
5850 class. Return 0 iff the parameter should be passed in memory. */
5852 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5853 int *int_nregs, int *sse_nregs)
5855 enum x86_64_reg_class regclass[MAX_CLASSES];
5856 int n = classify_argument (mode, type, regclass, 0);
5862 for (n--; n >= 0; n--)
5863 switch (regclass[n])
5865 case X86_64_INTEGER_CLASS:
5866 case X86_64_INTEGERSI_CLASS:
5869 case X86_64_SSE_CLASS:
5870 case X86_64_SSESF_CLASS:
5871 case X86_64_SSEDF_CLASS:
5874 case X86_64_NO_CLASS:
5875 case X86_64_SSEUP_CLASS:
5877 case X86_64_X87_CLASS:
5878 case X86_64_X87UP_CLASS:
5882 case X86_64_COMPLEX_X87_CLASS:
5883 return in_return ? 2 : 0;
5884 case X86_64_MEMORY_CLASS:
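/* A minimal caller-side sketch (mirroring function_arg_advance_64
   below; names taken from this file):

     int int_nregs, sse_nregs;
     if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
         && int_nregs <= cum->nregs && sse_nregs <= cum->sse_nregs)
       ...use registers and decrement the counters...
     else
       ...pass the argument in memory...
*/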
5890 /* Construct container for the argument used by GCC interface. See
5891 FUNCTION_ARG for the detailed description. */
5894 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5895 const_tree type, int in_return, int nintregs, int nsseregs,
5896 const int *intreg, int sse_regno)
5898 /* The following variables hold the static issued_error state. */
5899 static bool issued_sse_arg_error;
5900 static bool issued_sse_ret_error;
5901 static bool issued_x87_ret_error;
5903 enum machine_mode tmpmode;
5905 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5906 enum x86_64_reg_class regclass[MAX_CLASSES];
5910 int needed_sseregs, needed_intregs;
5911 rtx exp[MAX_CLASSES];
5914 n = classify_argument (mode, type, regclass, 0);
5917 if (!examine_argument (mode, type, in_return, &needed_intregs,
5920 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5923 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5924 some less clueful developer tries to use floating-point anyway. */
5925 if (needed_sseregs && !TARGET_SSE)
5929 if (!issued_sse_ret_error)
5931 error ("SSE register return with SSE disabled");
5932 issued_sse_ret_error = true;
5935 else if (!issued_sse_arg_error)
5937 error ("SSE register argument with SSE disabled");
5938 issued_sse_arg_error = true;
5943 /* Likewise, error if the ABI requires us to return values in the
5944 x87 registers and the user specified -mno-80387. */
5945 if (!TARGET_80387 && in_return)
5946 for (i = 0; i < n; i++)
5947 if (regclass[i] == X86_64_X87_CLASS
5948 || regclass[i] == X86_64_X87UP_CLASS
5949 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5951 if (!issued_x87_ret_error)
5953 error ("x87 register return with x87 disabled");
5954 issued_x87_ret_error = true;
5959 /* First construct simple cases. Avoid SCmode, since we want to use a
5960 single register to pass this type. */
5961 if (n == 1 && mode != SCmode)
5962 switch (regclass[0])
5964 case X86_64_INTEGER_CLASS:
5965 case X86_64_INTEGERSI_CLASS:
5966 return gen_rtx_REG (mode, intreg[0]);
5967 case X86_64_SSE_CLASS:
5968 case X86_64_SSESF_CLASS:
5969 case X86_64_SSEDF_CLASS:
5970 if (mode != BLKmode)
5971 return gen_reg_or_parallel (mode, orig_mode,
5972 SSE_REGNO (sse_regno));
5974 case X86_64_X87_CLASS:
5975 case X86_64_COMPLEX_X87_CLASS:
5976 return gen_rtx_REG (mode, FIRST_STACK_REG);
5977 case X86_64_NO_CLASS:
5978 /* Zero sized array, struct or class. */
5983 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5984 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5985 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5987 && regclass[0] == X86_64_SSE_CLASS
5988 && regclass[1] == X86_64_SSEUP_CLASS
5989 && regclass[2] == X86_64_SSEUP_CLASS
5990 && regclass[3] == X86_64_SSEUP_CLASS
5992 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5995 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5996 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5997 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5998 && regclass[1] == X86_64_INTEGER_CLASS
5999 && (mode == CDImode || mode == TImode || mode == TFmode)
6000 && intreg[0] + 1 == intreg[1])
6001 return gen_rtx_REG (mode, intreg[0]);
6003 /* Otherwise figure out the entries of the PARALLEL. */
6004 for (i = 0; i < n; i++)
6008 switch (regclass[i])
6010 case X86_64_NO_CLASS:
6012 case X86_64_INTEGER_CLASS:
6013 case X86_64_INTEGERSI_CLASS:
6014 /* Merge TImodes on aligned occasions here too. */
6015 if (i * 8 + 8 > bytes)
6016 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6017 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6021 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6022 if (tmpmode == BLKmode)
6024 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6025 gen_rtx_REG (tmpmode, *intreg),
6029 case X86_64_SSESF_CLASS:
6030 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6031 gen_rtx_REG (SFmode,
6032 SSE_REGNO (sse_regno)),
6036 case X86_64_SSEDF_CLASS:
6037 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6038 gen_rtx_REG (DFmode,
6039 SSE_REGNO (sse_regno)),
6043 case X86_64_SSE_CLASS:
6051 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6061 && regclass[1] == X86_64_SSEUP_CLASS
6062 && regclass[2] == X86_64_SSEUP_CLASS
6063 && regclass[3] == X86_64_SSEUP_CLASS);
6070 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6071 gen_rtx_REG (tmpmode,
6072 SSE_REGNO (sse_regno)),
6081 /* Empty aligned struct, union or class. */
6085 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6086 for (i = 0; i < nexps; i++)
6087 XVECEXP (ret, 0, i) = exp [i];
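/* For the struct { double d; long l; } example above, the PARALLEL
   built here has the shape (illustrative; assumes the first free
   registers are %xmm0 and %rdi):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. each EXPR_LIST pairs a hard register with the byte offset of
   the piece it carries.  */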
6091 /* Update the data in CUM to advance over an argument of mode MODE
6092 and data type TYPE. (TYPE is null for libcalls where that information
6093 may not be available.) */
6096 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6097 const_tree type, HOST_WIDE_INT bytes,
6098 HOST_WIDE_INT words)
6114 cum->words += words;
6115 cum->nregs -= words;
6116 cum->regno += words;
6118 if (cum->nregs <= 0)
6126 /* OImode shouldn't be used directly. */
6130 if (cum->float_in_sse < 2)
6133 if (cum->float_in_sse < 1)
6150 if (!type || !AGGREGATE_TYPE_P (type))
6152 cum->sse_words += words;
6153 cum->sse_nregs -= 1;
6154 cum->sse_regno += 1;
6155 if (cum->sse_nregs <= 0)
6169 if (!type || !AGGREGATE_TYPE_P (type))
6171 cum->mmx_words += words;
6172 cum->mmx_nregs -= 1;
6173 cum->mmx_regno += 1;
6174 if (cum->mmx_nregs <= 0)
6185 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6186 const_tree type, HOST_WIDE_INT words, bool named)
6188 int int_nregs, sse_nregs;
6190 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6191 if (!named && VALID_AVX256_REG_MODE (mode))
6194 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6195 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6197 cum->nregs -= int_nregs;
6198 cum->sse_nregs -= sse_nregs;
6199 cum->regno += int_nregs;
6200 cum->sse_regno += sse_nregs;
6204 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6205 cum->words = (cum->words + align - 1) & ~(align - 1);
6206 cum->words += words;
6211 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6212 HOST_WIDE_INT words)
6214 /* Otherwise, this should be passed indirectly. */
6215 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6217 cum->words += words;
6225 /* Update the data in CUM to advance over an argument of mode MODE and
6226 data type TYPE. (TYPE is null for libcalls where that information
6227 may not be available.) */
6230 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6231 const_tree type, bool named)
6233 HOST_WIDE_INT bytes, words;
6235 if (mode == BLKmode)
6236 bytes = int_size_in_bytes (type);
6238 bytes = GET_MODE_SIZE (mode);
6239 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6242 mode = type_natural_mode (type, NULL);
6244 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6245 function_arg_advance_ms_64 (cum, bytes, words);
6246 else if (TARGET_64BIT)
6247 function_arg_advance_64 (cum, mode, type, words, named);
6249 function_arg_advance_32 (cum, mode, type, bytes, words);
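/* Illustrative trace (SysV 64-bit, hypothetical prototype):  for
   f (int, double, int), advancing over the first int moves
   cum->nregs from 6 to 5 (the int lands in %edi), the double takes
   cum->sse_nregs from 8 to 7 (%xmm0), and the second int lands in
   %esi, each step also bumping the matching regno counter.  */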
6252 /* Define where to put the arguments to a function.
6253 Value is zero to push the argument on the stack,
6254 or a hard register in which to store the argument.
6256 MODE is the argument's machine mode.
6257 TYPE is the data type of the argument (as a tree).
6258 This is null for libcalls where that information may
6260 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6261 the preceding args and about the function being called.
6262 NAMED is nonzero if this argument is a named parameter
6263 (otherwise it is an extra parameter matching an ellipsis). */
6266 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6267 enum machine_mode orig_mode, const_tree type,
6268 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6270 static bool warnedsse, warnedmmx;
6272 /* Avoid the AL settings for the Unix64 ABI. */
6273 if (mode == VOIDmode)
6289 if (words <= cum->nregs)
6291 int regno = cum->regno;
6293 /* Fastcall allocates the first two DWORD (SImode) or
6294 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6300 || (type && AGGREGATE_TYPE_P (type)))
6303 /* ECX, not EAX, is the first allocated register. */
6304 if (regno == AX_REG)
6307 return gen_rtx_REG (mode, regno);
6312 if (cum->float_in_sse < 2)
6315 if (cum->float_in_sse < 1)
6319 /* In 32bit, we pass TImode in xmm registers. */
6326 if (!type || !AGGREGATE_TYPE_P (type))
6328 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6331 warning (0, "SSE vector argument without SSE enabled "
6335 return gen_reg_or_parallel (mode, orig_mode,
6336 cum->sse_regno + FIRST_SSE_REG);
6341 /* OImode shouldn't be used directly. */
6350 if (!type || !AGGREGATE_TYPE_P (type))
6353 return gen_reg_or_parallel (mode, orig_mode,
6354 cum->sse_regno + FIRST_SSE_REG);
6364 if (!type || !AGGREGATE_TYPE_P (type))
6366 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6369 warning (0, "MMX vector argument without MMX enabled "
6373 return gen_reg_or_parallel (mode, orig_mode,
6374 cum->mmx_regno + FIRST_MMX_REG);
6383 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6384 enum machine_mode orig_mode, const_tree type, bool named)
6386 /* Handle a hidden AL argument containing the number of registers
6387 for varargs x86-64 functions. */
6388 if (mode == VOIDmode)
6389 return GEN_INT (cum->maybe_vaarg
6390 ? (cum->sse_nregs < 0
6391 ? X86_64_SSE_REGPARM_MAX
6406 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6412 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6414 &x86_64_int_parameter_registers [cum->regno],
6419 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6420 enum machine_mode orig_mode, bool named,
6421 HOST_WIDE_INT bytes)
6425 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6426 We use the value -2 to specify that the current function call is MSABI. */
6427 if (mode == VOIDmode)
6428 return GEN_INT (-2);
6430 /* If we've run out of registers, it goes on the stack. */
6431 if (cum->nregs == 0)
6434 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6436 /* Only floating point modes are passed in anything but integer regs. */
6437 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6440 regno = cum->regno + FIRST_SSE_REG;
6445 /* Unnamed floating parameters are passed in both the
6446 SSE and integer registers. */
6447 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6448 t2 = gen_rtx_REG (mode, regno);
6449 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6450 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6451 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6454 /* Handle aggregate types passed in registers. */
6455 if (orig_mode == BLKmode)
6457 if (bytes > 0 && bytes <= 8)
6458 mode = (bytes > 4 ? DImode : SImode);
6459 if (mode == BLKmode)
6463 return gen_reg_or_parallel (mode, orig_mode, regno);
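/* Illustrative mapping for the MS ABI code above (hypothetical
   prototype):  for f (int, double, int, double) the slots are
   positional, so the arguments land in %ecx, %xmm1, %r8d and %xmm3
   respectively; an SSE argument consumes its integer slot and vice
   versa, unlike the SysV ABI.  */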
6466 /* Return where to put the arguments to a function.
6467 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6469 MODE is the argument's machine mode. TYPE is the data type of the
6470 argument. It is null for libcalls where that information may not be
6471 available. CUM gives information about the preceding args and about
6472 the function being called. NAMED is nonzero if this argument is a
6473 named parameter (otherwise it is an extra parameter matching an
6477 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6478 const_tree type, bool named)
6480 enum machine_mode mode = omode;
6481 HOST_WIDE_INT bytes, words;
6483 if (mode == BLKmode)
6484 bytes = int_size_in_bytes (type);
6486 bytes = GET_MODE_SIZE (mode);
6487 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6489 /* To simplify the code below, represent vector types with a vector mode
6490 even if MMX/SSE are not active. */
6491 if (type && TREE_CODE (type) == VECTOR_TYPE)
6492 mode = type_natural_mode (type, cum);
6494 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6495 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6496 else if (TARGET_64BIT)
6497 return function_arg_64 (cum, mode, omode, type, named);
6499 return function_arg_32 (cum, mode, omode, type, bytes, words);
6502 /* A C expression that indicates when an argument must be passed by
6503 reference. If nonzero for an argument, a copy of that argument is
6504 made in memory and a pointer to the argument is passed instead of
6505 the argument itself. The pointer is passed in whatever way is
6506 appropriate for passing a pointer to that type. */
6509 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6510 enum machine_mode mode ATTRIBUTE_UNUSED,
6511 const_tree type, bool named ATTRIBUTE_UNUSED)
6513 /* See Windows x64 Software Convention. */
6514 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6516 int msize = (int) GET_MODE_SIZE (mode);
6519 /* Arrays are passed by reference. */
6520 if (TREE_CODE (type) == ARRAY_TYPE)
6523 if (AGGREGATE_TYPE_P (type))
6525 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6526 are passed by reference. */
6527 msize = int_size_in_bytes (type);
6531 /* __m128 is passed by reference. */
6533 case 1: case 2: case 4: case 8:
6539 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
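/* Worked examples of the MS ABI rules above (hypothetical types):
   struct { int a; int b; } is 8 bytes and is passed by value in a
   register; struct { int a; int b; int c; } is 12 bytes, outside
   the allowed 1/2/4/8 sizes, and is passed by reference, as is the
   16-byte __m128.  */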
6545 /* Return true when TYPE should be 128bit aligned for the 32bit argument passing ABI. */
6548 contains_aligned_value_p (const_tree type)
6550 enum machine_mode mode = TYPE_MODE (type);
6551 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6555 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6557 if (TYPE_ALIGN (type) < 128)
6560 if (AGGREGATE_TYPE_P (type))
6562 /* Walk the aggregates recursively. */
6563 switch (TREE_CODE (type))
6567 case QUAL_UNION_TYPE:
6571 /* Walk all the structure fields. */
6572 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6574 if (TREE_CODE (field) == FIELD_DECL
6575 && contains_aligned_value_p (TREE_TYPE (field)))
6582 /* Just in case some language passes arrays by value. */
6583 if (contains_aligned_value_p (TREE_TYPE (type)))
6594 /* Gives the alignment boundary, in bits, of an argument with the
6595 specified mode and type. */
6598 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6603 /* Since the main variant type is used for the call, convert the
6604 type to its main variant. */
6605 type = TYPE_MAIN_VARIANT (type);
6606 align = TYPE_ALIGN (type);
6609 align = GET_MODE_ALIGNMENT (mode);
6610 if (align < PARM_BOUNDARY)
6611 align = PARM_BOUNDARY;
6612 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6613 natural boundaries. */
6614 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6616 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
6617 make an exception for SSE modes since these require 128bit alignment.
6620 The handling here differs from field_alignment. ICC aligns MMX
6621 arguments to 4 byte boundaries, while structure fields are aligned
6622 to 8 byte boundaries. */
6625 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6626 align = PARM_BOUNDARY;
6630 if (!contains_aligned_value_p (type))
6631 align = PARM_BOUNDARY;
6634 if (align > BIGGEST_ALIGNMENT)
6635 align = BIGGEST_ALIGNMENT;
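/* Examples of the resulting boundaries (illustrative):  on 32-bit,
   a double argument is aligned to PARM_BOUNDARY (32 bits) despite
   its 64-bit natural alignment, an __m128 argument keeps its 128-bit
   alignment when SSE is enabled, and _Decimal128 and __float128
   always keep their natural 128-bit alignment.  */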
6639 /* Return true if N is a possible register number of function value. */
6642 ix86_function_value_regno_p (const unsigned int regno)
6649 case FIRST_FLOAT_REG:
6650 /* TODO: The function should depend on the current function's ABI, but
6651 builtins.c would need updating then. Therefore we use the default ABI. */
6653 if (TARGET_64BIT && ix86_abi == MS_ABI)
6655 return TARGET_FLOAT_RETURNS_IN_80387;
6661 if (TARGET_MACHO || TARGET_64BIT)
6669 /* Define how to find the value returned by a function.
6670 VALTYPE is the data type of the value (as a tree).
6671 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6672 otherwise, FUNC is 0. */
6675 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6676 const_tree fntype, const_tree fn)
6680 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6681 we normally prevent this case when mmx is not available. However
6682 some ABIs may require the result to be returned like DImode. */
6683 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6684 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6686 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6687 we prevent this case when sse is not available. However some ABIs
6688 may require the result to be returned like integer TImode. */
6689 else if (mode == TImode
6690 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6691 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6693 /* 32-byte vector modes in %ymm0. */
6694 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6695 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6697 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6698 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6699 regno = FIRST_FLOAT_REG;
6701 /* Most things go in %eax. */
6704 /* Override the FP return register with %xmm0 for local functions when
6705 SSE math is enabled or for functions with the sseregparm attribute. */
6706 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6708 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6709 if ((sse_level >= 1 && mode == SFmode)
6710 || (sse_level == 2 && mode == DFmode))
6711 regno = FIRST_SSE_REG;
6714 /* OImode shouldn't be used directly. */
6715 gcc_assert (mode != OImode);
6717 return gen_rtx_REG (orig_mode, regno);
6721 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6726 /* Handle libcalls, which don't provide a type node. */
6727 if (valtype == NULL)
6739 return gen_rtx_REG (mode, FIRST_SSE_REG);
6742 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6746 return gen_rtx_REG (mode, AX_REG);
6750 ret = construct_container (mode, orig_mode, valtype, 1,
6751 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6752 x86_64_int_return_registers, 0);
6754 /* For zero sized structures, construct_container returns NULL, but we
6755 need to keep the rest of the compiler happy by returning a meaningful value. */
6757 ret = gen_rtx_REG (orig_mode, AX_REG);
6763 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6765 unsigned int regno = AX_REG;
6769 switch (GET_MODE_SIZE (mode))
6772 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6773 && !COMPLEX_MODE_P (mode))
6774 regno = FIRST_SSE_REG;
6778 if (mode == SFmode || mode == DFmode)
6779 regno = FIRST_SSE_REG;
6785 return gen_rtx_REG (orig_mode, regno);
6789 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6790 enum machine_mode orig_mode, enum machine_mode mode)
6792 const_tree fn, fntype;
6795 if (fntype_or_decl && DECL_P (fntype_or_decl))
6796 fn = fntype_or_decl;
6797 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6799 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6800 return function_value_ms_64 (orig_mode, mode);
6801 else if (TARGET_64BIT)
6802 return function_value_64 (orig_mode, mode, valtype);
6804 return function_value_32 (orig_mode, mode, fntype, fn);
6808 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6809 bool outgoing ATTRIBUTE_UNUSED)
6811 enum machine_mode mode, orig_mode;
6813 orig_mode = TYPE_MODE (valtype);
6814 mode = type_natural_mode (valtype, NULL);
6815 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6819 ix86_libcall_value (enum machine_mode mode)
6821 return ix86_function_value_1 (NULL, NULL, mode, mode);
6824 /* Return true iff TYPE is returned in memory. */
6826 static bool ATTRIBUTE_UNUSED
6827 return_in_memory_32 (const_tree type, enum machine_mode mode)
6831 if (mode == BLKmode)
6834 size = int_size_in_bytes (type);
6836 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6839 if (VECTOR_MODE_P (mode) || mode == TImode)
6841 /* User-created vectors small enough to fit in EAX. */
6845 /* MMX/3dNow values are returned in MM0,
6846 except when it doesn't exist. */
6850 /* SSE values are returned in XMM0, except when it doesn't exist. */
6854 /* AVX values are returned in YMM0, except when it doesn't exist. */
6865 /* OImode shouldn't be used directly. */
6866 gcc_assert (mode != OImode);
6871 static bool ATTRIBUTE_UNUSED
6872 return_in_memory_64 (const_tree type, enum machine_mode mode)
6874 int needed_intregs, needed_sseregs;
6875 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6878 static bool ATTRIBUTE_UNUSED
6879 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6881 HOST_WIDE_INT size = int_size_in_bytes (type);
6883 /* __m128 is returned in xmm0. */
6884 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6885 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6888 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6889 return size != 1 && size != 2 && size != 4 && size != 8;
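/* Illustrative consequences of the checks above:  a 16-byte __m128
   value is returned in %xmm0, an 8-byte struct is returned in %rax,
   and a 12-byte struct fails the 1/2/4/8 size test and is returned
   in memory through a hidden pointer.  */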
6893 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6895 #ifdef SUBTARGET_RETURN_IN_MEMORY
6896 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6898 const enum machine_mode mode = type_natural_mode (type, NULL);
6902 if (ix86_function_type_abi (fntype) == MS_ABI)
6903 return return_in_memory_ms_64 (type, mode);
6905 return return_in_memory_64 (type, mode);
6908 return return_in_memory_32 (type, mode);
6912 /* Return false iff TYPE is returned in memory. This version is used
6913 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6914 but differs notably in that when MMX is available, 8-byte vectors
6915 are returned in memory, rather than in MMX registers. */
6918 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6921 enum machine_mode mode = type_natural_mode (type, NULL);
6924 return return_in_memory_64 (type, mode);
6926 if (mode == BLKmode)
6929 size = int_size_in_bytes (type);
6931 if (VECTOR_MODE_P (mode))
6933 /* Return in memory only if MMX registers *are* available. This
6934 seems backwards, but it is consistent with the existing Solaris x86 ABI. */
6941 else if (mode == TImode)
6943 else if (mode == XFmode)
6949 /* When returning SSE vector types, we have a choice of either
6950 (1) being abi incompatible with a -march switch, or
6951 (2) generating an error.
6952 Given no good solution, I think the safest thing is one warning.
6953 The user won't be able to use -Werror, but....
6955 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6956 called in response to actually generating a caller or callee that
6957 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6958 via aggregate_value_p for general type probing from tree-ssa. */
6961 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6963 static bool warnedsse, warnedmmx;
6965 if (!TARGET_64BIT && type)
6967 /* Look at the return type of the function, not the function type. */
6968 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6970 if (!TARGET_SSE && !warnedsse)
6973 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6976 warning (0, "SSE vector return without SSE enabled "
6981 if (!TARGET_MMX && !warnedmmx)
6983 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6986 warning (0, "MMX vector return without MMX enabled "
6996 /* Create the va_list data type. */
6998 /* Returns the calling convention specific va_list data type.
6999 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7002 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7004 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7006 /* For i386 we use a plain pointer to the argument area. */
7007 if (!TARGET_64BIT || abi == MS_ABI)
7008 return build_pointer_type (char_type_node);
7010 record = lang_hooks.types.make_type (RECORD_TYPE);
7011 type_decl = build_decl (BUILTINS_LOCATION,
7012 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7014 f_gpr = build_decl (BUILTINS_LOCATION,
7015 FIELD_DECL, get_identifier ("gp_offset"),
7016 unsigned_type_node);
7017 f_fpr = build_decl (BUILTINS_LOCATION,
7018 FIELD_DECL, get_identifier ("fp_offset"),
7019 unsigned_type_node);
7020 f_ovf = build_decl (BUILTINS_LOCATION,
7021 FIELD_DECL, get_identifier ("overflow_arg_area"),
7023 f_sav = build_decl (BUILTINS_LOCATION,
7024 FIELD_DECL, get_identifier ("reg_save_area"),
7027 va_list_gpr_counter_field = f_gpr;
7028 va_list_fpr_counter_field = f_fpr;
7030 DECL_FIELD_CONTEXT (f_gpr) = record;
7031 DECL_FIELD_CONTEXT (f_fpr) = record;
7032 DECL_FIELD_CONTEXT (f_ovf) = record;
7033 DECL_FIELD_CONTEXT (f_sav) = record;
7035 TREE_CHAIN (record) = type_decl;
7036 TYPE_NAME (record) = type_decl;
7037 TYPE_FIELDS (record) = f_gpr;
7038 DECL_CHAIN (f_gpr) = f_fpr;
7039 DECL_CHAIN (f_fpr) = f_ovf;
7040 DECL_CHAIN (f_ovf) = f_sav;
7042 layout_type (record);
7044 /* The correct type is an array type of one element. */
7045 return build_array_type (record, build_index_type (size_zero_node));
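/* For reference, the record built above matches the psABI's va_list
   layout:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
*/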
7048 /* Setup the builtin va_list data type and for 64-bit the additional
7049 calling convention specific va_list data types. */
7052 ix86_build_builtin_va_list (void)
7054 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7056 /* Initialize ABI specific va_list builtin types. */
7060 if (ix86_abi == MS_ABI)
7062 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7063 if (TREE_CODE (t) != RECORD_TYPE)
7064 t = build_variant_type_copy (t);
7065 sysv_va_list_type_node = t;
7070 if (TREE_CODE (t) != RECORD_TYPE)
7071 t = build_variant_type_copy (t);
7072 sysv_va_list_type_node = t;
7074 if (ix86_abi != MS_ABI)
7076 t = ix86_build_builtin_va_list_abi (MS_ABI);
7077 if (TREE_CODE (t) != RECORD_TYPE)
7078 t = build_variant_type_copy (t);
7079 ms_va_list_type_node = t;
7084 if (TREE_CODE (t) != RECORD_TYPE)
7085 t = build_variant_type_copy (t);
7086 ms_va_list_type_node = t;
7093 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7096 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7102 /* GPR size of varargs save area. */
7103 if (cfun->va_list_gpr_size)
7104 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7106 ix86_varargs_gpr_size = 0;
7108 /* FPR size of varargs save area. We don't need it if we don't pass
7109 anything in SSE registers. */
7110 if (TARGET_SSE && cfun->va_list_fpr_size)
7111 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7113 ix86_varargs_fpr_size = 0;
7115 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7118 save_area = frame_pointer_rtx;
7119 set = get_varargs_alias_set ();
7121 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7122 if (max > X86_64_REGPARM_MAX)
7123 max = X86_64_REGPARM_MAX;
7125 for (i = cum->regno; i < max; i++)
7127 mem = gen_rtx_MEM (Pmode,
7128 plus_constant (save_area, i * UNITS_PER_WORD));
7129 MEM_NOTRAP_P (mem) = 1;
7130 set_mem_alias_set (mem, set);
7131 emit_move_insn (mem, gen_rtx_REG (Pmode,
7132 x86_64_int_parameter_registers[i]));
7135 if (ix86_varargs_fpr_size)
7137 enum machine_mode smode;
7140 /* Now emit code to save SSE registers. The AX parameter contains the number
7141 of SSE parameter registers used to call this function, though all we
7142 actually check here is the zero/non-zero status. */
7144 label = gen_label_rtx ();
7145 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7146 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7149 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7150 we used movdqa (i.e. TImode) instead? Perhaps even better would
7151 be if we could determine the real mode of the data, via a hook
7152 into pass_stdarg. Ignore all that for now. */
7154 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7155 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7157 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7158 if (max > X86_64_SSE_REGPARM_MAX)
7159 max = X86_64_SSE_REGPARM_MAX;
7161 for (i = cum->sse_regno; i < max; ++i)
7163 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7164 mem = gen_rtx_MEM (smode, mem);
7165 MEM_NOTRAP_P (mem) = 1;
7166 set_mem_alias_set (mem, set);
7167 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7169 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
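/* The resulting register save area layout (assuming both parts are
   needed):  X86_64_REGPARM_MAX (6) general registers * 8 bytes = 48
   bytes of integer slots, followed by X86_64_SSE_REGPARM_MAX (8)
   SSE registers * 16 bytes = 128 bytes of vector slots; gp_offset
   and fp_offset index into this block from va_arg.  */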
7177 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7179 alias_set_type set = get_varargs_alias_set ();
7182 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7186 mem = gen_rtx_MEM (Pmode,
7187 plus_constant (virtual_incoming_args_rtx,
7188 i * UNITS_PER_WORD));
7189 MEM_NOTRAP_P (mem) = 1;
7190 set_mem_alias_set (mem, set);
7192 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7193 emit_move_insn (mem, reg);
7198 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7199 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7202 CUMULATIVE_ARGS next_cum;
7205 /* This argument doesn't appear to be used anymore. Which is good,
7206 because the old code here didn't suppress rtl generation. */
7207 gcc_assert (!no_rtl);
7212 fntype = TREE_TYPE (current_function_decl);
7214 /* For varargs, we do not want to skip the dummy va_dcl argument.
7215 For stdargs, we do want to skip the last named argument. */
7217 if (stdarg_p (fntype))
7218 ix86_function_arg_advance (&next_cum, mode, type, true);
7220 if (cum->call_abi == MS_ABI)
7221 setup_incoming_varargs_ms_64 (&next_cum);
7223 setup_incoming_varargs_64 (&next_cum);
7226 /* Check whether TYPE is a va_list of the plain char * kind. */
7229 is_va_list_char_pointer (tree type)
7233 /* For 32-bit it is always true. */
7236 canonic = ix86_canonical_va_list_type (type);
7237 return (canonic == ms_va_list_type_node
7238 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7241 /* Implement va_start. */
7244 ix86_va_start (tree valist, rtx nextarg)
7246 HOST_WIDE_INT words, n_gpr, n_fpr;
7247 tree f_gpr, f_fpr, f_ovf, f_sav;
7248 tree gpr, fpr, ovf, sav, t;
7251 /* Only the 64bit target needs something special. */
7252 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7254 std_expand_builtin_va_start (valist, nextarg);
7258 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7259 f_fpr = DECL_CHAIN (f_gpr);
7260 f_ovf = DECL_CHAIN (f_fpr);
7261 f_sav = DECL_CHAIN (f_ovf);
7263 valist = build_simple_mem_ref (valist);
7264 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7265 /* The following should be folded into the MEM_REF offset. */
7266 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7268 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7270 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7272 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7275 /* Count number of gp and fp argument registers used. */
7276 words = crtl->args.info.words;
7277 n_gpr = crtl->args.info.regno;
7278 n_fpr = crtl->args.info.sse_regno;
7280 if (cfun->va_list_gpr_size)
7282 type = TREE_TYPE (gpr);
7283 t = build2 (MODIFY_EXPR, type,
7284 gpr, build_int_cst (type, n_gpr * 8));
7285 TREE_SIDE_EFFECTS (t) = 1;
7286 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7289 if (TARGET_SSE && cfun->va_list_fpr_size)
7291 type = TREE_TYPE (fpr);
7292 t = build2 (MODIFY_EXPR, type, fpr,
7293 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7294 TREE_SIDE_EFFECTS (t) = 1;
7295 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7298 /* Find the overflow area. */
7299 type = TREE_TYPE (ovf);
7300 t = make_tree (type, crtl->args.internal_arg_pointer);
7302 t = build2 (POINTER_PLUS_EXPR, type, t,
7303 size_int (words * UNITS_PER_WORD));
7304 t = build2 (MODIFY_EXPR, type, ovf, t);
7305 TREE_SIDE_EFFECTS (t) = 1;
7306 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7308 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7310 /* Find the register save area.
7311 The function prologue saves it right above the stack frame. */
7312 type = TREE_TYPE (sav);
7313 t = make_tree (type, frame_pointer_rtx);
7314 if (!ix86_varargs_gpr_size)
7315 t = build2 (POINTER_PLUS_EXPR, type, t,
7316 size_int (-8 * X86_64_REGPARM_MAX));
7317 t = build2 (MODIFY_EXPR, type, sav, t);
7318 TREE_SIDE_EFFECTS (t) = 1;
7319 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
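/* Worked example (hypothetical variadic f (int, double, ...), which
   consumes one GP and one SSE register for the named arguments):
   va_start stores gp_offset = 1*8 = 8 and
   fp_offset = 1*16 + 8*X86_64_REGPARM_MAX = 64, so the first integer
   va_arg reads reg_save_area + 8 and the first double va_arg reads
   reg_save_area + 64.  */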
7323 /* Implement va_arg. */
7326 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7329 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7330 tree f_gpr, f_fpr, f_ovf, f_sav;
7331 tree gpr, fpr, ovf, sav, t;
7333 tree lab_false, lab_over = NULL_TREE;
7338 enum machine_mode nat_mode;
7339 unsigned int arg_boundary;
7341 /* Only the 64bit target needs something special. */
7342 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7343 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7345 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7346 f_fpr = DECL_CHAIN (f_gpr);
7347 f_ovf = DECL_CHAIN (f_fpr);
7348 f_sav = DECL_CHAIN (f_ovf);
7350 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7351 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7352 valist = build_va_arg_indirect_ref (valist);
7353 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7354 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7355 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7357 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7359 type = build_pointer_type (type);
7360 size = int_size_in_bytes (type);
7361 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7363 nat_mode = type_natural_mode (type, NULL);
7372 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7373 if (ix86_cfun_abi () == SYSV_ABI)
7380 container = construct_container (nat_mode, TYPE_MODE (type),
7381 type, 0, X86_64_REGPARM_MAX,
7382 X86_64_SSE_REGPARM_MAX, intreg,
7387 /* Pull the value out of the saved registers. */
7389 addr = create_tmp_var (ptr_type_node, "addr");
7393 int needed_intregs, needed_sseregs;
7395 tree int_addr, sse_addr;
7397 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7398 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7400 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7402 need_temp = (!REG_P (container)
7403 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7404 || TYPE_ALIGN (type) > 128));
7406 /* If we are passing a structure, verify that it is a consecutive block
7407 in the register save area. If not, we need to do moves. */
7408 if (!need_temp && !REG_P (container))
7410 /* Verify that all registers are strictly consecutive. */
7411 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7415 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7417 rtx slot = XVECEXP (container, 0, i);
7418 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7419 || INTVAL (XEXP (slot, 1)) != i * 16)
7427 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7429 rtx slot = XVECEXP (container, 0, i);
7430 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7431 || INTVAL (XEXP (slot, 1)) != i * 8)
7443 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7444 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7447 /* First ensure that we fit completely in registers. */
7450 t = build_int_cst (TREE_TYPE (gpr),
7451 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7452 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7453 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7454 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7455 gimplify_and_add (t, pre_p);
7459 t = build_int_cst (TREE_TYPE (fpr),
7460 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7461 + X86_64_REGPARM_MAX * 8);
7462 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7463 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7464 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7465 gimplify_and_add (t, pre_p);
7468 /* Compute index to start of area used for integer regs. */
7471 /* int_addr = gpr + sav; */
7472 t = fold_convert (sizetype, gpr);
7473 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7474 gimplify_assign (int_addr, t, pre_p);
7478 /* sse_addr = fpr + sav; */
7479 t = fold_convert (sizetype, fpr);
7480 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7481 gimplify_assign (sse_addr, t, pre_p);
7485 int i, prev_size = 0;
7486 tree temp = create_tmp_var (type, "va_arg_tmp");
7489 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7490 gimplify_assign (addr, t, pre_p);
7492 for (i = 0; i < XVECLEN (container, 0); i++)
7494 rtx slot = XVECEXP (container, 0, i);
7495 rtx reg = XEXP (slot, 0);
7496 enum machine_mode mode = GET_MODE (reg);
7502 tree dest_addr, dest;
7503 int cur_size = GET_MODE_SIZE (mode);
7505 if (prev_size + cur_size > size)
7507 cur_size = size - prev_size;
7508 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7509 if (mode == BLKmode)
7512 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7513 if (mode == GET_MODE (reg))
7514 addr_type = build_pointer_type (piece_type);
7516 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7518 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7521 if (SSE_REGNO_P (REGNO (reg)))
7523 src_addr = sse_addr;
7524 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7528 src_addr = int_addr;
7529 src_offset = REGNO (reg) * 8;
7531 src_addr = fold_convert (addr_type, src_addr);
7532 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7533 size_int (src_offset));
7535 dest_addr = fold_convert (daddr_type, addr);
7536 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7537 size_int (INTVAL (XEXP (slot, 1))));
7538 if (cur_size == GET_MODE_SIZE (mode))
7540 src = build_va_arg_indirect_ref (src_addr);
7541 dest = build_va_arg_indirect_ref (dest_addr);
7543 gimplify_assign (dest, src, pre_p);
7548 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7549 3, dest_addr, src_addr,
7550 size_int (cur_size));
7551 gimplify_and_add (copy, pre_p);
7553 prev_size += cur_size;
7559 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7560 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7561 gimplify_assign (gpr, t, pre_p);
7566 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7567 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7568 gimplify_assign (fpr, t, pre_p);
7571 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7573 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7576 /* ... otherwise out of the overflow area. */
7578 /* When we align a parameter on the stack for the caller, if the
7579 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it
7580 will be aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the
7581 callee here with the caller. */
7582 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7583 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7584 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7586 /* Care for on-stack alignment if needed. */
7587 if (arg_boundary <= 64 || size == 0)
7591 HOST_WIDE_INT align = arg_boundary / 8;
7592 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7593 size_int (align - 1));
7594 t = fold_convert (sizetype, t);
7595 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7597 t = fold_convert (TREE_TYPE (ovf), t);
7600 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7601 gimplify_assign (addr, t, pre_p);
7603 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7604 size_int (rsize * UNITS_PER_WORD));
7605 gimplify_assign (unshare_expr (ovf), t, pre_p);
7608 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7610 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7611 addr = fold_convert (ptrtype, addr);
7614 addr = build_va_arg_indirect_ref (addr);
7615 return build_va_arg_indirect_ref (addr);
7618 /* Return true if OPNUM's MEM should be matched
7619 in movabs* patterns. */
7622 ix86_check_movabs (rtx insn, int opnum)
7626 set = PATTERN (insn);
7627 if (GET_CODE (set) == PARALLEL)
7628 set = XVECEXP (set, 0, 0);
7629 gcc_assert (GET_CODE (set) == SET);
7630 mem = XEXP (set, opnum);
7631 while (GET_CODE (mem) == SUBREG)
7632 mem = SUBREG_REG (mem);
7633 gcc_assert (MEM_P (mem));
7634 return volatile_ok || !MEM_VOLATILE_P (mem);
7637 /* Initialize the table of extra 80387 mathematical constants. */
7640 init_ext_80387_constants (void)
7642 static const char * cst[5] =
7644 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7645 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7646 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7647 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7648 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7652 for (i = 0; i < 5; i++)
7654 real_from_string (&ext_80387_constants_table[i], cst[i]);
7655 /* Ensure each constant is rounded to XFmode precision. */
7656 real_convert (&ext_80387_constants_table[i],
7657 XFmode, &ext_80387_constants_table[i]);
7660 ext_80387_constants_init = 1;
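/* These five constants correspond one-to-one to the x87 load
   opcodes:  fldlg2 = log10(2), fldln2 = ln(2), fldl2e = log2(e),
   fldl2t = log2(10) and fldpi = pi.  A table hit lets the compiler
   emit the dedicated load instruction instead of a constant pool
   reference.  */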
7663 /* Return non-zero if the constant is something that
7664 can be loaded with a special instruction. */
7667 standard_80387_constant_p (rtx x)
7669 enum machine_mode mode = GET_MODE (x);
7673 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7676 if (x == CONST0_RTX (mode))
7678 if (x == CONST1_RTX (mode))
7681 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7683 /* For XFmode constants, try to find a special 80387 instruction when
7684 optimizing for size or on those CPUs that benefit from them. */
7686 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7690 if (! ext_80387_constants_init)
7691 init_ext_80387_constants ();
7693 for (i = 0; i < 5; i++)
7694 if (real_identical (&r, &ext_80387_constants_table[i]))
7698 /* A load of the constant -0.0 or -1.0 will be split into an
7699 fldz;fchs or fld1;fchs sequence. */
7700 if (real_isnegzero (&r))
7702 if (real_identical (&r, &dconstm1))
7708 /* Return the opcode of the special instruction to be used to load the constant X. */
7712 standard_80387_constant_opcode (rtx x)
7714 switch (standard_80387_constant_p (x))
7738 /* Return the CONST_DOUBLE representing the 80387 constant that is
7739 loaded by the specified special instruction. The argument IDX
7740 matches the return value from standard_80387_constant_p. */
7743 standard_80387_constant_rtx (int idx)
7747 if (! ext_80387_constants_init)
7748 init_ext_80387_constants ();
7764 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7768 /* Return 1 if X is all 0s and 2 if X is all 1s
7769 in a supported SSE vector mode. */
7772 standard_sse_constant_p (rtx x)
7774 enum machine_mode mode = GET_MODE (x);
7776 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7778 if (vector_all_ones_operand (x, mode))
7794 /* Return the opcode of the special instruction to be used to load the constant X. */
7798 standard_sse_constant_opcode (rtx insn, rtx x)
7800 switch (standard_sse_constant_p (x))
7803 switch (get_attr_mode (insn))
7806 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7808 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7809 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7811 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7813 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7814 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7816 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7818 return "vxorps\t%x0, %x0, %x0";
7820 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7821 return "vxorps\t%x0, %x0, %x0";
7823 return "vxorpd\t%x0, %x0, %x0";
7825 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7826 return "vxorps\t%x0, %x0, %x0";
7828 return "vpxor\t%x0, %x0, %x0";
7833 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7840 /* Returns true if OP contains a symbol reference. */
7843 symbolic_reference_mentioned_p (rtx op)
7848 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7851 fmt = GET_RTX_FORMAT (GET_CODE (op));
7852 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7858 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7859 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7863 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7870 /* Return true if it is appropriate to emit `ret' instructions in the
7871 body of a function. Do this only if the epilogue is simple, needing a
7872 couple of insns. Prior to reloading, we can't tell how many registers
7873 must be saved, so return false then. Return false if there is no frame
7874 marker to de-allocate. */
7877 ix86_can_use_return_insn_p (void)
7879 struct ix86_frame frame;
7881 if (! reload_completed || frame_pointer_needed)
7884 /* Don't allow more than 32k pop, since that's all we can do
7885 with one instruction. */
7886 if (crtl->args.pops_args && crtl->args.size >= 32768)
7889 ix86_compute_frame_layout (&frame);
7890 return (frame.stack_pointer_offset == UNITS_PER_WORD
7891 && (frame.nregs + frame.nsseregs) == 0);
7894 /* Value should be nonzero if functions must have frame pointers.
7895 Zero means the frame pointer need not be set up (and parms may
7896 be accessed via the stack pointer) in functions that seem suitable. */
7899 ix86_frame_pointer_required (void)
7901 /* If we accessed previous frames, then the generated code expects
7902 to be able to access the saved ebp value in our frame. */
7903 if (cfun->machine->accesses_prev_frame)
7906 /* Several x86 OSes need a frame pointer for other reasons,
7907 usually pertaining to setjmp. */
7908 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7911 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7912 turns off the frame pointer by default. Turn it back on now if
7913 we've not got a leaf function. */
7914 if (TARGET_OMIT_LEAF_FRAME_POINTER
7915 && (!current_function_is_leaf
7916 || ix86_current_function_calls_tls_descriptor))
7919 if (crtl->profile && !flag_fentry)
7925 /* Record that the current function accesses previous call frames. */
7928 ix86_setup_frame_addresses (void)
7930 cfun->machine->accesses_prev_frame = 1;
7933 #ifndef USE_HIDDEN_LINKONCE
7934 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7935 # define USE_HIDDEN_LINKONCE 1
7937 # define USE_HIDDEN_LINKONCE 0
7941 static int pic_labels_used;
7943 /* Fills in the label name that should be used for a pc thunk for
7944 the given register. */
7947 get_pc_thunk_name (char name[32], unsigned int regno)
7949 gcc_assert (!TARGET_64BIT);
7951 if (USE_HIDDEN_LINKONCE)
7952 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7954 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7958 /* This function generates code for -fpic that loads %ebx with
7959 the return address of the caller and then returns. */
7962 ix86_code_end (void)
7967 for (regno = AX_REG; regno <= SP_REG; regno++)
7972 if (!(pic_labels_used & (1 << regno)))
7975 get_pc_thunk_name (name, regno);
7977 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7978 get_identifier (name),
7979 build_function_type (void_type_node, void_list_node));
7980 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7981 NULL_TREE, void_type_node);
7982 TREE_PUBLIC (decl) = 1;
7983 TREE_STATIC (decl) = 1;
7988 switch_to_section (darwin_sections[text_coal_section]);
7989 fputs ("\t.weak_definition\t", asm_out_file);
7990 assemble_name (asm_out_file, name);
7991 fputs ("\n\t.private_extern\t", asm_out_file);
7992 assemble_name (asm_out_file, name);
7993 putc ('\n', asm_out_file);
7994 ASM_OUTPUT_LABEL (asm_out_file, name);
7995 DECL_WEAK (decl) = 1;
7999 if (USE_HIDDEN_LINKONCE)
8001 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8003 targetm.asm_out.unique_section (decl, 0);
8004 switch_to_section (get_named_section (decl, NULL, 0));
8006 targetm.asm_out.globalize_label (asm_out_file, name);
8007 fputs ("\t.hidden\t", asm_out_file);
8008 assemble_name (asm_out_file, name);
8009 putc ('\n', asm_out_file);
8010 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8014 switch_to_section (text_section);
8015 ASM_OUTPUT_LABEL (asm_out_file, name);
8018 DECL_INITIAL (decl) = make_node (BLOCK);
8019 current_function_decl = decl;
8020 init_function_start (decl);
8021 first_function_block_is_cold = false;
8022 /* Make sure unwind info is emitted for the thunk if needed. */
8023 final_start_function (emit_barrier (), asm_out_file, 1);
8025 /* Pad stack IP move with 4 instructions (two NOPs count
8026 as one instruction). */
8027 if (TARGET_PAD_SHORT_FUNCTION)
8032 fputs ("\tnop\n", asm_out_file);
8035 xops[0] = gen_rtx_REG (Pmode, regno);
8036 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8037 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8038 fputs ("\tret\n", asm_out_file);
8039 final_end_function ();
8040 init_insn_lengths ();
8041 free_after_compilation (cfun);
8043 current_function_decl = NULL;
8047 /* Emit code for the SET_GOT patterns. */
8050 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8056 if (TARGET_VXWORKS_RTP && flag_pic)
8058 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8059 xops[2] = gen_rtx_MEM (Pmode,
8060 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8061 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8063 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8064 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8065 an unadorned address. */
8066 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8067 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8068 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8072 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8074 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8076 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8079 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8082 output_asm_insn ("call\t%a2", xops);
8083 #ifdef DWARF2_UNWIND_INFO
8084 /* The call to the next label acts as a push. */
8085 if (dwarf2out_do_frame ())
8089 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8090 gen_rtx_PLUS (Pmode,
8093 RTX_FRAME_RELATED_P (insn) = 1;
8094 dwarf2out_frame_debug (insn, true);
8101 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8102 is what will be referenced by the Mach-O PIC subsystem. */
8104 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8107 targetm.asm_out.internal_label (asm_out_file, "L",
8108 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8112 output_asm_insn ("pop%z0\t%0", xops);
8113 #ifdef DWARF2_UNWIND_INFO
8114 /* The pop is a pop and clobbers dest, but doesn't restore it
8115 for unwind info purposes. */
8116 if (dwarf2out_do_frame ())
8120 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8121 dwarf2out_frame_debug (insn, true);
8122 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8123 gen_rtx_PLUS (Pmode,
8126 RTX_FRAME_RELATED_P (insn) = 1;
8127 dwarf2out_frame_debug (insn, true);
8136 get_pc_thunk_name (name, REGNO (dest));
8137 pic_labels_used |= 1 << REGNO (dest);
8139 #ifdef DWARF2_UNWIND_INFO
8140 /* Ensure all queued register saves are flushed before the
8142 if (dwarf2out_do_frame ())
8143 dwarf2out_flush_queued_reg_saves ();
8145 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8146 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8147 output_asm_insn ("call\t%X2", xops);
8148 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8149 is what will be referenced by the Mach-O PIC subsystem. */
8152 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8154 targetm.asm_out.internal_label (asm_out_file, "L",
8155 CODE_LABEL_NUMBER (label));
8162 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8163 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8165 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
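/* Illustrative, non-normative sketch (registers and labels are examples):
   for 32-bit PIC without the pc-thunk, the call/pop form above emits
   essentially

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   i.e. the call/pop pair materializes the PC and the add rebases it to
   the GOT.  The thunk path instead calls a shared get-pc thunk, whose
   body is the "mov (%esp), %reg; ret" sequence emitted earlier.  */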
8170 /* Generate a "push" pattern for input ARG. */
8175 struct machine_function *m = cfun->machine;
8177 if (m->fs.cfa_reg == stack_pointer_rtx)
8178 m->fs.cfa_offset += UNITS_PER_WORD;
8179 m->fs.sp_offset += UNITS_PER_WORD;
8181 return gen_rtx_SET (VOIDmode,
8183 gen_rtx_PRE_DEC (Pmode,
8184 stack_pointer_rtx)),
8188 /* Generate a "pop" pattern for input ARG. */
8193 return gen_rtx_SET (VOIDmode,
8196 gen_rtx_POST_INC (Pmode,
8197 stack_pointer_rtx)));
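/* Illustrative shapes of the patterns built above (assuming 32-bit SImode
   and %eax as the operand):
     gen_push:  (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))
     gen_pop:   (set (reg:SI ax) (mem:SI (post_inc:SI (reg:SI sp))))  */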
8200 /* Return >= 0 if there is an unused call-clobbered register available
8201 for the entire function. */
8204 ix86_select_alt_pic_regnum (void)
8206 if (current_function_is_leaf
8208 && !ix86_current_function_calls_tls_descriptor)
8211 /* Can't use the same register for both PIC and DRAP. */
8213 drap = REGNO (crtl->drap_reg);
8216 for (i = 2; i >= 0; --i)
8217 if (i != drap && !df_regs_ever_live_p (i))
8221 return INVALID_REGNUM;
8224 /* Return 1 if we need to save REGNO. */
8226 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8228 if (pic_offset_table_rtx
8229 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8230 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8232 || crtl->calls_eh_return
8233 || crtl->uses_const_pool))
8235 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8240 if (crtl->calls_eh_return && maybe_eh_return)
8245 unsigned test = EH_RETURN_DATA_REGNO (i);
8246 if (test == INVALID_REGNUM)
8253 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8256 return (df_regs_ever_live_p (regno)
8257 && !call_used_regs[regno]
8258 && !fixed_regs[regno]
8259 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8262 /* Return the number of saved general purpose registers. */
8265 ix86_nsaved_regs (void)
8270 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8271 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8276 /* Return the number of saved SSE registers. */
8279 ix86_nsaved_sseregs (void)
8284 if (ix86_cfun_abi () != MS_ABI)
8286 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8287 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8292 /* Given FROM and TO register numbers, say whether this elimination is
8293 allowed. If stack alignment is needed, we can only replace argument
8294 pointer with hard frame pointer, or replace frame pointer with stack
8295 pointer. Otherwise, frame pointer elimination is automatically
8296 handled and all other eliminations are valid. */
8299 ix86_can_eliminate (const int from, const int to)
8301 if (stack_realign_fp)
8302 return ((from == ARG_POINTER_REGNUM
8303 && to == HARD_FRAME_POINTER_REGNUM)
8304 || (from == FRAME_POINTER_REGNUM
8305 && to == STACK_POINTER_REGNUM));
8307 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8310 /* Return the offset between two registers, one to be eliminated, and the other
8311 its replacement, at the start of a routine. */
8314 ix86_initial_elimination_offset (int from, int to)
8316 struct ix86_frame frame;
8317 ix86_compute_frame_layout (&frame);
8319 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8320 return frame.hard_frame_pointer_offset;
8321 else if (from == FRAME_POINTER_REGNUM
8322 && to == HARD_FRAME_POINTER_REGNUM)
8323 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8326 gcc_assert (to == STACK_POINTER_REGNUM);
8328 if (from == ARG_POINTER_REGNUM)
8329 return frame.stack_pointer_offset;
8331 gcc_assert (from == FRAME_POINTER_REGNUM);
8332 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8336 /* In a dynamically-aligned function, we can't know the offset from
8337 stack pointer to frame pointer, so we must ensure that setjmp
8338 eliminates fp against the hard fp (%ebp) rather than trying to
8339 index from %esp up to the top of the frame across a gap that is
8340 of unknown (at compile-time) size. */
8342 ix86_builtin_setjmp_frame_value (void)
8344 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8347 /* Fill structure ix86_frame about frame of currently computed function. */
8350 ix86_compute_frame_layout (struct ix86_frame *frame)
8352 unsigned int stack_alignment_needed;
8353 HOST_WIDE_INT offset;
8354 unsigned int preferred_alignment;
8355 HOST_WIDE_INT size = get_frame_size ();
8356 HOST_WIDE_INT to_allocate;
8358 frame->nregs = ix86_nsaved_regs ();
8359 frame->nsseregs = ix86_nsaved_sseregs ();
8361 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8362 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8364 /* The MS ABI seems to require stack alignment to always be 16, except for
8365 function prologues and leaf functions. */
8366 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8367 && (!current_function_is_leaf || cfun->calls_alloca != 0
8368 || ix86_current_function_calls_tls_descriptor))
8370 preferred_alignment = 16;
8371 stack_alignment_needed = 16;
8372 crtl->preferred_stack_boundary = 128;
8373 crtl->stack_alignment_needed = 128;
8376 gcc_assert (!size || stack_alignment_needed);
8377 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8378 gcc_assert (preferred_alignment <= stack_alignment_needed);
8380 /* During reload iteration the number of registers saved can change.
8381 Recompute the value as needed. Do not recompute when the number of
8382 registers didn't change, as reload makes multiple calls to this function
8383 and does not expect the decision to change within a single iteration. */
8384 if (!optimize_function_for_size_p (cfun)
8385 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8387 int count = frame->nregs;
8388 struct cgraph_node *node = cgraph_node (current_function_decl);
8390 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8391 /* The fast prologue uses move instead of push to save registers. This
8392 is significantly longer, but also executes faster, as modern hardware
8393 can execute the moves in parallel but cannot do so for push/pop.
8395 Be careful about choosing which prologue to emit: when the function
8396 takes many instructions to execute, we may use the slow version, as
8397 well as when the function is known to be outside a hot spot (this is
8398 known with feedback only). Weight the size of the function by the
8399 number of registers to save, as it is cheap to use one or two push
8400 instructions but very slow to use many of them. */
8402 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8403 if (node->frequency < NODE_FREQUENCY_NORMAL
8404 || (flag_branch_probabilities
8405 && node->frequency < NODE_FREQUENCY_HOT))
8406 cfun->machine->use_fast_prologue_epilogue = false;
8408 cfun->machine->use_fast_prologue_epilogue
8409 = !expensive_function_p (count);
8411 if (TARGET_PROLOGUE_USING_MOVE
8412 && cfun->machine->use_fast_prologue_epilogue)
8413 frame->save_regs_using_mov = true;
8415 frame->save_regs_using_mov = false;
8417 /* If static stack checking is enabled and done with probes, the registers
8418 need to be saved before allocating the frame. */
8419 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8420 frame->save_regs_using_mov = false;
8422 /* Skip return address. */
8423 offset = UNITS_PER_WORD;
8425 /* Skip pushed static chain. */
8426 if (ix86_static_chain_on_stack)
8427 offset += UNITS_PER_WORD;
8429 /* Skip saved base pointer. */
8430 if (frame_pointer_needed)
8431 offset += UNITS_PER_WORD;
8433 frame->hard_frame_pointer_offset = offset;
8435 /* Register save area */
8436 offset += frame->nregs * UNITS_PER_WORD;
8437 frame->reg_save_offset = offset;
8439 /* Align and set SSE register save area. */
8440 if (frame->nsseregs)
8442 /* The only ABI that has saved SSE registers (Win64) also has a
8443 16-byte aligned default stack, and thus we don't need to be
8444 within the re-aligned local stack frame to save them. */
8445 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8446 offset = (offset + 16 - 1) & -16;
8447 offset += frame->nsseregs * 16;
8449 frame->sse_reg_save_offset = offset;
8451 /* The re-aligned stack starts here. Values before this point are not
8452 directly comparable with values below this point. In order to make
8453 sure that no value happens to be the same before and after, force
8454 the alignment computation below to add a non-zero value. */
8455 if (stack_realign_fp)
8456 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8459 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8460 offset += frame->va_arg_size;
8462 /* Align start of frame for local function. */
8463 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
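/* The round-up idiom above, as a worked example: with offset == 44 and
   an alignment of 16, (44 + 15) & -16 == 48, i.e. the next multiple of
   16 at or above the current offset.  */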
8465 /* Frame pointer points here. */
8466 frame->frame_pointer_offset = offset;
8470 /* Add outgoing arguments area. Can be skipped if we eliminated
8471 all the function calls as dead code.
8472 Skipping is however impossible when function calls alloca. Alloca
8473 expander assumes that last crtl->outgoing_args_size
8474 of stack frame are unused. */
8475 if (ACCUMULATE_OUTGOING_ARGS
8476 && (!current_function_is_leaf || cfun->calls_alloca
8477 || ix86_current_function_calls_tls_descriptor))
8479 offset += crtl->outgoing_args_size;
8480 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8483 frame->outgoing_arguments_size = 0;
8485 /* Align stack boundary. Only needed if we're calling another function
8487 if (!current_function_is_leaf || cfun->calls_alloca
8488 || ix86_current_function_calls_tls_descriptor)
8489 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8491 /* We've reached the end of the stack frame. */
8492 frame->stack_pointer_offset = offset;
8494 /* The amount the prologue needs to allocate. */
8495 to_allocate = offset - frame->sse_reg_save_offset;
8497 if ((!to_allocate && frame->nregs <= 1)
8498 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8499 frame->save_regs_using_mov = false;
8501 if (ix86_using_red_zone ()
8502 && current_function_sp_is_unchanging
8503 && current_function_is_leaf
8504 && !ix86_current_function_calls_tls_descriptor)
8506 frame->red_zone_size = to_allocate;
8507 if (frame->save_regs_using_mov)
8508 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8509 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8510 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8513 frame->red_zone_size = 0;
8514 frame->stack_pointer_offset -= frame->red_zone_size;
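/* A rough picture of the frame just laid out, as a non-normative sketch
   (offsets measured from the CFA; bracketed areas may be empty):

	return address			offset UNITS_PER_WORD
	[pushed static chain]
	[saved frame pointer]		hard_frame_pointer_offset
	GP register save area		reg_save_offset
	[SSE save area, 16-aligned]	sse_reg_save_offset
	[va_arg register save area]
	local variables			frame_pointer_offset
	[outgoing argument area]	stack_pointer_offset

   For a leaf function whose allocation fits in the 128-byte red zone,
   stack_pointer_offset is reduced as above and no explicit adjustment
   of sp is needed.  */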
8517 /* This is semi-inlined memory_address_length, but simplified
8518 since we know that we're always dealing with reg+offset, and
8519 to avoid having to create and discard all that rtl. */
8522 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8528 /* EBP and R13 cannot be encoded without an offset. */
8529 len = (regno == BP_REG || regno == R13_REG);
8531 else if (IN_RANGE (offset, -128, 127))
8534 /* ESP and R12 must be encoded with a SIB byte. */
8535 if (regno == SP_REG || regno == R12_REG)
8541 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8542 The valid base registers are taken from CFUN->MACHINE->FS. */
8545 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8547 const struct machine_function *m = cfun->machine;
8548 rtx base_reg = NULL;
8549 HOST_WIDE_INT base_offset = 0;
8551 if (m->use_fast_prologue_epilogue)
8553 /* Choose the base register most likely to allow the most scheduling
8554 opportunities. Generally FP is valid throughout the function,
8555 while DRAP must be reloaded within the epilogue. But choose either
8556 over the SP due to increased encoding size. */
8560 base_reg = hard_frame_pointer_rtx;
8561 base_offset = m->fs.fp_offset - cfa_offset;
8563 else if (m->fs.drap_valid)
8565 base_reg = crtl->drap_reg;
8566 base_offset = 0 - cfa_offset;
8568 else if (m->fs.sp_valid)
8570 base_reg = stack_pointer_rtx;
8571 base_offset = m->fs.sp_offset - cfa_offset;
8576 HOST_WIDE_INT toffset;
8579 /* Choose the base register with the smallest address encoding.
8580 With a tie, choose FP > DRAP > SP. */
8583 base_reg = stack_pointer_rtx;
8584 base_offset = m->fs.sp_offset - cfa_offset;
8585 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8587 if (m->fs.drap_valid)
8589 toffset = 0 - cfa_offset;
8590 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8593 base_reg = crtl->drap_reg;
8594 base_offset = toffset;
8600 toffset = m->fs.fp_offset - cfa_offset;
8601 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8604 base_reg = hard_frame_pointer_rtx;
8605 base_offset = toffset;
8610 gcc_assert (base_reg != NULL);
8612 return plus_constant (base_reg, base_offset);
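/* Worked examples of the encoding lengths being compared (an informal
   sketch; see choose_baseaddr_len): 0(%ebx) costs 0 extra bytes, 4(%ebp)
   costs 1 (a disp8, since %ebp always needs a displacement), 4(%esp)
   costs 2 (disp8 plus the SIB byte required for %esp), and 512(%ebx)
   costs 4 (disp32).  Ties then prefer FP > DRAP > SP as noted above.  */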
8615 /* Emit code to save registers in the prologue. */
8618 ix86_emit_save_regs (void)
8623 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8624 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8626 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8627 RTX_FRAME_RELATED_P (insn) = 1;
8631 /* Emit a single register save at CFA - CFA_OFFSET. */
8634 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8635 HOST_WIDE_INT cfa_offset)
8637 struct machine_function *m = cfun->machine;
8638 rtx reg = gen_rtx_REG (mode, regno);
8639 rtx mem, addr, base, insn;
8641 addr = choose_baseaddr (cfa_offset);
8642 mem = gen_frame_mem (mode, addr);
8644 /* For SSE saves, we need to indicate the 128-bit alignment. */
8645 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8647 insn = emit_move_insn (mem, reg);
8648 RTX_FRAME_RELATED_P (insn) = 1;
8651 if (GET_CODE (base) == PLUS)
8652 base = XEXP (base, 0);
8653 gcc_checking_assert (REG_P (base));
8655 /* When saving registers into a re-aligned local stack frame, avoid
8656 any tricky guessing by dwarf2out. */
8657 if (m->fs.realigned)
8659 gcc_checking_assert (stack_realign_drap);
8661 if (regno == REGNO (crtl->drap_reg))
8663 /* A bit of a hack. We force the DRAP register to be saved in
8664 the re-aligned stack frame, which provides us with a copy
8665 of the CFA that will last past the prologue. Install it. */
8666 gcc_checking_assert (cfun->machine->fs.fp_valid);
8667 addr = plus_constant (hard_frame_pointer_rtx,
8668 cfun->machine->fs.fp_offset - cfa_offset);
8669 mem = gen_rtx_MEM (mode, addr);
8670 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8674 /* The frame pointer is a stable reference within the
8675 aligned frame. Use it. */
8676 gcc_checking_assert (cfun->machine->fs.fp_valid);
8677 addr = plus_constant (hard_frame_pointer_rtx,
8678 cfun->machine->fs.fp_offset - cfa_offset);
8679 mem = gen_rtx_MEM (mode, addr);
8680 add_reg_note (insn, REG_CFA_EXPRESSION,
8681 gen_rtx_SET (VOIDmode, mem, reg));
8685 /* The memory may not be relative to the current CFA register,
8686 which means that we may need to generate a new pattern for
8687 use by the unwind info. */
8688 else if (base != m->fs.cfa_reg)
8690 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8691 mem = gen_rtx_MEM (mode, addr);
8692 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8696 /* Emit code to save registers using MOV insns.
8697 First register is stored at CFA - CFA_OFFSET. */
8699 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8703 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8704 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8706 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8707 cfa_offset -= UNITS_PER_WORD;
8711 /* Emit code to save SSE registers using MOV insns.
8712 First register is stored at CFA - CFA_OFFSET. */
8714 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8718 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8719 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8721 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8726 static GTY(()) rtx queued_cfa_restores;
8728 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8729 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8730 Don't add the note if the previously saved value will be left untouched
8731 within the stack red zone till return, as unwinders can find the same value
8732 in the register and on the stack. */
8735 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8737 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8742 add_reg_note (insn, REG_CFA_RESTORE, reg);
8743 RTX_FRAME_RELATED_P (insn) = 1;
8747 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8750 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8753 ix86_add_queued_cfa_restore_notes (rtx insn)
8756 if (!queued_cfa_restores)
8758 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8760 XEXP (last, 1) = REG_NOTES (insn);
8761 REG_NOTES (insn) = queued_cfa_restores;
8762 queued_cfa_restores = NULL_RTX;
8763 RTX_FRAME_RELATED_P (insn) = 1;
8766 /* Expand prologue or epilogue stack adjustment.
8767 The pattern exists to put a dependency on all ebp-based memory accesses.
8768 STYLE should be negative if instructions should be marked as frame related,
8769 zero if %r11 register is live and cannot be freely used and positive
8773 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8774 int style, bool set_cfa)
8776 struct machine_function *m = cfun->machine;
8780 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8781 else if (x86_64_immediate_operand (offset, DImode))
8782 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8786 /* r11 is used by indirect sibcall return as well, set before the
8787 epilogue and used after the epilogue. */
8789 tmp = gen_rtx_REG (DImode, R11_REG);
8792 gcc_assert (src != hard_frame_pointer_rtx
8793 && dest != hard_frame_pointer_rtx);
8794 tmp = hard_frame_pointer_rtx;
8796 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8798 RTX_FRAME_RELATED_P (insn) = 1;
8799 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8804 ix86_add_queued_cfa_restore_notes (insn);
8810 gcc_assert (m->fs.cfa_reg == src);
8811 m->fs.cfa_offset += INTVAL (offset);
8812 m->fs.cfa_reg = dest;
8814 r = gen_rtx_PLUS (Pmode, src, offset);
8815 r = gen_rtx_SET (VOIDmode, dest, r);
8816 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8817 RTX_FRAME_RELATED_P (insn) = 1;
8820 RTX_FRAME_RELATED_P (insn) = 1;
8822 if (dest == stack_pointer_rtx)
8824 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8825 bool valid = m->fs.sp_valid;
8827 if (src == hard_frame_pointer_rtx)
8829 valid = m->fs.fp_valid;
8830 ooffset = m->fs.fp_offset;
8832 else if (src == crtl->drap_reg)
8834 valid = m->fs.drap_valid;
8839 /* Else there are two possibilities: SP itself, which we set
8840 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
8841 taken care of by hand along the eh_return path. */
8842 gcc_checking_assert (src == stack_pointer_rtx
8843 || offset == const0_rtx);
8846 m->fs.sp_offset = ooffset - INTVAL (offset);
8847 m->fs.sp_valid = valid;
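/* A representative call from the prologue (illustrative; this exact use
   appears later in ix86_expand_prologue):

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   which emits the sp subtraction, marks it frame-related, and updates
   the m->fs offsets as tracked above.  */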
8851 /* Find an available register to be used as dynamic realign argument
8852 pointer register. Such a register will be written in the prologue and
8853 used at the beginning of the function body, so it must not be
8854 1. parameter passing register.
8856 We reuse static-chain register if it is available. Otherwise, we
8857 use DI for i386 and R13 for x86-64. We chose R13 since it has
8860 Return: the regno of the chosen register. */
8863 find_drap_reg (void)
8865 tree decl = cfun->decl;
8869 /* Use R13 for a nested function or a function that needs a static chain.
8870 Since a function with a tail call may use any caller-saved
8871 register in the epilogue, DRAP must not use a caller-saved
8872 register in such a case. */
8873 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8880 /* Use DI for a nested function or a function that needs a static chain.
8881 Since a function with a tail call may use any caller-saved
8882 register in the epilogue, DRAP must not use a caller-saved
8883 register in such a case. */
8884 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8887 /* Reuse static chain register if it isn't used for parameter
8889 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8890 && !lookup_attribute ("fastcall",
8891 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8892 && !lookup_attribute ("thiscall",
8893 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
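/* Informal summary of the choice (the fallbacks not visible here are
   assumptions based on the checks above): with a static chain or an
   emitted tail call, R13 (64-bit) or DI (32-bit) is used; otherwise
   64-bit code falls back to a call-clobbered scratch, and 32-bit code
   uses CX when regparm, fastcall and thiscall leave it free, or DI
   otherwise.  */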
8900 /* Return minimum incoming stack alignment. */
8903 ix86_minimum_incoming_stack_boundary (bool sibcall)
8905 unsigned int incoming_stack_boundary;
8907 /* Prefer the one specified at command line. */
8908 if (ix86_user_incoming_stack_boundary)
8909 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8910 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8911 if -mstackrealign is used, this is not a sibcall check, and the
8912 estimated stack alignment is 128 bits. */
8915 && ix86_force_align_arg_pointer
8916 && crtl->stack_alignment_estimated == 128)
8917 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8919 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8921 /* Incoming stack alignment can be changed on individual functions
8922 via force_align_arg_pointer attribute. We use the smallest
8923 incoming stack boundary. */
8924 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8925 && lookup_attribute (ix86_force_align_arg_pointer_string,
8926 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8927 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8929 /* The incoming stack frame has to be aligned at least at
8930 parm_stack_boundary. */
8931 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8932 incoming_stack_boundary = crtl->parm_stack_boundary;
8934 /* The stack at the entry of main is aligned by the runtime. We use the
8935 smallest incoming stack boundary. */
8936 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8937 && DECL_NAME (current_function_decl)
8938 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8939 && DECL_FILE_SCOPE_P (current_function_decl))
8940 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8942 return incoming_stack_boundary;
8945 /* Update incoming stack boundary and estimated stack alignment. */
8948 ix86_update_stack_boundary (void)
8950 ix86_incoming_stack_boundary
8951 = ix86_minimum_incoming_stack_boundary (false);
8953 /* x86-64 varargs functions need 16-byte stack alignment for the register save
8957 && crtl->stack_alignment_estimated < 128)
8958 crtl->stack_alignment_estimated = 128;
8961 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8962 needed or an rtx for DRAP otherwise. */
8965 ix86_get_drap_rtx (void)
8967 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8968 crtl->need_drap = true;
8970 if (stack_realign_drap)
8972 /* Assign DRAP to vDRAP and return vDRAP. */
8973 unsigned int regno = find_drap_reg ();
8978 arg_ptr = gen_rtx_REG (Pmode, regno);
8979 crtl->drap_reg = arg_ptr;
8982 drap_vreg = copy_to_reg (arg_ptr);
8986 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8989 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8990 RTX_FRAME_RELATED_P (insn) = 1;
8998 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9001 ix86_internal_arg_pointer (void)
9003 return virtual_incoming_args_rtx;
9006 struct scratch_reg {
9011 /* Return a short-lived scratch register for use on function entry.
9012 In 32-bit mode, it is valid only after the registers are saved
9013 in the prologue. This register must be released by means of
9014 release_scratch_register_on_entry once it is dead. */
9017 get_scratch_register_on_entry (struct scratch_reg *sr)
9025 /* We always use R11 in 64-bit mode. */
9030 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9032 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9033 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9034 int regparm = ix86_function_regparm (fntype, decl);
9036 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9038 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9039 for the static chain register. */
9040 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9041 && drap_regno != AX_REG)
9043 else if (regparm < 2 && drap_regno != DX_REG)
9045 /* ecx is the static chain register. */
9046 else if (regparm < 3 && !fastcall_p && !static_chain_p
9047 && drap_regno != CX_REG)
9049 else if (ix86_save_reg (BX_REG, true))
9051 /* esi is the static chain register. */
9052 else if (!(regparm == 3 && static_chain_p)
9053 && ix86_save_reg (SI_REG, true))
9055 else if (ix86_save_reg (DI_REG, true))
9059 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9064 sr->reg = gen_rtx_REG (Pmode, regno);
9067 rtx insn = emit_insn (gen_push (sr->reg));
9068 RTX_FRAME_RELATED_P (insn) = 1;
9072 /* Release a scratch register obtained from the preceding function. */
9075 release_scratch_register_on_entry (struct scratch_reg *sr)
9079 rtx x, insn = emit_insn (gen_pop (sr->reg));
9081 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9082 RTX_FRAME_RELATED_P (insn) = 1;
9083 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9084 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9085 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9089 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
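/* With the usual default of STACK_CHECK_PROBE_INTERVAL_EXP == 12 (an
   assumption; a target may override it), PROBE_INTERVAL is
   1 << 12 == 4096 bytes, i.e. one probe per 4K page.  */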
9091 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9094 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9096 /* We skip the probe for the first interval + a small dope of 4 words and
9097 probe that many bytes past the specified size to maintain a protection
9098 area at the bottom of the stack. */
9099 const int dope = 4 * UNITS_PER_WORD;
9100 rtx size_rtx = GEN_INT (size);
9102 /* See if we have a constant small number of probes to generate. If so,
9103 that's the easy case. The run-time loop is made up of 11 insns in the
9104 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9105 for n # of intervals. */
9106 if (size <= 5 * PROBE_INTERVAL)
9108 HOST_WIDE_INT i, adjust;
9109 bool first_probe = true;
9111 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9112 values of N from 1 until it exceeds SIZE. If only one probe is
9113 needed, this will not generate any code. Then adjust and probe
9114 to PROBE_INTERVAL + SIZE. */
9115 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9119 adjust = 2 * PROBE_INTERVAL + dope;
9120 first_probe = false;
9123 adjust = PROBE_INTERVAL;
9125 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9126 plus_constant (stack_pointer_rtx, -adjust)));
9127 emit_stack_probe (stack_pointer_rtx);
9131 adjust = size + PROBE_INTERVAL + dope;
9133 adjust = size + PROBE_INTERVAL - i;
9135 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9136 plus_constant (stack_pointer_rtx, -adjust)));
9137 emit_stack_probe (stack_pointer_rtx);
9139 /* Adjust back to account for the additional first interval. */
9140 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9141 plus_constant (stack_pointer_rtx,
9142 PROBE_INTERVAL + dope)));
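/* Worked example for the constant case (illustrative numbers): on
   x86-64, PROBE_INTERVAL == 4096 and dope == 32, so size == 9000 gives

     sub  $8224, %rsp ; probe	(2*4096 + 32: first interval + dope)
     sub  $4096, %rsp ; probe
     sub  $808,  %rsp ; probe	(9000 + 4096 - 2*4096)
     add  $4128, %rsp		(returns PROBE_INTERVAL + dope)

   for a net adjustment of exactly 9000 bytes.  */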
9145 /* Otherwise, do the same as above, but in a loop. Note that we must be
9146 extra careful with variables wrapping around because we might be at
9147 the very top (or the very bottom) of the address space and we have
9148 to be able to handle this case properly; in particular, we use an
9149 equality test for the loop condition. */
9152 HOST_WIDE_INT rounded_size;
9153 struct scratch_reg sr;
9155 get_scratch_register_on_entry (&sr);
9158 /* Step 1: round SIZE to the previous multiple of the interval. */
9160 rounded_size = size & -PROBE_INTERVAL;
9163 /* Step 2: compute initial and final value of the loop counter. */
9165 /* SP = SP_0 + PROBE_INTERVAL. */
9166 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9167 plus_constant (stack_pointer_rtx,
9168 - (PROBE_INTERVAL + dope))));
9170 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9171 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9172 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9173 gen_rtx_PLUS (Pmode, sr.reg,
9174 stack_pointer_rtx)));
9179 while (SP != LAST_ADDR)
9181 SP = SP + PROBE_INTERVAL
9185 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9186 values of N from 1 until it is equal to ROUNDED_SIZE. */
9188 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9191 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9192 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9194 if (size != rounded_size)
9196 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9197 plus_constant (stack_pointer_rtx,
9198 rounded_size - size)));
9199 emit_stack_probe (stack_pointer_rtx);
9202 /* Adjust back to account for the additional first interval. */
9203 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9204 plus_constant (stack_pointer_rtx,
9205 PROBE_INTERVAL + dope)));
9207 release_scratch_register_on_entry (&sr);
9210 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9211 cfun->machine->fs.sp_offset += size;
9213 /* Make sure nothing is scheduled before we are done. */
9214 emit_insn (gen_blockage ());
9217 /* Adjust the stack pointer up to REG while probing it. */
9220 output_adjust_stack_and_probe (rtx reg)
9222 static int labelno = 0;
9223 char loop_lab[32], end_lab[32];
9226 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9227 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9229 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9231 /* Jump to END_LAB if SP == LAST_ADDR. */
9232 xops[0] = stack_pointer_rtx;
9234 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9235 fputs ("\tje\t", asm_out_file);
9236 assemble_name_raw (asm_out_file, end_lab);
9237 fputc ('\n', asm_out_file);
9239 /* SP = SP + PROBE_INTERVAL. */
9240 xops[1] = GEN_INT (PROBE_INTERVAL);
9241 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9244 xops[1] = const0_rtx;
9245 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9247 fputs ("\tjmp\t", asm_out_file);
9248 assemble_name_raw (asm_out_file, loop_lab);
9249 fputc ('\n', asm_out_file);
9251 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
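/* The loop printed above looks roughly like this on x86-64, assuming
   %r11 as the scratch register holding the final address:

   .LPSRL0:	cmpq	%r11, %rsp
		je	.LPSRE0
		subq	$4096, %rsp
		orq	$0, (%rsp)
		jmp	.LPSRL0
   .LPSRE0:

   The "or $0" is the probe: it touches the word at the new sp without
   changing its value.  */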
9256 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9257 inclusive. These are offsets from the current stack pointer. */
9260 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9262 /* See if we have a constant small number of probes to generate. If so,
9263 that's the easy case. The run-time loop is made up of 7 insns in the
9264 generic case while the compile-time loop is made up of n insns for n #
9266 if (size <= 7 * PROBE_INTERVAL)
9270 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9271 it exceeds SIZE. If only one probe is needed, this will not
9272 generate any code. Then probe at FIRST + SIZE. */
9273 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9274 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9276 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9279 /* Otherwise, do the same as above, but in a loop. Note that we must be
9280 extra careful with variables wrapping around because we might be at
9281 the very top (or the very bottom) of the address space and we have
9282 to be able to handle this case properly; in particular, we use an
9283 equality test for the loop condition. */
9286 HOST_WIDE_INT rounded_size, last;
9287 struct scratch_reg sr;
9289 get_scratch_register_on_entry (&sr);
9292 /* Step 1: round SIZE to the previous multiple of the interval. */
9294 rounded_size = size & -PROBE_INTERVAL;
9297 /* Step 2: compute initial and final value of the loop counter. */
9299 /* TEST_OFFSET = FIRST. */
9300 emit_move_insn (sr.reg, GEN_INT (-first));
9302 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9303 last = first + rounded_size;
9308 while (TEST_ADDR != LAST_ADDR)
9310 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9314 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9315 until it is equal to ROUNDED_SIZE. */
9317 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9320 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9321 that SIZE is equal to ROUNDED_SIZE. */
9323 if (size != rounded_size)
9324 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9327 rounded_size - size));
9329 release_scratch_register_on_entry (&sr);
9332 /* Make sure nothing is scheduled before we are done. */
9333 emit_insn (gen_blockage ());
9336 /* Probe a range of stack addresses from REG to END, inclusive. These are
9337 offsets from the current stack pointer. */
9340 output_probe_stack_range (rtx reg, rtx end)
9342 static int labelno = 0;
9343 char loop_lab[32], end_lab[32];
9346 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9347 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9349 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9351 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9354 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9355 fputs ("\tje\t", asm_out_file);
9356 assemble_name_raw (asm_out_file, end_lab);
9357 fputc ('\n', asm_out_file);
9359 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9360 xops[1] = GEN_INT (PROBE_INTERVAL);
9361 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9363 /* Probe at TEST_ADDR. */
9364 xops[0] = stack_pointer_rtx;
9366 xops[2] = const0_rtx;
9367 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9369 fputs ("\tjmp\t", asm_out_file);
9370 assemble_name_raw (asm_out_file, loop_lab);
9371 fputc ('\n', asm_out_file);
9373 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9378 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9379 to be generated in correct form. */
9381 ix86_finalize_stack_realign_flags (void)
9383 /* Check if stack realignment is really needed after reload, and
9384 store the result in cfun. */
9385 unsigned int incoming_stack_boundary
9386 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9387 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9388 unsigned int stack_realign = (incoming_stack_boundary
9389 < (current_function_is_leaf
9390 ? crtl->max_used_stack_slot_alignment
9391 : crtl->stack_alignment_needed));
9393 if (crtl->stack_realign_finalized)
9395 /* After stack_realign_needed is finalized, we can no longer
9397 gcc_assert (crtl->stack_realign_needed == stack_realign);
9401 crtl->stack_realign_needed = stack_realign;
9402 crtl->stack_realign_finalized = true;
9406 /* Expand the prologue into a bunch of separate insns. */
9409 ix86_expand_prologue (void)
9411 struct machine_function *m = cfun->machine;
9414 struct ix86_frame frame;
9415 HOST_WIDE_INT allocate;
9416 bool int_registers_saved;
9418 ix86_finalize_stack_realign_flags ();
9420 /* DRAP should not coexist with stack_realign_fp */
9421 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9423 memset (&m->fs, 0, sizeof (m->fs));
9425 /* Initialize CFA state for before the prologue. */
9426 m->fs.cfa_reg = stack_pointer_rtx;
9427 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9429 /* Track SP offset to the CFA. We continue tracking this after we've
9430 swapped the CFA register away from SP. In the case of re-alignment
9431 this is fudged; we're interested in offsets within the local frame. */
9432 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9433 m->fs.sp_valid = true;
9435 ix86_compute_frame_layout (&frame);
9437 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9439 /* We should have already generated an error for any use of
9440 ms_hook on a nested function. */
9441 gcc_checking_assert (!ix86_static_chain_on_stack);
9443 /* Check if profiling is active and whether we shall use the
9444 profiling-before-prologue variant. If so, sorry. */
9445 if (crtl->profile && flag_fentry != 0)
9446 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9448 /* In ix86_asm_output_function_label we emitted:
9449 8b ff movl.s %edi,%edi
9451 8b ec movl.s %esp,%ebp
9453 This matches the hookable function prologue in Win32 API
9454 functions in Microsoft Windows XP Service Pack 2 and newer.
9455 Wine uses this to enable Windows apps to hook the Win32 API
9456 functions provided by Wine.
9458 What that means is that we've already set up the frame pointer. */
9460 if (frame_pointer_needed
9461 && !(crtl->drap_reg && crtl->stack_realign_needed))
9465 /* We've decided to use the frame pointer already set up.
9466 Describe this to the unwinder by pretending that both
9467 push and mov insns happen right here.
9469 Putting the unwind info here at the end of the ms_hook
9470 is done so that we can make absolutely certain we get
9471 the required byte sequence at the start of the function,
9472 rather than relying on an assembler that can produce
9473 the exact encoding required.
9475 However it does mean (in the unpatched case) that we have
9476 a 1 insn window where the asynchronous unwind info is
9477 incorrect. However, if we placed the unwind info at
9478 its correct location we would have incorrect unwind info
9479 in the patched case. Which is probably all moot since
9480 I don't expect Wine generates dwarf2 unwind info for the
9481 system libraries that use this feature. */
9483 insn = emit_insn (gen_blockage ());
9485 push = gen_push (hard_frame_pointer_rtx);
9486 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9488 RTX_FRAME_RELATED_P (push) = 1;
9489 RTX_FRAME_RELATED_P (mov) = 1;
9491 RTX_FRAME_RELATED_P (insn) = 1;
9492 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9493 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9495 /* Note that gen_push incremented m->fs.cfa_offset, even
9496 though we didn't emit the push insn here. */
9497 m->fs.cfa_reg = hard_frame_pointer_rtx;
9498 m->fs.fp_offset = m->fs.cfa_offset;
9499 m->fs.fp_valid = true;
9503 /* The frame pointer is not needed so pop %ebp again.
9504 This leaves us with a pristine state. */
9505 emit_insn (gen_pop (hard_frame_pointer_rtx));
9509 /* The first insn of a function that accepts its static chain on the
9510 stack is to push the register that would be filled in by a direct
9511 call. This insn will be skipped by the trampoline. */
9512 else if (ix86_static_chain_on_stack)
9514 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9515 emit_insn (gen_blockage ());
9517 /* We don't want to interpret this push insn as a register save,
9518 only as a stack adjustment. The real copy of the register as
9519 a save will be done later, if needed. */
9520 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9521 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9522 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9523 RTX_FRAME_RELATED_P (insn) = 1;
9526 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9527 DRAP is needed and stack realignment is really needed after reload. */
9528 if (stack_realign_drap)
9530 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9532 /* Only need to push parameter pointer reg if it is caller saved. */
9533 if (!call_used_regs[REGNO (crtl->drap_reg)])
9535 /* Push the argument pointer register. */
9536 insn = emit_insn (gen_push (crtl->drap_reg));
9537 RTX_FRAME_RELATED_P (insn) = 1;
9540 /* Grab the argument pointer. */
9541 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9542 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9543 RTX_FRAME_RELATED_P (insn) = 1;
9544 m->fs.cfa_reg = crtl->drap_reg;
9545 m->fs.cfa_offset = 0;
9547 /* Align the stack. */
9548 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9550 GEN_INT (-align_bytes)));
9551 RTX_FRAME_RELATED_P (insn) = 1;
9553 /* Replicate the return address on the stack so that the return
9554 address can be reached via the (argp - 1) slot. This is needed
9555 to implement macro RETURN_ADDR_RTX and intrinsic function
9556 expand_builtin_return_addr etc. */
9557 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9558 t = gen_frame_mem (Pmode, t);
9559 insn = emit_insn (gen_push (t));
9560 RTX_FRAME_RELATED_P (insn) = 1;
9562 /* For the purposes of frame and register save area addressing,
9563 we've started over with a new frame. */
9564 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9565 m->fs.realigned = true;
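/* On 32-bit, assuming %ecx as the DRAP register, 16-byte alignment and
   no pushes beforehand, the sequence above typically assembles to

	leal	4(%esp), %ecx
	andl	$-16, %esp
	pushl	-4(%ecx)

   i.e. capture the incoming argument pointer, align the stack, then
   re-push the return address so it stays reachable at (argp - 1).  */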
9568 if (frame_pointer_needed && !m->fs.fp_valid)
9570 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9571 slower on all targets. Also sdb doesn't like it. */
9572 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9573 RTX_FRAME_RELATED_P (insn) = 1;
9575 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9576 RTX_FRAME_RELATED_P (insn) = 1;
9578 if (m->fs.cfa_reg == stack_pointer_rtx)
9579 m->fs.cfa_reg = hard_frame_pointer_rtx;
9580 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9581 m->fs.fp_offset = m->fs.sp_offset;
9582 m->fs.fp_valid = true;
9585 int_registers_saved = (frame.nregs == 0);
9587 if (!int_registers_saved)
9589 /* If saving registers via PUSH, do so now. */
9590 if (!frame.save_regs_using_mov)
9592 ix86_emit_save_regs ();
9593 int_registers_saved = true;
9594 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9597 /* When using the red zone we may start register saving before allocating
9598 the stack frame, saving one cycle of the prologue. However, avoid
9599 doing this if we have to probe the stack; at least on x86_64 the
9600 stack probe can turn into a call that clobbers a red zone location. */
9601 else if (ix86_using_red_zone ()
9602 && (! TARGET_STACK_PROBE
9603 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9605 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9606 int_registers_saved = true;
9610 if (stack_realign_fp)
9612 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9613 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9615 /* The computation of the size of the re-aligned stack frame means
9616 that we must allocate the size of the register save area before
9617 performing the actual alignment. Otherwise we cannot guarantee
9618 that there's enough storage above the realignment point. */
9619 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9620 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9621 GEN_INT (m->fs.sp_offset
9622 - frame.sse_reg_save_offset),
9625 /* Align the stack. */
9626 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9628 GEN_INT (-align_bytes)));
9630 /* For the purposes of register save area addressing, the stack
9631 pointer is no longer valid. As for the value of sp_offset,
9632 see ix86_compute_frame_layout, which we need to match in order
9633 to pass verification of stack_pointer_offset at the end. */
9634 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9635 m->fs.sp_valid = false;
9638 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9640 if (flag_stack_usage)
9642 /* We start to count from ARG_POINTER. */
9643 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9645 /* If it was realigned, take into account the fake frame. */
9646 if (stack_realign_drap)
9648 if (ix86_static_chain_on_stack)
9649 stack_size += UNITS_PER_WORD;
9651 if (!call_used_regs[REGNO (crtl->drap_reg)])
9652 stack_size += UNITS_PER_WORD;
9654 /* This over-estimates by 1 minimal-stack-alignment-unit but
9655 mitigates that by counting in the new return address slot. */
9656 current_function_dynamic_stack_size
9657 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9660 current_function_static_stack_size = stack_size;
9663 /* The stack has already been decremented by the instruction calling us
9664 so we need to probe unconditionally to preserve the protection area. */
9665 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9667 /* We expect the registers to be saved when probes are used. */
9668 gcc_assert (int_registers_saved);
9670 if (STACK_CHECK_MOVING_SP)
9672 ix86_adjust_stack_and_probe (allocate);
9677 HOST_WIDE_INT size = allocate;
9679 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9680 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9682 if (TARGET_STACK_PROBE)
9683 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9685 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9691 else if (!ix86_target_stack_probe ()
9692 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9694 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9695 GEN_INT (-allocate), -1,
9696 m->fs.cfa_reg == stack_pointer_rtx);
9700 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9702 bool eax_live = false;
9703 bool r10_live = false;
9706 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9707 if (!TARGET_64BIT_MS_ABI)
9708 eax_live = ix86_eax_live_at_start_p ();
9712 emit_insn (gen_push (eax));
9713 allocate -= UNITS_PER_WORD;
9717 r10 = gen_rtx_REG (Pmode, R10_REG);
9718 emit_insn (gen_push (r10));
9719 allocate -= UNITS_PER_WORD;
9722 emit_move_insn (eax, GEN_INT (allocate));
9724 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9726 if (m->fs.cfa_reg == stack_pointer_rtx)
9728 m->fs.cfa_offset += allocate;
9729 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9730 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9731 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9732 RTX_FRAME_RELATED_P (insn) = 1;
9734 m->fs.sp_offset += allocate;
9736 if (r10_live && eax_live)
9738 t = choose_baseaddr (m->fs.sp_offset - allocate);
9739 emit_move_insn (r10, gen_frame_mem (Pmode, t));
9740 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
9741 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9743 else if (eax_live || r10_live)
9745 t = choose_baseaddr (m->fs.sp_offset - allocate);
9746 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
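/* Shape of the emitted allocation when the worker is used (an informal
   sketch; the worker symbol is target-specific, e.g. a ___chkstk-style
   routine on Windows):

	mov	$allocate, %eax
	call	<allocate_stack_worker>	; probes and adjusts sp
	; live %eax / %r10 values, if any, are then reloaded from the
	; new frame as above.  */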
9749 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9751 if (!int_registers_saved)
9752 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9754 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9756 pic_reg_used = false;
9757 if (pic_offset_table_rtx
9758 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9761 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9763 if (alt_pic_reg_used != INVALID_REGNUM)
9764 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9766 pic_reg_used = true;
9773 if (ix86_cmodel == CM_LARGE_PIC)
9775 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9776 rtx label = gen_label_rtx ();
9778 LABEL_PRESERVE_P (label) = 1;
9779 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9780 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9781 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9782 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9783 pic_offset_table_rtx, tmp_reg));
9786 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9789 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9792 /* In the pic_reg_used case, make sure that the got load isn't deleted
9793 when mcount needs it. Blockage to avoid call movement across mcount
9794 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9796 if (crtl->profile && !flag_fentry && pic_reg_used)
9797 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9799 if (crtl->drap_reg && !crtl->stack_realign_needed)
9801 /* vDRAP is set up, but after reload it turns out stack realignment
9802 isn't necessary; here we emit the prologue to set up DRAP
9803 without the stack realignment adjustment. */
9804 t = choose_baseaddr (0);
9805 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9808 /* Prevent instructions from being scheduled into register save push
9809 sequence when access to the redzone area is done through frame pointer.
9810 The offset between the frame pointer and the stack pointer is calculated
9811 relative to the value of the stack pointer at the end of the function
9812 prologue, and moving instructions that access redzone area via frame
9813 pointer inside push sequence violates this assumption. */
9814 if (frame_pointer_needed && frame.red_zone_size)
9815 emit_insn (gen_memory_blockage ());
9817 /* Emit cld instruction if stringops are used in the function. */
9818 if (TARGET_CLD && ix86_current_function_needs_cld)
9819 emit_insn (gen_cld ());
9822 /* Emit code to restore REG using a POP insn. */
9825 ix86_emit_restore_reg_using_pop (rtx reg)
9827 struct machine_function *m = cfun->machine;
9828 rtx insn = emit_insn (gen_pop (reg));
9830 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9831 m->fs.sp_offset -= UNITS_PER_WORD;
9833 if (m->fs.cfa_reg == crtl->drap_reg
9834 && REGNO (reg) == REGNO (crtl->drap_reg))
9836 /* Previously we'd represented the CFA as an expression
9837 like *(%ebp - 8). We've just popped that value from
9838 the stack, which means we need to reset the CFA to
9839 the drap register. This will remain until we restore
9840 the stack pointer. */
9841 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9842 RTX_FRAME_RELATED_P (insn) = 1;
9844 /* This means that the DRAP register is valid for addressing too. */
9845 m->fs.drap_valid = true;
9849 if (m->fs.cfa_reg == stack_pointer_rtx)
9851 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9852 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9853 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9854 RTX_FRAME_RELATED_P (insn) = 1;
9856 m->fs.cfa_offset -= UNITS_PER_WORD;
9859 /* When the frame pointer is the CFA, and we pop it, we are
9860 swapping back to the stack pointer as the CFA. This happens
9861 for stack frames that don't allocate other data, so we assume
9862 the stack pointer is now pointing at the return address, i.e.
9863 the function entry state, which makes the offset be 1 word. */
9864 if (reg == hard_frame_pointer_rtx)
9866 m->fs.fp_valid = false;
9867 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9869 m->fs.cfa_reg = stack_pointer_rtx;
9870 m->fs.cfa_offset -= UNITS_PER_WORD;
9872 add_reg_note (insn, REG_CFA_DEF_CFA,
9873 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9874 GEN_INT (m->fs.cfa_offset)));
9875 RTX_FRAME_RELATED_P (insn) = 1;
9880 /* Emit code to restore saved registers using POP insns. */
9883 ix86_emit_restore_regs_using_pop (void)
9887 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9888 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9889 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9892 /* Emit code and notes for the LEAVE instruction. */
9895 ix86_emit_leave (void)
9897 struct machine_function *m = cfun->machine;
9898 rtx insn = emit_insn (ix86_gen_leave ());
9900 ix86_add_queued_cfa_restore_notes (insn);
9902 gcc_assert (m->fs.fp_valid);
9903 m->fs.sp_valid = true;
9904 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9905 m->fs.fp_valid = false;
9907 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9909 m->fs.cfa_reg = stack_pointer_rtx;
9910 m->fs.cfa_offset = m->fs.sp_offset;
9912 add_reg_note (insn, REG_CFA_DEF_CFA,
9913 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9914 RTX_FRAME_RELATED_P (insn) = 1;
9915 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9920 /* Emit code to restore saved registers using MOV insns.
9921 First register is restored from CFA - CFA_OFFSET. */
9923 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9924 int maybe_eh_return)
9926 struct machine_function *m = cfun->machine;
9929 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9930 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9932 rtx reg = gen_rtx_REG (Pmode, regno);
9935 mem = choose_baseaddr (cfa_offset);
9936 mem = gen_frame_mem (Pmode, mem);
9937 insn = emit_move_insn (reg, mem);
9939 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9941 /* Previously we'd represented the CFA as an expression
9942 like *(%ebp - 8). We've just popped that value from
9943 the stack, which means we need to reset the CFA to
9944 the drap register. This will remain until we restore
9945 the stack pointer. */
9946 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9947 RTX_FRAME_RELATED_P (insn) = 1;
9949 /* This means that the DRAP register is valid for addressing. */
9950 m->fs.drap_valid = true;
9953 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9955 cfa_offset -= UNITS_PER_WORD;
9959 /* Emit code to restore saved registers using MOV insns.
9960 First register is restored from CFA - CFA_OFFSET. */
9962 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9963 int maybe_eh_return)
9967 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9968 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9970 rtx reg = gen_rtx_REG (V4SFmode, regno);
9973 mem = choose_baseaddr (cfa_offset);
9974 mem = gen_rtx_MEM (V4SFmode, mem);
9975 set_mem_align (mem, 128);
9976 emit_move_insn (reg, mem);
9978 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9984 /* Restore function stack, frame, and registers. */
9987 ix86_expand_epilogue (int style)
9989 struct machine_function *m = cfun->machine;
9990 struct machine_frame_state frame_state_save = m->fs;
9991 struct ix86_frame frame;
9992 bool restore_regs_via_mov;
9995 ix86_finalize_stack_realign_flags ();
9996 ix86_compute_frame_layout (&frame);
9998 m->fs.sp_valid = (!frame_pointer_needed
9999 || (current_function_sp_is_unchanging
10000 && !stack_realign_fp));
10001 gcc_assert (!m->fs.sp_valid
10002 || m->fs.sp_offset == frame.stack_pointer_offset);
10004 /* The FP must be valid if the frame pointer is present. */
10005 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10006 gcc_assert (!m->fs.fp_valid
10007 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10009 /* We must have *some* valid pointer to the stack frame. */
10010 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10012 /* The DRAP is never valid at this point. */
10013 gcc_assert (!m->fs.drap_valid);
10015 /* See the comment about red zone and frame
10016 pointer usage in ix86_expand_prologue. */
10017 if (frame_pointer_needed && frame.red_zone_size)
10018 emit_insn (gen_memory_blockage ());
10020 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10021 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10023 /* Determine the CFA offset of the end of the red-zone. */
10024 m->fs.red_zone_offset = 0;
10025 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10027 /* The red-zone begins below the return address. */
10028 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10030 /* When the register save area is in the aligned portion of
10031 the stack, determine the maximum runtime displacement that
10032 matches up with the aligned frame. */
10033 if (stack_realign_drap)
10034 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10038 /* Special care must be taken for the normal return case of a function
10039 using eh_return: the eax and edx registers are marked as saved, but
10040 not restored along this path. Adjust the save location to match. */
10041 if (crtl->calls_eh_return && style != 2)
10042 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10044 /* If we're only restoring one register and sp is not valid, then
10045 use a move instruction to restore the register, since it's
10046 less work than reloading sp and popping the register. */
10047 if (!m->fs.sp_valid && frame.nregs <= 1)
10048 restore_regs_via_mov = true;
10049 /* EH_RETURN requires the use of moves to function properly. */
10050 else if (crtl->calls_eh_return)
10051 restore_regs_via_mov = true;
10052 else if (TARGET_EPILOGUE_USING_MOVE
10053 && cfun->machine->use_fast_prologue_epilogue
10054 && (frame.nregs > 1
10055 || m->fs.sp_offset != frame.reg_save_offset))
10056 restore_regs_via_mov = true;
10057 else if (frame_pointer_needed
10059 && m->fs.sp_offset != frame.reg_save_offset)
10060 restore_regs_via_mov = true;
10061 else if (frame_pointer_needed
10062 && TARGET_USE_LEAVE
10063 && cfun->machine->use_fast_prologue_epilogue
10064 && frame.nregs == 1)
10065 restore_regs_via_mov = true;
10067 restore_regs_via_mov = false;
10069 if (restore_regs_via_mov || frame.nsseregs)
10071 /* Ensure that the entire register save area is addressable via
10072 the stack pointer, if we will restore via sp. */
10074 && m->fs.sp_offset > 0x7fffffff
10075 && !(m->fs.fp_valid || m->fs.drap_valid)
10076 && (frame.nsseregs + frame.nregs) != 0)
10078 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10079 GEN_INT (m->fs.sp_offset
10080 - frame.sse_reg_save_offset),
10082 m->fs.cfa_reg == stack_pointer_rtx);
10086 /* If there are any SSE registers to restore, then we have to do it
10087 via moves, since there's obviously no pop for SSE regs. */
10088 if (frame.nsseregs)
10089 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10092 if (restore_regs_via_mov)
10097 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10099 /* eh_return epilogues need %ecx added to the stack pointer. */
10102 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10104 /* Stack align doesn't work with eh_return. */
10105 gcc_assert (!stack_realign_drap);
10106 /* Neither do regparm nested functions. */
10107 gcc_assert (!ix86_static_chain_on_stack);
10109 if (frame_pointer_needed)
10111 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10112 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10113 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10115 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10116 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10118 /* Note that we use SA as a temporary CFA, as the return
10119 address is at the proper place relative to it. We
10120 pretend this happens at the FP restore insn because
10121 prior to this insn the FP would be stored at the wrong
10122 offset relative to SA, and after this insn we have no
10123 other reasonable register to use for the CFA. We don't
10124 bother resetting the CFA to the SP for the duration of
10125 the return insn. */
10126 add_reg_note (insn, REG_CFA_DEF_CFA,
10127 plus_constant (sa, UNITS_PER_WORD));
10128 ix86_add_queued_cfa_restore_notes (insn);
10129 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10130 RTX_FRAME_RELATED_P (insn) = 1;
10132 m->fs.cfa_reg = sa;
10133 m->fs.cfa_offset = UNITS_PER_WORD;
10134 m->fs.fp_valid = false;
10136 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10137 const0_rtx, style, false);
10141 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10142 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10143 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10144 ix86_add_queued_cfa_restore_notes (insn);
10146 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10147 if (m->fs.cfa_offset != UNITS_PER_WORD)
10149 m->fs.cfa_offset = UNITS_PER_WORD;
10150 add_reg_note (insn, REG_CFA_DEF_CFA,
10151 plus_constant (stack_pointer_rtx,
10153 RTX_FRAME_RELATED_P (insn) = 1;
10156 m->fs.sp_offset = UNITS_PER_WORD;
10157 m->fs.sp_valid = true;
10162 /* The first step is to deallocate the stack frame so that we can
10163 pop the registers. */
10164 if (!m->fs.sp_valid)
10166 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10167 GEN_INT (m->fs.fp_offset
10168 - frame.reg_save_offset),
10171 else if (m->fs.sp_offset != frame.reg_save_offset)
10173 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10174 GEN_INT (m->fs.sp_offset
10175 - frame.reg_save_offset),
10177 m->fs.cfa_reg == stack_pointer_rtx);
10180 ix86_emit_restore_regs_using_pop ();
10183 /* If we used a frame pointer and haven't already got rid of it,
10185 if (m->fs.fp_valid)
10187 /* If the stack pointer is valid and pointing at the frame
10188 pointer store address, then we only need a pop. */
10189 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10190 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10191 /* "leave" results in shorter dependency chains on CPUs that are
10192 able to grok it fast. */
10193 else if (TARGET_USE_LEAVE
10194 || optimize_function_for_size_p (cfun)
10195 || !cfun->machine->use_fast_prologue_epilogue)
10196 ix86_emit_leave ();
10199 pro_epilogue_adjust_stack (stack_pointer_rtx,
10200 hard_frame_pointer_rtx,
10201 const0_rtx, style, !using_drap);
10202 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10208 int param_ptr_offset = UNITS_PER_WORD;
10211 gcc_assert (stack_realign_drap);
10213 if (ix86_static_chain_on_stack)
10214 param_ptr_offset += UNITS_PER_WORD;
10215 if (!call_used_regs[REGNO (crtl->drap_reg)])
10216 param_ptr_offset += UNITS_PER_WORD;
10218 insn = emit_insn (gen_rtx_SET
10219 (VOIDmode, stack_pointer_rtx,
10220 gen_rtx_PLUS (Pmode,
10222 GEN_INT (-param_ptr_offset))));
10223 m->fs.cfa_reg = stack_pointer_rtx;
10224 m->fs.cfa_offset = param_ptr_offset;
10225 m->fs.sp_offset = param_ptr_offset;
10226 m->fs.realigned = false;
10228 add_reg_note (insn, REG_CFA_DEF_CFA,
10229 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10230 GEN_INT (param_ptr_offset)));
10231 RTX_FRAME_RELATED_P (insn) = 1;
10233 if (!call_used_regs[REGNO (crtl->drap_reg)])
10234 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10237 /* At this point the stack pointer must be valid, and we must have
10238 restored all of the registers. We may not have deallocated the
10239 entire stack frame. We've delayed this until now because it may
10240 be possible to merge the local stack deallocation with the
10241 deallocation forced by ix86_static_chain_on_stack. */
10242 gcc_assert (m->fs.sp_valid);
10243 gcc_assert (!m->fs.fp_valid);
10244 gcc_assert (!m->fs.realigned);
10245 if (m->fs.sp_offset != UNITS_PER_WORD)
10247 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10248 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10252 /* Sibcall epilogues don't want a return instruction. */
10255 m->fs = frame_state_save;
10259 if (crtl->args.pops_args && crtl->args.size)
10261 rtx popc = GEN_INT (crtl->args.pops_args);
10263 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10264 address, do an explicit add, and jump indirectly to the caller. */
10266 if (crtl->args.pops_args >= 65536)
10268 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10271 /* There is no "pascal" calling convention in any 64bit ABI. */
10272 gcc_assert (!TARGET_64BIT);
10274 insn = emit_insn (gen_pop (ecx));
10275 m->fs.cfa_offset -= UNITS_PER_WORD;
10276 m->fs.sp_offset -= UNITS_PER_WORD;
10278 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10279 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10280 add_reg_note (insn, REG_CFA_REGISTER,
10281 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10282 RTX_FRAME_RELATED_P (insn) = 1;
10284 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10286 emit_jump_insn (gen_return_indirect_internal (ecx));
10289 emit_jump_insn (gen_return_pop_internal (popc));
10292 emit_jump_insn (gen_return_internal ());
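/* For illustration: a 32-bit stdcall function popping 8 bytes of
   arguments returns with "ret $8" via gen_return_pop_internal; only
   when pops_args >= 64K is the pop/add/indirect-jump sequence above
   needed, since the immediate operand of "ret" is only 16 bits.  */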
10294 /* Restore the state back to the state from the prologue,
10295 so that it's correct for the next epilogue. */
10296 m->fs = frame_state_save;
10299 /* Reset from the function's potential modifications. */
10302 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10303 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10305 if (pic_offset_table_rtx)
10306 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10308 /* Mach-O doesn't support labels at the end of objects, so if
10309 it looks like we might want one, insert a NOP. */
10311 rtx insn = get_last_insn ();
10314 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10315 insn = PREV_INSN (insn);
10319 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10320 fputs ("\tnop\n", file);
10326 /* Extract the parts of an RTL expression that is a valid memory address
10327 for an instruction. Return 0 if the structure of the address is
10328 grossly off. Return -1 if the address contains ASHIFT, so it is not
10329 strictly valid, but is still used for computing the length of the lea instruction. */
10332 ix86_decompose_address (rtx addr, struct ix86_address *out)
10334 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10335 rtx base_reg, index_reg;
10336 HOST_WIDE_INT scale = 1;
10337 rtx scale_rtx = NULL_RTX;
10340 enum ix86_address_seg seg = SEG_DEFAULT;
10342 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10344 else if (GET_CODE (addr) == PLUS)
10346 rtx addends[4], op;
10354 addends[n++] = XEXP (op, 1);
10357 while (GET_CODE (op) == PLUS);
10362 for (i = n; i >= 0; --i)
10365 switch (GET_CODE (op))
10370 index = XEXP (op, 0);
10371 scale_rtx = XEXP (op, 1);
10377 index = XEXP (op, 0);
10378 tmp = XEXP (op, 1);
10379 if (!CONST_INT_P (tmp))
10381 scale = INTVAL (tmp);
10382 if ((unsigned HOST_WIDE_INT) scale > 3)
10384 scale = 1 << scale;
10388 if (XINT (op, 1) == UNSPEC_TP
10389 && TARGET_TLS_DIRECT_SEG_REFS
10390 && seg == SEG_DEFAULT)
10391 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10420 else if (GET_CODE (addr) == MULT)
10422 index = XEXP (addr, 0); /* index*scale */
10423 scale_rtx = XEXP (addr, 1);
10425 else if (GET_CODE (addr) == ASHIFT)
10427 /* We're called for lea too, which implements ashift on occasion. */
10428 index = XEXP (addr, 0);
10429 tmp = XEXP (addr, 1);
10430 if (!CONST_INT_P (tmp))
10432 scale = INTVAL (tmp);
10433 if ((unsigned HOST_WIDE_INT) scale > 3)
10435 scale = 1 << scale;
10439 disp = addr; /* displacement */
10441 /* Extract the integral value of scale. */
10444 if (!CONST_INT_P (scale_rtx))
10446 scale = INTVAL (scale_rtx);
10449 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10450 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10452 /* Avoid useless 0 displacement. */
10453 if (disp == const0_rtx && (base || index))
10456 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10457 if (base_reg && index_reg && scale == 1
10458 && (index_reg == arg_pointer_rtx
10459 || index_reg == frame_pointer_rtx
10460 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10463 tmp = base, base = index, index = tmp;
10464 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10467 /* Special case: %ebp cannot be encoded as a base without a displacement.
10471 && (base_reg == hard_frame_pointer_rtx
10472 || base_reg == frame_pointer_rtx
10473 || base_reg == arg_pointer_rtx
10474 || (REG_P (base_reg)
10475 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10476 || REGNO (base_reg) == R13_REG))))
10479 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10480 Avoid this by transforming to [%esi+0].
10481 Reload calls address legitimization without cfun defined, so we need
10482 to test cfun for being non-NULL. */
10483 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10484 && base_reg && !index_reg && !disp
10485 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10488 /* Special case: encode reg+reg instead of reg*2. */
10489 if (!base && index && scale == 2)
10490 base = index, base_reg = index_reg, scale = 1;
10492 /* Special case: scaling cannot be encoded without base or displacement. */
10493 if (!base && !disp && index && scale != 1)
10497 out->index = index;
10499 out->scale = scale;
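/* For illustration (hypothetical operands): the address
     (plus (mult (reg %eax) (const_int 4))
           (plus (reg %ebx) (const_int 12)))
   decomposes to base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the AT&T operand "12(%ebx,%eax,4)".  */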
10505 /* Return cost of the memory address x.
10506 For i386, it is better to use a complex address than let gcc copy
10507 the address into a reg and make a new pseudo. But not if the address
10508 requires two regs - that would mean more pseudos with longer
10511 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10513 struct ix86_address parts;
10515 int ok = ix86_decompose_address (x, &parts);
10519 if (parts.base && GET_CODE (parts.base) == SUBREG)
10520 parts.base = SUBREG_REG (parts.base);
10521 if (parts.index && GET_CODE (parts.index) == SUBREG)
10522 parts.index = SUBREG_REG (parts.index);
10524 /* Attempt to minimize number of registers in the address. */
10526 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10528 && (!REG_P (parts.index)
10529 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10533 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10535 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10536 && parts.base != parts.index)
10539 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10540 since its predecode logic can't detect the length of instructions
10541 and it degenerates to vector decoded. Increase the cost of such
10542 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10543 to split such addresses or even refuse such addresses at all.
10545 The following addressing modes are affected:
10550 The first and last case may be avoidable by explicitly coding the zero in
10551 the memory address, but I don't have an AMD-K6 machine handy to check this
10555 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10556 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10557 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10563 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10564 this is used to form addresses to local data when -fPIC is in
10568 darwin_local_data_pic (rtx disp)
10570 return (GET_CODE (disp) == UNSPEC
10571 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10574 /* Determine if a given RTX is a valid constant. We already know this
10575 satisfies CONSTANT_P. */
10578 legitimate_constant_p (rtx x)
10580 switch (GET_CODE (x))
10585 if (GET_CODE (x) == PLUS)
10587 if (!CONST_INT_P (XEXP (x, 1)))
10592 if (TARGET_MACHO && darwin_local_data_pic (x))
10595 /* Only some unspecs are valid as "constants". */
10596 if (GET_CODE (x) == UNSPEC)
10597 switch (XINT (x, 1))
10600 case UNSPEC_GOTOFF:
10601 case UNSPEC_PLTOFF:
10602 return TARGET_64BIT;
10604 case UNSPEC_NTPOFF:
10605 x = XVECEXP (x, 0, 0);
10606 return (GET_CODE (x) == SYMBOL_REF
10607 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10608 case UNSPEC_DTPOFF:
10609 x = XVECEXP (x, 0, 0);
10610 return (GET_CODE (x) == SYMBOL_REF
10611 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10616 /* We must have drilled down to a symbol. */
10617 if (GET_CODE (x) == LABEL_REF)
10619 if (GET_CODE (x) != SYMBOL_REF)
10624 /* TLS symbols are never valid. */
10625 if (SYMBOL_REF_TLS_MODEL (x))
10628 /* DLLIMPORT symbols are never valid. */
10629 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10630 && SYMBOL_REF_DLLIMPORT_P (x))
10635 if (GET_MODE (x) == TImode
10636 && x != CONST0_RTX (TImode)
10642 if (!standard_sse_constant_p (x))
10649 /* Otherwise we handle everything else in the move patterns. */
10653 /* Determine if it's legal to put X into the constant pool. This
10654 is not possible for the address of thread-local symbols, which
10655 is checked above. */
10658 ix86_cannot_force_const_mem (rtx x)
10660 /* We can always put integral constants and vectors in memory. */
10661 switch (GET_CODE (x))
10671 return !legitimate_constant_p (x);
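/* For illustration: a TLS reference such as a SYMBOL_REF with a
   nonzero SYMBOL_REF_TLS_MODEL fails legitimate_constant_p above,
   so it is never spilled to the constant pool.  */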
10675 /* Nonzero if the constant value X is a legitimate general operand
10676 when generating PIC code. It is given that flag_pic is on and
10677 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10680 legitimate_pic_operand_p (rtx x)
10684 switch (GET_CODE (x))
10687 inner = XEXP (x, 0);
10688 if (GET_CODE (inner) == PLUS
10689 && CONST_INT_P (XEXP (inner, 1)))
10690 inner = XEXP (inner, 0);
10692 /* Only some unspecs are valid as "constants". */
10693 if (GET_CODE (inner) == UNSPEC)
10694 switch (XINT (inner, 1))
10697 case UNSPEC_GOTOFF:
10698 case UNSPEC_PLTOFF:
10699 return TARGET_64BIT;
10701 x = XVECEXP (inner, 0, 0);
10702 return (GET_CODE (x) == SYMBOL_REF
10703 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10704 case UNSPEC_MACHOPIC_OFFSET:
10705 return legitimate_pic_address_disp_p (x);
10713 return legitimate_pic_address_disp_p (x);
10720 /* Determine if a given CONST RTX is a valid memory displacement when used as a PIC operand. */
10724 legitimate_pic_address_disp_p (rtx disp)
10728 /* In 64bit mode we can allow direct addresses of symbols and labels
10729 when they are not dynamic symbols. */
10732 rtx op0 = disp, op1;
10734 switch (GET_CODE (disp))
10740 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10742 op0 = XEXP (XEXP (disp, 0), 0);
10743 op1 = XEXP (XEXP (disp, 0), 1);
10744 if (!CONST_INT_P (op1)
10745 || INTVAL (op1) >= 16*1024*1024
10746 || INTVAL (op1) < -16*1024*1024)
10748 if (GET_CODE (op0) == LABEL_REF)
10750 if (GET_CODE (op0) != SYMBOL_REF)
10755 /* TLS references should always be enclosed in UNSPEC. */
10756 if (SYMBOL_REF_TLS_MODEL (op0))
10758 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10759 && ix86_cmodel != CM_LARGE_PIC)
10767 if (GET_CODE (disp) != CONST)
10769 disp = XEXP (disp, 0);
10773 /* It is unsafe to allow PLUS expressions; this limits the allowed
10774 distance of GOT table references. We should not need these anyway. */
10775 if (GET_CODE (disp) != UNSPEC
10776 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10777 && XINT (disp, 1) != UNSPEC_GOTOFF
10778 && XINT (disp, 1) != UNSPEC_PLTOFF))
10781 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10782 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10788 if (GET_CODE (disp) == PLUS)
10790 if (!CONST_INT_P (XEXP (disp, 1)))
10792 disp = XEXP (disp, 0);
10796 if (TARGET_MACHO && darwin_local_data_pic (disp))
10799 if (GET_CODE (disp) != UNSPEC)
10802 switch (XINT (disp, 1))
10807 /* We need to check for both symbols and labels because VxWorks loads
10808 text labels with @GOT rather than @GOTOFF. See gotoff_operand for details. */
10810 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10811 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10812 case UNSPEC_GOTOFF:
10813 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10814 While the ABI also specifies a 32bit relocation, we don't produce it in
10815 the small PIC model at all. */
10816 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10817 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10819 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10821 case UNSPEC_GOTTPOFF:
10822 case UNSPEC_GOTNTPOFF:
10823 case UNSPEC_INDNTPOFF:
10826 disp = XVECEXP (disp, 0, 0);
10827 return (GET_CODE (disp) == SYMBOL_REF
10828 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10829 case UNSPEC_NTPOFF:
10830 disp = XVECEXP (disp, 0, 0);
10831 return (GET_CODE (disp) == SYMBOL_REF
10832 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10833 case UNSPEC_DTPOFF:
10834 disp = XVECEXP (disp, 0, 0);
10835 return (GET_CODE (disp) == SYMBOL_REF
10836 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10842 /* Recognizes RTL expressions that are valid memory addresses for an
10843 instruction. The MODE argument is the machine mode for the MEM
10844 expression that wants to use this address.
10846 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10847 convert common non-canonical forms to canonical form so that they will be recognized. */
10851 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10852 rtx addr, bool strict)
10854 struct ix86_address parts;
10855 rtx base, index, disp;
10856 HOST_WIDE_INT scale;
10858 if (ix86_decompose_address (addr, &parts) <= 0)
10859 /* Decomposition failed. */
10863 index = parts.index;
10865 scale = parts.scale;
10867 /* Validate base register.
10869 Don't allow SUBREGs that span more than a word here. It can lead to spill
10870 failures when the base is one word out of a two word structure, which is
10871 represented internally as a DImode int. */
10879 else if (GET_CODE (base) == SUBREG
10880 && REG_P (SUBREG_REG (base))
10881 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10883 reg = SUBREG_REG (base);
10885 /* Base is not a register. */
10888 if (GET_MODE (base) != Pmode)
10889 /* Base is not in Pmode. */
10892 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10893 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10894 /* Base is not valid. */
10898 /* Validate index register.
10900 Don't allow SUBREGs that span more than a word here -- same as above. */
10908 else if (GET_CODE (index) == SUBREG
10909 && REG_P (SUBREG_REG (index))
10910 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10912 reg = SUBREG_REG (index);
10914 /* Index is not a register. */
10917 if (GET_MODE (index) != Pmode)
10918 /* Index is not in Pmode. */
10921 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10922 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10923 /* Index is not valid. */
10927 /* Validate scale factor. */
10931 /* Scale without index. */
10934 if (scale != 2 && scale != 4 && scale != 8)
10935 /* Scale is not a valid multiplier. */
10939 /* Validate displacement. */
10942 if (GET_CODE (disp) == CONST
10943 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10944 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10945 switch (XINT (XEXP (disp, 0), 1))
10947 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
10948 used. While the ABI also specifies 32bit relocations, we don't produce
10949 them at all and use IP relative addressing instead. */
10951 case UNSPEC_GOTOFF:
10952 gcc_assert (flag_pic);
10954 goto is_legitimate_pic;
10956 /* 64bit address unspec. */
10959 case UNSPEC_GOTPCREL:
10960 gcc_assert (flag_pic);
10961 goto is_legitimate_pic;
10963 case UNSPEC_GOTTPOFF:
10964 case UNSPEC_GOTNTPOFF:
10965 case UNSPEC_INDNTPOFF:
10966 case UNSPEC_NTPOFF:
10967 case UNSPEC_DTPOFF:
10971 /* Invalid address unspec. */
10975 else if (SYMBOLIC_CONST (disp)
10979 && MACHOPIC_INDIRECT
10980 && !machopic_operand_p (disp)
10986 if (TARGET_64BIT && (index || base))
10988 /* foo@dtpoff(%rX) is ok. */
10989 if (GET_CODE (disp) != CONST
10990 || GET_CODE (XEXP (disp, 0)) != PLUS
10991 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10992 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10993 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10994 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10995 /* Non-constant pic memory reference. */
10998 else if (! legitimate_pic_address_disp_p (disp))
10999 /* Displacement is an invalid pic construct. */
11002 /* This code used to verify that a symbolic pic displacement
11003 includes the pic_offset_table_rtx register.
11005 While this is a good idea, unfortunately these constructs may
11006 be created by the "adds using lea" optimization for incorrect code such as:
11015 This code is nonsensical, but results in addressing the
11016 GOT table with the pic_offset_table_rtx base. We can't
11017 just refuse it easily, since it gets matched by the
11018 "addsi3" pattern, which later gets split to lea in the
11019 case the output register differs from the input. While this
11020 could be handled by a separate addsi pattern for this case
11021 that never results in lea, disabling this test seems to be the
11022 easier and correct fix for the crash. */
11024 else if (GET_CODE (disp) != LABEL_REF
11025 && !CONST_INT_P (disp)
11026 && (GET_CODE (disp) != CONST
11027 || !legitimate_constant_p (disp))
11028 && (GET_CODE (disp) != SYMBOL_REF
11029 || !legitimate_constant_p (disp)))
11030 /* Displacement is not constant. */
11032 else if (TARGET_64BIT
11033 && !x86_64_immediate_operand (disp, VOIDmode))
11034 /* Displacement is out of range. */
11038 /* Everything looks valid. */
11042 /* Determine if a given RTX is a valid constant address. */
11045 constant_address_p (rtx x)
11047 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
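/* For illustration: the AT&T operand "4(%ebx,%ecx,2)" -- base %ebx,
   index %ecx, scale 2, disp 4 -- passes the checks above, while a
   scale of 3 would fail, since only 1, 2, 4 and 8 can be encoded.  */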
11050 /* Return a unique alias set for the GOT. */
11052 static alias_set_type
11053 ix86_GOT_alias_set (void)
11055 static alias_set_type set = -1;
11057 set = new_alias_set ();
11061 /* Return a legitimate reference for ORIG (an address) using the
11062 register REG. If REG is 0, a new pseudo is generated.
11064 There are two types of references that must be handled:
11066 1. Global data references must load the address from the GOT, via
11067 the PIC reg. An insn is emitted to do this load, and the reg is
11070 2. Static data references, constant pool addresses, and code labels
11071 compute the address as an offset from the GOT, whose base is in
11072 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11073 differentiate them from global data objects. The returned
11074 address is the PIC reg + an unspec constant.
11076 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11077 reg also appears in the address. */
11080 legitimize_pic_address (rtx orig, rtx reg)
11083 rtx new_rtx = orig;
11087 if (TARGET_MACHO && !TARGET_64BIT)
11090 reg = gen_reg_rtx (Pmode);
11091 /* Use the generic Mach-O PIC machinery. */
11092 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11096 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11098 else if (TARGET_64BIT
11099 && ix86_cmodel != CM_SMALL_PIC
11100 && gotoff_operand (addr, Pmode))
11103 /* This symbol may be referenced via a displacement from the PIC
11104 base address (@GOTOFF). */
11106 if (reload_in_progress)
11107 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11108 if (GET_CODE (addr) == CONST)
11109 addr = XEXP (addr, 0);
11110 if (GET_CODE (addr) == PLUS)
11112 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11114 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11117 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11118 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11120 tmpreg = gen_reg_rtx (Pmode);
11123 emit_move_insn (tmpreg, new_rtx);
11127 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11128 tmpreg, 1, OPTAB_DIRECT);
11131 else
new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11133 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11135 /* This symbol may be referenced via a displacement from the PIC
11136 base address (@GOTOFF). */
11138 if (reload_in_progress)
11139 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11140 if (GET_CODE (addr) == CONST)
11141 addr = XEXP (addr, 0);
11142 if (GET_CODE (addr) == PLUS)
11144 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11146 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11149 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11150 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11151 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11155 emit_move_insn (reg, new_rtx);
11159 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11160 /* We can't use @GOTOFF for text labels on VxWorks;
11161 see gotoff_operand. */
11162 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11164 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11166 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11167 return legitimize_dllimport_symbol (addr, true);
11168 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11169 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11170 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11172 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11173 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11177 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11179 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11180 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11181 new_rtx = gen_const_mem (Pmode, new_rtx);
11182 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11185 reg = gen_reg_rtx (Pmode);
11186 /* Use gen_movsi directly, otherwise the address is loaded
11187 into a register for CSE. We don't want to CSE these addresses;
11188 instead we CSE addresses from the GOT table, so skip this. */
11189 emit_insn (gen_movsi (reg, new_rtx));
11194 /* This symbol must be referenced via a load from the
11195 Global Offset Table (@GOT). */
11197 if (reload_in_progress)
11198 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11199 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11200 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11202 new_rtx = force_reg (Pmode, new_rtx);
11203 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11204 new_rtx = gen_const_mem (Pmode, new_rtx);
11205 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11208 reg = gen_reg_rtx (Pmode);
11209 emit_move_insn (reg, new_rtx);
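/* For illustration, on ia32 the two PIC reference styles generated
   here look roughly like (register choice is up to the allocator):
     movl  foo@GOT(%ebx), %eax     # global data: load address from GOT
     leal  bar@GOTOFF(%ebx), %eax  # local data: offset from PIC base  */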
11215 if (CONST_INT_P (addr)
11216 && !x86_64_immediate_operand (addr, VOIDmode))
11220 emit_move_insn (reg, addr);
11224 new_rtx = force_reg (Pmode, addr);
11226 else if (GET_CODE (addr) == CONST)
11228 addr = XEXP (addr, 0);
11230 /* We must match stuff we generate before. Assume the only
11231 unspecs that can get here are ours. Not that we could do
11232 anything with them anyway.... */
11233 if (GET_CODE (addr) == UNSPEC
11234 || (GET_CODE (addr) == PLUS
11235 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11237 gcc_assert (GET_CODE (addr) == PLUS);
11239 if (GET_CODE (addr) == PLUS)
11241 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11243 /* Check first to see if this is a constant offset from a @GOTOFF
11244 symbol reference. */
11245 if (gotoff_operand (op0, Pmode)
11246 && CONST_INT_P (op1))
11250 if (reload_in_progress)
11251 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11252 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11254 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11255 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11256 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11260 emit_move_insn (reg, new_rtx);
11266 if (INTVAL (op1) < -16*1024*1024
11267 || INTVAL (op1) >= 16*1024*1024)
11269 if (!x86_64_immediate_operand (op1, Pmode))
11270 op1 = force_reg (Pmode, op1);
11271 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11277 base = legitimize_pic_address (XEXP (addr, 0), reg);
11278 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11279 base == reg ? NULL_RTX : reg);
11281 if (CONST_INT_P (new_rtx))
11282 new_rtx = plus_constant (base, INTVAL (new_rtx));
11285 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11287 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11288 new_rtx = XEXP (new_rtx, 1);
11290 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11298 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11301 get_thread_pointer (int to_reg)
11305 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11309 reg = gen_reg_rtx (Pmode);
11310 insn = gen_rtx_SET (VOIDmode, reg, tp);
11311 insn = emit_insn (insn);
11316 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11317 false if we expect this to be used for a memory address and true if
11318 we expect to load the address into a register. */
11321 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11323 rtx dest, base, off, pic, tp;
11328 case TLS_MODEL_GLOBAL_DYNAMIC:
11329 dest = gen_reg_rtx (Pmode);
11330 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11332 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11334 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11337 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11338 insns = get_insns ();
11341 RTL_CONST_CALL_P (insns) = 1;
11342 emit_libcall_block (insns, dest, rax, x);
11344 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11345 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11347 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11349 if (TARGET_GNU2_TLS)
11351 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11353 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11357 case TLS_MODEL_LOCAL_DYNAMIC:
11358 base = gen_reg_rtx (Pmode);
11359 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11361 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11363 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11366 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11367 insns = get_insns ();
11370 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11371 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11372 RTL_CONST_CALL_P (insns) = 1;
11373 emit_libcall_block (insns, base, rax, note);
11375 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11376 emit_insn (gen_tls_local_dynamic_base_64 (base));
11378 emit_insn (gen_tls_local_dynamic_base_32 (base));
11380 if (TARGET_GNU2_TLS)
11382 rtx x = ix86_tls_module_base ();
11384 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11385 gen_rtx_MINUS (Pmode, x, tp));
11388 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11389 off = gen_rtx_CONST (Pmode, off);
11391 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11393 if (TARGET_GNU2_TLS)
11395 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11397 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11402 case TLS_MODEL_INITIAL_EXEC:
11406 type = UNSPEC_GOTNTPOFF;
11410 if (reload_in_progress)
11411 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11412 pic = pic_offset_table_rtx;
11413 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11415 else if (!TARGET_ANY_GNU_TLS)
11417 pic = gen_reg_rtx (Pmode);
11418 emit_insn (gen_set_got (pic));
11419 type = UNSPEC_GOTTPOFF;
11424 type = UNSPEC_INDNTPOFF;
11427 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11428 off = gen_rtx_CONST (Pmode, off);
11430 off = gen_rtx_PLUS (Pmode, pic, off);
11431 off = gen_const_mem (Pmode, off);
11432 set_mem_alias_set (off, ix86_GOT_alias_set ());
11434 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11436 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11437 off = force_reg (Pmode, off);
11438 return gen_rtx_PLUS (Pmode, base, off);
11442 base = get_thread_pointer (true);
11443 dest = gen_reg_rtx (Pmode);
11444 emit_insn (gen_subsi3 (dest, base, off));
11448 case TLS_MODEL_LOCAL_EXEC:
11449 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11450 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11451 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11452 off = gen_rtx_CONST (Pmode, off);
11454 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11456 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11457 return gen_rtx_PLUS (Pmode, base, off);
11461 base = get_thread_pointer (true);
11462 dest = gen_reg_rtx (Pmode);
11463 emit_insn (gen_subsi3 (dest, base, off));
11468 gcc_unreachable ();
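/* A rough sketch of the 32-bit GNU TLS sequences built above
   (exact registers and scheduling vary):
     global dynamic: leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
     initial exec:   movl x@gotntpoff(%ebx), %ecx; movl %gs:(%ecx), %eax
     local exec:     movl %gs:0, %eax; movl x@ntpoff(%eax), %eax
   The 64-bit variants address thread-local storage via %fs instead.  */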
11474 /* Create or return the unique __imp_DECL dllimport symbol corresponding to symbol DECL. */
11477 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11478 htab_t dllimport_map;
11481 get_dllimport_decl (tree decl)
11483 struct tree_map *h, in;
11486 const char *prefix;
11487 size_t namelen, prefixlen;
11492 if (!dllimport_map)
11493 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11495 in.hash = htab_hash_pointer (decl);
11496 in.base.from = decl;
11497 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11498 h = (struct tree_map *) *loc;
11502 *loc = h = ggc_alloc_tree_map ();
11504 h->base.from = decl;
11505 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11506 VAR_DECL, NULL, ptr_type_node);
11507 DECL_ARTIFICIAL (to) = 1;
11508 DECL_IGNORED_P (to) = 1;
11509 DECL_EXTERNAL (to) = 1;
11510 TREE_READONLY (to) = 1;
11512 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11513 name = targetm.strip_name_encoding (name);
11514 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11515 ? "*__imp_" : "*__imp__";
11516 namelen = strlen (name);
11517 prefixlen = strlen (prefix);
11518 imp_name = (char *) alloca (namelen + prefixlen + 1);
11519 memcpy (imp_name, prefix, prefixlen);
11520 memcpy (imp_name + prefixlen, name, namelen + 1);
11522 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11523 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11524 SET_SYMBOL_REF_DECL (rtl, to);
11525 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11527 rtl = gen_const_mem (Pmode, rtl);
11528 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11530 SET_DECL_RTL (to, rtl);
11531 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
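/* For illustration: a reference to a dllimport'ed function foo then
   goes through the pointer slot created here, e.g. "call *__imp__foo"
   instead of "call _foo" (the extra underscore assumes the default
   ia32 user_label_prefix).  */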
11536 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11537 true if we require the result be a register. */
11540 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11545 gcc_assert (SYMBOL_REF_DECL (symbol));
11546 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11548 x = DECL_RTL (imp_decl);
11550 x = force_reg (Pmode, x);
11554 /* Try machine-dependent ways of modifying an illegitimate address
11555 to be legitimate. If we find one, return the new, valid address.
11556 This macro is used in only one place: `memory_address' in explow.c.
11558 OLDX is the address as it was before break_out_memory_refs was called.
11559 In some cases it is useful to look at this to decide what needs to be done.
11561 It is always safe for this macro to do nothing. It exists to recognize
11562 opportunities to optimize the output.
11564 For the 80386, we handle X+REG by loading X into a register R and
11565 using R+REG. R will go in a general reg and indexing will be used.
11566 However, if REG is a broken-out memory address or multiplication,
11567 nothing needs to be done because REG can certainly go in a general reg.
11569 When -fpic is used, special handling is needed for symbolic references.
11570 See comments by legitimize_pic_address in i386.c for details. */
11573 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11574 enum machine_mode mode)
11579 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11581 return legitimize_tls_address (x, (enum tls_model) log, false);
11582 if (GET_CODE (x) == CONST
11583 && GET_CODE (XEXP (x, 0)) == PLUS
11584 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11585 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11587 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11588 (enum tls_model) log, false);
11589 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11592 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11594 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11595 return legitimize_dllimport_symbol (x, true);
11596 if (GET_CODE (x) == CONST
11597 && GET_CODE (XEXP (x, 0)) == PLUS
11598 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11599 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11601 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11602 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11606 if (flag_pic && SYMBOLIC_CONST (x))
11607 return legitimize_pic_address (x, 0);
11609 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11610 if (GET_CODE (x) == ASHIFT
11611 && CONST_INT_P (XEXP (x, 1))
11612 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11615 log = INTVAL (XEXP (x, 1));
11616 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11617 GEN_INT (1 << log));
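/* For example, (ashift (reg) (const_int 3)) becomes
   (mult (reg) (const_int 8)), matching the scaled-index form
   that ix86_decompose_address accepts.  */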
11620 if (GET_CODE (x) == PLUS)
11622 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11624 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11625 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11626 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11629 log = INTVAL (XEXP (XEXP (x, 0), 1));
11630 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11631 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11632 GEN_INT (1 << log));
11635 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11636 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11637 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11640 log = INTVAL (XEXP (XEXP (x, 1), 1));
11641 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11642 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11643 GEN_INT (1 << log));
11646 /* Put multiply first if it isn't already. */
11647 if (GET_CODE (XEXP (x, 1)) == MULT)
11649 rtx tmp = XEXP (x, 0);
11650 XEXP (x, 0) = XEXP (x, 1);
11655 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11656 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11657 created by virtual register instantiation, register elimination, and
11658 similar optimizations. */
11659 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11662 x = gen_rtx_PLUS (Pmode,
11663 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11664 XEXP (XEXP (x, 1), 0)),
11665 XEXP (XEXP (x, 1), 1));
11669 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11670 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11671 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11672 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11673 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11674 && CONSTANT_P (XEXP (x, 1)))
11677 rtx other = NULL_RTX;
11679 if (CONST_INT_P (XEXP (x, 1)))
11681 constant = XEXP (x, 1);
11682 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11684 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11686 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11687 other = XEXP (x, 1);
11695 x = gen_rtx_PLUS (Pmode,
11696 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11697 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11698 plus_constant (other, INTVAL (constant)));
11702 if (changed && ix86_legitimate_address_p (mode, x, false))
11705 if (GET_CODE (XEXP (x, 0)) == MULT)
11708 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11711 if (GET_CODE (XEXP (x, 1)) == MULT)
11714 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11718 && REG_P (XEXP (x, 1))
11719 && REG_P (XEXP (x, 0)))
11722 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11725 x = legitimize_pic_address (x, 0);
11728 if (changed && ix86_legitimate_address_p (mode, x, false))
11731 if (REG_P (XEXP (x, 0)))
11733 rtx temp = gen_reg_rtx (Pmode);
11734 rtx val = force_operand (XEXP (x, 1), temp);
11736 emit_move_insn (temp, val);
11738 XEXP (x, 1) = temp;
11742 else if (REG_P (XEXP (x, 1)))
11744 rtx temp = gen_reg_rtx (Pmode);
11745 rtx val = force_operand (XEXP (x, 0), temp);
11747 emit_move_insn (temp, val);
11749 XEXP (x, 0) = temp;
11757 /* Print an integer constant expression in assembler syntax. Addition
11758 and subtraction are the only arithmetic that may appear in these
11759 expressions. FILE is the stdio stream to write to, X is the rtx, and
11760 CODE is the operand print code from the output string. */
11763 output_pic_addr_const (FILE *file, rtx x, int code)
11767 switch (GET_CODE (x))
11770 gcc_assert (flag_pic);
11775 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11776 output_addr_const (file, x);
11779 const char *name = XSTR (x, 0);
11781 /* Mark the decl as referenced so that cgraph will
11782 output the function. */
11783 if (SYMBOL_REF_DECL (x))
11784 mark_decl_referenced (SYMBOL_REF_DECL (x));
11787 if (MACHOPIC_INDIRECT
11788 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11789 name = machopic_indirection_name (x, /*stub_p=*/true);
11791 assemble_name (file, name);
11793 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11794 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11795 fputs ("@PLT", file);
11802 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11803 assemble_name (asm_out_file, buf);
11807 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11811 /* This used to output parentheses around the expression,
11812 but that does not work on the 386 (either ATT or BSD assembler). */
11813 output_pic_addr_const (file, XEXP (x, 0), code);
11817 if (GET_MODE (x) == VOIDmode)
11819 /* We can use %d if the number is <32 bits and positive. */
11820 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11821 fprintf (file, "0x%lx%08lx",
11822 (unsigned long) CONST_DOUBLE_HIGH (x),
11823 (unsigned long) CONST_DOUBLE_LOW (x));
11825 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11828 /* We can't handle floating point constants;
11829 TARGET_PRINT_OPERAND must handle them. */
11830 output_operand_lossage ("floating constant misused");
11834 /* Some assemblers need integer constants to appear first. */
11835 if (CONST_INT_P (XEXP (x, 0)))
11837 output_pic_addr_const (file, XEXP (x, 0), code);
11839 output_pic_addr_const (file, XEXP (x, 1), code);
11843 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11844 output_pic_addr_const (file, XEXP (x, 1), code);
11846 output_pic_addr_const (file, XEXP (x, 0), code);
11852 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11853 output_pic_addr_const (file, XEXP (x, 0), code);
11855 output_pic_addr_const (file, XEXP (x, 1), code);
11857 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11861 gcc_assert (XVECLEN (x, 0) == 1);
11862 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11863 switch (XINT (x, 1))
11866 fputs ("@GOT", file);
11868 case UNSPEC_GOTOFF:
11869 fputs ("@GOTOFF", file);
11871 case UNSPEC_PLTOFF:
11872 fputs ("@PLTOFF", file);
11874 case UNSPEC_GOTPCREL:
11875 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11876 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11878 case UNSPEC_GOTTPOFF:
11879 /* FIXME: This might be @TPOFF in Sun ld too. */
11880 fputs ("@gottpoff", file);
11883 fputs ("@tpoff", file);
11885 case UNSPEC_NTPOFF:
11887 fputs ("@tpoff", file);
11889 fputs ("@ntpoff", file);
11891 case UNSPEC_DTPOFF:
11892 fputs ("@dtpoff", file);
11894 case UNSPEC_GOTNTPOFF:
11896 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11897 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11899 fputs ("@gotntpoff", file);
11901 case UNSPEC_INDNTPOFF:
11902 fputs ("@indntpoff", file);
11905 case UNSPEC_MACHOPIC_OFFSET:
11907 machopic_output_function_base_name (file);
11911 output_operand_lossage ("invalid UNSPEC as operand");
11917 output_operand_lossage ("invalid expression as operand");
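/* For illustration: given (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOTOFF)) this prints "foo@GOTOFF", while UNSPEC_GOTPCREL
   additionally appends "(%rip)" in AT&T syntax.  */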
11921 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11922 We need to emit DTP-relative relocations. */
11924 static void ATTRIBUTE_UNUSED
11925 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11927 fputs (ASM_LONG, file);
11928 output_addr_const (file, x);
11929 fputs ("@dtpoff", file);
11935 fputs (", 0", file);
11938 gcc_unreachable ();
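/* For illustration: for SIZE 4 this emits ".long x@dtpoff"; for
   SIZE 8 it emits ".long x@dtpoff, 0", padding the upper half since
   the relocation itself is only 32 bits wide.  */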
11942 /* Return true if X is a representation of the PIC register. This copes
11943 with calls from ix86_find_base_term, where the register might have
11944 been replaced by a cselib value. */
11947 ix86_pic_register_p (rtx x)
11949 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11950 return (pic_offset_table_rtx
11951 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11953 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11956 /* In the name of slightly smaller debug output, and to cater to
11957 general assembler lossage, recognize PIC+GOTOFF and turn it back
11958 into a direct symbol reference.
11960 On Darwin, this is necessary to avoid a crash, because Darwin
11961 has a different PIC label for each routine but the DWARF debugging
11962 information is not associated with any particular routine, so it's
11963 necessary to remove references to the PIC label from RTL stored by
11964 the DWARF output code. */
11967 ix86_delegitimize_address (rtx x)
11969 rtx orig_x = delegitimize_mem_from_attrs (x);
11970 /* addend is NULL or some rtx if x is something+GOTOFF where
11971 something doesn't include the PIC register. */
11972 rtx addend = NULL_RTX;
11973 /* reg_addend is NULL or a multiple of some register. */
11974 rtx reg_addend = NULL_RTX;
11975 /* const_addend is NULL or a const_int. */
11976 rtx const_addend = NULL_RTX;
11977 /* This is the result, or NULL. */
11978 rtx result = NULL_RTX;
11987 if (GET_CODE (x) != CONST
11988 || GET_CODE (XEXP (x, 0)) != UNSPEC
11989 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11990 || !MEM_P (orig_x))
11992 x = XVECEXP (XEXP (x, 0), 0, 0);
11993 if (GET_MODE (orig_x) != Pmode)
11994 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11998 if (GET_CODE (x) != PLUS
11999 || GET_CODE (XEXP (x, 1)) != CONST)
12002 if (ix86_pic_register_p (XEXP (x, 0)))
12003 /* %ebx + GOT/GOTOFF */
12005 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12007 /* %ebx + %reg * scale + GOT/GOTOFF */
12008 reg_addend = XEXP (x, 0);
12009 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12010 reg_addend = XEXP (reg_addend, 1);
12011 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12012 reg_addend = XEXP (reg_addend, 0);
12015 reg_addend = NULL_RTX;
12016 addend = XEXP (x, 0);
12020 addend = XEXP (x, 0);
12022 x = XEXP (XEXP (x, 1), 0);
12023 if (GET_CODE (x) == PLUS
12024 && CONST_INT_P (XEXP (x, 1)))
12026 const_addend = XEXP (x, 1);
12030 if (GET_CODE (x) == UNSPEC
12031 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12032 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12033 result = XVECEXP (x, 0, 0);
12035 if (TARGET_MACHO && darwin_local_data_pic (x)
12036 && !MEM_P (orig_x))
12037 result = XVECEXP (x, 0, 0);
12043 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12045 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12048 /* If the rest of original X doesn't involve the PIC register, add
12049 addend and subtract pic_offset_table_rtx. This can happen e.g.
12051 leal (%ebx, %ecx, 4), %ecx
12053 movl foo@GOTOFF(%ecx), %edx
12054 in which case we return (%ecx - %ebx) + foo. */
12055 if (pic_offset_table_rtx)
12056 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12057 pic_offset_table_rtx),
12062 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12063 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12067 /* If X is a machine specific address (i.e. a symbol or label being
12068 referenced as a displacement from the GOT implemented using an
12069 UNSPEC), then return the base term. Otherwise return X. */
12072 ix86_find_base_term (rtx x)
12078 if (GET_CODE (x) != CONST)
12080 term = XEXP (x, 0);
12081 if (GET_CODE (term) == PLUS
12082 && (CONST_INT_P (XEXP (term, 1))
12083 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12084 term = XEXP (term, 0);
12085 if (GET_CODE (term) != UNSPEC
12086 || XINT (term, 1) != UNSPEC_GOTPCREL)
12089 return XVECEXP (term, 0, 0);
12092 return ix86_delegitimize_address (x);
12096 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12097 int fp, FILE *file)
12099 const char *suffix;
12101 if (mode == CCFPmode || mode == CCFPUmode)
12103 code = ix86_fp_compare_code_to_integer (code);
12107 code = reverse_condition (code);
12158 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12162 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12163 Those same assemblers have the same but opposite lossage on cmov. */
12164 if (mode == CCmode)
12165 suffix = fp ? "nbe" : "a";
12166 else if (mode == CCCmode)
12169 gcc_unreachable ();
12185 gcc_unreachable ();
12189 gcc_assert (mode == CCmode || mode == CCCmode);
12206 gcc_unreachable ();
12210 /* ??? As above. */
12211 gcc_assert (mode == CCmode || mode == CCCmode);
12212 suffix = fp ? "nb" : "ae";
12215 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12219 /* ??? As above. */
12220 if (mode == CCmode)
12222 else if (mode == CCCmode)
12223 suffix = fp ? "nb" : "ae";
12225 gcc_unreachable ();
12228 suffix = fp ? "u" : "p";
12231 suffix = fp ? "nu" : "np";
12234 gcc_unreachable ();
12236 fputs (suffix, file);
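/* For illustration: a signed (gt ...) comparison in CCGCmode prints
   the suffix "g", while its unsigned counterpart (gtu ...) in CCmode
   prints "a" (or "nbe" in the fcmov case, per the comment above).  */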
12239 /* Print the name of register X to FILE based on its machine mode and number.
12240 If CODE is 'w', pretend the mode is HImode.
12241 If CODE is 'b', pretend the mode is QImode.
12242 If CODE is 'k', pretend the mode is SImode.
12243 If CODE is 'q', pretend the mode is DImode.
12244 If CODE is 'x', pretend the mode is V4SFmode.
12245 If CODE is 't', pretend the mode is V8SFmode.
12246 If CODE is 'h', pretend the reg is the 'high' byte register.
12247 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12248 If CODE is 'd', duplicate the operand for AVX instruction.
12252 print_reg (rtx x, int code, FILE *file)
12255 bool duplicated = code == 'd' && TARGET_AVX;
12257 gcc_assert (x == pc_rtx
12258 || (REGNO (x) != ARG_POINTER_REGNUM
12259 && REGNO (x) != FRAME_POINTER_REGNUM
12260 && REGNO (x) != FLAGS_REG
12261 && REGNO (x) != FPSR_REG
12262 && REGNO (x) != FPCR_REG));
12264 if (ASSEMBLER_DIALECT == ASM_ATT)
12269 gcc_assert (TARGET_64BIT);
12270 fputs ("rip", file);
12274 if (code == 'w' || MMX_REG_P (x))
12276 else if (code == 'b')
12278 else if (code == 'k')
12280 else if (code == 'q')
12282 else if (code == 'y')
12284 else if (code == 'h')
12286 else if (code == 'x')
12288 else if (code == 't')
12291 code = GET_MODE_SIZE (GET_MODE (x));
12293 /* Irritatingly, AMD extended registers use a different naming convention
12294 from the normal registers. */
12295 if (REX_INT_REG_P (x))
12297 gcc_assert (TARGET_64BIT);
12301 error ("extended registers have no high halves");
12304 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12307 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12310 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12313 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12316 error ("unsupported operand size for extended register");
12326 if (STACK_TOP_P (x))
12335 if (! ANY_FP_REG_P (x))
12336 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12341 reg = hi_reg_name[REGNO (x)];
12344 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12346 reg = qi_reg_name[REGNO (x)];
12349 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12351 reg = qi_high_reg_name[REGNO (x)];
12356 gcc_assert (!duplicated);
12358 fputs (hi_reg_name[REGNO (x)] + 1, file);
12363 gcc_unreachable ();
12369 if (ASSEMBLER_DIALECT == ASM_ATT)
12370 fprintf (file, ", %%%s", reg);
12372 fprintf (file, ", %s", reg);
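/* For illustration: for (reg:SI 0), code 'b' prints "al", 'w' prints
   "ax", 'k' prints "eax" and 'q' prints "rax" (the latter only makes
   sense on 64-bit targets).  */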
12376 /* Locate some local-dynamic symbol still in use by this function
12377 so that we can print its name in some tls_local_dynamic_base pattern. */
12381 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12385 if (GET_CODE (x) == SYMBOL_REF
12386 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12388 cfun->machine->some_ld_name = XSTR (x, 0);
12395 static const char *
12396 get_some_local_dynamic_name (void)
12400 if (cfun->machine->some_ld_name)
12401 return cfun->machine->some_ld_name;
12403 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12404 if (NONDEBUG_INSN_P (insn)
12405 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12406 return cfun->machine->some_ld_name;
12411 /* Meaning of CODE:
12412 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12413 C -- print opcode suffix for set/cmov insn.
12414 c -- like C, but print reversed condition
12415 F,f -- likewise, but for floating-point.
12416 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12418 R -- print the prefix for register names.
12419 z -- print the opcode suffix for the size of the current operand.
12420 Z -- likewise, with special suffixes for x87 instructions.
12421 * -- print a star (in certain assembler syntax)
12422 A -- print an absolute memory reference.
12423 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12424 s -- print a shift double count, followed by the assembler's argument delimiter.
12426 b -- print the QImode name of the register for the indicated operand.
12427 %b0 would print %al if operands[0] is reg 0.
12428 w -- likewise, print the HImode name of the register.
12429 k -- likewise, print the SImode name of the register.
12430 q -- likewise, print the DImode name of the register.
12431 x -- likewise, print the V4SFmode name of the register.
12432 t -- likewise, print the V8SFmode name of the register.
12433 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12434 y -- print "st(0)" instead of "st" as a register.
12435 d -- print duplicated register operand for AVX instruction.
12436 D -- print condition for SSE cmp instruction.
12437 P -- if PIC, print an @PLT suffix.
12438 X -- don't print any sort of PIC '@' suffix for a symbol.
12439 & -- print some in-use local-dynamic symbol name.
12440 H -- print a memory address offset by 8; used for sse high-parts
12441 Y -- print condition for XOP pcom* instruction.
12442 + -- print a branch hint as 'cs' or 'ds' prefix
12443 ; -- print a semicolon (after prefixes due to bug in older gas).
12444 @ -- print a segment register of thread base pointer load
12448 ix86_print_operand (FILE *file, rtx x, int code)
12455 if (ASSEMBLER_DIALECT == ASM_ATT)
12461 const char *name = get_some_local_dynamic_name ();
12463 output_operand_lossage ("'%%&' used without any "
12464 "local dynamic TLS references");
12466 assemble_name (file, name);
12471 switch (ASSEMBLER_DIALECT)
12478 /* Intel syntax. For absolute addresses, registers should not
12479 be surrounded by braces. */
12483 ix86_print_operand (file, x, 0);
12490 gcc_unreachable ();
12493 ix86_print_operand (file, x, 0);
12498 if (ASSEMBLER_DIALECT == ASM_ATT)
12503 if (ASSEMBLER_DIALECT == ASM_ATT)
12508 if (ASSEMBLER_DIALECT == ASM_ATT)
12513 if (ASSEMBLER_DIALECT == ASM_ATT)
12518 if (ASSEMBLER_DIALECT == ASM_ATT)
12523 if (ASSEMBLER_DIALECT == ASM_ATT)
12528 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12530 /* Opcodes don't get size suffixes if using Intel opcodes. */
12531 if (ASSEMBLER_DIALECT == ASM_INTEL)
12534 switch (GET_MODE_SIZE (GET_MODE (x)))
12553 output_operand_lossage
12554 ("invalid operand size for operand code '%c'", code);
12559 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12561 (0, "non-integer operand used with operand code '%c'", code);
12565 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12566 if (ASSEMBLER_DIALECT == ASM_INTEL)
12569 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12571 switch (GET_MODE_SIZE (GET_MODE (x)))
12574 #ifdef HAVE_AS_IX86_FILDS
12584 #ifdef HAVE_AS_IX86_FILDQ
12587 fputs ("ll", file);
12595 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12597 /* 387 opcodes don't get size suffixes
12598 if the operands are registers. */
12599 if (STACK_REG_P (x))
12602 switch (GET_MODE_SIZE (GET_MODE (x)))
12623 output_operand_lossage
12624 ("invalid operand type used with operand code '%c'", code);
12628 output_operand_lossage
12629 ("invalid operand size for operand code '%c'", code);
12646 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12648 ix86_print_operand (file, x, 0);
12649 fputs (", ", file);
12654 /* Little bit of braindamage here. The SSE compare instructions
12655 use completely different names for the comparisons than the
12656 fp conditional moves do. */
12659 switch (GET_CODE (x))
12662 fputs ("eq", file);
12665 fputs ("eq_us", file);
12668 fputs ("lt", file);
12671 fputs ("nge", file);
12674 fputs ("le", file);
12677 fputs ("ngt", file);
12680 fputs ("unord", file);
12683 fputs ("neq", file);
12686 fputs ("neq_oq", file);
12689 fputs ("ge", file);
12692 fputs ("nlt", file);
12695 fputs ("gt", file);
12698 fputs ("nle", file);
12701 fputs ("ord", file);
12704 output_operand_lossage ("operand is not a condition code, "
12705 "invalid operand code 'D'");
12711 switch (GET_CODE (x))
12715 fputs ("eq", file);
12719 fputs ("lt", file);
12723 fputs ("le", file);
12726 fputs ("unord", file);
12730 fputs ("neq", file);
12734 fputs ("nlt", file);
12738 fputs ("nle", file);
12741 fputs ("ord", file);
12744 output_operand_lossage ("operand is not a condition code, "
12745 "invalid operand code 'D'");
12751 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12752 if (ASSEMBLER_DIALECT == ASM_ATT)
12754 switch (GET_MODE (x))
12756 case HImode: putc ('w', file); break;
12758 case SFmode: putc ('l', file); break;
12760 case DFmode: putc ('q', file); break;
12761 default: gcc_unreachable ();
12768 if (!COMPARISON_P (x))
12770 output_operand_lossage ("operand is neither a constant nor a "
12771 "condition code, invalid operand code "
12775 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12778 if (!COMPARISON_P (x))
12780 output_operand_lossage ("operand is neither a constant nor a "
12781 "condition code, invalid operand code "
12785 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12786 if (ASSEMBLER_DIALECT == ASM_ATT)
12789 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
12792 /* Like above, but reverse condition */
12794 /* Check to see if argument to %c is really a constant
12795 and not a condition code which needs to be reversed. */
12796 if (!COMPARISON_P (x))
12798 output_operand_lossage ("operand is neither a constant nor a "
12799 "condition code, invalid operand "
12803 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12806 if (!COMPARISON_P (x))
12808 output_operand_lossage ("operand is neither a constant nor a "
12809 "condition code, invalid operand "
12813 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12814 if (ASSEMBLER_DIALECT == ASM_ATT)
12817 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12821 /* It doesn't actually matter what mode we use here, as we're
12822 only going to use this for printing. */
12823 x = adjust_address_nv (x, DImode, 8);
12831 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12834 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12837 int pred_val = INTVAL (XEXP (x, 0));
12839 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12840 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12842 int taken = pred_val > REG_BR_PROB_BASE / 2;
12843 int cputaken = final_forward_branch_p (current_output_insn) == 0;
12845 /* Emit hints only in the case where the default branch prediction
12846 heuristics would fail. */
12847 if (taken != cputaken)
12849 /* We use 3e (DS) prefix for taken branches and
12850 2e (CS) prefix for not taken branches. */
12852 fputs ("ds ; ", file);
12854 fputs ("cs ; ", file);
12862 switch (GET_CODE (x))
12865 fputs ("neq", file);
12868 fputs ("eq", file);
12872 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12876 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12880 fputs ("le", file);
12884 fputs ("lt", file);
12887 fputs ("unord", file);
12890 fputs ("ord", file);
12893 fputs ("ueq", file);
12896 fputs ("nlt", file);
12899 fputs ("nle", file);
12902 fputs ("ule", file);
12905 fputs ("ult", file);
12908 fputs ("une", file);
12911 output_operand_lossage ("operand is not a condition code, "
12912 "invalid operand code 'Y'");
12918 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12924 if (ASSEMBLER_DIALECT == ASM_ATT)
12927 /* The kernel uses a different segment register for performance
12928 reasons; a system call would not have to trash the userspace
12929 segment register, which would be expensive. */
12930 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
12931 fputs ("fs", file);
12933 fputs ("gs", file);
12937 output_operand_lossage ("invalid operand code '%c'", code);
12942 print_reg (x, code, file);
12944 else if (MEM_P (x))
12946 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12947 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12948 && GET_MODE (x) != BLKmode)
12951 switch (GET_MODE_SIZE (GET_MODE (x)))
12953 case 1: size = "BYTE"; break;
12954 case 2: size = "WORD"; break;
12955 case 4: size = "DWORD"; break;
12956 case 8: size = "QWORD"; break;
12957 case 12: size = "TBYTE"; break;
12959 if (GET_MODE (x) == XFmode)
12964 case 32: size = "YMMWORD"; break;
12966 gcc_unreachable ();
12969 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12972 else if (code == 'w')
12974 else if (code == 'k')
12977 fputs (size, file);
12978 fputs (" PTR ", file);
12982 /* Avoid (%rip) for call operands. */
12983 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12984 && !CONST_INT_P (x))
12985 output_addr_const (file, x);
12986 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12987 output_operand_lossage ("invalid constraints for operand");
12989 output_address (x);
12992 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12997 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12998 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13000 if (ASSEMBLER_DIALECT == ASM_ATT)
13002 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13004 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13006 fprintf (file, "0x%08x", (unsigned int) l);
13009 /* These float cases don't actually occur as immediate operands. */
13010 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13014 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13015 fputs (dstr, file);
13018 else if (GET_CODE (x) == CONST_DOUBLE
13019 && GET_MODE (x) == XFmode)
13023 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13024 fputs (dstr, file);
13029 /* We have patterns that allow zero sets of memory, for instance.
13030 In 64-bit mode, we should probably support all 8-byte vectors,
13031 since we can in fact encode that into an immediate. */
13032 if (GET_CODE (x) == CONST_VECTOR)
13034 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13040 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13042 if (ASSEMBLER_DIALECT == ASM_ATT)
13045 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13046 || GET_CODE (x) == LABEL_REF)
13048 if (ASSEMBLER_DIALECT == ASM_ATT)
13051 fputs ("OFFSET FLAT:", file);
13054 if (CONST_INT_P (x))
13055 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13057 output_pic_addr_const (file, x, code);
13059 output_addr_const (file, x);
13064 ix86_print_operand_punct_valid_p (unsigned char code)
13066 return (code == '@' || code == '*' || code == '+'
13067 || code == '&' || code == ';');
13070 /* Print a memory operand whose address is ADDR. */
13073 ix86_print_operand_address (FILE *file, rtx addr)
13075 struct ix86_address parts;
13076 rtx base, index, disp;
13078 int ok = ix86_decompose_address (addr, &parts);
13083 index = parts.index;
13085 scale = parts.scale;
13093 if (ASSEMBLER_DIALECT == ASM_ATT)
13095 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13098 gcc_unreachable ();
13101 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
13102 if (TARGET_64BIT && !base && !index)
13106 if (GET_CODE (disp) == CONST
13107 && GET_CODE (XEXP (disp, 0)) == PLUS
13108 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13109 symbol = XEXP (XEXP (disp, 0), 0);
13111 if (GET_CODE (symbol) == LABEL_REF
13112 || (GET_CODE (symbol) == SYMBOL_REF
13113 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13116 if (!base && !index)
13118 /* A displacement-only address requires special attention. */
13120 if (CONST_INT_P (disp))
13122 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13123 fputs ("ds:", file);
13124 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13127 output_pic_addr_const (file, disp, 0);
13129 output_addr_const (file, disp);
13133 if (ASSEMBLER_DIALECT == ASM_ATT)
13138 output_pic_addr_const (file, disp, 0);
13139 else if (GET_CODE (disp) == LABEL_REF)
13140 output_asm_label (disp);
13142 output_addr_const (file, disp);
13147 print_reg (base, 0, file);
13151 print_reg (index, 0, file);
13153 fprintf (file, ",%d", scale);
13159 rtx offset = NULL_RTX;
13163 /* Pull out the offset of a symbol; print any symbol itself. */
13164 if (GET_CODE (disp) == CONST
13165 && GET_CODE (XEXP (disp, 0)) == PLUS
13166 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13168 offset = XEXP (XEXP (disp, 0), 1);
13169 disp = gen_rtx_CONST (VOIDmode,
13170 XEXP (XEXP (disp, 0), 0));
13174 output_pic_addr_const (file, disp, 0);
13175 else if (GET_CODE (disp) == LABEL_REF)
13176 output_asm_label (disp);
13177 else if (CONST_INT_P (disp))
13180 output_addr_const (file, disp);
13186 print_reg (base, 0, file);
13189 if (INTVAL (offset) >= 0)
13191 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13195 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13202 print_reg (index, 0, file);
13204 fprintf (file, "*%d", scale);
13211 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13214 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13218 if (GET_CODE (x) != UNSPEC)
13221 op = XVECEXP (x, 0, 0);
13222 switch (XINT (x, 1))
13224 case UNSPEC_GOTTPOFF:
13225 output_addr_const (file, op);
13226 /* FIXME: This might be @TPOFF in Sun ld. */
13227 fputs ("@gottpoff", file);
13230 output_addr_const (file, op);
13231 fputs ("@tpoff", file);
13233 case UNSPEC_NTPOFF:
13234 output_addr_const (file, op);
13236 fputs ("@tpoff", file);
13238 fputs ("@ntpoff", file);
13240 case UNSPEC_DTPOFF:
13241 output_addr_const (file, op);
13242 fputs ("@dtpoff", file);
13244 case UNSPEC_GOTNTPOFF:
13245 output_addr_const (file, op);
13247 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13248 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13250 fputs ("@gotntpoff", file);
13252 case UNSPEC_INDNTPOFF:
13253 output_addr_const (file, op);
13254 fputs ("@indntpoff", file);
13257 case UNSPEC_MACHOPIC_OFFSET:
13258 output_addr_const (file, op);
13260 machopic_output_function_base_name (file);
13271 /* Split one or more double-mode RTL references into pairs of half-mode
13272 references. The RTL can be REG, offsettable MEM, integer constant, or
13273 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
13274 split and "num" is its length. lo_half and hi_half are output arrays
13275 that parallel "operands". */
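/* An added example: on 32-bit x86, splitting the DImode operand
   (mem:DI (reg:SI bp)) yields lo_half (mem:SI (reg:SI bp)) and hi_half
   (mem:SI (plus:SI (reg:SI bp) (const_int 4))); offset 0 is the low
   half because the target is little-endian and byte is
   GET_MODE_SIZE (SImode) == 4.  */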
13278 split_double_mode (enum machine_mode mode, rtx operands[],
13279 int num, rtx lo_half[], rtx hi_half[])
13281 enum machine_mode half_mode;
13287 half_mode = DImode;
13290 half_mode = SImode;
13293 gcc_unreachable ();
13296 byte = GET_MODE_SIZE (half_mode);
13300 rtx op = operands[num];
13302 /* simplify_subreg refuses to split volatile memory addresses,
13303 but we still have to handle them. */
13306 lo_half[num] = adjust_address (op, half_mode, 0);
13307 hi_half[num] = adjust_address (op, half_mode, byte);
13311 lo_half[num] = simplify_gen_subreg (half_mode, op,
13312 GET_MODE (op) == VOIDmode
13313 ? mode : GET_MODE (op), 0);
13314 hi_half[num] = simplify_gen_subreg (half_mode, op,
13315 GET_MODE (op) == VOIDmode
13316 ? mode : GET_MODE (op), byte);
13321 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13322 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13323 is the expression of the binary operation. The output may either be
13324 emitted here, or returned to the caller, like all output_* functions.
13326 There is no guarantee that the operands are the same mode, as they
13327 might be within FLOAT or FLOAT_EXTEND expressions. */
13329 #ifndef SYSV386_COMPAT
13330 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13331 wants to fix the assemblers because that causes incompatibility
13332 with gcc. No-one wants to fix gcc because that causes
13333 incompatibility with assemblers... You can use the option of
13334 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13335 #define SYSV386_COMPAT 1
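/* Concretely (a hedged illustration): the x87 operation that computes
   %st(1) = %st(0) - %st(1) and pops is spelled "fsubrp" in Intel
   syntax, but SVR4-derived AT&T assemblers expect "fsubp" for the same
   opcode; with -DSYSV386_COMPAT=0 the AT&T spelling follows the
   hardware manual instead.  */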
13339 output_387_binary_op (rtx insn, rtx *operands)
13341 static char buf[40];
13344 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13346 #ifdef ENABLE_CHECKING
13347 /* Even if we do not want to check the inputs, this documents input
13348 constraints, which helps in understanding the following code. */
13349 if (STACK_REG_P (operands[0])
13350 && ((REG_P (operands[1])
13351 && REGNO (operands[0]) == REGNO (operands[1])
13352 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13353 || (REG_P (operands[2])
13354 && REGNO (operands[0]) == REGNO (operands[2])
13355 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13356 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13359 gcc_assert (is_sse);
13362 switch (GET_CODE (operands[3]))
13365 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13366 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13374 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13375 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13383 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13384 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13392 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13393 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13401 gcc_unreachable ();
13408 strcpy (buf, ssep);
13409 if (GET_MODE (operands[0]) == SFmode)
13410 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13412 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13416 strcpy (buf, ssep + 1);
13417 if (GET_MODE (operands[0]) == SFmode)
13418 strcat (buf, "ss\t{%2, %0|%0, %2}");
13420 strcat (buf, "sd\t{%2, %0|%0, %2}");
13426 switch (GET_CODE (operands[3]))
13430 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13432 rtx temp = operands[2];
13433 operands[2] = operands[1];
13434 operands[1] = temp;
13437 /* know operands[0] == operands[1]. */
13439 if (MEM_P (operands[2]))
13445 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13447 if (STACK_TOP_P (operands[0]))
13448 /* How is it that we are storing to a dead operand[2]?
13449 Well, presumably operands[1] is dead too. We can't
13450 store the result to st(0) as st(0) gets popped on this
13451 instruction. Instead store to operands[2] (which I
13452 think has to be st(1)). st(1) will be popped later.
13453 gcc <= 2.8.1 didn't have this check and generated
13454 assembly code that the Unixware assembler rejected. */
13455 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13457 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13461 if (STACK_TOP_P (operands[0]))
13462 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13464 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13469 if (MEM_P (operands[1]))
13475 if (MEM_P (operands[2]))
13481 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13484 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13485 derived assemblers, confusingly reverse the direction of
13486 the operation for fsub{r} and fdiv{r} when the
13487 destination register is not st(0). The Intel assembler
13488 doesn't have this brain damage. Read !SYSV386_COMPAT to
13489 figure out what the hardware really does. */
13490 if (STACK_TOP_P (operands[0]))
13491 p = "{p\t%0, %2|rp\t%2, %0}";
13493 p = "{rp\t%2, %0|p\t%0, %2}";
13495 if (STACK_TOP_P (operands[0]))
13496 /* As above for fmul/fadd, we can't store to st(0). */
13497 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13499 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13504 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13507 if (STACK_TOP_P (operands[0]))
13508 p = "{rp\t%0, %1|p\t%1, %0}";
13510 p = "{p\t%1, %0|rp\t%0, %1}";
13512 if (STACK_TOP_P (operands[0]))
13513 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13515 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13520 if (STACK_TOP_P (operands[0]))
13522 if (STACK_TOP_P (operands[1]))
13523 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13525 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13528 else if (STACK_TOP_P (operands[1]))
13531 p = "{\t%1, %0|r\t%0, %1}";
13533 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13539 p = "{r\t%2, %0|\t%0, %2}";
13541 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13547 gcc_unreachable ();
13554 /* Return needed mode for entity in optimize_mode_switching pass. */
13557 ix86_mode_needed (int entity, rtx insn)
13559 enum attr_i387_cw mode;
13561 /* The mode UNINITIALIZED is used to store the control word after a
13562 function call or ASM pattern. The mode ANY specifies that the function
13563 has no requirements on the control word and makes no changes to the
13564 bits we are interested in. */
13567 || (NONJUMP_INSN_P (insn)
13568 && (asm_noperands (PATTERN (insn)) >= 0
13569 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13570 return I387_CW_UNINITIALIZED;
13572 if (recog_memoized (insn) < 0)
13573 return I387_CW_ANY;
13575 mode = get_attr_i387_cw (insn);
13580 if (mode == I387_CW_TRUNC)
13585 if (mode == I387_CW_FLOOR)
13590 if (mode == I387_CW_CEIL)
13595 if (mode == I387_CW_MASK_PM)
13600 gcc_unreachable ();
13603 return I387_CW_ANY;
13606 /* Output code to initialize the control word copies used by the
13607 trunc?f?i and rounding patterns. MODE selects which variant of the
13608 control word (truncation, floor, ceil or precision mask) to set up. */
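/* Added reference note: bits 10-11 of the x87 control word form the
   rounding-control field, hence the 0x0c00 masks below:
     00 = round to nearest even    01 = round down (toward -inf)
     10 = round up (toward +inf)   11 = round toward zero (truncate)
   Bit 5 (0x0020) is the precision-exception mask used for the
   nearbyint variant.  */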
13611 emit_i387_cw_initialization (int mode)
13613 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13616 enum ix86_stack_slot slot;
13618 rtx reg = gen_reg_rtx (HImode);
13620 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13621 emit_move_insn (reg, copy_rtx (stored_mode));
13623 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13624 || optimize_function_for_size_p (cfun))
13628 case I387_CW_TRUNC:
13629 /* round toward zero (truncate) */
13630 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13631 slot = SLOT_CW_TRUNC;
13634 case I387_CW_FLOOR:
13635 /* round down toward -oo */
13636 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13637 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13638 slot = SLOT_CW_FLOOR;
13642 /* round up toward +oo */
13643 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13644 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13645 slot = SLOT_CW_CEIL;
13648 case I387_CW_MASK_PM:
13649 /* mask precision exception for nearbyint() */
13650 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13651 slot = SLOT_CW_MASK_PM;
13655 gcc_unreachable ();
13662 case I387_CW_TRUNC:
13663 /* round toward zero (truncate) */
13664 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13665 slot = SLOT_CW_TRUNC;
13668 case I387_CW_FLOOR:
13669 /* round down toward -oo */
13670 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13671 slot = SLOT_CW_FLOOR;
13675 /* round up toward +oo */
13676 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13677 slot = SLOT_CW_CEIL;
13680 case I387_CW_MASK_PM:
13681 /* mask precision exception for nearbyint() */
13682 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13683 slot = SLOT_CW_MASK_PM;
13687 gcc_unreachable ();
13691 gcc_assert (slot < MAX_386_STACK_LOCALS);
13693 new_mode = assign_386_stack_local (HImode, slot);
13694 emit_move_insn (new_mode, reg);
13697 /* Output code for INSN to convert a float to a signed int. OPERANDS
13698 are the insn operands. The output may be [HSD]Imode and the input
13699 operand may be [SDX]Fmode. */
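/* Illustrative output (assuming no fisttp and an SImode result), a
   hedged sketch rather than the exact text for every mode:
       fldcw   %3      # load the truncating control word
       fistpl  %0      # store %st(0) as a 32-bit integer, popping
       fldcw   %2      # restore the original control word
   where operands 2 and 3 are the stack slots prepared by
   emit_i387_cw_initialization.  */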
13702 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13704 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13705 int dimode_p = GET_MODE (operands[0]) == DImode;
13706 int round_mode = get_attr_i387_cw (insn);
13708 /* Jump through a hoop or two for DImode, since the hardware has no
13709 non-popping instruction. We used to do this a different way, but
13710 that was somewhat fragile and broke with post-reload splitters. */
13711 if ((dimode_p || fisttp) && !stack_top_dies)
13712 output_asm_insn ("fld\t%y1", operands);
13714 gcc_assert (STACK_TOP_P (operands[1]));
13715 gcc_assert (MEM_P (operands[0]));
13716 gcc_assert (GET_MODE (operands[1]) != TFmode);
13719 output_asm_insn ("fisttp%Z0\t%0", operands);
13722 if (round_mode != I387_CW_ANY)
13723 output_asm_insn ("fldcw\t%3", operands);
13724 if (stack_top_dies || dimode_p)
13725 output_asm_insn ("fistp%Z0\t%0", operands);
13727 output_asm_insn ("fist%Z0\t%0", operands);
13728 if (round_mode != I387_CW_ANY)
13729 output_asm_insn ("fldcw\t%2", operands);
13735 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13736 have the values zero or one, indicates the ffreep insn's operand
13737 from the OPERANDS array. */
13739 static const char *
13740 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13742 if (TARGET_USE_FFREEP)
13743 #ifdef HAVE_AS_IX86_FFREEP
13744 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13747 static char retval[32];
13748 int regno = REGNO (operands[opno]);
13750 gcc_assert (FP_REGNO_P (regno));
13752 regno -= FIRST_STACK_REG;
13754 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
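/* Added note: ffreep %st(N) encodes as the two bytes 0xdf 0xc0+N.
   Emitted as a little-endian 16-bit word, 0xc0df stores the bytes
   0xdf 0xc0, i.e. "ffreep %st(0)"; this hand-assembles the insn for
   assemblers that lack the mnemonic.  */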
13759 return opno ? "fstp\t%y1" : "fstp\t%y0";
13763 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13764 should be used. UNORDERED_P is true when fucom should be used. */
13767 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13769 int stack_top_dies;
13770 rtx cmp_op0, cmp_op1;
13771 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13775 cmp_op0 = operands[0];
13776 cmp_op1 = operands[1];
13780 cmp_op0 = operands[1];
13781 cmp_op1 = operands[2];
13786 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13787 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13788 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13789 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
13791 if (GET_MODE (operands[0]) == SFmode)
13793 return &ucomiss[TARGET_AVX ? 0 : 1];
13795 return &comiss[TARGET_AVX ? 0 : 1];
13798 return &ucomisd[TARGET_AVX ? 0 : 1];
13800 return &comisd[TARGET_AVX ? 0 : 1];
13803 gcc_assert (STACK_TOP_P (cmp_op0));
13805 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13807 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13809 if (stack_top_dies)
13811 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13812 return output_387_ffreep (operands, 1);
13815 return "ftst\n\tfnstsw\t%0";
13818 if (STACK_REG_P (cmp_op1)
13820 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13821 && REGNO (cmp_op1) != FIRST_STACK_REG)
13823 /* If the top of the 387 stack dies and the other operand
13824 is also a stack register that dies, then this must be a
13825 `fcompp' float compare. */
13829 /* There is no double popping fcomi variant. Fortunately,
13830 eflags is immune from the fstp's cc clobbering. */
13832 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13834 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13835 return output_387_ffreep (operands, 0);
13840 return "fucompp\n\tfnstsw\t%0";
13842 return "fcompp\n\tfnstsw\t%0";
13847 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13849 static const char * const alt[16] =
13851 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13852 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13853 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13854 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13856 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13857 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13861 "fcomi\t{%y1, %0|%0, %y1}",
13862 "fcomip\t{%y1, %0|%0, %y1}",
13863 "fucomi\t{%y1, %0|%0, %y1}",
13864 "fucomip\t{%y1, %0|%0, %y1}",
13875 mask = eflags_p << 3;
13876 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13877 mask |= unordered_p << 1;
13878 mask |= stack_top_dies;
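/* Worked example (added): fcomi wanted (eflags_p = 1), fp operand
   (intmode = 0), unordered compare (unordered_p = 1) and a dying top
   of stack (stack_top_dies = 1) give mask = 8 + 2 + 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" above.  */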
13880 gcc_assert (mask < 16);
13889 ix86_output_addr_vec_elt (FILE *file, int value)
13891 const char *directive = ASM_LONG;
13895 directive = ASM_QUAD;
13897 gcc_assert (!TARGET_64BIT);
13900 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13904 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13906 const char *directive = ASM_LONG;
13909 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13910 directive = ASM_QUAD;
13912 gcc_assert (!TARGET_64BIT);
13914 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13915 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13916 fprintf (file, "%s%s%d-%s%d\n",
13917 directive, LPREFIX, value, LPREFIX, rel);
13918 else if (HAVE_AS_GOTOFF_IN_DATA)
13919 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13921 else if (TARGET_MACHO)
13923 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13924 machopic_output_function_base_name (file);
13929 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13930 GOT_SYMBOL_NAME, LPREFIX, value);
13933 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
13937 ix86_expand_clear (rtx dest)
13941 /* We play register width games, which are only valid after reload. */
13942 gcc_assert (reload_completed);
13944 /* Avoid HImode and its attendant prefix byte. */
13945 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13946 dest = gen_rtx_REG (SImode, REGNO (dest));
13947 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13949 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13950 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13952 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13953 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
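/* Added note: "xorl %eax, %eax" is two bytes and breaks dependency
   chains, versus five bytes for "movl $0, %eax"; widening QI/HImode
   clears to SImode above also avoids the 0x66 operand-size prefix.  */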
13959 /* X is an unchanging MEM. If it is a constant pool reference, return
13960 the constant pool rtx, else NULL. */
13963 maybe_get_pool_constant (rtx x)
13965 x = ix86_delegitimize_address (XEXP (x, 0));
13967 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13968 return get_pool_constant (x);
13974 ix86_expand_move (enum machine_mode mode, rtx operands[])
13977 enum tls_model model;
13982 if (GET_CODE (op1) == SYMBOL_REF)
13984 model = SYMBOL_REF_TLS_MODEL (op1);
13987 op1 = legitimize_tls_address (op1, model, true);
13988 op1 = force_operand (op1, op0);
13992 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13993 && SYMBOL_REF_DLLIMPORT_P (op1))
13994 op1 = legitimize_dllimport_symbol (op1, false);
13996 else if (GET_CODE (op1) == CONST
13997 && GET_CODE (XEXP (op1, 0)) == PLUS
13998 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14000 rtx addend = XEXP (XEXP (op1, 0), 1);
14001 rtx symbol = XEXP (XEXP (op1, 0), 0);
14004 model = SYMBOL_REF_TLS_MODEL (symbol);
14006 tmp = legitimize_tls_address (symbol, model, true);
14007 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14008 && SYMBOL_REF_DLLIMPORT_P (symbol))
14009 tmp = legitimize_dllimport_symbol (symbol, true);
14013 tmp = force_operand (tmp, NULL);
14014 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14015 op0, 1, OPTAB_DIRECT);
14021 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14023 if (TARGET_MACHO && !TARGET_64BIT)
14028 rtx temp = ((reload_in_progress
14029 || ((op0 && REG_P (op0))
14031 ? op0 : gen_reg_rtx (Pmode));
14032 op1 = machopic_indirect_data_reference (op1, temp);
14033 op1 = machopic_legitimize_pic_address (op1, mode,
14034 temp == op1 ? 0 : temp);
14036 else if (MACHOPIC_INDIRECT)
14037 op1 = machopic_indirect_data_reference (op1, 0);
14045 op1 = force_reg (Pmode, op1);
14046 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14048 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14049 op1 = legitimize_pic_address (op1, reg);
14058 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14059 || !push_operand (op0, mode))
14061 op1 = force_reg (mode, op1);
14063 if (push_operand (op0, mode)
14064 && ! general_no_elim_operand (op1, mode))
14065 op1 = copy_to_mode_reg (mode, op1);
14067 /* Force large constants in 64bit compilation into a register
14068 to get them CSEed. */
14069 if (can_create_pseudo_p ()
14070 && (mode == DImode) && TARGET_64BIT
14071 && immediate_operand (op1, mode)
14072 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14073 && !register_operand (op0, mode)
14075 op1 = copy_to_mode_reg (mode, op1);
14077 if (can_create_pseudo_p ()
14078 && FLOAT_MODE_P (mode)
14079 && GET_CODE (op1) == CONST_DOUBLE)
14081 /* If we are loading a floating point constant to a register,
14082 force the value to memory now, since we'll get better code
14083 out the back end. */
14085 op1 = validize_mem (force_const_mem (mode, op1));
14086 if (!register_operand (op0, mode))
14088 rtx temp = gen_reg_rtx (mode);
14089 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14090 emit_move_insn (op0, temp);
14096 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14100 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14102 rtx op0 = operands[0], op1 = operands[1];
14103 unsigned int align = GET_MODE_ALIGNMENT (mode);
14105 /* Force constants other than zero into memory. We do not know how
14106 the instructions used to build constants modify the upper 64 bits
14107 of the register; once we have that information, we may be able
14108 to handle some of them more efficiently. */
14109 if (can_create_pseudo_p ()
14110 && register_operand (op0, mode)
14111 && (CONSTANT_P (op1)
14112 || (GET_CODE (op1) == SUBREG
14113 && CONSTANT_P (SUBREG_REG (op1))))
14114 && !standard_sse_constant_p (op1))
14115 op1 = validize_mem (force_const_mem (mode, op1));
14117 /* We need to check memory alignment for SSE modes since attributes
14118 can make operands unaligned. */
14119 if (can_create_pseudo_p ()
14120 && SSE_REG_MODE_P (mode)
14121 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14122 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14126 /* ix86_expand_vector_move_misalign() does not like constants ... */
14127 if (CONSTANT_P (op1)
14128 || (GET_CODE (op1) == SUBREG
14129 && CONSTANT_P (SUBREG_REG (op1))))
14130 op1 = validize_mem (force_const_mem (mode, op1));
14132 /* ... nor both arguments in memory. */
14133 if (!register_operand (op0, mode)
14134 && !register_operand (op1, mode))
14135 op1 = force_reg (mode, op1);
14137 tmp[0] = op0; tmp[1] = op1;
14138 ix86_expand_vector_move_misalign (mode, tmp);
14142 /* Make operand1 a register if it isn't already. */
14143 if (can_create_pseudo_p ()
14144 && !register_operand (op0, mode)
14145 && !register_operand (op1, mode))
14147 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14151 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14154 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14155 straight to ix86_expand_vector_move. */
14156 /* Code generation for scalar reg-reg moves of single and double precision data:
14157 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
14161 if (x86_sse_partial_reg_dependency == true)
14166 Code generation for scalar loads of double precision data:
14167 if (x86_sse_split_regs == true)
14168 movlpd mem, reg (gas syntax)
14172 Code generation for unaligned packed loads of single precision data
14173 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14174 if (x86_sse_unaligned_move_optimal)
14177 if (x86_sse_partial_reg_dependency == true)
14189 Code generation for unaligned packed loads of double precision data
14190 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14191 if (x86_sse_unaligned_move_optimal)
14194 if (x86_sse_split_regs == true)
14207 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14216 switch (GET_MODE_CLASS (mode))
14218 case MODE_VECTOR_INT:
14220 switch (GET_MODE_SIZE (mode))
14223 /* If we're optimizing for size, movups is the smallest. */
14224 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14226 op0 = gen_lowpart (V4SFmode, op0);
14227 op1 = gen_lowpart (V4SFmode, op1);
14228 emit_insn (gen_avx_movups (op0, op1));
14231 op0 = gen_lowpart (V16QImode, op0);
14232 op1 = gen_lowpart (V16QImode, op1);
14233 emit_insn (gen_avx_movdqu (op0, op1));
14236 op0 = gen_lowpart (V32QImode, op0);
14237 op1 = gen_lowpart (V32QImode, op1);
14238 emit_insn (gen_avx_movdqu256 (op0, op1));
14241 gcc_unreachable ();
14244 case MODE_VECTOR_FLOAT:
14245 op0 = gen_lowpart (mode, op0);
14246 op1 = gen_lowpart (mode, op1);
14251 emit_insn (gen_avx_movups (op0, op1));
14254 emit_insn (gen_avx_movups256 (op0, op1));
14257 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14259 op0 = gen_lowpart (V4SFmode, op0);
14260 op1 = gen_lowpart (V4SFmode, op1);
14261 emit_insn (gen_avx_movups (op0, op1));
14264 emit_insn (gen_avx_movupd (op0, op1));
14267 emit_insn (gen_avx_movupd256 (op0, op1));
14270 gcc_unreachable ();
14275 gcc_unreachable ();
14283 /* If we're optimizing for size, movups is the smallest. */
14284 if (optimize_insn_for_size_p ()
14285 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14287 op0 = gen_lowpart (V4SFmode, op0);
14288 op1 = gen_lowpart (V4SFmode, op1);
14289 emit_insn (gen_sse_movups (op0, op1));
14293 /* ??? If we have typed data, then it would appear that using
14294 movdqu is the only way to get unaligned data loaded with integer type. */
14296 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14298 op0 = gen_lowpart (V16QImode, op0);
14299 op1 = gen_lowpart (V16QImode, op1);
14300 emit_insn (gen_sse2_movdqu (op0, op1));
14304 if (TARGET_SSE2 && mode == V2DFmode)
14308 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14310 op0 = gen_lowpart (V2DFmode, op0);
14311 op1 = gen_lowpart (V2DFmode, op1);
14312 emit_insn (gen_sse2_movupd (op0, op1));
14316 /* When SSE registers are split into halves, we can avoid
14317 writing to the top half twice. */
14318 if (TARGET_SSE_SPLIT_REGS)
14320 emit_clobber (op0);
14325 /* ??? Not sure about the best option for the Intel chips.
14326 The following would seem to satisfy; the register is
14327 entirely cleared, breaking the dependency chain. We
14328 then store to the upper half, with a dependency depth
14329 of one. A rumor has it that Intel recommends two movsd
14330 followed by an unpacklpd, but this is unconfirmed. And
14331 given that the dependency depth of the unpacklpd would
14332 still be one, I'm not sure why this would be better. */
14333 zero = CONST0_RTX (V2DFmode);
14336 m = adjust_address (op1, DFmode, 0);
14337 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14338 m = adjust_address (op1, DFmode, 8);
14339 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14343 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14345 op0 = gen_lowpart (V4SFmode, op0);
14346 op1 = gen_lowpart (V4SFmode, op1);
14347 emit_insn (gen_sse_movups (op0, op1));
14351 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14352 emit_move_insn (op0, CONST0_RTX (mode));
14354 emit_clobber (op0);
14356 if (mode != V4SFmode)
14357 op0 = gen_lowpart (V4SFmode, op0);
14358 m = adjust_address (op1, V2SFmode, 0);
14359 emit_insn (gen_sse_loadlps (op0, op0, m));
14360 m = adjust_address (op1, V2SFmode, 8);
14361 emit_insn (gen_sse_loadhps (op0, op0, m));
14364 else if (MEM_P (op0))
14366 /* If we're optimizing for size, movups is the smallest. */
14367 if (optimize_insn_for_size_p ()
14368 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14370 op0 = gen_lowpart (V4SFmode, op0);
14371 op1 = gen_lowpart (V4SFmode, op1);
14372 emit_insn (gen_sse_movups (op0, op1));
14376 /* ??? Similar to above, only less clear because of quote
14377 typeless stores unquote. */
14378 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14379 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14381 op0 = gen_lowpart (V16QImode, op0);
14382 op1 = gen_lowpart (V16QImode, op1);
14383 emit_insn (gen_sse2_movdqu (op0, op1));
14387 if (TARGET_SSE2 && mode == V2DFmode)
14389 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14391 op0 = gen_lowpart (V2DFmode, op0);
14392 op1 = gen_lowpart (V2DFmode, op1);
14393 emit_insn (gen_sse2_movupd (op0, op1));
14397 m = adjust_address (op0, DFmode, 0);
14398 emit_insn (gen_sse2_storelpd (m, op1));
14399 m = adjust_address (op0, DFmode, 8);
14400 emit_insn (gen_sse2_storehpd (m, op1));
14405 if (mode != V4SFmode)
14406 op1 = gen_lowpart (V4SFmode, op1);
14408 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14410 op0 = gen_lowpart (V4SFmode, op0);
14411 emit_insn (gen_sse_movups (op0, op1));
14415 m = adjust_address (op0, V2SFmode, 0);
14416 emit_insn (gen_sse_storelps (m, op1));
14417 m = adjust_address (op0, V2SFmode, 8);
14418 emit_insn (gen_sse_storehps (m, op1));
14423 gcc_unreachable ();
14426 /* Expand a push in MODE. This is some mode for which we do not support
14427 proper push instructions, at least from the registers that we expect
14428 the value to live in. */
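/* A hedged sketch of the expansion: for a 16-byte mode this emits the
   equivalent of "sub $16, %<sp>" via expand_simple_binop and then an
   ordinary move into the memory at the new stack pointer; no push
   instruction is used at all.  */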
14431 ix86_expand_push (enum machine_mode mode, rtx x)
14435 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14436 GEN_INT (-GET_MODE_SIZE (mode)),
14437 stack_pointer_rtx, 1, OPTAB_DIRECT);
14438 if (tmp != stack_pointer_rtx)
14439 emit_move_insn (stack_pointer_rtx, tmp);
14441 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14443 /* When we push an operand onto stack, it has to be aligned at least
14444 at the function argument boundary. However since we don't have
14445 the argument type, we can't determine the actual argument boundary. */
14447 emit_move_insn (tmp, x);
14450 /* Helper function of ix86_fixup_binary_operands to canonicalize
14451 operand order. Returns true if the operands should be swapped. */
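/* For instance (added illustration): for the commutative expression
   a = b + a, swapping src1 and src2 gives a = a + b, which matches the
   two-operand x86 "add" whose destination must equal its first source;
   likewise a = 5 + b becomes a = b + 5 so the immediate lands in the
   second slot.  */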
14454 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14457 rtx dst = operands[0];
14458 rtx src1 = operands[1];
14459 rtx src2 = operands[2];
14461 /* If the operation is not commutative, we can't do anything. */
14462 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14465 /* Highest priority is that src1 should match dst. */
14466 if (rtx_equal_p (dst, src1))
14468 if (rtx_equal_p (dst, src2))
14471 /* Next highest priority is that immediate constants come second. */
14472 if (immediate_operand (src2, mode))
14474 if (immediate_operand (src1, mode))
14477 /* Lowest priority is that memory references should come second. */
14487 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14488 destination to use for the operation. If different from the true
14489 destination in operands[0], a copy operation will be required. */
14492 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14495 rtx dst = operands[0];
14496 rtx src1 = operands[1];
14497 rtx src2 = operands[2];
14499 /* Canonicalize operand order. */
14500 if (ix86_swap_binary_operands_p (code, mode, operands))
14504 /* It is invalid to swap operands of different modes. */
14505 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14512 /* Both source operands cannot be in memory. */
14513 if (MEM_P (src1) && MEM_P (src2))
14515 /* Optimization: Only read from memory once. */
14516 if (rtx_equal_p (src1, src2))
14518 src2 = force_reg (mode, src2);
14522 src2 = force_reg (mode, src2);
14525 /* If the destination is memory, and we do not have matching source
14526 operands, do things in registers. */
14527 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14528 dst = gen_reg_rtx (mode);
14530 /* Source 1 cannot be a constant. */
14531 if (CONSTANT_P (src1))
14532 src1 = force_reg (mode, src1);
14534 /* Source 1 cannot be a non-matching memory. */
14535 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14536 src1 = force_reg (mode, src1);
14538 operands[1] = src1;
14539 operands[2] = src2;
14543 /* Similarly, but assume that the destination has already been
14544 set up properly. */
14547 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14548 enum machine_mode mode, rtx operands[])
14550 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14551 gcc_assert (dst == operands[0]);
14554 /* Attempt to expand a binary operator. Make the expansion closer to the
14555 actual machine, than just general_operand, which will allow 3 separate
14556 memory references (one output, two input) in a single insn. */
14559 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14562 rtx src1, src2, dst, op, clob;
14564 dst = ix86_fixup_binary_operands (code, mode, operands);
14565 src1 = operands[1];
14566 src2 = operands[2];
14568 /* Emit the instruction. */
14570 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14571 if (reload_in_progress)
14573 /* Reload doesn't know about the flags register, and doesn't know that
14574 it doesn't want to clobber it. We can only do this with PLUS. */
14575 gcc_assert (code == PLUS);
14580 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14581 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14584 /* Fix up the destination if needed. */
14585 if (dst != operands[0])
14586 emit_move_insn (operands[0], dst);
14589 /* Return TRUE or FALSE depending on whether the binary operator meets the
14590 appropriate constraints. */
14593 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14596 rtx dst = operands[0];
14597 rtx src1 = operands[1];
14598 rtx src2 = operands[2];
14600 /* Both source operands cannot be in memory. */
14601 if (MEM_P (src1) && MEM_P (src2))
14604 /* Canonicalize operand order for commutative operators. */
14605 if (ix86_swap_binary_operands_p (code, mode, operands))
14612 /* If the destination is memory, we must have a matching source operand. */
14613 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14616 /* Source 1 cannot be a constant. */
14617 if (CONSTANT_P (src1))
14620 /* Source 1 cannot be a non-matching memory. */
14621 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14627 /* Attempt to expand a unary operator. Make the expansion closer to the
14628 actual machine, than just general_operand, which will allow 2 separate
14629 memory references (one output, one input) in a single insn. */
14632 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14635 int matching_memory;
14636 rtx src, dst, op, clob;
14641 /* If the destination is memory, and we do not have matching source
14642 operands, do things in registers. */
14643 matching_memory = 0;
14646 if (rtx_equal_p (dst, src))
14647 matching_memory = 1;
14649 dst = gen_reg_rtx (mode);
14652 /* When source operand is memory, destination must match. */
14653 if (MEM_P (src) && !matching_memory)
14654 src = force_reg (mode, src);
14656 /* Emit the instruction. */
14658 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14659 if (reload_in_progress || code == NOT)
14661 /* Reload doesn't know about the flags register, and doesn't know that
14662 it doesn't want to clobber it. */
14663 gcc_assert (code == NOT);
14668 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14669 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14672 /* Fix up the destination if needed. */
14673 if (dst != operands[0])
14674 emit_move_insn (operands[0], dst);
14677 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
14678 divisor are within the range [0-255]. */
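/* A hedged sketch of the emitted control flow:
       or      dividend, divisor -> scratch
       test    $-0x100, scratch        ; any bit above the low 8 set?
       je      .Lqimode                ; both values fit in 8 bits
       <full-width [i]div>             ; original signed/unsigned divmod
       jmp     .Lend
   .Lqimode:
       <8-bit div leaving AL = quotient, AH = remainder>
   .Lend:
   The 8-bit divide is typically much faster than the full-width one.  */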
14681 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
14684 rtx end_label, qimode_label;
14685 rtx insn, div, mod;
14686 rtx scratch, tmp0, tmp1, tmp2;
14687 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
14688 rtx (*gen_zero_extend) (rtx, rtx);
14689 rtx (*gen_test_ccno_1) (rtx, rtx);
14694 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
14695 gen_test_ccno_1 = gen_testsi_ccno_1;
14696 gen_zero_extend = gen_zero_extendqisi2;
14699 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
14700 gen_test_ccno_1 = gen_testdi_ccno_1;
14701 gen_zero_extend = gen_zero_extendqidi2;
14704 gcc_unreachable ();
14707 end_label = gen_label_rtx ();
14708 qimode_label = gen_label_rtx ();
14710 scratch = gen_reg_rtx (mode);
14712 /* Use 8bit unsigned divmod if dividend and divisor are within
14713 the range [0-255]. */
14714 emit_move_insn (scratch, operands[2]);
14715 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
14716 scratch, 1, OPTAB_DIRECT);
14717 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
14718 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
14719 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
14720 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
14721 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
14723 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
14724 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14725 JUMP_LABEL (insn) = qimode_label;
14727 /* Generate the original signed/unsigned divmod. */
14728 div = gen_divmod4_1 (operands[0], operands[1],
14729 operands[2], operands[3]);
14732 /* Branch to the end. */
14733 emit_jump_insn (gen_jump (end_label));
14736 /* Generate 8bit unsigned divide. */
14737 emit_label (qimode_label);
14738 /* Don't use operands[0] for result of 8bit divide since not all
14739 registers support QImode ZERO_EXTRACT. */
14740 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
14741 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
14742 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
14743 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
14747 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
14748 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
14752 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
14753 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
14756 /* Extract remainder from AH. */
14757 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
14758 if (REG_P (operands[1]))
14759 insn = emit_move_insn (operands[1], tmp1);
14762 /* Need a new scratch register since the old one holds the result of the 8bit divide. */
14764 scratch = gen_reg_rtx (mode);
14765 emit_move_insn (scratch, tmp1);
14766 insn = emit_move_insn (operands[1], scratch);
14768 set_unique_reg_note (insn, REG_EQUAL, mod);
14770 /* Zero extend quotient from AL. */
14771 tmp1 = gen_lowpart (QImode, tmp0);
14772 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
14773 set_unique_reg_note (insn, REG_EQUAL, div);
14775 emit_label (end_label);
14778 #define LEA_SEARCH_THRESHOLD 12
14780 /* Search backward for non-agu definition of register number REGNO1
14781 or register number REGNO2 in INSN's basic block until
14782 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14783 2. Reach BB boundary, or
14784 3. Reach agu definition.
14785 Returns the distance between the non-agu definition point and INSN.
14786 If no definition point, returns -1. */
14789 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14792 basic_block bb = BLOCK_FOR_INSN (insn);
14795 enum attr_type insn_type;
14797 if (insn != BB_HEAD (bb))
14799 rtx prev = PREV_INSN (insn);
14800 while (prev && distance < LEA_SEARCH_THRESHOLD)
14802 if (NONDEBUG_INSN_P (prev))
14805 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14806 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14807 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14808 && (regno1 == DF_REF_REGNO (*def_rec)
14809 || regno2 == DF_REF_REGNO (*def_rec)))
14811 insn_type = get_attr_type (prev);
14812 if (insn_type != TYPE_LEA)
14816 if (prev == BB_HEAD (bb))
14818 prev = PREV_INSN (prev);
14822 if (distance < LEA_SEARCH_THRESHOLD)
14826 bool simple_loop = false;
14828 FOR_EACH_EDGE (e, ei, bb->preds)
14831 simple_loop = true;
14837 rtx prev = BB_END (bb);
14840 && distance < LEA_SEARCH_THRESHOLD)
14842 if (NONDEBUG_INSN_P (prev))
14845 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14846 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14847 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14848 && (regno1 == DF_REF_REGNO (*def_rec)
14849 || regno2 == DF_REF_REGNO (*def_rec)))
14851 insn_type = get_attr_type (prev);
14852 if (insn_type != TYPE_LEA)
14856 prev = PREV_INSN (prev);
14864 /* get_attr_type may modify recog data. We want to make sure
14865 that recog data is valid for instruction INSN, on which
14866 distance_non_agu_define is called. INSN is unchanged here. */
14867 extract_insn_cached (insn);
14871 /* Return the distance between INSN and the next insn that uses
14872 register number REGNO0 in a memory address. Return -1 if no such
14873 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
14876 distance_agu_use (unsigned int regno0, rtx insn)
14878 basic_block bb = BLOCK_FOR_INSN (insn);
14883 if (insn != BB_END (bb))
14885 rtx next = NEXT_INSN (insn);
14886 while (next && distance < LEA_SEARCH_THRESHOLD)
14888 if (NONDEBUG_INSN_P (next))
14892 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14893 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14894 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14895 && regno0 == DF_REF_REGNO (*use_rec))
14897 /* Return DISTANCE if OP0 is used in memory
14898 address in NEXT. */
14902 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14903 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14904 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14905 && regno0 == DF_REF_REGNO (*def_rec))
14907 /* Return -1 if OP0 is set in NEXT. */
14911 if (next == BB_END (bb))
14913 next = NEXT_INSN (next);
14917 if (distance < LEA_SEARCH_THRESHOLD)
14921 bool simple_loop = false;
14923 FOR_EACH_EDGE (e, ei, bb->succs)
14926 simple_loop = true;
14932 rtx next = BB_HEAD (bb);
14935 && distance < LEA_SEARCH_THRESHOLD)
14937 if (NONDEBUG_INSN_P (next))
14941 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14942 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14943 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14944 && regno0 == DF_REF_REGNO (*use_rec))
14946 /* Return DISTANCE if OP0 is used in memory
14947 address in NEXT. */
14951 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14952 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14953 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14954 && regno0 == DF_REF_REGNO (*def_rec))
14956 /* Return -1 if OP0 is set in NEXT. */
14961 next = NEXT_INSN (next);
14969 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
14970 there is a dilemma of choosing LEA or ADD.
14971 Negative value: ADD is preferred over LEA.
14973 Positive value: LEA is preferred over ADD. */
14974 #define IX86_LEA_PRIORITY 2
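/* Example (added): with IX86_LEA_PRIORITY == 2, an add whose non-AGU
   inputs were defined one instruction back (dist_define == 1) and whose
   result feeds an address four instructions ahead (dist_use == 4) is
   turned into lea, because 1 + 2 < 4; with dist_use == 3 the plain add
   is kept.  */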
14976 /* Return true if it is ok to optimize an ADD operation to an LEA
14977 operation to avoid flag register consumption. For most processors,
14978 ADD is faster than LEA. For processors like ATOM, if the
14979 destination register of the LEA holds an actual address which will be
14980 used soon, LEA is better; otherwise ADD is better. */
14983 ix86_lea_for_add_ok (rtx insn, rtx operands[])
14985 unsigned int regno0 = true_regnum (operands[0]);
14986 unsigned int regno1 = true_regnum (operands[1]);
14987 unsigned int regno2 = true_regnum (operands[2]);
14989 /* If a = b + c, (a != b && a != c), we must use the lea form. */
14990 if (regno0 != regno1 && regno0 != regno2)
14993 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14997 int dist_define, dist_use;
14999 /* Return false if REGNO0 isn't used in memory address. */
15000 dist_use = distance_agu_use (regno0, insn);
15004 dist_define = distance_non_agu_define (regno1, regno2, insn);
15005 if (dist_define <= 0)
15008 /* If this insn has both a backward non-agu dependence and a forward
15009 agu dependence, the one with the shorter distance takes effect. */
15010 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
15017 /* Return true if the destination reg of SET_BODY is the shift count of USE_BODY. */
15021 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15027 /* Retrieve destination of SET_BODY. */
15028 switch (GET_CODE (set_body))
15031 set_dest = SET_DEST (set_body);
15032 if (!set_dest || !REG_P (set_dest))
15036 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15037 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15045 /* Retrieve shift count of USE_BODY. */
15046 switch (GET_CODE (use_body))
15049 shift_rtx = XEXP (use_body, 1);
15052 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15053 if (ix86_dep_by_shift_count_body (set_body,
15054 XVECEXP (use_body, 0, i)))
15062 && (GET_CODE (shift_rtx) == ASHIFT
15063 || GET_CODE (shift_rtx) == LSHIFTRT
15064 || GET_CODE (shift_rtx) == ASHIFTRT
15065 || GET_CODE (shift_rtx) == ROTATE
15066 || GET_CODE (shift_rtx) == ROTATERT))
15068 rtx shift_count = XEXP (shift_rtx, 1);
15070 /* Return true if shift count is dest of SET_BODY. */
15071 if (REG_P (shift_count)
15072 && true_regnum (set_dest) == true_regnum (shift_count))
15079 /* Return true if the destination reg of SET_INSN is the shift count of USE_INSN. */
15083 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15085 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15086 PATTERN (use_insn));
15089 /* Return TRUE or FALSE depending on whether the unary operator meets the
15090 appropriate constraints. */
15093 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15094 enum machine_mode mode ATTRIBUTE_UNUSED,
15095 rtx operands[2] ATTRIBUTE_UNUSED)
15097 /* If one of operands is memory, source and destination must match. */
15098 if ((MEM_P (operands[0])
15099 || MEM_P (operands[1]))
15100 && ! rtx_equal_p (operands[0], operands[1]))
15105 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15106 are ok, keeping in mind the possible movddup alternative. */
15109 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15111 if (MEM_P (operands[0]))
15112 return rtx_equal_p (operands[0], operands[1 + high]);
15113 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15114 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15118 /* Post-reload splitter for converting an SF or DFmode value in an
15119 SSE register into an unsigned SImode. */
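/* The idea, sketched (added note): for 0 <= x < 2^32, signed cvtt can
   only handle x < 2^31, so lanes with x >= 2^31 first subtract 2^31,
   are converted, and then have the sign bit XORed back in:
       (unsigned) x == (int) trunc (x - 2^31) ^ 0x80000000  when x >= 2^31
   which is exactly what the compare/AND/MINUS/shift/XOR below
   compute; lanes below 2^31 see a zero mask and pass through.  */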
15122 ix86_split_convert_uns_si_sse (rtx operands[])
15124 enum machine_mode vecmode;
15125 rtx value, large, zero_or_two31, input, two31, x;
15127 large = operands[1];
15128 zero_or_two31 = operands[2];
15129 input = operands[3];
15130 two31 = operands[4];
15131 vecmode = GET_MODE (large);
15132 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15134 /* Load up the value into the low element. We must ensure that the other
15135 elements are valid floats -- zero is the easiest such value. */
15138 if (vecmode == V4SFmode)
15139 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15141 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15145 input = gen_rtx_REG (vecmode, REGNO (input));
15146 emit_move_insn (value, CONST0_RTX (vecmode));
15147 if (vecmode == V4SFmode)
15148 emit_insn (gen_sse_movss (value, value, input));
15150 emit_insn (gen_sse2_movsd (value, value, input));
15153 emit_move_insn (large, two31);
15154 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15156 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15157 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15159 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15160 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15162 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15163 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15165 large = gen_rtx_REG (V4SImode, REGNO (large));
15166 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15168 x = gen_rtx_REG (V4SImode, REGNO (value));
15169 if (vecmode == V4SFmode)
15170 emit_insn (gen_sse2_cvttps2dq (x, value));
15172 emit_insn (gen_sse2_cvttpd2dq (x, value));
15175 emit_insn (gen_xorv4si3 (value, value, large));
15178 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15179 Expects the 64-bit DImode to be supplied in a pair of integral
15180 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15181 -mfpmath=sse, !optimize_size only. */
15184 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15186 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15187 rtx int_xmm, fp_xmm;
15188 rtx biases, exponents;
15191 int_xmm = gen_reg_rtx (V4SImode);
15192 if (TARGET_INTER_UNIT_MOVES)
15193 emit_insn (gen_movdi_to_sse (int_xmm, input));
15194 else if (TARGET_SSE_SPLIT_REGS)
15196 emit_clobber (int_xmm);
15197 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15201 x = gen_reg_rtx (V2DImode);
15202 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15203 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15206 x = gen_rtx_CONST_VECTOR (V4SImode,
15207 gen_rtvec (4, GEN_INT (0x43300000UL),
15208 GEN_INT (0x45300000UL),
15209 const0_rtx, const0_rtx));
15210 exponents = validize_mem (force_const_mem (V4SImode, x));
15212 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15213 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15215 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15216 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15217 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15218 (0x1.0p84 + double(fp_value_hi_xmm)).
15219 Note these exponents differ by 32. */
15221 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15223 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15224 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15225 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15226 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15227 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15228 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15229 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15230 biases = validize_mem (force_const_mem (V2DFmode, biases));
15231 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15233 /* Add the upper and lower DFmode values together. */
15235 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15238 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15239 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15240 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15243 ix86_expand_vector_extract (false, target, fp_xmm, 0);
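/* The same bias trick in scalar C, as a hedged sketch (assumes IEEE-754
   doubles, <stdint.h> and <string.h>; not used by the compiler):  */
static double
model_uns64_to_double (uint64_t x)
{
  /* Glue the 0x433/0x453 exponent words onto the two halves, giving
     2**52 + lo and 2**84 + hi * 2**32 exactly.  */
  uint64_t lo_bits = 0x4330000000000000ULL | (x & 0xffffffffULL);
  uint64_t hi_bits = 0x4530000000000000ULL | (x >> 32);
  double lo, hi;
  memcpy (&lo, &lo_bits, sizeof lo);
  memcpy (&hi, &hi_bits, sizeof hi);
  /* Subtract the biases and add the halves, as the vector code does.  */
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}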
15246 /* Not used, but eases macroization of patterns. */
15248 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15249 rtx input ATTRIBUTE_UNUSED)
15251 gcc_unreachable ();
15254 /* Convert an unsigned SImode value into a DFmode. Only currently used
15255 for SSE, but applicable anywhere. */
15258 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15260 REAL_VALUE_TYPE TWO31r;
15263 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15264 NULL, 1, OPTAB_DIRECT);
15266 fp = gen_reg_rtx (DFmode);
15267 emit_insn (gen_floatsidf2 (fp, x));
15269 real_ldexp (&TWO31r, &dconst1, 31);
15270 x = const_double_from_real_value (TWO31r, DFmode);
15272 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15274 emit_move_insn (target, x);
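/* Scalar sketch of the sequence above (illustrative; assumes wrapping
   unsigned arithmetic and <stdint.h>):  */
static double
model_uns32_to_double (uint32_t x)
{
  /* x + (-2**31) wraps into the signed range, so the signed convert
     is exact; adding 2**31 back yields the unsigned value.  */
  return (double) (int32_t) (x + 0x80000000u) + 0x1.0p31;
}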
15277 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15278 32-bit mode; otherwise we have a direct convert instruction. */
15281 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15283 REAL_VALUE_TYPE TWO32r;
15284 rtx fp_lo, fp_hi, x;
15286 fp_lo = gen_reg_rtx (DFmode);
15287 fp_hi = gen_reg_rtx (DFmode);
15289 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15291 real_ldexp (&TWO32r, &dconst1, 32);
15292 x = const_double_from_real_value (TWO32r, DFmode);
15293 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15295 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15297 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15300 emit_move_insn (target, x);
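/* Sketch of the signed variant (illustrative only): convert the signed
   high half, scale it by 2**32, and add the unsigned low half, which is
   itself produced by the uns_sidf trick above.  */
static double
model_int64_to_double (int32_t hi, uint32_t lo)
{
  return (double) hi * 0x1.0p32 + (double) lo;
}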
15303 /* Convert an unsigned SImode value into an SFmode, using only SSE.
15304 For x86_32, -mfpmath=sse, !optimize_size only. */
15306 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15308 REAL_VALUE_TYPE ONE16r;
15309 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15311 real_ldexp (&ONE16r, &dconst1, 16);
15312 x = const_double_from_real_value (ONE16r, SFmode);
15313 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15314 NULL, 0, OPTAB_DIRECT);
15315 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15316 NULL, 0, OPTAB_DIRECT);
15317 fp_hi = gen_reg_rtx (SFmode);
15318 fp_lo = gen_reg_rtx (SFmode);
15319 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15320 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15321 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15323 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15325 if (!rtx_equal_p (target, fp_hi))
15326 emit_move_insn (target, fp_hi);
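/* Scalar sketch (illustrative): splitting at bit 16 keeps both halves
   exactly representable in SFmode before the scaled recombination.  */
static float
model_uns32_to_float (uint32_t x)
{
  return (float) (x >> 16) * 0x1.0p16f + (float) (x & 0xffff);
}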
15329 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15330 then replicate the value for all elements of the vector register. */
15334 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15341 v = gen_rtvec (4, value, value, value, value);
15342 return gen_rtx_CONST_VECTOR (V4SImode, v);
15346 v = gen_rtvec (2, value, value);
15347 return gen_rtx_CONST_VECTOR (V2DImode, v);
15351 v = gen_rtvec (4, value, value, value, value);
15353 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15354 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15355 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15359 v = gen_rtvec (2, value, value);
15361 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15362 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15365 gcc_unreachable ();
15369 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15370 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15371 for an SSE register. If VECT is true, then replicate the mask for
15372 all elements of the vector register. If INVERT is true, then create
15373 a mask excluding the sign bit. */
15376 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15378 enum machine_mode vec_mode, imode;
15379 HOST_WIDE_INT hi, lo;
15384 /* Find the sign bit, sign extended to 2*HWI. */
15390 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15391 lo = 0x80000000, hi = lo < 0;
15397 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15398 if (HOST_BITS_PER_WIDE_INT >= 64)
15399 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15401 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15406 vec_mode = VOIDmode;
15407 if (HOST_BITS_PER_WIDE_INT >= 64)
15410 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15417 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15421 lo = ~lo, hi = ~hi;
15427 mask = immed_double_const (lo, hi, imode);
15429 vec = gen_rtvec (2, v, mask);
15430 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15431 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15438 gcc_unreachable ();
15442 lo = ~lo, hi = ~hi;
15444 /* Force this value into the low part of a fp vector constant. */
15445 mask = immed_double_const (lo, hi, imode);
15446 mask = gen_lowpart (mode, mask);
15448 if (vec_mode == VOIDmode)
15449 return force_reg (mode, mask);
15451 v = ix86_build_const_vector (mode, vect, mask);
15452 return force_reg (vec_mode, v);
15455 /* Generate code for floating point ABS or NEG. */
15458 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15461 rtx mask, set, use, clob, dst, src;
15462 bool use_sse = false;
15463 bool vector_mode = VECTOR_MODE_P (mode);
15464 enum machine_mode elt_mode = mode;
15468 elt_mode = GET_MODE_INNER (mode);
15471 else if (mode == TFmode)
15473 else if (TARGET_SSE_MATH)
15474 use_sse = SSE_FLOAT_MODE_P (mode);
15476 /* NEG and ABS performed with SSE use bitwise mask operations.
15477 Create the appropriate mask now. */
15479 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15488 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15489 set = gen_rtx_SET (VOIDmode, dst, set);
15494 set = gen_rtx_fmt_e (code, mode, src);
15495 set = gen_rtx_SET (VOIDmode, dst, set);
15498 use = gen_rtx_USE (VOIDmode, mask);
15499 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15500 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15501 gen_rtvec (3, set, use, clob)));
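/* Bit-level sketch of the masking (illustrative; assumes IEEE floats,
   <stdint.h> and <string.h>): NEG xors the sign-bit mask in, ABS ands
   with the inverted mask.  */
static float
model_fp_neg_abs (float f, int want_abs)
{
  uint32_t bits, mask = 0x80000000u;
  memcpy (&bits, &f, sizeof bits);
  bits = want_abs ? (bits & ~mask) : (bits ^ mask);
  memcpy (&f, &bits, sizeof f);
  return f;
}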
15508 /* Expand a copysign operation. Special case operand 0 being a constant. */
15511 ix86_expand_copysign (rtx operands[])
15513 enum machine_mode mode;
15514 rtx dest, op0, op1, mask, nmask;
15516 dest = operands[0];
15520 mode = GET_MODE (dest);
15522 if (GET_CODE (op0) == CONST_DOUBLE)
15524 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15526 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15527 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15529 if (mode == SFmode || mode == DFmode)
15531 enum machine_mode vmode;
15533 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15535 if (op0 == CONST0_RTX (mode))
15536 op0 = CONST0_RTX (vmode);
15539 rtx v = ix86_build_const_vector (mode, false, op0);
15541 op0 = force_reg (vmode, v);
15544 else if (op0 != CONST0_RTX (mode))
15545 op0 = force_reg (mode, op0);
15547 mask = ix86_build_signbit_mask (mode, 0, 0);
15549 if (mode == SFmode)
15550 copysign_insn = gen_copysignsf3_const;
15551 else if (mode == DFmode)
15552 copysign_insn = gen_copysigndf3_const;
15554 copysign_insn = gen_copysigntf3_const;
15556 emit_insn (copysign_insn (dest, op0, op1, mask));
15560 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15562 nmask = ix86_build_signbit_mask (mode, 0, 1);
15563 mask = ix86_build_signbit_mask (mode, 0, 0);
15565 if (mode == SFmode)
15566 copysign_insn = gen_copysignsf3_var;
15567 else if (mode == DFmode)
15568 copysign_insn = gen_copysigndf3_var;
15570 copysign_insn = gen_copysigntf3_var;
15572 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15576 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15577 be a constant, and so has already been expanded into a vector constant. */
15580 ix86_split_copysign_const (rtx operands[])
15582 enum machine_mode mode, vmode;
15583 rtx dest, op0, mask, x;
15585 dest = operands[0];
15587 mask = operands[3];
15589 mode = GET_MODE (dest);
15590 vmode = GET_MODE (mask);
15592 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15593 x = gen_rtx_AND (vmode, dest, mask);
15594 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15596 if (op0 != CONST0_RTX (vmode))
15598 x = gen_rtx_IOR (vmode, dest, op0);
15599 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15603 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15604 so we have to do two masks. */
15607 ix86_split_copysign_var (rtx operands[])
15609 enum machine_mode mode, vmode;
15610 rtx dest, scratch, op0, op1, mask, nmask, x;
15612 dest = operands[0];
15613 scratch = operands[1];
15616 nmask = operands[4];
15617 mask = operands[5];
15619 mode = GET_MODE (dest);
15620 vmode = GET_MODE (mask);
15622 if (rtx_equal_p (op0, op1))
15624 /* Shouldn't happen often (it's useless, obviously), but when it does
15625 we'd generate incorrect code if we continue below. */
15626 emit_move_insn (dest, op0);
15630 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15632 gcc_assert (REGNO (op1) == REGNO (scratch));
15634 x = gen_rtx_AND (vmode, scratch, mask);
15635 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15638 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15639 x = gen_rtx_NOT (vmode, dest);
15640 x = gen_rtx_AND (vmode, x, op0);
15641 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15645 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15647 x = gen_rtx_AND (vmode, scratch, mask);
15649 else /* alternative 2,4 */
15651 gcc_assert (REGNO (mask) == REGNO (scratch));
15652 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15653 x = gen_rtx_AND (vmode, scratch, op1);
15655 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15657 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15659 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15660 x = gen_rtx_AND (vmode, dest, nmask);
15662 else /* alternative 3,4 */
15664 gcc_assert (REGNO (nmask) == REGNO (dest));
15666 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15667 x = gen_rtx_AND (vmode, dest, op0);
15669 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15672 x = gen_rtx_IOR (vmode, dest, scratch);
15673 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
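/* The whole copysign dance reduces to two masks and an ior, sketched
   here for DFmode (illustrative; assumes <stdint.h> and <string.h>):  */
static double
model_copysign (double x, double y)
{
  uint64_t bx, by, sign = 0x8000000000000000ULL;
  memcpy (&bx, &x, sizeof bx);
  memcpy (&by, &y, sizeof by);
  bx = (bx & ~sign) | (by & sign);   /* nmask keeps X, mask takes Y.  */
  memcpy (&x, &bx, sizeof x);
  return x;
}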
15676 /* Return TRUE or FALSE depending on whether the first SET in INSN
15677 has source and destination with matching CC modes, and that the
15678 CC mode is at least as constrained as REQ_MODE. */
15681 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15684 enum machine_mode set_mode;
15686 set = PATTERN (insn);
15687 if (GET_CODE (set) == PARALLEL)
15688 set = XVECEXP (set, 0, 0);
15689 gcc_assert (GET_CODE (set) == SET);
15690 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15692 set_mode = GET_MODE (SET_DEST (set));
15696 if (req_mode != CCNOmode
15697 && (req_mode != CCmode
15698 || XEXP (SET_SRC (set), 1) != const0_rtx))
15702 if (req_mode == CCGCmode)
15706 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15710 if (req_mode == CCZmode)
15721 gcc_unreachable ();
15724 return GET_MODE (SET_SRC (set)) == set_mode;
15727 /* Generate insn patterns to do an integer compare of OPERANDS. */
15730 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15732 enum machine_mode cmpmode;
15735 cmpmode = SELECT_CC_MODE (code, op0, op1);
15736 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15738 /* This is very simple, but making the interface the same as in the
15739 FP case makes the rest of the code easier. */
15740 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15741 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15743 /* Return the test that should be put into the flags user, i.e.
15744 the bcc, scc, or cmov instruction. */
15745 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15748 /* Figure out whether to use ordered or unordered fp comparisons.
15749 Return the appropriate mode to use. */
15752 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15754 /* ??? In order to make all comparisons reversible, we do all comparisons
15755 non-trapping when compiling for IEEE. Once gcc is able to distinguish
15756 all forms of trapping and nontrapping comparisons, we can make inequality
15757 comparisons trapping again, since it results in better code when using
15758 FCOM based compares. */
15759 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15763 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15765 enum machine_mode mode = GET_MODE (op0);
15767 if (SCALAR_FLOAT_MODE_P (mode))
15769 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15770 return ix86_fp_compare_mode (code);
15775 /* Only zero flag is needed. */
15776 case EQ: /* ZF=0 */
15777 case NE: /* ZF!=0 */
15779 /* Codes needing carry flag. */
15780 case GEU: /* CF=0 */
15781 case LTU: /* CF=1 */
15782 /* Detect overflow checks. They need just the carry flag. */
15783 if (GET_CODE (op0) == PLUS
15784 && rtx_equal_p (op1, XEXP (op0, 0)))
15788 case GTU: /* CF=0 & ZF=0 */
15789 case LEU: /* CF=1 | ZF=1 */
15790 /* Detect overflow checks. They need just the carry flag. */
15791 if (GET_CODE (op0) == MINUS
15792 && rtx_equal_p (op1, XEXP (op0, 0)))
15796 /* Codes possibly doable only with sign flag when
15797 comparing against zero. */
15798 case GE: /* SF=OF or SF=0 */
15799 case LT: /* SF<>OF or SF=1 */
15800 if (op1 == const0_rtx)
15803 /* For other cases Carry flag is not required. */
15805 /* Codes doable only with sign flag when comparing
15806 against zero, but we lack a jump instruction for it,
15807 so we need to use relational tests against the overflow
15808 flag, which thus needs to be zero.
15809 case GT: /* ZF=0 & SF=OF */
15810 case LE: /* ZF=1 | SF<>OF */
15811 if (op1 == const0_rtx)
15815 /* The strcmp pattern does a (use flags), and combine may ask us for a proper mode.
15820 gcc_unreachable ();
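/* The overflow idiom recognized above corresponds to this C shape
   (illustrative): the GEU/LTU test against an operand of the PLUS is
   exactly the carry out of the addition, so only CF is consumed.  */
static int
model_add_overflows (unsigned int a, unsigned int b)
{
  return a + b < a;   /* compiles to add; jc -- no full compare.  */
}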
15824 /* Return the fixed registers used for condition codes. */
15827 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15834 /* If two condition code modes are compatible, return a condition code
15835 mode which is compatible with both. Otherwise, return VOIDmode. */
15838 static enum machine_mode
15839 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15844 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15847 if ((m1 == CCGCmode && m2 == CCGOCmode)
15848 || (m1 == CCGOCmode && m2 == CCGCmode))
15854 gcc_unreachable ();
15884 /* These are only compatible with themselves, which we already know. */
15891 /* Return a comparison we can do that is equivalent to
15892 swap_condition (code), apart possibly from orderedness.
15893 But, never change orderedness if TARGET_IEEE_FP, returning
15894 UNKNOWN in that case if necessary. */
15896 static enum rtx_code
15897 ix86_fp_swap_condition (enum rtx_code code)
15901 case GT: /* GTU - CF=0 & ZF=0 */
15902 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15903 case GE: /* GEU - CF=0 */
15904 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15905 case UNLT: /* LTU - CF=1 */
15906 return TARGET_IEEE_FP ? UNKNOWN : GT;
15907 case UNLE: /* LEU - CF=1 | ZF=1 */
15908 return TARGET_IEEE_FP ? UNKNOWN : GE;
15910 return swap_condition (code);
15914 /* Return the cost of comparison CODE using the best strategy for performance.
15915 All the following functions use the number of instructions as the cost metric.
15916 In the future this should be tweaked to compute bytes for optimize_size and
15917 to take into account the performance of various instructions on various CPUs. */
15920 ix86_fp_comparison_cost (enum rtx_code code)
15924 /* The cost of code using bit-twiddling on %ah. */
15941 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15945 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15948 gcc_unreachable ();
15951 switch (ix86_fp_comparison_strategy (code))
15953 case IX86_FPCMP_COMI:
15954 return arith_cost > 4 ? 3 : 2;
15955 case IX86_FPCMP_SAHF:
15956 return arith_cost > 4 ? 4 : 3;
15962 /* Return the strategy to use for floating-point comparisons. We assume that
15963 fcomi is always preferable where available, since that is also true when looking at size
15964 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15966 enum ix86_fpcmp_strategy
15967 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15969 /* Do fcomi/sahf based test when profitable. */
15972 return IX86_FPCMP_COMI;
15974 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15975 return IX86_FPCMP_SAHF;
15977 return IX86_FPCMP_ARITH;
15980 /* Swap, force into registers, or otherwise massage the two operands
15981 to a fp comparison. The operands are updated in place; the new
15982 comparison code is returned. */
15984 static enum rtx_code
15985 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15987 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15988 rtx op0 = *pop0, op1 = *pop1;
15989 enum machine_mode op_mode = GET_MODE (op0);
15990 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15992 /* All of the unordered compare instructions only work on registers.
15993 The same is true of the fcomi compare instructions. The XFmode
15994 compare instructions require registers except when comparing
16001 against zero or when converting operand 1 from fixed point to floating point.
15999 && (fpcmp_mode == CCFPUmode
16000 || (op_mode == XFmode
16001 && ! (standard_80387_constant_p (op0) == 1
16002 || standard_80387_constant_p (op1) == 1)
16003 && GET_CODE (op1) != FLOAT)
16004 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16006 op0 = force_reg (op_mode, op0);
16007 op1 = force_reg (op_mode, op1);
16011 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16012 things around if they appear profitable, otherwise force op0
16013 into a register. */
16015 if (standard_80387_constant_p (op0) == 0
16017 && ! (standard_80387_constant_p (op1) == 0
16020 enum rtx_code new_code = ix86_fp_swap_condition (code);
16021 if (new_code != UNKNOWN)
16024 tmp = op0, op0 = op1, op1 = tmp;
16030 op0 = force_reg (op_mode, op0);
16032 if (CONSTANT_P (op1))
16034 int tmp = standard_80387_constant_p (op1);
16036 op1 = validize_mem (force_const_mem (op_mode, op1));
16040 op1 = force_reg (op_mode, op1);
16043 op1 = force_reg (op_mode, op1);
16047 /* Try to rearrange the comparison to make it cheaper. */
16048 if (ix86_fp_comparison_cost (code)
16049 > ix86_fp_comparison_cost (swap_condition (code))
16050 && (REG_P (op1) || can_create_pseudo_p ()))
16053 tmp = op0, op0 = op1, op1 = tmp;
16054 code = swap_condition (code);
16056 op0 = force_reg (op_mode, op0);
16064 /* Convert the comparison codes we use to represent FP comparisons to the
16065 integer code that will result in a proper branch. Return UNKNOWN if no such code is available.
16069 ix86_fp_compare_code_to_integer (enum rtx_code code)
16098 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16101 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16103 enum machine_mode fpcmp_mode, intcmp_mode;
16106 fpcmp_mode = ix86_fp_compare_mode (code);
16107 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16109 /* Do fcomi/sahf based test when profitable. */
16110 switch (ix86_fp_comparison_strategy (code))
16112 case IX86_FPCMP_COMI:
16113 intcmp_mode = fpcmp_mode;
16114 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16115 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16120 case IX86_FPCMP_SAHF:
16121 intcmp_mode = fpcmp_mode;
16122 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16123 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16127 scratch = gen_reg_rtx (HImode);
16128 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16129 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16132 case IX86_FPCMP_ARITH:
16133 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16134 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16135 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16137 scratch = gen_reg_rtx (HImode);
16138 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16140 /* In the unordered case, we have to check C2 for NaN's, which
16141 doesn't happen to work out to anything nice combination-wise.
16142 So do some bit twiddling on the value we've got in AH to come
16143 up with an appropriate set of condition codes. */
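/* After fnstsw, AH holds status-word bits 8-15, so C0 shows up as 0x01,
   C2 as 0x04 and C3 as 0x40.  fcom sets C3/C2/C0 to 000 for >, 001 for <,
   100 for equal and 111 for unordered, which is where the 0x45 and 0x40
   masks below come from.  */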
16145 intcmp_mode = CCNOmode;
16150 if (code == GT || !TARGET_IEEE_FP)
16152 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16157 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16158 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16159 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16160 intcmp_mode = CCmode;
16166 if (code == LT && TARGET_IEEE_FP)
16168 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16169 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16170 intcmp_mode = CCmode;
16175 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16181 if (code == GE || !TARGET_IEEE_FP)
16183 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16188 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16189 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16195 if (code == LE && TARGET_IEEE_FP)
16197 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16198 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16199 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16200 intcmp_mode = CCmode;
16205 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16211 if (code == EQ && TARGET_IEEE_FP)
16213 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16214 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16215 intcmp_mode = CCmode;
16220 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16226 if (code == NE && TARGET_IEEE_FP)
16228 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16229 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16235 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16241 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16245 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16250 gcc_unreachable ();
16258 /* Return the test that should be put into the flags user, i.e.
16259 the bcc, scc, or cmov instruction. */
16260 return gen_rtx_fmt_ee (code, VOIDmode,
16261 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16266 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16270 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16271 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16273 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16275 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16276 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16279 ret = ix86_expand_int_compare (code, op0, op1);
16285 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16287 enum machine_mode mode = GET_MODE (op0);
16299 tmp = ix86_expand_compare (code, op0, op1);
16300 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16301 gen_rtx_LABEL_REF (VOIDmode, label),
16303 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16310 /* Expand DImode branch into multiple compare+branch. */
16312 rtx lo[2], hi[2], label2;
16313 enum rtx_code code1, code2, code3;
16314 enum machine_mode submode;
16316 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16318 tmp = op0, op0 = op1, op1 = tmp;
16319 code = swap_condition (code);
16322 split_double_mode (mode, &op0, 1, lo+0, hi+0);
16323 split_double_mode (mode, &op1, 1, lo+1, hi+1);
16325 submode = mode == DImode ? SImode : DImode;
16327 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16328 avoid two branches. This costs one extra insn, so disable when
16329 optimizing for size. */
16331 if ((code == EQ || code == NE)
16332 && (!optimize_insn_for_size_p ()
16333 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16338 if (hi[1] != const0_rtx)
16339 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16340 NULL_RTX, 0, OPTAB_WIDEN);
16343 if (lo[1] != const0_rtx)
16344 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16345 NULL_RTX, 0, OPTAB_WIDEN);
16347 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16348 NULL_RTX, 0, OPTAB_WIDEN);
16350 ix86_expand_branch (code, tmp, const0_rtx, label);
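/* In C terms, the trick above tests double-word (in)equality with a
   single combined branch (illustrative sketch, <stdint.h> assumed):  */
static int
model_di_equal (uint32_t lo0, uint32_t hi0, uint32_t lo1, uint32_t hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}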
16354 /* Otherwise, if we are doing less-than or greater-or-equal-than,
16355 op1 is a constant and the low word is zero, then we can just
16356 examine the high word. Similarly for low word -1 and
16357 less-or-equal-than or greater-than. */
16359 if (CONST_INT_P (hi[1]))
16362 case LT: case LTU: case GE: case GEU:
16363 if (lo[1] == const0_rtx)
16365 ix86_expand_branch (code, hi[0], hi[1], label);
16369 case LE: case LEU: case GT: case GTU:
16370 if (lo[1] == constm1_rtx)
16372 ix86_expand_branch (code, hi[0], hi[1], label);
16380 /* Otherwise, we need two or three jumps. */
16382 label2 = gen_label_rtx ();
16385 code2 = swap_condition (code);
16386 code3 = unsigned_condition (code);
16390 case LT: case GT: case LTU: case GTU:
16393 case LE: code1 = LT; code2 = GT; break;
16394 case GE: code1 = GT; code2 = LT; break;
16395 case LEU: code1 = LTU; code2 = GTU; break;
16396 case GEU: code1 = GTU; code2 = LTU; break;
16398 case EQ: code1 = UNKNOWN; code2 = NE; break;
16399 case NE: code2 = UNKNOWN; break;
16402 gcc_unreachable ();
16407 * if (hi(a) < hi(b)) goto true;
16408 * if (hi(a) > hi(b)) goto false;
16409 * if (lo(a) < lo(b)) goto true;
16413 if (code1 != UNKNOWN)
16414 ix86_expand_branch (code1, hi[0], hi[1], label);
16415 if (code2 != UNKNOWN)
16416 ix86_expand_branch (code2, hi[0], hi[1], label2);
16418 ix86_expand_branch (code3, lo[0], lo[1], label);
16420 if (code2 != UNKNOWN)
16421 emit_label (label2);
16426 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16431 /* Split branch based on floating point condition. */
16433 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16434 rtx target1, rtx target2, rtx tmp, rtx pushed)
16439 if (target2 != pc_rtx)
16442 code = reverse_condition_maybe_unordered (code);
16447 condition = ix86_expand_fp_compare (code, op1, op2,
16450 /* Remove pushed operand from stack. */
16452 ix86_free_from_memory (GET_MODE (pushed));
16454 i = emit_jump_insn (gen_rtx_SET
16456 gen_rtx_IF_THEN_ELSE (VOIDmode,
16457 condition, target1, target2)));
16458 if (split_branch_probability >= 0)
16459 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16463 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16467 gcc_assert (GET_MODE (dest) == QImode);
16469 ret = ix86_expand_compare (code, op0, op1);
16470 PUT_MODE (ret, QImode);
16471 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16474 /* Expand comparison setting or clearing carry flag. Return true when
16475 successful and set pop for the operation. */
16477 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16479 enum machine_mode mode =
16480 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16482 /* Do not handle double-mode compares that go through a special path. */
16483 if (mode == (TARGET_64BIT ? TImode : DImode))
16486 if (SCALAR_FLOAT_MODE_P (mode))
16488 rtx compare_op, compare_seq;
16490 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16492 /* Shortcut: the following common codes never translate
16493 into carry flag compares. */
16494 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16495 || code == ORDERED || code == UNORDERED)
16498 /* These comparisons require zero flag; swap operands so they won't. */
16499 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16500 && !TARGET_IEEE_FP)
16505 code = swap_condition (code);
16508 /* Try to expand the comparison and verify that we end up with
16509 a carry-flag-based comparison. This fails to hold only when
16510 we decide to expand the comparison using arithmetic, which is
16511 not a common scenario.
16513 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16514 compare_seq = get_insns ();
16517 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16518 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16519 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16521 code = GET_CODE (compare_op);
16523 if (code != LTU && code != GEU)
16526 emit_insn (compare_seq);
16531 if (!INTEGRAL_MODE_P (mode))
16540 /* Convert a==0 into (unsigned)a<1. */
16543 if (op1 != const0_rtx)
16546 code = (code == EQ ? LTU : GEU);
16549 /* Convert a>b into b<a or a>=b+1. */
16552 if (CONST_INT_P (op1))
16554 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16555 /* Bail out on overflow. We still can swap operands but that
16556 would force loading of the constant into a register. */
16557 if (op1 == const0_rtx
16558 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16560 code = (code == GTU ? GEU : LTU);
16567 code = (code == GTU ? LTU : GEU);
16571 /* Convert a>=0 into (unsigned)a<0x80000000. */
16574 if (mode == DImode || op1 != const0_rtx)
16576 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16577 code = (code == LT ? GEU : LTU);
16581 if (mode == DImode || op1 != constm1_rtx)
16583 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16584 code = (code == LE ? GEU : LTU);
16590 /* Swapping operands may cause a constant to appear as the first operand. */
16591 if (!nonimmediate_operand (op0, VOIDmode))
16593 if (!can_create_pseudo_p ())
16595 op0 = force_reg (mode, op0);
16597 *pop = ix86_expand_compare (code, op0, op1);
16598 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
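/* The rewrites above, spelled out in C (illustrative):
     a == 0            becomes  (unsigned) a < 1
     a >u b, b const   becomes  a >=u b + 1
     (int) a >= 0      becomes  (unsigned) a < 0x80000000
   each of which is a bare LTU/GEU test, i.e. just the carry flag.  */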
16603 ix86_expand_int_movcc (rtx operands[])
16605 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16606 rtx compare_seq, compare_op;
16607 enum machine_mode mode = GET_MODE (operands[0]);
16608 bool sign_bit_compare_p = false;
16609 rtx op0 = XEXP (operands[1], 0);
16610 rtx op1 = XEXP (operands[1], 1);
16613 compare_op = ix86_expand_compare (code, op0, op1);
16614 compare_seq = get_insns ();
16617 compare_code = GET_CODE (compare_op);
16619 if ((op1 == const0_rtx && (code == GE || code == LT))
16620 || (op1 == constm1_rtx && (code == GT || code == LE)))
16621 sign_bit_compare_p = true;
16623 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16624 HImode insns, we'd be swallowed in word prefix ops. */
16626 if ((mode != HImode || TARGET_FAST_PREFIX)
16627 && (mode != (TARGET_64BIT ? TImode : DImode))
16628 && CONST_INT_P (operands[2])
16629 && CONST_INT_P (operands[3]))
16631 rtx out = operands[0];
16632 HOST_WIDE_INT ct = INTVAL (operands[2]);
16633 HOST_WIDE_INT cf = INTVAL (operands[3]);
16634 HOST_WIDE_INT diff;
16637 /* Sign bit compares are better done using shifts than we do by using sbb. */
16639 if (sign_bit_compare_p
16640 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
16642 /* Detect overlap between destination and compare sources. */
16645 if (!sign_bit_compare_p)
16648 bool fpcmp = false;
16650 compare_code = GET_CODE (compare_op);
16652 flags = XEXP (compare_op, 0);
16654 if (GET_MODE (flags) == CCFPmode
16655 || GET_MODE (flags) == CCFPUmode)
16659 = ix86_fp_compare_code_to_integer (compare_code);
16662 /* To simplify the rest of the code, restrict to the GEU case. */
16663 if (compare_code == LTU)
16665 HOST_WIDE_INT tmp = ct;
16668 compare_code = reverse_condition (compare_code);
16669 code = reverse_condition (code);
16674 PUT_CODE (compare_op,
16675 reverse_condition_maybe_unordered
16676 (GET_CODE (compare_op)));
16678 PUT_CODE (compare_op,
16679 reverse_condition (GET_CODE (compare_op)));
16683 if (reg_overlap_mentioned_p (out, op0)
16684 || reg_overlap_mentioned_p (out, op1))
16685 tmp = gen_reg_rtx (mode);
16687 if (mode == DImode)
16688 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16690 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16691 flags, compare_op));
16695 if (code == GT || code == GE)
16696 code = reverse_condition (code);
16699 HOST_WIDE_INT tmp = ct;
16704 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
16717 tmp = expand_simple_binop (mode, PLUS,
16719 copy_rtx (tmp), 1, OPTAB_DIRECT);
16730 tmp = expand_simple_binop (mode, IOR,
16732 copy_rtx (tmp), 1, OPTAB_DIRECT);
16734 else if (diff == -1 && ct)
16744 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16746 tmp = expand_simple_binop (mode, PLUS,
16747 copy_rtx (tmp), GEN_INT (cf),
16748 copy_rtx (tmp), 1, OPTAB_DIRECT);
16756 * andl cf - ct, dest
16766 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16769 tmp = expand_simple_binop (mode, AND,
16771 gen_int_mode (cf - ct, mode),
16772 copy_rtx (tmp), 1, OPTAB_DIRECT);
16774 tmp = expand_simple_binop (mode, PLUS,
16775 copy_rtx (tmp), GEN_INT (ct),
16776 copy_rtx (tmp), 1, OPTAB_DIRECT);
16779 if (!rtx_equal_p (tmp, out))
16780 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
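/* A branch-free sketch of the sbb selection above (illustrative; the
   compare direction and the exact constant juggling vary with the
   condition reversals performed earlier):  */
static unsigned int
model_sbb_select (unsigned int a, unsigned int b,
                  unsigned int ct, unsigned int cf)
{
  unsigned int m = a < b ? -1U : 0u;   /* cmp; sbb reg,reg  */
  return (m & (ct - cf)) + cf;         /* m ? ct : cf       */
}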
16787 enum machine_mode cmp_mode = GET_MODE (op0);
16790 tmp = ct, ct = cf, cf = tmp;
16793 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16795 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16797 /* We may be reversing an unordered compare to a normal compare, which
16798 is not valid in general (we may convert a non-trapping condition
16799 to a trapping one); however, on i386 we currently emit all
16800 comparisons unordered. */
16801 compare_code = reverse_condition_maybe_unordered (compare_code);
16802 code = reverse_condition_maybe_unordered (code);
16806 compare_code = reverse_condition (compare_code);
16807 code = reverse_condition (code);
16811 compare_code = UNKNOWN;
16812 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
16813 && CONST_INT_P (op1))
16815 if (op1 == const0_rtx
16816 && (code == LT || code == GE))
16817 compare_code = code;
16818 else if (op1 == constm1_rtx)
16822 else if (code == GT)
16827 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16828 if (compare_code != UNKNOWN
16829 && GET_MODE (op0) == GET_MODE (out)
16830 && (cf == -1 || ct == -1))
16832 /* If lea code below could be used, only optimize
16833 if it results in a 2 insn sequence. */
16835 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16836 || diff == 3 || diff == 5 || diff == 9)
16837 || (compare_code == LT && ct == -1)
16838 || (compare_code == GE && cf == -1))
16841 * notl op1 (if necessary)
16849 code = reverse_condition (code);
16852 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16854 out = expand_simple_binop (mode, IOR,
16856 out, 1, OPTAB_DIRECT);
16857 if (out != operands[0])
16858 emit_move_insn (operands[0], out);
16865 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16866 || diff == 3 || diff == 5 || diff == 9)
16867 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16869 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16875 * lea cf(dest*(ct-cf)),dest
16879 * This also catches the degenerate setcc-only case.
16885 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16888 /* On x86_64 the lea instruction operates on Pmode, so we need
16889 to get the arithmetic done in the proper mode to match. */
16891 tmp = copy_rtx (out);
16895 out1 = copy_rtx (out);
16896 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16900 tmp = gen_rtx_PLUS (mode, tmp, out1);
16906 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16909 if (!rtx_equal_p (tmp, out))
16912 out = force_operand (tmp, copy_rtx (out));
16914 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16916 if (!rtx_equal_p (out, operands[0]))
16917 emit_move_insn (operands[0], copy_rtx (out));
16923 * General case: Jumpful:
16924 * xorl dest,dest cmpl op1, op2
16925 * cmpl op1, op2 movl ct, dest
16926 * setcc dest jcc 1f
16927 * decl dest movl cf, dest
16928 * andl (cf-ct),dest 1:
16931 * Size 20. Size 14.
16933 * This is reasonably steep, but branch mispredict costs are
16934 * high on modern cpus, so consider failing only if optimizing for size.
16938 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16939 && BRANCH_COST (optimize_insn_for_speed_p (),
16944 enum machine_mode cmp_mode = GET_MODE (op0);
16949 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16951 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16953 /* We may be reversing an unordered compare to a normal compare,
16954 which is not valid in general (we may convert a non-trapping
16955 condition to a trapping one); however, on i386 we currently
16956 emit all comparisons unordered. */
16957 code = reverse_condition_maybe_unordered (code);
16961 code = reverse_condition (code);
16962 if (compare_code != UNKNOWN)
16963 compare_code = reverse_condition (compare_code);
16967 if (compare_code != UNKNOWN)
16969 /* notl op1 (if needed)
16974 For x < 0 (resp. x <= -1) there will be no notl,
16975 so if possible swap the constants to get rid of the complement.
16977 True/false will be -1/0 while code below (store flag
16978 followed by decrement) is 0/-1, so the constants need
16979 to be exchanged once more. */
16981 if (compare_code == GE || !cf)
16983 code = reverse_condition (code);
16988 HOST_WIDE_INT tmp = cf;
16993 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16997 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16999 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17001 copy_rtx (out), 1, OPTAB_DIRECT);
17004 out = expand_simple_binop (mode, AND, copy_rtx (out),
17005 gen_int_mode (cf - ct, mode),
17006 copy_rtx (out), 1, OPTAB_DIRECT);
17008 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17009 copy_rtx (out), 1, OPTAB_DIRECT);
17010 if (!rtx_equal_p (out, operands[0]))
17011 emit_move_insn (operands[0], copy_rtx (out));
17017 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17019 /* Try a few things more with specific constants and a variable. */
17022 rtx var, orig_out, out, tmp;
17024 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17027 /* If one of the two operands is an interesting constant, load a
17028 constant with the above and mask it in with a logical operation. */
17030 if (CONST_INT_P (operands[2]))
17033 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17034 operands[3] = constm1_rtx, op = and_optab;
17035 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17036 operands[3] = const0_rtx, op = ior_optab;
17040 else if (CONST_INT_P (operands[3]))
17043 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17044 operands[2] = constm1_rtx, op = and_optab;
17045 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
17046 operands[2] = const0_rtx, op = ior_optab;
17053 orig_out = operands[0];
17054 tmp = gen_reg_rtx (mode);
17057 /* Recurse to get the constant loaded. */
17058 if (ix86_expand_int_movcc (operands) == 0)
17061 /* Mask in the interesting variable. */
17062 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17064 if (!rtx_equal_p (out, orig_out))
17065 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17071 * For comparison with above,
17081 if (! nonimmediate_operand (operands[2], mode))
17082 operands[2] = force_reg (mode, operands[2]);
17083 if (! nonimmediate_operand (operands[3], mode))
17084 operands[3] = force_reg (mode, operands[3]);
17086 if (! register_operand (operands[2], VOIDmode)
17088 || ! register_operand (operands[3], VOIDmode)))
17089 operands[2] = force_reg (mode, operands[2]);
17092 && ! register_operand (operands[3], VOIDmode))
17093 operands[3] = force_reg (mode, operands[3]);
17095 emit_insn (compare_seq);
17096 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17097 gen_rtx_IF_THEN_ELSE (mode,
17098 compare_op, operands[2],
17103 /* Swap, force into registers, or otherwise massage the two operands
17104 to an sse comparison with a mask result. Thus we differ a bit from
17105 ix86_prepare_fp_compare_args which expects to produce a flags result.
17107 The DEST operand exists to help determine whether to commute commutative
17108 operators. The POP0/POP1 operands are updated in place. The new
17109 comparison code is returned, or UNKNOWN if not implementable. */
17111 static enum rtx_code
17112 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17113 rtx *pop0, rtx *pop1)
17121 /* We have no LTGT as an operator. We could implement it with
17122 NE & ORDERED, but this requires an extra temporary. It's
17123 not clear that it's worth it. */
17130 /* These are supported directly. */
17137 /* For commutative operators, try to canonicalize the destination
17138 operand to be first in the comparison - this helps reload to
17139 avoid extra moves. */
17140 if (!dest || !rtx_equal_p (dest, *pop1))
17148 /* These are not supported directly. Swap the comparison operands
17149 to transform into something that is supported. */
17153 code = swap_condition (code);
17157 gcc_unreachable ();
17163 /* Detect conditional moves that exactly match min/max operational
17164 semantics. Note that this is IEEE safe, as long as we don't
17165 interchange the operands.
17167 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17168 and TRUE if the operation is successful and instructions are emitted. */
17171 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17172 rtx cmp_op1, rtx if_true, rtx if_false)
17174 enum machine_mode mode;
17180 else if (code == UNGE)
17183 if_true = if_false;
17189 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17191 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17196 mode = GET_MODE (dest);
17198 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17199 but MODE may be a vector mode and thus not appropriate. */
17200 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17202 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17205 if_true = force_reg (mode, if_true);
17206 v = gen_rtvec (2, if_true, if_false);
17207 tmp = gen_rtx_UNSPEC (mode, v, u);
17211 code = is_min ? SMIN : SMAX;
17212 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17215 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17219 /* Expand an sse vector comparison. Return the register with the result. */
17222 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17223 rtx op_true, rtx op_false)
17225 enum machine_mode mode = GET_MODE (dest);
17228 cmp_op0 = force_reg (mode, cmp_op0);
17229 if (!nonimmediate_operand (cmp_op1, mode))
17230 cmp_op1 = force_reg (mode, cmp_op1);
17233 || reg_overlap_mentioned_p (dest, op_true)
17234 || reg_overlap_mentioned_p (dest, op_false))
17235 dest = gen_reg_rtx (mode);
17237 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17238 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17243 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17244 operations. This is used for both scalar and vector conditional moves. */
17247 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17249 enum machine_mode mode = GET_MODE (dest);
17252 if (op_false == CONST0_RTX (mode))
17254 op_true = force_reg (mode, op_true);
17255 x = gen_rtx_AND (mode, cmp, op_true);
17256 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17258 else if (op_true == CONST0_RTX (mode))
17260 op_false = force_reg (mode, op_false);
17261 x = gen_rtx_NOT (mode, cmp);
17262 x = gen_rtx_AND (mode, x, op_false);
17263 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17265 else if (TARGET_XOP)
17267 rtx pcmov = gen_rtx_SET (mode, dest,
17268 gen_rtx_IF_THEN_ELSE (mode, cmp,
17275 op_true = force_reg (mode, op_true);
17276 op_false = force_reg (mode, op_false);
17278 t2 = gen_reg_rtx (mode);
17280 t3 = gen_reg_rtx (mode);
17284 x = gen_rtx_AND (mode, op_true, cmp);
17285 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17287 x = gen_rtx_NOT (mode, cmp);
17288 x = gen_rtx_AND (mode, x, op_false);
17289 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17291 x = gen_rtx_IOR (mode, t3, t2);
17292 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
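/* Per element, the fallback is the classic mask select (illustrative
   sketch; CMP is all-ones where the condition held, zero elsewhere):  */
static uint32_t
model_sse_movcc (uint32_t cmp, uint32_t op_true, uint32_t op_false)
{
  return (cmp & op_true) | (~cmp & op_false);   /* and/andn/or  */
}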
17296 /* Expand a floating-point conditional move. Return true if successful. */
17299 ix86_expand_fp_movcc (rtx operands[])
17301 enum machine_mode mode = GET_MODE (operands[0]);
17302 enum rtx_code code = GET_CODE (operands[1]);
17303 rtx tmp, compare_op;
17304 rtx op0 = XEXP (operands[1], 0);
17305 rtx op1 = XEXP (operands[1], 1);
17307 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17309 enum machine_mode cmode;
17311 /* Since we've no cmove for sse registers, don't force bad register
17312 allocation just to gain access to it. Deny movcc when the
17313 comparison mode doesn't match the move mode. */
17314 cmode = GET_MODE (op0);
17315 if (cmode == VOIDmode)
17316 cmode = GET_MODE (op1);
17320 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17321 if (code == UNKNOWN)
17324 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17325 operands[2], operands[3]))
17328 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17329 operands[2], operands[3]);
17330 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17334 /* The floating point conditional move instructions don't directly
17335 support conditions resulting from a signed integer comparison. */
17337 compare_op = ix86_expand_compare (code, op0, op1);
17338 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17340 tmp = gen_reg_rtx (QImode);
17341 ix86_expand_setcc (tmp, code, op0, op1);
17343 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17346 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17347 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17348 operands[2], operands[3])));
17353 /* Expand a floating-point vector conditional move; a vcond operation
17354 rather than a movcc operation. */
17357 ix86_expand_fp_vcond (rtx operands[])
17359 enum rtx_code code = GET_CODE (operands[3]);
17362 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17363 &operands[4], &operands[5]);
17364 if (code == UNKNOWN)
17367 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17368 operands[5], operands[1], operands[2]))
17371 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17372 operands[1], operands[2]);
17373 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17377 /* Expand a signed/unsigned integral vector conditional move. */
17380 ix86_expand_int_vcond (rtx operands[])
17382 enum machine_mode mode = GET_MODE (operands[0]);
17383 enum rtx_code code = GET_CODE (operands[3]);
17384 bool negate = false;
17387 cop0 = operands[4];
17388 cop1 = operands[5];
17390 /* XOP supports all of the comparisons on all vector int types. */
17393 /* Canonicalize the comparison to EQ, GT, GTU. */
17404 code = reverse_condition (code);
17410 code = reverse_condition (code);
17416 code = swap_condition (code);
17417 x = cop0, cop0 = cop1, cop1 = x;
17421 gcc_unreachable ();
17424 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17425 if (mode == V2DImode)
17430 /* SSE4.1 supports EQ. */
17431 if (!TARGET_SSE4_1)
17437 /* SSE4.2 supports GT/GTU. */
17438 if (!TARGET_SSE4_2)
17443 gcc_unreachable ();
17447 /* Unsigned parallel compare is not supported by the hardware.
17448 Play some tricks to turn this into a signed comparison against 0. */
17452 cop0 = force_reg (mode, cop0);
17460 rtx (*gen_sub3) (rtx, rtx, rtx);
17462 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
17464 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17466 gen_sub3 = (mode == V4SImode
17467 ? gen_subv4si3 : gen_subv2di3);
17468 t1 = gen_reg_rtx (mode);
17469 emit_insn (gen_sub3 (t1, cop0, mask));
17471 t2 = gen_reg_rtx (mode);
17472 emit_insn (gen_sub3 (t2, cop1, mask));
17482 /* Perform a parallel unsigned saturating subtraction. */
17483 x = gen_reg_rtx (mode);
17484 emit_insn (gen_rtx_SET (VOIDmode, x,
17485 gen_rtx_US_MINUS (mode, cop0, cop1)));
17488 cop1 = CONST0_RTX (mode);
17494 gcc_unreachable ();
17499 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17500 operands[1+negate], operands[2-negate]);
17502 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17503 operands[2-negate]);
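/* Per element, the signed-compare trick is (illustrative; subtracting
   -(INT_MAX + 1) modulo 2**32 is the same as flipping the sign bit):  */
static int
model_unsigned_gt (uint32_t a, uint32_t b)
{
  return (int32_t) (a ^ 0x80000000u) > (int32_t) (b ^ 0x80000000u);
}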
17507 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17508 true if we should do zero extension, else sign extension. HIGH_P is
17509 true if we want the N/2 high elements, else the low elements. */
17512 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17514 enum machine_mode imode = GET_MODE (operands[1]);
17515 rtx (*unpack)(rtx, rtx, rtx);
17522 unpack = gen_vec_interleave_highv16qi;
17524 unpack = gen_vec_interleave_lowv16qi;
17528 unpack = gen_vec_interleave_highv8hi;
17530 unpack = gen_vec_interleave_lowv8hi;
17534 unpack = gen_vec_interleave_highv4si;
17536 unpack = gen_vec_interleave_lowv4si;
17539 gcc_unreachable ();
17542 dest = gen_lowpart (imode, operands[0]);
17545 se = force_reg (imode, CONST0_RTX (imode));
17547 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17548 operands[1], pc_rtx, pc_rtx);
17550 emit_insn (unpack (dest, operands[1], se));
17553 /* This function performs the same task as ix86_expand_sse_unpack,
17554 but with SSE4.1 instructions. */
17557 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17559 enum machine_mode imode = GET_MODE (operands[1]);
17560 rtx (*unpack)(rtx, rtx);
17567 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17569 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
17573 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17575 unpack = gen_sse4_1_sign_extendv4hiv4si2;
17579 unpack = gen_sse4_1_zero_extendv2siv2di2;
17581 unpack = gen_sse4_1_sign_extendv2siv2di2;
17584 gcc_unreachable ();
17587 dest = operands[0];
17590 /* Shift higher 8 bytes to lower 8 bytes. */
17591 src = gen_reg_rtx (imode);
17592 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17593 gen_lowpart (V1TImode, operands[1]),
17599 emit_insn (unpack (dest, src));
17602 /* Expand conditional increment or decrement using adc/sbb instructions.
17603 The default case using setcc followed by the conditional move can be
17604 done by generic code. */
17606 ix86_expand_int_addcc (rtx operands[])
17608 enum rtx_code code = GET_CODE (operands[1]);
17610 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17612 rtx val = const0_rtx;
17613 bool fpcmp = false;
17614 enum machine_mode mode;
17615 rtx op0 = XEXP (operands[1], 0);
17616 rtx op1 = XEXP (operands[1], 1);
17618 if (operands[3] != const1_rtx
17619 && operands[3] != constm1_rtx)
17621 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17623 code = GET_CODE (compare_op);
17625 flags = XEXP (compare_op, 0);
17627 if (GET_MODE (flags) == CCFPmode
17628 || GET_MODE (flags) == CCFPUmode)
17631 code = ix86_fp_compare_code_to_integer (code);
17638 PUT_CODE (compare_op,
17639 reverse_condition_maybe_unordered
17640 (GET_CODE (compare_op)));
17642 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17645 mode = GET_MODE (operands[0]);
17647 /* Construct either adc or sbb insn. */
17648 if ((code == LTU) == (operands[3] == constm1_rtx))
17653 insn = gen_subqi3_carry;
17656 insn = gen_subhi3_carry;
17659 insn = gen_subsi3_carry;
17662 insn = gen_subdi3_carry;
17665 gcc_unreachable ();
17673 insn = gen_addqi3_carry;
17676 insn = gen_addhi3_carry;
17679 insn = gen_addsi3_carry;
17682 insn = gen_adddi3_carry;
17685 gcc_unreachable ();
17688 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
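/* Shape of the result (illustrative): the compare leaves the condition
   in CF, and adc/sbb folds it straight into the add, so a conditional
   increment needs no setcc or cmov.  */
static unsigned int
model_addcc (unsigned int x, unsigned int a, unsigned int b)
{
  return x + (a < b);   /* cmp; adc $0, x  */
}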
17694 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
17695 but works for floating point parameters and non-offsettable memories.
17696 For pushes, it returns just stack offsets; the values will be saved
17697 in the right order. At most four parts are generated. */
17700 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17705 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17707 size = (GET_MODE_SIZE (mode) + 4) / 8;
17709 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17710 gcc_assert (size >= 2 && size <= 4);
17712 /* Optimize constant pool reference to immediates. This is used by fp
17713 moves, which force all constants to memory to allow combining. */
17714 if (MEM_P (operand) && MEM_READONLY_P (operand))
17716 rtx tmp = maybe_get_pool_constant (operand);
17721 if (MEM_P (operand) && !offsettable_memref_p (operand))
17723 /* The only non-offsettable memories we handle are pushes. */
17724 int ok = push_operand (operand, VOIDmode);
17728 operand = copy_rtx (operand);
17729 PUT_MODE (operand, Pmode);
17730 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17734 if (GET_CODE (operand) == CONST_VECTOR)
17736 enum machine_mode imode = int_mode_for_mode (mode);
17737 /* Caution: if we looked through a constant pool memory above,
17738 the operand may actually have a different mode now. That's
17739 ok, since we want to pun this all the way back to an integer. */
17740 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17741 gcc_assert (operand != NULL);
17747 if (mode == DImode)
17748 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
17753 if (REG_P (operand))
17755 gcc_assert (reload_completed);
17756 for (i = 0; i < size; i++)
17757 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17759 else if (offsettable_memref_p (operand))
17761 operand = adjust_address (operand, SImode, 0);
17762 parts[0] = operand;
17763 for (i = 1; i < size; i++)
17764 parts[i] = adjust_address (operand, SImode, 4 * i);
17766 else if (GET_CODE (operand) == CONST_DOUBLE)
17771 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17775 real_to_target (l, &r, mode);
17776 parts[3] = gen_int_mode (l[3], SImode);
17777 parts[2] = gen_int_mode (l[2], SImode);
17780 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17781 parts[2] = gen_int_mode (l[2], SImode);
17784 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17787 gcc_unreachable ();
17789 parts[1] = gen_int_mode (l[1], SImode);
17790 parts[0] = gen_int_mode (l[0], SImode);
17793 gcc_unreachable ();
17798 if (mode == TImode)
17799 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
17800 if (mode == XFmode || mode == TFmode)
17802 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
17803 if (REG_P (operand))
17805 gcc_assert (reload_completed);
17806 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17807 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17809 else if (offsettable_memref_p (operand))
17811 operand = adjust_address (operand, DImode, 0);
17812 parts[0] = operand;
17813 parts[1] = adjust_address (operand, upper_mode, 8);
17815 else if (GET_CODE (operand) == CONST_DOUBLE)
17820 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17821 real_to_target (l, &r, mode);
17823 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
17824 if (HOST_BITS_PER_WIDE_INT >= 64)
17827 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17828 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17831 parts[0] = immed_double_const (l[0], l[1], DImode);
17833 if (upper_mode == SImode)
17834 parts[1] = gen_int_mode (l[2], SImode);
17835 else if (HOST_BITS_PER_WIDE_INT >= 64)
17838 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17839 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17842 parts[1] = immed_double_const (l[2], l[3], DImode);
17845 gcc_unreachable ();
17852 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17853 Return false when normal moves are needed; true when all required
17854 insns have been emitted. Operands 2-4 contain the input values
17855 in the correct order; operands 5-7 contain the output values. */
17858 ix86_split_long_move (rtx operands[])
17863 int collisions = 0;
17864 enum machine_mode mode = GET_MODE (operands[0]);
17865 bool collisionparts[4];
17867 /* The DFmode expanders may ask us to move a double.
17868 For a 64-bit target this is a single move. By hiding that fact
17869 here we simplify the i386.md splitters. */
17870 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
17872 /* Optimize constant pool reference to immediates. This is used by
17873 fp moves, which force all constants to memory to allow combining. */
17875 if (MEM_P (operands[1])
17876 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17877 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17878 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17879 if (push_operand (operands[0], VOIDmode))
17881 operands[0] = copy_rtx (operands[0]);
17882 PUT_MODE (operands[0], Pmode);
17885 operands[0] = gen_lowpart (DImode, operands[0]);
17886 operands[1] = gen_lowpart (DImode, operands[1]);
17887 emit_move_insn (operands[0], operands[1]);
17891 /* The only non-offsettable memory we handle is push. */
17892 if (push_operand (operands[0], VOIDmode))
17895 gcc_assert (!MEM_P (operands[0])
17896 || offsettable_memref_p (operands[0]));
17898 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17899 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17901 /* When emitting a push, take care with source operands on the stack. */
17902 if (push && MEM_P (operands[1])
17903 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17905 rtx src_base = XEXP (part[1][nparts - 1], 0);
17907 /* Compensate for the stack decrement by 4. */
17908 if (!TARGET_64BIT && nparts == 3
17909 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17910 src_base = plus_constant (src_base, 4);
17912 /* src_base refers to the stack pointer and is
17913 automatically decreased by the emitted pushes. */
17914 for (i = 0; i < nparts; i++)
17915 part[1][i] = change_address (part[1][i],
17916 GET_MODE (part[1][i]), src_base);
17919 /* We need to do the copy in the right order in case an address register
17920 of the source overlaps the destination. */
17921 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17925 for (i = 0; i < nparts; i++)
17928 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17929 if (collisionparts[i])
17933 /* Collision in the middle part can be handled by reordering. */
17934 if (collisions == 1 && nparts == 3 && collisionparts [1])
17936 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17937 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17939 else if (collisions == 1
17941 && (collisionparts [1] || collisionparts [2]))
17943 if (collisionparts [1])
17945 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17946 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17950 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17951 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17955 /* If there are more collisions, we can't handle them by reordering.
17956 Do an lea to the last part and use only one colliding move. */
17957 else if (collisions > 1)
17963 base = part[0][nparts - 1];
17965 /* Handle the case when the last part isn't valid for lea.
17966 Happens in 64-bit mode storing the 12-byte XFmode. */
17967 if (GET_MODE (base) != Pmode)
17968 base = gen_rtx_REG (Pmode, REGNO (base));
17970 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17971 part[1][0] = replace_equiv_address (part[1][0], base);
17972 for (i = 1; i < nparts; i++)
17974 tmp = plus_constant (base, UNITS_PER_WORD * i);
17975 part[1][i] = replace_equiv_address (part[1][i], tmp);
17986 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17987 emit_insn (gen_addsi3 (stack_pointer_rtx,
17988 stack_pointer_rtx, GEN_INT (-4)));
17989 emit_move_insn (part[0][2], part[1][2]);
17991 else if (nparts == 4)
17993 emit_move_insn (part[0][3], part[1][3]);
17994 emit_move_insn (part[0][2], part[1][2]);
17999 /* In 64-bit mode we don't have a 32-bit push available. If this is a
18000 register, that is OK: we will just use the larger counterpart. We also
18001 retype the memory; this comes from an attempt to avoid the REX prefix
18002 when moving the second half of a TFmode value. */
18003 if (GET_MODE (part[1][1]) == SImode)
18005 switch (GET_CODE (part[1][1]))
18008 part[1][1] = adjust_address (part[1][1], DImode, 0);
18012 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18016 gcc_unreachable ();
18019 if (GET_MODE (part[1][0]) == SImode)
18020 part[1][0] = part[1][1];
18023 emit_move_insn (part[0][1], part[1][1]);
18024 emit_move_insn (part[0][0], part[1][0]);
18028 /* Choose the correct order so as not to overwrite the source before it is copied. */
18029 if ((REG_P (part[0][0])
18030 && REG_P (part[1][1])
18031 && (REGNO (part[0][0]) == REGNO (part[1][1])
18033 && REGNO (part[0][0]) == REGNO (part[1][2]))
18035 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18037 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18039 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18041 operands[2 + i] = part[0][j];
18042 operands[6 + i] = part[1][j];
18047 for (i = 0; i < nparts; i++)
18049 operands[2 + i] = part[0][i];
18050 operands[6 + i] = part[1][i];
18054 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
18055 if (optimize_insn_for_size_p ())
18057 for (j = 0; j < nparts - 1; j++)
18058 if (CONST_INT_P (operands[6 + j])
18059 && operands[6 + j] != const0_rtx
18060 && REG_P (operands[2 + j]))
18061 for (i = j; i < nparts - 1; i++)
18062 if (CONST_INT_P (operands[7 + i])
18063 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
18064 operands[7 + i] = operands[2 + j];
18067 for (i = 0; i < nparts; i++)
18068 emit_move_insn (operands[2 + i], operands[6 + i]);
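/* Illustrative sketch (editorial, not part of GCC): the ordering rule used
   above.  When the first destination part is a register that the source
   still needs (as a later source part or inside the source address), the
   parts are copied high-to-low; otherwise low-to-high.  */
#if 0
#include <stdint.h>

static void
copy_parts_model (uint32_t *dst[], const uint32_t *src[],
                  int nparts, int dest_overlaps_src)
{
  int i;
  if (dest_overlaps_src)
    for (i = nparts - 1; i >= 0; i--)   /* reversed order  */
      *dst[i] = *src[i];
  else
    for (i = 0; i < nparts; i++)        /* normal order  */
      *dst[i] = *src[i];
}
#endif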
18073 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
18074 left shift by a constant, either using a single shift or
18075 a sequence of add instructions. */
18078 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
18080 rtx (*insn)(rtx, rtx, rtx);
18083 || (count * ix86_cost->add <= ix86_cost->shift_const
18084 && !optimize_insn_for_size_p ()))
18086 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
18087 while (count-- > 0)
18088 emit_insn (insn (operand, operand, operand));
18092 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18093 emit_insn (insn (operand, operand, GEN_INT (count)));
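/* Illustrative sketch (editorial, not part of GCC): the add sequence chosen
   above.  Each self-addition doubles the operand, so COUNT additions compute
   operand << COUNT; that form wins whenever COUNT * add-cost is at most the
   constant-shift cost.  */
#if 0
static unsigned int
ashl_by_adds_model (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;   /* x += x is x << 1  */
  return x;
}
#endif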
18098 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
18100 rtx (*gen_ashl3)(rtx, rtx, rtx);
18101 rtx (*gen_shld)(rtx, rtx, rtx);
18102 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18104 rtx low[2], high[2];
18107 if (CONST_INT_P (operands[2]))
18109 split_double_mode (mode, operands, 2, low, high);
18110 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18112 if (count >= half_width)
18114 emit_move_insn (high[0], low[1]);
18115 emit_move_insn (low[0], const0_rtx);
18117 if (count > half_width)
18118 ix86_expand_ashl_const (high[0], count - half_width, mode);
18122 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18124 if (!rtx_equal_p (operands[0], operands[1]))
18125 emit_move_insn (operands[0], operands[1]);
18127 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
18128 ix86_expand_ashl_const (low[0], count, mode);
18133 split_double_mode (mode, operands, 1, low, high);
18135 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18137 if (operands[1] == const1_rtx)
18139 /* Assuming we've chosen QImode-capable registers, then 1 << N
18140 can be done with two 32/64-bit shifts, no branches, no cmoves. */
18141 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18143 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18145 ix86_expand_clear (low[0]);
18146 ix86_expand_clear (high[0]);
18147 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
18149 d = gen_lowpart (QImode, low[0]);
18150 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18151 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18152 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18154 d = gen_lowpart (QImode, high[0]);
18155 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18156 s = gen_rtx_NE (QImode, flags, const0_rtx);
18157 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18160 /* Otherwise, we can get the same results by manually performing
18161 a bit extract operation on bit 5/6, and then performing the two
18162 shifts. The two methods of getting 0/1 into low/high are exactly
18163 the same size. Avoiding the shift in the bit extract case helps
18164 pentium4 a bit; no one else seems to care much either way. */
18167 enum machine_mode half_mode;
18168 rtx (*gen_lshr3)(rtx, rtx, rtx);
18169 rtx (*gen_and3)(rtx, rtx, rtx);
18170 rtx (*gen_xor3)(rtx, rtx, rtx);
18171 HOST_WIDE_INT bits;
18174 if (mode == DImode)
18176 half_mode = SImode;
18177 gen_lshr3 = gen_lshrsi3;
18178 gen_and3 = gen_andsi3;
18179 gen_xor3 = gen_xorsi3;
18184 half_mode = DImode;
18185 gen_lshr3 = gen_lshrdi3;
18186 gen_and3 = gen_anddi3;
18187 gen_xor3 = gen_xordi3;
18191 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18192 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
18194 x = gen_lowpart (half_mode, operands[2]);
18195 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18197 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
18198 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
18199 emit_move_insn (low[0], high[0]);
18200 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
18203 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18204 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
18208 if (operands[1] == constm1_rtx)
18210 /* For -1 << N, we can avoid the shld instruction, because we
18211 know that we're shifting 0...31/63 ones into a -1. */
18212 emit_move_insn (low[0], constm1_rtx);
18213 if (optimize_insn_for_size_p ())
18214 emit_move_insn (high[0], low[0]);
18216 emit_move_insn (high[0], constm1_rtx);
18220 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18222 if (!rtx_equal_p (operands[0], operands[1]))
18223 emit_move_insn (operands[0], operands[1]);
18225 split_double_mode (mode, operands, 1, low, high);
18226 emit_insn (gen_shld (high[0], low[0], operands[2]));
18229 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18231 if (TARGET_CMOVE && scratch)
18233 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18234 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18236 ix86_expand_clear (scratch);
18237 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
18241 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18242 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18244 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
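/* Illustrative sketch (editorial, not part of GCC): a C model of the
   double-word left shift emitted above, shown for DImode on a 32-bit
   target.  The count >= 32 case matches the move/clear path; the general
   case matches the shld + shift pair.  */
#if 0
#include <stdint.h>

static uint64_t
ashl_double_model (uint64_t x, unsigned int count)
{
  uint32_t low = (uint32_t) x;
  uint32_t high = (uint32_t) (x >> 32);
  count &= 63;
  if (count >= 32)
    {
      high = low << (count - 32);   /* high word receives shifted low word  */
      low = 0;
    }
  else if (count > 0)
    {
      high = (high << count) | (low >> (32 - count));   /* shld  */
      low <<= count;
    }
  return ((uint64_t) high << 32) | low;
}
#endif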
18249 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18251 rtx (*gen_ashr3)(rtx, rtx, rtx)
18252 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
18253 rtx (*gen_shrd)(rtx, rtx, rtx);
18254 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18256 rtx low[2], high[2];
18259 if (CONST_INT_P (operands[2]))
18261 split_double_mode (mode, operands, 2, low, high);
18262 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18264 if (count == GET_MODE_BITSIZE (mode) - 1)
18266 emit_move_insn (high[0], high[1]);
18267 emit_insn (gen_ashr3 (high[0], high[0],
18268 GEN_INT (half_width - 1)));
18269 emit_move_insn (low[0], high[0]);
18272 else if (count >= half_width)
18274 emit_move_insn (low[0], high[1]);
18275 emit_move_insn (high[0], low[0]);
18276 emit_insn (gen_ashr3 (high[0], high[0],
18277 GEN_INT (half_width - 1)));
18279 if (count > half_width)
18280 emit_insn (gen_ashr3 (low[0], low[0],
18281 GEN_INT (count - half_width)));
18285 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18287 if (!rtx_equal_p (operands[0], operands[1]))
18288 emit_move_insn (operands[0], operands[1]);
18290 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18291 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
18296 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18298 if (!rtx_equal_p (operands[0], operands[1]))
18299 emit_move_insn (operands[0], operands[1]);
18301 split_double_mode (mode, operands, 1, low, high);
18303 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18304 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
18306 if (TARGET_CMOVE && scratch)
18308 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18309 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18311 emit_move_insn (scratch, high[0]);
18312 emit_insn (gen_ashr3 (scratch, scratch,
18313 GEN_INT (half_width - 1)));
18314 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18319 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
18320 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
18322 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
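/* Illustrative sketch (editorial, not part of GCC): a C model of the
   double-word arithmetic right shift above, assuming >> on a signed int is
   arithmetic, as it is on the targets this code serves.  */
#if 0
#include <stdint.h>

static int64_t
ashr_double_model (int64_t x, unsigned int count)
{
  uint32_t low = (uint32_t) x;
  int32_t high = (int32_t) (x >> 32);
  count &= 63;
  if (count >= 32)
    {
      low = (uint32_t) (high >> (count - 32));
      high >>= 31;   /* sign-fill the upper word  */
    }
  else if (count > 0)
    {
      low = (low >> count) | ((uint32_t) high << (32 - count));   /* shrd  */
      high >>= count;
    }
  return (int64_t) (((uint64_t) (uint32_t) high << 32) | low);
}
#endif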
18328 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18330 rtx (*gen_lshr3)(rtx, rtx, rtx)
18331 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
18332 rtx (*gen_shrd)(rtx, rtx, rtx);
18333 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18335 rtx low[2], high[2];
18338 if (CONST_INT_P (operands[2]))
18340 split_double_mode (mode, operands, 2, low, high);
18341 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18343 if (count >= half_width)
18345 emit_move_insn (low[0], high[1]);
18346 ix86_expand_clear (high[0]);
18348 if (count > half_width)
18349 emit_insn (gen_lshr3 (low[0], low[0],
18350 GEN_INT (count - half_width)));
18354 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18356 if (!rtx_equal_p (operands[0], operands[1]))
18357 emit_move_insn (operands[0], operands[1]);
18359 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18360 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
18365 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18367 if (!rtx_equal_p (operands[0], operands[1]))
18368 emit_move_insn (operands[0], operands[1]);
18370 split_double_mode (mode, operands, 1, low, high);
18372 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18373 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
18375 if (TARGET_CMOVE && scratch)
18377 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18378 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18380 ix86_expand_clear (scratch);
18381 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18386 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18387 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18389 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
18394 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18396 predict_jump (int prob)
18398 rtx insn = get_last_insn ();
18399 gcc_assert (JUMP_P (insn));
18400 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
18403 /* Helper function for the string operations below. Test VARIABLE for
18404 whether it is aligned to VALUE bytes. If so, jump to the label. */
18406 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18408 rtx label = gen_label_rtx ();
18409 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18410 if (GET_MODE (variable) == DImode)
18411 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18413 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18414 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18417 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18419 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18423 /* Adjust COUNTREG by VALUE. */
18425 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18427 rtx (*gen_add)(rtx, rtx, rtx)
18428 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
18430 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
18433 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
18435 ix86_zero_extend_to_Pmode (rtx exp)
18438 if (GET_MODE (exp) == VOIDmode)
18439 return force_reg (Pmode, exp);
18440 if (GET_MODE (exp) == Pmode)
18441 return copy_to_mode_reg (Pmode, exp);
18442 r = gen_reg_rtx (Pmode);
18443 emit_insn (gen_zero_extendsidi2 (r, exp));
18447 /* Divide COUNTREG by SCALE. */
18449 scale_counter (rtx countreg, int scale)
18455 if (CONST_INT_P (countreg))
18456 return GEN_INT (INTVAL (countreg) / scale);
18457 gcc_assert (REG_P (countreg));
18459 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18460 GEN_INT (exact_log2 (scale)),
18461 NULL, 1, OPTAB_DIRECT);
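/* Editorial note (illustrative, not part of GCC): SCALE is the chunk size
   of the string instruction -- 1, 2, 4 or 8, always a power of two -- so
   the division above is a single logical shift right, roughly:  */
#if 0
#include <stddef.h>

static size_t
scale_counter_model (size_t count, unsigned int scale)
{
  unsigned int log2_scale = 0;
  while ((1u << log2_scale) < scale)   /* exact_log2 of a power of two  */
    log2_scale++;
  return count >> log2_scale;          /* count / scale  */
}
#endif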
18465 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18466 DImode for constant loop counts. */
18468 static enum machine_mode
18469 counter_mode (rtx count_exp)
18471 if (GET_MODE (count_exp) != VOIDmode)
18472 return GET_MODE (count_exp);
18473 if (!CONST_INT_P (count_exp))
18475 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18480 /* When SRCPTR is non-NULL, output a simple loop to move memory from
18481 SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
18482 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
18483 the equivalent loop to set memory to VALUE (supposed to be in MODE).
18485 The size is rounded down to a whole number of chunks moved at once.
18486 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
18490 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18491 rtx destptr, rtx srcptr, rtx value,
18492 rtx count, enum machine_mode mode, int unroll,
18495 rtx out_label, top_label, iter, tmp;
18496 enum machine_mode iter_mode = counter_mode (count);
18497 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18498 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18504 top_label = gen_label_rtx ();
18505 out_label = gen_label_rtx ();
18506 iter = gen_reg_rtx (iter_mode);
18508 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18509 NULL, 1, OPTAB_DIRECT);
18510 /* Those two should combine. */
18511 if (piece_size == const1_rtx)
18513 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18515 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18517 emit_move_insn (iter, const0_rtx);
18519 emit_label (top_label);
18521 tmp = convert_modes (Pmode, iter_mode, iter, true);
18522 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18523 destmem = change_address (destmem, mode, x_addr);
18527 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18528 srcmem = change_address (srcmem, mode, y_addr);
18530 /* When unrolling for chips that reorder memory reads and writes,
18531 we can save registers by using a single temporary.
18532 Also, using 4 temporaries is overkill in 32-bit mode. */
18533 if (!TARGET_64BIT && 0)
18535 for (i = 0; i < unroll; i++)
18540 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18542 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18544 emit_move_insn (destmem, srcmem);
18550 gcc_assert (unroll <= 4);
18551 for (i = 0; i < unroll; i++)
18553 tmpreg[i] = gen_reg_rtx (mode);
18557 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18559 emit_move_insn (tmpreg[i], srcmem);
18561 for (i = 0; i < unroll; i++)
18566 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18568 emit_move_insn (destmem, tmpreg[i]);
18573 for (i = 0; i < unroll; i++)
18577 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18578 emit_move_insn (destmem, value);
18581 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18582 true, OPTAB_LIB_WIDEN);
18584 emit_move_insn (iter, tmp);
18586 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18588 if (expected_size != -1)
18590 expected_size /= GET_MODE_SIZE (mode) * unroll;
18591 if (expected_size == 0)
18593 else if (expected_size > REG_BR_PROB_BASE)
18594 predict_jump (REG_BR_PROB_BASE - 1);
18596 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18599 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18600 iter = ix86_zero_extend_to_Pmode (iter);
18601 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18602 true, OPTAB_LIB_WIDEN);
18603 if (tmp != destptr)
18604 emit_move_insn (destptr, tmp);
18607 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18608 true, OPTAB_LIB_WIDEN);
18610 emit_move_insn (srcptr, tmp);
18612 emit_label (out_label);
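/* Illustrative sketch (editorial, not part of GCC): the shape of the loop
   emitted above for the move case, with CHUNK standing for
   GET_MODE_SIZE (mode) * unroll.  The byte count is masked down to whole
   chunks; the remainder is left for the epilogue.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
move_via_loop_model (char *dest, const char *src, size_t count, size_t chunk)
{
  size_t size = count & ~(chunk - 1);   /* piece_size_mask; chunk is a power of two  */
  size_t iter;
  for (iter = 0; iter < size; iter += chunk)
    memcpy (dest + iter, src + iter, chunk);   /* unrolled body  */
  /* The count - size trailing bytes remain for the epilogue.  */
}
#endif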
18615 /* Output a "rep; mov" instruction.
18616 Arguments have the same meaning as for the previous function. */
18618 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18619 rtx destptr, rtx srcptr,
18621 enum machine_mode mode)
18627 /* If the size is known, it is shorter to use rep movs. */
18628 if (mode == QImode && CONST_INT_P (count)
18629 && !(INTVAL (count) & 3))
18632 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18633 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18634 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18635 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18636 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18637 if (mode != QImode)
18639 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18640 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18641 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18642 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
18643 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18644 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
18648 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18649 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
18651 if (CONST_INT_P (count))
18653 count = GEN_INT (INTVAL (count)
18654 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18655 destmem = shallow_copy_rtx (destmem);
18656 srcmem = shallow_copy_rtx (srcmem);
18657 set_mem_size (destmem, count);
18658 set_mem_size (srcmem, count);
18662 if (MEM_SIZE (destmem))
18663 set_mem_size (destmem, NULL_RTX);
18664 if (MEM_SIZE (srcmem))
18665 set_mem_size (srcmem, NULL_RTX);
18667 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
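/* Illustrative sketch (editorial, not part of GCC): the architectural effect
   of the emitted "rep; mov" of SIZE-byte chunks.  The count register holds
   the number of chunks, and both pointers advance as the copy proceeds,
   which is why DESTEXP and SRCEXP above describe the final pointer values.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
rep_movs_model (char **destp, const char **srcp, size_t countreg, size_t size)
{
  while (countreg-- > 0)
    {
      memcpy (*destp, *srcp, size);
      *destp += size;   /* direction flag clear: pointers move upward  */
      *srcp += size;
    }
}
#endif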
18671 /* Output a "rep; stos" instruction.
18672 Arguments have the same meaning as for the previous function. */
18674 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
18675 rtx count, enum machine_mode mode,
18681 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18682 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18683 value = force_reg (mode, gen_lowpart (mode, value));
18684 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18685 if (mode != QImode)
18687 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18688 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18689 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18692 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18693 if (orig_value == const0_rtx && CONST_INT_P (count))
18695 count = GEN_INT (INTVAL (count)
18696 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18697 destmem = shallow_copy_rtx (destmem);
18698 set_mem_size (destmem, count);
18700 else if (MEM_SIZE (destmem))
18701 set_mem_size (destmem, NULL_RTX);
18702 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
18706 emit_strmov (rtx destmem, rtx srcmem,
18707 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
18709 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
18710 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
18711 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18714 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
18716 expand_movmem_epilogue (rtx destmem, rtx srcmem,
18717 rtx destptr, rtx srcptr, rtx count, int max_size)
18720 if (CONST_INT_P (count))
18722 HOST_WIDE_INT countval = INTVAL (count);
18725 if ((countval & 0x10) && max_size > 16)
18729 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18730 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
18733 gcc_unreachable ();
18736 if ((countval & 0x08) && max_size > 8)
18739 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18742 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18743 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
18747 if ((countval & 0x04) && max_size > 4)
18749 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18752 if ((countval & 0x02) && max_size > 2)
18754 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
18757 if ((countval & 0x01) && max_size > 1)
18759 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
18766 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
18767 count, 1, OPTAB_DIRECT);
18768 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
18769 count, QImode, 1, 4);
18773 /* When there are stringops, we can cheaply increase dest and src pointers.
18774 Otherwise we save code size by maintaining the offset (zero is readily
18775 available from the preceding rep operation) and using x86 addressing modes.
18777 if (TARGET_SINGLE_STRINGOP)
18781 rtx label = ix86_expand_aligntest (count, 4, true);
18782 src = change_address (srcmem, SImode, srcptr);
18783 dest = change_address (destmem, SImode, destptr);
18784 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18785 emit_label (label);
18786 LABEL_NUSES (label) = 1;
18790 rtx label = ix86_expand_aligntest (count, 2, true);
18791 src = change_address (srcmem, HImode, srcptr);
18792 dest = change_address (destmem, HImode, destptr);
18793 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18794 emit_label (label);
18795 LABEL_NUSES (label) = 1;
18799 rtx label = ix86_expand_aligntest (count, 1, true);
18800 src = change_address (srcmem, QImode, srcptr);
18801 dest = change_address (destmem, QImode, destptr);
18802 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18803 emit_label (label);
18804 LABEL_NUSES (label) = 1;
18809 rtx offset = force_reg (Pmode, const0_rtx);
18814 rtx label = ix86_expand_aligntest (count, 4, true);
18815 src = change_address (srcmem, SImode, srcptr);
18816 dest = change_address (destmem, SImode, destptr);
18817 emit_move_insn (dest, src);
18818 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
18819 true, OPTAB_LIB_WIDEN);
18821 emit_move_insn (offset, tmp);
18822 emit_label (label);
18823 LABEL_NUSES (label) = 1;
18827 rtx label = ix86_expand_aligntest (count, 2, true);
18828 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18829 src = change_address (srcmem, HImode, tmp);
18830 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18831 dest = change_address (destmem, HImode, tmp);
18832 emit_move_insn (dest, src);
18833 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
18834 true, OPTAB_LIB_WIDEN);
18836 emit_move_insn (offset, tmp);
18837 emit_label (label);
18838 LABEL_NUSES (label) = 1;
18842 rtx label = ix86_expand_aligntest (count, 1, true);
18843 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18844 src = change_address (srcmem, QImode, tmp);
18845 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18846 dest = change_address (destmem, QImode, tmp);
18847 emit_move_insn (dest, src);
18848 emit_label (label);
18849 LABEL_NUSES (label) = 1;
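/* Illustrative sketch (editorial, not part of GCC): the constant-count tail
   copy above decomposes COUNT & (MAX_SIZE - 1) by its set bits, one move per
   power of two, shown here for max_size <= 16 on a 64-bit target.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
movmem_epilogue_model (char *dest, const char *src, size_t countval)
{
  size_t offset = 0;
  if (countval & 8) { memcpy (dest + offset, src + offset, 8); offset += 8; }
  if (countval & 4) { memcpy (dest + offset, src + offset, 4); offset += 4; }
  if (countval & 2) { memcpy (dest + offset, src + offset, 2); offset += 2; }
  if (countval & 1) memcpy (dest + offset, src + offset, 1);
}
#endif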
18854 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18856 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
18857 rtx count, int max_size)
18860 expand_simple_binop (counter_mode (count), AND, count,
18861 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
18862 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
18863 gen_lowpart (QImode, value), count, QImode,
18867 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18869 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
18873 if (CONST_INT_P (count))
18875 HOST_WIDE_INT countval = INTVAL (count);
18878 if ((countval & 0x10) && max_size > 16)
18882 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18883 emit_insn (gen_strset (destptr, dest, value));
18884 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
18885 emit_insn (gen_strset (destptr, dest, value));
18888 gcc_unreachable ();
18891 if ((countval & 0x08) && max_size > 8)
18895 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18896 emit_insn (gen_strset (destptr, dest, value));
18900 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18901 emit_insn (gen_strset (destptr, dest, value));
18902 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
18903 emit_insn (gen_strset (destptr, dest, value));
18907 if ((countval & 0x04) && max_size > 4)
18909 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18910 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18913 if ((countval & 0x02) && max_size > 2)
18915 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
18916 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18919 if ((countval & 0x01) && max_size > 1)
18921 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
18922 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18929 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
18934 rtx label = ix86_expand_aligntest (count, 16, true);
18937 dest = change_address (destmem, DImode, destptr);
18938 emit_insn (gen_strset (destptr, dest, value));
18939 emit_insn (gen_strset (destptr, dest, value));
18943 dest = change_address (destmem, SImode, destptr);
18944 emit_insn (gen_strset (destptr, dest, value));
18945 emit_insn (gen_strset (destptr, dest, value));
18946 emit_insn (gen_strset (destptr, dest, value));
18947 emit_insn (gen_strset (destptr, dest, value));
18949 emit_label (label);
18950 LABEL_NUSES (label) = 1;
18954 rtx label = ix86_expand_aligntest (count, 8, true);
18957 dest = change_address (destmem, DImode, destptr);
18958 emit_insn (gen_strset (destptr, dest, value));
18962 dest = change_address (destmem, SImode, destptr);
18963 emit_insn (gen_strset (destptr, dest, value));
18964 emit_insn (gen_strset (destptr, dest, value));
18966 emit_label (label);
18967 LABEL_NUSES (label) = 1;
18971 rtx label = ix86_expand_aligntest (count, 4, true);
18972 dest = change_address (destmem, SImode, destptr);
18973 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18974 emit_label (label);
18975 LABEL_NUSES (label) = 1;
18979 rtx label = ix86_expand_aligntest (count, 2, true);
18980 dest = change_address (destmem, HImode, destptr);
18981 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18982 emit_label (label);
18983 LABEL_NUSES (label) = 1;
18987 rtx label = ix86_expand_aligntest (count, 1, true);
18988 dest = change_address (destmem, QImode, destptr);
18989 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18990 emit_label (label);
18991 LABEL_NUSES (label) = 1;
18995 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
18996 to DESIRED_ALIGNMENT. */
18998 expand_movmem_prologue (rtx destmem, rtx srcmem,
18999 rtx destptr, rtx srcptr, rtx count,
19000 int align, int desired_alignment)
19002 if (align <= 1 && desired_alignment > 1)
19004 rtx label = ix86_expand_aligntest (destptr, 1, false);
19005 srcmem = change_address (srcmem, QImode, srcptr);
19006 destmem = change_address (destmem, QImode, destptr);
19007 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19008 ix86_adjust_counter (count, 1);
19009 emit_label (label);
19010 LABEL_NUSES (label) = 1;
19012 if (align <= 2 && desired_alignment > 2)
19014 rtx label = ix86_expand_aligntest (destptr, 2, false);
19015 srcmem = change_address (srcmem, HImode, srcptr);
19016 destmem = change_address (destmem, HImode, destptr);
19017 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19018 ix86_adjust_counter (count, 2);
19019 emit_label (label);
19020 LABEL_NUSES (label) = 1;
19022 if (align <= 4 && desired_alignment > 4)
19024 rtx label = ix86_expand_aligntest (destptr, 4, false);
19025 srcmem = change_address (srcmem, SImode, srcptr);
19026 destmem = change_address (destmem, SImode, destptr);
19027 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19028 ix86_adjust_counter (count, 4);
19029 emit_label (label);
19030 LABEL_NUSES (label) = 1;
19032 gcc_assert (desired_alignment <= 8);
19035 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
19036 ALIGN_BYTES is how many bytes need to be copied. */
19038 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
19039 int desired_align, int align_bytes)
19042 rtx src_size, dst_size;
19044 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
19045 if (src_align_bytes >= 0)
19046 src_align_bytes = desired_align - src_align_bytes;
19047 src_size = MEM_SIZE (src);
19048 dst_size = MEM_SIZE (dst);
19049 if (align_bytes & 1)
19051 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19052 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
19054 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19056 if (align_bytes & 2)
19058 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19059 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
19060 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19061 set_mem_align (dst, 2 * BITS_PER_UNIT);
19062 if (src_align_bytes >= 0
19063 && (src_align_bytes & 1) == (align_bytes & 1)
19064 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
19065 set_mem_align (src, 2 * BITS_PER_UNIT);
19067 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19069 if (align_bytes & 4)
19071 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19072 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
19073 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19074 set_mem_align (dst, 4 * BITS_PER_UNIT);
19075 if (src_align_bytes >= 0)
19077 unsigned int src_align = 0;
19078 if ((src_align_bytes & 3) == (align_bytes & 3))
19080 else if ((src_align_bytes & 1) == (align_bytes & 1))
19082 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19083 set_mem_align (src, src_align * BITS_PER_UNIT);
19086 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19088 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19089 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
19090 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19091 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19092 if (src_align_bytes >= 0)
19094 unsigned int src_align = 0;
19095 if ((src_align_bytes & 7) == (align_bytes & 7))
19097 else if ((src_align_bytes & 3) == (align_bytes & 3))
19099 else if ((src_align_bytes & 1) == (align_bytes & 1))
19101 if (src_align > (unsigned int) desired_align)
19102 src_align = desired_align;
19103 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19104 set_mem_align (src, src_align * BITS_PER_UNIT);
19107 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19109 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
19114 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
19115 to DESIRED_ALIGNMENT. */
19117 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
19118 int align, int desired_alignment)
19120 if (align <= 1 && desired_alignment > 1)
19122 rtx label = ix86_expand_aligntest (destptr, 1, false);
19123 destmem = change_address (destmem, QImode, destptr);
19124 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
19125 ix86_adjust_counter (count, 1);
19126 emit_label (label);
19127 LABEL_NUSES (label) = 1;
19129 if (align <= 2 && desired_alignment > 2)
19131 rtx label = ix86_expand_aligntest (destptr, 2, false);
19132 destmem = change_address (destmem, HImode, destptr);
19133 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
19134 ix86_adjust_counter (count, 2);
19135 emit_label (label);
19136 LABEL_NUSES (label) = 1;
19138 if (align <= 4 && desired_alignment > 4)
19140 rtx label = ix86_expand_aligntest (destptr, 4, false);
19141 destmem = change_address (destmem, SImode, destptr);
19142 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
19143 ix86_adjust_counter (count, 4);
19144 emit_label (label);
19145 LABEL_NUSES (label) = 1;
19147 gcc_assert (desired_alignment <= 8);
19150 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
19151 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
19153 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19154 int desired_align, int align_bytes)
19157 rtx dst_size = MEM_SIZE (dst);
19158 if (align_bytes & 1)
19160 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19162 emit_insn (gen_strset (destreg, dst,
19163 gen_lowpart (QImode, value)));
19165 if (align_bytes & 2)
19167 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19168 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19169 set_mem_align (dst, 2 * BITS_PER_UNIT);
19171 emit_insn (gen_strset (destreg, dst,
19172 gen_lowpart (HImode, value)));
19174 if (align_bytes & 4)
19176 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19177 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19178 set_mem_align (dst, 4 * BITS_PER_UNIT);
19180 emit_insn (gen_strset (destreg, dst,
19181 gen_lowpart (SImode, value)));
19183 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19184 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19185 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19187 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19191 /* Given COUNT and EXPECTED_SIZE, decide on the codegen of the string operation. */
19192 static enum stringop_alg
19193 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19194 int *dynamic_check)
19196 const struct stringop_algs * algs;
19197 bool optimize_for_speed;
19198 /* Algorithms using the rep prefix want at least edi and ecx;
19199 additionally, memset wants eax and memcpy wants esi. Don't
19200 consider such algorithms if the user has appropriated those
19201 registers for their own purposes. */
19202 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19204 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19206 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19207 || (alg != rep_prefix_1_byte \
19208 && alg != rep_prefix_4_byte \
19209 && alg != rep_prefix_8_byte))
19210 const struct processor_costs *cost;
19212 /* Even if the string operation call is cold, we still might spend a lot
19213 of time processing large blocks. */
19214 if (optimize_function_for_size_p (cfun)
19215 || (optimize_insn_for_size_p ()
19216 && expected_size != -1 && expected_size < 256))
19217 optimize_for_speed = false;
19219 optimize_for_speed = true;
19221 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19223 *dynamic_check = -1;
19225 algs = &cost->memset[TARGET_64BIT != 0];
19227 algs = &cost->memcpy[TARGET_64BIT != 0];
19228 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19229 return stringop_alg;
19230 /* rep; movq or rep; movl is the smallest variant. */
19231 else if (!optimize_for_speed)
19233 if (!count || (count & 3))
19234 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19236 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19238 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
19240 else if (expected_size != -1 && expected_size < 4)
19241 return loop_1_byte;
19242 else if (expected_size != -1)
19245 enum stringop_alg alg = libcall;
19246 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19248 /* We get here if the algorithms that were not libcall-based
19249 were rep-prefix based and we are unable to use rep prefixes
19250 based on global register usage. Break out of the loop and
19251 use the heuristic below. */
19252 if (algs->size[i].max == 0)
19254 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19256 enum stringop_alg candidate = algs->size[i].alg;
19258 if (candidate != libcall && ALG_USABLE_P (candidate))
19260 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19261 last non-libcall inline algorithm. */
19262 if (TARGET_INLINE_ALL_STRINGOPS)
19264 /* When the current size is best copied by a libcall, but we are
19265 still forced to inline, run the heuristic below that will pick
19266 the code for medium-sized blocks. */
19267 if (alg != libcall)
19271 else if (ALG_USABLE_P (candidate))
19275 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19277 /* When asked to inline the call anyway, try to pick a meaningful choice.
19278 We look for the maximal size of block that is faster to copy by hand,
19279 and take blocks of at most that size, guessing that the average size
19280 will be roughly half of the block.
19282 If this turns out to be bad, we might simply specify the preferred
19283 choice in ix86_costs. */
19284 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19285 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19288 enum stringop_alg alg;
19290 bool any_alg_usable_p = true;
19292 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19294 enum stringop_alg candidate = algs->size[i].alg;
19295 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19297 if (candidate != libcall && candidate
19298 && ALG_USABLE_P (candidate))
19299 max = algs->size[i].max;
19301 /* If there aren't any usable algorithms, then recursing on
19302 smaller sizes isn't going to find anything. Just return the
19303 simple byte-at-a-time copy loop. */
19304 if (!any_alg_usable_p)
19306 /* Pick something reasonable. */
19307 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19308 *dynamic_check = 128;
19309 return loop_1_byte;
19313 alg = decide_alg (count, max / 2, memset, dynamic_check);
19314 gcc_assert (*dynamic_check == -1);
19315 gcc_assert (alg != libcall);
19316 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19317 *dynamic_check = max;
19320 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19321 #undef ALG_USABLE_P
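/* Illustrative sketch (editorial, not part of GCC): the shape of the
   size-class scan in decide_alg.  Each entry names the algorithm for blocks
   up to .max bytes; .max == -1 means "any size".  Unusable entries would be
   skipped, and a failed scan falls back to a library call.  */
#if 0
enum alg_model { ALG_LIBCALL, ALG_LOOP, ALG_REP_PREFIX };

struct size_class_model { long max; enum alg_model alg; };

static enum alg_model
pick_alg_model (const struct size_class_model *classes, int n,
                long expected_size)
{
  int i;
  for (i = 0; i < n; i++)
    if (classes[i].max == -1 || classes[i].max >= expected_size)
      return classes[i].alg;
  return ALG_LIBCALL;   /* nothing matched  */
}
#endif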
19324 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19325 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19327 decide_alignment (int align,
19328 enum stringop_alg alg,
19331 int desired_align = 0;
19335 gcc_unreachable ();
19337 case unrolled_loop:
19338 desired_align = GET_MODE_SIZE (Pmode);
19340 case rep_prefix_8_byte:
19343 case rep_prefix_4_byte:
19344 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19345 copying a whole cache line at once. */
19346 if (TARGET_PENTIUMPRO)
19351 case rep_prefix_1_byte:
19352 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19353 copying a whole cache line at once. */
19354 if (TARGET_PENTIUMPRO)
19368 if (desired_align < align)
19369 desired_align = align;
19370 if (expected_size != -1 && expected_size < 4)
19371 desired_align = align;
19372 return desired_align;
19375 /* Return the smallest power of 2 greater than VAL. */
19377 smallest_pow2_greater_than (int val)
19385 /* Expand string move (memcpy) operation. Use i386 string operations when
19386 profitable. expand_setmem contains similar code. The code depends upon
19387 architecture, block size and alignment, but always has the same
19390 1) Prologue guard: Conditional that jumps up to epilogues for small
19391 blocks that can be handled by epilogue alone. This is faster but
19392 also needed for correctness, since the prologue assumes the block is larger
19393 than the desired alignment.
19395 Optional dynamic check for size and libcall for large
19396 blocks is emitted here too, with -minline-stringops-dynamically.
19398 2) Prologue: copy the first few bytes in order to get the destination
19399 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19400 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19401 We emit either a jump tree for power-of-two sized blocks, or a byte loop.
19403 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19404 with the specified algorithm.
19406 4) Epilogue: code copying the tail of the block that is too small to be
19407 handled by the main body (or up to the size guarded by the prologue guard). */
19410 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19411 rtx expected_align_exp, rtx expected_size_exp)
19417 rtx jump_around_label = NULL;
19418 HOST_WIDE_INT align = 1;
19419 unsigned HOST_WIDE_INT count = 0;
19420 HOST_WIDE_INT expected_size = -1;
19421 int size_needed = 0, epilogue_size_needed;
19422 int desired_align = 0, align_bytes = 0;
19423 enum stringop_alg alg;
19425 bool need_zero_guard = false;
19427 if (CONST_INT_P (align_exp))
19428 align = INTVAL (align_exp);
19429 /* i386 can do misaligned access at a reasonably increased cost. */
19430 if (CONST_INT_P (expected_align_exp)
19431 && INTVAL (expected_align_exp) > align)
19432 align = INTVAL (expected_align_exp);
19433 /* ALIGN is the minimum of destination and source alignment, but we care here
19434 just about destination alignment. */
19435 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19436 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19438 if (CONST_INT_P (count_exp))
19439 count = expected_size = INTVAL (count_exp);
19440 if (CONST_INT_P (expected_size_exp) && count == 0)
19441 expected_size = INTVAL (expected_size_exp);
19443 /* Make sure we don't need to care about overflow later on. */
19444 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19447 /* Step 0: Decide on preferred algorithm, desired alignment and
19448 size of chunks to be copied by main loop. */
19450 alg = decide_alg (count, expected_size, false, &dynamic_check);
19451 desired_align = decide_alignment (align, alg, expected_size);
19453 if (!TARGET_ALIGN_STRINGOPS)
19454 align = desired_align;
19456 if (alg == libcall)
19458 gcc_assert (alg != no_stringop);
19460 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19461 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19462 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19467 gcc_unreachable ();
19469 need_zero_guard = true;
19470 size_needed = GET_MODE_SIZE (Pmode);
19472 case unrolled_loop:
19473 need_zero_guard = true;
19474 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19476 case rep_prefix_8_byte:
19479 case rep_prefix_4_byte:
19482 case rep_prefix_1_byte:
19486 need_zero_guard = true;
19491 epilogue_size_needed = size_needed;
19493 /* Step 1: Prologue guard. */
19495 /* Alignment code needs count to be in register. */
19496 if (CONST_INT_P (count_exp) && desired_align > align)
19498 if (INTVAL (count_exp) > desired_align
19499 && INTVAL (count_exp) > size_needed)
19502 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19503 if (align_bytes <= 0)
19506 align_bytes = desired_align - align_bytes;
19508 if (align_bytes == 0)
19509 count_exp = force_reg (counter_mode (count_exp), count_exp);
19511 gcc_assert (desired_align >= 1 && align >= 1);
19513 /* Ensure that alignment prologue won't copy past end of block. */
19514 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19516 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19517 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19518 Make sure it is a power of 2. */
19519 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19523 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19525 /* If the main algorithm works on QImode, no epilogue is needed.
19526 For small sizes just don't align anything. */
19527 if (size_needed == 1)
19528 desired_align = align;
19535 label = gen_label_rtx ();
19536 emit_cmp_and_jump_insns (count_exp,
19537 GEN_INT (epilogue_size_needed),
19538 LTU, 0, counter_mode (count_exp), 1, label);
19539 if (expected_size == -1 || expected_size < epilogue_size_needed)
19540 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19542 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19546 /* Emit code to decide at runtime whether a library call or inline code should be
19548 if (dynamic_check != -1)
19550 if (CONST_INT_P (count_exp))
19552 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19554 emit_block_move_via_libcall (dst, src, count_exp, false);
19555 count_exp = const0_rtx;
19561 rtx hot_label = gen_label_rtx ();
19562 jump_around_label = gen_label_rtx ();
19563 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19564 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19565 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19566 emit_block_move_via_libcall (dst, src, count_exp, false);
19567 emit_jump (jump_around_label);
19568 emit_label (hot_label);
19572 /* Step 2: Alignment prologue. */
19574 if (desired_align > align)
19576 if (align_bytes == 0)
19578 /* Except for the first move in the epilogue, we no longer know
19579 the constant offset in aliasing info. It doesn't seem worth
19580 the pain to maintain it for the first move, so throw away
19582 src = change_address (src, BLKmode, srcreg);
19583 dst = change_address (dst, BLKmode, destreg);
19584 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19589 /* If we know how many bytes need to be stored before dst is
19590 sufficiently aligned, maintain aliasing info accurately. */
19591 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19592 desired_align, align_bytes);
19593 count_exp = plus_constant (count_exp, -align_bytes);
19594 count -= align_bytes;
19596 if (need_zero_guard
19597 && (count < (unsigned HOST_WIDE_INT) size_needed
19598 || (align_bytes == 0
19599 && count < ((unsigned HOST_WIDE_INT) size_needed
19600 + desired_align - align))))
19602 /* It is possible that we copied enough so the main loop will not
19604 gcc_assert (size_needed > 1);
19605 if (label == NULL_RTX)
19606 label = gen_label_rtx ();
19607 emit_cmp_and_jump_insns (count_exp,
19608 GEN_INT (size_needed),
19609 LTU, 0, counter_mode (count_exp), 1, label);
19610 if (expected_size == -1
19611 || expected_size < (desired_align - align) / 2 + size_needed)
19612 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19614 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19617 if (label && size_needed == 1)
19619 emit_label (label);
19620 LABEL_NUSES (label) = 1;
19622 epilogue_size_needed = 1;
19624 else if (label == NULL_RTX)
19625 epilogue_size_needed = size_needed;
19627 /* Step 3: Main loop. */
19633 gcc_unreachable ();
19635 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19636 count_exp, QImode, 1, expected_size);
19639 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19640 count_exp, Pmode, 1, expected_size);
19642 case unrolled_loop:
19643 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
19644 registers for 4 temporaries anyway. */
19645 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19646 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
19649 case rep_prefix_8_byte:
19650 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19653 case rep_prefix_4_byte:
19654 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19657 case rep_prefix_1_byte:
19658 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19662 /* Properly adjust the offset of src and dest memory for aliasing. */
19663 if (CONST_INT_P (count_exp))
19665 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
19666 (count / size_needed) * size_needed);
19667 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19668 (count / size_needed) * size_needed);
19672 src = change_address (src, BLKmode, srcreg);
19673 dst = change_address (dst, BLKmode, destreg);
19676 /* Step 4: Epilogue to copy the remaining bytes. */
19680 /* When the main loop is done, COUNT_EXP might hold the original count,
19681 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
19682 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
19683 bytes. Compensate if needed. */
19685 if (size_needed < epilogue_size_needed)
19688 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19689 GEN_INT (size_needed - 1), count_exp, 1,
19691 if (tmp != count_exp)
19692 emit_move_insn (count_exp, tmp);
19694 emit_label (label);
19695 LABEL_NUSES (label) = 1;
19698 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19699 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
19700 epilogue_size_needed);
19701 if (jump_around_label)
19702 emit_label (jump_around_label);
19706 /* Helper function for memset. For the QImode value 0xXY produce
19707 0xXYXYXYXY of the width specified by MODE. This is essentially
19708 a multiplication by 0x01010101, but we can do slightly better than
19709 synth_mult by unwinding the sequence by hand on CPUs with
19712 promote_duplicated_reg (enum machine_mode mode, rtx val)
19714 enum machine_mode valmode = GET_MODE (val);
19716 int nops = mode == DImode ? 3 : 2;
19718 gcc_assert (mode == SImode || mode == DImode);
19719 if (val == const0_rtx)
19720 return copy_to_mode_reg (mode, const0_rtx);
19721 if (CONST_INT_P (val))
19723 HOST_WIDE_INT v = INTVAL (val) & 255;
19727 if (mode == DImode)
19728 v |= (v << 16) << 16;
19729 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
19732 if (valmode == VOIDmode)
19734 if (valmode != QImode)
19735 val = gen_lowpart (QImode, val);
19736 if (mode == QImode)
19738 if (!TARGET_PARTIAL_REG_STALL)
19740 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
19741 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
19742 <= (ix86_cost->shift_const + ix86_cost->add) * nops
19743 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
19745 rtx reg = convert_modes (mode, QImode, val, true);
19746 tmp = promote_duplicated_reg (mode, const1_rtx);
19747 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
19752 rtx reg = convert_modes (mode, QImode, val, true);
19754 if (!TARGET_PARTIAL_REG_STALL)
19755 if (mode == SImode)
19756 emit_insn (gen_movsi_insv_1 (reg, reg));
19758 emit_insn (gen_movdi_insv_1 (reg, reg));
19761 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
19762 NULL, 1, OPTAB_DIRECT);
19764 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19766 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
19767 NULL, 1, OPTAB_DIRECT);
19768 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19769 if (mode == SImode)
19771 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
19772 NULL, 1, OPTAB_DIRECT);
19773 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
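/* Illustrative sketch (editorial, not part of GCC): the shift/or unwinding
   above, shown for SImode.  Two steps replicate the byte, matching a
   multiplication by 0x01010101.  */
#if 0
#include <stdint.h>

static uint32_t
promote_byte_model (uint8_t v)
{
  uint32_t x = v;
  x |= x << 8;    /* 0x000000XY -> 0x0000XYXY  */
  x |= x << 16;   /* 0x0000XYXY -> 0xXYXYXYXY  */
  return x;
}
#endif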
19778 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
19779 will be needed by the main loop copying SIZE_NEEDED chunks and by the
19780 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
19782 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
19787 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
19788 promoted_val = promote_duplicated_reg (DImode, val);
19789 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
19790 promoted_val = promote_duplicated_reg (SImode, val);
19791 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
19792 promoted_val = promote_duplicated_reg (HImode, val);
19794 promoted_val = val;
19796 return promoted_val;
19799 /* Expand a string set operation (memset, bzero). Use i386 string
19800 operations when profitable. See the expand_movmem comment for an
19801 explanation of the individual steps performed. */
19803 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
19804 rtx expected_align_exp, rtx expected_size_exp)
19809 rtx jump_around_label = NULL;
19810 HOST_WIDE_INT align = 1;
19811 unsigned HOST_WIDE_INT count = 0;
19812 HOST_WIDE_INT expected_size = -1;
19813 int size_needed = 0, epilogue_size_needed;
19814 int desired_align = 0, align_bytes = 0;
19815 enum stringop_alg alg;
19816 rtx promoted_val = NULL;
19817 bool force_loopy_epilogue = false;
19819 bool need_zero_guard = false;
19821 if (CONST_INT_P (align_exp))
19822 align = INTVAL (align_exp);
19823 /* i386 can do misaligned access at a reasonably increased cost. */
19824 if (CONST_INT_P (expected_align_exp)
19825 && INTVAL (expected_align_exp) > align)
19826 align = INTVAL (expected_align_exp);
19827 if (CONST_INT_P (count_exp))
19828 count = expected_size = INTVAL (count_exp);
19829 if (CONST_INT_P (expected_size_exp) && count == 0)
19830 expected_size = INTVAL (expected_size_exp);
19832 /* Make sure we don't need to care about overflow later on. */
19833 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19836 /* Step 0: Decide on preferred algorithm, desired alignment and
19837 size of chunks to be copied by main loop. */
19839 alg = decide_alg (count, expected_size, true, &dynamic_check);
19840 desired_align = decide_alignment (align, alg, expected_size);
19842 if (!TARGET_ALIGN_STRINGOPS)
19843 align = desired_align;
19845 if (alg == libcall)
19847 gcc_assert (alg != no_stringop);
19849 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
19850 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19855 gcc_unreachable ();
19857 need_zero_guard = true;
19858 size_needed = GET_MODE_SIZE (Pmode);
19860 case unrolled_loop:
19861 need_zero_guard = true;
19862 size_needed = GET_MODE_SIZE (Pmode) * 4;
19864 case rep_prefix_8_byte:
19865 size_needed = 8;
19866 break;
19867 case rep_prefix_4_byte:
19868 size_needed = 4;
19869 break;
19870 case rep_prefix_1_byte:
19871 size_needed = 1;
19872 break;
19873 case loop_1_byte:
19874 need_zero_guard = true;
19875 size_needed = 1;
19876 break;
19878 epilogue_size_needed = size_needed;
19880 /* Step 1: Prologue guard. */
19882 /* Alignment code needs count to be in a register.  */
19883 if (CONST_INT_P (count_exp) && desired_align > align)
19885 if (INTVAL (count_exp) > desired_align
19886 && INTVAL (count_exp) > size_needed)
19889 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19890 if (align_bytes <= 0)
19891 align_bytes = 0;
19892 else
19893 align_bytes = desired_align - align_bytes;
19895 if (align_bytes == 0)
19897 enum machine_mode mode = SImode;
19898 if (TARGET_64BIT && (count & ~0xffffffff))
19899 mode = DImode;
19900 count_exp = force_reg (mode, count_exp);
19903 /* Do the cheap promotion to allow better CSE across the
19904 main loop and epilogue (i.e. one load of the big constant in
19905 the front of all code).  */
19906 if (CONST_INT_P (val_exp))
19907 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19908 desired_align, align);
19909 /* Ensure that alignment prologue won't copy past end of block. */
19910 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19912 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19913 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19914 Make sure it is a power of 2.  */
19915 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
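/* Editor's note (illustrative): with a power-of-two EPILOGUE_SIZE_NEEDED the
   residual count is a simple mask, e.g. count = 23 with
   epilogue_size_needed = 8 leaves 23 & 7 = 7 tail bytes for the epilogue.  */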
19917 /* To improve performance of small blocks, we jump around the VAL
19918 promoting mode.  This means that if the promoted VAL is not constant,
19919 we might not use it in the epilogue and have to use the byte
19920 loop variant.  */
19921 if (epilogue_size_needed > 2 && !promoted_val)
19922 force_loopy_epilogue = true;
19925 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19927 /* If the main algorithm works on QImode, no epilogue is needed.
19928 For small sizes just don't align anything.  */
19929 if (size_needed == 1)
19930 desired_align = align;
19931 else
19932 goto epilogue;
19937 label = gen_label_rtx ();
19938 emit_cmp_and_jump_insns (count_exp,
19939 GEN_INT (epilogue_size_needed),
19940 LTU, 0, counter_mode (count_exp), 1, label);
19941 if (expected_size == -1 || expected_size <= epilogue_size_needed)
19942 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19943 else
19944 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19947 if (dynamic_check != -1)
19949 rtx hot_label = gen_label_rtx ();
19950 jump_around_label = gen_label_rtx ();
19951 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19952 LEU, 0, counter_mode (count_exp), 1, hot_label);
19953 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19954 set_storage_via_libcall (dst, count_exp, val_exp, false);
19955 emit_jump (jump_around_label);
19956 emit_label (hot_label);
19959 /* Step 2: Alignment prologue. */
19961 /* Do the expensive promotion once we have branched off the small blocks.  */
19962 if (!promoted_val)
19963 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19964 desired_align, align);
19965 gcc_assert (desired_align >= 1 && align >= 1);
19967 if (desired_align > align)
19969 if (align_bytes == 0)
19971 /* Except for the first move in epilogue, we no longer know
19972 constant offset in aliasing info.  It doesn't seem worth
19973 the pain to maintain it for the first move, so throw away
19974 the info early.  */
19975 dst = change_address (dst, BLKmode, destreg);
19976 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19977 desired_align);
19981 /* If we know how many bytes need to be stored before dst is
19982 sufficiently aligned, maintain aliasing info accurately. */
19983 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19984 desired_align, align_bytes);
19985 count_exp = plus_constant (count_exp, -align_bytes);
19986 count -= align_bytes;
19988 if (need_zero_guard
19989 && (count < (unsigned HOST_WIDE_INT) size_needed
19990 || (align_bytes == 0
19991 && count < ((unsigned HOST_WIDE_INT) size_needed
19992 + desired_align - align))))
19994 /* It is possible that we copied enough so that the main loop will not
19995 execute.  */
19996 gcc_assert (size_needed > 1);
19997 if (label == NULL_RTX)
19998 label = gen_label_rtx ();
19999 emit_cmp_and_jump_insns (count_exp,
20000 GEN_INT (size_needed),
20001 LTU, 0, counter_mode (count_exp), 1, label);
20002 if (expected_size == -1
20003 || expected_size < (desired_align - align) / 2 + size_needed)
20004 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20005 else
20006 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20009 if (label && size_needed == 1)
20011 emit_label (label);
20012 LABEL_NUSES (label) = 1;
20014 promoted_val = val_exp;
20015 epilogue_size_needed = 1;
20017 else if (label == NULL_RTX)
20018 epilogue_size_needed = size_needed;
20020 /* Step 3: Main loop. */
20026 gcc_unreachable ();
20027 case loop_1_byte:
20028 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20029 count_exp, QImode, 1, expected_size);
20030 break;
20031 case loop:
20032 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20033 count_exp, Pmode, 1, expected_size);
20034 break;
20035 case unrolled_loop:
20036 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20037 count_exp, Pmode, 4, expected_size);
20038 break;
20039 case rep_prefix_8_byte:
20040 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20041 DImode);
20042 break;
20043 case rep_prefix_4_byte:
20044 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20045 SImode);
20046 break;
20047 case rep_prefix_1_byte:
20048 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20049 QImode);
20050 break;
20052 /* Properly adjust the offset of the destination memory for aliasing.  */
20053 if (CONST_INT_P (count_exp))
20054 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20055 (count / size_needed) * size_needed);
20056 else
20057 dst = change_address (dst, BLKmode, destreg);
20059 /* Step 4: Epilogue to copy the remaining bytes. */
20063 /* When the main loop is done, COUNT_EXP might hold the original count,
20064 while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20065 Epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20066 bytes.  Compensate if needed.  */
20068 if (size_needed < epilogue_size_needed)
20071 tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20072 GEN_INT (size_needed - 1), count_exp, 1,
20073 OPTAB_DIRECT);
20074 if (tmp != count_exp)
20075 emit_move_insn (count_exp, tmp);
20077 emit_label (label);
20078 LABEL_NUSES (label) = 1;
20081 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20083 if (force_loopy_epilogue)
20084 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20085 epilogue_size_needed);
20086 else
20087 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20088 epilogue_size_needed);
20090 if (jump_around_label)
20091 emit_label (jump_around_label);
20095 /* Expand the appropriate insns for doing strlen if not just doing
20096 repnz; scasb
20098 out = result, initialized with the start address
20099 align_rtx = alignment of the address.
20100 scratch = scratch register, initialized with the start address when
20101 not aligned, otherwise undefined
20103 This is just the body. It needs the initializations mentioned above and
20104 some address computing at the end. These things are done in i386.md. */
20106 static void
20107 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20111 rtx align_2_label = NULL_RTX;
20112 rtx align_3_label = NULL_RTX;
20113 rtx align_4_label = gen_label_rtx ();
20114 rtx end_0_label = gen_label_rtx ();
20116 rtx tmpreg = gen_reg_rtx (SImode);
20117 rtx scratch = gen_reg_rtx (SImode);
20121 if (CONST_INT_P (align_rtx))
20122 align = INTVAL (align_rtx);
20124 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20126 /* Is there a known alignment and is it less than 4? */
20129 rtx scratch1 = gen_reg_rtx (Pmode);
20130 emit_move_insn (scratch1, out);
20131 /* Is there a known alignment and is it not 2? */
20134 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
20135 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
20137 /* Leave just the 3 lower bits. */
20138 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20139 NULL_RTX, 0, OPTAB_WIDEN);
20141 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20142 Pmode, 1, align_4_label);
20143 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20144 Pmode, 1, align_2_label);
20145 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20146 Pmode, 1, align_3_label);
20150 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20151 check whether it is aligned to a 4-byte boundary.  */
20153 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20154 NULL_RTX, 0, OPTAB_WIDEN);
20156 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20157 Pmode, 1, align_4_label);
20160 mem = change_address (src, QImode, out);
20162 /* Now compare the bytes. */
20164 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
20165 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20166 QImode, 1, end_0_label);
20168 /* Increment the address. */
20169 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20171 /* Not needed with an alignment of 2.  */
20174 emit_label (align_2_label);
20176 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20177 end_0_label);
20179 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20181 emit_label (align_3_label);
20184 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20185 end_0_label);
20187 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20190 /* Generate the loop to check 4 bytes at a time.  It is not a good idea
20191 to align this loop: it only enlarges the program and does not help it
20192 run faster.  */
20193 emit_label (align_4_label);
20195 mem = change_address (src, SImode, out);
20196 emit_move_insn (scratch, mem);
20197 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20199 /* This formula yields a nonzero result iff one of the bytes is zero.
20200 This saves three branches inside the loop and many cycles.  */
20202 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20203 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20204 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20205 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20206 gen_int_mode (0x80808080, SImode)));
20207 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20208 align_4_label);
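/* Editor's note: an illustrative plain-C sketch (not part of GCC) of the
   zero-byte test emitted above; the helper name is hypothetical.  The
   expression is nonzero iff some byte of X is zero.  */
#if 0
static int
word_has_zero_byte_sketch (unsigned int x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}
#endif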
20212 rtx reg = gen_reg_rtx (SImode);
20213 rtx reg2 = gen_reg_rtx (Pmode);
20214 emit_move_insn (reg, tmpreg);
20215 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20217 /* If zero is not in the first two bytes, move two bytes forward. */
20218 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20219 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20220 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20221 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20222 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20223 reg,
20224 tmpreg)));
20225 /* Emit lea manually to avoid clobbering of flags. */
20226 emit_insn (gen_rtx_SET (SImode, reg2,
20227 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20229 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20230 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20231 emit_insn (gen_rtx_SET (VOIDmode, out,
20232 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20233 reg2,
20234 out)));
20238 rtx end_2_label = gen_label_rtx ();
20239 /* Is zero in the first two bytes? */
20241 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20242 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20243 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20244 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20245 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20246 pc_rtx);
20247 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20248 JUMP_LABEL (tmp) = end_2_label;
20250 /* Not in the first two. Move two bytes forward. */
20251 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20252 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20254 emit_label (end_2_label);
20258 /* Avoid a branch in fixing up the final byte position.  */
20259 tmpreg = gen_lowpart (QImode, tmpreg);
20260 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20261 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20262 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20263 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
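/* Editor's note (illustrative): at this point OUT is 4 past the word
   containing the NUL, and bit 7 of the low byte of TMPREG says whether the
   NUL is the earlier of the two remaining candidate bytes.  The
   add-with-carry/sbb pair above computes OUT -= 3 + carry, selecting
   between the two positions without a branch.  */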
20265 emit_label (end_0_label);
20268 /* Expand strlen. */
20270 int
20271 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20273 rtx addr, scratch1, scratch2, scratch3, scratch4;
20275 /* The generic case of the strlen expander is long.  Avoid expanding
20276 it unless TARGET_INLINE_ALL_STRINGOPS.  */
20278 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20279 && !TARGET_INLINE_ALL_STRINGOPS
20280 && !optimize_insn_for_size_p ()
20281 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20284 addr = force_reg (Pmode, XEXP (src, 0));
20285 scratch1 = gen_reg_rtx (Pmode);
20287 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20288 && !optimize_insn_for_size_p ())
20290 /* Well, it seems that some optimizer does not combine a call like
20291 foo(strlen(bar), strlen(bar));
20292 when the move and the subtraction are done here.  It does calculate
20293 the length just once when these instructions are done inside of
20294 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
20295 often used and I use one fewer register for the lifetime of
20296 output_strlen_unroll() this is better.  */
20298 emit_move_insn (out, addr);
20300 ix86_expand_strlensi_unroll_1 (out, src, align);
20302 /* strlensi_unroll_1 returns the address of the zero at the end of
20303 the string, like memchr(), so compute the length by subtracting
20304 the start address. */
20305 emit_insn (ix86_gen_sub3 (out, out, addr));
20311 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20312 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20315 scratch2 = gen_reg_rtx (Pmode);
20316 scratch3 = gen_reg_rtx (Pmode);
20317 scratch4 = force_reg (Pmode, constm1_rtx);
20319 emit_move_insn (scratch3, addr);
20320 eoschar = force_reg (QImode, eoschar);
20322 src = replace_equiv_address_nv (src, scratch3);
20324 /* If .md starts supporting :P, this can be done in .md. */
20325 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20326 scratch4), UNSPEC_SCAS);
20327 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20328 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20329 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
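/* Editor's note (illustrative): with the count register preloaded to -1,
   repnz scasb decrements it once per byte scanned including the NUL,
   leaving -(len + 2); so ~count - 1 = len, which is exactly what the
   one_cmpl/add pair above computes.  */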
20334 /* For a given symbol (function), construct code to compute the address of
20335 its PLT entry in the large x86-64 PIC model.  */
20336 static rtx
20337 construct_plt_address (rtx symbol)
20339 rtx tmp = gen_reg_rtx (Pmode);
20340 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20342 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20343 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20345 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20346 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20350 void
20351 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20352 rtx callarg2,
20353 rtx pop, int sibcall)
20355 rtx use = NULL, call;
20357 if (pop == const0_rtx)
20358 pop = NULL;
20359 gcc_assert (!TARGET_64BIT || !pop);
20361 if (TARGET_MACHO && !TARGET_64BIT)
20364 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20365 fnaddr = machopic_indirect_call_target (fnaddr);
20370 /* Static functions and indirect calls don't need the pic register. */
20371 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20372 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20373 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20374 use_reg (&use, pic_offset_table_rtx);
20377 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20379 rtx al = gen_rtx_REG (QImode, AX_REG);
20380 emit_move_insn (al, callarg2);
20381 use_reg (&use, al);
20384 if (ix86_cmodel == CM_LARGE_PIC
20385 && MEM_P (fnaddr)
20386 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20387 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20388 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20389 else if (sibcall
20390 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20391 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20393 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20394 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20397 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20398 if (retval)
20399 call = gen_rtx_SET (VOIDmode, retval, call);
20402 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20403 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20404 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20406 if (TARGET_64BIT
20407 && ix86_cfun_abi () == MS_ABI
20408 && (!callarg2 || INTVAL (callarg2) != -2))
20410 /* We need to represent that SI and DI registers are clobbered
20411 by SYSV calls.  */
20412 static int clobbered_registers[] = {
20413 XMM6_REG, XMM7_REG, XMM8_REG,
20414 XMM9_REG, XMM10_REG, XMM11_REG,
20415 XMM12_REG, XMM13_REG, XMM14_REG,
20416 XMM15_REG, SI_REG, DI_REG
20419 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20420 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20421 UNSPEC_MS_TO_SYSV_CALL);
20425 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20426 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20427 ? TImode : DImode,
20428 gen_rtx_REG
20429 (SSE_REGNO_P (clobbered_registers[i])
20430 ? TImode : DImode,
20431 clobbered_registers[i]));
20433 call = gen_rtx_PARALLEL (VOIDmode,
20434 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20435 + 2, vec));
20438 call = emit_call_insn (call);
20439 if (use)
20440 CALL_INSN_FUNCTION_USAGE (call) = use;
20444 /* Clear stack slot assignments remembered from previous functions.
20445 This is called from INIT_EXPANDERS once before RTL is emitted for each
20446 function body.  */
20448 static struct machine_function *
20449 ix86_init_machine_status (void)
20451 struct machine_function *f;
20453 f = ggc_alloc_cleared_machine_function ();
20454 f->use_fast_prologue_epilogue_nregs = -1;
20455 f->tls_descriptor_call_expanded_p = 0;
20456 f->call_abi = ix86_abi;
20457 return f;
20461 /* Return a MEM corresponding to a stack slot with mode MODE.
20462 Allocate a new slot if necessary.
20464 The RTL for a function can have several slots available: N is
20465 which slot to use. */
20467 rtx
20468 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20470 struct stack_local_entry *s;
20472 gcc_assert (n < MAX_386_STACK_LOCALS);
20474 /* Virtual slot is valid only before vregs are instantiated. */
20475 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20477 for (s = ix86_stack_locals; s; s = s->next)
20478 if (s->mode == mode && s->n == n)
20479 return copy_rtx (s->rtl);
20481 s = ggc_alloc_stack_local_entry ();
20482 s->n = n;
20483 s->mode = mode;
20484 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20486 s->next = ix86_stack_locals;
20487 ix86_stack_locals = s;
20488 return s->rtl;
20491 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20493 static GTY(()) rtx ix86_tls_symbol;
20494 static rtx
20495 ix86_tls_get_addr (void)
20498 if (!ix86_tls_symbol)
20500 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20501 (TARGET_ANY_GNU_TLS
20502 && !TARGET_64BIT)
20503 ? "___tls_get_addr"
20504 : "__tls_get_addr");
20507 return ix86_tls_symbol;
20510 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20512 static GTY(()) rtx ix86_tls_module_base_symbol;
20513 static rtx
20514 ix86_tls_module_base (void)
20517 if (!ix86_tls_module_base_symbol)
20519 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20520 "_TLS_MODULE_BASE_");
20521 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20522 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20525 return ix86_tls_module_base_symbol;
20528 /* Calculate the length of the memory address in the instruction
20529 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20531 int
20532 memory_address_length (rtx addr)
20534 struct ix86_address parts;
20535 rtx base, index, disp;
20539 if (GET_CODE (addr) == PRE_DEC
20540 || GET_CODE (addr) == POST_INC
20541 || GET_CODE (addr) == PRE_MODIFY
20542 || GET_CODE (addr) == POST_MODIFY)
20543 return 0;
20545 ok = ix86_decompose_address (addr, &parts);
20546 gcc_assert (ok);
20548 if (parts.base && GET_CODE (parts.base) == SUBREG)
20549 parts.base = SUBREG_REG (parts.base);
20550 if (parts.index && GET_CODE (parts.index) == SUBREG)
20551 parts.index = SUBREG_REG (parts.index);
20553 base = parts.base;
20554 index = parts.index;
20555 disp = parts.disp;
20558 /* Rule of thumb:
20559 - esp as the base always wants an index,
20560 - ebp as the base always wants a displacement,
20561 - r12 as the base always wants an index,
20562 - r13 as the base always wants a displacement. */
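/* Editor's note (illustrative examples of the rules above; lengths exclude
   the modrm byte itself):
     (%eax)            -> 0 extra bytes
     (%esp)            -> +1 (SIB byte)
     (%ebp)            -> +1 (forced zero disp8)
     4(%eax)           -> +1 (disp8)
     0x12345678(%ebx)  -> +4 (disp32)  */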
20564 /* Register Indirect. */
20565 if (base && !index && !disp)
20567 /* esp (for its index) and ebp (for its displacement) need
20568 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
20569 mode.  */
20571 && (addr == arg_pointer_rtx
20572 || addr == frame_pointer_rtx
20573 || REGNO (addr) == SP_REG
20574 || REGNO (addr) == BP_REG
20575 || REGNO (addr) == R12_REG
20576 || REGNO (addr) == R13_REG))
20577 len = 1;
20580 /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
20581 is not disp32, but disp32(%rip), so for disp32
20582 a SIB byte is needed, unless print_operand_address
20583 optimizes it into disp32(%rip) or (%rip) is implied
20584 by UNSPEC.  */
20585 else if (disp && !base && !index)
20592 if (GET_CODE (disp) == CONST)
20593 symbol = XEXP (disp, 0);
20594 if (GET_CODE (symbol) == PLUS
20595 && CONST_INT_P (XEXP (symbol, 1)))
20596 symbol = XEXP (symbol, 0);
20598 if (GET_CODE (symbol) != LABEL_REF
20599 && (GET_CODE (symbol) != SYMBOL_REF
20600 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20601 && (GET_CODE (symbol) != UNSPEC
20602 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20603 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20604 len += 1;
20610 /* Find the length of the displacement constant. */
20613 if (base && satisfies_constraint_K (disp))
20614 len = 1;
20615 else
20616 len = 4;
20618 /* ebp always wants a displacement. Similarly r13. */
20619 else if (base && REG_P (base)
20620 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20621 len = 1;
20623 /* An index requires the two-byte modrm form.... */
20624 if (index
20625 /* ...like esp (or r12), which always wants an index.  */
20626 || base == arg_pointer_rtx
20627 || base == frame_pointer_rtx
20628 || (base && REG_P (base)
20629 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20630 len += 1;
20646 /* Compute default value for "length_immediate" attribute. When SHORTFORM
20647 is set, expect that insn have 8bit immediate alternative. */
20648 int
20649 ix86_attr_length_immediate_default (rtx insn, int shortform)
20653 extract_insn_cached (insn);
20654 for (i = recog_data.n_operands - 1; i >= 0; --i)
20655 if (CONSTANT_P (recog_data.operand[i]))
20657 enum attr_mode mode = get_attr_mode (insn);
20660 if (shortform && CONST_INT_P (recog_data.operand[i]))
20662 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20669 ival = trunc_int_for_mode (ival, HImode);
20672 ival = trunc_int_for_mode (ival, SImode);
20677 if (IN_RANGE (ival, -128, 127))
20694 /* Immediates for DImode instructions are encoded as
20695 32-bit sign-extended values.  */
20699 fatal_insn ("unknown insn mode", insn);
20704 /* Compute default value for "length_address" attribute. */
20705 int
20706 ix86_attr_length_address_default (rtx insn)
20710 if (get_attr_type (insn) == TYPE_LEA)
20712 rtx set = PATTERN (insn), addr;
20714 if (GET_CODE (set) == PARALLEL)
20715 set = XVECEXP (set, 0, 0);
20717 gcc_assert (GET_CODE (set) == SET);
20719 addr = SET_SRC (set);
20720 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20722 if (GET_CODE (addr) == ZERO_EXTEND)
20723 addr = XEXP (addr, 0);
20724 if (GET_CODE (addr) == SUBREG)
20725 addr = SUBREG_REG (addr);
20728 return memory_address_length (addr);
20731 extract_insn_cached (insn);
20732 for (i = recog_data.n_operands - 1; i >= 0; --i)
20733 if (MEM_P (recog_data.operand[i]))
20735 constrain_operands_cached (reload_completed);
20736 if (which_alternative != -1)
20738 const char *constraints = recog_data.constraints[i];
20739 int alt = which_alternative;
20741 while (*constraints == '=' || *constraints == '+')
20742 constraints++;
20743 while (alt-- > 0)
20744 while (*constraints++ != ',')
20745 ;
20746 /* Skip ignored operands. */
20747 if (*constraints == 'X')
20748 continue;
20750 return memory_address_length (XEXP (recog_data.operand[i], 0));
20755 /* Compute default value for "length_vex" attribute. It includes
20756 2 or 3 byte VEX prefix and 1 opcode byte. */
20758 int
20759 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
20760 int has_vex_w)
20764 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W
20765 bit requires the 3-byte VEX prefix.  */
20766 if (!has_0f_opcode || has_vex_w)
20767 return 3 + 1;
20769 /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
20770 if (!TARGET_64BIT)
20771 return 2 + 1;
20773 extract_insn_cached (insn);
20775 for (i = recog_data.n_operands - 1; i >= 0; --i)
20776 if (REG_P (recog_data.operand[i]))
20778 /* The REX.W bit requires the 3-byte VEX prefix.  */
20779 if (GET_MODE (recog_data.operand[i]) == DImode
20780 && GENERAL_REG_P (recog_data.operand[i]))
20781 return 3 + 1;
20785 /* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
20786 if (MEM_P (recog_data.operand[i])
20787 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
20788 return 3 + 1;
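/* Editor's note: an illustrative summary (not the actual encoder) of the
   VEX prefix size rule applied above.  The 2-byte 0xC5 form covers only the
   0F opcode map with VEX.W = 0 and no need for REX.X/REX.B; everything else
   takes the 3-byte 0xC4 form.  The helper name is hypothetical.  */
#if 0
static int
vex_prefix_len_sketch (int map_is_0f, int vex_w, int needs_rex_xb)
{
  return (map_is_0f && !vex_w && !needs_rex_xb) ? 2 : 3;
}
#endif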
20794 /* Return the maximum number of instructions a CPU can issue.  */
20796 static int
20797 ix86_issue_rate (void)
20801 case PROCESSOR_PENTIUM:
20802 case PROCESSOR_ATOM:
20804 return 2;
20806 case PROCESSOR_PENTIUMPRO:
20807 case PROCESSOR_PENTIUM4:
20808 case PROCESSOR_ATHLON:
20810 case PROCESSOR_AMDFAM10:
20811 case PROCESSOR_NOCONA:
20812 case PROCESSOR_GENERIC32:
20813 case PROCESSOR_GENERIC64:
20814 case PROCESSOR_BDVER1:
20815 return 3;
20817 case PROCESSOR_CORE2:
20818 return 4;
20825 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
20826 set by DEP_INSN and no other register set by DEP_INSN.  */
20828 static bool
20829 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20833 /* Simplify the test for uninteresting insns. */
20834 if (insn_type != TYPE_SETCC
20835 && insn_type != TYPE_ICMOV
20836 && insn_type != TYPE_FCMOV
20837 && insn_type != TYPE_IBR)
20840 if ((set = single_set (dep_insn)) != 0)
20842 set = SET_DEST (set);
20843 set2 = NULL_RTX;
20845 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20846 && XVECLEN (PATTERN (dep_insn), 0) == 2
20847 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20848 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20850 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
20851 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20856 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20857 return false;
20859 /* This test is true if the dependent insn reads the flags but
20860 not any other potentially set register. */
20861 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20862 return false;
20864 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
20865 return false;
20867 return true;
20870 /* Return true iff USE_INSN has a memory address with operands set by
20871 SET_INSN.  */
20873 static bool
20874 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20877 extract_insn_cached (use_insn);
20878 for (i = recog_data.n_operands - 1; i >= 0; --i)
20879 if (MEM_P (recog_data.operand[i]))
20881 rtx addr = XEXP (recog_data.operand[i], 0);
20882 return modified_in_p (addr, set_insn) != 0;
20887 static int
20888 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20890 enum attr_type insn_type, dep_insn_type;
20891 enum attr_memory memory;
20893 int dep_insn_code_number;
20895 /* Anti and output dependencies have zero cost on all CPUs. */
20896 if (REG_NOTE_KIND (link) != 0)
20897 return 0;
20899 dep_insn_code_number = recog_memoized (dep_insn);
20901 /* If we can't recognize the insns, we can't really do anything. */
20902 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20903 return cost;
20905 insn_type = get_attr_type (insn);
20906 dep_insn_type = get_attr_type (dep_insn);
20910 case PROCESSOR_PENTIUM:
20911 /* Address Generation Interlock adds a cycle of latency. */
20912 if (insn_type == TYPE_LEA)
20914 rtx addr = PATTERN (insn);
20916 if (GET_CODE (addr) == PARALLEL)
20917 addr = XVECEXP (addr, 0, 0);
20919 gcc_assert (GET_CODE (addr) == SET);
20921 addr = SET_SRC (addr);
20922 if (modified_in_p (addr, dep_insn))
20923 cost += 1;
20925 else if (ix86_agi_dependent (dep_insn, insn))
20926 cost += 1;
20928 /* ??? Compares pair with jump/setcc. */
20929 if (ix86_flags_dependent (insn, dep_insn, insn_type))
20930 cost = 0;
20932 /* Floating point stores require the value to be ready one cycle earlier.  */
20933 if (insn_type == TYPE_FMOV
20934 && get_attr_memory (insn) == MEMORY_STORE
20935 && !ix86_agi_dependent (dep_insn, insn))
20936 cost += 1;
20937 break;
20939 case PROCESSOR_PENTIUMPRO:
20940 memory = get_attr_memory (insn);
20942 /* INT->FP conversion is expensive. */
20943 if (get_attr_fp_int_src (dep_insn))
20944 cost += 5;
20946 /* There is one extra cycle of latency between an FP op and a store.  */
20947 if (insn_type == TYPE_FMOV
20948 && (set = single_set (dep_insn)) != NULL_RTX
20949 && (set2 = single_set (insn)) != NULL_RTX
20950 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20951 && MEM_P (SET_DEST (set2)))
20952 cost += 1;
20954 /* Model the reorder buffer's ability to hide the latency of a load by
20955 executing it in parallel with the previous instruction when that
20956 instruction is not needed to compute the address.  */
20957 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20958 && !ix86_agi_dependent (dep_insn, insn))
20960 /* Claim that moves take one cycle, as the core can issue one load
20961 at a time and the next load can start a cycle later.  */
20962 if (dep_insn_type == TYPE_IMOV
20963 || dep_insn_type == TYPE_FMOV)
20964 cost = 1;
20965 else if (cost > 1)
20966 cost--;
20971 memory = get_attr_memory (insn);
20973 /* The esp dependency is resolved before the instruction is really
20974 finished.  */
20975 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20976 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20977 return 1;
20979 /* INT->FP conversion is expensive. */
20980 if (get_attr_fp_int_src (dep_insn))
20983 /* Model the reorder buffer's ability to hide the latency of a load by
20984 executing it in parallel with the previous instruction when that
20985 instruction is not needed to compute the address.  */
20986 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20987 && !ix86_agi_dependent (dep_insn, insn))
20989 /* Claim that moves take one cycle, as the core can issue one load
20990 at a time and the next load can start a cycle later.  */
20991 if (dep_insn_type == TYPE_IMOV
20992 || dep_insn_type == TYPE_FMOV)
20993 cost = 1;
20994 else if (cost > 1)
20995 cost--;
21001 case PROCESSOR_ATHLON:
21003 case PROCESSOR_AMDFAM10:
21004 case PROCESSOR_BDVER1:
21005 case PROCESSOR_ATOM:
21006 case PROCESSOR_GENERIC32:
21007 case PROCESSOR_GENERIC64:
21008 memory = get_attr_memory (insn);
21010 /* Model the reorder buffer's ability to hide the latency of a load by
21011 executing it in parallel with the previous instruction when that
21012 instruction is not needed to compute the address.  */
21013 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21014 && !ix86_agi_dependent (dep_insn, insn))
21016 enum attr_unit unit = get_attr_unit (insn);
21019 /* Because of the difference between the length of integer and
21020 floating unit pipeline preparation stages, the memory operands
21021 for floating point are cheaper.
21023 ??? For Athlon, the difference is most probably 2.  */
21024 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21025 loadcost = 3;
21026 else
21027 loadcost = TARGET_ATHLON ? 2 : 0;
21029 if (cost >= loadcost)
21030 cost -= loadcost;
21031 else
21032 cost = 0;
21042 /* How many alternative schedules to try.  This should be as wide as the
21043 scheduling freedom in the DFA, but no wider.  Making this value too
21044 large results in extra work for the scheduler.  */
21046 static int
21047 ia32_multipass_dfa_lookahead (void)
21051 case PROCESSOR_PENTIUM:
21052 return 2;
21054 case PROCESSOR_PENTIUMPRO:
21055 return 1;
21064 /* Compute the alignment given to a constant that is being placed in memory.
21065 EXP is the constant and ALIGN is the alignment that the object would
21066 ordinarily have.
21067 The value of this function is used instead of that alignment to align
21068 the object.  */
21070 int
21071 ix86_constant_alignment (tree exp, int align)
21073 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21074 || TREE_CODE (exp) == INTEGER_CST)
21076 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21077 return 64;
21078 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21079 return 128;
21081 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21082 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21083 return BITS_PER_WORD;
21088 /* Compute the alignment for a static variable.
21089 TYPE is the data type, and ALIGN is the alignment that
21090 the object would ordinarily have. The value of this function is used
21091 instead of that alignment to align the object. */
21093 int
21094 ix86_data_alignment (tree type, int align)
21096 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21098 if (AGGREGATE_TYPE_P (type)
21099 && TYPE_SIZE (type)
21100 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21101 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21102 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21103 && align < max_align)
21104 align = max_align;
21106 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
21107 to a 16-byte boundary.  */
21108 if (TARGET_64BIT)
21110 if (AGGREGATE_TYPE_P (type)
21111 && TYPE_SIZE (type)
21112 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21113 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21114 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21115 return 128;
21118 if (TREE_CODE (type) == ARRAY_TYPE)
21120 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21121 return 64;
21122 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21123 return 128;
21125 else if (TREE_CODE (type) == COMPLEX_TYPE)
21128 if (TYPE_MODE (type) == DCmode && align < 64)
21129 return 64;
21130 if ((TYPE_MODE (type) == XCmode
21131 || TYPE_MODE (type) == TCmode) && align < 128)
21132 return 128;
21134 else if ((TREE_CODE (type) == RECORD_TYPE
21135 || TREE_CODE (type) == UNION_TYPE
21136 || TREE_CODE (type) == QUAL_UNION_TYPE)
21137 && TYPE_FIELDS (type))
21139 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21140 return 64;
21141 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21142 return 128;
21144 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21145 || TREE_CODE (type) == INTEGER_TYPE)
21147 if (TYPE_MODE (type) == DFmode && align < 64)
21148 return 64;
21149 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21150 return 128;
21152 return align;
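/* Editor's note (illustrative): e.g. a static double is raised to 64-bit
   alignment here, and on x86-64 a 32-byte char array is raised to 128-bit
   alignment so aligned SSE accesses become possible.  */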
21156 /* Compute the alignment for a local variable or a stack slot.  EXP is
21157 the data type or decl itself, MODE is the widest mode available and
21158 ALIGN is the alignment that the object would ordinarily have.  The
21159 value of this macro is used instead of that alignment to align the
21160 object.  */
21162 unsigned int
21163 ix86_local_alignment (tree exp, enum machine_mode mode,
21164 unsigned int align)
21168 if (exp && DECL_P (exp))
21170 type = TREE_TYPE (exp);
21179 /* Don't do dynamic stack realignment for long long objects with
21180 -mpreferred-stack-boundary=2. */
21183 && ix86_preferred_stack_boundary < 64
21184 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21185 && (!type || !TYPE_USER_ALIGN (type))
21186 && (!decl || !DECL_USER_ALIGN (decl)))
21187 align = 32;
21189 /* If TYPE is NULL, we are allocating a stack slot for caller-save
21190 register in MODE.  We will return the largest alignment of XF
21191 and DF.  */
21194 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21195 align = GET_MODE_ALIGNMENT (DFmode);
21199 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
21200 to a 16-byte boundary.  The exact wording is:
21202 "An array uses the same alignment as its elements, except that a local or
21203 global array variable of length at least 16 bytes or
21204 a C99 variable-length array variable always has alignment of at least 16 bytes."
21206 This was added to allow use of aligned SSE instructions on arrays.  This
21207 rule is meant for static storage (where the compiler cannot do the
21208 analysis by itself).  We follow it for automatic variables only when
21209 convenient.  We fully control everything in the function being compiled,
21210 and functions from other units cannot rely on the alignment.
21212 Exclude the va_list type.  It is the common case of a local array where
21213 we cannot benefit from the alignment.  */
21214 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21217 if (AGGREGATE_TYPE_P (type)
21218 && (TYPE_MAIN_VARIANT (type)
21219 != TYPE_MAIN_VARIANT (va_list_type_node))
21220 && TYPE_SIZE (type)
21221 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21222 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21223 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21224 return 128;
21226 if (TREE_CODE (type) == ARRAY_TYPE)
21228 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21229 return 64;
21230 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21231 return 128;
21233 else if (TREE_CODE (type) == COMPLEX_TYPE)
21235 if (TYPE_MODE (type) == DCmode && align < 64)
21236 return 64;
21237 if ((TYPE_MODE (type) == XCmode
21238 || TYPE_MODE (type) == TCmode) && align < 128)
21239 return 128;
21241 else if ((TREE_CODE (type) == RECORD_TYPE
21242 || TREE_CODE (type) == UNION_TYPE
21243 || TREE_CODE (type) == QUAL_UNION_TYPE)
21244 && TYPE_FIELDS (type))
21246 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21247 return 64;
21248 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21249 return 128;
21251 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21252 || TREE_CODE (type) == INTEGER_TYPE)
21255 if (TYPE_MODE (type) == DFmode && align < 64)
21256 return 64;
21257 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21258 return 128;
21260 return align;
21263 /* Compute the minimum required alignment for dynamic stack realignment
21264 purposes for a local variable, parameter or a stack slot. EXP is
21265 the data type or decl itself, MODE is its mode and ALIGN is the
21266 alignment that the object would ordinarily have. */
21268 unsigned int
21269 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21270 unsigned int align)
21274 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21275 return align;
21277 if (exp && DECL_P (exp))
21279 type = TREE_TYPE (exp);
21288 /* Don't do dynamic stack realignment for long long objects with
21289 -mpreferred-stack-boundary=2. */
21290 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21291 && (!type || !TYPE_USER_ALIGN (type))
21292 && (!decl || !DECL_USER_ALIGN (decl)))
21293 return 32;
21295 return align;
21298 /* Find a location for the static chain incoming to a nested function.
21299 This is a register, unless all free registers are used by arguments. */
21301 static rtx
21302 ix86_static_chain (const_tree fndecl, bool incoming_p)
21306 if (!DECL_STATIC_CHAIN (fndecl))
21307 return NULL;
21311 /* We always use R10 in 64-bit mode.  */
21312 regno = R10_REG;
21317 /* By default in 32-bit mode we use ECX to pass the static chain.  */
21318 regno = CX_REG;
21320 fntype = TREE_TYPE (fndecl);
21321 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21323 /* Fastcall functions use ecx/edx for arguments, which leaves
21324 us with EAX for the static chain.  */
21325 regno = AX_REG;
21327 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21329 /* Thiscall functions use ecx for arguments, which leaves
21330 us with EAX for the static chain.  */
21331 regno = AX_REG;
21333 else if (ix86_function_regparm (fntype, fndecl) == 3)
21335 /* For regparm 3, we have no free call-clobbered registers in
21336 which to store the static chain. In order to implement this,
21337 we have the trampoline push the static chain to the stack.
21338 However, we can't push a value below the return address when
21339 we call the nested function directly, so we have to use an
21340 alternate entry point. For this we use ESI, and have the
21341 alternate entry point push ESI, so that things appear the
21342 same once we're executing the nested function. */
21345 if (fndecl == current_function_decl)
21346 ix86_static_chain_on_stack = true;
21347 return gen_frame_mem (SImode,
21348 plus_constant (arg_pointer_rtx, -8));
21354 return gen_rtx_REG (Pmode, regno);
21357 /* Emit RTL insns to initialize the variable parts of a trampoline.
21358 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21359 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21360 to be passed to the target function. */
21362 static void
21363 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21367 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21374 /* Depending on the static chain location, either load a register
21375 with a constant, or push the constant to the stack. All of the
21376 instructions are the same size. */
21377 chain = ix86_static_chain (fndecl, true);
21380 if (REGNO (chain) == CX_REG)
21381 opcode = 0xb9;
21382 else if (REGNO (chain) == AX_REG)
21383 opcode = 0xb8;
21384 else
21385 gcc_unreachable ();
21390 mem = adjust_address (m_tramp, QImode, 0);
21391 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21393 mem = adjust_address (m_tramp, SImode, 1);
21394 emit_move_insn (mem, chain_value);
21396 /* Compute offset from the end of the jmp to the target function.
21397 In the case in which the trampoline stores the static chain on
21398 the stack, we need to skip the first insn which pushes the
21399 (call-saved) register static chain; this push is 1 byte. */
21400 disp = expand_binop (SImode, sub_optab, fnaddr,
21401 plus_constant (XEXP (m_tramp, 0),
21402 MEM_P (chain) ? 9 : 10),
21403 NULL_RTX, 1, OPTAB_DIRECT);
21405 mem = adjust_address (m_tramp, QImode, 5);
21406 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21408 mem = adjust_address (m_tramp, SImode, 6);
21409 emit_move_insn (mem, disp);
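/* Editor's note (illustrative): with a register static chain the 32-bit
   trampoline built above occupies 10 bytes:
     b9 <chain32>   movl $chain_value, %ecx   (b8 for %eax)
     e9 <disp32>    jmp  <fnaddr>
   with disp32 = fnaddr - (tramp + 10); in the on-stack chain variant the
   jump instead targets fnaddr + 1, skipping the entry-point push.  */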
21415 /* Load the function address into r11.  Try to load the address using
21416 the shorter movl instead of movabs.  We may want to support movq
21417 for kernel mode, but the kernel does not use trampolines at
21418 the moment.  */
21419 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21421 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21423 mem = adjust_address (m_tramp, HImode, offset);
21424 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21426 mem = adjust_address (m_tramp, SImode, offset + 2);
21427 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21432 mem = adjust_address (m_tramp, HImode, offset);
21433 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21435 mem = adjust_address (m_tramp, DImode, offset + 2);
21436 emit_move_insn (mem, fnaddr);
21440 /* Load static chain using movabs to r10. */
21441 mem = adjust_address (m_tramp, HImode, offset);
21442 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21444 mem = adjust_address (m_tramp, DImode, offset + 2);
21445 emit_move_insn (mem, chain_value);
21448 /* Jump to r11; the last (unused) byte is a nop, only there to
21449 pad the write out to a single 32-bit store. */
21450 mem = adjust_address (m_tramp, SImode, offset);
21451 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
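/* Editor's note (illustrative): the 64-bit trampoline built above is
     49 bb <imm64>  movabs $fnaddr, %r11   (41 bb <imm32> when it fits)
     49 ba <imm64>  movabs $chain_value, %r10
     49 ff e3 90    rex.WB jmp *%r11; nop
   with the trailing nop padding the final write to a full SImode store.  */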
21454 gcc_assert (offset <= TRAMPOLINE_SIZE);
21457 #ifdef ENABLE_EXECUTE_STACK
21458 #ifdef CHECK_EXECUTE_STACK_ENABLED
21459 if (CHECK_EXECUTE_STACK_ENABLED)
21461 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21462 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21466 /* The following file contains several enumerations and data structures
21467 built from the definitions in i386-builtin-types.def. */
21469 #include "i386-builtin-types.inc"
21471 /* Table for the ix86 builtin non-function types. */
21472 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21474 /* Retrieve an element from the above table, building some of
21475 the types lazily. */
21477 static tree
21478 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21480 unsigned int index;
21483 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21485 type = ix86_builtin_type_tab[(int) tcode];
21486 if (type)
21487 return type;
21489 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21490 if (tcode <= IX86_BT_LAST_VECT)
21492 enum machine_mode mode;
21494 index = tcode - IX86_BT_LAST_PRIM - 1;
21495 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21496 mode = ix86_builtin_type_vect_mode[index];
21498 type = build_vector_type_for_mode (itype, mode);
21504 index = tcode - IX86_BT_LAST_VECT - 1;
21505 if (tcode <= IX86_BT_LAST_PTR)
21506 quals = TYPE_UNQUALIFIED;
21508 quals = TYPE_QUAL_CONST;
21510 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21511 if (quals != TYPE_UNQUALIFIED)
21512 itype = build_qualified_type (itype, quals);
21514 type = build_pointer_type (itype);
21517 ix86_builtin_type_tab[(int) tcode] = type;
21518 return type;
21521 /* Table for the ix86 builtin function types. */
21522 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21524 /* Retrieve an element from the above table, building some of
21525 the types lazily. */
21527 static tree
21528 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21532 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21534 type = ix86_builtin_func_type_tab[(int) tcode];
21535 if (type)
21536 return type;
21538 if (tcode <= IX86_BT_LAST_FUNC)
21540 unsigned start = ix86_builtin_func_start[(int) tcode];
21541 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21542 tree rtype, atype, args = void_list_node;
21545 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21546 for (i = after - 1; i > start; --i)
21548 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21549 args = tree_cons (NULL, atype, args);
21552 type = build_function_type (rtype, args);
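/* Editor's note (illustrative): the loop above walks the argument codes
   backwards so the TREE_LIST comes out in source order; e.g. the codes for
   (V4SF, V4SF) extend void_list_node into V4SF -> V4SF -> void before
   build_function_type consumes it.  */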
21556 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21557 enum ix86_builtin_func_type icode;
21559 icode = ix86_builtin_func_alias_base[index];
21560 type = ix86_get_builtin_func_type (icode);
21563 ix86_builtin_func_type_tab[(int) tcode] = type;
21564 return type;
21568 /* Codes for all the SSE/MMX builtins.  */
21569 enum ix86_builtins
21571 IX86_BUILTIN_ADDPS,
21572 IX86_BUILTIN_ADDSS,
21573 IX86_BUILTIN_DIVPS,
21574 IX86_BUILTIN_DIVSS,
21575 IX86_BUILTIN_MULPS,
21576 IX86_BUILTIN_MULSS,
21577 IX86_BUILTIN_SUBPS,
21578 IX86_BUILTIN_SUBSS,
21580 IX86_BUILTIN_CMPEQPS,
21581 IX86_BUILTIN_CMPLTPS,
21582 IX86_BUILTIN_CMPLEPS,
21583 IX86_BUILTIN_CMPGTPS,
21584 IX86_BUILTIN_CMPGEPS,
21585 IX86_BUILTIN_CMPNEQPS,
21586 IX86_BUILTIN_CMPNLTPS,
21587 IX86_BUILTIN_CMPNLEPS,
21588 IX86_BUILTIN_CMPNGTPS,
21589 IX86_BUILTIN_CMPNGEPS,
21590 IX86_BUILTIN_CMPORDPS,
21591 IX86_BUILTIN_CMPUNORDPS,
21592 IX86_BUILTIN_CMPEQSS,
21593 IX86_BUILTIN_CMPLTSS,
21594 IX86_BUILTIN_CMPLESS,
21595 IX86_BUILTIN_CMPNEQSS,
21596 IX86_BUILTIN_CMPNLTSS,
21597 IX86_BUILTIN_CMPNLESS,
21598 IX86_BUILTIN_CMPNGTSS,
21599 IX86_BUILTIN_CMPNGESS,
21600 IX86_BUILTIN_CMPORDSS,
21601 IX86_BUILTIN_CMPUNORDSS,
21603 IX86_BUILTIN_COMIEQSS,
21604 IX86_BUILTIN_COMILTSS,
21605 IX86_BUILTIN_COMILESS,
21606 IX86_BUILTIN_COMIGTSS,
21607 IX86_BUILTIN_COMIGESS,
21608 IX86_BUILTIN_COMINEQSS,
21609 IX86_BUILTIN_UCOMIEQSS,
21610 IX86_BUILTIN_UCOMILTSS,
21611 IX86_BUILTIN_UCOMILESS,
21612 IX86_BUILTIN_UCOMIGTSS,
21613 IX86_BUILTIN_UCOMIGESS,
21614 IX86_BUILTIN_UCOMINEQSS,
21616 IX86_BUILTIN_CVTPI2PS,
21617 IX86_BUILTIN_CVTPS2PI,
21618 IX86_BUILTIN_CVTSI2SS,
21619 IX86_BUILTIN_CVTSI642SS,
21620 IX86_BUILTIN_CVTSS2SI,
21621 IX86_BUILTIN_CVTSS2SI64,
21622 IX86_BUILTIN_CVTTPS2PI,
21623 IX86_BUILTIN_CVTTSS2SI,
21624 IX86_BUILTIN_CVTTSS2SI64,
21626 IX86_BUILTIN_MAXPS,
21627 IX86_BUILTIN_MAXSS,
21628 IX86_BUILTIN_MINPS,
21629 IX86_BUILTIN_MINSS,
21631 IX86_BUILTIN_LOADUPS,
21632 IX86_BUILTIN_STOREUPS,
21633 IX86_BUILTIN_MOVSS,
21635 IX86_BUILTIN_MOVHLPS,
21636 IX86_BUILTIN_MOVLHPS,
21637 IX86_BUILTIN_LOADHPS,
21638 IX86_BUILTIN_LOADLPS,
21639 IX86_BUILTIN_STOREHPS,
21640 IX86_BUILTIN_STORELPS,
21642 IX86_BUILTIN_MASKMOVQ,
21643 IX86_BUILTIN_MOVMSKPS,
21644 IX86_BUILTIN_PMOVMSKB,
21646 IX86_BUILTIN_MOVNTPS,
21647 IX86_BUILTIN_MOVNTQ,
21649 IX86_BUILTIN_LOADDQU,
21650 IX86_BUILTIN_STOREDQU,
21652 IX86_BUILTIN_PACKSSWB,
21653 IX86_BUILTIN_PACKSSDW,
21654 IX86_BUILTIN_PACKUSWB,
21656 IX86_BUILTIN_PADDB,
21657 IX86_BUILTIN_PADDW,
21658 IX86_BUILTIN_PADDD,
21659 IX86_BUILTIN_PADDQ,
21660 IX86_BUILTIN_PADDSB,
21661 IX86_BUILTIN_PADDSW,
21662 IX86_BUILTIN_PADDUSB,
21663 IX86_BUILTIN_PADDUSW,
21664 IX86_BUILTIN_PSUBB,
21665 IX86_BUILTIN_PSUBW,
21666 IX86_BUILTIN_PSUBD,
21667 IX86_BUILTIN_PSUBQ,
21668 IX86_BUILTIN_PSUBSB,
21669 IX86_BUILTIN_PSUBSW,
21670 IX86_BUILTIN_PSUBUSB,
21671 IX86_BUILTIN_PSUBUSW,
21674 IX86_BUILTIN_PANDN,
21678 IX86_BUILTIN_PAVGB,
21679 IX86_BUILTIN_PAVGW,
21681 IX86_BUILTIN_PCMPEQB,
21682 IX86_BUILTIN_PCMPEQW,
21683 IX86_BUILTIN_PCMPEQD,
21684 IX86_BUILTIN_PCMPGTB,
21685 IX86_BUILTIN_PCMPGTW,
21686 IX86_BUILTIN_PCMPGTD,
21688 IX86_BUILTIN_PMADDWD,
21690 IX86_BUILTIN_PMAXSW,
21691 IX86_BUILTIN_PMAXUB,
21692 IX86_BUILTIN_PMINSW,
21693 IX86_BUILTIN_PMINUB,
21695 IX86_BUILTIN_PMULHUW,
21696 IX86_BUILTIN_PMULHW,
21697 IX86_BUILTIN_PMULLW,
21699 IX86_BUILTIN_PSADBW,
21700 IX86_BUILTIN_PSHUFW,
21702 IX86_BUILTIN_PSLLW,
21703 IX86_BUILTIN_PSLLD,
21704 IX86_BUILTIN_PSLLQ,
21705 IX86_BUILTIN_PSRAW,
21706 IX86_BUILTIN_PSRAD,
21707 IX86_BUILTIN_PSRLW,
21708 IX86_BUILTIN_PSRLD,
21709 IX86_BUILTIN_PSRLQ,
21710 IX86_BUILTIN_PSLLWI,
21711 IX86_BUILTIN_PSLLDI,
21712 IX86_BUILTIN_PSLLQI,
21713 IX86_BUILTIN_PSRAWI,
21714 IX86_BUILTIN_PSRADI,
21715 IX86_BUILTIN_PSRLWI,
21716 IX86_BUILTIN_PSRLDI,
21717 IX86_BUILTIN_PSRLQI,
21719 IX86_BUILTIN_PUNPCKHBW,
21720 IX86_BUILTIN_PUNPCKHWD,
21721 IX86_BUILTIN_PUNPCKHDQ,
21722 IX86_BUILTIN_PUNPCKLBW,
21723 IX86_BUILTIN_PUNPCKLWD,
21724 IX86_BUILTIN_PUNPCKLDQ,
21726 IX86_BUILTIN_SHUFPS,
21728 IX86_BUILTIN_RCPPS,
21729 IX86_BUILTIN_RCPSS,
21730 IX86_BUILTIN_RSQRTPS,
21731 IX86_BUILTIN_RSQRTPS_NR,
21732 IX86_BUILTIN_RSQRTSS,
21733 IX86_BUILTIN_RSQRTF,
21734 IX86_BUILTIN_SQRTPS,
21735 IX86_BUILTIN_SQRTPS_NR,
21736 IX86_BUILTIN_SQRTSS,
21738 IX86_BUILTIN_UNPCKHPS,
21739 IX86_BUILTIN_UNPCKLPS,
21741 IX86_BUILTIN_ANDPS,
21742 IX86_BUILTIN_ANDNPS,
21744 IX86_BUILTIN_XORPS,
21747 IX86_BUILTIN_LDMXCSR,
21748 IX86_BUILTIN_STMXCSR,
21749 IX86_BUILTIN_SFENCE,
21751 /* 3DNow! Original */
21752 IX86_BUILTIN_FEMMS,
21753 IX86_BUILTIN_PAVGUSB,
21754 IX86_BUILTIN_PF2ID,
21755 IX86_BUILTIN_PFACC,
21756 IX86_BUILTIN_PFADD,
21757 IX86_BUILTIN_PFCMPEQ,
21758 IX86_BUILTIN_PFCMPGE,
21759 IX86_BUILTIN_PFCMPGT,
21760 IX86_BUILTIN_PFMAX,
21761 IX86_BUILTIN_PFMIN,
21762 IX86_BUILTIN_PFMUL,
21763 IX86_BUILTIN_PFRCP,
21764 IX86_BUILTIN_PFRCPIT1,
21765 IX86_BUILTIN_PFRCPIT2,
21766 IX86_BUILTIN_PFRSQIT1,
21767 IX86_BUILTIN_PFRSQRT,
21768 IX86_BUILTIN_PFSUB,
21769 IX86_BUILTIN_PFSUBR,
21770 IX86_BUILTIN_PI2FD,
21771 IX86_BUILTIN_PMULHRW,
21773 /* 3DNow! Athlon Extensions */
21774 IX86_BUILTIN_PF2IW,
21775 IX86_BUILTIN_PFNACC,
21776 IX86_BUILTIN_PFPNACC,
21777 IX86_BUILTIN_PI2FW,
21778 IX86_BUILTIN_PSWAPDSI,
21779 IX86_BUILTIN_PSWAPDSF,
21782 IX86_BUILTIN_ADDPD,
21783 IX86_BUILTIN_ADDSD,
21784 IX86_BUILTIN_DIVPD,
21785 IX86_BUILTIN_DIVSD,
21786 IX86_BUILTIN_MULPD,
21787 IX86_BUILTIN_MULSD,
21788 IX86_BUILTIN_SUBPD,
21789 IX86_BUILTIN_SUBSD,
21791 IX86_BUILTIN_CMPEQPD,
21792 IX86_BUILTIN_CMPLTPD,
21793 IX86_BUILTIN_CMPLEPD,
21794 IX86_BUILTIN_CMPGTPD,
21795 IX86_BUILTIN_CMPGEPD,
21796 IX86_BUILTIN_CMPNEQPD,
21797 IX86_BUILTIN_CMPNLTPD,
21798 IX86_BUILTIN_CMPNLEPD,
21799 IX86_BUILTIN_CMPNGTPD,
21800 IX86_BUILTIN_CMPNGEPD,
21801 IX86_BUILTIN_CMPORDPD,
21802 IX86_BUILTIN_CMPUNORDPD,
21803 IX86_BUILTIN_CMPEQSD,
21804 IX86_BUILTIN_CMPLTSD,
21805 IX86_BUILTIN_CMPLESD,
21806 IX86_BUILTIN_CMPNEQSD,
21807 IX86_BUILTIN_CMPNLTSD,
21808 IX86_BUILTIN_CMPNLESD,
21809 IX86_BUILTIN_CMPORDSD,
21810 IX86_BUILTIN_CMPUNORDSD,
21812 IX86_BUILTIN_COMIEQSD,
21813 IX86_BUILTIN_COMILTSD,
21814 IX86_BUILTIN_COMILESD,
21815 IX86_BUILTIN_COMIGTSD,
21816 IX86_BUILTIN_COMIGESD,
21817 IX86_BUILTIN_COMINEQSD,
21818 IX86_BUILTIN_UCOMIEQSD,
21819 IX86_BUILTIN_UCOMILTSD,
21820 IX86_BUILTIN_UCOMILESD,
21821 IX86_BUILTIN_UCOMIGTSD,
21822 IX86_BUILTIN_UCOMIGESD,
21823 IX86_BUILTIN_UCOMINEQSD,
21825 IX86_BUILTIN_MAXPD,
21826 IX86_BUILTIN_MAXSD,
21827 IX86_BUILTIN_MINPD,
21828 IX86_BUILTIN_MINSD,
21830 IX86_BUILTIN_ANDPD,
21831 IX86_BUILTIN_ANDNPD,
21833 IX86_BUILTIN_XORPD,
21835 IX86_BUILTIN_SQRTPD,
21836 IX86_BUILTIN_SQRTSD,
21838 IX86_BUILTIN_UNPCKHPD,
21839 IX86_BUILTIN_UNPCKLPD,
21841 IX86_BUILTIN_SHUFPD,
21843 IX86_BUILTIN_LOADUPD,
21844 IX86_BUILTIN_STOREUPD,
21845 IX86_BUILTIN_MOVSD,
21847 IX86_BUILTIN_LOADHPD,
21848 IX86_BUILTIN_LOADLPD,
21850 IX86_BUILTIN_CVTDQ2PD,
21851 IX86_BUILTIN_CVTDQ2PS,
21853 IX86_BUILTIN_CVTPD2DQ,
21854 IX86_BUILTIN_CVTPD2PI,
21855 IX86_BUILTIN_CVTPD2PS,
21856 IX86_BUILTIN_CVTTPD2DQ,
21857 IX86_BUILTIN_CVTTPD2PI,
21859 IX86_BUILTIN_CVTPI2PD,
21860 IX86_BUILTIN_CVTSI2SD,
21861 IX86_BUILTIN_CVTSI642SD,
21863 IX86_BUILTIN_CVTSD2SI,
21864 IX86_BUILTIN_CVTSD2SI64,
21865 IX86_BUILTIN_CVTSD2SS,
21866 IX86_BUILTIN_CVTSS2SD,
21867 IX86_BUILTIN_CVTTSD2SI,
21868 IX86_BUILTIN_CVTTSD2SI64,
21870 IX86_BUILTIN_CVTPS2DQ,
21871 IX86_BUILTIN_CVTPS2PD,
21872 IX86_BUILTIN_CVTTPS2DQ,
21874 IX86_BUILTIN_MOVNTI,
21875 IX86_BUILTIN_MOVNTPD,
21876 IX86_BUILTIN_MOVNTDQ,
21878 IX86_BUILTIN_MOVQ128,
21881 IX86_BUILTIN_MASKMOVDQU,
21882 IX86_BUILTIN_MOVMSKPD,
21883 IX86_BUILTIN_PMOVMSKB128,
21885 IX86_BUILTIN_PACKSSWB128,
21886 IX86_BUILTIN_PACKSSDW128,
21887 IX86_BUILTIN_PACKUSWB128,
21889 IX86_BUILTIN_PADDB128,
21890 IX86_BUILTIN_PADDW128,
21891 IX86_BUILTIN_PADDD128,
21892 IX86_BUILTIN_PADDQ128,
21893 IX86_BUILTIN_PADDSB128,
21894 IX86_BUILTIN_PADDSW128,
21895 IX86_BUILTIN_PADDUSB128,
21896 IX86_BUILTIN_PADDUSW128,
21897 IX86_BUILTIN_PSUBB128,
21898 IX86_BUILTIN_PSUBW128,
21899 IX86_BUILTIN_PSUBD128,
21900 IX86_BUILTIN_PSUBQ128,
21901 IX86_BUILTIN_PSUBSB128,
21902 IX86_BUILTIN_PSUBSW128,
21903 IX86_BUILTIN_PSUBUSB128,
21904 IX86_BUILTIN_PSUBUSW128,
21906 IX86_BUILTIN_PAND128,
21907 IX86_BUILTIN_PANDN128,
21908 IX86_BUILTIN_POR128,
21909 IX86_BUILTIN_PXOR128,
21911 IX86_BUILTIN_PAVGB128,
21912 IX86_BUILTIN_PAVGW128,
21914 IX86_BUILTIN_PCMPEQB128,
21915 IX86_BUILTIN_PCMPEQW128,
21916 IX86_BUILTIN_PCMPEQD128,
21917 IX86_BUILTIN_PCMPGTB128,
21918 IX86_BUILTIN_PCMPGTW128,
21919 IX86_BUILTIN_PCMPGTD128,
21921 IX86_BUILTIN_PMADDWD128,
21923 IX86_BUILTIN_PMAXSW128,
21924 IX86_BUILTIN_PMAXUB128,
21925 IX86_BUILTIN_PMINSW128,
21926 IX86_BUILTIN_PMINUB128,
21928 IX86_BUILTIN_PMULUDQ,
21929 IX86_BUILTIN_PMULUDQ128,
21930 IX86_BUILTIN_PMULHUW128,
21931 IX86_BUILTIN_PMULHW128,
21932 IX86_BUILTIN_PMULLW128,
21934 IX86_BUILTIN_PSADBW128,
21935 IX86_BUILTIN_PSHUFHW,
21936 IX86_BUILTIN_PSHUFLW,
21937 IX86_BUILTIN_PSHUFD,
21939 IX86_BUILTIN_PSLLDQI128,
21940 IX86_BUILTIN_PSLLWI128,
21941 IX86_BUILTIN_PSLLDI128,
21942 IX86_BUILTIN_PSLLQI128,
21943 IX86_BUILTIN_PSRAWI128,
21944 IX86_BUILTIN_PSRADI128,
21945 IX86_BUILTIN_PSRLDQI128,
21946 IX86_BUILTIN_PSRLWI128,
21947 IX86_BUILTIN_PSRLDI128,
21948 IX86_BUILTIN_PSRLQI128,
21950 IX86_BUILTIN_PSLLDQ128,
21951 IX86_BUILTIN_PSLLW128,
21952 IX86_BUILTIN_PSLLD128,
21953 IX86_BUILTIN_PSLLQ128,
21954 IX86_BUILTIN_PSRAW128,
21955 IX86_BUILTIN_PSRAD128,
21956 IX86_BUILTIN_PSRLW128,
21957 IX86_BUILTIN_PSRLD128,
21958 IX86_BUILTIN_PSRLQ128,
  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions. */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,
  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,

  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */

  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMSUBSS,
  IX86_BUILTIN_VFMSUBSD,
  IX86_BUILTIN_VFMSUBPS,
  IX86_BUILTIN_VFMSUBPD,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMSUBADDPS,
  IX86_BUILTIN_VFMSUBADDPD,
  IX86_BUILTIN_VFNMADDSS,
  IX86_BUILTIN_VFNMADDSD,
  IX86_BUILTIN_VFNMADDPS,
  IX86_BUILTIN_VFNMADDPD,
  IX86_BUILTIN_VFNMSUBSS,
  IX86_BUILTIN_VFNMSUBSD,
  IX86_BUILTIN_VFNMSUBPS,
  IX86_BUILTIN_VFNMSUBPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMSUBPS256,
  IX86_BUILTIN_VFMSUBPD256,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,
  IX86_BUILTIN_VFMSUBADDPS256,
  IX86_BUILTIN_VFMSUBADDPD256,
  IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different
   ISAs, but are waiting to be built until a function is declared to use
   that ISA.  */
struct builtin_isa {
  const char *name;		     /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;			     /* isa_flags this builtin is defined for */
  bool const_p;			     /* true if the declaration is constant */
  bool set_and_not_built_p;	     /* true if recorded here but the decl has
					not been built yet */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores
   the function decl in the ix86_builtins array.  Returns the function decl,
   or NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for
   the default compiler.  When the builtin is expanded, check at that time
   whether it is valid.

   If the front end doesn't have a special hook, record all builtins, even
   those that aren't in the current ISA, in case the user uses function
   specific options for a different ISA, so that we don't get scope errors
   if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
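
/* Editorial note (added): three mask conventions appear in the description
   tables below.  A plain OPTION_MASK_ISA_* set requires that ISA extension;
   or'ing in OPTION_MASK_ISA_64BIT additionally restricts the builtin to
   64-bit targets (the check at the top of def_builtin above); and
   ~OPTION_MASK_ISA_64BIT (every bit except 64BIT) is used for builtins such
   as __builtin_ia32_rdtsc that depend on no ISA extension and must not be
   restricted to 64-bit targets.  */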

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
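
/* Editorial sketch (added, not in the original source): concrete uses of the
   two entry points above.  The names, masks and type codes are taken from
   the description tables below; the wrapper function itself is only
   illustrative.  */

static void
example_def_builtin_usage (void)
{
  /* LFENCE has a side effect, so it is registered with plain
     def_builtin.  */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence",
	       VOID_FTYPE_VOID, IX86_BUILTIN_LFENCE);

  /* MOVMSKPS only computes a value from its operand, so its decl can be
     marked "const" via def_builtin_const.  */
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps",
		     INT_FTYPE_V4SF, IX86_BUILTIN_MOVMSKPS);
}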

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
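
/* Editorial note (added): the deferral round trip.  def_builtin records a
   builtin with set_and_not_built_p when its ISA is not currently enabled;
   later, when a function enables extra ISAs (for example via
   __attribute__((target ("sse4.2")))), the option-handling code calls
   ix86_add_new_builtins with the new isa_flags, so the recorded builtins
   finally get real decls in the extended scope.  */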

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1
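
/* Example: SSE has no "compare greater" mask instruction, so a builtin such
   as __builtin_ia32_cmpgtps is described with the LT comparison plus swapped
   operands (see the ..._SWAP entries in bdesc_args below): a > b is emitted
   as b < a.  */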

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
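
/* Note on the comparison codes above: COMISS/COMISD set ZF both when the
   operands compare equal and when they are unordered, so testing ZF alone
   yields "equal or unordered" (UNEQ), and ZF clear yields "ordered and not
   equal" (LTGT).  That is why __builtin_ia32_comieq is described with UNEQ
   rather than EQ.  */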

static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
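
/* For the pcmpestr/pcmpistr tables above, the final field is not a function
   type but a CC mode selecting which EFLAGS bit of the string compare the
   builtin returns: CCAmode for the "above" variants (CF == 0 && ZF == 0),
   CCCmode for CF, CCOmode for OF, CCSmode for SF and CCZmode for ZF.  The
   ..i128/..m128 variants with flag 0 return the index/mask result
   instead.  */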

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
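
/* Editorial sketch (added, not in the original source): tables like
   bdesc_special_args above are walked during builtin initialization,
   roughly as below; the real loops live further down in this file.  */

static void
example_init_special_args_builtins (void)
{
  const struct builtin_description *d;
  size_t i;

  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      /* Entries without a name (e.g. the MFENCE slot above) are
	 registered elsewhere.  */
      if (d->name == 0)
	continue;

      /* For these tables the FLAG field encodes the function type.  */
      def_builtin (d->mask, d->name,
		   (enum ix86_builtin_func_type) d->flag, d->code);
    }
}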

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23067 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
23068 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23069 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
23070 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
23071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
23072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
23074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
23075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
23076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
23078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23079 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23083 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23085 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23086 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23088 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23090 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23091 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23092 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
23094 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
23096 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23097 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23098 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23099 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23100 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23101 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23102 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23103 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23109 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23112 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23114 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23115 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
23117 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23119 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23120 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23126 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23127 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23132 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23133 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23134 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23137 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23138 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23139 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23140 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23141 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
23142 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23143 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
23144 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
23146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
23147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
23148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
23150 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
23151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
23153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
23154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
23156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
23158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
23159 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
23160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
23161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
23163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
23164 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23165 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23166 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
23167 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23168 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23169 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
23171 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
23172 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23173 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23174 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
23175 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23176 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23177 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
23179 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
23180 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
23181 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
23182 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
23184 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
23185 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
23186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
23188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
23190 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
23191 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
23193 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
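
/* SSE2 MMX */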
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
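
/* SSE3 */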
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
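
/* SSSE3 */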
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
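
/* SSE4.1 */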
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
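
/* SSE4.1 rounding and ptest patterns, gated by OPTION_MASK_ISA_ROUND.  */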
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
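
/* SSE4.2 */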
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
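
/* SSE4A */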
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
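
/* AES */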
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
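
/* PCLMUL */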
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
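
/* AVX */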
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
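
/* ABM */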
{ OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
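
/* F16C */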
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
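/* Each MULTI_ARG_* macro abbreviates the ix86 function type of a
   group of FMA4/XOP builtins: the digit is the number of operands,
   the mode letters give the vector element mode, and a trailing "2"
   selects the 256-bit form of the same shape (e.g. MULTI_ARG_3_SF2
   is the V8SF variant of MULTI_ARG_3_SF).  */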
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI

static const struct builtin_description bdesc_multi_arg[] =
{
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
{ OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
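
/* XOP */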
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
23572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
23573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
23574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
23575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
23576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
23577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
23579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
23580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
23581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
23582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
23583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
23584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
23586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
23587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
23588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
23589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
23590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
23591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
23593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
23617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
23630 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
23631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
23633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
23637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
23638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
23639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
23641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
23642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
23645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
23646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
23647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
23649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
23650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23673 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23675 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23677 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23678 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23679 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23680 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23682 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23683 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23684 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23685 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23686 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23687 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23688 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23689 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
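/* Usage sketch (illustrative; the exact xopintrin.h text may differ).
   Each row above ties one builtin to an insn pattern, a builtin enum
   and a MULTI_ARG_* function type; the user-visible intrinsics are thin
   wrappers around these builtins.  For example, the XOP conditional
   move wrapper looks roughly like this and requires compiling with
   -mxop, matching the OPTION_MASK_ISA_XOP gate above:

     extern __inline __m128i
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _mm_cmov_si128 (__m128i __A, __m128i __B, __m128i __C)
     {
       return (__m128i) __builtin_ia32_vpcmov ((__v2di) __A,
                                               (__v2di) __B,
                                               (__v2di) __C);
     }
*/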

/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* Add the FMA4 and XOP multi-arg builtin instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
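/* Usage sketch (illustrative; the exact mmintrin.h definition may
   differ).  The vec_init builtins registered above back the MMX "set"
   intrinsics, e.g. roughly:

     extern __inline __m64
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }
*/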

/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
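/* Usage sketch (illustrative; sum is a hypothetical helper).  These
   builtins let 64-bit code handle varargs of the "other" calling
   convention explicitly, e.g.:

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;

       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/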

static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
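/* Usage sketch (illustrative).  Once registered, both types are usable
   directly from C on x86 targets; __float80 is the 80-bit extended type
   (XFmode) and __float128 the 128-bit IEEE quad type (TFmode):

     __float80  e = 1.0L;
     __float128 q = 2;

   sizeof (q) is 16; sizeof (e) depends on the ABI's XFmode padding.  */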

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to a normal call if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
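/* Usage sketch (illustrative; magnitude is a hypothetical helper).
   The TFmode builtins registered above give __float128 code basic math
   without libquadmath; when SSE2 is unavailable they become calls to
   __fabstf2 and __copysigntf3 in libgcc, per the BUILT_IN_MD
   registration:

     __float128
     magnitude (__float128 x, __float128 sign)
     {
       return __builtin_copysignq (__builtin_fabsq (x), sign);
     }
*/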

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
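/* Usage sketch (illustrative; the exact emmintrin.h text may differ).
   A typical two-operand builtin routed through this expander is
   __builtin_ia32_paddw128, wrapped roughly as:

     extern __inline __m128i
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _mm_add_epi16 (__m128i __A, __m128i __B)
     {
       return (__m128i) __builtin_ia32_paddw128 ((__v8hi) __A,
                                                 (__v8hi) __B);
     }

   The expander merely forces both operands to satisfy the insn's
   predicates and emits the corresponding addv8hi3 pattern.  */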

/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i, nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
        {
          if (!CONST_INT_P (op))
            {
              error ("last argument must be an immediate");
              return gen_reg_rtx (tmode);
            }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
                             args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
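/* Usage sketch (illustrative; the exact xopintrin.h text may differ).
   A three-operand MULTI_ARG_3_HI row such as __builtin_ia32_vpmacsww is
   wrapped roughly as:

     extern __inline __m128i
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _mm_macc_epi16 (__m128i __A, __m128i __B, __m128i __C)
     {
       return (__m128i) __builtin_ia32_vpmacsww ((__v8hi) __A,
                                                 (__v8hi) __B,
                                                 (__v8hi) __C);
     }
*/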

/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
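/* Usage sketch (illustrative; approx_recip is a hypothetical helper).
   The VEC_MERGE variants cover scalar ops such as rcpss, where the
   pattern wants the source twice: once for the computation and once to
   supply the untouched upper vector elements:

     __m128
     approx_recip (__m128 x)
     {
       return (__m128) __builtin_ia32_rcpss ((__v4sf) x);
     }
*/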

/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
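/* Usage sketch (illustrative; gt_mask is a hypothetical helper).  The
   cmpps immediate encodes LT/LE but not GT/GE, so "greater" builtins
   are described with a SWAP function type and reach this function with
   SWAP set; __builtin_ia32_cmpgtps (a, b) is thereby emitted as a
   cmpltps with the operands exchanged:

     __m128
     gt_mask (__m128 a, __m128 b)
     {
       return (__m128) __builtin_ia32_cmpgtps ((__v4sf) a, (__v4sf) b);
     }
*/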

/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
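/* Usage sketch (illustrative; the exact xmmintrin.h text may differ).
   xmmintrin.h wraps the comi builtins roughly as:

     extern __inline int
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _mm_comieq_ss (__m128 __A, __m128 __B)
     {
       return __builtin_ia32_comieq ((__v4sf) __A, (__v4sf) __B);
     }

   The QImode SUBREG dance above materializes the flags-register
   comparison as a 0/1 value in an integer register.  */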

/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
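/* Usage sketch (illustrative; the exact smmintrin.h text may differ).
   smmintrin.h maps _mm_testz_si128 onto one of these ptest builtins,
   roughly:

     extern __inline int
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _mm_testz_si128 (__m128i __M, __m128i __V)
     {
       return __builtin_ia32_ptestz128 ((__v2di) __M, (__v2di) __V);
     }
*/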

/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
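/* Usage sketch (illustrative; a, la, b, lb are hypothetical locals and
   0x0c is just one possible control byte).  The explicit-length string
   compare is reached from smmintrin.h roughly as:

     int idx = __builtin_ia32_pcmpestri128 ((__v16qi) a, la,
                                            (__v16qi) b, lb, 0x0c);

   The flag-extracting variants (d->flag nonzero) instead return a
   condition derived from FLAGS_REG, as coded above.  */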

/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
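/* Usage sketch (illustrative; a and b are hypothetical locals).  The
   implicit-length form takes the control byte as its last operand:

     int idx = __builtin_ia32_pcmpistri128 ((__v16qi) a, (__v16qi) b, 0x0c);

   That last operand really must be a literal constant; the error paths
   above fire when a non-constant value reaches expansion.  */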

/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
      if (comparison == UNKNOWN)
        return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
          || target == 0
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
        {
          /* SIMD shift insns take either an 8-bit immediate or
             a register as count.  But builtin functions take int as
             count.  If count doesn't match, we put it in a register.  */
          if (!match)
            {
              op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
              if (!insn_p->operand[i + 1].predicate (op, mode))
                op = copy_to_reg (op);
            }
        }
      else if ((nargs - i) <= nargs_constant)
        {
          if (!match)
            switch (icode)
              {
              case CODE_FOR_sse4_1_roundpd:
              case CODE_FOR_sse4_1_roundps:
              case CODE_FOR_sse4_1_roundsd:
              case CODE_FOR_sse4_1_roundss:
              case CODE_FOR_sse4_1_blendps:
              case CODE_FOR_avx_blendpd256:
              case CODE_FOR_avx_vpermilv4df:
              case CODE_FOR_avx_roundpd256:
              case CODE_FOR_avx_roundps256:
                error ("the last argument must be a 4-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_blendpd:
              case CODE_FOR_avx_vpermilv2df:
              case CODE_FOR_xop_vpermil2v2df3:
              case CODE_FOR_xop_vpermil2v4sf3:
              case CODE_FOR_xop_vpermil2v4df3:
              case CODE_FOR_xop_vpermil2v8sf3:
                error ("the last argument must be a 2-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vextractf128v4df:
              case CODE_FOR_avx_vextractf128v8sf:
              case CODE_FOR_avx_vextractf128v8si:
              case CODE_FOR_avx_vinsertf128v4df:
              case CODE_FOR_avx_vinsertf128v8sf:
              case CODE_FOR_avx_vinsertf128v8si:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_cmpsdv2df3:
              case CODE_FOR_avx_cmpssv4sf3:
              case CODE_FOR_avx_cmppdv2df3:
              case CODE_FOR_avx_cmppsv4sf3:
              case CODE_FOR_avx_cmppdv4df3:
              case CODE_FOR_avx_cmppsv8sf3:
                error ("the last argument must be a 5-bit immediate");
                return const0_rtx;

              default:
                switch (nargs_constant)
                  {
                  case 2:
                    if ((nargs - i) == nargs_constant)
                      {
                        error ("the next to last argument must be an 8-bit immediate");
                        break;
                      }
                  case 1:
                    error ("the last argument must be an 8-bit immediate");
                    break;
                  default:
                    gcc_unreachable ();
                  }
                return const0_rtx;
              }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to
             be generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
            {
              if (optimize || !match || num_memory > 1)
                op = copy_to_mode_reg (mode, op);
            }
          else
            {
              op = copy_to_reg (op);
              op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
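/* Usage sketch (illustrative; shift_left3 is a hypothetical helper).
   The COUNT cases above handle the SIMD shifts, whose builtins take an
   int count while the insn accepts an immediate or a register:

     __m128i
     shift_left3 (__m128i x)
     {
       return (__m128i) __builtin_ia32_psllwi128 ((__v8hi) x, 3);
     }

   With the constant 3 the operand predicate matches and the immediate
   form of psllw is emitted; a variable count would instead be copied to
   a register by the code above.  */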
25025 /* Subroutine of ix86_expand_builtin to take care of special insns
25026 with variable number of operands. */
25029 ix86_expand_special_args_builtin (const struct builtin_description *d,
25030 tree exp, rtx target)
25034 unsigned int i, nargs, arg_adjust, memory;
25038 enum machine_mode mode;
25040 enum insn_code icode = d->icode;
25041 bool last_arg_constant = false;
25042 const struct insn_data_d *insn_p = &insn_data[icode];
25043 enum machine_mode tmode = insn_p->operand[0].mode;
25044 enum { load, store } klass;
25046 switch ((enum ix86_builtin_func_type) d->flag)
25048 case VOID_FTYPE_VOID:
25049 emit_insn (GEN_FCN (icode) (target));
25051 case VOID_FTYPE_UINT64:
25052 case VOID_FTYPE_UNSIGNED:
25058 case UINT64_FTYPE_VOID:
25059 case UNSIGNED_FTYPE_VOID:
25060 case UINT16_FTYPE_VOID:
25065 case UINT64_FTYPE_PUNSIGNED:
25066 case V2DI_FTYPE_PV2DI:
25067 case V32QI_FTYPE_PCCHAR:
25068 case V16QI_FTYPE_PCCHAR:
25069 case V8SF_FTYPE_PCV4SF:
25070 case V8SF_FTYPE_PCFLOAT:
25071 case V4SF_FTYPE_PCFLOAT:
25072 case V4DF_FTYPE_PCV2DF:
25073 case V4DF_FTYPE_PCDOUBLE:
25074 case V2DF_FTYPE_PCDOUBLE:
25075 case VOID_FTYPE_PVOID:
25080 case VOID_FTYPE_PV2SF_V4SF:
25081 case VOID_FTYPE_PV4DI_V4DI:
25082 case VOID_FTYPE_PV2DI_V2DI:
25083 case VOID_FTYPE_PCHAR_V32QI:
25084 case VOID_FTYPE_PCHAR_V16QI:
25085 case VOID_FTYPE_PFLOAT_V8SF:
25086 case VOID_FTYPE_PFLOAT_V4SF:
25087 case VOID_FTYPE_PDOUBLE_V4DF:
25088 case VOID_FTYPE_PDOUBLE_V2DF:
25089 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25090 case VOID_FTYPE_PINT_INT:
25093 /* Reserve memory operand for target. */
25094 memory = ARRAY_SIZE (args);
25096 case V4SF_FTYPE_V4SF_PCV2SF:
25097 case V2DF_FTYPE_V2DF_PCDOUBLE:
25102 case V8SF_FTYPE_PCV8SF_V8SF:
25103 case V4DF_FTYPE_PCV4DF_V4DF:
25104 case V4SF_FTYPE_PCV4SF_V4SF:
25105 case V2DF_FTYPE_PCV2DF_V2DF:
25110 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25111 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25112 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25113 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25116 /* Reserve memory operand for target. */
25117 memory = ARRAY_SIZE (args);
25119 case VOID_FTYPE_UINT_UINT_UINT:
25120 case VOID_FTYPE_UINT64_UINT_UINT:
25121 case UCHAR_FTYPE_UINT_UINT_UINT:
25122 case UCHAR_FTYPE_UINT64_UINT_UINT:
25125 memory = ARRAY_SIZE (args);
25126 last_arg_constant = true;
25129 gcc_unreachable ();
25132 gcc_assert (nargs <= ARRAY_SIZE (args));
25134 if (klass == store)
25136 arg = CALL_EXPR_ARG (exp, 0);
25137 op = expand_normal (arg);
25138 gcc_assert (target == 0);
25140 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25142 target = force_reg (tmode, op);
25150 || GET_MODE (target) != tmode
25151 || !insn_p->operand[0].predicate (target, tmode))
25152 target = gen_reg_rtx (tmode);
25155 for (i = 0; i < nargs; i++)
25157 enum machine_mode mode = insn_p->operand[i + 1].mode;
25160 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25161 op = expand_normal (arg);
25162 match = insn_p->operand[i + 1].predicate (op, mode);
25164 if (last_arg_constant && (i + 1) == nargs)
25168 if (icode == CODE_FOR_lwp_lwpvalsi3
25169 || icode == CODE_FOR_lwp_lwpinssi3
25170 || icode == CODE_FOR_lwp_lwpvaldi3
25171 || icode == CODE_FOR_lwp_lwpinsdi3)
25172 error ("the last argument must be a 32-bit immediate");
25174 error ("the last argument must be an 8-bit immediate");
25182 /* This must be the memory operand. */
25183 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25184 gcc_assert (GET_MODE (op) == mode
25185 || GET_MODE (op) == VOIDmode);
25189 /* This must be a register. */
25190 if (VECTOR_MODE_P (mode))
25191 op = safe_vector_operand (op, mode);
25193 gcc_assert (GET_MODE (op) == mode
25194 || GET_MODE (op) == VOIDmode);
25195 op = copy_to_mode_reg (mode, op);
25200 args[i].mode = mode;
25206 pat = GEN_FCN (icode) (target);
25209 pat = GEN_FCN (icode) (target, args[0].op);
25212 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25215 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25218 gcc_unreachable ();
25224 return klass == store ? 0 : target;
25227 /* Return the integer constant in ARG. Constrain it to be in the range
25228 of the subparts of VEC_TYPE; issue an error if not. */
25231 get_element_number (tree vec_type, tree arg)
25233 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25235 if (!host_integerp (arg, 1)
25236 || (elt = tree_low_cst (arg, 1), elt > max))
25238 error ("selector must be an integer constant in the range 0..%wi", max);
25245 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25246 ix86_expand_vector_init. We DO have language-level syntax for this, in
25247 the form of (type){ init-list }. Except that since we can't place emms
25248 instructions from inside the compiler, we can't allow the use of MMX
25249 registers unless the user explicitly asks for it. So we do *not* define
25250 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25251 we have builtins invoked by mmintrin.h that give us license to emit
25252 these sorts of instructions. */
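/* A hedged sketch of the mmintrin.h wrappers referred to above (the
   exact prototype decoration is assumed):

       extern __inline __m64
       _mm_set_pi32 (int __i1, int __i0)
       {
         return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
       }

   so user-level vector construction funnels through these builtins and
   reaches the expander below. */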
25255 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25257 enum machine_mode tmode = TYPE_MODE (type);
25258 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25259 int i, n_elt = GET_MODE_NUNITS (tmode);
25260 rtvec v = rtvec_alloc (n_elt);
25262 gcc_assert (VECTOR_MODE_P (tmode));
25263 gcc_assert (call_expr_nargs (exp) == n_elt);
25265 for (i = 0; i < n_elt; ++i)
25267 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25268 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25271 if (!target || !register_operand (target, tmode))
25272 target = gen_reg_rtx (tmode);
25274 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25278 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25279 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25280 had a language-level syntax for referencing vector elements. */
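/* A hedged example of the user-level view; the builtin name follows
   GCC's usual scheme:

       int third (__v4si v)
       {
         return __builtin_ia32_vec_ext_v4si (v, 2);
       }

   get_element_number above rejects any selector that is not an integer
   constant in range. */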
25283 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25285 enum machine_mode tmode, mode0;
25290 arg0 = CALL_EXPR_ARG (exp, 0);
25291 arg1 = CALL_EXPR_ARG (exp, 1);
25293 op0 = expand_normal (arg0);
25294 elt = get_element_number (TREE_TYPE (arg0), arg1);
25296 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25297 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25298 gcc_assert (VECTOR_MODE_P (mode0));
25300 op0 = force_reg (mode0, op0);
25302 if (optimize || !target || !register_operand (target, tmode))
25303 target = gen_reg_rtx (tmode);
25305 ix86_expand_vector_extract (true, target, op0, elt);
25310 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25311 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25312 a language-level syntax for referencing vector elements. */
25315 ix86_expand_vec_set_builtin (tree exp)
25317 enum machine_mode tmode, mode1;
25318 tree arg0, arg1, arg2;
25320 rtx op0, op1, target;
25322 arg0 = CALL_EXPR_ARG (exp, 0);
25323 arg1 = CALL_EXPR_ARG (exp, 1);
25324 arg2 = CALL_EXPR_ARG (exp, 2);
25326 tmode = TYPE_MODE (TREE_TYPE (arg0));
25327 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25328 gcc_assert (VECTOR_MODE_P (tmode));
25330 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25331 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25332 elt = get_element_number (TREE_TYPE (arg0), arg2);
25334 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25335 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25337 op0 = force_reg (tmode, op0);
25338 op1 = force_reg (mode1, op1);
25340 /* OP0 is the source of these builtin functions and shouldn't be
25341 modified. Create a copy, use it, and return it as the target. */
25342 target = gen_reg_rtx (tmode);
25343 emit_move_insn (target, op0);
25344 ix86_expand_vector_set (true, target, op1, elt);
25349 /* Expand an expression EXP that calls a built-in function,
25350 with result going to TARGET if that's convenient
25351 (and in mode MODE if that's convenient).
25352 SUBTARGET may be used as the target for computing one of EXP's operands.
25353 IGNORE is nonzero if the value is to be ignored. */
25356 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25357 enum machine_mode mode ATTRIBUTE_UNUSED,
25358 int ignore ATTRIBUTE_UNUSED)
25360 const struct builtin_description *d;
25362 enum insn_code icode;
25363 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25364 tree arg0, arg1, arg2;
25365 rtx op0, op1, op2, pat;
25366 enum machine_mode mode0, mode1, mode2;
25367 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25369 /* Determine whether the builtin function is available under the current ISA.
25370 Originally the builtin was not created if it wasn't applicable to the
25371 current ISA based on the command line switches. With function specific
25372 options, we need to check in the context of the function making the call
25373 whether it is supported. */
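/* A hedged illustration of the function-specific case, assuming the
   SSE4.1 intrinsic from <smmintrin.h>:

       __attribute__((target ("sse4.1")))
       __m128 round_up (__m128 x)
       {
         return _mm_round_ps (x, 0x0a);
       }

   compiles even under plain -msse2, while the same call in a function
   without the target attribute triggers the "needs isa option" error
   emitted just below. */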
25374 if (ix86_builtins_isa[fcode].isa
25375 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25377 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25378 NULL, NULL, false);
25381 error ("%qE needs unknown isa option", fndecl);
25384 gcc_assert (opts != NULL);
25385 error ("%qE needs isa option %s", fndecl, opts);
25393 case IX86_BUILTIN_MASKMOVQ:
25394 case IX86_BUILTIN_MASKMOVDQU:
25395 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25396 ? CODE_FOR_mmx_maskmovq
25397 : CODE_FOR_sse2_maskmovdqu);
25398 /* Note the arg order is different from the operand order. */
25399 arg1 = CALL_EXPR_ARG (exp, 0);
25400 arg2 = CALL_EXPR_ARG (exp, 1);
25401 arg0 = CALL_EXPR_ARG (exp, 2);
25402 op0 = expand_normal (arg0);
25403 op1 = expand_normal (arg1);
25404 op2 = expand_normal (arg2);
25405 mode0 = insn_data[icode].operand[0].mode;
25406 mode1 = insn_data[icode].operand[1].mode;
25407 mode2 = insn_data[icode].operand[2].mode;
25409 op0 = force_reg (Pmode, op0);
25410 op0 = gen_rtx_MEM (mode1, op0);
25412 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25413 op0 = copy_to_mode_reg (mode0, op0);
25414 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25415 op1 = copy_to_mode_reg (mode1, op1);
25416 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25417 op2 = copy_to_mode_reg (mode2, op2);
25418 pat = GEN_FCN (icode) (op0, op1, op2);
25424 case IX86_BUILTIN_LDMXCSR:
25425 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25426 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25427 emit_move_insn (target, op0);
25428 emit_insn (gen_sse_ldmxcsr (target));
25431 case IX86_BUILTIN_STMXCSR:
25432 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25433 emit_insn (gen_sse_stmxcsr (target));
25434 return copy_to_mode_reg (SImode, target);
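/* A hedged note: these two cases back the <xmmintrin.h> accessors for
   the MXCSR control register, roughly

       unsigned int csr = _mm_getcsr ();
       _mm_setcsr (csr | 0x8040);

   where _mm_getcsr maps to __builtin_ia32_stmxcsr, _mm_setcsr to
   __builtin_ia32_ldmxcsr, and 0x8040 (an illustrative value) sets the
   FTZ and DAZ bits. */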
25436 case IX86_BUILTIN_CLFLUSH:
25437 arg0 = CALL_EXPR_ARG (exp, 0);
25438 op0 = expand_normal (arg0);
25439 icode = CODE_FOR_sse2_clflush;
25440 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25441 op0 = copy_to_mode_reg (Pmode, op0);
25443 emit_insn (gen_sse2_clflush (op0));
25446 case IX86_BUILTIN_MONITOR:
25447 arg0 = CALL_EXPR_ARG (exp, 0);
25448 arg1 = CALL_EXPR_ARG (exp, 1);
25449 arg2 = CALL_EXPR_ARG (exp, 2);
25450 op0 = expand_normal (arg0);
25451 op1 = expand_normal (arg1);
25452 op2 = expand_normal (arg2);
25454 op0 = copy_to_mode_reg (Pmode, op0);
25456 op1 = copy_to_mode_reg (SImode, op1);
25458 op2 = copy_to_mode_reg (SImode, op2);
25459 emit_insn (ix86_gen_monitor (op0, op1, op2));
25462 case IX86_BUILTIN_MWAIT:
25463 arg0 = CALL_EXPR_ARG (exp, 0);
25464 arg1 = CALL_EXPR_ARG (exp, 1);
25465 op0 = expand_normal (arg0);
25466 op1 = expand_normal (arg1);
25468 op0 = copy_to_mode_reg (SImode, op0);
25470 op1 = copy_to_mode_reg (SImode, op1);
25471 emit_insn (gen_sse3_mwait (op0, op1));
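/* A hedged sketch: at the source level these correspond to the
   <pmmintrin.h> pair

       _mm_monitor ((void const *) addr, 0, 0);
       _mm_mwait (0, 0);

   with the extension and hint arguments copied into SImode registers
   as required above. */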
25474 case IX86_BUILTIN_VEC_INIT_V2SI:
25475 case IX86_BUILTIN_VEC_INIT_V4HI:
25476 case IX86_BUILTIN_VEC_INIT_V8QI:
25477 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25479 case IX86_BUILTIN_VEC_EXT_V2DF:
25480 case IX86_BUILTIN_VEC_EXT_V2DI:
25481 case IX86_BUILTIN_VEC_EXT_V4SF:
25482 case IX86_BUILTIN_VEC_EXT_V4SI:
25483 case IX86_BUILTIN_VEC_EXT_V8HI:
25484 case IX86_BUILTIN_VEC_EXT_V2SI:
25485 case IX86_BUILTIN_VEC_EXT_V4HI:
25486 case IX86_BUILTIN_VEC_EXT_V16QI:
25487 return ix86_expand_vec_ext_builtin (exp, target);
25489 case IX86_BUILTIN_VEC_SET_V2DI:
25490 case IX86_BUILTIN_VEC_SET_V4SF:
25491 case IX86_BUILTIN_VEC_SET_V4SI:
25492 case IX86_BUILTIN_VEC_SET_V8HI:
25493 case IX86_BUILTIN_VEC_SET_V4HI:
25494 case IX86_BUILTIN_VEC_SET_V16QI:
25495 return ix86_expand_vec_set_builtin (exp);
25497 case IX86_BUILTIN_VEC_PERM_V2DF:
25498 case IX86_BUILTIN_VEC_PERM_V4SF:
25499 case IX86_BUILTIN_VEC_PERM_V2DI:
25500 case IX86_BUILTIN_VEC_PERM_V4SI:
25501 case IX86_BUILTIN_VEC_PERM_V8HI:
25502 case IX86_BUILTIN_VEC_PERM_V16QI:
25503 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25504 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25505 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25506 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25507 case IX86_BUILTIN_VEC_PERM_V4DF:
25508 case IX86_BUILTIN_VEC_PERM_V8SF:
25509 return ix86_expand_vec_perm_builtin (exp);
25511 case IX86_BUILTIN_INFQ:
25512 case IX86_BUILTIN_HUGE_VALQ:
25514 REAL_VALUE_TYPE inf;
25518 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25520 tmp = validize_mem (force_const_mem (mode, tmp));
25523 target = gen_reg_rtx (mode);
25525 emit_move_insn (target, tmp);
25529 case IX86_BUILTIN_LLWPCB:
25530 arg0 = CALL_EXPR_ARG (exp, 0);
25531 op0 = expand_normal (arg0);
25532 icode = CODE_FOR_lwp_llwpcb;
25533 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25534 op0 = copy_to_mode_reg (Pmode, op0);
25535 emit_insn (gen_lwp_llwpcb (op0));
25538 case IX86_BUILTIN_SLWPCB:
25539 icode = CODE_FOR_lwp_slwpcb;
25541 || !insn_data[icode].operand[0].predicate (target, Pmode))
25542 target = gen_reg_rtx (Pmode);
25543 emit_insn (gen_lwp_slwpcb (target));
25550 for (i = 0, d = bdesc_special_args;
25551 i < ARRAY_SIZE (bdesc_special_args);
25553 if (d->code == fcode)
25554 return ix86_expand_special_args_builtin (d, exp, target);
25556 for (i = 0, d = bdesc_args;
25557 i < ARRAY_SIZE (bdesc_args);
25559 if (d->code == fcode)
25562 case IX86_BUILTIN_FABSQ:
25563 case IX86_BUILTIN_COPYSIGNQ:
25565 /* Emit a normal call if SSE2 isn't available. */
25566 return expand_call (exp, target, ignore);
25568 return ix86_expand_args_builtin (d, exp, target);
25571 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25572 if (d->code == fcode)
25573 return ix86_expand_sse_comi (d, exp, target);
25575 for (i = 0, d = bdesc_pcmpestr;
25576 i < ARRAY_SIZE (bdesc_pcmpestr);
25578 if (d->code == fcode)
25579 return ix86_expand_sse_pcmpestr (d, exp, target);
25581 for (i = 0, d = bdesc_pcmpistr;
25582 i < ARRAY_SIZE (bdesc_pcmpistr);
25584 if (d->code == fcode)
25585 return ix86_expand_sse_pcmpistr (d, exp, target);
25587 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25588 if (d->code == fcode)
25589 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25590 (enum ix86_builtin_func_type)
25591 d->flag, d->comparison);
25593 gcc_unreachable ();
25596 /* Returns a function decl for a vectorized version of the builtin function
25597 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25598 if it is not available. */
25601 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25604 enum machine_mode in_mode, out_mode;
25606 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25608 if (TREE_CODE (type_out) != VECTOR_TYPE
25609 || TREE_CODE (type_in) != VECTOR_TYPE
25610 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25613 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25614 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25615 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25616 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25620 case BUILT_IN_SQRT:
25621 if (out_mode == DFmode && out_n == 2
25622 && in_mode == DFmode && in_n == 2)
25623 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25626 case BUILT_IN_SQRTF:
25627 if (out_mode == SFmode && out_n == 4
25628 && in_mode == SFmode && in_n == 4)
25629 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25632 case BUILT_IN_LRINT:
25633 if (out_mode == SImode && out_n == 4
25634 && in_mode == DFmode && in_n == 2)
25635 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25638 case BUILT_IN_LRINTF:
25639 if (out_mode == SImode && out_n == 4
25640 && in_mode == SFmode && in_n == 4)
25641 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25644 case BUILT_IN_COPYSIGN:
25645 if (out_mode == DFmode && out_n == 2
25646 && in_mode == DFmode && in_n == 2)
25647 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25650 case BUILT_IN_COPYSIGNF:
25651 if (out_mode == SFmode && out_n == 4
25652 && in_mode == SFmode && in_n == 4)
25653 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
25660 /* Dispatch to a handler for a vectorization library. */
25661 if (ix86_veclib_handler)
25662 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
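/* A hedged illustration: given a loop such as

       void f (double *a, const double *b, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           a[i] = __builtin_sqrt (b[i]);
       }

   the vectorizer asks this hook about BUILT_IN_SQRT with two-element
   DFmode vectors and receives the SQRTPD builtin, so the loop body
   becomes a single sqrtpd per pair of elements. */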
25668 /* Handler for an SVML-style interface to
25669 a library with vectorized intrinsics. */
25672 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25675 tree fntype, new_fndecl, args;
25678 enum machine_mode el_mode, in_mode;
25681 /* SVML is suitable only for unsafe math. */
25682 if (!flag_unsafe_math_optimizations)
25685 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25686 n = TYPE_VECTOR_SUBPARTS (type_out);
25687 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25688 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25689 if (el_mode != in_mode
25697 case BUILT_IN_LOG10:
25699 case BUILT_IN_TANH:
25701 case BUILT_IN_ATAN:
25702 case BUILT_IN_ATAN2:
25703 case BUILT_IN_ATANH:
25704 case BUILT_IN_CBRT:
25705 case BUILT_IN_SINH:
25707 case BUILT_IN_ASINH:
25708 case BUILT_IN_ASIN:
25709 case BUILT_IN_COSH:
25711 case BUILT_IN_ACOSH:
25712 case BUILT_IN_ACOS:
25713 if (el_mode != DFmode || n != 2)
25717 case BUILT_IN_EXPF:
25718 case BUILT_IN_LOGF:
25719 case BUILT_IN_LOG10F:
25720 case BUILT_IN_POWF:
25721 case BUILT_IN_TANHF:
25722 case BUILT_IN_TANF:
25723 case BUILT_IN_ATANF:
25724 case BUILT_IN_ATAN2F:
25725 case BUILT_IN_ATANHF:
25726 case BUILT_IN_CBRTF:
25727 case BUILT_IN_SINHF:
25728 case BUILT_IN_SINF:
25729 case BUILT_IN_ASINHF:
25730 case BUILT_IN_ASINF:
25731 case BUILT_IN_COSHF:
25732 case BUILT_IN_COSF:
25733 case BUILT_IN_ACOSHF:
25734 case BUILT_IN_ACOSF:
25735 if (el_mode != SFmode || n != 4)
25743 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25745 if (fn == BUILT_IN_LOGF)
25746 strcpy (name, "vmlsLn4");
25747 else if (fn == BUILT_IN_LOG)
25748 strcpy (name, "vmldLn2");
25751 sprintf (name, "vmls%s", bname+10);
25752 name[strlen (name)-1] = '4';
25755 sprintf (name, "vmld%s2", bname+10);
25757 /* Convert to uppercase. */
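/* A hedged worked example (the uppercasing of the first letter after
   the "vmls"/"vmld" prefix is assumed): for BUILT_IN_SINF, bname is
   "__builtin_sinf", so bname+10 is "sinf";

       sprintf (name, "vmls%s", bname + 10);   yields "vmlssinf"
       name[strlen (name) - 1] = '4';          yields "vmlssin4"

   and uppercasing gives "vmlsSin4". BUILT_IN_SIN similarly becomes
   "vmldSin2". */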
25761 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25762 args = TREE_CHAIN (args))
25766 fntype = build_function_type_list (type_out, type_in, NULL);
25768 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25770 /* Build a function declaration for the vectorized function. */
25771 new_fndecl = build_decl (BUILTINS_LOCATION,
25772 FUNCTION_DECL, get_identifier (name), fntype);
25773 TREE_PUBLIC (new_fndecl) = 1;
25774 DECL_EXTERNAL (new_fndecl) = 1;
25775 DECL_IS_NOVOPS (new_fndecl) = 1;
25776 TREE_READONLY (new_fndecl) = 1;
25781 /* Handler for an ACML-style interface to
25782 a library with vectorized intrinsics. */
25785 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25787 char name[20] = "__vr.._";
25788 tree fntype, new_fndecl, args;
25791 enum machine_mode el_mode, in_mode;
25794 /* ACML is 64-bit only and suitable only for unsafe math, as it
25795 does not correctly support parts of IEEE arithmetic with the
25796 required precision, such as denormals. */
25798 || !flag_unsafe_math_optimizations)
25801 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25802 n = TYPE_VECTOR_SUBPARTS (type_out);
25803 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25804 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25805 if (el_mode != in_mode
25815 case BUILT_IN_LOG2:
25816 case BUILT_IN_LOG10:
25819 if (el_mode != DFmode
25824 case BUILT_IN_SINF:
25825 case BUILT_IN_COSF:
25826 case BUILT_IN_EXPF:
25827 case BUILT_IN_POWF:
25828 case BUILT_IN_LOGF:
25829 case BUILT_IN_LOG2F:
25830 case BUILT_IN_LOG10F:
25833 if (el_mode != SFmode
25842 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25843 sprintf (name + 7, "%s", bname+10);
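/* A hedged worked example: the "__vr.._" template has the precision
   letter and vector width patched in before the suffix is copied, so
   BUILT_IN_SINF becomes "__vrs4_sinf" and BUILT_IN_SIN becomes
   "__vrd2_sin", matching the conventional ACML vector entry points. */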
25846 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25847 args = TREE_CHAIN (args))
25851 fntype = build_function_type_list (type_out, type_in, NULL);
25853 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25855 /* Build a function declaration for the vectorized function. */
25856 new_fndecl = build_decl (BUILTINS_LOCATION,
25857 FUNCTION_DECL, get_identifier (name), fntype);
25858 TREE_PUBLIC (new_fndecl) = 1;
25859 DECL_EXTERNAL (new_fndecl) = 1;
25860 DECL_IS_NOVOPS (new_fndecl) = 1;
25861 TREE_READONLY (new_fndecl) = 1;
25867 /* Returns a decl of a function that implements conversion of an integer vector
25868 into a floating-point vector, or vice versa. DEST_TYPE and SRC_TYPE
25869 are the types involved when converting according to CODE.
25870 Return NULL_TREE if it is not available. */
25873 ix86_vectorize_builtin_conversion (unsigned int code,
25874 tree dest_type, tree src_type)
25882 switch (TYPE_MODE (src_type))
25885 switch (TYPE_MODE (dest_type))
25888 return (TYPE_UNSIGNED (src_type)
25889 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25890 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25892 return (TYPE_UNSIGNED (src_type)
25894 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25900 switch (TYPE_MODE (dest_type))
25903 return (TYPE_UNSIGNED (src_type)
25905 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25914 case FIX_TRUNC_EXPR:
25915 switch (TYPE_MODE (dest_type))
25918 switch (TYPE_MODE (src_type))
25921 return (TYPE_UNSIGNED (dest_type)
25923 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25925 return (TYPE_UNSIGNED (dest_type)
25927 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25934 switch (TYPE_MODE (src_type))
25937 return (TYPE_UNSIGNED (dest_type)
25939 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
25956 /* Returns the decl of a target-specific builtin that implements the
25957 reciprocal of the function, or NULL_TREE if it is not available. */
25960 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25961 bool sqrt ATTRIBUTE_UNUSED)
25963 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25964 && flag_finite_math_only && !flag_trapping_math
25965 && flag_unsafe_math_optimizations))
25969 /* Machine dependent builtins. */
25972 /* Vectorized version of sqrt to rsqrt conversion. */
25973 case IX86_BUILTIN_SQRTPS_NR:
25974 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25980 /* Normal builtins. */
25983 /* Sqrt to rsqrt conversion. */
25984 case BUILT_IN_SQRTF:
25985 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25992 /* Helper for avx_vpermilps256_operand et al. This is also used by
25993 the expansion functions to turn the parallel back into a mask.
25994 The return value is 0 for no match, and imm8+1 for a match. */
25997 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25999 unsigned i, nelt = GET_MODE_NUNITS (mode);
26001 unsigned char ipar[8];
26003 if (XVECLEN (par, 0) != (int) nelt)
26006 /* Validate that all of the elements are constants, and not totally
26007 out of range. Copy the data into an integral array to make the
26008 subsequent checks easier. */
26009 for (i = 0; i < nelt; ++i)
26011 rtx er = XVECEXP (par, 0, i);
26012 unsigned HOST_WIDE_INT ei;
26014 if (!CONST_INT_P (er))
26025 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
26027 for (i = 0; i < 2; ++i)
26031 mask |= ipar[i] << i;
26033 for (i = 2; i < 4; ++i)
26037 mask |= (ipar[i] - 2) << i;
26042 /* In the 256-bit SFmode case, we have full freedom of movement
26043 within the low 128-bit lane, but the high 128-bit lane must
26044 mirror the exact same pattern. */
26045 for (i = 0; i < 4; ++i)
26046 if (ipar[i] + 4 != ipar[i + 4])
26053 /* In the 128-bit case, we have full freedom in the placement of
26054 the elements from the source operand. */
26055 for (i = 0; i < nelt; ++i)
26056 mask |= ipar[i] << (i * (nelt / 2));
26060 gcc_unreachable ();
26063 /* Make sure success has a non-zero value by adding one. */
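/* A hedged worked example for V4SF (nelt == 4): the parallel
   [1 0 3 2] gives

       mask = (1 << 0) | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1

   and the function returns 0xb1 + 1 == 0xb2; callers subtract one to
   recover the vpermilps imm8. */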
26067 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26068 the expansion functions to turn the parallel back into a mask.
26069 The return value is 0 for no match, and imm8+1 for a match. */
26072 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
26074 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
26076 unsigned char ipar[8];
26078 if (XVECLEN (par, 0) != (int) nelt)
26081 /* Validate that all of the elements are constants, and not totally
26082 out of range. Copy the data into an integral array to make the
26083 subsequent checks easier. */
26084 for (i = 0; i < nelt; ++i)
26086 rtx er = XVECEXP (par, 0, i);
26087 unsigned HOST_WIDE_INT ei;
26089 if (!CONST_INT_P (er))
26092 if (ei >= 2 * nelt)
26097 /* Validate that each half of the permute selects consecutive elements. */
26098 for (i = 0; i < nelt2 - 1; ++i)
26099 if (ipar[i] + 1 != ipar[i + 1])
26101 for (i = nelt2; i < nelt - 1; ++i)
26102 if (ipar[i] + 1 != ipar[i + 1])
26105 /* Reconstruct the mask. */
26106 for (i = 0; i < 2; ++i)
26108 unsigned e = ipar[i * nelt2];
26112 mask |= e << (i * 4);
26115 /* Make sure success has a non-zero value by adding one. */
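/* A hedged worked example for V8SF (nelt == 8, nelt2 == 4): a parallel
   selecting [8 9 10 11 0 1 2 3] from the 16-element concatenation of
   the two sources has half-starts 8 and 0, i.e. 128-bit lanes 2 and 0,
   so

       mask = (2 << 0) | (0 << 4) = 0x02

   and the function returns 0x03; imm8 0x02 tells vperm2f128 to place
   the second source's low lane in the destination's low half and the
   first source's low lane in the high half. */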
26120 /* Store OPERAND to memory after reload is completed. This means
26121 that we can't easily use assign_stack_local. */
26123 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26127 gcc_assert (reload_completed);
26128 if (ix86_using_red_zone ())
26130 result = gen_rtx_MEM (mode,
26131 gen_rtx_PLUS (Pmode,
26133 GEN_INT (-RED_ZONE_SIZE)));
26134 emit_move_insn (result, operand);
26136 else if (TARGET_64BIT)
26142 operand = gen_lowpart (DImode, operand);
26146 gen_rtx_SET (VOIDmode,
26147 gen_rtx_MEM (DImode,
26148 gen_rtx_PRE_DEC (DImode,
26149 stack_pointer_rtx)),
26153 gcc_unreachable ();
26155 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26164 split_double_mode (mode, &operand, 1, operands, operands + 1);
26166 gen_rtx_SET (VOIDmode,
26167 gen_rtx_MEM (SImode,
26168 gen_rtx_PRE_DEC (Pmode,
26169 stack_pointer_rtx)),
26172 gen_rtx_SET (VOIDmode,
26173 gen_rtx_MEM (SImode,
26174 gen_rtx_PRE_DEC (Pmode,
26175 stack_pointer_rtx)),
26180 /* Store HImode values as SImode. */
26181 operand = gen_lowpart (SImode, operand);
26185 gen_rtx_SET (VOIDmode,
26186 gen_rtx_MEM (GET_MODE (operand),
26187 gen_rtx_PRE_DEC (SImode,
26188 stack_pointer_rtx)),
26192 gcc_unreachable ();
26194 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26199 /* Free the operand from memory. */
26201 ix86_free_from_memory (enum machine_mode mode)
26203 if (!ix86_using_red_zone ())
26207 if (mode == DImode || TARGET_64BIT)
26211 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26212 to a pop or add instruction if registers are available. */
26213 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26214 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26219 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26220 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26222 static const reg_class_t *
26223 i386_ira_cover_classes (void)
26225 static const reg_class_t sse_fpmath_classes[] = {
26226 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26228 static const reg_class_t no_sse_fpmath_classes[] = {
26229 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26232 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26235 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26236 QImode must go into class Q_REGS.
26237 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
26238 movdf to do mem-to-mem moves through integer regs. */
26240 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26242 enum machine_mode mode = GET_MODE (x);
26244 /* We're only allowed to return a subclass of CLASS. Many of the
26245 following checks fail for NO_REGS, so eliminate that early. */
26246 if (regclass == NO_REGS)
26249 /* All classes can load zeros. */
26250 if (x == CONST0_RTX (mode))
26253 /* Force constants into memory if we are loading a (nonzero) constant into
26254 an MMX or SSE register. This is because there are no MMX/SSE instructions
26255 to load from a constant. */
26257 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26260 /* Prefer SSE regs only, if we can use them for math. */
26261 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26262 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26264 /* Floating-point constants need more complex checks. */
26265 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26267 /* General regs can load everything. */
26268 if (reg_class_subset_p (regclass, GENERAL_REGS))
26271 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26272 zero above. We only want to wind up preferring 80387 registers if
26273 we plan on doing computation with them. */
26275 && standard_80387_constant_p (x))
26277 /* Limit class to non-SSE. */
26278 if (regclass == FLOAT_SSE_REGS)
26280 if (regclass == FP_TOP_SSE_REGS)
26282 if (regclass == FP_SECOND_SSE_REGS)
26283 return FP_SECOND_REG;
26284 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26291 /* Generally when we see PLUS here, it's the function invariant
26292 (plus soft-fp const_int), which can only be computed into general regs. */
26294 if (GET_CODE (x) == PLUS)
26295 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26297 /* QImode constants are easy to load, but non-constant QImode data
26298 must go into Q_REGS. */
26299 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26301 if (reg_class_subset_p (regclass, Q_REGS))
26303 if (reg_class_subset_p (Q_REGS, regclass))
26311 /* Discourage putting floating-point values in SSE registers unless
26312 SSE math is being used, and likewise for the 387 registers. */
26314 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26316 enum machine_mode mode = GET_MODE (x);
26318 /* Restrict the output reload class to the register bank that we are doing
26319 math on. Rather than return a subset of CLASS, reject this
26320 alternative: if reload cannot do this, it will still use its choice. */
26322 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26323 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26325 if (X87_FLOAT_MODE_P (mode))
26327 if (regclass == FP_TOP_SSE_REGS)
26329 else if (regclass == FP_SECOND_SSE_REGS)
26330 return FP_SECOND_REG;
26332 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26339 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26340 enum machine_mode mode,
26341 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26343 /* QImode spills from non-QI registers require
26344 an intermediate register on 32-bit targets. */
26345 if (!in_p && mode == QImode && !TARGET_64BIT
26346 && (rclass == GENERAL_REGS
26347 || rclass == LEGACY_REGS
26348 || rclass == INDEX_REGS))
26357 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26358 regno = true_regnum (x);
26360 /* Return Q_REGS if the operand is in memory. */
26368 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26371 ix86_class_likely_spilled_p (reg_class_t rclass)
26382 case SSE_FIRST_REG:
26384 case FP_SECOND_REG:
26394 /* If we are copying between general and FP registers, we need a memory
26395 location. The same is true for SSE and MMX registers.
26397 To optimize register_move_cost performance, an inline variant is provided.
26399 The macro can't work reliably when one of the CLASSES is a class containing
26400 registers from multiple units (SSE, MMX, integer). We avoid this by never
26401 combining those units in a single alternative in the machine description.
26402 Ensure that this constraint holds to avoid unexpected surprises.
26404 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26405 enforce these sanity checks. */
26408 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26409 enum machine_mode mode, int strict)
26411 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26412 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26413 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26414 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26415 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26416 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26418 gcc_assert (!strict);
26422 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26425 /* ??? This is a lie. We do have moves between mmx/general, and for
26426 mmx/sse2. But by saying we need secondary memory we discourage the
26427 register allocator from using the mmx registers unless needed. */
26428 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26431 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26433 /* SSE1 doesn't have any direct moves from other classes. */
26437 /* If the target says that inter-unit moves are more expensive
26438 than moving through memory, then don't generate them. */
26439 if (!TARGET_INTER_UNIT_MOVES)
26442 /* Between SSE and general, we have moves no larger than word size. */
26443 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26451 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26452 enum machine_mode mode, int strict)
26454 return inline_secondary_memory_needed (class1, class2, mode, strict);
26457 /* Return true if the registers in CLASS cannot represent the change from
26458 modes FROM to TO. */
26461 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26462 enum reg_class regclass)
26467 /* x87 registers can't do subreg at all, as all values are reformatted
26468 to extended precision. */
26469 if (MAYBE_FLOAT_CLASS_P (regclass))
26472 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26474 /* Vector registers do not support QI or HImode loads. If we don't
26475 disallow a change to these modes, reload will assume it's ok to
26476 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26477 the vec_dupv4hi pattern. */
26478 if (GET_MODE_SIZE (from) < 4)
26481 /* Vector registers do not support subreg with nonzero offsets, which
26482 are otherwise valid for integer registers. Since we can't see
26483 whether we have a nonzero offset from here, prohibit all
26484 nonparadoxical subregs changing size. */
26485 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26492 /* Return the cost of moving data of mode M between a
26493 register and memory. A value of 2 is the default; this cost is
26494 relative to those in `REGISTER_MOVE_COST'.
26496 This function is used extensively by register_move_cost, which is used to
26497 build tables at startup. Make it inline in this case.
26498 When IN is 2, return the maximum of the in and out move costs.
26500 If moving between registers and memory is more expensive than
26501 between two registers, you should define this macro to express the relative cost.
26504 Also model the increased cost of moving QImode registers in non-Q_REGS classes. */
26508 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26512 if (FLOAT_CLASS_P (regclass))
26530 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26531 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26533 if (SSE_CLASS_P (regclass))
26536 switch (GET_MODE_SIZE (mode))
26551 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26552 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26554 if (MMX_CLASS_P (regclass))
26557 switch (GET_MODE_SIZE (mode))
26569 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26570 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26572 switch (GET_MODE_SIZE (mode))
26575 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26578 return ix86_cost->int_store[0];
26579 if (TARGET_PARTIAL_REG_DEPENDENCY
26580 && optimize_function_for_speed_p (cfun))
26581 cost = ix86_cost->movzbl_load;
26583 cost = ix86_cost->int_load[0];
26585 return MAX (cost, ix86_cost->int_store[0]);
26591 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26593 return ix86_cost->movzbl_load;
26595 return ix86_cost->int_store[0] + 4;
26600 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26601 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26603 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
26604 if (mode == TFmode)
26607 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26609 cost = ix86_cost->int_load[2];
26611 cost = ix86_cost->int_store[2];
26612 return (cost * (((int) GET_MODE_SIZE (mode)
26613 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26618 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26621 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26625 /* Return the cost of moving data from a register in class CLASS1 to
26626 one in class CLASS2.
26628 It is not required that the cost always equal 2 when FROM is the same as TO;
26629 on some machines it is expensive to move between registers if they are not
26630 general registers. */
26633 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26634 reg_class_t class2_i)
26636 enum reg_class class1 = (enum reg_class) class1_i;
26637 enum reg_class class2 = (enum reg_class) class2_i;
26639 /* In case we require secondary memory, compute the cost of the store
26640 followed by the load. In order to avoid bad register allocation choices,
26641 this needs to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26643 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26647 cost += inline_memory_move_cost (mode, class1, 2);
26648 cost += inline_memory_move_cost (mode, class2, 2);
26650 /* When copying from a general-purpose register we may emit multiple
26651 stores followed by a single load, causing a memory-size-mismatch stall.
26652 Count this as an arbitrarily high cost of 20. */
26653 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26656 /* In the case of FP/MMX moves, the registers actually overlap, and we
26657 have to switch modes in order to treat them differently. */
26658 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26659 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26665 /* Moves between the SSE/MMX and integer units are expensive. */
26666 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26667 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26669 /* ??? By keeping the returned value relatively high, we limit the number
26670 of moves between integer and MMX/SSE registers for all targets.
26671 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
26672 where integer modes in MMX/SSE registers are not tieable
26673 because of missing QImode and HImode moves to, from or between
26674 MMX/SSE registers. */
26675 return MAX (8, ix86_cost->mmxsse_to_integer);
26677 if (MAYBE_FLOAT_CLASS_P (class1))
26678 return ix86_cost->fp_move;
26679 if (MAYBE_SSE_CLASS_P (class1))
26680 return ix86_cost->sse_move;
26681 if (MAYBE_MMX_CLASS_P (class1))
26682 return ix86_cost->mmx_move;
26686 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26689 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26691 /* The flags register, and only the flags register, can hold CCmode values. */
26692 if (CC_REGNO_P (regno))
26693 return GET_MODE_CLASS (mode) == MODE_CC;
26694 if (GET_MODE_CLASS (mode) == MODE_CC
26695 || GET_MODE_CLASS (mode) == MODE_RANDOM
26696 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26698 if (FP_REGNO_P (regno))
26699 return VALID_FP_MODE_P (mode);
26700 if (SSE_REGNO_P (regno))
26702 /* We implement the move patterns for all vector modes into and
26703 out of SSE registers, even when no operation instructions
26704 are available. OImode moves are available only when AVX is enabled. */
26706 return ((TARGET_AVX && mode == OImode)
26707 || VALID_AVX256_REG_MODE (mode)
26708 || VALID_SSE_REG_MODE (mode)
26709 || VALID_SSE2_REG_MODE (mode)
26710 || VALID_MMX_REG_MODE (mode)
26711 || VALID_MMX_REG_MODE_3DNOW (mode));
26713 if (MMX_REGNO_P (regno))
26715 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26716 so if the register is available at all, then we can move data of
26717 the given mode into or out of it. */
26718 return (VALID_MMX_REG_MODE (mode)
26719 || VALID_MMX_REG_MODE_3DNOW (mode));
26722 if (mode == QImode)
26724 /* Take care with QImode values - they can be in non-QI regs,
26725 but then they cause partial register stalls. */
26726 if (regno <= BX_REG || TARGET_64BIT)
26728 if (!TARGET_PARTIAL_REG_STALL)
26730 return reload_in_progress || reload_completed;
26732 /* We handle both integers and floats in the general purpose registers. */
26733 else if (VALID_INT_MODE_P (mode))
26735 else if (VALID_FP_MODE_P (mode))
26737 else if (VALID_DFP_MODE_P (mode))
26739 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26740 on to use that value in smaller contexts, this can easily force a
26741 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26742 supporting DImode, allow it. */
26743 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26749 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26750 tieable integer mode. */
26753 ix86_tieable_integer_mode_p (enum machine_mode mode)
26762 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26765 return TARGET_64BIT;
26772 /* Return true if MODE1 is accessible in a register that can hold MODE2
26773 without copying. That is, all register classes that can hold MODE2
26774 can also hold MODE1. */
26777 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26779 if (mode1 == mode2)
26782 if (ix86_tieable_integer_mode_p (mode1)
26783 && ix86_tieable_integer_mode_p (mode2))
26786 /* MODE2 being XFmode implies fp stack or general regs, which means we
26787 can tie any smaller floating point modes to it. Note that we do not
26788 tie this with TFmode. */
26789 if (mode2 == XFmode)
26790 return mode1 == SFmode || mode1 == DFmode;
26792 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26793 that we can tie it with SFmode. */
26794 if (mode2 == DFmode)
26795 return mode1 == SFmode;
26797 /* If MODE2 is only appropriate for an SSE register, then tie with
26798 any other mode acceptable to SSE registers. */
26799 if (GET_MODE_SIZE (mode2) == 16
26800 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26801 return (GET_MODE_SIZE (mode1) == 16
26802 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26804 /* If MODE2 is appropriate for an MMX register, then tie
26805 with any other mode acceptable to MMX registers. */
26806 if (GET_MODE_SIZE (mode2) == 8
26807 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26808 return (GET_MODE_SIZE (mode1) == 8
26809 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26814 /* Compute a (partial) cost for rtx X. Return true if the complete
26815 cost has been computed, and false if subexpressions should be
26816 scanned. In either case, *TOTAL contains the cost result. */
26819 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26821 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26822 enum machine_mode mode = GET_MODE (x);
26823 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26831 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26833 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26835 else if (flag_pic && SYMBOLIC_CONST (x)
26837 || (GET_CODE (x) != LABEL_REF
26838 && (GET_CODE (x) != SYMBOL_REF
26839 || !SYMBOL_REF_LOCAL_P (x)))))
26846 if (mode == VOIDmode)
26849 switch (standard_80387_constant_p (x))
26854 default: /* Other constants */
26859 /* Start with (MEM (SYMBOL_REF)), since that's where
26860 it'll probably end up. Add a penalty for size. */
26861 *total = (COSTS_N_INSNS (1)
26862 + (flag_pic != 0 && !TARGET_64BIT)
26863 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26869 /* The zero extension is often completely free on x86_64, so make
26870 it as cheap as possible. */
26871 if (TARGET_64BIT && mode == DImode
26872 && GET_MODE (XEXP (x, 0)) == SImode)
26874 else if (TARGET_ZERO_EXTEND_WITH_AND)
26875 *total = cost->add;
26877 *total = cost->movzx;
26881 *total = cost->movsx;
26885 if (CONST_INT_P (XEXP (x, 1))
26886 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26888 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26891 *total = cost->add;
26894 if ((value == 2 || value == 3)
26895 && cost->lea <= cost->shift_const)
26897 *total = cost->lea;
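/* A hedged assembly sketch: a left shift by 2 or 3 is a multiply by 4
   or 8, which scaled addressing performs in a single instruction,
   e.g.

       leal 0(,%eax,4), %eax        (eax <<= 2)

   so when lea is no more expensive than a constant shift, cost the
   shift as a lea. */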
26907 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26909 if (CONST_INT_P (XEXP (x, 1)))
26911 if (INTVAL (XEXP (x, 1)) > 32)
26912 *total = cost->shift_const + COSTS_N_INSNS (2);
26914 *total = cost->shift_const * 2;
26918 if (GET_CODE (XEXP (x, 1)) == AND)
26919 *total = cost->shift_var * 2;
26921 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26926 if (CONST_INT_P (XEXP (x, 1)))
26927 *total = cost->shift_const;
26929 *total = cost->shift_var;
26934 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26936 /* ??? SSE scalar cost should be used here. */
26937 *total = cost->fmul;
26940 else if (X87_FLOAT_MODE_P (mode))
26942 *total = cost->fmul;
26945 else if (FLOAT_MODE_P (mode))
26947 /* ??? SSE vector cost should be used here. */
26948 *total = cost->fmul;
26953 rtx op0 = XEXP (x, 0);
26954 rtx op1 = XEXP (x, 1);
26956 if (CONST_INT_P (XEXP (x, 1)))
26958 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26959 for (nbits = 0; value != 0; value &= value - 1)
26963 /* This is arbitrary. */
26966 /* Compute costs correctly for widening multiplication. */
26967 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26968 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26969 == GET_MODE_SIZE (mode))
26971 int is_mulwiden = 0;
26972 enum machine_mode inner_mode = GET_MODE (op0);
26974 if (GET_CODE (op0) == GET_CODE (op1))
26975 is_mulwiden = 1, op1 = XEXP (op1, 0);
26976 else if (CONST_INT_P (op1))
26978 if (GET_CODE (op0) == SIGN_EXTEND)
26979 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26982 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26986 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26989 *total = (cost->mult_init[MODE_INDEX (mode)]
26990 + nbits * cost->mult_bit
26991 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
27000 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27001 /* ??? SSE cost should be used here. */
27002 *total = cost->fdiv;
27003 else if (X87_FLOAT_MODE_P (mode))
27004 *total = cost->fdiv;
27005 else if (FLOAT_MODE_P (mode))
27006 /* ??? SSE vector cost should be used here. */
27007 *total = cost->fdiv;
27009 *total = cost->divide[MODE_INDEX (mode)];
27013 if (GET_MODE_CLASS (mode) == MODE_INT
27014 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
27016 if (GET_CODE (XEXP (x, 0)) == PLUS
27017 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27018 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27019 && CONSTANT_P (XEXP (x, 1)))
27021 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27022 if (val == 2 || val == 4 || val == 8)
27024 *total = cost->lea;
27025 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27026 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27027 outer_code, speed);
27028 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27032 else if (GET_CODE (XEXP (x, 0)) == MULT
27033 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27035 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27036 if (val == 2 || val == 4 || val == 8)
27038 *total = cost->lea;
27039 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27040 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27044 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27046 *total = cost->lea;
27047 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27048 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27049 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
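/* A hedged note: the shapes matched above all fold into one lea of
   the form disp(base,index,scale), e.g.

       leal 4(%eax,%ebx,8), %ecx    (ecx = eax + ebx*8 + 4)

   which is why the whole PLUS tree is costed as a single lea plus the
   costs of its leaves. */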
27056 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27058 /* ??? SSE cost should be used here. */
27059 *total = cost->fadd;
27062 else if (X87_FLOAT_MODE_P (mode))
27064 *total = cost->fadd;
27067 else if (FLOAT_MODE_P (mode))
27069 /* ??? SSE vector cost should be used here. */
27070 *total = cost->fadd;
27078 if (!TARGET_64BIT && mode == DImode)
27080 *total = (cost->add * 2
27081 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27082 << (GET_MODE (XEXP (x, 0)) != DImode))
27083 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27084 << (GET_MODE (XEXP (x, 1)) != DImode)));
27090 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27092 /* ??? SSE cost should be used here. */
27093 *total = cost->fchs;
27096 else if (X87_FLOAT_MODE_P (mode))
27098 *total = cost->fchs;
27101 else if (FLOAT_MODE_P (mode))
27103 /* ??? SSE vector cost should be used here. */
27104 *total = cost->fchs;
27110 if (!TARGET_64BIT && mode == DImode)
27111 *total = cost->add * 2;
27113 *total = cost->add;
27117 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27118 && XEXP (XEXP (x, 0), 1) == const1_rtx
27119 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27120 && XEXP (x, 1) == const0_rtx)
27122 /* This kind of construct is implemented using test[bwl].
27123 Treat it as if we had an AND. */
27124 *total = (cost->add
27125 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27126 + rtx_cost (const1_rtx, outer_code, speed));
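/* A hedged illustration: source like

       if (flags & (1 << 5)) ...

   reaches here as (compare (zero_extract ...) (const_int 0)) and is
   emitted as a single test instruction, e.g.

       testb $32, %al

   hence costing it like an AND. */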
27132 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27137 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27138 /* ??? SSE cost should be used here. */
27139 *total = cost->fabs;
27140 else if (X87_FLOAT_MODE_P (mode))
27141 *total = cost->fabs;
27142 else if (FLOAT_MODE_P (mode))
27143 /* ??? SSE vector cost should be used here. */
27144 *total = cost->fabs;
27148 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27149 /* ??? SSE cost should be used here. */
27150 *total = cost->fsqrt;
27151 else if (X87_FLOAT_MODE_P (mode))
27152 *total = cost->fsqrt;
27153 else if (FLOAT_MODE_P (mode))
27154 /* ??? SSE vector cost should be used here. */
27155 *total = cost->fsqrt;
27159 if (XINT (x, 1) == UNSPEC_TP)
27166 case VEC_DUPLICATE:
27167 /* ??? Assume all of these vector manipulation patterns are
26168 recognizable, in which case they all pretty much have the same cost. */
27170 *total = COSTS_N_INSNS (1);
27180 static int current_machopic_label_num;
27182 /* Given a symbol name and its associated stub, write out the
27183 definition of the stub. */
27186 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27188 unsigned int length;
27189 char *binder_name, *symbol_name, lazy_ptr_name[32];
27190 int label = ++current_machopic_label_num;
27192 /* For 64-bit we shouldn't get here. */
27193 gcc_assert (!TARGET_64BIT);
27195 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27196 symb = targetm.strip_name_encoding (symb);
27198 length = strlen (stub);
27199 binder_name = XALLOCAVEC (char, length + 32);
27200 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27202 length = strlen (symb);
27203 symbol_name = XALLOCAVEC (char, length + 32);
27204 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27206 sprintf (lazy_ptr_name, "L%d$lz", label);
27209 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27211 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27213 fprintf (file, "%s:\n", stub);
27214 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27218 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27219 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27220 fprintf (file, "\tjmp\t*%%edx\n");
27223 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27225 fprintf (file, "%s:\n", binder_name);
27229 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27230 fputs ("\tpushl\t%eax\n", file);
27233 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27235 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27237 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27238 fprintf (file, "%s:\n", lazy_ptr_name);
27239 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27240 fprintf (file, ASM_LONG "%s\n", binder_name);
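/* A hedged example of the PIC output for a call to _foo (the binder
   label suffix is assumed; the lazy pointer is L<label>$lz):

       L_foo$stub:
               .indirect_symbol _foo
               call    LPC$1
       LPC$1:  popl    %eax
               movl    L1$lz-LPC$1(%eax),%edx
               jmp     *%edx
       L_foo$stub_binder:
               lea     L1$lz-LPC$1(%eax),%eax
               pushl   %eax
               jmp     dyld_stub_binding_helper
       L1$lz:
               .indirect_symbol _foo
               .long   L_foo$stub_binder
*/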
27242 #endif /* TARGET_MACHO */
27244 /* Order the registers for the register allocator. */
27247 x86_order_regs_for_local_alloc (void)
27252 /* First allocate the local general purpose registers. */
27253 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27254 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27255 reg_alloc_order [pos++] = i;
27257 /* Global general purpose registers. */
27258 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27259 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27260 reg_alloc_order [pos++] = i;
27262 /* x87 registers come first in case we are doing FP math using them. */
27264 if (!TARGET_SSE_MATH)
27265 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27266 reg_alloc_order [pos++] = i;
27268 /* SSE registers. */
27269 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27270 reg_alloc_order [pos++] = i;
27271 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27272 reg_alloc_order [pos++] = i;
27274 /* x87 registers. */
27275 if (TARGET_SSE_MATH)
27276 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27277 reg_alloc_order [pos++] = i;
27279 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27280 reg_alloc_order [pos++] = i;
27282 /* Initialize the rest of the array, as we do not allocate some registers at all. */
27284 while (pos < FIRST_PSEUDO_REGISTER)
27285 reg_alloc_order [pos++] = 0;
27288 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
27289 struct attribute_spec.handler. */
27291 ix86_handle_abi_attribute (tree *node, tree name,
27292 tree args ATTRIBUTE_UNUSED,
27293 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27295 if (TREE_CODE (*node) != FUNCTION_TYPE
27296 && TREE_CODE (*node) != METHOD_TYPE
27297 && TREE_CODE (*node) != FIELD_DECL
27298 && TREE_CODE (*node) != TYPE_DECL)
27300 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27302 *no_add_attrs = true;
27307 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27309 *no_add_attrs = true;
27313 /* Can combine regparm with all attributes but fastcall. */
27314 if (is_attribute_p ("ms_abi", name))
27316 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27318 error ("ms_abi and sysv_abi attributes are not compatible");
27323 else if (is_attribute_p ("sysv_abi", name))
27325 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27327 error ("ms_abi and sysv_abi attributes are not compatible");
27336 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27337 struct attribute_spec.handler. */
27339 ix86_handle_struct_attribute (tree *node, tree name,
27340 tree args ATTRIBUTE_UNUSED,
27341 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27344 if (DECL_P (*node))
27346 if (TREE_CODE (*node) == TYPE_DECL)
27347 type = &TREE_TYPE (*node);
27352 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27353 || TREE_CODE (*type) == UNION_TYPE)))
27355 warning (OPT_Wattributes, "%qE attribute ignored",
27357 *no_add_attrs = true;
27360 else if ((is_attribute_p ("ms_struct", name)
27361 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27362 || ((is_attribute_p ("gcc_struct", name)
27363 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27365 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27367 *no_add_attrs = true;
27374 ix86_handle_fndecl_attribute (tree *node, tree name,
27375 tree args ATTRIBUTE_UNUSED,
27376 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27378 if (TREE_CODE (*node) != FUNCTION_DECL)
27380 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27382 *no_add_attrs = true;
27388 ix86_ms_bitfield_layout_p (const_tree record_type)
27390 return ((TARGET_MS_BITFIELD_LAYOUT
27391 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27392 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27395 /* Returns an expression indicating where the this parameter is
27396 located on entry to the FUNCTION. */
27399 x86_this_parameter (tree function)
27401 tree type = TREE_TYPE (function);
27402 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27407 const int *parm_regs;
27409 if (ix86_function_type_abi (type) == MS_ABI)
27410 parm_regs = x86_64_ms_abi_int_parameter_registers;
27412 parm_regs = x86_64_int_parameter_registers;
27413 return gen_rtx_REG (DImode, parm_regs[aggr]);
27416 nregs = ix86_function_regparm (type, function);
27418 if (nregs > 0 && !stdarg_p (type))
27422 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27423 regno = aggr ? DX_REG : CX_REG;
27424 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27428 return gen_rtx_MEM (SImode,
27429 plus_constant (stack_pointer_rtx, 4));
27438 return gen_rtx_MEM (SImode,
27439 plus_constant (stack_pointer_rtx, 4));
27442 return gen_rtx_REG (SImode, regno);
27445 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27448 /* Determine whether x86_output_mi_thunk can succeed. */
27451 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27452 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27453 HOST_WIDE_INT vcall_offset, const_tree function)
27455 /* 64-bit can handle anything. */
27459 /* For 32-bit, everything's fine if we have one free register. */
27460 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27463 /* Need a free register for vcall_offset. */
27467 /* Need a free register for GOT references. */
27468 if (flag_pic && !targetm.binds_local_p (function))
27471 /* Otherwise ok. */
27475 /* Output the assembler code for a thunk function. THUNK_DECL is the
27476 declaration for the thunk function itself, FUNCTION is the decl for
27477 the target function. DELTA is an immediate constant offset to be
27478 added to THIS. If VCALL_OFFSET is nonzero, the word at
27479 *(*this + vcall_offset) should be added to THIS. */
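/* In rough C-like pseudocode (an illustrative sketch, not the emitted
   assembly itself), the thunk performs:

       this += DELTA;
       if (VCALL_OFFSET)
	 this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
       goto FUNCTION;
*/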
27482 x86_output_mi_thunk (FILE *file,
27483 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27484 HOST_WIDE_INT vcall_offset, tree function)
27487 rtx this_param = x86_this_parameter (function);
27490 /* Make sure unwind info is emitted for the thunk if needed. */
27491 final_start_function (emit_barrier (), file, 1);
27493 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27494 pull it in now and let DELTA benefit. */
27495 if (REG_P (this_param))
27496 this_reg = this_param;
27497 else if (vcall_offset)
27499 /* Put the this parameter into %eax. */
27500 xops[0] = this_param;
27501 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27502 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27505 this_reg = NULL_RTX;
27507 /* Adjust the this parameter by a fixed constant. */
27510 xops[0] = GEN_INT (delta);
27511 xops[1] = this_reg ? this_reg : this_param;
27514 if (!x86_64_general_operand (xops[0], DImode))
27516 tmp = gen_rtx_REG (DImode, R10_REG);
27518 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27520 xops[1] = this_param;
27522 if (x86_maybe_negate_const_int (&xops[0], DImode))
27523 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27525 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27527 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27528 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27530 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27533 /* Adjust the this parameter by a value stored in the vtable. */
27537 tmp = gen_rtx_REG (DImode, R10_REG);
27540 int tmp_regno = CX_REG;
27541 if (lookup_attribute ("fastcall",
27542 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27543 || lookup_attribute ("thiscall",
27544 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27545 tmp_regno = AX_REG;
27546 tmp = gen_rtx_REG (SImode, tmp_regno);
27549 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27551 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27553 /* Adjust the this parameter. */
27554 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27555 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27557 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27558 xops[0] = GEN_INT (vcall_offset);
27560 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27561 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27563 xops[1] = this_reg;
27564 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27567 /* If necessary, drop THIS back to its stack slot. */
27568 if (this_reg && this_reg != this_param)
27570 xops[0] = this_reg;
27571 xops[1] = this_param;
27572 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27575 xops[0] = XEXP (DECL_RTL (function), 0);
27578 if (!flag_pic || targetm.binds_local_p (function))
27579 output_asm_insn ("jmp\t%P0", xops);
27580 /* All thunks should be in the same object as their target,
27581 and thus binds_local_p should be true. */
27582 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27583 gcc_unreachable ();
27586 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27587 tmp = gen_rtx_CONST (Pmode, tmp);
27588 tmp = gen_rtx_MEM (QImode, tmp);
27590 output_asm_insn ("jmp\t%A0", xops);
27595 if (!flag_pic || targetm.binds_local_p (function))
27596 output_asm_insn ("jmp\t%P0", xops);
27601 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27602 if (TARGET_MACHO_BRANCH_ISLANDS)
27603 sym_ref = (gen_rtx_SYMBOL_REF
27605 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27606 tmp = gen_rtx_MEM (QImode, sym_ref);
27608 output_asm_insn ("jmp\t%0", xops);
27611 #endif /* TARGET_MACHO */
27613 tmp = gen_rtx_REG (SImode, CX_REG);
27614 output_set_got (tmp, NULL_RTX);
27617 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27618 output_asm_insn ("jmp\t{*}%1", xops);
27621 final_end_function ();
27625 x86_file_start (void)
27627 default_file_start ();
27629 darwin_file_start ();
27631 if (X86_FILE_START_VERSION_DIRECTIVE)
27632 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27633 if (X86_FILE_START_FLTUSED)
27634 fputs ("\t.global\t__fltused\n", asm_out_file);
27635 if (ix86_asm_dialect == ASM_INTEL)
27636 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
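/* Illustrative note: via ADJUST_FIELD_ALIGN the function below enforces
   the ia32 psABI rule that double, long long and similar structure
   fields are aligned to at most 32 bits, unless -malign-double or
   64-bit mode is in effect.  */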
27640 x86_field_alignment (tree field, int computed)
27642 enum machine_mode mode;
27643 tree type = TREE_TYPE (field);
27645 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27647 mode = TYPE_MODE (strip_array_types (type));
27648 if (mode == DFmode || mode == DCmode
27649 || GET_MODE_CLASS (mode) == MODE_INT
27650 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27651 return MIN (32, computed);
27655 /* Output assembler code to FILE to increment profiler label # LABELNO
27656 for profiling a function entry. */
27658 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27660 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
27665 #ifndef NO_PROFILE_COUNTERS
27666 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27669 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27670 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
27672 fprintf (file, "\tcall\t%s\n", mcount_name);
27676 #ifndef NO_PROFILE_COUNTERS
27677 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27680 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
27684 #ifndef NO_PROFILE_COUNTERS
27685 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27688 fprintf (file, "\tcall\t%s\n", mcount_name);
27692 /* We don't have exact information about the insn sizes, but we may assume
27693 quite safely that we are informed about all 1 byte insns and memory
27694 address sizes. This is enough to eliminate unnecessary padding in 99% of cases. */
27698 min_insn_size (rtx insn)
27702 if (!INSN_P (insn) || !active_insn_p (insn))
27705 /* Discard alignments we've emitted and jump table data. */
27706 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27707 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27709 if (JUMP_TABLE_DATA_P (insn))
27712 /* Important case - calls are always 5 bytes.
27713 It is common to have many calls in a row. */
27715 && symbolic_reference_mentioned_p (PATTERN (insn))
27716 && !SIBLING_CALL_P (insn))
27718 len = get_attr_length (insn);
27722 /* For normal instructions we rely on get_attr_length being exact,
27723 with a few exceptions. */
27724 if (!JUMP_P (insn))
27726 enum attr_type type = get_attr_type (insn);
27731 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27732 || asm_noperands (PATTERN (insn)) >= 0)
27739 /* Otherwise trust get_attr_length. */
27743 l = get_attr_length_address (insn);
27744 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27753 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27755 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
27759 ix86_avoid_jump_mispredicts (void)
27761 rtx insn, start = get_insns ();
27762 int nbytes = 0, njumps = 0;
27765 /* Look for all minimal intervals of instructions containing 4 jumps.
27766 The intervals are bounded by START and INSN. NBYTES is the total
27767 size of instructions in the interval including INSN and not including
27768 START. When NBYTES is smaller than 16, it is possible
27769 that the end of START and the end of INSN fall in the same 16-byte page.
27771 The smallest offset in the page at which INSN can start is the case
27772 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
27773 We add a p2align to the 16-byte window with max_skip 15 - NBYTES + sizeof (INSN).
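/* Illustrative example: if the interval holds NBYTES == 12 and
   sizeof (INSN) == 2, INSN can start as early as offset 10 in the
   16-byte window, so a p2align with max_skip == 15 - 12 + 2 == 5
   suffices to push INSN out of any 16-byte window already containing
   the three preceding jumps.  */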
27775 for (insn = start; insn; insn = NEXT_INSN (insn))
27779 if (LABEL_P (insn))
27781 int align = label_to_alignment (insn);
27782 int max_skip = label_to_max_skip (insn);
27786 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27787 already in the current 16-byte page, because otherwise
27788 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27789 bytes to reach a 16-byte boundary. */
27791 || (align <= 3 && max_skip != (1 << align) - 1))
27794 fprintf (dump_file, "Label %i with max_skip %i\n",
27795 INSN_UID (insn), max_skip);
27798 while (nbytes + max_skip >= 16)
27800 start = NEXT_INSN (start);
27801 if ((JUMP_P (start)
27802 && GET_CODE (PATTERN (start)) != ADDR_VEC
27803 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27805 njumps--, isjump = 1;
27808 nbytes -= min_insn_size (start);
27814 min_size = min_insn_size (insn);
27815 nbytes += min_size;
27817 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27818 INSN_UID (insn), min_size);
27820 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27821 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27829 start = NEXT_INSN (start);
27830 if ((JUMP_P (start)
27831 && GET_CODE (PATTERN (start)) != ADDR_VEC
27832 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27834 njumps--, isjump = 1;
27837 nbytes -= min_insn_size (start);
27839 gcc_assert (njumps >= 0);
27841 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27842 INSN_UID (start), INSN_UID (insn), nbytes);
27844 if (njumps == 3 && isjump && nbytes < 16)
27846 int padsize = 15 - nbytes + min_insn_size (insn);
27849 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27850 INSN_UID (insn), padsize);
27851 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27857 /* AMD Athlon works faster
27858 when RET is not the destination of a conditional jump or directly preceded
27859 by another jump instruction. We avoid the penalty by inserting a NOP just
27860 before the RET instruction in such cases. */
27862 ix86_pad_returns (void)
27867 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27869 basic_block bb = e->src;
27870 rtx ret = BB_END (bb);
27872 bool replace = false;
27874 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27875 || optimize_bb_for_size_p (bb))
27877 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27878 if (active_insn_p (prev) || LABEL_P (prev))
27880 if (prev && LABEL_P (prev))
27885 FOR_EACH_EDGE (e, ei, bb->preds)
27886 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27887 && !(e->flags & EDGE_FALLTHRU))
27892 prev = prev_active_insn (ret);
27894 && ((JUMP_P (prev) && any_condjump_p (prev))
27897 /* Empty functions get a branch mispredict even when the jump destination
27898 is not visible to us. */
27899 if (!prev && !optimize_function_for_size_p (cfun))
27904 emit_jump_insn_before (gen_return_internal_long (), ret);
27910 /* Count the minimum number of instructions in BB. Return 4 if the
27911 number of instructions >= 4. */
27914 ix86_count_insn_bb (basic_block bb)
27917 int insn_count = 0;
27919 /* Count number of instructions in this block. Return 4 if the number
27920 of instructions >= 4. */
27921 FOR_BB_INSNS (bb, insn)
27923 /* This happens only in exit blocks. */
27925 && GET_CODE (PATTERN (insn)) == RETURN)
27928 if (NONDEBUG_INSN_P (insn)
27929 && GET_CODE (PATTERN (insn)) != USE
27930 && GET_CODE (PATTERN (insn)) != CLOBBER)
27933 if (insn_count >= 4)
27942 /* Count the minimum number of instructions on a code path through BB.
27943 Return 4 if the number of instructions >= 4. */
27946 ix86_count_insn (basic_block bb)
27950 int min_prev_count;
27952 /* Only bother counting instructions along paths with no
27953 more than 2 basic blocks between entry and exit. Given
27954 that BB has an edge to exit, determine if a predecessor
27955 of BB has an edge from entry. If so, compute the number
27956 of instructions in the predecessor block. If there
27957 happen to be multiple such blocks, compute the minimum. */
27958 min_prev_count = 4;
27959 FOR_EACH_EDGE (e, ei, bb->preds)
27962 edge_iterator prev_ei;
27964 if (e->src == ENTRY_BLOCK_PTR)
27966 min_prev_count = 0;
27969 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
27971 if (prev_e->src == ENTRY_BLOCK_PTR)
27973 int count = ix86_count_insn_bb (e->src);
27974 if (count < min_prev_count)
27975 min_prev_count = count;
27981 if (min_prev_count < 4)
27982 min_prev_count += ix86_count_insn_bb (bb);
27984 return min_prev_count;
27987 /* Pad short functions to 4 instructions. */
27990 ix86_pad_short_function (void)
27995 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27997 rtx ret = BB_END (e->src);
27998 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
28000 int insn_count = ix86_count_insn (e->src);
28002 /* Pad short function. */
28003 if (insn_count < 4)
28007 /* Find epilogue. */
28010 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
28011 insn = PREV_INSN (insn);
28016 /* Two NOPs are counted as one instruction. */
28017 insn_count = 2 * (4 - insn_count);
28018 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
28024 /* Implement machine specific optimizations. We implement padding of returns
28025 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
28029 if (optimize && optimize_function_for_speed_p (cfun))
28031 if (TARGET_PAD_SHORT_FUNCTION)
28032 ix86_pad_short_function ();
28033 else if (TARGET_PAD_RETURNS)
28034 ix86_pad_returns ();
28035 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28036 if (TARGET_FOUR_JUMP_LIMIT)
28037 ix86_avoid_jump_mispredicts ();
28042 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
28045 x86_extended_QIreg_mentioned_p (rtx insn)
28048 extract_insn_cached (insn);
28049 for (i = 0; i < recog_data.n_operands; i++)
28050 if (REG_P (recog_data.operand[i])
28051 && REGNO (recog_data.operand[i]) > BX_REG)
28056 /* Return nonzero when P points to a register encoded via a REX prefix.
28057 Called via for_each_rtx. */
28059 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28061 unsigned int regno;
28064 regno = REGNO (*p);
28065 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
28068 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
28071 x86_extended_reg_mentioned_p (rtx insn)
28073 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28074 extended_reg_mentioned_1, NULL);
28077 /* If profitable, negate (without causing overflow) integer constant
28078 of mode MODE at location LOC. Return true in this case. */
28080 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28084 if (!CONST_INT_P (*loc))
28090 /* DImode x86_64 constants must fit in 32 bits. */
28091 gcc_assert (x86_64_immediate_operand (*loc, mode));
28102 gcc_unreachable ();
28105 /* Avoid overflows. */
28106 if (mode_signbit_p (mode, *loc))
28109 val = INTVAL (*loc);
28111 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
28112 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
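/* For example, "addl $-128, %eax" fits in a sign-extended 8-bit
   immediate while "subl $128, %eax" would need a 32-bit immediate, so
   -128 is kept as an add and +128 is instead emitted as "subl $-128".  */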
28113 if ((val < 0 && val != -128)
28116 *loc = GEN_INT (-val);
28123 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28124 optabs would emit if we didn't have TFmode patterns. */
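/* An illustrative C sketch of the emitted sequence, assuming
   round-to-nearest, with the input bits treated as an unsigned X:

     result = high_bit_clear (X) ? (FP) (signed) X
	    : 2.0 * (FP) (signed) ((X >> 1) | (X & 1));

   OR-ing the shifted-out low bit back in keeps the conversion of the
   halved value correctly rounded.  */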
28127 x86_emit_floatuns (rtx operands[2])
28129 rtx neglab, donelab, i0, i1, f0, in, out;
28130 enum machine_mode mode, inmode;
28132 inmode = GET_MODE (operands[1]);
28133 gcc_assert (inmode == SImode || inmode == DImode);
28136 in = force_reg (inmode, operands[1]);
28137 mode = GET_MODE (out);
28138 neglab = gen_label_rtx ();
28139 donelab = gen_label_rtx ();
28140 f0 = gen_reg_rtx (mode);
28142 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28144 expand_float (out, in, 0);
28146 emit_jump_insn (gen_jump (donelab));
28149 emit_label (neglab);
28151 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28153 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28155 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28157 expand_float (f0, i0, 0);
28159 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28161 emit_label (donelab);
28164 /* AVX does not support 32-byte integer vector operations,
28165 thus the longest vector we are faced with is V16QImode. */
28166 #define MAX_VECT_LEN 16
28168 struct expand_vec_perm_d
28170 rtx target, op0, op1;
28171 unsigned char perm[MAX_VECT_LEN];
28172 enum machine_mode vmode;
28173 unsigned char nelt;
28177 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28178 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28180 /* Get a vector mode of the same size as the original but with elements
28181 twice as wide. This is only guaranteed to apply to integral vectors. */
28183 static inline enum machine_mode
28184 get_mode_wider_vector (enum machine_mode o)
28186 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28187 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28188 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28189 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
28193 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28194 with all elements equal to VAR. Return true if successful. */
28197 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28198 rtx target, rtx val)
28221 /* First attempt to recognize VAL as-is. */
28222 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28223 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28224 if (recog_memoized (insn) < 0)
28227 /* If that fails, force VAL into a register. */
28230 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28231 seq = get_insns ();
28234 emit_insn_before (seq, insn);
28236 ok = recog_memoized (insn) >= 0;
28245 if (TARGET_SSE || TARGET_3DNOW_A)
28249 val = gen_lowpart (SImode, val);
28250 x = gen_rtx_TRUNCATE (HImode, val);
28251 x = gen_rtx_VEC_DUPLICATE (mode, x);
28252 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28265 struct expand_vec_perm_d dperm;
28269 memset (&dperm, 0, sizeof (dperm));
28270 dperm.target = target;
28271 dperm.vmode = mode;
28272 dperm.nelt = GET_MODE_NUNITS (mode);
28273 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28275 /* Extend to SImode using a paradoxical SUBREG. */
28276 tmp1 = gen_reg_rtx (SImode);
28277 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28279 /* Insert the SImode value as low element of a V4SImode vector. */
28280 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28281 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28283 ok = (expand_vec_perm_1 (&dperm)
28284 || expand_vec_perm_broadcast_1 (&dperm));
28296 /* Replicate the value once into the next wider mode and recurse. */
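/* E.g. for a V8HImode broadcast of V, this forms the SImode value
   (V << 16) | V and then performs a V4SImode broadcast of that.  */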
28298 enum machine_mode smode, wsmode, wvmode;
28301 smode = GET_MODE_INNER (mode);
28302 wvmode = get_mode_wider_vector (mode);
28303 wsmode = GET_MODE_INNER (wvmode);
28305 val = convert_modes (wsmode, smode, val, true);
28306 x = expand_simple_binop (wsmode, ASHIFT, val,
28307 GEN_INT (GET_MODE_BITSIZE (smode)),
28308 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28309 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28311 x = gen_lowpart (wvmode, target);
28312 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28320 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28321 rtx x = gen_reg_rtx (hvmode);
28323 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28326 x = gen_rtx_VEC_CONCAT (mode, x, x);
28327 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28336 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28337 whose ONE_VAR element is VAR, and the other elements are zero. Return true if successful. */
28341 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28342 rtx target, rtx var, int one_var)
28344 enum machine_mode vsimode;
28347 bool use_vector_set = false;
28352 /* For SSE4.1, we normally use vector set. But if the second
28353 element is zero and inter-unit moves are OK, we use movq instead. */
28355 use_vector_set = (TARGET_64BIT
28357 && !(TARGET_INTER_UNIT_MOVES
28363 use_vector_set = TARGET_SSE4_1;
28366 use_vector_set = TARGET_SSE2;
28369 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28376 use_vector_set = TARGET_AVX;
28379 /* Use ix86_expand_vector_set in 64bit mode only. */
28380 use_vector_set = TARGET_AVX && TARGET_64BIT;
28386 if (use_vector_set)
28388 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28389 var = force_reg (GET_MODE_INNER (mode), var);
28390 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28406 var = force_reg (GET_MODE_INNER (mode), var);
28407 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28408 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28413 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28414 new_target = gen_reg_rtx (mode);
28416 new_target = target;
28417 var = force_reg (GET_MODE_INNER (mode), var);
28418 x = gen_rtx_VEC_DUPLICATE (mode, var);
28419 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28420 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28423 /* We need to shuffle the value to the correct position, so
28424 create a new pseudo to store the intermediate result. */
28426 /* With SSE2, we can use the integer shuffle insns. */
28427 if (mode != V4SFmode && TARGET_SSE2)
28429 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28431 GEN_INT (one_var == 1 ? 0 : 1),
28432 GEN_INT (one_var == 2 ? 0 : 1),
28433 GEN_INT (one_var == 3 ? 0 : 1)));
28434 if (target != new_target)
28435 emit_move_insn (target, new_target);
28439 /* Otherwise convert the intermediate result to V4SFmode and
28440 use the SSE1 shuffle instructions. */
28441 if (mode != V4SFmode)
28443 tmp = gen_reg_rtx (V4SFmode);
28444 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28449 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28451 GEN_INT (one_var == 1 ? 0 : 1),
28452 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28453 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28455 if (mode != V4SFmode)
28456 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28457 else if (tmp != target)
28458 emit_move_insn (target, tmp);
28460 else if (target != new_target)
28461 emit_move_insn (target, new_target);
28466 vsimode = V4SImode;
28472 vsimode = V2SImode;
28478 /* Zero extend the variable element to SImode and recurse. */
28479 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28481 x = gen_reg_rtx (vsimode);
28482 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28484 gcc_unreachable ();
28486 emit_move_insn (target, gen_lowpart (mode, x));
28494 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28495 consisting of the values in VALS. It is known that all elements
28496 except ONE_VAR are constants. Return true if successful. */
28499 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28500 rtx target, rtx vals, int one_var)
28502 rtx var = XVECEXP (vals, 0, one_var);
28503 enum machine_mode wmode;
28506 const_vec = copy_rtx (vals);
28507 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28508 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28516 /* For the two element vectors, it's just as easy to use
28517 the general case. */
28521 /* Use ix86_expand_vector_set in 64bit mode only. */
28544 /* There's no way to set one QImode entry easily. Combine
28545 the variable value with its adjacent constant value, and
28546 promote to an HImode set. */
28547 x = XVECEXP (vals, 0, one_var ^ 1);
28550 var = convert_modes (HImode, QImode, var, true);
28551 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28552 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28553 x = GEN_INT (INTVAL (x) & 0xff);
28557 var = convert_modes (HImode, QImode, var, true);
28558 x = gen_int_mode (INTVAL (x) << 8, HImode);
28560 if (x != const0_rtx)
28561 var = expand_simple_binop (HImode, IOR, var, x, var,
28562 1, OPTAB_LIB_WIDEN);
28564 x = gen_reg_rtx (wmode);
28565 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28566 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28568 emit_move_insn (target, gen_lowpart (mode, x));
28575 emit_move_insn (target, const_vec);
28576 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28580 /* A subroutine of ix86_expand_vector_init_general. Use vector
28581 concatenate to handle the most general case: all values variable,
28582 and none identical. */
28585 ix86_expand_vector_init_concat (enum machine_mode mode,
28586 rtx target, rtx *ops, int n)
28588 enum machine_mode cmode, hmode = VOIDmode;
28589 rtx first[8], second[4];
28629 gcc_unreachable ();
28632 if (!register_operand (ops[1], cmode))
28633 ops[1] = force_reg (cmode, ops[1]);
28634 if (!register_operand (ops[0], cmode))
28635 ops[0] = force_reg (cmode, ops[0]);
28636 emit_insn (gen_rtx_SET (VOIDmode, target,
28637 gen_rtx_VEC_CONCAT (mode, ops[0],
28657 gcc_unreachable ();
28673 gcc_unreachable ();
28678 /* FIXME: We process inputs backward to help RA. PR 36222. */
28681 for (; i > 0; i -= 2, j--)
28683 first[j] = gen_reg_rtx (cmode);
28684 v = gen_rtvec (2, ops[i - 1], ops[i]);
28685 ix86_expand_vector_init (false, first[j],
28686 gen_rtx_PARALLEL (cmode, v));
28692 gcc_assert (hmode != VOIDmode);
28693 for (i = j = 0; i < n; i += 2, j++)
28695 second[j] = gen_reg_rtx (hmode);
28696 ix86_expand_vector_init_concat (hmode, second [j],
28700 ix86_expand_vector_init_concat (mode, target, second, n);
28703 ix86_expand_vector_init_concat (mode, target, first, n);
28707 gcc_unreachable ();
28711 /* A subroutine of ix86_expand_vector_init_general. Use vector
28712 interleave to handle the most general case: all values variable,
28713 and none identical. */
28716 ix86_expand_vector_init_interleave (enum machine_mode mode,
28717 rtx target, rtx *ops, int n)
28719 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28722 rtx (*gen_load_even) (rtx, rtx, rtx);
28723 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28724 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28729 gen_load_even = gen_vec_setv8hi;
28730 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28731 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28732 inner_mode = HImode;
28733 first_imode = V4SImode;
28734 second_imode = V2DImode;
28735 third_imode = VOIDmode;
28738 gen_load_even = gen_vec_setv16qi;
28739 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28740 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28741 inner_mode = QImode;
28742 first_imode = V8HImode;
28743 second_imode = V4SImode;
28744 third_imode = V2DImode;
28747 gcc_unreachable ();
28750 for (i = 0; i < n; i++)
28752 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28753 op0 = gen_reg_rtx (SImode);
28754 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28756 /* Insert the SImode value as low element of V4SImode vector. */
28757 op1 = gen_reg_rtx (V4SImode);
28758 op0 = gen_rtx_VEC_MERGE (V4SImode,
28759 gen_rtx_VEC_DUPLICATE (V4SImode,
28761 CONST0_RTX (V4SImode),
28763 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28765 /* Cast the V4SImode vector back to a vector in the original mode. */
28766 op0 = gen_reg_rtx (mode);
28767 emit_move_insn (op0, gen_lowpart (mode, op1));
28769 /* Load even elements into the second position. */
28770 emit_insn (gen_load_even (op0,
28771 force_reg (inner_mode,
28775 /* Cast vector to FIRST_IMODE vector. */
28776 ops[i] = gen_reg_rtx (first_imode);
28777 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28780 /* Interleave low FIRST_IMODE vectors. */
28781 for (i = j = 0; i < n; i += 2, j++)
28783 op0 = gen_reg_rtx (first_imode);
28784 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28786 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28787 ops[j] = gen_reg_rtx (second_imode);
28788 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28791 /* Interleave low SECOND_IMODE vectors. */
28792 switch (second_imode)
28795 for (i = j = 0; i < n / 2; i += 2, j++)
28797 op0 = gen_reg_rtx (second_imode);
28798 emit_insn (gen_interleave_second_low (op0, ops[i],
28801 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
28803 ops[j] = gen_reg_rtx (third_imode);
28804 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28806 second_imode = V2DImode;
28807 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28811 op0 = gen_reg_rtx (second_imode);
28812 emit_insn (gen_interleave_second_low (op0, ops[0],
28815 /* Cast the SECOND_IMODE vector back to a vector of the original mode. */
28817 emit_insn (gen_rtx_SET (VOIDmode, target,
28818 gen_lowpart (mode, op0)));
28822 gcc_unreachable ();
28826 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28827 all values variable, and none identical. */
28830 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28831 rtx target, rtx vals)
28833 rtx ops[32], op0, op1;
28834 enum machine_mode half_mode = VOIDmode;
28841 if (!mmx_ok && !TARGET_SSE)
28853 n = GET_MODE_NUNITS (mode);
28854 for (i = 0; i < n; i++)
28855 ops[i] = XVECEXP (vals, 0, i);
28856 ix86_expand_vector_init_concat (mode, target, ops, n);
28860 half_mode = V16QImode;
28864 half_mode = V8HImode;
28868 n = GET_MODE_NUNITS (mode);
28869 for (i = 0; i < n; i++)
28870 ops[i] = XVECEXP (vals, 0, i);
28871 op0 = gen_reg_rtx (half_mode);
28872 op1 = gen_reg_rtx (half_mode);
28873 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28875 ix86_expand_vector_init_interleave (half_mode, op1,
28876 &ops [n >> 1], n >> 2);
28877 emit_insn (gen_rtx_SET (VOIDmode, target,
28878 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28882 if (!TARGET_SSE4_1)
28890 /* Don't use ix86_expand_vector_init_interleave if we can't
28891 move from GPR to SSE register directly. */
28892 if (!TARGET_INTER_UNIT_MOVES)
28895 n = GET_MODE_NUNITS (mode);
28896 for (i = 0; i < n; i++)
28897 ops[i] = XVECEXP (vals, 0, i);
28898 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28906 gcc_unreachable ();
28910 int i, j, n_elts, n_words, n_elt_per_word;
28911 enum machine_mode inner_mode;
28912 rtx words[4], shift;
28914 inner_mode = GET_MODE_INNER (mode);
28915 n_elts = GET_MODE_NUNITS (mode);
28916 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28917 n_elt_per_word = n_elts / n_words;
28918 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
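/* Each word is assembled by accumulating its elements from the most
   significant end downwards: word = (word << element_bits) | next_elt.  */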
28920 for (i = 0; i < n_words; ++i)
28922 rtx word = NULL_RTX;
28924 for (j = 0; j < n_elt_per_word; ++j)
28926 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28927 elt = convert_modes (word_mode, inner_mode, elt, true);
28933 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28934 word, 1, OPTAB_LIB_WIDEN);
28935 word = expand_simple_binop (word_mode, IOR, word, elt,
28936 word, 1, OPTAB_LIB_WIDEN);
28944 emit_move_insn (target, gen_lowpart (mode, words[0]));
28945 else if (n_words == 2)
28947 rtx tmp = gen_reg_rtx (mode);
28948 emit_clobber (tmp);
28949 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28950 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28951 emit_move_insn (target, tmp);
28953 else if (n_words == 4)
28955 rtx tmp = gen_reg_rtx (V4SImode);
28956 gcc_assert (word_mode == SImode);
28957 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28958 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28959 emit_move_insn (target, gen_lowpart (mode, tmp));
28962 gcc_unreachable ();
28966 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28967 instructions unless MMX_OK is true. */
28970 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28972 enum machine_mode mode = GET_MODE (target);
28973 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28974 int n_elts = GET_MODE_NUNITS (mode);
28975 int n_var = 0, one_var = -1;
28976 bool all_same = true, all_const_zero = true;
28980 for (i = 0; i < n_elts; ++i)
28982 x = XVECEXP (vals, 0, i);
28983 if (!(CONST_INT_P (x)
28984 || GET_CODE (x) == CONST_DOUBLE
28985 || GET_CODE (x) == CONST_FIXED))
28986 n_var++, one_var = i;
28987 else if (x != CONST0_RTX (inner_mode))
28988 all_const_zero = false;
28989 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28993 /* Constants are best loaded from the constant pool. */
28996 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
29000 /* If all values are identical, broadcast the value. */
29002 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
29003 XVECEXP (vals, 0, 0)))
29006 /* Values where only one field is non-constant are best loaded from
29007 the pool and overwritten via move later. */
29011 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
29012 XVECEXP (vals, 0, one_var),
29016 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29020 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29024 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29026 enum machine_mode mode = GET_MODE (target);
29027 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29028 enum machine_mode half_mode;
29029 bool use_vec_merge = false;
29031 static rtx (*gen_extract[6][2]) (rtx, rtx)
29033 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29034 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29035 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29036 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29037 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29038 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29040 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29042 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29043 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29044 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29045 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29046 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29047 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29057 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29058 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29060 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29062 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29063 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29069 use_vec_merge = TARGET_SSE4_1;
29077 /* For the two element vectors, we implement a VEC_CONCAT with
29078 the extraction of the other element. */
29080 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29081 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29084 op0 = val, op1 = tmp;
29086 op0 = tmp, op1 = val;
29088 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29089 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29094 use_vec_merge = TARGET_SSE4_1;
29101 use_vec_merge = true;
29105 /* tmp = target = A B C D */
29106 tmp = copy_to_reg (target);
29107 /* target = A A B B */
29108 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29109 /* target = X A B B */
29110 ix86_expand_vector_set (false, target, val, 0);
29111 /* target = A X C D */
29112 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29113 const1_rtx, const0_rtx,
29114 GEN_INT (2+4), GEN_INT (3+4)));
29118 /* tmp = target = A B C D */
29119 tmp = copy_to_reg (target);
29120 /* tmp = X B C D */
29121 ix86_expand_vector_set (false, tmp, val, 0);
29122 /* target = A B X D */
29123 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29124 const0_rtx, const1_rtx,
29125 GEN_INT (0+4), GEN_INT (3+4)));
29129 /* tmp = target = A B C D */
29130 tmp = copy_to_reg (target);
29131 /* tmp = X B C D */
29132 ix86_expand_vector_set (false, tmp, val, 0);
29133 /* target = A B C X */
29134 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29135 const0_rtx, const1_rtx,
29136 GEN_INT (2+4), GEN_INT (0+4)));
29140 gcc_unreachable ();
29145 use_vec_merge = TARGET_SSE4_1;
29149 /* Element 0 handled by vec_merge below. */
29152 use_vec_merge = true;
29158 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29159 store into element 0, then shuffle them back. */
29163 order[0] = GEN_INT (elt);
29164 order[1] = const1_rtx;
29165 order[2] = const2_rtx;
29166 order[3] = GEN_INT (3);
29167 order[elt] = const0_rtx;
29169 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29170 order[1], order[2], order[3]));
29172 ix86_expand_vector_set (false, target, val, 0);
29174 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29175 order[1], order[2], order[3]));
29179 /* For SSE1, we have to reuse the V4SF code. */
29180 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29181 gen_lowpart (SFmode, val), elt);
29186 use_vec_merge = TARGET_SSE2;
29189 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29193 use_vec_merge = TARGET_SSE4_1;
29200 half_mode = V16QImode;
29206 half_mode = V8HImode;
29212 half_mode = V4SImode;
29218 half_mode = V2DImode;
29224 half_mode = V4SFmode;
29230 half_mode = V2DFmode;
29236 /* Compute offset. */
29240 gcc_assert (i <= 1);
29242 /* Extract the half. */
29243 tmp = gen_reg_rtx (half_mode);
29244 emit_insn (gen_extract[j][i] (tmp, target));
29246 /* Put val in tmp at elt. */
29247 ix86_expand_vector_set (false, tmp, val, elt);
29250 emit_insn (gen_insert[j][i] (target, target, tmp));
29259 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29260 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29261 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29265 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29267 emit_move_insn (mem, target);
29269 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29270 emit_move_insn (tmp, val);
29272 emit_move_insn (target, mem);
29277 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29279 enum machine_mode mode = GET_MODE (vec);
29280 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29281 bool use_vec_extr = false;
29294 use_vec_extr = true;
29298 use_vec_extr = TARGET_SSE4_1;
29310 tmp = gen_reg_rtx (mode);
29311 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29312 GEN_INT (elt), GEN_INT (elt),
29313 GEN_INT (elt+4), GEN_INT (elt+4)));
29317 tmp = gen_reg_rtx (mode);
29318 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29322 gcc_unreachable ();
29325 use_vec_extr = true;
29330 use_vec_extr = TARGET_SSE4_1;
29344 tmp = gen_reg_rtx (mode);
29345 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29346 GEN_INT (elt), GEN_INT (elt),
29347 GEN_INT (elt), GEN_INT (elt)));
29351 tmp = gen_reg_rtx (mode);
29352 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29356 gcc_unreachable ();
29359 use_vec_extr = true;
29364 /* For SSE1, we have to reuse the V4SF code. */
29365 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29366 gen_lowpart (V4SFmode, vec), elt);
29372 use_vec_extr = TARGET_SSE2;
29375 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29379 use_vec_extr = TARGET_SSE4_1;
29383 /* ??? Could extract the appropriate HImode element and shift. */
29390 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29391 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29393 /* Let the rtl optimizers know about the zero extension performed. */
29394 if (inner_mode == QImode || inner_mode == HImode)
29396 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29397 target = gen_lowpart (SImode, target);
29400 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29404 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29406 emit_move_insn (mem, vec);
29408 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29409 emit_move_insn (target, tmp);
29413 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29414 pattern to reduce; DEST is the destination; IN is the input vector. */
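/* Illustrative data flow, writing f for FN: tmp1 = { in2, in3, in2, in3 };
   then tmp2[0] = f (in2, in0) and tmp2[1] = f (in3, in1); tmp3
   broadcasts tmp2[1]; finally dest[0] = f (tmp2[0], tmp2[1]), the
   reduction over all four input elements.  */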
29417 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29419 rtx tmp1, tmp2, tmp3;
29421 tmp1 = gen_reg_rtx (V4SFmode);
29422 tmp2 = gen_reg_rtx (V4SFmode);
29423 tmp3 = gen_reg_rtx (V4SFmode);
29425 emit_insn (gen_sse_movhlps (tmp1, in, in));
29426 emit_insn (fn (tmp2, tmp1, in));
29428 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29429 const1_rtx, const1_rtx,
29430 GEN_INT (1+4), GEN_INT (1+4)));
29431 emit_insn (fn (dest, tmp2, tmp3));
29434 /* Target hook for scalar_mode_supported_p. */
29436 ix86_scalar_mode_supported_p (enum machine_mode mode)
29438 if (DECIMAL_FLOAT_MODE_P (mode))
29439 return default_decimal_float_supported_p ();
29440 else if (mode == TFmode)
29443 return default_scalar_mode_supported_p (mode);
29446 /* Implements target hook vector_mode_supported_p. */
29448 ix86_vector_mode_supported_p (enum machine_mode mode)
29450 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29452 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29454 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29456 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29458 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29463 /* Target hook for c_mode_for_suffix. */
29464 static enum machine_mode
29465 ix86_c_mode_for_suffix (char suffix)
29475 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29477 We do this in the new i386 backend to maintain source compatibility
29478 with the old cc0-based compiler. */
29481 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29482 tree inputs ATTRIBUTE_UNUSED,
29485 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29487 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29492 /* Implements target vector targetm.asm.encode_section_info. This
29493 is not used by NetWare. */
29495 static void ATTRIBUTE_UNUSED
29496 ix86_encode_section_info (tree decl, rtx rtl, int first)
29498 default_encode_section_info (decl, rtl, first);
29500 if (TREE_CODE (decl) == VAR_DECL
29501 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29502 && ix86_in_large_data_p (decl))
29503 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29506 /* Worker function for REVERSE_CONDITION. */
29509 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29511 return (mode != CCFPmode && mode != CCFPUmode
29512 ? reverse_condition (code)
29513 : reverse_condition_maybe_unordered (code));
29516 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
29520 output_387_reg_move (rtx insn, rtx *operands)
29522 if (REG_P (operands[0]))
29524 if (REG_P (operands[1])
29525 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29527 if (REGNO (operands[0]) == FIRST_STACK_REG)
29528 return output_387_ffreep (operands, 0);
29529 return "fstp\t%y0";
29531 if (STACK_TOP_P (operands[0]))
29532 return "fld%Z1\t%y1";
29535 else if (MEM_P (operands[0]))
29537 gcc_assert (REG_P (operands[1]));
29538 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29539 return "fstp%Z0\t%y0";
29542 /* There is no non-popping store to memory for XFmode.
29543 So if we need one, follow the store with a load. */
29544 if (GET_MODE (operands[0]) == XFmode)
29545 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29547 return "fst%Z0\t%y0";
29554 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
29555 the FP status register is set. */
29558 ix86_emit_fp_unordered_jump (rtx label)
29560 rtx reg = gen_reg_rtx (HImode);
29563 emit_insn (gen_x86_fnstsw_1 (reg));
29565 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29567 emit_insn (gen_x86_sahf_1 (reg));
29569 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29570 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29574 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29576 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29577 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29580 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29581 gen_rtx_LABEL_REF (VOIDmode, label),
29583 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29585 emit_jump_insn (temp);
29586 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29589 /* Output code to perform a log1p XFmode calculation. */
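/* fyl2xp1 computes y * log2 (x + 1) but is only specified for
   |x| < 1 - sqrt (2) / 2 (about 0.29289), so above that threshold the
   sequence below falls back to fyl2x on 1 + x instead; the fldln2
   constant (ln 2) turns the base-2 logarithm into a natural one in
   both branches.  */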
29591 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29593 rtx label1 = gen_label_rtx ();
29594 rtx label2 = gen_label_rtx ();
29596 rtx tmp = gen_reg_rtx (XFmode);
29597 rtx tmp2 = gen_reg_rtx (XFmode);
29600 emit_insn (gen_absxf2 (tmp, op1));
29601 test = gen_rtx_GE (VOIDmode, tmp,
29602 CONST_DOUBLE_FROM_REAL_VALUE (
29603 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29605 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29607 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29608 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29609 emit_jump (label2);
29611 emit_label (label1);
29612 emit_move_insn (tmp, CONST1_RTX (XFmode));
29613 emit_insn (gen_addxf3 (tmp, op1, tmp));
29614 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29615 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29617 emit_label (label2);
29620 /* Output code to perform a Newton-Raphson approximation of a single precision
29621 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29623 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29625 rtx x0, x1, e0, e1, two;
29627 x0 = gen_reg_rtx (mode);
29628 e0 = gen_reg_rtx (mode);
29629 e1 = gen_reg_rtx (mode);
29630 x1 = gen_reg_rtx (mode);
29632 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29634 if (VECTOR_MODE_P (mode))
29635 two = ix86_build_const_vector (SFmode, true, two);
29637 two = force_reg (mode, two);
29639 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
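/* Derivation: one Newton-Raphson step for f (x) = 1/x - b, starting
   from the hardware estimate x0 = rcp (b), gives

     x1 = x0 * (2.0 - b * x0)

   and the quotient is then approximated below as e0 * x1 with
   e0 = a * x0.  */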
29641 /* x0 = rcp(b) estimate */
29642 emit_insn (gen_rtx_SET (VOIDmode, x0,
29643 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29646 emit_insn (gen_rtx_SET (VOIDmode, e0,
29647 gen_rtx_MULT (mode, x0, a)));
29649 emit_insn (gen_rtx_SET (VOIDmode, e1,
29650 gen_rtx_MULT (mode, x0, b)));
29652 emit_insn (gen_rtx_SET (VOIDmode, x1,
29653 gen_rtx_MINUS (mode, two, e1)));
29654 /* res = e0 * x1 */
29655 emit_insn (gen_rtx_SET (VOIDmode, res,
29656 gen_rtx_MULT (mode, e0, x1)));
29659 /* Output code to perform a Newton-Raphson approximation of a
29660 single precision floating point [reciprocal] square root. */
29662 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29665 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29668 x0 = gen_reg_rtx (mode);
29669 e0 = gen_reg_rtx (mode);
29670 e1 = gen_reg_rtx (mode);
29671 e2 = gen_reg_rtx (mode);
29672 e3 = gen_reg_rtx (mode);
29674 real_from_integer (&r, VOIDmode, -3, -1, 0);
29675 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29677 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29678 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29680 if (VECTOR_MODE_P (mode))
29682 mthree = ix86_build_const_vector (SFmode, true, mthree);
29683 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29686 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29687 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
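/* Derivation: one Newton-Raphson step for f (x) = 1/(x*x) - a,
   starting from the hardware estimate x0 = rsqrtss (a), gives

     x1 = 0.5 * x0 * (3.0 - a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3.0)

   and sqrt (a) is additionally obtained as a * rsqrt (a).  */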
29689 /* x0 = rsqrt(a) estimate */
29690 emit_insn (gen_rtx_SET (VOIDmode, x0,
29691 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29694 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN for sqrt (0.0). */
29699 zero = gen_reg_rtx (mode);
29700 mask = gen_reg_rtx (mode);
29702 zero = force_reg (mode, CONST0_RTX(mode));
29703 emit_insn (gen_rtx_SET (VOIDmode, mask,
29704 gen_rtx_NE (mode, zero, a)));
29706 emit_insn (gen_rtx_SET (VOIDmode, x0,
29707 gen_rtx_AND (mode, x0, mask)));
29711 emit_insn (gen_rtx_SET (VOIDmode, e0,
29712 gen_rtx_MULT (mode, x0, a)));
29714 emit_insn (gen_rtx_SET (VOIDmode, e1,
29715 gen_rtx_MULT (mode, e0, x0)));
29718 mthree = force_reg (mode, mthree);
29719 emit_insn (gen_rtx_SET (VOIDmode, e2,
29720 gen_rtx_PLUS (mode, e1, mthree)));
29722 mhalf = force_reg (mode, mhalf);
29724 /* e3 = -.5 * x0 */
29725 emit_insn (gen_rtx_SET (VOIDmode, e3,
29726 gen_rtx_MULT (mode, x0, mhalf)));
29728 /* e3 = -.5 * e0 */
29729 emit_insn (gen_rtx_SET (VOIDmode, e3,
29730 gen_rtx_MULT (mode, e0, mhalf)));
29731 /* ret = e2 * e3 */
29732 emit_insn (gen_rtx_SET (VOIDmode, res,
29733 gen_rtx_MULT (mode, e2, e3)));
29736 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29738 static void ATTRIBUTE_UNUSED
29739 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29742 /* With Binutils 2.15, the "@unwind" marker must be specified on
29743 every occurrence of the ".eh_frame" section, not just the first one. */
29746 && strcmp (name, ".eh_frame") == 0)
29748 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29749 flags & SECTION_WRITE ? "aw" : "a");
29752 default_elf_asm_named_section (name, flags, decl);
29755 /* Return the mangling of TYPE if it is an extended fundamental type. */
29757 static const char *
29758 ix86_mangle_type (const_tree type)
29760 type = TYPE_MAIN_VARIANT (type);
29762 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29763 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29766 switch (TYPE_MODE (type))
29769 /* __float128 is "g". */
29772 /* "long double" or __float80 is "e". */
29779 /* For 32-bit code we can save PIC register setup by using
29780 the __stack_chk_fail_local hidden function instead of calling
29781 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
29782 register, so it is better to call __stack_chk_fail directly. */
29785 ix86_stack_protect_fail (void)
29787 return TARGET_64BIT
29788 ? default_external_stack_protect_fail ()
29789 : default_hidden_stack_protect_fail ();
29792 /* Select a format to encode pointers in exception handling data. CODE
29793 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29794 true if the symbol may be affected by dynamic relocations.
29796 ??? All x86 object file formats are capable of representing this.
29797 After all, the relocation needed is the same as for the call insn.
29798 Whether or not a particular assembler allows us to enter such, I
29799 guess we'll have to see. */
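/* For example, PIC code with the small code model ends up with
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, plus DW_EH_PE_indirect for global
   symbols, while non-PIC small-model code uses DW_EH_PE_udata4.  */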
29801 asm_preferred_eh_data_format (int code, int global)
29805 int type = DW_EH_PE_sdata8;
29807 || ix86_cmodel == CM_SMALL_PIC
29808 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29809 type = DW_EH_PE_sdata4;
29810 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29812 if (ix86_cmodel == CM_SMALL
29813 || (ix86_cmodel == CM_MEDIUM && code))
29814 return DW_EH_PE_udata4;
29815 return DW_EH_PE_absptr;
29818 /* Expand copysign from SIGN to the positive value ABS_VALUE
29819 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
29822 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29824 enum machine_mode mode = GET_MODE (sign);
29825 rtx sgn = gen_reg_rtx (mode);
29826 if (mask == NULL_RTX)
29828 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29829 if (!VECTOR_MODE_P (mode))
29831 /* We need to generate a scalar mode mask in this case. */
29832 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29833 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29834 mask = gen_reg_rtx (mode);
29835 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29839 mask = gen_rtx_NOT (mode, mask);
29840 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29841 gen_rtx_AND (mode, mask, sign)));
29842 emit_insn (gen_rtx_SET (VOIDmode, result,
29843 gen_rtx_IOR (mode, abs_value, sgn)));
29846 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29847 mask for masking out the sign bit is stored in *SMASK, if that is non-null. */
29850 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29852 enum machine_mode mode = GET_MODE (op0);
29855 xa = gen_reg_rtx (mode);
29856 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29857 if (!VECTOR_MODE_P (mode))
29859 /* We need to generate a scalar mode mask in this case. */
29860 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29861 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29862 mask = gen_reg_rtx (mode);
29863 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29865 emit_insn (gen_rtx_SET (VOIDmode, xa,
29866 gen_rtx_AND (mode, op0, mask)));
29874 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29875 swapping the operands if SWAP_OPERANDS is true. The expanded
29876 code is a forward jump to a newly created label in case the
29877 comparison is true. The generated label rtx is returned. */
29879 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29880 bool swap_operands)
29891 label = gen_label_rtx ();
29892 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29893 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29894 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29895 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29896 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29897 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29898 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29899 JUMP_LABEL (tmp) = label;
29904 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29905 using comparison code CODE. Operands are swapped for the comparison if
29906 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29908 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29909 bool swap_operands)
29911 enum machine_mode mode = GET_MODE (op0);
29912 rtx mask = gen_reg_rtx (mode);
29921 if (mode == DFmode)
29922 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29923 gen_rtx_fmt_ee (code, mode, op0, op1)));
29925 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29926 gen_rtx_fmt_ee (code, mode, op0, op1)));
29931 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29932 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
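/* Adding and then subtracting this constant rounds any smaller
   nonnegative value to an integer: once the sum reaches 2**52
   (2**23 for SFmode), the mantissa has no bits left to represent a
   fraction, so the addition itself performs the rounding.  */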
29934 ix86_gen_TWO52 (enum machine_mode mode)
29936 REAL_VALUE_TYPE TWO52r;
29939 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29940 TWO52 = const_double_from_real_value (TWO52r, mode);
29941 TWO52 = force_reg (mode, TWO52);
29946 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
29949 ix86_expand_lround (rtx op0, rtx op1)
29951 /* C code for the stuff we're doing below:
29952 tmp = op1 + copysign (nextafter (0.5, 0.0), op1); return (long) tmp; */
29955 enum machine_mode mode = GET_MODE (op1);
29956 const struct real_format *fmt;
29957 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29960 /* load nextafter (0.5, 0.0) */
29961 fmt = REAL_MODE_FORMAT (mode);
29962 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29963 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
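/* pred_half = 0.5 - 2**(-p-1), the largest representable value below
   0.5 (p being the precision of MODE); using it instead of exactly 0.5
   keeps inputs just under a .5 boundary from being rounded up when ADJ
   is added to OP1.  */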
29965 /* adj = copysign (0.5, op1) */
29966 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29967 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29969 /* adj = op1 + adj */
29970 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29972 /* op0 = (imode)adj */
29973 expand_fix (op0, adj, 0);
29976 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0. */
29979 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29981 /* C code for the stuff we're doing below (for do_floor):
29982 xi = (long) op1;
29983 xi -= (double) xi > op1 ? 1 : 0;
29984 return xi; */
29986 enum machine_mode fmode = GET_MODE (op1);
29987 enum machine_mode imode = GET_MODE (op0);
29988 rtx ireg, freg, label, tmp;
29990 /* reg = (long)op1 */
29991 ireg = gen_reg_rtx (imode);
29992 expand_fix (ireg, op1, 0);
29994 /* freg = (double)reg */
29995 freg = gen_reg_rtx (fmode);
29996 expand_float (freg, ireg, 0);
29998 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29999 label = ix86_expand_sse_compare_and_jump (UNLE,
30000 freg, op1, !do_floor);
30001 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
30002 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
30003 emit_move_insn (ireg, tmp);
30005 emit_label (label);
30006 LABEL_NUSES (label) = 1;
30008 emit_move_insn (op0, ireg);
30011 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
30012 result in OPERAND0. */
30014 ix86_expand_rint (rtx operand0, rtx operand1)
30016 /* C code for the stuff we're doing below:
30017 xa = fabs (operand1);
30018 if (!isless (xa, 2**52)) return operand1;
30020 xa = xa + 2**52 - 2**52;
30021 return copysign (xa, operand1);
30023 enum machine_mode mode = GET_MODE (operand0);
30024 rtx res, xa, label, TWO52, mask;
30026 res = gen_reg_rtx (mode);
30027 emit_move_insn (res, operand1);
30029 /* xa = abs (operand1) */
30030 xa = ix86_expand_sse_fabs (res, &mask);
30032 /* if (!isless (xa, TWO52)) goto label; */
30033 TWO52 = ix86_gen_TWO52 (mode);
30034 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30036 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30037 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30039 ix86_sse_copysign_to_positive (res, xa, res, mask);
30041 emit_label (label);
30042 LABEL_NUSES (label) = 1;
30044 emit_move_insn (operand0, res);
30047 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
30050 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
30052 /* C code for the stuff we expand below.
30053 double xa = fabs (x), x2;
30054 if (!isless (xa, TWO52)) return x;
30056 xa = xa + TWO52 - TWO52;
30057 x2 = copysign (xa, x);
30066 enum machine_mode mode = GET_MODE (operand0);
30067 rtx xa, TWO52, tmp, label, one, res, mask;
30069 TWO52 = ix86_gen_TWO52 (mode);
30071 /* Temporary for holding the result, initialized to the input
30072 operand to ease control flow. */
30073 res = gen_reg_rtx (mode);
30074 emit_move_insn (res, operand1);
30076 /* xa = abs (operand1) */
30077 xa = ix86_expand_sse_fabs (res, &mask);
30079 /* if (!isless (xa, TWO52)) goto label; */
30080 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30082 /* xa = xa + TWO52 - TWO52; */
30083 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30084 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30086 /* xa = copysign (xa, operand1) */
30087 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30089 /* generate 1.0 or -1.0 */
30090 one = force_reg (mode,
30091 const_double_from_real_value (do_floor
30092 ? dconst1 : dconstm1, mode));
30094 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30095 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30096 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30097 gen_rtx_AND (mode, one, tmp)));
30098 /* We always need to subtract here to preserve signed zero. */
30099 tmp = expand_simple_binop (mode, MINUS,
30100 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30101 emit_move_insn (res, tmp);
30103 emit_label (label);
30104 LABEL_NUSES (label) = 1;
30106 emit_move_insn (operand0, res);
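/* Standalone C model (illustrative only) of the floor case above.  The
   expansion always subtracts, because for an integral input the masked
   correction is +0.0 and -0.0 - 0.0 keeps the signed zero, while
   -0.0 + 0.0 would not; ceil therefore uses -1.0 with the swapped
   comparison and still subtracts.

   #include <math.h>

   static double
   floor_model (double x)
   {
     const double two52 = ldexp (1.0, 52);
     double xa = fabs (x);
     if (!(xa < two52))
       return x;
     xa = xa + two52 - two52;       // nearest integer
     xa = copysign (xa, x);
     return xa - (xa > x ? 1.0 : 0.0);
   }
*/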
30109 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
30112 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
30114 /* C code for the stuff we expand below.
30115 double xa = fabs (x), x2;
30116 if (!isless (xa, TWO52))
30118 x2 = (double)(long)x;
30125 if (HONOR_SIGNED_ZEROS (mode))
30126 return copysign (x2, x);
30129 enum machine_mode mode = GET_MODE (operand0);
30130 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30132 TWO52 = ix86_gen_TWO52 (mode);
30134 /* Temporary for holding the result, initialized to the input
30135 operand to ease control flow. */
30136 res = gen_reg_rtx (mode);
30137 emit_move_insn (res, operand1);
30139 /* xa = abs (operand1) */
30140 xa = ix86_expand_sse_fabs (res, &mask);
30142 /* if (!isless (xa, TWO52)) goto label; */
30143 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30145 /* xa = (double)(long)x */
30146 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30147 expand_fix (xi, res, 0);
30148 expand_float (xa, xi, 0);
30151 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30153 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30154 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30155 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30156 gen_rtx_AND (mode, one, tmp)));
30157 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30158 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30159 emit_move_insn (res, tmp);
30161 if (HONOR_SIGNED_ZEROS (mode))
30162 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30164 emit_label (label);
30165 LABEL_NUSES (label) = 1;
30167 emit_move_insn (operand0, res);
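/* Standalone C model (illustrative only) of the integer-conversion
   variant above, floor case; ceil adds 1 under the swapped comparison.
   The TWO52 guard ensures the (long) cast cannot overflow.

   static double
   floor_model_int (double x)
   {
     double x2 = (double) (long) x; // truncate toward zero
     x2 -= (x2 > x ? 1.0 : 0.0);    // fix up negative non-integers
     return x2;                     // plus copysign (x2, x) if
   }                                // HONOR_SIGNED_ZEROS
*/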
30170 /* Expand SSE sequence for computing round from OPERAND1 storing
30171 into OPERAND0. Sequence that works without relying on DImode truncation
30172 via cvttsd2siq that is only available on 64bit targets. */
30174 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
30176 /* C code for the stuff we expand below.
30177 double xa = fabs (x), xa2, x2;
30178 if (!isless (xa, TWO52))
30180 Using the absolute value and copying the sign back makes
30181 -0.0 -> -0.0 correct.
30182 xa2 = xa + TWO52 - TWO52;
30187 else if (dxa > 0.5)
30189 x2 = copysign (xa2, x);
30192 enum machine_mode mode = GET_MODE (operand0);
30193 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30195 TWO52 = ix86_gen_TWO52 (mode);
30197 /* Temporary for holding the result, initialized to the input
30198 operand to ease control flow. */
30199 res = gen_reg_rtx (mode);
30200 emit_move_insn (res, operand1);
30202 /* xa = abs (operand1) */
30203 xa = ix86_expand_sse_fabs (res, &mask);
30205 /* if (!isless (xa, TWO52)) goto label; */
30206 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30208 /* xa2 = xa + TWO52 - TWO52; */
30209 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30210 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30212 /* dxa = xa2 - xa; */
30213 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30215 /* generate 0.5, 1.0 and -0.5 */
30216 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30217 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
30218 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
30222 tmp = gen_reg_rtx (mode);
30223 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30224 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30225 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30226 gen_rtx_AND (mode, one, tmp)));
30227 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30228 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30229 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30230 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30231 gen_rtx_AND (mode, one, tmp)));
30232 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30234 /* res = copysign (xa2, operand1) */
30235 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30237 emit_label (label);
30238 LABEL_NUSES (label) = 1;
30240 emit_move_insn (operand0, res);
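/* Standalone C model (illustrative only) of the round expansion above:
   first round to nearest (ties to even) via TWO52, then nudge the
   halfway cases so the overall result rounds half away from zero, as
   round () requires.

   #include <math.h>

   static double
   round_model (double x)
   {
     const double two52 = ldexp (1.0, 52);
     double xa = fabs (x);
     if (!(xa < two52))
       return x;
     double xa2 = xa + two52 - two52;
     double dxa = xa2 - xa;         // rounding error, in [-0.5, 0.5]
     if (dxa > 0.5)                 // overshot by more than half
       xa2 -= 1.0;
     else if (dxa <= -0.5)          // halfway case that rounded down
       xa2 += 1.0;
     return copysign (xa2, x);
   }
*/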
30243 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30246 ix86_expand_trunc (rtx operand0, rtx operand1)
30248 /* C code for SSE variant we expand below.
30249 double xa = fabs (x), x2;
30250 if (!isless (xa, TWO52))
30252 x2 = (double)(long)x;
30253 if (HONOR_SIGNED_ZEROS (mode))
30254 return copysign (x2, x);
30257 enum machine_mode mode = GET_MODE (operand0);
30258 rtx xa, xi, TWO52, label, res, mask;
30260 TWO52 = ix86_gen_TWO52 (mode);
30262 /* Temporary for holding the result, initialized to the input
30263 operand to ease control flow. */
30264 res = gen_reg_rtx (mode);
30265 emit_move_insn (res, operand1);
30267 /* xa = abs (operand1) */
30268 xa = ix86_expand_sse_fabs (res, &mask);
30270 /* if (!isless (xa, TWO52)) goto label; */
30271 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30273 /* x = (double)(long)x */
30274 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30275 expand_fix (xi, res, 0);
30276 expand_float (res, xi, 0);
30278 if (HONOR_SIGNED_ZEROS (mode))
30279 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30281 emit_label (label);
30282 LABEL_NUSES (label) = 1;
30284 emit_move_insn (operand0, res);
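/* Minimal C model (illustrative only) of the trunc expansion above: the
   double -> long -> double round trip itself truncates toward zero, so
   unlike floor/ceil no compensation step is needed afterwards.

   static double
   trunc_model (double x)           // |x| < 2**52 per the TWO52 guard
   {
     double x2 = (double) (long) x;
     return x2;                     // plus copysign when -0.0 matters
   }
*/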
30287 /* Expand SSE sequence for computing trunc from OPERAND1 storing
30290 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30292 enum machine_mode mode = GET_MODE (operand0);
30293 rtx xa, mask, TWO52, label, one, res, smask, tmp;
30295 /* C code for SSE variant we expand below.
30296 double xa = fabs (x), x2;
30297 if (!isless (xa, TWO52))
30299 xa2 = xa + TWO52 - TWO52;
30303 x2 = copysign (xa2, x);
30307 TWO52 = ix86_gen_TWO52 (mode);
30309 /* Temporary for holding the result, initialized to the input
30310 operand to ease control flow. */
30311 res = gen_reg_rtx (mode);
30312 emit_move_insn (res, operand1);
30314 /* xa = abs (operand1) */
30315 xa = ix86_expand_sse_fabs (res, &smask);
30317 /* if (!isless (xa, TWO52)) goto label; */
30318 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30320 /* res = xa + TWO52 - TWO52; */
30321 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30322 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30323 emit_move_insn (res, tmp);
30326 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30328 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30329 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30330 emit_insn (gen_rtx_SET (VOIDmode, mask,
30331 gen_rtx_AND (mode, mask, one)));
30332 tmp = expand_simple_binop (mode, MINUS,
30333 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30334 emit_move_insn (res, tmp);
30336 /* res = copysign (res, operand1) */
30337 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30339 emit_label (label);
30340 LABEL_NUSES (label) = 1;
30342 emit_move_insn (operand0, res);
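/* Standalone C model (illustrative only) of the DImode-free trunc
   variant above: round the absolute value via TWO52, pull back by 1.0
   wherever the rounding went up, then restore the sign.

   #include <math.h>

   static double
   trunc_model_32 (double x)
   {
     const double two52 = ldexp (1.0, 52);
     double xa = fabs (x);
     if (!(xa < two52))
       return x;
     double r = xa + two52 - two52; // nearest integer
     r -= (r > xa ? 1.0 : 0.0);     // now floor (xa), i.e. trunc of |x|
     return copysign (r, x);
   }
*/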
30345 /* Expand SSE sequence for computing round from OPERAND1 storing
30348 ix86_expand_round (rtx operand0, rtx operand1)
30350 /* C code for the stuff we're doing below:
30351 double xa = fabs (x);
30352 if (!isless (xa, TWO52))
30354 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30355 return copysign (xa, x);
30357 enum machine_mode mode = GET_MODE (operand0);
30358 rtx res, TWO52, xa, label, xi, half, mask;
30359 const struct real_format *fmt;
30360 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30362 /* Temporary for holding the result, initialized to the input
30363 operand to ease control flow. */
30364 res = gen_reg_rtx (mode);
30365 emit_move_insn (res, operand1);
30367 TWO52 = ix86_gen_TWO52 (mode);
30368 xa = ix86_expand_sse_fabs (res, &mask);
30369 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30371 /* load nextafter (0.5, 0.0) */
30372 fmt = REAL_MODE_FORMAT (mode);
30373 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30374 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30376 /* xa = xa + 0.5 */
30377 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30378 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30380 /* xa = (double)(int64_t)xa */
30381 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30382 expand_fix (xi, xa, 0);
30383 expand_float (xa, xi, 0);
30385 /* res = copysign (xa, operand1) */
30386 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30388 emit_label (label);
30389 LABEL_NUSES (label) = 1;
30391 emit_move_insn (operand0, res);
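/* Standalone C model (illustrative only) of the cvttsd2si-based round
   above: add just-under-0.5 to the absolute value, truncate, restore
   the sign.  As with lround, nextafter (0.5, 0.0) keeps values like
   0.5 - 2**-54 from rounding up through the intermediate addition.

   #include <math.h>

   static double
   round_model_int (double x)       // |x| < 2**52 per the guard
   {
     double xa = fabs (x) + nextafter (0.5, 0.0);
     xa = (double) (long) xa;       // truncating conversion
     return copysign (xa, x);
   }
*/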
30395 /* Table of valid machine attributes. */
30396 static const struct attribute_spec ix86_attribute_table[] =
30398 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30399 /* Stdcall attribute says callee is responsible for popping arguments
30400 if they are not variable. */
30401 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30402 /* Fastcall attribute says callee is responsible for popping arguments
30403 if they are not variable. */
30404 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30405 /* Thiscall attribute says callee is responsible for popping arguments
30406 if they are not variable. */
30407 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30408 /* Cdecl attribute says the callee is a normal C declaration */
30409 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30410 /* Regparm attribute specifies how many integer arguments are to be
30411 passed in registers. */
30412 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30413 /* Sseregparm attribute says we are using x86_64 calling conventions
30414 for FP arguments. */
30415 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30416 /* force_align_arg_pointer says this function realigns the stack at entry. */
30417 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30418 false, true, true, ix86_handle_cconv_attribute },
30419 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30420 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30421 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30422 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30424 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30425 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30426 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30427 SUBTARGET_ATTRIBUTE_TABLE,
30429 /* ms_abi and sysv_abi calling convention function attributes. */
30430 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30431 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30432 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30434 { NULL, 0, 0, false, false, false, NULL }
30437 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30439 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30440 tree vectype ATTRIBUTE_UNUSED,
30441 int misalign ATTRIBUTE_UNUSED)
30443 switch (type_of_cost)
30446 return ix86_cost->scalar_stmt_cost;
30449 return ix86_cost->scalar_load_cost;
30452 return ix86_cost->scalar_store_cost;
30455 return ix86_cost->vec_stmt_cost;
30458 return ix86_cost->vec_align_load_cost;
30461 return ix86_cost->vec_store_cost;
30463 case vec_to_scalar:
30464 return ix86_cost->vec_to_scalar_cost;
30466 case scalar_to_vec:
30467 return ix86_cost->scalar_to_vec_cost;
30469 case unaligned_load:
30470 case unaligned_store:
30471 return ix86_cost->vec_unalign_load_cost;
30473 case cond_branch_taken:
30474 return ix86_cost->cond_taken_branch_cost;
30476 case cond_branch_not_taken:
30477 return ix86_cost->cond_not_taken_branch_cost;
30483 gcc_unreachable ();
30488 /* Implement targetm.vectorize.builtin_vec_perm. */
30491 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30493 tree itype = TREE_TYPE (vec_type);
30494 bool u = TYPE_UNSIGNED (itype);
30495 enum machine_mode vmode = TYPE_MODE (vec_type);
30496 enum ix86_builtins fcode;
30497 bool ok = TARGET_SSE2;
30503 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30506 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30508 itype = ix86_get_builtin_type (IX86_BT_DI);
30513 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30517 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30519 itype = ix86_get_builtin_type (IX86_BT_SI);
30523 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30526 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30529 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30532 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30542 *mask_type = itype;
30543 return ix86_builtins[(int) fcode];
30546 /* Return a vector mode with twice as many elements as VMODE. */
30547 /* ??? Consider moving this to a table generated by genmodes.c. */
30549 static enum machine_mode
30550 doublesize_vector_mode (enum machine_mode vmode)
30554 case V2SFmode: return V4SFmode;
30555 case V1DImode: return V2DImode;
30556 case V2SImode: return V4SImode;
30557 case V4HImode: return V8HImode;
30558 case V8QImode: return V16QImode;
30560 case V2DFmode: return V4DFmode;
30561 case V4SFmode: return V8SFmode;
30562 case V2DImode: return V4DImode;
30563 case V4SImode: return V8SImode;
30564 case V8HImode: return V16HImode;
30565 case V16QImode: return V32QImode;
30567 case V4DFmode: return V8DFmode;
30568 case V8SFmode: return V16SFmode;
30569 case V4DImode: return V8DImode;
30570 case V8SImode: return V16SImode;
30571 case V16HImode: return V32HImode;
30572 case V32QImode: return V64QImode;
30575 gcc_unreachable ();
30579 /* Construct (set target (vec_select op0 (parallel perm))) and
30580 return true if that's a valid instruction in the active ISA. */
30583 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30585 rtx rperm[MAX_VECT_LEN], x;
30588 for (i = 0; i < nelt; ++i)
30589 rperm[i] = GEN_INT (perm[i]);
30591 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30592 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30593 x = gen_rtx_SET (VOIDmode, target, x);
30596 if (recog_memoized (x) < 0)
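/* Worked example (illustrative): for V4SI and perm = { 2 3 0 1 } this
   builds
     (set target (vec_select:V4SI op0 (parallel [2 3 0 1])))
   which recog can match as a pshufd with immediate 0x4e (swapping the
   two 64-bit halves).  */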
30604 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30607 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30608 const unsigned char *perm, unsigned nelt)
30610 enum machine_mode v2mode;
30613 v2mode = doublesize_vector_mode (GET_MODE (op0));
30614 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30615 return expand_vselect (target, x, perm, nelt);
30618 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30619 in terms of blendp[sd] / pblendw / pblendvb. */
30622 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30624 enum machine_mode vmode = d->vmode;
30625 unsigned i, mask, nelt = d->nelt;
30626 rtx target, op0, op1, x;
30628 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30630 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30633 /* This is a blend, not a permute. Elements must stay in their
30634 respective lanes. */
30635 for (i = 0; i < nelt; ++i)
30637 unsigned e = d->perm[i];
30638 if (!(e == i || e == i + nelt))
30645 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
30646 decision should be extracted elsewhere, so that we only try that
30647 sequence once all budget==3 options have been tried. */
30649 /* For bytes, see if bytes move in pairs so we can use pblendw with
30650 an immediate argument, rather than pblendvb with a vector argument. */
30651 if (vmode == V16QImode)
30653 bool pblendw_ok = true;
30654 for (i = 0; i < 16 && pblendw_ok; i += 2)
30655 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
30659 rtx rperm[16], vperm;
30661 for (i = 0; i < nelt; ++i)
30662 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
30664 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30665 vperm = force_reg (V16QImode, vperm);
30667 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
30672 target = d->target;
30684 for (i = 0; i < nelt; ++i)
30685 mask |= (d->perm[i] >= nelt) << i;
30689 for (i = 0; i < 2; ++i)
30690 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
30694 for (i = 0; i < 4; ++i)
30695 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
30699 for (i = 0; i < 8; ++i)
30700 mask |= (d->perm[i * 2] >= 16) << i;
30704 target = gen_lowpart (vmode, target);
30705 op0 = gen_lowpart (vmode, op0);
30706 op1 = gen_lowpart (vmode, op1);
30710 gcc_unreachable ();
30713 /* This matches five different patterns with the different modes. */
30714 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
30715 x = gen_rtx_SET (VOIDmode, target, x);
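/* Worked example (illustrative; the helper is ours) of the V4SF mask
   built above: each set bit selects the element from op1.  For
   perm = { 0 5 2 7 }, elements 1 and 3 come from op1, so mask == 0xa,
   corresponding to a blendps with immediate 10.

   static unsigned
   blend_mask (const unsigned char *perm, unsigned nelt)
   {
     unsigned i, mask = 0;
     for (i = 0; i < nelt; ++i)
       mask |= (unsigned) (perm[i] >= nelt) << i;
     return mask;
   }
*/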
30721 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30722 in terms of the variable form of vpermilps.
30724 Note that we will have already failed the immediate input vpermilps,
30725 which requires that the high and low part shuffle be identical; the
30726 variable form doesn't require that. */
30729 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
30731 rtx rperm[8], vperm;
30734 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
30737 /* We can only permute within the 128-bit lane. */
30738 for (i = 0; i < 8; ++i)
30740 unsigned e = d->perm[i];
30741 if (i < 4 ? e >= 4 : e < 4)
30748 for (i = 0; i < 8; ++i)
30750 unsigned e = d->perm[i];
30752 /* Within each 128-bit lane, the elements of op0 are numbered
30753 from 0 and the elements of op1 are numbered from 4. */
30759 rperm[i] = GEN_INT (e);
30762 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
30763 vperm = force_reg (V8SImode, vperm);
30764 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
30769 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30770 in terms of pshufb or vpperm. */
30773 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
30775 unsigned i, nelt, eltsz;
30776 rtx rperm[16], vperm, target, op0, op1;
30778 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
30780 if (GET_MODE_SIZE (d->vmode) != 16)
30787 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30789 for (i = 0; i < nelt; ++i)
30791 unsigned j, e = d->perm[i];
30792 for (j = 0; j < eltsz; ++j)
30793 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
30796 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30797 vperm = force_reg (V16QImode, vperm);
30799 target = gen_lowpart (V16QImode, d->target);
30800 op0 = gen_lowpart (V16QImode, d->op0);
30801 if (d->op0 == d->op1)
30802 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
30805 op1 = gen_lowpart (V16QImode, d->op1);
30806 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
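/* Worked example (illustrative; the helper is ours) of the
   control-vector loop above: each element index e expands to the eltsz
   byte indices e*eltsz .. e*eltsz+eltsz-1.  For a V4SI permutation
   { 2 3 0 1 } the pshufb control bytes are
   { 8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7 }.

   static void
   byte_mask (const unsigned char *perm, unsigned nelt, unsigned eltsz,
              unsigned char *bytes)
   {
     unsigned i, j;
     for (i = 0; i < nelt; ++i)
       for (j = 0; j < eltsz; ++j)
         bytes[i * eltsz + j] = (unsigned char) (perm[i] * eltsz + j);
   }
*/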
30812 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
30813 in a single instruction. */
30816 expand_vec_perm_1 (struct expand_vec_perm_d *d)
30818 unsigned i, nelt = d->nelt;
30819 unsigned char perm2[MAX_VECT_LEN];
30821 /* Check plain VEC_SELECT first, because AVX has instructions that could
30822 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
30823 input where SEL+CONCAT may not. */
30824 if (d->op0 == d->op1)
30826 int mask = nelt - 1;
30828 for (i = 0; i < nelt; i++)
30829 perm2[i] = d->perm[i] & mask;
30831 if (expand_vselect (d->target, d->op0, perm2, nelt))
30834 /* There are plenty of patterns in sse.md that are written for
30835 SEL+CONCAT and are not replicated for a single op. Perhaps
30836 that should be changed, to avoid the nastiness here. */
30838 /* Recognize interleave style patterns, which means incrementing
30839 every other permutation operand. */
30840 for (i = 0; i < nelt; i += 2)
30842 perm2[i] = d->perm[i] & mask;
30843 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
30845 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30848 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
30851 for (i = 0; i < nelt; i += 4)
30853 perm2[i + 0] = d->perm[i + 0] & mask;
30854 perm2[i + 1] = d->perm[i + 1] & mask;
30855 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
30856 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
30859 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30864 /* Finally, try the fully general two operand permute. */
30865 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
30868 /* Recognize interleave style patterns with reversed operands. */
30869 if (d->op0 != d->op1)
30871 for (i = 0; i < nelt; ++i)
30873 unsigned e = d->perm[i];
30881 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
30885 /* Try the SSE4.1 blend variable merge instructions. */
30886 if (expand_vec_perm_blend (d))
30889 /* Try one of the AVX vpermil variable permutations. */
30890 if (expand_vec_perm_vpermil (d))
30893 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
30894 if (expand_vec_perm_pshufb (d))
30900 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30901 in terms of a pair of pshuflw + pshufhw instructions. */
30904 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
30906 unsigned char perm2[MAX_VECT_LEN];
30910 if (d->vmode != V8HImode || d->op0 != d->op1)
30913 /* The two permutations only operate in 64-bit lanes. */
30914 for (i = 0; i < 4; ++i)
30915 if (d->perm[i] >= 4)
30917 for (i = 4; i < 8; ++i)
30918 if (d->perm[i] < 4)
30924 /* Emit the pshuflw. */
30925 memcpy (perm2, d->perm, 4);
30926 for (i = 4; i < 8; ++i)
30928 ok = expand_vselect (d->target, d->op0, perm2, 8);
30931 /* Emit the pshufhw. */
30932 memcpy (perm2 + 4, d->perm + 4, 4);
30933 for (i = 0; i < 4; ++i)
30935 ok = expand_vselect (d->target, d->target, perm2, 8);
30941 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30942 the permutation using the SSSE3 palignr instruction. This succeeds
30943 when all of the elements in PERM fit within one vector and we merely
30944 need to shift them down so that a single vector permutation has a
30945 chance to succeed. */
30948 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
30950 unsigned i, nelt = d->nelt;
30955 /* Even with AVX, palignr only operates on 128-bit vectors. */
30956 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30959 min = nelt, max = 0;
30960 for (i = 0; i < nelt; ++i)
30962 unsigned e = d->perm[i];
30968 if (min == 0 || max - min >= nelt)
30971 /* Given that we have SSSE3, we know we'll be able to implement the
30972 single operand permutation after the palignr with pshufb. */
30976 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
30977 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
30978 gen_lowpart (TImode, d->op1),
30979 gen_lowpart (TImode, d->op0), shift));
30981 d->op0 = d->op1 = d->target;
30984 for (i = 0; i < nelt; ++i)
30986 unsigned e = d->perm[i] - min;
30992 /* Test for the degenerate case where the alignment by itself
30993 produces the desired permutation. */
30997 ok = expand_vec_perm_1 (d);
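/* Worked example (illustrative) for the palignr path: with V8HI and
   perm = { 3 4 5 6 7 8 9 10 }, min == 3, so a palignr by 3 elements
   shifts the concatenated op1:op0 pair down and the residual
   single-operand permutation perm[i] - min is the identity, the
   degenerate case tested above.  A permutation such as
   { 5 4 3 6 9 8 7 10 } instead leaves { 2 1 0 3 6 5 4 7 } for the
   subsequent pshufb.  */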
31003 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31004 a two vector permutation into a single vector permutation by using
31005 an interleave operation to merge the vectors. */
31008 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
31010 struct expand_vec_perm_d dremap, dfinal;
31011 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
31012 unsigned contents, h1, h2, h3, h4;
31013 unsigned char remap[2 * MAX_VECT_LEN];
31017 if (d->op0 == d->op1)
31020 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31021 lanes. We can use similar techniques with the vperm2f128 instruction,
31022 but it requires slightly different logic. */
31023 if (GET_MODE_SIZE (d->vmode) != 16)
31026 /* Examine from whence the elements come. */
31028 for (i = 0; i < nelt; ++i)
31029 contents |= 1u << d->perm[i];
31031 /* Split the two input vectors into 4 halves. */
31032 h1 = (1u << nelt2) - 1;
31037 memset (remap, 0xff, sizeof (remap));
31040 /* If all elements come from the low halves, use interleave low; similarly
31041 use interleave high for the high halves. If the elements come from
31042 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
31043 if ((contents & (h1 | h3)) == contents)
31045 for (i = 0; i < nelt2; ++i)
31048 remap[i + nelt] = i * 2 + 1;
31049 dremap.perm[i * 2] = i;
31050 dremap.perm[i * 2 + 1] = i + nelt;
31053 else if ((contents & (h2 | h4)) == contents)
31055 for (i = 0; i < nelt2; ++i)
31057 remap[i + nelt2] = i * 2;
31058 remap[i + nelt + nelt2] = i * 2 + 1;
31059 dremap.perm[i * 2] = i + nelt2;
31060 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31063 else if ((contents & (h1 | h4)) == contents)
31065 for (i = 0; i < nelt2; ++i)
31068 remap[i + nelt + nelt2] = i + nelt2;
31069 dremap.perm[i] = i;
31070 dremap.perm[i + nelt2] = i + nelt + nelt2;
31074 dremap.vmode = V2DImode;
31076 dremap.perm[0] = 0;
31077 dremap.perm[1] = 3;
31080 else if ((contents & (h2 | h3)) == contents)
31082 for (i = 0; i < nelt2; ++i)
31084 remap[i + nelt2] = i;
31085 remap[i + nelt] = i + nelt2;
31086 dremap.perm[i] = i + nelt2;
31087 dremap.perm[i + nelt2] = i + nelt;
31091 dremap.vmode = V2DImode;
31093 dremap.perm[0] = 1;
31094 dremap.perm[1] = 2;
31100 /* Use the remapping array set up above to move the elements from their
31101 swizzled locations into their final destinations. */
31103 for (i = 0; i < nelt; ++i)
31105 unsigned e = remap[d->perm[i]];
31106 gcc_assert (e < nelt);
31107 dfinal.perm[i] = e;
31109 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31110 dfinal.op1 = dfinal.op0;
31111 dremap.target = dfinal.op0;
31113 /* Test if the final remap can be done with a single insn. For V4SFmode or
31114 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31116 ok = expand_vec_perm_1 (&dfinal);
31117 seq = get_insns ();
31123 if (dremap.vmode != dfinal.vmode)
31125 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31126 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31127 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31130 ok = expand_vec_perm_1 (&dremap);
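/* Worked example (illustrative): for V4SI and perm = { 1 5 0 4 }, all
   elements come from the low halves (contents == h1 | h3), so dremap
   becomes the interleave-low { 0 4 1 5 }; remapping the original
   permutation through that gives dfinal.perm = { 2 3 0 1 }, a single
   pshufd on the interleaved temporary.  */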
31137 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
31138 permutation with two pshufb insns and an ior. We should have already
31139 failed all two instruction sequences. */
31142 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31144 rtx rperm[2][16], vperm, l, h, op, m128;
31145 unsigned int i, nelt, eltsz;
31147 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31149 gcc_assert (d->op0 != d->op1);
31152 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31154 /* Generate two permutation masks. If the required element is within
31155 the given vector it is shuffled into the proper lane. If the required
31156 element is in the other vector, force a zero into the lane by setting
31157 bit 7 in the permutation mask. */
31158 m128 = GEN_INT (-128);
31159 for (i = 0; i < nelt; ++i)
31161 unsigned j, e = d->perm[i];
31162 unsigned which = (e >= nelt);
31166 for (j = 0; j < eltsz; ++j)
31168 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31169 rperm[1-which][i*eltsz + j] = m128;
31173 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31174 vperm = force_reg (V16QImode, vperm);
31176 l = gen_reg_rtx (V16QImode);
31177 op = gen_lowpart (V16QImode, d->op0);
31178 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31180 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31181 vperm = force_reg (V16QImode, vperm);
31183 h = gen_reg_rtx (V16QImode);
31184 op = gen_lowpart (V16QImode, d->op1);
31185 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31187 op = gen_lowpart (V16QImode, d->target);
31188 emit_insn (gen_iorv16qi3 (op, l, h));
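/* Scalar model (illustrative only; the helper is ours) of the two-mask
   technique above: each pshufb produces the wanted byte where its
   source vector holds it and zero elsewhere (bit 7 in a control byte
   zeroes the lane), so one ior completes the permutation.

   static void
   pshufb2_model (const unsigned char *a, const unsigned char *b,
                  const unsigned char *perm, unsigned char *out)
   {
     unsigned i;
     for (i = 0; i < 16; ++i)
       {
         unsigned e = perm[i];
         unsigned char lo = e < 16 ? a[e] : 0;        // first pshufb
         unsigned char hi = e >= 16 ? b[e - 16] : 0;  // second pshufb
         out[i] = lo | hi;                            // ior
       }
   }
*/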
31193 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31194 and extract-odd permutations. */
31197 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31199 rtx t1, t2, t3, t4;
31204 t1 = gen_reg_rtx (V4DFmode);
31205 t2 = gen_reg_rtx (V4DFmode);
31207 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31208 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31209 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31211 /* Now an unpck[lh]pd will produce the result required. */
31213 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31215 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
31221 static const unsigned char perm1[8] = { 0, 2, 1, 3, 5, 6, 5, 7 };
31222 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
31223 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
31225 t1 = gen_reg_rtx (V8SFmode);
31226 t2 = gen_reg_rtx (V8SFmode);
31227 t3 = gen_reg_rtx (V8SFmode);
31228 t4 = gen_reg_rtx (V8SFmode);
31230 /* Shuffle within the 128-bit lanes to produce:
31231 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
31232 expand_vselect (t1, d->op0, perm1, 8);
31233 expand_vselect (t2, d->op1, perm1, 8);
31235 /* Shuffle the lanes around to produce:
31236 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
31237 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
31238 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
31240 /* Now a vpermil2p will produce the result required. */
31241 /* ??? The vpermil2p requires a vector constant. Another option
31242 is a unpck[lh]ps to merge the two vectors to produce
31243 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
31244 vpermilps to get the elements into the final order. */
31247 memcpy (d->perm, odd ? permo: perme, 8);
31248 expand_vec_perm_vpermil (d);
31256 /* These are always directly implementable by expand_vec_perm_1. */
31257 gcc_unreachable ();
31261 return expand_vec_perm_pshufb2 (d);
31264 /* We need 2*log2(N)-1 operations to achieve odd/even
31265 with interleave. */
31266 t1 = gen_reg_rtx (V8HImode);
31267 t2 = gen_reg_rtx (V8HImode);
31268 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31269 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31270 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31271 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31273 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31275 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31282 return expand_vec_perm_pshufb2 (d);
31285 t1 = gen_reg_rtx (V16QImode);
31286 t2 = gen_reg_rtx (V16QImode);
31287 t3 = gen_reg_rtx (V16QImode);
31288 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31289 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31290 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31291 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31292 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31293 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31295 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31297 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31303 gcc_unreachable ();
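/* Worked trace (illustrative) of the V8HI interleave scheme above, for
   op0 = { 0 1 2 3 4 5 6 7 } and op1 = { 8 9 a b c d e f }:
     round 1:  low { 0 8 1 9 2 a 3 b }   high { 4 c 5 d 6 e 7 f }
     round 2:  low { 0 4 8 c 1 5 9 d }   high { 2 6 a e 3 7 b f }
     round 3:  low { 0 2 4 6 8 a c e }   high { 1 3 5 7 9 b d f }
   i.e. 2*log2(8)-1 = 5 interleaves emitted, since round 3 needs only
   the half selected by ODD.  */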
31309 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31310 extract-even and extract-odd permutations. */
31313 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31315 unsigned i, odd, nelt = d->nelt;
31318 if (odd != 0 && odd != 1)
31321 for (i = 1; i < nelt; ++i)
31322 if (d->perm[i] != 2 * i + odd)
31325 return expand_vec_perm_even_odd_1 (d, odd);
31328 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31329 permutations. We assume that expand_vec_perm_1 has already failed. */
31332 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31334 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31335 enum machine_mode vmode = d->vmode;
31336 unsigned char perm2[4];
31344 /* These are special-cased in sse.md so that we can optionally
31345 use the vbroadcast instruction. They expand to two insns
31346 if the input happens to be in a register. */
31347 gcc_unreachable ();
31353 /* These are always implementable using standard shuffle patterns. */
31354 gcc_unreachable ();
31358 /* These can be implemented via interleave. We save one insn by
31359 stopping once we have promoted to V4SImode and then use pshufd. */
31362 optab otab = vec_interleave_low_optab;
31366 otab = vec_interleave_high_optab;
31371 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31372 vmode = get_mode_wider_vector (vmode);
31373 op0 = gen_lowpart (vmode, op0);
31375 while (vmode != V4SImode);
31377 memset (perm2, elt, 4);
31378 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31383 gcc_unreachable ();
31387 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31388 broadcast permutations. */
31391 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31393 unsigned i, elt, nelt = d->nelt;
31395 if (d->op0 != d->op1)
31399 for (i = 1; i < nelt; ++i)
31400 if (d->perm[i] != elt)
31403 return expand_vec_perm_broadcast_1 (d);
31406 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31407 With all of the interface bits taken care of, perform the expansion
31408 in D and return true on success. */
31411 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31413 /* Try a single instruction expansion. */
31414 if (expand_vec_perm_1 (d))
31417 /* Try sequences of two instructions. */
31419 if (expand_vec_perm_pshuflw_pshufhw (d))
31422 if (expand_vec_perm_palignr (d))
31425 if (expand_vec_perm_interleave2 (d))
31428 if (expand_vec_perm_broadcast (d))
31431 /* Try sequences of three instructions. */
31433 if (expand_vec_perm_pshufb2 (d))
31436 /* ??? Look for narrow permutations whose element orderings would
31437 allow the promotion to a wider mode. */
31439 /* ??? Look for sequences of interleave or a wider permute that place
31440 the data into the correct lanes for a half-vector shuffle like
31441 pshuf[lh]w or vpermilps. */
31443 /* ??? Look for sequences of interleave that produce the desired results.
31444 The combinatorics of punpck[lh] get pretty ugly... */
31446 if (expand_vec_perm_even_odd (d))
31452 /* Extract the values from the vector CST into the permutation array in D.
31453 Return 0 on error, 1 if all values from the permutation come from the
31454 first vector, 2 if all values from the second vector, and 3 otherwise. */
31457 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31459 tree list = TREE_VECTOR_CST_ELTS (cst);
31460 unsigned i, nelt = d->nelt;
31463 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31465 unsigned HOST_WIDE_INT e;
31467 if (!host_integerp (TREE_VALUE (list), 1))
31469 e = tree_low_cst (TREE_VALUE (list), 1);
31473 ret |= (e < nelt ? 1 : 2);
31476 gcc_assert (list == NULL);
31478 /* If all elements are from the second vector, fold them to the first. */
31480 for (i = 0; i < nelt; ++i)
31481 d->perm[i] -= nelt;
31487 ix86_expand_vec_perm_builtin (tree exp)
31489 struct expand_vec_perm_d d;
31490 tree arg0, arg1, arg2;
31492 arg0 = CALL_EXPR_ARG (exp, 0);
31493 arg1 = CALL_EXPR_ARG (exp, 1);
31494 arg2 = CALL_EXPR_ARG (exp, 2);
31496 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31497 d.nelt = GET_MODE_NUNITS (d.vmode);
31498 d.testing_p = false;
31499 gcc_assert (VECTOR_MODE_P (d.vmode));
31501 if (TREE_CODE (arg2) != VECTOR_CST)
31503 error_at (EXPR_LOCATION (exp),
31504 "vector permutation requires vector constant");
31508 switch (extract_vec_perm_cst (&d, arg2))
31514 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31518 if (!operand_equal_p (arg0, arg1, 0))
31520 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31521 d.op0 = force_reg (d.vmode, d.op0);
31522 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31523 d.op1 = force_reg (d.vmode, d.op1);
31527 /* The elements of PERM do not suggest that only the first operand
31528 is used, but both operands are identical. Allow easier matching
31529 of the permutation by folding the permutation into the single
31532 unsigned i, nelt = d.nelt;
31533 for (i = 0; i < nelt; ++i)
31534 if (d.perm[i] >= nelt)
31540 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31541 d.op0 = force_reg (d.vmode, d.op0);
31546 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31547 d.op0 = force_reg (d.vmode, d.op0);
31552 d.target = gen_reg_rtx (d.vmode);
31553 if (ix86_expand_vec_perm_builtin_1 (&d))
31556 /* For compiler generated permutations, we should never get here, because
31557 the compiler should also be checking the ok hook. But since this is a
31558 builtin the user has access to, don't abort. */
31562 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31565 sorry ("vector permutation (%d %d %d %d)",
31566 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31569 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31570 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31571 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31574 sorry ("vector permutation "
31575 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31576 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31577 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31578 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31579 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31582 gcc_unreachable ();
31585 return CONST0_RTX (d.vmode);
31588 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31591 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31593 struct expand_vec_perm_d d;
31597 d.vmode = TYPE_MODE (vec_type);
31598 d.nelt = GET_MODE_NUNITS (d.vmode);
31599 d.testing_p = true;
31601 /* Given sufficient ISA support we can just return true here
31602 for selected vector modes. */
31603 if (GET_MODE_SIZE (d.vmode) == 16)
31605 /* All implementable with a single vpperm insn. */
31608 /* All implementable with 2 pshufb + 1 ior. */
31611 /* All implementable with shufpd or unpck[lh]pd. */
31616 vec_mask = extract_vec_perm_cst (&d, mask);
31618 /* This hook cannot be called in response to something that the
31619 user does (unlike the builtin expander) so we shouldn't ever see
31620 an error generated from the extract. */
31621 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31622 one_vec = (vec_mask != 3);
31624 /* Implementable with shufps or pshufd. */
31625 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31628 /* Otherwise we have to go through the motions and see if we can
31629 figure out how to generate the requested permutation. */
31630 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31631 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31633 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31636 ret = ix86_expand_vec_perm_builtin_1 (&d);
31643 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
31645 struct expand_vec_perm_d d;
31651 d.vmode = GET_MODE (targ);
31652 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31653 d.testing_p = false;
31655 for (i = 0; i < nelt; ++i)
31656 d.perm[i] = i * 2 + odd;
31658 /* We'll either be able to implement the permutation directly... */
31659 if (expand_vec_perm_1 (&d))
31662 /* ... or we use the special-case patterns. */
31663 expand_vec_perm_even_odd_1 (&d, odd);
31666 /* This function returns the calling-ABI-specific va_list type node.
31667 It returns the FNDECL-specific va_list type. */
31670 ix86_fn_abi_va_list (tree fndecl)
31673 return va_list_type_node;
31674 gcc_assert (fndecl != NULL_TREE);
31676 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
31677 return ms_va_list_type_node;
31679 return sysv_va_list_type_node;
31682 /* Returns the canonical va_list type specified by TYPE. If there
31683 is no valid TYPE provided, it returns NULL_TREE. */
31686 ix86_canonical_va_list_type (tree type)
31690 /* Resolve references and pointers to va_list type. */
31691 if (TREE_CODE (type) == MEM_REF)
31692 type = TREE_TYPE (type);
31693 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
31694 type = TREE_TYPE (type);
31695 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
31696 type = TREE_TYPE (type);
31700 wtype = va_list_type_node;
31701 gcc_assert (wtype != NULL_TREE);
31703 if (TREE_CODE (wtype) == ARRAY_TYPE)
31705 /* If va_list is an array type, the argument may have decayed
31706 to a pointer type, e.g. by being passed to another function.
31707 In that case, unwrap both types so that we can compare the
31708 underlying records. */
31709 if (TREE_CODE (htype) == ARRAY_TYPE
31710 || POINTER_TYPE_P (htype))
31712 wtype = TREE_TYPE (wtype);
31713 htype = TREE_TYPE (htype);
31716 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31717 return va_list_type_node;
31718 wtype = sysv_va_list_type_node;
31719 gcc_assert (wtype != NULL_TREE);
31721 if (TREE_CODE (wtype) == ARRAY_TYPE)
31723 /* If va_list is an array type, the argument may have decayed
31724 to a pointer type, e.g. by being passed to another function.
31725 In that case, unwrap both types so that we can compare the
31726 underlying records. */
31727 if (TREE_CODE (htype) == ARRAY_TYPE
31728 || POINTER_TYPE_P (htype))
31730 wtype = TREE_TYPE (wtype);
31731 htype = TREE_TYPE (htype);
31734 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31735 return sysv_va_list_type_node;
31736 wtype = ms_va_list_type_node;
31737 gcc_assert (wtype != NULL_TREE);
31739 if (TREE_CODE (wtype) == ARRAY_TYPE)
31741 /* If va_list is an array type, the argument may have decayed
31742 to a pointer type, e.g. by being passed to another function.
31743 In that case, unwrap both types so that we can compare the
31744 underlying records. */
31745 if (TREE_CODE (htype) == ARRAY_TYPE
31746 || POINTER_TYPE_P (htype))
31748 wtype = TREE_TYPE (wtype);
31749 htype = TREE_TYPE (htype);
31752 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31753 return ms_va_list_type_node;
31756 return std_canonical_va_list_type (type);
31759 /* Iterate through the target-specific builtin types for va_list.
31760 IDX denotes the iterator, *PTREE is set to the result type of
31761 the va_list builtin, and *PNAME to its internal type.
31762 Returns zero if there is no element for this index, otherwise
31763 IDX should be increased upon the next call.
31764 Note, do not iterate a base builtin's name like __builtin_va_list.
31765 Used from c_common_nodes_and_builtins. */
31768 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
31778 *ptree = ms_va_list_type_node;
31779 *pname = "__builtin_ms_va_list";
31783 *ptree = sysv_va_list_type_node;
31784 *pname = "__builtin_sysv_va_list";
31792 #undef TARGET_SCHED_DISPATCH
31793 #define TARGET_SCHED_DISPATCH has_dispatch
31794 #undef TARGET_SCHED_DISPATCH_DO
31795 #define TARGET_SCHED_DISPATCH_DO do_dispatch
31797 /* The size of the dispatch window is the total number of bytes of
31798 object code allowed in a window. */
31799 #define DISPATCH_WINDOW_SIZE 16
31801 /* Number of dispatch windows considered for scheduling. */
31802 #define MAX_DISPATCH_WINDOWS 3
31804 /* Maximum number of instructions in a window. */
31807 /* Maximum number of immediate operands in a window. */
31810 /* Maximum number of immediate bits allowed in a window. */
31811 #define MAX_IMM_SIZE 128
31813 /* Maximum number of 32 bit immediates allowed in a window. */
31814 #define MAX_IMM_32 4
31816 /* Maximum number of 64 bit immediates allowed in a window. */
31817 #define MAX_IMM_64 2
31819 /* Maximum total of loads or prefetches allowed in a window. */
31822 /* Maximum total of stores allowed in a window. */
31823 #define MAX_STORE 1
31829 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
31830 enum dispatch_group {
31845 /* Number of allowable groups in a dispatch window. It is an array
31846 indexed by dispatch_group enum. 100 is used as a big number,
31847 because the number of these kinds of operations does not have any
31848 effect in a dispatch window, but we need them for other reasons in
31850 static unsigned int num_allowable_groups[disp_last] = {
31851 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
31854 char group_name[disp_last + 1][16] = {
31855 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
31856 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
31857 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
31860 /* Instruction path. */
31863 path_single, /* Single micro op. */
31864 path_double, /* Double micro op. */
31865 path_multi, /* Instructions with more than 2 micro ops. */
31869 /* sched_insn_info defines a window onto the instructions scheduled in
31870 the basic block. It contains a pointer to the insn_info table and
31871 the instruction scheduled.
31873 Windows are allocated for each basic block and are linked
31875 typedef struct sched_insn_info_s {
31877 enum dispatch_group group;
31878 enum insn_path path;
31883 /* Linked list of dispatch windows. This is a two-way list of
31884 dispatch windows of a basic block. It contains information about
31885 the number of uops in the window and the total number of
31886 instructions and of bytes in the object code for this dispatch
31888 typedef struct dispatch_windows_s {
31889 int num_insn; /* Number of insn in the window. */
31890 int num_uops; /* Number of uops in the window. */
31891 int window_size; /* Number of bytes in the window. */
31892 int window_num; /* Window number, 0 or 1. */
31893 int num_imm; /* Number of immediates in an insn. */
31894 int num_imm_32; /* Number of 32 bit immediates in an insn. */
31895 int num_imm_64; /* Number of 64 bit immediates in an insn. */
31896 int imm_size; /* Total immediates in the window. */
31897 int num_loads; /* Total memory loads in the window. */
31898 int num_stores; /* Total memory stores in the window. */
31899 int violation; /* Violation exists in window. */
31900 sched_insn_info *window; /* Pointer to the window. */
31901 struct dispatch_windows_s *next;
31902 struct dispatch_windows_s *prev;
31903 } dispatch_windows;
31905 /* Immediate values used in an insn. */
31906 typedef struct imm_info_s
31913 static dispatch_windows *dispatch_window_list;
31914 static dispatch_windows *dispatch_window_list1;
31916 /* Get dispatch group of insn. */
31918 static enum dispatch_group
31919 get_mem_group (rtx insn)
31921 enum attr_memory memory;
31923 if (INSN_CODE (insn) < 0)
31924 return disp_no_group;
31925 memory = get_attr_memory (insn);
31926 if (memory == MEMORY_STORE)
31929 if (memory == MEMORY_LOAD)
31932 if (memory == MEMORY_BOTH)
31933 return disp_load_store;
31935 return disp_no_group;
31938 /* Return true if insn is a compare instruction. */
31943 enum attr_type type;
31945 type = get_attr_type (insn);
31946 return (type == TYPE_TEST
31947 || type == TYPE_ICMP
31948 || type == TYPE_FCMP
31949 || GET_CODE (PATTERN (insn)) == COMPARE);
31952 /* Return true if a dispatch violation was encountered. */
31955 dispatch_violation (void)
31957 if (dispatch_window_list->next)
31958 return dispatch_window_list->next->violation;
31959 return dispatch_window_list->violation;
31962 /* Return true if insn is a branch instruction. */
31965 is_branch (rtx insn)
31967 return (CALL_P (insn) || JUMP_P (insn));
31970 /* Return true if insn is a prefetch instruction. */
31973 is_prefetch (rtx insn)
31975 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
31978 /* This function initializes a dispatch window and the list container holding a
31979 pointer to the window. */
31982 init_window (int window_num)
31985 dispatch_windows *new_list;
31987 if (window_num == 0)
31988 new_list = dispatch_window_list;
31990 new_list = dispatch_window_list1;
31992 new_list->num_insn = 0;
31993 new_list->num_uops = 0;
31994 new_list->window_size = 0;
31995 new_list->next = NULL;
31996 new_list->prev = NULL;
31997 new_list->window_num = window_num;
31998 new_list->num_imm = 0;
31999 new_list->num_imm_32 = 0;
32000 new_list->num_imm_64 = 0;
32001 new_list->imm_size = 0;
32002 new_list->num_loads = 0;
32003 new_list->num_stores = 0;
32004 new_list->violation = false;
32006 for (i = 0; i < MAX_INSN; i++)
32008 new_list->window[i].insn = NULL;
32009 new_list->window[i].group = disp_no_group;
32010 new_list->window[i].path = no_path;
32011 new_list->window[i].byte_len = 0;
32012 new_list->window[i].imm_bytes = 0;
32017 /* This function allocates and initializes a dispatch window and the
32018 list container holding a pointer to the window. */
32020 static dispatch_windows *
32021 allocate_window (void)
32023 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
32024 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
32029 /* This routine initializes the dispatch scheduling information. It
32030 initiates building dispatch scheduler tables and constructs the
32031 first dispatch window. */
32034 init_dispatch_sched (void)
32036 /* Allocate a dispatch list and a window. */
32037 dispatch_window_list = allocate_window ();
32038 dispatch_window_list1 = allocate_window ();
32043 /* This function returns true if a branch is detected. End of a basic block
32044 does not have to be a branch, but here we assume only branches end a basic block. */
32048 is_end_basic_block (enum dispatch_group group)
32050 return group == disp_branch;
32053 /* This function is called when the end of a window processing is reached. */
32056 process_end_window (void)
32058 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
32059 if (dispatch_window_list->next)
32061 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
32062 gcc_assert (dispatch_window_list->window_size
32063 + dispatch_window_list1->window_size <= 48);
32069 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
32070 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
32071 for 48 bytes of instructions. Note that these windows are not dispatch
32072 windows whose sizes are DISPATCH_WINDOW_SIZE. */
32074 static dispatch_windows *
32075 allocate_next_window (int window_num)
32077 if (window_num == 0)
32079 if (dispatch_window_list->next)
32082 return dispatch_window_list;
32085 dispatch_window_list->next = dispatch_window_list1;
32086 dispatch_window_list1->prev = dispatch_window_list;
32088 return dispatch_window_list1;
32091 /* Increment the number of immediate operands of an instruction. */
32094 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
32099 switch ( GET_CODE (*in_rtx))
32104 (imm_values->imm)++;
32105 if (x86_64_immediate_operand (*in_rtx, SImode))
32106 (imm_values->imm32)++;
32108 (imm_values->imm64)++;
32112 (imm_values->imm)++;
32113 (imm_values->imm64)++;
32117 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
32119 (imm_values->imm)++;
32120 (imm_values->imm32)++;
32131 /* Compute number of immediate operands of an instruction. */
32134 find_constant (rtx in_rtx, imm_info *imm_values)
32136 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
32137 (rtx_function) find_constant_1, (void *) imm_values);
32140 /* Return total size of immediate operands of an instruction along with number
32141 of corresponding immediate-operands. It initializes its parameters to zero
32142 before calling FIND_CONSTANT.
32143 INSN is the input instruction. IMM is the total of immediates.
32144 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
32148 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
32150 imm_info imm_values = {0, 0, 0};
32152 find_constant (insn, &imm_values);
32153 *imm = imm_values.imm;
32154 *imm32 = imm_values.imm32;
32155 *imm64 = imm_values.imm64;
32156 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
32159 /* This function indicates if an operand of an instruction is an immediate. */
32163 has_immediate (rtx insn)
32165 int num_imm_operand;
32166 int num_imm32_operand;
32167 int num_imm64_operand;
32170 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32171 &num_imm64_operand);
32175 /* Return single or double path for instructions. */
32177 static enum insn_path
32178 get_insn_path (rtx insn)
32180 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
32182 if ((int)path == 0)
32183 return path_single;
32185 if ((int)path == 1)
32186 return path_double;
32191 /* Return insn dispatch group. */
32193 static enum dispatch_group
32194 get_insn_group (rtx insn)
32196 enum dispatch_group group = get_mem_group (insn);
32200 if (is_branch (insn))
32201 return disp_branch;
32206 if (has_immediate (insn))
32209 if (is_prefetch (insn))
32210 return disp_prefetch;
32212 return disp_no_group;
32215 /* Count number of GROUP restricted instructions in a dispatch
32216 window WINDOW_LIST. */
32219 count_num_restricted (rtx insn, dispatch_windows *window_list)
32221 enum dispatch_group group = get_insn_group (insn);
32223 int num_imm_operand;
32224 int num_imm32_operand;
32225 int num_imm64_operand;
32227 if (group == disp_no_group)
32230 if (group == disp_imm)
32232 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32233 &num_imm64_operand);
32234 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
32235 || num_imm_operand + window_list->num_imm > MAX_IMM
32236 || (num_imm32_operand > 0
32237 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
32238 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
32239 || (num_imm64_operand > 0
32240 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
32241 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
32242 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
32243 && num_imm64_operand > 0
32244 && ((window_list->num_imm_64 > 0
32245 && window_list->num_insn >= 2)
32246 || window_list->num_insn >= 3)))
32252 if ((group == disp_load_store
32253 && (window_list->num_loads >= MAX_LOAD
32254 || window_list->num_stores >= MAX_STORE))
32255 || ((group == disp_load
32256 || group == disp_prefetch)
32257 && window_list->num_loads >= MAX_LOAD)
32258 || (group == disp_store
32259 && window_list->num_stores >= MAX_STORE))
32265 /* This function returns true if insn satisfies dispatch rules on the
32266 last window scheduled. */
32269 fits_dispatch_window (rtx insn)
32271 dispatch_windows *window_list = dispatch_window_list;
32272 dispatch_windows *window_list_next = dispatch_window_list->next;
32273 unsigned int num_restrict;
32274 enum dispatch_group group = get_insn_group (insn);
32275 enum insn_path path = get_insn_path (insn);
32278 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
32279 instructions should be given the lowest priority in the
32280 scheduling process in the Haifa scheduler to make sure they will be
32281 scheduled in the same dispatch window as the reference to them. */
32282 if (group == disp_jcc || group == disp_cmp)
32285 /* Check nonrestricted. */
32286 if (group == disp_no_group || group == disp_branch)
32289 /* Get last dispatch window. */
32290 if (window_list_next)
32291 window_list = window_list_next;
32293 if (window_list->window_num == 1)
32295 sum = window_list->prev->window_size + window_list->window_size;
32298 || (min_insn_size (insn) + sum) >= 48)
32299 /* Window 1 is full. Go for next window. */
32303 num_restrict = count_num_restricted (insn, window_list);
32305 if (num_restrict > num_allowable_groups[group])
32308 /* See if it fits in the first window. */
32309 if (window_list->window_num == 0)
32311 /* The first window should have only single- and double-path uops. */
32313 if (path == path_double
32314 && (window_list->num_uops + 2) > MAX_INSN)
32316 else if (path != path_single)
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
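/* Illustrative trace (editor's sketch): an insn classified as
   disp_store carrying one 32-bit immediate advances window_size by its
   encoded byte length, num_insn and num_stores by one, num_imm and
   num_imm_32 by one, and imm_size by the bytes of that immediate.  */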
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed what is allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If the current window is full, get a new window.
     Window number zero is full if MAX_INSN uops are scheduled in it.
     Window number one is full if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it already has MAX_INSN
     instructions in it.  */
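  /* Worked example (editor's sketch): if window zero holds 18 bytes
     and window one holds 12, sum == 30 and a 4-byte insn still fits,
     since 30 + 4 < 48.  Had the two windows already summed to 32
     bytes, or had the new insn pushed the total to 48 or beyond,
     window one would be treated as full and the window pair flushed
     via process_end_window ().  */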
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !insn_fits)
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of the basic block is reached; do the end-basic-block
	 processing.  */
      process_end_window ();
      return;
    }
}
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file,
	   "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file,
	       "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
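/* Usage note (editor's addition): DEBUG_FUNCTION entry points such as
   debug_dispatch_window () and debug_ready_dispatch () below are meant
   to be invoked by hand from a debugger session, e.g.
       (gdb) call debug_dispatch_window (0)
   rather than from compiler code.  */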
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);
  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file,
	   "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
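/* Illustrative caller (editor's sketch, hypothetical): a scheduler
   hook would typically gate the dispatch machinery on has_dispatch ()
   before driving do_dispatch (), along the lines of

     if (has_dispatch (insn, IS_DISPATCH_ON))
       do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);

   The actual call sites live in the Haifa scheduler, outside this
   file.  */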
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static unsigned int
ix86_units_per_simd_word (enum machine_mode mode)
{
  /* Disable the double-precision vectorizer if needed.  */
  if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
    return UNITS_PER_WORD;

#if 0
  /* FIXME: AVX has 32-byte floating point vector operations and
     16-byte integer vector operations.  But the vectorizer doesn't
     support different sizes for integer and floating point vectors.
     We limit the vector size to 16 bytes.  */
  if (TARGET_AVX)
    return (mode == DFmode || mode == SFmode) ? 32 : 16;
  else
#endif
    return TARGET_SSE ? 16 : UNITS_PER_WORD;
}
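/* Example of the resulting vector widths (editor's note): with SSE
   enabled this hook reports 16-byte units, so the vectorizer builds
   V4SFmode/V2DFmode vectors; without SSE it falls back to
   UNITS_PER_WORD and vectorization in SIMD registers is effectively
   disabled.  */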
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override
#undef TARGET_OPTION_OPTIMIZATION
#define TARGET_OPTION_OPTIMIZATION ix86_option_optimization

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok
#undef TARGET_VECTORIZE_UNITS_PER_SIMD_WORD
#define TARGET_VECTORIZE_UNITS_PER_SIMD_WORD \
  ix86_units_per_simd_word

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_IRA_COVER_CLASSES
#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end
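/* Editor's note: TARGET_INITIALIZER (from target-def.h) expands to an
   initializer for every target hook, substituting the overrides
   #define'd above and falling back to the documented defaults for the
   rest.  */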
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"