1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
51 #include "elf/dwarf2.h"
53 #include "tm-constrs.h"
/* Forward declarations for static helpers presumably defined later in
   this file (definitions are outside this chunk -- TODO confirm).  */
static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-checking limit.  A target header may predefine
   CHECK_STACK_LIMIT, in which case this fallback is not used.
   The conditional was left unterminated in this chunk; close it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   The cost arrays below have five entries: QI, HI, SI, DI and "other",
   so any remaining mode maps to index 4.  The original macro in this
   chunk ended with a dangling line continuation; the "other" fallback
   completes the conditional.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy entry: always fall back to a library
   call, for table slots that do not apply (e.g. the 64-bit column on
   32-bit-only tunings).  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when optimizing for code size rather than speed.
   Entries are expressed in bytes via COSTS_N_BYTES, not in latencies.
   NOTE(review): this chunk appears truncated -- the '"large" insn',
   MOVE_RATIO and branch-cost entries present in the other tables, and
   the closing "};", are missing here; verify against upstream i386.c.  */
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),            /* cost of an add instruction */
  COSTS_N_BYTES (3),            /* cost of a lea instruction */
  COSTS_N_BYTES (2),            /* variable shift costs */
  COSTS_N_BYTES (3),            /* constant shift costs */
  {COSTS_N_BYTES (3),           /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),           /*                               HI */
   COSTS_N_BYTES (3),           /*                               SI */
   COSTS_N_BYTES (3),           /*                               DI */
   COSTS_N_BYTES (5)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),           /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),           /*                          HI */
   COSTS_N_BYTES (3),           /*                          SI */
   COSTS_N_BYTES (3),           /*                          DI */
   COSTS_N_BYTES (5)},          /*                       other */
  COSTS_N_BYTES (3),            /* cost of movsx */
  COSTS_N_BYTES (3),            /* cost of movzx */
  2,                            /* cost for loading QImode using movzbl */
  {2, 2, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 2},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {2, 2, 2},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  3,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {3, 3},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  3,                            /* cost of moving SSE register */
  {3, 3, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {3, 3, 3},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_BYTES (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),            /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),            /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),            /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),            /* cost of FSQRT instruction.  */
  /* Stringop strategies -- presumably memcpy then memset (TODO confirm):
     size-optimized tuning always uses 1-byte rep prefix.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  1,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  1,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Processor costs (relative to an add) */
/* Cost table for the original 80386.
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (6),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),           /*                               HI */
   COSTS_N_INSNS (6),           /*                               SI */
   COSTS_N_INSNS (6),           /*                               DI */
   COSTS_N_INSNS (6)},          /*                            other */
  COSTS_N_INSNS (1),            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /*                          HI */
   COSTS_N_INSNS (23),          /*                          SI */
   COSTS_N_INSNS (23),          /*                          DI */
   COSTS_N_INSNS (23)},         /*                       other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  0,                            /* size of l1 cache */
  0,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (23),           /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),           /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),           /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),          /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Cost table for the 80486.
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (3),            /* variable shift costs */
  COSTS_N_INSNS (2),            /* constant shift costs */
  {COSTS_N_INSNS (12),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),          /*                               HI */
   COSTS_N_INSNS (12),          /*                               SI */
   COSTS_N_INSNS (12),          /*                               DI */
   COSTS_N_INSNS (12)},         /*                            other */
  1,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),          /*                          HI */
   COSTS_N_INSNS (40),          /*                          SI */
   COSTS_N_INSNS (40),          /*                          DI */
   COSTS_N_INSNS (40)},         /*                       other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  15,                           /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {8, 8, 8},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {8, 8, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  4,                            /* size of l1 cache.  486 has 8kB cache
                                   shared for code and data, so 4kB is
                                   not really precise.  */
  4,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),           /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Cost table for the Pentium.
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (11),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),          /*                               HI */
   COSTS_N_INSNS (11),          /*                               SI */
   COSTS_N_INSNS (11),          /*                               DI */
   COSTS_N_INSNS (11)},         /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),          /*                          HI */
   COSTS_N_INSNS (25),          /*                          SI */
   COSTS_N_INSNS (25),          /*                          DI */
   COSTS_N_INSNS (25)},         /*                       other */
  COSTS_N_INSNS (3),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  6,                            /* cost for loading QImode using movzbl */
  {2, 4, 2},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 4, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  8,                            /* cost of moving MMX register */
  {8, 8},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {8, 8},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 8, 16},                   /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 8, 16},                   /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  8,                            /* size of l2 cache */
  0,                            /* size of prefetch block */
  0,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Cost table for the PentiumPro/P6 family.
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (4),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /*                               HI */
   COSTS_N_INSNS (4),           /*                               SI */
   COSTS_N_INSNS (4),           /*                               DI */
   COSTS_N_INSNS (4)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),          /*                          HI */
   COSTS_N_INSNS (17),          /*                          SI */
   COSTS_N_INSNS (17),          /*                          DI */
   COSTS_N_INSNS (17)},         /*                       other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 4, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 2, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache */
  32,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (3),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is way
     to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Cost table for the AMD Geode.
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (1),            /* cost of a lea instruction */
  COSTS_N_INSNS (2),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /*                               HI */
   COSTS_N_INSNS (7),           /*                               SI */
   COSTS_N_INSNS (7),           /*                               DI */
   COSTS_N_INSNS (7)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),          /*                          HI */
   COSTS_N_INSNS (39),          /*                          SI */
   COSTS_N_INSNS (39),          /*                          DI */
   COSTS_N_INSNS (39)},         /*                       other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  1,                            /* cost for loading QImode using movzbl */
  {1, 1, 1},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {1, 1, 1},                    /* cost of storing integer registers */
  1,                            /* cost of reg,reg fld/fst */
  {1, 1, 1},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 6, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */

  1,                            /* cost of moving MMX register */
  {1, 1},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {1, 1},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  1,                            /* cost of moving SSE register */
  {1, 1, 1},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {1, 1, 1},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  1,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  128,                          /* size of l2 cache.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (6),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),           /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Cost table for the AMD K6.
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),           /*                               HI */
   COSTS_N_INSNS (3),           /*                               SI */
   COSTS_N_INSNS (3),           /*                               DI */
   COSTS_N_INSNS (3)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),          /*                          HI */
   COSTS_N_INSNS (18),          /*                          SI */
   COSTS_N_INSNS (18),          /*                          DI */
   COSTS_N_INSNS (18)},         /*                       other */
  COSTS_N_INSNS (2),            /* cost of movsx */
  COSTS_N_INSNS (2),            /* cost of movzx */
  8,                            /* "large" insn */
  3,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {6, 6, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 4},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {2, 2, 8},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  6,                            /* MMX or SSE register to integer */
  32,                           /* size of l1 cache.  */
  32,                           /* size of l2 cache.  Some models
                                   have integrated l2 cache, but
                                   optimizing for k6 is not important
                                   enough to worry about that.  */
  32,                           /* size of prefetch block */
  1,                            /* number of parallel prefetches */
  COSTS_N_INSNS (2),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),           /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Cost table for the AMD Athlon.
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (5),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),           /*                               HI */
   COSTS_N_INSNS (5),           /*                               SI */
   COSTS_N_INSNS (5),           /*                               DI */
   COSTS_N_INSNS (5)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /*                          HI */
   COSTS_N_INSNS (42),          /*                          SI */
   COSTS_N_INSNS (74),          /*                          DI */
   COSTS_N_INSNS (74)},         /*                       other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {4, 4},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Cost table for the AMD K8 (Opteron/Athlon 64).
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /*                               HI */
   COSTS_N_INSNS (3),           /*                               SI */
   COSTS_N_INSNS (4),           /*                               DI */
   COSTS_N_INSNS (5)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),          /*                          HI */
   COSTS_N_INSNS (42),          /*                          SI */
   COSTS_N_INSNS (74),          /*                          DI */
   COSTS_N_INSNS (74)},         /*                       other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 3, 6},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  5,                            /* MMX or SSE register to integer */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes
     some time).  */
  100,                          /* number of parallel prefetches */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  5,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  3,                            /* vec_unalign_load_cost.  */
  3,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  2,                            /* cond_not_taken_branch_cost.  */
/* Cost table for AMD Family 10h (Barcelona).
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (2),            /* cost of a lea instruction */
  COSTS_N_INSNS (1),            /* variable shift costs */
  COSTS_N_INSNS (1),            /* constant shift costs */
  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),           /*                               HI */
   COSTS_N_INSNS (3),           /*                               SI */
   COSTS_N_INSNS (4),           /*                               DI */
   COSTS_N_INSNS (5)},          /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),          /*                          HI */
   COSTS_N_INSNS (51),          /*                          SI */
   COSTS_N_INSNS (83),          /*                          DI */
   COSTS_N_INSNS (83)},         /*                       other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  8,                            /* "large" insn */
  4,                            /* cost for loading QImode using movzbl */
  {3, 4, 3},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {3, 4, 3},                    /* cost of storing integer registers */
  4,                            /* cost of reg,reg fld/fst */
  {4, 4, 12},                   /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {6, 6, 8},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {3, 3},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {4, 4},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  2,                            /* cost of moving SSE register */
  {4, 4, 3},                    /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {4, 4, 5},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  3,                            /* MMX or SSE register to integer */
  /* NOTE(review): the lines below were bare text in this chunk; they are
     latency notes and belong inside a comment -- restored as such:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
      MOVD reg64, xmmreg Double FADD 3
      MOVD reg32, xmmreg Double FADD 3  */
  64,                           /* size of l1 cache.  */
  512,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes
     some time).  */
  100,                          /* number of parallel prefetches */
  COSTS_N_INSNS (4),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),           /* cost of FSQRT instruction.  */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                            /* scalar_stmt_cost.  */
  2,                            /* scalar load_cost.  */
  2,                            /* scalar_store_cost.  */
  6,                            /* vec_stmt_cost.  */
  0,                            /* vec_to_scalar_cost.  */
  2,                            /* scalar_to_vec_cost.  */
  2,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  2,                            /* vec_store_cost.  */
  2,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Cost table for the Pentium 4.
   NOTE(review): the MOVE_RATIO and branch-cost entries and the closing
   "};" appear truncated from this chunk -- verify against upstream.  */
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),            /* cost of an add instruction */
  COSTS_N_INSNS (3),            /* cost of a lea instruction */
  COSTS_N_INSNS (4),            /* variable shift costs */
  COSTS_N_INSNS (4),            /* constant shift costs */
  {COSTS_N_INSNS (15),          /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),          /*                               HI */
   COSTS_N_INSNS (15),          /*                               SI */
   COSTS_N_INSNS (15),          /*                               DI */
   COSTS_N_INSNS (15)},         /*                            other */
  0,                            /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),          /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),          /*                          HI */
   COSTS_N_INSNS (56),          /*                          SI */
   COSTS_N_INSNS (56),          /*                          DI */
   COSTS_N_INSNS (56)},         /*                       other */
  COSTS_N_INSNS (1),            /* cost of movsx */
  COSTS_N_INSNS (1),            /* cost of movzx */
  16,                           /* "large" insn */
  2,                            /* cost for loading QImode using movzbl */
  {4, 5, 4},                    /* cost of loading integer registers
                                   in QImode, HImode and SImode.
                                   Relative to reg-reg move (2).  */
  {2, 3, 2},                    /* cost of storing integer registers */
  2,                            /* cost of reg,reg fld/fst */
  {2, 2, 6},                    /* cost of loading fp registers
                                   in SFmode, DFmode and XFmode */
  {4, 4, 6},                    /* cost of storing fp registers
                                   in SFmode, DFmode and XFmode */
  2,                            /* cost of moving MMX register */
  {2, 2},                       /* cost of loading MMX registers
                                   in SImode and DImode */
  {2, 2},                       /* cost of storing MMX registers
                                   in SImode and DImode */
  12,                           /* cost of moving SSE register */
  {12, 12, 12},                 /* cost of loading SSE registers
                                   in SImode, DImode and TImode */
  {2, 2, 8},                    /* cost of storing SSE registers
                                   in SImode, DImode and TImode */
  10,                           /* MMX or SSE register to integer */
  8,                            /* size of l1 cache.  */
  256,                          /* size of l2 cache.  */
  64,                           /* size of prefetch block */
  6,                            /* number of parallel prefetches */
  COSTS_N_INSNS (5),            /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),            /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),           /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),            /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),            /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),           /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  /* NOTE(review): the continuation of this initializer (the final
     {-1, ...} terminator entry) appears truncated from this chunk --
     restore it from upstream before building.  */
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   DUMMY_STRINGOP_ALGS},
  1,                            /* scalar_stmt_cost.  */
  1,                            /* scalar load_cost.  */
  1,                            /* scalar_store_cost.  */
  1,                            /* vec_stmt_cost.  */
  1,                            /* vec_to_scalar_cost.  */
  1,                            /* scalar_to_vec_cost.  */
  1,                            /* vec_align_load_cost.  */
  2,                            /* vec_unalign_load_cost.  */
  1,                            /* vec_store_cost.  */
  3,                            /* cond_taken_branch_cost.  */
  1,                            /* cond_not_taken_branch_cost.  */
/* Cost table used when tuning for Nocona.  Initializer is positional
   and must stay in struct processor_costs field order.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* memcpy descriptors (32-bit row first, then 64-bit), followed by
   memset descriptors.  */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Cost table used when tuning for Core 2.  Initializer is positional
   and must stay in struct processor_costs field order.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* memcpy descriptors (32-bit row first, then 64-bit), followed by
   memset descriptors.  */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
/* Cost table used when tuning for Atom.  Initializer is positional
   and must stay in struct processor_costs field order.  */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* memcpy descriptors (32-bit row first, then 64-bit), followed by
   memset descriptors.  */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
/* Cost table for the -mtune=generic 64-bit model.  Initializer is
   positional and must stay in struct processor_costs field order.  */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Only the 64-bit stringop rows are populated; the 32-bit rows are
   DUMMY_STRINGOP_ALGS since generic64 never emits 32-bit code.  */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Cost table for the -mtune=generic 32-bit model.  Initializer is
   positional and must stay in struct processor_costs field order.  */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Only the 32-bit stringop rows are populated; the 64-bit rows are
   DUMMY_STRINGOP_ALGS since generic32 never emits 64-bit code.  */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
/* Cost table currently in effect.  Defaults to pentium_cost; presumably
   re-pointed at the table matching the selected -mtune model during
   option processing -- confirm against ix86_override_options.  */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator.  The single-processor m_* masks
   below are OR-ed together to build the per-feature tuning masks in
   initial_ix86_tune_features.  */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
/* Convenience unions of related processors.  */
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
/* One flag per X86_TUNE_* value; derived from initial_ix86_tune_features
   and the active processor mask when options are processed.  */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
/* NOTE(review): this initializer is positional over the X86_TUNE_*
   enum -- entries must stay in exact enum order; verify alignment of
   each mask with its comment against i386.h before editing.  */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
/* One flag per X86_ARCH_* value; filled from initial_ix86_arch_features
   below for the -march processor.  */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
/* NOTE(review): positional over the X86_ARCH_* enum -- keep entries in
   exact enum order.  Negated masks mean "every processor except".  */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors for which ACCUMULATE_OUTGOING_ARGS is preferred.  */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
/* Processors on which the 80387 math should always be considered
   "fancy" at the architecture level.  */
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm forced by command line, if any;
   no_stringop means "choose from the cost tables".  */
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* Indexed by hard register number; initializers come from i386.h.  */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* Row order follows the hard-register numbering: general regs, x87
   stack, special regs, SSE, MMX, REX integer, REX SSE.  */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
/* Maps GCC hard register numbers to debug-format register numbers;
   -1 marks registers with no debug encoding.  */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
/* Maps GCC hard register numbers to debug-format register numbers;
   -1 marks registers with no debug encoding.  */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
1688 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
1702 rtx ix86_compare_op0 = NULL_RTX;
1703 rtx ix86_compare_op1 = NULL_RTX;
1705 /* Define parameter passing and return registers. */
1707 static int const x86_64_int_parameter_registers[6] =
1709 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1712 static int const x86_64_ms_abi_int_parameter_registers[4] =
1714 CX_REG, DX_REG, R8_REG, R9_REG
1717 static int const x86_64_int_return_registers[4] =
1719 AX_REG, DX_REG, DI_REG, SI_REG
1722 /* Define the structure for the machine field in struct function. */
1724 struct GTY(()) stack_local_entry {
1725 unsigned short mode;
1728 struct stack_local_entry *next;
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1761 HOST_WIDE_INT frame;
1763 int outgoing_arguments_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* true if sse prefetch instruction is not NOOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt. */
1851 /* Register class used for passing given 64bit part of the argument.
1852 These represent classes as documented by the PS ABI, with the exception
1853 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1854 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (upper half does contain padding). */
1858 enum x86_64_reg_class
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
1873 #define MAX_CLASSES 4
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1877 static bool ext_80387_constants_init = 0;
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 static void ix86_add_new_builtins (int);
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH,
1891 IX86_FUNCTION_SPECIFIC_TUNE,
1892 IX86_FUNCTION_SPECIFIC_FPMATH,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted;
1919 static int ix86_arch_specified;
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1973 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1975 /* Define a set of ISAs which aren't available when a given ISA is
1976 disabled. MMX and SSE ISAs are handled separately. */
1978 #define OPTION_MASK_ISA_MMX_UNSET \
1979 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_UNSET \
1981 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1982 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1984 #define OPTION_MASK_ISA_SSE_UNSET \
1985 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1986 #define OPTION_MASK_ISA_SSE2_UNSET \
1987 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1988 #define OPTION_MASK_ISA_SSE3_UNSET \
1989 (OPTION_MASK_ISA_SSE3 \
1990 | OPTION_MASK_ISA_SSSE3_UNSET \
1991 | OPTION_MASK_ISA_SSE4A_UNSET )
1992 #define OPTION_MASK_ISA_SSSE3_UNSET \
1993 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1995 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1996 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1997 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1998 #define OPTION_MASK_ISA_AVX_UNSET \
1999 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
2000 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2002 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
2004 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2006 #define OPTION_MASK_ISA_SSE4A_UNSET \
2007 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2008 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2009 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2010 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2011 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2012 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2013 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2014 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2015 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2018 /* Vectorization library interface and handlers. */
2019 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2020 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2021 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2023 /* Processor target table, indexed by processor number */
2026 const struct processor_costs *cost; /* Processor costs */
2027 const int align_loop; /* Default alignments. */
2028 const int align_loop_max_skip;
2029 const int align_jump;
2030 const int align_jump_max_skip;
2031 const int align_func;
2034 static const struct ptt processor_target_table[PROCESSOR_max] =
2036 {&i386_cost, 4, 3, 4, 3, 4},
2037 {&i486_cost, 16, 15, 16, 15, 16},
2038 {&pentium_cost, 16, 7, 16, 7, 16},
2039 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2040 {&geode_cost, 0, 0, 0, 0, 0},
2041 {&k6_cost, 32, 7, 32, 7, 32},
2042 {&athlon_cost, 16, 7, 16, 7, 16},
2043 {&pentium4_cost, 0, 0, 0, 0, 0},
2044 {&k8_cost, 16, 7, 16, 7, 16},
2045 {&nocona_cost, 0, 0, 0, 0, 0},
2046 {&core2_cost, 16, 10, 16, 10, 16},
2047 {&generic32_cost, 16, 7, 16, 7, 16},
2048 {&generic64_cost, 16, 10, 16, 10, 16},
2049 {&amdfam10_cost, 32, 24, 32, 7, 32},
2050 {&atom_cost, 16, 7, 16, 7, 16}
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2079 /* Implement TARGET_HANDLE_OPTION. */
2082 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2089 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2090 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2094 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2095 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2102 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2103 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2107 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2108 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2118 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2119 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2123 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2124 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2131 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2132 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2136 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2144 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2145 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2149 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2158 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2162 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2170 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2171 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2175 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2183 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2184 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2188 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2196 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2197 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2201 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2202 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2209 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2210 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2214 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2220 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2232 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2237 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2238 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2245 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2246 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2250 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2251 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2258 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2259 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2263 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2264 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2271 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2272 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2276 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2277 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2284 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2285 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2289 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2290 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2297 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2298 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2302 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2303 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2310 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2311 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2315 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2316 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2323 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2324 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2328 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2329 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2336 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2337 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2341 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2342 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2349 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2350 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2354 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2364 /* Return a string the documents the current -m options. The caller is
2365 responsible for freeing the string. */
2368 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2369 const char *fpmath, bool add_nl_p)
2371 struct ix86_target_opts
2373 const char *option; /* option string */
2374 int mask; /* isa mask options */
2377 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2378 preceding options while match those first. */
2379 static struct ix86_target_opts isa_opts[] =
2381 { "-m64", OPTION_MASK_ISA_64BIT },
2382 { "-msse5", OPTION_MASK_ISA_SSE5 },
2383 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2384 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2385 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2386 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2387 { "-msse3", OPTION_MASK_ISA_SSE3 },
2388 { "-msse2", OPTION_MASK_ISA_SSE2 },
2389 { "-msse", OPTION_MASK_ISA_SSE },
2390 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2391 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2392 { "-mmmx", OPTION_MASK_ISA_MMX },
2393 { "-mabm", OPTION_MASK_ISA_ABM },
2394 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2395 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2396 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2397 { "-maes", OPTION_MASK_ISA_AES },
2398 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2402 static struct ix86_target_opts flag_opts[] =
2404 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2405 { "-m80387", MASK_80387 },
2406 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2407 { "-malign-double", MASK_ALIGN_DOUBLE },
2408 { "-mcld", MASK_CLD },
2409 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2410 { "-mieee-fp", MASK_IEEE_FP },
2411 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2412 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2413 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2414 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2415 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2416 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2417 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2418 { "-mno-red-zone", MASK_NO_RED_ZONE },
2419 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2420 { "-mrecip", MASK_RECIP },
2421 { "-mrtd", MASK_RTD },
2422 { "-msseregparm", MASK_SSEREGPARM },
2423 { "-mstack-arg-probe", MASK_STACK_PROBE },
2424 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2427 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2430 char target_other[40];
2439 memset (opts, '\0', sizeof (opts));
2441 /* Add -march= option. */
2444 opts[num][0] = "-march=";
2445 opts[num++][1] = arch;
2448 /* Add -mtune= option. */
2451 opts[num][0] = "-mtune=";
2452 opts[num++][1] = tune;
2455 /* Pick out the options in isa options. */
2456 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2458 if ((isa & isa_opts[i].mask) != 0)
2460 opts[num++][0] = isa_opts[i].option;
2461 isa &= ~ isa_opts[i].mask;
2465 if (isa && add_nl_p)
2467 opts[num++][0] = isa_other;
2468 sprintf (isa_other, "(other isa: 0x%x)", isa);
2471 /* Add flag options. */
2472 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2474 if ((flags & flag_opts[i].mask) != 0)
2476 opts[num++][0] = flag_opts[i].option;
2477 flags &= ~ flag_opts[i].mask;
2481 if (flags && add_nl_p)
2483 opts[num++][0] = target_other;
2484 sprintf (target_other, "(other flags: 0x%x)", isa);
2487 /* Add -fpmath= option. */
2490 opts[num][0] = "-mfpmath=";
2491 opts[num++][1] = fpmath;
2498 gcc_assert (num < ARRAY_SIZE (opts));
2500 /* Size the string. */
2502 sep_len = (add_nl_p) ? 3 : 1;
2503 for (i = 0; i < num; i++)
2506 for (j = 0; j < 2; j++)
2508 len += strlen (opts[i][j]);
2511 /* Build the string. */
2512 ret = ptr = (char *) xmalloc (len);
2515 for (i = 0; i < num; i++)
2519 for (j = 0; j < 2; j++)
2520 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2527 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2535 for (j = 0; j < 2; j++)
2538 memcpy (ptr, opts[i][j], len2[j]);
2540 line_len += len2[j];
2545 gcc_assert (ret + len >= ptr);
2550 /* Function that is callable from the debugger to print the current
2553 ix86_debug_options (void)
2555 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2556 ix86_arch_string, ix86_tune_string,
2557 ix86_fpmath_string, true);
2561 fprintf (stderr, "%s\n\n", opts);
2565 fprintf (stderr, "<no options>\n\n");
2570 /* Sometimes certain combinations of command options do not make
2571 sense on a particular target machine. You can define a macro
2572 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2573 defined, is executed once just after all the command options have
2576 Don't use this macro to turn on various extra optimizations for
2577 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2580 override_options (bool main_args_p)
2583 unsigned int ix86_arch_mask, ix86_tune_mask;
2588 /* Comes from final.c -- no real reason to change it. */
2589 #define MAX_CODE_ALIGN 16
2597 PTA_PREFETCH_SSE = 1 << 4,
2599 PTA_3DNOW_A = 1 << 6,
2603 PTA_POPCNT = 1 << 10,
2605 PTA_SSE4A = 1 << 12,
2606 PTA_NO_SAHF = 1 << 13,
2607 PTA_SSE4_1 = 1 << 14,
2608 PTA_SSE4_2 = 1 << 15,
2611 PTA_PCLMUL = 1 << 18,
2619 const char *const name; /* processor name or nickname. */
2620 const enum processor_type processor;
2621 const enum attr_cpu schedule;
2622 const unsigned /*enum pta_flags*/ flags;
2624 const processor_alias_table[] =
2626 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2627 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2628 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2629 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2630 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2631 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2632 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2633 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2634 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2635 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2636 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2637 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2638 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2640 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2642 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2643 PTA_MMX | PTA_SSE | PTA_SSE2},
2644 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2645 PTA_MMX |PTA_SSE | PTA_SSE2},
2646 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2647 PTA_MMX | PTA_SSE | PTA_SSE2},
2648 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2649 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2650 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2651 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2652 | PTA_CX16 | PTA_NO_SAHF},
2653 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2654 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2655 | PTA_SSSE3 | PTA_CX16},
2656 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2657 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2658 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2659 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2660 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2661 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2662 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2663 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2664 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2665 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2666 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2667 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2668 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2669 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2670 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2671 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2672 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2673 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2674 {"x86-64", PROCESSOR_K8, CPU_K8,
2675 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2676 {"k8", PROCESSOR_K8, CPU_K8,
2677 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2678 | PTA_SSE2 | PTA_NO_SAHF},
2679 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2680 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2681 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2682 {"opteron", PROCESSOR_K8, CPU_K8,
2683 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2684 | PTA_SSE2 | PTA_NO_SAHF},
2685 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2686 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2687 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2688 {"athlon64", PROCESSOR_K8, CPU_K8,
2689 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2690 | PTA_SSE2 | PTA_NO_SAHF},
2691 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2692 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2693 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2694 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2695 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2696 | PTA_SSE2 | PTA_NO_SAHF},
2697 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2698 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2699 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2700 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2701 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2702 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2703 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2704 0 /* flags are only used for -march switch. */ },
2705 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2706 PTA_64BIT /* flags are only used for -march switch. */ },
2709 int const pta_size = ARRAY_SIZE (processor_alias_table);
2711 /* Set up prefix/suffix so the error messages refer to either the command
2712 line argument, or the attribute(target). */
2721 prefix = "option(\"";
2726 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2727 SUBTARGET_OVERRIDE_OPTIONS;
2730 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2731 SUBSUBTARGET_OVERRIDE_OPTIONS;
2734 /* -fPIC is the default for x86_64. */
2735 if (TARGET_MACHO && TARGET_64BIT)
2738 /* Set the default values for switches whose default depends on TARGET_64BIT
2739 in case they weren't overwritten by command line options. */
2742 /* Mach-O doesn't support omitting the frame pointer for now. */
2743 if (flag_omit_frame_pointer == 2)
2744 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2745 if (flag_asynchronous_unwind_tables == 2)
2746 flag_asynchronous_unwind_tables = 1;
2747 if (flag_pcc_struct_return == 2)
2748 flag_pcc_struct_return = 0;
2752 if (flag_omit_frame_pointer == 2)
2753 flag_omit_frame_pointer = 0;
2754 if (flag_asynchronous_unwind_tables == 2)
2755 flag_asynchronous_unwind_tables = 0;
2756 if (flag_pcc_struct_return == 2)
2757 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2760 /* Need to check -mtune=generic first. */
2761 if (ix86_tune_string)
2763 if (!strcmp (ix86_tune_string, "generic")
2764 || !strcmp (ix86_tune_string, "i686")
2765 /* As special support for cross compilers we read -mtune=native
2766 as -mtune=generic. With native compilers we won't see the
2767 -mtune=native, as it was changed by the driver. */
2768 || !strcmp (ix86_tune_string, "native"))
2771 ix86_tune_string = "generic64";
2773 ix86_tune_string = "generic32";
2775 /* If this call is for setting the option attribute, allow the
2776 generic32/generic64 that was previously set. */
2777 else if (!main_args_p
2778 && (!strcmp (ix86_tune_string, "generic32")
2779 || !strcmp (ix86_tune_string, "generic64")))
2781 else if (!strncmp (ix86_tune_string, "generic", 7))
2782 error ("bad value (%s) for %stune=%s %s",
2783 ix86_tune_string, prefix, suffix, sw);
2787 if (ix86_arch_string)
2788 ix86_tune_string = ix86_arch_string;
2789 if (!ix86_tune_string)
2791 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2792 ix86_tune_defaulted = 1;
2795 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2796 need to use a sensible tune option. */
2797 if (!strcmp (ix86_tune_string, "generic")
2798 || !strcmp (ix86_tune_string, "x86-64")
2799 || !strcmp (ix86_tune_string, "i686"))
2802 ix86_tune_string = "generic64";
2804 ix86_tune_string = "generic32";
2807 if (ix86_stringop_string)
2809 if (!strcmp (ix86_stringop_string, "rep_byte"))
2810 stringop_alg = rep_prefix_1_byte;
2811 else if (!strcmp (ix86_stringop_string, "libcall"))
2812 stringop_alg = libcall;
2813 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2814 stringop_alg = rep_prefix_4_byte;
2815 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2817 /* rep; movq isn't available in 32-bit code. */
2818 stringop_alg = rep_prefix_8_byte;
2819 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2820 stringop_alg = loop_1_byte;
2821 else if (!strcmp (ix86_stringop_string, "loop"))
2822 stringop_alg = loop;
2823 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2824 stringop_alg = unrolled_loop;
2826 error ("bad value (%s) for %sstringop-strategy=%s %s",
2827 ix86_stringop_string, prefix, suffix, sw);
2829 if (!strcmp (ix86_tune_string, "x86-64"))
2830 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2831 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2832 prefix, suffix, prefix, suffix, prefix, suffix);
2834 if (!ix86_arch_string)
2835 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2837 ix86_arch_specified = 1;
2839 if (!strcmp (ix86_arch_string, "generic"))
2840 error ("generic CPU can be used only for %stune=%s %s",
2841 prefix, suffix, sw);
2842 if (!strncmp (ix86_arch_string, "generic", 7))
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 /* Validate -mabi= value. */
2847 if (ix86_abi_string)
2849 if (strcmp (ix86_abi_string, "sysv") == 0)
2850 ix86_abi = SYSV_ABI;
2851 else if (strcmp (ix86_abi_string, "ms") == 0)
2854 error ("unknown ABI (%s) for %sabi=%s %s",
2855 ix86_abi_string, prefix, suffix, sw);
2858 ix86_abi = DEFAULT_ABI;
2860 if (ix86_cmodel_string != 0)
2862 if (!strcmp (ix86_cmodel_string, "small"))
2863 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2864 else if (!strcmp (ix86_cmodel_string, "medium"))
2865 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2866 else if (!strcmp (ix86_cmodel_string, "large"))
2867 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2869 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2870 else if (!strcmp (ix86_cmodel_string, "32"))
2871 ix86_cmodel = CM_32;
2872 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2873 ix86_cmodel = CM_KERNEL;
2875 error ("bad value (%s) for %scmodel=%s %s",
2876 ix86_cmodel_string, prefix, suffix, sw);
2880 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2881 use of rip-relative addressing. This eliminates fixups that
2882 would otherwise be needed if this object is to be placed in a
2883 DLL, and is essentially just as efficient as direct addressing. */
2884 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2885 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2886 else if (TARGET_64BIT)
2887 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2889 ix86_cmodel = CM_32;
2891 if (ix86_asm_string != 0)
2894 && !strcmp (ix86_asm_string, "intel"))
2895 ix86_asm_dialect = ASM_INTEL;
2896 else if (!strcmp (ix86_asm_string, "att"))
2897 ix86_asm_dialect = ASM_ATT;
2899 error ("bad value (%s) for %sasm=%s %s",
2900 ix86_asm_string, prefix, suffix, sw);
2902 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2903 error ("code model %qs not supported in the %s bit mode",
2904 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2905 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2906 sorry ("%i-bit mode not compiled in",
2907 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2909 for (i = 0; i < pta_size; i++)
2910 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2912 ix86_schedule = processor_alias_table[i].schedule;
2913 ix86_arch = processor_alias_table[i].processor;
2914 /* Default cpu tuning to the architecture. */
2915 ix86_tune = ix86_arch;
2917 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2918 error ("CPU you selected does not support x86-64 "
2921 if (processor_alias_table[i].flags & PTA_MMX
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2923 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2924 if (processor_alias_table[i].flags & PTA_3DNOW
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2926 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2927 if (processor_alias_table[i].flags & PTA_3DNOW_A
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2929 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2930 if (processor_alias_table[i].flags & PTA_SSE
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2932 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2933 if (processor_alias_table[i].flags & PTA_SSE2
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2935 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2936 if (processor_alias_table[i].flags & PTA_SSE3
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2939 if (processor_alias_table[i].flags & PTA_SSSE3
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2941 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2942 if (processor_alias_table[i].flags & PTA_SSE4_1
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2944 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2945 if (processor_alias_table[i].flags & PTA_SSE4_2
2946 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2947 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2948 if (processor_alias_table[i].flags & PTA_AVX
2949 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2950 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2951 if (processor_alias_table[i].flags & PTA_FMA
2952 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2953 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2954 if (processor_alias_table[i].flags & PTA_SSE4A
2955 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2956 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2957 if (processor_alias_table[i].flags & PTA_SSE5
2958 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2959 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2960 if (processor_alias_table[i].flags & PTA_ABM
2961 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2962 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2963 if (processor_alias_table[i].flags & PTA_CX16
2964 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2965 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2966 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2967 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2968 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2969 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2970 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2971 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2972 if (processor_alias_table[i].flags & PTA_MOVBE
2973 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2974 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2975 if (processor_alias_table[i].flags & PTA_AES
2976 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2977 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2978 if (processor_alias_table[i].flags & PTA_PCLMUL
2979 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2980 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2981 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2982 x86_prefetch_sse = true;
2988 error ("bad value (%s) for %sarch=%s %s",
2989 ix86_arch_string, prefix, suffix, sw);
2991 ix86_arch_mask = 1u << ix86_arch;
2992 for (i = 0; i < X86_ARCH_LAST; ++i)
2993 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2995 for (i = 0; i < pta_size; i++)
2996 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2998 ix86_schedule = processor_alias_table[i].schedule;
2999 ix86_tune = processor_alias_table[i].processor;
3000 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3002 if (ix86_tune_defaulted)
3004 ix86_tune_string = "x86-64";
3005 for (i = 0; i < pta_size; i++)
3006 if (! strcmp (ix86_tune_string,
3007 processor_alias_table[i].name))
3009 ix86_schedule = processor_alias_table[i].schedule;
3010 ix86_tune = processor_alias_table[i].processor;
3013 error ("CPU you selected does not support x86-64 "
3016 /* Intel CPUs have always interpreted SSE prefetch instructions as
3017 NOPs; so, we can enable SSE prefetch instructions even when
3018 -mtune (rather than -march) points us to a processor that has them.
3019 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3020 higher processors. */
3022 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3023 x86_prefetch_sse = true;
3027 error ("bad value (%s) for %stune=%s %s",
3028 ix86_tune_string, prefix, suffix, sw);
3030 ix86_tune_mask = 1u << ix86_tune;
3031 for (i = 0; i < X86_TUNE_LAST; ++i)
3032 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3035 ix86_cost = &ix86_size_cost;
3037 ix86_cost = processor_target_table[ix86_tune].cost;
3039 /* Arrange to set up i386_stack_locals for all functions. */
3040 init_machine_status = ix86_init_machine_status;
3042 /* Validate -mregparm= value. */
3043 if (ix86_regparm_string)
3046 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3047 i = atoi (ix86_regparm_string);
3048 if (i < 0 || i > REGPARM_MAX)
3049 error ("%sregparm=%d%s is not between 0 and %d",
3050 prefix, i, suffix, REGPARM_MAX);
3055 ix86_regparm = REGPARM_MAX;
3057 /* If the user has provided any of the -malign-* options,
3058 warn and use that value only if -falign-* is not set.
3059 Remove this code in GCC 3.2 or later. */
3060 if (ix86_align_loops_string)
3062 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3063 prefix, suffix, suffix);
3064 if (align_loops == 0)
3066 i = atoi (ix86_align_loops_string);
3067 if (i < 0 || i > MAX_CODE_ALIGN)
3068 error ("%salign-loops=%d%s is not between 0 and %d",
3069 prefix, i, suffix, MAX_CODE_ALIGN);
3071 align_loops = 1 << i;
3075 if (ix86_align_jumps_string)
3077 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3078 prefix, suffix, suffix);
3079 if (align_jumps == 0)
3081 i = atoi (ix86_align_jumps_string);
3082 if (i < 0 || i > MAX_CODE_ALIGN)
3083 error ("%salign-loops=%d%s is not between 0 and %d",
3084 prefix, i, suffix, MAX_CODE_ALIGN);
3086 align_jumps = 1 << i;
3090 if (ix86_align_funcs_string)
3092 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3093 prefix, suffix, suffix);
3094 if (align_functions == 0)
3096 i = atoi (ix86_align_funcs_string);
3097 if (i < 0 || i > MAX_CODE_ALIGN)
3098 error ("%salign-loops=%d%s is not between 0 and %d",
3099 prefix, i, suffix, MAX_CODE_ALIGN);
3101 align_functions = 1 << i;
3105 /* Default align_* from the processor table. */
3106 if (align_loops == 0)
3108 align_loops = processor_target_table[ix86_tune].align_loop;
3109 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3111 if (align_jumps == 0)
3113 align_jumps = processor_target_table[ix86_tune].align_jump;
3114 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3116 if (align_functions == 0)
3118 align_functions = processor_target_table[ix86_tune].align_func;
3121 /* Validate -mbranch-cost= value, or provide default. */
3122 ix86_branch_cost = ix86_cost->branch_cost;
3123 if (ix86_branch_cost_string)
3125 i = atoi (ix86_branch_cost_string);
3127 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3129 ix86_branch_cost = i;
3131 if (ix86_section_threshold_string)
3133 i = atoi (ix86_section_threshold_string);
3135 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3137 ix86_section_threshold = i;
3140 if (ix86_tls_dialect_string)
3142 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3143 ix86_tls_dialect = TLS_DIALECT_GNU;
3144 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3145 ix86_tls_dialect = TLS_DIALECT_GNU2;
3146 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3147 ix86_tls_dialect = TLS_DIALECT_SUN;
3149 error ("bad value (%s) for %stls-dialect=%s %s",
3150 ix86_tls_dialect_string, prefix, suffix, sw);
3153 if (ix87_precision_string)
3155 i = atoi (ix87_precision_string);
3156 if (i != 32 && i != 64 && i != 80)
3157 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3162 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3164 /* Enable by default the SSE and MMX builtins. Do allow the user to
3165 explicitly disable any of these. In particular, disabling SSE and
3166 MMX for kernel code is extremely useful. */
3167 if (!ix86_arch_specified)
3169 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3170 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3173 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3177 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3179 if (!ix86_arch_specified)
3181 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3183 /* i386 ABI does not specify red zone. It still makes sense to use it
3184 when the programmer takes care to keep the stack from being destroyed. */
3185 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3186 target_flags |= MASK_NO_RED_ZONE;
3189 /* Keep nonleaf frame pointers. */
3190 if (flag_omit_frame_pointer)
3191 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3192 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3193 flag_omit_frame_pointer = 1;
3195 /* If we're doing fast math, we don't care about comparison order
3196 wrt NaNs. This lets us use a shorter comparison sequence. */
3197 if (flag_finite_math_only)
3198 target_flags &= ~MASK_IEEE_FP;
3200 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3201 since the insns won't need emulation. */
3202 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3203 target_flags &= ~MASK_NO_FANCY_MATH_387;
3205 /* Likewise, if the target doesn't have a 387, or we've specified
3206 software floating point, don't use 387 inline intrinsics. */
3208 target_flags |= MASK_NO_FANCY_MATH_387;
3210 /* Turn on MMX builtins for -msse. */
3213 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3214 x86_prefetch_sse = true;
3217 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3218 if (TARGET_SSE4_2 || TARGET_ABM)
3219 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3221 /* Validate -mpreferred-stack-boundary= value or default it to
3222 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3223 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3224 if (ix86_preferred_stack_boundary_string)
3226 i = atoi (ix86_preferred_stack_boundary_string);
3227 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3228 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3229 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3231 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3234 /* Set the default value for -mstackrealign. */
3235 if (ix86_force_align_arg_pointer == -1)
3236 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3238 /* Validate -mincoming-stack-boundary= value or default it to
3239 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3240 if (ix86_force_align_arg_pointer)
3241 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3243 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3244 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3245 if (ix86_incoming_stack_boundary_string)
3247 i = atoi (ix86_incoming_stack_boundary_string);
3248 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3249 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3250 i, TARGET_64BIT ? 4 : 2);
3253 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3254 ix86_incoming_stack_boundary
3255 = ix86_user_incoming_stack_boundary;
3259 /* Accept -msseregparm only if at least SSE support is enabled. */
3260 if (TARGET_SSEREGPARM
3262 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3264 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3265 if (ix86_fpmath_string != 0)
3267 if (! strcmp (ix86_fpmath_string, "387"))
3268 ix86_fpmath = FPMATH_387;
3269 else if (! strcmp (ix86_fpmath_string, "sse"))
3273 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3274 ix86_fpmath = FPMATH_387;
3277 ix86_fpmath = FPMATH_SSE;
3279 else if (! strcmp (ix86_fpmath_string, "387,sse")
3280 || ! strcmp (ix86_fpmath_string, "387+sse")
3281 || ! strcmp (ix86_fpmath_string, "sse,387")
3282 || ! strcmp (ix86_fpmath_string, "sse+387")
3283 || ! strcmp (ix86_fpmath_string, "both"))
3287 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3288 ix86_fpmath = FPMATH_387;
3290 else if (!TARGET_80387)
3292 warning (0, "387 instruction set disabled, using SSE arithmetics");
3293 ix86_fpmath = FPMATH_SSE;
3296 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3299 error ("bad value (%s) for %sfpmath=%s %s",
3300 ix86_fpmath_string, prefix, suffix, sw);
3303 /* If the i387 is disabled, then do not return values in it. */
3305 target_flags &= ~MASK_FLOAT_RETURNS;
3307 /* Use external vectorized library in vectorizing intrinsics. */
3308 if (ix86_veclibabi_string)
3310 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3311 ix86_veclib_handler = ix86_veclibabi_svml;
3312 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3313 ix86_veclib_handler = ix86_veclibabi_acml;
3315 error ("unknown vectorization library ABI type (%s) for "
3316 "%sveclibabi=%s %s", ix86_veclibabi_string,
3317 prefix, suffix, sw);
3320 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3321 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3323 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* ??? Unwind info is not correct around the CFG unless either a frame
3326 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3327 unwind info generation to be aware of the CFG and propagating states
3329 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3330 || flag_exceptions || flag_non_call_exceptions)
3331 && flag_omit_frame_pointer
3332 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3334 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3335 warning (0, "unwind tables currently require either a frame pointer "
3336 "or %saccumulate-outgoing-args%s for correctness",
3338 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3341 /* If stack probes are required, the space used for large function
3342 arguments on the stack must also be probed, so enable
3343 -maccumulate-outgoing-args so this happens in the prologue. */
3344 if (TARGET_STACK_PROBE
3345 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3347 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3348 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3349 "for correctness", prefix, suffix);
3350 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3353 /* For sane SSE instruction set generation we need fcomi instruction.
3354 It is safe to enable all CMOVE instructions. */
3358 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3361 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3362 p = strchr (internal_label_prefix, 'X');
3363 internal_label_prefix_len = p - internal_label_prefix;
3367 /* When scheduling description is not available, disable scheduler pass
3368 so it won't slow down the compilation and make x87 code slower. */
3369 if (!TARGET_SCHEDULE)
3370 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3372 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3373 set_param_value ("simultaneous-prefetches",
3374 ix86_cost->simultaneous_prefetches);
3375 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3376 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3377 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3378 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3379 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3380 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3382 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3383 can be optimized to ap = __builtin_next_arg (0). */
3385 targetm.expand_builtin_va_start = NULL;
3389 ix86_gen_leave = gen_leave_rex64;
3390 ix86_gen_pop1 = gen_popdi1;
3391 ix86_gen_add3 = gen_adddi3;
3392 ix86_gen_sub3 = gen_subdi3;
3393 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3394 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3395 ix86_gen_monitor = gen_sse3_monitor64;
3396 ix86_gen_andsp = gen_anddi3;
3400 ix86_gen_leave = gen_leave;
3401 ix86_gen_pop1 = gen_popsi1;
3402 ix86_gen_add3 = gen_addsi3;
3403 ix86_gen_sub3 = gen_subsi3;
3404 ix86_gen_sub3_carry = gen_subsi3_carry;
3405 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3406 ix86_gen_monitor = gen_sse3_monitor;
3407 ix86_gen_andsp = gen_andsi3;
3411 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3413 target_flags |= MASK_CLD & ~target_flags_explicit;
3416 /* Save the initial options in case the user does function specific options */
3418 target_option_default_node = target_option_current_node
3419 = build_target_option_node ();
3422 /* Save the current options */
/* Snapshot the current global i386 option state into *PTR so it can be
   stored in a target option node (used by attribute((target)) and the
   function-specific option machinery).  */
3425 ix86_function_specific_save (struct cl_target_option *ptr)
3427 ptr->arch = ix86_arch;
3428 ptr->schedule = ix86_schedule;
3429 ptr->tune = ix86_tune;
3430 ptr->fpmath = ix86_fpmath;
3431 ptr->branch_cost = ix86_branch_cost;
3432 ptr->tune_defaulted = ix86_tune_defaulted;
3433 ptr->arch_specified = ix86_arch_specified;
/* The *_explicit masks record which ISA/target flags the user set
   explicitly; defaults elsewhere are applied only to the unset bits.  */
3434 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3435 ptr->target_flags_explicit = target_flags_explicit;
3437 /* The fields are char but the variables are not; make sure the
3438 values fit in the fields. */
3439 gcc_assert (ptr->arch == ix86_arch);
3440 gcc_assert (ptr->schedule == ix86_schedule);
3441 gcc_assert (ptr->tune == ix86_tune);
3442 gcc_assert (ptr->fpmath == ix86_fpmath);
3443 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3446 /* Restore the current options */
/* Restore the global i386 option state from *PTR — the inverse of
   ix86_function_specific_save.  The per-arch and per-tune feature bit
   arrays are recomputed only when the arch or tune actually changed,
   avoiding redundant work when nothing differs.  */
3449 ix86_function_specific_restore (struct cl_target_option *ptr)
3451 enum processor_type old_tune = ix86_tune;
3452 enum processor_type old_arch = ix86_arch;
3453 unsigned int ix86_arch_mask, ix86_tune_mask;
/* The struct fields are narrow (char); cast back to the enum types.  */
3456 ix86_arch = (enum processor_type) ptr->arch;
3457 ix86_schedule = (enum attr_cpu) ptr->schedule;
3458 ix86_tune = (enum processor_type) ptr->tune;
3459 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3460 ix86_branch_cost = ptr->branch_cost;
3461 ix86_tune_defaulted = ptr->tune_defaulted;
3462 ix86_arch_specified = ptr->arch_specified;
3463 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3464 target_flags_explicit = ptr->target_flags_explicit;
3466 /* Recreate the arch feature tests if the arch changed */
3467 if (old_arch != ix86_arch)
3469 ix86_arch_mask = 1u << ix86_arch;
3470 for (i = 0; i < X86_ARCH_LAST; ++i)
3471 ix86_arch_features[i]
3472 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3475 /* Recreate the tune optimization tests */
3476 if (old_tune != ix86_tune)
3478 ix86_tune_mask = 1u << ix86_tune;
3479 for (i = 0; i < X86_TUNE_LAST; ++i)
3480 ix86_tune_features[i]
3481 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3485 /* Print the current options */
/* Dump the target options in *PTR to FILE for debugging, indented by
   INDENT columns.  The arch/tune indices are printed together with
   their cpu_names entry when the index is in range, and fpmath is
   decoded into its 387/sse components.  */
3488 ix86_function_specific_print (FILE *file, int indent,
3489 struct cl_target_option *ptr)
3492 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3493 NULL, NULL, NULL, false);
3495 fprintf (file, "%*sarch = %d (%s)\n",
/* Guard the cpu_names[] lookup so out-of-range indices don't read
   past the table.  */
3498 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3499 ? cpu_names[ptr->arch]
3502 fprintf (file, "%*stune = %d (%s)\n",
3505 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3506 ? cpu_names[ptr->tune]
3509 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3510 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3511 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3512 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
/* target_string came from ix86_target_string; presumably heap-allocated,
   hence the free below.  */
3516 fprintf (file, "%*s%s\n", indent, "", target_string);
3517 free (target_string);
3522 /* Inner function to process the attribute((target(...))), take an argument and
3523 set the current options from the argument. If we have a list, recursively go
/* Parse the argument(s) of attribute((target(...))).  ARGS is either a
   TREE_LIST (handled by recursion) or a STRING_CST holding a
   comma-separated list of option names.  ISA options are dispatched to
   ix86_handle_option, yes/no options toggle bits in target_flags, and
   string options (arch=/tune=/fpmath=) are duplicated into P_STRINGS
   for the caller to apply.  */
3527 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-builder macros: option string, its length, handler kind,
   OPT_* enumerator, and (for yes/no options) the target_flags mask.  */
3532 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3533 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3534 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3535 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3550 enum ix86_opt_type type;
3555 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3556 IX86_ATTR_ISA ("abm", OPT_mabm),
3557 IX86_ATTR_ISA ("aes", OPT_maes),
3558 IX86_ATTR_ISA ("avx", OPT_mavx),
3559 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3560 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3561 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3562 IX86_ATTR_ISA ("sse", OPT_msse),
3563 IX86_ATTR_ISA ("sse2", OPT_msse2),
3564 IX86_ATTR_ISA ("sse3", OPT_msse3),
3565 IX86_ATTR_ISA ("sse4", OPT_msse4),
3566 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3567 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3568 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3569 IX86_ATTR_ISA ("sse5", OPT_msse5),
3570 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3572 /* string options */
3573 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3574 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3575 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3578 IX86_ATTR_YES ("cld",
3582 IX86_ATTR_NO ("fancy-math-387",
3583 OPT_mfancy_math_387,
3584 MASK_NO_FANCY_MATH_387),
3586 IX86_ATTR_NO ("fused-madd",
3588 MASK_NO_FUSED_MADD),
3590 IX86_ATTR_YES ("ieee-fp",
3594 IX86_ATTR_YES ("inline-all-stringops",
3595 OPT_minline_all_stringops,
3596 MASK_INLINE_ALL_STRINGOPS),
3598 IX86_ATTR_YES ("inline-stringops-dynamically",
3599 OPT_minline_stringops_dynamically,
3600 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3602 IX86_ATTR_NO ("align-stringops",
3603 OPT_mno_align_stringops,
3604 MASK_NO_ALIGN_STRINGOPS),
3606 IX86_ATTR_YES ("recip",
3612 /* If this is a list, recurse to get the options. */
3613 if (TREE_CODE (args) == TREE_LIST)
3617 for (; args; args = TREE_CHAIN (args))
3618 if (TREE_VALUE (args)
3619 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3625 else if (TREE_CODE (args) != STRING_CST)
/* Handle multiple arguments separated by commas.  ASTRDUP gives a
   scratch copy we can carve up in place.  */
3629 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3631 while (next_optstr && *next_optstr != '\0')
3633 char *p = next_optstr;
3635 char *comma = strchr (next_optstr, ',');
3636 const char *opt_string;
3637 size_t len, opt_len;
3642 enum ix86_opt_type type = ix86_opt_unknown;
/* A comma ends this token; advance past it for the next iteration.  */
3648 len = comma - next_optstr;
3649 next_optstr = comma + 1;
3657 /* Recognize no-xxx. */
3658 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3667 /* Find the option. */
3670 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3672 type = attrs[i].type;
3673 opt_len = attrs[i].len;
/* String options (arch= etc.) need a non-empty value after the
   prefix, hence len > opt_len; others must match exactly.  */
3674 if (ch == attrs[i].string[0]
3675 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3676 && memcmp (p, attrs[i].string, opt_len) == 0)
3679 mask = attrs[i].mask;
3680 opt_string = attrs[i].string;
3685 /* Process the option. */
3688 error ("attribute(target(\"%s\")) is unknown", orig_p);
3692 else if (type == ix86_opt_isa)
3693 ix86_handle_option (opt, p, opt_set_p);
3695 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* A "no" table entry inverts the sense established by any leading
   "no-" prefix on the attribute string.  */
3697 if (type == ix86_opt_no)
3698 opt_set_p = !opt_set_p;
3701 target_flags |= mask;
3703 target_flags &= ~mask;
3706 else if (type == ix86_opt_str)
3710 error ("option(\"%s\") was already specified", opt_string);
/* Caller owns and frees the duplicated value string.  */
3714 p_strings[opt] = xstrdup (p + opt_len);
3724 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Build a TARGET_OPTION_NODE tree for attribute((target(ARGS))).
   Parses ARGS, temporarily overrides the global arch/tune/fpmath option
   strings, reruns override_options to recompute derived state, saves
   the resulting options in a node, then restores the original strings
   so global state is left as before.  */
3727 ix86_valid_target_attribute_tree (tree args)
/* Remember the entry-time option strings/flags so they can be put
   back after override_options runs with the attribute's values.  */
3729 const char *orig_arch_string = ix86_arch_string;
3730 const char *orig_tune_string = ix86_tune_string;
3731 const char *orig_fpmath_string = ix86_fpmath_string;
3732 int orig_tune_defaulted = ix86_tune_defaulted;
3733 int orig_arch_specified = ix86_arch_specified;
3734 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3737 struct cl_target_option *def
3738 = TREE_TARGET_OPTION (target_option_default_node);
3740 /* Process each of the options on the chain. */
3741 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3744 /* If the changed options are different from the default, rerun override_options,
3745 and then save the options away. The string options are attribute options,
3746 and will be undone when we copy the save structure. */
3747 if (ix86_isa_flags != def->ix86_isa_flags
3748 || target_flags != def->target_flags
3749 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3750 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3751 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3753 /* If we are using the default tune= or arch=, undo the string assigned,
3754 and use the default. */
3755 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3756 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3757 else if (!orig_arch_specified)
3758 ix86_arch_string = NULL;
3760 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3761 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3762 else if (orig_tune_defaulted)
3763 ix86_tune_string = NULL;
3765 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3766 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3767 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3768 else if (!TARGET_64BIT && TARGET_SSE)
3769 ix86_fpmath_string = "sse,387";
3771 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3772 override_options (false);
3774 /* Add any builtin functions with the new isa if any. */
3775 ix86_add_new_builtins (ix86_isa_flags);
3777 /* Save the current options unless we are validating options for
3779 t = build_target_option_node ();
/* Put the entry-time strings back so global state is unchanged.  */
3781 ix86_arch_string = orig_arch_string;
3782 ix86_tune_string = orig_tune_string;
3783 ix86_fpmath_string = orig_fpmath_string;
3785 /* Free up memory allocated to hold the strings */
3786 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3787 if (option_strings[i])
3788 free (option_strings[i]);
3794 /* Hook to validate attribute((target("string"))). */
/* Hook to validate attribute((target("string"))) on FNDECL.  Builds the
   target option node for ARGS and records it (and any changed
   optimization node) on FNDECL, then restores the global target and
   optimization state that was in effect on entry.  */
3797 ix86_valid_target_attribute_p (tree fndecl,
3798 tree ARG_UNUSED (name),
3800 int ARG_UNUSED (flags))
3802 struct cl_target_option cur_target;
3804 tree old_optimize = build_optimization_node ();
3805 tree new_target, new_optimize;
3806 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3808 /* If the function changed the optimization levels as well as setting target
3809 options, start with the optimizations specified. */
3810 if (func_optimize && func_optimize != old_optimize)
3811 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3813 /* The target attributes may also change some optimization flags, so update
3814 the optimization options if necessary. */
3815 cl_target_option_save (&cur_target);
3816 new_target = ix86_valid_target_attribute_tree (args);
3817 new_optimize = build_optimization_node ();
3824 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
/* Only record an optimization node when the attribute changed it.  */
3826 if (old_optimize != new_optimize)
3827 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary changes: restore the saved target state and the
   optimization levels in effect at entry.  */
3830 cl_target_option_restore (&cur_target);
3832 if (old_optimize != new_optimize)
3833 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3839 /* Hook to determine if one function can safely inline another. */
/* Decide whether CALLEE may be inlined into CALLER based on their
   function-specific target options.  The comparisons below reject
   inlining when the callee's ISA flags are not a subset of the
   caller's, or when any of the other compared option fields differ.  */
3842 ix86_can_inline_p (tree caller, tree callee)
3845 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3846 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3848 /* If callee has no option attributes, then it is ok to inline. */
3852 /* If caller has no option attributes, but callee does then it is not ok to
3854 else if (!caller_tree)
3859 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3860 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3862 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3863 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3865 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3866 != callee_opts->ix86_isa_flags)
3869 /* See if we have the same non-isa options. */
3870 else if (caller_opts->target_flags != callee_opts->target_flags)
3873 /* See if arch, tune, etc. are the same. */
3874 else if (caller_opts->arch != callee_opts->arch)
3877 else if (caller_opts->tune != callee_opts->tune)
3880 else if (caller_opts->fpmath != callee_opts->fpmath)
3883 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3894 /* Remember the last target of ix86_set_current_function. */
3895 static GTY(()) tree ix86_previous_fndecl;
3897 /* Establish appropriate back-end context for processing the function
3898 FNDECL. The argument might be NULL to indicate processing at top
3899 level, outside of any function scope. */
/* Switch back-end state to FNDECL's function-specific target options.
   Uses ix86_previous_fndecl as a one-entry cache so repeated calls for
   the same function (this hook fires many times per function) do no
   work; when the target option node differs, the appropriate saved
   option state is restored.  */
3901 ix86_set_current_function (tree fndecl)
3903 /* Only change the context if the function changes. This hook is called
3904 several times in the course of compiling a function, and we don't want to
3905 slow things down too much or call target_reinit when it isn't safe. */
3906 if (fndecl && fndecl != ix86_previous_fndecl)
3908 tree old_tree = (ix86_previous_fndecl
3909 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3912 tree new_tree = (fndecl
3913 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3916 ix86_previous_fndecl = fndecl;
/* Same target option node as before: nothing to restore.  */
3917 if (old_tree == new_tree)
3922 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* No per-function node here; fall back to the current global options.  */
3928 struct cl_target_option *def
3929 = TREE_TARGET_OPTION (target_option_current_node);
3931 cl_target_option_restore (def);
3938 /* Return true if this goes in large data/bss. */
/* Return true if EXP belongs in the large data/bss sections.  Only
   meaningful for the medium code models; functions never qualify, a
   variable explicitly placed in ".ldata"/".lbss" always does, and
   otherwise the decision is by size against ix86_section_threshold.  */
3941 ix86_in_large_data_p (tree exp)
3943 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3946 /* Functions are never large data. */
3947 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute naming a large-data section wins.  */
3950 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3952 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3953 if (strcmp (section, ".ldata") == 0
3954 || strcmp (section, ".lbss") == 0)
3960 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3962 /* If this is an incomplete type with size 0, then we can't put it
3963 in data because it might be too big when completed. */
3964 if (!size || size > ix86_section_threshold)
3971 /* Switch to the appropriate section for output of DECL.
3972 DECL is either a `VAR_DECL' node or a constant of some sort.
3973 RELOC indicates whether forming the initial value of DECL requires
3974 link-time relocations. */
3976 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* Select the output section for DECL.  Under the medium code models,
   large-data decls are redirected to ".l"-prefixed variants of the
   usual data sections (.ldata, .ldata.rel, .lbss, ...); everything
   else falls through to default_elf_select_section.  */
3980 x86_64_elf_select_section (tree decl, int reloc,
3981 unsigned HOST_WIDE_INT align)
3983 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3984 && ix86_in_large_data_p (decl))
3986 const char *sname = NULL;
3987 unsigned int flags = SECTION_WRITE;
3988 switch (categorize_decl_for_section (decl, reloc))
3993 case SECCAT_DATA_REL:
3994 sname = ".ldata.rel";
3996 case SECCAT_DATA_REL_LOCAL:
3997 sname = ".ldata.rel.local";
3999 case SECCAT_DATA_REL_RO:
4000 sname = ".ldata.rel.ro";
4002 case SECCAT_DATA_REL_RO_LOCAL:
4003 sname = ".ldata.rel.ro.local";
4007 flags |= SECTION_BSS;
4010 case SECCAT_RODATA_MERGE_STR:
4011 case SECCAT_RODATA_MERGE_STR_INIT:
4012 case SECCAT_RODATA_MERGE_CONST:
4016 case SECCAT_SRODATA:
4023 /* We don't split these for medium model. Place them into
4024 default sections and hope for the best. */
4026 case SECCAT_EMUTLS_VAR:
4027 case SECCAT_EMUTLS_TMPL:
4032 /* We might get called with string constants, but get_named_section
4033 doesn't like them as they are not DECLs. Also, we need to set
4034 flags in that case. */
4036 return get_section (sname, flags, NULL);
4037 return get_named_section (decl, sname, reloc);
4040 return default_elf_select_section (decl, reloc, align);
4043 /* Build up a unique section name, expressed as a
4044 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4045 RELOC indicates whether the initial value of EXP requires
4046 link-time relocations. */
/* NOTE(review): this excerpt elides some lines (breaks/braces); code kept verbatim. */
4048 static void ATTRIBUTE_UNUSED
4049 x86_64_elf_unique_section (tree decl, int reloc)
/* Mirror of x86_64_elf_select_section: pick a .l* prefix per section
   category for medium-model large data, else defer to the default. */
4051 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4052 && ix86_in_large_data_p (decl))
4054 const char *prefix = NULL;
4055 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4056 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4058 switch (categorize_decl_for_section (decl, reloc))
4061 case SECCAT_DATA_REL:
4062 case SECCAT_DATA_REL_LOCAL:
4063 case SECCAT_DATA_REL_RO:
4064 case SECCAT_DATA_REL_RO_LOCAL:
4065 prefix = one_only ? ".ld" : ".ldata";
4068 prefix = one_only ? ".lb" : ".lbss";
4071 case SECCAT_RODATA_MERGE_STR:
4072 case SECCAT_RODATA_MERGE_STR_INIT:
4073 case SECCAT_RODATA_MERGE_CONST:
4074 prefix = one_only ? ".lr" : ".lrodata";
4076 case SECCAT_SRODATA:
4083 /* We don't split these for medium model. Place them into
4084 default sections and hope for the best. */
4086 case SECCAT_EMUTLS_VAR:
4087 prefix = targetm.emutls.var_section;
4089 case SECCAT_EMUTLS_TMPL:
4090 prefix = targetm.emutls.tmpl_section;
/* Assemble "<linkonce><prefix>.<stripped decl name>" as the section name. */
4095 const char *name, *linkonce;
4098 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4099 name = targetm.strip_name_encoding (name);
4101 /* If we're using one_only, then there needs to be a .gnu.linkonce
4102 prefix to the section name. */
4103 linkonce = one_only ? ".gnu.linkonce" : "";
4105 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4107 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4111 default_unique_section (decl, reloc);
4114 #ifdef COMMON_ASM_OP
4115 /* This says how to output assembler code to declare an
4116 uninitialized external linkage data object.
4118 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): this excerpt elides some lines; code kept verbatim. */
4121 x86_elf_aligned_common (FILE *file,
4122 const char *name, unsigned HOST_WIDE_INT size,
/* Objects above -mlarge-data-threshold get .largecomm; everything else
   uses the normal COMMON_ASM_OP directive. */
4125 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4126 && size > (unsigned int)ix86_section_threshold)
4127 fprintf (file, ".largecomm\t");
4129 fprintf (file, "%s", COMMON_ASM_OP);
4130 assemble_name (file, name);
/* Emit ",<size>,<alignment in bytes>" after the symbol name. */
4131 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4132 size, align / BITS_PER_UNIT);
4136 /* Utility function for targets to use in implementing
4137 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): this excerpt elides some lines; code kept verbatim. */
4140 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4141 const char *name, unsigned HOST_WIDE_INT size,
/* Large BSS objects go to .lbss under the medium code models. */
4144 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4145 && size > (unsigned int)ix86_section_threshold)
4146 switch_to_section (get_named_section (decl, ".lbss", 0));
4148 switch_to_section (bss_section);
4149 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4150 #ifdef ASM_DECLARE_OBJECT_NAME
4151 last_assemble_variable_decl = decl;
4152 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4154 /* Standard thing is just output label for the object. */
4155 ASM_OUTPUT_LABEL (file, name);
4156 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the symbol has a nonzero extent. */
4157 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Target hook body: adjust optimization flag defaults per -O LEVEL.
   NOTE(review): this excerpt elides some lines; code kept verbatim. */
4161 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4163 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4164 make the problem with not enough registers even worse. */
4165 #ifdef INSN_SCHEDULING
4167 flag_schedule_insns = 0;
4171 /* The Darwin libraries never set errno, so we might as well
4172 avoid calling them when that's the only reason we would. */
4173 flag_errno_math = 0;
4175 /* The default values of these switches depend on the TARGET_64BIT
4176 that is not known at this moment. Mark these values with 2 and
4177 let the user override these. In case there is no command line option
4178 specifying them, we will set the defaults in override_options. */
4180 flag_omit_frame_pointer = 2;
4181 flag_pcc_struct_return = 2;
4182 flag_asynchronous_unwind_tables = 2;
4183 flag_vect_cost_model = 1;
4184 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4185 SUBTARGET_OPTIMIZATION_OPTIONS;
4189 /* Decide whether we can make a sibling call to a function. DECL is the
4190 declaration of the function being targeted by the call and EXP is the
4191 CALL_EXPR representing the call. */
/* NOTE(review): this excerpt elides some lines (returns/braces); code kept verbatim. */
4194 ix86_function_ok_for_sibcall (tree decl, tree exp)
4199 /* If we are generating position-independent code, we cannot sibcall
4200 optimize any indirect call, or a direct call to a global function,
4201 as the PLT requires %ebx be live. */
4202 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Recover the callee's FUNCTION_TYPE from the CALL_EXPR's function
   expression (strip the pointer if present). */
4209 func = TREE_TYPE (CALL_EXPR_FN (exp));
4210 if (POINTER_TYPE_P (func))
4211 func = TREE_TYPE (func);
4214 /* Check that the return value locations are the same. Like
4215 if we are returning floats on the 80387 register stack, we cannot
4216 make a sibcall from a function that doesn't return a float to a
4217 function that does or, conversely, from a function that does return
4218 a float to a function that doesn't; the necessary stack adjustment
4219 would not be executed. This is also the place we notice
4220 differences in the return value ABI. Note that it is ok for one
4221 of the functions to have void return type as long as the return
4222 value of the other is passed in a register. */
4223 a = ix86_function_value (TREE_TYPE (exp), func, false);
4224 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4226 if (STACK_REG_P (a) || STACK_REG_P (b))
4228 if (!rtx_equal_p (a, b))
4231 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4233 else if (!rtx_equal_p (a, b))
4236 /* If this call is indirect, we'll need to be able to use a call-clobbered
4237 register for the address of the target function. Make sure that all
4238 such registers are not used for passing parameters. */
4239 if (!decl && !TARGET_64BIT)
4243 /* We're looking at the CALL_EXPR, we need the type of the function. */
4244 type = CALL_EXPR_FN (exp); /* pointer expression */
4245 type = TREE_TYPE (type); /* pointer type */
4246 type = TREE_TYPE (type); /* function type */
4248 if (ix86_function_regparm (type, NULL) >= 3)
4250 /* ??? Need to count the actual number of registers to be used,
4251 not the possible number of registers. Fix later. */
4256 /* Dllimport'd functions are also called indirectly. */
4257 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4259 && decl && DECL_DLLIMPORT_P (decl)
4260 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4263 /* If we need to align the outgoing stack, then sibcalling would
4264 unalign the stack, which may break the called function. */
4265 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4268 /* Otherwise okay. That also includes certain types of indirect calls. */
4272 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4273 calling convention attributes;
4274 arguments as in struct attribute_spec.handler. */
/* NOTE(review): this excerpt elides some lines (returns/braces); code kept verbatim. */
4277 ix86_handle_cconv_attribute (tree *node, tree name,
4279 int flags ATTRIBUTE_UNUSED,
/* Reject the attribute on anything that is not function-typed. */
4282 if (TREE_CODE (*node) != FUNCTION_TYPE
4283 && TREE_CODE (*node) != METHOD_TYPE
4284 && TREE_CODE (*node) != FIELD_DECL
4285 && TREE_CODE (*node) != TYPE_DECL)
4287 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4289 *no_add_attrs = true;
4293 /* Can combine regparm with all attributes but fastcall. */
4294 if (is_attribute_p ("regparm", name))
4298 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4300 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one argument: an integer constant <= REGPARM_MAX. */
4303 cst = TREE_VALUE (args);
4304 if (TREE_CODE (cst) != INTEGER_CST)
4306 warning (OPT_Wattributes,
4307 "%qE attribute requires an integer constant argument",
4309 *no_add_attrs = true;
4311 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4313 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4315 *no_add_attrs = true;
4323 /* Do not warn when emulating the MS ABI. */
4324 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4325 warning (OPT_Wattributes, "%qE attribute ignored",
4327 *no_add_attrs = true;
4331 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4332 if (is_attribute_p ("fastcall", name))
4334 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4336 error ("fastcall and cdecl attributes are not compatible");
4338 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4340 error ("fastcall and stdcall attributes are not compatible");
4342 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4344 error ("fastcall and regparm attributes are not compatible");
4348 /* Can combine stdcall with fastcall (redundant), regparm and
4350 else if (is_attribute_p ("stdcall", name))
4352 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4354 error ("stdcall and cdecl attributes are not compatible");
4356 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4358 error ("stdcall and fastcall attributes are not compatible");
4362 /* Can combine cdecl with regparm and sseregparm. */
4363 else if (is_attribute_p ("cdecl", name))
4365 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4367 error ("stdcall and cdecl attributes are not compatible");
4369 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4371 error ("fastcall and cdecl attributes are not compatible");
4375 /* Can combine sseregparm with all attributes. */
4380 /* Return 0 if the attributes for two types are incompatible, 1 if they
4381 are compatible, and 2 if they are nearly compatible (which causes a
4382 warning to be generated). */
/* NOTE(review): this excerpt elides some lines (returns); code kept verbatim. */
4385 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4387 /* Check for mismatch of non-default calling convention. */
4388 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4390 if (TREE_CODE (type1) != FUNCTION_TYPE
4391 && TREE_CODE (type1) != METHOD_TYPE)
4394 /* Check for mismatched fastcall/regparm types. */
/* The `!a != !b' idiom compares the *presence* of each attribute. */
4395 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4396 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4397 || (ix86_function_regparm (type1, NULL)
4398 != ix86_function_regparm (type2, NULL)))
4401 /* Check for mismatched sseregparm types. */
4402 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4403 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4406 /* Check for mismatched return types (cdecl vs stdcall). */
4407 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4408 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4414 /* Return the regparm value for a function with the indicated TYPE and DECL.
4415 DECL may be NULL when calling function indirectly
4416 or considering a libcall. */
/* NOTE(review): this excerpt elides some lines (returns/braces); code kept verbatim. */
4419 ix86_function_regparm (const_tree type, const_tree decl)
4424 static bool error_issued;
/* 64-bit ABIs have a fixed register-parameter count per calling ABI. */
4427 return (ix86_function_type_abi (type) == SYSV_ABI
4428 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
/* 32-bit path: start from -mregparm, then honor an explicit
   regparm attribute on the type. */
4430 regparm = ix86_regparm;
4431 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4435 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4437 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4439 /* We can't use regparm(3) for nested functions because
4440 these pass static chain pointer in %ecx register. */
4441 if (!error_issued && regparm == 3
4442 && decl_function_context (decl)
4443 && !DECL_NO_STATIC_CHAIN (decl))
4445 error ("nested functions are limited to 2 register parameters");
/* error_issued is static: report this diagnostic only once. */
4446 error_issued = true;
4454 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4457 /* Use register calling convention for local functions when possible. */
4459 && TREE_CODE (decl) == FUNCTION_DECL
4463 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4464 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4467 int local_regparm, globals = 0, regno;
4470 /* Make sure no regparm register is taken by a
4471 fixed register variable. */
4472 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4473 if (fixed_regs[local_regparm])
4476 /* We can't use regparm(3) for nested functions as these use
4477 static chain pointer in third argument. */
4478 if (local_regparm == 3
4479 && decl_function_context (decl)
4480 && !DECL_NO_STATIC_CHAIN (decl))
4483 /* If the function realigns its stackpointer, the prologue will
4484 clobber %ecx. If we've already generated code for the callee,
4485 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4486 scanning the attributes for the self-realigning property. */
4487 f = DECL_STRUCT_FUNCTION (decl);
4488 /* Since current internal arg pointer won't conflict with
4489 parameter passing regs, so no need to change stack
4490 realignment and adjust regparm number.
4492 Each fixed register usage increases register pressure,
4493 so less registers should be used for argument passing.
4494 This functionality can be overridden by an explicit
4496 for (regno = 0; regno <= DI_REG; regno++)
4497 if (fixed_regs[regno])
4501 = globals < local_regparm ? local_regparm - globals : 0;
4503 if (local_regparm > regparm)
4504 regparm = local_regparm;
4511 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4512 DFmode (2) arguments in SSE registers for a function with the
4513 indicated TYPE and DECL. DECL may be NULL when calling function
4514 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): this excerpt elides some lines (returns/braces); code kept verbatim. */
4517 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This helper is 32-bit only; 64-bit SSE passing is handled elsewhere. */
4519 gcc_assert (!TARGET_64BIT);
4521 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4522 by the sseregparm attribute. */
4523 if (TARGET_SSEREGPARM
4524 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE support is a hard error (when WARN allows). */
4531 error ("Calling %qD with attribute sseregparm without "
4532 "SSE/SSE2 enabled", decl);
4534 error ("Calling %qT with attribute sseregparm without "
4535 "SSE/SSE2 enabled", type);
4543 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4544 (and DFmode for SSE2) arguments in SSE registers. */
4545 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4547 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4548 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4550 return TARGET_SSE2 ? 2 : 1;
4556 /* Return true if EAX is live at the start of the function. Used by
4557 ix86_expand_prologue to determine if we need special help before
4558 calling allocate_stack_worker. */
4561 ix86_eax_live_at_start_p (void)
4563 /* Cheat. Don't bother working forward from ix86_function_regparm
4564 to the function type to whether an actual argument is located in
4565 eax. Instead just look at cfg info, which is still close enough
4566 to correct at this point. This gives false positives for broken
4567 functions that might use uninitialized data that happens to be
4568 allocated in eax, but who cares? */
/* Register 0 is AX on this target; query dataflow liveness at entry. */
4569 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4572 /* Value is the number of bytes of arguments automatically
4573 popped when returning from a subroutine call.
4574 FUNDECL is the declaration node of the function (as a tree),
4575 FUNTYPE is the data type of the function (as a tree),
4576 or for a library call it is an identifier node for the subroutine name.
4577 SIZE is the number of bytes of arguments passed on the stack.
4579 On the 80386, the RTD insn may be used to pop them if the number
4580 of args is fixed, but if the number is variable then the caller
4581 must pop them all. RTD can't be used for library calls now
4582 because the library is compiled with the Unix compiler.
4583 Use of RTD is a selectable option, since it is incompatible with
4584 standard Unix calling sequences. If the option is not selected,
4585 the caller must always pop the args.
4587 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): this excerpt elides some lines (returns/braces); code kept verbatim. */
4590 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4594 /* None of the 64-bit ABIs pop arguments. */
4598 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4600 /* Cdecl functions override -mrtd, and never pop the stack. */
4601 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4603 /* Stdcall and fastcall functions will pop the stack if not
4605 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4606 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* With -mrtd, non-variadic functions pop their own arguments. */
4609 if (rtd && ! stdarg_p (funtype))
4613 /* Lose any fake structure return argument if it is passed on the stack. */
4614 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4615 && !KEEP_AGGREGATE_RETURN_POINTER)
4617 int nregs = ix86_function_regparm (funtype, fundecl);
4619 return GET_MODE_SIZE (Pmode);
4625 /* Argument support functions. */
4627 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): this excerpt elides some lines (returns/braces); code kept verbatim. */
4629 ix86_function_arg_regno_p (int regno)
4632 const int *parm_regs;
/* 32-bit cases: integer regparm registers, plus MMX/SSE registers when
   the corresponding ISA is enabled. */
4637 return (regno < REGPARM_MAX
4638 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4640 return (regno < REGPARM_MAX
4641 || (TARGET_MMX && MMX_REGNO_P (regno)
4642 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4643 || (TARGET_SSE && SSE_REGNO_P (regno)
4644 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4649 if (SSE_REGNO_P (regno) && TARGET_SSE)
4654 if (TARGET_SSE && SSE_REGNO_P (regno)
4655 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4659 /* TODO: The function should depend on current function ABI but
4660 builtins.c would need updating then. Therefore we use the
4663 /* RAX is used as hidden argument to va_arg functions. */
4664 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* 64-bit: check REGNO against the integer parameter registers of the
   active ABI's table. */
4667 if (ix86_abi == MS_ABI)
4668 parm_regs = x86_64_ms_abi_int_parameter_registers;
4670 parm_regs = x86_64_int_parameter_registers;
4671 for (i = 0; i < (ix86_abi == MS_ABI
4672 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4673 if (regno == parm_regs[i])
4678 /* Return if we do not know how to pass TYPE solely in registers. */
4681 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* Defer to the generic size/padding test first. */
4683 if (must_pass_in_stack_var_size_or_pad (mode, type))
4686 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4687 The layout_type routine is crafty and tries to trick us into passing
4688 currently unsupported vector types on the stack by using TImode. */
4689 return (!TARGET_64BIT && mode == TImode
4690 && type && TREE_CODE (type) != VECTOR_TYPE);
4693 /* It returns the size, in bytes, of the area reserved for arguments passed
4694 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): this excerpt elides some lines; code kept verbatim. */
4697 ix86_reg_parm_stack_space (const_tree fndecl)
4699 enum calling_abi call_abi = SYSV_ABI;
/* FNDECL may be a decl or a type; pick the matching ABI query. */
4700 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4701 call_abi = ix86_function_abi (fndecl);
4703 call_abi = ix86_function_type_abi (fndecl);
/* Only the MS ABI reserves a register-parameter shadow area. */
4704 if (call_abi == MS_ABI)
4709 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* NOTE(review): this excerpt elides some lines; code kept verbatim. */
4712 ix86_function_type_abi (const_tree fntype)
4714 if (TARGET_64BIT && fntype != NULL)
4716 enum calling_abi abi = ix86_abi;
/* The ms_abi/sysv_abi attributes flip the ABI away from the default. */
4717 if (abi == SYSV_ABI)
4719 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4722 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of function decl FNDECL via its type.
   NOTE(review): this excerpt elides some lines; code kept verbatim. */
4729 static enum calling_abi
4730 ix86_function_abi (const_tree fndecl)
4734 return ix86_function_type_abi (TREE_TYPE (fndecl));
4737 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4740 ix86_cfun_abi (void)
/* Outside a function, or on 32-bit, the per-function ABI is moot. */
4742 if (! cfun || ! TARGET_64BIT)
4744 return cfun->machine->call_abi;
4748 extern void init_regs (void);
4750 /* Implementation of call abi switching target hook. Specific to FNDECL
4751 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4752 for more details. */
/* NOTE(review): this excerpt elides some lines; code kept verbatim. */
4754 ix86_call_abi_override (const_tree fndecl)
/* No decl means use the global default ABI for this function. */
4756 if (fndecl == NULL_TREE)
4757 cfun->machine->call_abi = ix86_abi;
4759 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4762 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4763 re-initialization of init_regs each time we switch function context since
4764 this is needed only during RTL expansion. */
/* NOTE(review): this excerpt elides some lines; code kept verbatim. */
4766 ix86_maybe_switch_abi (void)
/* SI is call-used under SysV but not MS; use its current state to detect
   whether register tables already match the function's ABI. */
4769 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4773 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4774 for a call to a function whose data type is FNTYPE.
4775 For a library call, FNTYPE is 0. */
/* NOTE(review): this excerpt elides some lines (braces/else arms); code kept verbatim. */
4778 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4779 tree fntype, /* tree ptr for function decl */
4780 rtx libname, /* SYMBOL_REF of library name or 0 */
4783 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4784 memset (cum, 0, sizeof (*cum));
/* Record the call ABI, from the decl when available, else the type. */
4787 cum->call_abi = ix86_function_abi (fndecl);
4789 cum->call_abi = ix86_function_type_abi (fntype);
4790 /* Set up the number of registers to use for passing arguments. */
4792 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4793 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4794 cum->nregs = ix86_regparm;
4797 if (cum->call_abi != ix86_abi)
4798 cum->nregs = (ix86_abi != SYSV_ABI
4799 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4803 cum->sse_nregs = SSE_REGPARM_MAX;
4806 if (cum->call_abi != ix86_abi)
4807 cum->sse_nregs = (ix86_abi != SYSV_ABI
4808 ? X86_64_SSE_REGPARM_MAX
4809 : X86_64_MS_SSE_REGPARM_MAX);
4813 cum->mmx_nregs = MMX_REGPARM_MAX;
4814 cum->warn_avx = true;
4815 cum->warn_sse = true;
4816 cum->warn_mmx = true;
4818 /* Because type might mismatch in between caller and callee, we need to
4819 use actual type of function for local calls.
4820 FIXME: cgraph_analyze can be told to actually record if function uses
4821 va_start so for local functions maybe_vaarg can be made aggressive
4823 FIXME: once type system is fixed, we won't need this code anymore. */
4825 fntype = TREE_TYPE (fndecl);
4826 cum->maybe_vaarg = (fntype
4827 ? (!prototype_p (fntype) || stdarg_p (fntype))
4832 /* If there are variable arguments, then we won't pass anything
4833 in registers in 32-bit mode. */
4834 if (stdarg_p (fntype))
4845 /* Use ecx and edx registers if function has fastcall attribute,
4846 else look for regparm information. */
4849 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4855 cum->nregs = ix86_function_regparm (fntype, fndecl);
4858 /* Set up the number of SSE registers used for passing SFmode
4859 and DFmode arguments. Warn for mismatching ABI. */
4860 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4864 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4865 But in the case of vector types, it is some vector mode.
4867 When we have only some of our vector isa extensions enabled, then there
4868 are some modes for which vector_mode_supported_p is false. For these
4869 modes, the generic vector support in gcc will choose some non-vector mode
4870 in order to implement the type. By computing the natural mode, we'll
4871 select the proper ABI location for the operand and not depend on whatever
4872 the middle-end decides to do with these vector types.
4874 The middle-end can't deal with the vector types > 16 bytes. In this
4875 case, we return the original mode and warn ABI change if CUM isn't
/* NOTE(review): this excerpt elides some lines (braces/returns); code kept verbatim. */
4878 static enum machine_mode
4879 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4881 enum machine_mode mode = TYPE_MODE (type);
4883 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4885 HOST_WIDE_INT size = int_size_in_bytes (type);
4886 if ((size == 8 || size == 16 || size == 32)
4887 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4888 && TYPE_VECTOR_SUBPARTS (type) > 1)
4890 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Walk the vector-mode enumerations for one matching the element mode
   and element count of TYPE. */
4892 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4893 mode = MIN_MODE_VECTOR_FLOAT;
4895 mode = MIN_MODE_VECTOR_INT;
4897 /* Get the mode which has this inner mode and number of units. */
4898 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4899 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4900 && GET_MODE_INNER (mode) == innermode)
/* 32-byte vectors without AVX: keep TYPE_MODE and warn once about the
   ABI change (warnedavx latches the diagnostic). */
4902 if (size == 32 && !TARGET_AVX)
4904 static bool warnedavx;
4911 warning (0, "AVX vector argument without AVX "
4912 "enabled changes the ABI");
4914 return TYPE_MODE (type);
4927 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4928 this may not agree with the mode that the type system has chosen for the
4929 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4930 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): this excerpt elides some lines (braces); code kept verbatim. */
4933 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4938 if (orig_mode != BLKmode)
4939 tmp = gen_rtx_REG (orig_mode, regno)
4942 tmp = gen_rtx_REG (mode, regno);
/* Wrap the register in a one-element PARALLEL at offset 0. */
4943 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4944 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4950 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4951 of this code is to classify each 8bytes of incoming argument by the register
4952 class and assign registers accordingly. */
4954 /* Return the union class of CLASS1 and CLASS2.
4955 See the x86-64 PS ABI for details. */
/* NOTE(review): this excerpt elides some lines (returns); code kept verbatim.
   Implements the psABI MERGE rules for combining two eightbyte classes. */
4957 static enum x86_64_reg_class
4958 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4960 /* Rule #1: If both classes are equal, this is the resulting class. */
4961 if (class1 == class2)
4964 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4966 if (class1 == X86_64_NO_CLASS)
4968 if (class2 == X86_64_NO_CLASS)
4971 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4972 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4973 return X86_64_MEMORY_CLASS;
4975 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4976 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4977 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4978 return X86_64_INTEGERSI_CLASS;
4979 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4980 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4981 return X86_64_INTEGER_CLASS;
4983 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4985 if (class1 == X86_64_X87_CLASS
4986 || class1 == X86_64_X87UP_CLASS
4987 || class1 == X86_64_COMPLEX_X87_CLASS
4988 || class2 == X86_64_X87_CLASS
4989 || class2 == X86_64_X87UP_CLASS
4990 || class2 == X86_64_COMPLEX_X87_CLASS)
4991 return X86_64_MEMORY_CLASS;
4993 /* Rule #6: Otherwise class SSE is used. */
4994 return X86_64_SSE_CLASS;
4997 /* Classify the argument of type TYPE and mode MODE.
4998 CLASSES will be filled by the register class used to pass each word
4999 of the operand. The number of words is returned. In case the parameter
5000 should be passed in memory, 0 is returned. As a special case for zero
5001 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5003 BIT_OFFSET is used internally for handling records and specifies offset
5004 of the offset in bits modulo 256 to avoid overflow cases.
5006 See the x86-64 PS ABI for details.
5010 classify_argument (enum machine_mode mode, const_tree type,
5011 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5013 HOST_WIDE_INT bytes =
5014 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5015 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5017 /* Variable sized entities are always passed/returned in memory. */
5021 if (mode != VOIDmode
5022 && targetm.calls.must_pass_in_stack (mode, type))
5025 if (type && AGGREGATE_TYPE_P (type))
5029 enum x86_64_reg_class subclasses[MAX_CLASSES];
5031 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5035 for (i = 0; i < words; i++)
5036 classes[i] = X86_64_NO_CLASS;
5038 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5039 signalize memory class, so handle it as special case. */
5042 classes[0] = X86_64_NO_CLASS;
5046 /* Classify each field of record and merge classes. */
5047 switch (TREE_CODE (type))
5050 /* And now merge the fields of structure. */
5051 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5053 if (TREE_CODE (field) == FIELD_DECL)
5057 if (TREE_TYPE (field) == error_mark_node)
5060 /* Bitfields are always classified as integer. Handle them
5061 early, since later code would consider them to be
5062 misaligned integers. */
5063 if (DECL_BIT_FIELD (field))
5065 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5066 i < ((int_bit_position (field) + (bit_offset % 64))
5067 + tree_low_cst (DECL_SIZE (field), 0)
5070 merge_classes (X86_64_INTEGER_CLASS,
5077 type = TREE_TYPE (field);
5079 /* Flexible array member is ignored. */
5080 if (TYPE_MODE (type) == BLKmode
5081 && TREE_CODE (type) == ARRAY_TYPE
5082 && TYPE_SIZE (type) == NULL_TREE
5083 && TYPE_DOMAIN (type) != NULL_TREE
5084 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5089 if (!warned && warn_psabi)
5092 inform (input_location,
5093 "The ABI of passing struct with"
5094 " a flexible array member has"
5095 " changed in GCC 4.4");
5099 num = classify_argument (TYPE_MODE (type), type,
5101 (int_bit_position (field)
5102 + bit_offset) % 256);
5105 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5106 for (i = 0; i < num && (i + pos) < words; i++)
5108 merge_classes (subclasses[i], classes[i + pos]);
5115 /* Arrays are handled as small records. */
5118 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5119 TREE_TYPE (type), subclasses, bit_offset);
5123 /* The partial classes are now full classes. */
5124 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5125 subclasses[0] = X86_64_SSE_CLASS;
5126 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5127 && !((bit_offset % 64) == 0 && bytes == 4))
5128 subclasses[0] = X86_64_INTEGER_CLASS;
5130 for (i = 0; i < words; i++)
5131 classes[i] = subclasses[i % num];
5136 case QUAL_UNION_TYPE:
5137 /* Unions are similar to RECORD_TYPE but offset is always 0.
5139 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5141 if (TREE_CODE (field) == FIELD_DECL)
5145 if (TREE_TYPE (field) == error_mark_node)
5148 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5149 TREE_TYPE (field), subclasses,
5153 for (i = 0; i < num; i++)
5154 classes[i] = merge_classes (subclasses[i], classes[i]);
5165 /* When size > 16 bytes, if the first one isn't
5166 X86_64_SSE_CLASS or any other ones aren't
5167 X86_64_SSEUP_CLASS, everything should be passed in
5169 if (classes[0] != X86_64_SSE_CLASS)
5172 for (i = 1; i < words; i++)
5173 if (classes[i] != X86_64_SSEUP_CLASS)
5177 /* Final merger cleanup. */
5178 for (i = 0; i < words; i++)
5180 /* If one class is MEMORY, everything should be passed in
5182 if (classes[i] == X86_64_MEMORY_CLASS)
5185 /* The X86_64_SSEUP_CLASS should be always preceded by
5186 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5187 if (classes[i] == X86_64_SSEUP_CLASS
5188 && classes[i - 1] != X86_64_SSE_CLASS
5189 && classes[i - 1] != X86_64_SSEUP_CLASS)
5191 /* The first one should never be X86_64_SSEUP_CLASS. */
5192 gcc_assert (i != 0);
5193 classes[i] = X86_64_SSE_CLASS;
5196 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5197 everything should be passed in memory. */
5198 if (classes[i] == X86_64_X87UP_CLASS
5199 && (classes[i - 1] != X86_64_X87_CLASS))
5203 /* The first one should never be X86_64_X87UP_CLASS. */
5204 gcc_assert (i != 0);
5205 if (!warned && warn_psabi)
5208 inform (input_location,
5209 "The ABI of passing union with long double"
5210 " has changed in GCC 4.4");
5218 /* Compute alignment needed. We align all types to natural boundaries with
5219 exception of XFmode that is aligned to 64bits. */
5220 if (mode != VOIDmode && mode != BLKmode)
5222 int mode_alignment = GET_MODE_BITSIZE (mode);
5225 mode_alignment = 128;
5226 else if (mode == XCmode)
5227 mode_alignment = 256;
5228 if (COMPLEX_MODE_P (mode))
5229 mode_alignment /= 2;
5230 /* Misaligned fields are always returned in memory. */
5231 if (bit_offset % mode_alignment)
5235 /* for V1xx modes, just use the base mode */
5236 if (VECTOR_MODE_P (mode) && mode != V1DImode
5237 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5238 mode = GET_MODE_INNER (mode);
5240 /* Classification of atomic types. */
5245 classes[0] = X86_64_SSE_CLASS;
5248 classes[0] = X86_64_SSE_CLASS;
5249 classes[1] = X86_64_SSEUP_CLASS;
5259 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5263 classes[0] = X86_64_INTEGERSI_CLASS;
5266 else if (size <= 64)
5268 classes[0] = X86_64_INTEGER_CLASS;
5271 else if (size <= 64+32)
5273 classes[0] = X86_64_INTEGER_CLASS;
5274 classes[1] = X86_64_INTEGERSI_CLASS;
5277 else if (size <= 64+64)
5279 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5287 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5291 /* OImode shouldn't be used directly. */
5296 if (!(bit_offset % 64))
5297 classes[0] = X86_64_SSESF_CLASS;
5299 classes[0] = X86_64_SSE_CLASS;
5302 classes[0] = X86_64_SSEDF_CLASS;
5305 classes[0] = X86_64_X87_CLASS;
5306 classes[1] = X86_64_X87UP_CLASS;
5309 classes[0] = X86_64_SSE_CLASS;
5310 classes[1] = X86_64_SSEUP_CLASS;
5313 classes[0] = X86_64_SSE_CLASS;
5314 if (!(bit_offset % 64))
5320 if (!warned && warn_psabi)
5323 inform (input_location,
5324 "The ABI of passing structure with complex float"
5325 " member has changed in GCC 4.4");
5327 classes[1] = X86_64_SSESF_CLASS;
5331 classes[0] = X86_64_SSEDF_CLASS;
5332 classes[1] = X86_64_SSEDF_CLASS;
5335 classes[0] = X86_64_COMPLEX_X87_CLASS;
5338 /* This modes is larger than 16 bytes. */
5346 classes[0] = X86_64_SSE_CLASS;
5347 classes[1] = X86_64_SSEUP_CLASS;
5348 classes[2] = X86_64_SSEUP_CLASS;
5349 classes[3] = X86_64_SSEUP_CLASS;
5357 classes[0] = X86_64_SSE_CLASS;
5358 classes[1] = X86_64_SSEUP_CLASS;
5365 classes[0] = X86_64_SSE_CLASS;
5371 gcc_assert (VECTOR_MODE_P (mode));
5376 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5378 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5379 classes[0] = X86_64_INTEGERSI_CLASS;
5381 classes[0] = X86_64_INTEGER_CLASS;
5382 classes[1] = X86_64_INTEGER_CLASS;
5383 return 1 + (bytes > 8);
5387 /* Examine the argument and return set number of register required in each
5388 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): this listing carries original line numbers and elides
   interior lines, so several returns/braces are not visible here.  */
5390 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5391 int *int_nregs, int *sse_nregs)
5393 enum x86_64_reg_class regclass[MAX_CLASSES];
/* Classify the value into up to MAX_CLASSES eightbyte chunks.  */
5394 int n = classify_argument (mode, type, regclass, 0);
/* Tally integer vs. SSE register needs over all classified chunks.  */
5400 for (n--; n >= 0; n--)
5401 switch (regclass[n])
5403 case X86_64_INTEGER_CLASS:
5404 case X86_64_INTEGERSI_CLASS:
5407 case X86_64_SSE_CLASS:
5408 case X86_64_SSESF_CLASS:
5409 case X86_64_SSEDF_CLASS:
5412 case X86_64_NO_CLASS:
5413 case X86_64_SSEUP_CLASS:
5415 case X86_64_X87_CLASS:
5416 case X86_64_X87UP_CLASS:
5420 case X86_64_COMPLEX_X87_CLASS:
/* Complex x87 values may be returned in registers (2) but are
   never passed in them (0 => caller passes in memory).  */
5421 return in_return ? 2 : 0;
5422 case X86_64_MEMORY_CLASS:
5428 /* Construct container for the argument used by GCC interface. See
5429 FUNCTION_ARG for the detailed description. */
/* NOTE(review): line-numbered listing with elided lines; some branches
   and fall-through returns are not visible in this fragment.  */
5432 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5433 const_tree type, int in_return, int nintregs, int nsseregs,
5434 const int *intreg, int sse_regno)
5436 /* The following variables hold the static issued_error state. */
5437 static bool issued_sse_arg_error;
5438 static bool issued_sse_ret_error;
5439 static bool issued_x87_ret_error;
5441 enum machine_mode tmpmode;
5443 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5444 enum x86_64_reg_class regclass[MAX_CLASSES];
5448 int needed_sseregs, needed_intregs;
5449 rtx exp[MAX_CLASSES];
5452 n = classify_argument (mode, type, regclass, 0);
5455 if (!examine_argument (mode, type, in_return, &needed_intregs,
5458 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5461 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5462 some less clueful developer tries to use floating-point anyway. */
5463 if (needed_sseregs && !TARGET_SSE)
5467 if (!issued_sse_ret_error)
5469 error ("SSE register return with SSE disabled");
5470 issued_sse_ret_error = true;
5473 else if (!issued_sse_arg_error)
5475 error ("SSE register argument with SSE disabled");
5476 issued_sse_arg_error = true;
5481 /* Likewise, error if the ABI requires us to return values in the
5482 x87 registers and the user specified -mno-80387. */
5483 if (!TARGET_80387 && in_return)
5484 for (i = 0; i < n; i++)
5485 if (regclass[i] == X86_64_X87_CLASS
5486 || regclass[i] == X86_64_X87UP_CLASS
5487 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5489 if (!issued_x87_ret_error)
5491 error ("x87 register return with x87 disabled");
5492 issued_x87_ret_error = true;
5497 /* First construct simple cases. Avoid SCmode, since we want to use
5498 single register to pass this type. */
5499 if (n == 1 && mode != SCmode)
5500 switch (regclass[0])
5502 case X86_64_INTEGER_CLASS:
5503 case X86_64_INTEGERSI_CLASS:
5504 return gen_rtx_REG (mode, intreg[0]);
5505 case X86_64_SSE_CLASS:
5506 case X86_64_SSESF_CLASS:
5507 case X86_64_SSEDF_CLASS:
5508 if (mode != BLKmode)
5509 return gen_reg_or_parallel (mode, orig_mode,
5510 SSE_REGNO (sse_regno));
5512 case X86_64_X87_CLASS:
5513 case X86_64_COMPLEX_X87_CLASS:
5514 return gen_rtx_REG (mode, FIRST_STACK_REG);
5515 case X86_64_NO_CLASS:
5516 /* Zero sized array, struct or class. */
/* Whole-value-in-one-SSE-register shortcuts (2 chunks: 16 bytes,
   4 chunks: 32-byte AVX values).  */
5521 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5522 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5523 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5525 && regclass[0] == X86_64_SSE_CLASS
5526 && regclass[1] == X86_64_SSEUP_CLASS
5527 && regclass[2] == X86_64_SSEUP_CLASS
5528 && regclass[3] == X86_64_SSEUP_CLASS
5530 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5533 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5534 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5535 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5536 && regclass[1] == X86_64_INTEGER_CLASS
5537 && (mode == CDImode || mode == TImode || mode == TFmode)
5538 && intreg[0] + 1 == intreg[1])
5539 return gen_rtx_REG (mode, intreg[0]);
5541 /* Otherwise figure out the entries of the PARALLEL. */
5542 for (i = 0; i < n; i++)
5546 switch (regclass[i])
5548 case X86_64_NO_CLASS:
5550 case X86_64_INTEGER_CLASS:
5551 case X86_64_INTEGERSI_CLASS:
5552 /* Merge TImodes on aligned occasions here too. */
5553 if (i * 8 + 8 > bytes)
5554 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5555 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5559 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5560 if (tmpmode == BLKmode)
5562 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5563 gen_rtx_REG (tmpmode, *intreg),
5567 case X86_64_SSESF_CLASS:
5568 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5569 gen_rtx_REG (SFmode,
5570 SSE_REGNO (sse_regno)),
5574 case X86_64_SSEDF_CLASS:
5575 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5576 gen_rtx_REG (DFmode,
5577 SSE_REGNO (sse_regno)),
5581 case X86_64_SSE_CLASS:
5589 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5599 && regclass[1] == X86_64_SSEUP_CLASS
5600 && regclass[2] == X86_64_SSEUP_CLASS
5601 && regclass[3] == X86_64_SSEUP_CLASS);
5608 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5609 gen_rtx_REG (tmpmode,
5610 SSE_REGNO (sse_regno)),
5619 /* Empty aligned struct, union or class. */
/* Assemble the collected EXPR_LIST entries into one PARALLEL rtx.  */
5623 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5624 for (i = 0; i < nexps; i++)
5625 XVECEXP (ret, 0, i) = exp [i];
5629 /* Update the data in CUM to advance over an argument of mode MODE
5630 and data type TYPE. (TYPE is null for libcalls where that information
5631 may not be available.) */
/* NOTE(review): elided lines hide the switch on MODE that selects among
   the integer/SSE/MMX branches below.  */
5634 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5635 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register branch: consume WORDS general registers.  */
5651 cum->words += words;
5652 cum->nregs -= words;
5653 cum->regno += words;
5655 if (cum->nregs <= 0)
5663 /* OImode shouldn't be used directly. */
5667 if (cum->float_in_sse < 2)
5670 if (cum->float_in_sse < 1)
/* SSE branch: scalar/vector SSE args consume one XMM register.  */
5687 if (!type || !AGGREGATE_TYPE_P (type))
5689 cum->sse_words += words;
5690 cum->sse_nregs -= 1;
5691 cum->sse_regno += 1;
5692 if (cum->sse_nregs <= 0)
/* MMX branch: 8-byte vector args consume one MMX register.  */
5705 if (!type || !AGGREGATE_TYPE_P (type))
5707 cum->mmx_words += words;
5708 cum->mmx_nregs -= 1;
5709 cum->mmx_regno += 1;
5710 if (cum->mmx_nregs <= 0)
/* Advance CUM over one SysV x86-64 argument.  Registers are consumed
   only if the whole argument fits in the remaining regs; otherwise the
   argument goes on the stack and only the word count advances.  */
5721 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5722 tree type, HOST_WIDE_INT words, int named)
5724 int int_nregs, sse_nregs;
5726 /* Unnamed 256bit vector mode parameters are passed on stack. */
5727 if (!named && VALID_AVX256_REG_MODE (mode))
5730 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5731 cum->words += words;
5732 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5734 cum->nregs -= int_nregs;
5735 cum->sse_nregs -= sse_nregs;
5736 cum->regno += int_nregs;
5737 cum->sse_regno += sse_nregs;
5740 cum->words += words;
/* Advance CUM over one MS x86-64 ABI argument.  Every argument slot is
   one word; anything not of size 1/2/4/8 is passed indirectly.  */
5744 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5745 HOST_WIDE_INT words)
5747 /* Otherwise, this should be passed indirect. */
5748 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5750 cum->words += words;
/* Dispatcher for advancing CUM: computes size in bytes/words, then
   routes to the MS-64, SysV-64, or 32-bit worker by target/ABI.  */
5759 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5760 tree type, int named)
5762 HOST_WIDE_INT bytes, words;
5764 if (mode == BLKmode)
5765 bytes = int_size_in_bytes (type);
5767 bytes = GET_MODE_SIZE (mode);
5768 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Use the type's natural (vector) mode when a type is available.  */
5771 mode = type_natural_mode (type, NULL);
5773 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5774 function_arg_advance_ms_64 (cum, bytes, words);
5775 else if (TARGET_64BIT)
5776 function_arg_advance_64 (cum, mode, type, words, named);
5778 function_arg_advance_32 (cum, mode, type, bytes, words);
5781 /* Define where to put the arguments to a function.
5782 Value is zero to push the argument on the stack,
5783 or a hard register in which to store the argument.
5785 MODE is the argument's machine mode.
5786 TYPE is the data type of the argument (as a tree).
5787 This is null for libcalls where that information may
5789 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5790 the preceding args and about the function being called.
5791 NAMED is nonzero if this argument is a named parameter
5792 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided lines hide the switch on MODE governing the
   branches below.  */
5795 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5796 enum machine_mode orig_mode, tree type,
5797 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5799 static bool warnedsse, warnedmmx;
5801 /* Avoid the AL settings for the Unix64 ABI. */
5802 if (mode == VOIDmode)
/* Integer args that fit in the remaining regparm registers.  */
5818 if (words <= cum->nregs)
5820 int regno = cum->regno;
5822 /* Fastcall allocates the first two DWORD (SImode) or
5823 smaller arguments to ECX and EDX if it isn't an
5829 || (type && AGGREGATE_TYPE_P (type)))
5832 /* ECX not EAX is the first allocated register. */
5833 if (regno == AX_REG)
5836 return gen_rtx_REG (mode, regno);
5841 if (cum->float_in_sse < 2)
5844 if (cum->float_in_sse < 1)
5848 /* In 32bit, we pass TImode in xmm registers. */
5855 if (!type || !AGGREGATE_TYPE_P (type))
/* Warn once per compilation if an SSE arg is used with SSE off.  */
5857 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5860 warning (0, "SSE vector argument without SSE enabled "
5864 return gen_reg_or_parallel (mode, orig_mode,
5865 cum->sse_regno + FIRST_SSE_REG);
5870 /* OImode shouldn't be used directly. */
5879 if (!type || !AGGREGATE_TYPE_P (type))
5882 return gen_reg_or_parallel (mode, orig_mode,
5883 cum->sse_regno + FIRST_SSE_REG);
5892 if (!type || !AGGREGATE_TYPE_P (type))
5894 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5897 warning (0, "MMX vector argument without MMX enabled "
5901 return gen_reg_or_parallel (mode, orig_mode,
5902 cum->mmx_regno + FIRST_MMX_REG);
/* Pick the register(s) for one SysV x86-64 argument.  A VOIDmode
   "argument" is the hidden AL value telling varargs callees how many
   SSE registers were used.  */
5911 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5912 enum machine_mode orig_mode, tree type, int named)
5914 /* Handle a hidden AL argument containing number of registers
5915 for varargs x86-64 functions. */
5916 if (mode == VOIDmode)
5917 return GEN_INT (cum->maybe_vaarg
5918 ? (cum->sse_nregs < 0
5919 ? (cum->call_abi == ix86_abi
5921 : (ix86_abi != SYSV_ABI
5922 ? X86_64_SSE_REGPARM_MAX
5923 : X86_64_MS_SSE_REGPARM_MAX))
5938 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Let construct_container build the (possibly PARALLEL) location.  */
5944 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5946 &x86_64_int_parameter_registers [cum->regno],
/* Pick the register for one MS x86-64 ABI argument.  VOIDmode returns
   the -2 marker consumed by expand_call for cross-ABI clobbers.  */
5951 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5952 enum machine_mode orig_mode, int named,
5953 HOST_WIDE_INT bytes)
5957 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5958 We use value of -2 to specify that current function call is MSABI. */
5959 if (mode == VOIDmode)
5960 return GEN_INT (-2);
5962 /* If we've run out of registers, it goes on the stack. */
5963 if (cum->nregs == 0)
5966 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5968 /* Only floating point modes are passed in anything but integer regs. */
5969 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5972 regno = cum->regno + FIRST_SSE_REG;
5977 /* Unnamed floating parameters are passed in both the
5978 SSE and integer registers. */
5979 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5980 t2 = gen_rtx_REG (mode, regno);
5981 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5982 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5983 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5986 /* Handle aggregated types passed in register. */
5987 if (orig_mode == BLKmode)
5989 if (bytes > 0 && bytes <= 8)
5990 mode = (bytes > 4 ? DImode : SImode);
5991 if (mode == BLKmode)
5995 return gen_reg_or_parallel (mode, orig_mode, regno);
/* FUNCTION_ARG dispatcher: normalize size and vector mode, then route
   to the MS-64, SysV-64, or 32-bit worker by target/ABI.  */
5999 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6000 tree type, int named)
6002 enum machine_mode mode = omode;
6003 HOST_WIDE_INT bytes, words;
6005 if (mode == BLKmode)
6006 bytes = int_size_in_bytes (type);
6008 bytes = GET_MODE_SIZE (mode);
6009 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6011 /* To simplify the code below, represent vector types with a vector mode
6012 even if MMX/SSE are not active. */
6013 if (type && TREE_CODE (type) == VECTOR_TYPE)
6014 mode = type_natural_mode (type, cum);
6016 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6017 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6018 else if (TARGET_64BIT)
6019 return function_arg_64 (cum, mode, omode, type, named);
6021 return function_arg_32 (cum, mode, omode, type, bytes, words);
6024 /* A C expression that indicates when an argument must be passed by
6025 reference. If nonzero for an argument, a copy of that argument is
6026 made in memory and a pointer to the argument is passed instead of
6027 the argument itself. The pointer is passed in whatever way is
6028 appropriate for passing a pointer to that type. */
6031 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6032 enum machine_mode mode ATTRIBUTE_UNUSED,
6033 const_tree type, bool named ATTRIBUTE_UNUSED)
6035 /* See Windows x64 Software Convention. */
6036 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6038 int msize = (int) GET_MODE_SIZE (mode);
6041 /* Arrays are passed by reference. */
6042 if (TREE_CODE (type) == ARRAY_TYPE)
6045 if (AGGREGATE_TYPE_P (type))
6047 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6048 are passed by reference. */
6049 msize = int_size_in_bytes (type);
/* Only 1/2/4/8-byte values travel by value under the MS ABI.  */
6053 /* __m128 is passed by reference. */
6055 case 1: case 2: case 4: case 8:
/* SysV 64-bit: variable-sized types (size -1) go by reference.  */
6061 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6067 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): elided lines hide part of the predicate conditions.  */
6070 contains_aligned_value_p (tree type)
6072 enum machine_mode mode = TYPE_MODE (type);
6073 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6077 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6079 if (TYPE_ALIGN (type) < 128)
6082 if (AGGREGATE_TYPE_P (type))
6084 /* Walk the aggregates recursively. */
6085 switch (TREE_CODE (type))
6089 case QUAL_UNION_TYPE:
6093 /* Walk all the structure fields. */
6094 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6096 if (TREE_CODE (field) == FIELD_DECL
6097 && contains_aligned_value_p (TREE_TYPE (field)))
6104 /* Just for use if some languages passes arrays by value. */
6105 if (contains_aligned_value_p (TREE_TYPE (type)))
6116 /* Gives the alignment boundary, in bits, of an argument with the
6117 specified mode and type. */
6120 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6125 /* Since canonical type is used for call, we convert it to
6126 canonical type if needed. */
6127 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6128 type = TYPE_CANONICAL (type);
6129 align = TYPE_ALIGN (type);
/* No type: fall back to the mode's natural alignment.  */
6132 align = GET_MODE_ALIGNMENT (mode);
6133 if (align < PARM_BOUNDARY)
6134 align = PARM_BOUNDARY;
6135 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6136 natural boundaries. */
6137 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6139 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6140 make an exception for SSE modes since these require 128bit
6143 The handling here differs from field_alignment. ICC aligns MMX
6144 arguments to 4 byte boundaries, while structure fields are aligned
6145 to 8 byte boundaries. */
6148 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6149 align = PARM_BOUNDARY;
6153 if (!contains_aligned_value_p (type))
6154 align = PARM_BOUNDARY;
/* Never exceed the platform's maximum meaningful alignment.  */
6157 if (align > BIGGEST_ALIGNMENT)
6158 align = BIGGEST_ALIGNMENT;
6162 /* Return true if N is a possible register number of function value. */
6165 ix86_function_value_regno_p (int regno)
6172 case FIRST_FLOAT_REG:
6173 /* TODO: The function should depend on current function ABI but
6174 builtins.c would need updating then. Therefore we use the
6176 if (TARGET_64BIT && ix86_abi == MS_ABI)
6178 return TARGET_FLOAT_RETURNS_IN_80387;
/* NOTE(review): elided case label above this line; condition likely
   guards a vector-return register — confirm against full source.  */
6184 if (TARGET_MACHO || TARGET_64BIT)
6192 /* Define how to find the value returned by a function.
6193 VALTYPE is the data type of the value (as a tree).
6194 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6195 otherwise, FUNC is 0. */
6198 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6199 const_tree fntype, const_tree fn)
6203 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6204 we normally prevent this case when mmx is not available. However
6205 some ABIs may require the result to be returned like DImode. */
6206 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6207 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6209 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6210 we prevent this case when sse is not available. However some ABIs
6211 may require the result to be returned like integer TImode. */
6212 else if (mode == TImode
6213 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6214 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6216 /* 32-byte vector modes in %ymm0. */
6217 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6218 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6220 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6221 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6222 regno = FIRST_FLOAT_REG;
6224 /* Most things go in %eax. */
6227 /* Override FP return register with %xmm0 for local functions when
6228 SSE math is enabled or for functions with sseregparm attribute. */
6229 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6231 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6232 if ((sse_level >= 1 && mode == SFmode)
6233 || (sse_level == 2 && mode == DFmode))
6234 regno = FIRST_SSE_REG;
6237 /* OImode shouldn't be used directly. */
6238 gcc_assert (mode != OImode);
6240 return gen_rtx_REG (orig_mode, regno);
/* Return-value location for the SysV x86-64 ABI.  Libcalls (no type
   node) are handled specially; typed values go through
   construct_container.  */
6244 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6249 /* Handle libcalls, which don't provide a type node. */
6250 if (valtype == NULL)
6262 return gen_rtx_REG (mode, FIRST_SSE_REG);
6265 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6269 return gen_rtx_REG (mode, AX_REG);
6273 ret = construct_container (mode, orig_mode, valtype, 1,
6274 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6275 x86_64_int_return_registers, 0);
6277 /* For zero sized structures, construct_container returns NULL, but we
6278 need to keep rest of compiler happy by returning meaningful value. */
6280 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Return-value location for the MS x86-64 ABI: RAX by default, XMM0
   for 16-byte scalar/vector values and for SF/DFmode floats.  */
6286 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6288 unsigned int regno = AX_REG;
6292 switch (GET_MODE_SIZE (mode))
6295 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6296 && !COMPLEX_MODE_P (mode))
6297 regno = FIRST_SSE_REG;
6301 if (mode == SFmode || mode == DFmode)
6302 regno = FIRST_SSE_REG;
6308 return gen_rtx_REG (orig_mode, regno);
/* Common worker for function/libcall value lookup: resolve the fntype
   from a decl if given, then dispatch by target/ABI.  */
6312 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6313 enum machine_mode orig_mode, enum machine_mode mode)
6315 const_tree fn, fntype;
6318 if (fntype_or_decl && DECL_P (fntype_or_decl))
6319 fn = fntype_or_decl;
6320 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6322 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6323 return function_value_ms_64 (orig_mode, mode);
6324 else if (TARGET_64BIT)
6325 return function_value_64 (orig_mode, mode, valtype);
6327 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: compute natural mode for VALTYPE and
   delegate to the common worker.  */
6331 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6332 bool outgoing ATTRIBUTE_UNUSED)
6334 enum machine_mode mode, orig_mode;
6336 orig_mode = TYPE_MODE (valtype);
6337 mode = type_natural_mode (valtype, NULL);
6338 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Libcall return location: no type information, so use MODE for both
   the original and natural mode.  */
6342 ix86_libcall_value (enum machine_mode mode)
6344 return ix86_function_value_1 (NULL, NULL, mode, mode);
6347 /* Return true iff type is returned in memory. */
6349 static int ATTRIBUTE_UNUSED
6350 return_in_memory_32 (const_tree type, enum machine_mode mode)
6354 if (mode == BLKmode)
6357 size = int_size_in_bytes (type);
6359 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6362 if (VECTOR_MODE_P (mode) || mode == TImode)
6364 /* User-created vectors small enough to fit in EAX. */
6368 /* MMX/3dNow values are returned in MM0,
6369 except when it doesn't exits. */
6371 return (TARGET_MMX ? 0 : 1);
6373 /* SSE values are returned in XMM0, except when it doesn't exist. */
6375 return (TARGET_SSE ? 0 : 1);
6377 /* AVX values are returned in YMM0, except when it doesn't exist. */
6379 return TARGET_AVX ? 0 : 1;
6388 /* OImode shouldn't be used directly. */
6389 gcc_assert (mode != OImode);
/* SysV x86-64: a value is returned in memory exactly when
   examine_argument says it cannot be classified into registers.  */
6394 static int ATTRIBUTE_UNUSED
6395 return_in_memory_64 (const_tree type, enum machine_mode mode)
6397 int needed_intregs, needed_sseregs;
6398 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* MS x86-64: 16-byte scalar/vector values return in XMM0; everything
   else returns in registers only if its size is exactly 1, 2, 4 or 8.  */
6401 static int ATTRIBUTE_UNUSED
6402 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6404 HOST_WIDE_INT size = int_size_in_bytes (type);
6406 /* __m128 is returned in xmm0. */
6407 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6408 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6411 /* Otherwise, the size must be exactly in [1248]. */
6412 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: let the subtarget override, else
   dispatch to the MS-64, SysV-64, or 32-bit predicate.  */
6416 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6418 #ifdef SUBTARGET_RETURN_IN_MEMORY
6419 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6421 const enum machine_mode mode = type_natural_mode (type, NULL);
6425 if (ix86_function_type_abi (fntype) == MS_ABI)
6426 return return_in_memory_ms_64 (type, mode);
6428 return return_in_memory_64 (type, mode);
6431 return return_in_memory_32 (type, mode);
6435 /* Return false iff TYPE is returned in memory. This version is used
6436 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6437 but differs notably in that when MMX is available, 8-byte vectors
6438 are returned in memory, rather than in MMX registers. */
6441 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6444 enum machine_mode mode = type_natural_mode (type, NULL);
6447 return return_in_memory_64 (type, mode);
6449 if (mode == BLKmode)
6452 size = int_size_in_bytes (type);
6454 if (VECTOR_MODE_P (mode))
6456 /* Return in memory only if MMX registers *are* available. This
6457 seems backwards, but it is consistent with the existing
/* NOTE(review): elided size comparisons here — confirm thresholds
   against the full source before relying on this fragment.  */
6464 else if (mode == TImode)
6466 else if (mode == XFmode)
6472 /* When returning SSE vector types, we have a choice of either
6473 (1) being abi incompatible with a -march switch, or
6474 (2) generating an error.
6475 Given no good solution, I think the safest thing is one warning.
6476 The user won't be able to use -Werror, but....
6478 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6479 called in response to actually generating a caller or callee that
6480 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6481 via aggregate_value_p for general type probing from tree-ssa. */
6484 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6486 static bool warnedsse, warnedmmx;
6488 if (!TARGET_64BIT && type)
6490 /* Look at the return type of the function, not the function type. */
6491 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
/* Warn once about SSE/MMX vector returns when the feature is off.  */
6493 if (!TARGET_SSE && !warnedsse)
6496 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6499 warning (0, "SSE vector return without SSE enabled "
6504 if (!TARGET_MMX && !warnedmmx)
6506 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6509 warning (0, "MMX vector return without MMX enabled "
6519 /* Create the va_list data type. */
6521 /* Returns the calling convention specific va_list date type.
6522 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6525 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6527 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6529 /* For i386 we use plain pointer to argument area. */
6530 if (!TARGET_64BIT || abi == MS_ABI)
6531 return build_pointer_type (char_type_node);
/* SysV 64-bit: build the 4-field __va_list_tag record.  */
6533 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6534 type_decl = build_decl (BUILTINS_LOCATION,
6535 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6537 f_gpr = build_decl (BUILTINS_LOCATION,
6538 FIELD_DECL, get_identifier ("gp_offset"),
6539 unsigned_type_node);
6540 f_fpr = build_decl (BUILTINS_LOCATION,
6541 FIELD_DECL, get_identifier ("fp_offset"),
6542 unsigned_type_node);
6543 f_ovf = build_decl (BUILTINS_LOCATION,
6544 FIELD_DECL, get_identifier ("overflow_arg_area"),
6546 f_sav = build_decl (BUILTINS_LOCATION,
6547 FIELD_DECL, get_identifier ("reg_save_area"),
6550 va_list_gpr_counter_field = f_gpr;
6551 va_list_fpr_counter_field = f_fpr;
6553 DECL_FIELD_CONTEXT (f_gpr) = record;
6554 DECL_FIELD_CONTEXT (f_fpr) = record;
6555 DECL_FIELD_CONTEXT (f_ovf) = record;
6556 DECL_FIELD_CONTEXT (f_sav) = record;
6558 TREE_CHAIN (record) = type_decl;
6559 TYPE_NAME (record) = type_decl;
6560 TYPE_FIELDS (record) = f_gpr;
6561 TREE_CHAIN (f_gpr) = f_fpr;
6562 TREE_CHAIN (f_fpr) = f_ovf;
6563 TREE_CHAIN (f_ovf) = f_sav;
6565 layout_type (record);
6567 /* The correct type is an array type of one element. */
6568 return build_array_type (record, build_index_type (size_zero_node));
6571 /* Setup the builtin va_list data type and for 64-bit the additional
6572 calling convention specific va_list data types. */
6575 ix86_build_builtin_va_list (void)
6577 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6579 /* Initialize abi specific va_list builtin types. */
/* Cache both the SysV and MS va_list types; the default-ABI one is
   derived from RET, the other built fresh.  */
6583 if (ix86_abi == MS_ABI)
6585 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6586 if (TREE_CODE (t) != RECORD_TYPE)
6587 t = build_variant_type_copy (t);
6588 sysv_va_list_type_node = t;
6593 if (TREE_CODE (t) != RECORD_TYPE)
6594 t = build_variant_type_copy (t);
6595 sysv_va_list_type_node = t;
6597 if (ix86_abi != MS_ABI)
6599 t = ix86_build_builtin_va_list_abi (MS_ABI);
6600 if (TREE_CODE (t) != RECORD_TYPE)
6601 t = build_variant_type_copy (t);
6602 ms_va_list_type_node = t;
6607 if (TREE_CODE (t) != RECORD_TYPE)
6608 t = build_variant_type_copy (t);
6609 ms_va_list_type_node = t;
6616 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6619 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6628 int regparm = ix86_regparm;
6630 if (cum->call_abi != ix86_abi)
6631 regparm = (ix86_abi != SYSV_ABI
6632 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6634 /* GPR size of varargs save area. */
6635 if (cfun->va_list_gpr_size)
6636 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6638 ix86_varargs_gpr_size = 0;
6640 /* FPR size of varargs save area. We don't need it if we don't pass
6641 anything in SSE registers. */
6642 if (cum->sse_nregs && cfun->va_list_fpr_size)
6643 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6645 ix86_varargs_fpr_size = 0;
6647 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6650 save_area = frame_pointer_rtx;
6651 set = get_varargs_alias_set ();
/* Spill remaining named integer parameter registers into the GPR
   save area, one word apiece.  */
6653 for (i = cum->regno;
6655 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6658 mem = gen_rtx_MEM (Pmode,
6659 plus_constant (save_area, i * UNITS_PER_WORD));
6660 MEM_NOTRAP_P (mem) = 1;
6661 set_mem_alias_set (mem, set);
6662 emit_move_insn (mem, gen_rtx_REG (Pmode,
6663 x86_64_int_parameter_registers[i]));
6666 if (ix86_varargs_fpr_size)
6668 /* Now emit code to save SSE registers. The AX parameter contains number
6669 of SSE parameter registers used to call this function. We use
6670 sse_prologue_save insn template that produces computed jump across
6671 SSE saves. We need some preparation work to get this working. */
6673 label = gen_label_rtx ();
6674 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6676 /* Compute address to jump to :
6677 label - eax*4 + nnamed_sse_arguments*4 Or
6678 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6679 tmp_reg = gen_reg_rtx (Pmode);
6680 nsse_reg = gen_reg_rtx (Pmode);
6681 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6682 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6683 gen_rtx_MULT (Pmode, nsse_reg,
6686 /* vmovaps is one byte longer than movaps. */
6688 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6689 gen_rtx_PLUS (Pmode, tmp_reg,
6695 gen_rtx_CONST (DImode,
6696 gen_rtx_PLUS (DImode,
6698 GEN_INT (cum->sse_regno
6699 * (TARGET_AVX ? 5 : 4)))));
6701 emit_move_insn (nsse_reg, label_ref);
6702 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6704 /* Compute address of memory block we save into. We always use pointer
6705 pointing 127 bytes after first byte to store - this is needed to keep
6706 instruction size limited by 4 bytes (5 bytes for AVX) with one
6707 byte displacement. */
6708 tmp_reg = gen_reg_rtx (Pmode);
6709 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6710 plus_constant (save_area,
6711 ix86_varargs_gpr_size + 127)));
6712 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6713 MEM_NOTRAP_P (mem) = 1;
6714 set_mem_alias_set (mem, set);
6715 set_mem_align (mem, BITS_PER_WORD);
6717 /* And finally do the dirty job! */
6718 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6719 GEN_INT (cum->sse_regno), label));
/* MS x86-64 varargs setup: spill the remaining integer parameter
   registers into their home slots in the caller's shadow area.  */
6724 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6726 alias_set_type set = get_varargs_alias_set ();
6729 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
6733 mem = gen_rtx_MEM (Pmode,
6734 plus_constant (virtual_incoming_args_rtx,
6735 i * UNITS_PER_WORD));
6736 MEM_NOTRAP_P (mem) = 1;
6737 set_mem_alias_set (mem, set);
6739 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6740 emit_move_insn (mem, reg);
/* Target hook for setting up incoming varargs: advance past the last
   named argument (for stdarg functions) and dispatch to the MS- or
   SysV-ABI register-save routine.  NOTE(review): listing has gaps;
   code kept verbatim.  */
6745 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6746 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6749 CUMULATIVE_ARGS next_cum;
6752 /* This argument doesn't appear to be used anymore.  Which is good,
6753 because the old code here didn't suppress rtl generation.  */
6754 gcc_assert (!no_rtl);
6759 fntype = TREE_TYPE (current_function_decl);
6761 /* For varargs, we do not want to skip the dummy va_dcl argument.
6762 For stdargs, we do want to skip the last named argument.  */
6764 if (stdarg_p (fntype))
6765 function_arg_advance (&next_cum, mode, type, 1);
/* Select the ABI-specific save routine based on this call's ABI.  */
6767 if (cum->call_abi == MS_ABI)
6768 setup_incoming_varargs_ms_64 (&next_cum);
6770 setup_incoming_varargs_64 (&next_cum);
6773 /* Checks if TYPE is of kind va_list char *.  Returns true when the
     canonical va_list type is the MS-ABI one, or is the plain va_list
     while compiling for the MS ABI.  */
6776 is_va_list_char_pointer (tree type)
6780 /* For 32-bit it is always true.  */
6783 canonic = ix86_canonical_va_list_type (type);
6784 return (canonic == ms_va_list_type_node
6785 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6788 /* Implement va_start.  Initializes the four fields of the SysV x86-64
     va_list (gp_offset, fp_offset, overflow_arg_area, reg_save_area)
     from the current function's argument-passing state.  NOTE(review):
     listing has gaps; code kept verbatim.  */
6791 ix86_va_start (tree valist, rtx nextarg)
6793 HOST_WIDE_INT words, n_gpr, n_fpr;
6794 tree f_gpr, f_fpr, f_ovf, f_sav;
6795 tree gpr, fpr, ovf, sav, t;
6798 /* Only 64bit target needs something special.  */
6799 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6801 std_expand_builtin_va_start (valist, nextarg);
/* Walk the field chain of the SysV va_list record type.  */
6805 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6806 f_fpr = TREE_CHAIN (f_gpr);
6807 f_ovf = TREE_CHAIN (f_fpr);
6808 f_sav = TREE_CHAIN (f_ovf);
6810 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6811 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6812 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6813 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6814 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6816 /* Count number of gp and fp argument registers used.  */
6817 words = crtl->args.info.words;
6818 n_gpr = crtl->args.info.regno;
6819 n_fpr = crtl->args.info.sse_regno;
/* gp_offset = n_gpr * 8: each GP register slot is 8 bytes.  */
6821 if (cfun->va_list_gpr_size)
6823 type = TREE_TYPE (gpr);
6824 t = build2 (MODIFY_EXPR, type,
6825 gpr, build_int_cst (type, n_gpr * 8));
6826 TREE_SIDE_EFFECTS (t) = 1;
6827 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset = n_fpr * 16 past the GP area: SSE slots are 16 bytes and
   follow the 8 * X86_64_REGPARM_MAX bytes of GP save slots.  */
6830 if (TARGET_SSE && cfun->va_list_fpr_size)
6832 type = TREE_TYPE (fpr);
6833 t = build2 (MODIFY_EXPR, type, fpr,
6834 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6835 TREE_SIDE_EFFECTS (t) = 1;
6836 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6839 /* Find the overflow area.  */
6840 type = TREE_TYPE (ovf);
6841 t = make_tree (type, crtl->args.internal_arg_pointer);
6843 t = build2 (POINTER_PLUS_EXPR, type, t,
6844 size_int (words * UNITS_PER_WORD));
6845 t = build2 (MODIFY_EXPR, type, ovf, t);
6846 TREE_SIDE_EFFECTS (t) = 1;
6847 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6849 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6851 /* Find the register save area.
6852 Prologue of the function save it right above stack frame.  */
6853 type = TREE_TYPE (sav);
6854 t = make_tree (type, frame_pointer_rtx);
/* If no GP registers were saved, the save area effectively begins
   before where the GP slots would have been.  */
6855 if (!ix86_varargs_gpr_size)
6856 t = build2 (POINTER_PLUS_EXPR, type, t,
6857 size_int (-8 * X86_64_REGPARM_MAX));
6858 t = build2 (MODIFY_EXPR, type, sav, t);
6859 TREE_SIDE_EFFECTS (t) = 1;
6860 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6864 /* Implement va_arg.  Emits GIMPLE that fetches the next argument of
     TYPE either from the register save area (when enough GP/SSE slots
     remain) or from the stack overflow area.  NOTE(review): listing has
     gaps (sampled lines); code kept verbatim — control flow between
     visible lines is not fully shown.  */
6867 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6870 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6871 tree f_gpr, f_fpr, f_ovf, f_sav;
6872 tree gpr, fpr, ovf, sav, t;
6874 tree lab_false, lab_over = NULL_TREE;
6879 enum machine_mode nat_mode;
6882 /* Only 64bit target needs something special.  */
6883 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6884 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6886 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6887 f_fpr = TREE_CHAIN (f_gpr);
6888 f_ovf = TREE_CHAIN (f_fpr);
6889 f_sav = TREE_CHAIN (f_ovf);
6891 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6892 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6893 valist = build_va_arg_indirect_ref (valist);
6894 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6895 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6896 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer to the value.  */
6898 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6900 type = build_pointer_type (type);
6901 size = int_size_in_bytes (type);
6902 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6904 nat_mode = type_natural_mode (type, NULL);
6913 /* Unnamed 256bit vector mode parameters are passed on stack.  */
6914 if (ix86_cfun_abi () == SYSV_ABI)
6921 container = construct_container (nat_mode, TYPE_MODE (type),
6922 type, 0, X86_64_REGPARM_MAX,
6923 X86_64_SSE_REGPARM_MAX, intreg,
6928 /* Pull the value out of the saved registers.  */
6930 addr = create_tmp_var (ptr_type_node, "addr");
6934 int needed_intregs, needed_sseregs;
6936 tree int_addr, sse_addr;
6938 lab_false = create_artificial_label (UNKNOWN_LOCATION);
6939 lab_over = create_artificial_label (UNKNOWN_LOCATION);
6941 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when the value can't be copied directly out of
   the save area — e.g. over-aligned aggregates.  */
6943 need_temp = (!REG_P (container)
6944 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6945 || TYPE_ALIGN (type) > 128));
6947 /* In case we are passing structure, verify that it is consecutive block
6948 on the register save area.  If not we need to do moves.  */
6949 if (!need_temp && !REG_P (container))
6951 /* Verify that all registers are strictly consecutive  */
6952 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE pieces: expect 16-byte strides from FIRST_SSE_REG.  */
6956 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6958 rtx slot = XVECEXP (container, 0, i);
6959 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6960 || INTVAL (XEXP (slot, 1)) != i * 16
/* GP pieces: expect 8-byte strides from register 0.  */
6968 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6970 rtx slot = XVECEXP (container, 0, i);
6971 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6972 || INTVAL (XEXP (slot, 1)) != i * 8)
6984 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6985 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6988 /* First ensure that we fit completely in registers.  */
/* gp_offset >= limit means the GP save area is exhausted: take the
   overflow path (goto lab_false).  */
6991 t = build_int_cst (TREE_TYPE (gpr),
6992 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6993 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6994 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6995 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6996 gimplify_and_add (t, pre_p);
/* Same check for the SSE portion of the save area.  */
7000 t = build_int_cst (TREE_TYPE (fpr),
7001 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7002 + X86_64_REGPARM_MAX * 8);
7003 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7004 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7005 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7006 gimplify_and_add (t, pre_p);
7009 /* Compute index to start of area used for integer regs.  */
7012 /* int_addr = gpr + sav; */
7013 t = fold_convert (sizetype, gpr);
7014 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7015 gimplify_assign (int_addr, t, pre_p);
7019 /* sse_addr = fpr + sav; */
7020 t = fold_convert (sizetype, fpr);
7021 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7022 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: assemble the value piecewise into a stack temporary.  */
7027 tree temp = create_tmp_var (type, "va_arg_tmp");
7030 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7031 gimplify_assign (addr, t, pre_p);
7033 for (i = 0; i < XVECLEN (container, 0); i++)
7035 rtx slot = XVECEXP (container, 0, i);
7036 rtx reg = XEXP (slot, 0);
7037 enum machine_mode mode = GET_MODE (reg);
7038 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7039 tree addr_type = build_pointer_type (piece_type);
7040 tree daddr_type = build_pointer_type_for_mode (piece_type,
7044 tree dest_addr, dest;
7046 if (SSE_REGNO_P (REGNO (reg)))
7048 src_addr = sse_addr;
7049 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7053 src_addr = int_addr;
7054 src_offset = REGNO (reg) * 8;
7056 src_addr = fold_convert (addr_type, src_addr);
7057 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7058 size_int (src_offset));
7059 src = build_va_arg_indirect_ref (src_addr);
7061 dest_addr = fold_convert (daddr_type, addr);
7062 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7063 size_int (INTVAL (XEXP (slot, 1))));
7064 dest = build_va_arg_indirect_ref (dest_addr);
7066 gimplify_assign (dest, src, pre_p);
/* Bump gp_offset / fp_offset past the slots this argument consumed.  */
7072 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7073 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7074 gimplify_assign (gpr, t, pre_p);
7079 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7080 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7081 gimplify_assign (fpr, t, pre_p);
7084 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7086 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7089 /* ... otherwise out of the overflow area.  */
7091 /* When we align parameter on stack for caller, if the parameter
7092 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7093 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
7094 here with caller.  */
7095 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7096 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7097 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7099 /* Care for on-stack alignment if needed.  */
7100 if (arg_boundary <= 64
7101 || integer_zerop (TYPE_SIZE (type)))
/* Round ovf up to the argument's alignment before loading from it.  */
7105 HOST_WIDE_INT align = arg_boundary / 8;
7106 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7107 size_int (align - 1));
7108 t = fold_convert (sizetype, t);
7109 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7111 t = fold_convert (TREE_TYPE (ovf), t);
7113 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7114 gimplify_assign (addr, t, pre_p);
/* Advance the overflow pointer past the argument just consumed.  */
7116 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7117 size_int (rsize * UNITS_PER_WORD));
7118 gimplify_assign (unshare_expr (ovf), t, pre_p);
7121 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7123 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7124 addr = fold_convert (ptrtype, addr);
/* Extra dereference for by-reference arguments.  */
7127 addr = build_va_arg_indirect_ref (addr);
7128 return build_va_arg_indirect_ref (addr);
7131 /* Return nonzero if OPNUM's MEM should be matched
7132 in movabs* patterns.  The operand must be a (possibly SUBREG-wrapped)
     MEM, and volatile MEMs are only allowed when volatile_ok.  */
7135 ix86_check_movabs (rtx insn, int opnum)
7139 set = PATTERN (insn);
/* For PARALLEL patterns, inspect the first SET only.  */
7140 if (GET_CODE (set) == PARALLEL)
7141 set = XVECEXP (set, 0, 0);
7142 gcc_assert (GET_CODE (set) == SET);
7143 mem = XEXP (set, opnum);
/* Strip any stack of SUBREGs down to the underlying MEM.  */
7144 while (GET_CODE (mem) == SUBREG)
7145 mem = SUBREG_REG (mem);
7146 gcc_assert (MEM_P (mem));
7147 return (volatile_ok || !MEM_VOLATILE_P (mem));
7150 /* Initialize the table of extra 80387 mathematical constants, i.e. the
     values loadable with fldlg2/fldln2/fldl2e/fldl2t/fldpi.  Sets
     ext_80387_constants_init once done so this runs only once.  */
7153 init_ext_80387_constants (void)
7155 static const char * cst[5] =
7157 "0.3010299956639811952256464283594894482", /* 0: fldlg2  */
7158 "0.6931471805599453094286904741849753009", /* 1: fldln2  */
7159 "1.4426950408889634073876517827983434472", /* 2: fldl2e  */
7160 "3.3219280948873623478083405569094566090", /* 3: fldl2t  */
7161 "3.1415926535897932385128089594061862044", /* 4: fldpi  */
7165 for (i = 0; i < 5; i++)
7167 real_from_string (&ext_80387_constants_table[i], cst[i]);
7168 /* Ensure each constant is rounded to XFmode precision.  */
7169 real_convert (&ext_80387_constants_table[i],
7170 XFmode, &ext_80387_constants_table[i]);
7173 ext_80387_constants_init = 1;
7176 /* Return true if the constant is something that can be loaded with
7177 a special instruction.  Recognizes 0.0, 1.0, the five extended
     constants (fldlg2 etc.), and -0.0 / -1.0 (loadable as fldz/fld1 plus
     fchs).  NOTE(review): the exact return codes are established by
     lines elided from this listing.  */
7180 standard_80387_constant_p (rtx x)
7182 enum machine_mode mode = GET_MODE (x);
/* Only x87 float modes with CONST_DOUBLE operands qualify.  */
7186 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7189 if (x == CONST0_RTX (mode))
7191 if (x == CONST1_RTX (mode))
7194 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7196 /* For XFmode constants, try to find a special 80387 instruction when
7197 optimizing for size or on those CPUs that benefit from them.  */
7199 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
7203 if (! ext_80387_constants_init)
7204 init_ext_80387_constants ();
7206 for (i = 0; i < 5; i++)
7207 if (real_identical (&r, &ext_80387_constants_table[i]))
7211 /* Load of the constant -0.0 or -1.0 will be split as
7212 fldz;fchs or fld1;fchs sequence.  */
7213 if (real_isnegzero (&r))
7215 if (real_identical (&r, &dconstm1))
7221 /* Return the opcode of the special instruction to be used to load
     the 80387 constant X; dispatches on standard_80387_constant_p's
     return code.  NOTE(review): the switch cases are elided from this
     listing.  */
7225 standard_80387_constant_opcode (rtx x)
7227 switch (standard_80387_constant_p (x))
7251 /* Return the CONST_DOUBLE representing the 80387 constant that is
7252 loaded by the specified special instruction.  The argument IDX
7253 matches the return value from standard_80387_constant_p.  */
7256 standard_80387_constant_rtx (int idx)
/* Lazily build the constants table on first use.  */
7260 if (! ext_80387_constants_init)
7261 init_ext_80387_constants ();
7277 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7281 /* Return 1 if mode is a valid mode for sse.  NOTE(review): the body
     (presumably a switch over SSE vector modes) is elided from this
     listing.  */
7283 standard_sse_mode_p (enum machine_mode mode)
7300 /* Return 1 if X is all 0s.  For all 1s, return 2 if X is in 128bit
7301 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7302 modes and AVX is enabled.  Negative return values (-2/-3) flag the
     all-ones case when the required ISA extension is NOT enabled.  */
7305 standard_sse_constant_p (rtx x)
7307 enum machine_mode mode = GET_MODE (x);
7309 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7311 if (vector_all_ones_operand (x, mode))
7313 if (standard_sse_mode_p (mode))
7314 return TARGET_SSE2 ? 2 : -2;
7315 else if (VALID_AVX256_REG_MODE (mode))
7316 return TARGET_AVX ? 3 : -3;
7322 /* Return the opcode of the special instruction to be used to load
     the SSE constant X (all-zeros via xor, all-ones via pcmpeqd); the
     mnemonic depends on INSN's mode attribute and on AVX availability.
     NOTE(review): some case labels are elided from this listing.  */
7326 standard_sse_constant_opcode (rtx insn, rtx x)
7328 switch (standard_sse_constant_p (x))
/* All-zeros: clear the register with an xor of itself.  */
7331 switch (get_attr_mode (insn))
7334 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7336 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7338 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7340 return "vxorps\t%x0, %x0, %x0";
7342 return "vxorpd\t%x0, %x0, %x0";
7344 return "vpxor\t%x0, %x0, %x0";
/* All-ones: compare the register for equality with itself.  */
7350 switch (get_attr_mode (insn))
7355 return "vpcmpeqd\t%0, %0, %0";
7361 return "pcmpeqd\t%0, %0";
7366 /* Returns 1 if OP contains a symbol reference, searching recursively
     through the whole rtx for any SYMBOL_REF or LABEL_REF.  */
7369 symbolic_reference_mentioned_p (rtx op)
7374 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
/* Recurse over every sub-rtx ('e') and vector element ('E').  */
7377 fmt = GET_RTX_FORMAT (GET_CODE (op));
7378 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7384 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7385 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7389 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7396 /* Return 1 if it is appropriate to emit `ret' instructions in the
7397 body of a function.  Do this only if the epilogue is simple, needing a
7398 couple of insns.  Prior to reloading, we can't tell how many registers
7399 must be saved, so return 0 then.  Return 0 if there is no frame
7400 marker to de-allocate.  */
7403 ix86_can_use_return_insn_p (void)
7405 struct ix86_frame frame;
7407 if (! reload_completed || frame_pointer_needed)
7410 /* Don't allow more than 32 pop, since that's all we can do
7411 with one instruction.  */
7412 if (crtl->args.pops_args
7413 && crtl->args.size >= 32768)
/* Plain `ret' works only when nothing remains to deallocate or restore.  */
7416 ix86_compute_frame_layout (&frame);
7417 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7420 /* Value should be nonzero if functions must have frame pointers.
7421 Zero means the frame pointer need not be set up (and parms may
7422 be accessed via the stack pointer) in functions that seem suitable.  */
7425 ix86_frame_pointer_required (void)
7427 /* If we accessed previous frames, then the generated code expects
7428 to be able to access the saved ebp value in our frame.  */
7429 if (cfun->machine->accesses_prev_frame)
7432 /* Several x86 os'es need a frame pointer for other reasons,
7433 usually pertaining to setjmp.  */
7434 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7437 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7438 the frame pointer by default.  Turn it back on now if we've not
7439 got a leaf function.  */
7440 if (TARGET_OMIT_LEAF_FRAME_POINTER
7441 && (!current_function_is_leaf
7442 || ix86_current_function_calls_tls_descriptor))
7451 /* Record that the current function accesses previous call frames.
     Checked by ix86_frame_pointer_required to force a frame pointer.  */
7454 ix86_setup_frame_addresses (void)
7456 cfun->machine->accesses_prev_frame = 1;
/* Decide whether PC-thunk helpers can be emitted as hidden link-once
   (COMDAT) functions; requires assembler .hidden support plus one-only
   section support, or Mach-O.  */
7459 #ifndef USE_HIDDEN_LINKONCE
7460 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7461 #  define USE_HIDDEN_LINKONCE 1
7463 #  define USE_HIDDEN_LINKONCE 0
/* Bitmask of register numbers for which a get-PC thunk was referenced;
   ix86_file_end emits a thunk for each set bit.  */
7467 static int pic_labels_used;
7469 /* Fills in the label name that should be used for a pc thunk for
7470 the given register.  NAME must have room for at least 32 bytes.
     32-bit only (the thunk trick is unneeded on 64-bit RIP-relative
     code).  */
7473 get_pc_thunk_name (char name[32], unsigned int regno)
7475 gcc_assert (!TARGET_64BIT);
/* Link-once thunks get a well-known global name so duplicates merge;
   otherwise use a per-register internal label.  */
7477 if (USE_HIDDEN_LINKONCE)
7478 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7480 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7484 /* This function generates code for -fpic that loads %ebx with
7485 the return address of the caller and then returns.  Emitted at
     end-of-file, one thunk per register recorded in pic_labels_used.
     NOTE(review): listing has gaps; code kept verbatim.  */
7488 ix86_file_end (void)
7493 for (regno = 0; regno < 8; ++regno)
/* Skip registers whose thunk was never referenced.  */
7497 if (! ((pic_labels_used >> regno) & 1))
7500 get_pc_thunk_name (name, regno);
/* Mach-O path: weak definition in the coalesced text section.  */
7505 switch_to_section (darwin_sections[text_coal_section]);
7506 fputs ("\t.weak_definition\t", asm_out_file);
7507 assemble_name (asm_out_file, name);
7508 fputs ("\n\t.private_extern\t", asm_out_file);
7509 assemble_name (asm_out_file, name);
7510 fputs ("\n", asm_out_file);
7511 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF path: emit the thunk as a hidden COMDAT function so multiple
   translation units share a single copy.  */
7515 if (USE_HIDDEN_LINKONCE)
7519 decl = build_decl (BUILTINS_LOCATION,
7520 FUNCTION_DECL, get_identifier (name),
7522 TREE_PUBLIC (decl) = 1;
7523 TREE_STATIC (decl) = 1;
7524 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7526 (*targetm.asm_out.unique_section) (decl, 0);
7527 switch_to_section (get_named_section (decl, NULL, 0));
7529 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7530 fputs ("\t.hidden\t", asm_out_file);
7531 assemble_name (asm_out_file, name);
7532 fputc ('\n', asm_out_file);
7533 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7537 switch_to_section (text_section);
7538 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: mov (%esp), %reg; ret — loads the return address.  */
7541 xops[0] = gen_rtx_REG (Pmode, regno);
7542 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7543 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7544 output_asm_insn ("ret", xops);
7547 if (NEED_INDICATE_EXEC_STACK)
7548 file_end_indicate_exec_stack ();
7551 /* Emit code for the SET_GOT patterns: load the GOT base address into
     DEST, via VxWorks RTP tables, a call/pop sequence, or a get-PC
     thunk call plus an add of _GLOBAL_OFFSET_TABLE_.  NOTE(review):
     listing has gaps; code kept verbatim.  */
7554 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7560 if (TARGET_VXWORKS_RTP && flag_pic)
7562 /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7563 xops[2] = gen_rtx_MEM (Pmode,
7564 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7565 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7567 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7568 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7569 an unadorned address.  */
7570 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7571 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7572 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7576 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction (or without PIC) use the classic
   call-next-insn / pop sequence to get the PC.  */
7578 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7580 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7583 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7585 output_asm_insn ("call\t%a2", xops);
7588 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7589 is what will be referenced by the Mach-O PIC subsystem.  */
7591 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7594 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7595 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7598 output_asm_insn ("pop%z0\t%0", xops);
/* Otherwise call the per-register get-PC thunk; record its use so
   ix86_file_end emits the thunk body.  */
7603 get_pc_thunk_name (name, REGNO (dest));
7604 pic_labels_used |= 1 << REGNO (dest);
7606 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7607 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7608 output_asm_insn ("call\t%X2", xops);
7609 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7610 is what will be referenced by the Mach-O PIC subsystem.  */
7613 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7615 targetm.asm_out.internal_label (asm_out_file, "L",
7616 CODE_LABEL_NUMBER (label));
/* Finally add the GOT offset to turn the PC into the GOT base.  */
7623 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7624 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7626 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7631 /* Generate an "push" pattern for input ARG: a SET storing ARG through
     a pre-decremented stack pointer.  Also tracks the CFA offset when
     the CFA is currently based on the stack pointer.  NOTE(review):
     signature line is elided from this listing.  */
7636 if (ix86_cfa_state->reg == stack_pointer_rtx)
7637 ix86_cfa_state->offset += UNITS_PER_WORD;
7639 return gen_rtx_SET (VOIDmode,
7641 gen_rtx_PRE_DEC (Pmode,
7642 stack_pointer_rtx)),
7646 /* Return >= 0 if there is an unused call-clobbered register available
7647 for the entire function.  Only considered for leaf functions that do
     no profiling and no TLS-descriptor calls; scans eax/ecx/edx (regs
     2..0).  Returns INVALID_REGNUM otherwise.  */
7650 ix86_select_alt_pic_regnum (void)
7652 if (current_function_is_leaf && !crtl->profile
7653 && !ix86_current_function_calls_tls_descriptor)
7656 /* Can't use the same register for both PIC and DRAP.  */
7658 drap = REGNO (crtl->drap_reg);
7661 for (i = 2; i >= 0; --i)
7662 if (i != drap && !df_regs_ever_live_p (i))
7666 return INVALID_REGNUM;
7669 /* Return 1 if we need to save REGNO in the prologue.  MAYBE_EH_RETURN
     additionally counts EH return data registers as needing a save.  */
7671 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved if it is live and cannot be replaced
   by an alternate call-clobbered register.  */
7673 if (pic_offset_table_rtx
7674 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7675 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7677 || crtl->calls_eh_return
7678 || crtl->uses_const_pool))
7680 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
/* EH data registers are clobbered by __builtin_eh_return.  */
7685 if (crtl->calls_eh_return && maybe_eh_return)
7690 unsigned test = EH_RETURN_DATA_REGNO (i);
7691 if (test == INVALID_REGNUM)
7698 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
/* Otherwise: live, callee-saved, not fixed, and not the hard frame
   pointer when that is already maintained separately.  */
7701 return (df_regs_ever_live_p (regno)
7702 && !call_used_regs[regno]
7703 && !fixed_regs[regno]
7704 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7707 /* Return number of saved general purpose registers.  */
7710 ix86_nsaved_regs (void)
7715 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7716 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7721 /* Return number of saved SSE registers.  Always zero outside the
     MS ABI, which is the only ABI with callee-saved XMM registers.  */
7724 ix86_nsaved_sseregs (void)
7729 if (ix86_cfun_abi () != MS_ABI)
7731 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7732 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7737 /* Given FROM and TO register numbers, say whether this elimination is
7738 allowed.  If stack alignment is needed, we can only replace argument
7739 pointer with hard frame pointer, or replace frame pointer with stack
7740 pointer.  Otherwise, frame pointer elimination is automatically
7741 handled and all other eliminations are valid.  */
7744 ix86_can_eliminate (int from, int to)
7746 if (stack_realign_fp)
7747 return ((from == ARG_POINTER_REGNUM
7748 && to == HARD_FRAME_POINTER_REGNUM)
7749 || (from == FRAME_POINTER_REGNUM
7750 && to == STACK_POINTER_REGNUM));
/* Eliminating to the stack pointer only works without a frame pointer.  */
7752 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7755 /* Return the offset between two registers, one to be eliminated, and the other
7756 its replacement, at the start of a routine.  All offsets come from the
     computed frame layout.  */
7759 ix86_initial_elimination_offset (int from, int to)
7761 struct ix86_frame frame;
7762 ix86_compute_frame_layout (&frame);
7764 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7765 return frame.hard_frame_pointer_offset;
7766 else if (from == FRAME_POINTER_REGNUM
7767 && to == HARD_FRAME_POINTER_REGNUM)
7768 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases eliminate to the stack pointer.  */
7771 gcc_assert (to == STACK_POINTER_REGNUM);
7773 if (from == ARG_POINTER_REGNUM)
7774 return frame.stack_pointer_offset;
7776 gcc_assert (from == FRAME_POINTER_REGNUM);
7777 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7781 /* In a dynamically-aligned function, we can't know the offset from
7782 stack pointer to frame pointer, so we must ensure that setjmp
7783 eliminates fp against the hard fp (%ebp) rather than trying to
7784 index from %esp up to the top of the frame across a gap that is
7785 of unknown (at compile-time) size.  */
7787 ix86_builtin_setjmp_frame_value (void)
7789 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7792 /* Fill structure ix86_frame about frame of currently computed function.
     Lays the frame out top-down: return address / saved %ebp, GP register
     save area, padded SSE save area, varargs save area, aligned locals,
     outgoing arguments, final alignment padding — and then carves the
     red zone out of to_allocate where permitted.  NOTE(review): listing
     has gaps; code kept verbatim.  */
7795 ix86_compute_frame_layout (struct ix86_frame *frame)
7797 HOST_WIDE_INT total_size;
7798 unsigned int stack_alignment_needed;
7799 HOST_WIDE_INT offset;
7800 unsigned int preferred_alignment;
7801 HOST_WIDE_INT size = get_frame_size ();
7803 frame->nregs = ix86_nsaved_regs ();
7804 frame->nsseregs = ix86_nsaved_sseregs ();
7807 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7808 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7810 /* MS ABI seem to require stack alignment to be always 16 except for function
7812 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7814 preferred_alignment = 16;
7815 stack_alignment_needed = 16;
7816 crtl->preferred_stack_boundary = 128;
7817 crtl->stack_alignment_needed = 128;
7820 gcc_assert (!size || stack_alignment_needed);
7821 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7822 gcc_assert (preferred_alignment <= stack_alignment_needed);
7824 /* During reload iteration the amount of registers saved can change.
7825 Recompute the value as needed.  Do not recompute when amount of registers
7826 didn't change as reload does multiple calls to the function and does not
7827 expect the decision to change within single iteration.  */
7828 if (!optimize_function_for_size_p (cfun)
7829 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7831 int count = frame->nregs;
7833 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7834 /* The fast prologue uses move instead of push to save registers.  This
7835 is significantly longer, but also executes faster as modern hardware
7836 can execute the moves in parallel, but can't do that for push/pop.
7838 Be careful about choosing what prologue to emit: When function takes
7839 many instructions to execute we may use slow version as well as in
7840 case function is known to be outside hot spot (this is known with
7841 feedback only).  Weight the size of function by number of registers
7842 to save as it is cheap to use one or two push instructions but very
7843 slow to use many of them.  */
7845 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7846 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7847 || (flag_branch_probabilities
7848 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7849 cfun->machine->use_fast_prologue_epilogue = false;
7851 cfun->machine->use_fast_prologue_epilogue
7852 = !expensive_function_p (count);
7854 if (TARGET_PROLOGUE_USING_MOVE
7855 && cfun->machine->use_fast_prologue_epilogue)
7856 frame->save_regs_using_mov = true;
7858 frame->save_regs_using_mov = false;
7861 /* Skip return address and saved base pointer.  */
7862 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7864 frame->hard_frame_pointer_offset = offset;
7866 /* Set offset to aligned because the realigned frame starts from
7868 if (stack_realign_fp)
7869 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7871 /* Register save area  */
7872 offset += frame->nregs * UNITS_PER_WORD;
7874 /* Align SSE reg save area.  */
7875 if (frame->nsseregs)
7876 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7878 frame->padding0 = 0;
7880 /* SSE register save area.  */
7881 offset += frame->padding0 + frame->nsseregs * 16;
7884 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7885 offset += frame->va_arg_size;
7887 /* Align start of frame for local function.  */
7888 frame->padding1 = ((offset + stack_alignment_needed - 1)
7889 & -stack_alignment_needed) - offset;
7891 offset += frame->padding1;
7893 /* Frame pointer points here.  */
7894 frame->frame_pointer_offset = offset;
7898 /* Add outgoing arguments area.  Can be skipped if we eliminated
7899 all the function calls as dead code.
7900 Skipping is however impossible when function calls alloca.  Alloca
7901 expander assumes that last crtl->outgoing_args_size
7902 of stack frame are unused.  */
7903 if (ACCUMULATE_OUTGOING_ARGS
7904 && (!current_function_is_leaf || cfun->calls_alloca
7905 || ix86_current_function_calls_tls_descriptor))
7907 offset += crtl->outgoing_args_size;
7908 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7911 frame->outgoing_arguments_size = 0;
7913 /* Align stack boundary.  Only needed if we're calling another function
7915 if (!current_function_is_leaf || cfun->calls_alloca
7916 || ix86_current_function_calls_tls_descriptor)
7917 frame->padding2 = ((offset + preferred_alignment - 1)
7918 & -preferred_alignment) - offset;
7920 frame->padding2 = 0;
7922 offset += frame->padding2;
7924 /* We've reached end of stack frame.  */
7925 frame->stack_pointer_offset = offset;
7927 /* Size prologue needs to allocate.  */
7928 frame->to_allocate =
7929 (size + frame->padding1 + frame->padding2
7930 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Moves are pointless for tiny frames and impossible for displacements
   that don't fit a 32-bit immediate.  */
7932 if ((!frame->to_allocate && frame->nregs <= 1)
7933 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7934 frame->save_regs_using_mov = false;
/* Leaf SysV functions with an unchanging %rsp may use the red zone
   below the stack pointer instead of allocating.  */
7936 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7937 && current_function_is_leaf
7938 && !ix86_current_function_calls_tls_descriptor)
7940 frame->red_zone_size = frame->to_allocate;
7941 if (frame->save_regs_using_mov)
7942 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7943 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7944 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7947 frame->red_zone_size = 0;
7948 frame->to_allocate -= frame->red_zone_size;
7949 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (normally compiled out; guard is
   elided from this listing).  */
7951 fprintf (stderr, "\n");
7952 fprintf (stderr, "size: %ld\n", (long)size);
7953 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7954 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7955 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7956 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7957 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7958 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7959 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7960 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7961 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7962 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7963 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7964 (long)frame->hard_frame_pointer_offset);
7965 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7966 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7967 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7968 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7972 /* Emit code to save registers in the prologue using push insns, in
     descending register-number order.  Each push is marked frame-related
     for DWARF CFI generation.  */
7975 ix86_emit_save_regs (void)
7980 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7981 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7983 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7984 RTX_FRAME_RELATED_P (insn) = 1;
7988 /* Emit code to save registers using MOV insns.  First register
7989 is saved to POINTER + OFFSET; subsequent ones at successive
     word offsets.  */
7991 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7996 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7997 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7999 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8001 gen_rtx_REG (Pmode, regno));
8002 RTX_FRAME_RELATED_P (insn) = 1;
8003 offset += UNITS_PER_WORD;
8007 /* Emit code to save SSE registers using MOV insns. First register
8008 is saved at POINTER + OFFSET. */
8010 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
/* Like ix86_emit_save_regs_using_mov, but for SSE registers: each save
   is a 16-byte TImode store, and the slot is marked 128-bit aligned so
   an aligned move can be used.  (Listing elides declarations/braces.)  */
8016 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8017 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8019 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8020 set_mem_align (mem, 128);
8021 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8022 RTX_FRAME_RELATED_P (insn) = 1;
/* Queue of REG_CFA_RESTORE notes waiting to be attached to the next
   stack-manipulation insn; GTY-marked so it survives garbage collection.  */
8027 static GTY(()) rtx queued_cfa_restores;
8029 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
8030 manipulation insn. Don't add it if the previously
8031 saved value will be left untouched within stack red-zone till return,
8032 as unwinders can find the same value in the register and
8036 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
/* NOTE(review): the listing elides the start of this condition; visible
   clauses suppress the note when the save slot stays inside the red zone
   (red_offset + RED_ZONE_SIZE >= 0) on a non-MS 64-bit ABI and the
   function does not pop a huge argument block on return.  */
8039 && !TARGET_64BIT_MS_ABI
8040 && red_offset + RED_ZONE_SIZE >= 0
8041 && crtl->args.pops_args < 65536)
/* Attach the restore note immediately when INSN is supplied ...  */
8046 add_reg_note (insn, REG_CFA_RESTORE, reg);
8047 RTX_FRAME_RELATED_P (insn) = 1;
/* ... otherwise chain it onto the queue for a later stack insn.  */
8051 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8054 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8057 ix86_add_queued_cfa_restore_notes (rtx insn)
8060 if (!queued_cfa_restores)
/* Find the tail of the queued note chain, splice INSN's existing notes
   after it, then install the whole chain as INSN's notes and reset the
   queue.  */
8062 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8064 XEXP (last, 1) = REG_NOTES (insn);
8065 REG_NOTES (insn) = queued_cfa_restores;
8066 queued_cfa_restores = NULL_RTX;
8067 RTX_FRAME_RELATED_P (insn) = 1;
8070 /* Expand prologue or epilogue stack adjustment.
8071 The pattern exist to put a dependency on all ebp-based memory accesses.
8072 STYLE should be negative if instructions should be marked as frame related,
8073 zero if %r11 register is live and cannot be freely used and positive
8077 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8078 int style, bool set_cfa)
/* NOTE(review): listing elides the branch structure; visibly, a plain
   adjust insn is used when possible, a rex64 variant when OFFSET fits a
   signed 32-bit immediate, and otherwise OFFSET is materialized in %r11.  */
8083 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8084 else if (x86_64_immediate_operand (offset, DImode))
8085 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8089 /* r11 is used by indirect sibcall return as well, set before the
8090 epilogue and used after the epilogue. ATM indirect sibcall
8091 shouldn't be used together with huge frame sizes in one
8092 function because of the frame_size check in sibcall.c. */
8094 r11 = gen_rtx_REG (DImode, R11_REG);
8095 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8097 RTX_FRAME_RELATED_P (insn) = 1;
8098 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
/* Flush any REG_CFA_RESTORE notes queued for the next stack insn.  */
8103 ix86_add_queued_cfa_restore_notes (insn);
/* When SET_CFA, track the CFA move from SRC to DEST and emit an
   explicit REG_CFA_ADJUST_CFA note describing DEST = SRC + OFFSET.  */
8109 gcc_assert (ix86_cfa_state->reg == src);
8110 ix86_cfa_state->offset += INTVAL (offset);
8111 ix86_cfa_state->reg = dest;
8113 r = gen_rtx_PLUS (Pmode, src, offset);
8114 r = gen_rtx_SET (VOIDmode, dest, r);
8115 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8116 RTX_FRAME_RELATED_P (insn) = 1;
/* Otherwise only mark frame-related for negative STYLE (per the header
   comment above).  */
8119 RTX_FRAME_RELATED_P (insn) = 1;
8122 /* Find an available register to be used as dynamic realign argument
8123 pointer register. Such a register will be written in prologue and
8124 used in begin of body, so it must not be
8125 1. parameter passing register.
8127 We reuse static-chain register if it is available. Otherwise, we
8128 use DI for i386 and R13 for x86-64. We chose R13 since it has
8131 Return: the regno of chosen register. */
8134 find_drap_reg (void)
8136 tree decl = cfun->decl;
/* 64-bit path (listing elides the TARGET_64BIT test itself).  */
8140 /* Use R13 for nested function or function need static chain.
8141 Since function with tail call may use any caller-saved
8142 registers in epilogue, DRAP must not use caller-saved
8143 register in such case. */
8144 if ((decl_function_context (decl)
8145 && !DECL_NO_STATIC_CHAIN (decl))
8146 || crtl->tail_call_emit)
/* 32-bit path.  */
8153 /* Use DI for nested function or function need static chain.
8154 Since function with tail call may use any caller-saved
8155 registers in epilogue, DRAP must not use caller-saved
8156 register in such case. */
8157 if ((decl_function_context (decl)
8158 && !DECL_NO_STATIC_CHAIN (decl))
8159 || crtl->tail_call_emit)
8162 /* Reuse static chain register if it isn't used for parameter
/* ECX is the static-chain/fastcall register on ia32, so it is only
   reusable when regparm <= 2 and the function is not fastcall.  */
8164 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8165 && !lookup_attribute ("fastcall",
8166 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8173 /* Update incoming stack boundary and estimated stack alignment. */
8176 ix86_update_stack_boundary (void)
8178 /* Prefer the one specified at command line. */
8179 ix86_incoming_stack_boundary
8180 = (ix86_user_incoming_stack_boundary
8181 ? ix86_user_incoming_stack_boundary
8182 : ix86_default_incoming_stack_boundary)
8184 /* Incoming stack alignment can be changed on individual functions
8185 via force_align_arg_pointer attribute. We use the smallest
8186 incoming stack boundary. */
8187 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8188 && lookup_attribute (ix86_force_align_arg_pointer_string,
8189 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8190 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8192 /* The incoming stack frame has to be aligned at least at
8193 parm_stack_boundary. */
8194 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8195 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8197 /* Stack at entrance of main is aligned by runtime. We use the
8198 smallest incoming stack boundary. */
8199 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8200 && DECL_NAME (current_function_decl)
8201 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8202 && DECL_FILE_SCOPE_P (current_function_decl))
8203 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8205 /* x86_64 vararg needs 16byte stack alignment for register save
/* (Listing elides the TARGET_64BIT/stdarg condition.)  */
8209 && crtl->stack_alignment_estimated < 128)
8210 crtl->stack_alignment_estimated = 128;
8213 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8214 needed or an rtx for DRAP otherwise. */
8217 ix86_get_drap_rtx (void)
/* With -mforce-drap, or when outgoing args are pushed (not accumulated),
   record that a DRAP may be needed.  */
8219 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8220 crtl->need_drap = true;
8222 if (stack_realign_drap)
8224 /* Assign DRAP to vDRAP and returns vDRAP */
8225 unsigned int regno = find_drap_reg ();
/* (Listing elides locals and the need_drap bookkeeping here.)  */
8230 arg_ptr = gen_rtx_REG (Pmode, regno);
8231 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP into a pseudo and emit that copy right after the
   function entry so the body can use the virtual DRAP.  */
8234 drap_vreg = copy_to_reg (arg_ptr);
8238 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8239 RTX_FRAME_RELATED_P (insn) = 1;
8246 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8249 ix86_internal_arg_pointer (void)
/* Incoming arguments are always addressed off the virtual incoming-args
   pointer; any DRAP adjustment happens elsewhere.  */
8251 return virtual_incoming_args_rtx;
8254 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8255 to be generated in correct form. */
8257 ix86_finalize_stack_realign_flags (void)
8259 /* Check if stack realign is really needed after reload, and
8260 stores result in cfun */
/* The frame must honor whichever is stricter: the parameter stack
   boundary or the incoming stack boundary.  */
8261 unsigned int incoming_stack_boundary
8262 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8263 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
/* Leaf functions only need the max used slot alignment; non-leaf
   functions need the full computed stack_alignment_needed.  */
8264 unsigned int stack_realign = (incoming_stack_boundary
8265 < (current_function_is_leaf
8266 ? crtl->max_used_stack_slot_alignment
8267 : crtl->stack_alignment_needed));
8269 if (crtl->stack_realign_finalized)
8271 /* After stack_realign_needed is finalized, we can't no longer
/* Re-running must reach the same answer.  */
8273 gcc_assert (crtl->stack_realign_needed == stack_realign);
8277 crtl->stack_realign_needed = stack_realign;
8278 crtl->stack_realign_finalized = true;
8282 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): the listing elides many lines throughout this function
   (locals, braces, else-arms); comments annotate visible code only.  */
8285 ix86_expand_prologue (void)
8289 struct ix86_frame frame;
8290 HOST_WIDE_INT allocate;
8292 ix86_finalize_stack_realign_flags ();
8294 /* DRAP should not coexist with stack_realign_fp */
8295 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8297 /* Initialize CFA state for before the prologue. */
8298 ix86_cfa_state->reg = stack_pointer_rtx;
8299 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8301 ix86_compute_frame_layout (&frame);
8303 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8304 of DRAP is needed and stack realignment is really needed after reload */
8305 if (crtl->drap_reg && crtl->stack_realign_needed)
8308 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
/* If the DRAP register is call-saved we will push it below, which
   shifts the argument pointer by one extra word.  */
8309 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8310 ? 0 : UNITS_PER_WORD);
8312 gcc_assert (stack_realign_drap);
8314 /* Grab the argument pointer. */
8315 x = plus_constant (stack_pointer_rtx,
8316 (UNITS_PER_WORD + param_ptr_offset));
8319 /* Only need to push parameter pointer reg if it is caller
8321 if (!call_used_regs[REGNO (crtl->drap_reg)])
8323 /* Push arg pointer reg */
8324 insn = emit_insn (gen_push (y));
8325 RTX_FRAME_RELATED_P (insn) = 1;
8328 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8329 RTX_FRAME_RELATED_P (insn) = 1;
/* From here until SP is restored, the DRAP register is the CFA.  */
8330 ix86_cfa_state->reg = crtl->drap_reg;
8332 /* Align the stack. */
8333 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8335 GEN_INT (-align_bytes)));
8336 RTX_FRAME_RELATED_P (insn) = 1;
8338 /* Replicate the return address on the stack so that return
8339 address can be reached via (argp - 1) slot. This is needed
8340 to implement macro RETURN_ADDR_RTX and intrinsic function
8341 expand_builtin_return_addr etc. */
8343 x = gen_frame_mem (Pmode,
8344 plus_constant (x, -UNITS_PER_WORD));
8345 insn = emit_insn (gen_push (x));
8346 RTX_FRAME_RELATED_P (insn) = 1;
8349 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8350 slower on all targets. Also sdb doesn't like it. */
8352 if (frame_pointer_needed)
/* Standard push %ebp / mov %esp,%ebp frame setup.  */
8354 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8355 RTX_FRAME_RELATED_P (insn) = 1;
8357 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8358 RTX_FRAME_RELATED_P (insn) = 1;
8360 if (ix86_cfa_state->reg == stack_pointer_rtx)
8361 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8364 if (stack_realign_fp)
8366 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8367 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8369 /* Align the stack. */
8370 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8372 GEN_INT (-align_bytes)));
8373 RTX_FRAME_RELATED_P (insn) = 1;
/* Bytes to allocate: locals plus SSE save area plus its padding.  */
8376 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8378 if (!frame.save_regs_using_mov)
8379 ix86_emit_save_regs ();
/* When saving with MOVs, the integer save area is part of the
   allocation instead of being pushed.  */
8381 allocate += frame.nregs * UNITS_PER_WORD;
8383 /* When using red zone we may start register saving before allocating
8384 the stack frame saving one cycle of the prologue. However I will
8385 avoid doing this if I am going to have to probe the stack since
8386 at least on x86_64 the stack probe can turn into a call that clobbers
8387 a red zone location */
8388 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8389 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8390 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8391 && !crtl->stack_realign_needed)
8392 ? hard_frame_pointer_rtx
8393 : stack_pointer_rtx,
8394 -frame.nregs * UNITS_PER_WORD);
/* Small/unprobed frames: a single SP adjustment.  */
8398 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8399 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8400 GEN_INT (-allocate), -1,
8401 ix86_cfa_state->reg == stack_pointer_rtx)
/* Large frames with stack probing: call the allocate_stack worker
   with the size in %eax/%rax.  */
8404 /* Only valid for Win32. */
8405 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8409 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8411 if (cfun->machine->call_abi == MS_ABI)
/* If %eax carries an incoming argument, preserve it around the call.  */
8414 eax_live = ix86_eax_live_at_start_p ();
8418 emit_insn (gen_push (eax));
8419 allocate -= UNITS_PER_WORD;
8422 emit_move_insn (eax, GEN_INT (allocate));
8425 insn = gen_allocate_stack_worker_64 (eax, eax);
8427 insn = gen_allocate_stack_worker_32 (eax, eax);
8428 insn = emit_insn (insn);
/* The worker moves SP by a runtime amount; describe the CFA change
   explicitly since the pattern itself is opaque to dwarf2out.  */
8430 if (ix86_cfa_state->reg == stack_pointer_rtx)
8432 ix86_cfa_state->offset += allocate;
8433 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8434 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8435 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8436 RTX_FRAME_RELATED_P (insn) = 1;
/* Reload the saved %eax from its stack slot if it was live.  */
8441 if (frame_pointer_needed)
8442 t = plus_constant (hard_frame_pointer_rtx,
8445 - frame.nregs * UNITS_PER_WORD);
8447 t = plus_constant (stack_pointer_rtx, allocate);
8448 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* MOV-based register saves that were not done early (no red zone).  */
8452 if (frame.save_regs_using_mov
8453 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8454 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8456 if (!frame_pointer_needed
8457 || !frame.to_allocate
8458 || crtl->stack_realign_needed)
8459 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8461 + frame.nsseregs * 16 + frame.padding0);
8463 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8464 -frame.nregs * UNITS_PER_WORD);
8466 if (!frame_pointer_needed
8467 || !frame.to_allocate
8468 || crtl->stack_realign_needed)
8469 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8472 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8473 - frame.nregs * UNITS_PER_WORD
8474 - frame.nsseregs * 16
/* Set up the PIC register (GOT pointer) if this function needs it.  */
8477 pic_reg_used = false;
8478 if (pic_offset_table_rtx
8479 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8482 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8484 if (alt_pic_reg_used != INVALID_REGNUM)
8485 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8487 pic_reg_used = true;
8494 if (ix86_cmodel == CM_LARGE_PIC)
/* Large PIC model: materialize the GOT address with a rip-relative
   label plus a 64-bit offset in %r11.  */
8496 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8497 rtx label = gen_label_rtx ();
8499 LABEL_PRESERVE_P (label) = 1;
8500 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8501 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8502 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8503 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8504 pic_offset_table_rtx, tmp_reg));
8507 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8510 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8513 /* In the pic_reg_used case, make sure that the got load isn't deleted
8514 when mcount needs it. Blockage to avoid call movement across mcount
8515 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8517 if (crtl->profile && pic_reg_used)
8518 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8520 if (crtl->drap_reg && !crtl->stack_realign_needed)
8522 /* vDRAP is setup but after reload it turns out stack realign
8523 isn't necessary, here we will emit prologue to setup DRAP
8524 without stack realign adjustment */
8525 int drap_bp_offset = UNITS_PER_WORD * 2;
8526 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8527 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8530 /* Prevent instructions from being scheduled into register save push
8531 sequence when access to the redzone area is done through frame pointer.
8532 The offset between the frame pointer and the stack pointer is calculated
8533 relative to the value of the stack pointer at the end of the function
8534 prologue, and moving instructions that access redzone area via frame
8535 pointer inside push sequence violates this assumption. */
8536 if (frame_pointer_needed && frame.red_zone_size)
8537 emit_insn (gen_memory_blockage ());
8539 /* Emit cld instruction if stringops are used in the function. */
8540 if (TARGET_CLD && ix86_current_function_needs_cld)
8541 emit_insn (gen_cld ());
8544 /* Emit code to restore REG using a POP insn. */
/* RED_OFFSET is the slot's offset relative to SP at function entry,
   used to decide whether a CFA-restore note is needed (red zone).  */
8547 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8549 rtx insn = emit_insn (ix86_gen_pop1 (reg));
8551 if (ix86_cfa_state->reg == crtl->drap_reg
8552 && REGNO (reg) == REGNO (crtl->drap_reg))
8554 /* Previously we'd represented the CFA as an expression
8555 like *(%ebp - 8). We've just popped that value from
8556 the stack, which means we need to reset the CFA to
8557 the drap register. This will remain until we restore
8558 the stack pointer. */
8559 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8560 RTX_FRAME_RELATED_P (insn) = 1;
8564 if (ix86_cfa_state->reg == stack_pointer_rtx)
/* A pop moves SP up one word; record the CFA adjustment using the
   SP-update half of the pop's parallel pattern.  */
8566 ix86_cfa_state->offset -= UNITS_PER_WORD;
8567 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8568 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8569 RTX_FRAME_RELATED_P (insn) = 1;
8572 /* When the frame pointer is the CFA, and we pop it, we are
8573 swapping back to the stack pointer as the CFA. This happens
8574 for stack frames that don't allocate other data, so we assume
8575 the stack pointer is now pointing at the return address, i.e.
8576 the function entry state, which makes the offset be 1 word. */
8577 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8578 && reg == hard_frame_pointer_rtx)
8580 ix86_cfa_state->reg = stack_pointer_rtx;
8581 ix86_cfa_state->offset = UNITS_PER_WORD;
8583 add_reg_note (insn, REG_CFA_DEF_CFA,
8584 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8585 GEN_INT (UNITS_PER_WORD)));
8586 RTX_FRAME_RELATED_P (insn) = 1;
8589 ix86_add_cfa_restore_note (insn, reg, red_offset);
8592 /* Emit code to restore saved registers using POP insns. */
8595 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
/* Pop integer registers in ascending regno order — the mirror image of
   the descending-order pushes in ix86_emit_save_regs.  RED_OFFSET
   tracks each slot's entry-relative position for CFA-restore notes.  */
8599 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8600 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8602 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
8604 red_offset += UNITS_PER_WORD;
8608 /* Emit code and notes for the LEAVE instruction. */
8611 ix86_emit_leave (HOST_WIDE_INT red_offset)
8613 rtx insn = emit_insn (ix86_gen_leave ());
/* LEAVE manipulates the stack, so flush any queued restore notes.  */
8615 ix86_add_queued_cfa_restore_notes (insn);
8617 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
/* Use the SP-restoring half of LEAVE's parallel as the CFA adjust,
   and note that %ebp itself is restored here.  */
8619 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8620 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8621 RTX_FRAME_RELATED_P (insn) = 1;
8622 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
8626 /* Emit code to restore saved registers using MOV insns. First register
8627 is restored from POINTER + OFFSET. */
/* MAYBE_EH_RETURN selects the eh_return variant of ix86_save_reg;
   RED_OFFSET tracks entry-relative slot positions for restore notes.  */
8629 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8630 HOST_WIDE_INT red_offset,
8631 int maybe_eh_return)
8634 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8637 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8638 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8640 rtx reg = gen_rtx_REG (Pmode, regno);
8642 /* Ensure that adjust_address won't be forced to produce pointer
8643 out of range allowed by x86-64 instruction set. */
8644 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset exceeds signed 32 bits: rebase through %r11.
   (Listing elides the offset reset that follows.)  */
8648 r11 = gen_rtx_REG (DImode, R11_REG);
8649 emit_move_insn (r11, GEN_INT (offset));
8650 emit_insn (gen_adddi3 (r11, r11, pointer));
8651 base_address = gen_rtx_MEM (Pmode, r11);
8654 insn = emit_move_insn (reg,
8655 adjust_address (base_address, Pmode, offset));
8656 offset += UNITS_PER_WORD;
8658 if (ix86_cfa_state->reg == crtl->drap_reg
8659 && regno == REGNO (crtl->drap_reg))
8661 /* Previously we'd represented the CFA as an expression
8662 like *(%ebp - 8). We've just popped that value from
8663 the stack, which means we need to reset the CFA to
8664 the drap register. This will remain until we restore
8665 the stack pointer. */
8666 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8667 RTX_FRAME_RELATED_P (insn) = 1;
/* Queue the restore note (NULL insn) for the next stack insn.  */
8670 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8672 red_offset += UNITS_PER_WORD;
8676 /* Emit code to restore saved SSE registers using MOV insns. First register
8677 is restored from POINTER + OFFSET. */
8679 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8680 HOST_WIDE_INT red_offset,
8681 int maybe_eh_return)
8684 rtx base_address = gen_rtx_MEM (TImode, pointer);
8687 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8688 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
/* 16-byte TImode loads from 128-bit-aligned slots, mirroring
   ix86_emit_save_sse_regs_using_mov.  */
8690 rtx reg = gen_rtx_REG (TImode, regno);
8692 /* Ensure that adjust_address won't be forced to produce pointer
8693 out of range allowed by x86-64 instruction set. */
8694 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8698 r11 = gen_rtx_REG (DImode, R11_REG);
8699 emit_move_insn (r11, GEN_INT (offset));
8700 emit_insn (gen_adddi3 (r11, r11, pointer));
8701 base_address = gen_rtx_MEM (TImode, r11);
8704 mem = adjust_address (base_address, TImode, offset);
8705 set_mem_align (mem, 128);
8706 insn = emit_move_insn (reg, mem);
/* Queue the restore note for attachment to the next stack insn.  */
8709 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8715 /* Restore function stack, frame, and registers. */
/* STYLE: listing elides the exact encoding, but visible uses are
   style == 2 for the eh_return path and style == 0 for sibcalls
   (the "Sibcall epilogues don't want a return instruction" branch).
   NOTE(review): many lines are elided throughout; comments annotate
   only visible code.  */
8718 ix86_expand_epilogue (int style)
8721 struct ix86_frame frame;
8722 HOST_WIDE_INT offset, red_offset;
/* Saved so a sibcall/early-exit path can restore the CFA state for
   the next epilogue expansion.  */
8723 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8726 ix86_finalize_stack_realign_flags ();
8728 /* When stack is realigned, SP must be valid. */
8729 sp_valid = (!frame_pointer_needed
8730 || current_function_sp_is_unchanging
8731 || stack_realign_fp);
8733 ix86_compute_frame_layout (&frame);
8735 /* See the comment about red zone and frame
8736 pointer usage in ix86_expand_prologue. */
8737 if (frame_pointer_needed && frame.red_zone_size)
8738 emit_insn (gen_memory_blockage ());
8740 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8741 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8743 /* Calculate start of saved registers relative to ebp. Special care
8744 must be taken for the normal return case of a function using
8745 eh_return: the eax and edx registers are marked as saved, but not
8746 restored along this path. */
8747 offset = frame.nregs;
8748 if (crtl->calls_eh_return && style != 2)
8750 offset *= -UNITS_PER_WORD;
8751 offset -= frame.nsseregs * 16 + frame.padding0;
8753 /* Calculate start of saved registers relative to esp on entry of the
8754 function. When realigning stack, this needs to be the most negative
8755 value possible at runtime. */
8756 red_offset = offset;
8758 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8760 else if (stack_realign_fp)
8761 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8763 if (frame_pointer_needed)
8764 red_offset -= UNITS_PER_WORD;
8766 /* If we're only restoring one register and sp is not valid then
8767 using a move instruction to restore the register since it's
8768 less work than reloading sp and popping the register.
8770 The default code result in stack adjustment using add/lea instruction,
8771 while this code results in LEAVE instruction (or discrete equivalent),
8772 so it is profitable in some other cases as well. Especially when there
8773 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8774 and there is exactly one register to pop. This heuristic may need some
8775 tuning in future. */
8776 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8777 || (TARGET_EPILOGUE_USING_MOVE
8778 && cfun->machine->use_fast_prologue_epilogue
8779 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8780 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8781 && frame.to_allocate)
8782 || (frame_pointer_needed && TARGET_USE_LEAVE
8783 && cfun->machine->use_fast_prologue_epilogue
8784 && (frame.nregs + frame.nsseregs) == 1)
8785 || crtl->calls_eh_return)
8787 /* Restore registers. We can use ebp or esp to address the memory
8788 locations. If both are available, default to ebp, since offsets
8789 are known to be small. Only exception is esp pointing directly
8790 to the end of block of saved registers, where we may simplify
8793 If we are realigning stack with bp and sp, regs restore can't
8794 be addressed by bp. sp must be used instead. */
8796 if (!frame_pointer_needed
8797 || (sp_valid && !frame.to_allocate)
8798 || stack_realign_fp)
8800 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8801 frame.to_allocate, red_offset,
8803 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8805 + frame.nsseregs * 16
8808 + frame.nsseregs * 16
8809 + frame.padding0, style == 2);
8813 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8816 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8818 + frame.nsseregs * 16
8821 + frame.nsseregs * 16
8822 + frame.padding0, style == 2);
8825 red_offset -= offset;
8827 /* eh_return epilogues need %ecx added to the stack pointer. */
8830 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8832 /* Stack align doesn't work with eh_return. */
8833 gcc_assert (!crtl->stack_realign_needed);
8835 if (frame_pointer_needed)
8837 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8838 tmp = plus_constant (tmp, UNITS_PER_WORD);
8839 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8841 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8842 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
8844 /* Note that we use SA as a temporary CFA, as the return
8845 address is at the proper place relative to it. We
8846 pretend this happens at the FP restore insn because
8847 prior to this insn the FP would be stored at the wrong
8848 offset relative to SA, and after this insn we have no
8849 other reasonable register to use for the CFA. We don't
8850 bother resetting the CFA to the SP for the duration of
8852 add_reg_note (tmp, REG_CFA_DEF_CFA,
8853 plus_constant (sa, UNITS_PER_WORD));
8854 ix86_add_queued_cfa_restore_notes (tmp);
8855 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8856 RTX_FRAME_RELATED_P (tmp) = 1;
8857 ix86_cfa_state->reg = sa;
8858 ix86_cfa_state->offset = UNITS_PER_WORD;
8860 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8861 const0_rtx, style, false);
/* No frame pointer: add the whole frame size plus SA to SP.  */
8865 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8866 tmp = plus_constant (tmp, (frame.to_allocate
8867 + frame.nregs * UNITS_PER_WORD
8868 + frame.nsseregs * 16
8870 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8871 ix86_add_queued_cfa_restore_notes (tmp);
8873 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
8874 if (ix86_cfa_state->offset != UNITS_PER_WORD)
8876 ix86_cfa_state->offset = UNITS_PER_WORD;
8877 add_reg_note (tmp, REG_CFA_DEF_CFA,
8878 plus_constant (stack_pointer_rtx,
8880 RTX_FRAME_RELATED_P (tmp) = 1;
8884 else if (!frame_pointer_needed)
8885 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8886 GEN_INT (frame.to_allocate
8887 + frame.nregs * UNITS_PER_WORD
8888 + frame.nsseregs * 16
8890 style, !using_drap)
8891 /* If not an i386, mov & pop is faster than "leave". */
8892 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8893 || !cfun->machine->use_fast_prologue_epilogue)
8894 ix86_emit_leave (red_offset);
8897 pro_epilogue_adjust_stack (stack_pointer_rtx,
8898 hard_frame_pointer_rtx,
8899 const0_rtx, style, !using_drap);
8901 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
/* Pop-based restore path (the else of the big heuristic above).  */
8906 /* First step is to deallocate the stack frame so that we can
8909 If we realign stack with frame pointer, then stack pointer
8910 won't be able to recover via lea $offset(%bp), %sp, because
8911 there is a padding area between bp and sp for realign.
8912 "add $to_allocate, %sp" must be used instead. */
8915 gcc_assert (frame_pointer_needed);
8916 gcc_assert (!stack_realign_fp);
8917 pro_epilogue_adjust_stack (stack_pointer_rtx,
8918 hard_frame_pointer_rtx,
8919 GEN_INT (offset), style, false);
8920 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8921 frame.to_allocate, red_offset,
8923 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8924 GEN_INT (frame.nsseregs * 16),
8927 else if (frame.to_allocate || frame.nsseregs)
8929 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8930 frame.to_allocate, red_offset,
8932 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8933 GEN_INT (frame.to_allocate
8934 + frame.nsseregs * 16
8935 + frame.padding0), style,
8936 !using_drap && !frame_pointer_needed);
8939 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
8941 red_offset -= offset;
8943 if (frame_pointer_needed)
8945 /* Leave results in shorter dependency chains on CPUs that are
8946 able to grok it fast. */
8947 if (TARGET_USE_LEAVE)
8948 ix86_emit_leave (red_offset);
8951 /* If stack realignment really happened, recovering the stack
8952 pointer from the hard frame pointer is a must, if not using
8954 if (stack_realign_fp)
8955 pro_epilogue_adjust_stack (stack_pointer_rtx,
8956 hard_frame_pointer_rtx,
8957 const0_rtx, style, !using_drap);
8958 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
/* DRAP teardown: recover SP from the DRAP register and redefine
   the CFA back in terms of SP.  */
8966 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8967 ? 0 : UNITS_PER_WORD);
8970 gcc_assert (stack_realign_drap);
8972 insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8974 GEN_INT (-(UNITS_PER_WORD
8975 + param_ptr_offset))));
8977 ix86_cfa_state->reg = stack_pointer_rtx;
8978 ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
8980 add_reg_note (insn, REG_CFA_DEF_CFA,
8981 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
8982 GEN_INT (ix86_cfa_state->offset)));
8983 RTX_FRAME_RELATED_P (insn) = 1;
/* If the DRAP register was call-saved it was pushed in the prologue;
   pop it back now.  */
8985 if (param_ptr_offset)
8986 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
8989 /* Sibcall epilogues don't want a return instruction. */
8992 *ix86_cfa_state = cfa_state_save;
8996 if (crtl->args.pops_args && crtl->args.size)
8998 rtx popc = GEN_INT (crtl->args.pops_args);
9000 /* i386 can only pop 64K bytes. If asked to pop more, pop return
9001 address, do explicit add, and jump indirectly to the caller. */
9003 if (crtl->args.pops_args >= 65536)
9005 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9008 /* There is no "pascal" calling convention in any 64bit ABI. */
9009 gcc_assert (!TARGET_64BIT);
9011 insn = emit_insn (gen_popsi1 (ecx));
9012 ix86_cfa_state->offset -= UNITS_PER_WORD;
9014 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9015 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9016 add_reg_note (insn, REG_CFA_REGISTER,
9017 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
9018 RTX_FRAME_RELATED_P (insn) = 1;
9020 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9022 emit_jump_insn (gen_return_indirect_internal (ecx));
9025 emit_jump_insn (gen_return_pop_internal (popc));
9028 emit_jump_insn (gen_return_internal ());
9030 /* Restore the state back to the state from the prologue,
9031 so that it's correct for the next epilogue. */
9032 *ix86_cfa_state = cfa_state_save;
9035 /* Reset from the function's potential modifications. */
9038 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9039 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* Undo any temporary PIC register renaming done by the prologue.  */
9041 if (pic_offset_table_rtx)
9042 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9044 /* Mach-O doesn't support labels at the end of objects, so if
9045 it looks like we might want one, insert a NOP. */
/* (Listing elides the TARGET_MACHO guard and parts of the scan.)  */
9047 rtx insn = get_last_insn ();
9050 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9051 insn = PREV_INSN (insn);
9055 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9056 fputs ("\tnop\n", file);
9062 /* Extract the parts of an RTL expression that is a valid memory address
9063 for an instruction. Return 0 if the structure of the address is
9064 grossly off. Return -1 if the address contains ASHIFT, so it is not
9065 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): listing elides many lines (the PLUS flattening loop body,
   several validity checks and returns); comments annotate visible code.  */
9068 ix86_decompose_address (rtx addr, struct ix86_address *out)
9070 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9071 rtx base_reg, index_reg;
9072 HOST_WIDE_INT scale = 1;
9073 rtx scale_rtx = NULL_RTX;
9075 enum ix86_address_seg seg = SEG_DEFAULT;
/* A bare (SUBREG of a) register is just a base.  */
9077 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
9079 else if (GET_CODE (addr) == PLUS)
/* Flatten nested PLUS chains into an addends[] worklist ...  */
9089 addends[n++] = XEXP (op, 1);
9092 while (GET_CODE (op) == PLUS);
/* ... then classify each addend as index*scale, segment unspec,
   base, or displacement.  */
9097 for (i = n; i >= 0; --i)
9100 switch (GET_CODE (op))
9105 index = XEXP (op, 0);
9106 scale_rtx = XEXP (op, 1);
/* UNSPEC_TP marks a thread-pointer reference: map it to the %fs/%gs
   segment when direct TLS segment references are enabled.  */
9110 if (XINT (op, 1) == UNSPEC_TP
9111 && TARGET_TLS_DIRECT_SEG_REFS
9112 && seg == SEG_DEFAULT)
9113 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9142 else if (GET_CODE (addr) == MULT)
9144 index = XEXP (addr, 0); /* index*scale */
9145 scale_rtx = XEXP (addr, 1);
9147 else if (GET_CODE (addr) == ASHIFT)
9151 /* We're called for lea too, which implements ashift on occasion. */
9152 index = XEXP (addr, 0);
9153 tmp = XEXP (addr, 1);
9154 if (!CONST_INT_P (tmp))
/* Shift count becomes the scale; only shifts of 0..3 (scale 1..8)
   are representable.  (Listing elides the scale = 1 << scale step.)  */
9156 scale = INTVAL (tmp);
9157 if ((unsigned HOST_WIDE_INT) scale > 3)
9163 disp = addr; /* displacement */
9165 /* Extract the integral value of scale. */
9168 if (!CONST_INT_P (scale_rtx))
9170 scale = INTVAL (scale_rtx);
9173 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9174 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9176 /* Avoid useless 0 displacement. */
9177 if (disp == const0_rtx && (base || index))
9180 /* Allow arg pointer and stack pointer as index if there is not scaling. */
9181 if (base_reg && index_reg && scale == 1
9182 && (index_reg == arg_pointer_rtx
9183 || index_reg == frame_pointer_rtx
9184 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* %esp cannot be an index in the SIB byte; swap base and index.  */
9187 tmp = base, base = index, index = tmp;
9188 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9191 /* Special case: %ebp cannot be encoded as a base without a displacement.
9195 && (base_reg == hard_frame_pointer_rtx
9196 || base_reg == frame_pointer_rtx
9197 || base_reg == arg_pointer_rtx
9198 || (REG_P (base_reg)
9199 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9200 || REGNO (base_reg) == R13_REG))))
9203 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9204 Avoid this by transforming to [%esi+0].
9205 Reload calls address legitimization without cfun defined, so we need
9206 to test cfun for being non-NULL. */
9207 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9208 && base_reg && !index_reg && !disp
9210 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9213 /* Special case: encode reg+reg instead of reg*2. */
9214 if (!base && index && scale == 2)
9215 base = index, base_reg = index_reg, scale = 1;
9217 /* Special case: scaling cannot be encoded without base or displacement. */
9218 if (!base && !disp && index && scale != 1)
9230 /* Return cost of the memory address x.
9231 For i386, it is better to use a complex address than let gcc copy
9232 the address into a reg and make a new pseudo. But not if the address
9233 requires two regs - that would mean more pseudos with longer
/* NOTE(review): lossy excerpt -- intermediate source lines are missing.  */
9236 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9238 struct ix86_address parts;
9240 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the register tests below see the underlying regs.  */
9244 if (parts.base && GET_CODE (parts.base) == SUBREG)
9245 parts.base = SUBREG_REG (parts.base);
9246 if (parts.index && GET_CODE (parts.index) == SUBREG)
9247 parts.index = SUBREG_REG (parts.index);
9249 /* Attempt to minimize number of registers in the address. */
9251 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9253 && (!REG_P (parts.index)
9254 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9258 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9260 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9261 && parts.base != parts.index)
9264 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
9265 since its predecode logic can't detect the length of instructions
9266 and it degenerates to vector decoded. Increase cost of such
9267 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
9268 to split such addresses or even refuse such addresses at all.
9270 Following addressing modes are affected:
9275 The first and last case may be avoidable by explicitly coding the zero in
9276 memory address, but I don't have AMD-K6 machine handy to check this
9280 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9281 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9282 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9288 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9289 this is used to form addresses to local data when -fPIC is in
/* Returns nonzero iff DISP is the Mach-O pic-base offset unspec.  */
9293 darwin_local_data_pic (rtx disp)
9295 return (GET_CODE (disp) == UNSPEC
9296 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9299 /* Determine if a given RTX is a valid constant. We already know this
9300 satisfies CONSTANT_P. */
/* NOTE(review): lossy excerpt -- intermediate source lines (case labels,
   returns, braces) are missing; comments annotate visible code only.  */
9303 legitimate_constant_p (rtx x)
9305 switch (GET_CODE (x))
/* CONST wrapper: drill down through a possible symbol+offset PLUS.  */
9310 if (GET_CODE (x) == PLUS)
9312 if (!CONST_INT_P (XEXP (x, 1)))
9317 if (TARGET_MACHO && darwin_local_data_pic (x))
9320 /* Only some unspecs are valid as "constants". */
9321 if (GET_CODE (x) == UNSPEC)
9322 switch (XINT (x, 1))
9327 return TARGET_64BIT;
/* TLS unspecs are constants only for the matching TLS model of the
   wrapped SYMBOL_REF.  */
9330 x = XVECEXP (x, 0, 0);
9331 return (GET_CODE (x) == SYMBOL_REF
9332 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9334 x = XVECEXP (x, 0, 0);
9335 return (GET_CODE (x) == SYMBOL_REF
9336 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9341 /* We must have drilled down to a symbol. */
9342 if (GET_CODE (x) == LABEL_REF)
9344 if (GET_CODE (x) != SYMBOL_REF)
9349 /* TLS symbols are never valid. */
9350 if (SYMBOL_REF_TLS_MODEL (x))
9353 /* DLLIMPORT symbols are never valid. */
9354 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9355 && SYMBOL_REF_DLLIMPORT_P (x))
/* TImode and SSE-vector constants: only standard SSE constants are
   directly legitimate (surrounding case labels not visible here).  */
9360 if (GET_MODE (x) == TImode
9361 && x != CONST0_RTX (TImode)
9367 if (!standard_sse_constant_p (x))
9374 /* Otherwise we handle everything else in the move patterns. */
9378 /* Determine if it's legal to put X into the constant pool. This
9379 is not possible for the address of thread-local symbols, which
9380 is checked above. */
/* NOTE(review): lossy excerpt -- the case labels between the switch and
   the default-style return are not visible.  */
9383 ix86_cannot_force_const_mem (rtx x)
9385 /* We can always put integral constants and vectors in memory. */
9386 switch (GET_CODE (x))
/* Anything else may go to memory only if it is a legitimate constant.  */
9396 return !legitimate_constant_p (x);
9400 /* Nonzero if the constant value X is a legitimate general operand
9401 when generating PIC code. It is given that flag_pic is on and
9402 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): lossy excerpt -- some case labels and returns between the
   visible lines are missing.  */
9405 legitimate_pic_operand_p (rtx x)
9409 switch (GET_CODE (x))
/* CONST: peel a symbol+offset PLUS before examining the inner rtx.  */
9412 inner = XEXP (x, 0);
9413 if (GET_CODE (inner) == PLUS
9414 && CONST_INT_P (XEXP (inner, 1)))
9415 inner = XEXP (inner, 0);
9417 /* Only some unspecs are valid as "constants". */
9418 if (GET_CODE (inner) == UNSPEC)
9419 switch (XINT (inner, 1))
9424 return TARGET_64BIT;
9426 x = XVECEXP (inner, 0, 0);
9427 return (GET_CODE (x) == SYMBOL_REF
9428 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9429 case UNSPEC_MACHOPIC_OFFSET:
9430 return legitimate_pic_address_disp_p (x);
/* SYMBOL_REF/LABEL_REF (labels not visible here) defer to the
   displacement legitimacy check.  */
9438 return legitimate_pic_address_disp_p (x);
9445 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): lossy excerpt -- intermediate lines (case labels, braces,
   returns) are missing; comments annotate visible code only.  */
9449 legitimate_pic_address_disp_p (rtx disp)
9453 /* In 64bit mode we can allow direct addresses of symbols and labels
9454 when they are not dynamic symbols. */
9457 rtx op0 = disp, op1;
9459 switch (GET_CODE (disp))
/* CONST (sym + small offset): offset must fit in +/-16MB so the final
   RIP-relative address stays within a signed 32-bit displacement.  */
9465 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9467 op0 = XEXP (XEXP (disp, 0), 0);
9468 op1 = XEXP (XEXP (disp, 0), 1);
9469 if (!CONST_INT_P (op1)
9470 || INTVAL (op1) >= 16*1024*1024
9471 || INTVAL (op1) < -16*1024*1024)
9473 if (GET_CODE (op0) == LABEL_REF)
9475 if (GET_CODE (op0) != SYMBOL_REF)
9480 /* TLS references should always be enclosed in UNSPEC. */
9481 if (SYMBOL_REF_TLS_MODEL (op0))
/* Local, near symbols are directly addressable except in large PIC.  */
9483 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9484 && ix86_cmodel != CM_LARGE_PIC)
9492 if (GET_CODE (disp) != CONST)
9494 disp = XEXP (disp, 0);
9498 /* We are unsafe to allow PLUS expressions. This limit allowed distance
9499 of GOT tables. We should not need these anyway. */
9500 if (GET_CODE (disp) != UNSPEC
9501 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9502 && XINT (disp, 1) != UNSPEC_GOTOFF
9503 && XINT (disp, 1) != UNSPEC_PLTOFF)
9506 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9507 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip an optional constant offset, then require one of
   the recognized PIC unspecs.  */
9513 if (GET_CODE (disp) == PLUS)
9515 if (!CONST_INT_P (XEXP (disp, 1)))
9517 disp = XEXP (disp, 0);
9521 if (TARGET_MACHO && darwin_local_data_pic (disp))
9524 if (GET_CODE (disp) != UNSPEC)
9527 switch (XINT (disp, 1))
9532 /* We need to check for both symbols and labels because VxWorks loads
9533 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9535 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9536 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9538 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9539 While ABI specify also 32bit relocation but we don't produce it in
9540 small PIC model at all. */
9541 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9542 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9544 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9546 case UNSPEC_GOTTPOFF:
9547 case UNSPEC_GOTNTPOFF:
9548 case UNSPEC_INDNTPOFF:
/* TLS unspecs: valid only for the matching TLS model of the symbol.  */
9551 disp = XVECEXP (disp, 0, 0);
9552 return (GET_CODE (disp) == SYMBOL_REF
9553 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9555 disp = XVECEXP (disp, 0, 0);
9556 return (GET_CODE (disp) == SYMBOL_REF
9557 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9559 disp = XVECEXP (disp, 0, 0);
9560 return (GET_CODE (disp) == SYMBOL_REF
9561 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9567 /* Recognizes RTL expressions that are valid memory addresses for an
9568 instruction. The MODE argument is the machine mode for the MEM
9569 expression that wants to use this address.
9571 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9572 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): lossy excerpt -- intermediate lines (braces, gotos,
   returns, the error-reporting tail) are missing; comments annotate
   visible code only.  */
9576 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9577 rtx addr, bool strict)
9579 struct ix86_address parts;
9580 rtx base, index, disp;
9581 HOST_WIDE_INT scale;
/* REASON/REASON_RTX record why an address was rejected (the reporting
   code is not visible in this excerpt).  */
9582 const char *reason = NULL;
9583 rtx reason_rtx = NULL_RTX;
9585 if (ix86_decompose_address (addr, &parts) <= 0)
9587 reason = "decomposition failed";
9592 index = parts.index;
9594 scale = parts.scale;
9596 /* Validate base register.
9598 Don't allow SUBREG's that span more than a word here. It can lead to spill
9599 failures when the base is one word out of a two word structure, which is
9600 represented internally as a DImode int. */
9609 else if (GET_CODE (base) == SUBREG
9610 && REG_P (SUBREG_REG (base))
9611 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9613 reg = SUBREG_REG (base);
9616 reason = "base is not a register";
9620 if (GET_MODE (base) != Pmode)
9622 reason = "base is not in Pmode";
/* STRICT selects hard-reg-only checks; non-strict also accepts pseudos.  */
9626 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9627 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9629 reason = "base is not valid";
9634 /* Validate index register.
9636 Don't allow SUBREG's that span more than a word here -- same as above. */
9645 else if (GET_CODE (index) == SUBREG
9646 && REG_P (SUBREG_REG (index))
9647 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9649 reg = SUBREG_REG (index);
9652 reason = "index is not a register";
9656 if (GET_MODE (index) != Pmode)
9658 reason = "index is not in Pmode";
9662 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9663 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9665 reason = "index is not valid";
9670 /* Validate scale factor. */
9673 reason_rtx = GEN_INT (scale);
9676 reason = "scale without index";
/* x86 encodings permit only scale factors 1, 2, 4 and 8.  */
9680 if (scale != 2 && scale != 4 && scale != 8)
9682 reason = "scale is not a valid multiplier";
9687 /* Validate displacement. */
9692 if (GET_CODE (disp) == CONST
9693 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9694 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9695 switch (XINT (XEXP (disp, 0), 1))
9697 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9698 used. While ABI specify also 32bit relocations, we don't produce
9699 them at all and use IP relative instead. */
9702 gcc_assert (flag_pic);
9704 goto is_legitimate_pic;
9705 reason = "64bit address unspec";
9708 case UNSPEC_GOTPCREL:
9709 gcc_assert (flag_pic);
9710 goto is_legitimate_pic;
9712 case UNSPEC_GOTTPOFF:
9713 case UNSPEC_GOTNTPOFF:
9714 case UNSPEC_INDNTPOFF:
9720 reason = "invalid address unspec";
9724 else if (SYMBOLIC_CONST (disp)
9728 && MACHOPIC_INDIRECT
9729 && !machopic_operand_p (disp)
/* PIC displacement validation (is_legitimate_pic label not visible).  */
9735 if (TARGET_64BIT && (index || base))
9737 /* foo@dtpoff(%rX) is ok. */
9738 if (GET_CODE (disp) != CONST
9739 || GET_CODE (XEXP (disp, 0)) != PLUS
9740 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9741 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9742 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9743 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9745 reason = "non-constant pic memory reference";
9749 else if (! legitimate_pic_address_disp_p (disp))
9751 reason = "displacement is an invalid pic construct";
9755 /* This code used to verify that a symbolic pic displacement
9756 includes the pic_offset_table_rtx register.
9758 While this is good idea, unfortunately these constructs may
9759 be created by "adds using lea" optimization for incorrect
9768 This code is nonsensical, but results in addressing
9769 GOT table with pic_offset_table_rtx base. We can't
9770 just refuse it easily, since it gets matched by
9771 "addsi3" pattern, that later gets split to lea in the
9772 case output register differs from input. While this
9773 can be handled by separate addsi pattern for this case
9774 that never results in lea, this seems to be easier and
9775 correct fix for crash to disable this test. */
9777 else if (GET_CODE (disp) != LABEL_REF
9778 && !CONST_INT_P (disp)
9779 && (GET_CODE (disp) != CONST
9780 || !legitimate_constant_p (disp))
9781 && (GET_CODE (disp) != SYMBOL_REF
9782 || !legitimate_constant_p (disp)))
9784 reason = "displacement is not constant";
9787 else if (TARGET_64BIT
9788 && !x86_64_immediate_operand (disp, VOIDmode))
9790 reason = "displacement is out of range";
9795 /* Everything looks valid. */
9802 /* Determine if a given RTX is a valid constant address. */
/* A constant address must also pass the strict legitimacy check.  */
9805 constant_address_p (rtx x)
9807 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9810 /* Return a unique alias set for the GOT. */
9812 static alias_set_type
9813 ix86_GOT_alias_set (void)
/* -1 marks "not yet allocated"; the set is created on first use
   (the guard condition is not visible in this excerpt).  */
9815 static alias_set_type set = -1;
9817 set = new_alias_set ();
9821 /* Return a legitimate reference for ORIG (an address) using the
9822 register REG. If REG is 0, a new pseudo is generated.
9824 There are two types of references that must be handled:
9826 1. Global data references must load the address from the GOT, via
9827 the PIC reg. An insn is emitted to do this load, and the reg is
9830 2. Static data references, constant pool addresses, and code labels
9831 compute the address as an offset from the GOT, whose base is in
9832 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9833 differentiate them from global data objects. The returned
9834 address is the PIC reg + an unspec constant.
9836 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9837 reg also appears in the address. */
/* NOTE(review): lossy excerpt -- intermediate lines (braces, returns,
   some conditions) are missing; comments annotate visible code only.  */
9840 legitimize_pic_address (rtx orig, rtx reg)
9847 if (TARGET_MACHO && !TARGET_64BIT)
9850 reg = gen_reg_rtx (Pmode);
9851 /* Use the generic Mach-O PIC machinery. */
9852 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9856 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9858 else if (TARGET_64BIT
9859 && ix86_cmodel != CM_SMALL_PIC
9860 && gotoff_operand (addr, Pmode))
9863 /* This symbol may be referenced via a displacement from the PIC
9864 base address (@GOTOFF). */
9866 if (reload_in_progress)
9867 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9868 if (GET_CODE (addr) == CONST)
9869 addr = XEXP (addr, 0);
9870 if (GET_CODE (addr) == PLUS)
/* Wrap the symbol part in UNSPEC_GOTOFF and re-add the offset.  */
9872 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9874 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9877 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9878 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9880 tmpreg = gen_reg_rtx (Pmode);
9883 emit_move_insn (tmpreg, new_rtx);
9887 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9888 tmpreg, 1, OPTAB_DIRECT);
9891 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9893 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9895 /* This symbol may be referenced via a displacement from the PIC
9896 base address (@GOTOFF). */
9898 if (reload_in_progress)
9899 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9900 if (GET_CODE (addr) == CONST)
9901 addr = XEXP (addr, 0);
9902 if (GET_CODE (addr) == PLUS)
9904 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9906 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9909 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9910 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9911 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9915 emit_move_insn (reg, new_rtx);
/* Non-TLS symbols (and VxWorks text labels) go through the GOT.  */
9919 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9920 /* We can't use @GOTOFF for text labels on VxWorks;
9921 see gotoff_operand. */
9922 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9924 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9926 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9927 return legitimize_dllimport_symbol (addr, true);
9928 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9929 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9930 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9932 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9933 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: RIP-relative GOT load via @GOTPCREL.  */
9937 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9939 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9940 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9941 new_rtx = gen_const_mem (Pmode, new_rtx);
9942 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9945 reg = gen_reg_rtx (Pmode);
9946 /* Use directly gen_movsi, otherwise the address is loaded
9947 into register for CSE. We don't want to CSE this addresses,
9948 instead we CSE addresses from the GOT table, so skip this. */
9949 emit_insn (gen_movsi (reg, new_rtx));
9954 /* This symbol must be referenced via a load from the
9955 Global Offset Table (@GOT). */
9957 if (reload_in_progress)
9958 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9959 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9960 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9962 new_rtx = force_reg (Pmode, new_rtx);
9963 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9964 new_rtx = gen_const_mem (Pmode, new_rtx);
9965 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9968 reg = gen_reg_rtx (Pmode);
9969 emit_move_insn (reg, new_rtx);
/* Fallback: large constants and compound CONST expressions.  */
9975 if (CONST_INT_P (addr)
9976 && !x86_64_immediate_operand (addr, VOIDmode))
9980 emit_move_insn (reg, addr);
9984 new_rtx = force_reg (Pmode, addr);
9986 else if (GET_CODE (addr) == CONST)
9988 addr = XEXP (addr, 0);
9990 /* We must match stuff we generate before. Assume the only
9991 unspecs that can get here are ours. Not that we could do
9992 anything with them anyway.... */
9993 if (GET_CODE (addr) == UNSPEC
9994 || (GET_CODE (addr) == PLUS
9995 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9997 gcc_assert (GET_CODE (addr) == PLUS);
9999 if (GET_CODE (addr) == PLUS)
10001 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10003 /* Check first to see if this is a constant offset from a @GOTOFF
10004 symbol reference. */
10005 if (gotoff_operand (op0, Pmode)
10006 && CONST_INT_P (op1))
10010 if (reload_in_progress)
10011 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10012 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10014 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10015 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10016 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10020 emit_move_insn (reg, new_rtx);
/* 64-bit: offsets outside +/-16MB cannot ride on the relocation;
   force the oversized offset into a register.  */
10026 if (INTVAL (op1) < -16*1024*1024
10027 || INTVAL (op1) >= 16*1024*1024)
10029 if (!x86_64_immediate_operand (op1, Pmode))
10030 op1 = force_reg (Pmode, op1);
10031 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine.  */
10037 base = legitimize_pic_address (XEXP (addr, 0), reg);
10038 new_rtx = legitimize_pic_address (XEXP (addr, 1),
10039 base == reg ? NULL_RTX : reg);
10041 if (CONST_INT_P (new_rtx))
10042 new_rtx = plus_constant (base, INTVAL (new_rtx));
10045 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10047 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10048 new_rtx = XEXP (new_rtx, 1);
10050 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10058 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* NOTE(review): lossy excerpt -- the early return for !TO_REG and the
   final return are not visible here.  */
10061 get_thread_pointer (int to_reg)
10065 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10069 reg = gen_reg_rtx (Pmode);
10070 insn = gen_rtx_SET (VOIDmode, reg, tp);
10071 insn = emit_insn (insn);
10076 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10077 false if we expect this to be used for a memory address and true if
10078 we expect to load the address into a register. */
/* NOTE(review): lossy excerpt -- intermediate lines (braces, returns,
   some conditions) are missing; comments annotate visible code only.
   Dispatches on the TLS access MODEL of symbol X.  */
10081 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10083 rtx dest, base, off, pic, tp;
10088 case TLS_MODEL_GLOBAL_DYNAMIC:
10089 dest = gen_reg_rtx (Pmode);
10090 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10092 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
/* 64-bit classic GD: call __tls_get_addr; result lands in %rax and is
   wrapped in a libcall block for CSE.  */
10094 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10097 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10098 insns = get_insns ();
10101 RTL_CONST_CALL_P (insns) = 1;
10102 emit_libcall_block (insns, dest, rax, x);
10104 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10105 emit_insn (gen_tls_global_dynamic_64 (dest, x));
10107 emit_insn (gen_tls_global_dynamic_32 (dest, x));
10109 if (TARGET_GNU2_TLS)
10111 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10113 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10117 case TLS_MODEL_LOCAL_DYNAMIC:
10118 base = gen_reg_rtx (Pmode);
10119 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10121 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10123 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10126 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10127 insns = get_insns ();
10130 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10131 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10132 RTL_CONST_CALL_P (insns) = 1;
10133 emit_libcall_block (insns, base, rax, note);
10135 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10136 emit_insn (gen_tls_local_dynamic_base_64 (base));
10138 emit_insn (gen_tls_local_dynamic_base_32 (base));
10140 if (TARGET_GNU2_TLS)
10142 rtx x = ix86_tls_module_base ();
10144 set_unique_reg_note (get_last_insn (), REG_EQUIV,
10145 gen_rtx_MINUS (Pmode, x, tp));
/* LD: add the symbol's @DTPOFF offset to the module base.  */
10148 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10149 off = gen_rtx_CONST (Pmode, off);
10151 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10153 if (TARGET_GNU2_TLS)
10155 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10157 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10162 case TLS_MODEL_INITIAL_EXEC:
/* IE: load the TP offset from the GOT; which unspec is used depends on
   pointer size, PIC mode and GNU/GNU2 TLS flavor.  */
10166 type = UNSPEC_GOTNTPOFF;
10170 if (reload_in_progress)
10171 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10172 pic = pic_offset_table_rtx;
10173 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10175 else if (!TARGET_ANY_GNU_TLS)
10177 pic = gen_reg_rtx (Pmode);
10178 emit_insn (gen_set_got (pic));
10179 type = UNSPEC_GOTTPOFF;
10184 type = UNSPEC_INDNTPOFF;
10187 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10188 off = gen_rtx_CONST (Pmode, off);
10190 off = gen_rtx_PLUS (Pmode, pic, off);
10191 off = gen_const_mem (Pmode, off);
10192 set_mem_alias_set (off, ix86_GOT_alias_set ());
10194 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10196 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10197 off = force_reg (Pmode, off);
10198 return gen_rtx_PLUS (Pmode, base, off);
10202 base = get_thread_pointer (true);
10203 dest = gen_reg_rtx (Pmode);
10204 emit_insn (gen_subsi3 (dest, base, off));
10208 case TLS_MODEL_LOCAL_EXEC:
/* LE: the offset is a link-time constant (@NTPOFF / @TPOFF).  */
10209 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10210 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10211 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10212 off = gen_rtx_CONST (Pmode, off);
10214 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10216 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10217 return gen_rtx_PLUS (Pmode, base, off);
10221 base = get_thread_pointer (true);
10222 dest = gen_reg_rtx (Pmode);
10223 emit_insn (gen_subsi3 (dest, base, off));
10228 gcc_unreachable ();
10234 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Hash table mapping a decl to its __imp_ indirection decl; GC-managed.  */
10237 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10238 htab_t dllimport_map;
/* NOTE(review): lossy excerpt -- intermediate lines (the found-entry
   early return, final return) are missing.  */
10241 get_dllimport_decl (tree decl)
10243 struct tree_map *h, in;
10246 const char *prefix;
10247 size_t namelen, prefixlen;
/* Lazily create the map, then look DECL up by pointer hash.  */
10252 if (!dllimport_map)
10253 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10255 in.hash = htab_hash_pointer (decl);
10256 in.base.from = decl;
10257 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10258 h = (struct tree_map *) *loc;
/* Not found: build an artificial external VAR_DECL holding the
   __imp_ pointer for DECL.  */
10262 *loc = h = GGC_NEW (struct tree_map);
10264 h->base.from = decl;
10265 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
10266 VAR_DECL, NULL, ptr_type_node);
10267 DECL_ARTIFICIAL (to) = 1;
10268 DECL_IGNORED_P (to) = 1;
10269 DECL_EXTERNAL (to) = 1;
10270 TREE_READONLY (to) = 1;
10272 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10273 name = targetm.strip_name_encoding (name);
/* Fastcall names and no-underscore targets get "__imp_", otherwise the
   extra user-label underscore is folded into "__imp__".  */
10274 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10275 ? "*__imp_" : "*__imp__";
10276 namelen = strlen (name);
10277 prefixlen = strlen (prefix);
10278 imp_name = (char *) alloca (namelen + prefixlen + 1);
10279 memcpy (imp_name, prefix, prefixlen);
10280 memcpy (imp_name + prefixlen, name, namelen + 1);
10282 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10283 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10284 SET_SYMBOL_REF_DECL (rtl, to);
10285 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
/* The decl's RTL is a load through the __imp_ pointer, aliased with
   the GOT set.  */
10287 rtl = gen_const_mem (Pmode, rtl);
10288 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10290 SET_DECL_RTL (to, rtl);
10291 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10296 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10297 true if we require the result be a register. */
/* NOTE(review): lossy excerpt -- the guard around force_reg and the final
   return are not visible here.  */
10300 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10305 gcc_assert (SYMBOL_REF_DECL (symbol));
10306 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10308 x = DECL_RTL (imp_decl);
10310 x = force_reg (Pmode, x);
10314 /* Try machine-dependent ways of modifying an illegitimate address
10315 to be legitimate. If we find one, return the new, valid address.
10316 This macro is used in only one place: `memory_address' in explow.c.
10318 OLDX is the address as it was before break_out_memory_refs was called.
10319 In some cases it is useful to look at this to decide what needs to be done.
10321 It is always safe for this macro to do nothing. It exists to recognize
10322 opportunities to optimize the output.
10324 For the 80386, we handle X+REG by loading X into a register R and
10325 using R+REG. R will go in a general reg and indexing will be used.
10326 However, if REG is a broken-out memory address or multiplication,
10327 nothing needs to be done because REG can certainly go in a general reg.
10329 When -fpic is used, special handling is needed for symbolic references.
10330 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): lossy excerpt -- intermediate lines (braces, returns,
   'changed' bookkeeping) are missing; comments annotate visible code.  */
10333 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10334 enum machine_mode mode)
/* TLS symbols go through the dedicated TLS legitimizer first.  */
10339 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10341 return legitimize_tls_address (x, (enum tls_model) log, false);
10342 if (GET_CODE (x) == CONST
10343 && GET_CODE (XEXP (x, 0)) == PLUS
10344 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10345 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10347 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10348 (enum tls_model) log, false);
10349 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Likewise dllimport symbols (plain or symbol+offset).  */
10352 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10354 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10355 return legitimize_dllimport_symbol (x, true);
10356 if (GET_CODE (x) == CONST
10357 && GET_CODE (XEXP (x, 0)) == PLUS
10358 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10359 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10361 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10362 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10366 if (flag_pic && SYMBOLIC_CONST (x))
10367 return legitimize_pic_address (x, 0);
10369 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10370 if (GET_CODE (x) == ASHIFT
10371 && CONST_INT_P (XEXP (x, 1))
10372 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10375 log = INTVAL (XEXP (x, 1));
10376 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10377 GEN_INT (1 << log));
10380 if (GET_CODE (x) == PLUS)
10382 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10384 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10385 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10386 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10389 log = INTVAL (XEXP (XEXP (x, 0), 1));
10390 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10391 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10392 GEN_INT (1 << log));
10395 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10396 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10397 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10400 log = INTVAL (XEXP (XEXP (x, 1), 1));
10401 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10402 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10403 GEN_INT (1 << log));
10406 /* Put multiply first if it isn't already. */
10407 if (GET_CODE (XEXP (x, 1)) == MULT)
10409 rtx tmp = XEXP (x, 0);
10410 XEXP (x, 0) = XEXP (x, 1);
10415 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10416 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10417 created by virtual register instantiation, register elimination, and
10418 similar optimizations. */
10419 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10422 x = gen_rtx_PLUS (Pmode,
10423 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10424 XEXP (XEXP (x, 1), 0)),
10425 XEXP (XEXP (x, 1), 1));
10429 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10430 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10431 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10432 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10433 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10434 && CONSTANT_P (XEXP (x, 1)))
10437 rtx other = NULL_RTX;
/* Exactly one of the two trailing addends must be the CONST_INT.  */
10439 if (CONST_INT_P (XEXP (x, 1)))
10441 constant = XEXP (x, 1);
10442 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10444 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10446 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10447 other = XEXP (x, 1);
10455 x = gen_rtx_PLUS (Pmode,
10456 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10457 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10458 plus_constant (other, INTVAL (constant)));
10462 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Force MULT halves into operands so they can serve as index terms.  */
10465 if (GET_CODE (XEXP (x, 0)) == MULT)
10468 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10471 if (GET_CODE (XEXP (x, 1)) == MULT)
10474 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10478 && REG_P (XEXP (x, 1))
10479 && REG_P (XEXP (x, 0)))
10482 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10485 x = legitimize_pic_address (x, 0);
10488 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Last resort: move the non-register half into a fresh pseudo.  */
10491 if (REG_P (XEXP (x, 0)))
10493 rtx temp = gen_reg_rtx (Pmode);
10494 rtx val = force_operand (XEXP (x, 1), temp);
10496 emit_move_insn (temp, val);
10498 XEXP (x, 1) = temp;
10502 else if (REG_P (XEXP (x, 1)))
10504 rtx temp = gen_reg_rtx (Pmode);
10505 rtx val = force_operand (XEXP (x, 0), temp);
10507 emit_move_insn (temp, val);
10509 XEXP (x, 0) = temp;
10517 /* Print an integer constant expression in assembler syntax. Addition
10518 and subtraction are the only arithmetic that may appear in these
10519 expressions. FILE is the stdio stream to write to, X is the rtx, and
10520 CODE is the operand print code from the output string. */
/* Print X, a PIC-safe constant address expression, to FILE.  CODE is the
   operand print code ('P' requests an @PLT suffix on non-local symbols).
   Recurses through CONST/PLUS/MINUS and emits the relocation suffix
   (@GOT, @GOTOFF, @TPOFF, ...) selected by the UNSPEC wrapper, if any.
   NOTE(review): this listing is elided; case labels and braces between
   the visible lines are not shown.  */
10523 output_pic_addr_const (FILE *file, rtx x, int code)
10527   switch (GET_CODE (x))
/* PC-relative reference: only meaningful when compiling PIC.  */
10530       gcc_assert (flag_pic);
10535       if (! TARGET_MACHO || TARGET_64BIT)
10536 	output_addr_const (file, x);
10539 	  const char *name = XSTR (x, 0);
10541 	  /* Mark the decl as referenced so that cgraph will
10542 	     output the function.  */
10543 	  if (SYMBOL_REF_DECL (x))
10544 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
/* Darwin: route calls to undefined functions through a PIC stub.  */
10547 	  if (MACHOPIC_INDIRECT
10548 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10549 	    name = machopic_indirection_name (x, /*stub_p=*/true);
10551 	  assemble_name (file, name);
10553       if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10554 	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10555 	fputs ("@PLT", file);
/* Internal label reference (LABEL_REF / CODE_LABEL).  */
10562       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10563       assemble_name (asm_out_file, buf);
10567       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10571       /* This used to output parentheses around the expression,
10572 	 but that does not work on the 386 (either ATT or BSD assembler).  */
10573       output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE used as a wide integer (VOIDmode); print hex if it does
   not fit in a positive 32-bit value.  */
10577       if (GET_MODE (x) == VOIDmode)
10579 	  /* We can use %d if the number is <32 bits and positive.  */
10580 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10581 	    fprintf (file, "0x%lx%08lx",
10582 		     (unsigned long) CONST_DOUBLE_HIGH (x),
10583 		     (unsigned long) CONST_DOUBLE_LOW (x));
10585 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10588 	/* We can't handle floating point constants;
10589 	   PRINT_OPERAND must handle them.  */
10590 	output_operand_lossage ("floating constant misused");
10594       /* Some assemblers need integer constants to appear first.  */
10595       if (CONST_INT_P (XEXP (x, 0)))
10597 	  output_pic_addr_const (file, XEXP (x, 0), code);
10599 	  output_pic_addr_const (file, XEXP (x, 1), code);
10603 	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
10604 	  output_pic_addr_const (file, XEXP (x, 1), code);
10606 	  output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket style differs between Intel and AT&T dialects.  */
10612       putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10613       output_pic_addr_const (file, XEXP (x, 0), code);
10615       output_pic_addr_const (file, XEXP (x, 1), code);
10617       putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC wrapper: print the wrapped operand then the matching
   relocation suffix.  */
10621       gcc_assert (XVECLEN (x, 0) == 1);
10622       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10623       switch (XINT (x, 1))
10626 	  fputs ("@GOT", file);
10628 	case UNSPEC_GOTOFF:
10629 	  fputs ("@GOTOFF", file);
10631 	case UNSPEC_PLTOFF:
10632 	  fputs ("@PLTOFF", file);
10634 	case UNSPEC_GOTPCREL:
10635 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10636 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10638 	case UNSPEC_GOTTPOFF:
10639 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
10640 	  fputs ("@GOTTPOFF", file);
10643 	  fputs ("@TPOFF", file);
10645 	case UNSPEC_NTPOFF:
/* TPOFF on 64-bit, NTPOFF on 32-bit — presumably gated on TARGET_64BIT
   in the elided line; confirm against the full source.  */
10647 	    fputs ("@TPOFF", file);
10649 	    fputs ("@NTPOFF", file);
10651 	case UNSPEC_DTPOFF:
10652 	  fputs ("@DTPOFF", file);
10654 	case UNSPEC_GOTNTPOFF:
10656 	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10657 		   "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10659 	    fputs ("@GOTNTPOFF", file);
10661 	case UNSPEC_INDNTPOFF:
10662 	  fputs ("@INDNTPOFF", file);
10665 	case UNSPEC_MACHOPIC_OFFSET:
10667 	  machopic_output_function_base_name (file);
10671 	  output_operand_lossage ("invalid UNSPEC as operand");
10677       output_operand_lossage ("invalid expression as operand");
10681 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10682 We need to emit DTP-relative relocations. */
10684 static void ATTRIBUTE_UNUSED
/* Emit a SIZE-byte DTP-relative (thread-local) relocation for X, as
   required by dwarf2out.c.  The operand is printed with an @DTPOFF
   suffix; an elided switch on SIZE chooses the assembler directive and
   padding (the ", 0" pads a 4-byte value to 8 bytes — presumably the
   size == 8 case; confirm against the full source).  */
10685 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10687   fputs (ASM_LONG, file);
10688   output_addr_const (file, x);
10689   fputs ("@DTPOFF", file);
10695       fputs (", 0", file);
/* Unsupported SIZE values cannot occur.  */
10698       gcc_unreachable ();
10702 /* Return true if X is a representation of the PIC register. This copes
10703 with calls from ix86_find_base_term, where the register might have
10704 been replaced by a cselib value. */
/* Return true if X represents the PIC register.  A cselib VALUE is
   accepted as well (ix86_find_base_term may pass one) and is compared
   against pic_offset_table_rtx via cselib equality.  */
10707 ix86_pic_register_p (rtx x)
10709   if (GET_CODE (x) == VALUE)
10710     return (pic_offset_table_rtx
10711 	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
/* Otherwise: a hard/pseudo reg numbered as the PIC register.  */
10713     return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10716 /* In the name of slightly smaller debug output, and to cater to
10717 general assembler lossage, recognize PIC+GOTOFF and turn it back
10718 into a direct symbol reference.
10720 On Darwin, this is necessary to avoid a crash, because Darwin
10721 has a different PIC label for each routine but the DWARF debugging
10722 information is not associated with any particular routine, so it's
10723 necessary to remove references to the PIC label from RTL stored by
10724 the DWARF output code. */
/* Undo PIC address legitimization: turn GOT/GOTOFF-style expressions in
   ORIG_X back into the plain symbol reference, preserving any register
   addend (%reg * scale) and constant addend found alongside it.  Used
   for debug output; see the block comment above.  */
10727 ix86_delegitimize_address (rtx orig_x)
10730   /* reg_addend is NULL or a multiple of some register.  */
10731   rtx reg_addend = NULL_RTX;
10732   /* const_addend is NULL or a const_int.  */
10733   rtx const_addend = NULL_RTX;
10734   /* This is the result, or NULL.  */
10735   rtx result = NULL_RTX;
/* 64-bit RIP-relative case: a (const (unspec GOTPCREL)) inside a MEM
   delegitimizes directly to the wrapped symbol.  */
10742       if (GET_CODE (x) != CONST
10743 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
10744 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10745 	  || !MEM_P (orig_x))
10747       return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit case: expect (plus <pic-base-stuff> (const ...)).  */
10750   if (GET_CODE (x) != PLUS
10751       || GET_CODE (XEXP (x, 1)) != CONST)
10754   if (ix86_pic_register_p (XEXP (x, 0)))
10755     /* %ebx + GOT/GOTOFF */
10757   else if (GET_CODE (XEXP (x, 0)) == PLUS)
10759       /* %ebx + %reg * scale + GOT/GOTOFF */
10760       reg_addend = XEXP (x, 0);
/* Strip the PIC register from either side of the inner PLUS; what is
   left is the true register addend.  */
10761       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10762 	reg_addend = XEXP (reg_addend, 1);
10763       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10764 	reg_addend = XEXP (reg_addend, 0);
/* Only a plain register or a scaled register is a valid addend.  */
10767       if (!REG_P (reg_addend)
10768 	  && GET_CODE (reg_addend) != MULT
10769 	  && GET_CODE (reg_addend) != ASHIFT)
/* Peel the CONST wrapper and split off a trailing integer offset.  */
10775   x = XEXP (XEXP (x, 1), 0);
10776   if (GET_CODE (x) == PLUS
10777       && CONST_INT_P (XEXP (x, 1)))
10779       const_addend = XEXP (x, 1);
/* @GOT references only appear inside a MEM; @GOTOFF only outside.  */
10783   if (GET_CODE (x) == UNSPEC
10784       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10785 	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10786     result = XVECEXP (x, 0, 0);
10788   if (TARGET_MACHO && darwin_local_data_pic (x)
10789       && !MEM_P (orig_x))
10790     result = XVECEXP (x, 0, 0);
/* Re-attach the constant and register addends that were split off.  */
10796     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10798     result = gen_rtx_PLUS (Pmode, reg_addend, result);
10802 /* If X is a machine specific address (i.e. a symbol or label being
10803 referenced as a displacement from the GOT implemented using an
10804 UNSPEC), then return the base term. Otherwise return X. */
10807 ix86_find_base_term (rtx x)
/* Strip a 64-bit GOTPCREL UNSPEC (optionally offset by a constant) down
   to the symbol it wraps; anything else is handled by
   ix86_delegitimize_address.  */
10813       if (GET_CODE (x) != CONST)
10815       term = XEXP (x, 0);
10816       if (GET_CODE (term) == PLUS
10817 	  && (CONST_INT_P (XEXP (term, 1))
10818 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10819 	term = XEXP (term, 0);
10820       if (GET_CODE (term) != UNSPEC
10821 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
10824       return XVECEXP (term, 0, 0);
10827   return ix86_delegitimize_address (x);
/* Write to FILE the condition-code suffix ("e", "a", "ge", ...) for
   comparison CODE under flags mode MODE.  REVERSE inverts the
   condition; FP selects the fcmov-style spellings ("nbe", "nb", ...)
   some assemblers require.  NOTE(review): most case labels of the big
   switch are elided from this listing.  */
10831 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10832 		    int fp, FILE *file)
10834   const char *suffix;
10836   if (mode == CCFPmode || mode == CCFPUmode)
/* Map the FP comparison onto the equivalent integer condition first.  */
10838       code = ix86_fp_compare_code_to_integer (code);
10842     code = reverse_condition (code);
10893       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10897       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10898 	 Those same assemblers have the same but opposite lossage on cmov.  */
10899       if (mode == CCmode)
10900 	suffix = fp ? "nbe" : "a";
10901       else if (mode == CCCmode)
10904 	gcc_unreachable ();
10920 	  gcc_unreachable ();
10924       gcc_assert (mode == CCmode || mode == CCCmode);
10941 	  gcc_unreachable ();
10945       /* ??? As above.  */
10946       gcc_assert (mode == CCmode || mode == CCCmode);
10947       suffix = fp ? "nb" : "ae";
10950       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10954       /* ??? As above.  */
10955       if (mode == CCmode)
10957       else if (mode == CCCmode)
10958 	suffix = fp ? "nb" : "ae";
10960 	gcc_unreachable ();
10963       suffix = fp ? "u" : "p";
10966       suffix = fp ? "nu" : "np";
10969       gcc_unreachable ();
10971   fputs (suffix, file);
10974 /* Print the name of register X to FILE based on its machine mode and number.
10975 If CODE is 'w', pretend the mode is HImode.
10976 If CODE is 'b', pretend the mode is QImode.
10977 If CODE is 'k', pretend the mode is SImode.
10978 If CODE is 'q', pretend the mode is DImode.
10979 If CODE is 'x', pretend the mode is V4SFmode.
10980 If CODE is 't', pretend the mode is V8SFmode.
10981 If CODE is 'h', pretend the reg is the 'high' byte register.
10982 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10983 If CODE is 'd', duplicate the operand for AVX instruction.
/* Print register X to FILE in the size implied by CODE (see the block
   comment above for the code letters).  Handles AT&T '%' prefixing,
   the 64-bit rip pseudo-register, AMD r8..r15 extended-register naming,
   and AVX 'd' duplication.  */
10987 print_reg (rtx x, int code, FILE *file)
10990   bool duplicated = code == 'd' && TARGET_AVX;
/* Internal-only registers must never reach assembly output.  */
10992   gcc_assert (x == pc_rtx
10993 	      || (REGNO (x) != ARG_POINTER_REGNUM
10994 		  && REGNO (x) != FRAME_POINTER_REGNUM
10995 		  && REGNO (x) != FLAGS_REG
10996 		  && REGNO (x) != FPSR_REG
10997 		  && REGNO (x) != FPCR_REG));
10999   if (ASSEMBLER_DIALECT == ASM_ATT)
11004       gcc_assert (TARGET_64BIT);
11005       fputs ("rip", file);
/* Translate the code letter into an operand size in bytes (reusing
   CODE as the size from here on).  */
11009   if (code == 'w' || MMX_REG_P (x))
11011   else if (code == 'b')
11013   else if (code == 'k')
11015   else if (code == 'q')
11017   else if (code == 'y')
11019   else if (code == 'h')
11021   else if (code == 'x')
11023   else if (code == 't')
11026     code = GET_MODE_SIZE (GET_MODE (x));
11028   /* Irritatingly, AMD extended registers use different naming convention
11029      from the normal registers.  */
11030   if (REX_INT_REG_P (x))
11032       gcc_assert (TARGET_64BIT);
11036 	  error ("extended registers have no high halves");
11039 	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11042 	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11045 	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11048 	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11051 	  error ("unsupported operand size for extended register");
/* Classic registers: select from the per-size name tables.  */
11061       if (STACK_TOP_P (x))
11070       if (! ANY_FP_REG_P (x))
11071 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11076       reg = hi_reg_name[REGNO (x)];
11079       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11081       reg = qi_reg_name[REGNO (x)];
11084       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11086       reg = qi_high_reg_name[REGNO (x)];
/* MMX/SSE names: skip the leading size character of hi_reg_name.  */
11091 	  gcc_assert (!duplicated);
11093       fputs (hi_reg_name[REGNO (x)] + 1, file);
11098       gcc_unreachable ();
/* AVX 'd': print the register a second time, separated by a comma.  */
11104       if (ASSEMBLER_DIALECT == ASM_ATT)
11105 	fprintf (file, ", %%%s", reg);
11107 	fprintf (file, ", %s", reg);
11111 /* Locate some local-dynamic symbol still in use by this function
11112 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: if *PX is a local-dynamic TLS symbol, record
   its name in cfun->machine->some_ld_name (and, per for_each_rtx
   convention, the elided return presumably stops the walk).  */
11116 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11120   if (GET_CODE (x) == SYMBOL_REF
11121       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11123       cfun->machine->some_ld_name = XSTR (x, 0);
11130 static const char *
/* Return the name of some local-dynamic TLS symbol used in the current
   function, caching the result in cfun->machine->some_ld_name.  Scans
   every insn pattern; asserts if none is found (callers only ask when
   one must exist).  */
11131 get_some_local_dynamic_name (void)
11135   if (cfun->machine->some_ld_name)
11136     return cfun->machine->some_ld_name;
11138   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11140 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11141       return cfun->machine->some_ld_name;
11143   gcc_unreachable ();
11146 /* Meaning of CODE:
11147 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11148 C -- print opcode suffix for set/cmov insn.
11149 c -- like C, but print reversed condition
11150 E,e -- likewise, but for compare-and-branch fused insn.
11151 F,f -- likewise, but for floating-point.
11152 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11154 R -- print the prefix for register names.
11155 z -- print the opcode suffix for the size of the current operand.
11156 Z -- likewise, with special suffixes for x87 instructions.
11157 * -- print a star (in certain assembler syntax)
11158 A -- print an absolute memory reference.
11159 w -- print the operand as if it's a "word" (HImode) even if it isn't.
11160 s -- print a shift double count, followed by the assemblers argument
11162 b -- print the QImode name of the register for the indicated operand.
11163 %b0 would print %al if operands[0] is reg 0.
11164 w -- likewise, print the HImode name of the register.
11165 k -- likewise, print the SImode name of the register.
11166 q -- likewise, print the DImode name of the register.
11167 x -- likewise, print the V4SFmode name of the register.
11168 t -- likewise, print the V8SFmode name of the register.
11169 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11170 y -- print "st(0)" instead of "st" as a register.
11171 d -- print duplicated register operand for AVX instruction.
11172 D -- print condition for SSE cmp instruction.
11173 P -- if PIC, print an @PLT suffix.
11174 X -- don't print any sort of PIC '@' suffix for a symbol.
11175 & -- print some in-use local-dynamic symbol name.
11176 H -- print a memory address offset by 8; used for sse high-parts
11177 Y -- print condition for SSE5 com* instruction.
11178 + -- print a branch hint as 'cs' or 'ds' prefix
11179 ; -- print a semicolon (after prefixes due to bug in older gas).
/* Print operand X to FILE under print code CODE; this is the target's
   PRINT_OPERAND implementation (the code letters are documented in the
   block comment above).  The big leading switch handles the code
   letters; the tail handles X by rtx class: register, memory, float
   constant, vector zero, and generic constant.  NOTE(review): many
   case labels and braces in this listing are elided.  */
11183 print_operand (FILE *file, rtx x, int code)
11190 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit an in-use local-dynamic TLS symbol name.  */
11195 	  assemble_name (file, get_some_local_dynamic_name ());
/* 'A': absolute memory reference; syntax differs per dialect.  */
11199 	  switch (ASSEMBLER_DIALECT)
11206 	      /* Intel syntax. For absolute addresses, registers should not
11207 		 be surrounded by braces.  */
11211 		  PRINT_OPERAND (file, x, 0);
11218 	      gcc_unreachable ();
11221 	  PRINT_OPERAND (file, x, 0);
/* Size-suffix codes: in AT&T syntax emit the suffix letter; Intel
   syntax needs none (elided putc calls follow each test).  */
11226 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11231 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11236 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11241 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11246 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11251 	  if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': suffix derived from the operand's own mode.  */
11256 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11258 	      /* Opcodes don't get size suffixes if using Intel opcodes.  */
11259 	      if (ASSEMBLER_DIALECT == ASM_INTEL)
11262 	      switch (GET_MODE_SIZE (GET_MODE (x)))
11281 		  output_operand_lossage
11282 		    ("invalid operand size for operand code '%c'", code);
11287 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11289 	      (0, "non-integer operand used with operand code '%c'", code);
/* 'Z': x87-specific suffixes (fild/fist forms depend on assembler
   support macros).  */
11293 	  /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
11294 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
11297 	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11299 	      switch (GET_MODE_SIZE (GET_MODE (x)))
11302 #ifdef HAVE_AS_IX86_FILDS
11312 #ifdef HAVE_AS_IX86_FILDQ
11315 		  fputs ("ll", file);
11323 	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11325 	      /* 387 opcodes don't get size suffixes
11326 		 if the operands are registers.  */
11327 	      if (STACK_REG_P (x))
11330 	      switch (GET_MODE_SIZE (GET_MODE (x)))
11351 	      output_operand_lossage
11352 		("invalid operand type used with operand code '%c'", code);
11356 	  output_operand_lossage
11357 	    ("invalid operand size for operand code '%c'", code);
/* 's': shift-double count followed by separator.  */
11374 	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11376 	      PRINT_OPERAND (file, x, 0);
11377 	      fputs (", ", file);
11382 	  /* Little bit of braindamage here.  The SSE compare instructions
11383 	     does use completely different names for the comparisons that the
11384 	     fp conditional moves.  */
/* 'D': SSE cmp predicate name for comparison X.  */
11387 	    switch (GET_CODE (x))
11390 		fputs ("eq", file);
11393 		fputs ("eq_us", file);
11396 		fputs ("lt", file);
11399 		fputs ("nge", file);
11402 		fputs ("le", file);
11405 		fputs ("ngt", file);
11408 		fputs ("unord", file);
11411 		fputs ("neq", file);
11414 		fputs ("neq_oq", file);
11417 		fputs ("ge", file);
11420 		fputs ("nlt", file);
11423 		fputs ("gt", file);
11426 		fputs ("nle", file);
11429 		fputs ("ord", file);
11432 		output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* Reduced predicate set (presumably non-AVX path; confirm against the
   full source).  */
11438 	    switch (GET_CODE (x))
11442 		fputs ("eq", file);
11446 		fputs ("lt", file);
11450 		fputs ("le", file);
11453 		fputs ("unord", file);
11457 		fputs ("neq", file);
11461 		fputs ("nlt", file);
11465 		fputs ("nle", file);
11468 		fputs ("ord", file);
11471 		output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O': Sun-assembler cmov size marker.  */
11477 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11478 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11480 	      switch (GET_MODE (x))
11482 		case HImode: putc ('w', file); break;
11484 		case SFmode: putc ('l', file); break;
11486 		case DFmode: putc ('q', file); break;
11487 		default: gcc_unreachable ();
/* 'C'/'F' and reversed 'c'/'f': set/cmov condition suffixes via
   put_condition_code.  */
11494 	  if (!COMPARISON_P (x))
11496 	      output_operand_lossage ("operand is neither a constant nor a "
11497 				      "condition code, invalid operand code "
11501 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11504 	  if (!COMPARISON_P (x))
11506 	      output_operand_lossage ("operand is neither a constant nor a "
11507 				      "condition code, invalid operand code "
11511 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11512 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11515 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11518 	  /* Like above, but reverse condition */
11520 	  /* Check to see if argument to %c is really a constant
11521 	     and not a condition code which needs to be reversed.  */
11522 	  if (!COMPARISON_P (x))
11524 	      output_operand_lossage ("operand is neither a constant nor a "
11525 				      "condition code, invalid operand "
11529 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11532 	  if (!COMPARISON_P (x))
11534 	      output_operand_lossage ("operand is neither a constant nor a "
11535 				      "condition code, invalid operand "
11539 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11540 	  if (ASSEMBLER_DIALECT == ASM_ATT)
11543 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': fused compare-and-branch conditions, always CCmode.  */
11547 	  put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11551 	  put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
/* 'H': address of the high 8 bytes (SSE high-parts).  */
11555 	  /* It doesn't actually matter what mode we use here, as we're
11556 	     only going to use this for printing.  */
11557 	  x = adjust_address_nv (x, DImode, 8);
/* '+': branch-prediction hint prefixes, only when profitable.  */
11565 	      || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11568 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11571 		int pred_val = INTVAL (XEXP (x, 0));
/* Only hint when the prediction is decisive (outside 45%..55%).  */
11573 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
11574 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
11576 		    int taken = pred_val > REG_BR_PROB_BASE / 2;
11577 		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
11579 		    /* Emit hints only in the case default branch prediction
11580 		       heuristics would fail.  */
11581 		    if (taken != cputaken)
11583 			/* We use 3e (DS) prefix for taken branches and
11584 			   2e (CS) prefix for not taken branches.  */
11586 			  fputs ("ds ; ", file);
11588 			  fputs ("cs ; ", file);
/* 'Y': SSE5 com* predicate names.  */
11596 	  switch (GET_CODE (x))
11599 	      fputs ("neq", file);
11602 	      fputs ("eq", file);
11606 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11610 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11614 	      fputs ("le", file);
11618 	      fputs ("lt", file);
11621 	      fputs ("unord", file);
11624 	      fputs ("ord", file);
11627 	      fputs ("ueq", file);
11630 	      fputs ("nlt", file);
11633 	      fputs ("nle", file);
11636 	      fputs ("ule", file);
11639 	      fputs ("ult", file);
11642 	      fputs ("une", file);
11645 	      output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';': instruction separator (works around old gas bug).  */
11652 	  fputs (" ; ", file);
11659 	  output_operand_lossage ("invalid operand code '%c'", code);
/* No code letter (or after code handling): dispatch on operand kind.  */
11664     print_reg (x, code, file);
11666   else if (MEM_P (x))
11668       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
11669       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11670 	  && GET_MODE (x) != BLKmode)
11673 	  switch (GET_MODE_SIZE (GET_MODE (x)))
11675 	    case 1: size = "BYTE"; break;
11676 	    case 2: size = "WORD"; break;
11677 	    case 4: size = "DWORD"; break;
11678 	    case 8: size = "QWORD"; break;
11679 	    case 12: size = "XWORD"; break;
11681 	      if (GET_MODE (x) == XFmode)
11687 	      gcc_unreachable ();
11690 	  /* Check for explicit size override (codes 'b', 'w' and 'k') */
11693 	  else if (code == 'w')
11695 	  else if (code == 'k')
11698 	  fputs (size, file);
11699 	  fputs (" PTR ", file);
11703       /* Avoid (%rip) for call operands.  */
11704       if (CONSTANT_ADDRESS_P (x) && code == 'P'
11705 	  && !CONST_INT_P (x))
11706 	output_addr_const (file, x);
11707       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11708 	output_operand_lossage ("invalid constraints for operand");
11710 	output_address (x);
/* SFmode immediate: print its IEEE bit pattern in hex.  */
11713   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11718       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11719       REAL_VALUE_TO_TARGET_SINGLE (r, l);
11721       if (ASSEMBLER_DIALECT == ASM_ATT)
11723       fprintf (file, "0x%08lx", (long unsigned int) l);
11726   /* These float cases don't actually occur as immediate operands.  */
11727   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11731       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11732       fprintf (file, "%s", dstr);
11735   else if (GET_CODE (x) == CONST_DOUBLE
11736 	   && GET_MODE (x) == XFmode)
11740       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11741       fprintf (file, "%s", dstr);
11746       /* We have patterns that allow zero sets of memory, for instance.
11747 	 In 64-bit mode, we should probably support all 8-byte vectors,
11748 	 since we can in fact encode that into an immediate.  */
11749       if (GET_CODE (x) == CONST_VECTOR)
11751 	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Generic constants: AT&T wants a '$' before immediates, Intel wants
   "OFFSET FLAT:" before symbolic addresses.  */
11757 	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11759 	      if (ASSEMBLER_DIALECT == ASM_ATT)
11762 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11763 		   || GET_CODE (x) == LABEL_REF)
11765 	      if (ASSEMBLER_DIALECT == ASM_ATT)
11768 		fputs ("OFFSET FLAT:", file);
11771       if (CONST_INT_P (x))
11772 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
/* Symbolic constants go through the PIC-aware printer when PIC.  */
11774 	output_pic_addr_const (file, x, code);
11776 	output_addr_const (file, x);
11780 /* Print a memory operand whose address is ADDR. */
/* PRINT_OPERAND_ADDRESS implementation: decompose ADDR into
   base/index/displacement/scale/segment via ix86_decompose_address and
   emit it in the current assembler dialect (AT&T disp(base,index,scale)
   or Intel [base+index*scale+disp]).  */
11783 print_operand_address (FILE *file, rtx addr)
11785   struct ix86_address parts;
11786   rtx base, index, disp;
11788   int ok = ix86_decompose_address (addr, &parts);
11793   index = parts.index;
11795   scale = parts.scale;
/* Segment override prefix (only %fs/%gs are ever used here).  */
11803       if (ASSEMBLER_DIALECT == ASM_ATT)
11805       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11808       gcc_unreachable ();
11811   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
11812   if (TARGET_64BIT && !base && !index)
/* Strip a constant offset so the symbol itself can be tested.  */
11816       if (GET_CODE (disp) == CONST
11817 	  && GET_CODE (XEXP (disp, 0)) == PLUS
11818 	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11819 	symbol = XEXP (XEXP (disp, 0), 0);
/* Only labels and non-TLS symbols may become RIP-relative.  */
11821       if (GET_CODE (symbol) == LABEL_REF
11822 	  || (GET_CODE (symbol) == SYMBOL_REF
11823 	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11826   if (!base && !index)
11828       /* Displacement only requires special attention.  */
11830       if (CONST_INT_P (disp))
11832 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11833 	    fputs ("ds:", file);
11834 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11837 	output_pic_addr_const (file, disp, 0);
11839 	output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
11843       if (ASSEMBLER_DIALECT == ASM_ATT)
11848 	      output_pic_addr_const (file, disp, 0);
11849 	    else if (GET_CODE (disp) == LABEL_REF)
11850 	      output_asm_label (disp);
11852 	      output_addr_const (file, disp);
11857 	    print_reg (base, 0, file);
11861 	      print_reg (index, 0, file);
11863 		fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp], with the symbol printed first
   and the integer offset appended with explicit sign.  */
11869 	  rtx offset = NULL_RTX;
11873 	      /* Pull out the offset of a symbol; print any symbol itself.  */
11874 	      if (GET_CODE (disp) == CONST
11875 		  && GET_CODE (XEXP (disp, 0)) == PLUS
11876 		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11878 		  offset = XEXP (XEXP (disp, 0), 1);
11879 		  disp = gen_rtx_CONST (VOIDmode,
11880 					XEXP (XEXP (disp, 0), 0));
11884 		output_pic_addr_const (file, disp, 0);
11885 	      else if (GET_CODE (disp) == LABEL_REF)
11886 		output_asm_label (disp);
11887 	      else if (CONST_INT_P (disp))
11890 		output_addr_const (file, disp);
11896 	      print_reg (base, 0, file);
11899 		  if (INTVAL (offset) >= 0)
11901 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11905 		fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11912 	      print_reg (index, 0, file);
11914 		fprintf (file, "*%d", scale);
/* OUTPUT_ADDR_CONST_EXTRA hook: print TLS-related UNSPEC address
   constants (operand followed by its relocation suffix).  Returns false
   (in an elided line) for anything that is not a recognized UNSPEC so
   generic code can handle it.  */
11922 output_addr_const_extra (FILE *file, rtx x)
11926   if (GET_CODE (x) != UNSPEC)
11929   op = XVECEXP (x, 0, 0);
11930   switch (XINT (x, 1))
11932     case UNSPEC_GOTTPOFF:
11933       output_addr_const (file, op);
11934       /* FIXME: This might be @TPOFF in Sun ld.  */
11935       fputs ("@GOTTPOFF", file);
11938       output_addr_const (file, op);
11939       fputs ("@TPOFF", file);
11941     case UNSPEC_NTPOFF:
11942       output_addr_const (file, op);
/* @TPOFF vs @NTPOFF — presumably selected by TARGET_64BIT in the
   elided condition; confirm against the full source.  */
11944 	fputs ("@TPOFF", file);
11946 	fputs ("@NTPOFF", file);
11948     case UNSPEC_DTPOFF:
11949       output_addr_const (file, op);
11950       fputs ("@DTPOFF", file);
11952     case UNSPEC_GOTNTPOFF:
11953       output_addr_const (file, op);
11955 	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11956 	       "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11958 	fputs ("@GOTNTPOFF", file);
11960     case UNSPEC_INDNTPOFF:
11961       output_addr_const (file, op);
11962       fputs ("@INDNTPOFF", file);
11965     case UNSPEC_MACHOPIC_OFFSET:
11966       output_addr_const (file, op);
11968       machopic_output_function_base_name (file);
11979 /* Split one or more DImode RTL references into pairs of SImode
11980 references. The RTL can be REG, offsettable MEM, integer constant, or
11981 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11982 split and "num" is its length. lo_half and hi_half are output arrays
11983 that parallel "operands". */
11986 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
/* For each of the NUM operands, produce its low and high SImode halves
   (see the block comment above).  */
11990       rtx op = operands[num];
11992       /* simplify_subreg refuse to split volatile memory addresses,
11993 	 but we still have to handle it.  */
/* MEM path: address arithmetic instead of subregs (offsets 0 and 4).  */
11996 	  lo_half[num] = adjust_address (op, SImode, 0);
11997 	  hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM path: subregs; VOIDmode constants are treated as DImode.  */
12001 	  lo_half[num] = simplify_gen_subreg (SImode, op,
12002 					      GET_MODE (op) == VOIDmode
12003 					      ? DImode : GET_MODE (op), 0);
12004 	  hi_half[num] = simplify_gen_subreg (SImode, op,
12005 					      GET_MODE (op) == VOIDmode
12006 					      ? DImode : GET_MODE (op), 4);
12010 /* Split one or more TImode RTL references into pairs of DImode
12011 references. The RTL can be REG, offsettable MEM, integer constant, or
12012 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12013 split and "num" is its length. lo_half and hi_half are output arrays
12014 that parallel "operands". */
12017 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
/* TImode analogue of split_di: produce DImode halves at byte offsets
   0 and 8 for each of the NUM operands.  */
12021       rtx op = operands[num];
12023       /* simplify_subreg refuse to split volatile memory addresses, but we
12024 	 still have to handle it.  */
12027 	  lo_half[num] = adjust_address (op, DImode, 0);
12028 	  hi_half[num] = adjust_address (op, DImode, 8);
12032 	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12033 	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12038 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12039 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
12040 is the expression of the binary operation. The output may either be
12041 emitted here, or returned to the caller, like all output_* functions.
12043 There is no guarantee that the operands are the same mode, as they
12044 might be within FLOAT or FLOAT_EXTEND expressions. */
12046 #ifndef SYSV386_COMPAT
12047 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
12048 wants to fix the assemblers because that causes incompatibility
12049 with gcc. No-one wants to fix gcc because that causes
12050 incompatibility with assemblers... You can use the option of
12051 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
12052 #define SYSV386_COMPAT 1
/* Emit/return the assembler template for a 387 or SSE scalar binary FP
   operation (see the block comment above).  Chooses between the x87
   stack forms (with the SYSV386_COMPAT reversal quirks for fsub/fdiv)
   and the SSE ss/sd forms, including the 3-operand AVX encoding.
   NOTE(review): the opcode-name assignments in the first switch and
   several case labels are elided from this listing.  */
12056 output_387_binary_op (rtx insn, rtx *operands)
12058   static char buf[40];
12061   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12063 #ifdef ENABLE_CHECKING
12064   /* Even if we do not want to check the inputs, this documents input
12065      constraints.  Which helps in understanding the following code.  */
12066   if (STACK_REG_P (operands[0])
12067       && ((REG_P (operands[1])
12068 	   && REGNO (operands[0]) == REGNO (operands[1])
12069 	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12070 	  || (REG_P (operands[2])
12071 	      && REGNO (operands[0]) == REGNO (operands[2])
12072 	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12073       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12076     gcc_assert (is_sse);
/* Pick the x87 and SSE opcode roots for the operation.  Integer-mode
   source operands select the fi* (integer) x87 forms.  */
12079   switch (GET_CODE (operands[3]))
12082       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12083 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12091       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12092 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12100       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12101 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12109       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12110 	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12118       gcc_unreachable ();
/* SSE path: 3-operand AVX form first, then the 2-operand legacy form
   (ssep + 1 skips a leading 'v' — presumably; confirm).  */
12125 	  strcpy (buf, ssep);
12126 	  if (GET_MODE (operands[0]) == SFmode)
12127 	    strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12129 	    strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12133 	  strcpy (buf, ssep + 1);
12134 	  if (GET_MODE (operands[0]) == SFmode)
12135 	    strcat (buf, "ss\t{%2, %0|%0, %2}");
12137 	    strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the operand-order/pop suffix P for each shape.  */
12143   switch (GET_CODE (operands[3]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
12147       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12149 	  rtx temp = operands[2];
12150 	  operands[2] = operands[1];
12151 	  operands[1] = temp;
12154       /* know operands[0] == operands[1].  */
12156       if (MEM_P (operands[2]))
/* Popping forms when the other stack operand dies here.  */
12162       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12164 	  if (STACK_TOP_P (operands[0]))
12165 	    /* How is it that we are storing to a dead operand[2]?
12166 	       Well, presumably operands[1] is dead too.  We can't
12167 	       store the result to st(0) as st(0) gets popped on this
12168 	       instruction.  Instead store to operands[2] (which I
12169 	       think has to be st(1)).  st(1) will be popped later.
12170 	       gcc <= 2.8.1 didn't have this check and generated
12171 	       assembly code that the Unixware assembler rejected.  */
12172 	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
12174 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
12178       if (STACK_TOP_P (operands[0]))
12179 	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
12181 	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (fsub/fdiv): memory-operand and popping forms,
   with the SYSV386_COMPAT direction reversal described below.  */
12186       if (MEM_P (operands[1]))
12192       if (MEM_P (operands[2]))
12198       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12201 	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12202 	     derived assemblers, confusingly reverse the direction of
12203 	     the operation for fsub{r} and fdiv{r} when the
12204 	     destination register is not st(0).  The Intel assembler
12205 	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
12206 	     figure out what the hardware really does.  */
12207 	  if (STACK_TOP_P (operands[0]))
12208 	    p = "{p\t%0, %2|rp\t%2, %0}";
12210 	    p = "{rp\t%2, %0|p\t%0, %2}";
12212 	  if (STACK_TOP_P (operands[0]))
12213 	    /* As above for fmul/fadd, we can't store to st(0).  */
12214 	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
12216 	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
12221       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12224 	  if (STACK_TOP_P (operands[0]))
12225 	    p = "{rp\t%0, %1|p\t%1, %0}";
12227 	    p = "{p\t%1, %0|rp\t%0, %1}";
12229 	  if (STACK_TOP_P (operands[0]))
12230 	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
12232 	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
/* Non-popping register-register forms.  */
12237       if (STACK_TOP_P (operands[0]))
12239 	  if (STACK_TOP_P (operands[1]))
12240 	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
12242 	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
12245       else if (STACK_TOP_P (operands[1]))
12248 	    p = "{\t%1, %0|r\t%0, %1}";
12250 	    p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
12256 	    p = "{r\t%2, %0|\t%0, %2}";
12258 	    p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
12264       gcc_unreachable ();
12271 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Return needed mode for entity in optimize_mode_switching pass.
   For the i387 control-word entity: UNINITIALIZED after calls/asm,
   the specific rounding/mask mode an insn's i387_cw attribute demands,
   or ANY when the insn has no requirement.  The elided code between
   the attribute fetch and the returns presumably dispatches on which
   ENTITY is being asked about — confirm against the full source.  */
12274 ix86_mode_needed (int entity, rtx insn)
12276   enum attr_i387_cw mode;
12278   /* The mode UNINITIALIZED is used to store control word after a
12279      function call or ASM pattern.  The mode ANY specify that function
12280      has no requirements on the control word and make no changes in the
12281      bits we are interested in.  */
12284       || (NONJUMP_INSN_P (insn)
12285 	  && (asm_noperands (PATTERN (insn)) >= 0
12286 	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12287     return I387_CW_UNINITIALIZED;
/* Unrecognizable insns impose no control-word requirement.  */
12289   if (recog_memoized (insn) < 0)
12290     return I387_CW_ANY;
12292   mode = get_attr_i387_cw (insn);
12297       if (mode == I387_CW_TRUNC)
12302       if (mode == I387_CW_FLOOR)
12307       if (mode == I387_CW_CEIL)
12312       if (mode == I387_CW_MASK_PM)
12317       gcc_unreachable ();
12320   return I387_CW_ANY;
12323 /* Output code to initialize control word copies used by trunc?f?i and
12324 rounding patterns. CURRENT_MODE is set to current control word,
12325 while NEW_MODE is set to new control word. */
/* NOTE(review): sampled listing -- switch headers, break statements and
   the #else-style split between the two case ladders are elided; code
   kept verbatim, comments only added.  */
12328 emit_i387_cw_initialization (int mode)
12330 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12333 enum ix86_stack_slot slot;
12335 rtx reg = gen_reg_rtx (HImode);
/* Store the current x87 control word and copy it into REG for editing.  */
12337 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12338 emit_move_insn (reg, copy_rtx (stored_mode));
/* First variant: plain and/or on the HImode copy.  0x0c00 covers the
   rounding-control bits of the control word (per the per-case comments
   below) -- TODO confirm against the x87 control-word layout.  */
12340 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12341 || optimize_function_for_size_p (cfun))
12345 case I387_CW_TRUNC:
12346 /* round toward zero (truncate) */
12347 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12348 slot = SLOT_CW_TRUNC;
12351 case I387_CW_FLOOR:
12352 /* round down toward -oo */
12353 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12354 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12355 slot = SLOT_CW_FLOOR;
12359 /* round up toward +oo */
12360 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12361 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12362 slot = SLOT_CW_CEIL;
12365 case I387_CW_MASK_PM:
12366 /* mask precision exception for nearbyint() */
12367 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12368 slot = SLOT_CW_MASK_PM;
12372 gcc_unreachable ();
/* Second variant (elided branch header): uses movsi_insv_1 to insert
   the rounding field directly instead of and/or pairs.  */
12379 case I387_CW_TRUNC:
12380 /* round toward zero (truncate) */
12381 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12382 slot = SLOT_CW_TRUNC;
12385 case I387_CW_FLOOR:
12386 /* round down toward -oo */
12387 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12388 slot = SLOT_CW_FLOOR;
12392 /* round up toward +oo */
12393 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12394 slot = SLOT_CW_CEIL;
12397 case I387_CW_MASK_PM:
12398 /* mask precision exception for nearbyint() */
12399 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12400 slot = SLOT_CW_MASK_PM;
12404 gcc_unreachable ();
/* Spill the edited control word into the slot for this rounding mode.  */
12408 gcc_assert (slot < MAX_386_STACK_LOCALS);
12410 new_mode = assign_386_stack_local (HImode, slot);
12411 emit_move_insn (new_mode, reg);
12414 /* Output code for INSN to convert a float to a signed int. OPERANDS
12415 are the insn operands. The output may be [HSD]Imode and the input
12416 operand may be [SDX]Fmode. */
/* NOTE(review): sampled listing -- braces and the fisttp/else split are
   elided between the numbered rows; code kept verbatim.  */
12419 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12421 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12422 int dimode_p = GET_MODE (operands[0]) == DImode;
12423 int round_mode = get_attr_i387_cw (insn);
12425 /* Jump through a hoop or two for DImode, since the hardware has no
12426 non-popping instruction. We used to do this a different way, but
12427 that was somewhat fragile and broke with post-reload splitters. */
12428 if ((dimode_p || fisttp) && !stack_top_dies)
12429 output_asm_insn ("fld\t%y1", operands);
12431 gcc_assert (STACK_TOP_P (operands[1]));
12432 gcc_assert (MEM_P (operands[0]));
12433 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* fisttp (SSE3) always truncates, so no control-word dance is needed.  */
12436 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Classic path: swap in the rounding-mode control word (%3), store,
   then restore the original control word (%2).  */
12439 if (round_mode != I387_CW_ANY)
12440 output_asm_insn ("fldcw\t%3", operands);
12441 if (stack_top_dies || dimode_p)
12442 output_asm_insn ("fistp%Z0\t%0", operands);
12444 output_asm_insn ("fist%Z0\t%0", operands);
12445 if (round_mode != I387_CW_ANY)
12446 output_asm_insn ("fldcw\t%2", operands);
12452 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12453 have the values zero or one, indicates the ffreep insn's operand
12454 from the OPERANDS array. */
/* NOTE(review): sampled listing -- the #else/#endif of the
   HAVE_AS_IX86_FFREEP conditional is elided; code kept verbatim.  */
12456 static const char *
12457 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12459 if (TARGET_USE_FFREEP)
12460 #if HAVE_AS_IX86_FFREEP
12461 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit the raw opcode word, patching the
   register digit into the static template in place.  */
12464 static char retval[] = ".word\t0xc_df";
12465 int regno = REGNO (operands[opno]);
12467 gcc_assert (FP_REGNO_P (regno));
12469 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not desirable: a plain popping store.  */
12474 return opno ? "fstp\t%y1" : "fstp\t%y0";
12478 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12479 should be used. UNORDERED_P is true when fucom should be used. */
/* NOTE(review): sampled listing -- branch headers, braces and several
   table rows are elided between the numbered rows; code kept verbatim,
   comments only added.  */
12482 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12484 int stack_top_dies;
12485 rtx cmp_op0, cmp_op1;
12486 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12490 cmp_op0 = operands[0];
12491 cmp_op1 = operands[1];
12495 cmp_op0 = operands[1];
12496 cmp_op1 = operands[2];
/* SSE path: the leading 'v' of each template is skipped when AVX is
   not enabled (the [TARGET_AVX ? 0 : 1] indexing below).  */
12501 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12502 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12503 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12504 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12506 if (GET_MODE (operands[0]) == SFmode)
12508 return &ucomiss[TARGET_AVX ? 0 : 1];
12510 return &comiss[TARGET_AVX ? 0 : 1];
12513 return &ucomisd[TARGET_AVX ? 0 : 1];
12515 return &comisd[TARGET_AVX ? 0 : 1];
/* x87 path: operand 0 of the comparison must be at the stack top.  */
12518 gcc_assert (STACK_TOP_P (cmp_op0));
12520 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, optionally popping st(0) afterwards.  */
12522 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12524 if (stack_top_dies)
12526 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12527 return output_387_ffreep (operands, 1);
12530 return "ftst\n\tfnstsw\t%0";
12533 if (STACK_REG_P (cmp_op1)
12535 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12536 && REGNO (cmp_op1) != FIRST_STACK_REG)
12538 /* If both the top of the 387 stack dies, and the other operand
12539 is also a stack register that dies, then this must be a
12540 `fcompp' float compare */
12544 /* There is no double popping fcomi variant. Fortunately,
12545 eflags is immune from the fstp's cc clobbering. */
12547 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12549 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12550 return output_387_ffreep (operands, 0);
12555 return "fucompp\n\tfnstsw\t%0";
12557 return "fcompp\n\tfnstsw\t%0";
/* General case: pick a template from a 4-bit-indexed table.  */
12562 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12564 static const char * const alt[16] =
12566 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12567 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12568 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12569 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12571 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12572 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12576 "fcomi\t{%y1, %0|%0, %y1}",
12577 "fcomip\t{%y1, %0|%0, %y1}",
12578 "fucomi\t{%y1, %0|%0, %y1}",
12579 "fucomip\t{%y1, %0|%0, %y1}",
12590 mask = eflags_p << 3;
12591 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12592 mask |= unordered_p << 1;
12593 mask |= stack_top_dies;
12595 gcc_assert (mask < 16);
/* Emit one element of a jump-table address vector: an ASM_LONG (or,
   presumably under 64-bit, ASM_QUAD -- the guarding condition is in the
   elided lines) reference to local label LPREFIX<value>.
   NOTE(review): sampled listing; code kept verbatim.  */
12604 ix86_output_addr_vec_elt (FILE *file, int value)
12606 const char *directive = ASM_LONG;
12610 directive = ASM_QUAD;
12612 gcc_assert (!TARGET_64BIT);
12615 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC-relative jump-table: the difference between
   label VALUE and either label REL, a @GOTOFF reference, or a
   Mach-O/GOT-relative expression depending on target configuration.
   NOTE(review): sampled listing; some branch lines are elided.  */
12619 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12621 const char *directive = ASM_LONG;
12624 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12625 directive = ASM_QUAD;
12627 gcc_assert (!TARGET_64BIT);
12629 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12630 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12631 fprintf (file, "%s%s%d-%s%d\n",
12632 directive, LPREFIX, value, LPREFIX, rel);
12633 else if (HAVE_AS_GOTOFF_IN_DATA)
12634 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12636 else if (TARGET_MACHO)
12638 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12639 machopic_output_function_base_name (file);
12640 fprintf(file, "\n");
12644 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12645 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12648 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* NOTE(review): sampled listing -- the rest of this header comment and
   the final emit of TMP are elided; code kept verbatim.  */
12652 ix86_expand_clear (rtx dest)
12656 /* We play register width games, which are only valid after reload. */
12657 gcc_assert (reload_completed);
12659 /* Avoid HImode and its attendant prefix byte. */
12660 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12661 dest = gen_rtx_REG (SImode, REGNO (dest));
12662 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12664 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12665 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
/* xor clobbers the flags, so wrap the SET in a PARALLEL with a
   FLAGS_REG clobber.  */
12667 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12668 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12674 /* X is an unchanging MEM. If it is a constant pool reference, return
12675 the constant pool rtx, else NULL. */
/* NOTE(review): sampled listing -- return type and NULL return path are
   elided; code kept verbatim.  */
12678 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping from the address before testing it.  */
12680 x = ix86_delegitimize_address (XEXP (x, 0));
12682 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12683 return get_pool_constant (x);
/* Expand a scalar move of MODE between operands[0] and operands[1],
   legitimizing TLS, dllimport, PIC and large-constant operands first.
   NOTE(review): sampled listing -- many condition/brace lines are
   elided between the numbered rows; code kept verbatim, comments only
   added.  */
12689 ix86_expand_move (enum machine_mode mode, rtx operands[])
12692 enum tls_model model;
/* Symbols with a TLS model or dllimport attribute need address
   legitimization before the move.  */
12697 if (GET_CODE (op1) == SYMBOL_REF)
12699 model = SYMBOL_REF_TLS_MODEL (op1);
12702 op1 = legitimize_tls_address (op1, model, true);
12703 op1 = force_operand (op1, op0);
12707 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12708 && SYMBOL_REF_DLLIMPORT_P (op1))
12709 op1 = legitimize_dllimport_symbol (op1, false);
/* Same handling for (const (plus symbol addend)).  */
12711 else if (GET_CODE (op1) == CONST
12712 && GET_CODE (XEXP (op1, 0)) == PLUS
12713 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12715 rtx addend = XEXP (XEXP (op1, 0), 1);
12716 rtx symbol = XEXP (XEXP (op1, 0), 0);
12719 model = SYMBOL_REF_TLS_MODEL (symbol);
12721 tmp = legitimize_tls_address (symbol, model, true);
12722 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12723 && SYMBOL_REF_DLLIMPORT_P (symbol))
12724 tmp = legitimize_dllimport_symbol (symbol, true);
12728 tmp = force_operand (tmp, NULL);
12729 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12730 op0, 1, OPTAB_DIRECT);
/* PIC addresses of symbolic operands must go through the PIC
   legitimizer (or the Mach-O machinery on 32-bit Darwin).  */
12736 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12738 if (TARGET_MACHO && !TARGET_64BIT)
12743 rtx temp = ((reload_in_progress
12744 || ((op0 && REG_P (op0))
12746 ? op0 : gen_reg_rtx (Pmode));
12747 op1 = machopic_indirect_data_reference (op1, temp);
12748 op1 = machopic_legitimize_pic_address (op1, mode,
12749 temp == op1 ? 0 : temp);
12751 else if (MACHOPIC_INDIRECT)
12752 op1 = machopic_indirect_data_reference (op1, 0);
12760 op1 = force_reg (Pmode, op1);
12761 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12763 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12764 op1 = legitimize_pic_address (op1, reg);
12773 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12774 || !push_operand (op0, mode))
12776 op1 = force_reg (mode, op1);
12778 if (push_operand (op0, mode)
12779 && ! general_no_elim_operand (op1, mode))
12780 op1 = copy_to_mode_reg (mode, op1);
12782 /* Force large constants in 64bit compilation into register
12783 to get them CSEed. */
12784 if (can_create_pseudo_p ()
12785 && (mode == DImode) && TARGET_64BIT
12786 && immediate_operand (op1, mode)
12787 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12788 && !register_operand (op0, mode)
12790 op1 = copy_to_mode_reg (mode, op1);
12792 if (can_create_pseudo_p ()
12793 && FLOAT_MODE_P (mode)
12794 && GET_CODE (op1) == CONST_DOUBLE)
12796 /* If we are loading a floating point constant to a register,
12797 force the value to memory now, since we'll get better code
12798 out the back end. */
12800 op1 = validize_mem (force_const_mem (mode, op1));
12801 if (!register_operand (op0, mode))
12803 rtx temp = gen_reg_rtx (mode);
12804 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12805 emit_move_insn (op0, temp);
12811 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector move of MODE, forcing non-trivial constants to
   memory and routing misaligned SSE operands through
   ix86_expand_vector_move_misalign.
   NOTE(review): sampled listing -- brace/return lines are elided
   between the numbered rows; code kept verbatim.  */
12815 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12817 rtx op0 = operands[0], op1 = operands[1];
12818 unsigned int align = GET_MODE_ALIGNMENT (mode);
12820 /* Force constants other than zero into memory. We do not know how
12821 the instructions used to build constants modify the upper 64 bits
12822 of the register, once we have that information we may be able
12823 to handle some of them more efficiently. */
12824 if (can_create_pseudo_p ()
12825 && register_operand (op0, mode)
12826 && (CONSTANT_P (op1)
12827 || (GET_CODE (op1) == SUBREG
12828 && CONSTANT_P (SUBREG_REG (op1))))
12829 && standard_sse_constant_p (op1) <= 0)
12830 op1 = validize_mem (force_const_mem (mode, op1));
12832 /* We need to check memory alignment for SSE mode since attribute
12833 can make operands unaligned. */
12834 if (can_create_pseudo_p ()
12835 && SSE_REG_MODE_P (mode)
12836 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12837 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12841 /* ix86_expand_vector_move_misalign() does not like constants ... */
12842 if (CONSTANT_P (op1)
12843 || (GET_CODE (op1) == SUBREG
12844 && CONSTANT_P (SUBREG_REG (op1))))
12845 op1 = validize_mem (force_const_mem (mode, op1));
12847 /* ... nor both arguments in memory. */
12848 if (!register_operand (op0, mode)
12849 && !register_operand (op1, mode))
12850 op1 = force_reg (mode, op1);
12852 tmp[0] = op0; tmp[1] = op1;
12853 ix86_expand_vector_move_misalign (mode, tmp);
12857 /* Make operand1 a register if it isn't already. */
12858 if (can_create_pseudo_p ()
12859 && !register_operand (op0, mode)
12860 && !register_operand (op1, mode))
12862 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12866 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12869 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12870 straight to ix86_expand_vector_move. */
12871 /* Code generation for scalar reg-reg moves of single and double precision data:
12872 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12876 if (x86_sse_partial_reg_dependency == true)
12881 Code generation for scalar loads of double precision data:
12882 if (x86_sse_split_regs == true)
12883 movlpd mem, reg (gas syntax)
12887 Code generation for unaligned packed loads of single precision data
12888 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12889 if (x86_sse_unaligned_move_optimal)
12892 if (x86_sse_partial_reg_dependency == true)
12904 Code generation for unaligned packed loads of double precision data
12905 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12906 if (x86_sse_unaligned_move_optimal)
12909 if (x86_sse_split_regs == true)
/* NOTE(review): sampled listing -- the AVX/non-AVX split, several
   branch headers and brace lines are elided between the numbered rows;
   code kept verbatim, comments only added.  */
12922 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (guard elided): vmovdqu/vmovups/vmovupd handle any
   alignment, selected by mode class and size.  */
12931 switch (GET_MODE_CLASS (mode))
12933 case MODE_VECTOR_INT:
12935 switch (GET_MODE_SIZE (mode))
12938 op0 = gen_lowpart (V16QImode, op0);
12939 op1 = gen_lowpart (V16QImode, op1);
12940 emit_insn (gen_avx_movdqu (op0, op1));
12943 op0 = gen_lowpart (V32QImode, op0);
12944 op1 = gen_lowpart (V32QImode, op1);
12945 emit_insn (gen_avx_movdqu256 (op0, op1));
12948 gcc_unreachable ();
12951 case MODE_VECTOR_FLOAT:
12952 op0 = gen_lowpart (mode, op0);
12953 op1 = gen_lowpart (mode, op1);
12958 emit_insn (gen_avx_movups (op0, op1));
12961 emit_insn (gen_avx_movups256 (op0, op1));
12964 emit_insn (gen_avx_movupd (op0, op1));
12967 emit_insn (gen_avx_movupd256 (op0, op1));
12970 gcc_unreachable ();
12975 gcc_unreachable ();
/* Non-AVX load path (MEM_P (op1) guard elided).  */
12983 /* If we're optimizing for size, movups is the smallest. */
12984 if (optimize_insn_for_size_p ())
12986 op0 = gen_lowpart (V4SFmode, op0);
12987 op1 = gen_lowpart (V4SFmode, op1);
12988 emit_insn (gen_sse_movups (op0, op1));
12992 /* ??? If we have typed data, then it would appear that using
12993 movdqu is the only way to get unaligned data loaded with
12995 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12997 op0 = gen_lowpart (V16QImode, op0);
12998 op1 = gen_lowpart (V16QImode, op1);
12999 emit_insn (gen_sse2_movdqu (op0, op1));
13003 if (TARGET_SSE2 && mode == V2DFmode)
13007 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13009 op0 = gen_lowpart (V2DFmode, op0);
13010 op1 = gen_lowpart (V2DFmode, op1);
13011 emit_insn (gen_sse2_movupd (op0, op1));
13015 /* When SSE registers are split into halves, we can avoid
13016 writing to the top half twice. */
13017 if (TARGET_SSE_SPLIT_REGS)
13019 emit_clobber (op0);
13024 /* ??? Not sure about the best option for the Intel chips.
13025 The following would seem to satisfy; the register is
13026 entirely cleared, breaking the dependency chain. We
13027 then store to the upper half, with a dependency depth
13028 of one. A rumor has it that Intel recommends two movsd
13029 followed by an unpacklpd, but this is unconfirmed. And
13030 given that the dependency depth of the unpacklpd would
13031 still be one, I'm not sure why this would be better. */
13032 zero = CONST0_RTX (V2DFmode);
/* Load the two DFmode halves separately (low then high).  */
13035 m = adjust_address (op1, DFmode, 0);
13036 emit_insn (gen_sse2_loadlpd (op0, zero, m));
13037 m = adjust_address (op1, DFmode, 8);
13038 emit_insn (gen_sse2_loadhpd (op0, op0, m));
13042 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13044 op0 = gen_lowpart (V4SFmode, op0);
13045 op1 = gen_lowpart (V4SFmode, op1);
13046 emit_insn (gen_sse_movups (op0, op1));
13050 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13051 emit_move_insn (op0, CONST0_RTX (mode));
13053 emit_clobber (op0);
13055 if (mode != V4SFmode)
13056 op0 = gen_lowpart (V4SFmode, op0);
13057 m = adjust_address (op1, V2SFmode, 0);
13058 emit_insn (gen_sse_loadlps (op0, op0, m));
13059 m = adjust_address (op1, V2SFmode, 8);
13060 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Non-AVX store path.  */
13063 else if (MEM_P (op0))
13065 /* If we're optimizing for size, movups is the smallest. */
13066 if (optimize_insn_for_size_p ())
13068 op0 = gen_lowpart (V4SFmode, op0);
13069 op1 = gen_lowpart (V4SFmode, op1);
13070 emit_insn (gen_sse_movups (op0, op1));
13074 /* ??? Similar to above, only less clear because of quote
13075 typeless stores unquote. */
13076 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13077 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13079 op0 = gen_lowpart (V16QImode, op0);
13080 op1 = gen_lowpart (V16QImode, op1);
13081 emit_insn (gen_sse2_movdqu (op0, op1));
13085 if (TARGET_SSE2 && mode == V2DFmode)
13087 m = adjust_address (op0, DFmode, 0);
13088 emit_insn (gen_sse2_storelpd (m, op1));
13089 m = adjust_address (op0, DFmode, 8);
13090 emit_insn (gen_sse2_storehpd (m, op1));
13094 if (mode != V4SFmode)
13095 op1 = gen_lowpart (V4SFmode, op1);
13096 m = adjust_address (op0, V2SFmode, 0);
13097 emit_insn (gen_sse_storelps (m, op1));
13098 m = adjust_address (op0, V2SFmode, 8);
13099 emit_insn (gen_sse_storehps (m, op1));
13103 gcc_unreachable ();
13106 /* Expand a push in MODE. This is some mode for which we do not support
13107 proper push instructions, at least from the registers that we expect
13108 the value to live in. */
/* NOTE(review): sampled listing; code kept verbatim.  */
13111 ix86_expand_push (enum machine_mode mode, rtx x)
/* Open-coded push: decrement the stack pointer by the mode size ...  */
13115 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13116 GEN_INT (-GET_MODE_SIZE (mode)),
13117 stack_pointer_rtx, 1, OPTAB_DIRECT);
13118 if (tmp != stack_pointer_rtx)
13119 emit_move_insn (stack_pointer_rtx, tmp);
/* ... then store X through the new stack top.  */
13121 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13123 /* When we push an operand onto stack, it has to be aligned at least
13124 at the function argument boundary. However since we don't have
13125 the argument type, we can't determine the actual argument
13127 emit_move_insn (tmp, x);
13130 /* Helper function of ix86_fixup_binary_operands to canonicalize
13131 operand order. Returns true if the operands should be swapped. */
/* NOTE(review): sampled listing -- the return statements for each
   priority test are elided; code kept verbatim.  */
13134 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13137 rtx dst = operands[0];
13138 rtx src1 = operands[1];
13139 rtx src2 = operands[2];
13141 /* If the operation is not commutative, we can't do anything. */
13142 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13145 /* Highest priority is that src1 should match dst. */
13146 if (rtx_equal_p (dst, src1))
13148 if (rtx_equal_p (dst, src2))
13151 /* Next highest priority is that immediate constants come second. */
13152 if (immediate_operand (src2, mode))
13154 if (immediate_operand (src1, mode))
13157 /* Lowest priority is that memory references should come second. */
13167 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
13168 destination to use for the operation. If different from the true
13169 destination in operands[0], a copy operation will be required. */
/* NOTE(review): sampled listing -- swap body and some braces elided;
   code kept verbatim.  */
13172 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13175 rtx dst = operands[0];
13176 rtx src1 = operands[1];
13177 rtx src2 = operands[2];
13179 /* Canonicalize operand order. */
13180 if (ix86_swap_binary_operands_p (code, mode, operands))
13184 /* It is invalid to swap operands of different modes. */
13185 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13192 /* Both source operands cannot be in memory. */
13193 if (MEM_P (src1) && MEM_P (src2))
13195 /* Optimization: Only read from memory once. */
13196 if (rtx_equal_p (src1, src2))
13198 src2 = force_reg (mode, src2);
13202 src2 = force_reg (mode, src2);
13205 /* If the destination is memory, and we do not have matching source
13206 operands, do things in registers. */
13207 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13208 dst = gen_reg_rtx (mode);
13210 /* Source 1 cannot be a constant. */
13211 if (CONSTANT_P (src1))
13212 src1 = force_reg (mode, src1);
13214 /* Source 1 cannot be a non-matching memory. */
13215 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13216 src1 = force_reg (mode, src1);
/* Write the (possibly replaced) sources back for the caller.  */
13218 operands[1] = src1;
13219 operands[2] = src2;
13223 /* Similarly, but assume that the destination has already been
13224 set up properly. */
/* NOTE(review): sampled listing -- return type/braces elided.  Asserts
   that fixing up did not require a fresh destination register.  */
13227 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13228 enum machine_mode mode, rtx operands[])
13230 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13231 gcc_assert (dst == operands[0]);
13234 /* Attempt to expand a binary operator. Make the expansion closer to the
13235 actual machine, then just general_operand, which will allow 3 separate
13236 memory references (one output, two input) in a single insn. */
/* NOTE(review): sampled listing -- the plain emit in the
   reload_in_progress branch is elided; code kept verbatim.  */
13239 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13242 rtx src1, src2, dst, op, clob;
13244 dst = ix86_fixup_binary_operands (code, mode, operands);
13245 src1 = operands[1];
13246 src2 = operands[2];
13248 /* Emit the instruction. */
13250 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13251 if (reload_in_progress)
13253 /* Reload doesn't know about the flags register, and doesn't know that
13254 it doesn't want to clobber it. We can only do this with PLUS. */
13255 gcc_assert (code == PLUS);
/* Normal case: arithmetic clobbers EFLAGS, so emit SET + clobber
   in one PARALLEL.  */
13260 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13261 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13264 /* Fix up the destination if needed. */
13265 if (dst != operands[0])
13266 emit_move_insn (operands[0], dst);
13269 /* Return TRUE or FALSE depending on whether the binary operator meets the
13270 appropriate constraints. */
/* NOTE(review): sampled listing -- per-test return statements and the
   swap body are elided; code kept verbatim.  */
13273 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13276 rtx dst = operands[0];
13277 rtx src1 = operands[1];
13278 rtx src2 = operands[2];
13280 /* Both source operands cannot be in memory. */
13281 if (MEM_P (src1) && MEM_P (src2))
13284 /* Canonicalize operand order for commutative operators. */
13285 if (ix86_swap_binary_operands_p (code, mode, operands))
13292 /* If the destination is memory, we must have a matching source operand. */
13293 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13296 /* Source 1 cannot be a constant. */
13297 if (CONSTANT_P (src1))
13300 /* Source 1 cannot be a non-matching memory. */
13301 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13307 /* Attempt to expand a unary operator. Make the expansion closer to the
13308 actual machine, then just general_operand, which will allow 2 separate
13309 memory references (one output, one input) in a single insn. */
/* NOTE(review): sampled listing -- the MEM_P (dst) guard around the
   matching-memory test and the plain-emit branch are elided; code kept
   verbatim.  */
13312 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13315 int matching_memory;
13316 rtx src, dst, op, clob;
13321 /* If the destination is memory, and we do not have matching source
13322 operands, do things in registers. */
13323 matching_memory = 0;
13326 if (rtx_equal_p (dst, src))
13327 matching_memory = 1;
13329 dst = gen_reg_rtx (mode);
13332 /* When source operand is memory, destination must match. */
13333 if (MEM_P (src) && !matching_memory)
13334 src = force_reg (mode, src);
13336 /* Emit the instruction. */
13338 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13339 if (reload_in_progress || code == NOT)
13341 /* Reload doesn't know about the flags register, and doesn't know that
13342 it doesn't want to clobber it. */
13343 gcc_assert (code == NOT);
/* Other unary ops clobber EFLAGS; pair the SET with a flags clobber.  */
13348 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13349 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13352 /* Fix up the destination if needed. */
13353 if (dst != operands[0])
13354 emit_move_insn (operands[0], dst);
/* Cap on how many insns either distance search below will walk.  */
13357 #define LEA_SEARCH_THRESHOLD 12
13359 /* Search backward for non-agu definition of register number REGNO1
13360 or register number REGNO2 in INSN's basic block until
13361 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13362 2. Reach BB boundary, or
13363 3. Reach agu definition.
13364 Returns the distance between the non-agu definition point and INSN.
13365 If no definition point, returns -1. */
/* NOTE(review): sampled listing -- distance bookkeeping, NONDEBUG
   guards and several braces are elided; code kept verbatim.  */
13368 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13371 basic_block bb = BLOCK_FOR_INSN (insn);
13374 enum attr_type insn_type;
/* Phase 1: walk backward inside INSN's own basic block.  */
13376 if (insn != BB_HEAD (bb))
13378 rtx prev = PREV_INSN (insn);
13379 while (prev && distance < LEA_SEARCH_THRESHOLD)
13384 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13385 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13386 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13387 && (regno1 == DF_REF_REGNO (*def_rec)
13388 || regno2 == DF_REF_REGNO (*def_rec)))
13390 insn_type = get_attr_type (prev);
13391 if (insn_type != TYPE_LEA)
13395 if (prev == BB_HEAD (bb))
13397 prev = PREV_INSN (prev);
/* Phase 2: if the budget is not exhausted and the block is a
   single-block self-loop, continue from the block's end.  */
13401 if (distance < LEA_SEARCH_THRESHOLD)
13405 bool simple_loop = false;
13407 FOR_EACH_EDGE (e, ei, bb->preds)
13410 simple_loop = true;
13416 rtx prev = BB_END (bb);
13419 && distance < LEA_SEARCH_THRESHOLD)
13424 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13425 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13426 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13427 && (regno1 == DF_REF_REGNO (*def_rec)
13428 || regno2 == DF_REF_REGNO (*def_rec)))
13430 insn_type = get_attr_type (prev);
13431 if (insn_type != TYPE_LEA)
13435 prev = PREV_INSN (prev);
13443 /* get_attr_type may modify recog data. We want to make sure
13444 that recog data is valid for instruction INSN, on which
13445 distance_non_agu_define is called. INSN is unchanged here. */
13446 extract_insn_cached (insn);
13450 /* Return the distance between INSN and the next insn that uses
13451 register number REGNO0 in memory address. Return -1 if no such
13452 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
/* NOTE(review): sampled listing -- distance bookkeeping, return
   statements and braces are elided; code kept verbatim.  Mirrors
   distance_non_agu_define but walks forward over uses.  */
13455 distance_agu_use (unsigned int regno0, rtx insn)
13457 basic_block bb = BLOCK_FOR_INSN (insn);
/* Phase 1: walk forward inside INSN's own basic block.  */
13462 if (insn != BB_END (bb))
13464 rtx next = NEXT_INSN (insn);
13465 while (next && distance < LEA_SEARCH_THRESHOLD)
13471 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13472 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13473 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13474 && regno0 == DF_REF_REGNO (*use_rec))
13476 /* Return DISTANCE if OP0 is used in memory
13477 address in NEXT. */
13481 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13482 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13483 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13484 && regno0 == DF_REF_REGNO (*def_rec))
13486 /* Return -1 if OP0 is set in NEXT. */
13490 if (next == BB_END (bb))
13492 next = NEXT_INSN (next);
/* Phase 2: for a single-block self-loop, wrap around to the block
   head and keep searching within the remaining budget.  */
13496 if (distance < LEA_SEARCH_THRESHOLD)
13500 bool simple_loop = false;
13502 FOR_EACH_EDGE (e, ei, bb->succs)
13505 simple_loop = true;
13511 rtx next = BB_HEAD (bb);
13514 && distance < LEA_SEARCH_THRESHOLD)
13520 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13521 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13522 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13523 && regno0 == DF_REF_REGNO (*use_rec))
13525 /* Return DISTANCE if OP0 is used in memory
13526 address in NEXT. */
13530 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13531 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13532 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13533 && regno0 == DF_REF_REGNO (*def_rec))
13535 /* Return -1 if OP0 is set in NEXT. */
13540 next = NEXT_INSN (next);
13549 there is a dilemma of choicing LEA or ADD
13550 Negative value: ADD is more preferred than LEA
13552 Positive value: LEA is more preferred than ADD*/
13553 #define IX86_LEA_PRIORITY 2
13555 /* Return true if it is ok to optimize an ADD operation to LEA
13556 operation to avoid flag register consumation. For the processors
13557 like ATOM, if the destination register of LEA holds an actual
13558 address which will be used soon, LEA is better and otherwise ADD
13562 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13563 rtx insn, rtx operands[])
13565 unsigned int regno0 = true_regnum (operands[0]);
13566 unsigned int regno1 = true_regnum (operands[1]);
13567 unsigned int regno2;
13569 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13570 return regno0 != regno1;
13572 regno2 = true_regnum (operands[2]);
13574 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13575 if (regno0 != regno1 && regno0 != regno2)
13579 int dist_define, dist_use;
13580 dist_define = distance_non_agu_define (regno1, regno2, insn);
13581 if (dist_define <= 0)
13584 /* If this insn has both backward non-agu dependence and forward
13585 agu dependence, the one with short distance take effect. */
13586 dist_use = distance_agu_use (regno0, insn);
13588 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13595 /* Return true if destination reg of SET_BODY is shift count of
/* NOTE(review): sampled listing -- default cases, `break`s and the
   final returns are elided; code kept verbatim.  Recurses into
   PARALLEL bodies on both sides.  */
13599 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13605 /* Retrieve destination of SET_BODY. */
13606 switch (GET_CODE (set_body))
13609 set_dest = SET_DEST (set_body);
13610 if (!set_dest || !REG_P (set_dest))
13614 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13615 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13623 /* Retrieve shift count of USE_BODY. */
13624 switch (GET_CODE (use_body))
13627 shift_rtx = XEXP (use_body, 1);
13630 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13631 if (ix86_dep_by_shift_count_body (set_body,
13632 XVECEXP (use_body, 0, i)))
13640 && (GET_CODE (shift_rtx) == ASHIFT
13641 || GET_CODE (shift_rtx) == LSHIFTRT
13642 || GET_CODE (shift_rtx) == ASHIFTRT
13643 || GET_CODE (shift_rtx) == ROTATE
13644 || GET_CODE (shift_rtx) == ROTATERT))
13646 rtx shift_count = XEXP (shift_rtx, 1);
13648 /* Return true if shift count is dest of SET_BODY. */
13649 if (REG_P (shift_count)
13650 && true_regnum (set_dest) == true_regnum (shift_count))
13657 /* Return true if destination reg of SET_INSN is shift count of
/* NOTE(review): return type elided.  Thin wrapper extracting the insn
   PATTERNs for the body-level check above.  */
13661 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13663 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13664 PATTERN (use_insn));
13667 /* Return TRUE or FALSE depending on whether the unary operator meets the
13668 appropriate constraints. */
/* NOTE(review): sampled listing -- the FALSE/TRUE returns are elided;
   code kept verbatim.  */
13671 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13672 enum machine_mode mode ATTRIBUTE_UNUSED,
13673 rtx operands[2] ATTRIBUTE_UNUSED)
13675 /* If one of operands is memory, source and destination must match. */
13676 if ((MEM_P (operands[0])
13677 || MEM_P (operands[1]))
13678 && ! rtx_equal_p (operands[0], operands[1]))
13683 /* Post-reload splitter for converting an SF or DFmode value in an
13684 SSE register into an unsigned SImode. */
/* NOTE(review): sampled listing -- the MEM_P (input) split and some
   braces are elided between the numbered rows; code kept verbatim,
   comments only added.  */
13687 ix86_split_convert_uns_si_sse (rtx operands[])
13689 enum machine_mode vecmode;
13690 rtx value, large, zero_or_two31, input, two31, x;
13692 large = operands[1];
13693 zero_or_two31 = operands[2];
13694 input = operands[3];
13695 two31 = operands[4];
13696 vecmode = GET_MODE (large);
13697 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13699 /* Load up the value into the low element. We must ensure that the other
13700 elements are valid floats -- zero is the easiest such value. */
13703 if (vecmode == V4SFmode)
13704 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13706 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13710 input = gen_rtx_REG (vecmode, REGNO (input));
13711 emit_move_insn (value, CONST0_RTX (vecmode));
13712 if (vecmode == V4SFmode)
13713 emit_insn (gen_sse_movss (value, value, input));
13715 emit_insn (gen_sse2_movsd (value, value, input));
/* LARGE becomes a mask of lanes where 2**31 <= value; ZERO_OR_TWO31 is
   2**31 in those lanes and zero elsewhere, subtracted from the value
   to bring it into signed range before the cvttp*2dq conversion.  */
13718 emit_move_insn (large, two31);
13719 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13721 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13722 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13724 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13725 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13727 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13728 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the mask into the sign-bit position so the final xor flips the
   sign bit back for values that were biased down.  */
13730 large = gen_rtx_REG (V4SImode, REGNO (large));
13731 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13733 x = gen_rtx_REG (V4SImode, REGNO (value));
13734 if (vecmode == V4SFmode)
13735 emit_insn (gen_sse2_cvttps2dq (x, value));
13737 emit_insn (gen_sse2_cvttpd2dq (x, value));
13740 emit_insn (gen_xorv4si3 (value, value, large));
13743 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13744 Expects the 64-bit DImode to be supplied in a pair of integral
13745 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13746 -mfpmath=sse, !optimize_size only. */
13749 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13751 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13752 rtx int_xmm, fp_xmm;
13753 rtx biases, exponents;
/* Move the 64-bit integer into an XMM register; the three paths differ
   only in which move strategy the target tunes for.  */
13756 int_xmm = gen_reg_rtx (V4SImode);
13757 if (TARGET_INTER_UNIT_MOVES)
13758 emit_insn (gen_movdi_to_sse (int_xmm, input));
13759 else if (TARGET_SSE_SPLIT_REGS)
13761 emit_clobber (int_xmm);
13762 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13766 x = gen_reg_rtx (V2DImode);
13767 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13768 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words 0x43300000 (2**52) and 0x45300000 (2**84) to pair
   with the low/high input halves.  */
13771 x = gen_rtx_CONST_VECTOR (V4SImode,
13772 gen_rtvec (4, GEN_INT (0x43300000UL),
13773 GEN_INT (0x45300000UL),
13774 const0_rtx, const0_rtx));
13775 exponents = validize_mem (force_const_mem (V4SImode, x));
13777 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13778 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13780 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13781 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13782 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13783 (0x1.0p84 + double(fp_value_hi_xmm)).
13784 Note these exponents differ by 32. */
13786 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13788 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13789 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13790 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13791 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13792 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13793 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13794 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13795 biases = validize_mem (force_const_mem (V2DFmode, biases));
13796 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13798 /* Add the upper and lower DFmode values together. */
/* SSE3 path: horizontal add does it in one insn; otherwise unpack the
   high half and use an ordinary vector add (condition elided).  */
13800 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13803 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13804 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13805 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13808 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13811 /* Not used, but eases macroization of patterns. */
/* Placeholder expander: must never be reached at runtime.  */
13813 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13814 rtx input ATTRIBUTE_UNUSED)
13816 gcc_unreachable ();
13819 /* Convert an unsigned SImode value into a DFmode. Only currently used
13820 for SSE, but applicable anywhere. */
13823 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13825 REAL_VALUE_TYPE TWO31r;
/* Flip the sign bit: x + INT_MIN reinterprets [0,2**32) as a signed
   value biased by -2**31.  */
13828 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13829 NULL, 1, OPTAB_DIRECT);
/* Signed SImode -> DFmode conversion of the biased value.  */
13831 fp = gen_reg_rtx (DFmode);
13832 emit_insn (gen_floatsidf2 (fp, x));
/* Add back 2**31 in the FP domain to undo the bias.  */
13834 real_ldexp (&TWO31r, &dconst1, 31);
13835 x = const_double_from_real_value (TWO31r, DFmode);
13837 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
/* Copy to TARGET if expand_simple_binop placed the result elsewhere
   (guard condition elided in this listing).  */
13839 emit_move_insn (target, x);
13842 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13843 32-bit mode; otherwise we have a direct convert instruction. */
13846 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13848 REAL_VALUE_TYPE TWO32r;
13849 rtx fp_lo, fp_hi, x;
13851 fp_lo = gen_reg_rtx (DFmode);
13852 fp_hi = gen_reg_rtx (DFmode);
/* High word is signed: ordinary SImode->DFmode conversion.  */
13854 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
/* Scale the high word by 2**32.  */
13856 real_ldexp (&TWO32r, &dconst1, 32);
13857 x = const_double_from_real_value (TWO32r, DFmode);
13858 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low word is unsigned: use the dedicated unsigned-SI expander.  */
13860 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
/* result = hi * 2**32 + lo.  */
13862 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13865 emit_move_insn (target, x);
13868 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13869 For x86_32, -mfpmath=sse, !optimize_size only. */
13871 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13873 REAL_VALUE_TYPE ONE16r;
13874 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* x = 2**16 as an SFmode constant, used to scale the high half.  */
13876 real_ldexp (&ONE16r, &dconst1, 16);
13877 x = const_double_from_real_value (ONE16r, SFmode);
/* Split the 32-bit input into 16-bit halves; each half is exactly
   representable in SFmode, so both conversions are lossless.  */
13878 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13879 NULL, 0, OPTAB_DIRECT);
13880 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13881 NULL, 0, OPTAB_DIRECT);
13882 fp_hi = gen_reg_rtx (SFmode);
13883 fp_lo = gen_reg_rtx (SFmode);
13884 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13885 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
/* result = hi * 2**16 + lo.  */
13886 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13888 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13890 if (!rtx_equal_p (target, fp_hi))
13891 emit_move_insn (target, fp_hi);
13894 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13895 then replicate the value for all elements of the vector
/* Returns a CONST_VECTOR in the vector mode matching MODE; the mode
   switch/case labels are elided in this listing.  */
13899 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* SImode case: integer masks are always broadcast to all 4 lanes.  */
13906 v = gen_rtvec (4, value, value, value, value);
13907 return gen_rtx_CONST_VECTOR (V4SImode, v);
/* DImode case: broadcast to both 64-bit lanes.  */
13911 v = gen_rtvec (2, value, value);
13912 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* SFmode case: broadcast when VECT, else VALUE in lane 0 and zeros.  */
13916 v = gen_rtvec (4, value, value, value, value);
13918 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13919 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13920 return gen_rtx_CONST_VECTOR (V4SFmode, v);
/* DFmode case: likewise for two lanes.  */
13924 v = gen_rtvec (2, value, value);
13926 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13927 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13930 gcc_unreachable ();
13934 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13935 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13936 for an SSE register. If VECT is true, then replicate the mask for
13937 all elements of the vector register. If INVERT is true, then create
13938 a mask excluding the sign bit. */
13941 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13943 enum machine_mode vec_mode, imode;
13944 HOST_WIDE_INT hi, lo;
13949 /* Find the sign bit, sign extended to 2*HWI. */
/* SFmode/SImode case: sign bit is bit 31.  */
13955 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13956 lo = 0x80000000, hi = lo < 0;
/* DFmode/DImode case: sign bit is bit 63; split across hi/lo when
   HOST_WIDE_INT is only 32 bits.  */
13962 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13963 if (HOST_BITS_PER_WIDE_INT >= 64)
13964 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13966 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TFmode case (presumably -- case label elided): no named vector mode,
   build the mask as a V2DImode constant directly.  */
13971 vec_mode = VOIDmode;
13972 if (HOST_BITS_PER_WIDE_INT >= 64)
13975 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13982 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* INVERT: complement to mask everything except the sign bit.  */
13986 lo = ~lo, hi = ~hi;
13992 mask = immed_double_const (lo, hi, imode);
13994 vec = gen_rtvec (2, v, mask);
13995 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13996 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
14003 gcc_unreachable ();
/* Common tail for the SF/DF paths.  */
14007 lo = ~lo, hi = ~hi;
14009 /* Force this value into the low part of a fp vector constant. */
14010 mask = immed_double_const (lo, hi, imode);
14011 mask = gen_lowpart (mode, mask);
14013 if (vec_mode == VOIDmode)
14014 return force_reg (mode, mask);
14016 v = ix86_build_const_vector (mode, vect, mask);
14017 return force_reg (vec_mode, v);
14020 /* Generate code for floating point ABS or NEG. */
/* CODE is ABS or NEG; MODE may be scalar or vector FP.  Interior lines
   (dst/src setup, branch structure) are elided in this listing.  */
14023 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14026 rtx mask, set, use, clob, dst, src;
14027 bool use_sse = false;
14028 bool vector_mode = VECTOR_MODE_P (mode);
14029 enum machine_mode elt_mode = mode;
14033 elt_mode = GET_MODE_INNER (mode);
14036 else if (mode == TFmode)
14038 else if (TARGET_SSE_MATH)
14039 use_sse = SSE_FLOAT_MODE_P (mode);
14041 /* NEG and ABS performed with SSE use bitwise mask operations.
14042 Create the appropriate mask now. */
/* ABS clears the sign bit (AND with inverted mask); NEG flips it (XOR
   with sign-bit mask).  */
14044 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
14053 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14054 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: emit the plain unary rtx.  */
14059 set = gen_rtx_fmt_e (code, mode, src);
14060 set = gen_rtx_SET (VOIDmode, dst, set);
/* Attach USE of the mask and a FLAGS clobber so the post-reload
   splitters have what they need.  */
14063 use = gen_rtx_USE (VOIDmode, mask);
14064 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14065 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14066 gen_rtvec (3, set, use, clob)));
14073 /* Expand a copysign operation. Special case operand 0 being a constant. */
14076 ix86_expand_copysign (rtx operands[])
14078 enum machine_mode mode;
14079 rtx dest, op0, op1, mask, nmask;
14081 dest = operands[0];
14085 mode = GET_MODE (dest);
/* Constant magnitude: take |op0| now and emit the single-mask form.  */
14087 if (GET_CODE (op0) == CONST_DOUBLE)
14089 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
14091 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14092 op0 = simplify_unary_operation (ABS, mode, op0, mode);
14094 if (mode == SFmode || mode == DFmode)
14096 enum machine_mode vmode;
14098 vmode = mode == SFmode ? V4SFmode : V2DFmode;
14100 if (op0 == CONST0_RTX (mode))
14101 op0 = CONST0_RTX (vmode);
/* Non-zero constant: place it in lane 0 of a vector constant.  */
14106 if (mode == SFmode)
14107 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
14108 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14110 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
14112 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
14115 else if (op0 != CONST0_RTX (mode))
14116 op0 = force_reg (mode, op0);
14118 mask = ix86_build_signbit_mask (mode, 0, 0);
14120 if (mode == SFmode)
14121 copysign_insn = gen_copysignsf3_const;
14122 else if (mode == DFmode)
14123 copysign_insn = gen_copysigndf3_const;
14125 copysign_insn = gen_copysigntf3_const;
14127 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: need both the sign mask and its complement.  */
14131 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14133 nmask = ix86_build_signbit_mask (mode, 0, 1);
14134 mask = ix86_build_signbit_mask (mode, 0, 0);
14136 if (mode == SFmode)
14137 copysign_insn = gen_copysignsf3_var;
14138 else if (mode == DFmode)
14139 copysign_insn = gen_copysigndf3_var;
14141 copysign_insn = gen_copysigntf3_var;
14143 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14147 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14148 be a constant, and so has already been expanded into a vector constant. */
14151 ix86_split_copysign_const (rtx operands[])
14153 enum machine_mode mode, vmode;
14154 rtx dest, op0, op1, mask, x;
14156 dest = operands[0];
14159 mask = operands[3];
14161 mode = GET_MODE (dest);
14162 vmode = GET_MODE (mask);
/* dest = (op1 & sign-mask) | |op0|: first isolate the sign bit of the
   sign source ...  */
14164 dest = simplify_gen_subreg (vmode, dest, mode, 0);
14165 x = gen_rtx_AND (vmode, dest, mask);
14166 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* ... then OR in the constant magnitude, unless it is zero (in which
   case the AND alone already produced the result).  */
14168 if (op0 != CONST0_RTX (vmode))
14170 x = gen_rtx_IOR (vmode, dest, op0);
14171 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14175 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
14176 so we have to do two masks. */
/* Alternatives below refer to the register-allocation alternatives of
   the copysign*_var pattern; which registers were tied determines the
   emission order.  */
14179 ix86_split_copysign_var (rtx operands[])
14181 enum machine_mode mode, vmode;
14182 rtx dest, scratch, op0, op1, mask, nmask, x;
14184 dest = operands[0];
14185 scratch = operands[1];
14188 nmask = operands[4];
14189 mask = operands[5];
14191 mode = GET_MODE (dest);
14192 vmode = GET_MODE (mask);
14194 if (rtx_equal_p (op0, op1))
14196 /* Shouldn't happen often (it's useless, obviously), but when it does
14197 we'd generate incorrect code if we continue below. */
14198 emit_move_insn (dest, op0);
14202 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
14204 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & sign-mask.  */
14206 x = gen_rtx_AND (vmode, scratch, mask);
14207 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest = ~mask & op0, computed as ANDN since dest holds the mask.  */
14210 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14211 x = gen_rtx_NOT (vmode, dest);
14212 x = gen_rtx_AND (vmode, x, op0);
14213 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14217 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
14219 x = gen_rtx_AND (vmode, scratch, mask);
14221 else /* alternative 2,4 */
14223 gcc_assert (REGNO (mask) == REGNO (scratch));
14224 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14225 x = gen_rtx_AND (vmode, scratch, op1);
14227 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14229 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
14231 dest = simplify_gen_subreg (vmode, op0, mode, 0);
14232 x = gen_rtx_AND (vmode, dest, nmask);
14234 else /* alternative 3,4 */
14236 gcc_assert (REGNO (nmask) == REGNO (dest));
14238 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14239 x = gen_rtx_AND (vmode, dest, op0);
14241 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Final combine: dest = (op0 & ~sign-mask) | (op1 & sign-mask).  */
14244 x = gen_rtx_IOR (vmode, dest, scratch);
14245 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14248 /* Return TRUE or FALSE depending on whether the first SET in INSN
14249 has source and destination with matching CC modes, and that the
14250 CC mode is at least as constrained as REQ_MODE. */
14253 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14256 enum machine_mode set_mode;
14258 set = PATTERN (insn);
14259 if (GET_CODE (set) == PARALLEL)
14260 set = XVECEXP (set, 0, 0);
14261 gcc_assert (GET_CODE (set) == SET);
14262 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14264 set_mode = GET_MODE (SET_DEST (set));
/* The switch over set_mode is elided; each visible test rejects modes
   weaker than REQ_MODE for the given set_mode case.  */
14268 if (req_mode != CCNOmode
14269 && (req_mode != CCmode
14270 || XEXP (SET_SRC (set), 1) != const0_rtx))
14274 if (req_mode == CCGCmode)
14278 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14282 if (req_mode == CCZmode)
14293 gcc_unreachable ();
14296 return (GET_MODE (SET_SRC (set)) == set_mode);
14299 /* Generate insn patterns to do an integer compare of OPERANDS. */
14302 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14304 enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still captures CODE.  */
14307 cmpmode = SELECT_CC_MODE (code, op0, op1);
14308 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14310 /* This is very simple, but making the interface the same as in the
14311 FP case makes the rest of the code easier. */
14312 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14313 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14315 /* Return the test that should be put into the flags user, i.e.
14316 the bcc, scc, or cmov instruction. */
14317 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14320 /* Figure out whether to use ordered or unordered fp comparisons.
14321 Return the appropriate mode to use. */
/* CODE is unused: the decision depends only on TARGET_IEEE_FP.  */
14324 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14326 /* ??? In order to make all comparisons reversible, we do all comparisons
14327 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14328 all forms trapping and nontrapping comparisons, we can make inequality
14329 comparisons trapping again, since it results in better code when using
14330 FCOM based compares. */
14331 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed to represent comparison CODE of OP0/OP1.
   (Switch braces and the returned mode names are elided in this
   listing; the comments on each case document which flags matter.)  */
14335 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14337 enum machine_mode mode = GET_MODE (op0);
14339 if (SCALAR_FLOAT_MODE_P (mode))
14341 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14342 return ix86_fp_compare_mode (code);
14347 /* Only zero flag is needed. */
14348 case EQ: /* ZF=0 */
14349 case NE: /* ZF!=0 */
14351 /* Codes needing carry flag. */
14352 case GEU: /* CF=0 */
14353 case LTU: /* CF=1 */
14354 /* Detect overflow checks. They need just the carry flag. */
14355 if (GET_CODE (op0) == PLUS
14356 && rtx_equal_p (op1, XEXP (op0, 0)))
14360 case GTU: /* CF=0 & ZF=0 */
14361 case LEU: /* CF=1 | ZF=1 */
14362 /* Detect overflow checks. They need just the carry flag. */
14363 if (GET_CODE (op0) == MINUS
14364 && rtx_equal_p (op1, XEXP (op0, 0)))
14368 /* Codes possibly doable only with sign flag when
14369 comparing against zero. */
14370 case GE: /* SF=OF or SF=0 */
14371 case LT: /* SF<>OF or SF=1 */
14372 if (op1 == const0_rtx)
14375 /* For other cases Carry flag is not required. */
14377 /* Codes doable only with sign flag when comparing
14378 against zero, but we miss jump instruction for it
14379 so we need to use relational tests against overflow
14380 that thus needs to be zero. */
14381 case GT: /* ZF=0 & SF=OF */
14382 case LE: /* ZF=1 | SF<>OF */
14383 if (op1 == const0_rtx)
14387 /* strcmp pattern do (use flags) and combine may ask us for proper
14392 gcc_unreachable ();
14396 /* Return the fixed registers used for condition codes. */
/* Body elided in this listing -- presumably stores FLAGS_REG (and an
   invalid second regno) through *P1/*P2; TODO confirm against full
   source.  */
14399 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14406 /* If two condition code modes are compatible, return a condition code
14407 mode which is compatible with both. Otherwise, return
14410 static enum machine_mode
14411 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Trivial case (condition elided): equal modes are compatible.  */
14416 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC merge to the stricter of the two.  */
14419 if ((m1 == CCGCmode && m2 == CCGOCmode)
14420 || (m1 == CCGOCmode && m2 == CCGCmode))
14426 gcc_unreachable ();
14456 /* These are only compatible with themselves, which we already
14463 /* Return a comparison we can do and that it is equivalent to
14464 swap_condition (code) apart possibly from orderedness.
14465 But, never change orderedness if TARGET_IEEE_FP, returning
14466 UNKNOWN in that case if necessary. */
14468 static enum rtx_code
14469 ix86_fp_swap_condition (enum rtx_code code)
/* Each mapping below flips ordered <-> unordered, which is only legal
   when IEEE conformance is not required.  */
14473 case GT: /* GTU - CF=0 & ZF=0 */
14474 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
14475 case GE: /* GEU - CF=0 */
14476 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
14477 case UNLT: /* LTU - CF=1 */
14478 return TARGET_IEEE_FP ? UNKNOWN : GT;
14479 case UNLE: /* LEU - CF=1 | ZF=1 */
14480 return TARGET_IEEE_FP ? UNKNOWN : GE;
/* All other codes swap without affecting orderedness.  */
14482 return swap_condition (code);
14486 /* Return cost of comparison CODE using the best strategy for performance.
14487 All following functions do use number of instructions as a cost metrics.
14488 In future this should be tweaked to compute bytes for optimize_size and
14489 take into account performance of various instructions on various CPUs. */
14492 ix86_fp_comparison_cost (enum rtx_code code)
14496 /* The cost of code using bit-twiddling on %ah. */
/* Per-code arith costs; IEEE conformance needs extra instructions for
   some codes (switch labels elided in this listing).  */
14513 arith_cost = TARGET_IEEE_FP ? 5 : 4;
14517 arith_cost = TARGET_IEEE_FP ? 6 : 4;
14520 gcc_unreachable ();
14523 switch (ix86_fp_comparison_strategy (code))
14525 case IX86_FPCMP_COMI:
14526 return arith_cost > 4 ? 3 : 2;
14527 case IX86_FPCMP_SAHF:
14528 return arith_cost > 4 ? 4 : 3;
14534 /* Return strategy to use for floating-point. We assume that fcomi is always
14535 preferrable where available, since that is also true when looking at size
14536 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
14538 enum ix86_fpcmp_strategy
14539 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
14541 /* Do fcomi/sahf based test when profitable. */
/* COMI guard condition elided in this listing (presumably TARGET_CMOVE
   or similar -- TODO confirm).  */
14544 return IX86_FPCMP_COMI;
14546 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
14547 return IX86_FPCMP_SAHF;
/* Fallback: fnstsw + bit-twiddling on %ah.  */
14549 return IX86_FPCMP_ARITH;
14552 /* Swap, force into registers, or otherwise massage the two operands
14553 to a fp comparison. The operands are updated in place; the new
14554 comparison code is returned. */
14556 static enum rtx_code
14557 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14559 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14560 rtx op0 = *pop0, op1 = *pop1;
14561 enum machine_mode op_mode = GET_MODE (op0);
14562 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14564 /* All of the unordered compare instructions only work on registers.
14565 The same is true of the fcomi compare instructions. The XFmode
14566 compare instructions require registers except when comparing
14567 against zero or when converting operand 1 from fixed point to
/* Guard condition partly elided (presumably !is_sse && ...).  */
14571 && (fpcmp_mode == CCFPUmode
14572 || (op_mode == XFmode
14573 && ! (standard_80387_constant_p (op0) == 1
14574 || standard_80387_constant_p (op1) == 1)
14575 && GET_CODE (op1) != FLOAT)
14576 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
14578 op0 = force_reg (op_mode, op0);
14579 op1 = force_reg (op_mode, op1);
14583 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14584 things around if they appear profitable, otherwise force op0
14585 into a register. */
14587 if (standard_80387_constant_p (op0) == 0
14589 && ! (standard_80387_constant_p (op1) == 0
/* Swap only if the swapped condition is still representable.  */
14592 enum rtx_code new_code = ix86_fp_swap_condition (code);
14593 if (new_code != UNKNOWN)
14596 tmp = op0, op0 = op1, op1 = tmp;
14602 op0 = force_reg (op_mode, op0);
14604 if (CONSTANT_P (op1))
14606 int tmp = standard_80387_constant_p (op1);
/* Non-standard 387 constants must live in memory; standard ones
   (fldz/fld1 etc.) may stay, others go to a register.  */
14608 op1 = validize_mem (force_const_mem (op_mode, op1));
14612 op1 = force_reg (op_mode, op1);
14615 op1 = force_reg (op_mode, op1);
14619 /* Try to rearrange the comparison to make it cheaper. */
14620 if (ix86_fp_comparison_cost (code)
14621 > ix86_fp_comparison_cost (swap_condition (code))
14622 && (REG_P (op1) || can_create_pseudo_p ()))
14625 tmp = op0, op0 = op1, op1 = tmp;
14626 code = swap_condition (code);
14628 op0 = force_reg (op_mode, op0);
14636 /* Convert comparison codes we use to represent FP comparison to integer
14637 code that will result in proper branch. Return UNKNOWN if no such code
/* Body fully elided in this listing -- presumably a switch mapping
   e.g. GT->GTU, UNLT->LTU etc.; TODO confirm against full source.  */
14641 ix86_fp_compare_code_to_integer (enum rtx_code code)
14670 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14673 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
14675 enum machine_mode fpcmp_mode, intcmp_mode;
14678 fpcmp_mode = ix86_fp_compare_mode (code);
14679 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14681 /* Do fcomi/sahf based test when profitable. */
14682 switch (ix86_fp_comparison_strategy (code))
14684 case IX86_FPCMP_COMI:
/* fcomi sets EFLAGS directly; no scratch needed.  */
14685 intcmp_mode = fpcmp_mode;
14686 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14687 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14692 case IX86_FPCMP_SAHF:
/* fnstsw %ax + sahf: needs an HImode scratch for the status word.  */
14693 intcmp_mode = fpcmp_mode;
14694 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14695 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14699 scratch = gen_reg_rtx (HImode);
14700 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14701 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14704 case IX86_FPCMP_ARITH:
14705 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14706 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14707 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14709 scratch = gen_reg_rtx (HImode);
14710 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14712 /* In the unordered case, we have to check C2 for NaN's, which
14713 doesn't happen to work out to anything nice combination-wise.
14714 So do some bit twiddling on the value we've got in AH to come
14715 up with an appropriate set of condition codes. */
/* x87 status-word bits tested below: 0x01=C0(carry), 0x04=C2(NaN),
   0x40=C3(zero), 0x45 = C0|C2|C3.  Switch-over-CODE labels are
   elided in this listing.  */
14717 intcmp_mode = CCNOmode;
14722 if (code == GT || !TARGET_IEEE_FP)
14724 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14729 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14730 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14731 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14732 intcmp_mode = CCmode;
14738 if (code == LT && TARGET_IEEE_FP)
14740 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14741 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14742 intcmp_mode = CCmode;
14747 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14753 if (code == GE || !TARGET_IEEE_FP)
14755 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14760 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14761 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14768 if (code == LE && TARGET_IEEE_FP)
14770 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14771 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14772 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14773 intcmp_mode = CCmode;
14778 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14784 if (code == EQ && TARGET_IEEE_FP)
14786 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14787 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14788 intcmp_mode = CCmode;
14793 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14800 if (code == NE && TARGET_IEEE_FP)
14802 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14803 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14809 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED / UNORDERED (labels elided): test only C2.  */
14815 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14819 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14824 gcc_unreachable ();
14832 /* Return the test that should be put into the flags user, i.e.
14833 the bcc, scc, or cmov instruction. */
14834 return gen_rtx_fmt_ee (code, VOIDmode,
14835 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1 pair using
   CODE, dispatching on the operand mode; returns the flags test rtx.  */
14840 ix86_expand_compare (enum rtx_code code)
14843 op0 = ix86_compare_op0;
14844 op1 = ix86_compare_op1;
/* Already a CC-mode value: the compare was emitted earlier, just wrap
   the flags register in the requested test.  */
14846 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14847 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14849 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14851 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14852 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
14855 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL on comparison CODE of the global
   ix86_compare_op0/op1.  Wide (DImode-on-32-bit / TImode-on-64-bit)
   compares are decomposed into word-sized compares and multiple
   branches.  (Switch labels over the mode are elided in this listing.)  */
14861 ix86_expand_branch (enum rtx_code code, rtx label)
14865 switch (GET_MODE (ix86_compare_op0))
/* Native-width case: a single compare + conditional jump.  */
14874 tmp = ix86_expand_compare (code);
14875 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14876 gen_rtx_LABEL_REF (VOIDmode, label),
14878 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14885 /* Expand DImode branch into multiple compare+branch. */
14887 rtx lo[2], hi[2], label2;
14888 enum rtx_code code1, code2, code3;
14889 enum machine_mode submode;
/* Canonicalize: constant operand goes second.  */
14891 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14893 tmp = ix86_compare_op0;
14894 ix86_compare_op0 = ix86_compare_op1;
14895 ix86_compare_op1 = tmp;
14896 code = swap_condition (code);
14898 if (GET_MODE (ix86_compare_op0) == DImode)
14900 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14901 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14906 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14907 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14911 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14912 avoid two branches. This costs one extra insn, so disable when
14913 optimizing for size. */
14915 if ((code == EQ || code == NE)
14916 && (!optimize_insn_for_size_p ()
14917 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14922 if (hi[1] != const0_rtx)
14923 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14924 NULL_RTX, 0, OPTAB_WIDEN)
14927 if (lo[1] != const0_rtx)
14928 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14929 NULL_RTX, 0, OPTAB_WIDEN);
14931 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14932 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: compare the OR of XORs against zero at word width.  */
14934 ix86_compare_op0 = tmp;
14935 ix86_compare_op1 = const0_rtx;
14936 ix86_expand_branch (code, label);
14940 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14941 op1 is a constant and the low word is zero, then we can just
14942 examine the high word. Similarly for low word -1 and
14943 less-or-equal-than or greater-than. */
14945 if (CONST_INT_P (hi[1]))
14948 case LT: case LTU: case GE: case GEU:
14949 if (lo[1] == const0_rtx)
14951 ix86_compare_op0 = hi[0];
14952 ix86_compare_op1 = hi[1];
14953 ix86_expand_branch (code, label);
14957 case LE: case LEU: case GT: case GTU:
14958 if (lo[1] == constm1_rtx)
14960 ix86_compare_op0 = hi[0];
14961 ix86_compare_op1 = hi[1];
14962 ix86_expand_branch (code, label);
14970 /* Otherwise, we need two or three jumps. */
14972 label2 = gen_label_rtx ();
14975 code2 = swap_condition (code);
14976 code3 = unsigned_condition (code);
/* Strict orderings keep CODE for the high word; non-strict ones must
   split into a strict high-word pair.  */
14980 case LT: case GT: case LTU: case GTU:
14983 case LE: code1 = LT; code2 = GT; break;
14984 case GE: code1 = GT; code2 = LT; break;
14985 case LEU: code1 = LTU; code2 = GTU; break;
14986 case GEU: code1 = GTU; code2 = LTU; break;
14988 case EQ: code1 = UNKNOWN; code2 = NE; break;
14989 case NE: code2 = UNKNOWN; break;
14992 gcc_unreachable ();
14997 * if (hi(a) < hi(b)) goto true;
14998 * if (hi(a) > hi(b)) goto false;
14999 * if (lo(a) < lo(b)) goto true;
15003 ix86_compare_op0 = hi[0];
15004 ix86_compare_op1 = hi[1];
15006 if (code1 != UNKNOWN)
15007 ix86_expand_branch (code1, label);
15008 if (code2 != UNKNOWN)
15009 ix86_expand_branch (code2, label2);
/* Low words compare unsigned regardless of CODE's signedness.  */
15011 ix86_compare_op0 = lo[0];
15012 ix86_compare_op1 = lo[1];
15013 ix86_expand_branch (code3, label);
15015 if (code2 != UNKNOWN)
15016 emit_label (label2);
15021 /* If we have already emitted a compare insn, go straight to simple.
15022 ix86_expand_compare won't emit anything if ix86_compare_emitted
15024 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15029 /* Split branch based on floating point condition. */
15031 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15032 rtx target1, rtx target2, rtx tmp, rtx pushed)
/* If the fall-through/target roles are swapped, reverse the condition
   (maybe-unordered variant preserves NaN semantics).  */
15037 if (target2 != pc_rtx)
15040 code = reverse_condition_maybe_unordered (code);
15045 condition = ix86_expand_fp_compare (code, op1, op2,
15048 /* Remove pushed operand from stack. */
15050 ix86_free_from_memory (GET_MODE (pushed));
15052 i = emit_jump_insn (gen_rtx_SET
15054 gen_rtx_IF_THEN_ELSE (VOIDmode,
15055 condition, target1, target2)));
/* Propagate branch probability onto the new jump when known.  */
15056 if (split_branch_probability >= 0)
15057 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
/* Expand a setcc: store the boolean result of comparison CODE (of the
   global compare operands) into the QImode register DEST.  */
15061 ix86_expand_setcc (enum rtx_code code, rtx dest)
15065 gcc_assert (GET_MODE (dest) == QImode);
15067 ret = ix86_expand_compare (code);
/* Re-mode the flags test to QImode so it matches the setcc pattern.  */
15068 PUT_MODE (ret, QImode);
15069 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
15072 /* Expand comparison setting or clearing carry flag. Return true when
15073 successful and set pop for the operation. */
/* NOTE(review): elided excerpt.  The goal is to massage CODE/OP0/OP1
   into an LTU/GEU test of the carry flag (usable by adc/sbb); *POP
   receives the resulting comparison rtx.  */
15075 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15077 enum machine_mode mode =
15078 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15080 /* Do not handle DImode compares that go through special path. */
15081 if (mode == (TARGET_64BIT ? TImode : DImode))
/* Floating-point case: try expanding the compare and accept it only
   if it already ended up carry-flag based.  */
15084 if (SCALAR_FLOAT_MODE_P (mode))
15086 rtx compare_op, compare_seq;
15088 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15090 /* Shortcut: following common codes never translate
15091 into carry flag compares. */
15092 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15093 || code == ORDERED || code == UNORDERED)
15096 /* These comparisons require zero flag; swap operands so they won't. */
15097 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15098 && !TARGET_IEEE_FP)
15103 code = swap_condition (code);
15106 /* Try to expand the comparison and verify that we end up with
15107 carry flag based comparison. This fails to be true only when
15108 we decide to expand comparison using arithmetic that is not
15109 too common scenario. */
15111 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15112 compare_seq = get_insns ();
/* Translate FP condition codes back to integer ones so the
   LTU/GEU test below is meaningful.  */
15115 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15116 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15117 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15119 code = GET_CODE (compare_op);
15121 if (code != LTU && code != GEU)
15124 emit_insn (compare_seq);
15129 if (!INTEGRAL_MODE_P (mode))
/* Integer case: rewrite the comparison into an unsigned
   below/above-or-equal form that maps onto the carry flag.  */
15138 /* Convert a==0 into (unsigned)a<1. */
15141 if (op1 != const0_rtx)
15144 code = (code == EQ ? LTU : GEU);
15147 /* Convert a>b into b<a or a>=b-1. */
15150 if (CONST_INT_P (op1))
15152 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15153 /* Bail out on overflow. We still can swap operands but that
15154 would force loading of the constant into register. */
15155 if (op1 == const0_rtx
15156 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15158 code = (code == GTU ? GEU : LTU);
15165 code = (code == GTU ? LTU : GEU);
15169 /* Convert a>=0 into (unsigned)a<0x80000000. */
15172 if (mode == DImode || op1 != const0_rtx)
15174 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15175 code = (code == LT ? GEU : LTU);
15179 if (mode == DImode || op1 != constm1_rtx)
15181 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15182 code = (code == LE ? GEU : LTU);
15188 /* Swapping operands may cause constant to appear as first operand. */
15189 if (!nonimmediate_operand (op0, VOIDmode))
15191 if (!can_create_pseudo_p ())
15193 op0 = force_reg (mode, op0);
/* Publish the massaged operands and emit the final compare; it must
   have come out as a carry-flag (LTU/GEU) test.  */
15195 ix86_compare_op0 = op0;
15196 ix86_compare_op1 = op1;
15197 *pop = ix86_expand_compare (code);
15198 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move OPERANDS[0] = OPERANDS[1]
   ? OPERANDS[2] : OPERANDS[3].  Returns nonzero on success ("DONE"),
   zero on failure ("FAIL", caller falls back to generic code).
   NOTE(review): elided excerpt -- many statements between the numbered
   lines are not visible; comments below describe only what is shown.  */
15203 ix86_expand_int_movcc (rtx operands[])
15205 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15206 rtx compare_seq, compare_op;
15207 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double ';' on the next line -- harmless, but
   should be cleaned up.  */
15208 bool sign_bit_compare_p = false;;
15211 ix86_compare_op0 = XEXP (operands[1], 0);
15212 ix86_compare_op1 = XEXP (operands[1], 1);
15213 compare_op = ix86_expand_compare (code);
15214 compare_seq = get_insns ();
15217 compare_code = GET_CODE (compare_op);
/* x >= 0 / x < 0 (and the equivalent tests against -1) can be done
   with a sign-bit shift instead of a compare.  */
15219 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15220 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15221 sign_bit_compare_p = true;
15223 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15224 HImode insns, we'd be swallowed in word prefix ops. */
15226 if ((mode != HImode || TARGET_FAST_PREFIX)
15227 && (mode != (TARGET_64BIT ? TImode : DImode))
15228 && CONST_INT_P (operands[2])
15229 && CONST_INT_P (operands[3]))
15231 rtx out = operands[0];
15232 HOST_WIDE_INT ct = INTVAL (operands[2]);
15233 HOST_WIDE_INT cf = INTVAL (operands[3]);
15234 HOST_WIDE_INT diff;
15237 /* Sign bit compares are better done using shifts than we do by using
15239 if (sign_bit_compare_p
15240 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15241 ix86_compare_op1, &compare_op))
15243 /* Detect overlap between destination and compare sources. */
15246 if (!sign_bit_compare_p)
15248 bool fpcmp = false;
15250 compare_code = GET_CODE (compare_op);
15252 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15253 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15256 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15259 /* To simplify rest of code, restrict to the GEU case. */
15260 if (compare_code == LTU)
15262 HOST_WIDE_INT tmp = ct;
15265 compare_code = reverse_condition (compare_code);
15266 code = reverse_condition (code);
/* FP compares need the unordered-safe reversal; plain
   reverse_condition would mishandle NaNs.  */
15271 PUT_CODE (compare_op,
15272 reverse_condition_maybe_unordered
15273 (GET_CODE (compare_op)));
15275 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15279 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15280 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15281 tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag via sbb.  */
15283 if (mode == DImode)
15284 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15286 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15290 if (code == GT || code == GE)
15291 code = reverse_condition (code);
15294 HOST_WIDE_INT tmp = ct;
15299 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15300 ix86_compare_op1, VOIDmode, 0, -1);
/* Combine the 0/-1 mask with the two constants using add/or/and
   sequences chosen by the ct/cf relationship.  */
15313 tmp = expand_simple_binop (mode, PLUS,
15315 copy_rtx (tmp), 1, OPTAB_DIRECT);
15326 tmp = expand_simple_binop (mode, IOR,
15328 copy_rtx (tmp), 1, OPTAB_DIRECT);
15330 else if (diff == -1 && ct)
15340 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15342 tmp = expand_simple_binop (mode, PLUS,
15343 copy_rtx (tmp), GEN_INT (cf),
15344 copy_rtx (tmp), 1, OPTAB_DIRECT);
15352 * andl cf - ct, dest
15362 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15365 tmp = expand_simple_binop (mode, AND,
15367 gen_int_mode (cf - ct, mode),
15368 copy_rtx (tmp), 1, OPTAB_DIRECT);
15370 tmp = expand_simple_binop (mode, PLUS,
15371 copy_rtx (tmp), GEN_INT (ct),
15372 copy_rtx (tmp), 1, OPTAB_DIRECT);
15375 if (!rtx_equal_p (tmp, out))
15376 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15378 return 1; /* DONE */
15383 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15386 tmp = ct, ct = cf, cf = tmp;
15389 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15391 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15393 /* We may be reversing unordered compare to normal compare, that
15394 is not valid in general (we may convert non-trapping condition
15395 to trapping one), however on i386 we currently emit all
15396 comparisons unordered. */
15397 compare_code = reverse_condition_maybe_unordered (compare_code);
15398 code = reverse_condition_maybe_unordered (code);
15402 compare_code = reverse_condition (compare_code);
15403 code = reverse_condition (code);
/* Look for compares against 0/-1 that reduce to a sign test.  */
15407 compare_code = UNKNOWN;
15408 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15409 && CONST_INT_P (ix86_compare_op1))
15411 if (ix86_compare_op1 == const0_rtx
15412 && (code == LT || code == GE))
15413 compare_code = code;
15414 else if (ix86_compare_op1 == constm1_rtx)
15418 else if (code == GT)
15423 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15424 if (compare_code != UNKNOWN
15425 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15426 && (cf == -1 || ct == -1))
15428 /* If lea code below could be used, only optimize
15429 if it results in a 2 insn sequence. */
15431 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15432 || diff == 3 || diff == 5 || diff == 9)
15433 || (compare_code == LT && ct == -1)
15434 || (compare_code == GE && cf == -1))
15437 * notl op1 (if necessary)
15445 code = reverse_condition (code);
15448 out = emit_store_flag (out, code, ix86_compare_op0,
15449 ix86_compare_op1, VOIDmode, 0, -1);
15451 out = expand_simple_binop (mode, IOR,
15453 out, 1, OPTAB_DIRECT);
15454 if (out != operands[0])
15455 emit_move_insn (operands[0], out);
15457 return 1; /* DONE */
/* diff matching an x86 addressing-mode scale lets us fold the whole
   select into setcc + lea.  */
15462 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15463 || diff == 3 || diff == 5 || diff == 9)
15464 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15466 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15472 * lea cf(dest*(ct-cf)),dest
15476 * This also catches the degenerate setcc-only case.
15482 out = emit_store_flag (out, code, ix86_compare_op0,
15483 ix86_compare_op1, VOIDmode, 0, 1);
15486 /* On x86_64 the lea instruction operates on Pmode, so we need
15487 to get arithmetics done in proper mode to match. */
15489 tmp = copy_rtx (out);
15493 out1 = copy_rtx (out);
15494 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15498 tmp = gen_rtx_PLUS (mode, tmp, out1);
15504 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15507 if (!rtx_equal_p (tmp, out))
15510 out = force_operand (tmp, copy_rtx (out));
15512 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15514 if (!rtx_equal_p (out, operands[0]))
15515 emit_move_insn (operands[0], copy_rtx (out));
15517 return 1; /* DONE */
15521 * General case: Jumpful:
15522 * xorl dest,dest cmpl op1, op2
15523 * cmpl op1, op2 movl ct, dest
15524 * setcc dest jcc 1f
15525 * decl dest movl cf, dest
15526 * andl (cf-ct),dest 1:
15529 * Size 20. Size 14.
15531 * This is reasonably steep, but branch mispredict costs are
15532 * high on modern cpus, so consider failing only if optimizing
15536 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15537 && BRANCH_COST (optimize_insn_for_speed_p (),
15542 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15547 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15549 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15551 /* We may be reversing unordered compare to normal compare,
15552 that is not valid in general (we may convert non-trapping
15553 condition to trapping one), however on i386 we currently
15554 emit all comparisons unordered. */
15555 code = reverse_condition_maybe_unordered (code);
15559 code = reverse_condition (code);
15560 if (compare_code != UNKNOWN)
15561 compare_code = reverse_condition (compare_code);
15565 if (compare_code != UNKNOWN)
15567 /* notl op1 (if needed)
15572 For x < 0 (resp. x <= -1) there will be no notl,
15573 so if possible swap the constants to get rid of the
15575 True/false will be -1/0 while code below (store flag
15576 followed by decrement) is 0/-1, so the constants need
15577 to be exchanged once more. */
15579 if (compare_code == GE || !cf)
15581 code = reverse_condition (code);
15586 HOST_WIDE_INT tmp = cf;
15591 out = emit_store_flag (out, code, ix86_compare_op0,
15592 ix86_compare_op1, VOIDmode, 0, -1);
15596 out = emit_store_flag (out, code, ix86_compare_op0,
15597 ix86_compare_op1, VOIDmode, 0, 1);
/* setcc gives 0/1; decrement to 0/-1, mask with (cf-ct), then add
   ct -- the branch-free "jumpless" sequence from the comment above.  */
15599 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15600 copy_rtx (out), 1, OPTAB_DIRECT);
15603 out = expand_simple_binop (mode, AND, copy_rtx (out),
15604 gen_int_mode (cf - ct, mode),
15605 copy_rtx (out), 1, OPTAB_DIRECT);
15607 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15608 copy_rtx (out), 1, OPTAB_DIRECT);
15609 if (!rtx_equal_p (out, operands[0]))
15610 emit_move_insn (operands[0], copy_rtx (out));
15612 return 1; /* DONE */
15616 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15618 /* Try a few things more with specific constants and a variable. */
15621 rtx var, orig_out, out, tmp;
15623 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15624 return 0; /* FAIL */
15626 /* If one of the two operands is an interesting constant, load a
15627 constant with the above and mask it in with a logical operation. */
15629 if (CONST_INT_P (operands[2]))
15632 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15633 operands[3] = constm1_rtx, op = and_optab;
15634 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15635 operands[3] = const0_rtx, op = ior_optab;
15637 return 0; /* FAIL */
15639 else if (CONST_INT_P (operands[3]))
/* NOTE(review): the second condition below tests operands[3] twice
   ("operands[3] != const0_rtx") where the parallel branch above tests
   the *other* operand -- looks asymmetric; verify against upstream.  */
15642 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15643 operands[2] = constm1_rtx, op = and_optab;
15644 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15645 operands[2] = const0_rtx, op = ior_optab;
15647 return 0; /* FAIL */
15650 return 0; /* FAIL */
15652 orig_out = operands[0];
15653 tmp = gen_reg_rtx (mode);
15656 /* Recurse to get the constant loaded. */
15657 if (ix86_expand_int_movcc (operands) == 0)
15658 return 0; /* FAIL */
15660 /* Mask in the interesting variable. */
15661 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15663 if (!rtx_equal_p (out, orig_out))
15664 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15666 return 1; /* DONE */
15670 * For comparison with above,
/* Final path: real cmov.  Force operands into registers as required
   by the cmov patterns, then emit the IF_THEN_ELSE set.  */
15680 if (! nonimmediate_operand (operands[2], mode))
15681 operands[2] = force_reg (mode, operands[2]);
15682 if (! nonimmediate_operand (operands[3], mode))
15683 operands[3] = force_reg (mode, operands[3]);
15685 if (! register_operand (operands[2], VOIDmode)
15687 || ! register_operand (operands[3], VOIDmode)))
15688 operands[2] = force_reg (mode, operands[2]);
15691 && ! register_operand (operands[3], VOIDmode))
15692 operands[3] = force_reg (mode, operands[3]);
15694 emit_insn (compare_seq);
15695 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15696 gen_rtx_IF_THEN_ELSE (mode,
15697 compare_op, operands[2],
15700 return 1; /* DONE */
15703 /* Swap, force into registers, or otherwise massage the two operands
15704 to an sse comparison with a mask result. Thus we differ a bit from
15705 ix86_prepare_fp_compare_args which expects to produce a flags result.
15707 The DEST operand exists to help determine whether to commute commutative
15708 operators. The POP0/POP1 operands are updated in place. The new
15709 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): elided excerpt -- the switch statement dispatching on
   CODE is mostly not visible between the numbered lines.  */
15711 static enum rtx_code
15712 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15713 rtx *pop0, rtx *pop1)
15721 /* We have no LTGT as an operator. We could implement it with
15722 NE & ORDERED, but this requires an extra temporary. It's
15723 not clear that it's worth it. */
15730 /* These are supported directly. */
15737 /* For commutative operators, try to canonicalize the destination
15738 operand to be first in the comparison - this helps reload to
15739 avoid extra moves. */
15740 if (!dest || !rtx_equal_p (dest, *pop1))
15748 /* These are not supported directly. Swap the comparison operands
15749 to transform into something that is supported. */
15753 code = swap_condition (code);
15757 gcc_unreachable ();
15763 /* Detect conditional moves that exactly match min/max operational
15764 semantics. Note that this is IEEE safe, as long as we don't
15765 interchange the operands.
15767 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15768 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): elided excerpt -- some branches between the numbered
   lines (e.g. the initial code tests) are not visible.  */
15771 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15772 rtx cmp_op1, rtx if_true, rtx if_false)
15774 enum machine_mode mode;
15780 else if (code == UNGE)
15783 if_true = if_false;
/* The compare operands must be exactly the select arms for this to
   be a min/max; their order decides which one it is.  */
15789 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15791 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15796 mode = GET_MODE (dest);
15798 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15799 but MODE may be a vector mode and thus not appropriate. */
15800 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict IEEE semantics: use the UNSPEC min/max patterns, which keep
   the hardware operand order (minss/maxss are not commutative for
   NaN/signed-zero inputs).  */
15802 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15805 if_true = force_reg (mode, if_true);
15806 v = gen_rtvec (2, if_true, if_false);
15807 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed math: a plain SMIN/SMAX rtx is fine.  */
15811 code = is_min ? SMIN : SMAX;
15812 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15815 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15819 /* Expand an sse vector comparison. Return the register with the result. */
/* Emits DEST = (CODE CMP_OP0 CMP_OP1) as an SSE mask-producing
   compare.  OP_TRUE/OP_FALSE are only inspected for overlap with DEST;
   when DEST overlaps either one a fresh register is used instead, and
   the register actually written is returned.
   NOTE(review): elided excerpt -- return type/braces not visible.  */
15822 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15823 rtx op_true, rtx op_false)
15825 enum machine_mode mode = GET_MODE (dest);
15828 cmp_op0 = force_reg (mode, cmp_op0);
15829 if (!nonimmediate_operand (cmp_op1, mode))
15830 cmp_op1 = force_reg (mode, cmp_op1);
15833 || reg_overlap_mentioned_p (dest, op_true)
15834 || reg_overlap_mentioned_p (dest, op_false))
15835 dest = gen_reg_rtx (mode);
15837 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15838 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15843 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15844 operations. This is used for both scalar and vector conditional moves. */
/* CMP is an all-ones/all-zeros mask (from ix86_expand_sse_cmp).
   NOTE(review): elided excerpt -- some lines between the numbered
   statements are not visible.  */
15847 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15849 enum machine_mode mode = GET_MODE (dest);
/* Special case: selecting against zero needs only one AND.  */
15852 if (op_false == CONST0_RTX (mode))
15854 op_true = force_reg (mode, op_true);
15855 x = gen_rtx_AND (mode, cmp, op_true);
15856 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15858 else if (op_true == CONST0_RTX (mode))
15860 op_false = force_reg (mode, op_false);
15861 x = gen_rtx_NOT (mode, cmp);
15862 x = gen_rtx_AND (mode, x, op_false);
15863 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real vector cmov (pcmov); use it directly.  */
15865 else if (TARGET_SSE5)
15867 rtx pcmov = gen_rtx_SET (mode, dest,
15868 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Generic fallback: (op_true & cmp) | (op_false & ~cmp).  */
15875 op_true = force_reg (mode, op_true);
15876 op_false = force_reg (mode, op_false);
15878 t2 = gen_reg_rtx (mode);
15880 t3 = gen_reg_rtx (mode);
15884 x = gen_rtx_AND (mode, op_true, cmp);
15885 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15887 x = gen_rtx_NOT (mode, cmp);
15888 x = gen_rtx_AND (mode, x, op_false);
15889 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15891 x = gen_rtx_IOR (mode, t3, t2);
15892 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15896 /* Expand a floating-point conditional move. Return true if successful. */
/* OPERANDS[0] = OPERANDS[1] ? OPERANDS[2] : OPERANDS[3] for FP modes.
   Uses the SSE mask/logic path when doing SSE math, otherwise fcmov.
   NOTE(review): elided excerpt -- several lines are not visible.  */
15899 ix86_expand_fp_movcc (rtx operands[])
15901 enum machine_mode mode = GET_MODE (operands[0]);
15902 enum rtx_code code = GET_CODE (operands[1]);
15903 rtx tmp, compare_op;
15905 ix86_compare_op0 = XEXP (operands[1], 0);
15906 ix86_compare_op1 = XEXP (operands[1], 1);
15907 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15909 enum machine_mode cmode;
15911 /* Since we've no cmove for sse registers, don't force bad register
15912 allocation just to gain access to it. Deny movcc when the
15913 comparison mode doesn't match the move mode. */
15914 cmode = GET_MODE (ix86_compare_op0);
15915 if (cmode == VOIDmode)
15916 cmode = GET_MODE (ix86_compare_op1);
15920 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15922 &ix86_compare_op1);
15923 if (code == UNKNOWN)
/* Prefer a direct min/max when the cmov matches that shape.  */
15926 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15927 ix86_compare_op1, operands[2],
15931 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15932 ix86_compare_op1, operands[2], operands[3]);
15933 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15937 /* The floating point conditional move instructions don't directly
15938 support conditions resulting from a signed integer comparison. */
15940 compare_op = ix86_expand_compare (code);
15941 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Not fcmov-able: materialize the condition with setcc and
   re-compare the QImode result against zero.  */
15943 tmp = gen_reg_rtx (QImode);
15944 ix86_expand_setcc (code, tmp);
15946 ix86_compare_op0 = tmp;
15947 ix86_compare_op1 = const0_rtx;
15948 compare_op = ix86_expand_compare (code);
15951 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15952 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15953 operands[2], operands[3])));
15958 /* Expand a floating-point vector conditional move; a vcond operation
15959 rather than a movcc operation. */
/* OPERANDS layout: 0 = dest, 1/2 = select arms, 3 = comparison rtx,
   4/5 = comparison operands.  NOTE(review): elided excerpt.  */
15962 ix86_expand_fp_vcond (rtx operands[])
15964 enum rtx_code code = GET_CODE (operands[3]);
15967 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15968 &operands[4], &operands[5]);
15969 if (code == UNKNOWN)
/* Try the cheap min/max form first, then fall back to
   compare-mask + blend logic.  */
15972 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15973 operands[5], operands[1], operands[2]))
15976 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15977 operands[1], operands[2]);
15978 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15982 /* Expand a signed/unsigned integral vector conditional move. */
/* OPERANDS layout matches ix86_expand_fp_vcond.  The comparison is
   canonicalized to EQ/GT/GTU because those are the only integer vector
   compares the hardware provides.
   NOTE(review): elided excerpt -- several switch labels and statements
   between the numbered lines are not visible.  */
15985 ix86_expand_int_vcond (rtx operands[])
15987 enum machine_mode mode = GET_MODE (operands[0]);
15988 enum rtx_code code = GET_CODE (operands[3]);
15989 bool negate = false;
15992 cop0 = operands[4];
15993 cop1 = operands[5];
15995 /* SSE5 supports all of the comparisons on all vector int types. */
15998 /* Canonicalize the comparison to EQ, GT, GTU. */
16009 code = reverse_condition (code);
16015 code = reverse_condition (code);
16021 code = swap_condition (code);
16022 x = cop0, cop0 = cop1, cop1 = x;
16026 gcc_unreachable ();
16029 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16030 if (mode == V2DImode)
16035 /* SSE4.1 supports EQ. */
16036 if (!TARGET_SSE4_1)
16042 /* SSE4.2 supports GT/GTU. */
16043 if (!TARGET_SSE4_2)
16048 gcc_unreachable ();
16052 /* Unsigned parallel compare is not supported by the hardware. Play some
16053 tricks to turn this into a signed comparison against 0. */
16056 cop0 = force_reg (mode, cop0);
16065 /* Perform a parallel modulo subtraction. */
16066 t1 = gen_reg_rtx (mode);
16067 emit_insn ((mode == V4SImode
16069 : gen_subv2di3) (t1, cop0, cop1));
16071 /* Extract the original sign bit of op0. */
16072 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16074 t2 = gen_reg_rtx (mode);
16075 emit_insn ((mode == V4SImode
16077 : gen_andv2di3) (t2, cop0, mask));
16079 /* XOR it back into the result of the subtraction. This results
16080 in the sign bit set iff we saw unsigned underflow. */
16081 x = gen_reg_rtx (mode);
16082 emit_insn ((mode == V4SImode
16084 : gen_xorv2di3) (x, t1, t2));
16092 /* Perform a parallel unsigned saturating subtraction. */
16093 x = gen_reg_rtx (mode);
16094 emit_insn (gen_rtx_SET (VOIDmode, x,
16095 gen_rtx_US_MINUS (mode, cop0, cop1)));
16102 gcc_unreachable ();
16106 cop1 = CONST0_RTX (mode);
/* NEGATE tracks whether canonicalization inverted the condition; the
   1+negate / 2-negate indexing swaps the select arms to compensate.  */
16110 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16111 operands[1+negate], operands[2-negate]);
16113 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16114 operands[2-negate]);
16118 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16119 true if we should do zero extension, else sign extension. HIGH_P is
16120 true if we want the N/2 high elements, else the low elements. */
/* Implementation via interleave: pair each element with either zero
   (zero-extend) or a computed sign mask (sign-extend).
   NOTE(review): elided excerpt -- switch skeleton partly invisible.  */
16123 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16125 enum machine_mode imode = GET_MODE (operands[1]);
16126 rtx (*unpack)(rtx, rtx, rtx);
16133 unpack = gen_vec_interleave_highv16qi;
16135 unpack = gen_vec_interleave_lowv16qi;
16139 unpack = gen_vec_interleave_highv8hi;
16141 unpack = gen_vec_interleave_lowv8hi;
16145 unpack = gen_vec_interleave_highv4si;
16147 unpack = gen_vec_interleave_lowv4si;
16150 gcc_unreachable ();
16153 dest = gen_lowpart (imode, operands[0]);
/* SE is the "second element" vector: all-zero for unsigned widening,
   or a per-element sign mask (x > 0 compare of 0 against op) for
   signed widening.  */
16156 se = force_reg (imode, CONST0_RTX (imode));
16158 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16159 operands[1], pc_rtx, pc_rtx);
16161 emit_insn (unpack (dest, operands[1], se));
16164 /* This function performs the same task as ix86_expand_sse_unpack,
16165 but with SSE4.1 instructions. */
/* Uses the native pmovzx/pmovsx extension patterns; for the high half
   the source is first shifted down by 8 bytes.
   NOTE(review): elided excerpt -- switch skeleton partly invisible.  */
16168 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16170 enum machine_mode imode = GET_MODE (operands[1]);
16171 rtx (*unpack)(rtx, rtx);
16178 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16180 unpack = gen_sse4_1_extendv8qiv8hi2;
16184 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16186 unpack = gen_sse4_1_extendv4hiv4si2;
16190 unpack = gen_sse4_1_zero_extendv2siv2di2;
16192 unpack = gen_sse4_1_extendv2siv2di2;
16195 gcc_unreachable ();
16198 dest = operands[0];
16201 /* Shift higher 8 bytes to lower 8 bytes. */
16202 src = gen_reg_rtx (imode);
16203 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16204 gen_lowpart (TImode, operands[1]),
16210 emit_insn (unpack (dest, src));
16213 /* This function performs the same task as ix86_expand_sse_unpack,
16214 but with sse5 instructions. */
/* Builds a PPERM byte-selector constant vector: source bytes are
   interleaved with either zero bytes (unsigned) or sign-replicated
   bytes (signed), per element width.  H selects the high/low half of
   the source.  NOTE(review): elided excerpt -- the switch skeleton and
   some declarations are not visible.  */
16217 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16219 enum machine_mode imode = GET_MODE (operands[1]);
16220 int pperm_bytes[16];
16222 int h = (high_p) ? 8 : 0;
16225 rtvec v = rtvec_alloc (16);
16228 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: one source byte + one zero/sign byte per element.  */
16233 vs = rtvec_alloc (8);
16234 h2 = (high_p) ? 8 : 0;
16235 for (i = 0; i < 8; i++)
16237 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16238 pperm_bytes[2*i+1] = ((unsigned_p)
16240 : PPERM_SIGN | PPERM_SRC2 | i | h);
16243 for (i = 0; i < 16; i++)
16244 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16246 for (i = 0; i < 8; i++)
16247 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16249 p = gen_rtx_PARALLEL (VOIDmode, vs);
16250 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16252 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16254 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes + two extension bytes.  */
16258 vs = rtvec_alloc (4);
16259 h2 = (high_p) ? 4 : 0;
16260 for (i = 0; i < 4; i++)
16262 sign_extend = ((unsigned_p)
16264 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16265 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16266 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16267 pperm_bytes[4*i+2] = sign_extend;
16268 pperm_bytes[4*i+3] = sign_extend;
16271 for (i = 0; i < 16; i++)
16272 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16274 for (i = 0; i < 4; i++)
16275 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16277 p = gen_rtx_PARALLEL (VOIDmode, vs);
16278 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16280 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16282 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes + four extension bytes.  */
16286 vs = rtvec_alloc (2);
16287 h2 = (high_p) ? 2 : 0;
16288 for (i = 0; i < 2; i++)
16290 sign_extend = ((unsigned_p)
16292 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16293 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16294 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16295 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16296 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16297 pperm_bytes[8*i+4] = sign_extend;
16298 pperm_bytes[8*i+5] = sign_extend;
16299 pperm_bytes[8*i+6] = sign_extend;
16300 pperm_bytes[8*i+7] = sign_extend;
16303 for (i = 0; i < 16; i++)
16304 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16306 for (i = 0; i < 2; i++)
16307 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16309 p = gen_rtx_PARALLEL (VOIDmode, vs);
16310 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16312 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16314 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16318 gcc_unreachable ();
16324 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16325 next narrower integer vector type */
/* Builds a PPERM selector that takes the low bytes of each element
   from SRC1 into the low half of the result and from SRC2 into the
   high half, per destination element width.
   NOTE(review): elided excerpt -- switch skeleton partly invisible.  */
16327 ix86_expand_sse5_pack (rtx operands[3])
16329 enum machine_mode imode = GET_MODE (operands[0]);
16330 int pperm_bytes[16];
16332 rtvec v = rtvec_alloc (16);
16334 rtx op0 = operands[0];
16335 rtx op1 = operands[1];
16336 rtx op2 = operands[2];
/* V16QI result: keep every other byte of each HImode source.  */
16341 for (i = 0; i < 8; i++)
16343 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16344 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16347 for (i = 0; i < 16; i++)
16348 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16350 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16351 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V8HI result: keep the low two bytes of each SImode source.  */
16355 for (i = 0; i < 4; i++)
16357 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16358 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16359 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16360 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16363 for (i = 0; i < 16; i++)
16364 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16366 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16367 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V4SI result: keep the low four bytes of each DImode source.  */
16371 for (i = 0; i < 2; i++)
16373 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16374 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16375 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16376 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16377 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16378 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16379 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16380 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16383 for (i = 0; i < 16; i++)
16384 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16386 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16387 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16391 gcc_unreachable ();
16397 /* Expand conditional increment or decrement using adb/sbb instructions.
16398 The default case using setcc followed by the conditional move can be
16399 done by generic code. */
/* OPERANDS[0] = OPERANDS[2] +/- (OPERANDS[1] ? 1 : 0), with
   OPERANDS[3] constrained to +1 or -1.  Returns nonzero on success.
   NOTE(review): elided excerpt -- early-return statements between the
   numbered lines are not visible.  */
16401 ix86_expand_int_addcc (rtx operands[])
16403 enum rtx_code code = GET_CODE (operands[1]);
16405 rtx val = const0_rtx;
16406 bool fpcmp = false;
16407 enum machine_mode mode = GET_MODE (operands[0]);
16409 ix86_compare_op0 = XEXP (operands[1], 0);
16410 ix86_compare_op1 = XEXP (operands[1], 1);
/* Only increments/decrements by exactly one are handled here.  */
16411 if (operands[3] != const1_rtx
16412 && operands[3] != constm1_rtx)
16414 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16415 ix86_compare_op1, &compare_op))
16417 code = GET_CODE (compare_op);
16419 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16420 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16423 code = ix86_fp_compare_code_to_integer (code);
/* Normalize the comparison so the carry flag is set exactly when the
   increment/decrement should happen; FP compares need the
   unordered-safe reversal.  */
16430 PUT_CODE (compare_op,
16431 reverse_condition_maybe_unordered
16432 (GET_CODE (compare_op)));
16434 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16436 PUT_MODE (compare_op, mode);
16438 /* Construct either adc or sbb insn. */
16439 if ((code == LTU) == (operands[3] == constm1_rtx))
16441 switch (GET_MODE (operands[0]))
16444 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16447 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16450 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16453 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16456 gcc_unreachable ();
16461 switch (GET_MODE (operands[0]))
16464 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16467 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16470 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16473 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16476 gcc_unreachable ();
16479 return 1; /* DONE */
16483 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16484 works for floating pointer parameters and nonoffsetable memories.
16485 For pushes, it returns just stack offsets; the values will be saved
16486 in the right order. Maximally three parts are generated. */
/* NOTE(review): elided excerpt -- the function body below is missing
   several statements (size computation branches, returns) between the
   numbered lines; the 32-bit and 64-bit paths are both partially
   visible.  */
16489 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: XFmode needs 3 SImode words on 32-bit; otherwise
   size is derived from the mode size (word-size dependent).  */
16494 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16496 size = (GET_MODE_SIZE (mode) + 4) / 8;
16498 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16499 gcc_assert (size >= 2 && size <= 4);
16501 /* Optimize constant pool reference to immediates. This is used by fp
16502 moves, that force all constants to memory to allow combining. */
16503 if (MEM_P (operand) && MEM_READONLY_P (operand))
16505 rtx tmp = maybe_get_pool_constant (operand);
16510 if (MEM_P (operand) && !offsettable_memref_p (operand))
16512 /* The only non-offsetable memories we handle are pushes. */
16513 int ok = push_operand (operand, VOIDmode);
16517 operand = copy_rtx (operand);
16518 PUT_MODE (operand, Pmode);
16519 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16523 if (GET_CODE (operand) == CONST_VECTOR)
16525 enum machine_mode imode = int_mode_for_mode (mode);
16526 /* Caution: if we looked through a constant pool memory above,
16527 the operand may actually have a different mode now. That's
16528 ok, since we want to pun this all the way back to an integer. */
16529 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16530 gcc_assert (operand != NULL);
/* 32-bit target path: split into SImode pieces.  */
16536 if (mode == DImode)
16537 split_di (&operand, 1, &parts[0], &parts[1]);
16542 if (REG_P (operand))
16544 gcc_assert (reload_completed);
16545 for (i = 0; i < size; i++)
16546 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16548 else if (offsettable_memref_p (operand))
16550 operand = adjust_address (operand, SImode, 0);
16551 parts[0] = operand;
16552 for (i = 1; i < size; i++)
16553 parts[i] = adjust_address (operand, SImode, 4 * i);
16555 else if (GET_CODE (operand) == CONST_DOUBLE)
16560 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16564 real_to_target (l, &r, mode);
16565 parts[3] = gen_int_mode (l[3], SImode);
16566 parts[2] = gen_int_mode (l[2], SImode);
16569 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16570 parts[2] = gen_int_mode (l[2], SImode);
16573 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16576 gcc_unreachable ();
16578 parts[1] = gen_int_mode (l[1], SImode);
16579 parts[0] = gen_int_mode (l[0], SImode);
16582 gcc_unreachable ();
/* 64-bit target path: split into DImode (plus SImode tail for XF).  */
16587 if (mode == TImode)
16588 split_ti (&operand, 1, &parts[0], &parts[1]);
16589 if (mode == XFmode || mode == TFmode)
16591 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16592 if (REG_P (operand))
16594 gcc_assert (reload_completed);
16595 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16596 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16598 else if (offsettable_memref_p (operand))
16600 operand = adjust_address (operand, DImode, 0);
16601 parts[0] = operand;
16602 parts[1] = adjust_address (operand, upper_mode, 8);
16604 else if (GET_CODE (operand) == CONST_DOUBLE)
16609 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16610 real_to_target (l, &r, mode);
16612 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16613 if (HOST_BITS_PER_WIDE_INT >= 64)
16616 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16617 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16620 parts[0] = immed_double_const (l[0], l[1], DImode);
16622 if (upper_mode == SImode)
16623 parts[1] = gen_int_mode (l[2], SImode);
16624 else if (HOST_BITS_PER_WIDE_INT >= 64)
16627 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16628 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16631 parts[1] = immed_double_const (l[2], l[3], DImode);
16634 gcc_unreachable ();
16641 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16642 Return false when normal moves are needed; true when all required
16643 insns have been emitted. Operands 2-4 contain the input values
16644 int the correct order; operands 5-7 contain the output values. */
/* NOTE(review): elided view -- declarations of part[], nparts, push,
   i, j, tmp and several braces are not visible here.  */
16647 ix86_split_long_move (rtx operands[])
16652 int collisions = 0;
16653 enum machine_mode mode = GET_MODE (operands[0]);
16654 bool collisionparts[4];
16656 /* The DFmode expanders may ask us to move double.
16657 For 64bit target this is single move. By hiding the fact
16658 here we simplify i386.md splitters. */
16659 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16661 /* Optimize constant pool reference to immediates. This is used by
16662 fp moves, that force all constants to memory to allow combining. */
16664 if (MEM_P (operands[1])
16665 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16666 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16667 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16668 if (push_operand (operands[0], VOIDmode))
16670 operands[0] = copy_rtx (operands[0]);
16671 PUT_MODE (operands[0], Pmode);
/* 8-byte value on a 64-bit target: a single DImode move suffices.  */
16674 operands[0] = gen_lowpart (DImode, operands[0]);
16675 operands[1] = gen_lowpart (DImode, operands[1]);
16676 emit_move_insn (operands[0], operands[1]);
16680 /* The only non-offsettable memory we handle is push. */
16681 if (push_operand (operands[0], VOIDmode))
16684 gcc_assert (!MEM_P (operands[0])
16685 || offsettable_memref_p (operands[0]));
/* Decompose both operands into word-sized parts.  */
16687 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16688 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16690 /* When emitting push, take care for source operands on the stack. */
16691 if (push && MEM_P (operands[1])
16692 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16693 for (i = 0; i < nparts - 1; i++)
16694 part[1][i] = change_address (part[1][i],
16695 GET_MODE (part[1][i]),
16696 XEXP (part[1][i + 1], 0));
16698 /* We need to do copy in the right order in case an address register
16699 of the source overlaps the destination. */
16700 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record which destination parts clobber the source address.  */
16704 for (i = 0; i < nparts; i++)
16707 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16708 if (collisionparts[i])
16712 /* Collision in the middle part can be handled by reordering. */
16713 if (collisions == 1 && nparts == 3 && collisionparts [1])
16715 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16716 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16718 else if (collisions == 1
16720 && (collisionparts [1] || collisionparts [2]))
16722 if (collisionparts [1])
16724 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16725 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16729 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16730 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16734 /* If there are more collisions, we can't handle it by reordering.
16735 Do an lea to the last part and use only one colliding move. */
16736 else if (collisions > 1)
16742 base = part[0][nparts - 1];
16744 /* Handle the case when the last part isn't valid for lea.
16745 Happens in 64-bit mode storing the 12-byte XFmode. */
16746 if (GET_MODE (base) != Pmode)
16747 base = gen_rtx_REG (Pmode, REGNO (base));
/* Load the source address into BASE once, then rebase every part.  */
16749 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16750 part[1][0] = replace_equiv_address (part[1][0], base);
16751 for (i = 1; i < nparts; i++)
16753 tmp = plus_constant (base, UNITS_PER_WORD * i);
16754 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path: emit highest parts first (the enclosing condition is
   elided from this view).  */
16765 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16766 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16767 emit_move_insn (part[0][2], part[1][2]);
16769 else if (nparts == 4)
16771 emit_move_insn (part[0][3], part[1][3]);
16772 emit_move_insn (part[0][2], part[1][2]);
16777 /* In 64bit mode we don't have 32bit push available. In case this is
16778 register, it is OK - we will just use larger counterpart. We also
16779 retype memory - these comes from attempt to avoid REX prefix on
16780 moving of second half of TFmode value. */
16781 if (GET_MODE (part[1][1]) == SImode)
16783 switch (GET_CODE (part[1][1]))
16786 part[1][1] = adjust_address (part[1][1], DImode, 0);
16790 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16794 gcc_unreachable ();
16797 if (GET_MODE (part[1][0]) == SImode)
16798 part[1][0] = part[1][1];
16801 emit_move_insn (part[0][1], part[1][1]);
16802 emit_move_insn (part[0][0], part[1][0]);
16806 /* Choose correct order to not overwrite the source before it is copied. */
16807 if ((REG_P (part[0][0])
16808 && REG_P (part[1][1])
16809 && (REGNO (part[0][0]) == REGNO (part[1][1])
16811 && REGNO (part[0][0]) == REGNO (part[1][2]))
16813 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16815 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reverse order: copy high parts before low parts.  */
16817 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16819 operands[2 + i] = part[0][j];
16820 operands[6 + i] = part[1][j];
16825 for (i = 0; i < nparts; i++)
16827 operands[2 + i] = part[0][i];
16828 operands[6 + i] = part[1][i];
16832 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16833 if (optimize_insn_for_size_p ())
16835 for (j = 0; j < nparts - 1; j++)
16836 if (CONST_INT_P (operands[6 + j])
16837 && operands[6 + j] != const0_rtx
16838 && REG_P (operands[2 + j]))
16839 for (i = j; i < nparts - 1; i++)
16840 if (CONST_INT_P (operands[7 + i])
16841 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16842 operands[7 + i] = operands[2 + j];
/* Finally emit the part-wise moves in the chosen order.  */
16845 for (i = 0; i < nparts; i++)
16846 emit_move_insn (operands[2 + i], operands[6 + i]);
16851 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16852 left shift by a constant, either using a single shift or
16853 a sequence of add instructions. */
/* OPERAND is shifted left in place by COUNT bits; MODE selects the
   SImode vs. DImode insn generators.  */
16856 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
/* Shift by 1: a single add (x + x) is used instead of a shift.  */
16860 emit_insn ((mode == DImode
16862 : gen_adddi3) (operand, operand, operand));
/* Use a chain of adds when that is cheaper than one constant shift
   (cost comparison against ix86_cost->shift_const) and we are not
   optimizing for size.  */
16864 else if (!optimize_insn_for_size_p ()
16865 && count * ix86_cost->add <= ix86_cost->shift_const)
16868 for (i=0; i<count; i++)
16870 emit_insn ((mode == DImode
16872 : gen_adddi3) (operand, operand, operand));
/* Otherwise emit the plain shift-by-immediate.  */
16876 emit_insn ((mode == DImode
16878 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode pair on 32-bit, TImode pair
   on 64-bit) into single-word operations.  SCRATCH, if available with
   CMOVE, avoids a branch in the variable-count tail.  */
16882 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16884 rtx low[2], high[2];
16886 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: fully resolved at expand time.  */
16888 if (CONST_INT_P (operands[2]))
16890 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16891 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low word becomes zero, high word takes the
   (possibly further shifted) low input.  */
16893 if (count >= single_width)
16895 emit_move_insn (high[0], low[1]);
16896 emit_move_insn (low[0], const0_rtx);
16898 if (count > single_width)
16899 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Count < word size: shld to carry bits into the high word, then
   shift the low word.  */
16903 if (!rtx_equal_p (operands[0], operands[1]))
16904 emit_move_insn (operands[0], operands[1]);
16905 emit_insn ((mode == DImode
16907 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16908 ix86_expand_ashl_const (low[0], count, mode);
16913 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* Special case 1 << N.  */
16915 if (operands[1] == const1_rtx)
16917 /* Assuming we've chosen a QImode capable registers, then 1 << N
16918 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16919 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16921 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16923 ix86_expand_clear (low[0]);
16924 ix86_expand_clear (high[0]);
16925 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
/* setcc into the byte parts: low gets (count < width), high the
   complement; both via STRICT_LOW_PART byte stores.  */
16927 d = gen_lowpart (QImode, low[0]);
16928 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16929 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16930 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16932 d = gen_lowpart (QImode, high[0]);
16933 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16934 s = gen_rtx_NE (QImode, flags, const0_rtx);
16935 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16938 /* Otherwise, we can get the same results by manually performing
16939 a bit extract operation on bit 5/6, and then performing the two
16940 shifts. The two methods of getting 0/1 into low/high are exactly
16941 the same size. Avoiding the shift in the bit extract case helps
16942 pentium4 a bit; no one else seems to care much either way. */
16947 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16948 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16950 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16951 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) / 6 (TImode) of the count into high[0].  */
16953 emit_insn ((mode == DImode
16955 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16956 emit_insn ((mode == DImode
16958 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16959 emit_move_insn (low[0], high[0]);
16960 emit_insn ((mode == DImode
16962 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift the 0/1 seeds by the (masked) count.  */
16965 emit_insn ((mode == DImode
16967 : gen_ashldi3) (low[0], low[0], operands[2]));
16968 emit_insn ((mode == DImode
16970 : gen_ashldi3) (high[0], high[0], operands[2]));
16974 if (operands[1] == constm1_rtx)
16976 /* For -1 << N, we can avoid the shld instruction, because we
16977 know that we're shifting 0...31/63 ones into a -1. */
16978 emit_move_insn (low[0], constm1_rtx);
16979 if (optimize_insn_for_size_p ())
16980 emit_move_insn (high[0], low[0]);
16982 emit_move_insn (high[0], constm1_rtx);
/* General variable-count path: shld + shl, then fix up when the
   count crosses the word boundary.  */
16986 if (!rtx_equal_p (operands[0], operands[1]))
16987 emit_move_insn (operands[0], operands[1]);
16989 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16990 emit_insn ((mode == DImode
16992 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16995 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* With cmov and a scratch register the boundary fixup is branchless;
   otherwise the _adj_2 pattern presumably emits a compare-and-branch
   sequence (pattern body lives in i386.md).  */
16997 if (TARGET_CMOVE && scratch)
16999 ix86_expand_clear (scratch);
17000 emit_insn ((mode == DImode
17001 ? gen_x86_shift_adj_1
17002 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17006 emit_insn ((mode == DImode
17007 ? gen_x86_shift_adj_2
17008 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word ops.
   Mirrors ix86_split_ashl; sign bits are replicated via an
   ashr by (word width - 1).  */
17012 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17014 rtx low[2], high[2];
17016 const int single_width = mode == DImode ? 32 : 64;
17018 if (CONST_INT_P (operands[2]))
17020 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17021 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximal count: both words become the sign-extension of the input.  */
17023 if (count == single_width * 2 - 1)
17025 emit_move_insn (high[0], high[1]);
17026 emit_insn ((mode == DImode
17028 : gen_ashrdi3) (high[0], high[0],
17029 GEN_INT (single_width - 1)));
17030 emit_move_insn (low[0], high[0]);
/* Count >= word size: low word gets the shifted high input; high
   word is filled with sign bits.  */
17033 else if (count >= single_width)
17035 emit_move_insn (low[0], high[1]);
17036 emit_move_insn (high[0], low[0]);
17037 emit_insn ((mode == DImode
17039 : gen_ashrdi3) (high[0], high[0],
17040 GEN_INT (single_width - 1)));
17041 if (count > single_width)
17042 emit_insn ((mode == DImode
17044 : gen_ashrdi3) (low[0], low[0],
17045 GEN_INT (count - single_width)));
/* Count < word size: shrd into the low word, ashr the high word.  */
17049 if (!rtx_equal_p (operands[0], operands[1]))
17050 emit_move_insn (operands[0], operands[1]);
17051 emit_insn ((mode == DImode
17053 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17054 emit_insn ((mode == DImode
17056 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
17061 if (!rtx_equal_p (operands[0], operands[1]))
17062 emit_move_insn (operands[0], operands[1]);
17064 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17066 emit_insn ((mode == DImode
17068 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17069 emit_insn ((mode == DImode
17071 : gen_ashrdi3) (high[0], high[0], operands[2]));
/* Branchless fixup with cmov: SCRATCH holds the sign word.  */
17073 if (TARGET_CMOVE && scratch)
17075 emit_move_insn (scratch, high[0]);
17076 emit_insn ((mode == DImode
17078 : gen_ashrdi3) (scratch, scratch,
17079 GEN_INT (single_width - 1)));
17080 emit_insn ((mode == DImode
17081 ? gen_x86_shift_adj_1
17082 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17086 emit_insn ((mode == DImode
17087 ? gen_x86_shift_adj_3
17088 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word ops.
   Like ix86_split_ashr but zero-fills instead of sign-filling.  */
17093 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17095 rtx low[2], high[2];
17097 const int single_width = mode == DImode ? 32 : 64;
17099 if (CONST_INT_P (operands[2]))
17101 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17102 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low word takes the shifted high input; high
   word is cleared.  */
17104 if (count >= single_width)
17106 emit_move_insn (low[0], high[1]);
17107 ix86_expand_clear (high[0]);
17109 if (count > single_width)
17110 emit_insn ((mode == DImode
17112 : gen_lshrdi3) (low[0], low[0],
17113 GEN_INT (count - single_width)));
/* Count < word size: shrd into the low word, lshr the high word.  */
17117 if (!rtx_equal_p (operands[0], operands[1]))
17118 emit_move_insn (operands[0], operands[1]);
17119 emit_insn ((mode == DImode
17121 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17122 emit_insn ((mode == DImode
17124 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count path.  */
17129 if (!rtx_equal_p (operands[0], operands[1]))
17130 emit_move_insn (operands[0], operands[1]);
17132 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17134 emit_insn ((mode == DImode
17136 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17137 emit_insn ((mode == DImode
17139 : gen_lshrdi3) (high[0], high[0], operands[2]));
17141 /* Heh. By reversing the arguments, we can reuse this pattern. */
17142 if (TARGET_CMOVE && scratch)
17144 ix86_expand_clear (scratch);
17145 emit_insn ((mode == DImode
17146 ? gen_x86_shift_adj_1
17147 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17151 emit_insn ((mode == DImode
17152 ? gen_x86_shift_adj_2
17153 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17157 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is scaled relative to REG_BR_PROB_BASE (see the callers below,
   e.g. REG_BR_PROB_BASE * 50 / 100).  The note is attached to the
   last emitted insn, which must be a jump.  */
17159 predict_jump (int prob)
17161 rtx insn = get_last_insn ();
17162 gcc_assert (JUMP_P (insn));
17163 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17166 /* Helper function for the string operations below. Dest VARIABLE whether
17167 it is aligned to VALUE bytes. If true, jump to the label. */
/* Tests VARIABLE & VALUE and emits a conditional jump to a fresh
   label, which is returned to the caller (the caller emits the label
   and sets LABEL_NUSES).  EPILOGUE selects the branch-probability
   hint: 50% in epilogues, 90% in prologues.  */
17169 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17171 rtx label = gen_label_rtx ();
17172 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17173 if (GET_MODE (variable) == DImode)
17174 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17176 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17177 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17180 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17182 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17186 /* Adjust COUNTER by the VALUE. */
/* Decrements COUNTREG in place by VALUE (note the negated constant),
   using the add insn matching the counter's mode.  */
17188 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17190 if (GET_MODE (countreg) == DImode)
17191 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17193 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17196 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP: constants (VOIDmode) are
   forced into a register, Pmode values are copied, and anything else
   is presumed SImode and zero-extended to DImode (64-bit Pmode).  */
17198 ix86_zero_extend_to_Pmode (rtx exp)
17201 if (GET_MODE (exp) == VOIDmode)
17202 return force_reg (Pmode, exp);
17203 if (GET_MODE (exp) == Pmode)
17204 return copy_to_mode_reg (Pmode, exp);
17205 r = gen_reg_rtx (Pmode);
17206 emit_insn (gen_zero_extendsidi2 (r, exp));
17210 /* Divide COUNTREG by SCALE. */
/* SCALE must be a power of two (exact_log2 below); for a non-constant
   counter the division is emitted as a logical right shift.  */
17212 scale_counter (rtx countreg, int scale)
17215 rtx piece_size_mask;
/* Constant counter: fold the division at expand time.  */
17219 if (CONST_INT_P (countreg))
17220 return GEN_INT (INTVAL (countreg) / scale);
17221 gcc_assert (REG_P (countreg));
17223 piece_size_mask = GEN_INT (scale - 1);
17224 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17225 GEN_INT (exact_log2 (scale)),
17226 NULL, 1, OPTAB_DIRECT);
17230 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17231 DImode for constant loop counts. */
17233 static enum machine_mode
17234 counter_mode (rtx count_exp)
17236 if (GET_MODE (count_exp) != VOIDmode)
17237 return GET_MODE (count_exp);
17238 if (!CONST_INT_P (count_exp))
/* Constant fits in 32 bits -> SImode; otherwise DImode on 64-bit
   targets (the return statements are elided from this view).  */
17240 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17245 /* When SRCPTR is non-NULL, output simple loop to move memory
17246 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17247 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17248 equivalent loop to set memory by VALUE (supposed to be in MODE).
17250 The size is rounded down to whole number of chunk size moved at once.
17251 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
17255 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17256 rtx destptr, rtx srcptr, rtx value,
17257 rtx count, enum machine_mode mode, int unroll,
/* EXPECTED_SIZE (parameter line elided) tunes the branch prediction
   of the loop's back edge -- see the predict_jump calls below.  */
17260 rtx out_label, top_label, iter, tmp;
17261 enum machine_mode iter_mode = counter_mode (count);
17262 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17263 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17269 top_label = gen_label_rtx ();
17270 out_label = gen_label_rtx ();
17271 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a whole number of pieces.  */
17273 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17274 NULL, 1, OPTAB_DIRECT);
17275 /* Those two should combine. */
17276 if (piece_size == const1_rtx)
17278 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17280 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17282 emit_move_insn (iter, const0_rtx);
17284 emit_label (top_label);
/* Address the current chunk as destptr/srcptr + iter.  */
17286 tmp = convert_modes (Pmode, iter_mode, iter, true);
17287 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17288 destmem = change_address (destmem, mode, x_addr);
17292 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17293 srcmem = change_address (srcmem, mode, y_addr);
17295 /* When unrolling for chips that reorder memory reads and writes,
17296 we can save registers by using single temporary.
17297 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the `&& 0` disables this branch unconditionally --
   looks like a deliberately parked code path; the load/store-paired
   variant below is always used instead.  */
17298 if (!TARGET_64BIT && 0)
17300 for (i = 0; i < unroll; i++)
17305 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17307 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17309 emit_move_insn (destmem, srcmem);
/* Active copy path: load all UNROLL chunks into temporaries first,
   then store them, to decouple reads from writes.  */
17315 gcc_assert (unroll <= 4);
17316 for (i = 0; i < unroll; i++)
17318 tmpreg[i] = gen_reg_rtx (mode);
17322 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17324 emit_move_insn (tmpreg[i], srcmem);
17326 for (i = 0; i < unroll; i++)
17331 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17333 emit_move_insn (destmem, tmpreg[i]);
/* Memset variant (SRCPTR == NULL): store VALUE into each chunk.  */
17338 for (i = 0; i < unroll; i++)
17342 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17343 emit_move_insn (destmem, value);
/* Advance ITER and loop while ITER < SIZE.  */
17346 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17347 true, OPTAB_LIB_WIDEN);
17349 emit_move_insn (iter, tmp);
17351 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-edge probability from the expected trip count.  */
17353 if (expected_size != -1)
17355 expected_size /= GET_MODE_SIZE (mode) * unroll;
17356 if (expected_size == 0)
17358 else if (expected_size > REG_BR_PROB_BASE)
17359 predict_jump (REG_BR_PROB_BASE - 1);
17361 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17364 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* After the loop, bump the caller's pointers past the copied region.  */
17365 iter = ix86_zero_extend_to_Pmode (iter);
17366 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17367 true, OPTAB_LIB_WIDEN);
17368 if (tmp != destptr)
17369 emit_move_insn (destptr, tmp);
17372 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17373 true, OPTAB_LIB_WIDEN);
17375 emit_move_insn (srcptr, tmp);
17377 emit_label (out_label);
17380 /* Output "rep; mov" instruction.
17381 Arguments have same meaning as for previous function */
17383 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17384 rtx destptr, rtx srcptr,
17386 enum machine_mode mode)
17392 /* If the size is known, it is shorter to use rep movs. */
/* Word-multiple constant byte count: widen QImode to SImode so rep
   movsl is used instead of rep movsb (mode reassignment elided).  */
17393 if (mode == QImode && CONST_INT_P (count)
17394 && !(INTVAL (count) & 3)
/* Normalize dest/src to BLKmode MEMs addressed by the pointers.  */
17397 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17398 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17399 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17400 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17401 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP/SRCEXP describe the final pointer values: ptr + count
   scaled back to bytes (shift by log2 of the element size).  */
17402 if (mode != QImode)
17404 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17405 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17406 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17407 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17408 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17409 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17413 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17414 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Propagate known transfer size into the MEMs for alias analysis.  */
17416 if (CONST_INT_P (count))
17418 count = GEN_INT (INTVAL (count)
17419 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17420 destmem = shallow_copy_rtx (destmem);
17421 srcmem = shallow_copy_rtx (srcmem);
17422 set_mem_size (destmem, count);
17423 set_mem_size (srcmem, count);
17427 if (MEM_SIZE (destmem))
17428 set_mem_size (destmem, NULL_RTX);
17429 if (MEM_SIZE (srcmem))
17430 set_mem_size (srcmem, NULL_RTX);
17432 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17436 /* Output "rep; stos" instruction.
17437 Arguments have same meaning as for previous function */
17439 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17440 rtx count, enum machine_mode mode,
/* ORIG_VALUE (parameter line elided) is the pre-broadcast fill value;
   only a known-zero fill with constant count lets us keep MEM_SIZE.  */
17446 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17447 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17448 value = force_reg (mode, gen_lowpart (mode, value));
17449 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP is the final destination pointer: ptr + count in bytes.  */
17450 if (mode != QImode)
17452 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17453 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17454 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17457 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17458 if (orig_value == const0_rtx && CONST_INT_P (count))
17460 count = GEN_INT (INTVAL (count)
17461 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17462 destmem = shallow_copy_rtx (destmem);
17463 set_mem_size (destmem, count);
17465 else if (MEM_SIZE (destmem))
17466 set_mem_size (destmem, NULL_RTX);
17467 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one movs-style element copy of MODE at byte OFFSET from
   SRCPTR to DESTPTR; the strmov pattern also advances both pointers.  */
17471 emit_strmov (rtx destmem, rtx srcmem,
17472 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17474 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17475 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17476 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17479 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17481 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17482 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit a straight-line sequence testing each bit of
   the residual count (16, 8, 4, 2, 1 bytes), no branches.  */
17485 if (CONST_INT_P (count))
17487 HOST_WIDE_INT countval = INTVAL (count);
17490 if ((countval & 0x10) && max_size > 16)
/* 16-byte piece: two DImode moves on 64-bit (the guard and offset
   bookkeeping between the numbered lines are elided).  */
17494 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17495 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17498 gcc_unreachable ();
17501 if ((countval & 0x08) && max_size > 8)
17504 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* 32-bit fallback for the 8-byte piece: two SImode moves.  */
17507 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17508 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17512 if ((countval & 0x04) && max_size > 4)
17514 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17517 if ((countval & 0x02) && max_size > 2)
17519 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17522 if ((countval & 0x01) && max_size > 1)
17524 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residual: fall back to a byte loop.  */
17531 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17532 count, 1, OPTAB_DIRECT);
17533 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17534 count, QImode, 1, 4);
17538 /* When there are stringops, we can cheaply increase dest and src pointers.
17539 Otherwise we save code size by maintaining offset (zero is readily
17540 available from preceding rep operation) and using x86 addressing modes.
/* Variable count, stringop flavor: aligntest-and-skip around one
   strmov per power-of-two residual size.  */
17542 if (TARGET_SINGLE_STRINGOP)
17546 rtx label = ix86_expand_aligntest (count, 4, true);
17547 src = change_address (srcmem, SImode, srcptr);
17548 dest = change_address (destmem, SImode, destptr);
17549 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17550 emit_label (label);
17551 LABEL_NUSES (label) = 1;
17555 rtx label = ix86_expand_aligntest (count, 2, true);
17556 src = change_address (srcmem, HImode, srcptr);
17557 dest = change_address (destmem, HImode, destptr);
17558 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17559 emit_label (label);
17560 LABEL_NUSES (label) = 1;
17564 rtx label = ix86_expand_aligntest (count, 1, true);
17565 src = change_address (srcmem, QImode, srcptr);
17566 dest = change_address (destmem, QImode, destptr);
17567 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17568 emit_label (label);
17569 LABEL_NUSES (label) = 1;
/* No stringops: keep a running OFFSET register and use plain moves
   with ptr+offset addressing.  */
17574 rtx offset = force_reg (Pmode, const0_rtx);
17579 rtx label = ix86_expand_aligntest (count, 4, true);
17580 src = change_address (srcmem, SImode, srcptr);
17581 dest = change_address (destmem, SImode, destptr);
17582 emit_move_insn (dest, src);
17583 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17584 true, OPTAB_LIB_WIDEN);
17586 emit_move_insn (offset, tmp);
17587 emit_label (label);
17588 LABEL_NUSES (label) = 1;
17592 rtx label = ix86_expand_aligntest (count, 2, true);
17593 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17594 src = change_address (srcmem, HImode, tmp);
17595 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17596 dest = change_address (destmem, HImode, tmp);
17597 emit_move_insn (dest, src);
17598 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17599 true, OPTAB_LIB_WIDEN);
17601 emit_move_insn (offset, tmp);
17602 emit_label (label);
17603 LABEL_NUSES (label) = 1;
17607 rtx label = ix86_expand_aligntest (count, 1, true);
17608 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17609 src = change_address (srcmem, QImode, tmp);
17610 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17611 dest = change_address (destmem, QImode, tmp);
17612 emit_move_insn (dest, src);
17613 emit_label (label);
17614 LABEL_NUSES (label) = 1;
17619 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Byte-granularity loop fallback for the memset epilogue: mask COUNT
   down to the residual and reuse the generic set/move loop.  */
17621 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17622 rtx count, int max_size)
17625 expand_simple_binop (counter_mode (count), AND, count,
17626 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17627 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17628 gen_lowpart (QImode, value), count, QImode,
17632 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Memset analogue of expand_movmem_epilogue.  VALUE is presumably the
   byte value already broadcast to word width -- the gen_lowpart calls
   below extract the narrower parts; TODO confirm against callers.  */
17634 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant count: straight-line stores keyed off each residual bit.  */
17638 if (CONST_INT_P (count))
17640 HOST_WIDE_INT countval = INTVAL (count);
17643 if ((countval & 0x10) && max_size > 16)
17647 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17648 emit_insn (gen_strset (destptr, dest, value));
17649 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17650 emit_insn (gen_strset (destptr, dest, value));
17653 gcc_unreachable ();
17656 if ((countval & 0x08) && max_size > 8)
17660 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17661 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit fallback: two SImode stores for the 8-byte piece.  */
17665 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17666 emit_insn (gen_strset (destptr, dest, value));
17667 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17668 emit_insn (gen_strset (destptr, dest, value));
17672 if ((countval & 0x04) && max_size > 4)
17674 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17675 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17678 if ((countval & 0x02) && max_size > 2)
17680 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17681 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17684 if ((countval & 0x01) && max_size > 1)
17686 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17687 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable residual: defer to the loop-based helper.  */
17694 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable count: aligntest-and-skip per power-of-two residual,
   using strset which also advances DESTPTR.  */
17699 rtx label = ix86_expand_aligntest (count, 16, true);
17702 dest = change_address (destmem, DImode, destptr);
17703 emit_insn (gen_strset (destptr, dest, value));
17704 emit_insn (gen_strset (destptr, dest, value));
17708 dest = change_address (destmem, SImode, destptr);
17709 emit_insn (gen_strset (destptr, dest, value));
17710 emit_insn (gen_strset (destptr, dest, value));
17711 emit_insn (gen_strset (destptr, dest, value));
17712 emit_insn (gen_strset (destptr, dest, value));
17714 emit_label (label);
17715 LABEL_NUSES (label) = 1;
17719 rtx label = ix86_expand_aligntest (count, 8, true);
17722 dest = change_address (destmem, DImode, destptr);
17723 emit_insn (gen_strset (destptr, dest, value));
17727 dest = change_address (destmem, SImode, destptr);
17728 emit_insn (gen_strset (destptr, dest, value));
17729 emit_insn (gen_strset (destptr, dest, value));
17731 emit_label (label);
17732 LABEL_NUSES (label) = 1;
17736 rtx label = ix86_expand_aligntest (count, 4, true);
17737 dest = change_address (destmem, SImode, destptr);
17738 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17739 emit_label (label);
17740 LABEL_NUSES (label) = 1;
17744 rtx label = ix86_expand_aligntest (count, 2, true);
17745 dest = change_address (destmem, HImode, destptr);
17746 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17747 emit_label (label);
17748 LABEL_NUSES (label) = 1;
17752 rtx label = ix86_expand_aligntest (count, 1, true);
17753 dest = change_address (destmem, QImode, destptr);
17754 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17755 emit_label (label);
17756 LABEL_NUSES (label) = 1;
17760 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17761 DESIRED_ALIGNMENT. */
/* NOTE(review): despite the wording above, data flows SRC -> DEST.  Each step
   copies one QI/HI/SI chunk, advances both pointers (via gen_strmov) and
   shrinks COUNT, so the caller's main loop starts with DEST aligned to
   DESIRED_ALIGNMENT.  */
17763 expand_movmem_prologue (rtx destmem, rtx srcmem,
17764 rtx destptr, rtx srcptr, rtx count,
17765 int align, int desired_alignment)
/* Step from 1 to 2: copy a single byte when DESTPTR is odd.  */
17767 if (align <= 1 && desired_alignment > 1)
17769 rtx label = ix86_expand_aligntest (destptr, 1, false);
17770 srcmem = change_address (srcmem, QImode, srcptr);
17771 destmem = change_address (destmem, QImode, destptr);
17772 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17773 ix86_adjust_counter (count, 1);
17774 emit_label (label);
17775 LABEL_NUSES (label) = 1;
/* Step from 2 to 4: copy a halfword when bit 1 of DESTPTR is set.  */
17777 if (align <= 2 && desired_alignment > 2)
17779 rtx label = ix86_expand_aligntest (destptr, 2, false);
17780 srcmem = change_address (srcmem, HImode, srcptr);
17781 destmem = change_address (destmem, HImode, destptr);
17782 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17783 ix86_adjust_counter (count, 2);
17784 emit_label (label);
17785 LABEL_NUSES (label) = 1;
/* Step from 4 to 8: copy a word when bit 2 of DESTPTR is set.  */
17787 if (align <= 4 && desired_alignment > 4)
17789 rtx label = ix86_expand_aligntest (destptr, 4, false);
17790 srcmem = change_address (srcmem, SImode, srcptr);
17791 destmem = change_address (destmem, SImode, destptr);
17792 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17793 ix86_adjust_counter (count, 4);
17794 emit_label (label);
17795 LABEL_NUSES (label) = 1;
/* Larger desired alignments would require additional steps.  */
17797 gcc_assert (desired_alignment <= 8);
17800 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17801 ALIGN_BYTES is how many bytes need to be copied. */
/* NOTE(review): data flows SRC -> DST.  ALIGN_BYTES is a compile-time
   constant here, so alias and alignment info on both MEMs can be kept
   exact, unlike the runtime-test variant above.  */
17803 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17804 int desired_align, int align_bytes)
17807 rtx src_size, dst_size;
/* Bytes until SRC itself reaches DESIRED_ALIGN, or negative if unknown.  */
17809 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17810 if (src_align_bytes >= 0)
17811 src_align_bytes = desired_align - src_align_bytes;
17812 src_size = MEM_SIZE (src);
17813 dst_size = MEM_SIZE (dst);
/* Emit at most one QImode, one HImode and one SImode copy, mirroring the
   low bits of ALIGN_BYTES.  */
17814 if (align_bytes & 1)
17816 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17817 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17819 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17821 if (align_bytes & 2)
17823 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17824 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17825 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17826 set_mem_align (dst, 2 * BITS_PER_UNIT);
/* SRC gains the same 2-byte alignment only when its misalignment matches
   DST's in the low bit.  */
17827 if (src_align_bytes >= 0
17828 && (src_align_bytes & 1) == (align_bytes & 1)
17829 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17830 set_mem_align (src, 2 * BITS_PER_UNIT);
17832 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17834 if (align_bytes & 4)
17836 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17837 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17838 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17839 set_mem_align (dst, 4 * BITS_PER_UNIT);
17840 if (src_align_bytes >= 0)
17842 unsigned int src_align = 0;
17843 if ((src_align_bytes & 3) == (align_bytes & 3))
17845 else if ((src_align_bytes & 1) == (align_bytes & 1))
17847 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17848 set_mem_align (src, src_align * BITS_PER_UNIT);
17851 emit_insn (gen_strmov (destreg, dst, srcreg, src));
/* Rewrite both MEMs as BLKmode at the new offset for the main loop.  */
17853 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17854 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17855 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17856 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17857 if (src_align_bytes >= 0)
17859 unsigned int src_align = 0;
17860 if ((src_align_bytes & 7) == (align_bytes & 7))
17862 else if ((src_align_bytes & 3) == (align_bytes & 3))
17864 else if ((src_align_bytes & 1) == (align_bytes & 1))
17866 if (src_align > (unsigned int) desired_align)
17867 src_align = desired_align;
17868 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17869 set_mem_align (src, src_align * BITS_PER_UNIT);
17872 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
/* BUGFIX: the second size update must shrink SRC, not DST; the original
   applied SRC's size to DST, leaving DST's MEM_SIZE wrong and SRC's
   unadjusted (this was also fixed upstream in GCC).  */
17874 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17879 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
17880 DESIRED_ALIGNMENT. */
/* memset counterpart of expand_movmem_prologue: store up to one byte,
   halfword and word of VALUE (a pre-duplicated pattern register) so that
   DESTPTR becomes aligned to DESIRED_ALIGNMENT; COUNT shrinks to match.  */
17882 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17883 int align, int desired_alignment)
/* Step from 1 to 2: store a single byte when DESTPTR is odd.  */
17885 if (align <= 1 && desired_alignment > 1)
17887 rtx label = ix86_expand_aligntest (destptr, 1, false);
17888 destmem = change_address (destmem, QImode, destptr);
17889 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17890 ix86_adjust_counter (count, 1);
17891 emit_label (label);
17892 LABEL_NUSES (label) = 1;
/* Step from 2 to 4: store a halfword when bit 1 of DESTPTR is set.  */
17894 if (align <= 2 && desired_alignment > 2)
17896 rtx label = ix86_expand_aligntest (destptr, 2, false);
17897 destmem = change_address (destmem, HImode, destptr);
17898 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17899 ix86_adjust_counter (count, 2);
17900 emit_label (label);
17901 LABEL_NUSES (label) = 1;
/* Step from 4 to 8: store a word when bit 2 of DESTPTR is set.  */
17903 if (align <= 4 && desired_alignment > 4)
17905 rtx label = ix86_expand_aligntest (destptr, 4, false);
17906 destmem = change_address (destmem, SImode, destptr);
17907 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17908 ix86_adjust_counter (count, 4);
17909 emit_label (label);
17910 LABEL_NUSES (label) = 1;
/* Larger desired alignments would require additional steps.  */
17912 gcc_assert (desired_alignment <= 8);
17915 /* Set enough from DST to align DST known to by aligned by ALIGN to
17916 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
/* memset counterpart of expand_constant_movmem_prologue: ALIGN_BYTES is a
   compile-time constant, so DST's alias/alignment/size info is maintained
   exactly.  VALUE is the pre-duplicated pattern register.  */
17918 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17919 int desired_align, int align_bytes)
17922 rtx dst_size = MEM_SIZE (dst);
/* Emit at most one QImode, one HImode and one SImode store, mirroring the
   low bits of ALIGN_BYTES.  */
17923 if (align_bytes & 1)
17925 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17927 emit_insn (gen_strset (destreg, dst,
17928 gen_lowpart (QImode, value)));
17930 if (align_bytes & 2)
17932 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17933 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17934 set_mem_align (dst, 2 * BITS_PER_UNIT);
17936 emit_insn (gen_strset (destreg, dst,
17937 gen_lowpart (HImode, value)));
17939 if (align_bytes & 4)
17941 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17942 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17943 set_mem_align (dst, 4 * BITS_PER_UNIT);
17945 emit_insn (gen_strset (destreg, dst,
17946 gen_lowpart (SImode, value)));
/* Rewrite DST as BLKmode at the new offset with the improved alignment
   and the remaining size for the main loop.  */
17948 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17949 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17950 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17952 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17956 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* Returns the stringop_alg to use for a block operation of COUNT bytes
   (0 when unknown at compile time) with profile-estimated EXPECTED_SIZE
   (-1 when unknown).  MEMSET selects the memset cost table rather than the
   memcpy one.  *DYNAMIC_CHECK is set to a size threshold when the caller
   should emit a runtime size test falling back to a libcall, else -1.  */
17957 static enum stringop_alg
17958 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17959 int *dynamic_check)
17961 const struct stringop_algs * algs;
17962 bool optimize_for_speed;
17963 /* Algorithms using the rep prefix want at least edi and ecx;
17964 additionally, memset wants eax and memcpy wants esi. Don't
17965 consider such algorithms if the user has appropriated those
17966 registers for their own purposes. */
17967 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17969 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17971 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17972 || (alg != rep_prefix_1_byte \
17973 && alg != rep_prefix_4_byte \
17974 && alg != rep_prefix_8_byte))
17975 const struct processor_costs *cost;
17977 /* Even if the string operation call is cold, we still might spend a lot
17978 of time processing large blocks. */
17979 if (optimize_function_for_size_p (cfun)
17980 || (optimize_insn_for_size_p ()
17981 && expected_size != -1 && expected_size < 256))
17982 optimize_for_speed = false;
17984 optimize_for_speed = true;
17986 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17988 *dynamic_check = -1;
17990 algs = &cost->memset[TARGET_64BIT != 0];
17992 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy always wins when usable.  */
17993 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17994 return stringop_alg;
17995 /* rep; movq or rep; movl is the smallest variant. */
17996 else if (!optimize_for_speed)
17998 if (!count || (count & 3))
17999 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18001 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18003 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18005 else if (expected_size != -1 && expected_size < 4)
18006 return loop_1_byte;
18007 else if (expected_size != -1)
18010 enum stringop_alg alg = libcall;
/* BUGFIX: the loop bound is MAX_STRINGOP_ALGS (size of the per-CPU cost
   table in i386.h); "NAX_STRINGOP_ALGS" is an undeclared identifier.  */
18011 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18013 /* We get here if the algorithms that were not libcall-based
18014 were rep-prefix based and we are unable to use rep prefixes
18015 based on global register usage. Break out of the loop and
18016 use the heuristic below. */
18017 if (algs->size[i].max == 0)
18019 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18021 enum stringop_alg candidate = algs->size[i].alg;
18023 if (candidate != libcall && ALG_USABLE_P (candidate))
18025 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18026 last non-libcall inline algorithm. */
18027 if (TARGET_INLINE_ALL_STRINGOPS)
18029 /* When the current size is best to be copied by a libcall,
18030 but we are still forced to inline, run the heuristic below
18031 that will pick code for medium sized blocks. */
18032 if (alg != libcall)
18036 else if (ALG_USABLE_P (candidate))
18040 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18042 /* When asked to inline the call anyway, try to pick meaningful choice.
18043 We look for maximal size of block that is faster to copy by hand and
18044 take blocks of at most of that size guessing that average size will
18045 be roughly half of the block.
18047 If this turns out to be bad, we might simply specify the preferred
18048 choice in ix86_costs. */
18049 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18050 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18053 enum stringop_alg alg;
18055 bool any_alg_usable_p = true;
/* BUGFIX: same typo as above -- iterate over the MAX_STRINGOP_ALGS
   entries of the cost table.  */
18057 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18059 enum stringop_alg candidate = algs->size[i].alg;
18060 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18062 if (candidate != libcall && candidate
18063 && ALG_USABLE_P (candidate))
18064 max = algs->size[i].max;
18066 /* If there aren't any usable algorithms, then recursing on
18067 smaller sizes isn't going to find anything. Just return the
18068 simple byte-at-a-time copy loop. */
18069 if (!any_alg_usable_p)
18071 /* Pick something reasonable. */
18072 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18073 *dynamic_check = 128;
18074 return loop_1_byte;
/* Recurse with half the largest hand-copyable size as the estimate.  */
18078 alg = decide_alg (count, max / 2, memset, dynamic_check);
18079 gcc_assert (*dynamic_check == -1);
18080 gcc_assert (alg != libcall);
18081 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18082 *dynamic_check = max;
18085 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18086 #undef ALG_USABLE_P
18089 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18090 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Returns the destination alignment (in bytes) worth establishing in the
   prologue for algorithm ALG; never less than the incoming ALIGN.  */
18092 decide_alignment (int align,
18093 enum stringop_alg alg,
18096 int desired_align = 0;
18100 gcc_unreachable ();
18102 case unrolled_loop:
/* Word-sized chunks want word alignment.  */
18103 desired_align = GET_MODE_SIZE (Pmode);
18105 case rep_prefix_8_byte:
18108 case rep_prefix_4_byte:
18109 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18110 copying whole cacheline at once. */
18111 if (TARGET_PENTIUMPRO)
18116 case rep_prefix_1_byte:
18117 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18118 copying whole cacheline at once. */
18119 if (TARGET_PENTIUMPRO)
/* Never lower an alignment the operand already has...  */
18133 if (desired_align < align)
18134 desired_align = align;
/* ...and do not bother aligning blocks expected to be very small.  */
18135 if (expected_size != -1 && expected_size < 4)
18136 desired_align = align;
18137 return desired_align;
18140 /* Return the smallest power of 2 greater than VAL. */
/* Used below to round EPILOGUE_SIZE_NEEDED up to a power of two so the
   epilogue can mask the residual count with COUNT & (size - 1).  */
18142 smallest_pow2_greater_than (int val)
18150 /* Expand string move (memcpy) operation. Use i386 string operations when
18151 profitable. expand_setmem contains similar code. The code depends upon
18152 architecture, block size and alignment, but always has the same
18155 1) Prologue guard: Conditional that jumps up to epilogues for small
18156 blocks that can be handled by epilogue alone. This is faster but
18157 also needed for correctness, since the prologue assumes the block is larger
18158 than the desired alignment.
18160 Optional dynamic check for size and libcall for large
18161 blocks is emitted here too, with -minline-stringops-dynamically.
18163 2) Prologue: copy first few bytes in order to get destination aligned
18164 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18165 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18166 We emit either a jump tree on power of two sized blocks, or a byte loop.
18168 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18169 with specified algorithm.
18171 4) Epilogue: code copying tail of the block that is too small to be
18172 handled by main body (or up to size guarded by prologue guard). */
18175 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18176 rtx expected_align_exp, rtx expected_size_exp)
/* Expands memcpy inline; see the overview comment above for the four-step
   structure (guard, alignment prologue, main loop, epilogue).  Returns
   nonzero on success (tail of the function is outside this excerpt).  */
18182 rtx jump_around_label = NULL;
18183 HOST_WIDE_INT align = 1;
18184 unsigned HOST_WIDE_INT count = 0;
18185 HOST_WIDE_INT expected_size = -1;
18186 int size_needed = 0, epilogue_size_needed;
18187 int desired_align = 0, align_bytes = 0;
18188 enum stringop_alg alg;
18190 bool need_zero_guard = false;
18192 if (CONST_INT_P (align_exp))
18193 align = INTVAL (align_exp);
18194 /* i386 can do misaligned access on reasonably increased cost. */
18195 if (CONST_INT_P (expected_align_exp)
18196 && INTVAL (expected_align_exp) > align)
18197 align = INTVAL (expected_align_exp);
18198 /* ALIGN is the minimum of destination and source alignment, but we care here
18199 just about destination alignment. */
18200 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18201 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18203 if (CONST_INT_P (count_exp))
18204 count = expected_size = INTVAL (count_exp);
18205 if (CONST_INT_P (expected_size_exp) && count == 0)
18206 expected_size = INTVAL (expected_size_exp);
18208 /* Make sure we don't need to care about overflow later on. */
18209 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18212 /* Step 0: Decide on preferred algorithm, desired alignment and
18213 size of chunks to be copied by main loop. */
18215 alg = decide_alg (count, expected_size, false, &dynamic_check);
18216 desired_align = decide_alignment (align, alg, expected_size);
18218 if (!TARGET_ALIGN_STRINGOPS)
18219 align = desired_align;
18221 if (alg == libcall)
18223 gcc_assert (alg != no_stringop);
18225 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18226 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18227 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* Per-algorithm setup: main-loop chunk size and whether a zero-size
   guard is required (loops do not tolerate a count of zero).  */
18232 gcc_unreachable ();
18234 need_zero_guard = true;
18235 size_needed = GET_MODE_SIZE (Pmode);
18237 case unrolled_loop:
18238 need_zero_guard = true;
18239 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18241 case rep_prefix_8_byte:
18244 case rep_prefix_4_byte:
18247 case rep_prefix_1_byte:
18251 need_zero_guard = true;
18256 epilogue_size_needed = size_needed;
18258 /* Step 1: Prologue guard. */
18260 /* Alignment code needs count to be in register. */
18261 if (CONST_INT_P (count_exp) && desired_align > align)
18263 if (INTVAL (count_exp) > desired_align
18264 && INTVAL (count_exp) > size_needed)
18267 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18268 if (align_bytes <= 0)
18271 align_bytes = desired_align - align_bytes;
18273 if (align_bytes == 0)
18274 count_exp = force_reg (counter_mode (count_exp), count_exp);
18276 gcc_assert (desired_align >= 1 && align >= 1);
18278 /* Ensure that alignment prologue won't copy past end of block. */
18279 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18281 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18282 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18283 Make sure it is power of 2. */
18284 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* Known-small blocks are handled entirely by the epilogue code.  */
18288 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18290 /* If main algorithm works on QImode, no epilogue is needed.
18291 For small sizes just don't align anything. */
18292 if (size_needed == 1)
18293 desired_align = align;
/* Runtime guard: jump to the epilogue for blocks shorter than the
   prologue/main loop can legally handle.  */
18300 label = gen_label_rtx ();
18301 emit_cmp_and_jump_insns (count_exp,
18302 GEN_INT (epilogue_size_needed),
18303 LTU, 0, counter_mode (count_exp), 1, label);
18304 if (expected_size == -1 || expected_size < epilogue_size_needed)
18305 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18307 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18311 /* Emit code to decide on runtime whether library call or inline should be
18313 if (dynamic_check != -1)
18315 if (CONST_INT_P (count_exp))
18317 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18319 emit_block_move_via_libcall (dst, src, count_exp, false);
18320 count_exp = const0_rtx;
18326 rtx hot_label = gen_label_rtx ();
18327 jump_around_label = gen_label_rtx ();
18328 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18329 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18330 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18331 emit_block_move_via_libcall (dst, src, count_exp, false);
18332 emit_jump (jump_around_label);
18333 emit_label (hot_label);
18337 /* Step 2: Alignment prologue. */
18339 if (desired_align > align)
18341 if (align_bytes == 0)
18343 /* Except for the first move in epilogue, we no longer know
18344 constant offset in aliasing info. It don't seems to worth
18345 the pain to maintain it for the first move, so throw away
18347 src = change_address (src, BLKmode, srcreg);
18348 dst = change_address (dst, BLKmode, destreg);
18349 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18354 /* If we know how many bytes need to be stored before dst is
18355 sufficiently aligned, maintain aliasing info accurately. */
18356 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18357 desired_align, align_bytes);
18358 count_exp = plus_constant (count_exp, -align_bytes);
18359 count -= align_bytes;
/* The prologue may already have consumed enough bytes that the main
   loop must be skipped; re-test the remaining count.  */
18361 if (need_zero_guard
18362 && (count < (unsigned HOST_WIDE_INT) size_needed
18363 || (align_bytes == 0
18364 && count < ((unsigned HOST_WIDE_INT) size_needed
18365 + desired_align - align))))
18367 /* It is possible that we copied enough so the main loop will not
18369 gcc_assert (size_needed > 1);
18370 if (label == NULL_RTX)
18371 label = gen_label_rtx ();
18372 emit_cmp_and_jump_insns (count_exp,
18373 GEN_INT (size_needed),
18374 LTU, 0, counter_mode (count_exp), 1, label);
18375 if (expected_size == -1
18376 || expected_size < (desired_align - align) / 2 + size_needed)
18377 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18379 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* A byte-granular main loop needs no separate epilogue; resolve the
   guard label here.  */
18382 if (label && size_needed == 1)
18384 emit_label (label);
18385 LABEL_NUSES (label) = 1;
18387 epilogue_size_needed = 1;
18389 else if (label == NULL_RTX)
18390 epilogue_size_needed = size_needed;
18392 /* Step 3: Main loop. */
18398 gcc_unreachable ();
18400 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18401 count_exp, QImode, 1, expected_size);
18404 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18405 count_exp, Pmode, 1, expected_size);
18407 case unrolled_loop:
18408 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18409 registers for 4 temporaries anyway. */
18410 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18411 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18414 case rep_prefix_8_byte:
18415 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18418 case rep_prefix_4_byte:
18419 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18422 case rep_prefix_1_byte:
18423 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18427 /* Adjust properly the offset of src and dest memory for aliasing. */
18428 if (CONST_INT_P (count_exp))
18430 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18431 (count / size_needed) * size_needed);
18432 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18433 (count / size_needed) * size_needed);
18437 src = change_address (src, BLKmode, srcreg);
18438 dst = change_address (dst, BLKmode, destreg);
18441 /* Step 4: Epilogue to copy the remaining bytes. */
18445 /* When the main loop is done, COUNT_EXP might hold original count,
18446 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18447 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18448 bytes. Compensate if needed. */
18450 if (size_needed < epilogue_size_needed)
18453 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18454 GEN_INT (size_needed - 1), count_exp, 1,
18456 if (tmp != count_exp)
18457 emit_move_insn (count_exp, tmp);
18459 emit_label (label);
18460 LABEL_NUSES (label) = 1;
18463 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18464 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18465 epilogue_size_needed);
18466 if (jump_around_label)
18467 emit_label (jump_around_label);
18471 /* Helper function for memcpy. For QImode value 0xXY produce
18472 0xXYXYXYXY of wide specified by MODE. This is essentially
18473 a * 0x10101010, but we can do slightly better than
18474 synth_mult by unwinding the sequence by hand on CPUs with
/* NOTE(review): despite the "memcpy" wording above, the visible caller is
   the memset path (promote_duplicated_reg_to_size below).  */
18477 promote_duplicated_reg (enum machine_mode mode, rtx val)
18479 enum machine_mode valmode = GET_MODE (val);
/* Number of shift+or steps needed to fill MODE from one byte.  */
18481 int nops = mode == DImode ? 3 : 2;
18483 gcc_assert (mode == SImode || mode == DImode);
18484 if (val == const0_rtx)
18485 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: splat it arithmetically at compile time.  */
18486 if (CONST_INT_P (val))
18488 HOST_WIDE_INT v = INTVAL (val) & 255;
18492 if (mode == DImode)
18493 v |= (v << 16) << 16;
18494 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18497 if (valmode == VOIDmode)
18499 if (valmode != QImode)
18500 val = gen_lowpart (QImode, val);
18501 if (mode == QImode)
18503 if (!TARGET_PARTIAL_REG_STALL)
/* Cost check: if a multiply by the 0x0101... constant is cheaper than
   the shift/or sequence below, emit the multiply instead.  */
18505 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18506 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18507 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18508 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18510 rtx reg = convert_modes (mode, QImode, val, true);
18511 tmp = promote_duplicated_reg (mode, const1_rtx);
18512 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18517 rtx reg = convert_modes (mode, QImode, val, true);
/* Duplicate byte into bits 8-15 via an insv when no partial-register
   stall penalty applies; otherwise use the shift+or below.  */
18519 if (!TARGET_PARTIAL_REG_STALL)
18520 if (mode == SImode)
18521 emit_insn (gen_movsi_insv_1 (reg, reg));
18523 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18526 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18527 NULL, 1, OPTAB_DIRECT);
18529 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Double the filled width: 16 bits -> 32, then 32 -> 64 for DImode.  */
18531 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18532 NULL, 1, OPTAB_DIRECT);
18533 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18534 if (mode == SImode)
18536 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18537 NULL, 1, OPTAB_DIRECT);
18538 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18543 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18544 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18545 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest mode (DI/SI/HI) any part of the expansion will store in
   and splats VAL to that width; returns VAL unchanged when byte stores
   suffice.  */
18547 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18552 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18553 promoted_val = promote_duplicated_reg (DImode, val);
18554 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18555 promoted_val = promote_duplicated_reg (SImode, val);
18556 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18557 promoted_val = promote_duplicated_reg (HImode, val);
18559 promoted_val = val;
18561 return promoted_val;
18564 /* Expand string clear operation (bzero). Use i386 string operations when
18565 profitable. See expand_movmem comment for explanation of individual
18566 steps performed. */
18568 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18569 rtx expected_align_exp, rtx expected_size_exp)
/* Mirrors ix86_expand_movmem; the extra complication is VAL_EXP, which must
   be splatted to the store width (promoted_val) and may force a byte-loop
   epilogue when not constant.  */
18574 rtx jump_around_label = NULL;
18575 HOST_WIDE_INT align = 1;
18576 unsigned HOST_WIDE_INT count = 0;
18577 HOST_WIDE_INT expected_size = -1;
18578 int size_needed = 0, epilogue_size_needed;
18579 int desired_align = 0, align_bytes = 0;
18580 enum stringop_alg alg;
18581 rtx promoted_val = NULL;
18582 bool force_loopy_epilogue = false;
18584 bool need_zero_guard = false;
18586 if (CONST_INT_P (align_exp))
18587 align = INTVAL (align_exp);
18588 /* i386 can do misaligned access on reasonably increased cost. */
18589 if (CONST_INT_P (expected_align_exp)
18590 && INTVAL (expected_align_exp) > align)
18591 align = INTVAL (expected_align_exp);
18592 if (CONST_INT_P (count_exp))
18593 count = expected_size = INTVAL (count_exp);
18594 if (CONST_INT_P (expected_size_exp) && count == 0)
18595 expected_size = INTVAL (expected_size_exp);
18597 /* Make sure we don't need to care about overflow later on. */
18598 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18601 /* Step 0: Decide on preferred algorithm, desired alignment and
18602 size of chunks to be copied by main loop. */
18604 alg = decide_alg (count, expected_size, true, &dynamic_check);
18605 desired_align = decide_alignment (align, alg, expected_size);
18607 if (!TARGET_ALIGN_STRINGOPS)
18608 align = desired_align;
18610 if (alg == libcall)
18612 gcc_assert (alg != no_stringop);
18614 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18615 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* Per-algorithm setup: main-loop chunk size and whether a zero-size
   guard is required.  */
18620 gcc_unreachable ();
18622 need_zero_guard = true;
18623 size_needed = GET_MODE_SIZE (Pmode);
18625 case unrolled_loop:
18626 need_zero_guard = true;
18627 size_needed = GET_MODE_SIZE (Pmode) * 4;
18629 case rep_prefix_8_byte:
18632 case rep_prefix_4_byte:
18635 case rep_prefix_1_byte:
18639 need_zero_guard = true;
18643 epilogue_size_needed = size_needed;
18645 /* Step 1: Prologue guard. */
18647 /* Alignment code needs count to be in register. */
18648 if (CONST_INT_P (count_exp) && desired_align > align)
18650 if (INTVAL (count_exp) > desired_align
18651 && INTVAL (count_exp) > size_needed)
18654 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18655 if (align_bytes <= 0)
18658 align_bytes = desired_align - align_bytes;
18660 if (align_bytes == 0)
/* A constant count above 32 bits needs a DImode counter register.  */
18662 enum machine_mode mode = SImode;
18663 if (TARGET_64BIT && (count & ~0xffffffff))
18665 count_exp = force_reg (mode, count_exp);
18668 /* Do the cheap promotion to allow better CSE across the
18669 main loop and epilogue (ie one load of the big constant in the
18670 front of all code. */
18671 if (CONST_INT_P (val_exp))
18672 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18673 desired_align, align);
18674 /* Ensure that alignment prologue won't copy past end of block. */
18675 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18677 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18678 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18679 Make sure it is power of 2. */
18680 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18682 /* To improve performance of small blocks, we jump around the VAL
18683 promoting mode. This mean that if the promoted VAL is not constant,
18684 we might not use it in the epilogue and have to use byte
18686 if (epilogue_size_needed > 2 && !promoted_val)
18687 force_loopy_epilogue = true;
/* Known-small blocks are handled entirely by the epilogue code.  */
18690 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18692 /* If main algorithm works on QImode, no epilogue is needed.
18693 For small sizes just don't align anything. */
18694 if (size_needed == 1)
18695 desired_align = align;
/* Runtime guard: jump to the epilogue for blocks shorter than the
   prologue/main loop can legally handle.  */
18702 label = gen_label_rtx ();
18703 emit_cmp_and_jump_insns (count_exp,
18704 GEN_INT (epilogue_size_needed),
18705 LTU, 0, counter_mode (count_exp), 1, label);
18706 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18707 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18709 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime size test falling back to the library for huge blocks.  */
18712 if (dynamic_check != -1)
18714 rtx hot_label = gen_label_rtx ();
18715 jump_around_label = gen_label_rtx ();
18716 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18717 LEU, 0, counter_mode (count_exp), 1, hot_label);
18718 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18719 set_storage_via_libcall (dst, count_exp, val_exp, false);
18720 emit_jump (jump_around_label);
18721 emit_label (hot_label);
18724 /* Step 2: Alignment prologue. */
18726 /* Do the expensive promotion once we branched off the small blocks. */
18728 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18729 desired_align, align);
18730 gcc_assert (desired_align >= 1 && align >= 1);
18732 if (desired_align > align)
18734 if (align_bytes == 0)
18736 /* Except for the first move in epilogue, we no longer know
18737 constant offset in aliasing info. It don't seems to worth
18738 the pain to maintain it for the first move, so throw away
18740 dst = change_address (dst, BLKmode, destreg);
18741 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18746 /* If we know how many bytes need to be stored before dst is
18747 sufficiently aligned, maintain aliasing info accurately. */
18748 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18749 desired_align, align_bytes);
18750 count_exp = plus_constant (count_exp, -align_bytes);
18751 count -= align_bytes;
/* The prologue may already have stored enough bytes that the main loop
   must be skipped; re-test the remaining count.  */
18753 if (need_zero_guard
18754 && (count < (unsigned HOST_WIDE_INT) size_needed
18755 || (align_bytes == 0
18756 && count < ((unsigned HOST_WIDE_INT) size_needed
18757 + desired_align - align))))
18759 /* It is possible that we copied enough so the main loop will not
18761 gcc_assert (size_needed > 1);
18762 if (label == NULL_RTX)
18763 label = gen_label_rtx ();
18764 emit_cmp_and_jump_insns (count_exp,
18765 GEN_INT (size_needed),
18766 LTU, 0, counter_mode (count_exp), 1, label);
18767 if (expected_size == -1
18768 || expected_size < (desired_align - align) / 2 + size_needed)
18769 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18771 predict_jump (REG_BR_PROB_BASE * 60 / 100);
/* A byte-granular main loop needs no separate epilogue; fall back to
   the unpromoted value for the single-byte stores.  */
18774 if (label && size_needed == 1)
18776 emit_label (label);
18777 LABEL_NUSES (label) = 1;
18779 promoted_val = val_exp;
18780 epilogue_size_needed = 1;
18782 else if (label == NULL_RTX)
18783 epilogue_size_needed = size_needed;
18785 /* Step 3: Main loop. */
18791 gcc_unreachable ();
18793 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18794 count_exp, QImode, 1, expected_size);
18797 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18798 count_exp, Pmode, 1, expected_size);
18800 case unrolled_loop:
18801 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18802 count_exp, Pmode, 4, expected_size);
18804 case rep_prefix_8_byte:
18805 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18808 case rep_prefix_4_byte:
18809 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18812 case rep_prefix_1_byte:
18813 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18817 /* Adjust properly the offset of src and dest memory for aliasing. */
18818 if (CONST_INT_P (count_exp))
18819 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18820 (count / size_needed) * size_needed);
18822 dst = change_address (dst, BLKmode, destreg);
18824 /* Step 4: Epilogue to copy the remaining bytes. */
18828 /* When the main loop is done, COUNT_EXP might hold original count,
18829 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18830 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18831 bytes. Compensate if needed. */
18833 if (size_needed < epilogue_size_needed)
18836 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18837 GEN_INT (size_needed - 1), count_exp, 1,
18839 if (tmp != count_exp)
18840 emit_move_insn (count_exp, tmp);
18842 emit_label (label);
18843 LABEL_NUSES (label) = 1;
18846 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* Non-constant VAL with a wide epilogue: the promoted pattern may not
   have been computed on this path, so store byte-by-byte instead.  */
18848 if (force_loopy_epilogue)
18849 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18850 epilogue_size_needed);
18852 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18853 epilogue_size_needed);
18855 if (jump_around_label)
18856 emit_label (jump_around_label);
/* NOTE(review): this is a numbered listing; some source lines were elided by
   extraction (gaps in the embedded line numbers).  Code tokens kept verbatim.  */
18860 /* Expand the appropriate insns for doing strlen if not just doing
18863    out = result, initialized with the start address
18864    align_rtx = alignment of the address.
18865    scratch = scratch register, initialized with the startaddress when
18866 	not aligned, otherwise undefined
18868    This is just the body. It needs the initializations mentioned above and
18869    some address computing at the end.  These things are done in i386.md.  */
/* Emits an unrolled SImode strlen loop: first byte-checks up to 3 unaligned
   bytes, then scans 4 bytes at a time, finally locates the zero byte within
   the last word without branches.  OUT ends up holding the address of the
   terminating zero.  */
18872 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18876   rtx align_2_label = NULL_RTX;
18877   rtx align_3_label = NULL_RTX;
18878   rtx align_4_label = gen_label_rtx ();
18879   rtx end_0_label = gen_label_rtx ();
18881   rtx tmpreg = gen_reg_rtx (SImode);
18882   rtx scratch = gen_reg_rtx (SImode);
18886   if (CONST_INT_P (align_rtx))
18887     align = INTVAL (align_rtx);
18889   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
18891   /* Is there a known alignment and is it less than 4?  */
18894       rtx scratch1 = gen_reg_rtx (Pmode);
18895       emit_move_insn (scratch1, out);
18896       /* Is there a known alignment and is it not 2? */
18899 	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18900 	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18902 	  /* Leave just the 3 lower bits.  */
18903 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18904 				    NULL_RTX, 0, OPTAB_WIDEN);
18906 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18907 				   Pmode, 1, align_4_label);
18908 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18909 				   Pmode, 1, align_2_label);
18910 	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18911 				   Pmode, 1, align_3_label);
18915 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
18916 	     check if is aligned to 4 - byte.  */
18918 	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18919 				    NULL_RTX, 0, OPTAB_WIDEN);
18921 	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18922 				   Pmode, 1, align_4_label);
18925       mem = change_address (src, QImode, out);
18927       /* Now compare the bytes.  */
18929       /* Compare the first n unaligned byte on a byte per byte basis.  */
18930       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18931 			       QImode, 1, end_0_label);
18933       /* Increment the address.  */
18934       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18936       /* Not needed with an alignment of 2 */
18939 	  emit_label (align_2_label);
18941 	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18944 	  emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18946 	  emit_label (align_3_label);
18949       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18952       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18955   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
18956      align this loop.  It gives only huge programs, but does not help to
18958   emit_label (align_4_label);
18960   mem = change_address (src, SImode, out);
18961   emit_move_insn (scratch, mem);
18962   emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18964   /* This formula yields a nonzero result iff one of the bytes is zero.
18965      This saves three branches inside loop and many cycles.  */
/* Classic "has-zero-byte" trick: (x - 0x01010101) & ~x & 0x80808080.  */
18967   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18968   emit_insn (gen_one_cmplsi2 (scratch, scratch));
18969   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18970   emit_insn (gen_andsi3 (tmpreg, tmpreg,
18971 			 gen_int_mode (0x80808080, SImode)));
18972   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* Zero byte found in the word just loaded: narrow it down to the byte.
   The TARGET_CMOVE path below uses conditional moves; the other path
   (further down) uses a test and a jump.  */
18977       rtx reg = gen_reg_rtx (SImode);
18978       rtx reg2 = gen_reg_rtx (Pmode);
18979       emit_move_insn (reg, tmpreg);
18980       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18982       /* If zero is not in the first two bytes, move two bytes forward.  */
18983       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18984       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18985       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18986       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18987 			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
18990       /* Emit lea manually to avoid clobbering of flags.  */
18991       emit_insn (gen_rtx_SET (SImode, reg2,
18992 			      gen_rtx_PLUS (Pmode, out, const2_rtx)));
18994       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18995       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18996       emit_insn (gen_rtx_SET (VOIDmode, out,
18997 			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19004        rtx end_2_label = gen_label_rtx ();
19005        /* Is zero in the first two bytes? */
19007        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19008        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19009        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19010        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19011                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19013        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19014        JUMP_LABEL (tmp) = end_2_label;
19016        /* Not in the first two.  Move two bytes forward.  */
19017        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19018        emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19020        emit_label (end_2_label);
19024   /* Avoid branch in fixing the byte.  */
19025   tmpreg = gen_lowpart (QImode, tmpreg);
19026   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19027   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19028   emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19030   emit_label (end_0_label);
19033 /* Expand strlen.  */
/* Expander for the strlen pattern.  OUT receives the length, SRC is the
   string MEM, EOSCHAR the terminator, ALIGN the known alignment.  Chooses
   between the unrolled SImode scan (ix86_expand_strlensi_unroll_1) and a
   repz-scasb sequence; returns false (line elided in this listing) when
   neither strategy applies.  */
19036 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19038   rtx addr, scratch1, scratch2, scratch3, scratch4;
19040   /* The generic case of strlen expander is long.  Avoid it's
19041      expanding unless TARGET_INLINE_ALL_STRINGOPS.  */
19043   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19044       && !TARGET_INLINE_ALL_STRINGOPS
19045       && !optimize_insn_for_size_p ()
19046       && (!CONST_INT_P (align) || INTVAL (align) < 4))
19049   addr = force_reg (Pmode, XEXP (src, 0));
19050   scratch1 = gen_reg_rtx (Pmode);
19052   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19053       && !optimize_insn_for_size_p ())
19055       /* Well it seems that some optimizer does not combine a call like
19056 	 foo(strlen(bar), strlen(bar));
19057 	 when the move and the subtraction is done here.  It does calculate
19058 	 the length just once when these instructions are done inside of
19059 	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
19060 	 often used and I use one fewer register for the lifetime of
19061 	 output_strlen_unroll() this is better.  */
19063       emit_move_insn (out, addr);
19065       ix86_expand_strlensi_unroll_1 (out, src, align);
19067       /* strlensi_unroll_1 returns the address of the zero at the end of
19068 	 the string, like memchr(), so compute the length by subtracting
19069 	 the start address.  */
19070       emit_insn ((*ix86_gen_sub3) (out, out, addr));
/* Fall back to the scas-based sequence, which implicitly uses
   eax/ecx/edi and so cannot be used when any of them is fixed.  */
19076       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
19077       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19080       scratch2 = gen_reg_rtx (Pmode);
19081       scratch3 = gen_reg_rtx (Pmode);
19082       scratch4 = force_reg (Pmode, constm1_rtx);
19084       emit_move_insn (scratch3, addr);
19085       eoschar = force_reg (QImode, eoschar);
19087       src = replace_equiv_address_nv (src, scratch3);
19089       /* If .md starts supporting :P, this can be done in .md.  */
19090       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19091 						 scratch4), UNSPEC_SCAS);
19092       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19093       emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19094       emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19099 /* For given symbol (function) construct code to compute address of it's PLT
19100    entry in large x86-64 PIC model.  */
/* Returns a fresh pseudo holding pic_offset_table + PLTOFF(symbol).
   Only valid in the large PIC code model (asserted below).  */
19102 construct_plt_address (rtx symbol)
19104   rtx tmp = gen_reg_rtx (Pmode);
19105   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19107   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19108   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19110   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19111   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Emit a call (or sibcall) to FNADDR.  RETVAL, if non-null, receives the
   value; POP is the callee-pop amount; CALLARG2 carries the SSE-register
   count for 64-bit varargs in AL.  Builds the CALL rtx, possibly wrapped
   in a PARALLEL for callee-pop or for the ms->sysv clobber list.  */
19116 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19118 		  rtx pop, int sibcall)
19120   rtx use = NULL, call;
19122   if (pop == const0_rtx)
19124   gcc_assert (!TARGET_64BIT || !pop);
19126   if (TARGET_MACHO && !TARGET_64BIT)
19129       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19130 	fnaddr = machopic_indirect_call_target (fnaddr);
19135       /* Static functions and indirect calls don't need the pic register.  */
19136       if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19137 	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19138 	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19139 	use_reg (&use, pic_offset_table_rtx);
/* 64-bit varargs ABI: AL holds the number of SSE registers used.  */
19142   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19144       rtx al = gen_rtx_REG (QImode, AX_REG);
19145       emit_move_insn (al, callarg2);
19146       use_reg (&use, al);
19149   if (ix86_cmodel == CM_LARGE_PIC
19151       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19152       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19153     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19154   else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19156       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19157       fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls must go through a call-clobbered, non-argument
   register; r11 is the designated scratch.  */
19159   if (sibcall && TARGET_64BIT
19160       && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19163       addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19164       fnaddr = gen_rtx_REG (Pmode, R11_REG);
19165       emit_move_insn (fnaddr, addr);
19166       fnaddr = gen_rtx_MEM (QImode, fnaddr);
19169   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19171     call = gen_rtx_SET (VOIDmode, retval, call);
19174       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19175       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19176       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
/* Calling a sysv function from ms_abi code: record the extra registers
   that the sysv callee may clobber (xmm6-15, rsi, rdi).  */
19179       && ix86_cfun_abi () == MS_ABI
19180       && (!callarg2 || INTVAL (callarg2) != -2))
19182       /* We need to represent that SI and DI registers are clobbered
19184       static int clobbered_registers[] = {
19185 	XMM6_REG, XMM7_REG, XMM8_REG,
19186 	XMM9_REG, XMM10_REG, XMM11_REG,
19187 	XMM12_REG, XMM13_REG, XMM14_REG,
19188 	XMM15_REG, SI_REG, DI_REG
19191       rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19192       rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19193       				   UNSPEC_MS_TO_SYSV_CALL);
19197       for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19198         vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19201 				      (SSE_REGNO_P (clobbered_registers[i])
19203 				       clobbered_registers[i]));
19205       call = gen_rtx_PARALLEL (VOIDmode,
19206       			       gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19210   call = emit_call_insn (call);
19212     CALL_INSN_FUNCTION_USAGE (call) = use;
19216 /* Clear stack slot assignments remembered from previous functions.
19217    This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocates and zero-initializes the per-function machine_function record
   (GC-allocated), presetting the fields set explicitly below.  */
19220 static struct machine_function *
19221 ix86_init_machine_status (void)
19223   struct machine_function *f;
19225   f = GGC_CNEW (struct machine_function);
19226   f->use_fast_prologue_epilogue_nregs = -1;
19227   f->tls_descriptor_call_expanded_p = 0;
19228   f->call_abi = ix86_abi;
19233 /* Return a MEM corresponding to a stack slot with mode MODE.
19234    Allocate a new slot if necessary.
19236    The RTL for a function can have several slots available: N is
19237    which slot to use.  */
19240 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19242   struct stack_local_entry *s;
19244   gcc_assert (n < MAX_386_STACK_LOCALS);
19246   /* Virtual slot is valid only before vregs are instantiated.  */
19247   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously assigned slot with the same mode and index; the RTL
   is copied so callers may modify it freely.  */
19249   for (s = ix86_stack_locals; s; s = s->next)
19250     if (s->mode == mode && s->n == n)
19251       return copy_rtx (s->rtl);
19253   s = (struct stack_local_entry *)
19254     ggc_alloc (sizeof (struct stack_local_entry));
19257   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19259   s->next = ix86_stack_locals;
19260   ix86_stack_locals = s;
19264 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
19266 static GTY(()) rtx ix86_tls_symbol;
/* Lazily builds and caches the SYMBOL_REF for the TLS resolver; GNU TLS
   targets use the triple-underscore name variant.  */
19268 ix86_tls_get_addr (void)
19271   if (!ix86_tls_symbol)
19273       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19274 					    (TARGET_ANY_GNU_TLS
19276 					    ? "___tls_get_addr"
19277 					    : "__tls_get_addr");
19280   return ix86_tls_symbol;
19283 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
19285 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily builds and caches the _TLS_MODULE_BASE_ SYMBOL_REF, marking it
   as a global-dynamic TLS symbol via SYMBOL_REF_FLAGS.  */
19287 ix86_tls_module_base (void)
19290   if (!ix86_tls_module_base_symbol)
19292       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19293 							"_TLS_MODULE_BASE_");
19294       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19295 	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19298   return ix86_tls_module_base_symbol;
19301 /* Calculate the length of the memory address in the instruction
19302    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
19305 memory_address_length (rtx addr)
19307   struct ix86_address parts;
19308   rtx base, index, disp;
/* Autoincrement addresses have no extra encoding bytes here.  */
19312   if (GET_CODE (addr) == PRE_DEC
19313       || GET_CODE (addr) == POST_INC
19314       || GET_CODE (addr) == PRE_MODIFY
19315       || GET_CODE (addr) == POST_MODIFY)
19318   ok = ix86_decompose_address (addr, &parts);
19321   if (parts.base && GET_CODE (parts.base) == SUBREG)
19322     parts.base = SUBREG_REG (parts.base);
19323   if (parts.index && GET_CODE (parts.index) == SUBREG)
19324     parts.index = SUBREG_REG (parts.index);
19327   index = parts.index;
19332      - esp as the base always wants an index,
19333      - ebp as the base always wants a displacement,
19334      - r12 as the base always wants an index,
19335      - r13 as the base always wants a displacement.  */
19337   /* Register Indirect.  */
19338   if (base && !index && !disp)
19340       /* esp (for its index) and ebp (for its displacement) need
19341 	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
19344 	  && (addr == arg_pointer_rtx
19345 	      || addr == frame_pointer_rtx
19346 	      || REGNO (addr) == SP_REG
19347 	      || REGNO (addr) == BP_REG
19348 	      || REGNO (addr) == R12_REG
19349 	      || REGNO (addr) == R13_REG))
19353   /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
19354      is not disp32, but disp32(%rip), so for disp32
19355      SIB byte is needed, unless print_operand_address
19356      optimizes it into disp32(%rip) or (%rip) is implied
19358   else if (disp && !base && !index)
19365 	  if (GET_CODE (disp) == CONST)
19366 	    symbol = XEXP (disp, 0);
19367 	  if (GET_CODE (symbol) == PLUS
19368 	      && CONST_INT_P (XEXP (symbol, 1)))
19369 	    symbol = XEXP (symbol, 0);
19371 	  if (GET_CODE (symbol) != LABEL_REF
19372 	      && (GET_CODE (symbol) != SYMBOL_REF
19373 		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19374 	      && (GET_CODE (symbol) != UNSPEC
19375 		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19376 		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19383       /* Find the length of the displacement constant.  */
/* satisfies_constraint_K tests for an 8-bit signed immediate, which
   encodes as disp8 rather than disp32.  */
19386 	  if (base && satisfies_constraint_K (disp))
19391       /* ebp always wants a displacement.  Similarly r13.  */
19392       else if (REG_P (base)
19393 	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19396       /* An index requires the two-byte modrm form....  */
19398 	  /* ...like esp (or r12), which always wants an index.  */
19399 	  || base == arg_pointer_rtx
19400 	  || base == frame_pointer_rtx
19402 	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19419 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
19420    is set, expect that insn have 8bit immediate alternative.  */
19422 ix86_attr_length_immediate_default (rtx insn, int shortform)
19426   extract_insn_cached (insn);
19427   for (i = recog_data.n_operands - 1; i >= 0; --i)
19428     if (CONSTANT_P (recog_data.operand[i]))
19430 	enum attr_mode mode = get_attr_mode (insn);
/* With SHORTFORM, an immediate that fits in a signed byte after
   truncation to the operand mode encodes as a 1-byte immediate.  */
19433 	if (shortform && CONST_INT_P (recog_data.operand[i]))
19435 	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
19442 		ival = trunc_int_for_mode (ival, HImode);
19445 		ival = trunc_int_for_mode (ival, SImode);
19450 	    if (IN_RANGE (ival, -128, 127))
19467 	    /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
19472 	    fatal_insn ("unknown insn mode", insn);
19477 /* Compute default value for "length_address" attribute.  */
19479 ix86_attr_length_address_default (rtx insn)
/* LEA's "address" is its source operand, so measure that directly; for
   everything else, find the first MEM operand whose constraint is not
   ignored ('X') in the matched alternative.  */
19483   if (get_attr_type (insn) == TYPE_LEA)
19485       rtx set = PATTERN (insn), addr;
19487       if (GET_CODE (set) == PARALLEL)
19488 	set = XVECEXP (set, 0, 0);
19490       gcc_assert (GET_CODE (set) == SET);
19492       addr = SET_SRC (set);
19493       if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19495 	  if (GET_CODE (addr) == ZERO_EXTEND)
19496 	    addr = XEXP (addr, 0);
19497 	  if (GET_CODE (addr) == SUBREG)
19498 	    addr = SUBREG_REG (addr);
19501       return memory_address_length (addr);
19504   extract_insn_cached (insn);
19505   for (i = recog_data.n_operands - 1; i >= 0; --i)
19506     if (MEM_P (recog_data.operand[i]))
19508 	constrain_operands_cached (reload_completed);
19509 	if (which_alternative != -1)
19511 	    const char *constraints = recog_data.constraints[i];
19512 	    int alt = which_alternative;
19514 	    while (*constraints == '=' || *constraints == '+')
/* Advance to the constraint string of the matched alternative.  */
19517 	      while (*constraints++ != ',')
19519 	    /* Skip ignored operands.  */
19520 	    if (*constraints == 'X')
19523 	return memory_address_length (XEXP (recog_data.operand[i], 0));
19528 /* Compute default value for "length_vex" attribute.  It includes
19529    2 or 3 byte VEX prefix and 1 opcode byte.  */
19532 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19537   /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19538      byte VEX prefix.  */
19539   if (!has_0f_opcode || has_vex_w)
19542   /* We can always use 2 byte VEX prefix in 32bit.  */
/* In 64-bit mode any operand that needs a REX.W/X/B bit forces the
   3-byte VEX form; otherwise the 2-byte form suffices.  */
19546   extract_insn_cached (insn);
19548   for (i = recog_data.n_operands - 1; i >= 0; --i)
19549     if (REG_P (recog_data.operand[i]))
19551 	/* REX.W bit uses 3 byte VEX prefix.  */
19552 	if (GET_MODE (recog_data.operand[i]) == DImode
19553 	    && GENERAL_REG_P (recog_data.operand[i]))
19558 	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
19559 	if (MEM_P (recog_data.operand[i])
19560 	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19567 /* Return the maximum number of instructions a cpu can issue.  */
/* Per-processor issue width used by the scheduler; the numeric returns
   for each case group are elided in this listing.  */
19570 ix86_issue_rate (void)
19574     case PROCESSOR_PENTIUM:
19575     case PROCESSOR_ATOM:
19579     case PROCESSOR_PENTIUMPRO:
19580     case PROCESSOR_PENTIUM4:
19581     case PROCESSOR_ATHLON:
19583     case PROCESSOR_AMDFAM10:
19584     case PROCESSOR_NOCONA:
19585     case PROCESSOR_GENERIC32:
19586     case PROCESSOR_GENERIC64:
19589     case PROCESSOR_CORE2:
19597 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19598    by DEP_INSN and nothing set by DEP_INSN.  */
/* Only flag-consuming insn types (setcc, cmov, fcmov, conditional branch)
   are interesting; everything else returns early.  DEP_INSN may be a
   single SET or a two-SET PARALLEL (e.g. an arithmetic insn that sets
   both a result register and the flags); in the latter case both
   destinations must be inspected.  */
19601 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19605   /* Simplify the test for uninteresting insns.  */
19606   if (insn_type != TYPE_SETCC
19607       && insn_type != TYPE_ICMOV
19608       && insn_type != TYPE_FCMOV
19609       && insn_type != TYPE_IBR)
19612   if ((set = single_set (dep_insn)) != 0)
19614       set = SET_DEST (set);
19617   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19618 	   && XVECLEN (PATTERN (dep_insn), 0) == 2
19619 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19620 	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19622       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* Bug fix: SET2 must come from the *second* element of the PARALLEL
   (index 1); the original read element 0 twice, so the second
   destination was never checked for overlap with INSN.  */
19623       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19628   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19631   /* This test is true if the dependent insn reads the flags but
19632      not any other potentially set register.  */
19633   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19636   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19642 /* Return true iff USE_INSN has a memory address with operands set by
/* Scans USE_INSN's operands for the first MEM and reports whether its
   address is modified by SET_INSN (an address-generation interlock).  */
19646 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19649   extract_insn_cached (use_insn);
19650   for (i = recog_data.n_operands - 1; i >= 0; --i)
19651     if (MEM_P (recog_data.operand[i]))
19653 	rtx addr = XEXP (recog_data.operand[i], 0);
19654 	return modified_in_p (addr, set_insn) != 0;
/* Scheduler hook: adjust the cost of the dependence LINK between INSN and
   DEP_INSN for the current processor.  Returns the (possibly reduced or
   increased) COST.  Several case bodies have lines elided in this listing.  */
19660 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19662   enum attr_type insn_type, dep_insn_type;
19663   enum attr_memory memory;
19665   int dep_insn_code_number;
19667   /* Anti and output dependencies have zero cost on all CPUs.  */
19668   if (REG_NOTE_KIND (link) != 0)
19671   dep_insn_code_number = recog_memoized (dep_insn);
19673   /* If we can't recognize the insns, we can't really do anything.  */
19674   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19677   insn_type = get_attr_type (insn);
19678   dep_insn_type = get_attr_type (dep_insn);
19682     case PROCESSOR_PENTIUM:
19683       /* Address Generation Interlock adds a cycle of latency.  */
19684       if (insn_type == TYPE_LEA)
19686 	  rtx addr = PATTERN (insn);
19688 	  if (GET_CODE (addr) == PARALLEL)
19689 	    addr = XVECEXP (addr, 0, 0);
19691 	  gcc_assert (GET_CODE (addr) == SET);
19693 	  addr = SET_SRC (addr);
19694 	  if (modified_in_p (addr, dep_insn))
19697       else if (ix86_agi_dependent (dep_insn, insn))
19700       /* ??? Compares pair with jump/setcc.  */
19701       if (ix86_flags_dependent (insn, dep_insn, insn_type))
19704       /* Floating point stores require value to be ready one cycle earlier.  */
19705       if (insn_type == TYPE_FMOV
19706 	  && get_attr_memory (insn) == MEMORY_STORE
19707 	  && !ix86_agi_dependent (dep_insn, insn))
19711     case PROCESSOR_PENTIUMPRO:
19712       memory = get_attr_memory (insn);
19714       /* INT->FP conversion is expensive.  */
19715       if (get_attr_fp_int_src (dep_insn))
19718       /* There is one cycle extra latency between an FP op and a store.  */
19719       if (insn_type == TYPE_FMOV
19720 	  && (set = single_set (dep_insn)) != NULL_RTX
19721 	  && (set2 = single_set (insn)) != NULL_RTX
19722 	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19723 	  && MEM_P (SET_DEST (set2)))
19726       /* Show ability of reorder buffer to hide latency of load by executing
19727 	 in parallel with previous instruction in case
19728 	 previous instruction is not needed to compute the address.  */
19729       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19730 	  && !ix86_agi_dependent (dep_insn, insn))
19732 	  /* Claim moves to take one cycle, as core can issue one load
19733 	     at time and the next load can start cycle later.  */
19734 	  if (dep_insn_type == TYPE_IMOV
19735 	      || dep_insn_type == TYPE_FMOV)
19743       memory = get_attr_memory (insn);
19745       /* The esp dependency is resolved before the instruction is really
19747       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19748 	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19751       /* INT->FP conversion is expensive.  */
19752       if (get_attr_fp_int_src (dep_insn))
19755       /* Show ability of reorder buffer to hide latency of load by executing
19756 	 in parallel with previous instruction in case
19757 	 previous instruction is not needed to compute the address.  */
19758       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19759 	  && !ix86_agi_dependent (dep_insn, insn))
19761 	  /* Claim moves to take one cycle, as core can issue one load
19762 	     at time and the next load can start cycle later.  */
19763 	  if (dep_insn_type == TYPE_IMOV
19764 	      || dep_insn_type == TYPE_FMOV)
19773     case PROCESSOR_ATHLON:
19775     case PROCESSOR_AMDFAM10:
19776     case PROCESSOR_ATOM:
19777     case PROCESSOR_GENERIC32:
19778     case PROCESSOR_GENERIC64:
19779       memory = get_attr_memory (insn);
19781       /* Show ability of reorder buffer to hide latency of load by executing
19782 	 in parallel with previous instruction in case
19783 	 previous instruction is not needed to compute the address.  */
19784       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19785 	  && !ix86_agi_dependent (dep_insn, insn))
19787 	  enum attr_unit unit = get_attr_unit (insn);
19790 	  /* Because of the difference between the length of integer and
19791 	     floating unit pipeline preparation stages, the memory operands
19792 	     for floating point are cheaper.
19794 	     ??? For Athlon it the difference is most probably 2.  */
19795 	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19798 	    loadcost = TARGET_ATHLON ? 2 : 0;
19800 	  if (cost >= loadcost)
19813 /* How many alternative schedules to try.  This should be as wide as the
19814    scheduling freedom in the DFA, but no wider.  Making this value too
19815    large results extra work for the scheduler.  */
/* Return values for each processor case are elided in this listing.  */
19818 ia32_multipass_dfa_lookahead (void)
19822     case PROCESSOR_PENTIUM:
19825     case PROCESSOR_PENTIUMPRO:
19835 /* Compute the alignment given to a constant that is being placed in memory.
19836    EXP is the constant and ALIGN is the alignment that the object would
19838    The value of this function is used instead of that alignment to align
/* Boosts alignment for DFmode and 128-bit-mode constants, and for long
   string constants when not optimizing for size.  */
19842 ix86_constant_alignment (tree exp, int align)
19844   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19845       || TREE_CODE (exp) == INTEGER_CST)
19847       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19849       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19852   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19853 	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19854     return BITS_PER_WORD;
19859 /* Compute the alignment for a static variable.
19860    TYPE is the data type, and ALIGN is the alignment that
19861    the object would ordinarily have.  The value of this function is used
19862    instead of that alignment to align the object.  */
19865 ix86_data_alignment (tree type, int align)
19867   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large aggregates get max_align; this mirrors glibc's memcpy/memset
   alignment assumptions (comment elided in this listing).  */
19869   if (AGGREGATE_TYPE_P (type)
19870       && TYPE_SIZE (type)
19871       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19872       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19873 	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19874       && align < max_align)
19877   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19878      to 16byte boundary.  */
19881       if (AGGREGATE_TYPE_P (type)
19882 	  && TYPE_SIZE (type)
19883 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19884 	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19885 	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-category boosts: arrays and fields of DFmode to 64, 128-bit
   modes (XF/TF/complex) to 128.  */
19889   if (TREE_CODE (type) == ARRAY_TYPE)
19891       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19893       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19896   else if (TREE_CODE (type) == COMPLEX_TYPE)
19899       if (TYPE_MODE (type) == DCmode && align < 64)
19901       if ((TYPE_MODE (type) == XCmode
19902 	   || TYPE_MODE (type) == TCmode) && align < 128)
19905   else if ((TREE_CODE (type) == RECORD_TYPE
19906 	    || TREE_CODE (type) == UNION_TYPE
19907 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
19908 	   && TYPE_FIELDS (type))
19910       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19912       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19915   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19916 	   || TREE_CODE (type) == INTEGER_TYPE)
19918       if (TYPE_MODE (type) == DFmode && align < 64)
19920       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19927 /* Compute the alignment for a local variable or a stack slot.  EXP is
19928    the data type or decl itself, MODE is the widest mode available and
19929    ALIGN is the alignment that the object would ordinarily have.  The
19930    value of this macro is used instead of that alignment to align the
19934 ix86_local_alignment (tree exp, enum machine_mode mode,
19935 		      unsigned int align)
19939   if (exp && DECL_P (exp))
19941       type = TREE_TYPE (exp);
19950   /* Don't do dynamic stack realignment for long long objects with
19951      -mpreferred-stack-boundary=2.  */
19954       && ix86_preferred_stack_boundary < 64
19955       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19956       && (!type || !TYPE_USER_ALIGN (type))
19957       && (!decl || !DECL_USER_ALIGN (decl)))
19960   /* If TYPE is NULL, we are allocating a stack slot for caller-save
19961      register in MODE.  We will return the largest alignment of XF
19965       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19966 	align = GET_MODE_ALIGNMENT (DFmode);
19970   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19971      to 16byte boundary.  */
19974       if (AGGREGATE_TYPE_P (type)
19975 	  && TYPE_SIZE (type)
19976 	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19977 	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19978 	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Same per-type-category boosts as ix86_data_alignment above.  */
19981   if (TREE_CODE (type) == ARRAY_TYPE)
19983       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19985       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19988   else if (TREE_CODE (type) == COMPLEX_TYPE)
19990       if (TYPE_MODE (type) == DCmode && align < 64)
19992       if ((TYPE_MODE (type) == XCmode
19993 	   || TYPE_MODE (type) == TCmode) && align < 128)
19996   else if ((TREE_CODE (type) == RECORD_TYPE
19997 	    || TREE_CODE (type) == UNION_TYPE
19998 	    || TREE_CODE (type) == QUAL_UNION_TYPE)
19999 	   && TYPE_FIELDS (type))
20001       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20003       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20006   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20007 	   || TREE_CODE (type) == INTEGER_TYPE)
20010       if (TYPE_MODE (type) == DFmode && align < 64)
20012       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20018 /* Emit RTL insns to initialize the variable parts of a trampoline.
20019    FNADDR is an RTX for the address of the function's pure code.
20020    CXT is an RTX for the static chain value for the function.  */
/* 32-bit layout: mov $cxt,%ecx (0xb9) then jmp rel32 (0xe9).
   64-bit layout: movabs/movl into r11 (function) and r10 (chain),
   then jmp *%r11 (0x49 0xff 0xe3).  Opcode bytes below are written
   little-endian via HImode/QImode stores.  */
20022 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
20026       /* Compute offset from the end of the jmp to the target function.  */
20027       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
20028 			       plus_constant (tramp, 10),
20029 			       NULL_RTX, 1, OPTAB_DIRECT);
20030       emit_move_insn (gen_rtx_MEM (QImode, tramp),
20031 		      gen_int_mode (0xb9, QImode));
20032       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
20033       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
20034 		      gen_int_mode (0xe9, QImode));
20035       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
20040       /* Try to load address using shorter movl instead of movabs.
20041          We may want to support movq for kernel mode, but kernel does not use
20042          trampolines at the moment.  */
20043       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20045 	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
20046 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20047 			  gen_int_mode (0xbb41, HImode));
20048 	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
20049 			  gen_lowpart (SImode, fnaddr));
20054 	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20055 			  gen_int_mode (0xbb49, HImode));
20056 	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20060       /* Load static chain using movabs to r10.  */
20061       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20062 		      gen_int_mode (0xba49, HImode));
20063       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20066       /* Jump to the r11 */
20067       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20068 		      gen_int_mode (0xff49, HImode));
20069       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20070 		      gen_int_mode (0xe3, QImode));
20072       gcc_assert (offset <= TRAMPOLINE_SIZE);
20075 #ifdef ENABLE_EXECUTE_STACK
20076   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20077 		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20081 /* Codes for all the SSE/MMX builtins. */
20084 IX86_BUILTIN_ADDPS,
20085 IX86_BUILTIN_ADDSS,
20086 IX86_BUILTIN_DIVPS,
20087 IX86_BUILTIN_DIVSS,
20088 IX86_BUILTIN_MULPS,
20089 IX86_BUILTIN_MULSS,
20090 IX86_BUILTIN_SUBPS,
20091 IX86_BUILTIN_SUBSS,
20093 IX86_BUILTIN_CMPEQPS,
20094 IX86_BUILTIN_CMPLTPS,
20095 IX86_BUILTIN_CMPLEPS,
20096 IX86_BUILTIN_CMPGTPS,
20097 IX86_BUILTIN_CMPGEPS,
20098 IX86_BUILTIN_CMPNEQPS,
20099 IX86_BUILTIN_CMPNLTPS,
20100 IX86_BUILTIN_CMPNLEPS,
20101 IX86_BUILTIN_CMPNGTPS,
20102 IX86_BUILTIN_CMPNGEPS,
20103 IX86_BUILTIN_CMPORDPS,
20104 IX86_BUILTIN_CMPUNORDPS,
20105 IX86_BUILTIN_CMPEQSS,
20106 IX86_BUILTIN_CMPLTSS,
20107 IX86_BUILTIN_CMPLESS,
20108 IX86_BUILTIN_CMPNEQSS,
20109 IX86_BUILTIN_CMPNLTSS,
20110 IX86_BUILTIN_CMPNLESS,
20111 IX86_BUILTIN_CMPNGTSS,
20112 IX86_BUILTIN_CMPNGESS,
20113 IX86_BUILTIN_CMPORDSS,
20114 IX86_BUILTIN_CMPUNORDSS,
20116 IX86_BUILTIN_COMIEQSS,
20117 IX86_BUILTIN_COMILTSS,
20118 IX86_BUILTIN_COMILESS,
20119 IX86_BUILTIN_COMIGTSS,
20120 IX86_BUILTIN_COMIGESS,
20121 IX86_BUILTIN_COMINEQSS,
20122 IX86_BUILTIN_UCOMIEQSS,
20123 IX86_BUILTIN_UCOMILTSS,
20124 IX86_BUILTIN_UCOMILESS,
20125 IX86_BUILTIN_UCOMIGTSS,
20126 IX86_BUILTIN_UCOMIGESS,
20127 IX86_BUILTIN_UCOMINEQSS,
20129 IX86_BUILTIN_CVTPI2PS,
20130 IX86_BUILTIN_CVTPS2PI,
20131 IX86_BUILTIN_CVTSI2SS,
20132 IX86_BUILTIN_CVTSI642SS,
20133 IX86_BUILTIN_CVTSS2SI,
20134 IX86_BUILTIN_CVTSS2SI64,
20135 IX86_BUILTIN_CVTTPS2PI,
20136 IX86_BUILTIN_CVTTSS2SI,
20137 IX86_BUILTIN_CVTTSS2SI64,
20139 IX86_BUILTIN_MAXPS,
20140 IX86_BUILTIN_MAXSS,
20141 IX86_BUILTIN_MINPS,
20142 IX86_BUILTIN_MINSS,
20144 IX86_BUILTIN_LOADUPS,
20145 IX86_BUILTIN_STOREUPS,
20146 IX86_BUILTIN_MOVSS,
20148 IX86_BUILTIN_MOVHLPS,
20149 IX86_BUILTIN_MOVLHPS,
20150 IX86_BUILTIN_LOADHPS,
20151 IX86_BUILTIN_LOADLPS,
20152 IX86_BUILTIN_STOREHPS,
20153 IX86_BUILTIN_STORELPS,
20155 IX86_BUILTIN_MASKMOVQ,
20156 IX86_BUILTIN_MOVMSKPS,
20157 IX86_BUILTIN_PMOVMSKB,
20159 IX86_BUILTIN_MOVNTPS,
20160 IX86_BUILTIN_MOVNTQ,
20162 IX86_BUILTIN_LOADDQU,
20163 IX86_BUILTIN_STOREDQU,
20165 IX86_BUILTIN_PACKSSWB,
20166 IX86_BUILTIN_PACKSSDW,
20167 IX86_BUILTIN_PACKUSWB,
20169 IX86_BUILTIN_PADDB,
20170 IX86_BUILTIN_PADDW,
20171 IX86_BUILTIN_PADDD,
20172 IX86_BUILTIN_PADDQ,
20173 IX86_BUILTIN_PADDSB,
20174 IX86_BUILTIN_PADDSW,
20175 IX86_BUILTIN_PADDUSB,
20176 IX86_BUILTIN_PADDUSW,
20177 IX86_BUILTIN_PSUBB,
20178 IX86_BUILTIN_PSUBW,
20179 IX86_BUILTIN_PSUBD,
20180 IX86_BUILTIN_PSUBQ,
20181 IX86_BUILTIN_PSUBSB,
20182 IX86_BUILTIN_PSUBSW,
20183 IX86_BUILTIN_PSUBUSB,
20184 IX86_BUILTIN_PSUBUSW,
20187 IX86_BUILTIN_PANDN,
20191 IX86_BUILTIN_PAVGB,
20192 IX86_BUILTIN_PAVGW,
20194 IX86_BUILTIN_PCMPEQB,
20195 IX86_BUILTIN_PCMPEQW,
20196 IX86_BUILTIN_PCMPEQD,
20197 IX86_BUILTIN_PCMPGTB,
20198 IX86_BUILTIN_PCMPGTW,
20199 IX86_BUILTIN_PCMPGTD,
20201 IX86_BUILTIN_PMADDWD,
20203 IX86_BUILTIN_PMAXSW,
20204 IX86_BUILTIN_PMAXUB,
20205 IX86_BUILTIN_PMINSW,
20206 IX86_BUILTIN_PMINUB,
20208 IX86_BUILTIN_PMULHUW,
20209 IX86_BUILTIN_PMULHW,
20210 IX86_BUILTIN_PMULLW,
20212 IX86_BUILTIN_PSADBW,
20213 IX86_BUILTIN_PSHUFW,
20215 IX86_BUILTIN_PSLLW,
20216 IX86_BUILTIN_PSLLD,
20217 IX86_BUILTIN_PSLLQ,
20218 IX86_BUILTIN_PSRAW,
20219 IX86_BUILTIN_PSRAD,
20220 IX86_BUILTIN_PSRLW,
20221 IX86_BUILTIN_PSRLD,
20222 IX86_BUILTIN_PSRLQ,
20223 IX86_BUILTIN_PSLLWI,
20224 IX86_BUILTIN_PSLLDI,
20225 IX86_BUILTIN_PSLLQI,
20226 IX86_BUILTIN_PSRAWI,
20227 IX86_BUILTIN_PSRADI,
20228 IX86_BUILTIN_PSRLWI,
20229 IX86_BUILTIN_PSRLDI,
20230 IX86_BUILTIN_PSRLQI,
20232 IX86_BUILTIN_PUNPCKHBW,
20233 IX86_BUILTIN_PUNPCKHWD,
20234 IX86_BUILTIN_PUNPCKHDQ,
20235 IX86_BUILTIN_PUNPCKLBW,
20236 IX86_BUILTIN_PUNPCKLWD,
20237 IX86_BUILTIN_PUNPCKLDQ,
20239 IX86_BUILTIN_SHUFPS,
20241 IX86_BUILTIN_RCPPS,
20242 IX86_BUILTIN_RCPSS,
20243 IX86_BUILTIN_RSQRTPS,
20244 IX86_BUILTIN_RSQRTPS_NR,
20245 IX86_BUILTIN_RSQRTSS,
20246 IX86_BUILTIN_RSQRTF,
20247 IX86_BUILTIN_SQRTPS,
20248 IX86_BUILTIN_SQRTPS_NR,
20249 IX86_BUILTIN_SQRTSS,
20251 IX86_BUILTIN_UNPCKHPS,
20252 IX86_BUILTIN_UNPCKLPS,
20254 IX86_BUILTIN_ANDPS,
20255 IX86_BUILTIN_ANDNPS,
20257 IX86_BUILTIN_XORPS,
20260 IX86_BUILTIN_LDMXCSR,
20261 IX86_BUILTIN_STMXCSR,
20262 IX86_BUILTIN_SFENCE,
20264 /* 3DNow! Original */
20265 IX86_BUILTIN_FEMMS,
20266 IX86_BUILTIN_PAVGUSB,
20267 IX86_BUILTIN_PF2ID,
20268 IX86_BUILTIN_PFACC,
20269 IX86_BUILTIN_PFADD,
20270 IX86_BUILTIN_PFCMPEQ,
20271 IX86_BUILTIN_PFCMPGE,
20272 IX86_BUILTIN_PFCMPGT,
20273 IX86_BUILTIN_PFMAX,
20274 IX86_BUILTIN_PFMIN,
20275 IX86_BUILTIN_PFMUL,
20276 IX86_BUILTIN_PFRCP,
20277 IX86_BUILTIN_PFRCPIT1,
20278 IX86_BUILTIN_PFRCPIT2,
20279 IX86_BUILTIN_PFRSQIT1,
20280 IX86_BUILTIN_PFRSQRT,
20281 IX86_BUILTIN_PFSUB,
20282 IX86_BUILTIN_PFSUBR,
20283 IX86_BUILTIN_PI2FD,
20284 IX86_BUILTIN_PMULHRW,
20286 /* 3DNow! Athlon Extensions */
20287 IX86_BUILTIN_PF2IW,
20288 IX86_BUILTIN_PFNACC,
20289 IX86_BUILTIN_PFPNACC,
20290 IX86_BUILTIN_PI2FW,
20291 IX86_BUILTIN_PSWAPDSI,
20292 IX86_BUILTIN_PSWAPDSF,
20295 IX86_BUILTIN_ADDPD,
20296 IX86_BUILTIN_ADDSD,
20297 IX86_BUILTIN_DIVPD,
20298 IX86_BUILTIN_DIVSD,
20299 IX86_BUILTIN_MULPD,
20300 IX86_BUILTIN_MULSD,
20301 IX86_BUILTIN_SUBPD,
20302 IX86_BUILTIN_SUBSD,
20304 IX86_BUILTIN_CMPEQPD,
20305 IX86_BUILTIN_CMPLTPD,
20306 IX86_BUILTIN_CMPLEPD,
20307 IX86_BUILTIN_CMPGTPD,
20308 IX86_BUILTIN_CMPGEPD,
20309 IX86_BUILTIN_CMPNEQPD,
20310 IX86_BUILTIN_CMPNLTPD,
20311 IX86_BUILTIN_CMPNLEPD,
20312 IX86_BUILTIN_CMPNGTPD,
20313 IX86_BUILTIN_CMPNGEPD,
20314 IX86_BUILTIN_CMPORDPD,
20315 IX86_BUILTIN_CMPUNORDPD,
20316 IX86_BUILTIN_CMPEQSD,
20317 IX86_BUILTIN_CMPLTSD,
20318 IX86_BUILTIN_CMPLESD,
20319 IX86_BUILTIN_CMPNEQSD,
20320 IX86_BUILTIN_CMPNLTSD,
20321 IX86_BUILTIN_CMPNLESD,
20322 IX86_BUILTIN_CMPORDSD,
20323 IX86_BUILTIN_CMPUNORDSD,
20325 IX86_BUILTIN_COMIEQSD,
20326 IX86_BUILTIN_COMILTSD,
20327 IX86_BUILTIN_COMILESD,
20328 IX86_BUILTIN_COMIGTSD,
20329 IX86_BUILTIN_COMIGESD,
20330 IX86_BUILTIN_COMINEQSD,
20331 IX86_BUILTIN_UCOMIEQSD,
20332 IX86_BUILTIN_UCOMILTSD,
20333 IX86_BUILTIN_UCOMILESD,
20334 IX86_BUILTIN_UCOMIGTSD,
20335 IX86_BUILTIN_UCOMIGESD,
20336 IX86_BUILTIN_UCOMINEQSD,
20338 IX86_BUILTIN_MAXPD,
20339 IX86_BUILTIN_MAXSD,
20340 IX86_BUILTIN_MINPD,
20341 IX86_BUILTIN_MINSD,
20343 IX86_BUILTIN_ANDPD,
20344 IX86_BUILTIN_ANDNPD,
20346 IX86_BUILTIN_XORPD,
20348 IX86_BUILTIN_SQRTPD,
20349 IX86_BUILTIN_SQRTSD,
20351 IX86_BUILTIN_UNPCKHPD,
20352 IX86_BUILTIN_UNPCKLPD,
20354 IX86_BUILTIN_SHUFPD,
20356 IX86_BUILTIN_LOADUPD,
20357 IX86_BUILTIN_STOREUPD,
20358 IX86_BUILTIN_MOVSD,
20360 IX86_BUILTIN_LOADHPD,
20361 IX86_BUILTIN_LOADLPD,
20363 IX86_BUILTIN_CVTDQ2PD,
20364 IX86_BUILTIN_CVTDQ2PS,
20366 IX86_BUILTIN_CVTPD2DQ,
20367 IX86_BUILTIN_CVTPD2PI,
20368 IX86_BUILTIN_CVTPD2PS,
20369 IX86_BUILTIN_CVTTPD2DQ,
20370 IX86_BUILTIN_CVTTPD2PI,
20372 IX86_BUILTIN_CVTPI2PD,
20373 IX86_BUILTIN_CVTSI2SD,
20374 IX86_BUILTIN_CVTSI642SD,
20376 IX86_BUILTIN_CVTSD2SI,
20377 IX86_BUILTIN_CVTSD2SI64,
20378 IX86_BUILTIN_CVTSD2SS,
20379 IX86_BUILTIN_CVTSS2SD,
20380 IX86_BUILTIN_CVTTSD2SI,
20381 IX86_BUILTIN_CVTTSD2SI64,
20383 IX86_BUILTIN_CVTPS2DQ,
20384 IX86_BUILTIN_CVTPS2PD,
20385 IX86_BUILTIN_CVTTPS2DQ,
20387 IX86_BUILTIN_MOVNTI,
20388 IX86_BUILTIN_MOVNTPD,
20389 IX86_BUILTIN_MOVNTDQ,
20391 IX86_BUILTIN_MOVQ128,
20394 IX86_BUILTIN_MASKMOVDQU,
20395 IX86_BUILTIN_MOVMSKPD,
20396 IX86_BUILTIN_PMOVMSKB128,
20398 IX86_BUILTIN_PACKSSWB128,
20399 IX86_BUILTIN_PACKSSDW128,
20400 IX86_BUILTIN_PACKUSWB128,
20402 IX86_BUILTIN_PADDB128,
20403 IX86_BUILTIN_PADDW128,
20404 IX86_BUILTIN_PADDD128,
20405 IX86_BUILTIN_PADDQ128,
20406 IX86_BUILTIN_PADDSB128,
20407 IX86_BUILTIN_PADDSW128,
20408 IX86_BUILTIN_PADDUSB128,
20409 IX86_BUILTIN_PADDUSW128,
20410 IX86_BUILTIN_PSUBB128,
20411 IX86_BUILTIN_PSUBW128,
20412 IX86_BUILTIN_PSUBD128,
20413 IX86_BUILTIN_PSUBQ128,
20414 IX86_BUILTIN_PSUBSB128,
20415 IX86_BUILTIN_PSUBSW128,
20416 IX86_BUILTIN_PSUBUSB128,
20417 IX86_BUILTIN_PSUBUSW128,
20419 IX86_BUILTIN_PAND128,
20420 IX86_BUILTIN_PANDN128,
20421 IX86_BUILTIN_POR128,
20422 IX86_BUILTIN_PXOR128,
20424 IX86_BUILTIN_PAVGB128,
20425 IX86_BUILTIN_PAVGW128,
20427 IX86_BUILTIN_PCMPEQB128,
20428 IX86_BUILTIN_PCMPEQW128,
20429 IX86_BUILTIN_PCMPEQD128,
20430 IX86_BUILTIN_PCMPGTB128,
20431 IX86_BUILTIN_PCMPGTW128,
20432 IX86_BUILTIN_PCMPGTD128,
20434 IX86_BUILTIN_PMADDWD128,
20436 IX86_BUILTIN_PMAXSW128,
20437 IX86_BUILTIN_PMAXUB128,
20438 IX86_BUILTIN_PMINSW128,
20439 IX86_BUILTIN_PMINUB128,
20441 IX86_BUILTIN_PMULUDQ,
20442 IX86_BUILTIN_PMULUDQ128,
20443 IX86_BUILTIN_PMULHUW128,
20444 IX86_BUILTIN_PMULHW128,
20445 IX86_BUILTIN_PMULLW128,
20447 IX86_BUILTIN_PSADBW128,
20448 IX86_BUILTIN_PSHUFHW,
20449 IX86_BUILTIN_PSHUFLW,
20450 IX86_BUILTIN_PSHUFD,
20452 IX86_BUILTIN_PSLLDQI128,
20453 IX86_BUILTIN_PSLLWI128,
20454 IX86_BUILTIN_PSLLDI128,
20455 IX86_BUILTIN_PSLLQI128,
20456 IX86_BUILTIN_PSRAWI128,
20457 IX86_BUILTIN_PSRADI128,
20458 IX86_BUILTIN_PSRLDQI128,
20459 IX86_BUILTIN_PSRLWI128,
20460 IX86_BUILTIN_PSRLDI128,
20461 IX86_BUILTIN_PSRLQI128,
20463 IX86_BUILTIN_PSLLDQ128,
20464 IX86_BUILTIN_PSLLW128,
20465 IX86_BUILTIN_PSLLD128,
20466 IX86_BUILTIN_PSLLQ128,
20467 IX86_BUILTIN_PSRAW128,
20468 IX86_BUILTIN_PSRAD128,
20469 IX86_BUILTIN_PSRLW128,
20470 IX86_BUILTIN_PSRLD128,
20471 IX86_BUILTIN_PSRLQ128,
20473 IX86_BUILTIN_PUNPCKHBW128,
20474 IX86_BUILTIN_PUNPCKHWD128,
20475 IX86_BUILTIN_PUNPCKHDQ128,
20476 IX86_BUILTIN_PUNPCKHQDQ128,
20477 IX86_BUILTIN_PUNPCKLBW128,
20478 IX86_BUILTIN_PUNPCKLWD128,
20479 IX86_BUILTIN_PUNPCKLDQ128,
20480 IX86_BUILTIN_PUNPCKLQDQ128,
20482 IX86_BUILTIN_CLFLUSH,
20483 IX86_BUILTIN_MFENCE,
20484 IX86_BUILTIN_LFENCE,
20486 IX86_BUILTIN_BSRSI,
20487 IX86_BUILTIN_BSRDI,
20488 IX86_BUILTIN_RDPMC,
20489 IX86_BUILTIN_RDTSC,
20490 IX86_BUILTIN_RDTSCP,
20491 IX86_BUILTIN_ROLQI,
20492 IX86_BUILTIN_ROLHI,
20493 IX86_BUILTIN_RORQI,
20494 IX86_BUILTIN_RORHI,
20497 IX86_BUILTIN_ADDSUBPS,
20498 IX86_BUILTIN_HADDPS,
20499 IX86_BUILTIN_HSUBPS,
20500 IX86_BUILTIN_MOVSHDUP,
20501 IX86_BUILTIN_MOVSLDUP,
20502 IX86_BUILTIN_ADDSUBPD,
20503 IX86_BUILTIN_HADDPD,
20504 IX86_BUILTIN_HSUBPD,
20505 IX86_BUILTIN_LDDQU,
20507 IX86_BUILTIN_MONITOR,
20508 IX86_BUILTIN_MWAIT,
20511 IX86_BUILTIN_PHADDW,
20512 IX86_BUILTIN_PHADDD,
20513 IX86_BUILTIN_PHADDSW,
20514 IX86_BUILTIN_PHSUBW,
20515 IX86_BUILTIN_PHSUBD,
20516 IX86_BUILTIN_PHSUBSW,
20517 IX86_BUILTIN_PMADDUBSW,
20518 IX86_BUILTIN_PMULHRSW,
20519 IX86_BUILTIN_PSHUFB,
20520 IX86_BUILTIN_PSIGNB,
20521 IX86_BUILTIN_PSIGNW,
20522 IX86_BUILTIN_PSIGND,
20523 IX86_BUILTIN_PALIGNR,
20524 IX86_BUILTIN_PABSB,
20525 IX86_BUILTIN_PABSW,
20526 IX86_BUILTIN_PABSD,
20528 IX86_BUILTIN_PHADDW128,
20529 IX86_BUILTIN_PHADDD128,
20530 IX86_BUILTIN_PHADDSW128,
20531 IX86_BUILTIN_PHSUBW128,
20532 IX86_BUILTIN_PHSUBD128,
20533 IX86_BUILTIN_PHSUBSW128,
20534 IX86_BUILTIN_PMADDUBSW128,
20535 IX86_BUILTIN_PMULHRSW128,
20536 IX86_BUILTIN_PSHUFB128,
20537 IX86_BUILTIN_PSIGNB128,
20538 IX86_BUILTIN_PSIGNW128,
20539 IX86_BUILTIN_PSIGND128,
20540 IX86_BUILTIN_PALIGNR128,
20541 IX86_BUILTIN_PABSB128,
20542 IX86_BUILTIN_PABSW128,
20543 IX86_BUILTIN_PABSD128,
20545 /* AMDFAM10 - SSE4A New Instructions. */
20546 IX86_BUILTIN_MOVNTSD,
20547 IX86_BUILTIN_MOVNTSS,
20548 IX86_BUILTIN_EXTRQI,
20549 IX86_BUILTIN_EXTRQ,
20550 IX86_BUILTIN_INSERTQI,
20551 IX86_BUILTIN_INSERTQ,
20554 IX86_BUILTIN_BLENDPD,
20555 IX86_BUILTIN_BLENDPS,
20556 IX86_BUILTIN_BLENDVPD,
20557 IX86_BUILTIN_BLENDVPS,
20558 IX86_BUILTIN_PBLENDVB128,
20559 IX86_BUILTIN_PBLENDW128,
20564 IX86_BUILTIN_INSERTPS128,
20566 IX86_BUILTIN_MOVNTDQA,
20567 IX86_BUILTIN_MPSADBW128,
20568 IX86_BUILTIN_PACKUSDW128,
20569 IX86_BUILTIN_PCMPEQQ,
20570 IX86_BUILTIN_PHMINPOSUW128,
20572 IX86_BUILTIN_PMAXSB128,
20573 IX86_BUILTIN_PMAXSD128,
20574 IX86_BUILTIN_PMAXUD128,
20575 IX86_BUILTIN_PMAXUW128,
20577 IX86_BUILTIN_PMINSB128,
20578 IX86_BUILTIN_PMINSD128,
20579 IX86_BUILTIN_PMINUD128,
20580 IX86_BUILTIN_PMINUW128,
20582 IX86_BUILTIN_PMOVSXBW128,
20583 IX86_BUILTIN_PMOVSXBD128,
20584 IX86_BUILTIN_PMOVSXBQ128,
20585 IX86_BUILTIN_PMOVSXWD128,
20586 IX86_BUILTIN_PMOVSXWQ128,
20587 IX86_BUILTIN_PMOVSXDQ128,
20589 IX86_BUILTIN_PMOVZXBW128,
20590 IX86_BUILTIN_PMOVZXBD128,
20591 IX86_BUILTIN_PMOVZXBQ128,
20592 IX86_BUILTIN_PMOVZXWD128,
20593 IX86_BUILTIN_PMOVZXWQ128,
20594 IX86_BUILTIN_PMOVZXDQ128,
20596 IX86_BUILTIN_PMULDQ128,
20597 IX86_BUILTIN_PMULLD128,
20599 IX86_BUILTIN_ROUNDPD,
20600 IX86_BUILTIN_ROUNDPS,
20601 IX86_BUILTIN_ROUNDSD,
20602 IX86_BUILTIN_ROUNDSS,
20604 IX86_BUILTIN_PTESTZ,
20605 IX86_BUILTIN_PTESTC,
20606 IX86_BUILTIN_PTESTNZC,
20608 IX86_BUILTIN_VEC_INIT_V2SI,
20609 IX86_BUILTIN_VEC_INIT_V4HI,
20610 IX86_BUILTIN_VEC_INIT_V8QI,
20611 IX86_BUILTIN_VEC_EXT_V2DF,
20612 IX86_BUILTIN_VEC_EXT_V2DI,
20613 IX86_BUILTIN_VEC_EXT_V4SF,
20614 IX86_BUILTIN_VEC_EXT_V4SI,
20615 IX86_BUILTIN_VEC_EXT_V8HI,
20616 IX86_BUILTIN_VEC_EXT_V2SI,
20617 IX86_BUILTIN_VEC_EXT_V4HI,
20618 IX86_BUILTIN_VEC_EXT_V16QI,
20619 IX86_BUILTIN_VEC_SET_V2DI,
20620 IX86_BUILTIN_VEC_SET_V4SF,
20621 IX86_BUILTIN_VEC_SET_V4SI,
20622 IX86_BUILTIN_VEC_SET_V8HI,
20623 IX86_BUILTIN_VEC_SET_V4HI,
20624 IX86_BUILTIN_VEC_SET_V16QI,
20626 IX86_BUILTIN_VEC_PACK_SFIX,
20629 IX86_BUILTIN_CRC32QI,
20630 IX86_BUILTIN_CRC32HI,
20631 IX86_BUILTIN_CRC32SI,
20632 IX86_BUILTIN_CRC32DI,
20634 IX86_BUILTIN_PCMPESTRI128,
20635 IX86_BUILTIN_PCMPESTRM128,
20636 IX86_BUILTIN_PCMPESTRA128,
20637 IX86_BUILTIN_PCMPESTRC128,
20638 IX86_BUILTIN_PCMPESTRO128,
20639 IX86_BUILTIN_PCMPESTRS128,
20640 IX86_BUILTIN_PCMPESTRZ128,
20641 IX86_BUILTIN_PCMPISTRI128,
20642 IX86_BUILTIN_PCMPISTRM128,
20643 IX86_BUILTIN_PCMPISTRA128,
20644 IX86_BUILTIN_PCMPISTRC128,
20645 IX86_BUILTIN_PCMPISTRO128,
20646 IX86_BUILTIN_PCMPISTRS128,
20647 IX86_BUILTIN_PCMPISTRZ128,
20649 IX86_BUILTIN_PCMPGTQ,
20651 /* AES instructions */
20652 IX86_BUILTIN_AESENC128,
20653 IX86_BUILTIN_AESENCLAST128,
20654 IX86_BUILTIN_AESDEC128,
20655 IX86_BUILTIN_AESDECLAST128,
20656 IX86_BUILTIN_AESIMC128,
20657 IX86_BUILTIN_AESKEYGENASSIST128,
20659 /* PCLMUL instruction */
20660 IX86_BUILTIN_PCLMULQDQ128,
20663 IX86_BUILTIN_ADDPD256,
20664 IX86_BUILTIN_ADDPS256,
20665 IX86_BUILTIN_ADDSUBPD256,
20666 IX86_BUILTIN_ADDSUBPS256,
20667 IX86_BUILTIN_ANDPD256,
20668 IX86_BUILTIN_ANDPS256,
20669 IX86_BUILTIN_ANDNPD256,
20670 IX86_BUILTIN_ANDNPS256,
20671 IX86_BUILTIN_BLENDPD256,
20672 IX86_BUILTIN_BLENDPS256,
20673 IX86_BUILTIN_BLENDVPD256,
20674 IX86_BUILTIN_BLENDVPS256,
20675 IX86_BUILTIN_DIVPD256,
20676 IX86_BUILTIN_DIVPS256,
20677 IX86_BUILTIN_DPPS256,
20678 IX86_BUILTIN_HADDPD256,
20679 IX86_BUILTIN_HADDPS256,
20680 IX86_BUILTIN_HSUBPD256,
20681 IX86_BUILTIN_HSUBPS256,
20682 IX86_BUILTIN_MAXPD256,
20683 IX86_BUILTIN_MAXPS256,
20684 IX86_BUILTIN_MINPD256,
20685 IX86_BUILTIN_MINPS256,
20686 IX86_BUILTIN_MULPD256,
20687 IX86_BUILTIN_MULPS256,
20688 IX86_BUILTIN_ORPD256,
20689 IX86_BUILTIN_ORPS256,
20690 IX86_BUILTIN_SHUFPD256,
20691 IX86_BUILTIN_SHUFPS256,
20692 IX86_BUILTIN_SUBPD256,
20693 IX86_BUILTIN_SUBPS256,
20694 IX86_BUILTIN_XORPD256,
20695 IX86_BUILTIN_XORPS256,
20696 IX86_BUILTIN_CMPSD,
20697 IX86_BUILTIN_CMPSS,
20698 IX86_BUILTIN_CMPPD,
20699 IX86_BUILTIN_CMPPS,
20700 IX86_BUILTIN_CMPPD256,
20701 IX86_BUILTIN_CMPPS256,
20702 IX86_BUILTIN_CVTDQ2PD256,
20703 IX86_BUILTIN_CVTDQ2PS256,
20704 IX86_BUILTIN_CVTPD2PS256,
20705 IX86_BUILTIN_CVTPS2DQ256,
20706 IX86_BUILTIN_CVTPS2PD256,
20707 IX86_BUILTIN_CVTTPD2DQ256,
20708 IX86_BUILTIN_CVTPD2DQ256,
20709 IX86_BUILTIN_CVTTPS2DQ256,
20710 IX86_BUILTIN_EXTRACTF128PD256,
20711 IX86_BUILTIN_EXTRACTF128PS256,
20712 IX86_BUILTIN_EXTRACTF128SI256,
20713 IX86_BUILTIN_VZEROALL,
20714 IX86_BUILTIN_VZEROUPPER,
20715 IX86_BUILTIN_VZEROUPPER_REX64,
20716 IX86_BUILTIN_VPERMILVARPD,
20717 IX86_BUILTIN_VPERMILVARPS,
20718 IX86_BUILTIN_VPERMILVARPD256,
20719 IX86_BUILTIN_VPERMILVARPS256,
20720 IX86_BUILTIN_VPERMILPD,
20721 IX86_BUILTIN_VPERMILPS,
20722 IX86_BUILTIN_VPERMILPD256,
20723 IX86_BUILTIN_VPERMILPS256,
20724 IX86_BUILTIN_VPERM2F128PD256,
20725 IX86_BUILTIN_VPERM2F128PS256,
20726 IX86_BUILTIN_VPERM2F128SI256,
20727 IX86_BUILTIN_VBROADCASTSS,
20728 IX86_BUILTIN_VBROADCASTSD256,
20729 IX86_BUILTIN_VBROADCASTSS256,
20730 IX86_BUILTIN_VBROADCASTPD256,
20731 IX86_BUILTIN_VBROADCASTPS256,
20732 IX86_BUILTIN_VINSERTF128PD256,
20733 IX86_BUILTIN_VINSERTF128PS256,
20734 IX86_BUILTIN_VINSERTF128SI256,
20735 IX86_BUILTIN_LOADUPD256,
20736 IX86_BUILTIN_LOADUPS256,
20737 IX86_BUILTIN_STOREUPD256,
20738 IX86_BUILTIN_STOREUPS256,
20739 IX86_BUILTIN_LDDQU256,
20740 IX86_BUILTIN_MOVNTDQ256,
20741 IX86_BUILTIN_MOVNTPD256,
20742 IX86_BUILTIN_MOVNTPS256,
20743 IX86_BUILTIN_LOADDQU256,
20744 IX86_BUILTIN_STOREDQU256,
20745 IX86_BUILTIN_MASKLOADPD,
20746 IX86_BUILTIN_MASKLOADPS,
20747 IX86_BUILTIN_MASKSTOREPD,
20748 IX86_BUILTIN_MASKSTOREPS,
20749 IX86_BUILTIN_MASKLOADPD256,
20750 IX86_BUILTIN_MASKLOADPS256,
20751 IX86_BUILTIN_MASKSTOREPD256,
20752 IX86_BUILTIN_MASKSTOREPS256,
20753 IX86_BUILTIN_MOVSHDUP256,
20754 IX86_BUILTIN_MOVSLDUP256,
20755 IX86_BUILTIN_MOVDDUP256,
20757 IX86_BUILTIN_SQRTPD256,
20758 IX86_BUILTIN_SQRTPS256,
20759 IX86_BUILTIN_SQRTPS_NR256,
20760 IX86_BUILTIN_RSQRTPS256,
20761 IX86_BUILTIN_RSQRTPS_NR256,
20763 IX86_BUILTIN_RCPPS256,
20765 IX86_BUILTIN_ROUNDPD256,
20766 IX86_BUILTIN_ROUNDPS256,
20768 IX86_BUILTIN_UNPCKHPD256,
20769 IX86_BUILTIN_UNPCKLPD256,
20770 IX86_BUILTIN_UNPCKHPS256,
20771 IX86_BUILTIN_UNPCKLPS256,
20773 IX86_BUILTIN_SI256_SI,
20774 IX86_BUILTIN_PS256_PS,
20775 IX86_BUILTIN_PD256_PD,
20776 IX86_BUILTIN_SI_SI256,
20777 IX86_BUILTIN_PS_PS256,
20778 IX86_BUILTIN_PD_PD256,
20780 IX86_BUILTIN_VTESTZPD,
20781 IX86_BUILTIN_VTESTCPD,
20782 IX86_BUILTIN_VTESTNZCPD,
20783 IX86_BUILTIN_VTESTZPS,
20784 IX86_BUILTIN_VTESTCPS,
20785 IX86_BUILTIN_VTESTNZCPS,
20786 IX86_BUILTIN_VTESTZPD256,
20787 IX86_BUILTIN_VTESTCPD256,
20788 IX86_BUILTIN_VTESTNZCPD256,
20789 IX86_BUILTIN_VTESTZPS256,
20790 IX86_BUILTIN_VTESTCPS256,
20791 IX86_BUILTIN_VTESTNZCPS256,
20792 IX86_BUILTIN_PTESTZ256,
20793 IX86_BUILTIN_PTESTC256,
20794 IX86_BUILTIN_PTESTNZC256,
20796 IX86_BUILTIN_MOVMSKPD256,
20797 IX86_BUILTIN_MOVMSKPS256,
20799 /* TFmode support builtins. */
20801 IX86_BUILTIN_HUGE_VALQ,
20802 IX86_BUILTIN_FABSQ,
20803 IX86_BUILTIN_COPYSIGNQ,
20805 /* SSE5 instructions */
20806 IX86_BUILTIN_FMADDSS,
20807 IX86_BUILTIN_FMADDSD,
20808 IX86_BUILTIN_FMADDPS,
20809 IX86_BUILTIN_FMADDPD,
20810 IX86_BUILTIN_FMSUBSS,
20811 IX86_BUILTIN_FMSUBSD,
20812 IX86_BUILTIN_FMSUBPS,
20813 IX86_BUILTIN_FMSUBPD,
20814 IX86_BUILTIN_FNMADDSS,
20815 IX86_BUILTIN_FNMADDSD,
20816 IX86_BUILTIN_FNMADDPS,
20817 IX86_BUILTIN_FNMADDPD,
20818 IX86_BUILTIN_FNMSUBSS,
20819 IX86_BUILTIN_FNMSUBSD,
20820 IX86_BUILTIN_FNMSUBPS,
20821 IX86_BUILTIN_FNMSUBPD,
20822 IX86_BUILTIN_PCMOV,
20823 IX86_BUILTIN_PCMOV_V2DI,
20824 IX86_BUILTIN_PCMOV_V4SI,
20825 IX86_BUILTIN_PCMOV_V8HI,
20826 IX86_BUILTIN_PCMOV_V16QI,
20827 IX86_BUILTIN_PCMOV_V4SF,
20828 IX86_BUILTIN_PCMOV_V2DF,
20829 IX86_BUILTIN_PPERM,
20830 IX86_BUILTIN_PERMPS,
20831 IX86_BUILTIN_PERMPD,
20832 IX86_BUILTIN_PMACSSWW,
20833 IX86_BUILTIN_PMACSWW,
20834 IX86_BUILTIN_PMACSSWD,
20835 IX86_BUILTIN_PMACSWD,
20836 IX86_BUILTIN_PMACSSDD,
20837 IX86_BUILTIN_PMACSDD,
20838 IX86_BUILTIN_PMACSSDQL,
20839 IX86_BUILTIN_PMACSSDQH,
20840 IX86_BUILTIN_PMACSDQL,
20841 IX86_BUILTIN_PMACSDQH,
20842 IX86_BUILTIN_PMADCSSWD,
20843 IX86_BUILTIN_PMADCSWD,
20844 IX86_BUILTIN_PHADDBW,
20845 IX86_BUILTIN_PHADDBD,
20846 IX86_BUILTIN_PHADDBQ,
20847 IX86_BUILTIN_PHADDWD,
20848 IX86_BUILTIN_PHADDWQ,
20849 IX86_BUILTIN_PHADDDQ,
20850 IX86_BUILTIN_PHADDUBW,
20851 IX86_BUILTIN_PHADDUBD,
20852 IX86_BUILTIN_PHADDUBQ,
20853 IX86_BUILTIN_PHADDUWD,
20854 IX86_BUILTIN_PHADDUWQ,
20855 IX86_BUILTIN_PHADDUDQ,
20856 IX86_BUILTIN_PHSUBBW,
20857 IX86_BUILTIN_PHSUBWD,
20858 IX86_BUILTIN_PHSUBDQ,
20859 IX86_BUILTIN_PROTB,
20860 IX86_BUILTIN_PROTW,
20861 IX86_BUILTIN_PROTD,
20862 IX86_BUILTIN_PROTQ,
20863 IX86_BUILTIN_PROTB_IMM,
20864 IX86_BUILTIN_PROTW_IMM,
20865 IX86_BUILTIN_PROTD_IMM,
20866 IX86_BUILTIN_PROTQ_IMM,
20867 IX86_BUILTIN_PSHLB,
20868 IX86_BUILTIN_PSHLW,
20869 IX86_BUILTIN_PSHLD,
20870 IX86_BUILTIN_PSHLQ,
20871 IX86_BUILTIN_PSHAB,
20872 IX86_BUILTIN_PSHAW,
20873 IX86_BUILTIN_PSHAD,
20874 IX86_BUILTIN_PSHAQ,
20875 IX86_BUILTIN_FRCZSS,
20876 IX86_BUILTIN_FRCZSD,
20877 IX86_BUILTIN_FRCZPS,
20878 IX86_BUILTIN_FRCZPD,
20879 IX86_BUILTIN_CVTPH2PS,
20880 IX86_BUILTIN_CVTPS2PH,
20882 IX86_BUILTIN_COMEQSS,
20883 IX86_BUILTIN_COMNESS,
20884 IX86_BUILTIN_COMLTSS,
20885 IX86_BUILTIN_COMLESS,
20886 IX86_BUILTIN_COMGTSS,
20887 IX86_BUILTIN_COMGESS,
20888 IX86_BUILTIN_COMUEQSS,
20889 IX86_BUILTIN_COMUNESS,
20890 IX86_BUILTIN_COMULTSS,
20891 IX86_BUILTIN_COMULESS,
20892 IX86_BUILTIN_COMUGTSS,
20893 IX86_BUILTIN_COMUGESS,
20894 IX86_BUILTIN_COMORDSS,
20895 IX86_BUILTIN_COMUNORDSS,
20896 IX86_BUILTIN_COMFALSESS,
20897 IX86_BUILTIN_COMTRUESS,
20899 IX86_BUILTIN_COMEQSD,
20900 IX86_BUILTIN_COMNESD,
20901 IX86_BUILTIN_COMLTSD,
20902 IX86_BUILTIN_COMLESD,
20903 IX86_BUILTIN_COMGTSD,
20904 IX86_BUILTIN_COMGESD,
20905 IX86_BUILTIN_COMUEQSD,
20906 IX86_BUILTIN_COMUNESD,
20907 IX86_BUILTIN_COMULTSD,
20908 IX86_BUILTIN_COMULESD,
20909 IX86_BUILTIN_COMUGTSD,
20910 IX86_BUILTIN_COMUGESD,
20911 IX86_BUILTIN_COMORDSD,
20912 IX86_BUILTIN_COMUNORDSD,
20913 IX86_BUILTIN_COMFALSESD,
20914 IX86_BUILTIN_COMTRUESD,
20916 IX86_BUILTIN_COMEQPS,
20917 IX86_BUILTIN_COMNEPS,
20918 IX86_BUILTIN_COMLTPS,
20919 IX86_BUILTIN_COMLEPS,
20920 IX86_BUILTIN_COMGTPS,
20921 IX86_BUILTIN_COMGEPS,
20922 IX86_BUILTIN_COMUEQPS,
20923 IX86_BUILTIN_COMUNEPS,
20924 IX86_BUILTIN_COMULTPS,
20925 IX86_BUILTIN_COMULEPS,
20926 IX86_BUILTIN_COMUGTPS,
20927 IX86_BUILTIN_COMUGEPS,
20928 IX86_BUILTIN_COMORDPS,
20929 IX86_BUILTIN_COMUNORDPS,
20930 IX86_BUILTIN_COMFALSEPS,
20931 IX86_BUILTIN_COMTRUEPS,
20933 IX86_BUILTIN_COMEQPD,
20934 IX86_BUILTIN_COMNEPD,
20935 IX86_BUILTIN_COMLTPD,
20936 IX86_BUILTIN_COMLEPD,
20937 IX86_BUILTIN_COMGTPD,
20938 IX86_BUILTIN_COMGEPD,
20939 IX86_BUILTIN_COMUEQPD,
20940 IX86_BUILTIN_COMUNEPD,
20941 IX86_BUILTIN_COMULTPD,
20942 IX86_BUILTIN_COMULEPD,
20943 IX86_BUILTIN_COMUGTPD,
20944 IX86_BUILTIN_COMUGEPD,
20945 IX86_BUILTIN_COMORDPD,
20946 IX86_BUILTIN_COMUNORDPD,
20947 IX86_BUILTIN_COMFALSEPD,
20948 IX86_BUILTIN_COMTRUEPD,
20950 IX86_BUILTIN_PCOMEQUB,
20951 IX86_BUILTIN_PCOMNEUB,
20952 IX86_BUILTIN_PCOMLTUB,
20953 IX86_BUILTIN_PCOMLEUB,
20954 IX86_BUILTIN_PCOMGTUB,
20955 IX86_BUILTIN_PCOMGEUB,
20956 IX86_BUILTIN_PCOMFALSEUB,
20957 IX86_BUILTIN_PCOMTRUEUB,
20958 IX86_BUILTIN_PCOMEQUW,
20959 IX86_BUILTIN_PCOMNEUW,
20960 IX86_BUILTIN_PCOMLTUW,
20961 IX86_BUILTIN_PCOMLEUW,
20962 IX86_BUILTIN_PCOMGTUW,
20963 IX86_BUILTIN_PCOMGEUW,
20964 IX86_BUILTIN_PCOMFALSEUW,
20965 IX86_BUILTIN_PCOMTRUEUW,
20966 IX86_BUILTIN_PCOMEQUD,
20967 IX86_BUILTIN_PCOMNEUD,
20968 IX86_BUILTIN_PCOMLTUD,
20969 IX86_BUILTIN_PCOMLEUD,
20970 IX86_BUILTIN_PCOMGTUD,
20971 IX86_BUILTIN_PCOMGEUD,
20972 IX86_BUILTIN_PCOMFALSEUD,
20973 IX86_BUILTIN_PCOMTRUEUD,
20974 IX86_BUILTIN_PCOMEQUQ,
20975 IX86_BUILTIN_PCOMNEUQ,
20976 IX86_BUILTIN_PCOMLTUQ,
20977 IX86_BUILTIN_PCOMLEUQ,
20978 IX86_BUILTIN_PCOMGTUQ,
20979 IX86_BUILTIN_PCOMGEUQ,
20980 IX86_BUILTIN_PCOMFALSEUQ,
20981 IX86_BUILTIN_PCOMTRUEUQ,
20983 IX86_BUILTIN_PCOMEQB,
20984 IX86_BUILTIN_PCOMNEB,
20985 IX86_BUILTIN_PCOMLTB,
20986 IX86_BUILTIN_PCOMLEB,
20987 IX86_BUILTIN_PCOMGTB,
20988 IX86_BUILTIN_PCOMGEB,
20989 IX86_BUILTIN_PCOMFALSEB,
20990 IX86_BUILTIN_PCOMTRUEB,
20991 IX86_BUILTIN_PCOMEQW,
20992 IX86_BUILTIN_PCOMNEW,
20993 IX86_BUILTIN_PCOMLTW,
20994 IX86_BUILTIN_PCOMLEW,
20995 IX86_BUILTIN_PCOMGTW,
20996 IX86_BUILTIN_PCOMGEW,
20997 IX86_BUILTIN_PCOMFALSEW,
20998 IX86_BUILTIN_PCOMTRUEW,
20999 IX86_BUILTIN_PCOMEQD,
21000 IX86_BUILTIN_PCOMNED,
21001 IX86_BUILTIN_PCOMLTD,
21002 IX86_BUILTIN_PCOMLED,
21003 IX86_BUILTIN_PCOMGTD,
21004 IX86_BUILTIN_PCOMGED,
21005 IX86_BUILTIN_PCOMFALSED,
21006 IX86_BUILTIN_PCOMTRUED,
21007 IX86_BUILTIN_PCOMEQQ,
21008 IX86_BUILTIN_PCOMNEQ,
21009 IX86_BUILTIN_PCOMLTQ,
21010 IX86_BUILTIN_PCOMLEQ,
21011 IX86_BUILTIN_PCOMGTQ,
21012 IX86_BUILTIN_PCOMGEQ,
21013 IX86_BUILTIN_PCOMFALSEQ,
21014 IX86_BUILTIN_PCOMTRUEQ,
21019 /* Table for the ix86 builtin decls. */
21020 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21022 /* Table of all of the builtin functions that are possible with different ISA's
21023 but are waiting to be built until a function is declared to use that
21025 struct GTY(()) builtin_isa {
21026 tree type; /* builtin type to use in the declaration */
21027 const char *name; /* function name */
21028 int isa; /* isa_flags this builtin is defined for */
21029 bool const_p; /* true if the declaration is constant */
21032 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21035 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21036 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
21037 * function decl in the ix86_builtins array. Returns the function decl or
21038 * NULL_TREE, if the builtin was not added.
21040 * If the front end has a special hook for builtin functions, delay adding
21041 * builtin functions that aren't in the current ISA until the ISA is changed
21042 * with function specific optimization. Doing so can save about 300K for the
21043 * default compiler. When the builtin is expanded, check at that time whether
21046 * If the front end doesn't have a special hook, record all builtins, even if
21047 * it isn't an instruction set in the current ISA in case the user uses
21048 * function specific options for a different ISA, so that we don't get scope
21049 * errors if a builtin is added in the middle of a function scope. */
21052 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
21054 tree decl = NULL_TREE;
21056 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21058 ix86_builtins_isa[(int) code].isa = mask;
21060 if ((mask & ix86_isa_flags) != 0
21061 || (lang_hooks.builtin_function
21062 == lang_hooks.builtin_function_ext_scope))
21065 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
21067 ix86_builtins[(int) code] = decl;
21068 ix86_builtins_isa[(int) code].type = NULL_TREE;
21072 ix86_builtins[(int) code] = NULL_TREE;
21073 ix86_builtins_isa[(int) code].const_p = false;
21074 ix86_builtins_isa[(int) code].type = type;
21075 ix86_builtins_isa[(int) code].name = name;
21082 /* Like def_builtin, but also marks the function decl "const". */
21085 def_builtin_const (int mask, const char *name, tree type,
21086 enum ix86_builtins code)
21088 tree decl = def_builtin (mask, name, type, code);
21090 TREE_READONLY (decl) = 1;
21092 ix86_builtins_isa[(int) code].const_p = true;
21097 /* Add any new builtin functions for a given ISA that may not have been
21098 declared. This saves a bit of space compared to adding all of the
21099 declarations to the tree, even if we didn't use them. */
21102 ix86_add_new_builtins (int isa)
21107 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
21109 if ((ix86_builtins_isa[i].isa & isa) != 0
21110 && ix86_builtins_isa[i].type != NULL_TREE)
21112 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21113 ix86_builtins_isa[i].type,
21114 i, BUILT_IN_MD, NULL,
21117 ix86_builtins[i] = decl;
21118 ix86_builtins_isa[i].type = NULL_TREE;
21119 if (ix86_builtins_isa[i].const_p)
21120 TREE_READONLY (decl) = 1;
21125 /* Bits for builtin_description.flag. */
21127 /* Set when we don't support the comparison natively, and should
21128 swap_comparison in order to support it. */
21129 #define BUILTIN_DESC_SWAP_OPERANDS 1
21131 struct builtin_description
21133 const unsigned int mask;
21134 const enum insn_code icode;
21135 const char *const name;
21136 const enum ix86_builtins code;
21137 const enum rtx_code comparison;
21141 static const struct builtin_description bdesc_comi[] =
21143 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21144 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21145 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21146 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21147 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21148 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21149 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21150 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21151 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21152 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21153 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21154 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21157 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21159 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21162 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21163 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21165 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21166 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21169 static const struct builtin_description bdesc_pcmpestr[] =
21172 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21173 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21174 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21175 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21176 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21177 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21178 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 builtins for the implicit-length (NUL-terminated) packed
   string-compare instructions (PCMPISTRI / PCMPISTRM and the
   flag-reading forms).  Mirrors bdesc_pcmpestr above: last field 0
   for the index/mask variants, otherwise the CC mode of the flag bit
   returned -- TODO confirm against the pcmpistr expansion code.
   NOTE(review): embedded numbering jumps 21181 -> 21184; the
   opening-brace line of the initializer appears to be missing from
   this listing.  */
21181 static const struct builtin_description bdesc_pcmpistr[] =
21184 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21185 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21186 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21187 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21188 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21189 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21190 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21193 /* Special builtin types */
/* Function-signature codes for the "special" builtins (loads, stores
   and other memory-touching operations in bdesc_special_args below).
   Naming convention: <RESULT>_FTYPE_<ARG1>_<ARG2>...  A leading 'P'
   in an argument name appears to denote a pointer and 'PC' a pointer
   to const (e.g. V4SF_FTYPE_PCFLOAT: returns V4SF, takes const
   float *), matching how these codes pair with the load/store
   entries in bdesc_special_args -- confirm where the function types
   are actually built, outside this view.
   NOTE(review): the embedded numbering has gaps (21197-21198, 21202,
   21204, 21214, 21224), so several enumerators -- plus the enum's
   brace lines -- appear to be missing from this listing.  */
21194 enum ix86_special_builtin_type
21196 SPECIAL_FTYPE_UNKNOWN,
21199 UINT64_FTYPE_PUNSIGNED,
21200 V32QI_FTYPE_PCCHAR,
21201 V16QI_FTYPE_PCCHAR,
21203 V8SF_FTYPE_PCFLOAT,
21205 V4DF_FTYPE_PCDOUBLE,
21206 V4SF_FTYPE_PCFLOAT,
21207 V2DF_FTYPE_PCDOUBLE,
21208 V8SF_FTYPE_PCV8SF_V8SF,
21209 V4DF_FTYPE_PCV4DF_V4DF,
21210 V4SF_FTYPE_V4SF_PCV2SF,
21211 V4SF_FTYPE_PCV4SF_V4SF,
21212 V2DF_FTYPE_V2DF_PCDOUBLE,
21213 V2DF_FTYPE_PCV2DF_V2DF,
21215 VOID_FTYPE_PV2SF_V4SF,
21216 VOID_FTYPE_PV4DI_V4DI,
21217 VOID_FTYPE_PV2DI_V2DI,
21218 VOID_FTYPE_PCHAR_V32QI,
21219 VOID_FTYPE_PCHAR_V16QI,
21220 VOID_FTYPE_PFLOAT_V8SF,
21221 VOID_FTYPE_PFLOAT_V4SF,
21222 VOID_FTYPE_PDOUBLE_V4DF,
21223 VOID_FTYPE_PDOUBLE_V2DF,
21225 VOID_FTYPE_PINT_INT,
21226 VOID_FTYPE_PV8SF_V8SF_V8SF,
21227 VOID_FTYPE_PV4DF_V4DF_V4DF,
21228 VOID_FTYPE_PV4SF_V4SF_V4SF,
21229 VOID_FTYPE_PV2DF_V2DF_V2DF
21232 /* Builtin types */
/* Function-signature codes for the ordinary (register-only) builtins
   in bdesc_args below, same <RESULT>_FTYPE_<ARGS> naming as
   ix86_special_builtin_type.  Some codes carry trailing tags that
   appear to request special expansion handling -- e.g. _SWAP on the
   comparison types (used by the cmpgt/cmpge entries whose rtx code is
   the reversed LT/LE), _COUNT on the shift types (second operand is a
   shift count), _VEC_MERGE on the scalar sqrt/rsqrt/rcp types, and
   _PTEST on the ptest result types.  TODO confirm the exact handling
   in the builtin expander, which is outside this view.
   NOTE(review): the embedded numbering has large gaps (21234-21235,
   21237, 21245-21278, 21280-21287, 21289-21298), so many enumerators
   and the enum's brace lines are missing from this listing.  */
21233 enum ix86_builtin_type
21236 FLOAT128_FTYPE_FLOAT128,
21238 FLOAT128_FTYPE_FLOAT128_FLOAT128,
21239 INT_FTYPE_V8SF_V8SF_PTEST,
21240 INT_FTYPE_V4DI_V4DI_PTEST,
21241 INT_FTYPE_V4DF_V4DF_PTEST,
21242 INT_FTYPE_V4SF_V4SF_PTEST,
21243 INT_FTYPE_V2DI_V2DI_PTEST,
21244 INT_FTYPE_V2DF_V2DF_PTEST,
21279 V4SF_FTYPE_V4SF_VEC_MERGE,
21288 V2DF_FTYPE_V2DF_VEC_MERGE,
21299 V16QI_FTYPE_V16QI_V16QI,
21300 V16QI_FTYPE_V8HI_V8HI,
21301 V8QI_FTYPE_V8QI_V8QI,
21302 V8QI_FTYPE_V4HI_V4HI,
21303 V8HI_FTYPE_V8HI_V8HI,
21304 V8HI_FTYPE_V8HI_V8HI_COUNT,
21305 V8HI_FTYPE_V16QI_V16QI,
21306 V8HI_FTYPE_V4SI_V4SI,
21307 V8HI_FTYPE_V8HI_SI_COUNT,
21308 V8SF_FTYPE_V8SF_V8SF,
21309 V8SF_FTYPE_V8SF_V8SI,
21310 V4SI_FTYPE_V4SI_V4SI,
21311 V4SI_FTYPE_V4SI_V4SI_COUNT,
21312 V4SI_FTYPE_V8HI_V8HI,
21313 V4SI_FTYPE_V4SF_V4SF,
21314 V4SI_FTYPE_V2DF_V2DF,
21315 V4SI_FTYPE_V4SI_SI_COUNT,
21316 V4HI_FTYPE_V4HI_V4HI,
21317 V4HI_FTYPE_V4HI_V4HI_COUNT,
21318 V4HI_FTYPE_V8QI_V8QI,
21319 V4HI_FTYPE_V2SI_V2SI,
21320 V4HI_FTYPE_V4HI_SI_COUNT,
21321 V4DF_FTYPE_V4DF_V4DF,
21322 V4DF_FTYPE_V4DF_V4DI,
21323 V4SF_FTYPE_V4SF_V4SF,
21324 V4SF_FTYPE_V4SF_V4SF_SWAP,
21325 V4SF_FTYPE_V4SF_V4SI,
21326 V4SF_FTYPE_V4SF_V2SI,
21327 V4SF_FTYPE_V4SF_V2DF,
21328 V4SF_FTYPE_V4SF_DI,
21329 V4SF_FTYPE_V4SF_SI,
21330 V2DI_FTYPE_V2DI_V2DI,
21331 V2DI_FTYPE_V2DI_V2DI_COUNT,
21332 V2DI_FTYPE_V16QI_V16QI,
21333 V2DI_FTYPE_V4SI_V4SI,
21334 V2DI_FTYPE_V2DI_V16QI,
21335 V2DI_FTYPE_V2DF_V2DF,
21336 V2DI_FTYPE_V2DI_SI_COUNT,
21337 V2SI_FTYPE_V2SI_V2SI,
21338 V2SI_FTYPE_V2SI_V2SI_COUNT,
21339 V2SI_FTYPE_V4HI_V4HI,
21340 V2SI_FTYPE_V2SF_V2SF,
21341 V2SI_FTYPE_V2SI_SI_COUNT,
21342 V2DF_FTYPE_V2DF_V2DF,
21343 V2DF_FTYPE_V2DF_V2DF_SWAP,
21344 V2DF_FTYPE_V2DF_V4SF,
21345 V2DF_FTYPE_V2DF_V2DI,
21346 V2DF_FTYPE_V2DF_DI,
21347 V2DF_FTYPE_V2DF_SI,
21348 V2SF_FTYPE_V2SF_V2SF,
21349 V1DI_FTYPE_V1DI_V1DI,
21350 V1DI_FTYPE_V1DI_V1DI_COUNT,
21351 V1DI_FTYPE_V8QI_V8QI,
21352 V1DI_FTYPE_V2SI_V2SI,
21353 V1DI_FTYPE_V1DI_SI_COUNT,
21354 UINT64_FTYPE_UINT64_UINT64,
21355 UINT_FTYPE_UINT_UINT,
21356 UINT_FTYPE_UINT_USHORT,
21357 UINT_FTYPE_UINT_UCHAR,
21358 UINT16_FTYPE_UINT16_INT,
21359 UINT8_FTYPE_UINT8_INT,
21360 V8HI_FTYPE_V8HI_INT,
21361 V4SI_FTYPE_V4SI_INT,
21362 V4HI_FTYPE_V4HI_INT,
21363 V8SF_FTYPE_V8SF_INT,
21364 V4SI_FTYPE_V8SI_INT,
21365 V4SF_FTYPE_V8SF_INT,
21366 V2DF_FTYPE_V4DF_INT,
21367 V4DF_FTYPE_V4DF_INT,
21368 V4SF_FTYPE_V4SF_INT,
21369 V2DI_FTYPE_V2DI_INT,
21370 V2DI2TI_FTYPE_V2DI_INT,
21371 V2DF_FTYPE_V2DF_INT,
21372 V16QI_FTYPE_V16QI_V16QI_V16QI,
21373 V8SF_FTYPE_V8SF_V8SF_V8SF,
21374 V4DF_FTYPE_V4DF_V4DF_V4DF,
21375 V4SF_FTYPE_V4SF_V4SF_V4SF,
21376 V2DF_FTYPE_V2DF_V2DF_V2DF,
21377 V16QI_FTYPE_V16QI_V16QI_INT,
21378 V8SI_FTYPE_V8SI_V8SI_INT,
21379 V8SI_FTYPE_V8SI_V4SI_INT,
21380 V8HI_FTYPE_V8HI_V8HI_INT,
21381 V8SF_FTYPE_V8SF_V8SF_INT,
21382 V8SF_FTYPE_V8SF_V4SF_INT,
21383 V4SI_FTYPE_V4SI_V4SI_INT,
21384 V4DF_FTYPE_V4DF_V4DF_INT,
21385 V4DF_FTYPE_V4DF_V2DF_INT,
21386 V4SF_FTYPE_V4SF_V4SF_INT,
21387 V2DI_FTYPE_V2DI_V2DI_INT,
21388 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21389 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21390 V2DF_FTYPE_V2DF_V2DF_INT,
21391 V2DI_FTYPE_V2DI_UINT_UINT,
21392 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21395 /* Special builtins with variable number of arguments. */
/* Each entry: ISA option mask gating the builtin, insn code, builtin
   name (0 when the builtin is registered under a different name
   elsewhere), IX86_BUILTIN_* enum value, rtx comparison code (UNKNOWN
   for all entries here), and the ix86_special_builtin_type signature
   code cast to int.  Entries are grouped by the ISA that provides
   them; the original per-group comment lines appear to have been
   dropped from this listing (the embedded numbering skips them), so
   the groups are re-marked below.
   NOTE(review): numbering gaps (21397, 21476) also suggest the
   initializer's brace lines are missing from this listing.  */
21396 static const struct builtin_description bdesc_special_args[] =
21398 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
21399 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
/* MMX */
21402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow! */
21405 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE */
21408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21417 /* SSE or 3DNow!A */
21418 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21419 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2 */
21422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
21436 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1 */
21439 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A */
21442 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21443 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX */
21446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21447 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21448 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21451 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21452 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21459 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21461 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21468 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21469 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21470 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21471 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21472 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21473 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21474 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21475 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21478 /* Builtins with variable number of arguments. */
21479 static const struct builtin_description bdesc_args[] =
21481 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
21482 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
21483 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
21484 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21485 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21486 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21487 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21490 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21491 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21492 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21493 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21494 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21495 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21497 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21498 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21499 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21500 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21501 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21502 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21503 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21504 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21506 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21507 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21509 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21510 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21511 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21512 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21514 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21515 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21516 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21517 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21518 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21519 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21521 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21522 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21523 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21525 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21526 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21528 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21529 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21530 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21532 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21534 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21535 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21536 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21537 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21538 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21539 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21541 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21542 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21543 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21544 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21545 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21546 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21548 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21549 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21550 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21551 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21554 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21555 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21556 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21557 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21559 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21560 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21561 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21562 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21563 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21564 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21565 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21566 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21567 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21568 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21569 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21570 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21571 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21572 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21573 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21576 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21577 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21578 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21579 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21580 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21581 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21584 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21585 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21586 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21587 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21588 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21589 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21592 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21593 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21595 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21599 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21600 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21601 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21602 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21603 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21604 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21605 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21606 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21608 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21609 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21610 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21612 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21613 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21614 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21615 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21619 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21621 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21622 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21623 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21624 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21625 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21627 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21629 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21631 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21632 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21633 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21634 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21636 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21637 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21638 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21639 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21641 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21642 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21643 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21644 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21645 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21647 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21648 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21649 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21651 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21653 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21654 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21655 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21657 /* SSE MMX or 3Dnow!A */
21658 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21659 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21660 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21662 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21663 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21664 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21665 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21667 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21668 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21670 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21691 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21692 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21694 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21696 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21698 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21699 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21700 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21701 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21714 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21722 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21728 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21729 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21731 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21733 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21735 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21736 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21740 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21742 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21744 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21745 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21746 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21747 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21748 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21749 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21750 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21751 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21762 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21763 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21765 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21766 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21767 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21768 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21770 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21771 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21773 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21778 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21780 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21781 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21782 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21789 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21790 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21792 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21794 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21795 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21796 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21798 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21799 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21801 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21802 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21804 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21806 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21807 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21808 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21812 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21813 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21814 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21815 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21816 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21817 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21820 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21821 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21822 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21823 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21824 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21825 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21829 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21830 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21832 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21833 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21834 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21836 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21838 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21839 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21841 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21844 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21845 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21848 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21849 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21851 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21852 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21853 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21854 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21855 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21856 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21859 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21860 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21861 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21862 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21863 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21864 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21866 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21867 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21868 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21869 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21870 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21871 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21872 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21873 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21874 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21875 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21876 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21877 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21878 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21879 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21880 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21881 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21882 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21883 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21884 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21885 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21886 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21887 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21888 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21889 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21892 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21893 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21896 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21897 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21898 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21899 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21900 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21901 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21902 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21903 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
/* SSE4.1 variable/immediate blends.  */
21904 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21905 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
/* SSE4.1 sign-extending packed element moves (pmovsx*): widen the low
   elements of the source vector to the wider element type.  */
21907 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21908 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21909 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21910 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21911 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21912 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
/* SSE4.1 zero-extending packed element moves (pmovzx*).  */
21913 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21914 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21915 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21916 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21917 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21918 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
/* SSE4.1 horizontal minimum, pack, 64-bit equality compare, packed
   min/max and multiplies.  */
21919 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21921 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21922 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21923 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21924 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21925 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21926 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21927 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21928 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21929 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21930 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21931 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21932 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
/* Rounding builtins shared between SSE4.1 and SSE5 via the ROUND mask.  */
21934 /* SSE4.1 and SSE5 */
21935 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21936 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21937 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21938 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
/* ptest: all three builtins expand the same insn; the comparison code
   (EQ/LTU/GTU) selects which flag result is tested — ptestz, ptestc and
   ptestnzc respectively.  */
21940 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21941 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21942 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
/* SSE4.2: 64-bit greater-than compare and the CRC32 accumulators.  The
   crc32 entries additionally require the CRC32 mask bit, and the DImode
   variant is 64-bit only.  */
21945 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21946 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21947 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21948 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21949 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
/* SSE4A (AMD): bit-field extract/insert on V2DI.  The *qi variants take
   immediate length/index operands, the others take them in a vector.  */
21952 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21953 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21954 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21955 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* AES-NI.  Gated on SSE2 here; the NULL (0) name slot means the user-visible
   name is presumably registered elsewhere — TODO confirm against the
   builtin-init code outside this chunk.  */
21958 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21959 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21961 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21962 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21963 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21964 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* PCLMUL carry-less multiply (name slot 0, as for the AES entries).  */
21967 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* AVX 256-bit arithmetic and logical operations.  */
21970 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21971 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21973 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21974 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21975 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21978 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21984 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21985 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21986 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21987 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21988 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21989 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21990 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21991 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21992 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21993 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21994 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21995 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* Variable permutes: lane control comes from an integer vector operand.  */
21997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
/* 256-bit blend, dot product and shuffle, plus the AVX-encoded forms of
   the scalar/packed compares (these take the predicate as an INT).  */
22002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22008 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22012 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22013 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
/* 128-bit lane extract/insert and dq<->fp conversions.  */
22015 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
22018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22020 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22025 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
/* Immediate-controlled permutes and 128-bit lane insert.  */
22026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22029 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22030 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22031 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22033 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
/* 256-bit duplicating moves.  */
22037 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22039 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
/* sqrt/rsqrt/rcp; the _nr variants expand the generic (non-avx-prefixed)
   patterns — presumably the Newton-Raphson-refinable forms, TODO confirm
   against the expander definitions.  */
22041 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22043 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22044 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22045 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22047 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
/* Rounding and unpacks.  */
22049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22054 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* 128<->256-bit cast-style widen/narrow conversions.  */
22057 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
/* vtest/ptest: as with the SSE4.1 ptest entries, EQ/LTU/GTU select the
   z/c/nzc flag test for a shared insn pattern.  */
22064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
/* Sign-bit extraction to an integer mask.  */
22080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
/* Classification of SSE5 multi-argument builtins used by bdesc_multi_arg
   below: the suffix encodes the operand modes and how the builtin is
   expanded (plain op, immediate operand, or comparison).  NOTE(review):
   this chunk shows only part of the enumerator list — the full definition
   (including its closing brace) lies outside the visible region.  */
22085 enum multi_arg_type {
22095   MULTI_ARG_3_PERMPS,
22096   MULTI_ARG_3_PERMPD,
/* Two-operand forms whose second operand is an immediate.  */
22103   MULTI_ARG_2_DI_IMM,
22104   MULTI_ARG_2_SI_IMM,
22105   MULTI_ARG_2_HI_IMM,
22106   MULTI_ARG_2_QI_IMM,
/* Two-operand comparison forms; the comparison code comes from the
   builtin_description entry.  */
22107   MULTI_ARG_2_SF_CMP,
22108   MULTI_ARG_2_DF_CMP,
22109   MULTI_ARG_2_DI_CMP,
22110   MULTI_ARG_2_SI_CMP,
22111   MULTI_ARG_2_HI_CMP,
22112   MULTI_ARG_2_QI_CMP,
/* SSE5 (AMD) multi-argument builtins.  Each entry's flag field carries a
   MULTI_ARG_* classification (cast to int) instead of a function-type code.
   NOTE(review): the table continues past the end of this chunk.  */
22135 static const struct builtin_description bdesc_multi_arg[] =
/* Fused multiply-add/subtract: vm* patterns are the scalar (ss/sd)
   variants, fn* are the negated-product forms.  */
22137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
/* pcmov: the bare "__builtin_ia32_pcmov" deliberately shares the v2di
   pattern with the _v2di variant.  */
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
/* Permutes and multiply-accumulate families.  */
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
/* Rotates: vrotl* take a variable count, rotl* take an immediate
   (MULTI_ARG_2_*_IMM).  */
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
/* Per-element variable shifts: psha* arithmetic, pshl* logical.  */
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22189 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
/* Fraction extraction and half-precision conversions.  */
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
/* Horizontal add/sub with widening (signed phadd*, unsigned phaddu*).  */
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22198 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22200 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22202 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22207 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
/* Scalar float compares; the RTL comparison code in each entry selects
   the predicate.  Note "comneqss"/"comuneqss" intentionally reuse the
   COMNESS/COMUNESS builtin codes — they are alternative spellings of the
   same builtin, not typos.  */
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22216 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22217 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22218 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22219 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22220 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22221 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22222 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22223 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22224 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22225 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22226 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22227 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22228 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* Same compare family for scalar double.  */
22230 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22231 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22232 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22233 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22234 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22235 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22236 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22240 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22244 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22248 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22249 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22252 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22253 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22257 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22266 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22283 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22285 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22329 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22346 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22363 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22373 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22374 in the current target ISA to allow the user to compile particular modules
22375 with different target specific options that differ from the command line
22378 ix86_init_mmx_sse_builtins (void)
22380 const struct builtin_description * d;
22383 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22384 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22385 tree V1DI_type_node
22386 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22387 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22388 tree V2DI_type_node
22389 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22390 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22391 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22392 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22393 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22394 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22395 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22397 tree pchar_type_node = build_pointer_type (char_type_node);
22398 tree pcchar_type_node
22399 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22400 tree pfloat_type_node = build_pointer_type (float_type_node);
22401 tree pcfloat_type_node
22402 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22403 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22404 tree pcv2sf_type_node
22405 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22406 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22407 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22410 tree int_ftype_v4sf_v4sf
22411 = build_function_type_list (integer_type_node,
22412 V4SF_type_node, V4SF_type_node, NULL_TREE);
22413 tree v4si_ftype_v4sf_v4sf
22414 = build_function_type_list (V4SI_type_node,
22415 V4SF_type_node, V4SF_type_node, NULL_TREE);
22416 /* MMX/SSE/integer conversions. */
22417 tree int_ftype_v4sf
22418 = build_function_type_list (integer_type_node,
22419 V4SF_type_node, NULL_TREE);
22420 tree int64_ftype_v4sf
22421 = build_function_type_list (long_long_integer_type_node,
22422 V4SF_type_node, NULL_TREE);
22423 tree int_ftype_v8qi
22424 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22425 tree v4sf_ftype_v4sf_int
22426 = build_function_type_list (V4SF_type_node,
22427 V4SF_type_node, integer_type_node, NULL_TREE);
22428 tree v4sf_ftype_v4sf_int64
22429 = build_function_type_list (V4SF_type_node,
22430 V4SF_type_node, long_long_integer_type_node,
22432 tree v4sf_ftype_v4sf_v2si
22433 = build_function_type_list (V4SF_type_node,
22434 V4SF_type_node, V2SI_type_node, NULL_TREE);
22436 /* Miscellaneous. */
22437 tree v8qi_ftype_v4hi_v4hi
22438 = build_function_type_list (V8QI_type_node,
22439 V4HI_type_node, V4HI_type_node, NULL_TREE);
22440 tree v4hi_ftype_v2si_v2si
22441 = build_function_type_list (V4HI_type_node,
22442 V2SI_type_node, V2SI_type_node, NULL_TREE);
22443 tree v4sf_ftype_v4sf_v4sf_int
22444 = build_function_type_list (V4SF_type_node,
22445 V4SF_type_node, V4SF_type_node,
22446 integer_type_node, NULL_TREE);
22447 tree v2si_ftype_v4hi_v4hi
22448 = build_function_type_list (V2SI_type_node,
22449 V4HI_type_node, V4HI_type_node, NULL_TREE);
22450 tree v4hi_ftype_v4hi_int
22451 = build_function_type_list (V4HI_type_node,
22452 V4HI_type_node, integer_type_node, NULL_TREE);
22453 tree v2si_ftype_v2si_int
22454 = build_function_type_list (V2SI_type_node,
22455 V2SI_type_node, integer_type_node, NULL_TREE);
22456 tree v1di_ftype_v1di_int
22457 = build_function_type_list (V1DI_type_node,
22458 V1DI_type_node, integer_type_node, NULL_TREE);
22460 tree void_ftype_void
22461 = build_function_type (void_type_node, void_list_node);
22462 tree void_ftype_unsigned
22463 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22464 tree void_ftype_unsigned_unsigned
22465 = build_function_type_list (void_type_node, unsigned_type_node,
22466 unsigned_type_node, NULL_TREE);
22467 tree void_ftype_pcvoid_unsigned_unsigned
22468 = build_function_type_list (void_type_node, const_ptr_type_node,
22469 unsigned_type_node, unsigned_type_node,
22471 tree unsigned_ftype_void
22472 = build_function_type (unsigned_type_node, void_list_node);
22473 tree v2si_ftype_v4sf
22474 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22475 /* Loads/stores. */
22476 tree void_ftype_v8qi_v8qi_pchar
22477 = build_function_type_list (void_type_node,
22478 V8QI_type_node, V8QI_type_node,
22479 pchar_type_node, NULL_TREE);
22480 tree v4sf_ftype_pcfloat
22481 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22482 tree v4sf_ftype_v4sf_pcv2sf
22483 = build_function_type_list (V4SF_type_node,
22484 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22485 tree void_ftype_pv2sf_v4sf
22486 = build_function_type_list (void_type_node,
22487 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22488 tree void_ftype_pfloat_v4sf
22489 = build_function_type_list (void_type_node,
22490 pfloat_type_node, V4SF_type_node, NULL_TREE);
22491 tree void_ftype_pdi_di
22492 = build_function_type_list (void_type_node,
22493 pdi_type_node, long_long_unsigned_type_node,
22495 tree void_ftype_pv2di_v2di
22496 = build_function_type_list (void_type_node,
22497 pv2di_type_node, V2DI_type_node, NULL_TREE);
22498 /* Normal vector unops. */
22499 tree v4sf_ftype_v4sf
22500 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22501 tree v16qi_ftype_v16qi
22502 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22503 tree v8hi_ftype_v8hi
22504 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22505 tree v4si_ftype_v4si
22506 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22507 tree v8qi_ftype_v8qi
22508 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22509 tree v4hi_ftype_v4hi
22510 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22512 /* Normal vector binops. */
22513 tree v4sf_ftype_v4sf_v4sf
22514 = build_function_type_list (V4SF_type_node,
22515 V4SF_type_node, V4SF_type_node, NULL_TREE);
22516 tree v8qi_ftype_v8qi_v8qi
22517 = build_function_type_list (V8QI_type_node,
22518 V8QI_type_node, V8QI_type_node, NULL_TREE);
22519 tree v4hi_ftype_v4hi_v4hi
22520 = build_function_type_list (V4HI_type_node,
22521 V4HI_type_node, V4HI_type_node, NULL_TREE);
22522 tree v2si_ftype_v2si_v2si
22523 = build_function_type_list (V2SI_type_node,
22524 V2SI_type_node, V2SI_type_node, NULL_TREE);
22525 tree v1di_ftype_v1di_v1di
22526 = build_function_type_list (V1DI_type_node,
22527 V1DI_type_node, V1DI_type_node, NULL_TREE);
22528 tree v1di_ftype_v1di_v1di_int
22529 = build_function_type_list (V1DI_type_node,
22530 V1DI_type_node, V1DI_type_node,
22531 integer_type_node, NULL_TREE);
22532 tree v2si_ftype_v2sf
22533 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22534 tree v2sf_ftype_v2si
22535 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22536 tree v2si_ftype_v2si
22537 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22538 tree v2sf_ftype_v2sf
22539 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22540 tree v2sf_ftype_v2sf_v2sf
22541 = build_function_type_list (V2SF_type_node,
22542 V2SF_type_node, V2SF_type_node, NULL_TREE);
22543 tree v2si_ftype_v2sf_v2sf
22544 = build_function_type_list (V2SI_type_node,
22545 V2SF_type_node, V2SF_type_node, NULL_TREE);
22546 tree pint_type_node = build_pointer_type (integer_type_node);
22547 tree pdouble_type_node = build_pointer_type (double_type_node);
22548 tree pcdouble_type_node = build_pointer_type (
22549 build_type_variant (double_type_node, 1, 0));
22550 tree int_ftype_v2df_v2df
22551 = build_function_type_list (integer_type_node,
22552 V2DF_type_node, V2DF_type_node, NULL_TREE);
22554 tree void_ftype_pcvoid
22555 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22556 tree v4sf_ftype_v4si
22557 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22558 tree v4si_ftype_v4sf
22559 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22560 tree v2df_ftype_v4si
22561 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22562 tree v4si_ftype_v2df
22563 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22564 tree v4si_ftype_v2df_v2df
22565 = build_function_type_list (V4SI_type_node,
22566 V2DF_type_node, V2DF_type_node, NULL_TREE);
22567 tree v2si_ftype_v2df
22568 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22569 tree v4sf_ftype_v2df
22570 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22571 tree v2df_ftype_v2si
22572 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22573 tree v2df_ftype_v4sf
22574 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22575 tree int_ftype_v2df
22576 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22577 tree int64_ftype_v2df
22578 = build_function_type_list (long_long_integer_type_node,
22579 V2DF_type_node, NULL_TREE);
22580 tree v2df_ftype_v2df_int
22581 = build_function_type_list (V2DF_type_node,
22582 V2DF_type_node, integer_type_node, NULL_TREE);
22583 tree v2df_ftype_v2df_int64
22584 = build_function_type_list (V2DF_type_node,
22585 V2DF_type_node, long_long_integer_type_node,
22587 tree v4sf_ftype_v4sf_v2df
22588 = build_function_type_list (V4SF_type_node,
22589 V4SF_type_node, V2DF_type_node, NULL_TREE);
22590 tree v2df_ftype_v2df_v4sf
22591 = build_function_type_list (V2DF_type_node,
22592 V2DF_type_node, V4SF_type_node, NULL_TREE);
22593 tree v2df_ftype_v2df_v2df_int
22594 = build_function_type_list (V2DF_type_node,
22595 V2DF_type_node, V2DF_type_node,
22598 tree v2df_ftype_v2df_pcdouble
22599 = build_function_type_list (V2DF_type_node,
22600 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22601 tree void_ftype_pdouble_v2df
22602 = build_function_type_list (void_type_node,
22603 pdouble_type_node, V2DF_type_node, NULL_TREE);
22604 tree void_ftype_pint_int
22605 = build_function_type_list (void_type_node,
22606 pint_type_node, integer_type_node, NULL_TREE);
22607 tree void_ftype_v16qi_v16qi_pchar
22608 = build_function_type_list (void_type_node,
22609 V16QI_type_node, V16QI_type_node,
22610 pchar_type_node, NULL_TREE);
22611 tree v2df_ftype_pcdouble
22612 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22613 tree v2df_ftype_v2df_v2df
22614 = build_function_type_list (V2DF_type_node,
22615 V2DF_type_node, V2DF_type_node, NULL_TREE);
22616 tree v16qi_ftype_v16qi_v16qi
22617 = build_function_type_list (V16QI_type_node,
22618 V16QI_type_node, V16QI_type_node, NULL_TREE);
22619 tree v8hi_ftype_v8hi_v8hi
22620 = build_function_type_list (V8HI_type_node,
22621 V8HI_type_node, V8HI_type_node, NULL_TREE);
22622 tree v4si_ftype_v4si_v4si
22623 = build_function_type_list (V4SI_type_node,
22624 V4SI_type_node, V4SI_type_node, NULL_TREE);
22625 tree v2di_ftype_v2di_v2di
22626 = build_function_type_list (V2DI_type_node,
22627 V2DI_type_node, V2DI_type_node, NULL_TREE);
22628 tree v2di_ftype_v2df_v2df
22629 = build_function_type_list (V2DI_type_node,
22630 V2DF_type_node, V2DF_type_node, NULL_TREE);
22631 tree v2df_ftype_v2df
22632 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22633 tree v2di_ftype_v2di_int
22634 = build_function_type_list (V2DI_type_node,
22635 V2DI_type_node, integer_type_node, NULL_TREE);
22636 tree v2di_ftype_v2di_v2di_int
22637 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22638 V2DI_type_node, integer_type_node, NULL_TREE);
22639 tree v4si_ftype_v4si_int
22640 = build_function_type_list (V4SI_type_node,
22641 V4SI_type_node, integer_type_node, NULL_TREE);
22642 tree v8hi_ftype_v8hi_int
22643 = build_function_type_list (V8HI_type_node,
22644 V8HI_type_node, integer_type_node, NULL_TREE);
22645 tree v4si_ftype_v8hi_v8hi
22646 = build_function_type_list (V4SI_type_node,
22647 V8HI_type_node, V8HI_type_node, NULL_TREE);
22648 tree v1di_ftype_v8qi_v8qi
22649 = build_function_type_list (V1DI_type_node,
22650 V8QI_type_node, V8QI_type_node, NULL_TREE);
22651 tree v1di_ftype_v2si_v2si
22652 = build_function_type_list (V1DI_type_node,
22653 V2SI_type_node, V2SI_type_node, NULL_TREE);
22654 tree v2di_ftype_v16qi_v16qi
22655 = build_function_type_list (V2DI_type_node,
22656 V16QI_type_node, V16QI_type_node, NULL_TREE);
22657 tree v2di_ftype_v4si_v4si
22658 = build_function_type_list (V2DI_type_node,
22659 V4SI_type_node, V4SI_type_node, NULL_TREE);
22660 tree int_ftype_v16qi
22661 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22662 tree v16qi_ftype_pcchar
22663 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22664 tree void_ftype_pchar_v16qi
22665 = build_function_type_list (void_type_node,
22666 pchar_type_node, V16QI_type_node, NULL_TREE);
22668 tree v2di_ftype_v2di_unsigned_unsigned
22669 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22670 unsigned_type_node, unsigned_type_node,
22672 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22673 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22674 unsigned_type_node, unsigned_type_node,
22676 tree v2di_ftype_v2di_v16qi
22677 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22679 tree v2df_ftype_v2df_v2df_v2df
22680 = build_function_type_list (V2DF_type_node,
22681 V2DF_type_node, V2DF_type_node,
22682 V2DF_type_node, NULL_TREE);
22683 tree v4sf_ftype_v4sf_v4sf_v4sf
22684 = build_function_type_list (V4SF_type_node,
22685 V4SF_type_node, V4SF_type_node,
22686 V4SF_type_node, NULL_TREE);
22687 tree v8hi_ftype_v16qi
22688 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22690 tree v4si_ftype_v16qi
22691 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22693 tree v2di_ftype_v16qi
22694 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22696 tree v4si_ftype_v8hi
22697 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22699 tree v2di_ftype_v8hi
22700 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22702 tree v2di_ftype_v4si
22703 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22705 tree v2di_ftype_pv2di
22706 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22708 tree v16qi_ftype_v16qi_v16qi_int
22709 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22710 V16QI_type_node, integer_type_node,
22712 tree v16qi_ftype_v16qi_v16qi_v16qi
22713 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22714 V16QI_type_node, V16QI_type_node,
22716 tree v8hi_ftype_v8hi_v8hi_int
22717 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22718 V8HI_type_node, integer_type_node,
22720 tree v4si_ftype_v4si_v4si_int
22721 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22722 V4SI_type_node, integer_type_node,
22724 tree int_ftype_v2di_v2di
22725 = build_function_type_list (integer_type_node,
22726 V2DI_type_node, V2DI_type_node,
22728 tree int_ftype_v16qi_int_v16qi_int_int
22729 = build_function_type_list (integer_type_node,
22736 tree v16qi_ftype_v16qi_int_v16qi_int_int
22737 = build_function_type_list (V16QI_type_node,
22744 tree int_ftype_v16qi_v16qi_int
22745 = build_function_type_list (integer_type_node,
22751 /* SSE5 instructions */
22752 tree v2di_ftype_v2di_v2di_v2di
22753 = build_function_type_list (V2DI_type_node,
22759 tree v4si_ftype_v4si_v4si_v4si
22760 = build_function_type_list (V4SI_type_node,
22766 tree v4si_ftype_v4si_v4si_v2di
22767 = build_function_type_list (V4SI_type_node,
22773 tree v8hi_ftype_v8hi_v8hi_v8hi
22774 = build_function_type_list (V8HI_type_node,
22780 tree v8hi_ftype_v8hi_v8hi_v4si
22781 = build_function_type_list (V8HI_type_node,
22787 tree v2df_ftype_v2df_v2df_v16qi
22788 = build_function_type_list (V2DF_type_node,
22794 tree v4sf_ftype_v4sf_v4sf_v16qi
22795 = build_function_type_list (V4SF_type_node,
22801 tree v2di_ftype_v2di_si
22802 = build_function_type_list (V2DI_type_node,
22807 tree v4si_ftype_v4si_si
22808 = build_function_type_list (V4SI_type_node,
22813 tree v8hi_ftype_v8hi_si
22814 = build_function_type_list (V8HI_type_node,
22819 tree v16qi_ftype_v16qi_si
22820 = build_function_type_list (V16QI_type_node,
22824 tree v4sf_ftype_v4hi
22825 = build_function_type_list (V4SF_type_node,
22829 tree v4hi_ftype_v4sf
22830 = build_function_type_list (V4HI_type_node,
22834 tree v2di_ftype_v2di
22835 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22837 tree v16qi_ftype_v8hi_v8hi
22838 = build_function_type_list (V16QI_type_node,
22839 V8HI_type_node, V8HI_type_node,
22841 tree v8hi_ftype_v4si_v4si
22842 = build_function_type_list (V8HI_type_node,
22843 V4SI_type_node, V4SI_type_node,
22845 tree v8hi_ftype_v16qi_v16qi
22846 = build_function_type_list (V8HI_type_node,
22847 V16QI_type_node, V16QI_type_node,
22849 tree v4hi_ftype_v8qi_v8qi
22850 = build_function_type_list (V4HI_type_node,
22851 V8QI_type_node, V8QI_type_node,
22853 tree unsigned_ftype_unsigned_uchar
22854 = build_function_type_list (unsigned_type_node,
22855 unsigned_type_node,
22856 unsigned_char_type_node,
22858 tree unsigned_ftype_unsigned_ushort
22859 = build_function_type_list (unsigned_type_node,
22860 unsigned_type_node,
22861 short_unsigned_type_node,
22863 tree unsigned_ftype_unsigned_unsigned
22864 = build_function_type_list (unsigned_type_node,
22865 unsigned_type_node,
22866 unsigned_type_node,
22868 tree uint64_ftype_uint64_uint64
22869 = build_function_type_list (long_long_unsigned_type_node,
22870 long_long_unsigned_type_node,
22871 long_long_unsigned_type_node,
22873 tree float_ftype_float
22874 = build_function_type_list (float_type_node,
22879 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22881 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22883 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22885 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22887 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22889 tree v8sf_ftype_v8sf
22890 = build_function_type_list (V8SF_type_node,
22893 tree v8si_ftype_v8sf
22894 = build_function_type_list (V8SI_type_node,
22897 tree v8sf_ftype_v8si
22898 = build_function_type_list (V8SF_type_node,
22901 tree v4si_ftype_v4df
22902 = build_function_type_list (V4SI_type_node,
22905 tree v4df_ftype_v4df
22906 = build_function_type_list (V4DF_type_node,
22909 tree v4df_ftype_v4si
22910 = build_function_type_list (V4DF_type_node,
22913 tree v4df_ftype_v4sf
22914 = build_function_type_list (V4DF_type_node,
22917 tree v4sf_ftype_v4df
22918 = build_function_type_list (V4SF_type_node,
22921 tree v8sf_ftype_v8sf_v8sf
22922 = build_function_type_list (V8SF_type_node,
22923 V8SF_type_node, V8SF_type_node,
22925 tree v4df_ftype_v4df_v4df
22926 = build_function_type_list (V4DF_type_node,
22927 V4DF_type_node, V4DF_type_node,
22929 tree v8sf_ftype_v8sf_int
22930 = build_function_type_list (V8SF_type_node,
22931 V8SF_type_node, integer_type_node,
22933 tree v4si_ftype_v8si_int
22934 = build_function_type_list (V4SI_type_node,
22935 V8SI_type_node, integer_type_node,
22937 tree v4df_ftype_v4df_int
22938 = build_function_type_list (V4DF_type_node,
22939 V4DF_type_node, integer_type_node,
22941 tree v4sf_ftype_v8sf_int
22942 = build_function_type_list (V4SF_type_node,
22943 V8SF_type_node, integer_type_node,
22945 tree v2df_ftype_v4df_int
22946 = build_function_type_list (V2DF_type_node,
22947 V4DF_type_node, integer_type_node,
22949 tree v8sf_ftype_v8sf_v8sf_int
22950 = build_function_type_list (V8SF_type_node,
22951 V8SF_type_node, V8SF_type_node,
22954 tree v8sf_ftype_v8sf_v8sf_v8sf
22955 = build_function_type_list (V8SF_type_node,
22956 V8SF_type_node, V8SF_type_node,
22959 tree v4df_ftype_v4df_v4df_v4df
22960 = build_function_type_list (V4DF_type_node,
22961 V4DF_type_node, V4DF_type_node,
22964 tree v8si_ftype_v8si_v8si_int
22965 = build_function_type_list (V8SI_type_node,
22966 V8SI_type_node, V8SI_type_node,
22969 tree v4df_ftype_v4df_v4df_int
22970 = build_function_type_list (V4DF_type_node,
22971 V4DF_type_node, V4DF_type_node,
22974 tree v8sf_ftype_pcfloat
22975 = build_function_type_list (V8SF_type_node,
22978 tree v4df_ftype_pcdouble
22979 = build_function_type_list (V4DF_type_node,
22980 pcdouble_type_node,
22982 tree pcv4sf_type_node
22983 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22984 tree pcv2df_type_node
22985 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22986 tree v8sf_ftype_pcv4sf
22987 = build_function_type_list (V8SF_type_node,
22990 tree v4df_ftype_pcv2df
22991 = build_function_type_list (V4DF_type_node,
22994 tree v32qi_ftype_pcchar
22995 = build_function_type_list (V32QI_type_node,
22998 tree void_ftype_pchar_v32qi
22999 = build_function_type_list (void_type_node,
23000 pchar_type_node, V32QI_type_node,
23002 tree v8si_ftype_v8si_v4si_int
23003 = build_function_type_list (V8SI_type_node,
23004 V8SI_type_node, V4SI_type_node,
23007 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
23008 tree void_ftype_pv4di_v4di
23009 = build_function_type_list (void_type_node,
23010 pv4di_type_node, V4DI_type_node,
23012 tree v8sf_ftype_v8sf_v4sf_int
23013 = build_function_type_list (V8SF_type_node,
23014 V8SF_type_node, V4SF_type_node,
23017 tree v4df_ftype_v4df_v2df_int
23018 = build_function_type_list (V4DF_type_node,
23019 V4DF_type_node, V2DF_type_node,
23022 tree void_ftype_pfloat_v8sf
23023 = build_function_type_list (void_type_node,
23024 pfloat_type_node, V8SF_type_node,
23026 tree void_ftype_pdouble_v4df
23027 = build_function_type_list (void_type_node,
23028 pdouble_type_node, V4DF_type_node,
23030 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
23031 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
23032 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
23033 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
23034 tree pcv8sf_type_node
23035 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
23036 tree pcv4df_type_node
23037 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
23038 tree v8sf_ftype_pcv8sf_v8sf
23039 = build_function_type_list (V8SF_type_node,
23040 pcv8sf_type_node, V8SF_type_node,
23042 tree v4df_ftype_pcv4df_v4df
23043 = build_function_type_list (V4DF_type_node,
23044 pcv4df_type_node, V4DF_type_node,
23046 tree v4sf_ftype_pcv4sf_v4sf
23047 = build_function_type_list (V4SF_type_node,
23048 pcv4sf_type_node, V4SF_type_node,
23050 tree v2df_ftype_pcv2df_v2df
23051 = build_function_type_list (V2DF_type_node,
23052 pcv2df_type_node, V2DF_type_node,
23054 tree void_ftype_pv8sf_v8sf_v8sf
23055 = build_function_type_list (void_type_node,
23056 pv8sf_type_node, V8SF_type_node,
23059 tree void_ftype_pv4df_v4df_v4df
23060 = build_function_type_list (void_type_node,
23061 pv4df_type_node, V4DF_type_node,
23064 tree void_ftype_pv4sf_v4sf_v4sf
23065 = build_function_type_list (void_type_node,
23066 pv4sf_type_node, V4SF_type_node,
23069 tree void_ftype_pv2df_v2df_v2df
23070 = build_function_type_list (void_type_node,
23071 pv2df_type_node, V2DF_type_node,
23074 tree v4df_ftype_v2df
23075 = build_function_type_list (V4DF_type_node,
23078 tree v8sf_ftype_v4sf
23079 = build_function_type_list (V8SF_type_node,
23082 tree v8si_ftype_v4si
23083 = build_function_type_list (V8SI_type_node,
23086 tree v2df_ftype_v4df
23087 = build_function_type_list (V2DF_type_node,
23090 tree v4sf_ftype_v8sf
23091 = build_function_type_list (V4SF_type_node,
23094 tree v4si_ftype_v8si
23095 = build_function_type_list (V4SI_type_node,
23098 tree int_ftype_v4df
23099 = build_function_type_list (integer_type_node,
23102 tree int_ftype_v8sf
23103 = build_function_type_list (integer_type_node,
23106 tree int_ftype_v8sf_v8sf
23107 = build_function_type_list (integer_type_node,
23108 V8SF_type_node, V8SF_type_node,
23110 tree int_ftype_v4di_v4di
23111 = build_function_type_list (integer_type_node,
23112 V4DI_type_node, V4DI_type_node,
23114 tree int_ftype_v4df_v4df
23115 = build_function_type_list (integer_type_node,
23116 V4DF_type_node, V4DF_type_node,
23118 tree v8sf_ftype_v8sf_v8si
23119 = build_function_type_list (V8SF_type_node,
23120 V8SF_type_node, V8SI_type_node,
23122 tree v4df_ftype_v4df_v4di
23123 = build_function_type_list (V4DF_type_node,
23124 V4DF_type_node, V4DI_type_node,
23126 tree v4sf_ftype_v4sf_v4si
23127 = build_function_type_list (V4SF_type_node,
23128 V4SF_type_node, V4SI_type_node, NULL_TREE);
23129 tree v2df_ftype_v2df_v2di
23130 = build_function_type_list (V2DF_type_node,
23131 V2DF_type_node, V2DI_type_node, NULL_TREE);
23133 /* Integer intrinsics. */
23134 tree uint64_ftype_void
23135 = build_function_type (long_long_unsigned_type_node,
23138 = build_function_type_list (integer_type_node,
23139 integer_type_node, NULL_TREE);
23140 tree int64_ftype_int64
23141 = build_function_type_list (long_long_integer_type_node,
23142 long_long_integer_type_node,
23144 tree uint64_ftype_int
23145 = build_function_type_list (long_long_unsigned_type_node,
23146 integer_type_node, NULL_TREE);
23147 tree punsigned_type_node = build_pointer_type (unsigned_type_node);
23148 tree uint64_ftype_punsigned
23149 = build_function_type_list (long_long_unsigned_type_node,
23150 punsigned_type_node, NULL_TREE);
23151 tree ushort_ftype_ushort_int
23152 = build_function_type_list (short_unsigned_type_node,
23153 short_unsigned_type_node,
23156 tree uchar_ftype_uchar_int
23157 = build_function_type_list (unsigned_char_type_node,
23158 unsigned_char_type_node,
23164 /* Add all special builtins with variable number of operands. */
23165 for (i = 0, d = bdesc_special_args;
23166 i < ARRAY_SIZE (bdesc_special_args);
23174 switch ((enum ix86_special_builtin_type) d->flag)
23176 case VOID_FTYPE_VOID:
23177 type = void_ftype_void;
23179 case UINT64_FTYPE_VOID:
23180 type = uint64_ftype_void;
23182 case UINT64_FTYPE_PUNSIGNED:
23183 type = uint64_ftype_punsigned;
23185 case V32QI_FTYPE_PCCHAR:
23186 type = v32qi_ftype_pcchar;
23188 case V16QI_FTYPE_PCCHAR:
23189 type = v16qi_ftype_pcchar;
23191 case V8SF_FTYPE_PCV4SF:
23192 type = v8sf_ftype_pcv4sf;
23194 case V8SF_FTYPE_PCFLOAT:
23195 type = v8sf_ftype_pcfloat;
23197 case V4DF_FTYPE_PCV2DF:
23198 type = v4df_ftype_pcv2df;
23200 case V4DF_FTYPE_PCDOUBLE:
23201 type = v4df_ftype_pcdouble;
23203 case V4SF_FTYPE_PCFLOAT:
23204 type = v4sf_ftype_pcfloat;
23206 case V2DI_FTYPE_PV2DI:
23207 type = v2di_ftype_pv2di;
23209 case V2DF_FTYPE_PCDOUBLE:
23210 type = v2df_ftype_pcdouble;
23212 case V8SF_FTYPE_PCV8SF_V8SF:
23213 type = v8sf_ftype_pcv8sf_v8sf;
23215 case V4DF_FTYPE_PCV4DF_V4DF:
23216 type = v4df_ftype_pcv4df_v4df;
23218 case V4SF_FTYPE_V4SF_PCV2SF:
23219 type = v4sf_ftype_v4sf_pcv2sf;
23221 case V4SF_FTYPE_PCV4SF_V4SF:
23222 type = v4sf_ftype_pcv4sf_v4sf;
23224 case V2DF_FTYPE_V2DF_PCDOUBLE:
23225 type = v2df_ftype_v2df_pcdouble;
23227 case V2DF_FTYPE_PCV2DF_V2DF:
23228 type = v2df_ftype_pcv2df_v2df;
23230 case VOID_FTYPE_PV2SF_V4SF:
23231 type = void_ftype_pv2sf_v4sf;
23233 case VOID_FTYPE_PV4DI_V4DI:
23234 type = void_ftype_pv4di_v4di;
23236 case VOID_FTYPE_PV2DI_V2DI:
23237 type = void_ftype_pv2di_v2di;
23239 case VOID_FTYPE_PCHAR_V32QI:
23240 type = void_ftype_pchar_v32qi;
23242 case VOID_FTYPE_PCHAR_V16QI:
23243 type = void_ftype_pchar_v16qi;
23245 case VOID_FTYPE_PFLOAT_V8SF:
23246 type = void_ftype_pfloat_v8sf;
23248 case VOID_FTYPE_PFLOAT_V4SF:
23249 type = void_ftype_pfloat_v4sf;
23251 case VOID_FTYPE_PDOUBLE_V4DF:
23252 type = void_ftype_pdouble_v4df;
23254 case VOID_FTYPE_PDOUBLE_V2DF:
23255 type = void_ftype_pdouble_v2df;
23257 case VOID_FTYPE_PDI_DI:
23258 type = void_ftype_pdi_di;
23260 case VOID_FTYPE_PINT_INT:
23261 type = void_ftype_pint_int;
23263 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23264 type = void_ftype_pv8sf_v8sf_v8sf;
23266 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23267 type = void_ftype_pv4df_v4df_v4df;
23269 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23270 type = void_ftype_pv4sf_v4sf_v4sf;
23272 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23273 type = void_ftype_pv2df_v2df_v2df;
23276 gcc_unreachable ();
23279 def_builtin (d->mask, d->name, type, d->code);
23282 /* Add all builtins with variable number of operands. */
23283 for (i = 0, d = bdesc_args;
23284 i < ARRAY_SIZE (bdesc_args);
23292 switch ((enum ix86_builtin_type) d->flag)
23294 case FLOAT_FTYPE_FLOAT:
23295 type = float_ftype_float;
23297 case INT_FTYPE_V8SF_V8SF_PTEST:
23298 type = int_ftype_v8sf_v8sf;
23300 case INT_FTYPE_V4DI_V4DI_PTEST:
23301 type = int_ftype_v4di_v4di;
23303 case INT_FTYPE_V4DF_V4DF_PTEST:
23304 type = int_ftype_v4df_v4df;
23306 case INT_FTYPE_V4SF_V4SF_PTEST:
23307 type = int_ftype_v4sf_v4sf;
23309 case INT_FTYPE_V2DI_V2DI_PTEST:
23310 type = int_ftype_v2di_v2di;
23312 case INT_FTYPE_V2DF_V2DF_PTEST:
23313 type = int_ftype_v2df_v2df;
23315 case INT_FTYPE_INT:
23316 type = int_ftype_int;
23318 case UINT64_FTYPE_INT:
23319 type = uint64_ftype_int;
23321 case INT64_FTYPE_INT64:
23322 type = int64_ftype_int64;
23324 case INT64_FTYPE_V4SF:
23325 type = int64_ftype_v4sf;
23327 case INT64_FTYPE_V2DF:
23328 type = int64_ftype_v2df;
23330 case INT_FTYPE_V16QI:
23331 type = int_ftype_v16qi;
23333 case INT_FTYPE_V8QI:
23334 type = int_ftype_v8qi;
23336 case INT_FTYPE_V8SF:
23337 type = int_ftype_v8sf;
23339 case INT_FTYPE_V4DF:
23340 type = int_ftype_v4df;
23342 case INT_FTYPE_V4SF:
23343 type = int_ftype_v4sf;
23345 case INT_FTYPE_V2DF:
23346 type = int_ftype_v2df;
23348 case V16QI_FTYPE_V16QI:
23349 type = v16qi_ftype_v16qi;
23351 case V8SI_FTYPE_V8SF:
23352 type = v8si_ftype_v8sf;
23354 case V8SI_FTYPE_V4SI:
23355 type = v8si_ftype_v4si;
23357 case V8HI_FTYPE_V8HI:
23358 type = v8hi_ftype_v8hi;
23360 case V8HI_FTYPE_V16QI:
23361 type = v8hi_ftype_v16qi;
23363 case V8QI_FTYPE_V8QI:
23364 type = v8qi_ftype_v8qi;
23366 case V8SF_FTYPE_V8SF:
23367 type = v8sf_ftype_v8sf;
23369 case V8SF_FTYPE_V8SI:
23370 type = v8sf_ftype_v8si;
23372 case V8SF_FTYPE_V4SF:
23373 type = v8sf_ftype_v4sf;
23375 case V4SI_FTYPE_V4DF:
23376 type = v4si_ftype_v4df;
23378 case V4SI_FTYPE_V4SI:
23379 type = v4si_ftype_v4si;
23381 case V4SI_FTYPE_V16QI:
23382 type = v4si_ftype_v16qi;
23384 case V4SI_FTYPE_V8SI:
23385 type = v4si_ftype_v8si;
23387 case V4SI_FTYPE_V8HI:
23388 type = v4si_ftype_v8hi;
23390 case V4SI_FTYPE_V4SF:
23391 type = v4si_ftype_v4sf;
23393 case V4SI_FTYPE_V2DF:
23394 type = v4si_ftype_v2df;
23396 case V4HI_FTYPE_V4HI:
23397 type = v4hi_ftype_v4hi;
23399 case V4DF_FTYPE_V4DF:
23400 type = v4df_ftype_v4df;
23402 case V4DF_FTYPE_V4SI:
23403 type = v4df_ftype_v4si;
23405 case V4DF_FTYPE_V4SF:
23406 type = v4df_ftype_v4sf;
23408 case V4DF_FTYPE_V2DF:
23409 type = v4df_ftype_v2df;
23411 case V4SF_FTYPE_V4SF:
23412 case V4SF_FTYPE_V4SF_VEC_MERGE:
23413 type = v4sf_ftype_v4sf;
23415 case V4SF_FTYPE_V8SF:
23416 type = v4sf_ftype_v8sf;
23418 case V4SF_FTYPE_V4SI:
23419 type = v4sf_ftype_v4si;
23421 case V4SF_FTYPE_V4DF:
23422 type = v4sf_ftype_v4df;
23424 case V4SF_FTYPE_V2DF:
23425 type = v4sf_ftype_v2df;
23427 case V2DI_FTYPE_V2DI:
23428 type = v2di_ftype_v2di;
23430 case V2DI_FTYPE_V16QI:
23431 type = v2di_ftype_v16qi;
23433 case V2DI_FTYPE_V8HI:
23434 type = v2di_ftype_v8hi;
23436 case V2DI_FTYPE_V4SI:
23437 type = v2di_ftype_v4si;
23439 case V2SI_FTYPE_V2SI:
23440 type = v2si_ftype_v2si;
23442 case V2SI_FTYPE_V4SF:
23443 type = v2si_ftype_v4sf;
23445 case V2SI_FTYPE_V2DF:
23446 type = v2si_ftype_v2df;
23448 case V2SI_FTYPE_V2SF:
23449 type = v2si_ftype_v2sf;
23451 case V2DF_FTYPE_V4DF:
23452 type = v2df_ftype_v4df;
23454 case V2DF_FTYPE_V4SF:
23455 type = v2df_ftype_v4sf;
23457 case V2DF_FTYPE_V2DF:
23458 case V2DF_FTYPE_V2DF_VEC_MERGE:
23459 type = v2df_ftype_v2df;
23461 case V2DF_FTYPE_V2SI:
23462 type = v2df_ftype_v2si;
23464 case V2DF_FTYPE_V4SI:
23465 type = v2df_ftype_v4si;
23467 case V2SF_FTYPE_V2SF:
23468 type = v2sf_ftype_v2sf;
23470 case V2SF_FTYPE_V2SI:
23471 type = v2sf_ftype_v2si;
23473 case V16QI_FTYPE_V16QI_V16QI:
23474 type = v16qi_ftype_v16qi_v16qi;
23476 case V16QI_FTYPE_V8HI_V8HI:
23477 type = v16qi_ftype_v8hi_v8hi;
23479 case V8QI_FTYPE_V8QI_V8QI:
23480 type = v8qi_ftype_v8qi_v8qi;
23482 case V8QI_FTYPE_V4HI_V4HI:
23483 type = v8qi_ftype_v4hi_v4hi;
23485 case V8HI_FTYPE_V8HI_V8HI:
23486 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23487 type = v8hi_ftype_v8hi_v8hi;
23489 case V8HI_FTYPE_V16QI_V16QI:
23490 type = v8hi_ftype_v16qi_v16qi;
23492 case V8HI_FTYPE_V4SI_V4SI:
23493 type = v8hi_ftype_v4si_v4si;
23495 case V8HI_FTYPE_V8HI_SI_COUNT:
23496 type = v8hi_ftype_v8hi_int;
23498 case V8SF_FTYPE_V8SF_V8SF:
23499 type = v8sf_ftype_v8sf_v8sf;
23501 case V8SF_FTYPE_V8SF_V8SI:
23502 type = v8sf_ftype_v8sf_v8si;
23504 case V4SI_FTYPE_V4SI_V4SI:
23505 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23506 type = v4si_ftype_v4si_v4si;
23508 case V4SI_FTYPE_V8HI_V8HI:
23509 type = v4si_ftype_v8hi_v8hi;
23511 case V4SI_FTYPE_V4SF_V4SF:
23512 type = v4si_ftype_v4sf_v4sf;
23514 case V4SI_FTYPE_V2DF_V2DF:
23515 type = v4si_ftype_v2df_v2df;
23517 case V4SI_FTYPE_V4SI_SI_COUNT:
23518 type = v4si_ftype_v4si_int;
23520 case V4HI_FTYPE_V4HI_V4HI:
23521 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23522 type = v4hi_ftype_v4hi_v4hi;
23524 case V4HI_FTYPE_V8QI_V8QI:
23525 type = v4hi_ftype_v8qi_v8qi;
23527 case V4HI_FTYPE_V2SI_V2SI:
23528 type = v4hi_ftype_v2si_v2si;
23530 case V4HI_FTYPE_V4HI_SI_COUNT:
23531 type = v4hi_ftype_v4hi_int;
23533 case V4DF_FTYPE_V4DF_V4DF:
23534 type = v4df_ftype_v4df_v4df;
23536 case V4DF_FTYPE_V4DF_V4DI:
23537 type = v4df_ftype_v4df_v4di;
23539 case V4SF_FTYPE_V4SF_V4SF:
23540 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23541 type = v4sf_ftype_v4sf_v4sf;
23543 case V4SF_FTYPE_V4SF_V4SI:
23544 type = v4sf_ftype_v4sf_v4si;
23546 case V4SF_FTYPE_V4SF_V2SI:
23547 type = v4sf_ftype_v4sf_v2si;
23549 case V4SF_FTYPE_V4SF_V2DF:
23550 type = v4sf_ftype_v4sf_v2df;
23552 case V4SF_FTYPE_V4SF_DI:
23553 type = v4sf_ftype_v4sf_int64;
23555 case V4SF_FTYPE_V4SF_SI:
23556 type = v4sf_ftype_v4sf_int;
23558 case V2DI_FTYPE_V2DI_V2DI:
23559 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23560 type = v2di_ftype_v2di_v2di;
23562 case V2DI_FTYPE_V16QI_V16QI:
23563 type = v2di_ftype_v16qi_v16qi;
23565 case V2DI_FTYPE_V4SI_V4SI:
23566 type = v2di_ftype_v4si_v4si;
23568 case V2DI_FTYPE_V2DI_V16QI:
23569 type = v2di_ftype_v2di_v16qi;
23571 case V2DI_FTYPE_V2DF_V2DF:
23572 type = v2di_ftype_v2df_v2df;
23574 case V2DI_FTYPE_V2DI_SI_COUNT:
23575 type = v2di_ftype_v2di_int;
23577 case V2SI_FTYPE_V2SI_V2SI:
23578 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23579 type = v2si_ftype_v2si_v2si;
23581 case V2SI_FTYPE_V4HI_V4HI:
23582 type = v2si_ftype_v4hi_v4hi;
23584 case V2SI_FTYPE_V2SF_V2SF:
23585 type = v2si_ftype_v2sf_v2sf;
23587 case V2SI_FTYPE_V2SI_SI_COUNT:
23588 type = v2si_ftype_v2si_int;
23590 case V2DF_FTYPE_V2DF_V2DF:
23591 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23592 type = v2df_ftype_v2df_v2df;
23594 case V2DF_FTYPE_V2DF_V4SF:
23595 type = v2df_ftype_v2df_v4sf;
23597 case V2DF_FTYPE_V2DF_V2DI:
23598 type = v2df_ftype_v2df_v2di;
23600 case V2DF_FTYPE_V2DF_DI:
23601 type = v2df_ftype_v2df_int64;
23603 case V2DF_FTYPE_V2DF_SI:
23604 type = v2df_ftype_v2df_int;
23606 case V2SF_FTYPE_V2SF_V2SF:
23607 type = v2sf_ftype_v2sf_v2sf;
23609 case V1DI_FTYPE_V1DI_V1DI:
23610 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23611 type = v1di_ftype_v1di_v1di;
23613 case V1DI_FTYPE_V8QI_V8QI:
23614 type = v1di_ftype_v8qi_v8qi;
23616 case V1DI_FTYPE_V2SI_V2SI:
23617 type = v1di_ftype_v2si_v2si;
23619 case V1DI_FTYPE_V1DI_SI_COUNT:
23620 type = v1di_ftype_v1di_int;
23622 case UINT64_FTYPE_UINT64_UINT64:
23623 type = uint64_ftype_uint64_uint64;
23625 case UINT_FTYPE_UINT_UINT:
23626 type = unsigned_ftype_unsigned_unsigned;
23628 case UINT_FTYPE_UINT_USHORT:
23629 type = unsigned_ftype_unsigned_ushort;
23631 case UINT_FTYPE_UINT_UCHAR:
23632 type = unsigned_ftype_unsigned_uchar;
23634 case UINT16_FTYPE_UINT16_INT:
23635 type = ushort_ftype_ushort_int;
23637 case UINT8_FTYPE_UINT8_INT:
23638 type = uchar_ftype_uchar_int;
23640 case V8HI_FTYPE_V8HI_INT:
23641 type = v8hi_ftype_v8hi_int;
23643 case V8SF_FTYPE_V8SF_INT:
23644 type = v8sf_ftype_v8sf_int;
23646 case V4SI_FTYPE_V4SI_INT:
23647 type = v4si_ftype_v4si_int;
23649 case V4SI_FTYPE_V8SI_INT:
23650 type = v4si_ftype_v8si_int;
23652 case V4HI_FTYPE_V4HI_INT:
23653 type = v4hi_ftype_v4hi_int;
23655 case V4DF_FTYPE_V4DF_INT:
23656 type = v4df_ftype_v4df_int;
23658 case V4SF_FTYPE_V4SF_INT:
23659 type = v4sf_ftype_v4sf_int;
23661 case V4SF_FTYPE_V8SF_INT:
23662 type = v4sf_ftype_v8sf_int;
23664 case V2DI_FTYPE_V2DI_INT:
23665 case V2DI2TI_FTYPE_V2DI_INT:
23666 type = v2di_ftype_v2di_int;
23668 case V2DF_FTYPE_V2DF_INT:
23669 type = v2df_ftype_v2df_int;
23671 case V2DF_FTYPE_V4DF_INT:
23672 type = v2df_ftype_v4df_int;
23674 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23675 type = v16qi_ftype_v16qi_v16qi_v16qi;
23677 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23678 type = v8sf_ftype_v8sf_v8sf_v8sf;
23680 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23681 type = v4df_ftype_v4df_v4df_v4df;
23683 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23684 type = v4sf_ftype_v4sf_v4sf_v4sf;
23686 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23687 type = v2df_ftype_v2df_v2df_v2df;
23689 case V16QI_FTYPE_V16QI_V16QI_INT:
23690 type = v16qi_ftype_v16qi_v16qi_int;
23692 case V8SI_FTYPE_V8SI_V8SI_INT:
23693 type = v8si_ftype_v8si_v8si_int;
23695 case V8SI_FTYPE_V8SI_V4SI_INT:
23696 type = v8si_ftype_v8si_v4si_int;
23698 case V8HI_FTYPE_V8HI_V8HI_INT:
23699 type = v8hi_ftype_v8hi_v8hi_int;
23701 case V8SF_FTYPE_V8SF_V8SF_INT:
23702 type = v8sf_ftype_v8sf_v8sf_int;
23704 case V8SF_FTYPE_V8SF_V4SF_INT:
23705 type = v8sf_ftype_v8sf_v4sf_int;
23707 case V4SI_FTYPE_V4SI_V4SI_INT:
23708 type = v4si_ftype_v4si_v4si_int;
23710 case V4DF_FTYPE_V4DF_V4DF_INT:
23711 type = v4df_ftype_v4df_v4df_int;
23713 case V4DF_FTYPE_V4DF_V2DF_INT:
23714 type = v4df_ftype_v4df_v2df_int;
23716 case V4SF_FTYPE_V4SF_V4SF_INT:
23717 type = v4sf_ftype_v4sf_v4sf_int;
23719 case V2DI_FTYPE_V2DI_V2DI_INT:
23720 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23721 type = v2di_ftype_v2di_v2di_int;
23723 case V2DF_FTYPE_V2DF_V2DF_INT:
23724 type = v2df_ftype_v2df_v2df_int;
23726 case V2DI_FTYPE_V2DI_UINT_UINT:
23727 type = v2di_ftype_v2di_unsigned_unsigned;
23729 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23730 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23732 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23733 type = v1di_ftype_v1di_v1di_int;
23736 gcc_unreachable ();
23739 def_builtin_const (d->mask, d->name, type, d->code);
23742 /* pcmpestr[im] insns. */
23743 for (i = 0, d = bdesc_pcmpestr;
23744 i < ARRAY_SIZE (bdesc_pcmpestr);
23747 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23748 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23750 ftype = int_ftype_v16qi_int_v16qi_int_int;
23751 def_builtin_const (d->mask, d->name, ftype, d->code);
23754 /* pcmpistr[im] insns. */
23755 for (i = 0, d = bdesc_pcmpistr;
23756 i < ARRAY_SIZE (bdesc_pcmpistr);
23759 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23760 ftype = v16qi_ftype_v16qi_v16qi_int;
23762 ftype = int_ftype_v16qi_v16qi_int;
23763 def_builtin_const (d->mask, d->name, ftype, d->code);
23766 /* comi/ucomi insns. */
23767 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23768 if (d->mask == OPTION_MASK_ISA_SSE2)
23769 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23771 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23774 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23775 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23777 /* SSE or 3DNow!A */
23778 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23781 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23783 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23784 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23787 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23788 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23791 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23792 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23793 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23794 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23795 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23796 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23799 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23802 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23803 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23805 /* Access to the vec_init patterns. */
23806 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23807 integer_type_node, NULL_TREE);
23808 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23810 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23811 short_integer_type_node,
23812 short_integer_type_node,
23813 short_integer_type_node, NULL_TREE);
23814 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23816 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23817 char_type_node, char_type_node,
23818 char_type_node, char_type_node,
23819 char_type_node, char_type_node,
23820 char_type_node, NULL_TREE);
23821 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23823 /* Access to the vec_extract patterns. */
23824 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23825 integer_type_node, NULL_TREE);
23826 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23828 ftype = build_function_type_list (long_long_integer_type_node,
23829 V2DI_type_node, integer_type_node,
23831 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23833 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23834 integer_type_node, NULL_TREE);
23835 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23837 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23838 integer_type_node, NULL_TREE);
23839 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23841 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23842 integer_type_node, NULL_TREE);
23843 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23845 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23846 integer_type_node, NULL_TREE);
23847 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23849 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23850 integer_type_node, NULL_TREE);
23851 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23853 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23854 integer_type_node, NULL_TREE);
23855 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23857 /* Access to the vec_set patterns. */
23858 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23860 integer_type_node, NULL_TREE);
23861 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23863 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23865 integer_type_node, NULL_TREE);
23866 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23868 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23870 integer_type_node, NULL_TREE);
23871 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23873 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23875 integer_type_node, NULL_TREE);
23876 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23878 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23880 integer_type_node, NULL_TREE);
23881 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23883 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23885 integer_type_node, NULL_TREE);
23886 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23888 /* Add SSE5 multi-arg argument instructions */
23889 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23891 tree mtype = NULL_TREE;
23896 switch ((enum multi_arg_type)d->flag)
23898 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23899 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23900 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23901 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23902 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23903 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23904 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23905 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23906 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23907 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23908 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23909 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23910 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23911 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23912 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23913 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23914 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23915 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23916 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23917 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23918 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23919 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23920 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23921 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23922 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23923 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23924 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23925 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23926 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23927 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23928 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23929 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23930 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23931 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23932 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23933 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23934 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23935 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23936 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23937 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23938 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23939 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23940 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23941 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23942 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23943 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23944 case MULTI_ARG_UNKNOWN:
23946 gcc_unreachable ();
23950 def_builtin_const (d->mask, d->name, mtype, d->code);
23954 /* Internal method for ix86_init_builtins. */
/* Register both the Microsoft-ABI and the System V-ABI flavors of the
   va_start/va_end/va_copy builtins (__builtin_ms_va_* and
   __builtin_sysv_va_*).  Each set is tagged with the matching "ms_abi" /
   "sysv_abi" function attribute so the correct va_list representation is
   used regardless of the calling convention of the enclosing function.
   NOTE(review): some lines of this function are elided in this view.  */
23957 ix86_init_builtins_va_builtins_abi (void)
23959 tree ms_va_ref, sysv_va_ref;
23960 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23961 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23962 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23963 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists used to mark each builtin with its ABI.  */
23967 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23968 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* The builtins take the va_list by reference.  */
23969 ms_va_ref = build_reference_type (ms_va_list_type_node);
23971 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: va_end takes a fixed argument list,
   va_start is varargs, va_copy takes two va_list operands.  */
23974 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23975 fnvoid_va_start_ms =
23976 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23977 fnvoid_va_end_sysv =
23978 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23979 fnvoid_va_start_sysv =
23980 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23982 fnvoid_va_copy_ms =
23983 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23985 fnvoid_va_copy_sysv =
23986 build_function_type_list (void_type_node, sysv_va_ref,
23987 sysv_va_ref, NULL_TREE);
/* Register all six builtins; both ABI flavors map onto the generic
   BUILT_IN_VA_START / BUILT_IN_VA_END / BUILT_IN_VA_COPY codes.  */
23989 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23990 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23991 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23992 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23993 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23994 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23995 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23996 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23997 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23998 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23999 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24000 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level target hook for creating all ix86 builtin functions and
   target-specific builtin types (__float80, __float128, the TFmode
   math helpers), then the MMX/SSE builtins and the per-ABI varargs
   builtins.  NOTE(review): some lines are elided in this view.  */
24004 ix86_init_builtins (void)
24006 tree float128_type_node = make_node (REAL_TYPE);
/* The __float80 type: reuse long double when it is already XFmode,
   otherwise lay out a distinct 80-bit REAL_TYPE.  */
24009 /* The __float80 type. */
24010 if (TYPE_MODE (long_double_type_node) == XFmode)
24011 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
24015 /* The __float80 type. */
24016 tree float80_type_node = make_node (REAL_TYPE);
24018 TYPE_PRECISION (float80_type_node) = 80;
24019 layout_type (float80_type_node);
24020 (*lang_hooks.types.register_builtin_type) (float80_type_node,
24024 /* The __float128 type. */
24025 TYPE_PRECISION (float128_type_node) = 128;
24026 layout_type (float128_type_node);
24027 (*lang_hooks.types.register_builtin_type) (float128_type_node,
/* TFmode builtins: __builtin_infq / __builtin_huge_valq take no
   arguments and return a __float128 value.  */
24030 /* TFmode support builtins. */
24031 ftype = build_function_type (float128_type_node, void_list_node);
24032 decl = add_builtin_function ("__builtin_infq", ftype,
24033 IX86_BUILTIN_INFQ, BUILT_IN_MD,
24035 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
24037 decl = add_builtin_function ("__builtin_huge_valq", ftype,
24038 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
24040 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
/* __builtin_fabsq / __builtin_copysignq are given library fallback
   names (__fabstf2 / __copysigntf3) so they can expand to calls when
   SSE2 is unavailable; both are marked TREE_READONLY (pure).  */
24042 /* We will expand them to normal call if SSE2 isn't available since
24043 they are used by libgcc. */
24044 ftype = build_function_type_list (float128_type_node,
24045 float128_type_node,
24047 decl = add_builtin_function ("__builtin_fabsq", ftype,
24048 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
24049 "__fabstf2", NULL_TREE);
24050 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
24051 TREE_READONLY (decl) = 1;
24053 ftype = build_function_type_list (float128_type_node,
24054 float128_type_node,
24055 float128_type_node,
24057 decl = add_builtin_function ("__builtin_copysignq", ftype,
24058 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
24059 "__copysigntf3", NULL_TREE);
24060 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
24061 TREE_READONLY (decl) = 1;
/* Finally register the vector builtins and the per-ABI varargs set.  */
24063 ix86_init_mmx_sse_builtins ();
24065 ix86_init_builtins_va_builtins_abi ();
24068 /* Errors in the source file can cause expand_expr to return const0_rtx
24069 where we expect a vector. To avoid crashing, use one of the vector
24070 clear instructions. */
/* X: rtx that should be a vector operand; MODE: the vector mode the insn
   expects.  Replaces a scalar const0_rtx with the all-zero vector constant
   of MODE so downstream predicates/gen functions do not crash.  */
24072 safe_vector_operand (rtx x, enum machine_mode mode)
24074 if (x == const0_rtx)
24075 x = CONST0_RTX (mode);
24079 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ICODE: insn code to emit; EXP: the builtin CALL_EXPR (two arguments);
   TARGET: suggested result rtx, or NULL.  Expands both arguments,
   sanitizes vector operands, forces operands into registers when the
   insn predicates reject them, and emits ICODE (target, op0, op1).
   NOTE(review): the emit/return tail of this function is elided here.  */
24082 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24085 tree arg0 = CALL_EXPR_ARG (exp, 0);
24086 tree arg1 = CALL_EXPR_ARG (exp, 1);
24087 rtx op0 = expand_normal (arg0);
24088 rtx op1 = expand_normal (arg1);
/* Modes required by the named pattern: result and the two inputs.  */
24089 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24090 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24091 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against error-recovery const0_rtx standing in for a vector.  */
24093 if (VECTOR_MODE_P (mode0))
24094 op0 = safe_vector_operand (op0, mode0);
24095 if (VECTOR_MODE_P (mode1))
24096 op1 = safe_vector_operand (op1, mode1);
/* Use a fresh pseudo unless the caller-provided target already has the
   right mode and satisfies the output predicate.  */
24098 if (optimize || !target
24099 || GET_MODE (target) != tmode
24100 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24101 target = gen_reg_rtx (tmode);
/* An SImode second operand destined for a TImode insn operand is widened
   by loading it into a V4SImode register and taking the TImode lowpart.  */
24103 if (GET_MODE (op1) == SImode && mode1 == TImode)
24105 rtx x = gen_reg_rtx (V4SImode);
24106 emit_insn (gen_sse2_loadd (x, op1));
24107 op1 = gen_lowpart (TImode, x);
24110 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24111 op0 = copy_to_mode_reg (mode0, op0);
24112 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24113 op1 = copy_to_mode_reg (mode1, op1);
24115 pat = GEN_FCN (icode) (target, op0, op1);
24124 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* ICODE: insn to emit; EXP: the builtin CALL_EXPR; TARGET: suggested
   result rtx or NULL; M_TYPE: classifies arity and operand kinds of the
   SSE5 multi-arg builtin; SUB_CODE: rtx comparison/sub-operation code
   used by the _CMP and _TF variants.  NOTE(review): several statements
   (nargs assignments, braces, returns) are elided in this view.  */
24127 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24128 enum multi_arg_type m_type,
24129 enum rtx_code sub_code)
24134 bool comparison_p = false;
24136 bool last_arg_constant = false;
24137 int num_memory = 0;
24140 enum machine_mode mode;
24143 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify the builtin: three-operand forms.  */
24147 case MULTI_ARG_3_SF:
24148 case MULTI_ARG_3_DF:
24149 case MULTI_ARG_3_DI:
24150 case MULTI_ARG_3_SI:
24151 case MULTI_ARG_3_SI_DI:
24152 case MULTI_ARG_3_HI:
24153 case MULTI_ARG_3_HI_SI:
24154 case MULTI_ARG_3_QI:
24155 case MULTI_ARG_3_PERMPS:
24156 case MULTI_ARG_3_PERMPD:
/* Plain two-operand forms.  */
24160 case MULTI_ARG_2_SF:
24161 case MULTI_ARG_2_DF:
24162 case MULTI_ARG_2_DI:
24163 case MULTI_ARG_2_SI:
24164 case MULTI_ARG_2_HI:
24165 case MULTI_ARG_2_QI:
/* Two-operand forms whose last argument must be an immediate.  */
24169 case MULTI_ARG_2_DI_IMM:
24170 case MULTI_ARG_2_SI_IMM:
24171 case MULTI_ARG_2_HI_IMM:
24172 case MULTI_ARG_2_QI_IMM:
24174 last_arg_constant = true;
/* One-operand (unary) forms, including widening conversions.  */
24177 case MULTI_ARG_1_SF:
24178 case MULTI_ARG_1_DF:
24179 case MULTI_ARG_1_DI:
24180 case MULTI_ARG_1_SI:
24181 case MULTI_ARG_1_HI:
24182 case MULTI_ARG_1_QI:
24183 case MULTI_ARG_1_SI_DI:
24184 case MULTI_ARG_1_HI_DI:
24185 case MULTI_ARG_1_HI_SI:
24186 case MULTI_ARG_1_QI_DI:
24187 case MULTI_ARG_1_QI_SI:
24188 case MULTI_ARG_1_QI_HI:
24189 case MULTI_ARG_1_PH2PS:
24190 case MULTI_ARG_1_PS2PH:
/* Comparison forms: an extra comparison-code operand is inserted.  */
24194 case MULTI_ARG_2_SF_CMP:
24195 case MULTI_ARG_2_DF_CMP:
24196 case MULTI_ARG_2_DI_CMP:
24197 case MULTI_ARG_2_SI_CMP:
24198 case MULTI_ARG_2_HI_CMP:
24199 case MULTI_ARG_2_QI_CMP:
24201 comparison_p = true;
/* "TF" (true/false) forms: SUB_CODE is passed as an integer operand.  */
24204 case MULTI_ARG_2_SF_TF:
24205 case MULTI_ARG_2_DF_TF:
24206 case MULTI_ARG_2_DI_TF:
24207 case MULTI_ARG_2_SI_TF:
24208 case MULTI_ARG_2_HI_TF:
24209 case MULTI_ARG_2_QI_TF:
24214 case MULTI_ARG_UNKNOWN:
24216 gcc_unreachable ();
/* Ensure a usable destination register.  */
24219 if (optimize || !target
24220 || GET_MODE (target) != tmode
24221 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24222 target = gen_reg_rtx (tmode);
24224 gcc_assert (nargs <= 4);
/* Expand and legitimize each argument.  For comparison builtins the insn
   operand index is shifted by one to skip the comparison operand.  */
24226 for (i = 0; i < nargs; i++)
24228 tree arg = CALL_EXPR_ARG (exp, i);
24229 rtx op = expand_normal (arg);
24230 int adjust = (comparison_p) ? 1 : 0;
24231 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24233 if (last_arg_constant && i == nargs-1)
24235 if (!CONST_INT_P (op))
24237 error ("last argument must be an immediate");
24238 return gen_reg_rtx (tmode);
24243 if (VECTOR_MODE_P (mode))
24244 op = safe_vector_operand (op, mode);
24246 /* If we aren't optimizing, only allow one memory operand to be
24248 if (memory_operand (op, mode))
24251 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24254 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24256 op = force_reg (mode, op);
24260 args[i].mode = mode;
/* Emit the pattern; the operand list shape depends on arity and on
   whether a comparison/sub-code operand is required.  */
24266 pat = GEN_FCN (icode) (target, args[0].op);
24271 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24272 GEN_INT ((int)sub_code));
24273 else if (! comparison_p)
24274 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24277 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24281 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24286 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24290 gcc_unreachable ();
24300 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24301 insns with vec_merge. */
/* ICODE: insn code; EXP: builtin CALL_EXPR with one argument; TARGET:
   suggested result rtx.  The single source is used for BOTH input
   operands of the vec_merge pattern (op1 is derived from op0 in an
   elided line).  NOTE(review): the emit/return tail is elided here.  */
24304 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24308 tree arg0 = CALL_EXPR_ARG (exp, 0);
24309 rtx op1, op0 = expand_normal (arg0);
24310 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24311 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Fresh pseudo unless TARGET already fits the output operand.  */
24313 if (optimize || !target
24314 || GET_MODE (target) != tmode
24315 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24316 target = gen_reg_rtx (tmode);
24318 if (VECTOR_MODE_P (mode0))
24319 op0 = safe_vector_operand (op0, mode0);
24321 if ((optimize && !register_operand (op0, mode0))
24322 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24323 op0 = copy_to_mode_reg (mode0, op0);
24326 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24327 op1 = copy_to_mode_reg (mode0, op1);
24329 pat = GEN_FCN (icode) (target, op0, op1);
24336 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* D: builtin descriptor (icode + comparison code); EXP: CALL_EXPR with
   two arguments; TARGET: suggested result; SWAP: true when the operands
   must be exchanged because the hardware only implements the mirrored
   comparison.  Emits ICODE (target, op0, op1, cmp) where cmp is the
   comparison rtx built from D->comparison.
   NOTE(review): part of the swap path and the return tail are elided.  */
24339 ix86_expand_sse_compare (const struct builtin_description *d,
24340 tree exp, rtx target, bool swap)
24343 tree arg0 = CALL_EXPR_ARG (exp, 0);
24344 tree arg1 = CALL_EXPR_ARG (exp, 1);
24345 rtx op0 = expand_normal (arg0);
24346 rtx op1 = expand_normal (arg1);
24348 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24349 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24350 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24351 enum rtx_code comparison = d->comparison;
24353 if (VECTOR_MODE_P (mode0))
24354 op0 = safe_vector_operand (op0, mode0);
24355 if (VECTOR_MODE_P (mode1))
24356 op1 = safe_vector_operand (op1, mode1);
24358 /* Swap operands if we have a comparison that isn't available in
24362 rtx tmp = gen_reg_rtx (mode1);
24363 emit_move_insn (tmp, op1);
24368 if (optimize || !target
24369 || GET_MODE (target) != tmode
24370 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24371 target = gen_reg_rtx (tmode);
/* Force operands into registers when predicates reject them.  */
24373 if ((optimize && !register_operand (op0, mode0))
24374 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24375 op0 = copy_to_mode_reg (mode0, op0);
24376 if ((optimize && !register_operand (op1, mode1))
24377 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24378 op1 = copy_to_mode_reg (mode1, op1);
/* The comparison itself is passed as an extra rtx operand.  */
24380 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24381 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24388 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* D: builtin descriptor; EXP: CALL_EXPR with two arguments.  Emits the
   (u)comis* compare, then materializes the resulting condition as a
   0/1 SImode value: an SImode pseudo is zeroed, its QImode low part is
   set from the flags via a setcc-style STRICT_LOW_PART store, and the
   SImode SUBREG_REG is returned.
   NOTE(review): the swap body and some closing lines are elided.  */
24391 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24395 tree arg0 = CALL_EXPR_ARG (exp, 0);
24396 tree arg1 = CALL_EXPR_ARG (exp, 1);
24397 rtx op0 = expand_normal (arg0);
24398 rtx op1 = expand_normal (arg1);
24399 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24400 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24401 enum rtx_code comparison = d->comparison;
24403 if (VECTOR_MODE_P (mode0))
24404 op0 = safe_vector_operand (op0, mode0);
24405 if (VECTOR_MODE_P (mode1))
24406 op1 = safe_vector_operand (op1, mode1);
24408 /* Swap operands if we have a comparison that isn't available in
24410 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the full SImode result, then write only the low byte below.  */
24417 target = gen_reg_rtx (SImode);
24418 emit_move_insn (target, const0_rtx);
24419 target = gen_rtx_SUBREG (QImode, target, 0);
24421 if ((optimize && !register_operand (op0, mode0))
24422 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24423 op0 = copy_to_mode_reg (mode0, op0);
24424 if ((optimize && !register_operand (op1, mode1))
24425 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24426 op1 = copy_to_mode_reg (mode1, op1);
24428 pat = GEN_FCN (d->icode) (op0, op1);
/* setcc into the low byte of the zeroed SImode pseudo.  */
24432 emit_insn (gen_rtx_SET (VOIDmode,
24433 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24434 gen_rtx_fmt_ee (comparison, QImode,
24438 return SUBREG_REG (target);
24441 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* D: builtin descriptor; EXP: CALL_EXPR with two vector arguments.
   Emits the ptest insn (which only sets flags), then converts the
   requested flag (D->comparison) into a 0/1 SImode result using the
   same zero-then-setcc-low-byte idiom as ix86_expand_sse_comi.
   NOTE(review): some closing lines are elided in this view.  */
24444 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24448 tree arg0 = CALL_EXPR_ARG (exp, 0);
24449 tree arg1 = CALL_EXPR_ARG (exp, 1);
24450 rtx op0 = expand_normal (arg0);
24451 rtx op1 = expand_normal (arg1);
24452 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24453 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24454 enum rtx_code comparison = d->comparison;
24456 if (VECTOR_MODE_P (mode0))
24457 op0 = safe_vector_operand (op0, mode0);
24458 if (VECTOR_MODE_P (mode1))
24459 op1 = safe_vector_operand (op1, mode1);
/* Zero the SImode result and write only its low byte from the flags.  */
24461 target = gen_reg_rtx (SImode);
24462 emit_move_insn (target, const0_rtx);
24463 target = gen_rtx_SUBREG (QImode, target, 0);
24465 if ((optimize && !register_operand (op0, mode0))
24466 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24467 op0 = copy_to_mode_reg (mode0, op0);
24468 if ((optimize && !register_operand (op1, mode1))
24469 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24470 op1 = copy_to_mode_reg (mode1, op1);
24472 pat = GEN_FCN (d->icode) (op0, op1);
24476 emit_insn (gen_rtx_SET (VOIDmode,
24477 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24478 gen_rtx_fmt_ee (comparison, QImode,
24482 return SUBREG_REG (target);
24485 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* D: builtin descriptor; EXP: CALL_EXPR with five arguments (two
   vectors, two explicit lengths, and an 8-bit immediate control byte).
   The pattern produces two results; depending on which builtin this is,
   one becomes the returned value and the other goes to a scratch.  For
   the flag-testing variants, D->flag names the flags-register mode and
   the result is the tested condition as a 0/1 value.
   NOTE(review): several lines (returns, emits, braces) are elided.  */
24488 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24489 tree exp, rtx target)
24492 tree arg0 = CALL_EXPR_ARG (exp, 0);
24493 tree arg1 = CALL_EXPR_ARG (exp, 1);
24494 tree arg2 = CALL_EXPR_ARG (exp, 2);
24495 tree arg3 = CALL_EXPR_ARG (exp, 3);
24496 tree arg4 = CALL_EXPR_ARG (exp, 4);
24497 rtx scratch0, scratch1;
24498 rtx op0 = expand_normal (arg0);
24499 rtx op1 = expand_normal (arg1);
24500 rtx op2 = expand_normal (arg2);
24501 rtx op3 = expand_normal (arg3);
24502 rtx op4 = expand_normal (arg4);
/* Per-operand modes of the seven-operand pcmpestr pattern.  */
24503 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24505 tmode0 = insn_data[d->icode].operand[0].mode;
24506 tmode1 = insn_data[d->icode].operand[1].mode;
24507 modev2 = insn_data[d->icode].operand[2].mode;
24508 modei3 = insn_data[d->icode].operand[3].mode;
24509 modev4 = insn_data[d->icode].operand[4].mode;
24510 modei5 = insn_data[d->icode].operand[5].mode;
24511 modeimm = insn_data[d->icode].operand[6].mode;
24513 if (VECTOR_MODE_P (modev2))
24514 op0 = safe_vector_operand (op0, modev2);
24515 if (VECTOR_MODE_P (modev4))
24516 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each input against the pattern's predicates.  */
24518 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24519 op0 = copy_to_mode_reg (modev2, op0);
24520 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24521 op1 = copy_to_mode_reg (modei3, op1);
24522 if ((optimize && !register_operand (op2, modev4))
24523 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24524 op2 = copy_to_mode_reg (modev4, op2);
24525 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24526 op3 = copy_to_mode_reg (modei5, op3);
/* The control byte must be an immediate; diagnose otherwise.  */
24528 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24530 error ("the fifth argument must be a 8-bit immediate");
/* pcmpestri: the index result is returned, mask result is scratch.  */
24534 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24536 if (optimize || !target
24537 || GET_MODE (target) != tmode0
24538 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24539 target = gen_reg_rtx (tmode0);
24541 scratch1 = gen_reg_rtx (tmode1);
24543 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: the mask result is returned, index result is scratch.  */
24545 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24547 if (optimize || !target
24548 || GET_MODE (target) != tmode1
24549 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24550 target = gen_reg_rtx (tmode1);
24552 scratch0 = gen_reg_rtx (tmode0);
24554 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-testing variants: both pattern outputs go to scratches and the
   requested flag bit becomes the 0/1 return value.  */
24558 gcc_assert (d->flag);
24560 scratch0 = gen_reg_rtx (tmode0);
24561 scratch1 = gen_reg_rtx (tmode1);
24563 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24573 target = gen_reg_rtx (SImode);
24574 emit_move_insn (target, const0_rtx);
24575 target = gen_rtx_SUBREG (QImode, target, 0);
24578 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24579 gen_rtx_fmt_ee (EQ, QImode,
24580 gen_rtx_REG ((enum machine_mode) d->flag,
24583 return SUBREG_REG (target);
24590 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* D: builtin descriptor; EXP: CALL_EXPR with three arguments (two
   vectors and an 8-bit immediate control byte).  Implicit-length
   sibling of ix86_expand_sse_pcmpestr: same result-selection scheme
   (index / mask / tested flag bit) but without the explicit length
   operands.  NOTE(review): several lines are elided in this view.  */
24593 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24594 tree exp, rtx target)
24597 tree arg0 = CALL_EXPR_ARG (exp, 0);
24598 tree arg1 = CALL_EXPR_ARG (exp, 1);
24599 tree arg2 = CALL_EXPR_ARG (exp, 2);
24600 rtx scratch0, scratch1;
24601 rtx op0 = expand_normal (arg0);
24602 rtx op1 = expand_normal (arg1);
24603 rtx op2 = expand_normal (arg2);
/* Per-operand modes of the five-operand pcmpistr pattern.  */
24604 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24606 tmode0 = insn_data[d->icode].operand[0].mode;
24607 tmode1 = insn_data[d->icode].operand[1].mode;
24608 modev2 = insn_data[d->icode].operand[2].mode;
24609 modev3 = insn_data[d->icode].operand[3].mode;
24610 modeimm = insn_data[d->icode].operand[4].mode;
24612 if (VECTOR_MODE_P (modev2))
24613 op0 = safe_vector_operand (op0, modev2);
24614 if (VECTOR_MODE_P (modev3))
24615 op1 = safe_vector_operand (op1, modev3);
24617 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24618 op0 = copy_to_mode_reg (modev2, op0);
24619 if ((optimize && !register_operand (op1, modev3))
24620 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24621 op1 = copy_to_mode_reg (modev3, op1);
/* The control byte must be an immediate; diagnose otherwise.  */
24623 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24625 error ("the third argument must be a 8-bit immediate")
/* pcmpistri: index result is returned, mask result is scratch.  */
24629 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24631 if (optimize || !target
24632 || GET_MODE (target) != tmode0
24633 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24634 target = gen_reg_rtx (tmode0);
24636 scratch1 = gen_reg_rtx (tmode1);
24638 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: mask result is returned, index result is scratch.  */
24640 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24642 if (optimize || !target
24643 || GET_MODE (target) != tmode1
24644 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24645 target = gen_reg_rtx (tmode1)
24647 scratch0 = gen_reg_rtx (tmode0);
24649 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-testing variants: both outputs go to scratches; D->flag names
   the flags-register mode and the tested bit is returned as 0/1.  */
24653 gcc_assert (d->flag);
24655 scratch0 = gen_reg_rtx (tmode0);
24656 scratch1 = gen_reg_rtx (tmode1);
24658 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24668 target = gen_reg_rtx (SImode);
24669 emit_move_insn (target, const0_rtx);
24670 target = gen_rtx_SUBREG (QImode, target, 0);
24673 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24674 gen_rtx_fmt_ee (EQ, QImode,
24675 gen_rtx_REG ((enum machine_mode) d->flag,
24678 return SUBREG_REG (target);
24684 /* Subroutine of ix86_expand_builtin to take care of insns with
24685 variable number of operands. */
24688 ix86_expand_args_builtin (const struct builtin_description *d,
24689 tree exp, rtx target)
24691 rtx pat, real_target;
24692 unsigned int i, nargs;
24693 unsigned int nargs_constant = 0;
24694 int num_memory = 0;
24698 enum machine_mode mode;
24700 bool last_arg_count = false;
24701 enum insn_code icode = d->icode;
24702 const struct insn_data *insn_p = &insn_data[icode];
24703 enum machine_mode tmode = insn_p->operand[0].mode;
24704 enum machine_mode rmode = VOIDmode;
24706 enum rtx_code comparison = d->comparison;
24708 switch ((enum ix86_builtin_type) d->flag)
24710 case INT_FTYPE_V8SF_V8SF_PTEST:
24711 case INT_FTYPE_V4DI_V4DI_PTEST:
24712 case INT_FTYPE_V4DF_V4DF_PTEST:
24713 case INT_FTYPE_V4SF_V4SF_PTEST:
24714 case INT_FTYPE_V2DI_V2DI_PTEST:
24715 case INT_FTYPE_V2DF_V2DF_PTEST:
24716 return ix86_expand_sse_ptest (d, exp, target);
24717 case FLOAT128_FTYPE_FLOAT128:
24718 case FLOAT_FTYPE_FLOAT:
24719 case INT_FTYPE_INT:
24720 case UINT64_FTYPE_INT:
24721 case INT64_FTYPE_INT64:
24722 case INT64_FTYPE_V4SF:
24723 case INT64_FTYPE_V2DF:
24724 case INT_FTYPE_V16QI:
24725 case INT_FTYPE_V8QI:
24726 case INT_FTYPE_V8SF:
24727 case INT_FTYPE_V4DF:
24728 case INT_FTYPE_V4SF:
24729 case INT_FTYPE_V2DF:
24730 case V16QI_FTYPE_V16QI:
24731 case V8SI_FTYPE_V8SF:
24732 case V8SI_FTYPE_V4SI:
24733 case V8HI_FTYPE_V8HI:
24734 case V8HI_FTYPE_V16QI:
24735 case V8QI_FTYPE_V8QI:
24736 case V8SF_FTYPE_V8SF:
24737 case V8SF_FTYPE_V8SI:
24738 case V8SF_FTYPE_V4SF:
24739 case V4SI_FTYPE_V4SI:
24740 case V4SI_FTYPE_V16QI:
24741 case V4SI_FTYPE_V4SF:
24742 case V4SI_FTYPE_V8SI:
24743 case V4SI_FTYPE_V8HI:
24744 case V4SI_FTYPE_V4DF:
24745 case V4SI_FTYPE_V2DF:
24746 case V4HI_FTYPE_V4HI:
24747 case V4DF_FTYPE_V4DF:
24748 case V4DF_FTYPE_V4SI:
24749 case V4DF_FTYPE_V4SF:
24750 case V4DF_FTYPE_V2DF:
24751 case V4SF_FTYPE_V4SF:
24752 case V4SF_FTYPE_V4SI:
24753 case V4SF_FTYPE_V8SF:
24754 case V4SF_FTYPE_V4DF:
24755 case V4SF_FTYPE_V2DF:
24756 case V2DI_FTYPE_V2DI:
24757 case V2DI_FTYPE_V16QI:
24758 case V2DI_FTYPE_V8HI:
24759 case V2DI_FTYPE_V4SI:
24760 case V2DF_FTYPE_V2DF:
24761 case V2DF_FTYPE_V4SI:
24762 case V2DF_FTYPE_V4DF:
24763 case V2DF_FTYPE_V4SF:
24764 case V2DF_FTYPE_V2SI:
24765 case V2SI_FTYPE_V2SI:
24766 case V2SI_FTYPE_V4SF:
24767 case V2SI_FTYPE_V2SF:
24768 case V2SI_FTYPE_V2DF:
24769 case V2SF_FTYPE_V2SF:
24770 case V2SF_FTYPE_V2SI:
24773 case V4SF_FTYPE_V4SF_VEC_MERGE:
24774 case V2DF_FTYPE_V2DF_VEC_MERGE:
24775 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24776 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24777 case V16QI_FTYPE_V16QI_V16QI:
24778 case V16QI_FTYPE_V8HI_V8HI:
24779 case V8QI_FTYPE_V8QI_V8QI:
24780 case V8QI_FTYPE_V4HI_V4HI:
24781 case V8HI_FTYPE_V8HI_V8HI:
24782 case V8HI_FTYPE_V16QI_V16QI:
24783 case V8HI_FTYPE_V4SI_V4SI:
24784 case V8SF_FTYPE_V8SF_V8SF:
24785 case V8SF_FTYPE_V8SF_V8SI:
24786 case V4SI_FTYPE_V4SI_V4SI:
24787 case V4SI_FTYPE_V8HI_V8HI:
24788 case V4SI_FTYPE_V4SF_V4SF:
24789 case V4SI_FTYPE_V2DF_V2DF:
24790 case V4HI_FTYPE_V4HI_V4HI:
24791 case V4HI_FTYPE_V8QI_V8QI:
24792 case V4HI_FTYPE_V2SI_V2SI:
24793 case V4DF_FTYPE_V4DF_V4DF:
24794 case V4DF_FTYPE_V4DF_V4DI:
24795 case V4SF_FTYPE_V4SF_V4SF:
24796 case V4SF_FTYPE_V4SF_V4SI:
24797 case V4SF_FTYPE_V4SF_V2SI:
24798 case V4SF_FTYPE_V4SF_V2DF:
24799 case V4SF_FTYPE_V4SF_DI:
24800 case V4SF_FTYPE_V4SF_SI:
24801 case V2DI_FTYPE_V2DI_V2DI:
24802 case V2DI_FTYPE_V16QI_V16QI:
24803 case V2DI_FTYPE_V4SI_V4SI:
24804 case V2DI_FTYPE_V2DI_V16QI:
24805 case V2DI_FTYPE_V2DF_V2DF:
24806 case V2SI_FTYPE_V2SI_V2SI:
24807 case V2SI_FTYPE_V4HI_V4HI:
24808 case V2SI_FTYPE_V2SF_V2SF:
24809 case V2DF_FTYPE_V2DF_V2DF:
24810 case V2DF_FTYPE_V2DF_V4SF:
24811 case V2DF_FTYPE_V2DF_V2DI:
24812 case V2DF_FTYPE_V2DF_DI:
24813 case V2DF_FTYPE_V2DF_SI:
24814 case V2SF_FTYPE_V2SF_V2SF:
24815 case V1DI_FTYPE_V1DI_V1DI:
24816 case V1DI_FTYPE_V8QI_V8QI:
24817 case V1DI_FTYPE_V2SI_V2SI:
24818 if (comparison == UNKNOWN)
24819 return ix86_expand_binop_builtin (icode, exp, target);
24822 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24823 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24824 gcc_assert (comparison != UNKNOWN);
24828 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24829 case V8HI_FTYPE_V8HI_SI_COUNT:
24830 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24831 case V4SI_FTYPE_V4SI_SI_COUNT:
24832 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24833 case V4HI_FTYPE_V4HI_SI_COUNT:
24834 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24835 case V2DI_FTYPE_V2DI_SI_COUNT:
24836 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24837 case V2SI_FTYPE_V2SI_SI_COUNT:
24838 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24839 case V1DI_FTYPE_V1DI_SI_COUNT:
24841 last_arg_count = true;
24843 case UINT64_FTYPE_UINT64_UINT64:
24844 case UINT_FTYPE_UINT_UINT:
24845 case UINT_FTYPE_UINT_USHORT:
24846 case UINT_FTYPE_UINT_UCHAR:
24847 case UINT16_FTYPE_UINT16_INT:
24848 case UINT8_FTYPE_UINT8_INT:
24851 case V2DI2TI_FTYPE_V2DI_INT:
24854 nargs_constant = 1;
24856 case V8HI_FTYPE_V8HI_INT:
24857 case V8SF_FTYPE_V8SF_INT:
24858 case V4SI_FTYPE_V4SI_INT:
24859 case V4SI_FTYPE_V8SI_INT:
24860 case V4HI_FTYPE_V4HI_INT:
24861 case V4DF_FTYPE_V4DF_INT:
24862 case V4SF_FTYPE_V4SF_INT:
24863 case V4SF_FTYPE_V8SF_INT:
24864 case V2DI_FTYPE_V2DI_INT:
24865 case V2DF_FTYPE_V2DF_INT:
24866 case V2DF_FTYPE_V4DF_INT:
24868 nargs_constant = 1;
24870 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24871 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24872 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24873 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24874 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24877 case V16QI_FTYPE_V16QI_V16QI_INT:
24878 case V8HI_FTYPE_V8HI_V8HI_INT:
24879 case V8SI_FTYPE_V8SI_V8SI_INT:
24880 case V8SI_FTYPE_V8SI_V4SI_INT:
24881 case V8SF_FTYPE_V8SF_V8SF_INT:
24882 case V8SF_FTYPE_V8SF_V4SF_INT:
24883 case V4SI_FTYPE_V4SI_V4SI_INT:
24884 case V4DF_FTYPE_V4DF_V4DF_INT:
24885 case V4DF_FTYPE_V4DF_V2DF_INT:
24886 case V4SF_FTYPE_V4SF_V4SF_INT:
24887 case V2DI_FTYPE_V2DI_V2DI_INT:
24888 case V2DF_FTYPE_V2DF_V2DF_INT:
24890 nargs_constant = 1;
24892 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24895 nargs_constant = 1;
24897 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24900 nargs_constant = 1;
24902 case V2DI_FTYPE_V2DI_UINT_UINT:
24904 nargs_constant = 2;
24906 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24908 nargs_constant = 2;
24911 gcc_unreachable ();
24914 gcc_assert (nargs <= ARRAY_SIZE (args));
24916 if (comparison != UNKNOWN)
24918 gcc_assert (nargs == 2);
24919 return ix86_expand_sse_compare (d, exp, target, swap);
24922 if (rmode == VOIDmode || rmode == tmode)
24926 || GET_MODE (target) != tmode
24927 || ! (*insn_p->operand[0].predicate) (target, tmode))
24928 target = gen_reg_rtx (tmode);
24929 real_target = target;
24933 target = gen_reg_rtx (rmode);
24934 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24937 for (i = 0; i < nargs; i++)
24939 tree arg = CALL_EXPR_ARG (exp, i);
24940 rtx op = expand_normal (arg);
24941 enum machine_mode mode = insn_p->operand[i + 1].mode;
24942 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24944 if (last_arg_count && (i + 1) == nargs)
24946 /* SIMD shift insns take either an 8-bit immediate or
24947 register as count. But builtin functions take int as
24948 count. If count doesn't match, we put it in register. */
24951 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24952 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24953 op = copy_to_reg (op);
24956 else if ((nargs - i) <= nargs_constant)
24961 case CODE_FOR_sse4_1_roundpd:
24962 case CODE_FOR_sse4_1_roundps:
24963 case CODE_FOR_sse4_1_roundsd:
24964 case CODE_FOR_sse4_1_roundss:
24965 case CODE_FOR_sse4_1_blendps:
24966 case CODE_FOR_avx_blendpd256:
24967 case CODE_FOR_avx_vpermilv4df:
24968 case CODE_FOR_avx_roundpd256:
24969 case CODE_FOR_avx_roundps256:
24970 error ("the last argument must be a 4-bit immediate");
24973 case CODE_FOR_sse4_1_blendpd:
24974 case CODE_FOR_avx_vpermilv2df:
24975 error ("the last argument must be a 2-bit immediate");
24978 case CODE_FOR_avx_vextractf128v4df:
24979 case CODE_FOR_avx_vextractf128v8sf:
24980 case CODE_FOR_avx_vextractf128v8si:
24981 case CODE_FOR_avx_vinsertf128v4df:
24982 case CODE_FOR_avx_vinsertf128v8sf:
24983 case CODE_FOR_avx_vinsertf128v8si:
24984 error ("the last argument must be a 1-bit immediate");
24987 case CODE_FOR_avx_cmpsdv2df3:
24988 case CODE_FOR_avx_cmpssv4sf3:
24989 case CODE_FOR_avx_cmppdv2df3:
24990 case CODE_FOR_avx_cmppsv4sf3:
24991 case CODE_FOR_avx_cmppdv4df3:
24992 case CODE_FOR_avx_cmppsv8sf3:
24993 error ("the last argument must be a 5-bit immediate");
24997 switch (nargs_constant)
25000 if ((nargs - i) == nargs_constant)
25002 error ("the next to last argument must be an 8-bit immediate");
25006 error ("the last argument must be an 8-bit immediate");
25009 gcc_unreachable ();
25016 if (VECTOR_MODE_P (mode))
25017 op = safe_vector_operand (op, mode);
25019 /* If we aren't optimizing, only allow one memory operand to
25021 if (memory_operand (op, mode))
25024 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25026 if (optimize || !match || num_memory > 1)
25027 op = copy_to_mode_reg (mode, op);
25031 op = copy_to_reg (op);
25032 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25037 args[i].mode = mode;
25043 pat = GEN_FCN (icode) (real_target, args[0].op);
25046 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25049 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25053 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25054 args[2].op, args[3].op);
25057 gcc_unreachable ();
25067 /* Subroutine of ix86_expand_builtin to take care of special insns
25068 with variable number of operands. */
/* NOTE(review): this listing is line-sampled; intermediate source lines
   (declarations of ARGS/ARG/OP/PAT, `break;` statements, braces) are
   missing.  Comments below describe only the code that is visible.  */
25071 ix86_expand_special_args_builtin (const struct builtin_description *d,
25072 tree exp, rtx target)
25076 unsigned int i, nargs, arg_adjust, memory;
25080 enum machine_mode mode;
25082 enum insn_code icode = d->icode;
25083 bool last_arg_constant = false;
25084 const struct insn_data *insn_p = &insn_data[icode];
25085 enum machine_mode tmode = insn_p->operand[0].mode;
/* KLASS records whether this builtin loads from memory (result in a
   register) or stores to memory (result discarded, see final return).  */
25086 enum { load, store } klass;
/* Dispatch on the builtin's special-args signature to set up NARGS,
   KLASS and which operand slot (if any) is the memory operand.  */
25088 switch ((enum ix86_special_builtin_type) d->flag)
25090 case VOID_FTYPE_VOID:
25091 emit_insn (GEN_FCN (icode) (target));
25093 case UINT64_FTYPE_VOID:
25098 case UINT64_FTYPE_PUNSIGNED:
25099 case V2DI_FTYPE_PV2DI:
25100 case V32QI_FTYPE_PCCHAR:
25101 case V16QI_FTYPE_PCCHAR:
25102 case V8SF_FTYPE_PCV4SF:
25103 case V8SF_FTYPE_PCFLOAT:
25104 case V4SF_FTYPE_PCFLOAT:
25105 case V4DF_FTYPE_PCV2DF:
25106 case V4DF_FTYPE_PCDOUBLE:
25107 case V2DF_FTYPE_PCDOUBLE:
/* Store-style signatures: first call argument is the destination
   pointer, so the insn's "target" slot is the memory operand.  */
25112 case VOID_FTYPE_PV2SF_V4SF:
25113 case VOID_FTYPE_PV4DI_V4DI:
25114 case VOID_FTYPE_PV2DI_V2DI:
25115 case VOID_FTYPE_PCHAR_V32QI:
25116 case VOID_FTYPE_PCHAR_V16QI:
25117 case VOID_FTYPE_PFLOAT_V8SF:
25118 case VOID_FTYPE_PFLOAT_V4SF:
25119 case VOID_FTYPE_PDOUBLE_V4DF:
25120 case VOID_FTYPE_PDOUBLE_V2DF:
25121 case VOID_FTYPE_PDI_DI:
25122 case VOID_FTYPE_PINT_INT:
25125 /* Reserve memory operand for target. */
25126 memory = ARRAY_SIZE (args);
25128 case V4SF_FTYPE_V4SF_PCV2SF:
25129 case V2DF_FTYPE_V2DF_PCDOUBLE:
25134 case V8SF_FTYPE_PCV8SF_V8SF:
25135 case V4DF_FTYPE_PCV4DF_V4DF:
25136 case V4SF_FTYPE_PCV4SF_V4SF:
25137 case V2DF_FTYPE_PCV2DF_V2DF:
25142 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25143 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25144 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25145 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25148 /* Reserve memory operand for target. */
25149 memory = ARRAY_SIZE (args);
25152 gcc_unreachable ();
25155 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, TARGET must not have been supplied by the caller; build
   a MEM over the (register-forced) destination pointer instead.  */
25157 if (klass == store)
25159 arg = CALL_EXPR_ARG (exp, 0);
25160 op = expand_normal (arg);
25161 gcc_assert (target == 0);
25162 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25170 || GET_MODE (target) != tmode
25171 || ! (*insn_p->operand[0].predicate) (target, tmode))
25172 target = gen_reg_rtx (tmode);
/* Expand each call argument and massage it until it satisfies the
   corresponding insn operand predicate.  */
25175 for (i = 0; i < nargs; i++)
25177 enum machine_mode mode = insn_p->operand[i + 1].mode;
25180 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25181 op = expand_normal (arg);
25182 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25184 if (last_arg_constant && (i + 1) == nargs)
25190 error ("the last argument must be an 8-bit immediate");
25198 /* This must be the memory operand. */
25199 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25200 gcc_assert (GET_MODE (op) == mode
25201 || GET_MODE (op) == VOIDmode);
25205 /* This must be register. */
25206 if (VECTOR_MODE_P (mode))
25207 op = safe_vector_operand (op, mode);
25209 gcc_assert (GET_MODE (op) == mode
25210 || GET_MODE (op) == VOIDmode);
25211 op = copy_to_mode_reg (mode, op);
25216 args[i].mode = mode;
/* Emit the instruction pattern with the right operand count.  */
25222 pat = GEN_FCN (icode) (target);
25225 pat = GEN_FCN (icode) (target, args[0].op);
25228 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25231 gcc_unreachable ();
/* Stores produce no value; loads return the result register.  */
25237 return klass == store ? 0 : target;
25240 /* Return the integer constant in ARG. Constrain it to be in the range
25241 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): the tail of this function (the return statements) is
   not visible in this line-sampled listing.  */
25244 get_element_number (tree vec_type, tree arg)
25246 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* ARG must be a non-negative integer constant no larger than the last
   valid lane index of VEC_TYPE.  */
25248 if (!host_integerp (arg, 1)
25249 || (elt = tree_low_cst (arg, 1), elt > max))
25251 error ("selector must be an integer constant in the range 0..%wi", max);
25258 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25259 ix86_expand_vector_init. We DO have language-level syntax for this, in
25260 the form of (type){ init-list }. Except that since we can't place emms
25261 instructions from inside the compiler, we can't allow the use of MMX
25262 registers unless the user explicitly asks for it. So we do *not* define
25263 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25264 we have builtins invoked by mmintrin.h that gives us license to emit
25265 these sorts of instructions. */
25268 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25270 enum machine_mode tmode = TYPE_MODE (type);
25271 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25272 int i, n_elt = GET_MODE_NUNITS (tmode);
25273 rtvec v = rtvec_alloc (n_elt);
/* The call must supply exactly one initializer per vector lane.  */
25275 gcc_assert (VECTOR_MODE_P (tmode));
25276 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each initializer and narrow it to the element mode.  */
25278 for (i = 0; i < n_elt; ++i)
25280 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25281 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25284 if (!target || !register_operand (target, tmode))
25285 target = gen_reg_rtx (tmode);
25287 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25291 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25292 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25293 had a language-level syntax for referencing vector elements. */
25296 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25298 enum machine_mode tmode, mode0;
/* arg0 is the source vector, arg1 the constant lane selector.  */
25303 arg0 = CALL_EXPR_ARG (exp, 0);
25304 arg1 = CALL_EXPR_ARG (exp, 1);
25306 op0 = expand_normal (arg0);
25307 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the whole-vector mode.  */
25309 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25310 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25311 gcc_assert (VECTOR_MODE_P (mode0));
25313 op0 = force_reg (mode0, op0);
25315 if (optimize || !target || !register_operand (target, tmode))
25316 target = gen_reg_rtx (tmode);
25318 ix86_expand_vector_extract (true, target, op0, elt);
25323 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25324 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25325 a language-level syntax for referencing vector elements. */
25328 ix86_expand_vec_set_builtin (tree exp)
25330 enum machine_mode tmode, mode1;
25331 tree arg0, arg1, arg2;
25333 rtx op0, op1, target;
/* arg0 = source vector, arg1 = new element value, arg2 = lane index.  */
25335 arg0 = CALL_EXPR_ARG (exp, 0);
25336 arg1 = CALL_EXPR_ARG (exp, 1);
25337 arg2 = CALL_EXPR_ARG (exp, 2);
25339 tmode = TYPE_MODE (TREE_TYPE (arg0));
25340 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25341 gcc_assert (VECTOR_MODE_P (tmode));
25343 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25344 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25345 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Coerce the element value into the element mode if needed.  */
25347 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25348 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25350 op0 = force_reg (tmode, op0);
25351 op1 = force_reg (mode1, op1);
25353 /* OP0 is the source of these builtin functions and shouldn't be
25354 modified. Create a copy, use it and return it as target. */
25355 target = gen_reg_rtx (tmode);
25356 emit_move_insn (target, op0);
25357 ix86_expand_vector_set (true, target, op1, elt);
25362 /* Expand an expression EXP that calls a built-in function,
25363 with result going to TARGET if that's convenient
25364 (and in mode MODE if that's convenient).
25365 SUBTARGET may be used as the target for computing one of EXP's operands.
25366 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): line-sampled listing — `break;`/`return` statements,
   braces and some declarations between the visible lines are missing.
   Comments describe only what is visible.  */
25369 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25370 enum machine_mode mode ATTRIBUTE_UNUSED,
25371 int ignore ATTRIBUTE_UNUSED)
25373 const struct builtin_description *d;
25375 enum insn_code icode;
25376 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25377 tree arg0, arg1, arg2;
25378 rtx op0, op1, op2, pat;
25379 enum machine_mode mode0, mode1, mode2;
25380 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25382 /* Determine whether the builtin function is available under the current ISA.
25383 Originally the builtin was not created if it wasn't applicable to the
25384 current ISA based on the command line switches. With function specific
25385 options, we need to check in the context of the function making the call
25386 whether it is supported. */
25387 if (ix86_builtins_isa[fcode].isa
25388 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25390 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25391 NULL, NULL, false);
25394 error ("%qE needs unknown isa option", fndecl);
25397 gcc_assert (opts != NULL);
25398 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins that need custom expansion are handled case by case below;
   everything else falls through to the table-driven loops at the end.  */
25406 case IX86_BUILTIN_MASKMOVQ:
25407 case IX86_BUILTIN_MASKMOVDQU:
25408 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25409 ? CODE_FOR_mmx_maskmovq
25410 : CODE_FOR_sse2_maskmovdqu);
25411 /* Note the arg order is different from the operand order. */
25412 arg1 = CALL_EXPR_ARG (exp, 0);
25413 arg2 = CALL_EXPR_ARG (exp, 1);
25414 arg0 = CALL_EXPR_ARG (exp, 2);
25415 op0 = expand_normal (arg0);
25416 op1 = expand_normal (arg1);
25417 op2 = expand_normal (arg2);
25418 mode0 = insn_data[icode].operand[0].mode;
25419 mode1 = insn_data[icode].operand[1].mode;
25420 mode2 = insn_data[icode].operand[2].mode;
/* The destination pointer becomes the MEM operand of the maskmov.  */
25422 op0 = force_reg (Pmode, op0);
25423 op0 = gen_rtx_MEM (mode1, op0);
25425 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25426 op0 = copy_to_mode_reg (mode0, op0);
25427 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25428 op1 = copy_to_mode_reg (mode1, op1);
25429 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25430 op2 = copy_to_mode_reg (mode2, op2);
25431 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a dedicated stack slot.  */
25437 case IX86_BUILTIN_LDMXCSR:
25438 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25439 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25440 emit_move_insn (target, op0);
25441 emit_insn (gen_sse_ldmxcsr (target));
25444 case IX86_BUILTIN_STMXCSR:
25445 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25446 emit_insn (gen_sse_stmxcsr (target));
25447 return copy_to_mode_reg (SImode, target);
25449 case IX86_BUILTIN_CLFLUSH:
25450 arg0 = CALL_EXPR_ARG (exp, 0);
25451 op0 = expand_normal (arg0);
25452 icode = CODE_FOR_sse2_clflush;
25453 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25454 op0 = copy_to_mode_reg (Pmode, op0);
25456 emit_insn (gen_sse2_clflush (op0));
25459 case IX86_BUILTIN_MONITOR:
25460 arg0 = CALL_EXPR_ARG (exp, 0);
25461 arg1 = CALL_EXPR_ARG (exp, 1);
25462 arg2 = CALL_EXPR_ARG (exp, 2);
25463 op0 = expand_normal (arg0);
25464 op1 = expand_normal (arg1);
25465 op2 = expand_normal (arg2);
25467 op0 = copy_to_mode_reg (Pmode, op0);
25469 op1 = copy_to_mode_reg (SImode, op1);
25471 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor selects the SI/DI variant for the target.  */
25472 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25475 case IX86_BUILTIN_MWAIT:
25476 arg0 = CALL_EXPR_ARG (exp, 0);
25477 arg1 = CALL_EXPR_ARG (exp, 1);
25478 op0 = expand_normal (arg0);
25479 op1 = expand_normal (arg1);
25481 op0 = copy_to_mode_reg (SImode, op0);
25483 op1 = copy_to_mode_reg (SImode, op1);
25484 emit_insn (gen_sse3_mwait (op0, op1));
/* MMX vec_init/vec_ext/vec_set go through dedicated helpers (see the
   comments on those helpers for why these exist as builtins).  */
25487 case IX86_BUILTIN_VEC_INIT_V2SI:
25488 case IX86_BUILTIN_VEC_INIT_V4HI:
25489 case IX86_BUILTIN_VEC_INIT_V8QI:
25490 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25492 case IX86_BUILTIN_VEC_EXT_V2DF:
25493 case IX86_BUILTIN_VEC_EXT_V2DI:
25494 case IX86_BUILTIN_VEC_EXT_V4SF:
25495 case IX86_BUILTIN_VEC_EXT_V4SI:
25496 case IX86_BUILTIN_VEC_EXT_V8HI:
25497 case IX86_BUILTIN_VEC_EXT_V2SI:
25498 case IX86_BUILTIN_VEC_EXT_V4HI:
25499 case IX86_BUILTIN_VEC_EXT_V16QI:
25500 return ix86_expand_vec_ext_builtin (exp, target);
25502 case IX86_BUILTIN_VEC_SET_V2DI:
25503 case IX86_BUILTIN_VEC_SET_V4SF:
25504 case IX86_BUILTIN_VEC_SET_V4SI:
25505 case IX86_BUILTIN_VEC_SET_V8HI:
25506 case IX86_BUILTIN_VEC_SET_V4HI:
25507 case IX86_BUILTIN_VEC_SET_V16QI:
25508 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: materialize the constant from memory.  */
25510 case IX86_BUILTIN_INFQ:
25511 case IX86_BUILTIN_HUGE_VALQ:
25513 REAL_VALUE_TYPE inf;
25517 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25519 tmp = validize_mem (force_const_mem (mode, tmp));
25522 target = gen_reg_rtx (mode);
25524 emit_move_insn (target, tmp);
/* Table-driven expansion: scan each builtin-description table for a
   matching function code and hand off to the generic expanders.  */
25532 for (i = 0, d = bdesc_special_args;
25533 i < ARRAY_SIZE (bdesc_special_args);
25535 if (d->code == fcode)
25536 return ix86_expand_special_args_builtin (d, exp, target);
25538 for (i = 0, d = bdesc_args;
25539 i < ARRAY_SIZE (bdesc_args);
25541 if (d->code == fcode)
25544 case IX86_BUILTIN_FABSQ:
25545 case IX86_BUILTIN_COPYSIGNQ:
25547 /* Emit a normal call if SSE2 isn't available. */
25548 return expand_call (exp, target, ignore);
25550 return ix86_expand_args_builtin (d, exp, target);
25553 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25554 if (d->code == fcode)
25555 return ix86_expand_sse_comi (d, exp, target);
25557 for (i = 0, d = bdesc_pcmpestr;
25558 i < ARRAY_SIZE (bdesc_pcmpestr);
25560 if (d->code == fcode)
25561 return ix86_expand_sse_pcmpestr (d, exp, target);
25563 for (i = 0, d = bdesc_pcmpistr;
25564 i < ARRAY_SIZE (bdesc_pcmpistr);
25566 if (d->code == fcode)
25567 return ix86_expand_sse_pcmpistr (d, exp, target);
25569 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25570 if (d->code == fcode)
25571 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25572 (enum multi_arg_type)d->flag,
/* Every valid function code must have been handled above.  */
25575 gcc_unreachable ();
25578 /* Returns a function decl for a vectorized version of the builtin function
25579 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25580 if it is not available. */
25583 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25586 enum machine_mode in_mode, out_mode;
/* Both sides of the mapping must be vector types.  */
25589 if (TREE_CODE (type_out) != VECTOR_TYPE
25590 || TREE_CODE (type_in) != VECTOR_TYPE)
25593 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25594 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25595 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25596 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Direct mappings from scalar math builtins to SSE builtins, keyed on
   exact element mode and lane count.  */
25600 case BUILT_IN_SQRT:
25601 if (out_mode == DFmode && out_n == 2
25602 && in_mode == DFmode && in_n == 2)
25603 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25606 case BUILT_IN_SQRTF:
25607 if (out_mode == SFmode && out_n == 4
25608 && in_mode == SFmode && in_n == 4)
25609 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25612 case BUILT_IN_LRINT:
25613 if (out_mode == SImode && out_n == 4
25614 && in_mode == DFmode && in_n == 2)
25615 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25618 case BUILT_IN_LRINTF:
25619 if (out_mode == SImode && out_n == 4
25620 && in_mode == SFmode && in_n == 4)
25621 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25628 /* Dispatch to a handler for a vectorization library. */
25629 if (ix86_veclib_handler)
25630 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25636 /* Handler for an SVML-style interface to
25637 a library with vectorized intrinsics. */
/* NOTE(review): line-sampled listing — the declaration of NAME and the
   n/in_n comparison lines are among the missing lines.  */
25640 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25643 tree fntype, new_fndecl, args;
25646 enum machine_mode el_mode, in_mode;
25649 /* The SVML is suitable for unsafe math only. */
25650 if (!flag_unsafe_math_optimizations)
25653 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25654 n = TYPE_VECTOR_SUBPARTS (type_out);
25655 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25656 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25657 if (el_mode != in_mode
/* Double-precision entry points: require a 2-lane DFmode vector.  */
25665 case BUILT_IN_LOG10:
25667 case BUILT_IN_TANH:
25669 case BUILT_IN_ATAN:
25670 case BUILT_IN_ATAN2:
25671 case BUILT_IN_ATANH:
25672 case BUILT_IN_CBRT:
25673 case BUILT_IN_SINH:
25675 case BUILT_IN_ASINH:
25676 case BUILT_IN_ASIN:
25677 case BUILT_IN_COSH:
25679 case BUILT_IN_ACOSH:
25680 case BUILT_IN_ACOS:
25681 if (el_mode != DFmode || n != 2)
/* Single-precision entry points: require a 4-lane SFmode vector.  */
25685 case BUILT_IN_EXPF:
25686 case BUILT_IN_LOGF:
25687 case BUILT_IN_LOG10F:
25688 case BUILT_IN_POWF:
25689 case BUILT_IN_TANHF:
25690 case BUILT_IN_TANF:
25691 case BUILT_IN_ATANF:
25692 case BUILT_IN_ATAN2F:
25693 case BUILT_IN_ATANHF:
25694 case BUILT_IN_CBRTF:
25695 case BUILT_IN_SINHF:
25696 case BUILT_IN_SINF:
25697 case BUILT_IN_ASINHF:
25698 case BUILT_IN_ASINF:
25699 case BUILT_IN_COSHF:
25700 case BUILT_IN_COSF:
25701 case BUILT_IN_ACOSHF:
25702 case BUILT_IN_ACOSF:
25703 if (el_mode != SFmode || n != 4)
/* Derive the SVML symbol name from the builtin's "__builtin_" name;
   log/logf are irregular ("Ln") and handled specially.  */
25711 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25713 if (fn == BUILT_IN_LOGF)
25714 strcpy (name, "vmlsLn4");
25715 else if (fn == BUILT_IN_LOG)
25716 strcpy (name, "vmldLn2");
25719 sprintf (name, "vmls%s", bname+10);
25720 name[strlen (name)-1] = '4';
25723 sprintf (name, "vmld%s2", bname+10);
25725 /* Convert to uppercase. */
/* Build the vector function type with as many TYPE_IN parameters as
   the scalar builtin has arguments.  */
25729 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25730 args = TREE_CHAIN (args))
25734 fntype = build_function_type_list (type_out, type_in, NULL);
25736 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25738 /* Build a function declaration for the vectorized function. */
25739 new_fndecl = build_decl (BUILTINS_LOCATION,
25740 FUNCTION_DECL, get_identifier (name), fntype);
25741 TREE_PUBLIC (new_fndecl) = 1;
25742 DECL_EXTERNAL (new_fndecl) = 1;
25743 DECL_IS_NOVOPS (new_fndecl) = 1;
25744 TREE_READONLY (new_fndecl) = 1;
25749 /* Handler for an ACML-style interface to
25750 a library with vectorized intrinsics. */
25753 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Name template "__vr.._": the two dots are overwritten by sprintf
   below with the builtin's base name starting at offset 7.  */
25755 char name[20] = "__vr.._";
25756 tree fntype, new_fndecl, args;
25759 enum machine_mode el_mode, in_mode;
25762 /* The ACML is 64bits only and suitable for unsafe math only as
25763 it does not correctly support parts of IEEE with the required
25764 precision such as denormals. */
25766 || !flag_unsafe_math_optimizations)
25769 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25770 n = TYPE_VECTOR_SUBPARTS (type_out);
25771 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25772 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25773 if (el_mode != in_mode
/* Double-precision functions ACML provides.  */
25783 case BUILT_IN_LOG2:
25784 case BUILT_IN_LOG10:
25787 if (el_mode != DFmode
/* Single-precision functions ACML provides.  */
25792 case BUILT_IN_SINF:
25793 case BUILT_IN_COSF:
25794 case BUILT_IN_EXPF:
25795 case BUILT_IN_POWF:
25796 case BUILT_IN_LOGF:
25797 case BUILT_IN_LOG2F:
25798 case BUILT_IN_LOG10F:
25801 if (el_mode != SFmode
/* Splice the builtin's base name (past "__builtin_") into the
   "__vr.._" template.  */
25810 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25811 sprintf (name + 7, "%s", bname+10);
/* Build the vector function type with as many TYPE_IN parameters as
   the scalar builtin has arguments.  */
25814 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25815 args = TREE_CHAIN (args))
25819 fntype = build_function_type_list (type_out, type_in, NULL);
25821 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25823 /* Build a function declaration for the vectorized function. */
25824 new_fndecl = build_decl (BUILTINS_LOCATION,
25825 FUNCTION_DECL, get_identifier (name), fntype);
25826 TREE_PUBLIC (new_fndecl) = 1;
25827 DECL_EXTERNAL (new_fndecl) = 1;
25828 DECL_IS_NOVOPS (new_fndecl) = 1;
25829 TREE_READONLY (new_fndecl) = 1;
25835 /* Returns a decl of a function that implements conversion of an integer vector
25836 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25837 side of the conversion.
25838 Return NULL_TREE if it is not available. */
25841 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25843 if (TREE_CODE (type) != VECTOR_TYPE
25844 /* There are only conversions from/to signed integers. */
25845 || TYPE_UNSIGNED (TREE_TYPE (type)))
/* int -> float direction: CVTDQ2PS for V4SI.  */
25851 switch (TYPE_MODE (type))
25854 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction: CVTTPS2DQ for V4SI.  */
25859 case FIX_TRUNC_EXPR:
25860 switch (TYPE_MODE (type))
25863 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25873 /* Returns a code for a target-specific builtin that implements
25874 reciprocal of the function, or NULL_TREE if not available. */
25877 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25878 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations (rsqrt) are only valid under SSE math
   with -mrecip and the unsafe/finite-math flags, and not when
   optimizing for size.  */
25880 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25881 && flag_finite_math_only && !flag_trapping_math
25882 && flag_unsafe_math_optimizations))
25886 /* Machine dependent builtins. */
25889 /* Vectorized version of sqrt to rsqrt conversion. */
25890 case IX86_BUILTIN_SQRTPS_NR:
25891 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25897 /* Normal builtins. */
25900 /* Sqrt to rsqrt conversion. */
25901 case BUILT_IN_SQRTF:
25902 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25909 /* Store OPERAND to the memory after reload is completed. This means
25910 that we can't easily use assign_stack_local. */
25912 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25916 gcc_assert (reload_completed);
/* With a red zone (64-bit non-MS ABI), store below the stack pointer
   without adjusting it.  */
25917 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25919 result = gen_rtx_MEM (mode,
25920 gen_rtx_PLUS (Pmode,
25922 GEN_INT (-RED_ZONE_SIZE)));
25923 emit_move_insn (result, operand);
/* 64-bit without a usable red zone: push the value (widened to
   DImode) with a pre-decrement store.  */
25925 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25931 operand = gen_lowpart (DImode, operand);
25935 gen_rtx_SET (VOIDmode,
25936 gen_rtx_MEM (DImode,
25937 gen_rtx_PRE_DEC (DImode,
25938 stack_pointer_rtx)),
25942 gcc_unreachable ();
25944 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: push DImode values as two SImode pre-decrement stores
   (high word first, so the low word ends up at the lower address).  */
25953 split_di (&operand, 1, operands, operands + 1);
25955 gen_rtx_SET (VOIDmode,
25956 gen_rtx_MEM (SImode,
25957 gen_rtx_PRE_DEC (Pmode,
25958 stack_pointer_rtx)),
25961 gen_rtx_SET (VOIDmode,
25962 gen_rtx_MEM (SImode,
25963 gen_rtx_PRE_DEC (Pmode,
25964 stack_pointer_rtx)),
25969 /* Store HImodes as SImodes. */
25970 operand = gen_lowpart (SImode, operand);
25974 gen_rtx_SET (VOIDmode,
25975 gen_rtx_MEM (GET_MODE (operand),
25976 gen_rtx_PRE_DEC (SImode,
25977 stack_pointer_rtx)),
25981 gcc_unreachable ();
/* The value now lives at the top of stack.  */
25983 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25988 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: pop the temporary off the stack.  With a
   red zone nothing was pushed, so nothing needs deallocating.  */
25990 ix86_free_from_memory (enum machine_mode mode)
25992 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25996 if (mode == DImode || TARGET_64BIT)
26000 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26001 to pop or add instruction if registers are available. */
26002 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26003 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26008 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26009 QImode must go into class Q_REGS.
26010 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26011 movdf to do mem-to-mem moves through integer regs. */
26013 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26015 enum machine_mode mode = GET_MODE (x);
26017 /* We're only allowed to return a subclass of CLASS. Many of the
26018 following checks fail for NO_REGS, so eliminate that early. */
26019 if (regclass == NO_REGS)
26022 /* All classes can load zeros. */
26023 if (x == CONST0_RTX (mode))
26026 /* Force constants into memory if we are loading a (nonzero) constant into
26027 an MMX or SSE register. This is because there are no MMX/SSE instructions
26028 to load from a constant. */
26030 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26033 /* Prefer SSE regs only, if we can use them for math. */
26034 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26035 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26037 /* Floating-point constants need more complex checks. */
26038 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26040 /* General regs can load everything. */
26041 if (reg_class_subset_p (regclass, GENERAL_REGS))
26044 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26045 zero above. We only want to wind up preferring 80387 registers if
26046 we plan on doing computation with them. */
26048 && standard_80387_constant_p (x))
26050 /* Limit class to non-sse. */
26051 if (regclass == FLOAT_SSE_REGS)
26053 if (regclass == FP_TOP_SSE_REGS)
26055 if (regclass == FP_SECOND_SSE_REGS)
26056 return FP_SECOND_REG;
26057 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26064 /* Generally when we see PLUS here, it's the function invariant
26065 (plus soft-fp const_int). Which can only be computed into general
26067 if (GET_CODE (x) == PLUS)
26068 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26070 /* QImode constants are easy to load, but non-constant QImode data
26071 must go into Q_REGS. */
26072 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26074 if (reg_class_subset_p (regclass, Q_REGS))
26076 if (reg_class_subset_p (Q_REGS, regclass))
26084 /* Discourage putting floating-point values in SSE registers unless
26085 SSE math is being used, and likewise for the 387 registers. */
26087 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26089 enum machine_mode mode = GET_MODE (x);
26091 /* Restrict the output reload class to the register bank that we are doing
26092 math on. If we would like not to return a subset of CLASS, reject this
26093 alternative: if reload cannot do this, it will still use its choice. */
26094 mode = GET_MODE (x);
26095 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26096 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* For x87 float modes, narrow mixed classes to their pure-x87 part.  */
26098 if (X87_FLOAT_MODE_P (mode))
26100 if (regclass == FP_TOP_SSE_REGS)
26102 else if (regclass == FP_SECOND_SSE_REGS)
26103 return FP_SECOND_REG;
26105 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implement TARGET_SECONDARY_RELOAD.  NOTE(review): the tail of this
   function is not visible in this line-sampled listing.  */
26111 static enum reg_class
26112 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
26113 enum machine_mode mode,
26114 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26116 /* QImode spills from non-QI registers require
26117 intermediate register on 32bit targets. */
26118 if (!in_p && mode == QImode && !TARGET_64BIT
26119 && (rclass == GENERAL_REGS
26120 || rclass == LEGACY_REGS
26121 || rclass == INDEX_REGS))
/* Look through SUBREGs/pseudos to the hard register number.  */
26130 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26131 regno = true_regnum (x);
26133 /* Return Q_REGS if the operand is in memory. */
26141 /* If we are copying between general and FP registers, we need a memory
26142 location. The same is true for SSE and MMX registers.
26144 To optimize register_move_cost performance, allow inline variant.
26146 The macro can't work reliably when one of the CLASSES is class containing
26147 registers from multiple units (SSE, MMX, integer). We avoid this by never
26148 combining those units in single alternative in the machine description.
26149 Ensure that this constraint holds to avoid unexpected surprises.
26151 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26152 enforce these sanity checks. */
26155 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26156 enum machine_mode mode, int strict)
/* Mixed-unit classes violate the invariant documented above; assert in
   strict mode, otherwise conservatively answer "memory needed".  */
26158 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26159 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26160 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26161 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26162 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26163 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26165 gcc_assert (!strict);
/* x87 <-> non-x87 always goes through memory.  */
26169 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26172 /* ??? This is a lie. We do have moves between mmx/general, and for
26173 mmx/sse2. But by saying we need secondary memory we discourage the
26174 register allocator from using the mmx registers unless needed. */
26175 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26178 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26180 /* SSE1 doesn't have any direct moves from other classes. */
26184 /* If the target says that inter-unit moves are more expensive
26185 than moving through memory, then don't generate them. */
26186 if (!TARGET_INTER_UNIT_MOVES)
26189 /* Between SSE and general, we have moves no larger than word size. */
26190 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for inline_secondary_memory_needed, used by the
   SECONDARY_MEMORY_NEEDED target macro.  */
26198 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26199 enum machine_mode mode, int strict)
26201 return inline_secondary_memory_needed (class1, class2, mode, strict);
26204 /* Return true if the registers in CLASS cannot represent the change from
26205 modes FROM to TO. */
26208 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26209 enum reg_class regclass)
26214 /* x87 registers can't do subreg at all, as all values are reformatted
26215 to extended precision. */
26216 if (MAYBE_FLOAT_CLASS_P (regclass))
26219 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26221 /* Vector registers do not support QI or HImode loads. If we don't
26222 disallow a change to these modes, reload will assume it's ok to
26223 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26224 the vec_dupv4hi pattern. */
26225 if (GET_MODE_SIZE (from) < 4)
26228 /* Vector registers do not support subreg with nonzero offsets, which
26229 are otherwise valid for integer registers. Since we can't see
26230 whether we have a nonzero offset from here, prohibit all
26231 nonparadoxical subregs changing size. */
26232 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26239 /* Return the cost of moving data of mode M between a
26240 register and memory. A value of 2 is the default; this cost is
26241 relative to those in `REGISTER_MOVE_COST'.
26243 This function is used extensively by register_move_cost that is used to
26244 build tables at startup. Make it inline in this case.
26245 When IN is 2, return maximum of in and out move cost.
26247 If moving between registers and memory is more expensive than
26248 between two registers, you should define this macro to express the
26251 Model also increased moving costs of QImode registers in non
/* NOTE(review): line-sampled listing — the `index` computations and
   several switch labels between visible lines are missing.  */
26255 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 classes: cost comes from the fp_load/fp_store tables.  */
26259 if (FLOAT_CLASS_P (regclass))
26277 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26278 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE classes: cost indexed by mode size via sse_load/sse_store.  */
26280 if (SSE_CLASS_P (regclass))
26283 switch (GET_MODE_SIZE (mode))
26298 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26299 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX classes: cost indexed by mode size via mmx_load/mmx_store.  */
26301 if (MMX_CLASS_P (regclass))
26304 switch (GET_MODE_SIZE (mode))
26316 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26317 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer classes: costs depend on mode size, with extra modelling for
   QImode in non-Q classes (movzbl loads, partial-register stalls).  */
26319 switch (GET_MODE_SIZE (mode))
26322 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26325 return ix86_cost->int_store[0];
26326 if (TARGET_PARTIAL_REG_DEPENDENCY
26327 && optimize_function_for_speed_p (cfun))
26328 cost = ix86_cost->movzbl_load;
26330 cost = ix86_cost->int_load[0];
26332 return MAX (cost, ix86_cost->int_store[0]);
26338 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26340 return ix86_cost->movzbl_load;
26342 return ix86_cost->int_store[0] + 4;
26347 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26348 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26350 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26351 if (mode == TFmode)
26354 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26356 cost = ix86_cost->int_load[2];
26358 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
26359 return (cost * (((int) GET_MODE_SIZE (mode)
26360 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Target hook MEMORY_MOVE_COST: public out-of-line wrapper around
   inline_memory_move_cost (see its header comment for the IN encoding).  */
26365 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26367 return inline_memory_move_cost (mode, regclass, in);
26371 /* Return the cost of moving data from a register in class CLASS1 to
26372 one in class CLASS2.
26374 It is not required that the cost always equal 2 when FROM is the same as TO;
26375 on some machines it is expensive to move between registers if they are not
26376 general registers. */
26379 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26380 enum reg_class class2)
26382 /* In case we require secondary memory, compute cost of the store followed
26383 by load. In order to avoid bad register allocation choices, we need
26384 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26386 if (inline_secondary_memory_needed (class1, class2, mode, 0))
      /* IN == 2 asks inline_memory_move_cost for max (load, store).  */
26390 cost += inline_memory_move_cost (mode, class1, 2);
26391 cost += inline_memory_move_cost (mode, class2, 2);
26393 /* In case of copying from general_purpose_register we may emit multiple
26394 stores followed by single load causing memory size mismatch stall.
26395 Count this as arbitrarily high cost of 20. */
26396 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26399 /* In the case of FP/MMX moves, the registers actually overlap, and we
26400 have to switch modes in order to treat them differently. */
26401 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26402 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26408 /* Moves between SSE/MMX and integer unit are expensive. */
26409 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26410 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26412 /* ??? By keeping returned value relatively high, we limit the number
26413 of moves between integer and MMX/SSE registers for all targets.
26414 Additionally, high value prevents problem with x86_modes_tieable_p(),
26415 where integer modes in MMX/SSE registers are not tieable
26416 because of missing QImode and HImode moves to, from or between
26417 MMX/SSE registers. */
26418 return MAX (8, ix86_cost->mmxsse_to_integer);
      /* Same-unit moves: pick the per-unit cost from the tuning tables.  */
26420 if (MAYBE_FLOAT_CLASS_P (class1))
26421 return ix86_cost->fp_move;
26422 if (MAYBE_SSE_CLASS_P (class1))
26423 return ix86_cost->sse_move;
26424 if (MAYBE_MMX_CLASS_P (class1))
26425 return ix86_cost->mmx_move;
26429 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26432 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26434 /* Flags and only flags can only hold CCmode values. */
26435 if (CC_REGNO_P (regno))
26436 return GET_MODE_CLASS (mode) == MODE_CC;
      /* Conversely, no non-flags register may hold CC-like modes.  */
26437 if (GET_MODE_CLASS (mode) == MODE_CC
26438 || GET_MODE_CLASS (mode) == MODE_RANDOM
26439 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26441 if (FP_REGNO_P (regno))
26442 return VALID_FP_MODE_P (mode);
26443 if (SSE_REGNO_P (regno))
26445 /* We implement the move patterns for all vector modes into and
26446 out of SSE registers, even when no operation instructions
26447 are available. OImode move is available only when AVX is
26449 return ((TARGET_AVX && mode == OImode)
26450 || VALID_AVX256_REG_MODE (mode)
26451 || VALID_SSE_REG_MODE (mode)
26452 || VALID_SSE2_REG_MODE (mode)
26453 || VALID_MMX_REG_MODE (mode)
26454 || VALID_MMX_REG_MODE_3DNOW (mode));
26456 if (MMX_REGNO_P (regno))
26458 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26459 so if the register is available at all, then we can move data of
26460 the given mode into or out of it. */
26461 return (VALID_MMX_REG_MODE (mode)
26462 || VALID_MMX_REG_MODE_3DNOW (mode));
      /* General-purpose registers from here on.  */
26465 if (mode == QImode)
26467 /* Take care for QImode values - they can be in non-QI regs,
26468 but then they do cause partial register stalls. */
      /* NOTE(review): regno <= BX_REG presumably selects the four
	 byte-addressable regs (a/c/d/b) -- confirm register ordering.  */
26469 if (regno <= BX_REG || TARGET_64BIT)
26471 if (!TARGET_PARTIAL_REG_STALL)
26473 return reload_in_progress || reload_completed;
26475 /* We handle both integer and floats in the general purpose registers. */
26476 else if (VALID_INT_MODE_P (mode))
26478 else if (VALID_FP_MODE_P (mode))
26480 else if (VALID_DFP_MODE_P (mode))
26482 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26483 on to use that value in smaller contexts, this can easily force a
26484 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26485 supporting DImode, allow it. */
26486 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26492 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26493 tieable integer mode. */
26496 ix86_tieable_integer_mode_p (enum machine_mode mode)
      /* Small modes tie unless partial-register stalls make byte/word
	 subregs costly on 32-bit targets.  */
26505 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
      /* Presumably the DImode case: only tieable on 64-bit -- confirm
	 the elided switch labels.  */
26508 return TARGET_64BIT;
26515 /* Return true if MODE1 is accessible in a register that can hold MODE2
26516 without copying. That is, all register classes that can hold MODE2
26517 can also hold MODE1. */
26520 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26522 if (mode1 == mode2)
26525 if (ix86_tieable_integer_mode_p (mode1)
26526 && ix86_tieable_integer_mode_p (mode2))
26529 /* MODE2 being XFmode implies fp stack or general regs, which means we
26530 can tie any smaller floating point modes to it. Note that we do not
26531 tie this with TFmode. */
26532 if (mode2 == XFmode)
26533 return mode1 == SFmode || mode1 == DFmode;
26535 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26536 that we can tie it with SFmode. */
26537 if (mode2 == DFmode)
26538 return mode1 == SFmode;
26540 /* If MODE2 is only appropriate for an SSE register, then tie with
26541 any other mode acceptable to SSE registers. */
26542 if (GET_MODE_SIZE (mode2) == 16
26543 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26544 return (GET_MODE_SIZE (mode1) == 16
26545 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26547 /* If MODE2 is appropriate for an MMX register, then tie
26548 with any other mode acceptable to MMX registers. */
26549 if (GET_MODE_SIZE (mode2) == 8
26550 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26551 return (GET_MODE_SIZE (mode1) == 8
26552 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26557 /* Compute a (partial) cost for rtx X. Return true if the complete
26558 cost has been computed, and false if subexpressions should be
26559 scanned. In either case, *TOTAL contains the cost result. */
26562 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26564 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26565 enum machine_mode mode = GET_MODE (x);
      /* SPEED selects between cycle-count and size cost tables.  */
26566 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
      /* Constant operands: cost depends on how many instructions the
	 immediate needs (64-bit range, zero-extended, PIC reference).  */
26574 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26576 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26578 else if (flag_pic && SYMBOLIC_CONST (x)
      /* NOTE(review): "!GET_CODE (x) != LABEL_REF" compares the 0/1 result
	 of ! against the LABEL_REF enum value; this looks like it should be
	 "GET_CODE (x) != LABEL_REF" -- confirm against upstream history.  */
26580 || (!GET_CODE (x) != LABEL_REF
26581 && (GET_CODE (x) != SYMBOL_REF
26582 || !SYMBOL_REF_LOCAL_P (x)))))
26589 if (mode == VOIDmode)
      /* FP constants: standard_80387_constant_p recognizes immediates
	 loadable with fld1/fldz etc.  */
26592 switch (standard_80387_constant_p (x))
26597 default: /* Other constants */
26602 /* Start with (MEM (SYMBOL_REF)), since that's where
26603 it'll probably end up. Add a penalty for size. */
26604 *total = (COSTS_N_INSNS (1)
26605 + (flag_pic != 0 && !TARGET_64BIT)
26606 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26612 /* The zero extensions is often completely free on x86_64, so make
26613 it as cheap as possible. */
26614 if (TARGET_64BIT && mode == DImode
26615 && GET_MODE (XEXP (x, 0)) == SImode)
26617 else if (TARGET_ZERO_EXTEND_WITH_AND)
26618 *total = cost->add;
26620 *total = cost->movzx;
26624 *total = cost->movsx;
      /* ASHIFT by a constant: may be cheaper as add (shift by 1) or lea.  */
26628 if (CONST_INT_P (XEXP (x, 1))
26629 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26631 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26634 *total = cost->add;
26637 if ((value == 2 || value == 3)
26638 && cost->lea <= cost->shift_const)
26640 *total = cost->lea;
      /* Shifts of DImode on 32-bit targets need a two-register sequence.  */
26650 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26652 if (CONST_INT_P (XEXP (x, 1)))
26654 if (INTVAL (XEXP (x, 1)) > 32)
26655 *total = cost->shift_const + COSTS_N_INSNS (2);
26657 *total = cost->shift_const * 2;
26661 if (GET_CODE (XEXP (x, 1)) == AND)
26662 *total = cost->shift_var * 2;
26664 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26669 if (CONST_INT_P (XEXP (x, 1)))
26670 *total = cost->shift_const;
26672 *total = cost->shift_var;
      /* MULT: FP multiplies use the fmul table; integer multiplies are
	 modelled as init cost plus per-bit cost of the constant operand.  */
26677 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26679 /* ??? SSE scalar cost should be used here. */
26680 *total = cost->fmul;
26683 else if (X87_FLOAT_MODE_P (mode))
26685 *total = cost->fmul;
26688 else if (FLOAT_MODE_P (mode))
26690 /* ??? SSE vector cost should be used here. */
26691 *total = cost->fmul;
26696 rtx op0 = XEXP (x, 0);
26697 rtx op1 = XEXP (x, 1);
26699 if (CONST_INT_P (XEXP (x, 1)))
26701 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
      /* Count set bits: value &= value - 1 clears the lowest set bit.  */
26702 for (nbits = 0; value != 0; value &= value - 1)
26706 /* This is arbitrary. */
26709 /* Compute costs correctly for widening multiplication. */
26710 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26711 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26712 == GET_MODE_SIZE (mode))
26714 int is_mulwiden = 0;
26715 enum machine_mode inner_mode = GET_MODE (op0);
26717 if (GET_CODE (op0) == GET_CODE (op1))
26718 is_mulwiden = 1, op1 = XEXP (op1, 0);
26719 else if (CONST_INT_P (op1))
26721 if (GET_CODE (op0) == SIGN_EXTEND)
26722 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26725 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
      /* For a recognized widening multiply, cost at the narrower mode.  */
26729 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26732 *total = (cost->mult_init[MODE_INDEX (mode)]
26733 + nbits * cost->mult_bit
26734 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26743 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26744 /* ??? SSE cost should be used here. */
26745 *total = cost->fdiv;
26746 else if (X87_FLOAT_MODE_P (mode))
26747 *total = cost->fdiv;
26748 else if (FLOAT_MODE_P (mode))
26749 /* ??? SSE vector cost should be used here. */
26750 *total = cost->fdiv;
26752 *total = cost->divide[MODE_INDEX (mode)];
      /* PLUS: recognize the addressing-mode shapes (base + index*scale
	 + disp) that a single lea can compute.  */
26756 if (GET_MODE_CLASS (mode) == MODE_INT
26757 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26759 if (GET_CODE (XEXP (x, 0)) == PLUS
26760 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26761 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26762 && CONSTANT_P (XEXP (x, 1)))
26764 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26765 if (val == 2 || val == 4 || val == 8)
26767 *total = cost->lea;
26768 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26769 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26770 outer_code, speed);
26771 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26775 else if (GET_CODE (XEXP (x, 0)) == MULT
26776 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26778 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26779 if (val == 2 || val == 4 || val == 8)
26781 *total = cost->lea;
26782 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26783 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26787 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26789 *total = cost->lea;
26790 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26791 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26792 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26799 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26801 /* ??? SSE cost should be used here. */
26802 *total = cost->fadd;
26805 else if (X87_FLOAT_MODE_P (mode))
26807 *total = cost->fadd;
26810 else if (FLOAT_MODE_P (mode))
26812 /* ??? SSE vector cost should be used here. */
26813 *total = cost->fadd;
      /* Logical ops on DImode need two 32-bit instructions pre-x86-64;
	 the shift by (mode != DImode) doubles the operand cost when the
	 operand itself is not already DImode.  */
26821 if (!TARGET_64BIT && mode == DImode)
26823 *total = (cost->add * 2
26824 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26825 << (GET_MODE (XEXP (x, 0)) != DImode))
26826 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26827 << (GET_MODE (XEXP (x, 1)) != DImode)));
26833 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26835 /* ??? SSE cost should be used here. */
26836 *total = cost->fchs;
26839 else if (X87_FLOAT_MODE_P (mode))
26841 *total = cost->fchs;
26844 else if (FLOAT_MODE_P (mode))
26846 /* ??? SSE vector cost should be used here. */
26847 *total = cost->fchs;
26853 if (!TARGET_64BIT && mode == DImode)
26854 *total = cost->add * 2;
26856 *total = cost->add;
      /* (compare (zero_extract _ 1 n) 0) is a single-bit test.  */
26860 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26861 && XEXP (XEXP (x, 0), 1) == const1_rtx
26862 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26863 && XEXP (x, 1) == const0_rtx)
26865 /* This kind of construct is implemented using test[bwl].
26866 Treat it as if we had an AND. */
26867 *total = (cost->add
26868 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26869 + rtx_cost (const1_rtx, outer_code, speed));
26875 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26880 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26881 /* ??? SSE cost should be used here. */
26882 *total = cost->fabs;
26883 else if (X87_FLOAT_MODE_P (mode))
26884 *total = cost->fabs;
26885 else if (FLOAT_MODE_P (mode))
26886 /* ??? SSE vector cost should be used here. */
26887 *total = cost->fabs;
26891 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26892 /* ??? SSE cost should be used here. */
26893 *total = cost->fsqrt;
26894 else if (X87_FLOAT_MODE_P (mode))
26895 *total = cost->fsqrt;
26896 else if (FLOAT_MODE_P (mode))
26897 /* ??? SSE vector cost should be used here. */
26898 *total = cost->fsqrt;
      /* Thread-pointer UNSPEC is effectively free.  */
26902 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels (LPC$n / L<n>$lz) for
   Mach-O lazy-binding stubs.  */
26913 static int current_machopic_label_num;
26915 /* Given a symbol name and its associated stub, write out the
26916 definition of the stub. */
26919 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26921 unsigned int length;
26922 char *binder_name, *symbol_name, lazy_ptr_name[32];
26923 int label = ++current_machopic_label_num;
26925 /* For 64-bit we shouldn't get here. */
26926 gcc_assert (!TARGET_64BIT);
26928 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26929 symb = (*targetm.strip_name_encoding) (symb);
26931 length = strlen (stub);
26932 binder_name = XALLOCAVEC (char, length + 32);
26933 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26935 length = strlen (symb);
26936 symbol_name = XALLOCAVEC (char, length + 32);
26937 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26939 sprintf (lazy_ptr_name, "L%d$lz", label);
      /* Pick the PIC or non-PIC stub section, then emit the stub body.  */
26942 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26944 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26946 fprintf (file, "%s:\n", stub);
26947 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
      /* PIC variant: materialize PC in %eax via call/pop, then jump
	 through the lazy pointer.  */
26951 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26952 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26953 fprintf (file, "\tjmp\t*%%edx\n");
26956 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
      /* Binder trampoline: push the lazy pointer's address and enter
	 dyld's stub-binding helper.  */
26958 fprintf (file, "%s:\n", binder_name);
26962 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26963 fprintf (file, "\tpushl\t%%eax\n");
26966 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26968 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
      /* Lazy pointer slot, initially pointing at the binder.  */
26970 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26971 fprintf (file, "%s:\n", lazy_ptr_name);
26972 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26973 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin/x86: delegates to the generic
   Darwin end-of-file handling.  */
26977 darwin_x86_file_end (void)
26979 darwin_file_end ();
26982 #endif /* TARGET_MACHO */
26984 /* Order the registers for register allocator. */
26987 x86_order_regs_for_local_alloc (void)
26992 /* First allocate the local general purpose registers. */
26993 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26994 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26995 reg_alloc_order [pos++] = i;
26997 /* Global general purpose registers. */
26998 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26999 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27000 reg_alloc_order [pos++] = i;
27002 /* x87 registers come first in case we are doing FP math
27004 if (!TARGET_SSE_MATH)
27005 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27006 reg_alloc_order [pos++] = i;
27008 /* SSE registers. */
27009 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27010 reg_alloc_order [pos++] = i;
27011 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27012 reg_alloc_order [pos++] = i;
27014 /* x87 registers. */
      /* With SSE math, x87 regs are demoted to after the SSE regs.  */
27015 if (TARGET_SSE_MATH)
27016 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27017 reg_alloc_order [pos++] = i;
27019 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27020 reg_alloc_order [pos++] = i;
27022 /* Initialize the rest of array as we do not allocate some registers
27024 while (pos < FIRST_PSEUDO_REGISTER)
27025 reg_alloc_order [pos++] = 0;
27028 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27029 struct attribute_spec.handler. */
27031 ix86_handle_abi_attribute (tree *node, tree name,
27032 tree args ATTRIBUTE_UNUSED,
27033 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
      /* The attribute only makes sense on function-like declarations.  */
27035 if (TREE_CODE (*node) != FUNCTION_TYPE
27036 && TREE_CODE (*node) != METHOD_TYPE
27037 && TREE_CODE (*node) != FIELD_DECL
27038 && TREE_CODE (*node) != TYPE_DECL)
27040 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27042 *no_add_attrs = true;
      /* Calling-convention switching exists only on 64-bit targets.  */
27047 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27049 *no_add_attrs = true;
27053 /* Can combine regparm with all attributes but fastcall. */
      /* ms_abi and sysv_abi are mutually exclusive; each arm rejects the
	 presence of the other.  */
27054 if (is_attribute_p ("ms_abi", name))
27056 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27058 error ("ms_abi and sysv_abi attributes are not compatible");
27063 else if (is_attribute_p ("sysv_abi", name))
27065 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27067 error ("ms_abi and sysv_abi attributes are not compatible");
27076 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27077 struct attribute_spec.handler. */
27079 ix86_handle_struct_attribute (tree *node, tree name,
27080 tree args ATTRIBUTE_UNUSED,
27081 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
      /* Resolve NODE to the underlying type when given a declaration.  */
27084 if (DECL_P (*node))
27086 if (TREE_CODE (*node) == TYPE_DECL)
27087 type = &TREE_TYPE (*node);
      /* Only struct/union types can carry the layout attributes.  */
27092 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27093 || TREE_CODE (*type) == UNION_TYPE)))
27095 warning (OPT_Wattributes, "%qE attribute ignored",
27097 *no_add_attrs = true;
      /* ms_struct and gcc_struct are mutually exclusive.  */
27100 else if ((is_attribute_p ("ms_struct", name)
27101 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27102 || ((is_attribute_p ("gcc_struct", name)
27103 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27105 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27107 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: use MS-compatible bitfield layout
   for RECORD_TYPE when the target default says so (unless gcc_struct
   opts out), or when the type explicitly carries ms_struct.  */
27114 ix86_ms_bitfield_layout_p (const_tree record_type)
27116 return (TARGET_MS_BITFIELD_LAYOUT &&
27117 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27118 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27121 /* Returns an expression indicating where the this parameter is
27122 located on entry to the FUNCTION. */
27125 x86_this_parameter (tree function)
27127 tree type = TREE_TYPE (function);
      /* When the function returns an aggregate, a hidden return-slot
	 pointer occupies the first slot and `this' shifts to the next.  */
27128 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27133 const int *parm_regs;
27135 if (ix86_function_type_abi (type) == MS_ABI)
27136 parm_regs = x86_64_ms_abi_int_parameter_registers;
27138 parm_regs = x86_64_int_parameter_registers;
27139 return gen_rtx_REG (DImode, parm_regs[aggr]);
      /* 32-bit: `this' may be in a register under regparm/fastcall ...  */
27142 nregs = ix86_function_regparm (type, function);
27144 if (nregs > 0 && !stdarg_p (type))
27148 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27149 regno = aggr ? DX_REG : CX_REG;
27157 return gen_rtx_MEM (SImode,
27158 plus_constant (stack_pointer_rtx, 4));
27161 return gen_rtx_REG (SImode, regno);
      /* ... otherwise it is on the stack, past the return address (and
	 the hidden aggregate-return pointer, if any).  */
27164 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27167 /* Determine whether x86_output_mi_thunk can succeed. */
27170 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27171 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27172 HOST_WIDE_INT vcall_offset, const_tree function)
27174 /* 64-bit can handle anything. */
27178 /* For 32-bit, everything's fine if we have one free register. */
27179 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27182 /* Need a free register for vcall_offset. */
27186 /* Need a free register for GOT references. */
27187 if (flag_pic && !(*targetm.binds_local_p) (function))
27190 /* Otherwise ok. */
27194 /* Output the assembler code for a thunk function. THUNK_DECL is the
27195 declaration for the thunk function itself, FUNCTION is the decl for
27196 the target function. DELTA is an immediate constant offset to be
27197 added to THIS. If VCALL_OFFSET is nonzero, the word at
27198 *(*this + vcall_offset) should be added to THIS. */
27201 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27202 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27203 HOST_WIDE_INT vcall_offset, tree function)
27206 rtx this_param = x86_this_parameter (function);
27209 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27210 pull it in now and let DELTA benefit. */
27211 if (REG_P (this_param))
27212 this_reg = this_param;
27213 else if (vcall_offset)
27215 /* Put the this parameter into %eax. */
27216 xops[0] = this_param;
27217 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27218 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27221 this_reg = NULL_RTX;
27223 /* Adjust the this parameter by a fixed constant. */
27226 xops[0] = GEN_INT (delta);
27227 xops[1] = this_reg ? this_reg : this_param;
      /* 64-bit: an out-of-range immediate must be staged through %r10
	 (caller-saved and not used for argument passing here).  */
27230 if (!x86_64_general_operand (xops[0], DImode))
27232 tmp = gen_rtx_REG (DImode, R10_REG);
27234 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27236 xops[1] = this_param;
27238 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27241 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27244 /* Adjust the this parameter by a value stored in the vtable. */
27248 tmp = gen_rtx_REG (DImode, R10_REG);
      /* 32-bit scratch: %ecx, unless fastcall uses it for `this' -- then
	 fall back to %eax.  */
27251 int tmp_regno = CX_REG;
27252 if (lookup_attribute ("fastcall",
27253 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27254 tmp_regno = AX_REG;
27255 tmp = gen_rtx_REG (SImode, tmp_regno);
      /* Load the vtable pointer (*this) into the scratch register.  */
27258 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27260 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27262 /* Adjust the this parameter. */
27263 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27264 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27266 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27267 xops[0] = GEN_INT (vcall_offset);
27269 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27270 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27272 xops[1] = this_reg;
27273 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27276 /* If necessary, drop THIS back to its stack slot. */
27277 if (this_reg && this_reg != this_param)
27279 xops[0] = this_reg;
27280 xops[1] = this_param;
27281 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
      /* Finally, tail-jump to the real function.  */
27284 xops[0] = XEXP (DECL_RTL (function), 0);
27287 if (!flag_pic || (*targetm.binds_local_p) (function))
27288 output_asm_insn ("jmp\t%P0", xops);
27289 /* All thunks should be in the same object as their target,
27290 and thus binds_local_p should be true. */
27291 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27292 gcc_unreachable ();
      /* 64-bit PIC: indirect jump through the GOT entry.  */
27295 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27296 tmp = gen_rtx_CONST (Pmode, tmp);
27297 tmp = gen_rtx_MEM (QImode, tmp);
27299 output_asm_insn ("jmp\t%A0", xops);
27304 if (!flag_pic || (*targetm.binds_local_p) (function))
27305 output_asm_insn ("jmp\t%P0", xops);
      /* Darwin: jump through the Mach-O indirection stub.  */
27310 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27311 tmp = (gen_rtx_SYMBOL_REF
27313 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27314 tmp = gen_rtx_MEM (QImode, tmp);
27316 output_asm_insn ("jmp\t%0", xops);
27319 #endif /* TARGET_MACHO */
      /* 32-bit ELF PIC: set up the GOT pointer in %ecx and jump through
	 the target's GOT slot.  */
27321 tmp = gen_rtx_REG (SImode, CX_REG);
27322 output_set_got (tmp, NULL_RTX);
27325 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27326 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START hook: emit standard file-start boilerplate plus
   the x86-specific directives (.version, __fltused, Intel syntax).  */
27332 x86_file_start (void)
27334 default_file_start ();
27336 darwin_file_start ();
27338 if (X86_FILE_START_VERSION_DIRECTIVE)
27339 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27340 if (X86_FILE_START_FLTUSED)
27341 fputs ("\t.global\t__fltused\n", asm_out_file);
27342 if (ix86_asm_dialect == ASM_INTEL)
27343 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: cap the alignment of FIELD at 32 bits for
   double/integer-like types on 32-bit targets without -malign-double,
   matching the traditional i386 struct layout.  COMPUTED is the
   alignment the middle end derived.  */
27347 x86_field_alignment (tree field, int computed)
27349 enum machine_mode mode;
27350 tree type = TREE_TYPE (field);
      /* 64-bit and -malign-double keep the natural alignment.  */
27352 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27354 mode = TYPE_MODE (strip_array_types (type));
27355 if (mode == DFmode || mode == DCmode
27356 || GET_MODE_CLASS (mode) == MODE_INT
27357 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27358 return MIN (32, computed);
27362 /* Output assembler code to FILE to increment profiler label # LABELNO
27363 for profiling a function entry. */
27365 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
      /* 64-bit variant: counter address in %r11, call mcount (through the
	 GOT for SysV PIC).  */
27369 #ifndef NO_PROFILE_COUNTERS
27370 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27373 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27374 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27376 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
      /* 32-bit PIC variant: GOTOFF-relative counter, call via GOT.  */
27380 #ifndef NO_PROFILE_COUNTERS
27381 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27382 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27384 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
      /* 32-bit non-PIC variant: absolute counter address, direct call.  */
27388 #ifndef NO_PROFILE_COUNTERS
27389 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27390 PROFILE_COUNT_REGISTER);
27392 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27396 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27397 /* We don't have exact information about the insn sizes, but we may assume
27398 quite safely that we are informed about all 1 byte insns and memory
27399 address sizes. This is enough to eliminate unnecessary padding in
27403 min_insn_size (rtx insn)
      /* Non-insns and inactive insns occupy no bytes.  */
27407 if (!INSN_P (insn) || !active_insn_p (insn))
27410 /* Discard alignments we've emit and jump instructions. */
27411 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27412 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27414 if (JUMP_TABLE_DATA_P (insn))
27417 /* Important case - calls are always 5 bytes.
27418 It is common to have many calls in the row. */
27420 && symbolic_reference_mentioned_p (PATTERN (insn))
27421 && !SIBLING_CALL_P (insn))
27423 len = get_attr_length (insn);
27427 /* For normal instructions we rely on get_attr_length being exact,
27428 with a few exceptions. */
27429 if (!JUMP_P (insn))
27431 enum attr_type type = get_attr_type (insn);
      /* Inline asm length is an estimate, not a lower bound.  */
27436 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27437 || asm_noperands (PATTERN (insn)) >= 0)
27444 /* Otherwise trust get_attr_length. */
27448 l = get_attr_length_address (insn);
27449 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27458 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
27462 ix86_avoid_jump_mispredicts (void)
27464 rtx insn, start = get_insns ();
27465 int nbytes = 0, njumps = 0;
27468 /* Look for all minimal intervals of instructions containing 4 jumps.
27469 The intervals are bounded by START and INSN. NBYTES is the total
27470 size of instructions in the interval including INSN and not including
27471 START. When the NBYTES is smaller than 16 bytes, it is possible
27472 that the end of START and INSN ends up in the same 16byte page.
27474 The smallest offset in the page INSN can start is the case where START
27475 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27476 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27478 for (insn = start; insn; insn = NEXT_INSN (insn))
27482 if (LABEL_P (insn))
27484 int align = label_to_alignment (insn);
27485 int max_skip = label_to_max_skip (insn);
27489 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27490 already in the current 16 byte page, because otherwise
27491 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27492 bytes to reach 16 byte boundary. */
27494 || (align <= 3 && max_skip != (1 << align) - 1))
27497 fprintf (dump_file, "Label %i with max_skip %i\n",
27498 INSN_UID (insn), max_skip);
      /* Shrink the window from the front until an aligned label cannot
	 land in the same 16-byte page as INSN.  */
27501 while (nbytes + max_skip >= 16)
27503 start = NEXT_INSN (start);
27504 if ((JUMP_P (start)
27505 && GET_CODE (PATTERN (start)) != ADDR_VEC
27506 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27508 njumps--, isjump = 1;
27511 nbytes -= min_insn_size (start);
      /* Account INSN in the running window.  */
27517 min_size = min_insn_size (insn);
27518 nbytes += min_size;
27520 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27521 INSN_UID (insn), min_size);
      /* Jump tables (ADDR_VEC/ADDR_DIFF_VEC) don't count as jumps.  */
27523 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27524 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
      /* Keep at most 4 jumps in the window by advancing START.  */
27532 start = NEXT_INSN (start);
27533 if ((JUMP_P (start)
27534 && GET_CODE (PATTERN (start)) != ADDR_VEC
27535 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27537 njumps--, isjump = 1;
27540 nbytes -= min_insn_size (start);
27542 gcc_assert (njumps >= 0);
27544 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27545 INSN_UID (start), INSN_UID (insn), nbytes);
      /* Four jumps would share a 16-byte page: pad before INSN so the
	 fourth jump starts in the next page.  */
27547 if (njumps == 3 && isjump && nbytes < 16)
27549 int padsize = 15 - nbytes + min_insn_size (insn);
27552 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27553 INSN_UID (insn), padsize);
27554 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27560 /* AMD Athlon works faster
27561 when RET is not destination of conditional jump or directly preceded
27562 by other jump instruction. We avoid the penalty by inserting NOP just
27563 before the RET instructions in such cases. */
27565 ix86_pad_returns (void)
      /* Walk every edge into the exit block looking for RET insns.  */
27570 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27572 basic_block bb = e->src;
27573 rtx ret = BB_END (bb);
27575 bool replace = false;
27577 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27578 || optimize_bb_for_size_p (bb))
      /* Find the nearest preceding active insn or label.  */
27580 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27581 if (active_insn_p (prev) || LABEL_P (prev))
27583 if (prev && LABEL_P (prev))
      /* RET is a jump target: pad if any non-fallthru edge reaches it.  */
27588 FOR_EACH_EDGE (e, ei, bb->preds)
27589 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27590 && !(e->flags & EDGE_FALLTHRU))
27595 prev = prev_active_insn (ret);
27597 && ((JUMP_P (prev) && any_condjump_p (prev))
27600 /* Empty functions get branch mispredict even when the jump destination
27601 is not visible to us. */
27602 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
      /* Replace the plain RET with the long (padded) return pattern.  */
27607 emit_jump_insn_before (gen_return_internal_long (), ret);
27613 /* Implement machine specific optimizations. We implement padding of returns
27614 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
      /* Both passes are speed optimizations; skip them when optimizing
	 for size or not optimizing at all.  */
27618 if (optimize && optimize_function_for_speed_p (cfun))
27620 if (TARGET_PAD_RETURNS)
27621 ix86_pad_returns ();
27622 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27623 if (TARGET_FOUR_JUMP_LIMIT)
27624 ix86_avoid_jump_mispredicts ();
27629 /* Return nonzero when QImode register that must be represented via REX prefix
27632 x86_extended_QIreg_mentioned_p (rtx insn)
/* Walk the cached recog operands; any hard register numbered above
   BX_REG needs a REX prefix to be addressed as a byte register.  */
27635 extract_insn_cached (insn);
27636 for (i = 0; i < recog_data.n_operands; i++)
27637 if (REG_P (recog_data.operand[i])
27638 && REGNO (recog_data.operand[i]) > BX_REG)
27643 /* Return nonzero when P points to register encoded via REX prefix.
27644 Called via for_each_rtx. */
27646 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27648 unsigned int regno;
27651 regno = REGNO (*p);
/* True for r8-r15 and xmm8-xmm15, the registers reachable only
   through a REX prefix.  */
27652 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27655 /* Return true when INSN mentions register that must be encoded using REX
27658 x86_extended_reg_mentioned_p (rtx insn)
/* INSN may be a full insn or a bare rtx; scan the appropriate body.  */
27660 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27661 extended_reg_mentioned_1, NULL);
27664 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27665 optabs would emit if we didn't have TFmode patterns. */
27668 x86_emit_floatuns (rtx operands[2])
27670 rtx neglab, donelab, i0, i1, f0, in, out;
27671 enum machine_mode mode, inmode;
27673 inmode = GET_MODE (operands[1]);
27674 gcc_assert (inmode == SImode || inmode == DImode);
27677 in = force_reg (inmode, operands[1]);
27678 mode = GET_MODE (out);
27679 neglab = gen_label_rtx ();
27680 donelab = gen_label_rtx ();
27681 f0 = gen_reg_rtx (mode);
/* If the value is non-negative (sign bit clear) a plain signed
   conversion is correct.  */
27683 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27685 expand_float (out, in, 0);
27687 emit_jump_insn (gen_jump (donelab));
27690 emit_label (neglab);
/* Sign bit set: halve the value, keeping the low bit as a rounding
   correction (i0 = (in >> 1) | (in & 1)), convert, then double.  */
27692 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27694 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27696 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27698 expand_float (f0, i0, 0);
27700 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27702 emit_label (donelab);
27705 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27706 with all elements equal to VAR. Return true if successful. */
27709 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27710 rtx target, rtx val)
27712 enum machine_mode hmode, smode, wsmode, wvmode;
/* Direct VEC_DUPLICATE when the target supports it for this mode.  */
27727 val = force_reg (GET_MODE_INNER (mode), val);
27728 x = gen_rtx_VEC_DUPLICATE (mode, val);
27729 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* MMX V4HI broadcast via pshufw-style duplicate of a truncated value;
   needs SSE or 3DNow!-A for the shuffle insn.  */
27735 if (TARGET_SSE || TARGET_3DNOW_A)
27737 val = gen_lowpart (SImode, val);
27738 x = gen_rtx_TRUNCATE (HImode, val);
27739 x = gen_rtx_VEC_DUPLICATE (mode, x);
27740 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27762 /* Extend HImode to SImode using a paradoxical SUBREG. */
27763 tmp1 = gen_reg_rtx (SImode);
27764 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27765 /* Insert the SImode value as low element of V4SImode vector. */
27766 tmp2 = gen_reg_rtx (V4SImode);
27767 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27768 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27769 CONST0_RTX (V4SImode),
27771 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27772 /* Cast the V4SImode vector back to a V8HImode vector. */
27773 tmp1 = gen_reg_rtx (V8HImode);
27774 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27775 /* Duplicate the low short through the whole low SImode word. */
27776 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27777 /* Cast the V8HImode vector back to a V4SImode vector. */
27778 tmp2 = gen_reg_rtx (V4SImode);
27779 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27780 /* Replicate the low element of the V4SImode vector. */
27781 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27782 /* Cast the V2SImode back to V8HImode, and store in target. */
27783 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27794 /* Extend QImode to SImode using a paradoxical SUBREG. */
27795 tmp1 = gen_reg_rtx (SImode);
27796 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27797 /* Insert the SImode value as low element of V4SImode vector. */
27798 tmp2 = gen_reg_rtx (V4SImode);
27799 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27800 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27801 CONST0_RTX (V4SImode),
27803 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27804 /* Cast the V4SImode vector back to a V16QImode vector. */
27805 tmp1 = gen_reg_rtx (V16QImode);
27806 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27807 /* Duplicate the low byte through the whole low SImode word. */
27808 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27809 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27810 /* Cast the V16QImode vector back to a V4SImode vector. */
27811 tmp2 = gen_reg_rtx (V4SImode);
27812 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27813 /* Replicate the low element of the V4SImode vector. */
27814 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27815 /* Cast the V2SImode back to V16QImode, and store in target. */
27816 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27824 /* Replicate the value once into the next wider mode and recurse. */
27825 val = convert_modes (wsmode, smode, val, true);
/* Pack two copies of VAL into one wider scalar: (val << bits) | val.  */
27826 x = expand_simple_binop (wsmode, ASHIFT, val,
27827 GEN_INT (GET_MODE_BITSIZE (smode)),
27828 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27829 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27831 x = gen_reg_rtx (wvmode);
27832 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27833 gcc_unreachable ();
27834 emit_move_insn (target, gen_lowpart (mode, x));
/* Wide modes: duplicate into the half-width vector mode HMODE, then
   concatenate the half with itself.  */
27857 rtx tmp = gen_reg_rtx (hmode);
27858 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27859 emit_insn (gen_rtx_SET (VOIDmode, target,
27860 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27869 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27870 whose ONE_VAR element is VAR, and other elements are zero. Return true
27874 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27875 rtx target, rtx var, int one_var)
27877 enum machine_mode vsimode;
27880 bool use_vector_set = false;
27885 /* For SSE4.1, we normally use vector set. But if the second
27886 element is zero and inter-unit moves are OK, we use movq
27888 use_vector_set = (TARGET_64BIT
27890 && !(TARGET_INTER_UNIT_MOVES
/* Per-mode selection of whether a direct vector-set insn exists.  */
27896 use_vector_set = TARGET_SSE4_1;
27899 use_vector_set = TARGET_SSE2;
27902 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27909 use_vector_set = TARGET_AVX;
27912 /* Use ix86_expand_vector_set in 64bit mode only. */
27913 use_vector_set = TARGET_AVX && TARGET_64BIT;
/* Easy path: zero the whole vector, then insert VAR at ONE_VAR.  */
27919 if (use_vector_set)
27921 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27922 var = force_reg (GET_MODE_INNER (mode), var);
27923 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: concat VAR with a zero scalar.  */
27939 var = force_reg (GET_MODE_INNER (mode), var);
27940 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27941 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Build { var, 0, 0, 0 } in a pseudo; hard registers get a fresh
   intermediate so the shuffle below can rewrite it freely.  */
27946 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27947 new_target = gen_reg_rtx (mode);
27949 new_target = target;
27950 var = force_reg (GET_MODE_INNER (mode), var);
27951 x = gen_rtx_VEC_DUPLICATE (mode, var);
27952 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27953 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27956 /* We need to shuffle the value to the correct position, so
27957 create a new pseudo to store the intermediate result. */
27959 /* With SSE2, we can use the integer shuffle insns. */
27960 if (mode != V4SFmode && TARGET_SSE2)
27962 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27964 GEN_INT (one_var == 1 ? 0 : 1),
27965 GEN_INT (one_var == 2 ? 0 : 1),
27966 GEN_INT (one_var == 3 ? 0 : 1)));
27967 if (target != new_target)
27968 emit_move_insn (target, new_target);
27972 /* Otherwise convert the intermediate result to V4SFmode and
27973 use the SSE1 shuffle instructions. */
27974 if (mode != V4SFmode)
27976 tmp = gen_reg_rtx (V4SFmode);
27977 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27982 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27984 GEN_INT (one_var == 1 ? 0 : 1),
27985 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27986 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27988 if (mode != V4SFmode)
27989 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27990 else if (tmp != target)
27991 emit_move_insn (target, tmp);
27993 else if (target != new_target)
27994 emit_move_insn (target, new_target);
/* Narrow integer element modes: widen to SImode and recurse on the
   corresponding SImode vector, then view-convert back.  */
27999 vsimode = V4SImode;
28005 vsimode = V2SImode;
28011 /* Zero extend the variable element to SImode and recurse. */
28012 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28014 x = gen_reg_rtx (vsimode);
28015 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28017 gcc_unreachable ();
28019 emit_move_insn (target, gen_lowpart (mode, x));
28027 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28028 consisting of the values in VALS. It is known that all elements
28029 except ONE_VAR are constants. Return true if successful. */
28032 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28033 rtx target, rtx vals, int one_var)
28035 rtx var = XVECEXP (vals, 0, one_var);
28036 enum machine_mode wmode;
/* Build the constant part of the vector with a zero placeholder at
   ONE_VAR; it will be loaded from the pool and patched afterwards.  */
28039 const_vec = copy_rtx (vals);
28040 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28041 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28049 /* For the two element vectors, it's just as easy to use
28050 the general case. */
28054 /* Use ix86_expand_vector_set in 64bit mode only. */
28077 /* There's no way to set one QImode entry easily. Combine
28078 the variable value with its adjacent constant value, and
28079 promote to an HImode set. */
28080 x = XVECEXP (vals, 0, one_var ^ 1)
28083 var = convert_modes (HImode, QImode, var, true);
28084 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28085 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28086 x = GEN_INT (INTVAL (x) & 0xff);
28090 var = convert_modes (HImode, QImode, var, true);
28091 x = gen_int_mode (INTVAL (x) << 8, HImode);
/* OR in the neighbouring constant byte unless it is zero.  */
28093 if (x != const0_rtx)
28094 var = expand_simple_binop (HImode, IOR, var, x, var,
28095 1, OPTAB_LIB_WIDEN);
28097 x = gen_reg_rtx (wmode);
28098 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28099 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28101 emit_move_insn (target, gen_lowpart (mode, x));
/* General case: load the constant vector, then overwrite ONE_VAR.  */
28108 emit_move_insn (target, const_vec);
28109 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28113 /* A subroutine of ix86_expand_vector_init_general. Use vector
28114 concatenate to handle the most general case: all values variable,
28115 and none identical. */
28118 ix86_expand_vector_init_concat (enum machine_mode mode,
28119 rtx target, rtx *ops, int n)
28121 enum machine_mode cmode, hmode = VOIDmode;
28122 rtx first[8], second[4];
28162 gcc_unreachable ();
/* Base case (n == 2): force both halves into registers and emit a
   single VEC_CONCAT.  */
28165 if (!register_operand (ops[1], cmode))
28166 ops[1] = force_reg (cmode, ops[1]);
28167 if (!register_operand (ops[0], cmode))
28168 ops[0] = force_reg (cmode, ops[0]);
28169 emit_insn (gen_rtx_SET (VOIDmode, target,
28170 gen_rtx_VEC_CONCAT (mode, ops[0],
28190 gcc_unreachable ();
28206 gcc_unreachable ();
28211 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* Pairwise-combine the N scalars into N/2 vectors of mode CMODE.  */
28214 for (; i > 0; i -= 2, j--)
28216 first[j] = gen_reg_rtx (cmode);
28217 v = gen_rtvec (2, ops[i - 1], ops[i]);
28218 ix86_expand_vector_init (false, first[j],
28219 gen_rtx_PARALLEL (cmode, v));
/* If another halving level is needed, combine pairs of CMODE vectors
   into HMODE vectors before the final recursive concat.  */
28225 gcc_assert (hmode != VOIDmode);
28226 for (i = j = 0; i < n; i += 2, j++)
28228 second[j] = gen_reg_rtx (hmode);
28229 ix86_expand_vector_init_concat (hmode, second [j],
28233 ix86_expand_vector_init_concat (mode, target, second, n);
28236 ix86_expand_vector_init_concat (mode, target, first, n);
28240 gcc_unreachable ();
28244 /* A subroutine of ix86_expand_vector_init_general. Use vector
28245 interleave to handle the most general case: all values variable,
28246 and none identical. */
28249 ix86_expand_vector_init_interleave (enum machine_mode mode,
28250 rtx target, rtx *ops, int n)
28252 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28255 rtx (*gen_load_even) (rtx, rtx, rtx);
28256 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28257 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* V8HI: interleave V4SI then V2DI; no third level needed.  */
28262 gen_load_even = gen_vec_setv8hi;
28263 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28264 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28265 inner_mode = HImode;
28266 first_imode = V4SImode;
28267 second_imode = V2DImode;
28268 third_imode = VOIDmode;
/* V16QI: one extra interleave level (V8HI -> V4SI -> V2DI).  */
28271 gen_load_even = gen_vec_setv16qi;
28272 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28273 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28274 inner_mode = QImode;
28275 first_imode = V8HImode;
28276 second_imode = V4SImode;
28277 third_imode = V2DImode;
28280 gcc_unreachable ();
/* Pack each (odd, even) element pair into one vector in ops[i].  */
28283 for (i = 0; i < n; i++)
28285 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
28286 op0 = gen_reg_rtx (SImode);
28287 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28289 /* Insert the SImode value as low element of V4SImode vector. */
28290 op1 = gen_reg_rtx (V4SImode);
28291 op0 = gen_rtx_VEC_MERGE (V4SImode,
28292 gen_rtx_VEC_DUPLICATE (V4SImode,
28294 CONST0_RTX (V4SImode),
28296 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28298 /* Cast the V4SImode vector back to a vector in orignal mode. */
28299 op0 = gen_reg_rtx (mode);
28300 emit_move_insn (op0, gen_lowpart (mode, op1));
28302 /* Load even elements into the second positon. */
28303 emit_insn ((*gen_load_even) (op0,
28304 force_reg (inner_mode,
28308 /* Cast vector to FIRST_IMODE vector. */
28309 ops[i] = gen_reg_rtx (first_imode);
28310 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28313 /* Interleave low FIRST_IMODE vectors. */
28314 for (i = j = 0; i < n; i += 2, j++)
28316 op0 = gen_reg_rtx (first_imode);
28317 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28319 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28320 ops[j] = gen_reg_rtx (second_imode);
28321 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28324 /* Interleave low SECOND_IMODE vectors. */
28325 switch (second_imode)
/* V4SI second level needs one more pass, after which the remaining
   pairs are combined as V2DI.  */
28328 for (i = j = 0; i < n / 2; i += 2, j++)
28330 op0 = gen_reg_rtx (second_imode);
28331 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28334 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28336 ops[j] = gen_reg_rtx (third_imode);
28337 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28339 second_imode = V2DImode;
28340 gen_interleave_second_low = gen_vec_interleave_lowv2di;
/* Final V2DI interleave produces the complete vector.  */
28344 op0 = gen_reg_rtx (second_imode);
28345 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28348 /* Cast the SECOND_IMODE vector back to a vector on original
28350 emit_insn (gen_rtx_SET (VOIDmode, target,
28351 gen_lowpart (mode, op0)));
28355 gcc_unreachable ();
28359 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28360 all values variable, and none identical. */
28363 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28364 rtx target, rtx vals)
28366 rtx ops[32], op0, op1;
28367 enum machine_mode half_mode = VOIDmode;
28374 if (!mmx_ok && !TARGET_SSE)
/* Word-or-wider elements: build by recursive concatenation.  */
28386 n = GET_MODE_NUNITS (mode);
28387 for (i = 0; i < n; i++)
28388 ops[i] = XVECEXP (vals, 0, i);
28389 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit byte/word vectors: build each 128-bit half by interleave,
   then concatenate the halves.  */
28393 half_mode = V16QImode;
28397 half_mode = V8HImode;
28401 n = GET_MODE_NUNITS (mode);
28402 for (i = 0; i < n; i++)
28403 ops[i] = XVECEXP (vals, 0, i);
28404 op0 = gen_reg_rtx (half_mode);
28405 op1 = gen_reg_rtx (half_mode);
28406 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28408 ix86_expand_vector_init_interleave (half_mode, op1,
28409 &ops [n >> 1], n >> 2);
28410 emit_insn (gen_rtx_SET (VOIDmode, target,
28411 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28415 if (!TARGET_SSE4_1)
28423 /* Don't use ix86_expand_vector_init_interleave if we can't
28424 move from GPR to SSE register directly. */
28425 if (!TARGET_INTER_UNIT_MOVES)
28428 n = GET_MODE_NUNITS (mode);
28429 for (i = 0; i < n; i++)
28430 ops[i] = XVECEXP (vals, 0, i);
28431 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28439 gcc_unreachable ();
/* Fallback: assemble the elements into integer words with shift/or,
   then move the words into the vector register.  */
28443 int i, j, n_elts, n_words, n_elt_per_word;
28444 enum machine_mode inner_mode;
28445 rtx words[4], shift;
28447 inner_mode = GET_MODE_INNER (mode);
28448 n_elts = GET_MODE_NUNITS (mode);
28449 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28450 n_elt_per_word = n_elts / n_words;
28451 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28453 for (i = 0; i < n_words; ++i)
28455 rtx word = NULL_RTX;
/* Elements are folded in high-to-low order so each shift makes room
   for the next lower element.  */
28457 for (j = 0; j < n_elt_per_word; ++j)
28459 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28460 elt = convert_modes (word_mode, inner_mode, elt, true);
28466 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28467 word, 1, OPTAB_LIB_WIDEN);
28468 word = expand_simple_binop (word_mode, IOR, word, elt,
28469 word, 1, OPTAB_LIB_WIDEN);
28477 emit_move_insn (target, gen_lowpart (mode, words[0]));
28478 else if (n_words == 2)
/* Clobber first so the two partial word stores are not seen as uses
   of an uninitialized register.  */
28480 rtx tmp = gen_reg_rtx (mode);
28481 emit_clobber (tmp);
28482 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28483 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28484 emit_move_insn (target, tmp);
28486 else if (n_words == 4)
28488 rtx tmp = gen_reg_rtx (V4SImode);
28489 gcc_assert (word_mode == SImode);
28490 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28491 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28492 emit_move_insn (target, gen_lowpart (mode, tmp));
28495 gcc_unreachable ();
28499 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28500 instructions unless MMX_OK is true. */
28503 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28505 enum machine_mode mode = GET_MODE (target);
28506 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28507 int n_elts = GET_MODE_NUNITS (mode);
28508 int n_var = 0, one_var = -1;
28509 bool all_same = true, all_const_zero = true;
/* Classify the elements: count non-constants (remembering the last
   one's index), detect all-zero, and detect all-identical.  */
28513 for (i = 0; i < n_elts; ++i)
28515 x = XVECEXP (vals, 0, i);
28516 if (!(CONST_INT_P (x)
28517 || GET_CODE (x) == CONST_DOUBLE
28518 || GET_CODE (x) == CONST_FIXED))
28519 n_var++, one_var = i;
28520 else if (x != CONST0_RTX (inner_mode))
28521 all_const_zero = false;
28522 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28526 /* Constants are best loaded from the constant pool. */
28529 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28533 /* If all values are identical, broadcast the value. */
28535 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28536 XVECEXP (vals, 0, 0)))
28539 /* Values where only one field is non-constant are best loaded from
28540 the pool and overwritten via move later. */
28544 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28545 XVECEXP (vals, 0, one_var),
28549 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Everything else falls through to the fully general expander.  */
28553 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  MMX_OK permits
   the use of MMX instructions.  */
28557 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28559 enum machine_mode mode = GET_MODE (target);
28560 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28561 enum machine_mode half_mode;
28562 bool use_vec_merge = false;
/* AVX 256-bit handling: extract the 128-bit half containing ELT,
   set the element there, and insert the half back.  Tables are indexed
   [mode][hi/lo half].  */
28564 static rtx (*gen_extract[6][2]) (rtx, rtx)
28566 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28567 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28568 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28569 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28570 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28571 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28573 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28575 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28576 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28577 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28578 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28579 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28580 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element case: extract the untouched element and rebuild the
   vector with a VEC_CONCAT in the right order.  */
28590 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28591 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28593 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28595 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28596 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28602 use_vec_merge = TARGET_SSE4_1;
28610 /* For the two element vectors, we implement a VEC_CONCAT with
28611 the extraction of the other element. */
28613 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28614 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28617 op0 = val, op1 = tmp;
28619 op0 = tmp, op1 = val;
28621 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28622 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28627 use_vec_merge = TARGET_SSE4_1;
28634 use_vec_merge = true;
/* V4SF without SSE4.1: synthesize the insert with unpck/shufps.
   The annotated register pictures track element positions.  */
28638 /* tmp = target = A B C D */
28639 tmp = copy_to_reg (target);
28640 /* target = A A B B */
28641 emit_insn (gen_sse_unpcklps (target, target, target));
28642 /* target = X A B B */
28643 ix86_expand_vector_set (false, target, val, 0);
28644 /* target = A X C D */
28645 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28646 GEN_INT (1), GEN_INT (0),
28647 GEN_INT (2+4), GEN_INT (3+4)));
28651 /* tmp = target = A B C D */
28652 tmp = copy_to_reg (target);
28653 /* tmp = X B C D */
28654 ix86_expand_vector_set (false, tmp, val, 0);
28655 /* target = A B X D */
28656 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28657 GEN_INT (0), GEN_INT (1),
28658 GEN_INT (0+4), GEN_INT (3+4)));
28662 /* tmp = target = A B C D */
28663 tmp = copy_to_reg (target);
28664 /* tmp = X B C D */
28665 ix86_expand_vector_set (false, tmp, val, 0);
28666 /* target = A B X D */
28667 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28668 GEN_INT (0), GEN_INT (1),
28669 GEN_INT (2+4), GEN_INT (0+4)));
28673 gcc_unreachable ();
28678 use_vec_merge = TARGET_SSE4_1;
28682 /* Element 0 handled by vec_merge below. */
28685 use_vec_merge = true;
28691 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28692 store into element 0, then shuffle them back. */
28696 order[0] = GEN_INT (elt);
28697 order[1] = const1_rtx;
28698 order[2] = const2_rtx;
28699 order[3] = GEN_INT (3);
28700 order[elt] = const0_rtx;
28702 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28703 order[1], order[2], order[3]));
28705 ix86_expand_vector_set (false, target, val, 0);
/* The same permutation is its own inverse, restoring element order.  */
28707 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28708 order[1], order[2], order[3]));
28712 /* For SSE1, we have to reuse the V4SF code. */
28713 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28714 gen_lowpart (SFmode, val), elt);
28719 use_vec_merge = TARGET_SSE2;
28722 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28726 use_vec_merge = TARGET_SSE4_1;
/* 256-bit AVX modes: select half mode and insert via the tables.  */
28733 half_mode = V16QImode;
28739 half_mode = V8HImode;
28745 half_mode = V4SImode;
28751 half_mode = V2DImode;
28757 half_mode = V4SFmode;
28763 half_mode = V2DFmode;
28769 /* Compute offset. */
28773 gcc_assert (i <= 1);
28775 /* Extract the half. */
28776 tmp = gen_reg_rtx (half_mode);
28777 emit_insn ((*gen_extract[j][i]) (tmp, target));
28779 /* Put val in tmp at elt. */
28780 ix86_expand_vector_set (false, tmp, val, elt);
28783 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Generic VEC_MERGE path: duplicate VAL and merge it into TARGET at
   the bit position selected by (1 << elt).  */
28792 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28793 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28794 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: bounce the vector through a stack slot and store the
   scalar into the element's memory location.  */
28798 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28800 emit_move_insn (mem, target);
28802 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28803 emit_move_insn (tmp, val);
28805 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  MMX_OK permits
   the use of MMX instructions.  */
28810 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28812 enum machine_mode mode = GET_MODE (vec);
28813 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28814 bool use_vec_extr = false;
28827 use_vec_extr = true;
28831 use_vec_extr = TARGET_SSE4_1;
/* V4SF without a direct extract: shuffle the wanted element into
   position 0 of a scratch, then fall through to the lane-0 extract.  */
28843 tmp = gen_reg_rtx (mode);
28844 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28845 GEN_INT (elt), GEN_INT (elt),
28846 GEN_INT (elt+4), GEN_INT (elt+4)));
28850 tmp = gen_reg_rtx (mode);
28851 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28855 gcc_unreachable ();
28858 use_vec_extr = true;
28863 use_vec_extr = TARGET_SSE4_1;
/* V4SI with SSE2: pshufd broadcasts the wanted element so the plain
   low-element extract below can be used.  */
28877 tmp = gen_reg_rtx (mode);
28878 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28879 GEN_INT (elt), GEN_INT (elt),
28880 GEN_INT (elt), GEN_INT (elt)));
28884 tmp = gen_reg_rtx (mode);
28885 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28889 gcc_unreachable ();
28892 use_vec_extr = true;
28897 /* For SSE1, we have to reuse the V4SF code. */
28898 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28899 gen_lowpart (V4SFmode, vec), elt);
28905 use_vec_extr = TARGET_SSE2;
28908 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28912 use_vec_extr = TARGET_SSE4_1;
28916 /* ??? Could extract the appropriate HImode element and shift. */
/* Direct VEC_SELECT extraction.  */
28923 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28924 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28926 /* Let the rtl optimizers know about the zero extension performed. */
28927 if (inner_mode == QImode || inner_mode == HImode)
28929 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28930 target = gen_lowpart (SImode, target);
28933 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill the vector to a stack slot and load the element's
   memory location.  */
28937 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28939 emit_move_insn (mem, vec);
28941 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28942 emit_move_insn (target, tmp);
28946 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28947 pattern to reduce; DEST is the destination; IN is the input vector. */
28950 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28952 rtx tmp1, tmp2, tmp3;
28954 tmp1 = gen_reg_rtx (V4SFmode);
28955 tmp2 = gen_reg_rtx (V4SFmode);
28956 tmp3 = gen_reg_rtx (V4SFmode);
/* movhlps brings the high pair down; FN combines it with the low pair,
   then shufps pairs the remaining two partial results for the final FN.  */
28958 emit_insn (gen_sse_movhlps (tmp1, in, in));
28959 emit_insn (fn (tmp2, tmp1, in));
28961 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28962 GEN_INT (1), GEN_INT (1),
28963 GEN_INT (1+4), GEN_INT (1+4)));
28964 emit_insn (fn (dest, tmp2, tmp3));
28967 /* Target hook for scalar_mode_supported_p. */
28969 ix86_scalar_mode_supported_p (enum machine_mode mode)
/* Decimal float and TFmode get explicit answers; everything else
   defers to the generic default.  */
28971 if (DECIMAL_FLOAT_MODE_P (mode))
28973 else if (mode == TFmode)
28976 return default_scalar_mode_supported_p (mode);
28979 /* Implements target hook vector_mode_supported_p. */
28981 ix86_vector_mode_supported_p (enum machine_mode mode)
/* A vector mode is supported when any enabled ISA level covers it.  */
28983 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28985 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28987 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28989 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28991 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28996 /* Target hook for c_mode_for_suffix. */
28997 static enum machine_mode
28998 ix86_c_mode_for_suffix (char suffix)
29008 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29010 We do this in the new i386 backend to maintain source compatibility
29011 with the old cc0-based compiler. */
29014 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29015 tree inputs ATTRIBUTE_UNUSED,
/* Every asm implicitly clobbers the flags and the x87 status word.  */
29018 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29020 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29025 /* Implements target vector targetm.asm.encode_section_info. This
29026 is not used by netware. */
29028 static void ATTRIBUTE_UNUSED
29029 ix86_encode_section_info (tree decl, rtx rtl, int first)
29031 default_encode_section_info (decl, rtl, first);
/* Variables placed in the large data section need far (64-bit)
   addressing; record that on the symbol.  */
29033 if (TREE_CODE (decl) == VAR_DECL
29034 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29035 && ix86_in_large_data_p (decl))
29036 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29039 /* Worker function for REVERSE_CONDITION. */
29042 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
/* FP compare modes must preserve unordered semantics when reversed.  */
29044 return (mode != CCFPmode && mode != CCFPUmode
29045 ? reverse_condition (code)
29046 : reverse_condition_maybe_unordered (code));
29049 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29053 output_387_reg_move (rtx insn, rtx *operands)
29055 if (REG_P (operands[0]))
/* Register destination: if the source register dies here, use a
   popping store (or ffreep when the destination is %st(0)).  */
29057 if (REG_P (operands[1])
29058 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29060 if (REGNO (operands[0]) == FIRST_STACK_REG)
29061 return output_387_ffreep (operands, 0);
29062 return "fstp\t%y0";
29064 if (STACK_TOP_P (operands[0]))
29065 return "fld%Z1\t%y1";
29068 else if (MEM_P (operands[0]))
29070 gcc_assert (REG_P (operands[1]));
/* Memory destination: pop only when the source register dies.  */
29071 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29072 return "fstp%Z0\t%y0";
29075 /* There is no non-popping store to memory for XFmode.
29076 So if we need one, follow the store with a load. */
29077 if (GET_MODE (operands[0]) == XFmode)
29078 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29080 return "fst%Z0\t%y0";
29087 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29088 FP status register is set. */
29091 ix86_emit_fp_unordered_jump (rtx label)
29093 rtx reg = gen_reg_rtx (HImode);
/* Read the x87 status word into a register.  */
29096 emit_insn (gen_x86_fnstsw_1 (reg));
/* With SAHF available, transfer it to EFLAGS and branch on the
   unordered condition ...  */
29098 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29100 emit_insn (gen_x86_sahf_1 (reg));
29102 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29103 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* ... otherwise test the C2 bit (0x04) directly and branch on NE.  */
29107 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29109 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29110 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29113 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29114 gen_rtx_LABEL_REF (VOIDmode, label),
29116 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29118 emit_jump_insn (temp);
/* Mark the branch as rarely taken.  */
29119 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29122 /* Output code to perform a log1p XFmode calculation. */
29124 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29126 rtx label1 = gen_label_rtx ();
29127 rtx label2 = gen_label_rtx ();
29129 rtx tmp = gen_reg_rtx (XFmode);
29130 rtx tmp2 = gen_reg_rtx (XFmode);
/* fyl2xp1 is only accurate for |x| below 1 - sqrt(2)/2; above that
   threshold branch to the fyl2x(1 + x) fallback.  */
29133 emit_insn (gen_absxf2 (tmp, op1));
29134 test = gen_rtx_GE (VOIDmode, tmp,
29135 CONST_DOUBLE_FROM_REAL_VALUE (
29136 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29138 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29140 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29141 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29142 emit_jump (label2);
/* Fallback path: compute ln2 * log2(1 + x) via fyl2x.  */
29144 emit_label (label1);
29145 emit_move_insn (tmp, CONST1_RTX (XFmode));
29146 emit_insn (gen_addxf3 (tmp, op1, tmp));
29147 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29148 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29150 emit_label (label2);
29153 /* Output code to perform a Newton-Rhapson approximation of a single precision
29154 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29156 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29158 rtx x0, x1, e0, e1, two;
29160 x0 = gen_reg_rtx (mode);
29161 e0 = gen_reg_rtx (mode);
29162 e1 = gen_reg_rtx (mode);
29163 x1 = gen_reg_rtx (mode);
29165 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
/* Broadcast the 2.0 constant when operating on a vector mode.  */
29167 if (VECTOR_MODE_P (mode))
29168 two = ix86_build_const_vector (SFmode, true, two);
29170 two = force_reg (mode, two);
29172 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29174 /* x0 = rcp(b) estimate */
29175 emit_insn (gen_rtx_SET (VOIDmode, x0,
29176 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0, the residual of the estimate.  */
29179 emit_insn (gen_rtx_SET (VOIDmode, e0,
29180 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0, the Newton-Raphson correction factor.  */
29182 emit_insn (gen_rtx_SET (VOIDmode, e1,
29183 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1, the refined reciprocal.  */
29185 emit_insn (gen_rtx_SET (VOIDmode, x1,
29186 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1.  */
29188 emit_insn (gen_rtx_SET (VOIDmode, res,
29189 gen_rtx_MULT (mode, a, x1)));
29192 /* Output code to perform a Newton-Rhapson approximation of a
29193 single precision floating point [reciprocal] square root. */
29195 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29198 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29201 x0 = gen_reg_rtx (mode);
29202 e0 = gen_reg_rtx (mode);
29203 e1 = gen_reg_rtx (mode);
29204 e2 = gen_reg_rtx (mode);
29205 e3 = gen_reg_rtx (mode);
29207 real_from_integer (&r, VOIDmode, -3, -1, 0);
29208 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29210 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29211 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29213 if (VECTOR_MODE_P (mode))
29215 mthree = ix86_build_const_vector (SFmode, true, mthree);
29216 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29219 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29220 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29222 /* x0 = rsqrt(a) estimate */
29223 emit_insn (gen_rtx_SET (VOIDmode, x0,
29224 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29227 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
29232 zero = gen_reg_rtx (mode);
29233 mask = gen_reg_rtx (mode);
29235 zero = force_reg (mode, CONST0_RTX(mode));
29236 emit_insn (gen_rtx_SET (VOIDmode, mask,
29237 gen_rtx_NE (mode, zero, a)));
29239 emit_insn (gen_rtx_SET (VOIDmode, x0,
29240 gen_rtx_AND (mode, x0, mask)));
29244 emit_insn (gen_rtx_SET (VOIDmode, e0,
29245 gen_rtx_MULT (mode, x0, a)));
29247 emit_insn (gen_rtx_SET (VOIDmode, e1,
29248 gen_rtx_MULT (mode, e0, x0)));
29251 mthree = force_reg (mode, mthree);
29252 emit_insn (gen_rtx_SET (VOIDmode, e2,
29253 gen_rtx_PLUS (mode, e1, mthree)));
29255 mhalf = force_reg (mode, mhalf);
29257 /* e3 = -.5 * x0 */
29258 emit_insn (gen_rtx_SET (VOIDmode, e3,
29259 gen_rtx_MULT (mode, x0, mhalf)));
29261 /* e3 = -.5 * e0 */
29262 emit_insn (gen_rtx_SET (VOIDmode, e3,
29263 gen_rtx_MULT (mode, e0, mhalf)));
29264 /* ret = e2 * e3 */
29265 emit_insn (gen_rtx_SET (VOIDmode, res,
29266 gen_rtx_MULT (mode, e2, e3)));
29269 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29271 static void ATTRIBUTE_UNUSED
29272 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29275 /* With Binutils 2.15, the "@unwind" marker must be specified on
29276 every occurrence of the ".eh_frame" section, not just the first
29279 && strcmp (name, ".eh_frame") == 0)
29281 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29282 flags & SECTION_WRITE ? "aw" : "a");
29285 default_elf_asm_named_section (name, flags, decl);
29288 /* Return the mangling of TYPE if it is an extended fundamental type. */
29290 static const char *
29291 ix86_mangle_type (const_tree type)
29293 type = TYPE_MAIN_VARIANT (type);
29295 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29296 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29299 switch (TYPE_MODE (type))
29302 /* __float128 is "g". */
29305 /* "long double" or __float80 is "e". */
29312 /* For 32-bit code we can save PIC register setup by using
29313 __stack_chk_fail_local hidden function instead of calling
29314 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29315 register, so it is better to call __stack_chk_fail directly. */
29318 ix86_stack_protect_fail (void)
29320 return TARGET_64BIT
29321 ? default_external_stack_protect_fail ()
29322 : default_hidden_stack_protect_fail ();
29325 /* Select a format to encode pointers in exception handling data. CODE
29326 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29327 true if the symbol may be affected by dynamic relocations.
29329 ??? All x86 object file formats are capable of representing this.
29330 After all, the relocation needed is the same as for the call insn.
29331 Whether or not a particular assembler allows us to enter such, I
29332 guess we'll have to see. */
29334 asm_preferred_eh_data_format (int code, int global)
29338 int type = DW_EH_PE_sdata8;
29340 || ix86_cmodel == CM_SMALL_PIC
29341 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29342 type = DW_EH_PE_sdata4;
29343 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29345 if (ix86_cmodel == CM_SMALL
29346 || (ix86_cmodel == CM_MEDIUM && code))
29347 return DW_EH_PE_udata4;
29348 return DW_EH_PE_absptr;
29351 /* Expand copysign from SIGN to the positive value ABS_VALUE
29352 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29355 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29357 enum machine_mode mode = GET_MODE (sign);
29358 rtx sgn = gen_reg_rtx (mode);
29359 if (mask == NULL_RTX)
29361 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29362 if (!VECTOR_MODE_P (mode))
29364 /* We need to generate a scalar mode mask in this case. */
29365 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29366 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29367 mask = gen_reg_rtx (mode);
29368 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29372 mask = gen_rtx_NOT (mode, mask);
29373 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29374 gen_rtx_AND (mode, mask, sign)));
29375 emit_insn (gen_rtx_SET (VOIDmode, result,
29376 gen_rtx_IOR (mode, abs_value, sgn)));
29379 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29380 mask for masking out the sign-bit is stored in *SMASK, if that is
29383 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29385 enum machine_mode mode = GET_MODE (op0);
29388 xa = gen_reg_rtx (mode);
29389 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29390 if (!VECTOR_MODE_P (mode))
29392 /* We need to generate a scalar mode mask in this case. */
29393 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29394 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29395 mask = gen_reg_rtx (mode);
29396 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29398 emit_insn (gen_rtx_SET (VOIDmode, xa,
29399 gen_rtx_AND (mode, op0, mask)));
29407 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29408 swapping the operands if SWAP_OPERANDS is true. The expanded
29409 code is a forward jump to a newly created label in case the
29410 comparison is true. The generated label rtx is returned. */
29412 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29413 bool swap_operands)
29424 label = gen_label_rtx ();
29425 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29426 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29427 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29428 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29429 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29430 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29431 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29432 JUMP_LABEL (tmp) = label;
29437 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29438 using comparison code CODE. Operands are swapped for the comparison if
29439 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29441 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29442 bool swap_operands)
29444 enum machine_mode mode = GET_MODE (op0);
29445 rtx mask = gen_reg_rtx (mode);
29454 if (mode == DFmode)
29455 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29456 gen_rtx_fmt_ee (code, mode, op0, op1)));
29458 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29459 gen_rtx_fmt_ee (code, mode, op0, op1)));
29464 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29465 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29467 ix86_gen_TWO52 (enum machine_mode mode)
29469 REAL_VALUE_TYPE TWO52r;
29472 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29473 TWO52 = const_double_from_real_value (TWO52r, mode);
29474 TWO52 = force_reg (mode, TWO52);
29479 /* Expand SSE sequence for computing lround from OP1 storing
29482 ix86_expand_lround (rtx op0, rtx op1)
29484 /* C code for the stuff we're doing below:
29485 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29488 enum machine_mode mode = GET_MODE (op1);
29489 const struct real_format *fmt;
29490 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29493 /* load nextafter (0.5, 0.0) */
29494 fmt = REAL_MODE_FORMAT (mode);
29495 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29496 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29498 /* adj = copysign (0.5, op1) */
29499 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29500 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29502 /* adj = op1 + adj */
29503 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29505 /* op0 = (imode)adj */
29506 expand_fix (op0, adj, 0);
29509 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
29512 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29514 /* C code for the stuff we're doing below (for do_floor):
29516 xi -= (double)xi > op1 ? 1 : 0;
29519 enum machine_mode fmode = GET_MODE (op1);
29520 enum machine_mode imode = GET_MODE (op0);
29521 rtx ireg, freg, label, tmp;
29523 /* reg = (long)op1 */
29524 ireg = gen_reg_rtx (imode);
29525 expand_fix (ireg, op1, 0);
29527 /* freg = (double)reg */
29528 freg = gen_reg_rtx (fmode);
29529 expand_float (freg, ireg, 0);
29531 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29532 label = ix86_expand_sse_compare_and_jump (UNLE,
29533 freg, op1, !do_floor);
29534 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29535 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29536 emit_move_insn (ireg, tmp);
29538 emit_label (label);
29539 LABEL_NUSES (label) = 1;
29541 emit_move_insn (op0, ireg);
29544 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29545 result in OPERAND0. */
29547 ix86_expand_rint (rtx operand0, rtx operand1)
29549 /* C code for the stuff we're doing below:
29550 xa = fabs (operand1);
29551 if (!isless (xa, 2**52))
29553 xa = xa + 2**52 - 2**52;
29554 return copysign (xa, operand1);
29556 enum machine_mode mode = GET_MODE (operand0);
29557 rtx res, xa, label, TWO52, mask;
29559 res = gen_reg_rtx (mode);
29560 emit_move_insn (res, operand1);
29562 /* xa = abs (operand1) */
29563 xa = ix86_expand_sse_fabs (res, &mask);
29565 /* if (!isless (xa, TWO52)) goto label; */
29566 TWO52 = ix86_gen_TWO52 (mode);
29567 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29569 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29570 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29572 ix86_sse_copysign_to_positive (res, xa, res, mask);
29574 emit_label (label);
29575 LABEL_NUSES (label) = 1;
29577 emit_move_insn (operand0, res);
29580 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29583 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29585 /* C code for the stuff we expand below.
29586 double xa = fabs (x), x2;
29587 if (!isless (xa, TWO52))
29589 xa = xa + TWO52 - TWO52;
29590 x2 = copysign (xa, x);
29599 enum machine_mode mode = GET_MODE (operand0);
29600 rtx xa, TWO52, tmp, label, one, res, mask;
29602 TWO52 = ix86_gen_TWO52 (mode);
29604 /* Temporary for holding the result, initialized to the input
29605 operand to ease control flow. */
29606 res = gen_reg_rtx (mode);
29607 emit_move_insn (res, operand1);
29609 /* xa = abs (operand1) */
29610 xa = ix86_expand_sse_fabs (res, &mask);
29612 /* if (!isless (xa, TWO52)) goto label; */
29613 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29615 /* xa = xa + TWO52 - TWO52; */
29616 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29617 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29619 /* xa = copysign (xa, operand1) */
29620 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29622 /* generate 1.0 or -1.0 */
29623 one = force_reg (mode,
29624 const_double_from_real_value (do_floor
29625 ? dconst1 : dconstm1, mode));
29627 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29628 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29629 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29630 gen_rtx_AND (mode, one, tmp)));
29631 /* We always need to subtract here to preserve signed zero. */
29632 tmp = expand_simple_binop (mode, MINUS,
29633 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29634 emit_move_insn (res, tmp);
29636 emit_label (label);
29637 LABEL_NUSES (label) = 1;
29639 emit_move_insn (operand0, res);
29642 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29645 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29647 /* C code for the stuff we expand below.
29648 double xa = fabs (x), x2;
29649 if (!isless (xa, TWO52))
29651 x2 = (double)(long)x;
29658 if (HONOR_SIGNED_ZEROS (mode))
29659 return copysign (x2, x);
29662 enum machine_mode mode = GET_MODE (operand0);
29663 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29665 TWO52 = ix86_gen_TWO52 (mode);
29667 /* Temporary for holding the result, initialized to the input
29668 operand to ease control flow. */
29669 res = gen_reg_rtx (mode);
29670 emit_move_insn (res, operand1);
29672 /* xa = abs (operand1) */
29673 xa = ix86_expand_sse_fabs (res, &mask);
29675 /* if (!isless (xa, TWO52)) goto label; */
29676 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29678 /* xa = (double)(long)x */
29679 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29680 expand_fix (xi, res, 0);
29681 expand_float (xa, xi, 0);
29684 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29686 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29687 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29688 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29689 gen_rtx_AND (mode, one, tmp)));
29690 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29691 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29692 emit_move_insn (res, tmp);
29694 if (HONOR_SIGNED_ZEROS (mode))
29695 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29697 emit_label (label);
29698 LABEL_NUSES (label) = 1;
29700 emit_move_insn (operand0, res);
29703 /* Expand SSE sequence for computing round from OPERAND1 storing
29704 into OPERAND0. Sequence that works without relying on DImode truncation
29705 via cvttsd2siq that is only available on 64bit targets. */
29707 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29709 /* C code for the stuff we expand below.
29710 double xa = fabs (x), xa2, x2;
29711 if (!isless (xa, TWO52))
29713 Using the absolute value and copying back sign makes
29714 -0.0 -> -0.0 correct.
29715 xa2 = xa + TWO52 - TWO52;
29720 else if (dxa > 0.5)
29722 x2 = copysign (xa2, x);
29725 enum machine_mode mode = GET_MODE (operand0);
29726 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29728 TWO52 = ix86_gen_TWO52 (mode);
29730 /* Temporary for holding the result, initialized to the input
29731 operand to ease control flow. */
29732 res = gen_reg_rtx (mode);
29733 emit_move_insn (res, operand1);
29735 /* xa = abs (operand1) */
29736 xa = ix86_expand_sse_fabs (res, &mask);
29738 /* if (!isless (xa, TWO52)) goto label; */
29739 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29741 /* xa2 = xa + TWO52 - TWO52; */
29742 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29743 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29745 /* dxa = xa2 - xa; */
29746 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29748 /* generate 0.5, 1.0 and -0.5 */
29749 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29750 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29751 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29755 tmp = gen_reg_rtx (mode);
29756 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29757 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29758 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29759 gen_rtx_AND (mode, one, tmp)));
29760 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29761 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29762 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29763 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29764 gen_rtx_AND (mode, one, tmp)));
29765 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29767 /* res = copysign (xa2, operand1) */
29768 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29770 emit_label (label);
29771 LABEL_NUSES (label) = 1;
29773 emit_move_insn (operand0, res);
29776 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29779 ix86_expand_trunc (rtx operand0, rtx operand1)
29781 /* C code for SSE variant we expand below.
29782 double xa = fabs (x), x2;
29783 if (!isless (xa, TWO52))
29785 x2 = (double)(long)x;
29786 if (HONOR_SIGNED_ZEROS (mode))
29787 return copysign (x2, x);
29790 enum machine_mode mode = GET_MODE (operand0);
29791 rtx xa, xi, TWO52, label, res, mask;
29793 TWO52 = ix86_gen_TWO52 (mode);
29795 /* Temporary for holding the result, initialized to the input
29796 operand to ease control flow. */
29797 res = gen_reg_rtx (mode);
29798 emit_move_insn (res, operand1);
29800 /* xa = abs (operand1) */
29801 xa = ix86_expand_sse_fabs (res, &mask);
29803 /* if (!isless (xa, TWO52)) goto label; */
29804 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29806 /* x = (double)(long)x */
29807 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29808 expand_fix (xi, res, 0);
29809 expand_float (res, xi, 0);
29811 if (HONOR_SIGNED_ZEROS (mode))
29812 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29814 emit_label (label);
29815 LABEL_NUSES (label) = 1;
29817 emit_move_insn (operand0, res);
29820 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29823 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29825 enum machine_mode mode = GET_MODE (operand0);
29826 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29828 /* C code for SSE variant we expand below.
29829 double xa = fabs (x), x2;
29830 if (!isless (xa, TWO52))
29832 xa2 = xa + TWO52 - TWO52;
29836 x2 = copysign (xa2, x);
29840 TWO52 = ix86_gen_TWO52 (mode);
29842 /* Temporary for holding the result, initialized to the input
29843 operand to ease control flow. */
29844 res = gen_reg_rtx (mode);
29845 emit_move_insn (res, operand1);
29847 /* xa = abs (operand1) */
29848 xa = ix86_expand_sse_fabs (res, &smask);
29850 /* if (!isless (xa, TWO52)) goto label; */
29851 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29853 /* res = xa + TWO52 - TWO52; */
29854 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29855 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29856 emit_move_insn (res, tmp);
29859 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29861 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29862 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29863 emit_insn (gen_rtx_SET (VOIDmode, mask,
29864 gen_rtx_AND (mode, mask, one)));
29865 tmp = expand_simple_binop (mode, MINUS,
29866 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29867 emit_move_insn (res, tmp);
29869 /* res = copysign (res, operand1) */
29870 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29872 emit_label (label);
29873 LABEL_NUSES (label) = 1;
29875 emit_move_insn (operand0, res);
29878 /* Expand SSE sequence for computing round from OPERAND1 storing
29881 ix86_expand_round (rtx operand0, rtx operand1)
29883 /* C code for the stuff we're doing below:
29884 double xa = fabs (x);
29885 if (!isless (xa, TWO52))
29887 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29888 return copysign (xa, x);
29890 enum machine_mode mode = GET_MODE (operand0);
29891 rtx res, TWO52, xa, label, xi, half, mask;
29892 const struct real_format *fmt;
29893 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29895 /* Temporary for holding the result, initialized to the input
29896 operand to ease control flow. */
29897 res = gen_reg_rtx (mode);
29898 emit_move_insn (res, operand1);
29900 TWO52 = ix86_gen_TWO52 (mode);
29901 xa = ix86_expand_sse_fabs (res, &mask);
29902 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29904 /* load nextafter (0.5, 0.0) */
29905 fmt = REAL_MODE_FORMAT (mode);
29906 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29907 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29909 /* xa = xa + 0.5 */
29910 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29911 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29913 /* xa = (double)(int64_t)xa */
29914 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29915 expand_fix (xi, xa, 0);
29916 expand_float (xa, xi, 0);
29918 /* res = copysign (xa, operand1) */
29919 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29921 emit_label (label);
29922 LABEL_NUSES (label) = 1;
29924 emit_move_insn (operand0, res);
29928 /* Validate whether a SSE5 instruction is valid or not.
29929 OPERANDS is the array of operands.
29930 NUM is the number of operands.
29931 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29932 NUM_MEMORY is the maximum number of memory operands to accept.
29933 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
29936 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29937 bool uses_oc0, int num_memory, bool commutative)
29943 /* Count the number of memory arguments */
29946 for (i = 0; i < num; i++)
29948 enum machine_mode mode = GET_MODE (operands[i]);
29949 if (register_operand (operands[i], mode))
29952 else if (memory_operand (operands[i], mode))
29954 mem_mask |= (1 << i);
29960 rtx pattern = PATTERN (insn);
29962 /* allow 0 for pcmov */
29963 if (GET_CODE (pattern) != SET
29964 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29966 || operands[i] != CONST0_RTX (mode))
29971 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29972 a memory operation. */
29973 if (num_memory < 0)
29975 num_memory = -num_memory;
29976 if ((mem_mask & (1 << (num-1))) != 0)
29978 mem_mask &= ~(1 << (num-1));
29983 /* If there were no memory operations, allow the insn */
29987 /* Do not allow the destination register to be a memory operand. */
29988 else if (mem_mask & (1 << 0))
29991 /* If there are too many memory operations, disallow the instruction. While
29992 the hardware only allows 1 memory reference, before register allocation
29993 for some insns, we allow two memory operations sometimes in order to allow
29994 code like the following to be optimized:
29996 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29998 or similar cases that are vectorized into using the fmaddss
30000 else if (mem_count > num_memory)
30003 /* Don't allow more than one memory operation if not optimizing. */
30004 else if (mem_count > 1 && !optimize)
30007 else if (num == 4 && mem_count == 1)
30009 /* formats (destination is the first argument), example fmaddss:
30010 xmm1, xmm1, xmm2, xmm3/mem
30011 xmm1, xmm1, xmm2/mem, xmm3
30012 xmm1, xmm2, xmm3/mem, xmm1
30013 xmm1, xmm2/mem, xmm3, xmm1 */
30015 return ((mem_mask == (1 << 1))
30016 || (mem_mask == (1 << 2))
30017 || (mem_mask == (1 << 3)));
30019 /* format, example pmacsdd:
30020 xmm1, xmm2, xmm3/mem, xmm1 */
30022 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
30024 return (mem_mask == (1 << 2));
30027 else if (num == 4 && num_memory == 2)
30029 /* If there are two memory operations, we can load one of the memory ops
30030 into the destination register. This is for optimizing the
30031 multiply/add ops, which the combiner has optimized both the multiply
30032 and the add insns to have a memory operation. We have to be careful
30033 that the destination doesn't overlap with the inputs. */
30034 rtx op0 = operands[0];
30036 if (reg_mentioned_p (op0, operands[1])
30037 || reg_mentioned_p (op0, operands[2])
30038 || reg_mentioned_p (op0, operands[3]))
30041 /* formats (destination is the first argument), example fmaddss:
30042 xmm1, xmm1, xmm2, xmm3/mem
30043 xmm1, xmm1, xmm2/mem, xmm3
30044 xmm1, xmm2, xmm3/mem, xmm1
30045 xmm1, xmm2/mem, xmm3, xmm1
30047 For the oc0 case, we will load either operands[1] or operands[3] into
30048 operands[0], so any combination of 2 memory operands is ok. */
30052 /* format, example pmacsdd:
30053 xmm1, xmm2, xmm3/mem, xmm1
30055 For the integer multiply/add instructions be more restrictive and
30056 require operands[2] and operands[3] to be the memory operands. */
30058 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
30060 return (mem_mask == ((1 << 2) | (1 << 3)));
30063 else if (num == 3 && num_memory == 1)
30065 /* formats, example protb:
30066 xmm1, xmm2, xmm3/mem
30067 xmm1, xmm2/mem, xmm3 */
30069 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
30071 /* format, example comeq:
30072 xmm1, xmm2, xmm3/mem */
30074 return (mem_mask == (1 << 2));
30078 gcc_unreachable ();
30084 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
30085 hardware will allow by using the destination register to load one of the
30086 memory operations. Presently this is used by the multiply/add routines to
30087 allow 2 memory references. */
30090 ix86_expand_sse5_multiple_memory (rtx operands[],
30092 enum machine_mode mode)
30094 rtx op0 = operands[0];
30096 || memory_operand (op0, mode)
30097 || reg_mentioned_p (op0, operands[1])
30098 || reg_mentioned_p (op0, operands[2])
30099 || reg_mentioned_p (op0, operands[3]))
30100 gcc_unreachable ();
30102 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
30103 the destination register. */
30104 if (memory_operand (operands[1], mode))
30106 emit_move_insn (op0, operands[1]);
30109 else if (memory_operand (operands[3], mode))
30111 emit_move_insn (op0, operands[3]);
30115 gcc_unreachable ();
30121 /* Table of valid machine attributes. */
30122 static const struct attribute_spec ix86_attribute_table[] =
30124 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30125 /* Stdcall attribute says callee is responsible for popping arguments
30126 if they are not variable. */
30127 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30128 /* Fastcall attribute says callee is responsible for popping arguments
30129 if they are not variable. */
30130 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30131 /* Cdecl attribute says the callee is a normal C declaration */
30132 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30133 /* Regparm attribute specifies how many integer arguments are to be
30134 passed in registers. */
30135 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30136 /* Sseregparm attribute says we are using x86_64 calling conventions
30137 for FP arguments. */
30138 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30139 /* force_align_arg_pointer says this function realigns the stack at entry. */
30140 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30141 false, true, true, ix86_handle_cconv_attribute },
30142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30143 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30144 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30145 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30147 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30148 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30149 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30150 SUBTARGET_ATTRIBUTE_TABLE,
30152 /* ms_abi and sysv_abi calling convention function attributes. */
30153 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30154 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30156 { NULL, 0, 0, false, false, false, NULL }
30159 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30161 x86_builtin_vectorization_cost (bool runtime_test)
30163 /* If the branch of the runtime test is taken - i.e. - the vectorized
30164 version is skipped - this incurs a misprediction cost (because the
30165 vectorized version is expected to be the fall-through). So we subtract
30166 the latency of a mispredicted branch from the costs that are incured
30167 when the vectorized version is executed.
30169 TODO: The values in individual target tables have to be tuned or new
30170 fields may be needed. For eg. on K8, the default branch path is the
30171 not-taken path. If the taken path is predicted correctly, the minimum
30172 penalty of going down the taken-path is 1 cycle. If the taken-path is
30173 not predicted correctly, then the minimum penalty is 10 cycles. */
30177 return (-(ix86_cost->cond_taken_branch_cost));
30183 /* This function returns the calling abi specific va_list type node.
30184 It returns the FNDECL specific va_list type. */
30187 ix86_fn_abi_va_list (tree fndecl)
30190 return va_list_type_node;
30191 gcc_assert (fndecl != NULL_TREE);
30193 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30194 return ms_va_list_type_node;
30196 return sysv_va_list_type_node;
30199 /* Returns the canonical va_list type specified by TYPE. If there
30200 is no valid TYPE provided, it return NULL_TREE. */
30203 ix86_canonical_va_list_type (tree type)
30207 /* Resolve references and pointers to va_list type. */
30208 if (INDIRECT_REF_P (type))
30209 type = TREE_TYPE (type);
30210 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30211 type = TREE_TYPE (type);
30215 wtype = va_list_type_node;
30216 gcc_assert (wtype != NULL_TREE);
30218 if (TREE_CODE (wtype) == ARRAY_TYPE)
30220 /* If va_list is an array type, the argument may have decayed
30221 to a pointer type, e.g. by being passed to another function.
30222 In that case, unwrap both types so that we can compare the
30223 underlying records. */
30224 if (TREE_CODE (htype) == ARRAY_TYPE
30225 || POINTER_TYPE_P (htype))
30227 wtype = TREE_TYPE (wtype);
30228 htype = TREE_TYPE (htype);
30231 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30232 return va_list_type_node;
30233 wtype = sysv_va_list_type_node;
30234 gcc_assert (wtype != NULL_TREE);
30236 if (TREE_CODE (wtype) == ARRAY_TYPE)
30238 /* If va_list is an array type, the argument may have decayed
30239 to a pointer type, e.g. by being passed to another function.
30240 In that case, unwrap both types so that we can compare the
30241 underlying records. */
30242 if (TREE_CODE (htype) == ARRAY_TYPE
30243 || POINTER_TYPE_P (htype))
30245 wtype = TREE_TYPE (wtype);
30246 htype = TREE_TYPE (htype);
30249 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30250 return sysv_va_list_type_node;
30251 wtype = ms_va_list_type_node;
30252 gcc_assert (wtype != NULL_TREE);
30254 if (TREE_CODE (wtype) == ARRAY_TYPE)
30256 /* If va_list is an array type, the argument may have decayed
30257 to a pointer type, e.g. by being passed to another function.
30258 In that case, unwrap both types so that we can compare the
30259 underlying records. */
30260 if (TREE_CODE (htype) == ARRAY_TYPE
30261 || POINTER_TYPE_P (htype))
30263 wtype = TREE_TYPE (wtype);
30264 htype = TREE_TYPE (htype);
30267 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30268 return ms_va_list_type_node;
30271 return std_canonical_va_list_type (type);
30274 /* Iterate through the target-specific builtin types for va_list.
30275 IDX denotes the iterator, *PTREE is set to the result type of
30276 the va_list builtin, and *PNAME to its internal type.
30277 Returns zero if there is no element for this index, otherwise
30278 IDX should be increased upon the next call.
30279 Note, do not iterate a base builtin's name like __builtin_va_list.
30280 Used from c_common_nodes_and_builtins. */
/* NOTE(review): this listing appears lossy -- the function's return type
   (presumably "static int"), its braces, and the switch/case scaffolding
   that selects on IDX are not visible here.  Confirm against the original
   i386.c before relying on the body below.  */
30283 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Visible arm: report the Microsoft-ABI va_list builtin type/name.  */
30289 *ptree = ms_va_list_type_node;
30290 *pname = "__builtin_ms_va_list";
/* Visible arm: report the System V-ABI va_list builtin type/name.  */
30293 *ptree = sysv_va_list_type_node;
30294 *pname = "__builtin_sysv_va_list";
30302 /* Initialize the GCC target structure. */
/* The pattern throughout this table is the standard GCC target-hook idiom:
   #undef each TARGET_* hook macro (clearing the default from target-def.h),
   then #define it to this port's implementation, so that the
   TARGET_INITIALIZER expansion at the bottom picks up the i386 versions.
   NOTE(review): this listing appears lossy -- the #else/#endif lines that
   must pair with the #if/#ifndef directives below (e.g. for
   TARGET_DLLIMPORT_DECL_ATTRIBUTES, SUBTARGET_ENCODE_SECTION_INFO, and
   SUBTARGET_INSERT_ATTRIBUTES) are not visible here; confirm against the
   original i386.c.  */
30303 #undef TARGET_RETURN_IN_MEMORY
30304 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30306 #undef TARGET_LEGITIMIZE_ADDRESS
30307 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30309 #undef TARGET_ATTRIBUTE_TABLE
30310 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30311 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30312 # undef TARGET_MERGE_DECL_ATTRIBUTES
30313 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30316 #undef TARGET_COMP_TYPE_ATTRIBUTES
30317 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin-function hooks (init/expand plus vectorizer builtins).  */
30319 #undef TARGET_INIT_BUILTINS
30320 #define TARGET_INIT_BUILTINS ix86_init_builtins
30321 #undef TARGET_EXPAND_BUILTIN
30322 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30324 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30325 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30326 ix86_builtin_vectorized_function
30328 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30329 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30331 #undef TARGET_BUILTIN_RECIPROCAL
30332 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30334 #undef TARGET_ASM_FUNCTION_EPILOGUE
30335 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
/* Subtargets may override section-info encoding; default to the i386 one.
   NOTE(review): the matching #else/#endif for this #ifndef are not visible
   in this listing.  */
30337 #undef TARGET_ENCODE_SECTION_INFO
30338 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30339 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30341 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
/* Assembler output conventions (parens, aligned/unaligned data ops).  */
30344 #undef TARGET_ASM_OPEN_PAREN
30345 #define TARGET_ASM_OPEN_PAREN ""
30346 #undef TARGET_ASM_CLOSE_PAREN
30347 #define TARGET_ASM_CLOSE_PAREN ""
30349 #undef TARGET_ASM_ALIGNED_HI_OP
30350 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30351 #undef TARGET_ASM_ALIGNED_SI_OP
30352 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30354 #undef TARGET_ASM_ALIGNED_DI_OP
30355 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 tolerates unaligned data accesses, so the unaligned directives
   simply reuse the aligned ones.  */
30358 #undef TARGET_ASM_UNALIGNED_HI_OP
30359 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30360 #undef TARGET_ASM_UNALIGNED_SI_OP
30361 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30362 #undef TARGET_ASM_UNALIGNED_DI_OP
30363 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction-scheduler hooks.  */
30365 #undef TARGET_SCHED_ADJUST_COST
30366 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30367 #undef TARGET_SCHED_ISSUE_RATE
30368 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30369 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30370 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30371 ia32_multipass_dfa_lookahead
30373 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30374 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30377 #undef TARGET_HAVE_TLS
30378 #define TARGET_HAVE_TLS true
30380 #undef TARGET_CANNOT_FORCE_CONST_MEM
30381 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30382 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30383 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30385 #undef TARGET_DELEGITIMIZE_ADDRESS
30386 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30388 #undef TARGET_MS_BITFIELD_LAYOUT_P
30389 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* NOTE(review): the darwin_binds_local_p definition below is presumably
   guarded by a TARGET_MACHO conditional in the original file; the guard
   lines are not visible in this listing.  */
30392 #undef TARGET_BINDS_LOCAL_P
30393 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30395 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30396 #undef TARGET_BINDS_LOCAL_P
30397 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30400 #undef TARGET_ASM_OUTPUT_MI_THUNK
30401 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30402 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30403 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30405 #undef TARGET_ASM_FILE_START
30406 #define TARGET_ASM_FILE_START x86_file_start
/* NOTE(review): the first operand line of this multi-line expression
   (before "| TARGET_SUBTARGET_DEFAULT") is not visible in this listing.  */
30408 #undef TARGET_DEFAULT_TARGET_FLAGS
30409 #define TARGET_DEFAULT_TARGET_FLAGS \
30411 | TARGET_SUBTARGET_DEFAULT \
30412 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30414 #undef TARGET_HANDLE_OPTION
30415 #define TARGET_HANDLE_OPTION ix86_handle_option
/* RTL cost and condition-code hooks.  */
30417 #undef TARGET_RTX_COSTS
30418 #define TARGET_RTX_COSTS ix86_rtx_costs
30419 #undef TARGET_ADDRESS_COST
30420 #define TARGET_ADDRESS_COST ix86_address_cost
30422 #undef TARGET_FIXED_CONDITION_CODE_REGS
30423 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30424 #undef TARGET_CC_MODES_COMPATIBLE
30425 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30427 #undef TARGET_MACHINE_DEPENDENT_REORG
30428 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30430 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30431 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* va_list handling hooks (dual-ABI: SysV and MS varargs).  */
30433 #undef TARGET_BUILD_BUILTIN_VA_LIST
30434 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30436 #undef TARGET_FN_ABI_VA_LIST
30437 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30439 #undef TARGET_CANONICAL_VA_LIST_TYPE
30440 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30442 #undef TARGET_EXPAND_BUILTIN_VA_START
30443 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30445 #undef TARGET_MD_ASM_CLOBBERS
30446 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Calling-convention / argument-passing hooks.  */
30448 #undef TARGET_PROMOTE_PROTOTYPES
30449 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30450 #undef TARGET_STRUCT_VALUE_RTX
30451 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30452 #undef TARGET_SETUP_INCOMING_VARARGS
30453 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30454 #undef TARGET_MUST_PASS_IN_STACK
30455 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30456 #undef TARGET_PASS_BY_REFERENCE
30457 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30458 #undef TARGET_INTERNAL_ARG_POINTER
30459 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30460 #undef TARGET_UPDATE_STACK_BOUNDARY
30461 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30462 #undef TARGET_GET_DRAP_RTX
30463 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30464 #undef TARGET_STRICT_ARGUMENT_NAMING
30465 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30467 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30468 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30470 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30471 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30473 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30474 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30476 #undef TARGET_C_MODE_FOR_SUFFIX
30477 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
/* NOTE(review): this dwarf dtprel definition is presumably guarded by an
   #ifdef (HAVE_AS_TLS or similar) whose lines are not visible here.  */
30480 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30481 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30484 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30485 #undef TARGET_INSERT_ATTRIBUTES
30486 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30489 #undef TARGET_MANGLE_TYPE
30490 #define TARGET_MANGLE_TYPE ix86_mangle_type
30492 #undef TARGET_STACK_PROTECT_FAIL
30493 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30495 #undef TARGET_FUNCTION_VALUE
30496 #define TARGET_FUNCTION_VALUE ix86_function_value
30498 #undef TARGET_SECONDARY_RELOAD
30499 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30501 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30502 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Per-function target-option ("target" attribute) hooks.  */
30504 #undef TARGET_SET_CURRENT_FUNCTION
30505 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30507 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30508 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30510 #undef TARGET_OPTION_SAVE
30511 #define TARGET_OPTION_SAVE ix86_function_specific_save
30513 #undef TARGET_OPTION_RESTORE
30514 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30516 #undef TARGET_OPTION_PRINT
30517 #define TARGET_OPTION_PRINT ix86_function_specific_print
30519 #undef TARGET_OPTION_CAN_INLINE_P
30520 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30522 #undef TARGET_EXPAND_TO_RTL_HOOK
30523 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30525 #undef TARGET_LEGITIMATE_ADDRESS_P
30526 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30528 #undef TARGET_FRAME_POINTER_REQUIRED
30529 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
/* Instantiate the target hook vector using all of the macros above.  */
30531 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collection roots generated by gengtype for this file.  */
30533 #include "gt-i386.h"